码迷,mamicode.com
首页 > 其他好文 > 详细

Hadoop 自定义数据类型

时间:2015-02-04 13:11:16      阅读:202      评论:0      收藏:0      [点我收藏+]

标签:

Hadoop 的自定义数据类型有两种:一种较为简单,只适用于值(value);另一种更为完整,键(key)和值(value)都适用。

一、针对值,实现 Writable 接口
package org.apache.hadoop.io;

import java.io.DataOutput;
import java.io.DataInput;
import java.io.IOException;

/**
 * Contract for a type that can write itself to a {@link DataOutput} and
 * populate itself from a {@link DataInput}.
 */
public interface Writable {
  /**
   * Writes this object to {@code out}.
   *
   * @param out the sink to write to
   * @throws IOException declared so implementations can propagate I/O failures
   */
  void write(DataOutput out) throws IOException;
  /**
   * Reads this object's state from {@code in}.
   * NOTE(review): implementations presumably read values in the same order
   * {@link #write} emitted them — confirm against the Hadoop documentation.
   *
   * @param in the source to read from
   * @throws IOException declared so implementations can propagate I/O failures
   */
  void readFields(DataInput in) throws IOException;
}

例子:

import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Street address value type that serializes itself through the {@code Writable} contract.
 *
 * <p>{@link #write} emits {@code city} and {@code street} with {@code writeUTF} and
 * {@code doorplate} with {@code writeInt}, in that order; {@link #readFields} reads them
 * back in the same order.
 *
 * <p>A {@code null} city or street is written as the empty string (the same value the
 * no-arg constructor uses), so an instance read back never holds {@code null} in those fields.
 */
public class Address implements Writable {
    /** City name; may be assigned directly. */
    public String city;
    /** Street name; may be assigned directly. */
    public String street;
    /** Door/house number. */
    public int doorplate;

    /** Creates an address with empty city and street and doorplate 0. */
    public Address() {
        this("", "", 0);
    }

    /**
     * Creates an address from the given parts.
     *
     * @param city      city name
     * @param street    street name
     * @param doorplate door/house number
     */
    public Address(String city, String street, int doorplate) {
        this.city = city;
        this.street = street;
        this.doorplate = doorplate;
    }

    /**
     * Writes city, street and doorplate to {@code out}, in that order.
     *
     * @param out the sink to write to
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // writeUTF throws NullPointerException on null, so substitute "" to keep
        // serialization from failing on an unset field.
        out.writeUTF(emptyIfNull(this.city));
        out.writeUTF(emptyIfNull(this.street));
        out.writeInt(this.doorplate);
    }

    /**
     * Overwrites this instance's fields with values read from {@code in},
     * in the order {@link #write} emits them.
     *
     * @param in the source to read from
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.city = in.readUTF();
        this.street = in.readUTF();
        this.doorplate = in.readInt();
    }

    /** Returns {@code city,street,doorplate} joined by commas. */
    @Override
    public String toString() {
        return this.city + "," + this.street + "," + this.doorplate;
    }

    /** Returns {@code s}, or the empty string when {@code s} is {@code null}. */
    private static String emptyIfNull(String s) {
        return s == null ? "" : s;
    }
}

二、针对于键和值,需要指定排序规则,自定义类需要实现 WritableComparable 泛型接口

package org.apache.hadoop.io;
/**
 * Combines {@link Writable} and {@link Comparable} into a single type;
 * declares no members of its own.
 *
 * @param <T> the type this object can be compared with
 */
public interface WritableComparable<T> extends Writable, Comparable<T> {
}

package java.lang;
import java.util.*;
/**
 * Declares a single comparison method against another object of type {@code T}.
 *
 * @param <T> the type of object this one is compared with
 */
public interface Comparable<T> {
    /**
     * Compares this object with {@code o}.
     * NOTE(review): by the usual JDK convention the result is negative, zero or
     * positive for less-than, equal, greater-than — confirm against the JDK documentation.
     *
     * @param o the object to compare with
     * @return an int encoding the comparison result
     */
    public int compareTo(T o);
}

例子:

import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Street address type usable as either a key or a value: it serializes itself
 * and defines an ordering by city, then street, then doorplate.
 *
 * <p>{@link #write} emits {@code city} and {@code street} with {@code writeUTF} and
 * {@code doorplate} with {@code writeInt}, in that order; {@link #readFields} reads them
 * back in the same order.
 *
 * <p>{@code null} city/street values are tolerated by {@link #compareTo},
 * {@link #equals} and {@link #hashCode}; {@link #write} emits them as the empty string,
 * so an instance read back never holds {@code null} in those fields.
 */
public class Address implements WritableComparable<Address> {
    /** City name; may be assigned directly. */
    public String city;
    /** Street name; may be assigned directly. */
    public String street;
    /** Door/house number. */
    public int doorplate;

    /** Creates an address with empty city and street and doorplate 0. */
    public Address() {
        this("", "", 0);
    }

    /**
     * Creates an address from the given parts.
     *
     * @param city      city name
     * @param street    street name
     * @param doorplate door/house number
     */
    public Address(String city, String street, int doorplate) {
        this.city = city;
        this.street = street;
        this.doorplate = doorplate;
    }

    /**
     * Writes city, street and doorplate to {@code out}, in that order.
     *
     * @param out the sink to write to
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // writeUTF throws NullPointerException on null; the rest of this class accepts
        // null strings, so emit "" instead of failing during serialization.
        out.writeUTF(this.city == null ? "" : this.city);
        out.writeUTF(this.street == null ? "" : this.street);
        out.writeInt(this.doorplate);
    }

    /**
     * Overwrites this instance's fields with values read from {@code in},
     * in the order {@link #write} emits them.
     *
     * @param in the source to read from
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.city = in.readUTF();
        this.street = in.readUTF();
        this.doorplate = in.readInt();
    }

    /**
     * Orders addresses by city, then street (both via {@link #strCompareTo}),
     * then by doorplate ascending.
     *
     * @param other the address to compare with; must not be {@code null}
     * @return -1, 0 or 1 from the doorplate comparison when city and street tie,
     *         otherwise the non-zero result of the first differing string comparison
     */
    @Override
    public int compareTo(Address other) {
        int n = this.strCompareTo(this.city, other.city);
        if (n != 0) {
            return n;
        }
        n = this.strCompareTo(this.street, other.street);
        if (n != 0) {
            return n;
        }
        if (this.doorplate < other.doorplate) {
            return -1;
        }
        return this.doorplate == other.doorplate ? 0 : 1;
    }

    /** Returns {@code city,street,doorplate} joined by commas. */
    @Override
    public String toString() {
        return this.city + "," + this.street + "," + this.doorplate;
    }

    /**
     * Two addresses are equal when city, street and doorplate all match;
     * {@code null} strings only equal other {@code null} strings.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is an {@code Address} with the same field values
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj instanceof Address) {
            Address other = (Address) obj;
            return this.strEquals(this.city, other.city)
                    && this.strEquals(this.street, other.street)
                    && this.doorplate == other.doorplate;
        }
        return false;
    }

    /**
     * Overriding hashCode() matters: Hadoop's Partitioners use this method
     * (note carried over from the original author).
     *
     * @return a hash combining city, street and doorplate; {@code null} strings contribute 0
     */
    @Override
    public int hashCode() {
        return 13 * (this.city == null ? 0 : this.city.hashCode())
                + 67 * (this.street == null ? 0 : this.street.hashCode())
                + 151 * this.doorplate;
    }

    /**
     * Null-safe string equality.
     *
     * @param a first string, may be {@code null}
     * @param b second string, may be {@code null}
     * @return {@code true} if both are {@code null} or {@code a.equals(b)}
     */
    public boolean strEquals(String a, String b) {
        return (a == null && b == null) || (a != null && a.equals(b));
    }

    /**
     * Null-safe string comparison that sorts {@code null} before any non-null string.
     *
     * @param a first string, may be {@code null}
     * @param b second string, may be {@code null}
     * @return 0 if both are {@code null}, 1 if only {@code b} is {@code null},
     *         -1 if only {@code a} is {@code null}, otherwise {@code a.compareTo(b)}
     */
    public int strCompareTo(String a, String b) {
        if (a == null) {
            return b == null ? 0 : -1;
        }
        if (b == null) {
            return 1;
        }
        return a.compareTo(b);
    }
}


Hadoop 自定义数据类型

标签:

原文地址:http://my.oschina.net/zc741520/blog/375187

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!