Java 中的類要能夠序列化,需要實現 Serializable 接口。
Hadoop 中的類要能夠序列化,需要實現 Writable 接口。
Hadoop 為 Java 的基本數據類型提供了對應的序列化類:
Writable 接口中定義了兩個方法:
readFields(DataInput in) 是反序列化方法,write(DataOutput out) 是序列化方法。
官網例子:
public class MyWritable implements Writable{ // Some data private int counter; private long timestamp; public static MyWritable read(DataInput in) throws IOException { MyWritable w = new MyWritable(); w.readFields(in); return w; } public void write(DataOutput out) throws IOException { //反序列化,從流中讀取數據 out.writeInt(counter); out.writeLong(timestamp); } public void readFields(DataInput in) throws IOException { //序列化,將對象數據讀入到流中 counter = in.readInt(); timestamp = in.readLong(); } }
通過 Hadoop 的 IntWritable 和 Java 的 Integer 進行對比:
package com.jf.hdfs; import java.io.ByteArrayOutputStream; import java.io.ObjectOutputStream; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Writable; public class SerializationCompare { // hadoop全部對象類型的父類型Writable public static byte[] serialize(Writable writable) throws Exception { //序列化其實就是將對象轉行爲字節數組 ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos); writable.write(oos); oos.close(); return baos.toByteArray(); } //java中序列化將類類型對象轉化爲字節數組 public static byte[] serialize(Integer integer) throws Exception{ ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos); oos.writeInt(integer); oos.close(); return baos.toByteArray(); } public static void main(String[] args) throws Exception { IntWritable intWritable = new IntWritable(200); byte[] bytes = serialize(intWritable); System.out.println("hadoop序列化:"+bytes.length); Integer integer = new Integer(200); byte[] bytes2 = serialize(integer); System.out.println("java序列化:"+bytes2.length); } }
執行結果:雖然兩者長度相同,但在大數據場景下 Hadoop 的序列化更佔優勢。(這裏長度相同,是因為兩段代碼都是經由 ObjectOutputStream 寫出一個 int:4 字節流頭 + 2 字節塊數據頭 + 4 字節整數 = 10 字節。)
hadoop序列化:10
java序列化:10
package com.jf.hdfs; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInput; import java.io.DataInputStream; import java.io.DataOutput; import java.io.DataOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; public class ObjecSerialize { public static void main(String[] args) throws Exception { Student student = new Student(); student.setId(new IntWritable(10001)); student.setName(new Text("sean")); student.setGender(true); List<Text> list = new ArrayList<Text>(); list.add(new Text("學校")); list.add(new Text("年紀")); list.add(new Text("班級")); student.setList(list); // 對象序列化,將對象寫入到流中 ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream dos = new DataOutputStream(baos); student.write(dos); byte[] b = baos.toByteArray(); System.out.println("序列化以後結果:" + Arrays.toString(b) + ",字節數組長度:" + b.length); // 進行反序列化 ByteArrayInputStream bais = new ByteArrayInputStream(b); DataInputStream dis = new DataInputStream(bais); Student student2 = new Student(); student2.readFields(dis); System.out.println("反序列化ID="+student2.getId().get()+",name="+student2.getName().toString()+",gender="+student2.isGender()+",list=["+student2.getList().get(0).toString()+","+student2.getList().get(1).toString()+","+student2.getList().get(2).toString()+"]"); } } class Student implements Writable { private IntWritable id; private Text name; private boolean gender; private List<Text> list = new ArrayList<Text>(); Student() { id = new IntWritable(); name = new Text(); } Student(Student student) { // 這種屬於引用複製,hadoop中嚴格杜絕 // this.id = student.id; // this.name = student.name; // 在hadoop中要使用這種屬性值的複製 id = new IntWritable(student.id.get()); name = new Text(student.name.toString()); } public void write(DataOutput out) throws 
IOException { // 序列化過程,將對象中全部數據寫入到流中 id.write(out); name.write(out); BooleanWritable genter = new BooleanWritable(gender); genter.write(out); // 在hadoop中序列化集合時,要將集合的長度也進行序列化 int size = list.size(); new IntWritable(size).write(out); // 而後再序列化集合中的每個元素 for (int i = 0; i < size; i++) { Text text = list.get(i); text.write(out); } } // 反序列化將流中的二進制讀出到對象中 public void readFields(DataInput in) throws IOException { id.readFields(in); name.readFields(in); // 從流中讀出Writable類型,而後再複製給java基本類型 BooleanWritable bw = new BooleanWritable(); bw.readFields(in); gender = bw.get(); // 反序列化集合時首選將集合長度進行反序列化 IntWritable size = new IntWritable(); size.readFields(in); list.clear(); // 再反序列化流中集合的每個元素 for (int i = 0; i < size.get(); i++) { Text text = new Text(); text.readFields(in); list.add(text); } } public IntWritable getId() { return id; } public void setId(IntWritable id) { this.id = id; } public Text getName() { return name; } public void setName(Text name) { this.name = name; } public boolean isGender() { return gender; } public void setGender(boolean gender) { this.gender = gender; } public List<Text> getList() { return list; } public void setList(List<Text> list) { this.list = list; } }
執行結果:
序列化以後結果:[0, 0, 39, 17, 4, 115, 101, 97, 110, 1, 0, 0, 0, 3, 6, -27, -83, -90, -26, -96, -95, 6, -27, -71, -76, -25, -70, -86, 6, -25, -113, -83, -25, -70, -89],字節數組長度:35
反序列化ID=10001,name=sean,gender=true,list=[學校,年紀,班級]
WritableComparable<T>接口繼承Comparable<T>和Writable接口,繼承過來三個方法,從Writable繼承過來readFields, write,從Comparable<T>繼承過來compareTo。
官網提供例子:
package com.jf.hdfs; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.io.WritableComparable; public class MyWritableComparable implements WritableComparable { private int counter; private long timestamp; public void write(DataOutput out) throws IOException { out.writeInt(counter); out.writeLong(timestamp); } public void readFields(DataInput in) throws IOException { counter = in.readInt(); timestamp = in.readLong(); } public int compareTo(Object o) { MyWritableComparable obj = (MyWritableComparable) o; int value = this.counter; int value2 = obj.counter; return value < value2 ? -1 : (value == value2 ? 0 : 1); } public int hashCode() { final int prime = 31; int result = 1; result = prime * result + counter; result = prime * result + (int) (timestamp ^ (timestamp >>> 32)); return result; } }
RawComparator<T>接口繼承了java.util.Comparator<T>接口,除了從Comparator<T>繼承過來的兩個方法compare、equals以外,它本身也定義了一個方法compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)有6個參數。該方法是在字節流的層面上去作比較,第一個參數:指定字節數組,第二個參數:從哪裏開始比較,第三個參數:比較多長。
WritableComparator類,實現了Comparator, Configurable, RawComparator三個接口。
構造方法
部分實現方法
自定義類的比較有兩種方式:一種是該類實現 WritableComparable 接口,另一種是實現一個比較器(例如繼承 WritableComparator 類)去進行比較。
這裏通過實現 WritableComparable 接口,為自定義類提供比較方法。
package com.jf.hdfs; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; public class AccountWritable implements WritableComparable<AccountWritable> { private IntWritable code; private Text name; private BooleanWritable gender; AccountWritable() { code = new IntWritable(); name = new Text(); gender = new BooleanWritable(); } // 把參數類型和類類型相同的構造器,叫複製構造器 AccountWritable(AccountWritable accountWritable) { code = new IntWritable(accountWritable.code.get()); name = new Text(accountWritable.name.toString()); gender = new BooleanWritable(accountWritable.gender.get()); } // 注意要賦值類型,不要賦引用類型 public void set(IntWritable code, Text name, BooleanWritable gender) { this.code = new IntWritable(code.get()); this.name = new Text(name.toString()); this.gender = new BooleanWritable(gender.get()); } // 將值寫到輸出流中 public void write(DataOutput out) throws IOException { code.write(out); name.write(out); gender.write(out); } // 將值從輸入流中讀取出來 public void readFields(DataInput in) throws IOException { code.readFields(in); name.readFields(in); gender.readFields(in); } // 比較方法 public int compareTo(AccountWritable o) { int result = this.code.compareTo(o.code); if (result == 0) { result = this.name.compareTo(o.name); if (result == 0) { result = this.gender.compareTo(o.gender); } } return result; } public int hashCode() { final int prime = 31; int result = 1; result = prime * result + code.get(); result = prime * result + (int) (name.toString().hashCode() ^ (name.toString().hashCode() >>> 32)); return result; } public IntWritable getCode() { return code; } public void setCode(IntWritable code) { this.code = code; } public Text getName() { return name; } public void setName(Text name) { this.name = name; } public BooleanWritable getGender() { return gender; } public void setGender(BooleanWritable gender) { 
this.gender = gender; } }
測試:
public static void main(String[] args) { AccountWritable a1 = new AccountWritable(); a1.set(new IntWritable(30), new Text("sean"), new BooleanWritable(true)); AccountWritable a2 = new AccountWritable(); a2.set(new IntWritable(30), new Text("sean"), new BooleanWritable(true)); //比較a1和a2 System.out.println(a1.compareTo(a2)); }