order001,u001,小米6,1999.9,2 order001,u001,雀巢咖啡,99.0,2 order001,u001,安慕希,250.0,2 order001,u001,經典紅雙喜,200.0,4 order001,u001,防水電腦包,400.0,2 order002,u002,小米手環,199.0,3 order002,u002,榴蓮,15.0,10 order002,u002,蘋果,4.5,20 order002,u002,肥皂,10.0,40 order003,u001,小米6,1999.9,2 order003,u001,雀巢咖啡,99.0,2 order003,u001,安慕希,250.0,2 order003,u001,經典紅雙喜,200.0,4 order003,u001,防水電腦包,400.0,2
問題分析
需要求出每個訂單中成交金額最大的n筆
本質:求分組TOPN
實現思路:
map: 讀取數據切分字段,封裝數據到一個bean中做爲key傳輸,key要按照成交金額比大小
預期結果:
order001,u001,小米6,1999.9,3999.8 order001,u001,雀巢咖啡,99.0,198.0 order001,u001,安慕希,250.0,500.0 order001,u001,經典紅雙喜,200.0,800.0 order001,u001,防水電腦包,400.0,800.0 order002,u002,小米手環,199.0,597.0 order002,u002,榴蓮,15.0,150.0 order002,u002,蘋果,4.5,90.0 order002,u002,肥皂,10.0,400.0 order003,u001,小米6,1999.9,3999.8 order003,u001,雀巢咖啡,99.0,198.0 order003,u001,安慕希,250.0,500.0 order003,u001,經典紅雙喜,200.0,800.0 order003,u001,防水電腦包,400.0,800.0
reduce:利用自定義GroupingComparator將數據按訂單id進行分組,而後在reduce方法中輸出每組數據的前N條即可
預期結果輸出每一個訂單前2條:
order001,u001,小米6,1999.9,3999.8 order001,u001,經典紅雙喜,200.0,800.0 order003,u001,小米6,1999.9,3999.8 order003,u001,經典紅雙喜,200.0,800.0 order002,u002,小米手環,199.0,597.0 order002,u002,肥皂,10.0,400.0
代碼實現
封裝數據到一個bean做爲key傳輸,須要實現Hadoop序列化,走網絡傳輸對象
public class OrderBean implements WritableComparable<OrderBean>{ private String orderId; private String userId; private String pdtName; private float price; private int number; private float amountFee; public void set(String orderId, String userId, String pdtName, float price, int number) { this.orderId = orderId; this.userId = userId; this.pdtName = pdtName; this.price = price; this.number = number; this.amountFee = price * number; } public String getOrderId() { return orderId; } public void setOrderId(String orderId) { this.orderId = orderId; } public String getUserId() { return userId; } public void setUserId(String userId) { this.userId = userId; } public String getPdtName() { return pdtName; } public void setPdtName(String pdtName) { this.pdtName = pdtName; } public float getPrice() { return price; } public void setPrice(float price) { this.price = price; } public int getNumber() { return number; } public void setNumber(int number) { this.number = number; } public float getAmountFee() { return amountFee; } public void setAmountFee(float amountFee) { this.amountFee = amountFee; } @Override public String toString() { return this.orderId + "," + this.userId + "," + this.pdtName + "," + this.price + "," + this.amountFee; } public int compareTo(OrderBean o) { //比兩個浮點數 return Float.compare(o.getAmountFee(),this.getAmountFee())==0 ?this.pdtName.compareTo(o.pdtName) :Float.compare(o.getAmountFee(),this.getAmountFee()); } public void write(DataOutput dataOutput) throws IOException { dataOutput.writeUTF(this.orderId); dataOutput.writeUTF(this.userId); dataOutput.writeUTF(this.pdtName); dataOutput.writeFloat(this.price); dataOutput.writeInt(this.number); } public void readFields(DataInput dataInput) throws IOException { this.orderId=dataInput.readUTF(); this.userId=dataInput.readUTF(); this.pdtName=dataInput.readUTF(); this.price=dataInput.readFloat(); this.number=dataInput.readInt(); this.amountFee=this.price * this.number; } }
實現邏輯代碼
public class OrderTopn { public static class OrderTopnMapper extends Mapper<LongWritable,Text,Text,OrderBean>{ OrderBean orderBean = new OrderBean(); Text k=new Text(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] fields = value.toString().split(","); orderBean.set(fields[0],fields[1], fields[2],Float.parseFloat(fields[3]), Integer.parseInt(fields[4])); k.set(fields[0]); //序列化的數據跟原來的不同了 context.write(k,orderBean); } } public static class OrderTopnReducer extends Reducer<Text,OrderBean,OrderBean,NullWritable>{ @Override protected void reduce(Text key, Iterable<OrderBean> values, Context context) throws IOException, InterruptedException { int topn = context.getConfiguration().getInt("order.top.n", 3); ArrayList<OrderBean> beanList = new ArrayList<OrderBean>(); //reduce task提供的values迭代器,每次迭代返回的是同一個對象,只是set了不一樣的值 for(OrderBean orderBean:values){ //構造一個新的對象,來存儲本次迭代出來的值 OrderBean newBean = new OrderBean(); newBean.set(orderBean.getOrderId(),orderBean.getUserId(),orderBean.getPdtName() ,orderBean.getPrice(),orderBean.getNumber()); beanList.add(newBean); } //對beanList中的orderBean對象進行排序(金額,商品名稱) Collections.sort(beanList); for(int i=0;i<topn;i++){ context.write(beanList.get(i),NullWritable.get()); } } } public static void main(String[] args) throws Exception{ Configuration conf = new Configuration(); conf.setInt("order.top.n",2); Job job = Job.getInstance(conf); //動態獲取jar包在哪裏 job.setJarByClass(OrderTopn.class); //2.封裝參數:本次job所要調用的mapper實現類 job.setMapperClass(OrderTopnMapper.class); job.setReducerClass(OrderTopnReducer.class); //3.封裝參數:本次job的Mapper實現類產生的數據key,value的類型 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(OrderBean.class); //4.封裝參數:本次Reduce返回的key,value數據類型 job.setOutputKeyClass(OrderBean.class); job.setOutputValueClass(NullWritable.class); //6.封裝參數:想要啓動的reduce task的數量 job.setNumReduceTasks(2); FileInputFormat.setInputPaths(job,new 
Path("F:\\mrdata\\ordertopn\\input")); FileOutputFormat.setOutputPath(job,new Path("F:\\mrdata\\ordertopn\\out")); boolean res = job.waitForCompletion(true); System.exit(res ? 0:-1); } }
輸出結果
part-r-00000
order001,u001,小米6,1999.9,3999.8
order001,u001,經典紅雙喜,200.0,800.0
order003,u001,小米6,1999.9,3999.8
order003,u001,經典紅雙喜,200.0,800.0
part-r-00001
order002,u002,小米手環,199.0,597.0
order002,u002,肥皂,10.0,400.0
原理圖
代碼實現
OrderBean
/**
 * One line item of an order, used as the map output key.
 *
 * <p>Only {@code orderId}, {@code userId}, {@code pdtName}, {@code price} and
 * {@code number} are serialized; {@code amountFee} is recomputed as
 * {@code price * number} when the bean is deserialized.
 *
 * <p>Natural ordering: by {@code orderId} ascending, then by
 * {@code amountFee} descending. Items of the same order with equal amounts
 * compare as equal.
 */
public class OrderBean implements WritableComparable<OrderBean> {

    private String orderId;
    private String userId;
    private String pdtName;
    private float price;
    private int number;
    private float amountFee;

    /**
     * Populates every field in one call and derives the amount.
     *
     * @param orderId order identifier
     * @param userId  user identifier
     * @param pdtName product name
     * @param price   unit price
     * @param number  quantity purchased
     */
    public void set(String orderId, String userId, String pdtName, float price, int number) {
        this.orderId = orderId;
        this.userId = userId;
        this.pdtName = pdtName;
        this.price = price;
        this.number = number;
        // The amount is always derived here, never passed in.
        this.amountFee = price * number;
    }

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public String getUserId() {
        return userId;
    }

    public void setUserId(String userId) {
        this.userId = userId;
    }

    public String getPdtName() {
        return pdtName;
    }

    public void setPdtName(String pdtName) {
        this.pdtName = pdtName;
    }

    public float getPrice() {
        return price;
    }

    public void setPrice(float price) {
        this.price = price;
    }

    public int getNumber() {
        return number;
    }

    public void setNumber(int number) {
        this.number = number;
    }

    public float getAmountFee() {
        return amountFee;
    }

    public void setAmountFee(float amountFee) {
        this.amountFee = amountFee;
    }

    /**
     * Renders the bean as {@code orderId,userId,pdtName,price,amountFee}.
     * The quantity is not part of the output.
     */
    @Override
    public String toString() {
        StringBuilder line = new StringBuilder();
        line.append(this.orderId).append(",");
        line.append(this.userId).append(",");
        line.append(this.pdtName).append(",");
        line.append(this.price).append(",");
        line.append(this.amountFee);
        return line.toString();
    }

    /**
     * Orders beans by order id ascending, then by amount descending.
     *
     * @param o the bean to compare against
     * @return negative, zero or positive per the {@link Comparable} contract
     */
    public int compareTo(OrderBean o) {
        int byOrder = this.orderId.compareTo(o.getOrderId());
        if (byOrder != 0) {
            return byOrder;
        }
        // Arguments are swapped on purpose: the larger amount sorts first.
        return Float.compare(o.getAmountFee(), this.getAmountFee());
    }

    /**
     * Serializes the five primary fields; {@code amountFee} is not written.
     *
     * @param dataOutput sink to write to
     * @throws IOException if writing fails
     */
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.orderId);
        dataOutput.writeUTF(this.userId);
        dataOutput.writeUTF(this.pdtName);
        dataOutput.writeFloat(this.price);
        dataOutput.writeInt(this.number);
    }

    /**
     * Reads the fields in the order {@link #write(DataOutput)} emitted them
     * and recomputes {@code amountFee}.
     *
     * @param dataInput source to read from
     * @throws IOException if reading fails
     */
    public void readFields(DataInput dataInput) throws IOException {
        this.orderId = dataInput.readUTF();
        this.userId = dataInput.readUTF();
        this.pdtName = dataInput.readUTF();
        this.price = dataInput.readFloat();
        this.number = dataInput.readInt();
        this.amountFee = this.price * this.number;
    }
}
OrderIdPartitioner :重寫分發規則
public class OrderIdPartitioner extends Partitioner<OrderBean,NullWritable> { public int getPartition(OrderBean key, NullWritable value, int numPartitions) { //按照訂單中的orderId來分發數據 return (key.getOrderId().hashCode() & Integer.MAX_VALUE)% numPartitions; } }
OrderIdGroupComparator :重寫reduce端的分組比較規則(orderId相同的key視爲同一組)
public class OrderIdGroupComparator extends WritableComparator { public OrderIdGroupComparator() { //調用父類構造器 super(OrderBean.class,true); } @Override public int compare(WritableComparable a, WritableComparable b) { OrderBean o1=(OrderBean)a; OrderBean o2=(OrderBean)b; //orderId相同返回0-》爲一組 return o1.getOrderId().compareTo(o2.getOrderId()); } }
OrderTopn :程序入口
public class OrderTopn { public static class OrderTopenMapper extends Mapper<LongWritable,Text,OrderBean,NullWritable>{ OrderBean orderBean=new OrderBean(); NullWritable v=NullWritable.get(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] fields = value.toString().split(","); orderBean.set(fields[0],fields[1], fields[2],Float.parseFloat(fields[3]), Integer.parseInt(fields[4])); context.write(orderBean,v); } } public static class OrderTopnReducer extends Reducer<OrderBean,NullWritable,OrderBean,NullWritable>{ /** * 雖然reduce方法中的參數key只有一個,但只要迭代器迭代一次, * key中的值就會變 */ @Override protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { int i=0; for(NullWritable v:values){ context.write(key,v); if(++i==3){ return; } } } } public static void main(String[] args) throws Exception{ Configuration conf = new Configuration(); Job job = Job.getInstance(conf); //動態獲取jar包在哪裏 job.setJarByClass(OrderTopn.class); //2.封裝參數:本次job所要調用的mapper實現類 job.setMapperClass(OrderTopenMapper.class); job.setReducerClass(OrderTopnReducer.class); //3.封裝參數:本次job的Mapper實現類產生的數據key,value的類型 job.setMapOutputKeyClass(OrderBean.class); job.setMapOutputValueClass(NullWritable.class); //4.封裝參數:本次Reduce返回的key,value數據類型 job.setOutputKeyClass(OrderBean.class); job.setOutputValueClass(NullWritable.class); job.setPartitionerClass(OrderIdPartitioner.class); job.setGroupingComparatorClass(OrderIdGroupComparator.class); //6.封裝參數:想要啓動的reduce task的數量 job.setNumReduceTasks(2); FileInputFormat.setInputPaths(job,new Path("F:\\mrdata\\ordertopn\\input")); FileOutputFormat.setOutputPath(job,new Path("F:\\mrdata\\ordertopn\\out-2")); boolean res = job.waitForCompletion(true); System.exit(res ? 0:-1); } }
結果輸出
part-r-00000
order002,u002,小米手環,199.0,597.0
order002,u002,肥皂,10.0,400.0
order002,u002,榴蓮,15.0,150.0
part-r-00001
order001,u001,小米6,1999.9,3999.8 order001,u001,防水電腦包,400.0,800.0 order001,u001,經典紅雙喜,200.0,800.0 order003,u001,小米6,1999.9,3999.8 order003,u001,經典紅雙喜,200.0,800.0 order003,u001,防水電腦包,400.0,800.0