答案:java
package com.duking.mapreduce; import java.io.IOException; import java.util.Set; import java.util.StringTokenizer; import java.util.TreeSet; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class FindFriends { /** * map方法 * @author duking * */ public static class Map extends Mapper<Object, Text, Text, Text> { /** * 實現map方法 */ public void map(Object key, Text value, Context context) throws IOException, InterruptedException { //將輸入的每一行數據切分後存到persions中 StringTokenizer persions = new StringTokenizer(value.toString()); //定義一個Text 存放本人信息owner Text owner = new Text(); //定義一個Set集合,存放朋友信息 Set<String> set = new TreeSet<String>(); //將這一行的本人信息存入owner中 owner.set(persions.nextToken()); //將全部的朋友信息存放到Set集合中 while(persions.hasMoreTokens()){ set.add(persions.nextToken()); } //定義一個String數組存放朋友信息 String[] friends = new String[set.size()]; //將集合轉換爲數組,並將集合中的數據存放到friend friends = set.toArray(friends); //將朋友進行兩兩組合 for(int i=0;i<friends.length;i++){ for(int j=i+1;j<friends.length;j++){ String outputkey = friends[i]+friends[j]; context.write(new Text(outputkey), owner); } } } } /** * Reduce方法 * @author duking * */ public static class Reduce extends Reducer<Text, Text, Text, Text> { /** * 實現Reduce方法 */ public void reduce(Text key, Iterable<Text> values,Context context) throws IOException, InterruptedException { String commonfriends = ""; for (Text val : values){ if(commonfriends == ""){ commonfriends = val.toString(); }else{ commonfriends = commonfriends + ":" +val.toString(); } } context.write(key,new Text(commonfriends)); } } /** * main * @param args * @throws Exception */ public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); conf.set("mapred.job.tracker", "192.168.60.129:9000"); //指定待運行參數的目錄爲輸入輸出目錄 String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); /* 指定工程目錄下的input output爲輸入輸出目錄 String[] ioArgs = new String[] {"input", "output" }; String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs(); */ if (otherArgs.length != 2) { //判斷運行參數個數 System.err.println("Usage: Data Deduplication <in> <out>"); System.exit(2); } // set maprduce job name Job job = new Job(conf, "findfriends"); job.setJarByClass(FindFriends.class); // 設置map reduce處理類 job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); // 設置輸出類型 job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); //設置輸入輸出路徑 FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } }
結果apache