數據庫中有許多QQ號,且這些QQ的好友都可以查詢到;查詢結果可以整理成如下格式:
#結構---人:好友1,好友2,好友3,好友4.... A:B,C,D,F,E,O B:C,E,G,F,O,D D:Q,W,B,P,T,Y Y:S,Q,L,V,B,H,J,K,L O:L,E,Q,R,U,S,B P:O,L,E,L,F,Q,W,G K:S,L,D,U,R,E,A,X .....
A-B,C F O E A-D,B A-Y,B D-Y,Q B ......
第一步,首先實現結構{友 人,人,人}:A I,K,C,B,G,F,H,O,D,
package com.empire.hadoop.mr.fensi;

import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step one of the "common friends" computation.
 *
 * <p>Input lines have the form {@code person:friend1,friend2,...}. The job inverts that
 * relation and writes one line per friend: {@code friend<TAB>person1,person2,...,}
 * (each person is followed by a comma, so the value ends with a trailing comma).
 */
public class SharedFriendsStepOne {

    /** Emits one {@code <friend, person>} pair for every friend listed on an input line. */
    static class SharedFriendsStepOneMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Example line: A:B,C,D,F,E,O
            String line = value.toString();
            String[] person_friends = line.split(":");
            // Blank lines and lines without a friend list would otherwise raise
            // ArrayIndexOutOfBoundsException and fail the whole task; skip them instead.
            if (person_friends.length < 2) {
                return;
            }
            String person = person_friends[0].trim();
            if (person.isEmpty()) {
                return;
            }
            Text personText = new Text(person);
            for (String friend : person_friends[1].split(",")) {
                String trimmed = friend.trim();
                if (trimmed.isEmpty()) {
                    // Ignore empty tokens produced by input such as "B,,C".
                    continue;
                }
                // Emit <friend, person>
                context.write(new Text(trimmed), personText);
            }
        }
    }

    /** Joins all distinct persons who list the given friend into one comma-terminated string. */
    static class SharedFriendsStepOneReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text friend, Iterable<Text> persons, Context context)
                throws IOException, InterruptedException {
            // A person may list the same friend more than once (e.g. "Y:...,L,...,L").
            // Without de-duplication the person appears twice here and step two then
            // produces self-pairs such as "Y-Y". The set keeps first-seen order.
            // toString() copies the content out of each value before it is stored.
            Set<String> distinct = new LinkedHashSet<String>();
            for (Text person : persons) {
                distinct.add(person.toString());
            }
            StringBuilder sb = new StringBuilder();
            for (String person : distinct) {
                // The trailing comma is kept on purpose: it is the established output format.
                sb.append(person).append(",");
            }
            context.write(friend, new Text(sb.toString()));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: SharedFriendsStepOne <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SharedFriendsStepOne.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(SharedFriendsStepOneMapper.class);
        job.setReducerClass(SharedFriendsStepOneReducer.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job failure through the process exit code so calling scripts can detect it.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
第二步,實現共同好友{人-人,友 友 友 友}:A-B,C F O E
package com.empire.hadoop.mr.fensi;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step two of the "common friends" computation.
 *
 * <p>Input lines have the form {@code friend<TAB>person1,person2,...,}. For every
 * unordered pair of persons on a line the job emits the friend, then groups by pair and
 * writes {@code personA-personB<TAB>friend1 friend2 ... } (each friend is followed by a space).
 */
public class SharedFriendsStepTwo {

    /** Emits {@code <personA-personB, friend>} for every pair of distinct persons on a line. */
    static class SharedFriendsStepTwoMapper extends Mapper<LongWritable, Text, Text, Text> {

        // The input is the output of the previous step, e.g.
        // A    I,K,C,B,G,F,H,O,D,
        // i.e. friend <TAB> person,person,person
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] friend_persons = line.split("\t");
            // Skip blank or malformed lines instead of failing with
            // ArrayIndexOutOfBoundsException.
            if (friend_persons.length < 2) {
                return;
            }
            String friend = friend_persons[0];

            // De-duplicate the persons: a repeated person would otherwise yield a
            // self-pair such as "P-P" and repeated friends for real pairs.
            Set<String> distinct = new HashSet<String>();
            for (String person : friend_persons[1].split(",")) {
                String trimmed = person.trim();
                if (!trimmed.isEmpty()) {
                    distinct.add(trimmed);
                }
            }
            String[] persons = distinct.toArray(new String[distinct.size()]);
            // Sorting guarantees a canonical key: always "A-B", never "B-A".
            Arrays.sort(persons);

            Text friendText = new Text(friend);
            for (int i = 0; i < persons.length - 1; i++) {
                for (int j = i + 1; j < persons.length; j++) {
                    // Emit <person-person, friend> so that all friends of the same
                    // person pair arrive at the same reduce call.
                    context.write(new Text(persons[i] + "-" + persons[j]), friendText);
                }
            }
        }
    }

    /** Joins all distinct common friends of a person pair into one space-terminated string. */
    static class SharedFriendsStepTwoReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text person_person, Iterable<Text> friends, Context context)
                throws IOException, InterruptedException {
            // Guard against duplicated friend lines in the input; keeps first-seen order.
            // toString() copies the content out of each value before it is stored.
            Set<String> distinct = new LinkedHashSet<String>();
            for (Text friend : friends) {
                distinct.add(friend.toString());
            }
            StringBuilder sb = new StringBuilder();
            for (String friend : distinct) {
                // The trailing space is kept on purpose: it is the established output format.
                sb.append(friend).append(" ");
            }
            context.write(person_person, new Text(sb.toString()));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path (output of step one),
     *             {@code args[1]} the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: SharedFriendsStepTwo <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SharedFriendsStepTwo.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(SharedFriendsStepTwoMapper.class);
        job.setReducerClass(SharedFriendsStepTwoReducer.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job failure through the process exit code so calling scripts can detect it.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
# 上傳jar
Alt+p
lcd d:/
put SharedStepOne.jar SharedStepTwo.jar
put shared.txt
# 準備hadoop處理的數據文件
cd /home/hadoop
hadoop fs -mkdir -p /shared/sharedinput
hdfs dfs -put shared.txt /shared/sharedinput
# 運行程序
hadoop jar SharedStepOne.jar com.empire.hadoop.mr.fensi.SharedFriendsStepOne /shared/sharedinput /shared/sharedsteponeoutput
hadoop jar SharedStepTwo.jar com.empire.hadoop.mr.fensi.SharedFriendsStepTwo /shared/sharedsteponeoutput/part-r-00000 /shared/sharedsteptwooutput
[hadoop@centos-aaron-h1 ~]$ hdfs dfs -cat /shared/sharedsteponeoutput/part-r-00000 A K, B A,O,Y,D, C A,B, D B,A,K, E O,K,A,P,B, F P,A,B, G B,P, H Y, J Y, K Y, L K,Y,P,P,Y,O, O A,B,P, P D, Q P,D,Y,O, R K,O, S O,Y,K, T D, U O,K, V Y, W D,P, X K, Y D, [hadoop@centos-aaron-h1 ~]$ hdfs dfs -cat /shared/sharedsteptwooutput/part-r-00000 A-B F C D O E A-D B A-K D E A-O B E A-P E F O A-Y B B-K E D B-O E B-P F G O E D-O B Q D-P W Q D-Y B Q K-O E U L S R K-P E L L K-Y L L S O-P L L Q E O-Y B S Q L L P-P L P-Y L L Q L L Y-Y L [hadoop@centos-aaron-h1 ~]$