wordcount代碼

時間 2020-02-18

原文鏈接

1.寫出map類

public class WCMapper extends Mapper<LongWritable,Text,Text,LongWritable>{緩存

   @Override
   protected void map(LongWritable key,Text value,Context context)throws IOException,InterruptedException{

       //value轉換成string類型返回字符串
       String line = value.toSting();
       //使用空格切分單詞返回字符數組
       String[] words = StringUtils.split(line," ");

       //遍歷這個數組，按照特定格式輸出key-value形式 key:單詞 value：1
       for(String word : words){
           context.write(new Text(word),new LongWritable(1));
       }
       LongWritable,Text,Text,LongWritable

   }
       app

}框架

2.寫出reduce類

public class WCReducer extends Reducer<Text,LongWritable,Text,LongWritable>{

   //框架在map處理完以後，將全部kv對緩存起來，進行分組，而後傳遞給<key,values{}>,調用一次reduce方法
   //<hello,{1,1,1,1,1,1.......}>
   @Override
   protected void reduce(Text key,Iterable<LongWriteable> values,Context context)throws IOException,InterruptedException{
       long count = 0;
       //遍歷values的list，進行累加求和
       for(LongWriteable value:values){
           count += value.get();
       }
       //輸出這個單詞的統計結果
       context.write(key,new LongWritable(count));
   }

}oop

3.job提交類

   //用來描述一個特定的做業，好比說
   //該job使用哪一個類做爲邏輯處理中的map，哪一個做爲reduce
   //指定需處理的數據的路徑
   //指定輸出結果路徑
public class WCRunner{
   public static void main(String[] args){

       Context conf = new Context();

       Job job = Job.getInstance(conf);

       //設置整個job所用的那些類在哪一個jar包
       job.setJarByClass(WCRunner);

       //使用map、reduce的類
       job.setMapperClass(WCMapper.class);
       job.setReduceClass(WCReducer.class);

       //指定reduce的輸出的kv類型
       job.setOutputKeyClass(Text.class);
       job.setOutputValueClass(LongWritable.class);

       //指定map的輸出的kv類型
       job.setMapOutputKeyClass(Text.class);
       job.setMapOutputValueClass(LongWritable.class);

       //指定數據源路徑
       FileInputFormat.setInputPaths(job,new Path("/wc/srcdata"));
       //指定結果輸出路徑
       FileOutputFormat.setOutputPath(job,new Path("/wc/output/"));

       //將job提交給集羣
       job.waitForCompletion(true);
   }

}orm