統計單詞出現次數的mapreduce

一、新建Java項目

二、導包
E:\工具\大數據\大數據提高資料\01-軟件資料\06-Hadoop\安裝包\Java1.8
環境下編譯\hadoop-2.7.3\hadoop-2.7.3\share\hadoop\mapreduce
+hdfs的那些包+common

三、寫項目

3.1 WCMapper

package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Word-count mapper: expands one input line into (word, 1) pairs.
 *
 * <p>Input:  {@code <byteOffset, lineText>}, e.g. {@code <0, "tom\tlili\ttom">}.
 * Output: {@code <word, 1>} for every tab-separated token on the line;
 * the reducer later sums the 1s per word.
 */
public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        // Tokens on a line are separated by tab characters.
        String[] tokens = value.toString().split("\t");
        for (String token : tokens) {
            // Emit each token with a count of 1.
            context.write(new Text(token), new LongWritable(1));
        }
    }
}

3.2 WCReduce

package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Word-count reducer: sums the per-word counts emitted by {@link WCMapper}.
 *
 * <p>Input:  {@code <"tom", {1,1,1,...}>}.
 * Output: {@code <"tom", total>}.
 */
public class WCReduce extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> value,
            Reducer<Text, LongWritable, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        // Bug fix: sum the actual values instead of `sum += 1`. With a plain
        // mapper both are equal, but if a combiner pre-aggregates, values may
        // already be partial sums greater than 1.
        for (LongWritable count : value) {
            sum += count.get();
        }
        context.write(key, new LongWritable(sum));
    }
}

3.3 WCApp

package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver for the word-count job: wires the mapper, reducer and I/O paths
 * together and submits the job to the cluster.
 */
public class WCApp {

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        // Class used to locate the jar containing the job classes.
        job.setJarByClass(WCApp.class);

        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(WCReduce.class);
        // Bug fix: the original declared the MAP output types a second time
        // here; the job's final (reducer) output types must be declared with
        // setOutputKeyClass / setOutputValueClass.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Input file and output directory (the output dir must not exist yet).
        FileInputFormat.setInputPaths(job, new Path("/wc.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/myWCResult"));

        // Propagate job success/failure to the process exit code instead of
        // ignoring waitForCompletion's boolean result.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

四、打包上傳

把項目打包（java打成jar包，web項目打成war包），上傳到linux，而後用 hadoop jar WCApp.jar 運行jar包

相關文章
相關標籤/搜索