Hadoop's first program: WordCount.java
package test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/*
 * Author: 翟超科
 * Date: 2019.9.3
 * Task: word count with MapReduce
 */
public class WordCount {

    // Mapper subclass implementing the map phase.
    public static class doMapper extends Mapper<Object, Text, Text, IntWritable> {
        // Constant value 1, emitted once for every word seen.
        public static final IntWritable one = new IntWritable(1);
        // Reusable key object holding the current word.
        public static Text word = new Text();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // The framework delivers one line of the HDFS file per map call;
            // split that line into tokens (the input is assumed tab-separated).
            StringTokenizer tokenizer = new StringTokenizer(value.toString(), "\t");
            // Emit a (word, 1) pair for every token on the line; checking
            // hasMoreTokens() also avoids an exception on empty lines.
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer subclass: aggregates the cached (word, 1) pairs for each word.
    public static class doReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Holds the total count for the current key.
        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0; // running total for this key
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        job.setJobName("WordCount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(doMapper.class);
        job.setReducerClass(doReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        Path in = new Path("hdfs://192.168.13.101:9000/data");    // input location on HDFS
        Path out = new Path("hdfs://192.168.13.101:9000/output"); // output location (must not already exist)
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
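The driver above hard-codes the NameNode address and does no map-side aggregation. A common refinement, shown here as a minimal sketch rather than as part of the original program, is to take the input and output paths from the command line and register the reducer as a combiner, so counts are partially summed before the shuffle (doReduce qualifies because its input and output key/value types match). It assumes the same imports and the doMapper/doReduce classes defined above:

    // Sketch of an alternative main(), replacing the one in the class above.
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        job.setJobName("WordCount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(doMapper.class);
        job.setCombinerClass(doReduce.class); // map-side partial aggregation
        job.setReducerClass(doReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // e.g. /data
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // e.g. /output
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

Packaged into a jar, this version would be launched with something like "hadoop jar wordcount.jar test.WordCount /data /output" (the jar name is hypothetical; as before, the output directory must not already exist).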