Hadoop 2.x WordCount MapReduce
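The classic WordCount example below counts how many times each word appears in the input files: the Mapper tokenizes every line into (word, 1) pairs, and the Reducer sums the counts for each word.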

package com.jhl.haoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;


public class WordCount {

    // Map section
    public static class TokenizerMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1); // every occurrence of a word counts as 1
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split on StringTokenizer's default delimiters:
            // space, tab (\t), newline (\n), carriage return (\r), form feed (\f)
            // public StringTokenizer(String str) {
            //     this(str, " \t\n\r\f", false);
            // }
            StringTokenizer itr = new StringTokenizer(value.toString()); // value.toString() is the current line
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken()); // set the map output key
                context.write(word, one);  // emit the (word, 1) pair to the context
            }
        }
    }
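To make the map phase concrete, here is a minimal standalone sketch (the class name MapPhaseDemo and the sample line are made up for illustration) that applies the same StringTokenizer logic and prints the pairs the mapper would emit:

import java.util.StringTokenizer;

public class MapPhaseDemo {
    public static void main(String[] args) {
        // Tokenize a sample line with the same default delimiters (" \t\n\r\f")
        String line = "hello world hello hadoop";
        StringTokenizer itr = new StringTokenizer(line);
        while (itr.hasMoreTokens()) {
            // Prints (hello, 1), (world, 1), (hello, 1), (hadoop, 1)
            System.out.println("(" + itr.nextToken() + ", 1)");
        }
    }
}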

                    

    

    // Reduce section
    public static class IntSumReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) { // iterate over the grouped values
                sum += val.get();            // accumulate the count
            }
            result.set(sum);                 // set the total count for this word
            context.write(key, result);
        }
    }
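Between the two phases the framework sorts and groups the map output by key, so the reducer sees each word once with all of its 1s. Below is a minimal sketch of that summation in plain Java (the class name ReducePhaseDemo and the grouped values are illustrative, matching the sample line above):

import java.util.Arrays;
import java.util.List;

public class ReducePhaseDemo {
    public static void main(String[] args) {
        // After the shuffle, "hello" arrives with its counts grouped: [1, 1]
        List<Integer> values = Arrays.asList(1, 1);
        int sum = 0;
        for (int val : values) {
            sum += val; // same accumulation as IntSumReducer
        }
        System.out.println("(hello, " + sum + ")"); // prints (hello, 2)
    }
}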


        

    // Driver (client) section
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // load the configuration
        // GenericOptionsParser parses the standard Hadoop command-line options
        // and sets the corresponding values on the Configuration object.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "WordCount"); // create the Job and set its name (the Job(conf, name) constructor is deprecated in Hadoop 2.x)
        job.setJarByClass(WordCount.class);        // set the class that runs the Job
        job.setMapperClass(TokenizerMapper.class); // set the Mapper and Reducer classes
        job.setCombinerClass(IntSumReducer.class); // the reducer doubles as a combiner because addition is associative and commutative
        job.setReducerClass(IntSumReducer.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));   // set the input path
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); // set the output path
        job.setOutputKeyClass(Text.class); // set the output key and value types
        job.setOutputValueClass(IntWritable.class);
        boolean isSuccess = job.waitForCompletion(true); // submit the Job, wait for it to finish, and print progress on the client
        System.exit(isSuccess ? 0 : 1); // exit with the Job's status
    }
}
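To run the job, package the class into a jar and submit it to the cluster. Assuming the jar is named wordcount.jar (the jar name and HDFS paths here are placeholders), an invocation looks like: hadoop jar wordcount.jar com.jhl.haoop.examples.WordCount /input /output. Note that the output directory must not exist before the job starts; FileOutputFormat fails the job if it does.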
