1.寫出map類
public class WCMapper extends Mapper<LongWritable,Text,Text,LongWritable>{緩存
@Override
protected void map(LongWritable key,Text value,Context context)throws IOException,InterruptedException{
//value轉換成string類型 返回字符串
String line = value.toSting();
//使用空格切分單詞 返回字符數組
String[] words = StringUtils.split(line," ");
//遍歷這個數組,按照特定格式輸出key-value形式 key:單詞 value:1
for(String word : words){
context.write(new Text(word),new LongWritable(1));
}
LongWritable,Text,Text,LongWritable
}
app
}框架
2.寫出reduce類
public class WCReducer extends Reducer<Text,LongWritable,Text,LongWritable>{
//框架在map處理完以後,將全部kv對緩存起來,進行分組,而後傳遞給<key,values{}>,調用一次reduce方法
//<hello,{1,1,1,1,1,1.......}>
@Override
protected void reduce(Text key,Iterable<LongWriteable> values,Context context)throws IOException,InterruptedException{
long count = 0;
//遍歷values的list,進行累加求和
for(LongWriteable value:values){
count += value.get();
}
//輸出這個單詞的統計結果
context.write(key,new LongWritable(count));
}
}oop
3.job提交類
//用來描述一個特定的做業,好比說
//該job使用哪一個類做爲邏輯處理中的map,哪一個做爲reduce
//指定需處理的數據的路徑
//指定輸出結果路徑
public class WCRunner{
public static void main(String[] args){
Context conf = new Context();
Job job = Job.getInstance(conf);
//設置整個job所用的那些類在哪一個jar包
job.setJarByClass(WCRunner);
//使用map、reduce的類
job.setMapperClass(WCMapper.class);
job.setReduceClass(WCReducer.class);
//指定reduce的輸出的kv類型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
//指定map的輸出的kv類型
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
//指定數據源路徑
FileInputFormat.setInputPaths(job,new Path("/wc/srcdata"));
//指定結果輸出路徑
FileOutputFormat.setOutputPath(job,new Path("/wc/output/"));
//將job提交給集羣
job.waitForCompletion(true);
}
}orm
4.打成jar包
5.hadoop上運行