Hadoop筆記九:MapReduce作業配置

1.MapReduce作業中有一些默認的配置

當未配置時就會使用默認的配置。

2.mapper介紹

Mapper:封裝了應用程序Mapper階段的數據處理邏輯,下面是Hadoop中已經實現了的Mapper子類:

ChainMapper:方便用戶編寫鏈式Map任務, 即Map階段包含多個Mapper,也就是可以編寫多個自定義Mapper去參與運算。
InverseMapper:一個能交換key和value的Mapper
RegexMapper:檢查輸入是否匹配某正則表達式, 輸出匹配字符串和計數器(用的不多)
TokenCounterMapper:將輸入分解爲獨立的單詞, 輸出各個單詞和計數器(以空格分割單詞,value值爲1)

代碼實例:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.map.InverseMapper;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce driver that chains several mappers around a single reducer.
 *
 * <p>Pipeline, in order:
 * <ol>
 *   <li>{@link KeyValueTextInputFormat} splits each input line into a
 *       {@code (Text, Text)} pair on the first {@code ","}.</li>
 *   <li>{@link InverseMapper} swaps key and value.</li>
 *   <li>{@link PatentReferenceMapper} emits {@code (key, 1)} for every record.</li>
 *   <li>{@link IntSumReducer} sums the ones per key.</li>
 *   <li>{@link InverseMapper} (after the reducer) swaps again, so the final
 *       output is {@code (count, key)}.</li>
 * </ol>
 *
 * <p>NOTE(review): judging by the class name the input is presumably a
 * {@code citing,cited} patent list, which would make the output a citation
 * count per cited patent — confirm against the actual data set.
 *
 * <p>The input and output paths are read from the configuration properties
 * {@code input} and {@code output}, typically supplied as
 * {@code -D input=... -D output=...} on the command line.
 */
public class PatentReference_0010 extends Configured implements Tool {

    /** Exit code returned when a required configuration property is missing. */
    private static final int EXIT_USAGE = 2;

    /**
     * Emits {@code (key, 1)} for every input record; the incoming value is ignored.
     */
    static class PatentReferenceMapper extends Mapper<Text, Text, Text, IntWritable> {
        /** Reused for every record to avoid allocating a new writable per call. */
        private final IntWritable one = new IntWritable(1);

        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, one);
        }
    }

    /**
     * Configures and runs the chained job.
     *
     * @param args remaining command-line arguments (not used by this method)
     * @return {@code 0} if the job succeeded, {@code 1} if it failed, or
     *         {@code 2} if the {@code input} or {@code output} property is missing
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();

        String inputDir = conf.get("input");
        String outputDir = conf.get("output");
        if (inputDir == null || outputDir == null) {
            // Fail with a readable message instead of the opaque exception
            // that new Path(null) would raise.
            System.err.println("Usage: " + getClass().getSimpleName()
                + " -D input=<input path> -D output=<output path>");
            return EXIT_USAGE;
        }
        Path input = new Path(inputDir);
        Path output = new Path(outputDir);

        // With KeyValueTextInputFormat the key/value separator is configurable;
        // its default is "\t" (the tab character). This must be set before the
        // Job is created from conf.
        conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ",");

        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        ChainMapper.addMapper(job, InverseMapper.class,
            // Input key/value types are determined by the InputFormat.
            Text.class, Text.class,
            // Output key/value types are the input types swapped.
            Text.class, Text.class, conf);

        ChainMapper.addMapper(job, PatentReferenceMapper.class,
            // Input key/value types are determined by the previous mapper's output.
            Text.class, Text.class,
            Text.class, IntWritable.class, conf);

        ChainReducer.setReducer(job, IntSumReducer.class,
            Text.class, IntWritable.class,
            Text.class, IntWritable.class, conf);

        // Runs after the reducer: turns (key, count) into (count, key).
        ChainReducer.addMapper(job, InverseMapper.class,
            Text.class, IntWritable.class,
            IntWritable.class, Text.class, conf);

        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        KeyValueTextInputFormat.addInputPath(job, input);
        TextOutputFormat.setOutputPath(job, output);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Entry point: runs this tool through {@link ToolRunner} and exits with
     * the code returned by {@link #run(String[])}.
     *
     * @param args command-line arguments
     * @throws Exception if the tool fails to run
     */
    public static void main(String[] args) throws Exception {
        // The original referenced an undefined class name here, which does not compile.
        System.exit(ToolRunner.run(new PatentReference_0010(), args));
    }
}
相關文章
相關標籤/搜索