[toc]
在為MapReduce程序的Mapper和Reducer編寫驅動程序時,有不少代碼都是重複的,所以可以將其提取出來寫成一個工具類,之後再寫MapReduce程序時都可以使用這個工具類。
程序代碼如下:
package com.uplooking.bigdata.common.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Objects;

/**
 * Static helper that factors out the repetitive driver code needed to
 * configure a MapReduce {@link Job}.
 */
public final class MapReduceJobUtil {

    /** Static utility class; not meant to be instantiated. */
    private MapReduceJobUtil() {
    }

    /**
     * Creates a {@link Job} and wires up its input, mapper, output and
     * (optionally) reducer settings.
     *
     * <p><strong>Destructive side effect:</strong> {@code outputpath} is deleted
     * recursively before the job is configured, so that a leftover output
     * directory does not make the job fail with a
     * {@code FileAlreadyExistsException}. All required arguments are validated
     * <em>before</em> that deletion, so an invalid call never removes data.
     *
     * <p>When {@code reducerClass} is {@code null}, no reducer class and no final
     * output key/value classes are set on the job, and {@code outkeyClass} /
     * {@code outvalueClass} are ignored. The number of reduce tasks is not
     * changed by this method in either case.
     *
     * @param conf           configuration the job is created from; also used to
     *                       resolve the file system of {@code outputpath}
     * @param jobClazz       driver class; its simple name becomes the job name and
     *                       it is passed to {@code setJarByClass}
     * @param inputpath      input location, handed unchanged to
     *                       {@code FileInputFormat.setInputPaths(Job, String)}
     * @param inputFormat    input format class that parses the input into records
     * @param mapperClass    mapper implementation
     * @param mapKeyClass    key class emitted by the mapper
     * @param mapValueClass  value class emitted by the mapper
     * @param outputpath     output directory; deleted recursively by this call
     * @param outputFormat   output format class
     * @param reducerClass   reducer implementation, or {@code null} for none
     * @param outkeyClass    final output key class; required only when
     *                       {@code reducerClass} is non-null
     * @param outvalueClass  final output value class; required only when
     *                       {@code reducerClass} is non-null
     * @return the configured, not yet submitted job
     * @throws IOException          if the job cannot be created or the file system
     *                              operations fail
     * @throws NullPointerException if a required argument is {@code null}
     */
    // Raw Hadoop types are kept on purpose: the signature mirrors the raw-typed
    // setters on Job (setInputFormatClass, setMapperClass, ...), and changing it
    // would alter the public interface of this helper.
    @SuppressWarnings("rawtypes")
    public static Job buildJob(Configuration conf,
                               Class<?> jobClazz,
                               String inputpath,
                               Class<? extends InputFormat> inputFormat,
                               Class<? extends Mapper> mapperClass,
                               Class<?> mapKeyClass,
                               Class<?> mapValueClass,
                               Path outputpath,
                               Class<? extends OutputFormat> outputFormat,
                               Class<? extends Reducer> reducerClass,
                               Class<?> outkeyClass,
                               Class<?> outvalueClass) throws IOException {
        // Fail fast: every check happens before the output directory is deleted,
        // so a bad argument can no longer wipe existing output and then crash.
        Objects.requireNonNull(conf, "conf");
        Objects.requireNonNull(jobClazz, "jobClazz");
        Objects.requireNonNull(inputpath, "inputpath");
        Objects.requireNonNull(inputFormat, "inputFormat");
        Objects.requireNonNull(mapperClass, "mapperClass");
        Objects.requireNonNull(mapKeyClass, "mapKeyClass");
        Objects.requireNonNull(mapValueClass, "mapValueClass");
        Objects.requireNonNull(outputpath, "outputpath");
        Objects.requireNonNull(outputFormat, "outputFormat");
        if (reducerClass != null) {
            Objects.requireNonNull(outkeyClass, "outkeyClass");
            Objects.requireNonNull(outvalueClass, "outvalueClass");
        }

        String jobName = jobClazz.getSimpleName();
        Job job = Job.getInstance(conf, jobName);

        // Jar that the job runs from.
        job.setJarByClass(jobClazz);

        // Input of the whole program; the input format decides how the input
        // files are parsed into records.
        FileInputFormat.setInputPaths(job, inputpath);
        job.setInputFormatClass(inputFormat);

        // Mapper and its output types.
        job.setMapperClass(mapperClass);
        job.setMapOutputKeyClass(mapKeyClass);
        job.setMapOutputValueClass(mapValueClass);

        // Output of the whole program. Remove a pre-existing output directory
        // first to avoid FileAlreadyExistsException.
        outputpath.getFileSystem(conf).delete(outputpath, true);
        FileOutputFormat.setOutputPath(job, outputpath);
        job.setOutputFormatClass(outputFormat);

        // Reducer is optional: configure it only when one was supplied.
        if (reducerClass != null) {
            job.setReducerClass(reducerClass);
            job.setOutputKeyClass(outkeyClass);
            job.setOutputValueClass(outvalueClass);
        }
        return job;
    }
}