1. Create a Maven Project
The project is named hello-hadoop. Its layout:
bash-3.2$ /usr/local/bin/tree -L 4 .
.
├── pom.xml
└── src
    ├── main
    │   ├── java
    │   │   └── com
    │   └── resources
    │       ├── core-site.xml       ==> config files downloaded from the Hadoop machine
    │       ├── hdfs-site.xml
    │       ├── log4j.properties    ==> adjust the log level here
    │       ├── mapred-site.xml
    │       └── yarn-site.xml
    └── test

6 directories, 6 files
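log4j.properties is the file that step 4 refers to for controlling log verbosity. A minimal sketch of what it might contain, assuming a plain console appender (the exact layout pattern is an assumption, not taken from the post):

# Assumed contents -- change INFO to DEBUG or WARN to adjust verbosity
log4j.rootLogger=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n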
2. Add the Hadoop Dependencies
cat pom.xml
....
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
    <version>2.7.1</version>
</dependency>
...
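The jar that job.setJar(...) points to in the next step is produced by packaging this project, from the project root:

bash-3.2$ mvn clean package

Maven writes it to target/hello-hadoop-0.0.1-SNAPSHOT.jar, so the absolute path passed to setJar should point there.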
3. Write the MapReduce Application
package com.harry.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // configuration.set("mapreduce.framework.name", "yarn");
        // configuration.addResource("classpath:/hadoop/core-site.xml");
        // configuration.addResource("classpath:/hadoop/hdfs-site.xml");
        // configuration.addResource("classpath:/hadoop/mapred-site.xml");
        // configuration.addResource("classpath:/hadoop/yarn-site.xml");
        Job job = Job.getInstance(configuration, "WordCount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Set the MR job jar: build it with mvn clean package and fill in the absolute path
        job.setJar("hello-hadoop-0.0.1-SNAPSHOT.jar");
        // HDFS input/output paths
        FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/tmp/README.md"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/tmp/wc"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
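One thing to watch: MapReduce refuses to start if the output directory already exists, so re-running the job against hdfs://master:9000/tmp/wc fails with a FileAlreadyExistsException until the directory is removed. A minimal standalone sketch of cleaning it up first (the CleanOutputDir class is hypothetical, not from the original post):

package com.harry.examples;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper: removes the job's previous output directory
// so WordCount can be re-run without FileAlreadyExistsException.
public class CleanOutputDir {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path output = new Path("hdfs://master:9000/tmp/wc");
        FileSystem fs = FileSystem.get(URI.create("hdfs://master:9000"), conf);
        if (fs.exists(output)) {
            fs.delete(output, true); // recursive: removes the old part-r-* files too
            System.out.println("Deleted " + output);
        }
    }
}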
4. Run
In the IDE, right-click the WordCount class -> Run As -> Java Application.
You can adjust the log level in log4j.properties; the job's logs then show up in the console.
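Once the job succeeds, you can verify the counts by reading the reducer output back from HDFS. A minimal sketch, assuming the same master:9000 namenode and the default part-r-NNNNN output file naming (the PrintWordCounts class is hypothetical, not from the original post):

package com.harry.examples;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper: prints the word counts that WordCount wrote
// to /tmp/wc, one "word<TAB>count" line per record.
public class PrintWordCounts {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://master:9000"), conf);
        // Reducer output lands in files named part-r-00000, part-r-00001, ...
        for (FileStatus status : fs.listStatus(new Path("/tmp/wc"))) {
            if (!status.getPath().getName().startsWith("part-")) {
                continue; // skip the _SUCCESS marker file
            }
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(fs.open(status.getPath())))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
            }
        }
    }
}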