Note: if you only want remote debugging, skip straight to Step 4.
Hadoop-2.7.1.tar.gz download: http://hadoop.apache.org/releases.html#Download
JDK 1.8 download: http://www.oracle.com/technetwork/java/javase/downloads/index.html
hadoop-common-bin-2.7.1.zip (hadoop.dll and winutils.exe, required to run Hadoop on Windows):
http://download.csdn.net/download/faq_tong/9413293
HADOOP_HOME: D:\developer\hadoop-2.7.1
Path: append ;%HADOOP_HOME%\bin;%HADOOP_HOME%\sbin
a. hadoop-env.cmd
set JAVA_HOME=D:\developer\Java8\jdk1.8.0_121
b. core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
c. hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/hadoop/hadoop271/data/dfs/namenode</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/hadoop/hadoop271/data/dfs/datanode</value>
    </property>
</configuration>
d. mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
e. yarn-site.xml
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
</configuration>
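As a quick sanity check (my own sketch, not part of the original setup; the etc\hadoop path assumes the HADOOP_HOME from above), you can load the four edited files with the Configuration API and print the effective values:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class ConfCheck {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // new Configuration() only picks up *-site.xml files found on the classpath,
        // so load the files edited above explicitly from the Hadoop install directory.
        String etc = "D:/developer/hadoop-2.7.1/etc/hadoop/";
        conf.addResource(new Path(etc + "core-site.xml"));
        conf.addResource(new Path(etc + "hdfs-site.xml"));
        conf.addResource(new Path(etc + "mapred-site.xml"));
        conf.addResource(new Path(etc + "yarn-site.xml"));
        System.out.println("fs.defaultFS = " + conf.get("fs.defaultFS"));
        System.out.println("dfs.replication = " + conf.get("dfs.replication"));
        System.out.println("mapreduce.framework.name = " + conf.get("mapreduce.framework.name"));
        System.out.println("yarn.nodemanager.aux-services = " + conf.get("yarn.nodemanager.aux-services"));
    }
}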
Copy hadoop.dll to C:\Windows\System32;
Copy hadoop.dll and winutils.exe into your_hadoop\bin;
The namenode must be formatted before the first start: run hdfs namenode -format under your_hadoop\bin;
Start HDFS: run start-dfs.cmd under sbin;
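Before running the WordCount job below, the input file must exist at hdfs://localhost:9000/hd-test/wc/log.txt. A minimal sketch (my own, not from the original article) using the HDFS Java API to create the directory and upload a local file; C:\tmp\log.txt is just a placeholder for any text file you want to count:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class PrepareWordCountInput {
    public static void main(String[] args) throws Exception {
        // Same workaround as in the WordCount job below: tell Hadoop where winutils.exe lives.
        System.setProperty("hadoop.home.dir", "D:\\developer\\hadoop-2.7.1");
        Configuration conf = new Configuration();
        // Connect to the pseudo-distributed HDFS configured in core-site.xml above.
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        Path inputDir = new Path("/hd-test/wc");
        if (!fs.exists(inputDir)) {
            fs.mkdirs(inputDir);
        }
        // Placeholder local file; replace with your own input.
        fs.copyFromLocalFile(new Path("C:\\tmp\\log.txt"), new Path("/hd-test/wc/log.txt"));
        for (FileStatus status : fs.listStatus(inputDir)) {
            System.out.println(status.getPath() + "  " + status.getLen() + " bytes");
        }
        fs.close();
    }
}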
package com.wsp.hadoop.wc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * Description:
 *
 * @Author: weishenpeng
 * Date: 2017/11/14
 * Time: 12:31 PM
 */
public class WordCount {

    public static class WcMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer token = new StringTokenizer(line);
            while (token.hasMoreTokens()) {
                word.set(token.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        System.setProperty("hadoop.home.dir", "D:\\developer\\hadoop-2.7.1");
        Configuration configuration = new Configuration();
        Job job = new Job(configuration);
        job.setJarByClass(WordCount.class);
        job.setJobName("wordCount");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(WcMap.class);
        job.setReducerClass(WcReduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/hd-test/wc/log.txt"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/hd-test/wc/result3"));
        job.waitForCompletion(true);
    }
}
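Keep in mind that FileOutputFormat refuses to start if the output directory already exists, so delete result3 or pick a new name between runs. Once the job finishes, the counts can be read back with a short sketch like this (my own addition; part-r-00000 is the conventional single-reducer output file, adjust if you configure more reducers):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

public class PrintWordCountResult {
    public static void main(String[] args) throws Exception {
        System.setProperty("hadoop.home.dir", "D:\\developer\\hadoop-2.7.1");
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        // TextOutputFormat with a single reducer writes the counts to part-r-00000.
        Path result = new Path("/hd-test/wc/result3/part-r-00000");
        try (FSDataInputStream in = fs.open(result);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line); // each line is "word<TAB>count"
            }
        }
        fs.close();
    }
}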
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.wsp.hadoop.wc</groupId>
    <artifactId>hadoop-wordcount</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.1</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.1</version>
        </dependency>
    </dependencies>
</project>
log4j.properties (place it on the classpath, e.g. src/main/resources in a Maven project):
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.Target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
log4j.rootLogger=INFO, console
Note: be sure to set the Hadoop directory manually, otherwise the job fails:
System.setProperty("hadoop.home.dir", "D:\\developer\\hadoop-2.7.1");
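If that directory is wrong, Hadoop cannot find bin\winutils.exe and the run dies early with the well-known winutils error. A tiny sanity check (my own sketch) that roughly mirrors how Hadoop resolves the directory:

import java.io.File;

public class WinutilsCheck {
    public static void main(String[] args) {
        // hadoop.home.dir takes precedence; otherwise Hadoop falls back to HADOOP_HOME.
        String home = System.getProperty("hadoop.home.dir");
        if (home == null) {
            home = System.getenv("HADOOP_HOME");
        }
        if (home == null) {
            System.out.println("Neither hadoop.home.dir nor HADOOP_HOME is set");
            return;
        }
        File winutils = new File(home, "bin" + File.separator + "winutils.exe");
        System.out.println(winutils.getAbsolutePath()
                + (winutils.exists() ? "  -- found" : "  -- NOT found"));
    }
}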
For remote debugging you do not need a local Hadoop installation: just unzip hadoop-common-bin-2.7.1 and point hadoop.home.dir at the extracted directory. The pom.xml and log4j.properties can be taken from Step 3.
package com.wsp.hadoop.wc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * Description:
 *
 * @Author: weishenpeng
 * Date: 2017/11/14
 * Time: 12:31 PM
 */
public class WordCount {

    public static class WcMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer token = new StringTokenizer(line);
            while (token.hasMoreTokens()) {
                word.set(token.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        System.setProperty("hadoop.home.dir", "D:\\developer\\hadoop-2.7.1");
        //System.setProperty("hadoop.home.dir", "\\opt\\soft\\bdq\\hadoop-2.7.1");
        Configuration configuration = new Configuration();
        Job job = new Job(configuration);
        job.setJarByClass(WordCount.class);
        job.setJobName("wordCount");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(WcMap.class);
        job.setReducerClass(WcReduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://172.16.121.142:9000/hd-test/wc/hellow.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\platform\\hadoop\\hadoop-test\\out\\result.log"));
        job.waitForCompletion(true);
    }
}
Note: only the remote HDFS address needs to change; even against a remote cluster, the local run still depends on winutils.exe from the previously downloaded hadoop-common-bin-2.7.1.
If you hit a permission error, rename the current Windows login user to a user name that exists on the server, e.g. work.
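A commonly suggested alternative (an assumption to verify, not from the original setup): with simple, non-Kerberos authentication Hadoop takes the effective user from HADOOP_USER_NAME, either as an environment variable set before launching the IDE or, in Hadoop 2.x, as a JVM system property. If you go the property route, put this at the very top of main(), before any Configuration, FileSystem, or Job is created:

// Hedged alternative to renaming the Windows account (verify in your environment).
System.setProperty("HADOOP_USER_NAME", "work"); // "work" = a user that exists on the server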