core    # basic, shared configuration: 1. NameNode entry point 2. temp directory
hdfs    # HDFS-related configuration: 1. permissions 2. replication 3. HA (high availability)
mapred  # MapReduce-related configuration
yarn    # YARN-related configuration
# low-level configuration files; they hold the default values and are overridden as needed
core-default.xml
hdfs-default.xml
mapred-default.xml
yarn-default.xml
# HADOOP_HOME/etc/hadoop
core-site.xml
hdfs-site.xml
mapred-site.xml
yarn-site.xml
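For example, a minimal core-site.xml override covering the two items listed above; the host/port reuse the value from this section, while the temp path is a placeholder for your environment:

<configuration>
    <!-- NameNode entry point (assumed host/port) -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop:8020</value>
    </property>
    <!-- temp/working directory (assumed path) -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/install/hadoop-2.5.2/data/tmp</value>
    </property>
</configuration>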
# code-level, hard-coded: poor maintainability, highest priority
Configuration configuration = new Configuration();
// "fs.default.name" is the deprecated Hadoop 1.x key; in 2.x prefer "fs.defaultFS"
configuration.set("fs.default.name", "hdfs://hadoop:8020");
configuration.set("key", "value");
// .....
FileSystem fileSystem = FileSystem.get(configuration);
# code-level, loading config files: good maintainability, lower priority
Configuration configuration = new Configuration();
configuration.addResource("core-site.xml");
configuration.addResource("hdfs-site.xml");
configuration.addResource("marpred-site.xml");
configuration.addResource("yarn-site.xml");
FileSystem fileSystem = FileSystem.get(configuration);
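A quick sketch of the priority claim above: a value set in code wins over one loaded from a resource file (the class name here is a hypothetical demo; the property and value reuse the ones from this section):

import org.apache.hadoop.conf.Configuration;

public class ConfPrecedenceDemo { // hypothetical demo class
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.addResource("core-site.xml");               // file may define fs.defaultFS
        conf.set("fs.defaultFS", "hdfs://hadoop:8020");  // code-level value
        // set() beats addResource(), which beats the *-default.xml defaults
        System.out.println(conf.get("fs.defaultFS"));    // prints hdfs://hadoop:8020
    }
}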
# Hadoop shell commands can take configuration directly; the -D generic option must come before the subcommand arguments
# test
bin/hdfs dfs -D fs.defaultFS=xxxx -ls /
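A concrete example of the same mechanism (file name and target path are hypothetical), overriding the replication factor for a single command with the standard dfs.replication property:

bin/hdfs dfs -D dfs.replication=2 -put local.txt /test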
MapReduce is a compute platform and framework built on top of HDFS.
How MapReduce works:
Setting up a YARN cluster
The NameNode and the ResourceManager must not be placed on the same node.
# to keep the resourcemanager and the namenode on different nodes, edit yarn-site.xml (see the sketch after the start command)
# start-yarn.sh must be executed on the machine where the resourcemanager runs
sbin/start-yarn.sh
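As a sketch of the yarn-site.xml change mentioned above, assuming the NameNode runs on hadoop and we pin the ResourceManager to a different node hadoop2 (hostnames are placeholders for your cluster):

<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop2</value><!-- any node other than the NameNode's -->
</property>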
Assignment: on top of the HA HDFS cluster, build an HA YARN cluster.
The five core steps of MapReduce
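Judging from the driver code below, the five steps are: 1. InputFormat (split and read the input), 2. Map, 3. Shuffle (sort/group, performed automatically), 4. Reduce, 5. OutputFormat (write the results).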
Classic MR example: WordCount, analysis of the approach
MapReduce code
<dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>2.5.2</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>2.5.2</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <version>2.5.2</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-core</artifactId> <version>2.5.2</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-common --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-common</artifactId> <version>2.5.2</version> </dependency>
import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class TestMapReduce {

    /**
     * k1 LongWritable (byte offset of the line)
     * v1 Text         (the line itself)
     *
     * k2 Text         (word)
     * v2 IntWritable  (count of 1)
     */
    public static class MyMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        Text k2 = new Text();
        IntWritable v2 = new IntWritable();

        /**
         * k1 key   0 (offset)
         * v1 value "suns xiaohei"
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] words = line.split("\t");
            for (String word : words) {
                k2.set(word);
                v2.set(1);
                context.write(k2, v2);
            }
        }
    }

    public static class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        Text k3 = new Text();
        IntWritable v3 = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int result = 0;
            for (IntWritable value : values) {
                result += value.get();
            }
            k3.set(key);
            v3.set(result);
            context.write(k3, v3);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        job.setJarByClass(TestMapReduce.class);
        job.setJobName("first");

        // inputFormat
        TextInputFormat.addInputPath(job, new Path("/test"));

        // map
        job.setMapperClass(MyMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // shuffle: handled automatically

        // reduce
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // outputFormat
        TextOutputFormat.setOutputPath(job, new Path("/dest1"));

        job.waitForCompletion(true);
    }
}
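For reference, the mapper splits each line on tabs, so a hypothetical /test input consistent with the output shown below could be:

aaa	bbb	jjj
aaa	bbb	kkkk	lhc	ssss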
Note: the yarn command must be run from the Hadoop installation directory (bin/yarn).
Simply package with Maven, then scp the jar up to the server.
bin/yarn jar hadoop-mapreduce.jar          # run the job
bin/hdfs dfs -text /dest1/part-r-00000     # view the result

	Bytes Written=38
[root@hadoop hadoop-2.5.2]# bin/hdfs dfs -text /dest1/part-r-00000
19/01/24 09:40:19 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
aaa	2 (times)
bbb	2
jjj	1
kkkk	1
lhc	1
ssss	1
In IDEA, go to File -> Settings -> Plugins, search for Maven Helper, install it, and restart IDEA.
Configure pom.xml as follows:
<properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <maven.compiler.source>1.7</maven.compiler.source> <maven.compiler.target>1.7</maven.compiler.target> <baizhi-mainClass>com.baizhi.TestMapReduce</baizhi-mainClass> <target-host>192.168.194.147</target-host><!--此處是resourceManager的ip--> <target-position>/opt/install/hadoop-2.5.2</target-position> </properties> <dependences>... <build> <!--引入wagon上傳插件--> <extensions> <extension> <groupId>org.apache.maven.wagon</groupId> <artifactId>wagon-ssh</artifactId> <version>2.8</version> </extension> </extensions> <plugings> <!--jar包插件--> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-jar-plugin</artifactId> <version>2.3.2</version> <configuration> <outputDirectory>${basedir}</outputDirectory> <archive> <manifest> <mainClass>${baizhi-mainClass}</mainClass> </manifest> </archive> </configuration> </plugin> <!--wagon插件的配置--> <plugin> <groupId>org.codehaus.mojo</groupId> <artifactId>wagon-maven-plugin</artifactId> <version>1.0</version> <configuration> <fromFile>${project.build.finalName}.jar</fromFile> <url>scp://root:123456@${target-host}${target-position}</url> </configuration> </plugin> </plugings> </build>
Once this is in place, open the Maven tool window: double-click jar:jar to build the package, then click wagon:upload to upload it.
But how can these two steps be completed in one click?
This is where the Maven Helper plugin installed above comes in. Right-click the pom.xml file:
Run Maven -> New Goal, enter jar:jar wagon:upload, and click OK; packaging and upload now happen in a single click.
Building on ② above, add commands to the wagon plugin so the jar is run after upload, as follows:
<plugin> <groupId>org.codehaus.mojo</groupId> <artifactId>wagon-maven-plugin</artifactId> <version>1.0</version> <configuration> <fromFile>${project.build.finalName}.jar</fromFile> <url>scp://root:123456@${target-host}${target-position}</url> <commands> <!-- 殺死原來的進程 --> <command>pkill -f ${project.build.finalName}.jar</command> <!-- 從新啓動test.jar,程序的輸出結果寫到根目錄下的nohup.out日誌文件中 --> <command>nohup /opt/install/hadoop-2.5.2/bin/yarn jar /opt/install/hadoop-2.5.2/${project.build.finalName}.jar > /root/nohup.out 2>&1 &</command> </commands> <!-- 顯示運行命令的輸出結果 --> <displayCommandOutputs>true</displayCommandOutputs> </configuration> </plugin>
Then add a new goal in Maven Helper:
jar:jar wagon:upload-single wagon:sshexec
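Here jar:jar builds the jar, wagon:upload-single uploads that single file, and wagon:sshexec runs the <commands> configured above over SSH.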
Remember to compile first, so that the compiled classes are already in the project's target directory.
Check the nohup.out file on the resourcemanager node; it shows the job ran successfully.
①. Configure the following in yarn-site.xml:
<configuration> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.resourcemanager.ha.enabled</name> <value>true</value> </property> <!-- 指定RM的cluster id --> <property> <name>yarn.resourcemanager.cluster-id</name> <value>lhc</value> </property> <!-- 指定RM的名字 --> <property> <name>yarn.resourcemanager.ha.rm-ids</name> <value>rm1,rm2</value> </property> <!-- 分別指定RM的地址 --> <property> <name>yarn.resourcemanager.hostname.rm1</name> <value>hadoop1</value> </property> <property> <name>yarn.resourcemanager.hostname.rm2</name> <value>hadoop2</value> </property> <!-- 指定zk集羣地址 --> <property> <name>yarn.resourcemanager.zk-address</name> <value>hadoop:2181,hadoop1:2181,hadoop2:2181</value> </property> </configuration>
②. In the Hadoop installation directory on hadoop1 and on hadoop2, run sbin/start-yarn.sh to start each ResourceManager.
③. Run jps to check the processes; the ResourceManager has started normally:
[root@hadoop1 hadoop-2.5.2]# jps
4552 NameNode
4762 DFSZKFailoverController
4610 DataNode
5822 ResourceManager
6251 Jps
4472 JournalNode
4426 QuorumPeerMain
④. Run bin/yarn rmadmin -getServiceState rm1 and bin/yarn rmadmin -getServiceState rm2 respectively
to check the state of the two ResourceManagers: one is active and the other standby.
[root@hadoop1 hadoop-2.5.2]# bin/yarn rmadmin -getServiceState rm1
19/01/24 11:56:07 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
active
[root@hadoop1 hadoop-2.5.2]# bin/yarn rmadmin -getServiceState rm2
19/01/24 11:58:24 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
standby
⑤. Stop the ResourceManager acting as rm1, then run bin/yarn rmadmin -getServiceState rm2 again:
rm2 is now active, which demonstrates automatic ResourceManager failover.
For details, see this blog post: https://blog.csdn.net/skywalker_only/article/details/41726189