HA 登錄master1 >cd {hadoop-install-path} >wget 對應的hadoop 包 >tar zxvf hadoop-x.tar.gz >cd hadoop-x/etc/hadoop
<!-- zk ha 配置(core-site.xml):將其中的 {hostname} 替換爲實際的 ZooKeeper 節點主機名 -->
<property> <name>ha.zookeeper.quorum</name> <value>{hostname}:2181,{hostname}:2181,{hostname}:2181</value> </property>
```
vi hdfs-site.xml
<property> <name>dfs.namenode.name.dir</name> <value>{hadoop-home}/dfs/name</value> </property> <property> <name>dfs.datanode.data.dir</name> <value>{other-path}/dfs/data</value> </property> <property> <name>dfs.replication</name> <value>2</value> <!-- 這裏有幾臺從機就配置幾 例如:主機*1+從機*2 這裏就配置2 --> </property> <property> <name>dfs.nameservices</name> <value>ns</value> </property> <!-- ns下面有兩個NameNode,分別是nn1,nn2 --> <property> <name>dfs.ha.namenodes.ns</name> <value>nn1,nn2</value> </property> <!-- nn1的RPC通訊地址 --> <property> <name>dfs.namenode.rpc-address.ns.nn1</name> <value>mast1:9000</value> </property> <!-- nn1的http通訊地址 --> <property> <name>dfs.namenode.http-address.ns.nn1</name> <value>mast1:50070</value> </property> <!-- nn2的RPC通訊地址 --> <property> <name>dfs.namenode.rpc-address.ns.nn2</name> <value>mast2:9000</value> </property> <!-- nn2的http通訊地址 --> <property> <name>dfs.namenode.http-address.ns.nn2</name> <value>mast2:50070</value> </property> <!-- 指定NameNode的元數據在JournalNode上的存放位置 --> <property> <name>dfs.namenode.shared.edits.dir</name> <value>qjournal://{journal-address}:8485;{journal-address}:8485;{journal-address}:8485/ns</value> </property> <!-- 指定JournalNode在本地磁盤存放數據的位置 --> <property> <name>dfs.journalnode.edits.dir</name> <value>{hadoop-home}/hdfs/journal</value> </property> <!-- 開啓NameNode故障時自動切換 --> <property> <name>dfs.ha.automatic-failover.enabled</name> <value>true</value> </property> <!-- 配置失敗自動切換實現方式 --> <property> <name>dfs.client.failover.proxy.provider.ns</name> <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value> </property> <!-- 配置隔離機制 --> <property> <name>dfs.ha.fencing.methods</name> <value>sshfence</value> </property> <!-- 使用隔離機制時須要ssh免登錄 --> <property> <name>dfs.ha.fencing.ssh.private-key-files</name> <value>{user-dir}/.ssh/id_rsa</value> </property> <!-- 在NN和DN上開啓WebHDFS (REST API)功能,不是必須 --> <property> <name>dfs.webhdfs.enabled</name> <value>true</value> </property>
<configuration> <!--2.0之後的配置 ,mapreduce 升級爲 yarn --> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> <property> <name>mapreduce.jobhistory.address</name> <value>{master}:10020</value> </property> <property> <name>mapreduce.jobhistory.webapp.address</name> <value>{master}:19888</value> </property> </configuration>
<configuration> <!--rm失聯後從新連接的時間--> <property> <name>yarn.resourcemanager.connect.retry-interval.ms</name> <value>2000</value> </property> <!--開啓resourcemanagerHA,默認爲false--> <property> <name>yarn.resourcemanager.ha.enabled</name> <value>true</value> </property> <!--配置resourcemanager--> <property> <name>yarn.resourcemanager.ha.rm-ids</name> <value>rm1,rm2</value> </property> <property> <name>ha.zookeeper.quorum</name> <value>master:2181,node1:2181</value> </property> <!--開啓故障自動切換--> <property> <name>yarn.resourcemanager.ha.automatic-failover.enabled</name> <value>true</value> </property> <property> <name>yarn.resourcemanager.hostname.rm1</name> <value>master</value> </property> <property> <name>yarn.resourcemanager.hostname.rm2</name> <value>node1</value> </property> <!-- 在hadoop001上配置rm1,在hadoop002上配置rm2, 注意:通常都喜歡把配置好的文件遠程複製到其它機器上,但這個在YARN的另外一個機器上必定要修改 --> <property> <name>yarn.resourcemanager.ha.id</name> <value>rm1</value> <description>If we want to launch more than one RM in single node,we need this configuration</description> </property> <!--開啓自動恢復功能--> <property> <name>yarn.resourcemanager.recovery.enabled</name> <value>true</value> </property> <!--配置與zookeeper的鏈接地址--> <property> <name>yarn.resourcemanager.zk-state-store.address</name> <value>master:2181,node1:2181</value> </property> <property> <name>yarn.resourcemanager.store.class</name> <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value> </property> <property> <name>yarn.resourcemanager.zk-address</name> <value>master:2181,node1:2181</value> </property> <property> <name>yarn.resourcemanager.cluster-id</name> <value>appcluster-yarn</value> </property> <!--schelduler失聯等待鏈接時間--> <property> <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name> <value>5000</value> </property> <!--配置rm1--> <property> <name>yarn.resourcemanager.address.rm1</name> <value>master:8032</value> </property> <property> <name>yarn.resourcemanager.scheduler.address.rm1</name> 
<value>master:8030</value> </property> <property> <name>yarn.resourcemanager.webapp.address.rm1</name> <value>master:8088</value> </property> <property> <name>yarn.resourcemanager.resource-tracker.address.rm1</name> <value>master:8031</value> </property> <property> <name>yarn.resourcemanager.admin.address.rm1</name> <value>master:8033</value> </property> <property> <name>yarn.resourcemanager.ha.admin.address.rm1</name> <value>master:23142</value> </property> <!--配置rm2--> <property> <name>yarn.resourcemanager.address.rm2</name> <value>node1:8032</value> </property> <property> <name>yarn.resourcemanager.scheduler.address.rm2</name> <value>node1:8030</value> </property> <property> <name>yarn.resourcemanager.webapp.address.rm2</name> <value>node1:8088</value> </property> <property> <name>yarn.resourcemanager.resource-tracker.address.rm2</name> <value>node1:8031</value> </property> <property> <name>yarn.resourcemanager.admin.address.rm2</name> <value>node1:8033</value> </property> <property> <name>yarn.resourcemanager.ha.admin.address.rm2</name> <value>node1:23142</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.nodemanager.local-dirs</name> <value>/data/hadoop/yarn/local</value> </property> <property> <name>yarn.nodemanager.log-dirs</name> <value>/data/hadoop/yarn/log</value> </property> <property> <name>mapreduce.shuffle.port</name> <value>23080</value> </property> <!--故障處理類--> <property> <name>yarn.client.failover-proxy-provider</name> <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value> </property> <property> <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name> <value>/yarn-leader-election</value> <description>Optional setting. The default value is /yarn-leader-election</description> </property> 
<property> <name>yarn.scheduler.fair.preemption</name> <value>true</value> <description>是否支持搶佔,默認值爲false</description> </property> <property> <name>yarn.scheduler.fair.sizebasedweight</name> <value>false</value> <description>是否啓用按應用程序資源需求分配資源,默認值爲false即採用公平輪詢的方法分配資源</description> </property> <property> <name>yarn.scheduler.increment-allocation-mb</name> <value>1024</value> <description>僅fair有效,內存規整化單位,默認值1024.(示例一個container請求1.5G,則調度器規整化爲2G)</description> </property> <property> <name>yarn.nodemanager.resource.memory-mb</name> <value>14336</value> <description>每一個節點可用內存,單位MB,默認是8g,spark須要大量內存,這裏調整爲14g</description> </property> <property> <name>yarn.nodemanager.resource.cpu-vcores</name> <value>12</value> <description>1真core=2 vcores</description> </property> </configuration>
* vi hadoop-env.sh 修改 export JAVA_HOME=${JAVA_HOME} 將 java_home 改成實際地址 * vi slaves 追加 從節點hostname,有幾臺加幾臺 * scp hadoop 將hadoop 發送到其餘的節點,包括 master2 * 啓動 * 第一次啓動 hdfs 啓動 在dfs.namenode.shared.edits.dir配置的機器下執行下面的命令,啓動journalnode >sbin/hadoop-daemon.sh start journalnode master 執行 bin/hdfs namenode -format bin/hdfs zkfc -formatZK sbin/hadoop-daemon.sh start namenode sbin/hadoop-daemon.sh start zkfc node1 執行 bin/hdfs namenode -bootstrapStandby sbin/hadoop-daemon.sh start namenode master 執行 sbin/hadoop-daemon.sh start zkfc sbin/hadoop-daemons.sh start datanode * 第N次啓動 hdfs 啓動 sbin/start-dfs.sh * 第1-N次yarn 啓動 在master 啓動 sbin/yarn-daemon.sh start resourcemanager sbin/yarn-daemons.sh start nodemanager 在master2 啓動 sbin/yarn-daemon.sh start resourcemanager