Unless otherwise noted, there is no need to log into other machines; all commands are run on HD-2-101 in the cluster.
All installation packages: Baidu Netdisk, extraction code: 24oy
For VM installation, static IP configuration, and so on, see the Linux quick-links roundup post.
systemctl restart network.service
Note: three machines are configured here (192.168.2.101, 192.168.2.102, 192.168.2.103).
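For reference, a minimal /etc/hosts mapping, assuming the hostnames used throughout this guide (HD-2-101 to HD-2-103) correspond to these IPs:
192.168.2.101 HD-2-101
192.168.2.102 HD-2-102
192.168.2.103 HD-2-103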
# Install this machine's dependencies first; anything else that is missing will be installed later. For now these are the only packages my machine needs o_0
yum install tcl-devel.x86_64 rsync.x86_64 ntp.x86_64 -y
Shell script contents:
Execution steps:
tar -zxvf autoconfig.tar.gz -C /home
4. Switch to the /home/autoconfig/bin directory and run: sh autoconfig.sh all
5. Distribute to all machines and execute.
cd /home/autoconfig/bin; sh xsync "/home/autoconfig" "/home"; sh doCommand other "cd /home/autoconfig/bin/; sh autoconfig.sh trust";
sh doCommand other "init 0"; init 0;
cd /home/autoconfig/bin; sh doCommand all "yum install tcl-devel.x86_64 rsync.x86_64 ntp.x86_64 -y"
First, check whether Java is already installed on every machine, and uninstall it if so.
Check: sh doCommand all "rpm -qa | grep java";
Uninstall with: rpm -e --nodeps <package-to-remove>
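One possible one-liner to remove everything the check matched (a sketch; inspect the grep output first, since --nodeps removes packages unconditionally):
sh doCommand all "rpm -qa | grep java | xargs -r rpm -e --nodeps";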
sh doCommand all "mkdir -p /opt/cluster";
tar -zxvf /opt/cluster/jdk-8u144-linux-x64.tar.gz -C /opt/cluster; sh xsync "/opt/cluster/jdk1.8.0_144" "/opt/cluster";
# The Java version is jdk1.8.0_144
sh doCommand all "ln -s /opt/cluster/jdk1.8.0_144 /opt/cluster/java";
#JAVA_HOME
export JAVA_HOME=/opt/cluster/java
export PATH=$PATH:$JAVA_HOME/bin
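Assuming these two exports are appended to /etc/profile on every machine, a quick sanity check that the JDK is visible cluster-wide:
sh doCommand all "source /etc/profile; java -version";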
Note: HD-2-101 is chosen here as the ntpd time server.
restrict 192.168.2.0 mask 255.255.255.0 nomodify notrap
restrict 127.0.0.1
# Comment out the lines below; the public NTP servers cannot be reached from the internal network
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
# If this node loses network connectivity, it can still use its local clock as the time source for the other nodes in the cluster
server 127.127.1.0
fudge 127.127.1.0 stratum 5
# Add the following (typically in /etc/sysconfig/ntpd) so that the hardware clock is synchronized along with the system time
SYNC_HWCLOCK="yes"
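For these changes to take effect, ntpd itself has to be (re)started and enabled on HD-2-101; for example:
systemctl restart ntpd.service; systemctl enable ntpd.service;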
# /etc/cron.d/ntp_crond: every 10 minutes, sync time from HD-2-101
*/10 * * * * root /usr/sbin/ntpdate HD-2-101
sh xsync "/etc/cron.d/ntp_crond" "/etc/cron.d";
# Restart crond
sh doCommand all "systemctl restart crond.service"
# reach is the count of polls made to the upstream NTP source, shown in octal; it changes once every poll interval (the seconds in the poll column). Once reach is 17 or greater, other servers can synchronize time from this server.
watch ntpq -p
# Make sure ntpd is not running on the other machines
sh doCommand other "systemctl stop ntpd.service;/usr/sbin/ntpdate HD-2-101;"
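A simple way to eyeball that the clocks now agree across all three machines:
sh doCommand all "date";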
Package download: zookeeper-3.4.14.tar.gz
tar -zxvf /opt/cluster/zookeeper-3.4.14.tar.gz -C /opt/cluster;
sh doCommand all "mkdir -p /hdata/zookeeper;";
dataDir=/hdata/zookeeper
server.1=HD-2-101:2888:3888
server.2=HD-2-102:2888:3888
server.3=HD-2-103:2888:3888
#server.A=B:C:D
#A is a number identifying which server this is;
#B is the server's IP address (hostnames are used here);
#C is the port this server uses to exchange information with the cluster Leader;
#D is the port the servers use to communicate during leader election if the current Leader fails;
#In cluster mode, a file named myid is placed in the dataDir directory. It contains nothing but the value of A; on startup ZooKeeper reads this file and compares it against the configuration in zoo.cfg to determine which server it is.
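These entries extend ZooKeeper's standard configuration; a minimal sketch, assuming conf/zoo.cfg is created from the shipped sample before the lines above are appended:
cp /opt/cluster/zookeeper-3.4.14/conf/zoo_sample.cfg /opt/cluster/zookeeper-3.4.14/conf/zoo.cfg;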
sh xsync "/opt/cluster/zookeeper-3.4.14" "/opt/cluster";
# e.g. for server.1=B:C:D
echo "1" > /hdata/zookeeper/myid;
sh doCommand all "ln -s /opt/cluster/zookeeper-3.4.14 /opt/cluster/zookeeper";
sh doCommand all "/opt/cluster/zookeeper/bin/zkServer.sh start";
sh doCommand all "/opt/cluster/zookeeper/bin/zkServer.sh status";
Upload the hadoop_template.tar.gz template archive to the /home directory and extract it: tar -zxvf /home/hadoop_template.tar.gz -C /home
Fill in the values of the variables exported in env.sh according to your machines.
Run the script with sh /home/hadoop_template/ha/env.sh; it completes the configuration automatically.
The HA template path is /home/hadoop_template/ha; the template files are configured as follows:
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Combine the addresses of the two NameNodes into a single cluster nameservice -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://${HADOOP_CLUSTER_NAME}</value>
</property>
<!-- Storage directory for files Hadoop generates at runtime -->
<property>
<name>hadoop.tmp.dir</name>
<value>${HADOOP_TMP_DIR}</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>${HADOOP_ZOOKEEPERS}</value>
</property>
<!-- When start-dfs.sh is used, the JournalNodes may not be up yet; without retries the NameNode cannot connect to them and fails to start -->
<property>
<name>ipc.client.connect.max.retries</name>
<value>100</value>
<description>Indicates the number of retries a client will make to establish a server connection.
</description>
</property>
<property>
<name>ipc.client.connect.retry.interval</name>
<value>10000</value>
<description>Indicates the number of milliseconds a client will wait for before retrying to establish a server connection.
</description>
</property>
</configuration>
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Name of the fully distributed cluster (nameservice) -->
<property>
<name>dfs.nameservices</name>
<value>${HADOOP_CLUSTER_NAME}</value>
</property>
<!-- NameNodes in the cluster -->
<property>
<name>dfs.ha.namenodes.${HADOOP_CLUSTER_NAME}</name>
<value>${HADOOP_NAME_NODES}</value>
</property>
<!-- RPC address of nn1 -->
<property>
<name>dfs.namenode.rpc-address.${HADOOP_CLUSTER_NAME}.nn1</name>
<value>${HADOOP_NN1}:9000</value>
</property>
<!-- RPC address of nn2 -->
<property>
<name>dfs.namenode.rpc-address.${HADOOP_CLUSTER_NAME}.nn2</name>
<value>${HADOOP_NN2}:9000</value>
</property>
<!-- HTTP address of nn1 -->
<property>
<name>dfs.namenode.http-address.${HADOOP_CLUSTER_NAME}.nn1</name>
<value>${HADOOP_NN1}:50070</value>
</property>
<!-- HTTP address of nn2 -->
<property>
<name>dfs.namenode.http-address.${HADOOP_CLUSTER_NAME}.nn2</name>
<value>${HADOOP_NN2}:50070</value>
</property>
<!-- Location on the JournalNodes where the NameNodes' shared edits (metadata) are stored -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>${HADOOP_JN}</value>
</property>
<!-- Fencing method: ensures only one NameNode responds to clients at any given time -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- sshfence requires passwordless SSH login -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>${HADOOP_ISA_PATH}</value>
</property>
<!-- JournalNode storage directory -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>${HADOOP_JN_DATA_DIR}</value>
</property>
<!-- Disable permission checking -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- Client failover proxy provider: the class HDFS clients use to determine which NameNode is Active -->
<property>
<name>dfs.client.failover.proxy.provider.${HADOOP_CLUSTER_NAME}</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
</configuration>
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Declare the two ResourceManagers -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>${HADOOP_YARN_ID}</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>${HADOOP_YARN_RMS}</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>${HADOOP_YARN_RM1}</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>${HADOOP_YARN_RM2}</value>
</property>
<!-- ZooKeeper cluster address -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>${HADOOP_ZOOKEEPERS}</value>
</property>
<!-- Enable automatic recovery -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Store ResourceManager state in the ZooKeeper cluster -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
</configuration>
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
#!/bin/bash
# Hadoop installation directory
export HADOOP_HOME="/opt/cluster/hadoop-2.7.2"
#
# Hadoop cluster name
export HADOOP_CLUSTER_NAME="myhadoop"
# Storage directory for files Hadoop generates at runtime
export HADOOP_TMP_DIR="/hdata/hadoop"
#
# All NameNode nodes in the cluster
export HADOOP_NAME_NODES="nn1,nn2"
# Addresses of the NameNodes listed above; add variables named HADOOP_NN1, HADOOP_NN2, ... accordingly
export HADOOP_NN1="HD-2-101"
export HADOOP_NN2="HD-2-102"
# Location on the JournalNodes where NameNode metadata is stored
export HADOOP_JN="qjournal://HD-2-101:8485;HD-2-102:8485;HD-2-103:8485/myhadoop"
# Path to the id_rsa private key
export HADOOP_ISA_PATH="~/.ssh/id_rsa"
# JournalNode storage directory
export HADOOP_JN_DATA_DIR="/hdata/hadoop/journal"
# ZooKeeper host list
export HADOOP_ZOOKEEPERS="HD-2-101:2181,HD-2-102:2181,HD-2-103:2181"
# YARN cluster id
export HADOOP_YARN_ID="yarn-ha"
# All ResourceManagers in the cluster
export HADOOP_YARN_RMS="rm1,rm2"
# Addresses of the ResourceManagers listed above; add variables named HADOOP_YARN_RM1, HADOOP_YARN_RM2, ... accordingly
export HADOOP_YARN_RM1="HD-2-101"
export HADOOP_YARN_RM2="HD-2-102"
# Render every *.template in this directory: strip the .template suffix
# and substitute the exported variables into the corresponding Hadoop site file.
baseDir=$(cd `dirname $0`; pwd)
for template in `cd ${baseDir}; ls *template`
do
    # e.g. core-site.xml.template -> core-site.xml
    siteFile=`echo ${template} | gawk -F"." '{print $1"."$2}'`
    envsubst < ${baseDir}/${template} > ${HADOOP_HOME}/etc/hadoop/${siteFile}
    echo -e "#### set ${siteFile} succeed"
done
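A quick, optional way to confirm the substitution worked is to check one of the rendered files, for example:
grep -A 1 "fs.defaultFS" /opt/cluster/hadoop-2.7.2/etc/hadoop/core-site.xml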
# Sync to all machines
sh xsync "/opt/cluster/hadoop-2.7.2" "/opt/cluster";
# Create the symlink
sh doCommand all "ln -s /opt/cluster/hadoop-2.7.2 /opt/cluster/hadoop;";
# Start the ZooKeeper cluster
sh doCommand all "source /etc/profile; /opt/cluster/zookeeper/bin/zkServer.sh start";
# Initialize the HA state in ZooKeeper
sh /opt/cluster/hadoop/bin/hdfs zkfc -formatZK
sh doCommand all "sh /opt/cluster/hadoop/sbin/hadoop-daemon.sh start journalnode";
# Format the NameNode
sh /opt/cluster/hadoop/bin/hdfs namenode -format;
# Start NameNode 1
sh /opt/cluster/hadoop/sbin/hadoop-daemon.sh start namenode;
# Sync metadata (run this on the standby NameNode, HD-2-102)
sh /opt/cluster/hadoop/bin/hdfs namenode -bootstrapStandby;
# Start NameNode 2 (run this on HD-2-102)
sh /opt/cluster/hadoop/sbin/hadoop-daemon.sh start namenode;
# Restart HDFS
sh /opt/cluster/hadoop/sbin/stop-dfs.sh; sh /opt/cluster/hadoop/sbin/start-dfs.sh;
# Check status
sh /opt/cluster/hadoop/bin/hdfs haadmin -getServiceState nn1;
sh /opt/cluster/hadoop/bin/hdfs haadmin -getServiceState nn2;
sh /opt/cluster/hadoop/sbin/start-yarn.sh;
# Start the second ResourceManager (run this on HD-2-102; start-yarn.sh only starts the local one)
sh /opt/cluster/hadoop/sbin/yarn-daemon.sh start resourcemanager;
sh /opt/cluster/hadoop/bin/yarn rmadmin -getServiceState rm1; sh /opt/cluster/hadoop/bin/yarn rmadmin -getServiceState rm2;
# Create the HDFS path
/opt/cluster/hadoop/bin/hadoop fs -mkdir -p /mapreduce/test/input/20180702;
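The word.txt used below is assumed to be any small text file in the current directory, for example:
echo "hello hadoop hello yarn hello zookeeper" > ./word.txt;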
# Upload
/opt/cluster/hadoop/bin/hadoop fs -put ./word.txt /mapreduce/test/input/20180702;
cd /opt/cluster/hadoop; bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar wordcount /mapreduce/test/input/20180702 /mapreduce/test/output/20180702;
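Once the job completes, the word counts can be read back from HDFS, e.g.:
/opt/cluster/hadoop/bin/hadoop fs -cat /mapreduce/test/output/20180702/*;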