Step 3: Hadoop HDFS & YARN HA

Cluster plan

OS version   Hostname       IP              Roles
CentOS-7.7 hadoop-test-1 192.168.233.65 namenode datanode DFSZKFailoverController hive hmaster resourcemanager NodeManager
CentOS-7.7 hadoop-test-2 192.168.233.94 namenode datanode DFSZKFailoverController hmaster resourcemanager NodeManager
CentOS-7.7 hadoop-test-3 192.168.233.17 datanode zookeeper NodeManager
CentOS-7.7 hadoop-test-4 192.168.233.238 datanode zookeeper NodeManager
CentOS-7.7 hadoop-test-5 192.168.233.157 datanode zookeeper NodeManager
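
Every later step drives the other machines over SSH by hostname, so it is worth confirming up front that the hostnames resolve and passwordless SSH works. A minimal sanity check, assuming /etc/hosts (or DNS) entries and SSH keys are already in place:

$ for i in {1..5};do ssh hadoop-test-$i "hostname && date";done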

Install Hadoop

$ wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.10.0/hadoop-2.10.0.tar.gz
$ tar -zxvf hadoop-2.10.0.tar.gz -C /data
$ mv /data/hadoop-2.10.0 /data/hadoop
$ chown -R hadoop.hadoop /data/hadoop
$ for i in {1..5};do ssh hadoop-test-$i "mkdir /data/hdfs -p";done
$ for i in {1..5};do ssh hadoop-test-$i "chown -R hadoop.hadoop /data/hdfs ";done
$ su hadoop
$ cd /data/hadoop/etc/hadoop/

hdfs-site.xml

$ vim hdfs-site.xml 
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <!-- HDFS block size: 64 MB -->
     <property>  
        <name>dfs.block.size</name>  
        <value>67108864</value>
     </property> 
    <property>
      <name>dfs.replication</name>
      <value>3</value>
    </property>
    <!-- Nameservice name for the HA cluster -->
    <property>
        <name>dfs.nameservices</name>
        <value>hadoop-test-cluster</value>
    </property>
    <!-- Local directory where the NameNode stores its metadata -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/hdfs/nn</value>
    </property>
    <!-- Local directory where DataNodes store block data -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/hdfs/dn</value>
    </property> 
    <!-- The two NameNode IDs under the nameservice -->
    <property>
      <name>dfs.ha.namenodes.hadoop-test-cluster</name>
      <value>nn1,nn2</value>
    </property>
    <!-- RPC address of each NameNode -->
    <property>
        <name>dfs.namenode.rpc-address.hadoop-test-cluster.nn1</name>
        <value>192.168.233.65:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.hadoop-test-cluster.nn2</name>
        <value>192.168.233.94:8020</value>
    </property>
    <!-- Enable WebHDFS -->
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>

    <property>
        <name>dfs.journalnode.http-address</name>
        <value>0.0.0.0:8480</value>
    </property>
    <property>
        <name>dfs.journalnode.rpc-address</name>
        <value>0.0.0.0:8481</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.hadoop-test-cluster.nn1</name>
        <value>192.168.233.65:50070</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.hadoop-test-cluster.nn2</name>
        <value>192.168.233.94:50070</value>
    </property>
    <!-- Shared edits directory for the NameNodes, on the three JournalNode hosts -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://192.168.233.17:8481;192.168.233.238:8481;192.168.233.157:8481/hadoop-test-cluster</value>
    </property>
    <!-- HA failover proxy provider class (this value is fixed); clients use it to determine which NameNode is active -->
    <property>
        <name>dfs.client.failover.proxy.provider.hadoop-test-cluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <!-- Enable automatic NameNode failover -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <!-- Fencing method (a list of scripts or a Java class) used to protect the active NameNode during failover -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hadoop/.ssh/id_rsa</value>
    </property>
    <!-- Local path where the JournalNodes store edits -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/data/hdfs/journal</value>
    </property>
    <!-- ZooKeeper quorum; timeouts and other options can also be set here -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>192.168.233.17:2181,192.168.233.238:2181,192.168.233.157:2181</value>
    </property>
    <property>
        <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.block.local-path-access.user</name>
        <value>impala</value>
    </property>
    <property>
        <name>dfs.client.file-block-storage-locations.timeout.millis</name>
        <value>60000</value>
    </property>
</configuration>
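
Before moving on, it can help to confirm that Hadoop actually parses the nameservice and NameNode list from this file. A quick check with hdfs getconf (run once the configuration is in place and HADOOP_HOME/bin is on the PATH; output omitted):

$ hdfs getconf -confKey dfs.nameservices
$ hdfs getconf -namenodes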

core-site.xml

$ vim core-site.xml 
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <!-- Default filesystem: the HA nameservice -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop-test-cluster</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hdfs/tmp</value>
    </property>
    <property>
        <name>hadoop.logfile.size</name>
        <value>10000000</value>
        <description>Maximum size of each log file, in bytes</description>
    </property>
    <property>
        <name>hadoop.logfile.count</name>
        <value>10</value>
        <description>Maximum number of log files</description>
    </property>
    <!-- ZooKeeper quorum; timeouts and other options can also be set here -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>192.168.233.17:2181,192.168.233.238:2181,192.168.233.157:2181</value>
    </property>
    <property>
         <name>dfs.client.read.shortcircuit</name>
         <value>true</value>
    </property>
    <property>
         <name>dfs.client.read.shortcircuit.skip.checksum</name>
         <value>false</value>
    </property>
    <property>
         <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
         <value>true</value>
    </property>
</configuration>
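
Note that dfs.client.read.shortcircuit is enabled above (for Impala), but HDFS short-circuit local reads also require a UNIX domain socket path on each DataNode. The original configuration does not set one; a possible addition would look like the following sketch (the path is only an example and the directory must exist on every DataNode):

    <property>
         <!-- example path; not part of the original configuration -->
         <name>dfs.domain.socket.path</name>
         <value>/var/lib/hadoop-hdfs/dn_socket</value>
    </property>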

mapred-site.xml

$ vim mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
   <property>
       <name>mapreduce.framework.name</name>
       <value>yarn</value>
   </property>
    <!-- MapReduce JobHistory server address -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>192.168.233.65:10020</value>
    </property>

    <!-- JobHistory server web UI address -->
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>192.168.233.65:19888</value>
    </property>
</configuration>
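
The two JobHistory addresses above point at hadoop-test-1, but the history server is not started by start-yarn.sh. Once the cluster is up it would typically be started there separately, for example:

$ mr-jobhistory-daemon.sh start historyserver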

yarn-site.xml

$ vim /data/hadoop/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
        <property>
            <name>yarn.acl.enable</name>
            <value>true</value>
        </property>
        <property>
            <name>yarn.admin.acl</name>
            <value>*</value>
        </property>

        <!-- Enable log aggregation -->
        <property>
            <name>yarn.log-aggregation-enable</name>
            <value>true</value>
        </property>
        <!-- Maximum retention of aggregated logs on HDFS, in seconds (3 days) -->
        <property>
            <name>yarn.log-aggregation.retain-seconds</name>
            <value>259200</value>
        </property>
        <property>
            <name>yarn.resourcemanager.cluster-id</name>
            <value>hadoop-test</value>
        </property>
        <!-- Retry interval for reconnecting after losing contact with the RM -->
        <property> 
            <name>yarn.resourcemanager.connect.retry-interval.ms</name> 
            <value>2000</value> 
        </property>
        <!-- To run MapReduce jobs, each NodeManager must load the shuffle service at startup. The shuffle service is a Jetty/Netty server from which reduce tasks remotely copy the intermediate output produced by map tasks. The following property registers that service. -->
        <property>  
            <name>yarn.nodemanager.aux-services</name>  
            <value>mapreduce_shuffle</value>  
        </property>
        <!-- Enable ResourceManager HA (disabled by default) -->
        <property>  
           <name>yarn.resourcemanager.ha.enabled</name>  
           <value>true</value>  
        </property>  
        <property>
           <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
           <value>true</value>
        </property>
        <!-- Logical IDs of the ResourceManagers -->
        <property>  
           <name>yarn.resourcemanager.ha.rm-ids</name>  
           <value>rm1,rm2</value>  
        </property>
        <!-- HA needs a comma-separated list of ZooKeeper addresses, used by the failover controller for automatic failover -->
        <property>
          <name>ha.zookeeper.quorum</name>
          <value>192.168.233.17:2181,192.168.233.238:2181,192.168.233.157:2181</value> 
        </property>
        <!-- Enable automatic failover -->
         <property> 
            <name>yarn.resourcemanager.ha.automatic-failover.enabled</name> 
            <value>true</value> 
         </property> 
        <!-- Hostnames of the ResourceManagers -->
        <property>  
           <name>yarn.resourcemanager.hostname.rm1</name>  
           <value>192.168.233.65</value>  
        </property>  
        <property>  
           <name>yarn.resourcemanager.hostname.rm2</name>  
           <value>192.168.233.94</value>  
        </property>  
        <!-- Use the ZooKeeper ensemble to store state -->
        <property>  
           <name>yarn.resourcemanager.zk-address</name>  
            <value>192.168.233.17:2181,192.168.233.238:2181,192.168.233.157:2181</value>  
        </property>  
        <!-- Enable recovery so that running applications survive an RM failure; the default is false -->
        <property>  
           <name>yarn.resourcemanager.recovery.enabled</name>  
           <value>true</value>  
        </property>  
        <!-- host:port list of the ZooKeeper servers used by the RM state store; comma-separated for multiple servers -->
        <property> 
          <name>yarn.resourcemanager.zk-state-store.address</name> 
          <value>192.168.233.17:2181,192.168.233.238:2181,192.168.233.157:2181</value>
        </property>  
        <!-- Store ResourceManager state in the ZooKeeper ensemble; the default store is FileSystem-based -->
        <property>  
           <name>yarn.resourcemanager.store.class</name>  
           <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>  
        </property> 
        <!-- Wait interval for the ApplicationMaster to reconnect to the scheduler -->
        <property> 
           <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name> 
           <value>5000</value> 
        </property> 
        <!-- rm1 addresses -->
         <!-- Clients submit applications to the RM through this address -->
         <property> 
            <name>yarn.resourcemanager.address.rm1</name> 
            <value>192.168.233.65:8132</value> 
         </property> 
         <!-- Address the ResourceManager exposes to ApplicationMasters, used for requesting and releasing resources -->
         <property> 
            <name>yarn.resourcemanager.scheduler.address.rm1</name> 
            <value>192.168.233.65:8130</value> 
         </property> 
         <!-- RM web UI address, for viewing cluster information -->
         <property> 
            <name>yarn.resourcemanager.webapp.address.rm1</name> 
            <value>192.168.233.65:8188</value> 
         </property> 
         <!-- NodeManagers exchange information with the RM through this address -->
         <property>
            <name>yarn.resourcemanager.resource-tracker.address.rm1</name> 
            <value>192.168.233.65:8131</value> 
         </property> 
         <!-- Administrators send admin commands to the RM through this address -->
         <property> 
            <name>yarn.resourcemanager.admin.address.rm1</name> 
            <value>192.168.233.65:8033</value> 
         </property> 
         <property> 
            <name>yarn.resourcemanager.ha.admin.address.rm1</name> 
            <value>192.168.233.65:23142</value> 
         </property> 
         
        <!-- rm2 addresses -->
         <property> 
            <name>yarn.resourcemanager.address.rm2</name> 
            <value>192.168.233.94:8132</value> 
         </property> 
         <property> 
            <name>yarn.resourcemanager.scheduler.address.rm2</name> 
            <value>192.168.233.94:8130</value> 
         </property> 
         <property> 
            <name>yarn.resourcemanager.webapp.address.rm2</name> 
            <value>192.168.233.94:8188</value> 
         </property> 
         <property>
            <name>yarn.resourcemanager.resource-tracker.address.rm2</name> 
            <value>192.168.233.94:8131</value> 
         </property> 
         <property> 
            <name>yarn.resourcemanager.admin.address.rm2</name> 
            <value>192.168.233.94:8033</value> 
         </property> 
         <property> 
            <name>yarn.resourcemanager.ha.admin.address.rm2</name> 
            <value>192.168.233.94:23142</value> 
         </property> 

         <!-- Resource settings -->
         <property>
             <name>yarn.scheduler.fair.preemption</name>
             <value>true</value>
             <description>Enable resource preemption (the default is false)</description>
         </property>
         <!-- When an application does not specify a queue name, use the submitting user's name as the queue name. If set to false or unset, applications for unknown queues are submitted to the default queue. The default is true. -->
         <property>
            <name>yarn.scheduler.fair.user-as-default-queue</name>
            <value>true</value>
            <description>default is True</description>
         </property>
         <!-- Whether undeclared pools may be created. If true, YARN automatically creates pools that jobs name but that are not declared here; if false, such jobs are assigned to the default pool. The default is true. -->
         <property>
            <name>yarn.scheduler.fair.allow-undeclared-pools</name>
            <value>false</value>
         </property>
         <!-- Minimum physical memory a single container may request; default 1024 MB. Smaller requests are rounded up to this value. -->
         <property>
            <name>yarn.scheduler.minimum-allocation-mb</name>
            <value>512</value>
         </property>
         <!-- Maximum physical memory a single container may request; default 8192 MB. YARN monitors task memory usage with a thread and kills tasks that exceed their limit. Thread-based monitoring is more forgiving than cgroup limits: a Java process can briefly double its memory right after forking and then drop back, which the monitor tolerates, so YARN does not provide cgroup-based memory isolation here. -->
         <property>
            <name>yarn.scheduler.maximum-allocation-mb</name>
            <value>4096</value>
         </property>
         <property>
            <name>yarn.scheduler.minimum-allocation-vcores</name>
         <value>1</value>
         </property>
         <property>
            <name>yarn.scheduler.maximum-allocation-vcores</name>
            <value>4</value>
         </property>
         <property>
            <name>yarn.scheduler.increment-allocation-vcores</name>
            <value>1</value>
         </property>
         <property>
            <name>yarn.scheduler.increment-allocation-mb</name>
            <value>512</value>
         </property>
         <!-- Maximum number of ApplicationMaster attempts per application -->
         <property>
            <name>yarn.resourcemanager.am.max-attempts</name>
            <value>2</value>
         </property>
         <property>
            <name>yarn.resourcemanager.container.liveness-monitor.interval-ms</name>
            <value>600000</value>
         </property>
         <property>
            <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name>
            <value>1000</value>
         </property>
         <property>
            <name>yarn.nm.liveness-monitor.expiry-interval-ms</name>
            <value>600000</value>
         </property>
         <property>
            <name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
            <value>50</value>
         </property>
         <!-- Total physical memory YARN may use on this node; default 8192 MB. Reduce it if the node has less than 8 GB. -->
         <property>
           <name>yarn.nodemanager.resource.memory-mb</name>
           <value>6000</value>
           <description>Memory available per node, in MB</description>
         </property>
         <!-- Number of virtual CPU cores YARN may use on this node; default 8. Matching the number of physical cores is recommended; reduce it if the node has fewer than 8 cores. -->
         <property>
           <name>yarn.nodemanager.resource.cpu-vcores</name>
           <value>2</value>
         </property>
         <property>
            <name>yarn.nodemanager.pmem-check-enabled</name>
            <value>false</value>
         </property>
         <property>
            <name>yarn.nodemanager.vmem-check-enabled</name>
            <value>false</value>
         </property>
         <property>
            <name>yarn.resourcemanager.scheduler.class</name>
            <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
         </property>
         <!-- Maximum number of completed applications the RM keeps -->
         <property>
           <name>yarn.resourcemanager.max-completed-applications</name>
           <value>10000</value>
         </property>
         <!-- Failover proxy class used by clients to locate the active RM by polling -->
         <property> 
            <name>yarn.client.failover-proxy-provider</name> 
            <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>
         </property> 
         
         <property>
            <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name>
            <value>/yarn-leader-election</value>
        </property>
</configuration>
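
yarn.resourcemanager.scheduler.class above selects the FairScheduler, which by default loads fair-scheduler.xml from the configuration directory on the classpath (written in the next step). If you prefer to be explicit, the allocation file can also be pointed at directly; an optional sketch, not part of the original configuration:

         <property>
            <name>yarn.scheduler.fair.allocation.file</name>
            <value>/data/hadoop/etc/hadoop/fair-scheduler.xml</value>
         </property>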

fair-scheduler.xml (resource pool configuration)

$ vim /data/hadoop/etc/hadoop/fair-scheduler.xml
<?xml version="1.0"?>
<allocations>
<userMaxAppsDefault>30</userMaxAppsDefault>
<queue name="root">
<minResources>5120mb,5vcores</minResources>
<maxResources>29000mb,10vcores</maxResources>
<maxRunningApps>100</maxRunningApps>
<weight>1.0</weight>
<schedulingMode>DRF</schedulingMode>
<aclSubmitApps> </aclSubmitApps>
<aclAdministerApps> </aclAdministerApps>
  <queue name="users" type="parent">
    <minResources>10000mb,2vcores</minResources>
    <maxResources>15000mb,6vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <weight>3</weight>
    <schedulingPolicy>fair</schedulingPolicy>
    <aclSubmitApps>hadoop,hdfs</aclSubmitApps>
    <aclAdministerApps>hadoop</aclAdministerApps>
  </queue>
  <queue name="default" type="parent">
    <minResources>1000mb,1vcores</minResources>
    <maxResources>2000mb,2vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <weight>3</weight>
    <schedulingPolicy>fair</schedulingPolicy>
    <aclSubmitApps>hadoop</aclSubmitApps>
    <aclAdministerApps>hadoop</aclAdministerApps>
  </queue>
  <queue name="prod">
    <minResources>1000mb,1vcores</minResources>
    <maxResources>10000mb,4vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <weight>3</weight>
    <schedulingPolicy>fair</schedulingPolicy>
    <aclSubmitApps>hadoop,hdfs</aclSubmitApps>
    <aclAdministerApps>hadoop</aclAdministerApps>
  </queue>

  <queueMaxResourcesDefault>20000mb,16vcores</queueMaxResourcesDefault>
</queue>

  <queuePlacementPolicy>
    <rule name="specified" />
    <rule name="primaryGroup" create="false" />
    <rule name="nestedUserQueue">
        <rule name="secondaryGroupExistingQueue" create="false" />
    </rule>
    <rule name="default" queue="users"/>
  </queuePlacementPolicy>
</allocations>

Configure the DataNodes (slaves file)

$ vim slaves 
hadoop-test-1
hadoop-test-2
hadoop-test-3
hadoop-test-4
hadoop-test-5

Edit hadoop-env.sh

$ vim hadoop-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_231
export HADOOP_LOG_DIR=/data/hdfs/logs
export HADOOP_SECURE_DN_LOG_DIR=/data/hdfs/logs
export HADOOP_PRIVILEGED_NFS_LOG_DIR=/data/hdfs/logs
export HADOOP_MAPRED_LOG_DIR=/data/hdfs/logs
export YARN_LOG_DIR=/data/hdfs/logs

Copy /data/hadoop to /data/ on every node.
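
One way to do the copy (a sketch, assuming the hadoop user has passwordless SSH and rsync is available on every node):

$ for i in {2..5};do rsync -a /data/hadoop/ hadoop-test-$i:/data/hadoop/;done
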
Configure YARN on hadoop-test-2 (set its ResourceManager ID)

$ vim yarn-site.xml 
        <property>
           <name>yarn.resourcemanager.ha.id</name>
           <value>rm2</value>
           <description>If we want to launch more than one RM in single node, we need this configuration</description>
        </property>

Configure ~/.bashrc on all nodes

$ vim ~/.bashrc
#hadoop
export HADOOP_HOME=/data/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
$ source ~/.bashrc

Initialize the HA state in ZooKeeper

$ hdfs zkfc -formatZK
20/06/18 11:29:37 INFO tools.DFSZKFailoverController: STARTUP_MSG: 
/************************************************************
STARTUP_MSG: Starting DFSZKFailoverController
...
...
...
20/06/18 11:29:38 INFO ha.ActiveStandbyElector: Session connected.
20/06/18 11:29:38 INFO ha.ActiveStandbyElector: Successfully created /hadoop-ha/hadoop-test-cluster in ZK.
20/06/18 11:29:38 INFO zookeeper.ZooKeeper: Session: 0x300002f96aa0000 closed
20/06/18 11:29:38 INFO zookeeper.ClientCnxn: EventThread shut down for session: 0x300002f96aa0000
20/06/18 11:29:38 INFO tools.DFSZKFailoverController: SHUTDOWN_MSG: 
/************************************************************
SHUTDOWN_MSG: Shutting down DFSZKFailoverController at hadoop-test-1/192.168.233.65
************************************************************

Start the JournalNodes on the JournalNode hosts

$ hadoop-daemon.sh start journalnode
starting journalnode, logging to /data/hdfs/logs/hadoop-hadoop-journalnode-hadoop-test-3.out
$ jps
9140 Jps
9078 JournalNode
4830 QuorumPeerMain
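
The same can be done from a single host for all three JournalNode machines, for example (using the full script path in case ~/.bashrc is not sourced for non-interactive SSH):

$ for i in {3..5};do ssh hadoop-test-$i "/data/hadoop/sbin/hadoop-daemon.sh start journalnode";done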

Format HDFS on the primary NameNode (this also initializes the shared edits directory on the JournalNodes)

$ hadoop namenode -format
DEPRECATED: Use of this script to execute hdfs command is deprecated.
Instead use the hdfs command for it.

20/06/18 11:32:42 INFO namenode.NameNode: STARTUP_MSG: 
/************************************************************
STARTUP_MSG: Starting NameNode
...
...
...
20/06/18 11:32:44 INFO common.Storage: Storage directory /data/hdfs/nn has been successfully formatted.
20/06/18 11:32:44 INFO namenode.FSImageFormatProtobuf: Saving image file /data/hdfs/nn/current/fsimage.ckpt_0000000000000000000 using no compression
20/06/18 11:32:44 INFO namenode.FSImageFormatProtobuf: Image file /data/hdfs/nn/current/fsimage.ckpt_0000000000000000000 of size 325 bytes saved in 0 seconds .
20/06/18 11:32:44 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
20/06/18 11:32:44 INFO namenode.FSImage: FSImageSaver clean checkpoint: txid = 0 when meet shutdown.
20/06/18 11:32:44 INFO namenode.NameNode: SHUTDOWN_MSG: 
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at hadoop-test-1/192.168.233.65
************************************************************/

Check ZooKeeper

$ zkCli.sh -server hadoop-test-4:2181
...
...
...
2020-06-18 11:34:25,075 [myid:hadoop-test-4:2181] - INFO  [main-SendThread(hadoop-test-4:2181):ClientCnxn$SendThread@959] - Socket connection established, initiating session, client: /192.168.233.157:52598, server: hadoop-test-4/192.168.233.238:2181
2020-06-18 11:34:25,094 [myid:hadoop-test-4:2181] - INFO  [main-SendThread(hadoop-test-4:2181):ClientCnxn$SendThread@1394] - Session establishment complete on server hadoop-test-4/192.168.233.238:2181, sessionid = 0x200002f8ee50001, negotiated timeout = 30000

WATCHER::

WatchedEvent state:SyncConnected type:None path:null
[zk: hadoop-test-4:2181(CONNECTED) 0] ls /
[hadoop-ha, zookeeper]
[zk: hadoop-test-4:2181(CONNECTED) 1] ls /hadoop-ha

Start the primary NameNode

$ hadoop-daemon.sh start namenode
starting namenode, logging to /data/hdfs/logs/hadoop-hadoop-namenode-hadoop-test-1.out
$ jps
10864 NameNode
10951 Jps

On the standby NameNode, copy the metadata from the primary NameNode and start it

$ hdfs namenode -bootstrapStandby

20/06/18 11:37:08 INFO namenode.NameNode: STARTUP_MSG: 
/************************************************************
STARTUP_MSG: Starting NameNode
...
...
...
20/06/18 11:37:09 INFO namenode.FSEditLog: Edit logging is async:true
20/06/18 11:37:09 INFO namenode.TransferFsImage: Opening connection to http://192.168.233.65:50070/imagetransfer?getimage=1&txid=0&storageInfo=-63:2055238485:1592451163995:CID-e2c9292c-6fca-46eb-aef3-c96149a72ade&bootstrapstandby=true
20/06/18 11:37:09 INFO common.Util: Combined time for fsimage download and fsync to all disks took 0.00s. The fsimage download took 0.00s at 0.00 KB/s. Synchronous (fsync) write to disk of /data/hdfs/nn/current/fsimage.ckpt_0000000000000000000 took 0.00s.
20/06/18 11:37:09 INFO namenode.TransferFsImage: Downloaded file fsimage.ckpt_0000000000000000000 size 325 bytes.
20/06/18 11:37:09 INFO namenode.NameNode: SHUTDOWN_MSG: 
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at hadoop-test-2/192.168.233.94
************************************************************/
$ hadoop-daemon.sh start namenode
starting namenode, logging to /data/hdfs/logs/hadoop-hadoop-namenode-hadoop-test-2.out

Run on both the primary and standby NameNode hosts

$ hadoop-daemon.sh start zkfc
starting zkfc, logging to /data/hdfs/logs/hadoop-hadoop-zkfc-hadoop-test-1.out
$ hadoop-daemon.sh start zkfc
starting zkfc, logging to /data/hdfs/logs/hadoop-hadoop-zkfc-hadoop-test-2.out
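
Once both ZKFCs are running, one NameNode should be elected active and the other standby. This can be verified with:

$ hdfs haadmin -getServiceState nn1
$ hdfs haadmin -getServiceState nn2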

Start the DataNodes by running the command on the primary NameNode

$ hadoop-daemons.sh start datanode

Start YARN on the primary NameNode / ResourceManager host (hadoop-test-1)

$ start-yarn.sh 
starting yarn daemons
starting resourcemanager, logging to /data/hdfs/logs/yarn-hadoop-resourcemanager-hadoop-test-1.out
hadoop-test-1: starting nodemanager, logging to /data/hdfs/logs/yarn-hadoop-nodemanager-hadoop-test-1.out
hadoop-test-4: starting nodemanager, logging to /data/hdfs/logs/yarn-hadoop-nodemanager-hadoop-test-4.out
hadoop-test-2: starting nodemanager, logging to /data/hdfs/logs/yarn-hadoop-nodemanager-hadoop-test-2.out
hadoop-test-3: starting nodemanager, logging to /data/hdfs/logs/yarn-hadoop-nodemanager-hadoop-test-3.out
hadoop-test-5: starting nodemanager, logging to /data/hdfs/logs/yarn-hadoop-nodemanager-hadoop-test-5.out

Start the standby ResourceManager on the standby NameNode / ResourceManager host (hadoop-test-2)

$ start-yarn.sh
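
With both ResourceManagers up, their HA states can be checked in the same way:

$ yarn rmadmin -getServiceState rm1
$ yarn rmadmin -getServiceState rm2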

Check the processes

$  for i in {1..5};do ssh hadoop-test-$i "jps" && echo ---;done
10864 NameNode
12753 NodeManager
17058 Jps
12628 ResourceManager
11381 DataNode
11146 DFSZKFailoverController
---
9201 NameNode
14997 ResourceManager
15701 Jps
9431 DFSZKFailoverController
9623 DataNode
10701 NodeManager
---
14353 Jps
9078 JournalNode
9639 DataNode
4830 QuorumPeerMain
10574 NodeManager
---
9616 DataNode
10547 NodeManager
14343 Jps
4808 QuorumPeerMain
9115 JournalNode
---
9826 DataNode
10758 NodeManager
4807 QuorumPeerMain
9255 JournalNode
14538 Jps

Open the web UI of the active HDFS NameNode (http://192.168.233.65:50070).
Open the YARN ResourceManager web UI (http://192.168.233.65:8188).
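
As an optional failover drill, stop the active NameNode, confirm the standby takes over, and then start it again:

$ ssh hadoop-test-1 "/data/hadoop/sbin/hadoop-daemon.sh stop namenode"
$ hdfs haadmin -getServiceState nn2
$ ssh hadoop-test-1 "/data/hadoop/sbin/hadoop-daemon.sh start namenode"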

WordCount demo

$ cd /data/hadoop/
$ ll LICENSE.txt
-rw-r--r-- 1 hadoop hadoop 106210 6月  18 09:26 LICENSE.txt
$ hadoop fs -mkdir /input
$ hadoop fs -put LICENSE.txt /input 
$ hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.10.0.jar wordcount /input /output
20/06/19 13:55:59 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
20/06/19 13:56:00 INFO input.FileInputFormat: Total input files to process : 1
20/06/19 13:56:00 INFO mapreduce.JobSubmitter: number of splits:1
20/06/19 13:56:00 INFO Configuration.deprecation: yarn.resourcemanager.zk-address is deprecated. Instead, use hadoop.zk.address
20/06/19 13:56:00 INFO Configuration.deprecation: yarn.resourcemanager.system-metrics-publisher.enabled is deprecated. Instead, use yarn.system-metrics-publisher.enabled
20/06/19 13:56:00 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1592470438667_0007
20/06/19 13:56:00 INFO conf.Configuration: resource-types.xml not found
20/06/19 13:56:00 INFO resource.ResourceUtils: Unable to find 'resource-types.xml'.
20/06/19 13:56:00 INFO resource.ResourceUtils: Adding resource type - name = memory-mb, units = Mi, type = COUNTABLE
20/06/19 13:56:00 INFO resource.ResourceUtils: Adding resource type - name = vcores, units = , type = COUNTABLE
20/06/19 13:56:00 INFO impl.YarnClientImpl: Submitted application application_1592470438667_0007
20/06/19 13:56:01 INFO mapreduce.Job: The url to track the job: http://hadoop-test-2:8188/proxy/application_1592470438667_0007/
20/06/19 13:56:01 INFO mapreduce.Job: Running job: job_1592470438667_0007
20/06/19 13:56:08 INFO mapreduce.Job: Job job_1592470438667_0007 running in uber mode : false
20/06/19 13:56:08 INFO mapreduce.Job:  map 0% reduce 0%
20/06/19 13:56:12 INFO mapreduce.Job:  map 100% reduce 0%
20/06/19 13:56:17 INFO mapreduce.Job:  map 100% reduce 100%
20/06/19 13:56:18 INFO mapreduce.Job: Job job_1592470438667_0007 completed successfully
20/06/19 13:56:18 INFO mapreduce.Job: Counters: 49
    File System Counters
        FILE: Number of bytes read=36735
        FILE: Number of bytes written=496235
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=106319
        HDFS: Number of bytes written=27714
        HDFS: Number of read operations=6
        HDFS: Number of large read operations=0
        HDFS: Number of write operations=2
    Job Counters 
        Launched map tasks=1
        Launched reduce tasks=1
        Data-local map tasks=1
        Total time spent by all maps in occupied slots (ms)=2442
        Total time spent by all reduces in occupied slots (ms)=2678
        Total time spent by all map tasks (ms)=2442
        Total time spent by all reduce tasks (ms)=2678
        Total vcore-milliseconds taken by all map tasks=2442
        Total vcore-milliseconds taken by all reduce tasks=2678
        Total megabyte-milliseconds taken by all map tasks=2500608
        Total megabyte-milliseconds taken by all reduce tasks=2742272
    Map-Reduce Framework
        Map input records=1975
        Map output records=15433
        Map output bytes=166257
        Map output materialized bytes=36735
        Input split bytes=109
        Combine input records=15433
        Combine output records=2332
        Reduce input groups=2332
        Reduce shuffle bytes=36735
        Reduce input records=2332
        Reduce output records=2332
        Spilled Records=4664
        Shuffled Maps =1
        Failed Shuffles=0
        Merged Map outputs=1
        GC time elapsed (ms)=140
        CPU time spent (ms)=1740
        Physical memory (bytes) snapshot=510885888
        Virtual memory (bytes) snapshot=4263968768
        Total committed heap usage (bytes)=330301440
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters 
        Bytes Read=106210
    File Output Format Counters 
        Bytes Written=27714


$ hdfs dfs -ls /input
Found 1 items
-rw-r--r--   3 hadoop supergroup     106210 2020-06-19 13:55 /input/LICENSE.txt
$ hdfs dfs -ls /output
Found 2 items
-rw-r--r--   3 hadoop supergroup          0 2020-06-19 13:56 /output/_SUCCESS
-rw-r--r--   3 hadoop supergroup      27714 2020-06-19 13:56 /output/part-r-00000
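
The word counts themselves can be read straight from HDFS, for example:

$ hdfs dfs -cat /output/part-r-00000 | head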