Hadoop YARN Resource Pools

$ vim /data/hadoop/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
        <property>
            <name>yarn.acl.enable</name>
            <value>true</value>
        </property>
        <property>
            <name>yarn.admin.acl</name>
            <value>*</value>
        </property>

       <!-- Log aggregation -->
        <property>
            <name>yarn.log-aggregation-enable</name>
            <value>true</value>
        </property>
       <!-- How long (in seconds) aggregated logs are retained on HDFS. 3 days -->
        <property>
            <name>yarn.log-aggregation.retain-seconds</name>
            <value>259200</value>
        </property>
        <property>
            <name>yarn.resourcemanager.cluster-id</name>
            <value>hadoop-test</value>
        </property>
       <!-- Retry interval (ms) when reconnecting to an RM that has been lost -->
        <property> 
            <name>yarn.resourcemanager.connect.retry-interval.ms</name> 
            <value>2000</value> 
        </property>
        <!-- To run MapReduce jobs, each NodeManager must load the shuffle service at startup. The shuffle service is actually a Jetty/Netty server; reduce tasks use it to remotely fetch the intermediate results produced by map tasks on each NodeManager. The following property designates the shuffle service. -->
        <property>  
            <name>yarn.nodemanager.aux-services</name>  
            <value>mapreduce_shuffle</value>  
        </property>
        <!-- Enable ResourceManager HA -->
        <!-- Whether to enable RM HA; the default is false -->
        <property>  
           <name>yarn.resourcemanager.ha.enabled</name>  
           <value>true</value>  
        </property>  
        <property>
           <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
           <value>true</value>
        </property>
        <!-- Logical IDs of the RMs -->
        <property>  
           <name>yarn.resourcemanager.ha.rm-ids</name>  
           <value>rm1,rm2</value>  
        </property>
        <!-- HA requires a comma-separated list of ZooKeeper addresses, used by the ZKFailoverController for automatic failover. -->
        <property>
          <name>ha.zookeeper.quorum</name>
          <value>192.168.233.17:2181,192.168.233.238:2181,192.168.233.157:2181</value> 
        </property>
        <!-- Enable automatic failover -->
         <property> 
            <name>yarn.resourcemanager.ha.automatic-failover.enabled</name> 
            <value>true</value> 
         </property> 
        <!-- Hostnames of the RMs -->
        <property>  
           <name>yarn.resourcemanager.hostname.rm1</name>  
           <value>192.168.233.65</value>  
        </property>  
        <property>  
           <name>yarn.resourcemanager.hostname.rm2</name>  
           <value>192.168.233.94</value>  
        </property>  
        <!-- Use the ZooKeeper ensemble to store RM state; specify the ZooKeeper quorum -->
        <property>  
           <name>yarn.resourcemanager.zk-address</name>  
            <value>192.168.233.17:2181,192.168.233.238:2181,192.168.233.157:2181</value>  
        </property>  
        <!-- Enable automatic recovery: if the RM dies while jobs are in flight, they are recovered when it restarts; the default is false -->
        <property>  
           <name>yarn.resourcemanager.recovery.enabled</name>  
           <value>true</value>  
        </property>  
       <!-- host:port of the ZooKeeper servers the RM uses for state storage; comma-separated when there are several -->
        <property> 
          <name>yarn.resourcemanager.zk-state-store.address</name> 
          <value>192.168.233.17:2181,192.168.233.238:2181,192.168.233.157:2181</value>
        </property>  
        <!-- Store the ResourceManager state in the ZooKeeper ensemble; by default it is kept in the FileSystem. -->
        <property>  
           <name>yarn.resourcemanager.store.class</name>  
           <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>  
        </property> 
        <!-- How long the MapReduce ApplicationMaster waits between reconnection attempts when it loses contact with the scheduler -->
        <property> 
           <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name> 
           <value>5000</value> 
        </property> 
        <!-- rm1 -->
         <!-- Address through which clients submit applications and operations to the RM -->
         <property> 
            <name>yarn.resourcemanager.address.rm1</name> 
            <value>192.168.233.65:8132</value> 
         </property> 
         <!-- Address the ResourceManager exposes to ApplicationMasters, used to request and release resources. -->
         <property> 
            <name>yarn.resourcemanager.scheduler.address.rm1</name> 
            <value>192.168.233.65:8130</value> 
         </property> 
         <!-- RM HTTP address, for viewing cluster information -->
         <property> 
            <name>yarn.resourcemanager.webapp.address.rm1</name> 
            <value>192.168.233.65:8188</value> 
         </property> 
         <!-- Address NodeManagers use to exchange information with the RM -->
         <property>
            <name>yarn.resourcemanager.resource-tracker.address.rm1</name> 
            <value>192.168.233.65:8131</value> 
         </property> 
         <!-- Address administrators use to send management commands to the RM -->
         <property> 
            <name>yarn.resourcemanager.admin.address.rm1</name> 
            <value>192.168.233.65:8033</value> 
         </property> 
         <property> 
            <name>yarn.resourcemanager.ha.admin.address.rm1</name> 
            <value>192.168.233.65:23142</value> 
         </property> 
         
        <!-- rm2 -->
         <property> 
            <name>yarn.resourcemanager.address.rm2</name> 
            <value>192.168.233.94:8132</value> 
         </property> 
         <property> 
            <name>yarn.resourcemanager.scheduler.address.rm2</name> 
            <value>192.168.233.94:8130</value> 
         </property> 
         <property> 
            <name>yarn.resourcemanager.webapp.address.rm2</name> 
            <value>192.168.233.94:8188</value> 
         </property> 
         <property>
            <name>yarn.resourcemanager.resource-tracker.address.rm2</name> 
            <value>192.168.233.94:8131</value> 
         </property> 
         <property> 
            <name>yarn.resourcemanager.admin.address.rm2</name> 
            <value>192.168.233.94:8033</value> 
         </property> 
         <property> 
            <name>yarn.resourcemanager.ha.admin.address.rm2</name> 
            <value>192.168.233.94:23142</value> 
         </property> 

         <!-- Resource configuration -->
         <property>
             <name>yarn.scheduler.fair.preemption</name>
             <value>true</value>
             <description>Enable resource preemption; the default is false</description>
         </property>
         <!-- Whether to use the submitting user's name as the queue name when an application does not specify one. If set to false, applications naming unknown queues are submitted to the default queue. The default is true. -->
         <property>
            <name>yarn.scheduler.fair.user-as-default-queue</name>
            <value>true</value>
            <description>Default is true</description>
         </property>
         <!-- Whether undeclared resource pools may be created. If true, YARN automatically creates any undefined pool a job names; if false, an undefined pool name is ignored and the job is placed in the default pool. The default is true. -->
         <property>
            <name>yarn.scheduler.fair.allow-undeclared-pools</name>
            <value>false</value>
         </property>
         <!-- Minimum physical memory (MB) a single container can request; the default is 1024. Requests below this value are raised to it. -->
         <property>
            <name>yarn.scheduler.minimum-allocation-mb</name>
            <value>512</value>
         </property>
         <!-- Maximum physical memory (MB) a single container can request; the default is 8192. By default YARN uses thread-based monitoring to decide whether a task is over its memory allocation, and kills it outright if so. Cgroup memory control is inflexible (a task may never exceed its limit at any instant, or it is killed or hits OOM), whereas a Java process momentarily doubles its memory when forking before dropping back to normal; thread-based monitoring tolerates such spikes (a momentary doubling of the process tree's memory past the limit is treated as normal and the task is not killed), which is why YARN does not provide cgroup-based memory isolation. -->
         <property>
            <name>yarn.scheduler.maximum-allocation-mb</name>
            <value>4096</value>
         </property>
         <property>
            <name>yarn.scheduler.minimum-allocation-vcores</name>
            <value>1</value>
         </property>
         <property>
            <name>yarn.scheduler.maximum-allocation-vcores</name>
            <value>4</value>
         </property>
         <property>
            <name>yarn.scheduler.increment-allocation-vcores</name>
            <value>1</value>
         </property>
         <property>
            <name>yarn.scheduler.increment-allocation-mb</name>
            <value>512</value>
         </property>
         <!-- Maximum number of ApplicationMaster attempts for a single application submitted to YARN -->
         <property>
            <name>yarn.resourcemanager.am.max-attempts</name>
            <value>2</value>
         </property>
         <property>
            <name>yarn.resourcemanager.container.liveness-monitor.interval-ms</name>
            <value>600000</value>
         </property>
         <property>
            <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name>
            <value>1000</value>
         </property>
         <property>
            <name>yarn.nm.liveness-monitor.expiry-interval-ms</name>
            <value>600000</value>
         </property>
         <property>
            <name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
            <value>50</value>
         </property>
         <!-- Total physical memory (MB) YARN may use on this node; the default is 8192. If the node has less than 8 GB, lower this value. -->
         <property>
           <name>yarn.nodemanager.resource.memory-mb</name>
           <value>6000</value>
           <description>Memory available per node, in MB</description>
         </property>
         <!-- Number of virtual CPU cores YARN may use on this node; the default is 8. The current recommendation is to set this to the number of physical cores; lower it if the node has fewer than 8. -->
         <property>
           <name>yarn.nodemanager.resource.cpu-vcores</name>
           <value>2</value>
         </property>
         <property>
            <name>yarn.nodemanager.pmem-check-enabled</name>
            <value>false</value>
         </property>
         <property>
            <name>yarn.nodemanager.vmem-check-enabled</name>
            <value>false</value>
         </property>
         <property>
            <name>yarn.resourcemanager.scheduler.class</name>
            <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
         </property>
         <!-- Maximum number of completed applications the RM keeps information about -->
         <property>
           <name>yarn.resourcemanager.max-completed-applications</name>
           <value>10000</value>
         </property>
         <!-- Failover handling class: the class clients use to find the active RM by polling -->
         <property> 
            <name>yarn.client.failover-proxy-provider</name> 
            <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>
         </property> 
         
         <property>
            <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name>
            <value>/yarn-leader-election</value>
        </property>
</configuration>
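
Before loading the scheduler configuration it is worth confirming that RM HA election works. A minimal check (which RM reports active depends on the election; the other should report standby):

$ yarn rmadmin -getServiceState rm1
active
$ yarn rmadmin -getServiceState rm2
standby
$ yarn node -list       # all NodeManagers should be registered with the active RM and RUNNING
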
$ vim /data/hadoop/etc/hadoop/fair-scheduler.xml
$ cat /data/hadoop/etc/hadoop/fair-scheduler.xml
<?xml version="1.0"?>
<allocations>
<userMaxAppsDefault>30</userMaxAppsDefault>
<queue name="root">
<!-- Minimum resources -->
<minResources>5120mb,5vcores</minResources>
<!-- Maximum resources -->
<maxResources>29000mb,10vcores</maxResources>
<maxRunningApps>100</maxRunningApps>
<weight>1.0</weight>
<schedulingMode>DRF</schedulingMode>
<!-- Users and groups allowed to submit jobs (a single space means none at this level, leaving control to the child queues) -->
<aclSubmitApps> </aclSubmitApps>
<!-- Users and groups allowed to administer jobs -->
<aclAdministerApps> </aclAdministerApps>
  <queue name="users" type="parent">
    <minResources>10000mb,2vcores</minResources>
    <maxResources>15000mb,6vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <weight>3</weight>
    <schedulingPolicy>fair</schedulingPolicy>
    <aclSubmitApps>hadoop,hdfs</aclSubmitApps>
    <aclAdministerApps>hadoop</aclAdministerApps>
  </queue>
  <queue name="default" type="parent">
    <minResources>1000mb,1vcores</minResources>
    <maxResources>2000mb,2vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <weight>3</weight>
    <schedulingPolicy>fair</schedulingPolicy>
    <aclSubmitApps>hadoop</aclSubmitApps>
    <aclAdministerApps>hadoop</aclAdministerApps>
  </queue>
  <queue name="prod">
    <minResources>1000mb,1vcores</minResources>
    <maxResources>10000mb,4vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <weight>3</weight>
    <schedulingPolicy>fair</schedulingPolicy>
    <aclSubmitApps>hadoop,hdfs</aclSubmitApps>
    <aclAdministerApps>hadoop</aclAdministerApps>
  </queue>

</queue>

  <queueMaxResourcesDefault>20000mb,16vcores</queueMaxResourcesDefault>

  <queuePlacementPolicy>
    <rule name="specified" />
    <rule name="primaryGroup" create="false" />
    <rule name="nestedUserQueue">
        <rule name="secondaryGroupExistingQueue" create="false" />
    </rule>
    <rule name="default"   queue="users"/>
  </queuePlacementPolicy>
</allocations>
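
The FairScheduler enabled above picks this file up under its default name, fair-scheduler.xml, from the Hadoop configuration directory (yarn.scheduler.fair.allocation.file can point elsewhere), and it reloads the file automatically shortly after the file changes. A refresh can also be forced, and the resulting pool tree inspected on the scheduler page of the RM web UI configured earlier (e.g. http://192.168.233.65:8188/cluster/scheduler):

$ yarn rmadmin -refreshQueues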

Test the prod resource pool

$ spark-shell --master yarn --queue prod --executor-memory 1000m --total-executor-cores 1
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Spark context Web UI available at http://hadoop-test-1:4040
Spark context available as 'sc' (master = yarn, app id = application_1592814747219_0002).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.4.6
      /_/
         
Using Scala version 2.11.12 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_231)
Type in expressions to have them evaluated.
Type :help for more information.

scala>
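
From inside the REPL, a quick way to confirm the shell really landed in the requested pool is to read the queue back from the Spark configuration (--queue is carried as spark.yarn.queue; note that --total-executor-cores only applies to standalone and Mesos masters, while on YARN executor cores are set with --executor-cores and --num-executors):

scala> sc.getConf.get("spark.yarn.queue")
res0: String = prod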

Test the users parent resource pool

$ spark-shell --master yarn --queue root.users.hadoop --executor-memory 3000m --total-executor-cores 3
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Spark context Web UI available at http://hadoop-test-2:4040
Spark context available as 'sc' (master = yarn, app id = application_1592814747219_0003).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.4.6
      /_/
         
Using Scala version 2.11.12 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_231)
Type in expressions to have them evaluated.
Type :help for more information.

scala>
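
Queue placement can also be checked from the command line, and because log aggregation is enabled in yarn-site.xml, the full logs of an application can be pulled back from HDFS once it finishes. A sketch, reusing the application id printed by the spark-shell above:

$ yarn application -list      # the Queue column should show root.users.hadoop for the running shell
$ yarn logs -applicationId application_1592814747219_0003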
