1. The VM environment preparation is not covered in detail here; just make sure the machines can ping each other and that passwordless SSH login between them works (a minimal sketch follows the table below).
Environment preparation:
IP              | host(s)        | services
192.168.137.117 | master, slave1 | zookeeper | hadoop
192.168.137.118 | slave2         | zookeeper | hadoop
192.168.137.119 | slave3         | zookeeper | hadoop
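A minimal sketch of the passwordless-login part, assuming a hadoop user already exists on every node and the hostnames resolve as in the table above:

# On master: generate a key pair (accept the defaults).
ssh-keygen -t rsa

# Push the public key to every node, master included.
ssh-copy-id hadoop@master
ssh-copy-id hadoop@slave2
ssh-copy-id hadoop@slave3

# Verify: should print the remote hostname without a password prompt.
ssh hadoop@slave2 hostname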
Download Hadoop: http://apache.fayea.com/hadoop/common/stable/hadoop-2.6.0.tar.gz
Install the JDK (jdk 1.7.0_71).
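A quick check that the JDK is actually in place before going further (the path is the one used throughout this article):

/usr/java/jdk1.7.0_71/bin/java -version
# should report java version "1.7.0_71"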
2. Switch to the hadoop user and unpack hadoop-2.6.0.tar.gz.

cd /data0/
tar zxvf hadoop-2.6.0.tar.gz
Configure the environment variables:

vi ~/.bashrc
export JAVA_HOME=/usr/java/jdk1.7.0_71
export HADOOP_HOME=/data0/hadoop-2.6.0
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

source ~/.bashrc
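A sanity check that the variables took effect in the current shell:

echo $HADOOP_HOME   # /data0/hadoop-2.6.0
hadoop version      # should report Hadoop 2.6.0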
3. Configure the distributed environment. Hadoop's main configuration files are core-site.xml, mapred-site.xml, hdfs-site.xml, and yarn-site.xml.
core-site.xml
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data0/hadoop/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>4096</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>master:2181,slave2:2181,slave3:2181</value>
    </property>
</configuration>
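Format and startup can usually create this directory on demand, but creating hadoop.tmp.dir up front on every node makes its ownership explicit:

mkdir -p /data0/hadoop/tmp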
mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobtracker.http.address</name>
        <value>master:50030</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19888</value>
    </property>
    <property>
        <name>mapreduce.job.queuename</name>
        <value>hadoop</value>
    </property>
</configuration>
hdfs-site.xml
<configuration>
    <property>
        <name>dfs.nameservices</name>
        <value>ns1</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:50090</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data0/hadoop/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data0/hadoop/dfs/data</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>
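Likewise for the NameNode and DataNode directories configured above; creating them on each node ensures they are owned by the hadoop user:

mkdir -p /data0/hadoop/dfs/name /data0/hadoop/dfs/data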
yarn-site.xml
<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>master:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>master:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>master:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>master:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>master:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.allocation.file</name>
        <value>/data0/hadoop-2.6.0/etc/hadoop/fair-scheduler.xml</value>
    </property>
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>4096</value>
    </property>
    <property>
        <name>yarn.nodemanager.resource.cpu-vcores</name>
        <value>4</value>
    </property>
</configuration>
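Once the cluster is running (final step below), the ResourceManager REST API is one way to confirm that the FairScheduler actually loaded; a quick check against the webapp address configured above:

# The schedulerInfo type in the JSON response should be fairScheduler.
curl http://master:8088/ws/v1/cluster/scheduler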
Configure the Fair Scheduler.
fair-scheduler.xml
<allocations>
    <queue name="default">
        <minResources>0 mb, 0 vcores</minResources>
        <maxResources>16384 mb, 16 vcores</maxResources>
        <maxRunningApps>5</maxRunningApps>
        <weight>1.0</weight>
    </queue>
    <user name="default">
        <maxRunningApps>5</maxRunningApps>
    </user>
    <queue name="hadoop">
        <minResources>1024 mb, 4 vcores</minResources>
        <maxResources>4096 mb, 8 vcores</maxResources>
        <maxRunningApps>200</maxRunningApps>
        <minSharePreemptionTimeout>300</minSharePreemptionTimeout>
        <weight>1.0</weight>
        <!--<schedulingPolicy>fifo</schedulingPolicy>-->
    </queue>
    <user name="hadoop">
        <maxRunningApps>400</maxRunningApps>
    </user>
    <queue name="hive">
        <minResources>1024 mb, 1 vcores</minResources>
        <maxResources>8092 mb, 8 vcores</maxResources>
        <maxRunningApps>200</maxRunningApps>
        <minSharePreemptionTimeout>300</minSharePreemptionTimeout>
        <weight>1.0</weight>
        <!--<schedulingPolicy>fifo</schedulingPolicy>-->
    </queue>
    <user name="hive">
        <maxRunningApps>400</maxRunningApps>
    </user>
    <userMaxAppsDefault>40</userMaxAppsDefault>
    <fairSharePreemptionTimeout>6000</fairSharePreemptionTimeout>
</allocations>
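Since mapred-site.xml sets mapreduce.job.queuename to hadoop, jobs land in the hadoop queue by default; a queue can also be picked per job. A sketch using the examples jar that ships with the distribution:

# Submit the pi example to the hive queue instead of the default.
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar \
    pi -Dmapreduce.job.queuename=hive 2 10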
In hadoop-env.sh and yarn-env.sh, set JAVA_HOME:
export JAVA_HOME=/usr/java/jdk1.7.0_71
Then list the master and slave nodes:

vi masters
master

vi slaves
slave1
slave2
slave3
Package the configured environment and copy it to the other nodes:

tar zcvf hadoop-2.6.0.tar.gz hadoop-2.6.0
scp hadoop-2.6.0.tar.gz hadoop@slave2:/data0
scp hadoop-2.6.0.tar.gz hadoop@slave3:/data0
Log in to each of the other two machines, unpack the Hadoop tarball, and configure the environment variables as above (a sketch below).
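This can also be done over the passwordless SSH set up earlier, without logging in interactively:

ssh hadoop@slave2 "cd /data0 && tar zxvf hadoop-2.6.0.tar.gz"
ssh hadoop@slave3 "cd /data0 && tar zxvf hadoop-2.6.0.tar.gz"
# ~/.bashrc on each node still needs the same JAVA_HOME/HADOOP_HOME exports.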
Finally, format HDFS and start the Hadoop cluster. On the master node:

hadoop namenode -format
cd $HADOOP_HOME/sbin
./start-all.sh

Check the background processes with jps:

jps
25337 QuorumPeerMain
1075 HMaster
1694 Jps
25619 DataNode
25494 NameNode
25973 NodeManager
25839 ResourceManager
1470 Main

(QuorumPeerMain is the ZooKeeper process; HMaster and Main come from other services running on this host, not from Hadoop itself.)
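A quick smoke test once the daemons are up (paths are illustrative):

# Round-trip a file through HDFS.
hdfs dfs -mkdir -p /tmp/smoke
hdfs dfs -put $HADOOP_HOME/etc/hadoop/core-site.xml /tmp/smoke/
hdfs dfs -cat /tmp/smoke/core-site.xml

# All DataNodes listed in the slaves file should appear in the report.
hdfs dfsadmin -report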
Browse HDFS at http://192.168.137.117:50070/
Browse the YARN cluster at http://192.168.137.117:8088
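The same information is available from the command line, which helps when no browser can reach the cluster:

yarn node -list          # NodeManagers that joined the cluster
yarn application -list   # applications currently submitted or running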
With that, the Hadoop cluster setup is complete.