Hadoop Production Environment Configuration Files

Prerequisites:

① A ZooKeeper cluster is already set up

② The JDK is already installed

Main content:

First, download Hadoop 2.7.3 from the official site. (Although 3.0 is already out, it has not yet been thoroughly tested; we will wait until it has been.)
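For example, one way to fetch and unpack the release into the /app/hadoop layout assumed by the configs below (the archive.apache.org URL is one possible mirror; adjust to your environment):

wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz
tar -xzf hadoop-2.7.3.tar.gz -C /app
ln -s /app/hadoop-2.7.3 /app/hadoop    # so HADOOP_HOME can stay /app/hadoop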

1. hadoop-env.sh (environment variables)

export JAVA_HOME=/app/jdk/jdk1.8.0_92
export HOME=/app/hadoop
export HADOOP_HOME=$HOME
export HADOOP_COMMON_HOME=$HOME
export HADOOP_MAPRED_HOME=$HOME
export HADOOP_HDFS_HOME=$HOME
export YARN_HOME=$HOME
export CLASSPATH=.:$HADOOP_HOME/lib:$SQOOP_HOME/lib:$HIVE_HOME/lib:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$HADOOP_HOME/bin:$SQOOP_HOME/bin:$JAVA_HOME/jre/bin:$PATH:$HOME/bin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_PID_DIR=/app/hadoop/tmp
export YARN_PID_DIR=/app/hadoop/tmp
export HADOOP_LOG_DIR="/log/hadoop"
export YARN_LOG_DIR=/log/yarn

#export HADOOP_HEAPSIZE=4096
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}

export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC"
export HADOOP_NAMENODE_OPTS="-Xmx80G -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled -XX:+PrintTenuringDistribution"
export HADOOP_DATANODE_OPTS="-Xmx6G -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=80 -XX:+CMSParallelRemarkEnabled -XX:+PrintTenuringDistribution"
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"

export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER

2. core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://bdp-core</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/app/hadoop/tmp/</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>CNSZ17PL1523:2181,CNSZ17PL1524:2181,CNSZ17PL1525:2181,CNSZ17PL1526:2181,CNSZ17PL1527:2181,CNSZ17PL1528:2181,CNSZ17PL1529:2181</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.Lz4Codec</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>2880</value>
</property>
<property>
<name>net.topology.script.file.name</name>
<value>/app/hadoop/etc/hadoop/rack.sh</value>
</property>
</configuration>
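The net.topology.script.file.name property above points to a rack-awareness script, /app/hadoop/etc/hadoop/rack.sh, which is not shown in this post. As a minimal sketch (the host-to-rack mapping here is invented; substitute your real rack table), such a script receives one or more hostnames/IPs as arguments and must print one rack path per argument:

#!/bin/bash
# rack.sh - illustrative topology script; Hadoop passes hostnames/IPs
# as arguments and reads one rack path per argument from stdout.
while [ $# -gt 0 ]; do
  case "$1" in
    CNSZ17PL1523|CNSZ17PL1524) echo "/dc1/rack01" ;;
    *)                         echo "/dc1/default-rack" ;;
  esac
  shift
done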

3. hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.nameservices</name>
<value>bdp-core</value>
</property>
<property>
<name>dfs.ha.namenodes.bdp-core</name>
<value>nn1,nn2</value>
</property>
<!-- Address of NameNode1 -->
<property>
<name>dfs.namenode.rpc-address.bdp-core.nn1</name>
<value>CNSZ17PL1523:8020</value>
</property>
<!-- Address of NameNode2 -->
<property>
<name>dfs.namenode.rpc-address.bdp-core.nn2</name>
<value>CNSZ17PL1524:8020</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data/dfs/nn/local</value>
</property>
<property>
<name>dfs.namenode.http-address.bdp-core.nn1</name>
<value>CNSZ17PL1523:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.bdp-core.nn2</name>
<value>CNSZ17PL1524:50070</value>
</property>
<!-- JournalNode addresses -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://CNSZ17PL1523:8485;CNSZ17PL1524:8485;CNSZ17PL1525:8485;CNSZ17PL1526:8485;CNSZ17PL1527:8485;CNSZ17PL1528:8485;CNSZ17PL1529:8485/bdp-core</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/dfs/jn</value>
</property>
<property>
<name>dfs.qjournal.start-segment.timeout.ms</name>
<value>60000</value>
</property>
<property>
<name>dfs.qjournal.prepare-recovery.timeout.ms</name>
<value>240000</value>
</property>
<property>
<name>dfs.qjournal.accept-recovery.timeout.ms</name>
<value>240000</value>
</property>
<property>
<name>dfs.qjournal.finalize-segment.timeout.ms</name>
<value>240000</value>
</property>
<property>
<name>dfs.qjournal.select-input-streams.timeout.ms</name>
<value>60000</value>
</property>
<property>
<name>dfs.qjournal.get-journal-state.timeout.ms</name>
<value>240000</value>
</property>
<property>
<name>dfs.qjournal.new-epoch.timeout.ms</name>
<value>240000</value>
</property>
<property>
<name>dfs.qjournal.write-txns.timeout.ms</name>
<value>60000</value>
</property>

<property>
<name>dfs.client.failover.proxy.provider.bdp-core</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.namenode.acls.enabled</name>
<value>true</value>
<description>Enable support for ACLs on the NameNode.</description>
</property>
<!-- Modify according to your actual environment -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/var/lib/hadoop-hdfs/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/HDATA/12/dfs/local,/HDATA/11/dfs/local,/HDATA/10/dfs/local,/HDATA/9/dfs/local,/HDATA/8/dfs/local,/HDATA/7/dfs/local,/HDATA/6/dfs/local,/HDATA/5/dfs/local,/HDATA/4/dfs/local,/HDATA/3/dfs/local,/HDATA/2/dfs/local,/HDATA/1/dfs/local</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/app/hadoop/etc/hadoop/exclude.list</value>
<description> List of nodes to decommission </description>
</property>
<property>
<name>dfs.datanode.fsdataset.volume.choosing.policy</name>
<value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value>
</property>
<property>
<name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name>
<value>10737418240</value>
</property>
<property>
<name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name>
<value>0.75</value>
</property>
<property>
<name>dfs.client.read.shortcircuit.streams.cache.size</name>
<value>1000</value>
</property>
<property>
<name>dfs.client.read.shortcircuit.streams.cache.expiry.ms</name>
<value>10000</value>
</property>
<property>
<name>dfs.domain.socket.path</name>
<value>/app/var/run/hadoop-hdfs/dn._PORT</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>300</value>
</property>
<property>
<name>dfs.datanode.handler.count</name>
<value>40</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
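The sshfence method configured above assumes the hdfs user on each NameNode can SSH to the other NameNode without a password, using the key at /var/lib/hadoop-hdfs/.ssh/id_rsa. A sketch of setting that up (run as the hdfs user on each NameNode, swapping the target host):

ssh-keygen -t rsa -N "" -f /var/lib/hadoop-hdfs/.ssh/id_rsa
ssh-copy-id -i /var/lib/hadoop-hdfs/.ssh/id_rsa.pub hdfs@CNSZ17PL1524   # from nn1; use CNSZ17PL1523 when running on nn2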

4. yarn-site.xml

<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs are set in the Node Manager Configs section below -->
<property>
<description>Where to aggregate logs to.</description>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>hdfs://bdp-core/var/log/hadoop-yarn/apps</value>
</property>
<property>
<description>Classpath for typical applications.</description>
<name>yarn.application.classpath</name>
<value>
$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/common/*,
$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/hdfs/*,
$HADOOP_COMMON_HOME/share/hadoop/hdfs/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/mapreduce/*,
$HADOOP_COMMON_HOME/share/hadoop/mapreduce/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/yarn/*,
$HADOOP_COMMON_HOME/share/hadoop/yarn/lib/*
</value>
</property>
<!-- resourcemanager config -->
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>2000</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-rm-cluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>CNSZ17PL1523</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>CNSZ17PL1524</value>
</property>
<!-- fair scheduler -->
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<name>yarn.scheduler.fair.allocation.file</name>
<value>/app/hadoop/etc/hadoop/fair-scheduler.xml</value>
</property>
<property>
<name>yarn.scheduler.fair.user-as-default-queue</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
<value>5000</value>
</property>
<property>
<name>yarn.resourcemanager.nodes.exclude-path</name>
<value>/app/hadoop/etc/hadoop/yarn.exclude</value>
<final>true</final>
</property>
<!-- ZKRMStateStore config -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>CNSZ17PL1523:2181,CNSZ17PL1524:2181,CNSZ17PL1525:2181,CNSZ17PL1526:2181,CNSZ17PL1527:2181,CNSZ17PL1528:2181,CNSZ17PL1529:2181</value>
</property>
<!--
<property>
<name>yarn.resourcemanager.zk.state-store.address</name>
<value>cnsz23pl0073:2181,cnsz23pl0069:2181,cnsz23pl0070:2181,cnsz23pl0071:2181,cnsz23pl0072:2181</value>
</property>
-->
<!-- applications manager interface -->
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>CNSZ17PL1523:23140</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>CNSZ17PL1524:23140</value>
</property>
<!-- scheduler interface -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>CNSZ17PL1523:23130</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>CNSZ17PL1524:23130</value>
</property>
<!-- RM admin interface -->
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>CNSZ17PL1523:23141</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>CNSZ17PL1524:23141</value>
</property>
<!-- RM resource-tracker interface -->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>CNSZ17PL1523:23125</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>CNSZ17PL1524:23125</value>
</property>
<!-- RM web application interface -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>CNSZ17PL1523:23188</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>CNSZ17PL1524:23188</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm1</name>
<value>CNSZ17PL1523:23189</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm2</name>
<value>CNSZ17PL1524:23189</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://CNSZ17PL1525:19888/jobhistory/logs</value>
</property>
<property>
<name>yarn.web-proxy.address</name>
<value>CNSZ17PL1525:54315</value>
</property>
<!-- Node Manager Configs -->
<property>
<description>Address where the localizer IPC is.</description>
<name>yarn.nodemanager.localizer.address</name>
<value>0.0.0.0:23344</value>
</property>
<property>
<description>NM Webapp address.</description>
<name>yarn.nodemanager.webapp.address</name>
<value>0.0.0.0:23999</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>file:///HDATA/11/yarn/local,file:///HDATA/10/yarn/local,file:///HDATA/9/yarn/local,file:///HDATA/8/yarn/local,file:///HDATA/7/yarn/local,file:///HDATA/6/yarn/local,file:///HDATA/5/yarn/local,file:///HDATA/4/yarn/local,file:///HDATA/3/yarn/local,file:///HDATA/2/yarn/local,file:///HDATA/1/yarn/local</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>file:///HDATA/11/yarn/logs,file:///HDATA/10/yarn/logs,file:///HDATA/9/yarn/logs,file:///HDATA/8/yarn/logs,file:///HDATA/7/yarn/logs,file:///HDATA/6/yarn/logs,file:///HDATA/5/yarn/logs,file:///HDATA/4/yarn/logs,file:///HDATA/3/yarn/logs,file:///HDATA/2/yarn/logs,file:///HDATA/1/yarn/logs</value>
</property>
<property>
<name>yarn.nodemanager.delete.debug-delay-sec</name>
<value>1200</value>
</property>
<property>
<name>mapreduce.shuffle.port</name>
<value>23080</value>
</property>
<property>
<name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
<value>true</value>
</property>
<!-- tuning -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>120000</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>16</value>
</property>
<!-- tuning yarn container -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>4096</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>16384</value>
</property>
<property>
<name>yarn.scheduler.increment-allocation-mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.scheduler.fair.allow-undeclared-pools</name>
<value>false</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>1209600</value>
</property>
</configuration>
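The yarn.scheduler.fair.allocation.file property above points to /app/hadoop/etc/hadoop/fair-scheduler.xml, which is not shown in this post. A minimal sketch of what such a file can look like (the queue names, weights, and minResources are placeholders, not this cluster's real values):

<?xml version="1.0"?>
<allocations>
<!-- illustrative queues only; define your real queues here -->
<queue name="default">
<weight>1.0</weight>
</queue>
<queue name="production">
<weight>3.0</weight>
<minResources>40960 mb, 8 vcores</minResources>
</queue>
</allocations>

Since yarn.scheduler.fair.allow-undeclared-pools and yarn.scheduler.fair.user-as-default-queue are both false above, jobs must explicitly target one of the queues declared in this file.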

5. mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>CNSZ17PL1525:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>CNSZ17PL1525:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/user</value>
</property>

<!-- tuning mapreduce -->
<property>
<name>mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx4096m -Dfile.encoding=UTF-8</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>13312</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx10649m -Dfile.encoding=UTF-8</value>
</property>
<property>
<name>mapreduce.map.cpu.vcores</name>
<value>1</value>
</property>
<property>
<name>mapreduce.reduce.cpu.vcores</name>
<value>2</value>
</property>
<property>
<name>mapreduce.jobhistory.max-age-ms</name>
<value>1296000000</value>
</property>
<property>
<name>mapreduce.jobhistory.joblist.cache.size</name>
<value>200000</value>
</property>
</configuration>
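Note the pattern in the tuning values above: each -Xmx is roughly 80% of its container size (4096 MB of 5120 MB for map tasks, 10649 MB of 13312 MB for reduce tasks), leaving headroom for non-heap JVM memory so YARN does not kill containers for exceeding their allocation.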

Once these five configuration files are in place, configuration is done. The meaning of the individual parameters will be covered in a dedicated post. Now distribute everything to every machine and run the dir.sh script below to create the data, log, and other directories needed at runtime. (Run it on every machine; likewise distribute the Hadoop 2.7.3 package containing the configuration above to every machine, and add the environment variables on every machine.)

The environment variables look roughly like this:

export ZOOKEEPER_HOME=/app/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin
export JAVA_HOME=/app/jdk
export JRE_HOME=/app/jdk/jre
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/app/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_HOME=$HADOOP_HOME

(the dir.sh script)

#!/bin/bash
# create the service groups/users; hdfs owns the HDFS dirs, yarn owns the YARN dirs
groupadd hadoop
groupadd hdfs
groupadd yarn
useradd -g hdfs -G hadoop hdfs
useradd -g yarn -G hadoop yarn

echo "sf#123456" | passwd --stdin hdfs

# dfs.datanode.data.dir above lists /HDATA/1 .. /HDATA/12, so loop to 12
for i in 1 2 3 4 5 6 7 8 9 10 11 12
do
    datadir=/HDATA/$i/dfs/local
    mrdir=/HDATA/$i/mapred/local
    yarndir=/HDATA/$i/yarn/local
    yarnlog=/HDATA/$i/yarn/logs
    mkdir -p $datadir $mrdir $yarndir $yarnlog
    echo "$datadir $mrdir $yarndir $yarnlog created; setting ownership"
    chown -R hdfs:hadoop $datadir $mrdir
    chown -R yarn:yarn $yarndir $yarnlog
done

# NameNode metadata, log, PID, and short-circuit-socket directories
mkdir -p /data/dfs/nn/local
chown hdfs:hadoop /data/dfs/nn/local
mkdir -p /log/hadoop /log/yarn /log/yarn-log /log/balant /log/hadoop-datanode-log/ /app/hadoop/tmp /app/var/run/hadoop-hdfs
chown hdfs:hadoop /log/hadoop /log/yarn /log/yarn-log /log/balant /log/hadoop-datanode-log/ /app/hadoop/tmp /app/var/run/hadoop-hdfs
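A sketch of pushing the configured package and dir.sh to every node (only a few hostnames from the cluster list are shown; extend to all nodes):

for host in CNSZ17PL1524 CNSZ17PL1525 CNSZ17PL1526; do
  rsync -a /app/hadoop/ ${host}:/app/hadoop/
  scp dir.sh ${host}:/tmp/ && ssh ${host} "bash /tmp/dir.sh"
done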

Finally, give ownership of the Hadoop installation directory to hdfs:hadoop.
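For example, with the /app/hadoop install path used throughout this post:

chown -R hdfs:hadoop /app/hadoop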

Then the startup procedure:

1. Start the ZooKeeper cluster
Start the ZooKeeper service on every host where ZooKeeper is installed. In the original tutorial these were slave51, slave52, and slave53; in this cluster it means every node in the ha.zookeeper.quorum list above. Log in to each ZooKeeper node and run:

zkServer.sh start


2. Format the ZooKeeper cluster state
This can be run on either NameNode; the author chose the master1 host to run the format command (run on namenode1):

hdfs zkfc -formatZK
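To confirm the format step created the HA znode (assuming the quorum from core-site.xml and no chroot):

zkCli.sh -server CNSZ17PL1523:2181
ls /hadoop-ha          # at the zkCli prompt; should list bdp-core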


3. Start the JournalNode cluster
Run the following on all JournalNode hosts (all seven nodes in dfs.namenode.shared.edits.dir above; slave1, slave2, and slave3 in the original tutorial):

hadoop-daemon.sh start journalnode


4. Format the cluster's NameNode

Run the following command on the master1 host to format the NameNode (run on namenode1):

hdfs namenode -format


5. Start the freshly formatted NameNode
The NameNode was just formatted on master1, so start it there (run on namenode1):

hadoop-daemon.sh start namenode


6. Sync the NameNode1 metadata to NameNode2
Copy the metadata from the NameNode you just formatted to the other NameNode, i.e. from namenode1 to namenode2 here. Run the following on namenode2:

hdfs namenode -bootstrapStandby


7. Start NameNode2
Once the metadata has been copied over, start the NameNode process on the second host (run on namenode2):

hadoop-daemon.sh start namenode


8. Start all DataNodes in the cluster (run on every DataNode)


hadoop-daemon.sh start datanode


9. Start the ZKFCs

Run the following on both master1 and master2 (run on namenode1 and namenode2):

hadoop-daemon.sh start zkfc


10. Start the job history service

Run on master1 and master2 (run on namenode1 and namenode2; note that mapred-site.xml above actually places the JobHistory server on CNSZ17PL1525):

mr-jobhistory-daemon.sh start historyserver


11. Start YARN on RM1

Run the following command on the master1 host (run on namenode1):

yarn-daemon.sh start resourcemanager

12. Start YARN separately on RM2

Although the previous step started YARN, there is no ResourceManager process on master2 yet, so it must be started separately there (run on namenode2):

yarn-daemon.sh start resourcemanager

13. Start the NodeManager on all DataNodes (run on every DataNode)

yarn-daemon.sh start nodemanager
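Once everything is up, a quick sanity check (the example-jar path assumes the /app/hadoop layout above):

jps                                   # each node should show the daemons expected for its role
hdfs haadmin -getServiceState nn1     # expect one active and one standby NameNode
yarn rmadmin -getServiceState rm1     # likewise for the ResourceManagers
hadoop jar /app/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar pi 10 100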
