################################################################################
# Hadoop 2.7.3 + Hue cluster setup runbook (yum-based hosts).
#
# This is a paste-along runbook, NOT a script to execute end to end:
#   * sections are run on different nodes (see each section header), and
#   * `su - hadoop` / `sudo su -` open a new interactive shell, so the commands
#     that follow them must be typed/pasted into that new shell.
#
# ENV
#   spark01 192.168.51.6
#   spark02 192.168.51.18
#   spark03 192.168.51.19
#   spark04 192.168.51.21
#   spark05 192.168.51.24
################################################################################

################################################################################
# Base OS preparation -- run as root on every node
################################################################################

# Append a line to a file only when that exact line is not already present,
# so re-running this section does not pile up duplicate entries.
#   $1 - line to add
#   $2 - target file
append_once() {
  local line=$1 file=$2
  # A missing file makes grep fail, which correctly falls through to the append.
  grep -qxF -- "$line" "$file" 2>/dev/null || printf '%s\n' "$line" >> "$file"
}

## Raise the open-file and process limits on every node.
append_once 'ulimit -SHn 204800' /etc/rc.local
append_once 'ulimit -SHu 204800' /etc/rc.local

# Written as a late-sorting drop-in instead of appending to
# /etc/security/limits.conf: pam_limits parses limits.d/*.conf after
# limits.conf, so a distribution default there (e.g. a "* soft nproc" entry)
# would otherwise override these values. Overwriting also keeps it idempotent.
cat > /etc/security/limits.d/99-hadoop.conf << 'EOF'
* soft nofile 204800
* hard nofile 204800
* soft nproc 204800
* hard nproc 204800
EOF

## Disable IPv6 and set vm.swappiness to 0 on every node.
append_once 'net.ipv6.conf.all.disable_ipv6 = 1' /etc/sysctl.conf
append_once 'net.ipv6.conf.default.disable_ipv6 = 1' /etc/sysctl.conf
append_once 'vm.swappiness = 0' /etc/sysctl.conf
sysctl -p

## Turn off transparent-hugepage defrag at boot (takes effect via rc.local).
append_once 'echo never > /sys/kernel/mm/transparent_hugepage/defrag' /etc/rc.local
chmod +x /etc/rc.d/rc.local

#1) Write /etc/hosts on every node (replaces the whole file).
cat > /etc/hosts << 'EOF'
127.0.0.1 localhost
192.168.51.6 spark01
192.168.51.18 spark02
192.168.51.19 spark03
192.168.51.21 spark04
192.168.51.24 spark05
EOF

#2) Install the JDK on every node.
# NOTE(review): this downloads a script over plain HTTP and runs it as root
# with no checksum or signature check -- verify the file before executing.
wget http://god.nongdingbang.net/downloads/auto_jdk_1.8.sh
sh auto_jdk_1.8.sh
source /etc/profile.d/java.sh

#3) Create the hadoop user (uid/gid 700) on every node.
groupadd -g 700 hadoop
useradd -g hadoop -u 700 hadoop
# NOTE(review): the default password is hard-coded; export HADOOP_PASSWORD
# before running this line to use a different one.
printf '%s\n' "${HADOOP_PASSWORD:-hadoop123}" | passwd --stdin hadoop
# Use a validated drop-in rather than appending to /etc/sudoers directly: a
# malformed main sudoers file locks everyone out of sudo. This relies on the
# stock "#includedir /etc/sudoers.d" line being present in /etc/sudoers.
echo 'hadoop ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/hadoop
chmod 0440 /etc/sudoers.d/hadoop
visudo -cf /etc/sudoers.d/hadoop

#4) Give the hadoop user ownership of /opt on every node.
# "user:group" is the supported separator; "user.group" is deprecated.
chown -R hadoop:hadoop /opt/

################################################################################
# Hadoop installation
################################################################################

#5) Set up key-based (passwordless) SSH login.
# Run this on spark01 only, as the hadoop user.
su - hadoop
ssh-keygen
for host in spark01 spark02 spark03 spark04 spark05; do
  ssh-copy-id -i ~/.ssh/id_rsa.pub "hadoop@${host}"
done

#6) Install Hadoop on spark01 and propagate /opt/hadoop-2.7.3 to the other nodes.
# NOTE(review): the tarball is fetched over plain HTTP without a checksum.
sudo mkdir -p /home/tools   # make sure the download directory exists before cd
cd /home/tools
sudo wget http://god.nongdingbang.net/downloads/hadoop-2.7.3.tar.gz
sudo tar zxvf hadoop-2.7.3.tar.gz -C /opt/
sudo chown -R hadoop:hadoop /opt/hadoop-2.7.3
for host in spark02 spark03 spark04 spark05; do
  scp -r /opt/hadoop-2.7.3 "hadoop@${host}:/opt"
done

#7) Create the Hadoop environment profile on every node (as root).
sudo su -
# Quoted delimiter: the $HADOOP_PREFIX references are written literally and
# expanded only when the profile script is sourced.
cat > /etc/profile.d/hadoop.sh << 'EOF'
export HADOOP_PREFIX=/opt/hadoop-2.7.3
export HADOOP_HOME=$HADOOP_PREFIX
export HADOOP_COMMON_HOME=$HADOOP_PREFIX
export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
export HADOOP_HDFS_HOME=$HADOOP_PREFIX
export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
export HADOOP_YARN_HOME=$HADOOP_PREFIX
export PATH=$PATH:$HADOOP_PREFIX/sbin:$HADOOP_PREFIX/bin
EOF
source /etc/profile.d/hadoop.sh

#8) Write /opt/hadoop-2.7.3/etc/hadoop/core-site.xml on every node.
## Sets the NameNode URI (fs.defaultFS) used by all nodes.
######################################################
cat > /opt/hadoop-2.7.3/etc/hadoop/core-site.xml << 'EOF'
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://spark01:9000</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop-2.7.3/tmp/</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>*</value>
  </property>
</configuration>
EOF

#9) Create the HDFS NameNode/DataNode storage dirs on every node and hand
#   them to the hadoop user.
mkdir -p /opt/storage/{datanode,namenode}
chown -R hadoop:hadoop /opt/storage

#10) Write /opt/hadoop-2.7.3/etc/hadoop/hdfs-site.xml on every node.
# dfs.namenode.name.dir is the key that selects the NameNode metadata
# directory; "dfs.namenode.data.dir" is not a Hadoop property, so with it
# /opt/storage/namenode is never used and the metadata falls back to a
# location under hadoop.tmp.dir.
# dfs.permissions.enabled and dfs.namenode.secondary.http-address are the
# current names of the deprecated dfs.permissions / dfs.secondary.http.address.
###############################################
cat > /opt/hadoop-2.7.3/etc/hadoop/hdfs-site.xml << 'EOF'
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/storage/datanode</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/opt/storage/namenode</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>spark01:50090</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>spark01:50070</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
</configuration>
EOF

#11) Write /opt/hadoop-2.7.3/etc/hadoop/mapred-site.xml on spark01.
#################################################################
cat > /opt/hadoop-2.7.3/etc/hadoop/mapred-site.xml << 'EOF'
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>spark01:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>spark01:19888</value>
  </property>
</configuration>
EOF

#12) Set up the ResourceManager on spark01 and NodeManagers on spark02-05.
# Write this file on every node: the NodeManagers use
# yarn.resourcemanager.hostname to locate the ResourceManager.
# NOTE(review): the yarn.nodemanager.hostname.nm1..nm4 keys do not appear to be
# standard YARN properties; the slaves file in step 13 is what start-yarn.sh
# uses to pick NodeManager hosts. They are kept unchanged -- confirm and drop.
#########################################################################
cat > /opt/hadoop-2.7.3/etc/hadoop/yarn-site.xml << 'EOF'
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>spark01</value>
  </property>
  <property>
    <name>yarn.nodemanager.hostname.nm1</name>
    <value>spark02</value>
  </property>
  <property>
    <name>yarn.nodemanager.hostname.nm2</name>
    <value>spark03</value>
  </property>
  <property>
    <name>yarn.nodemanager.hostname.nm3</name>
    <value>spark04</value>
  </property>
  <property>
    <name>yarn.nodemanager.hostname.nm4</name>
    <value>spark05</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
EOF

#13) Write /opt/hadoop-2.7.3/etc/hadoop/slaves on spark01
## (so that the master can start all necessary services on the slaves
## automatically).
###############################################################
cat > /opt/hadoop-2.7.3/etc/hadoop/slaves << 'EOF'
spark02
spark03
spark04
spark05
EOF

#14) Format the NameNode.
## Run this on spark01 only, as the hadoop user.
su - hadoop
hdfs namenode -format

#15) Start HDFS on spark01 (as user hadoop).
start-dfs.sh

#16) Check the NameNode web UI at http://spark01:50070/, then start YARN on
#    spark01 (as user hadoop).
start-yarn.sh

################################################################################
# Install Hue
################################################################################

##17) Install Hue on spark01 (as root).
## http://archive.cloudera.com/cdh5/cdh/5/
sudo su -
# Quoted delimiter: $releasever and $basearch must reach the repo file
# literally so that yum expands them.
cat > /etc/yum.repos.d/apache-maven.repo << 'EOF'
[apache-maven]
name=maven from apache foundation.
baseurl=http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-$releasever/$basearch/
enabled=1
skip_if_unavailable=1
gpgcheck=0

[apache-maven-source]
name=maven from apache foundation. - Source
baseurl=http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-$releasever/SRPMS
enabled=0
skip_if_unavailable=1
gpgcheck=0
EOF

cd /home/tools
yum -y install epel-release apache-maven ant python-simplejson gmp-devel \
  libffi-devel asciidoc cyrus-sasl-devel rsync \
  cyrus-sasl-gssapi gcc gcc-c++ krb5-devel \
  libxml2-devel libxslt-devel make openssl-devel \
  openldap-devel python-devel sqlite-devel

# NOTE(review): fetched over plain HTTP without a checksum.
wget http://god.nongdingbang.net/downloads/hue-latest.tar.gz
tar zxvf hue-latest.tar.gz
cd /home/tools/hue-3.9.0-cdh5.12.0 && PREFIX=/opt/ make install

# Point Hue at the cluster and run it as the hadoop user.
# The numeric addresses (21, 62, 63, 66, 69, 837, 845) are tied to the layout
# of the hue.ini shipped with hue-3.9.0-cdh5.12.0; re-check them for any other
# release. Expressions are applied in the order listed, so line 845 first gets
# its URL rewritten and is then uncommented.
# NOTE(review): secret_key is hard-coded here; replace it with a private value.
hue_ini=/opt/hue/desktop/conf/hue.ini
sed -i \
  -e '21s/secret_key=/secret_key=jerry2049205020512052,with==nothing/' \
  -e 's#America/Los_Angeles#Asia/Shanghai#' \
  -e '62s/## server_user=hue/server_user=hadoop/' \
  -e '63s/## server_group=hue/server_group=hadoop/' \
  -e '69s/## default_hdfs_superuser=hdfs/default_hdfs_superuser=hadoop/' \
  -e '66s/## default_user=hue/default_user=hadoop/' \
  -e '837s#hdfs://localhost:8020#hdfs://spark01:9000#' \
  -e '845s#http://localhost:50070#http://spark01:50070#' \
  -e '845s/## webhdfs_url/webhdfs_url/' \
  "$hue_ini"
chown -R hadoop:hadoop /opt/hue/

##18) Start Hue on spark01 as the hadoop user (web port 8888).
su - hadoop
nohup /opt/hue/build/env/bin/supervisor &