1、安裝配置
1.建立hadoop用戶(我是添加到root group,也可以新增hadoop group)
[root@hftclclw0001 ~]# useradd hadoop [root@hftclclw0001 ~]# usermod -g root hadoop [root@hftclclw0001 ~]# cat /etc/passwd ... ... hadoop:x:50295:0::/home/hadoop:/bin/bash [root@hftclclw0001 ~]# chmod 644 /etc/sudoers [root@hftclclw0001 ~]# vi /etc/sudoers ... ... root ALL=(ALL) ALL hadoop ALL=(ALL) ALL ...
2.ssh免密碼登錄
[hadoop@hftclclw0001 hadoop]$ ssh-keygen -t rsa [hadoop@hftclclw0001 hadoop]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys [hadoop@hftclclw0001 hadoop]$ tree ~/.ssh/ /home/hadoop/.ssh/ ├── authorized_keys ├── id_rsa ├── id_rsa.pub └── known_hosts 0 directories, 4 files 在其他各個機器上執行同樣的操作,並複製公鑰(id_rsa.pub)到其他各個機器的authorized_keys中。我使用的是scp,先複製到其他機器,再使用cat追加到authorized_keys文件中
3. 下載hadoop-2.x.y.tar.gz
[root@hftclclw0001 hadoop]# pwd /home/hadoop [root@hftclclw0001 hadoop]# tar -zxvf hadoop-2.7.1.tar.gz [root@hftclclw0001 hadoop]# ll total 546584 drwx------ 11 hadoop root 4096 Oct 20 09:05 hadoop-2.7.1 -rw------- 1 hadoop root 210606807 Oct 20 09:00 hadoop-2.7.1.tar.gz drwx------ 13 hadoop root 4096 Oct 20 09:22 spark-1.5.1-bin-hadoop2.6 -rw------- 1 hadoop root 280901736 Oct 20 09:19 spark-1.5.1-bin-hadoop2.6.tgz drwx------ 22 hadoop root 4096 Oct 21 00:07 sqoop-1.99.6-bin-hadoop200 -rw------- 1 hadoop root 68177818 May 5 22:34 sqoop-1.99.6-bin-hadoop200.tar.gz
4.配置hadoop-2.x.y
[hadoop@hftclclw0001 hadoop]$ pwd /home/hadoop/hadoop-2.7.1/etc/hadoop [hadoop@hftclclw0001 hadoop]$ vi hadoop-env.sh # The java implementation to use. export JAVA_HOME=/usr/java/latest => 配置java_home [hadoop@hftclclw0001 hadoop]$ vi core-site.xml <configuration> <property> <name>hadoop.tmp.dir</name> <value>/home/hadoop/hadoop-2.7.1/tmp</value> => 需創建,默認在/tmp下 </property> <property> <name>fs.defaultFS</name> <value>hdfs://{master:IP}:9000</value> </property> </configuration> [hadoop@hftclclw0001 hadoop]$ vi hdfs-site.xml <configuration> <property> <name>dfs.http.address</name> <value>{master:ip}:50070</value> </property> <property> <name>dfs.replication</name> <value>2</value> =>我這有3臺機器,2臺datanode 1臺Namenode </property> </configuration> [hadoop@hftclclw0001 hadoop]$ vi mapred-site.xml <configuration> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> </configuration> [hadoop@hftclclw0001 hadoop]$ vi yarn-env.sh ... export JAVA_HOME=/usr/java/latest ... [hadoop@hftclclw0001 hadoop]$ vi yarn-site.xml <configuration> <property> <name>yarn.resourcemanager.hostname</name> =>需要配置,在啓動時候nodemanager會訪問resourcemanager <value>{master:ip}</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> </configuration> [hadoop@hftclclw0001 hadoop]$ vi masters =>其實作用是指定secondary namenode在哪個節點上 {master:ip} [hadoop@hftclclw0001 hadoop]$ vi slaves =>作用是指定datanode在哪些節點上 {slave-1:ip} {slave-2:ip}
5. 複製到其他機器
[hadoop@hftclclw0001 ~]$ pwd /home/hadoop [hadoop@hftclclw0001 ~]$ scp -r hadoop-2.7.1 hadoop@{ip}:/home/hadoop
6.啓動
[hadoop@hftclclw0001 hadoop-2.7.1]$ ./bin/hadoop namenode -format [hadoop@hftclclw0001 hadoop-2.7.1]$ pwd /home/hadoop/hadoop-2.7.1 [hadoop@hftclclw0001 hadoop-2.7.1]$ ./sbin/start-dfs.sh => 啓動dfs, jps查看進程 master:namenode ,secondary namenode slave:datanode [hadoop@hftclclw0001 hadoop-2.7.1]$ ./sbin/start-yarn.sh =>啓動yarn
7.驗證
a.jps => 校驗各個進程
b.netstat => 校驗端口
c.webui => 可以校驗cluster整體狀況
d.也可以操作hdfs、或是submit mr job
[hadoop@hftclclw0001 hadoop-2.7.1]$ pwd /home/hadoop/hadoop-2.7.1 [hadoop@hftclclw0001 hadoop-2.7.1]$ ./bin/hdfs dfs -ls / ... ... [hadoop@hftclclw0001 hadoop-2.7.1]$ ./bin/hdfs dfs -mkdir /test ... ... [hadoop@hftclclw0001 hadoop-2.7.1]$ ./bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount {in} {out} [hadoop@hftclclw0001 hadoop-2.7.1]$ ./bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar pi 10 10
2、trouble shooting
文件寫入權限問題
當外部程序寫入hdfs時,默認都要進行用戶認證。如按照上述配置,只有hadoop帳戶可以寫hdfs
dfs.permissions.enabled=true 即對用戶進行認證。修改為false
dfs.datanode.data.dir.perm=700 即本地目錄的寫入權限。修改為755