基於環境:centos7.6.1810,jdk 1.8.0_77,4G的系統內存,建議安裝CentOS7-Minimal版本
hadoop-3.0.0 安裝
vi /etc/hosts
192.168.118.129 master
vi /etc/hostname
master
vi /etc/security/limits.conf
* soft nproc 4096
* hard nproc 4096
* soft nofile 65536
* hard nofile 65536
* soft memlock unlimited
* hard memlock unlimited
vi /etc/security/limits.d/20-nproc.conf
* soft nproc 65536
root soft nproc unlimited
su es
vi ~/.bashrc
export HADOOP_HOME=/home/es/hadoop-3.0.0
export HBASE_HOME=/home/es/hbase-2.1.0
export JAVA_HOME=/usr/local/jdk
cd /home/es
wget http://archive.apache.org/dist/hadoop/common/hadoop-3.0.0/hadoop-3.0.0.tar.gz
mkdir -p /home/es/data/hadoop/dfs/{namenode,datanode,tmp}
mkdir -p /home/es/data/hadoop/tmp
tar -zxf hadoop-3.0.0.tar.gz
cd hadoop-3.0.0
vi etc/hadoop/mapred-site.xml
<configuration> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> <property> <name>mapreduce.jobhistory.address</name> <value>master:10020</value> </property> <property> <name>mapreduce.jobhistory.webapp.address</name> <value>master:19888</value> </property> <property> <name>mapreduce.jobhistory.done-dir</name> <value>/history/done</value> </property> <property> <name>mapreduce.jobhistory.intermediate-done-dir</name> <value>/history/done_intermediate</value> </property> <property> <name>mapreduce.input.fileinputformat.split.minsize</name> <value>5120</value> <description>5M</description> </property> <property> <name>mapreduce.cluster.local.dir</name> <value>/home/es/data/hadoop/local</value> </property> <property> <name>mapreduce.map.memory.mb</name> <value>512</value> <description>The amount of memory to request from the scheduler for each map task. </description> </property> <property> <name>mapreduce.reduce.memory.mb</name> <value>1024</value> <description>The amount of memory to request from the scheduler for each reduce task. </description> </property> <property> <name>mapreduce.map.java.opts</name> <value>-Xmx435m -XX:-UseGCOverheadLimit</value> <description>Java opts only for the child processes that are maps. If set, this will be used instead of mapred.child.java.opts. </description> </property> <property> <name>mapreduce.reduce.java.opts</name> <value>-Xmx870m -XX:-UseGCOverheadLimit</value> <description>Java opts only for the child processes that are reduces. If set, this will be used instead of mapred.child.java.opts. </description> </property> <property> <name>mapreduce.task.timeout</name> <value>300000</value> </property> <property> <name>mapreduce.jobtracker.handler.count</name> <value>10</value> </property> <property> <name>mapreduce.job.reduce.slowstart.completedmaps</name> <value>0.07</value> <description>Fraction of the number of maps in the job which should be complete before reduces are scheduled for the job. 
</description> </property> <property> <name>mapreduce.reduce.shuffle.parallelcopies</name> <value>10</value> </property> <property> <name>mapreduce.tasktracker.http.threads</name> <value>10</value> </property> <property> <name>mapreduce.tasktracker.map.tasks.maximum</name> <value>4</value> </property> <property> <name>mapreduce.tasktracker.reduce.tasks.maximum</name> <value>1</value> </property> </configuration>
vi etc/hadoop/core-site.xml
<configuration> <property> <name>hadoop.tmp.dir</name> <value>file:///home/es/data/hadoop/tmp</value> <description>A base for other temporary directories.</description> </property> <property> <name>io.file.buffer.size</name> <value>131072</value> </property> <property> <name>fs.defaultFS</name> <value>hdfs://master:8020</value> </property> </configuration>
vi etc/hadoop/hdfs-site.xml
<configuration> <property> <name>dfs.namenode.name.dir</name> <value>/home/es/data/hadoop/dfs/namenode</value> </property> <property> <name>dfs.datanode.data.dir</name> <value>/home/es/data/hadoop/dfs/datanode</value> </property> <property> <name>dfs.permissions</name> <value>false</value> </property> <property> <name>dfs.replication</name> <value>1</value> </property> </configuration>
vi etc/hadoop/yarn-site.xml
<configuration> <property> <name>yarn.resourcemanager.hostname</name> <value>master</value> </property> <property> <description>The address of the applications manager interface in the RM.</description> <name>yarn.resourcemanager.address</name> <value>${yarn.resourcemanager.hostname}:8032</value> </property> <property> <description>The address of the scheduler interface.</description> <name>yarn.resourcemanager.scheduler.address</name> <value>${yarn.resourcemanager.hostname}:8030</value> </property> <property> <description>The http address of the RM web application.</description> <name>yarn.resourcemanager.webapp.address</name> <value>${yarn.resourcemanager.hostname}:8088</value> </property> <property> <description>The https adddress of the RM web application.</description> <name>yarn.resourcemanager.webapp.https.address</name> <value>${yarn.resourcemanager.hostname}:8090</value> </property> <property> <name>yarn.resourcemanager.resource-tracker.address</name> <value>${yarn.resourcemanager.hostname}:8031</value> </property> <property> <description>The address of the RM admin interface.</description> <name>yarn.resourcemanager.admin.address</name> <value>${yarn.resourcemanager.hostname}:8033</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> <description></description> </property> <property> <name>yarn.log-aggregation-enable</name> <value>true</value> </property> <property> <description>Amount of physical memory, in MB, that can be allocated for containers.default is 8192MB</description> <name>yarn.nodemanager.resource.memory-mb</name> <value>2048</value> </property> <property> <description>The minimum allocation for every container request at the RM, in MBs. 
Memory requests lower than this won't take effect, and the specified value will get allocated at minimum.</description> <name>yarn.scheduler.minimum-allocation-mb</name> <value>512</value> </property> <property> <description>The maximum allocation for every container request at the RM, in MBs. Memory requests higher than this won't take effect, and will get capped to this value.</description> <name>yarn.scheduler.maximum-allocation-mb</name> <value>2048</value> </property> <property> <name>yarn.app.mapreduce.am.resource.mb</name> <value>1024</value> </property> <property> <name>yarn.app.mapreduce.am.command-opts</name> <value>-Xmx870m</value> </property> <property> <description>Number of vcores that can be allocated for containers. This is used by the RM scheduler when allocating resources for containers. This is not used to limit the number of physical cores used by YARN containers.</description> <name>yarn.nodemanager.resource.cpu-vcores</name> <value>4</value> </property> <property> <name>yarn.scheduler.minimum-allocation-vcores</name> <value>1</value> </property> <property> <description>The maximum allocation for every container request at the RM, in terms of virtual CPU cores. Requests higher than this won't take effect, and will get capped to this value.</description> <name>yarn.scheduler.maximum-allocation-vcores</name> <value>4</value> </property> <property> <name>yarn.nodemanager.vmem-pmem-ratio</name> <value>5</value> </property> <property> <name>yarn.nodemanager.vmem-check-enabled</name> <value>false</value> </property> <property> <description>List of directories to store localized files in. An application's localized file directory will be found in: ${yarn.nodemanager.local-dirs}/usercache/${user}/appcache/application_${appid}. 
Individual containers' work directories, called container_${contid}, will be subdirectories of this.</description> <name>yarn.nodemanager.local-dirs</name> <value>/home/es/data/hadoop/tmp/nm_local_dir</value> </property> <!-- hadoop-3.0.0 new config--> <property> <name>yarn.nodemanager.env-whitelist</name> <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value> </property> </configuration>
vi etc/hadoop/hadoop-env.sh
#append the following lines at the bottom of the file
export JAVA_HOME=/usr/local/jdk
export HADOOP_HEAPSIZE=256
vi etc/hadoop/workers
master
#format namenode
bin/hdfs namenode -format
#equivalent legacy command: bin/hadoop namenode -format
#start hdfs
bin/hdfs --daemon start namenode
#equivalent legacy command: sbin/hadoop-daemon.sh start namenode
bin/hdfs --daemon start datanode
#stop
bin/hdfs --daemon stop datanode
bin/hdfs --daemon stop namenode
#start yarn
bin/yarn --daemon start resourcemanager
bin/yarn --daemon start nodemanager
#stop yarn
bin/yarn --daemon stop nodemanager
bin/yarn --daemon stop resourcemanager
#test hdfs
bin/hdfs dfs -mkdir /tmp
bin/hdfs dfs -put README.txt /tmp
bin/hdfs dfs -text /tmp/README.txt
bin/hdfs dfs -rm /tmp/README.txt
#test mr in namenode
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0.jar pi 2 2
su root
#firewall-cmd --zone=public --remove-port=9200/tcp --permanent
#firewall-cmd --zone=public --list-ports
#firewall-cmd --reload
#namenode http
firewall-cmd --zone=public --add-port=9870/tcp --permanent
##namenode rpc
firewall-cmd --zone=public --add-port=8020/tcp --permanent
#datanode
firewall-cmd --zone=public --add-port=9864/tcp --permanent
firewall-cmd --zone=public --add-port=9866/tcp --permanent
firewall-cmd --zone=public --add-port=9867/tcp --permanent
#resourcemanager
firewall-cmd --zone=public --add-port=8088/tcp --permanent
firewall-cmd --zone=public --add-port=8030/tcp --permanent
firewall-cmd --zone=public --add-port=8031/tcp --permanent
firewall-cmd --zone=public --add-port=8032/tcp --permanent
firewall-cmd --zone=public --add-port=8033/tcp --permanent
#nodemanager
firewall-cmd --zone=public --add-port=8040/tcp --permanent
firewall-cmd --zone=public --add-port=8042/tcp --permanent
#--permanent rules only take effect after a reload
firewall-cmd --reload
#firewall-cmd --zone=public --add-port=13562/tcp --permanent
#firewall-cmd --zone=public --add-port=37828/tcp --permanent
hbase-2.1.0 安裝
su es
cd /home/es
wget http://archive.apache.org/dist/hbase/2.1.0/hbase-2.1.0-bin.tar.gz
tar -zxf hbase-2.1.0-bin.tar.gz
cd hbase-2.1.0
vi conf/hbase-site.xml
<configuration> <property> <name>hbase.rootdir</name> <value>hdfs://master:8020/hbase</value> </property> <property> <name>hbase.cluster.distributed</name> <value>true</value> </property> <property> <name>hbase.zookeeper.property.dataDir</name> <value>/home/es/data/zookeeper</value> </property> <property> <name>hbase.zookeeper.quorum</name> <value>master:2181</value> </property> <property> <name>hbase.unsafe.stream.capability.enforce</name> <value>false</value> </property> </configuration>
vi conf/hbase-env.sh
export HBASE_HEAPSIZE=1G
export JAVA_HOME=/usr/local/jdk
vi conf/regionservers
master
#start
bin/hbase-daemon.sh start zookeeper
bin/hbase-daemon.sh start master
bin/hbase-daemon.sh start regionserver
bin/hbase-daemon.sh start master-backup
#stop
bin/hbase-daemon.sh stop regionserver
bin/hbase-daemon.sh stop master
#zk
firewall-cmd --zone=public --add-port=2181/tcp --permanent
#hmaster
#http
firewall-cmd --zone=public --add-port=16010/tcp --permanent
#rpc
firewall-cmd --zone=public --add-port=16000/tcp --permanent
#regionserver
firewall-cmd --zone=public --add-port=16030/tcp --permanent
firewall-cmd --zone=public --add-port=16020/tcp --permanent
#--permanent rules only take effect after a reload
firewall-cmd --reload
#test hbase
bin/hbase shell
create 'blog','info'
desc 'blog'
list
put 'blog','r1','info:c1','v1'
scan 'blog',{LIMIT=>2}
get 'blog','r1'
delete 'blog','r1','info:c1'
第一個family列族對應一個文件
bin/hdfs dfs -ls /hbase/data/default/blog
手工把memstore寫到Hfile中
flush 'blog'
手工合併hfile
compact 'blog'
hbase2列族必須事先定義,後不可新增
disable 'blog'
drop 'blog'
create 'blog','info','info1'
enable 'blog'
exists 'blog'
truncate 'blog'