hadoop-3.0.0與hbase-2.1.0 快速安裝

基於環境:centos7.6.1810,jdk 1.8.0_77,4G的系統內存,建議安裝CentOS7-Minimal版本

 

hadoop-3.0.0安裝

vi /etc/hosts
192.168.118.129 master

vi /etc/hostname
master

vi /etc/security/limits.conf

* soft nproc 4096
* hard nproc 4096
* soft nofile 65536
* hard nofile 65536
* soft memlock unlimited
* hard memlock unlimited

vi /etc/security/limits.d/20-nproc.conf

* soft nproc 65536
root soft nproc unlimited

 


su es
vi ~/.bashrc
export HADOOP_HOME=/home/es/hadoop-3.0.0
export HBASE_HOME=/home/es/hbase-2.1.0
export JAVA_HOME=/usr/local/jdk

 

cd /home/es
wget http://archive.apache.org/dist/hadoop/common/hadoop-3.0.0/hadoop-3.0.0.tar.gz

mkdir -p /home/es/data/hadoop/dfs/{namenode,datanode,tmp}
mkdir -p /home/es/data/hadoop/tmp

tar -zxf hadoop-3.0.0.tar.gz
cd hadoop-3.0.0
vi etc/hadoop/mapred-site.xml

<configuration>

        <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.address</name>
                <value>master:10020</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.webapp.address</name>
                <value>master:19888</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.done-dir</name>
                <value>/history/done</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.intermediate-done-dir</name>
                <value>/history/done_intermediate</value>
        </property>

        <property>
                <name>mapreduce.input.fileinputformat.split.minsize</name>
                <value>5120</value>
                <description>5120 bytes = 5 KB (this value is in bytes; 5 MB would be 5242880)</description>
        </property>
            <property>
                <name>mapreduce.cluster.local.dir</name>
                <value>/home/es/data/hadoop/local</value>
        </property>
        <property>
                <name>mapreduce.map.memory.mb</name>
                <value>512</value>
                <description>The amount of memory to request from the scheduler for each
                map task.
                </description>
        </property>
        <property>
                <name>mapreduce.reduce.memory.mb</name>
                <value>1024</value>
                <description>The amount of memory to request from the scheduler for each
                reduce task.
                </description>
        </property>
        <property>
                <name>mapreduce.map.java.opts</name>
                <value>-Xmx435m -XX:-UseGCOverheadLimit</value>
                <description>Java opts only for the child processes that are maps. If set,
                this will be used instead of mapred.child.java.opts.
                </description>
        </property>
        <property>
                <name>mapreduce.reduce.java.opts</name>
                <value>-Xmx870m -XX:-UseGCOverheadLimit</value>
                <description>Java opts only for the child processes that are reduces. If set,
                this will be used instead of mapred.child.java.opts.
                </description>
        </property>
            <property>
                <name>mapreduce.task.timeout</name>
                <value>300000</value>
        </property>
        <property>
                <name>mapreduce.jobtracker.handler.count</name>
                <value>10</value>
        </property>
        <property>
                <name>mapreduce.job.reduce.slowstart.completedmaps</name>
                <value>0.07</value>
                <description>Fraction of the number of maps in the job which should be
                complete before reduces are scheduled for the job.
                </description>
                </property>
        <property>
                <name>mapreduce.reduce.shuffle.parallelcopies</name>
                <value>10</value>
        </property>
        <property>
                <name>mapreduce.tasktracker.http.threads</name>
                <value>10</value>
        </property>
        <property>
                <name>mapreduce.tasktracker.map.tasks.maximum</name>
                <value>4</value>
        </property>
        <property>
                <name>mapreduce.tasktracker.reduce.tasks.maximum</name>
                <value>1</value>
        </property>

</configuration>

vi etc/hadoop/core-site.xml

<configuration>

        <property>
                <name>hadoop.tmp.dir</name>
                <value>file:///home/es/data/hadoop/tmp</value>
                <description>A base for other temporary directories.</description>
        </property>
        <property>
                <name>io.file.buffer.size</name>
                <value>131072</value>
        </property>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://master:8020</value>
        </property>

</configuration>

 

vi etc/hadoop/hdfs-site.xml

<configuration>

        <property>
                <name>dfs.namenode.name.dir</name>
                <value>/home/es/data/hadoop/dfs/namenode</value>
        </property>
        <property>
                <name>dfs.datanode.data.dir</name>
                <value>/home/es/data/hadoop/dfs/datanode</value>
        </property>
        <property>
                <name>dfs.permissions</name>
                <value>false</value>
        </property>
    <property>
                <name>dfs.replication</name>
                <value>1</value>
        </property>

</configuration>

vi etc/hadoop/yarn-site.xml

<configuration>

        <property>
                <name>yarn.resourcemanager.hostname</name>
                <value>master</value>
        </property>
        <property>
                <description>The address of the applications manager interface in the RM.</description>
                <name>yarn.resourcemanager.address</name>
                <value>${yarn.resourcemanager.hostname}:8032</value>
        </property>

        <property>
                <description>The address of the scheduler interface.</description>
                <name>yarn.resourcemanager.scheduler.address</name>
                <value>${yarn.resourcemanager.hostname}:8030</value>
        </property>

        <property>
                <description>The http address of the RM web application.</description>
                <name>yarn.resourcemanager.webapp.address</name>
                <value>${yarn.resourcemanager.hostname}:8088</value>
        </property>

        <property>
                <description>The https address of the RM web application.</description>
                <name>yarn.resourcemanager.webapp.https.address</name>
                <value>${yarn.resourcemanager.hostname}:8090</value>
        </property>
        <property>
                <name>yarn.resourcemanager.resource-tracker.address</name>
                <value>${yarn.resourcemanager.hostname}:8031</value>
        </property>
        <property>
                <description>The address of the RM admin interface.</description>
                <name>yarn.resourcemanager.admin.address</name>
                <value>${yarn.resourcemanager.hostname}:8033</value>
        </property>

        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
                <description></description>
        </property>

        <property>
                <name>yarn.log-aggregation-enable</name>
                <value>true</value>
        </property>



        <property>
                <description>Amount of physical memory, in MB, that can be allocated
                 for containers.default is 8192MB</description>
                <name>yarn.nodemanager.resource.memory-mb</name>
                <value>2048</value>
        </property>
            <property>
                <description>The minimum allocation for every container request at the RM,
                in MBs. Memory requests lower than this won't take effect,
                and the specified value will get allocated at minimum.</description>
                <name>yarn.scheduler.minimum-allocation-mb</name>
                <value>512</value>
        </property>
        <property>
                <description>The maximum allocation for every container request at the RM,
                in MBs. Memory requests higher than this won't take effect,
                and will get capped to this value.</description>
                <name>yarn.scheduler.maximum-allocation-mb</name>
                <value>2048</value>
        </property>
        <property>
                <name>yarn.app.mapreduce.am.resource.mb</name>
                <value>1024</value>
        </property>
        <property>
                <name>yarn.app.mapreduce.am.command-opts</name>
                <value>-Xmx870m</value>
        </property>


        <property>
                <description>Number of vcores that can be allocated
                for containers. This is used by the RM scheduler when allocating
                resources for containers. This is not used to limit the number of
                physical cores used by YARN containers.</description>
                <name>yarn.nodemanager.resource.cpu-vcores</name>
                <value>4</value>
        </property>
        <property>
                <name>yarn.scheduler.minimum-allocation-vcores</name>
                <value>1</value>
        </property>
        <property>
                <description>The maximum allocation for every container request at the RM,
                in terms of virtual CPU cores. Requests higher than this won't take effect,
                and will get capped to this value.</description>
                <name>yarn.scheduler.maximum-allocation-vcores</name>
                <value>4</value>
        </property>
        <property>
                <name>yarn.nodemanager.vmem-pmem-ratio</name>
                <value>5</value>
        </property>
        <property>
                <name>yarn.nodemanager.vmem-check-enabled</name>
                <value>false</value>
        </property>
        <property>
                <description>List of directories to store localized files in. An
                application's localized file directory will be found in:
                ${yarn.nodemanager.local-dirs}/usercache/${user}/appcache/application_${appid}.
                Individual containers' work directories, called container_${contid}, will
                be subdirectories of this.</description>
                <name>yarn.nodemanager.local-dirs</name>
                <value>/home/es/data/hadoop/tmp/nm_local_dir</value>
        </property>

    <!-- hadoop-3.0.0 new config-->
        <property>
                <name>yarn.nodemanager.env-whitelist</name>
                <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
        </property>

</configuration>

 

 

vi etc/hadoop/hadoop-env.sh   # append the following lines at the bottom
export JAVA_HOME=/usr/local/jdk
export HADOOP_HEAPSIZE=256

vi etc/hadoop/workers
master
#format namenode
bin/hdfs namenode -format    # equivalent to the older form: bin/hadoop namenode -format
#start hdfs
bin/hdfs --daemon start namenode    # equivalent to the older form: sbin/hadoop-daemon.sh start namenode
bin/hdfs --daemon start datanode

#stop
bin/hdfs --daemon stop datanode
bin/hdfs --daemon stop namenode


#start yarn
bin/yarn --daemon start resourcemanager
bin/yarn --daemon start nodemanager

#stop yarn
bin/yarn --daemon stop nodemanager
bin/yarn --daemon stop resourcemanager


#test hdfs
bin/hdfs dfs -mkdir /tmp
bin/hdfs dfs -put README.txt /tmp
bin/hdfs dfs -text /tmp/README.txt
bin/hdfs dfs -rm /tmp/README.txt

#test mr in namenode
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0.jar pi 2 2


su root
#firewall-cmd --zone=public --remove-port=9200/tcp --permanent
#firewall-cmd --zone=public --list-ports
#firewall-cmd --reload

#namenode http
firewall-cmd --zone=public --add-port=9870/tcp --permanent
##namenode rpc
firewall-cmd --zone=public --add-port=8020/tcp --permanent

#datanode
firewall-cmd --zone=public --add-port=9864/tcp --permanent
firewall-cmd --zone=public --add-port=9866/tcp --permanent
firewall-cmd --zone=public --add-port=9867/tcp --permanent

#resourcemanager
firewall-cmd --zone=public --add-port=8088/tcp --permanent
firewall-cmd --zone=public --add-port=8030/tcp --permanent
firewall-cmd --zone=public --add-port=8031/tcp --permanent
firewall-cmd --zone=public --add-port=8032/tcp --permanent
firewall-cmd --zone=public --add-port=8033/tcp --permanent

#nodemanager
firewall-cmd --zone=public --add-port=8040/tcp --permanent
firewall-cmd --zone=public --add-port=8042/tcp --permanent
#firewall-cmd --zone=public --add-port=13562/tcp --permanent
#firewall-cmd --zone=public --add-port=37828/tcp --permanent

 

hbase-2.1.0 安裝
su es
cd /home/es
wget http://archive.apache.org/dist/hbase/2.1.0/hbase-2.1.0-bin.tar.gz
tar -zxf hbase-2.1.0-bin.tar.gz
cd hbase-2.1.0

vi conf/hbase-site.xml

<configuration>
        <property>
                <name>hbase.rootdir</name>
                <value>hdfs://master:8020/hbase</value>
        </property>
        <property>
                <name>hbase.cluster.distributed</name>
                <value>true</value>
        </property>
        <property>
                <name>hbase.zookeeper.property.dataDir</name>
                <value>/home/es/data/zookeeper</value>
        </property>
        <property>
                <name>hbase.zookeeper.quorum</name>
                <value>master:2181</value>
        </property>
        
        <property>
                <name>hbase.unsafe.stream.capability.enforce</name>
                <value>false</value>
        </property>

</configuration>

 

vi conf/hbase-env.sh
export HBASE_HEAPSIZE=1G
export JAVA_HOME=/usr/local/jdk

vi conf/regionservers
master

#start
bin/hbase-daemon.sh start zookeeper
bin/hbase-daemon.sh start master
bin/hbase-daemon.sh start regionserver
bin/hbase-daemon.sh start master-backup

#stop
bin/hbase-daemon.sh stop regionserver
bin/hbase-daemon.sh stop master

#zk
firewall-cmd --zone=public --add-port=2181/tcp --permanent
#hmaster
#http
firewall-cmd --zone=public --add-port=16010/tcp --permanent
#rpc
firewall-cmd --zone=public --add-port=16000/tcp --permanent
#regionserver
firewall-cmd --zone=public --add-port=16030/tcp --permanent
firewall-cmd --zone=public --add-port=16020/tcp --permanent
#rules added with --permanent only take effect after a reload (run as root)
firewall-cmd --reload

#test hbase
bin/hbase shell
create 'blog','info'
desc 'blog'
list
put 'blog','r1','info:c1','v1'
scan 'blog',{LIMIT=>2}
get 'blog','r1'
delete 'blog','r1','info:c1'


每一個family列族對應一個文件
$HADOOP_HOME/bin/hdfs dfs -ls /hbase/data/default/blog
手工把memstore寫到Hfile中
flush 'blog'
手工合併hfile
compact 'blog'

hbase2列族必須事先定義(寫入資料時不能臨時新增列族;如需新增,須用 alter 修改表結構,或如下重建表)

disable 'blog'
drop 'blog'
create 'blog','info','info1'
enable 'blog'
exists 'blog'
truncate 'blog'

相關文章
相關標籤/搜索