1. What is Sqoop: Sqoop is an open-source auxiliary tool for Hadoop from the Apache organization.
2. Purpose: moving data between HDFS/Hive and relational databases (RDB).
Setting up the Sqoop development environment
CDH versions of Hadoop and Hive

1. Environment preparation: firewall, IP/hostname mapping, SELinux, JDK.

2. Install Hadoop
   1. Extract the archive.
   2. Edit the configuration files: hadoop-env.sh, then core-site.xml:

      ```xml
      <property>
        <name>fs.default.name</name>
        <value>hdfs://sqoop:8020</value>
      </property>
      <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/install/hadoop-2.5.0-cdh5.3.6/data/tmp</value>
      </property>
      ```

      hdfs-site.xml:

      ```xml
      <property>
        <name>dfs.replication</name>
        <value>1</value>
      </property>
      <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
      </property>
      ```

      yarn-site.xml:

      ```xml
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
      </property>
      ```

      mapred-site.xml:

      ```xml
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      ```

      In the slaves file, change the entry to sqoop.
   3. Extract cdh5.3.6-snappy-lib-natirve.tar.gz and copy lib/native/* into /opt/install/hadoop-2.5.0-cdh5.3.6/lib/native:

      ```shell
      cp * /opt/install/hadoop-2.5.0-cdh5.3.6/lib/native/
      ```
   4. Format the NameNode:

      ```shell
      bin/hdfs namenode -format
      ```
   5. Start the daemons:

      ```shell
      sbin/hadoop-daemon.sh start namenode
      sbin/yarn-daemon.sh start resourcemanager
      sbin/yarn-daemon.sh start nodemanager
      sbin/hadoop-daemon.sh start datanode
      ```

      If jps shows the following, startup succeeded:

      ```
      [root@sqoop hadoop-2.5.0-cdh5.3.6]# jps
      1609 ResourceManager
      2021 Jps
      1858 NodeManager
      1982 DataNode
      1485 NameNode
      ```

3. Install Hive
   1. Extract the archive.
   2. Edit the configuration files. hive-env.sh:

      ```shell
      # Set HADOOP_HOME to point to a specific hadoop install directory
      HADOOP_HOME=/opt/install/hadoop-2.5.0-cdh5.3.6
      # Hive Configuration Directory can be controlled by:
      export HIVE_CONF_DIR=/opt/install/hive-0.13.1-cdh5.3.6/conf
      ```

      hive-site.xml (metastore):

      ```xml
      <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://hadoop5.baizhiedu.com:3306/cdhmetastore?createDatabaseIfNotExist=true</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
      </property>
      <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
      </property>
      <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
      </property>
      ```
   3. Create two directories in HDFS: /tmp and /user/hive/warehouse (see the sketch after this list).
   4. Put the MySQL JDBC jar into hive_home/lib.
   5. Start Hive:

      ```shell
      bin/hive
      ```
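A minimal sketch of step 3 above (creating the two HDFS directories), run from the Hadoop install directory; the g+w permissions are an assumption based on Hive's usual setup guidance, not something stated in these notes:

```shell
# create the directories Hive expects
bin/hdfs dfs -mkdir -p /tmp
bin/hdfs dfs -mkdir -p /user/hive/warehouse

# assumed: make them group-writable, as Hive's docs typically recommend
bin/hdfs dfs -chmod g+w /tmp
bin/hdfs dfs -chmod g+w /user/hive/warehouse
```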
# Sqoop installation

1. Extract Sqoop.
2. Edit the configuration in sqoop_home/conf:

   ```shell
   cp sqoop-env-template.sh sqoop-env.sh
   ```

   In conf/sqoop-env.sh:

   ```shell
   export HADOOP_COMMON_HOME=/opt/install/hadoop-2.5.0-cdh5.3.6
   export HADOOP_MAPRED_HOME=/opt/install/hadoop-2.5.0-cdh5.3.6
   export HIVE_HOME=/opt/cdh5/hive-0.13.1-cdh5.3.6
   ```
3. Copy the MySQL connector jar into sqoop_home/lib (see the sketch after this list).
4. Test that Sqoop works:

   ```shell
   bin/sqoop list-databases --connect jdbc:mysql://sqoop:3306 --username root --password 1234456
   ```
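A sketch of step 3; the exact jar name and version are assumptions (use whichever connector jar matches your MySQL server), and the Sqoop path is the one used later in these notes:

```shell
# assumed jar name and Sqoop install path; adjust to your environment
cp mysql-connector-java-5.1.27-bin.jar /opt/install/sqoop-1.4.5-cdh5.3.6/lib/
```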
Breaking a command across lines
```shell
bin/sqoop list-databases \
--connect \
jdbc:mysql://sqoop:3306 \
--username root \
--password 1234456
```
import (importing data from MySQL into HDFS)
# Create the database and table in MySQL

```sql
create database sqoop;

create table mysql_user(
  id int primary key,
  name varchar(12)
);

insert into mysql_user values (1,'lhc1');
insert into mysql_user values (2,'lhc2');
insert into mysql_user values (3,'lhc3');
insert into mysql_user values (4,'lhc4');
insert into mysql_user values (5,'lhc5');
```
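A quick sanity check before importing, nothing Sqoop-specific, just confirming the rows are there:

```sql
select * from mysql_user;
-- expected: 5 rows, ids 1 through 5
```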
Basic import
```shell
bin/sqoop import \
--connect \
jdbc:mysql://sqoop:3306/sqoop \
--username root \
--password 1234456 \
--table mysql_user
```

- Default HDFS target directory: /user/root/mysql_user
- In the default case here, the 5 rows were imported as 5 splits / 5 map tasks; there is no reduce phase.
- Columns are comma-separated by default, e.g. 1,lhc1
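To see the result, list and cat the map-output part files (paths follow the defaults noted above):

```shell
bin/hdfs dfs -ls /user/root/mysql_user
bin/hdfs dfs -cat /user/root/mysql_user/part-m-*
# each line looks like: 1,lhc1
```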
Specifying the target directory in HDFS
```shell
bin/sqoop import \
--connect \
jdbc:mysql://sqoop:3306/sqoop \
--username root \
--password 1234456 \
--table mysql_user \
--target-dir /sqoop
```
Deleting a target directory that already exists in HDFS
```shell
bin/sqoop import \
--connect \
jdbc:mysql://sqoop:3306/sqoop \
--username root \
--password 1234456 \
--table mysql_user \
--target-dir /sqoop \
--delete-target-dir
```
Changing the default number of map tasks
```shell
bin/sqoop import \
--connect \
jdbc:mysql://hadoop6:3306/sqoop \
--username root \
--password 123456 \
--table mysql_user \
--target-dir /sqoop \
--delete-target-dir \
--num-mappers 1
```
Changing the default column delimiter
```shell
bin/sqoop import \
--connect \
jdbc:mysql://hadoop6:3306/sqoop \
--username root \
--password 123456 \
--table mysql_user \
--target-dir /sqoop \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by '\t'
```
Fast import (direct mode)
```shell
bin/sqoop import \
--connect \
jdbc:mysql://hadoop6:3306/sqoop \
--username root \
--password 123456 \
--table mysql_user \
--target-dir /sqoop \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by '\t' \
--direct
```

- In fast (direct) mode, Sqoop must be installed on the same machine as MySQL.
- In a cluster where Sqoop and MySQL are not co-located, the mysql/bin directory must be copied to every node.
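Direct mode for MySQL works by shelling out to the MySQL client tools (mysqldump for imports), which is why the binaries must be present on each node. A quick way to check a node:

```shell
# direct mode needs the MySQL client binaries on the node's PATH
which mysqldump || echo "mysqldump not found: --direct will fail on this node"
```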
增長導入數據oop
# Incremental import

The incremental import options:

```
--check-column <column>   source column to check for incremental import (here: id)
--last-value <value>      last value already imported (here: 5)
--incremental <type>      type of incremental import: 'append' or 'lastmodified' (here: append)
```

Note: --delete-target-dir cannot be combined with incremental import.

```shell
bin/sqoop import \
--connect \
jdbc:mysql://hadoop6:3306/sqoop \
--username root \
--password 123456 \
--table mysql_user \
--target-dir /sqoop \
--num-mappers 1 \
--fields-terminated-by '\t' \
--direct \
--check-column id \
--last-value 5 \
--incremental append
```
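To see append mode in action, add rows past the last imported id and re-run the command above; only the new rows land in /sqoop. A sketch, assuming the mysql_user table from earlier:

```sql
insert into mysql_user values (6,'lhc6');
insert into mysql_user values (7,'lhc7');
-- re-running the import with --last-value 5 picks up only ids 6 and 7;
-- Sqoop then logs the new --last-value (7) to use for the next run
```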
export (exporting data from HDFS to the database)
First make sure the target table exists in the database:

```sql
create table to_mysql(
  id int primary key,
  name varchar(12)
);
```

```shell
bin/sqoop export \
--connect \
jdbc:mysql://sqoop:3306/sqoop \
--username root \
--password 1234456 \
--table to_mysql \
--export-dir /sqoop \
--num-mappers 1 \
--input-fields-terminated-by ','
```
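After the export, a quick check on the MySQL side (assuming /sqoop held the comma-delimited mysql_user data):

```sql
select * from to_mysql;
-- expected: the same rows that were sitting in /sqoop
```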
Hive Import
The Hive-specific options are:

```
--hive-import \
--hive-database baizhi129 \
--hive-table t_user \
```

Full command:

```shell
bin/sqoop import \
--connect \
jdbc:mysql://hadoop6:3306/sqoop \
--username root \
--password 123456 \
--table mysql_user \
--delete-target-dir \
--hive-import \
--hive-database baizhi_140 \
--hive-table t_user \
--num-mappers 1 \
--fields-terminated-by '\t'
```
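To confirm the import, query the table from the Hive CLI (database and table names as used above):

```sql
-- in the Hive shell (bin/hive)
use baizhi_140;
select * from t_user;
```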
Hive Export
As with the earlier export, the hive_mysql table must already exist in MySQL.

```shell
bin/sqoop export \
--connect \
jdbc:mysql://hadoop6:3306/sqoop \
--username root \
--password 123456 \
--table hive_mysql \
--export-dir /user/hive/warehouse/baizhi_140.db/t_user \
--num-mappers 1 \
--input-fields-terminated-by '\t'
```
Scripting Sqoop
The command lives in a standalone file.
```sql
create table filetomysql(
  id int,
  name varchar(12)
);
```

1. Create a Sqoop options file, an ordinary file, e.g. sqoop.file, with one option or value per line:

   ```
   export
   --connect
   jdbc:mysql://hadoop6:3306/sqoop
   --username
   root
   --password
   123456
   --table
   filetomysql
   --export-dir
   /sqoop
   --num-mappers
   1
   --input-fields-terminated-by
   '\t'
   ```
2. Execute the file:

   ```shell
   bin/sqoop --options-file /root/sqoop.file
   ```
Defining Sqoop jobs
1. Create a job:

   ```shell
   bin/sqoop job \
   --create test_job1 \
   -- \
   export \
   --connect \
   jdbc:mysql://hadoop6:3306/sqoop \
   --username root \
   --password 123456 \
   --table filetomysql \
   --export-dir /sqoop \
   --num-mappers 1 \
   --input-fields-terminated-by '\t'
   ```
2. Run the job:

   ```shell
   bin/sqoop job --exec test_job1
   ```
3. Problem: the job prompts for the password on every run, which gets in the way of automation, so store the password in a file:

   ```shell
   echo -n "123456" >> /root/password
   ```

   ```shell
   bin/sqoop job \
   --create test_job2 \
   -- \
   export \
   --connect \
   jdbc:mysql://hadoop6:3306/sqoop \
   --username root \
   --password-file file:///root/password \
   --table filetomysql \
   --export-dir /sqoop \
   --num-mappers 1 \
   --input-fields-terminated-by '\t'
   ```

   ```shell
   bin/sqoop job --exec test_job2
   ```
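A few related subcommands that help when managing saved jobs (standard sqoop job flags):

```shell
bin/sqoop job --list              # list all saved jobs
bin/sqoop job --show test_job2    # show a job's saved definition
bin/sqoop job --delete test_job1  # remove a saved job
```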
Scheduled execution (cron)
1. Install crontab:

   ```shell
   yum -y install vixie-cron
   ```
2. Edit the schedule with crontab -e. The field order is:

   ```
   minute hour day month week command
   ```

   Examples:

   ```shell
   */5 * * * * command                                                          # every 5 minutes
   */1 * * * * echo 'suns' >> /root/sunshuai
   */1 * * * * /opt/install/sqoop-1.4.5-cdh5.3.6/bin/sqoop job --exec test_job2
   0 0 * * * /opt/install/sqoop-1.4.5-cdh5.3.6/bin/sqoop job --exec test_job2   # once a day at midnight
   ```
3. Start and stop the service:

   ```shell
   /sbin/service crond start
   /sbin/service crond stop
   ps -ef | grep cron
   ```
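To confirm the schedule is active, list the installed entries and watch the cron log (the log path assumes a standard RHEL/CentOS layout):

```shell
crontab -l              # show the installed entries
tail -f /var/log/cron   # cron's execution log on RHEL/CentOS systems
```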