[root@srv01 ~]# sqoop import --connect jdbc:mysql://localhost:3306/test --username root --password root --table user --columns 'uid,uname' -m 1 --target-dir '/sqoop/user'  # -m 指定map進程數,--target-dir 指定HDFS上的存放目錄
[root@srv01 ~]# sqoop import --hive-import --connect jdbc:mysql://localhost:3306/test --username root --password root --table user --columns 'uid,uname' -m 1
[root@srv01 ~]# sqoop import --hive-import --connect jdbc:mysql://localhost:3306/test --username root --password root --table user --columns 'uid,uname' -m 1 --hive-table user1  # 如果hive中沒有這張表,則會建立這張表來保存對應數據
[root@srv01 ~]# sqoop import --hive-import --connect jdbc:mysql://localhost:3306/test --username root --password root --table user --columns 'uid,uname' -m 1 --hive-table user2 --where 'uid=10'
[root@srv01 ~]# sqoop import --hive-import --connect jdbc:mysql://localhost:3306/test --username root --password root -m 1 --hive-table user6 --query 'select * from user where uid<10 and $CONDITIONS' --target-dir /sqoop/user5  # and $CONDITIONS(必須大寫)必須加在查詢語句的where子句中,不加會報錯
[root@srv01 ~]# sqoop export --connect jdbc:mysql://localhost:3306/test --username root --password root -m 1 --table user5 --export-dir /sqoop/user5  # 導出到MySQL:兩張表的列的個數和類型必須相同
啓動sqoop-shell
1 jjzhu:bin didi$ sqoop2-shell 2 Setting conf dir: /opt/sqoop-1.99.7/bin/../conf 3 Sqoop home directory: /opt/sqoop-1.99.7 4 Sqoop Shell: Type 'help' or '\h' for help. 5 6 sqoop:000> set server --host localhost --port 12000 --webapp sqoop 7 Server is set successfully 8 sqoop:000> show version --all 9 client version: 10 Sqoop 1.99.7 source revision 435d5e61b922a32d7bce567fe5fb1a9c0d9b1bbb 11 Compiled by abefine on Tue Jul 19 16:08:27 PDT 2016 12 0 [main] WARN org.apache.hadoop.util.NativeCodeLoader - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 13 server version: 14 Sqoop 1.99.7 source revision 435d5e61b922a32d7bce567fe5fb1a9c0d9b1bbb 15 Compiled by abefine on Tue Jul 19 16:08:27 PDT 2016 16 API versions: 17 [v1] 18 sqoop:000>
配置sqoop server
sqoop:000> set server --host localhost --port 12000 --webapp sqoop Server is set successfully
查看server鏈接是否可用
sqoop:000> show version --all client version: Sqoop 1.99.7 source revision 435d5e61b922a32d7bce567fe5fb1a9c0d9b1bbb Compiled by abefine on Tue Jul 19 16:08:27 PDT 2016 0 [main] WARN org.apache.hadoop.util.NativeCodeLoader - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable server version: Sqoop 1.99.7 source revision 435d5e61b922a32d7bce567fe5fb1a9c0d9b1bbb Compiled by abefine on Tue Jul 19 16:08:27 PDT 2016 API versions: [v1] sqoop:000>
查看sqoop server上可用的連接器(connector)
1 sqoop:000> show connector 2 +------------------------+---------+------------------------------------------------------------+----------------------+ 3 | Name | Version | Class | Supported Directions | 4 +------------------------+---------+------------------------------------------------------------+----------------------+ 5 | generic-jdbc-connector | 1.99.7 | org.apache.sqoop.connector.jdbc.GenericJdbcConnector | FROM/TO | 6 | kite-connector | 1.99.7 | org.apache.sqoop.connector.kite.KiteConnector | FROM/TO | 7 | oracle-jdbc-connector | 1.99.7 | org.apache.sqoop.connector.jdbc.oracle.OracleJdbcConnector | FROM/TO | 8 | ftp-connector | 1.99.7 | org.apache.sqoop.connector.ftp.FtpConnector | TO | 9 | hdfs-connector | 1.99.7 | org.apache.sqoop.connector.hdfs.HdfsConnector | FROM/TO | 10 | kafka-connector | 1.99.7 | org.apache.sqoop.connector.kafka.KafkaConnector | TO | 11 | sftp-connector | 1.99.7 | org.apache.sqoop.connector.sftp.SftpConnector | TO | 12 +------------------------+---------+------------------------------------------------------------+----------------------+ 13 sqoop:000>
用以下命令建立一個generic-jdbc-connector的連接(link)
1 sqoop:002> create link -c generic-jdbc-connector 2 Creating link for connector with name generic-jdbc-connector 3 Please fill following values to create new link object 4 Name: mysql_weibouser_link 5 6 Database connection 7 8 Driver class: com.mysql.jdbc.Driver 9 Connection String: jdbc:mysql://127.0.0.1:3306/spider 10 Username: root 11 Password: **** 12 Fetch Size: 13 Connection Properties: 14 There are currently 0 values in the map: 15 entry# protocol=tcp 16 There are currently 1 values in the map: 17 protocol = tcp 18 entry# 19 20 SQL Dialect 21 22 Identifier enclose: **注意:這裏不能直接回車!要輸入一個空格!因為如果不輸入,查詢mysql表的時候會在表名兩邊加上雙引號(""),導致查詢出錯! 23 ** 24 New link was successfully created with validation status OK and name mysql_weibouser_link
建立hdfs link
1 sqoop:002> create link -c hdfs-connector 2 Creating link for connector with name hdfs-connector 3 Please fill following values to create new link object 4 Name: hdfs_weibouser_link 5 6 HDFS cluster 7 8 URI: hdfs://localhost:9000 9 Conf directory: /opt/hadoop-2.7.3/etc/hadoop 10 Additional configs:: 11 There are currently 0 values in the map: 12 entry# 13 New link was successfully created with validation status OK and name hdfs_weibouser_link
查看link
1 sqoop:002> show link 2 +----------------------+------------------------+---------+ 3 | Name | Connector Name | Enabled | 4 +----------------------+------------------------+---------+ 5 | mysql_weibouser | generic-jdbc-connector | true | 6 | mysql_weibouser_link | generic-jdbc-connector | true | 7 | hdfs_link | hdfs-connector | true | 8 | hdfs_link2 | hdfs-connector | true | 9 | hdfs_weibouser_link | hdfs-connector | true | 10 +----------------------+------------------------+---------+
1 sqoop:002> create job -f "mysql_weibouser_link" -t "hdfs_weibouser_link" 2 Creating job for links with from name mysql_weibouser_link and to name hdfs_weibouser_link 3 Please fill following values to create new job object 4 Name: job_weibouser 5 6 Database source 7 8 Schema name: spider 9 Table name: spiders_weibouser 10 SQL statement: 11 Column names: 12 There are currently 0 values in the list: 13 element# 14 Partition column: 15 Partition column nullable: 16 Boundary query: 17 18 Incremental read 19 20 Check column: 21 Last value: 22 23 Target configuration 24 25 Override null value: 26 Null value: 27 File format: 28 0 : TEXT_FILE 29 1 : SEQUENCE_FILE 30 2 : PARQUET_FILE 31 Choose: 0 32 Compression codec: 33 0 : NONE 34 1 : DEFAULT 35 2 : DEFLATE 36 3 : GZIP 37 4 : BZIP2 38 5 : LZO 39 6 : LZ4 40 7 : SNAPPY 41 8 : CUSTOM 42 Choose: 0 43 Custom codec: 44 Output directory: hdfs://localhost:9000/usr/jjzhu/spider/spiders_weibouser 45 Append mode: 46 47 Throttling resources 48 49 Extractors: 2 50 Loaders: 2 51 52 Classpath configuration 53 54 Extra mapper jars: 55 There are currently 0 values in the list: 56 element# 57 New job was successfully created with validation status OK and name job_weibouser
各參數意義:
1 以下是各個屬性 2 Name:一個標識符,自己指定即可。 3 Schema Name:指定Database或Schema的名字,在MySQL中,Schema同Database類似,具體有什麼區別沒有深究過,但官網描述在建立時差不多。 4 Table Name:自己指定要導出的表。 5 SQL Statement:就是sql查詢語句,文檔上說需要在語句中包含一個${CONDITIONS}佔位符,但我一直沒有建立成功,貌似是一個條件子句。 6 配置完以上幾項,又會出現element#提示符,提示輸入Column names列表的元素,直接回車跳過。 7 Partition column: 8 Partition column nullable: 9 Boundary query 10 Last value 11 後面需要配置數據目的地各項值: 12 Null value:大概說的是如果有空值用什麼值覆蓋 13 File format:指定在HDFS中的數據文件是什麼文件格式,這裏使用TEXT_FILE,即最簡單的文本文件。 14 Compression codec:用於指定使用什麼壓縮算法對導出的數據文件進行壓縮,我指定NONE,這個也可以使用自定義的壓縮算法CUSTOM,用Java實現相應的接口。 15 Custom codec:這個就是指定的custom壓縮算法,本例選擇NONE,所以直接回車過去。 16 Output directory:指定存儲在HDFS文件系統中的路徑,這裏最好指定一個不存在的路徑,或者存在但路徑下是空的,貌似這樣才能成功。 17 Append mode:用於指定是否在已存在導出文件的情況下將新數據追加到數據文件中。 18 Extractors:2 19 Loaders:2 20 最後再次出現element#提示符,用於輸入extra mapper jars的屬性,可以什麼都不寫,直接回車。 21 22 至此若出現successfully則證明已經成功建立。
查看建立的job
1 sqoop:002> show job 2 +----+---------------+-----------------------------------------------+--------------------------------------+---------+ 3 | Id | Name | From Connector | To Connector | Enabled | 4 +----+---------------+-----------------------------------------------+--------------------------------------+---------+ 5 | 1 | spider_job | mysql_weibouser (generic-jdbc-connector) | hdfs_link (hdfs-connector) | true | 6 | 2 | job_weibouser | mysql_weibouser_link (generic-jdbc-connector) | hdfs_weibouser_link (hdfs-connector) | true | 7 +----+---------------+-----------------------------------------------+--------------------------------------+---------+ 8 sqoop:002>
啓動job
1 start job -n job_weibouser 2 sqoop:002> start job -n job_weibouser 3 Submission details 4 Job Name: job_weibouser 5 Server URL: http://localhost:12000/sqoop/ 6 Created by: didi 7 Creation date: 2017-04-11 14:37:46 CST 8 Lastly updated by: didi 9 External ID: job_1491888730134_0003 10 http://jjzhu:8088/proxy/application_1491888730134_0003/ 11 2017-04-11 14:37:46 CST: BOOTING - Progress is not available
查看job運行狀態
1 sqoop:002> status job -n job_weibouser 2 Submission details 3 Job Name: job_weibouser 4 Server URL: http://localhost:12000/sqoop/ 5 Created by: didi 6 Creation date: 2017-04-11 14:37:46 CST 7 Lastly updated by: didi 8 External ID: job_1491888730134_0003 9 http://jjzhu:8088/proxy/application_1491888730134_0003/ 10 2017-04-11 14:38:41 CST: SUCCEEDED 11 Counters: 12 org.apache.hadoop.mapreduce.FileSystemCounter 13 FILE_LARGE_READ_OPS: 0 14 FILE_WRITE_OPS: 0 15 HDFS_READ_OPS: 2 16 HDFS_BYTES_READ: 290 17 HDFS_LARGE_READ_OPS: 0 18 FILE_READ_OPS: 0 19 FILE_BYTES_WRITTEN: 51361466 20 FILE_BYTES_READ: 25115854 21 HDFS_WRITE_OPS: 2 22 HDFS_BYTES_WRITTEN: 24652721 23 org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter 24 BYTES_WRITTEN: 0 25 org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter 26 BYTES_READ: 0 27 org.apache.hadoop.mapreduce.JobCounter 28 TOTAL_LAUNCHED_MAPS: 2 29 VCORES_MILLIS_REDUCES: 20225 30 MB_MILLIS_MAPS: 27120640 31 TOTAL_LAUNCHED_REDUCES: 2 32 SLOTS_MILLIS_REDUCES: 20225 33 VCORES_MILLIS_MAPS: 26485 34 MB_MILLIS_REDUCES: 20710400 35 SLOTS_MILLIS_MAPS: 26485 36 MILLIS_REDUCES: 20225 37 OTHER_LOCAL_MAPS: 2 38 MILLIS_MAPS: 26485 39 org.apache.sqoop.submission.counter.SqoopCounters 40 ROWS_READ: 109408 41 ROWS_WRITTEN: 109408 42 org.apache.hadoop.mapreduce.TaskCounter 43 MAP_OUTPUT_MATERIALIZED_BYTES: 25115866 44 REDUCE_INPUT_RECORDS: 109408 45 SPILLED_RECORDS: 218816 46 MERGED_MAP_OUTPUTS: 4 47 VIRTUAL_MEMORY_BYTES: 0 48 MAP_INPUT_RECORDS: 0 49 SPLIT_RAW_BYTES: 290 50 FAILED_SHUFFLE: 0 51 MAP_OUTPUT_BYTES: 24762129 52 REDUCE_SHUFFLE_BYTES: 25115866 53 PHYSICAL_MEMORY_BYTES: 0 54 GC_TIME_MILLIS: 1648 55 REDUCE_INPUT_GROUPS: 109408 56 COMBINE_OUTPUT_RECORDS: 0 57 SHUFFLED_MAPS: 4 58 REDUCE_OUTPUT_RECORDS: 109408 59 MAP_OUTPUT_RECORDS: 109408 60 COMBINE_INPUT_RECORDS: 0 61 CPU_MILLISECONDS: 0 62 COMMITTED_HEAP_BYTES: 1951399936 63 Shuffle Errors 64 CONNECTION: 0 65 WRONG_LENGTH: 0 66 BAD_ID: 0 67 WRONG_MAP: 0 68 WRONG_REDUCE: 0 69 
IO_ERROR: 0 70 Job executed successfully
查看hdfs的相關路徑,看是否有輸出文件
1 jjzhu:~ didi$ hdfs dfs -ls /usr/jjzhu/spider 2 Found 4 items 3 drwxr-xr-x - didi supergroup 0 2017-04-11 14:38 /usr/jjzhu/spider/spiders_weibouser 4 drwxr-xr-x - 777 supergroup 0 2017-04-11 10:58 /usr/jjzhu/spider/weibouser 5 drwxr-xr-x - 777 supergroup 0 2017-04-11 13:33 /usr/jjzhu/spider/weobouser 6 drwxr-xr-x - didi supergroup 0 2017-04-11 13:39 /usr/jjzhu/spider/weobouser2 7 jjzhu:~ didi$ hdfs dfs -ls /usr/jjzhu/spider/spiders_weibouser 8 Found 2 items 9 -rw-r--r-- 1 didi supergroup 12262783 2017-04-11 14:38 /usr/jjzhu/spider/spiders_weibouser/33b56441-b638-48cc-8d0d-37a808f25653.txt 10 -rw-r--r-- 1 didi supergroup 12389938 2017-04-11 14:38 /usr/jjzhu/spider/spiders_weibouser/73b20d50-de72-4aea-8c8c-d97cdc48e667.txt
轉自:https://yq.aliyun.com/articles/73582