DataX是一個在異構的數據庫/文件系統之間高速交換數據的工具,實現了在任意的數據處理系統(RDBMS/Hdfs/Local filesystem)之間的數據交換。
http://code.taobao.org/p/datax/wiki/DataX%E4%BA%A7%E5%93%81%E8%AF%B4%E6%98%8E/
//啓動腳本 #!/bin/bash source ~/.bashrc python /home/hadoop/ceshi/datax/bin/datax.py /home/hadoop/test/jobJson/test2.json
// Job JSON: reads from Oracle via a custom querySql and prints the rows with streamwriter.
{
  "job": {
    "setting": {
      "speed": {
        "channel": 5
      }
    },
    "content": [
      {
        "reader": {
          "name": "oraclereader",
          "parameter": {
            "username": "****",
            "password": "****",
            "where": "",
            "connection": [
              {
                "querySql": [
                  "select callingtel,calledtel from trecord where calledtel <= 100 group by callingtel,calledtel"
                ],
                "jdbcUrl": [
                  "jdbc:oracle:thin:@192.168.140.30:1521:TEST"
                ]
              }
            ]
          }
        },
        "writer": {
          "name": "streamwriter",
          "parameter": {
            "visible": true,
            "encoding": "UTF-8"
          }
        }
      }
    ]
  }
}
// Job JSON; the launch command is similar to the one above.
// Note the channel setting: it differs from the previous job, and is written here as a JSON number rather than a quoted string.
// NOTE(review): the reader has a single querySql entry, so the higher channel count may not add read parallelism; confirm against the DataX documentation.
{
  "job": {
    "content": [
      {
        "reader": {
          "name": "oraclereader",
          "parameter": {
            "connection": [
              {
                "querySql": [
                  "SELECT CALLINGTEL AS START_ID, (CASE DATATYPE WHEN 0 THEN 'voice'WHEN 3 THEN 'sms'ELSE ''END ) calltype, (BEGINTIME - TO_DATE ('1970-01-01', 'yyyy-mm-dd') ) * 24 * 60 * 60 * 1000 AS BeginTime, ((BEGINTIME - TO_DATE ('1970-01-01', 'yyyy-mm-dd') ) * 24 * 60 * 60 * 1000 ) + (SPAN * 1000) AS EndTime, SPAN AS Span, CALLEDTEL AS END_ID, (CASE DATATYPE WHEN 0 THEN 'voice'WHEN 3 THEN 'sms'ELSE ''END ) TYPE FROM TRECORD WHERE CALLINGTEL != CALLEDTEL AND CALLINGTEL IS NOT NULL AND CALLEDTEL IS NOT NULL"
                ],
                "jdbcUrl": [
                  "jdbc:oracle:thin:@10.1.140.30:1521:TEST"
                ]
              }
            ],
            "password": "test",
            "username": "test"
          }
        },
        "writer": {
          "name": "txtfilewriter",
          "parameter": {
            "path": "/home/hadoop/test/data/",
            "fileName": "rel",
            "fileType": "csv",
            "fieldDelimiter": ",",
            "writeMode": "append"
          }
        }
      }
    ],
    "setting": {
      "speed": {
        "channel": 10
      }
    }
  }
}
// querySql mode: the reader's parameter.column does not need to be specified.
// speed.channel is written as a JSON number rather than a quoted string.
// NOTE(review): the writer declares callingtel/calledtel as INT; if these columns hold full-length phone numbers, confirm the values fit that type.
{
  "job": {
    "content": [
      {
        "reader": {
          "name": "oraclereader",
          "parameter": {
            "connection": [
              {
                "querySql": [
                  "select callingtel,calledtel from trecord where to_char(rectime,'yyyy-mm-dd')=to_char(sysdate - 1,'yyyy-mm-dd') group by callingtel,calledtel"
                ],
                "jdbcUrl": [
                  "jdbc:oracle:thin:@192.168.140.30:1521:TEST"
                ]
              }
            ],
            "password": "****",
            "username": "****"
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "column": [
              {
                "name": "callingtel",
                "type": "INT"
              },
              {
                "name": "calledtel",
                "type": "INT"
              }
            ],
            "compress": "",
            "defaultFS": "hdfs://192.168.140.11:9000",
            "fieldDelimiter": " ",
            "fileName": "trecord",
            "fileType": "text",
            "path": "/user/test/data/",
            "writeMode": "append"
          }
        }
      }
    ],
    "setting": {
      "speed": {
        "channel": 2
      }
    }
  }
}