Spark SQL Thrift Server

### create data
## cat /dev/urandom | head -1 | md5sum | head -c 8
## echo "$(date +%s)"|sha256sum|base64|head -c 16;echo
## cat /dev/urandom | awk 'NR==1{print $0|"md5sum|base64|grep -Eo '^.{16}'";exit}'
# Emit 100000 lines of the form "<n>,<8 chars>" into /tmp/tmpa; the second
# field is the first 8 characters of the md5sum output for a $RANDOM value.
for ((i = 1; i <= 100000; i++)); do
    passwd=$(printf '%s\n' "$RANDOM" | md5sum | cut -c1-8)
    printf '%s,%s\n' "$i" "$passwd"
done > /tmp/tmpa

-- create table
-- Two string columns (id, name) stored as comma-delimited text, matching the
-- "<id>,<name>" rows written to /tmp/tmpa.
-- `if not exists` keeps this step re-runnable instead of failing when the
-- table is already present.
use dbName;
create table if not exists tmpa (id string, name string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
stored as textfile;

### create hql file
# Write the query to /tmp/tmpa.q. The heredoc delimiter is quoted ('EOF'), so
# the shell does not expand anything inside the SQL text.
# The query buckets rows by id modulo 4 and, per bucket, reports the row count
# and the number of distinct name prefixes of length full/4/3/2/1.
# id is declared as a string column, so it is cast to int explicitly before
# the modulo rather than relying on implicit string-to-number coercion; the
# same expression is used in group by so the two stay in sync.
cat >/tmp/tmpa.q <<'EOF'
use dbName;
select
    cast(id as int) % 4 as id
    ,count(1) as cnt
    ,count(distinct name) as diff
    ,count(distinct substr(name,1,4)) as diff2
    ,count(distinct substr(name,1,3)) as diff3
    ,count(distinct substr(name,1,2)) as diff4
    ,count(distinct substr(name,1,1)) as diff5
from tmpa
group by cast(id as int) % 4
order by id;
EOF

-- load data
-- Read /tmp/tmpa from the local file system and replace the current
-- contents of tmpa with it.
load data local inpath '/tmp/tmpa'
overwrite into table tmpa;

### start the thrift server
# Launch the Spark SQL Thrift server on YARN in client deploy mode, bound to
# 192.168.1.77:14000.
# `--master yarn --deploy-mode client` replaces the deprecated `yarn-client`
# master URL, which newer Spark releases warn about or reject.
/usr/hdp/2.6.0.3-8/spark/sbin/start-thriftserver.sh \
--master yarn \
--deploy-mode client \
--hiveconf hive.server2.thrift.bind.host=192.168.1.77 \
--hiveconf hive.server2.thrift.port=14000

### execute job
# Run the query file /tmp/tmpa.q against the Thrift server's JDBC endpoint
# (database dbName) as user root.
beeline \
    -u "jdbc:hive2://192.168.1.77:14000/dbName" \
    -n root \
    -f /tmp/tmpa.q
相關文章
相關標籤/搜索