impala 隨記

時間 2019-11-20

標籤 impala 欄目 Hadoop 简体版

原文原文鏈接

#修改分隔符
alter table DB.TABLE set SERDEPROPERTIES('field.delim'='\001');

#查看建表語句
show CREATE TABLE DB.TABLE;

#修改字段類型
ALTER TABLE DB.TABLE CHANGE hive_count_num hive_count_num string;

#更改表名
ALTER TABLE DB.TABLE RENAME TO DB.RETABLE;

#添加表字段
ALTER TABLE DB.TABLE ADD COLUMNS (is_black string);

#刪除表字段
ALTER TABLE DB.TABLE drop is_black;

#清除表數據
TRUNCATE DB.TABLE;

#刷新表
REFRESH DB.TABLE;

#刷新表元數據
INVALIDATE METADATA DB.TABLE;

#重新shuffle數據
INSERT OVERWRITE DB.TABLE SELECT * FROM DB.TABLE;

#複製表結構和數據
create table NEWDB.TABLE as select * from DB.TABLE;

#複製表結構
create table NEWDB.TABLE like DB.TABLE;

#修改表名
ALTER TABLE DB.TABLE RENAME TO DB.RETABLE;

#修改表字段類型
alter TABLE DB.TABLE CHANGE dt dt string

#顯示全部函數
show functions;

#查看函數用法(hive)
describe function substr;

#導出查詢結果到文件
impala-shell -q "select * from DB.TABLE limit 10" -B --output_delimiter="\t" -o output.txt

#收集表的統計信息，讓Impala基於每個表的大小、每個列不同值的個數等信息自動優化查詢。
compute stats DB.TABLE;

#導入和導出
export table DB.TABLE to '/user/hive/test';
import from '/user/hive/test';

#分區表導出和導入
export table DB.TABLE partition (etl_dt="2017-12-14") to '/user/hive/test';
import from '/user/hive/test';

#import重命名表
import table DB.TABLE from '/user/hive/test';

#導出表並導入到分區表的分區
import table DB.TABLE partition (etl_dt="2017-12-14") from '/user/hive/test';

#指定導入位置
import table DB.TABLE from '/user/hive/test' location '/user/hive/test';

#導入作爲一個外部表
import external TABLE DB.TABLE from '/user/hive/test';

#強制刪除數據庫
DROP DATABASE DB cascade

#數據類型轉換
SELECT cast(substr(createdate,1,10) as int)/86400 created FROM DB.TABLE

#分組排序
row_number() over (PARTITION BY t4.extractcashbillid,t1.ze_checktime ORDER BY t4.overduedateb DESC) flag
除Row_number外還有rank,dense_rank
rank() over([partition by col1] order by col2)
dense_rank() over([partition by col1] order by col2)
row_number() over([partition by col1] order by col2)
rank排序時出現相等的值時會有並列，即值相等的兩條數據會有相同的序列值，且並列以後的序號會跳躍，像這樣：1,1,3,4,5,5,7.
row_number的排序不容許並列，即便兩條記錄的值相等也不會出現相等的排序值
dense_rank排序的值容許並列，但並列以後的序號不會跳躍，保持連續，像這樣：1,1,2,3,4,4,5.

#建立分區表
CREATE TABLE IF NOT EXISTS DB.TABLE(
  autoindex INT,
  vindicatorteamid STRING,
  createtime STRING)
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
STORED AS TEXTFILE;

#添加分區
alter table DB.TABLE add partition (dt='2017-12-12');

#刪除分區
ALTER TABLE DB.TABLE DROP IF EXISTS PARTITION(dt='2017-12-12');

#加載數據
LOAD DATA INPATH '/user/hive/warehouse/db/table/2017-12-12/000000_0' INTO TABLE DB.TABLE PARTITION(dt='2017-12-12');

#加載數據到hdfs(hive)
insert overwrite directory '/user/hive/warehouse/db/table/dt=2017-12-12' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' select * from DB.TABLE where queryday = '2017-12-12';

#加載數據到本地(hive)
insert overwrite local directory '/data/2017-12-12' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' select * from DB.TABLE where queryday = '2017-12-12';

#插入數據到表分區
insert into table DB.TABLE1 PARTITION(dt='2017-12-10') select * from DB.TABLE where queryday = '2017-12-10';

#展現表中有多少分區
show partitions DB.TABLE;

#刷新分區
REFRESH DB.TABLE PARTITION(dt='2017-12-12');

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。