Hive DML 常用操作及示例

一、文件載入Hive表

語法:

LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]

示例:

# 建立分區表
-- Partitioned demo table that receives the LOAD DATA sample file.
-- Text layout: ',' separates fields, '-' separates collection items,
-- ':' separates a map key from its value, and each row ends with a newline.
CREATE TABLE tb_load_data (
    id    INT,
    name  STRING,
    hobby ARRAY<STRING>,
    add   MAP<STRING, STRING>
)
PARTITIONED BY (part_tag1 STRING, part_tag2 INT)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':'
    LINES TERMINATED BY '\n';

# 加載數據,數據加載後仍是目錄下的源文件
-- Load the local sample file into partition (part_tag1='first', part_tag2=100);
-- OVERWRITE replaces whatever that partition already contains.
LOAD DATA LOCAL INPATH '/usr/local/hive-2.1.1/data_dir/complex_data_type.txt'
OVERWRITE INTO TABLE tb_load_data
PARTITION (part_tag1 = 'first', part_tag2 = 100);

# 數據文件信息
[hadoop@node224 ~]$ hdfs dfs -cat /user/hive/warehouse/db01.db/tb_load_data/part_tag1=first/part_tag2=100/complex_data_type.txt
1,xiaoming,book-TV-code,beijing:chaoyang-shagnhai:pudong
2,lilei,book-code,nanjing:jiangning-taiwan:taibei
3,lihua,music-book,heilongjiang:haerbin

二、通過查詢向Hive表中插入數據

語法:

# 基礎語法
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
 
# Hive extension (multiple inserts):
FROM from_statement
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2]
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] ...;
FROM from_statement
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2]
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] ...;
 
# Hive extension (dynamic partition inserts):
INSERT OVERWRITE TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;
INSERT INTO TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;

動態分區插入相關的配置參數

示例:

# 建立基礎插入測試表
-- Target table for the INSERT ... SELECT examples.
-- Same data columns and text delimiters as tb_load_data; the partition
-- columns are (part_tag3 INT, part_tag4 STRING).
CREATE TABLE tb_insert_data (
    id    INT,
    name  STRING,
    hobby ARRAY<STRING>,
    add   MAP<STRING, STRING>
)
PARTITIONED BY (part_tag3 INT, part_tag4 STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':'
    LINES TERMINATED BY '\n';

# insert into 模式加載追加
# 靜態
-- Static partition append: both partition values are fixed in the PARTITION
-- clause, so the SELECT supplies only the four data columns.
INSERT INTO TABLE tb_insert_data PARTITION (part_tag3 = 200, part_tag4 = 'second')
SELECT id, name, hobby, add
FROM tb_load_data;

# 動態
-- Session-level setting: nonstrict mode allows every partition column of the
-- insert below to be dynamic (no static partition value is given).
SET hive.exec.dynamic.partition.mode=nonstrict;

-- Dynamic partition append: the partition values come from the trailing SELECT
-- columns, matched by position (part_tag2 -> part_tag3, part_tag1 -> part_tag4).
INSERT INTO TABLE tb_insert_data PARTITION (part_tag3, part_tag4)
SELECT id, name, hobby, add, part_tag2, part_tag1
FROM tb_load_data;

-- Preview of the rows and partition values that the insert above reads.
SELECT id, name, hobby, add, part_tag2, part_tag1
FROM tb_load_data;

# insert overwrite 模式只覆蓋目標分區中已有的數據,而不是用新一批數據覆蓋整個表的數據
# 靜態
-- Static partition overwrite: replaces the contents of partition
-- (200, 'second') with the single row whose name is 'lilei'.
INSERT OVERWRITE TABLE tb_insert_data PARTITION (part_tag3 = 200, part_tag4 = 'second')
SELECT id, name, hobby, add
FROM tb_load_data
WHERE name = 'lilei';

-- Writes a different partition (300, 'second'); this statement names only
-- that partition as its target.
INSERT OVERWRITE TABLE tb_insert_data PARTITION (part_tag3 = 300, part_tag4 = 'second')
SELECT id, name, hobby, add
FROM tb_load_data;

# 動態
-- Dynamic partition overwrite: the partition values are taken from the last
-- two SELECT columns (part_tag2 -> part_tag3, part_tag1 -> part_tag4).
INSERT OVERWRITE TABLE tb_insert_data PARTITION (part_tag3, part_tag4)
SELECT id, name, hobby, add, part_tag2, part_tag1
FROM tb_load_data;

-- Check the table contents after the overwrite.
SELECT * FROM tb_insert_data;

#hive擴展一次多表寫入
# 建立一次多表插入測試表

-- First multi-insert target: partitioned by (part_tag7 INT, part_tag8 STRING),
-- stored as delimited text (',' fields, '-' collection items, ':' map keys).
CREATE TABLE tb_insert_multi_01 (
    id    INT,
    name  STRING,
    hobby ARRAY<STRING>,
    add   MAP<STRING, STRING>
)
PARTITIONED BY (part_tag7 INT, part_tag8 STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':'
    LINES TERMINATED BY '\n';

-- Second multi-insert target: partitioned by (part_tag5 STRING, part_tag6 INT),
-- stored as delimited text (',' fields, '-' collection items, ':' map keys).
CREATE TABLE tb_insert_multi_02 (
    id    INT,
    name  STRING,
    hobby ARRAY<STRING>,
    add   MAP<STRING, STRING>
)
PARTITIONED BY (part_tag5 STRING, part_tag6 INT)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':'
    LINES TERMINATED BY '\n';

-- Third multi-insert target: unpartitioned, same data columns and text
-- delimiters as the other demo tables.
CREATE TABLE tb_insert_multi_03 (
    id    INT,
    name  STRING,
    hobby ARRAY<STRING>,
    add   MAP<STRING, STRING>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':'
    LINES TERMINATED BY '\n';

# 一次多表 overwrite\into\分區\非分區
-- Multi-table insert: one FROM clause feeds three targets in a single statement.
--   tb_insert_multi_01: OVERWRITE into the static partition (400, 'multi_01')
--   tb_insert_multi_02: INTO dynamic partitions taken from part_tag1, part_tag2
--   tb_insert_multi_03: INTO an unpartitioned table
FROM tb_load_data
INSERT OVERWRITE TABLE tb_insert_multi_01 PARTITION (part_tag7 = 400, part_tag8 = 'multi_01')
    SELECT id, name, hobby, add
INSERT INTO TABLE tb_insert_multi_02 PARTITION (part_tag5, part_tag6)
    SELECT id, name, hobby, add, part_tag1, part_tag2
INSERT INTO tb_insert_multi_03
    SELECT id, name, hobby, add;

# 加載完成後測試載入數據狀況
-- Inspect each of the three targets after the multi-table insert.
SELECT * FROM tb_insert_multi_01;
SELECT * FROM tb_insert_multi_02;
SELECT * FROM tb_insert_multi_03;

三、查詢結果寫入文件系統(本地\hdfs)

語法:

INSERT OVERWRITE [LOCAL] DIRECTORY directory1
  [ROW FORMAT row_format] [STORED AS file_format] (Note: Only available starting with Hive 0.11.0)
  SELECT ... FROM ...
 
# Hive extension (multiple inserts):
FROM from_statement
INSERT OVERWRITE [LOCAL] DIRECTORY directory1 select_statement1
[INSERT OVERWRITE [LOCAL] DIRECTORY directory2 select_statement2] ...
 
  
# row_format
  : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
        [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
        [NULL DEFINED AS char] (Note: Only available starting with Hive 0.13)

示例:

# 寫入本地文件系統
-- Export tb_insert_multi_02 to a directory on the local file system as
-- delimited text (',' fields, '-' collection items, ':' map keys).
INSERT OVERWRITE LOCAL DIRECTORY '/usr/local/hive-2.1.1/data_dir/tb_insert_multi_02'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
SELECT * FROM tb_insert_multi_02;

# 寫入HDFS目錄
-- Export tb_insert_multi_02 to an HDFS directory (no LOCAL keyword) as
-- delimited text (',' fields, '-' collection items, ':' map keys).
INSERT OVERWRITE DIRECTORY '/tmp/hive/tb_insert_multi_02'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
SELECT * FROM tb_insert_multi_02;

# 擴展語句一次分別寫入本地和HDFS
-- Multi-directory export: a single FROM clause feeds one local directory
-- (data columns only) and one HDFS directory (all columns).
FROM tb_insert_multi_02
INSERT OVERWRITE LOCAL DIRECTORY '/usr/local/hive-2.1.1/data_dir/tb_insert_multi_02_local'
    SELECT id, name, hobby, add
INSERT OVERWRITE DIRECTORY '/tmp/hive/tb_insert_multi_02_hdfs'
    SELECT *;

四、將指定的值插入Hive表

不能插入複合數據類型,如集合,數組等

語法

INSERT INTO TABLE tablename [PARTITION (partcol1[=val1], partcol2[=val2] ...)] VALUES values_row [, values_row ...]
  
Where values_row is:
( value [, value ...] )
where a value is either null or any valid SQL literal

示例

# 建表
-- Simple table with primitive column types only, used for INSERT ... VALUES.
CREATE TABLE students (
    name VARCHAR(64),
    age  INT,
    gpa  DECIMAL(3, 2)
);
# 插入數據
-- Insert two literal rows; values are positional (name, age, gpa).
INSERT INTO TABLE students
VALUES
    ('fred flintstone', 35, 1.28),
    ('barney rubble', 32, 2.32);
 
# 複雜數據類型插入時提示錯誤(注意:下方報錯中的 TOK_TABLE_OR_COL 應是由未加引號的 huafeng 觸發,待確認)
0: jdbc:hive2://node225:10000/db01> insert into table tb_insert_multi_02 partition(part_tag5,part_tag6) values (4,huafeng,'["music","book"]','{"heilongjiang":"haerbin"}','third',500);
FAILED: SemanticException [Error 10293]: Unable to create temp file for insert values Expression of type TOK_TABLE_OR_COL not supported in insert/values
Error: Error while compiling statement: FAILED: SemanticException [Error 10293]: Unable to create temp file for insert values Expression of type TOK_TABLE_OR_COL not supported in insert/values (state=42000,code=10293)

五、更新(update)合併(merge)刪除(delete)

Update\Merge\Delete 只能在支持 ACID 的表上執行(can only be performed on tables that support ACID)。此處遺留一個待補充的點:如何開啓 Hive 的事務支持。

語法

# update
UPDATE tablename SET column = value [, column = value ...] [WHERE expression]

# delete
DELETE FROM tablename [WHERE expression]

# merge
MERGE INTO <target table> AS T USING <source expression/table> AS S
ON <boolean expression1>
WHEN MATCHED [AND <boolean expression2>] THEN UPDATE SET <set clause list>
WHEN MATCHED [AND <boolean expression3>] THEN DELETE
WHEN NOT MATCHED [AND <boolean expression4>] THEN INSERT VALUES<value list>

Hive LanguageManual DML

相關文章
相關標籤/搜索