一、文件載入Hive表
語法:
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
示例:
-- Example: load a local text file into one partition of a partitioned table.

-- Create the partitioned table. The delimiters below must match the layout
-- of the data file: ',' between fields, '-' between collection items and
-- ':' between a map key and its value.
create table tb_load_data (
    id    int,
    name  string,
    hobby array<string>,
    add   map<String,string>
)
partitioned by (part_tag1 string, part_tag2 int)
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n'
;

-- Load the data. After the load, the partition directory still holds the
-- source file itself (see the listing below).
load data local inpath '/usr/local/hive-2.1.1/data_dir/complex_data_type.txt'
overwrite into table tb_load_data
partition (part_tag1 = 'first', part_tag2 = 100);

-- Data file contents. This is a shell transcript, not HiveQL, so it is kept
-- as comments to leave the script runnable:
-- [hadoop@node224 ~]$ hdfs dfs -cat /user/hive/warehouse/db01.db/tb_load_data/part_tag1=first/part_tag2=100/complex_data_type.txt
-- 1,xiaoming,book-TV-code,beijing:chaoyang-shagnhai:pudong
-- 2,lilei,book-code,nanjing:jiangning-taiwan:taibei
-- 3,lihua,music-book,heilongjiang:haerbin
二、通過查詢向Hive表中插入數據
語法:
# 基礎語法 INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement; INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement; # Hive extension (multiple inserts): FROM from_statement INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 [INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] [INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] ...; FROM from_statement INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 [INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] [INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] ...; # Hive extension (dynamic partition inserts): INSERT OVERWRITE TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement; INSERT INTO TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;
動態分區插入相關的配置參數
示例:
-- Example: populate tables from queries (static / dynamic partitions,
-- insert into / insert overwrite, and the Hive multi-insert extension).
-- Reads from tb_load_data, which must already exist and contain data.

-- Create the table used for the basic insert tests.
create table tb_insert_data (
    id    int,
    name  string,
    hobby array<string>,
    add   map<String,string>
)
partitioned by (part_tag3 int, part_tag4 string)
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n'
;

-- insert into: appends to the target.
-- Static partition: the partition values are spelled out in the statement.
insert into table tb_insert_data partition (part_tag3 = 200, part_tag4 = 'second')
select id, name, hobby, add
from tb_load_data;

-- Dynamic partition: the partition values come from the trailing select
-- columns (part_tag2 -> part_tag3, part_tag1 -> part_tag4, by position).
-- This setting is session-level.
set hive.exec.dynamic.partition.mode=nonstrict;

insert into table tb_insert_data partition (part_tag3, part_tag4)
select id, name, hobby, add, part_tag2, part_tag1
from tb_load_data;

-- The query that feeds the dynamic insert above, run on its own to inspect
-- the rows. The original listing was missing the statement terminator here.
select id, name, hobby, add, part_tag2, part_tag1
from tb_load_data;

-- insert overwrite: replaces the data in the matching partition only; a later
-- batch does not overwrite the data of the whole table.
-- Static partition.
insert overwrite table tb_insert_data partition (part_tag3 = 200, part_tag4 = 'second')
select id, name, hobby, add
from tb_load_data
where name = 'lilei';

insert overwrite table tb_insert_data partition (part_tag3 = 300, part_tag4 = 'second')
select id, name, hobby, add
from tb_load_data;

-- Dynamic partition.
insert overwrite table tb_insert_data partition (part_tag3, part_tag4)
select id, name, hobby, add, part_tag2, part_tag1
from tb_load_data;

select * from tb_insert_data;

-- Hive extension: write to several tables in one statement.
-- Create the target tables for the multi-insert test.
create table tb_insert_multi_01 (
    id    int,
    name  string,
    hobby array<string>,
    add   map<String,string>
)
partitioned by (part_tag7 int, part_tag8 string)
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n'
;

create table tb_insert_multi_02 (
    id    int,
    name  string,
    hobby array<string>,
    add   map<String,string>
)
partitioned by (part_tag5 string, part_tag6 int)
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n'
;

-- Non-partitioned target.
create table tb_insert_multi_03 (
    id    int,
    name  string,
    hobby array<string>,
    add   map<String,string>
)
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n'
;

-- One statement, several targets: overwrite and into, static and dynamic
-- partitions, partitioned and non-partitioned tables.
-- The dynamic-partition branch relies on the nonstrict mode set earlier in
-- this session.
from tb_load_data
insert overwrite table tb_insert_multi_01 partition (part_tag7 = 400, part_tag8 = 'multi_01')
    select id, name, hobby, add
insert into table tb_insert_multi_02 partition (part_tag5, part_tag6)
    select id, name, hobby, add, part_tag1, part_tag2
insert into table tb_insert_multi_03
    select id, name, hobby, add
;

-- After the load completes, check what landed in each table.
select * from tb_insert_multi_01;
select * from tb_insert_multi_02;
select * from tb_insert_multi_03;
三、查詢結果寫入文件系統(本地\hdfs)
語法:
INSERT OVERWRITE [LOCAL] DIRECTORY directory1 [ROW FORMAT row_format] [STORED AS file_format] (Note: Only available starting with Hive 0.11.0) SELECT ... FROM ... # Hive extension (multiple inserts): FROM from_statement INSERT OVERWRITE [LOCAL] DIRECTORY directory1 select_statement1 [INSERT OVERWRITE [LOCAL] DIRECTORY directory2 select_statement2] ... # row_format : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char] [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char] [NULL DEFINED AS char] (Note: Only available starting with Hive 0.13)
示例:
-- Example: write query results to the filesystem (local and HDFS).
-- Reads from tb_insert_multi_02, which must already exist.
-- `select *` is kept on purpose: it also exports the partition columns.

-- Write to the local filesystem.
insert overwrite local directory '/usr/local/hive-2.1.1/data_dir/tb_insert_multi_02'
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n'
stored as textfile
select * from tb_insert_multi_02
;

-- Write to an HDFS directory (no LOCAL keyword).
insert overwrite directory '/tmp/hive/tb_insert_multi_02'
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n'
stored as textfile
select * from tb_insert_multi_02
;

-- Multi-insert extension: one statement writes to a local directory and to an
-- HDFS directory. No row format is given here, so Hive's default output
-- format applies to both targets.
from tb_insert_multi_02
insert overwrite local directory '/usr/local/hive-2.1.1/data_dir/tb_insert_multi_02_local'
    select id, name, hobby, add
insert overwrite directory '/tmp/hive/tb_insert_multi_02_hdfs'
    select *
;
四、將指定的值插入Hive表
INSERT ... VALUES 不能插入複合數據類型(如 array、map、struct 等),因為 Hive 不支持這些類型的字面量。
語法
INSERT INTO TABLE tablename [PARTITION (partcol1[=val1], partcol2[=val2] ...)] VALUES values_row [, values_row ...] Where values_row is: ( value [, value ...] ) where a value is either null or any valid SQL literal
示例
-- Example: insert literal rows with INSERT ... VALUES.

-- Create the table.
CREATE TABLE students (
    name VARCHAR(64),
    age  INT,
    gpa  DECIMAL(3, 2)
);

-- Insert rows.
INSERT INTO TABLE students
VALUES
    ('fred flintstone', 35, 1.28),
    ('barney rubble', 32, 2.32);

-- Inserting into a table with complex-typed columns reports an error.
-- Beeline transcript, kept verbatim as comments because it is not runnable:
--
-- 0: jdbc:hive2://node225:10000/db01> insert into table tb_insert_multi_02 partition(part_tag5,part_tag6) values (4,huafeng,'["music","book"]','{"heilongjiang":"haerbin"}','third',500);
-- FAILED: SemanticException [Error 10293]: Unable to create temp file for insert values Expression of type TOK_TABLE_OR_COL not supported in insert/values
-- Error: Error while compiling statement: FAILED: SemanticException [Error 10293]: Unable to create temp file for insert values Expression of type TOK_TABLE_OR_COL not supported in insert/values (state=42000,code=10293)
--
-- NOTE(review): the message names TOK_TABLE_OR_COL, which suggests the
-- unquoted `huafeng` was parsed as a column reference. This particular
-- failure may therefore come from the missing quotes rather than from the
-- array/map columns; re-run with 'huafeng' quoted to confirm which error the
-- complex-typed columns actually produce.
五、更新(update)合併(merge)刪除(delete)
Update\Merge\Delete 只能在支持 ACID 的表上執行(can only be performed on tables that support ACID)。此處遺留一個點:Hive 事務的開啓方式尚未說明,留待後續補充。
語法
# update UPDATE tablename SET column = value [, column = value ...] [WHERE expression] # delete DELETE FROM tablename [WHERE expression] # merge MERGE INTO <target table> AS T USING <source expression/table> AS S ON <boolean expression1> WHEN MATCHED [AND <boolean expression2>] THEN UPDATE SET <set clause list> WHEN MATCHED [AND <boolean expression3>] THEN DELETE WHEN NOT MATCHED [AND <boolean expression4>] THEN INSERT VALUES<value list>