hive version 2.1.1 DDL 語法筆記
1 //官方指導 2 CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name 3 [COMMENT database_comment] 4 [LOCATION hdfs_path] 5 [WITH DBPROPERTIES (property_name=property_value, ...)];
1 //示例 2 create database if not exists testdb;
1 //官方指導 2 DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT|CASCADE];
1 //示例 2 drop database testdb;
說明:RESTRICT 爲默認參數(不跟參數時即按 RESTRICT 處理),若是數據庫不爲空,則刪除失敗;
CASCADE 若是數據庫中有表,會連同表一起刪除。
1 //官方指導 2 ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...); -- (Note: SCHEMA added in Hive 0.14.0) 3 4 ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role; -- (Note: Hive 0.13.0 and later; SCHEMA added in Hive 0.14.0) 5 6 ALTER (DATABASE|SCHEMA) database_name SET LOCATION hdfs_path; -- (Note: Hive 2.2.1, 2.4.0 and later)
1 //示例1 set dbproperties 設置數據庫屬性 2 alter database testdb set dbproperties('name'='xiaoming');
1 //示例2 set owner 修改所屬者名字和類型 2 alter database testdb set owner user root;
說明:SET LOCATION要2.2.1,2.4.0及更高版本才支持,不會更改數據庫現有數據的位置,建立新表時新表的父目錄纔會更改。
1 //官方指導2 USE database_name;3 USE DEFAULT;
//示例USE testdb;
建立表
1 //官方指導 2 CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name -- (Note: TEMPORARY available in Hive 0.14.0 and later) 3 [(col_name data_type [COMMENT col_comment], ... [constraint_specification])] 4 [COMMENT table_comment] 5 [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] 6 [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] 7 [SKEWED BY (col_name, col_name, ...) -- (Note: Available in Hive 0.10.0 and later)] 8 ON ((col_value, col_value, ...), (col_value, col_value, ...), ...) 9 [STORED AS DIRECTORIES] 10 [ 11 [ROW FORMAT row_format] 12 [STORED AS file_format] 13 | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later) 14 ] 15 [LOCATION hdfs_path] 16 [TBLPROPERTIES (property_name=property_value, ...)] -- (Note: Available in Hive 0.6.0 and later) 17 [AS select_statement]; -- (Note: Available in Hive 0.5.0 and later; not supported for external tables) 18 19 CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name 20 LIKE existing_table_or_view_name 21 [LOCATION hdfs_path]; 22 23 data_type 24 : primitive_type 25 | array_type 26 | map_type 27 | struct_type 28 | union_type -- (Note: Available in Hive 0.7.0 and later) 29 30 primitive_type 31 : TINYINT 32 | SMALLINT 33 | INT 34 | BIGINT 35 | BOOLEAN 36 | FLOAT 37 | DOUBLE 38 | DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later) 39 | STRING 40 | BINARY -- (Note: Available in Hive 0.8.0 and later) 41 | TIMESTAMP -- (Note: Available in Hive 0.8.0 and later) 42 | DECIMAL -- (Note: Available in Hive 0.11.0 and later) 43 | DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later) 44 | DATE -- (Note: Available in Hive 0.12.0 and later) 45 | VARCHAR -- (Note: Available in Hive 0.12.0 and later) 46 | CHAR -- (Note: Available in Hive 0.13.0 and later) 47 48 array_type 49 : ARRAY < data_type > 50 51 map_type 52 : MAP < primitive_type, data_type > 
53 54 struct_type 55 : STRUCT < col_name : data_type [COMMENT col_comment], ...> 56 57 union_type 58 : UNIONTYPE < data_type, data_type, ... > -- (Note: Available in Hive 0.7.0 and later) 59 60 row_format 61 : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char] 62 [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char] 63 [NULL DEFINED AS char] -- (Note: Available in Hive 0.13 and later) 64 | SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)] 65 66 file_format: 67 : SEQUENCEFILE 68 | TEXTFILE -- (Default, depending on hive.default.fileformat configuration) 69 | RCFILE -- (Note: Available in Hive 0.6.0 and later) 70 | ORC -- (Note: Available in Hive 0.11.0 and later) 71 | PARQUET -- (Note: Available in Hive 0.13.0 and later) 72 | AVRO -- (Note: Available in Hive 0.14.0 and later) 73 | INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname 74 75 constraint_specification: 76 : [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE ] 77 [, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE
1 //示例 外部分區表 2 create external table if not exists testdb.test 3 (id int COMMENT '這是主鍵', 4 ad array<String>, 5 bd map<string,String>, 6 cd struct<a:String,b:string,c:string,d:String>) 7 COMMENT '這是測試表' 8 partitioned by(dd String,time String) 9 row format delimited 10 fields terminated by '\u0001' 11 collection items terminated by ',' 12 map keys terminated by ':' 13 lines terminated by '\n' 14 location '/use/';
說明:external 建立一張表不使用hive默認位置,能夠指向hdfs任何位置,也就是外部表
partitioned by 分區字段
fields terminated by 字段分割方式
collection items terminated by array,map,struct特殊字段中數據分割方式
map keys terminated by map中key:value的分割方式
stored as textfile location 指定外部表數據的位置
還能夠建立排序表,傾斜表,臨時表以及加約束條件
TEMPORARY臨時表如今還不支持分區
分桶表
create external table if not exists ods_db_poker.tb_player (`id` int COMMENT '主鍵',`playerName` string,updateTime bigint COMMENT '更新時間') partitioned by(currentDate int COMMENT '當前日期') CLUSTERED BY(id) SORTED BY(updateTime) INTO 10 BUCKETS row format delimited fields terminated by '\u0001' lines terminated by '\n' location '/warehouse/tablespace/external/hive/ods_db_poker.db/tb_player';
CLUSTERED BY 分桶字段,根據該字段取hash分桶
SORTED BY 排序字段,根據該字段作升降序排序
INTO 10 BUCKETS 分桶個數 10
RegEx/JSON/CSV/TSV
//官方指導 //正則表達式 regex CREATE TABLE apachelog ( host STRING, identity STRING, user STRING, time STRING, request STRING, status STRING, size STRING, referer STRING, agent STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' WITH SERDEPROPERTIES ( "input.regex" = "([^]*) ([^]*) ([^]*) (-|\\[^\\]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\".*\") ([^ \"]*|\".*\"))?" ) STORED AS TEXTFILE;
//json字符串 ADD JAR /usr/lib/hive-hcatalog/lib/hive-hcatalog-core.jar; CREATE TABLE my_table(a string, b bigint, ...) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' STORED AS TEXTFILE;
// CSV / TSV格式 CREATE TABLE my_table(a string, b string, ...) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' WITH SERDEPROPERTIES ( "separatorChar" = "\t", "quoteChar" = "'", "escapeChar" = "\\" ) STORED AS TEXTFILE;
//示例 json //加入jar
add jar /root/hive-json-serde-0.2.jar; add jar /root/hive-hcatalog-core-2.1.1.jar; //sql CREATE TABLE my_table(name string, url string) partitioned by(time String) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe';
刪除表
1 //官方指導 2 DROP TABLE [IF EXISTS] table_name [PURGE]; -- (Note: PURGE available in Hive 0.14.0 and later)
1 //示例 2 drop table testdb.test;
說明:PURGE 在配置了垃圾回收站的狀況下,若是指定此參數,數據將會直接刪除。不然將移至垃圾回收目錄。
截斷表
1 //官方指導 2 TRUNCATE TABLE table_name [PARTITION partition_spec]; 3 4 partition_spec: 5 : (partition_column = partition_col_value, partition_column = partition_col_value, ...)
1 //示例 2 truncate table testdb.test partition(time=2017);
說明:刪除該分區中全部數據,能夠同時刪除多個分區中的數據,分區依舊存在。
重命名錶
1 //官方指導 2 ALTER TABLE table_name RENAME TO new_table_name;
1 //示例 2 alter table testdb.test rename to testone;
更改表屬性
1 //官方指導 2 ALTER TABLE table_name SET TBLPROPERTIES table_properties; 3 4 table_properties: 5 : (property_name = property_value, property_name = property_value, ... )
1 //示例 2 alter table testdb.testone set tblproperties('propertyone'='第一個測試');
說明: 設置本身的元數據
添加 SerDe 屬性
1 //官方指導 2 ALTER TABLE table_name [PARTITION partition_spec] SET SERDE serde_class_name [WITH SERDEPROPERTIES serde_properties]; 3 4 ALTER TABLE table_name [PARTITION partition_spec] SET SERDEPROPERTIES serde_properties; 5 6 serde_properties: 7 : (property_name = property_value, property_name = property_value, ... )
1 //示例 2 alter table testdb.testone set serdeproperties('test'='test');
添加分區
1 //官方指導 2 ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec [LOCATION 'location'][, PARTITION partition_spec [LOCATION 'location'], ...]; 3 4 partition_spec: 5 : (partition_column = partition_col_value, partition_column = partition_col_value, ...)
1 //示例
2 alter table testdb.test add partition (dd='10',time='2020') location '/root/a.txt';
向多分區表加分區時,必須指定加在哪個分區中;
動態分區
1 //官方指導 2 FROM page_view_stg pvs 3 INSERT OVERWRITE TABLE page_view PARTITION(dt='2008-06-08', country) 4 SELECT pvs.viewTime, pvs.userid, pvs.page_url, pvs.referrer_url, null, null, pvs.ip, pvs.country
1 //示例 2 //修改配置文件hive-site 3 //hive.exec.dynamic.partition=true; 4 //hive.exec.dynamic.partition.mode=nonstrict; 5 insert overwrite table test1 partition(dd,time) select id,ad,bd,cd,dd,time from test;
重命名分區
1 //官方指導 2 ALTER TABLE table_name PARTITION partition_spec RENAME TO PARTITION partition_spec;
1 //示例 2 ALTER TABLE test PARTITION(dd='a',time='2018') RENAME TO PARTITION(dd='a',time='2019');
多個分區時必須指定不改變的分區,支持同時修改
刪除分區
//官方指導 ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec[, PARTITION partition_spec, ...] [IGNORE PROTECTION] [PURGE]; -- (Note: PURGE available in Hive 1.2.0 and later, IGNORE PROTECTION not available 2.0.0 and later)
1 //示例 2 alter table testdb.test drop partition (dd = 'a');
修改分區或表文件格式
1 //官方指導 2 ALTER TABLE table_name [PARTITION partition_spec] SET FILEFORMAT file_format;
1 // 示例 通常都用text 2 alter table test partition (dd='b') set fileformat rcfile;
1 //官方指導 2 ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type 3 [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
//示例 id 改成id1 類型int 放在第1位 first alter table test change id id1 int first;
添加或替換列
1 //官方指導 2 ALTER TABLE table_name 3 [PARTITION partition_spec] -- (Note: Hive 0.14.0 and later) 4 ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...) 5 [CASCADE|RESTRICT] -- (Note: Hive 1.1.0 and later)
1 //示例 2 alter table test add columns(ed string);
視圖、索引、權限等DDL操做請參考apache Hive官方文檔