Reposted from: http://blog.csdn.net/lovelovelovelovelo/article/details/52234971
Data types
Primitive data types
Collection types: array, map, struct
File formats: textfile, sequencefile, rcfile
Creating a table (managed table)
create table employee(
  name string comment 'name',
  salary float,
  subordinates array<string>,
  deductions map<string,float>,
  address struct<street:string,city:string,state:string,zip:int>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':'
lines terminated by '\n'
stored as textfile;
Loading data from a file, overwriting the existing table contents
load data local inpath 'path' overwrite into table table_name;
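For example, reloading the employee table from the local file that is created later in this post:

load data local inpath '/root/employee.txt' overwrite into table employee;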
Creating an external table
create external table employee(
  name string comment 'name',
  salary float,
  subordinates array<string>,
  deductions map<string,float>,
  address struct<street:string,city:string,state:string,zip:int>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':'
lines terminated by '\n'
stored as textfile
location '/data/';
Sample data in the table:
lucy 11000 tom,jack,dave,kate tom:1200,jack:1560 beijing,changanjie,xichengqu,10000
lily 13000 dave,kate dave:1300,kate:1260 beijing,changanjie,xichengqu,10000
Unlike the relational databases we are familiar with, Hive (as of this writing) does not support giving a literal set of rows inside an insert statement; in other words, Hive has no INSERT INTO ... VALUES syntax.
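The usual workarounds on those versions are to load rows from a file (as below) or to insert the output of a query. A minimal sketch of the latter, assuming a hypothetical staging table employee_staging with the same columns:

-- employee_staging is an illustrative table with the same schema as employee
insert into table employee
select name, salary, subordinates, deductions, address
from employee_staging;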
Create employee.txt and save the data into it; note that fields are separated by tabs and each row ends with a newline (Enter).
Load the data via the hive CLI:
hive> load data local inpath '/root/employee.txt' into table employee;
hive> select * from employee;
OK
lucy	11000.0	["tom","jack","dave","kate"]	{"tom":1200.0,"jack":1560.0}	{"street":"beijing","city":"changanjie","state":"xichengqu","zip":10000}
lily	13000.0	["dave","kate"]	{"dave":1300.0,"kate":1260.0}	{"street":"beijing","city":"changanjie","state":"xichengqu","zip":10000}
Time taken: 0.054 seconds, Fetched: 2 row(s)
select * from table does not launch a MapReduce job; Hive reads the table's files directly.
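For comparison, an aggregation does get compiled into a job:

select * from employee;        -- plain fetch: files read directly, no MapReduce
select count(*) from employee; -- aggregation: runs as a MapReduce job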
Creating one table from another
create table table2 like table1;
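like copies only the table definition, not its rows; a quick check, with employee2 as an illustrative name:

create table employee2 like employee;
select count(*) from employee2;  -- 0: schema copied, no data

By contrast, create table ... as select (next) also populates the new table with the query's rows.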
Creating a table from the result of a query on another table
create table table2 as select name, age, addr from table1;
Reading tables stored in different file formats
stored as textfile: view with hadoop fs -text
stored as sequencefile: view with hadoop fs -text
stored as rcfile: view with hive --service rcfilecat <path>
stored as inputformat 'class' outputformat 'class': custom reader/writer classes
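One way to try the other formats is to copy the employee table into them with create table ... as select (table names are illustrative):

create table employee_seq stored as sequencefile as select * from employee;
create table employee_rc stored as rcfile as select * from employee;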
Partitioned table operations
alter table employee add if not exists partition(country='');
alter table employee drop if exists partition(country='');
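Note these statements require a table partitioned by country, which the employee DDL above does not declare; with such a table and a concrete value the sequence would look like:

-- assumes employee was created with: partitioned by (country string)
alter table employee add if not exists partition(country='us');
show partitions employee;
alter table employee drop if exists partition(country='us');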
Hive bucketing
create table bucket_table(
  id int,
  name string
)
clustered by(id) sorted by(name) into 4 buckets
row format delimited fields terminated by '\t'
stored as textfile;

set hive.enforce.bucketing=true;
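With hive.enforce.bucketing=true, rows have to arrive through an insert ... select so Hive can hash them into the four buckets; a sketch, assuming a hypothetical source table src(id int, name string):

-- src is an illustrative source table with matching columns
insert overwrite table bucket_table
select id, name from src;

-- read back a single bucket (1 of 4) via table sampling
select * from bucket_table tablesample(bucket 1 out of 4 on id);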
Creating a partitioned table
create table partitionTable(
  name string,
  age int
)
partitioned by(dt string)
row format delimited fields terminated by '\t'
lines terminated by '\n'
stored as textfile;
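Each load then names its target partition explicitly; the file path and date value below are illustrative:

load data local inpath '/root/people.txt' into table partitionTable partition(dt='2016-08-22');
select * from partitionTable where dt='2016-08-22';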