select * from XXX;
tar -zxvf apache-hive-1.2.2-bin.tar.gz -C 目標目錄
mv apache-hive-1.2.2-bin/ hive-1.2.2
mv hive-env.sh.template hive-env.sh
vi hive-env.sh
# Set HADOOP_HOME to point to a specific hadoop install directory
# 指定Hadoop安裝路徑
HADOOP_HOME=Hadoop安裝路徑
# Hive Configuration Directory can be controlled by:
# 指定Hive配置文件夾
export HIVE_CONF_DIR=/XXXXXX/hive-1.2.2/conf
複製代碼
vi /etc/profile
export HIVE_HOME=hive安裝路徑
export PATH=$PATH:$HIVE_HOME/bin
# Hadoop環境加入Hive依賴
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$HIVE_HOME/lib/*
source /etc/profile
hive
quit;
vi hive-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://主機名:3306/metastore?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>密碼</value>
<description>password to use against metastore database</description>
</property>
<!-- 查詢表時顯示表頭信息 -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- 顯示當前所在的數據庫 -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
</configuration>
複製代碼
hive
Java數據類型 | Hive數據類型 | 長度 |
---|---|---|
byte | TINYINT | 1byte有符號整數 |
short | SMALLINT | 2byte有符號整數 |
int | INT | 4byte有符號整數 |
long | BIGINT | 8byte有符號整數 |
boolean | BOOLEAN | false/true |
float | FLOAT | 單精度浮點 |
double | DOUBLE | 雙精度浮點 |
string | STRING | 字符 |
byte[] | BINARY | 字節數組 |
show databases;
create database 數據庫名;
create database if not exists 數據庫名;
create database 數據庫名 location '路徑';
create [external] table [if not exists] 表名(參數) [partitioned by(字段信息)] [clustered by(字段信息)] [sorted by(字段信息)]
row format delimited fields terminated by '切割符';
---根據行格式化，並以指定切割符分割字段
desc formatted 表名;
---查看表的詳細格式化信息
select * from 表名;
select 表名.列1, 表名.列2 from 表名;
select 表名.列 (as) 列別名 from 表名;
select * from 表名;
select * from 表名 where 分區條件;
select * from 表名1 where 分區條件 union select * from 表名1 where 分區條件;
select count(1) from 表名;
select max(列名) from 表名;
select min(列名) from 表名;
select sum(列名) from 表名;
select avg(列名) from 表名;
select * from 表名 limit n;
select * from 表名 where A>n and A<m;
select * from 表名 where A between n and m;
select * from 表名 where A in(n,m);
select * from 表名 where A<n or A>m;
select * from 表名 where A not in(n,m);
select * from 表名 where A is null;
select * from 表名 where A is not null;
select * from 表名 where A like 'n%';
select * from 表名 where A like '_n%';
select * from 表名 where A like '%n%';
select A,B from 表名 group by B;
select * from 表名 order by 列名 asc;
select * from 表名 order by 列名 desc;
set mapreduce.job.reduces = n;
select * from 表名 sort by 列名;
select * from 表名 sort by 列名 desc;
select * from 表名 distribute by A sort by B desc;
select * from 表名 cluster by A;
select * from 表名 distribute by A sort by A;
alter table 表名 add partition(新分區信息);
show partitions 表名;
alter table 表名 drop partition(分區信息);
msck repair table dept_partitions;
clustered by(字段信息) into n buckets
set hive.enforce.bucketing = true;
set mapreduce.job.reduces = -1;
select * from 表名 tablesample(bucket n out of a on A);
desc database 數據庫名;
alter database 數據庫名 set dbproperties('key'='value');
desc database extended 數據庫名;
show databases like 'i*';
drop database 數據庫名;
drop database if exists 數據庫名;
drop database 數據庫名 cascade;
drop database if exists 數據庫名 cascade;
load data [local] inpath '/XXXX/文件名' into table 表名 [partition(分區位置)];
insert into table 表名 partition(分區信息) values(數據內容);
insert overwrite table 表名 partition(分區信息) select * from 表名 where 查詢條件;
create table if not exists 表名 as select * from 表名 where 查詢條件;
create table 表名(參數) row format delimited fields terminated by '切割符' location '';
insert overwrite local directory '本地路徑' select * from 表名;
export table 表名 to 'hdfs路徑';
import table 表名 from 'hive路徑';
truncate table 表名;
hive -e "Hive-DDL語句(注意分號)"
hive -f sql路徑
dfs -ls 路徑;
dfs -cat 文件路徑;
dfs -mkdir -p 目錄路徑;
dfs -put 文件路徑 目錄路徑;
cat ~/.hivehistory
show functions;
desc function extended 函數名;
add jar jar包路徑;
create temporary function 別名 as "java函數類";
<property>
<name>hive.aux.jars.path</name>
<value>file://文件夾路徑</value>
</property>
複製代碼
set hive.exec.compress.intermediate = true;
set mapreduce.map.output.compress = true;
set mapreduce.map.output.compress.codec = org.apache.hadoop.io.compress.SnappyCodec;
set hive.exec.compress.output= true;
set mapreduce.output.fileoutputformat.compress = true;
set mapreduce.output.fileoutputformat.compress.codec = org.apache.hadoop.io.compress.SnappyCodec;
set mapreduce.output.fileoutputformat.compress.type = BLOCK;
<configuration>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/opt/module/hive-1.2.2/warehouse</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<!-- MySQL數據庫位置 -->
<value>jdbc:mysql://bigdata01:3306/metastore?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>MySQL用戶名</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>MySQL密碼</value>
<description>password to use against metastore database</description>
</property>
</configuration>
複製代碼
client端配置文件:java
<configuration>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/opt/module/hive-1.2.2/warehouse</value>
</property>
<property>
<name>hive.metastore.local</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.uris</name>
<!-- server端地址信息 -->
<value>thrift://bigdata01:9083</value>
</property>
<!-- 查詢表時顯示表頭信息 -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- 顯示當前所在的數據庫 -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
</configuration>
複製代碼
啓動:mysql
hive --service metastore
hive
注意:linux
ls: cannot access /opt/module/spark-2.1.0/lib/spark-assembly-*.jar: No such file or directory
vi /XXXX/hive/bin/hive
sparkAssemblyPath=`ls ${SPARK_HOME}/jars/*.jar`
複製代碼