參考 https://www.cloudera.com/documentation/enterprise/5-5-x/topics/impala_datetime_functions.html
hdfs文件操作
$ whoami cloudera $ hdfs dfs -ls /user Found 3 items drwxr-xr-x - cloudera cloudera 0 2013-04-22 18:54 /user/cloudera drwxrwx--- - mapred mapred 0 2013-03-15 20:11 /user/history drwxr-xr-x - hue supergroup 0 2013-03-15 20:10 /user/hive $ hdfs dfs -mkdir -p /user/cloudera/sample_data/tab1 /user/cloudera/sample_data/tab2 $ hdfs dfs -put tab1.csv /user/cloudera/sample_data/tab1 $ hdfs dfs -cat /user/hive/tpcds/customer/customer.dat
Impala和關係數據庫
Impala | 關係型數據庫 |
---|---|
Impala使用類似於HiveQL的類SQL查詢語言。 | 關係數據庫使用SQL語言。 |
在Impala中,您無法更新或刪除單個記錄。 | 在關係數據庫中,可以更新或刪除單個記錄。 |
Impala不支持事務。 | 關係數據庫支持事務。 |
Impala不支持索引。 | 關係數據庫支持索引。 |
Impala存儲和管理大量數據(PB)。 | 與Impala相比,關係數據庫處理的數據量較少(TB)。 |
HBase、Hive和Impala
HBase | Hive | Impala |
---|---|---|
HBase是基於Apache Hadoop的寬列存儲數據庫。 它使用BigTable的概念。 | Hive是一個數據倉庫軟件。 使用它,我們可以訪問和管理基於Hadoop的大型分佈式數據集。 | Impala是一個管理,分析存儲在Hadoop上的數據的工具。 |
HBase的數據模型是寬列存儲。 | Hive遵循關係模型。 | Impala遵循關係模型。 |
HBase是使用Java語言開發的。 | Hive是使用Java語言開發的。 | Impala是使用C++開發的。 |
HBase的數據模型是無模式的。 | Hive的數據模型是基於模式的。 | Impala的數據模型是基於模式的。 |
HBase提供Java,RESTful和Thrift API。 | Hive提供JDBC,ODBC,Thrift API。 | Impala提供JDBC和ODBC API。 |
支持C,C#,C++,Groovy,Java,PHP,Python和Scala等編程語言。 | 支持C++,Java,PHP和Python等編程語言。 | Impala支持所有支持JDBC/ODBC的語言。 |
HBase提供對觸發器的支持。 | Hive不提供任何觸發器支持。 | Impala不提供對觸發器的任何支持。 |
打開Impala-shell
[root@quickstart cloudera] # impala-shell
SQL
利用外部數據文件建立Impala的表
CREATE EXTERNAL TABLE tab1 ( id INT, col_1 BOOLEAN, col_2 DOUBLE, col_3 TIMESTAMP ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '/user/cloudera/sample_data/tab1';
建立分區
create database external_partitions; use external_partitions; create table logs (field1 string, field2 string, field3 string) partitioned by (year string, month string , day string, host string) row format delimited fields terminated by ','; insert into logs partition (year="2013", month="07", day="28", host="host1") values ("foo","foo","foo"); insert into logs partition (year="2013", month="07", day="28", host="host2") values ("foo","foo","foo");
ALTER TABLE
ALTER TABLE [old_db_name.]old_table_name RENAME TO [new_db_name.]new_table_name ALTER TABLE name ADD COLUMNS (col_spec[, col_spec ...]) ALTER TABLE name DROP [COLUMN] column_name ALTER TABLE name CHANGE column_name new_name new_type ALTER TABLE name REPLACE COLUMNS (col_spec[, col_spec ...]) ALTER TABLE name { ADD [IF NOT EXISTS] | DROP [IF EXISTS] } PARTITION (partition_spec) [PURGE] ALTER TABLE name RECOVER PARTITIONS ALTER TABLE name [PARTITION (partition_spec)] SET { FILEFORMAT file_format | LOCATION 'hdfs_path_of_directory' | TBLPROPERTIES (table_properties) | SERDEPROPERTIES (serde_properties) } ALTER TABLE name [PARTITION (partition_spec)] SET { CACHED IN 'pool_name' [WITH REPLICATION = integer] | UNCACHED } new_name ::= [new_database.]new_table_name col_spec ::= col_name type_name partition_spec ::= partition_col=constant_value table_properties ::= 'name'='value'[, 'name'='value' ...] serde_properties ::= 'name'='value'[, 'name'='value' ...] file_format ::= { PARQUET | TEXTFILE | RCFILE | SEQUENCEFILE | AVRO }
ALTER VIEW
ALTER VIEW [database_name.]view_name AS select_statement ALTER VIEW [database_name.]view_name RENAME TO [database_name.]view_name
COMPUTE STATS
COMPUTE STATS [db_name.]table_name COMPUTE INCREMENTAL STATS [db_name.]table_name [PARTITION (partition_spec)] partition_spec ::= partition_col=constant_value
CREATE DATABASE
CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name [COMMENT 'database_comment'] [LOCATION hdfs_path];
CREATE FUNCTION
--Creates a user-defined function (UDF) CREATE FUNCTION [IF NOT EXISTS] [db_name.]function_name([arg_type[, arg_type...]]) RETURNS return_type LOCATION 'hdfs_path' SYMBOL='symbol_or_class'
--user-defined aggregate function (UDA) CREATE [AGGREGATE] FUNCTION [IF NOT EXISTS] [db_name.]function_name([arg_type[, arg_type...]]) RETURNS return_type LOCATION 'hdfs_path' [INIT_FN='function'] UPDATE_FN='function' MERGE_FN='function' [PREPARE_FN='function'] [CLOSE_FN='function'] [SERIALIZE_FN='function'] [FINALIZE_FN='function'] [INTERMEDIATE type_spec]
CREATE ROLE
CREATE ROLE role_name
CREATE TABLE
--Explicit column definitions: CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name (col_name data_type [COMMENT 'col_comment'], ...) [PARTITIONED BY (col_name data_type [COMMENT 'col_comment'], ...)] [COMMENT 'table_comment'] [WITH SERDEPROPERTIES ('key1'='value1', 'key2'='value2', ...)] [ [ROW FORMAT row_format] [STORED AS file_format] ] [LOCATION 'hdfs_path'] [TBLPROPERTIES ('key1'='value1', 'key2'='value2', ...)] [CACHED IN 'pool_name' [WITH REPLICATION = integer] | UNCACHED]
--Column definitions inferred from data file: CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name LIKE PARQUET 'hdfs_path_of_parquet_file' [COMMENT 'table_comment'] [PARTITIONED BY (col_name data_type [COMMENT 'col_comment'], ...)] [WITH SERDEPROPERTIES ('key1'='value1', 'key2'='value2', ...)] [ [ROW FORMAT row_format] [STORED AS file_format] ] [LOCATION 'hdfs_path'] [TBLPROPERTIES ('key1'='value1', 'key2'='value2', ...)] [CACHED IN 'pool_name' [WITH REPLICATION = integer] | UNCACHED] data_type: primitive_type | array_type | map_type | struct_type
--CREATE TABLE AS SELECT: CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name [COMMENT 'table_comment'] [WITH SERDEPROPERTIES ('key1'='value1', 'key2'='value2', ...)] [ [ROW FORMAT row_format] [STORED AS ctas_file_format] ] [LOCATION 'hdfs_path'] [TBLPROPERTIES ('key1'='value1', 'key2'='value2', ...)] [CACHED IN 'pool_name' [WITH REPLICATION = integer] | UNCACHED] AS select_statement
primitive_type: TINYINT | SMALLINT | INT | BIGINT | BOOLEAN | FLOAT | DOUBLE | DECIMAL | STRING | CHAR | VARCHAR | TIMESTAMP complex_type: struct_type | array_type | map_type struct_type: STRUCT < name : primitive_or_complex_type [COMMENT 'comment_string'], ... > array_type: ARRAY < primitive_or_complex_type > map_type: MAP < primitive_type, primitive_or_complex_type > row_format: DELIMITED [FIELDS TERMINATED BY 'char' [ESCAPED BY 'char']] [LINES TERMINATED BY 'char'] file_format: PARQUET | TEXTFILE | AVRO | SEQUENCEFILE | RCFILE ctas_file_format: PARQUET | TEXTFILE
CREATE VIEW
CREATE VIEW [IF NOT EXISTS] view_name [(column_list)] AS select_statement
DESCRIBE
DESCRIBE [FORMATTED] [db_name.]table_name[.complex_col_name ...]
DROP DATABASE
DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT | CASCADE];
DROP TABLE
DROP TABLE [IF EXISTS] [db_name.]table_name [PURGE]
DROP VIEW
DROP VIEW [IF EXISTS] [db_name.]view_name
DROP FUNCTION
DROP [AGGREGATE] FUNCTION [IF EXISTS] [db_name.]function_name(type[, type...])
DROP ROLE
DROP ROLE role_name
DROP STATS
DROP STATS [database_name.]table_name DROP INCREMENTAL STATS [database_name.]table_name PARTITION (partition_spec) partition_spec ::= partition_col=constant_value
EXPLAIN
EXPLAIN { select_query | ctas_stmt | insert_stmt }
GRANT
GRANT ROLE role_name TO GROUP group_name GRANT privilege ON object_type object_name TO [ROLE] role_name [WITH GRANT OPTION] privilege ::= SELECT | SELECT(column_name) | INSERT | ALL object_type ::= TABLE | DATABASE | SERVER | URI
INSERT
[with_clause] INSERT { INTO | OVERWRITE } [TABLE] table_name [(column_list)] [ PARTITION (partition_clause)] { [hint_clause] select_statement | VALUES (value [, value ...]) [, (value [, value ...]) ...] } partition_clause ::= col_name [= constant] [, col_name [= constant] ...] hint_clause ::= [SHUFFLE] | [NOSHUFFLE] (Note: the square brackets are part of the syntax.)
INVALIDATE METADATA
INVALIDATE METADATA [[db_name.]table_name]
LOAD DATA
LOAD DATA INPATH 'hdfs_file_or_directory_path' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
REFRESH
REFRESH [db_name.]table_name
REVOKE
REVOKE ROLE role_name FROM GROUP group_name REVOKE privilege ON object_type object_name FROM [ROLE] role_name privilege ::= SELECT | SELECT(column_name) | INSERT | ALL object_type ::= TABLE | DATABASE | SERVER | URI
SELECT
[WITH name AS (select_expression) [, ...] ] SELECT [ALL | DISTINCT] [STRAIGHT_JOIN] expression [, expression ...] FROM table_reference [, table_reference ...] [[FULL | [LEFT | RIGHT] INNER | [LEFT | RIGHT] OUTER | [LEFT | RIGHT] SEMI | [LEFT | RIGHT] ANTI | CROSS] JOIN table_reference [ON join_equality_clauses | USING (col1[, col2 ...])] ... WHERE conditions GROUP BY { column | expression [, ...] } HAVING conditions ORDER BY { column | expression [ASC | DESC] [NULLS FIRST | NULLS LAST] [, ...] } LIMIT expression [OFFSET expression] [UNION [ALL] select_statement] ...
SET
SET [query_option=option_value]
SHOW
SHOW DATABASES [[LIKE] 'pattern'] SHOW SCHEMAS [[LIKE] 'pattern'] - an alias for SHOW DATABASES SHOW TABLES [IN database_name] [[LIKE] 'pattern'] SHOW [AGGREGATE | ANALYTIC] FUNCTIONS [IN database_name] [[LIKE] 'pattern'] SHOW CREATE TABLE [database_name.]table_name SHOW TABLE STATS [database_name.]table_name SHOW COLUMN STATS [database_name.]table_name SHOW PARTITIONS [database_name.]table_name SHOW FILES IN [database_name.]table_name [PARTITION (key_col=value [, key_col=value])] SHOW ROLES SHOW CURRENT ROLES SHOW ROLE GRANT GROUP group_name SHOW GRANT ROLE role_name
TRUNCATE TABLE
TRUNCATE TABLE [db_name.]table_name
USE
USE db_name
常用函數
數學函數
abs(numeric_type a) acos(double a) asin(double a) atan(double a) atan2(double a, double b) bin(bigint a) ceil(double a), ceil(decimal(p,s) a), ceiling(double a), ceiling(decimal(p,s) a), dceil(double a), dceil(decimal(p,s) a) conv(bigint num, int from_base, int to_base), conv(string num, int from_base, int to_base) cos(double a) cosh(double a) cot(double a) degrees(double a) e() exp(double a), dexp(double a) factorial(integer_type a)
位運算函數
bitand(integer_type a, same_type b) bitnot(integer_type a) bitor(integer_type a, same_type b) bitxor(integer_type a, same_type b)
類型轉換函數
cast(expr AS type) typeof(type value)
時間日期函數
add_months(timestamp date, int months), add_months(timestamp date, bigint months) adddate(timestamp startdate, int days), adddate(timestamp startdate, bigint days) current_timestamp(), now() date_add(timestamp startdate, int days), date_add(timestamp startdate, interval_expression) date_part(string, timestamp) date_sub(timestamp startdate, int days), date_sub(timestamp startdate, interval_expression) datediff(timestamp enddate, timestamp startdate) day(timestamp date), dayofmonth(timestamp date) dayname(timestamp date) dayofweek(timestamp date) dayofyear(timestamp date) days_add(timestamp startdate, int days), days_add(timestamp startdate, bigint days) days_sub(timestamp startdate, int days), days_sub(timestamp startdate, bigint days) extract(timestamp, string unit), extract(unit FROM timestamp)
條件函數
CASE a WHEN b THEN c [WHEN d THEN e]... [ELSE f] END if(boolean condition, type ifTrue, type ifFalseOrNull) ifnull(type a, type ifNull) isfalse(boolean) isnotfalse(boolean) isnull(type a, type ifNull) istrue(boolean)
字符串函數
ascii(string str) btrim(string a), btrim(string a, string chars_to_trim) char_length(string a), character_length(string a) chr(int character_code) concat(string a, string b...) find_in_set(string str, string strList) length(string a) lower(string a), lcase(string a) regexp_extract(string subject, string pattern, int index) regexp_like(string source, string pattern[, string options]) repeat(string str, int n) reverse(string a) substr(string a, int start [, int len]), substring(string a, int start [, int len]) translate(string input, string from, string to) trim(string a) upper(string a), ucase(string a)
聚合函數
APPX_MEDIAN([DISTINCT | ALL] expression) AVG([DISTINCT | ALL] expression) [OVER (analytic_clause)] COUNT([DISTINCT | ALL] expression) [OVER (analytic_clause)] GROUP_CONCAT([ALL] expression [, separator]) MAX([DISTINCT | ALL] expression) [OVER (analytic_clause)] MIN([DISTINCT | ALL] expression) [OVER (analytic_clause)] NDV([DISTINCT | ALL] expression) { STDDEV | STDDEV_SAMP | STDDEV_POP } ([DISTINCT | ALL] expression) SUM([DISTINCT | ALL] expression) [OVER (analytic_clause)] { VARIANCE | VAR[IANCE]_SAMP | VAR[IANCE]_POP } ([DISTINCT | ALL] expression)