hive_很是用

1.count(distinct xxx) 窗口函數

hive 2.x版本支持:

  count(distinct cust_num) over(partition by xxx order by xxx)     -- number of distinct cust_num values within the window (a distinct count, not a sum)

hive1.x版本不支持,可改寫爲:

 size(collect_set(cust_num) over(partition by xxx order by xxx)) -- distinct count within the window: size of the set of cust_num values collected over it

 2.collect_set / collect_list

collect_set : set集合,沒有重複元素

collect_list : list列表,可以有重複元素

-- Gather every value of each id group into one array; duplicate values are kept.
-- NOTE(review): assumes a one-row helper table named dual exists -- confirm.
SELECT
    collect_list(value)
FROM (
    SELECT 1 AS id, 1 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 3 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 2 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 2 AS value FROM dual
) AS sample_rows
GROUP BY id;

[1,3,2,2]
-- Gather the values of each id group into one array with duplicates removed.
-- NOTE(review): assumes a one-row helper table named dual exists -- confirm.
SELECT
    collect_set(value)
FROM (
    SELECT 1 AS id, 1 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 3 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 2 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 2 AS value FROM dual
) AS sample_rows
GROUP BY id;

[1,3,2]

 

3.排序

sort_array

-- De-duplicate the values of each id group, then sort the resulting array.
-- NOTE(review): assumes a one-row helper table named dual exists -- confirm.
SELECT
    sort_array(collect_set(value))
FROM (
    SELECT 1 AS id, 1 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 3 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 2 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 2 AS value FROM dual
) AS sample_rows
GROUP BY id;

[1,2,3]

4.集合元素連接:

-- Join three string literals with '-' as the separator.
SELECT
    concat_ws('-', '1', '2', '3');
1-2-3
-- Cast each value to string, de-duplicate per id group, and join the
-- resulting array elements with '-' as the separator.
-- NOTE(review): assumes a one-row helper table named dual exists -- confirm.
SELECT
    concat_ws(
        '-',
        collect_set(CAST(value AS string))
    )
FROM (
    SELECT 1 AS id, 1 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 3 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 2 AS value FROM dual
    UNION ALL
    SELECT 1 AS id, 2 AS value FROM dual
) AS sample_rows
GROUP BY id;

1-3-2
相關文章
相關標籤/搜索