1.count(distinct xxx) 窗口函數
Hive 2.x 版本支持:
count(distinct cust_num) over(partition by xxx order by xxx) -- distinct count of cust_num within each partition
Hive 1.x 版本不支持,需改寫為:
size(collect_set(cust_num) over(partition by xxx order by xxx)) -- distinct count within each partition, taken as the size of the windowed collect_set
2.collect_set 與 collect_list
collect_set : set集合,沒有重複元素
collect_list : list列表,可以有重複元素
-- collect_list gathers every value of the group into an array and keeps duplicates.
-- The inline subquery builds four sample rows (id = 1; value = 1, 3, 2, 2).
-- NOTE(review): assumes `dual` is a one-row helper table that exists in this
-- Hive environment -- confirm before running.
select collect_list(value)
from (
    select 1 as id, 1 as value from dual
    union all
    select 1 as id, 3 as value from dual
    union all
    select 1 as id, 2 as value from dual
    union all
    select 1 as id, 2 as value from dual
) t
group by id;
-- result recorded in the original note: [1,3,2,2]
-- collect_set gathers the values of the group into an array with duplicates removed.
-- The inline subquery builds four sample rows (id = 1; value = 1, 3, 2, 2).
-- NOTE(review): assumes `dual` is a one-row helper table that exists in this
-- Hive environment -- confirm before running.
select collect_set(value)
from (
    select 1 as id, 1 as value from dual
    union all
    select 1 as id, 3 as value from dual
    union all
    select 1 as id, 2 as value from dual
    union all
    select 1 as id, 2 as value from dual
) t
group by id;
-- result recorded in the original note: [1,3,2]
-- (element order presumably not guaranteed; wrap in sort_array when order matters)
3.排序
sort_array
-- sort_array orders the de-duplicated array produced by collect_set.
-- The inline subquery builds four sample rows (id = 1; value = 1, 3, 2, 2).
-- NOTE(review): assumes `dual` is a one-row helper table that exists in this
-- Hive environment -- confirm before running.
select sort_array(collect_set(value))
from (
    select 1 as id, 1 as value from dual
    union all
    select 1 as id, 3 as value from dual
    union all
    select 1 as id, 2 as value from dual
    union all
    select 1 as id, 2 as value from dual
) t
group by id;
-- result recorded in the original note: [1,2,3]
4.集合元素連接:
-- concat_ws joins its string arguments using the first argument as the separator.
select concat_ws('-', '1', '2', '3');
-- result recorded in the original note: 1-2-3
-- Joins the de-duplicated values of each group into one '-'-separated string.
-- The values are cast to string before collect_set so that concat_ws receives
-- an array of strings.
-- NOTE(review): assumes `dual` is a one-row helper table that exists in this
-- Hive environment -- confirm before running.
select concat_ws('-', collect_set(cast(value as string)))
from (
    select 1 as id, 1 as value from dual
    union all
    select 1 as id, 3 as value from dual
    union all
    select 1 as id, 2 as value from dual
    union all
    select 1 as id, 2 as value from dual
) t
group by id;
-- result recorded in the original note: 1-3-2
-- (element order follows collect_set and is presumably not guaranteed)