Hive高階聚合函數 GROUPING SETS、Cube、Rollup

 

-- GROUPING SETS is a sub-clause of GROUP BY that lets a single query request
-- several grouping combinations at once. Conceptually it behaves like several
-- separate GROUP BY queries whose results are concatenated with UNION ALL.
-- Grouping sets computed here: (device_id), (os_id), (device_id, os_id),
-- and () for the grand total.
-- app_id is listed in GROUP BY but is a member of none of the grouping sets;
-- the UNION ALL form that follows this query shows it as NULL in every row.
select
     device_id
    ,os_id
    ,app_id
    ,count(user_id)
from test_xinyan_reg
group by device_id,os_id,app_id
grouping sets((device_id),(os_id),(device_id,os_id),())
-- Terminate the statement so the next SELECT is parsed as a separate query.
;
-- Equivalent to the GROUPING SETS query above: one SELECT per grouping set,
-- combined with UNION ALL.
-- Every NULL placeholder is aliased so that all branches expose the same
-- column names in the same positions (device_id, os_id, app_id, <count>).
select
     device_id
    ,null as os_id
    ,null as app_id
    ,count(user_id)
from test_xinyan_reg
group by device_id
union all
select
     null as device_id
    ,os_id
    ,null as app_id
    ,count(user_id)
from test_xinyan_reg
group by os_id
union all
select
     device_id
    ,os_id
    ,null as app_id
    ,count(user_id)
from test_xinyan_reg
group by device_id,os_id
union all
-- Grand total: corresponds to the empty grouping set ().
select
     null as device_id
    ,null as os_id
    ,null as app_id
    ,count(user_id)
from test_xinyan_reg
;

-- CUBE (the "data cube") aggregates over every combination of the listed
-- dimensions. For example, cube(a, b, c) groups by (a,b,c), then (a,b), (a,c),
-- (a), (b,c), (b), (c), and finally by () for the whole table.
-- It is shorthand for writing out the equivalent GROUPING SETS list.
select
     device_id
    ,os_id
    ,app_id
    ,client_version
    ,from_id
    ,count(user_id)
from test_xinyan_reg
group by
     device_id
    ,os_id
    ,app_id
    ,client_version
    ,from_id
with cube
;


-- ROLLUP produces multi-level subtotals for a hierarchy: grouping columns are
-- dropped one at a time from right to left, ending with the grand total.
select
     device_id
    ,os_id
    ,app_id
    ,client_version
    ,from_id
    ,count(user_id)
from test_xinyan_reg
group by
     device_id
    ,os_id
    ,app_id
    ,client_version
    ,from_id
with rollup
;

 

-- Self-contained CUBE demo over eight inline rows.
-- The pair (id = 4, name = 'd') occurs five times; every other pair occurs once.
with sample_rows as (
              select 1 as id, 'a' as name, 11 as age
    union all select 2 as id, 'b' as name, 12 as age
    union all select 3 as id, 'c' as name, 13 as age
    union all select 4 as id, 'd' as name, 14 as age
    union all select 4 as id, 'd' as name, 15 as age
    union all select 4 as id, 'd' as name, 16 as age
    union all select 4 as id, 'd' as name, 17 as age
    union all select 4 as id, 'd' as name, 18 as age
)
select
     id
    ,name
    ,count(age)
from sample_rows
group by id, name
with cube
;
+------------+------------+------------+
| id         | name       | _c2        |
+------------+------------+------------+
| NULL       | NULL       | 8          |
| NULL       | a          | 1          |
| NULL       | b          | 1          |
| NULL       | c          | 1          |
| NULL       | d          | 5          |
| 1          | NULL       | 1          |
| 1          | a          | 1          |
| 2          | NULL       | 1          |
| 2          | b          | 1          |
| 3          | NULL       | 1          |
| 3          | c          | 1          |
| 4          | NULL       | 5          |
| 4          | d          | 5          |
+------------+------------+------------+

-- Self-contained ROLLUP demo over the same eight inline rows as the CUBE demo.
-- The pair (id = 4, name = 'd') occurs five times; every other pair occurs once.
with sample_rows as (
              select 1 as id, 'a' as name, 11 as age
    union all select 2 as id, 'b' as name, 12 as age
    union all select 3 as id, 'c' as name, 13 as age
    union all select 4 as id, 'd' as name, 14 as age
    union all select 4 as id, 'd' as name, 15 as age
    union all select 4 as id, 'd' as name, 16 as age
    union all select 4 as id, 'd' as name, 17 as age
    union all select 4 as id, 'd' as name, 18 as age
)
select
     id
    ,name
    ,count(age)
from sample_rows
group by id, name
with rollup
;
+------------+------------+------------+
| id         | name       | _c2        |
+------------+------------+------------+
| NULL       | NULL       | 8          |
| 1          | NULL       | 1          |
| 1          | a          | 1          |
| 2          | NULL       | 1          |
| 2          | b          | 1          |
| 3          | NULL       | 1          |
| 3          | c          | 1          |
| 4          | NULL       | 5          |
| 4          | d          | 5          |
+------------+------------+------------+

 

 

ref: https://blog.csdn.net/qq_31573519/article/details/89054136

相關文章
相關標籤/搜索