測試數據
-- Create the fixture table used by every query below.
-- Each row holds one student with three subject scores (language, math, english)
-- plus the class and department identifiers used for partitioning.
create table student_scores (
    id           int,
    studentId    int,
    language     int,
    math         int,
    english      int,
    classId      string,
    departmentId string
);
-- Load the fixture rows: 20 students spread over department1 and department2,
-- each department having a class1 and a class2.
-- Value order follows the table definition:
-- id, studentId, language, math, english, classId, departmentId
insert into table student_scores values
    ( 1, 111, 68, 69, 90, 'class1', 'department1'),
    ( 2, 112, 73, 80, 96, 'class1', 'department1'),
    ( 3, 113, 90, 74, 75, 'class1', 'department1'),
    ( 4, 114, 89, 94, 93, 'class1', 'department1'),
    ( 5, 115, 99, 93, 89, 'class1', 'department1'),
    ( 6, 121, 96, 74, 79, 'class2', 'department1'),
    ( 7, 122, 89, 86, 85, 'class2', 'department1'),
    ( 8, 123, 70, 78, 61, 'class2', 'department1'),
    ( 9, 124, 76, 70, 76, 'class2', 'department1'),
    (10, 211, 89, 93, 60, 'class1', 'department2'),
    (11, 212, 76, 83, 75, 'class1', 'department2'),
    (12, 213, 71, 94, 90, 'class1', 'department2'),
    (13, 214, 94, 94, 66, 'class1', 'department2'),
    (14, 215, 84, 82, 73, 'class1', 'department2'),
    (15, 216, 85, 74, 93, 'class1', 'department2'),
    (16, 221, 77, 99, 61, 'class2', 'department2'),
    (17, 222, 80, 78, 96, 'class2', 'department2'),
    (18, 223, 79, 74, 96, 'class2', 'department2'),
    (19, 224, 75, 80, 78, 'class2', 'department2'),
    (20, 225, 82, 85, 63, 'class2', 'department2');
聚合開窗函數
count
-- count() as a window function, shown over four different window definitions.
select
    studentId,
    math,
    departmentId,
    classId,
    -- Window: every row that passes the where filter
    count(math) over () as count1,
    -- Window: all rows that share the classId of the current row
    count(math) over (partition by classId) as count2,
    -- Window: inside the classId partition ordered by math,
    -- the rows from the partition start up to and including the current row
    count(math) over (partition by classId order by math) as count3,
    -- Window: inside the classId partition ordered by math,
    -- the previous row, the current row and the next two rows
    count(math) over (
        partition by classId
        order by math
        rows between 1 preceding and 2 following
    ) as count4
from student_scores
where departmentId = 'department1';
結果
studentid math departmentid classid count1 count2 count3 count4
111 69 department1 class1 9 5 1 3
113 74 department1 class1 9 5 2 4
112 80 department1 class1 9 5 3 4
115 93 department1 class1 9 5 4 3
114 94 department1 class1 9 5 5 2
124 70 department1 class2 9 4 1 3
121 74 department1 class2 9 4 2 4
123 78 department1 class2 9 4 3 3
122 86 department1 class2 9 4 4 2
結果解釋:
studentid=115,count1爲全部的行數9,count2爲分區class1中的行數5,count3爲分區class1中math值<=93的行數4,
count4爲分區class1中按math排序後,由當前行、往前1行、往後2行(實際往後只有1行)組成的窗口的總行數3。
sum
-- sum() as a window function, shown over four different window definitions.
select
    studentId,
    math,
    departmentId,
    classId,
    -- Window: every row that passes the where filter
    sum(math) over () as sum1,
    -- Window: all rows that share the classId of the current row
    sum(math) over (partition by classId) as sum2,
    -- Window: inside the classId partition ordered by math,
    -- the rows from the partition start up to and including the current row (running total)
    sum(math) over (partition by classId order by math) as sum3,
    -- Window: inside the classId partition ordered by math,
    -- the previous row, the current row and the next two rows
    sum(math) over (
        partition by classId
        order by math
        rows between 1 preceding and 2 following
    ) as sum4
from student_scores
where departmentId = 'department1';
結果
studentid math departmentid classid sum1 sum2 sum3 sum4
111 69 department1 class1 718 410 69 223
113 74 department1 class1 718 410 143 316
112 80 department1 class1 718 410 223 341
115 93 department1 class1 718 410 316 267
114 94 department1 class1 718 410 410 187
124 70 department1 class2 718 308 70 222
121 74 department1 class2 718 308 144 308
123 78 department1 class2 718 308 222 238
122 86 department1 class2 718 308 308 164
結果解釋:
同count開窗函數
min
-- min() as a window function, shown over four different window definitions.
select
    studentId,
    math,
    departmentId,
    classId,
    -- Window: every row that passes the where filter
    min(math) over () as min1,
    -- Window: all rows that share the classId of the current row
    min(math) over (partition by classId) as min2,
    -- Window: inside the classId partition ordered by math,
    -- the rows from the partition start up to and including the current row
    min(math) over (partition by classId order by math) as min3,
    -- Window: inside the classId partition ordered by math,
    -- the previous row, the current row and the next two rows
    min(math) over (
        partition by classId
        order by math
        rows between 1 preceding and 2 following
    ) as min4
from student_scores
where departmentId = 'department1';
結果
studentid math departmentid classid min1 min2 min3 min4
111 69 department1 class1 69 69 69 69
113 74 department1 class1 69 69 69 69
112 80 department1 class1 69 69 69 74
115 93 department1 class1 69 69 69 80
114 94 department1 class1 69 69 69 93
124 70 department1 class2 69 70 70 70
121 74 department1 class2 69 70 70 70
123 78 department1 class2 69 70 70 74
122 86 department1 class2 69 70 70 78
結果解釋:
同count開窗函數
max
-- max() as a window function, shown over four different window definitions.
select
    studentId,
    math,
    departmentId,
    classId,
    -- Window: every row that passes the where filter
    max(math) over () as max1,
    -- Window: all rows that share the classId of the current row
    max(math) over (partition by classId) as max2,
    -- Window: inside the classId partition ordered by math,
    -- the rows from the partition start up to and including the current row (running maximum)
    max(math) over (partition by classId order by math) as max3,
    -- Window: inside the classId partition ordered by math,
    -- the previous row, the current row and the next two rows
    max(math) over (
        partition by classId
        order by math
        rows between 1 preceding and 2 following
    ) as max4
from student_scores
where departmentId = 'department1';
結果
studentid math departmentid classid max1 max2 max3 max4
111 69 department1 class1 94 94 69 80
113 74 department1 class1 94 94 74 93
112 80 department1 class1 94 94 80 94
115 93 department1 class1 94 94 93 94
114 94 department1 class1 94 94 94 94
124 70 department1 class2 94 86 70 78
121 74 department1 class2 94 86 74 86
123 78 department1 class2 94 86 78 86
122 86 department1 class2 94 86 86 86
結果解釋:
同count開窗函數
avg
-- avg() as a window function, shown over four different window definitions.
select
    studentId,
    math,
    departmentId,
    classId,
    -- Window: every row that passes the where filter
    avg(math) over () as avg1,
    -- Window: all rows that share the classId of the current row
    avg(math) over (partition by classId) as avg2,
    -- Window: inside the classId partition ordered by math,
    -- the rows from the partition start up to and including the current row (running average)
    avg(math) over (partition by classId order by math) as avg3,
    -- Window: inside the classId partition ordered by math,
    -- the previous row, the current row and the next two rows (moving average)
    avg(math) over (
        partition by classId
        order by math
        rows between 1 preceding and 2 following
    ) as avg4
from student_scores
where departmentId = 'department1';
結果
studentid math departmentid classid avg1 avg2 avg3 avg4
111 69 department1 class1 79.77777777777777 82.0 69.0 74.33333333333333
113 74 department1 class1 79.77777777777777 82.0 71.5 79.0
112 80 department1 class1 79.77777777777777 82.0 74.33333333333333 85.25
115 93 department1 class1 79.77777777777777 82.0 79.0 89.0
114 94 department1 class1 79.77777777777777 82.0 82.0 93.5
124 70 department1 class2 79.77777777777777 77.0 70.0 74.0
121 74 department1 class2 79.77777777777777 77.0 72.0 77.0
123 78 department1 class2 79.77777777777777 77.0 74.0 79.33333333333333
122 86 department1 class2 79.77777777777777 77.0 77.0 82.0
結果解釋:
同count開窗函數
first_value
-- first_value() as a window function, shown over four different window definitions.
select
    studentId,
    math,
    departmentId,
    classId,
    -- Window: every row that passes the where filter.
    -- No ordering is specified here.
    -- NOTE(review): which row counts as first presumably depends on engine row order
    first_value(math) over () as first_value1,
    -- Window: all rows that share the classId of the current row, again without ordering
    first_value(math) over (partition by classId) as first_value2,
    -- Window: inside the classId partition ordered by math,
    -- the rows from the partition start up to and including the current row
    first_value(math) over (partition by classId order by math) as first_value3,
    -- Window: inside the classId partition ordered by math,
    -- the previous row, the current row and the next two rows
    first_value(math) over (
        partition by classId
        order by math
        rows between 1 preceding and 2 following
    ) as first_value4
from student_scores
where departmentId = 'department1';
結果
studentid math departmentid classid first_value1 first_value2 first_value3 first_value4
111 69 department1 class1 69 69 69 69
113 74 department1 class1 69 69 69 69
112 80 department1 class1 69 69 69 74
115 93 department1 class1 69 69 69 80
114 94 department1 class1 69 69 69 93
124 70 department1 class2 69 74 70 70
121 74 department1 class2 69 74 70 70
123 78 department1 class2 69 74 70 74
122 86 department1 class2 69 74 70 78
結果解釋:
studentid=124 first_value1:第一個值是69,first_value2:classId=class2分區 math的第一個值是74。
last_value
-- last_value() as a window function, shown over four different window definitions.
select
    studentId,
    math,
    departmentId,
    classId,
    -- Window: every row that passes the where filter.
    -- No ordering is specified here.
    -- NOTE(review): which row counts as last presumably depends on engine row order
    last_value(math) over () as last_value1,
    -- Window: all rows that share the classId of the current row, again without ordering
    last_value(math) over (partition by classId) as last_value2,
    -- Window: inside the classId partition ordered by math,
    -- the rows from the partition start up to and including the current row.
    -- Because the window ends at the current row, the sample output equals the current math value
    last_value(math) over (partition by classId order by math) as last_value3,
    -- Window: inside the classId partition ordered by math,
    -- the previous row, the current row and the next two rows
    last_value(math) over (
        partition by classId
        order by math
        rows between 1 preceding and 2 following
    ) as last_value4
from student_scores
where departmentId = 'department1';
結果
studentid math departmentid classid last_value1 last_value2 last_value3 last_value4
111 69 department1 class1 70 93 69 80
113 74 department1 class1 70 93 74 93
112 80 department1 class1 70 93 80 94
115 93 department1 class1 70 93 93 94
114 94 department1 class1 70 93 94 94
124 70 department1 class2 70 70 70 78
121 74 department1 class2 70 70 74 86
123 78 department1 class2 70 70 78 86
122 86 department1 class2 70 70 86 86
lag
lag(col,n,default) 用於統計窗口內往上第n個值。
col:列名
n:往上第n行
default:往上第n行爲NULL時候,取默認值,不指定則取NULL
-- lag(): read the math value located two rows before the current row
-- inside the classId partition ordered by math.
select
    studentId,
    math,
    departmentId,
    classId,
    -- Two rows back, falling back to the default 60 when no such row exists
    lag(math, 2, 60) over (partition by classId order by math) as lag1,
    -- Two rows back, yielding NULL when no such row exists (no default given)
    lag(math, 2) over (partition by classId order by math) as lag2
from student_scores
where departmentId = 'department1';
結果
studentid math departmentid classid lag1 lag2
111 69 department1 class1 60 NULL
113 74 department1 class1 60 NULL
112 80 department1 class1 69 69
115 93 department1 class1 74 74
114 94 department1 class1 80 80
124 70 department1 class2 60 NULL
121 74 department1 class2 60 NULL
123 78 department1 class2 70 70
122 86 department1 class2 74 74
結果解釋:
第3行 lag1:窗口內(69 74 80) 當前行80 向上取第二個值爲69
倒數第3行 lag2:窗口內(70 74) 當前行74 向上取第二個值爲NULL
lead
lead(col,n,default) 用於統計窗口內往下第n個值。
col:列名
n:往下第n行
default:往下第n行爲NULL時候,取默認值,不指定則取NULL
-- lead(): read the math value located two rows after the current row
-- inside the classId partition ordered by math.
select
    studentId,
    math,
    departmentId,
    classId,
    -- Two rows ahead, falling back to the default 60 when no such row exists
    lead(math, 2, 60) over (partition by classId order by math) as lead1,
    -- Two rows ahead, yielding NULL when no such row exists (no default given)
    lead(math, 2) over (partition by classId order by math) as lead2
from student_scores
where departmentId = 'department1';
結果
studentid math departmentid classid lead1 lead2
111 69 department1 class1 80 80
113 74 department1 class1 93 93
112 80 department1 class1 94 94
115 93 department1 class1 60 NULL
114 94 department1 class1 60 NULL
124 70 department1 class2 78 78
121 74 department1 class2 86 86
123 78 department1 class2 60 NULL
122 86 department1 class2 60 NULL
結果解釋:
第4行lead1 窗口內向下第二個值爲空,賦值60
cume_dist
計算某個窗口或分區中某個值的累積分佈。假定升序排序,則使用如下公式確定累積分佈:
小於等於當前值x的行數 / 窗口或partition分區內的總行數。其中,x 等於 order by 子句中指定的列的當前行中的值。
-- cume_dist(): cumulative distribution of the math score.
select
    studentId,
    math,
    departmentId,
    classId,
    -- Share of all filtered rows whose math is less than or equal to the current math
    cume_dist() over (order by math) as cume_dist1,
    -- Share of all filtered rows whose math is greater than or equal to the current math
    cume_dist() over (order by math desc) as cume_dist2,
    -- Share of rows in the same classId whose math is less than or equal to the current math
    cume_dist() over (partition by classId order by math) as cume_dist3
from student_scores
where departmentId = 'department1';
結果
studentid math departmentid classid cume_dist1 cume_dist2 cume_dist3
111 69 department1 class1 0.1111111111111111 1.0 0.2
113 74 department1 class1 0.4444444444444444 0.7777777777777778 0.4
112 80 department1 class1 0.6666666666666666 0.4444444444444444 0.6
115 93 department1 class1 0.8888888888888888 0.2222222222222222 0.8
114 94 department1 class1 1.0 0.1111111111111111 1.0
124 70 department1 class2 0.2222222222222222 0.8888888888888888 0.25
121 74 department1 class2 0.4444444444444444 0.7777777777777778 0.5
123 78 department1 class2 0.5555555555555556 0.5555555555555556 0.75
122 86 department1 class2 0.7777777777777778 0.3333333333333333 1.0
結果解釋:
第三行:
cume_dist1=小於等於80的人數爲6/總人數9=0.6666666666666666
cume_dist2=大於等於80的人數爲4/總人數9=0.4444444444444444
cume_dist3=分區內小於等於80的人數爲3/分區內總人數5=0.6
排序開窗函數
rank
確定一組值中某個值的排名。如果存在partition by,則爲每個分區中的每個值排名。排名可能不是連續的。例如,如果兩個行的排名爲 1,則下一個排名爲 3。
-- rank(): ranking by math score that leaves gaps after ties.
select
    *,
    -- Rank among all students
    rank() over (order by math) as rank1,
    -- Rank within each department
    rank() over (partition by departmentId order by math) as rank2,
    -- Rank within each class of each department
    rank() over (partition by departmentId, classId order by math) as rank3
from student_scores;
結果
id studentid language math english classid departmentid rank1 rank2 rank3
1 111 68 69 90 class1 department1 1 1 1
3 113 90 74 75 class1 department1 3 3 2
2 112 73 80 96 class1 department1 9 6 3
5 115 99 93 89 class1 department1 15 8 4
4 114 89 94 93 class1 department1 17 9 5
9 124 76 70 76 class2 department1 2 2 1
6 121 96 74 79 class2 department1 3 3 2
8 123 70 78 61 class2 department1 7 5 3
7 122 89 86 85 class2 department1 14 7 4
15 216 85 74 93 class1 department2 3 1 1
14 215 84 82 73 class1 department2 11 5 2
11 212 76 83 75 class1 department2 12 6 3
10 211 89 93 60 class1 department2 15 8 4
12 213 71 94 90 class1 department2 17 9 5
13 214 94 94 66 class1 department2 17 9 5
18 223 79 74 96 class2 department2 3 1 1
17 222 80 78 96 class2 department2 7 3 2
19 224 75 80 78 class2 department2 9 4 3
20 225 82 85 63 class2 department2 13 7 4
16 221 77 99 61 class2 department2 20 11 5
dense_rank
dense_rank與rank有一點不同:當排名相同時,接下來的排名是連續的。如兩個行的排名爲 1,則下一個排名爲 2。
-- dense_rank(): ranking by math score without gaps after ties.
select
    *,
    -- Dense rank among all students
    dense_rank() over (order by math) as dense_rank1,
    -- Dense rank within each department
    dense_rank() over (partition by departmentId order by math) as dense_rank2,
    -- Dense rank within each class of each department
    dense_rank() over (partition by departmentId, classId order by math) as dense_rank3
from student_scores;
結果:
id studentid language math english classid departmentid dense_rank1 dense_rank2 dense_rank3
1 111 68 69 90 class1 department1 1 1 1
3 113 90 74 75 class1 department1 3 3 2
2 112 73 80 96 class1 department1 5 5 3
5 115 99 93 89 class1 department1 10 7 4
4 114 89 94 93 class1 department1 11 8 5
9 124 76 70 76 class2 department1 2 2 1
6 121 96 74 79 class2 department1 3 3 2
8 123 70 78 61 class2 department1 4 4 3
7 122 89 86 85 class2 department1 9 6 4
15 216 85 74 93 class1 department2 3 1 1
14 215 84 82 73 class1 department2 6 4 2
11 212 76 83 75 class1 department2 7 5 3
10 211 89 93 60 class1 department2 10 7 4
12 213 71 94 90 class1 department2 11 8 5
13 214 94 94 66 class1 department2 11 8 5
18 223 79 74 96 class2 department2 3 1 1
17 222 80 78 96 class2 department2 4 2 2
19 224 75 80 78 class2 department2 5 3 3
20 225 82 85 63 class2 department2 8 6 4
16 221 77 99 61 class2 department2 12 9 5
ntile
將分區中已排序的行劃分爲指定數量、大小儘量相等的組,並返回當前行所在組的編號。
-- ntile(n): split each department, ordered by math, into n buckets
-- and report the bucket number of the current row.
select
    *,
    -- Two buckets per department
    ntile(2) over (partition by departmentid order by math) as ntile1,
    -- Three buckets per department
    ntile(3) over (partition by departmentid order by math) as ntile2
from student_scores;
結果
id studentid language math english classid departmentid ntile1 ntile2
1 111 68 69 90 class1 department1 1 1
9 124 76 70 76 class2 department1 1 1
6 121 96 74 79 class2 department1 1 1
3 113 90 74 75 class1 department1 1 2
8 123 70 78 61 class2 department1 1 2
2 112 73 80 96 class1 department1 2 2
7 122 89 86 85 class2 department1 2 3
5 115 99 93 89 class1 department1 2 3
4 114 89 94 93 class1 department1 2 3
18 223 79 74 96 class2 department2 1 1
15 216 85 74 93 class1 department2 1 1
17 222 80 78 96 class2 department2 1 1
19 224 75 80 78 class2 department2 1 1
14 215 84 82 73 class1 department2 1 2
11 212 76 83 75 class1 department2 1 2
20 225 82 85 63 class2 department2 2 2
10 211 89 93 60 class1 department2 2 2
12 213 71 94 90 class1 department2 2 3
13 214 94 94 66 class1 department2 2 3
16 221 77 99 61 class2 department2 2 3
結果解釋:
第8行
ntile1:對分區的數據均勻分成2組後,當前行所在組的編號爲2
ntile2:對分區的數據均勻分成3組後,當前行所在組的編號爲3
row_number
從1開始對分區內的數據排序。
-- row_number(): number the rows of each partition starting from 1.
select
    studentid,
    departmentid,
    classid,
    math,
    -- Sequence number inside each (departmentid, classid) partition ordered by math.
    -- Rows with equal math still receive distinct numbers
    row_number() over (partition by departmentid, classid order by math) as row_number
from student_scores;
結果
studentid departmentid classid math row_number
111 department1 class1 69 1
113 department1 class1 74 2
112 department1 class1 80 3
115 department1 class1 93 4
114 department1 class1 94 5
124 department1 class2 70 1
121 department1 class2 74 2
123 department1 class2 78 3
122 department1 class2 86 4
216 department2 class1 74 1
215 department2 class1 82 2
212 department2 class1 83 3
211 department2 class1 93 4
213 department2 class1 94 5
214 department2 class1 94 6
223 department2 class2 74 1
222 department2 class2 78 2
224 department2 class2 80 3
225 department2 class2 85 4
221 department2 class2 99 5
結果解釋:
同一分區內,即使值相同,序號也不同。如studentid=213與studentid=214的值都爲94,序號分別爲5、6。
percent_rank
計算給定行的百分比排名。可以用來計算超過了百分之多少的人。如360小助手開機速度超過了百分之多少的人。
(當前行的rank值-1)/(分組內的總行數-1)
-- percent_rank(): relative rank of a row inside its partition,
-- computed as (rank - 1) / (rows in partition - 1).
select
    studentid,
    departmentid,
    classid,
    math,
    -- Sequence number inside each (departmentid, classid) partition, shown for comparison
    row_number() over (partition by departmentid, classid order by math) as row_number,
    -- Percent rank inside the same partition and ordering
    percent_rank() over (partition by departmentid, classid order by math) as percent_rank
from student_scores;
結果
studentid departmentid classid math row_number percent_rank
111 department1 class1 69 1 0.0
113 department1 class1 74 2 0.25
112 department1 class1 80 3 0.5
115 department1 class1 93 4 0.75
114 department1 class1 94 5 1.0
124 department1 class2 70 1 0.0
121 department1 class2 74 2 0.3333333333333333
123 department1 class2 78 3 0.6666666666666666
122 department1 class2 86 4 1.0
216 department2 class1 74 1 0.0
215 department2 class1 82 2 0.2
212 department2 class1 83 3 0.4
211 department2 class1 93 4 0.6
213 department2 class1 94 5 0.8
214 department2 class1 94 6 0.8
223 department2 class2 74 1 0.0
222 department2 class2 78 2 0.25
224 department2 class2 80 3 0.5
225 department2 class2 85 4 0.75
221 department2 class2 99 5 1.0
結果解釋:
studentid=115,percent_rank=(4-1)/(5-1)=0.75
studentid=123,percent_rank=(3-1)/(4-1)=0.6666666666666666