DROP TABLE IF EXISTS empsalary; CREATE TABLE empsalary( depname varchar, empno bigint, salary int, enroll_date date ); INSERT INTO empsalary VALUES('develop',10, 5200, '2007/08/01'); INSERT INTO empsalary VALUES('sales', 1, 5000, '2006/10/01'); INSERT INTO empsalary VALUES('personnel', 5, 3500, '2007/12/10'); INSERT INTO empsalary VALUES('sales', 4, 4800, '2007/08/08'); INSERT INTO empsalary VALUES('sales', 6, 5500, '2007/01/02'); INSERT INTO empsalary VALUES('personnel', 2, 3900, '2006/12/23'); INSERT INTO empsalary VALUES('develop', 7, 4200, '2008/01/01'); INSERT INTO empsalary VALUES('develop', 9, 4500, '2008/01/01'); INSERT INTO empsalary VALUES('sales', 3, 4800, '2007/08/01'); INSERT INTO empsalary VALUES('develop', 8, 6000, '2006/10/01'); INSERT INTO empsalary VALUES('develop', 11, 5200, '2007/08/15'); postgres=# select * from empsalary ; depname | empno | salary | enroll_date -----------+-------+--------+------------- develop | 10 | 5200 | 2007-08-01 sales | 1 | 5000 | 2006-10-01 personnel | 5 | 3500 | 2007-12-10 sales | 4 | 4800 | 2007-08-08 sales | 6 | 5500 | 2007-01-02 personnel | 2 | 3900 | 2006-12-23 develop | 7 | 4200 | 2008-01-01 develop | 9 | 4500 | 2008-01-01 sales | 3 | 4800 | 2007-08-01 develop | 8 | 6000 | 2006-10-01 develop | 11 | 5200 | 2007-08-15 (11 rows)2.統計示例
postgres=# select sum(salary) OVER (PARTITION BY depname),avg(salary) OVER (PARTITION BY depname),* from empsalary; sum | avg | depname | empno | salary | enroll_date -------+-----------------------+-----------+-------+--------+------------- 25100 | 5020.0000000000000000 | develop | 10 | 5200 | 2007-08-01 25100 | 5020.0000000000000000 | develop | 7 | 4200 | 2008-01-01 25100 | 5020.0000000000000000 | develop | 9 | 4500 | 2008-01-01 25100 | 5020.0000000000000000 | develop | 8 | 6000 | 2006-10-01 25100 | 5020.0000000000000000 | develop | 11 | 5200 | 2007-08-15 7400 | 3700.0000000000000000 | personnel | 2 | 3900 | 2006-12-23 7400 | 3700.0000000000000000 | personnel | 5 | 3500 | 2007-12-10 20100 | 5025.0000000000000000 | sales | 3 | 4800 | 2007-08-01 20100 | 5025.0000000000000000 | sales | 1 | 5000 | 2006-10-01 20100 | 5025.0000000000000000 | sales | 4 | 4800 | 2007-08-08 20100 | 5025.0000000000000000 | sales | 6 | 5500 | 2007-01-02 (11 rows)b.統計人員在所在部門的薪水排名狀況
postgres=# select rank() OVER (PARTITION BY depname ORDER BY salary),* from empsalary; rank | depname | empno | salary | enroll_date ------+-----------+-------+--------+------------- 1 | develop | 7 | 4200 | 2008-01-01 2 | develop | 9 | 4500 | 2008-01-01 3 | develop | 10 | 5200 | 2007-08-01 3 | develop | 11 | 5200 | 2007-08-15 5 | develop | 8 | 6000 | 2006-10-01 1 | personnel | 5 | 3500 | 2007-12-10 2 | personnel | 2 | 3900 | 2006-12-23 1 | sales | 4 | 4800 | 2007-08-08 1 | sales | 3 | 4800 | 2007-08-01 3 | sales | 1 | 5000 | 2006-10-01 4 | sales | 6 | 5500 | 2007-01-02 (11 rows)3.一個有趣的例子 注意使用order by,結果會兩樣
create table foo(a int,b int) ; insert into foo values (1,1); insert into foo values (1,1); insert into foo values (2,1); insert into foo values (4,1); insert into foo values (2,1); insert into foo values (4,1); insert into foo values (5,1); insert into foo values (11,3); insert into foo values (12,3); insert into foo values (22,3); insert into foo values (16,3); insert into foo values (16,3); insert into foo values (16,3); postgres=# select sum(a) over (partition by b), a, b from foo; sum | a | b -----+----+--- 19 | 1 | 1 19 | 1 | 1 19 | 2 | 1 19 | 4 | 1 19 | 2 | 1 19 | 4 | 1 19 | 5 | 1 93 | 11 | 3 93 | 12 | 3 93 | 22 | 3 93 | 16 | 3 93 | 16 | 3 93 | 16 | 3 (13 rows) postgres=# select sum(a) over (partition by b order by a), a, b from foo; sum | a | b -----+----+--- 2 | 1 | 1 2 | 1 | 1 6 | 2 | 1 6 | 2 | 1 14 | 4 | 1 14 | 4 | 1 19 | 5 | 1 11 | 11 | 3 23 | 12 | 3 71 | 16 | 3 71 | 16 | 3 71 | 16 | 3 93 | 22 | 3 (13 rows) postgres=# select a, b, sum(a) over (partition by b order by a ROWS postgres(# BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) from foo; a | b | sum ----+---+----- 1 | 1 | 19 1 | 1 | 19 2 | 1 | 19 2 | 1 | 19 4 | 1 | 19 4 | 1 | 19 5 | 1 | 19 11 | 3 | 93 12 | 3 | 93 16 | 3 | 93 16 | 3 | 93 16 | 3 | 93 22 | 3 | 93 (13 rows)官網中的解釋是: By default, if ORDER BY is supplied then the frame consists of all rows from the start of the partition up through the current row, plus any following rows that are equal to the current row according to the ORDER BY clause. When ORDER BY is omitted the default frame consists of all rows in the partition.
postgres=# select row_number() over (partition by depname order by salary desc),* from empsalary; row_number | depname | empno | salary | enroll_date ------------+-----------+-------+--------+------------- 1 | develop | 8 | 6000 | 2006-10-01 2 | develop | 10 | 5200 | 2007-08-01 3 | develop | 11 | 5200 | 2007-08-15 4 | develop | 9 | 4500 | 2008-01-01 5 | develop | 7 | 4200 | 2008-01-01 1 | personnel | 2 | 3900 | 2006-12-23 2 | personnel | 5 | 3500 | 2007-12-10 1 | sales | 6 | 5500 | 2007-01-02 2 | sales | 1 | 5000 | 2006-10-01 3 | sales | 3 | 4800 | 2007-08-01 4 | sales | 4 | 4800 | 2007-08-08 (11 rows) postgres=# select rank() over(partition by depname order by salary desc),* from empsalary; rank | depname | empno | salary | enroll_date ------+-----------+-------+--------+------------- 1 | develop | 8 | 6000 | 2006-10-01 2 | develop | 10 | 5200 | 2007-08-01 2 | develop | 11 | 5200 | 2007-08-15 4 | develop | 9 | 4500 | 2008-01-01 5 | develop | 7 | 4200 | 2008-01-01 1 | personnel | 2 | 3900 | 2006-12-23 2 | personnel | 5 | 3500 | 2007-12-10 1 | sales | 6 | 5500 | 2007-01-02 2 | sales | 1 | 5000 | 2006-10-01 3 | sales | 3 | 4800 | 2007-08-01 3 | sales | 4 | 4800 | 2007-08-08 (11 rows) postgres=# select dense_rank() over(partition by depname order by salary desc),* from empsalary; dense_rank | depname | empno | salary | enroll_date ------------+-----------+-------+--------+------------- 1 | develop | 8 | 6000 | 2006-10-01 2 | develop | 10 | 5200 | 2007-08-01 2 | develop | 11 | 5200 | 2007-08-15 3 | develop | 9 | 4500 | 2008-01-01 4 | develop | 7 | 4200 | 2008-01-01 1 | personnel | 2 | 3900 | 2006-12-23 2 | personnel | 5 | 3500 | 2007-12-10 1 | sales | 6 | 5500 | 2007-01-02 2 | sales | 1 | 5000 | 2006-10-01 3 | sales | 3 | 4800 | 2007-08-01 3 | sales | 4 | 4800 | 2007-08-08 (11 rows) postgres=# select percent_rank() over(partition by depname order by salary desc),* from empsalary; percent_rank | depname | empno | salary | enroll_date -------------------+-----------+-------+--------+------------- 0 | develop | 8 | 6000 | 2006-10-01 0.25 | develop | 10 | 5200 | 2007-08-01 0.25 | develop | 11 | 5200 | 2007-08-15 0.75 | develop | 9 | 4500 | 2008-01-01 1 | develop | 7 | 4200 | 2008-01-01 0 | personnel | 2 | 3900 | 2006-12-23 1 | personnel | 5 | 3500 | 2007-12-10 0 | sales | 6 | 5500 | 2007-01-02 0.333333333333333 | sales | 1 | 5000 | 2006-10-01 0.666666666666667 | sales | 3 | 4800 | 2007-08-01 0.666666666666667 | sales | 4 | 4800 | 2007-08-08 (11 rows) postgres=# select cume_dist()over(partition by depname order by salary desc),* from empsalary; cume_dist | depname | empno | salary | enroll_date -----------+-----------+-------+--------+------------- 0.2 | develop | 8 | 6000 | 2006-10-01 0.6 | develop | 10 | 5200 | 2007-08-01 0.6 | develop | 11 | 5200 | 2007-08-15 0.8 | develop | 9 | 4500 | 2008-01-01 1 | develop | 7 | 4200 | 2008-01-01 0.5 | personnel | 2 | 3900 | 2006-12-23 1 | personnel | 5 | 3500 | 2007-12-10 0.25 | sales | 6 | 5500 | 2007-01-02 0.5 | sales | 1 | 5000 | 2006-10-01 1 | sales | 3 | 4800 | 2007-08-01 1 | sales | 4 | 4800 | 2007-08-08 (11 rows) postgres=# select ntile(3)over(partition by depname order by salary desc),* from empsalary; ntile | depname | empno | salary | enroll_date -------+-----------+-------+--------+------------- 1 | develop | 8 | 6000 | 2006-10-01 1 | develop | 10 | 5200 | 2007-08-01 2 | develop | 11 | 5200 | 2007-08-15 2 | develop | 9 | 4500 | 2008-01-01 3 | develop | 7 | 4200 | 2008-01-01 1 | personnel | 2 | 3900 | 2006-12-23 2 | personnel | 5 | 3500 | 2007-12-10 1 | sales | 6 | 5500 | 2007-01-02 1 | sales | 1 | 5000 | 2006-10-01 2 | sales | 3 | 4800 | 2007-08-01 3 | sales | 4 | 4800 | 2007-08-08 (11 rows) postgres=# select lag(salary,2,null)over(partition by depname order by salary desc),* from empsalary; lag | depname | empno | salary | enroll_date ------+-----------+-------+--------+------------- | develop | 8 | 6000 | 2006-10-01 | develop | 10 | 5200 | 2007-08-01 6000 | develop | 11 | 5200 | 2007-08-15 5200 | develop | 9 | 4500 | 2008-01-01 5200 | develop | 7 | 4200 | 2008-01-01 | personnel | 2 | 3900 | 2006-12-23 | personnel | 5 | 3500 | 2007-12-10 | sales | 6 | 5500 | 2007-01-02 | sales | 1 | 5000 | 2006-10-01 5500 | sales | 3 | 4800 | 2007-08-01 5000 | sales | 4 | 4800 | 2007-08-08 (11 rows) postgres=# select first_value(salary)over(partition by depname order by salary desc),* from empsalary; first_value | depname | empno | salary | enroll_date -------------+-----------+-------+--------+------------- 6000 | develop | 8 | 6000 | 2006-10-01 6000 | develop | 10 | 5200 | 2007-08-01 6000 | develop | 11 | 5200 | 2007-08-15 6000 | develop | 9 | 4500 | 2008-01-01 6000 | develop | 7 | 4200 | 2008-01-01 3900 | personnel | 2 | 3900 | 2006-12-23 3900 | personnel | 5 | 3500 | 2007-12-10 5500 | sales | 6 | 5500 | 2007-01-02 5500 | sales | 1 | 5000 | 2006-10-01 5500 | sales | 3 | 4800 | 2007-08-01 5500 | sales | 4 | 4800 | 2007-08-08 (11 rows) postgres=# select last_value(salary)over(partition by depname order by salary desc),* from empsalary; last_value | depname | empno | salary | enroll_date ------------+-----------+-------+--------+------------- 6000 | develop | 8 | 6000 | 2006-10-01 5200 | develop | 10 | 5200 | 2007-08-01 5200 | develop | 11 | 5200 | 2007-08-15 4500 | develop | 9 | 4500 | 2008-01-01 4200 | develop | 7 | 4200 | 2008-01-01 3900 | personnel | 2 | 3900 | 2006-12-23 3500 | personnel | 5 | 3500 | 2007-12-10 5500 | sales | 6 | 5500 | 2007-01-02 5000 | sales | 1 | 5000 | 2006-10-01 4800 | sales | 3 | 4800 | 2007-08-01 4800 | sales | 4 | 4800 | 2007-08-08 (11 rows) postgres=# select last_value(aa.salary)over(partition by aa.depname),* from (select depname,empno,salary,enroll_date from empsalary order by depname,salary ) aa; last_value | depname | empno | salary | enroll_date ------------+-----------+-------+--------+------------- 6000 | develop | 7 | 4200 | 2008-01-01 6000 | develop | 9 | 4500 | 2008-01-01 6000 | develop | 10 | 5200 | 2007-08-01 6000 | develop | 11 | 5200 | 2007-08-15 6000 | develop | 8 | 6000 | 2006-10-01 3900 | personnel | 5 | 3500 | 2007-12-10 3900 | personnel | 2 | 3900 | 2006-12-23 5500 | sales | 4 | 4800 | 2007-08-08 5500 | sales | 3 | 4800 | 2007-08-01 5500 | sales | 1 | 5000 | 2006-10-01 5500 | sales | 6 | 5500 | 2007-01-02 (11 rows) postgres=# select nth_value(salary,2)over(partition by depname order by salary desc),* from empsalary; nth_value | depname | empno | salary | enroll_date -----------+-----------+-------+--------+------------- | develop | 8 | 6000 | 2006-10-01 5200 | develop | 10 | 5200 | 2007-08-01 5200 | develop | 11 | 5200 | 2007-08-15 5200 | develop | 9 | 4500 | 2008-01-01 5200 | develop | 7 | 4200 | 2008-01-01 | personnel | 2 | 3900 | 2006-12-23 3500 | personnel | 5 | 3500 | 2007-12-10 | sales | 6 | 5500 | 2007-01-02 5000 | sales | 1 | 5000 | 2006-10-01 5000 | sales | 3 | 4800 | 2007-08-01 5000 | sales | 4 | 4800 | 2007-08-08 (11 rows)當一個查詢涉及多個窗口函數的時候,能夠用別名的辦法來使用,更簡單:
postgres=# select sum(salary)over w,avg(salary) over w,* from empsalary window w as (partition by depname order by salary desc); sum | avg | depname | empno | salary | enroll_date -------+-----------------------+-----------+-------+--------+------------- 6000 | 6000.0000000000000000 | develop | 8 | 6000 | 2006-10-01 16400 | 5466.6666666666666667 | develop | 10 | 5200 | 2007-08-01 16400 | 5466.6666666666666667 | develop | 11 | 5200 | 2007-08-15 20900 | 5225.0000000000000000 | develop | 9 | 4500 | 2008-01-01 25100 | 5020.0000000000000000 | develop | 7 | 4200 | 2008-01-01 3900 | 3900.0000000000000000 | personnel | 2 | 3900 | 2006-12-23 7400 | 3700.0000000000000000 | personnel | 5 | 3500 | 2007-12-10 5500 | 5500.0000000000000000 | sales | 6 | 5500 | 2007-01-02 10500 | 5250.0000000000000000 | sales | 1 | 5000 | 2006-10-01 20100 | 5025.0000000000000000 | sales | 3 | 4800 | 2007-08-01 20100 | 5025.0000000000000000 | sales | 4 | 4800 | 2007-08-08 (11 rows)這個寫法和下面的是同樣的,不過更簡單
上面給了兩個last_value的示例,可是第一種寫法是沒有問題的,雖然並不能達到返回窗口最後一個值的效果,並且first_value沒有這種問題的,其實,參考前面的官網提醒,同樣可得出相似的結論:都是order by 惹的禍
官網中的解釋是: By default, if ORDER BY is supplied then the frame consists of all rows from the start of the partition up through the current row, plus any following rows that are equal to the current row according to the ORDER BY clause. When ORDER BY is omitted the default frame consists of all rows in the partition.
默認狀況下,帶了order by 參數會從分組的起始值開始一直疊加,直到當前值爲止,當忽略order by 參數則會計算分組中全部值的和。
感謝joan_he@189.cn的提醒和digoal的解釋。 html
參考文檔:http://umitanuki.net/pgsql/wfv08/design.html sql