-- Demo table for the sorting examples: an integer key and a text label.
-- Stored as a plain text file: fields are tab-separated, one record per line.
CREATE TABLE sort_test (
    id   INT,
    name STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
[root@wadeyu ~]# cat sort_test.log
4679 aaa
4728 aaa
3040 aaa
4207 aaa
2231 aaa
1279 aaa
7954 aaa
582 aaa
7096 aaa
4878 aaa
9684 aaa
1540 aaa
4826 aaa
2543 aaa
2323 aaa
1420 aaa
5083 aaa
8965 aaa
1391 aaa
9719 aaa
9901 aaa
2393 aaa
6024 aaa
444 aaa
1574 aaa
8881 aaa
5739 aaa
8689 aaa
1614 aaa
9340 aaa
6726 aaa
109 aaa
6941 aaa
9562 aaa
9019 aaa
4945 aaa
2206 aaa
5910 aaa
8552 aaa
1795 aaa
2720 aaa
9007 aaa
8377 aaa
2179 aaa
3683 aaa
5869 aaa
5448 aaa
5223 aaa
5127 aaa
4616 aaa
2340 aaa
1268 aaa
4332 aaa
2989 aaa
19 aaa
7880 aaa
505 aaa
5975 aaa
5288 aaa
5682 aaa
376 aaa
7502 aaa
6448 aaa
3774 aaa
5541 aaa
9636 aaa
2037 aaa
246 aaa
6151 aaa
7837 aaa
1506 aaa
3749 aaa
9335 aaa
3973 aaa
5160 aaa
7929 aaa
834 aaa
3451 aaa
1766 aaa
6228 aaa
8961 aaa
8177 aaa
2340 aaa
4245 aaa
3226 aaa
2670 aaa
784 aaa
7699 aaa
2054 aaa
6006 aaa
4204 aaa
8905 aaa
6182 aaa
1271 aaa
5415 aaa
5164 aaa
4320 aaa
3736 aaa
2287 aaa
6559 aaa
- Order By
- Job 中只會啓動一個 reduce 作全局排序,數據量大時耗時會很長
- 在strict模式(hive.mapred.mode=strict)下,必須添加limit語句限制返回條數
# 語法格式
colOrder: ( ASC | DESC )
colNullOrder: (NULLS FIRST | NULLS LAST) -- (Note: Available in Hive 2.1.0 and later)
orderBy: ORDER BY colName colOrder? colNullOrder? (',' colName colOrder? colNullOrder?)*
query: SELECT expression (',' expression)* FROM src orderBy
# 排序
-- Global sort of the whole table by id, largest first.
-- SELECT * is kept so the result headers stay sort_test.id / sort_test.name.
SELECT *
FROM sort_test
ORDER BY id DESC;
+---------------+-----------------+--+
| sort_test.id | sort_test.name |
+---------------+-----------------+--+
| 9901 | aaa |
| 9719 | aaa |
| 9684 | aaa |
| 9636 | aaa |
| 9562 | aaa |
| 9340 | aaa |
| 9335 | aaa |
| 9019 | aaa |
| 9007 | aaa |
| 8965 | aaa |
| 8961 | aaa |
| 8905 | aaa |
| 8881 | aaa |
| 8689 | aaa |
| 8552 | aaa |
| 8377 | aaa |
| 8177 | aaa |
| 7954 | aaa |
| 7929 | aaa |
| 7880 | aaa |
| 7837 | aaa |
| 7699 | aaa |
| 7502 | aaa |
| 7096 | aaa |
| 6941 | aaa |
| 6726 | aaa |
| 6559 | aaa |
| 6448 | aaa |
| 6228 | aaa |
| 6182 | aaa |
| 6151 | aaa |
| 6024 | aaa |
| 6006 | aaa |
| 5975 | aaa |
| 5910 | aaa |
| 5869 | aaa |
| 5739 | aaa |
| 5682 | aaa |
| 5541 | aaa |
| 5448 | aaa |
| 5415 | aaa |
| 5288 | aaa |
| 5223 | aaa |
| 5164 | aaa |
| 5160 | aaa |
| 5127 | aaa |
| 5083 | aaa |
| 4945 | aaa |
| 4878 | aaa |
| 4826 | aaa |
| 4728 | aaa |
| 4679 | aaa |
| 4616 | aaa |
| 4332 | aaa |
| 4320 | aaa |
| 4245 | aaa |
| 4207 | aaa |
| 4204 | aaa |
| 3973 | aaa |
| 3774 | aaa |
| 3749 | aaa |
| 3736 | aaa |
| 3683 | aaa |
| 3451 | aaa |
| 3226 | aaa |
| 3040 | aaa |
| 2989 | aaa |
| 2720 | aaa |
| 2670 | aaa |
| 2543 | aaa |
| 2393 | aaa |
| 2340 | aaa |
| 2340 | aaa |
| 2323 | aaa |
| 2287 | aaa |
| 2231 | aaa |
| 2206 | aaa |
| 2179 | aaa |
| 2054 | aaa |
| 2037 | aaa |
| 1795 | aaa |
| 1766 | aaa |
| 1614 | aaa |
| 1574 | aaa |
| 1540 | aaa |
| 1506 | aaa |
| 1420 | aaa |
| 1391 | aaa |
| 1279 | aaa |
| 1271 | aaa |
| 1268 | aaa |
| 834 | aaa |
| 784 | aaa |
| 582 | aaa |
| 505 | aaa |
| 444 | aaa |
| 376 | aaa |
| 246 | aaa |
| 109 | aaa |
| 19 | aaa |
+---------------+-----------------+--+
- Sort By
- Sort By 本身不會按排序字段分區(下面未帶limit的輸出中,id 2340 同時出現在兩個reduce的結果裏);一個job啓動多個reduce,每個reduce各自進行局部排序。若要控制數據進入哪個reduce,需配合 Distribute By
- 若是有limit語句,會再次啓動一個job,取出每一個局部排好序的前n條,再進行全局排序
- 只保證局部有序,不保證全局有序
# Sort By語法
colOrder: ( ASC | DESC )
sortBy: SORT BY colName colOrder? (',' colName colOrder?)*
query: SELECT expression (',' expression)* FROM src sortBy
# 設置開啓的reduce個數
-- Request two reduce tasks so the SORT BY queries below produce two separately sorted runs.
set mapreduce.job.reduces=2;
0: jdbc:hive2://> set mapreduce.job.reduces;
+--------------------------+--+
| set |
+--------------------------+--+
| mapreduce.job.reduces=2 |
+--------------------------+--+
# 執行局部排序(未帶limit)
0: jdbc:hive2://> select * from sort_test sort by id desc;
+---------------+-----------------+--+
| sort_test.id | sort_test.name |
+---------------+-----------------+--+
| 9901 | aaa |
| 9684 | aaa |
| 9340 | aaa |
| 9019 | aaa |
| 9007 | aaa |
| 8965 | aaa |
| 8961 | aaa |
| 8689 | aaa |
| 8552 | aaa |
| 8177 | aaa |
| 7837 | aaa |
| 7699 | aaa |
| 7502 | aaa |
| 6559 | aaa |
| 6448 | aaa |
| 6228 | aaa |
| 6024 | aaa |
| 6006 | aaa |
| 5975 | aaa |
| 5910 | aaa |
| 5869 | aaa |
| 5739 | aaa |
| 5682 | aaa |
| 5541 | aaa |
| 5448 | aaa |
| 5415 | aaa |
| 5288 | aaa |
| 5164 | aaa |
| 5160 | aaa |
| 5083 | aaa |
| 4878 | aaa |
| 4826 | aaa |
| 4679 | aaa |
| 4616 | aaa |
| 4245 | aaa |
| 4207 | aaa |
| 3736 | aaa |
| 3451 | aaa |
| 3226 | aaa |
| 3040 | aaa |
| 2989 | aaa |
| 2720 | aaa |
| 2670 | aaa |
| 2340 | aaa |
| 2231 | aaa |
| 2206 | aaa |
| 2054 | aaa |
| 2037 | aaa |
| 1766 | aaa |
| 1614 | aaa |
| 1540 | aaa |
| 1506 | aaa |
| 1420 | aaa |
| 1268 | aaa |
| 834 | aaa |
| 784 | aaa |
| 582 | aaa |
| 444 | aaa |
| 376 | aaa |
| 246 | aaa |
| 19 | aaa |
| 9719 | aaa |
| 9636 | aaa |
| 9562 | aaa |
| 9335 | aaa |
| 8905 | aaa |
| 8881 | aaa |
| 8377 | aaa |
| 7954 | aaa |
| 7929 | aaa |
| 7880 | aaa |
| 7096 | aaa |
| 6941 | aaa |
| 6726 | aaa |
| 6182 | aaa |
| 6151 | aaa |
| 5223 | aaa |
| 5127 | aaa |
| 4945 | aaa |
| 4728 | aaa |
| 4332 | aaa |
| 4320 | aaa |
| 4204 | aaa |
| 3973 | aaa |
| 3774 | aaa |
| 3749 | aaa |
| 3683 | aaa |
| 2543 | aaa |
| 2393 | aaa |
| 2340 | aaa |
| 2323 | aaa |
| 2287 | aaa |
| 2179 | aaa |
| 1795 | aaa |
| 1574 | aaa |
| 1391 | aaa |
| 1279 | aaa |
| 1271 | aaa |
| 505 | aaa |
| 109 | aaa |
+---------------+-----------------+--+
# 帶limit排序(會額外再啓動一個job進行全局排序)
0: jdbc:hive2://> select * from sort_test sort by id desc limit 300;
+---------------+-----------------+--+
| sort_test.id | sort_test.name |
+---------------+-----------------+--+
| 9901 | aaa |
| 9719 | aaa |
| 9684 | aaa |
| 9636 | aaa |
| 9562 | aaa |
| 9340 | aaa |
| 9335 | aaa |
| 9019 | aaa |
| 9007 | aaa |
| 8965 | aaa |
| 8961 | aaa |
| 8905 | aaa |
| 8881 | aaa |
| 8689 | aaa |
| 8552 | aaa |
| 8377 | aaa |
| 8177 | aaa |
| 7954 | aaa |
| 7929 | aaa |
| 7880 | aaa |
| 7837 | aaa |
| 7699 | aaa |
| 7502 | aaa |
| 7096 | aaa |
| 6941 | aaa |
| 6726 | aaa |
| 6559 | aaa |
| 6448 | aaa |
| 6228 | aaa |
| 6182 | aaa |
| 6151 | aaa |
| 6024 | aaa |
| 6006 | aaa |
| 5975 | aaa |
| 5910 | aaa |
| 5869 | aaa |
| 5739 | aaa |
| 5682 | aaa |
| 5541 | aaa |
| 5448 | aaa |
| 5415 | aaa |
| 5288 | aaa |
| 5223 | aaa |
| 5164 | aaa |
| 5160 | aaa |
| 5127 | aaa |
| 5083 | aaa |
| 4945 | aaa |
| 4878 | aaa |
| 4826 | aaa |
| 4728 | aaa |
| 4679 | aaa |
| 4616 | aaa |
| 4332 | aaa |
| 4320 | aaa |
| 4245 | aaa |
| 4207 | aaa |
| 4204 | aaa |
| 3973 | aaa |
| 3774 | aaa |
| 3749 | aaa |
| 3736 | aaa |
| 3683 | aaa |
| 3451 | aaa |
| 3226 | aaa |
| 3040 | aaa |
| 2989 | aaa |
| 2720 | aaa |
| 2670 | aaa |
| 2543 | aaa |
| 2393 | aaa |
| 2340 | aaa |
| 2340 | aaa |
| 2323 | aaa |
| 2287 | aaa |
| 2231 | aaa |
| 2206 | aaa |
| 2179 | aaa |
| 2054 | aaa |
| 2037 | aaa |
| 1795 | aaa |
| 1766 | aaa |
| 1614 | aaa |
| 1574 | aaa |
| 1540 | aaa |
| 1506 | aaa |
| 1420 | aaa |
| 1391 | aaa |
| 1279 | aaa |
| 1271 | aaa |
| 1268 | aaa |
| 834 | aaa |
| 784 | aaa |
| 582 | aaa |
| 505 | aaa |
| 444 | aaa |
| 376 | aaa |
| 246 | aaa |
| 109 | aaa |
| 19 | aaa |
+---------------+-----------------+--+
- Order By 和 Sort By區別
- Order By全局排序,Sort By局部排序
- 取TopN時(配合limit),Sort By 比 Order By效率更高:多個reduce先並行取出各自局部有序的前N條,最後只需對這少量數據再作一次全局排序
- Distribute By
- 按指定字段把數據分發到不同的reduce,該字段值相同的行會進入同一個reduce(本身不保證reduce內有序)
- 一般結合Sort By語句使用,例如要在同一個地區內對不同商家排序,就需要用到它
- Cluster By
- 按指定字段分發並排序,等價於對同一字段同時使用 Distribute By 和 Sort By(不能指定 ASC/DESC,只能升序)
-- Usage examples (each statement is terminated so the three can be run as a script).
-- Distribute rows by col1 and sort each reducer's rows by col1.
SELECT col1, col2 FROM t1 CLUSTER BY col1;
-- Distribute rows by col1 only; no ordering is requested.
SELECT col1, col2 FROM t1 DISTRIBUTE BY col1;
-- Distribute rows by col1, then sort within each reducer by col1 ascending and col2 descending.
SELECT col1, col2 FROM t1 DISTRIBUTE BY col1 SORT BY col1 ASC, col2 DESC;
參考資料
【0】Hive wiki - LanguageManual SortBy