std::sort算是STL中對OIer比較友好的函數了,但你有想過sort是如何保證它的高速且穩定嗎?(這裏的「穩定」指運行效率穩定,std::sort本身並不是穩定排序。)
咱們首先來到第一層:sort函數
/// First layer: the two-iterator sort entry point.
///
/// Runs the concept-check and range-check macros, then forwards the range to
/// std::__sort together with a default-constructed __iter_less_iter
/// comparator. No sorting logic lives here.
///
/// @param __first  Iterator to the first element of the range.
/// @param __last   Iterator one past the final element of the range.
template<typename _RandomAccessIterator>
inline void
sort(_RandomAccessIterator __first, _RandomAccessIterator __last)
{
  // Concept check: the iterator type must model
  // _Mutable_RandomAccessIteratorConcept.
  __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept<
                              _RandomAccessIterator>)
  // Concept check: the element type must model _LessThanComparableConcept.
  // iterator_traits is std::-qualified, like every call in this excerpt, so
  // the snippet does not depend on an enclosing namespace.
  __glibcxx_function_requires(_LessThanComparableConcept<
      typename std::iterator_traits<_RandomAccessIterator>::value_type>)
  // Range check macro applied to [__first, __last).
  __glibcxx_requires_valid_range(__first, __last);

  // Hand the actual work to std::__sort.
  std::__sort(__first, __last, __gnu_cxx::__ops::__iter_less_iter());
}
這一層其實也沒幹什麼,只是把鍋推給了第二層:__sort函數
/// Second layer: combines two sorting passes over [__first, __last).
///
/// An empty range is left alone. Otherwise std::__introsort_loop runs first
/// with a depth budget of 2 * __lg(length), and std::__final_insertion_sort
/// then finishes the job.
///
/// @param __first  Iterator to the first element of the range.
/// @param __last   Iterator one past the final element of the range.
/// @param __comp   Comparator forwarded unchanged to both passes.
template<typename _RandomAccessIterator, typename _Compare>
inline void
__sort(_RandomAccessIterator __first, _RandomAccessIterator __last,
       _Compare __comp)
{
  if (__first != __last)
    {
      // Depth-limited O(n log n) pass: good complexity but a large constant
      // factor. It stops refining once sub-ranges are short, leaving the
      // data almost sorted.
      std::__introsort_loop(__first, __last,
                            std::__lg(__last - __first) * 2,
                            __comp);
      // On nearly sorted data insertion sort is cheap (poor worst-case
      // complexity, tiny constant factor), so it does the final cleanup.
      std::__final_insertion_sort(__first, __last, __comp);
    }
}
這裏咱們就能夠見到當年那些大神的神奇操作了:不一樣的排序方法各司其職,取長補短
接下來咱們分開來看,首先看O(n log n)排序的部分:__introsort_loop函數
在全部O(n log n)排序中,常數最優秀的當屬快速排序,它天然也成了實現O(n log n)排序的首要選擇
當然,直接用快速排序是極可能會被卡的,因此咱們要用一個另外的函數兜底
/// Depth-limited quicksort pass with a heap-sort fallback.
///
/// Keeps partitioning while the current range is longer than _S_threshold.
/// Ranges at or below the threshold are left as they are (the caller's
/// insertion-sort pass handles them). When the depth budget reaches zero the
/// current range is handed to std::__partial_sort instead.
///
/// @param __first        Iterator to the first element of the range.
/// @param __last         Iterator one past the final element of the range.
/// @param __depth_limit  Remaining number of partition levels allowed.
/// @param __comp         Comparator forwarded to the helpers.
template<typename _RandomAccessIterator, typename _Size, typename _Compare>
void
__introsort_loop(_RandomAccessIterator __first,
                 _RandomAccessIterator __last,
                 _Size __depth_limit, _Compare __comp)
{
  // Only long ranges are worth partitioning.
  // _S_threshold is std::-qualified, like every call in this excerpt, so the
  // snippet does not depend on an enclosing namespace.
  while (__last - __first > int(std::_S_threshold))
    {
      // Budget used up: the input is unfriendly to quicksort.
      if (__depth_limit == 0)
        {
          // __middle == __last here, so the whole range gets heap-sorted.
          std::__partial_sort(__first, __last, __last, __comp);
          return;
        }
      --__depth_limit;
      // Split the range into two parts around a pivot.
      _RandomAccessIterator __cut =
        std::__unguarded_partition_pivot(__first, __last, __comp);
      // Recurse on the back part only ...
      std::__introsort_loop(__cut, __last, __depth_limit, __comp);
      // ... and let the enclosing while-loop continue with the front part.
      // One recursive call per level instead of two keeps stack usage lower.
      __last = __cut;
    }
}
小聲BB:這個參照值的選取太隨便了:__unguarded_partition_pivot函數
/// Chooses a pivot for [__first, __last) and partitions the range around it.
///
/// @param __first  Iterator to the first element of the range.
/// @param __last   Iterator one past the final element of the range.
/// @param __comp   Comparator taking two iterators.
/// @return The iterator returned by std::__unguarded_partition, i.e. the
///         start of the back part.
template<typename _RandomAccessIterator, typename _Compare>
inline _RandomAccessIterator
__unguarded_partition_pivot(_RandomAccessIterator __first,
                            _RandomAccessIterator __last, _Compare __comp)
{
  // Middle position of the range.
  _RandomAccessIterator __mid = __first + (__last - __first) / 2;
  // std::__move_median_to_first is not shown in this excerpt; it is expected
  // to put the median of the elements at __first + 1, __mid and __last - 1
  // into *__first, which the partition below then uses as its pivot.
  std::__move_median_to_first(__first, __first + 1, __mid, __last - 1,
                              __comp);
  // Partition everything after the pivot slot.
  return std::__unguarded_partition(__first + 1, __last, __first, __comp);
}

/// Partition step: two cursors walk towards each other, swapping
/// out-of-place pairs.
///
/// "Unguarded": neither inner scan checks the range bounds, so the caller
/// must guarantee that each scan meets an element that stops it.
///
/// @param __first  Iterator to the first element to partition.
/// @param __last   Iterator one past the final element to partition.
/// @param __pivot  Iterator to the pivot element (not moved by this function).
/// @param __comp   Comparator taking two iterators.
/// @return The position where the two cursors met.
template<typename _RandomAccessIterator, typename _Compare>
_RandomAccessIterator
__unguarded_partition(_RandomAccessIterator __first,
                      _RandomAccessIterator __last,
                      _RandomAccessIterator __pivot, _Compare __comp)
{
  while (true)
    {
      // Skip elements that compare before the pivot.
      while (__comp(__first, __pivot))
        ++__first;
      --__last;
      // Skip elements that the pivot compares before.
      while (__comp(__pivot, __last))
        --__last;
      // Cursors met or crossed: partition finished.
      if (!(__first < __last))
        return __first;
      std::iter_swap(__first, __last);
      ++__first;
    }
}
若是用快速排序太卡,就改用堆排序:__partial_sort函數
/// Heap-based partial sort: selects into [__first, __middle) via
/// std::__heap_select, then orders that prefix with std::__sort_heap.
///
/// @param __first   Iterator to the first element of the range.
/// @param __middle  End of the prefix that is to be sorted.
/// @param __last    Iterator one past the final element of the range.
/// @param __comp    Comparator forwarded to the heap helpers.
template<typename _RandomAccessIterator, typename _Compare>
inline void
__partial_sort(_RandomAccessIterator __first,
               _RandomAccessIterator __middle,
               _RandomAccessIterator __last, _Compare __comp)
{
  // Build the heap.
  std::__heap_select(__first, __middle, __last, __comp);
  // Pop it element by element.
  std::__sort_heap(__first, __middle, __comp);
}

/// Builds a heap on [__first, __middle), then for every element of
/// [__middle, __last) that compares before the heap top, swaps it into the
/// heap via std::__pop_heap.
///
/// When __middle == __last (the way __introsort_loop calls __partial_sort)
/// the loop body never executes.
template<typename _RandomAccessIterator, typename _Compare>
void
__heap_select(_RandomAccessIterator __first,
              _RandomAccessIterator __middle,
              _RandomAccessIterator __last, _Compare __comp)
{
  std::__make_heap(__first, __middle, __comp);
  for (_RandomAccessIterator __i = __middle; __i < __last; ++__i)
    if (__comp(__i, __first))
      std::__pop_heap(__first, __middle, __i, __comp);
}

/// Turns [__first, __last) into a heap by sifting every parent node,
/// starting from the last parent and ending at the root.
template<typename _RandomAccessIterator, typename _Compare>
void
__make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last,
            _Compare __comp)
{
  // iterator_traits is std::-qualified, like every call in this excerpt, so
  // the snippet does not depend on an enclosing namespace.
  typedef typename std::iterator_traits<_RandomAccessIterator>::value_type
    _ValueType;
  typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type
    _DistanceType;

  // Fewer than two elements: nothing to arrange.
  if (__last - __first < 2)
    return;

  const _DistanceType __len = __last - __first;
  // Index of the last node that has at least one child.
  _DistanceType __parent = (__len - 2) / 2;
  while (true)
    {
      // Work from the bottom of the heap up towards the top.
      _ValueType __value = _GLIBCXX_MOVE(*(__first + __parent));
      std::__adjust_heap(__first, __parent, __len, _GLIBCXX_MOVE(__value),
                         __comp);
      if (__parent == 0)
        return;
      __parent--;
    }
}

/// Repeatedly pops the heap top to the end of a shrinking range until at
/// most one element remains in the heap.
template<typename _RandomAccessIterator, typename _Compare>
void
__sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last,
            _Compare __comp)
{
  while (__last - __first > 1)
    {
      --__last;
      // The popped top lands at the (new) __last position.
      std::__pop_heap(__first, __last, __last, __comp);
    }
}
當然,STL延續了一向大常數的「祖宗之法」,調整堆和彈堆都如此複雜
/// Sifts __value upward from the hole at __holeIndex.
///
/// While the hole is below __topIndex and the parent compares before
/// __value, the parent is moved down into the hole. Finally __value is
/// stored in the hole.
///
/// @param __first      Iterator to the heap's first element.
/// @param __holeIndex  Index of the currently empty slot.
/// @param __topIndex   Index the hole must not rise above.
/// @param __value      Value to place.
/// @param __comp       Comparator called as __comp(iterator, value).
template<typename _RandomAccessIterator, typename _Distance, typename _Tp,
         typename _Compare>
void
__push_heap(_RandomAccessIterator __first, _Distance __holeIndex,
            _Distance __topIndex, _Tp __value, _Compare __comp)
{
  _Distance __parent = (__holeIndex - 1) / 2;
  // Climb until the heap property holds again.
  while (__holeIndex > __topIndex && __comp(__first + __parent, __value))
    {
      *(__first + __holeIndex) = _GLIBCXX_MOVE(*(__first + __parent));
      __holeIndex = __parent;
      __parent = (__holeIndex - 1) / 2;
    }
  *(__first + __holeIndex) = _GLIBCXX_MOVE(__value);
}

/// Re-establishes the heap below __holeIndex and places __value.
///
/// Strategy: first move the hole all the way down, always promoting the
/// preferred child without looking at __value; then sift __value back up
/// from the bottom with std::__push_heap.
///
/// @param __first      Iterator to the heap's first element.
/// @param __holeIndex  Index of the empty slot to fill.
/// @param __len        Number of elements in the heap.
/// @param __value      Value to place.
/// @param __comp       Comparator taking two iterators.
template<typename _RandomAccessIterator, typename _Distance, typename _Tp,
         typename _Compare>
void
__adjust_heap(_RandomAccessIterator __first, _Distance __holeIndex,
              _Distance __len, _Tp __value, _Compare __comp)
{
  const _Distance __topIndex = __holeIndex;
  _Distance __secondChild = __holeIndex;
  while (__secondChild < (__len - 1) / 2)
    {
      // Right child of the current node.
      __secondChild = 2 * (__secondChild + 1);
      // Switch to the left child when the right one compares before it.
      if (__comp(__first + __secondChild,
                 __first + (__secondChild - 1)))
        __secondChild--;
      *(__first + __holeIndex) = _GLIBCXX_MOVE(*(__first + __secondChild));
      // Move the hole down one level.
      __holeIndex = __secondChild;
    }
  // Even length: the last parent has only a left child.
  if ((__len & 1) == 0 && __secondChild == (__len - 2) / 2)
    {
      __secondChild = 2 * (__secondChild + 1);
      *(__first + __holeIndex) =
        _GLIBCXX_MOVE(*(__first + (__secondChild - 1)));
      __holeIndex = __secondChild - 1;
    }
  // Sift the value back up from the bottom.
  std::__push_heap(__first, __holeIndex, __topIndex,
                   _GLIBCXX_MOVE(__value),
                   __gnu_cxx::__ops::__iter_comp_val(__comp));
}

/// Removes the heap top of [__first, __last): the top is moved to *__result,
/// and the value that used to be at *__result is re-inserted into the heap
/// via std::__adjust_heap.
///
/// @param __first   Iterator to the heap's first element.
/// @param __last    Iterator one past the heap's final element.
/// @param __result  Slot that receives the old top.
/// @param __comp    Comparator forwarded to std::__adjust_heap.
template<typename _RandomAccessIterator, typename _Compare>
inline void
__pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last,
           _RandomAccessIterator __result, _Compare __comp)
{
  // iterator_traits is std::-qualified, like every call in this excerpt, so
  // the snippet does not depend on an enclosing namespace.
  typedef typename std::iterator_traits<_RandomAccessIterator>::value_type
    _ValueType;
  typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type
    _DistanceType;

  // Take the top out of the heap.
  _ValueType __value = _GLIBCXX_MOVE(*__result);
  *__result = _GLIBCXX_MOVE(*__first);
  // Fill the hole left at the root.
  std::__adjust_heap(__first, _DistanceType(0),
                     _DistanceType(__last - __first),
                     _GLIBCXX_MOVE(__value), __comp);
}
當比較有序時,咱們就能夠用插入排序優化常數:__final_insertion_sort函數
/// Finishing insertion-sort pass over [__first, __last).
///
/// Long ranges: the first _S_threshold elements are sorted with the guarded
/// std::__insertion_sort, the remainder with
/// std::__unguarded_insertion_sort. Short ranges go through
/// std::__insertion_sort as a whole.
///
/// @param __first  Iterator to the first element of the range.
/// @param __last   Iterator one past the final element of the range.
/// @param __comp   Comparator forwarded to the helpers.
template<typename _RandomAccessIterator, typename _Compare>
void
__final_insertion_sort(_RandomAccessIterator __first,
                       _RandomAccessIterator __last, _Compare __comp)
{
  // _S_threshold is std::-qualified, like every call in this excerpt, so the
  // snippet does not depend on an enclosing namespace.
  if (__last - __first > int(std::_S_threshold))
    {
      // Sort the head of the range first; it serves as the pre-sorted
      // prefix that the unguarded pass inserts into.
      std::__insertion_sort(__first, __first + int(std::_S_threshold),
                            __comp);
      // Insert every remaining element.
      std::__unguarded_insertion_sort(__first + int(std::_S_threshold),
                                      __last, __comp);
    }
  else
    // Short range: one plain insertion sort.
    std::__insertion_sort(__first, __last, __comp);
}
實現方法也很簡單,只是有一點奇怪的操作:
/// Guarded insertion sort over [__first, __last).
///
/// @param __first  Iterator to the first element of the range.
/// @param __last   Iterator one past the final element of the range.
/// @param __comp   Comparator taking two iterators.
template<typename _RandomAccessIterator, typename _Compare>
void
__insertion_sort(_RandomAccessIterator __first,
                 _RandomAccessIterator __last, _Compare __comp)
{
  if (__first == __last)
    return;

  for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)
    {
      // The element belongs at the very front: shift the whole sorted
      // prefix one slot to the right in a single bulk move and drop the
      // element into *__first.
      if (__comp(__i, __first))
        {
          // iterator_traits is std::-qualified, like every call in this
          // excerpt, so the snippet does not depend on an enclosing
          // namespace.
          typename std::iterator_traits<_RandomAccessIterator>::value_type
            __val = _GLIBCXX_MOVE(*__i);
          _GLIBCXX_MOVE_BACKWARD3(__first, __i, __i + 1);
          *__first = _GLIBCXX_MOVE(__val);
        }
      // Otherwise *__first does not compare after the element, so the
      // backward scan of the unguarded insert stops at __first at the
      // latest and needs no bounds check.
      else
        std::__unguarded_linear_insert(__i,
                        __gnu_cxx::__ops::__val_comp_iter(__comp));
    }
}

/// Insertion sort without a lower-bound check: every element of
/// [__first, __last) is inserted with std::__unguarded_linear_insert.
/// The caller must make sure something before __first stops each backward
/// scan.
template<typename _RandomAccessIterator, typename _Compare>
inline void
__unguarded_insertion_sort(_RandomAccessIterator __first,
                           _RandomAccessIterator __last, _Compare __comp)
{
  for (_RandomAccessIterator __i = __first; __i != __last; ++__i)
    std::__unguarded_linear_insert(__i,
                        __gnu_cxx::__ops::__val_comp_iter(__comp));
}

/// Inserts *__last into the elements in front of it: elements that the
/// value compares before are moved one slot to the right, then the value
/// is stored in the gap. The scan has no bounds check.
///
/// @param __last  Iterator to the element being inserted.
/// @param __comp  Comparator called as __comp(value, iterator).
template<typename _RandomAccessIterator, typename _Compare>
void
__unguarded_linear_insert(_RandomAccessIterator __last, _Compare __comp)
{
  typename std::iterator_traits<_RandomAccessIterator>::value_type
    __val = _GLIBCXX_MOVE(*__last);
  _RandomAccessIterator __next = __last;
  --__next;
  while (__comp(__val, __next))
    {
      *__last = _GLIBCXX_MOVE(*__next);
      __last = __next;
      --__next;
    }
  *__last = _GLIBCXX_MOVE(__val);
}
事實上我真的去測試過,在基本有序時,快排真的比插入排序慢(常數太大了)
總結,sort的實現是這樣的:
sort( *begin, *end )
{
    __sort( *begin, *end )
    {
        __introsort_loop( *begin, *end, floor )
        {
            if(/*區間長度較大*/)
            {
                if(/*遞歸層數過大*/)
                {
                    //堆排序
                    __partial_sort( *begin, *end );
                    return;
                }
                //選擇參照值,並將元素分離
                __cut = __unguarded_partition_pivot( *begin, *end );
                //分治
                __introsort_loop( *begin, *__cut );
                __introsort_loop( *__cut, *end );
            }
        }
        __final_insertion_sort( *begin, *end )
        {
            //插入排序
        }
    }
}
懵逼~~~
看代碼看得頭暈
發現Luogu有個神貼:https://www.luogu.org/discuss/show/112808
能夠發現,若是__last在__first前面,那麼就永遠不會有__i==__last出現,也就是說,在插入排序時會把__first後面全部的數據所有「排序」,emmm
這內存必定是中暑了,要不咱們……
STL這魯棒性太差了
——會某人