科普:std::sort幹了什麼

std::sort算是STL中對OIer比較友好的函數了,但你有想過sort是如何保證它的高速且穩定嗎?(這裏的「穩定」指運行效率穩定,而不是排序的穩定性——std::sort並不保證相等元素的相對順序。)

 

正文

我們首先來到第一層:sort函數

// Entry point of the sort: runs the checks below on the iterator type and on
// [__first, __last), then forwards the whole job to std::__sort, passing a
// default-constructed __gnu_cxx::__ops::__iter_less_iter as the comparator.
// NOTE(review): the comparator presumably compares the pointed-to values with
// operator< -- its definition is not shown in this excerpt.
template<typename _RandomAccessIterator> inline void sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
    // Concept-check macro naming _Mutable_RandomAccessIteratorConcept for the iterator type
    // (the macro's expansion is not shown here).
    __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept<_RandomAccessIterator>)
    // Concept-check macro naming _LessThanComparableConcept for the iterator's value_type.
    __glibcxx_function_requires(_LessThanComparableConcept<typename iterator_traits<_RandomAccessIterator>::value_type>)
 // Range-validity macro applied to [__first, __last).
 __glibcxx_requires_valid_range(__first, __last);
 // Hand the actual sorting over to std::__sort.
 std::__sort(__first, __last, __gnu_cxx::__ops::__iter_less_iter()); }

 

這一層其實也沒幹什麼,只是把鍋推給了第二層:__sort函數

// Two-phase sort of [__first, __last) with comparator __comp.
// An empty range (__first == __last) is left untouched.
template<typename _RandomAccessIterator, typename _Compare> inline void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { if (__first != __last) {
        // Phase 1: depth-limited partitioning pass. The depth budget passed in is
        // std::__lg(range length) * 2.
        // NOTE(review): std::__lg is presumably floor(log2(n)) -- its definition is not shown here.
        std::__introsort_loop(__first, __last, std::__lg(__last - __first) * 2, __comp);
 // Phase 2: insertion-sort pass over the whole range to finish the job.
 std::__final_insertion_sort(__first, __last, __comp); } }

這裏我們就可以見到當年那些大神的神奇操作了:不同的排序方法各司其職,取長補短

 

接下來我們分開來看,首先看O(n log n)排序的部分:__introsort_loop函數

 

在所有O(n log n)排序中,常數最優秀的當屬快速排序,它自然也成了實現O(n log n)排序的首要選擇

當然,直接用快速排序是很可能會被卡的,所以我們要用另外一個函數兜底

// Depth-limited partitioning phase of the sort.
// Keeps partitioning [__first, __last) while it holds more than _S_threshold
// elements; once the range is that short the loop simply exits and leaves it
// as it is. __depth_limit is the remaining partitioning budget: when it reaches
// 0 the current range is handed to std::__partial_sort instead.
template<typename _RandomAccessIterator, typename _Size, typename _Compare>
void __introsort_loop(_RandomAccessIterator __first, _RandomAccessIterator __last, _Size __depth_limit, _Compare __comp) {
    // Only ranges longer than _S_threshold are processed here.
    while (__last - __first > int(_S_threshold)) {
        // Partitioning budget exhausted for this range.
        if (__depth_limit == 0) {
 // Fall back to __partial_sort with middle == last, i.e. over the whole
 // current range, then return. If the budget was not exhausted, the statement
 // after the closing brace spends one level of it.
 std::__partial_sort(__first, __last, __last, __comp); return; } --__depth_limit;
        // Partition the range; __cut is the boundary iterator returned by the partition step.
        _RandomAccessIterator __cut = std::__unguarded_partition_pivot(__first, __last, __comp);
 // Recurse only on the right part [__cut, __last) ...
 std::__introsort_loop(__cut, __last, __depth_limit, __comp);
        // ... and let the enclosing while loop continue with the left part
        // [__first, __cut), so each iteration makes one recursive call instead of two.
        __last = __cut;
 } }

 小聲BB:這個參照值的選取太隨便了:__unguarded_partition_pivot函數

// Two functions live in this span:
//   __unguarded_partition_pivot(first, last, comp): picks a pivot, has it placed
//     at *first, and returns the result of partitioning [first + 1, last) around it.
//   __unguarded_partition(first, last, pivot, comp): the partition loop itself;
//     returns the position at which the two scanning cursors meet or cross.
template<typename _RandomAccessIterator, typename _Compare> inline _RandomAccessIterator __unguarded_partition_pivot(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
    // Midpoint of the range.
    _RandomAccessIterator __mid = __first + (__last - __first) / 2;
    // Candidates handed over are __first + 1, __mid and __last - 1, with __first as the destination.
    // NOTE(review): judging by the helper's name it moves the median of the three
    // candidates into *__first -- its body is not part of this excerpt.
    std::__move_median_to_first(__first, __first + 1, __mid, __last - 1, __comp);
    // Partition [__first + 1, __last) with *__first as the pivot and return the result.
    // The same physical line then closes this function and opens __unguarded_partition.
    return std::__unguarded_partition(__first + 1, __last, __first, __comp); } template<typename _RandomAccessIterator, typename _Compare> _RandomAccessIterator __unguarded_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _RandomAccessIterator __pivot, _Compare __comp) {
    // Each round: advance __first while comp(first, pivot) holds; step __last back
    // once, then further while comp(pivot, last) holds; if the cursors have met or
    // crossed, return __first; otherwise swap the two elements and advance __first.
    // Neither inner scan checks the range bounds.
    // NOTE(review): presumably the caller's pivot choice guarantees both scans stop
    // inside the range -- confirm against __move_median_to_first.
    while (true) { while (__comp(__first, __pivot)) ++__first; --__last; while (__comp(__pivot, __last)) --__last; if (!(__first < __last)) return __first; std::iter_swap(__first, __last); ++__first; } }

 

如果用快速排序太卡,就改用堆排序:__partial_sort函數

// Four functions live in this span:
//   __partial_sort(first, middle, last, comp): __heap_select followed by __sort_heap on [first, middle).
//   __heap_select(first, middle, last, comp): heapifies [first, middle), then swaps in every
//     element of [middle, last) that orders before the heap top.
//   __make_heap(first, last, comp): turns [first, last) into a heap.
//   __sort_heap(first, last, comp): repeatedly pops the heap top to the back of the shrinking range.
template<typename _RandomAccessIterator, typename _Compare> inline void __partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) {
 // Step 1: build the heap on [__first, __middle) and filter [__middle, __last) through it.
 std::__heap_select(__first, __middle, __last, __comp);
 // Step 2: pop the heap element by element (see __sort_heap below).
 // The template header at the end of this line belongs to __heap_select.
 std::__sort_heap(__first, __middle, __comp); } template<typename _RandomAccessIterator, typename _Compare>
// __heap_select: when __middle == __last (as in the call from __introsort_loop)
// the for loop below runs zero times and only the heap construction happens.
void __heap_select(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) {
 // Heapify [__first, __middle). Then, for each __i in [__middle, __last) with
 // comp(__i, __first) true, __pop_heap moves the current top into *__i and puts
 // the old *__i into the heap. The template header at the end of this line
 // belongs to __make_heap.
 std::__make_heap(__first, __middle, __comp); for (_RandomAccessIterator __i = __middle; __i < __last; ++__i) if (__comp(__i, __first)) std::__pop_heap(__first, __middle, __i, __comp); } template<typename _RandomAccessIterator, typename _Compare>
// __make_heap: ranges with fewer than 2 elements return immediately. Otherwise
// __parent starts at (__len - 2) / 2 and counts down to 0.
void __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { typedef typename iterator_traits<_RandomAccessIterator>::value_type _ValueType; typedef typename iterator_traits<_RandomAccessIterator>::difference_type _DistanceType; if (__last - __first < 2) return; const _DistanceType __len = __last - __first; _DistanceType __parent = (__len - 2) / 2; while (true) {
        // Take the value at __parent out and let __adjust_heap re-place it with the
        // hole starting at __parent; stop after index 0 has been processed.
        // The template header at the end of this line belongs to __sort_heap.
        _ValueType __value = _GLIBCXX_MOVE(*(__first + __parent)); std::__adjust_heap(__first, __parent, __len, _GLIBCXX_MOVE(__value), __comp); if (__parent == 0) return; __parent--; } } template<typename _RandomAccessIterator, typename _Compare>
// __sort_heap: expects [__first, __last) to already be a heap.
void __sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
    // While more than one element remains: shrink the range by one and pop the
    // top into the slot just vacated at the end (__result == the new __last).
    while (__last - __first > 1) { --__last; std::__pop_heap(__first, __last, __last, __comp); } }

當然,STL延續了一貫大常數的「祖宗之法」,調整堆和彈堆都如此複雜

// Three heap helpers live in this span:
//   __push_heap(first, holeIndex, topIndex, value, comp): moves a hole upward from
//     holeIndex (never above topIndex) and stores value in its final position.
//   __adjust_heap(first, holeIndex, len, value, comp): moves the hole down to the
//     bottom of the heap, then calls __push_heap to move value back up.
//   __pop_heap(first, last, result, comp): moves the heap top into *result and
//     re-inserts the value previously stored at *result into [first, last).
// NOTE(review): _GLIBCXX_MOVE is presumably std::move (or a plain copy before
// C++11) -- the macro definition is not shown here.
template<typename _RandomAccessIterator, typename _Distance, typename _Tp, typename _Compare>
void __push_heap(_RandomAccessIterator __first, _Distance __holeIndex, _Distance __topIndex, _Tp __value, _Compare __comp) {
    // Parent of the hole is (__holeIndex - 1) / 2. While the hole is below
    // __topIndex and comp(parent, value) holds, move the parent down into the hole
    // and continue from the parent's slot; finally store __value in the hole.
    // The template header at the end of this line belongs to __adjust_heap.
    _Distance __parent = (__holeIndex - 1) / 2; while (__holeIndex > __topIndex && __comp(__first + __parent, __value)) { *(__first + __holeIndex) = _GLIBCXX_MOVE(*(__first + __parent)); __holeIndex = __parent; __parent = (__holeIndex - 1) / 2; } *(__first + __holeIndex) = _GLIBCXX_MOVE(__value); } template<typename _RandomAccessIterator, typename _Distance, typename _Tp, typename _Compare>
// __adjust_heap: walks the hole all the way down without comparing against
// __value, then lets __push_heap move __value up from the bottom.
void __adjust_heap(_RandomAccessIterator __first, _Distance __holeIndex, _Distance __len, _Tp __value, _Compare __comp) {
    // Remember where the hole started; __secondChild is first set to the right
    // child index, 2 * (i + 1), of the current node.
    const _Distance __topIndex = __holeIndex; _Distance __secondChild = __holeIndex; while (__secondChild < (__len - 1) / 2) { __secondChild = 2 * (__secondChild + 1); if (__comp(__first + __secondChild, __first + (__secondChild - 1))) __secondChild--;// if comp(right, left) holds, take the left child instead
        *(__first + __holeIndex) = _GLIBCXX_MOVE(*(__first + __secondChild)); __holeIndex = __secondChild;// move the chosen child up; the hole descends to its slot
    }// loop ends once the current node no longer has two children
    // Even length and the current node is (__len - 2) / 2: it has only a left
    // child, at index __secondChild - 1; move that child up as well.
    if ((__len & 1) == 0 && __secondChild == (__len - 2) / 2) { __secondChild = 2 * (__secondChild + 1); *(__first + __holeIndex) = _GLIBCXX_MOVE(*(__first  + (__secondChild - 1))); __holeIndex = __secondChild - 1; }
 // Move __value upward from the bottom hole, no higher than __topIndex. The
 // comparator is wrapped with __iter_comp_val (NOTE(review): presumably adapts it
 // to compare an iterator against a value -- definition not shown).
 // The same physical line then closes this function and opens __pop_heap.
 std::__push_heap(__first, __holeIndex, __topIndex, _GLIBCXX_MOVE(__value), __gnu_cxx::__ops::__iter_comp_val(__comp)); } template<typename _RandomAccessIterator, typename _Compare> inline void __pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _RandomAccessIterator __result, _Compare __comp) { typedef typename iterator_traits<_RandomAccessIterator>::value_type _ValueType; typedef typename iterator_traits<_RandomAccessIterator>::difference_type _DistanceType;
    // Save the value stored at *__result, then move the heap top into *__result.
    _ValueType __value = _GLIBCXX_MOVE(*__result); *__result = _GLIBCXX_MOVE(*__first);
    // Re-insert the saved value into [__first, __last), starting with the hole at index 0.
    std::__adjust_heap(__first, _DistanceType(0), _DistanceType(__last - __first), _GLIBCXX_MOVE(__value), __comp); }

 

當比較有序時,我們就可以用插入排序優化常數:__final_insertion_sort函數

// Final insertion-sort pass over [__first, __last).
// Ranges longer than _S_threshold: __insertion_sort on the first _S_threshold
// elements, then __unguarded_insertion_sort on the remainder.
// Shorter ranges: a single __insertion_sort over the whole range.
// NOTE(review): the unguarded pass performs no lower-bound check; it presumably
// relies on the state left by the preceding __introsort_loop so that every
// backward scan stops inside the range -- confirm against the caller.
template<typename _RandomAccessIterator, typename _Compare>
void __final_insertion_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { if (__last - __first > int(_S_threshold)) {
        // Sort the leading _S_threshold elements with the checked insertion sort.
        std::__insertion_sort(__first, __first + int(_S_threshold), __comp);
        // Insert every remaining element with the unguarded variant; the else branch
        // on the same line handles short ranges with one checked insertion sort.
        std::__unguarded_insertion_sort(__first + int(_S_threshold), __last, __comp); } else std::__insertion_sort(__first, __last, __comp);
}

實現方法也很簡單,只是有一點奇怪的操作:

// Three functions live in this span:
//   __insertion_sort(first, last, comp): insertion sort that special-cases an
//     element ordering before *first.
//   __unguarded_insertion_sort(first, last, comp): calls __unguarded_linear_insert
//     for every element of [first, last).
//   __unguarded_linear_insert(last, comp): shifts elements right until the saved
//     value no longer orders before its left neighbour, with no bounds check.
// NOTE(review): _GLIBCXX_MOVE / _GLIBCXX_MOVE_BACKWARD3 are presumably std::move /
// std::move_backward (or copies before C++11) -- macro definitions not shown here.
template<typename _RandomAccessIterator, typename _Compare>
void __insertion_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { if (__first == __last) return;
    // Insert elements one at a time, starting from the second.
    for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i) {
        // Case 1: *__i orders before *__first. Save it, shift [__first, __i) one slot
        // to the right in a single bulk move, and store it at the front.
        if (__comp(__i, __first)) { typename iterator_traits<_RandomAccessIterator>::value_type __val = _GLIBCXX_MOVE(*__i); _GLIBCXX_MOVE_BACKWARD3(__first, __i, __i + 1); *__first = _GLIBCXX_MOVE(__val); }
        // Case 2: *__i does not order before *__first, so the bounds-check-free
        // linear insert is used for it.
        // The same physical line then closes this function and opens
        // __unguarded_insertion_sort.
        else std::__unguarded_linear_insert(__i, __gnu_cxx::__ops::__val_comp_iter(__comp)); } } template<typename _RandomAccessIterator, typename _Compare> inline void __unguarded_insertion_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
    // Plain loop: linear-insert each element of [__first, __last) in turn.
    // The loop ends only when __i == __last.
    // The template header at the end of this line belongs to __unguarded_linear_insert.
    for (_RandomAccessIterator __i = __first; __i != __last; ++__i) std::__unguarded_linear_insert(__i, __gnu_cxx::__ops::__val_comp_iter(__comp)); } template<typename _RandomAccessIterator, typename _Compare>
// __unguarded_linear_insert: inserts *__last into the elements to its left.
void __unguarded_linear_insert(_RandomAccessIterator __last, _Compare __comp) {
    // Save *__last, then walk left: while comp(value, *__next) holds, move *__next
    // one slot right and step both cursors left; finally store the saved value.
    // The loop never compares against a range start, so the caller must ensure
    // some element to the left stops the scan.
    typename iterator_traits<_RandomAccessIterator>::value_type    __val = _GLIBCXX_MOVE(*__last); _RandomAccessIterator __next = __last; --__next; while (__comp(__val, __next)) { *__last = _GLIBCXX_MOVE(*__next); __last = __next; --__next; } *__last = _GLIBCXX_MOVE(__val); }

 事實上我真的去測試過,在基本有序時,快排真的比插入排序慢(常數太大了)

 

總結,sort的實現是這樣的:

sort( *begin, *end ) { __sort( *begin, *end ) { __introsort_loop( *begin, *end, /*遞歸層數上限:2 * floor(log2(n))*/ ) { if(/*區間長度較大*/) { if(/*遞歸層數過大*/) { //堆排序,排完直接返回
                    __partial_sort( *begin, *end, *end ); return; } //選擇參照值,並將元素分離 
                __cut = __unguarded_partition_pivot( *begin, *end ); //分治(實際代碼只遞歸後一半,前一半用循環繼續處理) 
                __introsort_loop( *begin, *__cut ); __introsort_loop( *__cut, *end ); } } __final_insertion_sort( *begin, *end ) { //插入排序 
 } } }

 

懵逼~~~

看代碼看得頭暈

 

Update

發現Luogu有個神貼:https://www.luogu.org/discuss/show/112808

可以發現,如果__last在__first前面,那麼就永遠不會有__i==__last出現,也就是說,在插入排序時會把__first後面所有的數據全部「排序」,emmm

這內存一定是中暑了,要不我們……

STL這魯棒性太差了

——會某人

相關文章
相關標籤/搜索