複製數據的快速方法:std::copy
C++複製數據的各種方法大家都會,很多時候我們都會用到std::copy這個STL函數,它的效率確實很不錯,比我們一個一個元素複製或者用迭代器複製都要快很多。
const int size = 100000000; int *k = new int[size]; int *p = new int[size]; //const int size = 5F5E100h; DWORD t1, t2; t1 = GetTickCount(); for (int i = 0; i != size; i++) p[i] = k[i]; t2 = GetTickCount(); cout << t2 - t1 << "ms" << std::endl; t1 = GetTickCount(); int *pStart = k, *pEnd = k + size, *pDest = p; for (; pStart != pEnd; pDest++, pStart++) *pDest = *pStart; t2 = GetTickCount(); cout << t2 - t1 << "ms" << std::endl; t1 = GetTickCount(); std::copy(k, k + size, p); t2 = GetTickCount(); cout << t2 - t1 << "ms" << std::endl;
for (int i = 0; i != size; i++)
00F0A8B1 mov dword ptr [ebp-54h],0
00F0A8B8 jmp main+0A3h (0F0A8C3h)
00F0A8BA mov eax,dword ptr [ebp-54h]
00F0A8BD add eax,1
00F0A8C0 mov dword ptr [ebp-54h],eax
00F0A8C3 cmp dword ptr [ebp-54h],5F5E100h
00F0A8CA je main+0C0h (0F0A8E0h)
    p[i] = k[i];
00F0A8CC mov eax,dword ptr [ebp-54h]
00F0A8CF mov ecx,dword ptr [p]
00F0A8D2 mov edx,dword ptr [ebp-54h]
00F0A8D5 mov esi,dword ptr [k]
00F0A8D8 mov edx,dword ptr [esi+edx*4]
00F0A8DB mov dword ptr [ecx+eax*4],edx
00F0A8DE jmp main+9Ah (0F0A8BAh)
int *pStart = k, *pEnd = k + size, *pDest = p;
00F0A944 mov eax,dword ptr [k]
00F0A947 mov dword ptr [pStart],eax
00F0A94A mov eax,dword ptr [k]
00F0A94D add eax,17D78400h
00F0A952 mov dword ptr [pEnd],eax
00F0A955 mov eax,dword ptr [p]
00F0A958 mov dword ptr [pDest],eax
for (; pStart != pEnd; pDest++, pStart++)
00F0A95B jmp main+14Fh (0F0A96Fh)
00F0A95D mov eax,dword ptr [pDest]
00F0A960 add eax,4
00F0A963 mov dword ptr [pDest],eax
00F0A966 mov ecx,dword ptr [pStart]
00F0A969 add ecx,4
00F0A96C mov dword ptr [pStart],ecx
00F0A96F mov eax,dword ptr [pStart]
00F0A972 cmp eax,dword ptr [pEnd]
00F0A975 je main+163h (0F0A983h)
    *pDest = *pStart;
00F0A977 mov eax,dword ptr [pDest]
00F0A97A mov ecx,dword ptr [pStart]
00F0A97D mov edx,dword ptr [ecx]
00F0A97F mov dword ptr [eax],edx
00F0A981 jmp main+13Dh (0F0A95Dh)
template<class _InIt, class _OutIt> inline _OutIt _Copy_memmove(_InIt _First, _InIt _Last, _OutIt _Dest) { // implement copy-like function as memmove const char * const _First_ch = reinterpret_cast<const char *>(_First); const char * const _Last_ch = reinterpret_cast<const char *>(_Last); char * const _Dest_ch = reinterpret_cast<char *>(_Dest); const size_t _Count = _Last_ch - _First_ch; _CSTD memmove(_Dest_ch, _First_ch, _Count); return (reinterpret_cast<_OutIt>(_Dest_ch + _Count)); } template<class _InIt, class _OutIt> inline _OutIt _Copy_unchecked1(_InIt _First, _InIt _Last, _OutIt _Dest, _General_ptr_iterator_tag) { // copy [_First, _Last) to [_Dest, ...), arbitrary iterators for (; _First != _Last; ++_Dest, (void)++_First) *_Dest = *_First; return (_Dest); } template<class _InIt, class _OutIt> inline _OutIt _Copy_unchecked1(_InIt _First, _InIt _Last, _OutIt _Dest, _Trivially_copyable_ptr_iterator_tag) { // copy [_First, _Last) to [_Dest, ...), pointers to trivially copyable return (_Copy_memmove(_First, _Last, _Dest)); } template<class _InIt, class _OutIt> inline _OutIt _Copy_unchecked(_InIt _First, _InIt _Last, _OutIt _Dest) { // copy [_First, _Last) to [_Dest, ...) 
// note: _Copy_unchecked is called directly elsewhere in the STL return (_Copy_unchecked1(_First, _Last, _Dest, _Ptr_copy_cat(_First, _Dest))); } template<class _InIt, class _OutIt> inline _OutIt _Copy_no_deprecate1(_InIt _First, _InIt _Last, _OutIt _Dest, input_iterator_tag, _Any_tag) { // copy [_First, _Last) to [_Dest, ...), arbitrary iterators return (_Rechecked(_Dest, _Copy_unchecked(_First, _Last, _Unchecked_idl0(_Dest)))); } template<class _InIt, class _OutIt> inline _OutIt _Copy_no_deprecate1(_InIt _First, _InIt _Last, _OutIt _Dest, random_access_iterator_tag, random_access_iterator_tag) { // copy [_First, _Last) to [_Dest, ...), random-access iterators _CHECK_RANIT_RANGE(_First, _Last, _Dest); return (_Rechecked(_Dest, _Copy_unchecked(_First, _Last, _Unchecked(_Dest)))); } template<class _InIt, class _OutIt> inline _OutIt _Copy_no_deprecate(_InIt _First, _InIt _Last, _OutIt _Dest) { // copy [_First, _Last) to [_Dest, ...), no _SCL_INSECURE_DEPRECATE_FN warnings _DEBUG_RANGE_PTR(_First, _Last, _Dest); return (_Copy_no_deprecate1(_Unchecked(_First), _Unchecked(_Last), _Dest, _Iter_cat_t<_InIt>(), _Iter_cat_t<_OutIt>())); } template<class _InIt, class _OutIt> inline _OutIt copy(_InIt _First, _InIt _Last, _OutIt _Dest) { // copy [_First, _Last) to [_Dest, ...) _DEPRECATE_UNCHECKED(copy, _Dest); return (_Copy_no_deprecate(_First, _Last, _Dest)); }
return (_Copy_no_deprecate1(_Unchecked(_First), _Unchecked(_Last), _Dest, _Iter_cat_t<_InIt>(), _Iter_cat_t<_OutIt>()));
// Shorthand for the iterator_category member type of iterator_traits<_Iter>.
template<class _Iter> using _Iter_cat_t = typename iterator_traits<_Iter>::iterator_category;
input_iterator_tag //輸入迭代器,單向一次一步移動,讀取一次
output_iterator_tag //輸出迭代器,單向一次一步移動,寫入一次
forward_iterator_tag //向前迭代器,單向一次一步移動,可多次讀寫,繼承自輸入迭代器
bidirectional_iterator_tag //雙向迭代器,雙向一次一步移動,可多次讀寫,繼承自向前迭代器
random_access_iterator_tag //隨機訪問迭代器,可在任意位置多次讀寫,繼承自雙向迭代器
template<class _Source, class _Dest> inline _General_ptr_iterator_tag _Ptr_copy_cat(const _Source&, const _Dest&) { // return pointer copy optimization category for arbitrary iterators return {}; } template<class _Source, class _Dest> inline conditional_t<is_trivially_assignable<_Dest&, _Source&>::value, typename _Ptr_cat_helper<remove_const_t<_Source>, _Dest>::type, _General_ptr_iterator_tag> _Ptr_copy_cat(_Source * const&, _Dest * const&) { // return pointer copy optimization category for pointers return {}; }
_CSTD memmove(_Dest_ch, _First_ch, _Count);
當源內存的首地址等於目標內存的首地址時,不進行任何拷貝
當源內存的首地址大於目標內存的首地址時,實行正向拷貝
當源內存的首地址小於目標內存的首地址時,實行反向拷貝
這三個指令(movsb、movsw、movsd)每執行一次,都會把源地址處的數據複製到目的地址
目標地址由di決定(16位地址模式下是di,32位地址模式下是edi),每執行一次,根據DF的值增加(DF == 0)或者減少(DF == 1)一個操作數的大小(movsb為1字節,movsw為2字節,movsd為4字節)
源地址由si決定(16位地址模式下是si,32位地址模式下是esi),每執行一次,根據DF的值增加(DF == 0)或者減少(DF == 1)一個操作數的大小(movsb為1字節,movsw為2字節,movsd為4字節)
// Copies 5F5E100h (100,000,000) dwords from k to p with one repeated string
// move. Every instruction is on its own line because `;` begins a comment
// inside an MSVC __asm block, so instructions chained with `;` on a single
// line would be ignored after the first one.
// NOTE(review): relies on the direction flag being clear (DF == 0) so that
// esi/edi advance toward higher addresses — confirm for the calling context.
__asm
{
    mov esi, dword ptr[k]   ; esi = source address
    mov edi, dword ptr[p]   ; edi = destination address
    mov ecx, 5F5E100h       ; ecx = number of dwords to copy
    rep movsd               ; repeat the dword move ecx times
}
好吧,其實上面是memcpy。如果要實現memmove,還需要多進行一些判斷,就像memmove要求的那樣。
事實上,我們只要單步調試就可以看到memmove執行的代碼了。在VS裏面看,的確是進行了彙編優化(注意VS編譯器用的memmove並不是在memmove.c中定義的C版本,而是memcpy.asm中的彙編版本)。在我們的例子中,彙編代碼如下:
; Excerpt of the x86 copy routine that is assembled as memmove when MEM_MOVE
; is defined and as memcpy otherwise. Shown here: argument loading, the
; overlap check, and the large forward copy that ends in rep movsb.

ifdef MEM_MOVE
        _MEM_   equ <memmove>
else    ; MEM_MOVE
        _MEM_   equ <memcpy>
endif   ; MEM_MOVE

%       public  _MEM_
_MEM_   proc \
        dst:ptr byte, \
        src:ptr byte, \
        count:IWORD

        ; destination pointer
        ; source pointer
        ; number of bytes to copy

        OPTION PROLOGUE:NONE, EPILOGUE:NONE

        push    edi                     ; save edi
        push    esi                     ; save esi

; size param/4 prolog byte #reg saved
        .FPO ( 0, 3 , $-_MEM_ , 2, 0, 0 )

        mov     esi,[esp + 010h]        ; esi = source
        mov     ecx,[esp + 014h]        ; ecx = number of bytes to move
        mov     edi,[esp + 0Ch]         ; edi = dest

;
; Check for overlapping buffers:
;       If (dst <= src) Or (dst >= src + Count) Then
;               Do normal (Upwards) Copy
;       Else
;               Do Downwards Copy to avoid propagation
;

        mov     eax,ecx                 ; eax = byte count
        mov     edx,ecx                 ; edx = byte count
        add     eax,esi                 ; eax = point past source end
        cmp     edi,esi                 ; dst <= src ?
        jbe     short CopyUp            ; no overlap: copy toward higher addresses
        cmp     edi,eax                 ; dst < (src + count) ?
        jb      CopyDown                ; overlap: copy toward lower addresses

;
; Buffers do not overlap, copy toward higher addresses.
;
CopyUp:
        cmp     ecx, 020h
        jb      CopyUpDwordMov          ; size smaller than 32 bytes, use dwords
        cmp     ecx, 080h
        jae     CopyUpLargeMov          ; if greater than or equal to 128 bytes, use Enhanced fast Strings
        bt      __isa_enabled, __ISA_AVAILABLE_SSE2
        jc      XmmCopySmallTest
        jmp     Dword_align

CopyUpLargeMov:
        bt      __favor, __FAVOR_ENFSTRG ; check if Enhanced Fast Strings is supported
        jnc     CopyUpSSE2Check         ; if not, check for SSE2 support
        rep     movsb
        mov     eax,[esp + 0Ch]         ; return original destination pointer
        pop     esi
        pop     edi
        M_EXIT
因為我們的例子中沒有重疊的內存區,而且大小也比128 bytes要大,自然就進入了CopyUpLargeMov過程,我們可以很清楚地看到rep movsb,memmove的實現過程就是我們所想的那樣。實際上memmove的彙編版本還有其他大量的優化,有興趣的朋友可以打開memcpy.asm看一看。
這樣感覺很不錯,用movsd指令以後我們可以很直觀地發現,已經減少了很多無謂的寄存器賦值操作(而且movsd指令還會被CPU加速)。我們接下來試一下效果: