從接觸 Golang 開始,斷斷續續已有差不多一年左右的時間了,都是業餘自己學學看看,目前主要限於語法及語言特性,尚未用它寫過實際的項目。
關於 Golang 的語法及語言特性,網上有不少資源可以學習。後面某個時間,我也許會寫一篇粗淺的文章,來比較一下 Golang 和 C++、Delphi 甚至 C# 等語言語法方面的特性。
我算是個急性子的人(當然現在好一些了),所以作爲碼農,自然會對「效率」比較敏感。這裏的效率不僅僅指編譯器生成的機器碼優化程度,也包括編譯器的編譯速度,所以我對 C++ 興趣不算大,雖然它是我平時的工作語言。
言歸正傳。
我分別用 Golang、C++、Delphi 寫了四個小例子,包括普通的應用場景、字符串(串接)操作及數據密集計算(當然也會涉及到譬如庫函數的優化等)。我的電腦軟硬件環境爲:Win7 64bit,Xeon E3-1230(4 核 8 線程),16G RAM。Golang 版本是 1.3.1 Windows/386,VC 則用的 VS 2012,而 Delphi 則用的 XE6 Update1。VC 和 Delphi 編譯設置爲 Win32 & Release,Golang 則使用默認配置。
所有測試的計量單位均爲毫秒(ms)。
首先是計算 π 的例子,代碼分別如下。
Golang:
// Benchmark: approximate π with the alternating series
// 4 * (1 - 1/3 + 1/5 - 1/7 + ...) and report how long the loop took.
package main

import (
	"fmt"
	"time"
)

// cNumMax bounds the odd denominators used by the series.
const cNumMax = 999999999

func main() {
	var (
		sum  = 0.0
		sign = 1.0
	)

	start := time.Now()
	// Walk the odd denominators 1, 3, 5, ... flipping the sign on every term.
	for denom := 1; denom < cNumMax+2; denom += 2 {
		sum += (1.0 / float64(denom)) * sign
		sign = -sign
	}
	sum *= 4
	elapsed := time.Since(start)

	// Dividing a Duration by time.Millisecond yields whole milliseconds.
	fmt.Printf("PI = %f; Time = %d\n", sum, elapsed/time.Millisecond)
}
C++:
#include "stdafx.h"
#include <stdio.h>
#include <time.h>

// Benchmark: approximate pi with the alternating series
// 4 * (1 - 1/3 + 1/5 - 1/7 + ...) and print the elapsed time in milliseconds.
int _tmain(int argc, _TCHAR* argv[])
{
    const int cNumMax = 999999999;
    double sign = 1.0;
    double pi = 0;

    clock_t t1 = clock();
    for (int i = 1; i < cNumMax + 2; i += 2)
    {
        // Use a double literal; the former 1.0f was promoted to double anyway.
        pi += (1.0 / (double)i) * sign;
        sign = -sign;
    }
    pi *= 4;
    clock_t t2 = clock();

    // clock() returns ticks of type clock_t, which is not guaranteed to be an
    // int and is not guaranteed to tick once per millisecond. Scale by
    // CLOCKS_PER_SEC and print through a type that matches the format.
    long elapsedMs = (long)((double)(t2 - t1) * 1000.0 / CLOCKS_PER_SEC);
    printf("PI = %lf; Time = %ld\n", pi, elapsedMs);
    return 0;
}
Delphi:
program PiCalcer;

{$APPTYPE CONSOLE}

{$R *.res}

uses
  System.SysUtils, System.DateUtils;

const
  // Bounds the odd denominators used by the series.
  cNumMax = 999999999;

var
  Sign: Double = 1.0;
  Sum: Double = 0.0;
  Denom: Integer;
  StartTime, StopTime: TDateTime;

begin
  // Benchmark: approximate pi with 4 * (1 - 1/3 + 1/5 - 1/7 + ...).
  StartTime := Now;

  Denom := 1;
  while Denom < cNumMax + 2 do
  begin
    Sum := Sum + (1.0 / Denom) * Sign;
    Sign := -Sign;
    Inc(Denom, 2);
  end;
  Sum := Sum * 4;

  StopTime := Now;

  Writeln(Format('PI = %.6f; Time = %d', [Sum, MilliSecondsBetween(StopTime, StartTime)]));
  // Keep the console window open until Enter is pressed.
  Readln;
end.
分別執行 10 次,結果如下。
Golang:2038 2028 2036 2024 2034 2015 2034 2018 2024 2018,平均:2026.9;
C++ :2041 2052 2062 2036 2033 2049 2039 2026 2037 2038,平均:2041.3;
Delphi :2594 2572 2574 2584 2574 2564 2575 2575 2571 2563,平均:2574.6。
Golang 的結果居然很不錯,比 VC 還快;而 Delphi,大家都懂,優化向來不是它的「強項」。
然後是個質數生成的例子。
Golang:
// Benchmark: count the primes up to cNumMax with a naive sieve that strikes
// out the multiples of every number (not only of primes).
package main

import (
	"fmt"
	"time"
)

// cNumMax is the largest number examined.
const cNumMax = 10000000

func main() {
	start := time.Now()

	// sieve[n] holds n while n is still a prime candidate, 0 once struck out.
	var sieve [cNumMax + 1]int
	for n := 2; n <= cNumMax; n++ {
		sieve[n] = n
	}

	// Strike out 2*base, 3*base, ... for every base in range.
	for base := 2; base <= cNumMax; base++ {
		for factor := 2; factor*base <= cNumMax; factor++ {
			sieve[factor*base] = 0
		}
	}

	// Whatever is still non-zero is prime.
	primes := 0
	for n := 2; n <= cNumMax; n++ {
		if sieve[n] != 0 {
			primes++
		}
	}

	elapsed := time.Since(start)
	fmt.Println("Time:", elapsed, " Count:", primes)
}
C++:
#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// Largest number examined by the sieve.
const int cNumMax = 10000000;

// Benchmark: count the primes up to cNumMax with a naive sieve that strikes
// out the multiples of every number, then print the elapsed time and count.
// Returns 0 on success, 1 if the sieve buffer cannot be allocated.
int _tmain(int argc, _TCHAR* argv[])
{
    clock_t t1 = clock();

    int *nums = (int*)malloc(sizeof(int) * (cNumMax + 1));
    if (nums == NULL)
    {
        // About 40 MB is requested; fail cleanly instead of dereferencing NULL.
        fprintf(stderr, "Out of memory\n");
        return 1;
    }

    int i;
    for (i = 2; i < cNumMax + 1; i++)
    {
        nums[i] = i;
    }

    // Strike out 2*i, 3*i, ... for every i in range.
    int j;
    for (i = 2; i < cNumMax + 1; i++)
    {
        j = 2;
        while (j * i < cNumMax + 1)
        {
            nums[j * i] = 0;
            j++;
        }
    }

    // Whatever is still non-zero is prime.
    int cnt = 0;
    for (i = 2; i < cNumMax + 1; i++)
    {
        if (nums[i] != 0)
        {
            cnt++;
        }
    }

    free(nums);
    clock_t t2 = clock();

    // clock_t is not guaranteed to be an int or to tick once per millisecond:
    // scale by CLOCKS_PER_SEC and print through a type matching the format.
    long elapsedMs = (long)((double)(t2 - t1) * 1000.0 / CLOCKS_PER_SEC);
    printf("Time: %ldms; Count: %d\n", elapsedMs, cnt);
    return 0;
}
Delphi:
program PrimeSieve;

{$APPTYPE CONSOLE}

{$R *.res}

uses
  System.SysUtils, System.DateUtils;

const
  // Largest number examined by the sieve.
  cNumMax = 10000000;

var
  StartTime, StopTime: TDateTime;
  N, Base, Factor: Integer;
  PrimeCount: Integer;
  Sieve: array of Integer;

begin
  // Benchmark: count the primes up to cNumMax with a naive sieve that strikes
  // out the multiples of every number (not only of primes).
  StartTime := Now;

  // Sieve[N] holds N while N is still a candidate, 0 once struck out.
  SetLength(Sieve, cNumMax + 1);
  for N := 2 to cNumMax do
    Sieve[N] := N;

  for Base := 2 to cNumMax do
  begin
    Factor := 2;
    while Factor * Base <= cNumMax do
    begin
      Sieve[Factor * Base] := 0;
      Inc(Factor);
    end;
  end;

  // Whatever is still non-zero is prime.
  PrimeCount := 0;
  for N := 2 to cNumMax do
    if Sieve[N] <> 0 then
      Inc(PrimeCount);

  // Release the array before the clock is stopped.
  Sieve := nil;
  StopTime := Now;

  Writeln(Format('Cnt = %d; Time = %d', [PrimeCount, MilliSecondsBetween(StopTime, StartTime)]));
  // Keep the console window open until Enter is pressed.
  Readln;
end.
同樣分別執行 10 次,結果如下。
Golang:959 957 959 953 961 951 948 956 956 956,平均:955.6;
C++ :965 965 967 953 961 964 963 960 956 956,平均:961;
Delphi : 973 976 973 982 981 970 977 979 971 977,平均:975.9;
仍然,Golang 看上去最快,而 Delphi 則很正常地居末。
所以我忍不住想要來一個能展示 Delphi 優勢的例子,這個例子幾乎毫無疑問,和字符串操作(及內存管理器)相關,所以有如下字符串串接的示例(其中涉及到了譬如 IntToStr / itoa 這樣的函數調用,我自己實現了個 C++ 版的 IntToStr)。
Golang:
// Benchmark: build one long string out of the decimal forms of
// 0 .. cNumMax-1, and time how long the concatenation takes.
package main

import (
	"bytes"
	"fmt"
	"strconv"
	"time"
)

// cNumMax is the number of integers appended to the result.
const cNumMax = 1000000

// testViaBuffer concatenates through a bytes.Buffer and returns the result.
func testViaBuffer() string {
	var sb bytes.Buffer
	for n := 0; n < cNumMax; n++ {
		digits := strconv.Itoa(n)
		sb.WriteString(digits)
	}
	return sb.String()
}

// testViaNormal concatenates with the plain += string operator and returns
// the result. It is only referenced by the disabled section of main.
func testViaNormal() string {
	var joined string
	for n := 0; n < cNumMax; n++ {
		joined += strconv.Itoa(n)
	}
	return joined
}

func main() {
	fmt.Println("Test via bytes.Buffer...")
	start := time.Now()
	s := testViaBuffer()
	elapsed := time.Since(start)
	// Print a 5-byte sample from offset 2000 so the result is visibly used.
	fmt.Printf("Result: %s...(Length = %d); Time: %dms\n", s[2000:2005], len(s), elapsed/time.Millisecond)

	// The += variant is left disabled:
	/*
		fmt.Println("Test via normal way...")
		start = time.Now()
		s = testViaNormal()
		elapsed = time.Since(start)
		fmt.Printf("Result: %s...(Length = %d); Time: %dms\n", s[2000:2005], len(s), elapsed/time.Millisecond)
	*/
}
C++:
#include "stdafx.h"
#include <time.h>
#include <stdarg.h>
#include <string>
#include <iostream>

using namespace std;

// Number of integers appended to the result string.
const int cNumMax = 1000000;

// Formats a wide printf-style format plus an argument list into a wstring.
// Returns an empty string when the formatted length is not positive.
wstring FormatV(const wchar_t* pwcFormat, va_list argList)
{
    const int cchNeeded = _vscwprintf(pwcFormat, argList);
    if (cchNeeded <= 0)
    {
        return wstring();
    }

    wstring text;
    text.resize(cchNeeded);
    // The size passed includes room for the terminating null.
    vswprintf_s(&text[0], cchNeeded + 1, pwcFormat, argList);
    return text;
}

// Variadic front end for FormatV.
wstring __cdecl Format(const wchar_t* pwcFormat, ...)
{
    va_list argList;
    va_start(argList, pwcFormat);
    wstring text = FormatV(pwcFormat, argList);
    va_end(argList);
    return text;
}

// Formats a narrow printf-style format plus an argument list into a string.
// Returns an empty string when the formatted length is not positive.
string FormatVA(const char* pcFormat, va_list argList)
{
    const int cchNeeded = _vscprintf(pcFormat, argList);
    if (cchNeeded <= 0)
    {
        return string();
    }

    string text;
    text.resize(cchNeeded);
    // The size passed includes room for the terminating null.
    vsprintf_s(&text[0], cchNeeded + 1, pcFormat, argList);
    return text;
}

// Variadic front end for FormatVA.
string __cdecl FormatA(const char* pcFormat, ...)
{
    va_list argList;
    va_start(argList, pcFormat);
    string text = FormatVA(pcFormat, argList);
    va_end(argList);
    return text;
}

// Decimal text of nValue as a wide string.
wstring IntToStr(int nValue)
{
    return Format(L"%d", nValue);
}

// Decimal text of nValue as a narrow string.
string IntToStrA(int nValue)
{
    return FormatA("%d", nValue);
}

// Concatenates the wide decimal forms of 0 .. cNumMax-1.
wstring testW()
{
    wstring joined = L"";
    for (int n = 0; n < cNumMax; ++n)
    {
        joined.append(IntToStr(n));
    }
    return joined;
}

// Concatenates the narrow decimal forms of 0 .. cNumMax-1.
string test()
{
    string joined = "";
    for (int n = 0; n < cNumMax; ++n)
    {
        joined.append(IntToStrA(n));
    }
    return joined;
}

// Runs the narrow and then the wide concatenation, printing for each a
// 5-character sample from offset 2000, the total size and the clock() delta.
int _tmain(int argc, _TCHAR* argv[])
{
    cout << "Starting test with a loop num of " << cNumMax << endl;
    clock_t began = clock();
    string s = test();
    clock_t ended = clock();
    cout << "Result: " << s.substr(2000, 5) << "..." << "; Size: " << s.size()
         << "; Time: " << ended - began << "ms" << endl;

    cout << endl;

    cout << "Starting test for WSTRING with a loop num of " << cNumMax << endl;
    began = clock();
    wstring ws = testW();
    ended = clock();
    wcout << "Result: " << ws.substr(2000, 5) << "..." << "; Size: " << ws.size()
          << "; Time: " << ended - began << "ms" << endl;

    return 0;
}
Delphi:
program StrPerformanceTest;

{$APPTYPE CONSOLE}

{$R *.res}

uses
  System.SysUtils, System.DateUtils;

const
  // Number of integers appended to the result string.
  cNumMax = 1000000;

// Concatenates the decimal forms of 0 .. cNumMax-1 through a TStringBuilder
// and returns the resulting string.
function TestViaStringBuilder: string;
var
  SB: TStringBuilder;
  I : Integer;
begin
  SB := TStringBuilder.Create;
  try
    for I := 0 to cNumMax - 1 do
      SB.Append(IntToStr(I));
    Result := SB.ToString;
  finally
    // Release the builder even if Append/ToString raises (e.g. out of memory).
    SB.Free;
  end;
end;

// Concatenates the decimal forms of 0 .. cNumMax-1 with the plain string
// "+" operator and returns the resulting string.
function TestViaNormal: string;
var
  I : Integer;
begin
  Result := '';
  for I := 0 to cNumMax - 1 do
    Result := Result + IntToStr(I);
end;

var
  T1: Double;
  T2: Double;
  S : string;

begin
  Writeln('Starting test with a loop num of ', cNumMax, '...');

  T1 := Now;
  S := TestViaStringBuilder;
  T2 := Now;
  // Copy is 1-based, so index 2001 samples the 5 characters at offset 2000.
  Writeln(Format('Test via TStringBuilder result: %s...(Length = %d); Time: %dms',
    [Copy(S, 2001, 5), Length(S), MilliSecondsBetween(T2, T1)]));

  T1 := Now;
  S := TestViaNormal;
  T2 := Now;
  Writeln(Format('Test via normal-way(+=) result: %s...(Length = %d); Time: %dms',
    [Copy(S, 2001, 5), Length(S), MilliSecondsBetween(T2, T1)]));

  // Keep the console window open until Enter is pressed.
  Readln;
end.
分別執行 10 次。悲劇的是,Golang 裏的字符串 += 操做實在太慢了,我實在不想等下去,因此只給出了其官方推薦的使用 bytes.Buffer 的結果。而在這個例子中,Delphi 使用 TStringBuilder 並未顯示出什麼優化(FastMM 實在太強悍了!),因此我也只給出了普通的串接結果(AnsiString 和 string 都是 Delphi 的原生類型,有着類同的內存佈局,效率上應沒有什麼差異,因此這裏只測試了 string)。
Golang :141 148 134 119 133 123 145 127 122 132,平均:132.4;
C++(std::string) :384 400 384 385 389 391 389 384 390 383,平均:387.9;
C++(std::wstring) :519 521 522 521 519 522 518 519 518 518,平均:519.7;
Delphi(string) :41 41 41 41 41 41 41 41 44 41,平均:41.3;
果然,Delphi 大幅領先,當然這主要歸功於 FastMM,這個開源的 Pascal 家族的內存管理器實在太強大了!
當然這個測試對 C++ 並不公平,因爲 Golang 的寫法並不是普通的串接,只是我不知道 STL 或 Boost 裏有無類似 StringBuilder 這樣的利器呢?
最後是個數據密集計算型的例子。
Golang:
// Benchmark: multiply two cSize×cSize integer matrices 100001 times and
// report the elapsed time.
package main

import (
	"fmt"
	"time"
)

// cSize is the dimension of the square matrices.
const cSize int = 30

type mymatrix [cSize][cSize]int

// mkmatrix fills the first rows×cols cells of mx with 1, 2, 3, ... in
// row-major order.
func mkmatrix(rows, cols int, mx *mymatrix) {
	count := 1
	for r := 0; r < rows; r++ {
		for c := 0; c < cols; c++ {
			mx[r][c] = count
			count++
		}
	}
}

// multmatrix stores the product m1 × m2 into mm. cols is used both as the
// number of result columns and as the length of the dot product, as in the
// original code.
func multmatrix(rows, cols int, m1, m2 *mymatrix, mm *mymatrix) {
	for i := 0; i < rows; i++ {
		for j := 0; j < cols; j++ {
			val := 0
			for k := 0; k < cols; k++ {
				val += m1[i][k] * m2[k][j]
			}
			// Store once per cell, after the dot product is complete. The
			// store used to sit inside the k loop, writing every partial sum
			// (cols stores per cell instead of one); the Delphi version of
			// this benchmark already stores after the loop.
			mm[i][j] = val
		}
	}
}

func main() {
	var m1, m2, mm mymatrix
	mkmatrix(cSize, cSize, &m1)
	mkmatrix(cSize, cSize, &m2)

	t0 := time.Now()
	// Note: "<=" makes this run 100001 times.
	for i := 0; i <= 100000; i++ {
		multmatrix(cSize, cSize, &m1, &m2, &mm)
	}
	t := time.Since(t0)

	fmt.Println(mm[0][0], mm[2][3], mm[3][2], mm[4][4], mm[29][29])
	fmt.Println("tick = ", t)
}
C++:
#include "stdafx.h"
#include <time.h>
#include <iostream>

using namespace std;

// Dimension of the square matrices.
const int MATRIX_SIZE = 30;

// Not referenced by the code below; kept because it is part of the original program.
int Matrix[MATRIX_SIZE][MATRIX_SIZE];

// Fills the first rows x cols cells of mx with 1, 2, 3, ... in row-major order.
void MakeMatrix(int rows, int cols, int mx[MATRIX_SIZE][MATRIX_SIZE])
{
    rows--;
    cols--;
    int count = 1;
    for (int r = 0; r <= rows; r++)
    {
        for (int c = 0; c <= cols; c++)
        {
            mx[r][c] = count;
            count++;
        }
    }
}

// Stores the product m1 x m2 into mx. cols is used both as the number of
// result columns and as the length of the dot product, as in the original code.
void MatrixMult(int rows, int cols, const int m1[MATRIX_SIZE][MATRIX_SIZE], const int m2[MATRIX_SIZE][MATRIX_SIZE], int mx[MATRIX_SIZE][MATRIX_SIZE])
{
    rows--;
    cols--;
    for (int i = 0; i <= rows; i++)
    {
        for (int j = 0; j <= cols; j++)
        {
            int val = 0;
            for (int k = 0; k <= cols; k++)
            {
                val += m1[i][k] * m2[k][j];
            }
            // Store once per cell, after the dot product is complete. The
            // store used to sit inside the k loop, writing every partial sum;
            // the Delphi version of this benchmark already stores after the loop.
            mx[i][j] = val;
        }
    }
}

// Builds two matrices, multiplies them num + 1 times and prints a few result
// cells followed by the clock() delta.
int _tmain(int argc, _TCHAR* argv[])
{
    int num = 100000;
    int m1[MATRIX_SIZE][MATRIX_SIZE], m2[MATRIX_SIZE][MATRIX_SIZE], mx[MATRIX_SIZE][MATRIX_SIZE];
    MakeMatrix(MATRIX_SIZE, MATRIX_SIZE, m1);
    MakeMatrix(MATRIX_SIZE, MATRIX_SIZE, m2);

    clock_t t1 = clock();
    // Note: "<=" makes this run num + 1 times.
    for (int i = 0; i <= num; i++)
    {
        MatrixMult(MATRIX_SIZE, MATRIX_SIZE, m1, m2, mx);
    }
    clock_t t2 = clock();

    cout << mx[0][0] << " " << mx[2][3] << " " << mx[3][2] << " " << mx[4][4] << endl;
    cout << t2 - t1 << " ms" << endl;
    return 0;
}
Delphi:
program Project1;

{$APPTYPE CONSOLE}

{$R *.res}

uses
  System.SysUtils, System.DateUtils;

const
  // Dimension of the square matrices.
  cSize = 30;

type
  TMatrix = array[0..cSize - 1, 0..cSize - 1] of Integer;

// Fills the first Rows x Cols cells of Mx with 1, 2, 3, ... in row-major order.
procedure MakeMatrix(Rows, Cols: Integer; var Mx: TMatrix);
var
  Row, Col, Next: Integer;
begin
  Next := 1;
  for Row := 0 to Rows - 1 do
    for Col := 0 to Cols - 1 do
    begin
      Mx[Row, Col] := Next;
      Next := Next + 1;
    end;
end;

// Stores the product M1 x M2 into Mx. Cols is used both as the number of
// result columns and as the length of the dot product.
procedure MatrixMult(Rows, Cols: Integer; const M1, M2: TMatrix; var Mx: TMatrix); inline;
var
  Row, Col, Idx, Sum: Integer;
begin
  for Row := 0 to Rows - 1 do
    for Col := 0 to Cols - 1 do
    begin
      Sum := 0;
      for Idx := 0 to Cols - 1 do
        Sum := Sum + M1[Row, Idx] * M2[Idx, Col];
      Mx[Row, Col] := Sum;
    end;
end;

var
  Rounds, Round: Integer;
  Left, Right, Product: TMatrix;
  StartTime, StopTime: TDateTime;

begin
  Rounds := 100000;
  MakeMatrix(cSize, cSize, Left);
  MakeMatrix(cSize, cSize, Right);

  StartTime := Now;
  // Note: "0 to Rounds" runs Rounds + 1 times.
  for Round := 0 to Rounds do
    MatrixMult(cSize, cSize, Left, Right, Product);
  StopTime := Now;

  WriteLn(Product[0, 0], ' ', Product[2, 3], ' ', Product[3, 2], ' ', Product[4, 4], ' ', Product[29, 29]);
  WriteLn(' C = ', MilliSecondsBetween(StopTime, StartTime), ' ms');
end.
分別執行 10 次後結果如下。
Golang:8757 8790 8713 8748 8737 8744 8752 8752 8746 8754,平均:8749.3;
C++ :1723 1735 1714 1707 1713 1725 1708 1723 1720 1725,平均:1719.3;
Delphi :2384 2362 2359 2389 2362 2351 2340 2352 2356 2352,平均:2360.7;
在這樣的密集運算例子裏,Golang 的表現實在不好,Golang 的編譯器優化還有很長的路。而 Delphi 則不出意外,不溫不火,勉強也還算能接受吧。
至此,或許大體可以這樣初步評斷:Golang 在大部分應用場景下,效率方面是滿足要求的;而若涉及到密集運算,當前比較好的方法應該是通過 CGo 了。考慮到 Golang 強大的 goroutine 和 channel、豐富的標準庫(譬如網絡方面)、精簡的語法和非常快的編譯速度(幾乎媲美 Delphi),後端開發嘗試下 Golang 應是比較可行的,而也確實有不少早已用 Golang 作後端開發的項目實例了。
注:關於 Golang 的語言語法及併發方面的特性,過段時間再淺敘。
經 Colin 同學提醒,測試字符串串接時使用的自實現版 IntToStr 效率不行,對 C++ 很不公平,於是我改回使用 _itoa_s 和 _itow_s 這倆庫函數,如下:
#include "stdafx.h"
#include <time.h>
#include <stdarg.h>
#include <string>
#include <iostream>

using namespace std;

// Number of integers appended to the result string.
const int cNumMax = 1000000;

// Characters needed for any 32-bit int in base 10: an optional sign, up to
// 10 digits and the terminating null. The former 10-character buffers only
// held up to 9 digits, so _itoa_s/_itow_s would have invoked the
// invalid-parameter handler as soon as cNumMax was raised far enough.
const int cIntTextLen = 12;

// Concatenates the wide decimal forms of 0 .. cNumMax-1.
wstring testW()
{
    wstring ret = L"";
    wchar_t ws[cIntTextLen];
    for (int i = 0; i < cNumMax; i++)
    {
        // Array overload: the buffer size is deduced, 10 is the radix.
        _itow_s(i, ws, 10);
        ret += ws;
    }
    return ret;
}

// Concatenates the narrow decimal forms of 0 .. cNumMax-1.
string test()
{
    string ret = "";
    char s[cIntTextLen];
    for (int i = 0; i < cNumMax; i++)
    {
        // Array overload: the buffer size is deduced, 10 is the radix.
        _itoa_s(i, s, 10);
        ret += s;
    }
    return ret;
}

// Runs the narrow and then the wide concatenation, printing for each a
// 5-character sample from offset 2000, the total size and the clock() delta.
int _tmain(int argc, _TCHAR* argv[])
{
    cout << "Starting test with a loop num of " << cNumMax << endl;
    clock_t t1 = clock();
    string s = test();
    clock_t t2 = clock();
    cout << "Result: " << s.substr(2000, 5) << "..." << "; Size: " << s.size() << "; Time: " << t2 - t1 << "ms" << endl;

    cout << endl;

    cout << "Starting test for WSTRING with a loop num of " << cNumMax << endl;
    t1 = clock();
    wstring ws = testW();
    t2 = clock();
    wcout << "Result: " << ws.substr(2000, 5) << "..." << "; Size: " << ws.size() << "; Time: " << t2 - t1 << "ms" << endl;

    return 0;
}
測試 10 次,效率果然大幅提高,平均大約分別是:std::string - 70ms、std::wstring - 75ms,相當快速!不過還是比 Delphi 慢:耗時約爲 Delphi(41.3ms)的 1.7 倍,換言之 Delphi 的耗時要少 40% 左右。