在OpenMP並行程序設計中,for循環並行是一種重要的獨立並行指令。它的指令格式是:
#include <omp.h> #pragma omp parallel for for(i = begin;i < end;++i) { // Content }
parallel for指令的後面必須緊跟for語句塊!
而且for循環並行必須處在parallel並行區塊內!不然會被當成串行執行!
前一篇博客已經說明,OpenMP的並行計算模式是插入並行語句的方法,如上圖。當我們的串行程序執行到並行語句塊時,會從主線程中派生出線程組,然後線程組對計算任務進行均分並行計算。並行計算結束後重新回到串行執行。
#include <omp.h>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <ctime>

const int maxn = 5e7;   // element count (three static int arrays ~600 MB total)
const int mod = 10000;  // random values are in [0, mod)
int vec1[maxn], vec2[maxn], vec[maxn], i;

// Demo: element-wise product of two large vectors, serial vs. OpenMP parallel,
// with wall-clock timing of each pass.
int main() {
    srand((unsigned int)time(NULL));
    for (i = 0; i < maxn; ++i) {
        vec1[i] = rand() % mod;
        vec2[i] = rand() % mod;
    }
    printf("--------------before parallel compute---------------\n");
    // Use omp_get_wtime() (wall-clock seconds) instead of clock():
    // clock() sums CPU time over ALL threads, so the parallel region would
    // never appear faster; clock_t also must not be printed with %d.
    double s = omp_get_wtime();
    for (i = 0; i < maxn; ++i) {
        vec[i] = vec1[i] * vec2[i];
    }
    double t = omp_get_wtime();
    printf("--------------used time = %.3f ms---------------\n", (t - s) * 1000.0);

    s = omp_get_wtime();
    printf("--------------enter parallel compute---------------\n");
    // i is the shared global loop counter, so it must be privatized here.
#pragma omp parallel num_threads(20) shared(vec1, vec2, vec) private(i)
    {
#pragma omp for
        for (i = 0; i < maxn; ++i) {
            vec[i] = vec1[i] * vec2[i];
        }
    }
    t = omp_get_wtime();
    printf("--------------used time = %.3f ms---------------\n", (t - s) * 1000.0);
    return 0;
}
#include <omp.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <ctime>

const int maxn = 1000;  // matrices are maxn x maxn
const int mod = 10000;  // random values are in [0, mod)
int vec1[maxn][maxn], vec2[maxn][maxn], vec[maxn][maxn], i, j, k;

// Demo: dense matrix multiply vec = vec1 * vec2, serial vs. OpenMP parallel,
// reporting wall-clock times and the speedup ratio.
int main() {
    srand((unsigned int)time(NULL));
    for (i = 0; i < maxn; ++i) {
        for (j = 0; j < maxn; ++j) {
            vec1[i][j] = rand() % mod;
            vec2[i][j] = rand() % mod;
        }
    }
    printf("--------------before parallel compute---------------\n");
    // omp_get_wtime() gives wall-clock seconds; clock() would sum the CPU
    // time of all threads and hide any parallel speedup.
    double s1 = omp_get_wtime();
    for (i = 0; i < maxn; ++i) {
        for (j = 0; j < maxn; ++j) {
            for (k = 0; k < maxn; ++k) {
                vec[i][j] += (vec1[i][k] * vec2[k][j]);
            }
        }
    }
    double t1 = omp_get_wtime();
    printf("----------------used time = %.3f ms-----------------\n", (t1 - s1) * 1000.0);
    printf("--------------enter parallel compute---------------\n");
    // BUG FIX: the loop accumulates with +=, so vec must be reset to zero
    // before the parallel pass — otherwise it adds on top of the serial
    // result and produces doubled (wrong) values.
    memset(vec, 0, sizeof(vec));
    double s2 = omp_get_wtime();
    // collapse(2): parallelize the i*j iteration space; k stays sequential
    // per (i, j) cell, so each vec[i][j] is written by exactly one thread.
#pragma omp parallel for collapse(2) schedule(dynamic) private(i, j, k) shared(vec1, vec2, vec)
    for (i = 0; i < maxn; ++i) {
        for (j = 0; j < maxn; ++j) {
            for (k = 0; k < maxn; ++k) {
                vec[i][j] += (vec1[i][k] * vec2[k][j]);
            }
        }
    }
    double t2 = omp_get_wtime();
    printf("----------------used time = %.3f ms-----------------\n", (t2 - s2) * 1000.0);
    printf("\n----------------the speedup ratio = %lf---------------\n",
           (t1 - s1) / (t2 - s2));
    return 0;
}
一旦在前面已經使用過
#pragma omp parallel ...
語句,而且當前還處在這個並行區內,這時若想使用for循環並行,千萬不要再寫一次:
#pragma omp parallel for
這樣的操作,因為這樣會讓線程組重組,相當於有兩重並行。舉個例子看看:
#include <omp.h>
#include <iostream>
using namespace std;

// Correct pattern: one enclosing parallel region, and a plain "omp for"
// worksharing directive inside it. The 5 iterations are split across the
// 10 threads, so each value 0..4 is printed exactly once.
int main() {
    #pragma omp parallel num_threads(10)
    {
        #pragma omp for
        for (int idx = 0; idx < 5; ++idx) {
            // critical section keeps the two stream insertions of one line
            // from interleaving with another thread's output
            #pragma omp critical
            {
                cout << "i = " << idx << endl;
            }
        }
    }
    return 0;
}
此時的運行結果是:
#include <omp.h> #include <iostream> using namespace std; int main() { #pragma omp parallel num_threads(4) { #pragma omp parallel for // 注意看這裏哦 for (int i = 0; i < 5; ++i) { #pragma omp critical { cout << "i = " << i << endl; } } } return 0; }
看看這樣「畫蛇添足」的運行結果:
我們會發現,這個0 ~ 4被重複執行了!這樣不但會影響並行程序的結果,還會讓運行的開銷變大!
造成這種結果的原因是:parallel指令會告訴運行時系統,此時要重組線程組,重新開始並行執行。於是每個線程執行到那條指令時都會重組一次線程組,導致循環被白白多執行了數次(取決於外層線程數)。
其實同步啊,在並行計算裏有兩種含義:
第一:線程/進程的運行有的快有的慢,我想要在某處各個線程/進程達到一樣的狀態,這叫並行程序的運行同步
第二:對於共享內存的模型,我們需要控制數據的訪問,達到線程同步。這樣做的目的是防止多個進程/線程同時訪問、修改某個數據或內存,導致數據失真。舉個例子:初始有變量a = 2,線程A要讓a++,線程B要讓a*=2。如果不控制訪問,兩個線程可能同時讀寫a,最終結果就不確定了;因此需要讓對該變量(或某語句塊)的操作在同一時刻只允許一個線程執行,其他線程等待。這就叫數據同步。
#include <omp.h>
#include <iostream>
using namespace std;

// Demo: query the team size inside a parallel region and accumulate a
// shared counter under a critical section.
int main() {
    int i, len, cnt = 0;
    #pragma omp parallel num_threads(6)
    {
        // BUG FIX: in the original every thread wrote the shared variable
        // "len" concurrently — a data race. "single" lets exactly one thread
        // write it, and its implicit barrier guarantees all threads see the
        // value before entering the worksharing loop.
        #pragma omp single
        len = omp_get_num_threads();
        #pragma omp for private(i)
        for (i = 0; i < len; ++i) {
            // critical protects both the output line and the shared sum
            #pragma omp critical
            {
                cout << "Current is " << i << endl;
                cnt += i;
            }
        }
    }
    cout << "cnt = " << cnt << endl;
    return 0;
}