OpenCL + OpenCV 圖像旋轉

使用 OpenCV 從文件讀取彩色的 png 圖像,旋轉一定角度之後寫回文件

● 代碼,核函數


 // Rotates inputImage about its centre by `angle` radians and stores the result in outputImage.
 //
 // Each work-item handles one output pixel (x, y) = (get_global_id(0), get_global_id(1)):
 // it rotates the pixel's offset from the image centre, samples the input image at the
 // resulting coordinate through the host-supplied sampler, and writes that value out.
 //
 // A program-scope sampler can be used instead of the kernel argument: enable the
 // declaration below and remove the `sampler` parameter from imageRotate.
 //__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP;
 __kernel void imageRotate(__read_only image2d_t inputImage, __write_only image2d_t outputImage, float angle, sampler_t sampler)
 {
     const int width = get_image_width(inputImage);
     const int height = get_image_height(inputImage);
     const int x = get_global_id(0);
     const int y = get_global_id(1);

     // Work-items outside the image (e.g. when the host rounds the global size up) must not write.
     if (x >= width || y >= height)
         return;

     // Pixel position relative to the image centre.
     const int halfWidth = width / 2;
     const int halfHeight = height / 2;
     const int xt = x - halfWidth;
     const int yt = y - halfHeight;

     const float sinFactor = sin(angle);
     const float cosFactor = cos(angle);

     // Rotate the centred offset and move it back into image coordinates.
     const float2 readCoord = (float2)(halfWidth + cosFactor * xt - sinFactor * yt,
                                       halfHeight + sinFactor * xt + cosFactor * yt);

     const float4 value = read_imagef(inputImage, sampler, readCoord);
     write_imagef(outputImage, (int2)(x, y), value);
 }


● 代碼,分三通道分別旋轉


  1 #include <stdio.h>
  2 #include <stdlib.h>
  3 #include <cl.h>
  4 #include <opencv.hpp>
  5 #include <opencv2\core\cvstd.hpp>   // namespace cv 的定義
  7 #pragma warning(disable : 4996)     // 解封OPenCL1.2
  9 using namespace cv;
 11 const char *sourceProgram = "D:/Code/OpenCL/";// 核函數文件
 12 const char *imagePath = "D:\\input.png";
 13 const float angle = 3.14f / 4;
 // Reads the whole file at kernelPath into a newly allocated, NUL-terminated buffer.
 //
 // kernelPath: path of the text file to load.
 // output:     receives the malloc'ed buffer; the caller owns it and must free() it.
 // Returns the number of bytes read (the terminating NUL is not counted).
 // On any failure a message is printed and the process exits with status -1.
 int readSource(const char* kernelPath, char **output)
 {
     FILE *fp = NULL;
 #ifdef _MSC_VER
     fopen_s(&fp, kernelPath, "rb");
 #else
     fp = fopen(kernelPath, "rb");   // fopen_s is not available on most non-MSVC toolchains
 #endif
     if (!fp)
     {
         printf("Open kernel file failed\n");
         exit(-1);
     }

     // Determine the file size by seeking to the end.
     if (fseek(fp, 0, SEEK_END) != 0)
     {
         printf("Seek end of file failed\n");
         exit(-1);
     }
     const long size = ftell(fp);
     if (size < 0)
     {
         printf("Get file position failed\n");
         exit(-1);
     }
     rewind(fp);

     // One extra byte for the terminating NUL. The cast is required because this file is C++.
     char *buffer = (char *)malloc((size_t)size + 1);
     if (buffer == NULL)
     {
         printf("Allocate space failed\n");
         exit(-1);
     }

     // A short read would otherwise leave uninitialized bytes in the program source.
     if (fread(buffer, 1, (size_t)size, fp) != (size_t)size)
     {
         printf("Read kernel file failed\n");
         exit(-1);
     }
     fclose(fp);

     buffer[size] = '\0';
     *output = buffer;
     printf("readSource succeed, program file: %s\n", kernelPath);
     return (int)size;
 }
 48 int main()
 49 {
 50     // 準備平臺,設備,上下文,命令隊列部分    
 51     cl_int status;
 52     cl_uint nPlatform;
 53     clGetPlatformIDs(0, NULL, &nPlatform);
 54     cl_platform_id *listPlatform = (cl_platform_id*)malloc(nPlatform * sizeof(cl_platform_id));
 55     clGetPlatformIDs(nPlatform, listPlatform, NULL);
 56     cl_uint nDevice = 0;
 57     clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice);
 58     cl_device_id *listDevice = (cl_device_id*)malloc(nDevice * sizeof(cl_device_id));
 59     clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, nDevice, listDevice, NULL);
 60     cl_context context = clCreateContext(NULL, nDevice, listDevice, NULL, NULL, &status);    
 61     cl_command_queue queue = clCreateCommandQueue(context, listDevice[0], 0, &status);                          // OpenCL1.2
 63     //                                            CL_QUEUE_SIZE, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE,
 64     //                                            0};
 65     //cl_command_queue queue = clCreateCommandQueueWithProperties(context, listDevice[0], &queueProp, &status); // 第三個參數 queueProp 各類改都會報內存越界 0xC0000005
 67     // 圖片相關
 68     Mat image, channel[3];
 69     image = imread(imagePath);
 70     split(image, channel);                          // 拆分爲三通道,分別旋轉後拼合
 71     const int imageHeight = image.rows, imageWidth = image.cols;
 72     unsigned char *imageData = (unsigned char*)malloc(sizeof(unsigned char) * imageHeight * imageWidth);
 74     cl_image_format format;
 75     format.image_channel_order = CL_R;              // 單通道
 76     format.image_channel_data_type = CL_UNORM_INT8; // 無符號 8 位整形,0 ~ 255
 77     cl_image_desc desc;
 78     desc.image_type = CL_MEM_OBJECT_IMAGE2D;        // 能夠 memset(desc,sizeof(cl_image_desc)); 後僅對前三項賦值
 79     desc.image_width = imageWidth;
 80     desc.image_height = imageHeight;
 81     desc.image_depth = 0;
 82     desc.image_array_size = 0;
 83     desc.image_row_pitch = 0;
 84     desc.image_slice_pitch = 0;
 85     desc.num_mip_levels = 0;
 86     desc.num_samples = 0;
 87     desc.buffer = NULL;
 88     cl_mem d_inputImage = clCreateImage(context, CL_MEM_READ_ONLY, &format, &desc, NULL, &status);
 89     cl_mem d_outputImage = clCreateImage(context, CL_MEM_WRITE_ONLY, &format, &desc, NULL, &status);
 91     // 採樣器
 92     cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status);  // OpenCL1.2
 93     //cl_sampler_properties samplerProp[7] = {CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,                               // OpenCL2.0
 94     //                                        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, 
 95     //                                        CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
 96     //                                        0};             
 97     //cl_sampler sampler = clCreateSamplerWithProperties(context, samplerProp, &status);                            // 也是內存越界,用不了
 99     // 程序和內核
100     char* source = NULL;
101     const size_t lenSource = readSource(sourceProgram, &source);
102     cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source, &lenSource, &status);
103     clBuildProgram(program, 1, listDevice, NULL, NULL, NULL);
104     cl_kernel kernel = clCreateKernel(program, "imageRotate", &status);    
105     clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage);
106     clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage);
107     clSetKernelArg(kernel, 2, sizeof(cl_float), &angle);
108     clSetKernelArg(kernel, 3, sizeof(cl_sampler), &sampler);
109     size_t origin[3] = { 0, 0, 0 }, region[3] = { imageWidth, imageHeight, 1 };// 拷貝圖片緩衝區時使用的起點和範圍參數
110     size_t globalSize[2] = { imageWidth, imageHeight };
112     for (int i = 0; i < 3; i++)// 分三個通道拷入緩衝區,執行旋轉操做,拷回內存
113     {
114         memcpy(imageData, channel[i].data, sizeof(unsigned char) * imageHeight * imageWidth);
115         clEnqueueWriteImage(queue, d_inputImage, CL_TRUE, origin, region, 0, 0, imageData, 0, NULL, NULL);
116         clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL);
117         clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin, region, 0, 0, imageData, 0, NULL, NULL);
118         memcpy(channel[i].data, imageData, sizeof(unsigned char) * imageHeight * imageWidth);
119     }
121     merge(channel, 3, image);// 合併通道,結果寫入文件,在窗口中展現結果
122     imwrite("D:/output.png", image);
123     imshow("Result", image);
124     waitKey(0);
126     free(listPlatform);
127     free(listDevice);
128     clReleaseContext(context);
129     clReleaseMemObject(d_inputImage);
130     clReleaseMemObject(d_outputImage);
131     clReleaseCommandQueue(queue);
132     clReleaseProgram(program);
133     clReleaseKernel(kernel);    
134     //getchar();
135     return 0;
136 }


● 代碼,四個通道同時操作,注意圖片讀入和輸出的時候只有三個通道,需要進行調整


  1 #include <stdio.h>
  2 #include <stdlib.h>
  3 #include <cl.h>
  4 #include <opencv.hpp>
  5 #include <opencv2\core\cvstd.hpp>   // namespace cv 的定義
  7 #pragma warning(disable : 4996)     // 解封OPenCL1.2
  9 using namespace cv;
 11 const char *sourceProgram = "D:/Code/OpenCL/";// 核函數文件
 12 const char *imagePath = "D:/input.png";
 13 const float angle = 3.14f / 4;
 // Reads the whole file at kernelPath into a newly allocated, NUL-terminated buffer.
 //
 // kernelPath: path of the text file to load.
 // output:     receives the malloc'ed buffer; the caller owns it and must free() it.
 // Returns the number of bytes read (the terminating NUL is not counted).
 // On any failure a message is printed and the process exits with status -1.
 int readSource(const char* kernelPath, char **output)
 {
     FILE *fp = NULL;
 #ifdef _MSC_VER
     fopen_s(&fp, kernelPath, "rb");
 #else
     fp = fopen(kernelPath, "rb");   // fopen_s is not available on most non-MSVC toolchains
 #endif
     if (!fp)
     {
         printf("Open kernel file failed\n");
         exit(-1);
     }

     // Determine the file size by seeking to the end.
     if (fseek(fp, 0, SEEK_END) != 0)
     {
         printf("Seek end of file failed\n");
         exit(-1);
     }
     const long size = ftell(fp);
     if (size < 0)
     {
         printf("Get file position failed\n");
         exit(-1);
     }
     rewind(fp);

     // One extra byte for the terminating NUL. The cast is required because this file is C++.
     char *buffer = (char *)malloc((size_t)size + 1);
     if (buffer == NULL)
     {
         printf("Allocate space failed\n");
         exit(-1);
     }

     // A short read would otherwise leave uninitialized bytes in the program source.
     if (fread(buffer, 1, (size_t)size, fp) != (size_t)size)
     {
         printf("Read kernel file failed\n");
         exit(-1);
     }
     fclose(fp);

     buffer[size] = '\0';
     *output = buffer;
     printf("readSource succeed, program file: %s\n", kernelPath);
     return (int)size;
 }
 48 int main()
 49 {
 50     // 準備平臺,設備,上下文,命令隊列部分    
 51     cl_int status;
 52     cl_uint nPlatform;
 53     clGetPlatformIDs(0, NULL, &nPlatform);
 54     cl_platform_id *listPlatform = (cl_platform_id*)malloc(nPlatform * sizeof(cl_platform_id));
 55     clGetPlatformIDs(nPlatform, listPlatform, NULL);
 56     cl_uint nDevice = 0;
 57     clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice);
 58     cl_device_id *listDevice = (cl_device_id*)malloc(nDevice * sizeof(cl_device_id));
 59     clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, nDevice, listDevice, NULL);
 60     cl_context context = clCreateContext(NULL, nDevice, listDevice, NULL, NULL, &status);    
 61     cl_command_queue queue = clCreateCommandQueue(context, listDevice[0], 0, &status);// OpenCL1.2    
 63     // 圖片相關
 64     Mat image = imread(imagePath);
 65     const int imageHeight = image.rows, imageWidth = image.cols;
 66     unsigned char *imageData = (unsigned char*)malloc(sizeof(unsigned char) * imageHeight * imageWidth * 4);
 68     for (int i = 0; i < imageWidth * imageHeight; i++)// imread 讀進來只有 RGB 三個通道(可能跟圖片自己有關),要補成 4 個通道
 69     {
 70         imageData[4 * i + 0] =[3 * i + 2];//R
 71         imageData[4 * i + 1] =[3 * i + 1];//G
 72         imageData[4 * i + 2] =[3 * i + 0];//B
 73         imageData[4 * i + 3] = 255;                  //A
 74     }
 76     cl_image_format format;
 77     format.image_channel_order = CL_RGBA;            // 合併通道
 78     format.image_channel_data_type = CL_UNORM_INT8; // 無符號 8 位整形,0 ~ 255
 79     cl_image_desc desc;
 80     desc.image_type = CL_MEM_OBJECT_IMAGE2D;        // 能夠 memset(desc,sizeof(cl_image_desc)); 後僅對前三項賦值
 81     desc.image_width = imageWidth;
 82     desc.image_height = imageHeight;
 83     desc.image_depth = 0;
 84     desc.image_array_size = 0;
 85     desc.image_row_pitch = 0;
 86     desc.image_slice_pitch = 0;
 87     desc.num_mip_levels = 0;
 88     desc.num_samples = 0;
 89     desc.buffer = NULL;
 90     cl_mem d_inputImage = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &format, &desc, imageData, &status);// 輸入圖片直接在主機上
 91     cl_mem d_outputImage = clCreateImage(context, CL_MEM_WRITE_ONLY, &format, &desc, NULL, &status);    
 93     // 採樣器
 94     cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status);  // OpenCL1.2
 96     // 程序和內核
 97     char* source = NULL;
 98     const size_t lenSource = readSource(sourceProgram, &source);
 99     cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source, &lenSource, &status);
100     clBuildProgram(program, 1, listDevice, NULL, NULL, NULL);
101     cl_kernel kernel = clCreateKernel(program, "imageRotate", &status);    
102     clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage);
103     clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage);
104     clSetKernelArg(kernel, 2, sizeof(cl_float), &angle);
105     clSetKernelArg(kernel, 3, sizeof(cl_sampler), &sampler);
106     size_t origin[3] = { 0, 0, 0 }, region[3] = { imageWidth, imageHeight, 1 };// 拷貝圖片緩衝區時使用的起點和範圍參數
107     size_t globalSize[2] = { imageWidth, imageHeight };
109     clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL);
110     clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin, region, 0, 0, imageData, 0, NULL, NULL);
112     for (int i = 0; i < imageWidth * imageHeight; i++)// 去掉第 4 個通道,返回 image 中
113     {
114[3 * i + 0] = imageData[4 * i + 2];//B
115[3 * i + 1] = imageData[4 * i + 1];//G
116[3 * i + 2] = imageData[4 * i + 0];//R        
117     }    
119     imwrite("D:/output.png", image);
120     imshow("Result", image);
121     waitKey(0);
123     free(listPlatform);
124     free(listDevice);
125     clReleaseContext(context);
126     clReleaseMemObject(d_inputImage);
127     clReleaseMemObject(d_outputImage);
128     clReleaseCommandQueue(queue);
129     clReleaseProgram(program);
130     clReleaseKernel(kernel);    
131     //getchar();
132     return 0;
133 }


● 輸入、輸出結果,順時針轉 45 度,由於使用了最近鄰採樣,結果中鋸齒比較嚴重。




● 另外一種解封舊 API 的方法:在包含頭文件 <cl.h> 前使用 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS,其中 1_2 可以改爲 1_0、1_1 等。

● 使用 cl_command_queue_properties 和函數 clCreateCommandQueueWithProperties 來創建命令隊列,或是用 cl_sampler_properties 和函數 clCreateSamplerWithProperties 來創建採樣器都失敗了,報內存訪問越界錯誤(0xC0000005)。不管是按格式書寫,還是把 queueProp 改爲 0、創建時第三個參數寫成 &queueProp,都不行;有人說更新顯卡驅動以後就好了。最後解決了:用 AMD APP SDK 下面的動態庫 amdocl64.dll 替換掉 C:\Windows\System32 裏邊那個同名的庫就好了,可以完全使用 OpenCL2.0 的 API,不再報錯。

● cv::imread 讀入的圖片是按照 [ B, G, R, B, G, R, B, G, R, ...] 交錯存放的(注意順序是 B、G、R,上面代碼中 R 取自偏移 2、B 取自偏移 0),在用 OpenCL 處理以前需要進行一定的預處理,要麼用 split 分解各通道爲單獨的圖片,要麼手工拆解,算完以後也要按照這種存放方式轉回圖像數據中。在發現通道個數和順序的問題前,要麼在調用函數 clCreateImage 的時候返回 -37、-38、-39,要麼直接旋轉得到像下面這樣的圖片。以後記得,如果出現這種交叉條紋的圖像,有可能是通道交錯導致的。


● 吐槽一下,網上能找到的 OpenCL + OpenCV 做圖片旋轉的基本上有幾個版本,都是用 FreeImage 庫把圖像處理成灰度圖來旋轉的(參考了 劉文志等(2016). OpenCL 異構並行計算[M]. 的代碼?),輸出肯定是灰度圖了,然後大家博客就相互抄吧,全是垃圾。好不容易找到一個彩色的,代碼還看不了。