使用 OpenCV 從文件讀取彩色的 png 圖像,旋轉一定角度之後寫回文件
● 代碼,核函數
// rotate.cl
// A device-side sampler can be enabled instead; if so, remove the sampler
// parameter from imageRotate:
//__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP;

/*
 * Rotate an image about its centre.
 *
 * Each work-item handles one output pixel (x, y) = (get_global_id(0),
 * get_global_id(1)): it rotates the offset of that pixel from the image centre
 * by `angle`, samples inputImage at the resulting position and stores the
 * sampled value at (x, y) in outputImage.
 *
 * inputImage   image that is sampled
 * outputImage  image that receives the result
 * angle        rotation angle in radians (passed to sin / cos)
 * sampler      sampler used for the read; pixel (unnormalized) coordinates are
 *              passed to it, so it must be created with normalized coordinates
 *              disabled
 */
__kernel void imageRotate(__read_only image2d_t inputImage, __write_only image2d_t outputImage, float angle, sampler_t sampler)
{
    const int width = get_image_width(inputImage), height = get_image_height(inputImage);
    const int halfWidth = width / 2, halfHeight = height / 2;
    const int x = get_global_id(0), y = get_global_id(1);

    // Writing outside the output image is undefined, so work-items that fall
    // outside it (e.g. when the host rounds the global size up) do nothing.
    if (x >= get_image_width(outputImage) || y >= get_image_height(outputImage))
        return;

    // Offset of this pixel from the image centre.
    const int xt = x - halfWidth, yt = y - halfHeight;
    const float sinFactor = sin(angle), cosFactor = cos(angle);

    // Source position: the centre offset rotated by `angle`, moved back to
    // image coordinates.
    const float2 readCoord = (float2)(halfWidth + cosFactor * xt - sinFactor * yt,
                                      halfHeight + sinFactor * xt + cosFactor * yt);
    const float4 value = read_imagef(inputImage, sampler, readCoord);
    write_imagef(outputImage, (int2)(x, y), value);
}
● 代碼,分三通道分別旋轉
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <cl.h> 4 #include <opencv.hpp> 5 #include <opencv2\core\cvstd.hpp> // namespace cv 的定義 6 7 #pragma warning(disable : 4996) // 解封OPenCL1.2 8 9 using namespace cv; 10 11 const char *sourceProgram = "D:/Code/OpenCL/rotate.cl";// 核函數文件 12 const char *imagePath = "D:\\input.png"; 13 const float angle = 3.14f / 4; 14 15 int readSource(const char* kernelPath, char **output)// 讀取文本文件,存儲爲 char * 16 { 17 FILE *fp; 18 int size; 19 fopen_s(&fp, kernelPath, "rb"); 20 if (!fp) 21 { 22 printf("Open kernel file failed\n"); 23 exit(-1); 24 } 25 if (fseek(fp, 0, SEEK_END) != 0) 26 { 27 printf("Seek end of file faildd\n"); 28 exit(-1); 29 } 30 if ((size = ftell(fp)) < 0) 31 { 32 printf("Get file position failed\n"); 33 exit(-1); 34 } 35 rewind(fp); 36 if ((*output = (char *)malloc(size + 1)) == NULL) 37 { 38 printf("Allocate space failed\n"); 39 exit(-1); 40 } 41 fread((void*)*output, 1, size, fp); 42 fclose(fp); 43 (*output)[size] = '\0'; 44 printf("readSource succeed, program file: %s\n", kernelPath); 45 return size; 46 } 47 48 int main() 49 { 50 // 準備平臺,設備,上下文,命令隊列部分 51 cl_int status; 52 cl_uint nPlatform; 53 clGetPlatformIDs(0, NULL, &nPlatform); 54 cl_platform_id *listPlatform = (cl_platform_id*)malloc(nPlatform * sizeof(cl_platform_id)); 55 clGetPlatformIDs(nPlatform, listPlatform, NULL); 56 cl_uint nDevice = 0; 57 clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice); 58 cl_device_id *listDevice = (cl_device_id*)malloc(nDevice * sizeof(cl_device_id)); 59 clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, nDevice, listDevice, NULL); 60 cl_context context = clCreateContext(NULL, nDevice, listDevice, NULL, NULL, &status); 61 cl_command_queue queue = clCreateCommandQueue(context, listDevice[0], 0, &status); // OpenCL1.2 62 //cl_command_queue_properties queueProp[5] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT,// OpenCL2.0 63 // CL_QUEUE_SIZE, 
CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, 64 // 0}; 65 //cl_command_queue queue = clCreateCommandQueueWithProperties(context, listDevice[0], &queueProp, &status); // 第三個參數 queueProp 各類改都會報內存越界 0xC0000005 66 67 // 圖片相關 68 Mat image, channel[3]; 69 image = imread(imagePath); 70 split(image, channel); // 拆分爲三通道,分別旋轉後拼合 71 const int imageHeight = image.rows, imageWidth = image.cols; 72 unsigned char *imageData = (unsigned char*)malloc(sizeof(unsigned char) * imageHeight * imageWidth); 73 74 cl_image_format format; 75 format.image_channel_order = CL_R; // 單通道 76 format.image_channel_data_type = CL_UNORM_INT8; // 無符號 8 位整形,0 ~ 255 77 cl_image_desc desc; 78 desc.image_type = CL_MEM_OBJECT_IMAGE2D; // 能夠 memset(desc,sizeof(cl_image_desc)); 後僅對前三項賦值 79 desc.image_width = imageWidth; 80 desc.image_height = imageHeight; 81 desc.image_depth = 0; 82 desc.image_array_size = 0; 83 desc.image_row_pitch = 0; 84 desc.image_slice_pitch = 0; 85 desc.num_mip_levels = 0; 86 desc.num_samples = 0; 87 desc.buffer = NULL; 88 cl_mem d_inputImage = clCreateImage(context, CL_MEM_READ_ONLY, &format, &desc, NULL, &status); 89 cl_mem d_outputImage = clCreateImage(context, CL_MEM_WRITE_ONLY, &format, &desc, NULL, &status); 90 91 // 採樣器 92 cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status); // OpenCL1.2 93 //cl_sampler_properties samplerProp[7] = {CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, // OpenCL2.0 94 // CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, 95 // CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, 96 // 0}; 97 //cl_sampler sampler = clCreateSamplerWithProperties(context, samplerProp, &status); // 也是內存越界,用不了 98 99 // 程序和內核 100 char* source = NULL; 101 const size_t lenSource = readSource(sourceProgram, &source); 102 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source, &lenSource, &status); 103 clBuildProgram(program, 1, listDevice, NULL, NULL, NULL); 104 cl_kernel kernel = clCreateKernel(program, 
"imageRotate", &status); 105 clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage); 106 clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage); 107 clSetKernelArg(kernel, 2, sizeof(cl_float), &angle); 108 clSetKernelArg(kernel, 3, sizeof(cl_sampler), &sampler); 109 size_t origin[3] = { 0, 0, 0 }, region[3] = { imageWidth, imageHeight, 1 };// 拷貝圖片緩衝區時使用的起點和範圍參數 110 size_t globalSize[2] = { imageWidth, imageHeight }; 111 112 for (int i = 0; i < 3; i++)// 分三個通道拷入緩衝區,執行旋轉操做,拷回內存 113 { 114 memcpy(imageData, channel[i].data, sizeof(unsigned char) * imageHeight * imageWidth); 115 clEnqueueWriteImage(queue, d_inputImage, CL_TRUE, origin, region, 0, 0, imageData, 0, NULL, NULL); 116 clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL); 117 clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin, region, 0, 0, imageData, 0, NULL, NULL); 118 memcpy(channel[i].data, imageData, sizeof(unsigned char) * imageHeight * imageWidth); 119 } 120 121 merge(channel, 3, image);// 合併通道,結果寫入文件,在窗口中展現結果 122 imwrite("D:/output.png", image); 123 imshow("Result", image); 124 waitKey(0); 125 126 free(listPlatform); 127 free(listDevice); 128 clReleaseContext(context); 129 clReleaseMemObject(d_inputImage); 130 clReleaseMemObject(d_outputImage); 131 clReleaseCommandQueue(queue); 132 clReleaseProgram(program); 133 clReleaseKernel(kernel); 134 //getchar(); 135 return 0; 136 }
● 代碼,四個通道同時操作,注意圖片讀入和輸出的時候只有三個通道,需要進行調整
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <cl.h> 4 #include <opencv.hpp> 5 #include <opencv2\core\cvstd.hpp> // namespace cv 的定義 6 7 #pragma warning(disable : 4996) // 解封OPenCL1.2 8 9 using namespace cv; 10 11 const char *sourceProgram = "D:/Code/OpenCL/rotate.cl";// 核函數文件 12 const char *imagePath = "D:/input.png"; 13 const float angle = 3.14f / 4; 14 15 int readSource(const char* kernelPath, char **output)// 讀取文本文件,存儲爲 char * 16 { 17 FILE *fp; 18 int size; 19 fopen_s(&fp, kernelPath, "rb"); 20 if (!fp) 21 { 22 printf("Open kernel file failed\n"); 23 exit(-1); 24 } 25 if (fseek(fp, 0, SEEK_END) != 0) 26 { 27 printf("Seek end of file faildd\n"); 28 exit(-1); 29 } 30 if ((size = ftell(fp)) < 0) 31 { 32 printf("Get file position failed\n"); 33 exit(-1); 34 } 35 rewind(fp); 36 if ((*output = (char *)malloc(size + 1)) == NULL) 37 { 38 printf("Allocate space failed\n"); 39 exit(-1); 40 } 41 fread((void*)*output, 1, size, fp); 42 fclose(fp); 43 (*output)[size] = '\0'; 44 printf("readSource succeed, program file: %s\n", kernelPath); 45 return size; 46 } 47 48 int main() 49 { 50 // 準備平臺,設備,上下文,命令隊列部分 51 cl_int status; 52 cl_uint nPlatform; 53 clGetPlatformIDs(0, NULL, &nPlatform); 54 cl_platform_id *listPlatform = (cl_platform_id*)malloc(nPlatform * sizeof(cl_platform_id)); 55 clGetPlatformIDs(nPlatform, listPlatform, NULL); 56 cl_uint nDevice = 0; 57 clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice); 58 cl_device_id *listDevice = (cl_device_id*)malloc(nDevice * sizeof(cl_device_id)); 59 clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, nDevice, listDevice, NULL); 60 cl_context context = clCreateContext(NULL, nDevice, listDevice, NULL, NULL, &status); 61 cl_command_queue queue = clCreateCommandQueue(context, listDevice[0], 0, &status);// OpenCL1.2 62 63 // 圖片相關 64 Mat image = imread(imagePath); 65 const int imageHeight = image.rows, imageWidth = image.cols; 66 unsigned char *imageData = (unsigned char*)malloc(sizeof(unsigned char) * 
imageHeight * imageWidth * 4); 67 68 for (int i = 0; i < imageWidth * imageHeight; i++)// imread 讀進來只有 RGB 三個通道(可能跟圖片自己有關),要補成 4 個通道 69 { 70 imageData[4 * i + 0] = image.data[3 * i + 2];//R 71 imageData[4 * i + 1] = image.data[3 * i + 1];//G 72 imageData[4 * i + 2] = image.data[3 * i + 0];//B 73 imageData[4 * i + 3] = 255; //A 74 } 75 76 cl_image_format format; 77 format.image_channel_order = CL_RGBA; // 合併通道 78 format.image_channel_data_type = CL_UNORM_INT8; // 無符號 8 位整形,0 ~ 255 79 cl_image_desc desc; 80 desc.image_type = CL_MEM_OBJECT_IMAGE2D; // 能夠 memset(desc,sizeof(cl_image_desc)); 後僅對前三項賦值 81 desc.image_width = imageWidth; 82 desc.image_height = imageHeight; 83 desc.image_depth = 0; 84 desc.image_array_size = 0; 85 desc.image_row_pitch = 0; 86 desc.image_slice_pitch = 0; 87 desc.num_mip_levels = 0; 88 desc.num_samples = 0; 89 desc.buffer = NULL; 90 cl_mem d_inputImage = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &format, &desc, imageData, &status);// 輸入圖片直接在主機上 91 cl_mem d_outputImage = clCreateImage(context, CL_MEM_WRITE_ONLY, &format, &desc, NULL, &status); 92 93 // 採樣器 94 cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status); // OpenCL1.2 95 96 // 程序和內核 97 char* source = NULL; 98 const size_t lenSource = readSource(sourceProgram, &source); 99 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source, &lenSource, &status); 100 clBuildProgram(program, 1, listDevice, NULL, NULL, NULL); 101 cl_kernel kernel = clCreateKernel(program, "imageRotate", &status); 102 clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage); 103 clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage); 104 clSetKernelArg(kernel, 2, sizeof(cl_float), &angle); 105 clSetKernelArg(kernel, 3, sizeof(cl_sampler), &sampler); 106 size_t origin[3] = { 0, 0, 0 }, region[3] = { imageWidth, imageHeight, 1 };// 拷貝圖片緩衝區時使用的起點和範圍參數 107 size_t globalSize[2] = { imageWidth, imageHeight }; 108 109 
clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL); 110 clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin, region, 0, 0, imageData, 0, NULL, NULL); 111 112 for (int i = 0; i < imageWidth * imageHeight; i++)// 去掉第 4 個通道,返回 image 中 113 { 114 image.data[3 * i + 0] = imageData[4 * i + 2];//B 115 image.data[3 * i + 1] = imageData[4 * i + 1];//G 116 image.data[3 * i + 2] = imageData[4 * i + 0];//R 117 } 118 119 imwrite("D:/output.png", image); 120 imshow("Result", image); 121 waitKey(0); 122 123 free(listPlatform); 124 free(listDevice); 125 clReleaseContext(context); 126 clReleaseMemObject(d_inputImage); 127 clReleaseMemObject(d_outputImage); 128 clReleaseCommandQueue(queue); 129 clReleaseProgram(program); 130 clReleaseKernel(kernel); 131 //getchar(); 132 return 0; 133 }
● 輸入、輸出結果,順時針轉 45 度,由於使用了最近鄰採樣,結果中鋸齒比較嚴重
● 另一種解封舊 API 的方法,在包含頭文件 <cl.h> 前使用 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS ,其中 1_2 可以改爲 1_0,1_1 等(https://stackoverflow.com/questions/28500496/opencl-function-found-deprecated-by-visual-studio/28500846#28500846)
● 使用 cl_command_queue_properties 和函數 clCreateCommandQueueWithProperties 來建立命令隊列,或是用 cl_sampler_properties 和函數 clCreateSamplerWithProperties 來建立採樣器都失敗了,報內存訪問越界錯誤(0xC0000005),不管是按格式書寫還是把 queueProp 改爲 0,建立時第三個參數寫成 &queueProp 都不行;有人說更新顯卡驅動以後就好了(https://stackoverflow.com/questions/39864947/opencl-cl-out-of-host-memory-on-clcreatecommandqueuewithproperties-with-minima)。最後解決了,用 AMD APP SDK 下面的動態庫 amdocl64.dll 替換掉 C:\Windows\System32 裏邊那個相同的庫就好了,可以完全使用 OpenCL2.0 的 API,不再報錯。
● cv::imread 讀入的圖片是按照 [ B, G, R, B, G, R, B, G, R, ...] 存放的(OpenCV 默認的通道順序是 BGR,上面四通道版本的代碼也是按這個順序取值的),在用 OpenCL 處理之前需要進行一定的預處理,要麼用 split 分解各通道爲單獨的圖片,要麼手工拆解,算完以後也要按照這種存放方式轉回圖像數據中。在發現通道個數和順序的問題前,要麼在調用函數 clCreateImage 的時候返回 -37,-38,-39,要麼直接旋轉得到像下面這樣的圖片。以後記得,如果出現這種交叉條紋的圖像,有可能是通道交錯導致的。
● 吐槽一下,網上能找到的 OpenCL + OpenCV 做圖片旋轉的基本上有幾個版本(https://blog.csdn.net/c602273091/article/details/45418223,https://blog.csdn.net/icamera0/article/details/71598323,https://blog.csdn.net/jaccen2012/article/details/51367388)都是用 FreeImage 庫把圖像處理成灰度圖來旋轉的(參考了 劉文志等(2016). OpenCL 異構並行計算[M]. 的代碼?),輸出肯定是灰度圖了,然後大家博客就相互抄吧,全是垃圾。好不容易找到一個彩色的(https://blog.csdn.net/Bob_Dong/article/details/64906734)代碼還看不了。