/* * General note: Error checking is kind of a circus in this file */ #include #include #include #include #include #include "scolor.h" typedef struct timeval timeval; cl_device_info query_items[] = {CL_DEVICE_TYPE, CL_DEVICE_VENDOR_ID, CL_DEVICE_MAX_COMPUTE_UNITS, CL_DEVICE_GLOBAL_MEM_SIZE, CL_DEVICE_LOCAL_MEM_SIZE, CL_DEVICE_NAME, CL_DRIVER_VERSION, CL_DEVICE_VENDOR, CL_DEVICE_EXTENSIONS}; char* query_names[] = {"CL_DEVICE_TYPE", "CL_DEVICE_VENDOR_ID", "CL_DEVICE_MAX_COMPUTE_UNITS", "CL_DEVICE_GLOBAL_MEM_SIZE", "CL_DEVICE_LOCAL_MEM_SIZE", "CL_DEVICE_NAME", "CL_DRIVER_VERSION", "CL_DEVICE_VENDOR", "CL_DEVICE_EXTENSIONS"}; #define check(m) if(errcode != CL_SUCCESS){ puts(m); goto err; } double timediff(timeval* start, timeval* end) { double s_diff = end->tv_sec - start->tv_sec; double us_diff = end->tv_usec - start->tv_usec; s_diff += us_diff / 1000000; return s_diff; } int main(){ timeval start, end; cl_platform_id plt_ids[10]; cl_uint plt_count; cl_device_id dev_ids[10]; cl_uint dev_count; char pname[1024]; cl_int retval = clGetPlatformIDs(10, plt_ids, &plt_count); if(retval != CL_SUCCESS){ printf(RED("clGetPlatformIDs failed with error %d\n"), retval); goto err; } for(int i = 0; i < plt_count; i++){ printf(PURPLE("Platform %d (ID %lu):\n"), i, (size_t)plt_ids[i]); cl_int retval = clGetDeviceIDs(plt_ids[i], CL_DEVICE_TYPE_ALL, 10, dev_ids, &dev_count); if(retval != CL_SUCCESS){ printf(RED("clGetPlatformIDs failed with error %d\n"), retval); goto err; } for(int j = 0; j < dev_count; j++){ printf(GREEN("Device %d (ID %lu):\n"), j, (size_t)dev_ids[j]); for(int q = 0; q < sizeof(query_items)/sizeof(cl_device_info); q++){ size_t psize; retval = clGetDeviceInfo(dev_ids[j], query_items[q], 1024, pname, &psize); if(retval != CL_SUCCESS){ printf(RED("Query Failed: %s\n"), query_names[q]); continue; } if(isalnum(pname[0]) || (psize != 4 && psize != 8)){ // Not bulletproof pname[psize] = 0; printf("%s : "BBLUE("%s")" (%ld)\n", query_names[q], pname, psize); } else if(psize == 4) { printf("%s : "BBLUE("%u")" (%ld)\n", query_names[q], *((cl_uint*)pname), psize); } else { printf("%s : "BBLUE("%lu")" (%ld)\n", query_names[q], *((size_t*)pname), psize); } } } } /* We'll assume platform 0, device 0 from this point on */ cl_int errcode; cl_context context = clCreateContext(0, 1, dev_ids, 0, 0, &errcode); if(errcode != CL_SUCCESS) goto err; /* Note here: You won't find a manpage for clCreateCommandQueueWithProperties on Debian Linux, and probably others * The deprecated version is just called clCreateCommandQueue, and they take the same parameters. * On Debian at least, the compiler will warn that clCreateCommandQueue is deprecated, and accept this version * nVidia has been slow with OpenCL support and clCreateCommandQueueWithProperties is OpenCL 2.0+, but it works */ cl_command_queue queue = clCreateCommandQueueWithProperties(context, dev_ids[0], 0, &errcode); check("clCreateCommandQueue"); // Get the picture into memory somewhere int width, height; unsigned char* image = SOIL_load_image("mice.png", &width, &height, 0, SOIL_LOAD_RGB); size_t imgsize = width*height*3; printf(CYAN("Image size: %d (%ld bytes)\n"), width*height, imgsize); cl_mem input_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imgsize, image, &errcode); if(errcode != CL_SUCCESS) goto err; cl_mem output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, height, 0, &errcode); if(errcode != CL_SUCCESS) goto err; const char* kernel_code = " \ __kernel void entropy(__global unsigned char *in, __global unsigned char *out, int width, int height){ \ const int i = get_global_id(0); \ if(i < 1 || i > height-2) \ return; \ long sum = 0; \ for(int j = 0; j < height; j++){ \ int idx = i*width + j; \ int d_above = abs(in[idx*3 - width*3] - in[idx*3]); \ int d_below = abs(in[idx*3 + width*3] - in[idx*3]); \ int d_left = abs(in[(idx-1)*3] - in[idx*3]); \ int d_right = abs(in[(idx+1)*3] - in[idx*3]); \ int d_average = (d_above + d_below + d_left + d_right)/4; \ sum += d_average;\ }\ unsigned char avg = sum/width;\ out[i] = avg;\ }"; cl_program program = clCreateProgramWithSource(context, 1, &kernel_code, 0, &errcode); check("clCreateProgramWithSource"); if(clBuildProgram(program, 1, dev_ids, 0, 0, 0) != CL_SUCCESS){ puts("clBuildProgram"); goto err; } cl_kernel kernel = clCreateKernel(program, "entropy", &errcode); check("clCreateKernel"); errcode = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_buffer); check("clSetKernelArg"); if(CL_SUCCESS != (errcode = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_buffer))){ printf("clSetKernelArg #2\n"); goto err; } errcode = clSetKernelArg(kernel, 2, sizeof(int), &width); check("clSetKernelArg"); errcode = clSetKernelArg(kernel, 3, sizeof(int), &height); check("clSetKernelArg"); // Alternative /* for(int i = 0; i < width*height; i++){ unsigned char green = image[i*3+1]; image[i*3+1] = image[i*3]; image[i*3] = green; }*/ gettimeofday(&start, NULL); const size_t worklen = height; if(CL_SUCCESS != (errcode = clEnqueueNDRangeKernel(queue, kernel, 1, 0, &worklen, 0, 0, 0, 0))){ perror("clEnqueueNDRangeKernel"); goto err; } gettimeofday(&end, NULL); errcode = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, height, image, 0, 0, 0); check("clEnqueueReadBuffer"); printf("Time: %lf\n", timediff(&start, &end)); size_t sum = 0; for(int i = 0; i < height; i++){ sum += image[i]; } printf("Entropy: %ld\n", sum/height); return 0; err: printf(RED("We have failed to discover the wonders of OpenCL, errcode = %d\n"), errcode); return 1; }