/* * Enhance definition of edges * General note: Error checking is kind of a circus in this file */ #include #include #include #include #include #include "scolor.h" #include #include typedef struct timeval timeval; cl_device_info query_items[] = {CL_DEVICE_TYPE, CL_DEVICE_VENDOR_ID, CL_DEVICE_MAX_COMPUTE_UNITS, CL_DEVICE_GLOBAL_MEM_SIZE, CL_DEVICE_LOCAL_MEM_SIZE, CL_DEVICE_NAME, CL_DRIVER_VERSION, CL_DEVICE_VENDOR, CL_DEVICE_EXTENSIONS}; char* query_names[] = {"CL_DEVICE_TYPE", "CL_DEVICE_VENDOR_ID", "CL_DEVICE_MAX_COMPUTE_UNITS", "CL_DEVICE_GLOBAL_MEM_SIZE", "CL_DEVICE_LOCAL_MEM_SIZE", "CL_DEVICE_NAME", "CL_DRIVER_VERSION", "CL_DEVICE_VENDOR", "CL_DEVICE_EXTENSIONS"}; #define check(m) if(errcode != CL_SUCCESS){ puts(m); goto err; } double timediff(timeval* start, timeval* end) { double s_diff = end->tv_sec - start->tv_sec; double us_diff = end->tv_usec - start->tv_usec; s_diff += us_diff / 1000000; return s_diff; } int main(){ timeval start, end; cl_platform_id plt_ids[10]; cl_uint plt_count; cl_device_id dev_ids[10]; cl_uint dev_count; char pname[1024]; cl_int retval = clGetPlatformIDs(10, plt_ids, &plt_count); if(retval != CL_SUCCESS){ printf(RED("clGetPlatformIDs failed with error %d\n"), retval); goto err; } for(int i = 0; i < plt_count; i++){ printf(PURPLE("Platform %d (ID %lu):\n"), i, (size_t)plt_ids[i]); cl_int retval = clGetDeviceIDs(plt_ids[i], CL_DEVICE_TYPE_ALL, 10, dev_ids, &dev_count); if(retval != CL_SUCCESS){ printf(RED("clGetPlatformIDs failed with error %d\n"), retval); goto err; } for(int j = 0; j < dev_count; j++){ printf(GREEN("Device %d (ID %lu):\n"), j, (size_t)dev_ids[j]); for(int q = 0; q < sizeof(query_items)/sizeof(cl_device_info); q++){ size_t psize; retval = clGetDeviceInfo(dev_ids[j], query_items[q], 1024, pname, &psize); if(retval != CL_SUCCESS){ printf(RED("Query Failed: %s\n"), query_names[q]); continue; } if(isalnum(pname[0]) || (psize != 4 && psize != 8)){ // Not bulletproof pname[psize] = 0; printf("%s : "BBLUE("%s")" (%ld)\n", query_names[q], pname, psize); } else if(psize == 4) { printf("%s : "BBLUE("%u")" (%ld)\n", query_names[q], *((cl_uint*)pname), psize); } else { printf("%s : "BBLUE("%lu")" (%ld)\n", query_names[q], *((size_t*)pname), psize); } } } } /* We'll assume platform 0, device 0 from this point on */ cl_int errcode; cl_context context = clCreateContext(0, 1, dev_ids, 0, 0, &errcode); if(errcode != CL_SUCCESS) goto err; /* Note here: You won't find a manpage for clCreateCommandQueueWithProperties on Debian Linux, and probably others * The deprecated version is just called clCreateCommandQueue, and they take the same parameters. * On Debian at least, the compiler will warn that clCreateCommandQueue is deprecated, and accept this version * nVidia has been slow with OpenCL support and clCreateCommandQueueWithProperties is OpenCL 2.0+, but it works */ cl_command_queue queue = clCreateCommandQueueWithProperties(context, dev_ids[0], 0, &errcode); check("clCreateCommandQueue"); // Get the picture into memory somewhere int width, height; unsigned char* image = SOIL_load_image("brick.jpg", &width, &height, 0, SOIL_LOAD_RGB); size_t imgsize = width*height*3; printf(CYAN("Image size: %d (%ld bytes)\n"), width*height, imgsize); cl_mem input_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imgsize, image, &errcode); if(errcode != CL_SUCCESS) goto err; cl_mem output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, imgsize, 0, &errcode); if(errcode != CL_SUCCESS) goto err; char kernel_code[4096]; int fd = open("rotate_kernel.c", O_RDONLY); size_t codelen = read(fd, kernel_code, 4095); kernel_code[codelen] = 0; close(fd); const char* sarray = kernel_code; printf(DPURPLE("Building Program: ")"\n%s\n", (&sarray)[0]); cl_program program = clCreateProgramWithSource(context, 1, &sarray, 0, &errcode); check("clCreateProgramWithSource"); if((errcode = clBuildProgram(program, 1, dev_ids, 0, 0, 0)) != CL_SUCCESS){ if(errcode == -11){ char log[4096] = {0}; size_t logsize; clGetProgramBuildInfo(program, dev_ids[0], CL_PROGRAM_BUILD_LOG, 4096, log, &logsize); printf(RED("Failed to build, log: ")"\n%s\n", log); } goto err; } cl_kernel kernel = clCreateKernel(program, "imgrotate", &errcode); check("clCreateKernel"); errcode = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_buffer); check("clSetKernelArg"); if(CL_SUCCESS != (errcode = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_buffer))){ printf("clSetKernelArg #2\n"); goto err; } errcode = clSetKernelArg(kernel, 2, sizeof(int), &width); check("clSetKernelArg"); errcode = clSetKernelArg(kernel, 3, sizeof(int), &height); check("clSetKernelArg"); int rx = 2000, ry = 2000; float radians = 3.141592653f/4.0f; errcode = clSetKernelArg(kernel, 4, sizeof(int), &rx); check("clSetKernelArg"); errcode = clSetKernelArg(kernel, 5, sizeof(int), &ry); check("clSetKernelArg"); errcode = clSetKernelArg(kernel, 6, sizeof(float), &radians); check("clSetKernelArg"); gettimeofday(&start, NULL); const size_t worklen = width*height; if(CL_SUCCESS != (errcode = clEnqueueNDRangeKernel(queue, kernel, 1, 0, &worklen, 0, 0, 0, 0))){ perror("clEnqueueNDRangeKernel"); goto err; } gettimeofday(&end, NULL); errcode = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, imgsize, image, 0, 0, 0); check("clEnqueueReadBuffer"); printf("Time: %lf\n", timediff(&start, &end)); SOIL_save_image("done.bmp", SOIL_SAVE_TYPE_BMP, width, height, 3, image); return 0; err: printf(RED("We have failed to discover the wonders of OpenCL, errcode = %d\n"), errcode); return 1; }