这篇教程《C++ CHECK_OPENCL_ERROR函数代码示例》写得很实用,希望能帮到您。
本文整理汇总了C++中CHECK_OPENCL_ERROR函数的典型用法代码示例。如果您正苦于以下问题:C++ CHECK_OPENCL_ERROR函数的具体用法?C++ CHECK_OPENCL_ERROR怎么用?C++ CHECK_OPENCL_ERROR使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。 在下文中一共展示了CHECK_OPENCL_ERROR函数的29个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。 示例1: switch
/// Tells LibOI that the image source is located in OpenGL device memory at the
/// location specified.
///
/// The OpenGL object is wrapped in a read-only OpenCL memory object that is
/// stored in mImage_gl, and the image type is remembered in mImageType.
///
/// @param gl_device_memory  OpenGL object name of the image source.
/// @param type              Kind of OpenGL object; this function handles
///                          OPENGL_FRAMEBUFFER, OPENGL_TEXTUREBUFFER and
///                          OPENGL_RENDERBUFFER. Any other value trips the
///                          assert in the default branch.
void CLibOI::SetImageSource(GLuint gl_device_memory, LibOIEnums::ImageTypes type)
{
    mImageType = type;

    int status = CL_SUCCESS;

    switch(type)
    {
    case LibOIEnums::OPENGL_FRAMEBUFFER:
        mImage_gl = clCreateFromGLBuffer(mOCL->GetContext(), CL_MEM_READ_ONLY, gl_device_memory, &status);
        CHECK_OPENCL_ERROR(status, "clCreateFromGLBuffer failed.");
        break;

    case LibOIEnums::OPENGL_TEXTUREBUFFER:
        // The texture entry point is selected at compile time from the
        // DETECTED_OPENCL_* macros: the 3D-texture call for 1.0 / 1.1 / unknown
        // versions, the generic texture call (2D array target) otherwise.
#if defined(DETECTED_OPENCL_1_0) || defined(DETECTED_OPENCL_1_1) || defined(DETECTED_OPENCL_UNKNOWN_VERSION)
        mImage_gl = clCreateFromGLTexture3D(mOCL->GetContext(), CL_MEM_READ_ONLY, GL_TEXTURE_3D, 0, gl_device_memory, &status);
#else
        mImage_gl = clCreateFromGLTexture(mOCL->GetContext(), CL_MEM_READ_ONLY, GL_TEXTURE_2D_ARRAY, 0, gl_device_memory, &status);
#endif // defined(DETECTED_OPENCL_1_0) || defined(DETECTED_OPENCL_1_1)
        // The same message is reported for both variants above.
        CHECK_OPENCL_ERROR(status, "clCreateFromGLTexture failed.");
        break;

    case LibOIEnums::OPENGL_RENDERBUFFER:
        // TODO: note that the clCreateFromGLTexture2D was deprecated in the OpenCL 1.2 specifications.
        mImage_gl = clCreateFromGLRenderbuffer(mOCL->GetContext(), CL_MEM_READ_ONLY, gl_device_memory, &status);
        CHECK_OPENCL_ERROR(status, "clCreateFromGLRenderbuffer failed.");
        break;

    default:
        // We don't know what type of image this is!
        // NOTE(review): with NDEBUG defined the assert compiles away and
        // mImage_gl is left untouched for an unknown type.
        assert(false);
        break;
    }
}
开发者ID:matdander,项目名称:liboi,代码行数:41,
示例2: clEnqueueWriteBuffer
/// Copies a host image into an OpenCL buffer using a blocking write.
///
/// The host data is first converted element-by-element from float to cl_float
/// in a temporary staging buffer, which is then written to the start of
/// cl_buffer.
///
/// @param host_mem   Source image, at least width * height floats.
/// @param cl_buffer  Destination OpenCL buffer.
/// @param width      Image width in pixels.
/// @param height     Image height in pixels.
/// @param layer      Not used by this function; the write always starts at
///                   offset 0 of cl_buffer.
void CLibOI::CopyImageToBuffer(float * host_mem, cl_mem cl_buffer, int width, int height, int layer)
{
    int status = CL_SUCCESS;

    // Compute the element count in size_t so that large images do not
    // overflow an int multiplication.
    const size_t size = static_cast<size_t>(width) * static_cast<size_t>(height);

    cl_float * tmp = new cl_float[size];
    for(size_t i = 0; i < size; i++)
        tmp[i] = host_mem[i];

    // Enqueue a blocking write
    status = clEnqueueWriteBuffer(mOCL->GetQueue(), cl_buffer, CL_TRUE, 0,
                                  sizeof(cl_float) * size, tmp, 0, NULL, NULL);

    // The write is blocking (CL_TRUE), so the staging buffer is no longer
    // needed once the call returns. Free it BEFORE the error check so it is
    // not leaked if CHECK_OPENCL_ERROR leaves this function.
    delete[] tmp;

    CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer failed.");
}
开发者ID:bkloppenborg,项目名称:liboi,代码行数:16,
示例3: clEnqueueReadBuffer
/// Copies the current image in mImage_cl to the floating point buffer, image,
/// iff the sizes match exactly.
///
/// @param image   Destination buffer, at least width * height * depth floats.
/// @param width   Expected image width; must equal mImageWidth.
/// @param height  Expected image height; must equal mImageHeight.
/// @param depth   Expected image depth; must equal mImageDepth.
///
/// If any dimension differs the function returns without touching image.
void CLibOI::ExportImage(float * image, unsigned int width, unsigned int height, unsigned int depth)
{
    if(width != mImageWidth || height != mImageHeight || depth != mImageDepth)
        return;

    int status = CL_SUCCESS;

    // Widen before multiplying so the element count cannot wrap.
    const size_t num_elements = static_cast<size_t>(mImageWidth) * mImageHeight * mImageDepth;

    // Stage the device data on the heap: a runtime-sized stack array is not
    // standard C++ and can overflow the stack for large images.
    cl_float * tmp = new cl_float[num_elements];

    // Blocking read (CL_TRUE): tmp is fully populated when the call returns.
    status = clEnqueueReadBuffer(mOCL->GetQueue(), mImage_cl, CL_TRUE, 0,
                                 num_elements * sizeof(cl_float), tmp, 0, NULL, NULL);

    // Copy to the output buffer, converting as we go. Skipped on failure so
    // that uninitialised staging data is never exported.
    if(status == CL_SUCCESS)
    {
        for(size_t i = 0; i < num_elements; i++)
            image[i] = tmp[i];
    }

    // Release the staging buffer before the error check so it is not leaked
    // if CHECK_OPENCL_ERROR leaves this function.
    delete[] tmp;

    CHECK_OPENCL_ERROR(status, "clEnqueueReadBuffer failed.");
}
开发者ID:bkloppenborg,项目名称:liboi,代码行数:17,
示例4: clGetPlatformIDs
/// Selects an OpenCL platform and device by index and creates a context and
/// a command queue for that device.
///
/// @param iPlatformNum  Zero-based index into the list of available platforms.
/// @param iDeviceNum    Zero-based index into the list of devices (all types)
///                      of the chosen platform.
///
/// On an out-of-range index a message is printed and the constructor returns
/// early, leaving the remaining members uninitialised.
OCL_Device::OCL_Device(int iPlatformNum, int iDeviceNum)
{
    // For error checking
    cl_int err;

    // Get Platform Info: first query the count, then fetch the ids.
    cl_uint iNumPlatforms = 0;
    err = clGetPlatformIDs(0, NULL, &iNumPlatforms);
    CHECK_OPENCL_ERROR(err);

    cl_platform_id* vPlatformIDs = new cl_platform_id[iNumPlatforms];
    err = clGetPlatformIDs(iNumPlatforms, vPlatformIDs, NULL);
    CHECK_OPENCL_ERROR(err);

    // Negative indices are rejected explicitly rather than relying on the
    // implicit signed-to-unsigned conversion.
    if (iPlatformNum < 0 || static_cast<cl_uint>(iPlatformNum) >= iNumPlatforms)
    {
        printf("Platform index must me between 0 and %d.\n", static_cast<int>(iNumPlatforms) - 1);
        delete[] vPlatformIDs;
        return;
    }
    m_platform_id = vPlatformIDs[iPlatformNum];
    delete[] vPlatformIDs;

    // Get Device Info: same two-step query as for the platforms.
    cl_uint iNumDevices = 0;
    err = clGetDeviceIDs(m_platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, &iNumDevices);
    CHECK_OPENCL_ERROR(err);

    cl_device_id* vDeviceIDs = new cl_device_id[iNumDevices];
    err = clGetDeviceIDs(m_platform_id, CL_DEVICE_TYPE_ALL, iNumDevices, vDeviceIDs, &iNumDevices);
    CHECK_OPENCL_ERROR(err);

    if (iDeviceNum < 0 || static_cast<cl_uint>(iDeviceNum) >= iNumDevices)
    {
        printf("Device index must me between 0 and %d.\n", static_cast<int>(iNumDevices) - 1);
        delete[] vDeviceIDs;
        return;
    }
    m_device_id = vDeviceIDs[iDeviceNum];
    delete[] vDeviceIDs;

    // Zero-terminated property list binding the context to the platform.
    cl_context_properties vProprieties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)m_platform_id, 0};
    m_context = clCreateContext(vProprieties, 1, &m_device_id, NULL, NULL, &err);
    CHECK_OPENCL_ERROR(err);

    // Default queue properties (in-order, no profiling).
    m_queue = clCreateCommandQueue(m_context, m_device_id, 0, &err);
    CHECK_OPENCL_ERROR(err);

    // NOTE(review): this declares a LOCAL variable; judging by the m_ prefix it
    // was presumably meant to initialise a class member of the same name —
    // confirm against the class declaration. Declared const because a string
    // literal cannot be bound to a non-const char* in standard C++.
    const char* m_sBuildOptions = "";
}
开发者ID:Soledad89,项目名称:learnOpenCL,代码行数:53,
示例5: clReleaseKernel
/// Releases the OpenCL objects and host buffers owned by the sample.
///
/// Does nothing (and reports success) when byteRWSupport is false.
///
/// @return SDK_SUCCESS when the end of the function is reached.
int
BoxFilterSeparable::cleanup()
{
    if(!byteRWSupport)
    {
        return SDK_SUCCESS;
    }

    // Kernels first, then the program they were built from.
    cl_int status = clReleaseKernel(verticalKernel);
    CHECK_OPENCL_ERROR(status, "clReleaseKernel failed.(vertical)");

    status = clReleaseKernel(horizontalKernel);
    CHECK_OPENCL_ERROR(status, "clReleaseKernel failed.(Horizontal)");

    status = clReleaseProgram(program);
    CHECK_OPENCL_ERROR(status, "clReleaseProgram failed.");

    // The three image buffers share one error message, so release them in
    // declaration order from a small table.
    cl_mem imageBuffers[] = { inputImageBuffer, outputImageBuffer, tempImageBuffer };
    const size_t imageBufferCount = sizeof(imageBuffers) / sizeof(imageBuffers[0]);
    for(size_t idx = 0; idx < imageBufferCount; ++idx)
    {
        status = clReleaseMemObject(imageBuffers[idx]);
        CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed.");
    }

    // Queue and context go last.
    status = clReleaseCommandQueue(commandQueue);
    CHECK_OPENCL_ERROR(status, "clReleaseCommandQueue failed.");

    status = clReleaseContext(context);
    CHECK_OPENCL_ERROR(status, "clReleaseContext failed.");

    // Host-side allocations (input memory etc.)
    FREE(inputImageData);
    FREE(outputImageData);
    FREE(verificationOutput);
    FREE(devices);

    return SDK_SUCCESS;
}
开发者ID:xianggong,项目名称:m2c-llvm-devtools-host,代码行数:43,
示例6: clEnqueueMapBuffer
/// Maps the first sizeInBytes bytes of a device buffer into host address space
/// with a blocking map and stores the resulting pointer in hostPointer.
///
/// @param deviceBuffer  Buffer object to map.
/// @param hostPointer   Receives the mapped host pointer.
/// @param sizeInBytes   Number of bytes to map, starting at offset 0.
/// @param flags         Map flags forwarded to clEnqueueMapBuffer.
/// @return SDK_SUCCESS when the end of the function is reached.
///
/// NOTE(review): T is presumably a template parameter declared outside this
/// excerpt — confirm.
int ComputeBench::mapBuffer(cl_mem deviceBuffer, T* &hostPointer,
                            size_t sizeInBytes, cl_map_flags flags)
{
    cl_int status;

    // Blocking map (CL_TRUE), no wait list, no completion event.
    void* mappedRegion = clEnqueueMapBuffer(commandQueue,
                                            deviceBuffer,
                                            CL_TRUE,
                                            flags,
                                            0,
                                            sizeInBytes,
                                            0,
                                            NULL,
                                            NULL,
                                            &status);
    hostPointer = static_cast<T*>(mappedRegion);

    CHECK_OPENCL_ERROR(status, "clEnqueueMapBuffer failed");

    return SDK_SUCCESS;
}
开发者ID:JiniusResearch,项目名称:oclb,代码行数:18,
示例7: CHECK_OPENCL_ERROR
/// Prints, for every available OpenCL platform, its vendor followed by all of
/// its devices (any device type), with a separator line after each platform.
void CLHelper::printAllPlatformsAndDevices()
{
    std::vector<cl::Platform> platforms;
    cl_int err = cl::Platform::get(&platforms);
    CHECK_OPENCL_ERROR(err, "cl::Platform::get() failed.");

    // Shared rule drawn under the header and after each platform.
    const char* const separator = "===========================================";

    std::cout << std::endl
              << "Listing platform vendors and devices" << std::endl
              << separator << std::endl;

    for(std::size_t idx = 0; idx < platforms.size(); ++idx)
    {
        cl::Platform& current = platforms[idx];
        CLHelper::printVendor(current);
        CLHelper::printDevices(current, CL_DEVICE_TYPE_ALL);
        std::cout << separator << std::endl;
    }
}
开发者ID:deranen,项目名称:OpenCLTemplate,代码行数:19,
示例8: clReleaseMemObject
/// Releases the OpenCL objects created by the sample and frees the host input
/// array.
///
/// Release order: the three buffers, the two kernels, the program, the command
/// queue and finally the context. Each release status is passed to
/// CHECK_OPENCL_ERROR with a message naming the object.
///
/// @return SDK_SUCCESS when the end of the function is reached.
///
/// NOTE(review): if CHECK_OPENCL_ERROR returns early on failure, the objects
/// after the failing one (and `input`) are not released — confirm against the
/// macro definition.
int AtomicCounters::cleanup() {
    // Releases OpenCL resources (Context, Memory etc.)
    cl_int status;

    // Buffers
    status = clReleaseMemObject(inBuf);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject(inBuf) failed.");
    status = clReleaseMemObject(counterOutBuf);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject(counterOutBuf) failed.");
    status = clReleaseMemObject(globalOutBuf);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject(globalOutBuf) failed.");

    // Kernels, then the program
    status = clReleaseKernel(counterKernel);
    CHECK_OPENCL_ERROR(status, "clReleaseKernel(counterKernel) failed.");
    status = clReleaseKernel(globalKernel);
    CHECK_OPENCL_ERROR(status, "clReleaseKernel(globalKernel) failed.");
    status = clReleaseProgram(program);
    CHECK_OPENCL_ERROR(status, "clReleaseProgram(program) failed.");

    // Command queue and context last
    status = clReleaseCommandQueue(commandQueue);
    CHECK_OPENCL_ERROR(status, "clReleaseCommandQueue(commandQueue) failed.");
    status = clReleaseContext(context);
    CHECK_OPENCL_ERROR(status, "clReleaseContext(context) failed.");

    // Host-side input data
    free(input);
    return SDK_SUCCESS;
}
开发者ID:xianggong,项目名称:m2s-bench-amdapp-2.9-si,代码行数:22,
示例9: clReleaseKernel
/// Releases the OpenCL objects created by the sample and frees the host
/// buffers.
///
/// Release order: kernel, program, the two input images, the output buffer,
/// the command queue and finally the context. Each release status is passed
/// to CHECK_OPENCL_ERROR with a message naming the call and the object.
///
/// @return SDK_SUCCESS when the end of the function is reached.
int MatrixMulImage::cleanup()
{
    // Releases OpenCL resources (Context, Memory etc.)
    cl_int status;

    status = clReleaseKernel(kernel);
    CHECK_OPENCL_ERROR(status, "clReleaseKernel failed.(kernel)");

    status = clReleaseProgram(program);
    CHECK_OPENCL_ERROR(status, "clReleaseProgram failed.(program)");

    status = clReleaseMemObject(inputBuffer0);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed.(inputBuffer0)");

    status = clReleaseMemObject(inputBuffer1);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed.(inputBuffer1)");

    // The message now names the call that is actually made here
    // (clReleaseMemObject, not clReleaseCommandQueue).
    status = clReleaseMemObject(outputBuffer);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed.(outputBuffer)");

    status = clReleaseCommandQueue(commandQueue);
    CHECK_OPENCL_ERROR(status, "clReleaseCommandQueue failed.(commandQueue)");

    status = clReleaseContext(context);
    CHECK_OPENCL_ERROR(status, "clReleaseContext failed.(context)");

    // release program resources (input memory etc.)
    FREE(input0);
    FREE(input1);
    FREE(output);
    FREE(verificationOutput);

    // release device list
    FREE(devices);

    return SDK_SUCCESS;
}
开发者ID:ChiahungTai,项目名称:OpenCL-playgorund,代码行数:42,
示例10: void
/// Builds an OpenCL program for the given devices and, on failure, prints the
/// build log of every device before reporting the error.
///
/// @param program     Program to build.
/// @param devices     Devices to build for; also the devices whose build logs
///                    are printed on failure.
/// @param options     Compiler options string forwarded to the build.
/// @param notifyFptr  Optional build-completion callback forwarded to the
///                    build (previously accepted but silently dropped).
/// @param data        User data passed to notifyFptr.
void CLHelper::compileProgram(
    cl::Program& program,
    std::vector<cl::Device>& devices,
    const char* options,
    void (CL_CALLBACK * notifyFptr)(cl_program, void *),
    void* data)
{
    // Forward the caller's callback and user data instead of hard-coded NULLs
    // so that a supplied notification function is actually invoked. Callers
    // that pass NULL see no change.
    cl_int err = program.build(devices, options, notifyFptr, data);

    if(err != CL_SUCCESS)
    {
        std::cout << "Build error! Showing build log:" << std::endl << std::endl;

        std::string errorLog;
        std::vector<cl::Device>::iterator device;
        for(device = devices.begin(); device != devices.end(); device++)
        {
            errorLog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device);
            std::cout << errorLog << std::endl;
        }

        // Report the failure only after the logs have been shown.
        CHECK_OPENCL_ERROR(err, "cl::Program::build() failed.");
    }
}
开发者ID:deranen,项目名称:OpenCLTemplate,代码行数:23,
示例11: buildOpenCLProgram
/// Builds the program from "<name>_Kernel.cl", creates the kernel called
/// `name` from it and records that kernel's work-group information.
///
/// @param name  Kernel name; also the prefix of the kernel source file name.
/// @return SDK_SUCCESS when the end of the function is reached.
int MotionDetector::setupKernel(std::string name)
{
    cl_int status = CL_SUCCESS;

    // create a CL program using the kernel source
    buildProgramData buildData;
    buildData.kernelName = name + "_Kernel.cl";
    buildData.devices = devices;
    buildData.deviceId = sampleArgs->deviceId;
    buildData.flagsStr = std::string("");
    if (sampleArgs->isLoadBinaryEnabled())
    {
        buildData.binaryName = std::string(sampleArgs->loadBinary.c_str());
    }

    if (sampleArgs->isComplierFlagsSpecified())
    {
        buildData.flagsFileName = std::string(sampleArgs->flags.c_str());
    }

    int retValue = buildOpenCLProgram(program, context, buildData);
    CHECK_ERROR(retValue, 0, "buildOpenCLProgram() failed");

    // get a kernel object handle for a kernel with the given name.
    // c_str() is the accessor that guarantees a NUL-terminated string for
    // every standard revision and for an empty name; taking &name[0] does not.
    kernl = clCreateKernel(program, name.c_str(), &status);
    CHECK_OPENCL_ERROR(status, "clCreateKernel failed.");

    status = kernelInfo.setKernelWorkGroupInfo(kernl, devices[sampleArgs->deviceId]);
    CHECK_ERROR(status, SDK_SUCCESS, "kernelInfo.setKernelWorkGroupInfo() failed");

    return SDK_SUCCESS;
}
开发者ID:nico-mkhatvari,项目名称:OpenCL,代码行数:37,
示例12: clReleaseMemObject
/// Releases the OpenCL objects created by the sample and frees the host
/// buffers.
///
/// Release order: the three device buffers, the kernel, the program, the
/// command queue and finally the context. Each release status is passed to
/// CHECK_OPENCL_ERROR with a message naming the call and the object.
///
/// @return SDK_SUCCESS when the end of the function is reached.
///
/// NOTE(review): if CHECK_OPENCL_ERROR returns early on failure, everything
/// after the failing release is skipped — confirm against the macro.
int DwtHaar1D::cleanup()
{
    // Releases OpenCL resources (Context, Memory etc.)
    cl_int status;

    // Device buffers
    status = clReleaseMemObject(inDataBuf);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed.(inDataBuf)");

    status = clReleaseMemObject(dOutDataBuf);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed.(dOutDataBuf)");

    status = clReleaseMemObject(dPartialOutDataBuf);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed.(dPartialOutDataBuf)");

    // Kernel, then the program it belongs to
    status = clReleaseKernel(kernel);
    CHECK_OPENCL_ERROR(status, "clReleaseKernel failed.(kernel)");

    status = clReleaseProgram(program);
    CHECK_OPENCL_ERROR(status, "clReleaseProgram failed.(program)");

    // Command queue and context last
    status = clReleaseCommandQueue(commandQueue);
    CHECK_OPENCL_ERROR(status, "clReleaseCommandQueue failed.(commandQueue)");

    status = clReleaseContext(context);
    CHECK_OPENCL_ERROR(status, "clReleaseContext failed.(context)");

    // Release program resources (input memory etc.)
    FREE(inData);
    FREE(dOutData);
    FREE(dPartialOutData);
    FREE(hOutData);
    FREE(devices);

    return SDK_SUCCESS;
}
开发者ID:CryDevPortal,项目名称:amd-app-sdk-fixes,代码行数:36,
示例13: clReleaseKernel
/// Releases every OpenCL object owned by the sample (all NUM_KERNELS kernels,
/// the program, both buffers, the command queue and the context) and then
/// frees the host-side arrays.
///
/// @return SDK_SUCCESS when the end of the function is reached.
int ConstantBandwidth::cleanup()
{
    cl_int status;

    // One kernel object per benchmark variant.
    for(int kernelIdx = 0; kernelIdx < NUM_KERNELS; ++kernelIdx)
    {
        status = clReleaseKernel(kernel[kernelIdx]);
        CHECK_OPENCL_ERROR(status, "clReleaseKernel failed.");
    }

    status = clReleaseProgram(program);
    CHECK_OPENCL_ERROR(status, "clReleaseProgram failed.");

    // Both device buffers report the same message, so walk them from a table
    // in their original release order.
    cl_mem deviceBuffers[] = { constantBuffer, outputBuffer };
    const size_t deviceBufferCount = sizeof(deviceBuffers) / sizeof(deviceBuffers[0]);
    for(size_t bufIdx = 0; bufIdx < deviceBufferCount; ++bufIdx)
    {
        status = clReleaseMemObject(deviceBuffers[bufIdx]);
        CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed.");
    }

    status = clReleaseCommandQueue(commandQueue);
    CHECK_OPENCL_ERROR(status, "clReleaseCommandQueue failed.");

    status = clReleaseContext(context);
    CHECK_OPENCL_ERROR(status, "clReleaseContext failed.");

    // Host-side data (input memory etc.)
    FREE(input);
    FREE(output);
    FREE(verificationOutput);

    // Device list
    FREE(devices);

    return SDK_SUCCESS;
}
开发者ID:ChiahungTai,项目名称:OpenCL-playgorund,代码行数:40,
示例14: clReleaseMemObject
/// Releases the OpenCL objects created by the sample and frees the host
/// buffers.
///
/// Release order: the seeds and result buffers, the kernel, the program, the
/// command queue and finally the context. Each release status is passed to
/// CHECK_OPENCL_ERROR with a message naming the call and the object.
///
/// @return SDK_SUCCESS when the end of the function is reached.
int
MersenneTwister::cleanup()
{
    // Releases OpenCL resources
    cl_int status;

    status = clReleaseMemObject(seedsBuf);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed.(seedsBuf)");

    status = clReleaseMemObject(resultBuf);
    CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed.(resultBuf)");

    status = clReleaseKernel(kernel);
    CHECK_OPENCL_ERROR(status, "clReleaseKernel failed.(kernel)");

    status = clReleaseProgram(program);
    CHECK_OPENCL_ERROR(status, "clReleaseProgram failed.(program)");

    status = clReleaseCommandQueue(commandQueue);
    CHECK_OPENCL_ERROR(status, "clReleaseCommandQueue failed.(commandQueue)");

    status = clReleaseContext(context);
    CHECK_OPENCL_ERROR(status, "clReleaseContext failed.(context)");

    // Release program resources
    FREE(deviceResult);

    // `seeds` is freed with ALIGNED_FREE on Windows and FREE elsewhere.
    // NOTE(review): presumably this mirrors a platform-specific aligned
    // allocation of `seeds` — confirm at the allocation site.
#if defined (_WIN32)
    ALIGNED_FREE(seeds);
#else
    FREE(seeds);
#endif

    FREE(devices);

    return SDK_SUCCESS;
}
开发者ID:morellid,项目名称:featureBasedScheduling,代码行数:37,
示例15: getPlatformint AtomicCounters::setupCL(void) { cl_int status = 0; cl_device_type dType; if (sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else // deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if (sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform() failed."); // Display available devices. retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed."); cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0}; context = clCreateContextFromType(cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "getDevices() failed "); // Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, SDK_SUCCESS, "SDKDeviceInfo::setDeviceInfo() failed"); // Check device extensions if (!strstr(deviceInfo.extensions, "cl_ext_atomic_counters_32")) { OPENCL_EXPECTED_ERROR( "Device does not support cl_ext_atomic_counters_32 extension!"); } if (!strstr(deviceInfo.extensions, "cl_khr_local_int32_base_atomics")) { OPENCL_EXPECTED_ERROR( "Device does not support cl_khr_local_int32_base_atomics extension!"); } // Get OpenCL device version std::string deviceVersionStr = std::string(deviceInfo.deviceVersion); size_t vStart = deviceVersionStr.find(" ", 0); size_t vEnd = deviceVersionStr.find(" ", vStart + 1); std::string vStrVal = deviceVersionStr.substr(vStart + 1, vEnd - vStart - 1);// Check of OPENCL_C_VERSION if device version is 1.1 or later#ifdef 
CL_VERSION_1_1 if (deviceInfo.openclCVersion) { // Exit if OpenCL C device version is 1.0 deviceVersionStr = std::string(deviceInfo.openclCVersion); vStart = deviceVersionStr.find(" ", 0); vStart = deviceVersionStr.find(" ", vStart + 1); vEnd = deviceVersionStr.find(" ", vStart + 1); vStrVal = deviceVersionStr.substr(vStart + 1, vEnd - vStart - 1); if (vStrVal.compare("1.0") <= 0) { OPENCL_EXPECTED_ERROR( "Unsupported device! Required CL_DEVICE_OPENCL_C_VERSION as 1.1"); } } else { OPENCL_EXPECTED_ERROR( "Unsupported device! Required CL_DEVICE_OPENCL_C_VERSION as 1.1"); }#else OPENCL_EXPECTED_ERROR( "Unsupported device! Required CL_DEVICE_OPENCL_C_VERSION as 1.1");#endif // Setup application data if (setupAtomicCounters() != SDK_SUCCESS) { return SDK_FAILURE; } cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE; commandQueue = clCreateCommandQueue(context, devices[sampleArgs->deviceId], props, &status); CHECK_OPENCL_ERROR(status, "clCreateCommandQueue failed(commandQueue)"); // Set Persistent memory only for AMD platform cl_mem_flags inMemFlags = CL_MEM_READ_ONLY; if (sampleArgs->isAmdPlatform()) { inMemFlags |= CL_MEM_USE_PERSISTENT_MEM_AMD; } // Create buffer for input array inBuf = clCreateBuffer(context, inMemFlags, length * sizeof(cl_uint), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed.(inBuf)"); // Set up data for input array cl_event writeEvt; status = clEnqueueWriteBuffer(commandQueue, inBuf, CL_FALSE, 0, length * sizeof(cl_uint), input, 0, NULL, &writeEvt); CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer(inBuf) failed.."); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush(commandQueue) failed."); counterOutBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed.(counterOutBuf)."); globalOutBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer 
failed.(globalOutBuf)."); // create a CL program using the kernel source buildProgramData buildData; buildData.kernelName = std::string("AtomicCounters_Kernels.cl");//.........这里部分代码省略.........
开发者ID:xianggong,项目名称:m2s-bench-amdapp-2.9-si,代码行数:101,
示例16: clEnqueueWriteBuffer
/// Runs the global-atomics kernel once, adds its execution time to
/// kTimeAtomGlobal and reads the resulting counter back into globalOut.
///
/// Steps: reset the device counter to initValue, bind the kernel arguments,
/// launch the kernel over `length` work items, poll its event until it
/// completes, read the profiling timestamps, then read the counter back.
///
/// @return SDK_SUCCESS when the end of the function is reached; SDK_FAILURE
///         if the kernel event reports abnormal termination.
int AtomicCounters::runGlobalAtomicKernel() {
    cl_int status = CL_SUCCESS;

    // Set Global and Local work items
    size_t globalWorkItems = length;
    size_t localWorkItems = globalWorkGroupSize;

    // Initialize the counter value (non-blocking write, waited on below)
    cl_event writeEvt;
    status = clEnqueueWriteBuffer(commandQueue, globalOutBuf, CL_FALSE, 0,
                                  sizeof(cl_uint), &initValue, 0, NULL, &writeEvt);
    CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer(globalOutBuf) failed.");
    status = clFlush(commandQueue);
    CHECK_OPENCL_ERROR(status, "clFlush() failed.");

    // Wait for event and release event
    status = waitForEventAndRelease(&writeEvt);
    CHECK_OPENCL_ERROR(status, "waitForEventAndRelease(writeEvt) failed.");

    // Set kernel arguments
    status = clSetKernelArg(globalKernel, 0, sizeof(cl_mem), &inBuf);
    CHECK_OPENCL_ERROR(status, "clSetKernelArg(inBuf) failed.");
    status = clSetKernelArg(globalKernel, 1, sizeof(cl_uint), &value);
    CHECK_OPENCL_ERROR(status, "clSetKernelArg(value) failed.");
    status = clSetKernelArg(globalKernel, 2, sizeof(cl_mem), &globalOutBuf);
    CHECK_OPENCL_ERROR(status, "clSetKernelArg(globalOutBuf) failed.");

    // Run Kernel
    cl_event ndrEvt;
    status = clEnqueueNDRangeKernel(commandQueue, globalKernel, 1, NULL,
                                    &globalWorkItems, &localWorkItems, 0, NULL,
                                    &ndrEvt);
    CHECK_OPENCL_ERROR(status, "clEnqueueNDRangeKernel(globalKernel) failed.");
    status = clFlush(commandQueue);
    CHECK_OPENCL_ERROR(status, "clFlush(commandQueue) failed.");

    // Poll the kernel event until it reports completion.
    cl_int eventStatus = CL_QUEUED;
    while (eventStatus != CL_COMPLETE) {
        status = clGetEventInfo(ndrEvt, CL_EVENT_COMMAND_EXECUTION_STATUS,
                                sizeof(cl_int), &eventStatus, NULL);
        CHECK_OPENCL_ERROR(status, "clGetEventInfo(ndrEvt) failed.");

        // A negative execution status means the command terminated
        // abnormally; it will never become CL_COMPLETE, so bail out instead
        // of spinning forever.
        if (eventStatus < 0) {
            clReleaseEvent(ndrEvt);
            CHECK_OPENCL_ERROR(eventStatus,
                               "clEnqueueNDRangeKernel(globalKernel) terminated abnormally.");
            return SDK_FAILURE;
        }
    }

    cl_ulong startTime;
    cl_ulong endTime;

    // Get profiling information
    status = clGetEventProfilingInfo(ndrEvt, CL_PROFILING_COMMAND_START,
                                     sizeof(cl_ulong), &startTime, NULL);
    CHECK_OPENCL_ERROR(
        status, "clGetEventProfilingInfo(CL_PROFILING_COMMAND_START) failed.");
    status = clGetEventProfilingInfo(ndrEvt, CL_PROFILING_COMMAND_END,
                                     sizeof(cl_ulong), &endTime, NULL);
    CHECK_OPENCL_ERROR(
        status, "clGetEventProfilingInfo(CL_PROFILING_COMMAND_END) failed.");

    // Profiling timestamps are in nanoseconds; accumulate seconds.
    double sec = 1e-9 * (endTime - startTime);
    kTimeAtomGlobal += sec;

    status = clReleaseEvent(ndrEvt);
    CHECK_OPENCL_ERROR(status, "clReleaseEvent(ndrEvt) failed.");

    // Get the occurrences of Value from atomicKernel
    cl_event readEvt;
    status = clEnqueueReadBuffer(commandQueue, globalOutBuf, CL_FALSE, 0,
                                 sizeof(cl_uint), &globalOut, 0, NULL, &readEvt);
    CHECK_OPENCL_ERROR(status, "clEnqueueReadBuffer(globalOutBuf) failed.");
    status = clFlush(commandQueue);
    CHECK_OPENCL_ERROR(status, "clFlush() failed.");

    // Wait for event and release event
    status = waitForEventAndRelease(&readEvt);
    CHECK_OPENCL_ERROR(status, "waitForEventAndRelease(readEvt) failed.");

    return SDK_SUCCESS;
}
开发者ID:xianggong,项目名称:m2s-bench-amdapp-2.9-si,代码行数:64,
示例17: clGetDeviceInfovoid CLHelper::DeviceInfo::setDeviceInfo(cl::Device device) { cl_int err = CL_SUCCESS; //Get device type err = clGetDeviceInfo( device(), CL_DEVICE_TYPE, sizeof(cl_device_type), &dType, NULL); CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_TYPE) failed"); //Get vender ID err = clGetDeviceInfo( device(), CL_DEVICE_VENDOR_ID, sizeof(cl_uint), &venderId, NULL); CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_VENDOR_ID) failed"); //Get max compute units err = clGetDeviceInfo( device(), CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &maxComputeUnits, NULL); CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_MAX_COMPUTE_UNITS) failed"); //Get max work item dimensions err = clGetDeviceInfo( device(), CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDims, NULL); CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) failed"); //Get max work item sizes delete maxWorkItemSizes; maxWorkItemSizes = new size_t[maxWorkItemDims]; err = clGetDeviceInfo( device(), CL_DEVICE_MAX_WORK_ITEM_SIZES, maxWorkItemDims * sizeof(size_t), maxWorkItemSizes, NULL); CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) failed"); // Maximum work group size err = clGetDeviceInfo( device(), CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &maxWorkGroupSize, NULL); CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); // Preferred vector sizes of all data types err = clGetDeviceInfo( device(), CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &preferredCharVecWidth, NULL); CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) failed"); err = clGetDeviceInfo( device(), CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &preferredShortVecWidth, NULL); CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) failed"); err = clGetDeviceInfo( device(), CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &preferredIntVecWidth, NULL); 
CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) failed"); err = clGetDeviceInfo( device(), CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &preferredLongVecWidth, NULL); CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) failed"); err = clGetDeviceInfo( device(), CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &preferredFloatVecWidth, NULL); CHECK_OPENCL_ERROR(err, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) failed");//.........这里部分代码省略.........
开发者ID:deranen,项目名称:OpenCLTemplate,代码行数:101,
示例18: CHECK_ERRORint DwtHaar1D::runDwtHaar1DKernel(){ cl_int status; status = this->setWorkGroupSize(); CHECK_ERROR(status, SDK_SUCCESS, "setWorkGroupSize failed"); // Force write to inData Buf to update its values cl_event writeEvt; status = clEnqueueWriteBuffer( commandQueue, inDataBuf, CL_FALSE, 0, curSignalLength * sizeof(cl_float), inData, 0, NULL, &writeEvt); CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer failed. (inDataBuf)"); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush failed."); status = sampleCommon->waitForEventAndRelease(&writeEvt); CHECK_ERROR(status, SDK_SUCCESS, "WaitForEventAndRelease(writeEvt1) Failed"); ParaClass *paraClass = new ParaClass;//new a paraclass this->classObj = clCreateBuffer(context,CL_MEM_USE_HOST_PTR,sizeof(ParaClass),paraClass,&status); CHECK_OPENCL_ERROR(status, "clclCreateBuffer failed. (inDataBuf)"); cl_event mapEvt; paraClass=(ParaClass *)clEnqueueMapBuffer(commandQueue,this->classObj,CL_FALSE,CL_MAP_WRITE,0,sizeof(ParaClass),0,NULL,&mapEvt,&status); CHECK_OPENCL_ERROR(status, "clEnqueueMapBuffer failed. (classObj)"); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush failed."); status = sampleCommon->waitForEventAndRelease(&mapEvt); CHECK_ERROR(status, SDK_SUCCESS, "WaitForEventAndRelease(mapEvt1) Failed"); paraClass->setValue(this->totalLevels,this->curSignalLength,this->levelsDone,this->maxLevelsOnDevice); cl_event unmapEvt; status=clEnqueueUnmapMemObject(commandQueue,this->classObj,paraClass,0,NULL,&unmapEvt);//class is passed to the Device CHECK_OPENCL_ERROR(status, "clEnqueueunMapBuffer failed. (classObj)"); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush failed."); status = sampleCommon->waitForEventAndRelease(&unmapEvt); CHECK_ERROR(status, SDK_SUCCESS, "WaitForEventAndRelease(mapEvt1) Failed"); // Whether sort is to be in increasing order. 
CL_TRUE implies increasing status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&inDataBuf); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (inDataBuf)"); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&dOutDataBuf); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (dOutDataBuf)"); status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&dPartialOutDataBuf); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (dPartialOutData)"); status = clSetKernelArg(kernel, 3, (localThreads * 2 * sizeof(cl_float)), NULL); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (local memory)"); status = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*)&this->classObj); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (global memory)"); /* * Enqueue a kernel run call. */ cl_event ndrEvt; status = clEnqueueNDRangeKernel( commandQueue, kernel, 1, NULL, &globalThreads,//.........这里部分代码省略.........
开发者ID:CryDevPortal,项目名称:amd-app-sdk-fixes,代码行数:101,
示例19: isPlatformEnabledintMatrixMulImage::setupCL(void){ cl_int status = 0; cl_device_type dType; if(deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = sampleCommon->getPlatform(platform, platformId, isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::getPlatform() failed"); // Display available devices. retValue = sampleCommon->displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::displayDevices() failed"); /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType( cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = sampleCommon->getDevices(context, &devices, deviceId, isDeviceIdEnabled()); CHECK_ERROR(status, 0, "sampleCommon::getDevices() failed"); //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[deviceId]); CHECK_ERROR(retValue, SDK_SUCCESS, "deviceInfo.setDeviceInfo. failed"); { // The block is to move the declaration of prop closer to its use cl_command_queue_properties prop = 0; prop |= CL_QUEUE_PROFILING_ENABLE; commandQueue = clCreateCommandQueue( context, devices[deviceId], prop, &status); CHECK_ERROR(retValue, SDK_SUCCESS, "clCreateCommandQueue. 
failed"); } cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; if(!deviceInfo.imageSupport) { std::cout << "Expected Error: Image is not supported on the Device" << std::endl; return SDK_EXPECTED_FAILURE; } cl_image_desc imageDesc; memset(&imageDesc, '/0', sizeof(cl_image_desc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; // Create image for matrix A imageDesc.image_width = width0 / 4; imageDesc.image_height = height0; inputBuffer0 = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, &imageDesc, input0, &status); CHECK_OPENCL_ERROR(status, "clCreateImage failed. (inputBuffer0)"); // Create image for matrix B imageDesc.image_width = width1 / 4;//.........这里部分代码省略.........
开发者ID:ChiahungTai,项目名称:OpenCL-playgorund,代码行数:101,
示例20: isPlatformEnabledintDwtHaar1D::setupCL(void){ cl_int status = 0; cl_device_type dType; if(deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = sampleCommon->getPlatform(platform, platformId, isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::getPlatform() failed"); // Display available devices. retValue = sampleCommon->displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::displayDevices() failed"); // If we could find our platform, use it. Otherwise use just available platform. cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType(cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = sampleCommon->getDevices(context, &devices, deviceId, isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "sampleCommon::getDevices() failed"); commandQueue = clCreateCommandQueue(context, devices[deviceId], 0, &status); CHECK_OPENCL_ERROR(status, "clCreateCommandQueue failed."); //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[deviceId]); CHECK_ERROR(retValue, 0, "SDKDeviceInfo::setDeviceInfo() failed"); // Set Presistent memory only for AMD platform cl_mem_flags inMemFlags = CL_MEM_READ_ONLY; if(isAmdPlatform()) inMemFlags |= CL_MEM_USE_PERSISTENT_MEM_AMD; inDataBuf = clCreateBuffer(context, inMemFlags, sizeof(cl_float) * signalLength, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. 
(inDataBuf)"); dOutDataBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, signalLength * sizeof(cl_float), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (dOutDataBuf)"); dPartialOutDataBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, signalLength * sizeof(cl_float), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (dPartialOutDataBuf)"); // create a CL program using the kernel source streamsdk::buildProgramData buildData; buildData.kernelName = std::string("DwtHaar1DCPPKernel_Kernels.cl"); buildData.devices = devices; buildData.deviceId = deviceId; buildData.flagsStr = std::string("-x clc++ "); if(isLoadBinaryEnabled()) buildData.binaryName = std::string(loadBinary.c_str()); if(isComplierFlagsSpecified()) buildData.flagsFileName = std::string(flags.c_str());//.........这里部分代码省略.........
开发者ID:CryDevPortal,项目名称:amd-app-sdk-fixes,代码行数:101,
示例21: CHECK_OPENCL_ERRORint MatrixMulImage::runCLKernels(void){ cl_int status; /* * Kernel runs over complete output matrix with blocks of blockSize x blockSize * running concurrently */ size_t globalThreads[2]= {width1 / 4, height0 / 8}; size_t localThreads[2] = {blockSize, blockSize}; status = kernelInfo.setKernelWorkGroupInfo(kernel, devices[deviceId]); CHECK_OPENCL_ERROR(status, "kernelInfo.setKernelWorkGroupInfo failed"); availableLocalMemory = deviceInfo.localMemSize - kernelInfo.localMemoryUsed; neededLocalMemory = 2 * blockSize * blockSize * sizeof(cl_float); if(neededLocalMemory > availableLocalMemory) { std::cout << "Unsupported: Insufficient local memory on device." << std::endl; return SDK_SUCCESS; } if((cl_uint)(localThreads[0]*localThreads[1]) > kernelInfo.kernelWorkGroupSize) { if(kernelInfo.kernelWorkGroupSize >= 64) { blockSize = 8; localThreads[0] = blockSize; localThreads[1] = blockSize; } else if(kernelInfo.kernelWorkGroupSize >= 32) { blockSize = 4; localThreads[0] = blockSize; localThreads[1] = blockSize; } else { std::cout << "Out of Resources!" << std::endl; std::cout << "Group Size specified : " << localThreads[0] * localThreads[1] << std::endl; std::cout << "Max Group Size supported on the kernel : " << kernelInfo.kernelWorkGroupSize<<std::endl; return SDK_FAILURE; } } if(localThreads[0] > deviceInfo.maxWorkItemSizes[0] || localThreads[1] > deviceInfo.maxWorkItemSizes[1] || localThreads[0]*localThreads[1] > deviceInfo.maxWorkGroupSize) { std::cout << "Unsupported: Device does not support requested number of work items." << std::endl; return SDK_FAILURE; } //For small matrix sizes while(globalThreads[0] % localThreads[0]) localThreads[0] /= 2; while(globalThreads[1] % localThreads[1]) localThreads[1] /= 2; // Set appropriate arguments to the kernel status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer0); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. 
(outputBuffer)"); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&inputBuffer1); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (inputBuffer0)"); status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&outputBuffer); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (inputBuffer1)"); status = clSetKernelArg(kernel, 3, sizeof(cl_int),(void*)&width0); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (width0)"); status = clSetKernelArg(kernel, 4, sizeof(cl_int), &width1); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (width1)"); // Enqueue a kernel run call cl_event ndrEvt; status = clEnqueueNDRangeKernel( commandQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, &ndrEvt); CHECK_OPENCL_ERROR(status, "clEnqueueNDRangeKernel failed."); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush failed."); cl_int eventStatus = CL_QUEUED; while(eventStatus != CL_COMPLETE) { status = clGetEventInfo(//.........这里部分代码省略.........
开发者ID:ChiahungTai,项目名称:OpenCL-playgorund,代码行数:101,
示例22: clEnqueueCopyBufferfloat CRoutine_Sum_NVidia::Sum(cl_mem input_buffer){ // First zero out the temporary sum buffer. mrZero->Zero(mTempBuffer1, mBufferSize); int status = CL_SUCCESS; // Copy the input buffer into mTempBuffer1 // The work was all completed on the GPU. Copy the summed value to the final buffer: status = clEnqueueCopyBuffer(mQueue, input_buffer, mTempBuffer1, 0, 0, mInputSize * sizeof(cl_float), 0, NULL, NULL); CHECK_OPENCL_ERROR(status, "clEnqueueCopyBuffer failed."); status = clFinish(mQueue); CHECK_OPENCL_ERROR(status, "clFinish failed."); // Init locals: cl_float gpu_result = 0; int numThreads = mThreads[0]; int threads = 0; int blocks = 0; cl_mem buff1 = mTempBuffer1; cl_mem buff2 = mTempBuffer2; size_t globalWorkSize[1]; size_t localWorkSize[1]; cl_kernel reductionKernel; for(int kernel_id = 0; kernel_id < mReductionPasses; kernel_id++) { threads = mThreads[kernel_id]; blocks = mBlocks[kernel_id]; globalWorkSize[0] = blocks * threads; localWorkSize[0] = threads; reductionKernel = mKernels[kernel_id]; clSetKernelArg(reductionKernel, 0, sizeof(cl_mem), (void *) &buff1); clSetKernelArg(reductionKernel, 1, sizeof(cl_mem), (void *) &buff2); clSetKernelArg(reductionKernel, 2, sizeof(cl_int), &mBufferSize); clSetKernelArg(reductionKernel, 3, sizeof(cl_float) * numThreads, NULL); status = clEnqueueNDRangeKernel(mQueue, reductionKernel, 1, 0, globalWorkSize, localWorkSize, 0, NULL, NULL); CHECK_OPENCL_ERROR(status, "clEnqueueNDRangeKernel failed."); buff1 = buff2; } clFinish(mQueue); // If a few elements remain, we will need to compute their sum on the CPU: if (mFinalS > 1) { cl_float h_odata[mFinalS]; // copy result from device to host status = clEnqueueReadBuffer(mQueue, mTempBuffer2, CL_TRUE, 0, mFinalS * sizeof(cl_float), h_odata, 0, NULL, NULL); CHECK_OPENCL_ERROR(status, "clEnqueueReadBuffer failed."); for(int i=0; i < mFinalS; i++) { gpu_result += h_odata[i]; } } else { // The work was all completed on the GPU. 
Copy the summed value to the CPU: status = clEnqueueReadBuffer(mQueue, mTempBuffer2, CL_TRUE, 0, sizeof(cl_float), &gpu_result, 0, NULL, NULL); CHECK_OPENCL_ERROR(status, "clEnqueueReadBuffer failed."); } return float(gpu_result);}
开发者ID:bkloppenborg,项目名称:liboi,代码行数:70,
示例23: ImageOperationECBint ImageOperationECB(int argc, char** argv, bool bEncrypt = true){ // Parse arguments // OpenCL arguments: platform and device cl_int err; int iPlatform = GetArgInt (argc, argv, "p"); int iDevice = GetArgInt (argc, argv, "d"); char* sInFile = GetArgString(argc, argv, "in"); char* sOutFile = GetArgString(argc, argv, "out"); if (sInFile == NULL || sOutFile == NULL || !FileExists(sInFile)) { PrintUsage(); return -1; } // Initialize ImageMagick Magick::InitializeMagick(*argv); ImageData img = ReadImageFile(sInFile); // Allocate Host Memory unsigned char key[16] = { 0x2B, 0x7E, 0x15, 0x16, 0x28, 0xAE, 0xD2, 0xA6, 0xAB, 0xF7, 0x15, 0x88, 0x09, 0xCF, 0x4F, 0x3C}; unsigned char* roundKeys = NULL; int rounds = 0; ComputeRoundKeys(&roundKeys, &rounds, 16, key); // Set-up OpenCL Platform OCL_Device* pOCL_Device = new OCL_Device(iPlatform, iDevice); pOCL_Device->SetBuildOptions(""); pOCL_Device->PrintInfo(); // Set up OpenCL cl_kernel Kernel = pOCL_Device->GetKernel("aes-kernel.cl", bEncrypt ? 
"AES_ECB_Encrypt" : "AES_ECB_Decrypt"); // Allocate Device Memory cl_mem d_A = pOCL_Device->DeviceMalloc(0, img.padded_bytes); cl_mem d_B = pOCL_Device->DeviceMalloc(1, img.padded_bytes); cl_mem d_C = pOCL_Device->DeviceMalloc(2, rounds * 16); // Copy Image to Device pOCL_Device->CopyBufferToDevice(img.data, 0, img.padded_bytes); // Keys pOCL_Device->CopyBufferToDevice(roundKeys, 2, rounds * 16); // Set Kernel Arguments cl_int _num = img.padded_bytes / 16; err = clSetKernelArg(Kernel, 0, sizeof(cl_mem), &d_A); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(Kernel, 1, sizeof(cl_mem), &d_B); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(Kernel, 2, sizeof(cl_mem), &d_C); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(Kernel, 3, sizeof(cl_int), &rounds); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(Kernel, 4, sizeof(cl_int), &_num); CHECK_OPENCL_ERROR(err); // Wait for previous action to finish err = clFinish(pOCL_Device->GetQueue()); CHECK_OPENCL_ERROR(err); size_t off = 0; size_t num = img.padded_bytes / 16; size_t threads = 256; // Run the kernel err = clEnqueueNDRangeKernel(pOCL_Device->GetQueue(), Kernel, 1, NULL, &num, &threads, 0, NULL, NULL); CHECK_OPENCL_ERROR(err); // Wait for kernel to finish err = clFinish(pOCL_Device->GetQueue()); CHECK_OPENCL_ERROR(err); // Copy Data From Device pOCL_Device->CopyBufferToHost (img.data, 1, img.padded_bytes); // Free resources delete pOCL_Device; delete[] roundKeys; // Write Output data WriteImageFile(sOutFile, img); free(img.data);//.........这里部分代码省略.........
开发者ID:Soledad89,项目名称:learnOpenCL,代码行数:101,
示例24: CHECK_ERRORint DwtHaar1D::runDwtHaar1DKernel(){ cl_int status; status = this->setWorkGroupSize(); CHECK_ERROR(status, SDK_SUCCESS, "setWorkGroupSize failed"); // Force write to inData Buf to update its values cl_event writeEvt; status = clEnqueueWriteBuffer( commandQueue, inDataBuf, CL_FALSE, 0, curSignalLength * sizeof(cl_float), inData, 0, NULL, &writeEvt); CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer failed. (inDataBuf)"); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush failed."); status = waitForEventAndRelease(&writeEvt); CHECK_ERROR(status, SDK_SUCCESS, "WaitForEventAndRelease(writeEvt1) Failed"); // Whether sort is to be in increasing order. CL_TRUE implies increasing status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&inDataBuf); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (inDataBuf)"); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&dOutDataBuf); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (dOutDataBuf)"); status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&dPartialOutDataBuf); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (dPartialOutData)"); status = clSetKernelArg(kernel, 3, (localThreads * 2 * sizeof(cl_float)), NULL); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (local memory)"); status = clSetKernelArg(kernel, 4, sizeof(cl_uint), (void*)&totalLevels); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (totalLevels)"); status = clSetKernelArg(kernel, 5, sizeof(cl_uint), (void*)&curSignalLength); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (curSignalLength)"); status = clSetKernelArg(kernel, 6, sizeof(cl_uint), (void*)&levelsDone); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (levelsDone)"); status = clSetKernelArg(kernel, 7, sizeof(cl_uint), (void*)&maxLevelsOnDevice); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (levelsDone)"); /* * Enqueue a kernel run call. 
*/ cl_event ndrEvt; status = clEnqueueNDRangeKernel( commandQueue, kernel, 1, NULL, &globalThreads, &localThreads, 0, NULL, &ndrEvt); CHECK_OPENCL_ERROR(status, "clEnqueueNDRangeKernel failed."); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush failed."); status = waitForEventAndRelease(&ndrEvt); CHECK_ERROR(status, SDK_SUCCESS, "WaitForEventAndRelease(ndrEvt1) Failed"); // Enqueue the results to application pointer cl_event readEvt1;//.........这里部分代码省略.........
开发者ID:xianggong,项目名称:m2c-llvm-devtools-host,代码行数:101,
示例25: test1int test1(int argc, char** argv){ // Parse arguments // OpenCL arguments: platform and device cl_int err; int iPlatform = GetArgInt(argc, argv, "p"); int iDevice = GetArgInt(argc, argv, "d"); char* sFileName = GetArgString(argc, argv, "n"); // Allocate Host Memory unsigned char pattern[16] = { 0x32, 0x43, 0xF6, 0xA8, 0x88, 0x5A, 0x30, 0x8D, 0x31, 0x31, 0x98, 0xA2, 0xE0, 0x37, 0x07, 0x34}; unsigned char data[16*256]; for (int i = 0; i < 256; i++) for (int k; k < 16; k++) data[i*16 + k] = pattern[k]; for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) printf("%2X ", data[i + j*4]); printf("/n"); } printf("/n"); unsigned char key[16] = { 0x2B, 0x7E, 0x15, 0x16, 0x28, 0xAE, 0xD2, 0xA6, 0xAB, 0xF7, 0x15, 0x88, 0x09, 0xCF, 0x4F, 0x3C}; unsigned char* roundKeys = NULL; int rounds = 0; ComputeRoundKeys(&roundKeys, &rounds, 16, key); // Set-up OpenCL Platform OCL_Device* pOCL_Device = new OCL_Device(iPlatform, iDevice); pOCL_Device->SetBuildOptions(""); pOCL_Device->PrintInfo(); // Set up OpenCL cl_kernel kernel = pOCL_Device->GetKernel("aes-kernel.cl", "AES_ECB_Encypt4"); // Allocate Device Memory cl_mem d_A = pOCL_Device->DeviceMalloc(0, 16); cl_mem d_B = pOCL_Device->DeviceMalloc(1, 16); cl_mem d_C = pOCL_Device->DeviceMalloc(2, rounds * 16); // Copy Image to Device pOCL_Device->CopyBufferToDevice(data, 0, 16); // Keys pOCL_Device->CopyBufferToDevice(roundKeys, 2, rounds * 16); // Set Kernel Arguments err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_A); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_B); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_C); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(kernel, 3, sizeof(cl_int), &rounds); CHECK_OPENCL_ERROR(err); cl_int _num = 1; err = clSetKernelArg(kernel, 4, sizeof(cl_int), &_num); CHECK_OPENCL_ERROR(err); // Wait for previous action to finish err = clFinish(pOCL_Device->GetQueue()); CHECK_OPENCL_ERROR(err); double seconds = GetTime(); // Run the 
kernel size_t off = 0; size_t num = 256; size_t threads = 256; err = clEnqueueNDRangeKernel(pOCL_Device->GetQueue(), kernel, 1, NULL, &num, &threads, 0, NULL, NULL); CHECK_OPENCL_ERROR(err); // Wait for kernel to finish err = clFinish(pOCL_Device->GetQueue()); CHECK_OPENCL_ERROR(err); seconds = GetTime() - seconds; printf("Elapsed Time: %f s (%f MiB/s)/n" , seconds, 16 /seconds * 10.f / 1024.f / 1024.f); // Copy Data From Device pOCL_Device->CopyBufferToHost (data, 1, 16); for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) printf("%2X ", data[i + j*4]); printf("/n"); } printf("/n"); // Free resources delete pOCL_Device; delete[] roundKeys;//.........这里部分代码省略.........
开发者ID:Soledad89,项目名称:learnOpenCL,代码行数:101,
示例26: benchmark_ctrint benchmark_ctr(int argc, char** argv){ // Parse arguments // OpenCL arguments: platform and device cl_int err; int count = 100; int iPlatform = GetArgInt(argc, argv, "p"); int iDevice = GetArgInt(argc, argv, "d"); // Set-up Encryption keys unsigned char key[16] = { 0x2B, 0x7E, 0x15, 0x16, 0x28, 0xAE, 0xD2, 0xA6, 0xAB, 0xF7, 0x15, 0x88, 0x09, 0xCF, 0x4F, 0x3C}; unsigned char nonce[12]; srand(time(NULL)); for (int i = 0; i < 12; i++) nonce[i] = rand() % 256; unsigned char* roundKeys = NULL; int rounds = 0; ComputeRoundKeys(&roundKeys, &rounds, 16, key); // Set-up OpenCL Platform OCL_Device* pOCL_Device = new OCL_Device(iPlatform, iDevice); pOCL_Device->SetBuildOptions(""); pOCL_Device->PrintInfo(); // Set up OpenCL cl_kernel EncryptionKernel = pOCL_Device->GetKernel("aes-kernel.cl", "AES_CTR_Encrypt"); size_t MinSize = 16; // 16 B = 128 bits size_t MaxSize = 512 << 20; // 512 MiB. // keys cl_mem d_C = pOCL_Device->DeviceMalloc(2, rounds * 16); pOCL_Device->CopyBufferToDevice(roundKeys, 2, rounds * 16); // nonce cl_mem d_D = pOCL_Device->DeviceMalloc(3, 12); pOCL_Device->CopyBufferToDevice(nonce, 3, 12); printf("/n"); printf("Time is reported for %d passes./n", count); printf("/n"); printf(" MiB , Encryption Speed (MiB/s), Encryption Time (s), Decryption Speed (MiB/s), Decryption Time (s)/n"); for (size_t size = MinSize; size <= MaxSize; size *= 2) { printf("%12.8f, ", ((double)size) / 1024 / 1024); // Allocate Device Memory cl_mem d_A = pOCL_Device->DeviceMalloc(0, size); cl_mem d_B = pOCL_Device->DeviceMalloc(1, size); // Allocate Host Memory char* h_A = new char[size]; char* h_B = new char[size]; // Fill Host Memory for (size_t i = 0; i < size; i++) { h_A[i] = i % 27; } // Copy Data to Device pOCL_Device->CopyBufferToDevice(h_A, 0, size); pOCL_Device->CopyBufferToDevice(h_A, 1, size); // just to ensure that both buffers are on the device // Set Kernel Arguments // Encrypt kernel cl_int _num = size / 16; err = clSetKernelArg(EncryptionKernel, 
0, sizeof(cl_mem), &d_A); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(EncryptionKernel, 1, sizeof(cl_mem), &d_B); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(EncryptionKernel, 2, sizeof(cl_mem), &d_C); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(EncryptionKernel, 3, sizeof(cl_int), &rounds); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(EncryptionKernel, 4, sizeof(cl_int), &_num); CHECK_OPENCL_ERROR(err); err = clSetKernelArg(EncryptionKernel, 5, sizeof(cl_mem), &d_D); CHECK_OPENCL_ERROR(err); // Wait for previous action to finish err = clFinish(pOCL_Device->GetQueue()); CHECK_OPENCL_ERROR(err); size_t off = 0; size_t num = (((size / 16) + 255) / 256) * 256; size_t threads = 256; // Run the encryption kernel//.........这里部分代码省略.........
开发者ID:Soledad89,项目名称:learnOpenCL,代码行数:101,
示例27: clEnqueueReadBuffervoid OCL_Device::CopyBufferToHost (void* h_Buffer, int idx, size_t size){ cl_int err = clEnqueueReadBuffer (m_queue, m_buffers[idx], CL_TRUE, 0, size, h_Buffer, 0, NULL, NULL); CHECK_OPENCL_ERROR(err);}
开发者ID:Soledad89,项目名称:learnOpenCL,代码行数:6,
示例28: clGetKernelWorkGroupInfointComputeBench::bandwidth(cl_kernel &kernel, cl_mem outputBuffer, double *timeTaken, double *gbps ){ cl_int status; // Check group size against kernelWorkGroupSize status = clGetKernelWorkGroupInfo(kernel, devices[sampleArgs->deviceId], CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernelWorkGroupSize, 0); CHECK_OPENCL_ERROR(status, "clGetKernelWorkGroupInfo failed."); if (localThreads > kernelWorkGroupSize) { localThreads = kernelWorkGroupSize; } //Set appropriate arguments to the kernel int argIndex = 0; { status = clSetKernelArg(kernel, argIndex++, sizeof (cl_mem), (void *) &outputBuffer); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed.(outputBuffer)"); } double sec = 0; int iter = iterations; // Run the kernel for a number of iterations for (int i = 0; i < iter; i++) { // Enqueue a kernel run call cl_event ndrEvt; status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, &globalThreads, &localThreads, 0, NULL, &ndrEvt); CHECK_OPENCL_ERROR(status, "clEnqueueNDRangeKernel failed."); // wait for the kernel call to finish execution status = clWaitForEvents(1, &ndrEvt); CHECK_OPENCL_ERROR(status, "clWaitForEvents failed."); // Calculate performance cl_ulong startTime; cl_ulong endTime; // Get kernel profiling info status = clGetEventProfilingInfo(ndrEvt, CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &startTime, 0); CHECK_OPENCL_ERROR(status, "clGetEventProfilingInfo failed.(startTime)"); status = clGetEventProfilingInfo(ndrEvt, CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &endTime, 0); CHECK_OPENCL_ERROR(status, "clGetEventProfilingInfo failed.(endTime)"); // Cumulate time for each iteration sec += 1e-9 * (endTime - startTime); status = clReleaseEvent(ndrEvt); CHECK_OPENCL_ERROR(status, "clGetEventProfilingInfo failed.(endTime)"); status = clFinish(commandQueue); CHECK_OPENCL_ERROR(status, "clFinish failed"); } // Copy bytes int bytesPerThread = FORLOOP; double bytes = (double) (iter * bytesPerThread); double perf = (bytes / 
sec) * 1e-9; perf *= globalThreads * vectorSize; *gbps = perf; *timeTaken = sec / iter; return SDK_SUCCESS;}
开发者ID:JiniusResearch,项目名称:oclb,代码行数:94,
示例29: getPlatformintComputeBench::setupCL(void){ cl_int status = 0; cl_device_type dType; if (sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if (sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform() failed"); // Display available devices. retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed"); /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0 }; context = clCreateContextFromType(cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "getDevices() failed"); //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, SDK_SUCCESS, "SDKDeviceInfo::setDeviceInfo() failed"); std::string deviceStr(deviceInfo.deviceVersion); size_t vStart = deviceStr.find(" ", 0); size_t vEnd = deviceStr.find(" ", vStart + 1); std::string vStrVal = deviceStr.substr(vStart + 1, vEnd - vStart - 1); // OpenCL 1.1 has inbuilt support for vec3 data types if (vec3 == true) { OPENCL_EXPECTED_ERROR("Device doesn't support built-in 3 component vectors!"); } // The block is to move the declaration of prop closer to its use /* Note: Using deprecated clCreateCommandQueue as 
CL_QUEUE_PROFILING_ENABLE flag not currently working ***with clCreateCommandQueueWithProperties*/ cl_command_queue_properties prop = 0; prop |= CL_QUEUE_PROFILING_ENABLE; commandQueue = clCreateCommandQueue(context, devices[sampleArgs->deviceId], prop, &status); CHECK_OPENCL_ERROR(status, "clCreateCommandQueue failed."); if (sampleArgs->isLoadBinaryEnabled()) { // Always assuming kernel was dumped for vector-width 1 if (vectorSize != 0) { std::cout << "Ignoring specified vector-width. Assuming kernel was dumped for vector-width 1" << std::endl; } vectorSize = 1; } else { // If vector-size is not specified in the command-line, choose the preferred size for the device if (vectorSize == 0) { vectorSize = deviceInfo.preferredFloatVecWidth; } else if (vectorSize == 3) { //Make vectorSize as 4 if -v option is 3. //This memory alignment is required as per OpenCL for type3 vectors vec3 = true; vectorSize = 4; } else if ((1 != vectorSize) && (2 != vectorSize) && (4 != vectorSize) && (8 != vectorSize) && (16 != vectorSize)) { std::cout << "The vectorsize can only be one of 1,2,3(4),4,8,16!" << std::endl; return SDK_FAILURE; } }//.........这里部分代码省略.........
开发者ID:JiniusResearch,项目名称:oclb,代码行数:101,
注:本文中的CHECK_OPENCL_ERROR函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 C++ CHECK_PACKET_SIZE函数代码示例 C++ CHECK_OP函数代码示例 |