这篇教程C++ CUDA_SAFE_CALL函数代码示例写得很实用,希望能帮到您。
本文整理汇总了C++中CUDA_SAFE_CALL函数的典型用法代码示例。如果您正苦于以下问题:C++ CUDA_SAFE_CALL函数的具体用法?C++ CUDA_SAFE_CALL怎么用?C++ CUDA_SAFE_CALL使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。 在下文中一共展示了CUDA_SAFE_CALL函数的27个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: CUDA_SAFE_CALL
/**
 * Frees the four buffers exposed by a GPUUsg (element list, type list,
 * connectivity list, vertices) by passing each one to cudaFree.
 *
 * @param usg  Grid whose buffers are released. A null pointer is ignored.
 */
void CUDAResourceManager::deallocUSG(GPUUsg *usg)
{
    // Guard: the getters below would otherwise dereference a null pointer.
    if (usg == NULL)
        return;

    CUDA_SAFE_CALL(cudaFree(usg->getElemList()));
    CUDA_SAFE_CALL(cudaFree(usg->getTypeList()));
    CUDA_SAFE_CALL(cudaFree(usg->getConnList()));
    CUDA_SAFE_CALL(cudaFree(usg->getVertices()));
}
开发者ID:nixz,项目名称:covise,代码行数:7,
示例2: sizeof
/**
 * Replaces the current vertex buffer object with a new one sized for the
 * current image dimensions (iw x ih) and the given vector spacing.
 *
 * The new buffer holds 6 float2 entries per grid cell, where the grid is
 * (iw / (spaceVect + 1)) x (ih / (spaceVect + 1)). The new VBO is registered
 * with CUDA and announced through sendVbo(); the previous VBO, if any, is
 * unregistered and deleted afterwards.
 *
 * @param spaceVect  Spacing between vectors. spaceVect + 1 is used as a
 *                   divisor, so callers must not pass -1.
 */
void
LiGL2D::setVbo(int spaceVect)
{
    GLuint oldVbo = 0;
    GLuint newVbo = 0;

    // Detach the current buffer first; it is destroyed at the end.
    if (vbo != 0) {
        oldVbo = vbo;
        vbo = 0;
    }

    if (iw != 0 && ih != 0) {
        // Computed once and shared by the allocation size and sVbo
        // (the original evaluated the same expression twice).
        const int vertexCount =
            ((int)iw / (spaceVect + 1)) * ((int)ih / (spaceVect + 1)) * 6;
        const unsigned int size = vertexCount * sizeof(float2);

        // Create and size the buffer object with no initial data.
        glGenBuffers(1, &newVbo);
        glBindBuffer(GL_ARRAY_BUFFER, newVbo);
        glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
        glBindBuffer(GL_ARRAY_BUFFER, 0);

        // Register buffer object with CUDA.
        CUDA_SAFE_CALL(cudaGLRegisterBufferObject(newVbo));

        sVbo = vertexCount;
        vbo = newVbo;
        emit sendVbo(vbo);
    }

    if (oldVbo != 0) {
        CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(oldVbo));
        glDeleteBuffers(1, &oldVbo);
    }
}
开发者ID:dongmingdmdm,项目名称:camnavi,代码行数:30,
示例3: DBG
/**
 * Writes a human-readable summary of the finished simulation to fp.
 *
 * In GPU mode the timer is stopped and the spike counters are first copied
 * back from device symbols; in CPU mode only the timer is stopped. The
 * summary lists network parameters, random seed, timing, firing rates and
 * firing counts, and the stream is flushed at the end.
 *
 * @param fp  Open output stream that receives the summary.
 */
void CpuSNN::printSimSummary(FILE *fp)
{
    DBG(2, fpLog, AT, "printSimSummary()");

    float etime;
    if (currentMode == GPU_MODE) {
        stopGPUTiming();
        etime = gpuExecutionTime;

        // NOTE(review): symbols are addressed by string name here; newer CUDA
        // toolkits expect the symbol itself - confirm against the toolkit in use.
        // The per-second counters are read through spikeCountD1/D2 as scratch
        // space, then overwritten with the overall counters below.
        CUDA_SAFE_CALL(cudaMemcpyFromSymbol(&spikeCountD2, "secD2fireCnt", sizeof(int), 0, cudaMemcpyDeviceToHost));
        CUDA_SAFE_CALL(cudaMemcpyFromSymbol(&spikeCountD1, "secD1fireCnt", sizeof(int), 0, cudaMemcpyDeviceToHost));
        spikeCountAll1sec = spikeCountD1 + spikeCountD2;

        CUDA_SAFE_CALL(cudaMemcpyFromSymbol(&spikeCountD2, "spikeCountD2", sizeof(int), 0, cudaMemcpyDeviceToHost));
        CUDA_SAFE_CALL(cudaMemcpyFromSymbol(&spikeCountD1, "spikeCountD1", sizeof(int), 0, cudaMemcpyDeviceToHost));
        spikeCountAll = spikeCountD1 + spikeCountD2;
    } else {
        stopCPUTiming();
        etime = cpuExecutionTime;
    }

    // Escape sequences restored: the literals previously contained "/n" and
    // "/t", which print as two plain characters instead of newline/tab.
    fprintf(fp, "\n*** Network configuration dumped in %s.dot file...\n"
                " Use graphViz to see the network connectivity...\n\n",
            networkName.c_str());
    fprintf(fp, "*********** %s Simulation Summary **********\n",
            (currentMode == GPU_MODE) ? ("GPU") : "CPU");
    fprintf(fp, "Network Parameters: \n\tN = %d (numNExcReg:numNInhReg=%2.1f:%2.1f), numPostSynapses = %d, D = %d\n",
            numN, 100.0*numNExcReg/numN, 100.0*numNInhReg/numN, numPostSynapses, D);
    fprintf(fp, "Random Seed: %d\n", randSeed);
    // %llu matches the unsigned long long argument (was %lld).
    fprintf(fp, "Timing: \n\tModel Simulation Time = %llu sec \n\tActual Execution Time = %4.2f sec\n",
            (unsigned long long)simTimeSec, etime/1000.0);
    fprintf(fp, "Average Firing Rate \n\t2+ms delay = %3.3f Hz \n\t1ms delay = %3.3f Hz \n\tOverall = %3.3f Hz\n",
            spikeCountD2/(1.0*simTimeSec*numNExcReg),
            spikeCountD1/(1.0*simTimeSec*numNInhReg),
            spikeCountAll/(1.0*simTimeSec*numN));
    fprintf(fp, "Overall Firing Count: \n\t2+ms delay = %d \n\t1ms delay = %d \n\tTotal = %d\n",
            spikeCountD2, spikeCountD1, spikeCountAll);
    fprintf(fp, "**************************************\n\n");
    fflush(fp);
}
开发者ID:drghirlanda,项目名称:mint,代码行数:33,
示例4: CUDA_SAFE_CALL
/**
 * Initializes the base hash table, allocates two scratch arrays of
 * max_table_entries unsigned values each, and creates the CUDPP plan for a
 * forward inclusive add-scan over unsigned integers.
 *
 * @param max_table_entries   Number of entries the scratch arrays and the
 *                            scan plan are sized for.
 * @param space_usage         Stored in target_space_usage_ and forwarded to
 *                            the base class.
 * @param num_hash_functions  Forwarded to the base class.
 * @return false if the scan plan cannot be created; otherwise true only when
 *         the base initialization succeeded and both scratch pointers are
 *         non-null.
 */
bool MultivalueHashTable::Initialize(const unsigned max_table_entries,
                                     const float    space_usage,
                                     const unsigned num_hash_functions)
{
    bool ok = HashTable::Initialize(max_table_entries, space_usage, num_hash_functions);
    target_space_usage_ = space_usage;

    // Two scratch arrays: 2N 32-bit entries in total.
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_scratch_offsets_,
                              sizeof(unsigned) * max_table_entries));
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_scratch_is_unique_,
                              sizeof(unsigned) * max_table_entries));
    ok = ok && (d_scratch_offsets_ != NULL);
    ok = ok && (d_scratch_is_unique_ != NULL);

    // Scan plan; the memory it consumes is not known here.
    CUDPPConfiguration scan_config;
    scan_config.op        = CUDPP_ADD;
    scan_config.datatype  = CUDPP_UINT;
    scan_config.algorithm = CUDPP_SCAN;
    scan_config.options   = CUDPP_OPTION_FORWARD | CUDPP_OPTION_INCLUSIVE;

    const CUDPPResult plan_status =
        cudppPlan(theCudpp, &scanplan_, scan_config, max_table_entries, 1, 0);
    if (plan_status != CUDPP_SUCCESS) {
        fprintf(stderr, "Failed to create plan.");
        return false;
    }

    return ok;
}
开发者ID:DeepCV,项目名称:cudpp,代码行数:32,
示例5: makeCurrent
/**
 * Replaces the pixel buffer object and its texture for a new image size.
 *
 * Stores the new dimensions in iw/ih. When both are non-zero, a buffer of
 * width * height * 4 bytes is created, registered with CUDA, paired with a
 * new texture and announced through sendPbo(). The previous PBO and texture
 * (only when a PBO existed) are released afterwards.
 *
 * @param image_width   New image width.
 * @param image_height  New image height.
 */
void
LiGL2D::setPbo(int image_width, int image_height)
{
    makeCurrent();

    iw = image_width;
    ih = image_height;

    // Remember what has to be destroyed; the texture is only tracked when a
    // PBO was present.
    GLuint stalePbo = pbo;
    GLuint staleTex = 0;
    GLuint freshPbo = 0;
    if (stalePbo != 0) {
        pbo = 0;
        staleTex = tex;
    }

    if (iw != 0 && ih != 0) {
        glGenBuffers(1, &freshPbo);
        glBindBuffer(GL_ARRAY_BUFFER, freshPbo);
        glBufferData(GL_ARRAY_BUFFER,
                     image_height * image_width * 4 * sizeof(GLubyte),
                     NULL, GL_DYNAMIC_DRAW);
        glBindBuffer(GL_ARRAY_BUFFER, 0);
        CUDA_SAFE_CALL(cudaGLRegisterBufferObject(freshPbo));

        createTexture(&tex, iw, ih);
        glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

        pbo = freshPbo;
        emit sendPbo(pbo);
    }

    if (stalePbo != 0) {
        CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(stalePbo));
        glDeleteBuffers(1, &stalePbo);
    }
    if (staleTex != 0) {
        glDeleteTextures(1, &staleTex);
    }
}
开发者ID:dongmingdmdm,项目名称:camnavi,代码行数:34,
示例6: memcpyFromDeviceAsync
/**
 * Copies the firing buffer from md_buffer to mh_buffer on the given stream,
 * blocks until the m_copyDone event recorded after the copy has completed,
 * and then rebuilds the sparse representation from the host copy.
 *
 * @param stream  Stream on which the copy and the event are enqueued.
 */
void
FiringBuffer::sync(cudaStream_t stream)
{
    memcpyFromDeviceAsync(mh_buffer.get(),
                          md_buffer.get(),
                          m_mapper.partitionCount() * m_pitch,
                          stream);

    // Record behind the copy, then wait on the host for that event.
    CUDA_SAFE_CALL(cudaEventRecord(m_copyDone, stream));
    CUDA_SAFE_CALL(cudaEventSynchronize(m_copyDone));

    populateSparse(mh_buffer.get());
}
开发者ID:MogeiWang,项目名称:nemosim,代码行数:9,
示例7: runbench_warmup
/**
 * Launches one instance of benchmark_func<short, 256, 0> over a reduced grid
 * and waits for it to finish.
 *
 * The grid has size / UNROLLED_MEMORY_ACCESSES / 32 / 256 blocks of 256
 * threads (integer division at each step).
 *
 * @param cd    Buffer handed to the kernel, reinterpreted as short*.
 * @param size  Element count used to derive the grid size.
 */
void runbench_warmup(double *cd, long size)
{
    const int  BLOCK_SIZE   = 256;
    const long reducedElems = size / (UNROLLED_MEMORY_ACCESSES) / 32;
    const int  reducedBlocks = reducedElems / BLOCK_SIZE;

    dim3 threads(BLOCK_SIZE, 1, 1);
    dim3 blocks(reducedBlocks, 1, 1);

    hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< short, BLOCK_SIZE, 0 >),
                    dim3(blocks), dim3(threads), 0, 0,
                    (short)1, (short*)cd);

    // Surface launch errors first, then errors raised during execution.
    CUDA_SAFE_CALL( hipGetLastError() );
    CUDA_SAFE_CALL( hipDeviceSynchronize() );
}
开发者ID:sunway513,项目名称:mixbench,代码行数:12,
示例8: CUDA_SAFE_CALL
//---------------------------------------------
// GPU memory operations
//---------------------------------------------

/**
 * Allocates `size` bytes with cudaMalloc and clears them with cudaMemset.
 *
 * When __DEBUG__ is defined the running total d_dmemUsage is increased by
 * `size`; with __ALLOC__ also defined the allocation is logged via BenLog.
 *
 * @param size  Number of bytes to allocate.
 * @return Pointer written by cudaMalloc (NULL if it left the pointer untouched).
 */
char *D_MALLOC(size_t size)
{
    char *buf = NULL;
    CUDA_SAFE_CALL(cudaMalloc((void**)&buf, size));
    CUDA_SAFE_CALL(cudaMemset(buf, 0, size));
#ifdef __DEBUG__
# ifdef __ALLOC__
    // size_t is printed through an explicit unsigned long cast; the previous
    // "%d" did not match the argument type, and "/n" was a mangled "\n".
    // NOTE(review): assumes BenLog takes printf-style format strings - confirm.
    BenLog("+d%lu bytes\n", (unsigned long)size);
# endif //__ALLOC__
    d_dmemUsage += size;
#endif
    return buf;
}
开发者ID:xiaobaidemu,项目名称:GPU_Apriori,代码行数:16,
示例9: memcpy
/**
 * Copies particle data from list_in into this list.
 *
 * ispecies is always copied. For device_type 0 the float arrays, the int
 * arrays and cluster_id are copied with memcpy. For device_type 1 (only when
 * NO_CUDA is not defined) the same arrays are copied with cudaMemcpyAsync
 * using cudaMemcpyDeviceToHost, followed by a device-wide synchronize. Any
 * other device_type copies nothing further.
 *
 * @param list_in  Source list; nptcls elements are copied per array.
 */
void ParticleListCPUSorted::copy_from(const ParticleList* list_in)
{
    ispecies = list_in->ispecies;

    if (list_in->device_type == 0) {
        // Host source: plain memcpy for every array.
        for (int f = 0; f < ParticleList_nfloats; f++) {
            memcpy(*get_float(f), *(list_in->get_float(f)), nptcls*sizeof(realkind));
        }
        for (int k = 0; k < ParticleList_nints; k++) {
            memcpy(*get_int(k), *(list_in->get_int(k)), nptcls*sizeof(int));
        }
        memcpy(cluster_id, list_in->cluster_id, nptcls*sizeof(int));
        // Copies of num_subcycles, num_piccard and num_piccard2 were
        // commented out in the original and remain disabled.
        return;
    }

    if (list_in->device_type == 1) {
#ifndef NO_CUDA
        enum cudaMemcpyKind kind = cudaMemcpyDeviceToHost;

        for (int f = 0; f < ParticleList_nfloats; f++) {
            CUDA_SAFE_CALL(cudaMemcpyAsync(*get_float(f), *(list_in->get_float(f)),
                                           nptcls*sizeof(realkind), kind));
        }
        for (int k = 0; k < ParticleList_nints; k++) {
            CUDA_SAFE_CALL(cudaMemcpyAsync(*get_int(k), *(list_in->get_int(k)),
                                           nptcls*sizeof(int), kind));
        }
        CUDA_SAFE_CALL(cudaMemcpyAsync(cluster_id, (list_in->cluster_id),
                                       nptcls*sizeof(int), kind));

        // All copies above were enqueued without an explicit stream; wait for
        // them before returning.
        CUDA_SAFE_CALL(cudaDeviceSynchronize());
#endif
    }
}
开发者ID:cocomans,项目名称:plasma,代码行数:49,
示例10: stream
/**
 * Builds a grid map: allocates the padded grid with cudaMalloc, converts the
 * input energies from double to float into a cudaMallocHost buffer, and
 * starts copying them into the padded grid.
 *
 * @param numGridPoints        Dimensions of the unpadded grid.
 * @param numGridPointsPadded  Dimensions of the padded grid.
 * @param inputEnergies        numGridPoints.Cube() doubles to convert.
 * @param stream               Stream stored in the object.
 */
CudaGridMap::CudaGridMap(const Vec3i &numGridPoints,
                         const Vec3i &numGridPointsPadded,
                         const double *inputEnergies,
                         cudaStream_t stream)
    : stream(stream),
      numGridPoints(numGridPoints),
      numGridPointsPadded(numGridPointsPadded)
{
    // Padded grid storage.
    CUDA_SAFE_CALL(cudaMalloc((void**)&energiesDevice,
                              sizeof(float) * numGridPointsPadded.Cube()));

    // Staging buffer, filled with the float conversion of the input.
    int unpaddedCount = numGridPoints.Cube();
    CUDA_SAFE_CALL(cudaMallocHost((void**)&energiesHost,
                                  sizeof(float) * unpaddedCount));
    std::transform(inputEnergies, inputEnergies + unpaddedCount,
                   energiesHost, typecast<float, double>);

    // Copy the unpadded data into the padded grid; elements in the padding
    // area are left uninitialized.
    copyGridMapPadded(energiesDevice, numGridPointsPadded,
                      energiesHost, numGridPoints,
                      cudaMemcpyHostToDevice);
}
开发者ID:marekolsak,项目名称:fastgrid,代码行数:15,
示例11: CUDA_SAFE_CALL
/**
 * Decrements the global UVM allocation counter and frees the pointer with
 * cudaFree. Any exception raised inside is caught and discarded, so this
 * function never propagates one.
 *
 * @param arg_alloc_ptr  Pointer to free.
 * (second parameter)    Allocation size; unused.
 */
void CudaUVMSpace::deallocate( void * const arg_alloc_ptr ,
                               const size_t /* arg_alloc_size */ ) const
{
  try
  {
    // The counter is updated before the free is attempted.
    Kokkos::Impl::num_uvm_allocations -= 1;
    CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
  }
  catch( ... )
  {
    // Swallowed on purpose: nothing is reported to the caller.
  }
}
开发者ID:gmackey,项目名称:kokkos,代码行数:7,
示例12: attach_texture_object
/**
 * Creates a texture object over a linear allocation.
 *
 * The channel format is chosen from sizeof_alias: 4 -> int, 8 -> int2,
 * anything else -> int4. The texture descriptor is left zero-initialized.
 * Original note: only valid for 300 <= __CUDA_ARCH__; no architecture check
 * is performed inside this function.
 *
 * @param sizeof_alias  Element size in bytes used to pick the channel format.
 * @param alloc_ptr     Start of the allocation.
 * @param alloc_size    Size of the allocation in bytes.
 * @return The texture object written by cudaCreateTextureObject.
 */
::cudaTextureObject_t
SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( const unsigned sizeof_alias
                     , void * const   alloc_ptr
                     , size_t const   alloc_size )
{
  ::cudaTextureObject_t texture ;

  struct cudaResourceDesc resource ;
  struct cudaTextureDesc  sampling ;

  memset( & resource , 0 , sizeof(resource) );
  memset( & sampling , 0 , sizeof(sampling) );

  resource.resType = cudaResourceTypeLinear ;

  if ( sizeof_alias == 4 ) {
    resource.res.linear.desc = cudaCreateChannelDesc< int >();
  }
  else if ( sizeof_alias == 8 ) {
    resource.res.linear.desc = cudaCreateChannelDesc< ::int2 >();
  }
  else {
    // Treated as sizeof_alias == 16.
    resource.res.linear.desc = cudaCreateChannelDesc< ::int4 >();
  }

  resource.res.linear.sizeInBytes = alloc_size ;
  resource.res.linear.devPtr      = alloc_ptr ;

  CUDA_SAFE_CALL( cudaCreateTextureObject( & texture , & resource , & sampling , NULL ) );

  return texture ;
}
开发者ID:gurkih,项目名称:lammps,代码行数:28,
示例13: main
/**
 * Program entry point: runs do_main(), resets the CUDA device, and exits
 * with status 0. Command-line arguments are ignored.
 */
int main( int, char ** )
{
  do_main();

  // Reset the device once all work issued by do_main() has returned.
  CUDA_SAFE_CALL( cudaDeviceReset() );

  return 0;
}
开发者ID:ABHISKUMAR,项目名称:nsight-gtc2013,代码行数:7,
示例14: Vec3i
/**
 * Copies a 3D grid between two buffers whose dimensions may differ, one
 * X-row at a time, using cudaMemcpyAsync on the object's stream.
 *
 * Only the region common to both grids (per-axis minimum of the two sizes)
 * is copied; everything outside it in dst is left untouched.
 *
 * @param dst               Destination buffer.
 * @param numGridPointsDst  Dimensions of the destination grid.
 * @param src               Source buffer.
 * @param numGridPointsSrc  Dimensions of the source grid.
 * @param kind              Copy direction passed to cudaMemcpyAsync.
 */
void CudaGridMap::copyGridMapPadded(float *dst, const Vec3i &numGridPointsDst,
                                    const float *src, const Vec3i &numGridPointsSrc,
                                    cudaMemcpyKind kind)
{
    // Extent shared by both grids.
    Vec3i common = Vec3i(Mathi::Min(numGridPointsDst.x, numGridPointsSrc.x),
                         Mathi::Min(numGridPointsDst.y, numGridPointsSrc.y),
                         Mathi::Min(numGridPointsDst.z, numGridPointsSrc.z));

    // Elements per Z-slice in each grid.
    int sliceDst = numGridPointsDst.x * numGridPointsDst.y;
    int sliceSrc = numGridPointsSrc.x * numGridPointsSrc.y;

    for (int z = 0; z < common.z; z++)
    {
        for (int y = 0; y < common.y; y++)
        {
            // Offset of row (z, y) in each grid.
            int rowDst = z * sliceDst + y * numGridPointsDst.x;
            int rowSrc = z * sliceSrc + y * numGridPointsSrc.x;

            // Copy one row along X.
            CUDA_SAFE_CALL(cudaMemcpyAsync(dst + rowDst, src + rowSrc,
                                           sizeof(float) * common.x, kind, stream));
        }
    }
}
开发者ID:marekolsak,项目名称:fastgrid,代码行数:27,
示例15: FieldDataCPU
/**
 * Allocates the per-node field containers described by _pdata.
 *
 * Stores _pdata and its nx/ny/nz, always creates and allocates the CPU
 * fields, creates one FieldDataGPU per GPU when nGPU > 0, creates the MIC
 * fields when nMIC > 0, and finally creates the broadcast timer.
 *
 * @param _pdata  Configuration object; kept in the pdata member.
 */
void NodeFieldData::allocate(PlasmaData* _pdata)
{
	pdata = _pdata;
	nx = pdata->nx;
	ny = pdata->ny;
	nz = pdata->nz;

	cpu_fields = new FieldDataCPU();
	cpu_fields -> allocate(pdata);

	if(pdata->node_info->nGPU > 0)
	{
		// NOTE(review): raw malloc storage; elements are never constructed in
		// place, they are assigned to below.
		gpu_fields = (FieldDataGPU*)malloc(pdata->node_info->nGPU * sizeof(FieldDataGPU));
		// One loop iteration per GPU, distributed over OpenMP threads.
#pragma omp parallel for
		for(int i=0;i<pdata->node_info->nGPU;i++)
		{
			// Select the GPU for this iteration; the thread_info index is
			// offset by nspecies.
			CUDA_SAFE_CALL(cudaSetDevice(pdata->thread_info[pdata->node_info->nspecies+i]->gpu_info->igpu));
			// NOTE(review): the object created by `new` is copied and its
			// pointer discarded, so it is never deleted - looks like a leak;
			// confirm whether this is intentional.
			gpu_fields[i] = *(new FieldDataGPU());
			gpu_fields[i].allocate(pdata);
		}
	}

	if(pdata->node_info->nMIC > 0)
	{
		mic_fields = new FieldDataMIC();
		mic_fields -> allocate(pdata);
	}

	bcast_timer = new CPUTimer();
}
开发者ID:cocomans,项目名称:plasma,代码行数:32,
示例16: CUDA_SAFE_CALL
/**
 * Enumerates the CUDA devices, prints one line per device, and returns the
 * ordinals of those with compute capability major version >= 2.
 *
 * Terminates the process with EXIT_FAILURE when no device exists or when
 * none of them qualifies.
 *
 * @return Ordinals of the usable devices (never empty).
 */
std::vector<int> host::QueryDevices() {
    int device_count = 0;
    CUDA_SAFE_CALL(cudaGetDeviceCount(&device_count));

    if (device_count < 1) {
        // "\n" restored (the literal previously ended in a mangled "/n").
        fprintf(stderr, "No suitable CUDA devices found!\n");
        exit(EXIT_FAILURE);
    }

    std::vector<int> device_ids;

    for (int i = 0; i < device_count; i++) {
        cudaDeviceProp device_prop;
        CUDA_SAFE_CALL(cudaGetDeviceProperties(&device_prop, i));

        int compute_cap_major = device_prop.major;
        int compute_cap_minor = device_prop.minor;
        int core_count = ConvertSMVer2Cores(compute_cap_major, compute_cap_minor) *
                         device_prop.multiProcessorCount;
        // clockRate scaled by 1e-6 and printed as GHz; memory printed in MB.
        float clock_speed = device_prop.clockRate * 1e-6f;
        float mem_size = device_prop.totalGlobalMem / 1024.0f / 1024.0f;

        if (compute_cap_major >= 2) {
            device_ids.push_back(i);
            printf("\t[%d] %s (%d.%d, %d cores, %.2f GHz, %.2f MB)\n",
                   i, device_prop.name, compute_cap_major, compute_cap_minor,
                   core_count, clock_speed, mem_size);
        } else {
            printf("\t[%d] %s (%d.%d not usable)\n",
                   i, device_prop.name, compute_cap_major, compute_cap_minor);
        }
    }

    if (device_ids.empty()) {
        fprintf(stderr, "No suitable CUDA devices found!\n");
        exit(EXIT_FAILURE);
    }

    return device_ids;
}
开发者ID:bobsomers,项目名称:haste,代码行数:47,
示例17: deallocate_rows
/**
 * Releases the current row array, records the new row count, and allocates
 * num_rows + 1 ints for m_rows with cudaMalloc. Nothing is allocated when
 * num_rows is 0.
 *
 * @param num_rows  New number of rows.
 */
void Matrix::allocate_rows(int num_rows)
{
  deallocate_rows();
  set_num_rows(num_rows);

  if( num_rows != 0 )
  {
    // One entry more than the number of rows.
    CUDA_SAFE_CALL( cudaMalloc((void**) &m_rows, (num_rows+1)*sizeof(int)) );
  }
}
开发者ID:ABHISKUMAR,项目名称:nsight-gtc2013,代码行数:8,
示例18: deallocate_vals
/**
 * Releases the current value array, records the new value count, and
 * allocates 16 doubles per value for m_vals with cudaMalloc. Nothing is
 * allocated when num_vals is 0.
 *
 * @param num_vals  New number of values.
 */
void Matrix::allocate_vals(int num_vals)
{
  deallocate_vals();
  set_num_vals(num_vals);
  if( num_vals == 0 )
    return;

  // Widen before multiplying: `16*num_vals` used to be evaluated in int and
  // overflowed for num_vals >= 2^27, before the size_t conversion happened.
  const size_t num_bytes = static_cast<size_t>(num_vals) * 16 * sizeof(double);
  CUDA_SAFE_CALL( cudaMalloc((void**) &m_vals, num_bytes) );
}
开发者ID:ABHISKUMAR,项目名称:nsight-gtc2013,代码行数:8,
示例19: CUDA_SAFE_CALL
/**
 * Allocates arg_alloc_size bytes with cudaMallocManaged using the
 * cudaMemAttachGlobal flag.
 *
 * @param arg_alloc_size  Number of bytes to allocate.
 * @return The pointer written by cudaMallocManaged (NULL if it was left
 *         untouched).
 */
void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const
{
  void * managed = NULL;

  CUDA_SAFE_CALL( cudaMallocManaged( &managed, arg_alloc_size , cudaMemAttachGlobal ) );

  return managed ;
}
开发者ID:gurkih,项目名称:lammps,代码行数:8,
示例20: deallocate_cols
/**
 * Releases the current column array, records the new column count, and
 * allocates num_cols ints for m_cols with cudaMalloc. Nothing is allocated
 * when num_cols is 0.
 *
 * @param num_cols  New number of column entries.
 */
void Matrix::allocate_cols(int num_cols)
{
  deallocate_cols();
  set_num_cols(num_cols);

  if( num_cols != 0 )
  {
    CUDA_SAFE_CALL( cudaMalloc((void**) &m_cols, num_cols*sizeof(int)) );
  }
}
开发者ID:ABHISKUMAR,项目名称:nsight-gtc2013,代码行数:8,
示例21: cudaCreateChannelDesc
/**
 * Creates a 1D single-channel 32-bit float texture array of `width` elements
 * and starts uploading `data` (converted from double to float) into it.
 *
 * @param width   Number of elements.
 * @param data    `width` doubles to convert and upload.
 * @param action  When BindToKernel, the array is handed to
 *                api->setDistDepDielTexture().
 * @param stream  Stream on which the upload is enqueued.
 * @param api     Receiver of the texture binding; used only for BindToKernel.
 */
CudaFloatTexture1D::CudaFloatTexture1D(int width, const double *data, CudaAction action,
                                       cudaStream_t stream, CudaInternalAPI *api)
{
    // One 32-bit float channel.
    channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

    // Array storage plus a host staging buffer from cudaMallocHost.
    CUDA_SAFE_CALL(cudaMallocArray(&deviceArray, &channelDesc, width, 1));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&hostMem, sizeof(float) * width));

    // Narrow the input to float inside the staging buffer.
    std::transform(data, data + width, hostMem, typecast<float, double>);

    // Enqueue the upload from the staging buffer into the array.
    CUDA_SAFE_CALL(cudaMemcpyToArrayAsync(deviceArray, 0, 0, hostMem,
                                          sizeof(float) * width,
                                          cudaMemcpyHostToDevice, stream));

    if (action == BindToKernel)
    {
        api->setDistDepDielTexture(deviceArray, &channelDesc);
    }
}
开发者ID:marekolsak,项目名称:fastgrid,代码行数:18,
示例22: InitCUDA
/**
 * Selects the given CUDA device and prints its name.
 *
 * Prints a notice when the device count is zero, but does not stop there:
 * cudaSetDevice and cudaGetDeviceProperties are still called afterwards.
 *
 * @param device  Ordinal of the device to select.
 */
void InitCUDA(int device)
{
  // CUDA initialisation
  // (the original notes CUT_DEVICE_INIT() with --device=<n> as an alternative)
  int numDevices;
  CUDA_SAFE_CALL(cudaGetDeviceCount(&numDevices));
  if (numDevices == 0)
    {
      std::cout << "There is no device supporting CUDA" << std::endl;
    }

  CUDA_SAFE_CALL(cudaSetDevice(device));

  cudaDeviceProp props;
  CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, device));
  std::cout << "Device " << device << ": " << props.name << std::endl;
}
开发者ID:canercandan,项目名称:linear-algebra,代码行数:19,
示例23: CUDA_SAFE_CALL
/**
 * Releases everything owned by the compacting hash table: the base-class
 * resources, the four buffers freed with cudaFree, and the CUDPP scan plan.
 * All pointers, the plan handle and unique_keys_size_ are reset afterwards.
 */
void CompactingHashTable::Release() {
  HashTable::Release();

  // Free all four buffers first, then clear the pointers.
  CUDA_SAFE_CALL(cudaFree(d_unique_keys_));
  CUDA_SAFE_CALL(cudaFree(d_scratch_cuckoo_keys_));
  CUDA_SAFE_CALL(cudaFree(d_scratch_counts_));
  CUDA_SAFE_CALL(cudaFree(d_scratch_unique_ids_));

  d_unique_keys_         = NULL;
  d_scratch_cuckoo_keys_ = NULL;
  d_scratch_counts_      = NULL;
  d_scratch_unique_ids_  = NULL;

  // The plan is destroyed only when one exists.
  if (scanplan_) {
    cudppDestroyPlan(scanplan_);
  }
  scanplan_ = 0;

  unique_keys_size_ = 0;
}
开发者ID:DeepCV,项目名称:cudpp,代码行数:19,
示例24: CTfactory
/**
 * Copies a volume into a newly allocated 3D CUDA array and binds the given
 * texture reference to it.
 *
 * Aborts the process when the source volume has no data.
 *
 * @param src     Source volume; its d_data and dims are read.
 * @param texRef  Texture reference to configure and bind.
 * @param fm      Filter mode stored in texRef.
 * @param am      Address mode applied to all three texture axes.
 * @param norm    Value stored in texRef.normalized.
 */
CTfactory( const VolumeGPU<T>& src,
           U& texRef,
           const cudaTextureFilterMode fm = cudaFilterModePoint,
           const cudaTextureAddressMode am = cudaAddressModeClamp,
           const int norm = false ) : dca_data(NULL)
{
  // Reject empty input.
  if( src.d_data.ptr == NULL )
  {
    std::cerr << __FUNCTION__
              << ": Source has no data"
              << std::endl;
    abort();
  }

  // Allocate the array with the channel format of T.
  cudaChannelFormatDesc channel = cudaCreateChannelDesc<T>();
  cudaExtent volumeExtent = ExtentFromDims( src.dims );
  CUDA_SAFE_CALL( cudaMalloc3DArray( &(this->dca_data), &channel, volumeExtent ) );

  // Copy the volume into the array (kind: cudaMemcpyDeviceToDevice).
  cudaMemcpy3DParms copyParams = {0};
  copyParams.srcPtr   = src.d_data;
  copyParams.dstArray = this->dca_data;
  copyParams.extent   = volumeExtent;
  copyParams.kind     = cudaMemcpyDeviceToDevice;
  CUDA_SAFE_CALL( cudaMemcpy3D( &copyParams ) );

  // Configure the texture reference and bind it.
  texRef.normalized = norm;
  for( int axis = 0; axis < 3; axis++ )
  {
    texRef.addressMode[axis] = am;
  }
  texRef.filterMode = fm;

  CUDA_SAFE_CALL( cudaBindTextureToArray( texRef, this->dca_data ) );
}
开发者ID:guo2004131,项目名称:freesurfer,代码行数:42,
示例25: D_FREE
//------------------------------------------------
// Free memory on the device.
//
// buf is received by value, so the caller's pointer is NOT changed by this
// function; callers that need a null pointer afterwards must reset their own
// copy.
//
// When __DEBUG__ is defined the running total d_dmemUsage is decreased by
// size; with __ALLOC__ also defined the release is logged via BenLog.
//
// param : buf   pointer passed to cudaFree
// param : size  byte count used only for the debug accounting
//------------------------------------------------
void D_FREE(void *buf, size_t size)
{
	CUDA_SAFE_CALL(cudaFree(buf));
	// The former `buf = NULL;` only modified the local copy of the pointer
	// and was removed as a dead store.
#ifdef __DEBUG__
#	ifdef __ALLOC__
	// size_t is printed through an explicit unsigned long cast; the previous
	// "%d" did not match the argument type, and "/n" was a mangled "\n".
	// NOTE(review): assumes BenLog takes printf-style format strings - confirm.
	BenLog("-d%lu bytes\n", (unsigned long)size);
#	endif //__ALLOC__
	d_dmemUsage -= size;
#endif
}
开发者ID:xiaobaidemu,项目名称:GPU_Apriori,代码行数:16,
示例26: CUDA_SAFE_CALL
/**
 * Returns the number of CUDA devices reported by cudaGetDeviceCount, or 0
 * when the code is built without __GEM_USE_CUDA__.
 *
 * @return Device count as an unsigned value.
 */
unsigned int cSystem::getNumGPUs(void)
{
    // Initialized up front so the result is well defined even if the query
    // below fails without writing to nGPU (it used to be left uninitialized
    // on that path).
    int nGPU = 0;

#ifdef __GEM_USE_CUDA__
    CUDA_SAFE_CALL(cudaGetDeviceCount(&nGPU));
#endif

    return (unsigned int) nGPU;
}
开发者ID:hvthaibk,项目名称:ccrunch,代码行数:12,
示例27: finalizeEvents
/**
 * Finishes a timing measurement: checks for a pending error, records and
 * waits for the stop event, reads the elapsed time between the two events,
 * and destroys both of them.
 *
 * @param start  Event recorded earlier by the caller; destroyed here.
 * @param stop   Event recorded here on stream 0; destroyed here.
 * @return The value written by hipEventElapsedTime.
 */
float finalizeEvents(hipEvent_t start, hipEvent_t stop)
{
    float elapsed;

    // Report any error left over from the timed work before measuring.
    CUDA_SAFE_CALL( hipGetLastError() );

    CUDA_SAFE_CALL( hipEventRecord(stop, 0) );
    CUDA_SAFE_CALL( hipEventSynchronize(stop) );
    CUDA_SAFE_CALL( hipEventElapsedTime(&elapsed, start, stop) );

    // Both events are owned by this call from here on.
    CUDA_SAFE_CALL( hipEventDestroy(start) );
    CUDA_SAFE_CALL( hipEventDestroy(stop) );

    return elapsed;
}
开发者ID:scchan,项目名称:hcc_perf,代码行数:10,
注:本文中的CUDA_SAFE_CALL函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 C++ CUDNN_CHECK函数代码示例 C++ CUDA_CHECK函数代码示例 |