这篇教程C++ wtime函数代码示例写得很实用,希望能帮到您。
/*
 * This article collects typical usage examples of the wtime function in C++.
 * If you are wondering how exactly wtime is used in C++, how to call it, or
 * are looking for examples of it in use, the hand-picked code samples here
 * may help. 30 code examples of wtime are shown below; by default they are
 * sorted by popularity. You can upvote the samples you like or find useful;
 * your feedback helps our system recommend better C++ code examples.
 */

// Example 1: main
/*
 * Bubble-sort benchmark.
 *
 * Reads the array length from argv[1], fills an array with values from
 * getrand(0, 100000), bubble-sorts it, and appends one "<size> <seconds>"
 * record to bubblesort.dat. Only the sort itself is timed with wtime().
 *
 * Returns EXIT_SUCCESS on success. Returns EXIT_FAILURE when the size
 * argument is missing or not positive, when the array cannot be allocated,
 * or when the output file cannot be opened.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        printf("Missing size of array!\n");
        return EXIT_FAILURE;
    }

    int size_array = atoi(argv[1]);
    if (size_array <= 0) {
        /* atoi() yields 0 for non-numeric input; do not record a bogus sample. */
        printf("Size of array must be positive!\n");
        return EXIT_FAILURE;
    }

    /* Size the allocation from the element type actually stored. */
    int *array = (int *) malloc((size_t) size_array * sizeof(*array));
    if (array == NULL) {
        printf("Could not allocate array of %d elements!\n", size_array);
        return EXIT_FAILURE;
    }

    for (int i = 0; i < size_array; i++) {
        array[i] = getrand(0, 100000);
    }

    double elapsed = wtime();
    for (int i = 0; i < size_array - 1; i++) {
        /* After pass i the last i elements are already in their final place. */
        for (int j = 0; j < size_array - i - 1; j++) {
            if (array[j] > array[j + 1]) {
                int tmp = array[j];
                array[j] = array[j + 1];
                array[j + 1] = tmp;
            }
        }
    }
    elapsed = wtime() - elapsed;

    FILE *tb = fopen("bubblesort.dat", "a");
    if (tb == NULL) {
        perror("bubblesort.dat");
        free(array);
        return EXIT_FAILURE;
    }
    fprintf(tb, "%d %.6f\n", size_array, elapsed);
    fclose(tb);

    free(array);
    return EXIT_SUCCESS;
}
开发者ID:evg-kazartseff,项目名称:DSA,代码行数:40,
示例2: mainint main(int argc, char **argv){ int i, me, target; unsigned int size; double t; MPI_Status status; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &me); target = 1 - me; init_buf(send_buf, me); init_buf(recv_buf, target); if(me==0) print_items(); for(size=1;size<MAX_SIZE+1;size*=2){ MPI_Barrier(MPI_COMM_WORLD); for(i=0;i<LOOP+WARMUP;i++){ if(WARMUP == i) t = wtime(); if(me == 0){ MPI_Send(send_buf, size, MPI_CHAR, target, 9, MPI_COMM_WORLD); MPI_Recv(recv_buf, size, MPI_CHAR, target, 5, MPI_COMM_WORLD, &status); } else { MPI_Recv(recv_buf, size, MPI_CHAR, target, 9, MPI_COMM_WORLD, &status); MPI_Send(send_buf, size, MPI_CHAR, target, 5, MPI_COMM_WORLD); } } MPI_Barrier(MPI_COMM_WORLD); t = wtime() - t; if(me == 0) print_results(size, t); } MPI_Finalize(); return 0;}
开发者ID:mnakao,项目名称:pingpong,代码行数:40,
示例3: startrun// startrun: startup hierarchical N-body code.// ___________________________________________// This runs once. local void startrun(void) { printf("startrun/n"); startrun_time_0 = wtime(); bodyptr p1, p2, p; stream gravstr; define_body(sizeof(body), Precision, NDIM); // setup phat body struct define_body_offset(PosTag, BodyOffset(Pos)); define_body_offset(VelTag, BodyOffset(Vel)); define_body_offset(MassTag, BodyOffset(Mass)); define_body_offset(PhiTag, BodyOffset(Phi)); define_body_offset(AccTag, BodyOffset(Acc)); infile = getparam("in"); // set I/O file names outfile = getparam("out"); savefile = getparam("save"); if (strnull(getparam("restore"))) // starting a new run? newrun(); else // else resume old run oldrun(); if (ABS(nstatic) > nbody) // check nstatic is OK error("%s: absurd value for nstatic/n", getargv0()); p1 = bodytab + MAX(nstatic, 0); // set dynamic body range p2 = bodytab + nbody + MIN(nstatic, 0); testcalc = TRUE; // determine type of calc: for (p = p1; p < p2; p++) testcalc = testcalc && (Mass(p) == 0); // look for dynamic masses strfile = getparam("stream"); logfile = getparam("log");#if defined(EXTGRAV) if (! strnull(getparam("gravgsp"))) { // was GSP file given? gravstr = stropen(getparam("gravgsp"), "r"); get_history(gravstr); gravgsp = get_gsprof(gravstr); // read external field GSP strclose(gravstr); }#endif startrun_time_1 = wtime();}
开发者ID:jasminegrosso,项目名称:zeno,代码行数:42,
// Example 4: main
/*
 * Dot-product benchmark.
 *
 * Usage: <program> <vector length> <repetitions>
 *
 * Allocates two vectors of doubles, initializes them with init_vectors(),
 * times a single call to dot_product(a, b, n, repeat) and prints the result
 * and the elapsed time.
 *
 * NOTE(review): the elapsed time is printed with a "usec" label, so wtime()
 * presumably returns microseconds in this project - confirm.
 *
 * Exits with EXIT_FAILURE on a wrong argument count or allocation failure.
 */
int main(int argc, char **argv)
{
    int n;
    int repeat;
    double dot;
    long start_time, end_time;

    if ((argc != 3)) {
        printf("Uso: %s <tamanho dos vetores> <repeticoes>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    n = atoi(argv[1]);       // vector length
    repeat = atoi(argv[2]);  // number of repetitions (to vary the load)

    // Create the vectors
    double *a = (double *) malloc(sizeof(double) * n);
    double *b = (double *) malloc(sizeof(double) * n);
    if (a == NULL || b == NULL) {
        printf("Erro de alocacao de memoria\n");
        exit(EXIT_FAILURE);
    }

    init_vectors(a, b, n);

    start_time = wtime();
    dot = dot_product(a, b, n, repeat);
    end_time = wtime();

    printf("Produto escalar = %f\n", dot);
    printf("Tempo de calculo = %ld usec\n", (long) (end_time - start_time));

    free((void *) a);
    free((void *) b);

    return EXIT_SUCCESS;
}
开发者ID:AndreaInfUFSM,项目名称:elc139-2016a,代码行数:39,
示例5: mainint main(int argc, char **argv) { pthread_t thread1; int ret = -1; int i = 0; double time1, time2; ret = pthread_create(&thread1, NULL, thread1_fn, NULL); assert(ret == 0); time1 = wtime(); for(i=0; i<ITERATIONS; i++) { wakeywakey(); } time2 = wtime(); printf("time for %d iterations: %f seconds./n", ITERATIONS, (time2-time1)); printf("per iteration: %f/n", (time2-time1)/(double)ITERATIONS); return(0);}
开发者ID:Goon83,项目名称:SALB,代码行数:22,
示例6: init_synchronizationvoid init_synchronization(void){ current_synchronization = form_of_synchronization; max_counter = First_max_counter; interval = First_interval; first_measurement_run = True; logging(DBG_SYNC, "starting with max_counter = %d interval = %9.1f/n", max_counter, interval*1.0e6); if( current_synchronization == SYNC_REAL) { if( ! mpi_wtime_is_global ) determine_time_differences(); if( lrootproc() ) start_batch = wtime(); logging(DBG_SYNC, "---- new start_batch ----------------/n"); MPI_Bcast(&start_batch, 1, MPI_DOUBLE, 0, get_measurement_comm()); }}
开发者ID:jonarbo,项目名称:KUBE,代码行数:17,
示例7: whiledepth_t bc::bfs_sssp( index_t root){ sa[root] = 0; sp_count[root] = 1; depth_t level = 0; dist[root]=0; while(true) { double ltm= wtime(); index_t front_count = 0; for(vertex_t vert_id = 0; vert_id<g->vert_count; vert_id++) { if(sa[vert_id] == level) { index_t my_beg = g->beg_pos[vert_id]; index_t my_end = g->beg_pos[vert_id + 1]; for(; my_beg<my_end; my_beg++) { vertex_t nebr=g->csr[my_beg]; path_t weit=g->weight[my_beg]; if(dist[nebr]>dist[vert_id]+weit) { dist[nebr]=dist[vert_id]+weit; sp_count[nebr]=0; //prior parent is wrong sa[nebr]=level+1; front_count++; } if(dist[nebr]==dist[vert_id]+weit) sp_count[nebr]+=sp_count[vert_id]; } } }// std::cout<<"Level "<<(int) level<<": "<<front_count<<" "// <<wtime() - ltm<<"/n"; if(front_count == 0) break; level ++; } return level+1;}
开发者ID:ChickenRunjyd,项目名称:cpu_bc,代码行数:46,
示例8: stop_synchronizationdouble stop_synchronization(void){ stop_batch = stop_sync = wtime(); if( current_synchronization == SYNC_REAL ) { if( stop_sync - start_sync > interval ) invalid[counter] = INVALID_TOOK_TOO_LONG; logging(DBG_SYNC, "stop_sync = %9.1f ", normalize_time(stop_sync)); switch( invalid[counter] ) { case INVALID_TOOK_TOO_LONG: logging(DBG_SYNC, "invalid_too_long/n"); break; case INVALID_STARTED_LATE: logging(DBG_SYNC, "invalid_started_late/n"); break; default: logging(DBG_SYNC, "/n"); } } return stop_sync;}
开发者ID:jonarbo,项目名称:KUBE,代码行数:19,
示例9: mainint main(){ double t; int i, me, target; unsigned int size; me = xmp_node_num(); target = 3 - me; init_buf(local_buf, me); init_buf(target_buf, me); if(me==1) print_items(); for(size=4;size<MAX_SIZE+1;size*=2){ // size must be more than 4 when using Fujitsu RDMA xmp_sync_all(NULL); for(i=0;i<LOOP+WARMUP;i++){ if(WARMUP == i) t = wtime(); if(me == 1){ local_buf[0:size] = target_buf[0:size]:[target]; xmp_sync_memory(NULL);#ifdef DEBUG if(local_buf[0] != '2' && local_buf[size-1] != '2') fprintf(stderr, "Error !/n"); local_buf[0] = '1'; local_buf[size-1] = '1';#endif xmp_sync_all(NULL); } else{ xmp_sync_all(NULL); local_buf[0:size] = target_buf[0:size]:[target];#ifdef DEBUG if(local_buf[0] != '1' && local_buf[size-1] != '1') fprintf(stderr, "Error !/n"); local_buf[0] = '2'; local_buf[size-1] = '2';#endif } xmp_sync_all(NULL); }
开发者ID:mnakao,项目名称:pingpong,代码行数:39,
// Example 10: main
/*
 * Times RunThoughLinkedList() against RunExplicit() after a common Init(),
 * and prints the time of each phase plus the total, all taken with wtime().
 */
int main()
{
    Init();

    double start = wtime();

    double start_linked_list = wtime();
    RunThoughLinkedList();
    double end_linked_list = wtime();

    double start_explicit = wtime();
    RunExplicit();
    double end_explicit = wtime();

    double end = wtime();

    printf("Time through Linked List %7.2f\n"
           "Time through explicit %7.2f\n"
           "Total Time taken %7.2f\n",
           end_linked_list - start_linked_list,
           end_explicit - start_explicit,
           end - start);

    return 0;
}
开发者ID:hjmjohnson,项目名称:XEParallelProg,代码行数:20,
示例11: main//.........这里部分代码省略......... goto ENDOFTESTS; } else length = total_length/Num_procs; offset = atol(*++argv); if (offset < 0) { printf("ERROR: Invalid array offset: %ld/n", offset); error = 1; goto ENDOFTESTS; }#ifdef STATIC_ALLOCATION if ((3*length + 2*offset) > N) { printf("ERROR: vector length/offset %ld/%ld too ", total_length, offset); printf("large; increase MAXLENGTH in Makefile or decrease vector length/n"); error = 1; goto ENDOFTESTS; }#endifENDOFTESTS: ; } bail_out(error); /* broadcast initialization data */ MPI_Bcast(&length,1, MPI_LONG, root, MPI_COMM_WORLD); MPI_Bcast(&offset,1, MPI_LONG, root, MPI_COMM_WORLD); MPI_Bcast(&iterations,1, MPI_INT, root, MPI_COMM_WORLD);#ifndef STATIC_ALLOCATION space = (3*length + 2*offset)*sizeof(double); a = (double *) malloc(space); if (!a && my_ID == root) { printf("ERROR: Could not allocate %ld bytes for vectors/n", (long int)space); error = 1; } bail_out(error);#endif b = a + length + offset; c = b + length + offset; bytes = 3.0 * sizeof(double) * length * Num_procs; if (my_ID == root) { printf("Number of processes = %d/n", Num_procs); printf("Vector length = %ld/n", total_length); printf("Offset = %ld/n", offset); printf("Number of iterations = %d/n", iterations); }#pragma vector always for (j=0; j<length; j++) { a[j] = 0.0; b[j] = 2.0; c[j] = 2.0; } /* --- MAIN LOOP --- repeat Triad iterations times --- */ scalar = SCALAR; for (iter=0; iter<iterations; iter++) { MPI_Barrier(MPI_COMM_WORLD); if (my_ID == root) { nstream_time = wtime(); }#pragma vector always for (j=0; j<length; j++) a[j] = b[j]+scalar*c[j]; if (my_ID == root) { if (iter>0 || iterations==1) { /* skip the first iteration */ nstream_time = wtime() - nstream_time; avgtime = avgtime + nstream_time; mintime = MIN(mintime, nstream_time); maxtime = MAX(maxtime, nstream_time); } } /* insert a dependency between iterations to avoid dead-code elimination */#pragma vector always for (j=0; j<length; j++) b[j] = a[j]; } 
/********************************************************************* ** Analyze and output results. *********************************************************************/ if (my_ID == root) { if (checkTRIADresults(iterations, length)) { avgtime = avgtime/(double)(MAX(iterations-1,1)); printf("Rate (MB/s): %lf, Avg time (s): %lf, Min time (s): %lf", 1.0E-06 * bytes/mintime, avgtime, mintime); printf(", Max time (s): %lf/n", maxtime); } else error = 1; } bail_out(error); MPI_Finalize();}
开发者ID:molguin-qc,项目名称:ParResKernels,代码行数:101,
示例12: mainint main( int argc, char *argv[] ){ unsigned iter; FILE *infile, *resfile; char *resfilename; // algorithmic parameters algoparam_t param; int np; double runtime, flop; double residual=0.0; // check arguments if( argc < 2 ) { usage( argv[0] ); return 1; } // check input file if( !(infile=fopen(argv[1], "r")) ) { fprintf(stderr, "/nError: Cannot open /"%s/" for reading./n/n", argv[1]); usage(argv[0]); return 1; } // check result file resfilename= (argc>=3) ? argv[2]:"heat.ppm"; if( !(resfile=fopen(resfilename, "w")) ) { fprintf(stderr, "/nError: Cannot open /"%s/" for writing./n/n", resfilename); usage(argv[0]); return 1; } // check input if( !read_input(infile, ¶m) ) { fprintf(stderr, "/nError: Error parsing input file./n/n"); usage(argv[0]); return 1; } print_params(¶m); if( !initialize(¶m) ) { fprintf(stderr, "Error in Solver initialization./n/n"); usage(argv[0]); return 1; } // full size (param.resolution are only the inner points) np = param.resolution + 2; #if _EXTRAE_ Extrae_init();#endif // starting time runtime = wtime(); iter = 0; while(1) { switch( param.algorithm ) { case 0: // JACOBI residual = relax_jacobi(param.u, param.uhelp, np, np); // Copy uhelp into u copy_mat(param.uhelp, param.u, np, np); break; case 1: // GAUSS residual = relax_gauss(param.u, np, np); break; } iter++; // solution good enough ? if (residual < 0.00005) break; // max. iteration reached ? (no limit with maxiter=0) if (param.maxiter>0 && iter>=param.maxiter) break; } // Flop count after iter iterations flop = iter * 11.0 * param.resolution * param.resolution; // stopping time runtime = wtime() - runtime;#if _EXTRAE_ Extrae_fini();#endif fprintf(stdout, "Time: %04.3f /n", runtime);//.........这里部分代码省略.........
开发者ID:AlbertSuarez,项目名称:PAR-Labs,代码行数:101,
示例13: mainint main(int argc, char* argv[]){ double t1, t2, t3, t4, t5; double sum1, sum2, sum3, sum4; int arg = 1, len = 0, iters = 0, verb = 0, run = 1; int do_vcopy = 1, do_vadd = 1, do_vjacobi = 1; while(argc>arg) { if (strcmp(argv[arg],"-v")==0) verb++; else if (strcmp(argv[arg],"-vv")==0) verb+=2; else if (strcmp(argv[arg],"-n")==0) run = 0; else if (strcmp(argv[arg],"-c")==0) do_vadd = 0, do_vjacobi = 0; else if (strcmp(argv[arg],"-a")==0) do_vcopy = 0, do_vjacobi = 0; else if (strcmp(argv[arg],"-j")==0) do_vcopy = 0, do_vadd = 0; else break; arg++; } if (argc>arg) { len = atoi(argv[arg]); arg++; } if (argc>arg) { iters = atoi(argv[arg]); arg++; } if (len == 0) len = 10000; if (iters == 0) iters = 20; len = len * 1000; printf("Alloc/init 3 double arrays of length %d .../n", len); double* a = (double*) malloc(len * sizeof(double)); double* b = (double*) malloc(len * sizeof(double)); double* c = (double*) malloc(len * sizeof(double)); for(int i = 0; i<len; i++) { a[i] = 1.0; b[i] = (double) (i % 20); c[i] = 3.0; } // Generate vectorized variants & run against naive/original#if __AVX__ bool do32 = true;#else bool do32 = false;#endif // vcopy if (do_vcopy) { vcopy_t vcopy16, vcopy32; Rewriter* rc16 = dbrew_new(); if (verb>1) dbrew_verbose(rc16, true, true, true); dbrew_set_function(rc16, (uint64_t) vcopy); dbrew_config_parcount(rc16, 3); dbrew_config_force_unknown(rc16, 0); dbrew_set_vectorsize(rc16, 16); vcopy16 = (vcopy_t) dbrew_rewrite(rc16, a, b, len); if (verb) decode_func(rc16, "vcopy16"); if (do32) { Rewriter* rc32 = dbrew_new(); if (verb>1) dbrew_verbose(rc32, true, true, true); dbrew_set_function(rc32, (uint64_t) vcopy); dbrew_config_parcount(rc32, 3); dbrew_config_force_unknown(rc32, 0); dbrew_set_vectorsize(rc32, 32); vcopy32 = (vcopy_t) dbrew_rewrite(rc32, a, b, len); if (verb) decode_func(rc32, "vcopy32"); } printf("Running %d iterations of vcopy .../n", iters); t1 = wtime(); for(int iter = 0; iter < iters; iter++) naive_vcopy(a, b, len); t2 = 
wtime(); for(int iter = 0; iter < iters; iter++) vcopy(a, b, len); t3 = wtime(); if (run) for(int iter = 0; iter < iters; iter++) vcopy16(a, b, len); t4 = wtime(); if (do32 && run) for(int iter = 0; iter < iters; iter++) vcopy32(a, b, len); t5 = wtime(); printf(" naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s", t2-t1, t3-t2, t4-t3); if (do32) printf(", rewritten-32: %.3f s", t5-t4); printf("/n"); } // vadd if (do_vadd) { vadd_t vadd16, vadd32; Rewriter* ra16 = dbrew_new(); if (verb>1) dbrew_verbose(ra16, true, true, true); dbrew_set_function(ra16, (uint64_t) vadd); dbrew_config_parcount(ra16, 4); dbrew_config_force_unknown(ra16, 0);//.........这里部分代码省略.........
开发者ID:lrr-tum,项目名称:dbrew,代码行数:101,
示例14: main//.........这里部分代码省略......... bail_out(error); } /* Fill the original column matrix */ istart = 0; int chunk_size = Block_order/group_size; if (tiling) { for (j=shm_ID*chunk_size;j<(shm_ID+1)*chunk_size;j+=Tile_order) { for (i=0;i<order; i+=Tile_order) for (jt=j; jt<MIN((shm_ID+1)*chunk_size,j+Tile_order); jt++) for (it=i; it<MIN(order,i+Tile_order); it++) { A(it,jt) = (double) ((double)order*(jt+colstart) + it); B(it,jt) = -1.0; } } } else { for (j=shm_ID*chunk_size;j<(shm_ID+1)*chunk_size;j++) for (i=0;i<order; i++) { A(i,j) = (double)((double)order*(j+colstart) + i); B(i,j) = -1.0; } } /* NEED A STORE FENCE HERE */ MPI_Win_sync(shm_win_A); MPI_Win_sync(shm_win_B); MPI_Barrier(shm_comm); for (iter=0; iter<=iterations; iter++) { /* start timer after a warmup iteration */ if (iter == 1) { MPI_Barrier(MPI_COMM_WORLD); local_trans_time = wtime(); } /* do the local transpose */ istart = colstart; if (!tiling) { for (i=shm_ID*chunk_size; i<(shm_ID+1)*chunk_size; i++) { for (j=0; j<Block_order; j++) B(j,i) = A(i,j); } } else { for (i=shm_ID*chunk_size; i<(shm_ID+1)*chunk_size; i+=Tile_order) { for (j=0; j<Block_order; j+=Tile_order) for (it=i; it<MIN(Block_order,i+Tile_order); it++) for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) { B(jt,it) = A(it,jt); } } } for (phase=1; phase<Num_groups; phase++){ recv_from = ((group_ID + phase )%Num_groups); send_to = ((group_ID - phase + Num_groups)%Num_groups); istart = send_to*Block_order; if (!tiling) { for (i=shm_ID*chunk_size; i<(shm_ID+1)*chunk_size; i++) for (j=0; j<Block_order; j++){ Work_out(j,i) = A(i,j); } } else {
开发者ID:nchaimov,项目名称:ParResKernels,代码行数:67,
// Example 15: multiply_by_blas
/*---------------------------------------------------------------------------
 *
 * Compute matrix product using BLAS routine DGEMM.
 *
 * Input
 *   int argc        - length of argv[] array (not inspected here)
 *   char* argv[]    - pointer to command line parameter array; argv[0],
 *                     argv[1] and argv[2] are parsed as rows, mids and
 *                     columns respectively
 *   int verbosity   - program verification: verbosity > 0 gives more output
 *
 * Output
 *   double          - elapsed time for product computation
 */
double multiply_by_blas( int argc, char* argv[], int verbosity )
{
    int rows, cols, mids;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    rows = atoi( argv[0] );
    mids = atoi( argv[1] );
    cols = atoi( argv[2] );
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "BLAS: rows = %d, mids = %d, columns = %d\n",
                rows, mids, cols );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );
    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product: There is an implicit matrix transpose when
     * passing from Fortran to C and vice-versa.  To compute C :=
     * alpha * A * B + beta * C we use dgemm() to compute C' := alpha
     * * B' * A' + beta * C'.  The first two arguments to dgemm() are
     * 'N' indicating we don't want a transpose in addition to the
     * implicit one.  The matrices A and B are passed in reverse order
     * so dgemm() receives (after the implicit transpose) B' and A'.
     * Arguments 3 and 4 are the dimensions of C' and argument 5 is
     * the column dimension of B' (and the row dimension of A').
     */
    t1 = wtime();
    dgemm( 'N', 'N', cols, rows, mids, 1.0, &b[0][0], cols, &a[0][0], mids,
           0.0, &c[0][0], cols );
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
        printf( "checksum = %f\n", checksum( c, rows, cols ) );

    printf( "BLAS: %6.3f secs %6.3f gflops ( %5d x %5d x %5d )\n",
            sec, gflop_count / sec, rows, mids, cols );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return sec;
}
开发者ID:gordon-cs,项目名称:cps343-hoe,代码行数:74,
示例16: mainint main(int argc, char ** argv){ int vector_length; /* length of vectors to be aggregated */ int total_length; /* bytes needed to store reduction vectors */ double reduce_time, /* timing parameters */ avgtime = 0.0, maxtime = 0.0, mintime = 366.0*24.0*3600.0; /* set the minimum time to a large value; one leap year should be enough */ double epsilon=1.e-8; /* error tolerance */ int i, iter; /* dummies */ double element_value; /* reference element value for final vector */ int iterations; /* number of times the reduction is carried out */ static double /* use static so it goes on the heap, not stack */ RESTRICT vector[MEMWORDS];/* we would like to allocate "vector" dynamically, but need to be able to flush the thing in some versions of the reduction algorithm -> static *//******************************************************************************* process and test input parameters ******************************************************************************/ if (argc != 3){ printf("Usage: %s <# iterations> <vector length>/n", *argv); return(EXIT_FAILURE); } iterations = atoi(*++argv); if (iterations < 1){ printf("ERROR: Iterations must be positive : %d /n", iterations); exit(EXIT_FAILURE); } vector_length = atoi(*++argv); if (vector_length < 1){ printf("ERROR: vector length must be >= 1 : %d /n",vector_length); exit(EXIT_FAILURE); } /* make sure we stay within the memory allocated for vector */ total_length = 2*vector_length; if (total_length/2 != vector_length || total_length > MEMWORDS) { printf("Vector length of %d too large; ", vector_length); printf("increase MEMWORDS in Makefile or reduce vector length/n"); exit(EXIT_FAILURE); } printf("Serial Vector Reduction/n"); printf("Vector length = %d/n", vector_length); printf("Number of iterations = %d/n", iterations); for (iter=0; iter<iterations; iter++) { /* initialize the arrays, assuming first-touch memory placement */ for (i=0; i<vector_length; i++) { VEC0(i) = (double)(1); VEC1(i) = (double)(2); } 
reduce_time = wtime(); /* do actual reduction */ /* first do the "local" part, which is the same for all algorithms */ for (i=0; i<vector_length; i++) { VEC0(i) += VEC1(i); } reduce_time = wtime() - reduce_time;#ifdef VERBOSE printf("/nFinished with reduction, using %lf seconds /n", reduce_time);#endif if (iter>0 || iterations==1) { /* skip the first iteration */ avgtime = avgtime + reduce_time; mintime = MIN(mintime, reduce_time); maxtime = MAX(maxtime, reduce_time); } } /* end of iter loop */ /* verify correctness */ element_value = (2.0+1.0); for (i=0; i<vector_length; i++) { if (ABS(VEC0(i) - element_value) >= epsilon) { printf("First error at i=%d; value: %lf; reference value: %lf/n", i, VEC0(i), element_value); exit(EXIT_FAILURE); } } printf("Solution validates/n");#ifdef VERBOSE printf("Element verification value: %lf/n", element_value);#endif avgtime = avgtime/(double)(MAX(iterations-1,1)); printf("Rate (MFlops/s): %lf, Avg time (s): %lf, Min time (s): %lf", 1.0E-06 * (2.0-1.0)*vector_length/mintime, avgtime, mintime); printf(", Max time (s): %lf/n", maxtime); exit(EXIT_SUCCESS);}
开发者ID:jbreitbart,项目名称:Kernels,代码行数:100,
示例17: xmp_sync_memory if(me == 1){ local_buf[0:size] = target_buf[0:size]:[target]; xmp_sync_memory(NULL);#ifdef DEBUG if(local_buf[0] != '2' && local_buf[size-1] != '2') fprintf(stderr, "Error !/n"); local_buf[0] = '1'; local_buf[size-1] = '1';#endif xmp_sync_all(NULL); } else{ xmp_sync_all(NULL); local_buf[0:size] = target_buf[0:size]:[target];#ifdef DEBUG if(local_buf[0] != '1' && local_buf[size-1] != '1') fprintf(stderr, "Error !/n"); local_buf[0] = '2'; local_buf[size-1] = '2';#endif } xmp_sync_all(NULL); } xmp_sync_all(NULL); t = wtime() - t; if(me == 1) print_results(size, t); } return 0;}
开发者ID:mnakao,项目名称:pingpong,代码行数:29,
示例18: wtimestatic doublewtime(){ static struct timeval tv0 = {.tv_sec = 0}; struct timeval tv; int cc; cc = gettimeofday(&tv, 0); assert(cc == 0); if (tv0.tv_sec == 0) { tv0 = tv; assert(tv0.tv_sec != 0); } double dt = ((double)(tv.tv_sec - tv0.tv_sec) + ((double)(tv.tv_usec - tv0.tv_usec) * 1e-6)); return dt;}/* Puts 200 key-value pairs to output KVO. It is a map-function. It runs only on rank0. Inputs (KV0 and KVS0) are dummy. */static intaddkeysfn(const struct kmr_kv_box kv0, const KMR_KVS *kvs0, KMR_KVS *kvo, void *p, const long ind){ assert(kvs0 == 0 && kv0.klen == 0 && kv0.vlen == 0 && kvo != 0); char k[80]; char v[80]; int cc; for (int i = 0; i < 200; i++) { snprintf(k, 80, "key%d", i); snprintf(v, 80, "value%d", i); struct kmr_kv_box kv = { .klen = (int)(strlen(k) + 1), .vlen = (int)(strlen(v) + 1), .k.p = k, .v.p = v }; cc = kmr_add_kv(kvo, kv); assert(cc == MPI_SUCCESS); } return MPI_SUCCESS;}static intreplacevaluefn(const struct kmr_kv_box kv0, const KMR_KVS *kvs0, KMR_KVS *kvo, void *p, const long i){ assert(kvs0 != 0 && kvo != 0); int cc, x; char gomi; cc = sscanf((&((char *)kv0.k.p)[3]), "%d%c", &x, &gomi); assert(cc == 1); char v[80]; snprintf(v, 10, "newvalue%d", x); struct kmr_kv_box kv = {.klen = kv0.klen, .vlen = (int)(strlen(v) + 1), .k.p = kv0.k.p, .v.p = v }; cc = kmr_add_kv(kvo, kv); assert(cc == MPI_SUCCESS); return MPI_SUCCESS;}static intemptyreducefn(const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p){ return MPI_SUCCESS;}/* Do KMR operations many times. */static voidsimple0(int nprocs, int rank){ int cc; KMR *mr = kmr_create_context(MPI_COMM_WORLD, MPI_INFO_NULL, 0); double t0, t1; t0 = wtime(); for (int i = 0; i < 10000; i++) { /* Check timeout. 
*/ t1 = wtime(); KMR_KVS *to0 = kmr_create_kvs(mr, KMR_KV_INTEGER, KMR_KV_INTEGER); if (rank == 0) { struct kmr_kv_box kv = { .klen = (int)sizeof(long), .vlen = (int)sizeof(long), .k.i = 0, .v.i = ((t1 - t0) > 20.0) }; cc = kmr_add_kv(to0, kv); assert(cc == MPI_SUCCESS); }//.........这里部分代码省略.........
开发者ID:hisashiyashiro,项目名称:kmr,代码行数:101,
示例19: main//.........这里部分代码省略......... } #pragma omp parallel private(i, old_size, group_size, my_ID, iter, start, end, / segment_size, stage, id, my_donor, my_segment) { my_ID = omp_get_thread_num(); #pragma omp master { nthread = omp_get_num_threads(); if (nthread != nthread_input) { num_error = 1; printf("ERROR: number of requested threads %d does not equal ", nthread_input); printf("number of spawned threads %d/n", nthread); } else { printf("Number of threads = %d/n",nthread_input); printf("Vector length = %ld/n", vector_length); printf("Reduction algorithm = %s/n", algorithm); printf("Number of iterations = %d/n", iterations); } } bail_out(num_error); for (iter=0; iter<=iterations; iter++) { /* start timer after a warmup iteration */ if (iter == 1) { #pragma omp barrier #pragma omp master { reduce_time = wtime(); } } /* in case of the long-optimal algorithm we need a barrier before the reinitialization to make sure that we don't overwrite parts of the vector before other threads are done with those parts */ if (intalgorithm == LONG_OPTIMAL) { #pragma omp barrier } /* initialize the arrays, assuming first-touch memory placement */ for (i=0; i<vector_length; i++) { VEC0(my_ID,i) = (double)(my_ID+1); VEC1(my_ID,i) = (double)(my_ID+1+nthread); } if (intalgorithm == BINARY_P2P) { /* we need a barrier before setting all flags to zero, to avoid zeroing some that are still in use in a previous iteration */ #pragma omp barrier flag(my_ID) = 0; /* we also need a barrier after setting the flags, to make each is visible to all threads, and to synchronize before the timer starts */ #pragma omp barrier } /* do actual reduction */ /* first do the "local" part, which is the same for all algorithms */ for (i=0; i<vector_length; i++) { VEC0(my_ID,i) += VEC1(my_ID,i);
开发者ID:davidozog,项目名称:Kernels,代码行数:67,
示例20: main//.........这里部分代码省略......... /* intialize the input and output arrays */ #pragma omp parallel for private (i) for (j=jstart; j<=jend; j++) for (i=istart; i<=iend; i++) { IN(i,j) = COEFX*i+COEFY*j; OUT(i,j) = (DTYPE)0.0; } /* allocate communication buffers for halo values */ top_buf_out = (DTYPE *) prk_malloc(4*sizeof(DTYPE)*RADIUS*width); if (!top_buf_out) { printf("ERROR: Rank %d could not allocated comm buffers for y-direction/n", my_ID); error = 1; } bail_out(error); top_buf_in = top_buf_out + RADIUS*width; bottom_buf_out = top_buf_out + 2*RADIUS*width; bottom_buf_in = top_buf_out + 3*RADIUS*width; right_buf_out = (DTYPE *) prk_malloc(4*sizeof(DTYPE)*RADIUS*height); if (!right_buf_out) { printf("ERROR: Rank %d could not allocated comm buffers for x-direction/n", my_ID); error = 1; } bail_out(error); right_buf_in = right_buf_out + RADIUS*height; left_buf_out = right_buf_out + 2*RADIUS*height; left_buf_in = right_buf_out + 3*RADIUS*height; for (iter = 0; iter<=iterations; iter++){ /* start timer after a warmup iteration */ if (iter == 1) { MPI_Barrier(MPI_COMM_WORLD); local_stencil_time = wtime(); } /* need to fetch ghost point data from neighbors in y-direction */ if (my_IDy < Num_procsy-1) { MPI_Irecv(top_buf_in, RADIUS*width, MPI_DTYPE, top_nbr, 101, MPI_COMM_WORLD, &(request[1])); for (kk=0,j=jend-RADIUS+1; j<=jend; j++) for (i=istart; i<=iend; i++) { top_buf_out[kk++]= IN(i,j); } MPI_Isend(top_buf_out, RADIUS*width,MPI_DTYPE, top_nbr, 99, MPI_COMM_WORLD, &(request[0])); } if (my_IDy > 0) { MPI_Irecv(bottom_buf_in,RADIUS*width, MPI_DTYPE, bottom_nbr, 99, MPI_COMM_WORLD, &(request[3])); for (kk=0,j=jstart; j<=jstart+RADIUS-1; j++) for (i=istart; i<=iend; i++) { bottom_buf_out[kk++]= IN(i,j); } MPI_Isend(bottom_buf_out, RADIUS*width,MPI_DTYPE, bottom_nbr, 101, MPI_COMM_WORLD, &(request[2])); } if (my_IDy < Num_procsy-1) { MPI_Wait(&(request[0]), MPI_STATUS_IGNORE); MPI_Wait(&(request[1]), MPI_STATUS_IGNORE); for (kk=0,j=jend+1; j<=jend+RADIUS; j++) for 
(i=istart; i<=iend; i++) { IN(i,j) = top_buf_in[kk++]; } } if (my_IDy > 0) { MPI_Wait(&(request[2]), MPI_STATUS_IGNORE); MPI_Wait(&(request[3]), MPI_STATUS_IGNORE); for (kk=0,j=jstart-RADIUS; j<=jstart-1; j++) for (i=istart; i<=iend; i++) {
开发者ID:elliottslaughter,项目名称:Kernels,代码行数:67,
// Example 21: multiply_by_tiles
/*---------------------------------------------------------------------------
 *
 * Compute matrix product using tiling.  The loop order used for the tile
 * products is specified in string variable "order".
 *
 * Input
 *   int argc        - length of argv[] array (not inspected here)
 *   char* argv[]    - pointer to command line parameter array; argv[0..2]
 *                     are parsed as rows, mids and columns, argv[3..5] as
 *                     rows, mids and columns per tile
 *   int verbosity   - program verification: verbosity > 0 gives more output
 *   char* order     - string indicating loop order, e.g., "ijk" or "jki";
 *                     only used in the printed output here
 *
 * Output
 *   double          - elapsed time for product computation, or 0.0 if a
 *                     tile dimension is not positive (nothing is computed)
 */
double multiply_by_tiles( int argc, char* argv[], int verbosity, char* order )
{
    int rows, cols, mids;
    int rows_per_tile, cols_per_tile, mids_per_tile;
    int row_start, row_end;
    int col_start, col_end;
    int mid_start, mid_end;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    rows = atoi( argv[0] );
    mids = atoi( argv[1] );
    cols = atoi( argv[2] );
    rows_per_tile = atoi( argv[3] );
    mids_per_tile = atoi( argv[4] );
    cols_per_tile = atoi( argv[5] );
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    /*
     * a tile dimension <= 0 (e.g. atoi() of non-numeric text) would keep
     * the tile loops below from ever advancing
     */
    if ( rows_per_tile <= 0 || mids_per_tile <= 0 || cols_per_tile <= 0 )
    {
        fprintf( stderr, "tiles(%3s): tile dimensions must be positive\n",
                 order );
        return 0.0;
    }

    if ( verbosity > 0 )
    {
        printf( "Tiles(%3s): rows = %d, mids = %d, columns = %d\n",
                order, rows, mids, cols );
        printf( "block rows = %d, mids = %d, columns = %d\n",
                rows_per_tile, mids_per_tile, cols_per_tile );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );
    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product; each *_end index is inclusive and clamped to the
     * matrix edge so partial tiles at the border are handled
     */
    t1 = wtime();
    for ( row_start = 0; row_start < rows; row_start += rows_per_tile )
    {
        row_end = row_start + rows_per_tile - 1;
        if ( row_end >= rows ) row_end = rows - 1;
        for ( col_start = 0; col_start < cols; col_start += cols_per_tile )
        {
            col_end = col_start + cols_per_tile - 1;
            if ( col_end >= cols ) col_end = cols - 1;
            for ( mid_start = 0; mid_start < mids; mid_start += mids_per_tile )
            {
                mid_end = mid_start + mids_per_tile - 1;
                if ( mid_end >= mids ) mid_end = mids - 1;
                do_product( a, b, c, row_start, row_end, col_start,
                            col_end, mid_start, mid_end );
            }
        }
    }
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
        printf( "checksum = %f\n", checksum( c, rows, cols ) );

    printf( "tiles(%3s): %6.3f secs %6.3f gflops ",
            order, sec, gflop_count / sec );
    printf( "( %5d x %5d x %5d ) ( %4d x %4d x %4d )\n",
            rows, mids, cols, rows_per_tile, mids_per_tile,
            cols_per_tile );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return sec;
}
开发者ID:gordon-cs,项目名称:cps343-hoe,代码行数:95,
示例22: main//.........这里部分代码省略......... } bail_out(error); right_buf_in = right_buf_out + RADIUS*height_rank; left_buf_out = right_buf_out + 2*RADIUS*height_rank; left_buf_in = right_buf_out + 3*RADIUS*height_rank; /* fill the stencil weights to reflect a discrete divergence operator */ for (int jj=-RADIUS; jj<=RADIUS; jj++) for (int ii=-RADIUS; ii<=RADIUS; ii++) WEIGHT(ii,jj) = (DTYPE) 0.0; stencil_size = 4*RADIUS+1; for (int ii=1; ii<=RADIUS; ii++) { WEIGHT(0, ii) = WEIGHT( ii,0) = (DTYPE) (1.0/(2.0*ii*RADIUS)); WEIGHT(0,-ii) = WEIGHT(-ii,0) = -(DTYPE) (1.0/(2.0*ii*RADIUS)); } norm = (DTYPE) 0.0; f_active_points = (DTYPE) (n-2*RADIUS)*(DTYPE) (n-2*RADIUS); /* intialize the input and output arrays */ for (int j=jstart_rank; j<=jend_rank; j++) for (int i=istart_rank; i<=iend_rank; i++) { IN(i,j) = COEFX*i+COEFY*j; OUT(i,j) = (DTYPE)0.0; } /* LOAD/STORE FENCE */ MPI_Win_sync(shm_win_in); MPI_Win_sync(shm_win_out); MPI_Barrier(shm_comm); for (iter = 0; iter<=iterations; iter++){ /* start timer after a warmup iteration */ if (iter == 1) { MPI_Barrier(MPI_COMM_WORLD); local_stencil_time = wtime(); } /* need to fetch ghost point data from neighbors in y-direction */ if (top_nbr != -1) { MPI_Irecv(top_buf_in, RADIUS*width_rank, MPI_DTYPE, top_nbr, 101, MPI_COMM_WORLD, &(request[1])); for (int kk=0,j=jend_rank-RADIUS+1; j<=jend_rank; j++) for (int i=istart_rank; i<=iend_rank; i++) { top_buf_out[kk++]= IN(i,j); } MPI_Isend(top_buf_out, RADIUS*width_rank,MPI_DTYPE, top_nbr, 99, MPI_COMM_WORLD, &(request[0])); } if (bottom_nbr != -1) { MPI_Irecv(bottom_buf_in,RADIUS*width_rank, MPI_DTYPE, bottom_nbr, 99, MPI_COMM_WORLD, &(request[3])); for (int kk=0,j=jstart_rank; j<=jstart_rank+RADIUS-1; j++) for (int i=istart_rank; i<=iend_rank; i++) { bottom_buf_out[kk++]= IN(i,j); } MPI_Isend(bottom_buf_out, RADIUS*width_rank,MPI_DTYPE, bottom_nbr, 101, MPI_COMM_WORLD, &(request[2])); } if (top_nbr != -1) { MPI_Wait(&(request[0]), MPI_STATUS_IGNORE); MPI_Wait(&(request[1]), 
MPI_STATUS_IGNORE); for (int kk=0,j=jend_rank+1; j<=jend_rank+RADIUS; j++) for (int i=istart_rank; i<=iend_rank; i++) { IN(i,j) = top_buf_in[kk++]; }
开发者ID:elliottslaughter,项目名称:Kernels,代码行数:67,
示例23: main//.........这里部分代码省略......... #pragma omp parallel { #pragma omp master { nthread = omp_get_num_threads(); if (nthread != nthread_input) { num_error = 1; printf("ERROR: number of requested threads %d does not equal ", nthread_input); printf("number of spawned threads %d/n", nthread); } else { printf("Number of threads = %d/n",nthread_input); printf("Grid size = %lld/n", L); printf("Number of particles requested = %lld/n", n); printf("Number of time steps = %lld/n", iterations); printf("Initialization mode = %s/n", init_mode); switch(particle_mode) { case GEOMETRIC: printf(" Attenuation factor = %lf/n", rho); break; case SINUSOIDAL: break; case LINEAR: printf(" Negative slope = %lf/n", alpha); printf(" Offset = %lf/n", beta); break; case PATCH: printf(" Bounding box = %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "/n", init_patch.left, init_patch.right, init_patch.bottom, init_patch.top); break; default: printf("ERROR: Unsupported particle initializating mode/n"); exit(FAILURE); } printf("Particle charge semi-increment = %"PRIu64"/n", k); printf("Vertical velocity = %"PRIu64"/n", m); /* Initialize grid of charges and particles */ Qgrid = initializeGrid(L); LCG_init(&dice); switch(particle_mode) { case GEOMETRIC: particles = initializeGeometric(n, L, rho, k, m, &n, &dice); break; case SINUSOIDAL: particles = initializeSinusoidal(n, L, k, m, &n, &dice); break; case LINEAR: particles = initializeLinear(n, L, alpha, beta, k, m, &n, &dice); break; case PATCH: particles = initializePatch(n, L, init_patch, k, m, &n, &dice); break; default: printf("ERROR: Unsupported particle distribution/n"); exit(FAILURE); } printf("Number of particles placed = %lld/n", n); } } bail_out(num_error); } for (iter=0; iter<=iterations; iter++) { /* start the timer after one warm-up time step */ if (iter==1) { pic_time = wtime(); } /* Calculate forces on particles and update positions */ #pragma omp parallel for private(i, p, fx, fy, ax, ay) for (i=0; i<n; i++) { p = particles; fx = 0.0; 
fy = 0.0; computeTotalForce(p[i], L, Qgrid, &fx, &fy); ax = fx * MASS_INV; ay = fy * MASS_INV; /* Update particle positions, taking into account periodic boundaries */ p[i].x = fmod(p[i].x + p[i].v_x*DT + 0.5*ax*DT*DT + L, L); p[i].y = fmod(p[i].y + p[i].v_y*DT + 0.5*ay*DT*DT + L, L); /* Update velocities */ p[i].v_x += ax * DT; p[i].v_y += ay * DT; } } pic_time = wtime() - pic_time; /* Run the verification test */ for (i=0; i<n; i++) { correctness *= verifyParticle(particles[i], iterations, Qgrid, L); } if (correctness) { printf("Solution validates/n");#ifdef VERBOSE printf("Simulation time is %lf seconds/n", pic_time);#endif avg_time = n*iterations/pic_time; printf("Rate (Mparticles_moved/s): %lf/n", 1.0e-6*avg_time); } else { printf("Solution does not validate/n"); } return(EXIT_SUCCESS);}
开发者ID:ParRes,项目名称:Kernels,代码行数:101,
示例24: main//.........这里部分代码省略......... in_arrays[MYTHREAD] = in_array; out_arrays[MYTHREAD] = out_array; buf_arrays[MYTHREAD] = buf_array; double **in_array_private = shared_2d_array_to_private(in_array, sizex, sizey, myoffsetx, myoffsety); double **out_array_private = shared_2d_array_to_private(out_array, sizex, sizey, myoffsetx, myoffsety); double **buf_array_private = shared_2d_array_to_private(buf_array, sizex, sizey, myoffsetx, myoffsety); upc_barrier; /********************************************************************* ** Initialize the matrices *********************************************************************/ for(int y=myoffsety; y<myoffsety + sizey; y++){ for(int x=myoffsetx; x<myoffsetx + sizex; x++){ in_array_private[y][x] = (double) (x+N*y); out_array[y][x] = -1.0; } } upc_barrier; for(int y=myoffsety; y<myoffsety + sizey; y++){ for(int x=myoffsetx; x<myoffsetx + sizex; x++){ if(in_array_private[y][x] !=(double) (x+N*y)) die("x=%d y=%d in_array=%f != %f", x, y, in_array[y][x], (x+N*y)); if(out_array_private[y][x] != -1.0) die("out_array_private error"); } } /********************************************************************* ** Transpose *********************************************************************/ int transfer_size = sizex * sizex * sizeof(double); if(MYTHREAD == 0) debug("transfer size = %d", transfer_size); for(int iter=0; iter<=num_iterations; iter++){ /* start timer after a warmup iteration */ if(iter == 1){ upc_barrier; start_time = wtime(); } for(int i=0; i<THREADS; i++){ int local_blk_id = (MYTHREAD + i) % THREADS; int remote_blk_id = MYTHREAD; int remote_thread = local_blk_id; upc_memget(&buf_array_private[local_blk_id * sizex][myoffsetx], &in_arrays[remote_thread][remote_blk_id * sizex][remote_thread * sizex], transfer_size);#define OUT_ARRAY(x,y) out_array_private[local_blk_id * sizex + x][myoffsetx + y]#define BUF_ARRAY(x,y) buf_array_private[local_blk_id * sizex + x][myoffsetx + y] if(!tiling){ for(int x=0; x<sizex; x++){ 
for(int y=0; y<sizex; y++){ OUT_ARRAY(x,y) = BUF_ARRAY(y,x); } } } else{ for(int x=0; x<sizex; x+=tile_size){ for(int y=0; y<sizex; y+=tile_size){ for(int bx=x; bx<MIN(sizex, x+tile_size); bx++){ for(int by=y; by<MIN(sizex, y+tile_size); by++){ OUT_ARRAY(bx,by) = BUF_ARRAY(by,bx); } } } } } } upc_barrier; } upc_barrier; end_time = wtime(); /********************************************************************* ** Analyze and output results. *********************************************************************/ for(int y=myoffsety; y<myoffsety + sizey; y++){ for(int x=myoffsetx; x<myoffsetx + sizex; x++){ if(in_array_private[y][x] != (double)(x+ N*y)) die("Error in input: x=%d y=%d", x, y); if(out_array_private[y][x] != (double)(y + N*x)) die("x=%d y=%d in_array=%f != %f %d %d", x, y, out_array[y][x], (double)(y + N*x), (int)(out_array[y][x]) % N, (int)(out_array[y][x]) / N); } } if(MYTHREAD == 0){ printf("Solution validates/n"); double transfer_size = 2 * N * N * sizeof(double); avgtime = (end_time - start_time) / num_iterations; double rate = transfer_size / avgtime * 1.0E-06; printf("Rate (MB/s): %lf Avg time (s): %lf/n",rate, avgtime); }}
开发者ID:beginZero,项目名称:Kernels,代码行数:101,
示例25: read_input/*********************************************************************** * Read the input file. ***********************************************************************/int read_input ( FILE *fp_in, FILE *fp_out, input_data *input_vars, para_data *para_vars, time_data *time_vars ){/*********************************************************************** * Local variables. ***********************************************************************/ double t1, t2; int ierr = 0; char *error = NULL; char *line = NULL; size_t len = 0; ssize_t read; char *tmpData = NULL; int tmpStrLen, i;/*********************************************************************** * Read the input file. Echo to output file. Call for an input variable * check. Only root reads, echoes, checks input. ***********************************************************************/ t1 = wtime (); if ( IPROC == ROOT ) { if ( !fp_in ) { tmpStrLen = strlen (" ***ERROR: READ_INPUT:" " Problem reading input file./n"); ALLOC_STR ( error, tmpStrLen + 1, &ierr ); snprintf ( error, tmpStrLen + 1, " ***ERROR: READ_INPUT:" " Problem reading input file./n" ); print_error ( fp_out, error, IPROC, ROOT ); FREE ( error ); ierr = 1; } else { while ( (read = getline(&line, &len, fp_in)) != -1 ) { i = 0; while ( isspace(line[i]) ) { i++; } // Parallel processing inputs // npey: number of process elements in y-dir if ( strncmp(&line[i], "npey=", strlen("npey=")) == 0 ) { get_input_value ( &line[i], "npey=", &tmpData ); NPEY = atoi ( tmpData ); } // npez: input number of process elements in z-dir else if ( strncmp(&line[i], "npez=", strlen("npez=")) == 0 ) { get_input_value ( &line[i], "npez=", &tmpData ); NPEZ = atoi ( tmpData ); } // ichunk: else if ( strncmp(&line[i], "ichunk=", strlen("ichunk=")) == 0 ) { get_input_value ( &line[i], "ichunk=", &tmpData ); ICHUNK = atoi ( tmpData ); } // nthreads: input number of threads else if ( strncmp(&line[i], "nthreads=", strlen("nthreads=")) == 0 ) { get_input_value ( 
&line[i], "nthreads=", &tmpData ); NTHREADS = atoi ( tmpData ); } // nnested: else if ( strncmp(&line[i], "nnested=", strlen("nnested=")) == 0 ) { get_input_value ( &line[i], "nnested=", &tmpData ); NNESTED = atoi ( tmpData ); } // Geometry inputs // ndimen: else if ( strncmp(&line[i], "ndimen=", strlen("ndimen=")) == 0 ) { get_input_value ( &line[i], "ndimen=", &tmpData ); NDIMEN = atoi ( tmpData ); } // nx: else if ( strncmp(&line[i], "nx=", strlen("nx=")) == 0 ) {//.........这里部分代码省略.........
开发者ID:GCZhang,项目名称:SNAP,代码行数:101,
示例26: main//.........这里部分代码省略......... /* get space for local blocks of A, B, C */ a = (double *) malloc( lda*myncols*sizeof(double) ); b = (double *) malloc( lda*myncols*sizeof(double) ); c = (double *) malloc( lda*myncols*sizeof(double) ); if ( a == NULL || b == NULL || c == NULL ) { error = 1; printf("ERROR: Proc %d could not allocate a, b, and/or c/n",my_ID); } bail_out(error); /* get space for two work arrays for dgemm */ work1 = (double *) malloc( nb*lda*sizeof(double) ); work2 = (double *) malloc( nb*myncols*sizeof(double) ); if ( !work1 || !work2 ) { printf("ERROR: Proc %d could not allocate work buffers/n", my_ID); error = 1; } bail_out(error); /* collect array that holds mynrows from all nodes in my row of the rank grid (array of all m_i) */ MPI_Allgather( &mynrows, 1, MPI_INT, mm, 1, MPI_INT, comm_col ); /* myfrow = first row on my node */ for (myfrow=1,i=0; i<myrow; i++) myfrow += mm[i]; mylrow = myfrow+mynrows-1; /* collect array that holds myncols from all nodes in my column of the rank grid (array of all n_j) */ MPI_Allgather( &myncols, 1, MPI_INT, nn, 1, MPI_INT, comm_row ); /* myfcol = first col on my node */ for (myfcol=1,i=0; i<mycol; i++) myfcol += nn[i]; mylcol = myfcol+myncols-1; /* initialize matrices A, B, and C */ ldc = ldb = lda; for (jj=0, j=myfcol; j<=mylcol; j++,jj++ ) for (ii=0, i=myfrow; i<=mylrow; i++, ii++ ) { A(ii,jj) = (double) (j-1); B(ii,jj) = (double) (j-1); C(ii,jj) = 0.0; } for (iter=0; iter<=iterations; iter++) { /* start timer after a warmup iteration */ if (iter == 1) { MPI_Barrier(MPI_COMM_WORLD); local_dgemm_time = wtime(); } /* actual matrix-vector multiply */ dgemm(order, nb, inner_block_flag, a, lda, b, lda, c, lda, mm, nn, comm_row, comm_col, work1, work2 ); } /* end of iterations */ local_dgemm_time = wtime() - local_dgemm_time; MPI_Reduce(&local_dgemm_time, &dgemm_time, 1, MPI_DOUBLE, MPI_MAX, root, MPI_COMM_WORLD); /* verification test */ for (jj=0, j=myfcol; j<=mylcol; j++, jj++) for (ii=0, i=myfrow; i<=mylrow; 
i++, ii++) checksum_local += C(ii,jj); MPI_Reduce(&checksum_local, &checksum, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD); forder = (double) order; ref_checksum = (0.25*forder*forder*forder*(forder-1.0)*(forder-1.0)); ref_checksum *= (iterations+1); if (my_ID == root) { if (ABS((checksum - ref_checksum)/ref_checksum) > epsilon) { printf("ERROR: Checksum = %lf, Reference checksum = %lf/n", checksum, ref_checksum); error = 1; } else { printf("Solution validates/n");#ifdef VERBOSE printf("Reference checksum = %lf, checksum = %lf/n", ref_checksum, checksum);#endif } } bail_out(error); /* report elapsed time */ nflops = 2.0*forder*forder*forder; if ( my_ID == root ) { avgtime = dgemm_time/iterations; printf("Rate (MFlops/s): %lf Avg time (s): %lf/n", 1.0E-06 * nflops/avgtime, avgtime); } MPI_Finalize();}
开发者ID:nchaimov,项目名称:ParResKernels,代码行数:101,
示例27: main//.........这里部分代码省略......... } bail_out(error); top_buf_in[1] = top_buf_in[0] + RADIUS*width; bottom_buf_in[0] = top_buf_in[1] + RADIUS*width; bottom_buf_in[1] = bottom_buf_in[0] + RADIUS*width; right_buf_out=(DTYPE*)malloc(2*sizeof(DTYPE)*RADIUS*height); if (!right_buf_out) { printf("ERROR: Rank %d could not allocate output comm buffers for x-direction/n", my_ID); error = 1; } bail_out(error); left_buf_out=right_buf_out+RADIUS*height; right_buf_in[0]=(DTYPE*)prk_shmem_malloc(4*sizeof(DTYPE)*RADIUS*height); if(!right_buf_in) { printf("ERROR: Rank %d could not allocate input comm buffers for x-dimension/n", my_ID); error=1; } bail_out(error); right_buf_in[1] = right_buf_in[0] + RADIUS*height; left_buf_in[0] = right_buf_in[1] + RADIUS*height; left_buf_in[1] = left_buf_in[0] + RADIUS*height; /* make sure all symmetric heaps are allocated before being used */ shmem_barrier_all(); for (iter = 0; iter<=iterations; iter++){ /* start timer after a warmup iteration */ if (iter == 1) { shmem_barrier_all(); local_stencil_time[0] = wtime(); } /* sw determines which incoming buffer to select */ sw = iter%2; /* need to fetch ghost point data from neighbors */ if (my_IDy < Num_procsy-1) { for (kk=0,j=jend-RADIUS; j<=jend-1; j++) for (i=istart; i<=iend; i++) { top_buf_out[kk++]= IN(i,j); } shmem_putmem(bottom_buf_in[sw], top_buf_out, RADIUS*width*sizeof(DTYPE), top_nbr);#if SPLITFENCE shmem_fence(); shmem_int_inc(&iterflag[sw], top_nbr);#endif } if (my_IDy > 0) { for (kk=0,j=jstart; j<=jstart+RADIUS-1; j++) for (i=istart; i<=iend; i++) { bottom_buf_out[kk++]= IN(i,j); } shmem_putmem(top_buf_in[sw], bottom_buf_out, RADIUS*width*sizeof(DTYPE), bottom_nbr);#if SPLITFENCE shmem_fence(); shmem_int_inc(&iterflag[sw], bottom_nbr);#endif } if(my_IDx < Num_procsx-1) { for(kk=0,j=jstart;j<=jend;j++) for(i=iend-RADIUS;i<=iend-1;i++) { right_buf_out[kk++]=IN(i,j); } shmem_putmem(left_buf_in[sw], right_buf_out, RADIUS*height*sizeof(DTYPE), right_nbr);
开发者ID:kempj,项目名称:Kernels,代码行数:67,
示例28: main//.........这里部分代码省略......... printf("Sparsity = %16.10lf/n", sparsity);#ifdef SCRAMBLE printf("Using scrambled indexing/n");#else printf("Using canonical indexing/n");#endif printf("Number of iterations = %16d/n", iterations); } } bail_out(num_error); /* initialize the input and result vectors */ #pragma omp for for (row=0; row<size2; row++) result[row] = vector[row] = 0.0; /* fill matrix with nonzeroes corresponding to difference stencil. We use the scrambling for reordering the points in the grid. */ #pragma omp for private (i,j,r) for (row=0; row<size2; row++) { j = row/size; i=row%size; elm = row*stencil_size; colIndex[elm] = REVERSE(LIN(i,j),lsize2); for (r=1; r<=radius; r++, elm+=4) { colIndex[elm+1] = REVERSE(LIN((i+r)%size,j),lsize2); colIndex[elm+2] = REVERSE(LIN((i-r+size)%size,j),lsize2); colIndex[elm+3] = REVERSE(LIN(i,(j+r)%size),lsize2); colIndex[elm+4] = REVERSE(LIN(i,(j-r+size)%size),lsize2); } // sort colIndex to make sure the compressed row accesses // vector elements in increasing order qsort(&(colIndex[row*stencil_size]), stencil_size, sizeof(s64Int), compare); for (elm=row*stencil_size; elm<(row+1)*stencil_size; elm++) matrix[elm] = 1.0/(double)(colIndex[elm]+1); } for (iter=0; iter<iterations; iter++) { #pragma omp barrier #pragma omp master { sparse_time = wtime(); } /* fill vector */ #pragma omp for for (row=0; row<size2; row++) vector[row] += (double) (row+1); /* do the actual matrix-vector multiplication */ #pragma omp for for (row=0; row<size2; row++) { temp = 0.0; first = stencil_size*row; last = first+stencil_size-1; #pragma simd reduction(+:temp) for (col=first; col<=last; col++) { temp += matrix[col]*vector[colIndex[col]]; } result[row] += temp; } #pragma omp master { sparse_time = wtime() - sparse_time; if (iter>0 || iterations==1) { /* skip the first iteration */ avgtime = avgtime + sparse_time; mintime = MIN(mintime, sparse_time); maxtime = MAX(maxtime, sparse_time); } } } } /* end of parallel region */ /* verification 
test */ reference_sum = 0.5 * (double) nent * (double) iterations * (double) (iterations +1); vector_sum = 0.0; for (row=0; row<size2; row++) vector_sum += result[row]; if (ABS(vector_sum-reference_sum) > epsilon) { printf("ERROR: Vector sum = %lf, Reference vector sum = %lf/n", vector_sum, reference_sum); exit(EXIT_FAILURE); } else { printf("Solution validates/n");#ifdef VERBOSE printf("Reference sum = %lf, vector sum = %lf/n", reference_sum, vector_sum);#endif } avgtime = avgtime/(double)(MAX(iterations-1,1)); printf("Rate (MFlops/s): %lf, Avg time (s): %lf, Min time (s): %lf", 1.0E-06 * (2.0*nent)/mintime, avgtime, mintime); printf(", Max time (s): %lf/n", maxtime); exit(EXIT_SUCCESS);}
开发者ID:jbreitbart,项目名称:Kernels,代码行数:101,
示例29: main//.........这里部分代码省略......... } else printf("Untiled/n"); } } bail_out(num_error); /* Fill the original matrix, set transpose to known garbage value. */ if (tiling) {#ifdef COLLAPSE #pragma omp for private (i,it,jt) collapse(2)#else #pragma omp for private (i,it,jt)#endif for (j=0; j<order; j+=Tile_order) for (i=0; i<order; i+=Tile_order) for (jt=j; jt<MIN(order,j+Tile_order);jt++) for (it=i; it<MIN(order,i+Tile_order); it++){ A(it,jt) = (double) (order*jt + it); B(it,jt) = 0.0; } } else { #pragma omp for private (i) for (j=0;j<order;j++) for (i=0;i<order; i++) { A(i,j) = (double) (order*j + i); B(i,j) = 0.0; } } for (iter = 0; iter<=iterations; iter++){ /* start timer after a warmup iteration */ if (iter == 1) { #pragma omp barrier #pragma omp master { transpose_time = wtime(); } } /* Transpose the matrix */ if (!tiling) { #pragma omp for private (j) for (i=0;i<order; i++) for (j=0;j<order;j++) { B(j,i) += A(i,j); A(i,j) += 1.0; } } else {#ifdef COLLAPSE #pragma omp for private (j,it,jt) collapse(2)#else #pragma omp for private (j,it,jt)#endif for (i=0; i<order; i+=Tile_order) for (j=0; j<order; j+=Tile_order) for (it=i; it<MIN(order,i+Tile_order); it++) for (jt=j; jt<MIN(order,j+Tile_order);jt++) { B(jt,it) += A(it,jt); A(it,jt) += 1.0; } } } /* end of iter loop */ #pragma omp barrier #pragma omp master { transpose_time = wtime() - transpose_time; } } /* end of OpenMP parallel region */ abserr = test_results (order, B, iterations); /********************************************************************* ** Analyze and output results. 
*********************************************************************/ if (abserr < epsilon) { printf("Solution validates/n"); avgtime = transpose_time/iterations; printf("Rate (MB/s): %lf Avg time (s): %lf/n", 1.0E-06 * bytes/avgtime, avgtime);#ifdef VERBOSE printf("Squared errors: %f /n", abserr);#endif exit(EXIT_SUCCESS); } else { printf("ERROR: Aggregate squared error %lf exceeds threshold %e/n", abserr, epsilon); exit(EXIT_FAILURE); }} /* end of main */
开发者ID:kempj,项目名称:Kernels,代码行数:101,
示例30: main//.........这里部分代码省略......... else { printf("Number of threads = %d/n", nthread_input); printf("Vector length = %d/n", vector_length); printf("Number of iterations = %d/n", iterations); printf("Branching type = %s/n", branch_type); } } bail_out(num_error); my_ID = omp_get_thread_num(); vector = malloc(vector_length*2*sizeof(int)); if (!vector) { printf("ERROR: Thread %d failed to allocate space for vector/n", my_ID); num_error = 1; } bail_out(num_error); /* grab the second half of vector to store index array */ index = vector + vector_length; /* initialize the array with entries with varying signs; array "index" is only used to obfuscate the compiler (i.e. it won't vectorize a loop containing indirect referencing). It functions as the identity operator. */ for (i=0; i<vector_length; i++) { vector[i] = 3 - (i&7); index[i] = i; } #pragma omp barrier #pragma omp master { branch_time = wtime(); } /* do actual branching */ switch (btype) { case VECTOR_STOP: /* condition vector[index[i]]>0 inhibits vectorization */ for (iter=0; iter<iterations; iter+=2) { #pragma vector always for (i=0; i<vector_length; i++) { aux = -(3 - (i&7)); if (vector[index[i]]>0) vector[i] -= 2*vector[i]; else vector[i] -= 2*aux; } #pragma vector always for (i=0; i<vector_length; i++) { aux = (3 - (i&7)); if (vector[index[i]]>0) vector[i] -= 2*vector[i]; else vector[i] -= 2*aux; } } break; case VECTOR_GO: /* condition aux>0 allows vectorization */ for (iter=0; iter<iterations; iter+=2) { #pragma vector always for (i=0; i<vector_length; i++) { aux = -(3 - (i&7)); if (aux>0) vector[i] -= 2*vector[i]; else vector[i] -= 2*aux;
开发者ID:nchaimov,项目名称:ParResKernels,代码行数:67,
注:本文中的wtime函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 C++ wunlock函数代码示例 | C++ wtap_dump_file_write函数代码示例