您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ wtime函数代码示例

51自学网 2021-06-03 10:08:30
  C++
这篇教程《C++ wtime函数代码示例》写得很实用，希望能帮到您。

本文整理汇总了 C/C++ 中 wtime 函数的典型用法代码示例。如果您正在为以下问题困扰：wtime 函数的具体用法是什么？wtime 怎么用？有哪些 wtime 的使用例子？那么这里精选的函数代码示例或许可以为您提供帮助。

下文一共展示了 wtime 函数的 30 个代码示例，这些例子默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞，您的评价将有助于我们的系统推荐出更好的 C/C++ 代码示例。

示例1: main

/*
 * Bubble-sort benchmark.
 *
 * Fills an array of argv[1] integers with values from getrand(0, 100000),
 * sorts it with bubble sort, and appends one line "<size> <elapsed>" to
 * bubblesort.dat, where <elapsed> is the difference of two wtime() readings
 * taken around the sort.
 *
 * Returns EXIT_SUCCESS on success. Returns EXIT_FAILURE when the size
 * argument is missing or negative, when the array cannot be allocated, or
 * when the results file cannot be opened.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        printf("Missing size of array!\n");
        return EXIT_FAILURE;
    }

    int size_array = atoi(argv[1]);
    if (size_array < 0) {
        /* A negative count would turn into a huge size_t in malloc(). */
        printf("Invalid size of array!\n");
        return EXIT_FAILURE;
    }

    /* Size the allocation from the element type that is actually stored. */
    int *array = malloc((size_t)size_array * sizeof *array);
    if (array == NULL && size_array > 0) {
        printf("Cannot allocate array!\n");
        return EXIT_FAILURE;
    }

    for (int i = 0; i < size_array; i++) {
        array[i] = getrand(0, 100000);
    }

    double elapsed = wtime();
    for (int i = 0; i < size_array - 1; i++) {
        /* After pass i the last i elements are already in place. */
        for (int j = 0; j < size_array - i - 1; j++) {
            if (array[j] > array[j + 1]) {
                int tmp = array[j];
                array[j] = array[j + 1];
                array[j + 1] = tmp;
            }
        }
    }
    elapsed = wtime() - elapsed;

    FILE *tb = fopen("bubblesort.dat", "a");
    if (tb == NULL) {
        perror("bubblesort.dat");
        free(array);
        return EXIT_FAILURE;
    }
    fprintf(tb, "%d %.6f\n", size_array, elapsed);
    fclose(tb);

    free(array);
    return EXIT_SUCCESS;
}
开发者ID:evg-kazartseff,项目名称:DSA,代码行数:40,


示例2: main

int main(int argc, char **argv){  int i, me, target;  unsigned int size;  double t;  MPI_Status status;  MPI_Init(&argc, &argv);  MPI_Comm_rank(MPI_COMM_WORLD, &me);  target = 1 - me;  init_buf(send_buf, me);  init_buf(recv_buf, target);  if(me==0) print_items();  for(size=1;size<MAX_SIZE+1;size*=2){    MPI_Barrier(MPI_COMM_WORLD);    for(i=0;i<LOOP+WARMUP;i++){      if(WARMUP == i)	t = wtime();      if(me == 0){	MPI_Send(send_buf, size, MPI_CHAR, target, 9, MPI_COMM_WORLD);	MPI_Recv(recv_buf, size, MPI_CHAR, target, 5, MPI_COMM_WORLD, &status);      }       else {	MPI_Recv(recv_buf, size, MPI_CHAR, target, 9, MPI_COMM_WORLD, &status);	MPI_Send(send_buf, size, MPI_CHAR, target, 5, MPI_COMM_WORLD);      }     }    MPI_Barrier(MPI_COMM_WORLD);    t = wtime() - t;    if(me == 0)      print_results(size, t);  }  MPI_Finalize();  return 0;}
开发者ID:mnakao,项目名称:pingpong,代码行数:40,


示例3: startrun

//  startrun: startup hierarchical N-body code.//  ___________________________________________// This runs once. local void startrun(void) {  printf("startrun/n");  startrun_time_0 = wtime();  bodyptr p1, p2, p;  stream gravstr;  define_body(sizeof(body), Precision, NDIM);	// setup phat body struct  define_body_offset(PosTag,  BodyOffset(Pos));  define_body_offset(VelTag,  BodyOffset(Vel));  define_body_offset(MassTag, BodyOffset(Mass));  define_body_offset(PhiTag,  BodyOffset(Phi));  define_body_offset(AccTag,  BodyOffset(Acc));  infile = getparam("in");			// set I/O file names  outfile = getparam("out");  savefile = getparam("save");  if (strnull(getparam("restore")))		// starting a new run?    newrun();  else						// else resume old run    oldrun();  if (ABS(nstatic) > nbody)			// check nstatic is OK    error("%s: absurd value for nstatic/n", getargv0());  p1 = bodytab + MAX(nstatic, 0);		// set dynamic body range  p2 = bodytab + nbody + MIN(nstatic, 0);  testcalc = TRUE;				// determine type of calc:  for (p = p1; p < p2; p++)    testcalc = testcalc && (Mass(p) == 0);	// look for dynamic masses  strfile = getparam("stream");  logfile = getparam("log");#if defined(EXTGRAV)  if (! strnull(getparam("gravgsp"))) {		// was GSP file given?    gravstr = stropen(getparam("gravgsp"), "r");    get_history(gravstr);    gravgsp = get_gsprof(gravstr);		// read external field GSP    strclose(gravstr);  }#endif  startrun_time_1 = wtime();}
开发者ID:jasminegrosso,项目名称:zeno,代码行数:42,


示例4: main

/*
 * Dot-product benchmark.
 *
 * Usage: <program> <vector size> <repetitions>
 *
 * Allocates two vectors of argv[1] doubles, initializes them with
 * init_vectors(), and times one call to dot_product() with the repetition
 * count from argv[2]. Prints the result and the difference of the two
 * wtime() readings (labelled "usec" by the program).
 *
 * Exits with EXIT_FAILURE on a wrong argument count or allocation failure.
 */
int main(int argc, char **argv)
{
   int n;
   int repeat;
   double dot;
   long start_time, end_time;

   if (argc != 3) {
      printf("Uso: %s <tamanho dos vetores> <repeticoes>\n", argv[0]);
      exit(EXIT_FAILURE);
   }
   n = atoi(argv[1]);       // vector size
   repeat = atoi(argv[2]);  // number of repetitions (varies the load)

   // Create the vectors
   double *a = (double *) malloc(sizeof(double) * n);
   double *b = (double *) malloc(sizeof(double) * n);
   if (a == NULL || b == NULL) {
      printf("Erro de alocacao de memoria\n");
      exit(EXIT_FAILURE);
   }

   init_vectors(a, b, n);

   start_time = wtime();
   dot = dot_product(a, b, n, repeat);
   end_time = wtime();

   printf("Produto escalar = %f\n", dot);
   printf("Tempo de calculo = %ld usec\n", (long) (end_time - start_time));

   free((void *) a);
   free((void *) b);

   return EXIT_SUCCESS;
}
开发者ID:AndreaInfUFSM,项目名称:elc139-2016a,代码行数:39,


示例5: main

/*
 * Times ITERATIONS calls to wakeywakey() while a second thread runs
 * thread1_fn, then prints the total and the per-iteration time.
 *
 * NOTE(review): thread1 is never joined; whether thread1_fn terminates on
 * its own is not visible here, so no pthread_join() was added.
 */
int main(int argc, char **argv)
{
    pthread_t thread1;
    int ret = -1;
    int i = 0;
    double time1, time2;

    ret = pthread_create(&thread1, NULL, thread1_fn, NULL);
    assert(ret == 0);

    time1 = wtime();
    for (i = 0; i < ITERATIONS; i++) {
        wakeywakey();
    }
    time2 = wtime();

    printf("time for %d iterations: %f seconds.\n", ITERATIONS, (time2 - time1));
    printf("per iteration: %f\n", (time2 - time1) / (double)ITERATIONS);

    return 0;
}
开发者ID:Goon83,项目名称:SALB,代码行数:22,


示例6: init_synchronization

void init_synchronization(void){  current_synchronization = form_of_synchronization;  max_counter = First_max_counter;  interval = First_interval;  first_measurement_run = True;  logging(DBG_SYNC, "starting with max_counter = %d interval = %9.1f/n", 	 max_counter, interval*1.0e6);  if( current_synchronization == SYNC_REAL) {    if( ! mpi_wtime_is_global ) determine_time_differences();    if( lrootproc() ) start_batch = wtime();    logging(DBG_SYNC, "---- new start_batch ----------------/n");    MPI_Bcast(&start_batch, 1, MPI_DOUBLE, 0, get_measurement_comm());  }}
开发者ID:jonarbo,项目名称:KUBE,代码行数:17,


示例7: while

// Level-synchronous shortest-path sweep from `root`.
//
// Starting with root at level 0, each round scans all vertices whose sa[]
// entry equals the current level and relaxes their outgoing edges
// (g->beg_pos / g->csr / g->weight). A strictly shorter distance resets the
// neighbour's shortest-path count and schedules it for the next level; an
// equal distance adds the current vertex's path count to the neighbour's.
// The sweep stops after the first round that improves no distance.
//
// Returns the final level index plus one.
depth_t bc::bfs_sssp(index_t root)
{
	sa[root] = 0;
	sp_count[root] = 1;
	depth_t level = 0;
	dist[root] = 0;

	while (true)
	{
		double ltm = wtime();	// only consumed by the debug trace below
		index_t front_count = 0;

		for (vertex_t vert_id = 0; vert_id < g->vert_count; vert_id++)
		{
			if (sa[vert_id] == level)
			{
				index_t my_beg = g->beg_pos[vert_id];
				index_t my_end = g->beg_pos[vert_id + 1];

				for (; my_beg < my_end; my_beg++)
				{
					vertex_t nebr = g->csr[my_beg];
					path_t weit = g->weight[my_beg];

					if (dist[nebr] > dist[vert_id] + weit)
					{
						dist[nebr] = dist[vert_id] + weit;
						sp_count[nebr] = 0;	// prior parent is wrong
						sa[nebr] = level + 1;
						front_count++;
					}

					// Also true right after the update above, so the new
					// parent's count is added in the same step.
					if (dist[nebr] == dist[vert_id] + weit)
						sp_count[nebr] += sp_count[vert_id];
				}
			}
		}

		// Debug trace (disabled):
		// std::cout << "Level " << (int) level << ": " << front_count << " "
		//           << wtime() - ltm << "\n";

		if (front_count == 0) break;
		level++;
	}

	return level + 1;
}
开发者ID:ChickenRunjyd,项目名称:cpu_bc,代码行数:46,


示例8: stop_synchronization

double stop_synchronization(void){  stop_batch = stop_sync = wtime();  if( current_synchronization == SYNC_REAL ) {    if( stop_sync - start_sync > interval )      invalid[counter] = INVALID_TOOK_TOO_LONG;      logging(DBG_SYNC, "stop_sync = %9.1f ", normalize_time(stop_sync));    switch( invalid[counter] ) {    case INVALID_TOOK_TOO_LONG: logging(DBG_SYNC, "invalid_too_long/n"); break;    case INVALID_STARTED_LATE:  logging(DBG_SYNC, "invalid_started_late/n"); break;    default:                    logging(DBG_SYNC, "/n");     }  }  return stop_sync;}
开发者ID:jonarbo,项目名称:KUBE,代码行数:19,


示例9: main

int main(){  double t;  int i, me, target;  unsigned int size;  me = xmp_node_num();  target = 3 - me;  init_buf(local_buf, me);  init_buf(target_buf, me);  if(me==1) print_items();  for(size=4;size<MAX_SIZE+1;size*=2){ // size must be more than 4 when using Fujitsu RDMA    xmp_sync_all(NULL);    for(i=0;i<LOOP+WARMUP;i++){      if(WARMUP == i)        t = wtime();      if(me == 1){        local_buf[0:size] = target_buf[0:size]:[target];	xmp_sync_memory(NULL);#ifdef DEBUG	if(local_buf[0] != '2' && local_buf[size-1] != '2') fprintf(stderr, "Error !/n");	local_buf[0] = '1'; local_buf[size-1] = '1';#endif	xmp_sync_all(NULL);      }      else{	xmp_sync_all(NULL);	local_buf[0:size] = target_buf[0:size]:[target];#ifdef DEBUG        if(local_buf[0] != '1' && local_buf[size-1] != '1') fprintf(stderr, "Error !/n");	local_buf[0] = '2'; local_buf[size-1] = '2';#endif      }      xmp_sync_all(NULL);    }
开发者ID:mnakao,项目名称:pingpong,代码行数:39,


示例10: main

/*
 * Times RunThoughLinkedList() and RunExplicit() separately, plus the total
 * for both, and prints the three differences of wtime() readings.
 */
int main()
{
  Init();

  double start = wtime();

  double start_linked_list = wtime();
  RunThoughLinkedList();
  double end_linked_list = wtime();

  double start_explicit = wtime();
  RunExplicit();
  double end_explicit = wtime();

  double end = wtime();

  printf("Time through Linked List %7.2f\n"
         "Time through explicit %7.2f\n"
         "Total Time taken %7.2f\n",
         end_linked_list - start_linked_list,
         end_explicit - start_explicit,
         end - start);

  return 0;
}
开发者ID:hjmjohnson,项目名称:XEParallelProg,代码行数:20,


示例11: main

//.........这里部分代码省略.........            goto ENDOFTESTS;        }        else length = total_length/Num_procs;        offset       = atol(*++argv);        if (offset < 0) {            printf("ERROR: Invalid array offset: %ld/n", offset);            error = 1;            goto ENDOFTESTS;        }#ifdef STATIC_ALLOCATION        if ((3*length + 2*offset) > N) {            printf("ERROR: vector length/offset %ld/%ld too ", total_length, offset);            printf("large; increase MAXLENGTH in Makefile or decrease vector length/n");            error = 1;            goto ENDOFTESTS;        }#endifENDOFTESTS:        ;    }    bail_out(error);    /* broadcast initialization data */    MPI_Bcast(&length,1, MPI_LONG, root, MPI_COMM_WORLD);    MPI_Bcast(&offset,1, MPI_LONG, root, MPI_COMM_WORLD);    MPI_Bcast(&iterations,1, MPI_INT, root, MPI_COMM_WORLD);#ifndef STATIC_ALLOCATION    space = (3*length + 2*offset)*sizeof(double);    a = (double *) malloc(space);    if (!a && my_ID == root) {        printf("ERROR: Could not allocate %ld bytes for vectors/n", (long int)space);        error = 1;    }    bail_out(error);#endif    b = a + length + offset;    c = b + length + offset;    bytes   = 3.0 * sizeof(double) * length * Num_procs;    if (my_ID == root) {        printf("Number of processes  = %d/n", Num_procs);        printf("Vector length        = %ld/n", total_length);        printf("Offset               = %ld/n", offset);        printf("Number of iterations = %d/n", iterations);    }#pragma vector always    for (j=0; j<length; j++) {        a[j] = 0.0;        b[j] = 2.0;        c[j] = 2.0;    }    /* --- MAIN LOOP --- repeat Triad iterations times --- */    scalar = SCALAR;    for (iter=0; iter<iterations; iter++) {        MPI_Barrier(MPI_COMM_WORLD);        if (my_ID == root) {            nstream_time = wtime();        }#pragma vector always        for (j=0; j<length; j++) a[j] = b[j]+scalar*c[j];        if (my_ID == root) {            if (iter>0 || iterations==1) { /* 
skip the first iteration */                nstream_time = wtime() - nstream_time;                avgtime = avgtime + nstream_time;                mintime = MIN(mintime, nstream_time);                maxtime = MAX(maxtime, nstream_time);            }        }        /* insert a dependency between iterations to avoid dead-code elimination */#pragma vector always        for (j=0; j<length; j++) b[j] = a[j];    }    /*********************************************************************    ** Analyze and output results.    *********************************************************************/    if (my_ID == root) {        if (checkTRIADresults(iterations, length)) {            avgtime = avgtime/(double)(MAX(iterations-1,1));            printf("Rate (MB/s): %lf, Avg time (s): %lf, Min time (s): %lf",                   1.0E-06 * bytes/mintime, avgtime, mintime);            printf(", Max time (s): %lf/n", maxtime);        }        else error = 1;    }    bail_out(error);    MPI_Finalize();}
开发者ID:molguin-qc,项目名称:ParResKernels,代码行数:101,


示例12: main

int main( int argc, char *argv[] ){    unsigned iter;    FILE *infile, *resfile;    char *resfilename;    // algorithmic parameters    algoparam_t param;    int np;    double runtime, flop;    double residual=0.0;    // check arguments    if( argc < 2 )    {	usage( argv[0] );	return 1;    }    // check input file    if( !(infile=fopen(argv[1], "r"))  )     {	fprintf(stderr, 		"/nError: Cannot open /"%s/" for reading./n/n", argv[1]);      	usage(argv[0]);	return 1;    }    // check result file    resfilename= (argc>=3) ? argv[2]:"heat.ppm";    if( !(resfile=fopen(resfilename, "w")) )    {	fprintf(stderr, 		"/nError: Cannot open /"%s/" for writing./n/n", 		resfilename);	usage(argv[0]);	return 1;    }    // check input    if( !read_input(infile, &param) )    {	fprintf(stderr, "/nError: Error parsing input file./n/n");	usage(argv[0]);	return 1;    }    print_params(&param);    if( !initialize(&param) )	{	    fprintf(stderr, "Error in Solver initialization./n/n");	    usage(argv[0]);            return 1;	}    // full size (param.resolution are only the inner points)    np = param.resolution + 2;    #if _EXTRAE_    Extrae_init();#endif    // starting time    runtime = wtime();    iter = 0;    while(1) {	switch( param.algorithm ) {	    case 0: // JACOBI	            residual = relax_jacobi(param.u, param.uhelp, np, np);		    // Copy uhelp into u		    copy_mat(param.uhelp, param.u, np, np);		    break;	    case 1: // GAUSS		    residual = relax_gauss(param.u, np, np);		    break;	    }        iter++;        // solution good enough ?        if (residual < 0.00005) break;        // max. iteration reached ? (no limit with maxiter=0)        if (param.maxiter>0 && iter>=param.maxiter) break;    }    // Flop count after iter iterations    flop = iter * 11.0 * param.resolution * param.resolution;    // stopping time    runtime = wtime() - runtime;#if _EXTRAE_    Extrae_fini();#endif    fprintf(stdout, "Time: %04.3f /n", runtime);//.........这里部分代码省略.........
开发者ID:AlbertSuarez,项目名称:PAR-Labs,代码行数:101,


示例13: main

int main(int argc, char* argv[]){    double t1, t2, t3, t4, t5;    double sum1, sum2, sum3, sum4;    int arg = 1, len = 0, iters = 0, verb = 0, run = 1;    int do_vcopy = 1, do_vadd = 1, do_vjacobi = 1;    while(argc>arg) {        if      (strcmp(argv[arg],"-v")==0)  verb++;        else if (strcmp(argv[arg],"-vv")==0) verb+=2;        else if (strcmp(argv[arg],"-n")==0)  run = 0;        else if (strcmp(argv[arg],"-c")==0)  do_vadd = 0,  do_vjacobi = 0;        else if (strcmp(argv[arg],"-a")==0)  do_vcopy = 0, do_vjacobi = 0;        else if (strcmp(argv[arg],"-j")==0)  do_vcopy = 0, do_vadd = 0;        else            break;        arg++;    }    if (argc>arg) { len   = atoi(argv[arg]); arg++; }    if (argc>arg) { iters = atoi(argv[arg]); arg++; }    if (len == 0) len = 10000;    if (iters == 0) iters = 20;    len = len * 1000;    printf("Alloc/init 3 double arrays of length %d .../n", len);    double* a = (double*) malloc(len * sizeof(double));    double* b = (double*) malloc(len * sizeof(double));    double* c = (double*) malloc(len * sizeof(double));    for(int i = 0; i<len; i++) {        a[i] = 1.0;        b[i] = (double) (i % 20);        c[i] = 3.0;    }    // Generate vectorized variants & run against naive/original#if __AVX__    bool do32 = true;#else    bool do32 = false;#endif    // vcopy    if (do_vcopy) {        vcopy_t vcopy16, vcopy32;        Rewriter* rc16 = dbrew_new();        if (verb>1) dbrew_verbose(rc16, true, true, true);        dbrew_set_function(rc16, (uint64_t) vcopy);        dbrew_config_parcount(rc16, 3);        dbrew_config_force_unknown(rc16, 0);        dbrew_set_vectorsize(rc16, 16);        vcopy16 = (vcopy_t) dbrew_rewrite(rc16, a, b, len);        if (verb) decode_func(rc16, "vcopy16");        if (do32) {            Rewriter* rc32 = dbrew_new();            if (verb>1) dbrew_verbose(rc32, true, true, true);            dbrew_set_function(rc32, (uint64_t) vcopy);            dbrew_config_parcount(rc32, 3);            
dbrew_config_force_unknown(rc32, 0);            dbrew_set_vectorsize(rc32, 32);            vcopy32 = (vcopy_t) dbrew_rewrite(rc32, a, b, len);            if (verb) decode_func(rc32, "vcopy32");        }        printf("Running %d iterations of vcopy .../n", iters);        t1 = wtime();        for(int iter = 0; iter < iters; iter++)            naive_vcopy(a, b, len);        t2 = wtime();        for(int iter = 0; iter < iters; iter++)            vcopy(a, b, len);        t3 = wtime();        if (run)            for(int iter = 0; iter < iters; iter++)                vcopy16(a, b, len);        t4 = wtime();        if (do32 && run)            for(int iter = 0; iter < iters; iter++)                vcopy32(a, b, len);        t5 = wtime();        printf("  naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s",               t2-t1, t3-t2, t4-t3);        if (do32)            printf(", rewritten-32: %.3f s", t5-t4);        printf("/n");    }    // vadd    if (do_vadd) {        vadd_t vadd16, vadd32;        Rewriter* ra16 = dbrew_new();        if (verb>1) dbrew_verbose(ra16, true, true, true);        dbrew_set_function(ra16, (uint64_t) vadd);        dbrew_config_parcount(ra16, 4);        dbrew_config_force_unknown(ra16, 0);//.........这里部分代码省略.........
开发者ID:lrr-tum,项目名称:dbrew,代码行数:101,


示例14: main

//.........这里部分代码省略.........    bail_out(error);  }  /* Fill the original column matrix                                             */  istart = 0;    int chunk_size = Block_order/group_size;  if (tiling) {      for (j=shm_ID*chunk_size;j<(shm_ID+1)*chunk_size;j+=Tile_order) {      for (i=0;i<order; i+=Tile_order)         for (jt=j; jt<MIN((shm_ID+1)*chunk_size,j+Tile_order); jt++)          for (it=i; it<MIN(order,i+Tile_order); it++) {            A(it,jt) = (double) ((double)order*(jt+colstart) + it);            B(it,jt) = -1.0;          }    }  }  else {    for (j=shm_ID*chunk_size;j<(shm_ID+1)*chunk_size;j++)       for (i=0;i<order; i++) {        A(i,j) = (double)((double)order*(j+colstart) + i);        B(i,j) = -1.0;      }  }  /* NEED A STORE FENCE HERE                                                     */  MPI_Win_sync(shm_win_A);  MPI_Win_sync(shm_win_B);  MPI_Barrier(shm_comm);  for (iter=0; iter<=iterations; iter++) {    /* start timer after a warmup iteration */    if (iter == 1) {       MPI_Barrier(MPI_COMM_WORLD);      local_trans_time = wtime();    }    /* do the local transpose                                                    */    istart = colstart;     if (!tiling) {      for (i=shm_ID*chunk_size; i<(shm_ID+1)*chunk_size; i++) {        for (j=0; j<Block_order; j++)               B(j,i) = A(i,j);	}    }    else {      for (i=shm_ID*chunk_size; i<(shm_ID+1)*chunk_size; i+=Tile_order) {        for (j=0; j<Block_order; j+=Tile_order)           for (it=i; it<MIN(Block_order,i+Tile_order); it++)            for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) {              B(jt,it) = A(it,jt); 	    }      }    }    for (phase=1; phase<Num_groups; phase++){      recv_from = ((group_ID + phase             )%Num_groups);      send_to   = ((group_ID - phase + Num_groups)%Num_groups);      istart = send_to*Block_order;       if (!tiling) {        for (i=shm_ID*chunk_size; i<(shm_ID+1)*chunk_size; i++)           for (j=0; j<Block_order; j++){	    Work_out(j,i) 
= A(i,j);	  }      }      else {
开发者ID:nchaimov,项目名称:ParResKernels,代码行数:67,


示例15: multiply_by_blas

/*---------------------------------------------------------------------------
 *
 * Compute matrix product using BLAS routine DGEMM.
 *
 * Input
 *   int argc        - length of argv[] array
 *   char* argv[]    - pointer to command line parameter array; argv[0],
 *                     argv[1] and argv[2] are read as rows, mids and cols
 *   int verbosity   - program verification: verbosity > 0 gives more output
 *
 * Output
 *   double          - elapsed time for product computation
 *
 * NOTE(review): argc is not checked before argv[0..2] are read; the caller
 * is presumably expected to pass at least three arguments -- confirm.
 */
double multiply_by_blas( int argc, char* argv[], int verbosity )
{
    int rows, cols, mids;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    rows = atoi( argv[0] );
    mids = atoi( argv[1] );
    cols = atoi( argv[2] );
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "BLAS: rows = %d, mids = %d, columns = %d\n",
                rows, mids, cols );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );
    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product: There is an implicit matrix transpose when
     * passing from Fortran to C and vice-versa.  To compute C :=
     * alpha * A * B + beta * C we use dgemm() to compute C' := alpha
     * * B' * A' + beta * C'.  The first two arguments to dgemm() are
     * 'N' indicating we don't want a transpose in addition to the
     * implicit one.  The matrices A and B are passed in reverse order
     * so dgemm() receives (after the implicit transpose) B' and A'.
     * Arguments 3 and 4 are the dimensions of C' and argument 5 is
     * the column dimension of B' (and the row dimension of A').
     */
    t1 = wtime();
    dgemm( 'N', 'N', cols, rows, mids, 1.0, &b[0][0], cols, &a[0][0], mids,
           0.0, &c[0][0], cols );
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
        printf( "checksum = %f\n", checksum( c, rows, cols ) );

    printf( "BLAS:        %6.3f secs %6.3f gflops ( %5d x %5d x %5d )\n",
            sec, gflop_count / sec, rows, mids, cols );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return t2 - t1;
}
开发者ID:gordon-cs,项目名称:cps343-hoe,代码行数:74,


示例16: main

int main(int argc, char ** argv){  int    vector_length;   /* length of vectors to be aggregated            */  int    total_length;    /* bytes needed to store reduction vectors       */  double reduce_time,     /* timing parameters                             */         avgtime = 0.0,          maxtime = 0.0,          mintime = 366.0*24.0*3600.0; /* set the minimum time to a large                              value; one leap year should be enough           */  double epsilon=1.e-8;   /* error tolerance                                 */  int    i, iter;         /* dummies                                         */  double element_value;   /* reference element value for final vector        */  int    iterations;      /* number of times the reduction is carried out    */  static double           /* use static so it goes on the heap, not stack    */  RESTRICT vector[MEMWORDS];/* we would like to allocate "vector" dynamically,                              but need to be able to flush the thing in some                              versions of the reduction algorithm -> static   *//******************************************************************************* process and test input parameters    ******************************************************************************/  if (argc != 3){    printf("Usage:     %s <# iterations> <vector length>/n", *argv);    return(EXIT_FAILURE);  }  iterations = atoi(*++argv);  if (iterations < 1){    printf("ERROR: Iterations must be positive : %d /n", iterations);    exit(EXIT_FAILURE);  }  vector_length  = atoi(*++argv);  if (vector_length < 1){    printf("ERROR: vector length must be >= 1 : %d /n",vector_length);    exit(EXIT_FAILURE);  }  /*  make sure we stay within the memory allocated for vector               */  total_length = 2*vector_length;  if (total_length/2 != vector_length || total_length > MEMWORDS) {    printf("Vector length of %d too large; ", vector_length);    printf("increase MEMWORDS in Makefile or reduce 
vector length/n");    exit(EXIT_FAILURE);  }  printf("Serial Vector Reduction/n");  printf("Vector length                  = %d/n", vector_length);  printf("Number of iterations           = %d/n", iterations);  for (iter=0; iter<iterations; iter++) {    /* initialize the arrays, assuming first-touch memory placement          */    for (i=0; i<vector_length; i++) {      VEC0(i) = (double)(1);      VEC1(i) = (double)(2);    }       reduce_time = wtime();    /* do actual reduction                                                   */    /* first do the "local" part, which is the same for all algorithms       */    for (i=0; i<vector_length; i++) {      VEC0(i) += VEC1(i);    }    reduce_time = wtime() - reduce_time;#ifdef VERBOSE    printf("/nFinished with reduction, using %lf seconds /n", reduce_time);#endif    if (iter>0 || iterations==1) { /* skip the first iteration               */      avgtime = avgtime + reduce_time;      mintime = MIN(mintime, reduce_time);      maxtime = MAX(maxtime, reduce_time);    }  } /* end of iter loop                                                      */  /* verify correctness */  element_value = (2.0+1.0);  for (i=0; i<vector_length; i++) {    if (ABS(VEC0(i) - element_value) >= epsilon) {       printf("First error at i=%d; value: %lf; reference value: %lf/n",              i, VEC0(i), element_value);       exit(EXIT_FAILURE);    }  }  printf("Solution validates/n");#ifdef VERBOSE  printf("Element verification value: %lf/n", element_value);#endif  avgtime = avgtime/(double)(MAX(iterations-1,1));  printf("Rate (MFlops/s): %lf,  Avg time (s): %lf,  Min time (s): %lf",         1.0E-06 * (2.0-1.0)*vector_length/mintime, avgtime, mintime);  printf(", Max time (s): %lf/n", maxtime);  exit(EXIT_SUCCESS);}
开发者ID:jbreitbart,项目名称:Kernels,代码行数:100,


示例17: xmp_sync_memory

      if(me == 1){        local_buf[0:size] = target_buf[0:size]:[target];	xmp_sync_memory(NULL);#ifdef DEBUG	if(local_buf[0] != '2' && local_buf[size-1] != '2') fprintf(stderr, "Error !/n");	local_buf[0] = '1'; local_buf[size-1] = '1';#endif	xmp_sync_all(NULL);      }      else{	xmp_sync_all(NULL);	local_buf[0:size] = target_buf[0:size]:[target];#ifdef DEBUG        if(local_buf[0] != '1' && local_buf[size-1] != '1') fprintf(stderr, "Error !/n");	local_buf[0] = '2'; local_buf[size-1] = '2';#endif      }      xmp_sync_all(NULL);    }    xmp_sync_all(NULL);    t = wtime() - t;    if(me == 1)      print_results(size, t);  }  return 0;}
开发者ID:mnakao,项目名称:pingpong,代码行数:29,


示例18: wtime

static doublewtime(){    static struct timeval tv0 = {.tv_sec = 0};    struct timeval tv;    int cc;    cc = gettimeofday(&tv, 0);    assert(cc == 0);    if (tv0.tv_sec == 0) {	tv0 = tv;	assert(tv0.tv_sec != 0);    }    double dt = ((double)(tv.tv_sec - tv0.tv_sec)		 + ((double)(tv.tv_usec - tv0.tv_usec) * 1e-6));    return dt;}/* Puts 200 key-value pairs to output KVO.  It is a map-function.  It   runs only on rank0.  Inputs (KV0 and KVS0) are dummy. */static intaddkeysfn(const struct kmr_kv_box kv0,	  const KMR_KVS *kvs0, KMR_KVS *kvo, void *p, const long ind){    assert(kvs0 == 0 && kv0.klen == 0 && kv0.vlen == 0 && kvo != 0);    char k[80];    char v[80];    int cc;    for (int i = 0; i < 200; i++) {	snprintf(k, 80, "key%d", i);	snprintf(v, 80, "value%d", i);	struct kmr_kv_box kv = {	    .klen = (int)(strlen(k) + 1),	    .vlen = (int)(strlen(v) + 1),	    .k.p = k,	    .v.p = v	};	cc = kmr_add_kv(kvo, kv);	assert(cc == MPI_SUCCESS);    }    return MPI_SUCCESS;}static intreplacevaluefn(const struct kmr_kv_box kv0,	       const KMR_KVS *kvs0, KMR_KVS *kvo, void *p,	       const long i){    assert(kvs0 != 0 && kvo != 0);    int cc, x;    char gomi;    cc = sscanf((&((char *)kv0.k.p)[3]), "%d%c", &x, &gomi);    assert(cc == 1);    char v[80];    snprintf(v, 10, "newvalue%d", x);    struct kmr_kv_box kv = {.klen = kv0.klen,			    .vlen = (int)(strlen(v) + 1),			    .k.p = kv0.k.p,			    .v.p = v    };    cc = kmr_add_kv(kvo, kv);    assert(cc == MPI_SUCCESS);    return MPI_SUCCESS;}static intemptyreducefn(const struct kmr_kv_box kv[], const long n,	      const KMR_KVS *kvs, KMR_KVS *kvo, void *p){    return MPI_SUCCESS;}/* Do KMR operations many times. */static voidsimple0(int nprocs, int rank){    int cc;    KMR *mr = kmr_create_context(MPI_COMM_WORLD, MPI_INFO_NULL, 0);    double t0, t1;    t0 = wtime();    for (int i = 0; i < 10000; i++) {	/* Check timeout. 
*/	t1 = wtime();	KMR_KVS *to0 = kmr_create_kvs(mr, KMR_KV_INTEGER, KMR_KV_INTEGER);	if (rank == 0) {	    struct kmr_kv_box kv = {		.klen = (int)sizeof(long),		.vlen = (int)sizeof(long),		.k.i = 0,		.v.i = ((t1 - t0) > 20.0)	    };	    cc = kmr_add_kv(to0, kv);	    assert(cc == MPI_SUCCESS);	}//.........这里部分代码省略.........
开发者ID:hisashiyashiro,项目名称:kmr,代码行数:101,


示例19: main

//.........这里部分代码省略.........  }  #pragma omp parallel private(i, old_size, group_size, my_ID, iter, start, end, /                               segment_size, stage, id, my_donor, my_segment)   {  my_ID = omp_get_thread_num();  #pragma omp master   {  nthread = omp_get_num_threads();  if (nthread != nthread_input) {    num_error = 1;    printf("ERROR: number of requested threads %d does not equal ",           nthread_input);    printf("number of spawned threads %d/n", nthread);  }   else {    printf("Number of threads              = %d/n",nthread_input);    printf("Vector length                  = %ld/n", vector_length);    printf("Reduction algorithm            = %s/n", algorithm);    printf("Number of iterations           = %d/n", iterations);  }  }  bail_out(num_error);  for (iter=0; iter<=iterations; iter++) {    /* start timer after a warmup iteration                                        */    if (iter == 1) {       #pragma omp barrier      #pragma omp master      {        reduce_time = wtime();      }    }    /* in case of the long-optimal algorithm we need a barrier before the       reinitialization to make sure that we don't overwrite parts of the       vector before other threads are done with those parts                 */    if (intalgorithm == LONG_OPTIMAL) {      #pragma omp barrier    }    /* initialize the arrays, assuming first-touch memory placement          */    for (i=0; i<vector_length; i++) {      VEC0(my_ID,i) = (double)(my_ID+1);      VEC1(my_ID,i) = (double)(my_ID+1+nthread);    }       if (intalgorithm == BINARY_P2P) {      /* we need a barrier before setting all flags to zero, to avoid          zeroing some that are still in use in a previous iteration          */      #pragma omp barrier      flag(my_ID) = 0;      /* we also need a barrier after setting the flags, to make each is         visible to all threads, and to synchronize before the timer starts  */      #pragma omp barrier    }        /* do actual reduction                      
                             */    /* first do the "local" part, which is the same for all algorithms       */    for (i=0; i<vector_length; i++) {      VEC0(my_ID,i) += VEC1(my_ID,i);
开发者ID:davidozog,项目名称:Kernels,代码行数:67,


示例20: main

//.........这里部分代码省略.........  /* intialize the input and output arrays                                     */  #pragma omp parallel for private (i)  for (j=jstart; j<=jend; j++) for (i=istart; i<=iend; i++) {    IN(i,j)  = COEFX*i+COEFY*j;    OUT(i,j) = (DTYPE)0.0;  }  /* allocate communication buffers for halo values                            */  top_buf_out = (DTYPE *) prk_malloc(4*sizeof(DTYPE)*RADIUS*width);  if (!top_buf_out) {    printf("ERROR: Rank %d could not allocated comm buffers for y-direction/n", my_ID);    error = 1;  }  bail_out(error);  top_buf_in     = top_buf_out +   RADIUS*width;  bottom_buf_out = top_buf_out + 2*RADIUS*width;  bottom_buf_in  = top_buf_out + 3*RADIUS*width;  right_buf_out  = (DTYPE *) prk_malloc(4*sizeof(DTYPE)*RADIUS*height);  if (!right_buf_out) {    printf("ERROR: Rank %d could not allocated comm buffers for x-direction/n", my_ID);    error = 1;  }  bail_out(error);  right_buf_in   = right_buf_out +   RADIUS*height;  left_buf_out   = right_buf_out + 2*RADIUS*height;  left_buf_in    = right_buf_out + 3*RADIUS*height;  for (iter = 0; iter<=iterations; iter++){    /* start timer after a warmup iteration */    if (iter == 1) {      MPI_Barrier(MPI_COMM_WORLD);      local_stencil_time = wtime();    }    /* need to fetch ghost point data from neighbors in y-direction                 */    if (my_IDy < Num_procsy-1) {      MPI_Irecv(top_buf_in, RADIUS*width, MPI_DTYPE, top_nbr, 101,                MPI_COMM_WORLD, &(request[1]));      for (kk=0,j=jend-RADIUS+1; j<=jend; j++) for (i=istart; i<=iend; i++) {          top_buf_out[kk++]= IN(i,j);      }      MPI_Isend(top_buf_out, RADIUS*width,MPI_DTYPE, top_nbr, 99,                MPI_COMM_WORLD, &(request[0]));    }    if (my_IDy > 0) {      MPI_Irecv(bottom_buf_in,RADIUS*width, MPI_DTYPE, bottom_nbr, 99,                MPI_COMM_WORLD, &(request[3]));      for (kk=0,j=jstart; j<=jstart+RADIUS-1; j++) for (i=istart; i<=iend; i++) {          bottom_buf_out[kk++]= IN(i,j);      }      
MPI_Isend(bottom_buf_out, RADIUS*width,MPI_DTYPE, bottom_nbr, 101,                MPI_COMM_WORLD, &(request[2]));    }    if (my_IDy < Num_procsy-1) {      MPI_Wait(&(request[0]), MPI_STATUS_IGNORE);      MPI_Wait(&(request[1]), MPI_STATUS_IGNORE);      for (kk=0,j=jend+1; j<=jend+RADIUS; j++) for (i=istart; i<=iend; i++) {          IN(i,j) = top_buf_in[kk++];      }    }    if (my_IDy > 0) {      MPI_Wait(&(request[2]), MPI_STATUS_IGNORE);      MPI_Wait(&(request[3]), MPI_STATUS_IGNORE);      for (kk=0,j=jstart-RADIUS; j<=jstart-1; j++) for (i=istart; i<=iend; i++) {
开发者ID:elliottslaughter,项目名称:Kernels,代码行数:67,


示例21: multiply_by_tiles

/*---------------------------------------------------------------------------
 *
 * Compute a matrix product using tiling and report the elapsed time.
 *
 * The iteration space is walked tile by tile (rows, then columns, then the
 * shared "mid" dimension); each tile product is delegated to do_product().
 * Tiles on the trailing edge are clamped so they never run past the matrix.
 *
 * Input
 *   int argc        - length of argv[] array; must be at least 6
 *   char* argv[]    - numeric parameters, in this order:
 *                       argv[0] rows            argv[3] rows per tile
 *                       argv[1] mids            argv[4] mids per tile
 *                       argv[2] columns         argv[5] columns per tile
 *   int verbosity   - program verification: verbosity > 0 echoes the
 *                     parameters, verbosity > 1 also prints a checksum
 *   char* order     - string indicating loop order, e.g., "ijk" or "jki";
 *                     in this function it is only echoed in the output
 *
 * Output
 *   double          - elapsed time for product computation, or -1.0 if the
 *                     arguments are unusable (too few of them, or a tile
 *                     size that is not positive)
 */
double multiply_by_tiles( int argc, char* argv[], int verbosity, char* order )
{
    int rows, cols, mids;
    int rows_per_tile, cols_per_tile, mids_per_tile;
    int row_start, row_end;
    int col_start, col_end;
    int mid_start, mid_end;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    if ( argc < 6 )
    {
        /* argv[0]..argv[5] are read below; fewer entries would be an
         * out-of-bounds access */
        fprintf( stderr,
                 "multiply_by_tiles: expected 6 arguments, got %d\n", argc );
        return -1.0;
    }

    rows = atoi( argv[0] );
    mids = atoi( argv[1] );
    cols = atoi( argv[2] );
    rows_per_tile = atoi( argv[3] );
    mids_per_tile = atoi( argv[4] );
    cols_per_tile = atoi( argv[5] );

    /*
     * The tile sizes are the loop strides below.  A stride of zero (which is
     * also what atoi() yields for non-numeric text) would loop forever and a
     * negative one would produce negative indices, so reject both up front.
     */
    if ( rows_per_tile <= 0 || mids_per_tile <= 0 || cols_per_tile <= 0 )
    {
        fprintf( stderr,
                 "multiply_by_tiles: tile sizes must be positive "
                 "(got %d x %d x %d)\n",
                 rows_per_tile, mids_per_tile, cols_per_tile );
        return -1.0;
    }

    /* 2.0 forces the product into double, so large sizes do not overflow int */
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "Tiles(%3s): rows = %d, mids = %d, columns = %d\n",
                order, rows, mids, cols );
        printf( "block rows = %d, mids = %d, columns = %d\n",
                rows_per_tile, mids_per_tile, cols_per_tile );
    }

    /*
     * allocate and initialize matrices
     *
     * NOTE(review): the allocation results are not checked because the
     * failure convention of allocateMatrix() is not visible here - confirm.
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );
    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product
     */
    t1 = wtime();
    for ( row_start = 0; row_start < rows; row_start += rows_per_tile )
    {
        /* inclusive end index, clamped for a partial last tile */
        row_end = row_start + rows_per_tile - 1;
        if ( row_end >= rows ) row_end = rows - 1;
        for ( col_start = 0; col_start < cols; col_start += cols_per_tile )
        {
            col_end = col_start + cols_per_tile - 1;
            if ( col_end >= cols ) col_end = cols - 1;
            for ( mid_start = 0; mid_start < mids; mid_start += mids_per_tile )
            {
                mid_end = mid_start + mids_per_tile - 1;
                if ( mid_end >= mids ) mid_end = mids - 1;
                do_product( a, b, c, row_start, row_end, col_start,
                            col_end, mid_start, mid_end );
            }
        }
    }
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
        printf( "checksum = %f\n", checksum( c, rows, cols ) );

    printf( "tiles(%3s):  %6.3f secs %6.3f gflops ",
            order, sec, gflop_count / sec );
    printf( "( %5d x %5d x %5d ) ( %4d x %4d x %4d )\n",
            rows, mids, cols, rows_per_tile, mids_per_tile,
            cols_per_tile );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return sec;
}
开发者ID:gordon-cs,项目名称:cps343-hoe,代码行数:95,


示例22: main

//.........这里部分代码省略.........  }  bail_out(error);  right_buf_in   = right_buf_out +   RADIUS*height_rank;  left_buf_out   = right_buf_out + 2*RADIUS*height_rank;  left_buf_in    = right_buf_out + 3*RADIUS*height_rank;    /* fill the stencil weights to reflect a discrete divergence operator         */  for (int jj=-RADIUS; jj<=RADIUS; jj++) for (int ii=-RADIUS; ii<=RADIUS; ii++)    WEIGHT(ii,jj) = (DTYPE) 0.0;  stencil_size = 4*RADIUS+1;  for (int ii=1; ii<=RADIUS; ii++) {    WEIGHT(0, ii) = WEIGHT( ii,0) =  (DTYPE) (1.0/(2.0*ii*RADIUS));    WEIGHT(0,-ii) = WEIGHT(-ii,0) = -(DTYPE) (1.0/(2.0*ii*RADIUS));  }  norm = (DTYPE) 0.0;  f_active_points = (DTYPE) (n-2*RADIUS)*(DTYPE) (n-2*RADIUS);  /* intialize the input and output arrays                                     */  for (int j=jstart_rank; j<=jend_rank; j++) for (int i=istart_rank; i<=iend_rank; i++) {    IN(i,j)  = COEFX*i+COEFY*j;    OUT(i,j) = (DTYPE)0.0;  }  /* LOAD/STORE FENCE */  MPI_Win_sync(shm_win_in);  MPI_Win_sync(shm_win_out);  MPI_Barrier(shm_comm);  for (iter = 0; iter<=iterations; iter++){    /* start timer after a warmup iteration */    if (iter == 1) {      MPI_Barrier(MPI_COMM_WORLD);      local_stencil_time = wtime();    }    /* need to fetch ghost point data from neighbors in y-direction                 */    if (top_nbr != -1) {      MPI_Irecv(top_buf_in, RADIUS*width_rank, MPI_DTYPE, top_nbr, 101,                MPI_COMM_WORLD, &(request[1]));      for (int kk=0,j=jend_rank-RADIUS+1; j<=jend_rank; j++)      for (int i=istart_rank; i<=iend_rank; i++) {        top_buf_out[kk++]= IN(i,j);      }      MPI_Isend(top_buf_out, RADIUS*width_rank,MPI_DTYPE, top_nbr, 99,                MPI_COMM_WORLD, &(request[0]));    }    if (bottom_nbr != -1) {      MPI_Irecv(bottom_buf_in,RADIUS*width_rank, MPI_DTYPE, bottom_nbr, 99,                MPI_COMM_WORLD, &(request[3]));      for (int kk=0,j=jstart_rank; j<=jstart_rank+RADIUS-1; j++)      for (int i=istart_rank; i<=iend_rank; i++) {        
bottom_buf_out[kk++]= IN(i,j);      }      MPI_Isend(bottom_buf_out, RADIUS*width_rank,MPI_DTYPE, bottom_nbr, 101, 	  MPI_COMM_WORLD, &(request[2]));      }    if (top_nbr != -1) {      MPI_Wait(&(request[0]), MPI_STATUS_IGNORE);      MPI_Wait(&(request[1]), MPI_STATUS_IGNORE);      for (int kk=0,j=jend_rank+1; j<=jend_rank+RADIUS; j++)      for (int i=istart_rank; i<=iend_rank; i++) {        IN(i,j) = top_buf_in[kk++];      }
开发者ID:elliottslaughter,项目名称:Kernels,代码行数:67,


示例23: main

//.........这里部分代码省略.........  #pragma omp parallel  {  #pragma omp master  {  nthread = omp_get_num_threads();  if (nthread != nthread_input) {    num_error = 1;    printf("ERROR: number of requested threads %d does not equal ",           nthread_input);    printf("number of spawned threads %d/n", nthread);  }  else {    printf("Number of threads              = %d/n",nthread_input);    printf("Grid size                      = %lld/n", L);    printf("Number of particles requested  = %lld/n", n);    printf("Number of time steps           = %lld/n", iterations);    printf("Initialization mode            = %s/n", init_mode);    switch(particle_mode) {    case GEOMETRIC: printf("  Attenuation factor           = %lf/n", rho);    break;    case SINUSOIDAL:                                                          break;    case LINEAR:    printf("  Negative slope               = %lf/n", alpha);                    printf("  Offset                       = %lf/n", beta);   break;    case PATCH:     printf("  Bounding box                 = %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "/n",                           init_patch.left, init_patch.right,                           init_patch.bottom, init_patch.top);                break;    default:        printf("ERROR: Unsupported particle initializating mode/n");                     exit(FAILURE);    }    printf("Particle charge semi-increment = %"PRIu64"/n", k);    printf("Vertical velocity              = %"PRIu64"/n", m);    /* Initialize grid of charges and particles */    Qgrid = initializeGrid(L);    LCG_init(&dice);    switch(particle_mode) {    case GEOMETRIC:  particles = initializeGeometric(n, L, rho, k, m, &n, &dice);      break;    case SINUSOIDAL: particles = initializeSinusoidal(n, L, k, m, &n, &dice);          break;    case LINEAR:     particles = initializeLinear(n, L, alpha, beta, k, m, &n, &dice); break;    case PATCH:      particles = initializePatch(n, L, init_patch, k, m, &n, &dice);   break;    default:        
 printf("ERROR: Unsupported particle distribution/n");  exit(FAILURE);    }    printf("Number of particles placed     = %lld/n", n);  }  }  bail_out(num_error);  }  for (iter=0; iter<=iterations; iter++) {    /* start the timer after one warm-up time step */    if (iter==1) {      pic_time = wtime();    }    /* Calculate forces on particles and update positions */    #pragma omp parallel for private(i, p, fx, fy, ax, ay)    for (i=0; i<n; i++) {      p = particles;      fx = 0.0;      fy = 0.0;      computeTotalForce(p[i], L, Qgrid, &fx, &fy);      ax = fx * MASS_INV;      ay = fy * MASS_INV;      /* Update particle positions, taking into account periodic boundaries */      p[i].x = fmod(p[i].x + p[i].v_x*DT + 0.5*ax*DT*DT + L, L);      p[i].y = fmod(p[i].y + p[i].v_y*DT + 0.5*ay*DT*DT + L, L);      /* Update velocities */      p[i].v_x += ax * DT;      p[i].v_y += ay * DT;    }  }  pic_time = wtime() - pic_time;  /* Run the verification test */  for (i=0; i<n; i++) {    correctness *= verifyParticle(particles[i], iterations, Qgrid, L);  }  if (correctness) {    printf("Solution validates/n");#ifdef VERBOSE    printf("Simulation time is %lf seconds/n", pic_time);#endif    avg_time = n*iterations/pic_time;    printf("Rate (Mparticles_moved/s): %lf/n", 1.0e-6*avg_time);  } else {    printf("Solution does not validate/n");  }  return(EXIT_SUCCESS);}
开发者ID:ParRes,项目名称:Kernels,代码行数:101,


示例24: main

//.........这里部分代码省略.........  in_arrays[MYTHREAD] = in_array;  out_arrays[MYTHREAD] = out_array;  buf_arrays[MYTHREAD] = buf_array;  double **in_array_private = shared_2d_array_to_private(in_array, sizex, sizey, myoffsetx, myoffsety);  double **out_array_private = shared_2d_array_to_private(out_array, sizex, sizey, myoffsetx, myoffsety);  double **buf_array_private = shared_2d_array_to_private(buf_array, sizex, sizey, myoffsetx, myoffsety);  upc_barrier;  /*********************************************************************  ** Initialize the matrices  *********************************************************************/  for(int y=myoffsety; y<myoffsety + sizey; y++){    for(int x=myoffsetx; x<myoffsetx + sizex; x++){      in_array_private[y][x] = (double) (x+N*y);      out_array[y][x] = -1.0;    }  }  upc_barrier;  for(int y=myoffsety; y<myoffsety + sizey; y++){    for(int x=myoffsetx; x<myoffsetx + sizex; x++){      if(in_array_private[y][x] !=(double) (x+N*y))        die("x=%d y=%d in_array=%f != %f", x, y, in_array[y][x], (x+N*y));      if(out_array_private[y][x] != -1.0)        die("out_array_private error");    }  }  /*********************************************************************  ** Transpose  *********************************************************************/  int transfer_size = sizex * sizex * sizeof(double);  if(MYTHREAD == 0)    debug("transfer size = %d", transfer_size);  for(int iter=0; iter<=num_iterations; iter++){    /* start timer after a warmup iteration */    if(iter == 1){      upc_barrier;      start_time = wtime();    }    for(int i=0; i<THREADS; i++){      int local_blk_id = (MYTHREAD + i) % THREADS;      int remote_blk_id = MYTHREAD;      int remote_thread = local_blk_id;      upc_memget(&buf_array_private[local_blk_id * sizex][myoffsetx],                  &in_arrays[remote_thread][remote_blk_id * sizex][remote_thread * sizex], transfer_size);#define OUT_ARRAY(x,y) out_array_private[local_blk_id * sizex + x][myoffsetx + 
y]#define BUF_ARRAY(x,y) buf_array_private[local_blk_id * sizex + x][myoffsetx + y]      if(!tiling){        for(int x=0; x<sizex; x++){          for(int y=0; y<sizex; y++){            OUT_ARRAY(x,y) = BUF_ARRAY(y,x);          }        }      }      else{        for(int x=0; x<sizex; x+=tile_size){          for(int y=0; y<sizex; y+=tile_size){            for(int bx=x; bx<MIN(sizex, x+tile_size); bx++){              for(int by=y; by<MIN(sizex, y+tile_size); by++){                OUT_ARRAY(bx,by) = BUF_ARRAY(by,bx);              }            }          }        }      }    }    upc_barrier;  }  upc_barrier;  end_time = wtime();  /*********************************************************************  ** Analyze and output results.  *********************************************************************/  for(int y=myoffsety; y<myoffsety + sizey; y++){    for(int x=myoffsetx; x<myoffsetx + sizex; x++){      if(in_array_private[y][x] != (double)(x+ N*y))        die("Error in input: x=%d y=%d", x, y);      if(out_array_private[y][x] != (double)(y + N*x))        die("x=%d y=%d in_array=%f != %f   %d %d", x, y, out_array[y][x], (double)(y + N*x), (int)(out_array[y][x]) % N, (int)(out_array[y][x]) / N);    }  }  if(MYTHREAD == 0){    printf("Solution validates/n");    double transfer_size = 2 * N * N * sizeof(double);    avgtime = (end_time - start_time) / num_iterations;    double rate = transfer_size / avgtime * 1.0E-06;    printf("Rate (MB/s): %lf Avg time (s): %lf/n",rate, avgtime);  }}
开发者ID:beginZero,项目名称:Kernels,代码行数:101,


示例25: read_input

/*********************************************************************** * Read the input file. ***********************************************************************/int read_input ( FILE *fp_in, FILE *fp_out, input_data *input_vars,                 para_data *para_vars, time_data *time_vars ){/*********************************************************************** * Local variables. ***********************************************************************/    double t1, t2;    int ierr = 0;    char *error = NULL;    char *line = NULL;    size_t len = 0;    ssize_t read;    char *tmpData = NULL;    int tmpStrLen, i;/*********************************************************************** * Read the input file. Echo to output file. Call for an input variable * check. Only root reads, echoes, checks input. ***********************************************************************/    t1 = wtime ();    if ( IPROC == ROOT )    {        if ( !fp_in  )        {            tmpStrLen = strlen ("   ***ERROR: READ_INPUT:"                                " Problem reading input file./n");            ALLOC_STR ( error, tmpStrLen + 1, &ierr );            snprintf ( error, tmpStrLen + 1,                       "   ***ERROR: READ_INPUT:"                       " Problem reading input file./n" );            print_error ( fp_out, error, IPROC, ROOT );            FREE ( error );            ierr = 1;        }        else        {            while ( (read = getline(&line, &len, fp_in)) != -1 )            {                i = 0;                while ( isspace(line[i]) )                {                    i++;                }                // Parallel processing inputs                // npey: number of process elements in y-dir                if ( strncmp(&line[i], "npey=", strlen("npey=")) == 0 )                {                    get_input_value ( &line[i], "npey=", &tmpData );                    NPEY = atoi ( tmpData );                }                // npez: input number of process 
elements in z-dir                else if ( strncmp(&line[i], "npez=", strlen("npez=")) == 0 )                {                    get_input_value ( &line[i], "npez=", &tmpData );                    NPEZ = atoi ( tmpData );                }                // ichunk:                else if ( strncmp(&line[i], "ichunk=", strlen("ichunk=")) == 0 )                {                    get_input_value ( &line[i], "ichunk=", &tmpData );                    ICHUNK = atoi ( tmpData );                }                // nthreads: input number of threads                else if ( strncmp(&line[i], "nthreads=", strlen("nthreads=")) == 0 )                {                    get_input_value ( &line[i], "nthreads=", &tmpData );                    NTHREADS = atoi ( tmpData );                }                // nnested:                else if ( strncmp(&line[i], "nnested=", strlen("nnested=")) == 0 )                {                    get_input_value ( &line[i], "nnested=", &tmpData );                    NNESTED = atoi ( tmpData );                }                // Geometry inputs                // ndimen:                else if ( strncmp(&line[i], "ndimen=", strlen("ndimen=")) == 0 )                {                    get_input_value ( &line[i], "ndimen=", &tmpData );                    NDIMEN = atoi ( tmpData );                }                // nx:                else if ( strncmp(&line[i], "nx=", strlen("nx=")) == 0 )                {//.........这里部分代码省略.........
开发者ID:GCZhang,项目名称:SNAP,代码行数:101,


示例26: main

//.........这里部分代码省略.........    /* get space for local blocks of A, B, C                         */    a = (double *) malloc( lda*myncols*sizeof(double) );    b = (double *) malloc( lda*myncols*sizeof(double) );    c = (double *) malloc( lda*myncols*sizeof(double) );    if ( a == NULL || b == NULL || c == NULL ) {        error = 1;        printf("ERROR: Proc %d could not allocate a, b, and/or c/n",my_ID);    }    bail_out(error);    /* get space for two work arrays for dgemm                       */    work1 = (double *) malloc( nb*lda*sizeof(double) );    work2 = (double *) malloc( nb*myncols*sizeof(double) );    if ( !work1 || !work2 ) {        printf("ERROR: Proc %d could not allocate work buffers/n", my_ID);        error = 1;    }    bail_out(error);    /* collect array that holds mynrows from all nodes in my row       of the rank grid (array of all m_i)                           */    MPI_Allgather( &mynrows, 1, MPI_INT, mm, 1, MPI_INT, comm_col );    /* myfrow = first row on my node                                 */    for (myfrow=1,i=0; i<myrow; i++) myfrow += mm[i];    mylrow = myfrow+mynrows-1;    /* collect array that holds myncols from all nodes in my column       of the rank grid (array of all n_j)                           */    MPI_Allgather( &myncols, 1, MPI_INT, nn, 1, MPI_INT, comm_row );    /* myfcol = first col on my node                                 */    for (myfcol=1,i=0; i<mycol; i++) myfcol += nn[i];    mylcol = myfcol+myncols-1;    /* initialize matrices A, B, and C                               */    ldc = ldb = lda;    for (jj=0, j=myfcol; j<=mylcol; j++,jj++ )        for (ii=0, i=myfrow; i<=mylrow; i++, ii++ ) {            A(ii,jj) = (double) (j-1);            B(ii,jj) = (double) (j-1);            C(ii,jj) = 0.0;        }    for (iter=0; iter<=iterations; iter++) {        /* start timer after a warmup iteration */        if (iter == 1) {            MPI_Barrier(MPI_COMM_WORLD);            local_dgemm_time = wtime();        }        /* 
actual matrix-vector multiply                               */        dgemm(order, nb, inner_block_flag, a, lda, b, lda, c, lda,              mm, nn, comm_row, comm_col, work1, work2 );    } /* end of iterations                                           */    local_dgemm_time = wtime() - local_dgemm_time;    MPI_Reduce(&local_dgemm_time, &dgemm_time, 1, MPI_DOUBLE, MPI_MAX, root,               MPI_COMM_WORLD);    /* verification test                                             */    for (jj=0, j=myfcol; j<=mylcol; j++, jj++)        for (ii=0, i=myfrow; i<=mylrow; i++, ii++)            checksum_local += C(ii,jj);    MPI_Reduce(&checksum_local, &checksum, 1, MPI_DOUBLE, MPI_SUM,               root, MPI_COMM_WORLD);    forder = (double) order;    ref_checksum = (0.25*forder*forder*forder*(forder-1.0)*(forder-1.0));    ref_checksum *= (iterations+1);    if (my_ID == root) {        if (ABS((checksum - ref_checksum)/ref_checksum) > epsilon) {            printf("ERROR: Checksum = %lf, Reference checksum = %lf/n",                   checksum, ref_checksum);            error = 1;        }        else {            printf("Solution validates/n");#ifdef VERBOSE            printf("Reference checksum = %lf, checksum = %lf/n",                   ref_checksum, checksum);#endif        }    }    bail_out(error);    /* report elapsed time                                           */    nflops = 2.0*forder*forder*forder;    if ( my_ID == root ) {        avgtime = dgemm_time/iterations;        printf("Rate (MFlops/s): %lf Avg time (s): %lf/n",               1.0E-06 * nflops/avgtime, avgtime);    }    MPI_Finalize();}
开发者ID:nchaimov,项目名称:ParResKernels,代码行数:101,


示例27: main

//.........这里部分代码省略.........  }  bail_out(error);  top_buf_in[1]    = top_buf_in[0]    + RADIUS*width;  bottom_buf_in[0] = top_buf_in[1]    + RADIUS*width;  bottom_buf_in[1] = bottom_buf_in[0] + RADIUS*width;   right_buf_out=(DTYPE*)malloc(2*sizeof(DTYPE)*RADIUS*height);  if (!right_buf_out) {    printf("ERROR: Rank %d could not allocate output comm buffers for x-direction/n", my_ID);    error = 1;  }  bail_out(error);  left_buf_out=right_buf_out+RADIUS*height;  right_buf_in[0]=(DTYPE*)prk_shmem_malloc(4*sizeof(DTYPE)*RADIUS*height);  if(!right_buf_in)  {    printf("ERROR: Rank %d could not allocate input comm buffers for x-dimension/n", my_ID);    error=1;  }  bail_out(error);  right_buf_in[1] = right_buf_in[0] + RADIUS*height;  left_buf_in[0]  = right_buf_in[1] + RADIUS*height;  left_buf_in[1]  = left_buf_in[0]  + RADIUS*height;  /* make sure all symmetric heaps are allocated before being used  */  shmem_barrier_all();  for (iter = 0; iter<=iterations; iter++){    /* start timer after a warmup iteration */    if (iter == 1) {       shmem_barrier_all();      local_stencil_time[0] = wtime();    }    /* sw determines which incoming buffer to select */    sw = iter%2;    /* need to fetch ghost point data from neighbors */    if (my_IDy < Num_procsy-1) {      for (kk=0,j=jend-RADIUS; j<=jend-1; j++) for (i=istart; i<=iend; i++) {          top_buf_out[kk++]= IN(i,j);      }      shmem_putmem(bottom_buf_in[sw], top_buf_out, RADIUS*width*sizeof(DTYPE), top_nbr);#if SPLITFENCE      shmem_fence();      shmem_int_inc(&iterflag[sw], top_nbr);#endif    }    if (my_IDy > 0) {      for (kk=0,j=jstart; j<=jstart+RADIUS-1; j++) for (i=istart; i<=iend; i++) {          bottom_buf_out[kk++]= IN(i,j);      }      shmem_putmem(top_buf_in[sw], bottom_buf_out, RADIUS*width*sizeof(DTYPE), bottom_nbr);#if SPLITFENCE      shmem_fence();      shmem_int_inc(&iterflag[sw], bottom_nbr);#endif    }    if(my_IDx < Num_procsx-1) {      for(kk=0,j=jstart;j<=jend;j++) 
for(i=iend-RADIUS;i<=iend-1;i++) {	right_buf_out[kk++]=IN(i,j);      }      shmem_putmem(left_buf_in[sw], right_buf_out, RADIUS*height*sizeof(DTYPE), right_nbr);
开发者ID:kempj,项目名称:Kernels,代码行数:67,


示例28: main

//.........这里部分代码省略.........    printf("Sparsity              = %16.10lf/n", sparsity);#ifdef SCRAMBLE    printf("Using scrambled indexing/n");#else    printf("Using canonical indexing/n");#endif    printf("Number of iterations  = %16d/n", iterations);  }  }  bail_out(num_error);  /* initialize the input and result vectors                                      */  #pragma omp for  for (row=0; row<size2; row++) result[row] = vector[row] = 0.0;  /* fill matrix with nonzeroes corresponding to difference stencil. We use the      scrambling for reordering the points in the grid.                            */  #pragma omp for private (i,j,r)  for (row=0; row<size2; row++) {    j = row/size; i=row%size;    elm = row*stencil_size;    colIndex[elm] = REVERSE(LIN(i,j),lsize2);    for (r=1; r<=radius; r++, elm+=4) {      colIndex[elm+1] = REVERSE(LIN((i+r)%size,j),lsize2);      colIndex[elm+2] = REVERSE(LIN((i-r+size)%size,j),lsize2);      colIndex[elm+3] = REVERSE(LIN(i,(j+r)%size),lsize2);      colIndex[elm+4] = REVERSE(LIN(i,(j-r+size)%size),lsize2);    }    // sort colIndex to make sure the compressed row accesses    // vector elements in increasing order    qsort(&(colIndex[row*stencil_size]), stencil_size, sizeof(s64Int), compare);    for (elm=row*stencil_size; elm<(row+1)*stencil_size; elm++)      matrix[elm] = 1.0/(double)(colIndex[elm]+1);  }  for (iter=0; iter<iterations; iter++) {    #pragma omp barrier    #pragma omp master    {       sparse_time = wtime();    }    /* fill vector                                                                */    #pragma omp for     for (row=0; row<size2; row++) vector[row] += (double) (row+1);    /* do the actual matrix-vector multiplication                                 */    #pragma omp for    for (row=0; row<size2; row++) {      temp = 0.0;      first = stencil_size*row; last = first+stencil_size-1;      #pragma simd reduction(+:temp)       for (col=first; col<=last; col++) {        temp += matrix[col]*vector[colIndex[col]]; 
     }      result[row] += temp;    }    #pragma omp master    {    sparse_time = wtime() - sparse_time;    if (iter>0 || iterations==1) { /* skip the first iteration                    */      avgtime = avgtime + sparse_time;      mintime = MIN(mintime, sparse_time);      maxtime = MAX(maxtime, sparse_time);    }    }  }  } /* end of parallel region                                                     */  /* verification test                                                            */  reference_sum = 0.5 * (double) nent * (double) iterations *                         (double) (iterations +1);  vector_sum = 0.0;  for (row=0; row<size2; row++) vector_sum += result[row];  if (ABS(vector_sum-reference_sum) > epsilon) {    printf("ERROR: Vector sum = %lf, Reference vector sum = %lf/n",           vector_sum, reference_sum);    exit(EXIT_FAILURE);  }  else {    printf("Solution validates/n");#ifdef VERBOSE    printf("Reference sum = %lf, vector sum = %lf/n",            reference_sum, vector_sum);#endif  }  avgtime = avgtime/(double)(MAX(iterations-1,1));  printf("Rate (MFlops/s): %lf,  Avg time (s): %lf,  Min time (s): %lf",         1.0E-06 * (2.0*nent)/mintime, avgtime, mintime);  printf(", Max time (s): %lf/n", maxtime);  exit(EXIT_SUCCESS);}
开发者ID:jbreitbart,项目名称:Kernels,代码行数:101,


示例29: main

//.........这里部分代码省略.........    }    else                         printf("Untiled/n");  }  }  bail_out(num_error);  /*  Fill the original matrix, set transpose to known garbage value. */  if (tiling) {#ifdef COLLAPSE    #pragma omp for private (i,it,jt) collapse(2)#else    #pragma omp for private (i,it,jt)#endif    for (j=0; j<order; j+=Tile_order)       for (i=0; i<order; i+=Tile_order)         for (jt=j; jt<MIN(order,j+Tile_order);jt++)          for (it=i; it<MIN(order,i+Tile_order); it++){            A(it,jt) = (double) (order*jt + it);            B(it,jt) = 0.0;          }  }  else {    #pragma omp for private (i)    for (j=0;j<order;j++)       for (i=0;i<order; i++) {        A(i,j) = (double) (order*j + i);        B(i,j) = 0.0;      }  }  for (iter = 0; iter<=iterations; iter++){    /* start timer after a warmup iteration                                        */    if (iter == 1) {       #pragma omp barrier      #pragma omp master      {        transpose_time = wtime();      }    }    /* Transpose the  matrix                                                       */    if (!tiling) {      #pragma omp for private (j)      for (i=0;i<order; i++)         for (j=0;j<order;j++) {           B(j,i) += A(i,j);          A(i,j) += 1.0;        }    }    else {#ifdef COLLAPSE      #pragma omp for private (j,it,jt) collapse(2)#else      #pragma omp for private (j,it,jt)#endif      for (i=0; i<order; i+=Tile_order)         for (j=0; j<order; j+=Tile_order)           for (it=i; it<MIN(order,i+Tile_order); it++)             for (jt=j; jt<MIN(order,j+Tile_order);jt++) {              B(jt,it) += A(it,jt);              A(it,jt) += 1.0;            }     }	  }  /* end of iter loop  */  #pragma omp barrier  #pragma omp master  {    transpose_time = wtime() - transpose_time;  }  } /* end of OpenMP parallel region */  abserr =  test_results (order, B, iterations);  /*********************************************************************  ** Analyze and output results.  
*********************************************************************/  if (abserr < epsilon) {    printf("Solution validates/n");    avgtime = transpose_time/iterations;    printf("Rate (MB/s): %lf Avg time (s): %lf/n",           1.0E-06 * bytes/avgtime, avgtime);#ifdef VERBOSE    printf("Squared errors: %f /n", abserr);#endif    exit(EXIT_SUCCESS);  }  else {    printf("ERROR: Aggregate squared error %lf exceeds threshold %e/n",           abserr, epsilon);    exit(EXIT_FAILURE);  }}  /* end of main */
开发者ID:kempj,项目名称:Kernels,代码行数:101,


示例30: main

//.........这里部分代码省略.........  else {    printf("Number of threads          = %d/n", nthread_input);    printf("Vector length              = %d/n", vector_length);    printf("Number of iterations       = %d/n", iterations);    printf("Branching type             = %s/n", branch_type);  }  }  bail_out(num_error);  my_ID = omp_get_thread_num();  vector = malloc(vector_length*2*sizeof(int));  if (!vector) {    printf("ERROR: Thread %d failed to allocate space for vector/n", my_ID);    num_error = 1;  }  bail_out(num_error);  /* grab the second half of vector to store index array                         */  index   = vector + vector_length;  /* initialize the array with entries with varying signs; array "index" is only      used to obfuscate the compiler (i.e. it won't vectorize a loop containing     indirect referencing). It functions as the identity operator.               */  for (i=0; i<vector_length; i++) {     vector[i]  = 3 - (i&7);    index[i]   = i;  }  #pragma omp barrier     #pragma omp master  {     branch_time = wtime();  }  /* do actual branching */  switch (btype) {    case VECTOR_STOP:      /* condition vector[index[i]]>0 inhibits vectorization                     */      for (iter=0; iter<iterations; iter+=2) {        #pragma vector always        for (i=0; i<vector_length; i++) {           aux = -(3 - (i&7));          if (vector[index[i]]>0) vector[i] -= 2*vector[i];          else                    vector[i] -= 2*aux;        }        #pragma vector always        for (i=0; i<vector_length; i++) {           aux = (3 - (i&7));          if (vector[index[i]]>0) vector[i] -= 2*vector[i];          else                    vector[i] -= 2*aux;        }      }      break;    case VECTOR_GO:      /* condition aux>0 allows vectorization                                    */      for (iter=0; iter<iterations; iter+=2) {        #pragma vector always        for (i=0; i<vector_length; i++) {          aux = -(3 - (i&7));          if (aux>0) vector[i] -= 2*vector[i];      
    else       vector[i] -= 2*aux;
开发者ID:nchaimov,项目名称:ParResKernels,代码行数:67,



注:本文中的wtime函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ wunlock函数代码示例
C++ wtap_dump_file_write函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网,提供自学excel、自学PS、自学CAD、自学C语言、自学css3实例等内容,是一个通过网络自主学习工作技能的自学平台,也是网友喜欢的软件自学网站。