您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ FLA_Clock函数代码示例

51自学网 2021-06-01 20:41:48
  C++
这篇教程C++ FLA_Clock函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中FLA_Clock函数的典型用法代码示例。如果您正苦于以下问题:C++ FLA_Clock函数的具体用法?C++ FLA_Clock怎么用?C++ FLA_Clock使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了FLA_Clock函数的29个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: mexFunction

/*
 * MATLAB MEX gateway that forwards its arguments to FLA_Axpyt_external().
 *
 * nrhs / prhs : right-hand-side arguments. The first NRHS of them are
 *               converted into NINT integer attributes and NOBJ FLAME
 *               objects. When exactly one extra argument is present
 *               (nrhs == NRHS + 1), it is used as the destination for
 *               the elapsed time of the call, as measured by FLA_Clock().
 * nlhs / plhs : not referenced by this gateway.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
  int      attr[NINT];
  FLA_Obj  obj[NOBJ];
  double  *dtime = NULL;

  /* A single argument beyond the required NRHS requests timing output. */
  const int want_timing = (nrhs == NRHS + 1);

  FLA_Init();

  /* Validate the argument count before touching any argument. */
  FLA_M2C_CheckNumArgs(NRHS, nrhs);

  /* Translate the MATLAB arguments into their FLAME C counterparts. */
  FLA_M2C_ConvertArgs(NRHS, prhs, NINT, attr, obj);

  if (want_timing) {
    /* Locate the caller's timing slot and stash the start time in it. */
    dtime  = FLA_M2C_ConvertDoublePtr(prhs[NRHS]);
    *dtime = FLA_Clock();
  }

  FLA_Axpyt_external(attr[0], obj[0], obj[1], obj[2]);

  if (want_timing) {
    /* Replace the stored start time with the elapsed interval. */
    *dtime = FLA_Clock() - *dtime;
  }

  FLA_Finalize();
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:30,


示例2: main

int main(int argc, char *argv[]){  int m, n, k, nfirst, nlast, ninc, i, irep,    nrepeats, nb_alg, check;;  double    dtime,    dtime_best,    gflops,    max_gflops,    diff,    d_n;  FLA_Obj    A, B, C, Cref, Cold;    /* Initialize FLAME */  FLA_Init( );  /* Every time trial is repeated "repeat" times */  printf( "%% number of repeats:" );  scanf( "%d", &nrepeats );  printf( "%% %d/n", nrepeats );  /* Enter the max GFLOPS attainable */  printf( "%% enter max GFLOPS:" );  scanf( "%lf", &max_gflops );  printf( "%% %lf/n", max_gflops );  /* Enter the algorithmic block size */  printf( "%% enter nb_alg:" );  scanf( "%d", &nb_alg );  printf( "%% %d/n", nb_alg );  /* Timing trials for matrix sizes n=nfirst to nlast in increments      of ninc will be performed */  printf( "%% enter nfirst, nlast, ninc:" );  scanf( "%d%d%d", &nfirst, &nlast, &ninc );  printf( "%% %d %d %d/n", nfirst, nlast, ninc );  i = 1;  for ( n=nfirst; n<= nlast; n+=ninc ){       /* Allocate space for the matrices */    FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &A );    FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &B );    FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &C );    FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Cref );    FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Cold );    /* Generate random matrices L and B */    FLA_Random_matrix( A );    FLA_Random_matrix( B );    FLA_Random_matrix( Cold );    gflops = 2.0 * n * n * n * 1.0e-09;    /* Time FLA_Symm */    for ( irep=0; irep<nrepeats; irep++ ){      FLA_Copy( Cold, Cref );      dtime = FLA_Clock();      FLA_Symm( FLA_LEFT, FLA_LOWER_TRIANGULAR, 		FLA_ONE, A, B, FLA_ONE, Cref );      dtime = FLA_Clock() - dtime;      if ( irep == 0 ) 	dtime_best = dtime;      else	dtime_best = ( dtime < dtime_best ? 
dtime : dtime_best );    }    printf( "data_FLAME( %d, 1:2 ) = [ %d %le ];/n", i, n,            gflops / dtime_best );    fflush( stdout );    /* Time the your implementations */#if TEST_UNB_VAR1==TRUE    /* Variant 1 unblocked */    for ( irep=0; irep<nrepeats; irep++ ){      FLA_Copy( Cold, C );          dtime = FLA_Clock();      Symm_unb_var1( A, B, C );      dtime = FLA_Clock() - dtime;      if ( irep == 0 ) 	dtime_best = dtime;      else//.........这里部分代码省略.........
开发者ID:ebeweber,项目名称:CS378-Invariant-Project,代码行数:101,


示例3: time_Apply_Q

/*
 * time_Apply_Q
 *
 * Times one "apply Q" implementation over nrepeats runs and reports the
 * best (smallest) elapsed time, a correctness figure, and a GFLOPS rate.
 *
 * param_combo : parameter combination to time; only combination 0 has a
 *               timed case, any other value times an empty switch.
 * type        : FLA_ALG_REFERENCE runs REF_Apply_Q() on flat copies of
 *               A and B; FLA_ALG_FRONT runs FLASH_Apply_Q_UT() on the
 *               hierarchical A and B. Any other value prints "trouble".
 * nrepeats    : number of timed repetitions. If it is <= 0 nothing is
 *               timed and the initial sentinel 1.0e9 is reported.
 * m, n        : not referenced by this routine.
 * A, B        : operands. B is restored from a saved copy before every
 *               repetition and again before returning.
 * B_ref       : written when type is FLA_ALG_REFERENCE, read otherwise.
 * t, T, W     : passed through to the routine being timed.
 * dtime       : out - smallest elapsed FLA_Clock() interval observed.
 * diff        : out - 0.0 for the reference type, otherwise
 *               FLASH_Max_elemwise_diff( B, B_ref ).
 * gflops      : out - 2 * length(A) * width(A) * width(B) / dtime / 1e9,
 *               multiplied by 4 when A is complex.
 */
void time_Apply_Q( int param_combo, int type, int nrepeats, int m, int n,
                   FLA_Obj A, FLA_Obj B, FLA_Obj B_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W,
                   double *dtime, double *diff, double *gflops )
{
  int     irep;
  double  dtime_old = 1.0e9;   /* sentinel: larger than any measured time */
  FLA_Obj B_save, A_flat, B_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, B, &B_save );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, B, &B_flat );

  /* Keep a pristine copy of B so each repetition starts from the same data. */
  FLASH_Copy( B, B_save );

  for ( irep = 0; irep < nrepeats; irep++ )
  {
    FLASH_Copy( B_save, B );

    /* Refresh the flat copies used by the reference implementation. */
    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( B, B_flat );

    *dtime = FLA_Clock();

    switch ( param_combo )
    {
      /* Time parameter combination 0 */
      case 0:
      {
        switch ( type )
        {
          case FLA_ALG_REFERENCE:
            REF_Apply_Q( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A_flat, t, B_flat );
            break;
          case FLA_ALG_FRONT:
            FLASH_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE, A, T, W, B );
            break;
          default:
            /* Terminate the diagnostic with a real newline ("\n", not "/n"). */
            printf( "trouble\n" );
        }
        break;
      }
    }

    *dtime = FLA_Clock() - *dtime;

    /* Track the best time across repetitions. */
    dtime_old = min( *dtime, dtime_old );
  }

  if ( type == FLA_ALG_REFERENCE )
  {
    /* Finish the solve on the flat data and publish it as the reference. */
    FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_NONUNIT_DIAG, FLA_ONE, A_flat, B_flat );
    FLASH_Obj_hierarchify( B_flat, B_ref );
    *diff = 0.0;
  }
  else
  {
    /* Finish the solve hierarchically and compare against the reference. */
    FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                FLA_ONE, A, B );
    *diff = FLASH_Max_elemwise_diff( B, B_ref );
  }

  *gflops = 2.0 *
            FLASH_Obj_scalar_length( A ) *
            FLASH_Obj_scalar_width( A ) *
            FLASH_Obj_scalar_width( B ) /
            dtime_old /
            1.0e9;

  if ( FLA_Obj_is_complex( A ) )
    *gflops *= 4.0;

  *dtime = dtime_old;

  /* Leave B as the caller supplied it, then release the temporaries. */
  FLASH_Copy( B_save, B );

  FLASH_Obj_free( &B_save );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &B_flat );
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:89,


示例4: time_Her2k_ln

void time_Her2k_ln(                int variant, int type, int nrepeats, int n, int nb_alg,               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;   FLA_Obj    C_old;  fla_blocksize_t*    bp;  fla_gemm_t*    cntl_gemm_blas;  fla_her2k_t*    cntl_her2k_blas;  fla_her2k_t*    cntl_her2k_var;  bp              = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );  cntl_gemm_blas  = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );  cntl_her2k_blas = FLA_Cntl_her2k_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL, NULL );  cntl_her2k_var  = FLA_Cntl_her2k_obj_create( FLA_FLAT, variant, bp, cntl_her2k_blas, cntl_gemm_blas, cntl_gemm_blas );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ )  {    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( variant ){    case 0:      // Time reference implementation      REF_Her2k( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ONE, C );      break;    case 1:{      // Time variant 1      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Her2k_ln_unb_var1( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Her2k_ln_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      // Time variant 2      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Her2k_ln_unb_var2( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Her2k_ln_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );        break;      default:        printf("trouble/n");      }      break;    }    case 3:{      // Time variant 3      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Her2k_ln_unb_var3( FLA_ONE, A, B, FLA_ONE, C );        break;      case 
FLA_ALG_BLOCKED:        FLA_Her2k_ln_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );        break;      default:        printf("trouble/n");      }      break;    }    case 4:{      // Time variant 4      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Her2k_ln_unb_var4( FLA_ONE, A, B, FLA_ONE, C );        break;//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例5: libfla_test_symm_experiment

//.........这里部分代码省略.........	// Save the original object contents in a temporary object.	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, C, &C_save );	// Use hierarchical matrices if we're testing the FLASH front-end.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_Obj_create_hier_copy_of_flat( A, 1, &b_flash, &A_test );		FLASH_Obj_create_hier_copy_of_flat( B, 1, &b_flash, &B_test );		FLASH_Obj_create_hier_copy_of_flat( C, 1, &b_flash, &C_test );	}	else	{		A_test = A;		B_test = B;		C_test = C;	}	// Create a control tree for the individual variants.	if ( impl == FLA_TEST_FLAT_UNB_VAR ||	     impl == FLA_TEST_FLAT_OPT_VAR ||	     impl == FLA_TEST_FLAT_BLK_VAR ||	     impl == FLA_TEST_FLAT_UNB_EXT ||	     impl == FLA_TEST_FLAT_BLK_EXT )		libfla_test_symm_cntl_create( var, b_alg_flat );	// Repeat the experiment n_repeats times and record results.	for ( i = 0; i < n_repeats; ++i )	{		if ( impl == FLA_TEST_HIER_FRONT_END )			FLASH_Obj_hierarchify( C_save, C_test );		else			FLA_Copy_external( C_save, C_test );				time = FLA_Clock();		libfla_test_symm_impl( impl, side, uplo, alpha, A_test, B_test, beta, C_test );				time = FLA_Clock() - time;		time_min = min( time_min, time );	}	// Copy the solution to flat matrix X.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_Obj_flatten( C_test, C );	}	else    {		// No action needed since C_test and C refer to the same object.	}	// Free the hierarchical matrices if we're testing the FLASH front-end.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_Obj_free( &A_test );		FLASH_Obj_free( &B_test );		FLASH_Obj_free( &C_test );	}	// Free the control trees if we're testing the variants.	if ( impl == FLA_TEST_FLAT_UNB_VAR ||	     impl == FLA_TEST_FLAT_OPT_VAR ||	     impl == FLA_TEST_FLAT_BLK_VAR ||	     impl == FLA_TEST_FLAT_UNB_EXT ||	     impl == FLA_TEST_FLAT_BLK_EXT )		libfla_test_symm_cntl_free();
开发者ID:flame,项目名称:libflame,代码行数:67,


示例6: time_Sylv_nn

void time_Sylv_nn(                   int variant, int type, int n_repeats, int m, int n, int nb_alg,                   FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref, FLA_Obj scale,                   double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  fla_blocksize_t*    bp;  fla_sylv_t*    cntl_sylv_var;  fla_sylv_t*    cntl_sylv_unb;  fla_gemm_t*    cntl_gemm_blas;/*  if( type == FLA_ALG_UNBLOCKED && n > 400 )  {    *gflops = 0.0;    *diff   = 0.0;    return;  }*/  bp               = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );  cntl_sylv_unb    = FLA_Cntl_sylv_obj_create( FLA_FLAT, FLA_UNB_OPT_VARIANT1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );  cntl_gemm_blas   = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL );  cntl_sylv_var    = FLA_Cntl_sylv_obj_create( FLA_FLAT, variant, bp, cntl_sylv_unb, cntl_sylv_unb, cntl_sylv_unb, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < n_repeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( variant ){    case 0:      /* Time reference implementation */      REF_Sylv_nn( isgn, A, B, C, scale );      break;    case 1:{      /* Time variant 1 */      switch( type ){      case FLA_ALG_UNB_OPT:        FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );        break;      case FLA_ALG_BLOCKED:        FLA_Sylv_nn_blk_var1( isgn, A, B, C, scale, cntl_sylv_var );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      /* Time variant 2 */      switch( type ){      case FLA_ALG_UNB_OPT:        FLA_Sylv_nn_opt_var2( isgn, A, B, C, scale );        break;      case FLA_ALG_BLOCKED:        FLA_Sylv_nn_blk_var2( isgn, A, B, C, scale, 
cntl_sylv_var );        break;      default:        printf("trouble/n");      }      break;    }    case 3:{      /* Time variant 3 */      switch( type ){      case FLA_ALG_UNB_OPT:        FLA_Sylv_nn_opt_var3( isgn, A, B, C, scale );        break;      case FLA_ALG_BLOCKED:        FLA_Sylv_nn_blk_var3( isgn, A, B, C, scale, cntl_sylv_var );        break;      default:        printf("trouble/n");//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例7: time_QR_UT

void time_QR_UT(                 int variant, int type, int nrepeats, int m, int n,                 FLA_Obj A, FLA_Obj A_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W, FLA_Obj b, FLA_Obj b_orig,                 double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    A_save, b_save, norm;  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, b, &b_save );  if ( FLA_Obj_is_single_precision( A ) )    FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );  else    FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );  FLA_Copy_external( A, A_save );  FLA_Copy_external( b, b_save );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( A_save, A );    *dtime = FLA_Clock();    switch( variant ){    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_QR_UT( A, t );        break;      case FLA_ALG_FRONT:        FLA_QR_UT( A, T );        break;      default:        printf("trouble/n");      }      break;    }    }    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  if ( type == FLA_ALG_REFERENCE )  {    FLA_Obj AT, AB;    FLA_Obj bT, bB;    FLA_Obj y;    FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y );    FLA_Copy_external( b, b_orig );    if ( FLA_Obj_is_real( A ) )      FLA_Apply_Q_blk_external( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A, t, b );    else      FLA_Apply_Q_blk_external( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, A, t, b );    FLA_Part_2x1( A,    &AT,                        &AB,    FLA_Obj_width( A ), FLA_TOP );    FLA_Part_2x1( b,    &bT,                        &bB,    FLA_Obj_width( A ), FLA_TOP );    FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,                       FLA_NONUNIT_DIAG, FLA_ONE, AT, bT );    FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, bT, FLA_ONE, b_orig );    FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y );    FLA_Nrm2_external( y, 
norm );    FLA_Obj_extract_real_scalar( norm, diff );    FLA_Obj_free( &y );  }  else  {    FLA_Obj x, y;    FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y );    FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &x );    FLA_Copy_external( b, b_orig );    FLA_QR_UT_solve( A, T, b, x );    FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, x, FLA_ONE, b_orig );    FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y );    FLA_Nrm2_external( y, norm );    FLA_Obj_extract_real_scalar( norm, diff );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例8: time_Gemm_hh

void time_Gemm_hh(                int variant, int type, int nrepeats, int n, int nb_alg,               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  fla_blocksize_t*    bp;  fla_gemm_t*    cntl_gemm_blas;  fla_gemm_t*    cntl_gemm_var;  bp             = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );  cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );  cntl_gemm_var  = FLA_Cntl_gemm_obj_create( FLA_FLAT, variant, bp, cntl_gemm_blas );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( variant ){    // Time reference implementation    case 0:      REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,                 FLA_ONE, A, B, FLA_ONE, C );      break;    // Time variant 1    case 1:{      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Gemm_hh_unb_var1( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Gemm_hh_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );        break;      default:        printf("trouble/n");      }      break;    }    // Time variant 2    case 2:{      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Gemm_hh_unb_var2( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Gemm_hh_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );        break;      default:        printf("trouble/n");      }      break;    }    // Time variant 3    case 3:{      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Gemm_hh_unb_var3( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Gemm_hh_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );        break;      default:        printf("trouble/n");      }      break;    }    // Time 
variant 4    case 4:{      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Gemm_hh_unb_var4( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Gemm_hh_blk_var4( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );        break;      default://.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例9: time_Gemm

void time_Gemm(                int param_combo, int type, int nrepeats, int m, int k, int n,               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  if ( param_combo != 4 )  {    *gflops = 0.0;    *diff   = 0.0;    return;  }  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( param_combo ){    // Time parameter combination 0    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 1    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 2    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 3    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_NO_TRANSPOSE, 
FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 4    case 4:{      switch( type ){//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例10: time_Tevd_v

void time_Tevd_v(               int variant, int type, int n_repeats, int m, int k_accum, int b_alg, int n_iter_max,               FLA_Obj A_orig, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj A, FLA_Obj l,               double *dtime, double *diff1, double* diff2, double *gflops ){  int irep;  double    k, dtime_old = 1.0e9;  FLA_Obj    A_save, G_save, d_save, e_save;  if (       //( variant == 0 ) ||       //( variant == 1 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 2 && type == FLA_ALG_UNB_OPT ) ||       FALSE     )  {    *dtime  = 0.0;    *gflops = 0.0;    *diff1  = 0.0;    *diff2  = 0.0;    return;  }  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, G, &G_save );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, d, &d_save );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, e, &e_save );  FLA_Copy_external( A, A_save );  FLA_Copy_external( G, G_save );  FLA_Copy_external( d, d_save );  FLA_Copy_external( e, e_save );  for ( irep = 0 ; irep < n_repeats; irep++ ){    FLA_Copy_external( A_save, A );    FLA_Copy_external( G_save, G );    FLA_Copy_external( d_save, d );    FLA_Copy_external( e_save, e );    *dtime = FLA_Clock();    switch( variant ){    case 0:      REF_Tevd_v( d, e, A );      break;    // Time variant 1    case 1:    {      switch( type ){      case FLA_ALG_UNB_OPT:        FLA_Tevd_v_opt_var1( n_iter_max, d, e, G, A, b_alg );        break;      }      break;    }    // Time variant 2    case 2:    {      switch( type ){      case FLA_ALG_UNB_OPT:        FLA_Tevd_v_opt_var2( n_iter_max, d, e, G, R, W, A, b_alg );        break;      }      break;    }    }    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  {    FLA_Obj V, A_rev_evd, norm, eye;	FLA_Copy( d, l );//FLA_Obj_show( "A_save", A_save, "%9.2e + %9.2e ", "" );//FLA_Obj_show( "A_evd", A, "%9.2e + %9.2e ", "" );	FLA_Sort_evd( FLA_FORWARD, l, A );    FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &V );     
FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_rev_evd );     FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &eye );     FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );    FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, l, A );    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,              FLA_ONE, A, V, FLA_ZERO, A_rev_evd );    FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A_rev_evd );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例11: time_Sylv

void time_Sylv(                int param_combo, int type, int nrepeats, int m, int n,                FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref, FLA_Obj scale,                double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( param_combo ){    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Sylv( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );        break;      case FLA_ALG_FRONT:        FLA_Sylv( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );        break;      default:        printf("trouble/n");      }      break;    }    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Sylv( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );        break;      case FLA_ALG_FRONT:        FLA_Sylv( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Sylv( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );        break;      case FLA_ALG_FRONT:        FLA_Sylv( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );        break;      default:        printf("trouble/n");      }      break;    }    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Sylv( FLA_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );        break;      case FLA_ALG_FRONT:        FLA_Sylv( FLA_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );        break;      default:        printf("trouble/n");      }      break;    }    }    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  if ( type == FLA_ALG_REFERENCE ){    FLA_Copy_external( C, C_ref );    *diff = 
0.0;  }  else{    *diff = FLA_Max_elemwise_diff( C, C_ref );  }//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例12: time_Gemm_nn

//.........这里部分代码省略.........        break;      default:        printf("trouble/n");      }      break;    }    case 15:{      // Time variant 1->5      switch( type ){      case FLA_ALG_OPENMP_CVAR:        FLA_Gemm_nn_omp_var15( FLA_ONE, A, B, C, nb_alg );        break;      default:        printf("trouble/n");      }      break;    }    case 31:{      // Time variant 3->1       switch( type ){      case FLA_ALG_OPENMP_CVAR:        FLA_Gemm_nn_omp_var31( FLA_ONE, A, B, C, nb_alg );        break;      default:        printf("trouble/n");      }      break;    }    case 35:{      // Time variant 3->5       switch( type ){      case FLA_ALG_OPENMP_CVAR:        FLA_Gemm_nn_omp_var35( FLA_ONE, A, B, C, nb_alg );        break;      default:        printf("trouble/n");      }      break;    }    case 51:{      // Time variant 5->1       switch( type ){      case FLA_ALG_OPENMP_CVAR:        FLA_Gemm_nn_omp_var51( FLA_ONE, A, B, C, nb_alg );        break;      default:        printf("trouble/n");      }      break;    }    case 53:{      // Time variant 5->3       switch( type ){      case FLA_ALG_OPENMP_CVAR:        FLA_Gemm_nn_omp_var53( FLA_ONE, A, B, C, nb_alg );        break;      default:        printf("trouble/n");      }      break;    }    }    if ( irep == 0 )      dtime_old = FLA_Clock() - *dtime;    else{      *dtime = FLA_Clock() - *dtime;      dtime_old = min( *dtime, dtime_old );    }  }  if ( variant == 0 ){    FLA_Copy_external( C, Cref );    *diff = 0.0;  }  else{    *diff = FLA_Max_elemwise_diff( C, Cref );    //FLA_Obj_show( "C:", C, "%f", "/n");  }  *gflops = 2.0 *             FLA_Obj_length( C ) *             FLA_Obj_width( C ) *             FLA_Obj_width( A ) /             dtime_old /             1e9;  *dtime = dtime_old;  FLA_Copy_external( Cold, C );  FLA_Obj_free( &Cold );}
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例13: time_Syrk_ln

//.........这里部分代码省略.........      switch( type ){      case FLA_ALG_OPENMP_1TASK:        FLA_Syrk_ln_omp1t_var2( A, C );        break;      case FLA_ALG_OPENMP_2TASKS:        FLA_Syrk_ln_omp2t_var2( A, C );        break;      case FLA_ALG_OPENMP_2LOOPS:        FLA_Syrk_ln_omp2l_var2( A, C );        break;      case FLA_ALG_OPENMP_2LOOPSPLUS:        FLA_Syrk_ln_omp2x_var2( A, C );        break;      default:        printf("trouble/n");      }      break;    }     case 3:{      // Time variant 3       switch( type ){      case FLA_ALG_OPENMP_1TASK:        FLA_Syrk_ln_omp1t_var3( A, C );        break;      case FLA_ALG_OPENMP_2TASKS:        FLA_Syrk_ln_omp2t_var3( A, C );        break;      case FLA_ALG_OPENMP_2LOOPS:        FLA_Syrk_ln_omp2l_var3( A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 4:{      // Time variant 4      switch( type ){      case FLA_ALG_OPENMP_1TASK:        FLA_Syrk_ln_omp1t_var4( A, C );        break;      case FLA_ALG_OPENMP_2TASKS:        FLA_Syrk_ln_omp2t_var4( A, C );        break;      case FLA_ALG_OPENMP_2LOOPS:        FLA_Syrk_ln_omp2l_var4( A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 5:{      // Time variant 5      switch( type ){      case FLA_ALG_OPENMP_1TASK:        FLA_Syrk_ln_omp1t_var5( A, C );        break;      default:        printf("trouble/n");      }      break;    }    }    if ( irep == 0 )      dtime_old = FLA_Clock() - *dtime;    else{      *dtime = FLA_Clock() - *dtime;      dtime_old = min( *dtime, dtime_old );    }  }  if ( variant == 0 ){    FLA_Copy_external( C, C_ref );    *diff = 0.0;  }  else{    *diff = FLA_Max_elemwise_diff( C, C_ref );    //FLA_Obj_show( "C:", C, "%f", "/n");  }  *gflops = 1.0 *             FLA_Obj_length( A ) *             FLA_Obj_length( A ) *             FLA_Obj_width( A ) /             dtime_old /             1e9;  *dtime = dtime_old;  FLA_Copy_external( C_old, C );  FLA_Obj_free( &C_old );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例14: time_Syrk

void time_Syrk(                int param_combo, int type, int nrepeats, int m, int k,               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old, A_flat, C_flat;  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );  FLASH_Copy( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ )  {    FLASH_Copy( C_old, C );    FLASH_Obj_flatten( A, A_flat );    FLASH_Obj_flatten( C, C_flat );    *dtime = FLA_Clock();    switch( param_combo ){    // Time parameter combination 0    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Syrk( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat );        break;      case FLA_ALG_FRONT:        FLASH_Syrk( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 1    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Syrk( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat );        break;      case FLA_ALG_FRONT:        FLASH_Syrk( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 2    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Syrk( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat );        break;      case FLA_ALG_FRONT:        FLASH_Syrk( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 3    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Syrk( 
FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_ONE, A_flat, FLA_ZERO, C_flat );        break;      case FLA_ALG_FRONT:        FLASH_Syrk( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_ONE, A, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    }	    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例15: time_Apply_G_rf

void time_Apply_G_rf(               int variant, int type, int n_repeats, int m, int k, int n, int b_alg,               FLA_Obj A, FLA_Obj A_ref, FLA_Obj G, FLA_Obj P,               double *dtime, double *diff, double *gflops ){  int irep;  double    dtime_old = 1.0e9;  FLA_Obj    A_save, G_save, norm;  if ( FLA_Obj_is_real( A ) )  {    if (       //( variant == 1 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 1 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 1 && type == FLA_ALG_BLOCKED ) ||       //( variant == 2 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 2 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 2 && type == FLA_ALG_BLOCKED ) ||       //( variant == 3 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 3 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 3 && type == FLA_ALG_BLOCKED ) ||       //( variant == 6 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 6 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 6 && type == FLA_ALG_BLOCKED ) ||       //( variant == 9 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 9 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 9 && type == FLA_ALG_BLOCKED ) ||       ( variant == 4 ) ||       ( variant == 5 ) ||       ( variant == 7 ) ||       ( variant == 8 ) ||       FALSE    )     {      *gflops = 0.0;      *diff   = 0.0;      return;    }  }  else if ( FLA_Obj_is_complex( A ) )  {    if (       //( variant == 1 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 1 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 1 && type == FLA_ALG_BLOCKED ) ||       //( variant == 2 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 2 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 2 && type == FLA_ALG_BLOCKED ) ||       //( variant == 3 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 3 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 3 && type == FLA_ALG_BLOCKED ) ||       //( variant == 6 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 6 && type == 
FLA_ALG_UNB_ASM ) ||       //( variant == 6 && type == FLA_ALG_BLOCKED ) ||       //( variant == 9 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 9 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 9 && type == FLA_ALG_BLOCKED ) ||       ( variant == 4 ) ||       ( variant == 5 ) ||       ( variant == 7 ) ||       ( variant == 8 ) ||       FALSE    )    {      *gflops = 0.0;      *diff   = 0.0;      return;    }  }  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, G, &G_save );  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );  //dim_t b_flash_m = b_alg;  //dim_t b_flash_n = n;  //FLASH_Obj_create_hier_copy_of_flat_ext( A, 1, &b_flash_m, &b_flash_n, &AH ); //printf ( "flash dims: %d x %d/n", FLA_Obj_length( AH ), FLA_Obj_width( AH ) );  FLA_Copy_external( A, A_save );  FLA_Copy_external( G, G_save );  for ( irep = 0 ; irep < n_repeats; irep++ ){    FLA_Copy_external( A_save, A );    FLA_Copy_external( G_save, G );    //FLASH_Obj_hierarchify( A_save, AH );    *dtime = FLA_Clock();    switch( variant ){//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例16: libfla_test_qrut_experiment

void libfla_test_qrut_experiment( test_params_t params,                                  unsigned int  var,                                  char*         sc_str,                                  FLA_Datatype  datatype,                                  unsigned int  p_cur,                                  unsigned int  pci,                                  unsigned int  n_repeats,                                  signed int    impl,                                  double*       perf,                                  double*       residual ){	dim_t        b_flash    = params.b_flash;	dim_t        b_alg_flat = params.b_alg_flat;	double       time_min   = 1e9;	double       time;	unsigned int i;	unsigned int m, n;	unsigned int min_m_n;	signed int   m_input    = -2;	signed int   n_input    = -1;	FLA_Obj      A, T, x, b, y, norm;	FLA_Obj      A_save;	FLA_Obj      A_test, T_test, x_test, b_test;	// Determine the dimensions.	if ( m_input < 0 ) m = p_cur * abs(m_input);	else               m = p_cur;	if ( n_input < 0 ) n = p_cur * abs(n_input);	else               n = p_cur;	// Compute the minimum dimension.	min_m_n = min( m, n );	// Create the matrices for the current operation.	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, n, &A );	if ( impl == FLA_TEST_FLAT_FRONT_END ||	     ( impl == FLA_TEST_FLAT_BLK_VAR && var == 1 ) )		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, min_m_n, &T );	else if ( var == 2 )		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], min_m_n, min_m_n, &T );	else		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], 1, min_m_n, &T );	// Initialize the test matrices.	FLA_Random_matrix( A );	// Save the original object contents in a temporary object.	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );	// Create vectors to form a linear system.	
FLA_Obj_create( datatype, n, 1, 0, 0, &x );	FLA_Obj_create( datatype, m, 1, 0, 0, &b );	FLA_Obj_create( datatype, n, 1, 0, 0, &y );	// Create a real scalar object to hold the norm of A.	FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );	// Create a random right-hand side vector.	FLA_Random_matrix( b );	// Use hierarchical matrices if we're testing the FLASH front-end.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_QR_UT_create_hier_matrices( A, 1, &b_flash, &A_test, &T_test );		FLASH_Obj_create_hier_copy_of_flat( b, 1, &b_flash, &b_test );		FLASH_Obj_create_hier_copy_of_flat( x, 1, &b_flash, &x_test );	}	else	{		A_test = A;		T_test = T;	}	// Create a control tree for the individual variants.	if ( impl == FLA_TEST_FLAT_UNB_VAR ||	     impl == FLA_TEST_FLAT_OPT_VAR ||	     impl == FLA_TEST_FLAT_BLK_VAR )		libfla_test_qrut_cntl_create( var, b_alg_flat );	// Repeat the experiment n_repeats times and record results.	for ( i = 0; i < n_repeats; ++i )	{		if ( impl == FLA_TEST_HIER_FRONT_END )			FLASH_Obj_hierarchify( A_save, A_test );		else			FLA_Copy_external( A_save, A_test );				time = FLA_Clock();		libfla_test_qrut_impl( impl, A_test, T_test );				time = FLA_Clock() - time;		time_min = min( time_min, time );	}	// Perform a linear solve with the result.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_QR_UT_solve( A_test, T_test, b_test, x_test );//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,


示例17: time_Her2k

void time_Her2k(                int param_combo, int type, int nrepeats, int m, int k,               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( param_combo ){    // Time parameter combination 0    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Her2k( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Her2k( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 1    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Her2k( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Her2k( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 2    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Her2k( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Her2k( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 3    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Her2k( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Her2k( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        
break;      default:        printf("trouble/n");      }      break;    }    }	    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  if ( type == FLA_ALG_REFERENCE )  {    FLA_Copy_external( C, C_ref );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例18: REF_Svdd_uv_components

FLA_Error REF_Svdd_uv_components( FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V,                                  double* dtime_bred, double* dtime_bsvd, double* dtime_appq,                                  double* dtime_qrfa, double* dtime_gemm )/*{  *dtime_bred = 1;  *dtime_bsvd = 1;  *dtime_appq = 1;  *dtime_qrfa = 1;  *dtime_gemm = 1;  return FLA_Svdd_external( FLA_SVD_VECTORS_ALL, A, s, U, V );}*/{  FLA_Datatype dt_A;  FLA_Datatype dt_A_real;  dim_t        m_A, n_A;  dim_t        min_m_n;  FLA_Obj      tq, tu, tv, d, e, Ur, Vr, W;  FLA_Obj      eT, epsilonB;  FLA_Uplo     uplo = FLA_UPPER_TRIANGULAR;  double       crossover_ratio = 16.0 / 10.0;  double       dtime_temp;  dt_A      = FLA_Obj_datatype( A );  dt_A_real = FLA_Obj_datatype_proj_to_real( A );  m_A       = FLA_Obj_length( A );  n_A       = FLA_Obj_width( A );  min_m_n   = FLA_Obj_min_dim( A );  FLA_Obj_create( dt_A,      min_m_n, 1,   0, 0, &tq );  FLA_Obj_create( dt_A,      min_m_n, 1,   0, 0, &tu );  FLA_Obj_create( dt_A,      min_m_n, 1,   0, 0, &tv );  FLA_Obj_create( dt_A_real, min_m_n, 1,   0, 0, &d );  FLA_Obj_create( dt_A_real, min_m_n, 1,   0, 0, &e );  FLA_Obj_create( dt_A_real, n_A,     n_A, 0, 0, &Ur );  FLA_Obj_create( dt_A_real, n_A,     n_A, 0, 0, &Vr );  FLA_Part_2x1( e,   &eT,                     &epsilonB,    1, FLA_BOTTOM );  if ( m_A >= n_A )  {    if ( m_A < crossover_ratio * n_A )    {      dtime_temp = FLA_Clock();      {        // Reduce to bidiagonal form.        FLA_Bidiag_blk_external( A, tu, tv );        FLA_Bidiag_UT_extract_diagonals( A, d, eT );      }      *dtime_bred = FLA_Clock() - dtime_temp;      dtime_temp = FLA_Clock();      {        // Divide-and-conquor algorithm.        FLA_Bsvdd_external( uplo, d, e, Ur, Vr );      }      *dtime_bsvd = FLA_Clock() - dtime_temp;      dtime_temp = FLA_Clock();      {        // Form U.        FLA_Copy_external( Ur, U );        FLA_Bidiag_apply_U_external( FLA_LEFT, FLA_NO_TRANSPOSE, A, tu, U );        // Form V.        
FLA_Copy_external( Vr, V );        FLA_Bidiag_apply_V_external( FLA_RIGHT, FLA_CONJ_TRANSPOSE, A, tv, V );      }      *dtime_appq = FLA_Clock() - dtime_temp;      *dtime_qrfa = 0.0;      *dtime_gemm = 0.0;    }    else    {      FLA_Obj AT,              AB;      FLA_Obj UL, UR;      FLA_Part_2x1( A,   &AT,                         &AB,        n_A, FLA_TOP );      FLA_Part_1x2( U,   &UL, &UR,   n_A, FLA_LEFT );      // Create a temporary n-by-n matrix R.      FLA_Obj_create( dt_A, n_A, n_A, 0, 0, &W );      dtime_temp = FLA_Clock();      {        // Perform a QR factorization.        FLA_QR_blk_external( A, tq );        FLA_Copyr_external( FLA_LOWER_TRIANGULAR, A, UL );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例19: time_Trmm

void time_Trmm(                int param_combo, int type, int nrepeats, int m, int n,               FLA_Obj A, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old, A_flat, C_flat;  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );  FLASH_Copy( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ )  {    FLASH_Copy( C_old, C );    FLASH_Obj_flatten( A, A_flat );    FLASH_Obj_flatten( C, C_flat );    *dtime = FLA_Clock();    switch( param_combo ){    // Time parameter combination 0    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Trmm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat );        break;      case FLA_ALG_FRONT:        FLASH_Trmm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 1    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Trmm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat );        break;      case FLA_ALG_FRONT:        FLASH_Trmm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 2    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Trmm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat );        break;      case FLA_ALG_FRONT:        FLASH_Trmm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time 
parameter combination 3    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Trmm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat );        break;      case FLA_ALG_FRONT:        FLASH_Trmm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 4    case 4:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Trmm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_TWO, A_flat, C_flat );        break;//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例20: time_Copyt

void time_Copyt(                int param_combo, int type, int nrepeats, int m, int n,               FLA_Obj A, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( param_combo ){    // Time parameter combination 0    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_NO_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_NO_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_CONJ_NO_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_CONJ_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_CONJ_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    }	    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  if ( type == FLA_ALG_REFERENCE )  {    FLA_Copy_external( C, C_ref );    *diff = 0.0;  }  else//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例21: FLA_Svd_uv_var2_components

FLA_Error FLA_Svd_uv_var2_components( dim_t n_iter_max, dim_t k_accum, dim_t b_alg,                                      FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V,                                      double* dtime_bred, double* dtime_bsvd, double* dtime_appq,                                      double* dtime_qrfa, double* dtime_gemm ){	FLA_Error    r_val = FLA_SUCCESS;	FLA_Datatype dt;	FLA_Datatype dt_real;	FLA_Datatype dt_comp;	FLA_Obj      T, S, rL, rR, d, e, G, H, RG, RH, W;	dim_t        m_A, n_A;	dim_t        min_m_n;	dim_t        n_GH;	double       crossover_ratio = 17.0 / 9.0;	double       dtime_temp;	n_GH    = k_accum;	m_A     = FLA_Obj_length( A );	n_A     = FLA_Obj_width( A );	min_m_n = FLA_Obj_min_dim( A );	dt      = FLA_Obj_datatype( A );	dt_real = FLA_Obj_datatype_proj_to_real( A );	dt_comp = FLA_Obj_datatype_proj_to_complex( A );	// If the matrix is a scalar, then the SVD is easy.	if ( min_m_n == 1 )	{		FLA_Copy( A, s );		FLA_Set_to_identity( U );		FLA_Set_to_identity( V );		return FLA_SUCCESS;	}	// Create matrices to hold block Householder transformations.	FLA_Bidiag_UT_create_T( A, &T, &S );	// Create vectors to hold the realifying scalars.	FLA_Obj_create( dt,      min_m_n,      1, 0, 0, &rL );	FLA_Obj_create( dt,      min_m_n,      1, 0, 0, &rR );	// Create vectors to hold the diagonal and sub-diagonal.	FLA_Obj_create( dt_real, min_m_n,      1, 0, 0, &d );	FLA_Obj_create( dt_real, min_m_n-1,    1, 0, 0, &e );	// Create matrices to hold the left and right Givens scalars.	FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &G );	FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &H );	// Create matrices to hold the left and right Givens matrices.	FLA_Obj_create( dt_real, min_m_n, min_m_n, 0, 0, &RG );	FLA_Obj_create( dt_real, min_m_n, min_m_n, 0, 0, &RH );	FLA_Obj_create( dt,      m_A,     n_A,     0, 0, &W );	if ( m_A >= n_A )	{		if ( m_A < crossover_ratio * n_A )		{			dtime_temp = FLA_Clock();			{			// Reduce the matrix to bidiagonal form.			
// Apply scalars to rotate elements on the sub-diagonal to the real domain.			// Extract the diagonal and sub-diagonal from A.			FLA_Bidiag_UT( A, T, S );			FLA_Bidiag_UT_realify( A, rL, rR );			FLA_Bidiag_UT_extract_diagonals( A, d, e );			}			*dtime_bred = FLA_Clock() - dtime_temp;			dtime_temp = FLA_Clock();			{			// Form U and V.			FLA_Bidiag_UT_form_U( A, T, U );			FLA_Bidiag_UT_form_V( A, S, V );			}			*dtime_appq = FLA_Clock() - dtime_temp;			// Apply the realifying scalars in rL and rR to U and V, respectively.			{				FLA_Obj UL, UR;				FLA_Obj VL, VR;				FLA_Part_1x2( U,   &UL, &UR,   min_m_n, FLA_LEFT );				FLA_Part_1x2( V,   &VL, &VR,   min_m_n, FLA_LEFT );				FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE,    rL, UL );				FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, VL );			}			dtime_temp = FLA_Clock();			{			// Perform a singular value decomposition on the bidiagonal matrix.			r_val = FLA_Bsvd_v_opt_var2( n_iter_max, d, e, G, H, RG, RH, W, U, V, b_alg );			}			*dtime_bsvd = FLA_Clock() - dtime_temp;		}		else // if ( crossover_ratio * n_A <= m_A )		{			FLA_Obj TQ, R;//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例22: FLA_Hevd_lv_var4_components

FLA_Error FLA_Hevd_lv_var4_components( dim_t n_iter_max, FLA_Obj A, FLA_Obj l, dim_t k_accum, dim_t b_alg,                                       double* dtime_tred, double* dtime_tevd, double* dtime_appq ){	FLA_Error    r_val = FLA_SUCCESS;	FLA_Uplo     uplo = FLA_LOWER_TRIANGULAR;	FLA_Datatype dt;	FLA_Datatype dt_real;	FLA_Datatype dt_comp;	FLA_Obj      T, r, d, e, G, R, W;	FLA_Obj      d0, e0, ls, pu;	dim_t        mn_A;	dim_t        n_G = k_accum;	double       dtime_temp;	mn_A    = FLA_Obj_length( A );	dt      = FLA_Obj_datatype( A );	dt_real = FLA_Obj_datatype_proj_to_real( A );	dt_comp = FLA_Obj_datatype_proj_to_complex( A );	*dtime_tred = 1;	*dtime_tevd = 1;	*dtime_appq = 1;	// If the matrix is a scalar, then the EVD is easy.	if ( mn_A == 1 )	{		FLA_Copy( A, l );		FLA_Set( FLA_ONE, A );		return FLA_SUCCESS;	}	// Create a matrix to hold block Householder transformations.	FLA_Tridiag_UT_create_T( A, &T );	// Create a vector to hold the realifying scalars.	FLA_Obj_create( dt,      mn_A,     1, 0, 0, &r );	// Create vectors to hold the diagonal and sub-diagonal.	FLA_Obj_create( dt_real, mn_A,     1, 0, 0, &d );	FLA_Obj_create( dt_real, mn_A-1,   1, 0, 0, &e );	FLA_Obj_create( dt_real, mn_A,     1, 0, 0, &d0 );	FLA_Obj_create( dt_real, mn_A-1,   1, 0, 0, &e0 );	FLA_Obj_create( dt_real, mn_A,     1, 0, 0, &pu );	FLA_Obj_create( FLA_INT, mn_A,     1, 0, 0, &ls );	FLA_Obj_create( dt_comp, mn_A-1, n_G, 0, 0, &G );	FLA_Obj_create( dt_real, mn_A,  mn_A, 0, 0, &R );	FLA_Obj_create( dt,      mn_A,  mn_A, 0, 0, &W );  dtime_temp = FLA_Clock();  {	// Reduce the matrix to tridiagonal form.	FLA_Tridiag_UT( uplo, A, T );  }  *dtime_tred = FLA_Clock() - dtime_temp;	// Apply scalars to rotate elements on the sub-diagonal to the real domain.	FLA_Tridiag_UT_realify( uplo, A, r );	// Extract the diagonal and sub-diagonal from A.	FLA_Tridiag_UT_extract_diagonals( uplo, A, d, e );  dtime_temp = FLA_Clock();  {	// Form Q, overwriting A.	
FLA_Tridiag_UT_form_Q( uplo, A, T );  }  *dtime_appq = FLA_Clock() - dtime_temp;	// Apply the scalars in r to Q.	FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, r, A );	// Find the eigenvalues only.	FLA_Copy( d, d0 ); FLA_Copy( e, e0 );	//r_val = FLA_Tevd_n_opt_var1( n_iter_max, d0, e0, G, A );{	int info;	double* buff_d = FLA_DOUBLE_PTR( d0 );	double* buff_e = FLA_DOUBLE_PTR( e0 );	dsterf_( &mn_A, buff_d, buff_e, &info );}	FLA_Sort( FLA_FORWARD, d0 );	FLA_Set( FLA_ZERO, ls );	FLA_Set( FLA_ZERO, pu );  dtime_temp = FLA_Clock();  {	// Perform an eigenvalue decomposition on the tridiagonal matrix.	r_val = FLA_Tevd_v_opt_var4( n_iter_max, d, e, d0, ls, pu, G, R, W, A, b_alg );  }  *dtime_tevd = FLA_Clock() - dtime_temp;	// Copy the converged eigenvalues to the output vector.	FLA_Copy( d, l );	// Sort the eigenvalues and eigenvectors in ascending order.	FLA_Sort_evd( FLA_FORWARD, l, A );	FLA_Obj_free( &T );	FLA_Obj_free( &r );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例23: time_Gemm_pp_nn

/*
 * time_Gemm_pp_nn
 *
 * Timing driver for one Gemm (no-transpose / no-transpose) implementation.
 * The selected implementation is run nrepeats times on a fresh copy of C and
 * the fastest run is reported.
 *
 * Parameters
 *   variant   0 selects the reference implementation (REF_Gemm); 1 selects
 *             the implementation chosen by `type`.
 *   type      FLA_ALG_UNBLOCKED or FLA_ALG_BLOCKED; only consulted when
 *             variant == 1.
 *   nrepeats  number of timed repetitions.
 *   n         not used by this routine; kept for interface compatibility.
 *   nb_alg    block size forwarded to FLA_Gemm_pp_nn_var1.
 *   A, B      input operands.
 *   C         operand that is updated; restored to its entry contents
 *             before returning.
 *   Cref      when variant == 0 it receives a copy of the computed C;
 *             otherwise it is the reference that C is compared against.
 *   dtime     out: best (smallest) elapsed time over all repetitions.
 *   diff      out: 0.0 when variant == 0, otherwise
 *             FLA_Max_elemwise_diff( C, Cref ).
 *   mflops    out: 2 * length(C) * width(C) * width(A) / best time / 1e6.
 *
 * If nrepeats <= 0 nothing is timed and the best time stays at the 1.0e9
 * sentinel (the same starting value the other timing drivers in this file
 * use), so the outputs are well defined instead of being computed from an
 * uninitialized variable.
 */
void time_Gemm_pp_nn(
		     int variant, int type, int nrepeats, int n, int nb_alg,
		     FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj Cref,
		     double *dtime, double *diff, double *mflops )
{
  int
    irep;

  double
    elapsed,
    dtime_old = 1.0e9;   /* best time so far; sentinel until the first run */

  FLA_Obj
    Cold;

  /* Keep a pristine copy of C so every repetition starts from the same data. */
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &Cold );

  FLA_Copy_external( C, Cold );

  for ( irep = 0 ; irep < nrepeats; irep++ ){
    FLA_Copy_external( Cold, C );

    *dtime = FLA_Clock();

    switch( variant ){
    case 0:
      // Time reference implementation
      // NOTE(review): alpha is passed as ONE (not FLA_ONE), exactly as in the
      // original code -- confirm this is the intended constant.
      REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
		ONE, A, B, FLA_ONE, C );
      break;

    case 1:{
      // Time variant 1
      switch( type ){
      case FLA_ALG_UNBLOCKED:
	FLA_Gemm_pp_nn_var1( FLA_ONE, A, B, C, nb_alg );
	break;
      case FLA_ALG_BLOCKED:
	// NOTE(review): the blocked case calls the reference routine, as in
	// the original -- confirm no dedicated blocked variant was intended.
	REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
		  ONE, A, B, FLA_ONE, C );
	break;
      default:
	printf("trouble\n");
      }
      break;
    }

    default:
      // Unknown variant: nothing is executed, so say so instead of silently
      // timing an empty run.
      printf("trouble\n");
    }

    /* Record the elapsed time and keep the minimum over all repetitions. */
    elapsed = FLA_Clock() - *dtime;
    if ( elapsed < dtime_old )
      dtime_old = elapsed;
  }

  if ( variant == 0 ){
    /* The reference run defines the expected result. */
    FLA_Copy_external( C, Cref );
    *diff = 0.0;
  }
  else{
    *diff = FLA_Max_elemwise_diff( C, Cref );
  }

  *mflops = 2.0 *
            FLA_Obj_length( C ) *
            FLA_Obj_width( C ) *
            FLA_Obj_width( A ) /
            dtime_old /
            1000000;

  *dtime = dtime_old;

  /* Restore the caller's C and release the temporary copy. */
  FLA_Copy_external( Cold, C );

  FLA_Obj_free( &Cold );
}
开发者ID:pgawron,项目名称:tlash,代码行数:79,


示例24: main

//.........这里部分代码省略.........   fprintf( fpp, "%%/n" );   fprintf( fpp, "%% | Matrix Size |    FLASH    |/n" );   fprintf( fpp, "%% |    n x n    |    GFlops   |/n" );   fprintf( fpp, "%% -----------------------------/n" );   fprintf( fpp, "%s_%u = [/n", OUTPUT_FILE, nb_alg );#endif   FLA_Init();   dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) );   flops  = ( double * ) FLA_malloc( n_trials  * sizeof( double ) );      FLASH_Queue_set_num_threads( n_threads );   FLASH_Queue_set_sorting( sorting );   FLASH_Queue_set_caching( caching );   FLASH_Queue_set_work_stealing( work_stealing );   FLASH_Queue_set_data_affinity( data_affinity );   for ( i = 0; i < n_trials; i++ )   {      size = begin + i * increment;            FLA_Obj_create( datatype, size, size, 0, 0, &A );      FLA_Obj_create( datatype, size, 1,    0, 0, &x );      FLA_Obj_create( datatype, size, 1,    0, 0, &b );      FLA_Obj_create( datatype, 1,    1,    0, 0, &b_norm );      for ( j = 0; j < n_repeats; j++ )      {         FLA_Random_matrix( A );         FLA_Random_matrix( b );         FLASH_Obj_create_hier_copy_of_flat( A, 1, &nb_alg, &AH );         FLASH_Obj_create( FLA_INT,    size, 1, 1, &nb_alg, &pH );         FLASH_Obj_create_hier_copy_of_flat( b, 1, &nb_alg, &bH );         dtime = FLA_Clock();         FLASH_LU_piv( AH, pH );         dtime = FLA_Clock() - dtime;         dtimes[j] = dtime;         FLASH_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pH, bH );         FLASH_Trsv( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,                      AH, bH );         FLASH_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,                     AH, bH );         FLASH_Obj_free( &AH );         FLASH_Obj_free( &pH );         FLASH_Obj_flatten( bH, x );         FLASH_Obj_free( &bH );      }            dtime = dtimes[0];      for ( j = 1; j < n_repeats; j++ )         dtime = min( dtime, dtimes[j] );      flops[i] = 2.0 / 3.0 * size * size * size / dtime / 1e9;      
FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_ONE,                          A, x, FLA_MINUS_ONE, b );      FLA_Nrm2_external( b, b_norm );      FLA_Obj_extract_real_scalar( b_norm, &b_norm_value );#ifdef FLA_ENABLE_WINDOWS_BUILD      fprintf( stdout, "   %d   %6.3f   %le/n", size, flops[i], b_norm_value );#else            fprintf( fpp, "   %d   %6.3f/n", size, flops[i] );      fprintf( stdout, "Time: %e  |  GFlops: %6.3f/n", dtime, flops[i] );      fprintf( stdout, "Matrix size: %u x %u  |  nb_alg: %u/n",                size, size, nb_alg );      fprintf( stdout, "Norm of difference: %le/n/n", b_norm_value );#endif      FLA_Obj_free( &A );      FLA_Obj_free( &x );      FLA_Obj_free( &b );      FLA_Obj_free( &b_norm );   }#ifdef FLA_ENABLE_WINDOWS_BUILD   fprintf( stdout, "];/n/n" );#else   fprintf( fpp, "];/n" );      fflush( fpp );   fclose( fpp );#endif   FLA_free( dtimes );   FLA_free( flops );   FLA_Finalize();       return 0; }
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例25: main

int main(int argc, char *argv[]){  int n, nfirst, nlast, ninc, nlast_unb, i, irep,    nrepeats, nb_alg;  double    dtime, dtime_best,     gflops, max_gflops,    diff, d_n;  FLA_Obj    A, Aref, Aold, delta;    /* Initialize FLAME */  FLA_Init( );  /* Every time trial is repeated "repeat" times and the fastest run in recorded */  printf( "%% number of repeats:" );  scanf( "%d", &nrepeats );  printf( "%% %d/n", nrepeats );  /* Enter the max GFLOPS attainable      This is used to set the y-axis range for the graphs. Here is how     you figure out what to enter (on Linux machines):     1) more /proc/cpuinfo   (this lists the contents of this file).     2) read through this and figure out the clock rate of the machine (in GHz).     3) Find out (from an expert of from the web) the number of floating point        instructions that can be performed per core per clock cycle.     4) Figure out if you are using "multithreaded BLAS" which automatically        parallelize calls to the Basic Linear Algebra Subprograms.  If so,        check how many cores are available.     5) Multiply 2) x 3) x 4) and enter this in response to the below.     If you enter a value for max GFLOPS that is lower that the maximum that     is observed in the experiments, then the top of the graph is set to the      observed maximum.  Thus, one possibility is to simply set this to 0.0.  */  printf( "%% enter max GFLOPS:" );  scanf( "%lf", &max_gflops );  printf( "%% %lf/n", max_gflops );  /* Enter the algorithmic block size */  printf( "%% enter nb_alg:" );  scanf( "%d", &nb_alg );  printf( "%% %d/n", nb_alg );  /* Timing trials for matrix sizes n=nfirst to nlast in increments      of ninc will be performed.  
Unblocked versions are only tested to     nlast_unb */  printf( "%% enter nfirst, nlast, ninc, nlast_unb:" );  scanf( "%d%d%d%d", &nfirst, &nlast, &ninc, &nlast_unb );  printf( "%% %d %d %d %d/n", nfirst, nlast, ninc, nlast_unb );  i = 1;  for ( n=nfirst; n<= nlast; n+=ninc ){       /* Allocate space for the matrices */    FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &A );    FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Aref );    FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Aold );    FLA_Obj_create( FLA_DOUBLE, 1, 1, 1, 1, &delta );    /* Generate random matrix A and save in Aold */    FLA_Random_matrix( Aold );    /* Add something large to the diagonal to make sure it isn't ill-conditionsed */    d_n = ( double ) n;    *( ( double * ) FLA_Obj_buffer_at_view( delta ) ) = d_n;    FLA_Shift_diag( FLA_NO_CONJUGATE, delta, Aold );        /* Set gflops = billions of floating point operations that will be performed */    gflops = 1.0/3.0 * n * n * n * 1.0e-09;    /* Time the reference implementation */#if TIME_LAPACK == TRUE#else    //    if ( n <= nlast_unb )#endif    {      for ( irep=0; irep<nrepeats; irep++ ){	FLA_Copy( Aold, Aref );    	dtime = FLA_Clock();    	REF_Chol( TIME_LAPACK, Aref, nb_alg );    	dtime = FLA_Clock() - dtime;    	if ( irep == 0 ) 	  dtime_best = dtime;	else	  dtime_best = ( dtime < dtime_best ? dtime : dtime_best );      }        printf( "data_REF( %d, 1:2 ) = [ %d %le ];/n", i, n,	      gflops / dtime_best );      fflush( stdout );    }  //.........这里部分代码省略.........
开发者ID:ztschir,项目名称:High-Performance,代码行数:101,


示例26: time_Lyap

void time_Lyap(                int param_combo, int type, int nrepeats, int m,                FLA_Obj isgn, FLA_Obj A, FLA_Obj C, FLA_Obj scale,                double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_save, norm;  if ( param_combo == 0 && type == FLA_ALG_FRONT )  {    *gflops = 0.0;    *diff   = 0.0;    return;  }  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_save );  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( C ), 1, 1, 0, 0, &norm );  FLASH_Copy( C, C_save );  for ( irep = 0 ; irep < nrepeats; irep++ )  {    FLASH_Copy( C_save, C );    *dtime = FLA_Clock();    switch( param_combo ){    case 0:{      switch( type ){      //case FLA_ALG_REFERENCE:      //  REF_Lyap( FLA_NO_TRANSPOSE, isgn, A_flat, C_flat, scale );      //  break;      case FLA_ALG_FRONT:        FLASH_Lyap( FLA_NO_TRANSPOSE, isgn, A, C, scale );        break;      default:        printf("trouble/n");      }      break;    }    case 1:{      switch( type ){      //case FLA_ALG_REFERENCE:      //  REF_Lyap( FLA_CONJ_TRANSPOSE, isgn, A_flat, C_flat, scale );      //  break;      case FLA_ALG_FRONT:        FLASH_Lyap( FLA_CONJ_TRANSPOSE, isgn, A, C, scale );        break;      default:        printf("trouble/n");      }      break;    }    }    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }/*  if ( type == FLA_ALG_REFERENCE )  {    FLASH_Obj_hierarchify( C_flat, C_ref );    *diff = 0.0;  }  else  {    *diff = FLASH_Max_elemwise_diff( C, C_ref );  }*/  {    FLA_Obj X, W;    FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &X );    FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &W );    FLASH_Copy( C, X );    FLASH_Hermitianize( FLA_UPPER_TRIANGULAR, X );    if ( param_combo == 0 )    {      FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,   FLA_ONE, A, X, FLA_ZERO, W );      FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, X, A, FLA_ONE,  W );    }    else if ( param_combo == 1 )    {      
FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, X, FLA_ZERO, W );      FLASH_Gemm( FLA_NO_TRANSPOSE,   FLA_NO_TRANSPOSE, FLA_ONE, X, A, FLA_ONE,  W );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例27: main

//.........这里部分代码省略.........   sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE );   fpp = fopen( output_file_m, "a" );   fprintf( fpp, "%%/n" );   fprintf( fpp, "%% | Matrix Size |    FLASH    |/n" );   fprintf( fpp, "%% |    n x n    |    GFlops   |/n" );   fprintf( fpp, "%% -----------------------------/n" );   fprintf( fpp, "%s_%u = [/n", OUTPUT_FILE, nb_alg );#endif   FLA_Init();   dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) );   flops  = ( double * ) FLA_malloc( n_trials  * sizeof( double ) );      FLASH_Queue_set_num_threads( n_threads );   FLASH_Queue_set_sorting( sorting );   FLASH_Queue_set_caching( caching );   FLASH_Queue_set_work_stealing( work_stealing );   FLASH_Queue_set_data_affinity( data_affinity );   for ( i = 0; i < n_trials; i++ )   {      size = begin + i * increment;            FLA_Obj_create( datatype, size, size, 0, 0, &A );       FLA_Obj_create( datatype, size, size, 0, 0, &B );       FLA_Obj_create( datatype, size, 1,    0, 0, &x );       FLA_Obj_create( datatype, size, 1,    0, 0, &b );       FLA_Obj_create( datatype, 1,    1,    0, 0, &b_norm );             for ( j = 0; j < n_repeats; j++ )      {         FLA_Random_matrix( A );         FLA_Random_matrix( B );         FLA_Random_matrix( x );         FLA_Random_matrix( b );         FLA_Symmetrize( uplo, A );         FLA_Symmetrize( uplo, B );         length = ( double ) FLA_Obj_length( B );         FLA_Add_to_diag( &length, B );         FLA_Symv_external( uplo, FLA_ONE, B, x, FLA_ZERO, b );         FLASH_Obj_create_hier_copy_of_flat( A, 1, &nb_alg, &AH );           FLASH_Obj_create_hier_copy_of_flat( B, 1, &nb_alg, &BH );           FLASH_Chol( uplo, BH );                  dtime = FLA_Clock();                  FLASH_Eig_gest( inv, uplo, AH, BH );                  dtime = FLA_Clock() - dtime;         dtimes[j] = dtime;                  FLASH_Obj_free( &AH );         FLASH_Obj_free( &BH );      }            dtime = dtimes[0];      for ( j = 1; j < 
n_repeats; j++ )         dtime = min( dtime, dtimes[j] );      flops[i] = 1.0 * size * size * size / dtime / 1e9;#ifdef FLA_ENABLE_WINDOWS_BUILD            fprintf( stdout, "   %d   %6.3f   %le/n", size, flops[i], b_norm_value );#else      fprintf( fpp, "   %d   %6.3f/n", size, flops[i] );            fprintf( stdout, "Time: %e  |  GFlops: %6.3f/n", dtime, flops[i] );      fprintf( stdout, "Matrix size: %u x %u  |  nb_alg: %u/n",                size, size, nb_alg );       fprintf( stdout, "Norm of difference: %le/n/n", b_norm_value ); #endif       FLA_Obj_free( &A );       FLA_Obj_free( &B );       FLA_Obj_free( &x );       FLA_Obj_free( &b );       FLA_Obj_free( &b_norm );    }#ifdef FLA_ENABLE_WINDOWS_BUILD   fprintf( stdout, "];/n/n" );#else   fprintf( fpp, "];/n" );      fflush( fpp );   fclose( fpp );#endif   FLA_free( dtimes );   FLA_free( flops );   FLA_Finalize();       return 0; }
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例28: time_Copy

/*
 * time_Copy
 *
 * Times one copy implementation over `nrepeats` repetitions and reports the
 * fastest repetition.
 *
 *   param_combo  parameter combination to time; only 0 is handled here.
 *   type         FLA_ALG_REFERENCE runs REF_Copy() on the flattened objects,
 *                FLA_ALG_FRONT runs FLASH_Copy() on A and C.
 *   nrepeats     number of timed repetitions.
 *   m, n         dimensions used in the rate formula below.
 *   A            source object.
 *   C            destination object; its contents on entry are saved and
 *                copied back before returning.
 *   C_ref        for FLA_ALG_REFERENCE it receives the hierarchified C_flat;
 *                for any other type it is what C is compared against.
 *   dtime        out: smallest elapsed time over all repetitions (stays at
 *                the 1.0e9 sentinel when no repetition is run).
 *   diff         out: 0.0 for FLA_ALG_REFERENCE, otherwise the value returned
 *                by FLASH_Max_elemwise_diff( C, C_ref ).
 *   gflops       out: 2.0 * m * n / best time / 1.0e9.
 */
void time_Copy( int param_combo, int type, int nrepeats, int m, int n,
                FLA_Obj A, FLA_Obj C, FLA_Obj C_ref,
                double *dtime, double *diff, double *gflops )
{
  int     irep;
  double  dtime_old = 1.0e9;   /* best time so far; large sentinel to start */
  FLA_Obj C_old, A_flat, C_flat;

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, A, &A_flat );
  FLASH_Obj_create_flat_conf_to_hier( FLA_NO_TRANSPOSE, C, &C_flat );

  /* Keep the incoming C so every repetition starts from the same data. */
  FLASH_Copy( C, C_old );

  for ( irep = 0; irep < nrepeats; irep++ )
  {
    FLASH_Copy( C_old, C );

    FLASH_Obj_flatten( A, A_flat );
    FLASH_Obj_flatten( C, C_flat );

    *dtime = FLA_Clock();

    switch ( param_combo )
    {
      /* Time parameter combination 0 */
      case 0:
      {
        switch ( type )
        {
          case FLA_ALG_REFERENCE:
            REF_Copy( A_flat, C_flat );
            break;
          case FLA_ALG_FRONT:
            FLASH_Copy( A, C );
            break;
          default:
            printf( "trouble\n" );
            break;
        }
        break;
      }
      default:
        /* Unsupported combination: nothing is timed, so say so. */
        printf( "trouble\n" );
        break;
    }

    *dtime = FLA_Clock() - *dtime;

    dtime_old = min( *dtime, dtime_old );
  }

  if ( type == FLA_ALG_REFERENCE )
  {
    /* The reference result becomes the baseline stored in C_ref. */
    FLASH_Obj_hierarchify( C_flat, C_ref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLASH_Max_elemwise_diff( C, C_ref );
  }

  *gflops = 2.0 * m * n /
            dtime_old /
            1.0e9;

  *dtime = dtime_old;

  /* Put the caller's C back before releasing the temporaries. */
  FLASH_Copy( C_old, C );

  FLASH_Obj_free( &C_old );
  FLASH_Obj_free( &A_flat );
  FLASH_Obj_free( &C_flat );
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:75,


示例29: time_Transpose

void time_Transpose(                  int variant, int type, int nrepeats, int n, int nb_alg,                  FLA_Obj A, FLA_Obj A_ref,                  double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    A_old, A_tmp;  fla_blocksize_t*    bp;  fla_transpose_t*    cntl_trans_var_unb;  fla_transpose_t*    cntl_trans_var_blk;  fla_swap_t*    cntl_swap_var_blk;  fla_swap_t*    cntl_swap_blas;  bp                 = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );  cntl_swap_blas     = FLA_Cntl_swap_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );  cntl_swap_var_blk  = FLA_Cntl_swap_obj_create( FLA_FLAT, FLA_UNBLOCKED_VARIANT1, bp, cntl_swap_blas );  cntl_trans_var_unb = FLA_Cntl_transpose_obj_create( FLA_FLAT, FLA_UNBLOCKED_VARIANT1, NULL, NULL, NULL );  cntl_trans_var_blk = FLA_Cntl_transpose_obj_create( FLA_FLAT, variant, bp, cntl_trans_var_unb, cntl_swap_var_blk );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_old );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_tmp );  FLA_Copy_external( A, A_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( A_old, A );    *dtime = FLA_Clock();    switch( variant ){    case 0:      //FLA_Copyt_external( FLA_TRANSPOSE, A, A_tmp );      //FLA_Set( FLA_ZERO, A );      //FLA_Copyt_external( FLA_NO_TRANSPOSE, A_tmp, A );      FLA_Transpose( A );      break;    case 1:{      /* Time variant 1 */      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Transpose_unb_var1( A );        break;      case FLA_ALG_BLOCKED:        FLA_Transpose_blk_var1( A, cntl_trans_var_blk );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      /* Time variant 2 */      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Transpose_unb_var2( A );        break;      case FLA_ALG_BLOCKED:        FLA_Transpose_blk_var2( A, cntl_trans_var_blk );        break;      default:        printf("trouble/n");      }      break;    
}     }    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  FLA_Cntl_obj_free( cntl_trans_var_blk );  FLA_Cntl_obj_free( cntl_trans_var_unb );  FLA_Cntl_obj_free( cntl_swap_var_blk );  FLA_Cntl_obj_free( cntl_swap_blas );  FLA_Blocksize_free( bp );  if ( variant == 0 ){//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,



注:本文中的FLA_Clock函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ FLA_Cntl_blocksize函数代码示例
C++ FLA_Check_error_level函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。