您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ FLA_Copy_external函数代码示例

51自学网 2021-06-01 20:41:52
  C++
这篇教程C++ FLA_Copy_external函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中FLA_Copy_external函数的典型用法代码示例。如果您正苦于以下问题:C++ FLA_Copy_external函数的具体用法?C++ FLA_Copy_external怎么用?C++ FLA_Copy_external使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了FLA_Copy_external函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: FLA_Chol_solve

/*
 * FLA_Chol_solve
 *
 * Runs FLA_Chol_solve_check() when the error-checking level is at least
 * FLA_MIN_ERROR_CHECKING, copies B into X unless FLA_Obj_is_identical()
 * reports that they are the same object, and then applies two left-side,
 * non-unit-diagonal triangular solves with A to X (scaled by FLA_ONE).
 *
 * uplo selects which triangle of A is referenced; any value other than
 * FLA_LOWER_TRIANGULAR is treated as the upper-triangular case.
 * NOTE(review): A is presumably a previously computed Cholesky factor --
 * confirm against the callers.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Chol_solve( FLA_Uplo uplo, FLA_Obj A, FLA_Obj B, FLA_Obj X )
{
  // Validate the arguments if error checking is switched on.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Chol_solve_check( uplo, A, B, X );
  }

  // The solves below overwrite X, so seed it with the right-hand side
  // unless the caller passed the same object for both.
  if ( FALSE == FLA_Obj_is_identical( B, X ) )
  {
    FLA_Copy_external( B, X );
  }

  if ( uplo == FLA_LOWER_TRIANGULAR )
  {
    // Lower case: untransposed solve first, conjugate-transposed second.
    FLA_Trsm_external( FLA_LEFT, uplo, FLA_NO_TRANSPOSE,
                       FLA_NONUNIT_DIAG, FLA_ONE, A, X );
    FLA_Trsm_external( FLA_LEFT, uplo, FLA_CONJ_TRANSPOSE,
                       FLA_NONUNIT_DIAG, FLA_ONE, A, X );

    return FLA_SUCCESS;
  }

  // Upper case: conjugate-transposed solve first, untransposed second.
  FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                     FLA_NONUNIT_DIAG, FLA_ONE, A, X );
  FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                     FLA_NONUNIT_DIAG, FLA_ONE, A, X );

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:26,


示例2: FLA_QR_UT_solve

/*
 * FLA_QR_UT_solve
 *
 * Steps performed, in order:
 *   1. optional parameter check (FLA_QR_UT_solve_check);
 *   2. create a workspace object from T and B, and a copy of B;
 *   3. apply Q (FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE)
 *      to the copy of B using A, T and the workspace;
 *   4. partition A and the updated copy 2x1 at FLA_Obj_width( A ) from the top;
 *   5. upper-triangular solve with the top part of A on the top part of the copy;
 *   6. copy that top part into X and release both temporaries.
 *
 * B itself is never written to; only its copy is modified.
 * NOTE(review): A and T are presumably the outputs of a QR_UT factorization --
 * confirm against the callers.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_QR_UT_solve( FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X )
{
  FLA_Obj work;
  FLA_Obj rhs;
  FLA_Obj A_top, A_bottom;
  FLA_Obj rhs_top, rhs_bottom;

  // Validate the arguments if error checking is switched on.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_QR_UT_solve_check( A, T, B, X );
  }

  // Temporaries: workspace for applying Q, and a scratch copy of B.
  FLA_Apply_Q_UT_create_workspace( T, B, &work );
  FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &rhs );

  // Apply Q with a conjugate transpose from the left to the scratch copy.
  FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
                  A, T, work, rhs );

  // Split off the top FLA_Obj_width( A ) portion of A and of the scratch copy.
  FLA_Part_2x1( A,   &A_top,
                     &A_bottom,   FLA_Obj_width( A ), FLA_TOP );
  FLA_Part_2x1( rhs, &rhs_top,
                     &rhs_bottom, FLA_Obj_width( A ), FLA_TOP );

  // Triangular solve in place on the top part of the scratch copy.
  FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                     FLA_NONUNIT_DIAG, FLA_ONE, A_top, rhs_top );

  // Hand the result to the caller.
  FLA_Copy_external( rhs_top, X );

  FLA_Obj_free( &rhs );
  FLA_Obj_free( &work );

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:32,


示例3: FLA_LU_piv_copy_task

/*
 * FLA_LU_piv_copy_task
 *
 * Calls FLA_LU_piv_task( A, p, cntl ) and afterwards copies A into U with
 * FLA_Copy_external(). The copy is performed regardless of the value
 * returned by FLA_LU_piv_task(), and that value is what this function
 * returns (the result of the copy is not inspected).
 */
FLA_Error FLA_LU_piv_copy_task( FLA_Obj A, FLA_Obj p, FLA_Obj U, fla_lu_t* cntl )
{
  FLA_Error lu_status = FLA_LU_piv_task( A, p, cntl );

  // Mirror the (now updated) contents of A into U.
  FLA_Copy_external( A, U );

  return lu_status;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:10,


示例4: FLA_UDdate_UT_solve

/*
 * FLA_UDdate_UT_solve
 *
 * Copies bR into x and then performs a left-side, upper-triangular,
 * untransposed, non-unit-diagonal triangular solve with R on x (scaled by
 * FLA_ONE). bR is left untouched; the result ends up in x.
 *
 * FLA_UDdate_UT_solve_check() is invoked first when the error-checking
 * level is at least FLA_MIN_ERROR_CHECKING.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_UDdate_UT_solve( FLA_Obj R, FLA_Obj bR, FLA_Obj x )
{
  // Validate the arguments if error checking is switched on.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_UDdate_UT_solve_check( R, bR, x );
  }

  // Work on a copy of bR held in x: the solve below is in-place, and this
  // keeps bR intact for the caller.
  FLA_Copy_external( bR, x );

  // In-place triangular solve with R applied to x.
  FLA_Trsm_external( FLA_LEFT,
                     FLA_UPPER_TRIANGULAR,
                     FLA_NO_TRANSPOSE,
                     FLA_NONUNIT_DIAG,
                     FLA_ONE, R, x );

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:17,


示例5: FLA_Copy

/*
 * FLA_Copy
 *
 * Front end for copying A into B. The implementation is chosen at compile
 * time:
 *   - with FLA_ENABLE_BLAS1_FRONT_END_CNTL_TREES defined, the parameters are
 *     checked (when the error-checking level is at least
 *     FLA_MIN_ERROR_CHECKING) and FLA_Copy_internal() is called with the
 *     fla_copy_cntl_blas control tree;
 *   - otherwise FLA_Copy_external() is called directly.
 *
 * Returns whatever the selected routine returns.
 */
FLA_Error FLA_Copy( FLA_Obj A, FLA_Obj B )
{
#ifdef FLA_ENABLE_BLAS1_FRONT_END_CNTL_TREES
  // Validate the arguments if error checking is switched on.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Copy_check( A, B );
  }

  // Route through the internal entry point using the fla_copy_cntl_blas tree.
  return FLA_Copy_internal( A, B, fla_copy_cntl_blas );
#else
  return FLA_Copy_external( A, B );
#endif
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:19,


示例6: FLA_Trmvsx_external

/*
 * FLA_Trmvsx_external
 *
 * Sequence of operations, as written below:
 *   1. optional parameter check, only at FLA_FULL_ERROR_CHECKING;
 *   2. immediate FLA_SUCCESS return if A has a zero dimension (y is left
 *      untouched in that case);
 *   3. a temporary conformal to x is created and x is copied into it;
 *   4. FLA_Trmv_external( uplo, transa, diag, A, tmp ) updates the temporary;
 *   5. y is scaled by beta, then alpha * tmp is added to y via
 *      FLA_Axpy_external();
 *   6. the temporary is freed.
 *
 * x is never modified. Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trmvsx_external( FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y )
{
  FLA_Obj tmp;

  // Validate the arguments only under full error checking.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Trmvsx_check( uplo, transa, diag, alpha, A, x, beta, y );
  }

  // Nothing to do for an empty A.
  if ( FLA_Obj_has_zero_dim( A ) )
  {
    return FLA_SUCCESS;
  }

  // The triangular multiply works in place, so run it on a copy of x.
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, x, &tmp );
  FLA_Copy_external( x, tmp );

  FLA_Trmv_external( uplo, transa, diag, A, tmp );

  // y := beta * y, followed by y := y + alpha * tmp.
  FLA_Scal_external( beta, y );
  FLA_Axpy_external( alpha, tmp, y );

  FLA_Obj_free( &tmp );

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:23,


示例7: time_Sylv_nn

void time_Sylv_nn(                   int variant, int type, int n_repeats, int m, int n, int nb_alg,                   FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref, FLA_Obj scale,                   double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  fla_blocksize_t*    bp;  fla_sylv_t*    cntl_sylv_var;  fla_sylv_t*    cntl_sylv_unb;  fla_gemm_t*    cntl_gemm_blas;/*  if( type == FLA_ALG_UNBLOCKED && n > 400 )  {    *gflops = 0.0;    *diff   = 0.0;    return;  }*/  bp               = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );  cntl_sylv_unb    = FLA_Cntl_sylv_obj_create( FLA_FLAT, FLA_UNB_OPT_VARIANT1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );  cntl_gemm_blas   = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL );  cntl_sylv_var    = FLA_Cntl_sylv_obj_create( FLA_FLAT, variant, bp, cntl_sylv_unb, cntl_sylv_unb, cntl_sylv_unb, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas, cntl_gemm_blas );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < n_repeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( variant ){    case 0:      /* Time reference implementation */      REF_Sylv_nn( isgn, A, B, C, scale );      break;    case 1:{      /* Time variant 1 */      switch( type ){      case FLA_ALG_UNB_OPT:        FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );        break;      case FLA_ALG_BLOCKED:        FLA_Sylv_nn_blk_var1( isgn, A, B, C, scale, cntl_sylv_var );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      /* Time variant 2 */      switch( type ){      case FLA_ALG_UNB_OPT:        FLA_Sylv_nn_opt_var2( isgn, A, B, C, scale );        break;      case FLA_ALG_BLOCKED:        FLA_Sylv_nn_blk_var2( isgn, A, B, C, scale, 
cntl_sylv_var );        break;      default:        printf("trouble/n");      }      break;    }    case 3:{      /* Time variant 3 */      switch( type ){      case FLA_ALG_UNB_OPT:        FLA_Sylv_nn_opt_var3( isgn, A, B, C, scale );        break;      case FLA_ALG_BLOCKED:        FLA_Sylv_nn_blk_var3( isgn, A, B, C, scale, cntl_sylv_var );        break;      default:        printf("trouble/n");//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例8: FLA_Copy_task

/*
 * FLA_Copy_task
 *
 * Task wrapper that forwards to FLA_Copy_external( A, B ) and returns its
 * result. The control-tree argument cntl is accepted but not used here.
 */
FLA_Error FLA_Copy_task( FLA_Obj A, FLA_Obj B, fla_copy_t* cntl )
{
  FLA_Error status = FLA_Copy_external( A, B );

  return status;
}
开发者ID:pgawron,项目名称:tlash,代码行数:4,


示例9: time_Gemm

void time_Gemm(                int param_combo, int type, int nrepeats, int m, int k, int n,               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  if ( param_combo != 4 )  {    *gflops = 0.0;    *diff   = 0.0;    return;  }  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( param_combo ){    // Time parameter combination 0    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 1    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 2    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 3    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_NO_TRANSPOSE, 
FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 4    case 4:{      switch( type ){//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例10: libfla_test_qrut_experiment

void libfla_test_qrut_experiment( test_params_t params,                                  unsigned int  var,                                  char*         sc_str,                                  FLA_Datatype  datatype,                                  unsigned int  p_cur,                                  unsigned int  pci,                                  unsigned int  n_repeats,                                  signed int    impl,                                  double*       perf,                                  double*       residual ){	dim_t        b_flash    = params.b_flash;	dim_t        b_alg_flat = params.b_alg_flat;	double       time_min   = 1e9;	double       time;	unsigned int i;	unsigned int m, n;	unsigned int min_m_n;	signed int   m_input    = -2;	signed int   n_input    = -1;	FLA_Obj      A, T, x, b, y, norm;	FLA_Obj      A_save;	FLA_Obj      A_test, T_test, x_test, b_test;	// Determine the dimensions.	if ( m_input < 0 ) m = p_cur * abs(m_input);	else               m = p_cur;	if ( n_input < 0 ) n = p_cur * abs(n_input);	else               n = p_cur;	// Compute the minimum dimension.	min_m_n = min( m, n );	// Create the matrices for the current operation.	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, n, &A );	if ( impl == FLA_TEST_FLAT_FRONT_END ||	     ( impl == FLA_TEST_FLAT_BLK_VAR && var == 1 ) )		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, min_m_n, &T );	else if ( var == 2 )		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], min_m_n, min_m_n, &T );	else		libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], 1, min_m_n, &T );	// Initialize the test matrices.	FLA_Random_matrix( A );	// Save the original object contents in a temporary object.	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );	// Create vectors to form a linear system.	
FLA_Obj_create( datatype, n, 1, 0, 0, &x );	FLA_Obj_create( datatype, m, 1, 0, 0, &b );	FLA_Obj_create( datatype, n, 1, 0, 0, &y );	// Create a real scalar object to hold the norm of A.	FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );	// Create a random right-hand side vector.	FLA_Random_matrix( b );	// Use hierarchical matrices if we're testing the FLASH front-end.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_QR_UT_create_hier_matrices( A, 1, &b_flash, &A_test, &T_test );		FLASH_Obj_create_hier_copy_of_flat( b, 1, &b_flash, &b_test );		FLASH_Obj_create_hier_copy_of_flat( x, 1, &b_flash, &x_test );	}	else	{		A_test = A;		T_test = T;	}	// Create a control tree for the individual variants.	if ( impl == FLA_TEST_FLAT_UNB_VAR ||	     impl == FLA_TEST_FLAT_OPT_VAR ||	     impl == FLA_TEST_FLAT_BLK_VAR )		libfla_test_qrut_cntl_create( var, b_alg_flat );	// Repeat the experiment n_repeats times and record results.	for ( i = 0; i < n_repeats; ++i )	{		if ( impl == FLA_TEST_HIER_FRONT_END )			FLASH_Obj_hierarchify( A_save, A_test );		else			FLA_Copy_external( A_save, A_test );				time = FLA_Clock();		libfla_test_qrut_impl( impl, A_test, T_test );				time = FLA_Clock() - time;		time_min = min( time_min, time );	}	// Perform a linear solve with the result.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_QR_UT_solve( A_test, T_test, b_test, x_test );//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,


示例11: time_QR_UT

void time_QR_UT(                 int variant, int type, int nrepeats, int m, int n,                 FLA_Obj A, FLA_Obj A_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W, FLA_Obj b, FLA_Obj b_orig,                 double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    A_save, b_save, norm;  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, b, &b_save );  if ( FLA_Obj_is_single_precision( A ) )    FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );  else    FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );  FLA_Copy_external( A, A_save );  FLA_Copy_external( b, b_save );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( A_save, A );    *dtime = FLA_Clock();    switch( variant ){    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_QR_UT( A, t );        break;      case FLA_ALG_FRONT:        FLA_QR_UT( A, T );        break;      default:        printf("trouble/n");      }      break;    }    }    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  if ( type == FLA_ALG_REFERENCE )  {    FLA_Obj AT, AB;    FLA_Obj bT, bB;    FLA_Obj y;    FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y );    FLA_Copy_external( b, b_orig );    if ( FLA_Obj_is_real( A ) )      FLA_Apply_Q_blk_external( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A, t, b );    else      FLA_Apply_Q_blk_external( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, A, t, b );    FLA_Part_2x1( A,    &AT,                        &AB,    FLA_Obj_width( A ), FLA_TOP );    FLA_Part_2x1( b,    &bT,                        &bB,    FLA_Obj_width( A ), FLA_TOP );    FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,                       FLA_NONUNIT_DIAG, FLA_ONE, AT, bT );    FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, bT, FLA_ONE, b_orig );    FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y );    FLA_Nrm2_external( y, 
norm );    FLA_Obj_extract_real_scalar( norm, diff );    FLA_Obj_free( &y );  }  else  {    FLA_Obj x, y;    FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y );    FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &x );    FLA_Copy_external( b, b_orig );    FLA_QR_UT_solve( A, T, b, x );    FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, x, FLA_ONE, b_orig );    FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y );    FLA_Nrm2_external( y, norm );    FLA_Obj_extract_real_scalar( norm, diff );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例12: main

int main(int argc, char *argv[]){  int     datatype,    m_input,    m,    p_first, p_last, p_inc,    p,    nb_alg,    variant,    n_repeats,    i, j,    n_variants = N_VARIANTS;    char *colors = "brkgmcbrkg";  char *ticks  = "o+*xso+*xs";  char m_dim_desc[14];  char m_dim_tag[10];  double max_gflops=6.0;    double    dtime,    gflops,    diff;  FLA_Obj    A, b, b_orig, norm;    FLA_Init();  fprintf( stdout, "%c number of repeats:", '%' );  scanf( "%d", &n_repeats );  fprintf( stdout, "%c %d/n", '%', n_repeats );  fprintf( stdout, "%c Enter blocking size:", '%' );  scanf( "%d", &nb_alg );  fprintf( stdout, "%c %d/n", '%', nb_alg );  fprintf( stdout, "%c enter problem size first, last, inc:", '%' );  scanf( "%d%d%d", &p_first, &p_last, &p_inc );  fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc );  fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' );  scanf( "%d", &m_input );  fprintf( stdout, "%c %d/n", '%', m_input );  fprintf( stdout, "/nclear all;/n/n" );  if     ( m_input >  0 ) {    sprintf( m_dim_desc, "m = %d", m_input );    sprintf( m_dim_tag,  "m%dc", m_input);  }  else if( m_input <  -1 ) {    sprintf( m_dim_desc, "m = p/%d", -m_input );    sprintf( m_dim_tag,  "m%dp", -m_input );  }  else if( m_input == -1 ) {    sprintf( m_dim_desc, "m = p" );    sprintf( m_dim_tag,  "m%dp", 1 );  }  //datatype = FLA_FLOAT;  //datatype = FLA_DOUBLE;  //datatype = FLA_COMPLEX;  datatype = FLA_DOUBLE_COMPLEX;  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    m = m_input;    if( m < 0 ) m = p / f2c_abs(m_input);    FLA_Obj_create( datatype, m, m, 0, 0, &A );    FLA_Obj_create( datatype, m, 1, 0, 0, &b );    FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );/*    FLA_Obj_create( datatype, m, m, m, 1, &A );    FLA_Obj_create( datatype, m, 1, 1, 1, &b );    FLA_Obj_create( datatype, m, 1, 1, 1, &b_orig );*/    if ( FLA_Obj_is_single_precision( A ) )      FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );    else      FLA_Obj_create( 
FLA_DOUBLE, 1, 1, 0, 0, &norm );    FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A );    FLA_Random_matrix( b );    FLA_Copy_external( b, b_orig );/*    time_Trinv_un( 0, FLA_ALG_REFERENCE, n_repeats, m, nb_alg,                   A, b, b_orig, norm, &dtime, &diff, &gflops );//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,


示例13: time_Her2k_ln

void time_Her2k_ln(                int variant, int type, int nrepeats, int n, int nb_alg,               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;   FLA_Obj    C_old;  fla_blocksize_t*    bp;  fla_gemm_t*    cntl_gemm_blas;  fla_her2k_t*    cntl_her2k_blas;  fla_her2k_t*    cntl_her2k_var;  bp              = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );  cntl_gemm_blas  = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );  cntl_her2k_blas = FLA_Cntl_her2k_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL, NULL, NULL );  cntl_her2k_var  = FLA_Cntl_her2k_obj_create( FLA_FLAT, variant, bp, cntl_her2k_blas, cntl_gemm_blas, cntl_gemm_blas );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ )  {    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( variant ){    case 0:      // Time reference implementation      REF_Her2k( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ONE, C );      break;    case 1:{      // Time variant 1      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Her2k_ln_unb_var1( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Her2k_ln_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      // Time variant 2      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Her2k_ln_unb_var2( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Her2k_ln_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );        break;      default:        printf("trouble/n");      }      break;    }    case 3:{      // Time variant 3      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Her2k_ln_unb_var3( FLA_ONE, A, B, FLA_ONE, C );        break;      case 
FLA_ALG_BLOCKED:        FLA_Her2k_ln_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_her2k_var );        break;      default:        printf("trouble/n");      }      break;    }    case 4:{      // Time variant 4      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Her2k_ln_unb_var4( FLA_ONE, A, B, FLA_ONE, C );        break;//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例14: main

//.........这里部分代码省略.........      if ( pc_str[param_combo][0] == 'c' ||           pc_str[param_combo][1] == 'c' )      {        if ( precision == FLA_SINGLE_PRECISION )          datatype = FLA_COMPLEX;        else          datatype = FLA_DOUBLE_COMPLEX;      }      else      {        if ( precision == FLA_SINGLE_PRECISION )          datatype = FLA_FLOAT;        else          datatype = FLA_DOUBLE;      }      // If transposing A, switch dimensions.      if ( pc_str[param_combo][0] == 'n' )        FLA_Obj_create( datatype, m, k, 0, 0, &A );      else        FLA_Obj_create( datatype, k, m, 0, 0, &A );            // If transposing B, switch dimensions.      if ( pc_str[param_combo][1] == 'n' )        FLA_Obj_create( datatype, k, n, 0, 0, &B );      else        FLA_Obj_create( datatype, n, k, 0, 0, &B );      FLA_Obj_create( datatype, m, n, 0, 0, &C );      FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );      FLA_Random_matrix( A );      FLA_Random_matrix( B );      FLA_Random_matrix( C );      FLA_Copy_external( C, C_ref );            fprintf( stdout, "data_gemm_%s( %d, 1:5 ) = [ %4d %4d %4d  ", pc_str[param_combo], i, m, k, n );      fflush( stdout );      time_Gemm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, n,                 A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );/*      time_Gemm( param_combo, FLA_ALG_FRONT, n_repeats, m, k, n,                 A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );*/      fprintf( stdout, " ]; /n" );      fflush( stdout );      FLA_Obj_free( &A );      FLA_Obj_free( &B );      FLA_Obj_free( &C );      FLA_Obj_free( &C_ref );    }    fprintf( stdout, "/n" );  }/*  fprintf( stdout, "figure;/n" );  fprintf( stdout, "hold on;/n" );  for ( i = 0; i < n_param_combos; i++ ) {    fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 2 ), '%c:%c' ); /n",            pc_str[i], 
pc_str[i], colors[ i ], ticks[ i ] );    fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 4 ), '%c-.%c' ); /n",            pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... /n" );  for ( i = 0; i < n_param_combos; i++ )    fprintf( stdout, "'ref//_gemm//_%s', 'fla//_gemm//_%s', ... /n", pc_str[i], pc_str[i] );  fprintf( stdout, "'Location', 'SouthEast' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME gemm front-end performance (%s, %s, %s)' );/n",           m_dim_desc, k_dim_desc, n_dim_desc );  fprintf( stdout, "print -depsc gemm_front_%s_%s_%s.eps/n", m_dim_tag, k_dim_tag, n_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize( );  return 0;}
开发者ID:flame,项目名称:libflame,代码行数:101,


示例15: time_Apply_G_rf

void time_Apply_G_rf(               int variant, int type, int n_repeats, int m, int k, int n, int b_alg,               FLA_Obj A, FLA_Obj A_ref, FLA_Obj G, FLA_Obj P,               double *dtime, double *diff, double *gflops ){  int irep;  double    dtime_old = 1.0e9;  FLA_Obj    A_save, G_save, norm;  if ( FLA_Obj_is_real( A ) )  {    if (       //( variant == 1 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 1 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 1 && type == FLA_ALG_BLOCKED ) ||       //( variant == 2 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 2 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 2 && type == FLA_ALG_BLOCKED ) ||       //( variant == 3 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 3 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 3 && type == FLA_ALG_BLOCKED ) ||       //( variant == 6 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 6 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 6 && type == FLA_ALG_BLOCKED ) ||       //( variant == 9 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 9 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 9 && type == FLA_ALG_BLOCKED ) ||       ( variant == 4 ) ||       ( variant == 5 ) ||       ( variant == 7 ) ||       ( variant == 8 ) ||       FALSE    )     {      *gflops = 0.0;      *diff   = 0.0;      return;    }  }  else if ( FLA_Obj_is_complex( A ) )  {    if (       //( variant == 1 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 1 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 1 && type == FLA_ALG_BLOCKED ) ||       //( variant == 2 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 2 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 2 && type == FLA_ALG_BLOCKED ) ||       //( variant == 3 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 3 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 3 && type == FLA_ALG_BLOCKED ) ||       //( variant == 6 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 6 && type == 
FLA_ALG_UNB_ASM ) ||       //( variant == 6 && type == FLA_ALG_BLOCKED ) ||       //( variant == 9 && type == FLA_ALG_UNB_OPT ) ||       //( variant == 9 && type == FLA_ALG_UNB_ASM ) ||       //( variant == 9 && type == FLA_ALG_BLOCKED ) ||       ( variant == 4 ) ||       ( variant == 5 ) ||       ( variant == 7 ) ||       ( variant == 8 ) ||       FALSE    )    {      *gflops = 0.0;      *diff   = 0.0;      return;    }  }  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, G, &G_save );  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );  //dim_t b_flash_m = b_alg;  //dim_t b_flash_n = n;  //FLASH_Obj_create_hier_copy_of_flat_ext( A, 1, &b_flash_m, &b_flash_n, &AH ); //printf ( "flash dims: %d x %d/n", FLA_Obj_length( AH ), FLA_Obj_width( AH ) );  FLA_Copy_external( A, A_save );  FLA_Copy_external( G, G_save );  for ( irep = 0 ; irep < n_repeats; irep++ ){    FLA_Copy_external( A_save, A );    FLA_Copy_external( G_save, G );    //FLASH_Obj_hierarchify( A_save, AH );    *dtime = FLA_Clock();    switch( variant ){//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例16: time_Syrk_ln

/*
 * time_Syrk_ln
 *
 * Times nrepeats runs of the selected Syrk implementation on C and reports
 * the best (smallest) elapsed time.
 *
 * Parameters:
 *   variant  - 0 runs REF_Syrk_ln(); any other value prints "trouble".
 *   type, n, nb_alg, B - accepted for signature compatibility; not used here.
 *   A, C     - operands handed to REF_Syrk_ln( FLA_ONE, A, FLA_ONE, C ).
 *   Cref     - for variant 0 it receives a copy of the computed C; for other
 *              variants it is the reference C is compared against.
 *   dtime    - out: best elapsed time as measured by FLA_Clock().
 *   diff     - out: 0.0 for variant 0, otherwise FLA_Max_elemwise_diff( C, Cref ).
 *   gflops   - out: length(A) * length(A) * width(A) / best time / 1e9.
 *
 * C is restored to its original contents before returning.
 *
 * The best time starts at 1.0e9, so with nrepeats <= 0 the outputs are
 * computed from that sentinel rather than from an uninitialized value.
 */
void time_Syrk_ln( int variant, int type, int nrepeats, int n, int nb_alg,
                   FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj Cref,
                   double *dtime, double *diff, double *gflops )
{
  int     irep;
  double  dtime_old = 1.0e9;
  FLA_Obj Cold;

  // Keep the original C so that every repetition starts from the same data.
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &Cold );
  FLA_Copy_external( C, Cold );

  for ( irep = 0; irep < nrepeats; irep++ )
  {
    FLA_Copy_external( Cold, C );

    *dtime = FLA_Clock();

    switch ( variant )
    {
    case 0:
      // Time reference implementation
      REF_Syrk_ln( FLA_ONE, A, FLA_ONE, C );
      break;
    default:
      printf( "trouble\n" );
      break;
    }

    // Track the fastest repetition seen so far.
    *dtime = FLA_Clock() - *dtime;
    dtime_old = min( *dtime, dtime_old );
  }

  if ( variant == 0 )
  {
    // The reference run defines the expected result.
    FLA_Copy_external( C, Cref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLA_Max_elemwise_diff( C, Cref );
  }

  *gflops = 1.0 *
            FLA_Obj_length( A ) *
            FLA_Obj_length( A ) *
            FLA_Obj_width( A ) /
            dtime_old /
            1e9;

  *dtime = dtime_old;

  // Leave C as the caller provided it.
  FLA_Copy_external( Cold, C );

  FLA_Obj_free( &Cold );
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:66,


示例17: time_Gemm_pp_nn

/*
 * time_Gemm_pp_nn
 *
 * Times nrepeats runs of the selected Gemm implementation on C and reports
 * the best (smallest) elapsed time.
 *
 * Parameters:
 *   variant  - 0 runs REF_Gemm(); 1 dispatches on type; other values run
 *              nothing inside the timed region.
 *   type     - for variant 1: FLA_ALG_UNBLOCKED runs FLA_Gemm_pp_nn_var1(),
 *              FLA_ALG_BLOCKED runs REF_Gemm(), anything else prints "trouble".
 *   n        - accepted for signature compatibility; not used here.
 *   nb_alg   - forwarded to FLA_Gemm_pp_nn_var1().
 *   A, B, C  - operands of the multiplication; C is the one overwritten.
 *   Cref     - for variant 0 it receives a copy of the computed C; for other
 *              variants it is the reference C is compared against.
 *   dtime    - out: best elapsed time as measured by FLA_Clock().
 *   diff     - out: 0.0 for variant 0, otherwise FLA_Max_elemwise_diff( C, Cref ).
 *   mflops   - out: 2 * length(C) * width(C) * width(A) / best time / 1e6.
 *
 * C is restored to its original contents before returning.
 *
 * The best time starts at 1.0e9, so with nrepeats <= 0 the outputs are
 * computed from that sentinel rather than from an uninitialized value.
 */
void time_Gemm_pp_nn( int variant, int type, int nrepeats, int n, int nb_alg,
                      FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj Cref,
                      double *dtime, double *diff, double *mflops )
{
  int     irep;
  double  dtime_old = 1.0e9;
  FLA_Obj Cold;

  // Keep the original C so that every repetition starts from the same data.
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &Cold );
  FLA_Copy_external( C, Cold );

  for ( irep = 0; irep < nrepeats; irep++ )
  {
    FLA_Copy_external( Cold, C );

    *dtime = FLA_Clock();

    switch ( variant )
    {
    case 0:
      // Time reference implementation
      // NOTE(review): alpha is passed as ONE rather than FLA_ONE -- its
      // definition is not visible here; confirm it is intended.
      REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                ONE, A, B, FLA_ONE, C );
      break;
    case 1:
    {
      // Time variant 1
      switch ( type )
      {
      case FLA_ALG_UNBLOCKED:
        FLA_Gemm_pp_nn_var1( FLA_ONE, A, B, C, nb_alg );
        break;
      case FLA_ALG_BLOCKED:
        // NOTE(review): the blocked case times the reference routine --
        // confirm this is deliberate.
        REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                  ONE, A, B, FLA_ONE, C );
        break;
      default:
        printf( "trouble\n" );
      }
      break;
    }
    }

    // Track the fastest repetition seen so far.
    *dtime = FLA_Clock() - *dtime;
    dtime_old = min( *dtime, dtime_old );
  }

  if ( variant == 0 )
  {
    // The reference run defines the expected result.
    FLA_Copy_external( C, Cref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLA_Max_elemwise_diff( C, Cref );
  }

  *mflops = 2.0 *
            FLA_Obj_length( C ) *
            FLA_Obj_width( C ) *
            FLA_Obj_width( A ) /
            dtime_old /
            1000000;

  *dtime = dtime_old;

  // Leave C as the caller provided it.
  FLA_Copy_external( Cold, C );

  FLA_Obj_free( &Cold );
}
开发者ID:pgawron,项目名称:tlash,代码行数:79,


示例18: time_Copyt

void time_Copyt(                int param_combo, int type, int nrepeats, int m, int n,               FLA_Obj A, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( param_combo ){    // Time parameter combination 0    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_NO_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_NO_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_CONJ_NO_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_CONJ_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_CONJ_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    }	    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  if ( type == FLA_ALG_REFERENCE )  {    FLA_Copy_external( C, C_ref );    *diff = 0.0;  }  else//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例19: REF_Svdd_uv_components

FLA_Error REF_Svdd_uv_components( FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V,                                  double* dtime_bred, double* dtime_bsvd, double* dtime_appq,                                  double* dtime_qrfa, double* dtime_gemm )/*{  *dtime_bred = 1;  *dtime_bsvd = 1;  *dtime_appq = 1;  *dtime_qrfa = 1;  *dtime_gemm = 1;  return FLA_Svdd_external( FLA_SVD_VECTORS_ALL, A, s, U, V );}*/{  FLA_Datatype dt_A;  FLA_Datatype dt_A_real;  dim_t        m_A, n_A;  dim_t        min_m_n;  FLA_Obj      tq, tu, tv, d, e, Ur, Vr, W;  FLA_Obj      eT, epsilonB;  FLA_Uplo     uplo = FLA_UPPER_TRIANGULAR;  double       crossover_ratio = 16.0 / 10.0;  double       dtime_temp;  dt_A      = FLA_Obj_datatype( A );  dt_A_real = FLA_Obj_datatype_proj_to_real( A );  m_A       = FLA_Obj_length( A );  n_A       = FLA_Obj_width( A );  min_m_n   = FLA_Obj_min_dim( A );  FLA_Obj_create( dt_A,      min_m_n, 1,   0, 0, &tq );  FLA_Obj_create( dt_A,      min_m_n, 1,   0, 0, &tu );  FLA_Obj_create( dt_A,      min_m_n, 1,   0, 0, &tv );  FLA_Obj_create( dt_A_real, min_m_n, 1,   0, 0, &d );  FLA_Obj_create( dt_A_real, min_m_n, 1,   0, 0, &e );  FLA_Obj_create( dt_A_real, n_A,     n_A, 0, 0, &Ur );  FLA_Obj_create( dt_A_real, n_A,     n_A, 0, 0, &Vr );  FLA_Part_2x1( e,   &eT,                     &epsilonB,    1, FLA_BOTTOM );  if ( m_A >= n_A )  {    if ( m_A < crossover_ratio * n_A )    {      dtime_temp = FLA_Clock();      {        // Reduce to bidiagonal form.        FLA_Bidiag_blk_external( A, tu, tv );        FLA_Bidiag_UT_extract_diagonals( A, d, eT );      }      *dtime_bred = FLA_Clock() - dtime_temp;      dtime_temp = FLA_Clock();      {        // Divide-and-conquor algorithm.        FLA_Bsvdd_external( uplo, d, e, Ur, Vr );      }      *dtime_bsvd = FLA_Clock() - dtime_temp;      dtime_temp = FLA_Clock();      {        // Form U.        FLA_Copy_external( Ur, U );        FLA_Bidiag_apply_U_external( FLA_LEFT, FLA_NO_TRANSPOSE, A, tu, U );        // Form V.        
FLA_Copy_external( Vr, V );        FLA_Bidiag_apply_V_external( FLA_RIGHT, FLA_CONJ_TRANSPOSE, A, tv, V );      }      *dtime_appq = FLA_Clock() - dtime_temp;      *dtime_qrfa = 0.0;      *dtime_gemm = 0.0;    }    else    {      FLA_Obj AT,              AB;      FLA_Obj UL, UR;      FLA_Part_2x1( A,   &AT,                         &AB,        n_A, FLA_TOP );      FLA_Part_1x2( U,   &UL, &UR,   n_A, FLA_LEFT );      // Create a temporary n-by-n matrix R.      FLA_Obj_create( dt_A, n_A, n_A, 0, 0, &W );      dtime_temp = FLA_Clock();      {        // Perform a QR factorization.        FLA_QR_blk_external( A, tq );        FLA_Copyr_external( FLA_LOWER_TRIANGULAR, A, UL );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例20: main

int main(int argc, char *argv[]){  int     m_input,    m,    p_first, p_last, p_inc,    p,    n_repeats,    param_combo,    i,    n_param_combos = N_PARAM_COMBOS;  FLA_Datatype datatype;    char *colors = "brkgmcbrkg";  char *ticks  = "o+*xso+*xs";  char m_dim_desc[14];  char m_dim_tag[10];  double max_gflops=6.0;  double    dtime,    gflops,    diff;  FLA_Obj    A, b, b_orig, norm;    FLA_Init();  fprintf( stdout, "%c number of repeats: ", '%' );  scanf( "%d", &n_repeats );  fprintf( stdout, "%c %d/n", '%', n_repeats );  fprintf( stdout, "%c enter problem size first, last, inc: ", '%' );  scanf( "%d%d%d", &p_first, &p_last, &p_inc );  fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc );  fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' );  scanf( "%d", &m_input );  fprintf( stdout, "%c %d/n", '%', m_input );  fprintf( stdout, "/nclear all;/n/n" );  if     ( m_input >  0 ) {    sprintf( m_dim_desc, "m = %d", m_input );    sprintf( m_dim_tag,  "m%dc", m_input);  }  else if( m_input <  -1 ) {    sprintf( m_dim_desc, "m = p/%d", -m_input );    sprintf( m_dim_tag,  "m%dp", -m_input );  }  else if( m_input == -1 ) {    sprintf( m_dim_desc, "m = p" );    sprintf( m_dim_tag,  "m%dp", 1 );  }  //datatype = FLA_FLOAT;  //datatype = FLA_DOUBLE;  //datatype = FLA_COMPLEX;  datatype = FLA_DOUBLE_COMPLEX;  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    m = m_input;    if( m < 0 ) m = p / abs(m_input);    for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){      FLA_Obj_create( datatype, m, m, 0, 0, &A );      FLA_Obj_create( datatype, m, 1, 0, 0, &b );      FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );      if ( FLA_Obj_is_single_precision( A ) )        FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );      else        FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );      if ( pc_str[param_combo][0] == 'l' )        FLA_Random_spd_matrix( FLA_LOWER_TRIANGULAR, A );      else        FLA_Random_spd_matrix( 
FLA_UPPER_TRIANGULAR, A );            FLA_Copy_external( b, b_orig );      fprintf( stdout, "data_chol_%s( %d, 1:5 ) = [ %d  ", pc_str[param_combo], i, p );      fflush( stdout );      time_Chol( param_combo, FLA_ALG_REFERENCE, n_repeats, m,                 A, b, b_orig, norm, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例21: main

//.........这里部分代码省略.........  else if( n_input <  -1 ) {    sprintf( n_dim_desc, "n = p/%d", -n_input );    sprintf( n_dim_tag,  "n%dp", -n_input );  }  else if( n_input == -1 ) {    sprintf( n_dim_desc, "n = p" );    sprintf( n_dim_tag,  "n%dp", 1 );  }  //datatype = FLA_FLOAT;  //datatype = FLA_DOUBLE;  //datatype = FLA_COMPLEX;  datatype = FLA_DOUBLE_COMPLEX;  for ( pp = p_first, i = 1; pp <= p_last; pp += p_inc, i += 1 )  {    m = m_input;    n = n_input;    if( m < 0 ) m = pp / abs(m_input);    if( n < 0 ) n = pp / abs(n_input);    min_m_n = min( m, n );    for ( pivot_combo = 0; pivot_combo < n_pivot_combos; pivot_combo++ ){            FLA_Obj_create( datatype, m, n, 0, 0, &C );      FLA_Obj_create( datatype, m, 1, 0, 0, &b );      FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );      if ( FLA_Obj_is_single_precision( C ) )        FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &b_norm );      else        FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &b_norm );      FLA_Random_matrix( C );      FLA_Random_matrix( b );      FLA_Copy_external( b, b_orig );      fprintf( stdout, "data_lu_%s( %d, 1:5 ) = [ %d  ", pc_str[pivot_combo], i, pp );      fflush( stdout );      //time_LU( pivot_combo, FLA_ALG_REFERENCE, n_repeats, m, n,      //         C, b, b_orig, b_norm, &dtime, &diff, &gflops );      //fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      //fflush( stdout );      time_LU( pivot_combo, FLA_ALG_FRONT, n_repeats, m, n,               C, b, b_orig, b_norm, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      fprintf( stdout, " ]; /n" );      fflush( stdout );      FLA_Obj_free( &C );      FLA_Obj_free( &b );      FLA_Obj_free( &b_orig );      FLA_Obj_free( &b_norm );    }    fprintf( stdout, "/n" );  }/*  fprintf( stdout, "figure;/n" );  fprintf( stdout, "hold on;/n" );  for ( i = 0; i < n_pivot_combos; i++ ) {    fprintf( stdout, "plot( data_lu_%s( :,1 ), data_lu_%s( :, 2 ), '%c:%c' ); /n",            
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );    fprintf( stdout, "plot( data_lu_%s( :,1 ), data_lu_%s( :, 4 ), '%c-.%c' ); /n",            pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... /n" );  for ( i = 0; i < n_pivot_combos; i++ )    fprintf( stdout, "'ref//_lu//_%s', 'fla//_lu//_%s', ... /n", pc_str[i], pc_str[i] );  fprintf( stdout, "'Location', 'SouthEast' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME LU front-end performance (%s, %s)' );/n",            m_dim_desc, n_dim_desc );  fprintf( stdout, "print -depsc lu_front_%s_%s.eps/n", m_dim_tag, n_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize( );  return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例22: main

//.........这里部分代码省略.........  }  else if( n_input == -1 ) {    sprintf( n_dim_desc, "n = p" );    sprintf( n_dim_tag,  "n%dp", 1 );  }  //datatype = FLA_FLOAT;  //datatype = FLA_DOUBLE;  //datatype = FLA_COMPLEX;  datatype = FLA_DOUBLE_COMPLEX;  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    m = m_input;    n = n_input;    if( m < 0 ) m = p / abs(m_input);    if( n < 0 ) n = p / abs(n_input);    for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){            // If multiplying A on the left, A is m x m; ...on the right, A is n x n.      if ( pc_str[param_combo][0] == 'l' )        FLA_Obj_create( datatype, m, m, 0, 0, &A );      else        FLA_Obj_create( datatype, n, n, 0, 0, &A );      FLA_Obj_create( datatype, m, n, 0, 0, &B );      FLA_Obj_create( datatype, m, n, 0, 0, &C );      FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );      FLA_Random_matrix( A );      FLA_Random_matrix( B );      FLA_Random_matrix( C );      FLA_Copy_external( C, C_ref );      fprintf( stdout, "data_symm_%s( %d, 1:5 ) = [ %d  ", pc_str[param_combo], i, p );      fflush( stdout );      time_Symm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n,                 A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      time_Symm( param_combo, FLA_ALG_FRONT, n_repeats, m, n,                 A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      fprintf( stdout, " ]; /n" );      fflush( stdout );      FLA_Obj_free( &A );      FLA_Obj_free( &B );      FLA_Obj_free( &C );      FLA_Obj_free( &C_ref );    }    fprintf( stdout, "/n" );  }/*  fprintf( stdout, "figure;/n" );  fprintf( stdout, "hold on;/n" );  for ( i = 0; i < n_param_combos; i++ ) {    fprintf( stdout, "plot( data_symm_%s( :,1 ), data_symm_%s( :, 2 ), '%c:%c' ); /n",            pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );    fprintf( stdout, "plot( 
data_symm_%s( :,1 ), data_symm_%s( :, 4 ), '%c-.%c' ); /n",            pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... /n" );  for ( i = 0; i < n_param_combos; i++ )    fprintf( stdout, "'ref//_symm//_%s', 'fla//_symm//_%s', ... /n", pc_str[i], pc_str[i] );  fprintf( stdout, "'Location', 'SouthEast' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME symm front-end performance (%s, %s)' );/n",           m_dim_desc, n_dim_desc );  fprintf( stdout, "print -depsc symm_front_%s_%s.eps/n", m_dim_tag, n_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize( );  return 0;}
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例23: libfla_test_symm_experiment

//.........这里部分代码省略.........	beta  = FLA_MINUS_ONE;	// Save the original object contents in a temporary object.	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, C, &C_save );	// Use hierarchical matrices if we're testing the FLASH front-end.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_Obj_create_hier_copy_of_flat( A, 1, &b_flash, &A_test );		FLASH_Obj_create_hier_copy_of_flat( B, 1, &b_flash, &B_test );		FLASH_Obj_create_hier_copy_of_flat( C, 1, &b_flash, &C_test );	}	else	{		A_test = A;		B_test = B;		C_test = C;	}	// Create a control tree for the individual variants.	if ( impl == FLA_TEST_FLAT_UNB_VAR ||	     impl == FLA_TEST_FLAT_OPT_VAR ||	     impl == FLA_TEST_FLAT_BLK_VAR ||	     impl == FLA_TEST_FLAT_UNB_EXT ||	     impl == FLA_TEST_FLAT_BLK_EXT )		libfla_test_symm_cntl_create( var, b_alg_flat );	// Repeat the experiment n_repeats times and record results.	for ( i = 0; i < n_repeats; ++i )	{		if ( impl == FLA_TEST_HIER_FRONT_END )			FLASH_Obj_hierarchify( C_save, C_test );		else			FLA_Copy_external( C_save, C_test );				time = FLA_Clock();		libfla_test_symm_impl( impl, side, uplo, alpha, A_test, B_test, beta, C_test );				time = FLA_Clock() - time;		time_min = min( time_min, time );	}	// Copy the solution to flat matrix X.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_Obj_flatten( C_test, C );	}	else    {		// No action needed since C_test and C refer to the same object.	}	// Free the hierarchical matrices if we're testing the FLASH front-end.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_Obj_free( &A_test );		FLASH_Obj_free( &B_test );		FLASH_Obj_free( &C_test );	}	// Free the control trees if we're testing the variants.	if ( impl == FLA_TEST_FLAT_UNB_VAR ||	     impl == FLA_TEST_FLAT_OPT_VAR ||	     impl == FLA_TEST_FLAT_BLK_VAR ||	     impl == FLA_TEST_FLAT_UNB_EXT ||
开发者ID:flame,项目名称:libflame,代码行数:67,


示例24: main

//.........这里部分代码省略.........    sprintf( nth_str, "OMP_NUM_THREADS=%d", n_threads_exp[ n_thread_experiments-1 ] );  putenv( nth_str );  blas_cpu_number = n_threads_exp[ n_thread_experiments-1 ];  blas_thread_init();  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    m = m_input;    k = k_input;    n = n_input;    if( m < 0 ) m = p / abs(m_input);    if( k < 0 ) k = p / abs(k_input);    if( n < 0 ) n = p / abs(n_input);	    FLA_Obj_create( FLA_DOUBLE, m, k, &A );    FLA_Obj_create( FLA_DOUBLE, k, n, &B );    FLA_Obj_create( FLA_DOUBLE, m, n, &C );    FLA_Obj_create( FLA_DOUBLE, m, n, &C_ref );	    /* Generate random matrices A, C */	if( p > 4000 ){    FLA_Random_matrix( A );    FLA_Random_matrix( B );    FLA_Random_matrix( C );	    FLA_Copy_external( C, C_ref );	}	    blas_cpu_number = 1;    //time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,    //                A, B, C, C_ref, &dtime, &diff, &gflops );    //fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; /n", i, p, gflops );    //fflush( stdout );    for ( j = 0; j < n_thread_experiments; j++ ){      n_threads = n_threads_exp[j];      blas_cpu_number = n_threads;      fprintf( stdout, "data_nth%d( %d, 1:3 ) = [ %d  ", n_threads, i, p );      fflush( stdout );      time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,                    A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      fprintf( stdout, " ]; /n" );      fflush( stdout );    }    fprintf( stdout, "/n" );
开发者ID:pgawron,项目名称:tlash,代码行数:66,


示例25: FLA_Eig_gest_il_unb_var5

FLA_Error FLA_Eig_gest_il_unb_var5( FLA_Obj A, FLA_Obj Y, FLA_Obj B ){  FLA_Obj ATL,   ATR,      A00,  a01,     A02,           ABL,   ABR,      a10t, alpha11, a12t,                           A20,  a21,     A22;  FLA_Obj BTL,   BTR,      B00,  b01,    B02,           BBL,   BBR,      b10t, beta11, b12t,                           B20,  b21,    B22;  //FLA_Obj yT,              y01,  //        yB,              psi11,  //                         y21;  //FLA_Obj y21_l, y21_r;  FLA_Obj psi11, y12t,          y21,   Y22;  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_2x2( B,    &BTL, &BTR,                      &BBL, &BBR,     0, 0, FLA_TL );  //FLA_Part_2x1( Y,    &yT,   //                    &yB,            0, FLA_TOP );  FLA_Part_2x2( Y,    &psi11, &y12t,                      &y21,   &Y22,     1, 1, FLA_TL );  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,                        /* ************* */   /* ************************** */                                                &a10t, /**/ &alpha11, &a12t,                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,                           1, 1, FLA_BR );    FLA_Repart_2x2_to_3x3( BTL, /**/ BTR,       &B00,  /**/ &b01,    &B02,                        /* ************* */   /* ************************* */                                                &b10t, /**/ &beta11, &b12t,                           BBL, /**/ BBR,       &B20,  /**/ &b21,    &B22,                           1, 1, FLA_BR );    //FLA_Repart_2x1_to_3x1( yT,                  &y01,    //                    /* ** */              /* ***** */    //                                            &psi11,    //                       yB,                  &y21,        1, FLA_BOTTOM );    /*------------------------------------------------------------*/    //FLA_Part_1x2( y21,    &y21_l, &y21_r,     1, FLA_LEFT );    // alpha11 
= inv(beta11) * alpha11 * inv(conj(beta11));    //         = inv(beta11) * alpha11 * inv(beta11);    FLA_Inv_scal_external( beta11, alpha11 );    FLA_Inv_scal_external( beta11, alpha11 );    //// y21 = b21 * alpha11;    //FLA_Copy_external( b21, y21_l );    //FLA_Scal_external( alpha11, y21_l );    // psi11 = - 1/2 * alpha11;    FLA_Copy_external( alpha11, psi11 );    FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );    // a21 = a21 * inv(conj(beta11));    //     = a21 * inv(beta11);    FLA_Inv_scal_external( beta11, a21 );    //// a21 = a21 - 1/2 * y21;    //FLA_Axpy_external( FLA_MINUS_ONE_HALF, y21_l, a21 );    // a21 = a21 - 1/2 * alpha11 * b21;    FLA_Axpy_external( psi11, b21, a21 );    // A22 = A22 - a21 * b21' - b21 * a21';    FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_NO_CONJUGATE,                        FLA_MINUS_ONE, a21, b21, A22 );    //// a21 = a21 - 1/2 * y21;    //FLA_Axpy_external( FLA_MINUS_ONE_HALF, y21_l, a21 );    // a21 = a21 - 1/2 * alpha11 * b21;    FLA_Axpy_external( psi11, b21, a21 );    // a21 = inv( tril( B22 ) ) * a21;    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,                       B22, a21 );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,                                                     a10t, alpha11, /**/ a12t,                            /* ************** */  /* ************************ */                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,                              FLA_TL );    FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR,       B00,  b01,    /**/ B02,                                                     b10t, beta11, /**/ b12t,                            /* ************** */  /* *********************** */                              &BBL, /**/ &BBR,       B20,  b21,    /**/ B22,//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例26: time_Gemm_hh

void time_Gemm_hh(                int variant, int type, int nrepeats, int n, int nb_alg,               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  fla_blocksize_t*    bp;  fla_gemm_t*    cntl_gemm_blas;  fla_gemm_t*    cntl_gemm_var;  bp             = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );  cntl_gemm_blas = FLA_Cntl_gemm_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );  cntl_gemm_var  = FLA_Cntl_gemm_obj_create( FLA_FLAT, variant, bp, cntl_gemm_blas );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( variant ){    // Time reference implementation    case 0:      REF_Gemm( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,                 FLA_ONE, A, B, FLA_ONE, C );      break;    // Time variant 1    case 1:{      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Gemm_hh_unb_var1( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Gemm_hh_blk_var1( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );        break;      default:        printf("trouble/n");      }      break;    }    // Time variant 2    case 2:{      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Gemm_hh_unb_var2( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Gemm_hh_blk_var2( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );        break;      default:        printf("trouble/n");      }      break;    }    // Time variant 3    case 3:{      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Gemm_hh_unb_var3( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Gemm_hh_blk_var3( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );        break;      default:        printf("trouble/n");      }      break;    }    // Time 
variant 4    case 4:{      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Gemm_hh_unb_var4( FLA_ONE, A, B, FLA_ONE, C );        break;      case FLA_ALG_BLOCKED:        FLA_Gemm_hh_blk_var4( FLA_ONE, A, B, FLA_ONE, C, cntl_gemm_var );        break;      default://.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例27: FLA_Hess_UT_blk_var4

FLA_Error FLA_Hess_UT_blk_var4( FLA_Obj A, FLA_Obj T ){  FLA_Obj  ATL,   ATR,      A00, A01, A02,            ABL,   ABR,      A10, A11, A12,                            A20, A21, A22;  FLA_Obj  UT,              U0,           UB,              U1,                            U2;  FLA_Obj  YT,              Y0,           YB,              Y1,                            Y2;  FLA_Obj  ZT,              Z0,           ZB,              Z1,                            Z2;  FLA_Obj  TL,    TR,       T0, T1, T2;   FLA_Obj  U, Y, Z;  FLA_Obj  ABR_l;  FLA_Obj  UB_l, U2_l;  FLA_Obj  YB_l, Y2_l;  FLA_Obj  ZB_l, Z2_l;  FLA_Obj  WT_l;  FLA_Obj  T1_tl;  FLA_Obj  none, none2, none3;  FLA_Obj  UB_tl,           UB_bl;  FLA_Datatype datatype_A;  dim_t        m_A;  dim_t        b_alg, b, bb;  b_alg      = FLA_Obj_length( T );  datatype_A = FLA_Obj_datatype( A );  m_A        = FLA_Obj_length( A );  FLA_Obj_create( datatype_A, m_A,    b_alg, 0, 0, &U );  FLA_Obj_create( datatype_A, m_A,    b_alg, 0, 0, &Y );  FLA_Obj_create( datatype_A, m_A,    b_alg, 0, 0, &Z );  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_2x1( U,    &UT,                       &UB,            0, FLA_TOP );  FLA_Part_2x1( Y,    &YT,                       &YB,            0, FLA_TOP );  FLA_Part_2x1( Z,    &ZT,                       &ZB,            0, FLA_TOP );  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );   while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )  {    b = min( FLA_Obj_length( ABR ), b_alg );    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,                        /* ************* */   /* ******************** */                                                &A10, /**/ &A11, &A12,                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,                           b, b, FLA_BR );    FLA_Repart_2x1_to_3x1( UT,                &U0,                         /* ** */            /* ** */                                              
&U1,                            UB,                &U2,        b, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( YT,                &Y0,                         /* ** */            /* ** */                                              &Y1,                            YB,                &Y2,        b, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( ZT,                &Z0,                         /* ** */            /* ** */                                              &Z1,                            ZB,                &Z2,        b, FLA_BOTTOM );    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &T2,                           b, FLA_RIGHT );    /*------------------------------------------------------------*/    FLA_Part_2x2( T1,     &T1_tl, &none,                             &none2, &none3,   b, b, FLA_TL );     bb = min( FLA_Obj_length( ABR ) - 1, b_alg );    FLA_Part_1x2( ABR,    &ABR_l, &none,    bb, FLA_LEFT );     FLA_Part_1x2( UB,     &UB_l,  &none,    bb, FLA_LEFT );     FLA_Part_1x2( YB,     &YB_l,  &none,    bb, FLA_LEFT );     FLA_Part_1x2( ZB,     &ZB_l,  &none,    bb, FLA_LEFT );     FLA_Part_2x1( UB_l,   &none,                          &U2_l,             b, FLA_TOP );    FLA_Part_2x1( YB_l,   &none,                          &Y2_l,             b, FLA_TOP );    FLA_Part_2x1( ZB_l,   &none,                          &Z2_l,             b, FLA_TOP );    // [ ABR, YB, ZB, T1 ] = FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1, b );    //FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1_tl );    //FLA_Hess_UT_step_ofu_var4( ABR, YB, ZB, T1_tl );    FLA_Hess_UT_step_opt_var4( ABR, YB, ZB, T1_tl );    // Build UB from ABR, with explicit unit subdiagonal and zeros.    FLA_Copy_external( ABR_l, UB_l );    FLA_Part_2x1( UB_l,   &UB_tl, //.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例28: FLA_Eig_gest_nl_unb_var4

FLA_Error FLA_Eig_gest_nl_unb_var4( FLA_Obj A, FLA_Obj Y, FLA_Obj B ){  FLA_Obj ATL,   ATR,      A00,  a01,     A02,           ABL,   ABR,      a10t, alpha11, a12t,                           A20,  a21,     A22;  FLA_Obj BTL,   BTR,      B00,  b01,    B02,           BBL,   BBR,      b10t, beta11, b12t,                           B20,  b21,    B22;  //FLA_Obj yL,    yR,       y10t, psi11,  y12t;  //FLA_Obj y10t_t,  //        y10t_b;  FLA_Obj psi11, y12t,          y21,   Y22;  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_2x2( B,    &BTL, &BTR,                      &BBL, &BBR,     0, 0, FLA_TL );  //FLA_Part_1x2( Y,    &yL,  &yR,      0, FLA_LEFT );  FLA_Part_2x2( Y,    &psi11, &y12t,                      &y21,   &Y22,     1, 1, FLA_TL );  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,                        /* ************* */   /* ************************** */                                                &a10t, /**/ &alpha11, &a12t,                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,                           1, 1, FLA_BR );    FLA_Repart_2x2_to_3x3( BTL, /**/ BTR,       &B00,  /**/ &b01,    &B02,                        /* ************* */   /* ************************* */                                                &b10t, /**/ &beta11, &b12t,                           BBL, /**/ BBR,       &B20,  /**/ &b21,    &B22,                           1, 1, FLA_BR );    //FLA_Repart_1x2_to_1x3( yL,  /**/ yR,        &y10t, /**/ &psi11,  &y12t,    //                       1, FLA_RIGHT );    /*------------------------------------------------------------*/    //FLA_Part_2x1( y10t,   &y10t_t,    //                      &y10t_b,    1, FLA_TOP );    //// y10t = alpha11 * b10t;    //FLA_Copy_external( b10t, y10t_t );    //FLA_Scal_external( alpha11, y10t_t );    // psi11 = 1/2 * alpha11;    FLA_Copy_external( alpha11, psi11 );  
  FLA_Scal_external( FLA_ONE_HALF, psi11 );    //// a10t = a10t + 1/2 * y10t;    //FLA_Axpy_external( FLA_ONE_HALF, y10t_t, a10t );    // a10t = a10t + 1/2 * alpha11 * b10t;    FLA_Axpy_external( psi11, b10t, a10t );    // A00 = A00 + a10t' * b10t + b10t' * a10t;    FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_CONJUGATE,                        FLA_ONE, a10t, b10t, A00 );    //// a10t = a10t + 1/2 * y10t;    //FLA_Axpy_external( FLA_ONE_HALF, y10t_t, a10t );    // a10t = a10t + 1/2 * alpha11 * b10t;    FLA_Axpy_external( psi11, b10t, a10t );    // a10t = conj(beta11) * a10t;    //      = beta11 * a10t;    FLA_Scal_external( beta11, a10t );    // alpha11 = conj(beta11) * alpha11 * beta11;    //         = beta11 * alpha11 * beta11;    FLA_Scal_external( beta11, alpha11 );    FLA_Scal_external( beta11, alpha11 );    // A20 = A20 + a21 * b10t;    FLA_Ger_external( FLA_ONE, a21, b10t, A20 );    // a21 = a21 * beta11;    FLA_Scal_external( beta11, a21 );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,                                                     a10t, alpha11, /**/ a12t,                            /* ************** */  /* ************************ */                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,                              FLA_TL );    FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR,       B00,  b01,    /**/ B02,                                                     b10t, beta11, /**/ b12t,                            /* ************** */  /* *********************** */                              &BBL, /**/ &BBR,       B20,  b21,    /**/ B22,                              FLA_TL );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例29: main

//.........这里部分代码省略.........  }  else if( n_input <  -1 ) {    sprintf( n_dim_desc, "n = p/%d", -n_input );    sprintf( n_dim_tag,  "n%dp", -n_input );  }  else if( n_input == -1 ) {    sprintf( n_dim_desc, "n = p" );    sprintf( n_dim_tag,  "n%dp", 1 );  }  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    m = m_input;    n = n_input;    if( m < 0 ) m = p / abs(m_input);    if( n < 0 ) n = p / abs(n_input);    //datatype = FLA_COMPLEX;    datatype = FLA_DOUBLE_COMPLEX;    /* Allocate space for the matrices */    FLA_Obj_create( datatype, m, m, &A );    FLA_Obj_create( datatype, m, n, &C );    FLA_Obj_create( datatype, m, n, &C_ref );    /* Generate random matrices A, C */    FLA_Random_tri_matrix( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, A );    FLA_Random_matrix( C );    FLA_Copy_external( C, C_ref );    /* Time the reference implementation */    time_Trmm_luh( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,                  A, B, C, C_ref, &dtime, &diff, &gflops );    fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; /n", i, p, gflops );    fflush( stdout );    for ( variant = 1; variant <= n_variants; variant++ ){            //fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d  ", variant, i, p );      fprintf( stdout, "data_var%d( %d, 1:5 ) = [ %d  ", variant, i, p );      fflush( stdout );      time_Trmm_luh( variant, FLA_ALG_UNBLOCKED, n_repeats, p, nb_alg,                    A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      time_Trmm_luh( variant, FLA_ALG_BLOCKED, n_repeats, p, nb_alg,                    A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      //time_Trmm_luh( variant, FLA_ALG_OPTIMIZED, n_repeats, p, nb_alg,      //              A, B, C, C_ref, &dtime, &diff, &gflops );      //fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
开发者ID:pgawron,项目名称:tlash,代码行数:67,


示例30: time_Her2k

void time_Her2k(                int param_combo, int type, int nrepeats, int m, int k,               FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj C_ref,               double *dtime, double *diff, double *gflops ){  int    irep;  double    dtime_old = 1.0e9;  FLA_Obj    C_old;  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( param_combo ){    // Time parameter combination 0    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Her2k( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Her2k( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 1    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Her2k( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Her2k( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 2    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Her2k( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Her2k( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 3    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Her2k( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Her2k( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        
break;      default:        printf("trouble/n");      }      break;    }    }	    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  if ( type == FLA_ALG_REFERENCE )  {    FLA_Copy_external( C, C_ref );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,



注:本文中的 FLA_Copy_external 函数示例整理自 GitHub、MSDocs 等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。


C++ FLA_Determine_blocksize函数代码示例
C++ FLA_Cont_with_3x3_to_2x2函数代码示例
万事OK自学网(51自学网、软件自学网、CAD自学网):提供自学Excel、自学PS、自学CAD、自学C语言、自学CSS3实例等内容,是一个通过网络自主学习工作技能的自学平台,也是网友喜欢的软件自学网站。