您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ FLA_Obj_free函数代码示例

51自学网 2021-06-01 20:42:01
  C++
这篇教程《C++ FLA_Obj_free函数代码示例》写得很实用,希望能帮到您。

本文整理汇总了C++中FLA_Obj_free函数的典型用法代码示例。如果您正苦于以下问题:C++ FLA_Obj_free函数的具体用法?C++ FLA_Obj_free怎么用?C++ FLA_Obj_free使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了FLA_Obj_free函数的29个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: main

//.........这里部分代码省略.........    diff = FLA_Max_elemwise_diff( C, Cref );    printf( "data_unb_var8( %d, 1:3 ) = [ %d %le  %le];/n", i, n,            gflops / dtime_best, diff );    fflush( stdout );#endif#if TEST_BLK_VAR8==TRUE    /* Variant 4 blocked */    for ( irep=0; irep<nrepeats; irep++ ){      FLA_Copy( Cold, C );          dtime = FLA_Clock();      Symm_blk_var8( A, B, C, nb_alg );      dtime = FLA_Clock() - dtime;      if ( irep == 0 ) 	dtime_best = dtime;      else	dtime_best = ( dtime < dtime_best ? dtime : dtime_best );    }    diff = FLA_Max_elemwise_diff( C, Cref );    printf( "data_blk_var8( %d, 1:3 ) = [ %d %le  %le];/n", i, n,            gflops / dtime_best, diff );    fflush( stdout );#endif    FLA_Obj_free( &A );    FLA_Obj_free( &B );    FLA_Obj_free( &C );    FLA_Obj_free( &Cref );    FLA_Obj_free( &Cold );    printf( "/n" );    i++;  }  /* Print the MATLAB commands to plot the data */  /* Delete all existing figures */  printf( "close all/n" );  /* Plot the performance of FLAME */  printf( "plot( data_FLAME( :,1 ), data_FLAME( :, 2 ), 'k--' ); /n" );  /* Indicate that you want to add to the existing plot */  printf( "hold on/n" );  /* Plot the performance of the reference implementation */  //  printf( "plot( data_REF( :,1 ), data_REF( :, 2 ), 'k-' ); /n" );  /* Plot the performance of your implementations */#if TEST_UNB_VAR1==TRUE  printf( "plot( data_unb_var1( :,1 ), data_unb_var1( :, 2 ), 'r-.' ); /n" );#endif#if TEST_UNB_VAR2==TRUE  printf( "plot( data_unb_var2( :,1 ), data_unb_var2( :, 2 ), 'g-.' ); /n" );#endif#if TEST_UNB_VAR3==TRUE
开发者ID:ebeweber,项目名称:CS378-Invariant-Project,代码行数:67,


示例2: main

//.........这里部分代码省略.........      if ( pc_str[param_combo][0] == 'c' ||           pc_str[param_combo][1] == 'c' )      {        if ( precision == FLA_SINGLE_PRECISION )          datatype = FLA_COMPLEX;        else          datatype = FLA_DOUBLE_COMPLEX;      }      else      {        if ( precision == FLA_SINGLE_PRECISION )          datatype = FLA_FLOAT;        else          datatype = FLA_DOUBLE;      }      // If transposing A, switch dimensions.      if ( pc_str[param_combo][0] == 'n' )        FLA_Obj_create( datatype, m, k, 0, 0, &A );      else        FLA_Obj_create( datatype, k, m, 0, 0, &A );            // If transposing B, switch dimensions.      if ( pc_str[param_combo][1] == 'n' )        FLA_Obj_create( datatype, k, n, 0, 0, &B );      else        FLA_Obj_create( datatype, n, k, 0, 0, &B );      FLA_Obj_create( datatype, m, n, 0, 0, &C );      FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );      FLA_Random_matrix( A );      FLA_Random_matrix( B );      FLA_Random_matrix( C );      FLA_Copy_external( C, C_ref );            fprintf( stdout, "data_gemm_%s( %d, 1:5 ) = [ %4d %4d %4d  ", pc_str[param_combo], i, m, k, n );      fflush( stdout );      time_Gemm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, n,                 A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );/*      time_Gemm( param_combo, FLA_ALG_FRONT, n_repeats, m, k, n,                 A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );*/      fprintf( stdout, " ]; /n" );      fflush( stdout );      FLA_Obj_free( &A );      FLA_Obj_free( &B );      FLA_Obj_free( &C );      FLA_Obj_free( &C_ref );    }    fprintf( stdout, "/n" );  }/*  fprintf( stdout, "figure;/n" );  fprintf( stdout, "hold on;/n" );  for ( i = 0; i < n_param_combos; i++ ) {    fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 2 ), '%c:%c' ); /n",            pc_str[i], 
pc_str[i], colors[ i ], ticks[ i ] );    fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 4 ), '%c-.%c' ); /n",            pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... /n" );  for ( i = 0; i < n_param_combos; i++ )    fprintf( stdout, "'ref//_gemm//_%s', 'fla//_gemm//_%s', ... /n", pc_str[i], pc_str[i] );  fprintf( stdout, "'Location', 'SouthEast' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME gemm front-end performance (%s, %s, %s)' );/n",           m_dim_desc, k_dim_desc, n_dim_desc );  fprintf( stdout, "print -depsc gemm_front_%s_%s_%s.eps/n", m_dim_tag, k_dim_tag, n_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize( );  return 0;}
开发者ID:flame,项目名称:libflame,代码行数:101,


示例3: main

//.........这里部分代码省略.........  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    m = m_input;    if( m < 0 ) m = p / abs(m_input);    FLA_Obj_create( datatype, m, m, 0, 0, &A );    FLA_Obj_create( datatype, m, 1, 0, 0, &b );    FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );/*    FLA_Obj_create( datatype, m, m, m, 1, &A );    FLA_Obj_create( datatype, m, 1, 1, 1, &b );    FLA_Obj_create( datatype, m, 1, 1, 1, &b_orig );*/    if ( FLA_Obj_is_single_precision( A ) )      FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );    else      FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );    FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_UNIT_DIAG, A );    FLA_Random_matrix( b );    FLA_Copy_external( b, b_orig );/*    time_Trinv_uu( 0, FLA_ALG_REFERENCE, n_repeats, m, nb_alg,                   A, b, b_orig, norm, &dtime, &diff, &gflops );    fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; /n", i, p, gflops );    fflush( stdout );*/    for ( variant = 1; variant <= n_variants; variant++ ){            fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d  ", variant, i, p );      fflush( stdout );      time_Trinv_uu( variant, FLA_ALG_UNBLOCKED, n_repeats, m, nb_alg,                     A, b, b_orig, norm, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      time_Trinv_uu( variant, FLA_ALG_UNB_OPT, n_repeats, m, nb_alg,                     A, b, b_orig, norm, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      time_Trinv_uu( variant, FLA_ALG_BLOCKED, n_repeats, m, nb_alg,                     A, b, b_orig, norm, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      fprintf( stdout, " ]; /n" );      fflush( stdout );    }    FLA_Obj_free( &A );    FLA_Obj_free( &b );    FLA_Obj_free( &b_orig );    FLA_Obj_free( &norm );    fprintf( stdout, "/n" );  }/*  fprintf( stdout, "figure;/n" );  fprintf( 
stdout, "hold on;/n" );  fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); /n" );  for ( i = 1; i <= n_variants; i++ ){    fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); /n",              variant, variant, colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... /n" );  fprintf( stdout, "'Reference', ... /n" );  for ( i = 1; i <= n_variants; i++ )    fprintf( stdout, "'FLAME var%d', ... /n", i );  fprintf( stdout, "'Location', 'SouthWest' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME trinv//_u performance (%s)' );/n",            m_dim_desc );  fprintf( stdout, "print -depsc trinv_l_%s.eps/n", m_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize( );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例4: FLA_Hevd_lv_var3_components

FLA_Error FLA_Hevd_lv_var3_components( dim_t n_iter_max, FLA_Obj A, FLA_Obj l, dim_t k_accum, dim_t b_alg,                                       double* dtime_tred, double* dtime_tevd, double* dtime_appq ){	FLA_Error    r_val = FLA_SUCCESS;	FLA_Uplo     uplo = FLA_LOWER_TRIANGULAR;	FLA_Datatype dt;	FLA_Datatype dt_real;	FLA_Datatype dt_comp;	FLA_Obj      T, r, d, e, G;	FLA_Obj      d0, e0, ls, pu;	dim_t        mn_A;	dim_t        n_G = k_accum;	double       dtime_temp;	mn_A    = FLA_Obj_length( A );	dt      = FLA_Obj_datatype( A );	dt_real = FLA_Obj_datatype_proj_to_real( A );	dt_comp = FLA_Obj_datatype_proj_to_complex( A );	// If the matrix is a scalar, then the EVD is easy.	if ( mn_A == 1 )	{		FLA_Copy( A, l );		FLA_Set( FLA_ONE, A );		return FLA_SUCCESS;	}	// Create a matrix to hold block Householder transformations.	FLA_Tridiag_UT_create_T( A, &T );	// Create a vector to hold the realifying scalars.	FLA_Obj_create( dt,      mn_A,     1, 0, 0, &r );	// Create vectors to hold the diagonal and sub-diagonal.	FLA_Obj_create( dt_real, mn_A,     1, 0, 0, &d );	FLA_Obj_create( dt_real, mn_A-1,   1, 0, 0, &e );	FLA_Obj_create( dt_real, mn_A,     1, 0, 0, &d0 );	FLA_Obj_create( dt_real, mn_A-1,   1, 0, 0, &e0 );	FLA_Obj_create( dt_real, mn_A,     1, 0, 0, &pu );	FLA_Obj_create( FLA_INT, mn_A,     1, 0, 0, &ls );	FLA_Obj_create( dt_comp, mn_A-1, n_G, 0, 0, &G );  dtime_temp = FLA_Clock();  {	// Reduce the matrix to tridiagonal form.	FLA_Tridiag_UT( uplo, A, T );  }  *dtime_tred = FLA_Clock() - dtime_temp;	// Apply scalars to rotate elements on the sub-diagonal to the real domain.	FLA_Tridiag_UT_realify( uplo, A, r );	// Extract the diagonal and sub-diagonal from A.	FLA_Tridiag_UT_extract_diagonals( uplo, A, d, e );  dtime_temp = FLA_Clock();  {	// Form Q, overwriting A.	FLA_Tridiag_UT_form_Q( uplo, A, T );  }  *dtime_appq = FLA_Clock() - dtime_temp;	// Apply the scalars in r to Q.	FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, r, A );	// Find the eigenvalues only.	
FLA_Copy( d, d0 ); FLA_Copy( e, e0 );	//r_val = FLA_Tevd_n_opt_var1( n_iter_max, d0, e0, G, A );{	int info;	double* buff_d = FLA_DOUBLE_PTR( d0 );	double* buff_e = FLA_DOUBLE_PTR( e0 );	dsterf_( &mn_A, buff_d, buff_e, &info );}	FLA_Sort( FLA_FORWARD, d0 );	FLA_Set( FLA_ZERO, ls );	FLA_Set( FLA_ZERO, pu );  dtime_temp = FLA_Clock();  {	// Perform an eigenvalue decomposition on the tridiagonal matrix.	r_val = FLA_Tevd_v_opt_var3( n_iter_max, d, e, d0, ls, pu, G, A, b_alg );  }  *dtime_tevd = FLA_Clock() - dtime_temp;//FLA_Obj_show( "var4: e", e, "%22.15e", "" );	// Copy the converged eigenvalues to the output vector.	FLA_Copy( d, l );	// Sort the eigenvalues and eigenvectors in ascending order.	FLA_Sort_evd( FLA_FORWARD, l, A );//FLA_Obj_show( "var4: d", l, "%22.15e", "" );//FLA_Obj_show( "var4: A", A, "%8.1e + %8.1e", "" );	//FLA_Copy( d0, l );	FLA_Obj_free( &T );	FLA_Obj_free( &r );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例5: time_Trsm_lln

//.........这里部分代码省略.........                FLA_ONE, A, C );      break;    case 1:{      // Time variant 1      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Trsm_lln_unb_var1( FLA_NONUNIT_DIAG, FLA_ONE, A, C );        break;      case FLA_ALG_BLOCKED:        FLA_Trsm_lln_blk_var1( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      // Time variant 2      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Trsm_lln_unb_var2( FLA_NONUNIT_DIAG, FLA_ONE, A, C );        break;      case FLA_ALG_BLOCKED:        FLA_Trsm_lln_blk_var2( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var );        break;      default:        printf("trouble/n");      }      break;    }    case 3:{      // Time variant 3      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Trsm_lln_unb_var3( FLA_NONUNIT_DIAG, FLA_ONE, A, C );        break;      case FLA_ALG_BLOCKED:        FLA_Trsm_lln_blk_var3( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var );        break;      default:        printf("trouble/n");      }      break;    }    case 4:{      // Time variant 4      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Trsm_lln_unb_var4( FLA_NONUNIT_DIAG, FLA_ONE, A, C );        break;      case FLA_ALG_BLOCKED:        FLA_Trsm_lln_blk_var4( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var );        break;      default:        printf("trouble/n");      }      break;    }    }    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  FLA_Cntl_obj_free( cntl_trsm_var );  FLA_Cntl_obj_free( cntl_trsm_blas );  FLA_Cntl_obj_free( cntl_gemm_blas );  FLA_Blocksize_free( bp );  if ( variant == 0 )  {    FLA_Copy_external( C, C_ref );    *diff = 0.0;  }  else  {    *diff = FLA_Max_elemwise_diff( C, C_ref );  }  *gflops = 1.0 *             FLA_Obj_length( C ) *             FLA_Obj_width( C ) *             FLA_Obj_width( A ) /             dtime_old /             1.0e9;  
*dtime = dtime_old;  FLA_Copy_external( C_old, C );  FLA_Obj_free( &C_old );}
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例6: REF_Svdd_uv_components

//.........这里部分代码省略.........      *dtime_qrfa = 0.0;      *dtime_gemm = 0.0;    }    else    {      FLA_Obj AT,              AB;      FLA_Obj UL, UR;      FLA_Part_2x1( A,   &AT,                         &AB,        n_A, FLA_TOP );      FLA_Part_1x2( U,   &UL, &UR,   n_A, FLA_LEFT );      // Create a temporary n-by-n matrix R.      FLA_Obj_create( dt_A, n_A, n_A, 0, 0, &W );      dtime_temp = FLA_Clock();      {        // Perform a QR factorization.        FLA_QR_blk_external( A, tq );        FLA_Copyr_external( FLA_LOWER_TRIANGULAR, A, UL );        FLA_Setr( FLA_LOWER_TRIANGULAR, FLA_ZERO, A );      }      *dtime_qrfa = FLA_Clock() - dtime_temp;      dtime_temp = FLA_Clock();      {        // Form Q.        FLA_QR_form_Q_external( U, tq );      }      *dtime_appq = FLA_Clock() - dtime_temp;      dtime_temp = FLA_Clock();      {        // Reduce R to bidiagonal form.        FLA_Bidiag_blk_external( AT, tu, tv );        FLA_Bidiag_UT_extract_diagonals( A, d, eT );      }      *dtime_bred = FLA_Clock() - dtime_temp;      dtime_temp = FLA_Clock();      {        // Divide-and-conquor algorithm.        FLA_Bsvdd_external( uplo, d, e, Ur, Vr );      }      *dtime_bsvd = FLA_Clock() - dtime_temp;      dtime_temp = FLA_Clock();      {        // Form U in W.        FLA_Copy_external( Ur, W );        FLA_Bidiag_apply_U_external( FLA_LEFT, FLA_NO_TRANSPOSE, AT, tu, W );        // Form V.        FLA_Copy_external( Vr, V );        FLA_Bidiag_apply_V_external( FLA_RIGHT, FLA_CONJ_TRANSPOSE, AT, tv, V );      }      *dtime_appq += FLA_Clock() - dtime_temp;      dtime_temp = FLA_Clock();      {        // Multiply R into U, storing the result in A and then copying        // back to U.        FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,                           FLA_ONE, UL, W, FLA_ZERO, A );        FLA_Copy( A, UL );      }      *dtime_gemm = FLA_Clock() - dtime_temp;      // Free R.      
FLA_Obj_free( &W );    }  }  else  {    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );  }  // Copy singular values to output vector.  FLA_Copy( d, s );  // Sort singular values and vectors.  FLA_Sort_svd( FLA_BACKWARD, s, U, V );  FLA_Obj_free( &tq );  FLA_Obj_free( &tu );  FLA_Obj_free( &tv );  FLA_Obj_free( &d );  FLA_Obj_free( &e );  FLA_Obj_free( &Ur );  FLA_Obj_free( &Vr );  return FLA_SUCCESS;}
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例7: main

//.........这里部分代码省略.........    for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){      // Determine datatype based on trans argument.      if ( pc_str[param_combo][2] == 'h' )      {        if ( precision == FLA_SINGLE_PRECISION )          datatype = FLA_COMPLEX;        else          datatype = FLA_DOUBLE_COMPLEX;      }      else      {        if ( precision == FLA_SINGLE_PRECISION )          datatype = FLA_FLOAT;        else          datatype = FLA_DOUBLE;      }      // If multiplying A on the left, A is m x m; ...on the right, A is n x n.      if ( pc_str[param_combo][0] == 'l' )        FLA_Obj_create( datatype, m, m, 0, 0, &A );      else        FLA_Obj_create( datatype, n, n, 0, 0, &A );      FLA_Obj_create( datatype, m, n, 0, 0, &C );      FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );      if ( pc_str[param_combo][1] == 'l' )      {        FLA_Random_tri_matrix( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A );        FLA_Random_matrix( C );      }      else      {        FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A );        FLA_Random_matrix( C );      }      fprintf( stdout, "data_trsm_%s( %d, 1:3 ) = [ %d  ", pc_str[param_combo], i, p );      fflush( stdout );      time_Trsm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n,                 A, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );/*      time_Trsm( param_combo, FLA_ALG_FRONT, n_repeats, m, n,                 A, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );*/      fprintf( stdout, " ]; /n" );      fflush( stdout );      FLA_Obj_free( &A );      FLA_Obj_free( &C );      FLA_Obj_free( &C_ref );    }    fprintf( stdout, "/n" );  }/*  fprintf( stdout, "figure;/n" );  fprintf( stdout, "hold on;/n" );  for ( i = 0; i < n_param_combos; i++ ) {    fprintf( stdout, "plot( data_trsm_%s( :,1 ), data_trsm_%s( :, 2 ), '%c:%c' ); /n",       
     pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );    fprintf( stdout, "plot( data_trsm_%s( :,1 ), data_trsm_%s( :, 4 ), '%c-.%c' ); /n",            pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... /n" );  for ( i = 0; i < n_param_combos; i++ )    fprintf( stdout, "'ref//_trsm//_%s', 'fla//_trsm//_%s', ... /n", pc_str[i], pc_str[i] );  fprintf( stdout, "'Location', 'SouthEast' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME trsm front-end performance (%s, %s)' );/n",           m_dim_desc, n_dim_desc );  fprintf( stdout, "print -depsc trsm_front_%s_%s.eps/n", m_dim_tag, n_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize( );  return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例8: time_Gemm_nn

//.........这里部分代码省略.........        break;      default:        printf("trouble/n");      }      break;    }    case 15:{      // Time variant 1->5      switch( type ){      case FLA_ALG_OPENMP_CVAR:        FLA_Gemm_nn_omp_var15( FLA_ONE, A, B, C, nb_alg );        break;      default:        printf("trouble/n");      }      break;    }    case 31:{      // Time variant 3->1       switch( type ){      case FLA_ALG_OPENMP_CVAR:        FLA_Gemm_nn_omp_var31( FLA_ONE, A, B, C, nb_alg );        break;      default:        printf("trouble/n");      }      break;    }    case 35:{      // Time variant 3->5       switch( type ){      case FLA_ALG_OPENMP_CVAR:        FLA_Gemm_nn_omp_var35( FLA_ONE, A, B, C, nb_alg );        break;      default:        printf("trouble/n");      }      break;    }    case 51:{      // Time variant 5->1       switch( type ){      case FLA_ALG_OPENMP_CVAR:        FLA_Gemm_nn_omp_var51( FLA_ONE, A, B, C, nb_alg );        break;      default:        printf("trouble/n");      }      break;    }    case 53:{      // Time variant 5->3       switch( type ){      case FLA_ALG_OPENMP_CVAR:        FLA_Gemm_nn_omp_var53( FLA_ONE, A, B, C, nb_alg );        break;      default:        printf("trouble/n");      }      break;    }    }    if ( irep == 0 )      dtime_old = FLA_Clock() - *dtime;    else{      *dtime = FLA_Clock() - *dtime;      dtime_old = min( *dtime, dtime_old );    }  }  if ( variant == 0 ){    FLA_Copy_external( C, Cref );    *diff = 0.0;  }  else{    *diff = FLA_Max_elemwise_diff( C, Cref );    //FLA_Obj_show( "C:", C, "%f", "/n");  }  *gflops = 2.0 *             FLA_Obj_length( C ) *             FLA_Obj_width( C ) *             FLA_Obj_width( A ) /             dtime_old /             1e9;  *dtime = dtime_old;  FLA_Copy_external( Cold, C );  FLA_Obj_free( &Cold );}
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例9: FLA_Gemm_nn_omp_var15

FLA_Error FLA_Gemm_nn_omp_var15( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj C, fla_gemm_t* cntl ){  FLA_Obj AT,              A0,          AB,              A1,                           A2;  FLA_Obj CT,              C0,          CB,              C1,                           C2;  FLA_Obj AL,    AR,       A10,  A11,  A12;  FLA_Obj BT,              B0,          BB,              B1,                           B2;  FLA_Obj C1_local;  int i, j, lock_ldim, lock_i;  int b_m, b_k;  FLA_Part_2x1( A,    &AT,                       &AB,            0, FLA_TOP );  FLA_Part_2x1( C,    &CT,                       &CB,            0, FLA_TOP );  #pragma intel omp parallel taskq  {  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) )  {    b_m = FLA_Determine_blocksize( A, AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );    FLA_Repart_2x1_to_3x1( AT,                &A0,                         /* ** */            /* ** */                                              &A1,                            AB,                &A2,        b_m, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( CT,                &C0,                         /* ** */            /* ** */                                              &C1,                            CB,                &C2,        b_m, FLA_BOTTOM );    /*------------------------------------------------------------*/    /* C1 = alpha * A1 * B + C1; */      FLA_Part_1x2( A1,   &AL,  &AR,      0, FLA_LEFT );    FLA_Part_2x1( B,    &BT,                         &BB,            0, FLA_TOP );      while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )    {      b_k = FLA_Determine_blocksize( A, AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );      // Get the index of the current partition.      
// FIX THIS: need + b_m - 1 or something like this      //j = FLA_Obj_length( CT ) / b_m;      //i = FLA_Obj_width( AL ) / b_k;      //lock_ldim = FLA_get_num_threads_in_m_dim(omp_get_num_threads());      lock_i = FLA_Obj_length( CT ) / b_m;        FLA_Repart_1x2_to_1x3( AL,  /**/ AR,        &A10, /**/ &A11, &A12,                             b_k, FLA_RIGHT );      FLA_Repart_2x1_to_3x1( BT,                &B0,                           /* ** */            /* ** */                                                &B1,                              BB,                &B2,        b_k, FLA_BOTTOM );        /*------------------------------------------------------------*/        /*    C1 = alpha * A11 * B1 + C1; */      //// FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,      ////           alpha, A11, B1, FLA_ONE, C1 );      #pragma intel omp task captureprivate( lock_i, A11, B1, C1 ), private( C1_local )      {      FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C1, &C1_local );      FLA_Obj_set_to_zero( C1_local );      /*    C1_local = alpha * A1 * B11 + C1_local; */      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,                         alpha, A11, B1, FLA_ONE, C1_local );      // Acquire lock[i] (the lock for C1).      omp_set_lock( &fla_omp_lock[lock_i] );      /* C1 += C1_local */      FLA_Axpy_external( FLA_ONE, C1_local, C1 );      //FLA_Axpy_sync_pipeline2( j*lock_ldim, FLA_ONE, C1_local, C1 );      //FLA_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 );      //REF_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 );      // Release lock[i] (the lock for C1).      omp_unset_lock( &fla_omp_lock[lock_i] );      FLA_Obj_free( &C1_local );      }  //.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例10: time_Syrk_ln

//.........这里部分代码省略.........      switch( type ){      case FLA_ALG_OPENMP_1TASK:        FLA_Syrk_ln_omp1t_var2( A, C );        break;      case FLA_ALG_OPENMP_2TASKS:        FLA_Syrk_ln_omp2t_var2( A, C );        break;      case FLA_ALG_OPENMP_2LOOPS:        FLA_Syrk_ln_omp2l_var2( A, C );        break;      case FLA_ALG_OPENMP_2LOOPSPLUS:        FLA_Syrk_ln_omp2x_var2( A, C );        break;      default:        printf("trouble/n");      }      break;    }     case 3:{      // Time variant 3       switch( type ){      case FLA_ALG_OPENMP_1TASK:        FLA_Syrk_ln_omp1t_var3( A, C );        break;      case FLA_ALG_OPENMP_2TASKS:        FLA_Syrk_ln_omp2t_var3( A, C );        break;      case FLA_ALG_OPENMP_2LOOPS:        FLA_Syrk_ln_omp2l_var3( A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 4:{      // Time variant 4      switch( type ){      case FLA_ALG_OPENMP_1TASK:        FLA_Syrk_ln_omp1t_var4( A, C );        break;      case FLA_ALG_OPENMP_2TASKS:        FLA_Syrk_ln_omp2t_var4( A, C );        break;      case FLA_ALG_OPENMP_2LOOPS:        FLA_Syrk_ln_omp2l_var4( A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 5:{      // Time variant 5      switch( type ){      case FLA_ALG_OPENMP_1TASK:        FLA_Syrk_ln_omp1t_var5( A, C );        break;      default:        printf("trouble/n");      }      break;    }    }    if ( irep == 0 )      dtime_old = FLA_Clock() - *dtime;    else{      *dtime = FLA_Clock() - *dtime;      dtime_old = min( *dtime, dtime_old );    }  }  if ( variant == 0 ){    FLA_Copy_external( C, C_ref );    *diff = 0.0;  }  else{    *diff = FLA_Max_elemwise_diff( C, C_ref );    //FLA_Obj_show( "C:", C, "%f", "/n");  }  *gflops = 1.0 *             FLA_Obj_length( A ) *             FLA_Obj_length( A ) *             FLA_Obj_width( A ) /             dtime_old /             1e9;  *dtime = dtime_old;  FLA_Copy_external( C_old, C );  FLA_Obj_free( &C_old );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例11: main

int main( int argc, char** argv ) {  FLA_Datatype testtype = TESTTYPE;  dim_t        m;  FLA_Obj      A;  FLA_Obj      a1, b1, r1;  FLA_Obj      a2, b2, r2;  FLA_Uplo     uplo;  FLA_Error    init_result;   if ( argc == 3 ) {    m = atoi(argv[1]);    uplo = ( atoi(argv[2]) == 1  ? FLA_UPPER_TRIANGULAR : FLA_LOWER_TRIANGULAR );  } else {    fprintf(stderr, "       /n");    fprintf(stderr, "Usage: %s m uplo/n", argv[0]);    fprintf(stderr, "       m    : test matrix length/n");    fprintf(stderr, "       uplo : 0) lower, 1) upper/n");    fprintf(stderr, "       /n");    return -1;  }  if ( m == 0 )    return 0;  FLA_Init_safe( &init_result );            // Test matrix A   FLA_Obj_create( testtype, m, m, 0, 0, &A );  FLA_Random_spd_matrix( uplo, A );  FLA_Hermitianize( uplo, A );  FLA_Obj_fshow( stdout,  "- A -", A, "% 6.4e", "--" );  FLA_Obj_create( testtype, m, 1, 0, 0, &a1 );  FLA_Obj_create( testtype, m, 1, 0, 0, &a2 );  if ( m > 1 ) {    FLA_Obj_create( testtype, m-1, 1, 0, 0, &b1 );    FLA_Obj_create( testtype, m-1, 1, 0, 0, &b2 );  }    FLA_Obj_create( testtype, m, 1, 0, 0, &r1 );  FLA_Obj_create( testtype, m, 1, 0, 0, &r2 );  // Mine   FLA_Tridiag_UT_extract_diagonals( uplo, A, a1, b1 );  FLA_Obj_fshow( stdout,  "- a1 -", a1, "% 6.4e", "--" );    if ( m > 1 ) FLA_Obj_fshow( stdout,  "- b1 -", b1, "% 6.4e", "--" );    FLA_Tridiag_UT_realify_subdiagonal( b1, r1 );  if ( m > 1 ) FLA_Obj_fshow( stdout,  "- b1 realified -", b1, "% 6.4e", "--" );    FLA_Obj_fshow( stdout,  "- r1 -", r1, "% 6.4e", "--" );      // Field  FLA_Tridiag_UT_realify( uplo, A, r2 );  FLA_Tridiag_UT_extract_diagonals( uplo, A, a2, b2 );  FLA_Obj_fshow( stdout,  "- a2  -", a2, "% 6.4e", "--" );    if ( m > 1 ) FLA_Obj_fshow( stdout,  "- b2 realified -", b2, "% 6.4e", "--" );    FLA_Obj_fshow( stdout,  "- r2 -", r2, "% 6.4e", "--" );    printf(" diff_a  = %e/n", FLA_Max_elemwise_diff( a1, a2 ));  if ( m > 1 ) printf(" diff_b  = %e/n", FLA_Max_elemwise_diff( b1, b2 ));  printf(" diff_rL = %e/n", 
FLA_Max_elemwise_diff( r1, r2 ));  FLA_Obj_fshow( stdout,  "- A realified-", A, "% 6.4e", "--" );  FLA_Obj_free( &r2 );  FLA_Obj_free( &r1 );  if ( m > 1 ) {    FLA_Obj_free( &b2 );    FLA_Obj_free( &b1 );  }  FLA_Obj_free( &a2 );  FLA_Obj_free( &a1 );  FLA_Obj_free( &A );  FLA_Finalize_safe( init_result );     }
开发者ID:anaptyxis,项目名称:libflame,代码行数:80,


示例12: main

//.........这里部分代码省略.........        sprintf( n_dim_desc, "n = %d", n_input );        sprintf( n_dim_tag,  "n%dc", n_input);    }    else if( n_input <  -1 ) {        sprintf( n_dim_desc, "n = p/%d", -n_input );        sprintf( n_dim_tag,  "n%dp", -n_input );    }    else if( n_input == -1 ) {        sprintf( n_dim_desc, "n = p" );        sprintf( n_dim_tag,  "n%dp", 1 );    }    //datatype = FLA_FLOAT;    //datatype = FLA_DOUBLE;    //datatype = FLA_COMPLEX;    datatype = FLA_DOUBLE_COMPLEX;    for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )    {        m = m_input;        n = n_input;        if( m < 0 ) m = p / abs(m_input);        if( n < 0 ) n = p / abs(n_input);        for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ) {            if ( pc_str[param_combo][0] == 'n' || pc_str[param_combo][0] == 'c' )                FLA_Obj_create( datatype, m, n, &A );            else                FLA_Obj_create( datatype, n, m, &A );            FLA_Obj_create( datatype, m, n, &C );            FLA_Obj_create( datatype, m, n, &C_ref );            FLA_Random_matrix( A );            FLA_Random_matrix( C );            FLA_Copy_external( C, C_ref );            fprintf( stdout, "data_axpyt_%s( %d, 1:5 ) = [ %d  ", pc_str[param_combo], i, p );            fflush( stdout );            time_Axpyt( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n,                        A, C, C_ref, &dtime, &diff, &gflops );            fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );            fflush( stdout );            time_Axpyt( param_combo, FLA_ALG_FRONT, n_repeats, m, n,                        A, C, C_ref, &dtime, &diff, &gflops );            fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );            fflush( stdout );            fprintf( stdout, " ]; /n" );            fflush( stdout );            FLA_Obj_free( &A );            FLA_Obj_free( &C );            FLA_Obj_free( &C_ref );        }        fprintf( stdout, "/n" );    }    fprintf( stdout, "figure;/n" );   
 fprintf( stdout, "hold on;/n" );    for ( i = 0; i < n_param_combos; i++ ) {        fprintf( stdout, "plot( data_axpyt_%s( :,1 ), data_axpyt_%s( :, 2 ), '%c:%c' ); /n",                 pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );        fprintf( stdout, "plot( data_axpyt_%s( :,1 ), data_axpyt_%s( :, 4 ), '%c-.%c' ); /n",                 pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );    }    fprintf( stdout, "legend( ... /n" );    for ( i = 0; i < n_param_combos; i++ )        fprintf( stdout, "'ref//_axpyt//_%s', 'fla//_axpyt//_%s', ... /n", pc_str[i], pc_str[i] );    fprintf( stdout, "'Location', 'SouthEast' ); /n" );    fprintf( stdout, "xlabel( 'problem size p' );/n" );    fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );    fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );    fprintf( stdout, "title( 'FLAME axpyt front-end performance (%s, %s)' );/n",             m_dim_desc, n_dim_desc );    fprintf( stdout, "print -depsc axpyt_front_%s_%s.eps/n", m_dim_tag, n_dim_tag );    fprintf( stdout, "hold off;/n");    fflush( stdout );    FLA_Finalize( );    return 0;}
开发者ID:fmarrabal,项目名称:libflame,代码行数:101,


示例13: main

//.........这里部分代码省略.........    sprintf( k_dim_desc, "k = p" );    sprintf( k_dim_tag,  "k%dp", 1 );  }  //datatype = FLA_FLOAT;  datatype = FLA_DOUBLE;  //datatype = FLA_COMPLEX;  //datatype = FLA_DOUBLE_COMPLEX;  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    m = m_input;    k = k_input;    if( m < 0 ) m = p / f2c_abs(m_input);    if( k < 0 ) k = p / f2c_abs(k_input);    for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){      // If transposing A, switch dimensions.      if ( pc_str[param_combo][1] == 'n' )      {        FLA_Obj_create( datatype, m, k, 0, 0, &A );        FLA_Obj_create( datatype, m, k, 0, 0, &B );      }      else      {        FLA_Obj_create( datatype, k, m, 0, 0, &A );        FLA_Obj_create( datatype, k, m, 0, 0, &B );      }      FLA_Obj_create( datatype, m, m, 0, 0, &C );      FLA_Obj_create( datatype, m, m, 0, 0, &C_ref );      FLA_Random_matrix( A );      FLA_Random_matrix( B );      FLA_Random_matrix( C );      fprintf( stdout, "data_syr2k_%s( %d, 1:3 ) = [ %d  ", pc_str[param_combo], i, p );      fflush( stdout );      time_Syr2k( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k,                 A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );/*      time_Syr2k( param_combo, FLA_ALG_FRONT, n_repeats, m, k,                 A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );*/      fprintf( stdout, " ]; /n" );      fflush( stdout );      FLA_Obj_free( &A );      FLA_Obj_free( &B );      FLA_Obj_free( &C );      FLA_Obj_free( &C_ref );    }    fprintf( stdout, "/n" );  }/*  fprintf( stdout, "figure;/n" );  fprintf( stdout, "hold on;/n" );  for ( i = 0; i < n_param_combos; i++ ) {    fprintf( stdout, "plot( data_syr2k_%s( :,1 ), data_syr2k_%s( :, 2 ), '%c:%c' ); /n",            pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );    fprintf( stdout, "plot( data_syr2k_%s( 
:,1 ), data_syr2k_%s( :, 4 ), '%c-.%c' ); /n",            pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... /n" );  for ( i = 0; i < n_param_combos; i++ )    fprintf( stdout, "'ref//_syr2k//_%s', 'fla//_syr2k//_%s', ... /n", pc_str[i], pc_str[i] );  fprintf( stdout, "'Location', 'SouthEast' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME syr2k front-end performance (%s, %s)' );/n",           m_dim_desc, k_dim_desc );  fprintf( stdout, "print -depsc syr2k_front_%s_%s.eps/n", m_dim_tag, k_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize( );  return 0;}
开发者ID:flame,项目名称:libflame,代码行数:101,


示例14: main

//.........这里部分代码省略.........    if( m < 0 ) m = p / f2c_abs(m_input);    //datatype = FLA_FLOAT;    //datatype = FLA_DOUBLE;    //datatype = FLA_COMPLEX;    datatype = FLA_DOUBLE_COMPLEX;    FLA_Obj_create( datatype, m, m, 0, 0, &A );    FLA_Obj_create( datatype, m, m, 0, 0, &Y );    FLA_Obj_create( datatype, m, m, 0, 0, &B );    FLA_Random_spd_matrix( uplo, A );    FLA_Hermitianize( uplo, A );    FLA_Random_spd_matrix( uplo, B );    FLA_Chol( uplo, B );/*    time_Eig_gest_nu( 0, FLA_ALG_REFERENCE, n_repeats, p, b_alg,                      inv, uplo, A, B, &dtime, &diff, &gflops );    fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; /n", i, p, gflops );    fflush( stdout );*/    for ( variant = 1; variant <= n_variants; variant++ ){            fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d  ", variant, i, p );      fflush( stdout );      time_Eig_gest_nu( variant, FLA_ALG_UNBLOCKED, n_repeats, p, b_alg,                        inv, uplo, A, Y, B, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      time_Eig_gest_nu( variant, FLA_ALG_UNB_OPT, n_repeats, p, b_alg,                        inv, uplo, A, Y, B, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      time_Eig_gest_nu( variant, FLA_ALG_BLOCKED, n_repeats, p, b_alg,                        inv, uplo, A, Y, B, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      fprintf( stdout, " ]; /n" );      fflush( stdout );    }    FLA_Obj_free( &A );    FLA_Obj_free( &Y );    FLA_Obj_free( &B );    fprintf( stdout, "/n" );  }/*  // Print the MATLAB commands to plot the data  // Delete all existing figures  fprintf( stdout, "figure;/n" );  // Plot the performance of the reference implementation  fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); /n" );  // Indicate that you want to add to the existing plot  fprintf( stdout, "hold 
on;/n" );  // Plot the data for the other numbers of threads  for ( i = 1; i <= n_variants; i++ ){    fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); /n",              i, i, colors[ i-1 ], ticks[ i-1 ] );  }  fprintf( stdout, "legend( ... /n" );  fprintf( stdout, "'Reference', ... /n" );  for ( i = 1; i <= n_variants; i++ )    fprintf( stdout, "'FLAME var%d', ... /n", i );  fprintf( stdout, "'Location', 'SouthEast' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME chol//_l performance (%s)' );/n",            m_dim_desc );  fprintf( stdout, "print -depsc chol_l_%s.eps/n", m_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize( );  return 0;}
开发者ID:flame,项目名称:libflame,代码行数:101,


示例15: main

//.........这里部分代码省略.........  else if( m_input <  -1 ) {    sprintf( m_dim_desc, "m = p/%d", -m_input );    sprintf( m_dim_tag,  "m%dp", -m_input );  }  else if( m_input == -1 ) {    sprintf( m_dim_desc, "m = p" );    sprintf( m_dim_tag,  "m%dp", 1 );  }  //datatype = FLA_FLOAT;  //datatype = FLA_DOUBLE;  //datatype = FLA_COMPLEX;  datatype = FLA_DOUBLE_COMPLEX;  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    m = m_input;    if( m < 0 ) m = p / abs(m_input);    for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){      FLA_Obj_create( datatype, m, m, 0, 0, &A );      FLA_Obj_create( datatype, m, 1, 0, 0, &b );      FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );      if ( FLA_Obj_is_single_precision( A ) )        FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm );      else        FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm );      if ( pc_str[param_combo][0] == 'l' )        FLA_Random_spd_matrix( FLA_LOWER_TRIANGULAR, A );      else        FLA_Random_spd_matrix( FLA_UPPER_TRIANGULAR, A );            FLA_Copy_external( b, b_orig );      fprintf( stdout, "data_chol_%s( %d, 1:5 ) = [ %d  ", pc_str[param_combo], i, p );      fflush( stdout );      time_Chol( param_combo, FLA_ALG_REFERENCE, n_repeats, m,                 A, b, b_orig, norm, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      time_Chol( param_combo, FLA_ALG_FRONT, n_repeats, m,                 A, b, b_orig, norm, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      fprintf( stdout, " ]; /n" );      fflush( stdout );      FLA_Obj_free( &A );      FLA_Obj_free( &b );      FLA_Obj_free( &b_orig );      FLA_Obj_free( &norm );    }    fprintf( stdout, "/n" );  }/*  fprintf( stdout, "figure;/n" );  fprintf( stdout, "hold on;/n" );  for ( i = 0; i < n_param_combos; i++ ) {    fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 2 ), '%c:%c' ); /n",         
   pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );    fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 4 ), '%c-.%c' ); /n",            pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... /n" );  for ( i = 0; i < n_param_combos; i++ )    fprintf( stdout, "'ref//_chol//_%s', 'fla//_chol//_%s', ... /n", pc_str[i], pc_str[i] );  fprintf( stdout, "'Location', 'SouthEast' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME chol front-end performance (%s)' );/n", m_dim_desc );  fprintf( stdout, "print -depsc chol_front_%s.eps/n", m_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize();  return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例16: time_Sylv

//.........这里部分代码省略.........    C_old;  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old );  FLA_Copy_external( C, C_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( param_combo ){    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Sylv( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );        break;      case FLA_ALG_FRONT:        FLA_Sylv( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );        break;      default:        printf("trouble/n");      }      break;    }    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Sylv( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );        break;      case FLA_ALG_FRONT:        FLA_Sylv( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Sylv( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );        break;      case FLA_ALG_FRONT:        FLA_Sylv( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale );        break;      default:        printf("trouble/n");      }      break;    }    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Sylv( FLA_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );        break;      case FLA_ALG_FRONT:        FLA_Sylv( FLA_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale );        break;      default:        printf("trouble/n");      }      break;    }    }    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  if ( type == FLA_ALG_REFERENCE ){    FLA_Copy_external( C, C_ref );    *diff = 0.0;  }  else{    *diff = FLA_Max_elemwise_diff( C, C_ref );  }  *gflops = ( m * m * n + n * n * m ) /             dtime_old / 1e9;  if ( FLA_Obj_is_complex( C ) )    *gflops *= 4.0;  *dtime = dtime_old;  FLA_Copy_external( C_old, C );  FLA_Obj_free( &C_old );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例17: main

//.........这里部分代码省略.........  fprintf( stdout, "/nclear all;/n/n" );  //datatype = FLA_FLOAT;  //datatype = FLA_DOUBLE;  //datatype = FLA_COMPLEX;  datatype = FLA_DOUBLE_COMPLEX;  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    mB = mB_input;    mC = mC_input;    mD = mD_input;    n  = n_input;    if( mB < 0 ) mB = p / abs(mB_input);    if( mC < 0 ) mC = p / abs(mC_input);    if( mD < 0 ) mD = p / abs(mD_input);    if( n  < 0 ) n  = p / abs(n_input);    for ( variant = 0; variant < n_variants; variant++ ){            FLA_Obj_create( datatype, mB, n, 0, 0, &B );      FLA_Obj_create( datatype, mC, n, 0, 0, &C );      FLA_Obj_create( datatype, mD, n, 0, 0, &D );      FLA_Obj_create( datatype, b_alg, n, 0, 0, &T );      FLA_Obj_create( datatype, n,  n, 0, 0, &R );      FLA_Obj_create( datatype, n,  n, 0, 0, &E );      FLA_Random_matrix( B );      FLA_Random_matrix( C );      FLA_Random_matrix( D );      FLA_Set( FLA_ZERO, R );      FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, B, FLA_ONE, R );      FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, D, FLA_ONE, R );      FLA_Chol( FLA_UPPER_TRIANGULAR, R );      FLA_Set( FLA_ZERO, E );      FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, B, FLA_ONE, E );      FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, C, FLA_ONE, E );      FLA_Chol( FLA_UPPER_TRIANGULAR, E );      fprintf( stdout, "data_uddate_ut( %d, 1:5 ) = [ %d  ", i, p );      fflush( stdout );      time_UDdate_UT( variant, FLA_ALG_FRONT, n_repeats, mB, mC, mD, n,                      B, C, D, T, R, E, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      fprintf( stdout, " ]; /n" );      fflush( stdout );      FLA_Obj_free( &B );      FLA_Obj_free( &C );      FLA_Obj_free( &D );      FLA_Obj_free( &T );      FLA_Obj_free( &R );      FLA_Obj_free( &E );    }    fprintf( stdout, "/n" );  }/*  fprintf( 
stdout, "figure;/n" );  fprintf( stdout, "hold on;/n" );  for ( i = 0; i < n_variants; i++ ) {    fprintf( stdout, "plot( data_qr_ut( :,1 ), data_qr_ut( :, 2 ), '%c:%c' ); /n",            colors[ i ], ticks[ i ] );    fprintf( stdout, "plot( data_qr_ut( :,1 ), data_qr_ut( :, 4 ), '%c-.%c' ); /n",            colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... /n" );  for ( i = 0; i < n_variants; i++ )    fprintf( stdout, "'ref//_qr//_ut', 'fla//_qr//_ut', ... /n" );  fprintf( stdout, "'Location', 'SouthEast' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME UDdate_UT front-end performance (%s, %s)' );/n",            m_dim_desc, n_dim_desc );  fprintf( stdout, "print -depsc qr_ut_front_%s_%s.eps/n", m_dim_tag, n_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize( );  return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例18: time_Tevd_v

//.........这里部分代码省略.........      switch( type ){      case FLA_ALG_UNB_OPT:        FLA_Tevd_v_opt_var2( n_iter_max, d, e, G, R, W, A, b_alg );        break;      }      break;    }    }    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  {    FLA_Obj V, A_rev_evd, norm, eye;	FLA_Copy( d, l );//FLA_Obj_show( "A_save", A_save, "%9.2e + %9.2e ", "" );//FLA_Obj_show( "A_evd", A, "%9.2e + %9.2e ", "" );	FLA_Sort_evd( FLA_FORWARD, l, A );    FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &V );     FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_rev_evd );     FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &eye );     FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );    FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, l, A );    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,              FLA_ONE, A, V, FLA_ZERO, A_rev_evd );    FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A_rev_evd );/*    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,              FLA_ONE, A, D, FLA_ZERO, A_rev_evd );    FLA_Copy( A_rev_evd, D );    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,              FLA_ONE, D, V, FLA_ZERO, A_rev_evd );    FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A_rev_evd );*///FLA_Obj_show( "A_rev_evd", A_rev_evd, "%9.2e + %9.2e ", "" );     FLA_Axpy( FLA_MINUS_ONE, A_orig, A_rev_evd );    FLA_Norm_frob( A_rev_evd, norm );    FLA_Obj_extract_real_scalar( norm, diff1 );    //*diff = FLA_Max_elemwise_diff( A_orig, A_rev_evd );    FLA_Set_to_identity( eye );	FLA_Copy( V, A_rev_evd );    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,              FLA_ONE, V, A_rev_evd, FLA_MINUS_ONE, eye );    FLA_Norm_frob( eye, norm );    FLA_Obj_extract_real_scalar( norm, diff2 );/*FLA_Obj_free( &EL );FLA_Obj_free( &EU );FLA_Obj_free( &D );FLA_Obj_free( &dc );FLA_Obj_free( &ec );*/    FLA_Obj_free( &V );    FLA_Obj_free( &A_rev_evd );    FLA_Obj_free( &eye );    FLA_Obj_free( &norm );  }  k = 2.00;  if ( 
FLA_Obj_is_complex( A ) )  {    *gflops = (                      (       4.5 * k * m * m     ) +                2.0 * (       3.0 * k * m * m * m ) ) /               dtime_old / 1e9;  }  else   {    *gflops = (                      (       4.5 * k * m * m     ) +                1.0 * (       3.0 * k * m * m * m ) ) /               dtime_old / 1e9;  }  *dtime = dtime_old;  FLA_Copy_external( A_save, A );  FLA_Copy_external( G_save, G );  FLA_Copy_external( d_save, d );  FLA_Copy_external( e_save, e );  FLA_Obj_free( &A_save );  FLA_Obj_free( &G_save );  FLA_Obj_free( &d_save );  FLA_Obj_free( &e_save );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例19: time_Copyt

//.........这里部分代码省略.........  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( C_old, C );    *dtime = FLA_Clock();    switch( param_combo ){    // Time parameter combination 0    case 0:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_NO_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_NO_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 1:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_CONJ_NO_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    case 3:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Copyt( FLA_CONJ_TRANSPOSE, A, C );        break;      case FLA_ALG_FRONT:        FLA_Copyt( FLA_CONJ_TRANSPOSE, A, C );        break;      default:        printf("trouble/n");      }      break;    }    }	    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  if ( type == FLA_ALG_REFERENCE )  {    FLA_Copy_external( C, C_ref );    *diff = 0.0;  }  else  {    *diff = FLA_Max_elemwise_diff( C, C_ref );  }  *gflops = 2.0 * m * n /             dtime_old /             1.0e9;  if ( FLA_Obj_is_complex( A ) )    *gflops *= 4.0;  *dtime = dtime_old;  FLA_Copy_external( C_old, C );  FLA_Obj_free( &C_old );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例20: FLA_Lyap_n_unb_var4

/*
 * FLA_Lyap_n_unb_var4
 *
 * Unblocked FLAME-style sweep that overwrites C in place, exposing one
 * diagonal element of A and C per iteration.
 * NOTE(review): judging by the name this is variant 4 of the no-transpose
 * Lyapunov solver -- confirm against the libflame documentation.
 *
 * isgn  scalar object; C is scaled by it before the sweep starts.
 * A     matrix referenced through its A00, a01 and alpha11 views.
 * C     matrix updated through its gamma11, c01 and C00 views.
 *
 * Two temporaries are created and released here: W (conformal to A) and the
 * 1 x 1 scalar omega (same datatype as A).
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Lyap_n_unb_var4( FLA_Obj isgn, FLA_Obj A, FLA_Obj C )
{
  /* Quadrant views and 3x3 sub-views of A. */
  FLA_Obj ATL, ATR, ABL, ABR;
  FLA_Obj A00, a01, A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20, a21, A22;

  /* Quadrant views and 3x3 sub-views of C. */
  FLA_Obj CTL, CTR, CBL, CBR;
  FLA_Obj C00, c01, C02;
  FLA_Obj c10t, gamma11, c12t;
  FLA_Obj C20, c21, C22;

  /* Quadrant views and 3x3 sub-views of the workspace W. */
  FLA_Obj WTL, WTR, WBL, WBR;
  FLA_Obj W00, w01, W02;
  FLA_Obj w10t, omega11, w12t;
  FLA_Obj W20, w21, W22;

  /* Temporaries owned by this routine. */
  FLA_Obj W;
  FLA_Obj omega;

  /* C := isgn * C */
  FLA_Scal( isgn, C );

  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &W );
  FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &omega );

  /* Start every matrix with a 0 x 0 FLA_BR quadrant. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_BR );
  FLA_Part_2x2( C, &CTL, &CTR,
                   &CBL, &CBR,
                0, 0, FLA_BR );
  FLA_Part_2x2( W, &WTL, &WTR,
                   &WBL, &WBR,
                0, 0, FLA_BR );

  /* Iterate for as long as the top-left quadrant of C still has rows. */
  while ( FLA_Obj_length( CTL ) > 0 )
  {
    /* Expose a 1 x 1 element taken from the FLA_TL quadrant. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,
                           &A00,  &a01,     &A02,
                           &a10t, &alpha11, &a12t,
                           ABL, ABR,
                           &A20,  &a21,     &A22,
                           1, 1, FLA_TL );
    FLA_Repart_2x2_to_3x3( CTL, CTR,
                           &C00,  &c01,     &C02,
                           &c10t, &gamma11, &c12t,
                           CBL, CBR,
                           &C20,  &c21,     &C22,
                           1, 1, FLA_TL );
    FLA_Repart_2x2_to_3x3( WTL, WTR,
                           &W00,  &w01,     &W02,
                           &w10t, &omega11, &w12t,
                           WBL, WBR,
                           &W20,  &w21,     &W22,
                           1, 1, FLA_TL );

    /* ---- update step -------------------------------------------------- */

    /* omega   := conj( alpha11 ) + alpha11
       gamma11 := gamma11 / omega */
    FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega );
    FLA_Mult_add( FLA_ONE, alpha11, omega );
    FLA_Inv_scal( omega, gamma11 );

    /* c01 := c01 - a01 * gamma11 */
    FLA_Axpys( FLA_MINUS_ONE, gamma11, a01, FLA_ONE, c01 );

    /* W00 := triu( A00 ) + conj( alpha11 ) * I
       c01 := inv( W00 ) * c01 */
    FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, A00, W00 );
    FLA_Shift_diag( FLA_CONJUGATE, alpha11, W00 );
    FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
              W00, c01 );

    /* C00 := C00 - a01 * c01' - c01 * a01' */
    FLA_Her2( FLA_UPPER_TRIANGULAR, FLA_MINUS_ONE, a01, c01, C00 );

    /* ---- end of update step ------------------------------------------- */

    /* Fold the exposed element into the FLA_BR quadrant. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,
                              A00,  a01,     A02,
                              a10t, alpha11, a12t,
                              &ABL, &ABR,
                              A20,  a21,     A22,
                              FLA_BR );
    FLA_Cont_with_3x3_to_2x2( &CTL, &CTR,
                              C00,  c01,     C02,
                              c10t, gamma11, c12t,
                              &CBL, &CBR,
                              C20,  c21,     C22,
                              FLA_BR );
    FLA_Cont_with_3x3_to_2x2( &WTL, &WTR,
                              W00,  w01,     W02,
                              w10t, omega11, w12t,
                              &WBL, &WBR,
                              W20,  w21,     W22,
                              FLA_BR );
  }

  /* Release the temporaries created above. */
  FLA_Obj_free( &W );
  FLA_Obj_free( &omega );

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:94,


示例21: main

//.........这里部分代码省略.........  }  else if( m_input <  -1 ) {    sprintf( m_dim_desc, "m = p/%d", -m_input );    sprintf( m_dim_tag,  "m%dp", -m_input );  }  else if( m_input == -1 ) {    sprintf( m_dim_desc, "m = p" );    sprintf( m_dim_tag,  "m%dp", 1 );  }  //datatype = FLA_FLOAT;  datatype = FLA_DOUBLE;  //datatype = FLA_COMPLEX;  //datatype = FLA_DOUBLE_COMPLEX;  FLASH_Queue_set_num_threads( n_threads );  //FLASH_Queue_set_verbose_output( TRUE );  //FLA_Check_error_level_set( FLA_NO_ERROR_CHECKING );  //FLASH_Queue_disable();  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    m = m_input;    n = n_input;    if ( m < 0 ) m = p * abs(m_input);    if ( n < 0 ) n = p * abs(n_input);    for ( param_combo = 0; param_combo < n_param_combos; param_combo++ )    {      FLA_Obj_create( datatype, m, n, 0, 0, &A_flat );      FLA_Obj_create( datatype, n, 1, 0, 0, &x_flat );      FLA_Obj_create( datatype, m, 1, 0, 0, &b_flat );      FLA_Random_matrix( A_flat );      FLA_Random_matrix( b_flat );      FLASH_QR_UT_create_hier_matrices( A_flat, 1, &b_flash, &A, &TW );      FLASH_Obj_create_hier_copy_of_flat( b_flat, 1, &b_flash, &b );      FLASH_Obj_create_hier_copy_of_flat( x_flat, 1, &b_flash, &x );      fprintf( stdout, "data_qrut_%s( %d, 1:5 ) = [ %d  ", pc_str[param_combo], i, p );      fflush( stdout );      time_QR_UT( param_combo, FLA_ALG_FRONT, n_repeats, m, n,                  A, TW, b, x, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      fprintf( stdout, " ]; /n" );      fflush( stdout );      FLA_Obj_free( &A_flat );      FLA_Obj_free( &b_flat );      FLA_Obj_free( &x_flat );      FLASH_Obj_free( &A );      FLASH_Obj_free( &TW );      FLASH_Obj_free( &b );      FLASH_Obj_free( &x );    }  }/*  fprintf( stdout, "figure;/n" );  fprintf( stdout, "hold on;/n" );  for ( i = 0; i < n_param_combos; i++ ) {    fprintf( stdout, "plot( data_qrut_%s( :,1 ), data_qrut_%s( :, 2 ), '%c:%c' ); /n",            
pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );    fprintf( stdout, "plot( data_qrut_%s( :,1 ), data_qrut_%s( :, 4 ), '%c-.%c' ); /n",            pc_str[i], pc_str[i], colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... /n" );  for ( i = 0; i < n_param_combos; i++ )    fprintf( stdout, "'ref//_qrut//_%s', 'fla//_qrut//_%s', ... /n", pc_str[i], pc_str[i] );  fprintf( stdout, "'Location', 'SouthEast' ); /n" );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME qrut front-end performance (%s)' );/n",           m_dim_desc );  fprintf( stdout, "print -depsc qrut_front_%s.eps/n", m_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );*/  FLA_Finalize( );  return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例22: time_Gemm_pp_nn

/*
 * time_Gemm_pp_nn
 *
 * Times one implementation of C := A * B + C over `nrepeats` runs and
 * reports the best (smallest) wall-clock time, the achieved MFLOPS rate and
 * the difference from the reference result.
 *
 * variant   0 runs the reference REF_Gemm; 1 dispatches on `type`; any other
 *           value performs no computation inside the timed region.
 * type      for variant 1: FLA_ALG_UNBLOCKED runs FLA_Gemm_pp_nn_var1,
 *           FLA_ALG_BLOCKED runs REF_Gemm; anything else prints "trouble".
 * nrepeats  number of timed repetitions; C is restored from a saved copy
 *           before each one.
 * n         unused; kept so existing callers keep compiling.
 * nb_alg    forwarded to FLA_Gemm_pp_nn_var1.
 * A, B      input operands.
 * C         output operand; restored to its incoming contents on return.
 * Cref      for variant 0 it receives the computed C; otherwise it is the
 *           reference that C is compared against.
 * dtime     out: best time over all repetitions (0.0 if nrepeats <= 0).
 * diff      out: 0.0 for variant 0, else FLA_Max_elemwise_diff( C, Cref ).
 * mflops    out: 2 * length(C) * width(C) * width(A) / best time / 1e6
 *           (0.0 if nrepeats <= 0).
 */
void time_Gemm_pp_nn( int variant, int type, int nrepeats, int n, int nb_alg,
                      FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj Cref,
                      double *dtime, double *diff, double *mflops )
{
  int     irep;
  double  elapsed;
  double  dtime_old = 0.0;  /* defined even when the loop never executes */
  FLA_Obj Cold;

  (void) n;

  /* Save C so that every repetition starts from the same contents. */
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &Cold );
  FLA_Copy_external( C, Cold );

  for ( irep = 0; irep < nrepeats; irep++ )
  {
    FLA_Copy_external( Cold, C );

    *dtime = FLA_Clock();

    switch ( variant )
    {
    case 0:
      /* Time the reference implementation. */
      REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                ONE, A, B, FLA_ONE, C );
      break;

    case 1:
      /* Time variant 1. */
      switch ( type )
      {
      case FLA_ALG_UNBLOCKED:
        FLA_Gemm_pp_nn_var1( FLA_ONE, A, B, C, nb_alg );
        break;

      case FLA_ALG_BLOCKED:
        REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                  ONE, A, B, FLA_ONE, C );
        break;

      default:
        printf( "trouble\n" );
        break;
      }
      break;

    default:
      /* Unknown variant: nothing is timed. */
      break;
    }

    /* Keep the best time seen so far; the first run seeds it. */
    elapsed = FLA_Clock() - *dtime;
    if ( irep == 0 || elapsed < dtime_old )
      dtime_old = elapsed;
  }

  if ( variant == 0 )
  {
    /* The reference run defines the expected result. */
    FLA_Copy_external( C, Cref );
    *diff = 0.0;
  }
  else
  {
    *diff = FLA_Max_elemwise_diff( C, Cref );
  }

  if ( nrepeats > 0 )
  {
    *mflops = 2.0 *
              FLA_Obj_length( C ) *
              FLA_Obj_width( C ) *
              FLA_Obj_width( A ) /
              dtime_old /
              1000000;
  }
  else
  {
    /* Nothing was timed, so there is no rate to report. */
    *mflops = 0.0;
  }

  *dtime = dtime_old;

  /* Hand C back unchanged and release the saved copy. */
  FLA_Copy_external( Cold, C );
  FLA_Obj_free( &Cold );
}
开发者ID:pgawron,项目名称:tlash,代码行数:79,


示例23: main

//.........这里部分代码省略.........  blas_thread_init();  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )  {    m = m_input;    k = k_input;    n = n_input;    if( m < 0 ) m = p / abs(m_input);    if( k < 0 ) k = p / abs(k_input);    if( n < 0 ) n = p / abs(n_input);	    FLA_Obj_create( FLA_DOUBLE, m, k, &A );    FLA_Obj_create( FLA_DOUBLE, k, n, &B );    FLA_Obj_create( FLA_DOUBLE, m, n, &C );    FLA_Obj_create( FLA_DOUBLE, m, n, &C_ref );	    /* Generate random matrices A, C */	if( p > 4000 ){    FLA_Random_matrix( A );    FLA_Random_matrix( B );    FLA_Random_matrix( C );	    FLA_Copy_external( C, C_ref );	}	    blas_cpu_number = 1;    //time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,    //                A, B, C, C_ref, &dtime, &diff, &gflops );    //fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; /n", i, p, gflops );    //fflush( stdout );    for ( j = 0; j < n_thread_experiments; j++ ){      n_threads = n_threads_exp[j];      blas_cpu_number = n_threads;      fprintf( stdout, "data_nth%d( %d, 1:3 ) = [ %d  ", n_threads, i, p );      fflush( stdout );      time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,                    A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      fprintf( stdout, " ]; /n" );      fflush( stdout );    }    fprintf( stdout, "/n" );    FLA_Obj_free( &A );    FLA_Obj_free( &B );    FLA_Obj_free( &C );    FLA_Obj_free( &C_ref );  }  /* Print the MATLAB commands to plot the data */  /* Delete all existing figures */  fprintf( stdout, "figure;/n" );  /* Indicate that you want to add to the existing plot */  fprintf( stdout, "hold on;/n" );  /* Plot the data for the other numbers of threads */  for ( i = 0; i < n_thread_experiments; i++ ){    fprintf( stdout, "plot( data_nth%d( :,1 ), data_nth%d( :, 2 ), '%c:%c' ); /n",              n_threads_exp[ i ], n_threads_exp[ i ], colors[ i ], ticks[ i ] );  }  fprintf( stdout, "legend( ... 
/n" );  for ( i = 0; i < n_thread_experiments-1; i++ )    fprintf( stdout, "'%d threads', ... /n", n_threads_exp[ i ] );  fprintf( stdout, "'%d threads', 'Location', 'Best' ); /n", n_threads_exp[ n_thread_experiments-1 ] );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, n_threads_exp[n_thread_experiments-1] * max_gflops );  fprintf( stdout, "title( 'Goto BLAS dgemm performance (%s, %s, %s)' );/n",            m_dim_desc, k_dim_desc, n_dim_desc );  fprintf( stdout, "print -depsc gemm_nn_goto_p_%s_%s_%s.eps/n", m_dim_tag, k_dim_tag, n_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );  FLA_Finalize( );}
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例24: main

//.........这里部分代码省略.........            printf( "data_unb_var3( %d, 1:3 ) = [ %d %le  %le];/n", i, n,	      gflops / dtime_best, diff );      fflush( stdout );    }    /* Variant 3 blocked */    for ( irep=0; irep<nrepeats; irep++ ){      FLA_Copy( Aold, A );          dtime = FLA_Clock();#if TIME_BLK_VAR3 == TRUE      Chol_blk_var3( A, nb_alg );#else      REF_Chol( TIME_LAPACK, A, nb_alg );#endif      dtime = FLA_Clock() - dtime;      if ( irep == 0 ) 	dtime_best = dtime;      else	dtime_best = ( dtime < dtime_best ? dtime : dtime_best );    }    diff = FLA_Max_elemwise_diff( A, Aref );    printf( "data_blk_var3( %d, 1:3 ) = [ %d %le  %le];/n", i, n,            gflops / dtime_best, diff );    fflush( stdout );    FLA_Obj_free( &A );    FLA_Obj_free( &Aold );    FLA_Obj_free( &Aref );    FLA_Obj_free( &delta );    printf( "/n" );    i++;  }  /* Print the MATLAB commands to plot the data */  /* Delete all existing figures */  printf( "close all/n" );#if OCTAVE == TRUE  /* Plot the performance of FLAME */  printf( "plot( data_FLAME( :,1 ), data_FLAME( :, 2 ), '-k;libflame;' ); /n" );  /* Indicate that you want to add to the existing plot */  printf( "hold on/n" );  /* Plot the performance of the reference implementation */  printf( "plot( data_REF( :,1 ), data_REF( :, 2 ), '-m;reference;' ); /n" );  /* Plot the performance of your implementations */  printf( "plot( data_unb_var1( :,1 ), data_unb_var1( :, 2 ), /"-rx;UnbVar1;/" ); /n" );  printf( "plot( data_unb_var2( :,1 ), data_unb_var2( :, 2 ), /"-go;UnbVar2;/" ); /n" );  printf( "plot( data_unb_var3( :,1 ), data_unb_var3( :, 2 ), /"-b*;UnbVar3;/" ); /n" );  printf( "plot( data_blk_var1( :,1 ), data_blk_var1( :, 2 ), /"-rx;BlkVar1;/", /"markersize/", 3 ); /n" );  printf( "plot( data_blk_var2( :,1 ), data_blk_var2( :, 2 ), /"-go;BlkVar2;/", /"markersize/", 3  ); /n" );  printf( "plot( data_blk_var3( :,1 ), data_blk_var3( :, 2 ), /"-b*;BlkVar3;/", /"markersize/", 3  ); /n" );
开发者ID:ztschir,项目名称:High-Performance,代码行数:66,


示例25: time_Gemm

//.........这里部分代码省略.........    // Time parameter combination 5    case 5:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 6    case 6:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 7    case 7:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    // Time parameter combination 8    case 8:{      switch( type ){      case FLA_ALG_REFERENCE:        REF_Gemm( FLA_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      case FLA_ALG_FRONT:        FLA_Gemm( FLA_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C );        break;      default:        printf("trouble/n");      }      break;    }    }	    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }/*  if ( type == FLA_ALG_REFERENCE )  {    FLA_Copy_external( C, C_ref );    *diff = 0.0;  }  else  {    *diff = FLA_Max_elemwise_diff( C, C_ref );  }*/  *gflops = 2.0 * m * k * n /             dtime_old /             1.0e9;  if ( param_combo == 0 ||       param_combo == 1 ||       param_combo == 2 ||       param_combo == 3 ||       param_combo == 6 )  *gflops *= 4.0;  *dtime = 
dtime_old;  FLA_Copy_external( C_old, C );  FLA_Obj_free( &C_old );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例26: libfla_test_hemm_experiment

//.........这里部分代码省略.........		A_test = A;		B_test = B;		C_test = C;	}	// Create a control tree for the individual variants.	if ( impl == FLA_TEST_FLAT_UNB_VAR ||	     impl == FLA_TEST_FLAT_OPT_VAR ||	     impl == FLA_TEST_FLAT_BLK_VAR ||	     impl == FLA_TEST_FLAT_UNB_EXT ||	     impl == FLA_TEST_FLAT_BLK_EXT )		libfla_test_hemm_cntl_create( var, b_alg_flat );	// Repeat the experiment n_repeats times and record results.	for ( i = 0; i < n_repeats; ++i )	{		if ( impl == FLA_TEST_HIER_FRONT_END )			FLASH_Obj_hierarchify( C_save, C_test );		else			FLA_Copy_external( C_save, C_test );				time = FLA_Clock();		libfla_test_hemm_impl( impl, side, uplo, alpha, A_test, B_test, beta, C_test );				time = FLA_Clock() - time;		time_min = min( time_min, time );	}	// Copy the solution to flat matrix X.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_Obj_flatten( C_test, C );	}	else    {		// No action needed since C_test and C refer to the same object.	}	// Free the hierarchical matrices if we're testing the FLASH front-end.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_Obj_free( &A_test );		FLASH_Obj_free( &B_test );		FLASH_Obj_free( &C_test );	}	// Free the control trees if we're testing the variants.	if ( impl == FLA_TEST_FLAT_UNB_VAR ||	     impl == FLA_TEST_FLAT_OPT_VAR ||	     impl == FLA_TEST_FLAT_BLK_VAR ||	     impl == FLA_TEST_FLAT_UNB_EXT ||	     impl == FLA_TEST_FLAT_BLK_EXT )		libfla_test_hemm_cntl_free();	// Compute the performance of the best experiment repeat.	
if ( side == FLA_LEFT )		*perf = ( 1 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF;	else		*perf = ( 1 * m * n * n ) / time_min / FLOPS_PER_UNIT_PERF;	if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;	// Compute:	//   y = C * x	// and compare to	//   z = ( beta * C_orig + alpha * A * B ) x      (side = left)	//   z = ( beta * C_orig + alpha * B * A ) x      (side = right)	FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_ONE, C, x, FLA_ZERO, y );	if ( side == FLA_LEFT )	{		FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_ONE, B, x, FLA_ZERO, w );		FLA_Hemv_external( uplo,             alpha,   A, w, FLA_ZERO, z );	}	else	{		FLA_Hemv_external( uplo,             FLA_ONE, A, x, FLA_ZERO, w );		FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha,   B, w, FLA_ZERO, z );	}	FLA_Gemv_external( FLA_NO_TRANSPOSE, beta, C_save, x, FLA_ONE, z );	// Compute || y - z ||.	//FLA_Axpy_external( FLA_MINUS_ONE, y, z );	//FLA_Nrm2_external( z, norm );	//FLA_Obj_extract_real_scalar( norm, residual );	*residual = FLA_Max_elemwise_diff( y, z );	// Free the supporting flat objects.	FLA_Obj_free( &C_save );	// Free the flat test matrices.	FLA_Obj_free( &A );	FLA_Obj_free( &B );	FLA_Obj_free( &C );	FLA_Obj_free( &x );	FLA_Obj_free( &y );	FLA_Obj_free( &z );	FLA_Obj_free( &w );	FLA_Obj_free( &norm );}
开发者ID:fmarrabal,项目名称:libflame,代码行数:101,


示例27: libfla_test_qrut_experiment

//.........这里部分代码省略.........	// Initialize the test matrices.	FLA_Random_matrix( A );	// Save the original object contents in a temporary object.	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );	// Create vectors to form a linear system.	FLA_Obj_create( datatype, n, 1, 0, 0, &x );	FLA_Obj_create( datatype, m, 1, 0, 0, &b );	FLA_Obj_create( datatype, n, 1, 0, 0, &y );	// Create a real scalar object to hold the norm of A.	FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );	// Create a random right-hand side vector.	FLA_Random_matrix( b );	// Use hierarchical matrices if we're testing the FLASH front-end.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_QR_UT_create_hier_matrices( A, 1, &b_flash, &A_test, &T_test );		FLASH_Obj_create_hier_copy_of_flat( b, 1, &b_flash, &b_test );		FLASH_Obj_create_hier_copy_of_flat( x, 1, &b_flash, &x_test );	}	else	{		A_test = A;		T_test = T;	}	// Create a control tree for the individual variants.	if ( impl == FLA_TEST_FLAT_UNB_VAR ||	     impl == FLA_TEST_FLAT_OPT_VAR ||	     impl == FLA_TEST_FLAT_BLK_VAR )		libfla_test_qrut_cntl_create( var, b_alg_flat );	// Repeat the experiment n_repeats times and record results.	for ( i = 0; i < n_repeats; ++i )	{		if ( impl == FLA_TEST_HIER_FRONT_END )			FLASH_Obj_hierarchify( A_save, A_test );		else			FLA_Copy_external( A_save, A_test );				time = FLA_Clock();		libfla_test_qrut_impl( impl, A_test, T_test );				time = FLA_Clock() - time;		time_min = min( time_min, time );	}	// Perform a linear solve with the result.	if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_QR_UT_solve( A_test, T_test, b_test, x_test );		FLASH_Obj_flatten( x_test, x );	}	else    {		FLA_QR_UT_solve( A_test, T_test, b, x );	}	// Free the hierarchical matrices if we're testing the FLASH front-end.	
if ( impl == FLA_TEST_HIER_FRONT_END )	{		FLASH_Obj_free( &A_test );		FLASH_Obj_free( &T_test );		FLASH_Obj_free( &b_test );		FLASH_Obj_free( &x_test );	}	// Free the control trees if we're testing the variants.	if ( impl == FLA_TEST_FLAT_UNB_VAR ||	     impl == FLA_TEST_FLAT_OPT_VAR ||	     impl == FLA_TEST_FLAT_BLK_VAR )		libfla_test_qrut_cntl_free();	// Compute the performance of the best experiment repeat.	*perf = (         2.0   * m * n * n - 	          ( 2.0 / 3.0 ) * n * n * n ) / time_min / FLOPS_PER_UNIT_PERF;	if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;	// Compute the residual.	FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_ONE, A_save, x, FLA_MINUS_ONE, b );	FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b, FLA_ZERO, y );	FLA_Nrm2_external( y, norm );	FLA_Obj_extract_real_scalar( norm, residual );	// Free the supporting flat objects.	FLA_Obj_free( &x );	FLA_Obj_free( &b );	FLA_Obj_free( &y );	FLA_Obj_free( &norm );	FLA_Obj_free( &A_save );	// Free the flat test matrices.	FLA_Obj_free( &A );	FLA_Obj_free( &T );}
开发者ID:flame,项目名称:libflame,代码行数:101,


示例28: main

//.........这里部分代码省略.........    FLA_Obj_create( datatype, k, n, 0, 0, &B );    FLA_Obj_create( datatype, m, n, 0, 0, &C );    FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );    /* Generate random matrices A, C */    FLA_Random_matrix( A );    FLA_Random_matrix( B );    FLA_Random_matrix( C );    FLA_Copy_external( C, C_ref );    /* Time the reference implementation */    time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg,                  A, B, C, C_ref, &dtime, &diff, &gflops );    fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d  %6.3lf ]; /n", i, p, gflops );    fflush( stdout );    for ( variant = 1; variant <= n_variants; variant++ ){            fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d  ", variant, i, p );      fflush( stdout );      time_Gemm_nn( variant, FLA_ALG_UNBLOCKED, n_repeats, p, nb_alg,                    A, B, C, C_ref, &dtime, &diff, &gflops );      //gflops = 0.0;      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      time_Gemm_nn( variant, FLA_ALG_BLOCKED, n_repeats, p, nb_alg,                    A, B, C, C_ref, &dtime, &diff, &gflops );      fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      fflush( stdout );      //time_Gemm_nn( variant, FLA_ALG_OPTIMIZED, n_repeats, p, nb_alg,      //              A, B, C, C_ref, &dtime, &diff, &gflops );      //fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );      //fflush( stdout );      fprintf( stdout, " ]; /n" );      fflush( stdout );    }    fprintf( stdout, "/n" );    FLA_Obj_free( &A );    FLA_Obj_free( &B );    FLA_Obj_free( &C );    FLA_Obj_free( &C_ref );  }  /* Print the MATLAB commands to plot the data */  /* Delete all existing figures */  fprintf( stdout, "figure;/n" );  /* Plot the performance of the reference implementation */  fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); /n" );  /* Indicate that you want to add to the existing plot */  fprintf( stdout, "hold on;/n" );  /* Plot the data for the other numbers of threads */  for ( 
i = 1; i <= n_variants; i++ ) {    fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); /n",            i, i, colors[ i-1 ], ticks[ i-1 ] );    fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 4 ), '%c-.%c' ); /n",            i, i, colors[ i-1 ], ticks[ i-1 ] );    //fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 6 ), '%c--%c' ); /n",    //        i, i, colors[ i-1 ], ticks[ i-1 ] );  }  fprintf( stdout, "legend( ... /n" );  fprintf( stdout, "'Reference', ... /n" );  for ( i = 1; i < n_variants; i++ )    //fprintf( stdout, "'unb//_var%d', 'blk//_var%d', 'opt//_var%d' ... /n", i, i, i );    fprintf( stdout, "'unb//_var%d', 'blk//_var%d', ... /n", i, i );  i = n_variants;  //fprintf( stdout, "'unb//_var%d', 'blk//_var%d', 'opt//_var%d' ); /n", i, i, i );  fprintf( stdout, "'unb//_var%d', 'blk//_var%d' ); /n", i, i );  fprintf( stdout, "xlabel( 'problem size p' );/n" );  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" );  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops );  fprintf( stdout, "title( 'FLAME gemm//_nn performance (%s, %s, %s)' );/n",            m_dim_desc, k_dim_desc, n_dim_desc );  fprintf( stdout, "print -depsc gemm_nn_%s_%s_%s.eps/n", m_dim_tag, k_dim_tag, n_dim_tag );  fprintf( stdout, "hold off;/n");  fflush( stdout );  FLA_Finalize( );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例29: time_Transpose

//.........这里部分代码省略.........    cntl_trans_var_unb;  fla_transpose_t*    cntl_trans_var_blk;  fla_swap_t*    cntl_swap_var_blk;  fla_swap_t*    cntl_swap_blas;  bp                 = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );  cntl_swap_blas     = FLA_Cntl_swap_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL );  cntl_swap_var_blk  = FLA_Cntl_swap_obj_create( FLA_FLAT, FLA_UNBLOCKED_VARIANT1, bp, cntl_swap_blas );  cntl_trans_var_unb = FLA_Cntl_transpose_obj_create( FLA_FLAT, FLA_UNBLOCKED_VARIANT1, NULL, NULL, NULL );  cntl_trans_var_blk = FLA_Cntl_transpose_obj_create( FLA_FLAT, variant, bp, cntl_trans_var_unb, cntl_swap_var_blk );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_old );  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_tmp );  FLA_Copy_external( A, A_old );  for ( irep = 0 ; irep < nrepeats; irep++ ){    FLA_Copy_external( A_old, A );    *dtime = FLA_Clock();    switch( variant ){    case 0:      //FLA_Copyt_external( FLA_TRANSPOSE, A, A_tmp );      //FLA_Set( FLA_ZERO, A );      //FLA_Copyt_external( FLA_NO_TRANSPOSE, A_tmp, A );      FLA_Transpose( A );      break;    case 1:{      /* Time variant 1 */      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Transpose_unb_var1( A );        break;      case FLA_ALG_BLOCKED:        FLA_Transpose_blk_var1( A, cntl_trans_var_blk );        break;      default:        printf("trouble/n");      }      break;    }    case 2:{      /* Time variant 2 */      switch( type ){      case FLA_ALG_UNBLOCKED:        FLA_Transpose_unb_var2( A );        break;      case FLA_ALG_BLOCKED:        FLA_Transpose_blk_var2( A, cntl_trans_var_blk );        break;      default:        printf("trouble/n");      }      break;    }     }    *dtime = FLA_Clock() - *dtime;    dtime_old = min( *dtime, dtime_old );  }  FLA_Cntl_obj_free( cntl_trans_var_blk );  FLA_Cntl_obj_free( cntl_trans_var_unb );  FLA_Cntl_obj_free( cntl_swap_var_blk );  FLA_Cntl_obj_free( cntl_swap_blas );  FLA_Blocksize_free( bp );  if ( 
variant == 0 ){    FLA_Copy_external( A, A_ref );    *diff = 0.0;  }  else{    *diff = FLA_Max_elemwise_diff( A, A_ref );  }  *gflops = 4 * n * n /            dtime_old / 1e9;  *dtime = dtime_old;  FLA_Copy_external( A_old, A );  FLA_Obj_free( &A_old );  FLA_Obj_free( &A_tmp );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,



注:本文中的FLA_Obj_free函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ FLA_Obj_length函数代码示例
C++ FLA_Obj_datatype函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网,提供自学Excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。