这篇《C++ FLA_Obj_free函数代码示例》教程写得很实用,希望能帮到您。
本文整理汇总了C++中FLA_Obj_free函数的典型用法代码示例。如果您正苦于以下问题:C++ FLA_Obj_free函数的具体用法?C++ FLA_Obj_free怎么用?C++ FLA_Obj_free使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。 在下文中一共展示了FLA_Obj_free函数的29个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。 示例1: main//.........这里部分代码省略......... diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_unb_var8( %d, 1:3 ) = [ %d %le %le];/n", i, n, gflops / dtime_best, diff ); fflush( stdout );#endif#if TEST_BLK_VAR8==TRUE /* Variant 4 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_blk_var8( A, B, C, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( C, Cref ); printf( "data_blk_var8( %d, 1:3 ) = [ %d %le %le];/n", i, n, gflops / dtime_best, diff ); fflush( stdout );#endif FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &Cref ); FLA_Obj_free( &Cold ); printf( "/n" ); i++; } /* Print the MATLAB commands to plot the data */ /* Delete all existing figures */ printf( "close all/n" ); /* Plot the performance of FLAME */ printf( "plot( data_FLAME( :,1 ), data_FLAME( :, 2 ), 'k--' ); /n" ); /* Indicate that you want to add to the existing plot */ printf( "hold on/n" ); /* Plot the performance of the reference implementation */ // printf( "plot( data_REF( :,1 ), data_REF( :, 2 ), 'k-' ); /n" ); /* Plot the performance of your implementations */#if TEST_UNB_VAR1==TRUE printf( "plot( data_unb_var1( :,1 ), data_unb_var1( :, 2 ), 'r-.' ); /n" );#endif#if TEST_UNB_VAR2==TRUE printf( "plot( data_unb_var2( :,1 ), data_unb_var2( :, 2 ), 'g-.' ); /n" );#endif#if TEST_UNB_VAR3==TRUE
开发者ID:ebeweber,项目名称:CS378-Invariant-Project,代码行数:67,
示例2: main//.........这里部分代码省略......... if ( pc_str[param_combo][0] == 'c' || pc_str[param_combo][1] == 'c' ) { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_COMPLEX; else datatype = FLA_DOUBLE_COMPLEX; } else { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_FLOAT; else datatype = FLA_DOUBLE; } // If transposing A, switch dimensions. if ( pc_str[param_combo][0] == 'n' ) FLA_Obj_create( datatype, m, k, 0, 0, &A ); else FLA_Obj_create( datatype, k, m, 0, 0, &A ); // If transposing B, switch dimensions. if ( pc_str[param_combo][1] == 'n' ) FLA_Obj_create( datatype, k, n, 0, 0, &B ); else FLA_Obj_create( datatype, n, k, 0, 0, &B ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref ); FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); fprintf( stdout, "data_gemm_%s( %d, 1:5 ) = [ %4d %4d %4d ", pc_str[param_combo], i, m, k, n ); fflush( stdout ); time_Gemm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );/* time_Gemm( param_combo, FLA_ALG_FRONT, n_repeats, m, k, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );*/ fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } fprintf( stdout, "/n" ); }/* fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 2 ), '%c:%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_gemm_%s( :,1 ), data_gemm_%s( :, 4 ), '%c-.%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... /n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref//_gemm//_%s', 'fla//_gemm//_%s', ... 
/n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME gemm front-end performance (%s, %s, %s)' );/n", m_dim_desc, k_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc gemm_front_%s_%s_%s.eps/n", m_dim_tag, k_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize( ); return 0;}
开发者ID:flame,项目名称:libflame,代码行数:101,
示例3: main//.........这里部分代码省略......... for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );/* FLA_Obj_create( datatype, m, m, m, 1, &A ); FLA_Obj_create( datatype, m, 1, 1, 1, &b ); FLA_Obj_create( datatype, m, 1, 1, 1, &b_orig );*/ if ( FLA_Obj_is_single_precision( A ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_UNIT_DIAG, A ); FLA_Random_matrix( b ); FLA_Copy_external( b, b_orig );/* time_Trinv_uu( 0, FLA_ALG_REFERENCE, n_repeats, m, nb_alg, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; /n", i, p, gflops ); fflush( stdout );*/ for ( variant = 1; variant <= n_variants; variant++ ){ fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p ); fflush( stdout ); time_Trinv_uu( variant, FLA_ALG_UNBLOCKED, n_repeats, m, nb_alg, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Trinv_uu( variant, FLA_ALG_UNB_OPT, n_repeats, m, nb_alg, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Trinv_uu( variant, FLA_ALG_BLOCKED, n_repeats, m, nb_alg, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); } FLA_Obj_free( &A ); FLA_Obj_free( &b ); FLA_Obj_free( &b_orig ); FLA_Obj_free( &norm ); fprintf( stdout, "/n" ); }/* fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); /n" ); for ( i = 1; i <= n_variants; i++ ){ fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); /n", variant, 
variant, colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... /n" ); fprintf( stdout, "'Reference', ... /n" ); for ( i = 1; i <= n_variants; i++ ) fprintf( stdout, "'FLAME var%d', ... /n", i ); fprintf( stdout, "'Location', 'SouthWest' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME trinv//_u performance (%s)' );/n", m_dim_desc ); fprintf( stdout, "print -depsc trinv_l_%s.eps/n", m_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize( );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例4: FLA_Hevd_lv_var3_componentsFLA_Error FLA_Hevd_lv_var3_components( dim_t n_iter_max, FLA_Obj A, FLA_Obj l, dim_t k_accum, dim_t b_alg, double* dtime_tred, double* dtime_tevd, double* dtime_appq ){ FLA_Error r_val = FLA_SUCCESS; FLA_Uplo uplo = FLA_LOWER_TRIANGULAR; FLA_Datatype dt; FLA_Datatype dt_real; FLA_Datatype dt_comp; FLA_Obj T, r, d, e, G; FLA_Obj d0, e0, ls, pu; dim_t mn_A; dim_t n_G = k_accum; double dtime_temp; mn_A = FLA_Obj_length( A ); dt = FLA_Obj_datatype( A ); dt_real = FLA_Obj_datatype_proj_to_real( A ); dt_comp = FLA_Obj_datatype_proj_to_complex( A ); // If the matrix is a scalar, then the EVD is easy. if ( mn_A == 1 ) { FLA_Copy( A, l ); FLA_Set( FLA_ONE, A ); return FLA_SUCCESS; } // Create a matrix to hold block Householder transformations. FLA_Tridiag_UT_create_T( A, &T ); // Create a vector to hold the realifying scalars. FLA_Obj_create( dt, mn_A, 1, 0, 0, &r ); // Create vectors to hold the diagonal and sub-diagonal. FLA_Obj_create( dt_real, mn_A, 1, 0, 0, &d ); FLA_Obj_create( dt_real, mn_A-1, 1, 0, 0, &e ); FLA_Obj_create( dt_real, mn_A, 1, 0, 0, &d0 ); FLA_Obj_create( dt_real, mn_A-1, 1, 0, 0, &e0 ); FLA_Obj_create( dt_real, mn_A, 1, 0, 0, &pu ); FLA_Obj_create( FLA_INT, mn_A, 1, 0, 0, &ls ); FLA_Obj_create( dt_comp, mn_A-1, n_G, 0, 0, &G ); dtime_temp = FLA_Clock(); { // Reduce the matrix to tridiagonal form. FLA_Tridiag_UT( uplo, A, T ); } *dtime_tred = FLA_Clock() - dtime_temp; // Apply scalars to rotate elements on the sub-diagonal to the real domain. FLA_Tridiag_UT_realify( uplo, A, r ); // Extract the diagonal and sub-diagonal from A. FLA_Tridiag_UT_extract_diagonals( uplo, A, d, e ); dtime_temp = FLA_Clock(); { // Form Q, overwriting A. FLA_Tridiag_UT_form_Q( uplo, A, T ); } *dtime_appq = FLA_Clock() - dtime_temp; // Apply the scalars in r to Q. FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, r, A ); // Find the eigenvalues only. 
FLA_Copy( d, d0 ); FLA_Copy( e, e0 ); //r_val = FLA_Tevd_n_opt_var1( n_iter_max, d0, e0, G, A );{ int info; double* buff_d = FLA_DOUBLE_PTR( d0 ); double* buff_e = FLA_DOUBLE_PTR( e0 ); dsterf_( &mn_A, buff_d, buff_e, &info );} FLA_Sort( FLA_FORWARD, d0 ); FLA_Set( FLA_ZERO, ls ); FLA_Set( FLA_ZERO, pu ); dtime_temp = FLA_Clock(); { // Perform an eigenvalue decomposition on the tridiagonal matrix. r_val = FLA_Tevd_v_opt_var3( n_iter_max, d, e, d0, ls, pu, G, A, b_alg ); } *dtime_tevd = FLA_Clock() - dtime_temp;//FLA_Obj_show( "var4: e", e, "%22.15e", "" ); // Copy the converged eigenvalues to the output vector. FLA_Copy( d, l ); // Sort the eigenvalues and eigenvectors in ascending order. FLA_Sort_evd( FLA_FORWARD, l, A );//FLA_Obj_show( "var4: d", l, "%22.15e", "" );//FLA_Obj_show( "var4: A", A, "%8.1e + %8.1e", "" ); //FLA_Copy( d0, l ); FLA_Obj_free( &T ); FLA_Obj_free( &r );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例5: time_Trsm_lln//.........这里部分代码省略......... FLA_ONE, A, C ); break; case 1:{ // Time variant 1 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Trsm_lln_unb_var1( FLA_NONUNIT_DIAG, FLA_ONE, A, C ); break; case FLA_ALG_BLOCKED: FLA_Trsm_lln_blk_var1( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var ); break; default: printf("trouble/n"); } break; } case 2:{ // Time variant 2 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Trsm_lln_unb_var2( FLA_NONUNIT_DIAG, FLA_ONE, A, C ); break; case FLA_ALG_BLOCKED: FLA_Trsm_lln_blk_var2( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var ); break; default: printf("trouble/n"); } break; } case 3:{ // Time variant 3 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Trsm_lln_unb_var3( FLA_NONUNIT_DIAG, FLA_ONE, A, C ); break; case FLA_ALG_BLOCKED: FLA_Trsm_lln_blk_var3( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var ); break; default: printf("trouble/n"); } break; } case 4:{ // Time variant 4 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Trsm_lln_unb_var4( FLA_NONUNIT_DIAG, FLA_ONE, A, C ); break; case FLA_ALG_BLOCKED: FLA_Trsm_lln_blk_var4( FLA_NONUNIT_DIAG, FLA_ONE, A, C, cntl_trsm_var ); break; default: printf("trouble/n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } FLA_Cntl_obj_free( cntl_trsm_var ); FLA_Cntl_obj_free( cntl_trsm_blas ); FLA_Cntl_obj_free( cntl_gemm_blas ); FLA_Blocksize_free( bp ); if ( variant == 0 ) { FLA_Copy_external( C, C_ref ); *diff = 0.0; } else { *diff = FLA_Max_elemwise_diff( C, C_ref ); } *gflops = 1.0 * FLA_Obj_length( C ) * FLA_Obj_width( C ) * FLA_Obj_width( A ) / dtime_old / 1.0e9; *dtime = dtime_old; FLA_Copy_external( C_old, C ); FLA_Obj_free( &C_old );}
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例6: REF_Svdd_uv_components//.........这里部分代码省略......... *dtime_qrfa = 0.0; *dtime_gemm = 0.0; } else { FLA_Obj AT, AB; FLA_Obj UL, UR; FLA_Part_2x1( A, &AT, &AB, n_A, FLA_TOP ); FLA_Part_1x2( U, &UL, &UR, n_A, FLA_LEFT ); // Create a temporary n-by-n matrix R. FLA_Obj_create( dt_A, n_A, n_A, 0, 0, &W ); dtime_temp = FLA_Clock(); { // Perform a QR factorization. FLA_QR_blk_external( A, tq ); FLA_Copyr_external( FLA_LOWER_TRIANGULAR, A, UL ); FLA_Setr( FLA_LOWER_TRIANGULAR, FLA_ZERO, A ); } *dtime_qrfa = FLA_Clock() - dtime_temp; dtime_temp = FLA_Clock(); { // Form Q. FLA_QR_form_Q_external( U, tq ); } *dtime_appq = FLA_Clock() - dtime_temp; dtime_temp = FLA_Clock(); { // Reduce R to bidiagonal form. FLA_Bidiag_blk_external( AT, tu, tv ); FLA_Bidiag_UT_extract_diagonals( A, d, eT ); } *dtime_bred = FLA_Clock() - dtime_temp; dtime_temp = FLA_Clock(); { // Divide-and-conquor algorithm. FLA_Bsvdd_external( uplo, d, e, Ur, Vr ); } *dtime_bsvd = FLA_Clock() - dtime_temp; dtime_temp = FLA_Clock(); { // Form U in W. FLA_Copy_external( Ur, W ); FLA_Bidiag_apply_U_external( FLA_LEFT, FLA_NO_TRANSPOSE, AT, tu, W ); // Form V. FLA_Copy_external( Vr, V ); FLA_Bidiag_apply_V_external( FLA_RIGHT, FLA_CONJ_TRANSPOSE, AT, tv, V ); } *dtime_appq += FLA_Clock() - dtime_temp; dtime_temp = FLA_Clock(); { // Multiply R into U, storing the result in A and then copying // back to U. FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, UL, W, FLA_ZERO, A ); FLA_Copy( A, UL ); } *dtime_gemm = FLA_Clock() - dtime_temp; // Free R. FLA_Obj_free( &W ); } } else { FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED ); } // Copy singular values to output vector. FLA_Copy( d, s ); // Sort singular values and vectors. FLA_Sort_svd( FLA_BACKWARD, s, U, V ); FLA_Obj_free( &tq ); FLA_Obj_free( &tu ); FLA_Obj_free( &tv ); FLA_Obj_free( &d ); FLA_Obj_free( &e ); FLA_Obj_free( &Ur ); FLA_Obj_free( &Vr ); return FLA_SUCCESS;}
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例7: main//.........这里部分代码省略......... for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // Determine datatype based on trans argument. if ( pc_str[param_combo][2] == 'h' ) { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_COMPLEX; else datatype = FLA_DOUBLE_COMPLEX; } else { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_FLOAT; else datatype = FLA_DOUBLE; } // If multiplying A on the left, A is m x m; ...on the right, A is n x n. if ( pc_str[param_combo][0] == 'l' ) FLA_Obj_create( datatype, m, m, 0, 0, &A ); else FLA_Obj_create( datatype, n, n, 0, 0, &A ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref ); if ( pc_str[param_combo][1] == 'l' ) { FLA_Random_tri_matrix( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Random_matrix( C ); } else { FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Random_matrix( C ); } fprintf( stdout, "data_trsm_%s( %d, 1:3 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Trsm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, A, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );/* time_Trsm( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );*/ fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } fprintf( stdout, "/n" ); }/* fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_trsm_%s( :,1 ), data_trsm_%s( :, 2 ), '%c:%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_trsm_%s( :,1 ), data_trsm_%s( :, 4 ), '%c-.%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... 
/n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref//_trsm//_%s', 'fla//_trsm//_%s', ... /n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME trsm front-end performance (%s, %s)' );/n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc trsm_front_%s_%s.eps/n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize( ); return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例8: time_Gemm_nn//.........这里部分代码省略......... break; default: printf("trouble/n"); } break; } case 15:{ // Time variant 1->5 switch( type ){ case FLA_ALG_OPENMP_CVAR: FLA_Gemm_nn_omp_var15( FLA_ONE, A, B, C, nb_alg ); break; default: printf("trouble/n"); } break; } case 31:{ // Time variant 3->1 switch( type ){ case FLA_ALG_OPENMP_CVAR: FLA_Gemm_nn_omp_var31( FLA_ONE, A, B, C, nb_alg ); break; default: printf("trouble/n"); } break; } case 35:{ // Time variant 3->5 switch( type ){ case FLA_ALG_OPENMP_CVAR: FLA_Gemm_nn_omp_var35( FLA_ONE, A, B, C, nb_alg ); break; default: printf("trouble/n"); } break; } case 51:{ // Time variant 5->1 switch( type ){ case FLA_ALG_OPENMP_CVAR: FLA_Gemm_nn_omp_var51( FLA_ONE, A, B, C, nb_alg ); break; default: printf("trouble/n"); } break; } case 53:{ // Time variant 5->3 switch( type ){ case FLA_ALG_OPENMP_CVAR: FLA_Gemm_nn_omp_var53( FLA_ONE, A, B, C, nb_alg ); break; default: printf("trouble/n"); } break; } } if ( irep == 0 ) dtime_old = FLA_Clock() - *dtime; else{ *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } } if ( variant == 0 ){ FLA_Copy_external( C, Cref ); *diff = 0.0; } else{ *diff = FLA_Max_elemwise_diff( C, Cref ); //FLA_Obj_show( "C:", C, "%f", "/n"); } *gflops = 2.0 * FLA_Obj_length( C ) * FLA_Obj_width( C ) * FLA_Obj_width( A ) / dtime_old / 1e9; *dtime = dtime_old; FLA_Copy_external( Cold, C ); FLA_Obj_free( &Cold );}
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例9: FLA_Gemm_nn_omp_var15FLA_Error FLA_Gemm_nn_omp_var15( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj C, fla_gemm_t* cntl ){ FLA_Obj AT, A0, AB, A1, A2; FLA_Obj CT, C0, CB, C1, C2; FLA_Obj AL, AR, A10, A11, A12; FLA_Obj BT, B0, BB, B1, B2; FLA_Obj C1_local; int i, j, lock_ldim, lock_i; int b_m, b_k; FLA_Part_2x1( A, &AT, &AB, 0, FLA_TOP ); FLA_Part_2x1( C, &CT, &CB, 0, FLA_TOP ); #pragma intel omp parallel taskq { while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ) { b_m = FLA_Determine_blocksize( A, AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) ); FLA_Repart_2x1_to_3x1( AT, &A0, /* ** */ /* ** */ &A1, AB, &A2, b_m, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( CT, &C0, /* ** */ /* ** */ &C1, CB, &C2, b_m, FLA_BOTTOM ); /*------------------------------------------------------------*/ /* C1 = alpha * A1 * B + C1; */ FLA_Part_1x2( A1, &AL, &AR, 0, FLA_LEFT ); FLA_Part_2x1( B, &BT, &BB, 0, FLA_TOP ); while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) ) { b_k = FLA_Determine_blocksize( A, AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) ); // Get the index of the current partition. // FIX THIS: need + b_m - 1 or something like this //j = FLA_Obj_length( CT ) / b_m; //i = FLA_Obj_width( AL ) / b_k; //lock_ldim = FLA_get_num_threads_in_m_dim(omp_get_num_threads()); lock_i = FLA_Obj_length( CT ) / b_m; FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A10, /**/ &A11, &A12, b_k, FLA_RIGHT ); FLA_Repart_2x1_to_3x1( BT, &B0, /* ** */ /* ** */ &B1, BB, &B2, b_k, FLA_BOTTOM ); /*------------------------------------------------------------*/ /* C1 = alpha * A11 * B1 + C1; */ //// FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, //// alpha, A11, B1, FLA_ONE, C1 ); #pragma intel omp task captureprivate( lock_i, A11, B1, C1 ), private( C1_local ) { FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C1, &C1_local ); FLA_Obj_set_to_zero( C1_local ); /* C1_local = alpha * A1 * B11 + C1_local; */ FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, alpha, A11, B1, FLA_ONE, C1_local ); // Acquire lock[i] (the lock for C1). 
omp_set_lock( &fla_omp_lock[lock_i] ); /* C1 += C1_local */ FLA_Axpy_external( FLA_ONE, C1_local, C1 ); //FLA_Axpy_sync_pipeline2( j*lock_ldim, FLA_ONE, C1_local, C1 ); //FLA_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 ); //REF_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 ); // Release lock[i] (the lock for C1). omp_unset_lock( &fla_omp_lock[lock_i] ); FLA_Obj_free( &C1_local ); } //.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例10: time_Syrk_ln//.........这里部分代码省略......... switch( type ){ case FLA_ALG_OPENMP_1TASK: FLA_Syrk_ln_omp1t_var2( A, C ); break; case FLA_ALG_OPENMP_2TASKS: FLA_Syrk_ln_omp2t_var2( A, C ); break; case FLA_ALG_OPENMP_2LOOPS: FLA_Syrk_ln_omp2l_var2( A, C ); break; case FLA_ALG_OPENMP_2LOOPSPLUS: FLA_Syrk_ln_omp2x_var2( A, C ); break; default: printf("trouble/n"); } break; } case 3:{ // Time variant 3 switch( type ){ case FLA_ALG_OPENMP_1TASK: FLA_Syrk_ln_omp1t_var3( A, C ); break; case FLA_ALG_OPENMP_2TASKS: FLA_Syrk_ln_omp2t_var3( A, C ); break; case FLA_ALG_OPENMP_2LOOPS: FLA_Syrk_ln_omp2l_var3( A, C ); break; default: printf("trouble/n"); } break; } case 4:{ // Time variant 4 switch( type ){ case FLA_ALG_OPENMP_1TASK: FLA_Syrk_ln_omp1t_var4( A, C ); break; case FLA_ALG_OPENMP_2TASKS: FLA_Syrk_ln_omp2t_var4( A, C ); break; case FLA_ALG_OPENMP_2LOOPS: FLA_Syrk_ln_omp2l_var4( A, C ); break; default: printf("trouble/n"); } break; } case 5:{ // Time variant 5 switch( type ){ case FLA_ALG_OPENMP_1TASK: FLA_Syrk_ln_omp1t_var5( A, C ); break; default: printf("trouble/n"); } break; } } if ( irep == 0 ) dtime_old = FLA_Clock() - *dtime; else{ *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } } if ( variant == 0 ){ FLA_Copy_external( C, C_ref ); *diff = 0.0; } else{ *diff = FLA_Max_elemwise_diff( C, C_ref ); //FLA_Obj_show( "C:", C, "%f", "/n"); } *gflops = 1.0 * FLA_Obj_length( A ) * FLA_Obj_length( A ) * FLA_Obj_width( A ) / dtime_old / 1e9; *dtime = dtime_old; FLA_Copy_external( C_old, C ); FLA_Obj_free( &C_old );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例11: mainint main( int argc, char** argv ) { FLA_Datatype testtype = TESTTYPE; dim_t m; FLA_Obj A; FLA_Obj a1, b1, r1; FLA_Obj a2, b2, r2; FLA_Uplo uplo; FLA_Error init_result; if ( argc == 3 ) { m = atoi(argv[1]); uplo = ( atoi(argv[2]) == 1 ? FLA_UPPER_TRIANGULAR : FLA_LOWER_TRIANGULAR ); } else { fprintf(stderr, " /n"); fprintf(stderr, "Usage: %s m uplo/n", argv[0]); fprintf(stderr, " m : test matrix length/n"); fprintf(stderr, " uplo : 0) lower, 1) upper/n"); fprintf(stderr, " /n"); return -1; } if ( m == 0 ) return 0; FLA_Init_safe( &init_result ); // Test matrix A FLA_Obj_create( testtype, m, m, 0, 0, &A ); FLA_Random_spd_matrix( uplo, A ); FLA_Hermitianize( uplo, A ); FLA_Obj_fshow( stdout, "- A -", A, "% 6.4e", "--" ); FLA_Obj_create( testtype, m, 1, 0, 0, &a1 ); FLA_Obj_create( testtype, m, 1, 0, 0, &a2 ); if ( m > 1 ) { FLA_Obj_create( testtype, m-1, 1, 0, 0, &b1 ); FLA_Obj_create( testtype, m-1, 1, 0, 0, &b2 ); } FLA_Obj_create( testtype, m, 1, 0, 0, &r1 ); FLA_Obj_create( testtype, m, 1, 0, 0, &r2 ); // Mine FLA_Tridiag_UT_extract_diagonals( uplo, A, a1, b1 ); FLA_Obj_fshow( stdout, "- a1 -", a1, "% 6.4e", "--" ); if ( m > 1 ) FLA_Obj_fshow( stdout, "- b1 -", b1, "% 6.4e", "--" ); FLA_Tridiag_UT_realify_subdiagonal( b1, r1 ); if ( m > 1 ) FLA_Obj_fshow( stdout, "- b1 realified -", b1, "% 6.4e", "--" ); FLA_Obj_fshow( stdout, "- r1 -", r1, "% 6.4e", "--" ); // Field FLA_Tridiag_UT_realify( uplo, A, r2 ); FLA_Tridiag_UT_extract_diagonals( uplo, A, a2, b2 ); FLA_Obj_fshow( stdout, "- a2 -", a2, "% 6.4e", "--" ); if ( m > 1 ) FLA_Obj_fshow( stdout, "- b2 realified -", b2, "% 6.4e", "--" ); FLA_Obj_fshow( stdout, "- r2 -", r2, "% 6.4e", "--" ); printf(" diff_a = %e/n", FLA_Max_elemwise_diff( a1, a2 )); if ( m > 1 ) printf(" diff_b = %e/n", FLA_Max_elemwise_diff( b1, b2 )); printf(" diff_rL = %e/n", FLA_Max_elemwise_diff( r1, r2 )); FLA_Obj_fshow( stdout, "- A realified-", A, "% 6.4e", "--" ); FLA_Obj_free( &r2 ); FLA_Obj_free( &r1 ); if ( m > 1 ) { 
FLA_Obj_free( &b2 ); FLA_Obj_free( &b1 ); } FLA_Obj_free( &a2 ); FLA_Obj_free( &a1 ); FLA_Obj_free( &A ); FLA_Finalize_safe( init_result ); }
开发者ID:anaptyxis,项目名称:libflame,代码行数:80,
示例12: main//.........这里部分代码省略......... sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ) { if ( pc_str[param_combo][0] == 'n' || pc_str[param_combo][0] == 'c' ) FLA_Obj_create( datatype, m, n, &A ); else FLA_Obj_create( datatype, n, m, &A ); FLA_Obj_create( datatype, m, n, &C ); FLA_Obj_create( datatype, m, n, &C_ref ); FLA_Random_matrix( A ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); fprintf( stdout, "data_axpyt_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Axpyt( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, A, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Axpyt( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } fprintf( stdout, "/n" ); } fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_axpyt_%s( :,1 ), data_axpyt_%s( :, 2 ), '%c:%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_axpyt_%s( :,1 ), data_axpyt_%s( :, 4 ), '%c-.%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... 
/n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref//_axpyt//_%s', 'fla//_axpyt//_%s', ... /n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME axpyt front-end performance (%s, %s)' );/n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc axpyt_front_%s_%s.eps/n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout ); FLA_Finalize( ); return 0;}
开发者ID:fmarrabal,项目名称:libflame,代码行数:101,
示例13: main//.........这里部分代码省略......... sprintf( k_dim_desc, "k = p" ); sprintf( k_dim_tag, "k%dp", 1 ); } //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; k = k_input; if( m < 0 ) m = p / f2c_abs(m_input); if( k < 0 ) k = p / f2c_abs(k_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // If transposing A, switch dimensions. if ( pc_str[param_combo][1] == 'n' ) { FLA_Obj_create( datatype, m, k, 0, 0, &A ); FLA_Obj_create( datatype, m, k, 0, 0, &B ); } else { FLA_Obj_create( datatype, k, m, 0, 0, &A ); FLA_Obj_create( datatype, k, m, 0, 0, &B ); } FLA_Obj_create( datatype, m, m, 0, 0, &C ); FLA_Obj_create( datatype, m, m, 0, 0, &C_ref ); FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); fprintf( stdout, "data_syr2k_%s( %d, 1:3 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Syr2k( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );/* time_Syr2k( param_combo, FLA_ALG_FRONT, n_repeats, m, k, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );*/ fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } fprintf( stdout, "/n" ); }/* fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_syr2k_%s( :,1 ), data_syr2k_%s( :, 2 ), '%c:%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_syr2k_%s( :,1 ), data_syr2k_%s( :, 4 ), '%c-.%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... /n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref//_syr2k//_%s', 'fla//_syr2k//_%s', ... 
/n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME syr2k front-end performance (%s, %s)' );/n", m_dim_desc, k_dim_desc ); fprintf( stdout, "print -depsc syr2k_front_%s_%s.eps/n", m_dim_tag, k_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize( ); return 0;}
开发者ID:flame,项目名称:libflame,代码行数:101,
示例14: main//.........这里部分代码省略......... if( m < 0 ) m = p / f2c_abs(m_input); //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, m, 0, 0, &Y ); FLA_Obj_create( datatype, m, m, 0, 0, &B ); FLA_Random_spd_matrix( uplo, A ); FLA_Hermitianize( uplo, A ); FLA_Random_spd_matrix( uplo, B ); FLA_Chol( uplo, B );/* time_Eig_gest_nu( 0, FLA_ALG_REFERENCE, n_repeats, p, b_alg, inv, uplo, A, B, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; /n", i, p, gflops ); fflush( stdout );*/ for ( variant = 1; variant <= n_variants; variant++ ){ fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p ); fflush( stdout ); time_Eig_gest_nu( variant, FLA_ALG_UNBLOCKED, n_repeats, p, b_alg, inv, uplo, A, Y, B, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Eig_gest_nu( variant, FLA_ALG_UNB_OPT, n_repeats, p, b_alg, inv, uplo, A, Y, B, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Eig_gest_nu( variant, FLA_ALG_BLOCKED, n_repeats, p, b_alg, inv, uplo, A, Y, B, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); } FLA_Obj_free( &A ); FLA_Obj_free( &Y ); FLA_Obj_free( &B ); fprintf( stdout, "/n" ); }/* // Print the MATLAB commands to plot the data // Delete all existing figures fprintf( stdout, "figure;/n" ); // Plot the performance of the reference implementation fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); /n" ); // Indicate that you want to add to the existing plot fprintf( stdout, "hold on;/n" ); // Plot the data for the other numbers of threads for ( i = 1; i <= n_variants; i++ ){ fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); /n", i, i, colors[ i-1 ], ticks[ i-1 ] ); } fprintf( 
stdout, "legend( ... /n" ); fprintf( stdout, "'Reference', ... /n" ); for ( i = 1; i <= n_variants; i++ ) fprintf( stdout, "'FLAME var%d', ... /n", i ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME chol//_l performance (%s)' );/n", m_dim_desc ); fprintf( stdout, "print -depsc chol_l_%s.eps/n", m_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize( ); return 0;}
开发者ID:flame,项目名称:libflame,代码行数:101,
示例15: main//.........这里部分代码省略......... else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig ); if ( FLA_Obj_is_single_precision( A ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); if ( pc_str[param_combo][0] == 'l' ) FLA_Random_spd_matrix( FLA_LOWER_TRIANGULAR, A ); else FLA_Random_spd_matrix( FLA_UPPER_TRIANGULAR, A ); FLA_Copy_external( b, b_orig ); fprintf( stdout, "data_chol_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Chol( param_combo, FLA_ALG_REFERENCE, n_repeats, m, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Chol( param_combo, FLA_ALG_FRONT, n_repeats, m, A, b, b_orig, norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &b ); FLA_Obj_free( &b_orig ); FLA_Obj_free( &norm ); } fprintf( stdout, "/n" ); }/* fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 2 ), '%c:%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_chol_%s( :,1 ), data_chol_%s( :, 4 ), '%c-.%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... 
/n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref//_chol//_%s', 'fla//_chol//_%s', ... /n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME chol front-end performance (%s)' );/n", m_dim_desc ); fprintf( stdout, "print -depsc chol_front_%s.eps/n", m_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize(); return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例16: time_Sylv//.........这里部分代码省略......... C_old; FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_old ); FLA_Copy_external( C, C_old ); for ( irep = 0 ; irep < nrepeats; irep++ ){ FLA_Copy_external( C_old, C ); *dtime = FLA_Clock(); switch( param_combo ){ case 0:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Sylv( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale ); break; case FLA_ALG_FRONT: FLA_Sylv( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale ); break; default: printf("trouble/n"); } break; } case 1:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Sylv( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale ); break; case FLA_ALG_FRONT: FLA_Sylv( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale ); break; default: printf("trouble/n"); } break; } case 2:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Sylv( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale ); break; case FLA_ALG_FRONT: FLA_Sylv( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B, C, scale ); break; default: printf("trouble/n"); } break; } case 3:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Sylv( FLA_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale ); break; case FLA_ALG_FRONT: FLA_Sylv( FLA_TRANSPOSE, FLA_TRANSPOSE, isgn, A, B, C, scale ); break; default: printf("trouble/n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } if ( type == FLA_ALG_REFERENCE ){ FLA_Copy_external( C, C_ref ); *diff = 0.0; } else{ *diff = FLA_Max_elemwise_diff( C, C_ref ); } *gflops = ( m * m * n + n * n * m ) / dtime_old / 1e9; if ( FLA_Obj_is_complex( C ) ) *gflops *= 4.0; *dtime = dtime_old; FLA_Copy_external( C_old, C ); FLA_Obj_free( &C_old );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例17: main//.........这里部分代码省略......... fprintf( stdout, "/nclear all;/n/n" ); //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { mB = mB_input; mC = mC_input; mD = mD_input; n = n_input; if( mB < 0 ) mB = p / abs(mB_input); if( mC < 0 ) mC = p / abs(mC_input); if( mD < 0 ) mD = p / abs(mD_input); if( n < 0 ) n = p / abs(n_input); for ( variant = 0; variant < n_variants; variant++ ){ FLA_Obj_create( datatype, mB, n, 0, 0, &B ); FLA_Obj_create( datatype, mC, n, 0, 0, &C ); FLA_Obj_create( datatype, mD, n, 0, 0, &D ); FLA_Obj_create( datatype, b_alg, n, 0, 0, &T ); FLA_Obj_create( datatype, n, n, 0, 0, &R ); FLA_Obj_create( datatype, n, n, 0, 0, &E ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Random_matrix( D ); FLA_Set( FLA_ZERO, R ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, B, FLA_ONE, R ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, D, FLA_ONE, R ); FLA_Chol( FLA_UPPER_TRIANGULAR, R ); FLA_Set( FLA_ZERO, E ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, B, FLA_ONE, E ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, C, FLA_ONE, E ); FLA_Chol( FLA_UPPER_TRIANGULAR, E ); fprintf( stdout, "data_uddate_ut( %d, 1:5 ) = [ %d ", i, p ); fflush( stdout ); time_UDdate_UT( variant, FLA_ALG_FRONT, n_repeats, mB, mC, mD, n, B, C, D, T, R, E, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &D ); FLA_Obj_free( &T ); FLA_Obj_free( &R ); FLA_Obj_free( &E ); } fprintf( stdout, "/n" ); }/* fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); for ( i = 0; i < n_variants; i++ ) { fprintf( stdout, "plot( data_qr_ut( :,1 ), data_qr_ut( :, 2 ), '%c:%c' ); /n", colors[ i ], ticks[ i ] ); fprintf( stdout, 
"plot( data_qr_ut( :,1 ), data_qr_ut( :, 4 ), '%c-.%c' ); /n", colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... /n" ); for ( i = 0; i < n_variants; i++ ) fprintf( stdout, "'ref//_qr//_ut', 'fla//_qr//_ut', ... /n" ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME UDdate_UT front-end performance (%s, %s)' );/n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc qr_ut_front_%s_%s.eps/n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize( ); return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例18: time_Tevd_v//.........这里部分代码省略......... switch( type ){ case FLA_ALG_UNB_OPT: FLA_Tevd_v_opt_var2( n_iter_max, d, e, G, R, W, A, b_alg ); break; } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } { FLA_Obj V, A_rev_evd, norm, eye; FLA_Copy( d, l );//FLA_Obj_show( "A_save", A_save, "%9.2e + %9.2e ", "" );//FLA_Obj_show( "A_evd", A, "%9.2e + %9.2e ", "" ); FLA_Sort_evd( FLA_FORWARD, l, A ); FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &V ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_rev_evd ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &eye ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, l, A ); FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, V, FLA_ZERO, A_rev_evd ); FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A_rev_evd );/* FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, D, FLA_ZERO, A_rev_evd ); FLA_Copy( A_rev_evd, D ); FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, D, V, FLA_ZERO, A_rev_evd ); FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A_rev_evd );*///FLA_Obj_show( "A_rev_evd", A_rev_evd, "%9.2e + %9.2e ", "" ); FLA_Axpy( FLA_MINUS_ONE, A_orig, A_rev_evd ); FLA_Norm_frob( A_rev_evd, norm ); FLA_Obj_extract_real_scalar( norm, diff1 ); //*diff = FLA_Max_elemwise_diff( A_orig, A_rev_evd ); FLA_Set_to_identity( eye ); FLA_Copy( V, A_rev_evd ); FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, V, A_rev_evd, FLA_MINUS_ONE, eye ); FLA_Norm_frob( eye, norm ); FLA_Obj_extract_real_scalar( norm, diff2 );/*FLA_Obj_free( &EL );FLA_Obj_free( &EU );FLA_Obj_free( &D );FLA_Obj_free( &dc );FLA_Obj_free( &ec );*/ FLA_Obj_free( &V ); FLA_Obj_free( &A_rev_evd ); FLA_Obj_free( &eye ); FLA_Obj_free( &norm ); } k = 2.00; if ( FLA_Obj_is_complex( A ) ) { *gflops = ( ( 4.5 * k * m * m ) + 2.0 * ( 3.0 * k * m * m * m ) ) / dtime_old / 1e9; } else { *gflops = ( ( 4.5 * k * m * m ) + 1.0 * ( 3.0 * 
k * m * m * m ) ) / dtime_old / 1e9; } *dtime = dtime_old; FLA_Copy_external( A_save, A ); FLA_Copy_external( G_save, G ); FLA_Copy_external( d_save, d ); FLA_Copy_external( e_save, e ); FLA_Obj_free( &A_save ); FLA_Obj_free( &G_save ); FLA_Obj_free( &d_save ); FLA_Obj_free( &e_save );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例19: time_Copyt//.........这里部分代码省略......... for ( irep = 0 ; irep < nrepeats; irep++ ){ FLA_Copy_external( C_old, C ); *dtime = FLA_Clock(); switch( param_combo ){ // Time parameter combination 0 case 0:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Copyt( FLA_NO_TRANSPOSE, A, C ); break; case FLA_ALG_FRONT: FLA_Copyt( FLA_NO_TRANSPOSE, A, C ); break; default: printf("trouble/n"); } break; } case 1:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Copyt( FLA_TRANSPOSE, A, C ); break; case FLA_ALG_FRONT: FLA_Copyt( FLA_TRANSPOSE, A, C ); break; default: printf("trouble/n"); } break; } case 2:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Copyt( FLA_CONJ_NO_TRANSPOSE, A, C ); break; case FLA_ALG_FRONT: FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, A, C ); break; default: printf("trouble/n"); } break; } case 3:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Copyt( FLA_CONJ_TRANSPOSE, A, C ); break; case FLA_ALG_FRONT: FLA_Copyt( FLA_CONJ_TRANSPOSE, A, C ); break; default: printf("trouble/n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } if ( type == FLA_ALG_REFERENCE ) { FLA_Copy_external( C, C_ref ); *diff = 0.0; } else { *diff = FLA_Max_elemwise_diff( C, C_ref ); } *gflops = 2.0 * m * n / dtime_old / 1.0e9; if ( FLA_Obj_is_complex( A ) ) *gflops *= 4.0; *dtime = dtime_old; FLA_Copy_external( C_old, C ); FLA_Obj_free( &C_old );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例20: FLA_Lyap_n_unb_var4FLA_Error FLA_Lyap_n_unb_var4( FLA_Obj isgn, FLA_Obj A, FLA_Obj C ){ FLA_Obj ATL, ATR, A00, a01, A02, ABL, ABR, a10t, alpha11, a12t, A20, a21, A22; FLA_Obj CTL, CTR, C00, c01, C02, CBL, CBR, c10t, gamma11, c12t, C20, c21, C22; FLA_Obj WTL, WTR, W00, w01, W02, WBL, WBR, w10t, omega11, w12t, W20, w21, W22; FLA_Obj W, omega; FLA_Scal( isgn, C ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &W ); FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &omega ); FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 0, 0, FLA_BR ); FLA_Part_2x2( C, &CTL, &CTR, &CBL, &CBR, 0, 0, FLA_BR ); FLA_Part_2x2( W, &WTL, &WTR, &WBL, &WBR, 0, 0, FLA_BR ); while ( FLA_Obj_length( CTL ) > 0 ){ FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &a01, /**/ &A02, &a10t, &alpha11, /**/ &a12t, /* ************* */ /* ************************** */ ABL, /**/ ABR, &A20, &a21, /**/ &A22, 1, 1, FLA_TL ); FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &c01, /**/ &C02, &c10t, &gamma11, /**/ &c12t, /* ************* */ /* ************************** */ CBL, /**/ CBR, &C20, &c21, /**/ &C22, 1, 1, FLA_TL ); FLA_Repart_2x2_to_3x3( WTL, /**/ WTR, &W00, &w01, /**/ &W02, &w10t, &omega11, /**/ &w12t, /* ************* */ /* ************************** */ WBL, /**/ WBR, &W20, &w21, /**/ &W22, 1, 1, FLA_TL ); /*------------------------------------------------------------*/ // gamma11 = gamma11 / ( alpha11 + alpha11' ); FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega ); FLA_Mult_add( FLA_ONE, alpha11, omega ); FLA_Inv_scal( omega, gamma11 ); // c01 = c01 - a01 * gamma11; FLA_Axpys( FLA_MINUS_ONE, gamma11, a01, FLA_ONE, c01 ); // c01 = inv( triu(A00) + conj(alpha) * I ) * c01; FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, A00, W00 ); FLA_Shift_diag( FLA_CONJUGATE, alpha11, W00 ); FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, W00, c01 ); // C00 = C00 - a01 * c01' - c01 * a01'; FLA_Her2( FLA_UPPER_TRIANGULAR, FLA_MINUS_ONE, a01, c01, C00 ); 
/*------------------------------------------------------------*/ FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ a01, A02, /* ************** */ /* ************************ */ a10t, /**/ alpha11, a12t, &ABL, /**/ &ABR, A20, /**/ a21, A22, FLA_BR ); FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ c01, C02, /* ************** */ /* ************************ */ c10t, /**/ gamma11, c12t, &CBL, /**/ &CBR, C20, /**/ c21, C22, FLA_BR ); FLA_Cont_with_3x3_to_2x2( &WTL, /**/ &WTR, W00, /**/ w01, W02, /* ************** */ /* ************************ */ w10t, /**/ omega11, w12t, &WBL, /**/ &WBR, W20, /**/ w21, W22, FLA_BR ); } FLA_Obj_free( &W ); FLA_Obj_free( &omega ); return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:94,
示例21: main//.........这里部分代码省略......... } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_set_num_threads( n_threads ); //FLASH_Queue_set_verbose_output( TRUE ); //FLA_Check_error_level_set( FLA_NO_ERROR_CHECKING ); //FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if ( m < 0 ) m = p * abs(m_input); if ( n < 0 ) n = p * abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ) { FLA_Obj_create( datatype, m, n, 0, 0, &A_flat ); FLA_Obj_create( datatype, n, 1, 0, 0, &x_flat ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_flat ); FLA_Random_matrix( A_flat ); FLA_Random_matrix( b_flat ); FLASH_QR_UT_create_hier_matrices( A_flat, 1, &b_flash, &A, &TW ); FLASH_Obj_create_hier_copy_of_flat( b_flat, 1, &b_flash, &b ); FLASH_Obj_create_hier_copy_of_flat( x_flat, 1, &b_flash, &x ); fprintf( stdout, "data_qrut_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_QR_UT( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, TW, b, x, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &A_flat ); FLA_Obj_free( &b_flat ); FLA_Obj_free( &x_flat ); FLASH_Obj_free( &A ); FLASH_Obj_free( &TW ); FLASH_Obj_free( &b ); FLASH_Obj_free( &x ); } }/* fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_qrut_%s( :,1 ), data_qrut_%s( :, 2 ), '%c:%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_qrut_%s( :,1 ), data_qrut_%s( :, 4 ), '%c-.%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } 
fprintf( stdout, "legend( ... /n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref//_qrut//_%s', 'fla//_qrut//_%s', ... /n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME qrut front-end performance (%s)' );/n", m_dim_desc ); fprintf( stdout, "print -depsc qrut_front_%s.eps/n", m_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize( ); return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例22: time_Gemm_pp_nnvoid time_Gemm_pp_nn( int variant, int type, int nrepeats, int n, int nb_alg, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj Cref, double *dtime, double *diff, double *mflops ){ int irep, info, lwork; double dtime_old, d_minus_one = -1.0, d_one = 1.0; FLA_Obj Cold; FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &Cold ); FLA_Copy_external( C, Cold ); for ( irep = 0 ; irep < nrepeats; irep++ ){ FLA_Copy_external( Cold, C ); *dtime = FLA_Clock(); switch( variant ){ case 0: // Time reference implementation REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, ONE, A, B, FLA_ONE, C ); break; case 1:{ // Time variant 1 switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Gemm_pp_nn_var1( FLA_ONE, A, B, C, nb_alg ); break; case FLA_ALG_BLOCKED: REF_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, ONE, A, B, FLA_ONE, C ); break; default: printf("trouble/n"); } break; } } if ( irep == 0 ) dtime_old = FLA_Clock() - *dtime; else{ *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } } if ( variant == 0 ){ FLA_Copy_external( C, Cref ); *diff = 0.0; } else{ *diff = FLA_Max_elemwise_diff( C, Cref ); } *mflops = 2.0 * FLA_Obj_length( C ) * FLA_Obj_width( C ) * FLA_Obj_width( A ) / dtime_old / 1000000; *dtime = dtime_old; FLA_Copy_external( Cold, C ); FLA_Obj_free( &Cold );}
开发者ID:pgawron,项目名称:tlash,代码行数:79,
示例23: main//.........这里部分代码省略......... blas_thread_init(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; k = k_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( k < 0 ) k = p / abs(k_input); if( n < 0 ) n = p / abs(n_input); FLA_Obj_create( FLA_DOUBLE, m, k, &A ); FLA_Obj_create( FLA_DOUBLE, k, n, &B ); FLA_Obj_create( FLA_DOUBLE, m, n, &C ); FLA_Obj_create( FLA_DOUBLE, m, n, &C_ref ); /* Generate random matrices A, C */ if( p > 4000 ){ FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); } blas_cpu_number = 1; //time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg, // A, B, C, C_ref, &dtime, &diff, &gflops ); //fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; /n", i, p, gflops ); //fflush( stdout ); for ( j = 0; j < n_thread_experiments; j++ ){ n_threads = n_threads_exp[j]; blas_cpu_number = n_threads; fprintf( stdout, "data_nth%d( %d, 1:3 ) = [ %d ", n_threads, i, p ); fflush( stdout ); time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); } fprintf( stdout, "/n" ); FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } /* Print the MATLAB commands to plot the data */ /* Delete all existing figures */ fprintf( stdout, "figure;/n" ); /* Indicate that you want to add to the existing plot */ fprintf( stdout, "hold on;/n" ); /* Plot the data for the other numbers of threads */ for ( i = 0; i < n_thread_experiments; i++ ){ fprintf( stdout, "plot( data_nth%d( :,1 ), data_nth%d( :, 2 ), '%c:%c' ); /n", n_threads_exp[ i ], n_threads_exp[ i ], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... /n" ); for ( i = 0; i < n_thread_experiments-1; i++ ) fprintf( stdout, "'%d threads', ... 
/n", n_threads_exp[ i ] ); fprintf( stdout, "'%d threads', 'Location', 'Best' ); /n", n_threads_exp[ n_thread_experiments-1 ] ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, n_threads_exp[n_thread_experiments-1] * max_gflops ); fprintf( stdout, "title( 'Goto BLAS dgemm performance (%s, %s, %s)' );/n", m_dim_desc, k_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc gemm_nn_goto_p_%s_%s_%s.eps/n", m_dim_tag, k_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout ); FLA_Finalize( );}
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例24: main//.........这里部分代码省略......... printf( "data_unb_var3( %d, 1:3 ) = [ %d %le %le];/n", i, n, gflops / dtime_best, diff ); fflush( stdout ); } /* Variant 3 blocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, A ); dtime = FLA_Clock();#if TIME_BLK_VAR3 == TRUE Chol_blk_var3( A, nb_alg );#else REF_Chol( TIME_LAPACK, A, nb_alg );#endif dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } diff = FLA_Max_elemwise_diff( A, Aref ); printf( "data_blk_var3( %d, 1:3 ) = [ %d %le %le];/n", i, n, gflops / dtime_best, diff ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &Aold ); FLA_Obj_free( &Aref ); FLA_Obj_free( &delta ); printf( "/n" ); i++; } /* Print the MATLAB commands to plot the data */ /* Delete all existing figures */ printf( "close all/n" );#if OCTAVE == TRUE /* Plot the performance of FLAME */ printf( "plot( data_FLAME( :,1 ), data_FLAME( :, 2 ), '-k;libflame;' ); /n" ); /* Indicate that you want to add to the existing plot */ printf( "hold on/n" ); /* Plot the performance of the reference implementation */ printf( "plot( data_REF( :,1 ), data_REF( :, 2 ), '-m;reference;' ); /n" ); /* Plot the performance of your implementations */ printf( "plot( data_unb_var1( :,1 ), data_unb_var1( :, 2 ), /"-rx;UnbVar1;/" ); /n" ); printf( "plot( data_unb_var2( :,1 ), data_unb_var2( :, 2 ), /"-go;UnbVar2;/" ); /n" ); printf( "plot( data_unb_var3( :,1 ), data_unb_var3( :, 2 ), /"-b*;UnbVar3;/" ); /n" ); printf( "plot( data_blk_var1( :,1 ), data_blk_var1( :, 2 ), /"-rx;BlkVar1;/", /"markersize/", 3 ); /n" ); printf( "plot( data_blk_var2( :,1 ), data_blk_var2( :, 2 ), /"-go;BlkVar2;/", /"markersize/", 3 ); /n" ); printf( "plot( data_blk_var3( :,1 ), data_blk_var3( :, 2 ), /"-b*;BlkVar3;/", /"markersize/", 3 ); /n" );
开发者ID:ztschir,项目名称:High-Performance,代码行数:66,
示例25: time_Gemm//.........这里部分代码省略......... // Time parameter combination 5 case 5:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Gemm( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break; case FLA_ALG_FRONT: FLA_Gemm( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break; default: printf("trouble/n"); } break; } // Time parameter combination 6 case 6:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Gemm( FLA_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break; case FLA_ALG_FRONT: FLA_Gemm( FLA_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break; default: printf("trouble/n"); } break; } // Time parameter combination 7 case 7:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Gemm( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break; case FLA_ALG_FRONT: FLA_Gemm( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break; default: printf("trouble/n"); } break; } // Time parameter combination 8 case 8:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Gemm( FLA_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break; case FLA_ALG_FRONT: FLA_Gemm( FLA_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A, B, FLA_ZERO, C ); break; default: printf("trouble/n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); }/* if ( type == FLA_ALG_REFERENCE ) { FLA_Copy_external( C, C_ref ); *diff = 0.0; } else { *diff = FLA_Max_elemwise_diff( C, C_ref ); }*/ *gflops = 2.0 * m * k * n / dtime_old / 1.0e9; if ( param_combo == 0 || param_combo == 1 || param_combo == 2 || param_combo == 3 || param_combo == 6 ) *gflops *= 4.0; *dtime = dtime_old; FLA_Copy_external( C_old, C ); FLA_Obj_free( &C_old );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例26: libfla_test_hemm_experiment//.........这里部分代码省略......... A_test = A; B_test = B; C_test = C; } // Create a control tree for the individual variants. if ( impl == FLA_TEST_FLAT_UNB_VAR || impl == FLA_TEST_FLAT_OPT_VAR || impl == FLA_TEST_FLAT_BLK_VAR || impl == FLA_TEST_FLAT_UNB_EXT || impl == FLA_TEST_FLAT_BLK_EXT ) libfla_test_hemm_cntl_create( var, b_alg_flat ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { if ( impl == FLA_TEST_HIER_FRONT_END ) FLASH_Obj_hierarchify( C_save, C_test ); else FLA_Copy_external( C_save, C_test ); time = FLA_Clock(); libfla_test_hemm_impl( impl, side, uplo, alpha, A_test, B_test, beta, C_test ); time = FLA_Clock() - time; time_min = min( time_min, time ); } // Copy the solution to flat matrix X. if ( impl == FLA_TEST_HIER_FRONT_END ) { FLASH_Obj_flatten( C_test, C ); } else { // No action needed since C_test and C refer to the same object. } // Free the hierarchical matrices if we're testing the FLASH front-end. if ( impl == FLA_TEST_HIER_FRONT_END ) { FLASH_Obj_free( &A_test ); FLASH_Obj_free( &B_test ); FLASH_Obj_free( &C_test ); } // Free the control trees if we're testing the variants. if ( impl == FLA_TEST_FLAT_UNB_VAR || impl == FLA_TEST_FLAT_OPT_VAR || impl == FLA_TEST_FLAT_BLK_VAR || impl == FLA_TEST_FLAT_UNB_EXT || impl == FLA_TEST_FLAT_BLK_EXT ) libfla_test_hemm_cntl_free(); // Compute the performance of the best experiment repeat. 
if ( side == FLA_LEFT ) *perf = ( 1 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF; else *perf = ( 1 * m * n * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0; // Compute: // y = C * x // and compare to // z = ( beta * C_orig + alpha * A * B ) x (side = left) // z = ( beta * C_orig + alpha * B * A ) x (side = right) FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_ONE, C, x, FLA_ZERO, y ); if ( side == FLA_LEFT ) { FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_ONE, B, x, FLA_ZERO, w ); FLA_Hemv_external( uplo, alpha, A, w, FLA_ZERO, z ); } else { FLA_Hemv_external( uplo, FLA_ONE, A, x, FLA_ZERO, w ); FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B, w, FLA_ZERO, z ); } FLA_Gemv_external( FLA_NO_TRANSPOSE, beta, C_save, x, FLA_ONE, z ); // Compute || y - z ||. //FLA_Axpy_external( FLA_MINUS_ONE, y, z ); //FLA_Nrm2_external( z, norm ); //FLA_Obj_extract_real_scalar( norm, residual ); *residual = FLA_Max_elemwise_diff( y, z ); // Free the supporting flat objects. FLA_Obj_free( &C_save ); // Free the flat test matrices. FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &x ); FLA_Obj_free( &y ); FLA_Obj_free( &z ); FLA_Obj_free( &w ); FLA_Obj_free( &norm );}
开发者ID:fmarrabal,项目名称:libflame,代码行数:101,
示例27: libfla_test_qrut_experiment//.........这里部分代码省略......... // Initialize the test matrices. FLA_Random_matrix( A ); // Save the original object contents in a temporary object. FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save ); // Create vectors to form a linear system. FLA_Obj_create( datatype, n, 1, 0, 0, &x ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, n, 1, 0, 0, &y ); // Create a real scalar object to hold the norm of A. FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); // Create a random right-hand side vector. FLA_Random_matrix( b ); // Use hierarchical matrices if we're testing the FLASH front-end. if ( impl == FLA_TEST_HIER_FRONT_END ) { FLASH_QR_UT_create_hier_matrices( A, 1, &b_flash, &A_test, &T_test ); FLASH_Obj_create_hier_copy_of_flat( b, 1, &b_flash, &b_test ); FLASH_Obj_create_hier_copy_of_flat( x, 1, &b_flash, &x_test ); } else { A_test = A; T_test = T; } // Create a control tree for the individual variants. if ( impl == FLA_TEST_FLAT_UNB_VAR || impl == FLA_TEST_FLAT_OPT_VAR || impl == FLA_TEST_FLAT_BLK_VAR ) libfla_test_qrut_cntl_create( var, b_alg_flat ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { if ( impl == FLA_TEST_HIER_FRONT_END ) FLASH_Obj_hierarchify( A_save, A_test ); else FLA_Copy_external( A_save, A_test ); time = FLA_Clock(); libfla_test_qrut_impl( impl, A_test, T_test ); time = FLA_Clock() - time; time_min = min( time_min, time ); } // Perform a linear solve with the result. if ( impl == FLA_TEST_HIER_FRONT_END ) { FLASH_QR_UT_solve( A_test, T_test, b_test, x_test ); FLASH_Obj_flatten( x_test, x ); } else { FLA_QR_UT_solve( A_test, T_test, b, x ); } // Free the hierarchical matrices if we're testing the FLASH front-end. if ( impl == FLA_TEST_HIER_FRONT_END ) { FLASH_Obj_free( &A_test ); FLASH_Obj_free( &T_test ); FLASH_Obj_free( &b_test ); FLASH_Obj_free( &x_test ); } // Free the control trees if we're testing the variants. 
if ( impl == FLA_TEST_FLAT_UNB_VAR || impl == FLA_TEST_FLAT_OPT_VAR || impl == FLA_TEST_FLAT_BLK_VAR ) libfla_test_qrut_cntl_free(); // Compute the performance of the best experiment repeat. *perf = ( 2.0 * m * n * n - ( 2.0 / 3.0 ) * n * n * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0; // Compute the residual. FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_ONE, A_save, x, FLA_MINUS_ONE, b ); FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b, FLA_ZERO, y ); FLA_Nrm2_external( y, norm ); FLA_Obj_extract_real_scalar( norm, residual ); // Free the supporting flat objects. FLA_Obj_free( &x ); FLA_Obj_free( &b ); FLA_Obj_free( &y ); FLA_Obj_free( &norm ); FLA_Obj_free( &A_save ); // Free the flat test matrices. FLA_Obj_free( &A ); FLA_Obj_free( &T );}
开发者ID:flame,项目名称:libflame,代码行数:101,
示例28: main//.........这里部分代码省略......... FLA_Obj_create( datatype, k, n, 0, 0, &B ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref ); /* Generate random matrices A, C */ FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); /* Time the reference implementation */ time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; /n", i, p, gflops ); fflush( stdout ); for ( variant = 1; variant <= n_variants; variant++ ){ fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p ); fflush( stdout ); time_Gemm_nn( variant, FLA_ALG_UNBLOCKED, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); //gflops = 0.0; fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Gemm_nn( variant, FLA_ALG_BLOCKED, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); //time_Gemm_nn( variant, FLA_ALG_OPTIMIZED, n_repeats, p, nb_alg, // A, B, C, C_ref, &dtime, &diff, &gflops ); //fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); //fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); } fprintf( stdout, "/n" ); FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } /* Print the MATLAB commands to plot the data */ /* Delete all existing figures */ fprintf( stdout, "figure;/n" ); /* Plot the performance of the reference implementation */ fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); /n" ); /* Indicate that you want to add to the existing plot */ fprintf( stdout, "hold on;/n" ); /* Plot the data for the other numbers of threads */ for ( i = 1; i <= n_variants; i++ ) { fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); /n", i, i, colors[ i-1 ], ticks[ i-1 ] ); fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 4 
), '%c-.%c' ); /n", i, i, colors[ i-1 ], ticks[ i-1 ] ); //fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 6 ), '%c--%c' ); /n", // i, i, colors[ i-1 ], ticks[ i-1 ] ); } fprintf( stdout, "legend( ... /n" ); fprintf( stdout, "'Reference', ... /n" ); for ( i = 1; i < n_variants; i++ ) //fprintf( stdout, "'unb//_var%d', 'blk//_var%d', 'opt//_var%d' ... /n", i, i, i ); fprintf( stdout, "'unb//_var%d', 'blk//_var%d', ... /n", i, i ); i = n_variants; //fprintf( stdout, "'unb//_var%d', 'blk//_var%d', 'opt//_var%d' ); /n", i, i, i ); fprintf( stdout, "'unb//_var%d', 'blk//_var%d' ); /n", i, i ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME gemm//_nn performance (%s, %s, %s)' );/n", m_dim_desc, k_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc gemm_nn_%s_%s_%s.eps/n", m_dim_tag, k_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout ); FLA_Finalize( );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例29: time_Transpose//.........这里部分代码省略......... cntl_trans_var_unb; fla_transpose_t* cntl_trans_var_blk; fla_swap_t* cntl_swap_var_blk; fla_swap_t* cntl_swap_blas; bp = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg ); cntl_swap_blas = FLA_Cntl_swap_obj_create( FLA_FLAT, FLA_SUBPROBLEM, NULL, NULL ); cntl_swap_var_blk = FLA_Cntl_swap_obj_create( FLA_FLAT, FLA_UNBLOCKED_VARIANT1, bp, cntl_swap_blas ); cntl_trans_var_unb = FLA_Cntl_transpose_obj_create( FLA_FLAT, FLA_UNBLOCKED_VARIANT1, NULL, NULL, NULL ); cntl_trans_var_blk = FLA_Cntl_transpose_obj_create( FLA_FLAT, variant, bp, cntl_trans_var_unb, cntl_swap_var_blk ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_old ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_tmp ); FLA_Copy_external( A, A_old ); for ( irep = 0 ; irep < nrepeats; irep++ ){ FLA_Copy_external( A_old, A ); *dtime = FLA_Clock(); switch( variant ){ case 0: //FLA_Copyt_external( FLA_TRANSPOSE, A, A_tmp ); //FLA_Set( FLA_ZERO, A ); //FLA_Copyt_external( FLA_NO_TRANSPOSE, A_tmp, A ); FLA_Transpose( A ); break; case 1:{ /* Time variant 1 */ switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Transpose_unb_var1( A ); break; case FLA_ALG_BLOCKED: FLA_Transpose_blk_var1( A, cntl_trans_var_blk ); break; default: printf("trouble/n"); } break; } case 2:{ /* Time variant 2 */ switch( type ){ case FLA_ALG_UNBLOCKED: FLA_Transpose_unb_var2( A ); break; case FLA_ALG_BLOCKED: FLA_Transpose_blk_var2( A, cntl_trans_var_blk ); break; default: printf("trouble/n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } FLA_Cntl_obj_free( cntl_trans_var_blk ); FLA_Cntl_obj_free( cntl_trans_var_unb ); FLA_Cntl_obj_free( cntl_swap_var_blk ); FLA_Cntl_obj_free( cntl_swap_blas ); FLA_Blocksize_free( bp ); if ( variant == 0 ){ FLA_Copy_external( A, A_ref ); *diff = 0.0; } else{ *diff = FLA_Max_elemwise_diff( A, A_ref ); } *gflops = 4 * n * n / dtime_old / 1e9; *dtime = dtime_old; FLA_Copy_external( A_old, A ); FLA_Obj_free( &A_old ); 
FLA_Obj_free( &A_tmp );}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
注:本文中的 FLA_Obj_free 函数示例整理自 GitHub/MSDocs 等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。
相关文章:C++ FLA_Obj_length函数代码示例 | C++ FLA_Obj_datatype函数代码示例