这篇教程《C++ FLA_Random_matrix函数代码示例》写得很实用,希望能帮到您。
本文整理汇总了C++中FLA_Random_matrix函数的典型用法代码示例。如果您正苦于以下问题:C++ FLA_Random_matrix函数的具体用法?C++ FLA_Random_matrix怎么用?C++ FLA_Random_matrix使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。 在下文中一共展示了FLA_Random_matrix函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: FLASH_Random_matrix
/*
 * FLASH_Random_matrix
 *
 * Randomizes the contents of the hierarchical matrix object H.
 *
 * The work is done on a temporary flat copy of H: the copy is filled by
 * FLA_Random_matrix(), written back into H with FLASH_Obj_hierarchify(),
 * and then released.
 *
 * Parameters:
 *   H - the hierarchical object to randomize.
 *
 * Returns:
 *   FLA_SUCCESS in every case, including the early exit taken when H has a
 *   zero dimension. The return values of the helper calls are not inspected.
 */
FLA_Error FLASH_Random_matrix( FLA_Obj H )
{
    FLA_Obj F;

    // Exit early if one dimension is zero.
    if ( FLA_Obj_has_zero_dim( H ) )
        return FLA_SUCCESS;

    // Create a temporary flat copy of the hierarchical object.
    FLASH_Obj_create_flat_copy_of_hier( H, &F );

    // Randomize the flat matrix object.
    FLA_Random_matrix( F );

    // Copy the flat object's contents back to the hierarchical object.
    FLASH_Obj_hierarchify( F, H );

    // Free the temporary flat object.
    FLA_Obj_free( &F );

    return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:21,
示例2: main
/*
 * Test driver: creates a random m x 1 vector of type TESTTYPE, prints it,
 * then extracts its real and imaginary parts into an object of type REALTYPE
 * and prints each of them.
 *
 * Usage: <program> m
 *   m : test vector length (non-negative decimal integer)
 *
 * Returns 0 on success (including m == 0, where nothing is done) and -1 when
 * the command line is missing, malformed, or negative.
 */
int main( int argc, char** argv )
{
    FLA_Datatype testtype = TESTTYPE;
    FLA_Datatype realtype = REALTYPE;
    dim_t        m;
    FLA_Obj      a, b;
    FLA_Error    init_result;
    char*        end   = NULL;
    long         m_arg = -1;

    // Parse with strtol() rather than atoi() so that non-numeric and
    // negative arguments are rejected instead of being converted silently.
    if ( argc == 2 )
        m_arg = strtol( argv[1], &end, 10 );

    if ( argc != 2 || end == argv[1] || *end != '\0' || m_arg < 0 )
    {
        fprintf( stderr, " \n" );
        fprintf( stderr, "Usage: %s m\n", argv[0] );
        fprintf( stderr, " m : test vector length\n" );
        fprintf( stderr, " \n" );
        return -1;
    }

    m = ( dim_t ) m_arg;

    // Nothing to test for an empty vector.
    if ( m == 0 )
        return 0;

    FLA_Init_safe( &init_result );

    // Random test vector a.
    FLA_Obj_create( testtype, m, 1, 0, 0, &a );
    FLA_Random_matrix( a );
    FLA_Obj_fshow( stdout, "- a -", a, "% 6.4e", "--" );

    // b receives the real part of a, then is reused for the imaginary part.
    // NOTE(review): b is created 1 x m while a is m x 1 -- kept as in the
    // original; confirm the extract routines accept this orientation.
    FLA_Obj_create( realtype, 1, m, 0, 0, &b );

    FLA_Obj_extract_real_part( a, b );
    FLA_Obj_fshow( stdout, "- a real -", b, "% 6.4e", "--" );

    FLA_Obj_extract_imag_part( a, b );
    FLA_Obj_fshow( stdout, "- a imag -", b, "% 6.4e", "--" );

    FLA_Obj_free( &b );
    FLA_Obj_free( &a );

    FLA_Finalize_safe( init_result );

    return 0;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:38,
示例3: main//.........这里部分代码省略......... sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // If multiplying A on the left, A is m x m; ...on the right, A is n x n. if ( pc_str[param_combo][0] == 'l' ) FLA_Obj_create( datatype, m, m, 0, 0, &A ); else FLA_Obj_create( datatype, n, n, 0, 0, &A ); FLA_Obj_create( datatype, m, n, 0, 0, &B ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref ); FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); fprintf( stdout, "data_symm_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Symm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Symm( param_combo, FLA_ALG_FRONT, n_repeats, m, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } fprintf( stdout, "/n" ); }
开发者ID:pgawron,项目名称:tlash,代码行数:67,
示例4: mainint main(int argc, char *argv[]){ int datatype, m_input, m, p_first, p_last, p_inc, p, nb_alg, variant, n_repeats, i, j, n_variants = N_VARIANTS; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, b, b_orig, norm; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c Enter blocking size:", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d/n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d/n", '%', m_input ); fprintf( stdout, "/nclear all;/n/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / f2c_abs(m_input); FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );/* FLA_Obj_create( datatype, m, m, m, 1, &A ); FLA_Obj_create( datatype, m, 1, 1, 1, &b ); FLA_Obj_create( datatype, m, 1, 1, 1, &b_orig );*/ if ( FLA_Obj_is_single_precision( A ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Random_matrix( b ); 
FLA_Copy_external( b, b_orig );/* time_Trinv_un( 0, FLA_ALG_REFERENCE, n_repeats, m, nb_alg, A, b, b_orig, norm, &dtime, &diff, &gflops );//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,
示例5: main//.........这里部分代码省略......... else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( pp = p_first, i = 1; pp <= p_last; pp += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = pp / abs(m_input); if( n < 0 ) n = pp / abs(n_input); min_m_n = min( m, n ); for ( pivot_combo = 0; pivot_combo < n_pivot_combos; pivot_combo++ ){ FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig ); if ( FLA_Obj_is_single_precision( C ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &b_norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &b_norm ); FLA_Random_matrix( C ); FLA_Random_matrix( b ); FLA_Copy_external( b, b_orig ); fprintf( stdout, "data_lu_%s( %d, 1:5 ) = [ %d ", pc_str[pivot_combo], i, pp ); fflush( stdout ); //time_LU( pivot_combo, FLA_ALG_REFERENCE, n_repeats, m, n, // C, b, b_orig, b_norm, &dtime, &diff, &gflops ); //fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); //fflush( stdout ); time_LU( pivot_combo, FLA_ALG_FRONT, n_repeats, m, n, C, b, b_orig, b_norm, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &C ); FLA_Obj_free( &b ); FLA_Obj_free( &b_orig ); FLA_Obj_free( &b_norm ); } fprintf( stdout, "/n" ); }/* fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); for ( i = 0; i < n_pivot_combos; i++ ) { fprintf( stdout, "plot( data_lu_%s( :,1 ), data_lu_%s( :, 2 ), '%c:%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_lu_%s( :,1 ), data_lu_%s( :, 4 ), '%c-.%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... 
/n" ); for ( i = 0; i < n_pivot_combos; i++ ) fprintf( stdout, "'ref//_lu//_%s', 'fla//_lu//_%s', ... /n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME LU front-end performance (%s, %s)' );/n", m_dim_desc, n_dim_desc ); fprintf( stdout, "print -depsc lu_front_%s_%s.eps/n", m_dim_tag, n_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize( ); return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例6: main//.........这里部分代码省略......... // Determine datatype based on trans argument. if ( pc_str[param_combo][0] == 'c' || pc_str[param_combo][1] == 'c' ) { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_COMPLEX; else datatype = FLA_DOUBLE_COMPLEX; } else { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_FLOAT; else datatype = FLA_DOUBLE; } // If transposing A, switch dimensions. if ( pc_str[param_combo][0] == 'n' ) FLA_Obj_create( datatype, m, k, 0, 0, &A ); else FLA_Obj_create( datatype, k, m, 0, 0, &A ); // If transposing B, switch dimensions. if ( pc_str[param_combo][1] == 'n' ) FLA_Obj_create( datatype, k, n, 0, 0, &B ); else FLA_Obj_create( datatype, n, k, 0, 0, &B ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref ); FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); fprintf( stdout, "data_gemm_%s( %d, 1:5 ) = [ %4d %4d %4d ", pc_str[param_combo], i, m, k, n ); fflush( stdout ); time_Gemm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );/* time_Gemm( param_combo, FLA_ALG_FRONT, n_repeats, m, k, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );*/ fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } fprintf( stdout, "/n" );
开发者ID:flame,项目名称:libflame,代码行数:67,
示例7: mainint main(int argc, char *argv[]){ int datatype, n_input, mB_input, mC_input, mD_input, mB, mC, mD, n, p_first, p_last, p_inc, p, b_alg, variant, n_repeats, i, n_variants = 1; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj B, C, D, T, R, E; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c enter algorithmic blocksize:", '%' ); scanf( "%d", &b_alg ); fprintf( stdout, "%c %d/n", '%', b_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter n (-1 means bind to problem size): ", '%' ); scanf( "%d", &n_input ); fprintf( stdout, "%c %d/n", '%', n_input ); fprintf( stdout, "%c enter mB mC mD (-1 means bind to problem size): ", '%' ); scanf( "%d %d %d", &mB_input, &mC_input, &mD_input ); fprintf( stdout, "%c %d %d %d/n", '%', mB_input, mC_input, mD_input ); fprintf( stdout, "/nclear all;/n/n" ); //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { mB = mB_input; mC = mC_input; mD = mD_input; n = n_input; if( mB < 0 ) mB = p / abs(mB_input); if( mC < 0 ) mC = p / abs(mC_input); if( mD < 0 ) mD = p / abs(mD_input); if( n < 0 ) n = p / abs(n_input); for ( variant = 0; variant < n_variants; variant++ ){ FLA_Obj_create( datatype, mB, n, 0, 0, &B ); FLA_Obj_create( datatype, mC, n, 0, 0, &C ); FLA_Obj_create( datatype, mD, n, 0, 0, &D ); FLA_Obj_create( datatype, b_alg, n, 0, 0, &T ); FLA_Obj_create( datatype, n, n, 0, 0, &R ); FLA_Obj_create( datatype, n, n, 0, 0, &E ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Random_matrix( D ); FLA_Set( FLA_ZERO, R ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, B, FLA_ONE, R ); FLA_Herk_external( 
FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, D, FLA_ONE, R ); FLA_Chol( FLA_UPPER_TRIANGULAR, R ); FLA_Set( FLA_ZERO, E ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, B, FLA_ONE, E ); FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, C, FLA_ONE, E ); FLA_Chol( FLA_UPPER_TRIANGULAR, E ); fprintf( stdout, "data_uddate_ut( %d, 1:5 ) = [ %d ", i, p ); fflush( stdout ); time_UDdate_UT( variant, FLA_ALG_FRONT, n_repeats, mB, mC, mD, n, B, C, D, T, R, E, &dtime, &diff, &gflops );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例8: main//.........这里部分代码省略......... m = p_last; k = p_last; n = p_last; sprintf( nth_str, "OMP_NUM_THREADS=%d", n_threads_exp[ n_thread_experiments-1 ] ); putenv( nth_str ); blas_cpu_number = n_threads_exp[ n_thread_experiments-1 ]; blas_thread_init(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; k = k_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( k < 0 ) k = p / abs(k_input); if( n < 0 ) n = p / abs(n_input); FLA_Obj_create( FLA_DOUBLE, m, k, &A ); FLA_Obj_create( FLA_DOUBLE, k, n, &B ); FLA_Obj_create( FLA_DOUBLE, m, n, &C ); FLA_Obj_create( FLA_DOUBLE, m, n, &C_ref ); /* Generate random matrices A, C */ if( p > 4000 ){ FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); } blas_cpu_number = 1; //time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg, // A, B, C, C_ref, &dtime, &diff, &gflops ); //fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; /n", i, p, gflops ); //fflush( stdout ); for ( j = 0; j < n_thread_experiments; j++ ){ n_threads = n_threads_exp[j]; blas_cpu_number = n_threads; fprintf( stdout, "data_nth%d( %d, 1:3 ) = [ %d ", n_threads, i, p ); fflush( stdout ); time_Gemm_nn( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout );
开发者ID:pgawron,项目名称:tlash,代码行数:67,
示例9: libfla_test_apqut_experimentvoid libfla_test_apqut_experiment( test_params_t params, unsigned int var, char* sc_str, FLA_Datatype datatype, unsigned int p_cur, unsigned int pci, unsigned int n_repeats, signed int impl, double* perf, double* residual ){ dim_t b_flash = params.b_flash; dim_t b_alg_flat = params.b_alg_flat; double time_min = 1e9; double time; unsigned int i; unsigned int m, n; unsigned int min_m_n; signed int m_input; signed int n_input; FLA_Side side; FLA_Trans trans; FLA_Direct direct; FLA_Store storev; FLA_Obj A, T, W, B, eye, norm; FLA_Obj B_save; FLA_Obj A_test, T_test, W_test, B_test; // Translate parameter characters to libflame constants. FLA_Param_map_char_to_flame_side( &pc_str[pci][0], &side ); FLA_Param_map_char_to_flame_trans( &pc_str[pci][1], &trans ); FLA_Param_map_char_to_flame_direct( &pc_str[pci][2], &direct ); FLA_Param_map_char_to_flame_storev( &pc_str[pci][3], &storev ); // We want to make sure the Apply_Q_UT routines work with rectangular // matrices. So we use m > n when testing with column-wise storage (via // QR factorization) and m < n when testing with row-wise storage (via // LQ factorization). if ( storev == FLA_COLUMNWISE ) { m_input = -1; n_input = -1; //m_input = -1; //n_input = -1; } else // if ( storev == FLA_ROWWISE ) { m_input = -1; n_input = -1; //m_input = -1; //n_input = -1; } // Determine the dimensions. if ( m_input < 0 ) m = p_cur * abs(m_input); else m = p_cur; if ( n_input < 0 ) n = p_cur * abs(n_input); else n = p_cur; // Compute the minimum dimension. min_m_n = min( m, n ); // Create the matrices for the current operation. 
libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, n, &A ); libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, min_m_n, &T ); if ( storev == FLA_COLUMNWISE ) libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], m, m, &B ); else libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], n, n, &B ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, B, &eye ); FLA_Apply_Q_UT_create_workspace( T, B, &W ); // Create a real scalar object to hold the norm of A. FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); // Initialize the test matrices. FLA_Random_matrix( A ); FLA_Set_to_identity( B ); FLA_Set_to_identity( eye ); // Save the original object contents in a temporary object. FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &B_save ); // Use hierarchical matrices if we're testing the FLASH front-end. if ( impl == FLA_TEST_HIER_FRONT_END ) { if ( storev == FLA_COLUMNWISE ) FLASH_QR_UT_create_hier_matrices( A, 1, &b_flash, &A_test, &T_test ); else // if ( storev == FLA_ROWWISE ) FLASH_LQ_UT_create_hier_matrices( A, 1, &b_flash, &A_test, &T_test ); FLASH_Obj_create_hier_copy_of_flat( B, 1, &b_flash, &B_test ); FLASH_Apply_Q_UT_create_workspace( T_test, B_test, &W_test ); } else // if ( impl == FLA_TEST_FLAT_FRONT_END ) { A_test = A; T_test = T; W_test = W;//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,
示例10: mainint main( int argc, char** argv ) { FLA_Datatype datatype = TESTTYPE; FLA_Obj A, Ak, T, Tk, D, Dk, A_copy, A_recovered, L, Q, Qk, W, x, y, z; dim_t m, n, k; dim_t min_m_n; FLA_Error init_result; double residual_A, residual_Axy; int use_form_q = 1; if ( argc == 4 ) { m = atoi(argv[1]); n = atoi(argv[2]); k = atoi(argv[3]); min_m_n = min(m,n); } else { fprintf(stderr, " /n"); fprintf(stderr, "Usage: %s m n k/n", argv[0]); fprintf(stderr, " m : matrix length/n"); fprintf(stderr, " n : matrix width/n"); fprintf(stderr, " k : number of house holder vectors applied for testing/n"); fprintf(stderr, " /n"); return -1; } if ( m == 0 || n == 0 ) return 0; FLA_Init_safe( &init_result ); // FLAME LQ^H setup FLA_Obj_create( datatype, m, n, 0, 0, &A ); FLA_LQ_UT_create_T( A, &T ); // Rand A and create A_copy. FLA_Random_matrix( A ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_copy ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_recovered ); FLA_Copy( A, A_copy ); // LQ test ( A = L Q^H ) FLA_LQ_UT( A, T ); // Create Q (identity), L (A_copy) FLA_Obj_create( datatype, m, n, 0, 0, &Q ); FLA_Set_to_identity( Q ); FLA_Obj_create( datatype, m, m, 0, 0, &D ); FLA_Obj_create( datatype, k, n, 0, 0, &Qk ); FLA_Set_to_identity( Qk ); FLA_Obj_create( datatype, k, k, 0, 0, &Dk ); FLA_Obj_create( datatype, m, m, 0, 0, &L ); // Q^H := I H_{0}^H ... H_{k-1}^H if ( use_form_q ) { FLA_LQ_UT_form_Q( A, T, Q ); } else { FLA_Apply_Q_UT_create_workspace_side( FLA_RIGHT, T, Q, &W ); FLA_Apply_Q_UT( FLA_RIGHT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE, A, T, W, Q ); FLA_Obj_free( &W ); } // D := Q^T Q FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, Q, Q, FLA_ZERO, D ); // Qk := I H0 ... 
Hk FLA_Part_1x2( T, &Tk, &W, k, FLA_LEFT ); FLA_Part_2x1( A, &Ak, &W, k, FLA_TOP ); if ( use_form_q ) { // Overwrite the result to test FLAME API FLA_Set( FLA_ZERO, Qk ); FLA_Copy( Ak, Qk ); FLA_LQ_UT_form_Q( Ak, Tk, Qk ); } else { FLA_Apply_Q_UT_create_workspace( Tk, Qk, &W ); FLA_Apply_Q_UT( FLA_LEFT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE, Ak, Tk, W, Qk ); FLA_Obj_free( &W ); } // Dk := Qk^T Qk FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, Qk, Qk, FLA_ZERO, Dk ); // L := A (Q^H)^H if ( use_form_q ) { // Note that the formed Q is actually Q^H; transb should be carefully assigned. FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A_copy, Q, FLA_ZERO, L ); } else { FLA_Apply_Q_UT_create_workspace( T, L, &W ); FLA_Apply_Q_UT( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE, A, T, W, L ); FLA_Obj_free( &W ); } FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, L, Q, FLA_ZERO, A_recovered ); //.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例11: main//.........这里部分代码省略......... FLA_Obj_create( datatype, m, n, n, 1, &T ); FLA_Obj_create( datatype, nb_alg, n, n, 1, &TT ); FLA_Obj_create( datatype, 1, 1, 0, 0, &w ); FLA_Obj_create( datatype, m, 1, 1, 1, &W ); FLA_Obj_create( datatype, nb_alg, 1, 1, 1, &WW ); FLA_Obj_create( datatype, m, 1, 1, 1, &b ); FLA_Obj_create( datatype, m, 1, 1, 1, &b_ref );*//* FLA_Obj_create( datatype, m, n, n, 1, &A ); //FLA_Obj_create( datatype, 1, min_m_n, 0, 0, &t ); //FLA_Obj_create( datatype, m, n, n, 1, &T ); //FLA_Obj_create( datatype, nb_alg, n, n, 1, &TT ); //FLA_Obj_create( datatype, 1, 1, 0, 0, &w ); //FLA_Obj_create( datatype, m, 1, 1, 1, &W ); //FLA_Obj_create( datatype, nb_alg, 1, 1, 1, &WW ); //FLA_Obj_create( datatype, m, 1, 1, 1, &b ); //FLA_Obj_create( datatype, m, 1, 1, 1, &b_ref );*/ //FLA_Obj_create( datatype, m, n, 0, 0, &A ); FLA_Obj_create( datatype, m, n, n, 1, &A ); FLA_Obj_create( datatype, 1, min_m_n, 0, 0, &t ); FLA_Obj_create( datatype, m, n, 0, 0, &T ); //FLA_Obj_create( datatype, m, n, n, 1, &T ); FLA_Obj_create( datatype, nb_alg, n, 0, 0, &TT ); FLA_Obj_create( datatype, 1, 1, 0, 0, &w ); FLA_Obj_create( datatype, m, 1, 0, 0, &W ); FLA_Obj_create( datatype, nb_alg, 1, 0, 0, &WW ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_ref ); FLA_Random_matrix( A ); FLA_Random_matrix( b );/* time_LQ( 0, FLA_ALG_REFERENCE, n_repeats, m, n, nb_alg, A, A_ref, t, T, W, b, b_ref, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf %6.2le ]; /n", i, p, gflops, diff ); fflush( stdout );*/ for ( variant = 1; variant <= n_variants; variant++ ){ fprintf( stdout, "data_var%d( %d, 1:3 ) = [ %d ", variant, i, p ); fflush( stdout ); time_LQ( variant, FLA_ALG_UNBLOCKED, n_repeats, m, n, nb_alg, A, t, T, TT, w, W, WW, b, b_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_LQ( variant, FLA_ALG_UNB_OPT1, n_repeats, m, n, nb_alg, A, t, T, TT, w, W, WW, b, 
b_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_LQ( variant, FLA_ALG_BLOCKED, n_repeats, m, n, nb_alg, A, t, T, TT, w, W, WW, b, b_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );
开发者ID:anaptyxis,项目名称:libflame,代码行数:67,
示例12: mainint main(int argc, char *argv[]){ int m_input, m, p_first, p_last, p_inc, p, b_alg, variant, n_repeats, i, datatype, n_variants = 1; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double safemin; double dtime, gflops, diff; FLA_Obj A, l, Q, T, W; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c Enter blocking size:", '%' ); scanf( "%d", &b_alg ); fprintf( stdout, "%c %d/n", '%', b_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d/n", '%', m_input ); fprintf( stdout, "/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); }/*char ch = 's';safemin = dlamch_( &ch );printf( "safemin = %23.15e/n", safemin );ch = 'e';double eps = dlamch_( &ch );printf( "eps dla = %23.15e/n", eps );printf( "eps fla = %23.15e/n", FLA_EPSILON_D );*/ for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / f2c_abs(m_input); //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, m, 0, 0, &Q ); FLA_Obj_create( datatype, 32, m, 0, 0, &T ); FLA_Obj_create( datatype, 32, m, 0, 0, &W ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), m, 1, 0, 0, &l ); //FLA_Random_herm_matrix( FLA_LOWER_TRIANGULAR, A ); //FLA_Random_spd_matrix( 
FLA_LOWER_TRIANGULAR, A ); FLA_Random_matrix( A ); FLA_Obj_set_to_identity( Q ); FLA_QR_UT( A, T );//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,
示例13: mainint main(int argc, char *argv[]){ int datatype, m_input, m, p_first, p_last, p_inc, p, variant, n_repeats, i, j, nb_alg, nfc, nlc, n_variants = 1; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj C, C_ref, t; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d/n", '%', m_input ); fprintf( stdout, "%c enter nfc, nlc (number of columns, initial and trailing, not processed): ", '%' ); scanf( "%d %d", &nfc, &nlc ); fprintf( stdout, "%c %d %d/n", '%', nfc, nlc ); fprintf( stdout, "/nclear all;/n/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( variant = 0; variant < n_variants; variant++ ){ FLA_Obj_create( datatype, m, m, &C ); FLA_Obj_create( datatype, m, m, &C_ref ); FLA_Obj_create( datatype, m, 1, &t ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); fprintf( stdout, "data_hess( %d, 1:5 ) = [ %d ", i, p ); fflush( stdout ); time_Hess( variant, FLA_ALG_REFERENCE, n_repeats, m, nfc, nlc, C, C_ref, t, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); 
time_Hess( variant, FLA_ALG_FRONT, n_repeats, m, nfc, nlc, C, C_ref, t, &dtime, &diff, &gflops );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例14: mainint main(int argc, char *argv[]){ int datatype, n_threads, m_input, m, n_input, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; dim_t n_panels, nb_flash, nb_alg; double dtime, gflops, diff; FLA_Obj A, ATW, R, RTW, b, x; FLA_Obj A_flat, b_flat, x_flat; FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c enter algorithmic blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u/n", '%', nb_alg ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &nb_flash ); fprintf( stdout, "%c %u/n", '%', nb_flash ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d %d", &m_input, &n_input ); fprintf( stdout, "%c %d %d/n", '%', m_input, n_input ); fprintf( stdout, "%c enter the number of QR subproblem panels: ", '%' ); scanf( "%u", &n_panels ); fprintf( stdout, "%c %u/n", '%', n_panels ); fprintf( stdout, "%c enter the number of SuperMatrix threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d/n", '%', n_threads ); //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; //FLASH_Queue_disable(); FLASH_Queue_set_num_threads( n_threads ); //FLASH_Queue_set_verbose_output( TRUE ); // FLA_Check_error_level_set( FLA_NO_ERROR_CHECKING ); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if ( m < 0 ) m = p * f2c_abs(m_input); if ( n < 0 ) n = p * f2c_abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ) { FLA_Obj_create( datatype, m, n, 0, 0, &A_flat ); FLA_Obj_create( datatype, n, 1, 0, 0, &x_flat ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_flat ); 
FLA_Random_matrix( A_flat ); FLA_Random_matrix( b_flat ); FLASH_CAQR_UT_inc_create_hier_matrices( n_panels, A_flat, 1, &nb_flash, nb_alg, &A, &ATW, &R, &RTW ); FLASH_Obj_create_hier_copy_of_flat( b_flat, 1, &nb_flash, &b ); FLASH_Obj_create_hier_copy_of_flat( x_flat, 1, &nb_flash, &x ); fprintf( stdout, "data_caqrutinc_%s( %d, 1:3 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_CAQR_UT_inc( param_combo, FLA_ALG_FRONT, n_repeats, m, n, n_panels, A, ATW, R, RTW, b, x, &dtime, &diff, &gflops );//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,
示例15: mainint main( int argc, char *argv[] ){ int i, j, size, n_threads, n_repeats, n_trials, nb_alg, increment, begin; FLA_Datatype datatype = FLA_DOUBLE; FLA_Obj A; double b_norm_value = 0.0, dtime, *dtimes, *flops, *T; char output_file_m[100]; FILE *fpp; fprintf( stdout, "%c Enter number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c Enter blocksize: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d/n", '%', nb_alg ); fprintf( stdout, "%c Enter problem size parameters: first, inc, num: ", '%' ); scanf( "%d%d%d", &begin, &increment, &n_trials ); fprintf( stdout, "%c %d %d %d/n", '%', begin, increment, n_trials ); fprintf( stdout, "%c Enter number of threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d/n/n", '%', n_threads ); sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE ); fpp = fopen( output_file_m, "a" ); fprintf( fpp, "%%/n" ); fprintf( fpp, "%% | Matrix Size | PLASMA |/n" ); fprintf( fpp, "%% | n x n | GFlops |/n" ); fprintf( fpp, "%% -----------------------------/n" ); FLA_Init(); PLASMA_Init( n_threads ); PLASMA_Disable( PLASMA_AUTOTUNING ); PLASMA_Set( PLASMA_TILE_SIZE, nb_alg ); PLASMA_Set( PLASMA_INNER_BLOCK_SIZE, nb_alg / 4 ); dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) ); flops = ( double * ) FLA_malloc( n_trials * sizeof( double ) ); fprintf( fpp, "%s = [/n", OUTPUT_FILE ); for ( i = 0; i < n_trials; i++ ) { size = begin + i * increment; FLA_Obj_create( datatype, size, size, 0, 0, &A ); for ( j = 0; j < n_repeats; j++ ) { FLA_Random_matrix( A ); PLASMA_Alloc_Workspace_dgeqrf( size, size, &T ); dtime = FLA_Clock(); PLASMA_dgeqrf( size, size, FLA_Obj_buffer_at_view( A ), size, T ); dtime = FLA_Clock() - dtime; dtimes[j] = dtime; free( T ); } dtime = dtimes[0]; for ( j = 1; j < n_repeats; j++ ) dtime = min( dtime, dtimes[j] ); flops[i] = 4.0 / 3.0 * size * size * size / dtime / 1e9; fprintf( fpp, " %d %6.3f/n", size, 
flops[i] ); printf( "Time: %e | GFlops: %6.3f/n", dtime, flops[i] ); printf( "Matrix size: %d x %d | nb_alg: %d/n", size, size, nb_alg );//.........这里部分代码省略.........
开发者ID:fmarrabal,项目名称:libflame,代码行数:101,
示例16: main//.........这里部分代码省略......... sprintf( k_dim_desc, "k = p" ); sprintf( k_dim_tag, "k%dp", 1 ); } //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; k = k_input; if( m < 0 ) m = p / f2c_abs(m_input); if( k < 0 ) k = p / f2c_abs(k_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // If transposing A, switch dimensions. if ( pc_str[param_combo][1] == 'n' ) { FLA_Obj_create( datatype, m, k, 0, 0, &A ); FLA_Obj_create( datatype, m, k, 0, 0, &B ); } else { FLA_Obj_create( datatype, k, m, 0, 0, &A ); FLA_Obj_create( datatype, k, m, 0, 0, &B ); } FLA_Obj_create( datatype, m, m, 0, 0, &C ); FLA_Obj_create( datatype, m, m, 0, 0, &C_ref ); FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); fprintf( stdout, "data_syr2k_%s( %d, 1:3 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Syr2k( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );/* time_Syr2k( param_combo, FLA_ALG_FRONT, n_repeats, m, k, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );*/ fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &B ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); } fprintf( stdout, "/n" ); }/* fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_syr2k_%s( :,1 ), data_syr2k_%s( :, 2 ), '%c:%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_syr2k_%s( :,1 ), data_syr2k_%s( :, 4 ), '%c-.%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, "legend( ... /n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref//_syr2k//_%s', 'fla//_syr2k//_%s', ... 
/n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME syr2k front-end performance (%s, %s)' );/n", m_dim_desc, k_dim_desc ); fprintf( stdout, "print -depsc syr2k_front_%s_%s.eps/n", m_dim_tag, k_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize( ); return 0;}
开发者ID:flame,项目名称:libflame,代码行数:101,
示例17: mainint main(int argc, char *argv[]){ int datatype, n_threads, m_input, m, n_input, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; dim_t b_flash, b_alg; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, TW, b, x; FLA_Obj A_flat, b_flat, x_flat; FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &b_flash ); fprintf( stdout, "%c %u/n", '%', b_flash ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d %d", &m_input, &n_input ); fprintf( stdout, "%c %d %d/n", '%', m_input, n_input ); fprintf( stdout, "%c enter the number of SuperMatrix threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d/n", '%', n_threads ); fprintf( stdout, "/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_set_num_threads( n_threads ); //FLASH_Queue_set_verbose_output( TRUE ); //FLA_Check_error_level_set( FLA_NO_ERROR_CHECKING ); //FLASH_Queue_disable(); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if ( m < 0 ) m = p * abs(m_input); if ( n < 0 ) n = p * abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; 
param_combo++ ) { FLA_Obj_create( datatype, m, n, 0, 0, &A_flat ); FLA_Obj_create( datatype, n, 1, 0, 0, &x_flat ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_flat ); FLA_Random_matrix( A_flat );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例18: mainint main( int argc, char** argv ) { FLA_Datatype comptype = COMPTYPE; FLA_Datatype realtype = REALTYPE; dim_t m; FLA_Obj a, aT, aB, a0, a1, a2; FLA_Obj v, vT, vB, v0, v1, v2; FLA_Error init_result; int use_abs = 1; if ( argc == 3 ) { m = atoi(argv[1]); use_abs = atoi(argv[2]); } else { fprintf(stderr, " /n"); fprintf(stderr, "Usage: %s m use_abs/n", argv[0]); fprintf(stderr, " m : test vector length/n"); fprintf(stderr, " use_abs : 0 - norm (realtype), 1 - abs (complex type)/n"); fprintf(stderr, " /n"); return -1; } if ( m == 0 ) return 0; FLA_Init_safe( &init_result ); FLA_Obj_create( comptype, m, 1, 0, 0, &a ); FLA_Obj_create( use_abs ? comptype : realtype, m, 1, 0, 0, &v ); FLA_Random_matrix( a ); FLA_Set( FLA_ZERO, v ); FLA_Obj_fshow( stdout, "- a -", a, "% 6.4e", "--" ); // Normalize a vector FLA_Part_2x1( a, &aT, &aB, 0, FLA_TOP ); FLA_Part_2x1( v, &vT, &vB, 0, FLA_TOP ); while ( FLA_Obj_length( aB ) > 0 ) { FLA_Repart_2x1_to_3x1( aT, &a0, &a1, aB, &a2, 1, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( vT, &v0, &v1, vB, &v2, 1, FLA_BOTTOM ); // -------------------------------------------- if ( use_abs ) { // a and v are complex datatype FLA_Copy( a1, v1 ); FLA_Absolute_value( v1 ); } else { // v is real datatype FLA_Nrm2( a1, v1 ); } if ( FLA_Obj_equals( v1, FLA_ZERO ) ) printf( " ZERO DETECTED/n" ); else FLA_Inv_scal( v1, a1 ); // Normalize the scalar // -------------------------------------------- FLA_Cont_with_3x1_to_2x1( &aT, a0, a1, &aB, a2, FLA_TOP ); FLA_Cont_with_3x1_to_2x1( &vT, v0, v1, &vB, v2, FLA_TOP ); } FLA_Obj_fshow( stdout, "- a -", a, "% 6.4e", "--" ); FLA_Obj_fshow( stdout, "- v -", v, "% 6.4e", "--" ); // Check whether it is normalized FLA_Part_2x1( a, &aT, &aB, 0, FLA_TOP ); FLA_Part_2x1( v, &vT, &vB, 0, FLA_TOP ); while ( FLA_Obj_length( aB ) > 0 ) { FLA_Repart_2x1_to_3x1( aT, &a0, &a1, aB, &a2, 1, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( vT, &v0, &v1, vB, &v2, 1, FLA_BOTTOM ); // -------------------------------------------- if ( use_abs ) { 
// a and v are same datatype FLA_Copy( a1, v1 ); FLA_Absolute_value( v1 ); } else { // v is realdatatype FLA_Nrm2( a1, v1 ); } // -------------------------------------------- FLA_Cont_with_3x1_to_2x1( &aT, a0, a1, &aB, a2, FLA_TOP ); FLA_Cont_with_3x1_to_2x1( &vT, v0, v1, &vB, v2, FLA_TOP ); } FLA_Obj_fshow( stdout, " - all should be one - ", v, "% 6.4e", "--"); FLA_Obj_free( &a ); FLA_Obj_free( &v );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例19: libfla_test_qrut_experimentvoid libfla_test_qrut_experiment( test_params_t params, unsigned int var, char* sc_str, FLA_Datatype datatype, unsigned int p_cur, unsigned int pci, unsigned int n_repeats, signed int impl, double* perf, double* residual ){ dim_t b_flash = params.b_flash; dim_t b_alg_flat = params.b_alg_flat; double time_min = 1e9; double time; unsigned int i; unsigned int m, n; unsigned int min_m_n; signed int m_input = -2; signed int n_input = -1; FLA_Obj A, T, x, b, y, norm; FLA_Obj A_save; FLA_Obj A_test, T_test, x_test, b_test; // Determine the dimensions. if ( m_input < 0 ) m = p_cur * abs(m_input); else m = p_cur; if ( n_input < 0 ) n = p_cur * abs(n_input); else n = p_cur; // Compute the minimum dimension. min_m_n = min( m, n ); // Create the matrices for the current operation. libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, n, &A ); if ( impl == FLA_TEST_FLAT_FRONT_END || ( impl == FLA_TEST_FLAT_BLK_VAR && var == 1 ) ) libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], b_alg_flat, min_m_n, &T ); else if ( var == 2 ) libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], min_m_n, min_m_n, &T ); else libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], 1, min_m_n, &T ); // Initialize the test matrices. FLA_Random_matrix( A ); // Save the original object contents in a temporary object. FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save ); // Create vectors to form a linear system. FLA_Obj_create( datatype, n, 1, 0, 0, &x ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, n, 1, 0, 0, &y ); // Create a real scalar object to hold the norm of A. FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); // Create a random right-hand side vector. FLA_Random_matrix( b ); // Use hierarchical matrices if we're testing the FLASH front-end. 
if ( impl == FLA_TEST_HIER_FRONT_END ) { FLASH_QR_UT_create_hier_matrices( A, 1, &b_flash, &A_test, &T_test ); FLASH_Obj_create_hier_copy_of_flat( b, 1, &b_flash, &b_test ); FLASH_Obj_create_hier_copy_of_flat( x, 1, &b_flash, &x_test ); } else { A_test = A; T_test = T; } // Create a control tree for the individual variants. if ( impl == FLA_TEST_FLAT_UNB_VAR || impl == FLA_TEST_FLAT_OPT_VAR || impl == FLA_TEST_FLAT_BLK_VAR ) libfla_test_qrut_cntl_create( var, b_alg_flat ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { if ( impl == FLA_TEST_HIER_FRONT_END ) FLASH_Obj_hierarchify( A_save, A_test ); else FLA_Copy_external( A_save, A_test ); time = FLA_Clock(); libfla_test_qrut_impl( impl, A_test, T_test ); time = FLA_Clock() - time; time_min = min( time_min, time ); } // Perform a linear solve with the result. if ( impl == FLA_TEST_HIER_FRONT_END ) { FLASH_QR_UT_solve( A_test, T_test, b_test, x_test );//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,
示例20: main //.........这里部分代码省略......... sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; k = k_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( k < 0 ) k = p / abs(k_input); if( n < 0 ) n = p / abs(n_input); /* Allocate space for the matrices */ FLA_Obj_create( FLA_DOUBLE, m, k, &A ); FLA_Obj_create( FLA_DOUBLE, k, n, &B ); FLA_Obj_create( FLA_DOUBLE, m, n, &C ); FLA_Obj_create( FLA_DOUBLE, m, n, &Cref ); /* Generate random matrices A, B, C */ FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, Cref ); /* Time the reference implementation */ time_Gemm_nn( 0, FLA_ALG_REFERENCE, nrepeats, n, nb_alg, A, B, C, Cref, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; \n", i, p, gflops ); fflush( stdout ); for ( j = 0; j < n_thread_experiments; j++ ){ n_threads = n_threads_exp[j]; FLA_Task_partitioning_set( n_threads_exp[j], n_threads_exp_m[j], n_threads_exp_k[j], n_threads_exp_n[j] ); FLA_omp_set_num_threads( n_threads_exp[j] ); FLA_omp_set_num_stages( n_threads_exp_k[j] ); fprintf( stdout, "data_nth%d_%dx%dx%d( %d, 1:3 ) = [ %d ", n_threads, n_threads_exp_m[j], n_threads_exp_k[j], n_threads_exp_n[j], i, p ); fflush( stdout ); //time_Gemm_nn( variant, FLA_ALG_OPENMP_BVAR, nrepeats, n, nb_alg, time_Gemm_nn( variant, FLA_ALG_OPENMP_CVAR, nrepeats, p, nb_alg, A, B, C, Cref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );
开发者ID:anaptyxis,项目名称:libflame,代码行数:65,
示例21: main int main(int argc, char *argv[]){ int n, nfirst, nlast, ninc, nlast_unb, i, irep, nrepeats, nb_alg; double dtime, dtime_best, gflops, max_gflops, diff, d_n; FLA_Obj A, Aref, Aold, delta; /* Initialize FLAME */ FLA_Init( ); /* Every time trial is repeated "repeat" times and the fastest run is recorded */ printf( "%% number of repeats:" ); scanf( "%d", &nrepeats ); printf( "%% %d\n", nrepeats ); /* Enter the max GFLOPS attainable This is used to set the y-axis range for the graphs. Here is how you figure out what to enter (on Linux machines): 1) more /proc/cpuinfo (this lists the contents of this file). 2) read through this and figure out the clock rate of the machine (in GHz). 3) Find out (from an expert or from the web) the number of floating point instructions that can be performed per core per clock cycle. 4) Figure out if you are using "multithreaded BLAS" which automatically parallelizes calls to the Basic Linear Algebra Subprograms. If so, check how many cores are available. 5) Multiply 2) x 3) x 4) and enter this in response to the below. If you enter a value for max GFLOPS that is lower than the maximum that is observed in the experiments, then the top of the graph is set to the observed maximum. Thus, one possibility is to simply set this to 0.0. */ printf( "%% enter max GFLOPS:" ); scanf( "%lf", &max_gflops ); printf( "%% %lf\n", max_gflops ); /* Enter the algorithmic block size */ printf( "%% enter nb_alg:" ); scanf( "%d", &nb_alg ); printf( "%% %d\n", nb_alg ); /* Timing trials for matrix sizes n=nfirst to nlast in increments of ninc will be performed. 
Unblocked versions are only tested to nlast_unb */ printf( "%% enter nfirst, nlast, ninc, nlast_unb:" ); scanf( "%d%d%d%d", &nfirst, &nlast, &ninc, &nlast_unb ); printf( "%% %d %d %d %d\n", nfirst, nlast, ninc, nlast_unb ); i = 1; for ( n=nfirst; n<= nlast; n+=ninc ){ /* Allocate space for the matrices */ FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &A ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Aref ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Aold ); FLA_Obj_create( FLA_DOUBLE, 1, 1, 1, 1, &delta ); /* Generate random matrix A and save in Aold */ FLA_Random_matrix( Aold ); /* Add something large to the diagonal to make sure it isn't ill-conditioned */ d_n = ( double ) n; *( ( double * ) FLA_Obj_buffer_at_view( delta ) ) = d_n; FLA_Shift_diag( FLA_NO_CONJUGATE, delta, Aold ); /* Set gflops = billions of floating point operations that will be performed */ gflops = 1.0/3.0 * n * n * n * 1.0e-09; /* Time the reference implementation */#if TIME_LAPACK == TRUE#else // if ( n <= nlast_unb )#endif { for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, Aref ); dtime = FLA_Clock(); REF_Chol( TIME_LAPACK, Aref, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } printf( "data_REF( %d, 1:2 ) = [ %d %le ];\n", i, n, gflops / dtime_best ); fflush( stdout ); } //.........这里部分代码省略.........
开发者ID:ztschir,项目名称:High-Performance,代码行数:101,
示例22: mainint main( int argc, char** argv ) { FLA_Datatype datatype = TESTTYPE; FLA_Obj A, A_flame, A_lapack, C; int m; FLA_Error init_result; FLA_Obj TU, TV, U_flame, V_flame, d_flame, e_flame, B_flame; FLA_Obj tauq, taup, d_lapack, e_lapack, U_lapack, V_lapack, W, B_lapack; testtype *buff_tauq, *buff_taup, *buff_d_lapack, *buff_e_lapack, *buff_W, *buff_A_lapack, *buff_U_lapack, *buff_V_lapack; int lwork, info, is_flame; if ( argc == 3 ) { m = atoi(argv[1]); is_flame = atoi(argv[2]); } else { fprintf(stderr, " /n"); fprintf(stderr, "Usage: %s m is_flame/n", argv[0]); fprintf(stderr, " m : matrix length/n"); fprintf(stderr, " is_flame : 1 yes, 0 no/n"); fprintf(stderr, " /n"); return -1; } if ( m == 0 ) return 0; FLA_Init_safe( &init_result ); fprintf( stdout, "lapack2flame: %d x %d: /n", m, m); FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Random_matrix( A ); FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_flame ); FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_lapack ); FLA_Obj_create( datatype, m, m, 0, 0, &C ); FLA_Random_matrix( C ); if ( is_flame ) { fprintf( stdout, " flame executed/n"); FLA_Bidiag_UT_create_T( A_flame, &TU, &TV ); FLA_Bidiag_UT( A_flame, TU, TV ); FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A_flame, &U_flame ); FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A_flame, &V_flame ); FLA_Bidiag_UT_form_U( U_flame, TU, U_flame ); FLA_Bidiag_UT_form_V( V_flame, TV, V_flame ); FLA_Obj_create( datatype, m, 1, 0, 0, &d_flame ); FLA_Obj_create( datatype, m - 1, 1, 0, 0, &e_flame ); FLA_Bidiag_UT_extract_diagonals( A_flame, d_flame, e_flame ); FLA_Obj_create( datatype, m, m, 0, 0, &B_flame ); FLA_Set( FLA_ZERO, B_flame ); { FLA_Obj BTL, BTR, BBL, BBR; FLA_Part_2x2( B_flame, &BTL, &BTR, &BBL, &BBR, 1,1, FLA_BL ); FLA_Set_diagonal_matrix( d_flame, B_flame ); FLA_Set_diagonal_matrix( e_flame, BTR ); } if (1) { fprintf( stdout, " - FLAME ----------/n"); FLA_Obj_fshow( stdout, " - Given A - ", A, "% 6.4e", "------"); FLA_Obj_fshow( stdout, " - A - ", A_flame, "% 
6.4e", "------"); FLA_Obj_fshow( stdout, " - U - ", U_flame, "% 6.4e", "------"); FLA_Obj_fshow( stdout, " - V - ", V_flame, "% 6.4e", "------"); FLA_Obj_fshow( stdout, " - d - ", d_flame, "% 6.4e", "------"); FLA_Obj_fshow( stdout, " - e - ", e_flame, "% 6.4e", "------"); FLA_Obj_fshow( stdout, " - B - ", B_flame, "% 6.4e", "------"); } } else { fprintf( stdout, " lapack executed/n"); FLA_Obj_create( datatype, m, 1, 0, 0, &tauq ); FLA_Obj_create( datatype, m, 1, 0, 0, &taup ); FLA_Obj_create( datatype, m, 1, 0, 0, &d_lapack ); FLA_Obj_create( datatype, m - 1, 1, 0, 0, &e_lapack ); buff_A_lapack = (testtype*)FLA_Obj_buffer_at_view( A_lapack ); buff_tauq = (testtype*)FLA_Obj_buffer_at_view( tauq ); buff_taup = (testtype*)FLA_Obj_buffer_at_view( taup ); buff_d_lapack = (testtype*)FLA_Obj_buffer_at_view( d_lapack ); buff_e_lapack = (testtype*)FLA_Obj_buffer_at_view( e_lapack ); lwork = 32*m; FLA_Obj_create( datatype, lwork, 1, 0, 0, &W ); buff_W = (testtype*)FLA_Obj_buffer_at_view( W ); sgebrd_( &m, &m, buff_A_lapack, &m, buff_d_lapack, buff_e_lapack, buff_tauq, buff_taup, buff_W, &lwork, &info );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例23: main//.........这里部分代码省略......... sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE ); fpp = fopen( output_file_m, "a" ); fprintf( fpp, "%%/n" ); fprintf( fpp, "%% | Matrix Size | FLASH |/n" ); fprintf( fpp, "%% | n x n | GFlops |/n" ); fprintf( fpp, "%% -----------------------------/n" ); fprintf( fpp, "%s_%u = [/n", OUTPUT_FILE, nb_alg );#endif FLA_Init(); dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) ); flops = ( double * ) FLA_malloc( n_trials * sizeof( double ) ); FLASH_Queue_set_num_threads( n_threads ); FLASH_Queue_set_sorting( sorting ); FLASH_Queue_set_caching( caching ); FLASH_Queue_set_work_stealing( work_stealing ); FLASH_Queue_set_data_affinity( data_affinity ); for ( i = 0; i < n_trials; i++ ) { size = begin + i * increment; FLA_Obj_create( datatype, size, size, 0, 0, &A ); FLA_Obj_create( datatype, size, size, 0, 0, &B ); FLA_Obj_create( datatype, size, 1, 0, 0, &x ); FLA_Obj_create( datatype, size, 1, 0, 0, &b ); FLA_Obj_create( datatype, 1, 1, 0, 0, &b_norm ); for ( j = 0; j < n_repeats; j++ ) { FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( x ); FLA_Random_matrix( b ); FLA_Symmetrize( uplo, A ); FLA_Symmetrize( uplo, B ); length = ( double ) FLA_Obj_length( B ); FLA_Add_to_diag( &length, B ); FLA_Symv_external( uplo, FLA_ONE, B, x, FLA_ZERO, b ); FLASH_Obj_create_hier_copy_of_flat( A, 1, &nb_alg, &AH ); FLASH_Obj_create_hier_copy_of_flat( B, 1, &nb_alg, &BH ); FLASH_Chol( uplo, BH ); dtime = FLA_Clock(); FLASH_Eig_gest( inv, uplo, AH, BH ); dtime = FLA_Clock() - dtime; dtimes[j] = dtime; FLASH_Obj_free( &AH ); FLASH_Obj_free( &BH ); } dtime = dtimes[0]; for ( j = 1; j < n_repeats; j++ ) dtime = min( dtime, dtimes[j] ); flops[i] = 1.0 * size * size * size / dtime / 1e9;
开发者ID:anaptyxis,项目名称:libflame,代码行数:66,
示例24: mainint main(int argc, char *argv[]){ int datatype, m_input, m, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; FLA_Uplo uplo; FLA_Diag diag; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, b, b_orig, norm; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d/n", '%', m_input ); fprintf( stdout, "/nclear all;/n/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ //FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, m, m, 1, &A ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig ); if ( FLA_Obj_is_single_precision( A ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); FLA_Param_map_netlib_to_flame_uplo( &pc_str[param_combo][0], &uplo ); FLA_Param_map_netlib_to_flame_diag( &pc_str[param_combo][1], &diag ); FLA_Random_tri_matrix( uplo, diag, A ); FLA_Random_matrix( b ); FLA_Copy_external( 
b, b_orig ); fprintf( stdout, "data_trinv_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout );/* time_Trinv( param_combo, FLA_ALG_REFERENCE, n_repeats, m, uplo, diag, A, b, b_orig, norm, &dtime, &diff, &gflops );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例25: libfla_test_symm_experimentvoid libfla_test_symm_experiment( test_params_t params, unsigned int var, char* sc_str, FLA_Datatype datatype, unsigned int p_cur, unsigned int pci, unsigned int n_repeats, signed int impl, double* perf, double* residual ){ dim_t b_flash = params.b_flash; dim_t b_alg_flat = params.b_alg_flat; double time_min = 1e9; double time; unsigned int i; unsigned int m; signed int m_input = -1; unsigned int n; signed int n_input = -1; FLA_Side side; FLA_Uplo uplo; FLA_Obj A, B, C, x, y, z, w, norm; FLA_Obj alpha, beta; FLA_Obj C_save; FLA_Obj A_test, B_test, C_test; // Determine the dimensions. if ( m_input < 0 ) m = p_cur / abs(m_input); else m = p_cur; if ( n_input < 0 ) n = p_cur / abs(n_input); else n = p_cur; // Translate parameter characters to libflame constants. FLA_Param_map_char_to_flame_side( &pc_str[pci][0], &side ); FLA_Param_map_char_to_flame_uplo( &pc_str[pci][1], &uplo ); // Create the matrices for the current operation. if ( side == FLA_LEFT ) { libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, m, &A ); // Create vectors for use in test. FLA_Obj_create( datatype, n, 1, 0, 0, &x ); FLA_Obj_create( datatype, m, 1, 0, 0, &y ); FLA_Obj_create( datatype, m, 1, 0, 0, &z ); FLA_Obj_create( datatype, m, 1, 0, 0, &w ); } else { libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], n, n, &A ); // Create vectors for use in test. FLA_Obj_create( datatype, n, 1, 0, 0, &x ); FLA_Obj_create( datatype, m, 1, 0, 0, &y ); FLA_Obj_create( datatype, m, 1, 0, 0, &z ); FLA_Obj_create( datatype, n, 1, 0, 0, &w ); } libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, n, &B ); libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], m, n, &C ); // Create a norm scalar. FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); // Initialize the test matrices. FLA_Random_symm_matrix( uplo, A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); // Initialize the test vectors. 
FLA_Random_matrix( x ); FLA_Set( FLA_ZERO, y ); FLA_Set( FLA_ZERO, z ); FLA_Set( FLA_ZERO, w ); // Set constants. alpha = FLA_TWO; beta = FLA_MINUS_ONE; // Save the original object contents in a temporary object. FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, C, &C_save ); // Use hierarchical matrices if we're testing the FLASH front-end. if ( impl == FLA_TEST_HIER_FRONT_END ) { FLASH_Obj_create_hier_copy_of_flat( A, 1, &b_flash, &A_test ); FLASH_Obj_create_hier_copy_of_flat( B, 1, &b_flash, &B_test ); FLASH_Obj_create_hier_copy_of_flat( C, 1, &b_flash, &C_test ); } else { A_test = A; B_test = B; C_test = C; } // Create a control tree for the individual variants. if ( impl == FLA_TEST_FLAT_UNB_VAR || impl == FLA_TEST_FLAT_OPT_VAR || impl == FLA_TEST_FLAT_BLK_VAR ||//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,
示例26: main//.........这里部分代码省略......... for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, n, n, 0, 0, &B ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref ); if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) { FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &scale ); FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); } else if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) { FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &scale ); FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); } FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, B ); FLA_Random_matrix( C ); FLA_Norm1( A, norm ); FLA_Shift_diag( FLA_NO_CONJUGATE, norm, A ); FLA_Norm1( B, norm ); if ( FLA_Obj_is( isgn, FLA_MINUS_ONE ) ) FLA_Negate( norm ); FLA_Shift_diag( FLA_NO_CONJUGATE, norm, B ); time_Sylv_nn( 0, FLA_ALG_REFERENCE, n_repeats, m, n, nb_alg, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; /n", i, p, gflops ); fflush( stdout ); for ( variant = 1; variant <= n_variants; variant++ ){ fprintf( stdout, "data_var%d( %d, 1:3 ) = [ %d ", variant, i, p ); fflush( stdout ); time_Sylv_nn( variant, FLA_ALG_UNB_OPT, n_repeats, m, n, nb_alg, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Sylv_nn( variant, FLA_ALG_BLOCKED, n_repeats, m, n, nb_alg, isgn, A, B, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );
开发者ID:anaptyxis,项目名称:libflame,代码行数:67,
示例27: mainint main( int argc, char *argv[] ) { int i, j, n_threads, n_repeats, n_trials, increment, begin, sorting, caching, work_stealing, data_affinity; dim_t size, nb_alg; FLA_Datatype datatype = FLA_DOUBLE; FLA_Obj A, x, b, b_norm, AH, pH, bH; double b_norm_value, dtime, *dtimes, *flops;#ifndef FLA_ENABLE_WINDOWS_BUILD char output_file_m[100]; FILE *fpp;#endif fprintf( stdout, "%c Enter number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c Enter blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u/n", '%', nb_alg ); fprintf( stdout, "%c Enter problem size parameters: first, inc, num: ", '%' ); scanf( "%d%d%d", &begin, &increment, &n_trials ); fprintf( stdout, "%c %d %d %d/n", '%', begin, increment, n_trials ); fprintf( stdout, "%c Enter number of threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d/n", '%', n_threads ); fprintf( stdout, "%c Enter SuperMatrix parameters: sorting, caching, work stealing, data affinity: ", '%' ); scanf( "%d%d%d%d", &sorting, &caching, &work_stealing, &data_affinity ); fprintf( stdout, "%c %s %s %s %s/n/n", '%', ( sorting ? "TRUE" : "FALSE" ), ( caching ? "TRUE" : "FALSE" ), ( work_stealing ? "TRUE" : "FALSE" ), ( data_affinity ? ( data_affinity == 1 ? 
"FLASH_QUEUE_AFFINITY_2D_BLOCK_CYCLIC" : "FLASH_QUEUE_AFFINITY_OTHER" ) : "FLASH_QUEUE_AFFINITY_NONE" ) );#ifdef FLA_ENABLE_WINDOWS_BUILD fprintf( stdout, "%s_%u = [/n", OUTPUT_FILE, nb_alg );#else sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE ); fpp = fopen( output_file_m, "a" ); fprintf( fpp, "%%/n" ); fprintf( fpp, "%% | Matrix Size | FLASH |/n" ); fprintf( fpp, "%% | n x n | GFlops |/n" ); fprintf( fpp, "%% -----------------------------/n" ); fprintf( fpp, "%s_%u = [/n", OUTPUT_FILE, nb_alg );#endif FLA_Init(); dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) ); flops = ( double * ) FLA_malloc( n_trials * sizeof( double ) ); FLASH_Queue_set_num_threads( n_threads ); FLASH_Queue_set_sorting( sorting ); FLASH_Queue_set_caching( caching ); FLASH_Queue_set_work_stealing( work_stealing ); FLASH_Queue_set_data_affinity( data_affinity ); for ( i = 0; i < n_trials; i++ ) { size = begin + i * increment; FLA_Obj_create( datatype, size, size, 0, 0, &A ); FLA_Obj_create( datatype, size, 1, 0, 0, &x ); FLA_Obj_create( datatype, size, 1, 0, 0, &b ); FLA_Obj_create( datatype, 1, 1, 0, 0, &b_norm ); for ( j = 0; j < n_repeats; j++ ) { FLA_Random_matrix( A ); FLA_Random_matrix( b ); FLASH_Obj_create_hier_copy_of_flat( A, 1, &nb_alg, &AH ); FLASH_Obj_create( FLA_INT, size, 1, 1, &nb_alg, &pH ); FLASH_Obj_create_hier_copy_of_flat( b, 1, &nb_alg, &bH );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例28: main//.........这里部分代码省略......... sprintf( m_dim_tag, "m%dp", 1 ); } if ( 0 < sign ) isgn = FLA_ONE; else isgn = FLA_MINUS_ONE; //datatype = FLA_FLOAT; datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; //datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, m, 0, 0, &C ); FLA_Obj_create( datatype, m, m, 0, 0, &C_ref ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &scale ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Norm1( A, norm ); FLA_Shift_diag( FLA_NO_CONJUGATE, norm, A ); FLA_Random_matrix( C ); FLA_Hermitianize( FLA_UPPER_TRIANGULAR, C ); fprintf( stdout, "data_lyap_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); time_Lyap( param_combo, FLA_ALG_REFERENCE, n_repeats, m, isgn, A, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Lyap( param_combo, FLA_ALG_FRONT, n_repeats, m, isgn, A, C, C_ref, scale, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); fprintf( stdout, " ]; /n" ); fflush( stdout ); FLA_Obj_free( &A ); FLA_Obj_free( &C ); FLA_Obj_free( &C_ref ); FLA_Obj_free( &scale ); FLA_Obj_free( &norm ); } fprintf( stdout, "/n" ); }/* fprintf( stdout, "figure;/n" ); fprintf( stdout, "hold on;/n" ); for ( i = 0; i < n_param_combos; i++ ) { fprintf( stdout, "plot( data_lyap_%s( :,1 ), data_lyap_%s( :, 2 ), '%c:%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); fprintf( stdout, "plot( data_lyap_%s( :,1 ), data_lyap_%s( :, 4 ), '%c-.%c' ); /n", pc_str[i], pc_str[i], colors[ i ], ticks[ i ] ); } fprintf( stdout, 
"legend( ... /n" ); for ( i = 0; i < n_param_combos; i++ ) fprintf( stdout, "'ref//_lyap//_%s', 'fla//_lyap//_%s', ... /n", pc_str[i], pc_str[i] ); fprintf( stdout, "'Location', 'SouthEast' ); /n" ); fprintf( stdout, "xlabel( 'problem size p' );/n" ); fprintf( stdout, "ylabel( 'GFLOPS/sec.' );/n" ); fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); /n", p_last, max_gflops ); fprintf( stdout, "title( 'FLAME lyap front-end performance (%s)' );/n", m_dim_desc ); fprintf( stdout, "print -depsc lyap_front_%s.eps/n", m_dim_tag ); fprintf( stdout, "hold off;/n"); fflush( stdout );*/ FLA_Finalize( ); return 0;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例29: main//.........这里部分代码省略......... sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; /* Allocate space for the matrices */ FLA_Obj_create( datatype, m, m, &A ); FLA_Obj_create( datatype, m, n, &C ); FLA_Obj_create( datatype, m, n, &C_ref ); /* Generate random matrices A, C */ FLA_Random_tri_matrix( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, A ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); /* Time the reference implementation */ time_Trmm_luh( 0, FLA_ALG_REFERENCE, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "data_REF( %d, 1:2 ) = [ %d %6.3lf ]; /n", i, p, gflops ); fflush( stdout ); for ( variant = 1; variant <= n_variants; variant++ ){ //fprintf( stdout, "data_var%d( %d, 1:7 ) = [ %d ", variant, i, p ); fprintf( stdout, "data_var%d( %d, 1:5 ) = [ %d ", variant, i, p ); fflush( stdout ); time_Trmm_luh( variant, FLA_ALG_UNBLOCKED, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); time_Trmm_luh( variant, FLA_ALG_BLOCKED, n_repeats, p, nb_alg, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout ); //time_Trmm_luh( variant, FLA_ALG_OPTIMIZED, n_repeats, p, nb_alg, // A, B, C, C_ref, &dtime, &diff, &gflops );
开发者ID:pgawron,项目名称:tlash,代码行数:67
示例30: mainint main(int argc, char *argv[]){ int m, n, k, nfirst, nlast, ninc, i, irep, nrepeats, nb_alg, check;; double dtime, dtime_best, gflops, max_gflops, diff, d_n; FLA_Obj A, B, C, Cref, Cold; /* Initialize FLAME */ FLA_Init( ); /* Every time trial is repeated "repeat" times */ printf( "%% number of repeats:" ); scanf( "%d", &nrepeats ); printf( "%% %d/n", nrepeats ); /* Enter the max GFLOPS attainable */ printf( "%% enter max GFLOPS:" ); scanf( "%lf", &max_gflops ); printf( "%% %lf/n", max_gflops ); /* Enter the algorithmic block size */ printf( "%% enter nb_alg:" ); scanf( "%d", &nb_alg ); printf( "%% %d/n", nb_alg ); /* Timing trials for matrix sizes n=nfirst to nlast in increments of ninc will be performed */ printf( "%% enter nfirst, nlast, ninc:" ); scanf( "%d%d%d", &nfirst, &nlast, &ninc ); printf( "%% %d %d %d/n", nfirst, nlast, ninc ); i = 1; for ( n=nfirst; n<= nlast; n+=ninc ){ /* Allocate space for the matrices */ FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &A ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &B ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &C ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Cref ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Cold ); /* Generate random matrices L and B */ FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( Cold ); gflops = 2.0 * n * n * n * 1.0e-09; /* Time FLA_Symm */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, Cref ); dtime = FLA_Clock(); FLA_Symm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_ONE, A, B, FLA_ONE, Cref ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? 
dtime : dtime_best ); } printf( "data_FLAME( %d, 1:2 ) = [ %d %le ];/n", i, n, gflops / dtime_best ); fflush( stdout ); /* Time the your implementations */#if TEST_UNB_VAR1==TRUE /* Variant 1 unblocked */ for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Cold, C ); dtime = FLA_Clock(); Symm_unb_var1( A, B, C ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else//.........这里部分代码省略.........
开发者ID:ebeweber,项目名称:CS378-Invariant-Project,代码行数:101
注:本文中的FLA_Random_matrix函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 相关文章:C++ FLA_Repart_1x2_to_1x3函数代码示例;C++ FLA_Part_2x1函数代码示例。