这篇教程C++ FLA_Obj_create函数代码示例写得很实用,希望能帮到您。
本文整理汇总了C++中FLA_Obj_create函数的典型用法代码示例。如果您正苦于以下问题:C++ FLA_Obj_create函数的具体用法?C++ FLA_Obj_create怎么用?C++ FLA_Obj_create使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。 在下文中一共展示了FLA_Obj_create函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。 示例1: FLA_Obj_create_complex_constantFLA_Error FLA_Obj_create_complex_constant( double const_real, double const_imag, FLA_Obj *obj ){ int* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_create_complex_constant_check( const_real, const_imag, obj ); FLA_Obj_create( FLA_CONSTANT, 1, 1, 0, 0, obj );#ifdef FLA_ENABLE_SCC if ( !FLA_is_owner() ) return FLA_SUCCESS;#endif temp_i = FLA_INT_PTR( *obj ); temp_s = FLA_FLOAT_PTR( *obj ); temp_d = FLA_DOUBLE_PTR( *obj ); temp_c = FLA_COMPLEX_PTR( *obj ); temp_z = FLA_DOUBLE_COMPLEX_PTR( *obj ); *temp_i = ( int ) const_real; *temp_s = ( float ) const_real; *temp_d = const_real; temp_c->real = ( float ) const_real; temp_c->imag = ( float ) const_imag; temp_z->real = const_real; temp_z->imag = const_imag; return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:34,
示例2: FLA_Norm_infFLA_Error FLA_Norm_inf( FLA_Obj A, FLA_Obj norm ){ FLA_Obj AT, A0, AB, a1t, A2; FLA_Obj bT, b0, bB, beta1, b2; FLA_Obj b; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Norm_inf_check( A, norm ); FLA_Obj_create( FLA_Obj_datatype( A ), FLA_Obj_length( A ), 1, 0, 0, &b ); FLA_Part_2x1( A, &AT, &AB, 0, FLA_TOP ); FLA_Part_2x1( b, &bT, &bB, 0, FLA_TOP ); while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ FLA_Repart_2x1_to_3x1( AT, &A0, /* ** */ /* *** */ &a1t, AB, &A2, 1, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( bT, &b0, /* ** */ /* ***** */ &beta1, bB, &b2, 1, FLA_BOTTOM ); /*------------------------------------------------------------*/ FLA_Asum( a1t, beta1 ); /*------------------------------------------------------------*/ FLA_Cont_with_3x1_to_2x1( &AT, A0, a1t, /* ** */ /* *** */ &AB, A2, FLA_TOP ); FLA_Cont_with_3x1_to_2x1( &bT, b0, beta1, /* ** */ /* ***** */ &bB, b2, FLA_TOP ); } FLA_Max_abs_value( b, norm ); FLA_Obj_free( &b ); return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:58,
示例3: FLA_Bidiag_UT_create_TFLA_Error FLA_Bidiag_UT_create_T( FLA_Obj A, FLA_Obj* TU, FLA_Obj* TV ){ FLA_Datatype datatype; dim_t b_alg, k; dim_t rs_T, cs_T; // Query the datatype of A. datatype = FLA_Obj_datatype( A ); // Query the blocksize from the library. b_alg = FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN ); // Scale the blocksize by a pre-set global constant. b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_BIDIAG_INNER_TO_OUTER_B_RATIO ); // Query the minimum dimension of A. k = FLA_Obj_min_dim( A ); b_alg = 5; // Adjust the blocksize with respect to the min-dim of A. b_alg = min( b_alg, k ); // Figure out whether TU and TV should be row-major or column-major. if ( FLA_Obj_row_stride( A ) == 1 ) { rs_T = 1; cs_T = b_alg; } else // if ( FLA_Obj_col_stride( A ) == 1 ) { rs_T = k; cs_T = 1; } // Create two b_alg x k matrices to hold the block Householder transforms // that will be accumulated within the bidiagonal reduction algorithm. // If the matrix dimension has a zero dimension, apply_q complains it. if ( TU != NULL ) FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, TU ); if ( TV != NULL ) FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, TV ); return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:43,
示例4: mainint main( int argc, char** argv ) { FLA_Datatype testtype = TESTTYPE; FLA_Datatype realtype = REALTYPE; dim_t m; FLA_Obj a, b; FLA_Error init_result; if ( argc == 2 ) { m = atoi(argv[1]); } else { fprintf(stderr, " /n"); fprintf(stderr, "Usage: %s m/n", argv[0]); fprintf(stderr, " m : test vector length/n"); fprintf(stderr, " /n"); return -1; } if ( m == 0 ) return 0; FLA_Init_safe( &init_result ); FLA_Obj_create( testtype, m, 1, 0, 0, &a ); FLA_Random_matrix( a ); FLA_Obj_fshow( stdout, "- a -", a, "% 6.4e", "--" ); FLA_Obj_create( realtype, 1, m, 0, 0, &b ); FLA_Obj_extract_real_part( a, b ); FLA_Obj_fshow( stdout, "- a real -", b, "% 6.4e", "--" ); FLA_Obj_extract_imag_part( a, b ); FLA_Obj_fshow( stdout, "- a imag -", b, "% 6.4e", "--" ); FLA_Obj_free( &b ); FLA_Obj_free( &a ); FLA_Finalize_safe( init_result ); }
开发者ID:anaptyxis,项目名称:libflame,代码行数:38,
示例5: FLA_LQ_UT_create_TFLA_Error FLA_LQ_UT_create_T( FLA_Obj A, FLA_Obj* T ){ FLA_Datatype datatype; dim_t b_alg, k; dim_t rs_T, cs_T; // Query the datatype of A. datatype = FLA_Obj_datatype( A ); // Query the blocksize from the library. b_alg = FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN ); // Scale the blocksize by a pre-set global constant. b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_LQ_INNER_TO_OUTER_B_RATIO ); // Adjust the blocksize with respect to the min-dim of A. b_alg = min(b_alg, FLA_Obj_min_dim( A )); // Query the length of A. k = FLA_Obj_length( A ); // Figure out whether T should be row-major or column-major. if ( FLA_Obj_row_stride( A ) == 1 ) { rs_T = 1; cs_T = b_alg; } else // if ( FLA_Obj_col_stride( A ) == 1 ) { rs_T = k; cs_T = 1; } // Create a b_alg x k matrix to hold the block Householder transforms that // will be accumulated within the LQ factorization algorithm. FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, T ); return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:39,
示例6: fill_eigenvaluesvoid fill_eigenvalues( FLA_Obj l ){ FLA_Obj lT, l0, lB, lambda1, l2; FLA_Obj alpha; FLA_Obj_create( FLA_Obj_datatype( l ), 1, 1, 0, 0, &alpha ); FLA_Copy( FLA_ONE, alpha ); FLA_Part_2x1( l, &lT, &lB, 0, FLA_TOP ); while ( FLA_Obj_length( lT ) < FLA_Obj_length( l ) ){ FLA_Repart_2x1_to_3x1( lT, &l0, /* ** */ /* ******* */ &lambda1, lB, &l2, 1, FLA_BOTTOM ); /*------------------------------------------------------------*/ FLA_Copy( alpha, lambda1 ); FLA_Mult_add( FLA_ONE, FLA_ONE, alpha ); /*------------------------------------------------------------*/ FLA_Cont_with_3x1_to_2x1( &lT, l0, lambda1, /* ** */ /* ******* */ &lB, l2, FLA_TOP ); } FLA_Obj_free( &alpha );}
开发者ID:flame,项目名称:libflame,代码行数:36,
示例7: mainint main(int argc, char *argv[]){ int datatype, m_input, n_input, m, n, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; char *colors = "brkgmcbrkgmcbrkgmc"; char *ticks = "o+*xso+*xso+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, C, C_ref; FLA_Init( ); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d/n", '%', m_input, n_input ); fprintf( stdout, "/nclear all;/n/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // If multiplying A on the left, A is m x m; ...on the right, A is n x n. 
if ( pc_str[param_combo][0] == 'l' ) FLA_Obj_create( datatype, m, m, 0, 0, &A ); else FLA_Obj_create( datatype, n, n, 0, 0, &A ); FLA_Obj_create( datatype, m, n, 0, 0, &B ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例8: time_Lyapvoid time_Lyap( int param_combo, int type, int nrepeats, int m, FLA_Obj isgn, FLA_Obj A, FLA_Obj C, FLA_Obj scale, double *dtime, double *diff, double *gflops ){ int irep; double dtime_old = 1.0e9; FLA_Obj C_save, norm; if ( param_combo == 0 && type == FLA_ALG_FRONT ) { *gflops = 0.0; *diff = 0.0; return; } FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_save ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( C ), 1, 1, 0, 0, &norm ); FLASH_Copy( C, C_save ); for ( irep = 0 ; irep < nrepeats; irep++ ) { FLASH_Copy( C_save, C ); *dtime = FLA_Clock(); switch( param_combo ){ case 0:{ switch( type ){ //case FLA_ALG_REFERENCE: // REF_Lyap( FLA_NO_TRANSPOSE, isgn, A_flat, C_flat, scale ); // break; case FLA_ALG_FRONT: FLASH_Lyap( FLA_NO_TRANSPOSE, isgn, A, C, scale ); break; default: printf("trouble/n"); } break; } case 1:{ switch( type ){ //case FLA_ALG_REFERENCE: // REF_Lyap( FLA_CONJ_TRANSPOSE, isgn, A_flat, C_flat, scale ); // break; case FLA_ALG_FRONT: FLASH_Lyap( FLA_CONJ_TRANSPOSE, isgn, A, C, scale ); break; default: printf("trouble/n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); }/* if ( type == FLA_ALG_REFERENCE ) { FLASH_Obj_hierarchify( C_flat, C_ref ); *diff = 0.0; } else { *diff = FLASH_Max_elemwise_diff( C, C_ref ); }*/ { FLA_Obj X, W; FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &X ); FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &W ); FLASH_Copy( C, X ); FLASH_Hermitianize( FLA_UPPER_TRIANGULAR, X ); if ( param_combo == 0 ) { FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, X, FLA_ZERO, W ); FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, X, A, FLA_ONE, W ); } else if ( param_combo == 1 ) { FLASH_Gemm( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A, X, FLA_ZERO, W ); FLASH_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, X, A, FLA_ONE, W );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例9: mainint main(int argc, char *argv[]){ int datatype, m_input, m, p_first, p_last, p_inc, p, nb_alg, variant, n_repeats, i, j, n_variants = N_VARIANTS; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, b, b_orig, norm; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c Enter blocking size:", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d/n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d/n", '%', m_input ); fprintf( stdout, "/nclear all;/n/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / f2c_abs(m_input); FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig );/* FLA_Obj_create( datatype, m, m, m, 1, &A ); FLA_Obj_create( datatype, m, 1, 1, 1, &b ); FLA_Obj_create( datatype, m, 1, 1, 1, &b_orig );*/ if ( FLA_Obj_is_single_precision( A ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); FLA_Random_tri_matrix( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, A ); FLA_Random_matrix( b ); 
FLA_Copy_external( b, b_orig );/* time_Trinv_un( 0, FLA_ALG_REFERENCE, n_repeats, m, nb_alg, A, b, b_orig, norm, &dtime, &diff, &gflops );//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,
示例10: mainint main(int argc, char *argv[]){ int datatype, m_input, n_input, m, n, min_m_n, p_first, p_last, p_inc, pp, pivot_combo, n_repeats, i, n_pivot_combos = N_PIVOT_COMBOS; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj C, b, b_orig, b_norm; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d %d", &m_input, &n_input ); fprintf( stdout, "%c %d %d/n", '%', m_input, n_input ); fprintf( stdout, "/nclear all;/n/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; for ( pp = p_first, i = 1; pp <= p_last; pp += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = pp / abs(m_input); if( n < 0 ) n = pp / abs(n_input); min_m_n = min( m, n ); for ( pivot_combo = 0; pivot_combo < n_pivot_combos; pivot_combo++ ){ FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, 1, 0, 0, &b ); 
FLA_Obj_create( datatype, m, 1, 0, 0, &b_orig ); if ( FLA_Obj_is_single_precision( C ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &b_norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &b_norm );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例11: mainint main(int argc, char *argv[]){ int m_input, m, p_first, p_last, p_inc, p, k_accum, b_alg, n_iter_max, variant, n_repeats, i, n_variants = 2; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff1, diff2; FLA_Datatype datatype, dt_real; FLA_Obj A, l, Q, Ql, TT, r, d, e, A_orig, G, R, W2, de, alpha; FLA_Init(); fprintf( stdout, "%c number of repeats:", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c enter n_iter_max (per eigenvalue): ", '%' ); scanf( "%d", &n_iter_max ); fprintf( stdout, "%c %d/n", '%', n_iter_max ); fprintf( stdout, "%c enter number of sets of Givens rotations to accumulate:", '%' ); scanf( "%d", &k_accum ); fprintf( stdout, "%c %d/n", '%', k_accum ); fprintf( stdout, "%c enter blocking size for application of G:", '%' ); scanf( "%d", &b_alg ); fprintf( stdout, "%c %d/n", '%', b_alg ); fprintf( stdout, "%c enter problem size first, last, inc:", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d/n", '%', m_input ); fprintf( stdout, "/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLA_Obj_create( datatype, m, m, 0, 0, &A ); FLA_Obj_create( datatype, m, m, 0, 0, &A_orig ); FLA_Obj_create( datatype, m, m, 0, 0, &Q ); FLA_Obj_create( 
datatype, m, m, 0, 0, &Ql ); FLA_Obj_create( datatype, m, 1, 0, 0, &r ); FLA_Obj_create( datatype, m, m, 0, 0, &W2 ); FLA_Obj_create( datatype, m-1, k_accum, 0, 0, &G ); dt_real = FLA_Obj_datatype_proj_to_real( A ); FLA_Obj_create( dt_real, m, 1, 0, 0, &l );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例12: FLA_Svd_uv_unb_var1FLA_Error FLA_Svd_uv_unb_var1( dim_t n_iter_max, FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V, dim_t k_accum, dim_t b_alg ){ FLA_Error r_val = FLA_SUCCESS; FLA_Datatype dt; FLA_Datatype dt_real; FLA_Datatype dt_comp; FLA_Obj scale, T, S, rL, rR, d, e, G, H; dim_t m_A, n_A; dim_t min_m_n; dim_t n_GH; double crossover_ratio = 17.0 / 9.0; n_GH = k_accum; m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); min_m_n = FLA_Obj_min_dim( A ); dt = FLA_Obj_datatype( A ); dt_real = FLA_Obj_datatype_proj_to_real( A ); dt_comp = FLA_Obj_datatype_proj_to_complex( A ); // Create matrices to hold block Householder transformations. FLA_Bidiag_UT_create_T( A, &T, &S ); // Create vectors to hold the realifying scalars. FLA_Obj_create( dt, min_m_n, 1, 0, 0, &rL ); FLA_Obj_create( dt, min_m_n, 1, 0, 0, &rR ); // Create vectors to hold the diagonal and sub-diagonal. FLA_Obj_create( dt_real, min_m_n, 1, 0, 0, &d ); FLA_Obj_create( dt_real, min_m_n-1, 1, 0, 0, &e ); // Create matrices to hold the left and right Givens scalars. FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &G ); FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &H ); // Create a real scaling factor. FLA_Obj_create( dt_real, 1, 1, 0, 0, &scale ); // Compute a scaling factor; If none is needed, sigma will be set to one. FLA_Svd_compute_scaling( A, scale ); // Scale the matrix if scale is non-unit. if ( !FLA_Obj_equals( scale, FLA_ONE ) ) FLA_Scal( scale, A ); if ( m_A < crossover_ratio * n_A ) { // Reduce the matrix to bidiagonal form. // Apply scalars to rotate elements on the superdiagonal to the real domain. // Extract the diagonal and superdiagonal from A. FLA_Bidiag_UT( A, T, S ); FLA_Bidiag_UT_realify( A, rL, rR ); FLA_Bidiag_UT_extract_real_diagonals( A, d, e ); // Form U and V. FLA_Bidiag_UT_form_U( A, T, U ); FLA_Bidiag_UT_form_V( A, S, V ); // Apply the realifying scalars in rL and rR to U and V, respectively. 
{ FLA_Obj UL, UR; FLA_Obj VL, VR; FLA_Part_1x2( U, &UL, &UR, min_m_n, FLA_LEFT ); FLA_Part_1x2( V, &VL, &VR, min_m_n, FLA_LEFT ); FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, rL, UL ); FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, VL ); } // Perform a singular value decomposition on the bidiagonal matrix. r_val = FLA_Bsvd_v_opt_var1( n_iter_max, d, e, G, H, U, V, b_alg ); } else // if ( crossover_ratio * n_A <= m_A ) { FLA_Obj TQ, R; FLA_Obj AT, AB; FLA_Obj UL, UR; // Perform a QR factorization on A and form Q in U. FLA_QR_UT_create_T( A, &TQ ); FLA_QR_UT( A, TQ ); FLA_QR_UT_form_Q( A, TQ, U ); FLA_Obj_free( &TQ ); // Set the lower triangle of R to zero and then copy the upper // triangle of A to R. FLA_Part_2x1( A, &AT, &AB, n_A, FLA_TOP ); FLA_Obj_create( dt, n_A, n_A, 0, 0, &R ); FLA_Setr( FLA_LOWER_TRIANGULAR, FLA_ZERO, R ); FLA_Copyr( FLA_UPPER_TRIANGULAR, AT, R ); // Reduce the matrix to bidiagonal form. // Apply scalars to rotate elements on the superdiagonal to the real domain. // Extract the diagonal and superdiagonal from A. FLA_Bidiag_UT( R, T, S ); FLA_Bidiag_UT_realify( R, rL, rR );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例13: main//.........这里部分代码省略......... precision = FLA_DOUBLE_PRECISION; for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; k = k_input; n = n_input; if( m < 0 ) m = p / f2c_abs(m_input); if( k < 0 ) k = p / f2c_abs(k_input); if( n < 0 ) n = p / f2c_abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // Determine datatype based on trans argument. if ( pc_str[param_combo][0] == 'c' || pc_str[param_combo][1] == 'c' ) { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_COMPLEX; else datatype = FLA_DOUBLE_COMPLEX; } else { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_FLOAT; else datatype = FLA_DOUBLE; } // If transposing A, switch dimensions. if ( pc_str[param_combo][0] == 'n' ) FLA_Obj_create( datatype, m, k, 0, 0, &A ); else FLA_Obj_create( datatype, k, m, 0, 0, &A ); // If transposing B, switch dimensions. if ( pc_str[param_combo][1] == 'n' ) FLA_Obj_create( datatype, k, n, 0, 0, &B ); else FLA_Obj_create( datatype, n, k, 0, 0, &B ); FLA_Obj_create( datatype, m, n, 0, 0, &C ); FLA_Obj_create( datatype, m, n, 0, 0, &C_ref ); FLA_Random_matrix( A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); FLA_Copy_external( C, C_ref ); fprintf( stdout, "data_gemm_%s( %d, 1:5 ) = [ %4d %4d %4d ", pc_str[param_combo], i, m, k, n ); fflush( stdout ); time_Gemm( param_combo, FLA_ALG_REFERENCE, n_repeats, m, k, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff ); fflush( stdout );/* time_Gemm( param_combo, FLA_ALG_FRONT, n_repeats, m, k, n, A, B, C, C_ref, &dtime, &diff, &gflops ); fprintf( stdout, "%6.3lf %6.2le ", gflops, diff );
开发者ID:flame,项目名称:libflame,代码行数:67,
示例14: mainint main(int argc, char *argv[]){ int n, nfirst, nlast, ninc, nlast_unb, i, irep, nrepeats, nb_alg; double dtime, dtime_best, gflops, max_gflops, diff, d_n; FLA_Obj A, Aref, Aold, delta; /* Initialize FLAME */ FLA_Init( ); /* Every time trial is repeated "repeat" times and the fastest run in recorded */ printf( "%% number of repeats:" ); scanf( "%d", &nrepeats ); printf( "%% %d/n", nrepeats ); /* Enter the max GFLOPS attainable This is used to set the y-axis range for the graphs. Here is how you figure out what to enter (on Linux machines): 1) more /proc/cpuinfo (this lists the contents of this file). 2) read through this and figure out the clock rate of the machine (in GHz). 3) Find out (from an expert of from the web) the number of floating point instructions that can be performed per core per clock cycle. 4) Figure out if you are using "multithreaded BLAS" which automatically parallelize calls to the Basic Linear Algebra Subprograms. If so, check how many cores are available. 5) Multiply 2) x 3) x 4) and enter this in response to the below. If you enter a value for max GFLOPS that is lower that the maximum that is observed in the experiments, then the top of the graph is set to the observed maximum. Thus, one possibility is to simply set this to 0.0. */ printf( "%% enter max GFLOPS:" ); scanf( "%lf", &max_gflops ); printf( "%% %lf/n", max_gflops ); /* Enter the algorithmic block size */ printf( "%% enter nb_alg:" ); scanf( "%d", &nb_alg ); printf( "%% %d/n", nb_alg ); /* Timing trials for matrix sizes n=nfirst to nlast in increments of ninc will be performed. 
Unblocked versions are only tested to nlast_unb */ printf( "%% enter nfirst, nlast, ninc, nlast_unb:" ); scanf( "%d%d%d%d", &nfirst, &nlast, &ninc, &nlast_unb ); printf( "%% %d %d %d %d/n", nfirst, nlast, ninc, nlast_unb ); i = 1; for ( n=nfirst; n<= nlast; n+=ninc ){ /* Allocate space for the matrices */ FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &A ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Aref ); FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Aold ); FLA_Obj_create( FLA_DOUBLE, 1, 1, 1, 1, &delta ); /* Generate random matrix A and save in Aold */ FLA_Random_matrix( Aold ); /* Add something large to the diagonal to make sure it isn't ill-conditionsed */ d_n = ( double ) n; *( ( double * ) FLA_Obj_buffer_at_view( delta ) ) = d_n; FLA_Shift_diag( FLA_NO_CONJUGATE, delta, Aold ); /* Set gflops = billions of floating point operations that will be performed */ gflops = 1.0/3.0 * n * n * n * 1.0e-09; /* Time the reference implementation */#if TIME_LAPACK == TRUE#else // if ( n <= nlast_unb )#endif { for ( irep=0; irep<nrepeats; irep++ ){ FLA_Copy( Aold, Aref ); dtime = FLA_Clock(); REF_Chol( TIME_LAPACK, Aref, nb_alg ); dtime = FLA_Clock() - dtime; if ( irep == 0 ) dtime_best = dtime; else dtime_best = ( dtime < dtime_best ? dtime : dtime_best ); } printf( "data_REF( %d, 1:2 ) = [ %d %le ];/n", i, n, gflops / dtime_best ); fflush( stdout ); } //.........这里部分代码省略.........
开发者ID:ztschir,项目名称:High-Performance,代码行数:101,
示例15: REF_Svdd_uv_componentsFLA_Error REF_Svdd_uv_components( FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V, double* dtime_bred, double* dtime_bsvd, double* dtime_appq, double* dtime_qrfa, double* dtime_gemm )/*{ *dtime_bred = 1; *dtime_bsvd = 1; *dtime_appq = 1; *dtime_qrfa = 1; *dtime_gemm = 1; return FLA_Svdd_external( FLA_SVD_VECTORS_ALL, A, s, U, V );}*/{ FLA_Datatype dt_A; FLA_Datatype dt_A_real; dim_t m_A, n_A; dim_t min_m_n; FLA_Obj tq, tu, tv, d, e, Ur, Vr, W; FLA_Obj eT, epsilonB; FLA_Uplo uplo = FLA_UPPER_TRIANGULAR; double crossover_ratio = 16.0 / 10.0; double dtime_temp; dt_A = FLA_Obj_datatype( A ); dt_A_real = FLA_Obj_datatype_proj_to_real( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); min_m_n = FLA_Obj_min_dim( A ); FLA_Obj_create( dt_A, min_m_n, 1, 0, 0, &tq ); FLA_Obj_create( dt_A, min_m_n, 1, 0, 0, &tu ); FLA_Obj_create( dt_A, min_m_n, 1, 0, 0, &tv ); FLA_Obj_create( dt_A_real, min_m_n, 1, 0, 0, &d ); FLA_Obj_create( dt_A_real, min_m_n, 1, 0, 0, &e ); FLA_Obj_create( dt_A_real, n_A, n_A, 0, 0, &Ur ); FLA_Obj_create( dt_A_real, n_A, n_A, 0, 0, &Vr ); FLA_Part_2x1( e, &eT, &epsilonB, 1, FLA_BOTTOM ); if ( m_A >= n_A ) { if ( m_A < crossover_ratio * n_A ) { dtime_temp = FLA_Clock(); { // Reduce to bidiagonal form. FLA_Bidiag_blk_external( A, tu, tv ); FLA_Bidiag_UT_extract_diagonals( A, d, eT ); } *dtime_bred = FLA_Clock() - dtime_temp; dtime_temp = FLA_Clock(); { // Divide-and-conquor algorithm. FLA_Bsvdd_external( uplo, d, e, Ur, Vr ); } *dtime_bsvd = FLA_Clock() - dtime_temp; dtime_temp = FLA_Clock(); { // Form U. FLA_Copy_external( Ur, U ); FLA_Bidiag_apply_U_external( FLA_LEFT, FLA_NO_TRANSPOSE, A, tu, U ); // Form V. 
FLA_Copy_external( Vr, V ); FLA_Bidiag_apply_V_external( FLA_RIGHT, FLA_CONJ_TRANSPOSE, A, tv, V ); } *dtime_appq = FLA_Clock() - dtime_temp; *dtime_qrfa = 0.0; *dtime_gemm = 0.0; } else { FLA_Obj AT, AB; FLA_Obj UL, UR; FLA_Part_2x1( A, &AT, &AB, n_A, FLA_TOP ); FLA_Part_1x2( U, &UL, &UR, n_A, FLA_LEFT ); // Create a temporary n-by-n matrix R. FLA_Obj_create( dt_A, n_A, n_A, 0, 0, &W ); dtime_temp = FLA_Clock(); { // Perform a QR factorization. FLA_QR_blk_external( A, tq ); FLA_Copyr_external( FLA_LOWER_TRIANGULAR, A, UL );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例16: FLA_Svd_uv_var2_componentsFLA_Error FLA_Svd_uv_var2_components( dim_t n_iter_max, dim_t k_accum, dim_t b_alg, FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V, double* dtime_bred, double* dtime_bsvd, double* dtime_appq, double* dtime_qrfa, double* dtime_gemm ){ FLA_Error r_val = FLA_SUCCESS; FLA_Datatype dt; FLA_Datatype dt_real; FLA_Datatype dt_comp; FLA_Obj T, S, rL, rR, d, e, G, H, RG, RH, W; dim_t m_A, n_A; dim_t min_m_n; dim_t n_GH; double crossover_ratio = 17.0 / 9.0; double dtime_temp; n_GH = k_accum; m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); min_m_n = FLA_Obj_min_dim( A ); dt = FLA_Obj_datatype( A ); dt_real = FLA_Obj_datatype_proj_to_real( A ); dt_comp = FLA_Obj_datatype_proj_to_complex( A ); // If the matrix is a scalar, then the SVD is easy. if ( min_m_n == 1 ) { FLA_Copy( A, s ); FLA_Set_to_identity( U ); FLA_Set_to_identity( V ); return FLA_SUCCESS; } // Create matrices to hold block Householder transformations. FLA_Bidiag_UT_create_T( A, &T, &S ); // Create vectors to hold the realifying scalars. FLA_Obj_create( dt, min_m_n, 1, 0, 0, &rL ); FLA_Obj_create( dt, min_m_n, 1, 0, 0, &rR ); // Create vectors to hold the diagonal and sub-diagonal. FLA_Obj_create( dt_real, min_m_n, 1, 0, 0, &d ); FLA_Obj_create( dt_real, min_m_n-1, 1, 0, 0, &e ); // Create matrices to hold the left and right Givens scalars. FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &G ); FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &H ); // Create matrices to hold the left and right Givens matrices. FLA_Obj_create( dt_real, min_m_n, min_m_n, 0, 0, &RG ); FLA_Obj_create( dt_real, min_m_n, min_m_n, 0, 0, &RH ); FLA_Obj_create( dt, m_A, n_A, 0, 0, &W ); if ( m_A >= n_A ) { if ( m_A < crossover_ratio * n_A ) { dtime_temp = FLA_Clock(); { // Reduce the matrix to bidiagonal form. // Apply scalars to rotate elements on the sub-diagonal to the real domain. // Extract the diagonal and sub-diagonal from A. 
FLA_Bidiag_UT( A, T, S ); FLA_Bidiag_UT_realify( A, rL, rR ); FLA_Bidiag_UT_extract_diagonals( A, d, e ); } *dtime_bred = FLA_Clock() - dtime_temp; dtime_temp = FLA_Clock(); { // Form U and V. FLA_Bidiag_UT_form_U( A, T, U ); FLA_Bidiag_UT_form_V( A, S, V ); } *dtime_appq = FLA_Clock() - dtime_temp; // Apply the realifying scalars in rL and rR to U and V, respectively. { FLA_Obj UL, UR; FLA_Obj VL, VR; FLA_Part_1x2( U, &UL, &UR, min_m_n, FLA_LEFT ); FLA_Part_1x2( V, &VL, &VR, min_m_n, FLA_LEFT ); FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, rL, UL ); FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, VL ); } dtime_temp = FLA_Clock(); { // Perform a singular value decomposition on the bidiagonal matrix. r_val = FLA_Bsvd_v_opt_var2( n_iter_max, d, e, G, H, RG, RH, W, U, V, b_alg ); } *dtime_bsvd = FLA_Clock() - dtime_temp; } else // if ( crossover_ratio * n_A <= m_A ) { FLA_Obj TQ, R;//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例17: FLA_Hess_UT_step_unb_var2FLA_Error FLA_Hess_UT_step_unb_var2( FLA_Obj A, FLA_Obj T ){ FLA_Obj ATL, ATR, A00, a01, A02, ABL, ABR, a10t, alpha11, a12t, A20, a21, A22; FLA_Obj TTL, TTR, T00, t01, T02, TBL, TBR, t10t, tau11, t12t, T20, t21, T22; FLA_Obj yT, y0, yB, psi1, y2; FLA_Obj zT, z0, zB, zeta1, z2; FLA_Obj y, z; FLA_Obj inv_tau11; FLA_Obj minus_inv_tau11; FLA_Obj first_elem; FLA_Obj beta; FLA_Obj conj_beta; FLA_Obj dot_product; FLA_Obj a21_t, a21_b; FLA_Datatype datatype_A; dim_t m_A; dim_t b_alg; b_alg = FLA_Obj_length( T ); datatype_A = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 ); FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 ); FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem ); FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta ); FLA_Obj_create( datatype_A, 1, 1, 0, 0, &conj_beta ); FLA_Obj_create( datatype_A, 1, 1, 0, 0, &dot_product ); FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y ); FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z ); FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 0, 0, FLA_TL ); FLA_Part_2x2( T, &TTL, &TTR, &TBL, &TBR, 0, 0, FLA_TL ); FLA_Part_2x1( y, &yT, &yB, 0, FLA_TOP ); FLA_Part_2x1( z, &zT, &zB, 0, FLA_TOP ); while ( FLA_Obj_length( ATL ) < b_alg ) { FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02, /* ************* */ /* ************************** */ &a10t, /**/ &alpha11, &a12t, ABL, /**/ ABR, &A20, /**/ &a21, &A22, 1, 1, FLA_BR ); FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02, /* ************* */ /* ************************** */ &t10t, /**/ &tau11, &t12t, TBL, /**/ TBR, &T20, /**/ &t21, &T22, 1, 1, FLA_BR ); FLA_Repart_2x1_to_3x1( yT, &y0, /* ** */ /* **** */ &psi1, yB, &y2, 1, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( zT, &z0, /* ** */ /* ***** */ &zeta1, zB, &z2, 1, FLA_BOTTOM ); /*------------------------------------------------------------*/ if ( FLA_Obj_length( A22 ) > 0 ) { FLA_Part_2x1( a21, &a21_t, &a21_b, 1, FLA_TOP ); // [ u21, tau11, a21 
] = House( a21 ); FLA_Househ2_UT( FLA_LEFT, a21_t, a21_b, tau11 ); // inv_tau11 = 1 / tau11; // minus_inv_tau11 = -1 / tau11; FLA_Set( FLA_ONE, inv_tau11 ); FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 ); FLA_Copy( inv_tau11, minus_inv_tau11 ); FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 ); // Save first element of a21_t and set it to one so we can use a21 as // u21 in subsequent computations. We will restore a21_t later on. FLA_Copy( a21_t, first_elem ); FLA_Set( FLA_ONE, a21_t ); // y21 = A22' * u21;//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例18: mainint main( int argc, char *argv[] ) { int i, j, n_threads, n_repeats, n_trials, increment, begin, sorting, caching, work_stealing, data_affinity; dim_t size, nb_alg; FLA_Datatype datatype = FLA_DOUBLE; FLA_Inv inv = FLA_NO_INVERSE; FLA_Uplo uplo = FLA_LOWER_TRIANGULAR; FLA_Obj A, B, x, b, b_norm, AH, BH; double length, b_norm_value = 0.0, dtime, *dtimes, *flops;#ifndef FLA_ENABLE_WINDOWS_BUILD char output_file_m[100]; FILE *fpp;#endif fprintf( stdout, "%c Enter number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c Enter blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u/n", '%', nb_alg ); fprintf( stdout, "%c Enter problem size parameters: first, inc, num: ", '%' ); scanf( "%d%d%d", &begin, &increment, &n_trials ); fprintf( stdout, "%c %d %d %d/n", '%', begin, increment, n_trials ); fprintf( stdout, "%c Enter number of threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d/n", '%', n_threads ); fprintf( stdout, "%c Enter SuperMatrix parameters: sorting, caching, work stealing, data affinity: ", '%' ); scanf( "%d%d%d%d", &sorting, &caching, &work_stealing, &data_affinity ); fprintf( stdout, "%c %s %s %s %s/n/n", '%', ( sorting ? "TRUE" : "FALSE" ), ( caching ? "TRUE" : "FALSE" ), ( work_stealing ? "TRUE" : "FALSE" ), ( data_affinity ? ( data_affinity == 1 ? 
"FLASH_QUEUE_AFFINITY_2D_BLOCK_CYCLIC" : "FLASH_QUEUE_AFFINITY_OTHER" ) : "FLASH_QUEUE_AFFINITY_NONE" ) );#ifdef FLA_ENABLE_WINDOWS_BUILD fprintf( stdout, "%s_%u = [/n", OUTPUT_FILE, nb_alg );#else sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE ); fpp = fopen( output_file_m, "a" ); fprintf( fpp, "%%/n" ); fprintf( fpp, "%% | Matrix Size | FLASH |/n" ); fprintf( fpp, "%% | n x n | GFlops |/n" ); fprintf( fpp, "%% -----------------------------/n" ); fprintf( fpp, "%s_%u = [/n", OUTPUT_FILE, nb_alg );#endif FLA_Init(); dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) ); flops = ( double * ) FLA_malloc( n_trials * sizeof( double ) ); FLASH_Queue_set_num_threads( n_threads ); FLASH_Queue_set_sorting( sorting ); FLASH_Queue_set_caching( caching ); FLASH_Queue_set_work_stealing( work_stealing ); FLASH_Queue_set_data_affinity( data_affinity ); for ( i = 0; i < n_trials; i++ ) { size = begin + i * increment; FLA_Obj_create( datatype, size, size, 0, 0, &A ); FLA_Obj_create( datatype, size, size, 0, 0, &B ); FLA_Obj_create( datatype, size, 1, 0, 0, &x ); FLA_Obj_create( datatype, size, 1, 0, 0, &b ); FLA_Obj_create( datatype, 1, 1, 0, 0, &b_norm ); //.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例19: mainint main(int argc, char *argv[]){ int m_input, m, p_first, p_last, p_inc, p, n_repeats, param_combo, i, n_param_combos = N_PARAM_COMBOS; dim_t b_flash; dim_t n_threads; FLA_Datatype datatype; FLA_Uplo uplo; FLA_Inv inv; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char m_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, norm; FLA_Init(); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c enter FLASH blocksize: ", '%' ); scanf( "%u", &b_flash ); fprintf( stdout, "%c %u/n", '%', b_flash ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' ); scanf( "%d", &m_input ); fprintf( stdout, "%c %d/n", '%', m_input ); fprintf( stdout, "%c enter the number of SuperMatrix threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d/n", '%', n_threads ); fprintf( stdout, "/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } //datatype = FLA_FLOAT; //datatype = FLA_DOUBLE; //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; FLASH_Queue_set_num_threads( n_threads ); for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; if( m < 0 ) m = p / abs(m_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ if ( pc_str[param_combo][0] == 'i' ) inv = FLA_INVERSE; else inv = FLA_NO_INVERSE; if ( pc_str[param_combo][1] == 'l' ) uplo = FLA_LOWER_TRIANGULAR; else uplo = FLA_UPPER_TRIANGULAR; FLASH_Obj_create( datatype, m, 
m, 1, &b_flash, &A ); FLASH_Obj_create( datatype, m, m, 1, &b_flash, &B ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例20: time_Apply_G_rfvoid time_Apply_G_rf( int variant, int type, int n_repeats, int m, int k, int n, int b_alg, FLA_Obj A, FLA_Obj A_ref, FLA_Obj G, FLA_Obj P, double *dtime, double *diff, double *gflops ){ int irep; double dtime_old = 1.0e9; FLA_Obj A_save, G_save, norm; if ( FLA_Obj_is_real( A ) ) { if ( //( variant == 1 && type == FLA_ALG_UNB_OPT ) || //( variant == 1 && type == FLA_ALG_UNB_ASM ) || //( variant == 1 && type == FLA_ALG_BLOCKED ) || //( variant == 2 && type == FLA_ALG_UNB_OPT ) || //( variant == 2 && type == FLA_ALG_UNB_ASM ) || //( variant == 2 && type == FLA_ALG_BLOCKED ) || //( variant == 3 && type == FLA_ALG_UNB_OPT ) || //( variant == 3 && type == FLA_ALG_UNB_ASM ) || //( variant == 3 && type == FLA_ALG_BLOCKED ) || //( variant == 6 && type == FLA_ALG_UNB_OPT ) || //( variant == 6 && type == FLA_ALG_UNB_ASM ) || //( variant == 6 && type == FLA_ALG_BLOCKED ) || //( variant == 9 && type == FLA_ALG_UNB_OPT ) || //( variant == 9 && type == FLA_ALG_UNB_ASM ) || //( variant == 9 && type == FLA_ALG_BLOCKED ) || ( variant == 4 ) || ( variant == 5 ) || ( variant == 7 ) || ( variant == 8 ) || FALSE ) { *gflops = 0.0; *diff = 0.0; return; } } else if ( FLA_Obj_is_complex( A ) ) { if ( //( variant == 1 && type == FLA_ALG_UNB_OPT ) || //( variant == 1 && type == FLA_ALG_UNB_ASM ) || //( variant == 1 && type == FLA_ALG_BLOCKED ) || //( variant == 2 && type == FLA_ALG_UNB_OPT ) || //( variant == 2 && type == FLA_ALG_UNB_ASM ) || //( variant == 2 && type == FLA_ALG_BLOCKED ) || //( variant == 3 && type == FLA_ALG_UNB_OPT ) || //( variant == 3 && type == FLA_ALG_UNB_ASM ) || //( variant == 3 && type == FLA_ALG_BLOCKED ) || //( variant == 6 && type == FLA_ALG_UNB_OPT ) || //( variant == 6 && type == FLA_ALG_UNB_ASM ) || //( variant == 6 && type == FLA_ALG_BLOCKED ) || //( variant == 9 && type == FLA_ALG_UNB_OPT ) || //( variant == 9 && type == FLA_ALG_UNB_ASM ) || //( variant == 9 && type == FLA_ALG_BLOCKED ) || ( variant == 4 ) || ( variant == 5 ) || ( 
variant == 7 ) || ( variant == 8 ) || FALSE ) { *gflops = 0.0; *diff = 0.0; return; } } FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, G, &G_save ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); //dim_t b_flash_m = b_alg; //dim_t b_flash_n = n; //FLASH_Obj_create_hier_copy_of_flat_ext( A, 1, &b_flash_m, &b_flash_n, &AH ); //printf ( "flash dims: %d x %d/n", FLA_Obj_length( AH ), FLA_Obj_width( AH ) ); FLA_Copy_external( A, A_save ); FLA_Copy_external( G, G_save ); for ( irep = 0 ; irep < n_repeats; irep++ ){ FLA_Copy_external( A_save, A ); FLA_Copy_external( G_save, G ); //FLASH_Obj_hierarchify( A_save, AH ); *dtime = FLA_Clock(); switch( variant ){//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例21: FLA_Hevd_lv_var4_componentsFLA_Error FLA_Hevd_lv_var4_components( dim_t n_iter_max, FLA_Obj A, FLA_Obj l, dim_t k_accum, dim_t b_alg, double* dtime_tred, double* dtime_tevd, double* dtime_appq ){ FLA_Error r_val = FLA_SUCCESS; FLA_Uplo uplo = FLA_LOWER_TRIANGULAR; FLA_Datatype dt; FLA_Datatype dt_real; FLA_Datatype dt_comp; FLA_Obj T, r, d, e, G, R, W; FLA_Obj d0, e0, ls, pu; dim_t mn_A; dim_t n_G = k_accum; double dtime_temp; mn_A = FLA_Obj_length( A ); dt = FLA_Obj_datatype( A ); dt_real = FLA_Obj_datatype_proj_to_real( A ); dt_comp = FLA_Obj_datatype_proj_to_complex( A ); *dtime_tred = 1; *dtime_tevd = 1; *dtime_appq = 1; // If the matrix is a scalar, then the EVD is easy. if ( mn_A == 1 ) { FLA_Copy( A, l ); FLA_Set( FLA_ONE, A ); return FLA_SUCCESS; } // Create a matrix to hold block Householder transformations. FLA_Tridiag_UT_create_T( A, &T ); // Create a vector to hold the realifying scalars. FLA_Obj_create( dt, mn_A, 1, 0, 0, &r ); // Create vectors to hold the diagonal and sub-diagonal. FLA_Obj_create( dt_real, mn_A, 1, 0, 0, &d ); FLA_Obj_create( dt_real, mn_A-1, 1, 0, 0, &e ); FLA_Obj_create( dt_real, mn_A, 1, 0, 0, &d0 ); FLA_Obj_create( dt_real, mn_A-1, 1, 0, 0, &e0 ); FLA_Obj_create( dt_real, mn_A, 1, 0, 0, &pu ); FLA_Obj_create( FLA_INT, mn_A, 1, 0, 0, &ls ); FLA_Obj_create( dt_comp, mn_A-1, n_G, 0, 0, &G ); FLA_Obj_create( dt_real, mn_A, mn_A, 0, 0, &R ); FLA_Obj_create( dt, mn_A, mn_A, 0, 0, &W ); dtime_temp = FLA_Clock(); { // Reduce the matrix to tridiagonal form. FLA_Tridiag_UT( uplo, A, T ); } *dtime_tred = FLA_Clock() - dtime_temp; // Apply scalars to rotate elements on the sub-diagonal to the real domain. FLA_Tridiag_UT_realify( uplo, A, r ); // Extract the diagonal and sub-diagonal from A. FLA_Tridiag_UT_extract_diagonals( uplo, A, d, e ); dtime_temp = FLA_Clock(); { // Form Q, overwriting A. FLA_Tridiag_UT_form_Q( uplo, A, T ); } *dtime_appq = FLA_Clock() - dtime_temp; // Apply the scalars in r to Q. 
FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, r, A ); // Find the eigenvalues only. FLA_Copy( d, d0 ); FLA_Copy( e, e0 ); //r_val = FLA_Tevd_n_opt_var1( n_iter_max, d0, e0, G, A );{ int info; double* buff_d = FLA_DOUBLE_PTR( d0 ); double* buff_e = FLA_DOUBLE_PTR( e0 ); dsterf_( &mn_A, buff_d, buff_e, &info );} FLA_Sort( FLA_FORWARD, d0 ); FLA_Set( FLA_ZERO, ls ); FLA_Set( FLA_ZERO, pu ); dtime_temp = FLA_Clock(); { // Perform an eigenvalue decomposition on the tridiagonal matrix. r_val = FLA_Tevd_v_opt_var4( n_iter_max, d, e, d0, ls, pu, G, R, W, A, b_alg ); } *dtime_tevd = FLA_Clock() - dtime_temp; // Copy the converged eigenvalues to the output vector. FLA_Copy( d, l ); // Sort the eigenvalues and eigenvectors in ascending order. FLA_Sort_evd( FLA_FORWARD, l, A ); FLA_Obj_free( &T ); FLA_Obj_free( &r );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例22: FLA_Bidiag_apply_V_externalFLA_Error FLA_Bidiag_apply_V_external( FLA_Side side, FLA_Trans trans, FLA_Obj A, FLA_Obj t, FLA_Obj B ){ int info = 0;#ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; // int m_A, n_A; int m_B, n_B; int cs_A; int cs_B; int k_t; int lwork; FLA_Obj work; char blas_side; char blas_vect = 'P'; char blas_trans; int i; //if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) // FLA_Apply_Q_check( side, trans, storev, A, t, B ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); // m_A = FLA_Obj_length( A ); // n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); m_B = FLA_Obj_length( B ); n_B = FLA_Obj_width( B ); cs_B = FLA_Obj_col_stride( B ); if ( blas_vect == 'Q' ) k_t = FLA_Obj_vector_dim( t ); else k_t = FLA_Obj_vector_dim( t ) + 1; if ( FLA_Obj_is_real( A ) && trans == FLA_CONJ_TRANSPOSE ) trans = FLA_TRANSPOSE; FLA_Param_map_flame_to_netlib_side( side, &blas_side ); FLA_Param_map_flame_to_netlib_trans( trans, &blas_trans ); // Make a workspace query the first time through. This will provide us with // and ideal workspace size based on an internal block size. lwork = -1; FLA_Obj_create( datatype, 1, 1, 0, 0, &work ); for ( i = 0; i < 2; ++i ) { if ( i == 1 ) { // Grab the queried ideal workspace size from the work array, free the // work object, and then re-allocate the workspace with the ideal size. 
if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) lwork = ( int ) *FLA_FLOAT_PTR( work ); else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) lwork = ( int ) *FLA_DOUBLE_PTR( work ); FLA_Obj_free( &work ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work ); } switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_t = ( float * ) FLA_FLOAT_PTR( t ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_work = ( float * ) FLA_FLOAT_PTR( work ); F77_sormbr( &blas_vect, &blas_side, &blas_trans, &m_B, &n_B, &k_t, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_t = ( double * ) FLA_DOUBLE_PTR( t ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_work = ( double * ) FLA_DOUBLE_PTR( work ); F77_dormbr( &blas_vect, &blas_side, &blas_trans, &m_B, &n_B, &k_t, buff_A, &cs_A,//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例23: libfla_test_symm_experimentvoid libfla_test_symm_experiment( test_params_t params, unsigned int var, char* sc_str, FLA_Datatype datatype, unsigned int p_cur, unsigned int pci, unsigned int n_repeats, signed int impl, double* perf, double* residual ){ dim_t b_flash = params.b_flash; dim_t b_alg_flat = params.b_alg_flat; double time_min = 1e9; double time; unsigned int i; unsigned int m; signed int m_input = -1; unsigned int n; signed int n_input = -1; FLA_Side side; FLA_Uplo uplo; FLA_Obj A, B, C, x, y, z, w, norm; FLA_Obj alpha, beta; FLA_Obj C_save; FLA_Obj A_test, B_test, C_test; // Determine the dimensions. if ( m_input < 0 ) m = p_cur / abs(m_input); else m = p_cur; if ( n_input < 0 ) n = p_cur / abs(n_input); else n = p_cur; // Translate parameter characters to libflame constants. FLA_Param_map_char_to_flame_side( &pc_str[pci][0], &side ); FLA_Param_map_char_to_flame_uplo( &pc_str[pci][1], &uplo ); // Create the matrices for the current operation. if ( side == FLA_LEFT ) { libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, m, &A ); // Create vectors for use in test. FLA_Obj_create( datatype, n, 1, 0, 0, &x ); FLA_Obj_create( datatype, m, 1, 0, 0, &y ); FLA_Obj_create( datatype, m, 1, 0, 0, &z ); FLA_Obj_create( datatype, m, 1, 0, 0, &w ); } else { libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], n, n, &A ); // Create vectors for use in test. FLA_Obj_create( datatype, n, 1, 0, 0, &x ); FLA_Obj_create( datatype, m, 1, 0, 0, &y ); FLA_Obj_create( datatype, m, 1, 0, 0, &z ); FLA_Obj_create( datatype, n, 1, 0, 0, &w ); } libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, n, &B ); libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], m, n, &C ); // Create a norm scalar. FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); // Initialize the test matrices. FLA_Random_symm_matrix( uplo, A ); FLA_Random_matrix( B ); FLA_Random_matrix( C ); // Initialize the test vectors. 
FLA_Random_matrix( x ); FLA_Set( FLA_ZERO, y ); FLA_Set( FLA_ZERO, z ); FLA_Set( FLA_ZERO, w ); // Set constants. alpha = FLA_TWO; beta = FLA_MINUS_ONE; // Save the original object contents in a temporary object. FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, C, &C_save ); // Use hierarchical matrices if we're testing the FLASH front-end. if ( impl == FLA_TEST_HIER_FRONT_END ) { FLASH_Obj_create_hier_copy_of_flat( A, 1, &b_flash, &A_test ); FLASH_Obj_create_hier_copy_of_flat( B, 1, &b_flash, &B_test ); FLASH_Obj_create_hier_copy_of_flat( C, 1, &b_flash, &C_test ); } else { A_test = A; B_test = B; C_test = C; } // Create a control tree for the individual variants. if ( impl == FLA_TEST_FLAT_UNB_VAR || impl == FLA_TEST_FLAT_OPT_VAR || impl == FLA_TEST_FLAT_BLK_VAR ||//.........这里部分代码省略.........
开发者ID:flame,项目名称:libflame,代码行数:101,
示例24: FLA_Svd_compute_scalingFLA_Error FLA_Svd_compute_scaling( FLA_Obj A, FLA_Obj sigma ){ FLA_Datatype dt_real; FLA_Obj norm; FLA_Obj safmin; FLA_Obj prec; FLA_Obj rmin; FLA_Obj rmax; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Svd_compute_scaling_check( A, sigma ); dt_real = FLA_Obj_datatype_proj_to_real( A ); FLA_Obj_create( dt_real, 1, 1, 0, 0, &norm ); FLA_Obj_create( dt_real, 1, 1, 0, 0, &prec ); FLA_Obj_create( dt_real, 1, 1, 0, 0, &safmin ); FLA_Obj_create( dt_real, 1, 1, 0, 0, &rmin ); FLA_Obj_create( dt_real, 1, 1, 0, 0, &rmax ); // Query safmin, precision. FLA_Mach_params( FLA_MACH_PREC, prec ); FLA_Mach_params( FLA_MACH_SFMIN, safmin );//FLA_Obj_show( "safmin", safmin, "%20.12e", "" );//FLA_Obj_show( "prec", prec, "%20.12e", "" ); // rmin = sqrt( safmin ) / prec; FLA_Copy( safmin, rmin ); FLA_Sqrt( rmin ); FLA_Inv_scal( prec, rmin ); // rmax = 1 / rmin; FLA_Copy( rmin, rmax ); FLA_Invert( FLA_NO_CONJUGATE, rmax );//FLA_Obj_show( "rmin", rmin, "%20.12e", "" );//FLA_Obj_show( "rmax", rmax, "%20.12e", "" ); // Find the maximum absolute value of A. FLA_Max_abs_value( A, norm ); if ( FLA_Obj_gt( norm, FLA_ZERO ) && FLA_Obj_lt( norm, rmin ) ) { // sigma = rmin / norm; FLA_Copy( rmin, sigma ); FLA_Inv_scal( norm, sigma ); } else if ( FLA_Obj_gt( norm, rmax ) ) { // sigma = rmax / norm; FLA_Copy( rmax, sigma ); FLA_Inv_scal( norm, sigma ); } else { // sigma = 1.0; FLA_Copy( FLA_ONE, sigma ); } FLA_Obj_free( &norm ); FLA_Obj_free( &prec ); FLA_Obj_free( &safmin ); FLA_Obj_free( &rmin ); FLA_Obj_free( &rmax ); return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:68,
示例25: mainint main( int argc, char *argv[] ) { int i, j, n_threads, n_repeats, n_trials, increment, begin, sorting, caching, work_stealing, data_affinity; dim_t size, nb_alg; FLA_Datatype datatype = FLA_DOUBLE; FLA_Obj A, x, b, b_norm, AH, pH, bH; double b_norm_value, dtime, *dtimes, *flops;#ifndef FLA_ENABLE_WINDOWS_BUILD char output_file_m[100]; FILE *fpp;#endif fprintf( stdout, "%c Enter number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c Enter blocksize: ", '%' ); scanf( "%u", &nb_alg ); fprintf( stdout, "%c %u/n", '%', nb_alg ); fprintf( stdout, "%c Enter problem size parameters: first, inc, num: ", '%' ); scanf( "%d%d%d", &begin, &increment, &n_trials ); fprintf( stdout, "%c %d %d %d/n", '%', begin, increment, n_trials ); fprintf( stdout, "%c Enter number of threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d/n", '%', n_threads ); fprintf( stdout, "%c Enter SuperMatrix parameters: sorting, caching, work stealing, data affinity: ", '%' ); scanf( "%d%d%d%d", &sorting, &caching, &work_stealing, &data_affinity ); fprintf( stdout, "%c %s %s %s %s/n/n", '%', ( sorting ? "TRUE" : "FALSE" ), ( caching ? "TRUE" : "FALSE" ), ( work_stealing ? "TRUE" : "FALSE" ), ( data_affinity ? ( data_affinity == 1 ? 
"FLASH_QUEUE_AFFINITY_2D_BLOCK_CYCLIC" : "FLASH_QUEUE_AFFINITY_OTHER" ) : "FLASH_QUEUE_AFFINITY_NONE" ) );#ifdef FLA_ENABLE_WINDOWS_BUILD fprintf( stdout, "%s_%u = [/n", OUTPUT_FILE, nb_alg );#else sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE ); fpp = fopen( output_file_m, "a" ); fprintf( fpp, "%%/n" ); fprintf( fpp, "%% | Matrix Size | FLASH |/n" ); fprintf( fpp, "%% | n x n | GFlops |/n" ); fprintf( fpp, "%% -----------------------------/n" ); fprintf( fpp, "%s_%u = [/n", OUTPUT_FILE, nb_alg );#endif FLA_Init(); dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) ); flops = ( double * ) FLA_malloc( n_trials * sizeof( double ) ); FLASH_Queue_set_num_threads( n_threads ); FLASH_Queue_set_sorting( sorting ); FLASH_Queue_set_caching( caching ); FLASH_Queue_set_work_stealing( work_stealing ); FLASH_Queue_set_data_affinity( data_affinity ); for ( i = 0; i < n_trials; i++ ) { size = begin + i * increment; FLA_Obj_create( datatype, size, size, 0, 0, &A ); FLA_Obj_create( datatype, size, 1, 0, 0, &x ); FLA_Obj_create( datatype, size, 1, 0, 0, &b ); FLA_Obj_create( datatype, 1, 1, 0, 0, &b_norm ); for ( j = 0; j < n_repeats; j++ ) { FLA_Random_matrix( A ); FLA_Random_matrix( b ); FLASH_Obj_create_hier_copy_of_flat( A, 1, &nb_alg, &AH ); FLASH_Obj_create( FLA_INT, size, 1, 1, &nb_alg, &pH ); FLASH_Obj_create_hier_copy_of_flat( b, 1, &nb_alg, &bH );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例26: main//.........这里部分代码省略......... for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); for ( param_combo = 0; param_combo < n_param_combos; param_combo++ ){ // Determine datatype based on trans argument. if ( pc_str[param_combo][1] == 'c' ) { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_COMPLEX; else datatype = FLA_DOUBLE_COMPLEX; } else { if ( precision == FLA_SINGLE_PRECISION ) datatype = FLA_FLOAT; else datatype = FLA_DOUBLE; } bm = nb_alg / 4; bn = nb_alg; // If multiplying Q on the left, A is m x m; ...on the right, A is n x n. if ( pc_str[param_combo][0] == 'l' ) { FLA_Obj_create( datatype, nb_alg, nb_alg, &A_flat ); FLASH_Obj_create( datatype, nb_alg, nb_alg, 1, &nb_alg, &A ); FLASH_Obj_create( datatype, nb_alg, nb_alg, 1, &nb_alg, &A_save ); FLA_Obj_create( datatype, bm, bn, &T_flat ); FLASH_Obj_create_ext( datatype, bm, bn, 1, &bm, &bn, &T ); FLASH_Obj_create_ext( datatype, bm, n, 1, &bm, &bn, &W ); } else { FLASH_Obj_create( datatype, n, n, 1, &nb_alg, &A ); } FLASH_Obj_create( datatype, nb_alg, n, 1, &nb_alg, &B ); FLASH_Obj_create( datatype, nb_alg, n, 1, &nb_alg, &B_ref ); FLA_Obj_create( datatype, nb_alg, 1, &t ); FLASH_Random_matrix( A ); FLASH_Random_matrix( B ); fprintf( stdout, "data_applyq_%s( %d, 1:5 ) = [ %d ", pc_str[param_combo], i, p ); fflush( stdout ); FLASH_Copy( A, A_save ); FLASH_Obj_flatten( A, A_flat ); FLA_QR_blk_external( A_flat, t ); FLASH_Obj_hierarchify( A_flat, A ); time_Apply_Q( param_combo, FLA_ALG_REFERENCE, n_repeats, m, n, A, B, B_ref, t, T, W, &dtime, &diff, &gflops );
开发者ID:anaptyxis,项目名称:libflame,代码行数:66,
示例27: FLA_Hess_UT_blk_var4FLA_Error FLA_Hess_UT_blk_var4( FLA_Obj A, FLA_Obj T ){ FLA_Obj ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; FLA_Obj UT, U0, UB, U1, U2; FLA_Obj YT, Y0, YB, Y1, Y2; FLA_Obj ZT, Z0, ZB, Z1, Z2; FLA_Obj TL, TR, T0, T1, T2; FLA_Obj U, Y, Z; FLA_Obj ABR_l; FLA_Obj UB_l, U2_l; FLA_Obj YB_l, Y2_l; FLA_Obj ZB_l, Z2_l; FLA_Obj WT_l; FLA_Obj T1_tl; FLA_Obj none, none2, none3; FLA_Obj UB_tl, UB_bl; FLA_Datatype datatype_A; dim_t m_A; dim_t b_alg, b, bb; b_alg = FLA_Obj_length( T ); datatype_A = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U ); FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Y ); FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z ); FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 0, 0, FLA_TL ); FLA_Part_2x1( U, &UT, &UB, 0, FLA_TOP ); FLA_Part_2x1( Y, &YT, &YB, 0, FLA_TOP ); FLA_Part_2x1( Z, &ZT, &ZB, 0, FLA_TOP ); FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT ); while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ) { b = min( FLA_Obj_length( ABR ), b_alg ); FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02, /* ************* */ /* ******************** */ &A10, /**/ &A11, &A12, ABL, /**/ ABR, &A20, /**/ &A21, &A22, b, b, FLA_BR ); FLA_Repart_2x1_to_3x1( UT, &U0, /* ** */ /* ** */ &U1, UB, &U2, b, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( YT, &Y0, /* ** */ /* ** */ &Y1, YB, &Y2, b, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( ZT, &Z0, /* ** */ /* ** */ &Z1, ZB, &Z2, b, FLA_BOTTOM ); FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2, b, FLA_RIGHT ); /*------------------------------------------------------------*/ FLA_Part_2x2( T1, &T1_tl, &none, &none2, &none3, b, b, FLA_TL ); bb = min( FLA_Obj_length( ABR ) - 1, b_alg ); FLA_Part_1x2( ABR, &ABR_l, &none, bb, FLA_LEFT ); FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT ); FLA_Part_1x2( YB, &YB_l, &none, bb, FLA_LEFT ); FLA_Part_1x2( ZB, &ZB_l, &none, bb, FLA_LEFT ); FLA_Part_2x1( UB_l, &none, &U2_l, b, FLA_TOP ); FLA_Part_2x1( YB_l, 
&none, &Y2_l, b, FLA_TOP ); FLA_Part_2x1( ZB_l, &none, &Z2_l, b, FLA_TOP ); // [ ABR, YB, ZB, T1 ] = FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1, b ); //FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1_tl ); //FLA_Hess_UT_step_ofu_var4( ABR, YB, ZB, T1_tl ); FLA_Hess_UT_step_opt_var4( ABR, YB, ZB, T1_tl ); // Build UB from ABR, with explicit unit subdiagonal and zeros. FLA_Copy_external( ABR_l, UB_l ); FLA_Part_2x1( UB_l, &UB_tl, //.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例28: time_QR_UTvoid time_QR_UT( int variant, int type, int nrepeats, int m, int n, FLA_Obj A, FLA_Obj A_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W, FLA_Obj b, FLA_Obj b_orig, double *dtime, double *diff, double *gflops ){ int irep; double dtime_old = 1.0e9; FLA_Obj A_save, b_save, norm; FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, b, &b_save ); if ( FLA_Obj_is_single_precision( A ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); FLA_Copy_external( A, A_save ); FLA_Copy_external( b, b_save ); for ( irep = 0 ; irep < nrepeats; irep++ ){ FLA_Copy_external( A_save, A ); *dtime = FLA_Clock(); switch( variant ){ case 0:{ switch( type ){ case FLA_ALG_REFERENCE: REF_QR_UT( A, t ); break; case FLA_ALG_FRONT: FLA_QR_UT( A, T ); break; default: printf("trouble/n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } if ( type == FLA_ALG_REFERENCE ) { FLA_Obj AT, AB; FLA_Obj bT, bB; FLA_Obj y; FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y ); FLA_Copy_external( b, b_orig ); if ( FLA_Obj_is_real( A ) ) FLA_Apply_Q_blk_external( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A, t, b ); else FLA_Apply_Q_blk_external( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, A, t, b ); FLA_Part_2x1( A, &AT, &AB, FLA_Obj_width( A ), FLA_TOP ); FLA_Part_2x1( b, &bT, &bB, FLA_Obj_width( A ), FLA_TOP ); FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, AT, bT ); FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, bT, FLA_ONE, b_orig ); FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y ); FLA_Nrm2_external( y, norm ); FLA_Obj_extract_real_scalar( norm, diff ); FLA_Obj_free( &y ); } else { FLA_Obj x, y; FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y ); FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &x ); FLA_Copy_external( b, b_orig ); FLA_QR_UT_solve( A, T, b, x ); 
FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, x, FLA_ONE, b_orig ); FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y ); FLA_Nrm2_external( y, norm ); FLA_Obj_extract_real_scalar( norm, diff );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例29: mainint main(int argc, char *argv[]){ int m_input, n_input, m, n, p_first, p_last, p_inc, p, nb_alg, n_repeats, variant, i, j, datatype, n_variants = N_VARIANTS; char *colors = "brkgmcbrkg"; char *ticks = "o+*xso+*xs"; char m_dim_desc[14]; char n_dim_desc[14]; char m_dim_tag[10]; char n_dim_tag[10]; double max_gflops=6.0; double dtime, gflops, diff; FLA_Obj A, B, C, C_ref; /* Initialize FLAME */ FLA_Init( ); fprintf( stdout, "%c number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d/n", '%', n_repeats ); fprintf( stdout, "%c Enter blocking size: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d/n", '%', nb_alg ); fprintf( stdout, "%c enter problem size first, last, inc: ", '%' ); scanf( "%d%d%d", &p_first, &p_last, &p_inc ); fprintf( stdout, "%c %d %d %d/n", '%', p_first, p_last, p_inc ); fprintf( stdout, "%c enter m n (-1 means bind to problem size): ", '%' ); scanf( "%d%d", &m_input, &n_input ); fprintf( stdout, "%c %d %d/n", '%', m_input, n_input ); /* Delete all existing data structures */ fprintf( stdout, "/nclear all;/n/n" ); if ( m_input > 0 ) { sprintf( m_dim_desc, "m = %d", m_input ); sprintf( m_dim_tag, "m%dc", m_input); } else if( m_input < -1 ) { sprintf( m_dim_desc, "m = p/%d", -m_input ); sprintf( m_dim_tag, "m%dp", -m_input ); } else if( m_input == -1 ) { sprintf( m_dim_desc, "m = p" ); sprintf( m_dim_tag, "m%dp", 1 ); } if ( n_input > 0 ) { sprintf( n_dim_desc, "n = %d", n_input ); sprintf( n_dim_tag, "n%dc", n_input); } else if( n_input < -1 ) { sprintf( n_dim_desc, "n = p/%d", -n_input ); sprintf( n_dim_tag, "n%dp", -n_input ); } else if( n_input == -1 ) { sprintf( n_dim_desc, "n = p" ); sprintf( n_dim_tag, "n%dp", 1 ); } for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 ) { m = m_input; n = n_input; if( m < 0 ) m = p / abs(m_input); if( n < 0 ) n = p / abs(n_input); //datatype = FLA_COMPLEX; datatype = FLA_DOUBLE_COMPLEX; /* Allocate space for the matrices */ FLA_Obj_create( datatype, m, m, &A ); 
FLA_Obj_create( datatype, m, n, &C ); FLA_Obj_create( datatype, m, n, &C_ref );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例30: FLA_Bidiag_blk_externalFLA_Error FLA_Bidiag_blk_external( FLA_Obj A, FLA_Obj tu, FLA_Obj tv ){ int info = 0;#ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; int m_A, n_A, cs_A; int min_m_n, max_m_n; int lwork; FLA_Obj d, e, work_obj; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Bidiag_check( A, tu, tv ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); min_m_n = FLA_Obj_min_dim( A ); max_m_n = FLA_Obj_max_dim( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), min_m_n, 1, 0, 0, &d ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), min_m_n - 1, 1, 0, 0, &e ); lwork = (m_A + n_A) * FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work_obj ); switch( datatype ){ case FLA_FLOAT: { float* buff_A = ( float * ) FLA_FLOAT_PTR( A ); float* buff_d = ( float * ) FLA_FLOAT_PTR( d ); float* buff_e = ( float * ) FLA_FLOAT_PTR( e ); float* buff_tu = ( float * ) FLA_FLOAT_PTR( tu ); float* buff_tv = ( float * ) FLA_FLOAT_PTR( tv ); float* buff_work = ( float * ) FLA_FLOAT_PTR( work_obj ); F77_sgebrd( &m_A, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_tu, buff_tv, buff_work, &lwork, &info ); break; } case FLA_DOUBLE: { double* buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double* buff_d = ( double * ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double * ) FLA_DOUBLE_PTR( e ); double* buff_tu = ( double * ) FLA_DOUBLE_PTR( tu ); double* buff_tv = ( double * ) FLA_DOUBLE_PTR( tv ); double* buff_work = ( double * ) FLA_DOUBLE_PTR( work_obj ); F77_dgebrd( &m_A, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_tu, buff_tv, buff_work, &lwork, &info ); break; } case FLA_COMPLEX: { scomplex* buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); float* buff_d = ( float * ) FLA_FLOAT_PTR( d ); float* buff_e = ( float * ) FLA_FLOAT_PTR( e ); scomplex* buff_tu = ( scomplex * ) FLA_COMPLEX_PTR( tu ); 
scomplex* buff_tv = ( scomplex * ) FLA_COMPLEX_PTR( tv ); scomplex* buff_work = ( scomplex * ) FLA_COMPLEX_PTR( work_obj ); F77_cgebrd( &m_A, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_tu, buff_tv, buff_work, &lwork, &info ); break; } //.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
注:本文中的FLA_Obj_create函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 C++ FLA_Obj_datatype函数代码示例 | C++ FLA_Init函数代码示例