这篇教程C++ FLA_Determine_blocksize函数代码示例写得很实用,希望能帮到您。
本文整理汇总了C++中FLA_Determine_blocksize函数的典型用法代码示例。如果您正苦于以下问题:C++ FLA_Determine_blocksize函数的具体用法?C++ FLA_Determine_blocksize怎么用?C++ FLA_Determine_blocksize使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。 在下文中一共展示了FLA_Determine_blocksize函数的26个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: FLA_Scalr_l_blk_var4
/*
 * Blocked variant 4 of the lower-triangular scale of A by alpha.
 *
 * A is partitioned 2x2 with the FLA_BR quadrant (ABR) started at 0 x 0, and
 * the sweep moves from the bottom-right corner toward the top-left until ATL
 * has no rows or columns left. Each iteration exposes a diagonal block A11
 * and the panel A21 beneath it:
 *   - A11 is scaled as a lower-triangular block (FLA_Scalr_internal),
 *   - A21 is scaled in full (FLA_Scal_internal).
 *
 * alpha  scaling factor forwarded to both sub-operations
 * A      matrix whose blocks are scaled
 * cntl   control tree providing the blocksize and the sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Scalr_l_blk_var4( FLA_Obj alpha, FLA_Obj A, fla_scalr_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02,
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  dim_t b;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_BR );

  while ( FLA_Obj_min_dim( ATL ) > 0 )
  {
    /* The next block is carved off the bottom-right corner of ATL. */
    b = FLA_Determine_blocksize( ATL, FLA_TL, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, &A01, /**/ &A02,
                                                &A10, &A11, /**/ &A12,
                        /* ************* */   /* ******************** */
                           ABL, /**/ ABR,       &A20, &A21, /**/ &A22,
                           b, b, FLA_TL );

    /*------------------------------------------------------------*/

    // A11 = alpha * tril( A11 );
    FLA_Scalr_internal( FLA_LOWER_TRIANGULAR, alpha, A11,
                        FLA_Cntl_sub_scalr( cntl ) );

    // A21 = alpha * A21;
    FLA_Scal_internal( alpha, A21,
                       FLA_Cntl_sub_scal( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, /**/ A01, A02,
                            /* ************** */  /* ****************** */
                                                     A10, /**/ A11, A12,
                              &ABL, /**/ &ABR,       A20, /**/ A21, A22,
                              FLA_BR );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:42,
示例2: FLA_Syrk_ut_blk_var5
/*
 * Blocked variant 5 of the upper-triangular, transposed symmetric rank-k
 * update.
 *
 * C is first scaled by beta as an upper-triangular matrix. A is then walked
 * top to bottom in row panels A1, and each panel is accumulated into C with
 * a FLA_Syrk_internal call that uses FLA_ONE as the scaling of C
 * (C = C + alpha * A1' * A1).
 *
 * alpha, beta  scaling factor objects
 * A            input matrix, traversed by row panels
 * C            matrix that receives the update
 * cntl         control tree providing the blocksize and sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Syrk_ut_blk_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t* cntl )
{
  FLA_Obj AT,              A0,
          AB,              A1,
                           A2;

  dim_t bsize;

  /* Apply beta once up front; the loop only accumulates. */
  FLA_Scalr_internal( FLA_UPPER_TRIANGULAR, beta, C,
                      FLA_Cntl_sub_scalr( cntl ) );

  FLA_Part_2x1( A,    &AT,
                      &AB,            0, FLA_TOP );

  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) )
  {
    bsize = FLA_Determine_blocksize( AB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( AT,                &A0,
                        /* ** */            /* ** */
                                              &A1,
                           AB,                &A2,        bsize, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    /* C = C + alpha * A1' * A1 */
    FLA_Syrk_internal( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,
                       alpha, A1, FLA_ONE, C,
                       FLA_Cntl_sub_syrk( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &AT,                A0,
                                                  A1,
                            /* ** */           /* ** */
                              &AB,                A2,     FLA_TOP );
  }

  return FLA_SUCCESS;
}
开发者ID:pgawron,项目名称:tlash,代码行数:41,
示例3: FLA_Hemm_lu_blk_var9
/*
 * Blocked variant 9 of the left-side, upper-stored Hermitian matrix multiply.
 *
 * B and C are walked left to right in matching column panels B1 and C1.
 * Every panel is handled by one FLA_Hemm_internal call that receives the
 * whole of A together with alpha and beta (C1 = alpha * A * B1 + beta * C1).
 *
 * alpha, beta  scaling factor objects
 * A            Hermitian matrix, passed unpartitioned to every sub-call
 * B            right-hand operand, traversed by column panels
 * C            result matrix, traversed by the same column panels
 * cntl         control tree providing the blocksize and sub-operation control
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Hemm_lu_blk_var9( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t* cntl )
{
  FLA_Obj BL,    BR,       B0,  B1,  B2;

  FLA_Obj CL,    CR,       C0,  C1,  C2;

  dim_t bsize;

  FLA_Part_1x2( B,    &BL,  &BR,      0, FLA_LEFT );

  FLA_Part_1x2( C,    &CL,  &CR,      0, FLA_LEFT );

  while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) )
  {
    bsize = FLA_Determine_blocksize( BR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( BL,  /**/ BR,        &B0, /**/ &B1, &B2,
                           bsize, FLA_RIGHT );

    FLA_Repart_1x2_to_1x3( CL,  /**/ CR,        &C0, /**/ &C1, &C2,
                           bsize, FLA_RIGHT );

    /*------------------------------------------------------------*/

    /* C1 = alpha * A * B1 + beta * C1 */
    FLA_Hemm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                       alpha, A, B1, beta, C1,
                       FLA_Cntl_sub_hemm( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_1x3_to_1x2( &BL,  /**/ &BR,        B0, B1, /**/ B2,
                              FLA_LEFT );

    FLA_Cont_with_1x3_to_1x2( &CL,  /**/ &CR,        C0, C1, /**/ C2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
开发者ID:pgawron,项目名称:tlash,代码行数:41,
示例4: FLA_Gemm_tc_blk_var4
/*
 * Blocked variant 4 of the general matrix multiply with A transposed and B
 * conjugated (FLA_TRANSPOSE / FLA_CONJ_NO_TRANSPOSE).
 *
 * B and C are walked right to left in matching column panels B1 and C1;
 * each panel is updated by one FLA_Gemm_internal call that receives the whole
 * of A together with alpha and beta.
 *
 * alpha, beta  scaling factor objects
 * A            left operand, passed unpartitioned to every sub-call
 * B            right operand, traversed by column panels
 * C            result matrix, traversed by the same column panels
 * cntl         control tree providing the blocksize and sub-operation control
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_tc_blk_var4( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl )
{
  FLA_Obj BL,    BR,       B0,  B1,  B2;

  FLA_Obj CL,    CR,       C0,  C1,  C2;

  dim_t bsize;

  FLA_Part_1x2( B,    &BL,  &BR,      0, FLA_RIGHT );

  FLA_Part_1x2( C,    &CL,  &CR,      0, FLA_RIGHT );

  while ( FLA_Obj_width( BR ) < FLA_Obj_width( B ) )
  {
    bsize = FLA_Determine_blocksize( BL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( BL,  /**/ BR,        &B0, &B1, /**/ &B2,
                           bsize, FLA_LEFT );

    FLA_Repart_1x2_to_1x3( CL,  /**/ CR,        &C0, &C1, /**/ &C2,
                           bsize, FLA_LEFT );

    /*------------------------------------------------------------*/

    /* C1 = alpha * A' * conj( B1 ) + beta * C1 */
    FLA_Gemm_internal( FLA_TRANSPOSE, FLA_CONJ_NO_TRANSPOSE,
                       alpha, A, B1, beta, C1,
                       FLA_Cntl_sub_gemm( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_1x3_to_1x2( &BL,  /**/ &BR,        B0, /**/ B1, B2,
                              FLA_RIGHT );

    FLA_Cont_with_1x3_to_1x2( &CL,  /**/ &CR,        C0, /**/ C1, C2,
                              FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:41,
示例5: FLA_Apply_Q_UT_lhfr_blk_var2
/*
 * Blocked variant 2 of applying Q' from the left (FLA_LEFT,
 * FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE) to B.
 *
 * B and the workspace W are walked left to right in matching column panels
 * B1 and W1. Each panel is handed to FLA_Apply_Q_UT_internal together with
 * the unpartitioned A and T.
 *
 * A, T   objects from which Q is formed; passed through unchanged
 * W      workspace, partitioned in step with B
 * B      matrix that Q' is applied to, one column panel at a time
 * cntl   control tree providing the blocksize and sub-operation control
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Apply_Q_UT_lhfr_blk_var2( FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t* cntl )
{
  FLA_Obj BL,    BR,       B0,  B1,  B2;

  FLA_Obj WL,    WR,       W0,  W1,  W2;

  dim_t b;

  FLA_Part_1x2( B,    &BL,  &BR,      0, FLA_LEFT );

  FLA_Part_1x2( W,    &WL,  &WR,      0, FLA_LEFT );

  while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) )
  {
    b = FLA_Determine_blocksize( BR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( BL,  /**/ BR,        &B0, /**/ &B1, &B2,
                           b, FLA_RIGHT );

    FLA_Repart_1x2_to_1x3( WL,  /**/ WR,        &W0, /**/ &W1, &W2,
                           b, FLA_RIGHT );

    /*------------------------------------------------------------*/

    // B1 = Q' * B1;
    FLA_Apply_Q_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE,
                             A, T, W1, B1,
                             FLA_Cntl_sub_apqut( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_1x3_to_1x2( &BL,  /**/ &BR,        B0, B1, /**/ B2,
                              FLA_LEFT );

    FLA_Cont_with_1x3_to_1x2( &WL,  /**/ &WR,        W0, W1, /**/ W2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:40,
示例6: FLA_Transpose_blk_var2
/*
 * Blocked variant 2 of the in-place transpose of A.
 *
 * A is swept along its diagonal from the top-left corner. Each iteration
 * transposes the diagonal block A11 with the unblocked kernel and then
 * exchanges the panel A21 (below A11) with the panel A12 (right of A11) via
 * FLA_Swap_t_blk_var2.
 *
 * A      matrix to transpose
 * cntl   control tree providing the blocksize and the swap sub-control
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Transpose_blk_var2( FLA_Obj A, fla_tpose_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02,
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  dim_t bsize;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    bsize = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           bsize, bsize, FLA_BR );

    /*------------------------------------------------------------*/

    /* Diagonal block: transpose in place. */
    FLA_Transpose_unb_var2( A11 );

    /* Off-diagonal panels: swap A21 with A12. */
    FLA_Swap_t_blk_var2( A21, A12,
                         FLA_Cntl_sub_swap( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:40,
示例7: FLA_Copyt_c_blk_var3
/*
 * Blocked variant 3 of the conjugating copy (FLA_CONJ_NO_TRANSPOSE) of A
 * into B.
 *
 * A and B are walked left to right in matching column panels; each pair
 * ( A1, B1 ) is handed to FLA_Copyt_internal.
 *
 * A      source matrix
 * B      destination matrix, partitioned in step with A
 * cntl   control tree providing the blocksize and sub-operation control
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Copyt_c_blk_var3( FLA_Obj A, FLA_Obj B, fla_copyt_t* cntl )
{
  FLA_Obj AL,    AR,       A0,  A1,  A2;

  FLA_Obj BL,    BR,       B0,  B1,  B2;

  dim_t bsize;

  FLA_Part_1x2( A,    &AL,  &AR,      0, FLA_LEFT );

  FLA_Part_1x2( B,    &BL,  &BR,      0, FLA_LEFT );

  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    bsize = FLA_Determine_blocksize( AR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( AL,  /**/ AR,        &A0, /**/ &A1, &A2,
                           bsize, FLA_RIGHT );

    FLA_Repart_1x2_to_1x3( BL,  /**/ BR,        &B0, /**/ &B1, &B2,
                           bsize, FLA_RIGHT );

    /*------------------------------------------------------------*/

    /* B1 = conj( A1 ) */
    FLA_Copyt_internal( FLA_CONJ_NO_TRANSPOSE, A1, B1,
                        FLA_Cntl_sub_copyt( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_1x3_to_1x2( &AL,  /**/ &AR,        A0, A1, /**/ A2,
                              FLA_LEFT );

    FLA_Cont_with_1x3_to_1x2( &BL,  /**/ &BR,        B0, B1, /**/ B2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:38,
示例8: FLA_Trsm_ruc_blk_var3
/*
 * Blocked variant 3 of the right-side, upper-triangular, conjugate
 * (FLA_CONJ_NO_TRANSPOSE) Trsm operation on B.
 *
 * B is walked top to bottom in row panels B1; each panel is passed to
 * FLA_Trsm_internal together with the unpartitioned A, diagA and alpha.
 *
 * diagA  diagonal flag of A, forwarded to the sub-operation
 * alpha  scaling factor object
 * A      upper-triangular matrix, passed unpartitioned to every sub-call
 * B      matrix updated one row panel at a time
 * cntl   control tree providing the blocksize and sub-operation control
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_ruc_blk_var3( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t* cntl )
{
  FLA_Obj BT,              B0,
          BB,              B1,
                           B2;

  dim_t bsize;

  FLA_Part_2x1( B,    &BT,
                      &BB,            0, FLA_TOP );

  while ( FLA_Obj_length( BT ) < FLA_Obj_length( B ) )
  {
    bsize = FLA_Determine_blocksize( BB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( BT,                &B0,
                        /* ** */            /* ** */
                                              &B1,
                           BB,                &B2,        bsize, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    /* Original note: B1 = B1 * triu( A ).
       NOTE(review): Trsm is presumably a triangular solve, i.e. B1 is
       multiplied by the inverse of conj( triu( A ) ) -- confirm. */
    FLA_Trsm_internal( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, diagA,
                       alpha, A, B1,
                       FLA_Cntl_sub_trsm( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &BT,                B0,
                                                  B1,
                            /* ** */           /* ** */
                              &BB,                B2,     FLA_TOP );
  }

  return FLA_SUCCESS;
}
开发者ID:pgawron,项目名称:tlash,代码行数:38,
示例9: FLA_Apply_CAQ2_UT_lhfc_blk_var2
/*
 * Blocked variant 2 of applying Q' from the left (FLA_LEFT,
 * FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE) to the stacked pair
 * ( C, E ).
 *
 * D, T and E are walked top to bottom in matching row panels D1, T1 and E1.
 * Each iteration calls FLA_Apply_CAQ2_UT_internal with D1, T1, the whole
 * workspace W1, the whole of C, and E1.
 *
 * D, T   objects from which each Q is formed, traversed by row panels
 * W1     workspace, passed unpartitioned to every sub-call
 * C      top part of the pair, passed unpartitioned to every sub-call
 * E      bottom part of the pair, traversed by row panels
 * cntl   control tree providing the blocksize and sub-operation control
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Apply_CAQ2_UT_lhfc_blk_var2( FLA_Obj D, FLA_Obj T, FLA_Obj W1, FLA_Obj C, FLA_Obj E, fla_apcaq2ut_t* cntl )
{
  FLA_Obj DT,              D0,
          DB,              D1,
                           D2;

  FLA_Obj TT,              T0,
          TB,              T1,
                           T2;

  FLA_Obj ET,              E0,
          EB,              E1,
                           E2;

  dim_t b;

  FLA_Part_2x1( D,    &DT,
                      &DB,            0, FLA_TOP );

  FLA_Part_2x1( T,    &TT,
                      &TB,            0, FLA_TOP );

  FLA_Part_2x1( E,    &ET,
                      &EB,            0, FLA_TOP );

  while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) )
  {
    b = FLA_Determine_blocksize( DB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( DT,                &D0,
                        /* ** */            /* ** */
                                              &D1,
                           DB,                &D2,        b, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( TT,                &T0,
                        /* ** */            /* ** */
                                              &T1,
                           TB,                &T2,        b, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( ET,                &E0,
                        /* ** */            /* ** */
                                              &E1,
                           EB,                &E2,        b, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    /*
     *   [ C  ]  =  Q' [ C  ]
     *   [ E1 ]        [ E1 ]
     *
     * where Q is formed from D1 and T1.
     */
    FLA_Apply_CAQ2_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
                                D1, T1, W1, C,
                                            E1,
                                FLA_Cntl_sub_apcaq2ut( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &DT,                D0,
                                                  D1,
                            /* ** */           /* ** */
                              &DB,                D2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &TT,                T0,
                                                  T1,
                            /* ** */           /* ** */
                              &TB,                T2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &ET,                E0,
                                                  E1,
                            /* ** */           /* ** */
                              &EB,                E2,     FLA_TOP );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:76,
示例10: FLA_Her2k_uh_blk_var7
/*
 * Blocked variant 7 of the upper-triangular, conjugate-transposed Hermitian
 * rank-2k update.
 *
 * C is first scaled by beta as an upper-triangular matrix. A and B are then
 * walked right to left in matching column panels A1 and B1 while C is swept
 * along its diagonal from the bottom-right corner. Each iteration
 * accumulates, with FLA_ONE as the scaling of the C block:
 *   - C01 += alpha * B0' * A1   (FLA_Gemm_internal)
 *   - C12 += alpha * A1' * B2   (FLA_Gemm_internal)
 *   - C11 updated from A1 and B1 (FLA_Her2k_internal)
 *
 * alpha, beta  scaling factor objects
 * A, B         input matrices, traversed by column panels
 * C            matrix that receives the update
 * cntl         control tree providing the blocksize and sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Her2k_uh_blk_var7( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_her2k_t* cntl )
{
  FLA_Obj AL,    AR,       A0,  A1,  A2;

  FLA_Obj BL,    BR,       B0,  B1,  B2;

  FLA_Obj CTL,   CTR,      C00, C01, C02,
          CBL,   CBR,      C10, C11, C12,
                           C20, C21, C22;

  dim_t bsize;

  /* Apply beta once up front; the loop only accumulates. */
  FLA_Scalr_internal( FLA_UPPER_TRIANGULAR, beta, C,
                      FLA_Cntl_sub_scalr( cntl ) );

  FLA_Part_1x2( A,    &AL,  &AR,      0, FLA_RIGHT );

  FLA_Part_1x2( B,    &BL,  &BR,      0, FLA_RIGHT );

  FLA_Part_2x2( C,    &CTL, &CTR,
                      &CBL, &CBR,     0, 0, FLA_BR );

  while ( FLA_Obj_width( AR ) < FLA_Obj_width( A ) )
  {
    bsize = FLA_Determine_blocksize( AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( AL,  /**/ AR,        &A0, &A1, /**/ &A2,
                           bsize, FLA_LEFT );

    FLA_Repart_1x2_to_1x3( BL,  /**/ BR,        &B0, &B1, /**/ &B2,
                           bsize, FLA_LEFT );

    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, &C01, /**/ &C02,
                                                &C10, &C11, /**/ &C12,
                        /* ************* */   /* ******************** */
                           CBL, /**/ CBR,       &C20, &C21, /**/ &C22,
                           bsize, bsize, FLA_TL );

    /*------------------------------------------------------------*/

    /* C01 = C01 + B0' * A1 */
    FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
                       alpha, B0, A1, FLA_ONE, C01,
                       FLA_Cntl_sub_gemm1( cntl ) );

    /* C12 = C12 + A1' * B2 */
    FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
                       alpha, A1, B2, FLA_ONE, C12,
                       FLA_Cntl_sub_gemm2( cntl ) );

    /* C11 = C11 + A1' * B1 + B1' * A1 */
    FLA_Her2k_internal( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                        alpha, A1, B1, FLA_ONE, C11,
                        FLA_Cntl_sub_her2k( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_1x3_to_1x2( &AL,  /**/ &AR,        A0, /**/ A1, A2,
                              FLA_RIGHT );

    FLA_Cont_with_1x3_to_1x2( &BL,  /**/ &BR,        B0, /**/ B1, B2,
                              FLA_RIGHT );

    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, /**/ C01, C02,
                            /* ************** */  /* ****************** */
                                                     C10, /**/ C11, C12,
                              &CBL, /**/ &CBR,       C20, /**/ C21, C22,
                              FLA_BR );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:73,
示例11: FLA_Eig_gest_nl_blk_var1FLA_Error FLA_Eig_gest_nl_blk_var1( FLA_Obj A, FLA_Obj Y, FLA_Obj B, fla_eig_gest_t* cntl ){ FLA_Obj ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; FLA_Obj BTL, BTR, B00, B01, B02, BBL, BBR, B10, B11, B12, B20, B21, B22; FLA_Obj YT, Y01, YB, Y11, Y21; FLA_Obj Y21_l, Y21_r; dim_t b; FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 0, 0, FLA_TL ); FLA_Part_2x2( B, &BTL, &BTR, &BBL, &BBR, 0, 0, FLA_TL ); FLA_Part_2x1( Y, &YT, &YB, 0, FLA_TOP ); while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){ b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) ); FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02, /* ************* */ /* ******************** */ &A10, /**/ &A11, &A12, ABL, /**/ ABR, &A20, /**/ &A21, &A22, b, b, FLA_BR ); FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, /* ************* */ /* ******************** */ &B10, /**/ &B11, &B12, BBL, /**/ BBR, &B20, /**/ &B21, &B22, b, b, FLA_BR ); FLA_Repart_2x1_to_3x1( YT, &Y01, /* ** */ /* *** */ &Y11, YB, &Y21, b, FLA_BOTTOM ); /*------------------------------------------------------------*/ FLA_Part_1x2( Y21, &Y21_l, &Y21_r, b, FLA_LEFT ); // Y21 = A22 * B21; FLA_Hemm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_ONE, A22, B21, FLA_ZERO, Y21_l, FLA_Cntl_sub_hemm( cntl ) ); // A21 = A21 * tril( B11 ); FLA_Trmm_internal( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, B11, A21, FLA_Cntl_sub_trmm1( cntl ) ); // A21 = A21 + 1/2 * Y21; FLA_Axpy_internal( FLA_ONE_HALF, Y21_l, A21, FLA_Cntl_sub_axpy1( cntl ) ); // A11 = tril( B11 )' * A11 * tril( B11 ); FLA_Eig_gest_internal( FLA_NO_INVERSE, FLA_LOWER_TRIANGULAR, A11, Y11, B11, FLA_Cntl_sub_eig_gest( cntl ) ); // A11 = A11 + A21' * B21 + B21' * A21; FLA_Her2k_internal( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_ONE, A21, B21, FLA_ONE, A11, FLA_Cntl_sub_her2k( cntl ) ); // A21 = A21 + 1/2 * Y21; FLA_Axpy_internal( FLA_ONE_HALF, Y21_l, A21, FLA_Cntl_sub_axpy2( cntl ) 
); // A21 = tril( B22 )' * A21; FLA_Trmm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, B22, A21, FLA_Cntl_sub_trmm2( cntl ) ); /*------------------------------------------------------------*/ FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02, A10, A11, /**/ A12, /* ************** */ /* ****************** */ &ABL, /**/ &ABR, A20, A21, /**/ A22, FLA_TL ); FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, B10, B11, /**/ B12, /* ************** */ /* ****************** */ &BBL, /**/ &BBR, B20, B21, /**/ B22, FLA_TL );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例12: FLA_Trsv_lc_blk_var1
/*
 * Blocked variant 1 of the lower-triangular, conjugate-transposed Trsv
 * operation on x.
 *
 * A is swept along its diagonal from the bottom-right corner and x from the
 * bottom, in matching blocks. Each iteration:
 *   - subtracts A21' * x2 from x1 (FLA_Gemv_internal with FLA_MINUS_ONE),
 *   - applies the Trsv sub-operation with the diagonal block A11 to x1.
 *
 * diagA  diagonal flag of A, forwarded to the Trsv sub-operation
 * A      lower-triangular matrix
 * x      vector updated block by block
 * cntl   control tree providing the blocksize and sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsv_lc_blk_var1( FLA_Diag diagA, FLA_Obj A, FLA_Obj x, fla_trsv_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02,
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj xT,              x0,
          xB,              x1,
                           x2;

  dim_t bsize;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_BR );

  FLA_Part_2x1( x,    &xT,
                      &xB,            0, FLA_BOTTOM );

  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) )
  {
    bsize = FLA_Determine_blocksize( ATL, FLA_TL, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, &A01, /**/ &A02,
                                                &A10, &A11, /**/ &A12,
                        /* ************* */   /* ******************** */
                           ABL, /**/ ABR,       &A20, &A21, /**/ &A22,
                           bsize, bsize, FLA_TL );

    FLA_Repart_2x1_to_3x1( xT,                &x0,
                                              &x1,
                        /* ** */            /* ** */
                           xB,                &x2,        bsize, FLA_TOP );

    /*------------------------------------------------------------*/

    /* x1 = x1 - A21' * x2; */
    FLA_Gemv_internal( FLA_CONJ_TRANSPOSE,
                       FLA_MINUS_ONE, A21, x2, FLA_ONE, x1,
                       FLA_Cntl_sub_gemv( cntl ) );

    /* x1 = tril( A11' ) / x1; */
    FLA_Trsv_internal( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, diagA,
                       A11, x1,
                       FLA_Cntl_sub_trsv( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, /**/ A01, A02,
                            /* ************** */  /* ****************** */
                                                     A10, /**/ A11, A12,
                              &ABL, /**/ &ABR,       A20, /**/ A21, A22,
                              FLA_BR );

    FLA_Cont_with_3x1_to_2x1( &xT,                x0,
                            /* ** */           /* ** */
                                                  x1,
                              &xB,                x2,     FLA_BOTTOM );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:62,
示例13: FLA_Hemm_lu_blk_var3
/*
 * Blocked variant 3 of the left-side, upper-stored Hermitian matrix multiply.
 *
 * C is first scaled by beta. A is then swept along its diagonal from the
 * top-left corner while B and C are walked top to bottom in matching row
 * panels. Each iteration accumulates, with FLA_ONE as the scaling of C:
 *   - C1 += alpha * A11 * B1    (FLA_Hemm_internal)
 *   - C1 += alpha * A12 * B2    (FLA_Gemm_internal)
 *   - C2 += alpha * A12' * B1   (FLA_Gemm_internal)
 *
 * alpha, beta  scaling factor objects
 * A            Hermitian matrix (upper part referenced by the sub-calls)
 * B            right-hand operand, traversed by row panels
 * C            result matrix, traversed by the same row panels
 * cntl         control tree providing the blocksize and sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Hemm_lu_blk_var3( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02,
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj BT,              B0,
          BB,              B1,
                           B2;

  FLA_Obj CT,              C0,
          CB,              C1,
                           C2;

  dim_t bsize;

  /* Apply beta once up front; the loop only accumulates. */
  FLA_Scal_internal( beta, C,
                     FLA_Cntl_sub_scal( cntl ) );

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );

  FLA_Part_2x1( B,    &BT,
                      &BB,            0, FLA_TOP );

  FLA_Part_2x1( C,    &CT,
                      &CB,            0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    bsize = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           bsize, bsize, FLA_BR );

    FLA_Repart_2x1_to_3x1( BT,                &B0,
                        /* ** */            /* ** */
                                              &B1,
                           BB,                &B2,        bsize, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( CT,                &C0,
                        /* ** */            /* ** */
                                              &C1,
                           CB,                &C2,        bsize, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    /* C1 = C1 + A11 * B1 */
    FLA_Hemm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                       alpha, A11, B1, FLA_ONE, C1,
                       FLA_Cntl_sub_hemm( cntl ) );

    /* C1 = C1 + A12 * B2 */
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                       alpha, A12, B2, FLA_ONE, C1,
                       FLA_Cntl_sub_gemm1( cntl ) );

    /* C2 = C2 + A12' * B1 */
    FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
                       alpha, A12, B1, FLA_ONE, C2,
                       FLA_Cntl_sub_gemm2( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );

    FLA_Cont_with_3x1_to_2x1( &BT,                B0,
                                                  B1,
                            /* ** */           /* ** */
                              &BB,                B2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &CT,                C0,
                                                  C1,
                            /* ** */           /* ** */
                              &CB,                C2,     FLA_TOP );
  }

  return FLA_SUCCESS;
}
开发者ID:pgawron,项目名称:tlash,代码行数:87,
示例14: FLA_Trmm_llt_blk_var1
/*
 * Blocked variant 1 of the left-side, lower-triangular, transposed
 * triangular matrix multiply on B.
 *
 * A is swept along its diagonal from the top-left corner and B is walked
 * top to bottom in matching row panels. Each iteration:
 *   - multiplies B1 by the transposed diagonal block A11 (FLA_Trmm_internal),
 *   - adds alpha * A21' * B2 to B1 (FLA_Gemm_internal with FLA_ONE on B1).
 *
 * diagA  diagonal flag of A, forwarded to the Trmm sub-operation
 * alpha  scaling factor object
 * A      lower-triangular matrix
 * B      matrix updated one row panel at a time
 * cntl   control tree providing the blocksize and sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trmm_llt_blk_var1( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trmm_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02,
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj BT,              B0,
          BB,              B1,
                           B2;

  dim_t bsize;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );

  FLA_Part_2x1( B,    &BT,
                      &BB,            0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    bsize = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           bsize, bsize, FLA_BR );

    FLA_Repart_2x1_to_3x1( BT,                &B0,
                        /* ** */            /* ** */
                                              &B1,
                           BB,                &B2,        bsize, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    /* B1 = tril( A11 )' * B1; */
    FLA_Trmm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, diagA,
                       alpha, A11, B1,
                       FLA_Cntl_sub_trmm( cntl ) );

    /* B1 = B1 + A21' * B2; */
    FLA_Gemm_internal( FLA_TRANSPOSE, FLA_NO_TRANSPOSE,
                       alpha, A21, B2, FLA_ONE, B1,
                       FLA_Cntl_sub_gemm( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );

    FLA_Cont_with_3x1_to_2x1( &BT,                B0,
                                                  B1,
                            /* ** */           /* ** */
                              &BB,                B2,     FLA_TOP );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:62,
示例15: FLA_Gemm_nn_omp_var5
/*
 * Task-parallel variant 5 of the no-transpose / no-transpose general matrix
 * multiply, written with Intel "taskq" OpenMP pragmas.
 *
 * A is walked left to right in column panels A1 and B top to bottom in
 * matching row panels B1. For every pair a task is spawned that:
 *   1. creates a temporary C_local conformal to C and zeroes it,
 *   2. computes C_local = alpha * A1 * B1 via FLA_Gemm_external,
 *   3. adds C_local into C through REF_Axpy_sync_circular,
 *   4. frees C_local.
 *
 * alpha  scaling factor object
 * A, B   operands, traversed by column / row panels respectively
 * C      result matrix that every task accumulates into
 * cntl   control tree providing the blocksize
 *
 * NOTE(review): FLA_Determine_blocksize is called here with four arguments,
 * unlike the three-argument calls elsewhere in this listing -- presumably an
 * older signature; confirm against the library version in use.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_nn_omp_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj C, fla_gemm_t* cntl )
{
  FLA_Obj AL,    AR,       A0,  A1,  A2;

  FLA_Obj BT,              B0,
          BB,              B1,
                           B2;

  FLA_Obj C_local;

  int b;

  FLA_Part_1x2( A,    &AL,  &AR,      0, FLA_LEFT );

  FLA_Part_2x1( B,    &BT,
                      &BB,            0, FLA_TOP );

  #pragma intel omp parallel taskq
  {
  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    b = FLA_Determine_blocksize( A, AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );
    //b = min( FLA_Obj_width( AR ), nb_alg );

    FLA_Repart_1x2_to_1x3( AL,  /**/ AR,        &A0, /**/ &A1, &A2,
                           b, FLA_RIGHT );

    FLA_Repart_2x1_to_3x1( BT,                &B0,
                        /* ** */            /* ** */
                                              &B1,
                           BB,                &B2,        b, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    #pragma intel omp task captureprivate(A1,B1) private(C_local)
    {
    /* Each task accumulates into its own zeroed temporary. */
    FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_local );
    FLA_Obj_set_to_zero( C_local );

    /* C = alpha * A1 * B1 + C; */
    FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                       alpha, A1, B1, FLA_ONE, C_local );

    REF_Axpy_sync_circular( FLA_ONE, C_local, C );

    FLA_Obj_free( &C_local );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_1x3_to_1x2( &AL,  /**/ &AR,        A0, A1, /**/ A2,
                              FLA_LEFT );

    FLA_Cont_with_3x1_to_2x1( &BT,                B0,
                                                  B1,
                            /* ** */           /* ** */
                              &BB,                B2,     FLA_TOP );
  }
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:62,
示例16: FLA_Sylv_nn_blk_var8FLA_Error FLA_Sylv_nn_blk_var8( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t* cntl ){ FLA_Obj ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; FLA_Obj BTL, BTR, B00, B01, B02, BBL, BBR, B10, B11, B12, B20, B21, B22; FLA_Obj CTL, CTR, C00, C01, C02, CBL, CBR, C10, C11, C12, C20, C21, C22; dim_t b; FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 0, 0, FLA_BR ); FLA_Part_2x2( B, &BTL, &BTR, &BBL, &BBR, 0, 0, FLA_TL ); FLA_Part_2x2( C, &CTL, &CTR, &CBL, &CBR, 0, 0, FLA_BL ); while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){ b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) ); FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, &A10, &A11, /**/ &A12, /* ************* */ /* ******************** */ ABL, /**/ ABR, &A20, &A21, /**/ &A22, b, b, FLA_TL ); FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, /* ************* */ /* ******************** */ &B10, /**/ &B11, &B12, BBL, /**/ BBR, &B20, /**/ &B21, &B22, b, b, FLA_BR ); FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02, &C10, /**/ &C11, &C12, /* ************* */ /* ******************** */ CBL, /**/ CBR, &C20, /**/ &C21, &C22, b, b, FLA_TR ); // Loop Invariant: // CTL = CTL - ATR * sylv( ABR, BTL, CBL ) // CTR = CTR // CBL = sylv( ABR, BTL, CBL ) // CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR ) /*------------------------------------------------------------*/ // C10 = sylv( A11, B00, C10 ); FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A11, B00, C10, scale, FLA_Cntl_sub_sylv1( cntl ) ); // C00 = C00 - A01 * C10; FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A01, C10, FLA_ONE, C00, FLA_Cntl_sub_gemm1( cntl ) ); // C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 ); FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11, FLA_Cntl_sub_gemm2( cntl ) ); FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11, FLA_Cntl_sub_gemm3( cntl ) ); FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A11, B11, C11, scale, FLA_Cntl_sub_sylv2( cntl ) ); // C01 = C01 - A01 * C11 - A02 * C21; FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A02, C21, FLA_ONE, C01, FLA_Cntl_sub_gemm4( cntl ) ); FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A01, C11, FLA_ONE, C01, FLA_Cntl_sub_gemm5( cntl ) ); // C12 = sylv( A11, B22, C12 - A12 * C22 -/+ C10 * B02 -/+ C11 * B12 ); FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12, FLA_Cntl_sub_gemm6( cntl ) ); FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12, FLA_Cntl_sub_gemm7( cntl ) ); FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A12, C22, FLA_ONE, C12, FLA_Cntl_sub_gemm8( cntl ) );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例17: FLA_Copyr_u_blk_var4
/*
 * Blocked variant 4 of the upper-triangular copy of A into B.
 *
 * A and B are partitioned 2x2 with the FLA_BR quadrant started at 0 x 0 and
 * swept in step from the bottom-right corner toward the top-left until ATL
 * has no rows or columns left. Each iteration:
 *   - copies the diagonal block A11 into B11 as upper triangular
 *     (FLA_Copyr_internal),
 *   - copies the panel A01 above it into B01 in full (FLA_Copy_internal).
 *
 * A      source matrix
 * B      destination matrix, partitioned in step with A
 * cntl   control tree providing the blocksize and sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Copyr_u_blk_var4( FLA_Obj A, FLA_Obj B, fla_copyr_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02,
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj BTL,   BTR,      B00, B01, B02,
          BBL,   BBR,      B10, B11, B12,
                           B20, B21, B22;

  dim_t b;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_BR );

  FLA_Part_2x2( B,    &BTL, &BTR,
                      &BBL, &BBR,     0, 0, FLA_BR );

  while ( FLA_Obj_min_dim( ATL ) > 0 )
  {
    b = FLA_Determine_blocksize( ATL, FLA_TL, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, &A01, /**/ &A02,
                                                &A10, &A11, /**/ &A12,
                        /* ************* */   /* ******************** */
                           ABL, /**/ ABR,       &A20, &A21, /**/ &A22,
                           b, b, FLA_TL );

    FLA_Repart_2x2_to_3x3( BTL, /**/ BTR,       &B00, &B01, /**/ &B02,
                                                &B10, &B11, /**/ &B12,
                        /* ************* */   /* ******************** */
                           BBL, /**/ BBR,       &B20, &B21, /**/ &B22,
                           b, b, FLA_TL );

    /*------------------------------------------------------------*/

    // B11 = triu( A11 );
    FLA_Copyr_internal( FLA_UPPER_TRIANGULAR, A11, B11,
                        FLA_Cntl_sub_copyr( cntl ) );

    // B01 = A01;
    FLA_Copy_internal( A01, B01,
                       FLA_Cntl_sub_copy( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, /**/ A01, A02,
                            /* ************** */  /* ****************** */
                                                     A10, /**/ A11, A12,
                              &ABL, /**/ &ABR,       A20, /**/ A21, A22,
                              FLA_BR );

    FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR,       B00, /**/ B01, B02,
                            /* ************** */  /* ****************** */
                                                     B10, /**/ B11, B12,
                              &BBL, /**/ &BBR,       B20, /**/ B21, B22,
                              FLA_BR );
  }

  return FLA_SUCCESS;
}
开发者ID:pgawron,项目名称:tlash,代码行数:61,
示例18: FLA_Sylv_nh_blk_var16
/*
 * Blocked variant 16 of the Sylvester-equation operation with A not
 * transposed and B conjugate-transposed.
 *
 * A is swept along its diagonal from the bottom-right corner and C is walked
 * bottom to top in matching row panels. Each iteration:
 *   - runs the Sylvester sub-operation on C1 with the diagonal block A11 and
 *     the whole of B (FLA_Sylv_internal),
 *   - subtracts A01 * C1 from the panel C0 above it (FLA_Gemm_internal with
 *     FLA_MINUS_ONE).
 *
 * isgn   sign object forwarded to the Sylvester sub-operation
 * A      matrix swept along its diagonal
 * B      matrix passed unpartitioned to every Sylvester sub-call
 * C      matrix updated one row panel at a time
 * scale  scale object forwarded to the Sylvester sub-operation
 * cntl   control tree providing the blocksize and sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Sylv_nh_blk_var16( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02,
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj CT,              C0,
          CB,              C1,
                           C2;

  dim_t b;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_BR );

  FLA_Part_2x1( C,    &CT,
                      &CB,            0, FLA_BOTTOM );

  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) )
  {
    b = FLA_Determine_blocksize( CT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, &A01, /**/ &A02,
                                                &A10, &A11, /**/ &A12,
                        /* ************* */   /* ******************** */
                           ABL, /**/ ABR,       &A20, &A21, /**/ &A22,
                           b, b, FLA_TL );

    FLA_Repart_2x1_to_3x1( CT,                &C0,
                                              &C1,
                        /* ** */            /* ** */
                           CB,                &C2,        b, FLA_TOP );

    // Loop Invariant:
    // CT = CT - ATR * sylv( ABR, B', CB )
    // CB = sylv( ABR, B', CB )

    /*------------------------------------------------------------*/

    // C1 = sylv( A11, B', C1 );
    FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                       isgn, A11, B, C1, scale,
                       FLA_Cntl_sub_sylv1( cntl ) );

    // C0 = C0 - A01 * C1;
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                       FLA_MINUS_ONE, A01, C1, FLA_ONE, C0,
                       FLA_Cntl_sub_gemm1( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, /**/ A01, A02,
                            /* ************** */  /* ****************** */
                                                     A10, /**/ A11, A12,
                              &ABL, /**/ &ABR,       A20, /**/ A21, A22,
                              FLA_BR );

    FLA_Cont_with_3x1_to_2x1( &CT,                C0,
                            /* ** */           /* ** */
                                                  C1,
                              &CB,                C2,     FLA_BOTTOM );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:66,
示例19: FLA_Her2k_un_blk_var6
/*
 * Blocked variant 6 of the upper-triangular, non-transposed Hermitian
 * rank-2k update.
 *
 * C is first scaled by beta as an upper-triangular matrix. A and B are then
 * walked bottom to top in matching row panels A1 and B1 while C is swept
 * along its diagonal from the bottom-right corner. Each iteration
 * accumulates, with FLA_ONE as the scaling of the C block:
 *   - C01 += alpha * A0 * B1'   (FLA_Gemm_internal)
 *   - C12 += alpha * B1 * A2'   (FLA_Gemm_internal)
 *   - C11 updated from A1 and B1 (FLA_Her2k_internal)
 *
 * alpha, beta  scaling factor objects
 * A, B         input matrices, traversed by row panels
 * C            matrix that receives the update
 * cntl         control tree providing the blocksize and sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Her2k_un_blk_var6( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_her2k_t* cntl )
{
  FLA_Obj AT,              A0,
          AB,              A1,
                           A2;

  FLA_Obj BT,              B0,
          BB,              B1,
                           B2;

  FLA_Obj CTL,   CTR,      C00, C01, C02,
          CBL,   CBR,      C10, C11, C12,
                           C20, C21, C22;

  dim_t bsize;

  /* Apply beta once up front; the loop only accumulates. */
  FLA_Scalr_internal( FLA_UPPER_TRIANGULAR, beta, C,
                      FLA_Cntl_sub_scalr( cntl ) );

  FLA_Part_2x1( A,    &AT,
                      &AB,            0, FLA_BOTTOM );

  FLA_Part_2x1( B,    &BT,
                      &BB,            0, FLA_BOTTOM );

  FLA_Part_2x2( C,    &CTL, &CTR,
                      &CBL, &CBR,     0, 0, FLA_BR );

  while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) )
  {
    bsize = FLA_Determine_blocksize( AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( AT,                &A0,
                                              &A1,
                        /* ** */            /* ** */
                           AB,                &A2,        bsize, FLA_TOP );

    FLA_Repart_2x1_to_3x1( BT,                &B0,
                                              &B1,
                        /* ** */            /* ** */
                           BB,                &B2,        bsize, FLA_TOP );

    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, &C01, /**/ &C02,
                                                &C10, &C11, /**/ &C12,
                        /* ************* */   /* ******************** */
                           CBL, /**/ CBR,       &C20, &C21, /**/ &C22,
                           bsize, bsize, FLA_TL );

    /*------------------------------------------------------------*/

    /* C01 = C01 + A0 * B1' */
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                       alpha, A0, B1, FLA_ONE, C01,
                       FLA_Cntl_sub_gemm1( cntl ) );

    /* C12 = C12 + B1 * A2' */
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                       alpha, B1, A2, FLA_ONE, C12,
                       FLA_Cntl_sub_gemm2( cntl ) );

    /* C11 = C11 + A1 * B1' + B1 * A1' */
    FLA_Her2k_internal( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                        alpha, A1, B1, FLA_ONE, C11,
                        FLA_Cntl_sub_her2k( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &AT,                A0,
                            /* ** */           /* ** */
                                                  A1,
                              &AB,                A2,     FLA_BOTTOM );

    FLA_Cont_with_3x1_to_2x1( &BT,                B0,
                            /* ** */           /* ** */
                                                  B1,
                              &BB,                B2,     FLA_BOTTOM );

    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, /**/ C01, C02,
                            /* ************** */  /* ****************** */
                                                     C10, /**/ C11, C12,
                              &CBL, /**/ &CBR,       C20, /**/ C21, C22,
                              FLA_BR );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:87,
示例20: FLA_Syrk_ln_blk_var1
/*
 * Blocked variant 1 of the lower-triangular, non-transposed symmetric rank-k
 * update.
 *
 * A is walked top to bottom in row panels A1 while C is swept along its
 * diagonal from the top-left corner. Each iteration updates two blocks of C,
 * passing both alpha and beta to the sub-operation:
 *   - C10 = alpha * A1 * A0' + beta * C10   (FLA_Gemm_internal)
 *   - C11 = alpha * A1 * A1' + beta * C11   (FLA_Syrk_internal, lower)
 *
 * alpha, beta  scaling factor objects
 * A            input matrix, traversed by row panels
 * C            matrix that receives the update
 * cntl         control tree providing the blocksize and sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Syrk_ln_blk_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t* cntl )
{
  FLA_Obj AT,              A0,
          AB,              A1,
                           A2;

  FLA_Obj CTL,   CTR,      C00, C01, C02,
          CBL,   CBR,      C10, C11, C12,
                           C20, C21, C22;

  dim_t bsize;

  FLA_Part_2x1( A,    &AT,
                      &AB,            0, FLA_TOP );

  FLA_Part_2x2( C,    &CTL, &CTR,
                      &CBL, &CBR,     0, 0, FLA_TL );

  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) )
  {
    bsize = FLA_Determine_blocksize( AB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( AT,                &A0,
                        /* ** */            /* ** */
                                              &A1,
                           AB,                &A2,        bsize, FLA_BOTTOM );

    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, /**/ &C01, &C02,
                        /* ************* */   /* ******************** */
                                                &C10, /**/ &C11, &C12,
                           CBL, /**/ CBR,       &C20, /**/ &C21, &C22,
                           bsize, bsize, FLA_BR );

    /*------------------------------------------------------------*/

    /* C10 = alpha * A1 * A0' + beta * C10 */
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE,
                       alpha, A1, A0, beta, C10,
                       FLA_Cntl_sub_gemm( cntl ) );

    /* C11 = alpha * A1 * A1' + beta * C11 */
    FLA_Syrk_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       alpha, A1, beta, C11,
                       FLA_Cntl_sub_syrk( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &AT,                A0,
                                                  A1,
                            /* ** */           /* ** */
                              &AB,                A2,     FLA_TOP );

    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, C01, /**/ C02,
                                                     C10, C11, /**/ C12,
                            /* ************** */  /* ****************** */
                              &CBL, /**/ &CBR,       C20, C21, /**/ C22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:62,
示例21: FLA_Lyap_n_blk_var1
/*
 * Blocked variant 1 of the non-transposed Lyapunov-equation operation.
 *
 * A and C are partitioned 2x2 with the FLA_BR quadrant started at 0 x 0 and
 * swept in step from the bottom-right corner toward the top-left until CTL
 * has no rows left. Each iteration performs, in this order:
 *   1. C12 = isgn * C12 - A12 * C22          (FLA_Hemm_internal, right side)
 *   2. C12 = sylv( A11, A22', C12 )          (FLA_Sylv_internal)
 *   3. C11 = isgn * C11 - A12 * C12' - C12 * A12'   (FLA_Her2k_internal)
 *   4. C11 = lyap_n( A11, C11 )              (FLA_Lyap_internal)
 * Steps 3 and 4 consume the C12 produced by step 2, so the order matters.
 *
 * isgn   sign object used as the scaling of C12 / C11 in steps 1 and 3
 * A      coefficient matrix
 * C      matrix updated block by block
 * scale  scale object forwarded to the Sylvester and Lyapunov sub-operations
 * cntl   control tree providing the blocksize and sub-operation controls
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Lyap_n_blk_var1( FLA_Obj isgn, FLA_Obj A, FLA_Obj C, FLA_Obj scale, fla_lyap_t* cntl )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02,
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  FLA_Obj CTL,   CTR,      C00, C01, C02,
          CBL,   CBR,      C10, C11, C12,
                           C20, C21, C22;

  dim_t b;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_BR );

  FLA_Part_2x2( C,    &CTL, &CTR,
                      &CBL, &CBR,     0, 0, FLA_BR );

  while ( FLA_Obj_length( CTL ) > 0 )
  {
    b = FLA_Determine_blocksize( CTL, FLA_TL, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, &A01, /**/ &A02,
                                                &A10, &A11, /**/ &A12,
                        /* ************* */   /* ******************** */
                           ABL, /**/ ABR,       &A20, &A21, /**/ &A22,
                           b, b, FLA_TL );

    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, &C01, /**/ &C02,
                                                &C10, &C11, /**/ &C12,
                        /* ************* */   /* ******************** */
                           CBL, /**/ CBR,       &C20, &C21, /**/ &C22,
                           b, b, FLA_TL );

    /*------------------------------------------------------------*/

    // C12 = isgn * C12 - A12 * C22;
    // C12 = sylv( A11, A22', C12 );
    FLA_Hemm_internal( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
                       FLA_MINUS_ONE, C22, A12, isgn, C12,
                       FLA_Cntl_sub_hemm( cntl ) );
    FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                       FLA_ONE, A11, A22, C12, scale,
                       FLA_Cntl_sub_sylv( cntl ) );

    // C11 = isgn * C11 - A12 * C12' - C12 * A12';
    FLA_Her2k_internal( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                        FLA_MINUS_ONE, A12, C12, isgn, C11,
                        FLA_Cntl_sub_her2k( cntl ) );

    // C11 = lyap_n( A11, C11 );
    FLA_Lyap_internal( FLA_NO_TRANSPOSE,
                       FLA_ONE, A11, C11, scale,
                       FLA_Cntl_sub_lyap( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, /**/ A01, A02,
                            /* ************** */  /* ****************** */
                                                     A10, /**/ A11, A12,
                              &ABL, /**/ &ABR,       A20, /**/ A21, A22,
                              FLA_BR );

    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, /**/ C01, C02,
                            /* ************** */  /* ****************** */
                                                     C10, /**/ C11, C12,
                              &CBL, /**/ &CBR,       C20, /**/ C21, C22,
                              FLA_BR );
  }

  return FLA_SUCCESS;
}
开发者ID:anaptyxis,项目名称:libflame,代码行数:69,
示例22: FLA_UDdate_UT_blk_var2
/*
 * Blocked variant 2 of the up/downdate of R via "UD" UT Householder
 * transformations.
 *
 * C, D and T are each walked top to bottom in row panels. A separate
 * blocksize is determined for each of them from its own remaining part, and
 * every iteration hands ( R, C1, D1, T1 ) to FLA_UDdate_UT_internal.
 *
 * The loop runs only while BOTH C and D still have unprocessed rows.
 * NOTE(review): the in-loop remark says C1 or D1 may become empty in an
 * iteration; confirm that stopping as soon as either C or D is exhausted is
 * the intended behaviour.
 *
 * R      upper-triangular factor being up/downdated
 * C      rows whose contributions are added
 * D      rows whose contributions are removed
 * T      object receiving the T1 panels of the transformations
 * cntl   control tree providing the blocksize and sub-operation control
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_UDdate_UT_blk_var2( FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, fla_uddateut_t* cntl )
{
  FLA_Obj CT,              C0,
          CB,              C1,
                           C2;

  FLA_Obj DT,              D0,
          DB,              D1,
                           D2;

  FLA_Obj TT,              T0,
          TB,              T1,
                           T2;

  dim_t b_C, b_D, b_T;

  FLA_Part_2x1( C,    &CT,
                      &CB,            0, FLA_TOP );

  FLA_Part_2x1( D,    &DT,
                      &DB,            0, FLA_TOP );

  FLA_Part_2x1( T,    &TT,
                      &TB,            0, FLA_TOP );

  while ( FLA_Obj_length( CT ) < FLA_Obj_length( C ) &&
          FLA_Obj_length( DT ) < FLA_Obj_length( D ) )
  {
    /* Each operand gets its own blocksize from its own remaining rows. */
    b_C = FLA_Determine_blocksize( CB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );
    b_D = FLA_Determine_blocksize( DB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );
    b_T = FLA_Determine_blocksize( TB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( CT,                &C0,
                        /* ** */            /* ****** */
                                              &C1,
                           CB,                &C2,        b_C, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( DT,                &D0,
                        /* ** */            /* ****** */
                                              &D1,
                           DB,                &D2,        b_D, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( TT,                &T0,
                        /* ** */            /* ****** */
                                              &T1,
                           TB,                &T2,        b_T, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    /*
       Up/downdate the upper triangular Cholesky factor R via "UD" UT
       Householder transformations:

         [ R, ...
           C1, ...
           D1, T1 ] = FLA_UDdate_UT( R, ...
                                     C1, ...
                                     D1, T1 );

       R is updated so that the contributions of the rows in D1 are removed
       while new contributions from the rows of C1 are added to the
       factorization. C1 and D1 are also updated in the process, and either
       of them may become empty at any iteration.
    */
    FLA_UDdate_UT_internal( R,
                            C1,
                            D1, T1,
                            FLA_Cntl_sub_uddateut( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &CT,                C0,
                                                  C1,
                            /* ** */           /* ****** */
                              &CB,                C2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &DT,                D0,
                                                  D1,
                            /* ** */           /* ****** */
                              &DB,                D2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &TT,                T0,
                                                  T1,
                            /* ** */           /* ****** */
                              &TB,                T2,     FLA_TOP );
  }

  return FLA_SUCCESS;
}
开发者ID:fmarrabal,项目名称:libflame,代码行数:92,
示例23: FLA_Gemm_nn_omp_var15FLA_Error FLA_Gemm_nn_omp_var15( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj C, fla_gemm_t* cntl ){ FLA_Obj AT, A0, AB, A1, A2; FLA_Obj CT, C0, CB, C1, C2; FLA_Obj AL, AR, A10, A11, A12; FLA_Obj BT, B0, BB, B1, B2; FLA_Obj C1_local; int i, j, lock_ldim, lock_i; int b_m, b_k; FLA_Part_2x1( A, &AT, &AB, 0, FLA_TOP ); FLA_Part_2x1( C, &CT, &CB, 0, FLA_TOP ); #pragma intel omp parallel taskq { while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ) { b_m = FLA_Determine_blocksize( A, AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) ); FLA_Repart_2x1_to_3x1( AT, &A0, /* ** */ /* ** */ &A1, AB, &A2, b_m, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( CT, &C0, /* ** */ /* ** */ &C1, CB, &C2, b_m, FLA_BOTTOM ); /*------------------------------------------------------------*/ /* C1 = alpha * A1 * B + C1; */ FLA_Part_1x2( A1, &AL, &AR, 0, FLA_LEFT ); FLA_Part_2x1( B, &BT, &BB, 0, FLA_TOP ); while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) ) { b_k = FLA_Determine_blocksize( A, AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) ); // Get the index of the current partition. // FIX THIS: need + b_m - 1 or something like this //j = FLA_Obj_length( CT ) / b_m; //i = FLA_Obj_width( AL ) / b_k; //lock_ldim = FLA_get_num_threads_in_m_dim(omp_get_num_threads()); lock_i = FLA_Obj_length( CT ) / b_m; FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A10, /**/ &A11, &A12, b_k, FLA_RIGHT ); FLA_Repart_2x1_to_3x1( BT, &B0, /* ** */ /* ** */ &B1, BB, &B2, b_k, FLA_BOTTOM ); /*------------------------------------------------------------*/ /* C1 = alpha * A11 * B1 + C1; */ //// FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, //// alpha, A11, B1, FLA_ONE, C1 ); #pragma intel omp task captureprivate( lock_i, A11, B1, C1 ), private( C1_local ) { FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C1, &C1_local ); FLA_Obj_set_to_zero( C1_local ); /* C1_local = alpha * A1 * B11 + C1_local; */ FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, alpha, A11, B1, FLA_ONE, C1_local ); // Acquire lock[i] (the lock for C1). 
omp_set_lock( &fla_omp_lock[lock_i] ); /* C1 += C1_local */ FLA_Axpy_external( FLA_ONE, C1_local, C1 ); //FLA_Axpy_sync_pipeline2( j*lock_ldim, FLA_ONE, C1_local, C1 ); //FLA_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 ); //REF_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 ); // Release lock[i] (the lock for C1). omp_unset_lock( &fla_omp_lock[lock_i] ); FLA_Obj_free( &C1_local ); } //.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,
示例24: FLA_Eig_gest_il_blk_var2FLA_Error FLA_Eig_gest_il_blk_var2( FLA_Obj A, FLA_Obj Y, FLA_Obj B, fla_eig_gest_t* cntl ){ FLA_Obj ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; FLA_Obj BTL, BTR, B00, B01, B02, BBL, BBR, B10, B11, B12, B20, B21, B22; FLA_Obj YL, YR, Y10, Y11, Y12; FLA_Obj Y10_t, Y10_b; dim_t b; FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 0, 0, FLA_TL ); FLA_Part_2x2( B, &BTL, &BTR, &BBL, &BBR, 0, 0, FLA_TL ); FLA_Part_1x2( Y, &YL, &YR, 0, FLA_LEFT ); while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){ b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) ); FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02, /* ************* */ /* ******************** */ &A10, /**/ &A11, &A12, ABL, /**/ ABR, &A20, /**/ &A21, &A22, b, b, FLA_BR ); FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, /* ************* */ /* ******************** */ &B10, /**/ &B11, &B12, BBL, /**/ BBR, &B20, /**/ &B21, &B22, b, b, FLA_BR ); FLA_Repart_1x2_to_1x3( YL, /**/ YR, &Y10, /**/ &Y11, &Y12, b, FLA_RIGHT ); /*------------------------------------------------------------*/ FLA_Part_2x1( Y10, &Y10_t, &Y10_b, b, FLA_TOP ); // Y10 = 1/2 * B10 * A00; FLA_Hemm_internal( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_ONE_HALF, A00, B10, FLA_ZERO, Y10_t, FLA_Cntl_sub_hemm( cntl ) ); // A10 = A10 - Y10; FLA_Axpy_internal( FLA_MINUS_ONE, Y10_t, A10, FLA_Cntl_sub_axpy1( cntl ) ); // A11 = A11 - A10 * B10' - B10 * A10'; FLA_Her2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A10, B10, FLA_ONE, A11, FLA_Cntl_sub_her2k( cntl ) ); // A11 = inv( tril( B11 ) ) * A11 * inv( tril( B11 )' ); FLA_Eig_gest_internal( FLA_INVERSE, FLA_LOWER_TRIANGULAR, A11, Y11, B11, FLA_Cntl_sub_eig_gest( cntl ) ); // A21 = A21 - A20 * B10'; FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A20, B10, FLA_ONE, A21, FLA_Cntl_sub_gemm1( cntl ) ); // A21 = A21 * inv( tril( B11 )' ); FLA_Trsm_internal( FLA_RIGHT, FLA_LOWER_TRIANGULAR, 
FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, B11, A21, FLA_Cntl_sub_trsm1( cntl ) ); // A10 = A10 - Y10; FLA_Axpy_internal( FLA_MINUS_ONE, Y10_t, A10, FLA_Cntl_sub_axpy2( cntl ) ); // A10 = inv( tril( B11 ) ) * A10; FLA_Trsm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, B11, A10, FLA_Cntl_sub_trsm2( cntl ) ); /*------------------------------------------------------------*/ FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02, A10, A11, /**/ A12, /* ************** */ /* ****************** */ &ABL, /**/ &ABR, A20, A21, /**/ A22, FLA_TL ); FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, B10, B11, /**/ B12, /* ************** */ /* ****************** *///.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
示例25: FLA_Symm_ru_blk_var4
/*
 * Blocked variant 4 of the symmetric matrix multiply with A applied from
 * the right and its upper triangle referenced.
 *
 * C is first scaled by beta.  A is then traversed along its diagonal from
 * top-left to bottom-right while B and C are traversed left to right in
 * column panels of the same width.  On each iteration the current column
 * panel B1 contributes to all three column panels of C.
 *
 * Parameters:
 *   alpha - scalar object applied to every product added into C.
 *   A     - symmetric matrix object (blocks A01, A11, A12 are read).
 *   B     - matrix object partitioned into column panels.
 *   beta  - scalar object used to scale C before accumulation.
 *   C     - matrix object that is scaled and then accumulated into.
 *   cntl  - control tree supplying the blocksize and the sub-control trees
 *           for the scal, gemm1, symm and gemm2 subproblems.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Symm_ru_blk_var4( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_symm_t* cntl )
{
  /* Quadrant views and 3x3 sub-block views of A. */
  FLA_Obj ATL, ATR,
          ABL, ABR;
  FLA_Obj A00, A01, A02,
          A10, A11, A12,
          A20, A21, A22;

  /* Left/right views and column panels of B and C. */
  FLA_Obj BL, BR;
  FLA_Obj B0, B1, B2;

  FLA_Obj CL, CR;
  FLA_Obj C0, C1, C2;

  /* C = beta * C, done once up front so the loop only accumulates. */
  FLA_Scal_internal( beta, C,
                     FLA_Cntl_sub_scal( cntl ) );

  /* Nothing processed yet: top-left quadrant and left views are empty. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_TL );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );
  FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );

  /* Iterate until the top-left quadrant spans all rows of A. */
  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    /* Block size is taken from the unprocessed bottom-right quadrant. */
    dim_t b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, ATR, &A00, &A01, &A02,
                                     &A10, &A11, &A12,
                           ABL, ABR, &A20, &A21, &A22,
                           b, b, FLA_BR );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2,
                           b, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( CL, CR, &C0, &C1, &C2,
                           b, FLA_RIGHT );

    /* C0 = C0 + alpha * B1 * A01' */
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE,
                       alpha, B1, A01, FLA_ONE, C0,
                       FLA_Cntl_sub_gemm1( cntl ) );

    /* C1 = C1 + alpha * B1 * A11  (A11 symmetric, upper triangle referenced) */
    FLA_Symm_internal( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
                       alpha, A11, B1, FLA_ONE, C1,
                       FLA_Cntl_sub_symm( cntl ) );

    /* C2 = C2 + alpha * B1 * A12 */
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                       alpha, B1, A12, FLA_ONE, C2,
                       FLA_Cntl_sub_gemm2( cntl ) );

    /* Absorb the processed blocks into the top-left / left views. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00, A01, A02,
                                          A10, A11, A12,
                              &ABL, &ABR, A20, A21, A22,
                              FLA_TL );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &CL, &CR, C0, C1, C2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
开发者ID:pgawron,项目名称:tlash,代码行数:73,
示例26: FLA_Apply_Q_UT_rnfr_blk_var3FLA_Error FLA_Apply_Q_UT_rnfr_blk_var3( FLA_Obj A, FLA_Obj TW, FLA_Obj W, FLA_Obj B, fla_apqut_t* cntl ){ FLA_Obj ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; FLA_Obj TWTL, TWTR, TW00, TW01, TW02, TWBL, TWBR, TW10, T11, W12, TW20, TW21, TW22; FLA_Obj WTL, WTR, WBL, WBR; FLA_Obj BL, BR, B0, B1, B2; dim_t b; FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 0, 0, FLA_TL ); FLA_Part_2x2( TW, &TWTL, &TWTR, &TWBL, &TWBR, 0, 0, FLA_TL ); FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT ); while ( FLA_Obj_min_dim( ABR ) > 0 ){ b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) ); FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02, /* ************* */ /* ******************** */ &A10, /**/ &A11, &A12, ABL, /**/ ABR, &A20, /**/ &A21, &A22, b, b, FLA_BR ); FLA_Repart_2x2_to_3x3( TWTL, /**/ TWTR, &TW00, /**/ &TW01, &TW02, /* *************** */ /* *********************** */ &TW10, /**/ &T11, &W12, TWBL, /**/ TWBR, &TW20, /**/ &TW21, &TW22, b, b, FLA_BR ); FLA_Repart_1x2_to_1x3( BL, /**/ BR, &B0, /**/ &B1, &B2, b, FLA_RIGHT ); /*------------------------------------------------------------*/ FLA_Part_2x2( W, &WTL, &WTR, &WBL, &WBR, b, FLA_Obj_length( B1 ), FLA_TL ); // WTL = B1; FLA_Copyt_internal( FLA_TRANSPOSE, B1, WTL, FLA_Cntl_sub_copyt( cntl ) ); // U11 = trilu( A11 ); // U12 = A12; // Let WTL^T be conformal to B1. 
// // WTL^T = ( B1 * U11^T + B2 * U12^T ) * inv( triu(T11) ); // WTL = inv( triu(T11) )^T * ( U11 * B1^T + U12 * B2^T ); FLA_Trmm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, FLA_ONE, A11, WTL, FLA_Cntl_sub_trmm1( cntl ) ); FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A12, B2, FLA_ONE, WTL, FLA_Cntl_sub_gemm1( cntl ) ); FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, T11, WTL, FLA_Cntl_sub_trsm( cntl ) ); // B2 = B2 - WTL^T * conj(U12); // B1 = B1 - WTL^T * conj(U11); // = B1 - ( U11' * WTL )^T; FLA_Gemm_internal( FLA_TRANSPOSE, FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, WTL, A12, FLA_ONE, B2, FLA_Cntl_sub_gemm2( cntl ) ); FLA_Trmm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_UNIT_DIAG, FLA_MINUS_ONE, A11, WTL, FLA_Cntl_sub_trmm2( cntl ) ); FLA_Axpyt_internal( FLA_TRANSPOSE, FLA_ONE, WTL, B1, FLA_Cntl_sub_axpyt( cntl ) ); /*------------------------------------------------------------*/ FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02, A10, A11, /**/ A12, /* ************** */ /* ****************** */ &ABL, /**/ &ABR, A20, A21, /**/ A22, FLA_TL ); FLA_Cont_with_3x3_to_2x2( &TWTL, /**/ &TWTR, TW00, TW01, /**/ TW02,//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,
注:本文中的FLA_Determine_blocksize函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 C++ FLA_FLOAT_PTR函数代码示例 C++ FLA_Copy_external函数代码示例 |