您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ FLA_Repart_2x1_to_3x1函数代码示例

51自学网 2021-06-01 20:42:08
  C++
这篇教程C++ FLA_Repart_2x1_to_3x1函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中FLA_Repart_2x1_to_3x1函数的典型用法代码示例。如果您正苦于以下问题:C++ FLA_Repart_2x1_to_3x1函数的具体用法?C++ FLA_Repart_2x1_to_3x1怎么用?C++ FLA_Repart_2x1_to_3x1使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了FLA_Repart_2x1_to_3x1函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: FLA_QR_UT_piv_blk_var2

FLA_Error FLA_QR_UT_piv_blk_var2( FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p, fla_qrut_t* cntl ){  FLA_Obj ATL,   ATR,      A00, A01, A02,           ABL,   ABR,      A10, A11, A12,                           A20, A21, A22;  FLA_Obj TL,    TR,       T0,  T1,  W12;  FLA_Obj TT,    TB;  FLA_Obj pT,              p0,          pB,              p1,                           p2;  FLA_Obj wT,              w0,          wB,              w1,                           w2;  dim_t   b_alg, b;  // Query the algorithmic blocksize by inspecting the length of T.  b_alg = FLA_Obj_length( T );  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );  FLA_Part_2x1( p,    &pT,                       &pB,            0, FLA_TOP );  FLA_Part_2x1( w,    &wT,                       &wB,            0, FLA_TOP );  while ( FLA_Obj_min_dim( ABR ) > 0 ){    b = min( b_alg, FLA_Obj_min_dim( ABR ) );    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,                        /* ************* */   /* ******************** */                                                &A10, /**/ &A11, &A12,                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,                           b, b, FLA_BR );    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &W12,                           b, FLA_RIGHT );    FLA_Repart_2x1_to_3x1( pT,                &p0,                         /* ** */            /* ** */                                              &p1,                            pB,                &p2,        b, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( wT,                &w0,                         /* ** */            /* ** */                                              &w1,                            wB,                &w2,        b, FLA_BOTTOM );    /*------------------------------------------------------------*/    // ** Reshape T matrices to match the blocksize b    FLA_Part_2x1( TR,   &TT,                         &TB,    b, FLA_TOP );    // ** Perform a unblocked (BLAS2-oriented) QR factorization     // with pivoting via the UT transform on ABR:    //    //   ABR  -> QB1 R11    //    // where:    //  - QB1 is formed from UB1 (which is stored column-wise below the    //    diagonal of ( A11 A21 )^T and the upper-triangle of T1.     //  - R11 is stored to ( A11 A12 ).    //  - W12 stores  T and partial updates for FLA_Apply_Q_UT_piv_var.    FLA_QR_UT_piv_internal( ABR, TT, wB, p1,                             FLA_Cntl_sub_qrut( cntl ) );    if ( FLA_Obj_width( A12 ) > 0 )    {      // ** Block update      FLA_Part_2x1( W12,  &TT,                           &TB,    b, FLA_TOP );       FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,                          FLA_MINUS_ONE, A21, TT, FLA_ONE, A22 );    }    // ** Apply pivots to previous columns.    FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, p1, ATR );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,                                                     A10, A11, /**/ A12,                            /* ************** */  /* ****************** */                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,                              FLA_TL );    FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ W12,                              FLA_LEFT );    FLA_Cont_with_3x1_to_2x1( &pT,                p0, //.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例2: FLASH_SA_LU

FLA_Error FLASH_SA_LU( FLA_Obj B, FLA_Obj C,                        FLA_Obj D, FLA_Obj E, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t* cntl ){   FLA_Obj DT,              D0,           DB,              D1,                            D2;   FLA_Obj ET,              E0,           EB,              E1,                            E2;   FLA_Obj pT,              p0,           pB,              p1,                            p2;   FLA_Obj LT,              L0,           LB,              L1,                            L2;   FLA_Part_2x1( D,    &DT,                       &DB,            0, FLA_TOP );   FLA_Part_2x1( E,    &ET,                       &EB,            0, FLA_TOP );   FLA_Part_2x1( p,    &pT,                       &pB,            0, FLA_TOP );   FLA_Part_2x1( L,    &LT,                       &LB,            0, FLA_TOP );   while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) )   {      FLA_Repart_2x1_to_3x1( DT,                &D0,                          /* ** */            /* ** */                                                &D1,                             DB,                &D2,        1, FLA_BOTTOM );      FLA_Repart_2x1_to_3x1( ET,                &E0,                          /* ** */            /* ** */                                                &E1,                             EB,                &E2,        1, FLA_BOTTOM );      FLA_Repart_2x1_to_3x1( pT,                &p0,                          /* ** */            /* ** */                                                &p1,                             pB,                &p2,        1, FLA_BOTTOM );      FLA_Repart_2x1_to_3x1( LT,                &L0,                          /* ** */            /* ** */                                                &L1,                             LB,                &L2,        1, FLA_BOTTOM );      /*------------------------------------------------------------*/      if ( FLASH_Queue_get_enabled( ) )      {         // Enqueue         ENQUEUE_FLASH_SA_LU( *FLASH_OBJ_PTR_AT( B ),                              *FLASH_OBJ_PTR_AT( D1 ),                              *FLASH_OBJ_PTR_AT( p1 ),                              *FLASH_OBJ_PTR_AT( L1 ),                              nb_alg,                              FLA_Cntl_sub_lu( cntl ) );      }      else      {         // Execute leaf         FLA_SA_LU_task( *FLASH_OBJ_PTR_AT( B ),                         *FLASH_OBJ_PTR_AT( D1 ),                         *FLASH_OBJ_PTR_AT( p1 ),                         *FLASH_OBJ_PTR_AT( L1 ),                         nb_alg,                         FLA_Cntl_sub_lu( cntl ) );      }            FLASH_SA_FS( L1,                   D1, p1, C,                           E1, nb_alg, FLA_Cntl_sub_gemm1( cntl ) );      /*------------------------------------------------------------*/      FLA_Cont_with_3x1_to_2x1( &DT,                D0,                                                    D1,                              /* ** */           /* ** */                                &DB,                D2,     FLA_TOP );      FLA_Cont_with_3x1_to_2x1( &ET,                E0,                                                    E1,                              /* ** */           /* ** */                                &EB,                E2,     FLA_TOP );      FLA_Cont_with_3x1_to_2x1( &pT,                p0,                                                    p1,                              /* ** */           /* ** */                                &pB,                p2,     FLA_TOP );      FLA_Cont_with_3x1_to_2x1( &LT,                L0,                                                    L1,                              /* ** */           /* ** *///.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例3: FLA_Her2k_ln_unb_var6

FLA_Error FLA_Her2k_ln_unb_var6( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C ){  FLA_Obj AT,              A0,          AB,              a1t,                           A2;  FLA_Obj BT,              B0,          BB,              b1t,                           B2;  FLA_Obj CTL,   CTR,      C00,  c01,     C02,           CBL,   CBR,      c10t, gamma11, c12t,                           C20,  c21,     C22;  FLA_Scalr_external( FLA_LOWER_TRIANGULAR, beta, C );  FLA_Part_2x1( A,    &AT,                       &AB,            0, FLA_BOTTOM );  FLA_Part_2x1( B,    &BT,                       &BB,            0, FLA_BOTTOM );  FLA_Part_2x2( C,    &CTL, &CTR,                      &CBL, &CBR,     0, 0, FLA_BR );  while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){    FLA_Repart_2x1_to_3x1( AT,                &A0,                                               &a1t,                         /* ** */            /* ** */                           AB,                &A2,        1, FLA_TOP );    FLA_Repart_2x1_to_3x1( BT,                &B0,                                               &b1t,                         /* ** */            /* ** */                           BB,                &B2,        1, FLA_TOP );    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00,  &c01,     /**/ &C02,                                                &c10t, &gamma11, /**/ &c12t,                        /* ************* */   /* ************************** */                           CBL, /**/ CBR,       &C20,  &c21,     /**/ &C22,                           1, 1, FLA_TL );    /*------------------------------------------------------------*/    /* c10t = c10t + a1t * B0' */    FLA_Gemv_external( FLA_CONJ_NO_TRANSPOSE, alpha, B0, a1t, FLA_ONE, c10t );    /* c21 = c21 + B2 * a1t' */    FLA_Gemvc_external( FLA_NO_TRANSPOSE, FLA_CONJUGATE, alpha, B2, a1t, FLA_ONE, c21 );        /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */    FLA_Dot2cs_external( FLA_CONJUGATE, alpha, a1t, b1t, FLA_ONE, gamma11 );    /*------------------------------------------------------------*/    FLA_Cont_with_3x1_to_2x1( &AT,                A0,                             /* ** */           /* ** */                                                  a1t,                               &AB,                A2,     FLA_BOTTOM );    FLA_Cont_with_3x1_to_2x1( &BT,                B0,                             /* ** */           /* ** */                                                  b1t,                               &BB,                B2,     FLA_BOTTOM );    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00,  /**/ c01,     C02,                            /* ************** */  /* ************************ */                                                     c10t, /**/ gamma11, c12t,                              &CBL, /**/ &CBR,       C20,  /**/ c21,     C22,                              FLA_BR );  }  return FLA_SUCCESS;}
开发者ID:pgawron,项目名称:tlash,代码行数:78,


示例4: FLA_UDdate_UT_blk_var2

FLA_Error FLA_UDdate_UT_blk_var2( FLA_Obj R,                                  FLA_Obj C,                                  FLA_Obj D, FLA_Obj T, fla_uddateut_t* cntl ){    FLA_Obj CT,              C0,            CB,              C1,            C2;    FLA_Obj DT,              D0,            DB,              D1,            D2;    FLA_Obj TT,              T0,            TB,              T1,            T2;    dim_t   b_C, b_D, b_T;    FLA_Part_2x1( C,    &CT,                  &CB,            0, FLA_TOP );    FLA_Part_2x1( D,    &DT,                  &DB,            0, FLA_TOP );    FLA_Part_2x1( T,    &TT,                  &TB,            0, FLA_TOP );    while ( FLA_Obj_length( CT ) < FLA_Obj_length( C ) &&            FLA_Obj_length( DT ) < FLA_Obj_length( D ) ) {        b_C = FLA_Determine_blocksize( CB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );        b_D = FLA_Determine_blocksize( DB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );        b_T = FLA_Determine_blocksize( TB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );        FLA_Repart_2x1_to_3x1( CT,                &C0,                               /* ** */          /* ****** */                               &C1,                               CB,                &C2,        b_C, FLA_BOTTOM );        FLA_Repart_2x1_to_3x1( DT,                &D0,                               /* ** */          /* ****** */                               &D1,                               DB,                &D2,        b_D, FLA_BOTTOM );        FLA_Repart_2x1_to_3x1( TT,                &T0,                               /* ** */          /* ****** */                               &T1,                               TB,                &T2,        b_T, FLA_BOTTOM );        /*------------------------------------------------------------*/        /*           Perform an up/downdate of the upper triangular Cholesky factor R via           "UD" UT Householder transformations:             [ R, ...               C1, ...               D1, T1 ] = FLA_UDdate_UT( R, ...                                         C1, ...                                         D1, T1 );           by updating R in such a way that removes the contributions of the rows           in D1 while simultaneously adding new contributions to the factorization           from the rows of C1. Note that C1 and D1 are also updated in the process.           Also note that either C1 or D1 may become empty at any iteration.        */        FLA_UDdate_UT_internal( R,                                C1,                                D1, T1,                                FLA_Cntl_sub_uddateut( cntl ) );        /*------------------------------------------------------------*/        FLA_Cont_with_3x1_to_2x1( &CT,                C0,                                  C1,                                  /* ** */          /* ****** */                                  &CB,                C2,     FLA_TOP );        FLA_Cont_with_3x1_to_2x1( &DT,                D0,                                  D1,                                  /* ** */          /* ****** */                                  &DB,                D2,     FLA_TOP );        FLA_Cont_with_3x1_to_2x1( &TT,                T0,                                  T1,                                  /* ** */          /* ****** */                                  &TB,                T2,     FLA_TOP );    }    return FLA_SUCCESS;}
开发者ID:fmarrabal,项目名称:libflame,代码行数:92,


示例5: Symm_unb_var6

int Symm_unb_var6( FLA_Obj A, FLA_Obj B, FLA_Obj C ){  FLA_Obj ATL,   ATR,      A00,  a01,     A02,           ABL,   ABR,      a10t, alpha11, a12t,          A20,  a21,     A22;  FLA_Obj BT,              B0,          BB,              b1t,          B2;  FLA_Obj CT,              C0,          CB,              c1t,          C2;  FLA_Part_2x2( A,    &ATL, &ATR,      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_2x1( B,    &BT,       &BB,            0, FLA_TOP );  FLA_Part_2x1( C,    &CT,       &CB,            0, FLA_TOP );  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,        /* ************* */   /* ************************** */        &a10t, /**/ &alpha11, &a12t,        ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,        1, 1, FLA_BR );    FLA_Repart_2x1_to_3x1( BT,                &B0,         /* ** */            /* *** */        &b1t,         BB,                &B2,        1, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( CT,                &C0,         /* ** */            /* *** */        &c1t,         CB,                &C2,        1, FLA_BOTTOM );    /*------------------------------------------------------------*/        // c1t = c1t + a10t*B0 + alpha11*b1t + a21t*B2;    FLA_Gemv(FLA_TRANSPOSE, FLA_ONE, B0, a10t, FLA_ONE, c1t);    FLA_Gemv(FLA_TRANSPOSE, FLA_ONE, B2, a21, FLA_ONE, c1t);    FLA_Axpy(alpha11, b1t, c1t);    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,        a10t, alpha11, /**/ a12t,        /* ************** */  /* ************************ */        &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,        FLA_TL );    FLA_Cont_with_3x1_to_2x1( &BT,                B0,         b1t,         /* ** */           /* *** */        &BB,                B2,     FLA_TOP );    FLA_Cont_with_3x1_to_2x1( &CT,                C0,         c1t,         /* ** */           /* *** */        &CB,                C2,     FLA_TOP );  }  return FLA_SUCCESS;}
开发者ID:ebeweber,项目名称:CS378-Invariant-Project,代码行数:70,


示例6: FLA_Syrk_ln_omp2l_var3

FLA_Error FLA_Syrk_ln_omp2l_var3( FLA_Obj A, FLA_Obj C ){  FLA_Obj AT,              A0,          AB,              A1,                           A2;  FLA_Obj CTL,   CTR,      C00, C01, C02,           CBL,   CBR,      C10, C11, C12,                           C20, C21, C22;  int b;  FLA_Part_2x1( A,    &AT,                       &AB,            0, FLA_BOTTOM );  FLA_Part_2x2( C,    &CTL, &CTR,                      &CBL, &CBR,     0, 0, FLA_BR );  #pragma intel omp parallel taskq  {  while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){    b = FLA_Task_compute_blocksize( 0, A, AB, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( AT,                &A0,                                               &A1,                         /* ** */            /* ** */                           AB,                &A2,        b, FLA_TOP );    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, &C01, /**/ &C02,                                                &C10, &C11, /**/ &C12,                        /* ************* */   /* ******************** */                           CBL, /**/ CBR,       &C20, &C21, /**/ &C22,                           b, b, FLA_TL );    /*------------------------------------------------------------*/    #pragma intel omp task captureprivate(A1, A2, C21)    {    /* C21 = C21 + A2 * A1' */    FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, A2, A1, FLA_ONE, C21 );    }    /*------------------------------------------------------------*/    FLA_Cont_with_3x1_to_2x1( &AT,                A0,                             /* ** */           /* ** */                                                  A1,                               &AB,                A2,     FLA_BOTTOM );    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, /**/ C01, C02,                            /* ************** */  /* ****************** */                                                     C10, /**/ C11, C12,                              &CBL, /**/ &CBR,       C20, /**/ C21, C22,                              FLA_BR );  }  FLA_Part_2x1( A,    &AT,                       &AB,            0, FLA_BOTTOM );  FLA_Part_2x2( C,    &CTL, &CTR,                      &CBL, &CBR,     0, 0, FLA_BR );  while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){    b = FLA_Task_compute_blocksize( 0, A, AB, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( AT,                &A0,                                               &A1,                         /* ** */            /* ** */                           AB,                &A2,        b, FLA_TOP );    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, &C01, /**/ &C02,                                                &C10, &C11, /**/ &C12,                        /* ************* */   /* ******************** */                           CBL, /**/ CBR,       &C20, &C21, /**/ &C22,                           b, b, FLA_TL );    /*------------------------------------------------------------*/    #pragma intel omp task captureprivate(C11, A1)    {    /* C11 = C11 + A1 * A1' */    FLA_Syrk_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, A1, FLA_ONE, C11 );    }    /*------------------------------------------------------------*/    FLA_Cont_with_3x1_to_2x1( &AT,                A0,                             /* ** */           /* ** */                                                  A1,                               &AB,                A2,     FLA_BOTTOM );    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, /**/ C01, C02,                            /* ************** */  /* ****************** */                                                     C10, /**/ C11, C12,                              &CBL, /**/ &CBR,       C20, /**/ C21, C22,                              FLA_BR );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例7: FLA_Trsv_un_blk_var1

FLA_Error FLA_Trsv_un_blk_var1( FLA_Diag diagA, FLA_Obj A, FLA_Obj x, fla_trsv_t* cntl ){  FLA_Obj ATL,   ATR,      A00, A01, A02,           ABL,   ABR,      A10, A11, A12,                           A20, A21, A22;  FLA_Obj xT,              x0,          xB,              x1,                           x2;  dim_t b;  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_BR );  FLA_Part_2x1( x,    &xT,                       &xB,            0, FLA_BOTTOM );  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){    b = FLA_Determine_blocksize( ATL, FLA_TL, FLA_Cntl_blocksize( cntl ) );    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, &A01, /**/ &A02,                                                &A10, &A11, /**/ &A12,                        /* ************* */   /* ******************** */                           ABL, /**/ ABR,       &A20, &A21, /**/ &A22,                           b, b, FLA_TL );    FLA_Repart_2x1_to_3x1( xT,                &x0,                                               &x1,                         /* ** */            /* ** */                           xB,                &x2,        b, FLA_TOP );    /*------------------------------------------------------------*/    /* x1 = x1 - A12 * x2; */    FLA_Gemv_internal( FLA_NO_TRANSPOSE,                        FLA_MINUS_ONE, A12, x2, FLA_ONE, x1,                       FLA_Cntl_sub_gemv( cntl ) );    /* x1 = triu( A11 ) / x1; */    FLA_Trsv_internal( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, diagA,                       A11, x1,                       FLA_Cntl_sub_trsv( cntl ) );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, /**/ A01, A02,                            /* ************** */  /* ****************** */                                                     A10, /**/ A11, A12,                              &ABL, /**/ &ABR,       A20, /**/ A21, A22,                              FLA_BR );    FLA_Cont_with_3x1_to_2x1( &xT,                x0,                             /* ** */           /* ** */                                                  x1,                               &xB,                x2,     FLA_BOTTOM );  }  return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:62,


示例8: FLA_Apply_QUD_UT_lhfc_blk_var1

FLA_Error FLA_Apply_QUD_UT_lhfc_blk_var1( FLA_Obj T, FLA_Obj W,                                                     FLA_Obj R,                                          FLA_Obj U, FLA_Obj C,                                          FLA_Obj V, FLA_Obj D, fla_apqudut_t* cntl ){  FLA_Obj TL,    TR,       T0,  T1,  T2;  FLA_Obj UL,    UR,       U0,  U1,  U2;  FLA_Obj VL,    VR,       V0,  V1,  V2;  FLA_Obj RT,              R0,          RB,              R1,                           R2;  FLA_Obj T1T,          T1B;  FLA_Obj W1TL,  W1TR,          W1BL,  W1BR;  dim_t   b_alg, b;  // Query the algorithmic blocksize by inspecting the length of T.  b_alg = FLA_Obj_length( T );  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );  FLA_Part_1x2( U,    &UL,  &UR,      0, FLA_LEFT );  FLA_Part_1x2( V,    &VL,  &VR,      0, FLA_LEFT );  FLA_Part_2x1( R,    &RT,                       &RB,            0, FLA_TOP );  while ( FLA_Obj_width( UL ) < FLA_Obj_width( U ) ){    b = min( b_alg, FLA_Obj_width( UR ) );    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &T2,                           b, FLA_RIGHT );    FLA_Repart_1x2_to_1x3( UL,  /**/ UR,        &U0, /**/ &U1, &U2,                           b, FLA_RIGHT );    FLA_Repart_1x2_to_1x3( VL,  /**/ VR,        &V0, /**/ &V1, &V2,                           b, FLA_RIGHT );    FLA_Repart_2x1_to_3x1( RT,                &R0,                         /* ** */            /* ** */                                              &R1,                            RB,                &R2,        b, FLA_BOTTOM );    /*------------------------------------------------------------*/    FLA_Part_2x1( T1,    &T1T,                         &T1B,    b, FLA_TOP );    FLA_Part_2x2( W,     &W1TL, &W1TR,                         &W1BL, &W1BR,     b, FLA_Obj_width( R1 ), FLA_TL );    // W1TL = R1;    FLA_Copyt_internal( FLA_NO_TRANSPOSE, R1, W1TL,                        FLA_Cntl_sub_copyt( cntl ) );    // W1TL = inv( triu( T1T ) )' * ( R1 + U1' * C + V1' * D );    FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,                        FLA_ONE, U1, C, FLA_ONE, W1TL,                       FLA_Cntl_sub_gemm1( cntl ) );    FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,                        FLA_ONE, V1, D, FLA_ONE, W1TL,                       FLA_Cntl_sub_gemm2( cntl ) );    FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,                       FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,                       FLA_ONE, T1T, W1TL,                       FLA_Cntl_sub_trsm( cntl ) );    // R1 = R1 - W1TL;    // C  = C  - U1 * W1TL;    // D  = D  + V1 * W1TL;    FLA_Axpyt_internal( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, W1TL, R1,                        FLA_Cntl_sub_axpyt( cntl ) );    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,                       FLA_MINUS_ONE, U1, W1TL, FLA_ONE, C,                       FLA_Cntl_sub_gemm3( cntl ) );    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,                       FLA_ONE, V1, W1TL, FLA_ONE, D,                       FLA_Cntl_sub_gemm4( cntl ) );    /*------------------------------------------------------------*/    FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ T2,                              FLA_LEFT );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例9: Symm_blk_var8

int Symm_blk_var8( FLA_Obj A, FLA_Obj B, FLA_Obj C, int nb_alg ){  FLA_Obj ATL,   ATR,      A00, A01, A02,           ABL,   ABR,      A10, A11, A12,                           A20, A21, A22;  FLA_Obj BT,              B0,          BB,              B1,                           B2;  FLA_Obj CT,              C0,          CB,              C1,                           C2;  int b;  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_2x1( B,    &BT,                       &BB,            0, FLA_TOP );  FLA_Part_2x1( C,    &CT,                       &CB,            0, FLA_TOP );  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){    b = min( FLA_Obj_length( ABR ), nb_alg );    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,                        /* ************* */   /* ******************** */                                                &A10, /**/ &A11, &A12,                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,                           b, b, FLA_BR );    FLA_Repart_2x1_to_3x1( BT,                &B0,                         /* ** */            /* ** */                                              &B1,                            BB,                &B2,        b, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( CT,                &C0,                         /* ** */            /* ** */                                              &C1,                            CB,                &C2,        b, FLA_BOTTOM );    /*------------------------------------------------------------*/    // C0 = C0 + A10^T*B1;    FLA_Gemm(FLA_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A10, B1, FLA_ONE, C0);        // C1 = C1 + A10*B0 + A11*B1;    FLA_Gemm(FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A10, B0, FLA_ONE, C1);    // FLA_Gemm(FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A11, B1, FLA_ONE, C1);    FLA_Symm(FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_ONE, A11, B1, FLA_ONE, C1);    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,                                                     A10, A11, /**/ A12,                            /* ************** */  /* ****************** */                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,                              FLA_TL );    FLA_Cont_with_3x1_to_2x1( &BT,                B0,                                                   B1,                             /* ** */           /* ** */                              &BB,                B2,     FLA_TOP );    FLA_Cont_with_3x1_to_2x1( &CT,                C0,                                                   C1,                             /* ** */           /* ** */                              &CB,                C2,     FLA_TOP );  }  return FLA_SUCCESS;}
开发者ID:ebeweber,项目名称:CS378-Invariant-Project,代码行数:77,


示例10: FLA_Gemm_pp_nn_var1

FLA_Error FLA_Gemm_pp_nn_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj C, int nb_alg ){  FLA_Obj AT,              A0,          AB,              A1,                           A2;  FLA_Obj CT,              C0,          CB,              C1,                           C2;  FLA_Obj packed_C1;  int b;  FLA_Part_2x1( A,    &AT,                       &AB,            0, FLA_TOP );  FLA_Part_2x1( C,    &CT,                       &CB,            0, FLA_TOP );  /* Initialize the FLA_Gemm() interface to the kernel environment     Note: the blocksize given to the kernel environment can be non-     square. We pass the m and n dimensions of the blocksize here. */  FLA_Gemm_init( nb_alg, FLA_Obj_width( A ) );  /* Pack B */  /* Note: the idea here is that, optionally,     -  B is packed, and/or     -  B is scaled                                         If B needs not be packed, it is not packed.  If the multiplication     by alpha happens elsewhere, no scaling occurs.     The "NoTranspose" means that in the version of gemm being updated     B is not transposed.  In packing, B could be transposed, if there     is an advantage to this.  So, the "NoTranspose" means that input     B is not transposed in the FLA_Gemm call.  */   FLA_Gemm_pack_andor_scale_B( FLA_NO_TRANSPOSE, alpha, B );  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){    b = min( FLA_Obj_length( AB ), nb_alg );    FLA_Repart_2x1_to_3x1( AT,                &A0,                         /* ** */            /* ** */                                              &A1,                            AB,                &A2,        b, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( CT,                &C0,                         /* ** */            /* ** */                                              &C1,                            CB,                &C2,        b, FLA_BOTTOM );    /*------------------------------------------------------------*/    /* C1 = alpha * A1 * B + C1; */    /* Pack C1 */    /* Note: the idea here is that, optionally,       -  packed space is provided for computing packed_C1 = alpha * A1 * B        If C1 needs not be packed, then C can just be returned by this routine. */    FLA_Gemm_pack_C( FLA_NO_TRANSPOSE, C1 );    /* Pack A */    /* Note: the idea here is that, optionally,        -  A is packed, and/or        -  A is scaled                                    	If A needs not be packed, it is not packed.  If the multiplication	by alpha happens elsewhere, no scaling occurs. */    FLA_Gemm_pack_andor_scale_A( FLA_NO_TRANSPOSE, alpha, A1 );    /* Call the kernel routine */    FLA_Gemm_kernel( alpha, A1, B, C1 );    /* Unpack C1 */    /* Note: the idea here is that, optionally,       -  packed_C1 is added to C1, possibly scaled at this point. */    FLA_Gemm_unpack_andor_scale_C( FLA_NO_TRANSPOSE, alpha, C1 );    /*------------------------------------------------------------*/    FLA_Cont_with_3x1_to_2x1( &AT,                A0,                                                   A1,                             /* ** */           /* ** */                              &AB,                A2,     FLA_TOP );    FLA_Cont_with_3x1_to_2x1( &CT,                C0,                                                   C1,                             /* ** */           /* ** */                              &CB,                C2,     FLA_TOP );  }  /* Release the space used to pack A1 */  /* Note: notice that the space provided for A1 can be recycled      everytime through the loop, which is why this call is outside the loop.      If the space is statically allocated, or A1 was not packed,     this could be a no-op. */  FLA_Gemm_release_pack_A( FLA_NO_TRANSPOSE, A1 );//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例11: FLA_Syr2k_un_unb_var4

FLA_Error FLA_Syr2k_un_unb_var4( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C ){    FLA_Obj AT,              A0,            AB,              a1t,            A2;    FLA_Obj BT,              B0,            BB,              b1t,            B2;    FLA_Obj CTL,   CTR,      C00,  c01,     C02,            CBL,   CBR,      c10t, gamma11, c12t,            C20,  c21,     C22;    FLA_Scalr_external( FLA_UPPER_TRIANGULAR, beta, C );    FLA_Part_2x1( A,    &AT,                  &AB,            0, FLA_TOP );    FLA_Part_2x1( B,    &BT,                  &BB,            0, FLA_TOP );    FLA_Part_2x2( C,    &CTL, &CTR,                  &CBL, &CBR,     0, 0, FLA_TL );    while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ) {        FLA_Repart_2x1_to_3x1( AT,                &A0,                               /* ** */            /* ** */                               &a1t,                               AB,                &A2,        1, FLA_BOTTOM );        FLA_Repart_2x1_to_3x1( BT,                &B0,                               /* ** */            /* ** */                               &b1t,                               BB,                &B2,        1, FLA_BOTTOM );        FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00,  /**/ &c01,     &C02,                               /* ************* */   /* ************************** */                               &c10t, /**/ &gamma11, &c12t,                               CBL, /**/ CBR,       &C20,  /**/ &c21,     &C22,                               1, 1, FLA_BR );        /*------------------------------------------------------------*/        /* c01 = c01 + A0 * b1t' */        FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A0, b1t, FLA_ONE, c01 );        /* c01 = c01 + B0 * a1t' */        FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B0, a1t, FLA_ONE, c01 );        /* gamma11 = gamma11 + a1t * b1t' + b1t * a1t' */        FLA_Dot2s_external( alpha, a1t, b1t, FLA_ONE, gamma11 );        /*------------------------------------------------------------*/        FLA_Cont_with_3x1_to_2x1( &AT,                A0,                                  a1t,                                  /* ** */           /* ** */                                  &AB,                A2,     FLA_TOP );        FLA_Cont_with_3x1_to_2x1( &BT,                B0,                                  b1t,                                  /* ** */           /* ** */                                  &BB,                B2,     FLA_TOP );        FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00,  c01,     /**/ C02,                                  c10t, gamma11, /**/ c12t,                                  /* ************** */  /* ************************ */                                  &CBL, /**/ &CBR,       C20,  c21,     /**/ C22,                                  FLA_TL );    }    return FLA_SUCCESS;}
开发者ID:pgawron,项目名称:tlash,代码行数:78,


示例12: FLASH_Axpy_hierarchy

FLA_Error FLASH_Axpy_hierarchy( int direction, FLA_Obj alpha, FLA_Obj F, FLA_Obj* H ){	// Once we get down to a submatrix whose elements are scalars, we are down	// to our base case.	if ( FLA_Obj_elemtype( *H ) == FLA_SCALAR )	{		// Depending on which top-level function invoked us, we either axpy		// the source data in the flat matrix to the leaf-level submatrix of		// the hierarchical matrix, or axpy the data in the hierarchical		// submatrix to the flat matrix.		if      ( direction == FLA_FLAT_TO_HIER )		{#ifdef FLA_ENABLE_SCC			if ( FLA_is_owner() )#endif			FLA_Axpy_external( alpha, F, *H );		}		else if ( direction == FLA_HIER_TO_FLAT )		{#ifdef FLA_ENABLE_SCC			if ( FLA_is_owner() )#endif			FLA_Axpy_external( alpha, *H, F );		}	}	else	{		FLA_Obj HL,  HR,       H0,  H1,  H2;		FLA_Obj FL,  FR,       F0,  F1,  F2;		FLA_Obj H1T,           H01,		        H1B,           H11,		                       H21;		FLA_Obj F1T,           F01,		        F1B,           F11,		                       F21;		dim_t b_m;		dim_t b_n;		FLA_Part_1x2( *H,    &HL,  &HR,      0, FLA_LEFT );		FLA_Part_1x2(  F,    &FL,  &FR,      0, FLA_LEFT );		while ( FLA_Obj_width( HL ) < FLA_Obj_width( *H ) )		{			FLA_Repart_1x2_to_1x3( HL,  /**/ HR,        &H0, /**/ &H1, &H2,			                       1, FLA_RIGHT );			// Get the scalar width of H1 and use that to determine the			// width of F1.			b_n = FLASH_Obj_scalar_width( H1 );			FLA_Repart_1x2_to_1x3( FL,  /**/ FR,        &F0, /**/ &F1, &F2,			                       b_n, FLA_RIGHT );			// -------------------------------------------------------------			FLA_Part_2x1( H1,    &H1T,			                     &H1B,       0, FLA_TOP );			FLA_Part_2x1( F1,    &F1T,			                     &F1B,       0, FLA_TOP );			while ( FLA_Obj_length( H1T ) < FLA_Obj_length( H1 ) )			{				FLA_Repart_2x1_to_3x1( H1T,               &H01,				                    /* ** */            /* *** */				                                          &H11,				                       H1B,               &H21,        1, FLA_BOTTOM );				// Get the scalar length of H11 and use that to determine the				// length of F11.				b_m = FLASH_Obj_scalar_length( H11 );				FLA_Repart_2x1_to_3x1( F1T,               &F01,				                    /* ** */            /* *** */				                                          &F11,				                       F1B,               &F21,        b_m, FLA_BOTTOM );				// -------------------------------------------------------------				// Recursively axpy between F11 and H11.				FLASH_Axpy_hierarchy( direction, alpha, F11,				                      FLASH_OBJ_PTR_AT( H11 ) );				// -------------------------------------------------------------				FLA_Cont_with_3x1_to_2x1( &H1T,               H01,				                                              H11,				                        /* ** */           /* *** */				                          &H1B,               H21,     FLA_TOP );				FLA_Cont_with_3x1_to_2x1( &F1T,               F01,				                                              F11,				                        /* ** */           /* *** */				                          &F1B,               F21,     FLA_TOP );			}			// -------------------------------------------------------------			FLA_Cont_with_1x3_to_1x2( &HL,  /**/ &HR,        H0, H1, /**/ H2,			                          FLA_LEFT );			FLA_Cont_with_1x3_to_1x2( &FL,  /**/ &FR,        F0, F1, /**/ F2,//.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例13: FLA_Sylv_nh_blk_var16

FLA_Error FLA_Sylv_nh_blk_var16( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t* cntl ){  FLA_Obj ATL,   ATR,      A00, A01, A02,           ABL,   ABR,      A10, A11, A12,                           A20, A21, A22;  FLA_Obj CT,              C0,          CB,              C1,                           C2;  dim_t b;  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_BR );  FLA_Part_2x1( C,    &CT,                       &CB,            0, FLA_BOTTOM );  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){    b = FLA_Determine_blocksize( CT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, &A01, /**/ &A02,                                                &A10, &A11, /**/ &A12,                        /* ************* */   /* ******************** */                           ABL, /**/ ABR,       &A20, &A21, /**/ &A22,                           b, b, FLA_TL );    FLA_Repart_2x1_to_3x1( CT,                &C0,                                               &C1,                         /* ** */            /* ** */                           CB,                &C2,        b, FLA_TOP );    // Loop Invariant:    // CT = CT - ATR * sylv( ABR, B', CB )    // CB = sylv( ABR, B', CB )    /*------------------------------------------------------------*/    // C1 = sylv( A11, B', C1 );    FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,                        isgn, A11, B, C1, scale,                       FLA_Cntl_sub_sylv1( cntl ) );    // C0 = C0 - A01 * C1;    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,                       FLA_MINUS_ONE, A01, C1, FLA_ONE, C0,                       FLA_Cntl_sub_gemm1( cntl ) );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, /**/ A01, A02,                            /* ************** */  /* ****************** */                                                     A10, /**/ A11, A12,                              &ABL, /**/ &ABR,       A20, /**/ A21, A22,                              FLA_BR );    FLA_Cont_with_3x1_to_2x1( &CT,                C0,                             /* ** */           /* ** */                                                  C1,                               &CB,                C2,     FLA_BOTTOM );  }  return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:66,


示例14: Symm_blk_var1

int Symm_blk_var1( FLA_Obj A, FLA_Obj B, FLA_Obj C, int nb_alg ){  FLA_Obj ATL,   ATR,      A00, A01, A02,           ABL,   ABR,      A10, A11, A12,                           A20, A21, A22;  FLA_Obj BT,              B0,          BB,              B1,                           B2;  FLA_Obj CT,              C0,          CB,              C1,                           C2;  int b;  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_2x1( B,    &BT,                       &BB,            0, FLA_TOP );  FLA_Part_2x1( C,    &CT,                       &CB,            0, FLA_TOP );  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){    b = min( FLA_Obj_length( ABR ), nb_alg );    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,                        /* ************* */   /* ******************** */                                                &A10, /**/ &A11, &A12,                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,                           b, b, FLA_BR );    FLA_Repart_2x1_to_3x1( BT,                &B0,                         /* ** */            /* ** */                                              &B1,                            BB,                &B2,        b, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( CT,                &C0,                         /* ** */            /* ** */                                              &C1,                            CB,                &C2,        b, FLA_BOTTOM );    /*------------------------------------------------------------*/    C1 = C1 + A10*B0 + A11*B1;	C0 = C0 + A10*B1;    /*                       update line 1                        */    /*                             :                              */    /*                       update line n                        */    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,                                                     A10, A11, /**/ A12,                            /* ************** */  /* ****************** */                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,                              FLA_TL );    FLA_Cont_with_3x1_to_2x1( &BT,                B0,                                                   B1,                             /* ** */           /* ** */                              &BB,                B2,     FLA_TOP );    FLA_Cont_with_3x1_to_2x1( &CT,                C0,                                                   C1,                             /* ** */           /* ** */                              &CB,                C2,     FLA_TOP );  }  return FLA_SUCCESS;}
开发者ID:ebeweber,项目名称:CS378-Invariant-Project,代码行数:74,


示例15: FLA_Syrk_un_blk_var3

FLA_Error FLA_Syrk_un_blk_var3( FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t* cntl ){  FLA_Obj AT,              A0,          AB,              A1,                           A2;  FLA_Obj CTL,   CTR,      C00, C01, C02,           CBL,   CBR,      C10, C11, C12,                           C20, C21, C22;  dim_t b;  FLA_Part_2x1( A,    &AT,                       &AB,            0, FLA_BOTTOM );  FLA_Part_2x2( C,    &CTL, &CTR,                      &CBL, &CBR,     0, 0, FLA_BR );  while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) ){    b = FLA_Determine_blocksize( AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );    FLA_Repart_2x1_to_3x1( AT,                &A0,                                               &A1,                         /* ** */            /* ** */                           AB,                &A2,        b, FLA_TOP );    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, &C01, /**/ &C02,                                                &C10, &C11, /**/ &C12,                        /* ************* */   /* ******************** */                           CBL, /**/ CBR,       &C20, &C21, /**/ &C22,                           b, b, FLA_TL );    /*------------------------------------------------------------*/    /* C12 = C12 + A1 * A2' */    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE,                        alpha, A1, A2, beta, C12,                       FLA_Cntl_sub_gemm( cntl ) );    /* C11 = C11 + A1 * A1' */    FLA_Syrk_internal( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,                        alpha, A1, beta, C11,                       FLA_Cntl_sub_syrk( cntl ) );    /*------------------------------------------------------------*/    FLA_Cont_with_3x1_to_2x1( &AT,                A0,                             /* ** */           /* ** */                                                  A1,                               &AB,                A2,     FLA_BOTTOM );    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, /**/ C01, C02,                            /* ************** */  /* ****************** */                                                     C10, /**/ C11, C12,                              &CBL, /**/ &CBR,       C20, /**/ C21, C22,                              FLA_BR );  }  return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:62,


示例16: FLA_Fill_with_logarithmic_dist

FLA_Error FLA_Fill_with_logarithmic_dist( FLA_Obj alpha, FLA_Obj x ){	FLA_Obj      lT,              l0,	             lB,              lambda1,	                              l2;	FLA_Obj      l, k, alpha2;	FLA_Datatype dt_real;	dim_t        n_x;	if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )		FLA_Fill_with_logarithmic_dist_check( alpha, x );	dt_real = FLA_Obj_datatype_proj_to_real( x );	n_x     = FLA_Obj_vector_dim( x );	// Create a local counter to increment as we create the distribution.	FLA_Obj_create( dt_real, 1,   1, 0, 0, &k );	// Create a local vector l. We will work with this vector, which is	// the same length as x, so that we can use vertical partitioning.	FLA_Obj_create( dt_real, n_x, 1, 0, 0, &l );	// Create a local real scalar alpha2 of the same precision as	// alpha. Then copy alpha to alpha2, which will convert the	// complex value to real, if necessary (ie: if alpha is complex).	FLA_Obj_create( dt_real, 1,   1, 0, 0, &alpha2 );	FLA_Copy( alpha, alpha2 );	// Initialize k to 0.	FLA_Set( FLA_ZERO, k );	FLA_Part_2x1( l,    &lT,	                    &lB,            0, FLA_TOP );	while ( FLA_Obj_length( lB ) > 0 )	{		FLA_Repart_2x1_to_3x1( lT,                &l0,		                    /* ** */            /* ******* */		                                          &lambda1,		                       lB,                &l2,        1, FLA_BOTTOM );		/*------------------------------------------------------------*/		// lambda1 = alpha^k;		FLA_Pow( alpha2, k, lambda1 );		// k = k + 1;		FLA_Mult_add( FLA_ONE, FLA_ONE, k );		/*------------------------------------------------------------*/		FLA_Cont_with_3x1_to_2x1( &lT,                l0,		                                              lambda1,		                        /* ** */           /* ******* */		                          &lB,                l2,     FLA_TOP );	}	// Normalize by last element.	FLA_Part_2x1( l,    &lT,	                    &lB,            1, FLA_BOTTOM );	FLA_Inv_scal( lB, l );	// Overwrite x with the distribution we created in l.	FLA_Copy( l, x );	FLA_Obj_free( &l );	FLA_Obj_free( &k );	FLA_Obj_free( &alpha2 );	return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:72,


示例17: Trsm_blk_var1

int Trsm_blk_var1( FLA_Obj L, FLA_Obj B, int nb_alg ){  FLA_Obj LTL,   LTR,      L00, L01, L02,           LBL,   LBR,      L10, L11, L12,                           L20, L21, L22;  FLA_Obj BT,              B0,          BB,              B1,                           B2;  int b;  FLA_Part_2x2( L,    &LTL, &LTR,                      &LBL, &LBR,     0, 0, FLA_TL );  FLA_Part_2x1( B,    &BT,                       &BB,            0, FLA_TOP );  while ( FLA_Obj_length( LTL ) < FLA_Obj_length( L ) ){    b = min( FLA_Obj_length( LBR ), nb_alg );    FLA_Repart_2x2_to_3x3( LTL, /**/ LTR,       &L00, /**/ &L01, &L02,                        /* ************* */   /* ******************** */                                                &L10, /**/ &L11, &L12,                           LBL, /**/ LBR,       &L20, /**/ &L21, &L22,                           b, b, FLA_BR );    FLA_Repart_2x1_to_3x1( BT,                &B0,                         /* ** */            /* ** */                                              &B1,                            BB,                &B2,        b, FLA_BOTTOM );    /*------------------------------------------------------------*/    /* B1 = B1 - L10 * B0 */    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, 	      FLA_MINUS_ONE, L10, B0, FLA_ONE, B1 );    /* B1 = inv( L11 ) B1 */    Trsm_unb_var1( L11, B1 );    //Alternatively, try    //    FLA_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,    //	      FLA_ONE, L11, B1 );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &LTL, /**/ &LTR,       L00, L01, /**/ L02,                                                     L10, L11, /**/ L12,                            /* ************** */  /* ****************** */                              &LBL, /**/ &LBR,       L20, L21, /**/ L22,                              FLA_TL );    FLA_Cont_with_3x1_to_2x1( &BT,                B0,                                                   B1,                             /* ** */           /* ** */                              &BB,                B2,     FLA_TOP );  }  return FLA_SUCCESS;}
开发者ID:23beth,项目名称:PFC,代码行数:62,


示例18: FLA_Gemm_nn_omp_var5

FLA_Error FLA_Gemm_nn_omp_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj C, fla_gemm_t* cntl ){  FLA_Obj AL,    AR,       A0,  A1,  A2;  FLA_Obj BT,              B0,          BB,              B1,                           B2;  FLA_Obj C_local;  int b;  FLA_Part_1x2( A,    &AL,  &AR,      0, FLA_LEFT );  FLA_Part_2x1( B,    &BT,                       &BB,            0, FLA_TOP );  #pragma intel omp parallel taskq  {  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) ){    b = FLA_Determine_blocksize( A, AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );    //b = min( FLA_Obj_width( AR ), nb_alg );    FLA_Repart_1x2_to_1x3( AL,  /**/ AR,        &A0, /**/ &A1, &A2,                           b, FLA_RIGHT );    FLA_Repart_2x1_to_3x1( BT,                &B0,                         /* ** */            /* ** */                                              &B1,                            BB,                &B2,        b, FLA_BOTTOM );    /*------------------------------------------------------------*/    #pragma intel omp task captureprivate(A1,B1) private(C_local)    {    FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C, &C_local );    FLA_Obj_set_to_zero( C_local );    /* C = alpha * A1 * B1 + C; */    FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,                       alpha, A1, B1, FLA_ONE, C_local );    REF_Axpy_sync_circular( FLA_ONE, C_local, C );    FLA_Obj_free( &C_local );    }    /*------------------------------------------------------------*/    FLA_Cont_with_1x3_to_1x2( &AL,  /**/ &AR,        A0, A1, /**/ A2,                              FLA_LEFT );    FLA_Cont_with_3x1_to_2x1( &BT,                B0,                                                   B1,                             /* ** */           /* ** */                              &BB,                B2,     FLA_TOP );  }  }  return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:62,


示例19: FLA_Gemm_nn_omp_var15

FLA_Error FLA_Gemm_nn_omp_var15( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj C, fla_gemm_t* cntl ){  FLA_Obj AT,              A0,          AB,              A1,                           A2;  FLA_Obj CT,              C0,          CB,              C1,                           C2;  FLA_Obj AL,    AR,       A10,  A11,  A12;  FLA_Obj BT,              B0,          BB,              B1,                           B2;  FLA_Obj C1_local;  int i, j, lock_ldim, lock_i;  int b_m, b_k;  FLA_Part_2x1( A,    &AT,                       &AB,            0, FLA_TOP );  FLA_Part_2x1( C,    &CT,                       &CB,            0, FLA_TOP );  #pragma intel omp parallel taskq  {  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) )  {    b_m = FLA_Determine_blocksize( A, AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );    FLA_Repart_2x1_to_3x1( AT,                &A0,                         /* ** */            /* ** */                                              &A1,                            AB,                &A2,        b_m, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( CT,                &C0,                         /* ** */            /* ** */                                              &C1,                            CB,                &C2,        b_m, FLA_BOTTOM );    /*------------------------------------------------------------*/    /* C1 = alpha * A1 * B + C1; */      FLA_Part_1x2( A1,   &AL,  &AR,      0, FLA_LEFT );    FLA_Part_2x1( B,    &BT,                         &BB,            0, FLA_TOP );      while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )    {      b_k = FLA_Determine_blocksize( A, AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );      // Get the index of the current partition.      // FIX THIS: need + b_m - 1 or something like this      //j = FLA_Obj_length( CT ) / b_m;      //i = FLA_Obj_width( AL ) / b_k;      //lock_ldim = FLA_get_num_threads_in_m_dim(omp_get_num_threads());      lock_i = FLA_Obj_length( CT ) / b_m;        FLA_Repart_1x2_to_1x3( AL,  /**/ AR,        &A10, /**/ &A11, &A12,                             b_k, FLA_RIGHT );      FLA_Repart_2x1_to_3x1( BT,                &B0,                           /* ** */            /* ** */                                                &B1,                              BB,                &B2,        b_k, FLA_BOTTOM );        /*------------------------------------------------------------*/        /*    C1 = alpha * A11 * B1 + C1; */      //// FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,      ////           alpha, A11, B1, FLA_ONE, C1 );      #pragma intel omp task captureprivate( lock_i, A11, B1, C1 ), private( C1_local )      {      FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C1, &C1_local );      FLA_Obj_set_to_zero( C1_local );      /*    C1_local = alpha * A1 * B11 + C1_local; */      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,                         alpha, A11, B1, FLA_ONE, C1_local );      // Acquire lock[i] (the lock for C1).      omp_set_lock( &fla_omp_lock[lock_i] );      /* C1 += C1_local */      FLA_Axpy_external( FLA_ONE, C1_local, C1 );      //FLA_Axpy_sync_pipeline2( j*lock_ldim, FLA_ONE, C1_local, C1 );      //FLA_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 );      //REF_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 );      // Release lock[i] (the lock for C1).      omp_unset_lock( &fla_omp_lock[lock_i] );      FLA_Obj_free( &C1_local );      }  //.........这里部分代码省略.........
开发者ID:pgawron,项目名称:tlash,代码行数:101,


示例20: FLA_Apply_Q_UT_lhfc_blk_var1

//.........这里部分代码省略.........  FLA_Obj WTL,  WTR,          WBL,  WBR;  FLA_Obj BT,              B0,          BB,              B1,                           B2;  dim_t   b_alg, b;  // Query the algorithmic blocksize by inspecting the length of T.  b_alg = FLA_Obj_length( T );  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );  FLA_Part_2x1( B,    &BT,                       &BB,            0, FLA_TOP );  while ( FLA_Obj_min_dim( ABR ) > 0 ){    b = min( b_alg, FLA_Obj_min_dim( ABR ) );    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,                        /* ************* */   /* ******************** */                                                &A10, /**/ &A11, &A12,                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,                           b, b, FLA_BR );    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &T2,                           b, FLA_RIGHT );    FLA_Repart_2x1_to_3x1( BT,                &B0,                         /* ** */            /* ** */                                              &B1,                            BB,                &B2,        b, FLA_BOTTOM );    /*------------------------------------------------------------*/    FLA_Part_2x1( T1,    &T1T,                          &T2B,     b, FLA_TOP );    FLA_Part_2x2( W,     &WTL, &WTR,                         &WBL, &WBR,     b, FLA_Obj_width( B1 ), FLA_TL );    // WTL = B1;    FLA_Copyt_internal( FLA_NO_TRANSPOSE, B1, WTL,                        FLA_Cntl_sub_copyt( cntl ) );    // U11 = trilu( A11 );    // U21 = A21;    //    // WTL = inv( triu(T1T) )' * ( U11' * B1 + U21' * B2 );    FLA_Trmm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR,                       FLA_CONJ_TRANSPOSE, FLA_UNIT_DIAG,                       FLA_ONE, A11, WTL,                       FLA_Cntl_sub_trmm1( cntl ) );    FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,                        FLA_ONE, A21, B2, FLA_ONE, WTL,                       FLA_Cntl_sub_gemm1( cntl ) );    FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
开发者ID:anaptyxis,项目名称:libflame,代码行数:67,


示例21: FLA_LU_piv_unb_var3

FLA_Error FLA_LU_piv_unb_var3( FLA_Obj A, FLA_Obj p ){  FLA_Obj ATL,   ATR,      A00,  a01,     A02,           ABL,   ABR,      a10t, alpha11, a12t,                           A20,  a21,     A22;  FLA_Obj AL,    AR,       A0,  a1,  A2;  FLA_Obj pT,              p0,          pB,              pi1,                           p2;  FLA_Obj AB0, aB1;  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_1x2( A,    &AL,  &AR,      0, FLA_LEFT );  FLA_Part_2x1( p,    &pT,                       &pB,            0, FLA_TOP );  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&          FLA_Obj_width( ATL ) < FLA_Obj_width( A )){    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,                        /* ************* */   /* ************************** */                                                &a10t, /**/ &alpha11, &a12t,                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,                           1, 1, FLA_BR );    FLA_Repart_1x2_to_1x3( AL,  /**/ AR,        &A0, /**/ &a1, &A2,                           1, FLA_RIGHT );    FLA_Repart_2x1_to_3x1( pT,                &p0,                         /* ** */            /* *** */                                              &pi1,                            pB,                &p2,        1, FLA_BOTTOM );    /*------------------------------------------------------------*/    // Apply previously computed pivots    FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );    // a01 = trilu( A00 ) / a01     FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );    // alpha11 = alpha11 - a10t * a01    FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );    // a21 = a21 - A20 * a01    FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );    // aB1 = / alpha11 /    //       /  a21    /    FLA_Merge_2x1( alpha11,                       a21,      &aB1 );    // Determine pivot index    FLA_Amax_external( aB1, pi1 );    // Apply pivots to current column    FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );    // a21 = a21 / alpha11     FLA_Inv_scal_external( alpha11, a21 );    // AB0 = / a10t /    //       / A20  /    FLA_Merge_2x1( a10t,                   A20,      &AB0 );    // Apply pivots to previous columns    FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,                                                     a10t, alpha11, /**/ a12t,                            /* ************** */  /* ************************ */                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,                              FLA_TL );    FLA_Cont_with_1x3_to_1x2( &AL,  /**/ &AR,        A0, a1, /**/ A2,                              FLA_LEFT );    FLA_Cont_with_3x1_to_2x1( &pT,                p0,                                                   pi1,                             /* ** */           /* *** */                              &pB,                p2,     FLA_TOP );  }  if ( FLA_Obj_width( ATR ) > 0 )  {    /* Apply pivots to untouched columns */    FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );    /* ATR = trilu( ATL ) / ATR */ //.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例22: FLA_Eig_gest_iu_unb_var2

FLA_Error FLA_Eig_gest_iu_unb_var2( FLA_Obj A, FLA_Obj Y, FLA_Obj B ){  FLA_Obj ATL,   ATR,      A00,  a01,     A02,           ABL,   ABR,      a10t, alpha11, a12t,                           A20,  a21,     A22;  FLA_Obj BTL,   BTR,      B00,  b01,    B02,           BBL,   BBR,      b10t, beta11, b12t,                           B20,  b21,    B22;  FLA_Obj yT,              y01,          yB,              psi11,                           y21;  FLA_Obj y01_l, y01_r;  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_2x2( B,    &BTL, &BTR,                      &BBL, &BBR,     0, 0, FLA_TL );  FLA_Part_2x1( Y,    &yT,                      &yB,            0, FLA_TOP );  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,                        /* ************* */   /* ************************** */                                                &a10t, /**/ &alpha11, &a12t,                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,                           1, 1, FLA_BR );    FLA_Repart_2x2_to_3x3( BTL, /**/ BTR,       &B00,  /**/ &b01,    &B02,                        /* ************* */   /* ************************* */                                                &b10t, /**/ &beta11, &b12t,                           BBL, /**/ BBR,       &B20,  /**/ &b21,    &B22,                           1, 1, FLA_BR );    FLA_Repart_2x1_to_3x1( yT,                  &y01,                        /* ** */              /* ***** */                                                &psi11,                           yB,                  &y21,        1, FLA_BOTTOM );    /*------------------------------------------------------------*/    FLA_Part_1x2( y01,    &y01_l, &y01_r,     1, FLA_LEFT );    // y01 = 1/2 * A00 * b01;    FLA_Hemvc_external( FLA_UPPER_TRIANGULAR, FLA_NO_CONJUGATE,                        FLA_ONE_HALF, A00, b01, FLA_ZERO, y01_l );    // a01 = a01 - y01;    FLA_Axpy_external( FLA_MINUS_ONE, y01_l, a01 );    // alpha11 = alpha11 - a01' * b01 - b01' * a01;    FLA_Dot2cs_external( FLA_CONJUGATE, FLA_MINUS_ONE, a01, b01, FLA_ONE, alpha11 );    // alpha11 = inv(beta11) * alpha11 * inv(conj(beta11));    //         = inv(beta11) * alpha11 * inv(beta11);    FLA_Inv_scal_external( beta11, alpha11 );    FLA_Inv_scal_external( beta11, alpha11 );    // a12t   = a12t   - b01' * A02;     // a12t^T = a12t^T - A02^T * conj(b01);     FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,                        FLA_MINUS_ONE, A02, b01, FLA_ONE, a12t );    // a12t = inv(conj(beta11)) * a12t;    // a12t = inv(beta11) * a12t;    FLA_Inv_scal_external( beta11, a12t );    // a01 = a01 - y01;    FLA_Axpy_external( FLA_MINUS_ONE, y01_l, a01 );    // a01 = a01 * inv(beta11);    FLA_Inv_scal_external( beta11, a01 );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,                                                     a10t, alpha11, /**/ a12t,                            /* ************** */  /* ************************ */                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,                              FLA_TL );    FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR,       B00,  b01,    /**/ B02,                                                     b10t, beta11, /**/ b12t,                            /* ************** */  /* *********************** */                              &BBL, /**/ &BBR,       B20,  b21,    /**/ B22,                              FLA_TL );    FLA_Cont_with_3x1_to_2x1( &yT,                   y01,                                                     psi11,                            /* ** */              /* ***** */                              &yB,                   y21,     FLA_TOP );  }  return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:100,


示例23: FLA_Trmm_llt_blk_var1

FLA_Error FLA_Trmm_llt_blk_var1( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trmm_t* cntl ){  FLA_Obj ATL,   ATR,      A00, A01, A02,           ABL,   ABR,      A10, A11, A12,                           A20, A21, A22;  FLA_Obj BT,              B0,          BB,              B1,                           B2;  dim_t b;  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_2x1( B,    &BT,                       &BB,            0, FLA_TOP );  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){    b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,                        /* ************* */   /* ******************** */                                                &A10, /**/ &A11, &A12,                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,                           b, b, FLA_BR );    FLA_Repart_2x1_to_3x1( BT,                &B0,                         /* ** */            /* ** */                                              &B1,                            BB,                &B2,        b, FLA_BOTTOM );    /*------------------------------------------------------------*/    /* B1 = tril( A11 )' * B1; */    FLA_Trmm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, diagA,                       alpha, A11, B1,                       FLA_Cntl_sub_trmm( cntl ) );    /* B1 = B1 + A21' * B2; */    FLA_Gemm_internal( FLA_TRANSPOSE, FLA_NO_TRANSPOSE,                       alpha, A21, B2, FLA_ONE, B1,                       FLA_Cntl_sub_gemm( cntl ) );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,                                                     A10, A11, /**/ A12,                            /* ************** */  /* ****************** */                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,                              FLA_TL );    FLA_Cont_with_3x1_to_2x1( &BT,                B0,                                                   B1,                             /* ** */           /* ** */                              &BB,                B2,     FLA_TOP );  }  return FLA_SUCCESS;}
开发者ID:pgawron,项目名称:tlash,代码行数:62,


示例24: main

int main( int argc, char** argv ) {  FLA_Datatype comptype = COMPTYPE;  FLA_Datatype realtype = REALTYPE;  dim_t        m;  FLA_Obj      a, aT, aB, a0, a1, a2;  FLA_Obj      v, vT, vB, v0, v1, v2;  FLA_Error    init_result;   int          use_abs = 1;  if ( argc == 3 ) {    m = atoi(argv[1]);    use_abs = atoi(argv[2]);  } else {    fprintf(stderr, "       /n");    fprintf(stderr, "Usage: %s m use_abs/n", argv[0]);    fprintf(stderr, "       m       : test vector length/n");    fprintf(stderr, "       use_abs : 0 - norm (realtype), 1 - abs (complex type)/n");    fprintf(stderr, "       /n");    return -1;  }  if ( m == 0 )    return 0;  FLA_Init_safe( &init_result );              FLA_Obj_create( comptype, m, 1, 0, 0, &a );  FLA_Obj_create( use_abs ? comptype : realtype, m, 1, 0, 0, &v );  FLA_Random_matrix( a );  FLA_Set( FLA_ZERO, v );  FLA_Obj_fshow( stdout,  "- a -", a, "% 6.4e", "--" );  // Normalize a vector  FLA_Part_2x1( a,    &aT,                        &aB,      0, FLA_TOP );  FLA_Part_2x1( v,    &vT,                        &vB,      0, FLA_TOP );  while ( FLA_Obj_length( aB ) > 0 ) {    FLA_Repart_2x1_to_3x1( aT, &a0,                               &a1,                           aB, &a2, 1, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( vT, &v0,                               &v1,                           vB, &v2, 1, FLA_BOTTOM );    // --------------------------------------------    if ( use_abs ) { // a and v are complex datatype      FLA_Copy( a1, v1 );      FLA_Absolute_value( v1 );    } else {         // v is real datatype       FLA_Nrm2( a1, v1 );    }    if ( FLA_Obj_equals( v1, FLA_ZERO ) )      printf( " ZERO DETECTED/n" );    else      FLA_Inv_scal( v1, a1 ); // Normalize the scalar    // --------------------------------------------    FLA_Cont_with_3x1_to_2x1( &aT, a0,                                   a1,                              &aB, a2, FLA_TOP );    FLA_Cont_with_3x1_to_2x1( &vT, v0,                                   v1,                              &vB, v2, FLA_TOP );  }  FLA_Obj_fshow( stdout,  "- a -", a, "% 6.4e", "--" );  FLA_Obj_fshow( stdout,  "- v -", v, "% 6.4e", "--" );  // Check whether it is normalized  FLA_Part_2x1( a,    &aT,                        &aB,      0, FLA_TOP );  FLA_Part_2x1( v,    &vT,                        &vB,      0, FLA_TOP );  while ( FLA_Obj_length( aB ) > 0 ) {    FLA_Repart_2x1_to_3x1( aT, &a0,                               &a1,                           aB, &a2, 1, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( vT, &v0,                               &v1,                           vB, &v2, 1, FLA_BOTTOM );    // --------------------------------------------    if ( use_abs ) { // a and v are same datatype      FLA_Copy( a1, v1 );      FLA_Absolute_value( v1 );    } else {         // v is realdatatype       FLA_Nrm2( a1, v1 );    }    // --------------------------------------------    FLA_Cont_with_3x1_to_2x1( &aT, a0,                                   a1,                              &aB, a2, FLA_TOP );    FLA_Cont_with_3x1_to_2x1( &vT, v0,                                   v1,                              &vB, v2, FLA_TOP );  }  FLA_Obj_fshow( stdout, " - all should be one - ", v, "% 6.4e", "--");  FLA_Obj_free( &a );  FLA_Obj_free( &v );//.........这里部分代码省略.........
开发者ID:anaptyxis,项目名称:libflame,代码行数:101,


示例25: FLA_Symm_lu_unb_var1

FLA_Error FLA_Symm_lu_unb_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C ){  FLA_Obj ATL,   ATR,      A00,  a01,     A02,           ABL,   ABR,      a10t, alpha11, a12t,                           A20,  a21,     A22;  FLA_Obj BT,              B0,          BB,              b1t,                           B2;  FLA_Obj CT,              C0,          CB,              c1t,                           C2;  FLA_Scal_external( beta, C );  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_2x1( B,    &BT,                       &BB,            0, FLA_TOP );  FLA_Part_2x1( C,    &CT,                       &CB,            0, FLA_TOP );  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,                        /* ************* */   /* ************************** */                                                &a10t, /**/ &alpha11, &a12t,                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,                           1, 1, FLA_BR );    FLA_Repart_2x1_to_3x1( BT,                &B0,                         /* ** */            /* ** */                                              &b1t,                            BB,                &B2,        1, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( CT,                &C0,                         /* ** */            /* ** */                                              &c1t,                            CB,                &C2,        1, FLA_BOTTOM );    /*------------------------------------------------------------*/    /* C0 = C0 + a01 * b1t */    FLA_Ger_external( alpha, a01, b1t, C0 );    /* c1t  = c1t  + a01' * B0  */    /* c1t' = c1t' + B0'  * a01 */    FLA_Gemv_external( FLA_TRANSPOSE, alpha, B0, a01, FLA_ONE, c1t );    /* c1t = c1t + alpha11 * b1t */    FLA_Axpys_external( alpha, alpha11, b1t, FLA_ONE, c1t );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,                                                     a10t, alpha11, /**/ a12t,                            /* ************** */  /* ************************ */                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,                              FLA_TL );    FLA_Cont_with_3x1_to_2x1( &BT,                B0,                                                   b1t,                             /* ** */           /* ** */                              &BB,                B2,     FLA_TOP );    FLA_Cont_with_3x1_to_2x1( &CT,                C0,                                                   c1t,                             /* ** */           /* ** */                              &CB,                C2,     FLA_TOP );  }  return FLA_SUCCESS;}
开发者ID:pgawron,项目名称:tlash,代码行数:77,


示例26: FLA_Syrk_ln_blk_var2_ht

FLA_Error FLA_Syrk_ln_blk_var2_ht( FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, FLA_Syrk_t* cntl ){  FLA_Obj AT,              A0,          AB,              A1,                           A2;  FLA_Obj CTL,   CTR,      C00, C01, C02,           CBL,   CBR,      C10, C11, C12,                           C20, C21, C22;  int b;  FLA_Part_2x1( A,    &AT,                       &AB,            0, FLA_TOP );  FLA_Part_2x2( C,    &CTL, &CTR,                      &CBL, &CBR,     0, 0, FLA_TL );  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){    b = 1;    FLA_Repart_2x1_to_3x1( AT,                &A0,                         /* ** */            /* ** */                                              &A1,                            AB,                &A2,        b, FLA_BOTTOM );    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, /**/ &C01, &C02,                        /* ************* */   /* ******************** */                                                &C10, /**/ &C11, &C12,                           CBL, /**/ CBR,       &C20, /**/ &C21, &C22,                           b, b, FLA_BR );    /*------------------------------------------------------------*/    /* C21 = C21 + A2 * A1' */    FLA_Gemm_nt_blk_var1_ht( alpha, A2, A1, beta, C21,                          NULL );    /* C11 = C11 + A1 * A1' */    FLA_Syrk_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,                        alpha, *FLASH_OBJ_PTR_AT( A1 ), beta, *FLASH_OBJ_PTR_AT( C11 ),                       NULL );    /*------------------------------------------------------------*/    FLA_Cont_with_3x1_to_2x1( &AT,                A0,                                                   A1,                             /* ** */           /* ** */                              &AB,                A2,     FLA_TOP );    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, C01, /**/ C02,                                                     C10, C11, /**/ C12,                            /* ************** */  /* ****************** */                              &CBL, /**/ &CBR,       C20, C21, /**/ C22,                              FLA_TL );  }  return FLA_SUCCESS;}
开发者ID:flame,项目名称:libflame,代码行数:61,


示例27: FLA_Her2k_ln_blk_var4

FLA_Error FLA_Her2k_ln_blk_var4( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_her2k_t* cntl ){  FLA_Obj AT,              A0,          AB,              A1,                           A2;  FLA_Obj BT,              B0,          BB,              B1,                           B2;  FLA_Obj CTL,   CTR,      C00, C01, C02,           CBL,   CBR,      C10, C11, C12,                           C20, C21, C22;  dim_t b;  FLA_Scalr_internal( FLA_LOWER_TRIANGULAR, beta, C,                      FLA_Cntl_sub_scalr( cntl ) );  FLA_Part_2x1( A,    &AT,                       &AB,            0, FLA_TOP );  FLA_Part_2x1( B,    &BT,                       &BB,            0, FLA_TOP );  FLA_Part_2x2( C,    &CTL, &CTR,                      &CBL, &CBR,     0, 0, FLA_TL );  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){    b = FLA_Determine_blocksize( AB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );    FLA_Repart_2x1_to_3x1( AT,                &A0,                         /* ** */            /* ** */                                              &A1,                            AB,                &A2,        b, FLA_BOTTOM );    FLA_Repart_2x1_to_3x1( BT,                &B0,                         /* ** */            /* ** */                                              &B1,                            BB,                &B2,        b, FLA_BOTTOM );    FLA_Repart_2x2_to_3x3( CTL, /**/ CTR,       &C00, /**/ &C01, &C02,                        /* ************* */   /* ******************** */                                                &C10, /**/ &C11, &C12,                           CBL, /**/ CBR,       &C20, /**/ &C21, &C22,                           b, b, FLA_BR );    /*------------------------------------------------------------*/    /* C21 = C21 + A2 * B1' */    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,                        alpha, A2, B1, FLA_ONE, C21,                       FLA_Cntl_sub_gemm1( cntl ) );    /* C21 = C21 + B2 * A1' */    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,                        alpha, B2, A1, FLA_ONE, C21,                       FLA_Cntl_sub_gemm2( cntl ) );    /* C11 = C11 + A1 * B1' + B1 * A1' */    FLA_Her2k_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,                         alpha, A1, B1, FLA_ONE, C11,                        FLA_Cntl_sub_her2k( cntl ) );    /*------------------------------------------------------------*/    FLA_Cont_with_3x1_to_2x1( &AT,                A0,                                                   A1,                             /* ** */           /* ** */                              &AB,                A2,     FLA_TOP );    FLA_Cont_with_3x1_to_2x1( &BT,                B0,                                                   B1,                             /* ** */           /* ** */                              &BB,                B2,     FLA_TOP );    FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR,       C00, C01, /**/ C02,                                                     C10, C11, /**/ C12,                            /* ************** */  /* ****************** */                              &CBL, /**/ &CBR,       C20, C21, /**/ C22,                              FLA_TL );  }  return FLA_SUCCESS;}
开发者ID:pgawron,项目名称:tlash,代码行数:87,


示例28: FLA_Accum_T_UT_fc_blk_var2

FLA_Error FLA_Accum_T_UT_fc_blk_var2( FLA_Obj A, FLA_Obj t, FLA_Obj T ){  FLA_Obj ATL,   ATR,      A00, A01, A02,           ABL,   ABR,      A10, A11, A12,                           A20, A21, A22;  FLA_Obj tT,              t0,          tB,              t1,                           t2;  FLA_Obj TL,    TR,       T0,  T1,  T2;  FLA_Obj AB1;  dim_t b_alg, b;  b_alg = FLA_Obj_length( T );  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_TL );  FLA_Part_2x1( t,    &tT,                       &tB,            0, FLA_TOP );  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );  while ( FLA_Obj_length( tB ) > 0 ) {    b = min( FLA_Obj_length( tB ), b_alg );    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,                        /* ************* */   /* ******************** */                                                &A10, /**/ &A11, &A12,                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,                           b, b, FLA_BR );    FLA_Repart_2x1_to_3x1( tT,                &t0,                         /* ** */            /* ** */                                              &t1,                            tB,                &t2,        b, FLA_BOTTOM );    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &T2,                           b, FLA_RIGHT );    /*------------------------------------------------------------*/    FLA_Merge_2x1( A11,                   A21,   &AB1 );    //FLA_Accum_T_UT_fc_unb_var1( AB1, t1, T1 );    FLA_Accum_T_UT_fc_opt_var1( AB1, t1, T1 );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,                                                     A10, A11, /**/ A12,                            /* ************** */  /* ****************** */                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,                              FLA_TL );    FLA_Cont_with_3x1_to_2x1( &tT,                t0,                                                   t1,                             /* ** */           /* ** */                              &tB,                t2,     FLA_TOP );    FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ T2,                              FLA_LEFT );  }  return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:72,


示例29: FLA_Symm_ll_blk_var1

FLA_Error FLA_Symm_ll_blk_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_symm_t* cntl ){    FLA_Obj ATL,   ATR,      A00, A01, A02,            ABL,   ABR,      A10, A11, A12,            A20, A21, A22;    FLA_Obj BT,              B0,            BB,              B1,            B2;    FLA_Obj CT,              C0,            CB,              C1,            C2;    dim_t b;    FLA_Scal_internal( beta, C,                       FLA_Cntl_sub_scal( cntl ) );    FLA_Part_2x2( A,    &ATL, &ATR,                  &ABL, &ABR,     0, 0, FLA_TL );    FLA_Part_2x1( B,    &BT,                  &BB,            0, FLA_TOP );    FLA_Part_2x1( C,    &CT,                  &CB,            0, FLA_TOP );    while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ) {        b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );        FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,                               /* ************* */   /* ******************** */                               &A10, /**/ &A11, &A12,                               ABL, /**/ ABR,       &A20, /**/ &A21, &A22,                               b, b, FLA_BR );        FLA_Repart_2x1_to_3x1( BT,                &B0,                               /* ** */            /* ** */                               &B1,                               BB,                &B2,        b, FLA_BOTTOM );        FLA_Repart_2x1_to_3x1( CT,                &C0,                               /* ** */            /* ** */                               &C1,                               CB,                &C2,        b, FLA_BOTTOM );        /*------------------------------------------------------------*/        /* C0 = C0 + A10' * B1 */        FLA_Gemm_internal( FLA_TRANSPOSE, FLA_NO_TRANSPOSE,                           alpha, A10, B1, FLA_ONE, C0,                           FLA_Cntl_sub_gemm1( cntl ) );        /* C1 = C1 + A10 * B0 */        FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,                           alpha, A10, B0, FLA_ONE, C1,                           FLA_Cntl_sub_gemm2( cntl ) );        /* C1 = C1 + A11 * B1 */        FLA_Symm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR,                           alpha, A11, B1, FLA_ONE, C1,                           FLA_Cntl_sub_symm( cntl ) );        /*------------------------------------------------------------*/        FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,                                  A10, A11, /**/ A12,                                  /* ************** */  /* ****************** */                                  &ABL, /**/ &ABR,       A20, A21, /**/ A22,                                  FLA_TL );        FLA_Cont_with_3x1_to_2x1( &BT,                B0,                                  B1,                                  /* ** */           /* ** */                                  &BB,                B2,     FLA_TOP );        FLA_Cont_with_3x1_to_2x1( &CT,                C0,                                  C1,                                  /* ** */           /* ** */                                  &CB,                C2,     FLA_TOP );    }    return FLA_SUCCESS;}
开发者ID:pgawron,项目名称:tlash,代码行数:87,


示例30: FLA_Hemm_ll_unb_var5

FLA_Error FLA_Hemm_ll_unb_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C ){  FLA_Obj ATL,   ATR,      A00,  a01,     A02,           ABL,   ABR,      a10t, alpha11, a12t,                           A20,  a21,     A22;  FLA_Obj BT,              B0,          BB,              b1t,                           B2;  FLA_Obj CT,              C0,          CB,              c1t,                           C2;  FLA_Scal_external( beta, C );  FLA_Part_2x2( A,    &ATL, &ATR,                      &ABL, &ABR,     0, 0, FLA_BR );  FLA_Part_2x1( B,    &BT,                       &BB,            0, FLA_BOTTOM );  FLA_Part_2x1( C,    &CT,                       &CB,            0, FLA_BOTTOM );  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  &a01,     /**/ &A02,                                                &a10t, &alpha11, /**/ &a12t,                        /* ************* */   /* ************************** */                           ABL, /**/ ABR,       &A20,  &a21,     /**/ &A22,                           1, 1, FLA_TL );    FLA_Repart_2x1_to_3x1( BT,                &B0,                                               &b1t,                         /* ** */            /* ** */                           BB,                &B2,        1, FLA_TOP );    FLA_Repart_2x1_to_3x1( CT,                &C0,                                               &c1t,                         /* ** */            /* ** */                           CB,                &C2,        1, FLA_TOP );    /*------------------------------------------------------------*/    /* c1t = c1t + alpha11 * b1t */    FLA_Axpys_external( alpha, alpha11, b1t, FLA_ONE, c1t );    /* c1t  = c1t  + a21' * B2 */    /* c1t' = c1t' + B2' * a21 */    FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, alpha, B2, a21, FLA_ONE, c1t );    /* C2 = C2 + a21 * b1t */    FLA_Ger_external( alpha, a21, b1t, C2 );    /*------------------------------------------------------------*/    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  /**/ a01,     A02,                            /* ************** */  /* ************************ */                                                     a10t, /**/ alpha11, a12t,                              &ABL, /**/ &ABR,       A20,  /**/ a21,     A22,                              FLA_BR );    FLA_Cont_with_3x1_to_2x1( &BT,                B0,                             /* ** */           /* ** */                                                  b1t,                               &BB,                B2,     FLA_BOTTOM );    FLA_Cont_with_3x1_to_2x1( &CT,                C0,                             /* ** */           /* ** */                                                  c1t,                               &CB,                C2,     FLA_BOTTOM );  }  return FLA_SUCCESS;}
开发者ID:anaptyxis,项目名称:libflame,代码行数:77,



注:本文中的FLA_Repart_2x1_to_3x1函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ FLA_Repart_2x2_to_3x3函数代码示例
C++ FLA_Repart_1x2_to_1x3函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。