您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ ATL_AlignPtr函数代码示例

51自学网 2021-06-01 19:38:32
  C++
这篇教程C++ ATL_AlignPtr函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中ATL_AlignPtr函数的典型用法代码示例。如果您正苦于以下问题:C++ ATL_AlignPtr函数的具体用法?C++ ATL_AlignPtr怎么用?C++ ATL_AlignPtr使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了ATL_AlignPtr函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: cblas_cher2

void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,                 const int N, const void *alpha,                 const void *X, const int incX,                 const void *Y, const int incY, void *A, const int lda){   int info = 2000;   void *vx, *vy;   float *x0, *y0;   const float *x=X, *y=Y, *alp=alpha;   const float one[2]={ATL_rone, ATL_rzero};#ifndef NoCblasErrorChecks   if (Order != CblasColMajor && Order != CblasRowMajor)      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",                          CblasRowMajor, CblasColMajor, Order);   if (Uplo != CblasUpper && Uplo != CblasLower)      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",                          CblasUpper, CblasLower, Uplo);   if (N < 0) info = cblas_errprn(3, info,                        "N cannot be less than zero; is set to %d.", N);   if (!incX) info = cblas_errprn(6, info,                                  "incX cannot be zero; is set to %d.", incX);   if (!incY) info = cblas_errprn(8, info,                                  "incY cannot be zero; is set to %d.", incY);   if (lda < N || lda < 1)      info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d N=%d",                          lda, N);   if (info != 2000)   {      cblas_xerbla(info, "cblas_cher2", "");      return;   }#endif   if (incX < 0) x += (1-N)*incX<<1;   if (incY < 0) y += (1-N)*incY<<1;   if (Order == CblasColMajor)      ATL_cher2(Uplo, N, alpha, x, incX, y, incY, A, lda);   else if (alp[0] != ATL_rzero || alp[1] != ATL_rzero)   {      vx = malloc(ATL_Cachelen + ATL_MulBySize(N));      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));      ATL_assert(vx != NULL && vy != NULL);      x0 = ATL_AlignPtr(vx);      y0 = ATL_AlignPtr(vy);      ATL_cmoveConj(N, alpha, y, incY, y0, 1);      ATL_ccopyConj(N, x, incX, x0, 1);      ATL_cher2(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),                N, one, y0, 1, x0, 1, A, lda);      free(vx);      free(vy);   }   else ATL_cher2(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),                  N, alpha, y, incY, x, incX, A, lda);}
开发者ID:AIDman,项目名称:Kaldi,代码行数:56,


示例2: Mjoin

void Mjoin(Mjoin(Mjoin(PATL,herk),UploNM),N)   (const int N, const int K, const void *valpha, const void *A, const int lda,    const void *vbeta, void *C, const int ldc){   void *vc;   TYPE *c;   TYPE alpha[2];   const TYPE beta = *( (const TYPE *)vbeta  );   const TYPE zero[2] = {0.0, 0.0};   alpha[0] = *( (const TYPE *)valpha );   if (K > HERK_Xover)   {      alpha[1] = 0.0;      vc = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);      ATL_assert(vc);      c = ATL_AlignPtr(vc);      CgemmNC(N, N, K, alpha, A, lda, A, lda, zero, c, N);      if ( beta == 1.0 ) Mjoin(her_put,_b1)(N, c, vbeta, C, ldc);      else if ( beta == 0.0 ) Mjoin(her_put,_b0)(N, c, vbeta, C, ldc);      else Mjoin(her_put,_bXi0)(N, c, vbeta, C, ldc);      free(vc);   }   else Mjoin(PATL,refherk)(Uplo_, AtlasNoTrans, N, K, *alpha, A, lda,                            beta, C, ldc);}
开发者ID:certik,项目名称:vendor,代码行数:26,


示例3: Mjoin

      int Mjoin(PATL,her2kLN)   #endif#endif   (const int N, const int K, const void *valpha, const void *A, const int lda,    const void *B, const int ldb, const void *vbeta, void *C, const int ldc){   int i;   void *vc=NULL;   TYPE *c;   const TYPE beta =*( (const TYPE *)vbeta  );   const TYPE zero[2]={0.0, 0.0};   i = ATL_MulBySize(N)*N;   if (i <= ATL_MaxMalloc) vc = malloc(ATL_Cachelen+i);   if (vc == NULL) return(1);   c = ATL_AlignPtr(vc);   #ifdef Transpose_      ATL_ammm(AtlasConjTrans, AtlasNoTrans, N, N, K, valpha, A, lda, B, ldb,   #else      ATL_ammm(AtlasNoTrans, AtlasConjTrans, N, N, K, valpha, A, lda, B, ldb,   #endif               zero, c, N);   if ( beta == 1.0 ) Mjoin(her2k_put,_b1)(N, c, vbeta, C, ldc);   else if ( beta == 0.0 ) Mjoin(her2k_put,_b0)(N, c, vbeta, C, ldc);   else Mjoin(her2k_put,_bXi0)(N, c, vbeta, C, ldc);   free(vc);   return(0);}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:28,


示例4: Mjoin

void Mjoin(Mjoin(PATL,symmL),UploNM)(const int M, const int N, const void *valpha, const void *A, const int lda, const void *B, const int ldb, const void *vbeta, void *C, const int ldc){#ifdef TREAL    const SCALAR alpha=*( (const SCALAR *)valpha );    const SCALAR beta =*( (const SCALAR *)vbeta  );    const SCALAR one=1.0;#else#define alpha valpha#define beta vbeta#endif    TYPE *a;    void *va;    if (N > SYMM_Xover)    {        va = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));        ATL_assert(va);        a = ATL_AlignPtr(va);#ifdef TREAL        if ( SCALAR_IS_ONE(alpha) )            Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(M, alpha, A, lda, a);        else Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(M, alpha, A, lda, a);        CgemmTN(M, N, M, one, a, M, B, ldb, beta, C, ldc);#else        Mjoin(Mjoin(PATL,sycopy),UploNM)(M, A, lda, a);        CgemmTN(M, N, M, valpha, a, M, B, ldb, vbeta, C, ldc);#endif        free(va);    }    else Mjoin(PATL,refsymm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,                                 beta, C, ldc);}
开发者ID:Leobin7,项目名称:Kaldi,代码行数:34,


示例5: Mjoin

   int Mjoin(PATL,syr2kLT)#endif   (const int N, const int K, const void *valpha, const void *A, const int lda,    const void *B, const int ldb, const void *vbeta, void *C, const int ldc){   int i;   void *vc=NULL;   TYPE *c;   #ifdef TREAL      const SCALAR alpha=*( (const SCALAR *)valpha );      const SCALAR beta =*( (const SCALAR *)vbeta  );      const SCALAR one=1.0, zero=0.0;   #else      #define alpha valpha      const TYPE *beta=vbeta;      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};   #endif   i = ATL_MulBySize(N)*N;   if (i <= ATL_MaxMalloc) vc = malloc(ATL_Cachelen+i);   if (vc == NULL) return(1);   c = ATL_AlignPtr(vc);   CgemmTN(N, N, K, alpha, A, lda, B, ldb, zero, c, N);   if ( SCALAR_IS_ONE(beta) ) Mjoin(syr2k_put,_b1)(N, c, beta, C, ldc);   else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr2k_put,_b0)(N, c, beta, C, ldc);   #ifdef TCPLX      else if (SCALAR_IS_NONE(beta)) Mjoin(syr2k_put,_bn1)(N, c, beta, C, ldc);      else if (beta[1] == *zero) Mjoin(syr2k_put,_bXi0)(N, c, beta, C, ldc);   #endif   else Mjoin(syr2k_put,_bX)(N, c, beta, C, ldc);   free(vc);   return(0);}
开发者ID:AIDman,项目名称:Kaldi,代码行数:33,


示例6: Mjoin

void Mjoin(Mjoin(Mjoin(PATL,syrk),UploNM),T)   (const int N, const int K, const void *valpha, const void *A, const int lda,    const void *vbeta, void *C, const int ldc){   void *vc;   TYPE *c;   #ifdef TREAL      const SCALAR alpha=*( (const SCALAR *)valpha );      const SCALAR beta =*( (const SCALAR *)vbeta  );      const SCALAR one=1.0, zero=0.0;   #else      #define alpha valpha      const TYPE *beta=vbeta;      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};   #endif   if (K > SYRK_Xover)   {      vc = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);      ATL_assert(vc);      c = ATL_AlignPtr(vc);      CgemmTN(N, N, K, alpha, A, lda, A, lda, zero, c, N);      if ( SCALAR_IS_ONE(beta) ) Mjoin(syr_put,_b1)(N, c, beta, C, ldc);      else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr_put,_b0)(N, c, beta, C, ldc);      #ifdef TCPLX         else if ( SCALAR_IS_NONE(beta) )            Mjoin(syr_put,_bn1)(N, c, beta, C, ldc);         else if (beta[1] == *zero) Mjoin(syr_put,_bXi0)(N, c, beta, C, ldc);      #endif      else Mjoin(syr_put,_bX)(N, c, beta, C, ldc);      free(vc);   }   else Mjoin(PATL,refsyrk)(Uplo_, AtlasTrans, N, K, alpha, A, lda,                            beta, C, ldc);}
开发者ID:certik,项目名称:vendor,代码行数:35,


示例7: Mjoin

void Mjoin(Mjoin(PATL,symmR),UploNM)   (const int M, const int N, const void *valpha, const void *A, const int lda,    const void *B, const int ldb, const void *vbeta, void *C, const int ldc){   #ifdef TREAL      const SCALAR alpha=*( (const SCALAR *)valpha );      const SCALAR beta =*( (const SCALAR *)vbeta  );      const SCALAR one=1.0;   #else      #define alpha valpha      #define beta  vbeta   #endif   void *va;   TYPE *a;   if (M > SYMM_Xover)   {      va = malloc(ATL_Cachelen + ATL_MulBySize(N)*N);      ATL_assert(va);      a = ATL_AlignPtr(va);      #ifdef TREAL         if ( SCALAR_IS_ONE(alpha) )            Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(N, alpha, A, lda, a);         else Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(N, alpha, A, lda, a);         ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, one, B, ldb, a, N, beta, C, ldc);      #else         Mjoin(Mjoin(PATL,sycopy),UploNM)(N, A, lda, a);         ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, valpha, B, ldb, a, N, vbeta, C, ldc);      #endif      free(va);   }   else Mjoin(PATL,refsymm)(AtlasRight, Uplo_, M, N, alpha, A, lda, B, ldb,                            beta, C, ldc);}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:34,


示例8: Mjoin

void Mjoin(Mjoin(PATL,trmmL),ATLP)   (const int M, const int N, const void *valpha, const void *A, const int lda,    void *C, const int ldc){   #ifdef TREAL      const SCALAR alpha=*( (const SCALAR *)valpha );      const SCALAR one=1.0, zero=0.0;   #else      const TYPE zero[2]={0.0,0.0};      #define alpha valpha   #endif   void *va;   TYPE *a;   if (N > TRMM_Xover)   {      va = malloc(ATL_Cachelen + ATL_MulBySize(M)*M);      ATL_assert(va);      a = ATL_AlignPtr(va);      #ifdef TREAL         if ( SCALAR_IS_ONE(alpha) ) Mjoin(ATL_trcopy,_a1)(M, alpha, A, lda, a);         else Mjoin(ATL_trcopy,_aX)(M, alpha, A, lda, a);         CAgemmTN(M, N, M, one, a, M, C, ldc, zero, C, ldc);      #else         ATL_trcopy(M, A, lda, a);         CAgemmTN(M, N, M, valpha, a, M, C, ldc, zero, C, ldc);      #endif      free(va);   }   else Mjoin(PATL,reftrmm)(AtlasLeft, Uplo_, Trans_, Unit_, M, N, alpha,                            A, lda, C, ldc);}
开发者ID:AIDman,项目名称:Kaldi,代码行数:32,


示例9: cblas_zgerc

void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,                 const void *alpha, const void *X, const int incX,                 const void *Y, const int incY, void *A, const int lda){   int info = 2000;   const double *x = X, *y = Y;   void *vy;   double *y0;   double one[2] = {ATL_rone, ATL_rzero};#ifndef NoCblasErrorChecks   if (M < 0) info = cblas_errprn(2, info,                        "M cannot be less than zero; is set to %d.", M);   if (N < 0) info = cblas_errprn(3, info,                        "N cannot be less than zero; is set to %d.", N);   if (!incX) info = cblas_errprn(6, info,                                  "incX cannot be zero; is set to %d.", incX);   if (!incY) info = cblas_errprn(8, info,                                  "incY cannot be zero; is set to %d.", incY);   if (Order == CblasColMajor)   {      if (lda < M || lda < 1)         info = cblas_errprn(10, info, "lda must be >= MAX(M,1): lda=%d M=%d",                             lda, M);   }   else if (Order == CblasRowMajor)   {      if (lda < N || lda < 1)         info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d M=%d",                             lda, N);   }   else      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",                          CblasRowMajor, CblasColMajor, Order);   if (info != 2000)   {      cblas_xerbla(info, "cblas_zgerc", "");      return;   }#endif   if (incX < 0) x += (1-M)*incX<<1;   if (incY < 0) y += (1-N)*incY<<1;   if (Order == CblasColMajor)      ATL_zgerc(M, N, alpha, x, incX, y, incY, A, lda);   else   {      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));      ATL_assert(vy);      y0 = ATL_AlignPtr(vy);      ATL_zmoveConj(N, alpha, y, incY, y0, 1);      ATL_zgeru(N, M, one, y0, 1, x, incX, A, lda);      free(vy);   }}
开发者ID:GorgonCryoEM,项目名称:Gorgon-CVS,代码行数:56,


示例10: cblas_zher

void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,                const int N, const double alpha,                const void *X, const int incX, void *A, const int lda){   int info = 2000;   void *vx;   double one[2] = {ATL_rone, ATL_rzero};   double *x0;   const double *x=X;#ifndef NoCblasErrorChecks   if (Order != CblasColMajor && Order != CblasRowMajor)      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",                          CblasRowMajor, CblasColMajor, Order);   if (Uplo != CblasUpper && Uplo != CblasLower)      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",                          CblasUpper, CblasLower, Uplo);   if (N < 0) info = cblas_errprn(3, info,                        "N cannot be less than zero; is set to %d.", N);   if (!incX) info = cblas_errprn(6, info,                                  "incX cannot be zero; is set to %d.", incX);   if (lda < N || lda < 1)      info = cblas_errprn(8, info, "lda must be >= MAX(N,1): lda=%d N=%d",                          lda, N);   if (info != 2000)   {      cblas_xerbla(info, "cblas_zher", "");      return;   }#endif   if (incX < 0) x += (1-N)*incX<<1;   if (Order == CblasColMajor)      ATL_zher(Uplo, N, alpha, x, incX, A, lda);   else if (alpha != ATL_rzero)   {      vx = malloc(ATL_Cachelen + ATL_MulBySize(N));      ATL_assert(vx);      x0 = ATL_AlignPtr(vx);      ATL_zmoveConj(N, one, x, incX, x0, 1);      ATL_zher(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),               N, alpha, x0, 1, A, lda);      free(vx);   }   else      ATL_zher(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),               N, ATL_rzero, x, incX, A, lda);}
开发者ID:apollos,项目名称:atlas,代码行数:49,


示例11: ATL_flushcache

double ATL_flushcache(long long size)/* * flush cache by reading enough mem; note that if the compiler gets * really smart, may be necessary to make vp a global variable so it * can't figure out it's not being modified other than during setup; * the fact that ATL_dzero is external will confuse most compilers */{  static void *vp=NULL;  static long long N = 0;  double *cache;  double dret=0.0;  size_t i;  if (size < 0) /* flush cache */  {     ATL_assert(vp);     cache = ATL_AlignPtr(vp);     if (N > 0) for (i=0; i != N; i++) dret += cache[i];  }  else if (size > 0) /* initialize */  {     vp = malloc(ATL_Cachelen + size);     ATL_assert(vp);     N = size / sizeof(double);     cache = ATL_AlignPtr(vp);     ATL_dzero(N, cache, 1);  }  else if (size == 0) /* free cache */  {     if (vp) free(vp);     vp = NULL;     N = 0;  }  return(dret);}
开发者ID:AIDman,项目名称:Kaldi,代码行数:36,


示例12: Mjoin

void Mjoin(Mjoin(PATL,hemmL),UploNM)   (const int M, const int N, const void *alpha, const void *A, const int lda,    const void *B, const int ldb, const void *beta, void *C, const int ldc){   TYPE *a;   void *va;   if (N > HEMM_Xover)   {      va = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));      ATL_assert(va);      a = ATL_AlignPtr(va);      Mjoin(Mjoin(PATL,hecopy),UploNM)(M, A, lda, a);      ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, M, alpha, a, M, B, ldb,               beta, C, ldc);      free(va);   }   else Mjoin(PATL,refhemm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,                            beta, C, ldc);}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:20,


示例13: ATL_ptflushcache

double ATL_ptflushcache(long long size)/* * flush cache by reading enough mem; note that if the compiler gets * really smart, may be necessary to make vp a global variable so it * can't figure out it's not being modified other than during setup; * the fact that ATL_dzero is external will confuse most compilers */{    static void *vp=NULL;    static double *cache=NULL;    double dret=0.0;    static long long i, N = 0;    ATL_FC fct[ATL_NTHREADS];    if (size < 0) /* flush cache */    {        ATL_assert(cache);        for (i=0; i < ATL_NTHREADS; i++)        {            fct[i].N = N;            fct[i].dp = cache+i*N;        }        ATL_goparallel(ATL_NTHREADS, ATL_DoWorkFC, fct, NULL);    }    else if (size > 0) /* initialize */    {        vp = malloc(ATL_Cachelen + (size * ATL_NTHREADS));        ATL_assert(vp);        cache = ATL_AlignPtr(vp);        N = size / sizeof(double);        ATL_dzero(N*ATL_NTHREADS, cache, 1);    }    else if (size == 0) /* free cache */    {        if (vp) free(vp);        vp = cache = NULL;        N = 0;    }    return(dret);}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:40,


示例14: Mjoin

void Mjoin(Mjoin(PATL,trsmR),ATLP)   (const int M, const int N, const void *valpha, const void *A, const int lda,    void *C, const int ldc){   const TYPE *alpha=valpha;#ifdef TREAL   #if defined(Transpose_) || defined(ConjTrans_)      if ( M > (N<<2) )      {         void *va;	 TYPE *a;         va = malloc(ATL_Cachelen + (ATL_MulBySize(N*N)));         ATL_assert(va);         a = ATL_AlignPtr(va);         #ifdef TREAL            Mjoin(ATL_trcopy,_a1)(N, ATL_rone, A, lda, a);         #else            ATL_trcopy(N, A, lda, a);         #endif         Mjoin(Mjoin(PATL,trsmKR),ATLPt)(M, N, *alpha, a, N, C, ldc);         free(va);      }      else Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, *alpha,                               A, lda, C, ldc);   #else      Mjoin(Mjoin(PATL,trsmKR),ATLP)(M, N, *alpha, A, lda, C, ldc);   #endif#else   if (M > (N<<2) && N <= 4)      Mjoin(PATL,CtrsmKR)(Uplo_, Trans_, Unit_, M, N, valpha, A, lda, C, ldc);   else      Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, alpha,                          A, lda, C, ldc);#endif}
开发者ID:onenoc,项目名称:QuantSoftwareToolkit,代码行数:36,


示例15: Mjoin

int Mjoin(PC2F,ormrq)   (const enum CBLAS_SIDE Side, const enum CBLAS_TRANSPOSE TA,    ATL_CINT M, ATL_CINT N, ATL_CINT K, TYPE *A, ATL_CINT lda, TYPE *TAU,    TYPE *C, ATL_CINT ldc){   TYPE work[2];   void *vp;   TYPE *wrk;   ATL_INT lwrk;   int iret;/* * Query routine for optimal workspace, allocate it, and call routine with it */   ATL_assert(!Mjoin(PC2F,ormrq_wrk)(Side, TA, M, N, K, A, lda, TAU, C, ldc,                                     work, -1));   lwrk = work[0];   vp = malloc(ATL_MulBySize(lwrk) + ATL_Cachelen);   ATL_assert(vp);   wrk = ATL_AlignPtr(vp);   iret = Mjoin(PC2F,ormrq_wrk)(Side, TA, M, N, K, A, lda, TAU, C, ldc,                                wrk, lwrk);   free(vp);   return(iret);}
开发者ID:apollos,项目名称:atlas,代码行数:24,


示例16: clapack_sgetri

int clapack_sgetri(const enum CBLAS_ORDER Order, const int N, float *A,                   const int lda, const int *ipiv){   int ierr=0, lwrk;   int Mjoin(PATL,GetNB)();   void *vp;   lwrk = Mjoin(PATL,GetNB)();   if (lwrk <= N) lwrk *= N;   else lwrk = N*N;   vp = malloc(ATL_Cachelen + ATL_MulBySize(lwrk));   if (vp)   {      ierr = ATL_getri(Order, N, A, lda, ipiv, ATL_AlignPtr(vp), &lwrk);      free(vp);   }   else   {      cblas_xerbla(7, "clapack_sgetri",                   "Cannot allocate workspace of %d/n", lwrk);      return(-7);   }   return(ierr);}
开发者ID:apollos,项目名称:atlas,代码行数:24,


示例17: cblas_cger2c

void cblas_cger2c(const enum CBLAS_ORDER Order, ATL_CINT M, ATL_CINT N,                 const void *alpha, const void *X, ATL_CINT incX,                 const void *Y, ATL_CINT incY, const void *beta,                 const void *W, ATL_CINT incW,                 const void *Z, ATL_CINT incZ, void *A, ATL_CINT lda){   int info = 2000;   const float *x = X, *y = Y, *w = W, *z = Z;   void *vy;   float *y0, *z0;   float one[2] = {ATL_rone, ATL_rzero};#ifndef NoCblasErrorChecks   if (M < 0) info = cblas_errprn(2, info,                        "M cannot be less than zero; is set to %d.", M);   if (N < 0) info = cblas_errprn(3, info,                        "N cannot be less than zero; is set to %d.", N);   if (!incX) info = cblas_errprn(6, info,                                  "incX cannot be zero; is set to %d.", incX);   if (!incY) info = cblas_errprn(8, info,                                  "incY cannot be zero; is set to %d.", incY);   if (!incW) info = cblas_errprn(11, info,                                  "incW cannot be zero; is set to %d.", incW);   if (!incZ) info = cblas_errprn(13, info,                                  "incZ cannot be zero; is set to %d.", incZ);   if (Order == CblasColMajor)   {      if (lda < M || lda < 1)         info = cblas_errprn(15, info, "lda must be >= MAX(M,1): lda=%d M=%d",                             lda, M);   }   else if (Order == CblasRowMajor)   {      if (lda < N || lda < 1)         info = cblas_errprn(15, info, "lda must be >= MAX(N,1): lda=%d M=%d",                             lda, N);   }   else      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",                          CblasRowMajor, CblasColMajor, Order);   if (info != 2000)   {      cblas_xerbla(info, "cblas_cger2c", "");      return;   }#endif   if (incX < 0) x += (1-M)*incX<<1;   if (incY < 0) y += (1-N)*incY<<1;   if (incW < 0) w += (1-M)*incW<<1;   if (incZ < 0) z += (1-N)*incZ<<1;   if (Order == CblasColMajor)      ATL_cger2c(M, N, alpha, x, incX, y, incY, beta, w, incW, z, incZ, A, lda);   else   {      vy = malloc(ATL_Cachelen+ATL_Cachelen + ATL_MulBySize(N+N));      ATL_assert(vy);      y0 = ATL_AlignPtr(vy);      z0 = y0 + N;      z0 = ATL_AlignPtr(z0);      ATL_cmoveConj(N, alpha, y, incY, y0, 1);      ATL_cmoveConj(N, alpha, z, incZ, z0, 1);      ATL_cger2u(N, M, one, y0, 1, x, incX, beta, w, incW, z, incZ, A, lda);      free(vy);   }}
开发者ID:apollos,项目名称:atlas,代码行数:67,


示例18: Mmin

//.........这里部分代码省略.........   nb = clapack_ilaenv(LAIS_OPT_NB, LAormqr, MYOPT+LARight+LAUpper, M, N, K,-1);/* * If it is a workspace query, return the size of work required. *    wrksz = wrksz of ATL_larfb + ATL_larft + ATL_geqr2 */   if (LWORK < 0)   {      if(SIDE == CblasLeft)      {         *WORK = ( N*nb + nb*nb + maxMN )  ;      }      else      {         *WORK = ( M*nb + nb*nb + maxMN )  ;      }      return(0);   }   else if (M < 1 || N < 1)                 /* quick return if no work to do  */      return(0);/* * If the user gives us too little space, see if we can allocate it ourselves */   else   {      if(SIDE == CblasLeft)      {         if (LWORK < (N*nb + nb*nb + maxMN))         {            vp = malloc(ATL_MulBySize(N*nb + nb*nb + maxMN) + ATL_Cachelen);            if (!vp)               return(-7);            WORK = ATL_AlignPtr(vp);         }      }      else      {         if (LWORK < (M*nb + nb*nb + maxMN))         {            vp = malloc(ATL_MulBySize(M*nb + nb*nb + maxMN) + ATL_Cachelen);            if (!vp)               return(-7);            WORK = ATL_AlignPtr(vp);         }      } /* if CblasRight */   }/* * Assign workspace areas for ATL_larft, ATL_geqr2, ATL_larfb */   ws_T = WORK;                             /* T at begining of work          */   ws_QR2 = WORK +(nb SHIFT)*nb;            /* After T Work space             */   ws_larfb = ws_QR2 + (maxMN SHIFT);       /* After workspace for T and QR2  */   if (SIDE == CblasLeft)   {      if ( TRANS == CblasNoTrans )      {         j = (K/nb)*nb;         if (j == K)         {            j=K -nb;         }
开发者ID:AIDman,项目名称:Kaldi,代码行数:67,


示例19: Mjoin

int Mjoin(PATL,mmJKI)(const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB,                      const int M, const int N, const int K,                      const SCALAR alpha, const TYPE *A, const int lda,                      const TYPE *B, const int ldb, const SCALAR beta,                      TYPE *C, const int ldc)/* * This gemm is for small K, so we build gemm out of AXPY (outer product) * rather than dot (inner product). */{   int Mp, mp, m, k, ldaa=lda;   void *vA=NULL;   TYPE *pA;   const TYPE CONE[2]={ATL_rone, ATL_rzero}, CNONE[2]={ATL_rnone, ATL_rzero};   const SCALAR alp=alpha;/* * Compute M partition necessary to promote reuse in the L1 cache.  Check * NB^2 in addition to L1elts, to catch machines where L1 is not used by FPU. * If this gives a small Mp, use CacheEdge instead (reuse in L2 instead of L1). */   Mp = NB*NB;   m = ATL_L1elts >> 1;   Mp = (m > Mp) ? m : Mp;   Mp /= ((K+2)<<1);   if (Mp < 128)   {      #if !defined(CacheEdge) || CacheEdge == 0         Mp = M;      #else         Mp = (CacheEdge) / ((K+2)*ATL_sizeof);         if (Mp < 128)            Mp = M;      #endif   }   if (Mp > M)      Mp = M;/* * Change Mp if remainder is very small */   else   {      Mp -= 16;      /* small safety margin on filling cache */      mp = M / Mp;      m = M - mp*Mp;      if (m && m < 32)         Mp += (m+mp-1)/mp;   }/* * If A not in NoTrans format, need to copy so it can use axpy wt stride=1. * NOTE: this routine should not be called when you can't afford this copy */   if (TA != AtlasNoTrans)   {      vA = malloc(ATL_Cachelen + Mp*ATL_MulBySize(K));      if (!vA) return(-1);      pA = ATL_AlignPtr(vA);      alp = CONE;      ldaa = Mp;      pA += Mp+Mp;   }   else      pA = (TYPE *) A;   for (m=0; m < M; m += Mp)   {      mp = M - m;      if (mp > Mp)         mp = Mp;/* *    If the thing is in Trans format, copy to NoTrans */      if (vA)      {         pA -= (Mp+Mp);         if (TA == AtlasConjTrans)         {            for (k=0; k < K; k++)            {               Mjoin(PATL,copy)(mp, A+k+k, lda, pA+((k*ldaa)<<1), 1);               Mjoin(PATLU,scal)(mp, ATL_rnone, pA+1+((k*ldaa)<<1), 2);               if (!SCALAR_IS_ONE(alpha))                  Mjoin(PATL,scal)(mp, alpha, pA+((k*ldaa)<<1), 1);            }         }         else         {            for (k=0; k < K; k++)               Mjoin(PATL,cpsc)(mp, alpha, A+k+k, lda, pA+((k*ldaa)<<1), 1);         }         A += mp*(lda+lda);      }      Mjoin(PATL,mm_axpy)(AtlasNoTrans, TB, mp, N, K, alp, pA, ldaa, B, ldb,                          beta, C, ldc);      pA += mp+mp;      C += mp+mp;   }   if (vA) free(vA);   return(0);}
开发者ID:AIDman,项目名称:Kaldi,代码行数:99,


示例20: ATL_her

void ATL_her(const enum ATLAS_UPLO Uplo, ATL_CINT N, const TYPE alpha,               const TYPE *X, ATL_CINT incX, TYPE *A, ATL_CINT lda){   const TYPE calpha[2] = {alpha, ATL_rzero};   void *vp=NULL;   TYPE *x, *xt;   ATL_r1kern_t gerk;   ATL_INT CacheElts;   const int ALP1 = (alpha == ATL_rone);   int COPYX, COPYXt;   int mu, nu, minM, minN, alignX, alignXt, FNU, ALIGNX2A;   if (N < 1 || (alpha == ATL_rzero))      return;/* * For very small problems, avoid overhead of func calls & data copy */   if (N < 50)   {      Mjoin(PATL,refher)(Uplo, N, alpha, X, incX, A, lda);      return;   }/* * Determine the GER kernel to use, and its parameters */   gerk = ATL_GetR1Kern(N-ATL_s1L_NU, ATL_s1L_NU, A, lda, &mu, &nu,                        &minM, &minN, &alignX, &ALIGNX2A, &alignXt,                        &FNU, &CacheElts);/* * Determine if we need to copy the vectors */   COPYX = (incX != 1);   if (!COPYX)  /* may still need to copy due to alignment issues */   {/* *    ATL_Cachelen is the highest alignment that can be requested, so *    make X's % with Cachelen match that of A if you want A & X to have *    the same alignment */      if (ALIGNX2A)      {         size_t t1 = (size_t) A, t2 = (size_t) X;         COPYX = (t1 - ATL_MulByCachelen(ATL_DivByCachelen(t1))) !=                 (t2 - ATL_MulByCachelen(ATL_DivByCachelen(t2)));      }      else if (alignX)      {         size_t t1 = (size_t) X;         COPYX = ((t1/alignX)*alignX != t1);      }   }   vp = malloc((ATL_Cachelen+ATL_MulBySize(N))*(1+COPYX));   if (!vp)   {      Mjoin(PATL,refher)(Uplo, N, alpha, X, incX, A, lda);      return;   }   xt = ATL_AlignPtr(vp);   if (COPYX)   {      x = xt + N+N;      x = ALIGNX2A ? ATL_Align2Ptr(x, A) : ATL_AlignPtr(x);      Mjoin(PATL,copy)(N, X, incX, x, 1);   }   else      x = (TYPE*) X;   if (ALP1)      Mjoin(PATL,copyConj)(N, X, incX, xt, 1);   else      Mjoin(PATL,moveConj)(N, calpha, X, incX, xt, 1);   if (Uplo == AtlasUpper)      Mjoin(PATL,her_kU)(gerk, N, alpha, x, xt, A, lda);   else      Mjoin(PATL,her_kL)(gerk, N, alpha, x, xt, A, lda);   if (vp)     free(vp);}
开发者ID:AIDman,项目名称:Kaldi,代码行数:76,


示例21: Mjoin

int Mjoin(PATL,mmJITcp)(const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB,                        const int M0, const int N, const int K,                        const SCALAR alpha, const TYPE *A, const int lda,                        const TYPE *B, const int ldb, const SCALAR beta,                        TYPE *C, const int ldc)/* * Copy matmul algorithm, copies A and B on-the-fly * If M < 0, allocates only (MB+NB)*KB workspace */{   void *v=NULL;   const TYPE *a=A;   TYPE *pA, *pB, *pB0;   MAT2BLK2 A2blk, B2blk;   NBMM0 NBmm0, NBmm1, pNBmm0;   const int M = (M0 >= 0) ? M0 : -M0;   int nkblks, nmblks, nnblks, mr, nr, kr, KR, bigK, h, i, j, ZEROC;   size_t incAk, incBk, incAm, incBn, incAW, incAWp, incBW, incBWp, incW;/* * If both M and N <= NB, and one of them is not full, call BPP, which * can sometimes avoid doing cleanup forall cases */   if (M <= MB && N <= NB && (M != MB || N != NB))      return(Mjoin(PATL,mmBPP)(TA, TB, M, N, K, alpha, A, lda, B, ldb,                               beta, C, ldc));/* * If these workspace increments are 0, we do JIT NBxNB copies instead of * copying entire array/panel.  Don't copy mat if you can't reuse it. */   if (M0 > 0)   {      incAW = (N > NB) ? KB*MB : 0;      incBW = (M > NB) ? KB*NB : 0;   }   else /* allocate in minimal space */      incAW = incBW = 0;   nmblks = M/MB;   nnblks = N/NB;   nkblks = K/KB;   mr = M - nmblks*MB;   nr = N - nnblks*NB;   kr = K - nkblks*KB;/* * K-loop is special, in that we don't call user cleanup, must explicitly zero, * and K-cleanup is typically slower even for generated kernels.  Therefore, * allow extra leaway for doing extra flops.  Note error is unaffected by * any of these extra flops: K-loop has elts zeroed, and multiplying zeros * and adding in zeros doesn't add to error */   KR = (kr && kr+4 >= KB) ? KB : kr;   bigK = nkblks*KB+KR;   if (incAW)   {      i = MB*bigK;      incAWp = KB*mr;   }   else   {      i = MB*KB;      incAWp = 0;   }   if (incBW)   {      incBWp = KB*nr;      incW = bigK*NB;      i += N*bigK;   }   else   {      incBWp = incW = 0;      i += NB*KB;   }   i *= sizeof(TYPE);   if (i <= ATL_MaxMalloc || !(incAW | incBW))      v = malloc(ATL_Cachelen+i);   if (!v) return(-1);   pA = ATL_AlignPtr(v);   pB0 = pA + (incAW ? bigK*MB : KB*MB);   if (TA == AtlasNoTrans)   {      A2blk = Mjoin(PATL,gemoveT);      incAk = lda*KB;      incAm = MB;   }   else   {      A2blk = Mjoin(PATL,gemove);      incAk = KB;      incAm = MB*lda;   }   if (TB == AtlasNoTrans)   {      B2blk = Mjoin(PATL,gemove);      incBk = KB;      incBn = NB*ldb;   }   else   {      B2blk = Mjoin(PATL,gemoveT);//.........这里部分代码省略.........
开发者ID:AIDman,项目名称:Kaldi,代码行数:101,


示例22: Mjoin

int Mjoin(PATL,mmIJK)(const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB,                      const int M, const int N0, const int K,                      const SCALAR alpha, const TYPE *A, const int lda0,                      const TYPE *B, const int ldb0, const SCALAR beta,                      TYPE *C, const int ldc0){    size_t incA, incB, incC;    const size_t lda=lda0, ldb=ldb0, ldc=ldc0;    const size_t incK = ATL_MulByNB((size_t)K);    int N = N0;    int nMb, nNb, nKb, ib, jb, kb, jb2, h, i, j, k, n;    void *vA=NULL, *vC=NULL;    TYPE *pA, *pB, *pC;    MAT2BLK A2blk, B2blk;    PUTBLK putblk;    NBMM0 NBmm0;    nMb = ATL_DivByNB(M);    nNb = ATL_DivByNB(N);    nKb = ATL_DivByNB(K);    ib = M - ATL_MulByNB(nMb);    jb = N - ATL_MulByNB(nNb);    kb = K - ATL_MulByNB(nKb);    /*     * If K sufficiently large, write to temporary C as safety measure;  otherwise     * write directly to C     */    if (nKb < 12)    {        putblk = NULL;        pC = C;        if ( SCALAR_IS_ONE(beta) ) NBmm0 = NBmm_b1;        else if ( SCALAR_IS_ZERO(beta) ) NBmm0 = NBmm_b0;        else NBmm0 = NBmm_bX;    }    else    {        NBmm0 = NBmm_b0;        vC = malloc(ATL_Cachelen + ATL_MulBySize(NBNB));        if (!vC) return(-1);        pC = ATL_AlignPtr(vC);        if ( SCALAR_IS_ONE(beta) ) putblk = Mjoin(PATL,putblk_b1);        else if ( SCALAR_IS_ZERO(beta) ) putblk = Mjoin(PATL,putblk_b0);        else if ( SCALAR_IS_NONE(beta) ) putblk = Mjoin(PATL,putblk_bn1);        else putblk = Mjoin(PATL,putblk_bX);    }    /*     * Special case if we don't need to copy one or more input matrix     */    if (K == NB && TB == AtlasNoTrans && ldb == NB && ATL_DataIsMinAligned(B))    {        if (lda == NB && TA == AtlasTrans && SCALAR_IS_ONE(alpha) &&                ATL_DataIsMinAligned(A))        {            i = NBNB;            pA = (TYPE *) A;            A = NULL;            A2blk = NULL;            incA = 0;        }        else        {            vA = malloc(ATL_Cachelen + ATL_MulBySize(incK));            if (!vA)            {                free(vC);                return(-1);            }            pA = ATL_AlignPtr(vA);            if (TA == AtlasNoTrans)            {                incA = NB;                if ( SCALAR_IS_ONE(alpha) ) A2blk = Mjoin(PATL,row2blkT_a1);                else A2blk = Mjoin(PATL,row2blkT_aX);            }            else            {                incA = ATL_MulByNB(lda);                if ( SCALAR_IS_ONE(alpha) ) A2blk = Mjoin(PATL,col2blk_a1);                else A2blk = Mjoin(PATL,col2blk_aX);            }        }        Mjoin(PATL,mmIJK2)(K, nMb, nNb, nKb, ib, jb, kb, alpha, A, lda, pA,                           incA, A2blk, B, beta, C, ldc, pC, putblk, NBmm0);        if (vA) free(vA);        if (vC) free(vC);        return(0);    }    i = ATL_Cachelen + ATL_MulBySize(N*K + incK);    if (i <= ATL_MaxMalloc) vA = malloc(i);    if (!vA)    {        if (TA == AtlasNoTrans && TB == AtlasNoTrans)        {            if (vC) free(vC);            return(1);        }        if (jb) n = nNb + 1;        else n = nNb;//.........这里部分代码省略.........
开发者ID:apollos,项目名称:atlas,代码行数:101,


示例23: ATL_gelqf

//.........这里部分代码省略.........   }   else if (M < 1 || N < 1)  /* quick return if no work to do */      return(0);/* * LQ is the transpose of QR: We use this to go from row-major LQ to * col-major QR, typically faster. Here, if we are square and large, * we transpose the whole matrix in-place and then transpose it back. * This should be a tunable parameter; perhaps if the matrix fits in * L1 or L2? (Note by Tony C, short on time to conduct tuning). */   if (M == N && N >= 128)   {      Mjoin(PATL,sqtrans)(N, A, lda);      n = ATL_geqrf(M, N, A, lda, TAU, WORK, LWORK);      Mjoin(PATL,sqtrans)(N, A, lda);      /* Take the conjugate for Complex TAU. */      #ifdef TCPLX      ATL_INT i;      for (i=1; i<(minMN<<1); i+=2)         *(TAU+i) = 0.-*(TAU+i);          /* Negate imaginary part. */      #endif      return(n);   }/* * If the user gives us too little space, see if we can allocate it ourselves */   else if (LWORK < (maxMN*nb + nb*nb + maxMN))   {      vp = malloc(ATL_MulBySize(maxMN*nb + nb*nb + maxMN) + ATL_Cachelen);      if (!vp)         return(-7);       WORK = ATL_AlignPtr(vp);   }/* * Assign workspace areas for ATL_larft, ATL_gelq2, ATL_larfb */   ws_T = WORK;                         /* T at begining of work */   ws_LQ2 = WORK +(nb SHIFT)*nb;        /* After T Work space             */   ws_larfb = ws_LQ2 + (maxMN SHIFT);   /* After workspace for T and LQ2  *//* * Leave one iteration to be done outside loop, so we don't build T * Any loop iterations are therefore known to be of size nb (no partial blocks) */   n = (minMN / nb) * nb;   if (n == minMN)      n -= Mmin(nb, minMN);       /* when n is a multiple of nb, reduce by nb */   #if !defined(ATL_USEPTHREADS)        /* If no PCA, try to copy up front. */      j = M - n;      j = Mmax(nb, j);      ldCP = (N&7) ? (((N+7)>>3)<<3) : N;      ws_CPRaw = malloc(ATL_MulBySize(ldCP)*j + ATL_Cachelen);      if (ws_CPRaw) ws_CP=ATL_AlignPtr(ws_CPRaw);  /* Align if malloced. */   #endif /* Serial Mode */   for (j=0; j < n; j += nb)   {      #if !defined(ATL_USEPTHREADS) /* If no PCA it won't copy. Try it here. */      /* If we got our copy workspace, transpose panel before recursion. */      if (ws_CP)                             /* If workspace exists. */      {         int ci, cj;                         /* for conjugation.     */
开发者ID:apollos,项目名称:atlas,代码行数:67,


示例24: Mjoin

void Mjoin( PATL, tbsv )(   const enum ATLAS_UPLO      UPLO,   const enum ATLAS_TRANS     TRANS,   const enum ATLAS_DIAG      DIAG,   const int                  N,   const int                  K,   const TYPE                 * A,   const int                  LDA,   TYPE                       * X,   const int                  INCX){/* * Purpose * ======= * * Mjoin( PATL, tbsv ) solves one of the systems of equations * *    A * x = b,   or   conjg( A  ) * x = b,   or * *    A'* x = b,   or   conjg( A' ) * x = b, * * where b and x are n-element vectors and  A is an n by n unit, or non- * unit, upper or lower triangular band matrix, with (k+1) diagonals. * * No test for  singularity  or  near-singularity  is included  in  this * routine. Such tests must be performed before calling this routine. * * This is a blocked version of the algorithm.  For a more detailed des- * cription of  the arguments of this function, see the reference imple- * mentation in the ATLAS/src/blas/reference directory. * * --------------------------------------------------------------------- *//* * .. Local Variables .. */   void                       * vx = NULL;   TYPE                       * x;/* .. * .. Executable Statements .. * */   if( N == 0 ) return;   Mjoin(PATL,reftbsv)(UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX);   return;   if( INCX == 1 ) { x = X; }   else   {      vx = (TYPE *)malloc( ATL_Cachelen + ATL_MulBySize( N ) );      ATL_assert( vx ); x = ATL_AlignPtr( vx );      Mjoin( PATL, copy )( N, X, INCX, x, 1 );   }#ifdef TREAL   if( ( TRANS == AtlasNoTrans ) || ( TRANS == AtlasConj ) )#else   if( TRANS == AtlasNoTrans )#endif   {      if( UPLO == AtlasUpper ) Mjoin( PATL, tbsvUN )( DIAG, N, K, A, LDA, x );      else                     Mjoin( PATL, tbsvLN )( DIAG, N, K, A, LDA, x );   }#ifdef TCPLX   else if( TRANS == AtlasConj )   {      if( UPLO == AtlasUpper ) Mjoin( PATL, tbsvUC )( DIAG, N, K, A, LDA, x );      else                     Mjoin( PATL, tbsvLC )( DIAG, N, K, A, LDA, x );   }#endif#ifdef TREAL   else#else   else if( TRANS == AtlasTrans )#endif   {      if( UPLO == AtlasUpper ) Mjoin( PATL, tbsvUT )( DIAG, N, K, A, LDA, x );      else                     Mjoin( PATL, tbsvLT )( DIAG, N, K, A, LDA, x );   }#ifdef TCPLX   else   {      if( UPLO == AtlasUpper ) Mjoin( PATL, tbsvUH )( DIAG, N, K, A, LDA, x );      else                     Mjoin( PATL, tbsvLH )( DIAG, N, K, A, LDA, x );   }#endif   if( vx ) { Mjoin( PATL, copy )( N, x, 1, X, INCX ); free( vx ); }/* * End of Mjoin( PATL, tbsv ) */}
开发者ID:AIDman,项目名称:Kaldi,代码行数:93,


示例25: cblas_zhbmv

void cblas_zhbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,                 const int N, const int K, const void *alpha, const void *A,                 const int lda, const void *X, const int incX,                 const void *beta, void *Y, const int incY){   int info = 2000;   const enum CBLAS_UPLO ruplo = (Uplo == CblasUpper) ? CblasLower : CblasUpper;   void *vx;   double *X0, *x = (double*) X;   double *y = Y;   const double *alp=alpha;   const double *bet=beta;   double calpha[2], cbeta[2];   const double one[2] = {ATL_rone, ATL_rzero};   calpha[0] = *alp;   calpha[1] = -alp[1];   cbeta[0] = *bet;   cbeta[1] = -bet[1];#ifndef NoCblasErrorChecks   if (Order != CblasColMajor && Order != CblasRowMajor)      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",                          CblasRowMajor, CblasColMajor, Order);   if (Uplo != CblasUpper && Uplo != CblasLower)      info = cblas_errprn(2, info,                          "Uplo must be %d or %d, but is set to %d",                          CblasUpper, CblasLower, Uplo);   if (N < 0) info = cblas_errprn(3, info,                        "N cannot be less than zero; is set to %d.", N);   if (K < 0)      info = cblas_errprn(4, info, "Valid K: 0 < K < N; K=%d, N=%d.", K, N);   if (lda < K+1) info = cblas_errprn(7, info,      "lda cannot be less than K+1;  K=%d, lda=%d/n", K, lda);   if (!incX) info = cblas_errprn(9, info,                                  "incX cannot be zero; is set to %d.", incX);   if (!incY) info = cblas_errprn(12, info,                                  "incY cannot be zero; is set to %d.", incY);   if (info != 2000)   {      cblas_xerbla(info, "cblas_zhbmv", "");      return;   }#endif   if (incX < 0) x += (1-N)*incX<<1;   if (incY < 0) y += (1-N)*incY<<1;   if (Order == CblasColMajor)      ATL_zhbmv(Uplo, N, K, alpha, A, lda, x, incX, beta, y, incY);   else   {      vx = malloc(ATL_Cachelen + 2*N*sizeof(double));      ATL_assert(vx);      X0 = x;      x = ATL_AlignPtr(vx);      ATL_zmoveConj(N, calpha, X0, incX, x, 1);      if (*bet != ATL_rzero || bet[1] != ATL_rzero)      {         ATL_zscalConj(N, cbeta, y, incY);         ATL_zhbmv(ruplo, N, K, one, A, lda, x, 1, one, y, incY);      }      else ATL_zhbmv(ruplo, N, K, one, A, lda, x, 1, beta, y, incY);      free(vx);      ATL_zscalConj(N, one, y, incY);   }}
开发者ID:apollos,项目名称:atlas,代码行数:66,


示例26: Mjoin

void Mjoin( PATL, hpmv )(   const enum ATLAS_UPLO      UPLO,   const int                  N,   const SCALAR               ALPHA,   const TYPE                 * A,   const TYPE                 * X,   const int                  INCX,   const SCALAR               BETA,   TYPE                       * Y,   const int                  INCY){/* * Purpose * ======= * * Mjoin( PATL, hpmv ) performs the matrix-vector operation * *    y := alpha * A * x + beta * y, * * where alpha and beta are scalars, x and y are n-element vectors and A * is an n by n Hermitian matrix, supplied in packed form. * * This is a blocked version of the algorithm.  For a more detailed des- * cription of  the arguments of this function, see the reference imple- * mentation in the ATLAS/src/blas/reference directory. * * --------------------------------------------------------------------- *//* * .. Local Variables .. */   void                       (*gpmv0)( const int, const int, const SCALAR,                              const TYPE *, const int, const TYPE *, const int,                              const SCALAR, TYPE *, const int );   void                       (*gpmv1)( const int, const int, const SCALAR,                              const TYPE *, const int, const TYPE *, const int,                              const SCALAR, TYPE *, const int );   void                       (*gpmvN)( const int, const int, const SCALAR,                              const TYPE *, const int, const TYPE *, const int,                              const SCALAR, TYPE *, const int );#ifdef TREAL   TYPE                       alphaY, beta0;#define one                   ATL_rone#define zero                  ATL_rzero#else   const TYPE                 * alphaY, * beta0;   const TYPE                 one [2] = { ATL_rone,  ATL_rzero },                              zero[2] = { ATL_rzero, ATL_rzero };#endif   void                       * vx = NULL, * vy = NULL;   TYPE                       * A0, * A1, * x, * x0, * x1, * y, * y00, * y0,                              * y1;   int                        incXY, incXY1, j, jb, lda, lda0, lda1, mb, mb1,                              n, nb;/* .. * .. Executable Statements .. * */   if( N == 0 ) return;   if( SCALAR_IS_ZERO( ALPHA ) )   {      if( !( SCALAR_IS_ONE( BETA ) ) ) Mjoin( PATL, scal )( N, BETA, Y, INCY );      return;   }   if( ( INCX != 1 ) || ( ( INCY == 1 ) && !( SCALAR_IS_ONE( ALPHA ) ) ) )   {      vx = (void *)malloc( ATL_Cachelen + ATL_MulBySize( N ) );      ATL_assert( vx ); x = ATL_AlignPtr( vx );      Mjoin( PATL, cpsc )( N, ALPHA, X, INCX, x, 1 );      alphaY = one;   }   else { x = (TYPE *)(X); alphaY = ALPHA; }   if( ( INCY != 1 ) || !( SCALAR_IS_ONE( alphaY ) ) )   {      vy = malloc( ATL_Cachelen + ATL_MulBySize( N ) );      ATL_assert( vy ); y00 = y = ATL_AlignPtr( vy );      beta0 = zero;   }   else { y00 = y = (TYPE *)(Y); beta0 = BETA; }   ATL_GetPartSPMV( A, N, &mb, &nb );   mb1 = N - ( ( N - 1 ) / mb ) * mb; incXY1 = (nb SHIFT);   if( UPLO == AtlasUpper )   {      if(      SCALAR_IS_ZERO( beta0 ) )         gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_b0_y1 );      else if( SCALAR_IS_ONE ( beta0 ) )         gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_b1_y1 );      else         gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_bX_y1 );      gpmv1 = Mjoin( PATL, gpmvUC_a1_x1_b1_y1 );      gpmvN = Mjoin( PATL, gpmvUN_a1_x1_b1_y1 );//.........这里部分代码省略.........
开发者ID:certik,项目名称:vendor,代码行数:101,


示例27: Mjoin

//.........这里部分代码省略.........         mmcu = Mjoin(Mjoin(Mjoin(NCmm00,0x0x0),TT),0x0x0_aX_bX);      }      incAk = KB;      incAm = lda*MB - Kb*KB;      incAn = -lda*MB*Mb;   }   if (TB == AtlasNoTrans)   {      incBk = KB;      incBm = -KB*Kb;      incBn = ldb*NB;   }   else   {      incBk = KB*ldb;      incBm = -Kb * incBk;      incBn = NB;   }   if (alpha == ATL_rone)   {      if (beta == ATL_rzero) geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_a1),_b0);      else if (beta == ATL_rone)         geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_a1),_b1);      else geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_a1),_bX);   }   else if (beta == ATL_rzero)      geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_aX),_b0);   else if (beta == ATL_rone)      geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_aX),_b1);   else geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_aX),_bX);   vp = malloc(ATL_Cachelen + ATL_MulBySize(MB * NB));   ATL_assert(vp);   cp = ATL_AlignPtr(vp);   if (mr || nr || kr) for (j=MB*NB, i=0; i != j; i++) cp[i] = ATL_rzero;   for (j=Nb; j; j--, a += incAn, b += incBn, c += incCn)   {      for (i=Mb; i; i--, a += incAm, b += incBm, c += incCm)      {         if (Kb)         {            mm_bX(MB, NB, KB, ATL_rone, a, lda, b, ldb, ATL_rzero, cp, MB);            a += incAk;  b += incBk;            for (k=Kb-1; k; k--, a += incAk, b += incBk)               mm_b1(MB, NB, KB, ATL_rone, a, lda, b, ldb, ATL_rone, cp, MB);            if (kr)               mmcu(MB, NB, kr, ATL_rone, a, lda, b, ldb, ATL_rone, cp, MB);         }         else if (kr)         {            Mjoin(PATL,zero)(MB*NB, cp, 1); /* kill NaN/INF from last time */            mmcu(MB, NB, kr, ATL_rone, a, lda, b, ldb, ATL_rzero, cp, MB);         }         geadd(MB, NB, alpha, cp, MB, beta, c, ldc);      }   }   if (mr && N != nr)      ATL_assert(Mjoin(PATL,NCmmIJK)(TA, TB, mr, N-nr, K, alpha,                                     A+Mb*(incAm+Kb*incAk), lda, B, ldb,                                     beta, C+Mb*MB, ldc) ==0);   if (nr)   {      for (i=Mb; i; i--, a += incAm, b += incBm, c += incCm)      {      Mjoin(PATL,zero)(MB*nr, cp, 1); /* kill NaN and INF from last time */
开发者ID:GorgonCryoEM,项目名称:Gorgon-CVS,代码行数:67,


示例28: Mjoin

void Mjoin(PATL,CtrsmKL)   (enum ATLAS_UPLO Uplo, enum ATLAS_TRANS Trans, enum ATLAS_DIAG Diag,    const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda,    TYPE *B, const int ldb)#endif{   TYPE tmp[2], ra, ia;   void *vp;   TYPE *a;   if (N > 0)   {      if (M > 1)      {         vp = malloc(ATL_Cachelen + ATL_MulBySize(M)*M);         ATL_assert(vp);         a = ATL_AlignPtr(vp);         Diag = trsmcopy(Uplo, Trans, Diag, M, alpha, A, lda, a);         if (Trans != AtlasNoTrans)         {            if (Uplo == AtlasLower) Uplo = AtlasUpper;            else Uplo = AtlasLower;         }         switch(M)         {         case 2:            if (Uplo == AtlasLower) trsmLL_2(N, a, B, ldb);            else trsmLU_2(N, a, B, ldb);            break;         case 3:            if (Uplo == AtlasLower) trsmLL_3(N, a, B, ldb);            else trsmLU_3(N, a, B, ldb);            break;         case 4:            if (Uplo == AtlasLower) trsmLL_4(N, a, B, ldb);            else trsmLU_4(N, a, B, ldb);            break;         default: /* this crap should never be used */               tmp[0] = ATL_rone; tmp[1] = ATL_rzero;               Mjoin(PATL,cplxinvert)(M, a, M+M+2, a, M+M+2);               Mjoin(PATL,reftrsm)(AtlasLeft, Uplo, AtlasNoTrans, Diag, M, N,                                   tmp, a, M, B, ldb);         }         free(vp);      }      else if (M == 1)      {         if (Diag == AtlasUnit)         #ifdef Right_            Mjoin(PATL,scal)(N, alpha, B, 1);         #else            Mjoin(PATL,scal)(N, alpha, B, ldb);         #endif         else         {            tmp[0] = A[0];            if (Trans != AtlasConjTrans) tmp[1] = A[1];            else tmp[1] = -A[1];            Mjoin(PATL,cplxinvert)(1, tmp, 2, tmp, 2); /* safe cplx invers */            ra = tmp[0]; ia = tmp[1];            tmp[0] = *alpha * ra - alpha[1] * ia;            tmp[1] = *alpha * ia + alpha[1] * ra;            #ifdef Right_               Mjoin(PATL,scal)(N, tmp, B, 1);            #else               Mjoin(PATL,scal)(N, tmp, B, ldb);            #endif         }      }   }
开发者ID:onenoc,项目名称:QuantSoftwareToolkit,代码行数:70,


示例29: mmcase0

int mmcase0(int MFLOP, int CACHESIZE, char TA, char TB, int M, int N, int K,	    SCALAR alpha, int lda, int ldb, SCALAR beta, int ldc){   char *pc;#ifdef TREAL   char *form="%4d   %c   %c %4d %4d %4d  %5.1f  %5.1f  %6.2f %5.1f %5.2f   %3s/n";   #define MALPH alpha   #define MBETA beta   TYPE betinv, bet=beta;#else   #define MALPH *alpha, alpha[1]   #define MBETA *beta, beta[1]   char *form="%4d   %c   %c %4d %4d %4d  %5.1f %5.1f  %5.1f %5.1f  %6.2f %6.1f %4.2f   %3s/n";   TYPE betinv[2], *bet=beta;#endif   int nreps, incA, incB, incC, inc, nmat, k;   TYPE *c, *C, *a, *A, *b, *B, *st;   int ii, jj, i, j=0, PASSED, nerrs;   double t0, t1, t2, t3, mflop, mf, mops;   TYPE maxval, f1, ferr;   static TYPE feps=0.0;   static int itst=1;   enum ATLAS_TRANS TAc, TBc;   void *vp;   #ifdef TCPLX      if (*beta == 0.0 && beta[1] == 0.0) betinv[0] = betinv[1] = 0.0;      else if (beta[1] == 0.0) { betinv[0] = 1 / *beta;  betinv[1] = 0.0; }      else      {         t0 = *beta;         t1 = beta[1];         if (Mabs(t1) <= Mabs(t0))         {            t2 = t1 / t0;            betinv[0] = t0 = 1.0 / (t0 + t1*t2);            betinv[1] = -t0 * t2;         }         else         {            t2 = t0 / t1;            betinv[1] = t0 = -1.0 / (t1 + t0*t2);            betinv[0] = -t2 * t0;         }      }      mops = ( ((8.0*M)*N)*K ) / 1000000.0;   #else      if (beta != 0.0) betinv = 1.0 / beta;      else betinv = beta;      mops = ( ((2.0*M)*N)*K ) / 1000000.0;   #endif   nreps = MFLOP / mops;   if (nreps < 1) nreps = 1;   if (TA == 'n' || TA == 'N')   {      TAc = AtlasNoTrans;      incA = lda * K;   }   else   {      if (TA == 'c' || TA == 'C') TAc = AtlasConjTrans;      else TAc = AtlasTrans;      incA = lda * M;   }   if (TB == 'n' || TB == 'N')   {      incB = ldb * N;      TBc = AtlasNoTrans;   }   else   {      incB = ldb * K;      if (TB == 'c' || TB == 'C') TBc = AtlasConjTrans;      else TBc = AtlasTrans;   }   incC = ldc*N;   inc = incA + incB + incC;   i = M*K + K*N + M*N;  /* amount of inc actually referenced */   /* This is a hack; change to use of flushcache instead. */   nmat = ((CACHESIZE/ATL_sizeof) + i)/i;   vp = malloc(ATL_MulBySize(nmat*inc)+ATL_Cachelen);   ATL_assert(vp);   C = c = ATL_AlignPtr(vp);   a = A = C + incC;   b = B = A + incA;   st = C + nmat*inc;   matgen(inc, nmat, C, inc, M*N);#ifdef DEBUG   printmat("A0", M, K, A, lda);   printmat("B0", K, N, B, ldb);   printmat("C0", M, N, C, ldc);#endif   t0 = time00();   for (k=nreps; k; k--)   {      trusted_gemm(TAc, TBc, M, N, K, alpha, a, lda, b, ldb, bet, c, ldc);      c += inc; a += inc; b += inc;      if (c == st)//.........这里部分代码省略.........
开发者ID:certik,项目名称:vendor,代码行数:101,


示例30: void

static int ATL_trmvLT(   const enum ATLAS_DIAG  Diag,   const int nb,   ATL_CINT N,   const TYPE *A,   ATL_CINT lda,   TYPE *X,   ATL_CINT incX)/* * RETURNS: 0 if TRMV was performed, non-zero if nothing done */{   static void (*trmvK)(ATL_CINT, const TYPE*, ATL_CINT, const TYPE*, TYPE*);   void (*gemv)(ATL_CINT, ATL_CINT, const SCALAR, const TYPE*, ATL_CINT,                const TYPE*, ATL_CINT, const SCALAR, TYPE*, ATL_CINT);   void *vp;   TYPE *x, *y;   const size_t opsize = (N*N+N+N)*sizeof(TYPE)SHIFT;   size_t t0;   #ifdef TCPLX      size_t N2=N+N, lda2 = lda+lda;      TYPE one[2] = {ATL_rone, ATL_rzero};   #else      #define N2 N      #define lda2 lda      #define one ATL_rone   #endif   const size_t incA = ((size_t)lda+1)*(nb SHIFT);   ATL_CINT Nnb = ((N-1)/nb)*nb, Nr = N-Nnb;   ATL_INT j;   if (N < nb+nb)      return(1);   if (opsize > MY_CE)      gemv = Mjoin(PATL,gemvT);   else      gemv = (opsize <= ATL_MulBySize(ATL_L1elts)) ? Mjoin(PATL,gemvT_L1) :             Mjoin(PATL,gemvT_L2);   trmvK = (Diag == AtlasNonUnit) ? ATL_trmvLTNk : ATL_trmvLTUk;/* * If X is aligned to Cachelen wt inc=1, use it as y */   t0 = (size_t) X;   if (incX == 1 && (ATL_MulByCachelen(ATL_DivByCachelen(t0)) == t0))   {      ATL_INT i;      vp = malloc(ATL_Cachelen+ATL_MulBySize(N));      if (!vp)         return(2);      x = ATL_AlignPtr(vp);      y = X;      for (i=0; i < N2; i++)      {         x[i] = X[i];         X[i] = ATL_rzero;      }   }   else  /* allocate both X and Y */   {      vp = malloc((ATL_Cachelen+ATL_MulBySize(N))<<1);      if (!vp)         return(3);      x = ATL_AlignPtr(vp);      y = x + N2;      y = ATL_AlignPtr(y);      Mjoin(PATL,copy)(N, X, incX, x, 1);      Mjoin(PATL,zero)(N, y, 1);   }   for (j=0; j < Nnb; j += nb, A += incA)   {      #ifdef TCPLX         const register size_t j2=j+j, nb2=nb+nb;      #else         #define j2 j         #define nb2 nb      #endif      trmvK(nb, A, lda, x+j2, y+j2);      gemv(N-j-nb, nb, one, A+nb2, lda, x+j2+nb2, 1, one, y+j2, 1);      #ifndef TCPLX         #undef j2         #undef nb2      #endif   }   #ifdef TCPLX      j += j;   #endif   trmvK(Nr, A, lda, x+j, y+j);   if (y != X)      Mjoin(PATL,copy)(N, y, 1, X, incX);   free(vp);   return(0);}
开发者ID:AIDman,项目名称:Kaldi,代码行数:95,



注:本文中的ATL_AlignPtr函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ ATL_MulBySize函数代码示例
C++ ATLVERIFY函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。