这篇教程C++ ATL_AlignPtr函数代码示例写得很实用,希望能帮到您。
本文整理汇总了C++中ATL_AlignPtr函数的典型用法代码示例。如果您正苦于以下问题:C++ ATL_AlignPtr函数的具体用法?C++ ATL_AlignPtr怎么用?C++ ATL_AlignPtr使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。 在下文中一共展示了ATL_AlignPtr函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: cblas_cher2
/*
 * CBLAS entry point for the single precision complex Hermitian rank-2
 * update.  Arguments are validated (unless NoCblasErrorChecks is defined),
 * negative strides are rebased, and the work is forwarded to ATL_cher2.
 *
 * Column-major: forwarded unchanged.
 * Row-major   : Uplo is flipped and X/Y swap roles.  When alpha is nonzero,
 *               unit-stride conjugated copies of both vectors are built in
 *               cache-aligned scratch space (alpha is handed to the copy of
 *               Y) and the kernel is then invoked with alpha = (1,0).
 */
void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
                 const int N, const void *alpha, const void *X, const int incX,
                 const void *Y, const int incY, void *A, const int lda)
{
   const float unit[2] = {ATL_rone, ATL_rzero};
   const float *ap = alpha, *xp = X, *yp = Y;
   const enum CBLAS_UPLO flipped =
      (Uplo == CblasUpper) ? CblasLower : CblasUpper;
   void *rawX, *rawY;
   float *conjX, *conjY;
   int info = 2000;

#ifndef NoCblasErrorChecks
   if (Order != CblasColMajor && Order != CblasRowMajor)
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (Uplo != CblasUpper && Uplo != CblasLower)
      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",
                          CblasUpper, CblasLower, Uplo);
   if (N < 0)
      info = cblas_errprn(3, info,
                          "N cannot be less than zero; is set to %d.", N);
   if (!incX)
      info = cblas_errprn(6, info,
                          "incX cannot be zero; is set to %d.", incX);
   if (!incY)
      info = cblas_errprn(8, info,
                          "incY cannot be zero; is set to %d.", incY);
   if (lda < N || lda < 1)
      info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                          lda, N);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_cher2", "");
      return;
   }
#endif

   /* For negative strides, rebase the pointer (2 floats per element) */
   if (incX < 0) xp += ((1-N)*incX) << 1;
   if (incY < 0) yp += ((1-N)*incY) << 1;

   if (Order == CblasColMajor)
   {
      ATL_cher2(Uplo, N, alpha, xp, incX, yp, incY, A, lda);
      return;
   }

   /* Row-major with alpha == 0: no scratch copies are made */
   if (ap[0] == ATL_rzero && ap[1] == ATL_rzero)
   {
      ATL_cher2(flipped, N, alpha, yp, incY, xp, incX, A, lda);
      return;
   }

   /* Row-major, alpha != 0: work on conjugated unit-stride copies */
   rawX = malloc(ATL_Cachelen + ATL_MulBySize(N));
   rawY = malloc(ATL_Cachelen + ATL_MulBySize(N));
   ATL_assert(rawX != NULL && rawY != NULL);
   conjX = ATL_AlignPtr(rawX);
   conjY = ATL_AlignPtr(rawY);
   ATL_cmoveConj(N, alpha, yp, incY, conjY, 1);
   ATL_ccopyConj(N, xp, incX, conjX, 1);
   ATL_cher2(flipped, N, unit, conjY, 1, conjX, 1, A, lda);
   free(rawX);
   free(rawY);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:56,
示例2: Mjoin
/*
 * Hermitian rank-K update, NoTrans case.  For K above HERK_Xover the full
 * N x N product is formed with CgemmNC in an aligned scratch matrix and then
 * merged into C by the her_put variant selected from beta; otherwise the
 * reference implementation is called.
 */
void Mjoin(Mjoin(Mjoin(PATL,herk),UploNM),N)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
{
   const TYPE zero[2] = {0.0, 0.0};
   const TYPE beta = *( (const TYPE *)vbeta );
   TYPE alpha[2];
   void *raw;
   TYPE *prod;

   alpha[0] = *( (const TYPE *)valpha );

   if (!(K > HERK_Xover))
   {
      Mjoin(PATL,refherk)(Uplo_, AtlasNoTrans, N, K, *alpha, A, lda, beta,
                          C, ldc);
      return;
   }

   alpha[1] = 0.0;   /* alpha is passed to the gemm as a 2-element scalar */
   raw = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);
   ATL_assert(raw);
   prod = ATL_AlignPtr(raw);
   CgemmNC(N, N, K, alpha, A, lda, A, lda, zero, prod, N);

   if ( beta == 1.0 )
      Mjoin(her_put,_b1)(N, prod, vbeta, C, ldc);
   else if ( beta == 0.0 )
      Mjoin(her_put,_b0)(N, prod, vbeta, C, ldc);
   else
      Mjoin(her_put,_bXi0)(N, prod, vbeta, C, ldc);

   free(raw);
}
开发者ID:certik,项目名称:vendor,代码行数:26,
示例3: Mjoin int Mjoin(PATL,her2kLN) #endif#endif (const int N, const int K, const void *valpha, const void *A, const int lda, const void *B, const int ldb, const void *vbeta, void *C, const int ldc){ int i; void *vc=NULL; TYPE *c; const TYPE beta =*( (const TYPE *)vbeta ); const TYPE zero[2]={0.0, 0.0}; i = ATL_MulBySize(N)*N; if (i <= ATL_MaxMalloc) vc = malloc(ATL_Cachelen+i); if (vc == NULL) return(1); c = ATL_AlignPtr(vc); #ifdef Transpose_ ATL_ammm(AtlasConjTrans, AtlasNoTrans, N, N, K, valpha, A, lda, B, ldb, #else ATL_ammm(AtlasNoTrans, AtlasConjTrans, N, N, K, valpha, A, lda, B, ldb, #endif zero, c, N); if ( beta == 1.0 ) Mjoin(her2k_put,_b1)(N, c, vbeta, C, ldc); else if ( beta == 0.0 ) Mjoin(her2k_put,_b0)(N, c, vbeta, C, ldc); else Mjoin(her2k_put,_bXi0)(N, c, vbeta, C, ldc); free(vc); return(0);}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:28,
示例4: Mjoin
/*
 * Left-side symmetric matrix multiply.  When N exceeds SYMM_Xover the
 * symmetric M x M matrix A is expanded by sycopy into an aligned dense
 * scratch matrix and the product is computed with CgemmTN; otherwise the
 * reference symm is used.  In the real build alpha is handed to the copy
 * routine and the gemm is called with alpha = 1.
 */
void Mjoin(Mjoin(PATL,symmL),UploNM)
   (const int M, const int N, const void *valpha, const void *A,
    const int lda, const void *B, const int ldb, const void *vbeta,
    void *C, const int ldc)
{
#ifdef TREAL
   const SCALAR alpha=*( (const SCALAR *)valpha );
   const SCALAR beta =*( (const SCALAR *)vbeta );
   const SCALAR one=1.0;
#else
   #define alpha valpha
   #define beta vbeta
#endif
   void *raw;
   TYPE *dense;

   if (!(N > SYMM_Xover))
   {
      Mjoin(PATL,refsymm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,
                          beta, C, ldc);
      return;
   }

   raw = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));
   ATL_assert(raw);
   dense = ATL_AlignPtr(raw);
#ifdef TREAL
   if ( SCALAR_IS_ONE(alpha) )
      Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(M, alpha, A, lda, dense);
   else
      Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(M, alpha, A, lda, dense);
   CgemmTN(M, N, M, one, dense, M, B, ldb, beta, C, ldc);
#else
   Mjoin(Mjoin(PATL,sycopy),UploNM)(M, A, lda, dense);
   CgemmTN(M, N, M, valpha, dense, M, B, ldb, vbeta, C, ldc);
#endif
   free(raw);
}
开发者ID:Leobin7,项目名称:Kaldi,代码行数:34,
示例5: Mjoin int Mjoin(PATL,syr2kLT)#endif (const int N, const int K, const void *valpha, const void *A, const int lda, const void *B, const int ldb, const void *vbeta, void *C, const int ldc){ int i; void *vc=NULL; TYPE *c; #ifdef TREAL const SCALAR alpha=*( (const SCALAR *)valpha ); const SCALAR beta =*( (const SCALAR *)vbeta ); const SCALAR one=1.0, zero=0.0; #else #define alpha valpha const TYPE *beta=vbeta; const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0}; #endif i = ATL_MulBySize(N)*N; if (i <= ATL_MaxMalloc) vc = malloc(ATL_Cachelen+i); if (vc == NULL) return(1); c = ATL_AlignPtr(vc); CgemmTN(N, N, K, alpha, A, lda, B, ldb, zero, c, N); if ( SCALAR_IS_ONE(beta) ) Mjoin(syr2k_put,_b1)(N, c, beta, C, ldc); else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr2k_put,_b0)(N, c, beta, C, ldc); #ifdef TCPLX else if (SCALAR_IS_NONE(beta)) Mjoin(syr2k_put,_bn1)(N, c, beta, C, ldc); else if (beta[1] == *zero) Mjoin(syr2k_put,_bXi0)(N, c, beta, C, ldc); #endif else Mjoin(syr2k_put,_bX)(N, c, beta, C, ldc); free(vc); return(0);}
开发者ID:AIDman,项目名称:Kaldi,代码行数:33,
示例6: Mjoin
/*
 * Symmetric rank-K update, Trans case.  For K above SYRK_Xover the N x N
 * product is computed by CgemmTN into aligned scratch space and merged into
 * C by the syr_put variant matching beta; otherwise the reference syrk is
 * called.
 */
void Mjoin(Mjoin(Mjoin(PATL,syrk),UploNM),T)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
{
   void *raw;
   TYPE *prod;
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta );
      const SCALAR one=1.0, zero=0.0;
   #else
      #define alpha valpha
      const TYPE *beta=vbeta;
      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};
   #endif

   if (!(K > SYRK_Xover))
   {
      Mjoin(PATL,refsyrk)(Uplo_, AtlasTrans, N, K, alpha, A, lda, beta,
                          C, ldc);
      return;
   }

   raw = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);
   ATL_assert(raw);
   prod = ATL_AlignPtr(raw);
   CgemmTN(N, N, K, alpha, A, lda, A, lda, zero, prod, N);

   /* Pick the write-back routine specialised for this beta */
   if ( SCALAR_IS_ONE(beta) )
      Mjoin(syr_put,_b1)(N, prod, beta, C, ldc);
   else if ( SCALAR_IS_ZERO(beta) )
      Mjoin(syr_put,_b0)(N, prod, beta, C, ldc);
   #ifdef TCPLX
      else if ( SCALAR_IS_NONE(beta) )
         Mjoin(syr_put,_bn1)(N, prod, beta, C, ldc);
      else if (beta[1] == *zero)
         Mjoin(syr_put,_bXi0)(N, prod, beta, C, ldc);
   #endif
   else
      Mjoin(syr_put,_bX)(N, prod, beta, C, ldc);

   free(raw);
}
开发者ID:certik,项目名称:vendor,代码行数:35,
示例7: Mjoin
/*
 * Right-side symmetric matrix multiply.  When M exceeds SYMM_Xover the
 * symmetric N x N matrix A is expanded by sycopy into an aligned dense
 * scratch matrix and the product is computed with ATL_ammm; otherwise the
 * reference symm is used.
 */
void Mjoin(Mjoin(PATL,symmR),UploNM)
   (const int M, const int N, const void *valpha, const void *A,
    const int lda, const void *B, const int ldb, const void *vbeta,
    void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta );
      const SCALAR one=1.0;
   #else
      #define alpha valpha
      #define beta vbeta
   #endif
   void *raw;
   TYPE *dense;

   if (!(M > SYMM_Xover))
   {
      Mjoin(PATL,refsymm)(AtlasRight, Uplo_, M, N, alpha, A, lda, B, ldb,
                          beta, C, ldc);
      return;
   }

   raw = malloc(ATL_Cachelen + ATL_MulBySize(N)*N);
   ATL_assert(raw);
   dense = ATL_AlignPtr(raw);
   #ifdef TREAL
      if ( SCALAR_IS_ONE(alpha) )
         Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(N, alpha, A, lda, dense);
      else
         Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(N, alpha, A, lda, dense);
      ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, one, B, ldb, dense, N,
               beta, C, ldc);
   #else
      Mjoin(Mjoin(PATL,sycopy),UploNM)(N, A, lda, dense);
      ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, valpha, B, ldb, dense, N,
               vbeta, C, ldc);
   #endif
   free(raw);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:34,
示例8: Mjoin
/*
 * Left-side triangular matrix multiply.  When N exceeds TRMM_Xover the
 * triangular M x M matrix A is copied by ATL_trcopy into an aligned dense
 * scratch matrix and the product is written over C with CAgemmTN (beta is
 * zero); otherwise the reference trmm is used.
 */
void Mjoin(Mjoin(PATL,trmmL),ATLP)
   (const int M, const int N, const void *valpha, const void *A,
    const int lda, void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR one=1.0, zero=0.0;
   #else
      const TYPE zero[2]={0.0,0.0};
      #define alpha valpha
   #endif
   void *raw;
   TYPE *dense;

   if (!(N > TRMM_Xover))
   {
      Mjoin(PATL,reftrmm)(AtlasLeft, Uplo_, Trans_, Unit_, M, N, alpha,
                          A, lda, C, ldc);
      return;
   }

   raw = malloc(ATL_Cachelen + ATL_MulBySize(M)*M);
   ATL_assert(raw);
   dense = ATL_AlignPtr(raw);
   #ifdef TREAL
      if ( SCALAR_IS_ONE(alpha) )
         Mjoin(ATL_trcopy,_a1)(M, alpha, A, lda, dense);
      else
         Mjoin(ATL_trcopy,_aX)(M, alpha, A, lda, dense);
      CAgemmTN(M, N, M, one, dense, M, C, ldc, zero, C, ldc);
   #else
      ATL_trcopy(M, A, lda, dense);
      CAgemmTN(M, N, M, valpha, dense, M, C, ldc, zero, C, ldc);
   #endif
   free(raw);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:32,
示例9: cblas_zgerc
/*
 * CBLAS entry point for the double precision complex conjugated rank-1
 * update.  Arguments are validated (unless NoCblasErrorChecks is defined),
 * negative strides are rebased, and the work is forwarded to ATLAS.
 *
 * Column-major: ATL_zgerc is called directly.
 * Row-major   : Y is passed with alpha through ATL_zmoveConj into an aligned
 *               unit-stride scratch vector, and ATL_zgeru is called with the
 *               dimensions and vectors swapped and alpha = (1,0).
 *
 * Error reporting: the first failing check is reported through cblas_xerbla
 * and the routine returns without touching A.
 */
void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
                 const void *alpha, const void *X, const int incX,
                 const void *Y, const int incY, void *A, const int lda)
{
   int info = 2000;
   const double *x = X, *y = Y;
   void *vy;
   double *y0;
   double one[2] = {ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (M < 0)
      info = cblas_errprn(2, info,
                          "M cannot be less than zero; is set to %d.", M);
   if (N < 0)
      info = cblas_errprn(3, info,
                          "N cannot be less than zero; is set to %d.", N);
   if (!incX)
      info = cblas_errprn(6, info,
                          "incX cannot be zero; is set to %d.", incX);
   if (!incY)
      info = cblas_errprn(8, info,
                          "incY cannot be zero; is set to %d.", incY);
   if (Order == CblasColMajor)
   {
      if (lda < M || lda < 1)
         info = cblas_errprn(10, info, "lda must be >= MAX(M,1): lda=%d M=%d",
                             lda, M);
   }
   else if (Order == CblasRowMajor)
   {
      /* The value printed here is N, so the label must say N (was "M=") */
      if (lda < N || lda < 1)
         info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                             lda, N);
   }
   else
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_zgerc", "");
      return;
   }
#endif

   /* For negative strides, rebase the pointer (2 doubles per element) */
   if (incX < 0) x += (1-M)*incX<<1;
   if (incY < 0) y += (1-N)*incY<<1;

   if (Order == CblasColMajor)
      ATL_zgerc(M, N, alpha, x, incX, y, incY, A, lda);
   else
   {
      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vy);
      y0 = ATL_AlignPtr(vy);
      ATL_zmoveConj(N, alpha, y, incY, y0, 1);
      ATL_zgeru(N, M, one, y0, 1, x, incX, A, lda);
      free(vy);
   }
}
开发者ID:GorgonCryoEM,项目名称:Gorgon-CVS,代码行数:56,
示例10: cblas_zher
/*
 * CBLAS entry point for the double precision complex Hermitian rank-1
 * update.  Column-major input is forwarded to ATL_zher unchanged.  For
 * row-major input Uplo is flipped; when alpha is nonzero X is first passed
 * through ATL_zmoveConj (with scalar (1,0)) into an aligned unit-stride
 * scratch vector, otherwise ATL_zher is called with a zero alpha on the
 * caller's X.
 */
void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
                const int N, const double alpha, const void *X, const int incX,
                void *A, const int lda)
{
   const enum CBLAS_UPLO flipped =
      (Uplo == CblasUpper) ? CblasLower : CblasUpper;
   const double *xp = X;
   double unit[2] = {ATL_rone, ATL_rzero};
   double *conjX;
   void *raw;
   int info = 2000;

#ifndef NoCblasErrorChecks
   if (Order != CblasColMajor && Order != CblasRowMajor)
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (Uplo != CblasUpper && Uplo != CblasLower)
      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",
                          CblasUpper, CblasLower, Uplo);
   if (N < 0)
      info = cblas_errprn(3, info,
                          "N cannot be less than zero; is set to %d.", N);
   if (!incX)
      info = cblas_errprn(6, info,
                          "incX cannot be zero; is set to %d.", incX);
   if (lda < N || lda < 1)
      info = cblas_errprn(8, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                          lda, N);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_zher", "");
      return;
   }
#endif

   if (incX < 0) xp += ((1-N)*incX) << 1;

   if (Order == CblasColMajor)
   {
      ATL_zher(Uplo, N, alpha, xp, incX, A, lda);
      return;
   }
   if (!(alpha != ATL_rzero))
   {
      ATL_zher(flipped, N, ATL_rzero, xp, incX, A, lda);
      return;
   }

   raw = malloc(ATL_Cachelen + ATL_MulBySize(N));
   ATL_assert(raw);
   conjX = ATL_AlignPtr(raw);
   ATL_zmoveConj(N, unit, xp, incX, conjX, 1);
   ATL_zher(flipped, N, alpha, conjX, 1, A, lda);
   free(raw);
}
开发者ID:apollos,项目名称:atlas,代码行数:49,
示例11: ATL_flushcache
/*
 * Cache flusher with three modes selected by the sign of size:
 *   size > 0 : (re)initialise - allocate size bytes (plus alignment slack)
 *              and zero them with ATL_dzero;
 *   size < 0 : flush - read every double of the buffer and return the sum;
 *   size == 0: release the buffer.
 * Returns the accumulated sum for a flush and 0.0 otherwise.
 *
 * Flush cache by reading enough mem; note that if the compiler gets
 * really smart, may be necessary to make vp a global variable so it
 * can't figure out it's not being modified other than during setup;
 * the fact that ATL_dzero is external will confuse most compilers.
 *
 * State is kept in function-local statics, so the routine is not reentrant.
 */
double ATL_flushcache(long long size)
{
   static void *vp=NULL;
   static long long N = 0;
   double *cache;
   double dret=0.0;
   long long i;   /* same signedness as N: avoids a mixed-sign comparison */

   if (size < 0)        /* flush cache */
   {
      ATL_assert(vp);
      cache = ATL_AlignPtr(vp);
      for (i=0; i < N; i++)
         dret += cache[i];
   }
   else if (size > 0)   /* initialize */
   {
      free(vp);         /* a second initialise must not leak the old buffer */
      vp = malloc(ATL_Cachelen + size);
      ATL_assert(vp);
      N = size / sizeof(double);
      cache = ATL_AlignPtr(vp);
      ATL_dzero(N, cache, 1);
   }
   else                 /* size == 0: free cache */
   {
      free(vp);         /* free(NULL) is a no-op */
      vp = NULL;
      N = 0;
   }
   return(dret);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:36,
示例12: Mjoin
/*
 * Left-side Hermitian matrix multiply.  When N exceeds HEMM_Xover the
 * Hermitian M x M matrix A is expanded by hecopy into an aligned dense
 * scratch matrix and the product is computed with ATL_ammm; otherwise the
 * reference hemm is used.
 */
void Mjoin(Mjoin(PATL,hemmL),UploNM)
   (const int M, const int N, const void *alpha, const void *A, const int lda,
    const void *B, const int ldb, const void *beta, void *C, const int ldc)
{
   void *raw;
   TYPE *dense;

   if (!(N > HEMM_Xover))
   {
      Mjoin(PATL,refhemm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,
                          beta, C, ldc);
      return;
   }

   raw = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));
   ATL_assert(raw);
   dense = ATL_AlignPtr(raw);
   Mjoin(Mjoin(PATL,hecopy),UploNM)(M, A, lda, dense);
   ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, M, alpha, dense, M, B, ldb,
            beta, C, ldc);
   free(raw);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:20,
示例13: ATL_ptflushcache
/*
 * Threaded cache flusher with three modes selected by the sign of size:
 *   size > 0 : (re)initialise - allocate size bytes per thread (plus
 *              alignment slack) and zero the whole area with ATL_dzero;
 *   size < 0 : flush - give each of the ATL_NTHREADS workers its own
 *              N-double slice and launch ATL_DoWorkFC through
 *              ATL_goparallel;
 *   size == 0: release the buffer.
 * The return value is always 0.0 (nothing is accumulated in this routine).
 *
 * Flush cache by reading enough mem; note that if the compiler gets
 * really smart, may be necessary to make vp a global variable so it
 * can't figure out it's not being modified other than during setup;
 * the fact that ATL_dzero is external will confuse most compilers.
 *
 * State is kept in function-local statics, so the routine is not reentrant.
 */
double ATL_ptflushcache(long long size)
{
   static void *vp=NULL;
   static double *cache=NULL;
   static long long N = 0;
   double dret=0.0;
   long long i;   /* plain automatic: the index carries no state across calls */
   ATL_FC fct[ATL_NTHREADS];

   if (size < 0)        /* flush cache */
   {
      ATL_assert(cache);
      for (i=0; i < ATL_NTHREADS; i++)
      {
         fct[i].N = N;
         fct[i].dp = cache+i*N;
      }
      ATL_goparallel(ATL_NTHREADS, ATL_DoWorkFC, fct, NULL);
   }
   else if (size > 0)   /* initialize */
   {
      free(vp);         /* a second initialise must not leak the old buffer */
      vp = malloc(ATL_Cachelen + (size * ATL_NTHREADS));
      ATL_assert(vp);
      cache = ATL_AlignPtr(vp);
      N = size / sizeof(double);
      ATL_dzero(N*ATL_NTHREADS, cache, 1);
   }
   else                 /* size == 0: free cache */
   {
      free(vp);         /* free(NULL) is a no-op */
      vp = cache = NULL;
      N = 0;
   }
   return(dret);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:40,
示例14: Mjoin
/*
 * Right-side triangular solve.
 *
 * Real build (TREAL):
 *   - Transposed cases: when M > 4*N, A is copied by ATL_trcopy into an
 *     aligned N x N scratch matrix and the ATLPt trsmKR kernel is run on
 *     the copy; otherwise the reference trsm is used.
 *   - Non-transposed cases: the trsmKR kernel is called on A directly.
 * Complex build:
 *   - CtrsmKR when M > 4*N and N <= 4, the reference trsm otherwise.
 *
 * Changes from the previous version: the inner "#ifdef TREAL" sat inside an
 * outer "#ifdef TREAL", so its #else branch could never be compiled and has
 * been removed; the scratch size is now ATL_MulBySize(N)*N, the form used
 * by the sibling kernels, instead of forming N*N in int first.
 * NOTE(review): the overflow benefit assumes ATL_MulBySize widens its
 * argument - confirm against its definition.
 */
void Mjoin(Mjoin(PATL,trsmR),ATLP)
   (const int M, const int N, const void *valpha, const void *A,
    const int lda, void *C, const int ldc)
{
   const TYPE *alpha=valpha;
#ifdef TREAL
   #if defined(Transpose_) || defined(ConjTrans_)
      if ( M > (N<<2) )
      {
         void *va;
         TYPE *a;

         va = malloc(ATL_Cachelen + ATL_MulBySize(N)*N);
         ATL_assert(va);
         a = ATL_AlignPtr(va);
         Mjoin(ATL_trcopy,_a1)(N, ATL_rone, A, lda, a);
         Mjoin(Mjoin(PATL,trsmKR),ATLPt)(M, N, *alpha, a, N, C, ldc);
         free(va);
      }
      else
         Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, *alpha,
                             A, lda, C, ldc);
   #else
      Mjoin(Mjoin(PATL,trsmKR),ATLP)(M, N, *alpha, A, lda, C, ldc);
   #endif
#else
   if (M > (N<<2) && N <= 4)
      Mjoin(PATL,CtrsmKR)(Uplo_, Trans_, Unit_, M, N, valpha, A, lda, C, ldc);
   else
      Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, alpha,
                          A, lda, C, ldc);
#endif
}
开发者ID:onenoc,项目名称:QuantSoftwareToolkit,代码行数:36,
示例15: Mjoin
/*
 * Convenience front end for ormrq_wrk: asks the worker for its workspace
 * size (lwork = -1 query), allocates an aligned buffer of that size, runs
 * the worker with it and returns the worker's status.
 */
int Mjoin(PC2F,ormrq)
   (const enum CBLAS_SIDE Side, const enum CBLAS_TRANSPOSE TA,
    ATL_CINT M, ATL_CINT N, ATL_CINT K, TYPE *A, ATL_CINT lda,
    TYPE *TAU, TYPE *C, ATL_CINT ldc)
{
   TYPE query[2];
   ATL_INT wlen;
   void *raw;
   TYPE *aligned;
   int status;

   /* Workspace query: the required length comes back in query[0] */
   ATL_assert(!Mjoin(PC2F,ormrq_wrk)(Side, TA, M, N, K, A, lda, TAU,
                                     C, ldc, query, -1));
   wlen = query[0];

   raw = malloc(ATL_MulBySize(wlen) + ATL_Cachelen);
   ATL_assert(raw);
   aligned = ATL_AlignPtr(raw);

   status = Mjoin(PC2F,ormrq_wrk)(Side, TA, M, N, K, A, lda, TAU,
                                  C, ldc, aligned, wlen);
   free(raw);
   return(status);
}
开发者ID:apollos,项目名称:atlas,代码行数:24,
示例16: clapack_sgetri
/*
 * C LAPACK front end for single precision matrix inversion from an LU
 * factorisation.  A workspace of NB*N elements (N*N when NB exceeds N) is
 * allocated with alignment slack and handed to ATL_getri.
 *
 * Returns the status of ATL_getri, or -7 after reporting through
 * cblas_xerbla when the workspace cannot be allocated.
 */
int clapack_sgetri(const enum CBLAS_ORDER Order, const int N, float *A,
                   const int lda, const int *ipiv)
{
   int ierr=0, lwrk;
   int Mjoin(PATL,GetNB)();
   void *vp;

   lwrk = Mjoin(PATL,GetNB)();
   if (lwrk <= N) lwrk *= N;
   else lwrk = N*N;

   vp = malloc(ATL_Cachelen + ATL_MulBySize(lwrk));
   if (!vp)
   {
      /* The message used to end in a literal "/n" instead of a newline */
      cblas_xerbla(7, "clapack_sgetri",
                   "Cannot allocate workspace of %d\n", lwrk);
      return(-7);
   }

   ierr = ATL_getri(Order, N, A, lda, ipiv, ATL_AlignPtr(vp), &lwrk);
   free(vp);
   return(ierr);
}
开发者ID:apollos,项目名称:atlas,代码行数:24,
示例17: cblas_cger2c
/*
 * CBLAS entry point for the single precision complex double rank-1 update
 * with conjugated second vectors (two updates, alpha*X*Y^H and
 * beta*W*Z^H, applied to A in one call).
 *
 * Column-major: ATL_cger2c is called directly.
 * Row-major   : Y and Z are passed with alpha and beta through
 *               ATL_cmoveConj into two aligned unit-stride scratch vectors,
 *               and ATL_cger2u is called on the transposed problem
 *               (dimensions swapped, Y/Z first, X/W second) with both
 *               scalars equal to (1,0).
 *
 * Fixes relative to the previous version:
 *   - z0 was placed only N floats after y0, but each vector holds N complex
 *     elements (2N floats), so writing z0 clobbered the tail of y0;
 *   - the scaled copy z0 was built but never passed on: the kernel was
 *     given beta, w, z in column-major order, pairing the length-M vector
 *     w with the N-row transposed matrix;
 *   - the row-major lda message labelled the printed N as "M=".
 * NOTE(review): the argument order of ATL_cger2u is assumed to match the
 * ATL_cger2c call below - confirm against its prototype.
 */
void cblas_cger2c(const enum CBLAS_ORDER Order, ATL_CINT M, ATL_CINT N,
                  const void *alpha, const void *X, ATL_CINT incX,
                  const void *Y, ATL_CINT incY, const void *beta,
                  const void *W, ATL_CINT incW, const void *Z, ATL_CINT incZ,
                  void *A, ATL_CINT lda)
{
   int info = 2000;
   const float *x = X, *y = Y, *w = W, *z = Z;
   void *vy;
   float *y0, *z0;
   float one[2] = {ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (M < 0)
      info = cblas_errprn(2, info,
                          "M cannot be less than zero; is set to %d.", M);
   if (N < 0)
      info = cblas_errprn(3, info,
                          "N cannot be less than zero; is set to %d.", N);
   if (!incX)
      info = cblas_errprn(6, info,
                          "incX cannot be zero; is set to %d.", incX);
   if (!incY)
      info = cblas_errprn(8, info,
                          "incY cannot be zero; is set to %d.", incY);
   if (!incW)
      info = cblas_errprn(11, info,
                          "incW cannot be zero; is set to %d.", incW);
   if (!incZ)
      info = cblas_errprn(13, info,
                          "incZ cannot be zero; is set to %d.", incZ);
   if (Order == CblasColMajor)
   {
      if (lda < M || lda < 1)
         info = cblas_errprn(15, info, "lda must be >= MAX(M,1): lda=%d M=%d",
                             lda, M);
   }
   else if (Order == CblasRowMajor)
   {
      if (lda < N || lda < 1)
         info = cblas_errprn(15, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                             lda, N);
   }
   else
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_cger2c", "");
      return;
   }
#endif

   /* For negative strides, rebase the pointer (2 floats per element) */
   if (incX < 0) x += (1-M)*incX<<1;
   if (incY < 0) y += (1-N)*incY<<1;
   if (incW < 0) w += (1-M)*incW<<1;
   if (incZ < 0) z += (1-N)*incZ<<1;

   if (Order == CblasColMajor)
      ATL_cger2c(M, N, alpha, x, incX, y, incY, beta, w, incW, z, incZ,
                 A, lda);
   else
   {
      /* Room for two N-element complex vectors, each with alignment slack */
      vy = malloc(ATL_Cachelen+ATL_Cachelen + ATL_MulBySize(N+N));
      ATL_assert(vy);
      y0 = ATL_AlignPtr(vy);
      z0 = y0 + N+N;             /* skip N complex elements = 2N floats */
      z0 = ATL_AlignPtr(z0);
      ATL_cmoveConj(N, alpha, y, incY, y0, 1);
      ATL_cmoveConj(N, beta, z, incZ, z0, 1);
      ATL_cger2u(N, M, one, y0, 1, x, incX, one, z0, 1, w, incW, A, lda);
      free(vy);
   }
}
开发者ID:apollos,项目名称:atlas,代码行数:67,
示例18: Mmin//.........这里部分代码省略......... nb = clapack_ilaenv(LAIS_OPT_NB, LAormqr, MYOPT+LARight+LAUpper, M, N, K,-1);/* * If it is a workspace query, return the size of work required. * wrksz = wrksz of ATL_larfb + ATL_larft + ATL_geqr2 */ if (LWORK < 0) { if(SIDE == CblasLeft) { *WORK = ( N*nb + nb*nb + maxMN ) ; } else { *WORK = ( M*nb + nb*nb + maxMN ) ; } return(0); } else if (M < 1 || N < 1) /* quick return if no work to do */ return(0);/* * If the user gives us too little space, see if we can allocate it ourselves */ else { if(SIDE == CblasLeft) { if (LWORK < (N*nb + nb*nb + maxMN)) { vp = malloc(ATL_MulBySize(N*nb + nb*nb + maxMN) + ATL_Cachelen); if (!vp) return(-7); WORK = ATL_AlignPtr(vp); } } else { if (LWORK < (M*nb + nb*nb + maxMN)) { vp = malloc(ATL_MulBySize(M*nb + nb*nb + maxMN) + ATL_Cachelen); if (!vp) return(-7); WORK = ATL_AlignPtr(vp); } } /* if CblasRight */ }/* * Assign workspace areas for ATL_larft, ATL_geqr2, ATL_larfb */ ws_T = WORK; /* T at begining of work */ ws_QR2 = WORK +(nb SHIFT)*nb; /* After T Work space */ ws_larfb = ws_QR2 + (maxMN SHIFT); /* After workspace for T and QR2 */ if (SIDE == CblasLeft) { if ( TRANS == CblasNoTrans ) { j = (K/nb)*nb; if (j == K) { j=K -nb; }
开发者ID:AIDman,项目名称:Kaldi,代码行数:67,
示例19: Mjoin
/*
 * This gemm is for small K, so we build gemm out of AXPY (outer product)
 * rather than dot (inner product).
 *
 * The rows of A/C are processed in partitions of Mp rows.  When A is not in
 * NoTrans format each partition is first copied (and scaled by alpha) into
 * an aligned Mp x K scratch panel so the axpy kernel can use stride 1; the
 * kernel is then called with alpha = 1.
 *
 * Returns 0 on success, -1 if the scratch panel cannot be allocated.
 *
 * Fix: a partition size equal to M used to fall into the "shrink by 16"
 * branch.  For M == 16 that divided by zero, and for M < 16 it produced a
 * negative partition size.  Mp >= M now selects a single partition of M
 * rows.  For M > 32 the old code arrived at the same Mp == M after its
 * remainder adjustment.
 */
int Mjoin(PATL,mmJKI)(const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB,
                      const int M, const int N, const int K,
                      const SCALAR alpha, const TYPE *A, const int lda,
                      const TYPE *B, const int ldb, const SCALAR beta,
                      TYPE *C, const int ldc)
{
   int Mp, mp, m, k, ldaa=lda;
   void *vA=NULL;
   TYPE *pA;
   const TYPE CONE[2]={ATL_rone, ATL_rzero};
   const SCALAR alp=alpha;
/*
 * Compute M partition necessary to promote reuse in the L1 cache.  Check
 * NB^2 in addition to L1elts, to catch machines where L1 is not used by FPU.
 * If this gives a small Mp, use CacheEdge instead (reuse in L2 instead of L1).
 */
   Mp = NB*NB;
   m = ATL_L1elts >> 1;
   Mp = (m > Mp) ? m : Mp;
   Mp /= ((K+2)<<1);
   if (Mp < 128)
   {
      #if !defined(CacheEdge) || CacheEdge == 0
         Mp = M;
      #else
         Mp = (CacheEdge) / ((K+2)*ATL_sizeof);
         if (Mp < 128)
            Mp = M;
      #endif
   }
   if (Mp >= M)
      Mp = M;
/*
 * Change Mp if remainder is very small
 */
   else
   {
      Mp -= 16;      /* small safety margin on filling cache */
      mp = M / Mp;
      m = M - mp*Mp;
      if (m && m < 32)
         Mp += (m+mp-1)/mp;
   }
/*
 * If A not in NoTrans format, need to copy so it can use axpy wt stride=1.
 * NOTE: this routine should not be called when you can't afford this copy
 */
   if (TA != AtlasNoTrans)
   {
      vA = malloc(ATL_Cachelen + Mp*ATL_MulBySize(K));
      if (!vA)
         return(-1);
      pA = ATL_AlignPtr(vA);
      alp = CONE;    /* alpha is applied during the copy */
      ldaa = Mp;
      pA += Mp+Mp;   /* pre-advance; undone at the top of each iteration */
   }
   else
      pA = (TYPE *) A;

   for (m=0; m < M; m += Mp)
   {
      mp = M - m;
      if (mp > Mp)
         mp = Mp;
/*
 *    If the thing is in Trans format, copy to NoTrans
 */
      if (vA)
      {
         pA -= (Mp+Mp);
         if (TA == AtlasConjTrans)
         {
            for (k=0; k < K; k++)
            {
               Mjoin(PATL,copy)(mp, A+k+k, lda, pA+((k*ldaa)<<1), 1);
               /* negate the imaginary parts of the copied column */
               Mjoin(PATLU,scal)(mp, ATL_rnone, pA+1+((k*ldaa)<<1), 2);
               if (!SCALAR_IS_ONE(alpha))
                  Mjoin(PATL,scal)(mp, alpha, pA+((k*ldaa)<<1), 1);
            }
         }
         else
         {
            for (k=0; k < K; k++)
               Mjoin(PATL,cpsc)(mp, alpha, A+k+k, lda, pA+((k*ldaa)<<1), 1);
         }
         A += mp*(lda+lda);
      }
      Mjoin(PATL,mm_axpy)(AtlasNoTrans, TB, mp, N, K, alp, pA, ldaa, B, ldb,
                          beta, C, ldc);
      pA += mp+mp;
      C += mp+mp;
   }
   free(vA);         /* free(NULL) is a no-op when no copy was made */
   return(0);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:99,
示例20: ATL_her
/*
 * Hermitian rank-1 update built on a tuned GER kernel.
 *
 * Nothing is done for N < 1 or alpha == 0; problems with N < 50, or for
 * which scratch space cannot be allocated, go to the reference routine.
 * Otherwise the routine always builds xt (X passed through copyConj, or
 * through moveConj with alpha when alpha != 1) and, when X has non-unit
 * stride or does not satisfy the kernel's alignment request, also a
 * unit-stride copy of X, before dispatching to the upper or lower kernel
 * driver.
 */
void ATL_her(const enum ATLAS_UPLO Uplo, ATL_CINT N, const TYPE alpha,
             const TYPE *X, ATL_CINT incX, TYPE *A, ATL_CINT lda)
{
   const TYPE calpha[2] = {alpha, ATL_rzero};
   const int alphaIsOne = (alpha == ATL_rone);
   ATL_r1kern_t kern;
   ATL_INT CacheElts;
   int mu, nu, minM, minN, alignX, alignXt, FNU, ALIGNX2A;
   int needCopy;
   void *raw=NULL;
   TYPE *xcpy, *xconj;

   if (N < 1 || (alpha == ATL_rzero))
      return;
/*
 * For very small problems, avoid overhead of func calls & data copy
 */
   if (N < 50)
   {
      Mjoin(PATL,refher)(Uplo, N, alpha, X, incX, A, lda);
      return;
   }
/*
 * Determine the GER kernel to use, and its parameters
 */
   kern = ATL_GetR1Kern(N-ATL_s1L_NU, ATL_s1L_NU, A, lda, &mu, &nu,
                        &minM, &minN, &alignX, &ALIGNX2A, &alignXt,
                        &FNU, &CacheElts);
/*
 * Determine if we need to copy the vectors: always for non-unit stride,
 * otherwise only when the kernel's alignment request is not met
 */
   needCopy = (incX != 1);
   if (!needCopy)
   {
/*
 *    ATL_Cachelen is the highest alignment that can be requested, so
 *    make X's % with Cachelen match that of A if you want A & X to have
 *    the same alignment
 */
      if (ALIGNX2A)
      {
         const size_t addrA = (size_t) A, addrX = (size_t) X;
         const size_t offA =
            addrA - ATL_MulByCachelen(ATL_DivByCachelen(addrA));
         const size_t offX =
            addrX - ATL_MulByCachelen(ATL_DivByCachelen(addrX));
         needCopy = (offA != offX);
      }
      else if (alignX)
      {
         const size_t addrX = (size_t) X;
         needCopy = ((addrX/alignX)*alignX != addrX);
      }
   }

   raw = malloc((ATL_Cachelen+ATL_MulBySize(N))*(1+needCopy));
   if (!raw)
   {
      Mjoin(PATL,refher)(Uplo, N, alpha, X, incX, A, lda);
      return;
   }

   xconj = ATL_AlignPtr(raw);
   if (needCopy)
   {
      xcpy = xconj + N+N;
      xcpy = ALIGNX2A ? ATL_Align2Ptr(xcpy, A) : ATL_AlignPtr(xcpy);
      Mjoin(PATL,copy)(N, X, incX, xcpy, 1);
   }
   else
      xcpy = (TYPE*) X;

   if (alphaIsOne)
      Mjoin(PATL,copyConj)(N, X, incX, xconj, 1);
   else
      Mjoin(PATL,moveConj)(N, calpha, X, incX, xconj, 1);

   if (Uplo == AtlasUpper)
      Mjoin(PATL,her_kU)(kern, N, alpha, xcpy, xconj, A, lda);
   else
      Mjoin(PATL,her_kL)(kern, N, alpha, xcpy, xconj, A, lda);

   free(raw);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:76,
示例21: Mjoinint Mjoin(PATL,mmJITcp)(const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB, const int M0, const int N, const int K, const SCALAR alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const SCALAR beta, TYPE *C, const int ldc)/* * Copy matmul algorithm, copies A and B on-the-fly * If M < 0, allocates only (MB+NB)*KB workspace */{ void *v=NULL; const TYPE *a=A; TYPE *pA, *pB, *pB0; MAT2BLK2 A2blk, B2blk; NBMM0 NBmm0, NBmm1, pNBmm0; const int M = (M0 >= 0) ? M0 : -M0; int nkblks, nmblks, nnblks, mr, nr, kr, KR, bigK, h, i, j, ZEROC; size_t incAk, incBk, incAm, incBn, incAW, incAWp, incBW, incBWp, incW;/* * If both M and N <= NB, and one of them is not full, call BPP, which * can sometimes avoid doing cleanup forall cases */ if (M <= MB && N <= NB && (M != MB || N != NB)) return(Mjoin(PATL,mmBPP)(TA, TB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc));/* * If these workspace increments are 0, we do JIT NBxNB copies instead of * copying entire array/panel. Don't copy mat if you can't reuse it. */ if (M0 > 0) { incAW = (N > NB) ? KB*MB : 0; incBW = (M > NB) ? KB*NB : 0; } else /* allocate in minimal space */ incAW = incBW = 0; nmblks = M/MB; nnblks = N/NB; nkblks = K/KB; mr = M - nmblks*MB; nr = N - nnblks*NB; kr = K - nkblks*KB;/* * K-loop is special, in that we don't call user cleanup, must explicitly zero, * and K-cleanup is typically slower even for generated kernels. Therefore, * allow extra leaway for doing extra flops. Note error is unaffected by * any of these extra flops: K-loop has elts zeroed, and multiplying zeros * and adding in zeros doesn't add to error */ KR = (kr && kr+4 >= KB) ? 
KB : kr; bigK = nkblks*KB+KR; if (incAW) { i = MB*bigK; incAWp = KB*mr; } else { i = MB*KB; incAWp = 0; } if (incBW) { incBWp = KB*nr; incW = bigK*NB; i += N*bigK; } else { incBWp = incW = 0; i += NB*KB; } i *= sizeof(TYPE); if (i <= ATL_MaxMalloc || !(incAW | incBW)) v = malloc(ATL_Cachelen+i); if (!v) return(-1); pA = ATL_AlignPtr(v); pB0 = pA + (incAW ? bigK*MB : KB*MB); if (TA == AtlasNoTrans) { A2blk = Mjoin(PATL,gemoveT); incAk = lda*KB; incAm = MB; } else { A2blk = Mjoin(PATL,gemove); incAk = KB; incAm = MB*lda; } if (TB == AtlasNoTrans) { B2blk = Mjoin(PATL,gemove); incBk = KB; incBn = NB*ldb; } else { B2blk = Mjoin(PATL,gemoveT);//.........这里部分代码省略.........
开发者ID:AIDman,项目名称:Kaldi,代码行数:101,
示例22: Mjoinint Mjoin(PATL,mmIJK)(const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB, const int M, const int N0, const int K, const SCALAR alpha, const TYPE *A, const int lda0, const TYPE *B, const int ldb0, const SCALAR beta, TYPE *C, const int ldc0){ size_t incA, incB, incC; const size_t lda=lda0, ldb=ldb0, ldc=ldc0; const size_t incK = ATL_MulByNB((size_t)K); int N = N0; int nMb, nNb, nKb, ib, jb, kb, jb2, h, i, j, k, n; void *vA=NULL, *vC=NULL; TYPE *pA, *pB, *pC; MAT2BLK A2blk, B2blk; PUTBLK putblk; NBMM0 NBmm0; nMb = ATL_DivByNB(M); nNb = ATL_DivByNB(N); nKb = ATL_DivByNB(K); ib = M - ATL_MulByNB(nMb); jb = N - ATL_MulByNB(nNb); kb = K - ATL_MulByNB(nKb); /* * If K sufficiently large, write to temporary C as safety measure; otherwise * write directly to C */ if (nKb < 12) { putblk = NULL; pC = C; if ( SCALAR_IS_ONE(beta) ) NBmm0 = NBmm_b1; else if ( SCALAR_IS_ZERO(beta) ) NBmm0 = NBmm_b0; else NBmm0 = NBmm_bX; } else { NBmm0 = NBmm_b0; vC = malloc(ATL_Cachelen + ATL_MulBySize(NBNB)); if (!vC) return(-1); pC = ATL_AlignPtr(vC); if ( SCALAR_IS_ONE(beta) ) putblk = Mjoin(PATL,putblk_b1); else if ( SCALAR_IS_ZERO(beta) ) putblk = Mjoin(PATL,putblk_b0); else if ( SCALAR_IS_NONE(beta) ) putblk = Mjoin(PATL,putblk_bn1); else putblk = Mjoin(PATL,putblk_bX); } /* * Special case if we don't need to copy one or more input matrix */ if (K == NB && TB == AtlasNoTrans && ldb == NB && ATL_DataIsMinAligned(B)) { if (lda == NB && TA == AtlasTrans && SCALAR_IS_ONE(alpha) && ATL_DataIsMinAligned(A)) { i = NBNB; pA = (TYPE *) A; A = NULL; A2blk = NULL; incA = 0; } else { vA = malloc(ATL_Cachelen + ATL_MulBySize(incK)); if (!vA) { free(vC); return(-1); } pA = ATL_AlignPtr(vA); if (TA == AtlasNoTrans) { incA = NB; if ( SCALAR_IS_ONE(alpha) ) A2blk = Mjoin(PATL,row2blkT_a1); else A2blk = Mjoin(PATL,row2blkT_aX); } else { incA = ATL_MulByNB(lda); if ( SCALAR_IS_ONE(alpha) ) A2blk = Mjoin(PATL,col2blk_a1); else A2blk = Mjoin(PATL,col2blk_aX); } } Mjoin(PATL,mmIJK2)(K, nMb, nNb, nKb, 
ib, jb, kb, alpha, A, lda, pA, incA, A2blk, B, beta, C, ldc, pC, putblk, NBmm0); if (vA) free(vA); if (vC) free(vC); return(0); } i = ATL_Cachelen + ATL_MulBySize(N*K + incK); if (i <= ATL_MaxMalloc) vA = malloc(i); if (!vA) { if (TA == AtlasNoTrans && TB == AtlasNoTrans) { if (vC) free(vC); return(1); } if (jb) n = nNb + 1; else n = nNb;//.........这里部分代码省略.........
开发者ID:apollos,项目名称:atlas,代码行数:101,
示例23: ATL_gelqf//.........这里部分代码省略......... } else if (M < 1 || N < 1) /* quick return if no work to do */ return(0);/* * LQ is the transpose of QR: We use this to go from row-major LQ to * col-major QR, typically faster. Here, if we are square and large, * we transpose the whole matrix in-place and then transpose it back. * This should be a tunable parameter; perhaps if the matrix fits in * L1 or L2? (Note by Tony C, short on time to conduct tuning). */ if (M == N && N >= 128) { Mjoin(PATL,sqtrans)(N, A, lda); n = ATL_geqrf(M, N, A, lda, TAU, WORK, LWORK); Mjoin(PATL,sqtrans)(N, A, lda); /* Take the conjugate for Complex TAU. */ #ifdef TCPLX ATL_INT i; for (i=1; i<(minMN<<1); i+=2) *(TAU+i) = 0.-*(TAU+i); /* Negate imaginary part. */ #endif return(n); }/* * If the user gives us too little space, see if we can allocate it ourselves */ else if (LWORK < (maxMN*nb + nb*nb + maxMN)) { vp = malloc(ATL_MulBySize(maxMN*nb + nb*nb + maxMN) + ATL_Cachelen); if (!vp) return(-7); WORK = ATL_AlignPtr(vp); }/* * Assign workspace areas for ATL_larft, ATL_gelq2, ATL_larfb */ ws_T = WORK; /* T at begining of work */ ws_LQ2 = WORK +(nb SHIFT)*nb; /* After T Work space */ ws_larfb = ws_LQ2 + (maxMN SHIFT); /* After workspace for T and LQ2 *//* * Leave one iteration to be done outside loop, so we don't build T * Any loop iterations are therefore known to be of size nb (no partial blocks) */ n = (minMN / nb) * nb; if (n == minMN) n -= Mmin(nb, minMN); /* when n is a multiple of nb, reduce by nb */ #if !defined(ATL_USEPTHREADS) /* If no PCA, try to copy up front. */ j = M - n; j = Mmax(nb, j); ldCP = (N&7) ? (((N+7)>>3)<<3) : N; ws_CPRaw = malloc(ATL_MulBySize(ldCP)*j + ATL_Cachelen); if (ws_CPRaw) ws_CP=ATL_AlignPtr(ws_CPRaw); /* Align if malloced. */ #endif /* Serial Mode */ for (j=0; j < n; j += nb) { #if !defined(ATL_USEPTHREADS) /* If no PCA it won't copy. Try it here. */ /* If we got our copy workspace, transpose panel before recursion. */ if (ws_CP) /* If workspace exists. 
*/ { int ci, cj; /* for conjugation. */
开发者ID:apollos,项目名称:atlas,代码行数:67,
示例24: Mjoin
void Mjoin( PATL, tbsv )
(
   const enum ATLAS_UPLO      UPLO,
   const enum ATLAS_TRANS     TRANS,
   const enum ATLAS_DIAG      DIAG,
   const int                  N,
   const int                  K,
   const TYPE                 * A,
   const int                  LDA,
   TYPE                       * X,
   const int                  INCX
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, tbsv ) solves one of the systems of equations
 *
 *    A * x = b,   or   conjg( A  ) * x = b,   or
 *
 *    A'* x = b,   or   conjg( A' ) * x = b,
 *
 * where b and x are n-element vectors and  A is an n by n unit, or non-
 * unit, upper or lower triangular band matrix, with (k+1) diagonals.
 *
 * No test for singularity or near-singularity is included in this
 * routine. Such tests must be performed before calling this routine.
 *
 * The solve is delegated unconditionally to the reference implementation
 * (reftbsv).  Earlier revisions carried a blocked algorithm after that
 * call, but it sat behind an unconditional return and could never run, so
 * the unreachable code and its locals have been removed.  For a more
 * detailed description of the arguments of this function, see the reference
 * implementation in the ATLAS/src/blas/reference directory.
 *
 * ---------------------------------------------------------------------
 */
/* ..
 * .. Executable Statements ..
 *
 */
   if( N == 0 ) return;

   Mjoin(PATL,reftbsv)(UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX);
/*
 * End of Mjoin( PATL, tbsv )
 */
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:93,
示例25: cblas_zhbmvvoid cblas_zhbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int K, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY){ int info = 2000; const enum CBLAS_UPLO ruplo = (Uplo == CblasUpper) ? CblasLower : CblasUpper; void *vx; double *X0, *x = (double*) X; double *y = Y; const double *alp=alpha; const double *bet=beta; double calpha[2], cbeta[2]; const double one[2] = {ATL_rone, ATL_rzero}; calpha[0] = *alp; calpha[1] = -alp[1]; cbeta[0] = *bet; cbeta[1] = -bet[1];#ifndef NoCblasErrorChecks if (Order != CblasColMajor && Order != CblasRowMajor) info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d", CblasRowMajor, CblasColMajor, Order); if (Uplo != CblasUpper && Uplo != CblasLower) info = cblas_errprn(2, info, "Uplo must be %d or %d, but is set to %d", CblasUpper, CblasLower, Uplo); if (N < 0) info = cblas_errprn(3, info, "N cannot be less than zero; is set to %d.", N); if (K < 0) info = cblas_errprn(4, info, "Valid K: 0 < K < N; K=%d, N=%d.", K, N); if (lda < K+1) info = cblas_errprn(7, info, "lda cannot be less than K+1; K=%d, lda=%d/n", K, lda); if (!incX) info = cblas_errprn(9, info, "incX cannot be zero; is set to %d.", incX); if (!incY) info = cblas_errprn(12, info, "incY cannot be zero; is set to %d.", incY); if (info != 2000) { cblas_xerbla(info, "cblas_zhbmv", ""); return; }#endif if (incX < 0) x += (1-N)*incX<<1; if (incY < 0) y += (1-N)*incY<<1; if (Order == CblasColMajor) ATL_zhbmv(Uplo, N, K, alpha, A, lda, x, incX, beta, y, incY); else { vx = malloc(ATL_Cachelen + 2*N*sizeof(double)); ATL_assert(vx); X0 = x; x = ATL_AlignPtr(vx); ATL_zmoveConj(N, calpha, X0, incX, x, 1); if (*bet != ATL_rzero || bet[1] != ATL_rzero) { ATL_zscalConj(N, cbeta, y, incY); ATL_zhbmv(ruplo, N, K, one, A, lda, x, 1, one, y, incY); } else ATL_zhbmv(ruplo, N, K, one, A, lda, x, 1, beta, y, incY); free(vx); ATL_zscalConj(N, one, y, incY); }}
开发者ID:apollos,项目名称:atlas,代码行数:66,
示例26: Mjoinvoid Mjoin( PATL, hpmv )( const enum ATLAS_UPLO UPLO, const int N, const SCALAR ALPHA, const TYPE * A, const TYPE * X, const int INCX, const SCALAR BETA, TYPE * Y, const int INCY){/* * Purpose * ======= * * Mjoin( PATL, hpmv ) performs the matrix-vector operation * * y := alpha * A * x + beta * y, * * where alpha and beta are scalars, x and y are n-element vectors and A * is an n by n Hermitian matrix, supplied in packed form. * * This is a blocked version of the algorithm. For a more detailed des- * cription of the arguments of this function, see the reference imple- * mentation in the ATLAS/src/blas/reference directory. * * --------------------------------------------------------------------- *//* * .. Local Variables .. */ void (*gpmv0)( const int, const int, const SCALAR, const TYPE *, const int, const TYPE *, const int, const SCALAR, TYPE *, const int ); void (*gpmv1)( const int, const int, const SCALAR, const TYPE *, const int, const TYPE *, const int, const SCALAR, TYPE *, const int ); void (*gpmvN)( const int, const int, const SCALAR, const TYPE *, const int, const TYPE *, const int, const SCALAR, TYPE *, const int );#ifdef TREAL TYPE alphaY, beta0;#define one ATL_rone#define zero ATL_rzero#else const TYPE * alphaY, * beta0; const TYPE one [2] = { ATL_rone, ATL_rzero }, zero[2] = { ATL_rzero, ATL_rzero };#endif void * vx = NULL, * vy = NULL; TYPE * A0, * A1, * x, * x0, * x1, * y, * y00, * y0, * y1; int incXY, incXY1, j, jb, lda, lda0, lda1, mb, mb1, n, nb;/* .. * .. Executable Statements .. 
* */ if( N == 0 ) return; if( SCALAR_IS_ZERO( ALPHA ) ) { if( !( SCALAR_IS_ONE( BETA ) ) ) Mjoin( PATL, scal )( N, BETA, Y, INCY ); return; } if( ( INCX != 1 ) || ( ( INCY == 1 ) && !( SCALAR_IS_ONE( ALPHA ) ) ) ) { vx = (void *)malloc( ATL_Cachelen + ATL_MulBySize( N ) ); ATL_assert( vx ); x = ATL_AlignPtr( vx ); Mjoin( PATL, cpsc )( N, ALPHA, X, INCX, x, 1 ); alphaY = one; } else { x = (TYPE *)(X); alphaY = ALPHA; } if( ( INCY != 1 ) || !( SCALAR_IS_ONE( alphaY ) ) ) { vy = malloc( ATL_Cachelen + ATL_MulBySize( N ) ); ATL_assert( vy ); y00 = y = ATL_AlignPtr( vy ); beta0 = zero; } else { y00 = y = (TYPE *)(Y); beta0 = BETA; } ATL_GetPartSPMV( A, N, &mb, &nb ); mb1 = N - ( ( N - 1 ) / mb ) * mb; incXY1 = (nb SHIFT); if( UPLO == AtlasUpper ) { if( SCALAR_IS_ZERO( beta0 ) ) gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_b0_y1 ); else if( SCALAR_IS_ONE ( beta0 ) ) gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_b1_y1 ); else gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_bX_y1 ); gpmv1 = Mjoin( PATL, gpmvUC_a1_x1_b1_y1 ); gpmvN = Mjoin( PATL, gpmvUN_a1_x1_b1_y1 );//.........这里部分代码省略.........
开发者ID:certik,项目名称:vendor,代码行数:101,
示例27: Mjoin//.........这里部分代码省略......... mmcu = Mjoin(Mjoin(Mjoin(NCmm00,0x0x0),TT),0x0x0_aX_bX); } incAk = KB; incAm = lda*MB - Kb*KB; incAn = -lda*MB*Mb; } if (TB == AtlasNoTrans) { incBk = KB; incBm = -KB*Kb; incBn = ldb*NB; } else { incBk = KB*ldb; incBm = -Kb * incBk; incBn = NB; } if (alpha == ATL_rone) { if (beta == ATL_rzero) geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_a1),_b0); else if (beta == ATL_rone) geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_a1),_b1); else geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_a1),_bX); } else if (beta == ATL_rzero) geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_aX),_b0); else if (beta == ATL_rone) geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_aX),_b1); else geadd = Mjoin(Mjoin(Mjoin(PATL,geadd),_aX),_bX); vp = malloc(ATL_Cachelen + ATL_MulBySize(MB * NB)); ATL_assert(vp); cp = ATL_AlignPtr(vp); if (mr || nr || kr) for (j=MB*NB, i=0; i != j; i++) cp[i] = ATL_rzero; for (j=Nb; j; j--, a += incAn, b += incBn, c += incCn) { for (i=Mb; i; i--, a += incAm, b += incBm, c += incCm) { if (Kb) { mm_bX(MB, NB, KB, ATL_rone, a, lda, b, ldb, ATL_rzero, cp, MB); a += incAk; b += incBk; for (k=Kb-1; k; k--, a += incAk, b += incBk) mm_b1(MB, NB, KB, ATL_rone, a, lda, b, ldb, ATL_rone, cp, MB); if (kr) mmcu(MB, NB, kr, ATL_rone, a, lda, b, ldb, ATL_rone, cp, MB); } else if (kr) { Mjoin(PATL,zero)(MB*NB, cp, 1); /* kill NaN/INF from last time */ mmcu(MB, NB, kr, ATL_rone, a, lda, b, ldb, ATL_rzero, cp, MB); } geadd(MB, NB, alpha, cp, MB, beta, c, ldc); } } if (mr && N != nr) ATL_assert(Mjoin(PATL,NCmmIJK)(TA, TB, mr, N-nr, K, alpha, A+Mb*(incAm+Kb*incAk), lda, B, ldb, beta, C+Mb*MB, ldc) ==0); if (nr) { for (i=Mb; i; i--, a += incAm, b += incBm, c += incCm) { Mjoin(PATL,zero)(MB*nr, cp, 1); /* kill NaN and INF from last time */
开发者ID:GorgonCryoEM,项目名称:Gorgon-CVS,代码行数:67,
示例28: Mjoinvoid Mjoin(PATL,CtrsmKL) (enum ATLAS_UPLO Uplo, enum ATLAS_TRANS Trans, enum ATLAS_DIAG Diag, const int M, const int N, const SCALAR alpha, const TYPE *A, const int lda, TYPE *B, const int ldb)#endif{ TYPE tmp[2], ra, ia; void *vp; TYPE *a; if (N > 0) { if (M > 1) { vp = malloc(ATL_Cachelen + ATL_MulBySize(M)*M); ATL_assert(vp); a = ATL_AlignPtr(vp); Diag = trsmcopy(Uplo, Trans, Diag, M, alpha, A, lda, a); if (Trans != AtlasNoTrans) { if (Uplo == AtlasLower) Uplo = AtlasUpper; else Uplo = AtlasLower; } switch(M) { case 2: if (Uplo == AtlasLower) trsmLL_2(N, a, B, ldb); else trsmLU_2(N, a, B, ldb); break; case 3: if (Uplo == AtlasLower) trsmLL_3(N, a, B, ldb); else trsmLU_3(N, a, B, ldb); break; case 4: if (Uplo == AtlasLower) trsmLL_4(N, a, B, ldb); else trsmLU_4(N, a, B, ldb); break; default: /* this crap should never be used */ tmp[0] = ATL_rone; tmp[1] = ATL_rzero; Mjoin(PATL,cplxinvert)(M, a, M+M+2, a, M+M+2); Mjoin(PATL,reftrsm)(AtlasLeft, Uplo, AtlasNoTrans, Diag, M, N, tmp, a, M, B, ldb); } free(vp); } else if (M == 1) { if (Diag == AtlasUnit) #ifdef Right_ Mjoin(PATL,scal)(N, alpha, B, 1); #else Mjoin(PATL,scal)(N, alpha, B, ldb); #endif else { tmp[0] = A[0]; if (Trans != AtlasConjTrans) tmp[1] = A[1]; else tmp[1] = -A[1]; Mjoin(PATL,cplxinvert)(1, tmp, 2, tmp, 2); /* safe cplx invers */ ra = tmp[0]; ia = tmp[1]; tmp[0] = *alpha * ra - alpha[1] * ia; tmp[1] = *alpha * ia + alpha[1] * ra; #ifdef Right_ Mjoin(PATL,scal)(N, tmp, B, 1); #else Mjoin(PATL,scal)(N, tmp, B, ldb); #endif } } }
开发者ID:onenoc,项目名称:QuantSoftwareToolkit,代码行数:70,
示例29: mmcase0int mmcase0(int MFLOP, int CACHESIZE, char TA, char TB, int M, int N, int K, SCALAR alpha, int lda, int ldb, SCALAR beta, int ldc){ char *pc;#ifdef TREAL char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %6.2f %5.1f %5.2f %3s/n"; #define MALPH alpha #define MBETA beta TYPE betinv, bet=beta;#else #define MALPH *alpha, alpha[1] #define MBETA *beta, beta[1] char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %5.1f %5.1f %6.2f %6.1f %4.2f %3s/n"; TYPE betinv[2], *bet=beta;#endif int nreps, incA, incB, incC, inc, nmat, k; TYPE *c, *C, *a, *A, *b, *B, *st; int ii, jj, i, j=0, PASSED, nerrs; double t0, t1, t2, t3, mflop, mf, mops; TYPE maxval, f1, ferr; static TYPE feps=0.0; static int itst=1; enum ATLAS_TRANS TAc, TBc; void *vp; #ifdef TCPLX if (*beta == 0.0 && beta[1] == 0.0) betinv[0] = betinv[1] = 0.0; else if (beta[1] == 0.0) { betinv[0] = 1 / *beta; betinv[1] = 0.0; } else { t0 = *beta; t1 = beta[1]; if (Mabs(t1) <= Mabs(t0)) { t2 = t1 / t0; betinv[0] = t0 = 1.0 / (t0 + t1*t2); betinv[1] = -t0 * t2; } else { t2 = t0 / t1; betinv[1] = t0 = -1.0 / (t1 + t0*t2); betinv[0] = -t2 * t0; } } mops = ( ((8.0*M)*N)*K ) / 1000000.0; #else if (beta != 0.0) betinv = 1.0 / beta; else betinv = beta; mops = ( ((2.0*M)*N)*K ) / 1000000.0; #endif nreps = MFLOP / mops; if (nreps < 1) nreps = 1; if (TA == 'n' || TA == 'N') { TAc = AtlasNoTrans; incA = lda * K; } else { if (TA == 'c' || TA == 'C') TAc = AtlasConjTrans; else TAc = AtlasTrans; incA = lda * M; } if (TB == 'n' || TB == 'N') { incB = ldb * N; TBc = AtlasNoTrans; } else { incB = ldb * K; if (TB == 'c' || TB == 'C') TBc = AtlasConjTrans; else TBc = AtlasTrans; } incC = ldc*N; inc = incA + incB + incC; i = M*K + K*N + M*N; /* amount of inc actually referenced */ /* This is a hack; change to use of flushcache instead. 
*/ nmat = ((CACHESIZE/ATL_sizeof) + i)/i; vp = malloc(ATL_MulBySize(nmat*inc)+ATL_Cachelen); ATL_assert(vp); C = c = ATL_AlignPtr(vp); a = A = C + incC; b = B = A + incA; st = C + nmat*inc; matgen(inc, nmat, C, inc, M*N);#ifdef DEBUG printmat("A0", M, K, A, lda); printmat("B0", K, N, B, ldb); printmat("C0", M, N, C, ldc);#endif t0 = time00(); for (k=nreps; k; k--) { trusted_gemm(TAc, TBc, M, N, K, alpha, a, lda, b, ldb, bet, c, ldc); c += inc; a += inc; b += inc; if (c == st)//.........这里部分代码省略.........
开发者ID:certik,项目名称:vendor,代码行数:101,
示例30: voidstatic int ATL_trmvLT( const enum ATLAS_DIAG Diag, const int nb, ATL_CINT N, const TYPE *A, ATL_CINT lda, TYPE *X, ATL_CINT incX)/* * RETURNS: 0 if TRMV was performed, non-zero if nothing done */{ static void (*trmvK)(ATL_CINT, const TYPE*, ATL_CINT, const TYPE*, TYPE*); void (*gemv)(ATL_CINT, ATL_CINT, const SCALAR, const TYPE*, ATL_CINT, const TYPE*, ATL_CINT, const SCALAR, TYPE*, ATL_CINT); void *vp; TYPE *x, *y; const size_t opsize = (N*N+N+N)*sizeof(TYPE)SHIFT; size_t t0; #ifdef TCPLX size_t N2=N+N, lda2 = lda+lda; TYPE one[2] = {ATL_rone, ATL_rzero}; #else #define N2 N #define lda2 lda #define one ATL_rone #endif const size_t incA = ((size_t)lda+1)*(nb SHIFT); ATL_CINT Nnb = ((N-1)/nb)*nb, Nr = N-Nnb; ATL_INT j; if (N < nb+nb) return(1); if (opsize > MY_CE) gemv = Mjoin(PATL,gemvT); else gemv = (opsize <= ATL_MulBySize(ATL_L1elts)) ? Mjoin(PATL,gemvT_L1) : Mjoin(PATL,gemvT_L2); trmvK = (Diag == AtlasNonUnit) ? ATL_trmvLTNk : ATL_trmvLTUk;/* * If X is aligned to Cachelen wt inc=1, use it as y */ t0 = (size_t) X; if (incX == 1 && (ATL_MulByCachelen(ATL_DivByCachelen(t0)) == t0)) { ATL_INT i; vp = malloc(ATL_Cachelen+ATL_MulBySize(N)); if (!vp) return(2); x = ATL_AlignPtr(vp); y = X; for (i=0; i < N2; i++) { x[i] = X[i]; X[i] = ATL_rzero; } } else /* allocate both X and Y */ { vp = malloc((ATL_Cachelen+ATL_MulBySize(N))<<1); if (!vp) return(3); x = ATL_AlignPtr(vp); y = x + N2; y = ATL_AlignPtr(y); Mjoin(PATL,copy)(N, X, incX, x, 1); Mjoin(PATL,zero)(N, y, 1); } for (j=0; j < Nnb; j += nb, A += incA) { #ifdef TCPLX const register size_t j2=j+j, nb2=nb+nb; #else #define j2 j #define nb2 nb #endif trmvK(nb, A, lda, x+j2, y+j2); gemv(N-j-nb, nb, one, A+nb2, lda, x+j2+nb2, 1, one, y+j2, 1); #ifndef TCPLX #undef j2 #undef nb2 #endif } #ifdef TCPLX j += j; #endif trmvK(Nr, A, lda, x+j, y+j); if (y != X) Mjoin(PATL,copy)(N, y, 1, X, incX); free(vp); return(0);}
开发者ID:AIDman,项目名称:Kaldi,代码行数:95,
注:本文中的ATL_AlignPtr函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 C++ ATL_MulBySize函数代码示例 C++ ATLVERIFY函数代码示例 |