您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vec_madd函数代码示例

51自学网 2021-06-03 09:36:07
  C++
这篇教程C++ vec_madd函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中vec_madd函数的典型用法代码示例。如果您正苦于以下问题:C++ vec_madd函数的具体用法?C++ vec_madd怎么用?C++ vec_madd使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_madd函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: vector_fmul_window_altivec

static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len){    vector float zero, t0, t1, s0, s1, wi, wj;    const vector unsigned char reverse = vcprm(3,2,1,0);    int i,j;    dst += len;    win += len;    src0+= len;    zero = (vector float)vec_splat_u32(0);    for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {        s0 = vec_ld(i, src0);        s1 = vec_ld(j, src1);        wi = vec_ld(i, win);        wj = vec_ld(j, win);        s1 = vec_perm(s1, s1, reverse);        wj = vec_perm(wj, wj, reverse);        t0 = vec_madd(s0, wj, zero);        t0 = vec_nmsub(s1, wi, t0);        t1 = vec_madd(s0, wi, zero);        t1 = vec_madd(s1, wj, t1);        t1 = vec_perm(t1, t1, reverse);        vec_st(t0, i, dst);        vec_st(t1, j, dst);    }}
开发者ID:Arcen,项目名称:libav,代码行数:31,


示例2: audio_convert_s16_to_float_altivec

void audio_convert_s16_to_float_altivec(float *out,      const int16_t *in, size_t samples, float gain){   const vector float gain_vec = vec_splats(gain);   const vector float zero_vec = vec_splats(0.0f);   // Unaligned loads/store is a bit expensive, so we optimize for the good path (very likely).   if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)   {      size_t i;      for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)      {         vector signed short input = vec_ld(0, in);         vector signed int hi = vec_unpackh(input);         vector signed int lo = vec_unpackl(input);         vector float out_hi = vec_madd(vec_ctf(hi, 15), gain_vec, zero_vec);         vector float out_lo = vec_madd(vec_ctf(lo, 15), gain_vec, zero_vec);         vec_st(out_hi,  0, out);         vec_st(out_lo, 16, out);      }      audio_convert_s16_to_float_C(out, in, samples - i, gain);   }   else      audio_convert_s16_to_float_C(out, in, samples, gain);}
开发者ID:Jalle19,项目名称:RetroArch,代码行数:26,


示例3: select_e

/* Sample which M or D cell in row i feeds E(i).
 *
 * Calling FChoose() would need temporary space for 2M-1 paths.  Since E(i)
 * is itself the normalisation constant, each striped vector is scaled by
 * 1/E(i) on the fly and the running sum is compared against one random
 * roll.  On success *ret_k receives the chosen k and the matching state
 * code (p7T_M or p7T_D) is returned.
 */
static inline int
select_e(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k)
{
  int          Q        = p7O_NQF(ox->M);
  double       sum      = 0.0;
  double       roll     = esl_random(rng);
  double       norm     = 1.0 / ox->xmx[i*p7X_NXCELLS+p7X_E];   /* all M, D already scaled exactly the same */
  vector float xEv      = esl_vmx_set_float(norm);
  vector float zerov    = (vector float) vec_splat_u32(0);
  const int    cell[2]  = { p7X_M, p7X_D };   /* scanned in this order for each q */
  const int    state[2] = { p7T_M, p7T_D };
  union { vector float v; float p[4]; } lanes;
  int          q, r, s;

  for (;;) {
    for (q = 0; q < Q; q++) {
      for (s = 0; s < 2; s++) {
        lanes.v = vec_madd(ox->dpf[i][q*3 + cell[s]], xEv, zerov);
        for (r = 0; r < 4; r++) {
          sum += lanes.p[r];
          if (roll < sum) { *ret_k = r*Q + q + 1; return state[s]; }
        }
      }
    }
    ESL_DASSERT1(sum > 0.99);
  }
  /*UNREACHED*/
  ESL_EXCEPTION(-1, "unreached code was reached. universe collapses.");
}
开发者ID:dboudour2002,项目名称:musicHMMER,代码行数:37,


示例4: select_d

/* D(i,k) is reached from M(i,k-1) or D(i,k-1).
 *
 * Picks one of the two predecessors at random, weighted by the two path
 * values, and returns p7T_M or p7T_D.  (q,r) is the striped position of
 * D(i,k).  The predecessor lives in vector q-1; when q is not positive the
 * code uses vector Q-1 and shifts it right by one lane (vec_sld with 12
 * bytes of zeros in front).
 */
static inline int
select_d(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  const int Q     = p7O_NQF(ox->M);
  const int q     = (k-1) % Q;
  const int r     = (k-1) / Q;
  const int wrap  = !(q > 0);                 /* predecessor is in the last vector */
  const int prev  = wrap ? (Q-1) : (q-1);
  const vector float zerov = (vector float) vec_splat_u32(0);
  vector float mpv  = ox->dpf[i][prev*3 + p7X_M];
  vector float dpv  = ox->dpf[i][prev*3 + p7X_D];
  vector float tmdv = om->tfv[7*prev + p7O_MD];
  vector float tddv = om->tfv[7*Q + prev];
  union { vector float v; float p[4]; } lanes;
  float path[2];
  int   state[2] = { p7T_M, p7T_D };

  if (wrap) {
    mpv  = vec_sld(zerov, mpv,  12);
    dpv  = vec_sld(zerov, dpv,  12);
    tmdv = vec_sld(zerov, tmdv, 12);
    tddv = vec_sld(zerov, tddv, 12);
  }

  lanes.v = vec_madd(mpv, tmdv, zerov);
  path[0] = lanes.p[r];
  lanes.v = vec_madd(dpv, tddv, zerov);
  path[1] = lanes.p[r];

  esl_vec_FNorm(path, 2);
  return state[esl_rnd_FChoose(rng, path, 2)];
}
开发者ID:Denis84,项目名称:EPA-WorkBench,代码行数:33,


示例5: vec_div

/*
 * Element-wise A / B for four floats.
 *
 * Starts from the hardware reciprocal estimate of B, refines it with two
 * Newton-Raphson steps, then forms the quotient and applies one
 * remainder-based correction.
 *
 * NOTE(review): newer compilers ship a built-in vec_div for VSX targets;
 * this definition may clash there -- confirm the intended toolchain.
 */
static inline vector float vec_div(vector float A, vector float B)
{
     vector float y0;
     vector float y1;
     vector float y2;
     vector float Q;
     vector float R;
     vector float one  = (vector float) (1.0f);
     vector float zero = (vector float) (-0.0f);  /* -0.0 addend, as in the original */

     y0 = vec_re(B);                                /* approximate 1/B */

     /* y1 = y0 + y0*(1.0 - y0*B); vec_nmsub(a,b,c) yields c - a*b */
     y1 = vec_madd(y0, vec_nmsub(y0, B, one), y0);

     /* Second Newton-Raphson step: y2 = y1 + y1*(1.0 - y1*B) */
     y2 = vec_madd(y1, vec_nmsub(y1, B, one), y1);

     /* Quotient estimate and remainder: Q = A*y2, R = A - B*Q */
     Q = vec_madd(A, y2, zero);
     R = vec_nmsub(B, Q, A);

     /* Final rounding adjustment: Q + R*y2 */
     return(vec_madd(R, y2, Q));
}
开发者ID:rck1138,项目名称:CUDA-Exercises,代码行数:32,


示例6: select_m

/* M(i,k) is reached from B(i-1), M(i-1,k-1), D(i-1,k-1), or I(i-1,k-1).
 *
 * Builds the four path values for the striped position (q,r) of M(i,k),
 * normalises them, and samples the predecessor state.  The four consecutive
 * transition vectors at om->tfv + 7*q are paired with B, M, I, D in that
 * order.  When q is not positive the previous-row vectors come from vector
 * Q-1, shifted right by one lane.
 */
static inline int
select_m(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  const int Q    = p7O_NQF(ox->M);
  const int q    = (k-1) % Q;
  const int r    = (k-1) / Q;
  const int wrap = !(q > 0);                  /* predecessor is in the last vector */
  const int prev = wrap ? (Q-1) : (q-1);
  vector float *tp = om->tfv + 7*q;           /* transitions into M(i,k) */
  const vector float xBv   = esl_vmx_set_float(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);
  const vector float zerov = (vector float) vec_splat_u32(0);
  vector float mpv = ox->dpf[i-1][prev*3 + p7X_M];
  vector float dpv = ox->dpf[i-1][prev*3 + p7X_D];
  vector float ipv = ox->dpf[i-1][prev*3 + p7X_I];
  union { vector float v; float p[4]; } lanes;
  float path[4];
  int   state[4] = { p7T_B, p7T_M, p7T_I, p7T_D };

  if (wrap) {
    mpv = vec_sld(zerov, mpv, 12);
    dpv = vec_sld(zerov, dpv, 12);
    ipv = vec_sld(zerov, ipv, 12);
  }

  lanes.v = vec_madd(xBv, tp[0], zerov);  path[0] = lanes.p[r];
  lanes.v = vec_madd(mpv, tp[1], zerov);  path[1] = lanes.p[r];
  lanes.v = vec_madd(ipv, tp[2], zerov);  path[2] = lanes.p[r];
  lanes.v = vec_madd(dpv, tp[3], zerov);  path[3] = lanes.p[r];

  esl_vec_FNorm(path, 4);
  return state[esl_rnd_FChoose(rng, path, 4)];
}
开发者ID:Denis84,项目名称:EPA-WorkBench,代码行数:35,


示例7: vector_fmul_reverse_altivec

static void vector_fmul_reverse_altivec(float *dst, const float *src0,                                        const float *src1, int len){    int i;    vector float d, s0, s1, h0, l0,                 s2, s3, zero = (vector float)vec_splat_u32(0);    src1 += len-4;    for(i=0; i<len-7; i+=8) {        s1 = vec_ld(0, src1-i);              // [a,b,c,d]        s0 = vec_ld(0, src0+i);        l0 = vec_mergel(s1, s1);             // [c,c,d,d]        s3 = vec_ld(-16, src1-i);        h0 = vec_mergeh(s1, s1);             // [a,a,b,b]        s2 = vec_ld(16, src0+i);        s1 = vec_mergeh(vec_mergel(l0,h0),   // [d,b,d,b]                        vec_mergeh(l0,h0));  // [c,a,c,a]                                             // [d,c,b,a]        l0 = vec_mergel(s3, s3);        d = vec_madd(s0, s1, zero);        h0 = vec_mergeh(s3, s3);        vec_st(d, 0, dst+i);        s3 = vec_mergeh(vec_mergel(l0,h0),                        vec_mergeh(l0,h0));        d = vec_madd(s2, s3, zero);        vec_st(d, 16, dst+i);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:27,


示例8: v_matmuladd

// Returns v[0]*m0 + v[1]*m1 + v[2]*m2 + a, evaluated as a chain of fused
// multiply-adds starting from the innermost term (v[2]*m2 + a).
inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
                               const v_float32x4& m1, const v_float32x4& m2,
                               const v_float32x4& a)
{
    const vec_float4 x = vec_splat(v.val, 0);
    const vec_float4 y = vec_splat(v.val, 1);
    const vec_float4 z = vec_splat(v.val, 2);

    vec_float4 acc = vec_madd(z, m2.val, a.val);
    acc = vec_madd(y, m1.val, acc);
    acc = vec_madd(x, m0.val, acc);
    return v_float32x4(acc);
}
开发者ID:ArkaJU,项目名称:opencv,代码行数:9,


示例9: v_matmul

// Returns v[0]*m0 + v[1]*m1 + v[2]*m2 + v[3]*m3.  The last term is a plain
// multiply; the remaining three are folded in with fused multiply-adds.
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                            const v_float32x4& m1, const v_float32x4& m2,
                            const v_float32x4& m3)
{
    const vec_float4 v0 = vec_splat(v.val, 0);
    const vec_float4 v1 = vec_splat(v.val, 1);
    const vec_float4 v2 = vec_splat(v.val, 2);
    VSX_UNUSED(const vec_float4) v3 = vec_splat(v.val, 3);

    vec_float4 acc = vec_mul(v3, m3.val);
    acc = vec_madd(v2, m2.val, acc);
    acc = vec_madd(v1, m1.val, acc);
    acc = vec_madd(v0, m0.val, acc);
    return v_float32x4(acc);
}
开发者ID:ArkaJU,项目名称:opencv,代码行数:10,


示例10: vector_fmul_altivec

static void vector_fmul_altivec(float *dst, const float *src, int len){    int i;    vector float d0, d1, s, zero = (vector float)vec_splat_u32(0);    for(i=0; i<len-7; i+=8) {        d0 = vec_ld(0, dst+i);        s = vec_ld(0, src+i);        d1 = vec_ld(16, dst+i);        d0 = vec_madd(d0, s, zero);        d1 = vec_madd(d1, vec_ld(16,src+i), zero);        vec_st(d0, 0, dst+i);        vec_st(d1, 16, dst+i);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:14,


示例11: dotprod_crcf_execute

// exectue vectorized structured inner dot productvoid dotprod_crcf_execute(dotprod_crcf    _q,                          float complex * _x,                          float complex * _r){    int al; // input data alignment    vector float *ar,*d;    vector float s0,s1,s2,s3;    union { vector float v; float w[4];} s;    unsigned int nblocks;    ar = (vector float*)( (int)_x & ~15);    al = ((int)_x & 15)/sizeof(float);    d = (vector float*)_q->h[al];    // number of blocks doubles because of complex type    nblocks = (2*_q->n + al - 1)/4 + 1;    // split into four vectors each with four 32-bit    // partial sums.  Effectively each loop iteration    // operates on 16 input samples at a time.    s0 = s1 = s2 = s3 = (vector float)(0);    while (nblocks >= 4) {        s0 = vec_madd(ar[nblocks-1],d[nblocks-1],s0);        s1 = vec_madd(ar[nblocks-2],d[nblocks-2],s1);        s2 = vec_madd(ar[nblocks-3],d[nblocks-3],s2);        s3 = vec_madd(ar[nblocks-4],d[nblocks-4],s3);        nblocks -= 4;    }    // fold the resulting partial sums into vector s0    s0 = vec_add(s0,s1);    // s0 = s0+s1    s2 = vec_add(s2,s3);    // s2 = s2+s3    s0 = vec_add(s0,s2);    // s0 = s0+s2    // finish partial summing operations    while (nblocks-- > 0)        s0 = vec_madd(ar[nblocks],d[nblocks],s0);    // move the result into the union s (effetively,    // this loads the four 32-bit values in s0 into    // the array w).    s.v = vec_add(s0,(vector float)(0));    // sum the resulting array    //*_r = s.w[0] + s.w[1] + s.w[2] + s.w[3];    *_r = (s.w[0] + s.w[2]) + (s.w[1] + s.w[3]) * _Complex_I;}
开发者ID:0xLeo,项目名称:liquid-dsp,代码行数:50,


示例12: OSX_AudioIOProc16Bit_Altivec

/*
 * Convert SOUND_BUFFER_SIZE signed 16-bit samples to floats scaled by
 * SOUND_BUFFER_SCALE_16BIT, eight samples per iteration.
 *
 * When gBufferMono2Stereo is set, every input sample is written twice so the
 * output holds twice as many floats.
 *
 * Fix: vec_mergeh(v, v) yields lanes (0,0,1,1) and vec_mergel(v, v) yields
 * (2,2,3,3).  The two were previously swapped, so each group of four samples
 * was written in the order 2,2,3,3,0,0,1,1.
 */
static void OSX_AudioIOProc16Bit_Altivec(SInt16	*myInBuffer, float *myOutBuffer)
{
	register UInt32	i;
	float f = SOUND_BUFFER_SCALE_16BIT;
	const vector float gain = vec_load_ps1(&f); // multiplier
	const vector float mix = vec_setzero();

	if (gBufferMono2Stereo)
	{
		int j=0;
		for (i=0;i<SOUND_BUFFER_SIZE;i+=8, j+=16)
		{
			vector short int v0 = vec_ld(0, myInBuffer + i); // Load 8 shorts
			vector float v1 = vec_ctf((vector signed int)vec_unpackh(v0), 0); // samples 0..3 as float
			vector float v2 = vec_ctf((vector signed int)vec_unpackl(v0), 0); // samples 4..7 as float
			vector float v3 = vec_madd(v1, gain, mix); // scale
			vector float v4 = vec_madd(v2, gain, mix); // scale
			vector float v5 = vec_mergeh(v3, v3); // v3(0,0,1,1)
			vector float v6 = vec_mergel(v3, v3); // v3(2,2,3,3)
			vector float v7 = vec_mergeh(v4, v4); // v4(0,0,1,1)
			vector float v8 = vec_mergel(v4, v4); // v4(2,2,3,3)

			vec_st(v5, 0, myOutBuffer + j); // Store 4 floats
			vec_st(v6, 0, myOutBuffer + 4 + j); // Store 4 floats
			vec_st(v7, 0, myOutBuffer + 8 + j); // Store 4 floats
			vec_st(v8, 0, myOutBuffer + 12 + j); // Store 4 floats
		}
	}
	else
	{
		for (i=0;i<SOUND_BUFFER_SIZE;i+=8)
		{
			vector short int v0 = vec_ld(0, myInBuffer + i); // Load 8 shorts
			vector float v1 = vec_ctf((vector signed int)vec_unpackh(v0), 0); // samples 0..3 as float
			vector float v2 = vec_ctf((vector signed int)vec_unpackl(v0), 0); // samples 4..7 as float
			vector float v3 = vec_madd(v1, gain, mix); // scale
			vector float v4 = vec_madd(v2, gain, mix); // scale

			vec_st(v3, 0, myOutBuffer + i); // Store 4 floats
			vec_st(v4, 0, myOutBuffer + 4 + i); // Store 4 floats
		}
	}
}
开发者ID:LighFusion,项目名称:surreal,代码行数:46,


示例13: _SIMD_madd_epi32

/*
 * Returns a*b + c on the selected SIMD backend.
 *
 * Fixes: the AVX branch called a non-existent intrinsic and ignored `c`; it
 * now uses the same wrapper expression as the SSE branch.  The IBM branch
 * called vec_madd with two operands; vec_madd takes three.
 *
 * NOTE(review): when none of USE_SSE / USE_AVX / USE_IBM is defined the
 * function still has no return statement, as before.
 * NOTE(review): confirm vec_madd accepts the __SIMDi type on the IBM target.
 */
__SIMDi _SIMD_madd_epi32(__SIMDi a, __SIMDi b, __SIMDi c)
{
#if defined USE_SSE || defined USE_AVX
  return _SIMD_add_epi32(_SIMD_mul_epi32(a,b),c);
#elif defined USE_IBM
  return vec_madd(a,b,c);
#endif
}
开发者ID:peihunglin,项目名称:TSVC_benchmark,代码行数:10,


示例14: _SIMD_madd_pd

/*
 * Returns a*b + c on packed doubles for the selected SIMD backend.
 *
 * Fixes: the AVX branch called a non-existent intrinsic and ignored `c`; it
 * now multiplies and adds with the 256-bit double intrinsics, mirroring the
 * SSE branch.  The IBM branch called vec_madd with two operands; vec_madd
 * takes three.
 *
 * NOTE(review): assumes __SIMDd is __m256d under USE_AVX -- confirm.
 * NOTE(review): when no backend macro is defined the function still has no
 * return statement, as before.
 */
__SIMDd _SIMD_madd_pd(__SIMDd a, __SIMDd b, __SIMDd c)
{
#ifdef  USE_SSE
  return _mm_add_pd(_mm_mul_pd(a,b),c);
#elif defined USE_AVX
  return _mm256_add_pd(_mm256_mul_pd(a,b),c);
#elif defined USE_IBM
  return vec_madd(a,b,c);
#endif
}
开发者ID:peihunglin,项目名称:TSVC_benchmark,代码行数:10,


示例15: select_i

/* I(i,k) is reached from M(i-1,k) or I(i-1,k).
 *
 * Forms the two path values from row i-1 at the striped position (q,r) of
 * I(i,k), normalises them, and samples p7T_M or p7T_I.  The two transition
 * vectors used are consecutive, starting at om->tfv + 7*q + p7O_MI.
 */
static inline int
select_i(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  const int Q = p7O_NQF(ox->M);
  const int q = (k-1) % Q;
  const int r = (k-1) / Q;
  const vector float  mpv   = ox->dpf[i-1][q*3 + p7X_M];
  const vector float  ipv   = ox->dpf[i-1][q*3 + p7X_I];
  vector float       *tp    = om->tfv + 7*q + p7O_MI;
  const vector float  zerov = (vector float) vec_splat_u32(0);
  union { vector float v; float p[4]; } lanes;
  float path[2];
  int   state[2] = { p7T_M, p7T_I };

  lanes.v = vec_madd(mpv, tp[0], zerov);
  path[0] = lanes.p[r];
  lanes.v = vec_madd(ipv, tp[1], zerov);
  path[1] = lanes.p[r];

  esl_vec_FNorm(path, 2);
  return state[esl_rnd_FChoose(rng, path, 2)];
}
开发者ID:Denis84,项目名称:EPA-WorkBench,代码行数:22,


示例16: appbones

/*
 * Blend a position (x, y, z) and a normal (nx, ny, nz) through the bones
 * that influence one skin entry.
 *
 * skin->boneinfo packs the bone count in its low two bits and then one
 * 10-bit bone index per bone.  For every bone the four vectors loaded from
 * b->cm (at byte offsets 0, 16, 32, 48) are combined as
 *     r0*x + r1*y + r2*z + r3      for the position, and
 *     r0*nx + r1*ny + r2*nz        for the normal,
 * each scaled by that bone's weight and accumulated.  The blended normal is
 * written to *np and the blended position is returned.
 */
static vector float appbones (State *s,
                              struct skin *skin,
                              vector float x,
                              vector float y,
                              vector float z,
                              vector float nx,
                              vector float ny,
                              vector float nz,
                              vector float *np)
{
    const vector float vz = (vector float) vec_splat_u32 (0);
    /* vec_slo control byte: shift the weight vector left by 4 bytes */
    vector unsigned char S = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4<<3};
    vector float pos = vz;
    vector float nrm = vz;
    vector float w   = vec_ld (0, skin->weights);
    int num_bones    = skin->boneinfo & 3;
    int bone_index   = skin->boneinfo >> 2;
    int j;

    for (j = 0; j < num_bones; ++j) {
        struct abone *b = &s->abones[bone_index & 0x3ff];
        vector float vw = vec_splat (w, 0);     /* current bone's weight in all lanes */
        vector float r0 = vec_ld ( 0, b->cm);
        vector float r1 = vec_ld (16, b->cm);
        vector float r2 = vec_ld (32, b->cm);
        vector float r3 = vec_ld (48, b->cm);
        vector float p;
        vector float q;

        bone_index >>= 10;                      /* next packed bone index */
        w = vec_slo (w, S);                     /* next weight moves to lane 0 */

        p   = vec_madd (r0, x, r3);
        p   = vec_madd (r1, y, p);
        p   = vec_madd (r2, z, p);
        pos = vec_madd (p, vw, pos);

        q   = vec_madd (r0, nx, vz);
        q   = vec_madd (r1, ny, q);
        q   = vec_madd (r2, nz, q);
        nrm = vec_madd (q, vw, nrm);
    }

    *np = nrm;
    return pos;
}
开发者ID:Lenbok,项目名称:dormin,代码行数:50,


示例17: int32_to_float_fmul_scalar_altivec

static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len){    union {        vector float v;        float s[4];    } mul_u;    int i;    vector float src1, src2, dst1, dst2, mul_v, zero;    zero = (vector float)vec_splat_u32(0);    mul_u.s[0] = mul;    mul_v = vec_splat(mul_u.v, 0);    for(i=0; i<len; i+=8) {        src1 = vec_ctf(vec_ld(0,  src+i), 0);        src2 = vec_ctf(vec_ld(16, src+i), 0);        dst1 = vec_madd(src1, mul_v, zero);        dst2 = vec_madd(src2, mul_v, zero);        vec_st(dst1,  0, dst+i);        vec_st(dst2, 16, dst+i);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:22,


示例18: assign_add_mul_r_32

/*
 * R[i] += c * S[i] for i in [0, N), on single-precision spinors.
 *
 * Each spinor is processed as six vector loads from R and S at float offsets
 * 0, 4, 8, 12, 16 and 20, followed by six fused multiply-adds k*y + x and
 * six stores back to R.
 * NOTE(review): presumably each vec_ld fetches four floats, i.e. 24 floats
 * per spinor32 -- confirm against the spinor32 definition.
 *
 * With TM_USE_OMP the whole body runs inside an `omp parallel` region and
 * the loop is work-shared with `omp for`; otherwise the loop carries an
 * unroll(2) hint.
 */
void assign_add_mul_r_32(spinor32 * const R, spinor32 * const S, const float c, const int N) {
#ifdef TM_USE_OMP
#pragma omp parallel
  {
#endif
  vector4double x0, x1, x2, x3, x4, x5, y0, y1, y2, y3, y4, y5;
  vector4double z0, z1, z2, z3, z4, z5, k;
  float *s, *r;
  float ALIGN32 _c;
  _c = c;
  /* Touch the first elements early so they are in flight before the loop. */
  __prefetch_by_load(S);
  __prefetch_by_load(R);
  /* Broadcast the scalar coefficient to every lane. */
  k = vec_splats((double)_c);
  /* Alignment hints to the compiler.
   * NOTE(review): s and r have not been assigned yet at this point. */
  __alignx(16, s);
  __alignx(16, r);
  __alignx(16, S);
  __alignx(16, R);
#ifdef TM_USE_OMP
#pragma omp for
#else
#pragma unroll(2)
#endif
  for(int i = 0; i < N; i++) {
    s=(float*)((spinor32 *) S + i);
    r=(float*)((spinor32 *) R + i);
    /* Prefetch the next spinor of each field while working on this one. */
    __prefetch_by_load(S + i + 1);
    __prefetch_by_stream(1, R + i + 1);
    /* Load the six vectors of R[i] ... */
    x0 = vec_ld(0, r);
    x1 = vec_ld(0, r+4);
    x2 = vec_ld(0, r+8);
    x3 = vec_ld(0, r+12);
    x4 = vec_ld(0, r+16);
    x5 = vec_ld(0, r+20);
    /* ... and of S[i]. */
    y0 = vec_ld(0, s);
    y1 = vec_ld(0, s+4);
    y2 = vec_ld(0, s+8);
    y3 = vec_ld(0, s+12);
    y4 = vec_ld(0, s+16);
    y5 = vec_ld(0, s+20);
    /* z = k*y + x */
    z0 = vec_madd(k, y0, x0);
    z1 = vec_madd(k, y1, x1);
    z2 = vec_madd(k, y2, x2);
    z3 = vec_madd(k, y3, x3);
    z4 = vec_madd(k, y4, x4);
    z5 = vec_madd(k, y5, x5);
    /* Write the result back over R[i]. */
    vec_st(z0, 0, r);
    vec_st(z1, 0, r+4);
    vec_st(z2, 0, r+8);
    vec_st(z3, 0, r+12);
    vec_st(z4, 0, r+16);
    vec_st(z5, 0, r+20);
  }
#ifdef TM_USE_OMP
  } /* closes the omp parallel region */
#endif
  return;
}
开发者ID:Finkenrath,项目名称:tmLQCD,代码行数:59,


示例19: func

__vector float func(__vector float vx){    __vector float vy;    __vector float va = (__vector float) {   5.0f,   5.0f,   5.0f,   5.0f };    __vector float vb = (__vector float) { -16.0f, -16.0f, -16.0f, -16.0f };    __vector float vc = (__vector float) { -36.0f, -36.0f, -36.0f, -36.0f };    __vector float vd = (__vector float) {  64.0f,  64.0f,  64.0f,  64.0f };    __vector float ve = (__vector float) { 192.0f, 192.0f, 192.0f, 192.0f };    vy = vec_madd(va, vx, vb);    vy = vec_madd(vy, vx, vc);    vy = vec_madd(vy, vx, vd);    vy = vec_madd(vy, vx, ve);    return vy;}float calc_integral(float start, float end, float delta){    int i;    float *sum;    __vector float vx     = (__vector float) { start+delta*0, start+delta*1,                                               start+delta*2, start+delta*3 };    __vector float vsum   = (__vector float) { 0.0f, 0.0f, 0.0f, 0.0f };    __vector float vdelta = (__vector float) { delta, delta, delta, delta };    __vector float vstep  = (__vector float) { 4.0f, 4.0f, 4.0f, 4.0f };    for (i = 0; i < (end-start)/delta; i += 4) {        vsum = vec_madd(func(vx), vdelta, vsum);        vx   = vec_madd(vdelta, vstep, vx);    }    sum = (float *) &vsum;    return (sum[0] + sum[1] + sum[2] + sum[3]);}int main(int argc, char **argv){    float start = 0.0f;    float end   = 4.0f;    float delta = 0.00001f;    float result;    printf("start = %f, end = %f/n", start, end);    result = calc_integral(start, end, delta);    printf("result = %f/n", result);    return 0;}
开发者ID:macton,项目名称:ps3-archive,代码行数:53,


示例20: CDSPToolsOSX::Mul32

// Multiplies 32 consecutive floats at pDest by Src, in place.
// On PowerPC the buffer is processed as eight vectors of four floats at byte
// offsets 0, 16, ..., 112; elsewhere the call is forwarded to
// CDSPTools::Mul32.
void CDSPToolsOSX::Mul32(tfloat32* pDest, tfloat32 Src)
{
#ifdef _Mac_PowerPC
	vector float vSrc = (vector float)(Src, Src, Src, Src);

	for (int iByteOffset = 0; iByteOffset < 32 * 4; iByteOffset += 4 * 4) {
		vector float vDest = vec_ldl(iByteOffset, pDest);
		vDest = vec_madd(vDest, vSrc, (vector float)(0, 0, 0, 0));
		vec_st(vDest, iByteOffset, pDest);
	}
#else	// _Mac_PowerPC
	CDSPTools::Mul32(pDest, Src);
#endif	// _Mac_PowerPC
}
开发者ID:eriser,项目名称:koblo_software-1,代码行数:40,


示例21: test_madd

/* *************************************************************************    NAME:  test_madd   USAGE:    test_madd();   returns: void   DESCRIPTION:                   see how the combination multiply/add operation works   REFERENCES:   Ian Ollmann's Altivec Tutorial      LIMITATIONS:   GLOBAL VARIABLES:      accessed: none      modified: none   FUNCTIONS CALLED:      fprintf   vec_madd - multiply two float vectors and add to the sum a float value               all in one operation       REVISION HISTORY:        STR                  Description of Revision                 Author     27-Feb-11               initial coding                           kaj ************************************************************************* */void test_madd(void){  vector float floatVector1 =       { -17.777777, 0.0, 37.777777, 100.0};  vector float addVector =        { 32.0, 32.0, 32.0, 32.0};  vector float coeffVector =        { 1.8, 1.8, 1.8, 1.8};  vector float fahrenheitVector;  short printfloat[FLOAT_ARRAYSIZE] __attribute__ ((aligned (16)));  /* print vectors performing madd on */  fprintf(stderr,"-----------------------------------------------------------/n/n");  printVecFloats("vec_madd input vector 1", floatVector1,FLOAT_ARRAYSIZE);  printVecFloats("vec_madd input vector to add", addVector,FLOAT_ARRAYSIZE);    printVecFloats("vec_madd coeffvector to multiply", coeffVector,FLOAT_ARRAYSIZE);     /* calculate */  fahrenheitVector = vec_madd(floatVector1,coeffVector,addVector);  printVecFloats("vec_madd vector (Input*9/5+32)", fahrenheitVector,FLOAT_ARRAYSIZE);} /* test_madd */
开发者ID:yaojingguo,项目名称:gcc-intrinsics-samplecode,代码行数:59,


示例22: vector_fmul_add_altivec

/*
 * dst[i] = src0[i] * src1[i] + src2[i], four floats per iteration, with a
 * read-modify-write sequence that lets dst be unaligned.
 *
 * The two aligned quadwords covering dst+i are loaded (byte offsets 0 and
 * 15), the new result is permuted into place between their existing
 * contents, and both quadwords are stored back.  The source pointers are
 * read with plain vec_ld at offset 0.
 * NOTE(review): presumably src0/src1/src2 must be 16-byte aligned -- confirm
 * with callers.
 * Only whole groups of four are processed (loop bound len-3).
 */
static void vector_fmul_add_altivec(float *dst, const float *src0,
                                    const float *src1, const float *src2,
                                    int len)
{
    int i;
    vector float d, s0, s1, s2, t0, t1, edges;
    /* Permute controls derived from dst's misalignment. */
    vector unsigned char align = vec_lvsr(0,dst),
                         mask = vec_lvsl(0, dst);
    for (i=0; i<len-3; i+=4) {
        /* Current contents of the two quadwords covering dst+i. */
        t0 = vec_ld(0, dst+i);
        t1 = vec_ld(15, dst+i);
        s0 = vec_ld(0, src0+i);
        s1 = vec_ld(0, src1+i);
        s2 = vec_ld(0, src2+i);
        /* Gather the bytes around the destination that must survive. */
        edges = vec_perm(t1 ,t0, mask);
        d = vec_madd(s0,s1,s2);
        /* Merge the new result with the preserved edge bytes. */
        t1 = vec_perm(d, edges, align);
        t0 = vec_perm(edges, d, align);
        /* The quadword at offset 15 is stored first, then the one at 0. */
        vec_st(t1, 15, dst+i);
        vec_st(t0, 0, dst+i);
    }
}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:23,


示例23: main

int main(int argc, char **argv){    int i;    __vector float *vin  = (__vector float *) in;    __vector float *vout = (__vector float *) out;    __vector float vin_negative;    __vector unsigned int vpat;    __vector float vzero  = (__vector float) {  0.0f,  0.0f,  0.0f,  0.0f };    __vector float vminus = (__vector float) { -1.0f, -1.0f, -1.0f, -1.0f };    for (i = 0; i < SIZE/4; i++) {        vpat = vec_cmpgt(vin[i], vzero);        vin_negative = vec_madd(vin[i], vminus, vzero);        vout[i] = vec_sel(vin_negative, vin[i], vpat);    }    for (i = 0; i < SIZE; i++) {        printf("out[%02d]=%0.0f/n", i, out[i]);    }    return 0;}
开发者ID:macton,项目名称:ps3-archive,代码行数:24,


示例24: ProjectDlightTexture_altivec

//.........这里部分代码省略.........			vec_t dist0, dist1, dist2;						dist0 = origin0 - tess.xyz[i][0];			dist1 = origin1 - tess.xyz[i][1];			dist2 = origin2 - tess.xyz[i][2];			backEnd.pc.c_dlightVertexes++;			texCoords0 = 0.5f + dist0 * scale;			texCoords1 = 0.5f + dist1 * scale;			if( !r_dlightBacks->integer &&					// dist . tess.normal[i]					( dist0 * tess.normal[i][0] +					dist1 * tess.normal[i][1] +					dist2 * tess.normal[i][2] ) < 0.0f ) {				clip = 63;			} else {				if ( texCoords0 < 0.0f ) {					clip |= 1;				} else if ( texCoords0 > 1.0f ) {					clip |= 2;				}				if ( texCoords1 < 0.0f ) {					clip |= 4;				} else if ( texCoords1 > 1.0f ) {					clip |= 8;				}				texCoords[0] = texCoords0;				texCoords[1] = texCoords1;				// modulate the strength based on the height and color				if ( dist2 > radius ) {					clip |= 16;					modulate = 0.0f;				} else if ( dist2 < -radius ) {					clip |= 32;					modulate = 0.0f;				} else {					dist2 = Q_fabs(dist2);					if ( dist2 < radius * 0.5f ) {						modulate = 1.0f;					} else {						modulate = 2.0f * (radius - dist2) * scale;					}				}			}			clipBits[i] = clip;			modulateVec = vec_ld(0,(float *)&modulate);			modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm);			colorVec = vec_madd(floatColorVec0,modulateVec,zero);			colorInt = vec_cts(colorVec,0);	// RGBx			colorShort = vec_pack(colorInt,colorInt);		// RGBxRGBx			colorChar = vec_packsu(colorShort,colorShort);	// RGBxRGBxRGBxRGBx			colorChar = vec_sel(colorChar,vSel,vSel);		// RGBARGBARGBARGBA replace alpha with 255			vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors);	// store color		}		// build a list of triangles that need light		numIndexes = 0;		for ( i = 0 ; i < tess.numIndexes ; i += 3 ) {			int		a, b, c;			a = tess.indexes[i];			b = tess.indexes[i+1];			c = tess.indexes[i+2];			if ( clipBits[a] & clipBits[b] & clipBits[c] ) {				continue;	// not lighted			}			hitIndexes[numIndexes] = a;			hitIndexes[numIndexes+1] = b;			
hitIndexes[numIndexes+2] = c;			numIndexes += 3;		}		if ( !numIndexes ) {			continue;		}		qglEnableClientState( GL_TEXTURE_COORD_ARRAY );		qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] );		qglEnableClientState( GL_COLOR_ARRAY );		qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray );		GL_Bind( tr.dlightImage );		// include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light		// where they aren't rendered		if ( dl->additive ) {			GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );		}		else {			GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );		}		R_DrawElements( numIndexes, hitIndexes );		backEnd.pc.c_totalIndexes += numIndexes;		backEnd.pc.c_dlightIndexes += numIndexes;	}}
开发者ID:ptitSeb,项目名称:ioq3,代码行数:101,


示例25: ff_fdct_altivec

//.........这里部分代码省略.........    x6 = ((vector float) vec_sub(vs16(b10), vs16(b60)));    x2 = ((vector float) vec_add(vs16(b20), vs16(b50)));    x5 = ((vector float) vec_sub(vs16(b20), vs16(b50)));    x3 = ((vector float) vec_add(vs16(b30), vs16(b40)));    x4 = ((vector float) vec_sub(vs16(b30), vs16(b40)));    b70 = ((vector float) vec_add(vs16(x0), vs16(x3)));    b10 = ((vector float) vec_add(vs16(x1), vs16(x2)));    b00 = ((vector float) vec_add(vs16(b70), vs16(b10)));    b40 = ((vector float) vec_sub(vs16(b70), vs16(b10)));#define CTF0(n)                                                    /    b ## n ## 1 = ((vector float) vec_unpackl(vs16(b ## n ## 0))); /    b ## n ## 0 = ((vector float) vec_unpackh(vs16(b ## n ## 0))); /    b ## n ## 1 = vec_ctf(vs32(b ## n ## 1), 0);                   /    b ## n ## 0 = vec_ctf(vs32(b ## n ## 0), 0)    CTF0(0);    CTF0(4);    b20 = ((vector float) vec_sub(vs16(x0), vs16(x3)));    b60 = ((vector float) vec_sub(vs16(x1), vs16(x2)));    CTF0(2);    CTF0(6);#undef CTF0    x0 = vec_add(b60, b20);    x1 = vec_add(b61, b21);    cnst = LD_W2;    x0   = vec_madd(cnst, x0, mzero);    x1   = vec_madd(cnst, x1, mzero);    cnst = LD_W1;    b20  = vec_madd(cnst, b20, x0);    b21  = vec_madd(cnst, b21, x1);    cnst = LD_W0;    b60  = vec_madd(cnst, b60, x0);    b61  = vec_madd(cnst, b61, x1);#define CTFX(x, b)                                  /    b ## 0 = ((vector float) vec_unpackh(vs16(x))); /    b ## 1 = ((vector float) vec_unpackl(vs16(x))); /    b ## 0 = vec_ctf(vs32(b ## 0), 0);              /    b ## 1 = vec_ctf(vs32(b ## 1), 0)    CTFX(x4, b7);    CTFX(x5, b5);    CTFX(x6, b3);    CTFX(x7, b1);#undef CTFX    x0   = vec_add(b70, b10);    x1   = vec_add(b50, b30);    x2   = vec_add(b70, b30);    x3   = vec_add(b50, b10);    x8   = vec_add(x2, x3);    cnst = LD_W3;    x8   = vec_madd(cnst, x8, mzero);    cnst = LD_W8;    x0   = vec_madd(cnst, x0, mzero);    cnst = LD_W9;
开发者ID:63n,项目名称:FFmpeg,代码行数:67,


示例26: cfft2

void cfft2(unsigned int n,float x[][2],float y[][2],float w[][2], float sign){    /*       altivec version of cfft2 from Petersen and Arbenz book, "Intro.       to Parallel Computing", Oxford Univ. Press, 2003, Section 3.6                                            wpp 14. Dec. 2003    */    int jb,jc,jd,jw,k,k2,k4,lj,m,j,mj,mj2,pass,tgle;    float rp,up,wr[4] __attribute((aligned(16)));    float wu[4] __attribute((aligned(16)));    float *a,*b,*c,*d;    const vector float vminus = (vector float) {        -0.,0.,-0.,0.    };    const vector float vzero  = (vector float) {        0.,0.,0.,0.    };    const vector unsigned char pv3201 =    (vector unsigned char) {        4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11    };    vector float V0,V1,V2,V3,V4,V5,V6,V7;    vector float V8,V9,V10,V11,V12,V13,V14,V15;    if(n<=1) {        y[0][0] = x[0][0];        y[0][1] = x[0][1];        return;    }    m    = (int) (log((float) n)/log(1.99));    mj   = 1;    mj2  = 2;    lj   = n/2;    /* first pass thru data: x -> y */    for(j=0; j<lj; j++) {        jb = n/2+j;        jc  = j*mj2;        jd = jc + 1;        rp = w[j][0];        up = w[j][1];        if(sign<0.0) up = -up;        y[jd][0] = rp*(x[j][0] - x[jb][0]) - up*(x[j][1] - x[jb][1]);        y[jd][1] = up*(x[j][0] - x[jb][0]) + rp*(x[j][1] - x[jb][1]);        y[jc][0] = x[j][0] + x[jb][0];        y[jc][1] = x[j][1] + x[jb][1];    }    if(n==2) return;    /* next pass is mj = 2 */    mj  = 2;    mj2 = 4;    lj  = n/4;    a = (float *)&y[0][0];    b = (float *)&y[n/2][0];    c = (float *)&x[0][0];    d = (float *)&x[mj][0];    if(n==4) {        c = (float *)&y[0][0];        d = (float *)&y[mj][0];    }    for(j=0; j<lj; j++) {        jw = j*mj;        jc = j*mj2;        jd = 2*jc;        rp = w[jw][0];        up = w[jw][1];        if(sign<0.0) up = -up;        wr[0] = rp;        wr[1] = rp;        wr[2] = rp;        wr[3] = rp;        wu[0] = up;        wu[1] = up;        wu[2] = up;        wu[3] = up;        V6 = vec_ld(0,wr); 
       V7 = vec_ld(0,wu);        V7 = vec_xor(V7,vminus);        V0 = vec_ld(0,(vector float *) (a+jc));        V1 = vec_ld(0,(vector float *) (b+jc));        V2 = vec_add(V0,V1);                         /* a + b */        vec_st(V2,0,(vector float *) (c+jd));     /* store c */        V3 = vec_sub(V0,V1);                         /* a - b */        V4 = vec_perm(V3,V3,pv3201);        V0 = vec_madd(V6,V3,vzero);        V1 = vec_madd(V7,V4,vzero);        V2 = vec_add(V0,V1);                         /* w*(a - b) */        vec_st(V2,0,(vector float*) (d+jd));         /* store d */    }    if(n==4) return;    mj  *= 2;    mj2  = 2*mj;    lj   = n/mj2;    tgle = 0;    for(pass=2; pass<m-1; pass++) {        if(tgle) {            a = (float *)&y[0][0];            b = (float *)&y[n/2][0];            c = (float *)&x[0][0];//.........这里部分代码省略.........
开发者ID:kailiao,项目名称:test-suite,代码行数:101,


示例27: nb_kernel310_ppc_altivec

void nb_kernel310_ppc_altivec  (int *             p_nri,                       int               iinr[],                       int               jindex[],                       int               jjnr[],                       int               shift[],                       float             shiftvec[],                       float             fshift[],                       int               gid[],                       float             pos[],                       float             faction[],                       float             charge[],                       float *           p_facel,                       float *           p_krf,                       float *           p_crf,                       float             Vc[],                       int               type[],                       int *             p_ntype,                       float             vdwparam[],                       float             Vvdw[],                       float *           p_tabscale,                       float             VFtab[],                       float             invsqrta[],                       float             dvda[],                       float *           p_gbtabscale,                       float             GBtab[],                       int *             p_nthreads,                       int *             count,                       void *            mtx,                       int *             outeriter,                       int *             inneriter,					   float *           work){	vector float ix,iy,iz,shvec;	vector float vfacel,tsc,fs,fs2,nul;	vector float dx,dy,dz;	vector float Vvdwtot,vctot,qq,iq,c6,c12,VVc,FFc;	vector float fix,fiy,fiz;	vector float tmp1,tmp2,tmp3,tmp4;	vector float rinv,r,rinvsq,rsq,rinvsix,Vvdw6,Vvdw12;	int n,k,ii,is3,ii3,ntiA,nj0,nj1;	int jnra,jnrb,jnrc,jnrd;	int j3a,j3b,j3c,j3d;	int nri, ntype, nouter, ninner;	int tja,tjb,tjc,tjd;#ifdef GMX_THREADS	int nn0, nn1;#endif    nouter   = 0;    ninner   = 0;    nri      = *p_nri;    
ntype    = *p_ntype;	nul=vec_zero();	vfacel=load_float_and_splat(p_facel);	tsc=load_float_and_splat(p_tabscale);#ifdef GMX_THREADS    nthreads = *p_nthreads;	do {		gmx_thread_mutex_lock((gmx_thread_mutex_t *)mtx);		nn0              = *count;		nn1              = nn0+(nri-nn0)/(2*nthreads)+3;		*count           = nn1;		gmx_thread_mutex_unlock((gmx_thread_mutex_t *)mtx);		if(nn1>nri) nn1=nri;		for(n=nn0; (n<nn1); n++) {#if 0		} /* maintain correct indentation even with conditional left braces */#endif#else /* without gmx_threads */		for(n=0;n<nri;n++) {#endif  			is3        = 3*shift[n];			shvec      = load_xyz(shiftvec+is3);			ii         = iinr[n];			ii3        = 3*ii;			ix         = load_xyz(pos+ii3);			Vvdwtot     = nul;			vctot      = nul;			fix        = nul;			fiy        = nul;			fiz        = nul;			ix         = vec_add(ix,shvec);			nj0        = jindex[n];			nj1        = jindex[n+1];			splat_xyz_to_vectors(ix,&ix,&iy,&iz);			ntiA       = 2*ntype*type[ii];			iq        = vec_madd(load_float_and_splat(charge+ii),vfacel,nul);			for(k=nj0; k<(nj1-3); k+=4) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				jnrd            = jjnr[k+3];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				j3d             = 3*jnrd;//.........这里部分代码省略.........
开发者ID:alejandrox1,项目名称:gromacs_flatbottom,代码行数:101,


示例28: test1

//.........这里部分代码省略.........  res_vull = vec_andc(vull, vull);// CHECK: xor <2 x i64>// CHECK: and <2 x i64>// CHECK-LE: xor <2 x i64>// CHECK-LE: and <2 x i64>  res_vull = vec_andc(vbll, vull);// CHECK: xor <2 x i64>// CHECK: and <2 x i64>// CHECK-LE: xor <2 x i64>// CHECK-LE: and <2 x i64>  res_vull = vec_andc(vull, vbll);// CHECK: xor <2 x i64>// CHECK: and <2 x i64>// CHECK-LE: xor <2 x i64>// CHECK-LE: and <2 x i64>  res_vbll = vec_andc(vbll, vbll);// CHECK: xor <2 x i64>// CHECK: and <2 x i64>// CHECK-LE: xor <2 x i64>// CHECK-LE: and <2 x i64>  res_vf = vec_floor(vf);// CHECK: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}})// CHECK-LE: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}})  res_vd = vec_floor(vd);// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}})// CHECK-LE: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}})  res_vf = vec_madd(vf, vf, vf);// CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})// CHECK-LE: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})  res_vd = vec_madd(vd, vd, vd);// CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})// CHECK-LE: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})  /* vec_mergeh */  res_vsll = vec_mergeh(vsll, vsll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vsll = vec_mergeh(vsll, vbll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vsll = vec_mergeh(vbll, vsll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vull = vec_mergeh(vull, vull);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vull = vec_mergeh(vull, vbll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vull = 
vec_mergeh(vbll, vull);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm
开发者ID:AlexDenisov,项目名称:clang,代码行数:66,


示例29: nb_kernel010nf_ppc_altivec

void nb_kernel010nf_ppc_altivec(int *             p_nri,                       int               iinr[],                       int               jindex[],                       int               jjnr[],                       int               shift[],                       float             shiftvec[],                       float             fshift[],                       int               gid[],                       float             pos[],                       float             faction[],                       float             charge[],                       float *           p_facel,                       float *           p_krf,                       float *           p_crf,                       float             Vc[],                       int               type[],                       int *             p_ntype,                       float             vdwparam[],                       float             Vvdw[],                       float *           p_tabscale,                       float             VFtab[],                       float             invsqrta[],                       float             dvda[],                       float *           p_gbtabscale,                       float             GBtab[],                       int *             p_nthreads,                       int *             count,                       void *            mtx,                       int *             outeriter,                       int *             inneriter,					   float *           work){	vector float ix,iy,iz,shvec;	vector float nul;	vector float dx,dy,dz;	vector float Vvdwtot,c6,c12;	vector float rinvsq,rsq,rinvsix;  	int n,k,ii,is3,ii3,nj0,nj1;	int jnra,jnrb,jnrc,jnrd;	int j3a,j3b,j3c,j3d;	int nri, ntype, nouter, ninner;	int ntiA,tja,tjb,tjc,tjd;#ifdef GMX_THREAD_SHM_FDECOMP	int nn0, nn1;#endif      nouter   = 0;    ninner   = 0;    nri      = *p_nri;    ntype    = *p_ntype;	nul=vec_zero();#ifdef GMX_THREAD_SHM_FDECOMP    nthreads = *p_nthreads;	do {		
tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);		nn0              = *count;		nn1              = nn0+(nri-nn0)/(2*nthreads)+3;		*count           = nn1;		tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);		if(nn1>nri) nn1=nri;		for(n=nn0; (n<nn1); n++) {#if 0		} /* maintain correct indentation even with conditional left braces */#endif#else /* without tMPI_Threads */		for(n=0;n<nri;n++) {#endif			is3        = 3*shift[n];			shvec      = load_xyz(shiftvec+is3);			ii         = iinr[n];			ii3        = 3*ii;			ix         = load_xyz(pos+ii3);			Vvdwtot     = nul;			ix         = vec_add(ix,shvec);    			nj0        = jindex[n];			nj1        = jindex[n+1];			splat_xyz_to_vectors(ix,&ix,&iy,&iz);			ntiA       = 2*ntype*type[ii];			for(k=nj0; k<(nj1-3); k+=4) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				jnrd            = jjnr[k+3];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				j3d             = 3*jnrd;				transpose_4_to_3(load_xyz(pos+j3a),								 load_xyz(pos+j3b),								 load_xyz(pos+j3c),								 load_xyz(pos+j3d),&dx,&dy,&dz);				dx              = vec_sub(ix,dx);				dy              = vec_sub(iy,dy);				dz              = vec_sub(iz,dz);				rsq             = vec_madd(dx,dx,nul);				rsq             = vec_madd(dy,dy,rsq);				rsq             = vec_madd(dz,dz,rsq);//.........这里部分代码省略.........
开发者ID:TTarenzi,项目名称:MMCG-HAdResS,代码行数:101,


示例30: vec_madd

	int nri, ntype, nouter, ninner;	int tja,tjb,tjc,tjd;#ifdef GMX_THREADS	int nn0, nn1;#endif    nouter   = 0;    ninner   = 0;    nri      = *p_nri;    ntype    = *p_ntype;	nul=vec_zero();	vfacel=load_float_and_splat(p_facel);	vkrf=load_float_and_splat(p_krf);	vcrf=load_float_and_splat(p_crf);	ii         = iinr[0];	iqO        = vec_madd(load_float_and_splat(charge+ii),vfacel,nul);	iqH        = vec_madd(load_float_and_splat(charge+ii+1),vfacel,nul);	ntiA       = 2*ntype*type[ii];  #ifdef GMX_THREADS    nthreads = *p_nthreads;	do {		gmx_thread_mutex_lock((gmx_thread_mutex_t *)mtx);		nn0              = *count;		nn1              = nn0+(nri-nn0)/(2*nthreads)+3;		*count           = nn1;		gmx_thread_mutex_unlock((gmx_thread_mutex_t *)mtx);		if(nn1>nri) nn1=nri;		for(n=nn0; (n<nn1); n++) {#if 0		} /* maintain correct indentation even with conditional left braces */
开发者ID:BioinformaticsArchive,项目名称:GromPy,代码行数:31,



注:本文中的vec_madd函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vec_mergel函数代码示例
C++ vec_lvsr函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网,自学Excel、自学PS、自学CAD、自学C语言、自学CSS3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。