您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vec_sub函数代码示例

51自学网 2021-06-03 09:36:45
  C++
这篇教程C++ vec_sub函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中vec_sub函数的典型用法代码示例。如果您正苦于以下问题:C++ vec_sub函数的具体用法?C++ vec_sub怎么用?C++ vec_sub使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_sub函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: lightsource_diffuse

unsigned longlightsource_diffuse (const float *vert, const float *norm,		     const colour *ambient_col, const colour *pigment_col,		     const float *light_pos){  float light_to_vertex[3];  float out;  int r, g, b;  vec_sub (light_to_vertex, light_pos, vert);  vec_normalize (light_to_vertex, light_to_vertex);  out = vec_dot (light_to_vertex, norm);  if (out < 0)    out = 0;  r = ambient_col->r + out * (pigment_col->r - ambient_col->r);  g = ambient_col->g + out * (pigment_col->g - ambient_col->g);  b = ambient_col->b + out * (pigment_col->b - ambient_col->b);  return (255 << 24) | (r << 16) | (g << 8) | b;}
开发者ID:crtc-demos,项目名称:wobble,代码行数:23,


示例2: init_env

/*
** Set up the rendering environment stored in `e`, then initialise `obj`.
**
** Steps, in order: create the mlx connection, the window, the image, fetch
** the image data address, allocate the WIN_X * WIN_Y rgb table, set the
** camera vectors and derive the view plane origin from them.
**
** Returns 0 on success.  When any mlx call or the allocation yields NULL,
** returns whatever m_error() returns for the matching message; resources
** obtained by earlier steps are not released here.
*/
int			init_env(t_env *e, t_obj *obj)
{
	e->mlx = mlx_init();
	if (e->mlx == NULL)
		return (m_error("mlx_init(): fail"));
	e->win = mlx_new_window(e->mlx, WIN_X, WIN_Y, "rtv1");
	if (e->win == NULL)
		return (m_error("mlx_new_window(): fail"));
	e->img = mlx_new_image(e->mlx, WIN_X, WIN_Y);
	if (e->img == NULL)
		return (m_error("mlx_new_image(): fail"));
	e->addr = mlx_get_data_addr(e->img, &(e->bpp), &(e->size_line),
			&(e->endian));
	if (e->addr == NULL)
		return (m_error("mlx_get_data_addr(): fail"));
	e->rgb_tab = (t_rgb *)malloc(sizeof(t_rgb) * (WIN_X * WIN_Y));
	if (e->rgb_tab == NULL)
		return (m_error("rgb_tab_init(): fail"));
	/* Camera at the origin, looking along +z; "up" is -y. */
	e->eye_pos = vec_new(0, 0, 0);
	e->eye_dir = vec_new(0, 0, 1);
	e->right_vec = vec_new(1, 0, 0);
	e->up_vec = vec_new(0, -1, 0);
	/*
	** origin = (eye_pos + eye_dir * VIEW_PLANE_DIST)
	**        + (up_vec * VIEW_PLANE_HEIGHT / 2 - right_vec * VIEW_PLANE_WIDTH / 2)
	*/
	e->view_plane_ori = vec_add(
			vec_add(e->eye_pos, vec_numb(e->eye_dir, VIEW_PLANE_DIST)),
			vec_sub(vec_numb(e->up_vec, VIEW_PLANE_HEIGHT / 2.0),
				vec_numb(e->right_vec, VIEW_PLANE_WIDTH / 2.0)));
	init_obj(obj);
	return (0);
}
开发者ID:hbock-42,项目名称:portfolio,代码行数:23,


示例3: PREFIX_h264_qpel16_h_lowpass_altivec

//.........这里部分代码省略.........            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = vec_perm(srcR1, srcR2, permP3);        } break;        case 11: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = srcR2;        } break;        case 12: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = srcR2;            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 13: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = srcR2;            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 14: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = srcR2;            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 15: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = srcR2;            srcP0 = vec_perm(srcR2, srcR3, permP0);            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;     
   }        srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);        srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);        srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);        srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);        srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);        srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);        srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);        srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);        srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);        srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);        srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2);        sum1A = vec_adds(srcP0A, srcP1A);        sum1B = vec_adds(srcP0B, srcP1B);        sum2A = vec_adds(srcM1A, srcP2A);        sum2B = vec_adds(srcM1B, srcP2B);        sum3A = vec_adds(srcM2A, srcP3A);        sum3B = vec_adds(srcM2B, srcP3B);        pp1A = vec_mladd(sum1A, v20ss, v16ss);        pp1B = vec_mladd(sum1B, v20ss, v16ss);        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);        pp3A = vec_add(sum3A, pp1A);        pp3B = vec_add(sum3B, pp1B);        psumA = vec_sub(pp3A, pp2A);        psumB = vec_sub(pp3B, pp2B);        sumA = vec_sra(psumA, v5us);        sumB = vec_sra(psumB, v5us);        sum = vec_packsu(sumA, sumB);        ASSERT_ALIGNED(dst);        OP_U8_ALTIVEC(fsum, sum, vec_ld(0, dst));        vec_st(fsum, 0, dst);        src += srcStride;        dst += dstStride;    }}
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例4: PREFIX_h264_qpel16_v_lowpass_altivec

static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {    register int i;    LOAD_ZERO;    const vec_u8 perm = vec_lvsl(0, src);    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_u16 v5us = vec_splat_u16(5);    const vec_s16 v5ss = vec_splat_s16(5);    const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));    uint8_t *srcbis = src - (srcStride * 2);    const vec_u8 srcM2a = vec_ld(0, srcbis);    const vec_u8 srcM2b = vec_ld(16, srcbis);    const vec_u8 srcM2 = vec_perm(srcM2a, srcM2b, perm);    //srcbis += srcStride;    const vec_u8 srcM1a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcM1b = vec_ld(16, srcbis);    const vec_u8 srcM1 = vec_perm(srcM1a, srcM1b, perm);    //srcbis += srcStride;    const vec_u8 srcP0a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP0b = vec_ld(16, srcbis);    const vec_u8 srcP0 = vec_perm(srcP0a, srcP0b, perm);    //srcbis += srcStride;    const vec_u8 srcP1a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP1b = vec_ld(16, srcbis);    const vec_u8 srcP1 = vec_perm(srcP1a, srcP1b, perm);    //srcbis += srcStride;    const vec_u8 srcP2a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP2b = vec_ld(16, srcbis);    const vec_u8 srcP2 = vec_perm(srcP2a, srcP2b, perm);    //srcbis += srcStride;    vec_s16 srcM2ssA = (vec_s16) vec_mergeh(zero_u8v, srcM2);    vec_s16 srcM2ssB = (vec_s16) vec_mergel(zero_u8v, srcM2);    vec_s16 srcM1ssA = (vec_s16) vec_mergeh(zero_u8v, srcM1);    vec_s16 srcM1ssB = (vec_s16) vec_mergel(zero_u8v, srcM1);    vec_s16 srcP0ssA = (vec_s16) vec_mergeh(zero_u8v, srcP0);    vec_s16 srcP0ssB = (vec_s16) vec_mergel(zero_u8v, srcP0);    vec_s16 srcP1ssA = (vec_s16) vec_mergeh(zero_u8v, srcP1);    vec_s16 srcP1ssB = (vec_s16) vec_mergel(zero_u8v, srcP1);    vec_s16 srcP2ssA = (vec_s16) vec_mergeh(zero_u8v, srcP2);    vec_s16 srcP2ssB = (vec_s16) vec_mergel(zero_u8v, srcP2);    vec_s16 pp1A, pp1B, pp2A, 
pp2B, pp3A, pp3B,              psumA, psumB, sumA, sumB,              srcP3ssA, srcP3ssB,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;    vec_u8 sum, fsum, srcP3a, srcP3b, srcP3;    for (i = 0 ; i < 16 ; i++) {        srcP3a = vec_ld(0, srcbis += srcStride);        srcP3b = vec_ld(16, srcbis);        srcP3 = vec_perm(srcP3a, srcP3b, perm);        srcP3ssA = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3ssB = (vec_s16) vec_mergel(zero_u8v, srcP3);        //srcbis += srcStride;        sum1A = vec_adds(srcP0ssA, srcP1ssA);        sum1B = vec_adds(srcP0ssB, srcP1ssB);        sum2A = vec_adds(srcM1ssA, srcP2ssA);        sum2B = vec_adds(srcM1ssB, srcP2ssB);        sum3A = vec_adds(srcM2ssA, srcP3ssA);        sum3B = vec_adds(srcM2ssB, srcP3ssB);        srcM2ssA = srcM1ssA;        srcM2ssB = srcM1ssB;        srcM1ssA = srcP0ssA;        srcM1ssB = srcP0ssB;        srcP0ssA = srcP1ssA;        srcP0ssB = srcP1ssB;        srcP1ssA = srcP2ssA;        srcP1ssB = srcP2ssB;        srcP2ssA = srcP3ssA;        srcP2ssB = srcP3ssB;        pp1A = vec_mladd(sum1A, v20ss, v16ss);        pp1B = vec_mladd(sum1B, v20ss, v16ss);        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);        pp3A = vec_add(sum3A, pp1A);        pp3B = vec_add(sum3B, pp1B);        psumA = vec_sub(pp3A, pp2A);        psumB = vec_sub(pp3B, pp2B);        sumA = vec_sra(psumA, v5us);        sumB = vec_sra(psumB, v5us);        sum = vec_packsu(sumA, sumB);        ASSERT_ALIGNED(dst);        OP_U8_ALTIVEC(fsum, sum, vec_ld(0, dst));        vec_st(fsum, 0, dst);//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例5: ff_fdct_altivec

/* two dimensional discrete cosine transform */void ff_fdct_altivec(int16_t *block){    vector signed short *bp;    const vector float *cp = fdctconsts;    vector float b00, b10, b20, b30, b40, b50, b60, b70;    vector float b01, b11, b21, b31, b41, b51, b61, b71;    vector float mzero, cnst, cnsts0, cnsts1, cnsts2;    vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;    /* setup constants {{{ */    /* mzero = -0.0 */    mzero  = ((vector float) vec_splat_u32(-1));    mzero  = ((vector float) vec_sl(vu32(mzero), vu32(mzero)));    cnsts0 = vec_ld(0, cp);    cp++;    cnsts1 = vec_ld(0, cp);    cp++;    cnsts2 = vec_ld(0, cp);    /* }}} */    /* 8x8 matrix transpose (vector short[8]) {{{ */#define MERGE_S16(hl, a, b) vec_merge ## hl(vs16(a), vs16(b))    bp  = (vector signed short *) block;    b00 = ((vector float) vec_ld(0,      bp));    b40 = ((vector float) vec_ld(16 * 4, bp));    b01 = ((vector float) MERGE_S16(h, b00, b40));    b11 = ((vector float) MERGE_S16(l, b00, b40));    bp++;    b10 = ((vector float) vec_ld(0,      bp));    b50 = ((vector float) vec_ld(16 * 4, bp));    b21 = ((vector float) MERGE_S16(h, b10, b50));    b31 = ((vector float) MERGE_S16(l, b10, b50));    bp++;    b20 = ((vector float) vec_ld(0,      bp));    b60 = ((vector float) vec_ld(16 * 4, bp));    b41 = ((vector float) MERGE_S16(h, b20, b60));    b51 = ((vector float) MERGE_S16(l, b20, b60));    bp++;    b30 = ((vector float) vec_ld(0,      bp));    b70 = ((vector float) vec_ld(16 * 4, bp));    b61 = ((vector float) MERGE_S16(h, b30, b70));    b71 = ((vector float) MERGE_S16(l, b30, b70));    x0 = ((vector float) MERGE_S16(h, b01, b41));    x1 = ((vector float) MERGE_S16(l, b01, b41));    x2 = ((vector float) MERGE_S16(h, b11, b51));    x3 = ((vector float) MERGE_S16(l, b11, b51));    x4 = ((vector float) MERGE_S16(h, b21, b61));    x5 = ((vector float) MERGE_S16(l, b21, b61));    x6 = ((vector float) MERGE_S16(h, b31, b71));    x7 = ((vector float) MERGE_S16(l, b31, b71));    b00 = 
((vector float) MERGE_S16(h, x0, x4));    b10 = ((vector float) MERGE_S16(l, x0, x4));    b20 = ((vector float) MERGE_S16(h, x1, x5));    b30 = ((vector float) MERGE_S16(l, x1, x5));    b40 = ((vector float) MERGE_S16(h, x2, x6));    b50 = ((vector float) MERGE_S16(l, x2, x6));    b60 = ((vector float) MERGE_S16(h, x3, x7));    b70 = ((vector float) MERGE_S16(l, x3, x7));#undef MERGE_S16    /* }}} */    /* Some of the initial calculations can be done as vector short     * before conversion to vector float.  The following code section     * takes advantage of this. */    /* fdct rows {{{ */    x0 = ((vector float) vec_add(vs16(b00), vs16(b70)));    x7 = ((vector float) vec_sub(vs16(b00), vs16(b70)));    x1 = ((vector float) vec_add(vs16(b10), vs16(b60)));    x6 = ((vector float) vec_sub(vs16(b10), vs16(b60)));    x2 = ((vector float) vec_add(vs16(b20), vs16(b50)));    x5 = ((vector float) vec_sub(vs16(b20), vs16(b50)));    x3 = ((vector float) vec_add(vs16(b30), vs16(b40)));    x4 = ((vector float) vec_sub(vs16(b30), vs16(b40)));    b70 = ((vector float) vec_add(vs16(x0), vs16(x3)));    b10 = ((vector float) vec_add(vs16(x1), vs16(x2)));    b00 = ((vector float) vec_add(vs16(b70), vs16(b10)));    b40 = ((vector float) vec_sub(vs16(b70), vs16(b10)));#define CTF0(n)                                                    /    b ## n ## 1 = ((vector float) vec_unpackl(vs16(b ## n ## 0))); /    b ## n ## 0 = ((vector float) vec_unpackh(vs16(b ## n ## 0))); /    b ## n ## 1 = vec_ctf(vs32(b ## n ## 1), 0);                   /    b ## n ## 0 = vec_ctf(vs32(b ## n ## 0), 0)    CTF0(0);    CTF0(4);    b20 = ((vector float) vec_sub(vs16(x0), vs16(x3)));    b60 = ((vector float) vec_sub(vs16(x1), vs16(x2)));    CTF0(2);    CTF0(6);//.........这里部分代码省略.........
开发者ID:63n,项目名称:FFmpeg,代码行数:101,


示例6: parabolic_texcoords

static voidparabolic_texcoords (float *texc, float vertex[3], float normal[3], int front){  //float light[3] = { 0.2357, 0.2357, 0.9428 };  float vertex4[4], x_vertex[4], x_normal[4];  float eye_to_vertex[3], reflection[4], tmp[3];  float temp[4], along;  float eye_norm_dot;  float incidence, normalized_normal[4];  int r, g, b;  GLfloat unrot[4], x, y, z;  float x_f, y_f, z_f;  float x_b, y_b, z_b;  float c_eyepos[4];  vec_normalize (normalized_normal, normal);  /* Find the normal in eye space.  */  normalized_normal[3] = 1.0;  vec_transform_fipr (x_normal, (float *) &rotate[0][0], normalized_normal); /* incidence = vec_dot (&light[0], &x_normal[0]);    if (incidence < 0)    incidence = 0;*/    /*r = 64 + 191 * incidence;  g = 64 + 191 * incidence;  b = 64 + 191 * incidence;    glColor4ub (r, g, b, 0);  glColor4ub (255, 255, 255, 0);*/    /* We need the vertex in eye space.  This duplicates work!  */  memcpy (vertex4, vertex, sizeof (float) * 3);  vertex4[3] = 1.0;  vec_transform_fipr (x_vertex, (float *) &transform[0][0], vertex4);  vec_transform_fipr (&c_eyepos[0], &camera[0][0], &eye_pos[0]);  vec_sub (eye_to_vertex, &c_eyepos[0], &x_vertex[0]);  vec_normalize (eye_to_vertex, eye_to_vertex);    eye_norm_dot = vec_dot (eye_to_vertex, &x_normal[0]);  vec_scale (tmp, x_normal, 2.0 * eye_norm_dot);  vec_sub (reflection, tmp, eye_to_vertex);    //dbgio_printf ("ref length: %f/n", (double) vec_length (reflection));#if 0  if (draw_vectors > 0)    {     /* dbgio_printf ("%f %f %f/n", (double) eye_to_vertex[0],				  (double) eye_to_vertex[1],				  (double) eye_to_vertex[2]); */      dbgio_printf ("e2v . norm = %f  norm . 
refl = %f/n",        (double) eye_norm_dot, (double) vec_dot (x_normal, reflection));      for (along = 0.0; along < 0.25; along += 0.005)	{	  int colour = 0x001f | ((int) (along * 255) << 5);	  vec_scale (temp, x_normal, along);	  vec_add (&temp[0], x_vertex, &temp[0]);	  box (temp, colour);	}      for (along = 0.0; along < 0.25; along += 0.005)	{	  int colour = 0xf800 | ((int) (along * 255) << 5);	  /*vec_scale (temp, eye_to_vertex, along);	  vec_add (&temp[0], x_vertex, &temp[0]);	  box (temp, colour);*/	  colour &= ~0xf800;	  vec_scale (temp, reflection, along);	  vec_add (&temp[0], x_vertex, &temp[0]);	  box (temp, colour);	}      draw_vectors--;    }#endif  /*x = reflection[0];  y = reflection[1];  z = reflection[2];  w = 1.0;  mat_load ((matrix_t *) &unrotate[0][0]);  mat_trans_nodiv (x, y, z, w);  glKosMatrixDirty ();*/  reflection[3] = 1.0;  vec_transform_fipr (unrot, (float *) &invcamera[0][0], reflection);  x = unrot[0];  y = unrot[1];  z = unrot[2];  //glColor4ub (128 + 127 * x, 128 + 127 * y, 128 + 127 * z, 0);//.........这里部分代码省略.........
开发者ID:crtc-demos,项目名称:wobble,代码行数:101,


示例7: vec_max

 // Specialization for the <false> case: subtracts `value` from `statistic`
 // with vec_sub, then returns vec_max of that difference and `saturation`,
 // so `saturation` acts as a lower bound on the result.
 template<> SIMD_INLINE v128_s16 InterferenceChange<false>(v128_s16 statistic, v128_s16 value, v128_s16 saturation)
 {
     const v128_s16 decreased = vec_sub(statistic, value);
     return vec_max(decreased, saturation);
 }
开发者ID:pozdneev,项目名称:Simd,代码行数:4,


示例8: nb_kernel010nf_ppc_altivec

void nb_kernel010nf_ppc_altivec(int *             p_nri,                       int               iinr[],                       int               jindex[],                       int               jjnr[],                       int               shift[],                       float             shiftvec[],                       float             fshift[],                       int               gid[],                       float             pos[],                       float             faction[],                       float             charge[],                       float *           p_facel,                       float *           p_krf,                       float *           p_crf,                       float             Vc[],                       int               type[],                       int *             p_ntype,                       float             vdwparam[],                       float             Vvdw[],                       float *           p_tabscale,                       float             VFtab[],                       float             invsqrta[],                       float             dvda[],                       float *           p_gbtabscale,                       float             GBtab[],                       int *             p_nthreads,                       int *             count,                       void *            mtx,                       int *             outeriter,                       int *             inneriter,					   float *           work){	vector float ix,iy,iz,shvec;	vector float nul;	vector float dx,dy,dz;	vector float Vvdwtot,c6,c12;	vector float rinvsq,rsq,rinvsix;  	int n,k,ii,is3,ii3,nj0,nj1;	int jnra,jnrb,jnrc,jnrd;	int j3a,j3b,j3c,j3d;	int nri, ntype, nouter, ninner;	int ntiA,tja,tjb,tjc,tjd;#ifdef GMX_THREAD_SHM_FDECOMP	int nn0, nn1;#endif      nouter   = 0;    ninner   = 0;    nri      = *p_nri;    ntype    = *p_ntype;	nul=vec_zero();#ifdef GMX_THREAD_SHM_FDECOMP    nthreads = *p_nthreads;	do {		
tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);		nn0              = *count;		nn1              = nn0+(nri-nn0)/(2*nthreads)+3;		*count           = nn1;		tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);		if(nn1>nri) nn1=nri;		for(n=nn0; (n<nn1); n++) {#if 0		} /* maintain correct indentation even with conditional left braces */#endif#else /* without tMPI_Threads */		for(n=0;n<nri;n++) {#endif			is3        = 3*shift[n];			shvec      = load_xyz(shiftvec+is3);			ii         = iinr[n];			ii3        = 3*ii;			ix         = load_xyz(pos+ii3);			Vvdwtot     = nul;			ix         = vec_add(ix,shvec);    			nj0        = jindex[n];			nj1        = jindex[n+1];			splat_xyz_to_vectors(ix,&ix,&iy,&iz);			ntiA       = 2*ntype*type[ii];			for(k=nj0; k<(nj1-3); k+=4) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				jnrd            = jjnr[k+3];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				j3d             = 3*jnrd;				transpose_4_to_3(load_xyz(pos+j3a),								 load_xyz(pos+j3b),								 load_xyz(pos+j3c),								 load_xyz(pos+j3d),&dx,&dy,&dz);				dx              = vec_sub(ix,dx);				dy              = vec_sub(iy,dy);				dz              = vec_sub(iz,dz);				rsq             = vec_madd(dx,dx,nul);				rsq             = vec_madd(dy,dy,rsq);				rsq             = vec_madd(dz,dz,rsq);//.........这里部分代码省略.........
开发者ID:TTarenzi,项目名称:MMCG-HAdResS,代码行数:101,


示例9: nb_kernel133nf_ppc_altivec

//.........这里部分代码省略.........		gmx_thread_mutex_unlock((gmx_thread_mutex_t *)mtx);		if(nn1>nri) nn1=nri;		for(n=nn0; (n<nn1); n++) {#if 0		} /* maintain correct indentation even with conditional left braces */#endif#else /* without gmx_threads */		for(n=0;n<nri;n++) {#endif  			is3        = 3*shift[n];			ii         = iinr[n];			ii3        = 3*ii;			load_1_4atoms_shift_and_splat(pos+ii3,shiftvec+is3,&iOx,&iOy,&iOz,										  &iH1x,&iH1y,&iH1z,&iH2x,&iH2y,&iH2z,										  &iMx,&iMy,&iMz);			vctot      = nul;			Vvdwtot     = nul;			nj0        = jindex[n];			nj1        = jindex[n+1];    			for(k=nj0; k<(nj1-3); k+=4) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				jnrd            = jjnr[k+3];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				j3d             = 3*jnrd;				transpose_4_to_3(load_xyz(pos+j3a),								 load_xyz(pos+j3b),								 load_xyz(pos+j3c),								 load_xyz(pos+j3d),&dMx,&dMy,&dMz);				dOx             = vec_sub(iOx,dMx);				dOy             = vec_sub(iOy,dMy);				dOz             = vec_sub(iOz,dMz);				dH1x            = vec_sub(iH1x,dMx);				dH1y            = vec_sub(iH1y,dMy);				dH1z            = vec_sub(iH1z,dMz);				dH2x            = vec_sub(iH2x,dMx);				dH2y            = vec_sub(iH2y,dMy);				dH2z            = vec_sub(iH2z,dMz);				dMx             = vec_sub(iMx,dMx);				dMy             = vec_sub(iMy,dMy);				dMz             = vec_sub(iMz,dMz);				rsqO            = vec_madd(dOx,dOx,nul);				rsqH1           = vec_madd(dH1x,dH1x,nul);				rsqH2           = vec_madd(dH2x,dH2x,nul);				rsqM            = vec_madd(dMx,dMx,nul);				rsqO            = vec_madd(dOy,dOy,rsqO);				rsqH1           = vec_madd(dH1y,dH1y,rsqH1);				rsqH2           = vec_madd(dH2y,dH2y,rsqH2);				rsqM            = vec_madd(dMy,dMy,rsqM);				rsqO            = vec_madd(dOz,dOz,rsqO);				rsqH1           = vec_madd(dH1z,dH1z,rsqH1);				rsqH2           = vec_madd(dH2z,dH2z,rsqH2);				rsqM          
  = vec_madd(dMz,dMz,rsqM);				rinvO           = do_invsqrt(rsqO);				do_3_invsqrt(rsqM,rsqH1,rsqH2,&rinvM,&rinvH1,&rinvH2);				r               = vec_madd(rsqO,rinvO,nul);								tja             = ntiA+2*type[jnra];				tjb             = ntiA+2*type[jnrb];				tjc             = ntiA+2*type[jnrc];
开发者ID:alejandrox1,项目名称:gromacs_flatbottom,代码行数:67,


示例10: qp

 void qp(double *x_out, int *iter, double *gt, double *bt) { /* define data */ double q[300]= {0};double q1[180]= {0};double q2[120]= {0};double l[180] = {0};double u[180] = {0};double tmp_var_p[180] = {0};double tmp_var_p2[180] = {0};double arg_prox_h[180] = {0};double lambda[180] = {0};double y[180] = {0};double x[180] = {0};double lambda_old[180] = {0};double v[180] = {0};double v_old[180] = {0};double tmp_var_n[180] = {0};double tmp_var_n2[180] = {0};double tmp_var_nm[300] = {0};double tmp_var_nm2[300] = {0};double rhs[300] = {0}; int jj = 0;double cond = -1; double theta = 1; double theta_old = 1; mat_vec_mult_sparse(&G,gt,q1); mat_vec_mult_sparse(&B,bt,q2); copy_vec_part((double *) &Lb,l,180); copy_vec_part((double *) &Ub,u,180); while ((jj < 2000) && (cond < 0)) { jj++;copy_vec_part_negate(v,tmp_var_p,180);mat_vec_mult_diag(&CT,tmp_var_p,tmp_var_n); vec_sub(tmp_var_n,q1,tmp_var_n,180); stack_vec(tmp_var_n,q2,rhs,180,120);perm_fwdsolve(&L,p,rhs,tmp_var_nm); mat_vec_mult_sparse(&Dinv,tmp_var_nm,tmp_var_nm2); backsolve_perm(&LT,p,tmp_var_nm2,tmp_var_nm); copy_vec_part(tmp_var_nm,x,180); mat_vec_mult_diag(&C,x,tmp_var_p);//.........这里部分代码省略.........
开发者ID:elt11jke,项目名称:RealPred-Projekt,代码行数:101,


示例11: minmax_3fv_aligned

GridSearchPairlist *vmd_gridsearch_bonds(const float *pos, const float *radii,                                   int natoms, float pairdist, int maxpairs) {  float min[3], max[3];  int i, xb, yb, zb, xytotb, totb;  int **boxatom, *numinbox, *maxinbox, **nbrlist;  int numon = 0;  float sidelen[3], volume;  int paircount = 0;  // find bounding box for selected atoms, and number of atoms in selection.#if 1  minmax_3fv_aligned(pos, natoms, min, max);#else  find_minmax_all(pos, natoms, min, max);#endif  // check for NaN coordinates propagating to the bounding box result  if (!(max[0] >= min[0] && max[1] >= min[1] && max[2] >= min[2])) {    msgErr << "vmd_gridsearch_bonds: NaN coordinates in bounds, aborting!" << sendmsg;    return NULL;  }  // do sanity checks and complain if we've got bogus atom coordinates,  // we shouldn't ever have density higher than 0.1 atom/A^3, but we'll  // be generous and allow much higher densities.    if (maxpairs != -1) {    vec_sub(sidelen, max, min);    // include estimate for atom radius (1 Angstrom) in volume determination    volume = fabsf((sidelen[0] + 2.0f) * (sidelen[1] + 2.0f) * (sidelen[2] + 2.0f));    if ((numon / volume) > 1.0) {      msgWarn << "vmd_gridsearch_bonds: insane atom density" << sendmsg;    }  }  // I don't want the grid to get too large, otherwise I could run out  // of memory.  Octrees would be cool, but I'll just limit the grid size  // and let the performance degrade a little for pathological systems.  // Note that pairdist^2 is what gets used for the actual distance checks;  // from here on out pairdist is only used to set the grid size, so we   // can set it to anything larger than the original pairdist.  
const int MAXBOXES = 4000000;  totb = MAXBOXES + 1;  float newpairdist = pairdist;  float xrange = max[0]-min[0];  float yrange = max[1]-min[1];  float zrange = max[2]-min[2];  do {    pairdist = newpairdist;    const float invpairdist = 1.0f / pairdist;     xb = ((int)(xrange*invpairdist))+1;    yb = ((int)(yrange*invpairdist))+1;    zb = ((int)(zrange*invpairdist))+1;    xytotb = yb * xb;    totb = xytotb * zb;    newpairdist = pairdist * 1.26f; // cbrt(2) is about 1.26  } while (totb > MAXBOXES || totb < 1); // check for integer wraparound too   // 2. Sort each atom into appropriate bins  boxatom = (int **) calloc(1, totb*sizeof(int *));  numinbox = (int *) calloc(1, totb*sizeof(int));  maxinbox = (int *) calloc(1, totb*sizeof(int));  if (boxatom == NULL || numinbox == NULL || maxinbox == NULL) {    if (boxatom != NULL)      free(boxatom);    if (numinbox != NULL)      free(numinbox);    if (maxinbox != NULL)      free(maxinbox);    msgErr << "Bondsearch memory allocation failed, bailing out" << sendmsg;    return NULL; // ran out of memory, bail out!  }  const float invpairdist = 1.0f / pairdist;   for (i=0; i<natoms; i++) {    int axb, ayb, azb, aindex, num;    // compute box index for new atom    const float *loc = pos + 3L*i;    axb = (int)((loc[0] - min[0])*invpairdist);    ayb = (int)((loc[1] - min[1])*invpairdist);    azb = (int)((loc[2] - min[2])*invpairdist);    // clamp box indices to valid range in case of FP error    if (axb >= xb) axb = xb-1;    if (ayb >= yb) ayb = yb-1;    if (azb >= zb) azb = zb-1;    aindex = azb * xytotb + ayb * xb + axb;    // grow box if necessary     if ((num = numinbox[aindex]) == maxinbox[aindex]) {      boxatom[aindex] = (int *) realloc(boxatom[aindex], (num+4)*sizeof(int));      maxinbox[aindex] += 4;    }    // store atom index in box    boxatom[aindex][num] = i;    numinbox[aindex]++;  }//.........这里部分代码省略.........
开发者ID:Eigenstate,项目名称:vmd-python,代码行数:101,


示例12: efp_get_electric_field

EFP_EXPORT enum efp_resultefp_get_electric_field(struct efp *efp, size_t frag_idx, const double *xyz, double *field){	assert(efp);	assert(frag_idx < efp->n_frag);	assert(xyz);	assert(field);	const struct frag *frag = efp->frags + frag_idx;	vec_t elec_field = vec_zero;	for (size_t i = 0; i < efp->n_frag; i++) {		if (i == frag_idx || efp_skip_frag_pair(efp, i, frag_idx))			continue;		const struct frag *fr_i = efp->frags + i;		struct swf swf = efp_make_swf(efp, fr_i, frag);		/* field due to nuclei */		for (size_t j = 0; j < fr_i->n_atoms; j++) {			const struct efp_atom *at = fr_i->atoms + j;			vec_t dr = {				xyz[0] - at->x - swf.cell.x,				xyz[1] - at->y - swf.cell.y,				xyz[2] - at->z - swf.cell.z			};			double r = vec_len(&dr);			double r3 = r * r * r;			elec_field.x += swf.swf * at->znuc * dr.x / r3;			elec_field.y += swf.swf * at->znuc * dr.y / r3;			elec_field.z += swf.swf * at->znuc * dr.z / r3;		}		/* field due to multipoles */		for (size_t j = 0; j < fr_i->n_multipole_pts; j++) {			const struct multipole_pt *mpt = fr_i->multipole_pts + j;			vec_t mult_field = get_multipole_field((const vec_t *)xyz, mpt, &swf);			elec_field.x += mult_field.x;			elec_field.y += mult_field.y;			elec_field.z += mult_field.z;		}		/* field due to induced dipoles */		for (size_t j = 0; j < fr_i->n_polarizable_pts; j++) {			struct polarizable_pt *pt_i = fr_i->polarizable_pts + j;			size_t idx = fr_i->polarizable_offset + j;			vec_t dr = {				xyz[0] - pt_i->x - swf.cell.x,				xyz[1] - pt_i->y - swf.cell.y,				xyz[2] - pt_i->z - swf.cell.z			};			double r = vec_len(&dr);			double r3 = r * r * r;			double r5 = r3 * r * r;			double t1 = vec_dot(&efp->indip[idx], &dr);			elec_field.x -= swf.swf * (efp->indip[idx].x / r3 -						3.0 * t1 * dr.x / r5);			elec_field.y -= swf.swf * (efp->indip[idx].y / r3 -						3.0 * t1 * dr.y / r5);			elec_field.z -= swf.swf * (efp->indip[idx].z / r3 -						3.0 * t1 * dr.z / r5);		}	}	if (efp->opts.terms & EFP_TERM_AI_POL) {		/* field due to nuclei from ab 
initio subsystem */		for (size_t i = 0; i < efp->n_ptc; i++) {			vec_t dr = vec_sub((const vec_t *)xyz, efp->ptc_xyz + i);			double r = vec_len(&dr);			double r3 = r * r * r;			elec_field.x += efp->ptc[i] * dr.x / r3;			elec_field.y += efp->ptc[i] * dr.y / r3;			elec_field.z += efp->ptc[i] * dr.z / r3;		}	}	*((vec_t *)field) = elec_field;	return (EFP_RESULT_SUCCESS);}
开发者ID:SahanGH,项目名称:psi4public,代码行数:88,


示例13: compute_grad_point

//.........这里部分代码省略.........			force.z += p2 * e * dr.z;			vec_scale(&force, swf.swf);			vec_scale(&add_i, swf.swf);			vec_scale(&add_j, swf.swf);			efp_add_force(efp->grad + frag_idx, CVEC(fr_i->x),					CVEC(pt_i->x), &force, &add_i);			efp_sub_force(efp->grad + j, CVEC(fr_j->x),					CVEC(pt_j->x), &force, &add_j);			efp_add_stress(&swf.dr, &force, &efp->stress);			energy += p1 * e;		}		/* induced dipole - induced dipoles */		for (size_t jj = 0; jj < fr_j->n_polarizable_pts; jj++) {			struct polarizable_pt *pt_j = fr_j->polarizable_pts + jj;			size_t idx_j = fr_j->polarizable_offset + jj;			vec_t dr = {				pt_j->x - pt_i->x - swf.cell.x,				pt_j->y - pt_i->y - swf.cell.y,				pt_j->z - pt_i->z - swf.cell.z			};			vec_t half_dipole_i = {				0.5 * efp->indip[idx_i].x,				0.5 * efp->indip[idx_i].y,				0.5 * efp->indip[idx_i].z			};			double p1 = 1.0, p2 = 0.0;			if (efp->opts.pol_damp == EFP_POL_DAMP_TT) {				double r = vec_len(&dr);				p1 = efp_get_pol_damp_tt(r, fr_i->pol_damp,						fr_j->pol_damp);				p2 = efp_get_pol_damp_tt_grad(r, fr_i->pol_damp,						fr_j->pol_damp);			}			vec_t force, add_i, add_j;			double e = efp_dipole_dipole_energy(&half_dipole_i,						&efp->indipconj[idx_j], &dr);			efp_dipole_dipole_grad(&half_dipole_i, &efp->indipconj[idx_j],						&dr, &force, &add_i, &add_j);			vec_negate(&add_j);			vec_scale(&force, p1);			vec_scale(&add_i, p1);			vec_scale(&add_j, p1);			force.x += p2 * e * dr.x;			force.y += p2 * e * dr.y;			force.z += p2 * e * dr.z;			vec_scale(&force, swf.swf);			vec_scale(&add_i, swf.swf);			vec_scale(&add_j, swf.swf);			efp_add_force(efp->grad + frag_idx, CVEC(fr_i->x),					CVEC(pt_i->x), &force, &add_i);			efp_sub_force(efp->grad + j, CVEC(fr_j->x),					CVEC(pt_j->x), &force, &add_j);			efp_add_stress(&swf.dr, &force, &efp->stress);			energy += p1 * e;		}		vec_t force = {			swf.dswf.x * energy,			swf.dswf.y * energy,			swf.dswf.z * energy		};		six_atomic_add_xyz(efp->grad + frag_idx, &force);		six_atomic_sub_xyz(efp->grad + j, 
&force);		efp_add_stress(&swf.dr, &force, &efp->stress);	}	/* induced dipole - ab initio nuclei */	if (efp->opts.terms & EFP_TERM_AI_POL) {		for (size_t j = 0; j < efp->n_ptc; j++) {			vec_t dr = vec_sub(efp->ptc_xyz + j, CVEC(pt_i->x));			vec_t force, add_i, add_j;			efp_charge_dipole_grad(efp->ptc[j], &dipole_i, &dr,					       &force, &add_j, &add_i);			vec_negate(&add_i);			vec_atomic_add(efp->ptc_grad + j, &force);			efp_sub_force(efp->grad + frag_idx, CVEC(fr_i->x),					CVEC(pt_i->x), &force, &add_i);		}	}}
开发者ID:SahanGH,项目名称:psi4public,代码行数:101,


示例14: get_elec_field

static vec_tget_elec_field(const struct efp *efp, size_t frag_idx, size_t pt_idx){	const struct frag *fr_j = efp->frags + frag_idx;	const struct polarizable_pt *pt = fr_j->polarizable_pts + pt_idx;	vec_t elec_field = vec_zero;	for (size_t i = 0; i < efp->n_frag; i++) {		if (i == frag_idx || efp_skip_frag_pair(efp, i, frag_idx))			continue;		const struct frag *fr_i = efp->frags + i;		struct swf swf = efp_make_swf(efp, fr_i, fr_j);		/* field due to nuclei */		for (size_t j = 0; j < fr_i->n_atoms; j++) {			const struct efp_atom *at = fr_i->atoms + j;			vec_t dr = {				pt->x - at->x - swf.cell.x,				pt->y - at->y - swf.cell.y,				pt->z - at->z - swf.cell.z			};			double r = vec_len(&dr);			double r3 = r * r * r;			double p1 = 1.0;			if (efp->opts.pol_damp == EFP_POL_DAMP_TT)				p1 = efp_get_pol_damp_tt(r, fr_i->pol_damp, fr_j->pol_damp);			elec_field.x += swf.swf * at->znuc * dr.x / r3 * p1;			elec_field.y += swf.swf * at->znuc * dr.y / r3 * p1;			elec_field.z += swf.swf * at->znuc * dr.z / r3 * p1;		}		/* field due to multipoles */		for (size_t j = 0; j < fr_i->n_multipole_pts; j++) {			const struct multipole_pt *mult_pt = fr_i->multipole_pts + j;			vec_t mult_field = get_multipole_field(CVEC(pt->x), mult_pt, &swf);			vec_t dr = {				pt->x - mult_pt->x - swf.cell.x,				pt->y - mult_pt->y - swf.cell.y,				pt->z - mult_pt->z - swf.cell.z			};			double r = vec_len(&dr);			double p1 = 1.0;			if (efp->opts.pol_damp == EFP_POL_DAMP_TT)				p1 = efp_get_pol_damp_tt(r, fr_i->pol_damp, fr_j->pol_damp);			elec_field.x += mult_field.x * p1;			elec_field.y += mult_field.y * p1;			elec_field.z += mult_field.z * p1;		}	}	if (efp->opts.terms & EFP_TERM_AI_POL) {		/* field due to nuclei from ab initio subsystem */		for (size_t i = 0; i < efp->n_ptc; i++) {			vec_t dr = vec_sub(CVEC(pt->x), efp->ptc_xyz + i);			double r = vec_len(&dr);			double r3 = r * r * r;			elec_field.x += efp->ptc[i] * dr.x / r3;			elec_field.y += efp->ptc[i] * dr.y / r3;			elec_field.z += efp->ptc[i] * dr.z / 
r3;		}	}	return (elec_field);}
开发者ID:SahanGH,项目名称:psi4public,代码行数:75,


示例15: iquant_intra_m1_altivec

void iquant_intra_m1_altivec(IQUANT_INTRA_PDECL){    int i;    vector signed short vsrc;    uint16_t *qmat;    vector unsigned short vqmat;    vector unsigned short vmquant;    vector bool short eqzero, ltzero;    vector signed short val, t0;    vector signed short zero, one;    vector unsigned int four;    vector signed short min, max;    int offset, offset2;    int16_t dst0;    union {	vector unsigned short vu16;	unsigned short mquant;	vector signed int vs32;	struct {	    signed int pad[3];	    signed int sum;	} s;    } vu;#ifdef ALTIVEC_DST    DataStreamControl dsc;#endif#ifdef ALTIVEC_VERIFY /* {{{ */    if (NOT_VECTOR_ALIGNED(wsp->intra_q_mat))	mjpeg_error_exit1("iquant_intra_m1: wsp->intra_q_mat %% 16 != 0, (%d)",	    wsp->intra_q_mat);    if (NOT_VECTOR_ALIGNED(src))	mjpeg_error_exit1("iquant_intra_m1: src %% 16 != 0, (%d)", src);    if (NOT_VECTOR_ALIGNED(dst))	mjpeg_error_exit1("iquant_intra_m1: dst %% 16 != 0, (%d)", dst);    for (i = 0; i < 64; i++)	if (src[i] < -256 || src[i] > 255)	    mjpeg_error_exit1("iquant_intra_m2: -256 > src[%i] > 255, (%d)",		i, src[i]);#endif /* }}} */    AMBER_START;    dst0 = src[0] << (3 - dc_prec);    qmat = (uint16_t*)wsp->intra_q_mat;#ifdef ALTIVEC_DST    dsc.control = DATA_STREAM_CONTROL(64/8,1,0);    vec_dst(src, dsc.control, 0);    vec_dst(qmat, dsc.control, 1);#endif    /* vmquant = (vector unsigned short)(mquant); */    vu.mquant = (unsigned short)mquant;    vmquant = vec_splat(vu.vu16, 0);    zero = vec_splat_s16(0);    one = vec_splat_s16(1);    four = vec_splat_u32(4);    /* max = (2047); min = (-2048); {{{ */    vu8(max) = vec_splat_u8(0x7);    t0 = vec_splat_s16(-1); /* 0xffff */    vu8(max) = vec_mergeh(vu8(max), vu8(t0)); /* 0x07ff == 2047 */    min = vec_sub(t0, max);    /* }}} */    offset = 0;#if 1    vsrc = vec_ld(offset, (signed short*)src);    vqmat = vec_ld(offset, (unsigned short*)qmat);    i = (64/8) - 1;    do {	/* intra_q[i] * mquant */	vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));	/* save sign 
*/	ltzero = vec_cmplt(vsrc, zero);	eqzero = vec_cmpeq(vsrc, zero);	/* val = abs(src) */	t0 = vec_sub(zero, vsrc);	val = vec_max(t0, vsrc);	/* val = (src * quant) >> 4 */	vs32(t0) = vec_mule(val, vs16(vqmat));	vs32(val) = vec_mulo(val, vs16(vqmat));	vs32(t0) = vec_sra(vs32(t0), four);	vs16(t0) = vec_pack(vs32(t0), vs32(t0));	vs32(val) = vec_sra(vs32(val), four);	vs16(val) = vec_pack(vs32(val), vs32(val));	val = vec_mergeh(vs16(t0), vs16(val));	offset2 = offset;	offset += 8*sizeof(int16_t);	vsrc = vec_ld(offset, (signed short*)src);	vqmat = vec_ld(offset, (unsigned short*)qmat);//.........这里部分代码省略.........
开发者ID:jlehtine,项目名称:yuvmotionfps,代码行数:101,


示例16: jsimd_fdct_islow_altivec

/*
 * In-place forward DCT of one block using AltiVec intrinsics.
 *
 * `data` is read as eight 16-byte vectors (byte offsets 0..112) and the
 * result is written back to the same eight locations.  The work is done in
 * two passes, each preceded by a TRANSPOSE: pass 1 over rows, pass 2 over
 * columns.  Every pass first forms the butterfly sums/differences
 * tmp0..tmp7 and then hands off to DO_FDCT_PASS1 / DO_FDCT_PASS2.
 *
 * NOTE(review): vec_ld/vec_st are used with no alignment fix-up, so `data`
 * is presumably required to be 16-byte aligned - confirm with callers.
 *
 * NOTE(review): many locals below (tmp10..tmp13, z3/z4, the *l/*h halves,
 * the pw_ / pd_ / descale constants, out0..out7) are not referenced in the
 * visible statements; they are presumably consumed by name inside the
 * TRANSPOSE and DO_FDCT_PASS* macros.  Do not rename or remove them without
 * checking those macro definitions.
 */
void
jsimd_fdct_islow_altivec (DCTELEM *data)
{
  __vector short row0, row1, row2, row3, row4, row5, row6, row7,
    col0, col1, col2, col3, col4, col5, col6, col7,
    tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13,
    tmp47l, tmp47h, tmp56l, tmp56h, tmp1312l, tmp1312h,
    z3, z4, z34l, z34h,
    out0, out1, out2, out3, out4, out5, out6, out7;
  __vector int z3l, z3h, z4l, z4h,
    out1l, out1h, out2l, out2h, out3l, out3h, out5l, out5h, out6l, out6h,
    out7l, out7h;

  /* Constants */
  __vector short
    pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) },
    pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) },
    pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) },
    pw_f117_f078 = { __4X2(F_1_175, F_1_175 - F_0_390) },
    pw_mf060_mf089 = { __4X2(F_0_298 - F_0_899, -F_0_899) },
    pw_mf089_f060 = { __4X2(-F_0_899, F_1_501 - F_0_899) },
    pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) },
    pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) },
    pw_descale_p2x = { __8X(1 << (PASS1_BITS - 1)) };
  __vector unsigned short pass1_bits = { __8X(PASS1_BITS) };
  __vector int pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) },
    pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) };
  __vector unsigned int descale_p1 = { __4X(DESCALE_P1) },
    descale_p2 = { __4X(DESCALE_P2) };

  /* Pass 1: process rows */

  /* Load the block as eight consecutive 16-byte vectors. */
  row0 = vec_ld(0, data);
  row1 = vec_ld(16, data);
  row2 = vec_ld(32, data);
  row3 = vec_ld(48, data);
  row4 = vec_ld(64, data);
  row5 = vec_ld(80, data);
  row6 = vec_ld(96, data);
  row7 = vec_ld(112, data);

  TRANSPOSE(row, col);

  /* Butterflies: pair element k with element 7-k. */
  tmp0 = vec_add(col0, col7);
  tmp7 = vec_sub(col0, col7);
  tmp1 = vec_add(col1, col6);
  tmp6 = vec_sub(col1, col6);
  tmp2 = vec_add(col2, col5);
  tmp5 = vec_sub(col2, col5);
  tmp3 = vec_add(col3, col4);
  tmp4 = vec_sub(col3, col4);

  DO_FDCT_PASS1();

  /* Pass 2: process columns */

  TRANSPOSE(out, row);

  tmp0 = vec_add(row0, row7);
  tmp7 = vec_sub(row0, row7);
  tmp1 = vec_add(row1, row6);
  tmp6 = vec_sub(row1, row6);
  tmp2 = vec_add(row2, row5);
  tmp5 = vec_sub(row2, row5);
  tmp3 = vec_add(row3, row4);
  tmp4 = vec_sub(row3, row4);

  DO_FDCT_PASS2();

  /* Store the result back over the input block. */
  vec_st(out0, 0, data);
  vec_st(out1, 16, data);
  vec_st(out2, 32, data);
  vec_st(out3, 48, data);
  vec_st(out4, 64, data);
  vec_st(out5, 80, data);
  vec_st(out6, 96, data);
  vec_st(out7, 112, data);
}
开发者ID:Acidburn0zzz,项目名称:libjpeg-turbo,代码行数:78,


示例17: vec_sub

void DispCmdCylinder::putdata(const float *pos1, const float *pos2, float rad,                       int res, int filled, VMDDisplayList *dobj) {  float lenaxis[3];  vec_sub(lenaxis, pos1, pos2);  // check that it's valid  if (dot_prod(lenaxis,lenaxis) == 0.0 || res <= 0) return;  if (lastres != res ) {    rot[0] = cosf( (float) VMD_TWOPI / (float) res);    rot[1] = sinf( (float) VMD_TWOPI / (float) res);  }  lastres = res;  size_t size = (9 + res*3*3)*sizeof(float);  float *pos = (float *)(dobj->append(DCYLINDER, size));  if (pos == NULL)     return;  memcpy(pos,pos1,3*sizeof(float));  memcpy(pos+3,pos2,3*sizeof(float));  pos[6] = rad;  pos[7] = (float)res;  pos[8] = (float)filled;  float axis[3];  vec_sub(axis, pos1, pos2);  vec_normalize(axis);  int i;  // find an axis not aligned with the cylinder  if (fabs(axis[0]) < fabs(axis[1]) &&      fabs(axis[0]) < fabs(axis[2])) {     i = 0;  } else if (fabs(axis[1]) < fabs(axis[2])) {     i = 1;  } else {     i = 2;  }  float perp[3];  perp[i] = 0;                    // this is not aligned with the cylinder  perp[(i+1)%3] = axis[(i+2)%3];  perp[(i+2)%3] = -axis[(i+1)%3];  vec_normalize(perp);  float perp2[3];  cross_prod(perp2, axis, perp); // find a normal to the cylinder  float *posptr = pos+9;  float m = rot[0], n = rot[1];  for (int h=0; h<res; h++) {    float tmp0, tmp1, tmp2;        tmp0 = m*perp[0] + n*perp2[0]; // add the normal    tmp1 = m*perp[1] + n*perp2[1];    tmp2 = m*perp[2] + n*perp2[2];    posptr[0] = tmp0; // add the normal    posptr[1] = tmp1;    posptr[2] = tmp2;    posptr[3] = pos2[0] + rad * tmp0; // start    posptr[4] = pos2[1] + rad * tmp1;    posptr[5] = pos2[2] + rad * tmp2;    posptr[6] = posptr[3] + lenaxis[0];  // and end of the edge    posptr[7] = posptr[4] + lenaxis[1];    posptr[8] = posptr[5] + lenaxis[2];    posptr += 9;    // use angle addition formulae:    // cos(A+B) = cos A cos B - sin A sin B    // sin(A+B) = cos A sin B + sin A cos B    float mtmp = rot[0]*m - rot[1]*n;    float 
ntmp = rot[0]*n + rot[1]*m;     m = mtmp;    n = ntmp;  }}
开发者ID:molsimmsu,项目名称:3mview,代码行数:74,


示例18: PREFIX_h264_qpel16_h_lowpass_altivec

//.........这里部分代码省略.........      srcP1 = vec_perm(srcR1, srcR2, permP1);      srcP2 = vec_perm(srcR1, srcR2, permP2);      srcP3 = srcR2;    } break;    case 12: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = vec_perm(srcR1, srcR2, permP1);      srcP2 = srcR2;      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 13: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = srcR2;      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 14: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = srcR2;      srcP1 = vec_perm(srcR2, srcR3, permP1);      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 15: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = srcR2;      srcP0 = vec_perm(srcR2, srcR3, permP0);      srcP1 = vec_perm(srcR2, srcR3, permP1);      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    }    const vector signed short srcP0A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0);    const vector signed short srcP0B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0);    const vector signed short srcP1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1);    const vector signed short srcP1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1);    const vector signed short srcP2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2);    const vector 
signed short srcP2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2);    const vector signed short srcP3A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3);    const vector signed short srcP3B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3);    const vector signed short srcM1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1);    const vector signed short srcM1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1);    const vector signed short srcM2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM2);    const vector signed short srcM2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2);    const vector signed short sum1A = vec_adds(srcP0A, srcP1A);    const vector signed short sum1B = vec_adds(srcP0B, srcP1B);    const vector signed short sum2A = vec_adds(srcM1A, srcP2A);    const vector signed short sum2B = vec_adds(srcM1B, srcP2B);    const vector signed short sum3A = vec_adds(srcM2A, srcP3A);    const vector signed short sum3B = vec_adds(srcM2B, srcP3B);        const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss);    const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss);    const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);    const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);        const vector signed short pp3A = vec_add(sum3A, pp1A);    const vector signed short pp3B = vec_add(sum3B, pp1B);    const vector signed short psumA = vec_sub(pp3A, pp2A);    const vector signed short psumB = vec_sub(pp3B, pp2B);    const vector signed short sumA = vec_sra(psumA, v5us);    const vector signed short sumB = vec_sra(psumB, v5us);    const vector unsigned char sum = vec_packsu(sumA, sumB);    const vector unsigned char dst1 = vec_ld(0, dst);    const vector unsigned char dst2 = vec_ld(16, dst);    const vector unsigned char vdst = vec_perm(dst1, 
dst2, vec_lvsl(0, dst));    vector unsigned char fsum;    OP_U8_ALTIVEC(fsum, sum, vdst);    const vector unsigned char rsum = vec_perm(fsum, fsum, dstperm);    const vector unsigned char fdst1 = vec_sel(dst1, rsum, dstmask);    const vector unsigned char fdst2 = vec_sel(rsum, dst2, dstmask);    vec_st(fdst1, 0, dst);    vec_st(fdst2, 16, dst);    src += srcStride;    dst += dstStride;  }POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);}
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,


示例19: test1

//.........这里部分代码省略.........// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}// CHECK: or <2 x i64>// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>// CHECK-LE: and <2 x i64> %{{[0-9]+}},// CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}// CHECK-LE: or <2 x i64>// CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>  dummy();// CHECK: call void @dummy()// CHECK-LE: call void @dummy()  res_vd = vec_sel(vd, vd, vull);// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>// CHECK: and <2 x i64> %{{[0-9]+}},// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}// CHECK: or <2 x i64>// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>// CHECK-LE: and <2 x i64> %{{[0-9]+}},// CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}// CHECK-LE: or <2 x i64>// CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>  res_vf = vec_sqrt(vf);// CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}})// CHECK-LE: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}})  res_vd = vec_sqrt(vd);// CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}})// CHECK-LE: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}})  res_vd = vec_sub(vd, vd);// CHECK: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}}// CHECK-LE: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}}  res_vf = vec_trunc(vf);// CHECK: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}})// CHECK-LE: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}})  res_vd = vec_trunc(vd);// CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}})// CHECK-LE: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}})  /* vec_vor */  res_vsll = vec_vor(vsll, vsll);// CHECK: or <2 x i64>// CHECK-LE: or <2 x i64>  res_vsll = vec_vor(vbll, vsll);// CHECK: or <2 x i64>// CHECK-LE: or <2 x i64>  res_vsll = vec_vor(vsll, vbll);// CHECK: or <2 x i64>// CHECK-LE: or <2 x i64>  res_vull = 
vec_vor(vull, vull);// CHECK: or <2 x i64>// CHECK-LE: or <2 x i64>  res_vull = vec_vor(vbll, vull);// CHECK: or <2 x i64>// CHECK-LE: or <2 x i64>
开发者ID:AlexDenisov,项目名称:clang,代码行数:66,


示例20: PREFIX_h264_qpel16_v_lowpass_altivec

/* this code assume stride % 16 == 0 */static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);    register int i;  const vector signed int vzero = vec_splat_s32(0);  const vector unsigned char perm = vec_lvsl(0, src);  const vector signed short v20ss = (const vector signed short)AVV(20);  const vector unsigned short v5us = vec_splat_u16(5);  const vector signed short v5ss = vec_splat_s16(5);  const vector signed short v16ss = (const vector signed short)AVV(16);  const vector unsigned char dstperm = vec_lvsr(0, dst);  const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);  const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);    uint8_t *srcbis = src - (srcStride * 2);  const vector unsigned char srcM2a = vec_ld(0, srcbis);  const vector unsigned char srcM2b = vec_ld(16, srcbis);  const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm);  srcbis += srcStride;  const vector unsigned char srcM1a = vec_ld(0, srcbis);  const vector unsigned char srcM1b = vec_ld(16, srcbis);  const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm);  srcbis += srcStride;  const vector unsigned char srcP0a = vec_ld(0, srcbis);  const vector unsigned char srcP0b = vec_ld(16, srcbis);  const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm);  srcbis += srcStride;  const vector unsigned char srcP1a = vec_ld(0, srcbis);  const vector unsigned char srcP1b = vec_ld(16, srcbis);  const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm);  srcbis += srcStride;  const vector unsigned char srcP2a = vec_ld(0, srcbis);  const vector unsigned char srcP2b = vec_ld(16, srcbis);  const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm);  srcbis += srcStride;  vector signed short srcM2ssA = (vector signed 
short)vec_mergeh((vector unsigned char)vzero, srcM2);  vector signed short srcM2ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2);  vector signed short srcM1ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1);  vector signed short srcM1ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1);  vector signed short srcP0ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0);  vector signed short srcP0ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0);  vector signed short srcP1ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1);  vector signed short srcP1ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1);  vector signed short srcP2ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2);  vector signed short srcP2ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2);  for (i = 0 ; i < 16 ; i++) {    const vector unsigned char srcP3a = vec_ld(0, srcbis);    const vector unsigned char srcP3b = vec_ld(16, srcbis);    const vector unsigned char srcP3 = vec_perm(srcP3a, srcP3b, perm);    const vector signed short srcP3ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3);    const vector signed short srcP3ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3);    srcbis += srcStride;    const vector signed short sum1A = vec_adds(srcP0ssA, srcP1ssA);    const vector signed short sum1B = vec_adds(srcP0ssB, srcP1ssB);    const vector signed short sum2A = vec_adds(srcM1ssA, srcP2ssA);    const vector signed short sum2B = vec_adds(srcM1ssB, srcP2ssB);    const vector signed short sum3A = vec_adds(srcM2ssA, srcP3ssA);    const vector signed short sum3B = vec_adds(srcM2ssB, srcP3ssB);    srcM2ssA = srcM1ssA;    srcM2ssB = srcM1ssB;    srcM1ssA = srcP0ssA;    srcM1ssB = srcP0ssB;    srcP0ssA = srcP1ssA;    srcP0ssB = srcP1ssB;    srcP1ssA = srcP2ssA;    
srcP1ssB = srcP2ssB;    srcP2ssA = srcP3ssA;    srcP2ssB = srcP3ssB;        const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss);    const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss);    const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);    const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);        const vector signed short pp3A = vec_add(sum3A, pp1A);    const vector signed short pp3B = vec_add(sum3B, pp1B);    const vector signed short psumA = vec_sub(pp3A, pp2A);    const vector signed short psumB = vec_sub(pp3B, pp2B);    const vector signed short sumA = vec_sra(psumA, v5us);    const vector signed short sumB = vec_sra(psumB, v5us);    const vector unsigned char sum = vec_packsu(sumA, sumB);    const vector unsigned char dst1 = vec_ld(0, dst);    const vector unsigned char dst2 = vec_ld(16, dst);    const vector unsigned char vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));    vector unsigned char fsum;    OP_U8_ALTIVEC(fsum, sum, vdst);//.........这里部分代码省略.........
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,


示例21: nb_kernel310_ppc_altivec

//.........这里部分代码省略.........#else /* without gmx_threads */		for(n=0;n<nri;n++) {#endif  			is3        = 3*shift[n];			shvec      = load_xyz(shiftvec+is3);			ii         = iinr[n];			ii3        = 3*ii;			ix         = load_xyz(pos+ii3);			Vvdwtot     = nul;			vctot      = nul;			fix        = nul;			fiy        = nul;			fiz        = nul;			ix         = vec_add(ix,shvec);			nj0        = jindex[n];			nj1        = jindex[n+1];			splat_xyz_to_vectors(ix,&ix,&iy,&iz);			ntiA       = 2*ntype*type[ii];			iq        = vec_madd(load_float_and_splat(charge+ii),vfacel,nul);			for(k=nj0; k<(nj1-3); k+=4) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				jnrd            = jjnr[k+3];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				j3d             = 3*jnrd;				transpose_4_to_3(load_xyz(pos+j3a),								 load_xyz(pos+j3b),								 load_xyz(pos+j3c),								 load_xyz(pos+j3d),&dx,&dy,&dz);				dx              = vec_sub(ix,dx);				dy              = vec_sub(iy,dy);				dz              = vec_sub(iz,dz);				rsq             = vec_madd(dx,dx,nul);				rsq             = vec_madd(dy,dy,rsq);				rsq             = vec_madd(dz,dz,rsq);				rinv            = do_invsqrt(rsq);				rinvsq          = vec_madd(rinv,rinv,nul);				r               = vec_madd(rinv,rsq,nul);				rinvsix         = vec_madd(rinvsq,rinvsq,nul);				rinvsix         = vec_madd(rinvsix,rinvsq,nul);				tja             = ntiA+2*type[jnra];				tjb             = ntiA+2*type[jnrb];				tjc             = ntiA+2*type[jnrc];				tjd             = ntiA+2*type[jnrd];				qq = vec_madd(load_4_float(charge+jnra,charge+jnrb,										   charge+jnrc,charge+jnrd),iq,nul);				load_4_pair(vdwparam+tja,vdwparam+tjb,vdwparam+tjc,vdwparam+tjd,&c6,&c12);				do_4_ctable_coul(VFtab,vec_madd(r,tsc,nul),&VVc,&FFc);				fs2             = vec_madd(qq,FFc,nul);   /* fijC */				vctot           = vec_madd(qq,VVc,vctot);				Vvdw6            = vec_madd(c6,rinvsix,nul);				Vvdw12      
     = vec_madd(c12,vec_madd(rinvsix,rinvsix,nul),										   nul);				fs              = vec_madd(vec_twelve(),Vvdw12,nul);				fs              = vec_nmsub(vec_six(),Vvdw6,fs);				fs              = vec_madd(fs,rinv,nul);				Vvdwtot          = vec_add(Vvdwtot,Vvdw12);				fs              = vec_nmsub(fs2,tsc,fs);				fs              = vec_madd(fs,rinv,nul);				Vvdwtot          = vec_sub(Vvdwtot,Vvdw6);				fix             = vec_madd(fs,dx,fix); /* +=fx */				fiy             = vec_madd(fs,dy,fiy); /* +=fy */
开发者ID:alejandrox1,项目名称:gromacs_flatbottom,代码行数:67,


示例22: PREFIX_h264_qpel16_hv_lowpass_altivec

//.........这里部分代码省略.........      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    }    const vector signed short srcP0A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0);    const vector signed short srcP0B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0);    const vector signed short srcP1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1);    const vector signed short srcP1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1);    const vector signed short srcP2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2);    const vector signed short srcP2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2);    const vector signed short srcP3A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3);    const vector signed short srcP3B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3);    const vector signed short srcM1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1);    const vector signed short srcM1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1);    const vector signed short srcM2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM2);    const vector signed short srcM2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2);    const vector signed short sum1A = vec_adds(srcP0A, srcP1A);    const vector signed short sum1B = vec_adds(srcP0B, srcP1B);    const vector signed short sum2A = vec_adds(srcM1A, srcP2A);    const vector signed short sum2B = vec_adds(srcM1B, srcP2B);    const vector signed short sum3A = vec_adds(srcM2A, srcP3A);    const vector signed short sum3B = vec_adds(srcM2B, srcP3B);        const vector signed short pp1A = vec_mladd(sum1A, v20ss, sum3A);    const vector signed short pp1B = vec_mladd(sum1B, v20ss, sum3B);    const vector signed short pp2A = vec_mladd(sum2A, v5ss, 
(vector signed short)vzero);    const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);    const vector signed short psumA = vec_sub(pp1A, pp2A);    const vector signed short psumB = vec_sub(pp1B, pp2B);    vec_st(psumA, 0, tmp);    vec_st(psumB, 16, tmp);        src += srcStride;    tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */  }    const vector unsigned char dstperm = vec_lvsr(0, dst);  const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);  const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);  const vector unsigned char mperm = (const vector unsigned char)    AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,        0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);    int16_t *tmpbis = tmp - (tmpStride * 21);  vector signed short tmpM2ssA = vec_ld(0, tmpbis);  vector signed short tmpM2ssB = vec_ld(16, tmpbis);  tmpbis += tmpStride;  vector signed short tmpM1ssA = vec_ld(0, tmpbis);  vector signed short tmpM1ssB = vec_ld(16, tmpbis);  tmpbis += tmpStride;  vector signed short tmpP0ssA = vec_ld(0, tmpbis);  vector signed short tmpP0ssB = vec_ld(16, tmpbis);  tmpbis += tmpStride;  vector signed short tmpP1ssA = vec_ld(0, tmpbis);  vector signed short tmpP1ssB = vec_ld(16, tmpbis);  tmpbis += tmpStride;  vector signed short tmpP2ssA = vec_ld(0, tmpbis);  vector signed short tmpP2ssB = vec_ld(16, tmpbis);
开发者ID:Erikhht,项目名称:TCPMP,代码行数:67,


示例23: cfft2

void cfft2(unsigned int n,float x[][2],float y[][2],float w[][2], float sign){    /*       altivec version of cfft2 from Petersen and Arbenz book, "Intro.       to Parallel Computing", Oxford Univ. Press, 2003, Section 3.6                                            wpp 14. Dec. 2003    */    int jb,jc,jd,jw,k,k2,k4,lj,m,j,mj,mj2,pass,tgle;    float rp,up,wr[4] __attribute((aligned(16)));    float wu[4] __attribute((aligned(16)));    float *a,*b,*c,*d;    const vector float vminus = (vector float) {        -0.,0.,-0.,0.    };    const vector float vzero  = (vector float) {        0.,0.,0.,0.    };    const vector unsigned char pv3201 =    (vector unsigned char) {        4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11    };    vector float V0,V1,V2,V3,V4,V5,V6,V7;    vector float V8,V9,V10,V11,V12,V13,V14,V15;    if(n<=1) {        y[0][0] = x[0][0];        y[0][1] = x[0][1];        return;    }    m    = (int) (log((float) n)/log(1.99));    mj   = 1;    mj2  = 2;    lj   = n/2;    /* first pass thru data: x -> y */    for(j=0; j<lj; j++) {        jb = n/2+j;        jc  = j*mj2;        jd = jc + 1;        rp = w[j][0];        up = w[j][1];        if(sign<0.0) up = -up;        y[jd][0] = rp*(x[j][0] - x[jb][0]) - up*(x[j][1] - x[jb][1]);        y[jd][1] = up*(x[j][0] - x[jb][0]) + rp*(x[j][1] - x[jb][1]);        y[jc][0] = x[j][0] + x[jb][0];        y[jc][1] = x[j][1] + x[jb][1];    }    if(n==2) return;    /* next pass is mj = 2 */    mj  = 2;    mj2 = 4;    lj  = n/4;    a = (float *)&y[0][0];    b = (float *)&y[n/2][0];    c = (float *)&x[0][0];    d = (float *)&x[mj][0];    if(n==4) {        c = (float *)&y[0][0];        d = (float *)&y[mj][0];    }    for(j=0; j<lj; j++) {        jw = j*mj;        jc = j*mj2;        jd = 2*jc;        rp = w[jw][0];        up = w[jw][1];        if(sign<0.0) up = -up;        wr[0] = rp;        wr[1] = rp;        wr[2] = rp;        wr[3] = rp;        wu[0] = up;        wu[1] = up;        wu[2] = up;        wu[3] = up;        V6 = vec_ld(0,wr); 
       V7 = vec_ld(0,wu);        V7 = vec_xor(V7,vminus);        V0 = vec_ld(0,(vector float *) (a+jc));        V1 = vec_ld(0,(vector float *) (b+jc));        V2 = vec_add(V0,V1);                         /* a + b */        vec_st(V2,0,(vector float *) (c+jd));     /* store c */        V3 = vec_sub(V0,V1);                         /* a - b */        V4 = vec_perm(V3,V3,pv3201);        V0 = vec_madd(V6,V3,vzero);        V1 = vec_madd(V7,V4,vzero);        V2 = vec_add(V0,V1);                         /* w*(a - b) */        vec_st(V2,0,(vector float*) (d+jd));         /* store d */    }    if(n==4) return;    mj  *= 2;    mj2  = 2*mj;    lj   = n/mj2;    tgle = 0;    for(pass=2; pass<m-1; pass++) {        if(tgle) {            a = (float *)&y[0][0];            b = (float *)&y[n/2][0];            c = (float *)&x[0][0];//.........这里部分代码省略.........
开发者ID:kailiao,项目名称:test-suite,代码行数:101,


示例24: hill_reilly_ring_pucker

float hill_reilly_ring_pucker(SmallRing &ring, float *framepos) {  int N = ring.num(); // the number of atoms in the current ring#if 0  // return the default color if this isn't a 5 or 6 ring atom  if (N != 5 && N != 6)    return 0.0;    //MK added    if (N==6) {      //MK do Hill-Reilly for 6-membered rings      int NP = N-3; // number of puckering parameters      float *X = new float[N*3]; // atom co-ordinates      float *r = new float[N*3]; // bond vectors      float *a = new float[NP*3]; // puckering axes      float *q = new float[NP*3]; // normalized puckering vectors      float *n = new float[3]; // normal to reference plane      float *p = new float[3]; // a flap normal      float *theta = new float[NP]; // puckering parameters      float pucker_sum;      float max_pucker_sum;      float *atompos;      int curatomid, i, j, k, l;          // load ring co-ordinates      for (i=0; i<N; i++) {        curatomid = ring[i];        atompos = framepos + 3*curatomid;        X[3*i] = atompos[0];        X[3*i+1] = atompos[1];        X[3*i+2] = atompos[2];      }               // calculate bond vectors      for (i=0; i<N; i++) {        j = (i+1) % N;        vec_sub(r+3*i,X+3*j,X+3*i);      }          // calculate puckering axes, flap normals and puckering vectors      for (i=0; i<NP; i++) {        k = (2*(i+1)) % N;        j = (2*i) % N;        l = (2*i+1) % N;        vec_sub(a+3*i,X+3*k,X+3*j);        cross_prod(p,r+3*j,r+3*l);        cross_prod(q+3*i,a+3*i,p);        vec_normalize(q+3*i);      }          // reference normal      cross_prod(n,a+3*0,a+3*1);      vec_normalize(n);          // calculate the puckering parameters      pucker_sum = 0.0;          for (i=0; i<NP; i++) {        theta[i] = (float(VMD_PI)/2.0f) - acosf(dot_prod(q+3*i, n));        pucker_sum += theta[i];      }              // 0.6154 radians (35.26 degrees) has significance for perfect tetrahedral bond geometry (see Hill paper)      max_pucker_sum = NP * 0.6154f;      float pucker_scaled = 
pucker_sum/max_pucker_sum;      pucker_sum = fabsf((pucker_scaled < 1.0f) ? pucker_scaled : 1.0f);      pucker_sum = (pucker_sum < 1.0f) ? pucker_sum : 1.0f;          delete [] X;      delete [] r;      delete [] a;      delete [] q;      delete [] n;      delete [] p;      delete [] theta;      return pucker_sum;    }  //end MK if N==6    else {  //N==5 #endif    float *xring = new float[N];    float *yring = new float[N];    float *zring = new float[N];    float *displ = new float[N];    float *q = new float[N];    float *phi = new float[N];    float Q;    int m;    float *atompos;    int curatomid;        for (int i=0; i<N; i++) {      curatomid = ring[i];      atompos = framepos + 3*curatomid; // pointer arithmetic is evil :)      xring[i] = atompos[0];      yring[i] = atompos[1];      zring[i] = atompos[2];    }             atom_displ_from_mean_plane(xring, yring, zring, displ, N);//.........这里部分代码省略.........
开发者ID:VictorMion,项目名称:vmd-cvs-github,代码行数:101,


示例25: put_no_rnd_h264_chroma_mc8_altivec

/* this code assume that stride % 16 == 0 */void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {    signed int ABCD[4] __attribute__((aligned(16))) =                        {((8 - x) * (8 - y)),                          ((x) * (8 - y)),                          ((8 - x) * (y)),                          ((x) * (y))};    register int i;    vector unsigned char fperm;    const vector signed int vABCD = vec_ld(0, ABCD);    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);    const vector signed int vzero = vec_splat_s32(0);    const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));    const vector unsigned short v6us = vec_splat_u16(6);    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 
1 : 0;    vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;    vector unsigned char vsrc0uc, vsrc1uc;    vector signed short vsrc0ssH, vsrc1ssH;    vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;    vector signed short vsrc2ssH, vsrc3ssH, psum;    vector unsigned char vdst, ppsum, fsum;    if (((unsigned long)dst) % 16 == 0) {      fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,                                        0x14, 0x15, 0x16, 0x17,                                        0x08, 0x09, 0x0A, 0x0B,                                        0x0C, 0x0D, 0x0E, 0x0F);    } else {      fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,                                        0x04, 0x05, 0x06, 0x07,                                        0x18, 0x19, 0x1A, 0x1B,                                        0x1C, 0x1D, 0x1E, 0x1F);    }    vsrcAuc = vec_ld(0, src);    if (loadSecond)      vsrcBuc = vec_ld(16, src);    vsrcperm0 = vec_lvsl(0, src);    vsrcperm1 = vec_lvsl(1, src);    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);    if (reallyBadAlign)      vsrc1uc = vsrcBuc;    else      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);    vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                               (vector unsigned char)vsrc0uc);    vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                               (vector unsigned char)vsrc1uc);    if (!loadSecond) {// -> !reallyBadAlign      for (i = 0 ; i < h ; i++) {        vsrcCuc = vec_ld(stride + 0, src);        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);        vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                                (vector unsigned char)vsrc2uc);        vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                                (vector 
unsigned char)vsrc3uc);        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));        psum = vec_mladd(vB, vsrc1ssH, psum);        psum = vec_mladd(vC, vsrc2ssH, psum);        psum = vec_mladd(vD, vsrc3ssH, psum);        psum = vec_add(v28ss, psum);        psum = vec_sra(psum, v6us);        vdst = vec_ld(0, dst);        ppsum = (vector unsigned char)vec_packsu(psum, psum);        fsum = vec_perm(vdst, ppsum, fperm);        vec_st(fsum, 0, dst);        vsrc0ssH = vsrc2ssH;        vsrc1ssH = vsrc3ssH;        dst += stride;        src += stride;      }    } else {        vector unsigned char vsrcDuc;      for (i = 0 ; i < h ; i++) {        vsrcCuc = vec_ld(stride + 0, src);        vsrcDuc = vec_ld(stride + 16, src);        vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);        if (reallyBadAlign)          vsrc3uc = vsrcDuc;        else//.........这里部分代码省略.........
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:101,


示例26: main

int32_t main(int32_t argc, char *argv[]) {    if( init_sdl2() ) {        return 1;    }    int width = 1280;    int height = 720;    SDL_Window* window;    sdl2_window("cute3d: " __FILE__, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, width, height, &window);    SDL_GLContext* context;    sdl2_glcontext(3, 2, window, &context);    if( init_shader() ) {        return 1;    }    if( init_canvas(width, height) ) {        return 1;    }    canvas_create("global_dynamic_canvas", &global_dynamic_canvas);    canvas_create("global_static_canvas", &global_static_canvas);    struct Vbo vbo = {0};    vbo_create(&vbo);    vbo_add_buffer(&vbo, SHADER_ATTRIBUTE_VERTEX, 3, GL_FLOAT, GL_STATIC_DRAW);    vbo_add_buffer(&vbo, SHADER_ATTRIBUTE_VERTEX_NORMAL, 3, GL_FLOAT, GL_STATIC_DRAW);    vbo_add_buffer(&vbo, SHADER_ATTRIBUTE_VERTEX_COLOR, 4, GL_UNSIGNED_BYTE, GL_STATIC_DRAW);    vbo_add_buffer(&vbo, SHADER_ATTRIBUTE_VERTEX_NORMAL, NORMAL_SIZE, GL_FLOAT, GL_STATIC_DRAW);    struct Ibo ibo = {0};    ibo_create(GL_TRIANGLES, GL_UNSIGNED_INT, GL_STATIC_DRAW, &ibo);    struct SolidTetrahedron hard_tetrahedron = {0};    struct SolidBox hard_cube = {0};    struct SolidSphere16 hard_sphere16 = {0};    struct SolidSphere32 hard_sphere32 = {0};    solid_tetrahedron_create(1.0, (Color){255, 0, 0, 255}, &hard_tetrahedron);    solid_cube_create(0.5, (Color){0, 255, 0, 255}, &hard_cube);    solid_sphere16_create(16, 8, 0.75, (Color){0, 255, 255, 255}, &hard_sphere16);    solid_sphere32_create(32, 16, 0.75, (Color){255, 255, 0, 255}, &hard_sphere32);    solid_optimize((struct Solid*)&hard_tetrahedron);    solid_optimize((struct Solid*)&hard_cube);    solid_optimize((struct Solid*)&hard_sphere16);    solid_optimize((struct Solid*)&hard_sphere32);    struct VboMesh hard_tetrahedron_mesh, hard_box_mesh, hard_cube_mesh, hard_sphere16_mesh, hard_sphere32_mesh;    vbo_mesh_create_from_solid((struct Solid*)&hard_tetrahedron, &vbo, &ibo, &hard_tetrahedron_mesh);    vbo_mesh_create_from_solid((struct 
Solid*)&hard_cube, &vbo, &ibo, &hard_cube_mesh);    vbo_mesh_create_from_solid((struct Solid*)&hard_sphere16, &vbo, &ibo, &hard_sphere16_mesh);    vbo_mesh_create_from_solid((struct Solid*)&hard_sphere32, &vbo, &ibo, &hard_sphere32_mesh);    struct SolidTetrahedron smooth_tetrahedron = {0};    struct SolidBox smooth_cube = {0};    struct SolidSphere16 smooth_sphere16 = {0};    struct SolidSphere32 smooth_sphere32 = {0};    solid_tetrahedron_create(1.0, (Color){255, 0, 0, 255}, &smooth_tetrahedron);    solid_cube_create(0.5, (Color){0, 255, 0, 255}, &smooth_cube);    solid_sphere16_create(16, 8, 0.75, (Color){0, 255, 255, 255}, &smooth_sphere16);    solid_sphere32_create(32, 16, 0.75, (Color){255, 255, 0, 255}, &smooth_sphere32);    solid_optimize((struct Solid*)&smooth_tetrahedron);    solid_optimize((struct Solid*)&smooth_cube);    solid_optimize((struct Solid*)&smooth_sphere16);    solid_optimize((struct Solid*)&smooth_sphere32);    solid_smooth_normals((struct Solid*)&smooth_tetrahedron, smooth_tetrahedron.normals, smooth_tetrahedron.normals);    solid_smooth_normals((struct Solid*)&smooth_cube, smooth_cube.normals, smooth_cube.normals);    solid_smooth_normals((struct Solid*)&smooth_sphere16, smooth_sphere16.normals, smooth_sphere16.normals);    solid_smooth_normals((struct Solid*)&smooth_sphere32, smooth_sphere32.normals, smooth_sphere32.normals);    struct VboMesh smooth_tetrahedron_mesh, smooth_box_mesh, smooth_cube_mesh, smooth_sphere16_mesh, smooth_sphere32_mesh;    vbo_mesh_create_from_solid((struct Solid*)&smooth_tetrahedron, &vbo, &ibo, &smooth_tetrahedron_mesh);    vbo_mesh_create_from_solid((struct Solid*)&smooth_cube, &vbo, &ibo, &smooth_cube_mesh);    vbo_mesh_create_from_solid((struct Solid*)&smooth_sphere16, &vbo, &ibo, &smooth_sphere16_mesh);    vbo_mesh_create_from_solid((struct Solid*)&smooth_sphere32, &vbo, &ibo, &smooth_sphere32_mesh);    struct Arcball arcball = {0};    arcball_create(width, height, (Vec4f){2.5,17.0,17.0,1.0}, 
(Vec4f){2.5,0.0,0.0,1.0}, 0.1, 100.0, &arcball);    float circular_motion_angle = 0.0f;    float circular_motion_speed = (2.0f*PI)/30;    float circular_motion_radius = 12.0f;    Vec3f light_position = { circular_motion_radius, 10.0, circular_motion_radius };    Vec3f light_direction = {0};    vec_sub((Vec3f){0.0f, 0.0f, 0.0f}, light_position, light_direction);    vec_normalize(light_direction, light_direction);    Vec3f eye_position = {0};    vec_copy3f(arcball.camera.pivot.position, eye_position);    Color ambiance = {50, 25, 150, 255};    Color specular = {255, 255, 255, 255};    float material_shininess = 1.0;    Vec4f material_coefficients = { 0.8, 0.2, 0.0, 0.0 };    // flat    struct Shader flat_shader = {0};//.........这里部分代码省略.........
开发者ID:rakete,项目名称:cute3d,代码行数:101,


示例27: PREFIX_h264_qpel16_hv_lowpass_altivec

//.........这里部分代码省略.........            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        }        srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);        srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);        srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);        srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);        srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);        srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);        srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);        srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);        srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);        srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);        srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2);        sum1A = vec_adds(srcP0A, srcP1A);        sum1B = vec_adds(srcP0B, srcP1B);        sum2A = vec_adds(srcM1A, srcP2A);        sum2B = vec_adds(srcM1B, srcP2B);        sum3A = vec_adds(srcM2A, srcP3A);        sum3B = vec_adds(srcM2B, srcP3B);        pp1A = vec_mladd(sum1A, v20ss, sum3A);        pp1B = vec_mladd(sum1B, v20ss, sum3B);        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);        psumA = vec_sub(pp1A, pp2A);        psumB = vec_sub(pp1B, pp2B);        vec_st(psumA, 0, tmp);        vec_st(psumB, 16, tmp);        src += srcStride;        tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */    }    tmpM2ssA = vec_ld(0, tmpbis);    tmpM2ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    tmpM1ssA = vec_ld(0, tmpbis);    tmpM1ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    tmpP0ssA = vec_ld(0, tmpbis);    tmpP0ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    tmpP1ssA = vec_ld(0, tmpbis);    tmpP1ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    tmpP2ssA = vec_ld(0, tmpbis);    tmpP2ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    for (i = 0 ; i < 16 ; i++) {        const vec_s16 
tmpP3ssA = vec_ld(0, tmpbis);        const vec_s16 tmpP3ssB = vec_ld(16, tmpbis);        const vec_s16 sum1A = vec_adds(tmpP0ssA, tmpP1ssA);        const vec_s16 sum1B = vec_adds(tmpP0ssB, tmpP1ssB);        const vec_s16 sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
开发者ID:AVbin,项目名称:libav,代码行数:67,


示例28: dct_unquantize_h263_altivec

/**
 * AltiVec version of dct_unquantize_h263: H.263-style inverse quantization
 * of one coefficient block, performed in place.
 *
 * Every non-zero coefficient c at index 0..nCoeffs is replaced by
 * c * qmul + qadd when c > 0 and by c * qmul - qadd when c < 0, where
 * qmul = qscale << 1 and qadd = (qscale - 1) | 1 (qadd is forced to 0 for
 * intra blocks when s->h263_aic is set). Zero coefficients stay zero.
 *
 * For intra macroblocks, block[0] is first multiplied by s->y_dc_scale
 * (n < 4) or s->c_dc_scale (otherwise) unless s->h263_aic is set, and that
 * value is what block[0] holds on return: the qmul/qadd step is undone for it.
 *
 * This code assumes `block' is 16-byte aligned (it is accessed with
 * vec_ld/vec_st and there is no scalar alignment prologue).
 *
 * @param s      codec context; reads mb_intra, h263_aic, y_dc_scale,
 *               c_dc_scale, block_last_index[] and intra_scantable.raster_end[]
 * @param block  coefficient block, modified in place
 * @param n      index of the block inside the macroblock
 * @param qscale quantizer scale
 */
static void dct_unquantize_h263_altivec(MpegEncContext *s,
                                        DCTELEM *block, int n, int qscale)
{
    int qmul = qscale << 1;
    int qadd = (qscale - 1) | 1;
    int keep_dc;  /* non-zero when block[0] must survive the qmul/qadd step */
    int last;     /* index of the last coefficient to process (inclusive) */

    assert(s->block_last_index[n]>=0);

    if (!s->mb_intra) {
        keep_dc = 0;
        last = s->intra_scantable.raster_end[s->block_last_index[n]];
    } else {
        if (s->h263_aic) {
            qadd = 0;
        } else if (n < 4) {
            block[0] = block[0] * s->y_dc_scale;
        } else {
            block[0] = block[0] * s->c_dc_scale;
        }
        keep_dc = 1;
        last = 63; /* does not always use zigzag table */
    }

    {
        vector signed short zero_v = (vector signed short)vec_splat_s16(0);
        DECLARE_ALIGNED(16, short, qmul_s) = qmul;
        DECLARE_ALIGNED(16, short, qadd_s) = qadd;
        short saved_dc = block[0];
        vector signed short qmul_v, qadd_pos_v, qadd_neg_v;
        int j;

        /* Broadcast the scalars: load the single aligned element, then
         * replicate element 0 across the whole vector. */
        qmul_v     = vec_splat((vec_s16)vec_lde(0, &qmul_s), 0);
        qadd_pos_v = vec_splat((vec_s16)vec_lde(0, &qadd_s), 0);
        qadd_neg_v = vec_sub(zero_v, qadd_pos_v);

        /* Vector part: whole groups of 8 coefficients (16 bytes each). */
        for (j = 0; j + 7 <= last; j += 8) {
            vector signed short coeffs, bias, scaled;
            vector bool short is_neg, is_zero;

            coeffs  = vec_ld(j << 1, block);
            is_neg  = vec_cmplt(coeffs, zero_v);
            is_zero = vec_cmpeq(coeffs, zero_v);

            /* -qadd for negative lanes, +qadd for the others */
            bias = vec_sel(qadd_pos_v, qadd_neg_v, is_neg);
            /* coeffs * qmul +/- qadd */
            scaled = vec_mladd(coeffs, qmul_v, bias);
            /* lanes that were 0 keep their original (zero) value */
            coeffs = vec_sel(scaled, coeffs, is_zero);

            vec_st(coeffs, j << 1, block);
        }

        /* Scalar part: leftover coefficients when last + 1 is not a
         * multiple of 8. */
        for (; j <= last; j++) {
            int level = block[j];

            if (!level)
                continue;
            level = (level < 0) ? level * qmul - qadd
                                : level * qmul + qadd;
            block[j] = level;
        }

        /* The loops above start at index 0 even for intra blocks; putting
         * the saved DC back avoids special-casing the first iteration. */
        if (keep_dc)
            block[0] = saved_dc;
    }
}
开发者ID:achellies,项目名称:camomile,代码行数:93,


示例29: return

//.........这里部分代码省略.........          // check if in picking region ... different for 2D and 3D          if (dim == 2) {            // convert the 3D world coordinate to 2D (XY) absolute screen             // coordinate, and a normalized Z coordinate.            abs_screen_loc_3D(pntpos, cpos);                  // check to see if the projected picking position falls within the             // view frustum, with the XY coords falling within the displayed             // window, and the Z coordinate falling within the view volume            // between the front and rear clipping planes.            inRegion = (cpos[0] >= minX && cpos[0] <= maxX &&                        cpos[1] >= minY && cpos[1] <= maxY &&                        cpos[2] >= 0.0  && cpos[2] <= 1.0);          } else {            // just check to see if the position is in a box centered on our            // pointer.  The pointer position should already be transformed.            inRegion = (pntpos[0] >= fminX && pntpos[0] <= fmaxX &&	                        pntpos[1] >= fminY && pntpos[1] <= fmaxY &&                        pntpos[2] >= fminZ && pntpos[2] <= fmaxZ);          }          // Clip still-viable pick points against all active clipping planes          if (inRegion) {            // We must perform a check against all of the active            // user-defined clipping planes to ensure that only pick points            // associated with visible geometry can be selected.            int cp;            for (cp=0; cp < VMD_MAX_CLIP_PLANE; cp++) {              // The final result is the intersection of all of the              // individual clipping plane tests...              
if (cmdList->clipplanes[cp].mode) {                float cpdist[3];                vec_sub(cpdist, wpntpos, cmdList->clipplanes[cp].center);                inRegion &= (dot_prod(cpdist,                                       cmdList->clipplanes[cp].normal) > 0.0f);              }            }          }                // has a hit occurred?          if (inRegion) {            // yes, see if it is closer to the eye position than earlier objects            if(dim==2)               newEyeDist = DTOEYE(pntpos[0], pntpos[1], pntpos[2]);            else               newEyeDist = DTOPOINT(pntpos[0],pntpos[1],pntpos[2]);            if(currEyeDist < 0.0 || newEyeDist < currEyeDist) {              currEyeDist = newEyeDist;              tag = currTag;              if (unitcell) {                unitcell[0] = pbcCells[3*pbcimg  ];                unitcell[1] = pbcCells[3*pbcimg+1];                unitcell[2] = pbcCells[3*pbcimg+2];              }            }          }          break;        case DPICKPOINT_ARRAY:          // loop over all of the pick points in the pick point index array          DispCmdPickPointArray *cmd = (DispCmdPickPointArray *)cmdptr;          float *pickpos=NULL;          float *crds=NULL;          int *indices=NULL;
开发者ID:VictorMion,项目名称:vmd-cvs-github,代码行数:67,


示例30: dct_quantize_altivec

static int dct_quantize_altivec(MpegEncContext* s,                         DCTELEM* data, int n,                         int qscale, int* overflow){    int lastNonZero;    vector float row0, row1, row2, row3, row4, row5, row6, row7;    vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7;    const vector float zero = (const vector float)FOUROF(0.);    // used after quantize step    int oldBaseValue = 0;    // Load the data into the row/alt vectors    {        vector signed short data0, data1, data2, data3, data4, data5, data6, data7;        data0 = vec_ld(0, data);        data1 = vec_ld(16, data);        data2 = vec_ld(32, data);        data3 = vec_ld(48, data);        data4 = vec_ld(64, data);        data5 = vec_ld(80, data);        data6 = vec_ld(96, data);        data7 = vec_ld(112, data);        // Transpose the data before we start        TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);        // load the data into floating point vectors.  We load        // the high half of each row into the main row vectors        // and the low half into the alt vectors.        row0 = vec_ctf(vec_unpackh(data0), 0);        alt0 = vec_ctf(vec_unpackl(data0), 0);        row1 = vec_ctf(vec_unpackh(data1), 0);        alt1 = vec_ctf(vec_unpackl(data1), 0);        row2 = vec_ctf(vec_unpackh(data2), 0);        alt2 = vec_ctf(vec_unpackl(data2), 0);        row3 = vec_ctf(vec_unpackh(data3), 0);        alt3 = vec_ctf(vec_unpackl(data3), 0);        row4 = vec_ctf(vec_unpackh(data4), 0);        alt4 = vec_ctf(vec_unpackl(data4), 0);        row5 = vec_ctf(vec_unpackh(data5), 0);        alt5 = vec_ctf(vec_unpackl(data5), 0);        row6 = vec_ctf(vec_unpackh(data6), 0);        alt6 = vec_ctf(vec_unpackl(data6), 0);        row7 = vec_ctf(vec_unpackh(data7), 0);        alt7 = vec_ctf(vec_unpackl(data7), 0);    }    // The following block could exist as a separate an altivec dct                // function.  
However, if we put it inline, the DCT data can remain                // in the vector local variables, as floats, which we'll use during the                // quantize step...    {        const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f);        const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f);        const vector float vec_0_541196100 = (vector float)FOUROF(0.541196100f);        const vector float vec_0_765366865 = (vector float)FOUROF(0.765366865f);        const vector float vec_0_899976223 = (vector float)FOUROF(-0.899976223f);        const vector float vec_1_175875602 = (vector float)FOUROF(1.175875602f);        const vector float vec_1_501321110 = (vector float)FOUROF(1.501321110f);        const vector float vec_1_847759065 = (vector float)FOUROF(-1.847759065f);        const vector float vec_1_961570560 = (vector float)FOUROF(-1.961570560f);        const vector float vec_2_053119869 = (vector float)FOUROF(2.053119869f);        const vector float vec_2_562915447 = (vector float)FOUROF(-2.562915447f);        const vector float vec_3_072711026 = (vector float)FOUROF(3.072711026f);        int whichPass, whichHalf;        for(whichPass = 1; whichPass<=2; whichPass++) {            for(whichHalf = 1; whichHalf<=2; whichHalf++) {                vector float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;                vector float tmp10, tmp11, tmp12, tmp13;                vector float z1, z2, z3, z4, z5;                tmp0 = vec_add(row0, row7); // tmp0 = dataptr[0] + dataptr[7];                tmp7 = vec_sub(row0, row7); // tmp7 = dataptr[0] - dataptr[7];                tmp3 = vec_add(row3, row4); // tmp3 = dataptr[3] + dataptr[4];                tmp4 = vec_sub(row3, row4); // tmp4 = dataptr[3] - dataptr[4];                tmp1 = vec_add(row1, row6); // tmp1 = dataptr[1] + dataptr[6];                tmp6 = vec_sub(row1, row6); // tmp6 = dataptr[1] - dataptr[6];                tmp2 = vec_add(row2, row5); // tmp2 = 
dataptr[2] + dataptr[5];                tmp5 = vec_sub(row2, row5); // tmp5 = dataptr[2] - dataptr[5];                tmp10 = vec_add(tmp0, tmp3); // tmp10 = tmp0 + tmp3;                tmp13 = vec_sub(tmp0, tmp3); // tmp13 = tmp0 - tmp3;                tmp11 = vec_add(tmp1, tmp2); // tmp11 = tmp1 + tmp2;                tmp12 = vec_sub(tmp1, tmp2); // tmp12 = tmp1 - tmp2;                // dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);                row0 = vec_add(tmp10, tmp11);                // dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);                row4 = vec_sub(tmp10, tmp11);                // z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);                z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero);//.........这里部分代码省略.........
开发者ID:achellies,项目名称:camomile,代码行数:101,



注:本文中的vec_sub函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vecadd函数代码示例
C++ vec_ste函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。