您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vec_mergel函数代码示例

51自学网 2021-06-03 09:36:16
  C++
这篇教程C++ vec_mergel函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中vec_mergel函数的典型用法代码示例。vec_mergel是PowerPC AltiVec/VSX提供的内建函数,用于把两个向量后半部分的元素交错合并成一个新向量。如果您正苦于以下问题:C++ vec_mergel函数的具体用法?C++ vec_mergel怎么用?C++ vec_mergel使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_mergel函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: vector_fmul_reverse_altivec

static void vector_fmul_reverse_altivec(float *dst, const float *src0,                                        const float *src1, int len){    int i;    vector float d, s0, s1, h0, l0,                 s2, s3, zero = (vector float)vec_splat_u32(0);    src1 += len-4;    for(i=0; i<len-7; i+=8) {        s1 = vec_ld(0, src1-i);              // [a,b,c,d]        s0 = vec_ld(0, src0+i);        l0 = vec_mergel(s1, s1);             // [c,c,d,d]        s3 = vec_ld(-16, src1-i);        h0 = vec_mergeh(s1, s1);             // [a,a,b,b]        s2 = vec_ld(16, src0+i);        s1 = vec_mergeh(vec_mergel(l0,h0),   // [d,b,d,b]                        vec_mergeh(l0,h0));  // [c,a,c,a]                                             // [d,c,b,a]        l0 = vec_mergel(s3, s3);        d = vec_madd(s0, s1, zero);        h0 = vec_mergeh(s3, s3);        vec_st(d, 0, dst+i);        s3 = vec_mergeh(vec_mergel(l0,h0),                        vec_mergeh(l0,h0));        d = vec_madd(s2, s3, zero);        vec_st(d, 16, dst+i);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:27,


示例2: test

static void test(){  /* Input vectors.  */  vector long vla = {-2,-1};  vector long vlb = {0,1};  vector double vda = {-2.0,-1.0};  vector double vdb = {0.0,1.0};  /* Result vectors.  */  vector long vlh, vll;  vector double vdh, vdl;  /* Expected result vectors.  */#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  vector long vlrh = {1,-1};  vector long vlrl = {0,-2};  vector double vdrh = {1.0,-1.0};  vector double vdrl = {0.0,-2.0};#else  vector long vlrh = {-2,0};  vector long vlrl = {-1,1};  vector double vdrh = {-2.0,0.0};  vector double vdrl = {-1.0,1.0};#endif  vlh = vec_mergeh (vla, vlb);  vll = vec_mergel (vla, vlb);  vdh = vec_mergeh (vda, vdb);  vdl = vec_mergel (vda, vdb);  check (vec_long_eq (vlh, vlrh), "vlh");  check (vec_long_eq (vll, vlrl), "vll");  check (vec_double_eq (vdh, vdrh), "vdh" );  check (vec_double_eq (vdl, vdrl), "vdl" );}
开发者ID:Zex,项目名称:gcc,代码行数:35,


示例3: float_to_int16_interleave_altivec

static voidfloat_to_int16_interleave_altivec(int16_t *dst, const float **src,                                  long len, int channels){    int i;    vector signed short d0, d1, d2, c0, c1, t0, t1;    vector unsigned char align;    if(channels == 1)        float_to_int16_altivec(dst, src[0], len);    else if (channels == 2)    {        if(((long)dst) & 15)            for(i = 0; i < len - 7; i += 8)            {                d0 = vec_ld(0, dst + i);                t0 = float_to_int16_one_altivec(src[0] + i);                d1 = vec_ld(31, dst + i);                t1 = float_to_int16_one_altivec(src[1] + i);                c0 = vec_mergeh(t0, t1);                c1 = vec_mergel(t0, t1);                d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));                align = vec_lvsr(0, dst + i);                d0 = vec_perm(d2, c0, align);                d1 = vec_perm(c0, c1, align);                vec_st(d0,  0, dst + i);                d0 = vec_perm(c1, d2, align);                vec_st(d1, 15, dst + i);                vec_st(d0, 31, dst + i);                dst += 8;            }        else            for(i = 0; i < len - 7; i += 8)            {                t0 = float_to_int16_one_altivec(src[0] + i);                t1 = float_to_int16_one_altivec(src[1] + i);                d0 = vec_mergeh(t0, t1);                d1 = vec_mergel(t0, t1);                vec_st(d0,  0, dst + i);                vec_st(d1, 16, dst + i);                dst += 8;            }    }    else    {        DECLARE_ALIGNED(16, int16_t, tmp)[len];        int c, j;        for (c = 0; c < channels; c++)        {            float_to_int16_altivec(tmp, src[c], len);            for (i = 0, j = c; i < len; i++, j += channels)            {                dst[j] = tmp[i];            }        }    }}
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:56,


示例4: v_reduce_sum4

/**
 * Horizontal sums of four vectors at once.
 *
 * Returns { sum(a), sum(b), sum(c), sum(d) }, where sum(x) adds the four
 * lanes of x.  a/c and b/d are reduced pairwise so that a final merge puts
 * the four totals in argument order.
 */
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                 const v_float32x4& c, const v_float32x4& d)
{
    // Fold lanes 2,3 onto lanes 0,1 for the (a, c) pair:
    // [a2+a0, c2+c0, a3+a1, c3+c1]
    const vec_float4 ac_lo = vec_mergel(a.val, c.val);
    const vec_float4 ac_hi = vec_mergeh(a.val, c.val);
    vec_float4 ac = vec_add(ac_lo, ac_hi);
    // Rotate by 8 bytes and add: lane 0 = sum(a), lane 1 = sum(c).
    const vec_float4 ac_rot = vec_sld(ac, ac, 8);
    ac = vec_add(ac, ac_rot);

    // Same reduction for the (b, d) pair: lane 0 = sum(b), lane 1 = sum(d).
    const vec_float4 bd_lo = vec_mergel(b.val, d.val);
    const vec_float4 bd_hi = vec_mergeh(b.val, d.val);
    vec_float4 bd = vec_add(bd_lo, bd_hi);
    const vec_float4 bd_rot = vec_sld(bd, bd, 8);
    bd = vec_add(bd, bd_rot);

    // Interleave the leading lanes: [sum(a), sum(b), sum(c), sum(d)].
    return v_float32x4(vec_mergeh(ac, bd));
}
开发者ID:ArkaJU,项目名称:opencv,代码行数:10,


示例5: test

static void test(){  /* Input vectors.  */  vector long long vla = {-2,-1};  vector long long vlb = {0,1};  vector double vda = {-2.0,-1.0};  vector double vdb = {0.0,1.0};  vector unsigned int vuia = {0,1,2,3};  vector unsigned int vuib = {4,5,6,7};  vector signed int vsia = {-4,-3,-2,-1};  vector signed int vsib = {0,1,2,3};  vector float vfa = {-4.0,-3.0,-2.0,-1.0};  vector float vfb = {0.0,1.0,2.0,3.0};  /* Result vectors.  */  vector long long vlh, vll;  vector double vdh, vdl;  vector unsigned int vuih, vuil;  vector signed int vsih, vsil;  vector float vfh, vfl;  /* Expected result vectors.  */  vector long long vlrh = {-2,0};  vector long long vlrl = {-1,1};  vector double vdrh = {-2.0,0.0};  vector double vdrl = {-1.0,1.0};  vector unsigned int vuirh = {0,4,1,5};  vector unsigned int vuirl = {2,6,3,7};  vector signed int vsirh = {-4,0,-3,1};  vector signed int vsirl = {-2,2,-1,3};  vector float vfrh = {-4.0,0.0,-3.0,1.0};  vector float vfrl = {-2.0,2.0,-1.0,3.0};  vlh = vec_mergeh (vla, vlb);  vll = vec_mergel (vla, vlb);  vdh = vec_mergeh (vda, vdb);  vdl = vec_mergel (vda, vdb);  vuih = vec_mergeh (vuia, vuib);  vuil = vec_mergel (vuia, vuib);  vsih = vec_mergeh (vsia, vsib);  vsil = vec_mergel (vsia, vsib);  vfh  = vec_mergeh (vfa,  vfb );  vfl  = vec_mergel (vfa,  vfb );  check (vec_long_long_eq (vlh, vlrh), "vlh");  check (vec_long_long_eq (vll, vlrl), "vll");  check (vec_double_eq (vdh, vdrh), "vdh" );  check (vec_double_eq (vdl, vdrl), "vdl" );  check (vec_all_eq (vuih, vuirh), "vuih");  check (vec_all_eq (vuil, vuirl), "vuil");  check (vec_all_eq (vsih, vsirh), "vsih");  check (vec_all_eq (vsil, vsirl), "vsil");  check (vec_all_eq (vfh,  vfrh),  "vfh");  check (vec_all_eq (vfl,  vfrl),  "vfl");}
开发者ID:0day-ci,项目名称:gcc,代码行数:55,


示例6: processRGBA_Altivec

void pix_diff :: processRGBA_Altivec(imageStruct &image, imageStruct &right){    int datasize = image.xsize * image.ysize / 4;    vector signed short  hiImage, loImage, hiRight, loRight;    vector unsigned char zero = vec_splat_u8(0);    vector unsigned char *inData = (vector unsigned char *)image.data;    vector unsigned char *rightData = (vector unsigned char *)right.data;    #ifndef PPC970   	UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );	vec_dst( inData, prefetchSize, 0 );        vec_dst( rightData, prefetchSize, 1 );        vec_dst( inData+256, prefetchSize, 2 );        vec_dst( rightData+256, prefetchSize, 3 );    #endif    do {        #ifndef PPC970	vec_dst( inData, prefetchSize, 0 );        vec_dst( rightData, prefetchSize, 1 );        vec_dst( inData+256, prefetchSize, 2 );        vec_dst( rightData+256, prefetchSize, 3 );        #endif        hiImage = (vector signed short)vec_mergeh(zero,inData[0]);        loImage = (vector signed short)vec_mergel(zero,inData[0]);        hiRight = (vector signed short)vec_mergeh(zero,rightData[0]);        loRight = (vector signed short)vec_mergel(zero,rightData[0]);        hiImage = vec_subs(hiImage,hiRight);        loImage = vec_subs(loImage,loRight);        hiImage = vec_abs(hiImage);        loImage = vec_abs(loImage);        inData[0] = vec_packsu(hiImage,loImage);        inData++;        rightData++;    }    while (--datasize);    #ifndef PPC970        vec_dss( 0 );        vec_dss( 1 );        vec_dss( 2 );        vec_dss( 3 );    #endif}
开发者ID:avilleret,项目名称:Gem,代码行数:50,


示例7: foo

/*
 * Compile-coverage driver: evaluates a series of AltiVec/VSX built-ins on
 * file-scope operands and stores each result through the matching output
 * pointer, post-incrementing it every time.
 *
 * The stores are kept in their original order; several output pointers
 * share an element type and could alias.
 */
void
foo (vector bool long long *vblr,
     vector double *vdr,
     vector unsigned long long *vullz,
     vector double *vdz,
     vector bool char *vbcz,
     vector signed char *vscz,
     vector unsigned char *vucz,
     vector bool int *vbiz,
     vector int *viz,
     vector unsigned int *vuiz,
     vector signed long long int *vslliz,
     vector bool short int *vbsiz,
     vector signed short int *vssiz,
     vector unsigned short int *vusiz,
     vector float *vfz)
{
  /* vector bool long long results, plus the two conversions to double.  */
  *vblr++ = vec_andc (vbla, vblb);
  *vdr++  = vec_double (vslla);
  *vdr++  = vec_double (vulla);
  *vblr++ = vec_mergeh (vbla, vblb);
  *vblr++ = vec_mergel (vbla, vblb);
  *vblr++ = vec_nor (vbla, vblb);
  *vblr++ = vec_or (vbla, vblb);
  *vblr++ = vec_sel (vbla, vblb, vblc);
  *vblr++ = vec_sel (vbla, vblb, vullc);
  *vblr++ = vec_xor (vbla, vblb);

  /* unsigned long long and double selects.  */
  *vullz++ = vec_sel (vulla, vullb, vbllc);
  *vullz++ = vec_sel (vulla, vullb, vullc);
  *vdz++ = vec_sel(vda, vdb, vullc);

  /* char element types.  */
  *vbcz++ = vec_sel (vbca, vbcb, vbcc);
  *vbcz++ = vec_sel (vbca, vbcb, vucc);
  *vbcz++ = vec_xor (vbca, vbcb);
  *vscz++ = vec_sel (vsca, vscb, vbcc);
  *vscz++ = vec_sel (vsca, vscb, vucc);
  *vucz++ = vec_sel (vuca, vucb, vbcc);
  *vucz++ = vec_sel (vuca, vucb, vucc);

  /* int element types.  */
  *vbiz++ = vec_sel (vbia, vbib, vbic);
  *vbiz++ = vec_sel (vbia, vbib, vuic);
  *vbiz++ = vec_xor (vbia, vbib);
  *viz++ = vec_sel (vsia, vsib, vbic);
  *viz++ = vec_sel (vsia, vsib, vuic);
  *vuiz++ = vec_sel (vuia, vuib, vbic);
  *vuiz++ = vec_sel (vuia, vuib, vuic);

  /* signed long long selects.  */
  *vslliz++ = vec_sel(vslla, vsllb, vbllc);
  *vslliz++ = vec_sel(vslla, vsllb, vullc);

  /* short element types.  */
  *vssiz++ = vec_sel(vssia, vssib, vbsic);
  *vssiz++ = vec_sel(vssia, vssib, vusic);
  *vusiz++ = vec_sel(vusia, vusib, vbsic);
  *vusiz++ = vec_sel(vusia, vusib, vusic);
  *vbsiz++ = vec_sel (vbsia, vbsib, vbsic);
  *vbsiz++ = vec_sel (vbsia, vbsib, vusic);
  *vbsiz++ = vec_xor (vbsia, vbsib);

  /* Remaining double and float cases.  */
  *vdz++ = vec_sel (vda, vdb, vbllc);
  *vfz++ = vec_sel (vfa, vfb, vbic);
  *vfz++ = vec_sel (vfa, vfb, vuic);
  *vfz++ = vec_xor (vfa, vfb);
}
开发者ID:vinriviere,项目名称:m68k-atari-mint-gcc,代码行数:59,


示例8: float_to_int16_interleave_altivec

static void float_to_int16_interleave_altivec(int16_t *dst, const float **src,                                              long len, int channels){    int i;    vector signed short d0, d1, d2, c0, c1, t0, t1;    vector unsigned char align;    if (channels == 1)        float_to_int16_altivec(dst, src[0], len);    else {        if (channels == 2) {            if (((long)dst) & 15) {                for (i = 0; i < len - 7; i += 8) {                    d0 = vec_ld(0,  dst + i);                    t0 = float_to_int16_one_altivec(src[0] + i);                    d1 = vec_ld(31, dst + i);                    t1 = float_to_int16_one_altivec(src[1] + i);                    c0 = vec_mergeh(t0, t1);                    c1 = vec_mergel(t0, t1);                    d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));                    align = vec_lvsr(0, dst + i);                    d0 = vec_perm(d2, c0, align);                    d1 = vec_perm(c0, c1, align);                    vec_st(d0,  0, dst + i);                    d0 = vec_perm(c1, d2, align);                    vec_st(d1, 15, dst + i);                    vec_st(d0, 31, dst + i);                    dst += 8;                }            } else {                for (i = 0; i < len - 7; i += 8) {                    t0 = float_to_int16_one_altivec(src[0] + i);                    t1 = float_to_int16_one_altivec(src[1] + i);                    d0 = vec_mergeh(t0, t1);                    d1 = vec_mergel(t0, t1);                    vec_st(d0,  0, dst + i);                    vec_st(d1, 16, dst + i);                    dst += 8;                }            }        } else {            for (i = 0; i < channels; i++)                float_to_int16_stride_altivec(dst + i, src[i], len, channels);        }    }}
开发者ID:AronVietti,项目名称:FFmpeg,代码行数:46,


示例9: OSX_AudioIOProc16Bit_Altivec

/*
 * Converts SOUND_BUFFER_SIZE signed 16-bit samples from myInBuffer into
 * floats scaled by SOUND_BUFFER_SCALE_16BIT and stores them in myOutBuffer.
 *
 * When gBufferMono2Stereo is set, every input sample is written twice in a
 * row, so the output holds 2 * SOUND_BUFFER_SIZE floats; otherwise the
 * output has one float per input sample.
 *
 * Eight samples are consumed per iteration with vec_ld/vec_st, so both
 * buffers are presumably 16-byte aligned and SOUND_BUFFER_SIZE a multiple
 * of 8 -- confirm with the caller.
 *
 * Fix: in the mono-to-stereo path vec_mergeh and vec_mergel were swapped.
 * vec_mergeh(v, v) yields elements (0,0,1,1) and vec_mergel(v, v) yields
 * (2,2,3,3), so the original wrote samples 2,3 ahead of samples 0,1 within
 * each group of four.
 */
static void OSX_AudioIOProc16Bit_Altivec(SInt16	*myInBuffer, float *myOutBuffer)
{
	register UInt32	i;
	float f = SOUND_BUFFER_SCALE_16BIT;
	const vector float gain = vec_load_ps1(&f); // multiplier (presumably f in all four lanes)
	const vector float mix = vec_setzero();     // addend for vec_madd (presumably all zero)

	if (gBufferMono2Stereo)
	{
		int j=0;
		for (i=0;i<SOUND_BUFFER_SIZE;i+=8, j+=16)
		{
			vector short int v0 = vec_ld(0, myInBuffer + i); // Load 8 shorts
			vector float v1 = vec_ctf((vector signed int)vec_unpackh(v0), 0); // samples 0..3 as float
			vector float v2 = vec_ctf((vector signed int)vec_unpackl(v0), 0); // samples 4..7 as float
			vector float v3 = vec_madd(v1, gain, mix); // scale
			vector float v4 = vec_madd(v2, gain, mix); // scale
			vector float v5 = vec_mergeh(v3, v3); // v3(0,0,1,1)
			vector float v6 = vec_mergel(v3, v3); // v3(2,2,3,3)
			vector float v7 = vec_mergeh(v4, v4); // v4(0,0,1,1)
			vector float v8 = vec_mergel(v4, v4); // v4(2,2,3,3)
			vec_st(v5, 0, myOutBuffer + j); // Store 4 floats
			vec_st(v6, 0, myOutBuffer + 4 + j); // Store 4 floats
			vec_st(v7, 0, myOutBuffer + 8 + j); // Store 4 floats
			vec_st(v8, 0, myOutBuffer + 12 + j); // Store 4 floats
		}
	}
	else
	{
		for (i=0;i<SOUND_BUFFER_SIZE;i+=8)
		{
			vector short int v0 = vec_ld(0, myInBuffer + i); // Load 8 shorts
			vector float v1 = vec_ctf((vector signed int)vec_unpackh(v0), 0); // samples 0..3 as float
			vector float v2 = vec_ctf((vector signed int)vec_unpackl(v0), 0); // samples 4..7 as float
			vector float v3 = vec_madd(v1, gain, mix); // scale
			vector float v4 = vec_madd(v2, gain, mix); // scale
			vec_st(v3, 0, myOutBuffer + i); // Store 4 floats
			vec_st(v4, 0, myOutBuffer + 4 + i); // Store 4 floats
		}
	}
}
开发者ID:LighFusion,项目名称:surreal,代码行数:46,


示例10: pix_multiply

/*
 * Per-byte multiply of two vectors of sixteen 8-bit channels.
 *
 * Every byte of p is multiplied by the corresponding byte of a and brought
 * back to 8 bits with
 *     t = p * a + 0x80;   result = (t + (t >> 8)) >> 8
 * computed in 16-bit lanes.  The high and low eight bytes are widened,
 * processed separately, and packed back with unsigned saturation.
 *
 * Fix relative to the scraped original: the return type and the function
 * name were fused into one token ("intpix_multiply"), which does not
 * compile.
 */
static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
    vector unsigned short hi, lo, mod;

    /* unpack the high eight bytes to short */
    hi = (vector unsigned short)
	vec_mergeh ((vector unsigned char)AVV (0),
		    (vector unsigned char)p);

    mod = (vector unsigned short)
	vec_mergeh ((vector unsigned char)AVV (0),
		    (vector unsigned char)a);

    /* hi * mod + 0x80 */
    hi = vec_mladd (hi, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    /* (t + (t >> 8)) >> 8 */
    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));

    hi = vec_sr (hi, vec_splat_u16 (8));

    /* unpack the low eight bytes to short */
    lo = (vector unsigned short)
	vec_mergel ((vector unsigned char)AVV (0),
		    (vector unsigned char)p);

    mod = (vector unsigned short)
	vec_mergel ((vector unsigned char)AVV (0),
		    (vector unsigned char)a);

    /* lo * mod + 0x80 */
    lo = vec_mladd (lo, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    /* (t + (t >> 8)) >> 8 */
    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));

    lo = vec_sr (lo, vec_splat_u16 (8));

    /* narrow both halves back to sixteen bytes */
    return (vector unsigned int)vec_packsu (hi, lo);
}
开发者ID:1833183060,项目名称:wke,代码行数:40,


示例11: v_store_interleave_f32

/*
 * Stores three float vectors to ptr as twelve floats using three 16-byte
 * vec_xst stores at byte offsets 0, 16 and 32.  Each stored vector is
 * assembled from a, b and c with vec_mergeh/vec_mergel, vec_perm and
 * vec_sld; judging by the name the result is the interleaved sequence
 * a0 b0 c0 a1 b1 c1 ... -- the exact lane order depends on the target's
 * element numbering, so confirm against the test's expectations.
 */
void v_store_interleave_f32(float *ptr, vector float a, vector float b, vector float c)
{
    /* Byte-select patterns for the first and last stored vectors. */
    static const vector unsigned char ahbc =
        {0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 4, 5, 6, 7};
    static const vector unsigned char clab =
        {8, 9, 10, 11, 24, 25, 26, 27, 28, 29, 30, 31, 12, 13, 14, 15};

    const vector float hbc = vec_mergeh(b, c);   /* high halves of b and c */
    const vector float lab = vec_mergel(a, b);   /* low halves of a and b  */

    const vector float first  = vec_perm(a, hbc, ahbc);
    const vector float middle = vec_sld(lab, hbc, 8);
    const vector float last   = vec_perm(c, lab, clab);

    vec_xst(first,   0, ptr);
    vec_xst(middle, 16, ptr);
    vec_xst(last,   32, ptr);
}
开发者ID:MaxKellermann,项目名称:gcc,代码行数:13,


示例12: predict_16x16_p_altivec

static void predict_16x16_p_altivec( uint8_t *src ){    int16_t a, b, c, i;    int H = 0;    int V = 0;    int16_t i00;    for( i = 1; i <= 8; i++ )    {        H += i * ( src[7+i - FDEC_STRIDE ]  - src[7-i - FDEC_STRIDE ] );        V += i * ( src[(7+i)*FDEC_STRIDE -1] - src[(7-i)*FDEC_STRIDE -1] );    }    a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );    b = ( 5 * H + 32 ) >> 6;    c = ( 5 * V + 32 ) >> 6;    i00 = a - b * 7 - c * 7 + 16;    vect_sshort_u i00_u, b_u, c_u;    i00_u.s[0] = i00;    b_u.s[0]   = b;    c_u.s[0]   = c;    vec_u16_t val5_v = vec_splat_u16(5);    vec_s16_t i00_v, b_v, c_v;    i00_v = vec_splat(i00_u.v, 0);    b_v = vec_splat(b_u.v, 0);    c_v = vec_splat(c_u.v, 0);    vec_s16_t induc_v  = (vec_s16_t) CV(0,  1,  2,  3,  4,  5,  6,  7);    vec_s16_t b8_v = vec_sl(b_v, vec_splat_u16(3));    vec_s32_t mule_b_v = vec_mule(induc_v, b_v);    vec_s32_t mulo_b_v = vec_mulo(induc_v, b_v);    vec_s16_t mul_b_induc0_v = vec_pack(vec_mergeh(mule_b_v, mulo_b_v), vec_mergel(mule_b_v, mulo_b_v));    vec_s16_t add_i0_b_0v = vec_adds(i00_v, mul_b_induc0_v);    vec_s16_t add_i0_b_8v = vec_adds(b8_v, add_i0_b_0v);    int y;    for( y = 0; y < 16; y++ )    {        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);        vec_s16_t shift_8_v = vec_sra(add_i0_b_8v, val5_v);        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_8_v);        vec_st( com_sat_v, 0, &src[0]);        src += FDEC_STRIDE;        i00 += c;        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);        add_i0_b_8v = vec_adds(add_i0_b_8v, c_v);    }}
开发者ID:UIKit0,项目名称:H.264-in-CUDA,代码行数:50,


示例13: foo

/*
 * Compile-coverage driver: evaluates several built-ins on file-scope
 * vector operands and appends each result through vblr (vector bool long
 * long results) or vdr (vector double results), post-incrementing the
 * pointer after every store.  Store order is kept as in the original.
 */
void
foo (vector bool long long *vblr,
     vector double *vdr)
{
  *vblr++ = vec_andc (vbla, vblb);

  /* Conversions to double from the signed and unsigned operands.  */
  *vdr++  = vec_double (vsla);
  *vdr++  = vec_double (vula);

  /* Merges.  */
  *vblr++ = vec_mergeh (vbla, vblb);
  *vblr++ = vec_mergel (vbla, vblb);

  /* Logical operations and selects.  */
  *vblr++ = vec_nor (vbla, vblb);
  *vblr++ = vec_or (vbla, vblb);
  *vblr++ = vec_sel (vbla, vblb, vblc);
  *vblr++ = vec_sel (vbla, vblb, vulc);
  *vblr++ = vec_xor (vbla, vblb);
}
开发者ID:0day-ci,项目名称:gcc,代码行数:14,


示例14: main

/* Place the content of the array of structures   in vectors x_vec, y_vec, z_vec, and t_vec */int main(int argc, char **argv) {   vector float x_vec, y_vec, z_vec,       t_vec, hold[4], tmp[4];      /* Load structures into vectors */   hold[0] = vec_ld(0, (float*)p_motion);   hold[1] = vec_ld(0, (float*)&p_motion[1]);      hold[2] = vec_ld(0, (float*)&p_motion[2]);      hold[3] = vec_ld(0, (float*)&p_motion[3]);      /* Perform first step of the swizzle */   tmp[0] = vec_mergeh(hold[0], hold[2]);   tmp[1] = vec_mergeh(hold[1], hold[3]);   tmp[2] = vec_mergel(hold[0], hold[2]);   tmp[3] = vec_mergel(hold[1], hold[3]);      /* Perform second step of the swizzle */   x_vec = vec_mergeh(tmp[0], tmp[1]);   y_vec = vec_mergel(tmp[0], tmp[1]);   z_vec = vec_mergeh(tmp[2], tmp[3]);   t_vec = vec_mergel(tmp[2], tmp[3]);   return 0;}
开发者ID:pstrinkle,项目名称:misc-umbc,代码行数:26,


示例15: a52_resample_STEREO_to_2_altivec

static int a52_resample_STEREO_to_2_altivec(float * _f, int16_t * s16){#if 0  int i;  int32_t * f = (int32_t *) _f;  for (i = 0; i < 256; i++) {    s16[2*i] = convert (f[i]);    s16[2*i+1] = convert (f[i+256]);  }  return 2*256;#else  int i = 0;  int32_t * f = (int32_t *) _f;  register vector signed int f0, f4, f256, f260;  register vector signed short reven, rodd, r0, r1;  for (i = 0; i < 256; i+= 8) {    f0 = vec_ld(0, f);    f4 = vec_ld(16, f);    f256 = vec_ld(1024, f);    f260 = vec_ld(1040, f);    reven = convert16_altivec(f0, f4);    rodd = convert16_altivec(f256, f260);    r0 = vec_mergeh(reven, rodd);    r1 = vec_mergel(reven, rodd);    // FIXME can be merged to spare some I/O    unaligned_store(r0, 0, s16);    unaligned_store(r1, 16, s16);    f += 8;    s16 += 16;  }  return(2*256);#endif}
开发者ID:dr4g0nsr,项目名称:mplayer-skyviia-8860,代码行数:37,


示例16: PREFIX_h264_qpel16_v_lowpass_altivec

static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {    register int i;    LOAD_ZERO;    const vec_u8 perm = vec_lvsl(0, src);    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_u16 v5us = vec_splat_u16(5);    const vec_s16 v5ss = vec_splat_s16(5);    const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));    uint8_t *srcbis = src - (srcStride * 2);    const vec_u8 srcM2a = vec_ld(0, srcbis);    const vec_u8 srcM2b = vec_ld(16, srcbis);    const vec_u8 srcM2 = vec_perm(srcM2a, srcM2b, perm);    //srcbis += srcStride;    const vec_u8 srcM1a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcM1b = vec_ld(16, srcbis);    const vec_u8 srcM1 = vec_perm(srcM1a, srcM1b, perm);    //srcbis += srcStride;    const vec_u8 srcP0a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP0b = vec_ld(16, srcbis);    const vec_u8 srcP0 = vec_perm(srcP0a, srcP0b, perm);    //srcbis += srcStride;    const vec_u8 srcP1a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP1b = vec_ld(16, srcbis);    const vec_u8 srcP1 = vec_perm(srcP1a, srcP1b, perm);    //srcbis += srcStride;    const vec_u8 srcP2a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP2b = vec_ld(16, srcbis);    const vec_u8 srcP2 = vec_perm(srcP2a, srcP2b, perm);    //srcbis += srcStride;    vec_s16 srcM2ssA = (vec_s16) vec_mergeh(zero_u8v, srcM2);    vec_s16 srcM2ssB = (vec_s16) vec_mergel(zero_u8v, srcM2);    vec_s16 srcM1ssA = (vec_s16) vec_mergeh(zero_u8v, srcM1);    vec_s16 srcM1ssB = (vec_s16) vec_mergel(zero_u8v, srcM1);    vec_s16 srcP0ssA = (vec_s16) vec_mergeh(zero_u8v, srcP0);    vec_s16 srcP0ssB = (vec_s16) vec_mergel(zero_u8v, srcP0);    vec_s16 srcP1ssA = (vec_s16) vec_mergeh(zero_u8v, srcP1);    vec_s16 srcP1ssB = (vec_s16) vec_mergel(zero_u8v, srcP1);    vec_s16 srcP2ssA = (vec_s16) vec_mergeh(zero_u8v, srcP2);    vec_s16 srcP2ssB = (vec_s16) vec_mergel(zero_u8v, srcP2);    vec_s16 pp1A, pp1B, pp2A, 
pp2B, pp3A, pp3B,              psumA, psumB, sumA, sumB,              srcP3ssA, srcP3ssB,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;    vec_u8 sum, fsum, srcP3a, srcP3b, srcP3;    for (i = 0 ; i < 16 ; i++) {        srcP3a = vec_ld(0, srcbis += srcStride);        srcP3b = vec_ld(16, srcbis);        srcP3 = vec_perm(srcP3a, srcP3b, perm);        srcP3ssA = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3ssB = (vec_s16) vec_mergel(zero_u8v, srcP3);        //srcbis += srcStride;        sum1A = vec_adds(srcP0ssA, srcP1ssA);        sum1B = vec_adds(srcP0ssB, srcP1ssB);        sum2A = vec_adds(srcM1ssA, srcP2ssA);        sum2B = vec_adds(srcM1ssB, srcP2ssB);        sum3A = vec_adds(srcM2ssA, srcP3ssA);        sum3B = vec_adds(srcM2ssB, srcP3ssB);        srcM2ssA = srcM1ssA;        srcM2ssB = srcM1ssB;        srcM1ssA = srcP0ssA;        srcM1ssB = srcP0ssB;        srcP0ssA = srcP1ssA;        srcP0ssB = srcP1ssB;        srcP1ssA = srcP2ssA;        srcP1ssB = srcP2ssB;        srcP2ssA = srcP3ssA;        srcP2ssB = srcP3ssB;        pp1A = vec_mladd(sum1A, v20ss, v16ss);        pp1B = vec_mladd(sum1B, v20ss, v16ss);        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);        pp3A = vec_add(sum3A, pp1A);        pp3B = vec_add(sum3B, pp1B);        psumA = vec_sub(pp3A, pp2A);        psumB = vec_sub(pp3B, pp2B);        sumA = vec_sra(psumA, v5us);        sumB = vec_sra(psumB, v5us);        sum = vec_packsu(sumA, sumB);        ASSERT_ALIGNED(dst);        OP_U8_ALTIVEC(fsum, sum, vec_ld(0, dst));        vec_st(fsum, 0, dst);//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例17: vec_perm

  tmp1 = vec_perm(table[6], table[7], tmpIndex);  stmp1 = vec_perm(slope_cos[6], slope_cos[7], tmpIndex);  select = (vector  unsigned short)vec_cmpgt(PerIndex, (((vector unsigned char){95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95})) );  tmp3 = vec_sel(tmp0, tmp1, select);  stmp3 = vec_sel(stmp0, stmp1, select);    select = (vector  unsigned short)vec_cmpgt(PerIndex, (((vector unsigned char){63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63})) );  table1 = vec_sel(tmp2, tmp3, select);  slope1 = vec_sel(stmp2, stmp3, select);     L_tmp0 = vec_sra(vec_mule(slope0, offset0), (((vector unsigned int){12,12,12,12})) );  L_tmp1 = vec_sra(vec_mulo(slope0, offset0), (((vector unsigned int){12,12,12,12})) );  L_tmp2 = vec_sra(vec_mule(slope1, offset1), (((vector unsigned int){12,12,12,12})) );  L_tmp3 = vec_sra(vec_mulo(slope1, offset1), (((vector unsigned int){12,12,12,12})) );    tmp0 = vec_packs(L_tmp0, L_tmp2);  tmp1 = vec_packs(L_tmp1, L_tmp3);  tmp2 = vec_mergeh(tmp0, tmp1);  tmp3 = vec_mergel(tmp0, tmp1);       lspq[0] = vec_adds(table0, tmp2);  lspq[1] = vec_adds(table1, tmp3);  return;}
开发者ID:0day-ci,项目名称:gcc,代码行数:30,


示例18: processYUVAltivec

//.........这里部分代码省略.........    shortBuffer.s[6] = m_Urange;    shortBuffer.s[7] = m_Vrange;    UVrange = shortBuffer.v;            //setup the cache prefetch -- A MUST!!!    UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );    #ifndef PPC970     vec_dst( inData, prefetchSize, 0 );    vec_dst( rightData, prefetchSize, 1 );    vec_dst( inData+32, prefetchSize, 2 );    vec_dst( rightData+32, prefetchSize, 3 );    #endif //PPC970        for ( i=0; i<h; i++){        for (j=0; j<w; j++)        {        #ifndef PPC970        //this function is probably memory bound on most G4's -- what else is new?            vec_dst( inData, prefetchSize, 0 );            vec_dst( rightData, prefetchSize, 1 );            vec_dst( inData+32, prefetchSize, 2 );            vec_dst( rightData+32, prefetchSize, 3 );        #endif        //separate the U and V from Y        UVres1 = (vector unsigned short)vec_mule(one,inData[0]);        UVres2 = (vector unsigned short)vec_mule(one,rightData[0]);                    //vec_mulo Y * 1 to short vector Y Y Y Y shorts        Yres1 = (vector unsigned short)vec_mulo(one,inData[0]);        Yres2 = (vector unsigned short)vec_mulo(one,rightData[0]);                Yhi = vec_adds(Yres2,Yrange);        Ylo = vec_subs(Yres2,Yrange);                //go to ints for comparison        UVhi = vec_adds(UVres2,UVrange);        UVlo = vec_subs(UVres2,UVrange);                Uhi = vec_mule(sone,UVhi);        Ulo = vec_mule(sone,UVlo);                Vhi = vec_mulo(sone,UVhi);        Vlo = vec_mulo(sone,UVlo);                Ures = vec_mule(sone,UVres1);         Vres = vec_mulo(sone,UVres1);                  Umasklo = vec_cmpgt(Ures,Ulo);         Umaskhi = vec_cmplt(Ures,Uhi);                  Vmasklo = vec_cmpgt(Vres,Vlo);         Vmaskhi = vec_cmplt(Vres,Vhi);                  Umaskhi = vec_and(Umaskhi,Umasklo);                  Vmaskhi = vec_and(Vmaskhi,Vmasklo);                  Umasklo = vec_and(Umaskhi,Vmaskhi);         Vmasklo = 
vec_and(Umaskhi,Vmaskhi);                  hiImage = (vector unsigned short)vec_mergeh(Umasklo,Vmasklo);         loImage = (vector unsigned short)vec_mergel(Umasklo,Vmasklo);                  //pack it back down to bool short         UVmaskhi = (vector bool short)vec_packsu(hiImage,loImage);                  Ymasklo = vec_cmpgt(Yres1,Ylo);         Ymaskhi = vec_cmplt(Yres1,Yhi);                  Ymaskhi = vec_and(Ymaskhi,Ymasklo);                  Ymaskhi = vec_and(Ymaskhi,UVmaskhi);         UVmaskhi = vec_and(Ymaskhi,UVmaskhi);                  //bitwise comparison and move using the result of the comparison as a mask         Yres1 = vec_sel(Yres1,Yblank,Ymaskhi);                  //UVres1 = vec_sel(UVres1,UVres2,UVmaskhi);         UVres1 = vec_sel(UVres1,UVblank,UVmaskhi);                  //merge the Y and UV back together         hiImage = vec_mergeh(UVres1,Yres1);         loImage = vec_mergel(UVres1,Yres1);                  //pack it back down to unsigned char to store         inData[0] = vec_packsu(hiImage,loImage);                  inData++;         rightData++;                }        #ifndef PPC970        vec_dss(0);        vec_dss(1);        vec_dss(2);        vec_dss(3);        #endif    }}
开发者ID:kmatheussen,项目名称:libpd,代码行数:101,


示例19: processYUV_Altivec

void pix_add :: processYUV_Altivec(imageStruct &image, imageStruct &right){ int h,w,width;   width = image.xsize/8;   //format is U Y V Y    union    {        //unsigned int	i;        short	elements[8];        //vector signed char v;        vector	signed short v;    }shortBuffer;        union    {        //unsigned int	i;        unsigned char	elements[16];        //vector signed char v;        vector	unsigned char v;    }charBuffer;    //vector unsigned char c;    register vector signed short d, hiImage, loImage, YRight, UVRight, YImage, UVImage, UVTemp, YTemp;   // vector unsigned char zero = vec_splat_u8(0);    register vector unsigned char c,one;  //  vector signed short zshort = vec_splat_s16(0);    vector unsigned char *inData = (vector unsigned char*) image.data;    vector unsigned char *rightData = (vector unsigned char*) right.data;    //Write the pixel (pair) to the transfer buffer    charBuffer.elements[0] = 2;    charBuffer.elements[1] = 1;    charBuffer.elements[2] = 2;    charBuffer.elements[3] = 1;    charBuffer.elements[4] = 2;    charBuffer.elements[5] = 1;    charBuffer.elements[6] = 2;    charBuffer.elements[7] = 1;    charBuffer.elements[8] = 2;    charBuffer.elements[9] = 1;    charBuffer.elements[10] = 2;    charBuffer.elements[11] = 1;    charBuffer.elements[12] = 2;    charBuffer.elements[13] = 1;    charBuffer.elements[14] = 2;    charBuffer.elements[15] = 1;    //Load it into the vector unit    c = charBuffer.v;    one =  vec_splat_u8( 1 );    shortBuffer.elements[0] = 255;    //Load it into the vector unit    d = shortBuffer.v;    d = static_cast<vector signed short>(vec_splat(static_cast<vector signed short>(d),0));#ifndef PPC970    UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );    vec_dst( inData, prefetchSize, 0 );    vec_dst( rightData, prefetchSize, 1 );#endif    for ( h=0; h<image.ysize; h++){      for (w=0; w<width; w++)        {#ifndef PPC970	  vec_dst( inData, prefetchSize, 0 );	  vec_dst( rightData, prefetchSize, 1 
);#endif	  //interleaved U Y V Y chars	  //vec_mule UV * 2 to short vector U V U V shorts	  UVImage = static_cast<vector signed short>(vec_mule(one,inData[0]));	  UVRight = static_cast<vector signed short>(vec_mule(c,rightData[0]));	  //vec_mulo Y * 1 to short vector Y Y Y Y shorts	  YImage = static_cast<vector signed short>(vec_mulo(c,inData[0]));	  YRight = static_cast<vector signed short>(vec_mulo(c,rightData[0]));	  //vel_subs UV - 255	  UVRight = static_cast<vector signed short>(vec_subs(UVRight, d));	  //vec_adds UV	  UVTemp = vec_adds(UVImage,UVRight);	  //vec_adds Y	  YTemp = vec_adds(YImage,YRight);	  hiImage = vec_mergeh(UVTemp,YTemp);	  loImage = vec_mergel(UVTemp,YTemp);	  //vec_mergel + vec_mergeh Y and UV	  inData[0] = vec_packsu(hiImage, loImage);	  inData++;	  rightData++;        }#ifndef PPC970        vec_dss( 0 );//.........这里部分代码省略.........
开发者ID:avilleret,项目名称:Gem,代码行数:101,


示例20: ff_fdct_altivec

//.........这里部分代码省略.........    x1 = vec_add(b51, b31);    x2 = vec_add(b71, b31);    x3 = vec_add(b51, b11);    x8 = vec_add(x2, x3);    cnst = LD_W3;    x8 = vec_madd(cnst, x8, mzero);    cnst = LD_W8;    x0 = vec_madd(cnst, x0, mzero);    cnst = LD_W9;    x1 = vec_madd(cnst, x1, mzero);    cnst = LD_WA;    x2 = vec_madd(cnst, x2, x8);    cnst = LD_WB;    x3 = vec_madd(cnst, x3, x8);    cnst = LD_W4;    b71 = vec_madd(cnst, b71, x0);    cnst = LD_W5;    b51 = vec_madd(cnst, b51, x1);    cnst = LD_W6;    b31 = vec_madd(cnst, b31, x1);    cnst = LD_W7;    b11 = vec_madd(cnst, b11, x0);    b71 = vec_add(b71, x2);    b51 = vec_add(b51, x3);    b31 = vec_add(b31, x2);    b11 = vec_add(b11, x3);    /* }}} */    /* 8x8 matrix transpose (vector float[8][2]) {{{ */    x0 = vec_mergel(b00, b20);    x1 = vec_mergeh(b00, b20);    x2 = vec_mergel(b10, b30);    x3 = vec_mergeh(b10, b30);    b00 = vec_mergeh(x1, x3);    b10 = vec_mergel(x1, x3);    b20 = vec_mergeh(x0, x2);    b30 = vec_mergel(x0, x2);    x4 = vec_mergel(b41, b61);    x5 = vec_mergeh(b41, b61);    x6 = vec_mergel(b51, b71);    x7 = vec_mergeh(b51, b71);    b41 = vec_mergeh(x5, x7);    b51 = vec_mergel(x5, x7);    b61 = vec_mergeh(x4, x6);    b71 = vec_mergel(x4, x6);    x0 = vec_mergel(b01, b21);    x1 = vec_mergeh(b01, b21);    x2 = vec_mergel(b11, b31);    x3 = vec_mergeh(b11, b31);    x4 = vec_mergel(b40, b60);    x5 = vec_mergeh(b40, b60);    x6 = vec_mergel(b50, b70);    x7 = vec_mergeh(b50, b70);    b40 = vec_mergeh(x1, x3);    b50 = vec_mergel(x1, x3);    b60 = vec_mergeh(x0, x2);
开发者ID:AVbin,项目名称:libav,代码行数:67,


示例21: main

int main (){  vector float fa = {1.0, 2.0, 3.0, -4.0};  vector float fb = {-2.0, -3.0, -4.0, -5.0};  vector float fc = vec_cpsgn (fa, fb);  vector long long la = {5L, 14L};  vector long long lb = {3L, 86L};  vector long long lc = vec_and (la, lb);  vector bool long long ld = {0, -1};  vector long long le = vec_and (la, ld);  vector long long lf = vec_and (ld, lb);  vector unsigned long long ua = {5L, 14L};  vector unsigned long long ub = {3L, 86L};  vector unsigned long long uc = vec_and (ua, ub);  vector bool long long ud = {0, -1};  vector unsigned long long ue = vec_and (ua, ud);  vector unsigned long long uf = vec_and (ud, ub);  vector long long lg = vec_andc (la, lb);  vector long long lh = vec_andc (la, ld);  vector long long li = vec_andc (ld, lb);  vector unsigned long long ug = vec_andc (ua, ub);  vector unsigned long long uh = vec_andc (ua, ud);  vector unsigned long long ui = vec_andc (ud, ub);  vector double da = {1.0, -4.0};  vector double db = {-2.0, 5.0};  vector double dc = vec_cpsgn (da, db);  vector long long lj = vec_mergeh (la, lb);  vector long long lk = vec_mergeh (la, ld);  vector long long ll = vec_mergeh (ld, la);  vector unsigned long long uj = vec_mergeh (ua, ub);  vector unsigned long long uk = vec_mergeh (ua, ud);  vector unsigned long long ul = vec_mergeh (ud, ua);  vector long long lm = vec_mergel (la, lb);  vector long long ln = vec_mergel (la, ld);  vector long long lo = vec_mergel (ld, la);  vector unsigned long long um = vec_mergel (ua, ub);  vector unsigned long long un = vec_mergel (ua, ud);  vector unsigned long long uo = vec_mergel (ud, ua);  vector long long lp = vec_nor (la, lb);  vector long long lq = vec_nor (la, ld);  vector long long lr = vec_nor (ld, la);  vector unsigned long long up = vec_nor (ua, ub);  vector unsigned long long uq = vec_nor (ua, ud);  vector unsigned long long ur = vec_nor (ud, ua);  vector long long ls = vec_or (la, lb);  vector long long lt = vec_or (la, ld);  vector long long lu = vec_or (ld, la); 
 vector unsigned long long us = vec_or (ua, ub);  vector unsigned long long ut = vec_or (ua, ud);  vector unsigned long long uu = vec_or (ud, ua);  vector unsigned char ca = {0,4,8,1,5,9,2,6,10,3,7,11,15,12,14,13};  vector long long lv = vec_perm (la, lb, ca);  vector unsigned long long uv = vec_perm (ua, ub, ca);  vector long long lw = vec_sel (la, lb, lc);  vector long long lx = vec_sel (la, lb, uc);  vector long long ly = vec_sel (la, lb, ld);  vector unsigned long long uw = vec_sel (ua, ub, lc);  vector unsigned long long ux = vec_sel (ua, ub, uc);  vector unsigned long long uy = vec_sel (ua, ub, ld);  vector long long lz = vec_xor (la, lb);  vector long long l0 = vec_xor (la, ld);  vector long long l1 = vec_xor (ld, la);  vector unsigned long long uz = vec_xor (ua, ub);  vector unsigned long long u0 = vec_xor (ua, ud);  vector unsigned long long u1 = vec_xor (ud, ua);  int ia = vec_all_eq (ua, ub);  int ib = vec_all_ge (ua, ub);  int ic = vec_all_gt (ua, ub);  int id = vec_all_le (ua, ub);  int ie = vec_all_lt (ua, ub);  int ig = vec_all_ne (ua, ub);  int ih = vec_any_eq (ua, ub);  int ii = vec_any_ge (ua, ub);  int ij = vec_any_gt (ua, ub);  int ik = vec_any_le (ua, ub);  int il = vec_any_lt (ua, ub);  int im = vec_any_ne (ua, ub);  vector int sia = {9, 16, 25, 36};  vector int sib = {-8, -27, -64, -125};//.........这里部分代码省略.........
开发者ID:0day-ci,项目名称:gcc,代码行数:101,


示例22: test1

//.........这里部分代码省略.........// CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})// CHECK-LE: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})  res_vd = vec_madd(vd, vd, vd);// CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})// CHECK-LE: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})  /* vec_mergeh */  res_vsll = vec_mergeh(vsll, vsll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vsll = vec_mergeh(vsll, vbll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vsll = vec_mergeh(vbll, vsll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vull = vec_mergeh(vull, vull);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vull = vec_mergeh(vull, vbll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vull = vec_mergeh(vbll, vull);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  /* vec_mergel */  res_vsll = vec_mergel(vsll, vsll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vsll = vec_mergel(vsll, vbll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vsll = vec_mergel(vbll, vsll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vull = vec_mergel(vull, vull);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vull = vec_mergel(vull, vbll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  res_vull = vec_mergel(vbll, vull);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm  /* vec_msub */  res_vf = vec_msub(vf, vf, vf);// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, 
%{{[0-9]+}}// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>// CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}// CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>  res_vd = vec_msub(vd, vd, vd);// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
开发者ID:AlexDenisov,项目名称:clang,代码行数:67,


示例23: processYUVAltivec

/* more optimized version - unrolled and load-hoisted */void pix_offset :: processYUVAltivec(imageStruct &image){  register int h,w,width,height;  width = image.xsize/16; //for altivec  height = image.ysize;  //format is U Y V Y  // start of working altivec function  union {    short       elements[8];    vector      signed short v;  } transferBuffer;  register vector signed short c, hi, lo;  register vector signed short hi1, lo1;  register vector signed short loadhi, loadhi1, loadlo, loadlo1;  register vector unsigned char zero = vec_splat_u8(0);  register vector unsigned char *inData = (vector unsigned char*) image.data;  //Write the pixel (pair) to the transfer buffer  //transferBuffer.i = (U << 24) | (Y << 16) | (V << 8 ) | Y;  transferBuffer.elements[0] = U;  transferBuffer.elements[1] = Y;  transferBuffer.elements[2] = V;  transferBuffer.elements[3] = Y;  transferBuffer.elements[4] = U;  transferBuffer.elements[5] = Y;  transferBuffer.elements[6] = V;  transferBuffer.elements[7] = Y;  //Load it into the vector unit  c = transferBuffer.v;#ifndef PPC970  UInt32                        prefetchSize = GetPrefetchConstant( 16, 1,      256 );  vec_dst( inData, prefetchSize, 0 );  vec_dst( inData+16, prefetchSize, 1 );  vec_dst( inData+32, prefetchSize, 2 );  vec_dst( inData+64, prefetchSize, 3 );#endif  //expand the UInt8's to short's  loadhi = (vector signed short) vec_mergeh( zero, inData[0] );  loadlo = (vector signed short) vec_mergel( zero, inData[0] );  loadhi1 = (vector signed short) vec_mergeh( zero, inData[1] );  loadlo1 = (vector signed short) vec_mergel( zero, inData[1] );  /  for ( h=0; h<height; h++) {    for (w=0; w<width; w++) {#ifndef PPC970      vec_dst( inData, prefetchSize, 0 );      vec_dst( inData+16, prefetchSize, 1 );      vec_dst( inData+32, prefetchSize, 2 );      vec_dst( inData+64, prefetchSize, 3 );#endif      //add the constant to it      hi = vec_add( loadhi, c );      lo = vec_add( loadlo, c );      hi1 = vec_add( loadhi1, c );      
lo1 = vec_add( loadlo1, c );      //expand the UInt8's to short's      loadhi = (vector signed short) vec_mergeh( zero, inData[2] );      loadlo = (vector signed short) vec_mergel( zero, inData[2] );      loadhi1 = (vector signed short) vec_mergeh( zero, inData[3] );      loadlo1 = (vector signed short) vec_mergel( zero, inData[3] );      //pack the result back down, with saturation      inData[0] = vec_packsu( hi, lo );      inData++;      inData[0] = vec_packsu( hi1, lo1 );      inData++;    }  }  //  // finish the last iteration after the loop  //  hi = vec_add( loadhi, c );  lo = vec_add( loadlo, c );  hi1 = vec_add( loadhi1, c );  lo1 = vec_add( loadlo1, c );  //pack the result back down, with saturation  inData[0] = vec_packsu( hi, lo );//.........这里部分代码省略.........
开发者ID:megrimm,项目名称:Gem,代码行数:101,


示例24: dct_quantize_altivec

//.........这里部分代码省略.........            data7 = vec_max(vec_min(data7, max_q), min_q);        }        {        vector bool char zero_01, zero_23, zero_45, zero_67;        vector signed char scanIndexes_01, scanIndexes_23, scanIndexes_45, scanIndexes_67;        vector signed char negOne = vec_splat_s8(-1);        vector signed char* scanPtr =                (vector signed char*)(s->intra_scantable.inverse);        signed char lastNonZeroChar;        // Determine the largest non-zero index.        zero_01 = vec_pack(vec_cmpeq(data0, (vector signed short)zero),                vec_cmpeq(data1, (vector signed short)zero));        zero_23 = vec_pack(vec_cmpeq(data2, (vector signed short)zero),                vec_cmpeq(data3, (vector signed short)zero));        zero_45 = vec_pack(vec_cmpeq(data4, (vector signed short)zero),                vec_cmpeq(data5, (vector signed short)zero));        zero_67 = vec_pack(vec_cmpeq(data6, (vector signed short)zero),                vec_cmpeq(data7, (vector signed short)zero));        // 64 biggest values        scanIndexes_01 = vec_sel(scanPtr[0], negOne, zero_01);        scanIndexes_23 = vec_sel(scanPtr[1], negOne, zero_23);        scanIndexes_45 = vec_sel(scanPtr[2], negOne, zero_45);        scanIndexes_67 = vec_sel(scanPtr[3], negOne, zero_67);        // 32 largest values        scanIndexes_01 = vec_max(scanIndexes_01, scanIndexes_23);        scanIndexes_45 = vec_max(scanIndexes_45, scanIndexes_67);        // 16 largest values        scanIndexes_01 = vec_max(scanIndexes_01, scanIndexes_45);        // 8 largest values        scanIndexes_01 = vec_max(vec_mergeh(scanIndexes_01, negOne),                vec_mergel(scanIndexes_01, negOne));        // 4 largest values        scanIndexes_01 = vec_max(vec_mergeh(scanIndexes_01, negOne),                vec_mergel(scanIndexes_01, negOne));        // 2 largest values        scanIndexes_01 = vec_max(vec_mergeh(scanIndexes_01, negOne),                vec_mergel(scanIndexes_01, negOne));        
// largest value        scanIndexes_01 = vec_max(vec_mergeh(scanIndexes_01, negOne),                vec_mergel(scanIndexes_01, negOne));        scanIndexes_01 = vec_splat(scanIndexes_01, 0);        vec_ste(scanIndexes_01, 0, &lastNonZeroChar);        lastNonZero = lastNonZeroChar;        // While the data is still in vectors we check for the transpose IDCT permute        // and handle it using the vector unit if we can.  This is the permute used        // by the altivec idct, so it is common when using the altivec dct.        if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) {            TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);        }        vec_st(data0, 0, data);        vec_st(data1, 16, data);        vec_st(data2, 32, data);        vec_st(data3, 48, data);        vec_st(data4, 64, data);        vec_st(data5, 80, data);        vec_st(data6, 96, data);        vec_st(data7, 112, data);        }    }    // special handling of block[0]    if (s->mb_intra) {        if (!s->h263_aic) {            if (n < 4)                oldBaseValue /= s->y_dc_scale;            else                oldBaseValue /= s->c_dc_scale;        }        // Divide by 8, rounding the result        data[0] = (oldBaseValue + 4) >> 3;    }    // We handled the transpose permutation above and we don't    // need to permute the "no" permutation case.    if ((lastNonZero > 0) &&        (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) {        ff_block_permute(data, s->dsp.idct_permutation,                s->intra_scantable.scantable, lastNonZero);    }    return lastNonZero;}
开发者ID:achellies,项目名称:camomile,代码行数:101,


示例25: PREFIX_h264_qpel16_hv_lowpass_altivec

/* this code assume stride % 16 == 0 *and* tmp is properly aligned */static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);  register int i;  const vector signed int vzero = vec_splat_s32(0);  const vector unsigned char permM2 = vec_lvsl(-2, src);  const vector unsigned char permM1 = vec_lvsl(-1, src);  const vector unsigned char permP0 = vec_lvsl(+0, src);  const vector unsigned char permP1 = vec_lvsl(+1, src);  const vector unsigned char permP2 = vec_lvsl(+2, src);  const vector unsigned char permP3 = vec_lvsl(+3, src);  const vector signed short v20ss = (const vector signed short)AVV(20);  const vector unsigned int v10ui = vec_splat_u32(10);  const vector signed short v5ss = vec_splat_s16(5);  const vector signed short v1ss = vec_splat_s16(1);  const vector signed int v512si = (const vector signed int)AVV(512);  const vector unsigned int v16ui = (const vector unsigned int)AVV(16);  register int align = ((((unsigned long)src) - 2) % 16);  src -= (2 * srcStride);  for (i = 0 ; i < 21 ; i ++) {    vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;    vector unsigned char srcR1 = vec_ld(-2, src);    vector unsigned char srcR2 = vec_ld(14, src);    switch (align) {    default: {      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = vec_perm(srcR1, srcR2, permP1);      srcP2 = vec_perm(srcR1, srcR2, permP2);      srcP3 = vec_perm(srcR1, srcR2, permP3);    } break;    case 11: {      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = vec_perm(srcR1, srcR2, permP1);      srcP2 = vec_perm(srcR1, srcR2, permP2);      srcP3 = srcR2;    } break;    case 12: {      vector 
unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = vec_perm(srcR1, srcR2, permP1);      srcP2 = srcR2;      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 13: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = srcR2;      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 14: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = srcR2;      srcP1 = vec_perm(srcR2, srcR3, permP1);      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 15: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = srcR2;      srcP0 = vec_perm(srcR2, srcR3, permP0);      srcP1 = vec_perm(srcR2, srcR3, permP1);      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    }    const vector signed short srcP0A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0);    const vector signed short srcP0B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0);    const vector signed short srcP1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1);    const vector signed short srcP1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1);    const vector signed short srcP2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2);    const vector signed short srcP2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2);    const vector signed short srcP3A = (vector signed short)vec_mergeh((vector unsigned 
char)vzero, srcP3);    const vector signed short srcP3B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3);    const vector signed short srcM1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1);    const vector signed short srcM1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1);    const vector signed short srcM2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM2);    const vector signed short srcM2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2);    const vector signed short sum1A = vec_adds(srcP0A, srcP1A);    const vector signed short sum1B = vec_adds(srcP0B, srcP1B);//.........这里部分代码省略.........
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,


示例26: PREFIX_h264_qpel16_v_lowpass_altivec

/* this code assume stride % 16 == 0 */static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);    register int i;  const vector signed int vzero = vec_splat_s32(0);  const vector unsigned char perm = vec_lvsl(0, src);  const vector signed short v20ss = (const vector signed short)AVV(20);  const vector unsigned short v5us = vec_splat_u16(5);  const vector signed short v5ss = vec_splat_s16(5);  const vector signed short v16ss = (const vector signed short)AVV(16);  const vector unsigned char dstperm = vec_lvsr(0, dst);  const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);  const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);    uint8_t *srcbis = src - (srcStride * 2);  const vector unsigned char srcM2a = vec_ld(0, srcbis);  const vector unsigned char srcM2b = vec_ld(16, srcbis);  const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm);  srcbis += srcStride;  const vector unsigned char srcM1a = vec_ld(0, srcbis);  const vector unsigned char srcM1b = vec_ld(16, srcbis);  const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm);  srcbis += srcStride;  const vector unsigned char srcP0a = vec_ld(0, srcbis);  const vector unsigned char srcP0b = vec_ld(16, srcbis);  const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm);  srcbis += srcStride;  const vector unsigned char srcP1a = vec_ld(0, srcbis);  const vector unsigned char srcP1b = vec_ld(16, srcbis);  const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm);  srcbis += srcStride;  const vector unsigned char srcP2a = vec_ld(0, srcbis);  const vector unsigned char srcP2b = vec_ld(16, srcbis);  const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm);  srcbis += srcStride;  vector signed short srcM2ssA = (vector signed 
short)vec_mergeh((vector unsigned char)vzero, srcM2);  vector signed short srcM2ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2);  vector signed short srcM1ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1);  vector signed short srcM1ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1);  vector signed short srcP0ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0);  vector signed short srcP0ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0);  vector signed short srcP1ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1);  vector signed short srcP1ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1);  vector signed short srcP2ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2);  vector signed short srcP2ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2);  for (i = 0 ; i < 16 ; i++) {    const vector unsigned char srcP3a = vec_ld(0, srcbis);    const vector unsigned char srcP3b = vec_ld(16, srcbis);    const vector unsigned char srcP3 = vec_perm(srcP3a, srcP3b, perm);    const vector signed short srcP3ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3);    const vector signed short srcP3ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3);    srcbis += srcStride;    const vector signed short sum1A = vec_adds(srcP0ssA, srcP1ssA);    const vector signed short sum1B = vec_adds(srcP0ssB, srcP1ssB);    const vector signed short sum2A = vec_adds(srcM1ssA, srcP2ssA);    const vector signed short sum2B = vec_adds(srcM1ssB, srcP2ssB);    const vector signed short sum3A = vec_adds(srcM2ssA, srcP3ssA);    const vector signed short sum3B = vec_adds(srcM2ssB, srcP3ssB);    srcM2ssA = srcM1ssA;    srcM2ssB = srcM1ssB;    srcM1ssA = srcP0ssA;    srcM1ssB = srcP0ssB;    srcP0ssA = srcP1ssA;    srcP0ssB = srcP1ssB;    srcP1ssA = srcP2ssA;    
srcP1ssB = srcP2ssB;    srcP2ssA = srcP3ssA;    srcP2ssB = srcP3ssB;        const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss);    const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss);    const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);    const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);        const vector signed short pp3A = vec_add(sum3A, pp1A);    const vector signed short pp3B = vec_add(sum3B, pp1B);    const vector signed short psumA = vec_sub(pp3A, pp2A);    const vector signed short psumB = vec_sub(pp3B, pp2B);    const vector signed short sumA = vec_sra(psumA, v5us);    const vector signed short sumB = vec_sra(psumB, v5us);    const vector unsigned char sum = vec_packsu(sumA, sumB);    const vector unsigned char dst1 = vec_ld(0, dst);    const vector unsigned char dst2 = vec_ld(16, dst);    const vector unsigned char vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));    vector unsigned char fsum;    OP_U8_ALTIVEC(fsum, sum, vdst);//.........这里部分代码省略.........
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,


示例27: PREFIX_h264_qpel16_hv_lowpass_altivec

//.........这里部分代码省略.........            srcP2 = srcR2;            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 13: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = srcR2;            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 14: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = srcR2;            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 15: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = srcR2;            srcP0 = vec_perm(srcR2, srcR3, permP0);            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        }        srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);        srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);        srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);        srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);        srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);        srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);        srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);        srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);        srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);        srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);        srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2);        sum1A = vec_adds(srcP0A, srcP1A);        sum1B = vec_adds(srcP0B, srcP1B);        sum2A = vec_adds(srcM1A, srcP2A);        
sum2B = vec_adds(srcM1B, srcP2B);        sum3A = vec_adds(srcM2A, srcP3A);        sum3B = vec_adds(srcM2B, srcP3B);        pp1A = vec_mladd(sum1A, v20ss, sum3A);        pp1B = vec_mladd(sum1B, v20ss, sum3B);        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);        psumA = vec_sub(pp1A, pp2A);        psumB = vec_sub(pp1B, pp2B);        vec_st(psumA, 0, tmp);        vec_st(psumB, 16, tmp);
开发者ID:AVbin,项目名称:libav,代码行数:66,


示例28: put_no_rnd_pixels16_xy2_altivec

/* next one assumes that ((line_size % 16) == 0) */static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h){    register int i;    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;    register vector unsigned char blockv, temp1, temp2;    register vector unsigned short temp3, temp4,        pixelssum1, pixelssum2, pixelssum3, pixelssum4;    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);    temp1 = vec_ld(0, pixels);    temp2 = vec_ld(16, pixels);    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {        pixelsv2 = temp2;    } else {        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));    }    pixelsv3 = vec_mergel(vczero, pixelsv1);    pixelsv4 = vec_mergel(vczero, pixelsv2);    pixelsv1 = vec_mergeh(vczero, pixelsv1);    pixelsv2 = vec_mergeh(vczero, pixelsv2);    pixelssum3 = vec_add((vector unsigned short)pixelsv3,                         (vector unsigned short)pixelsv4);    pixelssum3 = vec_add(pixelssum3, vcone);    pixelssum1 = vec_add((vector unsigned short)pixelsv1,                         (vector unsigned short)pixelsv2);    pixelssum1 = vec_add(pixelssum1, vcone);    for (i = 0; i < h ; i++) {        blockv = vec_ld(0, block);        temp1 = vec_ld(line_size, pixels);        temp2 = vec_ld(line_size + 16, pixels);        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {            pixelsv2 = temp2;        } else {            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));        }        pixelsv3 = vec_mergel(vczero, pixelsv1);        pixelsv4 = 
vec_mergel(vczero, pixelsv2);        pixelsv1 = vec_mergeh(vczero, pixelsv1);        pixelsv2 = vec_mergeh(vczero, pixelsv2);        pixelssum4 = vec_add((vector unsigned short)pixelsv3,                             (vector unsigned short)pixelsv4);        pixelssum2 = vec_add((vector unsigned short)pixelsv1,                             (vector unsigned short)pixelsv2);        temp4 = vec_add(pixelssum3, pixelssum4);        temp4 = vec_sra(temp4, vctwo);        temp3 = vec_add(pixelssum1, pixelssum2);        temp3 = vec_sra(temp3, vctwo);        pixelssum3 = vec_add(pixelssum4, vcone);        pixelssum1 = vec_add(pixelssum2, vcone);        blockv = vec_packsu(temp3, temp4);        vec_st(blockv, 0, block);        block += line_size;        pixels += line_size;    }}
开发者ID:AVLeo,项目名称:libav,代码行数:68,


示例29: PREFIX_h264_qpel16_h_lowpass_altivec

static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {    register int i;    LOAD_ZERO;    const vec_u8 permM2 = vec_lvsl(-2, src);    const vec_u8 permM1 = vec_lvsl(-1, src);    const vec_u8 permP0 = vec_lvsl(+0, src);    const vec_u8 permP1 = vec_lvsl(+1, src);    const vec_u8 permP2 = vec_lvsl(+2, src);    const vec_u8 permP3 = vec_lvsl(+3, src);    const vec_s16 v5ss = vec_splat_s16(5);    const vec_u16 v5us = vec_splat_u16(5);    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));    vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;    register int align = ((((unsigned long)src) - 2) % 16);    vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,              srcP2A, srcP2B, srcP3A, srcP3B,              srcM1A, srcM1B, srcM2A, srcM2B,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,              pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,              psumA, psumB, sumA, sumB;    vec_u8 sum, fsum;    for (i = 0 ; i < 16 ; i ++) {        vec_u8 srcR1 = vec_ld(-2, src);        vec_u8 srcR2 = vec_ld(14, src);        switch (align) {        default: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = vec_perm(srcR1, srcR2, permP3);        } break;        case 11: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = srcR2;        } break;        case 12: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            
srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = srcR2;            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 13: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = srcR2;            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 14: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = srcR2;            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 15: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = srcR2;            srcP0 = vec_perm(srcR2, srcR3, permP0);            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        }        srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);        srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);        srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);        srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);        srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);        srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);        srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);        srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);        srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);        srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例30: PREFIX_h264_qpel16_h_lowpass_altivec

/* this code assume stride % 16 == 0 */static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);  register int i;    const vector signed int vzero = vec_splat_s32(0);  const vector unsigned char permM2 = vec_lvsl(-2, src);  const vector unsigned char permM1 = vec_lvsl(-1, src);  const vector unsigned char permP0 = vec_lvsl(+0, src);  const vector unsigned char permP1 = vec_lvsl(+1, src);  const vector unsigned char permP2 = vec_lvsl(+2, src);  const vector unsigned char permP3 = vec_lvsl(+3, src);  const vector signed short v20ss = (const vector signed short)AVV(20);  const vector unsigned short v5us = vec_splat_u16(5);  const vector signed short v5ss = vec_splat_s16(5);  const vector signed short v16ss = (const vector signed short)AVV(16);  const vector unsigned char dstperm = vec_lvsr(0, dst);  const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);  const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);  register int align = ((((unsigned long)src) - 2) % 16);  for (i = 0 ; i < 16 ; i ++) {    vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;    vector unsigned char srcR1 = vec_ld(-2, src);    vector unsigned char srcR2 = vec_ld(14, src);    switch (align) {    default: {      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = vec_perm(srcR1, srcR2, permP1);      srcP2 = vec_perm(srcR1, srcR2, permP2);      srcP3 = vec_perm(srcR1, srcR2, permP3);    } break;    case 11: {      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = vec_perm(srcR1, srcR2, permP1);      srcP2 = vec_perm(srcR1, srcR2, permP2);      srcP3 = srcR2;    } 
break;    case 12: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = vec_perm(srcR1, srcR2, permP1);      srcP2 = srcR2;      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 13: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = srcR2;      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 14: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = srcR2;      srcP1 = vec_perm(srcR2, srcR3, permP1);      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 15: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = srcR2;      srcP0 = vec_perm(srcR2, srcR3, permP0);      srcP1 = vec_perm(srcR2, srcR3, permP1);      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    }    const vector signed short srcP0A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0);    const vector signed short srcP0B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0);    const vector signed short srcP1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1);    const vector signed short srcP1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1);    const vector signed short srcP2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2);    const vector signed short srcP2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2);    const vector signed short srcP3A = (vector signed 
short)vec_mergeh((vector unsigned char)vzero, srcP3);    const vector signed short srcP3B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3);    const vector signed short srcM1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1);    const vector signed short srcM1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1);    const vector signed short srcM2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM2);    const vector signed short srcM2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2);    const vector signed short sum1A = vec_adds(srcP0A, srcP1A);    const vector signed short sum1B = vec_adds(srcP0B, srcP1B);//.........这里部分代码省略.........
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,



注:本文中的vec_mergel函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vec_new函数代码示例
C++ vec_madd函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网。自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。