这篇教程《C++ vec_mergel 函数代码示例》写得很实用,希望能帮到您。
本文整理汇总了C++中vec_mergel函数的典型用法代码示例。如果您正苦于以下问题:C++ vec_mergel函数的具体用法?C++ vec_mergel怎么用?C++ vec_mergel使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。 在下文中一共展示了vec_mergel函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: vector_fmul_reverse_altivec
/*
 * Element-wise product of src0 with src1 read back to front:
 *     dst[i + k] = src0[i + k] * src1[len - 1 - i - k]
 *
 * Eight floats are produced per iteration and the loop runs while
 * i < len - 7, so a tail of fewer than eight elements is not written here.
 *
 * NOTE(review): vec_ld/vec_st are used with no realignment step, so dst,
 * src0 and src1 (and len) are presumably expected to be 16-byte friendly --
 * confirm against the callers.
 */
static void vector_fmul_reverse_altivec(float *dst, const float *src0,
                                        const float *src1, int len)
{
    int i;
    vector float d, s0, s1, h0, l0,
                 s2, s3, zero = (vector float)vec_splat_u32(0);
    /* Point at the last group of four floats; the loop walks src1 backwards. */
    src1 += len-4;
    for(i=0; i<len-7; i+=8) {
        /* Loads and merges for the two 4-float halves are interleaved;
         * the statement order below is kept exactly as written. */
        s1 = vec_ld(0, src1-i);              // [a,b,c,d]
        s0 = vec_ld(0, src0+i);
        l0 = vec_mergel(s1, s1);             // [c,c,d,d]
        s3 = vec_ld(-16, src1-i);            /* the four floats just before s1 */
        h0 = vec_mergeh(s1, s1);             // [a,a,b,b]
        s2 = vec_ld(16, src0+i);             /* second half of the src0 group */
        s1 = vec_mergeh(vec_mergel(l0,h0),   // [d,b,d,b]
                        vec_mergeh(l0,h0));  // [c,a,c,a]
                                             // [d,c,b,a]
        l0 = vec_mergel(s3, s3);
        d = vec_madd(s0, s1, zero);          /* s0 * reversed(s1) + 0 */
        h0 = vec_mergeh(s3, s3);
        vec_st(d, 0, dst+i);
        /* Same three-merge reversal, now applied to s3. */
        s3 = vec_mergeh(vec_mergel(l0,h0),
                        vec_mergeh(l0,h0));
        d = vec_madd(s2, s3, zero);
        vec_st(d, 16, dst+i);
    }
}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:27,
示例2: test
/* Merges two-element "vector long" and "vector double" operands with
   vec_mergeh and vec_mergel and compares each result against an expected
   vector; the expected values are selected by byte order at compile time.
   check, vec_long_eq and vec_double_eq are defined outside this snippet.  */
static void test()
{
  /* Input vectors. */
  vector long vla = {-2,-1};
  vector long vlb = {0,1};
  vector double vda = {-2.0,-1.0};
  vector double vdb = {0.0,1.0};
  /* Result vectors. */
  vector long vlh, vll;
  vector double vdh, vdl;
  /* Expected result vectors. */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  vector long vlrh = {1,-1};
  vector long vlrl = {0,-2};
  vector double vdrh = {1.0,-1.0};
  vector double vdrl = {0.0,-2.0};
#else
  vector long vlrh = {-2,0};
  vector long vlrl = {-1,1};
  vector double vdrh = {-2.0,0.0};
  vector double vdrl = {-1.0,1.0};
#endif
  /* Operations under test. */
  vlh = vec_mergeh (vla, vlb);
  vll = vec_mergel (vla, vlb);
  vdh = vec_mergeh (vda, vdb);
  vdl = vec_mergel (vda, vdb);
  /* The string argument names the result being verified. */
  check (vec_long_eq (vlh, vlrh), "vlh");
  check (vec_long_eq (vll, vlrl), "vll");
  check (vec_double_eq (vdh, vdrh), "vdh" );
  check (vec_double_eq (vdl, vdrl), "vdl" );
}
开发者ID:Zex,项目名称:gcc,代码行数:35,
示例3: float_to_int16_interleave_altivec
/*
 * Converts `channels` planar float buffers (src[c][0..len-1]) to int16 and
 * writes them interleaved into dst.
 *
 *  - channels == 1: delegates to float_to_int16_altivec.
 *  - channels == 2: vector path, eight frames (sixteen int16) per iteration
 *    while i < len - 7; a separate branch handles a dst that is not
 *    16-byte aligned.
 *  - otherwise: converts each channel into a stack temporary and scatters it
 *    with a scalar stride loop.
 *
 * float_to_int16_altivec and float_to_int16_one_altivec are defined outside
 * this snippet.
 */
static void
float_to_int16_interleave_altivec(int16_t *dst, const float **src, long len, int channels)
{
    int i;
    vector signed short d0, d1, d2, c0, c1, t0, t1;
    vector unsigned char align;
    if(channels == 1)
        float_to_int16_altivec(dst, src[0], len);
    else if (channels == 2) {
        if(((long)dst) & 15)
            /* Unaligned destination. */
            for(i = 0; i < len - 7; i += 8) {
                /* d0/d1: what is currently stored around the destination
                 * window. NOTE(review): presumably kept so that the bytes
                 * outside the 32 output bytes survive the three stores below
                 * -- confirm. */
                d0 = vec_ld(0, dst + i);
                t0 = float_to_int16_one_altivec(src[0] + i);
                d1 = vec_ld(31, dst + i);
                t1 = float_to_int16_one_altivec(src[1] + i);
                /* Interleave the two channels: c0 = first four frames,
                 * c1 = last four frames. */
                c0 = vec_mergeh(t0, t1);
                c1 = vec_mergel(t0, t1);
                d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
                align = vec_lvsr(0, dst + i);
                d0 = vec_perm(d2, c0, align);
                d1 = vec_perm(c0, c1, align);
                vec_st(d0,  0, dst + i);
                d0 = vec_perm(c1, d2, align);
                vec_st(d1, 15, dst + i);
                vec_st(d0, 31, dst + i);
                /* Sixteen int16 were written but i only advances by eight,
                 * so dst itself is bumped by the other eight. */
                dst += 8;
            }
        else
            /* Aligned destination: two plain 16-byte stores. */
            for(i = 0; i < len - 7; i += 8) {
                t0 = float_to_int16_one_altivec(src[0] + i);
                t1 = float_to_int16_one_altivec(src[1] + i);
                d0 = vec_mergeh(t0, t1);
                d1 = vec_mergel(t0, t1);
                vec_st(d0,  0, dst + i);
                vec_st(d1, 16, dst + i);
                dst += 8;   /* same extra advance as in the unaligned loop */
            }
    } else {
        /* Generic channel count: the temporary holds len samples on the stack. */
        DECLARE_ALIGNED(16, int16_t, tmp)[len];
        int c, j;
        for (c = 0; c < channels; c++) {
            float_to_int16_altivec(tmp, src[c], len);
            /* Sample i of channel c lands at dst[c + i * channels]. */
            for (i = 0, j = c; i < len; i++, j += channels) {
                dst[j] = tmp[i];
            }
        }
    }
}
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:56,
示例4: v_reduce_sum4
/**
 * Horizontal sums of four float vectors, packed into one result:
 * lane 0 holds the sum of the lanes of `a`, lane 1 of `b`, lane 2 of `c`
 * and lane 3 of `d`.
 */
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                 const v_float32x4& c, const v_float32x4& d)
{
    // a/c pair: the first add folds lanes {0,1} onto lanes {2,3}, the second
    // add (vector rotated by 8 bytes) finishes the reduction, leaving
    // [sum(a), sum(c), sum(a), sum(c)].
    const vec_float4 acLow  = vec_mergel(a.val, c.val);
    const vec_float4 acHigh = vec_mergeh(a.val, c.val);
    const vec_float4 acHalf = vec_add(acLow, acHigh);
    const vec_float4 acSum  = vec_add(acHalf, vec_sld(acHalf, acHalf, 8));

    // b/d pair, same scheme: [sum(b), sum(d), sum(b), sum(d)].
    const vec_float4 bdLow  = vec_mergel(b.val, d.val);
    const vec_float4 bdHigh = vec_mergeh(b.val, d.val);
    const vec_float4 bdHalf = vec_add(bdLow, bdHigh);
    const vec_float4 bdSum  = vec_add(bdHalf, vec_sld(bdHalf, bdHalf, 8));

    // Interleave the leading lanes of both partial results.
    return v_float32x4(vec_mergeh(acSum, bdSum));
}
开发者ID:ArkaJU,项目名称:opencv,代码行数:10,
示例5: test
/* Runs vec_mergeh and vec_mergel on five element types (long long, double,
   unsigned int, signed int, float) and compares every result with a
   hard-coded expected vector.  For four-element inputs a = {a0,a1,a2,a3},
   b = {b0,b1,b2,b3} the expected values below are {a0,b0,a1,b1} for mergeh
   and {a2,b2,a3,b3} for mergel; for two-element inputs, {a0,b0} and {a1,b1}.
   check, vec_long_long_eq and vec_double_eq are defined outside this
   snippet.  */
static void test()
{
  /* Input vectors. */
  vector long long vla = {-2,-1};
  vector long long vlb = {0,1};
  vector double vda = {-2.0,-1.0};
  vector double vdb = {0.0,1.0};
  vector unsigned int vuia = {0,1,2,3};
  vector unsigned int vuib = {4,5,6,7};
  vector signed int vsia = {-4,-3,-2,-1};
  vector signed int vsib = {0,1,2,3};
  vector float vfa = {-4.0,-3.0,-2.0,-1.0};
  vector float vfb = {0.0,1.0,2.0,3.0};
  /* Result vectors. */
  vector long long vlh, vll;
  vector double vdh, vdl;
  vector unsigned int vuih, vuil;
  vector signed int vsih, vsil;
  vector float vfh, vfl;
  /* Expected result vectors. */
  vector long long vlrh = {-2,0};
  vector long long vlrl = {-1,1};
  vector double vdrh = {-2.0,0.0};
  vector double vdrl = {-1.0,1.0};
  vector unsigned int vuirh = {0,4,1,5};
  vector unsigned int vuirl = {2,6,3,7};
  vector signed int vsirh = {-4,0,-3,1};
  vector signed int vsirl = {-2,2,-1,3};
  vector float vfrh = {-4.0,0.0,-3.0,1.0};
  vector float vfrl = {-2.0,2.0,-1.0,3.0};
  /* Operations under test: one mergeh/mergel pair per element type. */
  vlh = vec_mergeh (vla, vlb);
  vll = vec_mergel (vla, vlb);
  vdh = vec_mergeh (vda, vdb);
  vdl = vec_mergel (vda, vdb);
  vuih = vec_mergeh (vuia, vuib);
  vuil = vec_mergel (vuia, vuib);
  vsih = vec_mergeh (vsia, vsib);
  vsil = vec_mergel (vsia, vsib);
  vfh = vec_mergeh (vfa, vfb );
  vfl = vec_mergel (vfa, vfb );
  /* The string argument names the result being verified. */
  check (vec_long_long_eq (vlh, vlrh), "vlh");
  check (vec_long_long_eq (vll, vlrl), "vll");
  check (vec_double_eq (vdh, vdrh), "vdh" );
  check (vec_double_eq (vdl, vdrl), "vdl" );
  check (vec_all_eq (vuih, vuirh), "vuih");
  check (vec_all_eq (vuil, vuirl), "vuil");
  check (vec_all_eq (vsih, vsirh), "vsih");
  check (vec_all_eq (vsil, vsirl), "vsil");
  check (vec_all_eq (vfh, vfrh), "vfh");
  check (vec_all_eq (vfl, vfrl), "vfl");
}
开发者ID:0day-ci,项目名称:gcc,代码行数:55,
示例6: processRGBA_Altivecvoid pix_diff :: processRGBA_Altivec(imageStruct &image, imageStruct &right){ int datasize = image.xsize * image.ysize / 4; vector signed short hiImage, loImage, hiRight, loRight; vector unsigned char zero = vec_splat_u8(0); vector unsigned char *inData = (vector unsigned char *)image.data; vector unsigned char *rightData = (vector unsigned char *)right.data; #ifndef PPC970 UInt32 prefetchSize = GetPrefetchConstant( 16, 1, 256 ); vec_dst( inData, prefetchSize, 0 ); vec_dst( rightData, prefetchSize, 1 ); vec_dst( inData+256, prefetchSize, 2 ); vec_dst( rightData+256, prefetchSize, 3 ); #endif do { #ifndef PPC970 vec_dst( inData, prefetchSize, 0 ); vec_dst( rightData, prefetchSize, 1 ); vec_dst( inData+256, prefetchSize, 2 ); vec_dst( rightData+256, prefetchSize, 3 ); #endif hiImage = (vector signed short)vec_mergeh(zero,inData[0]); loImage = (vector signed short)vec_mergel(zero,inData[0]); hiRight = (vector signed short)vec_mergeh(zero,rightData[0]); loRight = (vector signed short)vec_mergel(zero,rightData[0]); hiImage = vec_subs(hiImage,hiRight); loImage = vec_subs(loImage,loRight); hiImage = vec_abs(hiImage); loImage = vec_abs(loImage); inData[0] = vec_packsu(hiImage,loImage); inData++; rightData++; } while (--datasize); #ifndef PPC970 vec_dss( 0 ); vec_dss( 1 ); vec_dss( 2 ); vec_dss( 3 ); #endif}
开发者ID:avilleret,项目名称:Gem,代码行数:50,
示例7: foo
/* Applies a series of vector built-ins (vec_andc, vec_double, vec_mergeh,
   vec_mergel, vec_nor, vec_or, vec_sel, vec_xor) to operands declared
   outside this snippet and stores each result through one of the output
   pointers, post-incrementing that pointer after every store.  Each pointer
   therefore has to address as many elements as it is written through below.
   NOTE(review): looks like a compiler test that checks these overloads are
   accepted -- confirm against the directives of the original test file.  */
void foo (vector bool long long *vblr, vector double *vdr,
          vector unsigned long long *vullz, vector double *vdz,
          vector bool char *vbcz, vector signed char *vscz,
          vector unsigned char *vucz, vector bool int *vbiz,
          vector int *viz, vector unsigned int *vuiz,
          vector signed long long int *vslliz,
          vector bool short int *vbsiz, vector signed short int *vssiz,
          vector unsigned short int *vusiz, vector float *vfz)
{
  /* bool long long and double results */
  *vblr++ = vec_andc (vbla, vblb);
  *vdr++ = vec_double (vslla);
  *vdr++ = vec_double (vulla);
  *vblr++ = vec_mergeh (vbla, vblb);
  *vblr++ = vec_mergel (vbla, vblb);
  *vblr++ = vec_nor (vbla, vblb);
  *vblr++ = vec_or (vbla, vblb);
  *vblr++ = vec_sel (vbla, vblb, vblc);
  *vblr++ = vec_sel (vbla, vblb, vullc);
  *vblr++ = vec_xor (vbla, vblb);
  /* unsigned long long / double selects */
  *vullz++ = vec_sel (vulla, vullb, vbllc);
  *vullz++ = vec_sel (vulla, vullb, vullc);
  *vdz++ = vec_sel(vda, vdb, vullc);
  /* char element types */
  *vbcz++ = vec_sel (vbca, vbcb, vbcc);
  *vbcz++ = vec_sel (vbca, vbcb, vucc);
  *vbcz++ = vec_xor (vbca, vbcb);
  *vscz++ = vec_sel (vsca, vscb, vbcc);
  *vscz++ = vec_sel (vsca, vscb, vucc);
  *vucz++ = vec_sel (vuca, vucb, vbcc);
  *vucz++ = vec_sel (vuca, vucb, vucc);
  /* int element types */
  *vbiz++ = vec_sel (vbia, vbib, vbic);
  *vbiz++ = vec_sel (vbia, vbib, vuic);
  *vbiz++ = vec_xor (vbia, vbib);
  *viz++ = vec_sel (vsia, vsib, vbic);
  *viz++ = vec_sel (vsia, vsib, vuic);
  *vuiz++ = vec_sel (vuia, vuib, vbic);
  *vuiz++ = vec_sel (vuia, vuib, vuic);
  /* signed long long selects */
  *vslliz++ = vec_sel(vslla, vsllb, vbllc);
  *vslliz++ = vec_sel(vslla, vsllb, vullc);
  /* short element types */
  *vssiz++ = vec_sel(vssia, vssib, vbsic);
  *vssiz++ = vec_sel(vssia, vssib, vusic);
  *vusiz++ = vec_sel(vusia, vusib, vbsic);
  *vusiz++ = vec_sel(vusia, vusib, vusic);
  *vbsiz++ = vec_sel (vbsia, vbsib, vbsic);
  *vbsiz++ = vec_sel (vbsia, vbsib, vusic);
  *vbsiz++ = vec_xor (vbsia, vbsib);
  /* remaining double and float cases */
  *vdz++ = vec_sel (vda, vdb, vbllc);
  *vfz++ = vec_sel (vfa, vfb, vbic);
  *vfz++ = vec_sel (vfa, vfb, vuic);
  *vfz++ = vec_xor (vfa, vfb);
}
开发者ID:vinriviere,项目名称:m68k-atari-mint-gcc,代码行数:59,
示例8: float_to_int16_interleave_altivec
/*
 * Converts `channels` planar float buffers (src[c][0..len-1]) to int16 and
 * writes them interleaved into dst.
 *
 *  - channels == 1: delegates to float_to_int16_altivec.
 *  - channels == 2: vector path, eight frames (sixteen int16) per iteration
 *    while i < len - 7; a separate branch handles a dst that is not
 *    16-byte aligned.
 *  - otherwise: one float_to_int16_stride_altivec call per channel, writing
 *    from dst + i with a stride of `channels`.
 *
 * The float_to_int16_*_altivec helpers are defined outside this snippet.
 */
static void float_to_int16_interleave_altivec(int16_t *dst, const float **src,
                                              long len, int channels)
{
    int i;
    vector signed short d0, d1, d2, c0, c1, t0, t1;
    vector unsigned char align;

    if (channels == 1)
        float_to_int16_altivec(dst, src[0], len);
    else {
        if (channels == 2) {
            if (((long)dst) & 15) {
                /* Unaligned destination. */
                for (i = 0; i < len - 7; i += 8) {
                    /* d0/d1: what is currently stored around the destination
                     * window. NOTE(review): presumably kept so that the
                     * bytes outside the 32 output bytes survive the three
                     * stores below -- confirm. */
                    d0 = vec_ld(0, dst + i);
                    t0 = float_to_int16_one_altivec(src[0] + i);
                    d1 = vec_ld(31, dst + i);
                    t1 = float_to_int16_one_altivec(src[1] + i);
                    /* Interleave the two channels: c0 = first four frames,
                     * c1 = last four frames. */
                    c0 = vec_mergeh(t0, t1);
                    c1 = vec_mergel(t0, t1);
                    d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
                    align = vec_lvsr(0, dst + i);
                    d0 = vec_perm(d2, c0, align);
                    d1 = vec_perm(c0, c1, align);
                    vec_st(d0,  0, dst + i);
                    d0 = vec_perm(c1, d2, align);
                    vec_st(d1, 15, dst + i);
                    vec_st(d0, 31, dst + i);
                    /* Sixteen int16 were written but i only advances by
                     * eight, so dst itself is bumped by the other eight. */
                    dst += 8;
                }
            } else {
                /* Aligned destination: two plain 16-byte stores. */
                for (i = 0; i < len - 7; i += 8) {
                    t0 = float_to_int16_one_altivec(src[0] + i);
                    t1 = float_to_int16_one_altivec(src[1] + i);
                    d0 = vec_mergeh(t0, t1);
                    d1 = vec_mergel(t0, t1);
                    vec_st(d0,  0, dst + i);
                    vec_st(d1, 16, dst + i);
                    dst += 8;   /* same extra advance as in the unaligned loop */
                }
            }
        } else {
            /* Generic channel count: channel i starts at dst + i. */
            for (i = 0; i < channels; i++)
                float_to_int16_stride_altivec(dst + i, src[i], len, channels);
        }
    }
}
开发者ID:AronVietti,项目名称:FFmpeg,代码行数:46,
示例9: OSX_AudioIOProc16Bit_Altivec
/*
 * Converts SOUND_BUFFER_SIZE signed 16-bit samples from myInBuffer to floats
 * in myOutBuffer, multiplying each by SOUND_BUFFER_SCALE_16BIT.  Eight input
 * samples are handled per iteration.
 *
 * When gBufferMono2Stereo is set, every sample is written twice in a row
 * (s0,s0,s1,s1,...), so sixteen floats are produced per eight inputs.
 *
 * Fix: the duplication step used vec_mergel for the first store and
 * vec_mergeh for the second.  vec_mergeh yields elements 0 and 1, vec_mergel
 * elements 2 and 3, so each group of four samples came out in the order
 * 2,3,0,1.  The merges are now issued in the order the original inline
 * comments described.
 *
 * vec_load_ps1 and vec_setzero are helpers defined outside this snippet
 * (presumably "splat one float" and "all zeros" -- confirm).
 * NOTE(review): vec_ld/vec_st are used directly, so both buffers are
 * presumably 16-byte aligned -- confirm.
 */
static void OSX_AudioIOProc16Bit_Altivec(SInt16 *myInBuffer, float *myOutBuffer)
{
    register UInt32 i;
    float f = SOUND_BUFFER_SCALE_16BIT;
    const vector float gain = vec_load_ps1(&f); // multiplier
    const vector float mix = vec_setzero();     // addend for vec_madd

    if (gBufferMono2Stereo) {
        int j=0;
        for (i=0;i<SOUND_BUFFER_SIZE;i+=8, j+=16) {
            vector short int v0 = vec_ld(0, myInBuffer + i); // Load 8 shorts
            vector float v1 = vec_ctf((vector signed int)vec_unpackh(v0), 0); // convert to float
            vector float v2 = vec_ctf((vector signed int)vec_unpackl(v0), 0); // convert to float
            vector float v3 = vec_madd(v1, gain, mix); // scale
            vector float v4 = vec_madd(v2, gain, mix); // scale
            vector float v5 = vec_mergeh(v3, v3); // v3(0,0,1,1);
            vector float v6 = vec_mergel(v3, v3); // v3(2,2,3,3);
            vector float v7 = vec_mergeh(v4, v4); // v4(0,0,1,1);
            vector float v8 = vec_mergel(v4, v4); // v4(2,2,3,3);
            vec_st(v5, 0, myOutBuffer + j); // Store 4 floats
            vec_st(v6, 0, myOutBuffer + 4 + j); // Store 4 floats
            vec_st(v7, 0, myOutBuffer + 8 + j); // Store 4 floats
            vec_st(v8, 0, myOutBuffer + 12 + j); // Store 4 floats
        }
    } else {
        // TEST: OK
        for (i=0;i<SOUND_BUFFER_SIZE;i+=8) {
            vector short int v0 = vec_ld(0, myInBuffer + i); // Load 8 shorts
            vector float v1 = vec_ctf((vector signed int)vec_unpackh(v0), 0); // convert to float
            vector float v2 = vec_ctf((vector signed int)vec_unpackl(v0), 0); // convert to float
            vector float v3 = vec_madd(v1, gain, mix); // scale
            vector float v4 = vec_madd(v2, gain, mix); // scale
            vec_st(v3, 0, myOutBuffer + i); // Store 4 floats
            vec_st(v4, 0, myOutBuffer + 4 + i); // Store 4 floats
        }
    }
}
开发者ID:LighFusion,项目名称:surreal,代码行数:46,
示例10: pix_multiply
/*
 * Per-byte multiply of p by a.  Each byte pair is widened to 16 bits and
 * computed as t = x * y + 0x80, then (t + (t >> 8)) >> 8, and the sixteen
 * results are packed back to bytes with unsigned saturation.
 * The upper and lower eight bytes are processed separately.
 */
static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
    const vector unsigned char zero = (vector unsigned char)AVV (0);
    const vector unsigned short bias = (vector unsigned short)
        AVV (0x0080, 0x0080, 0x0080, 0x0080,
             0x0080, 0x0080, 0x0080, 0x0080);
    const vector unsigned char pixel_bytes = (vector unsigned char)p;
    const vector unsigned char alpha_bytes = (vector unsigned char)a;
    vector unsigned short upper, lower;

    /* first eight bytes: widen by merging with zero, multiply, add bias */
    upper = vec_mladd ((vector unsigned short) vec_mergeh (zero, pixel_bytes),
                       (vector unsigned short) vec_mergeh (zero, alpha_bytes),
                       bias);
    upper = vec_adds (upper, vec_sr (upper, vec_splat_u16 (8)));
    upper = vec_sr (upper, vec_splat_u16 (8));

    /* last eight bytes: same steps */
    lower = vec_mladd ((vector unsigned short) vec_mergel (zero, pixel_bytes),
                       (vector unsigned short) vec_mergel (zero, alpha_bytes),
                       bias);
    lower = vec_adds (lower, vec_sr (lower, vec_splat_u16 (8)));
    lower = vec_sr (lower, vec_splat_u16 (8));

    return (vector unsigned int)vec_packsu (upper, lower);
}
开发者ID:1833183060,项目名称:wke,代码行数:40,
示例11: v_store_interleave_f32
/* Builds three 16-byte vectors out of a, b and c with merge, permute and
   shift operations and stores them at byte offsets 0, 16 and 32 from ptr
   (twelve floats in total).
   NOTE(review): going by the name, the stored sequence is meant to be the
   three inputs interleaved element by element; the exact lane order depends
   on the target's vec_perm/vec_sld byte numbering -- confirm before relying
   on it.  */
void v_store_interleave_f32(float *ptr, vector float a, vector float b, vector float c)
{
  /* First two elements of b and c, interleaved. */
  vector float hbc = vec_mergeh(b, c);
  /* Permute control: bytes 0-15 address the first operand, 16-31 the second. */
  static const vector unsigned char ahbc = {0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 4, 5, 6, 7};
  vec_xst(vec_perm(a, hbc, ahbc), 0, ptr);
  /* Last two elements of a and b, interleaved. */
  vector float lab = vec_mergel(a, b);
  vec_xst(vec_sld(lab, hbc, 8), 16, ptr);
  static const vector unsigned char clab = {8, 9, 10, 11, 24, 25, 26, 27, 28, 29, 30, 31, 12, 13, 14, 15};
  vec_xst(vec_perm(c, lab, clab), 32, ptr);
}
开发者ID:MaxKellermann,项目名称:gcc,代码行数:13,
示例12: predict_16x16_p_altivec
/*
 * Fills a 16x16 block at src (row pitch FDEC_STRIDE) with a gradient derived
 * from the row above the block and the column to its left.
 *
 * The scalar part computes a base value a and two slopes b (horizontal) and
 * c (vertical) from those neighbours.  The vector part then writes, for each
 * row y and column x, the value (i00 + b*x + c*y) >> 5 saturated to an
 * unsigned byte, where i00 = a - 7*b - 7*c + 16.
 *
 * NOTE(review): each row is written with one vec_st, so src is presumably
 * 16-byte aligned -- confirm.
 */
static void predict_16x16_p_altivec( uint8_t *src )
{
    int16_t a, b, c, i;
    int H = 0;
    int V = 0;
    int16_t i00;

    /* Weighted differences across the top row (H) and left column (V). */
    for( i = 1; i <= 8; i++ )
    {
        H += i * ( src[7+i - FDEC_STRIDE ]  - src[7-i - FDEC_STRIDE ] );
        V += i * ( src[(7+i)*FDEC_STRIDE -1] - src[(7-i)*FDEC_STRIDE -1] );
    }

    a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );
    b = ( 5 * H + 32 ) >> 6;
    c = ( 5 * V + 32 ) >> 6;
    i00 = a - b * 7 - c * 7 + 16;

    /* Move the scalars into element 0 of a vector via a union, then
     * broadcast element 0 to all lanes. */
    vect_sshort_u i00_u, b_u, c_u;
    i00_u.s[0] = i00;
    b_u.s[0]   = b;
    c_u.s[0]   = c;

    vec_u16_t val5_v = vec_splat_u16(5);
    vec_s16_t i00_v, b_v, c_v;
    i00_v = vec_splat(i00_u.v, 0);
    b_v = vec_splat(b_u.v, 0);
    c_v = vec_splat(c_u.v, 0);

    /* induc_v = column indices 0..7; b8_v = 8 * b (shift left by 3). */
    vec_s16_t induc_v  = (vec_s16_t) CV(0, 1, 2, 3, 4, 5, 6, 7);
    vec_s16_t b8_v = vec_sl(b_v, vec_splat_u16(3));
    /* b * x for x = 0..7: the even and odd 32-bit products are re-interleaved
     * and narrowed back to 16 bits. */
    vec_s32_t mule_b_v = vec_mule(induc_v, b_v);
    vec_s32_t mulo_b_v = vec_mulo(induc_v, b_v);
    vec_s16_t mul_b_induc0_v = vec_pack(vec_mergeh(mule_b_v, mulo_b_v), vec_mergel(mule_b_v, mulo_b_v));
    /* Row accumulators: columns 0..7 and columns 8..15. */
    vec_s16_t add_i0_b_0v = vec_adds(i00_v, mul_b_induc0_v);
    vec_s16_t add_i0_b_8v = vec_adds(b8_v, add_i0_b_0v);

    int y;

    for( y = 0; y < 16; y++ )
    {
        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);
        vec_s16_t shift_8_v = vec_sra(add_i0_b_8v, val5_v);
        /* Saturating pack to sixteen unsigned bytes = one output row. */
        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_8_v);
        vec_st( com_sat_v, 0, &src[0]);
        src += FDEC_STRIDE;
        /* i00 is not read again after this point; the vector accumulators
         * below carry the per-row increment. */
        i00 += c;
        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);
        add_i0_b_8v = vec_adds(add_i0_b_8v, c_v);
    }
}
开发者ID:UIKit0,项目名称:H.264-in-CUDA,代码行数:50,
示例13: foo
/* Applies vec_andc, vec_double, vec_mergeh, vec_mergel, vec_nor, vec_or,
   vec_sel and vec_xor to operands declared outside this snippet, storing
   every result through vblr or vdr and post-incrementing the pointer after
   each store (eight stores through vblr, two through vdr).
   NOTE(review): looks like a compiler test that checks these overloads are
   accepted -- confirm against the directives of the original test file.  */
void foo (vector bool long long *vblr, vector double *vdr)
{
  *vblr++ = vec_andc (vbla, vblb);
  *vdr++ = vec_double (vsla);
  *vdr++ = vec_double (vula);
  *vblr++ = vec_mergeh (vbla, vblb);
  *vblr++ = vec_mergel (vbla, vblb);
  *vblr++ = vec_nor (vbla, vblb);
  *vblr++ = vec_or (vbla, vblb);
  *vblr++ = vec_sel (vbla, vblb, vblc);
  *vblr++ = vec_sel (vbla, vblb, vulc);
  *vblr++ = vec_xor (vbla, vblb);
}
开发者ID:0day-ci,项目名称:gcc,代码行数:14,
示例14: main
/*
 * Loads the first four entries of p_motion, one vector per entry, and
 * transposes them with two rounds of merges so that x_vec, y_vec, z_vec and
 * t_vec each collect one field position from all four entries.
 * The results are not used afterwards; the program returns 0.
 */
int main(int argc, char **argv) {

   vector float x_vec, y_vec, z_vec, t_vec;
   vector float entry[4], mixed[4];
   int k;

   /* One 16-byte load per structure */
   for (k = 0; k < 4; k++)
      entry[k] = vec_ld(0, (float*)&p_motion[k]);

   /* Round one: pair entry 0 with 2 and entry 1 with 3 */
   mixed[0] = vec_mergeh(entry[0], entry[2]);
   mixed[2] = vec_mergel(entry[0], entry[2]);
   mixed[1] = vec_mergeh(entry[1], entry[3]);
   mixed[3] = vec_mergel(entry[1], entry[3]);

   /* Round two: merging the round-one results completes the transpose */
   x_vec = vec_mergeh(mixed[0], mixed[1]);
   y_vec = vec_mergel(mixed[0], mixed[1]);
   z_vec = vec_mergeh(mixed[2], mixed[3]);
   t_vec = vec_mergel(mixed[2], mixed[3]);

   return 0;
}
开发者ID:pstrinkle,项目名称:misc-umbc,代码行数:26,
示例15: a52_resample_STEREO_to_2_altivec
/*
 * Interleaves two 256-sample planar channels into 512 int16 samples.
 * The first channel is read from _f[0..255], the second from _f[256..511],
 * both reinterpreted as int32; output sample 2*i comes from the first
 * channel and 2*i+1 from the second.  Always returns 2*256, the number of
 * int16 values written.
 *
 * The "#if 0" branch is the disabled scalar reference; the "#else" branch is
 * the vector version, handling eight samples per channel per iteration.
 * convert, convert16_altivec and unaligned_store are defined outside this
 * snippet.
 */
static int a52_resample_STEREO_to_2_altivec(float * _f, int16_t * s16){
#if 0
  int i;
  int32_t * f = (int32_t *) _f;
  for (i = 0; i < 256; i++) {
    s16[2*i] = convert (f[i]);
    s16[2*i+1] = convert (f[i+256]);
  }
  return 2*256;
#else
  int i = 0;
  int32_t * f = (int32_t *) _f;
  register vector signed int f0, f4, f256, f260;
  register vector signed short reven, rodd, r0, r1;

  for (i = 0; i < 256; i+= 8) {
    /* Byte offsets 0/16 are first-channel samples 0-3 and 4-7; offsets
       1024/1040 are the same positions 256 int32 further on, i.e. the
       second channel. */
    f0 = vec_ld(0, f);
    f4 = vec_ld(16, f);

    f256 = vec_ld(1024, f);
    f260 = vec_ld(1040, f);

    /* Eight int16 per channel: reven feeds the even output slots, rodd the
       odd ones. */
    reven = convert16_altivec(f0, f4);
    rodd = convert16_altivec(f256, f260);

    r0 = vec_mergeh(reven, rodd);
    r1 = vec_mergel(reven, rodd);
    // FIXME can be merged to spare some I/O
    unaligned_store(r0, 0, s16);
    unaligned_store(r1, 16, s16);

    f += 8;
    s16 += 16;
  }
  return(2*256);
#endif
}
开发者ID:dr4g0nsr,项目名称:mplayer-skyviia-8860,代码行数:37,
示例16: PREFIX_h264_qpel16_v_lowpass_altivecstatic void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { register int i; LOAD_ZERO; const vec_u8 perm = vec_lvsl(0, src); const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); const vec_u16 v5us = vec_splat_u16(5); const vec_s16 v5ss = vec_splat_s16(5); const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); uint8_t *srcbis = src - (srcStride * 2); const vec_u8 srcM2a = vec_ld(0, srcbis); const vec_u8 srcM2b = vec_ld(16, srcbis); const vec_u8 srcM2 = vec_perm(srcM2a, srcM2b, perm); //srcbis += srcStride; const vec_u8 srcM1a = vec_ld(0, srcbis += srcStride); const vec_u8 srcM1b = vec_ld(16, srcbis); const vec_u8 srcM1 = vec_perm(srcM1a, srcM1b, perm); //srcbis += srcStride; const vec_u8 srcP0a = vec_ld(0, srcbis += srcStride); const vec_u8 srcP0b = vec_ld(16, srcbis); const vec_u8 srcP0 = vec_perm(srcP0a, srcP0b, perm); //srcbis += srcStride; const vec_u8 srcP1a = vec_ld(0, srcbis += srcStride); const vec_u8 srcP1b = vec_ld(16, srcbis); const vec_u8 srcP1 = vec_perm(srcP1a, srcP1b, perm); //srcbis += srcStride; const vec_u8 srcP2a = vec_ld(0, srcbis += srcStride); const vec_u8 srcP2b = vec_ld(16, srcbis); const vec_u8 srcP2 = vec_perm(srcP2a, srcP2b, perm); //srcbis += srcStride; vec_s16 srcM2ssA = (vec_s16) vec_mergeh(zero_u8v, srcM2); vec_s16 srcM2ssB = (vec_s16) vec_mergel(zero_u8v, srcM2); vec_s16 srcM1ssA = (vec_s16) vec_mergeh(zero_u8v, srcM1); vec_s16 srcM1ssB = (vec_s16) vec_mergel(zero_u8v, srcM1); vec_s16 srcP0ssA = (vec_s16) vec_mergeh(zero_u8v, srcP0); vec_s16 srcP0ssB = (vec_s16) vec_mergel(zero_u8v, srcP0); vec_s16 srcP1ssA = (vec_s16) vec_mergeh(zero_u8v, srcP1); vec_s16 srcP1ssB = (vec_s16) vec_mergel(zero_u8v, srcP1); vec_s16 srcP2ssA = (vec_s16) vec_mergeh(zero_u8v, srcP2); vec_s16 srcP2ssB = (vec_s16) vec_mergel(zero_u8v, srcP2); vec_s16 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, psumA, psumB, sumA, sumB, srcP3ssA, srcP3ssB, sum1A, sum1B, 
sum2A, sum2B, sum3A, sum3B; vec_u8 sum, fsum, srcP3a, srcP3b, srcP3; for (i = 0 ; i < 16 ; i++) { srcP3a = vec_ld(0, srcbis += srcStride); srcP3b = vec_ld(16, srcbis); srcP3 = vec_perm(srcP3a, srcP3b, perm); srcP3ssA = (vec_s16) vec_mergeh(zero_u8v, srcP3); srcP3ssB = (vec_s16) vec_mergel(zero_u8v, srcP3); //srcbis += srcStride; sum1A = vec_adds(srcP0ssA, srcP1ssA); sum1B = vec_adds(srcP0ssB, srcP1ssB); sum2A = vec_adds(srcM1ssA, srcP2ssA); sum2B = vec_adds(srcM1ssB, srcP2ssB); sum3A = vec_adds(srcM2ssA, srcP3ssA); sum3B = vec_adds(srcM2ssB, srcP3ssB); srcM2ssA = srcM1ssA; srcM2ssB = srcM1ssB; srcM1ssA = srcP0ssA; srcM1ssB = srcP0ssB; srcP0ssA = srcP1ssA; srcP0ssB = srcP1ssB; srcP1ssA = srcP2ssA; srcP1ssB = srcP2ssB; srcP2ssA = srcP3ssA; srcP2ssB = srcP3ssB; pp1A = vec_mladd(sum1A, v20ss, v16ss); pp1B = vec_mladd(sum1B, v20ss, v16ss); pp2A = vec_mladd(sum2A, v5ss, zero_s16v); pp2B = vec_mladd(sum2B, v5ss, zero_s16v); pp3A = vec_add(sum3A, pp1A); pp3B = vec_add(sum3B, pp1B); psumA = vec_sub(pp3A, pp2A); psumB = vec_sub(pp3B, pp2B); sumA = vec_sra(psumA, v5us); sumB = vec_sra(psumB, v5us); sum = vec_packsu(sumA, sumB); ASSERT_ALIGNED(dst); OP_U8_ALTIVEC(fsum, sum, vec_ld(0, dst)); vec_st(fsum, 0, dst);//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,
示例17: vec_perm tmp1 = vec_perm(table[6], table[7], tmpIndex); stmp1 = vec_perm(slope_cos[6], slope_cos[7], tmpIndex); select = (vector unsigned short)vec_cmpgt(PerIndex, (((vector unsigned char){95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95})) ); tmp3 = vec_sel(tmp0, tmp1, select); stmp3 = vec_sel(stmp0, stmp1, select); select = (vector unsigned short)vec_cmpgt(PerIndex, (((vector unsigned char){63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63})) ); table1 = vec_sel(tmp2, tmp3, select); slope1 = vec_sel(stmp2, stmp3, select); L_tmp0 = vec_sra(vec_mule(slope0, offset0), (((vector unsigned int){12,12,12,12})) ); L_tmp1 = vec_sra(vec_mulo(slope0, offset0), (((vector unsigned int){12,12,12,12})) ); L_tmp2 = vec_sra(vec_mule(slope1, offset1), (((vector unsigned int){12,12,12,12})) ); L_tmp3 = vec_sra(vec_mulo(slope1, offset1), (((vector unsigned int){12,12,12,12})) ); tmp0 = vec_packs(L_tmp0, L_tmp2); tmp1 = vec_packs(L_tmp1, L_tmp3); tmp2 = vec_mergeh(tmp0, tmp1); tmp3 = vec_mergel(tmp0, tmp1); lspq[0] = vec_adds(table0, tmp2); lspq[1] = vec_adds(table1, tmp3); return;}
开发者ID:0day-ci,项目名称:gcc,代码行数:30,
示例18: processYUVAltivec//.........这里部分代码省略......... shortBuffer.s[6] = m_Urange; shortBuffer.s[7] = m_Vrange; UVrange = shortBuffer.v; //setup the cache prefetch -- A MUST!!! UInt32 prefetchSize = GetPrefetchConstant( 16, 1, 256 ); #ifndef PPC970 vec_dst( inData, prefetchSize, 0 ); vec_dst( rightData, prefetchSize, 1 ); vec_dst( inData+32, prefetchSize, 2 ); vec_dst( rightData+32, prefetchSize, 3 ); #endif //PPC970 for ( i=0; i<h; i++){ for (j=0; j<w; j++) { #ifndef PPC970 //this function is probably memory bound on most G4's -- what else is new? vec_dst( inData, prefetchSize, 0 ); vec_dst( rightData, prefetchSize, 1 ); vec_dst( inData+32, prefetchSize, 2 ); vec_dst( rightData+32, prefetchSize, 3 ); #endif //separate the U and V from Y UVres1 = (vector unsigned short)vec_mule(one,inData[0]); UVres2 = (vector unsigned short)vec_mule(one,rightData[0]); //vec_mulo Y * 1 to short vector Y Y Y Y shorts Yres1 = (vector unsigned short)vec_mulo(one,inData[0]); Yres2 = (vector unsigned short)vec_mulo(one,rightData[0]); Yhi = vec_adds(Yres2,Yrange); Ylo = vec_subs(Yres2,Yrange); //go to ints for comparison UVhi = vec_adds(UVres2,UVrange); UVlo = vec_subs(UVres2,UVrange); Uhi = vec_mule(sone,UVhi); Ulo = vec_mule(sone,UVlo); Vhi = vec_mulo(sone,UVhi); Vlo = vec_mulo(sone,UVlo); Ures = vec_mule(sone,UVres1); Vres = vec_mulo(sone,UVres1); Umasklo = vec_cmpgt(Ures,Ulo); Umaskhi = vec_cmplt(Ures,Uhi); Vmasklo = vec_cmpgt(Vres,Vlo); Vmaskhi = vec_cmplt(Vres,Vhi); Umaskhi = vec_and(Umaskhi,Umasklo); Vmaskhi = vec_and(Vmaskhi,Vmasklo); Umasklo = vec_and(Umaskhi,Vmaskhi); Vmasklo = vec_and(Umaskhi,Vmaskhi); hiImage = (vector unsigned short)vec_mergeh(Umasklo,Vmasklo); loImage = (vector unsigned short)vec_mergel(Umasklo,Vmasklo); //pack it back down to bool short UVmaskhi = (vector bool short)vec_packsu(hiImage,loImage); Ymasklo = vec_cmpgt(Yres1,Ylo); Ymaskhi = vec_cmplt(Yres1,Yhi); Ymaskhi = vec_and(Ymaskhi,Ymasklo); Ymaskhi = vec_and(Ymaskhi,UVmaskhi); UVmaskhi = 
vec_and(Ymaskhi,UVmaskhi); //bitwise comparison and move using the result of the comparison as a mask Yres1 = vec_sel(Yres1,Yblank,Ymaskhi); //UVres1 = vec_sel(UVres1,UVres2,UVmaskhi); UVres1 = vec_sel(UVres1,UVblank,UVmaskhi); //merge the Y and UV back together hiImage = vec_mergeh(UVres1,Yres1); loImage = vec_mergel(UVres1,Yres1); //pack it back down to unsigned char to store inData[0] = vec_packsu(hiImage,loImage); inData++; rightData++; } #ifndef PPC970 vec_dss(0); vec_dss(1); vec_dss(2); vec_dss(3); #endif }}
开发者ID:kmatheussen,项目名称:libpd,代码行数:101,
示例19: processYUV_Altivecvoid pix_add :: processYUV_Altivec(imageStruct &image, imageStruct &right){ int h,w,width; width = image.xsize/8; //format is U Y V Y union { //unsigned int i; short elements[8]; //vector signed char v; vector signed short v; }shortBuffer; union { //unsigned int i; unsigned char elements[16]; //vector signed char v; vector unsigned char v; }charBuffer; //vector unsigned char c; register vector signed short d, hiImage, loImage, YRight, UVRight, YImage, UVImage, UVTemp, YTemp; // vector unsigned char zero = vec_splat_u8(0); register vector unsigned char c,one; // vector signed short zshort = vec_splat_s16(0); vector unsigned char *inData = (vector unsigned char*) image.data; vector unsigned char *rightData = (vector unsigned char*) right.data; //Write the pixel (pair) to the transfer buffer charBuffer.elements[0] = 2; charBuffer.elements[1] = 1; charBuffer.elements[2] = 2; charBuffer.elements[3] = 1; charBuffer.elements[4] = 2; charBuffer.elements[5] = 1; charBuffer.elements[6] = 2; charBuffer.elements[7] = 1; charBuffer.elements[8] = 2; charBuffer.elements[9] = 1; charBuffer.elements[10] = 2; charBuffer.elements[11] = 1; charBuffer.elements[12] = 2; charBuffer.elements[13] = 1; charBuffer.elements[14] = 2; charBuffer.elements[15] = 1; //Load it into the vector unit c = charBuffer.v; one = vec_splat_u8( 1 ); shortBuffer.elements[0] = 255; //Load it into the vector unit d = shortBuffer.v; d = static_cast<vector signed short>(vec_splat(static_cast<vector signed short>(d),0));#ifndef PPC970 UInt32 prefetchSize = GetPrefetchConstant( 16, 1, 256 ); vec_dst( inData, prefetchSize, 0 ); vec_dst( rightData, prefetchSize, 1 );#endif for ( h=0; h<image.ysize; h++){ for (w=0; w<width; w++) {#ifndef PPC970 vec_dst( inData, prefetchSize, 0 ); vec_dst( rightData, prefetchSize, 1 );#endif //interleaved U Y V Y chars //vec_mule UV * 2 to short vector U V U V shorts UVImage = static_cast<vector signed short>(vec_mule(one,inData[0])); UVRight = 
static_cast<vector signed short>(vec_mule(c,rightData[0])); //vec_mulo Y * 1 to short vector Y Y Y Y shorts YImage = static_cast<vector signed short>(vec_mulo(c,inData[0])); YRight = static_cast<vector signed short>(vec_mulo(c,rightData[0])); //vel_subs UV - 255 UVRight = static_cast<vector signed short>(vec_subs(UVRight, d)); //vec_adds UV UVTemp = vec_adds(UVImage,UVRight); //vec_adds Y YTemp = vec_adds(YImage,YRight); hiImage = vec_mergeh(UVTemp,YTemp); loImage = vec_mergel(UVTemp,YTemp); //vec_mergel + vec_mergeh Y and UV inData[0] = vec_packsu(hiImage, loImage); inData++; rightData++; }#ifndef PPC970 vec_dss( 0 );//.........这里部分代码省略.........
开发者ID:avilleret,项目名称:Gem,代码行数:101,
示例20: ff_fdct_altivec//.........这里部分代码省略......... x1 = vec_add(b51, b31); x2 = vec_add(b71, b31); x3 = vec_add(b51, b11); x8 = vec_add(x2, x3); cnst = LD_W3; x8 = vec_madd(cnst, x8, mzero); cnst = LD_W8; x0 = vec_madd(cnst, x0, mzero); cnst = LD_W9; x1 = vec_madd(cnst, x1, mzero); cnst = LD_WA; x2 = vec_madd(cnst, x2, x8); cnst = LD_WB; x3 = vec_madd(cnst, x3, x8); cnst = LD_W4; b71 = vec_madd(cnst, b71, x0); cnst = LD_W5; b51 = vec_madd(cnst, b51, x1); cnst = LD_W6; b31 = vec_madd(cnst, b31, x1); cnst = LD_W7; b11 = vec_madd(cnst, b11, x0); b71 = vec_add(b71, x2); b51 = vec_add(b51, x3); b31 = vec_add(b31, x2); b11 = vec_add(b11, x3); /* }}} */ /* 8x8 matrix transpose (vector float[8][2]) {{{ */ x0 = vec_mergel(b00, b20); x1 = vec_mergeh(b00, b20); x2 = vec_mergel(b10, b30); x3 = vec_mergeh(b10, b30); b00 = vec_mergeh(x1, x3); b10 = vec_mergel(x1, x3); b20 = vec_mergeh(x0, x2); b30 = vec_mergel(x0, x2); x4 = vec_mergel(b41, b61); x5 = vec_mergeh(b41, b61); x6 = vec_mergel(b51, b71); x7 = vec_mergeh(b51, b71); b41 = vec_mergeh(x5, x7); b51 = vec_mergel(x5, x7); b61 = vec_mergeh(x4, x6); b71 = vec_mergel(x4, x6); x0 = vec_mergel(b01, b21); x1 = vec_mergeh(b01, b21); x2 = vec_mergel(b11, b31); x3 = vec_mergeh(b11, b31); x4 = vec_mergel(b40, b60); x5 = vec_mergeh(b40, b60); x6 = vec_mergel(b50, b70); x7 = vec_mergeh(b50, b70); b40 = vec_mergeh(x1, x3); b50 = vec_mergel(x1, x3); b60 = vec_mergeh(x0, x2);
开发者ID:AVbin,项目名称:libav,代码行数:67,
示例21: mainint main (){ vector float fa = {1.0, 2.0, 3.0, -4.0}; vector float fb = {-2.0, -3.0, -4.0, -5.0}; vector float fc = vec_cpsgn (fa, fb); vector long long la = {5L, 14L}; vector long long lb = {3L, 86L}; vector long long lc = vec_and (la, lb); vector bool long long ld = {0, -1}; vector long long le = vec_and (la, ld); vector long long lf = vec_and (ld, lb); vector unsigned long long ua = {5L, 14L}; vector unsigned long long ub = {3L, 86L}; vector unsigned long long uc = vec_and (ua, ub); vector bool long long ud = {0, -1}; vector unsigned long long ue = vec_and (ua, ud); vector unsigned long long uf = vec_and (ud, ub); vector long long lg = vec_andc (la, lb); vector long long lh = vec_andc (la, ld); vector long long li = vec_andc (ld, lb); vector unsigned long long ug = vec_andc (ua, ub); vector unsigned long long uh = vec_andc (ua, ud); vector unsigned long long ui = vec_andc (ud, ub); vector double da = {1.0, -4.0}; vector double db = {-2.0, 5.0}; vector double dc = vec_cpsgn (da, db); vector long long lj = vec_mergeh (la, lb); vector long long lk = vec_mergeh (la, ld); vector long long ll = vec_mergeh (ld, la); vector unsigned long long uj = vec_mergeh (ua, ub); vector unsigned long long uk = vec_mergeh (ua, ud); vector unsigned long long ul = vec_mergeh (ud, ua); vector long long lm = vec_mergel (la, lb); vector long long ln = vec_mergel (la, ld); vector long long lo = vec_mergel (ld, la); vector unsigned long long um = vec_mergel (ua, ub); vector unsigned long long un = vec_mergel (ua, ud); vector unsigned long long uo = vec_mergel (ud, ua); vector long long lp = vec_nor (la, lb); vector long long lq = vec_nor (la, ld); vector long long lr = vec_nor (ld, la); vector unsigned long long up = vec_nor (ua, ub); vector unsigned long long uq = vec_nor (ua, ud); vector unsigned long long ur = vec_nor (ud, ua); vector long long ls = vec_or (la, lb); vector long long lt = vec_or (la, ld); vector long long lu = vec_or (ld, la); vector unsigned long long us = 
vec_or (ua, ub); vector unsigned long long ut = vec_or (ua, ud); vector unsigned long long uu = vec_or (ud, ua); vector unsigned char ca = {0,4,8,1,5,9,2,6,10,3,7,11,15,12,14,13}; vector long long lv = vec_perm (la, lb, ca); vector unsigned long long uv = vec_perm (ua, ub, ca); vector long long lw = vec_sel (la, lb, lc); vector long long lx = vec_sel (la, lb, uc); vector long long ly = vec_sel (la, lb, ld); vector unsigned long long uw = vec_sel (ua, ub, lc); vector unsigned long long ux = vec_sel (ua, ub, uc); vector unsigned long long uy = vec_sel (ua, ub, ld); vector long long lz = vec_xor (la, lb); vector long long l0 = vec_xor (la, ld); vector long long l1 = vec_xor (ld, la); vector unsigned long long uz = vec_xor (ua, ub); vector unsigned long long u0 = vec_xor (ua, ud); vector unsigned long long u1 = vec_xor (ud, ua); int ia = vec_all_eq (ua, ub); int ib = vec_all_ge (ua, ub); int ic = vec_all_gt (ua, ub); int id = vec_all_le (ua, ub); int ie = vec_all_lt (ua, ub); int ig = vec_all_ne (ua, ub); int ih = vec_any_eq (ua, ub); int ii = vec_any_ge (ua, ub); int ij = vec_any_gt (ua, ub); int ik = vec_any_le (ua, ub); int il = vec_any_lt (ua, ub); int im = vec_any_ne (ua, ub); vector int sia = {9, 16, 25, 36}; vector int sib = {-8, -27, -64, -125};//.........这里部分代码省略.........
开发者ID:0day-ci,项目名称:gcc,代码行数:101,
示例22: test1//.........这里部分代码省略.........// CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})// CHECK-LE: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) res_vd = vec_madd(vd, vd, vd);// CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})// CHECK-LE: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) /* vec_mergeh */ res_vsll = vec_mergeh(vsll, vsll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm res_vsll = vec_mergeh(vsll, vbll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm res_vsll = vec_mergeh(vbll, vsll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergeh(vull, vull);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergeh(vull, vbll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergeh(vbll, vull);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm /* vec_mergel */ res_vsll = vec_mergel(vsll, vsll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm res_vsll = vec_mergel(vsll, vbll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm res_vsll = vec_mergel(vbll, vsll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergel(vull, vull);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergel(vull, vbll);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm res_vull = vec_mergel(vbll, vull);// CHECK: @llvm.ppc.altivec.vperm// CHECK-LE: @llvm.ppc.altivec.vperm /* vec_msub */ res_vf = vec_msub(vf, vf, vf);// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, 
%{{[0-9]+}}// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>// CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}// CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> res_vd = vec_msub(vd, vd, vd);// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
开发者ID:AlexDenisov,项目名称:clang,代码行数:67,
示例23: processYUVAltivec/* more optimized version - unrolled and load-hoisted */void pix_offset :: processYUVAltivec(imageStruct &image){ register int h,w,width,height; width = image.xsize/16; //for altivec height = image.ysize; //format is U Y V Y // start of working altivec function union { short elements[8]; vector signed short v; } transferBuffer; register vector signed short c, hi, lo; register vector signed short hi1, lo1; register vector signed short loadhi, loadhi1, loadlo, loadlo1; register vector unsigned char zero = vec_splat_u8(0); register vector unsigned char *inData = (vector unsigned char*) image.data; //Write the pixel (pair) to the transfer buffer //transferBuffer.i = (U << 24) | (Y << 16) | (V << 8 ) | Y; transferBuffer.elements[0] = U; transferBuffer.elements[1] = Y; transferBuffer.elements[2] = V; transferBuffer.elements[3] = Y; transferBuffer.elements[4] = U; transferBuffer.elements[5] = Y; transferBuffer.elements[6] = V; transferBuffer.elements[7] = Y; //Load it into the vector unit c = transferBuffer.v;#ifndef PPC970 UInt32 prefetchSize = GetPrefetchConstant( 16, 1, 256 ); vec_dst( inData, prefetchSize, 0 ); vec_dst( inData+16, prefetchSize, 1 ); vec_dst( inData+32, prefetchSize, 2 ); vec_dst( inData+64, prefetchSize, 3 );#endif //expand the UInt8's to short's loadhi = (vector signed short) vec_mergeh( zero, inData[0] ); loadlo = (vector signed short) vec_mergel( zero, inData[0] ); loadhi1 = (vector signed short) vec_mergeh( zero, inData[1] ); loadlo1 = (vector signed short) vec_mergel( zero, inData[1] ); / for ( h=0; h<height; h++) { for (w=0; w<width; w++) {#ifndef PPC970 vec_dst( inData, prefetchSize, 0 ); vec_dst( inData+16, prefetchSize, 1 ); vec_dst( inData+32, prefetchSize, 2 ); vec_dst( inData+64, prefetchSize, 3 );#endif //add the constant to it hi = vec_add( loadhi, c ); lo = vec_add( loadlo, c ); hi1 = vec_add( loadhi1, c ); lo1 = vec_add( loadlo1, c ); //expand the UInt8's to short's loadhi = (vector signed short) vec_mergeh( 
zero, inData[2] ); loadlo = (vector signed short) vec_mergel( zero, inData[2] ); loadhi1 = (vector signed short) vec_mergeh( zero, inData[3] ); loadlo1 = (vector signed short) vec_mergel( zero, inData[3] ); //pack the result back down, with saturation inData[0] = vec_packsu( hi, lo ); inData++; inData[0] = vec_packsu( hi1, lo1 ); inData++; } } // // finish the last iteration after the loop // hi = vec_add( loadhi, c ); lo = vec_add( loadlo, c ); hi1 = vec_add( loadhi1, c ); lo1 = vec_add( loadlo1, c ); //pack the result back down, with saturation inData[0] = vec_packsu( hi, lo );//.........这里部分代码省略.........
开发者ID:megrimm,项目名称:Gem,代码行数:101,
示例24: dct_quantize_altivec//.........这里部分代码省略......... data7 = vec_max(vec_min(data7, max_q), min_q); } { vector bool char zero_01, zero_23, zero_45, zero_67; vector signed char scanIndexes_01, scanIndexes_23, scanIndexes_45, scanIndexes_67; vector signed char negOne = vec_splat_s8(-1); vector signed char* scanPtr = (vector signed char*)(s->intra_scantable.inverse); signed char lastNonZeroChar; // Determine the largest non-zero index. zero_01 = vec_pack(vec_cmpeq(data0, (vector signed short)zero), vec_cmpeq(data1, (vector signed short)zero)); zero_23 = vec_pack(vec_cmpeq(data2, (vector signed short)zero), vec_cmpeq(data3, (vector signed short)zero)); zero_45 = vec_pack(vec_cmpeq(data4, (vector signed short)zero), vec_cmpeq(data5, (vector signed short)zero)); zero_67 = vec_pack(vec_cmpeq(data6, (vector signed short)zero), vec_cmpeq(data7, (vector signed short)zero)); // 64 biggest values scanIndexes_01 = vec_sel(scanPtr[0], negOne, zero_01); scanIndexes_23 = vec_sel(scanPtr[1], negOne, zero_23); scanIndexes_45 = vec_sel(scanPtr[2], negOne, zero_45); scanIndexes_67 = vec_sel(scanPtr[3], negOne, zero_67); // 32 largest values scanIndexes_01 = vec_max(scanIndexes_01, scanIndexes_23); scanIndexes_45 = vec_max(scanIndexes_45, scanIndexes_67); // 16 largest values scanIndexes_01 = vec_max(scanIndexes_01, scanIndexes_45); // 8 largest values scanIndexes_01 = vec_max(vec_mergeh(scanIndexes_01, negOne), vec_mergel(scanIndexes_01, negOne)); // 4 largest values scanIndexes_01 = vec_max(vec_mergeh(scanIndexes_01, negOne), vec_mergel(scanIndexes_01, negOne)); // 2 largest values scanIndexes_01 = vec_max(vec_mergeh(scanIndexes_01, negOne), vec_mergel(scanIndexes_01, negOne)); // largest value scanIndexes_01 = vec_max(vec_mergeh(scanIndexes_01, negOne), vec_mergel(scanIndexes_01, negOne)); scanIndexes_01 = vec_splat(scanIndexes_01, 0); vec_ste(scanIndexes_01, 0, &lastNonZeroChar); lastNonZero = lastNonZeroChar; // While the data is still in vectors we check for the transpose IDCT 
permute // and handle it using the vector unit if we can. This is the permute used // by the altivec idct, so it is common when using the altivec dct. if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) { TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7); } vec_st(data0, 0, data); vec_st(data1, 16, data); vec_st(data2, 32, data); vec_st(data3, 48, data); vec_st(data4, 64, data); vec_st(data5, 80, data); vec_st(data6, 96, data); vec_st(data7, 112, data); } } // special handling of block[0] if (s->mb_intra) { if (!s->h263_aic) { if (n < 4) oldBaseValue /= s->y_dc_scale; else oldBaseValue /= s->c_dc_scale; } // Divide by 8, rounding the result data[0] = (oldBaseValue + 4) >> 3; } // We handled the transpose permutation above and we don't // need to permute the "no" permutation case. if ((lastNonZero > 0) && (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) && (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) { ff_block_permute(data, s->dsp.idct_permutation, s->intra_scantable.scantable, lastNonZero); } return lastNonZero;}
开发者ID:achellies,项目名称:camomile,代码行数:101,
示例25: PREFIX_h264_qpel16_hv_lowpass_altivec/* this code assume stride % 16 == 0 *and* tmp is properly aligned */static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) { POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1); POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1); register int i; const vector signed int vzero = vec_splat_s32(0); const vector unsigned char permM2 = vec_lvsl(-2, src); const vector unsigned char permM1 = vec_lvsl(-1, src); const vector unsigned char permP0 = vec_lvsl(+0, src); const vector unsigned char permP1 = vec_lvsl(+1, src); const vector unsigned char permP2 = vec_lvsl(+2, src); const vector unsigned char permP3 = vec_lvsl(+3, src); const vector signed short v20ss = (const vector signed short)AVV(20); const vector unsigned int v10ui = vec_splat_u32(10); const vector signed short v5ss = vec_splat_s16(5); const vector signed short v1ss = vec_splat_s16(1); const vector signed int v512si = (const vector signed int)AVV(512); const vector unsigned int v16ui = (const vector unsigned int)AVV(16); register int align = ((((unsigned long)src) - 2) % 16); src -= (2 * srcStride); for (i = 0 ; i < 21 ; i ++) { vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; vector unsigned char srcR1 = vec_ld(-2, src); vector unsigned char srcR2 = vec_ld(14, src); switch (align) { default: { srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = vec_perm(srcR1, srcR2, permP1); srcP2 = vec_perm(srcR1, srcR2, permP2); srcP3 = vec_perm(srcR1, srcR2, permP3); } break; case 11: { srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = vec_perm(srcR1, srcR2, permP1); srcP2 = vec_perm(srcR1, srcR2, permP2); srcP3 = srcR2; } break; case 12: { vector unsigned char srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, 
srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = vec_perm(srcR1, srcR2, permP1); srcP2 = srcR2; srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 13: { vector unsigned char srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = srcR2; srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 14: { vector unsigned char srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = srcR2; srcP1 = vec_perm(srcR2, srcR3, permP1); srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 15: { vector unsigned char srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = srcR2; srcP0 = vec_perm(srcR2, srcR3, permP0); srcP1 = vec_perm(srcR2, srcR3, permP1); srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; } const vector signed short srcP0A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0); const vector signed short srcP0B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0); const vector signed short srcP1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1); const vector signed short srcP1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1); const vector signed short srcP2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2); const vector signed short srcP2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2); const vector signed short srcP3A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3); const vector signed short srcP3B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3); const vector signed short srcM1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1); const vector 
signed short srcM1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1); const vector signed short srcM2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM2); const vector signed short srcM2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2); const vector signed short sum1A = vec_adds(srcP0A, srcP1A); const vector signed short sum1B = vec_adds(srcP0B, srcP1B);//.........这里部分代码省略.........
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,
示例26: PREFIX_h264_qpel16_v_lowpass_altivec/* this code assume stride % 16 == 0 */static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1); POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1); register int i; const vector signed int vzero = vec_splat_s32(0); const vector unsigned char perm = vec_lvsl(0, src); const vector signed short v20ss = (const vector signed short)AVV(20); const vector unsigned short v5us = vec_splat_u16(5); const vector signed short v5ss = vec_splat_s16(5); const vector signed short v16ss = (const vector signed short)AVV(16); const vector unsigned char dstperm = vec_lvsr(0, dst); const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1); const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); uint8_t *srcbis = src - (srcStride * 2); const vector unsigned char srcM2a = vec_ld(0, srcbis); const vector unsigned char srcM2b = vec_ld(16, srcbis); const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm); srcbis += srcStride; const vector unsigned char srcM1a = vec_ld(0, srcbis); const vector unsigned char srcM1b = vec_ld(16, srcbis); const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm); srcbis += srcStride; const vector unsigned char srcP0a = vec_ld(0, srcbis); const vector unsigned char srcP0b = vec_ld(16, srcbis); const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm); srcbis += srcStride; const vector unsigned char srcP1a = vec_ld(0, srcbis); const vector unsigned char srcP1b = vec_ld(16, srcbis); const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm); srcbis += srcStride; const vector unsigned char srcP2a = vec_ld(0, srcbis); const vector unsigned char srcP2b = vec_ld(16, srcbis); const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm); srcbis += srcStride; vector signed short srcM2ssA = (vector signed 
short)vec_mergeh((vector unsigned char)vzero, srcM2); vector signed short srcM2ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2); vector signed short srcM1ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1); vector signed short srcM1ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1); vector signed short srcP0ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0); vector signed short srcP0ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0); vector signed short srcP1ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1); vector signed short srcP1ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1); vector signed short srcP2ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2); vector signed short srcP2ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2); for (i = 0 ; i < 16 ; i++) { const vector unsigned char srcP3a = vec_ld(0, srcbis); const vector unsigned char srcP3b = vec_ld(16, srcbis); const vector unsigned char srcP3 = vec_perm(srcP3a, srcP3b, perm); const vector signed short srcP3ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3); const vector signed short srcP3ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3); srcbis += srcStride; const vector signed short sum1A = vec_adds(srcP0ssA, srcP1ssA); const vector signed short sum1B = vec_adds(srcP0ssB, srcP1ssB); const vector signed short sum2A = vec_adds(srcM1ssA, srcP2ssA); const vector signed short sum2B = vec_adds(srcM1ssB, srcP2ssB); const vector signed short sum3A = vec_adds(srcM2ssA, srcP3ssA); const vector signed short sum3B = vec_adds(srcM2ssB, srcP3ssB); srcM2ssA = srcM1ssA; srcM2ssB = srcM1ssB; srcM1ssA = srcP0ssA; srcM1ssB = srcP0ssB; srcP0ssA = srcP1ssA; srcP0ssB = srcP1ssB; srcP1ssA = srcP2ssA; srcP1ssB = srcP2ssB; srcP2ssA = srcP3ssA; srcP2ssB = srcP3ssB; const 
vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss); const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss); const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); const vector signed short pp3A = vec_add(sum3A, pp1A); const vector signed short pp3B = vec_add(sum3B, pp1B); const vector signed short psumA = vec_sub(pp3A, pp2A); const vector signed short psumB = vec_sub(pp3B, pp2B); const vector signed short sumA = vec_sra(psumA, v5us); const vector signed short sumB = vec_sra(psumB, v5us); const vector unsigned char sum = vec_packsu(sumA, sumB); const vector unsigned char dst1 = vec_ld(0, dst); const vector unsigned char dst2 = vec_ld(16, dst); const vector unsigned char vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); vector unsigned char fsum; OP_U8_ALTIVEC(fsum, sum, vdst);//.........这里部分代码省略.........
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,
示例27: PREFIX_h264_qpel16_hv_lowpass_altivec//.........这里部分代码省略......... srcP2 = srcR2; srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 13: { vec_u8 srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = srcR2; srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 14: { vec_u8 srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = srcR2; srcP1 = vec_perm(srcR2, srcR3, permP1); srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 15: { vec_u8 srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = srcR2; srcP0 = vec_perm(srcR2, srcR3, permP0); srcP1 = vec_perm(srcR2, srcR3, permP1); srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; } srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0); srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0); srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1); srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1); srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2); srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2); srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3); srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3); srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1); srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1); srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2); srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2); sum1A = vec_adds(srcP0A, srcP1A); sum1B = vec_adds(srcP0B, srcP1B); sum2A = vec_adds(srcM1A, srcP2A); sum2B = vec_adds(srcM1B, srcP2B); sum3A = vec_adds(srcM2A, srcP3A); sum3B = vec_adds(srcM2B, srcP3B); pp1A = vec_mladd(sum1A, v20ss, sum3A); pp1B = vec_mladd(sum1B, v20ss, sum3B); pp2A = vec_mladd(sum2A, v5ss, zero_s16v); pp2B = vec_mladd(sum2B, v5ss, zero_s16v); psumA = vec_sub(pp1A, pp2A); psumB = vec_sub(pp1B, pp2B); vec_st(psumA, 0, tmp); vec_st(psumB, 16, tmp);
开发者ID:AVbin,项目名称:libav,代码行数:66,
示例28: put_no_rnd_pixels16_xy2_altivec/* next one assumes that ((line_size % 16) == 0) */static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h){ register int i; register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; register vector unsigned char blockv, temp1, temp2; register vector unsigned short temp3, temp4, pixelssum1, pixelssum2, pixelssum3, pixelssum4; register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); temp1 = vec_ld(0, pixels); temp2 = vec_ld(16, pixels); pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) { pixelsv2 = temp2; } else { pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); } pixelsv3 = vec_mergel(vczero, pixelsv1); pixelsv4 = vec_mergel(vczero, pixelsv2); pixelsv1 = vec_mergeh(vczero, pixelsv1); pixelsv2 = vec_mergeh(vczero, pixelsv2); pixelssum3 = vec_add((vector unsigned short)pixelsv3, (vector unsigned short)pixelsv4); pixelssum3 = vec_add(pixelssum3, vcone); pixelssum1 = vec_add((vector unsigned short)pixelsv1, (vector unsigned short)pixelsv2); pixelssum1 = vec_add(pixelssum1, vcone); for (i = 0; i < h ; i++) { blockv = vec_ld(0, block); temp1 = vec_ld(line_size, pixels); temp2 = vec_ld(line_size + 16, pixels); pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) { pixelsv2 = temp2; } else { pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); } pixelsv3 = vec_mergel(vczero, pixelsv1); pixelsv4 = vec_mergel(vczero, pixelsv2); pixelsv1 = vec_mergeh(vczero, pixelsv1); pixelsv2 = vec_mergeh(vczero, pixelsv2); pixelssum4 = vec_add((vector unsigned short)pixelsv3, (vector unsigned 
short)pixelsv4); pixelssum2 = vec_add((vector unsigned short)pixelsv1, (vector unsigned short)pixelsv2); temp4 = vec_add(pixelssum3, pixelssum4); temp4 = vec_sra(temp4, vctwo); temp3 = vec_add(pixelssum1, pixelssum2); temp3 = vec_sra(temp3, vctwo); pixelssum3 = vec_add(pixelssum4, vcone); pixelssum1 = vec_add(pixelssum2, vcone); blockv = vec_packsu(temp3, temp4); vec_st(blockv, 0, block); block += line_size; pixels += line_size; }}
开发者ID:AVLeo,项目名称:libav,代码行数:68,
示例29: PREFIX_h264_qpel16_h_lowpass_altivecstatic void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { register int i; LOAD_ZERO; const vec_u8 permM2 = vec_lvsl(-2, src); const vec_u8 permM1 = vec_lvsl(-1, src); const vec_u8 permP0 = vec_lvsl(+0, src); const vec_u8 permP1 = vec_lvsl(+1, src); const vec_u8 permP2 = vec_lvsl(+2, src); const vec_u8 permP3 = vec_lvsl(+3, src); const vec_s16 v5ss = vec_splat_s16(5); const vec_u16 v5us = vec_splat_u16(5); const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; register int align = ((((unsigned long)src) - 2) % 16); vec_s16 srcP0A, srcP0B, srcP1A, srcP1B, srcP2A, srcP2B, srcP3A, srcP3B, srcM1A, srcM1B, srcM2A, srcM2B, sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, psumA, psumB, sumA, sumB; vec_u8 sum, fsum; for (i = 0 ; i < 16 ; i ++) { vec_u8 srcR1 = vec_ld(-2, src); vec_u8 srcR2 = vec_ld(14, src); switch (align) { default: { srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = vec_perm(srcR1, srcR2, permP1); srcP2 = vec_perm(srcR1, srcR2, permP2); srcP3 = vec_perm(srcR1, srcR2, permP3); } break; case 11: { srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = vec_perm(srcR1, srcR2, permP1); srcP2 = vec_perm(srcR1, srcR2, permP2); srcP3 = srcR2; } break; case 12: { vec_u8 srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = vec_perm(srcR1, srcR2, permP1); srcP2 = srcR2; srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 13: { vec_u8 srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, 
permP0); srcP1 = srcR2; srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 14: { vec_u8 srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = srcR2; srcP1 = vec_perm(srcR2, srcR3, permP1); srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 15: { vec_u8 srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = srcR2; srcP0 = vec_perm(srcR2, srcR3, permP0); srcP1 = vec_perm(srcR2, srcR3, permP1); srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; } srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0); srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0); srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1); srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1); srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2); srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2); srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3); srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3); srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1); srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1); srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,
示例30: PREFIX_h264_qpel16_h_lowpass_altivec/* this code assume stride % 16 == 0 */static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1); POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); register int i; const vector signed int vzero = vec_splat_s32(0); const vector unsigned char permM2 = vec_lvsl(-2, src); const vector unsigned char permM1 = vec_lvsl(-1, src); const vector unsigned char permP0 = vec_lvsl(+0, src); const vector unsigned char permP1 = vec_lvsl(+1, src); const vector unsigned char permP2 = vec_lvsl(+2, src); const vector unsigned char permP3 = vec_lvsl(+3, src); const vector signed short v20ss = (const vector signed short)AVV(20); const vector unsigned short v5us = vec_splat_u16(5); const vector signed short v5ss = vec_splat_s16(5); const vector signed short v16ss = (const vector signed short)AVV(16); const vector unsigned char dstperm = vec_lvsr(0, dst); const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1); const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); register int align = ((((unsigned long)src) - 2) % 16); for (i = 0 ; i < 16 ; i ++) { vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; vector unsigned char srcR1 = vec_ld(-2, src); vector unsigned char srcR2 = vec_ld(14, src); switch (align) { default: { srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = vec_perm(srcR1, srcR2, permP1); srcP2 = vec_perm(srcR1, srcR2, permP2); srcP3 = vec_perm(srcR1, srcR2, permP3); } break; case 11: { srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = vec_perm(srcR1, srcR2, permP1); srcP2 = vec_perm(srcR1, srcR2, permP2); srcP3 = srcR2; } break; case 12: { vector unsigned char srcR3 = vec_ld(30, src); 
srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = vec_perm(srcR1, srcR2, permP1); srcP2 = srcR2; srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 13: { vector unsigned char srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = vec_perm(srcR1, srcR2, permP0); srcP1 = srcR2; srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 14: { vector unsigned char srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = vec_perm(srcR1, srcR2, permM1); srcP0 = srcR2; srcP1 = vec_perm(srcR2, srcR3, permP1); srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; case 15: { vector unsigned char srcR3 = vec_ld(30, src); srcM2 = vec_perm(srcR1, srcR2, permM2); srcM1 = srcR2; srcP0 = vec_perm(srcR2, srcR3, permP0); srcP1 = vec_perm(srcR2, srcR3, permP1); srcP2 = vec_perm(srcR2, srcR3, permP2); srcP3 = vec_perm(srcR2, srcR3, permP3); } break; } const vector signed short srcP0A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0); const vector signed short srcP0B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0); const vector signed short srcP1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1); const vector signed short srcP1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1); const vector signed short srcP2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2); const vector signed short srcP2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2); const vector signed short srcP3A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3); const vector signed short srcP3B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3); const vector signed short srcM1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, 
srcM1); const vector signed short srcM1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1); const vector signed short srcM2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM2); const vector signed short srcM2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2); const vector signed short sum1A = vec_adds(srcP0A, srcP1A); const vector signed short sum1B = vec_adds(srcP0B, srcP1B);//.........这里部分代码省略.........
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,
注:本文中的vec_mergel函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 C++ vec_new函数代码示例 C++ vec_madd函数代码示例 |