您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vec_packsu函数代码示例

51自学网 2021-06-03 09:36:22
  C++
这篇教程C++ vec_packsu函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中vec_packsu函数的典型用法代码示例。如果您正苦于以下问题:C++ vec_packsu函数的具体用法?C++ vec_packsu怎么用?C++ vec_packsu使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_packsu函数的29个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: h264_idct_dc_add_internal

static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size){    vec_s16 dc16;    vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;    vec_s32 v_dc32;    LOAD_ZERO;    DECLARE_ALIGNED(16, int, dc);    int i;    dc = (block[0] + 32) >> 6;    block[0] = 0;    v_dc32 = vec_lde(0, &dc);    dc16 = VEC_SPLAT16((vec_s16)v_dc32, 1);    if (size == 4)        dc16 = VEC_SLD16(dc16, zero_s16v, 8);    dcplus = vec_packsu(dc16, zero_s16v);    dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);    aligner = vec_lvsr(0, dst);#if !HAVE_BIGENDIAN    aligner = vec_perm(aligner, zero_u8v, vcswapc());#endif    dcplus = vec_perm(dcplus, dcplus, aligner);    dcminus = vec_perm(dcminus, dcminus, aligner);    for (i = 0; i < size; i += 4) {        v0 = vec_ld(0, dst+0*stride);        v1 = vec_ld(0, dst+1*stride);        v2 = vec_ld(0, dst+2*stride);        v3 = vec_ld(0, dst+3*stride);        v0 = vec_adds(v0, dcplus);        v1 = vec_adds(v1, dcplus);        v2 = vec_adds(v2, dcplus);        v3 = vec_adds(v3, dcplus);        v0 = vec_subs(v0, dcminus);        v1 = vec_subs(v1, dcminus);        v2 = vec_subs(v2, dcminus);        v3 = vec_subs(v3, dcminus);        vec_st(v0, 0, dst+0*stride);        vec_st(v1, 0, dst+1*stride);        vec_st(v2, 0, dst+2*stride);        vec_st(v3, 0, dst+3*stride);        dst += 4*stride;    }}
开发者ID:63n,项目名称:FFmpeg,代码行数:50,


示例2: BgrToGray

        // Reads three consecutive vectors of packed BGR data from `bgr`, converts
        // them to gray through BgraToGray32 and writes one vector of 8-bit gray
        // values to `gray`.
        // NOTE(review): `align` and `first` are used as template arguments but are
        // declared outside this snippet; presumably an enclosing
        // template<bool align, bool first> header - confirm against the full file.
        SIMD_INLINE void BgrToGray(const Loader<align> & bgr, Storer<align> & gray)
        {
            // The three loads are issued in this exact order; only the first one
            // receives the `first` flag.
            const v128_u8 part0 = Load<align, first>(bgr);
            const v128_u8 part1 = Load<align, false>(bgr);
            const v128_u8 part2 = Load<align, false>(bgr);

            // Each K8_PERM_n gathers the bytes for one quarter of the output from
            // the pair of source vectors it straddles; the 32-bit results are
            // narrowed to 16 bits with unsigned saturation.
            const v128_u16 grayLo = vec_packsu(
                BgraToGray32(vec_perm(part0, part1, K8_PERM_0)),
                BgraToGray32(vec_perm(part0, part1, K8_PERM_1)));
            const v128_u16 grayHi = vec_packsu(
                BgraToGray32(vec_perm(part1, part2, K8_PERM_2)),
                BgraToGray32(vec_perm(part1, part2, K8_PERM_3)));

            // Final narrowing from 16-bit to 8-bit lanes, then store.
            Store<align, first>(gray, vec_packsu(grayLo, grayHi));
        }
开发者ID:4144,项目名称:Simd,代码行数:15,


示例3: yuv2plane1_8_vsx

static void yuv2plane1_8_vsx(const int16_t *src, uint8_t *dest, int dstW,                           const uint8_t *dither, int offset){    const int dst_u = -(uintptr_t)dest & 15;    int i, j;    LOCAL_ALIGNED(16, int16_t, val, [16]);    const vector uint16_t shifts = (vector uint16_t) {7, 7, 7, 7, 7, 7, 7, 7};    vector int16_t vi, vileft, ditherleft, ditherright;    vector uint8_t vd;    for (j = 0; j < 16; j++) {        val[j] = dither[(dst_u + offset + j) & 7];    }    ditherleft = vec_ld(0, val);    ditherright = vec_ld(0, &val[8]);    yuv2plane1_8_u(src, dest, dst_u, dither, offset, 0);    for (i = dst_u; i < dstW - 15; i += 16) {        vi = vec_vsx_ld(0, &src[i]);        vi = vec_adds(ditherleft, vi);        vileft = vec_sra(vi, shifts);        vi = vec_vsx_ld(0, &src[i + 8]);        vi = vec_adds(ditherright, vi);        vi = vec_sra(vi, shifts);        vd = vec_packsu(vileft, vi);        vec_st(vd, 0, &dest[i]);    }    yuv2plane1_8_u(src, dest, dstW, dither, offset, i);}
开发者ID:lihp1603,项目名称:ffmpeg,代码行数:35,


示例4: yuv2plane1_16_vsx

static void yuv2plane1_16_vsx(const int32_t *src, uint16_t *dest, int dstW,                           int big_endian, int output_bits){    const int dst_u = -(uintptr_t)dest & 7;    const int shift = 3;    const int add = (1 << (shift - 1));    const vector uint32_t vadd = (vector uint32_t) {add, add, add, add};    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0);    const vector uint32_t vshift = (vector uint32_t) vec_splat_u32(shift);    vector uint32_t v, v2;    vector uint16_t vd;    int i;    yuv2plane1_16_u(src, dest, dst_u, big_endian, output_bits, 0);    for (i = dst_u; i < dstW - 7; i += 8) {        v = vec_vsx_ld(0, (const uint32_t *) &src[i]);        v = vec_add(v, vadd);        v = vec_sr(v, vshift);        v2 = vec_vsx_ld(0, (const uint32_t *) &src[i + 4]);        v2 = vec_add(v2, vadd);        v2 = vec_sr(v2, vshift);        vd = vec_packsu(v, v2);        vd = vec_rl(vd, vswap);        vec_st(vd, 0, &dest[i]);    }    yuv2plane1_16_u(src, dest, dstW, big_endian, output_bits, i);}
开发者ID:lihp1603,项目名称:ffmpeg,代码行数:32,


示例5: put_vp8_epel_h_altivec_core

static av_always_inlinevoid put_vp8_epel_h_altivec_core(uint8_t *dst, ptrdiff_t dst_stride,                                 uint8_t *src, ptrdiff_t src_stride,                                 int h, int mx, int w, int is6tap){    LOAD_H_SUBPEL_FILTER(mx-1);    vec_u8 align_vec0, align_vec8, permh0, permh8, filt;    vec_u8 perm_6tap0, perm_6tap8, perml0, perml8;    vec_u8 a, b, pixh, pixl, outer;    vec_s16 f16h, f16l;    vec_s32 filth, filtl;    vec_u8 perm_inner6 = { 1,2,3,4, 2,3,4,5, 3,4,5,6, 4,5,6,7 };    vec_u8 perm_inner4 = { 0,1,2,3, 1,2,3,4, 2,3,4,5, 3,4,5,6 };    vec_u8 perm_inner  = is6tap ? perm_inner6 : perm_inner4;    vec_u8 perm_outer = { 4,9, 0,5, 5,10, 1,6, 6,11, 2,7, 7,12, 3,8 };    vec_s32 c64 = vec_sl(vec_splat_s32(1), vec_splat_u32(6));    vec_u16 c7  = vec_splat_u16(7);    align_vec0 = vec_lvsl( -is6tap-1, src);    align_vec8 = vec_lvsl(8-is6tap-1, src);    permh0     = vec_perm(align_vec0, align_vec0, perm_inner);    permh8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_inner = vec_add(perm_inner, vec_splat_u8(4));    perml0     = vec_perm(align_vec0, align_vec0, perm_inner);    perml8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_6tap0 = vec_perm(align_vec0, align_vec0, perm_outer);    perm_6tap8 = vec_perm(align_vec8, align_vec8, perm_outer);    while (h --> 0) {        FILTER_H(f16h, 0);        if (w == 16) {            FILTER_H(f16l, 8);            filt = vec_packsu(f16h, f16l);            vec_st(filt, 0, dst);        } else {            filt = vec_packsu(f16h, f16h);            vec_ste((vec_u32)filt, 0, (uint32_t*)dst);            if (w == 8)                vec_ste((vec_u32)filt, 4, (uint32_t*)dst);        }        src += src_stride;        dst += dst_stride;    }}
开发者ID:Arcen,项目名称:libav,代码行数:47,


示例6: put_no_rnd_pixels8_xy2_altivec

/*
 * For each of h rows, writes 8 bytes to block where every byte is
 *     (p[x] + p[x+1] + q[x] + q[x+1] + 1) >> 2
 * with p the current source row and q the row line_size bytes below it.
 * The horizontal pair sums of a row are kept and reused as the upper row of
 * the following iteration.
 *
 * block     - destination; only the 8 bytes at block are replaced in the
 *             aligned 16-byte store
 * pixels    - source (may be unaligned)
 * line_size - row pitch in bytes, used for both block and pixels
 * h         - number of rows
 *
 * next one assumes that ((line_size % 8) == 0)
 */
static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short pixelssum1, pixelssum2, temp3;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* Unaligned load of the first row: two aligned loads plus a permute. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* When pixels sits on the last byte of its 16-byte block, pixels + 1 is
     * the start of the second aligned vector, so it is taken as-is. */
    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Interleave with zero bytes to widen the first 8 pixels to 16 bit. */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    /* Pre-add the +1 bias to the upper-row sums. */
    pixelssum1 = vec_add(pixelssum1, vcone);

    for (i = 0; i < h ; i++) {
        /* Non-zero when block is not at the start of its 16-byte block. */
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* Same unaligned-load scheme for the row below. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (upper sums + 1) + lower sums, then divide by four. */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* The lower row becomes the upper row of the next iteration. */
        pixelssum1 = vec_add(pixelssum2, vcone);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Merge the 8 result bytes into the half of the aligned vector that
         * block points at (presumably s0/s1 name words of the second vcprm
         * operand - confirm against the vcprm definition). */
        if (rightside) {
            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
开发者ID:AVLeo,项目名称:libav,代码行数:59,


示例7: h264_idct_dc_add_internal

static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size){    vec_s16 dc16;    vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;    LOAD_ZERO;    DECLARE_ALIGNED(16, int, dc);    int i;    dc = (block[0] + 32) >> 6;    block[0] = 0;    dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);    if (size == 4)        dc16 = vec_sld(dc16, zero_s16v, 8);    dcplus = vec_packsu(dc16, zero_s16v);    dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);    aligner = vec_lvsr(0, dst);    dcplus = vec_perm(dcplus, dcplus, aligner);    dcminus = vec_perm(dcminus, dcminus, aligner);    for (i = 0; i < size; i += 4) {        v0 = vec_ld(0, dst+0*stride);        v1 = vec_ld(0, dst+1*stride);        v2 = vec_ld(0, dst+2*stride);        v3 = vec_ld(0, dst+3*stride);        v0 = vec_adds(v0, dcplus);        v1 = vec_adds(v1, dcplus);        v2 = vec_adds(v2, dcplus);        v3 = vec_adds(v3, dcplus);        v0 = vec_subs(v0, dcminus);        v1 = vec_subs(v1, dcminus);        v2 = vec_subs(v2, dcminus);        v3 = vec_subs(v3, dcminus);        vec_st(v0, 0, dst+0*stride);        vec_st(v1, 0, dst+1*stride);        vec_st(v2, 0, dst+2*stride);        vec_st(v3, 0, dst+3*stride);        dst += 4*stride;    }}
开发者ID:DDTChen,项目名称:CookieVLC,代码行数:45,


示例8: yuv2planeX_16_altivec

/*
 * Produces 16 output bytes at dest: for each of the 16 positions starting at
 * x, accumulates filter[j] * src[j][...] over all filterSize taps on top of a
 * dither bias, shifts the sum right by 19 and saturates it to 8 bit.
 *
 * filter     - filter coefficients, one per source line
 * filterSize - number of taps / source lines
 * src        - array of filterSize source line pointers
 * dest       - destination for the 16 bytes (written with an aligned store)
 * dither     - dither table, indexed modulo 8
 * offset     - phase added to the dither index
 * x          - horizontal start position inside the source lines
 */
static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize,
                                  const int16_t **src, uint8_t *dest,
                                  const uint8_t *dither, int offset, int x)
{
    register int i, j;
    DECLARE_ALIGNED(16, int, val)[16];
    vector signed int vo1, vo2, vo3, vo4;
    vector unsigned short vs1, vs2;
    vector unsigned char vf;
    /* 19 == 10 + 9; built from two splats because the splat immediate is
     * limited to a small range. */
    vector unsigned int altivec_vectorShiftInt19 =
        vec_add(vec_splat_u32(10), vec_splat_u32(9));

    /* Seed the accumulators with the dither values scaled by 1 << 12. */
    for (i = 0; i < 16; i++)
        val[i] = dither[(x + i + offset) & 7] << 12;

    vo1 = vec_ld(0,  val);
    vo2 = vec_ld(16, val);
    vo3 = vec_ld(32, val);
    vo4 = vec_ld(48, val);

    for (j = 0; j < filterSize; j++) {
        vector signed short l1, vLumFilter = vec_ld(j << 1, filter);
        vector unsigned char perm, perm0 = vec_lvsl(j << 1, filter);
        vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
        /* lumFilter[j] is loaded 8 times in vLumFilter */
        vLumFilter = vec_splat(vLumFilter, 0);

        /* Alignment permute and first aligned vector of source line j. */
        perm = vec_lvsl(x << 1, src[j]);
        l1   = vec_ld(x << 1, src[j]);

        /* Accumulate positions x..x+7 and x+8..x+15.
         * NOTE(review): yuv2planeX_8 is defined outside this snippet; it
         * presumably updates the two accumulators and l1 - confirm. */
        yuv2planeX_8(vo1, vo2, l1, src[j], x,     perm, vLumFilter);
        yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter);
    }

    vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
    vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
    vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
    vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
    /* Narrow 32 -> 16 -> 8 bit, saturating to the unsigned range each time. */
    vs1 = vec_packsu(vo1, vo2);
    vs2 = vec_packsu(vo3, vo4);
    vf  = vec_packsu(vs1, vs2);
    vec_st(vf, 0, dest);
}
开发者ID:1c0n,项目名称:xbmc,代码行数:42,


示例9: yuv2planeX_16_altivec

/*
 * Produces 16 output bytes at dest: a dither bias plus the sum over all
 * filterSize taps of filter[j] * src[j][...] is shifted right by 19 and
 * saturated to 8 bit.
 *
 * filter     - filter coefficients, one per source line
 * filterSize - number of taps / source lines
 * src        - array of filterSize source line pointers
 * dest       - destination for the 16 bytes (stored through VEC_ST)
 * dither     - dither table, indexed modulo 8
 * offset     - phase added to the dither index
 * x          - horizontal start position inside the source lines
 */
static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize,
                                  const int16_t **src, uint8_t *dest,
                                  const uint8_t *dither, int offset, int x)
{
    register int i, j;
    LOCAL_ALIGNED(16, int, val, [16]);
    vector signed int vo1, vo2, vo3, vo4;
    vector unsigned short vs1, vs2;
    vector unsigned char vf;
    /* 19 == 10 + 9; built from two splats because the splat immediate is
     * limited to a small range. */
    vector unsigned int altivec_vectorShiftInt19 =
        vec_add(vec_splat_u32(10), vec_splat_u32(9));

    /* Seed the accumulators with the dither values scaled by 1 << 12. */
    for (i = 0; i < 16; i++)
        val[i] = dither[(x + i + offset) & 7] << 12;

    vo1 = vec_ld(0,  val);
    vo2 = vec_ld(16, val);
    vo3 = vec_ld(32, val);
    vo4 = vec_ld(48, val);

    for (j = 0; j < filterSize; j++) {
        /* Byte offsets of tap j and of position x (2 bytes per element).
         * NOTE(review): neither is referenced directly below; presumably the
         * LOAD_FILTER / LOAD_L1 macros pick them up by name - confirm before
         * renaming or removing. */
        unsigned int joffset=j<<1;
        unsigned int xoffset=x<<1;
        vector unsigned char perm;
        vector signed short l1,vLumFilter;
        LOAD_FILTER(vLumFilter,filter);
        /* Replicate element 0 of the loaded filter vector to all lanes. */
        vLumFilter = vec_splat(vLumFilter, 0);
        LOAD_L1(l1,src[j],perm);
        /* Accumulate positions x..x+7 and x+8..x+15. */
        yuv2planeX_8(vo1, vo2, l1, src[j], x,     perm, vLumFilter);
        yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter);
    }

    vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
    vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
    vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
    vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
    /* Narrow 32 -> 16 -> 8 bit, saturating to the unsigned range each time. */
    vs1 = vec_packsu(vo1, vo2);
    vs2 = vec_packsu(vo3, vo4);
    vf  = vec_packsu(vs1, vs2);
    VEC_ST(vf, 0, dest);
}
开发者ID:0day-ci,项目名称:FFmpeg,代码行数:41,


示例10: processRGBA_Altivec

void pix_diff :: processRGBA_Altivec(imageStruct &image, imageStruct &right){    int datasize = image.xsize * image.ysize / 4;    vector signed short  hiImage, loImage, hiRight, loRight;    vector unsigned char zero = vec_splat_u8(0);    vector unsigned char *inData = (vector unsigned char *)image.data;    vector unsigned char *rightData = (vector unsigned char *)right.data;    #ifndef PPC970   	UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );	vec_dst( inData, prefetchSize, 0 );        vec_dst( rightData, prefetchSize, 1 );        vec_dst( inData+256, prefetchSize, 2 );        vec_dst( rightData+256, prefetchSize, 3 );    #endif    do {        #ifndef PPC970	vec_dst( inData, prefetchSize, 0 );        vec_dst( rightData, prefetchSize, 1 );        vec_dst( inData+256, prefetchSize, 2 );        vec_dst( rightData+256, prefetchSize, 3 );        #endif        hiImage = (vector signed short)vec_mergeh(zero,inData[0]);        loImage = (vector signed short)vec_mergel(zero,inData[0]);        hiRight = (vector signed short)vec_mergeh(zero,rightData[0]);        loRight = (vector signed short)vec_mergel(zero,rightData[0]);        hiImage = vec_subs(hiImage,hiRight);        loImage = vec_subs(loImage,loRight);        hiImage = vec_abs(hiImage);        loImage = vec_abs(loImage);        inData[0] = vec_packsu(hiImage,loImage);        inData++;        rightData++;    }    while (--datasize);    #ifndef PPC970        vec_dss( 0 );        vec_dss( 1 );        vec_dss( 2 );        vec_dss( 3 );    #endif}
开发者ID:avilleret,项目名称:Gem,代码行数:50,


示例11: predict_16x16_p_altivec

static void predict_16x16_p_altivec( uint8_t *src ){    int16_t a, b, c, i;    int H = 0;    int V = 0;    int16_t i00;    for( i = 1; i <= 8; i++ )    {        H += i * ( src[7+i - FDEC_STRIDE ]  - src[7-i - FDEC_STRIDE ] );        V += i * ( src[(7+i)*FDEC_STRIDE -1] - src[(7-i)*FDEC_STRIDE -1] );    }    a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );    b = ( 5 * H + 32 ) >> 6;    c = ( 5 * V + 32 ) >> 6;    i00 = a - b * 7 - c * 7 + 16;    vect_sshort_u i00_u, b_u, c_u;    i00_u.s[0] = i00;    b_u.s[0]   = b;    c_u.s[0]   = c;    vec_u16_t val5_v = vec_splat_u16(5);    vec_s16_t i00_v, b_v, c_v;    i00_v = vec_splat(i00_u.v, 0);    b_v = vec_splat(b_u.v, 0);    c_v = vec_splat(c_u.v, 0);    vec_s16_t induc_v  = (vec_s16_t) CV(0,  1,  2,  3,  4,  5,  6,  7);    vec_s16_t b8_v = vec_sl(b_v, vec_splat_u16(3));    vec_s32_t mule_b_v = vec_mule(induc_v, b_v);    vec_s32_t mulo_b_v = vec_mulo(induc_v, b_v);    vec_s16_t mul_b_induc0_v = vec_pack(vec_mergeh(mule_b_v, mulo_b_v), vec_mergel(mule_b_v, mulo_b_v));    vec_s16_t add_i0_b_0v = vec_adds(i00_v, mul_b_induc0_v);    vec_s16_t add_i0_b_8v = vec_adds(b8_v, add_i0_b_0v);    int y;    for( y = 0; y < 16; y++ )    {        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);        vec_s16_t shift_8_v = vec_sra(add_i0_b_8v, val5_v);        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_8_v);        vec_st( com_sat_v, 0, &src[0]);        src += FDEC_STRIDE;        i00 += c;        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);        add_i0_b_8v = vec_adds(add_i0_b_8v, c_v);    }}
开发者ID:UIKit0,项目名称:H.264-in-CUDA,代码行数:50,


示例12: predict_8x8c_p_altivec

/*
 * Fills the 8x8 block at src with a linear gradient derived from its
 * neighbours: slopes b and c are estimated from the row above and the column
 * to the left, and each pixel becomes packsu((i00 + b*x + c*y) >> 5).
 * Rows are FDEC_STRIDE bytes apart.
 */
static void predict_8x8c_p_altivec( uint8_t *src )
{
    int H = 0, V = 0;

    /* Weighted differences of mirrored neighbours in the top row (H) and the
     * left column (V). */
    for( int i = 0; i < 4; i++ )
    {
        H += ( i + 1 ) * ( src[4+i - FDEC_STRIDE] - src[2 - i -FDEC_STRIDE] );
        V += ( i + 1 ) * ( src[-1 +(i+4)*FDEC_STRIDE] - src[-1+(2-i)*FDEC_STRIDE] );
    }

    int a = 16 * ( src[-1+7*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );
    int b = ( 17 * H + 16 ) >> 5;
    int c = ( 17 * V + 16 ) >> 5;
    /* Value at the top-left pixel, including the rounding term for >> 5. */
    int i00 = a -3*b -3*c + 16;

    /* Scalars are written to lane 0 of a union and then replicated. */
    vec_s16_u i00_u, b_u, c_u;
    i00_u.s[0] = i00;
    b_u.s[0]   = b;
    c_u.s[0]   = c;

    vec_u16_t val5_v = vec_splat_u16(5);
    vec_s16_t i00_v, b_v, c_v;
    i00_v = vec_splat(i00_u.v, 0);
    b_v = vec_splat(b_u.v, 0);
    c_v = vec_splat(c_u.v, 0);

    /* First row before the shift: i00 + b * {0..7}. */
    vec_s16_t induc_v  = (vec_s16_t) CV(0, 1, 2, 3, 4, 5, 6, 7);
    vec_s16_t add_i0_b_0v = vec_mladd(induc_v, b_v, i00_v);

    /* NOTE(review): PREP_STORE8 / VEC_STORE8 are defined outside this snippet;
     * presumably they set up and perform an 8-byte store - confirm. */
    PREP_STORE8;

    for( int i = 0; i < 8; ++i )
    {
        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);
        /* Both halves carry the same 8 saturated bytes. */
        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_0_v);
        VEC_STORE8(com_sat_v, &src[0]);
        src += FDEC_STRIDE;
        /* Step to the next row by adding the vertical slope. */
        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);
    }
}
开发者ID:xing2fan,项目名称:x264,代码行数:41,


示例13: predict_16x16_p_altivec

static void predict_16x16_p_altivec( uint8_t *src ){    int H = 0, V = 0;    for( int i = 1; i <= 8; i++ )    {        H += i * ( src[7+i - FDEC_STRIDE ]  - src[7-i - FDEC_STRIDE ] );        V += i * ( src[(7+i)*FDEC_STRIDE -1] - src[(7-i)*FDEC_STRIDE -1] );    }    int a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );    int b = ( 5 * H + 32 ) >> 6;    int c = ( 5 * V + 32 ) >> 6;    int i00 = a - b * 7 - c * 7 + 16;    vec_s16_u i00_u, b_u, c_u;    i00_u.s[0] = i00;    b_u.s[0]   = b;    c_u.s[0]   = c;    vec_u16_t val5_v = vec_splat_u16(5);    vec_s16_t i00_v, b_v, c_v;    i00_v = vec_splat(i00_u.v, 0);    b_v = vec_splat(b_u.v, 0);    c_v = vec_splat(c_u.v, 0);    vec_s16_t induc_v  = (vec_s16_t) CV(0,  1,  2,  3,  4,  5,  6,  7);    vec_s16_t b8_v = vec_sl(b_v, vec_splat_u16(3));    vec_s16_t add_i0_b_0v = vec_mladd(induc_v, b_v, i00_v);    vec_s16_t add_i0_b_8v = vec_adds(b8_v, add_i0_b_0v);    for( int y = 0; y < 16; y++ )    {        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);        vec_s16_t shift_8_v = vec_sra(add_i0_b_8v, val5_v);        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_8_v);        vec_st( com_sat_v, 0, &src[0]);        src += FDEC_STRIDE;        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);        add_i0_b_8v = vec_adds(add_i0_b_8v, c_v);    }}
开发者ID:xing2fan,项目名称:x264,代码行数:41,


示例14: pix_multiply

/*
 * Multiplies the bytes of p by the corresponding bytes of a and scales each
 * product back to 8 bit.
 *
 * For every byte pair the code computes t = p * a + 0x80 and returns
 * (t + (t >> 8)) >> 8, the usual integer form of a rounded division by 255.
 * The upper and lower eight bytes are widened to 16-bit lanes, processed
 * separately, and packed back together with unsigned saturation.
 *
 * p - first operand (16 bytes carried in a vector unsigned int)
 * a - second operand, same layout
 *
 * Returns the 16 result bytes in the same layout.
 */
static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
    vector unsigned short hi, lo, mod;

    /* unpack the upper eight bytes of both operands to short */
    hi = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);

    mod = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    /* t = p * a + 0x80 */
    hi = vec_mladd (hi, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    /* (t + (t >> 8)) >> 8 */
    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));

    hi = vec_sr (hi, vec_splat_u16 (8));

    /* unpack the lower eight bytes of both operands to short */
    lo = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);

    mod = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    /* t = p * a + 0x80 */
    lo = vec_mladd (lo, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    /* (t + (t >> 8)) >> 8 */
    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));

    lo = vec_sr (lo, vec_splat_u16 (8));

    /* narrow both halves back to bytes */
    return (vector unsigned int)vec_packsu (hi, lo);
}
开发者ID:1833183060,项目名称:wke,代码行数:40,


示例15: PREFIX_h264_qpel16_hv_lowpass_altivec

//.........这里部分代码省略.........        psumA = vec_sub(pp1A, pp2A);        psumB = vec_sub(pp1B, pp2B);        vec_st(psumA, 0, tmp);        vec_st(psumB, 16, tmp);        src += srcStride;        tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */    }    tmpM2ssA = vec_ld(0, tmpbis);    tmpM2ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    tmpM1ssA = vec_ld(0, tmpbis);    tmpM1ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    tmpP0ssA = vec_ld(0, tmpbis);    tmpP0ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    tmpP1ssA = vec_ld(0, tmpbis);    tmpP1ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    tmpP2ssA = vec_ld(0, tmpbis);    tmpP2ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    for (i = 0 ; i < 16 ; i++) {        const vec_s16 tmpP3ssA = vec_ld(0, tmpbis);        const vec_s16 tmpP3ssB = vec_ld(16, tmpbis);        const vec_s16 sum1A = vec_adds(tmpP0ssA, tmpP1ssA);        const vec_s16 sum1B = vec_adds(tmpP0ssB, tmpP1ssB);        const vec_s16 sum2A = vec_adds(tmpM1ssA, tmpP2ssA);        const vec_s16 sum2B = vec_adds(tmpM1ssB, tmpP2ssB);        const vec_s16 sum3A = vec_adds(tmpM2ssA, tmpP3ssA);        const vec_s16 sum3B = vec_adds(tmpM2ssB, tmpP3ssB);        tmpbis += tmpStride;        tmpM2ssA = tmpM1ssA;        tmpM2ssB = tmpM1ssB;        tmpM1ssA = tmpP0ssA;        tmpM1ssB = tmpP0ssB;        tmpP0ssA = tmpP1ssA;        tmpP0ssB = tmpP1ssB;        tmpP1ssA = tmpP2ssA;        tmpP1ssB = tmpP2ssB;        tmpP2ssA = tmpP3ssA;        tmpP2ssB = tmpP3ssB;        pp1Ae = vec_mule(sum1A, v20ss);        pp1Ao = vec_mulo(sum1A, v20ss);        pp1Be = vec_mule(sum1B, v20ss);        pp1Bo = vec_mulo(sum1B, v20ss);        pp2Ae = vec_mule(sum2A, v5ss);        pp2Ao = vec_mulo(sum2A, v5ss);        pp2Be = vec_mule(sum2B, v5ss);        pp2Bo = vec_mulo(sum2B, v5ss);        pp3Ae = vec_sra((vec_s32)sum3A, v16ui);        pp3Ao = vec_mulo(sum3A, v1ss);        pp3Be = vec_sra((vec_s32)sum3B, v16ui);        pp3Bo = vec_mulo(sum3B, v1ss);    
    pp1cAe = vec_add(pp1Ae, v512si);        pp1cAo = vec_add(pp1Ao, v512si);        pp1cBe = vec_add(pp1Be, v512si);        pp1cBo = vec_add(pp1Bo, v512si);        pp32Ae = vec_sub(pp3Ae, pp2Ae);        pp32Ao = vec_sub(pp3Ao, pp2Ao);        pp32Be = vec_sub(pp3Be, pp2Be);        pp32Bo = vec_sub(pp3Bo, pp2Bo);        sumAe = vec_add(pp1cAe, pp32Ae);        sumAo = vec_add(pp1cAo, pp32Ao);        sumBe = vec_add(pp1cBe, pp32Be);        sumBo = vec_add(pp1cBo, pp32Bo);        ssumAe = vec_sra(sumAe, v10ui);        ssumAo = vec_sra(sumAo, v10ui);        ssumBe = vec_sra(sumBe, v10ui);        ssumBo = vec_sra(sumBo, v10ui);        ssume = vec_packs(ssumAe, ssumBe);        ssumo = vec_packs(ssumAo, ssumBo);        sumv = vec_packsu(ssume, ssumo);        sum = vec_perm(sumv, sumv, mperm);        ASSERT_ALIGNED(dst);        OP_U8_ALTIVEC(fsum, sum, vec_ld(0, dst));        vec_st(fsum, 0, dst);        dst += dstStride;    }}
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例16: PREFIX_h264_qpel16_v_lowpass_altivec

static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {    register int i;    LOAD_ZERO;    const vec_u8 perm = vec_lvsl(0, src);    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_u16 v5us = vec_splat_u16(5);    const vec_s16 v5ss = vec_splat_s16(5);    const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));    uint8_t *srcbis = src - (srcStride * 2);    const vec_u8 srcM2a = vec_ld(0, srcbis);    const vec_u8 srcM2b = vec_ld(16, srcbis);    const vec_u8 srcM2 = vec_perm(srcM2a, srcM2b, perm);    //srcbis += srcStride;    const vec_u8 srcM1a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcM1b = vec_ld(16, srcbis);    const vec_u8 srcM1 = vec_perm(srcM1a, srcM1b, perm);    //srcbis += srcStride;    const vec_u8 srcP0a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP0b = vec_ld(16, srcbis);    const vec_u8 srcP0 = vec_perm(srcP0a, srcP0b, perm);    //srcbis += srcStride;    const vec_u8 srcP1a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP1b = vec_ld(16, srcbis);    const vec_u8 srcP1 = vec_perm(srcP1a, srcP1b, perm);    //srcbis += srcStride;    const vec_u8 srcP2a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP2b = vec_ld(16, srcbis);    const vec_u8 srcP2 = vec_perm(srcP2a, srcP2b, perm);    //srcbis += srcStride;    vec_s16 srcM2ssA = (vec_s16) vec_mergeh(zero_u8v, srcM2);    vec_s16 srcM2ssB = (vec_s16) vec_mergel(zero_u8v, srcM2);    vec_s16 srcM1ssA = (vec_s16) vec_mergeh(zero_u8v, srcM1);    vec_s16 srcM1ssB = (vec_s16) vec_mergel(zero_u8v, srcM1);    vec_s16 srcP0ssA = (vec_s16) vec_mergeh(zero_u8v, srcP0);    vec_s16 srcP0ssB = (vec_s16) vec_mergel(zero_u8v, srcP0);    vec_s16 srcP1ssA = (vec_s16) vec_mergeh(zero_u8v, srcP1);    vec_s16 srcP1ssB = (vec_s16) vec_mergel(zero_u8v, srcP1);    vec_s16 srcP2ssA = (vec_s16) vec_mergeh(zero_u8v, srcP2);    vec_s16 srcP2ssB = (vec_s16) vec_mergel(zero_u8v, srcP2);    vec_s16 pp1A, pp1B, pp2A, 
pp2B, pp3A, pp3B,              psumA, psumB, sumA, sumB,              srcP3ssA, srcP3ssB,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;    vec_u8 sum, fsum, srcP3a, srcP3b, srcP3;    for (i = 0 ; i < 16 ; i++) {        srcP3a = vec_ld(0, srcbis += srcStride);        srcP3b = vec_ld(16, srcbis);        srcP3 = vec_perm(srcP3a, srcP3b, perm);        srcP3ssA = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3ssB = (vec_s16) vec_mergel(zero_u8v, srcP3);        //srcbis += srcStride;        sum1A = vec_adds(srcP0ssA, srcP1ssA);        sum1B = vec_adds(srcP0ssB, srcP1ssB);        sum2A = vec_adds(srcM1ssA, srcP2ssA);        sum2B = vec_adds(srcM1ssB, srcP2ssB);        sum3A = vec_adds(srcM2ssA, srcP3ssA);        sum3B = vec_adds(srcM2ssB, srcP3ssB);        srcM2ssA = srcM1ssA;        srcM2ssB = srcM1ssB;        srcM1ssA = srcP0ssA;        srcM1ssB = srcP0ssB;        srcP0ssA = srcP1ssA;        srcP0ssB = srcP1ssB;        srcP1ssA = srcP2ssA;        srcP1ssB = srcP2ssB;        srcP2ssA = srcP3ssA;        srcP2ssB = srcP3ssB;        pp1A = vec_mladd(sum1A, v20ss, v16ss);        pp1B = vec_mladd(sum1B, v20ss, v16ss);        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);        pp3A = vec_add(sum3A, pp1A);        pp3B = vec_add(sum3B, pp1B);        psumA = vec_sub(pp3A, pp2A);        psumB = vec_sub(pp3B, pp2B);        sumA = vec_sra(psumA, v5us);        sumB = vec_sra(psumB, v5us);        sum = vec_packsu(sumA, sumB);        ASSERT_ALIGNED(dst);        OP_U8_ALTIVEC(fsum, sum, vec_ld(0, dst));        vec_st(fsum, 0, dst);//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例17: ProjectDlightTexture_altivec

//.........这里部分代码省略.........			vec_t dist0, dist1, dist2;						dist0 = origin0 - tess.xyz[i][0];			dist1 = origin1 - tess.xyz[i][1];			dist2 = origin2 - tess.xyz[i][2];			backEnd.pc.c_dlightVertexes++;			texCoords0 = 0.5f + dist0 * scale;			texCoords1 = 0.5f + dist1 * scale;			if( !r_dlightBacks->integer &&					// dist . tess.normal[i]					( dist0 * tess.normal[i][0] +					dist1 * tess.normal[i][1] +					dist2 * tess.normal[i][2] ) < 0.0f ) {				clip = 63;			} else {				if ( texCoords0 < 0.0f ) {					clip |= 1;				} else if ( texCoords0 > 1.0f ) {					clip |= 2;				}				if ( texCoords1 < 0.0f ) {					clip |= 4;				} else if ( texCoords1 > 1.0f ) {					clip |= 8;				}				texCoords[0] = texCoords0;				texCoords[1] = texCoords1;				// modulate the strength based on the height and color				if ( dist2 > radius ) {					clip |= 16;					modulate = 0.0f;				} else if ( dist2 < -radius ) {					clip |= 32;					modulate = 0.0f;				} else {					dist2 = Q_fabs(dist2);					if ( dist2 < radius * 0.5f ) {						modulate = 1.0f;					} else {						modulate = 2.0f * (radius - dist2) * scale;					}				}			}			clipBits[i] = clip;			modulateVec = vec_ld(0,(float *)&modulate);			modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm);			colorVec = vec_madd(floatColorVec0,modulateVec,zero);			colorInt = vec_cts(colorVec,0);	// RGBx			colorShort = vec_pack(colorInt,colorInt);		// RGBxRGBx			colorChar = vec_packsu(colorShort,colorShort);	// RGBxRGBxRGBxRGBx			colorChar = vec_sel(colorChar,vSel,vSel);		// RGBARGBARGBARGBA replace alpha with 255			vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors);	// store color		}		// build a list of triangles that need light		numIndexes = 0;		for ( i = 0 ; i < tess.numIndexes ; i += 3 ) {			int		a, b, c;			a = tess.indexes[i];			b = tess.indexes[i+1];			c = tess.indexes[i+2];			if ( clipBits[a] & clipBits[b] & clipBits[c] ) {				continue;	// not lighted			}			hitIndexes[numIndexes] = a;			hitIndexes[numIndexes+1] = b;			
hitIndexes[numIndexes+2] = c;			numIndexes += 3;		}		if ( !numIndexes ) {			continue;		}		qglEnableClientState( GL_TEXTURE_COORD_ARRAY );		qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] );		qglEnableClientState( GL_COLOR_ARRAY );		qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray );		GL_Bind( tr.dlightImage );		// include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light		// where they aren't rendered		if ( dl->additive ) {			GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );		}		else {			GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );		}		R_DrawElements( numIndexes, hitIndexes );		backEnd.pc.c_totalIndexes += numIndexes;		backEnd.pc.c_dlightIndexes += numIndexes;	}}
开发者ID:ptitSeb,项目名称:ioq3,代码行数:101,


示例18: processYUVAltivec

//.........这里部分代码省略.........    shortBuffer.s[6] = m_Urange;    shortBuffer.s[7] = m_Vrange;    UVrange = shortBuffer.v;            //setup the cache prefetch -- A MUST!!!    UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );    #ifndef PPC970     vec_dst( inData, prefetchSize, 0 );    vec_dst( rightData, prefetchSize, 1 );    vec_dst( inData+32, prefetchSize, 2 );    vec_dst( rightData+32, prefetchSize, 3 );    #endif //PPC970        for ( i=0; i<h; i++){        for (j=0; j<w; j++)        {        #ifndef PPC970        //this function is probably memory bound on most G4's -- what else is new?            vec_dst( inData, prefetchSize, 0 );            vec_dst( rightData, prefetchSize, 1 );            vec_dst( inData+32, prefetchSize, 2 );            vec_dst( rightData+32, prefetchSize, 3 );        #endif        //separate the U and V from Y        UVres1 = (vector unsigned short)vec_mule(one,inData[0]);        UVres2 = (vector unsigned short)vec_mule(one,rightData[0]);                    //vec_mulo Y * 1 to short vector Y Y Y Y shorts        Yres1 = (vector unsigned short)vec_mulo(one,inData[0]);        Yres2 = (vector unsigned short)vec_mulo(one,rightData[0]);                Yhi = vec_adds(Yres2,Yrange);        Ylo = vec_subs(Yres2,Yrange);                //go to ints for comparison        UVhi = vec_adds(UVres2,UVrange);        UVlo = vec_subs(UVres2,UVrange);                Uhi = vec_mule(sone,UVhi);        Ulo = vec_mule(sone,UVlo);                Vhi = vec_mulo(sone,UVhi);        Vlo = vec_mulo(sone,UVlo);                Ures = vec_mule(sone,UVres1);         Vres = vec_mulo(sone,UVres1);                  Umasklo = vec_cmpgt(Ures,Ulo);         Umaskhi = vec_cmplt(Ures,Uhi);                  Vmasklo = vec_cmpgt(Vres,Vlo);         Vmaskhi = vec_cmplt(Vres,Vhi);                  Umaskhi = vec_and(Umaskhi,Umasklo);                  Vmaskhi = vec_and(Vmaskhi,Vmasklo);                  Umasklo = vec_and(Umaskhi,Vmaskhi);         Vmasklo = 
vec_and(Umaskhi,Vmaskhi);                  hiImage = (vector unsigned short)vec_mergeh(Umasklo,Vmasklo);         loImage = (vector unsigned short)vec_mergel(Umasklo,Vmasklo);                  //pack it back down to bool short         UVmaskhi = (vector bool short)vec_packsu(hiImage,loImage);                  Ymasklo = vec_cmpgt(Yres1,Ylo);         Ymaskhi = vec_cmplt(Yres1,Yhi);                  Ymaskhi = vec_and(Ymaskhi,Ymasklo);                  Ymaskhi = vec_and(Ymaskhi,UVmaskhi);         UVmaskhi = vec_and(Ymaskhi,UVmaskhi);                  //bitwise comparison and move using the result of the comparison as a mask         Yres1 = vec_sel(Yres1,Yblank,Ymaskhi);                  //UVres1 = vec_sel(UVres1,UVres2,UVmaskhi);         UVres1 = vec_sel(UVres1,UVblank,UVmaskhi);                  //merge the Y and UV back together         hiImage = vec_mergeh(UVres1,Yres1);         loImage = vec_mergel(UVres1,Yres1);                  //pack it back down to unsigned char to store         inData[0] = vec_packsu(hiImage,loImage);                  inData++;         rightData++;                }        #ifndef PPC970        vec_dss(0);        vec_dss(1);        vec_dss(2);        vec_dss(3);        #endif    }}
开发者ID:kmatheussen,项目名称:libpd,代码行数:101,


示例19: put_no_rnd_h264_chroma_mc8_altivec

/* this code assume that stride % 16 == 0 */void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {    signed int ABCD[4] __attribute__((aligned(16))) =                        {((8 - x) * (8 - y)),                          ((x) * (8 - y)),                          ((8 - x) * (y)),                          ((x) * (y))};    register int i;    vector unsigned char fperm;    const vector signed int vABCD = vec_ld(0, ABCD);    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);    const vector signed int vzero = vec_splat_s32(0);    const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));    const vector unsigned short v6us = vec_splat_u16(6);    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 
1 : 0;    vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;    vector unsigned char vsrc0uc, vsrc1uc;    vector signed short vsrc0ssH, vsrc1ssH;    vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;    vector signed short vsrc2ssH, vsrc3ssH, psum;    vector unsigned char vdst, ppsum, fsum;    if (((unsigned long)dst) % 16 == 0) {      fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,                                        0x14, 0x15, 0x16, 0x17,                                        0x08, 0x09, 0x0A, 0x0B,                                        0x0C, 0x0D, 0x0E, 0x0F);    } else {      fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,                                        0x04, 0x05, 0x06, 0x07,                                        0x18, 0x19, 0x1A, 0x1B,                                        0x1C, 0x1D, 0x1E, 0x1F);    }    vsrcAuc = vec_ld(0, src);    if (loadSecond)      vsrcBuc = vec_ld(16, src);    vsrcperm0 = vec_lvsl(0, src);    vsrcperm1 = vec_lvsl(1, src);    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);    if (reallyBadAlign)      vsrc1uc = vsrcBuc;    else      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);    vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                               (vector unsigned char)vsrc0uc);    vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                               (vector unsigned char)vsrc1uc);    if (!loadSecond) {// -> !reallyBadAlign      for (i = 0 ; i < h ; i++) {        vsrcCuc = vec_ld(stride + 0, src);        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);        vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                                (vector unsigned char)vsrc2uc);        vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                                (vector 
unsigned char)vsrc3uc);        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));        psum = vec_mladd(vB, vsrc1ssH, psum);        psum = vec_mladd(vC, vsrc2ssH, psum);        psum = vec_mladd(vD, vsrc3ssH, psum);        psum = vec_add(v28ss, psum);        psum = vec_sra(psum, v6us);        vdst = vec_ld(0, dst);        ppsum = (vector unsigned char)vec_packsu(psum, psum);        fsum = vec_perm(vdst, ppsum, fperm);        vec_st(fsum, 0, dst);        vsrc0ssH = vsrc2ssH;        vsrc1ssH = vsrc3ssH;        dst += stride;        src += stride;      }    } else {        vector unsigned char vsrcDuc;      for (i = 0 ; i < h ; i++) {        vsrcCuc = vec_ld(stride + 0, src);        vsrcDuc = vec_ld(stride + 16, src);        vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);        if (reallyBadAlign)          vsrc3uc = vsrcDuc;        else//.........这里部分代码省略.........
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:101,


示例20: put_no_rnd_pixels16_xy2_altivec

/* next one assumes that ((line_size % 16) == 0) */static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h){    register int i;    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;    register vector unsigned char blockv, temp1, temp2;    register vector unsigned short temp3, temp4,        pixelssum1, pixelssum2, pixelssum3, pixelssum4;    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);    temp1 = vec_ld(0, pixels);    temp2 = vec_ld(16, pixels);    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {        pixelsv2 = temp2;    } else {        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));    }    pixelsv3 = vec_mergel(vczero, pixelsv1);    pixelsv4 = vec_mergel(vczero, pixelsv2);    pixelsv1 = vec_mergeh(vczero, pixelsv1);    pixelsv2 = vec_mergeh(vczero, pixelsv2);    pixelssum3 = vec_add((vector unsigned short)pixelsv3,                         (vector unsigned short)pixelsv4);    pixelssum3 = vec_add(pixelssum3, vcone);    pixelssum1 = vec_add((vector unsigned short)pixelsv1,                         (vector unsigned short)pixelsv2);    pixelssum1 = vec_add(pixelssum1, vcone);    for (i = 0; i < h ; i++) {        blockv = vec_ld(0, block);        temp1 = vec_ld(line_size, pixels);        temp2 = vec_ld(line_size + 16, pixels);        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {            pixelsv2 = temp2;        } else {            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));        }        pixelsv3 = vec_mergel(vczero, pixelsv1);        pixelsv4 = 
vec_mergel(vczero, pixelsv2);        pixelsv1 = vec_mergeh(vczero, pixelsv1);        pixelsv2 = vec_mergeh(vczero, pixelsv2);        pixelssum4 = vec_add((vector unsigned short)pixelsv3,                             (vector unsigned short)pixelsv4);        pixelssum2 = vec_add((vector unsigned short)pixelsv1,                             (vector unsigned short)pixelsv2);        temp4 = vec_add(pixelssum3, pixelssum4);        temp4 = vec_sra(temp4, vctwo);        temp3 = vec_add(pixelssum1, pixelssum2);        temp3 = vec_sra(temp3, vctwo);        pixelssum3 = vec_add(pixelssum4, vcone);        pixelssum1 = vec_add(pixelssum2, vcone);        blockv = vec_packsu(temp3, temp4);        vec_st(blockv, 0, block);        block += line_size;        pixels += line_size;    }}
开发者ID:AVLeo,项目名称:libav,代码行数:68,


示例21: main

//.........这里部分代码省略.........  vector long long ls = vec_or (la, lb);  vector long long lt = vec_or (la, ld);  vector long long lu = vec_or (ld, la);  vector unsigned long long us = vec_or (ua, ub);  vector unsigned long long ut = vec_or (ua, ud);  vector unsigned long long uu = vec_or (ud, ua);  vector unsigned char ca = {0,4,8,1,5,9,2,6,10,3,7,11,15,12,14,13};  vector long long lv = vec_perm (la, lb, ca);  vector unsigned long long uv = vec_perm (ua, ub, ca);  vector long long lw = vec_sel (la, lb, lc);  vector long long lx = vec_sel (la, lb, uc);  vector long long ly = vec_sel (la, lb, ld);  vector unsigned long long uw = vec_sel (ua, ub, lc);  vector unsigned long long ux = vec_sel (ua, ub, uc);  vector unsigned long long uy = vec_sel (ua, ub, ld);  vector long long lz = vec_xor (la, lb);  vector long long l0 = vec_xor (la, ld);  vector long long l1 = vec_xor (ld, la);  vector unsigned long long uz = vec_xor (ua, ub);  vector unsigned long long u0 = vec_xor (ua, ud);  vector unsigned long long u1 = vec_xor (ud, ua);  int ia = vec_all_eq (ua, ub);  int ib = vec_all_ge (ua, ub);  int ic = vec_all_gt (ua, ub);  int id = vec_all_le (ua, ub);  int ie = vec_all_lt (ua, ub);  int ig = vec_all_ne (ua, ub);  int ih = vec_any_eq (ua, ub);  int ii = vec_any_ge (ua, ub);  int ij = vec_any_gt (ua, ub);  int ik = vec_any_le (ua, ub);  int il = vec_any_lt (ua, ub);  int im = vec_any_ne (ua, ub);  vector int sia = {9, 16, 25, 36};  vector int sib = {-8, -27, -64, -125};  vector int sic = vec_mergee (sia, sib);  vector int sid = vec_mergeo (sia, sib);  vector unsigned int uia = {9, 16, 25, 36};  vector unsigned int uib = {8, 27, 64, 125};  vector unsigned int uic = vec_mergee (uia, uib);  vector unsigned int uid = vec_mergeo (uia, uib);  vector bool int bia = {0, -1, -1, 0};  vector bool int bib = {-1, -1, 0, -1};  vector bool int bic = vec_mergee (bia, bib);  vector bool int bid = vec_mergeo (bia, bib);  vector unsigned int uie = vec_packsu (ua, ub);  vector long long l2 = 
vec_cntlz (la);  vector unsigned long long u2 = vec_cntlz (ua);  vector int sie = vec_cntlz (sia);  vector unsigned int uif = vec_cntlz (uia);  vector short ssa = {20, -40, -60, 80, 100, -120, -140, 160};  vector short ssb = vec_cntlz (ssa);  vector unsigned short usa = {81, 72, 63, 54, 45, 36, 27, 18};  vector unsigned short usb = vec_cntlz (usa);  vector signed char sca = {-4, 3, -9, 15, -31, 31, 0, 0,		            1, 117, -36, 99, 98, 97, 96, 95};  vector signed char scb = vec_cntlz (sca);  vector unsigned char cb = vec_cntlz (ca);  vector double dd = vec_xl (0, &y);  vec_xst (dd, 0, &z);  vector double de = vec_round (dd);  vector double df = vec_splat (de, 0);  vector double dg = vec_splat (de, 1);  vector long long l3 = vec_splat (l2, 0);  vector long long l4 = vec_splat (l2, 1);  vector unsigned long long u3 = vec_splat (u2, 0);  vector unsigned long long u4 = vec_splat (u2, 1);  vector bool long long l5 = vec_splat (ld, 0);  vector bool long long l6 = vec_splat (ld, 1);  vector long long l7 = vec_div (l3, l4);  vector unsigned long long u5 = vec_div (u3, u4);  vector long long l8 = vec_mul (l3, l4);  vector unsigned long long u6 = vec_mul (u3, u4);  vector double dh = vec_ctf (la, -2);  vector double di = vec_ctf (ua, 2);  vector long long l9 = vec_cts (dh, -2);  vector unsigned long long u7 = vec_ctu (di, 2);  return 0;}
开发者ID:0day-ci,项目名称:gcc,代码行数:101,


示例22: PREFIX_h264_chroma_mc8_altivec

/* this code assume that stride % 16 == 0 */void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {  POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);    DECLARE_ALIGNED_16(signed int, ABCD[4]) =                        {((8 - x) * (8 - y)),                          ((x) * (8 - y)),                          ((8 - x) * (y)),                          ((x) * (y))};    register int i;    vec_u8_t fperm;    const vec_s32_t vABCD = vec_ld(0, ABCD);    const vec_s16_t vA = vec_splat((vec_s16_t)vABCD, 1);    const vec_s16_t vB = vec_splat((vec_s16_t)vABCD, 3);    const vec_s16_t vC = vec_splat((vec_s16_t)vABCD, 5);    const vec_s16_t vD = vec_splat((vec_s16_t)vABCD, 7);    LOAD_ZERO;    const vec_s16_t v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));    const vec_u16_t v6us = vec_splat_u16(6);    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;    vec_u8_t vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;    vec_u8_t vsrc0uc, vsrc1uc;    vec_s16_t vsrc0ssH, vsrc1ssH;    vec_u8_t vsrcCuc, vsrc2uc, vsrc3uc;    vec_s16_t vsrc2ssH, vsrc3ssH, psum;    vec_u8_t vdst, ppsum, vfdst, fsum;  POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);    if (((unsigned long)dst) % 16 == 0) {      fperm = (vec_u8_t)AVV(0x10, 0x11, 0x12, 0x13,                            0x14, 0x15, 0x16, 0x17,                            0x08, 0x09, 0x0A, 0x0B,                            0x0C, 0x0D, 0x0E, 0x0F);    } else {      fperm = (vec_u8_t)AVV(0x00, 0x01, 0x02, 0x03,                            0x04, 0x05, 0x06, 0x07,                            0x18, 0x19, 0x1A, 0x1B,                            0x1C, 0x1D, 0x1E, 0x1F);    }    vsrcAuc = vec_ld(0, src);    if (loadSecond)      vsrcBuc = vec_ld(16, src);    vsrcperm0 = vec_lvsl(0, src);    vsrcperm1 = vec_lvsl(1, src);    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);    if (reallyBadAlign)      vsrc1uc = vsrcBuc;    
else      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);    vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);    vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);    if (!loadSecond) {// -> !reallyBadAlign      for (i = 0 ; i < h ; i++) {        vsrcCuc = vec_ld(stride + 0, src);        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);        vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);        vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc);        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));        psum = vec_mladd(vB, vsrc1ssH, psum);        psum = vec_mladd(vC, vsrc2ssH, psum);        psum = vec_mladd(vD, vsrc3ssH, psum);        psum = vec_add(v32ss, psum);        psum = vec_sra(psum, v6us);        vdst = vec_ld(0, dst);        ppsum = (vec_u8_t)vec_packsu(psum, psum);        vfdst = vec_perm(vdst, ppsum, fperm);        OP_U8_ALTIVEC(fsum, vfdst, vdst);        vec_st(fsum, 0, dst);        vsrc0ssH = vsrc2ssH;        vsrc1ssH = vsrc3ssH;        dst += stride;        src += stride;      }    } else {        vec_u8_t vsrcDuc;      for (i = 0 ; i < h ; i++) {        vsrcCuc = vec_ld(stride + 0, src);        vsrcDuc = vec_ld(stride + 16, src);        vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);        if (reallyBadAlign)          vsrc3uc = vsrcDuc;//.........这里部分代码省略.........
开发者ID:JERUKA9,项目名称:amv-codec-tools,代码行数:101,


示例23: processYUVAltivec

/* more optimized version - unrolled and load-hoisted */void pix_offset :: processYUVAltivec(imageStruct &image){  register int h,w,width,height;  width = image.xsize/16; //for altivec  height = image.ysize;  //format is U Y V Y  // start of working altivec function  union {    short       elements[8];    vector      signed short v;  } transferBuffer;  register vector signed short c, hi, lo;  register vector signed short hi1, lo1;  register vector signed short loadhi, loadhi1, loadlo, loadlo1;  register vector unsigned char zero = vec_splat_u8(0);  register vector unsigned char *inData = (vector unsigned char*) image.data;  //Write the pixel (pair) to the transfer buffer  //transferBuffer.i = (U << 24) | (Y << 16) | (V << 8 ) | Y;  transferBuffer.elements[0] = U;  transferBuffer.elements[1] = Y;  transferBuffer.elements[2] = V;  transferBuffer.elements[3] = Y;  transferBuffer.elements[4] = U;  transferBuffer.elements[5] = Y;  transferBuffer.elements[6] = V;  transferBuffer.elements[7] = Y;  //Load it into the vector unit  c = transferBuffer.v;#ifndef PPC970  UInt32                        prefetchSize = GetPrefetchConstant( 16, 1,      256 );  vec_dst( inData, prefetchSize, 0 );  vec_dst( inData+16, prefetchSize, 1 );  vec_dst( inData+32, prefetchSize, 2 );  vec_dst( inData+64, prefetchSize, 3 );#endif  //expand the UInt8's to short's  loadhi = (vector signed short) vec_mergeh( zero, inData[0] );  loadlo = (vector signed short) vec_mergel( zero, inData[0] );  loadhi1 = (vector signed short) vec_mergeh( zero, inData[1] );  loadlo1 = (vector signed short) vec_mergel( zero, inData[1] );  /  for ( h=0; h<height; h++) {    for (w=0; w<width; w++) {#ifndef PPC970      vec_dst( inData, prefetchSize, 0 );      vec_dst( inData+16, prefetchSize, 1 );      vec_dst( inData+32, prefetchSize, 2 );      vec_dst( inData+64, prefetchSize, 3 );#endif      //add the constant to it      hi = vec_add( loadhi, c );      lo = vec_add( loadlo, c );      hi1 = vec_add( loadhi1, c );      
lo1 = vec_add( loadlo1, c );      //expand the UInt8's to short's      loadhi = (vector signed short) vec_mergeh( zero, inData[2] );      loadlo = (vector signed short) vec_mergel( zero, inData[2] );      loadhi1 = (vector signed short) vec_mergeh( zero, inData[3] );      loadlo1 = (vector signed short) vec_mergel( zero, inData[3] );      //pack the result back down, with saturation      inData[0] = vec_packsu( hi, lo );      inData++;      inData[0] = vec_packsu( hi1, lo1 );      inData++;    }  }  //  // finish the last iteration after the loop  //  hi = vec_add( loadhi, c );  lo = vec_add( loadlo, c );  hi1 = vec_add( loadhi1, c );  lo1 = vec_add( loadlo1, c );  //pack the result back down, with saturation  inData[0] = vec_packsu( hi, lo );//.........这里部分代码省略.........
开发者ID:megrimm,项目名称:Gem,代码行数:101,


示例24: PREFIX_h264_qpel16_hv_lowpass_altivec

//.........这里部分代码省略.........  int16_t *tmpbis = tmp - (tmpStride * 21);  vector signed short tmpM2ssA = vec_ld(0, tmpbis);  vector signed short tmpM2ssB = vec_ld(16, tmpbis);  tmpbis += tmpStride;  vector signed short tmpM1ssA = vec_ld(0, tmpbis);  vector signed short tmpM1ssB = vec_ld(16, tmpbis);  tmpbis += tmpStride;  vector signed short tmpP0ssA = vec_ld(0, tmpbis);  vector signed short tmpP0ssB = vec_ld(16, tmpbis);  tmpbis += tmpStride;  vector signed short tmpP1ssA = vec_ld(0, tmpbis);  vector signed short tmpP1ssB = vec_ld(16, tmpbis);  tmpbis += tmpStride;  vector signed short tmpP2ssA = vec_ld(0, tmpbis);  vector signed short tmpP2ssB = vec_ld(16, tmpbis);  tmpbis += tmpStride;  for (i = 0 ; i < 16 ; i++) {    const vector signed short tmpP3ssA = vec_ld(0, tmpbis);    const vector signed short tmpP3ssB = vec_ld(16, tmpbis);    tmpbis += tmpStride;    const vector signed short sum1A = vec_adds(tmpP0ssA, tmpP1ssA);    const vector signed short sum1B = vec_adds(tmpP0ssB, tmpP1ssB);    const vector signed short sum2A = vec_adds(tmpM1ssA, tmpP2ssA);    const vector signed short sum2B = vec_adds(tmpM1ssB, tmpP2ssB);    const vector signed short sum3A = vec_adds(tmpM2ssA, tmpP3ssA);    const vector signed short sum3B = vec_adds(tmpM2ssB, tmpP3ssB);    tmpM2ssA = tmpM1ssA;    tmpM2ssB = tmpM1ssB;    tmpM1ssA = tmpP0ssA;    tmpM1ssB = tmpP0ssB;    tmpP0ssA = tmpP1ssA;    tmpP0ssB = tmpP1ssB;    tmpP1ssA = tmpP2ssA;    tmpP1ssB = tmpP2ssB;    tmpP2ssA = tmpP3ssA;    tmpP2ssB = tmpP3ssB;    const vector signed int pp1Ae = vec_mule(sum1A, v20ss);    const vector signed int pp1Ao = vec_mulo(sum1A, v20ss);    const vector signed int pp1Be = vec_mule(sum1B, v20ss);    const vector signed int pp1Bo = vec_mulo(sum1B, v20ss);    const vector signed int pp2Ae = vec_mule(sum2A, v5ss);    const vector signed int pp2Ao = vec_mulo(sum2A, v5ss);    const vector signed int pp2Be = vec_mule(sum2B, v5ss);    const vector signed int pp2Bo = vec_mulo(sum2B, v5ss);    const vector 
signed int pp3Ae = vec_sra((vector signed int)sum3A, v16ui);    const vector signed int pp3Ao = vec_mulo(sum3A, v1ss);    const vector signed int pp3Be = vec_sra((vector signed int)sum3B, v16ui);    const vector signed int pp3Bo = vec_mulo(sum3B, v1ss);    const vector signed int pp1cAe = vec_add(pp1Ae, v512si);    const vector signed int pp1cAo = vec_add(pp1Ao, v512si);    const vector signed int pp1cBe = vec_add(pp1Be, v512si);    const vector signed int pp1cBo = vec_add(pp1Bo, v512si);    const vector signed int pp32Ae = vec_sub(pp3Ae, pp2Ae);    const vector signed int pp32Ao = vec_sub(pp3Ao, pp2Ao);    const vector signed int pp32Be = vec_sub(pp3Be, pp2Be);    const vector signed int pp32Bo = vec_sub(pp3Bo, pp2Bo);    const vector signed int sumAe = vec_add(pp1cAe, pp32Ae);    const vector signed int sumAo = vec_add(pp1cAo, pp32Ao);    const vector signed int sumBe = vec_add(pp1cBe, pp32Be);    const vector signed int sumBo = vec_add(pp1cBo, pp32Bo);        const vector signed int ssumAe = vec_sra(sumAe, v10ui);    const vector signed int ssumAo = vec_sra(sumAo, v10ui);    const vector signed int ssumBe = vec_sra(sumBe, v10ui);    const vector signed int ssumBo = vec_sra(sumBo, v10ui);    const vector signed short ssume = vec_packs(ssumAe, ssumBe);    const vector signed short ssumo = vec_packs(ssumAo, ssumBo);    const vector unsigned char sumv = vec_packsu(ssume, ssumo);    const vector unsigned char sum = vec_perm(sumv, sumv, mperm);    const vector unsigned char dst1 = vec_ld(0, dst);    const vector unsigned char dst2 = vec_ld(16, dst);    const vector unsigned char vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));    vector unsigned char fsum;    OP_U8_ALTIVEC(fsum, sum, vdst);    const vector unsigned char rsum = vec_perm(fsum, fsum, dstperm);    const vector unsigned char fdst1 = vec_sel(dst1, rsum, dstmask);    const vector unsigned char fdst2 = vec_sel(rsum, dst2, dstmask);    vec_st(fdst1, 0, dst);    vec_st(fdst2, 16, dst);    dst += dstStride;  }  
POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);}
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,


示例25: PREFIX_h264_qpel16_v_lowpass_altivec

/* this code assume stride % 16 == 0 */static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);    register int i;  const vector signed int vzero = vec_splat_s32(0);  const vector unsigned char perm = vec_lvsl(0, src);  const vector signed short v20ss = (const vector signed short)AVV(20);  const vector unsigned short v5us = vec_splat_u16(5);  const vector signed short v5ss = vec_splat_s16(5);  const vector signed short v16ss = (const vector signed short)AVV(16);  const vector unsigned char dstperm = vec_lvsr(0, dst);  const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);  const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);    uint8_t *srcbis = src - (srcStride * 2);  const vector unsigned char srcM2a = vec_ld(0, srcbis);  const vector unsigned char srcM2b = vec_ld(16, srcbis);  const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm);  srcbis += srcStride;  const vector unsigned char srcM1a = vec_ld(0, srcbis);  const vector unsigned char srcM1b = vec_ld(16, srcbis);  const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm);  srcbis += srcStride;  const vector unsigned char srcP0a = vec_ld(0, srcbis);  const vector unsigned char srcP0b = vec_ld(16, srcbis);  const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm);  srcbis += srcStride;  const vector unsigned char srcP1a = vec_ld(0, srcbis);  const vector unsigned char srcP1b = vec_ld(16, srcbis);  const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm);  srcbis += srcStride;  const vector unsigned char srcP2a = vec_ld(0, srcbis);  const vector unsigned char srcP2b = vec_ld(16, srcbis);  const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm);  srcbis += srcStride;  vector signed short srcM2ssA = (vector signed 
short)vec_mergeh((vector unsigned char)vzero, srcM2);  vector signed short srcM2ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2);  vector signed short srcM1ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1);  vector signed short srcM1ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1);  vector signed short srcP0ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0);  vector signed short srcP0ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0);  vector signed short srcP1ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1);  vector signed short srcP1ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1);  vector signed short srcP2ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2);  vector signed short srcP2ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2);  for (i = 0 ; i < 16 ; i++) {    const vector unsigned char srcP3a = vec_ld(0, srcbis);    const vector unsigned char srcP3b = vec_ld(16, srcbis);    const vector unsigned char srcP3 = vec_perm(srcP3a, srcP3b, perm);    const vector signed short srcP3ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3);    const vector signed short srcP3ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3);    srcbis += srcStride;    const vector signed short sum1A = vec_adds(srcP0ssA, srcP1ssA);    const vector signed short sum1B = vec_adds(srcP0ssB, srcP1ssB);    const vector signed short sum2A = vec_adds(srcM1ssA, srcP2ssA);    const vector signed short sum2B = vec_adds(srcM1ssB, srcP2ssB);    const vector signed short sum3A = vec_adds(srcM2ssA, srcP3ssA);    const vector signed short sum3B = vec_adds(srcM2ssB, srcP3ssB);    srcM2ssA = srcM1ssA;    srcM2ssB = srcM1ssB;    srcM1ssA = srcP0ssA;    srcM1ssB = srcP0ssB;    srcP0ssA = srcP1ssA;    srcP0ssB = srcP1ssB;    srcP1ssA = srcP2ssA;    
srcP1ssB = srcP2ssB;    srcP2ssA = srcP3ssA;    srcP2ssB = srcP3ssB;        const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss);    const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss);    const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);    const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);        const vector signed short pp3A = vec_add(sum3A, pp1A);    const vector signed short pp3B = vec_add(sum3B, pp1B);    const vector signed short psumA = vec_sub(pp3A, pp2A);    const vector signed short psumB = vec_sub(pp3B, pp2B);    const vector signed short sumA = vec_sra(psumA, v5us);    const vector signed short sumB = vec_sra(psumB, v5us);    const vector unsigned char sum = vec_packsu(sumA, sumB);    const vector unsigned char dst1 = vec_ld(0, dst);    const vector unsigned char dst2 = vec_ld(16, dst);    const vector unsigned char vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));    vector unsigned char fsum;    OP_U8_ALTIVEC(fsum, sum, vdst);//.........这里部分代码省略.........
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,


示例26: PREFIX_h264_qpel16_h_lowpass_altivec

//.........这里部分代码省略.........      srcP1 = vec_perm(srcR1, srcR2, permP1);      srcP2 = vec_perm(srcR1, srcR2, permP2);      srcP3 = srcR2;    } break;    case 12: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = vec_perm(srcR1, srcR2, permP1);      srcP2 = srcR2;      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 13: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = vec_perm(srcR1, srcR2, permP0);      srcP1 = srcR2;      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 14: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = vec_perm(srcR1, srcR2, permM1);      srcP0 = srcR2;      srcP1 = vec_perm(srcR2, srcR3, permP1);      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    case 15: {      vector unsigned char srcR3 = vec_ld(30, src);      srcM2 = vec_perm(srcR1, srcR2, permM2);      srcM1 = srcR2;      srcP0 = vec_perm(srcR2, srcR3, permP0);      srcP1 = vec_perm(srcR2, srcR3, permP1);      srcP2 = vec_perm(srcR2, srcR3, permP2);      srcP3 = vec_perm(srcR2, srcR3, permP3);    } break;    }    const vector signed short srcP0A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0);    const vector signed short srcP0B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0);    const vector signed short srcP1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1);    const vector signed short srcP1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1);    const vector signed short srcP2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2);    const vector 
signed short srcP2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2);    const vector signed short srcP3A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3);    const vector signed short srcP3B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3);    const vector signed short srcM1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1);    const vector signed short srcM1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1);    const vector signed short srcM2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM2);    const vector signed short srcM2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2);    const vector signed short sum1A = vec_adds(srcP0A, srcP1A);    const vector signed short sum1B = vec_adds(srcP0B, srcP1B);    const vector signed short sum2A = vec_adds(srcM1A, srcP2A);    const vector signed short sum2B = vec_adds(srcM1B, srcP2B);    const vector signed short sum3A = vec_adds(srcM2A, srcP3A);    const vector signed short sum3B = vec_adds(srcM2B, srcP3B);        const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss);    const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss);    const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);    const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);        const vector signed short pp3A = vec_add(sum3A, pp1A);    const vector signed short pp3B = vec_add(sum3B, pp1B);    const vector signed short psumA = vec_sub(pp3A, pp2A);    const vector signed short psumB = vec_sub(pp3B, pp2B);    const vector signed short sumA = vec_sra(psumA, v5us);    const vector signed short sumB = vec_sra(psumB, v5us);    const vector unsigned char sum = vec_packsu(sumA, sumB);    const vector unsigned char dst1 = vec_ld(0, dst);    const vector unsigned char dst2 = vec_ld(16, dst);    const vector unsigned char vdst = vec_perm(dst1, 
dst2, vec_lvsl(0, dst));    vector unsigned char fsum;    OP_U8_ALTIVEC(fsum, sum, vdst);    const vector unsigned char rsum = vec_perm(fsum, fsum, dstperm);    const vector unsigned char fdst1 = vec_sel(dst1, rsum, dstmask);    const vector unsigned char fdst2 = vec_sel(rsum, dst2, dstmask);    vec_st(fdst1, 0, dst);    vec_st(fdst2, 16, dst);    src += srcStride;    dst += dstStride;  }POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);}
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,


示例27: PREFIX_h264_qpel16_h_lowpass_altivec

//.........这里部分代码省略.........            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = vec_perm(srcR1, srcR2, permP3);        } break;        case 11: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = srcR2;        } break;        case 12: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = srcR2;            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 13: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = srcR2;            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 14: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = srcR2;            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 15: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = srcR2;            srcP0 = vec_perm(srcR2, srcR3, permP0);            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;     
   }        srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);        srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);        srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);        srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);        srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);        srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);        srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);        srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);        srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);        srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);        srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2);        sum1A = vec_adds(srcP0A, srcP1A);        sum1B = vec_adds(srcP0B, srcP1B);        sum2A = vec_adds(srcM1A, srcP2A);        sum2B = vec_adds(srcM1B, srcP2B);        sum3A = vec_adds(srcM2A, srcP3A);        sum3B = vec_adds(srcM2B, srcP3B);        pp1A = vec_mladd(sum1A, v20ss, v16ss);        pp1B = vec_mladd(sum1B, v20ss, v16ss);        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);        pp3A = vec_add(sum3A, pp1A);        pp3B = vec_add(sum3B, pp1B);        psumA = vec_sub(pp3A, pp2A);        psumB = vec_sub(pp3B, pp2B);        sumA = vec_sra(psumA, v5us);        sumB = vec_sra(psumB, v5us);        sum = vec_packsu(sumA, sumB);        ASSERT_ALIGNED(dst);        OP_U8_ALTIVEC(fsum, sum, vec_ld(0, dst));        vec_st(fsum, 0, dst);        src += srcStride;        dst += dstStride;    }}
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例28: PREFIX_h264_chroma_mc8_altivec

/* this code assume that stride % 16 == 0 */void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {  POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);  POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);    signed int ABCD[4] __attribute__((aligned(16)));    register int i;    ABCD[0] = ((8 - x) * (8 - y));    ABCD[1] = ((x) * (8 - y));    ABCD[2] = ((8 - x) * (y));    ABCD[3] = ((x) * (y));    const vector signed int vABCD = vec_ld(0, ABCD);    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);    const vector signed int vzero = vec_splat_s32(0);    const vector signed short v32ss = (const vector signed short)AVV(32);    const vector unsigned short v6us = vec_splat_u16(6);    vector unsigned char fperm;    if (((unsigned long)dst) % 16 == 0) {      fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,                                        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F);    } else {      fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,                                        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);    }    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 
1 : 0;        vector unsigned char vsrcAuc;    vector unsigned char vsrcBuc;    vector unsigned char vsrcperm0;    vector unsigned char vsrcperm1;    vsrcAuc = vec_ld(0, src);    if (loadSecond)      vsrcBuc = vec_ld(16, src);    vsrcperm0 = vec_lvsl(0, src);    vsrcperm1 = vec_lvsl(1, src);        vector unsigned char vsrc0uc;    vector unsigned char vsrc1uc;    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);    if (reallyBadAlign)      vsrc1uc = vsrcBuc;    else      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);        vector signed short vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc0uc);    vector signed short vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc1uc);    if (!loadSecond) {// -> !reallyBadAlign      for (i = 0 ; i < h ; i++) {        vector unsigned char vsrcCuc;        vsrcCuc = vec_ld(stride + 0, src);                vector unsigned char vsrc2uc;        vector unsigned char vsrc3uc;        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);                vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc);        vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc);                vector signed short psum;                psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));        psum = vec_mladd(vB, vsrc1ssH, psum);        psum = vec_mladd(vC, vsrc2ssH, psum);        psum = vec_mladd(vD, vsrc3ssH, psum);        psum = vec_add(v32ss, psum);        psum = vec_sra(psum, v6us);                vector unsigned char vdst = vec_ld(0, dst);        vector unsigned char ppsum = (vector unsigned char)vec_packsu(psum, psum);                vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm);        vector unsigned char fsum;                OP_U8_ALTIVEC(fsum, 
vfdst, vdst);        vec_st(fsum, 0, dst);                vsrc0ssH = vsrc2ssH;        vsrc1ssH = vsrc3ssH;                dst += stride;        src += stride;      }    } else {      for (i = 0 ; i < h ; i++) {        vector unsigned char vsrcCuc;        vector unsigned char vsrcDuc;        vsrcCuc = vec_ld(stride + 0, src);        vsrcDuc = vec_ld(stride + 16, src);                vector unsigned char vsrc2uc;        vector unsigned char vsrc3uc;//.........这里部分代码省略.........
开发者ID:Erikhht,项目名称:TCPMP,代码行数:101,


示例29: processYUV_Altivec

void pix_add :: processYUV_Altivec(imageStruct &image, imageStruct &right){ int h,w,width;   width = image.xsize/8;   //format is U Y V Y    union    {        //unsigned int	i;        short	elements[8];        //vector signed char v;        vector	signed short v;    }shortBuffer;        union    {        //unsigned int	i;        unsigned char	elements[16];        //vector signed char v;        vector	unsigned char v;    }charBuffer;    //vector unsigned char c;    register vector signed short d, hiImage, loImage, YRight, UVRight, YImage, UVImage, UVTemp, YTemp;   // vector unsigned char zero = vec_splat_u8(0);    register vector unsigned char c,one;  //  vector signed short zshort = vec_splat_s16(0);    vector unsigned char *inData = (vector unsigned char*) image.data;    vector unsigned char *rightData = (vector unsigned char*) right.data;    //Write the pixel (pair) to the transfer buffer    charBuffer.elements[0] = 2;    charBuffer.elements[1] = 1;    charBuffer.elements[2] = 2;    charBuffer.elements[3] = 1;    charBuffer.elements[4] = 2;    charBuffer.elements[5] = 1;    charBuffer.elements[6] = 2;    charBuffer.elements[7] = 1;    charBuffer.elements[8] = 2;    charBuffer.elements[9] = 1;    charBuffer.elements[10] = 2;    charBuffer.elements[11] = 1;    charBuffer.elements[12] = 2;    charBuffer.elements[13] = 1;    charBuffer.elements[14] = 2;    charBuffer.elements[15] = 1;    //Load it into the vector unit    c = charBuffer.v;    one =  vec_splat_u8( 1 );    shortBuffer.elements[0] = 255;    //Load it into the vector unit    d = shortBuffer.v;    d = static_cast<vector signed short>(vec_splat(static_cast<vector signed short>(d),0));#ifndef PPC970    UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );    vec_dst( inData, prefetchSize, 0 );    vec_dst( rightData, prefetchSize, 1 );#endif    for ( h=0; h<image.ysize; h++){      for (w=0; w<width; w++)        {#ifndef PPC970	  vec_dst( inData, prefetchSize, 0 );	  vec_dst( rightData, prefetchSize, 1 
);#endif	  //interleaved U Y V Y chars	  //vec_mule UV * 2 to short vector U V U V shorts	  UVImage = static_cast<vector signed short>(vec_mule(one,inData[0]));	  UVRight = static_cast<vector signed short>(vec_mule(c,rightData[0]));	  //vec_mulo Y * 1 to short vector Y Y Y Y shorts	  YImage = static_cast<vector signed short>(vec_mulo(c,inData[0]));	  YRight = static_cast<vector signed short>(vec_mulo(c,rightData[0]));	  //vel_subs UV - 255	  UVRight = static_cast<vector signed short>(vec_subs(UVRight, d));	  //vec_adds UV	  UVTemp = vec_adds(UVImage,UVRight);	  //vec_adds Y	  YTemp = vec_adds(YImage,YRight);	  hiImage = vec_mergeh(UVTemp,YTemp);	  loImage = vec_mergel(UVTemp,YTemp);	  //vec_mergel + vec_mergeh Y and UV	  inData[0] = vec_packsu(hiImage, loImage);	  inData++;	  rightData++;        }#ifndef PPC970        vec_dss( 0 );//.........这里部分代码省略.........
开发者ID:avilleret,项目名称:Gem,代码行数:101,



注:本文中的 vec_packsu 函数示例整理自 GitHub/MSDocs 等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。


C++ vec_perm函数代码示例
C++ vec_new函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网,自学 Excel、自学 PS、自学 CAD、自学 C 语言、自学 CSS3 实例,是一个通过网络自主学习工作技能的自学平台,也是网友喜欢的软件自学网站。