您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vec_sl函数代码示例

51自学网 2021-06-03 09:36:32
  C++
这篇教程C++ vec_sl函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中vec_sl函数的典型用法代码示例。如果您正苦于以下问题:C++ vec_sl函数的具体用法?C++ vec_sl怎么用?C++ vec_sl使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_sl函数的27个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: v_signmask

/** Mask **/inline int v_signmask(const v_uint8x16& a){    vec_uchar16 sv  = vec_sr(a.val, vec_uchar16_sp(7));    static const vec_uchar16 slm = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};    sv = vec_sl(sv, slm);    vec_uint4 sv4 = vec_sum4s(sv, vec_uint4_z);    static const vec_uint4 slm4 = {0, 0, 8, 8};    sv4 = vec_sl(sv4, slm4);    return vec_extract(vec_sums((vec_int4) sv4, vec_int4_z), 3);}
开发者ID:ArkaJU,项目名称:opencv,代码行数:11,


示例2: ff_vp3_idct_put_altivec

/**
 * VP3 inverse DCT of one 8x8 block, written ("put") to dst as unsigned bytes.
 *
 * @param dst    destination of the first output row; each row is written as
 *               two 32-bit element stores at byte offsets 0 and 4
 * @param stride distance in bytes between consecutive output rows
 * @param block  the 64 input coefficients
 *
 * NOTE(review): `eight` and b0..b7 are not declared here; presumably they are
 * introduced by IDCT_START / IDCT_1D, which are defined elsewhere - confirm.
 */
void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
{
    vec_u8 t;
    IDCT_START

    // pixels are signed; so add 128*16 in addition to the normal 8
    // (2048 = 1 << 11; built with a shift because it is too large to splat)
    vec_s16 v2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));
    eight = vec_add(eight, v2048);

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

/* Pack one row to unsigned bytes with saturation and store its 8 pixels. */
#define PUT(a)\
    t = vec_packsu(a, a);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    PUT(b0)     dst += stride;
    PUT(b1)     dst += stride;
    PUT(b2)     dst += stride;
    PUT(b3)     dst += stride;
    PUT(b4)     dst += stride;
    PUT(b5)     dst += stride;
    PUT(b6)     dst += stride;
    PUT(b7)
}
开发者ID:9aa5,项目名称:FFmpeg,代码行数:27,


示例3: scalarproduct_int16_altivec

static int32_t scalarproduct_int16_altivec(const int16_t * v1, const int16_t * v2, int order, const int shift){    int i;    LOAD_ZERO;    register vec_s16 vec1, *pv;    register vec_s32 res = vec_splat_s32(0), t;    register vec_u32 shifts;    int32_t ires;    shifts = zero_u32v;    if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1)));    if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08));    if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04));    if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02));    if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01));    for(i = 0; i < order; i += 8){        pv = (vec_s16*)v1;        vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));        t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);        t = vec_sr(t, shifts);        res = vec_sums(t, res);        v1 += 8;        v2 += 8;    }    res = vec_splat(res, 3);    vec_ste(res, 0, &ires);    return ires;}
开发者ID:10045125,项目名称:xuggle-xuggler,代码行数:29,


示例4: quant_h263_inter_altivec_c

uint32_tquant_h263_inter_altivec_c(int16_t *coeff,                            int16_t *data,                            const uint32_t quant,                            const uint16_t *mpeg_quant_matrices){    vector unsigned char zerovec;    vector unsigned short mult;    vector unsigned short quant_m_2;    vector unsigned short quant_d_2;    vector unsigned short sum_short;    vector signed short acLevel;        vector unsigned int even;    vector unsigned int odd;        vector bool short m2_mask;    vector bool short zero_mask;        uint32_t result;#ifdef DEBUG    if(((unsigned)coeff) & 0x15)        fprintf(stderr, "quant_h263_inter_altivec_c:incorrect align, coeff: %lx/n", (long)coeff);#endif        /* initialisation stuff */    zerovec = vec_splat_u8(0);    *((unsigned short*)&mult) = (unsigned short)multipliers[quant];    mult = vec_splat(mult, 0);    *((unsigned short*)&quant_m_2) = (unsigned short)quant;    quant_m_2 = vec_splat(quant_m_2, 0);    quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));    *((unsigned short*)&quant_d_2) = (unsigned short)quant;    quant_d_2 = vec_splat(quant_d_2, 0);    quant_d_2 = vec_sr(quant_d_2, vec_splat_u16(1));    sum_short = (vector unsigned short)zerovec;        /* Quantize */    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();        QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();            /* Calculate the return value */    even = (vector unsigned int)vec_sum4s((vector signed short)sum_short, (vector signed int)zerovec);    even = (vector unsigned int)vec_sums((vector signed int)even, (vector signed int)zerovec);    even = vec_splat(even, 3);    vec_ste(even, 0, &result);    return result;}
开发者ID:roozbeh,项目名称:openCU,代码行数:56,


示例5: dequant_h263_inter_altivec_c

uint32_tdequant_h263_inter_altivec_c(int16_t *data,                                int16_t *coeff,                                const uint32_t quant,                                const uint16_t *mpeg_quant_matrices){    vector signed short acLevel;    vector signed short vec_2048;        vector unsigned short quant_m_2;    vector unsigned short quant_add;    vector unsigned short t;        register vector unsigned int even;    register vector unsigned int odd;    register vector unsigned int high;    register vector unsigned int low;        register vector unsigned char zerovec;        vector bool short equal_zero;    vector bool short less_zero;    vector bool short overflow;    #ifdef DEBUG    /* print alignment errors if this is on */    if(((unsigned)data) & 0x15)        fprintf(stderr, "dequant_h263_inter_altivec_c:incorrect align, data: %lx/n", (long)data);#endif        /* initialize */    *((unsigned short*)&quant_m_2) = (unsigned short)(quant << 1);    quant_m_2 = vec_splat(quant_m_2,0);        *((unsigned short*)&quant_add) = (unsigned short)(quant & 1 ? quant : quant - 1);    quant_add = vec_splat(quant_add,0);        vec_2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));    zerovec = vec_splat_u8(0);        /* dequant */    DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();        DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();        return 0;}
开发者ID:roozbeh,项目名称:openCU,代码行数:53,


示例6: vec_splat_u32

void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor){	const VECU32 shift = vec_splat_u32(-16);	const VECS32 scale1 = { factor, factor, factor, factor };	const VECS32 scale2 = { 0x100 - factor, 0x100 - factor, 0x100 - factor, 0x100 - factor, };	VECU32 temp = vec_msum((VECU16)m_value, (VECU16)vec_rl(scale1, shift), vec_splat_u32(0));	temp = vec_msum((VECU16)other.m_value, (VECU16)vec_rl(scale2, shift), temp);	m_value = vec_msum((VECU16)m_value, (VECU16)scale1, vec_mulo((VECU16)other.m_value, (VECU16)scale2));	m_value = vec_add(vec_sl(temp, shift), (VECU32)m_value);	sra(8);}
开发者ID:dinkc64,项目名称:mame,代码行数:13,


示例7: quant_h263_intra_altivec_c

uint32_tquant_h263_intra_altivec_c(int16_t *coeff,                                    int16_t *data,                                    const uint32_t quant,                                    const uint32_t dcscalar,                                    const uint16_t *mpeg_quant_matrices){    vector unsigned char zerovec;    vector unsigned short mult;    vector unsigned short quant_m_2;    vector signed short acLevel;        register vector unsigned int even;    register vector unsigned int odd;        vector bool short zero_mask;    vector bool short m2_mask;        register int16_t *origin_coeff = coeff;    register int16_t *origin_data = data;#ifdef DEBUG    if(((unsigned)coeff) & 15)        fprintf(stderr, "quant_h263_intra_altivec_c:incorrect align, coeff: %lx/n", (long)coeff);#endif        zerovec = vec_splat_u8(0);        *((unsigned short*)&mult) = (unsigned short)multipliers[quant];    mult = vec_splat(mult, 0);        *((unsigned short*)&quant_m_2) = (unsigned short)quant;    quant_m_2 = vec_splat(quant_m_2, 0);    quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));        QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();        QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();        // noch erstes setzen    origin_coeff[0] = DIV_DIV(origin_data[0], (int32_t)dcscalar);        return 0;}
开发者ID:roozbeh,项目名称:openCU,代码行数:50,


示例8: predict_16x16_p_altivec

static void predict_16x16_p_altivec( uint8_t *src ){    int16_t a, b, c, i;    int H = 0;    int V = 0;    int16_t i00;    for( i = 1; i <= 8; i++ )    {        H += i * ( src[7+i - FDEC_STRIDE ]  - src[7-i - FDEC_STRIDE ] );        V += i * ( src[(7+i)*FDEC_STRIDE -1] - src[(7-i)*FDEC_STRIDE -1] );    }    a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );    b = ( 5 * H + 32 ) >> 6;    c = ( 5 * V + 32 ) >> 6;    i00 = a - b * 7 - c * 7 + 16;    vect_sshort_u i00_u, b_u, c_u;    i00_u.s[0] = i00;    b_u.s[0]   = b;    c_u.s[0]   = c;    vec_u16_t val5_v = vec_splat_u16(5);    vec_s16_t i00_v, b_v, c_v;    i00_v = vec_splat(i00_u.v, 0);    b_v = vec_splat(b_u.v, 0);    c_v = vec_splat(c_u.v, 0);    vec_s16_t induc_v  = (vec_s16_t) CV(0,  1,  2,  3,  4,  5,  6,  7);    vec_s16_t b8_v = vec_sl(b_v, vec_splat_u16(3));    vec_s32_t mule_b_v = vec_mule(induc_v, b_v);    vec_s32_t mulo_b_v = vec_mulo(induc_v, b_v);    vec_s16_t mul_b_induc0_v = vec_pack(vec_mergeh(mule_b_v, mulo_b_v), vec_mergel(mule_b_v, mulo_b_v));    vec_s16_t add_i0_b_0v = vec_adds(i00_v, mul_b_induc0_v);    vec_s16_t add_i0_b_8v = vec_adds(b8_v, add_i0_b_0v);    int y;    for( y = 0; y < 16; y++ )    {        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);        vec_s16_t shift_8_v = vec_sra(add_i0_b_8v, val5_v);        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_8_v);        vec_st( com_sat_v, 0, &src[0]);        src += FDEC_STRIDE;        i00 += c;        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);        add_i0_b_8v = vec_adds(add_i0_b_8v, c_v);    }}
开发者ID:UIKit0,项目名称:H.264-in-CUDA,代码行数:50,


示例9: x264_add8x8_idct_dc_altivec

void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] ){    vec_s16_t dcv;    vec_s16_t v32 = vec_sl( vec_splat_s16( 8 ), vec_splat_u16( 2 ) );    vec_u16_t v6 = vec_splat_u16( 6 );    vec_s16_t dctv = vec_vsx_ld( 0, dct );    dctv = vec_sra( vec_add( dctv, v32 ), v6 );    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 0 ), (vec_s32_t)vec_splat( dctv, 1 ) );    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv );    idct8_dc_altivec( &p_dst[0], dcv );    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 2 ), (vec_s32_t)vec_splat( dctv, 3 ) );    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv );    idct8_dc_altivec( &p_dst[4*FDEC_STRIDE+0], dcv );}
开发者ID:Hero2000,项目名称:CainCamera,代码行数:15,


示例10: put_vp8_epel_h_altivec_core

static av_always_inlinevoid put_vp8_epel_h_altivec_core(uint8_t *dst, ptrdiff_t dst_stride,                                 uint8_t *src, ptrdiff_t src_stride,                                 int h, int mx, int w, int is6tap){    LOAD_H_SUBPEL_FILTER(mx-1);    vec_u8 align_vec0, align_vec8, permh0, permh8, filt;    vec_u8 perm_6tap0, perm_6tap8, perml0, perml8;    vec_u8 a, b, pixh, pixl, outer;    vec_s16 f16h, f16l;    vec_s32 filth, filtl;    vec_u8 perm_inner6 = { 1,2,3,4, 2,3,4,5, 3,4,5,6, 4,5,6,7 };    vec_u8 perm_inner4 = { 0,1,2,3, 1,2,3,4, 2,3,4,5, 3,4,5,6 };    vec_u8 perm_inner  = is6tap ? perm_inner6 : perm_inner4;    vec_u8 perm_outer = { 4,9, 0,5, 5,10, 1,6, 6,11, 2,7, 7,12, 3,8 };    vec_s32 c64 = vec_sl(vec_splat_s32(1), vec_splat_u32(6));    vec_u16 c7  = vec_splat_u16(7);    align_vec0 = vec_lvsl( -is6tap-1, src);    align_vec8 = vec_lvsl(8-is6tap-1, src);    permh0     = vec_perm(align_vec0, align_vec0, perm_inner);    permh8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_inner = vec_add(perm_inner, vec_splat_u8(4));    perml0     = vec_perm(align_vec0, align_vec0, perm_inner);    perml8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_6tap0 = vec_perm(align_vec0, align_vec0, perm_outer);    perm_6tap8 = vec_perm(align_vec8, align_vec8, perm_outer);    while (h --> 0) {        FILTER_H(f16h, 0);        if (w == 16) {            FILTER_H(f16l, 8);            filt = vec_packsu(f16h, f16l);            vec_st(filt, 0, dst);        } else {            filt = vec_packsu(f16h, f16h);            vec_ste((vec_u32)filt, 0, (uint32_t*)dst);            if (w == 8)                vec_ste((vec_u32)filt, 4, (uint32_t*)dst);        }        src += src_stride;        dst += dst_stride;    }}
开发者ID:Arcen,项目名称:libav,代码行数:47,


示例11: vec_splat_u32

void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor){	const VECU32 shift = vec_splat_u32(-16);	const VECS32 scale1 = { factor, factor, factor, factor };	const VECS32 scale2 = { 0x100 - factor, 0x100 - factor, 0x100 - factor, 0x100 - factor, };	VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(scale1, shift)), vec_splat_u32(0));	temp = vec_msum(VECU16(other.m_value), VECU16(vec_rl(scale2, shift)), temp);#if defined __LITTLE_ENDIAN__	m_value = VECS32(vec_msum(VECU16(m_value), VECU16(scale1), vec_mule(VECU16(other.m_value), VECU16(scale2))));#else	m_value = VECS32(vec_msum(VECU16(m_value), VECU16(scale1), vec_mulo(VECU16(other.m_value), VECU16(scale2))));#endif	m_value = VECS32(vec_add(vec_sl(temp, shift), VECU32(m_value)));	sra_imm(8);}
开发者ID:GiuseppeGorgoglione,项目名称:mame,代码行数:17,


示例12: predict_16x16_p_altivec

static void predict_16x16_p_altivec( uint8_t *src ){    int H = 0, V = 0;    for( int i = 1; i <= 8; i++ )    {        H += i * ( src[7+i - FDEC_STRIDE ]  - src[7-i - FDEC_STRIDE ] );        V += i * ( src[(7+i)*FDEC_STRIDE -1] - src[(7-i)*FDEC_STRIDE -1] );    }    int a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );    int b = ( 5 * H + 32 ) >> 6;    int c = ( 5 * V + 32 ) >> 6;    int i00 = a - b * 7 - c * 7 + 16;    vec_s16_u i00_u, b_u, c_u;    i00_u.s[0] = i00;    b_u.s[0]   = b;    c_u.s[0]   = c;    vec_u16_t val5_v = vec_splat_u16(5);    vec_s16_t i00_v, b_v, c_v;    i00_v = vec_splat(i00_u.v, 0);    b_v = vec_splat(b_u.v, 0);    c_v = vec_splat(c_u.v, 0);    vec_s16_t induc_v  = (vec_s16_t) CV(0,  1,  2,  3,  4,  5,  6,  7);    vec_s16_t b8_v = vec_sl(b_v, vec_splat_u16(3));    vec_s16_t add_i0_b_0v = vec_mladd(induc_v, b_v, i00_v);    vec_s16_t add_i0_b_8v = vec_adds(b8_v, add_i0_b_0v);    for( int y = 0; y < 16; y++ )    {        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);        vec_s16_t shift_8_v = vec_sra(add_i0_b_8v, val5_v);        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_8_v);        vec_st( com_sat_v, 0, &src[0]);        src += FDEC_STRIDE;        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);        add_i0_b_8v = vec_adds(add_i0_b_8v, c_v);    }}
开发者ID:xing2fan,项目名称:x264,代码行数:41,


示例13: vorbis_inverse_coupling_altivec

static void vorbis_inverse_coupling_altivec(float *mag, float *ang,                                            intptr_t blocksize){    int i;    vector float m, a;    vector bool int t0, t1;    const vector unsigned int v_31 = //XXX        vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1));    for (i = 0; i < blocksize; i += 4) {        m = vec_ld(0, mag+i);        a = vec_ld(0, ang+i);        t0 = vec_cmple(m, (vector float)vec_splat_u32(0));        t1 = vec_cmple(a, (vector float)vec_splat_u32(0));        a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31));        t0 = (vector bool int)vec_and(a, t1);        t1 = (vector bool int)vec_andc(a, t1);        a = vec_sub(m, (vector float)t1);        m = vec_add(m, (vector float)t0);        vec_stl(a, 0, ang+i);        vec_stl(m, 0, mag+i);    }}
开发者ID:TaoheGit,项目名称:hmi_sdl_android,代码行数:22,


示例14: put_no_rnd_h264_chroma_mc8_altivec

/* this code assume that stride % 16 == 0 */void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {    signed int ABCD[4] __attribute__((aligned(16))) =                        {((8 - x) * (8 - y)),                          ((x) * (8 - y)),                          ((8 - x) * (y)),                          ((x) * (y))};    register int i;    vector unsigned char fperm;    const vector signed int vABCD = vec_ld(0, ABCD);    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);    const vector signed int vzero = vec_splat_s32(0);    const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));    const vector unsigned short v6us = vec_splat_u16(6);    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 
1 : 0;    vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;    vector unsigned char vsrc0uc, vsrc1uc;    vector signed short vsrc0ssH, vsrc1ssH;    vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;    vector signed short vsrc2ssH, vsrc3ssH, psum;    vector unsigned char vdst, ppsum, fsum;    if (((unsigned long)dst) % 16 == 0) {      fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,                                        0x14, 0x15, 0x16, 0x17,                                        0x08, 0x09, 0x0A, 0x0B,                                        0x0C, 0x0D, 0x0E, 0x0F);    } else {      fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,                                        0x04, 0x05, 0x06, 0x07,                                        0x18, 0x19, 0x1A, 0x1B,                                        0x1C, 0x1D, 0x1E, 0x1F);    }    vsrcAuc = vec_ld(0, src);    if (loadSecond)      vsrcBuc = vec_ld(16, src);    vsrcperm0 = vec_lvsl(0, src);    vsrcperm1 = vec_lvsl(1, src);    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);    if (reallyBadAlign)      vsrc1uc = vsrcBuc;    else      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);    vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                               (vector unsigned char)vsrc0uc);    vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                               (vector unsigned char)vsrc1uc);    if (!loadSecond) {// -> !reallyBadAlign      for (i = 0 ; i < h ; i++) {        vsrcCuc = vec_ld(stride + 0, src);        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);        vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                                (vector unsigned char)vsrc2uc);        vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,                                                (vector 
unsigned char)vsrc3uc);        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));        psum = vec_mladd(vB, vsrc1ssH, psum);        psum = vec_mladd(vC, vsrc2ssH, psum);        psum = vec_mladd(vD, vsrc3ssH, psum);        psum = vec_add(v28ss, psum);        psum = vec_sra(psum, v6us);        vdst = vec_ld(0, dst);        ppsum = (vector unsigned char)vec_packsu(psum, psum);        fsum = vec_perm(vdst, ppsum, fperm);        vec_st(fsum, 0, dst);        vsrc0ssH = vsrc2ssH;        vsrc1ssH = vsrc3ssH;        dst += stride;        src += stride;      }    } else {        vector unsigned char vsrcDuc;      for (i = 0 ; i < h ; i++) {        vsrcCuc = vec_ld(stride + 0, src);        vsrcDuc = vec_ld(stride + 16, src);        vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);        if (reallyBadAlign)          vsrc3uc = vsrcDuc;        else//.........这里部分代码省略.........
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:101,


示例15: ff_fdct_altivec

/* two dimensional discrete cosine transform */void ff_fdct_altivec(int16_t *block){    vector signed short *bp;    const vector float *cp = fdctconsts;    vector float b00, b10, b20, b30, b40, b50, b60, b70;    vector float b01, b11, b21, b31, b41, b51, b61, b71;    vector float mzero, cnst, cnsts0, cnsts1, cnsts2;    vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;    /* setup constants {{{ */    /* mzero = -0.0 */    mzero  = ((vector float) vec_splat_u32(-1));    mzero  = ((vector float) vec_sl(vu32(mzero), vu32(mzero)));    cnsts0 = vec_ld(0, cp);    cp++;    cnsts1 = vec_ld(0, cp);    cp++;    cnsts2 = vec_ld(0, cp);    /* }}} */    /* 8x8 matrix transpose (vector short[8]) {{{ */#define MERGE_S16(hl, a, b) vec_merge ## hl(vs16(a), vs16(b))    bp  = (vector signed short *) block;    b00 = ((vector float) vec_ld(0,      bp));    b40 = ((vector float) vec_ld(16 * 4, bp));    b01 = ((vector float) MERGE_S16(h, b00, b40));    b11 = ((vector float) MERGE_S16(l, b00, b40));    bp++;    b10 = ((vector float) vec_ld(0,      bp));    b50 = ((vector float) vec_ld(16 * 4, bp));    b21 = ((vector float) MERGE_S16(h, b10, b50));    b31 = ((vector float) MERGE_S16(l, b10, b50));    bp++;    b20 = ((vector float) vec_ld(0,      bp));    b60 = ((vector float) vec_ld(16 * 4, bp));    b41 = ((vector float) MERGE_S16(h, b20, b60));    b51 = ((vector float) MERGE_S16(l, b20, b60));    bp++;    b30 = ((vector float) vec_ld(0,      bp));    b70 = ((vector float) vec_ld(16 * 4, bp));    b61 = ((vector float) MERGE_S16(h, b30, b70));    b71 = ((vector float) MERGE_S16(l, b30, b70));    x0 = ((vector float) MERGE_S16(h, b01, b41));    x1 = ((vector float) MERGE_S16(l, b01, b41));    x2 = ((vector float) MERGE_S16(h, b11, b51));    x3 = ((vector float) MERGE_S16(l, b11, b51));    x4 = ((vector float) MERGE_S16(h, b21, b61));    x5 = ((vector float) MERGE_S16(l, b21, b61));    x6 = ((vector float) MERGE_S16(h, b31, b71));    x7 = ((vector float) MERGE_S16(l, b31, b71));    b00 = 
((vector float) MERGE_S16(h, x0, x4));    b10 = ((vector float) MERGE_S16(l, x0, x4));    b20 = ((vector float) MERGE_S16(h, x1, x5));    b30 = ((vector float) MERGE_S16(l, x1, x5));    b40 = ((vector float) MERGE_S16(h, x2, x6));    b50 = ((vector float) MERGE_S16(l, x2, x6));    b60 = ((vector float) MERGE_S16(h, x3, x7));    b70 = ((vector float) MERGE_S16(l, x3, x7));#undef MERGE_S16    /* }}} */    /* Some of the initial calculations can be done as vector short     * before conversion to vector float.  The following code section     * takes advantage of this. */    /* fdct rows {{{ */    x0 = ((vector float) vec_add(vs16(b00), vs16(b70)));    x7 = ((vector float) vec_sub(vs16(b00), vs16(b70)));    x1 = ((vector float) vec_add(vs16(b10), vs16(b60)));    x6 = ((vector float) vec_sub(vs16(b10), vs16(b60)));    x2 = ((vector float) vec_add(vs16(b20), vs16(b50)));    x5 = ((vector float) vec_sub(vs16(b20), vs16(b50)));    x3 = ((vector float) vec_add(vs16(b30), vs16(b40)));    x4 = ((vector float) vec_sub(vs16(b30), vs16(b40)));    b70 = ((vector float) vec_add(vs16(x0), vs16(x3)));    b10 = ((vector float) vec_add(vs16(x1), vs16(x2)));    b00 = ((vector float) vec_add(vs16(b70), vs16(b10)));    b40 = ((vector float) vec_sub(vs16(b70), vs16(b10)));#define CTF0(n)                                                    /    b ## n ## 1 = ((vector float) vec_unpackl(vs16(b ## n ## 0))); /    b ## n ## 0 = ((vector float) vec_unpackh(vs16(b ## n ## 0))); /    b ## n ## 1 = vec_ctf(vs32(b ## n ## 1), 0);                   /    b ## n ## 0 = vec_ctf(vs32(b ## n ## 0), 0)    CTF0(0);    CTF0(4);    b20 = ((vector float) vec_sub(vs16(x0), vs16(x3)));    b60 = ((vector float) vec_sub(vs16(x1), vs16(x2)));    CTF0(2);    CTF0(6);//.........这里部分代码省略.........
开发者ID:63n,项目名称:FFmpeg,代码行数:101,


示例16: PREFIX_h264_chroma_mc8_altivec

void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,                                    int stride, int h, int x, int y) {  POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);    DECLARE_ALIGNED_16(signed int, ABCD[4]) =                        {((8 - x) * (8 - y)),                         ((    x) * (8 - y)),                         ((8 - x) * (    y)),                         ((    x) * (    y))};    register int i;    vec_u8 fperm;    const vec_s32 vABCD = vec_ld(0, ABCD);    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);    LOAD_ZERO;    const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));    const vec_u16 v6us = vec_splat_u16(6);    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;    vec_u8 vsrc0uc, vsrc1uc;    vec_s16 vsrc0ssH, vsrc1ssH;    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;    vec_s16 vsrc2ssH, vsrc3ssH, psum;    vec_u8 vdst, ppsum, vfdst, fsum;  POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);    if (((unsigned long)dst) % 16 == 0) {        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,                         0x14, 0x15, 0x16, 0x17,                         0x08, 0x09, 0x0A, 0x0B,                         0x0C, 0x0D, 0x0E, 0x0F};    } else {        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,                         0x04, 0x05, 0x06, 0x07,                         0x18, 0x19, 0x1A, 0x1B,                         0x1C, 0x1D, 0x1E, 0x1F};    }    vsrcAuc = vec_ld(0, src);    if (loadSecond)        vsrcBuc = vec_ld(16, src);    vsrcperm0 = vec_lvsl(0, src);    vsrcperm1 = vec_lvsl(1, src);    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);    if (reallyBadAlign)        vsrc1uc = vsrcBuc;    else        vsrc1uc = vec_perm(vsrcAuc, 
vsrcBuc, vsrcperm1);    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);    if (ABCD[3]) {        if (!loadSecond) {// -> !reallyBadAlign            for (i = 0 ; i < h ; i++) {                vsrcCuc = vec_ld(stride + 0, src);                vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);                vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);                CHROMA_MC8_ALTIVEC_CORE            }        } else {            vec_u8 vsrcDuc;            for (i = 0 ; i < h ; i++) {                vsrcCuc = vec_ld(stride + 0, src);                vsrcDuc = vec_ld(stride + 16, src);                vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);                if (reallyBadAlign)                    vsrc3uc = vsrcDuc;                else                    vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);                CHROMA_MC8_ALTIVEC_CORE            }        }    } else {
开发者ID:bwahn,项目名称:ffmpeg,代码行数:82,


示例17: PREFIX_h264_chroma_mc8_altivec

/* this code assume that stride % 16 == 0 */void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {  POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);    DECLARE_ALIGNED_16(signed int, ABCD[4]) =                        {((8 - x) * (8 - y)),                          ((x) * (8 - y)),                          ((8 - x) * (y)),                          ((x) * (y))};    register int i;    vec_u8_t fperm;    const vec_s32_t vABCD = vec_ld(0, ABCD);    const vec_s16_t vA = vec_splat((vec_s16_t)vABCD, 1);    const vec_s16_t vB = vec_splat((vec_s16_t)vABCD, 3);    const vec_s16_t vC = vec_splat((vec_s16_t)vABCD, 5);    const vec_s16_t vD = vec_splat((vec_s16_t)vABCD, 7);    LOAD_ZERO;    const vec_s16_t v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));    const vec_u16_t v6us = vec_splat_u16(6);    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;    vec_u8_t vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;    vec_u8_t vsrc0uc, vsrc1uc;    vec_s16_t vsrc0ssH, vsrc1ssH;    vec_u8_t vsrcCuc, vsrc2uc, vsrc3uc;    vec_s16_t vsrc2ssH, vsrc3ssH, psum;    vec_u8_t vdst, ppsum, vfdst, fsum;  POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);    if (((unsigned long)dst) % 16 == 0) {      fperm = (vec_u8_t)AVV(0x10, 0x11, 0x12, 0x13,                            0x14, 0x15, 0x16, 0x17,                            0x08, 0x09, 0x0A, 0x0B,                            0x0C, 0x0D, 0x0E, 0x0F);    } else {      fperm = (vec_u8_t)AVV(0x00, 0x01, 0x02, 0x03,                            0x04, 0x05, 0x06, 0x07,                            0x18, 0x19, 0x1A, 0x1B,                            0x1C, 0x1D, 0x1E, 0x1F);    }    vsrcAuc = vec_ld(0, src);    if (loadSecond)      vsrcBuc = vec_ld(16, src);    vsrcperm0 = vec_lvsl(0, src);    vsrcperm1 = vec_lvsl(1, src);    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);    if (reallyBadAlign)      vsrc1uc = vsrcBuc;    
else      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);    vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);    vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);    if (!loadSecond) {// -> !reallyBadAlign      for (i = 0 ; i < h ; i++) {        vsrcCuc = vec_ld(stride + 0, src);        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);        vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);        vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc);        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));        psum = vec_mladd(vB, vsrc1ssH, psum);        psum = vec_mladd(vC, vsrc2ssH, psum);        psum = vec_mladd(vD, vsrc3ssH, psum);        psum = vec_add(v32ss, psum);        psum = vec_sra(psum, v6us);        vdst = vec_ld(0, dst);        ppsum = (vec_u8_t)vec_packsu(psum, psum);        vfdst = vec_perm(vdst, ppsum, fperm);        OP_U8_ALTIVEC(fsum, vfdst, vdst);        vec_st(fsum, 0, dst);        vsrc0ssH = vsrc2ssH;        vsrc1ssH = vsrc3ssH;        dst += stride;        src += stride;      }    } else {        vec_u8_t vsrcDuc;      for (i = 0 ; i < h ; i++) {        vsrcCuc = vec_ld(stride + 0, src);        vsrcDuc = vec_ld(stride + 16, src);        vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);        if (reallyBadAlign)          vsrc3uc = vsrcDuc;//.........这里部分代码省略.........
开发者ID:JERUKA9,项目名称:amv-codec-tools,代码行数:101,


示例18: DECLARE_ALIGNED

    DECLARE_ALIGNED(16, signed int, ABCD)[4] =    {        ((8 - x) * (8 - y)),        ((    x) * (8 - y)),        ((8 - x) * (    y)),        ((    x) * (    y))    };    register int i;    vec_u8 fperm;    const vec_s32 vABCD = vec_ld(0, ABCD);    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);    LOAD_ZERO;    const vec_s16 v32ss = vec_sl(vec_splat_s16(1), vec_splat_u16(5));    const vec_u16 v6us = vec_splat_u16(6);    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;    vec_u8 vsrc0uc, vsrc1uc;    vec_s16 vsrc0ssH, vsrc1ssH;    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;    vec_s16 vsrc2ssH, vsrc3ssH, psum;    vec_u8 vdst, ppsum, vfdst, fsum;    if (((unsigned long)dst) % 16 == 0)    {        fperm = (vec_u8)        {
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:31,


示例19: testsl_signed

/* Returns vec_sl (x, y): each signed 16-bit element of x shifted left by the
   corresponding unsigned element of y.  */
vector signed short
testsl_signed (vector signed short x, vector unsigned short y)
{
  return vec_sl (x, y);
}
开发者ID:MaxKellermann,项目名称:gcc,代码行数:5,


示例20: vc1_inv_trans_8x4_altivec

/** Do inverse transform on 8x4 part of block*/static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, int16_t *block){    vector signed short src0, src1, src2, src3, src4, src5, src6, src7;    vector signed int s0, s1, s2, s3, s4, s5, s6, s7;    vector signed int s8, s9, sA, sB, sC, sD, sE, sF;    vector signed int t0, t1, t2, t3, t4, t5, t6, t7;    const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));    const vector unsigned int vec_7 = vec_splat_u32(7);    const vector unsigned int vec_5 = vec_splat_u32(5);    const vector unsigned int vec_4 = vec_splat_u32(4);    const vector  signed int vec_4s = vec_splat_s32(4);    const vector unsigned int vec_3 = vec_splat_u32(3);    const vector unsigned int vec_2 = vec_splat_u32(2);    const vector unsigned int vec_1 = vec_splat_u32(1);    vector unsigned char tmp;    vector signed short tmp2, tmp3;    vector unsigned char perm0, perm1, p0, p1, p;    src0 = vec_ld(  0, block);    src1 = vec_ld( 16, block);    src2 = vec_ld( 32, block);    src3 = vec_ld( 48, block);    src4 = vec_ld( 64, block);    src5 = vec_ld( 80, block);    src6 = vec_ld( 96, block);    src7 = vec_ld(112, block);    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);    s0 = vec_unpackl(src0);    s1 = vec_unpackl(src1);    s2 = vec_unpackl(src2);    s3 = vec_unpackl(src3);    s4 = vec_unpackl(src4);    s5 = vec_unpackl(src5);    s6 = vec_unpackl(src6);    s7 = vec_unpackl(src7);    s8 = vec_unpackh(src0);    s9 = vec_unpackh(src1);    sA = vec_unpackh(src2);    sB = vec_unpackh(src3);    sC = vec_unpackh(src4);    sD = vec_unpackh(src5);    sE = vec_unpackh(src6);    sF = vec_unpackh(src7);    STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);    SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);    STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);    SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);    src0 = vec_pack(s8, s0);    src1 = vec_pack(s9, s1);    src2 = vec_pack(sA, s2);    src3 = vec_pack(sB, s3);    src4 = 
vec_pack(sC, s4);    src5 = vec_pack(sD, s5);    src6 = vec_pack(sE, s6);    src7 = vec_pack(sF, s7);    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);    s0 = vec_unpackh(src0);    s1 = vec_unpackh(src1);    s2 = vec_unpackh(src2);    s3 = vec_unpackh(src3);    s8 = vec_unpackl(src0);    s9 = vec_unpackl(src1);    sA = vec_unpackl(src2);    sB = vec_unpackl(src3);    STEP4(s0, s1, s2, s3, vec_64);    SHIFT_VERT4(s0, s1, s2, s3);    STEP4(s8, s9, sA, sB, vec_64);    SHIFT_VERT4(s8, s9, sA, sB);    src0 = vec_pack(s0, s8);    src1 = vec_pack(s1, s9);    src2 = vec_pack(s2, sA);    src3 = vec_pack(s3, sB);    p0 = vec_lvsl (0, dest);    p1 = vec_lvsl (stride, dest);    p = vec_splat_u8 (-1);    perm0 = vec_mergeh (p, p0);    perm1 = vec_mergeh (p, p1);#define ADD(dest,src,perm)                                              /    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */                        /    tmp = vec_ld (0, dest);                                             /    tmp2 = (vector signed short)vec_perm (tmp, vec_splat_u8(0), perm);  /    tmp3 = vec_adds (tmp2, src);                                        /    tmp = vec_packsu (tmp3, tmp3);                                      /    vec_ste ((vector unsigned int)tmp, 0, (unsigned int *)dest);        /    vec_ste ((vector unsigned int)tmp, 4, (unsigned int *)dest);    ADD (dest, src0, perm0)      dest += stride;    ADD (dest, src1, perm1)      dest += stride;    ADD (dest, src2, perm0)      dest += stride;    ADD (dest, src3, perm1)}
开发者ID:Acidburn0zzz,项目名称:libav,代码行数:97,


示例21: vc1_inv_trans_8x8_altivec

/** Do inverse transform on 8x8 block*/static void vc1_inv_trans_8x8_altivec(int16_t block[64]){    vector signed short src0, src1, src2, src3, src4, src5, src6, src7;    vector signed int s0, s1, s2, s3, s4, s5, s6, s7;    vector signed int s8, s9, sA, sB, sC, sD, sE, sF;    vector signed int t0, t1, t2, t3, t4, t5, t6, t7;    const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));    const vector unsigned int vec_7 = vec_splat_u32(7);    const vector unsigned int vec_4 = vec_splat_u32(4);    const vector  signed int vec_4s = vec_splat_s32(4);    const vector unsigned int vec_3 = vec_splat_u32(3);    const vector unsigned int vec_2 = vec_splat_u32(2);    const vector  signed int vec_1s = vec_splat_s32(1);    const vector unsigned int vec_1 = vec_splat_u32(1);    src0 = vec_ld(  0, block);    src1 = vec_ld( 16, block);    src2 = vec_ld( 32, block);    src3 = vec_ld( 48, block);    src4 = vec_ld( 64, block);    src5 = vec_ld( 80, block);    src6 = vec_ld( 96, block);    src7 = vec_ld(112, block);    s0 = vec_unpackl(src0);    s1 = vec_unpackl(src1);    s2 = vec_unpackl(src2);    s3 = vec_unpackl(src3);    s4 = vec_unpackl(src4);    s5 = vec_unpackl(src5);    s6 = vec_unpackl(src6);    s7 = vec_unpackl(src7);    s8 = vec_unpackh(src0);    s9 = vec_unpackh(src1);    sA = vec_unpackh(src2);    sB = vec_unpackh(src3);    sC = vec_unpackh(src4);    sD = vec_unpackh(src5);    sE = vec_unpackh(src6);    sF = vec_unpackh(src7);    STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);    SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);    STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);    SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);    src0 = vec_pack(s8, s0);    src1 = vec_pack(s9, s1);    src2 = vec_pack(sA, s2);    src3 = vec_pack(sB, s3);    src4 = vec_pack(sC, s4);    src5 = vec_pack(sD, s5);    src6 = vec_pack(sE, s6);    src7 = vec_pack(sF, s7);    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);    s0 = vec_unpackl(src0);    s1 = vec_unpackl(src1);  
  s2 = vec_unpackl(src2);    s3 = vec_unpackl(src3);    s4 = vec_unpackl(src4);    s5 = vec_unpackl(src5);    s6 = vec_unpackl(src6);    s7 = vec_unpackl(src7);    s8 = vec_unpackh(src0);    s9 = vec_unpackh(src1);    sA = vec_unpackh(src2);    sB = vec_unpackh(src3);    sC = vec_unpackh(src4);    sD = vec_unpackh(src5);    sE = vec_unpackh(src6);    sF = vec_unpackh(src7);    STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_64);    SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7);    STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_64);    SHIFT_VERT8(s8, s9, sA, sB, sC, sD, sE, sF);    src0 = vec_pack(s8, s0);    src1 = vec_pack(s9, s1);    src2 = vec_pack(sA, s2);    src3 = vec_pack(sB, s3);    src4 = vec_pack(sC, s4);    src5 = vec_pack(sD, s5);    src6 = vec_pack(sE, s6);    src7 = vec_pack(sF, s7);    vec_st(src0,  0, block);    vec_st(src1, 16, block);    vec_st(src2, 32, block);    vec_st(src3, 48, block);    vec_st(src4, 64, block);    vec_st(src5, 80, block);    vec_st(src6, 96, block);    vec_st(src7,112, block);}
开发者ID:Acidburn0zzz,项目名称:libav,代码行数:94,


示例22: PREFIX_h264_qpel16_hv_lowpass_altivec

static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {    register int i;    LOAD_ZERO;    const vec_u8 permM2 = vec_lvsl(-2, src);    const vec_u8 permM1 = vec_lvsl(-1, src);    const vec_u8 permP0 = vec_lvsl(+0, src);    const vec_u8 permP1 = vec_lvsl(+1, src);    const vec_u8 permP2 = vec_lvsl(+2, src);    const vec_u8 permP3 = vec_lvsl(+3, src);    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_u32 v10ui = vec_splat_u32(10);    const vec_s16 v5ss = vec_splat_s16(5);    const vec_s16 v1ss = vec_splat_s16(1);    const vec_s32 v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));    const vec_u32 v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));    register int align = ((((unsigned long)src) - 2) % 16);    vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,              srcP2A, srcP2B, srcP3A, srcP3B,              srcM1A, srcM1B, srcM2A, srcM2B,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,              pp1A, pp1B, pp2A, pp2B, psumA, psumB;    const vec_u8 mperm = (const vec_u8)        {0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,         0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F};    int16_t *tmpbis = tmp;    vec_s16 tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,              tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,              tmpP2ssA, tmpP2ssB;    vec_s32 pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,              pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,              pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,              ssumAe, ssumAo, ssumBe, ssumBo;    vec_u8 fsum, sumv, sum;    vec_s16 ssume, ssumo;    src -= (2 * srcStride);    for (i = 0 ; i < 21 ; i ++) {        vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;        vec_u8 srcR1 = vec_ld(-2, src);        vec_u8 srcR2 = vec_ld(14, src);        switch (align) {        default: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 
= vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = vec_perm(srcR1, srcR2, permP3);        } break;        case 11: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = srcR2;        } break;        case 12: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = srcR2;            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 13: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = srcR2;            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 14: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = srcR2;            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 15: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = srcR2;            srcP0 = vec_perm(srcR2, srcR3, permP0);            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } 
break;        }//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例23: PREFIX_h264_qpel16_v_lowpass_altivec

static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t *dst,                                                 const uint8_t *src,                                                 int dstStride, int srcStride){    register int i;    LOAD_ZERO;    vec_u8 perm;#if HAVE_BIGENDIAN    perm = vec_lvsl(0, src);#endif    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_u16 v5us = vec_splat_u16(5);    const vec_s16 v5ss = vec_splat_s16(5);    const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));    const uint8_t *srcbis = src - (srcStride * 2);    const vec_u8 srcM2 = load_with_perm_vec(0, srcbis, perm);    srcbis += srcStride;    const vec_u8 srcM1 = load_with_perm_vec(0, srcbis, perm);    srcbis += srcStride;    const vec_u8 srcP0 = load_with_perm_vec(0, srcbis, perm);    srcbis += srcStride;    const vec_u8 srcP1 = load_with_perm_vec(0, srcbis, perm);    srcbis += srcStride;    const vec_u8 srcP2 = load_with_perm_vec(0, srcbis, perm);    srcbis += srcStride;    vec_s16 srcM2ssA = (vec_s16) VEC_MERGEH(zero_u8v, srcM2);    vec_s16 srcM2ssB = (vec_s16) VEC_MERGEL(zero_u8v, srcM2);    vec_s16 srcM1ssA = (vec_s16) VEC_MERGEH(zero_u8v, srcM1);    vec_s16 srcM1ssB = (vec_s16) VEC_MERGEL(zero_u8v, srcM1);    vec_s16 srcP0ssA = (vec_s16) VEC_MERGEH(zero_u8v, srcP0);    vec_s16 srcP0ssB = (vec_s16) VEC_MERGEL(zero_u8v, srcP0);    vec_s16 srcP1ssA = (vec_s16) VEC_MERGEH(zero_u8v, srcP1);    vec_s16 srcP1ssB = (vec_s16) VEC_MERGEL(zero_u8v, srcP1);    vec_s16 srcP2ssA = (vec_s16) VEC_MERGEH(zero_u8v, srcP2);    vec_s16 srcP2ssB = (vec_s16) VEC_MERGEL(zero_u8v, srcP2);    vec_s16 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,              psumA, psumB, sumA, sumB,              srcP3ssA, srcP3ssB,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;    vec_u8 sum, fsum, srcP3;    for (i = 0 ; i < 16 ; i++) {        srcP3 = load_with_perm_vec(0, srcbis, perm);        srcbis += srcStride;        srcP3ssA = (vec_s16) VEC_MERGEH(zero_u8v, srcP3);        
srcP3ssB = (vec_s16) VEC_MERGEL(zero_u8v, srcP3);        sum1A = vec_adds(srcP0ssA, srcP1ssA);        sum1B = vec_adds(srcP0ssB, srcP1ssB);        sum2A = vec_adds(srcM1ssA, srcP2ssA);        sum2B = vec_adds(srcM1ssB, srcP2ssB);        sum3A = vec_adds(srcM2ssA, srcP3ssA);        sum3B = vec_adds(srcM2ssB, srcP3ssB);        srcM2ssA = srcM1ssA;        srcM2ssB = srcM1ssB;        srcM1ssA = srcP0ssA;        srcM1ssB = srcP0ssB;        srcP0ssA = srcP1ssA;        srcP0ssB = srcP1ssB;        srcP1ssA = srcP2ssA;        srcP1ssB = srcP2ssB;        srcP2ssA = srcP3ssA;        srcP2ssB = srcP3ssB;        pp1A = vec_mladd(sum1A, v20ss, v16ss);        pp1B = vec_mladd(sum1B, v20ss, v16ss);        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);        pp3A = vec_add(sum3A, pp1A);        pp3B = vec_add(sum3B, pp1B);        psumA = vec_sub(pp3A, pp2A);        psumB = vec_sub(pp3B, pp2B);        sumA = vec_sra(psumA, v5us);        sumB = vec_sra(psumB, v5us);        sum = vec_packsu(sumA, sumB);        ASSERT_ALIGNED(dst);        OP_U8_ALTIVEC(fsum, sum, vec_ld(0, dst));        vec_st(fsum, 0, dst);        dst += dstStride;    }}
开发者ID:TaoheGit,项目名称:hmi_sdl_android,代码行数:98,


示例24: PREFIX_h264_qpel16_h_lowpass_altivec

static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t *dst,                                                 const uint8_t *src,                                                 int dstStride, int srcStride){    register int i;    LOAD_ZERO;    vec_u8 permM2, permM1, permP0, permP1, permP2, permP3;    const vec_s16 v5ss = vec_splat_s16(5);    const vec_u16 v5us = vec_splat_u16(5);    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));    vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;    register int align = ((((unsigned long)src) - 2) % 16);    vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,              srcP2A, srcP2B, srcP3A, srcP3B,              srcM1A, srcM1B, srcM2A, srcM2B,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,              pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,              psumA, psumB, sumA, sumB;    vec_u8 sum, fsum;#if HAVE_BIGENDIAN    permM2 = vec_lvsl(-2, src);    permM1 = vec_lvsl(-1, src);    permP0 = vec_lvsl(+0, src);    permP1 = vec_lvsl(+1, src);    permP2 = vec_lvsl(+2, src);    permP3 = vec_lvsl(+3, src);#endif /* HAVE_BIGENDIAN */    for (i = 0 ; i < 16 ; i ++) {        load_alignment(src, align, permM2, permM1, permP0, permP1, permP2, permP3);        srcP0A = (vec_s16) VEC_MERGEH(zero_u8v, srcP0);        srcP0B = (vec_s16) VEC_MERGEL(zero_u8v, srcP0);        srcP1A = (vec_s16) VEC_MERGEH(zero_u8v, srcP1);        srcP1B = (vec_s16) VEC_MERGEL(zero_u8v, srcP1);        srcP2A = (vec_s16) VEC_MERGEH(zero_u8v, srcP2);        srcP2B = (vec_s16) VEC_MERGEL(zero_u8v, srcP2);        srcP3A = (vec_s16) VEC_MERGEH(zero_u8v, srcP3);        srcP3B = (vec_s16) VEC_MERGEL(zero_u8v, srcP3);        srcM1A = (vec_s16) VEC_MERGEH(zero_u8v, srcM1);        srcM1B = (vec_s16) VEC_MERGEL(zero_u8v, srcM1);        srcM2A = (vec_s16) VEC_MERGEH(zero_u8v, srcM2);        srcM2B = (vec_s16) VEC_MERGEL(zero_u8v, srcM2);        sum1A = vec_adds(srcP0A, srcP1A);        sum1B = 
vec_adds(srcP0B, srcP1B);        sum2A = vec_adds(srcM1A, srcP2A);        sum2B = vec_adds(srcM1B, srcP2B);        sum3A = vec_adds(srcM2A, srcP3A);        sum3B = vec_adds(srcM2B, srcP3B);        pp1A = vec_mladd(sum1A, v20ss, v16ss);        pp1B = vec_mladd(sum1B, v20ss, v16ss);        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);        pp3A = vec_add(sum3A, pp1A);        pp3B = vec_add(sum3B, pp1B);        psumA = vec_sub(pp3A, pp2A);        psumB = vec_sub(pp3B, pp2B);        sumA = vec_sra(psumA, v5us);        sumB = vec_sra(psumB, v5us);        sum = vec_packsu(sumA, sumB);        ASSERT_ALIGNED(dst);        OP_U8_ALTIVEC(fsum, sum, vec_ld(0, dst));        vec_st(fsum, 0, dst);        src += srcStride;        dst += dstStride;    }}
开发者ID:TaoheGit,项目名称:hmi_sdl_android,代码行数:87,


示例25: PREFIX_h264_qpel16_v_lowpass_altivec

static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {    register int i;    LOAD_ZERO;    const vec_u8 perm = vec_lvsl(0, src);    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_u16 v5us = vec_splat_u16(5);    const vec_s16 v5ss = vec_splat_s16(5);    const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));    uint8_t *srcbis = src - (srcStride * 2);    const vec_u8 srcM2a = vec_ld(0, srcbis);    const vec_u8 srcM2b = vec_ld(16, srcbis);    const vec_u8 srcM2 = vec_perm(srcM2a, srcM2b, perm);    //srcbis += srcStride;    const vec_u8 srcM1a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcM1b = vec_ld(16, srcbis);    const vec_u8 srcM1 = vec_perm(srcM1a, srcM1b, perm);    //srcbis += srcStride;    const vec_u8 srcP0a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP0b = vec_ld(16, srcbis);    const vec_u8 srcP0 = vec_perm(srcP0a, srcP0b, perm);    //srcbis += srcStride;    const vec_u8 srcP1a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP1b = vec_ld(16, srcbis);    const vec_u8 srcP1 = vec_perm(srcP1a, srcP1b, perm);    //srcbis += srcStride;    const vec_u8 srcP2a = vec_ld(0, srcbis += srcStride);    const vec_u8 srcP2b = vec_ld(16, srcbis);    const vec_u8 srcP2 = vec_perm(srcP2a, srcP2b, perm);    //srcbis += srcStride;    vec_s16 srcM2ssA = (vec_s16) vec_mergeh(zero_u8v, srcM2);    vec_s16 srcM2ssB = (vec_s16) vec_mergel(zero_u8v, srcM2);    vec_s16 srcM1ssA = (vec_s16) vec_mergeh(zero_u8v, srcM1);    vec_s16 srcM1ssB = (vec_s16) vec_mergel(zero_u8v, srcM1);    vec_s16 srcP0ssA = (vec_s16) vec_mergeh(zero_u8v, srcP0);    vec_s16 srcP0ssB = (vec_s16) vec_mergel(zero_u8v, srcP0);    vec_s16 srcP1ssA = (vec_s16) vec_mergeh(zero_u8v, srcP1);    vec_s16 srcP1ssB = (vec_s16) vec_mergel(zero_u8v, srcP1);    vec_s16 srcP2ssA = (vec_s16) vec_mergeh(zero_u8v, srcP2);    vec_s16 srcP2ssB = (vec_s16) vec_mergel(zero_u8v, srcP2);    vec_s16 pp1A, pp1B, pp2A, 
pp2B, pp3A, pp3B,              psumA, psumB, sumA, sumB,              srcP3ssA, srcP3ssB,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;    vec_u8 sum, fsum, srcP3a, srcP3b, srcP3;    for (i = 0 ; i < 16 ; i++) {        srcP3a = vec_ld(0, srcbis += srcStride);        srcP3b = vec_ld(16, srcbis);        srcP3 = vec_perm(srcP3a, srcP3b, perm);        srcP3ssA = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3ssB = (vec_s16) vec_mergel(zero_u8v, srcP3);        //srcbis += srcStride;        sum1A = vec_adds(srcP0ssA, srcP1ssA);        sum1B = vec_adds(srcP0ssB, srcP1ssB);        sum2A = vec_adds(srcM1ssA, srcP2ssA);        sum2B = vec_adds(srcM1ssB, srcP2ssB);        sum3A = vec_adds(srcM2ssA, srcP3ssA);        sum3B = vec_adds(srcM2ssB, srcP3ssB);        srcM2ssA = srcM1ssA;        srcM2ssB = srcM1ssB;        srcM1ssA = srcP0ssA;        srcM1ssB = srcP0ssB;        srcP0ssA = srcP1ssA;        srcP0ssB = srcP1ssB;        srcP1ssA = srcP2ssA;        srcP1ssB = srcP2ssB;        srcP2ssA = srcP3ssA;        srcP2ssB = srcP3ssB;        pp1A = vec_mladd(sum1A, v20ss, v16ss);        pp1B = vec_mladd(sum1B, v20ss, v16ss);        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);        pp3A = vec_add(sum3A, pp1A);        pp3B = vec_add(sum3B, pp1B);        psumA = vec_sub(pp3A, pp2A);        psumB = vec_sub(pp3B, pp2B);        sumA = vec_sra(psumA, v5us);        sumB = vec_sra(psumB, v5us);        sum = vec_packsu(sumA, sumB);        ASSERT_ALIGNED(dst);        OP_U8_ALTIVEC(fsum, sum, vec_ld(0, dst));        vec_st(fsum, 0, dst);//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例26: PREFIX_h264_qpel16_h_lowpass_altivec

static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {    register int i;    LOAD_ZERO;    const vec_u8 permM2 = vec_lvsl(-2, src);    const vec_u8 permM1 = vec_lvsl(-1, src);    const vec_u8 permP0 = vec_lvsl(+0, src);    const vec_u8 permP1 = vec_lvsl(+1, src);    const vec_u8 permP2 = vec_lvsl(+2, src);    const vec_u8 permP3 = vec_lvsl(+3, src);    const vec_s16 v5ss = vec_splat_s16(5);    const vec_u16 v5us = vec_splat_u16(5);    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));    vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;    register int align = ((((unsigned long)src) - 2) % 16);    vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,              srcP2A, srcP2B, srcP3A, srcP3B,              srcM1A, srcM1B, srcM2A, srcM2B,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,              pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,              psumA, psumB, sumA, sumB;    vec_u8 sum, fsum;    for (i = 0 ; i < 16 ; i ++) {        vec_u8 srcR1 = vec_ld(-2, src);        vec_u8 srcR2 = vec_ld(14, src);        switch (align) {        default: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = vec_perm(srcR1, srcR2, permP3);        } break;        case 11: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = srcR2;        } break;        case 12: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            
srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = srcR2;            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 13: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = srcR2;            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 14: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = srcR2;            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 15: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = srcR2;            srcP0 = vec_perm(srcR2, srcR3, permP0);            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        }        srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);        srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);        srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);        srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);        srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);        srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);        srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);        srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);        srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);        srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例27: put_vp8_epel_v_altivec_core

static av_always_inlinevoid put_vp8_epel_v_altivec_core(uint8_t *dst, ptrdiff_t dst_stride,                                 uint8_t *src, ptrdiff_t src_stride,                                 int h, int my, int w, int is6tap){    LOAD_V_SUBPEL_FILTER(my-1);    vec_u8 s0, s1, s2, s3, s4, s5, filt, align_vech, perm_vec, align_vecl;    vec_s16 s0f, s1f, s2f, s3f, s4f, s5f, f16h, f16l;    vec_s16 c64 = vec_sl(vec_splat_s16(1), vec_splat_u16(6));    vec_u16 c7  = vec_splat_u16(7);    // we want pixels 0-7 to be in the even positions and 8-15 in the odd,    // so combine this permute with the alignment permute vector    align_vech = vec_lvsl(0, src);    align_vecl = vec_sld(align_vech, align_vech, 8);    if (w ==16)        perm_vec = vec_mergeh(align_vech, align_vecl);    else        perm_vec = vec_mergeh(align_vech, align_vech);    if (is6tap)        s0 = load_with_perm_vec(-2*src_stride, src, perm_vec);    s1 = load_with_perm_vec(-1*src_stride, src, perm_vec);    s2 = load_with_perm_vec( 0*src_stride, src, perm_vec);    s3 = load_with_perm_vec( 1*src_stride, src, perm_vec);    if (is6tap)        s4 = load_with_perm_vec( 2*src_stride, src, perm_vec);    src += (2+is6tap)*src_stride;    while (h --> 0) {        if (is6tap)            s5 = load_with_perm_vec(0, src, perm_vec);        else            s4 = load_with_perm_vec(0, src, perm_vec);        FILTER_V(f16h, vec_mule);        if (w == 16) {            FILTER_V(f16l, vec_mulo);            filt = vec_packsu(f16h, f16l);            vec_st(filt, 0, dst);        } else {            filt = vec_packsu(f16h, f16h);            if (w == 4)                filt = (vec_u8)vec_splat((vec_u32)filt, 0);            else                vec_ste((vec_u32)filt, 4, (uint32_t*)dst);            vec_ste((vec_u32)filt, 0, (uint32_t*)dst);        }        if (is6tap)            s0 = s1;        s1 = s2;        s2 = s3;        s3 = s4;        if (is6tap)            s4 = s5;        dst += dst_stride;        src += src_stride;    }}
开发者ID:Arcen,项目名称:libav,代码行数:63,



注:本文中的vec_sl函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vec_splat_u16函数代码示例
C++ vec_set函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网。自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,也是网友喜欢的软件自学网站。