您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vec_splat_u16函数代码示例

51自学网 2021-06-03 09:36:35
  C++
这篇教程《C++ vec_splat_u16函数代码示例》写得很实用，希望能帮到您。

本文整理汇总了C++中vec_splat_u16函数的典型用法代码示例。如果您正苦于以下问题：C++ vec_splat_u16函数的具体用法？C++ vec_splat_u16怎么用？C++ vec_splat_u16使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_splat_u16函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: yuv2plane1_nbps_vsx

static void yuv2plane1_nbps_vsx(const int16_t *src, uint16_t *dest, int dstW,                           int big_endian, int output_bits){    const int dst_u = -(uintptr_t)dest & 7;    const int shift = 15 - output_bits;    const int add = (1 << (shift - 1));    const int clip = (1 << output_bits) - 1;    const vector uint16_t vadd = (vector uint16_t) {add, add, add, add, add, add, add, add};    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0);    const vector uint16_t vshift = (vector uint16_t) vec_splat_u16(shift);    const vector uint16_t vlargest = (vector uint16_t) {clip, clip, clip, clip, clip, clip, clip, clip};    vector uint16_t v;    int i;    yuv2plane1_nbps_u(src, dest, dst_u, big_endian, output_bits, 0);    for (i = dst_u; i < dstW - 7; i += 8) {        v = vec_vsx_ld(0, (const uint16_t *) &src[i]);        v = vec_add(v, vadd);        v = vec_sr(v, vshift);        v = vec_min(v, vlargest);        v = vec_rl(v, vswap);        vec_st(v, 0, &dest[i]);    }    yuv2plane1_nbps_u(src, dest, dstW, big_endian, output_bits, i);}
开发者ID:lihp1603,项目名称:ffmpeg,代码行数:27,


示例2: transfer8x8_copy_altivec_c

/*
 * Copy an 8x8 block from src to dst by expanding the COPY8TO8() macro eight
 * times.
 *
 * COPY8TO8() is defined elsewhere in the file; it presumably copies one
 * 8-byte row and advances by stride, using the locals declared here
 * (tmp, mask, t0, t1) -- confirm against the macro definition.
 *
 * With DEBUG defined, a diagnostic is printed when dst is not 8-byte aligned
 * or stride is not a multiple of 8.
 */
void
transfer8x8_copy_altivec_c( uint8_t * dst,
                            uint8_t * src,
                            uint32_t stride)
{
    register vector unsigned char tmp;
    register vector unsigned char mask;
    register vector unsigned char t0, t1;

#ifdef DEBUG
    if(((unsigned long)dst) & 0x7)
        fprintf(stderr, "transfer8x8_copy_altivec:incorrect align, dst: %lx\n", (long)dst);
    if(stride & 0x7)
        fprintf(stderr, "transfer8x8_copy_altivec:incorrect stride, stride: %u\n", stride);
#endif

    /* Byte mask: first eight bytes 0x00, last eight bytes 0xFF. */
    mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

    COPY8TO8();
    COPY8TO8();
    COPY8TO8();
    COPY8TO8();

    COPY8TO8();
    COPY8TO8();
    COPY8TO8();
    COPY8TO8();
}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:27,


示例3: x264_add4x4_idct_altivec

void x264_add4x4_idct_altivec( uint8_t *dst, int16_t dct[16] ){    vec_u16_t onev = vec_splat_u16(1);    dct[0] += 32; // rounding for the >>6 at the end    vec_s16_t s0, s1, s2, s3;    s0 = vec_ld( 0x00, dct );    s1 = vec_sld( s0, s0, 8 );    s2 = vec_ld( 0x10, dct );    s3 = vec_sld( s2, s2, 8 );    vec_s16_t d0, d1, d2, d3;    IDCT_1D_ALTIVEC( s0, s1, s2, s3, d0, d1, d2, d3 );    vec_s16_t tr0, tr1, tr2, tr3;    VEC_TRANSPOSE_4( d0, d1, d2, d3, tr0, tr1, tr2, tr3 );    vec_s16_t idct0, idct1, idct2, idct3;    IDCT_1D_ALTIVEC( tr0, tr1, tr2, tr3, idct0, idct1, idct2, idct3 );    vec_u8_t perm_ldv = vec_lvsl( 0, dst );    vec_u16_t sixv = vec_splat_u16(6);    LOAD_ZERO;    ALTIVEC_STORE4_SUM_CLIP( &dst[0*FDEC_STRIDE], idct0, perm_ldv );    ALTIVEC_STORE4_SUM_CLIP( &dst[1*FDEC_STRIDE], idct1, perm_ldv );    ALTIVEC_STORE4_SUM_CLIP( &dst[2*FDEC_STRIDE], idct2, perm_ldv );    ALTIVEC_STORE4_SUM_CLIP( &dst[3*FDEC_STRIDE], idct3, perm_ldv );}
开发者ID:0x0B501E7E,项目名称:x264,代码行数:32,


示例4: transfer_16to8copy_altivec_c

/*
 * Transfer an 8x8 block of 16-bit values from src to 8-bit dst by expanding
 * the COPY16TO8() macro eight times.
 *
 * COPY16TO8() is defined elsewhere; it presumably packs one row per
 * expansion and relies on the locals declared here (s, packed, mask_stencil,
 * mask, load_src_perm) -- confirm against the macro definition.
 *
 * With DEBUG defined, a diagnostic is printed when dst is not 8-byte aligned
 * or stride is not a multiple of 8.
 */
void transfer_16to8copy_altivec_c(uint8_t *dst,
                            vector signed short *src,
                            uint32_t stride)
{
    register vector signed short s;
    register vector unsigned char packed;
    register vector unsigned char mask_stencil;
    register vector unsigned char mask;
    register vector unsigned char load_src_perm;

#ifdef DEBUG
    /* if this is on, print alignment errors */
    if(((unsigned long) dst) & 0x7)
        fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, dst %lx\n", (long)dst);
    if(stride & 0x7)
        fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, stride %u\n", stride);
#endif

    /* Initialisation stuff */
    load_src_perm = vec_lvsl(0, (unsigned char*)src);
    /* Byte mask: first eight bytes 0x00, last eight bytes 0xFF. */
    mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

    COPY16TO8();
    COPY16TO8();
    COPY16TO8();
    COPY16TO8();

    COPY16TO8();
    COPY16TO8();
    COPY16TO8();
    COPY16TO8();
}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:31,


示例5: sad8_altivec_c

uint32_tsad8_altivec_c(const uint8_t * cur,	   const uint8_t *ref,	   const uint32_t stride){	uint32_t result = 0;		register vector unsigned int sad;	register vector unsigned char c;	register vector unsigned char r;		/* initialize */	sad = vec_splat_u32(0);		/* Perform sad operations */	SAD8();	SAD8();	SAD8();	SAD8();		SAD8();	SAD8();	SAD8();	SAD8();		/* finish addition, add the first 2 together */	sad = vec_and(sad, (vector unsigned int)vec_pack(vec_splat_u16(-1),vec_splat_u16(0)));	sad = (vector unsigned int)vec_sums((vector signed int)sad, vec_splat_s32(0));	sad = vec_splat(sad,3);	vec_ste(sad, 0, &result);			return result;}
开发者ID:Distrotech,项目名称:xvidcore,代码行数:33,


示例6: put_no_rnd_pixels8_xy2_altivec

/* next one assumes that ((line_size % 8) == 0) */static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){    register int i;    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;    register vector unsigned char blockv, temp1, temp2;    register vector unsigned short pixelssum1, pixelssum2, temp3;    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);    temp1 = vec_ld(0, pixels);    temp2 = vec_ld(16, pixels);    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {        pixelsv2 = temp2;    } else {        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));    }    pixelsv1 = vec_mergeh(vczero, pixelsv1);    pixelsv2 = vec_mergeh(vczero, pixelsv2);    pixelssum1 = vec_add((vector unsigned short)pixelsv1,                         (vector unsigned short)pixelsv2);    pixelssum1 = vec_add(pixelssum1, vcone);    for (i = 0; i < h ; i++) {        int rightside = ((unsigned long)block & 0x0000000F);        blockv = vec_ld(0, block);        temp1 = vec_ld(line_size, pixels);        temp2 = vec_ld(line_size + 16, pixels);        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {            pixelsv2 = temp2;        } else {            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));        }        pixelsv1 = vec_mergeh(vczero, pixelsv1);        pixelsv2 = vec_mergeh(vczero, pixelsv2);        pixelssum2 = vec_add((vector unsigned short)pixelsv1,                             (vector unsigned short)pixelsv2);        temp3 = vec_add(pixelssum1, pixelssum2);        temp3 = vec_sra(temp3, vctwo);        
pixelssum1 = vec_add(pixelssum2, vcone);        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);        if (rightside) {            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));        } else {            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));        }        vec_st(blockv, 0, block);        block += line_size;        pixels += line_size;    }}
开发者ID:AVLeo,项目名称:libav,代码行数:59,


示例7: dequant_mpeg_intra_altivec_c

uint32_tdequant_mpeg_intra_altivec_c(int16_t * data,					 const int16_t * coeff,					 const uint32_t quant,					 const uint32_t dcscalar,					 const uint16_t * mpeg_quant_matrices){	register const uint16_t *intra_matrix = get_intra_matrix(mpeg_quant_matrices);	register const int16_t *coeff_ptr = coeff;	register int16_t *data_ptr = data;		register vec_sint16_t ox00;	register vec_sint16_t level;	register vec_sint16_t vec_2048;	register vec_uint16_t vintra;	register vec_uint32_t swap;	register vec_uint32_t even,odd;	register vec_uint32_t et,ot,t;		vec_uint32_t vquant;	vector bool short zero_less;	vector bool short overflow;	#ifdef DEBUG	if((long)data & 0xf)		fprintf(stderr, "xvidcore: error in dequant_mpeg_intra_altivec_c, incorrect align: %x/n", data);#endif	/* Initialize */	ox00 = vec_splat_s16(0);	*((uint32_t*)&vquant) = quant;	vquant = vec_splat(vquant,0);		swap = vec_rl(vquant, vec_splat_u32(-16));	vec_2048 = (vec_sint16_t)vec_rl(vec_splat_u16(8),vec_splat_u16(8));		DEQUANT_MPEG_INTRA();	DEQUANT_MPEG_INTRA();	DEQUANT_MPEG_INTRA();	DEQUANT_MPEG_INTRA();		DEQUANT_MPEG_INTRA();	DEQUANT_MPEG_INTRA();	DEQUANT_MPEG_INTRA();	DEQUANT_MPEG_INTRA();		/* Process the first */	data[0] = coeff[0] * dcscalar;	if (data[0] < -2048) {		data[0] = -2048;	} else if (data[0] > 2047) {		data[0] = 2047;	}			return 0;}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:56,


示例8: quant_h263_inter_altivec_c

uint32_tquant_h263_inter_altivec_c(int16_t *coeff,                            int16_t *data,                            const uint32_t quant,                            const uint16_t *mpeg_quant_matrices){    vector unsigned char zerovec;    vector unsigned short mult;    vector unsigned short quant_m_2;    vector unsigned short quant_d_2;    vector unsigned short sum_short;    vector signed short acLevel;        vector unsigned int even;    vector unsigned int odd;        vector bool short m2_mask;    vector bool short zero_mask;        uint32_t result;#ifdef DEBUG    if(((unsigned)coeff) & 0x15)        fprintf(stderr, "quant_h263_inter_altivec_c:incorrect align, coeff: %lx/n", (long)coeff);#endif        /* initialisation stuff */    zerovec = vec_splat_u8(0);    *((unsigned short*)&mult) = (unsigned short)multipliers[quant];    mult = vec_splat(mult, 0);    *((unsigned short*)&quant_m_2) = (unsigned short)quant;    quant_m_2 = vec_splat(quant_m_2, 0);    quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));    *((unsigned short*)&quant_d_2) = (unsigned short)quant;    quant_d_2 = vec_splat(quant_d_2, 0);    quant_d_2 = vec_sr(quant_d_2, vec_splat_u16(1));    sum_short = (vector unsigned short)zerovec;        /* Quantize */    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();        QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();            /* Calculate the return value */    even = (vector unsigned int)vec_sum4s((vector signed short)sum_short, (vector signed int)zerovec);    even = (vector unsigned int)vec_sums((vector signed int)even, (vector signed int)zerovec);    even = vec_splat(even, 3);    vec_ste(even, 0, &result);    return result;}
开发者ID:roozbeh,项目名称:openCU,代码行数:56,


示例9: test_madd

/* *************************************************************************    NAME:  test_mladd   USAGE:    test_madd();   returns: void   DESCRIPTION:                   see how the combination multiply/add operation works                   this will work on low order 16-bits    REFERENCES:   Ian Ollmann's Altivec Tutorial      LIMITATIONS:   GLOBAL VARIABLES:      accessed: none      modified: none   FUNCTIONS CALLED:      fprintf   vec_madd - multiply two short vectors and add to the sum a short              all in one operation       REVISION HISTORY:        STR                  Description of Revision                 Author     06-Mar-11               initial coding                           kaj ************************************************************************* */void test_mladd(void){  vector unsigned short shortVector1 =       { 0, 2, 4, 8, 16, 32, 64, 128 };  vector unsigned short addVector;  vector unsigned short coeffVector;  vector unsigned short resultVector;  vector short shortVector2 =       { -128, -64, -32, -16, 0, 16, 32, 64};  vector short addVector2 =        { -10, -10, -10, -10, 0, 10, 10, 10};  vector short coeffVector2;  vector short resultVector2;  short printshort[SHORT_ARRAYSIZE] __attribute__ ((aligned (16)));  coeffVector = vec_splat_u16(2);  addVector = vec_splat_u16(0);  /* print vectors performing mladd on */  fprintf(stderr,"-----------------------------------------------------------"                 "/n/n");  printVecUShorts("vec_mladd unsigned input vector 1", shortVector1,                   SHORT_ARRAYSIZE);  printVecUShorts("vec_mladd unsigned input vector to add", addVector,                   SHORT_ARRAYSIZE);    printVecUShorts("vec_mladd unsigned coeffvector to multiply", coeffVector,                   SHORT_ARRAYSIZE);     /* calculate */  resultVector = vec_mladd(shortVector1,coeffVector,addVector);  printVecUShorts("vec_mladd vector (Input*2+0)", resultVector,                   SHORT_ARRAYSIZE);  /* signed shorts */  
coeffVector2 = vec_splat_s16(2);      /* print signed short vectors performing mladd on */  fprintf(stderr,"----------------------------------------------------------"                 "/n/n");  printVecShorts("vec_mladd signed input vector 1", shortVector2,                  SHORT_ARRAYSIZE);  printVecShorts("vec_mladd signed input vector to add", addVector2,                  SHORT_ARRAYSIZE);    printVecShorts("vec_mladd signed coeffvector to multiply", coeffVector2,                  SHORT_ARRAYSIZE);     /* calculate */  resultVector2 = vec_mladd(shortVector2,coeffVector2,addVector2);  printVecShorts("vec_mladd vector (Input*2 + 10(increment pos & neg by 10)",                  resultVector2,SHORT_ARRAYSIZE);} /* test_mladd */
开发者ID:yaojingguo,项目名称:gcc-intrinsics-samplecode,代码行数:90,


示例10: predict_16x16_p_altivec

static void predict_16x16_p_altivec( uint8_t *src ){    int16_t a, b, c, i;    int H = 0;    int V = 0;    int16_t i00;    for( i = 1; i <= 8; i++ )    {        H += i * ( src[7+i - FDEC_STRIDE ]  - src[7-i - FDEC_STRIDE ] );        V += i * ( src[(7+i)*FDEC_STRIDE -1] - src[(7-i)*FDEC_STRIDE -1] );    }    a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );    b = ( 5 * H + 32 ) >> 6;    c = ( 5 * V + 32 ) >> 6;    i00 = a - b * 7 - c * 7 + 16;    vect_sshort_u i00_u, b_u, c_u;    i00_u.s[0] = i00;    b_u.s[0]   = b;    c_u.s[0]   = c;    vec_u16_t val5_v = vec_splat_u16(5);    vec_s16_t i00_v, b_v, c_v;    i00_v = vec_splat(i00_u.v, 0);    b_v = vec_splat(b_u.v, 0);    c_v = vec_splat(c_u.v, 0);    vec_s16_t induc_v  = (vec_s16_t) CV(0,  1,  2,  3,  4,  5,  6,  7);    vec_s16_t b8_v = vec_sl(b_v, vec_splat_u16(3));    vec_s32_t mule_b_v = vec_mule(induc_v, b_v);    vec_s32_t mulo_b_v = vec_mulo(induc_v, b_v);    vec_s16_t mul_b_induc0_v = vec_pack(vec_mergeh(mule_b_v, mulo_b_v), vec_mergel(mule_b_v, mulo_b_v));    vec_s16_t add_i0_b_0v = vec_adds(i00_v, mul_b_induc0_v);    vec_s16_t add_i0_b_8v = vec_adds(b8_v, add_i0_b_0v);    int y;    for( y = 0; y < 16; y++ )    {        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);        vec_s16_t shift_8_v = vec_sra(add_i0_b_8v, val5_v);        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_8_v);        vec_st( com_sat_v, 0, &src[0]);        src += FDEC_STRIDE;        i00 += c;        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);        add_i0_b_8v = vec_adds(add_i0_b_8v, c_v);    }}
开发者ID:UIKit0,项目名称:H.264-in-CUDA,代码行数:50,


示例11: test_add_subtract

/* *************************************************************************    NAME:  test_add_subtract   USAGE:    test_add_subtract();   returns: void   DESCRIPTION:                   see how add and subtract work on vectors   REFERENCES:   Ian Ollmann's Altivec Tutorial      LIMITATIONS:   GLOBAL VARIABLES:      accessed: none      modified: none   FUNCTIONS CALLED:      fprintf   vec_add - add two vectors   vec_adds - add two vectors, saturation   vec_sub - subtract two vectors      REVISION HISTORY:        STR                  Description of Revision                 Author     27-Feb-11               initial coding                           kaj ************************************************************************* */void test_add_subtract(void){  vector unsigned short addVector1 =        { 0, 1000, 5000, 10000, 15000, 20000, 50000, 65535};  vector signed short addSVector1 =        { -32768, -10000, -5000, 0, 10, 5000, 10000, 32767};  vector signed short addSVector2 =        { -10, -10, -10, 0, 10, 10, 10, 10};  vector unsigned short sumVector;  vector signed short sumSVector;  short printshort[SHORT_ARRAYSIZE] __attribute__ ((aligned (16)));  /* vec_add should wrap, vec_adds will chop at max/min */  fprintf(stderr,"-----------------------------------------------------------/n");    /* add 10 to each element - unsigned short */  printVecUShorts("vec_add unsigned short input vector", addVector1,SHORT_ARRAYSIZE);   sumVector = vec_add(addVector1, vec_splat_u16(10));  printVecUShorts("vec_add sum vector (Input+10) ", sumVector,SHORT_ARRAYSIZE);  /* add 10 to each element using saturation add - unsigned short */  sumVector = vec_adds(addVector1, vec_splat_u16(10));  printVecUShorts("vec_adds sum vector (Input+10)", sumVector,SHORT_ARRAYSIZE);  /* subtract 10 from each element - unsigned short */  sumVector = vec_sub(addVector1, vec_splat_u16(10));  printVecUShorts("vec_sub sum vector (Input-10) ", sumVector,SHORT_ARRAYSIZE);  
fprintf(stderr,"-----------------------------------------------------------/n/n");   /* add 10 to each element - signed short */  printVecShorts("vec_add signed short input vector", addSVector1,SHORT_ARRAYSIZE);   sumSVector = vec_add(addSVector1,addSVector2);  printVecShorts("vec_add sum vector (increment pos & neg by 10) ",                      sumSVector,SHORT_ARRAYSIZE);   /* add 10 to each element using saturation add - signed short */  sumSVector = vec_adds(addSVector1,addSVector2);  printVecShorts("vec_adds sum vector (increment pos & neg by 10)",                     sumSVector,SHORT_ARRAYSIZE);  /* subtract 10 from each element - signed short */  sumSVector = vec_sub(addSVector1,addSVector2);  printVecShorts("vec_sub vector (decrement pos & neg by 10) ",                      sumSVector,SHORT_ARRAYSIZE);} /* test_add_subtract */
开发者ID:yaojingguo,项目名称:gcc-intrinsics-samplecode,代码行数:87,


示例12: x264_add8x8_idct_dc_altivec

void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] ){    vec_s16_t dcv;    vec_s16_t v32 = vec_sl( vec_splat_s16( 8 ), vec_splat_u16( 2 ) );    vec_u16_t v6 = vec_splat_u16( 6 );    vec_s16_t dctv = vec_vsx_ld( 0, dct );    dctv = vec_sra( vec_add( dctv, v32 ), v6 );    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 0 ), (vec_s32_t)vec_splat( dctv, 1 ) );    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv );    idct8_dc_altivec( &p_dst[0], dcv );    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 2 ), (vec_s32_t)vec_splat( dctv, 3 ) );    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv );    idct8_dc_altivec( &p_dst[4*FDEC_STRIDE+0], dcv );}
开发者ID:Hero2000,项目名称:CainCamera,代码行数:15,


示例13: yuv2plane1_16_vsx

static void yuv2plane1_16_vsx(const int32_t *src, uint16_t *dest, int dstW,                           int big_endian, int output_bits){    const int dst_u = -(uintptr_t)dest & 7;    const int shift = 3;    const int add = (1 << (shift - 1));    const vector uint32_t vadd = (vector uint32_t) {add, add, add, add};    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0);    const vector uint32_t vshift = (vector uint32_t) vec_splat_u32(shift);    vector uint32_t v, v2;    vector uint16_t vd;    int i;    yuv2plane1_16_u(src, dest, dst_u, big_endian, output_bits, 0);    for (i = dst_u; i < dstW - 7; i += 8) {        v = vec_vsx_ld(0, (const uint32_t *) &src[i]);        v = vec_add(v, vadd);        v = vec_sr(v, vshift);        v2 = vec_vsx_ld(0, (const uint32_t *) &src[i + 4]);        v2 = vec_add(v2, vadd);        v2 = vec_sr(v2, vshift);        vd = vec_packsu(v, v2);        vd = vec_rl(vd, vswap);        vec_st(vd, 0, &dest[i]);    }    yuv2plane1_16_u(src, dest, dstW, big_endian, output_bits, i);}
开发者ID:lihp1603,项目名称:ffmpeg,代码行数:32,


示例14: transfer_8to16sub_altivec_c

voidtransfer_8to16sub_altivec_c(int16_t * dct,							uint8_t * cur,							uint8_t * ref,							const uint32_t stride){	register vector unsigned char c,r;	register vector unsigned char ox00;	register vector unsigned char mask_00ff;	register vector unsigned char mask;	register vector signed short cs,rs;	#ifdef DEBUG	if((long)dct & 0xf)		fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, dct: %lx/n", (long)dct);	if((long)cur & 0x7)		fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, cur: %lx/n", (long)cur);	if(stride & 0x7)		fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect stride, stride: %lu/n", (long)stride);#endif	/* initialize */	ox00 = vec_splat_u8(0);	mask_00ff = vec_pack((vector unsigned short)ox00,vec_splat_u16(-1));		SUB8TO16();	SUB8TO16();	SUB8TO16();	SUB8TO16();		SUB8TO16();	SUB8TO16();	SUB8TO16();	SUB8TO16();}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:34,


示例15: ff_vp3_idct_put_altivec

/*
 * Inverse transform of an 8x8 block whose result is stored to dst, one row
 * of eight bytes per stride.
 *
 * IDCT_START, IDCT_1D and TRANSPOSE8 are macros defined elsewhere; they
 * presumably declare and use the row vectors b0..b7 and the rounding vector
 * eight -- confirm against their definitions. PUT() is defined here and
 * stays defined after the function, as in the original.
 */
void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
{
    vec_u8 t;
    IDCT_START

    // pixels are signed; so add 128*16 in addition to the normal 8
    vec_s16 v2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));
    eight = vec_add(eight, v2048);

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

/* Pack a row to unsigned bytes with saturation and store its eight bytes as
 * two 32-bit element stores at offsets 0 and 4. */
#define PUT(a)\
    t = vec_packsu(a, a);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    PUT(b0)     dst += stride;
    PUT(b1)     dst += stride;
    PUT(b2)     dst += stride;
    PUT(b3)     dst += stride;
    PUT(b4)     dst += stride;
    PUT(b5)     dst += stride;
    PUT(b6)     dst += stride;
    PUT(b7)
}
开发者ID:9aa5,项目名称:FFmpeg,代码行数:27,


示例16: ff_h264_idct8_add_altivec

/*
 * 8x8 inverse transform of dct whose result is combined with the eight rows
 * of dst (stride bytes apart).
 *
 * Two passes of IDCT8_1D_ALTIVEC() are run with a TRANSPOSE8() in between;
 * ALTIVEC_STORE_SUM_CLIP() then handles one destination row each. These
 * macros are defined elsewhere and presumably use onev, twov, sixv and the
 * variables introduced by LOAD_ZERO -- confirm before renaming any local.
 *
 * dct[0] is modified in place (rounding bias added).
 */
static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride )
{
    vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;
    vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;
    vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;

    vec_u8 perm_ldv = vec_lvsl(0, dst);
    vec_u8 perm_stv = vec_lvsr(8, dst);

    const vec_u16 onev = vec_splat_u16(1);
    const vec_u16 twov = vec_splat_u16(2);
    const vec_u16 sixv = vec_splat_u16(6);

    /* Byte select mask: first eight bytes 0x00, last eight bytes 0xFF. */
    const vec_u8 sel = (vec_u8)
    {
        0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1
    };
    LOAD_ZERO;

    dct[0] += 32; // rounding for the >>6 at the end

    /* One 16-byte load per row of eight coefficients. */
    s0 = vec_ld(0x00, (int16_t *)dct);
    s1 = vec_ld(0x10, (int16_t *)dct);
    s2 = vec_ld(0x20, (int16_t *)dct);
    s3 = vec_ld(0x30, (int16_t *)dct);
    s4 = vec_ld(0x40, (int16_t *)dct);
    s5 = vec_ld(0x50, (int16_t *)dct);
    s6 = vec_ld(0x60, (int16_t *)dct);
    s7 = vec_ld(0x70, (int16_t *)dct);

    IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,
                     d0, d1, d2, d3, d4, d5, d6, d7);

    TRANSPOSE8( d0,  d1,  d2,  d3,  d4,  d5,  d6, d7 );

    IDCT8_1D_ALTIVEC(d0,  d1,  d2,  d3,  d4,  d5,  d6, d7,
                     idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);

    ALTIVEC_STORE_SUM_CLIP(&dst[0*stride], idct0, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[1*stride], idct1, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[2*stride], idct2, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[3*stride], idct3, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[4*stride], idct4, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
}
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:47,


示例17: x264_add8x8_idct8_altivec

void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[64] ){    vec_u16_t onev = vec_splat_u16(1);    vec_u16_t twov = vec_splat_u16(2);    dct[0] += 32; // rounding for the >>6 at the end    vec_s16_t s0, s1, s2, s3, s4, s5, s6, s7;    s0 = vec_ld(0x00, dct);    s1 = vec_ld(0x10, dct);    s2 = vec_ld(0x20, dct);    s3 = vec_ld(0x30, dct);    s4 = vec_ld(0x40, dct);    s5 = vec_ld(0x50, dct);    s6 = vec_ld(0x60, dct);    s7 = vec_ld(0x70, dct);    vec_s16_t d0, d1, d2, d3, d4, d5, d6, d7;    IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,  d0, d1, d2, d3, d4, d5, d6, d7);    vec_s16_t tr0, tr1, tr2, tr3, tr4, tr5, tr6, tr7;    VEC_TRANSPOSE_8( d0,  d1,  d2,  d3,  d4,  d5,  d6, d7,                    tr0, tr1, tr2, tr3, tr4, tr5, tr6, tr7);    vec_s16_t idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;    IDCT8_1D_ALTIVEC(tr0,     tr1,   tr2,   tr3,   tr4,   tr5,   tr6,   tr7,                     idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);    vec_u8_t perm_ldv = vec_lvsl(0, dst);    vec_u8_t perm_stv = vec_lvsr(8, dst);    vec_u16_t sixv = vec_splat_u16(6);    const vec_u8_t sel = (vec_u8_t) CV(0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1);    LOAD_ZERO;    ALTIVEC_STORE_SUM_CLIP(&dst[0*FDEC_STRIDE], idct0, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[1*FDEC_STRIDE], idct1, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[2*FDEC_STRIDE], idct2, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[3*FDEC_STRIDE], idct3, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[4*FDEC_STRIDE], idct4, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[5*FDEC_STRIDE], idct5, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[6*FDEC_STRIDE], idct6, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[7*FDEC_STRIDE], idct7, perm_ldv, perm_stv, sel);}
开发者ID:0x0B501E7E,项目名称:x264,代码行数:45,


示例18: foo

void foo (void) {  vector bool int boolVec1 = (vector bool int) vec_splat_u32(3);  vector bool short boolVec2 = (vector bool short) vec_splat_u16(3);  vector bool char boolVec3 = (vector bool char) vec_splat_u8(3);  boolVec1 = vec_sld( boolVec1, boolVec1, 4 );  boolVec2 = vec_sld( boolVec2, boolVec2, 2 );  boolVec3 = vec_sld( boolVec3, boolVec3, 1 );}
开发者ID:0day-ci,项目名称:gcc,代码行数:10,


示例19: jsimd_h2v1_downsample_altivec

/*
 * Horizontal 2:1 downsampling: every output sample is
 * (left + right + bias) >> 1 of a horizontal pair of input samples, where
 * the bias comes from pw_bias (built with __4X2(0, 1)).
 *
 * Each iteration reads up to 32 input samples and stores 16 output samples.
 * When eight or fewer output columns remain, the second input vector is not
 * loaded and the upper half of the stored vector is zero.
 *
 * The __4X2/__8X/__16X and VEC_UNPACKHU/VEC_UNPACKLU macros are defined
 * elsewhere. NOTE(review): the unpack macros may refer to pb_zero by name --
 * confirm before renaming it.
 */
void
jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
                               JDIMENSION v_samp_factor,
                               JDIMENSION width_blocks,
                               JSAMPARRAY input_data, JSAMPARRAY output_data)
{
  int outrow, outcol;
  JDIMENSION output_cols = width_blocks * DCTSIZE;
  JSAMPROW inptr, outptr;

  __vector unsigned char this0, next0, out;
  __vector unsigned short this0e, this0o, next0e, next0o, outl, outh;

  /* Constants */
  __vector unsigned short pw_bias = { __4X2(0, 1) },
    pw_one = { __8X(1) };
  /* Permutation that gathers the even-indexed bytes into the first half and
     the odd-indexed bytes into the second half. */
  __vector unsigned char even_odd_index =
    {0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15},
    pb_zero = { __16X(0) };

  expand_right_edge(input_data, max_v_samp_factor, image_width,
                    output_cols * 2);

  for (outrow = 0; outrow < v_samp_factor; outrow++) {
    outptr = output_data[outrow];
    inptr = input_data[outrow];

    for (outcol = output_cols; outcol > 0;
         outcol -= 16, inptr += 32, outptr += 16) {

      this0 = vec_ld(0, inptr);
      this0 = vec_perm(this0, this0, even_odd_index);
      this0e = (__vector unsigned short)VEC_UNPACKHU(this0);
      this0o = (__vector unsigned short)VEC_UNPACKLU(this0);
      outl = vec_add(this0e, this0o);
      outl = vec_add(outl, pw_bias);
      outl = vec_sr(outl, pw_one);

      if (outcol > 8) {
        next0 = vec_ld(16, inptr);
        next0 = vec_perm(next0, next0, even_odd_index);
        next0e = (__vector unsigned short)VEC_UNPACKHU(next0);
        next0o = (__vector unsigned short)VEC_UNPACKLU(next0);
        outh = vec_add(next0e, next0o);
        outh = vec_add(outh, pw_bias);
        outh = vec_sr(outh, pw_one);
      } else
        outh = vec_splat_u16(0);

      out = vec_pack(outl, outh);
      vec_st(out, 0, outptr);
    }
  }
}
开发者ID:AntonioMA,项目名称:UVCCamera,代码行数:54,


示例20: predict_16x16_p_altivec

static void predict_16x16_p_altivec( uint8_t *src ){    int H = 0, V = 0;    for( int i = 1; i <= 8; i++ )    {        H += i * ( src[7+i - FDEC_STRIDE ]  - src[7-i - FDEC_STRIDE ] );        V += i * ( src[(7+i)*FDEC_STRIDE -1] - src[(7-i)*FDEC_STRIDE -1] );    }    int a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );    int b = ( 5 * H + 32 ) >> 6;    int c = ( 5 * V + 32 ) >> 6;    int i00 = a - b * 7 - c * 7 + 16;    vec_s16_u i00_u, b_u, c_u;    i00_u.s[0] = i00;    b_u.s[0]   = b;    c_u.s[0]   = c;    vec_u16_t val5_v = vec_splat_u16(5);    vec_s16_t i00_v, b_v, c_v;    i00_v = vec_splat(i00_u.v, 0);    b_v = vec_splat(b_u.v, 0);    c_v = vec_splat(c_u.v, 0);    vec_s16_t induc_v  = (vec_s16_t) CV(0,  1,  2,  3,  4,  5,  6,  7);    vec_s16_t b8_v = vec_sl(b_v, vec_splat_u16(3));    vec_s16_t add_i0_b_0v = vec_mladd(induc_v, b_v, i00_v);    vec_s16_t add_i0_b_8v = vec_adds(b8_v, add_i0_b_0v);    for( int y = 0; y < 16; y++ )    {        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);        vec_s16_t shift_8_v = vec_sra(add_i0_b_8v, val5_v);        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_8_v);        vec_st( com_sat_v, 0, &src[0]);        src += FDEC_STRIDE;        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);        add_i0_b_8v = vec_adds(add_i0_b_8v, c_v);    }}
开发者ID:xing2fan,项目名称:x264,代码行数:41,


示例21: dequant_h263_inter_altivec_c

uint32_tdequant_h263_inter_altivec_c(int16_t *data,                                int16_t *coeff,                                const uint32_t quant,                                const uint16_t *mpeg_quant_matrices){    vector signed short acLevel;    vector signed short vec_2048;        vector unsigned short quant_m_2;    vector unsigned short quant_add;    vector unsigned short t;        register vector unsigned int even;    register vector unsigned int odd;    register vector unsigned int high;    register vector unsigned int low;        register vector unsigned char zerovec;        vector bool short equal_zero;    vector bool short less_zero;    vector bool short overflow;    #ifdef DEBUG    /* print alignment errors if this is on */    if(((unsigned)data) & 0x15)        fprintf(stderr, "dequant_h263_inter_altivec_c:incorrect align, data: %lx/n", (long)data);#endif        /* initialize */    *((unsigned short*)&quant_m_2) = (unsigned short)(quant << 1);    quant_m_2 = vec_splat(quant_m_2,0);        *((unsigned short*)&quant_add) = (unsigned short)(quant & 1 ? quant : quant - 1);    quant_add = vec_splat(quant_add,0);        vec_2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));    zerovec = vec_splat_u8(0);        /* dequant */    DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();        DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();    DEQUANT_H263_INTER_ALTIVEC();        return 0;}
开发者ID:roozbeh,项目名称:openCU,代码行数:53,


示例22: pix_multiply

/*
 * Per-byte multiply of two vectors of packed 8-bit channels.
 *
 * Each byte of p and the corresponding byte of a are widened to 16 bits
 * (by interleaving with zero bytes), and the result for that byte is
 *
 *     t = p * a + 0x80;
 *     r = (t + (t >> 8)) >> 8;
 *
 * i.e. the usual rounded division of the product by 255.  The high and low
 * halves of the inputs are processed separately and packed back into 16
 * bytes with unsigned saturation.
 */
static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
    /* Constants shared by both halves. */
    const vector unsigned short round =
        (vector unsigned short)
        AVV (0x0080, 0x0080, 0x0080, 0x0080,
             0x0080, 0x0080, 0x0080, 0x0080);
    const vector unsigned short shift8 = vec_splat_u16 (8);

    vector unsigned short hi, lo, mod;

    /* unpack the high half to short */
    hi = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);

    mod = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    hi = vec_mladd (hi, mod, round);

    hi = vec_adds (hi, vec_sr (hi, shift8));

    hi = vec_sr (hi, shift8);

    /* unpack the low half to short */
    lo = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);

    mod = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    lo = vec_mladd (lo, mod, round);

    lo = vec_adds (lo, vec_sr (lo, shift8));

    lo = vec_sr (lo, shift8);

    return (vector unsigned int)vec_packsu (hi, lo);
}
开发者ID:1833183060,项目名称:wke,代码行数:40,


示例23: transfer_8to16sub2_altivec_c

/*
 * AltiVec driver that expands SUB28TO16() once per row for eight rows.
 *
 * The macro is defined elsewhere and refers to the parameters and locals
 * declared here by name, so they must keep their names (presumably each
 * expansion writes cur minus the average of ref1 and ref2 into dct as
 * 16-bit values -- confirm against the macro definition).
 *
 *   dct     destination, expected 16-byte aligned
 *   cur     current 8-pixel rows, expected 8-byte aligned
 *   ref1    first reference rows
 *   ref2    second reference rows
 *   stride  row stride, expected to be a multiple of 8
 *
 * With DEBUG defined, violated expectations are reported on stderr; the
 * function carries on regardless.
 */
void
transfer_8to16sub2_altivec_c(vector signed short *dct,
                             uint8_t *cur,
                             uint8_t *ref1,
                             uint8_t *ref2,
                             const uint32_t stride)
{
    vector unsigned char r1;
    vector unsigned char r2;
    vector unsigned char r;
    vector unsigned char c;
    vector unsigned char mask;
    vector signed short cs;
    vector signed short rs;

#ifdef DEBUG
    /* Dump alignment errors if DEBUG is set */
    if (((unsigned long)dct) & 0xf)
        fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %lx\n", (unsigned long)dct);
    if (((unsigned long)cur) & 0x7)
        fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, cur: %lx\n", (unsigned long)cur);
    if (stride & 0x7)
        fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect stride, stride: %u\n", stride);
#endif

    /* Initialisation: first eight bytes 0x00, last eight bytes 0xFF */
    mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

    SUB28TO16();
    SUB28TO16();
    SUB28TO16();
    SUB28TO16();

    SUB28TO16();
    SUB28TO16();
    SUB28TO16();
    SUB28TO16();
}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:38,


示例24: quant_h263_intra_altivec_c

uint32_tquant_h263_intra_altivec_c(int16_t *coeff,                                    int16_t *data,                                    const uint32_t quant,                                    const uint32_t dcscalar,                                    const uint16_t *mpeg_quant_matrices){    vector unsigned char zerovec;    vector unsigned short mult;    vector unsigned short quant_m_2;    vector signed short acLevel;        register vector unsigned int even;    register vector unsigned int odd;        vector bool short zero_mask;    vector bool short m2_mask;        register int16_t *origin_coeff = coeff;    register int16_t *origin_data = data;#ifdef DEBUG    if(((unsigned)coeff) & 15)        fprintf(stderr, "quant_h263_intra_altivec_c:incorrect align, coeff: %lx/n", (long)coeff);#endif        zerovec = vec_splat_u8(0);        *((unsigned short*)&mult) = (unsigned short)multipliers[quant];    mult = vec_splat(mult, 0);        *((unsigned short*)&quant_m_2) = (unsigned short)quant;    quant_m_2 = vec_splat(quant_m_2, 0);    quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));        QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();        QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();    QUANT_H263_INTRA_ALTIVEC();        // noch erstes setzen    origin_coeff[0] = DIV_DIV(origin_data[0], (int32_t)dcscalar);        return 0;}
开发者ID:roozbeh,项目名称:openCU,代码行数:50,


示例25: put_vp8_epel_h_altivec_core

static av_always_inlinevoid put_vp8_epel_h_altivec_core(uint8_t *dst, ptrdiff_t dst_stride,                                 uint8_t *src, ptrdiff_t src_stride,                                 int h, int mx, int w, int is6tap){    LOAD_H_SUBPEL_FILTER(mx-1);    vec_u8 align_vec0, align_vec8, permh0, permh8, filt;    vec_u8 perm_6tap0, perm_6tap8, perml0, perml8;    vec_u8 a, b, pixh, pixl, outer;    vec_s16 f16h, f16l;    vec_s32 filth, filtl;    vec_u8 perm_inner6 = { 1,2,3,4, 2,3,4,5, 3,4,5,6, 4,5,6,7 };    vec_u8 perm_inner4 = { 0,1,2,3, 1,2,3,4, 2,3,4,5, 3,4,5,6 };    vec_u8 perm_inner  = is6tap ? perm_inner6 : perm_inner4;    vec_u8 perm_outer = { 4,9, 0,5, 5,10, 1,6, 6,11, 2,7, 7,12, 3,8 };    vec_s32 c64 = vec_sl(vec_splat_s32(1), vec_splat_u32(6));    vec_u16 c7  = vec_splat_u16(7);    align_vec0 = vec_lvsl( -is6tap-1, src);    align_vec8 = vec_lvsl(8-is6tap-1, src);    permh0     = vec_perm(align_vec0, align_vec0, perm_inner);    permh8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_inner = vec_add(perm_inner, vec_splat_u8(4));    perml0     = vec_perm(align_vec0, align_vec0, perm_inner);    perml8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_6tap0 = vec_perm(align_vec0, align_vec0, perm_outer);    perm_6tap8 = vec_perm(align_vec8, align_vec8, perm_outer);    while (h --> 0) {        FILTER_H(f16h, 0);        if (w == 16) {            FILTER_H(f16l, 8);            filt = vec_packsu(f16h, f16l);            vec_st(filt, 0, dst);        } else {            filt = vec_packsu(f16h, f16h);            vec_ste((vec_u32)filt, 0, (uint32_t*)dst);            if (w == 8)                vec_ste((vec_u32)filt, 4, (uint32_t*)dst);        }        src += src_stride;        dst += dst_stride;    }}
开发者ID:Arcen,项目名称:libav,代码行数:47,


示例26: x264_sub8x8_dct8_altivec

void x264_sub8x8_dct8_altivec( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 ){    vec_u16_t onev = vec_splat_u16(1);    vec_u16_t twov = vec_add( onev, onev );    PREP_DIFF_8BYTEALIGNED;    vec_s16_t dct0v, dct1v, dct2v, dct3v,              dct4v, dct5v, dct6v, dct7v;    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct0v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct1v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct2v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct3v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct4v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct5v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct6v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct7v );    DCT8_1D_ALTIVEC( dct0v, dct1v, dct2v, dct3v,                     dct4v, dct5v, dct6v, dct7v );    vec_s16_t dct_tr0v, dct_tr1v, dct_tr2v, dct_tr3v,        dct_tr4v, dct_tr5v, dct_tr6v, dct_tr7v;    VEC_TRANSPOSE_8(dct0v, dct1v, dct2v, dct3v,                    dct4v, dct5v, dct6v, dct7v,                    dct_tr0v, dct_tr1v, dct_tr2v, dct_tr3v,                    dct_tr4v, dct_tr5v, dct_tr6v, dct_tr7v );    DCT8_1D_ALTIVEC( dct_tr0v, dct_tr1v, dct_tr2v, dct_tr3v,                     dct_tr4v, dct_tr5v, dct_tr6v, dct_tr7v );    vec_st( dct_tr0v,  0,  dct );    vec_st( dct_tr1v, 16,  dct );    vec_st( dct_tr2v, 32,  dct );    vec_st( dct_tr3v, 48,  dct );    vec_st( dct_tr4v, 64,  dct );    vec_st( dct_tr5v, 80,  dct );    vec_st( dct_tr6v, 96,  dct );    vec_st( dct_tr7v, 112, dct );}
开发者ID:0x0B501E7E,项目名称:x264,代码行数:44,


示例27: predict_8x8c_p_altivec

static void predict_8x8c_p_altivec( uint8_t *src ){    int H = 0, V = 0;    for( int i = 0; i < 4; i++ )    {        H += ( i + 1 ) * ( src[4+i - FDEC_STRIDE] - src[2 - i -FDEC_STRIDE] );        V += ( i + 1 ) * ( src[-1 +(i+4)*FDEC_STRIDE] - src[-1+(2-i)*FDEC_STRIDE] );    }    int a = 16 * ( src[-1+7*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );    int b = ( 17 * H + 16 ) >> 5;    int c = ( 17 * V + 16 ) >> 5;    int i00 = a -3*b -3*c + 16;    vec_s16_u i00_u, b_u, c_u;    i00_u.s[0] = i00;    b_u.s[0]   = b;    c_u.s[0]   = c;    vec_u16_t val5_v = vec_splat_u16(5);    vec_s16_t i00_v, b_v, c_v;    i00_v = vec_splat(i00_u.v, 0);    b_v = vec_splat(b_u.v, 0);    c_v = vec_splat(c_u.v, 0);    vec_s16_t induc_v  = (vec_s16_t) CV(0, 1, 2, 3, 4, 5, 6, 7);    vec_s16_t add_i0_b_0v = vec_mladd(induc_v, b_v, i00_v);    PREP_STORE8;    for( int i = 0; i < 8; ++i )    {        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_0_v);        VEC_STORE8(com_sat_v, &src[0]);        src += FDEC_STRIDE;        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);    }}
开发者ID:xing2fan,项目名称:x264,代码行数:41,


示例28: h264_idct_add_altivec

static void h264_idct_add_altivec(uint8_t *dst, int16_t *block, int stride){    vec_s16 va0, va1, va2, va3;    vec_s16 vz0, vz1, vz2, vz3;    vec_s16 vtmp0, vtmp1, vtmp2, vtmp3;    vec_u8 va_u8;    vec_u32 va_u32;    vec_s16 vdst_ss;    const vec_u16 v6us = vec_splat_u16(6);    vec_u8 vdst, vdst_orig;    vec_u8 vdst_mask = vec_lvsl(0, dst);    int element = ((unsigned long)dst & 0xf) >> 2;    LOAD_ZERO;    block[0] += 32;  /* add 32 as a DC-level for rounding */    vtmp0 = vec_ld(0,block);    vtmp1 = vec_sld(vtmp0, vtmp0, 8);    vtmp2 = vec_ld(16,block);    vtmp3 = vec_sld(vtmp2, vtmp2, 8);    memset(block, 0, 16 * sizeof(int16_t));    VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);    VEC_TRANSPOSE_4(va0,va1,va2,va3,vtmp0,vtmp1,vtmp2,vtmp3);    VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);    va0 = vec_sra(va0,v6us);    va1 = vec_sra(va1,v6us);    va2 = vec_sra(va2,v6us);    va3 = vec_sra(va3,v6us);    VEC_LOAD_U8_ADD_S16_STORE_U8(va0);    dst += stride;    VEC_LOAD_U8_ADD_S16_STORE_U8(va1);    dst += stride;    VEC_LOAD_U8_ADD_S16_STORE_U8(va2);    dst += stride;    VEC_LOAD_U8_ADD_S16_STORE_U8(va3);}
开发者ID:DDTChen,项目名称:CookieVLC,代码行数:39,


示例29: PREFIX_h264_qpel16_h_lowpass_altivec

static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {    register int i;    LOAD_ZERO;    const vec_u8 permM2 = vec_lvsl(-2, src);    const vec_u8 permM1 = vec_lvsl(-1, src);    const vec_u8 permP0 = vec_lvsl(+0, src);    const vec_u8 permP1 = vec_lvsl(+1, src);    const vec_u8 permP2 = vec_lvsl(+2, src);    const vec_u8 permP3 = vec_lvsl(+3, src);    const vec_s16 v5ss = vec_splat_s16(5);    const vec_u16 v5us = vec_splat_u16(5);    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));    vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;    register int align = ((((unsigned long)src) - 2) % 16);    vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,              srcP2A, srcP2B, srcP3A, srcP3B,              srcM1A, srcM1B, srcM2A, srcM2B,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,              pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,              psumA, psumB, sumA, sumB;    vec_u8 sum, fsum;    for (i = 0 ; i < 16 ; i ++) {        vec_u8 srcR1 = vec_ld(-2, src);        vec_u8 srcR2 = vec_ld(14, src);        switch (align) {        default: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = vec_perm(srcR1, srcR2, permP3);        } break;        case 11: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = srcR2;        } break;        case 12: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            
srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = srcR2;            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 13: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = srcR2;            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 14: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = srcR2;            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 15: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = srcR2;            srcP0 = vec_perm(srcR2, srcR3, permP0);            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        }        srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);        srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);        srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);        srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);        srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);        srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);        srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);        srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);        srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);        srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);        srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,


示例30: PREFIX_h264_qpel16_hv_lowpass_altivec

static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {    register int i;    LOAD_ZERO;    const vec_u8 permM2 = vec_lvsl(-2, src);    const vec_u8 permM1 = vec_lvsl(-1, src);    const vec_u8 permP0 = vec_lvsl(+0, src);    const vec_u8 permP1 = vec_lvsl(+1, src);    const vec_u8 permP2 = vec_lvsl(+2, src);    const vec_u8 permP3 = vec_lvsl(+3, src);    const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));    const vec_u32 v10ui = vec_splat_u32(10);    const vec_s16 v5ss = vec_splat_s16(5);    const vec_s16 v1ss = vec_splat_s16(1);    const vec_s32 v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));    const vec_u32 v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));    register int align = ((((unsigned long)src) - 2) % 16);    vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,              srcP2A, srcP2B, srcP3A, srcP3B,              srcM1A, srcM1B, srcM2A, srcM2B,              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,              pp1A, pp1B, pp2A, pp2B, psumA, psumB;    const vec_u8 mperm = (const vec_u8)        {0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,         0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F};    int16_t *tmpbis = tmp;    vec_s16 tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,              tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,              tmpP2ssA, tmpP2ssB;    vec_s32 pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,              pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,              pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,              ssumAe, ssumAo, ssumBe, ssumBo;    vec_u8 fsum, sumv, sum;    vec_s16 ssume, ssumo;    src -= (2 * srcStride);    for (i = 0 ; i < 21 ; i ++) {        vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;        vec_u8 srcR1 = vec_ld(-2, src);        vec_u8 srcR2 = vec_ld(14, src);        switch (align) {        default: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 
= vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = vec_perm(srcR1, srcR2, permP3);        } break;        case 11: {            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = vec_perm(srcR1, srcR2, permP2);            srcP3 = srcR2;        } break;        case 12: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = vec_perm(srcR1, srcR2, permP1);            srcP2 = srcR2;            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 13: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = vec_perm(srcR1, srcR2, permP0);            srcP1 = srcR2;            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 14: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = vec_perm(srcR1, srcR2, permM1);            srcP0 = srcR2;            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } break;        case 15: {            vec_u8 srcR3 = vec_ld(30, src);            srcM2 = vec_perm(srcR1, srcR2, permM2);            srcM1 = srcR2;            srcP0 = vec_perm(srcR2, srcR3, permP0);            srcP1 = vec_perm(srcR2, srcR3, permP1);            srcP2 = vec_perm(srcR2, srcR3, permP2);            srcP3 = vec_perm(srcR2, srcR3, permP3);        } 
break;        }//.........这里部分代码省略.........
开发者ID:AVbin,项目名称:libav,代码行数:101,



注:本文中的vec_splat_u16函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vec_splat_u32函数代码示例
C++ vec_sl函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。