您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vec_lvsl函数代码示例

51自学网 2021-06-03 09:36:04
  C++
这篇教程C++ vec_lvsl函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中vec_lvsl函数的典型用法代码示例。如果您正苦于以下问题:C++ vec_lvsl函数的具体用法?C++ vec_lvsl怎么用?C++ vec_lvsl使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_lvsl函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: ff_idct_add_altivec

void ff_idct_add_altivec(uint8_t* dest, int stride, int16_t *blk){    vec_s16 *block = (vec_s16*)blk;    vec_u8 tmp;    vec_s16 tmp2, tmp3;    vec_u8 perm0;    vec_u8 perm1;    vec_u8 p0, p1, p;    IDCT    p0 = vec_lvsl (0, dest);    p1 = vec_lvsl (stride, dest);    p = vec_splat_u8 (-1);    perm0 = vec_mergeh (p, p0);    perm1 = vec_mergeh (p, p1);#define ADD(dest,src,perm)                                              /    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */                        /    tmp = vec_ld (0, dest);                                             /    tmp2 = (vec_s16)vec_perm (tmp, (vec_u8)zero, perm);       /    tmp3 = vec_adds (tmp2, src);                                        /    tmp = vec_packsu (tmp3, tmp3);                                      /    vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest);               /    vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);    ADD (dest, vx0, perm0)      dest += stride;    ADD (dest, vx1, perm1)      dest += stride;    ADD (dest, vx2, perm0)      dest += stride;    ADD (dest, vx3, perm1)      dest += stride;    ADD (dest, vx4, perm0)      dest += stride;    ADD (dest, vx5, perm1)      dest += stride;    ADD (dest, vx6, perm0)      dest += stride;    ADD (dest, vx7, perm1)}
开发者ID:9aa5,项目名称:FFmpeg,代码行数:35,


示例2: pix_abs16x16_y2_altivec

int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);    vector unsigned char *tv;    vector unsigned char pix1v, pix2v, pix3v, avgv, t5;    vector unsigned int sad;    vector signed int sumdiffs;    uint8_t *pix3 = pix2 + line_size;    s = 0;    sad = (vector unsigned int)vec_splat_u32(0);    /*       Due to the fact that pix3 = pix2 + line_size, the pix3 of one       iteration becomes pix2 in the next iteration. We can use this       fact to avoid a potentially expensive unaligned read, each       time around the loop.       Read unaligned pixels into our vectors. The vectors are as follows:       pix2v: pix2[0]-pix2[15]       Split the pixel vectors into shorts    */    tv = (vector unsigned char *) &pix2[0];    pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));        for(i=0;i<16;i++) {        /*           Read unaligned pixels into our vectors. The vectors are as follows:           pix1v: pix1[0]-pix1[15]           pix3v: pix3[0]-pix3[15]        */        tv = (vector unsigned char *) pix1;        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));        tv = (vector unsigned char *) &pix3[0];        pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0]));        /* Calculate the average vector */        avgv = vec_avg(pix2v, pix3v);        /* Calculate a sum of abs differences vector */        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);                pix1 += line_size;        pix2v = pix3v;        pix3 += line_size;            }        /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;    }
开发者ID:KoetseJ,项目名称:xumo,代码行数:59,


示例3: put_no_rnd_pixels8_xy2_altivec

/* next one assumes that ((line_size % 8) == 0) */static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){    register int i;    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;    register vector unsigned char blockv, temp1, temp2;    register vector unsigned short pixelssum1, pixelssum2, temp3;    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);    temp1 = vec_ld(0, pixels);    temp2 = vec_ld(16, pixels);    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {        pixelsv2 = temp2;    } else {        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));    }    pixelsv1 = vec_mergeh(vczero, pixelsv1);    pixelsv2 = vec_mergeh(vczero, pixelsv2);    pixelssum1 = vec_add((vector unsigned short)pixelsv1,                         (vector unsigned short)pixelsv2);    pixelssum1 = vec_add(pixelssum1, vcone);    for (i = 0; i < h ; i++) {        int rightside = ((unsigned long)block & 0x0000000F);        blockv = vec_ld(0, block);        temp1 = vec_ld(line_size, pixels);        temp2 = vec_ld(line_size + 16, pixels);        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {            pixelsv2 = temp2;        } else {            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));        }        pixelsv1 = vec_mergeh(vczero, pixelsv1);        pixelsv2 = vec_mergeh(vczero, pixelsv2);        pixelssum2 = vec_add((vector unsigned short)pixelsv1,                             (vector unsigned short)pixelsv2);        temp3 = vec_add(pixelssum1, pixelssum2);        temp3 = vec_sra(temp3, vctwo);        
pixelssum1 = vec_add(pixelssum2, vcone);        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);        if (rightside) {            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));        } else {            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));        }        vec_st(blockv, 0, block);        block += line_size;        pixels += line_size;    }}
开发者ID:AVLeo,项目名称:libav,代码行数:59,


示例4: sad16bi_altivec_c

/**
 * 16x16 sum of absolute differences between cur and a combination of ref1
 * and ref2, accumulated by 16 expansions of the SAD16BI() macro.
 *
 * @param cur    current block; the DEBUG check expects 16-byte alignment
 * @param ref1   first reference block (alignment handled through mask1)
 * @param ref2   second reference block (alignment handled through mask2)
 * @param stride row stride in bytes; the DEBUG check expects a multiple of 16
 * @return       element 3 of the vec_sums() reduction of `sum`
 *
 * NOTE(review): SAD16BI is defined outside this function and presumably uses
 * cur, ref1, ref2, stride, t1, t2, mask1, mask2, sad and sum by name -- the
 * locals must therefore keep their names.
 */
uint32_t
sad16bi_altivec_c(vector unsigned char *cur,
                  vector unsigned char *ref1,
                  vector unsigned char *ref2,
                  uint32_t stride)
{
    vector unsigned char t1, t2;
    vector unsigned char mask1, mask2;
    vector unsigned char sad;
    vector unsigned int sum;
    uint32_t result;

#ifdef DEBUG
    /* print alignment errors if this is on */
    if((long)cur & 0xf)
        fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lx\n", (unsigned long)cur);
    if(stride & 0xf)
        fprintf(stderr, "sad16bi_altivec:incorrect align, stride: %lu\n", (unsigned long)stride);
#endif

    /* Initialisation stuff */
    stride >>= 4; /* bytes -> 16-byte units */
    mask1 = vec_lvsl(0, (unsigned char*)ref1);
    mask2 = vec_lvsl(0, (unsigned char*)ref2);
    sad = vec_splat_u8(0);
    sum = (vector unsigned int)sad;

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    /* Reduce the four partial sums into element 3 and store it. */
    sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));
    sum = vec_splat(sum, 3);
    vec_ste(sum, 0, (uint32_t*)&result);

    return result;
}
开发者ID:Distrotech,项目名称:xvidcore,代码行数:53,


示例5: sse8_altivec

/** * Sum of Squared Errors for a 8x8 block. * AltiVec-enhanced. * It's the pix_abs8x8_altivec code above w/ squaring added. */int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;    vector unsigned char t1, t2, t3,t4, t5;    vector unsigned int sum;    vector signed int sumsqr;        sum = (vector unsigned int)vec_splat_u32(0);    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);        for(i=0;i<8;i++) {	/* Read potentially unaligned pixels into t1 and t2	   Since we're reading 16 pixels, and actually only want 8,	   mask out the last 8 pixels. The 0s don't change the sum. */        perm1 = vec_lvsl(0, pix1);        pix1v = (vector unsigned char *) pix1;        perm2 = vec_lvsl(0, pix2);        pix2v = (vector unsigned char *) pix2;        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);        /*          Since we want to use unsigned chars, we can take advantage          of the fact that abs(a-b)^2 = (a-b)^2.        */        	/* Calculate abs differences vector */         t3 = vec_max(t1, t2);        t4 = vec_min(t1, t2);        t5 = vec_sub(t3, t4);                /* Square the values and add them to our sum */        sum = vec_msum(t5, t5, sum);                pix1 += line_size;        pix2 += line_size;    }        /* Sum up the four partial sums, and put the result into s */    sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);    sumsqr = vec_splat(sumsqr, 3);    vec_ste(sumsqr, 0, &s);        return s;}
开发者ID:KoetseJ,项目名称:xumo,代码行数:55,


示例6: pix_norm1_altivec

int pix_norm1_altivec(uint8_t *pix, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char *tv;    vector unsigned char pixv;    vector unsigned int sv;    vector signed int sum;        sv = (vector unsigned int)vec_splat_u32(0);        s = 0;    for (i = 0; i < 16; i++) {        /* Read in the potentially unaligned pixels */        tv = (vector unsigned char *) pix;        pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));        /* Square the values, and add them to our sum */        sv = vec_msum(pixv, pixv, sv);        pix += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sum = vec_sums((vector signed int) sv, (vector signed int) zero);    sum = vec_splat(sum, 3);    vec_ste(sum, 0, &s);    return s;}
开发者ID:KoetseJ,项目名称:xumo,代码行数:30,


示例7: _twin_fbdev_vec_put_span

/*
 * Copy one horizontal span of 32-bit pixels into the framebuffer.
 *
 * left/right  horizontal extent of the span; right - left pixels are copied
 * top         row index, scaled by fb_fix.line_length to locate the row
 * pixels      source pixels, any alignment
 * closure     the twin_fbdev_t this span belongs to
 *
 * The copy runs in three phases: scalar stores until dest is 16-byte
 * aligned, aligned vector stores of 4 pixels each, then a scalar tail.
 * Nothing is done when the device is inactive or not mapped.
 *
 * width is only decremented after a pixel has actually been copied, so it
 * can never drop below zero; a span that ends before dest becomes aligned
 * (or an empty span) therefore cannot send the tail loop past the span.
 */
static void _twin_fbdev_vec_put_span (twin_coord_t    left,
				      twin_coord_t    top,
				      twin_coord_t    right,
				      twin_argb32_t   *pixels,
				      void     	      *closure)
{
	twin_fbdev_t    	*tf = closure;
	twin_coord_t    	width = right - left;
	unsigned int		*dest;
	vector unsigned char 	edgeperm;
	vector unsigned char	src0v, src1v, srcv;

	if (!tf->active || tf->fb_base == MAP_FAILED)
		return;

	dest = (unsigned int *)(tf->fb_ptr + top * tf->fb_fix.line_length);
	dest += left;

	/* head: scalar copy until dest reaches a 16-byte boundary */
	while (width > 0 && (((unsigned long)dest) & 0xf)) {
		*(dest++) = *(pixels++);
		width--;
	}

	/* body: aligned stores; the source is realigned with vec_perm and the
	 * second load of one step is reused as the first load of the next */
	if (width >= 4) {
		edgeperm = vec_lvsl (0, pixels);
		src0v = vec_ld (0, pixels);
		while (width >= 4) {
			src1v = vec_ld (16, pixels);
			srcv = vec_perm (src0v, src1v, edgeperm);
			vec_st ((vector unsigned int)srcv, 0, dest);
			src0v = src1v;
			dest += 4;
			pixels += 4;
			width -= 4;
		}
	}

	/* tail: at most 3 remaining pixels */
	while (width > 0) {
		*(dest++) = *(pixels++);
		width--;
	}
}
开发者ID:freedesktop-unofficial-mirror,项目名称:twin,代码行数:35,


示例8: ff_vp3_idct_add_altivec

/**
 * Inverse-transform a block and add the result, with saturation, to 8 rows
 * of 8-bit pixels at dst.
 *
 * @param dst    first destination row; 8 bytes per row are rewritten through
 *               two 4-byte element stores (byte offsets 0 and 4)
 * @param stride byte distance between consecutive destination rows
 * @param block  the 64 coefficients
 *
 * NOTE(review): LOAD_ZERO, IDCT_START, IDCT_1D, TRANSPOSE8, NOP, ADD8 and
 * SHIFT4 are defined outside this function; they presumably provide
 * zero_u8v and b0..b7 -- confirm against their definitions.
 * NOTE(review): vdst_mask is derived from the first row only, so every row
 * is presumed to share dst's position inside its 16-byte line -- confirm.
 */
void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
{
    LOAD_ZERO;
    vec_u8 t, vdst;
    vec_s16 vdst_16;
    /* 0xFF interleaved with dst's alignment indices: the permute below
     * widens each pixel to 16 bits, taking the high byte from zero_u8v */
    vec_u8 vdst_mask = vec_mergeh(vec_splat_u8(-1), vec_lvsl(0, dst));

    IDCT_START

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

/* Widen 8 destination pixels, add one transformed row with signed
 * saturation, pack back to unsigned bytes and store 2 x 4 bytes. */
#define ADD(a)                                                  \
    vdst = vec_ld(0, dst);                                      \
    vdst_16 = (vec_s16)vec_perm(vdst, zero_u8v, vdst_mask);     \
    vdst_16 = vec_adds(a, vdst_16);                             \
    t = vec_packsu(vdst_16, vdst_16);                           \
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);                \
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    ADD(b0)     dst += stride;
    ADD(b1)     dst += stride;
    ADD(b2)     dst += stride;
    ADD(b3)     dst += stride;
    ADD(b4)     dst += stride;
    ADD(b5)     dst += stride;
    ADD(b6)     dst += stride;
    ADD(b7)

#undef ADD
}
开发者ID:9aa5,项目名称:FFmpeg,代码行数:30,


示例9: float_to_int16_altivec

static void float_to_int16_altivec(int16_t *dst, const float *src, long len){    int i;    vector signed short d0, d1, d;    vector unsigned char align;    if(((long)dst) & 15) //FIXME        for(i = 0; i < len - 7; i += 8)        {            d0 = vec_ld(0, dst + i);            d = float_to_int16_one_altivec(src + i);            d1 = vec_ld(15, dst + i);            d1 = vec_perm(d1, d0, vec_lvsl(0, dst + i));            align = vec_lvsr(0, dst + i);            d0 = vec_perm(d1, d, align);            d1 = vec_perm(d, d1, align);            vec_st(d0, 0, dst + i);            vec_st(d1, 15, dst + i);        }    else        for(i = 0; i < len - 7; i += 8)        {            d = float_to_int16_one_altivec(src + i);            vec_st(d, 0, dst + i);        }}
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:25,


示例10: x264_add4x4_idct_altivec

void x264_add4x4_idct_altivec( uint8_t *dst, int16_t dct[16] ){    vec_u16_t onev = vec_splat_u16(1);    dct[0] += 32; // rounding for the >>6 at the end    vec_s16_t s0, s1, s2, s3;    s0 = vec_ld( 0x00, dct );    s1 = vec_sld( s0, s0, 8 );    s2 = vec_ld( 0x10, dct );    s3 = vec_sld( s2, s2, 8 );    vec_s16_t d0, d1, d2, d3;    IDCT_1D_ALTIVEC( s0, s1, s2, s3, d0, d1, d2, d3 );    vec_s16_t tr0, tr1, tr2, tr3;    VEC_TRANSPOSE_4( d0, d1, d2, d3, tr0, tr1, tr2, tr3 );    vec_s16_t idct0, idct1, idct2, idct3;    IDCT_1D_ALTIVEC( tr0, tr1, tr2, tr3, idct0, idct1, idct2, idct3 );    vec_u8_t perm_ldv = vec_lvsl( 0, dst );    vec_u16_t sixv = vec_splat_u16(6);    LOAD_ZERO;    ALTIVEC_STORE4_SUM_CLIP( &dst[0*FDEC_STRIDE], idct0, perm_ldv );    ALTIVEC_STORE4_SUM_CLIP( &dst[1*FDEC_STRIDE], idct1, perm_ldv );    ALTIVEC_STORE4_SUM_CLIP( &dst[2*FDEC_STRIDE], idct2, perm_ldv );    ALTIVEC_STORE4_SUM_CLIP( &dst[3*FDEC_STRIDE], idct3, perm_ldv );}
开发者ID:0x0B501E7E,项目名称:x264,代码行数:32,


示例11: transfer_16to8copy_altivec_c

/**
 * Copy 8 rows of 16-bit values to 8-bit pixels through 8 expansions of the
 * COPY16TO8() macro.
 *
 * @param dst    destination pixels; the DEBUG check expects 8-byte alignment
 * @param src    source vectors; misalignment is handled via load_src_perm
 * @param stride destination stride; the DEBUG check expects a multiple of 8
 *
 * NOTE(review): COPY16TO8 is defined outside this function and presumably
 * uses s, packed, mask, mask_stencil, load_src_perm, dst, src and stride by
 * name -- the locals must therefore keep their names.
 */
void transfer_16to8copy_altivec_c(uint8_t *dst,
                            vector signed short *src,
                            uint32_t stride)
{
    register vector signed short s;
    register vector unsigned char packed;
    register vector unsigned char mask_stencil;
    register vector unsigned char mask;
    register vector unsigned char load_src_perm;

#ifdef DEBUG
    /* if this is on, print alignment errors */
    if(((unsigned long) dst) & 0x7)
        fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, dst %lx\n", (unsigned long)dst);
    if(stride & 0x7)
        fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, stride %u\n", (unsigned)stride);
#endif

    /* Initialisation stuff */
    load_src_perm = vec_lvsl(0, (unsigned char*)src);
    /* 8 bytes of 0x00 followed by 8 bytes of 0xFF */
    mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

    COPY16TO8();
    COPY16TO8();
    COPY16TO8();
    COPY16TO8();

    COPY16TO8();
    COPY16TO8();
    COPY16TO8();
    COPY16TO8();
}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:31,


示例12: pix_norm1_altivec

static int pix_norm1_altivec(uint8_t *pix, int line_size){    int i, s = 0;    const vector unsigned int zero =        (const vector unsigned int) vec_splat_u32(0);    vector unsigned char perm = vec_lvsl(0, pix);    vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);    vector signed int sum;    for (i = 0; i < 16; i++) {        /* Read the potentially unaligned pixels. */        vector unsigned char pixl = vec_ld(0,  pix);        vector unsigned char pixr = vec_ld(15, pix);        vector unsigned char pixv = vec_perm(pixl, pixr, perm);        /* Square the values, and add them to our sum. */        sv = vec_msum(pixv, pixv, sv);        pix += line_size;    }    /* Sum up the four partial sums, and put the result into s. */    sum = vec_sums((vector signed int) sv, (vector signed int) zero);    sum = vec_splat(sum, 3);    vec_ste(sum, 0, &s);    return s;}
开发者ID:26mansi,项目名称:FFmpeg,代码行数:27,


示例13: float_to_int16_altivec

void float_to_int16_altivec(int16_t *dst, const float *src, int len){    int i;    vector float s0, s1;    vector signed int t0, t1;    vector signed short d0, d1, d;    vector unsigned char align;    if(((long)dst)&15) //FIXME    for(i=0; i<len-7; i+=8) {        s0 = vec_ld(0, src+i);        s1 = vec_ld(16, src+i);        t0 = vec_cts(s0, 0);        d0 = vec_ld(0, dst+i);        t1 = vec_cts(s1, 0);        d1 = vec_ld(15, dst+i);        d = vec_packs(t0,t1);        d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));        align = vec_lvsr(0, dst+i);        d0 = vec_perm(d1, d, align);        d1 = vec_perm(d, d1, align);        vec_st(d0, 0, dst+i);        vec_st(d1,15, dst+i);    }    else    for(i=0; i<len-7; i+=8) {        s0 = vec_ld(0, src+i);        s1 = vec_ld(16, src+i);        t0 = vec_cts(s0, 0);        t1 = vec_cts(s1, 0);        d = vec_packs(t0,t1);        vec_st(d, 0, dst+i);    }}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:33,


示例14: scalarproduct_and_madd_int16_altivec

static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul){    LOAD_ZERO;    vec_s16 *pv1 = (vec_s16*)v1;    vec_s16 *pv2 = (vec_s16*)v2;    vec_s16 *pv3 = (vec_s16*)v3;    register vec_s16 muls = {mul,mul,mul,mul,mul,mul,mul,mul};    register vec_s16 t0, t1, i0, i1;    register vec_s16 i2 = pv2[0], i3 = pv3[0];    register vec_s32 res = zero_s32v;    register vec_u8 align = vec_lvsl(0, v2);    int32_t ires;    order >>= 4;    do {        t0 = vec_perm(i2, pv2[1], align);        i2 = pv2[2];        t1 = vec_perm(pv2[1], i2, align);        i0 = pv1[0];        i1 = pv1[1];        res = vec_msum(t0, i0, res);        res = vec_msum(t1, i1, res);        t0 = vec_perm(i3, pv3[1], align);        i3 = pv3[2];        t1 = vec_perm(pv3[1], i3, align);        pv1[0] = vec_mladd(t0, muls, i0);        pv1[1] = vec_mladd(t1, muls, i1);        pv1 += 2;        pv2 += 2;        pv3 += 2;    } while(--order);    res = vec_splat(vec_sums(res, zero_s32v), 3);    vec_ste(res, 0, &ires);    return ires;}
开发者ID:10045125,项目名称:xuggle-xuggler,代码行数:34,


示例15: pix_sum_altivec

static int pix_sum_altivec(uint8_t * pix, int line_size){    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm = vec_lvsl(0, pix);    vector unsigned char t1;    vector unsigned int sad;    vector signed int sumdiffs;    int i;    int s;    sad = (vector unsigned int)vec_splat_u32(0);    for (i = 0; i < 16; i++) {        /* Read the potentially unaligned 16 pixels into t1 */        vector unsigned char pixl = vec_ld( 0, pix);        vector unsigned char pixr = vec_ld(15, pix);        t1 = vec_perm(pixl, pixr, perm);        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t1, sad);        pix += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}
开发者ID:0xFFeng,项目名称:ffmpeg,代码行数:32,


示例16: scalarproduct_int16_altivec

static int32_t scalarproduct_int16_altivec(const int16_t * v1, const int16_t * v2, int order, const int shift){    int i;    LOAD_ZERO;    register vec_s16 vec1, *pv;    register vec_s32 res = vec_splat_s32(0), t;    register vec_u32 shifts;    int32_t ires;    shifts = zero_u32v;    if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1)));    if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08));    if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04));    if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02));    if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01));    for(i = 0; i < order; i += 8){        pv = (vec_s16*)v1;        vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));        t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);        t = vec_sr(t, shifts);        res = vec_sums(t, res);        v1 += 8;        v2 += 8;    }    res = vec_splat(res, 3);    vec_ste(res, 0, &ires);    return ires;}
开发者ID:10045125,项目名称:xuggle-xuggler,代码行数:29,


示例17: avg_pixels8_altivec

/* next one assumes that ((line_size % 8) == 0) */static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h){    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;    int i;   for (i = 0; i < h; i++) {       /* block is 8 bytes-aligned, so we're either in the          left block (16 bytes-aligned) or in the right block (not) */       int rightside = ((unsigned long)block & 0x0000000F);       blockv = vec_ld(0, block);       pixelsv1 = vec_ld( 0, pixels);       pixelsv2 = vec_ld(16, pixels);       pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));       if (rightside) {           pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1));       } else {           pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3));       }       blockv = vec_avg(blockv, pixelsv);       vec_st(blockv, 0, block);       pixels += line_size;       block += line_size;   }}
开发者ID:AVLeo,项目名称:libav,代码行数:30,


示例18: put_vp8_epel_h_altivec_core

static av_always_inlinevoid put_vp8_epel_h_altivec_core(uint8_t *dst, ptrdiff_t dst_stride,                                 uint8_t *src, ptrdiff_t src_stride,                                 int h, int mx, int w, int is6tap){    LOAD_H_SUBPEL_FILTER(mx-1);    vec_u8 align_vec0, align_vec8, permh0, permh8, filt;    vec_u8 perm_6tap0, perm_6tap8, perml0, perml8;    vec_u8 a, b, pixh, pixl, outer;    vec_s16 f16h, f16l;    vec_s32 filth, filtl;    vec_u8 perm_inner6 = { 1,2,3,4, 2,3,4,5, 3,4,5,6, 4,5,6,7 };    vec_u8 perm_inner4 = { 0,1,2,3, 1,2,3,4, 2,3,4,5, 3,4,5,6 };    vec_u8 perm_inner  = is6tap ? perm_inner6 : perm_inner4;    vec_u8 perm_outer = { 4,9, 0,5, 5,10, 1,6, 6,11, 2,7, 7,12, 3,8 };    vec_s32 c64 = vec_sl(vec_splat_s32(1), vec_splat_u32(6));    vec_u16 c7  = vec_splat_u16(7);    align_vec0 = vec_lvsl( -is6tap-1, src);    align_vec8 = vec_lvsl(8-is6tap-1, src);    permh0     = vec_perm(align_vec0, align_vec0, perm_inner);    permh8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_inner = vec_add(perm_inner, vec_splat_u8(4));    perml0     = vec_perm(align_vec0, align_vec0, perm_inner);    perml8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_6tap0 = vec_perm(align_vec0, align_vec0, perm_outer);    perm_6tap8 = vec_perm(align_vec8, align_vec8, perm_outer);    while (h --> 0) {        FILTER_H(f16h, 0);        if (w == 16) {            FILTER_H(f16l, 8);            filt = vec_packsu(f16h, f16l);            vec_st(filt, 0, dst);        } else {            filt = vec_packsu(f16h, f16h);            vec_ste((vec_u32)filt, 0, (uint32_t*)dst);            if (w == 8)                vec_ste((vec_u32)filt, 4, (uint32_t*)dst);        }        src += src_stride;        dst += dst_stride;    }}
开发者ID:Arcen,项目名称:libav,代码行数:47,


示例19: pix_abs16x16_x2_altivec

int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);    vector unsigned char *tv;    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;    vector unsigned int sad;    vector signed int sumdiffs;    s = 0;    sad = (vector unsigned int)vec_splat_u32(0);    for(i=0;i<16;i++) {        /*           Read unaligned pixels into our vectors. The vectors are as follows:           pix1v: pix1[0]-pix1[15]           pix2v: pix2[0]-pix2[15]	pix2iv: pix2[1]-pix2[16]        */        tv = (vector unsigned char *) pix1;        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));                tv = (vector unsigned char *) &pix2[0];        pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));        tv = (vector unsigned char *) &pix2[1];        pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));        /* Calculate the average vector */        avgv = vec_avg(pix2v, pix2iv);        /* Calculate a sum of abs differences vector */        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);                pix1 += line_size;        pix2 += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}
开发者ID:KoetseJ,项目名称:xumo,代码行数:46,


示例20: pix_abs8x8_altivec

int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;    vector unsigned char t1, t2, t3,t4, t5;    vector unsigned int sad;    vector signed int sumdiffs;    sad = (vector unsigned int)vec_splat_u32(0);    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);    for(i=0;i<8;i++) {	/* Read potentially unaligned pixels into t1 and t2	   Since we're reading 16 pixels, and actually only want 8,	   mask out the last 8 pixels. The 0s don't change the sum. */        perm1 = vec_lvsl(0, pix1);        pix1v = (vector unsigned char *) pix1;        perm2 = vec_lvsl(0, pix2);        pix2v = (vector unsigned char *) pix2;        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);	/* Calculate a sum of abs differences vector */         t3 = vec_max(t1, t2);        t4 = vec_min(t1, t2);        t5 = vec_sub(t3, t4);	/* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);        pix1 += line_size;        pix2 += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}
开发者ID:KoetseJ,项目名称:xumo,代码行数:44,


示例21: float_to_int16_interleave_altivec

static voidfloat_to_int16_interleave_altivec(int16_t *dst, const float **src,                                  long len, int channels){    int i;    vector signed short d0, d1, d2, c0, c1, t0, t1;    vector unsigned char align;    if(channels == 1)        float_to_int16_altivec(dst, src[0], len);    else if (channels == 2)    {        if(((long)dst) & 15)            for(i = 0; i < len - 7; i += 8)            {                d0 = vec_ld(0, dst + i);                t0 = float_to_int16_one_altivec(src[0] + i);                d1 = vec_ld(31, dst + i);                t1 = float_to_int16_one_altivec(src[1] + i);                c0 = vec_mergeh(t0, t1);                c1 = vec_mergel(t0, t1);                d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));                align = vec_lvsr(0, dst + i);                d0 = vec_perm(d2, c0, align);                d1 = vec_perm(c0, c1, align);                vec_st(d0,  0, dst + i);                d0 = vec_perm(c1, d2, align);                vec_st(d1, 15, dst + i);                vec_st(d0, 31, dst + i);                dst += 8;            }        else            for(i = 0; i < len - 7; i += 8)            {                t0 = float_to_int16_one_altivec(src[0] + i);                t1 = float_to_int16_one_altivec(src[1] + i);                d0 = vec_mergeh(t0, t1);                d1 = vec_mergel(t0, t1);                vec_st(d0,  0, dst + i);                vec_st(d1, 16, dst + i);                dst += 8;            }    }    else    {        DECLARE_ALIGNED(16, int16_t, tmp)[len];        int c, j;        for (c = 0; c < channels; c++)        {            float_to_int16_altivec(tmp, src[c], len);            for (i = 0, j = c; i < len; i++, j += channels)            {                dst[j] = tmp[i];            }        }    }}
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:56,


示例22: sad16_altivec_c

/**
 * 16x16 sum of absolute differences between cur and ref, accumulated by
 * 16 expansions of the SAD16() macro.
 *
 * @param cur      current block; the DEBUG check expects 16-byte alignment
 * @param ref      reference block (alignment handled through perm)
 * @param stride   row stride in bytes; the DEBUG check expects a multiple of 16
 * @param best_sad best value so far, broadcast into best_vec
 * @return         element 3 of sumdiffs
 *
 * NOTE(review): SAD16 is defined outside this function and presumably uses
 * cur, ref, stride, perm, t1, t2, sad, sumdiffs and best_vec by name and
 * jumps to `bail` for an early exit -- locals and label must keep their
 * names. Confirm against the macro definition.
 */
uint32_t
sad16_altivec_c(vector unsigned char *cur,
			  vector unsigned char *ref,
			  uint32_t stride,
			  const uint32_t best_sad)
{
	vector unsigned char perm;
	vector unsigned char t1, t2;
	vector unsigned int sad;
	vector unsigned int sumdiffs;
	vector unsigned int best_vec;
	uint32_t result;

#ifdef DEBUG
	/* print alignment errors if DEBUG is on */
	if (((unsigned long) cur) & 0xf)
		fprintf(stderr, "sad16_altivec:incorrect align, cur: %lx\n", (unsigned long)cur);
	if (stride & 0xf)
		fprintf(stderr, "sad16_altivec:incorrect align, stride: %lu\n", (unsigned long)stride);
#endif

	/* initialization */
	sad = vec_splat_u32(0);
	sumdiffs = sad;
	stride >>= 4; /* bytes -> 16-byte units */
	perm = vec_lvsl(0, (unsigned char *) ref);
	/* write best_sad into element 0, then broadcast it to all lanes */
	*((uint32_t*)&best_vec) = best_sad;
	best_vec = vec_splat(best_vec, 0);

	/* perform sum of differences between current and previous */
	SAD16();
	SAD16();
	SAD16();
	SAD16();

	SAD16();
	SAD16();
	SAD16();
	SAD16();

	SAD16();
	SAD16();
	SAD16();
	SAD16();

	SAD16();
	SAD16();
	SAD16();
	SAD16();

  bail:
	/* copy vector sum into unaligned result */
	sumdiffs = vec_splat(sumdiffs, 3);
	vec_ste(sumdiffs, 0, (uint32_t*) &result);
	return result;
}
开发者ID:Distrotech,项目名称:xvidcore,代码行数:56,


示例23: reg_sad_altivec

static unsigned reg_sad_altivec(const kvz_pixel * const data1, const kvz_pixel * const data2,                        const int width, const int height, const unsigned stride1, const unsigned stride2){  vector unsigned int vsad = {0,0,0,0}, vzero = {0,0,0,0};   vector signed int sumdiffs;  int tmpsad, sad = 0;    int y, x;    for (y = 0; y < height; ++y) {    vector unsigned char perm1, perm2;        perm1 = vec_lvsl(0, &data1[y * stride1]);    perm2 = vec_lvsl(0, &data2[y * stride2]);        for (x = 0; x <= width-16; x+=16) {      vector unsigned char t1, t2, t3, t4, t5;      vector unsigned char *current, *previous;            current = (vector unsigned char *) &data1[y * stride1 + x];      previous = (vector unsigned char *) &data2[y * stride2 + x];            t1  = vec_perm(current[0], current[1], perm1 );  /* align current vector  */       t2  = vec_perm(previous[0], previous[1], perm2 );/* align previous vector */       t3  = vec_max(t1, t2 );      /* find largest of two           */       t4  = vec_min(t1, t2 );      /* find smaller of two           */       t5  = vec_sub(t3, t4);       /* find absolute difference      */       vsad = vec_sum4s(t5, vsad);    /* accumulate sum of differences */    }    for (; x < width; ++x) {      sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);    }  }    sumdiffs = vec_sums((vector signed int) vsad, (vector signed int) vzero);  /* copy vector sum into unaligned result */  sumdiffs = vec_splat( sumdiffs, 3);  vec_ste( sumdiffs, 0, &tmpsad );  sad += tmpsad;    return sad;}
开发者ID:Arizer,项目名称:kvazaar,代码行数:43,


示例24: avg_pixels16_l2_altivec

/**
 * For h rows of 16 pixels: dst = avg(dst, avg(src1, src2)), with all three
 * pointers allowed to be unaligned.
 *
 * src1 rows are src_stride1 bytes apart, src2 rows are 16 bytes apart and
 * dst rows are dst_stride bytes apart. The alignment permute for src2 is
 * computed once and reused for every row.
 */
static inline void avg_pixels16_l2_altivec( uint8_t *dst, const uint8_t *src1,
        const uint8_t *src2, int dst_stride,
        int src_stride1, int h)
{
    const vec_u8 src2_perm = vec_lvsl(0, src2);
    vec_u8 lo, hi, row1, row2, blended, border, load_perm, store_perm;
    int row;

    for (row = 0; row < h; row++) {
        /* unaligned row of src1 */
        lo = vec_ld(row * src_stride1, src1);
        load_perm = vec_lvsl(row * src_stride1, src1);
        hi = vec_ld(row * src_stride1 + 15, src1);
        row1 = vec_perm(lo, hi, load_perm);

        /* unaligned row of src2 */
        lo = vec_ld(row * 16, src2);
        hi = vec_ld(row * 16 + 15, src2);
        row2 = vec_perm(lo, hi, src2_perm);

        /* current destination row, blended with the averaged sources */
        lo = vec_ld(0, dst);
        load_perm = vec_lvsl(0, dst);
        hi = vec_ld(15, dst);
        blended = vec_avg(vec_perm(lo, hi, load_perm), vec_avg(row1, row2));

        /* bytes around the row that must survive the two aligned stores */
        border = vec_perm(hi, lo, load_perm);
        store_perm = vec_lvsr(0, dst);

        hi = vec_perm(blended, border, store_perm);
        lo = vec_perm(border, blended, store_perm);

        vec_st(hi, 15, dst);
        vec_st(lo, 0 , dst);

        dst += dst_stride;
    }
}
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:42,


示例25: yuv2planeX_16_altivec

static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize,                                  const int16_t **src, uint8_t *dest,                                  const uint8_t *dither, int offset, int x){    register int i, j;    DECLARE_ALIGNED(16, int, val)[16];    vector signed int vo1, vo2, vo3, vo4;    vector unsigned short vs1, vs2;    vector unsigned char vf;    vector unsigned int altivec_vectorShiftInt19 =        vec_add(vec_splat_u32(10), vec_splat_u32(9));    for (i = 0; i < 16; i++)        val[i] = dither[(x + i + offset) & 7] << 12;    vo1 = vec_ld(0,  val);    vo2 = vec_ld(16, val);    vo3 = vec_ld(32, val);    vo4 = vec_ld(48, val);    for (j = 0; j < filterSize; j++) {        vector signed short l1, vLumFilter = vec_ld(j << 1, filter);        vector unsigned char perm, perm0 = vec_lvsl(j << 1, filter);        vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);        vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter        perm = vec_lvsl(x << 1, src[j]);        l1   = vec_ld(x << 1, src[j]);        yuv2planeX_8(vo1, vo2, l1, src[j], x,     perm, vLumFilter);        yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter);    }    vo1 = vec_sra(vo1, altivec_vectorShiftInt19);    vo2 = vec_sra(vo2, altivec_vectorShiftInt19);    vo3 = vec_sra(vo3, altivec_vectorShiftInt19);    vo4 = vec_sra(vo4, altivec_vectorShiftInt19);    vs1 = vec_packsu(vo1, vo2);    vs2 = vec_packsu(vo3, vo4);    vf  = vec_packsu(vs1, vs2);    vec_st(vf, 0, dest);}
开发者ID:1c0n,项目名称:xbmc,代码行数:42,


示例26: idct_add_altivec

void idct_add_altivec(uint8_t* dest, int stride, int16_t *blk){POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);    vec_s16 *block = (vec_s16*)blk;    vec_u8 tmp;    vec_s16 tmp2, tmp3;    vec_u8 perm0;    vec_u8 perm1;    vec_u8 p0, p1, p;#if CONFIG_POWERPC_PERFPOWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);#endif    IDCT    p0 = vec_lvsl (0, dest);    p1 = vec_lvsl (stride, dest);    p = vec_splat_u8 (-1);    perm0 = vec_mergeh (p, p0);    perm1 = vec_mergeh (p, p1);#define ADD(dest,src,perm)                                              /    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */                        /    tmp = vec_ld (0, dest);                                             /    tmp2 = (vec_s16)vec_perm (tmp, (vec_u8)zero, perm);       /    tmp3 = vec_adds (tmp2, src);                                        /    tmp = vec_packsu (tmp3, tmp3);                                      /    vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest);               /    vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);    ADD (dest, vx0, perm0)      dest += stride;    ADD (dest, vx1, perm1)      dest += stride;    ADD (dest, vx2, perm0)      dest += stride;    ADD (dest, vx3, perm1)      dest += stride;    ADD (dest, vx4, perm0)      dest += stride;    ADD (dest, vx5, perm1)      dest += stride;    ADD (dest, vx6, perm0)      dest += stride;    ADD (dest, vx7, perm1)POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);}
开发者ID:WangCrystal,项目名称:FFplayer,代码行数:42,


示例27: pix_abs16x16_altivec

int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm1, perm2, *pix1v, *pix2v;    vector unsigned char t1, t2, t3,t4, t5;    vector unsigned int sad;    vector signed int sumdiffs;        sad = (vector unsigned int)vec_splat_u32(0);    for(i=0;i<16;i++) {	/* Read potentially unaligned pixels into t1 and t2 */        perm1 = vec_lvsl(0, pix1);        pix1v = (vector unsigned char *) pix1;        perm2 = vec_lvsl(0, pix2);        pix2v = (vector unsigned char *) pix2;        t1 = vec_perm(pix1v[0], pix1v[1], perm1);        t2 = vec_perm(pix2v[0], pix2v[1], perm2);       	/* Calculate a sum of abs differences vector */         t3 = vec_max(t1, t2);        t4 = vec_min(t1, t2);        t5 = vec_sub(t3, t4);		/* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);        pix1 += line_size;        pix2 += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);        return s;}
开发者ID:KoetseJ,项目名称:xumo,代码行数:41,


示例28: test

static void test() {  vector unsigned char expected = {4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};  check (vec_all_eq(vec_lvsl(0, &sc[4]), expected), "sc");  check (vec_all_eq(vec_lvsl(0, &uc[4]), expected), "uc");  check (vec_all_eq(vec_lvsl(0, &ss[2]), expected), "ss");  check (vec_all_eq(vec_lvsl(0, &us[2]), expected), "us");  check (vec_all_eq(vec_lvsl(0, &si[1]), expected), "si");  check (vec_all_eq(vec_lvsl(0, &ui[1]), expected), "ui");  check (vec_all_eq(vec_lvsl(0, & f[1]), expected), "f");}
开发者ID:Artem-B,项目名称:test-suite,代码行数:10,


示例29: LoadUnaligned

/* Load a vector from an unaligned location in memory */static inline vector unsigned charLoadUnaligned(const guchar *v){  if ((long)v & 0x0f)    {      vector unsigned char permuteVector = vec_lvsl(0, v);      vector unsigned char low = vec_ld(0, v);      vector unsigned char high = vec_ld(16, v);      return vec_perm(low, high, permuteVector);    }  else    return vec_ld(0, v); /* don't want overflow */}
开发者ID:1ynx,项目名称:gimp,代码行数:14,


示例30: sub_int16_altivec

/**
 * In-place element-wise subtraction: v1[i] -= v2[i] for i in [0, order).
 *
 * Elements are processed in groups of eight, so the final iteration touches
 * a whole group even when order is not a multiple of 8.
 *
 * \param v1     minuend and destination; accessed with vec_ld/vec_st, which
 *               ignore the low four address bits, so it is expected to be
 *               16-byte aligned
 * \param v2     subtrahend; may have any alignment
 * \param order  number of elements
 */
static void sub_int16_altivec(int16_t * v1, int16_t * v2, int order)
{
    int i;
    vec_s16_t vec;

    for(i = 0; i < order; i += 8){
        /* Unaligned load of v2: the second block is fetched at offset 15
         * rather than 16, so when v2 happens to be aligned nothing past the
         * eight elements actually needed is read. */
        vec = vec_perm(vec_ld(0, v2), vec_ld(15, v2), vec_lvsl(0, v2));
        vec_st(vec_sub(vec_ld(0, v1), vec), 0, v1);
        v1 += 8;
        v2 += 8;
    }
}
开发者ID:Haaaaaank,项目名称:avbin,代码行数:13,



注:本文中的vec_lvsl函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vec_lvsr函数代码示例
C++ vec_length函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。