您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vec_perm函数代码示例

51自学网 2021-06-03 09:36:22
  C++
这篇教程C++ vec_perm函数代码示例写得很实用,希望能帮到您。

本文整理汇总了 C/C++ 中 vec_perm 函数(AltiVec/VSX 内建函数)的典型用法代码示例。如果您正苦于以下问题:vec_perm 函数的具体用法是什么?vec_perm 怎么用?有哪些 vec_perm 的使用例子?那么这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_perm函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: FUNC

static void FUNC(ff_hevc_idct_4x4, BIT_DEPTH)(int16_t *coeffs, int col_limit){    const int shift = 7;    const int shift2 = 20 - BIT_DEPTH;    vec_s16 src_01, src_23;    vec_s32 res[4];    vec_s16 res_packed[2];    src_01 = vec_ld(0, coeffs);    src_23 = vec_ld(16, coeffs);    transform4x4(src_01, src_23, res, shift, coeffs);    src_01 = vec_packs(res[0], res[1]);    src_23 = vec_packs(res[2], res[3]);    scale(res, res_packed, shift);    // transpose    src_01 = vec_perm(res_packed[0], res_packed[1], mask[0]);    src_23 = vec_perm(res_packed[0], res_packed[1], mask[1]);    transform4x4(src_01, src_23, res, shift2, coeffs);    scale(res, res_packed, shift2);    // transpose    src_01 = vec_perm(res_packed[0], res_packed[1], mask[0]);    src_23 = vec_perm(res_packed[0], res_packed[1], mask[1]);    vec_st(src_01, 0, coeffs);    vec_st(src_23, 16, coeffs);}
开发者ID:DeHackEd,项目名称:FFmpeg,代码行数:28,


示例2: avg_pixels8_altivec

/* next one assumes that ((line_size % 8) == 0) */static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h){    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;    int i;   for (i = 0; i < h; i++) {       /* block is 8 bytes-aligned, so we're either in the          left block (16 bytes-aligned) or in the right block (not) */       int rightside = ((unsigned long)block & 0x0000000F);       blockv = vec_ld(0, block);       pixelsv1 = vec_ld( 0, pixels);       pixelsv2 = vec_ld(16, pixels);       pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));       if (rightside) {           pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1));       } else {           pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3));       }       blockv = vec_avg(blockv, pixelsv);       vec_st(blockv, 0, block);       pixels += line_size;       block += line_size;   }}
开发者ID:AVLeo,项目名称:libav,代码行数:30,


示例3: do_recursion

/*
 * One recursion step on 128-bit words.
 *
 * Reads a->s, b->s and lung->s, writes the new state word to r->s and the
 * updated lung word to lung->s.  All shift amounts, permute maps and masks
 * come from the ALTI_* constants.
 */
static inline void do_recursion(w128_t *r, w128_t *a, w128_t * b,
                                w128_t * lung) {
  const vector unsigned char sl1 = ALTI_SL1;
  const vector unsigned char sl1_perm = ALTI_SL1_PERM;
  const vector unsigned int sl1_msk = ALTI_SL1_MSK;
  const vector unsigned char sr1 = ALTI_SR;
  const vector unsigned char sr1_perm = ALTI_SR_PERM;
  const vector unsigned int sr1_msk = ALTI_SR_MSK;
  const vector unsigned char perm = ALTI_PERM;
  const vector unsigned int msk1 = ALTI_MSK;

  /* Read every input before any output is written. */
  const vector unsigned int state = a->s;
  const vector unsigned int lung_perm =
      vec_perm(lung->s, (vector unsigned int)perm, perm);

  /* Left-shifted, masked copy of the state word. */
  vector unsigned int left =
      vec_perm(state, (vector unsigned int)sl1_perm, sl1_perm);
  left = vec_and(vec_sll(left, sl1), sl1_msk);

  /* New lung value: permuted lung ^ b ^ shifted state. */
  const vector unsigned int new_lung =
      vec_xor(vec_xor(lung_perm, b->s), left);

  /* Right-shifted, masked copy of the new lung value. */
  vector unsigned int right =
      vec_perm(new_lung, (vector unsigned int)sr1_perm, sr1_perm);
  right = vec_and(vec_srl(right, sr1), sr1_msk);

  r->s = vec_xor(vec_xor(state, vec_and(new_lung, msk1)), right);
  lung->s = new_lung;
}
开发者ID:bencalderhead,项目名称:General_MCMC,代码行数:27,


示例4: scalarproduct_and_madd_int16_altivec

static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul){    LOAD_ZERO;    vec_s16 *pv1 = (vec_s16*)v1;    vec_s16 *pv2 = (vec_s16*)v2;    vec_s16 *pv3 = (vec_s16*)v3;    register vec_s16 muls = {mul,mul,mul,mul,mul,mul,mul,mul};    register vec_s16 t0, t1, i0, i1;    register vec_s16 i2 = pv2[0], i3 = pv3[0];    register vec_s32 res = zero_s32v;    register vec_u8 align = vec_lvsl(0, v2);    int32_t ires;    order >>= 4;    do {        t0 = vec_perm(i2, pv2[1], align);        i2 = pv2[2];        t1 = vec_perm(pv2[1], i2, align);        i0 = pv1[0];        i1 = pv1[1];        res = vec_msum(t0, i0, res);        res = vec_msum(t1, i1, res);        t0 = vec_perm(i3, pv3[1], align);        i3 = pv3[2];        t1 = vec_perm(pv3[1], i3, align);        pv1[0] = vec_mladd(t0, muls, i0);        pv1[1] = vec_mladd(t1, muls, i1);        pv1 += 2;        pv2 += 2;        pv3 += 2;    } while(--order);    res = vec_splat(vec_sums(res, zero_s32v), 3);    vec_ste(res, 0, &ires);    return ires;}
开发者ID:10045125,项目名称:xuggle-xuggler,代码行数:34,


示例5: StoreUnaligned

/* Store a vector to a possibly unaligned location in memory.
 *
 * When `where` is 16-byte aligned a single vec_st is issued.  Otherwise the
 * two aligned vectors that straddle `where` are loaded, the new data is
 * merged into them with vec_sel, and both are written back.
 */
static inline void
StoreUnaligned (vector unsigned char v,
                const guchar *where)
{
  vector unsigned char lower, upper, shift, select;
  vector signed char all_ones, all_zero;

  /* Aligned destination: one store, nothing beyond `where + 15` is touched. */
  if (((unsigned long)where & 0x0f) == 0)
    {
      vec_st(v, 0, CONST_BUFFER(where));
      return;
    }

  /* Load the surrounding area */
  lower = vec_ld(0, where);
  upper = vec_ld(16, where);

  /* Constants: the right-shift permute map and all-ones / all-zero bytes */
  shift = vec_lvsr(0, where);
  all_ones = vec_splat_s8(-1);
  all_zero = vec_splat_s8(0);

  /* Byte mask selecting which parts of the vectors get replaced */
  select = (vector unsigned char)vec_perm(all_zero, all_ones, shift);
  v = vec_perm(v, v, shift);

  /* Insert the data into the lower and upper vectors */
  lower = vec_sel(lower, v, select);
  upper = vec_sel(v, upper, select);

  /* Store the two aligned result vectors */
  vec_st(lower, 0, CONST_BUFFER(where));
  vec_st(upper, 16, CONST_BUFFER(where));
}
开发者ID:1ynx,项目名称:gimp,代码行数:29,


示例6: float_to_int16_altivec

static void float_to_int16_altivec(int16_t *dst, const float *src, long len){    int i;    vector signed short d0, d1, d;    vector unsigned char align;    if(((long)dst) & 15) //FIXME        for(i = 0; i < len - 7; i += 8)        {            d0 = vec_ld(0, dst + i);            d = float_to_int16_one_altivec(src + i);            d1 = vec_ld(15, dst + i);            d1 = vec_perm(d1, d0, vec_lvsl(0, dst + i));            align = vec_lvsr(0, dst + i);            d0 = vec_perm(d1, d, align);            d1 = vec_perm(d, d1, align);            vec_st(d0, 0, dst + i);            vec_st(d1, 15, dst + i);        }    else        for(i = 0; i < len - 7; i += 8)        {            d = float_to_int16_one_altivec(src + i);            vec_st(d, 0, dst + i);        }}
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:25,


示例7: float_to_int16_altivec

void float_to_int16_altivec(int16_t *dst, const float *src, int len){    int i;    vector float s0, s1;    vector signed int t0, t1;    vector signed short d0, d1, d;    vector unsigned char align;    if(((long)dst)&15) //FIXME    for(i=0; i<len-7; i+=8) {        s0 = vec_ld(0, src+i);        s1 = vec_ld(16, src+i);        t0 = vec_cts(s0, 0);        d0 = vec_ld(0, dst+i);        t1 = vec_cts(s1, 0);        d1 = vec_ld(15, dst+i);        d = vec_packs(t0,t1);        d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));        align = vec_lvsr(0, dst+i);        d0 = vec_perm(d1, d, align);        d1 = vec_perm(d, d1, align);        vec_st(d0, 0, dst+i);        vec_st(d1,15, dst+i);    }    else    for(i=0; i<len-7; i+=8) {        s0 = vec_ld(0, src+i);        s1 = vec_ld(16, src+i);        t0 = vec_cts(s0, 0);        t1 = vec_cts(s1, 0);        d = vec_packs(t0,t1);        vec_st(d, 0, dst+i);    }}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:33,


示例8: vector_fmul_window_altivec

static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len){    vector float zero, t0, t1, s0, s1, wi, wj;    const vector unsigned char reverse = vcprm(3,2,1,0);    int i,j;    dst += len;    win += len;    src0+= len;    zero = (vector float)vec_splat_u32(0);    for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {        s0 = vec_ld(i, src0);        s1 = vec_ld(j, src1);        wi = vec_ld(i, win);        wj = vec_ld(j, win);        s1 = vec_perm(s1, s1, reverse);        wj = vec_perm(wj, wj, reverse);        t0 = vec_madd(s0, wj, zero);        t0 = vec_nmsub(s1, wi, t0);        t1 = vec_madd(s0, wi, zero);        t1 = vec_madd(s1, wj, t1);        t1 = vec_perm(t1, t1, reverse);        vec_st(t0, i, dst);        vec_st(t1, j, dst);    }}
开发者ID:Arcen,项目名称:libav,代码行数:31,


示例9: pix_abs16x16_y2_altivec

int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);    vector unsigned char *tv;    vector unsigned char pix1v, pix2v, pix3v, avgv, t5;    vector unsigned int sad;    vector signed int sumdiffs;    uint8_t *pix3 = pix2 + line_size;    s = 0;    sad = (vector unsigned int)vec_splat_u32(0);    /*       Due to the fact that pix3 = pix2 + line_size, the pix3 of one       iteration becomes pix2 in the next iteration. We can use this       fact to avoid a potentially expensive unaligned read, each       time around the loop.       Read unaligned pixels into our vectors. The vectors are as follows:       pix2v: pix2[0]-pix2[15]       Split the pixel vectors into shorts    */    tv = (vector unsigned char *) &pix2[0];    pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));        for(i=0;i<16;i++) {        /*           Read unaligned pixels into our vectors. The vectors are as follows:           pix1v: pix1[0]-pix1[15]           pix3v: pix3[0]-pix3[15]        */        tv = (vector unsigned char *) pix1;        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));        tv = (vector unsigned char *) &pix3[0];        pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0]));        /* Calculate the average vector */        avgv = vec_avg(pix2v, pix3v);        /* Calculate a sum of abs differences vector */        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);                pix1 += line_size;        pix2v = pix3v;        pix3 += line_size;            }        /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;    }
开发者ID:KoetseJ,项目名称:xumo,代码行数:59,


示例10: gimp_composite_multiply_rgba8_rgba8_rgba8_altivec

voidgimp_composite_multiply_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx){  const guchar *A = ctx->A;  const guchar *B = ctx->B;  guchar *D = ctx->D;  guint length = ctx->n_pixels;  vector unsigned char a,b,d,alpha_a,alpha_b,alpha;  vector unsigned short al,ah;  while (length >= 4)    {      a=LoadUnaligned(A);      b=LoadUnaligned(B);      al=vec_mule(a,b);      al=vec_add(al,ox0080);      ah=vec_mulo(a,b);      ah=vec_add(ah,ox0080);      al=vec_add(al,vec_sr(al,ox0008));      ah=vec_add(ah,vec_sr(ah,ox0008));      d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);      alpha_a=vec_and(a, alphamask);      alpha_b=vec_and(b, alphamask);      alpha=vec_min(alpha_a, alpha_b);      d=vec_andc(d, alphamask);      d=vec_or(d, alpha);      StoreUnaligned(d, D);      A+=16;      B+=16;      D+=16;      length-=4;    }  /* process last pixels */  length = length*4;  a=LoadUnalignedLess(A, length);  b=LoadUnalignedLess(B, length);  al=vec_mule(a,b);  al=vec_add(al,ox0080);  ah=vec_mulo(a,b);  ah=vec_add(ah,ox0080);  al=vec_add(al,vec_sr(al,ox0008));  ah=vec_add(ah,vec_sr(ah,ox0008));  d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);  alpha_a=vec_and(a, alphamask);  alpha_b=vec_and(b, alphamask);  alpha=vec_min(alpha_a, alpha_b);  d=vec_andc(d, alphamask);  d=vec_or(d, alpha);  StoreUnalignedLess(d, D, length);}
开发者ID:1ynx,项目名称:gimp,代码行数:59,


示例11: DeinterleavedUv

        // Reads two consecutive vectors from the interleaved `uv` loader and
        // writes one vector each to `u` and `v`, selected by K8_PERM_U and
        // K8_PERM_V respectively.
        SIMD_INLINE void DeinterleavedUv(const Loader<align> & uv, Storer<align> & u, Storer<align> & v)
        {
            // The two loads must stay in this order: the second continues
            // where the first left off.
            const v128_u8 lower = Load<align, first>(uv);
            const v128_u8 upper = Load<align, false>(uv);

            const v128_u8 uPart = vec_perm(lower, upper, K8_PERM_U);
            Store<align, first>(u, uPart);

            const v128_u8 vPart = vec_perm(lower, upper, K8_PERM_V);
            Store<align, first>(v, vPart);
        }
开发者ID:pozdneev,项目名称:Simd,代码行数:8,


示例12: InterleavedUv

        // Reads one vector from each of the `u` and `v` loaders and writes
        // two consecutive vectors to `uv`, built with K8_PERM_UV0 and
        // K8_PERM_UV1.
        SIMD_INLINE void InterleavedUv(const Loader<align> & u, const Loader<align> & v, Storer<align> & uv)
        {
            const v128_u8 uPart = Load<align, first>(u);
            const v128_u8 vPart = Load<align, first>(v);

            // First output vector, then the one that follows it.
            const v128_u8 lower = vec_perm(uPart, vPart, K8_PERM_UV0);
            Store<align, first>(uv, lower);

            const v128_u8 upper = vec_perm(uPart, vPart, K8_PERM_UV1);
            Store<align, false>(uv, upper);
        }
开发者ID:4144,项目名称:Simd,代码行数:8,


示例13: abcd2cbad_double

/*
 * Swap the first and third 8-byte elements in every group of four
 * (a b c d -> c b a d), reading from tdata and writing to tnewdata.
 *
 * `length` is passed through eround16() and then counted in 16-byte
 * vectors.  Vectors are handled four at a time, with a two-vector tail.
 * Both buffers are accessed with aligned vec_ld/vec_st.
 *
 * Fixes over the original:
 *  - the final four-vector group stored the unpermuted inputs (d0..d3)
 *    instead of the permuted results (t0..t3);
 *  - data/newdata were not advanced past that final group, so the
 *    two-vector tail re-read the group that had just been processed.
 */
void abcd2cbad_double( ILdouble *tdata, ILuint length, ILdouble *tnewdata ) {
	register ILubyte *data = (ILubyte*)tdata;
	register ILubyte *newdata = (ILubyte*)tnewdata;
	/* Takes the first 8 bytes from the second operand and the last 8
	   bytes from the first operand. */
	const vector unsigned char p = (vector unsigned char)(0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F);
	register vector unsigned char d0,d1,d2,d3,t0,t1,t2,t3;
	
	length = eround16(length);
	
	if( length >= 4 ) {
		length -= 4;
		
		d3 = vec_ld(48,data);
		d2 = vec_ld(32,data);
		d1 = vec_ld(16,data);
		d0 = vec_ld(0,data);
		
		/* Software-pipelined: the next group is loaded while the
		   current one is permuted and stored. */
		while( length >= 4 ) {
			t0 = vec_perm(d0,d1,p);
			t1 = vec_perm(d1,d0,p);
			t2 = vec_perm(d2,d3,p);
			t3 = vec_perm(d3,d2,p);
			
			vec_st(t0,0,newdata);
			vec_st(t1,16,newdata);
			vec_st(t2,32,newdata);
			vec_st(t3,48,newdata);
			
			length -= 4;
			data += 16*4;
			newdata += 16*4;
			
			d3 = vec_ld(48,data);
			d2 = vec_ld(32,data);
			d1 = vec_ld(16,data);
			d0 = vec_ld(0,data);
		}
		
		/* Last full group: store the permuted vectors. */
		t0 = vec_perm(d0,d1,p);
		t1 = vec_perm(d1,d0,p);
		t2 = vec_perm(d2,d3,p);
		t3 = vec_perm(d3,d2,p);
		
		vec_st(t0,0,newdata);
		vec_st(t1,16,newdata);
		vec_st(t2,32,newdata);
		vec_st(t3,48,newdata);
		
		/* Step past the group just written so the tail sees new data. */
		data += 16*4;
		newdata += 16*4;
	}
	
	if( length == 2 ) {
		d0 = vec_ld(0,data);
		d1 = vec_ld(16,data);
		
		t0 = vec_perm(d0,d1,p);
		t1 = vec_perm(d1,d0,p);
		
		vec_st(t0,0,newdata);
		vec_st(t1,16,newdata);
	}
}
开发者ID:bazhenovc,项目名称:nebula3,代码行数:58,


示例14: float_to_int16_interleave_altivec

static voidfloat_to_int16_interleave_altivec(int16_t *dst, const float **src,                                  long len, int channels){    int i;    vector signed short d0, d1, d2, c0, c1, t0, t1;    vector unsigned char align;    if(channels == 1)        float_to_int16_altivec(dst, src[0], len);    else if (channels == 2)    {        if(((long)dst) & 15)            for(i = 0; i < len - 7; i += 8)            {                d0 = vec_ld(0, dst + i);                t0 = float_to_int16_one_altivec(src[0] + i);                d1 = vec_ld(31, dst + i);                t1 = float_to_int16_one_altivec(src[1] + i);                c0 = vec_mergeh(t0, t1);                c1 = vec_mergel(t0, t1);                d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));                align = vec_lvsr(0, dst + i);                d0 = vec_perm(d2, c0, align);                d1 = vec_perm(c0, c1, align);                vec_st(d0,  0, dst + i);                d0 = vec_perm(c1, d2, align);                vec_st(d1, 15, dst + i);                vec_st(d0, 31, dst + i);                dst += 8;            }        else            for(i = 0; i < len - 7; i += 8)            {                t0 = float_to_int16_one_altivec(src[0] + i);                t1 = float_to_int16_one_altivec(src[1] + i);                d0 = vec_mergeh(t0, t1);                d1 = vec_mergel(t0, t1);                vec_st(d0,  0, dst + i);                vec_st(d1, 16, dst + i);                dst += 8;            }    }    else    {        DECLARE_ALIGNED(16, int16_t, tmp)[len];        int c, j;        for (c = 0; c < channels; c++)        {            float_to_int16_altivec(tmp, src[c], len);            for (i = 0, j = c; i < len; i++, j += channels)            {                dst[j] = tmp[i];            }        }    }}
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:56,


示例15: jsimd_h2v1_downsample_altivec

/*
 * Horizontal 2:1 downsample: every pair of adjacent input samples is
 * reduced to one output sample as (a + b + bias) >> 1, where the bias
 * alternates 0, 1 across output columns.
 *
 * Each inner iteration consumes 32 input bytes and writes 16 output bytes.
 * When 8 or fewer output columns remain, the upper half of the output
 * vector is zero-filled and the second input vector is not read.
 */
void
jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
                               JDIMENSION v_samp_factor,
                               JDIMENSION width_blocks,
                               JSAMPARRAY input_data, JSAMPARRAY output_data)
{
  int row, cols_left;
  JDIMENSION output_cols = width_blocks * DCTSIZE;
  JSAMPROW src, dst;

  __vector unsigned char first, second, packed;
  __vector unsigned short first_even, first_odd, second_even, second_odd,
    sum_lo, sum_hi;

  /* Constants */
  __vector unsigned short pw_bias = { __4X2(0, 1) },
    pw_one = { __8X(1) };
  __vector unsigned char even_odd_index =
    {0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15},
    pb_zero = { __16X(0) };

  expand_right_edge(input_data, max_v_samp_factor, image_width,
                    output_cols * 2);

  for (row = 0; row < v_samp_factor; row++) {
    dst = output_data[row];
    src = input_data[row];

    for (cols_left = output_cols; cols_left > 0;
         cols_left -= 16, src += 32, dst += 16) {

      /* Even bytes go to the upper half, odd bytes to the lower half;
         both halves are then widened to 16 bits and summed. */
      first = vec_ld(0, src);
      first = vec_perm(first, first, even_odd_index);
      first_even = (__vector unsigned short)VEC_UNPACKHU(first);
      first_odd = (__vector unsigned short)VEC_UNPACKLU(first);
      sum_lo = vec_add(first_even, first_odd);
      sum_lo = vec_add(sum_lo, pw_bias);
      sum_lo = vec_sr(sum_lo, pw_one);

      if (cols_left > 8) {
        second = vec_ld(16, src);
        second = vec_perm(second, second, even_odd_index);
        second_even = (__vector unsigned short)VEC_UNPACKHU(second);
        second_odd = (__vector unsigned short)VEC_UNPACKLU(second);
        sum_hi = vec_add(second_even, second_odd);
        sum_hi = vec_add(sum_hi, pw_bias);
        sum_hi = vec_sr(sum_hi, pw_one);
      } else {
        sum_hi = vec_splat_u16(0);
      }

      packed = vec_pack(sum_lo, sum_hi);
      vec_st(packed, 0, dst);
    }
  }
}
开发者ID:AntonioMA,项目名称:UVCCamera,代码行数:54,


示例16: v_store_interleave_f32

/*
 * Store three float vectors interleaved as
 *   a0 b0 c0 a1 | b1 c1 a2 b2 | c2 a3 b3 c3
 * into the 12 floats starting at `ptr` (three vec_xst stores at byte
 * offsets 0, 16 and 32).
 */
void v_store_interleave_f32(float *ptr, vector float a, vector float b, vector float c)
{
    /* Permute maps: indices 0-15 pick bytes from the first operand,
       16-31 from the second. */
    static const vector unsigned char pick_first = {0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 4, 5, 6, 7};
    static const vector unsigned char pick_third = {8, 9, 10, 11, 24, 25, 26, 27, 28, 29, 30, 31, 12, 13, 14, 15};

    const vector float bc_high = vec_mergeh(b, c);
    vec_xst(vec_perm(a, bc_high, pick_first), 0, ptr);

    const vector float ab_low = vec_mergel(a, b);
    vec_xst(vec_sld(ab_low, bc_high, 8), 16, ptr);

    vec_xst(vec_perm(c, ab_low, pick_third), 32, ptr);
}
开发者ID:MaxKellermann,项目名称:gcc,代码行数:13,


示例17: store_unaligned

/*
 * Store a float vector to a potentially unaligned address.
 *
 * The two aligned quadwords that cover `target` are loaded, the bytes
 * outside the 16-byte target window are kept, `src` is shifted into place,
 * and both quadwords are written back (the second one first).
 */
void store_unaligned(float *target, vector float src) {
  vector float first_qw, second_qw, kept;
  vector unsigned char left_map, right_map;

  first_qw  = vec_ld(0, target);    /* quadword containing target        */
  second_qw = vec_ld(15, target);   /* quadword containing target + 15   */

  /* Collect the existing bytes on either side of the target window. */
  left_map = vec_lvsl(0, target);
  kept = vec_perm(second_qw, first_qw, left_map);

  /* Shift src right by the misalignment, padding with the kept bytes. */
  right_map = vec_lvsr(0, target);
  first_qw  = vec_perm(kept, src, right_map);
  second_qw = vec_perm(src, kept, right_map);

  vec_st(second_qw, 15, target);    /* second quadword is stored first   */
  vec_st(first_qw, 0, target);
}
开发者ID:ysei,项目名称:freezer,代码行数:14,


示例18: Bgr48pToBgra32

        // Combines one vector from each of the blue/green/red planes (read
        // at `offset`) with the supplied `alpha` vector and writes two
        // consecutive vectors to `bgra`.
        SIMD_INLINE void Bgr48pToBgra32(const uint8_t * blue, const uint8_t * green, const uint8_t * red, size_t offset,
            const v128_u8 & alpha, Storer<align> & bgra)
        {
            const v128_u8 blueSrc = Load<align>(blue + offset);
            const v128_u8 greenSrc = Load<align>(green + offset);
            const v128_u8 redSrc = Load<align>(red + offset);

            // Pair the planes up: blue with green, red with alpha.
            const v128_u16 blueGreen = (v128_u16)vec_perm(blueSrc, greenSrc, K8_PERM_48);
            const v128_u16 redAlpha = (v128_u16)vec_perm(redSrc, alpha, K8_PERM_48);

            // Low halves form the first output vector, high halves the second.
            Store<align, first>(bgra, (v128_u8)UnpackLoU16(redAlpha, blueGreen));
            Store<align, false>(bgra, (v128_u8)UnpackHiU16(redAlpha, blueGreen));
        }
开发者ID:nagyist,项目名称:Simd,代码行数:13,


示例19: unaligned_store

/*
 * Store `value` at byte offset `off` from `dst` when the address may not
 * be 16-byte aligned.  The bytes surrounding the target window are read
 * first and written back unchanged.
 *
 * The permute maps are derived from `dst` alone.
 * NOTE(review): this presumably requires `off` to be a multiple of 16 so
 * that dst+off has the same misalignment as dst -- confirm with callers.
 */
static void unaligned_store(vector signed short value, int off, int16_t *dst)
{
    register vector unsigned char right_map = vec_lvsr(0, dst),
                                  left_map = vec_lvsl(0, dst);
    register vector signed short first_qw, second_qw, kept;

    first_qw  = vec_ld(0+off, dst);
    second_qw = vec_ld(15+off, dst);

    /* Existing bytes on both sides of the target window. */
    kept = vec_perm(second_qw, first_qw, left_map);

    second_qw = vec_perm(value, kept, right_map);
    first_qw  = vec_perm(kept, value, right_map);

    vec_st(second_qw, 15+off, dst);
    vec_st(first_qw, 0+off, dst);
}
开发者ID:dr4g0nsr,项目名称:mplayer-skyviia-8860,代码行数:14,


示例20: v_load_deinterleave_f32

/*
 * Load 12 interleaved floats (x0 y0 z0 x1 y1 z1 ...) from `ptr` and split
 * them into three vectors: *a gets elements 0,3,6,9; *b gets 1,4,7,10;
 * *c gets 2,5,8,11.
 */
void v_load_deinterleave_f32(float *ptr, vector float* a, vector float* b, vector float* c)
{
    /* Permute maps: indices 0-15 pick bytes from the first operand,
       16-31 from the second. */
    static const vector unsigned char outer = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31};
    static const vector unsigned char outer_rot = {28, 29, 30, 31, 0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19};

    /* All three loads are done before any output is written. */
    const vector float q0 = vec_xl( 0, ptr);
    const vector float q1 = vec_xl(16, ptr);
    const vector float q2 = vec_xl(32, ptr);

    *a = vec_perm(q0, vec_sld(q2, q1, 8), outer);
    *b = vec_perm(q1, vec_sld(q0, q2, 8), outer_rot);
    *c = vec_perm(vec_sld(q1, q0, 8), q2, outer);
}
开发者ID:MaxKellermann,项目名称:gcc,代码行数:14,


示例21: sse8_altivec

/** * Sum of Squared Errors for a 8x8 block. * AltiVec-enhanced. * It's the pix_abs8x8_altivec code above w/ squaring added. */int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;    vector unsigned char t1, t2, t3,t4, t5;    vector unsigned int sum;    vector signed int sumsqr;        sum = (vector unsigned int)vec_splat_u32(0);    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);        for(i=0;i<8;i++) {	/* Read potentially unaligned pixels into t1 and t2	   Since we're reading 16 pixels, and actually only want 8,	   mask out the last 8 pixels. The 0s don't change the sum. */        perm1 = vec_lvsl(0, pix1);        pix1v = (vector unsigned char *) pix1;        perm2 = vec_lvsl(0, pix2);        pix2v = (vector unsigned char *) pix2;        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);        /*          Since we want to use unsigned chars, we can take advantage          of the fact that abs(a-b)^2 = (a-b)^2.        */        	/* Calculate abs differences vector */         t3 = vec_max(t1, t2);        t4 = vec_min(t1, t2);        t5 = vec_sub(t3, t4);                /* Square the values and add them to our sum */        sum = vec_msum(t5, t5, sum);                pix1 += line_size;        pix2 += line_size;    }        /* Sum up the four partial sums, and put the result into s */    sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);    sumsqr = vec_splat(sumsqr, 3);    vec_ste(sumsqr, 0, &s);        return s;}
开发者ID:KoetseJ,项目名称:xumo,代码行数:55,


示例22: h264_idct_dc_add_internal

static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size){    vec_s16 dc16;    vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;    vec_s32 v_dc32;    LOAD_ZERO;    DECLARE_ALIGNED(16, int, dc);    int i;    dc = (block[0] + 32) >> 6;    block[0] = 0;    v_dc32 = vec_lde(0, &dc);    dc16 = VEC_SPLAT16((vec_s16)v_dc32, 1);    if (size == 4)        dc16 = VEC_SLD16(dc16, zero_s16v, 8);    dcplus = vec_packsu(dc16, zero_s16v);    dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);    aligner = vec_lvsr(0, dst);#if !HAVE_BIGENDIAN    aligner = vec_perm(aligner, zero_u8v, vcswapc());#endif    dcplus = vec_perm(dcplus, dcplus, aligner);    dcminus = vec_perm(dcminus, dcminus, aligner);    for (i = 0; i < size; i += 4) {        v0 = vec_ld(0, dst+0*stride);        v1 = vec_ld(0, dst+1*stride);        v2 = vec_ld(0, dst+2*stride);        v3 = vec_ld(0, dst+3*stride);        v0 = vec_adds(v0, dcplus);        v1 = vec_adds(v1, dcplus);        v2 = vec_adds(v2, dcplus);        v3 = vec_adds(v3, dcplus);        v0 = vec_subs(v0, dcminus);        v1 = vec_subs(v1, dcminus);        v2 = vec_subs(v2, dcminus);        v3 = vec_subs(v3, dcminus);        vec_st(v0, 0, dst+0*stride);        vec_st(v1, 0, dst+1*stride);        vec_st(v2, 0, dst+2*stride);        vec_st(v3, 0, dst+3*stride);        dst += 4*stride;    }}
开发者ID:63n,项目名称:FFmpeg,代码行数:50,


示例23: abcd2cbad_internal

/*
 * Apply the byte permutation `p` to every 16-byte vector of `data` and
 * write the result to `newdata`.  The second vec_perm operand is a zero
 * vector, so indices 16..31 in `p` produce zero bytes.
 *
 * `length` is passed through eround16() and then counted in 16-byte
 * vectors.  Vectors are handled three at a time, with one- and two-vector
 * tails.  Both buffers are accessed with aligned vec_ld/vec_st.
 *
 * Fixes over the original:
 *  - the one-vector tail called vec_perm(d0,d0,z), using the zero vector
 *    as the permute map (which replicates byte 0) instead of applying `p`;
 *  - data/newdata were not advanced past the final three-vector group, so
 *    the tails re-read the group that had just been processed.
 */
static inline void abcd2cbad_internal( register const vector unsigned char p, unsigned char *data, unsigned int length, unsigned char *newdata ) {
	register vector unsigned char d0,d1,d2,z;
	z = vec_splat_u8(0);
	
	length = eround16(length);
	
	if( length >= 3 ) {
		length -= 3;
		
		d2 = vec_ld(32,data);
		d1 = vec_ld(16,data);
		d0 = vec_ld(0,data);
		
		/* Software-pipelined: the next group is loaded while the
		   current one is permuted and stored. */
		while( length >= 3 ) {
			d0 = vec_perm(d0,z,p);
			d1 = vec_perm(d1,z,p);
			d2 = vec_perm(d2,z,p);
			
			vec_st(d0,0,newdata);
			vec_st(d1,16,newdata);
			vec_st(d2,32,newdata);
			
			length -= 3;
			data += 16*3;
			newdata += 16*3;
			
			d2 = vec_ld(32,data);
			d1 = vec_ld(16,data);
			d0 = vec_ld(0,data);
		}
		
		/* Last full group. */
		d0 = vec_perm(d0,z,p);
		d1 = vec_perm(d1,z,p);
		d2 = vec_perm(d2,z,p);
		
		vec_st(d0,0,newdata);
		vec_st(d1,16,newdata);
		vec_st(d2,32,newdata);
		
		/* Step past the group just written so the tails see new data. */
		data += 16*3;
		newdata += 16*3;
	}
	
	if( length == 2 ) {
		d0 = vec_ld(0,data);
		d1 = vec_ld(16,data);
		
		d0 = vec_perm(d0,z,p);
		d1 = vec_perm(d1,z,p);
		
		vec_st(d0,0,newdata);
		vec_st(d1,16,newdata);
	} else if( length == 1 ) {
		d0 = vec_ld(0,data);
		d0 = vec_perm(d0,z,p);
		vec_st(d0,0,newdata);
	}
}
开发者ID:bazhenovc,项目名称:nebula3,代码行数:54,


示例24: BgrToGray

        // Reads three consecutive vectors from the `bgr` loader, converts
        // them in four permuted groups through BgraToGray32, and writes one
        // packed vector to `gray`.
        SIMD_INLINE void BgrToGray(const Loader<align> & bgr, Storer<align> & gray)
        {
            // The loads must stay in this order: each continues where the
            // previous one left off.
            v128_u8 src[3];
            src[0] = Load<align, first>(bgr);
            src[1] = Load<align, false>(bgr);
            src[2] = Load<align, false>(bgr);

            // Groups 0/1 draw on src[0..1]; groups 2/3 draw on src[1..2].
            const v128_u16 lower = vec_packsu(
                BgraToGray32(vec_perm(src[0], src[1], K8_PERM_0)),
                BgraToGray32(vec_perm(src[0], src[1], K8_PERM_1)));
            const v128_u16 upper = vec_packsu(
                BgraToGray32(vec_perm(src[1], src[2], K8_PERM_2)),
                BgraToGray32(vec_perm(src[1], src[2], K8_PERM_3)));

            Store<align, first>(gray, vec_packsu(lower, upper));
        }
开发者ID:4144,项目名称:Simd,代码行数:15,


示例25: pix_norm1_altivec

static int pix_norm1_altivec(uint8_t *pix, int line_size){    int i, s = 0;    const vector unsigned int zero =        (const vector unsigned int) vec_splat_u32(0);    vector unsigned char perm = vec_lvsl(0, pix);    vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);    vector signed int sum;    for (i = 0; i < 16; i++) {        /* Read the potentially unaligned pixels. */        vector unsigned char pixl = vec_ld(0,  pix);        vector unsigned char pixr = vec_ld(15, pix);        vector unsigned char pixv = vec_perm(pixl, pixr, perm);        /* Square the values, and add them to our sum. */        sv = vec_msum(pixv, pixv, sv);        pix += line_size;    }    /* Sum up the four partial sums, and put the result into s. */    sum = vec_sums((vector signed int) sv, (vector signed int) zero);    sum = vec_splat(sum, 3);    vec_ste(sum, 0, &s);    return s;}
开发者ID:26mansi,项目名称:FFmpeg,代码行数:27,


示例26: scalarproduct_int16_altivec

static int32_t scalarproduct_int16_altivec(const int16_t * v1, const int16_t * v2, int order, const int shift){    int i;    LOAD_ZERO;    register vec_s16 vec1, *pv;    register vec_s32 res = vec_splat_s32(0), t;    register vec_u32 shifts;    int32_t ires;    shifts = zero_u32v;    if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1)));    if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08));    if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04));    if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02));    if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01));    for(i = 0; i < order; i += 8){        pv = (vec_s16*)v1;        vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));        t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);        t = vec_sr(t, shifts);        res = vec_sums(t, res);        v1 += 8;        v2 += 8;    }    res = vec_splat(res, 3);    vec_ste(res, 0, &ires);    return ires;}
开发者ID:10045125,项目名称:xuggle-xuggler,代码行数:29,


示例27: _twin_fbdev_vec_put_span

/*
 * Copy the pixel span [left, right) of row `top` from `pixels` into the
 * framebuffer described by `closure` (a twin_fbdev_t).
 *
 * Does nothing when the device is inactive or the framebuffer mapping
 * failed.  The copy runs in three phases:
 *   1. scalar copies until the destination is 16-byte aligned,
 *   2. vector copies of 4 pixels at a time, realigning the source with
 *      vec_perm,
 *   3. scalar copies of the remaining 0..3 pixels.
 *
 * Fix over the original: phase 1 was written as
 * `while (unaligned && width--)`.  When the span ended before the
 * destination became aligned, the failing test still decremented `width`
 * to -1, and the final `while (width--)` loop then kept copying far
 * beyond the span.  `width` is now only decremented when a pixel is
 * actually copied, and both scalar loops test `width > 0`.
 */
static void _twin_fbdev_vec_put_span (twin_coord_t    left,
				      twin_coord_t    top,
				      twin_coord_t    right,
				      twin_argb32_t   *pixels,
				      void     	      *closure)
{
	twin_fbdev_t    	*tf = closure;
	twin_coord_t    	width = right - left;
	unsigned int		*dest;
	vector unsigned char 	edgeperm;
	vector unsigned char	src0v, src1v, srcv;

	if (!tf->active || tf->fb_base == MAP_FAILED)
		return;

	dest = (unsigned int *)(tf->fb_ptr + top * tf->fb_fix.line_length);
	dest += left;

	/* Phase 1: scalar copy up to the next 16-byte boundary of dest. */
	while ((((unsigned long)dest) & 0xf) && width > 0) {
		*(dest++) = *(pixels++);
		width--;
	}

	/* Phase 2: dest is aligned; pixels may not be, so each output
	 * vector is assembled from two consecutive aligned loads. */
	edgeperm = vec_lvsl (0, pixels);
	src0v = vec_ld (0, pixels);
	while (width >= 4) {
		src1v = vec_ld (16, pixels);
		srcv = vec_perm (src0v, src1v, edgeperm);
		vec_st ((vector unsigned int)srcv, 0, dest);
		src0v = src1v;
		dest += 4;
		pixels += 4;
		width -= 4;
	}

	/* Phase 3: scalar tail. */
	while (width > 0) {
		*(dest++) = *(pixels++);
		width--;
	}
}
开发者ID:freedesktop-unofficial-mirror,项目名称:twin,代码行数:35,


示例28: pix_norm1_altivec

int pix_norm1_altivec(uint8_t *pix, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char *tv;    vector unsigned char pixv;    vector unsigned int sv;    vector signed int sum;        sv = (vector unsigned int)vec_splat_u32(0);        s = 0;    for (i = 0; i < 16; i++) {        /* Read in the potentially unaligned pixels */        tv = (vector unsigned char *) pix;        pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));        /* Square the values, and add them to our sum */        sv = vec_msum(pixv, pixv, sv);        pix += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sum = vec_sums((vector signed int) sv, (vector signed int) zero);    sum = vec_splat(sum, 3);    vec_ste(sum, 0, &s);    return s;}
开发者ID:KoetseJ,项目名称:xumo,代码行数:30,


示例29: x264_sub8x8_dct_dc_altivec

void x264_sub8x8_dct_dc_altivec( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 ){    vec_s16_t diff[2];    vec_s32_t sum[2];    vec_s32_t zero32 = vec_splat_s32(0);    vec_u8_t mask = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,                      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F };    pix_diff( &pix1[0], &pix2[0], diff, 0 );    pix_diff( &pix1[4*FENC_STRIDE], &pix2[4*FDEC_STRIDE], diff, 1 );    sum[0] = vec_sum4s( diff[0], zero32 );    sum[1] = vec_sum4s( diff[1], zero32 );    diff[0] = vec_packs( sum[0], sum[1] );    sum[0] = vec_sum4s( diff[0], zero32 );    diff[0] = vec_packs( sum[0], zero32 );    diff[1] = vec_vsx_ld( 0, dct );    diff[0] = vec_perm( diff[0], diff[1], mask );    vec_vsx_st( diff[0], 0, dct );    /* 2x2 DC transform */    int d0 = dct[0] + dct[1];    int d1 = dct[2] + dct[3];    int d2 = dct[0] - dct[1];    int d3 = dct[2] - dct[3];    dct[0] = d0 + d1;    dct[1] = d0 - d1;    dct[2] = d2 + d3;    dct[3] = d2 - d3;}
开发者ID:xkfz007,项目名称:x264_git,代码行数:32,


示例30: pix_sum_altivec

static int pix_sum_altivec(uint8_t * pix, int line_size){    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm = vec_lvsl(0, pix);    vector unsigned char t1;    vector unsigned int sad;    vector signed int sumdiffs;    int i;    int s;    sad = (vector unsigned int)vec_splat_u32(0);    for (i = 0; i < 16; i++) {        /* Read the potentially unaligned 16 pixels into t1 */        vector unsigned char pixl = vec_ld( 0, pix);        vector unsigned char pixr = vec_ld(15, pix);        t1 = vec_perm(pixl, pixr, perm);        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t1, sad);        pix += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}
开发者ID:0xFFeng,项目名称:ffmpeg,代码行数:32,



注:本文中的vec_perm函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vec_safe_push函数代码示例
C++ vec_packsu函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。