您当前的位置：首页 > IT编程 > C++
\| C语言 \| Java \| VB \| VC \| python \| Android \| TensorFlow \| C++ \| oracle \| 学术与代码 \| cnn卷积神经网络 \| gnn \| 图像修复 \| Keras \| 数据集 \| Neo4j \| 自然语言处理 \| 深度学习 \| 医学CAD \| 医学影像 \| 超参数 \| pointnet \| pytorch \| 异常检测 \| Transformers \| 情感分类 \| 知识图谱 \|

自学教程：C++ vec_ste函数代码示例

51自学网 2021-06-03 09:36:45

C++

这篇教程《C++ vec_ste函数代码示例》写得很实用，希望能帮到您。

本文整理汇总了C++中vec_ste函数的典型用法代码示例。如果您正苦于以下问题：C++ vec_ste函数的具体用法？C++ vec_ste怎么用？C++ vec_ste使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_ste函数的30个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: scalarproduct_and_madd_int16_altivec

static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul){    LOAD_ZERO;    vec_s16 *pv1 = (vec_s16*)v1;    vec_s16 *pv2 = (vec_s16*)v2;    vec_s16 *pv3 = (vec_s16*)v3;    register vec_s16 muls = {mul,mul,mul,mul,mul,mul,mul,mul};    register vec_s16 t0, t1, i0, i1;    register vec_s16 i2 = pv2[0], i3 = pv3[0];    register vec_s32 res = zero_s32v;    register vec_u8 align = vec_lvsl(0, v2);    int32_t ires;    order >>= 4;    do {        t0 = vec_perm(i2, pv2[1], align);        i2 = pv2[2];        t1 = vec_perm(pv2[1], i2, align);        i0 = pv1[0];        i1 = pv1[1];        res = vec_msum(t0, i0, res);        res = vec_msum(t1, i1, res);        t0 = vec_perm(i3, pv3[1], align);        i3 = pv3[2];        t1 = vec_perm(pv3[1], i3, align);        pv1[0] = vec_mladd(t0, muls, i0);        pv1[1] = vec_mladd(t1, muls, i1);        pv1 += 2;        pv2 += 2;        pv3 += 2;    } while(--order);    res = vec_splat(vec_sums(res, zero_s32v), 3);    vec_ste(res, 0, &ires);    return ires;}

开发者ID:10045125，项目名称:xuggle-xuggler，代码行数:34，

示例2: pix_norm1_altivec

int pix_norm1_altivec(uint8_t *pix, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char *tv;    vector unsigned char pixv;    vector unsigned int sv;    vector signed int sum;        sv = (vector unsigned int)vec_splat_u32(0);        s = 0;    for (i = 0; i < 16; i++) {        /* Read in the potentially unaligned pixels */        tv = (vector unsigned char *) pix;        pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));        /* Square the values, and add them to our sum */        sv = vec_msum(pixv, pixv, sv);        pix += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sum = vec_sums((vector signed int) sv, (vector signed int) zero);    sum = vec_splat(sum, 3);    vec_ste(sum, 0, &s);    return s;}

开发者ID:KoetseJ，项目名称:xumo，代码行数:30，

示例3: pix_sum_altivec

static int pix_sum_altivec(uint8_t * pix, int line_size){    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm = vec_lvsl(0, pix);    vector unsigned char t1;    vector unsigned int sad;    vector signed int sumdiffs;    int i;    int s;    sad = (vector unsigned int)vec_splat_u32(0);    for (i = 0; i < 16; i++) {        /* Read the potentially unaligned 16 pixels into t1 */        vector unsigned char pixl = vec_ld( 0, pix);        vector unsigned char pixr = vec_ld(15, pix);        t1 = vec_perm(pixl, pixr, perm);        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t1, sad);        pix += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}

开发者ID:0xFFeng，项目名称:ffmpeg，代码行数:32，

示例4: scalarproduct_int16_altivec

static int32_t scalarproduct_int16_altivec(const int16_t * v1, const int16_t * v2, int order, const int shift){    int i;    LOAD_ZERO;    register vec_s16 vec1, *pv;    register vec_s32 res = vec_splat_s32(0), t;    register vec_u32 shifts;    int32_t ires;    shifts = zero_u32v;    if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1)));    if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08));    if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04));    if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02));    if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01));    for(i = 0; i < order; i += 8){        pv = (vec_s16*)v1;        vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));        t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);        t = vec_sr(t, shifts);        res = vec_sums(t, res);        v1 += 8;        v2 += 8;    }    res = vec_splat(res, 3);    vec_ste(res, 0, &ires);    return ires;}

开发者ID:10045125，项目名称:xuggle-xuggler，代码行数:29，

示例5: sad8_altivec_c

uint32_tsad8_altivec_c(const uint8_t * cur,	   const uint8_t *ref,	   const uint32_t stride){	uint32_t result = 0;		register vector unsigned int sad;	register vector unsigned char c;	register vector unsigned char r;		/* initialize */	sad = vec_splat_u32(0);		/* Perform sad operations */	SAD8();	SAD8();	SAD8();	SAD8();		SAD8();	SAD8();	SAD8();	SAD8();		/* finish addition, add the first 2 together */	sad = vec_and(sad, (vector unsigned int)vec_pack(vec_splat_u16(-1),vec_splat_u16(0)));	sad = (vector unsigned int)vec_sums((vector signed int)sad, vec_splat_s32(0));	sad = vec_splat(sad,3);	vec_ste(sad, 0, &result);			return result;}

开发者ID:Distrotech，项目名称:xvidcore，代码行数:33，

示例6: sse8_16bit_altivec_c

/*
 * Accumulates eight SSE8_16BIT() steps into `sum` and returns the reduced
 * value.
 *
 * SSE8_16BIT() is a macro defined elsewhere; it presumably loads one row from
 * b1 and b2, forms the difference and adds its squares to sum - confirm
 * against the macro definition.
 */
uint32_t
sse8_16bit_altivec_c(const int16_t * b1,
			 const int16_t * b2,
			 const uint32_t stride)
{
    register vector signed short b1_vec;
    register vector signed short b2_vec;
    register vector signed short diff;
    register vector signed int sum;
    uint32_t result;

    /* initialize */
    sum = vec_splat_s32(0);

    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();

    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();

    /* sum the vector */
    sum = vec_sums(sum, vec_splat_s32(0));
    /* broadcast the total so vec_ste stores it whatever lane &result maps to */
    sum = vec_splat(sum,3);

    vec_ste(sum,0,(int*)&result);

    /* and return */
    return result;
}

开发者ID:Distrotech，项目名称:xvidcore，代码行数:33，

示例7: pix_norm1_altivec

static int pix_norm1_altivec(uint8_t *pix, int line_size){    int i, s = 0;    const vector unsigned int zero =        (const vector unsigned int) vec_splat_u32(0);    vector unsigned char perm = vec_lvsl(0, pix);    vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);    vector signed int sum;    for (i = 0; i < 16; i++) {        /* Read the potentially unaligned pixels. */        vector unsigned char pixl = vec_ld(0,  pix);        vector unsigned char pixr = vec_ld(15, pix);        vector unsigned char pixv = vec_perm(pixl, pixr, perm);        /* Square the values, and add them to our sum. */        sv = vec_msum(pixv, pixv, sv);        pix += line_size;    }    /* Sum up the four partial sums, and put the result into s. */    sum = vec_sums((vector signed int) sv, (vector signed int) zero);    sum = vec_splat(sum, 3);    vec_ste(sum, 0, &s);    return s;}

开发者ID:26mansi，项目名称:FFmpeg，代码行数:27，

示例8: put_vp8_epel_h_altivec_core

static av_always_inlinevoid put_vp8_epel_h_altivec_core(uint8_t *dst, ptrdiff_t dst_stride,                                 uint8_t *src, ptrdiff_t src_stride,                                 int h, int mx, int w, int is6tap){    LOAD_H_SUBPEL_FILTER(mx-1);    vec_u8 align_vec0, align_vec8, permh0, permh8, filt;    vec_u8 perm_6tap0, perm_6tap8, perml0, perml8;    vec_u8 a, b, pixh, pixl, outer;    vec_s16 f16h, f16l;    vec_s32 filth, filtl;    vec_u8 perm_inner6 = { 1,2,3,4, 2,3,4,5, 3,4,5,6, 4,5,6,7 };    vec_u8 perm_inner4 = { 0,1,2,3, 1,2,3,4, 2,3,4,5, 3,4,5,6 };    vec_u8 perm_inner  = is6tap ? perm_inner6 : perm_inner4;    vec_u8 perm_outer = { 4,9, 0,5, 5,10, 1,6, 6,11, 2,7, 7,12, 3,8 };    vec_s32 c64 = vec_sl(vec_splat_s32(1), vec_splat_u32(6));    vec_u16 c7  = vec_splat_u16(7);    align_vec0 = vec_lvsl( -is6tap-1, src);    align_vec8 = vec_lvsl(8-is6tap-1, src);    permh0     = vec_perm(align_vec0, align_vec0, perm_inner);    permh8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_inner = vec_add(perm_inner, vec_splat_u8(4));    perml0     = vec_perm(align_vec0, align_vec0, perm_inner);    perml8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_6tap0 = vec_perm(align_vec0, align_vec0, perm_outer);    perm_6tap8 = vec_perm(align_vec8, align_vec8, perm_outer);    while (h --> 0) {        FILTER_H(f16h, 0);        if (w == 16) {            FILTER_H(f16l, 8);            filt = vec_packsu(f16h, f16l);            vec_st(filt, 0, dst);        } else {            filt = vec_packsu(f16h, f16h);            vec_ste((vec_u32)filt, 0, (uint32_t*)dst);            if (w == 8)                vec_ste((vec_u32)filt, 4, (uint32_t*)dst);        }        src += src_stride;        dst += dst_stride;    }}

开发者ID:Arcen，项目名称:libav，代码行数:47，

示例9: pix_abs16x16_y2_altivec

int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);    vector unsigned char *tv;    vector unsigned char pix1v, pix2v, pix3v, avgv, t5;    vector unsigned int sad;    vector signed int sumdiffs;    uint8_t *pix3 = pix2 + line_size;    s = 0;    sad = (vector unsigned int)vec_splat_u32(0);    /*       Due to the fact that pix3 = pix2 + line_size, the pix3 of one       iteration becomes pix2 in the next iteration. We can use this       fact to avoid a potentially expensive unaligned read, each       time around the loop.       Read unaligned pixels into our vectors. The vectors are as follows:       pix2v: pix2[0]-pix2[15]       Split the pixel vectors into shorts    */    tv = (vector unsigned char *) &pix2[0];    pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));        for(i=0;i<16;i++) {        /*           Read unaligned pixels into our vectors. The vectors are as follows:           pix1v: pix1[0]-pix1[15]           pix3v: pix3[0]-pix3[15]        */        tv = (vector unsigned char *) pix1;        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));        tv = (vector unsigned char *) &pix3[0];        pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0]));        /* Calculate the average vector */        avgv = vec_avg(pix2v, pix3v);        /* Calculate a sum of abs differences vector */        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);                pix1 += line_size;        pix2v = pix3v;        pix3 += line_size;            }        /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;    }

开发者ID:KoetseJ，项目名称:xumo，代码行数:59，

示例10: b

voidb(){  z = vec_add (x, y);  /* Make sure the predicates accept correct argument types.  */  int1 = vec_all_in (f, g);  int1 = vec_all_ge (f, g);  int1 = vec_all_eq (c, d);  int1 = vec_all_ne (s, t);  int1 = vec_any_eq (i, j);  int1 = vec_any_ge (f, g);  int1 = vec_all_ngt (f, g);  int1 = vec_any_ge (c, d);  int1 = vec_any_ge (s, t);  int1 = vec_any_ge (i, j);  int1 = vec_any_ge (c, d);  int1 = vec_any_ge (s, t);  int1 = vec_any_ge (i, j);  vec_mtvscr (i);  vec_dssall ();  s = (vector signed short) vec_mfvscr ();  vec_dss (3);  vec_dst (pi, int1 + int2, 3);  vec_dstst (pi, int1 + int2, 3);  vec_dststt (pi, int1 + int2, 3);  vec_dstt (pi, int1 + int2, 3);  uc = (vector unsigned char) vec_lvsl (int1 + 69, (signed int *) pi);  uc = (vector unsigned char) vec_lvsr (int1 + 69, (signed int *) pi);  c = vec_lde (int1, (signed char *) pi);  s = vec_lde (int1, (signed short *) pi);  i = vec_lde (int1, (signed int *) pi);  i = vec_ldl (int1, pi);  i = vec_ld (int1, pi);  vec_st (i, int2, pi);  vec_ste (c, int2, (signed char *) pi);  vec_ste (s, int2, (signed short *) pi);  vec_ste (i, int2, (signed int *) pi);  vec_stl (i, int2, pi);}

开发者ID:Akheon23，项目名称:chromecast-mirrored-source.toolchain，代码行数:46，

示例11: test

static void test (){#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__    vector unsigned char vuc = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};    vector signed char vsc = {7,6,5,4,3,2,1,0,-1,-2,-3,-4,-5,-6,-7,-8};    vector unsigned short vus = {7,6,5,4,3,2,1,0};    vector signed short vss = {3,2,1,0,-1,-2,-3,-4};    vector unsigned int vui = {3,2,1,0};    vector signed int vsi = {1,0,-1,-2};    vector float vf = {3.0,2.0,1.0,0.0};#else    vector unsigned char vuc = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};    vector signed char vsc = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};    vector unsigned short vus = {0,1,2,3,4,5,6,7};    vector signed short vss = {-4,-3,-2,-1,0,1,2,3};    vector unsigned int vui = {0,1,2,3};    vector signed int vsi = {-2,-1,0,1};    vector float vf = {0.0,1.0,2.0,3.0};#endif    vec_ste (vuc, 9*1, (unsigned char *)svuc);    vec_ste (vsc, 14*1, (signed char *)svsc);    vec_ste (vus, 7*2, (unsigned short *)svus);    vec_ste (vss, 1*2, (signed short *)svss);    vec_ste (vui, 3*4, (unsigned int *)svui);    vec_ste (vsi, 2*4, (signed int *)svsi);    vec_ste (vf,  0*4, (float *)svf);    check_arrays ();}

开发者ID:pjump，项目名称:gcc，代码行数:30，

示例12: r_dimpatchD_ALTIVEC

void r_dimpatchD_ALTIVEC(const DCanvas *const cvs, argb_t color, int alpha, int x1, int y1, int w, int h){	int x, y, i;	argb_t *line;	int invAlpha = 256 - alpha;	int dpitch = cvs->pitch / sizeof(argb_t);	line = (argb_t *)cvs->buffer + y1 * dpitch;	int batches = w / 4;	int remainder = w & 3;	// AltiVec temporaries:	const vu16 zero = {0, 0, 0, 0, 0, 0, 0, 0};	const vu16 upper8mask = {0, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0xff};	const vu16 blendAlpha = {0, alpha, alpha, alpha, 0, alpha, alpha, alpha};	const vu16 blendInvAlpha = {0, invAlpha, invAlpha, invAlpha, 0, invAlpha, invAlpha, invAlpha};	const vu16 blendColor = {0, RPART(color), GPART(color), BPART(color), 0, RPART(color), GPART(color), BPART(color)};	const vu16 blendMult = vec_mladd(blendColor, blendAlpha, zero);	for (y = y1; y < y1 + h; y++)	{		// AltiVec optimize the bulk in batches of 4 colors:		for (i = 0, x = x1; i < batches; ++i, x += 4)		{			const vu32 input = {line[x + 0], line[x + 1], line[x + 2], line[x + 3]};			const vu32 output = (vu32)blend4vs1_altivec(input, blendMult, blendInvAlpha, upper8mask);			vec_ste(output, 0, &line[x]);			vec_ste(output, 4, &line[x]);			vec_ste(output, 8, &line[x]);			vec_ste(output, 12, &line[x]);		}		if (remainder)		{			// Pick up the remainder:			for (; x < x1 + w; x++)			{				line[x] = alphablend1a(line[x], color, alpha);			}		}		line += dpitch;	}}

开发者ID:JohnnyonFlame，项目名称:odamex，代码行数:45，

示例13: sad16_altivec_c

/*
 * Accumulates sixteen SAD16() steps and returns the resulting sum.
 *
 * SAD16() is a macro defined elsewhere; it presumably uses cur, ref, stride,
 * perm, t1, t2, sad, sumdiffs and best_vec, and may jump to the `bail`
 * label - confirm against the macro definition.
 *
 * cur       current block (checked for 16-byte alignment when DEBUG is set)
 * ref       reference block; its alignment shift is taken with vec_lvsl
 * stride    row pitch in bytes; converted to vector units (>> 4) below
 * best_sad  value broadcast into best_vec
 */
uint32_t
sad16_altivec_c(vector unsigned char *cur,
			  vector unsigned char *ref,
			  uint32_t stride,
			  const uint32_t best_sad)
{
	vector unsigned char perm;
	vector unsigned char t1, t2;
	vector unsigned int sad;
	vector unsigned int sumdiffs;
	vector unsigned int best_vec;
	uint32_t result;

#ifdef DEBUG
	/* print alignment errors if DEBUG is on */
	if (((unsigned long) cur) & 0xf)
		fprintf(stderr, "sad16_altivec:incorrect align, cur: %lx\n", (unsigned long)cur);
	if (stride & 0xf)
		fprintf(stderr, "sad16_altivec:incorrect align, stride: %lu\n", (unsigned long)stride);
#endif

	/* initialization */
	sad = vec_splat_u32(0);
	sumdiffs = sad;
	stride >>= 4;
	perm = vec_lvsl(0, (unsigned char *) ref);
	/* place best_sad in element 0, then broadcast it to every lane */
	*((uint32_t*)&best_vec) = best_sad;
	best_vec = vec_splat(best_vec, 0);

	/* perform sum of differences between current and previous */
	SAD16();
	SAD16();
	SAD16();
	SAD16();

	SAD16();
	SAD16();
	SAD16();
	SAD16();

	SAD16();
	SAD16();
	SAD16();
	SAD16();

	SAD16();
	SAD16();
	SAD16();
	SAD16();

  bail:
	/* copy vector sum into unaligned result */
	sumdiffs = vec_splat(sumdiffs, 3);
	vec_ste(sumdiffs, 0, (uint32_t*) &result);
	return result;
}

开发者ID:Distrotech，项目名称:xvidcore，代码行数:56，

示例14: quant_h263_inter_altivec_c

uint32_tquant_h263_inter_altivec_c(int16_t *coeff,                            int16_t *data,                            const uint32_t quant,                            const uint16_t *mpeg_quant_matrices){    vector unsigned char zerovec;    vector unsigned short mult;    vector unsigned short quant_m_2;    vector unsigned short quant_d_2;    vector unsigned short sum_short;    vector signed short acLevel;        vector unsigned int even;    vector unsigned int odd;        vector bool short m2_mask;    vector bool short zero_mask;        uint32_t result;#ifdef DEBUG    if(((unsigned)coeff) & 0x15)        fprintf(stderr, "quant_h263_inter_altivec_c:incorrect align, coeff: %lx/n", (long)coeff);#endif        /* initialisation stuff */    zerovec = vec_splat_u8(0);    *((unsigned short*)&mult) = (unsigned short)multipliers[quant];    mult = vec_splat(mult, 0);    *((unsigned short*)&quant_m_2) = (unsigned short)quant;    quant_m_2 = vec_splat(quant_m_2, 0);    quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));    *((unsigned short*)&quant_d_2) = (unsigned short)quant;    quant_d_2 = vec_splat(quant_d_2, 0);    quant_d_2 = vec_sr(quant_d_2, vec_splat_u16(1));    sum_short = (vector unsigned short)zerovec;        /* Quantize */    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();        QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();    QUANT_H263_INTER_ALTIVEC();            /* Calculate the return value */    even = (vector unsigned int)vec_sum4s((vector signed short)sum_short, (vector signed int)zerovec);    even = (vector unsigned int)vec_sums((vector signed int)even, (vector signed int)zerovec);    even = vec_splat(even, 3);    vec_ste(even, 0, &result);    return result;}

开发者ID:roozbeh，项目名称:openCU，代码行数:56，

示例15: StoreUnalignedLess

/* Store less than a vector to an unaligned location in memory.
 *
 * v     : vector whose first n bytes are to be written
 * where : destination address (any alignment)
 * n     : number of bytes to store
 *
 * The vector is first rotated with the vec_lvsr permute for `where`, so that
 * the byte-wise vec_ste calls, which pick the element matching the low bits
 * of the target address, emit the bytes in their original order.
 */
static inline void
StoreUnalignedLess (vector unsigned char v,
                    const guchar *where,
                    int n)
{
  int i;
  vector unsigned char permuteVector = vec_lvsr(0, where);
  v = vec_perm(v, v, permuteVector);
  for (i=0; i<n; i++)
    vec_ste(v, i, CONST_BUFFER(where));
}

开发者ID:1ynx，项目名称:gimp，代码行数:12，

示例16: sad16bi_altivec_c

/*
 * Accumulates sixteen SAD16BI() steps into `sum` and returns the reduced
 * value.
 *
 * SAD16BI() is a macro defined elsewhere; it presumably combines ref1 and
 * ref2, compares the result with cur and uses t1, t2, mask1, mask2, sad and
 * sum - confirm against the macro definition.
 *
 * cur     current block (checked for 16-byte alignment when DEBUG is set)
 * ref1    first reference; alignment shift taken with vec_lvsl
 * ref2    second reference; alignment shift taken with vec_lvsl
 * stride  row pitch in bytes; converted to vector units (>> 4) below
 */
uint32_t
sad16bi_altivec_c(vector unsigned char *cur,
                        vector unsigned char *ref1,
                        vector unsigned char *ref2,
                        uint32_t stride)
{
    vector unsigned char t1, t2;
    vector unsigned char mask1, mask2;
    vector unsigned char sad;
    vector unsigned int sum;
    uint32_t result;

#ifdef DEBUG
    /* print alignment errors if this is on */
    if((unsigned long)cur & 0xf)
        fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lx\n", (unsigned long)cur);
    if(stride & 0xf)
        fprintf(stderr, "sad16bi_altivec:incorrect align, stride: %lu\n", (unsigned long)stride);
#endif

    /* Initialisation stuff */
    stride >>= 4;
    mask1 = vec_lvsl(0, (unsigned char*)ref1);
    mask2 = vec_lvsl(0, (unsigned char*)ref2);
    sad = vec_splat_u8(0);
    sum = (vector unsigned int)sad;

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    /* reduce the four lanes, broadcast, and store a single word */
    sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));
    sum = vec_splat(sum, 3);
    vec_ste(sum, 0, (uint32_t*)&result);

    return result;
}

开发者ID:Distrotech，项目名称:xvidcore，代码行数:53，

示例17: sse8_altivec

/** * Sum of Squared Errors for a 8x8 block. * AltiVec-enhanced. * It's the pix_abs8x8_altivec code above w/ squaring added. */int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;    vector unsigned char t1, t2, t3,t4, t5;    vector unsigned int sum;    vector signed int sumsqr;        sum = (vector unsigned int)vec_splat_u32(0);    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);        for(i=0;i<8;i++) {	/* Read potentially unaligned pixels into t1 and t2	   Since we're reading 16 pixels, and actually only want 8,	   mask out the last 8 pixels. The 0s don't change the sum. */        perm1 = vec_lvsl(0, pix1);        pix1v = (vector unsigned char *) pix1;        perm2 = vec_lvsl(0, pix2);        pix2v = (vector unsigned char *) pix2;        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);        /*          Since we want to use unsigned chars, we can take advantage          of the fact that abs(a-b)^2 = (a-b)^2.        */        	/* Calculate abs differences vector */         t3 = vec_max(t1, t2);        t4 = vec_min(t1, t2);        t5 = vec_sub(t3, t4);                /* Square the values and add them to our sum */        sum = vec_msum(t5, t5, sum);                pix1 += line_size;        pix2 += line_size;    }        /* Sum up the four partial sums, and put the result into s */    sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);    sumsqr = vec_splat(sumsqr, 3);    vec_ste(sumsqr, 0, &s);        return s;}

开发者ID:KoetseJ，项目名称:xumo，代码行数:55，

示例18: pix_abs16x16_x2_altivec

int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);    vector unsigned char *tv;    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;    vector unsigned int sad;    vector signed int sumdiffs;    s = 0;    sad = (vector unsigned int)vec_splat_u32(0);    for(i=0;i<16;i++) {        /*           Read unaligned pixels into our vectors. The vectors are as follows:           pix1v: pix1[0]-pix1[15]           pix2v: pix2[0]-pix2[15]	pix2iv: pix2[1]-pix2[16]        */        tv = (vector unsigned char *) pix1;        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));                tv = (vector unsigned char *) &pix2[0];        pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));        tv = (vector unsigned char *) &pix2[1];        pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));        /* Calculate the average vector */        avgv = vec_avg(pix2v, pix2iv);        /* Calculate a sum of abs differences vector */        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);                pix1 += line_size;        pix2 += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}

开发者ID:KoetseJ，项目名称:xumo，代码行数:46，

示例19: sad8_altivec

/*
 * This function assumes cur is 8 bytes aligned, stride is 16 bytes
 * aligned and ref is unaligned
 *
 * Accumulates four SAD8() steps and returns the reduced sum.  SAD8(),
 * ZERODEF, ZEROVEC and get_perm() are defined elsewhere; SAD8() presumably
 * uses the t1..t5, tp, perm_* and sad locals - confirm against the macro
 * definition.
 */
unsigned long
sad8_altivec(const vector unsigned char *cur,
			 const vector unsigned char *ref,
			 unsigned long stride)
{
	vector unsigned char t1, t2, t3, t4, t5, tp;
	vector unsigned int sad;
	vector signed int sumdiffs;
	vector unsigned char perm_cur;
	vector unsigned char perm_ref1, perm_ref2;
	unsigned long result;
	/* vec_ste writes exactly 32 bits, so the sum is collected in a 32-bit
	 * object; storing straight into an unsigned long would leave half of
	 * it uninitialised where long is 64 bits wide. */
	unsigned int sum32 = 0;

	ZERODEF;

#ifdef DEBUG
	if (((unsigned long) cur) & 0x7)
		fprintf(stderr, "sad8_altivec:incorrect align, cur: %lx\n", (unsigned long) cur);
	/* ref is allowed to be unaligned, so it is deliberately not checked */
	if (stride & 0xf)
		fprintf(stderr, "sad8_altivec:incorrect align, stride: %lx\n", stride);
#endif

	/* bit 3 of the address selects which 8-byte half of the quadword holds cur */
	perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01);
	perm_ref1 = vec_lvsl(0, (unsigned char *) ref);
	perm_ref2 = get_perm(0);

	/* initialization */
	sad = (vector unsigned int) (ZEROVEC);
	stride >>= 4;

	/* perform sum of differences between current and previous */
	SAD8();
	SAD8();
	SAD8();
	SAD8();

	/* sum all parts of difference into one 32 bit quantity */
	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);

	/* copy vector sum into unaligned result */
	sumdiffs = vec_splat(sumdiffs, 3);
	vec_ste(sumdiffs, 0, (int *) &sum32);
	result = sum32;

	return (result);
}

开发者ID:cchatterj，项目名称:isabel，代码行数:49，

示例20: pix_abs8x8_altivec

int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;    vector unsigned char t1, t2, t3,t4, t5;    vector unsigned int sad;    vector signed int sumdiffs;    sad = (vector unsigned int)vec_splat_u32(0);    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);    for(i=0;i<8;i++) {	/* Read potentially unaligned pixels into t1 and t2	   Since we're reading 16 pixels, and actually only want 8,	   mask out the last 8 pixels. The 0s don't change the sum. */        perm1 = vec_lvsl(0, pix1);        pix1v = (vector unsigned char *) pix1;        perm2 = vec_lvsl(0, pix2);        pix2v = (vector unsigned char *) pix2;        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);	/* Calculate a sum of abs differences vector */         t3 = vec_max(t1, t2);        t4 = vec_min(t1, t2);        t5 = vec_sub(t3, t4);	/* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);        pix1 += line_size;        pix2 += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}

开发者ID:KoetseJ，项目名称:xumo，代码行数:44，

示例21: sad16_x2_altivec

static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h){    int i;    int s;    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);    vector unsigned char perm1 = vec_lvsl(0, pix2);    vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));    vector unsigned char pix2l, pix2r;    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;    vector unsigned int sad;    vector signed int sumdiffs;    s = 0;    sad = (vector unsigned int)vec_splat_u32(0);    for (i = 0; i < h; i++) {        /* Read unaligned pixels into our vectors. The vectors are as follows:           pix1v: pix1[0]-pix1[15]           pix2v: pix2[0]-pix2[15]      pix2iv: pix2[1]-pix2[16] */        pix1v  = vec_ld( 0, pix1);        pix2l  = vec_ld( 0, pix2);        pix2r  = vec_ld(16, pix2);        pix2v  = vec_perm(pix2l, pix2r, perm1);        pix2iv = vec_perm(pix2l, pix2r, perm2);        /* Calculate the average vector */        avgv = vec_avg(pix2v, pix2iv);        /* Calculate a sum of abs differences vector */        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);        pix1 += line_size;        pix2 += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}

开发者ID:0xFFeng，项目名称:ffmpeg，代码行数:43，

示例22: reg_sad_altivec

static unsigned reg_sad_altivec(const kvz_pixel * const data1, const kvz_pixel * const data2,                        const int width, const int height, const unsigned stride1, const unsigned stride2){  vector unsigned int vsad = {0,0,0,0}, vzero = {0,0,0,0};   vector signed int sumdiffs;  int tmpsad, sad = 0;    int y, x;    for (y = 0; y < height; ++y) {    vector unsigned char perm1, perm2;        perm1 = vec_lvsl(0, &data1[y * stride1]);    perm2 = vec_lvsl(0, &data2[y * stride2]);        for (x = 0; x <= width-16; x+=16) {      vector unsigned char t1, t2, t3, t4, t5;      vector unsigned char *current, *previous;            current = (vector unsigned char *) &data1[y * stride1 + x];      previous = (vector unsigned char *) &data2[y * stride2 + x];            t1  = vec_perm(current[0], current[1], perm1 );  /* align current vector  */       t2  = vec_perm(previous[0], previous[1], perm2 );/* align previous vector */       t3  = vec_max(t1, t2 );      /* find largest of two           */       t4  = vec_min(t1, t2 );      /* find smaller of two           */       t5  = vec_sub(t3, t4);       /* find absolute difference      */       vsad = vec_sum4s(t5, vsad);    /* accumulate sum of differences */    }    for (; x < width; ++x) {      sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);    }  }    sumdiffs = vec_sums((vector signed int) vsad, (vector signed int) vzero);  /* copy vector sum into unaligned result */  sumdiffs = vec_splat( sumdiffs, 3);  vec_ste( sumdiffs, 0, &tmpsad );  sad += tmpsad;    return sad;}

开发者ID:Arizer，项目名称:kvazaar，代码行数:43，

示例23: scalarproduct_int16_altivec

/**
 * Dot product of two int16 vectors.
 *
 * @param v1    first operand, fetched with vec_unaligned_load()
 * @param v2    second operand, fetched with vec_ld (from its aligned address)
 * @param order number of elements; processed 8 at a time
 * @return the accumulated sum as produced by vec_sums
 */
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
                                           int order)
{
    LOAD_ZERO;
    vec_s16 lhs;
    vec_s32 acc = vec_splat_s32(0);
    vec_s32 prod;
    int32_t result;
    int done = 0;

    while (done < order) {
        lhs  = vec_unaligned_load(v1);
        /* Multiply pairs of lanes, then fold the products into acc. */
        prod = vec_msum(lhs, vec_ld(0, v2), zero_s32v);
        acc  = vec_sums(prod, acc);

        v1 += 8;
        v2 += 8;
        done += 8;
    }

    /* Broadcast the total and store one element. */
    acc = vec_splat(acc, 3);
    vec_ste(acc, 0, &result);

    return result;
}

开发者ID:Bjelijah，项目名称:EcamTurnH265，代码行数:20，

示例24: pix_abs16x16_altivec

int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size){    int i;    int s __attribute__((aligned(16)));    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm1, perm2, *pix1v, *pix2v;    vector unsigned char t1, t2, t3,t4, t5;    vector unsigned int sad;    vector signed int sumdiffs;        sad = (vector unsigned int)vec_splat_u32(0);    for(i=0;i<16;i++) {	/* Read potentially unaligned pixels into t1 and t2 */        perm1 = vec_lvsl(0, pix1);        pix1v = (vector unsigned char *) pix1;        perm2 = vec_lvsl(0, pix2);        pix2v = (vector unsigned char *) pix2;        t1 = vec_perm(pix1v[0], pix1v[1], perm1);        t2 = vec_perm(pix2v[0], pix2v[1], perm2);       	/* Calculate a sum of abs differences vector */         t3 = vec_max(t1, t2);        t4 = vec_min(t1, t2);        t5 = vec_sub(t3, t4);		/* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);        pix1 += line_size;        pix2 += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);        return s;}

开发者ID:KoetseJ，项目名称:xumo，代码行数:41，

示例25: scalarproduct_int16_vsx

/* Dot product of two arrays of signed shorts, loaded with vec_vsx_ld.

   V1, V2  the two operands
   ORDER   number of elements; eight are consumed per iteration

   Returns the accumulated sum produced by vec_sums.  The return type is
   spelled out explicitly: the function returns IRES, a signed int, and
   implicit int is not valid C99.  */
signed int
scalarproduct_int16_vsx (const signed short *v1, const signed short *v2,
			 int order)
{
  int i;
  LOAD_ZERO;
  register vec_s16 vec1;
  register vec_s32 res = vec_splat_s32 (0), t;
  signed int ires;

  for (i = 0; i < order; i += 8) {
    vec1 = vec_vsx_ld (0, v1);
    t    = vec_msum (vec1, vec_vsx_ld (0, v2), zero_s32v);
    res  = vec_sums (t, res);
    v1  += 8;
    v2  += 8;
  }
  /* Broadcast the total so the single-element store picks it up.  */
  res = vec_splat (res, 3);
  vec_ste (res, 0, &ires);

  return ires;
}

开发者ID:MaxKellermann，项目名称:gcc，代码行数:21，

示例26: pix_norm1_altivec

static int pix_norm1_altivec(uint8_t *pix, int line_size){    int i;    int s;    __vector zero = __vzero();/*    vector unsigned char *tv;    vector unsigned char pixv;    vector unsigned int sv;    vector signed int sum;	*/	__vector *tv;    __vector pixv;    __vector sv;    __vector sum;    sv = __vzero();    s = 0;    for (i = 0; i < 16; i++) {        /* Read in the potentially unaligned pixels */        //tv = (vector unsigned char *) pix;		tv = (__vector*) pix;        //pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));		pixv = __vperm(tv[0], tv[1], __lvsl(pix,0));        /* Square the values, and add them to our sum */        sv = vec_msum(pixv, pixv, sv);        pix += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sum = vec_sums((vector signed int) sv, (vector signed int) zero);    sum = vec_splat(sum, 3);    	vec_ste(sum, 0, &s);    return s;}

开发者ID:CodeAsm，项目名称:ffplay360，代码行数:40，

示例27: sad16_altivec

static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h){    int i;    int s;    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm = vec_lvsl(0, pix2);    vector unsigned char t1, t2, t3,t4, t5;    vector unsigned int sad;    vector signed int sumdiffs;    sad = (vector unsigned int)vec_splat_u32(0);    for (i = 0; i < h; i++) {        /* Read potentially unaligned pixels into t1 and t2 */        vector unsigned char pix2l = vec_ld( 0, pix2);        vector unsigned char pix2r = vec_ld(15, pix2);        t1 = vec_ld(0, pix1);        t2 = vec_perm(pix2l, pix2r, perm);        /* Calculate a sum of abs differences vector */        t3 = vec_max(t1, t2);        t4 = vec_min(t1, t2);        t5 = vec_sub(t3, t4);        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);        pix1 += line_size;        pix2 += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}

开发者ID:0xFFeng，项目名称:ffmpeg，代码行数:39，

示例28: scalarproduct_int16_altivec

static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,                                           int order){    int i;    LOAD_ZERO;    const vec_s16 *pv;    register vec_s16 vec1;    register vec_s32 res = vec_splat_s32(0), t;    int32_t ires;    for(i = 0; i < order; i += 8){        pv = (const vec_s16*)v1;        vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));        t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);        res = vec_sums(t, res);        v1 += 8;        v2 += 8;    }    res = vec_splat(res, 3);    vec_ste(res, 0, &ires);    return ires;}

开发者ID:0x0B501E7E，项目名称:ffmpeg，代码行数:22，

示例29: scalarproduct_and_madd_int16_altivec

static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,                                                    const int16_t *v2,                                                    const int16_t *v3,                                                    int order, int mul){    LOAD_ZERO;    vec_s16 *pv1 = (vec_s16 *) v1;    register vec_s16 muls = { mul, mul, mul, mul, mul, mul, mul, mul };    register vec_s16 t0, t1, i0, i1, i4, i2, i3;    register vec_s32 res = zero_s32v;#if HAVE_BIGENDIAN    register vec_u8 align = vec_lvsl(0, v2);    i2 = vec_ld(0, v2);    i3 = vec_ld(0, v3);#endif    int32_t ires;    order >>= 4;    do {        GET_T(t0,t1,v2,i1,i2);        i0     = pv1[0];        i1     = pv1[1];        res    = vec_msum(t0, i0, res);        res    = vec_msum(t1, i1, res);        GET_T(t0,t1,v3,i4,i3);        pv1[0] = vec_mladd(t0, muls, i0);        pv1[1] = vec_mladd(t1, muls, i1);        pv1   += 2;        v2    += 16;        v3    += 16;    } while (--order);    res = vec_splat(vec_sums(res, zero_s32v), 3);    vec_ste(res, 0, &ires);    return ires;}

开发者ID:TaoheGit，项目名称:hmi_sdl_android，代码行数:36，

示例30: ProjectDlightTexture_altivec

//.........这里部分代码省略.........			vec_t dist0, dist1, dist2;						dist0 = origin0 - tess.xyz[i][0];			dist1 = origin1 - tess.xyz[i][1];			dist2 = origin2 - tess.xyz[i][2];			backEnd.pc.c_dlightVertexes++;			texCoords0 = 0.5f + dist0 * scale;			texCoords1 = 0.5f + dist1 * scale;			if( !r_dlightBacks->integer &&					// dist . tess.normal[i]					( dist0 * tess.normal[i][0] +					dist1 * tess.normal[i][1] +					dist2 * tess.normal[i][2] ) < 0.0f ) {				clip = 63;			} else {				if ( texCoords0 < 0.0f ) {					clip |= 1;				} else if ( texCoords0 > 1.0f ) {					clip |= 2;				}				if ( texCoords1 < 0.0f ) {					clip |= 4;				} else if ( texCoords1 > 1.0f ) {					clip |= 8;				}				texCoords[0] = texCoords0;				texCoords[1] = texCoords1;				// modulate the strength based on the height and color				if ( dist2 > radius ) {					clip |= 16;					modulate = 0.0f;				} else if ( dist2 < -radius ) {					clip |= 32;					modulate = 0.0f;				} else {					dist2 = Q_fabs(dist2);					if ( dist2 < radius * 0.5f ) {						modulate = 1.0f;					} else {						modulate = 2.0f * (radius - dist2) * scale;					}				}			}			clipBits[i] = clip;			modulateVec = vec_ld(0,(float *)&modulate);			modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm);			colorVec = vec_madd(floatColorVec0,modulateVec,zero);			colorInt = vec_cts(colorVec,0);	// RGBx			colorShort = vec_pack(colorInt,colorInt);		// RGBxRGBx			colorChar = vec_packsu(colorShort,colorShort);	// RGBxRGBxRGBxRGBx			colorChar = vec_sel(colorChar,vSel,vSel);		// RGBARGBARGBARGBA replace alpha with 255			vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors);	// store color		}		// build a list of triangles that need light		numIndexes = 0;		for ( i = 0 ; i < tess.numIndexes ; i += 3 ) {			int		a, b, c;			a = tess.indexes[i];			b = tess.indexes[i+1];			c = tess.indexes[i+2];			if ( clipBits[a] & clipBits[b] & clipBits[c] ) {				continue;	// not lighted			}			hitIndexes[numIndexes] = a;			hitIndexes[numIndexes+1] = b;			
hitIndexes[numIndexes+2] = c;			numIndexes += 3;		}		if ( !numIndexes ) {			continue;		}		qglEnableClientState( GL_TEXTURE_COORD_ARRAY );		qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] );		qglEnableClientState( GL_COLOR_ARRAY );		qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray );		GL_Bind( tr.dlightImage );		// include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light		// where they aren't rendered		if ( dl->additive ) {			GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );		}		else {			GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );		}		R_DrawElements( numIndexes, hitIndexes );		backEnd.pc.c_totalIndexes += numIndexes;		backEnd.pc.c_dlightIndexes += numIndexes;	}}

开发者ID:ptitSeb，项目名称:ioq3，代码行数:101，

注：本文中的vec_ste函数示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。

C++ vec_sub函数代码示例
C++ vec_st函数代码示例