这篇教程《C++ vec_splat_u8函数代码示例》写得很实用,希望能帮到您。
本文整理汇总了C++中vec_splat_u8函数的典型用法代码示例。如果您正苦于以下问题:C++ vec_splat_u8函数的具体用法?C++ vec_splat_u8怎么用?C++ vec_splat_u8使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。 在下文中一共展示了vec_splat_u8函数的29个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: predict_16x16_dc_128_altivec
static void predict_16x16_dc_128_altivec( uint8_t *src )
{
    /* Build the per-byte constant 1 << 7 (0x80) in registers rather than
     * loading it from memory.  Original author's note: "test if generating
     * the constant is faster than loading it", i.e. versus
     *   vector unsigned int bc_v = (vector unsigned int)CV(0x80808080, 0x80808080, 0x80808080, 0x80808080);
     */
    vec_u8_t one_v   = (vec_u8_t)vec_splat_u8(1);
    vec_u8_t shift_v = (vec_u8_t)vec_splat_u8(7);
    vec_u8_t bc_v    = vec_vslb(one_v, shift_v);

    /* The macro (defined elsewhere) consumes bc_v; presumably it writes the
     * block at src -- confirm against its definition. */
    PREDICT_16x16_DC_ALTIVEC(bc_v);
}
开发者ID:xing2fan,项目名称:x264,代码行数:8,
示例2: foo
// Writes the vector sum of an all-zero and an all-one byte splat (both
// reinterpreted as vector float) to i, sets j to 5 and returns 0.
int foo(volatile vector float &i, int &j)
{
    vector unsigned char all_zero = vec_splat_u8(0);
    vector unsigned char all_one  = vec_splat_u8(1);

    // Single store to the volatile destination, as in the original.
    i = vec_add((vector float)all_zero, (vector float)all_one);

    j = 5;
    return 0;
}
开发者ID:5432935,项目名称:crossbridge,代码行数:8,
示例3: ff_vp3_idct_add_altivec
/*
 * Runs the two IDCT passes over the block (via the IDCT_* / TRANSPOSE8 macros
 * defined elsewhere, which provide b0..b7) and adds the eight result rows to
 * eight destination rows, advancing dst by stride between rows.
 */
void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
{
    LOAD_ZERO;
    vec_u8 t, vdst;
    vec_s16 vdst_16;
    /* Interleave 0xFF selector bytes with the alignment permute for dst; used
     * below to permute the loaded bytes against zero_u8v into 16-bit lanes. */
    vec_u8 vdst_mask = vec_mergeh(vec_splat_u8(-1), vec_lvsl(0, dst));

    IDCT_START

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

/* ADD(a): load the aligned vector at dst, widen it through vdst_mask, add row
 * `a` with vec_adds, pack with vec_packsu and store two 32-bit elements at
 * dst and dst + 4.  Every line but the last needs a trailing backslash. */
#define ADD(a)\
    vdst = vec_ld(0, dst);\
    vdst_16 = (vec_s16)vec_perm(vdst, zero_u8v, vdst_mask);\
    vdst_16 = vec_adds(a, vdst_16);\
    t = vec_packsu(vdst_16, vdst_16);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    ADD(b0)     dst += stride;
    ADD(b1)     dst += stride;
    ADD(b2)     dst += stride;
    ADD(b3)     dst += stride;
    ADD(b4)     dst += stride;
    ADD(b5)     dst += stride;
    ADD(b6)     dst += stride;
    ADD(b7)
}
开发者ID:9aa5,项目名称:FFmpeg,代码行数:30,
示例4: h264_deblock_q1
/* out: newp1 = clip((p2 + ((p0 + q0 + 1) >> 1)) >> 1, p1-tc0, p1+tc0) */
static inline vec_u8_t h264_deblock_q1(register vec_u8_t p0,
                                       register vec_u8_t p1,
                                       register vec_u8_t p2,
                                       register vec_u8_t q0,
                                       register vec_u8_t tc0)
{
    vec_u8_t lsb_mask  = vec_splat_u8(1);
    vec_u8_t avg_p0q0  = vec_avg(p0, q0);
    vec_u8_t avg_all   = vec_avg(avg_p0q0, p2);          /* avg(p2, avg(p0, q0)) */
    vec_u8_t round_bit = vec_and(vec_xor(avg_p0q0, p2),  /* (p2^avg(p0, q0)) & 1 */
                                 lsb_mask);
    /* Subtracting the low bit of the XOR turns the rounded-up average into
     * the truncating one: (p2+((p0+q0+1)>>1))>>1 */
    vec_u8_t unclipped = vec_subs(avg_all, round_bit);
    vec_u8_t upper     = vec_adds(p1, tc0);
    vec_u8_t lower     = vec_subs(p1, tc0);

    /* Clamp from below first, then from above, as the original did. */
    return vec_min(upper, vec_max(lower, unclipped));
}
开发者ID:AnthonyNystrom,项目名称:MobiVU,代码行数:26,
示例5: transfer_8to16subro_altivec_c
/*
 * Expands SUBRO8TO16() eight times.  The macro is defined elsewhere and is
 * presumably what reads cur/ref and writes dct using the locals declared
 * here -- confirm against its definition; the local names must stay as-is.
 */
void
transfer_8to16subro_altivec_c(int16_t * dct,
                              const uint8_t * cur,
                              const uint8_t * ref,
                              const uint32_t stride)
{
    register vector unsigned char c;
    register vector unsigned char r;
    register vector unsigned char z;
    register vector signed short cs;
    register vector signed short rs;

#ifdef DEBUG
    /* Check the alignment assumptions if this is on */
    if((long)dct & 0xf)
        fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, dct: %lx\n", (unsigned long)dct);
#endif

    /* initialize: all-zero byte vector */
    z = vec_splat_u8(0);

    SUBRO8TO16();
    SUBRO8TO16();
    SUBRO8TO16();
    SUBRO8TO16();

    SUBRO8TO16();
    SUBRO8TO16();
    SUBRO8TO16();
    SUBRO8TO16();
}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:30,
示例6: transfer_8to16sub_altivec_c
/*
 * Expands SUB8TO16() eight times.  The macro is defined elsewhere and is
 * presumably what does the per-row work using the locals declared here
 * (c, r, ox00, mask_00ff, mask, cs, rs) -- confirm against its definition.
 */
void
transfer_8to16sub_altivec_c(int16_t * dct,
                            uint8_t * cur,
                            uint8_t * ref,
                            const uint32_t stride)
{
    register vector unsigned char c,r;
    register vector unsigned char ox00;
    register vector unsigned char mask_00ff;
    register vector unsigned char mask;
    register vector signed short cs,rs;

#ifdef DEBUG
    /* Diagnostics only: report pointers/stride that violate the alignment
     * this routine expects. */
    if((long)dct & 0xf)
        fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, dct: %lx\n", (unsigned long)dct);
    if((long)cur & 0x7)
        fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, cur: %lx\n", (unsigned long)cur);
    if(stride & 0x7)
        fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect stride, stride: %lu\n", (unsigned long)stride);
#endif

    /* initialize: zero vector, and the pack of (zero, all-ones) halfwords */
    ox00 = vec_splat_u8(0);
    mask_00ff = vec_pack((vector unsigned short)ox00,vec_splat_u16(-1));

    SUB8TO16();
    SUB8TO16();
    SUB8TO16();
    SUB8TO16();

    SUB8TO16();
    SUB8TO16();
    SUB8TO16();
    SUB8TO16();
}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:34,
示例7: ff_idct_add_altivec
/*
 * Runs the IDCT macro (defined elsewhere; it provides vx0..vx7 and `zero`)
 * and adds the eight result rows to eight destination rows, advancing dest
 * by stride between rows.
 */
void ff_idct_add_altivec(uint8_t* dest, int stride, int16_t *blk)
{
    vec_s16 *block = (vec_s16*)blk;
    vec_u8 tmp;
    vec_s16 tmp2, tmp3;
    vec_u8 perm0;
    vec_u8 perm1;
    vec_u8 p0, p1, p;

    IDCT

    /* Two permute masks are prepared: perm0 from the alignment of dest and
     * perm1 from the alignment of dest + stride.  Rows alternate between
     * them below. */
    p0 = vec_lvsl (0, dest);
    p1 = vec_lvsl (stride, dest);
    p = vec_splat_u8 (-1);
    perm0 = vec_mergeh (p, p0);
    perm1 = vec_mergeh (p, p1);

/* ADD(dest,src,perm): load, widen through perm against zero, add src with
 * vec_adds, pack with vec_packsu, store two 32-bit elements.  Every line but
 * the last needs a trailing backslash. */
#define ADD(dest,src,perm)                                              \
    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */                        \
    tmp = vec_ld (0, dest);                                             \
    tmp2 = (vec_s16)vec_perm (tmp, (vec_u8)zero, perm);                 \
    tmp3 = vec_adds (tmp2, src);                                        \
    tmp = vec_packsu (tmp3, tmp3);                                      \
    vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest);                    \
    vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);

    ADD (dest, vx0, perm0)      dest += stride;
    ADD (dest, vx1, perm1)      dest += stride;
    ADD (dest, vx2, perm0)      dest += stride;
    ADD (dest, vx3, perm1)      dest += stride;
    ADD (dest, vx4, perm0)      dest += stride;
    ADD (dest, vx5, perm1)      dest += stride;
    ADD (dest, vx6, perm0)      dest += stride;
    ADD (dest, vx7, perm1)
}
开发者ID:9aa5,项目名称:FFmpeg,代码行数:35,
示例8: transfer_8to16copy_altivec_c
/*
 * Expands COPY8TO16() eight times.  The macro is defined elsewhere and is
 * presumably what reads src and writes dst using the locals `s` and
 * `zerovec` -- confirm against its definition.
 */
void
transfer_8to16copy_altivec_c(int16_t *dst,
                             uint8_t * src,
                             uint32_t stride)
{
    register vector unsigned char s;
    register vector unsigned char zerovec;

#ifdef DEBUG
    /* Check the alignment */
    if((long)dst & 0xf)
        fprintf(stderr, "transfer_8to16copy_altivec_c:incorrect align, dst: %lx\n", (unsigned long)dst);
#endif

    /* initialization: all-zero byte vector */
    zerovec = vec_splat_u8(0);

    COPY8TO16();
    COPY8TO16();
    COPY8TO16();
    COPY8TO16();

    COPY8TO16();
    COPY8TO16();
    COPY8TO16();
    COPY8TO16();
}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:27,
示例9: imageFilterMean_Altivec
/*
 * Per-byte mean of src1 and src2 written to dst for `length` bytes: a scalar
 * prologue until dst is 16-byte aligned, a vector loop of 16 bytes per
 * iteration, then BASIC_MEAN() (defined elsewhere) for the remainder.
 */
void imageFilterMean_Altivec(unsigned char *src1, unsigned char *src2, unsigned char *dst, int length)
{
    int n = length;

    /* Scalar prologue: one pixel at a time until dst sits on a 16-byte boundary */
    for (; (((long)dst & 0xF) != 0) && (n > 0); --n, ++dst, ++src1, ++src2) {
        MEAN_PIXEL();
    }

    /* Bulk: halve both inputs, then add with saturation, 16 bytes at a time */
    vector unsigned char shift_by_one = vec_splat_u8(0x1);
    for (; n >= 16; n -= 16, src1 += 16, src2 += 16, dst += 16) {
        vector unsigned char half1 = vec_sr(vec_ld(0, src1), shift_by_one);
        vector unsigned char half2 = vec_sr(vec_ld(0, src2), shift_by_one);
        vec_st(vec_adds(half1, half2), 0, dst);
    }

    /* Leftover bytes are handled by BASIC_MEAN(); n is bumped by one first,
     * as in the original -- presumably because the macro pre-decrements n
     * (confirm against its definition). */
    ++n;
    BASIC_MEAN();
}
开发者ID:brijohn,项目名称:onscripter-wii,代码行数:27,
示例10: pix_abs16x16_y2_altivec
/*
 * Sum of absolute differences over 16 rows of 16 bytes between pix1 and the
 * average of each pix2 row with the row below it (pix2 + line_size).
 */
int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int row;
    int total __attribute__((aligned(16)));
    const vector unsigned char zero_v = (const vector unsigned char)vec_splat_u8(0);
    vector unsigned char *chunk;
    vector unsigned char cur_v, upper_v, lower_v, mean_v, absdiff_v;
    vector unsigned int acc_v;
    vector signed int folded_v;
    uint8_t *below = pix2 + line_size;

    total = 0;
    acc_v = (vector unsigned int)vec_splat_u32(0);

    /*
     * The lower row of one iteration is the upper row of the next, so the
     * upper row is read (unaligned) only once, here, and carried forward.
     */
    chunk = (vector unsigned char *) &pix2[0];
    upper_v = vec_perm(chunk[0], chunk[1], vec_lvsl(0, &pix2[0]));

    for (row = 0; row < 16; row++) {
        /* Unaligned reads of pix1[0..15] and below[0..15] */
        chunk = (vector unsigned char *) pix1;
        cur_v = vec_perm(chunk[0], chunk[1], vec_lvsl(0, pix1));

        chunk = (vector unsigned char *) &below[0];
        lower_v = vec_perm(chunk[0], chunk[1], vec_lvsl(0, &below[0]));

        /* Vertical average of the two reference rows */
        mean_v = vec_avg(upper_v, lower_v);

        /* |cur - mean| computed as max - min */
        absdiff_v = vec_sub(vec_max(cur_v, mean_v), vec_min(cur_v, mean_v));

        /* Accumulate each group of 4 bytes into the 4 partial sums */
        acc_v = vec_sum4s(absdiff_v, acc_v);

        pix1 += line_size;
        upper_v = lower_v;
        below += line_size;
    }

    /* Fold the four partial sums, broadcast element 3 and store it to total */
    folded_v = vec_sums((vector signed int) acc_v, (vector signed int) zero_v);
    folded_v = vec_splat(folded_v, 3);
    vec_ste(folded_v, 0, &total);

    return total;
}
开发者ID:KoetseJ,项目名称:xumo,代码行数:59,
示例11: put_no_rnd_pixels8_xy2_altivec
/* next one assumes that ((line_size % 8) == 0) */
/*
 * For each of h rows: sums pixels[x] + pixels[x + 1] of the current and the
 * next source row as 16-bit values, adds vcone (1), shifts right by 2, packs
 * to bytes and merges the result into one half of the 16-byte vector at block.
 */
static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short pixelssum1, pixelssum2, temp3;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* Prime the pipeline with the first source row: unaligned load of
     * pixels[0..] into pixelsv1 and pixels[1..] into pixelsv2. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* When the address ends in 0xF, pixels + 1 is 16-byte aligned, so the
     * second loaded vector is used directly instead of a permute. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Interleave with zero bytes so the values can be added as 16-bit lanes. */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1, (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);

    for (i = 0; i < h ; i++) {
        /* Non-zero when block is not 16-byte aligned; selects which half of
         * the destination vector is replaced below. */
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* Same unaligned load as above, for the row at pixels + line_size. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1, (vector unsigned short)pixelsv2);
        /* (row sum + 1) + next row sum, then >> 2 */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* The next row's sum (plus 1) becomes the current row's sum for the
         * following iteration, so each source row is loaded only once. */
        pixelssum1 = vec_add(pixelssum2, vcone);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* vcprm is defined elsewhere; presumably the sN entries pick words
         * from pixelsavg and the plain indices keep words of blockv -- confirm. */
        if (rightside) {
            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
开发者ID:AVLeo,项目名称:libav,代码行数:59,
示例12: quant_h263_inter_altivec_c
/*
 * Expands QUANT_H263_INTER_ALTIVEC() eight times (the macro is defined
 * elsewhere and uses the locals declared here by name), then reduces
 * sum_short to a single 32-bit value and returns it.
 */
uint32_t
quant_h263_inter_altivec_c(int16_t *coeff,
                           int16_t *data,
                           const uint32_t quant,
                           const uint16_t *mpeg_quant_matrices)
{
    vector unsigned char zerovec;
    vector unsigned short mult;
    vector unsigned short quant_m_2;
    vector unsigned short quant_d_2;
    vector unsigned short sum_short;
    vector signed short acLevel;

    vector unsigned int even;
    vector unsigned int odd;

    vector bool short m2_mask;
    vector bool short zero_mask;

    uint32_t result;

#ifdef DEBUG
    /* 16-byte alignment check.  The mask was 0x15, which tests bits 0, 2 and
     * 4 rather than the low four address bits. */
    if(((unsigned long)coeff) & 0xf)
        fprintf(stderr, "quant_h263_inter_altivec_c:incorrect align, coeff: %lx\n", (unsigned long)coeff);
#endif

    /* initialisation stuff */
    zerovec = vec_splat_u8(0);
    /* Each constant is written into element 0 and then splatted to all lanes. */
    *((unsigned short*)&mult) = (unsigned short)multipliers[quant];
    mult = vec_splat(mult, 0);
    *((unsigned short*)&quant_m_2) = (unsigned short)quant;
    quant_m_2 = vec_splat(quant_m_2, 0);
    quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));   /* quant * 2 */
    *((unsigned short*)&quant_d_2) = (unsigned short)quant;
    quant_d_2 = vec_splat(quant_d_2, 0);
    quant_d_2 = vec_sr(quant_d_2, vec_splat_u16(1));   /* quant / 2 */
    sum_short = (vector unsigned short)zerovec;

    /* Quantize */
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();

    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();

    /* Calculate the return value: fold sum_short to one word, broadcast
     * element 3 and store it to result. */
    even = (vector unsigned int)vec_sum4s((vector signed short)sum_short, (vector signed int)zerovec);
    even = (vector unsigned int)vec_sums((vector signed int)even, (vector signed int)zerovec);
    even = vec_splat(even, 3);
    vec_ste(even, 0, &result);
    return result;
}
开发者ID:roozbeh,项目名称:openCU,代码行数:56,
示例13: YV12_422_Altivec
/*
 * Interleaves planar input (a w*h plane at `in`, followed by two further
 * planes at in + w*h and in + w*h*5/4) into `out`, two output rows at a
 * time.  Each inner iteration consumes 8 bytes from each of two luma rows and
 * 4 bytes from each chroma plane, and stores 16 bytes to each of two output
 * rows.
 */
void YV12_422_Altivec( uint8_t *in, uint8_t *out, uint32_t w,uint32_t h)
{
    uint8_t *y,*y2,*u,*v,*out2;
    uint32_t dx,dy;
    /* MSQ and mask are not used directly here; presumably LOAD_ALIGN
     * (defined elsewhere) needs them -- confirm. */
    vector unsigned char vecy,vecy2,vecu,vecv,MSQ,mask;
    vector unsigned char zero;
#define VEC16 vector unsigned short
#define VEC8 vector unsigned char
#define VECS8 vector signed char

    out2=out+w*2;
    y=in;
    y2=in+w;
    u=in+w*h;
    v=in+((w*h*5)>>2);
    zero=vec_splat_u8(0);

    /* vec_st below needs a 16-byte aligned destination; this only warns. */
    if( (long int)out & 15)
    {
        printf("Alignment issue in yv12 to 422 altivec!\n");
    }

    for(dy=h>>1;dy>0;dy--)
    {
        /* 8 luma bytes per row per iteration (w >> 3 iterations) */
        for(dx=w>>3;dx>0;dx--)
        {
            LOAD_ALIGN(vecy,y);
            LOAD_ALIGN(vecy2,y2);
            /* NOTE(review): vecu is loaded from v and vecv from u, as in the
             * original -- confirm the swap is intentional. */
            LOAD_ALIGN(vecu,v);
            LOAD_ALIGN(vecv,u);

            /* expand: interleave the two chroma vectors, then interleave
             * each luma row with the result */
            vecu=(VEC8)vec_mergeh(vecu,vecv);
            vecy=(VEC8)vec_mergeh(vecy,vecu);
            vecy2=(VEC8)vec_mergeh(vecy2,vecu);

            /* Store */
            vec_st(vecy,0,out);
            vec_st(vecy2,0,out2);

            /* next */
            out2+=16;
            out+=16;
            y+=8;
            y2+=8;
            u+=4;
            v+=4;
        }
        /* Skip the row the other pointer of each pair has just handled. */
        out+=w*2;
        out2+=w*2;
        y+=w;
        y2+=w;
    }
}
开发者ID:BackupTheBerlios,项目名称:avidemux-svn,代码行数:56,
示例14: foo
/*
 * Applies vec_sld to each of the three `vector bool` element widths, shifting
 * each vector against itself by 4, 2 and 1 bytes respectively.
 */
void foo (void)
{
    vector bool int   bool_words  = (vector bool int)   vec_splat_u32(3);
    vector bool short bool_halves = (vector bool short) vec_splat_u16(3);
    vector bool char  bool_bytes  = (vector bool char)  vec_splat_u8(3);

    bool_words  = vec_sld(bool_words,  bool_words,  4);
    bool_halves = vec_sld(bool_halves, bool_halves, 2);
    bool_bytes  = vec_sld(bool_bytes,  bool_bytes,  1);
}
开发者ID:0day-ci,项目名称:gcc,代码行数:10,
示例15: sad16_x2_altivec
/*
 * Sum of absolute differences over h rows of 16 bytes between pix1 and the
 * average of pix2[x] and pix2[x + 1].  The context pointer v is not used.
 */
static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int row;
    int total = 0;
    const vector unsigned char zero_v = (const vector unsigned char)vec_splat_u8(0);
    /* Alignment permutes for pix2[0..15] and pix2[1..16].  They are computed
     * once from the initial pix2 and reused for every row. */
    vector unsigned char perm_at0 = vec_lvsl(0, pix2);
    vector unsigned char perm_at1 = vec_add(perm_at0, vec_splat_u8(1));
    vector unsigned char ref_lo, ref_hi;
    vector unsigned char cur_v, ref_v, ref_next_v, mean_v, absdiff_v;
    vector unsigned int acc_v = (vector unsigned int)vec_splat_u32(0);
    vector signed int folded_v;

    for (row = 0; row < h; row++) {
        /* pix1 is loaded with a plain aligned load; pix2 needs the two
         * surrounding aligned vectors plus a permute. */
        cur_v      = vec_ld( 0, pix1);
        ref_lo     = vec_ld( 0, pix2);
        ref_hi     = vec_ld(16, pix2);
        ref_v      = vec_perm(ref_lo, ref_hi, perm_at0);
        ref_next_v = vec_perm(ref_lo, ref_hi, perm_at1);

        /* Horizontal average of neighbouring reference bytes */
        mean_v = vec_avg(ref_v, ref_next_v);

        /* |cur - mean| computed as max - min */
        absdiff_v = vec_sub(vec_max(cur_v, mean_v), vec_min(cur_v, mean_v));

        /* Accumulate each group of 4 bytes into the 4 partial sums */
        acc_v = vec_sum4s(absdiff_v, acc_v);

        pix1 += line_size;
        pix2 += line_size;
    }

    /* Fold the four partial sums, broadcast element 3 and store it to total */
    folded_v = vec_sums((vector signed int) acc_v, (vector signed int) zero_v);
    folded_v = vec_splat(folded_v, 3);
    vec_ste(folded_v, 0, &total);

    return total;
}
开发者ID:0xFFeng,项目名称:ffmpeg,代码行数:43,
示例16: abcd2cbad_internal
/*
 * Applies the byte permutation p to `data` and writes the result to
 * `newdata`, three 16-byte vectors per main-loop iteration followed by a
 * tail of one or two vectors.  `length` is passed through eround16()
 * (defined elsewhere) and then used as a count of 16-byte vectors --
 * TODO confirm that is what eround16 returns.
 *
 * Fixes relative to the original:
 *  - the one-vector tail called vec_perm(d0,d0,z), using the zero vector as
 *    the permute pattern; it now uses vec_perm(d0,z,p) like every other path;
 *  - data/newdata are advanced past the last full triple before the tail
 *    runs, so the tail no longer reprocesses the vectors just written.
 */
static inline void abcd2cbad_internal( register const vector unsigned char p, unsigned char *data, unsigned int length, unsigned char *newdata )
{
    register vector unsigned char d0,d1,d2,z;

    z = vec_splat_u8(0);
    length = eround16(length);

    if( length >= 3 ) {
        length -= 3;

        /* Preload the first triple; the loop permutes/stores the current
         * triple and loads the next one. */
        d2 = vec_ld(32,data);
        d1 = vec_ld(16,data);
        d0 = vec_ld(0,data);

        while( length >= 3 ) {
            d0 = vec_perm(d0,z,p);
            d1 = vec_perm(d1,z,p);
            d2 = vec_perm(d2,z,p);

            vec_st(d0,0,newdata);
            vec_st(d1,16,newdata);
            vec_st(d2,32,newdata);

            length -= 3;
            data += 16*3;
            newdata += 16*3;

            d2 = vec_ld(32,data);
            d1 = vec_ld(16,data);
            d0 = vec_ld(0,data);
        }

        /* Last full triple (already loaded). */
        d0 = vec_perm(d0,z,p);
        d1 = vec_perm(d1,z,p);
        d2 = vec_perm(d2,z,p);

        vec_st(d0,0,newdata);
        vec_st(d1,16,newdata);
        vec_st(d2,32,newdata);

        /* Step over the triple just written so the tail starts after it. */
        data += 16*3;
        newdata += 16*3;
    }

    if( length == 2 ) {
        d0 = vec_ld(0,data);
        d1 = vec_ld(16,data);

        d0 = vec_perm(d0,z,p);
        d1 = vec_perm(d1,z,p);

        vec_st(d0,0,newdata);
        vec_st(d1,16,newdata);
    } else if( length == 1 ) {
        d0 = vec_ld(0,data);

        d0 = vec_perm(d0,z,p);

        vec_st(d0,0,newdata);
    }
}
开发者ID:bazhenovc,项目名称:nebula3,代码行数:54,
示例17: dequant_h263_inter_altivec_c
/*
 * Prepares the vector constants and expands DEQUANT_H263_INTER_ALTIVEC()
 * eight times (the macro is defined elsewhere and uses the locals declared
 * here by name).  Always returns 0.
 */
uint32_t
dequant_h263_inter_altivec_c(int16_t *data,
                             int16_t *coeff,
                             const uint32_t quant,
                             const uint16_t *mpeg_quant_matrices)
{
    vector signed short acLevel;
    vector signed short vec_2048;

    vector unsigned short quant_m_2;
    vector unsigned short quant_add;
    vector unsigned short t;

    register vector unsigned int even;
    register vector unsigned int odd;
    register vector unsigned int high;
    register vector unsigned int low;

    register vector unsigned char zerovec;

    vector bool short equal_zero;
    vector bool short less_zero;
    vector bool short overflow;

#ifdef DEBUG
    /* print alignment errors if this is on.  The mask was 0x15, which tests
     * bits 0, 2 and 4 rather than the low four address bits. */
    if(((unsigned long)data) & 0xf)
        fprintf(stderr, "dequant_h263_inter_altivec_c:incorrect align, data: %lx\n", (unsigned long)data);
#endif

    /* initialize: each constant is written to element 0, then splatted */
    *((unsigned short*)&quant_m_2) = (unsigned short)(quant << 1);
    quant_m_2 = vec_splat(quant_m_2,0);

    /* quant if it is odd, otherwise quant - 1 */
    *((unsigned short*)&quant_add) = (unsigned short)(quant & 1 ? quant : quant - 1);
    quant_add = vec_splat(quant_add,0);

    /* 1 << 11 == 2048 in every 16-bit lane */
    vec_2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));
    zerovec = vec_splat_u8(0);

    /* dequant */
    DEQUANT_H263_INTER_ALTIVEC();
    DEQUANT_H263_INTER_ALTIVEC();
    DEQUANT_H263_INTER_ALTIVEC();
    DEQUANT_H263_INTER_ALTIVEC();

    DEQUANT_H263_INTER_ALTIVEC();
    DEQUANT_H263_INTER_ALTIVEC();
    DEQUANT_H263_INTER_ALTIVEC();
    DEQUANT_H263_INTER_ALTIVEC();

    return 0;
}
开发者ID:roozbeh,项目名称:openCU,代码行数:53,
示例18: sad16bi_altivec_c
/*
 * Expands SAD16BI() sixteen times (the macro is defined elsewhere and uses
 * the locals declared here by name), then folds `sum` to a single 32-bit
 * value and returns it.
 */
uint32_t
sad16bi_altivec_c(vector unsigned char *cur,
                  vector unsigned char *ref1,
                  vector unsigned char *ref2,
                  uint32_t stride)
{
    vector unsigned char t1, t2;
    vector unsigned char mask1, mask2;
    vector unsigned char sad;
    vector unsigned int sum;
    uint32_t result;

#ifdef DEBUG
    /* print alignment errors if this is on */
    if((long)cur & 0xf)
        fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lx\n", (unsigned long)cur);
    /* This message used to be labelled "cur" and passed a uint32_t to %lu. */
    if(stride & 0xf)
        fprintf(stderr, "sad16bi_altivec:incorrect stride, stride: %lu\n", (unsigned long)stride);
#endif

    /* Initialisation stuff */
    /* cur/ref1/ref2 are vector pointers, so the byte stride is converted to
     * a count of 16-byte vectors. */
    stride >>= 4;
    mask1 = vec_lvsl(0, (unsigned char*)ref1);
    mask2 = vec_lvsl(0, (unsigned char*)ref2);
    sad = vec_splat_u8(0);
    sum = (vector unsigned int)sad;

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    /* Fold the partial sums, broadcast element 3 and store it to result. */
    sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));
    sum = vec_splat(sum, 3);
    vec_ste(sum, 0, (uint32_t*)&result);

    return result;
}
开发者ID:Distrotech,项目名称:xvidcore,代码行数:53,
示例19: quant_h263_intra_altivec_c
/*
 * Expands QUANT_H263_INTRA_ALTIVEC() eight times (the macro is defined
 * elsewhere and uses the locals declared here by name), then overwrites the
 * first coefficient with DIV_DIV(first data value, dcscalar).  Always
 * returns 0.
 */
uint32_t
quant_h263_intra_altivec_c(int16_t *coeff,
                           int16_t *data,
                           const uint32_t quant,
                           const uint32_t dcscalar,
                           const uint16_t *mpeg_quant_matrices)
{
    vector unsigned char zerovec;
    vector unsigned short mult;
    vector unsigned short quant_m_2;
    vector signed short acLevel;

    register vector unsigned int even;
    register vector unsigned int odd;

    vector bool short zero_mask;
    vector bool short m2_mask;

    /* Saved because the first element is rewritten at the end; presumably
     * the macro advances coeff/data -- confirm against its definition. */
    register int16_t *origin_coeff = coeff;
    register int16_t *origin_data = data;

#ifdef DEBUG
    if(((unsigned long)coeff) & 15)
        fprintf(stderr, "quant_h263_intra_altivec_c:incorrect align, coeff: %lx\n", (unsigned long)coeff);
#endif

    zerovec = vec_splat_u8(0);

    /* Each constant is written into element 0 and then splatted to all lanes. */
    *((unsigned short*)&mult) = (unsigned short)multipliers[quant];
    mult = vec_splat(mult, 0);

    *((unsigned short*)&quant_m_2) = (unsigned short)quant;
    quant_m_2 = vec_splat(quant_m_2, 0);
    quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));   /* quant * 2 */

    QUANT_H263_INTRA_ALTIVEC();
    QUANT_H263_INTRA_ALTIVEC();
    QUANT_H263_INTRA_ALTIVEC();
    QUANT_H263_INTRA_ALTIVEC();

    QUANT_H263_INTRA_ALTIVEC();
    QUANT_H263_INTRA_ALTIVEC();
    QUANT_H263_INTRA_ALTIVEC();
    QUANT_H263_INTRA_ALTIVEC();

    /* Finally set the first coefficient, which uses dcscalar. */
    origin_coeff[0] = DIV_DIV(origin_data[0], (int32_t)dcscalar);

    return 0;
}
开发者ID:roozbeh,项目名称:openCU,代码行数:50,
示例20: processRGBA_Altivecvoid pix_diff :: processRGBA_Altivec(imageStruct &image, imageStruct &right){ int datasize = image.xsize * image.ysize / 4; vector signed short hiImage, loImage, hiRight, loRight; vector unsigned char zero = vec_splat_u8(0); vector unsigned char *inData = (vector unsigned char *)image.data; vector unsigned char *rightData = (vector unsigned char *)right.data; #ifndef PPC970 UInt32 prefetchSize = GetPrefetchConstant( 16, 1, 256 ); vec_dst( inData, prefetchSize, 0 ); vec_dst( rightData, prefetchSize, 1 ); vec_dst( inData+256, prefetchSize, 2 ); vec_dst( rightData+256, prefetchSize, 3 ); #endif do { #ifndef PPC970 vec_dst( inData, prefetchSize, 0 ); vec_dst( rightData, prefetchSize, 1 ); vec_dst( inData+256, prefetchSize, 2 ); vec_dst( rightData+256, prefetchSize, 3 ); #endif hiImage = (vector signed short)vec_mergeh(zero,inData[0]); loImage = (vector signed short)vec_mergel(zero,inData[0]); hiRight = (vector signed short)vec_mergeh(zero,rightData[0]); loRight = (vector signed short)vec_mergel(zero,rightData[0]); hiImage = vec_subs(hiImage,hiRight); loImage = vec_subs(loImage,loRight); hiImage = vec_abs(hiImage); loImage = vec_abs(loImage); inData[0] = vec_packsu(hiImage,loImage); inData++; rightData++; } while (--datasize); #ifndef PPC970 vec_dss( 0 ); vec_dss( 1 ); vec_dss( 2 ); vec_dss( 3 ); #endif}
开发者ID:avilleret,项目名称:Gem,代码行数:50,
示例21: put_vp8_epel_h_altivec_core
/*
 * Filters h rows from src into dst.  One FILTER_H result is always computed;
 * a second one (offset 8) only when w == 16.  Stores 16 bytes when w == 16,
 * otherwise one 32-bit element plus a second one when w == 8.
 *
 * LOAD_H_SUBPEL_FILTER and FILTER_H are defined elsewhere; presumably they
 * use the locals declared here by name (a, b, pixh, pixl, outer, filth,
 * filtl, c64, c7 and the perm vectors), so the names must not change --
 * confirm against their definitions.
 */
static av_always_inline
void put_vp8_epel_h_altivec_core(uint8_t *dst, ptrdiff_t dst_stride,
                                 uint8_t *src, ptrdiff_t src_stride,
                                 int h, int mx, int w, int is6tap)
{
    LOAD_H_SUBPEL_FILTER(mx-1);
    vec_u8 align_vec0, align_vec8, permh0, permh8, filt;
    vec_u8 perm_6tap0, perm_6tap8, perml0, perml8;
    vec_u8 a, b, pixh, pixl, outer;
    vec_s16 f16h, f16l;
    vec_s32 filth, filtl;

    /* Sliding windows of four consecutive byte indices; the 6-tap variant
     * starts one byte later because its load below starts one byte earlier. */
    vec_u8 perm_inner6 = { 1,2,3,4, 2,3,4,5, 3,4,5,6, 4,5,6,7 };
    vec_u8 perm_inner4 = { 0,1,2,3, 1,2,3,4, 2,3,4,5, 3,4,5,6 };
    vec_u8 perm_inner  = is6tap ? perm_inner6 : perm_inner4;
    vec_u8 perm_outer = { 4,9, 0,5, 5,10, 1,6, 6,11, 2,7, 7,12, 3,8 };
    vec_s32 c64 = vec_sl(vec_splat_s32(1), vec_splat_u32(6));   /* 1 << 6 */
    vec_u16 c7  = vec_splat_u16(7);

    /* Alignment permutes for src - 1 (4-tap) or src - 2 (6-tap), and the
     * same 8 bytes further on. */
    align_vec0 = vec_lvsl(  -is6tap-1, src);
    align_vec8 = vec_lvsl(8-is6tap-1, src);

    /* Compose the alignment permutes with the window patterns once, outside
     * the row loop. */
    permh0     = vec_perm(align_vec0, align_vec0, perm_inner);
    permh8     = vec_perm(align_vec8, align_vec8, perm_inner);
    perm_inner = vec_add(perm_inner, vec_splat_u8(4));   /* windows 4 bytes on */
    perml0     = vec_perm(align_vec0, align_vec0, perm_inner);
    perml8     = vec_perm(align_vec8, align_vec8, perm_inner);
    perm_6tap0 = vec_perm(align_vec0, align_vec0, perm_outer);
    perm_6tap8 = vec_perm(align_vec8, align_vec8, perm_outer);

    while (h --> 0) {
        FILTER_H(f16h, 0);

        if (w == 16) {
            FILTER_H(f16l, 8);
            filt = vec_packsu(f16h, f16l);
            vec_st(filt, 0, dst);
        } else {
            filt = vec_packsu(f16h, f16h);
            vec_ste((vec_u32)filt, 0, (uint32_t*)dst);
            if (w == 8)
                vec_ste((vec_u32)filt, 4, (uint32_t*)dst);
        }
        src += src_stride;
        dst += dst_stride;
    }
}
开发者ID:Arcen,项目名称:libav,代码行数:47,
示例22: pix_abs16x16_x2_altivec
/*
 * Sum of absolute differences over 16 rows of 16 bytes between pix1 and the
 * average of pix2[x] and pix2[x + 1].
 */
int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int row;
    int total __attribute__((aligned(16)));
    const vector unsigned char zero_v = (const vector unsigned char)vec_splat_u8(0);
    vector unsigned char *chunk;
    vector unsigned char cur_v, ref_v, ref_next_v, mean_v, absdiff_v;
    vector unsigned int acc_v;
    vector signed int folded_v;

    total = 0;
    acc_v = (vector unsigned int)vec_splat_u32(0);

    for (row = 0; row < 16; row++) {
        /* Unaligned reads of pix1[0..15], pix2[0..15] and pix2[1..16] */
        chunk = (vector unsigned char *) pix1;
        cur_v = vec_perm(chunk[0], chunk[1], vec_lvsl(0, pix1));

        chunk = (vector unsigned char *) &pix2[0];
        ref_v = vec_perm(chunk[0], chunk[1], vec_lvsl(0, &pix2[0]));

        chunk = (vector unsigned char *) &pix2[1];
        ref_next_v = vec_perm(chunk[0], chunk[1], vec_lvsl(0, &pix2[1]));

        /* Horizontal average of neighbouring reference bytes */
        mean_v = vec_avg(ref_v, ref_next_v);

        /* |cur - mean| computed as max - min */
        absdiff_v = vec_sub(vec_max(cur_v, mean_v), vec_min(cur_v, mean_v));

        /* Accumulate each group of 4 bytes into the 4 partial sums */
        acc_v = vec_sum4s(absdiff_v, acc_v);

        pix1 += line_size;
        pix2 += line_size;
    }

    /* Fold the four partial sums, broadcast element 3 and store it to total */
    folded_v = vec_sums((vector signed int) acc_v, (vector signed int) zero_v);
    folded_v = vec_splat(folded_v, 3);
    vec_ste(folded_v, 0, &total);

    return total;
}
开发者ID:KoetseJ,项目名称:xumo,代码行数:46,
示例23: idct_add_altivec
/*
 * Runs the IDCT macro (defined elsewhere; it provides vx0..vx7 and `zero`)
 * and adds the eight result rows to eight destination rows, advancing dest
 * by stride between rows.  The POWERPC_PERF_* macros wrap the work in a
 * performance counter.
 */
void idct_add_altivec(uint8_t* dest, int stride, int16_t *blk)
{
POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
    vec_s16 *block = (vec_s16*)blk;
    vec_u8 tmp;
    vec_s16 tmp2, tmp3;
    vec_u8 perm0;
    vec_u8 perm1;
    vec_u8 p0, p1, p;

#if CONFIG_POWERPC_PERF
POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
#endif

    IDCT

    /* Two permute masks are prepared: perm0 from the alignment of dest and
     * perm1 from the alignment of dest + stride.  Rows alternate between
     * them below. */
    p0 = vec_lvsl (0, dest);
    p1 = vec_lvsl (stride, dest);
    p = vec_splat_u8 (-1);
    perm0 = vec_mergeh (p, p0);
    perm1 = vec_mergeh (p, p1);

/* ADD(dest,src,perm): load, widen through perm against zero, add src with
 * vec_adds, pack with vec_packsu, store two 32-bit elements.  Every line but
 * the last needs a trailing backslash. */
#define ADD(dest,src,perm)                                              \
    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */                        \
    tmp = vec_ld (0, dest);                                             \
    tmp2 = (vec_s16)vec_perm (tmp, (vec_u8)zero, perm);                 \
    tmp3 = vec_adds (tmp2, src);                                        \
    tmp = vec_packsu (tmp3, tmp3);                                      \
    vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest);                    \
    vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);

    ADD (dest, vx0, perm0)      dest += stride;
    ADD (dest, vx1, perm1)      dest += stride;
    ADD (dest, vx2, perm0)      dest += stride;
    ADD (dest, vx3, perm1)      dest += stride;
    ADD (dest, vx4, perm0)      dest += stride;
    ADD (dest, vx5, perm1)      dest += stride;
    ADD (dest, vx6, perm0)      dest += stride;
    ADD (dest, vx7, perm1)

POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
}
开发者ID:WangCrystal,项目名称:FFplayer,代码行数:42,
示例24: vp3_idct_add_altivec
/*
 * Runs the two IDCT passes over the block (via the IDCT_* / TRANSPOSE8 macros
 * defined elsewhere, which provide b0..b7), adds the eight result rows to
 * eight destination rows, and finally zeroes the 64-entry coefficient block.
 */
static void vp3_idct_add_altivec(uint8_t *dst, int stride, int16_t block[64])
{
    LOAD_ZERO;
    vec_u8 t, vdst;
    vec_s16 vdst_16;
    /* Only referenced by the big-endian GET_VDST16 variant below. */
    vec_u8 vdst_mask = vec_mergeh(vec_splat_u8(-1), vec_lvsl(0, dst));

    IDCT_START

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

/* GET_VDST16: load the destination bytes and widen them to 16-bit lanes.
 * Big-endian uses an aligned load plus permute; otherwise vec_vsx_ld plus a
 * merge with zero.  Macro lines need trailing backslashes. */
#if HAVE_BIGENDIAN
#define GET_VDST16\
    vdst = vec_ld(0, dst);\
    vdst_16 = (vec_s16)vec_perm(vdst, zero_u8v, vdst_mask);
#else
#define GET_VDST16\
    vdst = vec_vsx_ld(0,dst);\
    vdst_16 = (vec_s16)vec_mergeh(vdst, zero_u8v);
#endif

/* ADD(a): widen dst, add row `a` with vec_adds, pack with vec_packsu and
 * store two 32-bit elements at dst and dst + 4. */
#define ADD(a)\
    GET_VDST16;\
    vdst_16 = vec_adds(a, vdst_16);\
    t = vec_packsu(vdst_16, vdst_16);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    ADD(b0)     dst += stride;
    ADD(b1)     dst += stride;
    ADD(b2)     dst += stride;
    ADD(b3)     dst += stride;
    ADD(b4)     dst += stride;
    ADD(b5)     dst += stride;
    ADD(b6)     dst += stride;
    ADD(b7)
    memset(block, 0, sizeof(*block) * 64);
}
开发者ID:TaoheGit,项目名称:hmi_sdl_android,代码行数:40,
示例25: ff_gmc1_altivec
/* AltiVec-enhanced gmc1. ATM this code assumes stride is a multiple of 8
 * to preserve proper dst alignment. */
/*
 * For each of h rows computes
 *   (A*src[x] + B*src[x+1] + C*src[x+stride] + D*src[x+stride+1] + rounder) >> 8
 * in 16-bit lanes, with the weights A..D derived from x16 and y16 below,
 * packs the result to bytes and merges it into one half of the 16-byte
 * vector at dst.
 */
void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
                     int stride, int h, int x16, int y16, int rounder)
{
    int i;
    const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder;
    const DECLARE_ALIGNED(16, unsigned short, ABCD)[8] = {
        (16 - x16) * (16 - y16), /* A */
        (x16)      * (16 - y16), /* B */
        (16 - x16) * (y16),      /* C */
        (x16)      * (y16),      /* D */
        0, 0, 0, 0               /* padding */
    };
    register const vector unsigned char vczero =
        (const vector unsigned char) vec_splat_u8(0);
    register const vector unsigned short vcsr8 =
        (const vector unsigned short) vec_splat_u16(8);
    register vector unsigned char dstv, dstv2, srcvB, srcvC, srcvD;
    register vector unsigned short tempB, tempC, tempD;
    unsigned long dst_odd        = (unsigned long) dst & 0x0000000F;
    unsigned long src_really_odd = (unsigned long) src & 0x0000000F;
    /* Load the four weights once and broadcast each across a full vector. */
    register vector unsigned short tempA =
        vec_ld(0, (const unsigned short *) ABCD);
    register vector unsigned short Av = vec_splat(tempA, 0);
    register vector unsigned short Bv = vec_splat(tempA, 1);
    register vector unsigned short Cv = vec_splat(tempA, 2);
    register vector unsigned short Dv = vec_splat(tempA, 3);
    register vector unsigned short rounderV =
        vec_splat((vec_u16) vec_lde(0, &rounder_a), 0);

    /* we'll be able to pick-up our 9 char elements at src from those
     * 32 bytes we load the first batch here, as inside the loop we can
     * reuse 'src + stride' from one iteration as the 'src' of the next. */
    register vector unsigned char src_0 = vec_ld(0, src);
    register vector unsigned char src_1 = vec_ld(16, src);
    register vector unsigned char srcvA = vec_perm(src_0, src_1,
                                                   vec_lvsl(0, src));

    if (src_really_odd != 0x0000000F)
        /* If (src & 0xF) == 0xF, then (src + 1) is properly aligned
         * on the second vector. */
        srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
    else
        srcvB = src_1;
    /* Interleave with zero bytes so the pixels can be used as 16-bit lanes. */
    srcvA = vec_mergeh(vczero, srcvA);
    srcvB = vec_mergeh(vczero, srcvB);

    for (i = 0; i < h; i++) {
        /* Recomputed every row: dst and src advance by stride each time. */
        dst_odd        = (unsigned long) dst & 0x0000000F;
        src_really_odd = (((unsigned long) src) + stride) & 0x0000000F;

        dstv = vec_ld(0, dst);

        /* We'll be able to pick-up our 9 char elements at src + stride from
         * those 32 bytes then reuse the resulting 2 vectors srvcC and srcvD
         * as the next srcvA and srcvB. */
        src_0 = vec_ld(stride +  0, src);
        src_1 = vec_ld(stride + 16, src);
        srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));

        if (src_really_odd != 0x0000000F)
            /* If ((src + stride) & 0xF) == 0xF, then (src + stride + 1) is
             * properly aligned on the second vector. */
            srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
        else
            srcvD = src_1;

        srcvC = vec_mergeh(vczero, srcvC);
        srcvD = vec_mergeh(vczero, srcvD);

        /* OK, now we (finally) do the math :-)
         * Those four instructions replace 32 int muls & 32 int adds.
         * Isn't AltiVec nice? */
        tempA = vec_mladd((vector unsigned short) srcvA, Av, rounderV);
        tempB = vec_mladd((vector unsigned short) srcvB, Bv, tempA);
        tempC = vec_mladd((vector unsigned short) srcvC, Cv, tempB);
        tempD = vec_mladd((vector unsigned short) srcvD, Dv, tempC);

        /* The lower row becomes the upper row of the next iteration. */
        srcvA = srcvC;
        srcvB = srcvD;

        tempD = vec_sr(tempD, vcsr8);

        dstv2 = vec_pack(tempD, (vector unsigned short) vczero);

        /* vcprm is defined elsewhere; presumably the sN entries pick words
         * from dstv2 and the plain indices keep words of dstv -- confirm. */
        if (dst_odd)
            dstv2 = vec_perm(dstv, dstv2, vcprm(0, 1, s0, s1));
        else
            dstv2 = vec_perm(dstv, dstv2, vcprm(s0, s1, 2, 3));

        vec_st(dstv2, 0, dst);

        dst += stride;
        src += stride;
    }
}
开发者ID:venkatarajasekhar,项目名称:Qt,代码行数:97,
示例26: processYUVAltivecvoid pix_background :: processYUVAltivec(imageStruct &image){register int h,w,i,j,width;int pixsize = image.xsize * image.ysize * image.csize; h = image.ysize; w = image.xsize/8; width = image.xsize/8; //check to see if the buffer isn't 16byte aligned (highly unlikely) if (image.ysize*image.xsize % 16 != 0){ error("image not properly aligned for Altivec - try something SD or HD maybe?"); return; } union{ unsigned short s[8]; vector unsigned short v; }shortBuffer; if(m_savedImage.xsize!=image.xsize || m_savedImage.ysize!=image.ysize || m_savedImage.format!=image.format)m_reset=1; m_savedImage.xsize=image.xsize; m_savedImage.ysize=image.ysize; m_savedImage.setCsizeByFormat(image.format); m_savedImage.reallocate(); if (m_reset){ memcpy(m_savedImage.data,image.data,pixsize); m_reset = 0; } register vector unsigned short UVres1, Yres1, UVres2, Yres2;//interleave; register vector unsigned short hiImage, loImage; register vector unsigned short Yrange, UVrange, Yblank,UVblank,blank; register vector bool short Ymasklo,Ymaskhi, UVmaskhi; register vector unsigned short Yhi,Ylo,UVhi,UVlo; register vector unsigned char one = vec_splat_u8(1); register vector unsigned short sone = vec_splat_u16(1); register vector unsigned int Uhi, Ulo, Vhi, Vlo,Ures,Vres; register vector bool int Umasklo, Umaskhi, Vmaskhi, Vmasklo; vector unsigned char *inData = (vector unsigned char*) image.data; vector unsigned char *rightData = (vector unsigned char*) m_savedImage.data; shortBuffer.s[0] = m_Yrange; Yrange = shortBuffer.v; Yrange = vec_splat(Yrange,0); shortBuffer.s[0] = 128; shortBuffer.s[1] = 0; shortBuffer.s[2] = 128; shortBuffer.s[3] = 0; shortBuffer.s[4] = 128; shortBuffer.s[5] = 0; shortBuffer.s[6] = 128; shortBuffer.s[7] = 0; blank = shortBuffer.v; shortBuffer.s[0] = 0; Yblank = shortBuffer.v; Yblank = vec_splat(Yblank,0); shortBuffer.s[0] = 128; UVblank = shortBuffer.v; UVblank = vec_splat(UVblank,0); shortBuffer.s[0] = m_Urange; shortBuffer.s[1] = m_Vrange; 
shortBuffer.s[2] = m_Urange; shortBuffer.s[3] = m_Vrange; shortBuffer.s[4] = m_Urange; shortBuffer.s[5] = m_Vrange; shortBuffer.s[6] = m_Urange; shortBuffer.s[7] = m_Vrange; UVrange = shortBuffer.v; //setup the cache prefetch -- A MUST!!! UInt32 prefetchSize = GetPrefetchConstant( 16, 1, 256 ); #ifndef PPC970 vec_dst( inData, prefetchSize, 0 ); vec_dst( rightData, prefetchSize, 1 ); vec_dst( inData+32, prefetchSize, 2 ); vec_dst( rightData+32, prefetchSize, 3 ); #endif //PPC970 for ( i=0; i<h; i++){ for (j=0; j<w; j++) { #ifndef PPC970 //this function is probably memory bound on most G4's -- what else is new? vec_dst( inData, prefetchSize, 0 ); vec_dst( rightData, prefetchSize, 1 ); vec_dst( inData+32, prefetchSize, 2 ); vec_dst( rightData+32, prefetchSize, 3 ); #endif //separate the U and V from Y UVres1 = (vector unsigned short)vec_mule(one,inData[0]);//.........这里部分代码省略.........
开发者ID:kmatheussen,项目名称:libpd,代码行数:101,
示例27: gmc1_altivecvoid gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder){POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) = {rounder, rounder, rounder, rounder, rounder, rounder, rounder, rounder}; const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) = { (16-x16)*(16-y16), /* A */ ( x16)*(16-y16), /* B */ (16-x16)*( y16), /* C */ ( x16)*( y16), /* D */ 0, 0, 0, 0 /* padding */ }; register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); register const_vector unsigned short vcsr8 = (const_vector unsigned short)vec_splat_u16(8); register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD; register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD; int i; unsigned long dst_odd = (unsigned long)dst & 0x0000000F; unsigned long src_really_odd = (unsigned long)src & 0x0000000F;POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); tempA = vec_ld(0, (unsigned short*)ABCD); Av = vec_splat(tempA, 0); Bv = vec_splat(tempA, 1); Cv = vec_splat(tempA, 2); Dv = vec_splat(tempA, 3); rounderV = vec_ld(0, (unsigned short*)rounder_a); // we'll be able to pick-up our 9 char elements // at src from those 32 bytes // we load the first batch here, as inside the loop // we can re-use 'src+stride' from one iteration // as the 'src' of the next. src_0 = vec_ld(0, src); src_1 = vec_ld(16, src); srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src)); if (src_really_odd != 0x0000000F) { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector. 
srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src)); } else { srcvB = src_1; } srcvA = vec_mergeh(vczero, srcvA); srcvB = vec_mergeh(vczero, srcvB); for(i=0; i<h; i++) { dst_odd = (unsigned long)dst & 0x0000000F; src_really_odd = (((unsigned long)src) + stride) & 0x0000000F; dstv = vec_ld(0, dst); // we we'll be able to pick-up our 9 char elements // at src + stride from those 32 bytes // then reuse the resulting 2 vectors srvcC and srcvD // as the next srcvA and srcvB src_0 = vec_ld(stride + 0, src); src_1 = vec_ld(stride + 16, src); srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src)); if (src_really_odd != 0x0000000F) { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector. srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src)); } else { srcvD = src_1; } srcvC = vec_mergeh(vczero, srcvC); srcvD = vec_mergeh(vczero, srcvD); // OK, now we (finally) do the math :-) // those four instructions replaces 32 int muls & 32 int adds. // isn't AltiVec nice ? tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV); tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA); tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB); tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC); srcvA = srcvC; srcvB = srcvD; tempD = vec_sr(tempD, vcsr8); dstv2 = vec_pack(tempD, (vector unsigned short)vczero); if (dst_odd) { dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1)); }//.........这里部分代码省略.........
开发者ID:BackupTheBerlios,项目名称:avidemux-svn,代码行数:101,
示例28: testuc_3
/* Returns a vector with every byte set to 15 (the scraped text had the
 * return type fused with the function name). */
vector unsigned char
testuc_3 (void)
{
    return vec_splat_u8 (15);
}
开发者ID:MaxKellermann,项目名称:gcc,代码行数:5,
示例29: testuc_2
/* Returns vec_splat_u8(-5) as an unsigned byte vector (the scraped text had
 * the return type fused with the function name). */
vector unsigned char
testuc_2 (void)
{
    return vec_splat_u8 (-5);
}
开发者ID:MaxKellermann,项目名称:gcc,代码行数:5,
注:本文中的vec_splat_u8函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
相关示例:C++ vec_st函数代码示例 | C++ vec_splat_u32函数代码示例