您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vec_st函数代码示例

51自学网 2021-06-03 09:36:39
  C++
这篇教程C++ vec_st函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中vec_st函数的典型用法代码示例。如果您正苦于以下问题:C++ vec_st函数的具体用法?C++ vec_st怎么用?C++ vec_st使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_st函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: apply_window

static void apply_window(const float *buf, const float *win1,                         const float *win2, float *sum1, float *sum2, int len){    const vector float *win1a = (const vector float *) win1;    const vector float *win2a = (const vector float *) win2;    const vector float *bufa  = (const vector float *) buf;    vector float *sum1a = (vector float *) sum1;    vector float *sum2a = (vector float *) sum2;    vector float av_uninit(v0), av_uninit(v4);    vector float v1, v2, v3;    len = len >> 2;#define MULT(a, b)                         /    {                                      /        v1 = vec_ld(a, win1a);             /        v2 = vec_ld(b, win2a);             /        v3 = vec_ld(a, bufa);              /        v0 = vec_madd(v3, v1, v0);         /        v4 = vec_madd(v2, v3, v4);         /    }    while (len--) {        v0 = vec_xor(v0, v0);        v4 = vec_xor(v4, v4);        MULT(   0,   0);        MULT( 256,  64);        MULT( 512, 128);        MULT( 768, 192);        MULT(1024, 256);        MULT(1280, 320);        MULT(1536, 384);        MULT(1792, 448);        vec_st(v0, 0, sum1a);        vec_st(v4, 0, sum2a);        sum1a++;        sum2a++;        win1a++;        win2a++;        bufa++;    }}
开发者ID:kierank,项目名称:libav-obe-dev2,代码行数:44,


示例2: h264_idct_dc_add_internal

/*
 * DC-only inverse transform: adds the rounded DC value of block to `size`
 * rows of dst, four rows per loop iteration.
 *
 * The signed DC value is split into a non-negative part (dcplus) and the
 * magnitude of a negative part (dcminus), both packed to unsigned bytes, so
 * a saturating add followed by a saturating subtract applies it to the
 * unsigned pixels.
 */
static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block,
                                                       int stride, int size)
{
    vec_s16 dc16;
    vec_u8 dcplus, dcminus, row0, row1, row2, row3, aligner;
    LOAD_ZERO;
    DECLARE_ALIGNED(16, int, dc);
    int y;

    /* Rounded DC coefficient. */
    dc = (block[0] + 32) >> 6;

    /* NOTE(review): picking 16-bit element 1 of the int looks like it
     * assumes big-endian layout - confirm. */
    dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);

    /* For 4-wide blocks, shift in zeros so only the first four lanes stay set. */
    if (size == 4)
        dc16 = vec_sld(dc16, zero_s16v, 8);

    dcplus  = vec_packsu(dc16, zero_s16v);
    dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);

    /* Rotate the constants to match the byte offset of dst within its
     * 16-byte block. */
    aligner = vec_lvsr(0, dst);
    dcplus  = vec_perm(dcplus, dcplus, aligner);
    dcminus = vec_perm(dcminus, dcminus, aligner);

    for (y = 0; y < size; y += 4) {
        row0 = vec_ld(0, dst + 0 * stride);
        row1 = vec_ld(0, dst + 1 * stride);
        row2 = vec_ld(0, dst + 2 * stride);
        row3 = vec_ld(0, dst + 3 * stride);

        row0 = vec_adds(row0, dcplus);
        row1 = vec_adds(row1, dcplus);
        row2 = vec_adds(row2, dcplus);
        row3 = vec_adds(row3, dcplus);

        row0 = vec_subs(row0, dcminus);
        row1 = vec_subs(row1, dcminus);
        row2 = vec_subs(row2, dcminus);
        row3 = vec_subs(row3, dcminus);

        vec_st(row0, 0, dst + 0 * stride);
        vec_st(row1, 0, dst + 1 * stride);
        vec_st(row2, 0, dst + 2 * stride);
        vec_st(row3, 0, dst + 3 * stride);

        dst += 4 * stride;
    }
}
开发者ID:1Server,项目名称:OneServer-Android,代码行数:44,


示例3: clear_block_altivec

/*
 * Zero 128 bytes (eight 16-byte vectors) starting at block.
 * vec_st is used, so block is presumably 16-byte aligned - confirm with callers.
 */
static void clear_block_altivec(int16_t *block)
{
    LOAD_ZERO;
    int byte_off;

    for (byte_off = 0; byte_off < 128; byte_off += 16)
        vec_st(zero_s16v, byte_off, block);
}
开发者ID:dawangjiaowolaixunshan,项目名称:FFmpeg,代码行数:12,


示例4: avg_pixels16_l2_altivec

/*
 * For each of h rows: average 16 bytes of src1 with 16 bytes of src2, then
 * average that result with the 16 bytes already in dst and write it back.
 *
 * src1 rows are src_stride1 bytes apart; src2 rows are a fixed 16 bytes
 * apart; dst rows are dst_stride bytes apart. All three pointers may be
 * unaligned: loads use the two-load + vec_perm idiom and the store
 * re-merges the untouched bytes around dst.
 */
static inline void avg_pixels16_l2_altivec(uint8_t *dst, const uint8_t *src1,
                                           const uint8_t *src2, int dst_stride,
                                           int src_stride1, int h)
{
    int row;
    vec_u8 a, b, d, lo, hi, mask, src2_mask, edges, align;

    /* src2 advances by 16 bytes per row, so its alignment never changes. */
    src2_mask = vec_lvsl(0, src2);

    for (row = 0; row < h; row++) {
        /* Unaligned load of the src1 row. */
        lo   = vec_ld(row * src_stride1, src1);
        mask = vec_lvsl(row * src_stride1, src1);
        hi   = vec_ld(row * src_stride1 + 15, src1);
        a    = vec_perm(lo, hi, mask);

        /* Unaligned load of the src2 row. */
        lo = vec_ld(row * 16, src2);
        hi = vec_ld(row * 16 + 15, src2);
        b  = vec_perm(lo, hi, src2_mask);

        /* Unaligned load of the current dst row, then the two averages. */
        lo   = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        hi   = vec_ld(15, dst);
        d    = vec_avg(vec_perm(lo, hi, mask), vec_avg(a, b));

        /* Bytes surrounding the 16 target bytes, to be written back as-is. */
        edges = vec_perm(hi, lo, mask);

        align = vec_lvsr(0, dst);
        hi    = vec_perm(d, edges, align);
        lo    = vec_perm(edges, d, align);

        /* Store order is kept: high block first, then low block. */
        vec_st(hi, 15, dst);
        vec_st(lo, 0, dst);

        dst += dst_stride;
    }
}
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:42,


示例5: predict_16x16_h_altivec

/*
 * Horizontal 16x16 prediction: for each of 16 rows (FDEC_STRIDE apart),
 * fill the 16 bytes at src with one replicated byte taken from the aligned
 * block that contains src[-1].
 *
 * NOTE(review): lane 15 of that block equals src[-1] only when src is
 * 16-byte aligned - confirm with callers.
 */
static void predict_16x16_h_altivec( uint8_t *src )
{
    int rows_left = 16;

    while( rows_left-- > 0 )
    {
        vec_u8_t left_block = vec_ld(-1, src);
        vec_u8_t fill       = vec_splat(left_block, 15);

        vec_st(fill, 0, src);
        src += FDEC_STRIDE;
    }
}
开发者ID:0x0B501E7E,项目名称:x264,代码行数:11,


示例6: jsimd_h2v1_downsample_altivec

/*
 * 2:1 horizontal downsample of v_samp_factor rows.
 *
 * Each loop step reads 32 input samples and writes 16 output samples, where
 * every output is (even + odd + bias) >> 1 of an adjacent input pair. The
 * bias alternates 0, 1 across output columns (pw_bias).
 *
 * expand_right_edge() is called first on the input rows; its exact effect is
 * defined outside this block.
 */
void
jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
                               JDIMENSION v_samp_factor,
                               JDIMENSION width_blocks,
                               JSAMPARRAY input_data, JSAMPARRAY output_data)
{
  int outrow, outcol;
  JDIMENSION output_cols = width_blocks * DCTSIZE;
  JSAMPROW inptr, outptr;

  __vector unsigned char this0, next0, out;
  __vector unsigned short this0e, this0o, next0e, next0o, outl, outh;

  /* Constants */
  __vector unsigned short pw_bias = { __4X2(0, 1) },
    pw_one = { __8X(1) };
  __vector unsigned char even_odd_index =
    {0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15},
    pb_zero = { __16X(0) };

  expand_right_edge(input_data, max_v_samp_factor, image_width,
                    output_cols * 2);

  for (outrow = 0; outrow < v_samp_factor; outrow++) {
    outptr = output_data[outrow];
    inptr = input_data[outrow];

    for (outcol = output_cols; outcol > 0;
         outcol -= 16, inptr += 32, outptr += 16) {

      /* De-interleave: even samples to the high half, odd to the low half. */
      this0 = vec_ld(0, inptr);
      this0 = vec_perm(this0, this0, even_odd_index);
      this0e = (__vector unsigned short)VEC_UNPACKHU(this0);
      this0o = (__vector unsigned short)VEC_UNPACKLU(this0);
      outl = vec_add(this0e, this0o);
      outl = vec_add(outl, pw_bias);
      outl = vec_sr(outl, pw_one);

      if (outcol > 8) {
        /* More than 8 outputs remain: process the second 16 input samples. */
        next0 = vec_ld(16, inptr);
        next0 = vec_perm(next0, next0, even_odd_index);
        next0e = (__vector unsigned short)VEC_UNPACKHU(next0);
        next0o = (__vector unsigned short)VEC_UNPACKLU(next0);
        outh = vec_add(next0e, next0o);
        outh = vec_add(outh, pw_bias);
        outh = vec_sr(outh, pw_one);
      } else
        outh = vec_splat_u16(0);

      out = vec_pack(outl, outh);
      vec_st(out, 0, outptr);
    }
  }
}
开发者ID:AntonioMA,项目名称:UVCCamera,代码行数:54,


示例7: x264_sub4x4_dct_altivec

void x264_sub4x4_dct_altivec( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 ){    PREP_DIFF_8BYTEALIGNED;    vec_s16_t dct0v, dct1v, dct2v, dct3v;    vec_s16_t tmp0v, tmp1v, tmp2v, tmp3v;    vec_u8_t permHighv;    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 4, dct0v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 4, dct1v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 4, dct2v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 4, dct3v );    VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );    VEC_TRANSPOSE_4( tmp0v, tmp1v, tmp2v, tmp3v,                     dct0v, dct1v, dct2v, dct3v );    permHighv = (vec_u8_t) CV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17);    VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );    vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0,  dct);    vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, dct);}
开发者ID:0x0B501E7E,项目名称:x264,代码行数:21,


示例8: ff_put_pixels16_altivec

/*
 * Copy a 16-byte-wide block of h rows from pixels to block.
 *
 * pixels may be unaligned (two loads + vec_perm per row); block is written
 * with plain vec_st. Rows are handled four at a time, so h is effectively
 * rounded up to a multiple of 4.
 *
 * This assumes that ((line_size % 16) == 0): the permute vector is computed
 * once from pixels and reused for every row.
 */
void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2;
    register vector unsigned char pixelsv1B, pixelsv2B;
    register vector unsigned char pixelsv1C, pixelsv2C;
    register vector unsigned char pixelsv1D, pixelsv2D;

    register vector unsigned char perm = vec_lvsl(0, pixels);
    int i;
    register ptrdiff_t line_size_2 = line_size << 1;
    register ptrdiff_t line_size_3 = line_size + line_size_2;
    register ptrdiff_t line_size_4 = line_size << 2;

    // hand-unrolling the loop by 4 gains about 15%
    // minimum execution time goes from 74 to 60 cycles
    // it's faster than -funroll-loops, but using
    // -funroll-loops w/ this is bad - 74 cycles again.
    // all this is on a 7450, tuning for the 7450
    for (i = 0; i < h; i += 4) {
        pixelsv1  = vec_ld( 0, pixels);
        pixelsv2  = vec_ld(15, pixels);
        pixelsv1B = vec_ld(line_size, pixels);
        pixelsv2B = vec_ld(15 + line_size, pixels);
        pixelsv1C = vec_ld(line_size_2, pixels);
        pixelsv2C = vec_ld(15 + line_size_2, pixels);
        pixelsv1D = vec_ld(line_size_3, pixels);
        pixelsv2D = vec_ld(15 + line_size_3, pixels);

        vec_st(vec_perm(pixelsv1, pixelsv2, perm),
               0, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
               line_size, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
               line_size_2, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
               line_size_3, (unsigned char*)block);

        pixels += line_size_4;
        block  += line_size_4;
    }
}
开发者ID:AVLeo,项目名称:libav,代码行数:40,


示例9: assign_add_mul_r_32

/*
 * R[i] = R[i] + c * S[i] for i in [0, N).
 *
 * Each spinor32 is processed as 24 floats: six 4-element vector loads from
 * R and S, a fused multiply-add with the splatted scalar c, and six vector
 * stores back to R.
 *
 * With TM_USE_OMP defined the body runs inside an OpenMP parallel region
 * and the loop is work-shared; otherwise the loop carries an unroll(2)
 * pragma.
 */
void assign_add_mul_r_32(spinor32 * const R, spinor32 * const S, const float c, const int N) {
#ifdef TM_USE_OMP
#pragma omp parallel
  {
#endif
  vector4double x0, x1, x2, x3, x4, x5, y0, y1, y2, y3, y4, y5;
  vector4double z0, z1, z2, z3, z4, z5, k;
  float *s, *r;
  float ALIGN32 _c;
  _c = c;
  __prefetch_by_load(S);
  __prefetch_by_load(R);

  k = vec_splats((double)_c);
  /* NOTE(review): s and r have not been assigned yet when these alignment
   * hints are issued - confirm this is what the compiler intrinsic expects. */
  __alignx(16, s);
  __alignx(16, r);
  __alignx(16, S);
  __alignx(16, R);

#ifdef TM_USE_OMP
#pragma omp for
#else
#pragma unroll(2)
#endif
  for(int i = 0; i < N; i++) {
    s=(float*)((spinor32 *) S + i);
    r=(float*)((spinor32 *) R + i);
    /* Prefetch the next spinor of each array while working on this one. */
    __prefetch_by_load(S + i + 1);
    __prefetch_by_stream(1, R + i + 1);
    x0 = vec_ld(0, r);
    x1 = vec_ld(0, r+4);
    x2 = vec_ld(0, r+8);
    x3 = vec_ld(0, r+12);
    x4 = vec_ld(0, r+16);
    x5 = vec_ld(0, r+20);
    y0 = vec_ld(0, s);
    y1 = vec_ld(0, s+4);
    y2 = vec_ld(0, s+8);
    y3 = vec_ld(0, s+12);
    y4 = vec_ld(0, s+16);
    y5 = vec_ld(0, s+20);
    /* z = k * y + x */
    z0 = vec_madd(k, y0, x0);
    z1 = vec_madd(k, y1, x1);
    z2 = vec_madd(k, y2, x2);
    z3 = vec_madd(k, y3, x3);
    z4 = vec_madd(k, y4, x4);
    z5 = vec_madd(k, y5, x5);
    vec_st(z0, 0, r);
    vec_st(z1, 0, r+4);
    vec_st(z2, 0, r+8);
    vec_st(z3, 0, r+12);
    vec_st(z4, 0, r+16);
    vec_st(z5, 0, r+20);
  }
#ifdef TM_USE_OMP
  } /* OpenMP closing brace */
#endif
  return;
}
开发者ID:Finkenrath,项目名称:tmLQCD,代码行数:59,


示例10: sub_int16_altivec

/*
 * v1[k] -= v2[k] for k in [0, order), eight int16 elements per iteration.
 *
 * v1 is loaded and stored with plain vec_ld/vec_st (so it is presumably
 * 16-byte aligned - confirm with callers); v2 may be unaligned and is read
 * with two loads plus vec_perm. The loop always handles whole vectors, so
 * order is effectively rounded up to a multiple of 8.
 *
 * The second v2 load uses offset 15 rather than the next vector, so that an
 * aligned v2 does not touch the 16 bytes following the current vector.
 */
static void sub_int16_altivec(int16_t * v1, int16_t * v2, int order)
{
    int i;
    vec_s16_t vec;

    for (i = 0; i < order; i += 8) {
        vec = vec_perm(vec_ld(0, v2), vec_ld(15, v2), vec_lvsl(0, v2));
        vec_st(vec_sub(vec_ld(0, v1), vec), 0, v1);
        v1 += 8;
        v2 += 8;
    }
}
开发者ID:Haaaaaank,项目名称:avbin,代码行数:13,


示例11: jsimd_convsamp_altivec

/*
 * Load eight rows of samples (via LOAD_ROW), widen them to 16-bit, subtract
 * CENTERJSAMPLE from every element and store the eight resulting vectors
 * contiguously into workspace (128 bytes).
 *
 * elemptr, sample_data and start_col are consumed by the LOAD_ROW macro,
 * which is defined outside this block.
 */
void
jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col,
                        DCTELEM * workspace)
{
  JSAMPROW elemptr;

  __vector unsigned char in0, in1, in2, in3, in4, in5, in6, in7;
  __vector short out0, out1, out2, out3, out4, out5, out6, out7;

  /* Constants */
  __vector short pw_centerjsamp = { __8X(CENTERJSAMPLE) };
  __vector unsigned char pb_zero = { __16X(0) };

  LOAD_ROW(0);
  LOAD_ROW(1);
  LOAD_ROW(2);
  LOAD_ROW(3);
  LOAD_ROW(4);
  LOAD_ROW(5);
  LOAD_ROW(6);
  LOAD_ROW(7);

  out0 = (__vector short)VEC_UNPACKHU(in0);
  out1 = (__vector short)VEC_UNPACKHU(in1);
  out2 = (__vector short)VEC_UNPACKHU(in2);
  out3 = (__vector short)VEC_UNPACKHU(in3);
  out4 = (__vector short)VEC_UNPACKHU(in4);
  out5 = (__vector short)VEC_UNPACKHU(in5);
  out6 = (__vector short)VEC_UNPACKHU(in6);
  out7 = (__vector short)VEC_UNPACKHU(in7);

  out0 = vec_sub(out0, pw_centerjsamp);
  out1 = vec_sub(out1, pw_centerjsamp);
  out2 = vec_sub(out2, pw_centerjsamp);
  out3 = vec_sub(out3, pw_centerjsamp);
  out4 = vec_sub(out4, pw_centerjsamp);
  out5 = vec_sub(out5, pw_centerjsamp);
  out6 = vec_sub(out6, pw_centerjsamp);
  out7 = vec_sub(out7, pw_centerjsamp);

  vec_st(out0, 0, workspace);
  vec_st(out1, 16, workspace);
  vec_st(out2, 32, workspace);
  vec_st(out3, 48, workspace);
  vec_st(out4, 64, workspace);
  vec_st(out5, 80, workspace);
  vec_st(out6, 96, workspace);
  vec_st(out7, 112, workspace);
}
开发者ID:PyYoshi,项目名称:libjpeg-turbo,代码行数:49,


示例12: put_vp8_pixels16_altivec

/*
 * Copy a 16-byte-wide block of h rows from src (row pitch sstride) to dst
 * (row pitch dstride).
 *
 * src may be unaligned (two loads + vec_perm per row); dst is written with
 * plain vec_st. Rows are handled four at a time. The permute vector is
 * computed once from src, so sstride is presumably a multiple of 16 -
 * confirm with callers. mx and my are not used.
 */
static void put_vp8_pixels16_altivec(uint8_t *dst, ptrdiff_t dstride,
                                     uint8_t *src, ptrdiff_t sstride,
                                     int h, int mx, int my)
{
    register vector unsigned char pixelsv1, pixelsv2;
    register vector unsigned char pixelsv1B, pixelsv2B;
    register vector unsigned char pixelsv1C, pixelsv2C;
    register vector unsigned char pixelsv1D, pixelsv2D;

    register vector unsigned char perm = vec_lvsl(0, src);
    int i;
    register ptrdiff_t dstride2 = dstride << 1, sstride2 = sstride << 1;
    register ptrdiff_t dstride3 = dstride2 + dstride, sstride3 = sstride + sstride2;
    register ptrdiff_t dstride4 = dstride << 2, sstride4 = sstride << 2;

    // hand-unrolling the loop by 4 gains about 15%
    // minimum execution time goes from 74 to 60 cycles
    // it's faster than -funroll-loops, but using
    // -funroll-loops w/ this is bad - 74 cycles again.
    // all this is on a 7450, tuning for the 7450
    for (i = 0; i < h; i += 4) {
        pixelsv1  = vec_ld( 0, src);
        pixelsv2  = vec_ld(15, src);
        pixelsv1B = vec_ld(sstride, src);
        pixelsv2B = vec_ld(15 + sstride, src);
        pixelsv1C = vec_ld(sstride2, src);
        pixelsv2C = vec_ld(15 + sstride2, src);
        pixelsv1D = vec_ld(sstride3, src);
        pixelsv2D = vec_ld(15 + sstride3, src);

        vec_st(vec_perm(pixelsv1, pixelsv2, perm),
               0, (unsigned char*)dst);
        vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
               dstride, (unsigned char*)dst);
        vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
               dstride2, (unsigned char*)dst);
        vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
               dstride3, (unsigned char*)dst);

        src += sstride4;
        dst += dstride4;
    }
}
开发者ID:elnormous,项目名称:libav,代码行数:39,


示例13: int32_to_float_fmul_scalar_altivec

static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len){    union {        vector float v;        float s[4];    } mul_u;    int i;    vector float src1, src2, dst1, dst2, mul_v, zero;    zero = (vector float)vec_splat_u32(0);    mul_u.s[0] = mul;    mul_v = vec_splat(mul_u.v, 0);    for(i=0; i<len; i+=8) {        src1 = vec_ctf(vec_ld(0,  src+i), 0);        src2 = vec_ctf(vec_ld(16, src+i), 0);        dst1 = vec_madd(src1, mul_v, zero);        dst2 = vec_madd(src2, mul_v, zero);        vec_st(dst1,  0, dst+i);        vec_st(dst2, 16, dst+i);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:22,


示例14: predict_16x16_v_altivec

/*
 * Vertical 16x16 prediction: read the 16 bytes of the row directly above
 * src (src - FDEC_STRIDE) as four 32-bit words and store that vector into
 * each of the 16 rows starting at src.
 */
static void predict_16x16_v_altivec( uint8_t *src )
{
    vec_u32_u v;
    int word, row;

    /* Gather the row above, one 32-bit word at a time. */
    for( word = 0; word < 4; word++ )
        v.s[word] = *(uint32_t*)&src[4*word - FDEC_STRIDE];

    for( row = 0; row < 16; row++ )
    {
        vec_st(v.v, 0, (uint32_t*)src);
        src += FDEC_STRIDE;
    }
}
开发者ID:xing2fan,项目名称:x264,代码行数:14,


示例15: audio_convert_s16_to_float_altivec

static void audio_convert_s16_to_float_altivec(float *out, const int16_t *in, size_t samples){   // Unaligned loads/store is a bit expensive, so we optimize for the good path (very likely).   if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)   {      size_t i;      for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)      {         vector signed short input = vec_ld(0, in);         vector signed int hi = vec_unpackh(input);         vector signed int lo = vec_unpackl(input);         vector float out_hi = vec_ctf(hi, 15);         vector float out_lo = vec_ctf(lo, 15);         vec_st(out_hi, 0, out);         vec_st(out_lo, 16, out);      }      audio_convert_s16_to_float_C(out, in, samples - i);   }   else      audio_convert_s16_to_float_C(out, in, samples);}
开发者ID:twinaphex,项目名称:cellframework2,代码行数:23,


示例16: predict_16x16_p_altivec

static void predict_16x16_p_altivec( uint8_t *src ){    int16_t a, b, c, i;    int H = 0;    int V = 0;    int16_t i00;    for( i = 1; i <= 8; i++ )    {        H += i * ( src[7+i - FDEC_STRIDE ]  - src[7-i - FDEC_STRIDE ] );        V += i * ( src[(7+i)*FDEC_STRIDE -1] - src[(7-i)*FDEC_STRIDE -1] );    }    a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );    b = ( 5 * H + 32 ) >> 6;    c = ( 5 * V + 32 ) >> 6;    i00 = a - b * 7 - c * 7 + 16;    vect_sshort_u i00_u, b_u, c_u;    i00_u.s[0] = i00;    b_u.s[0]   = b;    c_u.s[0]   = c;    vec_u16_t val5_v = vec_splat_u16(5);    vec_s16_t i00_v, b_v, c_v;    i00_v = vec_splat(i00_u.v, 0);    b_v = vec_splat(b_u.v, 0);    c_v = vec_splat(c_u.v, 0);    vec_s16_t induc_v  = (vec_s16_t) CV(0,  1,  2,  3,  4,  5,  6,  7);    vec_s16_t b8_v = vec_sl(b_v, vec_splat_u16(3));    vec_s32_t mule_b_v = vec_mule(induc_v, b_v);    vec_s32_t mulo_b_v = vec_mulo(induc_v, b_v);    vec_s16_t mul_b_induc0_v = vec_pack(vec_mergeh(mule_b_v, mulo_b_v), vec_mergel(mule_b_v, mulo_b_v));    vec_s16_t add_i0_b_0v = vec_adds(i00_v, mul_b_induc0_v);    vec_s16_t add_i0_b_8v = vec_adds(b8_v, add_i0_b_0v);    int y;    for( y = 0; y < 16; y++ )    {        vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);        vec_s16_t shift_8_v = vec_sra(add_i0_b_8v, val5_v);        vec_u8_t com_sat_v = vec_packsu(shift_0_v, shift_8_v);        vec_st( com_sat_v, 0, &src[0]);        src += FDEC_STRIDE;        i00 += c;        add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);        add_i0_b_8v = vec_adds(add_i0_b_8v, c_v);    }}
开发者ID:UIKit0,项目名称:H.264-in-CUDA,代码行数:50,


示例17: vector_fmul_add_altivec

static void vector_fmul_add_altivec(float *dst, const float *src0,                                    const float *src1, const float *src2,                                    int len){    int i;    vector float d, s0, s1, s2, t0, t1, edges;    vector unsigned char align = vec_lvsr(0,dst),                         mask = vec_lvsl(0, dst);    for (i=0; i<len-3; i+=4) {        t0 = vec_ld(0, dst+i);        t1 = vec_ld(15, dst+i);        s0 = vec_ld(0, src0+i);        s1 = vec_ld(0, src1+i);        s2 = vec_ld(0, src2+i);        edges = vec_perm(t1 ,t0, mask);        d = vec_madd(s0,s1,s2);        t1 = vec_perm(d, edges, align);        t0 = vec_perm(edges, d, align);        vec_st(t1, 15, dst+i);        vec_st(t0, 0, dst+i);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:23,


示例18: float_to_int16_altivec

static void float_to_int16_altivec(int16_t *dst, const float *src, long len){    int i;    vector signed short d0, d1, d;    vector unsigned char align;    if(((long)dst)&15) //FIXME    for(i=0; i<len-7; i+=8) {        d0 = vec_ld(0, dst+i);        d = float_to_int16_one_altivec(src+i);        d1 = vec_ld(15, dst+i);        d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));        align = vec_lvsr(0, dst+i);        d0 = vec_perm(d1, d, align);        d1 = vec_perm(d, d1, align);        vec_st(d0, 0, dst+i);        vec_st(d1,15, dst+i);    }    else    for(i=0; i<len-7; i+=8) {        d = float_to_int16_one_altivec(src+i);        vec_st(d, 0, dst+i);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:23,


示例19: vector_fmul_window_altivec

static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){    union {        vector float v;        float s[4];    } vadd;    vector float vadd_bias, zero, t0, t1, s0, s1, wi, wj;    const vector unsigned char reverse = vcprm(3,2,1,0);    int i,j;    dst += len;    win += len;    src0+= len;    vadd.s[0] = add_bias;    vadd_bias = vec_splat(vadd.v, 0);    zero = (vector float)vec_splat_u32(0);    for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {        s0 = vec_ld(i, src0);        s1 = vec_ld(j, src1);        wi = vec_ld(i, win);        wj = vec_ld(j, win);        s1 = vec_perm(s1, s1, reverse);        wj = vec_perm(wj, wj, reverse);        t0 = vec_madd(s0, wj, vadd_bias);        t0 = vec_nmsub(s1, wi, t0);        t1 = vec_madd(s0, wi, vadd_bias);        t1 = vec_madd(s1, wj, t1);        t1 = vec_perm(t1, t1, reverse);        vec_st(t0, i, dst);        vec_st(t1, j, dst);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:37,


示例20: expf

float expf(float x) {#elsefloat vexpf(float x) {#endif  vector float vexpa, va;//, vx, vn, va, vb,//               v0, vlog2, vln2;    register float exp, a;//, b, b2, b3, b4;//, b6, b8, b10, R0, R;  float __attribute__((aligned(16))) xa[4];  xa[0] = x;/*  // set up a few constants  vlog2 = vec_ld(0, &C_EXPF[0]);  v0    = (vector float) vec_splat_u32(0);  vln2  = vec_splat(vlog2, 1);  vlog2 = vec_splat(vlog2, 0);  // Load x into a vector float  vx = vec_ld(0, xa);  vx = vec_splat(vx, 0);  // Split x = n*log2e + b  vn = vec_madd(vx, vlog2e, v0);  vn = vec_floor(vn);*/        xa[0] = truncf(x*M_LOG2E);  va = vec_ld(0, xa);  vexpa = vec_expte(va);  a = xa[0] * M_LN2;  vec_st(vexpa, 0, xa);/*  b = x - a;  b2 = b*b;  b3 = b2*b;  b4 = b2*b2;  b6 = b4*b2;  b8 = b6*b2;  b10 = b8*b2;  R0 =       0.1666666666666666019037   *b2 - 0.00277777777770155933842  *b4           + 6.61375632143793436117e-05 *b6 - 1.65339022054652515390e-06 *b8           + 4.13813679705723846039e-08 *b10;  R = b - R0;  //exp = 1.0 + 2.0*b/(2.0 - R);  exp = (1680.0 + 840*b + 180*b2 + 20*b3 + b4)/(1680 - 840*b + 180*b2 - 20*b3 + b4);*/  exp = xa[0];  return exp;}
开发者ID:K1773R,项目名称:libfreevec,代码行数:49,


示例21: ff_vp3_idct_altivec

/*
 * In-place 8x8 inverse DCT of block.
 *
 * The transform is built from macros defined outside this block: a first
 * IDCT_1D pass, a TRANSPOSE8 of the eight row vectors b0..b7, and a second
 * IDCT_1D pass with the ADD8/SHIFT4 steps. The eight result vectors are
 * then stored back to block at 16-byte intervals (128 bytes total).
 */
void ff_vp3_idct_altivec(DCTELEM block[64])
{
    IDCT_START

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

    vec_st(b0,   0, block);
    vec_st(b1,  16, block);
    vec_st(b2,  32, block);
    vec_st(b3,  48, block);
    vec_st(b4,  64, block);
    vec_st(b5,  80, block);
    vec_st(b6,  96, block);
    vec_st(b7, 112, block);
}
开发者ID:9aa5,项目名称:FFmpeg,代码行数:17,


示例22: printVecFloats

/* *************************************************************************
 *    NAME:  printVecFloats
 *
 *    Print the lanes of a vector float to stderr as
 *    "<label> --> float:{v0 v1 ... }" followed by a blank line.
 *
 *    label      text printed before the values
 *    outFloats  vector to print
 *    arraySize  number of lanes to print; values outside 0..4 are clamped,
 *               since a vector float holds exactly four floats
 *
 *    The scratch buffer always holds a full vector: vec_st writes 16 bytes
 *    regardless of arraySize, so sizing the buffer by arraySize would
 *    overflow it for arraySize < 4.
 * ************************************************************************* */
void printVecFloats(char *label, vector float outFloats, int arraySize)
{
  enum { VEC_FLOAT_LANES = 4 };
  float printfloat[VEC_FLOAT_LANES] __attribute__ ((aligned (16)));
  int count = arraySize;
  int i;

  if (count < 0)
    count = 0;
  if (count > VEC_FLOAT_LANES)
    count = VEC_FLOAT_LANES;

  vec_st(outFloats, 0, printfloat);

  fprintf(stderr,"%s --> float:{",label);
  for (i = 0; i < count; i++)
  {
     fprintf(stderr,"%f ",printfloat[i]);
  }
  fprintf(stderr,"}\n\n");
} /* printVecFloats */
开发者ID:yaojingguo,项目名称:gcc-intrinsics-samplecode,代码行数:19,


示例23: printVecShorts

/* *************************************************************************
 *    NAME:  printVecShorts
 *
 *    Print the lanes of a vector signed short to stderr as
 *    "<label> --> short:{v0 v1 ... }" followed by a blank line.
 *
 *    label      text printed before the values
 *    outShorts  vector to print
 *    arraySize  number of lanes to print; values outside 0..8 are clamped,
 *               since a vector signed short holds exactly eight shorts
 *
 *    The scratch buffer always holds a full vector: vec_st writes 16 bytes
 *    regardless of arraySize, so sizing the buffer by arraySize would
 *    overflow it for arraySize < 8.
 * ************************************************************************* */
void printVecShorts(char *label, vector signed short outShorts, int arraySize)
{
  enum { VEC_SHORT_LANES = 8 };
  signed short printshort[VEC_SHORT_LANES] __attribute__ ((aligned (16)));
  int count = arraySize;
  int i;

  if (count < 0)
    count = 0;
  if (count > VEC_SHORT_LANES)
    count = VEC_SHORT_LANES;

  vec_st(outShorts, 0, printshort);

  fprintf(stderr,"%s --> short:{",label);
  for (i = 0; i < count; i++)
  {
     fprintf(stderr,"%d ",printshort[i]);
  }
  fprintf(stderr,"}\n\n");
} /* printVecShorts */
开发者ID:yaojingguo,项目名称:gcc-intrinsics-samplecode,代码行数:19,


示例24: Add32

// Adds 32 floats from pSrc into pDest in place (pDest[k] += pSrc[k]).
//
// On PowerPC builds (_Mac_PowerPC) this is done as eight 4-float AltiVec
// operations at 16-byte intervals; vec_ldl / vec_st are used, so both
// pointers are presumably 16-byte aligned - confirm with callers.
// On other builds the call is forwarded to CDSPTools::Add32.
void CDSPToolsOSX::Add32(tfloat32* pDest, const tfloat32* pSrc)
{
#ifdef _Mac_PowerPC
	// Byte offsets 0, 16, ..., 112: eight vectors of four floats each.
	for (int iByteOffset = 0; iByteOffset < 32 * 4; iByteOffset += 4 * 4) {
		vector float vDest = vec_ldl(iByteOffset, pDest);
		vector float vSrc = vec_ldl(iByteOffset, pSrc);
		vDest = vec_add(vDest, vSrc);
		vec_st(vDest, iByteOffset, pDest);
	}
#else	// _Mac_PowerPC
	CDSPTools::Add32(pDest, pSrc);
#endif	// _Mac_PowerPC
}
开发者ID:eriser,项目名称:koblo_software-1,代码行数:46,


示例25: add_bytes_altivec

static void add_bytes_altivec(uint8_t *dst, uint8_t *src, intptr_t w){    register int i;    register vector unsigned char vdst, vsrc;    /* dst and src are 16 bytes-aligned (guaranteed). */    for (i = 0; i + 15 < w; i += 16) {        vdst = vec_ld(i, (unsigned char *) dst);        vsrc = vec_ld(i, (unsigned char *) src);        vdst = vec_add(vsrc, vdst);        vec_st(vdst, i, (unsigned char *) dst);    }    /* If w is not a multiple of 16. */    for (; i < w; i++)        dst[i] = src[i];}
开发者ID:0day-ci,项目名称:FFmpeg,代码行数:16,


示例26: put_vp8_epel_h_altivec_core

static av_always_inlinevoid put_vp8_epel_h_altivec_core(uint8_t *dst, ptrdiff_t dst_stride,                                 uint8_t *src, ptrdiff_t src_stride,                                 int h, int mx, int w, int is6tap){    LOAD_H_SUBPEL_FILTER(mx-1);    vec_u8 align_vec0, align_vec8, permh0, permh8, filt;    vec_u8 perm_6tap0, perm_6tap8, perml0, perml8;    vec_u8 a, b, pixh, pixl, outer;    vec_s16 f16h, f16l;    vec_s32 filth, filtl;    vec_u8 perm_inner6 = { 1,2,3,4, 2,3,4,5, 3,4,5,6, 4,5,6,7 };    vec_u8 perm_inner4 = { 0,1,2,3, 1,2,3,4, 2,3,4,5, 3,4,5,6 };    vec_u8 perm_inner  = is6tap ? perm_inner6 : perm_inner4;    vec_u8 perm_outer = { 4,9, 0,5, 5,10, 1,6, 6,11, 2,7, 7,12, 3,8 };    vec_s32 c64 = vec_sl(vec_splat_s32(1), vec_splat_u32(6));    vec_u16 c7  = vec_splat_u16(7);    align_vec0 = vec_lvsl( -is6tap-1, src);    align_vec8 = vec_lvsl(8-is6tap-1, src);    permh0     = vec_perm(align_vec0, align_vec0, perm_inner);    permh8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_inner = vec_add(perm_inner, vec_splat_u8(4));    perml0     = vec_perm(align_vec0, align_vec0, perm_inner);    perml8     = vec_perm(align_vec8, align_vec8, perm_inner);    perm_6tap0 = vec_perm(align_vec0, align_vec0, perm_outer);    perm_6tap8 = vec_perm(align_vec8, align_vec8, perm_outer);    while (h --> 0) {        FILTER_H(f16h, 0);        if (w == 16) {            FILTER_H(f16l, 8);            filt = vec_packsu(f16h, f16l);            vec_st(filt, 0, dst);        } else {            filt = vec_packsu(f16h, f16h);            vec_ste((vec_u32)filt, 0, (uint32_t*)dst);            if (w == 8)                vec_ste((vec_u32)filt, 4, (uint32_t*)dst);        }        src += src_stride;        dst += dst_stride;    }}
开发者ID:Arcen,项目名称:libav,代码行数:47,


示例27: x264_sub8x8_dct8_altivec

void x264_sub8x8_dct8_altivec( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 ){    vec_u16_t onev = vec_splat_u16(1);    vec_u16_t twov = vec_add( onev, onev );    PREP_DIFF_8BYTEALIGNED;    vec_s16_t dct0v, dct1v, dct2v, dct3v,              dct4v, dct5v, dct6v, dct7v;    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct0v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct1v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct2v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct3v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct4v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct5v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct6v );    VEC_DIFF_H_8BYTE_ALIGNED( pix1, FENC_STRIDE, pix2, FDEC_STRIDE, 8, dct7v );    DCT8_1D_ALTIVEC( dct0v, dct1v, dct2v, dct3v,                     dct4v, dct5v, dct6v, dct7v );    vec_s16_t dct_tr0v, dct_tr1v, dct_tr2v, dct_tr3v,        dct_tr4v, dct_tr5v, dct_tr6v, dct_tr7v;    VEC_TRANSPOSE_8(dct0v, dct1v, dct2v, dct3v,                    dct4v, dct5v, dct6v, dct7v,                    dct_tr0v, dct_tr1v, dct_tr2v, dct_tr3v,                    dct_tr4v, dct_tr5v, dct_tr6v, dct_tr7v );    DCT8_1D_ALTIVEC( dct_tr0v, dct_tr1v, dct_tr2v, dct_tr3v,                     dct_tr4v, dct_tr5v, dct_tr6v, dct_tr7v );    vec_st( dct_tr0v,  0,  dct );    vec_st( dct_tr1v, 16,  dct );    vec_st( dct_tr2v, 32,  dct );    vec_st( dct_tr3v, 48,  dct );    vec_st( dct_tr4v, 64,  dct );    vec_st( dct_tr5v, 80,  dct );    vec_st( dct_tr6v, 96,  dct );    vec_st( dct_tr7v, 112, dct );}
开发者ID:0x0B501E7E,项目名称:x264,代码行数:44,


示例28: b

voidb(){  z = vec_add (x, y);  /* Make sure the predicates accept correct argument types.  */  int1 = vec_all_in (f, g);  int1 = vec_all_ge (f, g);  int1 = vec_all_eq (c, d);  int1 = vec_all_ne (s, t);  int1 = vec_any_eq (i, j);  int1 = vec_any_ge (f, g);  int1 = vec_all_ngt (f, g);  int1 = vec_any_ge (c, d);  int1 = vec_any_ge (s, t);  int1 = vec_any_ge (i, j);  int1 = vec_any_ge (c, d);  int1 = vec_any_ge (s, t);  int1 = vec_any_ge (i, j);  vec_mtvscr (i);  vec_dssall ();  s = (vector signed short) vec_mfvscr ();  vec_dss (3);  vec_dst (pi, int1 + int2, 3);  vec_dstst (pi, int1 + int2, 3);  vec_dststt (pi, int1 + int2, 3);  vec_dstt (pi, int1 + int2, 3);  uc = (vector unsigned char) vec_lvsl (int1 + 69, (signed int *) pi);  uc = (vector unsigned char) vec_lvsr (int1 + 69, (signed int *) pi);  c = vec_lde (int1, (signed char *) pi);  s = vec_lde (int1, (signed short *) pi);  i = vec_lde (int1, (signed int *) pi);  i = vec_ldl (int1, pi);  i = vec_ld (int1, pi);  vec_st (i, int2, pi);  vec_ste (c, int2, (signed char *) pi);  vec_ste (s, int2, (signed short *) pi);  vec_ste (i, int2, (signed int *) pi);  vec_stl (i, int2, pi);}
开发者ID:Akheon23,项目名称:chromecast-mirrored-source.toolchain,代码行数:46,


示例29: ff_avg_pixels16_altivec

/*
 * For each of h rows, average 16 bytes from pixels with the 16 bytes
 * already at block and write the result back to block.
 *
 * pixels may be unaligned (two loads + vec_perm); block is accessed with
 * plain vec_ld/vec_st. The permute vector is computed once from pixels, so
 * line_size is presumably a multiple of 16 - confirm with callers.
 *
 * The second load uses offset 15 rather than 16: the permuted result is the
 * same, but when pixels is 16-byte aligned the load stays inside the current
 * row instead of touching the following 16 bytes.
 */
void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
    register vector unsigned char perm = vec_lvsl(0, pixels);
    int i;

    for (i = 0; i < h; i++) {
        pixelsv1 = vec_ld( 0, pixels);
        pixelsv2 = vec_ld(15, pixels);
        blockv   = vec_ld(0, block);
        pixelsv  = vec_perm(pixelsv1, pixelsv2, perm);
        blockv   = vec_avg(blockv, pixelsv);
        vec_st(blockv, 0, (unsigned char*)block);
        pixels += line_size;
        block  += line_size;
    }
}
开发者ID:AVLeo,项目名称:libav,代码行数:17,


示例30: foo

void foo( float scalar){    unsigned long width;    unsigned long x;    vector float vColor;    vector unsigned int selectMask;    vColor = vec_perm( vec_ld( 0, &scalar), vec_ld( 3, &scalar), vec_lvsl( 0, &scalar) );    float *destRow;    vector float store, load0;    for( ; x < width; x++)    {            load0 = vec_sel( vColor, load0, selectMask );            vec_st( store, 0, destRow );            store = load0;    }}
开发者ID:Alexpux,项目名称:GCC,代码行数:18,



注:本文中的vec_st函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vec_ste函数代码示例
C++ vec_splat_u8函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。