您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vec_ld函数代码示例

51自学网 2021-06-03 09:36:02
  C++
这篇教程C++ vec_ld函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中vec_ld函数的典型用法代码示例。如果您正苦于以下问题:C++ vec_ld函数的具体用法?C++ vec_ld怎么用?C++ vec_ld使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vec_ld函数的28个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: h264_idct_dc_add_internal

static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size){    vec_s16 dc16;    vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;    LOAD_ZERO;    DECLARE_ALIGNED(16, int, dc);    int i;    dc = (block[0] + 32) >> 6;    block[0] = 0;    dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);    if (size == 4)        dc16 = vec_sld(dc16, zero_s16v, 8);    dcplus = vec_packsu(dc16, zero_s16v);    dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);    aligner = vec_lvsr(0, dst);    dcplus = vec_perm(dcplus, dcplus, aligner);    dcminus = vec_perm(dcminus, dcminus, aligner);    for (i = 0; i < size; i += 4) {        v0 = vec_ld(0, dst+0*stride);        v1 = vec_ld(0, dst+1*stride);        v2 = vec_ld(0, dst+2*stride);        v3 = vec_ld(0, dst+3*stride);        v0 = vec_adds(v0, dcplus);        v1 = vec_adds(v1, dcplus);        v2 = vec_adds(v2, dcplus);        v3 = vec_adds(v3, dcplus);        v0 = vec_subs(v0, dcminus);        v1 = vec_subs(v1, dcminus);        v2 = vec_subs(v2, dcminus);        v3 = vec_subs(v3, dcminus);        vec_st(v0, 0, dst+0*stride);        vec_st(v1, 0, dst+1*stride);        vec_st(v2, 0, dst+2*stride);        vec_st(v3, 0, dst+3*stride);        dst += 4*stride;    }}
开发者ID:AVbin,项目名称:libav,代码行数:45,


示例2: foo

/*
 * Builds vColor from `scalar` with the two-load + vec_perm/vec_lvsl pattern,
 * then runs a loop that blends vColor into load0 with vec_sel and stores the
 * previous `store` value to destRow.
 *
 * NOTE(review): x, width, selectMask, load0, store and destRow are all read
 * without ever being assigned in this snippet, so it looks like a
 * compile-only test case rather than code meant to be executed -- confirm
 * before running it. The statement order is kept exactly as found.
 */
void foo( float scalar){    unsigned long width;    unsigned long x;    vector float vColor;    vector unsigned int selectMask;    vColor = vec_perm( vec_ld( 0, &scalar), vec_ld( 3, &scalar), vec_lvsl( 0, &scalar) );    float *destRow;    vector float store, load0;    for( ; x < width; x++)    {            load0 = vec_sel( vColor, load0, selectMask );            vec_st( store, 0, destRow );            store = load0;    }}
开发者ID:Alexpux,项目名称:GCC,代码行数:18,


示例3: sad16_x2_altivec

static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h){    int i;    int s;    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);    vector unsigned char perm1 = vec_lvsl(0, pix2);    vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));    vector unsigned char pix2l, pix2r;    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;    vector unsigned int sad;    vector signed int sumdiffs;    s = 0;    sad = (vector unsigned int)vec_splat_u32(0);    for (i = 0; i < h; i++) {        /* Read unaligned pixels into our vectors. The vectors are as follows:           pix1v: pix1[0]-pix1[15]           pix2v: pix2[0]-pix2[15]      pix2iv: pix2[1]-pix2[16] */        pix1v  = vec_ld( 0, pix1);        pix2l  = vec_ld( 0, pix2);        pix2r  = vec_ld(16, pix2);        pix2v  = vec_perm(pix2l, pix2r, perm1);        pix2iv = vec_perm(pix2l, pix2r, perm2);        /* Calculate the average vector */        avgv = vec_avg(pix2v, pix2iv);        /* Calculate a sum of abs differences vector */        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);        pix1 += line_size;        pix2 += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}
开发者ID:0xFFeng,项目名称:ffmpeg,代码行数:43,


示例4: predict_16x16_h_altivec

/*
 * Horizontal 16x16 prediction: for each of the 16 rows, replicate the byte
 * immediately to the left of the row (src[-1]) across the whole row.
 *
 * Lane 15 of the vector loaded at offset -1 is used as the left neighbour,
 * which presumably relies on src being 16-byte aligned -- TODO confirm.
 */
static void predict_16x16_h_altivec( uint8_t *src )
{
    int rows_left = 16;

    while( rows_left-- > 0 )
    {
        /* Broadcast the left neighbour over all 16 lanes and store the row. */
        vec_u8_t left = vec_splat( vec_ld(-1, src), 15 );
        vec_st( left, 0, src );
        src += FDEC_STRIDE;
    }
}
开发者ID:0x0B501E7E,项目名称:x264,代码行数:11,


示例5: yuv2planeX_16_altivec

/*
 * Vertical scaling filter for 16 output pixels starting at column x.
 *
 * Steps visible in this function:
 *   1. val[0..15] is seeded with dither[(x + i + offset) & 7] << 12 and
 *      loaded into four 4 x int32 accumulators (vo1..vo4).
 *   2. For each of the filterSize taps, the coefficient is loaded and
 *      splatted (LOAD_FILTER + vec_splat) and the matching source line
 *      src[j] is accumulated through LOAD_L1 / yuv2planeX_8, once for
 *      columns x..x+7 and once for x+8..x+15.
 *   3. The accumulators are arithmetically shifted right by 19 (built as
 *      10 + 9 from two vec_splat_u32 immediates), packed with unsigned
 *      saturation down to 16 bytes and written to dest through VEC_ST.
 *
 * NOTE(review): joffset and xoffset are never referenced directly here;
 * presumably LOAD_FILTER / LOAD_L1 / yuv2planeX_8 pick them up by name --
 * verify against the macro definitions before renaming or removing them.
 */
static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize,                                  const int16_t **src, uint8_t *dest,                                  const uint8_t *dither, int offset, int x){    register int i, j;    LOCAL_ALIGNED(16, int, val, [16]);    vector signed int vo1, vo2, vo3, vo4;    vector unsigned short vs1, vs2;    vector unsigned char vf;    vector unsigned int altivec_vectorShiftInt19 =        vec_add(vec_splat_u32(10), vec_splat_u32(9));    for (i = 0; i < 16; i++)        val[i] = dither[(x + i + offset) & 7] << 12;    vo1 = vec_ld(0,  val);    vo2 = vec_ld(16, val);    vo3 = vec_ld(32, val);    vo4 = vec_ld(48, val);    for (j = 0; j < filterSize; j++) {        unsigned int joffset=j<<1;        unsigned int xoffset=x<<1;        vector unsigned char perm;        vector signed short l1,vLumFilter;        LOAD_FILTER(vLumFilter,filter);        vLumFilter = vec_splat(vLumFilter, 0);        LOAD_L1(l1,src[j],perm);        yuv2planeX_8(vo1, vo2, l1, src[j], x,     perm, vLumFilter);        yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter);    }    vo1 = vec_sra(vo1, altivec_vectorShiftInt19);    vo2 = vec_sra(vo2, altivec_vectorShiftInt19);    vo3 = vec_sra(vo3, altivec_vectorShiftInt19);    vo4 = vec_sra(vo4, altivec_vectorShiftInt19);    vs1 = vec_packsu(vo1, vo2);    vs2 = vec_packsu(vo3, vo4);    vf  = vec_packsu(vs1, vs2);    VEC_ST(vf, 0, dest);}
开发者ID:0day-ci,项目名称:FFmpeg,代码行数:41,


示例6: audio_convert_float_to_s16_altivec

void audio_convert_float_to_s16_altivec(int16_t *out,      const float *in, size_t samples){   // Unaligned loads/store is a bit expensive, so we optimize for the good path (very likely).   if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)   {      size_t i;      for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)      {         vector float input0 = vec_ld( 0, in);         vector float input1 = vec_ld(16, in);         vector signed int result0 = vec_cts(input0, 15);         vector signed int result1 = vec_cts(input1, 15);         vec_st(vec_packs(result0, result1), 0, out);      }      audio_convert_float_to_s16_C(out, in, samples - i);   }   else      audio_convert_float_to_s16_C(out, in, samples);}
开发者ID:Jalle19,项目名称:RetroArch,代码行数:21,


示例7: scalarproduct_and_madd_int16_altivec

static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul){    LOAD_ZERO;    vec_s16 *pv1 = (vec_s16*)v1;    register vec_s16 muls = {mul,mul,mul,mul,mul,mul,mul,mul};    register vec_s16 t0, t1, i0, i1, i4;    register vec_s16 i2 = vec_ld(0, v2), i3 = vec_ld(0, v3);    register vec_s32 res = zero_s32v;    register vec_u8 align = vec_lvsl(0, v2);    int32_t ires;    order >>= 4;    do {        i1 = vec_ld(16, v2);        t0 = vec_perm(i2, i1, align);        i2 = vec_ld(32, v2);        t1 = vec_perm(i1, i2, align);        i0 = pv1[0];        i1 = pv1[1];        res = vec_msum(t0, i0, res);        res = vec_msum(t1, i1, res);        i4 = vec_ld(16, v3);        t0 = vec_perm(i3, i4, align);        i3 = vec_ld(32, v3);        t1 = vec_perm(i4, i3, align);        pv1[0] = vec_mladd(t0, muls, i0);        pv1[1] = vec_mladd(t1, muls, i1);        pv1 += 2;        v2  += 8;        v3  += 8;    } while(--order);    res = vec_splat(vec_sums(res, zero_s32v), 3);    vec_ste(res, 0, &ires);    return ires;}
开发者ID:0x0B501E7E,项目名称:ffmpeg,代码行数:34,


示例8: float_to_int16_altivec

void float_to_int16_altivec(int16_t *dst, const float *src, int len){    int i;    vector float s0, s1;    vector signed int t0, t1;    vector signed short d0, d1, d;    vector unsigned char align;    if(((long)dst)&15) //FIXME    for(i=0; i<len-7; i+=8) {        s0 = vec_ld(0, src+i);        s1 = vec_ld(16, src+i);        t0 = vec_cts(s0, 0);        d0 = vec_ld(0, dst+i);        t1 = vec_cts(s1, 0);        d1 = vec_ld(15, dst+i);        d = vec_packs(t0,t1);        d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));        align = vec_lvsr(0, dst+i);        d0 = vec_perm(d1, d, align);        d1 = vec_perm(d, d1, align);        vec_st(d0, 0, dst+i);        vec_st(d1,15, dst+i);    }    else    for(i=0; i<len-7; i+=8) {        s0 = vec_ld(0, src+i);        s1 = vec_ld(16, src+i);        t0 = vec_cts(s0, 0);        t1 = vec_cts(s1, 0);        d = vec_packs(t0,t1);        vec_st(d, 0, dst+i);    }}
开发者ID:BOTCrusher,项目名称:sagetv,代码行数:33,


示例9: h264_idct_add_altivec

/*
 * 4x4 inverse transform of `block`, added to four rows of pixels at dst.
 *
 * Steps visible in this function:
 *   - block[0] gets +32 so that the final >> 6 rounds;
 *   - the 16 coefficients are loaded as two vectors and vec_sld by 8 bytes
 *     produces the other two row vectors;
 *   - block is cleared (16 * sizeof(int16_t) bytes);
 *   - VEC_1D_DCT, VEC_TRANSPOSE_4, VEC_1D_DCT perform the two 1-D passes;
 *   - results are shifted right by 6 and added to dst row by row through
 *     VEC_LOAD_U8_ADD_S16_STORE_U8, advancing dst by stride in between.
 *
 * NOTE(review): va_u8, va_u32, vdst_ss, vdst, vdst_orig, vdst_mask, element
 * and the LOAD_ZERO constants are not referenced directly; presumably the
 * macros use them by name -- verify before renaming or removing anything.
 */
static void h264_idct_add_altivec(uint8_t *dst, int16_t *block, int stride){    vec_s16 va0, va1, va2, va3;    vec_s16 vz0, vz1, vz2, vz3;    vec_s16 vtmp0, vtmp1, vtmp2, vtmp3;    vec_u8 va_u8;    vec_u32 va_u32;    vec_s16 vdst_ss;    const vec_u16 v6us = vec_splat_u16(6);    vec_u8 vdst, vdst_orig;    vec_u8 vdst_mask = vec_lvsl(0, dst);    int element = ((unsigned long)dst & 0xf) >> 2;    LOAD_ZERO;    block[0] += 32;  /* add 32 as a DC-level for rounding */    vtmp0 = vec_ld(0,block);    vtmp1 = vec_sld(vtmp0, vtmp0, 8);    vtmp2 = vec_ld(16,block);    vtmp3 = vec_sld(vtmp2, vtmp2, 8);    memset(block, 0, 16 * sizeof(int16_t));    VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);    VEC_TRANSPOSE_4(va0,va1,va2,va3,vtmp0,vtmp1,vtmp2,vtmp3);    VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);    va0 = vec_sra(va0,v6us);    va1 = vec_sra(va1,v6us);    va2 = vec_sra(va2,v6us);    va3 = vec_sra(va3,v6us);    VEC_LOAD_U8_ADD_S16_STORE_U8(va0);    dst += stride;    VEC_LOAD_U8_ADD_S16_STORE_U8(va1);    dst += stride;    VEC_LOAD_U8_ADD_S16_STORE_U8(va2);    dst += stride;    VEC_LOAD_U8_ADD_S16_STORE_U8(va3);}
开发者ID:DDTChen,项目名称:CookieVLC,代码行数:39,


示例10: int32_to_float_fmul_scalar_altivec

static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len){    union {        vector float v;        float s[4];    } mul_u;    int i;    vector float src1, src2, dst1, dst2, mul_v, zero;    zero = (vector float)vec_splat_u32(0);    mul_u.s[0] = mul;    mul_v = vec_splat(mul_u.v, 0);    for(i=0; i<len; i+=8) {        src1 = vec_ctf(vec_ld(0,  src+i), 0);        src2 = vec_ctf(vec_ld(16, src+i), 0);        dst1 = vec_madd(src1, mul_v, zero);        dst2 = vec_madd(src2, mul_v, zero);        vec_st(dst1,  0, dst+i);        vec_st(dst2, 16, dst+i);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:22,


示例11: sub_int16_altivec

static void sub_int16_altivec(int16_t * v1, int16_t * v2, int order){    int i;    register vec_s16_t vec, *pv;    for(i = 0; i < order; i += 8){        pv = (vec_s16_t*)v2;        vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2));        vec_st(vec_sub(vec_ld(0, v1), vec), 0, v1);        v1 += 8;        v2 += 8;    }}
开发者ID:Haaaaaank,项目名称:avbin,代码行数:13,


示例12: vorbis_inverse_coupling_altivec

static void vorbis_inverse_coupling_altivec(float *mag, float *ang,                                            intptr_t blocksize){    int i;    vector float m, a;    vector bool int t0, t1;    const vector unsigned int v_31 = //XXX        vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1));    for (i = 0; i < blocksize; i += 4) {        m = vec_ld(0, mag+i);        a = vec_ld(0, ang+i);        t0 = vec_cmple(m, (vector float)vec_splat_u32(0));        t1 = vec_cmple(a, (vector float)vec_splat_u32(0));        a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31));        t0 = (vector bool int)vec_and(a, t1);        t1 = (vector bool int)vec_andc(a, t1);        a = vec_sub(m, (vector float)t1);        m = vec_add(m, (vector float)t0);        vec_stl(a, 0, ang+i);        vec_stl(m, 0, mag+i);    }}
开发者ID:TaoheGit,项目名称:hmi_sdl_android,代码行数:22,


示例13: sad16_altivec

static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h){    int i;    int s;    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);    vector unsigned char perm = vec_lvsl(0, pix2);    vector unsigned char t1, t2, t3,t4, t5;    vector unsigned int sad;    vector signed int sumdiffs;    sad = (vector unsigned int)vec_splat_u32(0);    for (i = 0; i < h; i++) {        /* Read potentially unaligned pixels into t1 and t2 */        vector unsigned char pix2l = vec_ld( 0, pix2);        vector unsigned char pix2r = vec_ld(15, pix2);        t1 = vec_ld(0, pix1);        t2 = vec_perm(pix2l, pix2r, perm);        /* Calculate a sum of abs differences vector */        t3 = vec_max(t1, t2);        t4 = vec_min(t1, t2);        t5 = vec_sub(t3, t4);        /* Add each 4 pixel group together and put 4 results into sad */        sad = vec_sum4s(t5, sad);        pix1 += line_size;        pix2 += line_size;    }    /* Sum up the four partial sums, and put the result into s */    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_splat(sumdiffs, 3);    vec_ste(sumdiffs, 0, &s);    return s;}
开发者ID:0xFFeng,项目名称:ffmpeg,代码行数:39,


示例14: vector_fmul_add_altivec

static void vector_fmul_add_altivec(float *dst, const float *src0,                                    const float *src1, const float *src2,                                    int len){    int i;    vector float d, s0, s1, s2, t0, t1, edges;    vector unsigned char align = vec_lvsr(0,dst),                         mask = vec_lvsl(0, dst);    for (i=0; i<len-3; i+=4) {        t0 = vec_ld(0, dst+i);        t1 = vec_ld(15, dst+i);        s0 = vec_ld(0, src0+i);        s1 = vec_ld(0, src1+i);        s2 = vec_ld(0, src2+i);        edges = vec_perm(t1 ,t0, mask);        d = vec_madd(s0,s1,s2);        t1 = vec_perm(d, edges, align);        t0 = vec_perm(edges, d, align);        vec_st(t1, 15, dst+i);        vec_st(t0, 0, dst+i);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:23,


示例15: float_to_int16_altivec

static void float_to_int16_altivec(int16_t *dst, const float *src, long len){    int i;    vector signed short d0, d1, d;    vector unsigned char align;    if(((long)dst)&15) //FIXME    for(i=0; i<len-7; i+=8) {        d0 = vec_ld(0, dst+i);        d = float_to_int16_one_altivec(src+i);        d1 = vec_ld(15, dst+i);        d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));        align = vec_lvsr(0, dst+i);        d0 = vec_perm(d1, d, align);        d1 = vec_perm(d, d1, align);        vec_st(d0, 0, dst+i);        vec_st(d1,15, dst+i);    }    else    for(i=0; i<len-7; i+=8) {        d = float_to_int16_one_altivec(src+i);        vec_st(d, 0, dst+i);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:23,


示例16: main

/*
 * Calls vec_dst, vec_ld, vec_lde and vec_lvsl with pointers to
 * const-qualified objects (a const vector float and a const float) and
 * returns 0; none of the loaded values is used afterwards.
 *
 * NOTE(review): `i` is passed to vec_dst without having been assigned, so
 * this looks like a compile-only check of the intrinsics' argument types
 * rather than code meant to be run -- confirm.
 */
int main (int argc, const char * argv[]){  int i;  const float cf = 1.0;  vector float v;  const vector float cv = (vector float){1.0, 2.0, 3.0, 4.0};  vec_dst(&cv, i, 0);  v = vec_ld(0, &cv);	  v = vec_lde(0, &cf);  vec_lvsl(0, &cf);    return 0;}
开发者ID:coolshou,项目名称:screenplay-dx_Toolchain,代码行数:14,


示例17: ff_h264_idct8_add_altivec

/**
 * 8x8 inverse transform of dct, added to eight rows of pixels at dst.
 *
 * The coefficients are loaded as eight row vectors, run through
 * IDCT8_1D_ALTIVEC, transposed, run through IDCT8_1D_ALTIVEC again, and
 * each resulting row is handed to ALTIVEC_STORE_SUM_CLIP together with the
 * load/store permute masks derived from dst.
 *
 * @param dst    destination pixels
 * @param dct    64 coefficients; dct[0] is modified (+32 for rounding)
 * @param stride distance in bytes between two rows of dst
 *
 * NOTE(review): onev, twov, sixv and the LOAD_ZERO constants are not
 * referenced directly; presumably the macros use them by name -- verify
 * before renaming or removing them.
 */
static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride )
{
    vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;
    vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;
    vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;

    vec_u8 perm_ldv = vec_lvsl(0, dst);
    vec_u8 perm_stv = vec_lvsr(8, dst);

    const vec_u16 onev = vec_splat_u16(1);
    const vec_u16 twov = vec_splat_u16(2);
    const vec_u16 sixv = vec_splat_u16(6);

    const vec_u8 sel = (vec_u8)
    {
        0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1
    };
    LOAD_ZERO;

    dct[0] += 32; /* rounding for the >>6 at the end */

    s0 = vec_ld(0x00, (int16_t *)dct);
    s1 = vec_ld(0x10, (int16_t *)dct);
    s2 = vec_ld(0x20, (int16_t *)dct);
    s3 = vec_ld(0x30, (int16_t *)dct);
    s4 = vec_ld(0x40, (int16_t *)dct);
    s5 = vec_ld(0x50, (int16_t *)dct);
    s6 = vec_ld(0x60, (int16_t *)dct);
    s7 = vec_ld(0x70, (int16_t *)dct);

    IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,
                     d0, d1, d2, d3, d4, d5, d6, d7);

    TRANSPOSE8( d0,  d1,  d2,  d3,  d4,  d5,  d6, d7 );

    IDCT8_1D_ALTIVEC(d0,  d1,  d2,  d3,  d4,  d5,  d6, d7,
                     idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);

    ALTIVEC_STORE_SUM_CLIP(&dst[0*stride], idct0, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[1*stride], idct1, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[2*stride], idct2, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[3*stride], idct3, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[4*stride], idct4, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
}
开发者ID:248668342,项目名称:ffmpeg-windows,代码行数:47,


示例18: a52_resample_STEREO_to_2_altivec

static int a52_resample_STEREO_to_2_altivec(float * _f, int16_t * s16){#if 0  int i;  int32_t * f = (int32_t *) _f;  for (i = 0; i < 256; i++) {    s16[2*i] = convert (f[i]);    s16[2*i+1] = convert (f[i+256]);  }  return 2*256;#else  int i = 0;  int32_t * f = (int32_t *) _f;  register vector signed int f0, f4, f256, f260;  register vector signed short reven, rodd, r0, r1;  for (i = 0; i < 256; i+= 8) {    f0 = vec_ld(0, f);    f4 = vec_ld(16, f);    f256 = vec_ld(1024, f);    f260 = vec_ld(1040, f);    reven = convert16_altivec(f0, f4);    rodd = convert16_altivec(f256, f260);    r0 = vec_mergeh(reven, rodd);    r1 = vec_mergel(reven, rodd);    // FIXME can be merged to spare some I/O    unaligned_store(r0, 0, s16);    unaligned_store(r1, 16, s16);    f += 8;    s16 += 16;  }  return(2*256);#endif}
开发者ID:dr4g0nsr,项目名称:mplayer-skyviia-8860,代码行数:37,


示例19: vector_fmul_window_altivec

static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){    union {        vector float v;        float s[4];    } vadd;    vector float vadd_bias, zero, t0, t1, s0, s1, wi, wj;    const vector unsigned char reverse = vcprm(3,2,1,0);    int i,j;    dst += len;    win += len;    src0+= len;    vadd.s[0] = add_bias;    vadd_bias = vec_splat(vadd.v, 0);    zero = (vector float)vec_splat_u32(0);    for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {        s0 = vec_ld(i, src0);        s1 = vec_ld(j, src1);        wi = vec_ld(i, win);        wj = vec_ld(j, win);        s1 = vec_perm(s1, s1, reverse);        wj = vec_perm(wj, wj, reverse);        t0 = vec_madd(s0, wj, vadd_bias);        t0 = vec_nmsub(s1, wi, t0);        t1 = vec_madd(s0, wi, vadd_bias);        t1 = vec_madd(s1, wj, t1);        t1 = vec_perm(t1, t1, reverse);        vec_st(t0, i, dst);        vec_st(t1, j, dst);    }}
开发者ID:119,项目名称:dropcam_for_iphone,代码行数:37,


示例20: expf

float expf(float x) {#elsefloat vexpf(float x) {#endif  vector float vexpa, va;//, vx, vn, va, vb,//               v0, vlog2, vln2;    register float exp, a;//, b, b2, b3, b4;//, b6, b8, b10, R0, R;  float __attribute__((aligned(16))) xa[4];  xa[0] = x;/*  // set up a few constants  vlog2 = vec_ld(0, &C_EXPF[0]);  v0    = (vector float) vec_splat_u32(0);  vln2  = vec_splat(vlog2, 1);  vlog2 = vec_splat(vlog2, 0);  // Load x into a vector float  vx = vec_ld(0, xa);  vx = vec_splat(vx, 0);  // Split x = n*log2e + b  vn = vec_madd(vx, vlog2e, v0);  vn = vec_floor(vn);*/        xa[0] = truncf(x*M_LOG2E);  va = vec_ld(0, xa);  vexpa = vec_expte(va);  a = xa[0] * M_LN2;  vec_st(vexpa, 0, xa);/*  b = x - a;  b2 = b*b;  b3 = b2*b;  b4 = b2*b2;  b6 = b4*b2;  b8 = b6*b2;  b10 = b8*b2;  R0 =       0.1666666666666666019037   *b2 - 0.00277777777770155933842  *b4           + 6.61375632143793436117e-05 *b6 - 1.65339022054652515390e-06 *b8           + 4.13813679705723846039e-08 *b10;  R = b - R0;  //exp = 1.0 + 2.0*b/(2.0 - R);  exp = (1680.0 + 840*b + 180*b2 + 20*b3 + b4)/(1680 - 840*b + 180*b2 - 20*b3 + b4);*/  exp = xa[0];  return exp;}
开发者ID:K1773R,项目名称:libfreevec,代码行数:49,


示例21: imageFilterSubFrom_Altivec

void imageFilterSubFrom_Altivec(unsigned char *dst, unsigned char *src, int length){    int n = length;    // Compute first few values so we're on a 16-byte boundary in dst    while( (((long)dst & 0xF) > 0) && (n > 0) ) {        SUBFROM_PIXEL();        --n; ++dst; ++src;    }    // Do bulk of processing using Altivec (sub 16 8-bit unsigned integers, with saturation)    while(n >= 16) {        vector unsigned char s = vec_ld(0,src);        vector unsigned char d = vec_ld(0,dst);        vector unsigned char r = vec_subs(d, s);        vec_st(r,0,dst);        n -= 16; src += 16; dst += 16;    }    // If any bytes are left over, deal with them individually    ++n;    BASIC_SUBFROM();}
开发者ID:brijohn,项目名称:onscripter-wii,代码行数:24,


示例22: scalarproduct_and_madd_int16_altivec

static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,                                                    const int16_t *v2,                                                    const int16_t *v3,                                                    int order, int mul){    LOAD_ZERO;    vec_s16 *pv1 = (vec_s16 *) v1;    register vec_s16 muls = { mul, mul, mul, mul, mul, mul, mul, mul };    register vec_s16 t0, t1, i0, i1, i4, i2, i3;    register vec_s32 res = zero_s32v;#if HAVE_BIGENDIAN    register vec_u8 align = vec_lvsl(0, v2);    i2 = vec_ld(0, v2);    i3 = vec_ld(0, v3);#endif    int32_t ires;    order >>= 4;    do {        GET_T(t0,t1,v2,i1,i2);        i0     = pv1[0];        i1     = pv1[1];        res    = vec_msum(t0, i0, res);        res    = vec_msum(t1, i1, res);        GET_T(t0,t1,v3,i4,i3);        pv1[0] = vec_mladd(t0, muls, i0);        pv1[1] = vec_mladd(t1, muls, i1);        pv1   += 2;        v2    += 16;        v3    += 16;    } while (--order);    res = vec_splat(vec_sums(res, zero_s32v), 3);    vec_ste(res, 0, &ires);    return ires;}
开发者ID:TaoheGit,项目名称:hmi_sdl_android,代码行数:36,


示例23: main

/* Place the content of the array of structures   in vectors x_vec, y_vec, z_vec, and t_vec */int main(int argc, char **argv) {   vector float x_vec, y_vec, z_vec,       t_vec, hold[4], tmp[4];      /* Load structures into vectors */   hold[0] = vec_ld(0, (float*)p_motion);   hold[1] = vec_ld(0, (float*)&p_motion[1]);      hold[2] = vec_ld(0, (float*)&p_motion[2]);      hold[3] = vec_ld(0, (float*)&p_motion[3]);      /* Perform first step of the swizzle */   tmp[0] = vec_mergeh(hold[0], hold[2]);   tmp[1] = vec_mergeh(hold[1], hold[3]);   tmp[2] = vec_mergel(hold[0], hold[2]);   tmp[3] = vec_mergel(hold[1], hold[3]);      /* Perform second step of the swizzle */   x_vec = vec_mergeh(tmp[0], tmp[1]);   y_vec = vec_mergel(tmp[0], tmp[1]);   z_vec = vec_mergeh(tmp[2], tmp[3]);   t_vec = vec_mergel(tmp[2], tmp[3]);   return 0;}
开发者ID:pstrinkle,项目名称:misc-umbc,代码行数:26,


示例24: x264_add8x8_idct8_altivec

void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[64] ){    vec_u16_t onev = vec_splat_u16(1);    vec_u16_t twov = vec_splat_u16(2);    dct[0] += 32; // rounding for the >>6 at the end    vec_s16_t s0, s1, s2, s3, s4, s5, s6, s7;    s0 = vec_ld(0x00, dct);    s1 = vec_ld(0x10, dct);    s2 = vec_ld(0x20, dct);    s3 = vec_ld(0x30, dct);    s4 = vec_ld(0x40, dct);    s5 = vec_ld(0x50, dct);    s6 = vec_ld(0x60, dct);    s7 = vec_ld(0x70, dct);    vec_s16_t d0, d1, d2, d3, d4, d5, d6, d7;    IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,  d0, d1, d2, d3, d4, d5, d6, d7);    vec_s16_t tr0, tr1, tr2, tr3, tr4, tr5, tr6, tr7;    VEC_TRANSPOSE_8( d0,  d1,  d2,  d3,  d4,  d5,  d6, d7,                    tr0, tr1, tr2, tr3, tr4, tr5, tr6, tr7);    vec_s16_t idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;    IDCT8_1D_ALTIVEC(tr0,     tr1,   tr2,   tr3,   tr4,   tr5,   tr6,   tr7,                     idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);    vec_u8_t perm_ldv = vec_lvsl(0, dst);    vec_u8_t perm_stv = vec_lvsr(8, dst);    vec_u16_t sixv = vec_splat_u16(6);    const vec_u8_t sel = (vec_u8_t) CV(0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1);    LOAD_ZERO;    ALTIVEC_STORE_SUM_CLIP(&dst[0*FDEC_STRIDE], idct0, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[1*FDEC_STRIDE], idct1, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[2*FDEC_STRIDE], idct2, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[3*FDEC_STRIDE], idct3, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[4*FDEC_STRIDE], idct4, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[5*FDEC_STRIDE], idct5, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[6*FDEC_STRIDE], idct6, perm_ldv, perm_stv, sel);    ALTIVEC_STORE_SUM_CLIP(&dst[7*FDEC_STRIDE], idct7, perm_ldv, perm_stv, sel);}
开发者ID:0x0B501E7E,项目名称:x264,代码行数:45,


示例25: b

voidb(){  z = vec_add (x, y);  /* Make sure the predicates accept correct argument types.  */  int1 = vec_all_in (f, g);  int1 = vec_all_ge (f, g);  int1 = vec_all_eq (c, d);  int1 = vec_all_ne (s, t);  int1 = vec_any_eq (i, j);  int1 = vec_any_ge (f, g);  int1 = vec_all_ngt (f, g);  int1 = vec_any_ge (c, d);  int1 = vec_any_ge (s, t);  int1 = vec_any_ge (i, j);  int1 = vec_any_ge (c, d);  int1 = vec_any_ge (s, t);  int1 = vec_any_ge (i, j);  vec_mtvscr (i);  vec_dssall ();  s = (vector signed short) vec_mfvscr ();  vec_dss (3);  vec_dst (pi, int1 + int2, 3);  vec_dstst (pi, int1 + int2, 3);  vec_dststt (pi, int1 + int2, 3);  vec_dstt (pi, int1 + int2, 3);  uc = (vector unsigned char) vec_lvsl (int1 + 69, (signed int *) pi);  uc = (vector unsigned char) vec_lvsr (int1 + 69, (signed int *) pi);  c = vec_lde (int1, (signed char *) pi);  s = vec_lde (int1, (signed short *) pi);  i = vec_lde (int1, (signed int *) pi);  i = vec_ldl (int1, pi);  i = vec_ld (int1, pi);  vec_st (i, int2, pi);  vec_ste (c, int2, (signed char *) pi);  vec_ste (s, int2, (signed short *) pi);  vec_ste (i, int2, (signed int *) pi);  vec_stl (i, int2, pi);}
开发者ID:Akheon23,项目名称:chromecast-mirrored-source.toolchain,代码行数:46,


示例26: scalarproduct_int16_altivec

/**
 * Scalar product of two int16_t vectors, eight elements per iteration.
 *
 * v1 goes through vec_unaligned_load(); v2 is read with a plain vec_ld.
 *
 * @param v1    first operand
 * @param v2    second operand
 * @param order number of elements; processed in whole groups of eight
 * @return the sum of v1[i] * v2[i]
 */
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
                                           int order)
{
    LOAD_ZERO;
    register vec_s32 acc = vec_splat_s32(0);
    int32_t ires;
    int done;

    for (done = 0; done < order; done += 8, v1 += 8, v2 += 8) {
        register vec_s16 lhs = vec_unaligned_load(v1);
        /* Four partial sums of products for this group of eight. */
        register vec_s32 partial = vec_msum(lhs, vec_ld(0, v2), zero_s32v);
        /* Fold them into the running total kept in lane 3. */
        acc = vec_sums(partial, acc);
    }

    acc = vec_splat(acc, 3);
    vec_ste(acc, 0, &ires);

    return ires;
}
开发者ID:Bjelijah,项目名称:EcamTurnH265,代码行数:20,


示例27: ff_put_pixels16_altivec

/**
 * Copy a 16-pixel-wide block of h rows from pixels to block.
 *
 * The source may be unaligned: every row is rebuilt from two aligned loads
 * with vec_perm. The destination is written with a plain vec_st. Four rows
 * are handled per loop iteration.
 *
 * This function assumes that ((line_size % 16) == 0).
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size distance in bytes between rows, for both buffers
 * @param h         number of rows; handled in groups of four
 */
void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2;
    register vector unsigned char pixelsv1B, pixelsv2B;
    register vector unsigned char pixelsv1C, pixelsv2C;
    register vector unsigned char pixelsv1D, pixelsv2D;

    /* One mask serves every row because line_size is a multiple of 16. */
    register vector unsigned char perm = vec_lvsl(0, pixels);
    int i;
    register ptrdiff_t line_size_2 = line_size << 1;
    register ptrdiff_t line_size_3 = line_size + line_size_2;
    register ptrdiff_t line_size_4 = line_size << 2;

    /*
     * hand-unrolling the loop by 4 gains about 15%
     * minimum execution time goes from 74 to 60 cycles
     * it's faster than -funroll-loops, but using
     * -funroll-loops w/ this is bad - 74 cycles again.
     * all this is on a 7450, tuning for the 7450
     */
    for (i = 0; i < h; i += 4) {
        pixelsv1  = vec_ld( 0, pixels);
        pixelsv2  = vec_ld(15, pixels);
        pixelsv1B = vec_ld(line_size, pixels);
        pixelsv2B = vec_ld(15 + line_size, pixels);
        pixelsv1C = vec_ld(line_size_2, pixels);
        pixelsv2C = vec_ld(15 + line_size_2, pixels);
        pixelsv1D = vec_ld(line_size_3, pixels);
        pixelsv2D = vec_ld(15 + line_size_3, pixels);
        vec_st(vec_perm(pixelsv1, pixelsv2, perm),
               0, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
               line_size, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
               line_size_2, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
               line_size_3, (unsigned char*)block);
        pixels+=line_size_4;
        block +=line_size_4;
    }
}
开发者ID:AVLeo,项目名称:libav,代码行数:40,


示例28: put_vp8_pixels16_altivec

/**
 * Copy a 16-pixel-wide block of h rows from src to dst.
 *
 * The source may be unaligned: every row is rebuilt from two aligned loads
 * with vec_perm. The destination is written with a plain vec_st. Four rows
 * are handled per loop iteration.
 *
 * @param dst     destination
 * @param dstride distance in bytes between destination rows
 * @param src     source
 * @param sstride distance in bytes between source rows
 * @param h       number of rows; handled in groups of four
 * @param mx      unused
 * @param my      unused
 *
 * NOTE(review): a single permute mask computed from src is reused for every
 * row, which presumably requires sstride to be a multiple of 16 -- confirm
 * against the callers.
 */
static void put_vp8_pixels16_altivec(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
{
    register vector unsigned char pixelsv1, pixelsv2;
    register vector unsigned char pixelsv1B, pixelsv2B;
    register vector unsigned char pixelsv1C, pixelsv2C;
    register vector unsigned char pixelsv1D, pixelsv2D;

    register vector unsigned char perm = vec_lvsl(0, src);
    int i;
    register ptrdiff_t dstride2 = dstride << 1, sstride2 = sstride << 1;
    register ptrdiff_t dstride3 = dstride2 + dstride, sstride3 = sstride + sstride2;
    register ptrdiff_t dstride4 = dstride << 2, sstride4 = sstride << 2;

    /*
     * hand-unrolling the loop by 4 gains about 15%
     * minimum execution time goes from 74 to 60 cycles
     * it's faster than -funroll-loops, but using
     * -funroll-loops w/ this is bad - 74 cycles again.
     * all this is on a 7450, tuning for the 7450
     */
    for (i = 0; i < h; i += 4) {
        pixelsv1  = vec_ld( 0, src);
        pixelsv2  = vec_ld(15, src);
        pixelsv1B = vec_ld(sstride, src);
        pixelsv2B = vec_ld(15 + sstride, src);
        pixelsv1C = vec_ld(sstride2, src);
        pixelsv2C = vec_ld(15 + sstride2, src);
        pixelsv1D = vec_ld(sstride3, src);
        pixelsv2D = vec_ld(15 + sstride3, src);
        vec_st(vec_perm(pixelsv1, pixelsv2, perm),
               0, (unsigned char*)dst);
        vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
               dstride, (unsigned char*)dst);
        vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
               dstride2, (unsigned char*)dst);
        vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
               dstride3, (unsigned char*)dst);
        src += sstride4;
        dst += dstride4;
    }
}
开发者ID:elnormous,项目名称:libav,代码行数:39,



注:本文中的vec_ld函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vec_len函数代码示例
C++ vec_extract函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。