您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vld1q_u8函数代码示例

51自学网 2021-06-03 09:44:56
  C++
这篇《C++ vld1q_u8函数代码示例》教程写得很实用,希望能帮到您。

本文整理汇总了C++中vld1q_u8函数的典型用法代码示例。如果您正苦于以下问题:C++ vld1q_u8函数的具体用法是什么?C++ vld1q_u8怎么用?有哪些C++ vld1q_u8的使用例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vld1q_u8函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: var_filter_block2d_bil_w16

/*
 * Two-tap filter over a block whose width is processed 16 pixels at a time.
 *
 * For every output pixel the code computes
 *   (src[j] * filter[0] + src[j + pixel_step] * filter[1])
 * in 16-bit lanes and narrows it back to 8 bits with a rounding right shift
 * by FILTER_BITS.
 *
 * src_ptr             - first input row
 * output_ptr          - first output row; rows are output_width bytes apart
 * src_pixels_per_line - distance between input rows, in bytes
 * pixel_step          - offset of the second tap relative to the first
 * output_height       - number of rows to produce
 * output_width        - number of pixels per row; consumed in steps of 16
 * filter              - the two tap weights, filter[0] and filter[1]
 */
static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
                                       uint8_t *output_ptr,
                                       unsigned int src_pixels_per_line,
                                       int pixel_step,
                                       unsigned int output_height,
                                       unsigned int output_width,
                                       const uint8_t *filter) {
  const uint8x8_t tap0 = vmov_n_u8(filter[0]);
  const uint8x8_t tap1 = vmov_n_u8(filter[1]);
  unsigned int row;

  for (row = 0; row < output_height; ++row) {
    unsigned int col;

    for (col = 0; col < output_width; col += 16) {
      const uint8x16_t first = vld1q_u8(&src_ptr[col]);
      const uint8x16_t second = vld1q_u8(&src_ptr[col + pixel_step]);

      /* Weighted sums of both taps, widened to 16 bits per lane. */
      uint16x8_t sum_lo = vmull_u8(vget_low_u8(first), tap0);
      uint16x8_t sum_hi = vmull_u8(vget_high_u8(first), tap0);
      sum_lo = vmlal_u8(sum_lo, vget_low_u8(second), tap1);
      sum_hi = vmlal_u8(sum_hi, vget_high_u8(second), tap1);

      /* Rounding shift back down to 8 bits, then store all 16 pixels. */
      vst1q_u8(&output_ptr[col],
               vcombine_u8(vrshrn_n_u16(sum_lo, FILTER_BITS),
                           vrshrn_n_u16(sum_hi, FILTER_BITS)));
    }

    /* Advance to the next input and output row. */
    src_ptr += src_pixels_per_line;
    output_ptr += output_width;
  }
}
开发者ID:Wafflespeanut,项目名称:gecko-dev,代码行数:27,


示例2: norx_aead_decrypt

/*
 * Decrypt and verify an AEAD message.
 *
 * m, mlen - output buffer and output length; *mlen is set to
 *           clen - BYTES(NORX_T)
 * a, alen - data absorbed before decryption (tagged HEADER_TAG)
 * c, clen - input: ciphertext followed by the tag
 * z, zlen - data absorbed after decryption (tagged TRAILER_TAG)
 * nonce, key - passed to INITIALISE
 *
 * Returns 0 when the tag matches, -1 when it does not or when clen is
 * shorter than BYTES(NORX_T).
 *
 * NOTE(review): DECRYPT_DATA writes to m before the tag is checked, and
 * nothing here clears m on a mismatch; callers must not use m when the
 * return value is -1.
 */
int norx_aead_decrypt(
  unsigned char *m, size_t *mlen,
  const unsigned char *a, size_t alen,
  const unsigned char *c, size_t clen,
  const unsigned char *z, size_t zlen,
  const unsigned char *nonce,
  const unsigned char *key)
{
    uint64x2_t S[8];
    uint32x4_t T[2];

    /* The input must at least hold the tag. */
    if (clen < BYTES(NORX_T)) { return -1; }

    *mlen = clen - BYTES(NORX_T);

    INITIALISE(S, nonce, key);
    ABSORB_DATA(S, a, alen, HEADER_TAG);
    DECRYPT_DATA(S, m, c, clen - BYTES(NORX_T));
    ABSORB_DATA(S, z, zlen, TRAILER_TAG);
    FINALISE(S);

    /*
     * Verify tag: compare S[0] with the 16 bytes starting at the tag and S[1]
     * with the 16 bytes starting half-way through it (presumably
     * BYTES(NORX_T) is 32 -- TODO confirm). The lane-wise compare results are
     * ANDed together so the only branch is the final all-ones test.
     */
    T[0] = vceqq_u32(U64TOU32(S[0]), U8TOU32( vld1q_u8((uint8_t *)(c + clen - BYTES(NORX_T)  )) ));
    T[1] = vceqq_u32(U64TOU32(S[1]), U8TOU32( vld1q_u8((uint8_t *)(c + clen - BYTES(NORX_T)/2)) ));
    T[0] = vandq_u32(T[0], T[1]);
    return 0xFFFFFFFFFFFFFFFFULL == (vgetq_lane_u64(U32TOU64(T[0]), 0) & vgetq_lane_u64(U32TOU64(T[0]), 1)) ? 0 : -1;
}
开发者ID:0x64616E69656C,项目名称:supercop,代码行数:28,


示例3: compute_ham_similarity_64

/*
 * Hamming similarity of two byte buffers: the number of bit positions in
 * which `ref` and `circ_array` agree.
 *
 * Note: size is given in bytes.
 *
 * ref, circ_array - buffers of at least `size` bytes each
 * size            - number of bytes to compare
 *
 * Returns size * 8 minus the number of differing bits.
 *
 * Fixes over the previous version:
 *  - the accumulator is zero-initialised (it was read uninitialised);
 *  - the loop index is signed, so size < 16 no longer wraps the bound and
 *    runs off the buffers;
 *  - the tail is counted byte by byte instead of loading a full 16-byte
 *    vector past the end of the data and indexing lanes with a variable.
 *
 * Each 16-bit accumulator lane grows by at most 16 per iteration.
 * NOTE(review): setbits() is defined elsewhere; presumably it sums the
 * lanes of the accumulator -- confirm.
 */
static inline int compute_ham_similarity_64(unsigned short* ref, unsigned short* circ_array,
                                 int size)
{
    const uint8_t *ref_c = (const uint8_t *)ref;
    const uint8_t *circ_c = (const uint8_t *)circ_array;
    uint16x8_t acc = vdupq_n_u16(0);
    unsigned int count;
    int i = 0;

    /* Whole 16-byte chunks: per-byte popcount of the XOR, pairwise widened. */
    for (; size - i >= 16; i += 16) {
        const uint8x16_t a = vld1q_u8(&ref_c[i]);
        const uint8x16_t b = vld1q_u8(&circ_c[i]);
        acc = vaddq_u16(acc, vpaddlq_u8(vcntq_u8(veorq_u8(a, b))));
    }
    count = setbits(acc);

    /* Remaining size % 16 bytes, handled without touching memory past size. */
    for (; i < size; ++i) {
        unsigned int diff = (unsigned int)(ref_c[i] ^ circ_c[i]);
        while (diff != 0) {
            diff &= diff - 1; /* clear the lowest set bit */
            ++count;
        }
    }

    return size * 8 - count;
}
开发者ID:umgupta,项目名称:hamming_code,代码行数:30,


示例4: idct32x32_1_add_neg_kernel

/*
 * Subtract `res` (with unsigned saturation) from the 32 bytes at *dest, then
 * advance *dest by `stride` bytes.
 */
static INLINE void idct32x32_1_add_neg_kernel(uint8_t **dest, const int stride,
                                              const uint8x16_t res) {
  uint8_t *const row = *dest;
  const uint8x16_t left_half = vld1q_u8(row);
  const uint8x16_t right_half = vld1q_u8(row + 16);

  vst1q_u8(row, vqsubq_u8(left_half, res));
  vst1q_u8(row + 16, vqsubq_u8(right_half, res));
  *dest = row + stride;
}
开发者ID:webmproject,项目名称:libvpx,代码行数:10,


示例5: aes_ofb

/*
 * XOR `in` with a keystream and write the result to `out`.
 *
 * The keystream starts from `iv`; each 16-byte keystream block is produced
 * by running aes_enc() on the previous one.
 *
 * in, out - input and output buffers of `length` bytes
 * length  - number of bytes to process
 * expkey  - key schedule handed to aes_enc()
 * iv      - 16-byte initialisation vector
 *
 * Fixes over the previous version:
 *  - loads and stores use uint8_t pointers (vld1q_u8/vst1q_u8 do not take
 *    int8_t *), and `in`/`iv` are no longer stripped of const;
 *  - when `length` is not a multiple of 16 the last partial block is
 *    processed byte by byte, so nothing is read or written past `length`.
 */
void aes_ofb(const unsigned char* in, unsigned char *out,
			int length, const char *expkey, const char* iv)
{
	uint8x16_t block = vld1q_u8((const uint8_t *)iv);
	int i = 0;

	/* Full 16-byte blocks. */
	for (; length - i >= 16; i += 16) {
		block = aes_enc(block, (uint8x16_t *)expkey);
		vst1q_u8((uint8_t *)&out[i], veorq_u8(vld1q_u8((const uint8_t *)&in[i]), block));
	}

	/* Partial final block: spill one more keystream block and use a prefix. */
	if (i < length) {
		uint8_t keystream[16];
		int k;

		block = aes_enc(block, (uint8x16_t *)expkey);
		vst1q_u8(keystream, block);
		for (k = 0; i < length; ++i, ++k) {
			out[i] = (unsigned char)(in[i] ^ keystream[k]);
		}
	}
}
开发者ID:MikkelsCykel,项目名称:ARMv8,代码行数:11,


示例6: ar_vmin_u8_neon

/*
 * Element-wise minimum: res[i] = min(a[i], b[i]) for i in [0, n).
 *
 * res  - output array of n bytes
 * a, b - input arrays of n bytes each
 * n    - number of elements
 *
 * Full groups of 16 elements go through NEON; the remaining n % 16 elements
 * are handled one at a time. The previous version always processed whole
 * vectors and therefore read and wrote past the arrays when n was not a
 * multiple of 16.
 */
void ar_vmin_u8_neon(uint8_t* res, const uint8_t* a, const uint8_t* b, uint32_t n)
{
   uint32_t i = 0;

   for (; n - i >= 16; i += 16) {
      const uint8x16_t a_loaded = vld1q_u8(&(a[i]));
      const uint8x16_t b_loaded = vld1q_u8(&(b[i]));
      vst1q_u8(&(res[i]), vminq_u8(a_loaded, b_loaded));
   }

   for (; i < n; i++) {
      res[i] = (a[i] < b[i]) ? a[i] : b[i];
   }
}
开发者ID:petecoup,项目名称:argon,代码行数:13,


示例7: dc_32x32

/*
 * Fill a 32x32 block with a single DC value.
 *
 * 'do_above' and 'do_left' facilitate branch removal when inlined.
 *
 * The DC value is the rounded average of the 32 bytes at `above` and/or the
 * 32 bytes at `left`, depending on which flags are set; with neither flag
 * set the block is filled with 0x80.
 */
static INLINE void dc_32x32(uint8_t *dst, ptrdiff_t stride,
                            const uint8_t *above, const uint8_t *left,
                            int do_above, int do_left) {
  uint16x8_t top_total;
  uint16x8_t left_total;
  uint8x8_t dc_narrow;
  uint8x16_t fill;
  int row;

  if (do_above) {
    /* Sum the 32 top bytes; every lane of top_total ends up with the sum. */
    const uint16x8_t pairs = vaddq_u16(vpaddlq_u8(vld1q_u8(above)),
                                       vpaddlq_u8(vld1q_u8(above + 16)));
    uint16x4_t folded = vadd_u16(vget_low_u16(pairs), vget_high_u16(pairs));
    folded = vpadd_u16(folded, folded);
    folded = vpadd_u16(folded, folded);
    top_total = vcombine_u16(folded, folded);
  }

  if (do_left) {
    /* Same reduction for the 32 left bytes. */
    const uint16x8_t pairs = vaddq_u16(vpaddlq_u8(vld1q_u8(left)),
                                       vpaddlq_u8(vld1q_u8(left + 16)));
    uint16x4_t folded = vadd_u16(vget_low_u16(pairs), vget_high_u16(pairs));
    folded = vpadd_u16(folded, folded);
    folded = vpadd_u16(folded, folded);
    left_total = vcombine_u16(folded, folded);
  }

  if (!do_above && !do_left) {
    dc_narrow = vdup_n_u8(0x80);
  } else if (!do_left) {
    dc_narrow = vrshrn_n_u16(top_total, 5); /* 32 samples */
  } else if (!do_above) {
    dc_narrow = vrshrn_n_u16(left_total, 5); /* 32 samples */
  } else {
    dc_narrow = vrshrn_n_u16(vaddq_u16(left_total, top_total), 6); /* 64 samples */
  }

  fill = vdupq_lane_u8(dc_narrow, 0);
  for (row = 0; row < 32; ++row) {
    uint8_t *const line = dst + row * stride;
    vst1q_u8(line, fill);
    vst1q_u8(line + 16, fill);
  }
}
开发者ID:jmvalin,项目名称:aom,代码行数:52,


示例8: vp9_v_predictor_32x32_neon

/*
 * Vertical predictor: copy the 32 bytes at `above` into each of the 32 rows
 * of `dst` (rows are `stride` bytes apart). `left` is unused.
 */
void vp9_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
                                const uint8_t *above, const uint8_t *left) {
  const uint8x16_t top_lo = vld1q_u8(above);
  const uint8x16_t top_hi = vld1q_u8(above + 16);
  int rows_left = 32;
  (void)left;

  while (rows_left-- > 0) {
    vst1q_u8(dst, top_lo);
    vst1q_u8(dst + 16, top_hi);
    dst += stride;
  }
}
开发者ID:MekliCZ,项目名称:positron,代码行数:14,


示例9: SubtractGreenFromBlueAndRed

/*
 * For each pixel in argb_data, subtract the byte vector produced by
 * DoGreenShuffle() from the pixel bytes (byte-wise, wrapping).
 *
 * Pixels are processed four at a time; the remaining num_pixels & 3 pixels
 * are handed to VP8LSubtractGreenFromBlueAndRed_C().
 */
static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
  const uint32_t* const end = argb_data + (num_pixels & ~3);
#ifdef USE_VTBLQ
  const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
#else
  const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
#endif

  while (argb_data < end) {
    uint8_t* const bytes = (uint8_t*)argb_data;
    const uint8x16_t argb = vld1q_u8(bytes);
    const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
    vst1q_u8(bytes, vsubq_u8(argb, greens));
    argb_data += 4;
  }

  /* Finish the leftover pixels with the plain-C implementation. */
  VP8LSubtractGreenFromBlueAndRed_C(argb_data, num_pixels & 3);
}
开发者ID:BBLionel,项目名称:libwebp,代码行数:15,


示例10: unshuffle4_neon

/*
 * Routine optimized for unshuffling a buffer for a type size of 4 bytes.
 *
 * dest                  - output buffer
 * src                   - input made of four byte planes, each
 *                         total_elements bytes long
 * vectorizable_elements - number of elements handled here; consumed 16 at a
 *                         time (64 output bytes per iteration)
 * total_elements        - distance between consecutive planes in src
 *
 * Fix: the first parameter was spelled `uint8_t * constdest`, which declares
 * an unnamed parameter of an unknown type and leaves `dest` undeclared; it is
 * now `uint8_t * const dest`.
 */
static void
unshuffle4_neon(uint8_t * const dest,
                const uint8_t* const src,
                const size_t vectorizable_elements,
                const size_t total_elements)
{
  size_t i, j, k;
  static const size_t bytesoftype = 4;
  uint8x16x4_t r0;

  for (i = 0, k = 0; i < vectorizable_elements * bytesoftype; i += 64, k++) {
    /* Load 16 bytes from each of the four planes into r0. */
    for (j = 0; j < 4; j++) {
      r0.val[j] = vld1q_u8(src + total_elements * j + k * 16);
    }
    /* Store (with permutation) the results in the destination vector. */
    vst4q_u8(dest + k * 64, r0);
  }
}
开发者ID:BillTheBest,项目名称:c-blosc2,代码行数:32,


示例11: normL1_

/*
 * Sum of absolute differences between the byte arrays a and b of length n.
 *
 * Uses SSE or NEON for the bulk of the data when CV_SSE / CV_NEON is
 * enabled, then finishes the remainder four elements and finally one element
 * at a time.
 *
 * Fix: the NEON accumulator was initialised with vdupq_n_u32(0.0f); the
 * intrinsic takes an integer, so the literal is now 0.
 */
int normL1_(const uchar* a, const uchar* b, int n)
{
    int j = 0, d = 0;
#if CV_SSE
    __m128i d0 = _mm_setzero_si128();

    for( ; j <= n - 16; j += 16 )
    {
        __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
        __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));

        d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
    }

    for( ; j <= n - 4; j += 4 )
    {
        __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
        __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));

        d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
    }
    d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
#elif CV_NEON
    uint32x4_t v_sum = vdupq_n_u32(0);
    for ( ; j <= n - 16; j += 16)
    {
        // Absolute differences, widened to 16 bits and accumulated in 32 bits.
        uint8x16_t v_dst = vabdq_u8(vld1q_u8(a + j), vld1q_u8(b + j));
        uint16x8_t v_low = vmovl_u8(vget_low_u8(v_dst)), v_high = vmovl_u8(vget_high_u8(v_dst));
        v_sum = vaddq_u32(v_sum, vaddl_u16(vget_low_u16(v_low), vget_low_u16(v_high)));
        v_sum = vaddq_u32(v_sum, vaddl_u16(vget_high_u16(v_low), vget_high_u16(v_high)));
    }

    uint CV_DECL_ALIGNED(16) buf[4];
    vst1q_u32(buf, v_sum);
    d = buf[0] + buf[1] + buf[2] + buf[3];
#endif
    {
        for( ; j <= n - 4; j += 4 )
        {
            d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
            std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
        }
    }
    for( ; j < n; j++ )
        d += std::abs(a[j] - b[j]);
    return d;
}
开发者ID:cyberCBM,项目名称:DetectO,代码行数:47,


示例12: png_read_filter_row_sub3_neon

/*
 * NEON row filter working on 3-byte groups, in place on `row`.
 *
 * row_info - supplies rowbytes, the number of bytes in the row
 * row      - row buffer, modified in place
 * prev_row - unused
 *
 * Each iteration consumes 12 input bytes. Every output group is the sum of
 * the previous output group and the current input group; the running value
 * is carried across iterations in vdest.val[3].
 *
 * Fix: the return type and the function name had been fused into
 * `voidpng_read_filter_row_sub3_neon`; they are separated again. The body is
 * otherwise unchanged.
 *
 * NOTE(review): every iteration loads 16 bytes from rp + 12 (including the
 * last one) and each store writes 4 bytes while rp only advances by 3, so
 * the row buffer is presumably padded by the caller -- confirm.
 */
void
png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row,
   png_const_bytep prev_row)
{
   png_bytep rp = row;
   png_bytep rp_stop = row + row_info->rowbytes;

   uint8x16_t vtmp = vld1q_u8(rp);
   uint8x8x2_t *vrpt = png_ptr(uint8x8x2_t, &vtmp);
   uint8x8x2_t vrp = *vrpt;

   uint8x8x4_t vdest;
   vdest.val[3] = vdup_n_u8(0);

   png_debug(1, "in png_read_filter_row_sub3_neon");

   for (; rp < rp_stop;)
   {
      uint8x8_t vtmp1, vtmp2;
      /* NOTE(review): not referenced directly; presumably used by the
       * png_ldr macro -- confirm before removing. */
      uint32x2_t *temp_pointer;

      /* Extract the four 3-byte groups and chain the additions. */
      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
      vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
      vtmp2 = vext_u8(vrp.val[0], vrp.val[1], 6);
      vdest.val[1] = vadd_u8(vdest.val[0], vtmp1);

      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
      vdest.val[2] = vadd_u8(vdest.val[1], vtmp2);
      vdest.val[3] = vadd_u8(vdest.val[2], vtmp1);

      /* Pre-load the next 12 input bytes before overwriting the current ones. */
      vtmp = vld1q_u8(rp + 12);
      vrpt = png_ptr(uint8x8x2_t, &vtmp);
      vrp = *vrpt;

      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
      rp += 3;
      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
      rp += 3;
      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
      rp += 3;
      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
      rp += 3;
   }

   PNG_UNUSED(prev_row)
}
开发者ID:brendandahl,项目名称:positron,代码行数:46,


示例13: png_read_filter_row_up_neon

/*
 * NEON row filter: add each byte of prev_row to the corresponding byte of
 * row (wrapping), in place.
 *
 * row_info - supplies rowbytes, the number of bytes in the row
 * row      - row buffer, modified in place
 * prev_row - the row added to `row`
 *
 * Fix: the return type and the function name had been fused into
 * `voidpng_read_filter_row_up_neon`; they are separated again.
 *
 * NOTE(review): the loop always handles whole 16-byte vectors, so when
 * rowbytes is not a multiple of 16 it reads and writes past rowbytes; the
 * buffers are presumably padded by the caller -- confirm.
 */
void
png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row,
   png_const_bytep prev_row)
{
   png_bytep rp = row;
   png_bytep rp_stop = row + row_info->rowbytes;
   png_const_bytep pp = prev_row;

   for (; rp < rp_stop; rp += 16, pp += 16)
   {
      uint8x16_t qrp, qpp;

      qrp = vld1q_u8(rp);
      qpp = vld1q_u8(pp);
      qrp = vaddq_u8(qrp, qpp);
      vst1q_u8(rp, qrp);
   }
}
开发者ID:mehulsbhatt,项目名称:MyOCRTEST,代码行数:18,


示例14: vpx_v_predictor_16x16_neon

/*
 * Vertical predictor: copy the 16 bytes at `above` into each of the 16 rows
 * of `dst` (rows are `stride` bytes apart). `left` is unused.
 */
void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
                                const uint8_t *above, const uint8_t *left) {
  const uint8x16_t top = vld1q_u8(above);
  int rows_left = 16;
  (void)left;

  while (rows_left-- > 0) {
    vst1q_u8(dst, top);
    dst += stride;
  }
}
开发者ID:jmvalin,项目名称:aom,代码行数:9,


示例15: dc_16x16

/*
 * Fill a 16x16 block with a single DC value.
 *
 * 'do_above' and 'do_left' facilitate branch removal when inlined.
 *
 * The DC value is the rounded average of the 16 bytes at `above` and/or the
 * 16 bytes at `left`, depending on which flags are set; with neither flag
 * set the block is filled with 0x80.
 */
static INLINE void dc_16x16(uint8_t *dst, ptrdiff_t stride,
                            const uint8_t *above, const uint8_t *left,
                            int do_above, int do_left) {
  uint16x8_t top_total;
  uint16x8_t left_total;
  uint8x8_t dc_narrow;
  uint8x16_t fill;
  int row;

  if (do_above) {
    /* Sum the 16 top bytes; every lane of top_total ends up with the sum. */
    const uint16x8_t pairs = vpaddlq_u8(vld1q_u8(above));
    uint16x4_t folded = vadd_u16(vget_low_u16(pairs), vget_high_u16(pairs));
    folded = vpadd_u16(folded, folded);
    folded = vpadd_u16(folded, folded);
    top_total = vcombine_u16(folded, folded);
  }

  if (do_left) {
    /* Same reduction for the 16 left bytes. */
    const uint16x8_t pairs = vpaddlq_u8(vld1q_u8(left));
    uint16x4_t folded = vadd_u16(vget_low_u16(pairs), vget_high_u16(pairs));
    folded = vpadd_u16(folded, folded);
    folded = vpadd_u16(folded, folded);
    left_total = vcombine_u16(folded, folded);
  }

  if (!do_above && !do_left) {
    dc_narrow = vdup_n_u8(0x80);
  } else if (!do_left) {
    dc_narrow = vrshrn_n_u16(top_total, 4); /* 16 samples */
  } else if (!do_above) {
    dc_narrow = vrshrn_n_u16(left_total, 4); /* 16 samples */
  } else {
    dc_narrow = vrshrn_n_u16(vaddq_u16(left_total, top_total), 5); /* 32 samples */
  }

  fill = vdupq_lane_u8(dc_narrow, 0);
  for (row = 0; row < 16; ++row) {
    vst1q_u8(dst + row * stride, fill);
  }
}
开发者ID:MekliCZ,项目名称:positron,代码行数:45,


示例16: vp9_int_pro_row_neon

/*
 * Column sums of a 16-pixel-wide strip, scaled down and written to hbuf.
 *
 * hbuf       - receives 16 values, one per column
 * ref        - top-left pixel of the strip
 * ref_stride - distance between rows, in bytes
 * height     - number of rows; consumed eight rows at a time
 *
 * Each column sum is shifted right by (height >> 5) + 3 bits (implemented as
 * a vector shift by the negated amount).
 */
void vp9_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref,
                          const int ref_stride, const int height) {
  uint16x8_t cols_lo = vdupq_n_u16(0);
  uint16x8_t cols_hi = vdupq_n_u16(0);
  const int16x8_t neg_shift = vdupq_n_s16(-((height >> 5) + 3));
  int group;

  for (group = 0; group < height; group += 8) {
    int r;
    /* Eight rows per group: widen each row and add it to the column sums. */
    for (r = 0; r < 8; ++r) {
      const uint8x16_t line = vld1q_u8(ref);
      cols_lo = vaddw_u8(cols_lo, vget_low_u8(line));
      cols_hi = vaddw_u8(cols_hi, vget_high_u8(line));
      ref += ref_stride;
    }
  }

  cols_lo = vshlq_u16(cols_lo, neg_shift);
  cols_hi = vshlq_u16(cols_hi, neg_shift);

  vst1q_s16(hbuf, vreinterpretq_s16_u16(cols_lo));
  vst1q_s16(hbuf + 8, vreinterpretq_s16_u16(cols_hi));
}
开发者ID:d4rkuzs,项目名称:ffmpeg-codecs-libs,代码行数:52,


示例17: vp8_copy_mem16x16_neon

/*
 * Copy a 16x16 block of bytes from src to dst.
 *
 * src_stride / dst_stride are the distances between consecutive rows of the
 * source and destination, in bytes.
 */
void vp8_copy_mem16x16_neon(unsigned char *src, int src_stride,
                            unsigned char *dst, int dst_stride) {
  int rows_left = 16;

  while (rows_left-- > 0) {
    vst1q_u8(dst, vld1q_u8(src));
    src += src_stride;
    dst += dst_stride;
  }
}
开发者ID:webmproject,项目名称:libvpx,代码行数:12,


示例18: vp8_mbloop_filter_horizontal_edge_y_neon

/*
 * Apply vp8_mbloop_filter_neon() across a horizontal edge, 16 pixels wide.
 *
 * src   - first row below the edge; the four rows above it and the four rows
 *         starting at it are loaded
 * pitch - distance between rows, in bytes
 * blimit, limit, thresh - filter parameters, broadcast to every lane
 *
 * The six middle rows (all but the outermost two) are written back.
 */
void vp8_mbloop_filter_horizontal_edge_y_neon(
        unsigned char *src,
        int pitch,
        unsigned char blimit,
        unsigned char limit,
        unsigned char thresh) {
    const uint8x16_t qblimit = vdupq_n_u8(blimit);
    const uint8x16_t qlimit = vdupq_n_u8(limit);
    const uint8x16_t qthresh = vdupq_n_u8(thresh);
    unsigned char *const top = src - (pitch << 2);

    /* Eight rows straddling the edge, top to bottom. */
    uint8x16_t row0 = vld1q_u8(top);
    uint8x16_t row1 = vld1q_u8(top + pitch);
    uint8x16_t row2 = vld1q_u8(top + 2 * pitch);
    uint8x16_t row3 = vld1q_u8(top + 3 * pitch);
    uint8x16_t row4 = vld1q_u8(top + 4 * pitch);
    uint8x16_t row5 = vld1q_u8(top + 5 * pitch);
    uint8x16_t row6 = vld1q_u8(top + 6 * pitch);
    uint8x16_t row7 = vld1q_u8(top + 7 * pitch);

    vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, row0, row1,
                         row2, row3, row4, row5, row6, row7,
                         &row1, &row2, &row3, &row4, &row5, &row6);

    /* Write back the six filtered rows. */
    vst1q_u8(top + pitch, row1);
    vst1q_u8(top + 2 * pitch, row2);
    vst1q_u8(top + 3 * pitch, row3);
    vst1q_u8(top + 4 * pitch, row4);
    vst1q_u8(top + 5 * pitch, row5);
    vst1q_u8(top + 6 * pitch, row6);
}
开发者ID:venkatarajasekhar,项目名称:Qt,代码行数:49,


示例19: mw_neon_mm_qsub_u8x16

/*
 * u8x16 saturated sub: C[i] = max(A[i] - B[i], 0) for every element of a
 * Row x Col matrix stored contiguously.
 *
 * A, B     - input matrices of Row * Col bytes
 * Row, Col - matrix dimensions
 * C        - output matrix of Row * Col bytes
 *
 * Full groups of 16 elements use vqsubq_u8; the remaining elements are
 * handled one at a time.
 *
 * Fix: the scalar tail used a plain `A - B`, which wraps modulo 256 instead
 * of saturating at 0, so the last (Row * Col) % 16 elements disagreed with
 * the vector path. The tail now clamps at 0 as well.
 */
void mw_neon_mm_qsub_u8x16(unsigned char * A, int Row, int Col, unsigned char * B, unsigned char * C)
{
	int size = Row * Col;
	int k = 0;

	for (; size - k >= 16; k += 16)
	{
		const uint8x16_t neon_a = vld1q_u8(A + k);
		const uint8x16_t neon_b = vld1q_u8(B + k);
		vst1q_u8(C + k, vqsubq_u8(neon_a, neon_b));
	}

	for (; k < size; k++)
	{
		C[k] = (A[k] > B[k]) ? (unsigned char)(A[k] - B[k]) : 0;
	}
}
开发者ID:Daniel-Hwang,项目名称:eeg,代码行数:23,


示例20: vp9_int_pro_col_neon

/*
 * Sum the bytes at `ref`, 16 at a time, until `width` bytes are covered, and
 * return the total as computed by horizontal_add_u16x8().
 */
int16_t vp9_int_pro_col_neon(uint8_t const *ref, const int width) {
  uint16x8_t total = vdupq_n_u16(0);
  int remaining = width;

  while (remaining > 0) {
    const uint8x16_t chunk = vld1q_u8(ref);
    total = vaddw_u8(total, vget_low_u8(chunk));
    total = vaddw_u8(total, vget_high_u8(chunk));
    ref += 16;
    remaining -= 16;
  }

  return horizontal_add_u16x8(total);
}
开发者ID:d4rkuzs,项目名称:ffmpeg-codecs-libs,代码行数:13,


示例21: vpx_lpf_horizontal_4_dual_neon

/*
 * Apply loop_filter_neon_16() across a horizontal edge, 16 pixels wide, with
 * separate parameters for the left and right 8-pixel halves.
 *
 * s - first row below the edge; the four rows above it and the four rows
 *     starting at it are loaded
 * p - pitch: distance between rows, in bytes
 * blimit0/limit0/thresh0 - parameters for the low 8 lanes (8 bytes are
 *                          loaded from each pointer)
 * blimit1/limit1/thresh1 - parameters for the high 8 lanes
 *
 * The four middle rows are written back.
 */
void vpx_lpf_horizontal_4_dual_neon(
    uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
    const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
    const uint8_t *limit1, const uint8_t *thresh1) {
  /* Load in the same order as before, then pair up the two halves. */
  const uint8x8_t dblimit0 = vld1_u8(blimit0);
  const uint8x8_t dlimit0 = vld1_u8(limit0);
  const uint8x8_t dthresh0 = vld1_u8(thresh0);
  const uint8x8_t dblimit1 = vld1_u8(blimit1);
  const uint8x8_t dlimit1 = vld1_u8(limit1);
  const uint8x8_t dthresh1 = vld1_u8(thresh1);
  const uint8x16_t qblimit = vcombine_u8(dblimit0, dblimit1);
  const uint8x16_t qlimit = vcombine_u8(dlimit0, dlimit1);
  const uint8x16_t qthresh = vcombine_u8(dthresh0, dthresh1);
  uint8_t *const top = s - (p << 2);

  /* Eight rows straddling the edge, top to bottom. */
  uint8x16_t row0 = vld1q_u8(top);
  uint8x16_t row1 = vld1q_u8(top + p);
  uint8x16_t row2 = vld1q_u8(top + 2 * p);
  uint8x16_t row3 = vld1q_u8(top + 3 * p);
  uint8x16_t row4 = vld1q_u8(top + 4 * p);
  uint8x16_t row5 = vld1q_u8(top + 5 * p);
  uint8x16_t row6 = vld1q_u8(top + 6 * p);
  uint8x16_t row7 = vld1q_u8(top + 7 * p);

  loop_filter_neon_16(qblimit, qlimit, qthresh, row0, row1, row2, row3, row4,
                      row5, row6, row7, &row2, &row3, &row4, &row5);

  /* Write back the four filtered rows. */
  vst1q_u8(top + 2 * p, row2);
  vst1q_u8(top + 3 * p, row3);
  vst1q_u8(top + 4 * p, row4);
  vst1q_u8(top + 5 * p, row5);
}
开发者ID:Corax26,项目名称:libvpx,代码行数:49,


示例22: AddGreenToBlueAndRed

/*
 * For each pixel in argb_data, add a byte vector built by looking up the
 * pixel bytes through kGreenShuffle (byte-wise, wrapping).
 *
 * Pixels are processed four at a time; the remaining num_pixels & 3 pixels
 * are handed to VP8LAddGreenToBlueAndRed_C().
 */
static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
  const uint32_t* const end = argb_data + (num_pixels & ~3);
  const uint8x8_t shuffle = vld1_u8(kGreenShuffle);

  while (argb_data < end) {
    uint8_t* const bytes = (uint8_t*)argb_data;
    const uint8x16_t argb = vld1q_u8(bytes);
    /* The table lookup works on 8-byte halves, so shuffle each separately. */
    const uint8x8_t greens_lo = vtbl1_u8(vget_low_u8(argb), shuffle);
    const uint8x8_t greens_hi = vtbl1_u8(vget_high_u8(argb), shuffle);
    vst1q_u8(bytes, vaddq_u8(argb, vcombine_u8(greens_lo, greens_hi)));
    argb_data += 4;
  }

  /* Finish the leftover pixels with the plain-C implementation. */
  VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3);
}
开发者ID:0109yuma,项目名称:drrrbile-reader,代码行数:13,


示例23: vpx_d45_predictor_16x16_neon

/*
 * 45-degree predictor for a 16x16 block.
 *
 * The first row is a smoothed version of the 16 bytes at `above`:
 * a halving add of A[i] and A[i+2], followed by a rounding halving add with
 * A[i+1], where positions past the end repeat above[15]. Every following row
 * is the previous one shifted left by one byte, again filling with
 * above[15]. `left` is unused.
 */
void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
  const uint8x16_t top = vld1q_u8(above);
  const uint8x16_t last_px = vld1q_dup_u8(above + 15);
  const uint8x16_t top_plus1 = vextq_u8(top, last_px, 1);
  const uint8x16_t top_plus2 = vextq_u8(top, last_px, 2);
  uint8x16_t line = vrhaddq_u8(vhaddq_u8(top, top_plus2), top_plus1);
  int shifts_left;
  (void)left;

  for (shifts_left = 15; shifts_left > 0; --shifts_left) {
    vst1q_u8(dst, line);
    dst += stride;
    line = vextq_u8(line, last_px, 1);
  }
  vst1q_u8(dst, line); /* 16th row */
}
开发者ID:jmvalin,项目名称:aom,代码行数:16,


示例24: vp9_h_predictor_32x32_neon

/*
 * Horizontal predictor: row r of the 32x32 block at `dst` is filled with
 * left[r] (rows are `stride` bytes apart). `above` is unused.
 */
void vp9_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
                                const uint8_t *above, const uint8_t *left) {
  int row;
  (void)above;

  for (row = 0; row < 32; ++row) {
    /* Broadcast this row's left neighbour across all 32 pixels. */
    const uint8x16_t fill = vdupq_n_u8(left[row]);
    vst1q_u8(dst, fill);
    vst1q_u8(dst + 16, fill);
    dst += stride;
  }
}
开发者ID:MekliCZ,项目名称:positron,代码行数:47,


示例25: main

/*
 * Demo: load 16 bytes into a NEON register, print it, run add3() on it and
 * print it again.
 */
int main () {
    /* Sample input: the values 1..16. */
    const uint8_t sample[] = { 1, 2, 3, 4, 5, 6, 7, 8,
                               9, 10, 11, 12, 13, 14, 15, 16 };

    /* Load the sample into a vector register and show it. */
    uint8x16_t data = vld1q_u8 (sample);
    print_uint8 (data, "data");

    /* Let add3 modify the vector in place, then show the result. */
    add3(&data);
    print_uint8 (data, "data (new)");

    return 0;
}
开发者ID:Michael-bochi,项目名称:training,代码行数:20,


示例26: LD_16x8

/*
 * Load eight consecutive 16-byte rows starting at `d` (rows are d_stride
 * bytes apart) into *q8u8 .. *q15u8, top row first.
 */
static INLINE void LD_16x8(uint8_t *d, int d_stride, uint8x16_t *q8u8,
                           uint8x16_t *q9u8, uint8x16_t *q10u8,
                           uint8x16_t *q11u8, uint8x16_t *q12u8,
                           uint8x16_t *q13u8, uint8x16_t *q14u8,
                           uint8x16_t *q15u8) {
  const uint8_t *row = d;

  *q8u8 = vld1q_u8(row);
  row += d_stride;
  *q9u8 = vld1q_u8(row);
  row += d_stride;
  *q10u8 = vld1q_u8(row);
  row += d_stride;
  *q11u8 = vld1q_u8(row);
  row += d_stride;
  *q12u8 = vld1q_u8(row);
  row += d_stride;
  *q13u8 = vld1q_u8(row);
  row += d_stride;
  *q14u8 = vld1q_u8(row);
  row += d_stride;
  *q15u8 = vld1q_u8(row);
}
开发者ID:nwgat,项目名称:Mirror-aom,代码行数:22,


示例27: ar_vminall_u8_neon

/*
 * Minimum of the n bytes at `a`. Returns 255 when n is 0.
 *
 * Full groups of 16 elements are reduced with NEON; the per-lane minima and
 * the remaining n % 16 elements are then folded with ar_min_u8().
 *
 * Fix: the previous version always loaded whole vectors, so when n was not a
 * multiple of 16 it read past the array and the bytes found there could
 * become the result.
 */
uint8_t ar_vminall_u8_neon(const uint8_t* a, uint32_t n)
{
   uint8x16_t overall_min = vdupq_n_u8(255);
   uint8_t om_array[16];
   uint8_t themin = 255;
   uint32_t i = 0;

   for (; n - i >= 16; i += 16) {
      overall_min = vminq_u8(vld1q_u8(&(a[i])), overall_min);
   }

   /* Fold the 16 per-lane minima into one value. */
   vst1q_u8(om_array, overall_min);
   for (uint32_t lane = 0; lane < 16; lane++) {
      themin = ar_min_u8(themin, om_array[lane]);
   }

   /* Leftover elements that did not fill a whole vector. */
   for (; i < n; i++) {
      themin = ar_min_u8(themin, a[i]);
   }

   return themin;
}
开发者ID:petecoup,项目名称:argon,代码行数:20,


示例28: ar_vmaxall_u8_neon

/*
 * Maximum of the n bytes at `a`. Returns 0 when n is 0.
 *
 * Full groups of 16 elements are reduced with NEON; the per-lane maxima and
 * the remaining n % 16 elements are then folded with ar_max_u8().
 *
 * Fix: the previous version always loaded whole vectors, so when n was not a
 * multiple of 16 it read past the array and the bytes found there could
 * become the result.
 */
uint8_t ar_vmaxall_u8_neon(const uint8_t* a, uint32_t n)
{
   uint8x16_t overall_max = vdupq_n_u8(0);
   uint8_t om_array[16];
   uint8_t themax = 0;
   uint32_t i = 0;

   for (; n - i >= 16; i += 16) {
      overall_max = vmaxq_u8(vld1q_u8(&(a[i])), overall_max);
   }

   /* Fold the 16 per-lane maxima into one value. */
   vst1q_u8(om_array, overall_max);
   for (uint32_t lane = 0; lane < 16; lane++) {
      themax = ar_max_u8(themax, om_array[lane]);
   }

   /* Leftover elements that did not fill a whole vector. */
   for (; i < n; i++) {
      themax = ar_max_u8(themax, a[i]);
   }

   return themax;
}
开发者ID:petecoup,项目名称:argon,代码行数:20,


示例29: SubBytes

/*
 * Replace each of the 16 bytes of *state with its sbox[] entry.
 *
 * The vector is spilled to the file-level scratch array `lut`, substituted
 * byte by byte, and loaded back into *state.
 */
static void SubBytes(void)
{
	int idx;

	vst1q_u8(lut, *state);
	for (idx = 0; idx < 16; ++idx) {
		lut[idx] = sbox[lut[idx]];
	}
	*state = vld1q_u8(lut);
}
开发者ID:BerkeleyVogelheim,项目名称:ContextEngine27,代码行数:21,


示例30: crypto_stream_xor

int crypto_stream_xor(        unsigned char *c,  const unsigned char *m,unsigned long long mlen,  const unsigned char *n,  const unsigned char *k){  const uint32x4_t abab = {-1,0,-1,0};  const uint64x1_t nextblock = {1};  uint32x4_t k0k1k2k3 = (uint32x4_t) vld1q_u8((uint8_t *) k);  uint32x4_t k4k5k6k7 = (uint32x4_t) vld1q_u8((uint8_t *) (k + 16));  uint32x4_t start0 = (uint32x4_t) vld1q_u8((uint8_t *) sigma);  uint32x2_t n0n1 = (uint32x2_t) vld1_u8((uint8_t *) n);  uint32x2_t n2n3 = {0,0};  uint32x2_t k0k1 = vget_low_u32(k0k1k2k3);  uint32x2_t k2k3 = vget_high_u32(k0k1k2k3);  uint32x2_t k4k5 = vget_low_u32(k4k5k6k7);  uint32x2_t k6k7 = vget_high_u32(k4k5k6k7);  uint32x2_t n1n0 = vext_u32(n0n1,n0n1,1);  uint32x2_t n3n2;  uint32x2_t n0k4 = vext_u32(n1n0,k4k5,1);  uint32x2_t k5k0 = vext_u32(k4k5,k0k1,1);  uint32x2_t k1n1 = vext_u32(k0k1,n1n0,1);  uint32x2_t n2k6;  uint32x2_t k7k2 = vext_u32(k6k7,k2k3,1);  uint32x2_t k3n3;  uint32x4_t start1 = vcombine_u32(k5k0,n0k4);  uint32x4_t start2;  uint32x4_t start3;  register uint32x4_t diag0;  register uint32x4_t diag1;  register uint32x4_t diag2;  register uint32x4_t diag3;  uint32x4_t next_start2;  uint32x4_t next_start3;  register uint32x4_t next_diag0;  register uint32x4_t next_diag1;  register uint32x4_t next_diag2;  register uint32x4_t next_diag3;  uint32x4_t x0x5x10x15;  uint32x4_t x12x1x6x11;  uint32x4_t x8x13x2x7;  uint32x4_t x4x9x14x3;  uint32x4_t x0x1x10x11;  uint32x4_t x12x13x6x7;  uint32x4_t x8x9x2x3;  uint32x4_t x4x5x14x15;  uint32x4_t x0x1x2x3;  uint32x4_t x4x5x6x7;  uint32x4_t x8x9x10x11;  uint32x4_t x12x13x14x15;  uint32x4_t m0m1m2m3;  uint32x4_t m4m5m6m7;  uint32x4_t m8m9m10m11;  uint32x4_t m12m13m14m15;  register uint32x4_t a0;  register uint32x4_t a1;  register uint32x4_t a2;  register uint32x4_t a3;  register uint32x4_t b0;  register uint32x4_t b1;  register uint32x4_t b2;  register uint32x4_t b3;  register uint32x4_t next_a0;  register uint32x4_t next_a1;  register uint32x4_t next_a2;  register uint32x4_t 
next_a3;  register uint32x4_t next_b0;  register uint32x4_t next_b1;  register uint32x4_t next_b2;  register uint32x4_t next_b3;  unsigned char block[64];  unsigned char *savec;  int i;  int flagm = (m != 0);  if (!mlen) return 0;  if (mlen < 128) goto mlenatleast1;  mlenatleast128:  n3n2 = vext_u32(n2n3,n2n3,1);  n2k6 = vext_u32(n3n2,k6k7,1);  k3n3 = vext_u32(k2k3,n3n2,1);  start2 = vcombine_u32(n2k6,k1n1);  start3 = vcombine_u32(k3n3,k7k2);  n2n3 = (uint32x2_t) vadd_u64(nextblock,(uint64x1_t) n2n3);  diag0 = start0;  diag1 = start1;  diag2 = start2;  diag3 = start3;  n3n2 = vext_u32(n2n3,n2n3,1);  n2k6 = vext_u32(n3n2,k6k7,1);  k3n3 = vext_u32(k2k3,n3n2,1);  next_start2 = vcombine_u32(n2k6,k1n1);  next_start3 = vcombine_u32(k3n3,k7k2);//.........这里部分代码省略.........
开发者ID:BurnBeforeReading,项目名称:cjdns,代码行数:101,



注:本文中的vld1q_u8函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vline函数代码示例
C++ vld1q_f32函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。