您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ vis_alignaddr函数代码示例

51自学网 2021-06-03 09:42:59
  C++
这篇教程《C++ vis_alignaddr函数代码示例》写得很实用,希望能帮到您。

本文整理汇总了C++中vis_alignaddr函数的典型用法代码示例。如果您正苦于以下问题:C++ vis_alignaddr函数的具体用法是什么?C++ vis_alignaddr怎么用?有哪些C++ vis_alignaddr的使用例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了vis_alignaddr函数的28个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: mlib_v_ImageLookUp_S16_S16_3_D1

voidmlib_v_ImageLookUp_S16_S16_3_D1(    const mlib_s16 *src,    mlib_s16 *dst,    mlib_s32 xsize,    const mlib_s16 *table0,    const mlib_s16 *table1,    const mlib_s16 *table2){/* pointer to source data */	mlib_s16 *sp;/* source data */	mlib_s32 s0, s1, s2, s3;/* pointer to start of destination */	mlib_s16 *dl;/* pointer to end of destination */	mlib_s16 *dend;/* aligned pointer to destination */	mlib_d64 *dp;/* destination data */	mlib_d64 t0, t1, t2, t3;/* destination data */	mlib_d64 acc0, acc1;/* edge mask */	mlib_s32 emask;/* loop variable */	mlib_s32 i, num;	const mlib_s16 *table;	dl = dst;	sp = (void *)src;	dp = (mlib_d64 *)dl;	dend = dl + xsize - 1;	vis_alignaddr((void *)0, 6);	i = 0;	if (xsize >= 4) {		s0 = sp[0] << 1;		s1 = sp[1] << 1;		s2 = sp[2] << 1;		s3 = sp[3] << 1;		sp += 4;		vis_write_bmask(0x012389ab, 0);#pragma pipeloop(0)		for (i = 0; i <= xsize - 8; i += 4, sp += 4) {			t3 = VIS_LD_U16_I(table0, s3);			t2 = VIS_LD_U16_I(table2, s2);			t1 = VIS_LD_U16_I(table1, s1);			t0 = VIS_LD_U16_I(table0, s0);			acc1 = vis_faligndata(t3, acc1);			acc1 = vis_faligndata(t2, acc1);			acc0 = vis_faligndata(t1, acc0);			acc0 = vis_faligndata(t0, acc0);			s0 = sp[0] << 1;			s1 = sp[1] << 1;			s2 = sp[2] << 1;			s3 = sp[3] << 1;			(*dp++) = vis_bshuffle(acc0, acc1);			table = table0;			table0 = table1;			table1 = table2;			table2 = table;		}		t3 = VIS_LD_U16_I(table0, s3);		t2 = VIS_LD_U16_I(table2, s2);		t1 = VIS_LD_U16_I(table1, s1);		t0 = VIS_LD_U16_I(table0, s0);		acc1 = vis_faligndata(t3, acc1);		acc1 = vis_faligndata(t2, acc1);		acc0 = vis_faligndata(t1, acc0);		acc0 = vis_faligndata(t0, acc0);		(*dp++) = vis_bshuffle(acc0, acc1);		table = table0;		table0 = table1;		table1 = table2;		table2 = table;		i += 4;	}	if ((mlib_addr)dp <= (mlib_addr)dend) {		num = (mlib_s16 *)dend - (mlib_s16 *)dp;		sp += num;		num++;		if (num == 1) {//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例2: __mlib_VectorConvert_U8_S32_Sat

mlib_status__mlib_VectorConvert_U8_S32_Sat(	mlib_u8 *z,	const mlib_s32 *x,	mlib_s32 n){	mlib_s32 *src = (void *)x;	mlib_u8 *dst = z;	mlib_d64 *dsrc, *ddst;	mlib_d64 d0, d_tmp, d1, d2, d3, d4;	mlib_s32 len_64, even_length, rest_64, length = n, i;	mlib_s32 c;	if (n < 8) {		PACK_S_S(mlib_s32, mlib_u8, MLIB_U8_MAX, 0);	}/* * First try to align destination address for 8 bytes . */	while ((mlib_addr)dst & 7) {		(*dst++) = (c =			(*src++)) < 0 ? 0 : (c > MLIB_U8_MAX ? MLIB_U8_MAX : c);		length--;	}	rest_64 = length & 7;	len_64 = length >> 3;	even_length = len_64 << 3;	ddst = (mlib_d64 *)dst;	vis_write_gsr(23 << 3);/* * Now analyze source address alignment. */	if (((mlib_addr)src & 7) == 0) {/* * Source address is also 8-byte aligned. */		dsrc = (mlib_d64 *)src;#pragma pipeloop(0)#pragma unroll(4)		for (i = 0; i < len_64; i++) {			d1 = (*dsrc++);			d2 = (*dsrc++);			d3 = (*dsrc++);			d4 = (*dsrc++);			d1 = vis_fpack32(d1, d1);			d2 = vis_fpack32(d1, d2);			d3 = vis_fpack32(d2, d3);			d4 = vis_fpack32(d3, d4);			(*ddst++) =				vis_fpmerge(vis_read_hi(d4), vis_read_lo(d4));		}	} else {/* * Source address is arbitrary aligned. Use vis_alignaddr() and * vis_faligndata() functions. */		dsrc = (mlib_d64 *)vis_alignaddr(src, 0);		d0 = (*dsrc++);#pragma pipeloop(0)#pragma unroll(4)		for (i = 0; i < len_64; i++) {			d_tmp = (*dsrc++);			d1 = vis_faligndata(d0, d_tmp);			d0 = (*dsrc++);			d2 = vis_faligndata(d_tmp, d0);			d_tmp = (*dsrc++);			d3 = vis_faligndata(d0, d_tmp);			d0 = vis_ld_d64_nf(dsrc); dsrc++;			d4 = vis_faligndata(d_tmp, d0);			d1 = vis_fpack32(d1, d1);			d2 = vis_fpack32(d1, d2);			d3 = vis_fpack32(d2, d3);			d4 = vis_fpack32(d3, d4);			(*ddst++) =				vis_fpmerge(vis_read_hi(d4), vis_read_lo(d4));		}	}	for (i = 0; i < rest_64; i++) {		c = src[even_length + i];		dst[even_length + i] = c < MLIB_U8_MIN ? MLIB_U8_MIN			: (c > MLIB_U8_MAX ? MLIB_U8_MAX : c);	}	return (MLIB_SUCCESS);}
开发者ID:Aries85,项目名称:mediaLib,代码行数:98,


示例3: __mlib_VectorSubS_S32_S16_Mod

mlib_status__mlib_VectorSubS_S32_S16_Mod(	mlib_s32 *z,	const mlib_s16 *x,	const mlib_s16 *c,	mlib_s32 n){	mlib_d64 *dpz, *dpx;	mlib_d64 dx, dx0, dx1;	mlib_d64 dr1, dr2, dzh, dzl;	mlib_f32 fone = vis_to_float(0x10001);	mlib_s32 uc = *((mlib_s16 *)c);	mlib_s16 *px;	mlib_s32 *pz;	mlib_s32 len = n, i;/* rest and leng in terms of 8 bytes. */	mlib_s32 rest_8, even_8;	mlib_d64 dc = vis_to_double_dup(uc);	if (n <= 0)		return (MLIB_FAILURE);	px = (mlib_s16 *)x;	pz = (mlib_s32 *)z;	if (n <= 4)		SUBS_S32_S16_IN_C;/* * prepare the destination address */	while ((mlib_addr)pz & 7) {		(*pz++) = uc - ((mlib_s32)(*px));		px++;		len--;	}	dpz = (mlib_d64 *)pz;	even_8 = len >> 2;	rest_8 = len & 0x3;	if (!((mlib_addr)px & 7)) {/* * 'x' address is 8-byte aligned. * No  vis_alignaddr and  vis_faligndata at all. */		dpx = (mlib_d64 *)px;#pragma pipeloop(0)		for (i = 0; i < even_8; i++) {			dx = (*dpx++);			SUBS_S32_S16_MOD;/* * store 16 bytes of result */			dpz[0] = dzh;			dpz[1] = dzl;			dpz += 2;		}	} else {/* * "x"  address is arbitrary aligned. * 1 vis_alignaddr and 1 vis_faligndata in the loop. */		dpx = vis_alignaddr(px, 0);		dx0 = vis_ld_d64_nf(dpx);		dpx++;#pragma pipeloop(0)		for (i = 0; i < even_8; i++) {			dx1 = vis_ld_d64_nf(dpx);			dpx++;			dx = vis_faligndata(dx0, dx1);			SUBS_S32_S16_MOD;			dx0 = dx1;/* * store 16 bytes of result */			dpz[0] = dzh;			dpz[1] = dzl;			dpz += 2;		}	}	if (!rest_8)		return (MLIB_SUCCESS);	px += (even_8 << 2);	pz += (even_8 << 2);	while (rest_8--) {//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例4: __mlib_VectorConvert_S8_U8_Sat

mlib_status__mlib_VectorConvert_S8_U8_Sat(	mlib_s8 *z,	const mlib_u8 *x,	mlib_s32 n){	mlib_u8 *src = (void *)x;	mlib_s8 *dst = z;	mlib_d64 fzero = vis_fzeros();	mlib_d64 *dsrc, *ddst;	mlib_d64 d1, d2, d3, d4, d5, d6;	mlib_s32 len_64, even_length, rest_64, length = n, i;	mlib_u8 c;	mlib_d64 dsp = vis_to_double_dup(0x800080);	mlib_d64 rst = vis_to_double_dup(0x80808080);	mlib_f32 fm = vis_to_float(0x100);	if (length < 16) {		PACK_U_S(mlib_u8, mlib_s8, MLIB_S8_MAX);	}/* * First, try to align destination address for 8 bytes . */	while ((mlib_addr)dst & 7) {		(*dst++) = (c = (*src++)) > MLIB_S8_MAX ? MLIB_S8_MAX : c;		length--;	}	rest_64 = length & 7;	len_64 = length >> 3;	even_length = len_64 << 3;	ddst = (mlib_d64 *)dst;	vis_write_gsr(7 << 3);/* * Now analyze source address alignment. */	if (((mlib_addr)src & 7) == 0) {/* * Source address is also 8-byte aligned. */		dsrc = (mlib_d64 *)src;/* * Peeling the 1st iteration. */		if (i = (len_64 & 1)) {			d1 = (*dsrc++);			d2 = vis_fpmerge(fzero, vis_read_hi(d1));			d3 = vis_fmul8x16al(vis_read_lo(d1), fm);			d2 = vis_fpadd16(dsp, d2);			d3 = vis_fpadd16(dsp, d3);			d1 = vis_fpack16_pair(d2, d3);			(*ddst++) = vis_fxor(d1, rst);		}/* * Then loop with step==2. Unroll for 2 iterations. */#pragma pipeloop(0)#pragma unroll(4)		for (; i < len_64; i += 2) {			d1 = (*dsrc++);			d4 = (*dsrc++);			d2 = vis_fpmerge(fzero, vis_read_hi(d1));			d3 = vis_fmul8x16al(vis_read_lo(d1), fm);			d2 = vis_fpadd16(dsp, d2);			d3 = vis_fpadd16(dsp, d3);			d1 = vis_fpack16_pair(d2, d3);			d2 = vis_fpmerge(fzero, vis_read_hi(d4));			d3 = vis_fmul8x16al(vis_read_lo(d4), fm);			d2 = vis_fpadd16(dsp, d2);			d3 = vis_fpadd16(dsp, d3);			d4 = vis_fpack16_pair(d2, d3);			(*ddst++) = vis_fxor(d1, rst);			(*ddst++) = vis_fxor(d4, rst);		}	} else {/* * Source address has arbitrary alignment. Use vis_alignaddr() and * vis_faligndata() functions. */		dsrc = (mlib_d64 *)vis_alignaddr(src, 0);		d2 = (*dsrc++);/* * Peeling of 1 iteration. 
*/		if (i = (len_64 & 1)) {			d1 = d2;//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例5: __mlib_VectorConvert_U8_S16_Sat

mlib_status__mlib_VectorConvert_U8_S16_Sat(	mlib_u8 *z,	const mlib_s16 *x,	mlib_s32 n){	mlib_s16 *src = (void *)x;	mlib_u8 *dst = z;	mlib_d64 *dsrc, *ddst;	mlib_d64 d1, d2, d3, d4, d5, d6, d7;	mlib_s32 len_64, even_length, rest_64, length = n, i;	mlib_s16 c;	if (n < 16) {		PACK_S_U_DF(mlib_s16, mlib_u8, MLIB_U8_MAX, 0);	}/* * First try to align destination address for 8 bytes . */	while ((mlib_addr)dst & 7) {		(*dst++) = (c =			(*src++)) < 0 ? 0 : (c > MLIB_U8_MAX ? MLIB_U8_MAX : c);		length--;	}	rest_64 = length & 7;	len_64 = length >> 3;	even_length = len_64 << 3;	ddst = (mlib_d64 *)dst;	vis_write_gsr(7 << 3);/* * Now analyze source address alignment. */	if (((mlib_addr)src & 7) == 0) {/* * Source address is also 8-byte aligned. */		dsrc = (mlib_d64 *)src;/* * Peeling the 1st iteration. */		if (i = (len_64 & 1)) {			d4 = (*dsrc++);			d5 = (*dsrc++);			d3 = vis_fpack16_pair(d4, d5);			(*ddst++) = d3;		}/* * Then loop with step==2. Unroll for 2 iterations. */#pragma pipeloop(0)#pragma unroll(2)		for (; i < len_64; i += 2) {			d1 = (*dsrc++);			d2 = (*dsrc++);			d5 = (*dsrc++);			d6 = (*dsrc++);			d3 = vis_fpack16_pair(d1, d2);			d7 = vis_fpack16_pair(d5, d6);			(*ddst++) = d3;			(*ddst++) = d7;		}	} else {/* * Source address is 2-byte aligned. Use vis_alignaddr() and * vis_faligndata() functions. */		dsrc = (mlib_d64 *)vis_alignaddr(src, 0);		d2 = (*dsrc++);/* * Peeling of 1 iteration. */		if (i = (len_64 & 1)) {			d1 = d2;			d2 = vis_ld_d64_nf(dsrc); dsrc++;			d3 = vis_faligndata(d1, d2);			d1 = d2;			d2 = vis_ld_d64_nf(dsrc); dsrc++;			d4 = vis_faligndata(d1, d2);			d3 = vis_fpack16_pair(d3, d4);			(*ddst++) = d3;		}/* * Then loop with step==2. *///.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例6: mlib_v_ImageAdd_U8

mlib_statusmlib_v_ImageAdd_U8(    mlib_image *dst,    const mlib_image *src1,    const mlib_image *src2){	mlib_s32 i, j, k;	mlib_s32 offdst, offsrc1, offsrc2, emask;	mlib_s32 amount;	mlib_d64 *dpp, *spp2, *spp1, *tmp_ptr;	mlib_d64 dd, dd0, dd1, sd10, sd11, sd20, sd21;	mlib_d64 sd1h, sd2h, sd1l, sd2l, rdh, rdl;	mlib_u8 *dend;	mlib_f32 nul = vis_to_float(0), fone = vis_to_float(0x100);	VALIDATE(mlib_u8);/* initialize GSR scale factor */	vis_write_gsr(7 << 3);	sl1 = sp1;	sl2 = sp2;	dl = dp;	amount = width * channels;	offdst = ((mlib_addr)dp) & 7;	offsrc1 = ((mlib_addr)sp1) & 7;	offsrc2 = ((mlib_addr)sp2) & 7;	if ((offdst == offsrc1) && (offdst == offsrc2) &&	    (((strided ^ stride1) & 7) == 0) &&	    (((strided ^ stride2) & 7) == 0)) {		for (j = 0; j < height; j++) {/* prepare the destination addresses */			dpp = (mlib_d64 *)vis_alignaddr(dp, 0);			i = (mlib_u8 *)dpp - dp;/* prepare the source addresses */			spp1 = (mlib_d64 *)vis_alignaddr(sp1, 0);			spp2 = (mlib_d64 *)vis_alignaddr(sp2, 0);			dend = dp + amount - 1;/* generate edge mask for the start point */			emask = vis_edge8(dp, dend);			if (emask != 0xff) {				sd10 = (*spp1++);				sd20 = (*spp2++);				MLIB_V_ADDIMAGE_U8(sd10, sd20, dd);				vis_pst_8(dd, dpp++, emask);				i += 8;			}#pragma pipeloop(0)			for (; i <= amount - 8; i += 8) {				sd10 = (*spp1++);				sd20 = (*spp2++);				MLIB_V_ADDIMAGE_U8(sd10, sd20, dd);				(*dpp++) = dd;			}			if (i < amount) {				emask = vis_edge8(dpp, dend);				sd10 = (*spp1++);				sd20 = (*spp2++);				MLIB_V_ADDIMAGE_U8(sd10, sd20, dd);				vis_pst_8(dd, dpp, emask);			}			sp1 = sl1 += stride1;			sp2 = sl2 += stride2;			dp = dl += strided;		}	} else if ((offdst == offsrc1) && (((strided ^ stride1) & 7) == 0)) {		for (j = 0; j < height; j++) {/* prepare the destination addresses */			dpp = (mlib_d64 *)vis_alignaddr(dp, 0);			i = (mlib_u8 *)dpp - dp;/* prepare the source addresses */			spp1 = (mlib_d64 *)vis_alignaddr(sp1, 0);			spp2 = (mlib_d64 *)vis_alignaddr(sp2, i);			dend = 
dp + amount - 1;/* generate edge mask for the start point */			emask = vis_edge8(dp, dend);			sd20 = vis_ld_d64_nf(spp2);			if (emask != 0xff) {				sd10 = (*spp1++);				sd21 = vis_ld_d64_nf(spp2 + 1);				sd20 = vis_faligndata(sd20, sd21);				MLIB_V_ADDIMAGE_U8(sd10, sd20, dd);				vis_pst_8(dd, dpp++, emask);				sd20 = sd21;//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例7: mlib_v_ImageAdd_U16

mlib_statusmlib_v_ImageAdd_U16(    mlib_image *dst,    const mlib_image *src1,    const mlib_image *src2){	mlib_s32 i, j, k;	mlib_s32 offdst, offsrc1, offsrc2, emask, mask;	mlib_s32 amount;	mlib_d64 *dpp, *spp2, *spp1, *tmp_ptr, tmp;	mlib_d64 sd10, sd11, sd20, sd21;	mlib_d64 ones = vis_to_double_dup(0x7fff7fff);	mlib_d64 max_u16 = vis_to_double_dup(0xffffffff);	mlib_u16 *dend;	VALIDATE(mlib_u16);/* initialize GSR scale factor */	vis_write_gsr(15 << 3);	sl1 = sp1;	sl2 = sp2;	dl = dp;	amount = width * channels;	offdst = ((mlib_addr)dp) & 7;	offsrc1 = ((mlib_addr)sp1) & 7;	offsrc2 = ((mlib_addr)sp2) & 7;	if ((offdst == offsrc1) && (offdst == offsrc2) &&	    (((strided ^ stride1) & 3) == 0) &&	    (((strided ^ stride2) & 3) == 0)) {		for (j = 0; j < height; j++) {/* prepare the destination addresses */			dpp = (mlib_d64 *)vis_alignaddr(dp, 0);			i = (mlib_u16 *)dpp - dp;/* prepare the source addresses */			spp1 = (mlib_d64 *)vis_alignaddr(sp1, 0);			spp2 = (mlib_d64 *)vis_alignaddr(sp2, 0);			dend = dp + amount - 1;/* generate edge mask for the start point */			emask = vis_edge16(dp, dend);			if (emask != 0xf) {				sd10 = (*spp1++);				sd20 = (*spp2++);				MLIB_V_ADDIMAGE_U16_emask(sd10, sd20, dpp,				    emask);				i += 4;			}#pragma pipeloop(0)			for (; i <= amount - 4; i += 4) {				sd10 = (*spp1++);				sd20 = (*spp2++);				MLIB_V_ADDIMAGE_U16(sd10, sd20, dpp)			}			if (i < amount) {				emask = vis_edge16(dpp, dend);				sd10 = (*spp1++);				sd20 = (*spp2++);				MLIB_V_ADDIMAGE_U16_emask(sd10, sd20, dpp,				    emask);			}			sp1 = sl1 += stride1;			sp2 = sl2 += stride2;			dp = dl += strided;		}	} else if ((offdst == offsrc1) && (((strided ^ stride1) & 3) == 0)) {
开发者ID:Aries85,项目名称:mediaLib,代码行数:75,


示例8: __mlib_VectorSub_U8_U8_Mod

//.........这里部分代码省略.........			SUB_S8_MOD;			dpz[0] = dz;			px += 8;			py += 8;			dpz++;			nrest -= 8;		}		if (nrest > 0) {			dx = *((mlib_d64 *)px);			dy = *((mlib_d64 *)py);			SUB_S8_MOD;			emask = vis_edge8(dpz, pzend);			vis_pst_8(dz, dpz, emask);		}	} else {/* * General case. */		dpz = (mlib_d64 *)((mlib_addr)z & (~7));		off = (mlib_addr)dpz - (mlib_addr)z;/* * generate edge mask for the start point */		emask = vis_edge8(pz, pzend);/* * prepare the source address */		if (off) {			dpy = (mlib_d64 *)vis_alignaddr(py, off);			dy0 = vis_ld_d64_nf(dpy);			dy1 = vis_ld_d64_nf(dpy + 1);			dy = vis_faligndata(dy0, dy1);			dpx = (mlib_d64 *)vis_alignaddr(px, off);			dx0 = vis_ld_d64_nf(dpx);			dx1 = vis_ld_d64_nf(dpx + 1);			dx = vis_faligndata(dx0, dx1);			SUB_S8_MOD;/* * store first bytes of result */			vis_pst_8(dz, dpz, emask);			px += (8 + off);			py += (8 + off);			len -= (8 + off);			dpz++;			if (len <= 0)				return (MLIB_SUCCESS);		}		even_8 = len >> 3;		rest_8 = len & 0x7;/* * Now try to analyze source "x" and "y" addresses. */		if ((!((mlib_addr)px & 7)) && (!((mlib_addr)py & 7))) {
开发者ID:Aries85,项目名称:mediaLib,代码行数:67,


示例9: __mlib_SignalEmphasize_S16S_S16S_Sat

mlib_status__mlib_SignalEmphasize_S16S_S16S_Sat(    mlib_s16 *dst,    const mlib_s16 *src,    void *filter,    mlib_s32 n){	mlib_emphasize_struct *fist = filter;	mlib_d64 w_maskand0 = vis_to_double(0xFFFFFFFF, 0xFFFF);	mlib_d64 w_maskor0  = vis_freg_pair(0.f, fist->v16_last0);	mlib_d64 w_maskand1 = vis_to_double(0xFFFFFFFF, 0xFFFF0000);	mlib_d64 w_maskor1  = vis_freg_pair(0.f, fist->v16_last1);	mlib_f32 v_mask	    = vis_to_float(0x80008000);	mlib_f32 v_alpha    = fist->v_alpha;	mlib_s16 *fdst	    = dst + n + n - 1;	mlib_d64 *dpd, *dps, *dsrct1;	mlib_d64 w_dst, w_src, w_src0, w_src1, w_src2, w_lsrc;	mlib_d64 dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7;	mlib_s32 i, times, t1, t2;/* check for obvious errors */	if ((fist == NULL) || (n <= 0) || (src == 0) || (dst == 0) ||	    (fist->type != MLIB_EMPH)) {		return (MLIB_FAILURE);	}	vis_write_gsr(1 << 3);	w_maskor0 = vis_fand(w_maskor0, w_maskand1);	w_maskor1 = vis_fand(w_maskor1, w_maskand0);	vis_alignaddr((void *)(-(mlib_addr)src), 0);	w_maskand0 = vis_faligndata(w_maskand0, w_maskand0);	w_maskor0 = vis_faligndata(w_maskor0, w_maskor0);	w_maskand1 = vis_faligndata(w_maskand1, w_maskand1);	w_maskor1 = vis_faligndata(w_maskor1, w_maskor1);	dpd = vis_alignaddr(dst, 0);	times = (mlib_d64 *)vis_alignaddr(fdst, 0) - dpd;	t1 = -((mlib_addr)(dst) & 7);	t2 = t1 - 4;	dps = vis_alignaddr((void *)src, t2);	w_src0 = vis_ld_d64_nf(dps);	dps++;	w_src1 = vis_ld_d64_nf(dps);	dps++;	if ((((mlib_addr)dst ^ (mlib_addr)src) & 7)) {		if (((mlib_addr)dps - (mlib_addr)src) >= 6) {			w_src0 = vis_fand(w_maskand0, w_src0);			w_src0 = vis_for(w_maskor0, w_src0);		} else {			w_src1 = vis_fand(w_maskand0, w_src1);			w_src1 = vis_for(w_maskor0, w_src1);		}		if (((mlib_addr)dps - (mlib_addr)src) >= 8) {			w_src0 = vis_fand(w_maskand1, w_src0);			w_src0 = vis_for(w_maskor1, w_src0);		} else {			w_src1 = vis_fand(w_maskand1, w_src1);			w_src1 = vis_for(w_maskor1, w_src1);		}		w_lsrc = vis_faligndata(w_src0, w_src1);		dsrct1 = vis_alignaddr((void *)src, t1);	
	if (dps - 2 != dsrct1) {			w_src2 = *dps;			dps++;			w_src = vis_faligndata(w_src1, w_src2);			MLIB_MUL8;			if ((mlib_addr)dst & 7) {				times--;				w_src0 = w_src1;				w_src1 = w_src2;				w_src2 = *dps;				vis_alignaddr((void *)src, t2);				w_lsrc = vis_faligndata(w_src0, w_src1);				vis_alignaddr((void *)src, t1);				w_src = vis_faligndata(w_src1, w_src2);				dps++;				MLIB_MIX;				w_dst = vis_fpackfix_pair(dr2, dr3);				vis_pst_16(w_dst, dpd, vis_edge16(dst, fdst));				dpd++;			}			w_src0 = w_src1;			w_src1 = w_src2;			w_src2 = vis_ld_d64_nf(dps);			vis_alignaddr((void *)src, t2);			w_lsrc = vis_faligndata(w_src0, w_src1);			vis_alignaddr((void *)src, t1);			w_src = vis_faligndata(w_src1, w_src2);//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例10: __mlib_VectorSub_S16_S8_Mod

mlib_status__mlib_VectorSub_S16_S8_Mod(	mlib_s16 *z,	const mlib_s8 *x,	const mlib_s8 *y,	mlib_s32 n){	mlib_d64 *dpz, *dpx, *dpy;	mlib_d64 dx, dy, dx0, dx1, dy0, dy1;	mlib_d64 dxh, dxl, dyh, dyl, dzh, dzl;	mlib_f32 fone = vis_to_float(0x100);	mlib_s8 *px, *py;	mlib_s16 *pz;	mlib_s32 len = n, i;/* rest and leng in terms of 8 bytes. */	mlib_s32 rest_8, even_8;	mlib_d64 restore = vis_to_double_dup(0x80808080);	if (n <= 0)		return (MLIB_FAILURE);	px = (mlib_s8 *)x;	py = (mlib_s8 *)y;	pz = (mlib_s16 *)z;	if (n <= 8) {		SUB_S16_S8_IN_C;	}/* * prepare the source address */	while ((mlib_addr)pz & 7) {		(*pz++) = ((mlib_s16)(*px)) - (*py);		px++;		py++;		len--;	}	dpz = (mlib_d64 *)pz;	even_8 = len >> 3;	rest_8 = len & 0x7;	if ((!((mlib_addr)px & 7)) && (!((mlib_addr)py & 7))) {/* * Both addresses are 8-byte aligned. * No  vis_alignaddr and  vis_faligndata at all. */		dpx = (mlib_d64 *)px;		dpy = (mlib_d64 *)py;		dx = vis_ld_d64_nf(dpx);		dy = vis_ld_d64_nf(dpy);		dpx++;		dpy++;#pragma pipeloop(0)		for (i = 0; i < even_8; i++) {			dx1 = vis_ld_d64_nf(dpx);			dy1 = vis_ld_d64_nf(dpy);			SUB_S8_S16;			dx = dx1;			dy = dy1;/* * store 16 bytes of result */			(*dpz++) = dzh;			(*dpz++) = dzl;			dpx++;			dpy++;		}	} else if ((!((mlib_addr)px & 7))) {/* * First ("x") address is 8-byte aligned. * vis_alignaddr and vis_faligndata only for "y".** */		dpx = (mlib_d64 *)px;		dpy = vis_alignaddr(py, 0);		dy1 = vis_ld_d64_nf(dpy);		dpy++;#pragma pipeloop(0)		for (i = 0; i < even_8; i++) {			dx = (*dpx++);			dy0 = dy1;			dy1 = vis_ld_d64_nf(dpy);			dpy++;			dy = vis_faligndata(dy0, dy1);			SUB_S8_S16;/* * store 16 bytes of result *///.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例11: __mlib_VectorSub_S8_S8_Sat

mlib_status__mlib_VectorSub_S8_S8_Sat(	mlib_s8 *z,	const mlib_s8 *x,	const mlib_s8 *y,	mlib_s32 n){	mlib_d64 *dpz, *dpx, *dpy;	mlib_d64 dx, dy, dz, dx0, dx1, dy0, dy1;	mlib_d64 dxh, dxl, dyh, dyl, dzh, dzl;	mlib_d64 dh, dl;	mlib_s8 *pz = z, *px, *py, *pzend;/* offset of address alignment in destination */	mlib_s32 off;	mlib_s32 len = n, i;/* rest and leng in terms of 8 bytes. */	mlib_s32 rest_8, even_8;/* edge masks */	mlib_s32 emask;	mlib_d64 displacement = vis_to_double_dup(0x8000800);	mlib_d64 restore = vis_to_double_dup(0x80808080);	mlib_f32 fmul = vis_to_float(0x1000);	if (n <= 0)		return (MLIB_FAILURE);	px = (mlib_s8 *)x;	py = (mlib_s8 *)y;/* initialize GSR scale factor */	vis_write_gsr(3 << 3);	dpz = (mlib_d64 *)((mlib_addr)z & (~7));	off = (mlib_addr)dpz - (mlib_addr)z;	pzend = pz + n - 1;/* * generate edge mask for the start point */	emask = vis_edge8(pz, pzend);/* * prepare the source address */	if (off) {		dpy = (mlib_d64 *)vis_alignaddr(py, off);		dy0 = vis_ld_d64_nf(dpy);		dy1 = vis_ld_d64_nf(dpy + 1);		dy = vis_faligndata(dy0, dy1);		dpx = (mlib_d64 *)vis_alignaddr(px, off);		dx0 = vis_ld_d64_nf(dpx);		dx1 = vis_ld_d64_nf(dpx + 1);		dx = vis_faligndata(dx0, dx1);		SUB_S8_SAT;/* * store first bytes of result */		vis_pst_8(dz, dpz, emask);		px += (8 + off);		py += (8 + off);		len -= (8 + off);		dpz++;		if (len <= 0)			return (MLIB_SUCCESS);	}	even_8 = len >> 3;	rest_8 = len & 0x7;/* * Now try to analyze source "x" and "y" addresses. */	if ((!((mlib_addr)px & 7)) && (!((mlib_addr)py & 7))) {/* * Both addresses are 8-byte aligned. No  vis_alignaddr * and  vis_faligndata at all. */		dpx = (mlib_d64 *)px;		dpy = (mlib_d64 *)py;		dx = vis_ld_d64_nf(dpx);		dpx++;		dy = vis_ld_d64_nf(dpy);		dpy++;#pragma pipeloop(0)		for (i = 0; i < even_8; i++) {			dx1 = vis_ld_d64_nf(dpx);			dy1 = vis_ld_d64_nf(dpy);			SUB_S8_SAT;//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例12: mlib_v_ImageLookUpSI_S16_U8_4_DstOff3_D1

voidmlib_v_ImageLookUpSI_S16_U8_4_DstOff3_D1(    const mlib_s16 *src,    mlib_u8 *dst,    mlib_s32 xsize,    const mlib_u8 **table){/* pointer to source data */	mlib_s16 *sp;/* source data */	mlib_s32 s0, s1, s2;/* pointer to start of destination */	mlib_u8 *dl;/* aligned pointer to destination */	mlib_d64 *dp;/* destination data */	mlib_d64 t0, t1, t2;/* destination data */	mlib_d64 t3, t4, t5;/* destination data */	mlib_d64 t6, t7, acc;/* loop variable */	mlib_s32 i;	const mlib_u8 *tab0 = &table[0][32768];	const mlib_u8 *tab1 = &table[1][32768];	const mlib_u8 *tab2 = &table[2][32768];	const mlib_u8 *tab3 = &table[3][32768];	sp = (void *)src;	dl = dst;	dp = (mlib_d64 *)dl;	vis_alignaddr((void *)0, 7);	s0 = (*sp++);	if (xsize >= 2) {		s1 = sp[0];		s2 = sp[1];		sp += 2;#pragma pipeloop(0)		for (i = 0; i <= xsize - 4; i += 2, sp += 2) {			t7 = VIS_LD_U8_I(tab2, s2);			t6 = VIS_LD_U8_I(tab1, s2);			t5 = VIS_LD_U8_I(tab0, s2);			t4 = VIS_LD_U8_I(tab3, s1);			t3 = VIS_LD_U8_I(tab2, s1);			t2 = VIS_LD_U8_I(tab1, s1);			t1 = VIS_LD_U8_I(tab0, s1);			t0 = VIS_LD_U8_I(tab3, s0);			acc = vis_faligndata(t7, acc);			acc = vis_faligndata(t6, acc);			acc = vis_faligndata(t5, acc);			acc = vis_faligndata(t4, acc);			acc = vis_faligndata(t3, acc);			acc = vis_faligndata(t2, acc);			acc = vis_faligndata(t1, acc);			acc = vis_faligndata(t0, acc);			s0 = s2;			s1 = sp[0];			s2 = sp[1];			(*dp++) = acc;		}		t7 = VIS_LD_U8_I(tab2, s2);		t6 = VIS_LD_U8_I(tab1, s2);		t5 = VIS_LD_U8_I(tab0, s2);		t4 = VIS_LD_U8_I(tab3, s1);		t3 = VIS_LD_U8_I(tab2, s1);		t2 = VIS_LD_U8_I(tab1, s1);		t1 = VIS_LD_U8_I(tab0, s1);		t0 = VIS_LD_U8_I(tab3, s0);		acc = vis_faligndata(t7, acc);		acc = vis_faligndata(t6, acc);		acc = vis_faligndata(t5, acc);		acc = vis_faligndata(t4, acc);		acc = vis_faligndata(t3, acc);		acc = vis_faligndata(t2, acc);		acc = vis_faligndata(t1, acc);		acc = vis_faligndata(t0, acc);		s0 = s2;		(*dp++) = acc;	}	dl = (mlib_u8 *)dp;	if ((xsize & 1) != 0) {		s1 = sp[0];		t7 = 
VIS_LD_U8_I(tab2, s1);		t6 = VIS_LD_U8_I(tab1, s1);		t5 = VIS_LD_U8_I(tab0, s1);//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例13: mlib_v_ImageLookUpSI_S16_U8_2_DstA8D1

voidmlib_v_ImageLookUpSI_S16_U8_2_DstA8D1(    const mlib_s16 *src,    mlib_u8 *dst,    mlib_s32 xsize,    const mlib_u8 **table){/* pointer to source data */	mlib_s16 *sp;/* source data */	mlib_s32 s0, s1, s2, s3;/* pointer to start of destination */	mlib_u16 *dl;/* pointer to end of destination */	mlib_u16 *dend;/* aligned pointer to destination */	mlib_d64 *dp;/* destination data */	mlib_d64 t0, t1, t2;/* destination data */	mlib_d64 t3, t4, t5;/* destination data */	mlib_d64 t6, t7, acc;/* edge mask */	mlib_s32 emask;/* loop variable */	mlib_s32 i, num;	const mlib_u8 *tab0 = &table[0][32768];	const mlib_u8 *tab1 = &table[1][32768];	sp = (void *)src;	dl = (mlib_u16 *)dst;	dp = (mlib_d64 *)dl;	dend = dl + xsize - 1;	vis_alignaddr((void *)0, 7);	if (xsize >= 4) {		s0 = sp[0];		s1 = sp[1];		s2 = sp[2];		s3 = sp[3];		sp += 4;#pragma pipeloop(0)		for (i = 0; i <= xsize - 8; i += 4, sp += 4) {			t7 = VIS_LD_U8_I(tab1, s3);			t6 = VIS_LD_U8_I(tab0, s3);			t5 = VIS_LD_U8_I(tab1, s2);			t4 = VIS_LD_U8_I(tab0, s2);			t3 = VIS_LD_U8_I(tab1, s1);			t2 = VIS_LD_U8_I(tab0, s1);			t1 = VIS_LD_U8_I(tab1, s0);			t0 = VIS_LD_U8_I(tab0, s0);			acc = vis_faligndata(t7, acc);			acc = vis_faligndata(t6, acc);			acc = vis_faligndata(t5, acc);			acc = vis_faligndata(t4, acc);			acc = vis_faligndata(t3, acc);			acc = vis_faligndata(t2, acc);			acc = vis_faligndata(t1, acc);			acc = vis_faligndata(t0, acc);			s0 = sp[0];			s1 = sp[1];			s2 = sp[2];			s3 = sp[3];			(*dp++) = acc;		}		t7 = VIS_LD_U8_I(tab1, s3);		t6 = VIS_LD_U8_I(tab0, s3);		t5 = VIS_LD_U8_I(tab1, s2);		t4 = VIS_LD_U8_I(tab0, s2);		t3 = VIS_LD_U8_I(tab1, s1);		t2 = VIS_LD_U8_I(tab0, s1);		t1 = VIS_LD_U8_I(tab1, s0);		t0 = VIS_LD_U8_I(tab0, s0);		acc = vis_faligndata(t7, acc);		acc = vis_faligndata(t6, acc);		acc = vis_faligndata(t5, acc);		acc = vis_faligndata(t4, acc);		acc = vis_faligndata(t3, acc);		acc = vis_faligndata(t2, acc);		acc = vis_faligndata(t1, acc);		acc = vis_faligndata(t0, acc);		(*dp++) = acc;	}	if 
((mlib_addr)dp <= (mlib_addr)dend) {//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例14: mlib_v_ImageLookUpSI_S16_U8_3_D1

voidmlib_v_ImageLookUpSI_S16_U8_3_D1(    const mlib_s16 *src,    mlib_u8 *dst,    mlib_s32 xsize,    const mlib_u8 **table){/* pointer to source data */	mlib_s16 *sp;/* pointer to start of destination */	mlib_u8 *dl;/* aligned pointer to destination */	mlib_d64 *dp;/* destination data */	mlib_d64 t0, t1, t2;/* destination data */	mlib_d64 t3, t4, t5;/* destination data */	mlib_d64 t6, t7;/* destination data */	mlib_d64 acc0, acc1, acc2;/* loop variable */	mlib_s32 i;	const mlib_u8 *tab0 = &table[0][32768];	const mlib_u8 *tab1 = &table[1][32768];	const mlib_u8 *tab2 = &table[2][32768];	mlib_s32 s00, s01, s02, s03;	mlib_s32 s10, s11, s12, s13;	sp = (void *)src;	dl = dst;	dp = (mlib_d64 *)dl;	vis_alignaddr((void *)0, 7);	i = 0;	if (xsize >= 8) {		s00 = sp[0];		s01 = sp[1];		s02 = sp[2];		s03 = sp[3];		s10 = sp[4];		s11 = sp[5];		s12 = sp[6];		s13 = sp[7];		sp += 8;#pragma pipeloop(0)		for (i = 0; i <= xsize - 16; i += 8, sp += 8) {			t7 = VIS_LD_U8_I(tab1, s02);			t6 = VIS_LD_U8_I(tab0, s02);			t5 = VIS_LD_U8_I(tab2, s01);			t4 = VIS_LD_U8_I(tab1, s01);			t3 = VIS_LD_U8_I(tab0, s01);			t2 = VIS_LD_U8_I(tab2, s00);			t1 = VIS_LD_U8_I(tab1, s00);			t0 = VIS_LD_U8_I(tab0, s00);			acc0 = vis_faligndata(t7, acc0);			acc0 = vis_faligndata(t6, acc0);			acc0 = vis_faligndata(t5, acc0);			acc0 = vis_faligndata(t4, acc0);			acc0 = vis_faligndata(t3, acc0);			acc0 = vis_faligndata(t2, acc0);			acc0 = vis_faligndata(t1, acc0);			acc0 = vis_faligndata(t0, acc0);			t7 = VIS_LD_U8_I(tab0, s11);			t6 = VIS_LD_U8_I(tab2, s10);			t5 = VIS_LD_U8_I(tab1, s10);			t4 = VIS_LD_U8_I(tab0, s10);			t3 = VIS_LD_U8_I(tab2, s03);			t2 = VIS_LD_U8_I(tab1, s03);			t1 = VIS_LD_U8_I(tab0, s03);			t0 = VIS_LD_U8_I(tab2, s02);			acc1 = vis_faligndata(t7, acc1);			acc1 = vis_faligndata(t6, acc1);			acc1 = vis_faligndata(t5, acc1);			acc1 = vis_faligndata(t4, acc1);			acc1 = vis_faligndata(t3, acc1);			acc1 = vis_faligndata(t2, acc1);			acc1 = vis_faligndata(t1, acc1);			acc1 = vis_faligndata(t0, acc1);		
	t7 = VIS_LD_U8_I(tab2, s13);			t6 = VIS_LD_U8_I(tab1, s13);			t5 = VIS_LD_U8_I(tab0, s13);			t4 = VIS_LD_U8_I(tab2, s12);			t3 = VIS_LD_U8_I(tab1, s12);			t2 = VIS_LD_U8_I(tab0, s12);			t1 = VIS_LD_U8_I(tab2, s11);			t0 = VIS_LD_U8_I(tab1, s11);			acc2 = vis_faligndata(t7, acc2);			acc2 = vis_faligndata(t6, acc2);//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例15: mlib_v_ImageLookUp_S16_U8_124_D1

void mlib_v_ImageLookUp_S16_U8_124_D1(const mlib_s16 *src,                                      mlib_u8        *dst,                                      mlib_s32       xsize,                                      const mlib_u8  *table0,                                      const mlib_u8  *table1,                                      const mlib_u8  *table2,                                      const mlib_u8  *table3){  mlib_s16 *sp;                        /* pointer to source data */  mlib_s32 s0, s1, s2, s3;             /* source data */  mlib_s32 s4, s5, s6, s7;             /* source data */  mlib_u8 *dl;                         /* pointer to start of destination */  mlib_u8 *dend;                       /* pointer to end of destination */  mlib_d64 *dp;                        /* aligned pointer to destination */  mlib_d64 t0, t1, t2;                 /* destination data */  mlib_d64 t3, t4, t5;                 /* destination data */  mlib_d64 t6, t7, acc;                /* destination data */  mlib_s32 emask;                      /* edge mask */  mlib_s32 i, num;                     /* loop variable */  dl = dst;  dp = (mlib_d64 *) dl;  dend = dl + xsize - 1;  sp = (void *)src;  vis_alignaddr((void *)0, 7);  if (xsize >= 8) {    s0 = sp[0];    s1 = sp[1];    s2 = sp[2];    s3 = sp[3];    s4 = sp[4];    s5 = sp[5];    s6 = sp[6];    s7 = sp[7];    sp += 8;#pragma pipeloop(0)    for (i = 0; i <= xsize - 16; i += 8, sp += 8) {      t7 = VIS_LD_U8_I(table3, s7);      t6 = VIS_LD_U8_I(table2, s6);      t5 = VIS_LD_U8_I(table1, s5);      t4 = VIS_LD_U8_I(table0, s4);      t3 = VIS_LD_U8_I(table3, s3);      t2 = VIS_LD_U8_I(table2, s2);      t1 = VIS_LD_U8_I(table1, s1);      t0 = VIS_LD_U8_I(table0, s0);      acc = vis_faligndata(t7, acc);      acc = vis_faligndata(t6, acc);      acc = vis_faligndata(t5, acc);      acc = vis_faligndata(t4, acc);      acc = vis_faligndata(t3, acc);      acc = vis_faligndata(t2, acc);      acc = vis_faligndata(t1, acc);      acc = 
vis_faligndata(t0, acc);      s0 = sp[0];      s1 = sp[1];      s2 = sp[2];      s3 = sp[3];      s4 = sp[4];      s5 = sp[5];      s6 = sp[6];      s7 = sp[7];      *dp++ = acc;    }    t7 = VIS_LD_U8_I(table3, s7);    t6 = VIS_LD_U8_I(table2, s6);    t5 = VIS_LD_U8_I(table1, s5);    t4 = VIS_LD_U8_I(table0, s4);    t3 = VIS_LD_U8_I(table3, s3);    t2 = VIS_LD_U8_I(table2, s2);    t1 = VIS_LD_U8_I(table1, s1);    t0 = VIS_LD_U8_I(table0, s0);    acc = vis_faligndata(t7, acc);    acc = vis_faligndata(t6, acc);    acc = vis_faligndata(t5, acc);    acc = vis_faligndata(t4, acc);    acc = vis_faligndata(t3, acc);    acc = vis_faligndata(t2, acc);    acc = vis_faligndata(t1, acc);    acc = vis_faligndata(t0, acc);    *dp++ = acc;  }  if ((mlib_addr) dp <= (mlib_addr) dend) {    num = (mlib_addr) dend - (mlib_addr) dp;    sp += num;    num++;    if ((num & 3) == 1) {      s0 = (mlib_s32) * sp;      sp--;      t0 = VIS_LD_U8_I(table0, s0);      acc = vis_faligndata(t0, acc);      num--;//.........这里部分代码省略.........
开发者ID:michalwarecki,项目名称:ManagedRuntimeInitiative,代码行数:101,


示例16: mlib_v_VideoColorYUV2ABGR422_nonalign

static mlib_statusmlib_v_VideoColorYUV2ABGR422_nonalign(	mlib_u8 *abgr,	const mlib_u8 *y,	const mlib_u8 *u,	const mlib_u8 *v,	mlib_s32 width,	mlib_s32 height,	mlib_s32 abgr_stride,	mlib_s32 y_stride,	mlib_s32 uv_stride){/* pointers to src address */	mlib_u8 *sp2, *sp3, *sl2, *sl3;/* pointers to src address */	mlib_u8 *sp1, *sl1;/* pointers to dst address */	mlib_u8 *dp, *dl, *dend;/* all. pointer to y */	mlib_d64 *spy;/* all. pointer to dst */	mlib_d64 *dpp;/* u, v data */	mlib_f32 fu0, fu1, fv0, fv1;/* y data */	mlib_d64 dy0, dy1, dy3;	mlib_d64 du, dv;/* (1.1644, 1.5966)*8192 */	mlib_f32 k12 = vis_to_float(0x25433317);/* (-.3920, -.8132)*8192 */	mlib_f32 k34 = vis_to_float(0xf375e5fa);/* 2.0184*8192 */	mlib_f32 k5 = vis_to_float(0x1004097);	mlib_d64 k_222_9952 = vis_to_double(0x1be01be0, 0x1be01be0);	mlib_d64 k_135_6352 = vis_to_double(0x10f410f4, 0x10f410f4);	mlib_d64 k_276_9856 = vis_to_double(0x22a022a0, 0x22a022a0);	mlib_d64 u_3920_hi, u_20184_hi, v_15966_hi, v_8132_hi;	mlib_d64 u_3920_lo, u_20184_lo, v_15966_lo, v_8132_lo;	mlib_d64 y_11644_hi, y_11644_lo;	mlib_d64 r_hi, r_lo, g_hi, g_lo, b_hi, b_lo;	mlib_d64 temp_r_hi, temp_r_lo, temp_g_hi, temp_g_lo, temp_b_hi,		temp_b_lo;	mlib_f32 red_hi, red_lo, green_hi, green_lo, blue_hi, blue_lo;	mlib_d64 blue_red_hi, x_green_hi, blue_red_lo, x_green_lo;	mlib_d64 dd, dd0, dd1;/* loop variable */	mlib_s32 i, j;/* alpha_ch. 
is not written */	mlib_s32 emask = 0x7777;	mlib_s32 emask1;	mlib_s32 off;	mlib_f32 *dfu, *dfv;	mlib_d64 du0, du1, dv0, dv1;	mlib_s32 off2, off3;	mlib_s32 inc;/* * initialize GSR scale factor */	vis_write_gsr(2 << 3);	sp1 = sl1 = (mlib_u8 *)y;	sp2 = sl2 = (mlib_u8 *)u;	sp3 = sl3 = (mlib_u8 *)v;	dl = dp = (mlib_u8 *)abgr;/* * row loop */	for (j = 0; j < height; j++) {		spy = (mlib_d64 *)vis_alignaddr(sp1, 0);		dpp = (mlib_d64 *)vis_alignaddr(dp, 0);		dfu = (mlib_f32 *)((mlib_addr)sp2 & ~3);		off2 = (sp2 - (mlib_u8 *)dfu) * 2;		dfv = (mlib_f32 *)((mlib_addr)sp3 & ~3);		off3 = (sp3 - (mlib_u8 *)dfv) * 2;		dend = dp + width * 4 - 1;		emask1 = vis_edge8(dp, dend);		i = dp - (mlib_u8 *)dpp;		emask >>= i;		inc = (emask1 != 0xff);		emask1 &= emask;		off = 8 - i;		vis_alignaddr((void *)off2, 0);		fu0 = vis_ld_f32_nf(dfu); dfu++;		fu1 = vis_ld_f32_nf(dfu); dfu++;//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例17: MLIB_V_ADDIMAGE_U16_emask

				sd10 = (*spp1++);				sd20 = (*spp2++);				MLIB_V_ADDIMAGE_U16_emask(sd10, sd20, dpp,				    emask);			}			sp1 = sl1 += stride1;			sp2 = sl2 += stride2;			dp = dl += strided;		}	} else if ((offdst == offsrc1) && (((strided ^ stride1) & 3) == 0)) {		for (j = 0; j < height; j++) {/* prepare the destination addresses */			dpp = (mlib_d64 *)vis_alignaddr(dp, 0);			i = (mlib_u16 *)dpp - dp;/* prepare the source addresses */			spp1 = (mlib_d64 *)vis_alignaddr(sp1, 0);			spp2 = (mlib_d64 *)vis_alignaddr(sp2, 2 * i);			dend = dp + amount - 1;/* generate edge mask for the start point */			emask = vis_edge16(dp, dend);			sd20 = vis_ld_d64_nf(spp2);			if (emask != 0xf) {				sd10 = (*spp1++);				sd21 = vis_ld_d64_nf(spp2 + 1);
开发者ID:Aries85,项目名称:mediaLib,代码行数:31,


示例18: __mlib_VideoInterpAveX_U8_U8_16x16

mlib_status__mlib_VideoInterpAveX_U8_U8_16x16(    mlib_u8 *curr_block,    const mlib_u8 *ref_block,    mlib_s32 frame_stride,    mlib_s32 field_stride){    mlib_d64 s0, s1, s2, s3, s4, s5, s6;    mlib_d64 sd0, sd1, sd2, sd3, d0, d1, d2, d3;    mlib_d64 *sd, *dd;    mlib_d64 dzero = vis_fzero();    const mlib_f32 fm2 = vis_to_float(0x1000200);    mlib_f32 fzero = vis_read_hi(dzero);    mlib_d64 rounder = vis_fpsub16(dzero, vis_fone());    mlib_s32 y;    rounder = vis_fpadd16(vis_fpadd16(rounder, rounder), rounder);    vis_write_gsr((5 << 3) + ((mlib_u32)ref_block & 7));    dd = (mlib_d64 *)curr_block;    sd = (mlib_d64 *)((mlib_addr)ref_block & ~7);    y = 8;    if (((mlib_s32)(ref_block + 1) & 7)) {        do {            s0 = sd[0];            s1 = sd[1];            s2 = sd[2];            sd0 = vis_faligndata(s0, s1);            sd1 = vis_faligndata(s1, s2);            sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);            s4 = sd[0];            s5 = sd[1];            s6 = sd[2];            sd2 = vis_faligndata(s4, s5);            sd3 = vis_faligndata(s5, s6);            vis_alignaddr((void *)(ref_block + 1), 0);            sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);            d0 = dd[0];            d1 = dd[1];            d2 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[0];            d3 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[1];            s0 = vis_faligndata(s0, s1);            s1 = vis_faligndata(s1, s2);            s2 = vis_faligndata(s4, s5);            s3 = vis_faligndata(s5, s6);            MLIB_V_VIDEOINTERPAVG(d0, sd0, s0);            MLIB_V_VIDEOINTERPAVG(d1, sd1, s1);            MLIB_V_VIDEOINTERPAVG(d2, sd2, s2);            MLIB_V_VIDEOINTERPAVG(d3, sd3, s3);            dd[0] = d0;            dd[1] = d1;            dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);            dd[0] = d2;            dd[1] = d3;            dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);            vis_alignaddr((void *)ref_block, 0);        } while 
(--y);    } else {        do {            s0 = sd[0];            s1 = sd[1];            s2 = sd[2];            sd0 = vis_faligndata(s0, s1);            sd1 = vis_faligndata(s1, s2);            sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);            s4 = sd[0];            s5 = sd[1];            s6 = sd[2];            sd2 = vis_faligndata(s4, s5);            sd3 = vis_faligndata(s5, s6);            sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);            d0 = dd[0];            d1 = dd[1];            d2 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[0];            d3 = ((mlib_d64 *)((mlib_u8 *)dd + field_stride))[1];            MLIB_V_VIDEOINTERPAVG0(d0, sd0, s1);            MLIB_V_VIDEOINTERPAVG(d1, sd1, s2);            MLIB_V_VIDEOINTERPAVG(d2, sd2, s5);            MLIB_V_VIDEOINTERPAVG(d3, sd3, s6);            dd[0] = d0;            dd[1] = d1;            dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);            dd[0] = d2;            dd[1] = d3;            dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);        } while (--y);    }    return (MLIB_SUCCESS);}
开发者ID:Aries85,项目名称:mediaLib,代码行数:94,


示例19: mlib_v_ImageAdd_S16

mlib_statusmlib_v_ImageAdd_S16(    mlib_image *dst,    const mlib_image *src1,    const mlib_image *src2){	mlib_s32 i, j, k;	mlib_s32 offdst, offsrc1, offsrc2, emask;	mlib_s32 amount;	mlib_d64 *dpp, *spp2, *spp1, *tmp_ptr;	mlib_d64 dd, dd0, dd1, sd10, sd11, sd20, sd21;	mlib_s16 *dend;	VALIDATE(mlib_s16);	sl1 = sp1;	sl2 = sp2;	dl = dp;	amount = width * channels;	offdst = ((mlib_addr)dp) & 7;	offsrc1 = ((mlib_addr)sp1) & 7;	offsrc2 = ((mlib_addr)sp2) & 7;	if ((offdst == offsrc1) && (offdst == offsrc2) &&	    (((strided ^ stride1) & 3) == 0) &&	    (((strided ^ stride2) & 3) == 0)) {		for (j = 0; j < height; j++) {/* prepare the destination addresses */			dpp = (mlib_d64 *)vis_alignaddr(dp, 0);			i = (mlib_s16 *)dpp - dp;/* prepare the source addresses */			spp1 = (mlib_d64 *)vis_alignaddr(sp1, 0);			spp2 = (mlib_d64 *)vis_alignaddr(sp2, 0);			dend = dp + amount - 1;/* generate edge mask for the start point */			emask = vis_edge16(dp, dend);			if (emask != 0xf) {				sd10 = (*spp1++);				sd20 = (*spp2++);				MLIB_V_ADDIMAGE_S16(sd10, sd20, dd);				vis_pst_16(dd, dpp++, emask);				i += 4;			}#pragma pipeloop(0)			for (; i <= amount - 4; i += 4) {				sd10 = (*spp1++);				sd20 = (*spp2++);				MLIB_V_ADDIMAGE_S16(sd10, sd20, dd);				(*dpp++) = dd;			}			if (i < amount) {				emask = vis_edge16(dpp, dend);				sd10 = (*spp1++);				sd20 = (*spp2++);				MLIB_V_ADDIMAGE_S16(sd10, sd20, dd);				vis_pst_16(dd, dpp, emask);			}			sp1 = sl1 += stride1;			sp2 = sl2 += stride2;			dp = dl += strided;		}	} else if ((offdst == offsrc1) && (((strided ^ stride1) & 3) == 0)) {		for (j = 0; j < height; j++) {/* prepare the destination addresses */			dpp = (mlib_d64 *)vis_alignaddr(dp, 0);			i = (mlib_s16 *)dpp - dp;/* prepare the source addresses */			spp1 = (mlib_d64 *)vis_alignaddr(sp1, 0);			spp2 = (mlib_d64 *)vis_alignaddr(sp2, 2 * i);			dend = dp + amount - 1;/* generate edge mask for the start point */			emask = vis_edge16(dp, dend);			sd20 = vis_ld_d64_nf(spp2);			if (emask != 0xf) {		
		sd10 = (*spp1++);				sd21 = vis_ld_d64_nf(spp2 + 1);				sd20 = vis_faligndata(sd20, sd21);				MLIB_V_ADDIMAGE_S16(sd10, sd20, dd);				vis_pst_16(dd, dpp++, emask);				sd20 = sd21;				spp2++;				i += 4;			}#pragma pipeloop(0)			for (; i <= amount - 4; i += 4) {//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例20: __mlib_VideoInterpAveX_U8_U8

mlib_status__mlib_VideoInterpAveX_U8_U8(    mlib_u8 *curr_block,    const mlib_u8 *ref_block,    mlib_s32 width,    mlib_s32 height,    mlib_s32 frame_stride,    mlib_s32 field_stride){    mlib_d64 s0, s1, s2, s3, s4, s5, s6, s7;    mlib_d64 sd0, sd1, sd2, sd3, d0, d1, d2, d3;    mlib_d64 *sd, *dd;    mlib_d64 dzero = vis_fzero();    const mlib_f32 fm2 = vis_to_float(0x1000200);    mlib_f32 fzero = vis_read_hi(dzero);    mlib_d64 rounder = vis_fpsub16(dzero, vis_fone());    mlib_s32 y;    rounder = vis_fpadd16(vis_fpadd16(rounder, rounder), rounder);    vis_write_gsr((5 << 3) + ((mlib_u32)ref_block & 7));    dd = (mlib_d64 *)curr_block;    sd = (mlib_d64 *)((mlib_addr)ref_block & ~7);    if (width == 8) {        y = height >> 2;        if (((mlib_s32)(ref_block + 1) & 7)) {            do {                s0 = sd[0];                s1 = sd[1];                sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);                sd0 = vis_faligndata(s0, s1);                s2 = sd[0];                s3 = sd[1];                sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);                sd1 = vis_faligndata(s2, s3);                s4 = sd[0];                s5 = sd[1];                sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);                sd2 = vis_faligndata(s4, s5);                s6 = sd[0];                s7 = sd[1];                sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);                sd3 = vis_faligndata(s6, s7);                vis_alignaddr((void *)(ref_block + 1), 0);                d0 = *dd;                d1 = *(mlib_d64 *)((mlib_u8 *)dd +                                   field_stride);                d2 = *(mlib_d64 *)((mlib_u8 *)dd +                                   2 * field_stride);                d3 = *(mlib_d64 *)((mlib_u8 *)dd +                                   3 * field_stride);                s0 = vis_faligndata(s0, s1);                s1 = vis_faligndata(s2, s3);                s2 = vis_faligndata(s4, s5);                s3 = 
vis_faligndata(s6, s7);                MLIB_V_VIDEOINTERPAVG(d0, sd0, s0);                MLIB_V_VIDEOINTERPAVG(d1, sd1, s1);                MLIB_V_VIDEOINTERPAVG(d2, sd2, s2);                MLIB_V_VIDEOINTERPAVG(d3, sd3, s3);                *dd = d0;                dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);                *dd = d1;                dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);                *dd = d2;                dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);                *dd = d3;                dd = (mlib_d64 *)((mlib_u8 *)dd + field_stride);                vis_alignaddr((void *)ref_block, 0);            } while (--y);        } else {            do {                s0 = sd[0];                s1 = sd[1];                sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);                sd0 = vis_faligndata(s0, s1);                s2 = sd[0];                s3 = sd[1];                sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);                sd1 = vis_faligndata(s2, s3);                s4 = sd[0];                s5 = sd[1];                sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);                sd2 = vis_faligndata(s4, s5);                s6 = sd[0];                s7 = sd[1];                sd = (mlib_d64 *)((mlib_u8 *)sd + field_stride);                sd3 = vis_faligndata(s6, s7);                d0 = *dd;                d1 = *(mlib_d64 *)((mlib_u8 *)dd +                                   field_stride);                d2 = *(mlib_d64 *)((mlib_u8 *)dd +                                   2 * field_stride);                d3 = *(mlib_d64 *)((mlib_u8 *)dd +                                   3 * field_stride);                MLIB_V_VIDEOINTERPAVG0(d0, sd0, s1);//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例21: __mlib_VectorConvert_S16_S32_Sat

mlib_status__mlib_VectorConvert_S16_S32_Sat(	mlib_s16 *z,	const mlib_s32 *x,	mlib_s32 n){	mlib_s32 *src = (void *)x;	mlib_s16 *dst = z;	mlib_d64 *dsrc, *ddst;	mlib_d64 d0, d1, d2, d3, d4, d5, d6, d7, d8;	mlib_s32 c;	mlib_s32 len_64, even_length, rest_64, length = n, i;	if (n < 16) {		PACK_S_S(mlib_s32, mlib_s16, MLIB_S16_MAX, MLIB_S16_MIN);	}/* * First try to align destination address for 8 bytes. */	while ((mlib_addr)dst & 7) {		(*dst++) = (c = *src) > MLIB_S16_MAX ? MLIB_S16_MAX			: (c < MLIB_S16_MIN ? MLIB_S16_MIN : c);		src++;		length--;	}	vis_write_gsr(16 << 3);	rest_64 = length & 3;	len_64 = length >> 2;	even_length = len_64 << 2;	ddst = (mlib_d64 *)dst;	if (((mlib_addr)src & 7) == 0) {/* * Source address is also 8-byte aligned. */		dsrc = (mlib_d64 *)src;/* * Peeling the 1st iteration. */		if (i = (len_64 & 1)) {			d1 = (*dsrc++);			d2 = (*dsrc++);			(*ddst++) =				vis_freg_pair(vis_fpackfix(d1),				vis_fpackfix(d2));		}/* * Then loop with step==2. */#pragma pipeloop(0)#pragma unroll(4)		for (; i < len_64; i += 2) {			d1 = (*dsrc++);			d2 = (*dsrc++);			d3 = (*dsrc++);			d4 = (*dsrc++);			(*ddst++) =				vis_freg_pair(vis_fpackfix(d1),				vis_fpackfix(d2));			(*ddst++) =				vis_freg_pair(vis_fpackfix(d3),				vis_fpackfix(d4));		}	} else {/* * Source address is arbitrary aligned. Use vis_alignaddr() and * vis_faligndata() functions. */		dsrc = (mlib_d64 *)vis_alignaddr(src, 0);		d4 = (*dsrc++);/* * Peeling of 1 iteration. */		if (i = (len_64 & 1)) {			d1 = d4;			d2 = (*dsrc++);			d4 = vis_ld_d64_nf(dsrc); dsrc++;			d5 = vis_faligndata(d1, d2);			d6 = vis_faligndata(d2, d4);			(*ddst++) =				vis_freg_pair(vis_fpackfix(d5),				vis_fpackfix(d6));		}/* * Then loop with step==2.//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例22: mlib_VectorDotProd_U8C_al_x

/* The case of even address of vector x */static voidmlib_VectorDotProd_U8C_al_x(	mlib_d64 *z,	const void *x,	const void *y,	mlib_s32 n){	mlib_u8 *pxend, *px = (mlib_u8 *)x, *py = (mlib_u8 *)y;	mlib_d64 sum_r = 0.0, sum_i = 0.0;	mlib_d64 *dpx, *dpy, *dpxend;	mlib_d64 dx, dy, dy0, dy1;	mlib_d64 dx_r, dy_r, dy_i;	mlib_d64 d_iih, d_iil, d_irh, d_irl, d_rih, d_ril, d_rrh, d_rrl;	mlib_d64 d_ih, d_il, d_rh, d_rl;	mlib_d64 ds_r, ds_i, ds1_r, ds1_i;	mlib_d64 lb_mask = vis_to_double_dup(0x00FF00FF);	mlib_d64 edge[2], fzero = vis_fzero();	mlib_f32 fsum;	mlib_s32 d_left;	mlib_s32 emask, off;	edge[0] = edge[1] = 0;	dpx = (mlib_d64 *)((mlib_addr)px & (~7));	off = (mlib_addr)dpx - (mlib_addr)px;	dpy = vis_alignaddr((void *)py, off);	pxend = px + n + n - 1;	dpxend = (mlib_d64 *)((mlib_addr)pxend & (~7));	emask = vis_edge8(px, pxend);	vis_pst_8(dpx[0], edge, emask);	dx = edge[0];	dy = vis_ld_d64_nf(dpy);	if (((((mlib_addr)px) ^ ((mlib_addr)py)) & 7) == 0) {		vis_write_bmask(0x781A3C5E, 0);		while ((mlib_addr)dpx < (mlib_addr)dpxend) {			d_left = dpxend - dpx;			if (d_left > MAX_LOOP)				d_left = MAX_LOOP;			ds_i = ds_r = ds1_i = ds1_r = 0.0;#pragma pipeloop(0)			for (; d_left > 0; d_left--) {				DPROD_U8C0;				SUM_U8C;				dx = dpx[1];				dy = dpy[1];				dpx++;				dpy++;			}			ds_i = vis_fpadd32(ds_i, ds1_i);			ds_r = vis_fpadd32(ds_r, ds1_r);			fsum = vis_read_hi(ds_r);			sum_r += (mlib_d64)*((mlib_s32 *)&fsum);			fsum = vis_read_lo(ds_r);			sum_r += (mlib_d64)*((mlib_s32 *)&fsum);			fsum = vis_read_hi(ds_i);			sum_i += (mlib_d64)*((mlib_s32 *)&fsum);			fsum = vis_read_lo(ds_i);			sum_i += (mlib_d64)*((mlib_s32 *)&fsum);		}	} else {		mlib_s32 mask = ((mlib_addr)(py + off)) & 7;		vis_write_bmask(0x11111111 * mask, 0x01234567);		dy1 = vis_ld_d64_nf(dpy+1);		dy = vis_bshuffle(dy, dy1);		SET_ALIGN_U8C;		while ((mlib_addr)dpx < (mlib_addr)dpxend) {			d_left = dpxend - dpx;			if (d_left > MAX_LOOP)				d_left = MAX_LOOP;			ds_i = ds_r = ds1_i = ds1_r = 0.0;#pragma pipeloop(0)			for (; 
d_left > 0; d_left--) {				DPROD_U8C;				SUM_U8C;				dy0 = dy1;				dy1 = vis_ld_d64_nf(dpy+2);				dx = vis_ld_d64_nf(dpx+1);				dy = vis_bshuffle(dy0, dy1);				dpx++;				dpy++;			}			ds_i = vis_fpadd32(ds_i, ds1_i);			ds_r = vis_fpadd32(ds_r, ds1_r);			fsum = vis_read_hi(ds_r);			sum_r += (mlib_d64)*((mlib_s32 *)&fsum);			fsum = vis_read_lo(ds_r);			sum_r += (mlib_d64)*((mlib_s32 *)&fsum);			fsum = vis_read_hi(ds_i);			sum_i += (mlib_d64)*((mlib_s32 *)&fsum);			fsum = vis_read_lo(ds_i);			sum_i += (mlib_d64)*((mlib_s32 *)&fsum);		}	}//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例23: __mlib_VectorConvert_U8_S8_Sat

mlib_status__mlib_VectorConvert_U8_S8_Sat(	mlib_u8 *z,	const mlib_s8 *x,	mlib_s32 n){	mlib_s8 *src = (void *)x;	mlib_u8 *dst = z;	mlib_d64 *dsrc, *ddst;	mlib_d64 d1, d2, d3, d4, d5, d6;	mlib_s32 len_64, even_length, rest_64, length = n, i, off;	mlib_s8 c;	mlib_d64 four_16_ones = vis_to_double_dup(0x01000100);	mlib_f32 zero = vis_fzeros();	if (length < 16) {		PACK_S_U(mlib_s8, mlib_u8);	}/* * First, try to align destination address for 8 bytes . */	while ((mlib_addr)dst & 7) {		(*dst++) = (c = (*src++)) < 0 ? 0 : c;		length--;	}	rest_64 = length & 7;	len_64 = length >> 3;	even_length = len_64 << 3;	ddst = (mlib_d64 *)dst;	vis_write_gsr(7 << 3);/* * Now analyze source address alignment. */	if (((mlib_addr)src & 7) == 0) {/* * Source address is also 8-byte aligned. */		dsrc = (mlib_d64 *)src;/* * Peeling the 1st iteration. */		if (i = (len_64 & 1)) {			d1 = (*dsrc++);			d2 = vis_fmul8sux16(vis_fpmerge(vis_read_hi(d1), zero),				four_16_ones);			d3 = vis_fmul8sux16(vis_fpmerge(vis_read_lo(d1), zero),				four_16_ones);			(*ddst++) = vis_fpack16_pair(d2, d3);		}/* * Then loop with step==2. Unroll for 2 iterations. */#pragma pipeloop(0)#pragma unroll(4)		for (; i < len_64; i += 2) {			d1 = (*dsrc++);			d2 = vis_fmul8sux16(vis_fpmerge(vis_read_hi(d1), zero),				four_16_ones);			d3 = vis_fmul8sux16(vis_fpmerge(vis_read_lo(d1), zero),				four_16_ones);			(*ddst++) = vis_fpack16_pair(d2, d3);			d1 = (*dsrc++);			d2 = vis_fmul8sux16(vis_fpmerge(vis_read_hi(d1), zero),				four_16_ones);			d3 = vis_fmul8sux16(vis_fpmerge(vis_read_lo(d1), zero),				four_16_ones);			(*ddst++) = vis_fpack16_pair(d2, d3);		}	} else {/* * Source address has arbitrary alignment. Use vis_alignaddr() and * vis_faligndata() functions. */		dsrc = (mlib_d64 *)vis_alignaddr(src, 0);		off = (mlib_addr)src & 7;		vis_alignaddr((void *)0, 1);		vis_write_bmask(0x11111111 * off, 0x04152637);		d2 = (*dsrc++);/* * Peeling of 1 iteration. 
*/		if (i = (len_64 & 1)) {			d1 = d2;			d2 = vis_ld_d64_nf(dsrc); dsrc++;			d3 = vis_bshuffle(d1, d2);			d4 = vis_fmul8sux16(d3, four_16_ones);//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例24: mlib_v_ImageLookUp_S16_U16_124_D1

void mlib_v_ImageLookUp_S16_U16_124_D1(const mlib_s16 *src,                                       mlib_u16       *dst,                                       mlib_s32       xsize,                                       const mlib_u16 *table0,                                       const mlib_u16 *table1,                                       const mlib_u16 *table2,                                       const mlib_u16 *table3){  mlib_s16 *sp;                        /* pointer to source data */  mlib_s32 s0, s1, s2, s3;             /* source data */  mlib_u16 *dl;                        /* pointer to start of destination */  mlib_u16 *dend;                      /* pointer to end of destination */  mlib_d64 *dp;                        /* aligned pointer to destination */  mlib_d64 t0, t1, t2;                 /* destination data */  mlib_d64 t3, acc0;                   /* destination data */  mlib_s32 emask;                      /* edge mask */  mlib_s32 i, num;                     /* loop variable */  dl = dst;  sp = (void *)src;  dp = (mlib_d64 *) dl;  dend = dl + xsize - 1;  vis_alignaddr((void *)0, 6);  i = 0;  if (xsize >= 4) {    s0 = sp[0];    s1 = sp[1];    s2 = sp[2];    s3 = sp[3];    sp += 4;#pragma pipeloop(0)    for (i = 0; i <= xsize - 8; i += 4, sp += 4) {      t3 = VIS_LD_U16_I(table3, 2 * s3);      t2 = VIS_LD_U16_I(table2, 2 * s2);      t1 = VIS_LD_U16_I(table1, 2 * s1);      t0 = VIS_LD_U16_I(table0, 2 * s0);      acc0 = vis_faligndata(t3, acc0);      acc0 = vis_faligndata(t2, acc0);      acc0 = vis_faligndata(t1, acc0);      acc0 = vis_faligndata(t0, acc0);      s0 = sp[0];      s1 = sp[1];      s2 = sp[2];      s3 = sp[3];      *dp++ = acc0;    }    t3 = VIS_LD_U16_I(table3, 2 * s3);    t2 = VIS_LD_U16_I(table2, 2 * s2);    t1 = VIS_LD_U16_I(table1, 2 * s1);    t0 = VIS_LD_U16_I(table0, 2 * s0);    acc0 = vis_faligndata(t3, acc0);    acc0 = vis_faligndata(t2, acc0);    acc0 = vis_faligndata(t1, acc0);    acc0 = vis_faligndata(t0, acc0);    *dp++ = 
acc0;  }  if ((mlib_addr) dp <= (mlib_addr) dend) {    num = (mlib_u16 *) dend - (mlib_u16 *) dp;    sp += num;    num++;    if (num == 1) {      s0 = (mlib_s32) * sp;      sp--;      t0 = VIS_LD_U16_I(table0, 2 * s0);      acc0 = vis_faligndata(t0, acc0);    }    else if (num == 2) {      s0 = (mlib_s32) * sp;      sp--;      t0 = VIS_LD_U16_I(table1, 2 * s0);      acc0 = vis_faligndata(t0, acc0);      s0 = (mlib_s32) * sp;      sp--;      t0 = VIS_LD_U16_I(table0, 2 * s0);      acc0 = vis_faligndata(t0, acc0);    }    else if (num == 3) {      s0 = (mlib_s32) * sp;      sp--;      t0 = VIS_LD_U16_I(table2, 2 * s0);      acc0 = vis_faligndata(t0, acc0);      s0 = (mlib_s32) * sp;      sp--;      t0 = VIS_LD_U16_I(table1, 2 * s0);//.........这里部分代码省略.........
开发者ID:michalwarecki,项目名称:ManagedRuntimeInitiative,代码行数:101,


示例25: __mlib_VectorConvert_S8_S16_Sat

mlib_status__mlib_VectorConvert_S8_S16_Sat(	mlib_s8 *z,	const mlib_s16 *x,	mlib_s32 n){	mlib_s16 *src = (void *)x;	mlib_s8 *dst = z;	mlib_d64 *dsrc, *ddst;	mlib_d64 d1, d2, d3, d4, d5, d6, d7;	mlib_s32 len_64, even_length, rest_64, length = n, i;	mlib_s16 c;	if (n < 16) {		PACK_S_S(mlib_s16, mlib_s8, MLIB_S8_MAX, MLIB_S8_MIN);	}/* * First try to align destination address for 8 bytes . */	while ((mlib_s32)dst & 7) {		(*dst++) = (c =			(*src++)) < MLIB_S8_MIN ? MLIB_S8_MIN : (c >			MLIB_S8_MAX ? MLIB_S8_MAX : c);		length--;	}	rest_64 = length & 7;	len_64 = length >> 3;	even_length = len_64 << 3;	ddst = (mlib_d64 *)dst;	vis_write_gsr64(((mlib_u64)0x082A4C6E << 32) | (8 << 3) | 2);/* * Now analyze source address alignment. */	if (((mlib_addr)src & 7) == 0) {		dsrc = (mlib_d64 *)src;		if (i = (len_64 & 1)) {			d1 = (*dsrc++);			d2 = (*dsrc++);			d3 = vis_fpackfix_pair(d1, d2);			d1 = vis_faligndata(d1, d1);			d2 = vis_faligndata(d2, d2);			d4 = vis_fpackfix_pair(d1, d2);			(*ddst++) = vis_bshuffle(d3, d4);		}#pragma pipeloop(0)#pragma unroll(2)		for (; i < len_64; i += 2) {			d1 = (*dsrc++);			d2 = (*dsrc++);			d3 = vis_fpackfix_pair(d1, d2);			d1 = vis_faligndata(d1, d1);			d2 = vis_faligndata(d2, d2);			d4 = vis_fpackfix_pair(d1, d2);			(*ddst++) = vis_bshuffle(d3, d4);			d1 = (*dsrc++);			d2 = (*dsrc++);			d3 = vis_fpackfix_pair(d1, d2);			d1 = vis_faligndata(d1, d1);			d2 = vis_faligndata(d2, d2);			d4 = vis_fpackfix_pair(d1, d2);			(*ddst++) = vis_bshuffle(d3, d4);		}	} else {/* * Source address is arbitrary aligned. Use vis_alignaddr() and * vis_faligndata() functions. */		dsrc = (mlib_d64 *)vis_alignaddr(src, 0);		d2 = (*dsrc++);/* * Peeling of 1 iteration. 
*/		if (i = (len_64 & 1)) {			d1 = d2;			d2 = vis_ld_d64_nf(dsrc); dsrc++;			d4 = vis_faligndata(d1, d2);			d1 = d2;			d2 = vis_ld_d64_nf(dsrc); dsrc++;			d5 = vis_faligndata(d1, d2);			d3 = vis_fpackfix_pair(d4, d5);			d4 = vis_fpack32(d4, d4);			d4 = vis_fpack32(d4, d4);			d5 = vis_fpmerge(vis_read_hi(d5), vis_read_lo(d5));			d5 = vis_fpmerge(vis_read_lo(d5), vis_read_hi(d5));			d5 = vis_fpmerge(vis_read_hi(d5), vis_read_lo(d5));			d4 = vis_fpackfix_pair(d4, d5);			(*ddst++) = vis_bshuffle(d3, d4);		}//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例26: __mlib_VideoDCT8x8Quantize_S16_S16_B12_NA

mlib_status__mlib_VideoDCT8x8Quantize_S16_S16_B12_NA(    mlib_s16 coeffs[64],    const mlib_s16 *block,    const mlib_d64 qtable[64]){    mlib_d64 *sp = (mlib_d64 *)block;    mlib_d64 *dp = (mlib_d64 *)coeffs;    mlib_d64 d00, d10, d20, d30, d40, d50, d60, d70;    mlib_d64 d01, d11, d21, d31, d41, d51, d61, d71;    mlib_d64 t00, t10, t20, t30, t40, t50, t60, t70, t80, t90;    mlib_d64 t01, t11, t21, t31, t41, t51, t61, t71, t81, t91;    mlib_d64 r00, r10, r20, r30, r40, r50, r60, r70;    mlib_d64 r01, r11, r21, r31, r41, r51, r61, r71;    mlib_f32 FCOS, c17, c26, c35, c_4;    mlib_s32 mask;    mlib_d64 w_const = vis_to_double_dup(0x4000);    if (block == NULL || coeffs == NULL)        return (MLIB_FAILURE);    if (!(((mlib_addr)block | (mlib_addr)coeffs) & 7)) {        return (__mlib_VideoDCT8x8Quantize_S16_S16_B12(coeffs,                block, qtable));    }    vis_write_gsr(1 << 3);    /*     * first stage     */    LOAD_DATA_GE_INTER1;    TRANSPOSE(d00, d20, d40, d60, r00, r10, r20, r30);    TRANSPOSE(d10, d30, d50, d70, r40, r50, r60, r70);    LOADCONSTS4_12;    PREPARE_DATA_INTER(0);    LOAD_DATA_GE_INTER2;    TRANSPOSE(d01, d21, d41, d61, r01, r11, r21, r31);    COMPUTING_DATA(0);    TRANSPOSE(d11, d31, d51, d71, r41, r51, r61, r71);    PREPARE_DATA_INTER(1);    COMPUTING_DATA(1);    /*     * second stage     */    TRANSPOSE(d01, d11, d21, d31, r40, r50, r60, r70);    TRANSPOSE(d00, d10, d20, d30, r00, r10, r20, r30);    PREPARE_DATA_INTER(0);    TRANSPOSE(d40, d50, d60, d70, r01, r11, r21, r31);    COMPUTING_DATA_12(0);    TRANSPOSE(d41, d51, d61, d71, r41, r51, r61, r71);    ENDSCALE_12(0);    dp = (mlib_d64 *)vis_alignaddr(coeffs, -1);    mask = 0xFF >> ((mlib_addr)coeffs - (mlib_addr)dp);    vis_alignaddrl((void *)coeffs, 0);    PREPARE_DATA_INTER(1);    COMPUTING_DATA_12(1);    ENDSCALE_12(1);    Quant_ST_NA(d00, d00, qtable[0]);    Quant_ST_NA(d01, d01, qtable[1]);    Quant_ST_NA(d10, d10, qtable[2]);    Quant_ST_NA(d11, d11, qtable[3]);    
Quant_ST_NA(d20, d20, qtable[4]);    Quant_ST_NA(d21, d21, qtable[5]);    Quant_ST_NA(d30, d30, qtable[6]);    Quant_ST_NA(d31, d31, qtable[7]);    Quant_ST_NA(d40, d40, qtable[8]);    Quant_ST_NA(d41, d41, qtable[9]);    Quant_ST_NA(d50, d50, qtable[10]);    Quant_ST_NA(d51, d51, qtable[11]);    Quant_ST_NA(d60, d60, qtable[12]);    Quant_ST_NA(d61, d61, qtable[13]);    Quant_ST_NA(d70, d70, qtable[14]);    Quant_ST_NA(d71, d71, qtable[15]);    dp[1] = vis_faligndata(d00, d01);    dp[2] = vis_faligndata(d01, d10);    dp[3] = vis_faligndata(d10, d11);    dp[4] = vis_faligndata(d11, d20);    dp[5] = vis_faligndata(d20, d21);    dp[6] = vis_faligndata(d21, d30);    dp[7] = vis_faligndata(d30, d31);    dp[8] = vis_faligndata(d31, d40);    dp[9] = vis_faligndata(d40, d41);    dp[10] = vis_faligndata(d41, d50);//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例27: __mlib_VectorSubS_S16C_S16C_Sat

mlib_status__mlib_VectorSubS_S16C_S16C_Sat(	mlib_s16 *z,	const mlib_s16 *x,	const mlib_s16 *c,	mlib_s32 n){	mlib_d64 *dpz, *dpx;	mlib_d64 dx, dz, dx0, dx1, dr0, dr1, dr2;	mlib_s16 *pz, *px, *pzend;/* offset of address alignment in destination */	mlib_s32 off;/* edge masks */	mlib_s32 emask;	mlib_s32 mask1, mask2;	mlib_s32 ovl, und;	mlib_u16 uc0 = *((mlib_s16 *)c);	mlib_u16 uc1 = *((mlib_s16 *)c + 1);	mlib_d64 dc = ((mlib_addr)z & 2) ? vis_to_double_dup((uc1 << 16) | uc0)		: vis_to_double_dup((uc0 << 16) | uc1);	mlib_d64 fzero = vis_fzero();	mlib_d64 const_ovl = vis_to_double_dup(0x7fff7fff);	mlib_d64 const_und = vis_fnot(const_ovl);	mlib_s32 len = n + n, i;/* rest and leng in terms of 8 bytes. */	mlib_s32 rest_8, even_8;	if (n <= 0)		return (MLIB_FAILURE);	px = (mlib_s16 *)x;	pz = (mlib_s16 *)z;/* * prepare the destination address */	dpz = (mlib_d64 *)((mlib_addr)z & (~7));	off = (mlib_addr)dpz - (mlib_addr)z;	pzend = pz + n + n - 1;/* * generate edge mask for the start point */	emask = vis_edge16(pz, pzend);/* * prepare the destination address */	if (off) {		dpx = (mlib_d64 *)vis_alignaddr(px, off);		dx0 = vis_ld_d64_nf(dpx);		dx1 = vis_ld_d64_nf(dpx + 1);		dx = vis_faligndata(dx0, dx1);		SUBS16_SAT;		px += (8 + off) >> 1;		len -= (8 + off) >> 1;		dpz++;	}	if (len <= 0)		return (MLIB_SUCCESS);	even_8 = len >> 2;	rest_8 = len & 0x3;	emask = 0xf;/* * Now try to analyze source "x" and "y" addresses. */	if (!((mlib_addr)px & 7)) {		dpx = (mlib_d64 *)px;#pragma pipeloop(0)		for (i = 0; i < even_8; i++) {			dx = (*dpx++);			SUBS16_SAT;			dpz++;		}		dx1 = vis_ld_d64_nf(dpx);		dpx++;	} else {		dpx = vis_alignaddr(px, 0);		dx0 = vis_ld_d64_nf(dpx);		dpx++;#pragma pipeloop(0)		for (i = 0; i < even_8; i++) {			dx1 = vis_ld_d64_nf(dpx);			dpx++;			dx = vis_faligndata(dx0, dx1);//.........这里部分代码省略.........
开发者ID:Aries85,项目名称:mediaLib,代码行数:101,


示例28: mlib_v_conv5x5_8nw_4

//.........这里部分代码省略.........	sa2 = sa1 + slb;	sa3 = sa2 + slb;	sa4 = sa3 + slb;	d_a = adr_dst + 2 * dlb + 8;/* load interm. src buff */	PREPARE_TO_LOAD_LINE(sbuf2, sa);#pragma pipeloop(0)	LOAD_LINE_INTO_BUFFER(16);/* load interm. src buff */	PREPARE_TO_LOAD_LINE(sbuf3, sa1);#pragma pipeloop(0)	LOAD_LINE_INTO_BUFFER(16);/* load interm. src buff */	PREPARE_TO_LOAD_LINE(sbuf4, sa2);#pragma pipeloop(0)	LOAD_LINE_INTO_BUFFER(16);/* load interm. src buff */	PREPARE_TO_LOAD_LINE(sbuf5, sa3);#pragma pipeloop(0)	LOAD_LINE_INTO_BUFFER(16);#pragma pipeloop(0)	for (j = 0; j < dh; j++) {		LOOP_INI();		PREPARE_TO_LOAD_LINE(sbuf5, sa4);#pragma pipeloop(0)		LOAD_LINE_INTO_BUFFER_NF(16);		vis_alignaddr(s1, 4);		dbuf1 = dbuf;		d1 = *s1;		d2 = *s2;		d3 = *s3;		d11 = *(s1 + 1);		d12 = *(s2 + 1);		d13 = *(s3 + 1);#pragma pipeloop(0)		for (i = 0; i < dw; i += 8) {			d21 = *(s1 + 2);			d22 = *(s2 + 2);			d23 = *(s3 + 2);			out0 = out1 = rnd;			CONV_AU(d1, k1k2);			CONV_AL(d2, k5k6);			CONV_AU(d3, k11k12);			dt_1 = vis_faligndata(d1, d11);			dt_2 = vis_faligndata(d2, d12);			dt_3 = vis_faligndata(d3, d13);			CONV_AL(dt_1, k1k2);			CONV_AU(dt_2, k7k8);			CONV_AL(dt_3, k11k12);			CONV_AU(d11, k3k4);			CONV_AL(d12, k7k8);			CONV_AU(d13, k13k14);			dt_1 = vis_faligndata(d11, d21);			dt_2 = vis_faligndata(d12, d22);			dt_3 = vis_faligndata(d13, d23);			CONV_AL(dt_1, k3k4);			CONV_AU(dt_2, k9k10);			CONV_AL(dt_3, k13k14);
开发者ID:Aries85,项目名称:mediaLib,代码行数:67,



注:本文中的vis_alignaddr函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ vis_faligndata函数代码示例
C++ virtual_space函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网,自学Excel、自学PS、自学CAD、自学C语言、自学CSS3实例,是一个通过网络自主学习工作技能的自学平台,也是网友喜欢的软件自学网站。