Annotation of /xvidcore/src/motion/gmc.c

Revision 1.6 - (view) (download)

1 :	edgomez	1.2	/*****************************************************************************
2 :			*
3 :			* XVID MPEG-4 VIDEO CODEC
4 :			* - GMC interpolation module -
5 :			*
6 :			* Copyright(C) 2002-2003 Pascal Massimino <skal@planet-d.net>
7 :			*
8 :			* This program is free software ; you can redistribute it and/or modify
9 :			* it under the terms of the GNU General Public License as published by
10 :			* the Free Software Foundation ; either version 2 of the License, or
11 :			* (at your option) any later version.
12 :			*
13 :			* This program is distributed in the hope that it will be useful,
14 :			* but WITHOUT ANY WARRANTY ; without even the implied warranty of
15 :			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 :			* GNU General Public License for more details.
17 :			*
18 :			* You should have received a copy of the GNU General Public License
19 :			* along with this program ; if not, write to the Free Software
20 :			* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 :			*
22 :	Isibaar	1.6	* $Id: gmc.c,v 1.5 2006/06/14 21:44:07 Skal Exp $
23 :	edgomez	1.2	*
24 :			****************************************************************************/
25 :
26 :			#include "../portab.h"
27 :			#include "../global.h"
28 :			#include "../encoder.h"
29 :			#include "gmc.h"
30 :
31 :			#include <stdio.h>
32 :
33 :	Skal	1.5	/* initialized by init_GMC(), for 3points */
34 :			static
35 :			void (Predict_16x16_func)(const NEW_GMC_DATA const This,
36 :			uint8_t dst, const uint8_t src,
37 :			int dststride, int srcstride, int x, int y, int rounding) = 0;
38 :			static
39 :			void (Predict_8x8_func)(const NEW_GMC_DATA const This,
40 :			uint8_t uDst, const uint8_t uSrc,
41 :			uint8_t vDst, const uint8_t vSrc,
42 :			int dststride, int srcstride, int x, int y, int rounding) = 0;
43 :
44 :			/****************************************************************************/
45 :			/* this is borrowed from bitstream.c until we find a common solution */
/*
 * Returns the number of bits needed to represent 'value', i.e. the 1-based
 * position of its most significant set bit. log2bin(0) is 0.
 *
 * The former MSVC-only inline-assembly variant was dropped: it had no return
 * statement, BSR leaves its destination undefined for value == 0, and __asm
 * blocks are not accepted by MSVC for x64 targets. For non-zero inputs the
 * loop below yields the same result as "bsr + 1".
 */
static uint32_t __inline
log2bin(uint32_t value)
{
	uint32_t n = 0;

	while (value) {
		value >>= 1;
		n++;
	}
	return n;
}
65 :
66 :			/* 16*sizeof(int) -> 1 or 2 cachelines */
67 :			/* table lookup might be faster! (still to be benchmarked) */
68 :
69 :			/*
70 :			static int log2bin_table[16] =
71 :			{ 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4};
72 :			*/
73 :			/* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 */
74 :
/*
 * RDIV(a,b): truncating division a/b after moving 'a' away from zero by b>>1
 * (added when a > 0, subtracted otherwise).
 */
#define RDIV(a,b) \
	(((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1)) / (b))

/*
 * RSHIFT(a,b): 'a' shifted right by 'b' bits after adding a rounding term of
 * 1<<(b-1) when a > 0, and one less than that otherwise.
 */
#define RSHIFT(a,b) \
	(((a) + (1<<((b)-1)) - ((a)>0 ? 0 : 1)) >> (b))

/*
 * MTab[f] holds the weight pair for a 4-bit fraction f: (16-f) in the upper
 * 16 bits and f in the lower 16 bits.
 */
#define GMC_WEIGHT_PAIR(f)	(((16-(f))<<16) + (f))
static const uint32_t MTab[16] = {
	GMC_WEIGHT_PAIR( 0), GMC_WEIGHT_PAIR( 1), GMC_WEIGHT_PAIR( 2), GMC_WEIGHT_PAIR( 3),
	GMC_WEIGHT_PAIR( 4), GMC_WEIGHT_PAIR( 5), GMC_WEIGHT_PAIR( 6), GMC_WEIGHT_PAIR( 7),
	GMC_WEIGHT_PAIR( 8), GMC_WEIGHT_PAIR( 9), GMC_WEIGHT_PAIR(10), GMC_WEIGHT_PAIR(11),
	GMC_WEIGHT_PAIR(12), GMC_WEIGHT_PAIR(13), GMC_WEIGHT_PAIR(14), GMC_WEIGHT_PAIR(15)
};
#undef GMC_WEIGHT_PAIR
84 :
85 :	edgomez	1.2	/* ************************************************************
86 :			* Pts = 2 or 3
87 :			*
88 :			* Warning! *src is the global frame pointer (that is: address
89 :			* of pixel 0,0), not the macroblock one.
90 :			* Conversely, *dst is the macroblock top-left address.
91 :			*/
92 :
93 :	Skal	1.5	static
94 :	edgomez	1.2	void Predict_16x16_C(const NEW_GMC_DATA * const This,
95 :	Skal	1.5	uint8_t dst, const uint8_t src,
96 :			int dststride, int srcstride, int x, int y, int rounding)
97 :	edgomez	1.2	{
98 :			const int W = This->sW;
99 :			const int H = This->sH;
100 :			const int rho = 3 - This->accuracy;
101 :			const int Rounder = ( (1<<7) - (rounding<<(2*rho)) ) << 16;
102 :
103 :			const int dUx = This->dU[0];
104 :			const int dVx = This->dV[0];
105 :			const int dUy = This->dU[1];
106 :			const int dVy = This->dV[1];
107 :
108 :			int Uo = This->Uo + 16(dUyy + dUx*x);
109 :			int Vo = This->Vo + 16(dVyy + dVx*x);
110 :
111 :			int i, j;
112 :
113 :			dst += 16;
114 :			for (j=16; j>0; --j) {
115 :			int U = Uo, V = Vo;
116 :			Uo += dUy; Vo += dVy;
117 :			for (i=-16; i<0; ++i) {
118 :			unsigned int f0, f1, ri = 16, rj = 16;
119 :			int Offset;
120 :			int u = ( U >> 16 ) << rho;
121 :			int v = ( V >> 16 ) << rho;
122 :
123 :			U += dUx; V += dVx;
124 :
125 :			if (u > 0 && u <= W) { ri = MTab[u&15]; Offset = u>>4; }
126 :			else {
127 :			if (u > W) Offset = W>>4;
128 :			else Offset = 0;
129 :			ri = MTab[0];
130 :			}
131 :
132 :			if (v > 0 && v <= H) { rj = MTab[v&15]; Offset += (v>>4)*srcstride; }
133 :			else {
134 :			if (v > H) Offset += (H>>4)*srcstride;
135 :			rj = MTab[0];
136 :			}
137 :
138 :			f0 = src[Offset + 0];
139 :			f0 \|= src[Offset + 1] << 16;
140 :			f1 = src[Offset + srcstride + 0];
141 :			f1 \|= src[Offset + srcstride + 1] << 16;
142 :			f0 = (ri*f0)>>16;
143 :			f1 = (ri*f1) & 0x0fff0000;
144 :			f0 \|= f1;
145 :			f0 = (rj*f0 + Rounder) >> 24;
146 :
147 :			dst[i] = (uint8_t)f0;
148 :			}
149 :			dst += dststride;
150 :			}
151 :			}
152 :
153 :	Skal	1.5	static
154 :	edgomez	1.2	void Predict_8x8_C(const NEW_GMC_DATA * const This,
155 :	Skal	1.5	uint8_t uDst, const uint8_t uSrc,
156 :			uint8_t vDst, const uint8_t vSrc,
157 :			int dststride, int srcstride, int x, int y, int rounding)
158 :	edgomez	1.2	{
159 :			const int W = This->sW >> 1;
160 :			const int H = This->sH >> 1;
161 :			const int rho = 3-This->accuracy;
162 :			const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
163 :
164 :			const int32_t dUx = This->dU[0];
165 :			const int32_t dVx = This->dV[0];
166 :			const int32_t dUy = This->dU[1];
167 :			const int32_t dVy = This->dV[1];
168 :
169 :			int32_t Uo = This->Uco + 8(dUyy + dUx*x);
170 :			int32_t Vo = This->Vco + 8(dVyy + dVx*x);
171 :
172 :			int i, j;
173 :
174 :			uDst += 8;
175 :			vDst += 8;
176 :			for (j=8; j>0; --j) {
177 :			int32_t U = Uo, V = Vo;
178 :			Uo += dUy; Vo += dVy;
179 :
180 :			for (i=-8; i<0; ++i) {
181 :			int Offset;
182 :			uint32_t f0, f1, ri, rj;
183 :			int32_t u, v;
184 :
185 :			u = ( U >> 16 ) << rho;
186 :			v = ( V >> 16 ) << rho;
187 :			U += dUx; V += dVx;
188 :
189 :			if (u > 0 && u <= W) {
190 :			ri = MTab[u&15];
191 :			Offset = u>>4;
192 :			} else {
193 :			if (u>W) Offset = W>>4;
194 :			else Offset = 0;
195 :			ri = MTab[0];
196 :			}
197 :
198 :			if (v > 0 && v <= H) {
199 :			rj = MTab[v&15];
200 :			Offset += (v>>4)*srcstride;
201 :			} else {
202 :			if (v>H) Offset += (H>>4)*srcstride;
203 :			rj = MTab[0];
204 :			}
205 :
206 :			f0 = uSrc[Offset + 0];
207 :			f0 \|= uSrc[Offset + 1] << 16;
208 :			f1 = uSrc[Offset + srcstride + 0];
209 :			f1 \|= uSrc[Offset + srcstride + 1] << 16;
210 :			f0 = (ri*f0)>>16;
211 :			f1 = (ri*f1) & 0x0fff0000;
212 :			f0 \|= f1;
213 :			f0 = (rj*f0 + Rounder) >> 24;
214 :
215 :			uDst[i] = (uint8_t)f0;
216 :
217 :			f0 = vSrc[Offset + 0];
218 :			f0 \|= vSrc[Offset + 1] << 16;
219 :			f1 = vSrc[Offset + srcstride + 0];
220 :			f1 \|= vSrc[Offset + srcstride + 1] << 16;
221 :			f0 = (ri*f0)>>16;
222 :			f1 = (ri*f1) & 0x0fff0000;
223 :			f0 \|= f1;
224 :			f0 = (rj*f0 + Rounder) >> 24;
225 :
226 :			vDst[i] = (uint8_t)f0;
227 :			}
228 :			uDst += dststride;
229 :			vDst += dststride;
230 :			}
231 :			}
232 :
233 :	Skal	1.5	static
234 :	edgomez	1.2	void get_average_mv_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv,
235 :	Skal	1.5	int x, int y, int qpel)
236 :	edgomez	1.2	{
237 :			int i, j;
238 :			int vx = 0, vy = 0;
239 :			int32_t uo = Dsp->Uo + 16(Dsp->dU[1]y + Dsp->dU[0]*x);
240 :			int32_t vo = Dsp->Vo + 16(Dsp->dV[1]y + Dsp->dV[0]*x);
241 :			for (j=16; j>0; --j)
242 :			{
243 :			int32_t U, V;
244 :			U = uo; uo += Dsp->dU[1];
245 :			V = vo; vo += Dsp->dV[1];
246 :			for (i=16; i>0; --i)
247 :			{
248 :			int32_t u,v;
249 :			u = U >> 16; U += Dsp->dU[0]; vx += u;
250 :			v = V >> 16; V += Dsp->dV[0]; vy += v;
251 :			}
252 :			}
253 :			vx -= (256x+120) << (5+Dsp->accuracy); / 120 = 1516/2 /
254 :			vy -= (256*y+120) << (5+Dsp->accuracy);
255 :
256 :			mv->x = RSHIFT( vx, 8+Dsp->accuracy - qpel );
257 :			mv->y = RSHIFT( vy, 8+Dsp->accuracy - qpel );
258 :			}
259 :
260 :			/* ************************************************************
261 :			* simplified version for 1 warp point
262 :			*/
263 :
264 :	Skal	1.5	static
265 :	edgomez	1.2	void Predict_1pt_16x16_C(const NEW_GMC_DATA * const This,
266 :	Skal	1.5	uint8_t Dst, const uint8_t Src,
267 :			int dststride, int srcstride, int x, int y, int rounding)
268 :	edgomez	1.2	{
269 :			const int W = This->sW;
270 :			const int H = This->sH;
271 :			const int rho = 3-This->accuracy;
272 :			const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
273 :
274 :
275 :			int32_t uo = This->Uo + (x<<8); /* ((16x)<<4) /
276 :			int32_t vo = This->Vo + (y<<8);
277 :			uint32_t ri = MTab[uo & 15];
278 :			uint32_t rj = MTab[vo & 15];
279 :			int i, j;
280 :
281 :			int32_t Offset;
282 :	Skal	1.4	if (vo>=(-16<<4) && vo<=H) Offset = (vo>>4)*srcstride;
283 :	edgomez	1.2	else {
284 :			if (vo>H) Offset = ( H>>4)*srcstride;
285 :			else Offset =-16*srcstride;
286 :			rj = MTab[0];
287 :			}
288 :	Skal	1.4	if (uo>=(-16<<4) && uo<=W) Offset += (uo>>4);
289 :	edgomez	1.2	else {
290 :			if (uo>W) Offset += (W>>4);
291 :			else Offset -= 16;
292 :			ri = MTab[0];
293 :			}
294 :
295 :			Dst += 16;
296 :
297 :			for(j=16; j>0; --j, Offset+=srcstride-16)
298 :			{
299 :			for(i=-16; i<0; ++i, ++Offset)
300 :			{
301 :			uint32_t f0, f1;
302 :			f0 = Src[ Offset +0 ];
303 :			f0 \|= Src[ Offset +1 ] << 16;
304 :			f1 = Src[ Offset+srcstride +0 ];
305 :			f1 \|= Src[ Offset+srcstride +1 ] << 16;
306 :			f0 = (ri*f0)>>16;
307 :			f1 = (ri*f1) & 0x0fff0000;
308 :			f0 \|= f1;
309 :			f0 = ( rj*f0 + Rounder ) >> 24;
310 :			Dst[i] = (uint8_t)f0;
311 :			}
312 :			Dst += dststride;
313 :			}
314 :			}
315 :
316 :	Skal	1.5	static
317 :	edgomez	1.2	void Predict_1pt_8x8_C(const NEW_GMC_DATA * const This,
318 :	Skal	1.5	uint8_t uDst, const uint8_t uSrc,
319 :			uint8_t vDst, const uint8_t vSrc,
320 :			int dststride, int srcstride, int x, int y, int rounding)
321 :	edgomez	1.2	{
322 :			const int W = This->sW >> 1;
323 :			const int H = This->sH >> 1;
324 :			const int rho = 3-This->accuracy;
325 :			const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
326 :
327 :			int32_t uo = This->Uco + (x<<7);
328 :			int32_t vo = This->Vco + (y<<7);
329 :			uint32_t rri = MTab[uo & 15];
330 :			uint32_t rrj = MTab[vo & 15];
331 :			int i, j;
332 :
333 :			int32_t Offset;
334 :	Skal	1.4	if (vo>=(-8<<4) && vo<=H) Offset = (vo>>4)*srcstride;
335 :	edgomez	1.2	else {
336 :			if (vo>H) Offset = ( H>>4)*srcstride;
337 :			else Offset =-8*srcstride;
338 :			rrj = MTab[0];
339 :			}
340 :	Skal	1.4	if (uo>=(-8<<4) && uo<=W) Offset += (uo>>4);
341 :	edgomez	1.2	else {
342 :			if (uo>W) Offset += ( W>>4);
343 :			else Offset -= 8;
344 :			rri = MTab[0];
345 :			}
346 :
347 :			uDst += 8;
348 :			vDst += 8;
349 :			for(j=8; j>0; --j, Offset+=srcstride-8)
350 :			{
351 :			for(i=-8; i<0; ++i, Offset++)
352 :			{
353 :			uint32_t f0, f1;
354 :			f0 = uSrc[ Offset + 0 ];
355 :			f0 \|= uSrc[ Offset + 1 ] << 16;
356 :			f1 = uSrc[ Offset + srcstride + 0 ];
357 :			f1 \|= uSrc[ Offset + srcstride + 1 ] << 16;
358 :			f0 = (rri*f0)>>16;
359 :			f1 = (rri*f1) & 0x0fff0000;
360 :			f0 \|= f1;
361 :			f0 = ( rrj*f0 + Rounder ) >> 24;
362 :			uDst[i] = (uint8_t)f0;
363 :
364 :			f0 = vSrc[ Offset + 0 ];
365 :			f0 \|= vSrc[ Offset + 1 ] << 16;
366 :			f1 = vSrc[ Offset + srcstride + 0 ];
367 :			f1 \|= vSrc[ Offset + srcstride + 1 ] << 16;
368 :			f0 = (rri*f0)>>16;
369 :			f1 = (rri*f1) & 0x0fff0000;
370 :			f0 \|= f1;
371 :			f0 = ( rrj*f0 + Rounder ) >> 24;
372 :			vDst[i] = (uint8_t)f0;
373 :			}
374 :			uDst += dststride;
375 :			vDst += dststride;
376 :			}
377 :			}
378 :
379 :	Skal	1.5	static
380 :	edgomez	1.2	void get_average_mv_1pt_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv,
381 :			int x, int y, int qpel)
382 :			{
383 :			mv->x = RSHIFT(Dsp->Uo<<qpel, 3);
384 :			mv->y = RSHIFT(Dsp->Vo<<qpel, 3);
385 :			}
386 :
387 :	Skal	1.5	#if defined(ARCH_IS_IA32)
388 :			/* *************************************************************
389 :			* MMX core function
390 :			*/
391 :
/*
 * Pointer to the core that processes 8 pixels in one call; used by the
 * *_mmx predictors when their linearity test passes. Starts out null and is
 * bound by init_GMC() to one of the two external routines declared below.
 */
static
void (*GMC_Core_Lin_8)(uint8_t *Dst, const uint16_t * Offsets,
	const uint8_t * const Src0, const int BpS, const int Rounder) = 0;

extern void xvid_GMC_Core_Lin_8_mmx(uint8_t *Dst, const uint16_t * Offsets,
	const uint8_t * const Src0, const int BpS, const int Rounder);

extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets,
	const uint8_t * const Src0, const int BpS, const int Rounder);
401 :
402 :			/* *************************************************************/
403 :
404 :			static void GMC_Core_Non_Lin_8(uint8_t *Dst,
405 :			const uint16_t * Offsets,
406 :			const uint8_t * const Src0, const int srcstride,
407 :			const int Rounder)
408 :			{
409 :			int i;
410 :			for(i=0; i<8; ++i)
411 :			{
412 :			uint32_t u = Offsets[i ];
413 :			uint32_t v = Offsets[i+16];
414 :			const uint32_t ri = MTab[u&0x0f];
415 :			const uint32_t rj = MTab[v&0x0f];
416 :			uint32_t f0, f1;
417 :			const uint8_t * const Src = Src0 + (u>>4) + (v>>4)*srcstride;
418 :			f0 = Src[0];
419 :			f0 \|= Src[1] << 16;
420 :			f1 = Src[srcstride +0];
421 :			f1 \|= Src[srcstride +1] << 16;
422 :			f0 = (ri*f0)>>16;
423 :			f1 = (ri*f1) & 0x0fff0000;
424 :			f0 \|= f1;
425 :			f0 = ( rj*f0 + Rounder ) >> 24;
426 :			Dst[i] = (uint8_t)f0;
427 :			}
428 :			}
429 :
430 :			//////////////////////////////////////////////////////////
431 :
432 :			static
433 :			void Predict_16x16_mmx(const NEW_GMC_DATA * const This,
434 :			uint8_t dst, const uint8_t src,
435 :			int dststride, int srcstride, int x, int y, int rounding)
436 :			{
437 :			const int W = This->sW;
438 :			const int H = This->sH;
439 :			const int rho = 3 - This->accuracy;
440 :			const int Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
441 :			const uint32_t W2 = W<<(16-rho);
442 :			const uint32_t H2 = H<<(16-rho);
443 :
444 :			const int dUx = This->dU[0];
445 :			const int dVx = This->dV[0];
446 :			const int dUy = This->dU[1];
447 :			const int dVy = This->dV[1];
448 :
449 :			int Uo = This->Uo + 16(dUyy + dUx*x);
450 :			int Vo = This->Vo + 16(dVyy + dVx*x);
451 :
452 :			int i, j;
453 :
454 :			DECLARE_ALIGNED_MATRIX(Offsets, 2,16, uint16_t, CACHE_LINE);
455 :			for(j=16; j>0; --j)
456 :			{
457 :			int32_t U = Uo, V = Vo;
458 :			Uo += dUy; Vo += dVy;
459 :			if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) &&
460 :			H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) )
461 :			{
462 :			for(i=0; i<16; ++i)
463 :			{
464 :			uint32_t u = ( U >> 16 ) << rho;
465 :			uint32_t v = ( V >> 16 ) << rho;
466 :			U += dUx; V += dVx;
467 :			Offsets[ i] = u;
468 :			Offsets[16+i] = v;
469 :			}
470 :	Isibaar	1.6
471 :			{
472 :	Skal	1.5	// batch 8 input pixels when linearity says it's ok
473 :	Isibaar	1.6	uint32_t UV1, UV2;
474 :			UV1 = (Offsets[0] \| (Offsets[16]<<16)) & 0xfff0fff0U;
475 :			UV2 = (Offsets[7] \| (Offsets[23]<<16)) & 0xfff0fff0U;
476 :			if (UV1+7*16==UV2)
477 :			GMC_Core_Lin_8(dst, Offsets, src + (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride, srcstride, Rounder);
478 :			else
479 :			GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder);
480 :			UV1 = (Offsets[ 8] \| (Offsets[24]<<16)) & 0xfff0fff0U;
481 :			UV2 = (Offsets[15] \| (Offsets[31]<<16)) & 0xfff0fff0U;
482 :			if (UV1+7*16==UV2)
483 :			GMC_Core_Lin_8(dst+8, Offsets+8, src + (Offsets[8]>>4) + (Offsets[24]>>4)*srcstride, srcstride, Rounder);
484 :			else
485 :			GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder);
486 :			}
487 :			}
488 :	Skal	1.5	else
489 :			{
490 :			for(i=0; i<16; ++i)
491 :			{
492 :			int u = ( U >> 16 ) << rho;
493 :			int v = ( V >> 16 ) << rho;
494 :			U += dUx; V += dVx;
495 :
496 :			Offsets[ i] = (u<0) ? 0 : (u>=W) ? W : u;
497 :			Offsets[16+i] = (v<0) ? 0 : (v>=H) ? H : v;
498 :			}
499 :			// due to boundary clipping, we cannot infer the 8-pixels batchability
500 :			// simply by using the linearity. Oh well, not a big deal...
501 :			GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder);
502 :			GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder);
503 :			}
504 :			dst += dststride;
505 :			}
506 :			}
507 :
508 :			static
509 :			void Predict_8x8_mmx(const NEW_GMC_DATA * const This,
510 :			uint8_t uDst, const uint8_t uSrc,
511 :			uint8_t vDst, const uint8_t vSrc,
512 :			int dststride, int srcstride, int x, int y, int rounding)
513 :			{
514 :			const int W = This->sW >> 1;
515 :			const int H = This->sH >> 1;
516 :			const int rho = 3-This->accuracy;
517 :			const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
518 :			const uint32_t W2 = W<<(16-rho);
519 :			const uint32_t H2 = H<<(16-rho);
520 :
521 :			const int dUx = This->dU[0];
522 :			const int dVx = This->dV[0];
523 :			const int dUy = This->dU[1];
524 :			const int dVy = This->dV[1];
525 :
526 :			int Uo = This->Uco + 8(dUyy + dUx*x);
527 :			int Vo = This->Vco + 8(dVyy + dVx*x);
528 :
529 :			DECLARE_ALIGNED_MATRIX(Offsets, 2,16, uint16_t, CACHE_LINE);
530 :			int i, j;
531 :			for(j=8; j>0; --j)
532 :			{
533 :			int32_t U = Uo, V = Vo;
534 :			Uo += dUy; Vo += dVy;
535 :			if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) &&
536 :			H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) )
537 :			{
538 :			for(i=0; i<8; ++i)
539 :			{
540 :			int32_t u = ( U >> 16 ) << rho;
541 :			int32_t v = ( V >> 16 ) << rho;
542 :			U += dUx; V += dVx;
543 :			Offsets[ i] = u;
544 :			Offsets[16+i] = v;
545 :			}
546 :	Isibaar	1.6
547 :			{
548 :			// batch 8 input pixels when linearity says it's ok
549 :			const uint32_t UV1 = (Offsets[ 0] \| (Offsets[16]<<16)) & 0xfff0fff0U;
550 :			const uint32_t UV2 = (Offsets[ 7] \| (Offsets[23]<<16)) & 0xfff0fff0U;
551 :			if (UV1+7*16==UV2)
552 :			{
553 :			const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride;
554 :			GMC_Core_Lin_8(uDst, Offsets, uSrc+Off, srcstride, Rounder);
555 :			GMC_Core_Lin_8(vDst, Offsets, vSrc+Off, srcstride, Rounder);
556 :			}
557 :			else {
558 :			GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder);
559 :			GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder);
560 :			}
561 :			}
562 :	Skal	1.5	}
563 :			else
564 :			{
565 :			for(i=0; i<8; ++i)
566 :			{
567 :			int u = ( U >> 16 ) << rho;
568 :			int v = ( V >> 16 ) << rho;
569 :			U += dUx; V += dVx;
570 :			Offsets[ i] = (u<0) ? 0 : (u>=W) ? W : u;
571 :			Offsets[16+i] = (v<0) ? 0 : (v>=H) ? H : v;
572 :			}
573 :			GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder);
574 :			GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder);
575 :			}
576 :			uDst += dststride;
577 :			vDst += dststride;
578 :			}
579 :			}
580 :
581 :			#endif /* ARCH_IS_IA32 */
582 :
583 :			/* *************************************************************
584 :			* will initialize internal pointers
585 :			*/
586 :
587 :			void init_GMC(const unsigned int cpu_flags)
588 :			{
589 :			Predict_16x16_func = Predict_16x16_C;
590 :			Predict_8x8_func = Predict_8x8_C;
591 :
592 :	Isibaar	1.6	#if defined(ARCH_IS_IA32)
593 :	Skal	1.5	if ((cpu_flags & XVID_CPU_MMX) \|\| (cpu_flags & XVID_CPU_MMXEXT) \|\|
594 :			(cpu_flags & XVID_CPU_3DNOW) \|\| (cpu_flags & XVID_CPU_3DNOWEXT) \|\|
595 :			(cpu_flags & XVID_CPU_SSE) \|\| (cpu_flags & XVID_CPU_SSE2))
596 :			{
597 :			Predict_16x16_func = Predict_16x16_mmx;
598 :			Predict_8x8_func = Predict_8x8_mmx;
599 :			if (cpu_flags & XVID_CPU_SSE2)
600 :			GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2;
601 :			else
602 :			GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_mmx;
603 :			}
604 :			#endif
605 :			}
606 :
607 :	edgomez	1.2	/* *************************************************************
608 :			* Warning! It's Accuracy being passed, not 'resolution'!
609 :			*/
610 :
611 :			void generate_GMCparameters( int nb_pts, const int accuracy,
612 :			const WARPPOINTS *const pts,
613 :			const int width, const int height,
614 :			NEW_GMC_DATA *const gmc)
615 :			{
616 :			gmc->sW = width << 4;
617 :			gmc->sH = height << 4;
618 :			gmc->accuracy = accuracy;
619 :			gmc->num_wp = nb_pts;
620 :
621 :			/* reduce the number of points, if possible */
622 :	edgomez	1.3	if (nb_pts<2 \|\| (pts->duv[2].x==0 && pts->duv[2].y==0 && pts->duv[1].x==0 && pts->duv[1].y==0 )) {
623 :			if (nb_pts<2 \|\| (pts->duv[1].x==0 && pts->duv[1].y==0)) {
624 :			if (nb_pts<1 \|\| (pts->duv[0].x==0 && pts->duv[0].y==0)) {
625 :			nb_pts = 0;
626 :			}
627 :			else nb_pts = 1;
628 :			}
629 :			else nb_pts = 2;
630 :			}
631 :	edgomez	1.2
632 :			/* now, nb_pts stores the actual number of points required for interpolation */
633 :
634 :			if (nb_pts<=1)
635 :			{
636 :			if (nb_pts==1) {
637 :			/* store as 4b fixed point */
638 :			gmc->Uo = pts->duv[0].x << accuracy;
639 :			gmc->Vo = pts->duv[0].y << accuracy;
640 :			gmc->Uco = ((pts->duv[0].x>>1) \| (pts->duv[0].x&1)) << accuracy; /* DIV2RND() */
641 :			gmc->Vco = ((pts->duv[0].y>>1) \| (pts->duv[0].y&1)) << accuracy; /* DIV2RND() */
642 :			}
643 :			else { /* zero points?! */
644 :			gmc->Uo = gmc->Vo = 0;
645 :			gmc->Uco = gmc->Vco = 0;
646 :			}
647 :
648 :			gmc->predict_16x16 = Predict_1pt_16x16_C;
649 :			gmc->predict_8x8 = Predict_1pt_8x8_C;
650 :			gmc->get_average_mv = get_average_mv_1pt_C;
651 :			}
652 :			else { /* 2 or 3 points */
653 :			const int rho = 3 - accuracy; /* = {3,2,1,0} for Acc={0,1,2,3} */
654 :			int Alpha = log2bin(width-1);
655 :			int Ws = 1 << Alpha;
656 :
657 :			gmc->dU[0] = 16Ws + RDIV( 8Wspts->duv[1].x, width ); / dU/dx */
658 :			gmc->dV[0] = RDIV( 8Wspts->duv[1].y, width ); /* dV/dx */
659 :
660 :			if (nb_pts==2) {
661 :			gmc->dU[1] = -gmc->dV[0]; /* -Sin */
662 :			gmc->dV[1] = gmc->dU[0] ; /* Cos */
663 :			}
664 :			else
665 :			{
666 :			const int Beta = log2bin(height-1);
667 :			const int Hs = 1<<Beta;
668 :			gmc->dU[1] = RDIV( 8Hspts->duv[2].x, height ); /* dU/dy */
669 :			gmc->dV[1] = 16Hs + RDIV( 8Hspts->duv[2].y, height ); / dV/dy */
670 :			if (Beta>Alpha) {
671 :			gmc->dU[0] <<= (Beta-Alpha);
672 :			gmc->dV[0] <<= (Beta-Alpha);
673 :			Alpha = Beta;
674 :			Ws = Hs;
675 :			}
676 :			else {
677 :			gmc->dU[1] <<= Alpha - Beta;
678 :			gmc->dV[1] <<= Alpha - Beta;
679 :			}
680 :			}
681 :			/* upscale to 16b fixed-point */
682 :			gmc->dU[0] <<= (16-Alpha - rho);
683 :			gmc->dU[1] <<= (16-Alpha - rho);
684 :			gmc->dV[0] <<= (16-Alpha - rho);
685 :			gmc->dV[1] <<= (16-Alpha - rho);
686 :
687 :			gmc->Uo = ( pts->duv[0].x <<(16+ accuracy)) + (1<<15);
688 :			gmc->Vo = ( pts->duv[0].y <<(16+ accuracy)) + (1<<15);
689 :			gmc->Uco = ((pts->duv[0].x-1)<<(17+ accuracy)) + (1<<17);
690 :			gmc->Vco = ((pts->duv[0].y-1)<<(17+ accuracy)) + (1<<17);
691 :			gmc->Uco = (gmc->Uco + gmc->dU[0] + gmc->dU[1])>>2;
692 :			gmc->Vco = (gmc->Vco + gmc->dV[0] + gmc->dV[1])>>2;
693 :
694 :	Skal	1.5	gmc->predict_16x16 = Predict_16x16_func;
695 :			gmc->predict_8x8 = Predict_8x8_func;
696 :	edgomez	1.2	gmc->get_average_mv = get_average_mv_C;
697 :			}
698 :			}
699 :
700 :			/* *******************************************************************
701 :			* quick and dirty routine to generate the full warped image
702 :			* (pGMC != NULL) or just all average Motion Vectors (pGMC == NULL) */
703 :
704 :			void
705 :			generate_GMCimage( const NEW_GMC_DATA const gmc_data, / [input] precalculated data */
706 :			const IMAGE const pRef, / [input] */
707 :			const int mb_width,
708 :			const int mb_height,
709 :			const int stride,
710 :			const int stride2,
711 :			const int fcode, /* [input] some parameters... */
712 :			const int32_t quarterpel, /* [input] for rounding avgMV */
713 :			const int reduced_resolution, /* [input] ignored */
714 :			const int32_t rounding, /* [input] for rounding image data */
715 :			MACROBLOCK const pMBs, / [output] average motion vectors */
716 :			IMAGE const pGMC) / [output] full warped image */
717 :			{
718 :
719 :			unsigned int mj,mi;
720 :			VECTOR avgMV;
721 :
722 :			for (mj = 0; mj < (unsigned int)mb_height; mj++)
723 :			for (mi = 0; mi < (unsigned int)mb_width; mi++) {
724 :			const int mbnum = mj*mb_width+mi;
725 :			if (pGMC)
726 :			{
727 :			gmc_data->predict_16x16(gmc_data,
728 :			pGMC->y + mj16stride + mi*16, pRef->y,
729 :			stride, stride, mi, mj, rounding);
730 :
731 :			gmc_data->predict_8x8(gmc_data,
732 :			pGMC->u + mj8stride2 + mi*8, pRef->u,
733 :			pGMC->v + mj8stride2 + mi*8, pRef->v,
734 :			stride2, stride2, mi, mj, rounding);
735 :			}
736 :			gmc_data->get_average_mv(gmc_data, &avgMV, mi, mj, quarterpel);
737 :
738 :			pMBs[mbnum].amv.x = gmc_sanitize(avgMV.x, quarterpel, fcode);
739 :			pMBs[mbnum].amv.y = gmc_sanitize(avgMV.y, quarterpel, fcode);
740 :
741 :			pMBs[mbnum].mcsel = 0; /* until mode decision */
742 :			}
743 :			}

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4