1 |
/************************************************************************** |
/***************************************************************************** |
2 |
* |
* |
3 |
* XVID MPEG-4 VIDEO CODEC |
* XVID MPEG-4 VIDEO CODEC |
4 |
* GMC interpolation module |
* - GMC interpolation module - |
5 |
|
* |
6 |
|
* Copyright(C) 2002-2003 Pascal Massimino <skal@planet-d.net> |
7 |
* |
* |
8 |
* This program is free software; you can redistribute it and/or modify |
* This program is free software; you can redistribute it and/or modify |
9 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
17 |
* |
* |
18 |
* You should have received a copy of the GNU General Public License |
* You should have received a copy of the GNU General Public License |
19 |
* along with this program; if not, write to the Free Software |
* along with this program; if not, write to the Free Software |
20 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
|
* |
22 |
|
* $Id$ |
23 |
* |
* |
24 |
*************************************************************************/ |
****************************************************************************/ |
25 |
|
|
26 |
#include "../portab.h" |
#include "../portab.h" |
27 |
#include "../global.h" |
#include "../global.h" |
30 |
|
|
31 |
#include <stdio.h> |
#include <stdio.h> |
32 |
|
|
33 |
/* These are mainly the new GMC routines by -Skal- (C) 2003 */ |
/* initialized by init_GMC(), for 3points */ |
34 |
|
static |
35 |
////////////////////////////////////////////////////////// |
void (*Predict_16x16_func)(const NEW_GMC_DATA * const This, |
36 |
// Pts = 2 or 3 |
uint8_t *dst, const uint8_t *src, |
37 |
|
int dststride, int srcstride, int x, int y, int rounding) = 0; |
38 |
|
static |
39 |
|
void (*Predict_8x8_func)(const NEW_GMC_DATA * const This, |
40 |
|
uint8_t *uDst, const uint8_t *uSrc, |
41 |
|
uint8_t *vDst, const uint8_t *vSrc, |
42 |
|
int dststride, int srcstride, int x, int y, int rounding) = 0; |
43 |
|
|
44 |
// Warning! *src is the global frame pointer (that is: address |
/****************************************************************************/ |
45 |
// of pixel 0,0), not the macroblock one. |
/* this is borrowed from bitstream.c until we find a common solution */ |
46 |
// Conversely, *dst is the macroblock top-left address. |
/*
 * Returns the number of significant bits in `value`:
 * 0 for 0, otherwise floor(log2(value)) + 1 (so 1 -> 1, 255 -> 8, 256 -> 9).
 *
 * The former MSVC-only inline-assembly variant (bsr/inc) was dropped: it
 * had no return statement, `bsr` leaves its destination undefined for a
 * zero source, and inline assembly is only available on 32-bit x86.
 * The plain loop gives the same result on every compiler.
 */
static uint32_t __inline
log2bin(uint32_t value)
{
	uint32_t n = 0;

	while (value) {
		value >>= 1;
		n++;
	}
	return n;
}
65 |
|
|
66 |
|
/* 16*sizeof(int) -> 1 or 2 cachelines */ |
67 |
|
/* table lookup might be faster! (still to be benchmarked) */ |
68 |
|
|
69 |
|
/* |
70 |
|
static int log2bin_table[16] = |
71 |
|
{ 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4}; |
72 |
|
*/ |
73 |
|
/* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 */ |
74 |
|
|
75 |
|
/*
 * RDIV(a,b): a/b rounded to the nearest integer, with halves rounded away
 * from zero. Written for b > 0. Both arguments are evaluated several
 * times, so they must not have side effects.
 */
#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))

/*
 * RSHIFT(a,b): a >> b with rounding (b >= 1). Non-positive values get a
 * bias that is one smaller, so that negative halves round away from zero
 * when >> is an arithmetic shift on the target.
 */
#define RSHIFT(a,b) ( (a)>0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))

/*
 * MTab[i], i = 0..15: a pair of interpolation weights packed in one word,
 * (16-i) in the upper 16 bits and i in the lower 16 bits.
 */
#define MLT(i)  (((16-(i))<<16) + (i))
static const uint32_t MTab[16] = {
	MLT( 0), MLT( 1), MLT( 2), MLT( 3), MLT( 4), MLT( 5), MLT( 6), MLT( 7),
	MLT( 8), MLT( 9), MLT(10), MLT(11), MLT(12), MLT(13), MLT(14), MLT(15)
};
#undef MLT
84 |
|
|
85 |
|
/* ************************************************************ |
86 |
|
* Pts = 2 or 3 |
87 |
|
* |
88 |
|
* Warning! *src is the global frame pointer (that is: address |
89 |
|
* of pixel 0,0), not the macroblock one. |
90 |
|
* Conversely, *dst is the macroblock top-left address. |
91 |
|
*/ |
92 |
|
|
93 |
|
static |
94 |
void Predict_16x16_C(const NEW_GMC_DATA * const This, |
void Predict_16x16_C(const NEW_GMC_DATA * const This, |
95 |
uint8_t *dst, const uint8_t *src, |
uint8_t *dst, const uint8_t *src, |
96 |
int dststride, int srcstride, int x, int y, int rounding) |
int dststride, int srcstride, int x, int y, int rounding) |
111 |
int i, j; |
int i, j; |
112 |
|
|
113 |
dst += 16; |
dst += 16; |
114 |
for (j=16; j>0; --j) |
for (j=16; j>0; --j) { |
|
{ |
|
115 |
int U = Uo, V = Vo; |
int U = Uo, V = Vo; |
116 |
Uo += dUy; Vo += dVy; |
Uo += dUy; Vo += dVy; |
117 |
for (i=-16; i<0; ++i) |
for (i=-16; i<0; ++i) { |
118 |
{ |
unsigned int f0, f1, ri = 16, rj = 16; |
|
unsigned int f0, f1, ri, rj; |
|
119 |
int Offset; |
int Offset; |
|
|
|
120 |
int u = ( U >> 16 ) << rho; |
int u = ( U >> 16 ) << rho; |
121 |
int v = ( V >> 16 ) << rho; |
int v = ( V >> 16 ) << rho; |
122 |
|
|
123 |
U += dUx; V += dVx; |
U += dUx; V += dVx; |
124 |
|
|
125 |
ri = 16; |
if (u > 0 && u <= W) { ri = MTab[u&15]; Offset = u>>4; } |
126 |
if ((uint32_t)u<=(uint32_t)W) { ri = MTab[u&15]; Offset = u>>4; } |
else { |
127 |
else if (u>W) Offset = W>>4; |
if (u > W) Offset = W>>4; |
128 |
else Offset = -1; |
else Offset = 0; |
129 |
|
ri = MTab[0]; |
130 |
rj = 16; |
} |
131 |
if ((uint32_t)v<=(uint32_t)H) { rj = MTab[v&15]; Offset += (v>>4)*srcstride; } |
|
132 |
else if (v>H) Offset += (H>>4)*srcstride; |
if (v > 0 && v <= H) { rj = MTab[v&15]; Offset += (v>>4)*srcstride; } |
133 |
else Offset -= srcstride; |
else { |
134 |
|
if (v > H) Offset += (H>>4)*srcstride; |
135 |
|
rj = MTab[0]; |
136 |
|
} |
137 |
|
|
138 |
f0 = src[ Offset +0 ]; |
f0 = src[ Offset +0 ]; |
139 |
f0 |= src[ Offset +1 ] << 16; |
f0 |= src[ Offset +1 ] << 16; |
150 |
} |
} |
151 |
} |
} |
152 |
|
|
153 |
|
static |
154 |
void Predict_8x8_C(const NEW_GMC_DATA * const This, |
void Predict_8x8_C(const NEW_GMC_DATA * const This, |
155 |
uint8_t *uDst, const uint8_t *uSrc, |
uint8_t *uDst, const uint8_t *uSrc, |
156 |
uint8_t *vDst, const uint8_t *vSrc, |
uint8_t *vDst, const uint8_t *vSrc, |
173 |
|
|
174 |
uDst += 8; |
uDst += 8; |
175 |
vDst += 8; |
vDst += 8; |
176 |
for (j=8; j>0; --j) |
for (j=8; j>0; --j) { |
|
{ |
|
177 |
int32_t U = Uo, V = Vo; |
int32_t U = Uo, V = Vo; |
178 |
Uo += dUy; Vo += dVy; |
Uo += dUy; Vo += dVy; |
179 |
|
|
180 |
for (i=-8; i<0; ++i) |
for (i=-8; i<0; ++i) { |
|
{ |
|
181 |
int Offset; |
int Offset; |
182 |
uint32_t f0, f1, ri, rj; |
uint32_t f0, f1, ri, rj; |
183 |
int32_t u, v; |
int32_t u, v; |
186 |
v = ( V >> 16 ) << rho; |
v = ( V >> 16 ) << rho; |
187 |
U += dUx; V += dVx; |
U += dUx; V += dVx; |
188 |
|
|
189 |
if ((uint32_t)u<=(uint32_t)W) { |
if (u > 0 && u <= W) { |
190 |
ri = MTab[u&15]; |
ri = MTab[u&15]; |
191 |
Offset = u>>4; |
Offset = u>>4; |
192 |
} |
} else { |
|
else { |
|
|
ri = 16; |
|
193 |
if (u>W) Offset = W>>4; |
if (u>W) Offset = W>>4; |
194 |
else Offset = -1; |
else Offset = 0; |
195 |
|
ri = MTab[0]; |
196 |
} |
} |
197 |
if ((uint32_t)v<=(uint32_t)H) { |
|
198 |
|
if (v > 0 && v <= H) { |
199 |
rj = MTab[v&15]; |
rj = MTab[v&15]; |
200 |
Offset += (v>>4)*srcstride; |
Offset += (v>>4)*srcstride; |
201 |
} |
} else { |
|
else { |
|
|
rj = 16; |
|
202 |
if (v>H) Offset += (H>>4)*srcstride; |
if (v>H) Offset += (H>>4)*srcstride; |
203 |
else Offset -= srcstride; |
rj = MTab[0]; |
204 |
} |
} |
205 |
|
|
206 |
f0 = uSrc[ Offset +0 ]; |
f0 = uSrc[ Offset +0 ]; |
230 |
} |
} |
231 |
} |
} |
232 |
|
|
233 |
|
static |
234 |
void get_average_mv_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv, |
void get_average_mv_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv, |
235 |
int x, int y, int qpel) |
int x, int y, int qpel) |
236 |
{ |
{ |
250 |
v = V >> 16; V += Dsp->dV[0]; vy += v; |
v = V >> 16; V += Dsp->dV[0]; vy += v; |
251 |
} |
} |
252 |
} |
} |
253 |
vx -= (256*x+120) << (5+Dsp->accuracy); // 120 = 15*16/2 |
vx -= (256*x+120) << (5+Dsp->accuracy); /* 120 = 15*16/2 */ |
254 |
vy -= (256*y+120) << (5+Dsp->accuracy); |
vy -= (256*y+120) << (5+Dsp->accuracy); |
255 |
|
|
256 |
mv->x = RSHIFT( vx, 8+Dsp->accuracy - qpel ); |
mv->x = RSHIFT( vx, 8+Dsp->accuracy - qpel ); |
257 |
mv->y = RSHIFT( vy, 8+Dsp->accuracy - qpel ); |
mv->y = RSHIFT( vy, 8+Dsp->accuracy - qpel ); |
258 |
} |
} |
259 |
|
|
260 |
////////////////////////////////////////////////////////// |
/* ************************************************************ |
261 |
// simplified version for 1 warp point |
* simplified version for 1 warp point |
262 |
|
*/ |
263 |
|
|
264 |
|
static |
265 |
void Predict_1pt_16x16_C(const NEW_GMC_DATA * const This, |
void Predict_1pt_16x16_C(const NEW_GMC_DATA * const This, |
266 |
uint8_t *Dst, const uint8_t *Src, |
uint8_t *Dst, const uint8_t *Src, |
267 |
int dststride, int srcstride, int x, int y, int rounding) |
int dststride, int srcstride, int x, int y, int rounding) |
272 |
const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; |
const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; |
273 |
|
|
274 |
|
|
275 |
int32_t uo = This->Uo + (x<<8); // ((16*x)<<4) |
int32_t uo = This->Uo + (x<<8); /* ((16*x)<<4) */ |
276 |
int32_t vo = This->Vo + (y<<8); |
int32_t vo = This->Vo + (y<<8); |
277 |
const uint32_t ri = MTab[uo & 15]; |
uint32_t ri = MTab[uo & 15]; |
278 |
const uint32_t rj = MTab[vo & 15]; |
uint32_t rj = MTab[vo & 15]; |
279 |
int i, j; |
int i, j; |
280 |
|
|
281 |
int32_t Offset; |
int32_t Offset; |
282 |
if ((uint32_t)vo<=(uint32_t)H) Offset = (vo>>4)*srcstride; |
if (vo>=(-16<<4) && vo<=H) Offset = (vo>>4)*srcstride; |
283 |
else if (vo>H) Offset = ( H>>4)*srcstride; |
else { |
284 |
|
if (vo>H) Offset = ( H>>4)*srcstride; |
285 |
else Offset =-16*srcstride; |
else Offset =-16*srcstride; |
286 |
if ((uint32_t)uo<=(uint32_t)W) Offset += (uo>>4); |
rj = MTab[0]; |
287 |
else if (uo>W) Offset += ( W>>4); |
} |
288 |
|
if (uo>=(-16<<4) && uo<=W) Offset += (uo>>4); |
289 |
|
else { |
290 |
|
if (uo>W) Offset += (W>>4); |
291 |
else Offset -= 16; |
else Offset -= 16; |
292 |
|
ri = MTab[0]; |
293 |
|
} |
294 |
|
|
295 |
Dst += 16; |
Dst += 16; |
296 |
|
|
313 |
} |
} |
314 |
} |
} |
315 |
|
|
316 |
|
static |
317 |
void Predict_1pt_8x8_C(const NEW_GMC_DATA * const This, |
void Predict_1pt_8x8_C(const NEW_GMC_DATA * const This, |
318 |
uint8_t *uDst, const uint8_t *uSrc, |
uint8_t *uDst, const uint8_t *uSrc, |
319 |
uint8_t *vDst, const uint8_t *vSrc, |
uint8_t *vDst, const uint8_t *vSrc, |
326 |
|
|
327 |
int32_t uo = This->Uco + (x<<7); |
int32_t uo = This->Uco + (x<<7); |
328 |
int32_t vo = This->Vco + (y<<7); |
int32_t vo = This->Vco + (y<<7); |
329 |
const uint32_t rri = MTab[uo & 15]; |
uint32_t rri = MTab[uo & 15]; |
330 |
const uint32_t rrj = MTab[vo & 15]; |
uint32_t rrj = MTab[vo & 15]; |
331 |
int i, j; |
int i, j; |
332 |
|
|
333 |
int32_t Offset; |
int32_t Offset; |
334 |
if ((uint32_t)vo<=(uint32_t)H) Offset = (vo>>4)*srcstride; |
if (vo>=(-8<<4) && vo<=H) Offset = (vo>>4)*srcstride; |
335 |
else if (vo>H) Offset = ( H>>4)*srcstride; |
else { |
336 |
|
if (vo>H) Offset = ( H>>4)*srcstride; |
337 |
else Offset =-8*srcstride; |
else Offset =-8*srcstride; |
338 |
if ((uint32_t)uo<=(uint32_t)W) Offset += (uo>>4); |
rrj = MTab[0]; |
339 |
else if (uo>W) Offset += (W>>4); |
} |
340 |
|
if (uo>=(-8<<4) && uo<=W) Offset += (uo>>4); |
341 |
|
else { |
342 |
|
if (uo>W) Offset += ( W>>4); |
343 |
else Offset -= 8; |
else Offset -= 8; |
344 |
|
rri = MTab[0]; |
345 |
|
} |
346 |
|
|
347 |
uDst += 8; |
uDst += 8; |
348 |
vDst += 8; |
vDst += 8; |
376 |
} |
} |
377 |
} |
} |
378 |
|
|
379 |
|
static |
380 |
void get_average_mv_1pt_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv, |
void get_average_mv_1pt_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv, |
381 |
int x, int y, int qpel) |
int x, int y, int qpel) |
382 |
{ |
{ |
384 |
mv->y = RSHIFT(Dsp->Vo<<qpel, 3); |
mv->y = RSHIFT(Dsp->Vo<<qpel, 3); |
385 |
} |
} |
386 |
|
|
387 |
|
#if defined(ARCH_IS_IA32) |
388 |
|
/* ************************************************************* |
389 |
|
* MMX core function |
390 |
|
*/ |
391 |
|
|
392 |
|
/*
 * Pointer to the row core used by the MMX predictors when 8 consecutive
 * samples can be fetched as one batch. Null until init_GMC() selects one
 * of the two external implementations declared below.
 */
static
void (*GMC_Core_Lin_8)(uint8_t *Dst, const uint16_t * Offsets,
					   const uint8_t * const Src0, const int BpS, const int Rounder) = 0;

/* Implementations defined outside this file (MMX and SSE2 variants). */
extern void xvid_GMC_Core_Lin_8_mmx(uint8_t *Dst, const uint16_t * Offsets,
									const uint8_t * const Src0, const int BpS, const int Rounder);

extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets,
									 const uint8_t * const Src0, const int BpS, const int Rounder);
401 |
|
|
402 |
|
/* *************************************************************/ |
403 |
|
|
404 |
|
static void GMC_Core_Non_Lin_8(uint8_t *Dst, |
405 |
|
const uint16_t * Offsets, |
406 |
|
const uint8_t * const Src0, const int srcstride, |
407 |
|
const int Rounder) |
408 |
|
{ |
409 |
|
int i; |
410 |
|
for(i=0; i<8; ++i) |
411 |
|
{ |
412 |
|
uint32_t u = Offsets[i ]; |
413 |
|
uint32_t v = Offsets[i+16]; |
414 |
|
const uint32_t ri = MTab[u&0x0f]; |
415 |
|
const uint32_t rj = MTab[v&0x0f]; |
416 |
|
uint32_t f0, f1; |
417 |
|
const uint8_t * const Src = Src0 + (u>>4) + (v>>4)*srcstride; |
418 |
|
f0 = Src[0]; |
419 |
|
f0 |= Src[1] << 16; |
420 |
|
f1 = Src[srcstride +0]; |
421 |
|
f1 |= Src[srcstride +1] << 16; |
422 |
|
f0 = (ri*f0)>>16; |
423 |
|
f1 = (ri*f1) & 0x0fff0000; |
424 |
|
f0 |= f1; |
425 |
|
f0 = ( rj*f0 + Rounder ) >> 24; |
426 |
|
Dst[i] = (uint8_t)f0; |
427 |
|
} |
428 |
|
} |
429 |
|
|
430 |
////////////////////////////////////////////////////////// |
////////////////////////////////////////////////////////// |
431 |
|
|
432 |
|
static |
433 |
|
void Predict_16x16_mmx(const NEW_GMC_DATA * const This, |
434 |
|
uint8_t *dst, const uint8_t *src, |
435 |
|
int dststride, int srcstride, int x, int y, int rounding) |
436 |
|
{ |
437 |
|
const int W = This->sW; |
438 |
|
const int H = This->sH; |
439 |
|
const int rho = 3 - This->accuracy; |
440 |
|
const int Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; |
441 |
|
const uint32_t W2 = W<<(16-rho); |
442 |
|
const uint32_t H2 = H<<(16-rho); |
443 |
|
|
444 |
|
const int dUx = This->dU[0]; |
445 |
|
const int dVx = This->dV[0]; |
446 |
|
const int dUy = This->dU[1]; |
447 |
|
const int dVy = This->dV[1]; |
448 |
|
|
449 |
|
int Uo = This->Uo + 16*(dUy*y + dUx*x); |
450 |
|
int Vo = This->Vo + 16*(dVy*y + dVx*x); |
451 |
|
|
452 |
|
int i, j; |
453 |
|
|
454 |
|
DECLARE_ALIGNED_MATRIX(Offsets, 2,16, uint16_t, CACHE_LINE); |
455 |
|
for(j=16; j>0; --j) |
456 |
|
{ |
457 |
|
int32_t U = Uo, V = Vo; |
458 |
|
Uo += dUy; Vo += dVy; |
459 |
|
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
460 |
|
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
461 |
|
{ |
462 |
|
for(i=0; i<16; ++i) |
463 |
|
{ |
464 |
|
uint32_t u = ( U >> 16 ) << rho; |
465 |
|
uint32_t v = ( V >> 16 ) << rho; |
466 |
|
U += dUx; V += dVx; |
467 |
|
Offsets[ i] = u; |
468 |
|
Offsets[16+i] = v; |
469 |
|
} |
470 |
|
// batch 8 input pixels when linearity says it's ok |
471 |
|
uint32_t UV1, UV2; |
472 |
|
UV1 = (Offsets[0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
473 |
|
UV2 = (Offsets[7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
474 |
|
if (UV1+7*16==UV2) |
475 |
|
GMC_Core_Lin_8(dst, Offsets, src + (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride, srcstride, Rounder); |
476 |
|
else |
477 |
|
GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder); |
478 |
|
UV1 = (Offsets[ 8] | (Offsets[24]<<16)) & 0xfff0fff0U; |
479 |
|
UV2 = (Offsets[15] | (Offsets[31]<<16)) & 0xfff0fff0U; |
480 |
|
if (UV1+7*16==UV2) |
481 |
|
GMC_Core_Lin_8(dst+8, Offsets+8, src + (Offsets[8]>>4) + (Offsets[24]>>4)*srcstride, srcstride, Rounder); |
482 |
|
else |
483 |
|
GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder); |
484 |
|
} |
485 |
|
else |
486 |
|
{ |
487 |
|
for(i=0; i<16; ++i) |
488 |
|
{ |
489 |
|
int u = ( U >> 16 ) << rho; |
490 |
|
int v = ( V >> 16 ) << rho; |
491 |
|
U += dUx; V += dVx; |
492 |
|
|
493 |
|
Offsets[ i] = (u<0) ? 0 : (u>=W) ? W : u; |
494 |
|
Offsets[16+i] = (v<0) ? 0 : (v>=H) ? H : v; |
495 |
|
} |
496 |
|
// due to boundary clipping, we cannot infer the 8-pixels batchability |
497 |
|
// simply by using the linearity. Oh well, not a big deal... |
498 |
|
GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder); |
499 |
|
GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder); |
500 |
|
} |
501 |
|
dst += dststride; |
502 |
|
} |
503 |
|
} |
504 |
|
|
505 |
|
static |
506 |
|
void Predict_8x8_mmx(const NEW_GMC_DATA * const This, |
507 |
|
uint8_t *uDst, const uint8_t *uSrc, |
508 |
|
uint8_t *vDst, const uint8_t *vSrc, |
509 |
|
int dststride, int srcstride, int x, int y, int rounding) |
510 |
|
{ |
511 |
|
const int W = This->sW >> 1; |
512 |
|
const int H = This->sH >> 1; |
513 |
|
const int rho = 3-This->accuracy; |
514 |
|
const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; |
515 |
|
const uint32_t W2 = W<<(16-rho); |
516 |
|
const uint32_t H2 = H<<(16-rho); |
517 |
|
|
518 |
|
const int dUx = This->dU[0]; |
519 |
|
const int dVx = This->dV[0]; |
520 |
|
const int dUy = This->dU[1]; |
521 |
|
const int dVy = This->dV[1]; |
522 |
|
|
523 |
|
int Uo = This->Uco + 8*(dUy*y + dUx*x); |
524 |
|
int Vo = This->Vco + 8*(dVy*y + dVx*x); |
525 |
|
|
526 |
|
DECLARE_ALIGNED_MATRIX(Offsets, 2,16, uint16_t, CACHE_LINE); |
527 |
|
int i, j; |
528 |
|
for(j=8; j>0; --j) |
529 |
|
{ |
530 |
|
int32_t U = Uo, V = Vo; |
531 |
|
Uo += dUy; Vo += dVy; |
532 |
|
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
533 |
|
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
534 |
|
{ |
535 |
|
for(i=0; i<8; ++i) |
536 |
|
{ |
537 |
|
int32_t u = ( U >> 16 ) << rho; |
538 |
|
int32_t v = ( V >> 16 ) << rho; |
539 |
|
U += dUx; V += dVx; |
540 |
|
Offsets[ i] = u; |
541 |
|
Offsets[16+i] = v; |
542 |
|
} |
543 |
|
// batch 8 input pixels when linearity says it's ok |
544 |
|
const uint32_t UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
545 |
|
const uint32_t UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
546 |
|
if (UV1+7*16==UV2) |
547 |
|
{ |
548 |
|
const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride; |
549 |
|
GMC_Core_Lin_8(uDst, Offsets, uSrc+Off, srcstride, Rounder); |
550 |
|
GMC_Core_Lin_8(vDst, Offsets, vSrc+Off, srcstride, Rounder); |
551 |
|
} |
552 |
|
else { |
553 |
|
GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder); |
554 |
|
GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder); |
555 |
|
} |
556 |
|
} |
557 |
|
else |
558 |
|
{ |
559 |
|
for(i=0; i<8; ++i) |
560 |
|
{ |
561 |
|
int u = ( U >> 16 ) << rho; |
562 |
|
int v = ( V >> 16 ) << rho; |
563 |
|
U += dUx; V += dVx; |
564 |
|
Offsets[ i] = (u<0) ? 0 : (u>=W) ? W : u; |
565 |
|
Offsets[16+i] = (v<0) ? 0 : (v>=H) ? H : v; |
566 |
|
} |
567 |
|
GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder); |
568 |
|
GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder); |
569 |
|
} |
570 |
|
uDst += dststride; |
571 |
|
vDst += dststride; |
572 |
|
} |
573 |
|
} |
574 |
|
|
575 |
|
#endif /* ARCH_IS_IA32 */ |
576 |
|
|
577 |
|
/* ************************************************************* |
578 |
|
* will initialize internal pointers |
579 |
|
*/ |
580 |
|
|
581 |
// Warning! It's Accuracy being passed, not 'resolution'! |
void init_GMC(const unsigned int cpu_flags) |
582 |
|
{ |
583 |
|
Predict_16x16_func = Predict_16x16_C; |
584 |
|
Predict_8x8_func = Predict_8x8_C; |
585 |
|
|
586 |
|
#if 0 // #if defined(ARCH_IS_IA32) |
587 |
|
if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || |
588 |
|
(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || |
589 |
|
(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2)) |
590 |
|
{ |
591 |
|
Predict_16x16_func = Predict_16x16_mmx; |
592 |
|
Predict_8x8_func = Predict_8x8_mmx; |
593 |
|
if (cpu_flags & XVID_CPU_SSE2) |
594 |
|
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2; |
595 |
|
else |
596 |
|
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_mmx; |
597 |
|
} |
598 |
|
#endif |
599 |
|
} |
600 |
|
|
601 |
|
/* ************************************************************* |
602 |
|
* Warning! It's Accuracy being passed, not 'resolution'! |
603 |
|
*/ |
604 |
|
|
605 |
void generate_GMCparameters( int nb_pts, const int accuracy, |
void generate_GMCparameters( int nb_pts, const int accuracy, |
606 |
const WARPPOINTS *const pts, |
const WARPPOINTS *const pts, |
612 |
gmc->accuracy = accuracy; |
gmc->accuracy = accuracy; |
613 |
gmc->num_wp = nb_pts; |
gmc->num_wp = nb_pts; |
614 |
|
|
615 |
// reduce the number of points, if possible |
/* reduce the number of points, if possible */ |
616 |
if (nb_pts<3 || (pts->duv[2].x==-pts->duv[1].y && pts->duv[2].y==pts->duv[1].x)) { |
if (nb_pts<2 || (pts->duv[2].x==0 && pts->duv[2].y==0 && pts->duv[1].x==0 && pts->duv[1].y==0 )) { |
617 |
if (nb_pts<2 || (pts->duv[1].x==0 && pts->duv[1].y==0)) { |
if (nb_pts<2 || (pts->duv[1].x==0 && pts->duv[1].y==0)) { |
618 |
if (nb_pts<1 || (pts->duv[0].x==0 && pts->duv[0].y==0)) { |
if (nb_pts<1 || (pts->duv[0].x==0 && pts->duv[0].y==0)) { |
619 |
nb_pts = 0; |
nb_pts = 0; |
622 |
} |
} |
623 |
else nb_pts = 2; |
else nb_pts = 2; |
624 |
} |
} |
|
else nb_pts = 3; |
|
625 |
|
|
626 |
// now, nb_pts stores the actual number of points required for interpolation |
/* now, nb_pts stores the actual number of points required for interpolation */ |
627 |
|
|
628 |
if (nb_pts<=1) |
if (nb_pts<=1) |
629 |
{ |
{ |
630 |
if (nb_pts==1) { |
if (nb_pts==1) { |
631 |
// store as 4b fixed point |
/* store as 4b fixed point */ |
632 |
gmc->Uo = pts->duv[0].x << accuracy; |
gmc->Uo = pts->duv[0].x << accuracy; |
633 |
gmc->Vo = pts->duv[0].y << accuracy; |
gmc->Vo = pts->duv[0].y << accuracy; |
634 |
gmc->Uco = ((pts->duv[0].x>>1) | (pts->duv[0].x&1)) << accuracy; // DIV2RND() |
gmc->Uco = ((pts->duv[0].x>>1) | (pts->duv[0].x&1)) << accuracy; /* DIV2RND() */ |
635 |
gmc->Vco = ((pts->duv[0].y>>1) | (pts->duv[0].y&1)) << accuracy; // DIV2RND() |
gmc->Vco = ((pts->duv[0].y>>1) | (pts->duv[0].y&1)) << accuracy; /* DIV2RND() */ |
636 |
} |
} |
637 |
else { // zero points?! |
else { /* zero points?! */ |
638 |
gmc->Uo = gmc->Vo = 0; |
gmc->Uo = gmc->Vo = 0; |
639 |
gmc->Uco = gmc->Vco = 0; |
gmc->Uco = gmc->Vco = 0; |
640 |
} |
} |
643 |
gmc->predict_8x8 = Predict_1pt_8x8_C; |
gmc->predict_8x8 = Predict_1pt_8x8_C; |
644 |
gmc->get_average_mv = get_average_mv_1pt_C; |
gmc->get_average_mv = get_average_mv_1pt_C; |
645 |
} |
} |
646 |
else { // 2 or 3 points |
else { /* 2 or 3 points */ |
647 |
const int rho = 3 - accuracy; // = {3,2,1,0} for Acc={0,1,2,3} |
const int rho = 3 - accuracy; /* = {3,2,1,0} for Acc={0,1,2,3} */ |
648 |
int Alpha = log2bin(width-1); |
int Alpha = log2bin(width-1); |
649 |
int Ws = 1 << Alpha; |
int Ws = 1 << Alpha; |
650 |
|
|
651 |
gmc->dU[0] = 16*Ws + RDIV( 8*Ws*pts->duv[1].x, width ); // dU/dx |
gmc->dU[0] = 16*Ws + RDIV( 8*Ws*pts->duv[1].x, width ); /* dU/dx */ |
652 |
gmc->dV[0] = RDIV( 8*Ws*pts->duv[1].y, width ); // dV/dx |
gmc->dV[0] = RDIV( 8*Ws*pts->duv[1].y, width ); /* dV/dx */ |
653 |
|
|
654 |
/* disabled, because possibly buggy? */ |
if (nb_pts==2) { |
655 |
|
gmc->dU[1] = -gmc->dV[0]; /* -Sin */ |
656 |
/* if (nb_pts==2) { |
gmc->dV[1] = gmc->dU[0] ; /* Cos */ |
|
gmc->dU[1] = -gmc->dV[0]; // -Sin |
|
|
gmc->dV[1] = gmc->dU[0] ; // Cos |
|
657 |
} |
} |
658 |
else */ |
else |
659 |
{ |
{ |
660 |
const int Beta = log2bin(height-1); |
const int Beta = log2bin(height-1); |
661 |
const int Hs = 1<<Beta; |
const int Hs = 1<<Beta; |
662 |
gmc->dU[1] = RDIV( 8*Hs*pts->duv[2].x, height ); // dU/dy |
gmc->dU[1] = RDIV( 8*Hs*pts->duv[2].x, height ); /* dU/dy */ |
663 |
gmc->dV[1] = 16*Hs + RDIV( 8*Hs*pts->duv[2].y, height ); // dV/dy |
gmc->dV[1] = 16*Hs + RDIV( 8*Hs*pts->duv[2].y, height ); /* dV/dy */ |
664 |
if (Beta>Alpha) { |
if (Beta>Alpha) { |
665 |
gmc->dU[0] <<= (Beta-Alpha); |
gmc->dU[0] <<= (Beta-Alpha); |
666 |
gmc->dV[0] <<= (Beta-Alpha); |
gmc->dV[0] <<= (Beta-Alpha); |
672 |
gmc->dV[1] <<= Alpha - Beta; |
gmc->dV[1] <<= Alpha - Beta; |
673 |
} |
} |
674 |
} |
} |
675 |
// upscale to 16b fixed-point |
/* upscale to 16b fixed-point */ |
676 |
gmc->dU[0] <<= (16-Alpha - rho); |
gmc->dU[0] <<= (16-Alpha - rho); |
677 |
gmc->dU[1] <<= (16-Alpha - rho); |
gmc->dU[1] <<= (16-Alpha - rho); |
678 |
gmc->dV[0] <<= (16-Alpha - rho); |
gmc->dV[0] <<= (16-Alpha - rho); |
685 |
gmc->Uco = (gmc->Uco + gmc->dU[0] + gmc->dU[1])>>2; |
gmc->Uco = (gmc->Uco + gmc->dU[0] + gmc->dU[1])>>2; |
686 |
gmc->Vco = (gmc->Vco + gmc->dV[0] + gmc->dV[1])>>2; |
gmc->Vco = (gmc->Vco + gmc->dV[0] + gmc->dV[1])>>2; |
687 |
|
|
688 |
gmc->predict_16x16 = Predict_16x16_C; |
gmc->predict_16x16 = Predict_16x16_func; |
689 |
gmc->predict_8x8 = Predict_8x8_C; |
gmc->predict_8x8 = Predict_8x8_func; |
690 |
gmc->get_average_mv = get_average_mv_C; |
gmc->get_average_mv = get_average_mv_C; |
691 |
} |
} |
692 |
} |
} |
693 |
|
|
694 |
////////////////////////////////////////////////////////// |
/* ******************************************************************* |
695 |
|
* quick and dirty routine to generate the full warped image |
696 |
|
* (pGMC != NULL) or just all average Motion Vectors (pGMC == NULL) */ |
|
/* quick and dirty routine to generate the full warped image (pGMC != NULL) |
|
|
or just all average Motion Vectors (pGMC == NULL) */ |
|
697 |
|
|
698 |
void |
void |
699 |
generate_GMCimage( const NEW_GMC_DATA *const gmc_data, // [input] precalculated data |
generate_GMCimage( const NEW_GMC_DATA *const gmc_data, /* [input] precalculated data */ |
700 |
const IMAGE *const pRef, // [input] |
const IMAGE *const pRef, /* [input] */ |
701 |
const int mb_width, |
const int mb_width, |
702 |
const int mb_height, |
const int mb_height, |
703 |
const int stride, |
const int stride, |
704 |
const int stride2, |
const int stride2, |
705 |
const int fcode, // [input] some parameters... |
const int fcode, /* [input] some parameters... */ |
706 |
const int32_t quarterpel, // [input] for rounding avgMV |
const int32_t quarterpel, /* [input] for rounding avgMV */ |
707 |
const int reduced_resolution, // [input] ignored |
const int reduced_resolution, /* [input] ignored */ |
708 |
const int32_t rounding, // [input] for rounding image data |
const int32_t rounding, /* [input] for rounding image data */ |
709 |
MACROBLOCK *const pMBs, // [output] average motion vectors |
MACROBLOCK *const pMBs, /* [output] average motion vectors */ |
710 |
IMAGE *const pGMC) // [output] full warped image |
IMAGE *const pGMC) /* [output] full warped image */ |
711 |
{ |
{ |
712 |
|
|
713 |
unsigned int mj,mi; |
unsigned int mj,mi; |