1 |
/************************************************************************** |
/***************************************************************************** |
2 |
* |
* |
3 |
* XVID MPEG-4 VIDEO CODEC |
* XVID MPEG-4 VIDEO CODEC |
4 |
* GMC interpolation module |
* - GMC interpolation module - |
5 |
|
* |
6 |
|
* Copyright(C) 2002-2003 Pascal Massimino <skal@planet-d.net> |
7 |
* |
* |
8 |
* This program is free software; you can redistribute it and/or modify |
* This program is free software; you can redistribute it and/or modify |
9 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
17 |
* |
* |
18 |
* You should have received a copy of the GNU General Public License |
* You should have received a copy of the GNU General Public License |
19 |
* along with this program; if not, write to the Free Software |
* along with this program; if not, write to the Free Software |
20 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
|
* |
22 |
|
* $Id$ |
23 |
* |
* |
24 |
*************************************************************************/ |
****************************************************************************/ |
25 |
|
|
26 |
#include "../portab.h" |
#include "../portab.h" |
27 |
#include "../global.h" |
#include "../global.h" |
30 |
|
|
31 |
#include <stdio.h> |
#include <stdio.h> |
32 |
|
|
33 |
/* These are mainly the new GMC routines by -Skal- (C) 2003 */ |
/* ************************************************************ |
34 |
|
* Pts = 2 or 3 |
35 |
////////////////////////////////////////////////////////// |
* |
36 |
// Pts = 2 or 3 |
* Warning! *src is the global frame pointer (that is: address |
37 |
|
* of pixel 0,0), not the macroblock one. |
38 |
// Warning! *src is the global frame pointer (that is: address |
* Conversely, *dst is the macroblock top-left address. |
39 |
// of pixel 0,0), not the macroblock one. |
*/ |
|
// Conversely, *dst is the macroblock top-left address. |
|
|
|
|
40 |
|
|
41 |
void Predict_16x16_C(const NEW_GMC_DATA * const This, |
void Predict_16x16_C(const NEW_GMC_DATA * const This, |
42 |
uint8_t *dst, const uint8_t *src, |
uint8_t *dst, const uint8_t *src, |
58 |
int i, j; |
int i, j; |
59 |
|
|
60 |
dst += 16; |
dst += 16; |
61 |
for (j=16; j>0; --j) |
for (j=16; j>0; --j) { |
|
{ |
|
62 |
int U = Uo, V = Vo; |
int U = Uo, V = Vo; |
63 |
Uo += dUy; Vo += dVy; |
Uo += dUy; Vo += dVy; |
64 |
for (i=-16; i<0; ++i) |
for (i=-16; i<0; ++i) { |
65 |
{ |
unsigned int f0, f1, ri = 16, rj = 16; |
|
unsigned int f0, f1, ri, rj; |
|
66 |
int Offset; |
int Offset; |
|
|
|
67 |
int u = ( U >> 16 ) << rho; |
int u = ( U >> 16 ) << rho; |
68 |
int v = ( V >> 16 ) << rho; |
int v = ( V >> 16 ) << rho; |
69 |
|
|
70 |
U += dUx; V += dVx; |
U += dUx; V += dVx; |
71 |
|
|
72 |
ri = 16; |
if (u > 0 && u <= W) { ri = MTab[u&15]; Offset = u>>4; } |
73 |
if ((uint32_t)u<=(uint32_t)W) { ri = MTab[u&15]; Offset = u>>4; } |
else { |
74 |
else if (u>W) Offset = W>>4; |
if (u > W) Offset = W>>4; |
75 |
else Offset = -1; |
else Offset = -1; |
76 |
|
ri = 0; |
77 |
|
} |
78 |
|
|
79 |
rj = 16; |
if (v > 0 && v <= H) { rj = MTab[v&15]; Offset += (v>>4)*srcstride; } |
80 |
if ((uint32_t)v<=(uint32_t)H) { rj = MTab[v&15]; Offset += (v>>4)*srcstride; } |
else { |
81 |
else if (v>H) Offset += (H>>4)*srcstride; |
if (v > H) Offset += (H>>4)*srcstride; |
82 |
else Offset -= srcstride; |
else Offset -= srcstride; |
83 |
|
rj = 0; |
84 |
|
} |
85 |
|
|
86 |
f0 = src[ Offset +0 ]; |
f0 = src[ Offset +0 ]; |
87 |
f0 |= src[ Offset +1 ] << 16; |
f0 |= src[ Offset +1 ] << 16; |
98 |
} |
} |
99 |
} |
} |
100 |
|
|
|
|
|
101 |
void Predict_8x8_C(const NEW_GMC_DATA * const This, |
void Predict_8x8_C(const NEW_GMC_DATA * const This, |
102 |
uint8_t *uDst, const uint8_t *uSrc, |
uint8_t *uDst, const uint8_t *uSrc, |
103 |
uint8_t *vDst, const uint8_t *vSrc, |
uint8_t *vDst, const uint8_t *vSrc, |
120 |
|
|
121 |
uDst += 8; |
uDst += 8; |
122 |
vDst += 8; |
vDst += 8; |
123 |
for (j=8; j>0; --j) |
for (j=8; j>0; --j) { |
|
{ |
|
124 |
int32_t U = Uo, V = Vo; |
int32_t U = Uo, V = Vo; |
125 |
Uo += dUy; Vo += dVy; |
Uo += dUy; Vo += dVy; |
126 |
|
|
127 |
for (i=-8; i<0; ++i) |
for (i=-8; i<0; ++i) { |
|
{ |
|
128 |
int Offset; |
int Offset; |
129 |
uint32_t f0, f1, ri, rj; |
uint32_t f0, f1, ri, rj; |
130 |
int32_t u, v; |
int32_t u, v; |
133 |
v = ( V >> 16 ) << rho; |
v = ( V >> 16 ) << rho; |
134 |
U += dUx; V += dVx; |
U += dUx; V += dVx; |
135 |
|
|
136 |
if ((uint32_t)u<=(uint32_t)W) { |
if (u > 0 && u <= W) { |
137 |
ri = MTab[u&15]; |
ri = MTab[u&15]; |
138 |
Offset = u>>4; |
Offset = u>>4; |
139 |
} |
} else { |
|
else { |
|
140 |
ri = 16; |
ri = 16; |
141 |
if (u>W) Offset = W>>4; |
if (u>W) Offset = W>>4; |
142 |
else Offset = -1; |
else Offset = -1; |
143 |
} |
} |
144 |
if ((uint32_t)v<=(uint32_t)H) { |
|
145 |
|
if (v > 0 && v <= H) { |
146 |
rj = MTab[v&15]; |
rj = MTab[v&15]; |
147 |
Offset += (v>>4)*srcstride; |
Offset += (v>>4)*srcstride; |
148 |
} |
} else { |
|
else { |
|
149 |
rj = 16; |
rj = 16; |
150 |
if (v>H) Offset += (H>>4)*srcstride; |
if (v>H) Offset += (H>>4)*srcstride; |
151 |
else Offset -= srcstride; |
else Offset -= srcstride; |
178 |
} |
} |
179 |
} |
} |
180 |
|
|
|
|
|
181 |
void get_average_mv_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv, |
void get_average_mv_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv, |
182 |
int x, int y, int qpel) |
int x, int y, int qpel) |
183 |
{ |
{ |
197 |
v = V >> 16; V += Dsp->dV[0]; vy += v; |
v = V >> 16; V += Dsp->dV[0]; vy += v; |
198 |
} |
} |
199 |
} |
} |
200 |
vx -= (256*x+120) << (5+Dsp->accuracy); // 120 = 15*16/2 |
vx -= (256*x+120) << (5+Dsp->accuracy); /* 120 = 15*16/2 */ |
201 |
vy -= (256*y+120) << (5+Dsp->accuracy); |
vy -= (256*y+120) << (5+Dsp->accuracy); |
202 |
|
|
203 |
mv->x = RSHIFT( vx, 8+Dsp->accuracy - qpel ); |
mv->x = RSHIFT( vx, 8+Dsp->accuracy - qpel ); |
204 |
mv->y = RSHIFT( vy, 8+Dsp->accuracy - qpel ); |
mv->y = RSHIFT( vy, 8+Dsp->accuracy - qpel ); |
205 |
} |
} |
206 |
|
|
207 |
////////////////////////////////////////////////////////// |
/* ************************************************************ |
208 |
// simplified version for 1 warp point |
* simplified version for 1 warp point |
209 |
|
*/ |
210 |
|
|
211 |
void Predict_1pt_16x16_C(const NEW_GMC_DATA * const This, |
void Predict_1pt_16x16_C(const NEW_GMC_DATA * const This, |
212 |
uint8_t *Dst, const uint8_t *Src, |
uint8_t *Dst, const uint8_t *Src, |
218 |
const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; |
const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; |
219 |
|
|
220 |
|
|
221 |
int32_t uo = This->Uo + (x<<8); // ((16*x)<<4) |
int32_t uo = This->Uo + (x<<8); /* ((16*x)<<4) */ |
222 |
int32_t vo = This->Vo + (y<<8); |
int32_t vo = This->Vo + (y<<8); |
223 |
const uint32_t ri = MTab[uo & 15]; |
uint32_t ri = MTab[uo & 15]; |
224 |
const uint32_t rj = MTab[vo & 15]; |
uint32_t rj = MTab[vo & 15]; |
225 |
int i, j; |
int i, j; |
226 |
|
|
227 |
int32_t Offset; |
int32_t Offset; |
228 |
if ((uint32_t)vo<=(uint32_t)H) Offset = (vo>>4)*srcstride; |
if (vo>=(-16*4) && vo<=H) Offset = (vo>>4)*srcstride; |
229 |
else if (vo>H) Offset = ( H>>4)*srcstride; |
else { |
230 |
|
if (vo>H) Offset = ( H>>4)*srcstride; |
231 |
else Offset =-16*srcstride; |
else Offset =-16*srcstride; |
232 |
if ((uint32_t)uo<=(uint32_t)W) Offset += (uo>>4); |
rj = MTab[0]; |
233 |
else if (uo>W) Offset += ( W>>4); |
} |
234 |
|
if (uo>=(-16*4) && uo<=W) Offset += (uo>>4); |
235 |
|
else { |
236 |
|
if (uo>W) Offset += (W>>4); |
237 |
else Offset -= 16; |
else Offset -= 16; |
238 |
|
ri = MTab[0]; |
239 |
|
} |
240 |
|
|
241 |
Dst += 16; |
Dst += 16; |
242 |
|
|
259 |
} |
} |
260 |
} |
} |
261 |
|
|
|
|
|
262 |
void Predict_1pt_8x8_C(const NEW_GMC_DATA * const This, |
void Predict_1pt_8x8_C(const NEW_GMC_DATA * const This, |
263 |
uint8_t *uDst, const uint8_t *uSrc, |
uint8_t *uDst, const uint8_t *uSrc, |
264 |
uint8_t *vDst, const uint8_t *vSrc, |
uint8_t *vDst, const uint8_t *vSrc, |
271 |
|
|
272 |
int32_t uo = This->Uco + (x<<7); |
int32_t uo = This->Uco + (x<<7); |
273 |
int32_t vo = This->Vco + (y<<7); |
int32_t vo = This->Vco + (y<<7); |
274 |
const uint32_t rri = MTab[uo & 15]; |
uint32_t rri = MTab[uo & 15]; |
275 |
const uint32_t rrj = MTab[vo & 15]; |
uint32_t rrj = MTab[vo & 15]; |
276 |
int i, j; |
int i, j; |
277 |
|
|
278 |
int32_t Offset; |
int32_t Offset; |
279 |
if ((uint32_t)vo<=(uint32_t)H) Offset = (vo>>4)*srcstride; |
if (vo>=(-8*4) && vo<=H) Offset = (vo>>4)*srcstride; |
280 |
else if (vo>H) Offset = ( H>>4)*srcstride; |
else { |
281 |
|
if (vo>H) Offset = ( H>>4)*srcstride; |
282 |
else Offset =-8*srcstride; |
else Offset =-8*srcstride; |
283 |
if ((uint32_t)uo<=(uint32_t)W) Offset += (uo>>4); |
rrj = MTab[0]; |
284 |
else if (uo>W) Offset += (W>>4); |
} |
285 |
|
if (uo>=(-8*4) && uo<=W) Offset += (uo>>4); |
286 |
|
else { |
287 |
|
if (uo>W) Offset += ( W>>4); |
288 |
else Offset -= 8; |
else Offset -= 8; |
289 |
|
rri = MTab[0]; |
290 |
|
} |
291 |
|
|
292 |
uDst += 8; |
uDst += 8; |
293 |
vDst += 8; |
vDst += 8; |
321 |
} |
} |
322 |
} |
} |
323 |
|
|
|
|
|
324 |
void get_average_mv_1pt_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv, |
void get_average_mv_1pt_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv, |
325 |
int x, int y, int qpel) |
int x, int y, int qpel) |
326 |
{ |
{ |
328 |
mv->y = RSHIFT(Dsp->Vo<<qpel, 3); |
mv->y = RSHIFT(Dsp->Vo<<qpel, 3); |
329 |
} |
} |
330 |
|
|
331 |
////////////////////////////////////////////////////////// |
/* ************************************************************* |
332 |
|
* Warning! It's Accuracy being passed, not 'resolution'! |
333 |
|
*/ |
|
// Warning! It's Accuracy being passed, not 'resolution'! |
|
334 |
|
|
335 |
void generate_GMCparameters( int nb_pts, const int accuracy, |
void generate_GMCparameters( int nb_pts, const int accuracy, |
336 |
const WARPPOINTS *const pts, |
const WARPPOINTS *const pts, |
342 |
gmc->accuracy = accuracy; |
gmc->accuracy = accuracy; |
343 |
gmc->num_wp = nb_pts; |
gmc->num_wp = nb_pts; |
344 |
|
|
345 |
// reduce the number of points, if possible |
/* reduce the number of points, if possible */ |
346 |
if (nb_pts<3 || (pts->duv[2].x==-pts->duv[1].y && pts->duv[2].y==pts->duv[1].x)) { |
if (nb_pts<3 || (pts->duv[2].x==-pts->duv[1].y && pts->duv[2].y==pts->duv[1].x)) { |
347 |
if (nb_pts<2 || (pts->duv[1].x==0 && pts->duv[1].y==0)) { |
if (nb_pts<2 || (pts->duv[1].x==0 && pts->duv[1].y==0)) { |
348 |
if (nb_pts<1 || (pts->duv[0].x==0 && pts->duv[0].y==0)) { |
if (nb_pts<1 || (pts->duv[0].x==0 && pts->duv[0].y==0)) { |
354 |
} |
} |
355 |
else nb_pts = 3; |
else nb_pts = 3; |
356 |
|
|
357 |
// now, nb_pts stores the actual number of points required for interpolation |
/* now, nb_pts stores the actual number of points required for interpolation */ |
358 |
|
|
359 |
if (nb_pts<=1) |
if (nb_pts<=1) |
360 |
{ |
{ |
361 |
if (nb_pts==1) { |
if (nb_pts==1) { |
362 |
// store as 4b fixed point |
/* store as 4b fixed point */ |
363 |
gmc->Uo = pts->duv[0].x << accuracy; |
gmc->Uo = pts->duv[0].x << accuracy; |
364 |
gmc->Vo = pts->duv[0].y << accuracy; |
gmc->Vo = pts->duv[0].y << accuracy; |
365 |
gmc->Uco = ((pts->duv[0].x>>1) | (pts->duv[0].x&1)) << accuracy; // DIV2RND() |
gmc->Uco = ((pts->duv[0].x>>1) | (pts->duv[0].x&1)) << accuracy; /* DIV2RND() */ |
366 |
gmc->Vco = ((pts->duv[0].y>>1) | (pts->duv[0].y&1)) << accuracy; // DIV2RND() |
gmc->Vco = ((pts->duv[0].y>>1) | (pts->duv[0].y&1)) << accuracy; /* DIV2RND() */ |
367 |
} |
} |
368 |
else { // zero points?! |
else { /* zero points?! */ |
369 |
gmc->Uo = gmc->Vo = 0; |
gmc->Uo = gmc->Vo = 0; |
370 |
gmc->Uco = gmc->Vco = 0; |
gmc->Uco = gmc->Vco = 0; |
371 |
} |
} |
374 |
gmc->predict_8x8 = Predict_1pt_8x8_C; |
gmc->predict_8x8 = Predict_1pt_8x8_C; |
375 |
gmc->get_average_mv = get_average_mv_1pt_C; |
gmc->get_average_mv = get_average_mv_1pt_C; |
376 |
} |
} |
377 |
else { // 2 or 3 points |
else { /* 2 or 3 points */ |
378 |
const int rho = 3 - accuracy; // = {3,2,1,0} for Acc={0,1,2,3} |
const int rho = 3 - accuracy; /* = {3,2,1,0} for Acc={0,1,2,3} */ |
379 |
int Alpha = log2bin(width-1); |
int Alpha = log2bin(width-1); |
380 |
int Ws = 1 << Alpha; |
int Ws = 1 << Alpha; |
381 |
|
|
382 |
gmc->dU[0] = 16*Ws + RDIV( 8*Ws*pts->duv[1].x, width ); // dU/dx |
gmc->dU[0] = 16*Ws + RDIV( 8*Ws*pts->duv[1].x, width ); /* dU/dx */ |
383 |
gmc->dV[0] = RDIV( 8*Ws*pts->duv[1].y, width ); // dV/dx |
gmc->dV[0] = RDIV( 8*Ws*pts->duv[1].y, width ); /* dV/dx */ |
384 |
|
|
385 |
/* disabled, because possibly buggy? */ |
/* disabled, because possibly buggy? */ |
386 |
|
|
387 |
/* if (nb_pts==2) { |
#if 0 |
388 |
gmc->dU[1] = -gmc->dV[0]; // -Sin |
if (nb_pts==2) { |
389 |
gmc->dV[1] = gmc->dU[0] ; // Cos |
gmc->dU[1] = -gmc->dV[0]; /* -Sin */ |
390 |
|
gmc->dV[1] = gmc->dU[0] ; /* Cos */ |
391 |
} |
} |
392 |
else */ |
else |
393 |
|
#endif |
394 |
{ |
{ |
395 |
const int Beta = log2bin(height-1); |
const int Beta = log2bin(height-1); |
396 |
const int Hs = 1<<Beta; |
const int Hs = 1<<Beta; |
397 |
gmc->dU[1] = RDIV( 8*Hs*pts->duv[2].x, height ); // dU/dy |
gmc->dU[1] = RDIV( 8*Hs*pts->duv[2].x, height ); /* dU/dy */ |
398 |
gmc->dV[1] = 16*Hs + RDIV( 8*Hs*pts->duv[2].y, height ); // dV/dy |
gmc->dV[1] = 16*Hs + RDIV( 8*Hs*pts->duv[2].y, height ); /* dV/dy */ |
399 |
if (Beta>Alpha) { |
if (Beta>Alpha) { |
400 |
gmc->dU[0] <<= (Beta-Alpha); |
gmc->dU[0] <<= (Beta-Alpha); |
401 |
gmc->dV[0] <<= (Beta-Alpha); |
gmc->dV[0] <<= (Beta-Alpha); |
407 |
gmc->dV[1] <<= Alpha - Beta; |
gmc->dV[1] <<= Alpha - Beta; |
408 |
} |
} |
409 |
} |
} |
410 |
// upscale to 16b fixed-point |
/* upscale to 16b fixed-point */ |
411 |
gmc->dU[0] <<= (16-Alpha - rho); |
gmc->dU[0] <<= (16-Alpha - rho); |
412 |
gmc->dU[1] <<= (16-Alpha - rho); |
gmc->dU[1] <<= (16-Alpha - rho); |
413 |
gmc->dV[0] <<= (16-Alpha - rho); |
gmc->dV[0] <<= (16-Alpha - rho); |
426 |
} |
} |
427 |
} |
} |
428 |
|
|
429 |
////////////////////////////////////////////////////////// |
/* ******************************************************************* |
430 |
|
* quick and dirty routine to generate the full warped image |
431 |
|
* (pGMC != NULL) or just all average Motion Vectors (pGMC == NULL) */ |
|
/* quick and dirty routine to generate the full warped image (pGMC != NULL) |
|
|
or just all average Motion Vectors (pGMC == NULL) */ |
|
432 |
|
|
433 |
void |
void |
434 |
generate_GMCimage( const NEW_GMC_DATA *const gmc_data, // [input] precalculated data |
generate_GMCimage( const NEW_GMC_DATA *const gmc_data, /* [input] precalculated data */ |
435 |
const IMAGE *const pRef, // [input] |
const IMAGE *const pRef, /* [input] */ |
436 |
const int mb_width, |
const int mb_width, |
437 |
const int mb_height, |
const int mb_height, |
438 |
const int stride, |
const int stride, |
439 |
const int stride2, |
const int stride2, |
440 |
const int fcode, // [input] some parameters... |
const int fcode, /* [input] some parameters... */ |
441 |
const int32_t quarterpel, // [input] for rounding avgMV |
const int32_t quarterpel, /* [input] for rounding avgMV */ |
442 |
const int reduced_resolution, // [input] ignored |
const int reduced_resolution, /* [input] ignored */ |
443 |
const int32_t rounding, // [input] for rounding image data |
const int32_t rounding, /* [input] for rounding image data */ |
444 |
MACROBLOCK *const pMBs, // [output] average motion vectors |
MACROBLOCK *const pMBs, /* [output] average motion vectors */ |
445 |
IMAGE *const pGMC) // [output] full warped image |
IMAGE *const pGMC) /* [output] full warped image */ |
446 |
{ |
{ |
447 |
|
|
448 |
unsigned int mj,mi; |
unsigned int mj,mi; |