1 |
// 30.10.2002 corrected qpel chroma rounding |
/* |
2 |
// 04.10.2002 added qpel support to MBMotionCompensation |
* 30.10.2002 corrected qpel chroma rounding |
3 |
// 01.05.2002 updated MBMotionCompensationBVOP |
* 04.10.2002 added qpel support to MBMotionCompensation |
4 |
// 14.04.2002 bframe compensation |
* 01.05.2002 updated MBMotionCompensationBVOP |
5 |
|
* 14.04.2002 bframe compensation |
6 |
|
*/ |
7 |
|
|
8 |
#include <stdio.h> |
#include <stdio.h> |
9 |
|
|
29 |
{ |
{ |
30 |
int length = 1 << (fcode+4); |
int length = 1 << (fcode+4); |
31 |
|
|
32 |
// if (quarterpel) value *= 2; |
#if 0 |
33 |
|
if (quarterpel) value *= 2; |
34 |
|
#endif |
35 |
|
|
36 |
if (value < -length) |
if (value < -length) |
37 |
return -length; |
return -length; |
90 |
(uint8_t *) ref, tmp + 32, |
(uint8_t *) ref, tmp + 32, |
91 |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
92 |
ptr = tmp; |
ptr = tmp; |
93 |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; // fullpixel position |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; /* fullpixel position */ |
94 |
|
|
95 |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
96 |
|
|
103 |
transfer_8to16sub(dct_codes+192, cur + y * stride + x + 8*stride+8, |
transfer_8to16sub(dct_codes+192, cur + y * stride + x + 8*stride+8, |
104 |
ptr + 8*stride + 8, stride); |
ptr + 8*stride + 8, stride); |
105 |
|
|
106 |
} else { //reduced_resolution |
} else { /* reduced_resolution */ |
107 |
|
|
108 |
x *= 2; y *= 2; |
x *= 2; y *= 2; |
109 |
|
|
152 |
(uint8_t *) ref, tmp + 32, |
(uint8_t *) ref, tmp + 32, |
153 |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
154 |
ptr = tmp; |
ptr = tmp; |
155 |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; // fullpixel position |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; /* fullpixel position */ |
156 |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
157 |
|
|
158 |
transfer_8to16sub(dct_codes, cur + y * stride + x, ptr, stride); |
transfer_8to16sub(dct_codes, cur + y * stride + x, ptr, stride); |
159 |
|
|
160 |
} else { //reduced_resolution |
} else { /* reduced_resolution */ |
161 |
|
|
162 |
x *= 2; y *= 2; |
x *= 2; y *= 2; |
163 |
|
|
320 |
dx = (dx >> 1) + roundtab_79[dx & 0x3]; |
dx = (dx >> 1) + roundtab_79[dx & 0x3]; |
321 |
dy = (dy >> 1) + roundtab_79[dy & 0x3]; |
dy = (dy >> 1) + roundtab_79[dy & 0x3]; |
322 |
|
|
323 |
} else { // mode == MODE_INTER4V |
} else { /* mode == MODE_INTER4V */ |
324 |
int k, sumx = 0, sumy = 0; |
int k, sumx = 0, sumy = 0; |
325 |
const VECTOR * const mvs = (quarterpel ? mb->qmvs : mb->mvs); |
const VECTOR * const mvs = (quarterpel ? mb->qmvs : mb->mvs); |
326 |
|
|
418 |
(uint8_t *) f_ref->y, tmp + 32, |
(uint8_t *) f_ref->y, tmp + 32, |
419 |
tmp + 64, tmp + 96, 16*i, 16*j, dx, dy, edged_width, 0); |
tmp + 64, tmp + 96, 16*i, 16*j, dx, dy, edged_width, 0); |
420 |
ptr1 = tmp; |
ptr1 = tmp; |
421 |
} else ptr1 = f_ref->y + (16*j + dy/4)*edged_width + 16*i + dx/4; // fullpixel position |
} else ptr1 = f_ref->y + (16*j + dy/4)*edged_width + 16*i + dx/4; /* fullpixel position */ |
422 |
|
|
423 |
if ((b_dx&3) | (b_dy&3)) { |
if ((b_dx&3) | (b_dy&3)) { |
424 |
interpolate16x16_quarterpel(tmp - i * 16 - j * 16 * edged_width + 16, |
interpolate16x16_quarterpel(tmp - i * 16 - j * 16 * edged_width + 16, |
425 |
(uint8_t *) b_ref->y, tmp + 32, |
(uint8_t *) b_ref->y, tmp + 32, |
426 |
tmp + 64, tmp + 96, 16*i, 16*j, b_dx, b_dy, edged_width, 0); |
tmp + 64, tmp + 96, 16*i, 16*j, b_dx, b_dy, edged_width, 0); |
427 |
ptr2 = tmp + 16; |
ptr2 = tmp + 16; |
428 |
} else ptr2 = b_ref->y + (16*j + b_dy/4)*edged_width + 16*i + b_dx/4; // fullpixel position |
} else ptr2 = b_ref->y + (16*j + b_dy/4)*edged_width + 16*i + b_dx/4; /* fullpixel position */ |
429 |
|
|
430 |
b_dx /= 2; |
b_dx /= 2; |
431 |
b_dy /= 2; |
b_dy /= 2; |
454 |
|
|
455 |
break; |
break; |
456 |
|
|
457 |
default: // MODE_DIRECT (or MODE_DIRECT_NONE_MV in case of bframes decoding) |
default: /* MODE_DIRECT (or MODE_DIRECT_NONE_MV in case of bframes decoding) */ |
458 |
sumx = sumy = b_sumx = b_sumy = 0; |
sumx = sumy = b_sumx = b_sumy = 0; |
459 |
|
|
460 |
for (k = 0; k < 4; k++) { |
for (k = 0; k < 4; k++) { |
504 |
break; |
break; |
505 |
} |
} |
506 |
|
|
507 |
// uv block-based chroma interpolation for direct and interpolate modes |
/* uv block-based chroma interpolation for direct and interpolate modes */ |
508 |
transfer_8to16sub2(&dct_codes[4 * 64], |
transfer_8to16sub2(&dct_codes[4 * 64], |
509 |
cur->u + (j * 8) * edged_width / 2 + (i * 8), |
cur->u + (j * 8) * edged_width / 2 + (i * 8), |
510 |
interpolate8x8_switch2(tmp, b_ref->u, 8 * i, 8 * j, |
interpolate8x8_switch2(tmp, b_ref->u, 8 * i, 8 * j, |
539 |
gmc->W = width; |
gmc->W = width; |
540 |
gmc->H = height; |
gmc->H = height; |
541 |
|
|
542 |
gmc->rho = 4 - log2bin(res-1); // = {3,2,1,0} for res={2,4,8,16} |
gmc->rho = 4 - log2bin(res-1); /* = {3,2,1,0} for res={2,4,8,16} */ |
543 |
|
|
544 |
gmc->alpha = log2bin(gmc->W-1); |
gmc->alpha = log2bin(gmc->W-1); |
545 |
gmc->Ws = (1 << gmc->alpha); |
gmc->Ws = (1 << gmc->alpha); |
576 |
} |
} |
577 |
|
|
578 |
void |
void |
579 |
generate_GMCimage( const GMC_DATA *const gmc_data, // [input] precalculated data |
generate_GMCimage( const GMC_DATA *const gmc_data, /* [input] precalculated data */ |
580 |
const IMAGE *const pRef, // [input] |
const IMAGE *const pRef, /* [input] */ |
581 |
const int mb_width, |
const int mb_width, |
582 |
const int mb_height, |
const int mb_height, |
583 |
const int stride, |
const int stride, |
584 |
const int stride2, |
const int stride2, |
585 |
const int fcode, // [input] some parameters... |
const int fcode, /* [input] some parameters... */ |
586 |
const int32_t quarterpel, // [input] for rounding avgMV |
const int32_t quarterpel, /* [input] for rounding avgMV */ |
587 |
const int reduced_resolution, // [input] ignored |
const int reduced_resolution, /* [input] ignored */ |
588 |
const int32_t rounding, // [input] for rounding image data |
const int32_t rounding, /* [input] for rounding image data */ |
589 |
MACROBLOCK *const pMBs, // [output] average motion vectors |
MACROBLOCK *const pMBs, /* [output] average motion vectors */ |
590 |
IMAGE *const pGMC) // [output] full warped image |
IMAGE *const pGMC) /* [output] full warped image */ |
591 |
{ |
{ |
592 |
|
|
593 |
unsigned int mj,mi; |
unsigned int mj,mi; |
674 |
if (G< -1) G=-1; |
if (G< -1) G=-1; |
675 |
else if (G>H) G=H; |
else if (G>H) G=H; |
676 |
|
|
677 |
{ // MMX-like bilinear... |
{ /* MMX-like bilinear... */ |
678 |
const int offset = G*stride + F; |
const int offset = G*stride + F; |
679 |
uint32_t f0, f1; |
uint32_t f0, f1; |
680 |
f0 = pRef->y[ offset +0 ]; |
f0 = pRef->y[ offset +0 ]; |
755 |
} |
} |
756 |
|
|
757 |
|
|
758 |
avgMV.x -= 16*((256*mi+120)<<4); // 120 = 15*16/2 |
avgMV.x -= 16*((256*mi+120)<<4); /* 120 = 15*16/2 */ |
759 |
avgMV.y -= 16*((256*mj+120)<<4); |
avgMV.y -= 16*((256*mj+120)<<4); |
760 |
|
|
761 |
avgMV.x = RSHIFT( avgMV.x, (4+7-quarterpel) ); |
avgMV.x = RSHIFT( avgMV.x, (4+7-quarterpel) ); |
768 |
|
|
769 |
#ifdef OLD_GRUEL_GMC |
#ifdef OLD_GRUEL_GMC |
770 |
void |
void |
771 |
generate_GMCparameters( const int num_wp, // [input]: number of warppoints |
generate_GMCparameters( const int num_wp, /* [input]: number of warppoints */ |
772 |
const int res, // [input]: resolution |
const int res, /* [input]: resolution */ |
773 |
const WARPPOINTS *const warp, // [input]: warp points |
const WARPPOINTS *const warp, /* [input]: warp points */ |
774 |
const int width, const int height, |
const int width, const int height, |
775 |
GMC_DATA *const gmc) // [output] precalculated parameters |
GMC_DATA *const gmc) /* [output] precalculated parameters */ |
776 |
{ |
{ |
777 |
|
|
778 |
/* We follow mainly two sources: The original standard, which is ugly, and the |
/* We follow mainly two sources: The original standard, which is ugly, and the |
832 |
int dv0 = warp->duv[0].y; |
int dv0 = warp->duv[0].y; |
833 |
int du1 = warp->duv[1].x; |
int du1 = warp->duv[1].x; |
834 |
int dv1 = warp->duv[1].y; |
int dv1 = warp->duv[1].y; |
835 |
// int du2 = warp->duv[2].x; |
#if 0 |
836 |
// int dv2 = warp->duv[2].y; |
int du2 = warp->duv[2].x; |
837 |
|
int dv2 = warp->duv[2].y; |
838 |
|
#endif |
839 |
|
|
840 |
gmc->num_wp = num_wp; |
gmc->num_wp = num_wp; |
841 |
|
|
850 |
gmc->alpha = log2bin(gmc->W-1); |
gmc->alpha = log2bin(gmc->W-1); |
851 |
gmc->Ws= 1<<gmc->alpha; |
gmc->Ws= 1<<gmc->alpha; |
852 |
|
|
853 |
// gmc->beta = log2bin(gmc->H-1); |
#if 0 |
854 |
// gmc->Hs= 1<<gmc->beta; |
gmc->beta = log2bin(gmc->H-1); |
855 |
|
gmc->Hs= 1<<gmc->beta; |
856 |
|
#endif |
857 |
|
|
858 |
// printf("du0=%d dv0=%d du1=%d dv1=%d s=%d sigma=%d W=%d alpha=%d, Ws=%d, rho=%d\n",du0,dv0,du1,dv1,gmc->s,gmc->sigma,gmc->W,gmc->alpha,gmc->Ws,gmc->rho); |
#if 0 |
859 |
|
printf("du0=%d dv0=%d du1=%d dv1=%d s=%d sigma=%d W=%d alpha=%d, Ws=%d, rho=%d\n",du0,dv0,du1,dv1,gmc->s,gmc->sigma,gmc->W,gmc->alpha,gmc->Ws,gmc->rho); |
860 |
|
#endif |
861 |
|
|
862 |
/* i2s is only needed for num_wp >= 3, etc. */ |
/* |
863 |
/* the 's' values are in 1/s pel resolution */ |
* i2s is only needed for num_wp >= 3, etc. |
864 |
|
* the 's' values are in 1/s pel resolution |
865 |
|
*/ |
866 |
gmc->i0s = res/2 * ( du0 ); |
gmc->i0s = res/2 * ( du0 ); |
867 |
gmc->j0s = res/2 * ( dv0 ); |
gmc->j0s = res/2 * ( dv0 ); |
868 |
gmc->i1s = res/2 * (2*width + du1 + du0 ); |
gmc->i1s = res/2 * (2*width + du1 + du0 ); |
869 |
gmc->j1s = res/2 * ( dv1 + dv0 ); |
gmc->j1s = res/2 * ( dv1 + dv0 ); |
870 |
// gmc->i2s = res/2 * ( du2 + du0 ); |
#if 0 |
871 |
// gmc->j2s = res/2 * (2*height + dv2 + dv0 ); |
gmc->i2s = res/2 * ( du2 + du0 ); |
872 |
|
gmc->j2s = res/2 * (2*height + dv2 + dv0 ); |
873 |
|
#endif |
874 |
|
|
875 |
/* i2s and i2ss are only needed for num_wp == 3, etc. */ |
/* i2s and i2ss are only needed for num_wp == 3, etc. */ |
876 |
|
|
878 |
gmc->i1ss = 16*gmc->Ws + ROUNDED_DIV(((gmc->W-gmc->Ws)*(gmc->r*gmc->i0s) + gmc->Ws*(gmc->r*gmc->i1s - 16*gmc->W)),gmc->W); |
gmc->i1ss = 16*gmc->Ws + ROUNDED_DIV(((gmc->W-gmc->Ws)*(gmc->r*gmc->i0s) + gmc->Ws*(gmc->r*gmc->i1s - 16*gmc->W)),gmc->W); |
879 |
gmc->j1ss = ROUNDED_DIV( ((gmc->W - gmc->Ws)*(gmc->r*gmc->j0s) + gmc->Ws*gmc->r*gmc->j1s) ,gmc->W ); |
gmc->j1ss = ROUNDED_DIV( ((gmc->W - gmc->Ws)*(gmc->r*gmc->j0s) + gmc->Ws*gmc->r*gmc->j1s) ,gmc->W ); |
880 |
|
|
881 |
// gmc->i2ss = ROUNDED_DIV( ((gmc->H - gmc->Hs)*(gmc->r*gmc->i0s) + gmc->Hs*(gmc->r*gmc->i2s)), gmc->H); |
#if 0 |
882 |
// gmc->j2ss = 16*gmc->Hs + ROUNDED_DIV( ((gmc->H-gmc->Hs)*(gmc->r*gmc->j0s) + gmc->Ws*(gmc->r*gmc->j2s - 16*gmc->H)), gmc->H); |
gmc->i2ss = ROUNDED_DIV( ((gmc->H - gmc->Hs)*(gmc->r*gmc->i0s) + gmc->Hs*(gmc->r*gmc->i2s)), gmc->H); |
883 |
|
gmc->j2ss = 16*gmc->Hs + ROUNDED_DIV( ((gmc->H-gmc->Hs)*(gmc->r*gmc->j0s) + gmc->Ws*(gmc->r*gmc->j2s - 16*gmc->H)), gmc->H); |
884 |
|
#endif |
885 |
|
|
886 |
return; |
return; |
887 |
} |
} |
888 |
|
|
889 |
void |
void |
890 |
generate_GMCimage( const GMC_DATA *const gmc_data, // [input] precalculated data |
generate_GMCimage( const GMC_DATA *const gmc_data, /* [input] precalculated data */ |
891 |
const IMAGE *const pRef, // [input] |
const IMAGE *const pRef, /* [input] */ |
892 |
const int mb_width, |
const int mb_width, |
893 |
const int mb_height, |
const int mb_height, |
894 |
const int stride, |
const int stride, |
895 |
const int stride2, |
const int stride2, |
896 |
const int fcode, // [input] some parameters... |
const int fcode, /* [input] some parameters... */ |
897 |
const int32_t quarterpel, // [input] for rounding avgMV |
const int32_t quarterpel, /* [input] for rounding avgMV */ |
898 |
const int reduced_resolution, // [input] ignored |
const int reduced_resolution, /* [input] ignored */ |
899 |
const int32_t rounding, // [input] for rounding image data |
const int32_t rounding, /* [input] for rounding image data */ |
900 |
MACROBLOCK *const pMBs, // [output] average motion vectors |
MACROBLOCK *const pMBs, /* [output] average motion vectors */ |
901 |
IMAGE *const pGMC) // [output] full warped image |
IMAGE *const pGMC) /* [output] full warped image */ |
902 |
{ |
{ |
903 |
|
|
904 |
unsigned int mj,mi; |
unsigned int mj,mi; |
960 |
|
|
961 |
const int i1ss = gmc_data->i1ss; |
const int i1ss = gmc_data->i1ss; |
962 |
const int j1ss = gmc_data->j1ss; |
const int j1ss = gmc_data->j1ss; |
963 |
// const int i2ss = gmc_data->i2ss; |
#if 0 |
964 |
// const int j2ss = gmc_data->j2ss; |
const int i2ss = gmc_data->i2ss; |
965 |
|
const int j2ss = gmc_data->j2ss; |
966 |
|
#endif |
967 |
|
|
968 |
const int alpha = gmc_data->alpha; |
const int alpha = gmc_data->alpha; |
969 |
const int Ws = gmc_data->Ws; |
const int Ws = gmc_data->Ws; |
970 |
|
|
971 |
// const int beta = gmc_data->beta; |
#if 0 |
972 |
// const int Hs = gmc_data->Hs; |
const int beta = gmc_data->beta; |
973 |
|
const int Hs = gmc_data->Hs; |
974 |
|
#endif |
975 |
|
|
976 |
int I,J; |
int I,J; |
977 |
VECTOR avgMV = {0,0}; |
VECTOR avgMV = {0,0}; |
985 |
/* this naive implementation (with lots of multiplications) isn't slower (rather faster) than |
/* this naive implementation (with lots of multiplications) isn't slower (rather faster) than |
986 |
working incrementally. Don't ask me why... maybe the whole thing is memory bound? */ |
working incrementally. Don't ask me why... maybe the whole thing is memory bound? */ |
987 |
|
|
988 |
const int ri= F & (s-1); // fractional part of pelwise MV X |
const int ri= F & (s-1); /* fractional part of pelwise MV X */ |
989 |
const int rj= G & (s-1); // fractional part of pelwise MV Y |
const int rj= G & (s-1); /* fractional part of pelwise MV Y */ |
990 |
|
|
991 |
int Y00,Y01,Y10,Y11; |
int Y00,Y01,Y10,Y11; |
992 |
|
|
1009 |
else if (G>H) |
else if (G>H) |
1010 |
G=H; /* dito */ |
G=H; /* dito */ |
1011 |
|
|
1012 |
Y00 = pRef->y[ G*stride + F ]; // Lumi values |
Y00 = pRef->y[ G*stride + F ]; /* Lumi values */ |
1013 |
Y01 = pRef->y[ G*stride + F+1 ]; |
Y01 = pRef->y[ G*stride + F+1 ]; |
1014 |
Y10 = pRef->y[ G*stride + F+stride ]; |
Y10 = pRef->y[ G*stride + F+stride ]; |
1015 |
Y11 = pRef->y[ G*stride + F+stride+1 ]; |
Y11 = pRef->y[ G*stride + F+stride+1 ]; |
1036 |
int Gc=((-r*j0s+j1ss)*(4*I+1) +(-r*i0s+i1ss)*(4*J+1) +2*Ws*r*j0s |
int Gc=((-r*j0s+j1ss)*(4*I+1) +(-r*i0s+i1ss)*(4*J+1) +2*Ws*r*j0s |
1037 |
-16*Ws +(1<<(alpha+rho+1))) >>(alpha+rho+2); |
-16*Ws +(1<<(alpha+rho+1))) >>(alpha+rho+2); |
1038 |
|
|
1039 |
const int ri= Fc & (s-1); // fractional part of pelwise MV X |
const int ri= Fc & (s-1); /* fractional part of pelwise MV X */ |
1040 |
const int rj= Gc & (s-1); // fractional part of pelwise MV Y |
const int rj= Gc & (s-1); /* fractional part of pelwise MV Y */ |
1041 |
|
|
1042 |
int C00,C01,C10,C11; |
int C00,C01,C10,C11; |
1043 |
|
|
1054 |
Gc=H/2; /* dito */ |
Gc=H/2; /* dito */ |
1055 |
|
|
1056 |
/* now calculate U data */ |
/* now calculate U data */ |
1057 |
C00 = pRef->u[ Gc*stride2 + Fc ]; // chroma-value Cb |
C00 = pRef->u[ Gc*stride2 + Fc ]; /* chroma-value Cb */ |
1058 |
C01 = pRef->u[ Gc*stride2 + Fc+1 ]; |
C01 = pRef->u[ Gc*stride2 + Fc+1 ]; |
1059 |
C10 = pRef->u[ (Gc+1)*stride2 + Fc ]; |
C10 = pRef->u[ (Gc+1)*stride2 + Fc ]; |
1060 |
C11 = pRef->u[ (Gc+1)*stride2 + Fc+1 ]; |
C11 = pRef->u[ (Gc+1)*stride2 + Fc+1 ]; |
1067 |
pGMC->u[J*stride2+I] = (uint8_t)C00; /* output 1 U-pixel */ |
pGMC->u[J*stride2+I] = (uint8_t)C00; /* output 1 U-pixel */ |
1068 |
|
|
1069 |
/* now calculate V data */ |
/* now calculate V data */ |
1070 |
C00 = pRef->v[ Gc*stride2 + Fc ]; // chroma-value Cr |
C00 = pRef->v[ Gc*stride2 + Fc ]; /* chroma-value Cr */ |
1071 |
C01 = pRef->v[ Gc*stride2 + Fc+1 ]; |
C01 = pRef->v[ Gc*stride2 + Fc+1 ]; |
1072 |
C10 = pRef->v[ (Gc+1)*stride2 + Fc ]; |
C10 = pRef->v[ (Gc+1)*stride2 + Fc ]; |
1073 |
C11 = pRef->v[ (Gc+1)*stride2 + Fc+1 ]; |
C11 = pRef->v[ (Gc+1)*stride2 + Fc+1 ]; |