--- motion_comp.c 2004/10/12 21:08:41 1.22 +++ motion_comp.c 2004/12/05 13:01:27 1.23 @@ -20,7 +20,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: motion_comp.c,v 1.22 2004/10/12 21:08:41 edgomez Exp $ + * $Id: motion_comp.c,v 1.23 2004/12/05 13:01:27 syskin Exp $ * ****************************************************************************/ @@ -30,43 +30,10 @@ #include "../utils/mbfunctions.h" #include "../image/interpolate8x8.h" #include "../image/qpel.h" -#include "../image/reduced.h" #include "../utils/timer.h" #include "motion.h" -#ifndef RSHIFT -#define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b)) -#endif - -/* assume b>0 */ -#ifndef RDIV -#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b)) -#endif - - -/* This is borrowed from bitstream.c until we find a common solution */ - -static uint32_t __inline -log2bin(uint32_t value) -{ -/* Changed by Chenm001 */ -#if !defined(_MSC_VER) - int n = 0; - - while (value) { - value >>= 1; - n++; - } - return n; -#else - __asm { - bsr eax, value - inc eax - } -#endif -} - -/* + /* * getref: calculate reference image pointer * the decision to use interpolation h/v/hv or the normal image is * based on dx & dy. @@ -110,52 +77,30 @@ const int32_t dy, const int32_t stride, const int quarterpel, - const int reduced_resolution, const int32_t rounding) { const uint8_t * ptr; - if (!reduced_resolution) { - - if(quarterpel) { - if ((dx&3) | (dy&3)) { - interpolate16x16_quarterpel(tmp - y * stride - x, - (uint8_t *) ref, tmp + 32, - tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); - ptr = tmp; - } else ptr = ref + ((int)y + dy/4)*(int)stride + (int)x + dx/4; /* fullpixel position */ - - } else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); - - transfer_8to16sub(dct_codes, cur + y * stride + x, - ptr, stride); - transfer_8to16sub(dct_codes+64, cur + y * stride + x + 8, - ptr + 8, stride); - transfer_8to16sub(dct_codes+128, cur + y * stride + x + 8*stride, - ptr + 8*stride, stride); - transfer_8to16sub(dct_codes+192, cur + y * stride + x + 8*stride+8, - ptr + 8*stride + 8, stride); - - } else { /* reduced_resolution */ - - x *= 2; y *= 2; - ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); - - filter_18x18_to_8x8(dct_codes, cur+y*stride + x, stride); - filter_diff_18x18_to_8x8(dct_codes, ptr, stride); - - filter_18x18_to_8x8(dct_codes+64, cur+y*stride + x + 16, stride); - filter_diff_18x18_to_8x8(dct_codes+64, ptr + 16, stride); + if(quarterpel) { + if ((dx&3) | (dy&3)) { + interpolate16x16_quarterpel(tmp - y * stride - x, + (uint8_t *) ref, tmp + 32, + tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); + ptr = tmp; + } else ptr = ref + ((int)y + dy/4)*(int)stride + (int)x + dx/4; /* fullpixel position */ - filter_18x18_to_8x8(dct_codes+128, cur+(y+16)*stride + x, stride); - filter_diff_18x18_to_8x8(dct_codes+128, ptr + 16*stride, stride); + } else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); - filter_18x18_to_8x8(dct_codes+192, cur+(y+16)*stride + x + 16, stride); - filter_diff_18x18_to_8x8(dct_codes+192, ptr + 16*stride + 16, stride); + transfer_8to16sub(dct_codes, cur + y * stride + x, + ptr, stride); + transfer_8to16sub(dct_codes+64, cur + y * stride + x + 8, + ptr + 8, stride); + transfer_8to16sub(dct_codes+128, cur + y * stride + x + 8*stride, + ptr + 8*stride, stride); + transfer_8to16sub(dct_codes+192, cur + y * stride + x + 8*stride+8, + ptr + 8*stride + 8, stride); - transfer32x32_copy(cur + y*stride + x, ptr, stride); - } } static __inline void @@ -172,60 +117,22 @@ const int32_t dy, const int32_t stride, const int32_t quarterpel, - const int reduced_resolution, const int32_t rounding) { const uint8_t * ptr; - if (!reduced_resolution) { + if(quarterpel) { + if ((dx&3) | (dy&3)) { + interpolate8x8_quarterpel(tmp - y*stride - x, + (uint8_t *) ref, tmp + 32, + tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); + ptr = tmp; + } else ptr = ref + ((int)y + dy/4)*(int)stride + (int)x + dx/4; /* fullpixel position */ + } else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); - if(quarterpel) { - if ((dx&3) | (dy&3)) { - interpolate8x8_quarterpel(tmp - y*stride - x, - (uint8_t *) ref, tmp + 32, - tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); - ptr = tmp; - } else ptr = ref + ((int)y + dy/4)*(int)stride + (int)x + dx/4; /* fullpixel position */ - } else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); - - transfer_8to16sub(dct_codes, cur + y * stride + x, ptr, stride); - - } else { /* reduced_resolution */ - - x *= 2; y *= 2; - - ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); - - filter_18x18_to_8x8(dct_codes, cur+y*stride + x, stride); - filter_diff_18x18_to_8x8(dct_codes, ptr, stride); - - transfer16x16_copy(cur + y*stride + x, ptr, stride); - } + transfer_8to16sub(dct_codes, cur + y * stride + x, ptr, stride); } -/* XXX: slow, inelegant... */ -static void -interpolate18x18_switch(uint8_t * const cur, - const uint8_t * const refn, - const uint32_t x, - const uint32_t y, - const int32_t dx, - const int dy, - const int32_t stride, - const int32_t rounding) -{ - interpolate8x8_switch(cur, refn, x-1, y-1, dx, dy, stride, rounding); - interpolate8x8_switch(cur, refn, x+7, y-1, dx, dy, stride, rounding); - interpolate8x8_switch(cur, refn, x+9, y-1, dx, dy, stride, rounding); - - interpolate8x8_switch(cur, refn, x-1, y+7, dx, dy, stride, rounding); - interpolate8x8_switch(cur, refn, x+7, y+7, dx, dy, stride, rounding); - interpolate8x8_switch(cur, refn, x+9, y+7, dx, dy, stride, rounding); - - interpolate8x8_switch(cur, refn, x-1, y+9, dx, dy, stride, rounding); - interpolate8x8_switch(cur, refn, x+7, y+9, dx, dy, stride, rounding); - interpolate8x8_switch(cur, refn, x+9, y+9, dx, dy, stride, rounding); -} static void CompensateChroma( int dx, int dy, @@ -235,35 +142,17 @@ uint8_t * const temp, int16_t * const coeff, const int32_t stride, - const int rounding, - const int rrv) + const int rounding) { /* uv-block-based compensation */ - if (!rrv) { - transfer_8to16sub(coeff, Cur->u + 8 * j * stride + 8 * i, - interpolate8x8_switch2(temp, Ref->u, 8 * i, 8 * j, - dx, dy, stride, rounding), - stride); - transfer_8to16sub(coeff + 64, Cur->v + 8 * j * stride + 8 * i, - interpolate8x8_switch2(temp, Ref->v, 8 * i, 8 * j, - dx, dy, stride, rounding), - stride); - } else { - uint8_t * current, * reference; - - current = Cur->u + 16*j*stride + 16*i; - reference = temp - 16*j*stride - 16*i; - interpolate18x18_switch(reference, Ref->u, 16*i, 16*j, dx, dy, stride, rounding); - filter_18x18_to_8x8(coeff, current, stride); - filter_diff_18x18_to_8x8(coeff, temp, stride); - transfer16x16_copy(current, temp, stride); - - current = Cur->v + 16*j*stride + 16*i; - interpolate18x18_switch(reference, Ref->v, 16*i, 16*j, dx, dy, stride, rounding); - filter_18x18_to_8x8(coeff + 64, current, stride); - filter_diff_18x18_to_8x8(coeff + 64, temp, stride); - transfer16x16_copy(current, temp, stride); - } + transfer_8to16sub(coeff, Cur->u + 8 * j * stride + 8 * i, + interpolate8x8_switch2(temp, Ref->u, 8 * i, 8 * j, + dx, dy, stride, rounding), + stride); + transfer_8to16sub(coeff + 64, Cur->v + 8 * j * stride + 8 * i, + interpolate8x8_switch2(temp, Ref->v, 8 * i, 8 * j, + dx, dy, stride, rounding), + stride); } void @@ -281,7 +170,6 @@ const uint32_t height, const uint32_t edged_width, const int32_t quarterpel, - const int reduced_resolution, const int32_t rounding) { int32_t dx; @@ -289,7 +177,7 @@ uint8_t * const tmp = refv->u; - if ( (!reduced_resolution) && (mb->mode == MODE_NOT_CODED) ) { /* quick copy for early SKIP */ + if (mb->mode == MODE_NOT_CODED) { /* quick copy for early SKIP */ /* early SKIP is only activated in P-VOPs, not in S-VOPs, so mcsel can never be 1 */ transfer16x16_copy(cur->y + 16 * (i + j * edged_width), @@ -308,8 +196,6 @@ if ((mb->mode == MODE_NOT_CODED || mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q)) { - /* reduced resolution + GMC: not possible */ - if (mb->mcsel) { /* call normal routine once, easier than "if (mcsel)"ing all the time */ @@ -323,8 +209,6 @@ transfer_8to16sub(&dct_codes[3*64], cur->y + (16*j+8)*edged_width + 16*i+8, refGMC->y + (16*j+8)*edged_width + 16*i+8, edged_width); -/* lumi is needed earlier for mode decision, but chroma should be done block-based, but it isn't, yet. */ - transfer_8to16sub(&dct_codes[4 * 64], cur->u + 8 *j*edged_width/2 + 8*i, refGMC->u + 8 *j*edged_width/2 + 8*i, edged_width/2); @@ -339,14 +223,9 @@ dx = (quarterpel ? mb->qmvs[0].x : mb->mvs[0].x); dy = (quarterpel ? mb->qmvs[0].y : mb->mvs[0].y); - if (reduced_resolution) { - dx = RRV_MV_SCALEUP(dx); - dy = RRV_MV_SCALEUP(dy); - } - compensate16x16_interpolate(&dct_codes[0 * 64], cur->y, ref->y, refh->y, refv->y, refhv->y, tmp, 16 * i, 16 * j, dx, dy, - edged_width, quarterpel, reduced_resolution, rounding); + edged_width, quarterpel, rounding); if (quarterpel) { dx /= 2; dy /= 2; } @@ -363,21 +242,16 @@ sumx += quarterpel ? dx/2 : dx; sumy += quarterpel ? dy/2 : dy; - if (reduced_resolution){ - dx = RRV_MV_SCALEUP(dx); - dy = RRV_MV_SCALEUP(dy); - } - compensate8x8_interpolate(&dct_codes[k * 64], cur->y, ref->y, refh->y, refv->y, refhv->y, tmp, 16 * i + 8*(k&1), 16 * j + 8*(k>>1), dx, - dy, edged_width, quarterpel, reduced_resolution, rounding); + dy, edged_width, quarterpel, rounding); } dx = (sumx >> 3) + roundtab_76[sumx & 0xf]; dy = (sumy >> 3) + roundtab_76[sumy & 0xf]; } CompensateChroma(dx, dy, i, j, cur, ref, tmp, - &dct_codes[4 * 64], edged_width / 2, rounding, reduced_resolution); + &dct_codes[4 * 64], edged_width / 2, rounding); } @@ -412,14 +286,14 @@ compensate16x16_interpolate(&dct_codes[0 * 64], cur->y, f_ref->y, f_refh->y, f_refv->y, f_refhv->y, tmp, 16 * i, 16 * j, dx, - dy, edged_width, quarterpel, 0, 0); + dy, edged_width, quarterpel, 0); if (quarterpel) { dx /= 2; dy /= 2; } CompensateChroma( (dx >> 1) + roundtab_79[dx & 0x3], (dy >> 1) + roundtab_79[dy & 0x3], i, j, cur, f_ref, tmp, - &dct_codes[4 * 64], edged_width / 2, 0, 0); + &dct_codes[4 * 64], edged_width / 2, 0); return; @@ -428,18 +302,18 @@ compensate16x16_interpolate(&dct_codes[0 * 64], cur->y, b_ref->y, b_refh->y, b_refv->y, b_refhv->y, tmp, 16 * i, 16 * j, b_dx, - b_dy, edged_width, quarterpel, 0, 0); + b_dy, edged_width, quarterpel, 0); if (quarterpel) { b_dx /= 2; b_dy /= 2; } CompensateChroma( (b_dx >> 1) + roundtab_79[b_dx & 0x3], (b_dy >> 1) + roundtab_79[b_dy & 0x3], i, j, cur, b_ref, tmp, - &dct_codes[4 * 64], edged_width / 2, 0, 0); + &dct_codes[4 * 64], edged_width / 2, 0); return; - case MODE_INTERPOLATE: /* _could_ use DIRECT, but would be overkill (no 4MV there) */ + case MODE_INTERPOLATE: case MODE_DIRECT_NO4V: dx = fmvs->x; dy = fmvs->y; b_dx = bmvs->x; b_dy = bmvs->y; @@ -537,7 +411,7 @@ break; } - /* v block-based chroma interpolation for direct and interpolate modes */ + /* block-based chroma interpolation for direct and interpolate modes */ transfer_8to16sub2(&dct_codes[4 * 64], cur->u + (j * 8) * edged_width / 2 + (i * 8), interpolate8x8_switch2(tmp, b_ref->u, 8 * i, 8 * j,