10 |
#include "../image/interpolate8x8.h" |
#include "../image/interpolate8x8.h" |
11 |
#include "../image/reduced.h" |
#include "../image/reduced.h" |
12 |
#include "../utils/timer.h" |
#include "../utils/timer.h" |
13 |
|
#include "../image/qpel.h" |
14 |
#include "motion.h" |
#include "motion.h" |
15 |
|
|
16 |
#ifndef ABS |
#ifndef ABS |
90 |
|
|
91 |
if(quarterpel) { |
if(quarterpel) { |
92 |
if ((dx&3) | (dy&3)) { |
if ((dx&3) | (dy&3)) { |
93 |
interpolate16x16_quarterpel(tmp - y * stride - x, |
new_interpolate16x16_quarterpel(tmp - y * stride - x, |
94 |
(uint8_t *) ref, tmp + 32, |
(uint8_t *) ref, tmp + 32, |
95 |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
96 |
ptr = tmp; |
ptr = tmp; |
152 |
|
|
153 |
if(quarterpel) { |
if(quarterpel) { |
154 |
if ((dx&3) | (dy&3)) { |
if ((dx&3) | (dy&3)) { |
155 |
interpolate8x8_quarterpel(tmp - y*stride - x, |
new_interpolate8x8_quarterpel(tmp - y*stride - x, |
156 |
(uint8_t *) ref, tmp + 32, |
(uint8_t *) ref, tmp + 32, |
157 |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
158 |
ptr = tmp; |
ptr = tmp; |
174 |
} |
} |
175 |
} |
} |
176 |
|
|
|
|
|
|
static __inline void
compensate16x16_interpolate_ro(int16_t * const dct_codes,
							   const uint8_t * const cur,
							   const uint8_t * const ref,
							   const uint8_t * const refh,
							   const uint8_t * const refv,
							   const uint8_t * const refhv,
							   uint8_t * const tmp,
							   const uint32_t x, const uint32_t y,
							   const int32_t dx, const int32_t dy,
							   const int32_t stride,
							   const int quarterpel)
{
	/* Motion-compensate a 16x16 macroblock (read-only variant): pick the
	 * predictor for motion vector (dx,dy) and subtract it from the current
	 * frame into dct_codes, without writing back into cur. */
	const uint8_t *src;
	uint32_t k;

	if (quarterpel) {
		if ((dx & 3) | (dy & 3)) {
			/* True quarter-pel phase: synthesize the interpolated
			 * predictor into the scratch buffer tmp. */
			interpolate16x16_quarterpel(tmp - y * stride - x,
						(uint8_t *) ref, tmp + 32,
						tmp + 64, tmp + 96, x, y, dx, dy, stride, 0);
			src = tmp;
		} else {
			/* Vector lands on a full-pel position; read the
			 * reference frame directly. */
			src = ref + (y + dy / 4) * stride + x + dx / 4;
		}
	} else {
		/* Half-pel mode: select among the pre-interpolated planes. */
		src = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride);
	}

	/* Subtract predictor from the current block, one 8x8 quadrant at a
	 * time; quadrant k sits at column (k&1)*8, row (k>>1)*8. */
	for (k = 0; k < 4; k++) {
		const int32_t off = (int32_t)(k & 1) * 8 + (int32_t)(k >> 1) * 8 * stride;

		transfer_8to16subro(dct_codes + k * 64,
							cur + y * stride + x + off,
							src + off, stride);
	}
}
|
|
|
|
|
|
|
177 |
/* XXX: slow, inelegant... */ |
/* XXX: slow, inelegant... */ |
178 |
static void |
static void |
179 |
interpolate18x18_switch(uint8_t * const cur, |
interpolate18x18_switch(uint8_t * const cur, |
258 |
int32_t dx; |
int32_t dx; |
259 |
int32_t dy; |
int32_t dy; |
260 |
|
|
|
|
|
261 |
uint8_t * const tmp = refv->u; |
uint8_t * const tmp = refv->u; |
262 |
|
|
263 |
if ( (!reduced_resolution) && (mb->mode == MODE_NOT_CODED) ) { /* quick copy for early SKIP */ |
if ( (!reduced_resolution) && (mb->mode == MODE_NOT_CODED) ) { /* quick copy for early SKIP */ |
319 |
refv->y, refhv->y, tmp, 16 * i, 16 * j, dx, dy, |
refv->y, refhv->y, tmp, 16 * i, 16 * j, dx, dy, |
320 |
edged_width, quarterpel, reduced_resolution, rounding); |
edged_width, quarterpel, reduced_resolution, rounding); |
321 |
|
|
322 |
dx /= (int)(1 + quarterpel); |
if (quarterpel) { dx /= 2; dy /= 2; } |
|
dy /= (int)(1 + quarterpel); |
|
323 |
|
|
324 |
dx = (dx >> 1) + roundtab_79[dx & 0x3]; |
dx = (dx >> 1) + roundtab_79[dx & 0x3]; |
325 |
dy = (dy >> 1) + roundtab_79[dy & 0x3]; |
dy = (dy >> 1) + roundtab_79[dy & 0x3]; |
331 |
for (k = 0; k < 4; k++) { |
for (k = 0; k < 4; k++) { |
332 |
dx = mvs[k].x; |
dx = mvs[k].x; |
333 |
dy = mvs[k].y; |
dy = mvs[k].y; |
334 |
sumx += dx / (1 + quarterpel); |
sumx += quarterpel ? dx/2 : dx; |
335 |
sumy += dy / (1 + quarterpel); |
sumy += quarterpel ? dy/2 : dy; |
336 |
|
|
337 |
if (reduced_resolution){ |
if (reduced_resolution){ |
338 |
dx = RRV_MV_SCALEUP(dx); |
dx = RRV_MV_SCALEUP(dx); |
397 |
case MODE_BACKWARD: |
case MODE_BACKWARD: |
398 |
b_dx = bmvs->x; b_dy = bmvs->y; |
b_dx = bmvs->x; b_dy = bmvs->y; |
399 |
|
|
400 |
compensate16x16_interpolate_ro(&dct_codes[0 * 64], cur->y, b_ref->y, b_refh->y, |
compensate16x16_interpolate(&dct_codes[0 * 64], cur->y, b_ref->y, b_refh->y, |
401 |
b_refv->y, b_refhv->y, tmp, 16 * i, 16 * j, b_dx, |
b_refv->y, b_refhv->y, tmp, 16 * i, 16 * j, b_dx, |
402 |
b_dy, edged_width, quarterpel); |
b_dy, edged_width, quarterpel, 0, 0); |
403 |
|
|
404 |
if (quarterpel) { b_dx /= 2; b_dy /= 2; } |
if (quarterpel) { b_dx /= 2; b_dy /= 2; } |
405 |
|
|
418 |
if (quarterpel) { |
if (quarterpel) { |
419 |
|
|
420 |
if ((dx&3) | (dy&3)) { |
if ((dx&3) | (dy&3)) { |
421 |
interpolate16x16_quarterpel(tmp - i * 16 - j * 16 * edged_width, |
new_interpolate16x16_quarterpel(tmp - i * 16 - j * 16 * edged_width, |
422 |
(uint8_t *) f_ref->y, tmp + 32, |
(uint8_t *) f_ref->y, tmp + 32, |
423 |
tmp + 64, tmp + 96, 16*i, 16*j, dx, dy, edged_width, 0); |
tmp + 64, tmp + 96, 16*i, 16*j, dx, dy, edged_width, 0); |
424 |
ptr1 = tmp; |
ptr1 = tmp; |
425 |
} else ptr1 = f_ref->y + (16*j + dy/4)*edged_width + 16*i + dx/4; // fullpixel position |
} else ptr1 = f_ref->y + (16*j + dy/4)*edged_width + 16*i + dx/4; // fullpixel position |
426 |
|
|
427 |
if ((b_dx&3) | (b_dy&3)) { |
if ((b_dx&3) | (b_dy&3)) { |
428 |
interpolate16x16_quarterpel(tmp - i * 16 - j * 16 * edged_width + 16, |
new_interpolate16x16_quarterpel(tmp - i * 16 - j * 16 * edged_width + 16, |
429 |
(uint8_t *) b_ref->y, tmp + 32, |
(uint8_t *) b_ref->y, tmp + 32, |
430 |
tmp + 64, tmp + 96, 16*i, 16*j, b_dx, b_dy, edged_width, 0); |
tmp + 64, tmp + 96, 16*i, 16*j, b_dx, b_dy, edged_width, 0); |
431 |
ptr2 = tmp + 16; |
ptr2 = tmp + 16; |
458 |
|
|
459 |
break; |
break; |
460 |
|
|
461 |
default: // MODE_DIRECT |
default: // MODE_DIRECT (or MODE_DIRECT_NONE_MV in case of bframes decoding) |
462 |
sumx = sumy = b_sumx = b_sumy = 0; |
sumx = sumy = b_sumx = b_sumy = 0; |
463 |
|
|
464 |
for (k = 0; k < 4; k++) { |
for (k = 0; k < 4; k++) { |
471 |
b_sumx += b_dx/2; b_sumy += b_dy/2; |
b_sumx += b_dx/2; b_sumy += b_dy/2; |
472 |
|
|
473 |
if ((dx&3) | (dy&3)) { |
if ((dx&3) | (dy&3)) { |
474 |
interpolate8x8_quarterpel(tmp - (i * 16+(k&1)*8) - (j * 16+((k>>1)*8)) * edged_width, |
new_interpolate8x8_quarterpel(tmp - (i * 16+(k&1)*8) - (j * 16+((k>>1)*8)) * edged_width, |
475 |
(uint8_t *) f_ref->y, |
(uint8_t *) f_ref->y, |
476 |
tmp + 32, tmp + 64, tmp + 96, |
tmp + 32, tmp + 64, tmp + 96, |
477 |
16*i + (k&1)*8, 16*j + (k>>1)*8, dx, dy, edged_width, 0); |
16*i + (k&1)*8, 16*j + (k>>1)*8, dx, dy, edged_width, 0); |
479 |
} else ptr1 = f_ref->y + (16*j + (k>>1)*8 + dy/4)*edged_width + 16*i + (k&1)*8 + dx/4; |
} else ptr1 = f_ref->y + (16*j + (k>>1)*8 + dy/4)*edged_width + 16*i + (k&1)*8 + dx/4; |
480 |
|
|
481 |
if ((b_dx&3) | (b_dy&3)) { |
if ((b_dx&3) | (b_dy&3)) { |
482 |
interpolate8x8_quarterpel(tmp - (i * 16+(k&1)*8) - (j * 16+((k>>1)*8)) * edged_width + 16, |
new_interpolate8x8_quarterpel(tmp - (i * 16+(k&1)*8) - (j * 16+((k>>1)*8)) * edged_width + 16, |
483 |
(uint8_t *) b_ref->y, |
(uint8_t *) b_ref->y, |
484 |
tmp + 16, tmp + 32, tmp + 48, |
tmp + 16, tmp + 32, tmp + 48, |
485 |
16*i + (k&1)*8, 16*j + (k>>1)*8, b_dx, b_dy, edged_width, 0); |
16*i + (k&1)*8, 16*j + (k>>1)*8, b_dx, b_dy, edged_width, 0); |
556 |
if (num_wp==2) { |
if (num_wp==2) { |
557 |
gmc->dyF = -gmc->dxG; |
gmc->dyF = -gmc->dxG; |
558 |
gmc->dyG = gmc->dxF; |
gmc->dyG = gmc->dxF; |
559 |
} |
} else if (num_wp==3) { |
|
else if (num_wp==3) { |
|
560 |
gmc->beta = log2bin(gmc->H-1); |
gmc->beta = log2bin(gmc->H-1); |
561 |
gmc->Hs = (1 << gmc->beta); |
gmc->Hs = (1 << gmc->beta); |
562 |
gmc->dyF = RDIV( 8*gmc->Hs*du2, gmc->H ); |
gmc->dyF = RDIV( 8*gmc->Hs*du2, gmc->H ); |
566 |
gmc->dxG <<= (gmc->beta - gmc->alpha); |
gmc->dxG <<= (gmc->beta - gmc->alpha); |
567 |
gmc->alpha = gmc->beta; |
gmc->alpha = gmc->beta; |
568 |
gmc->Ws = 1<< gmc->beta; |
gmc->Ws = 1<< gmc->beta; |
569 |
} |
} else { |
|
else { |
|
570 |
gmc->dyF <<= gmc->alpha - gmc->beta; |
gmc->dyF <<= gmc->alpha - gmc->beta; |
571 |
gmc->dyG <<= gmc->alpha - gmc->beta; |
gmc->dyG <<= gmc->alpha - gmc->beta; |
572 |
} |
} |
598 |
VECTOR avgMV; |
VECTOR avgMV; |
599 |
|
|
600 |
for (mj=0;mj<(unsigned int)mb_height;mj++) |
for (mj=0;mj<(unsigned int)mb_height;mj++) |
601 |
for (mi=0;mi<(unsigned int)mb_width; mi++) |
for (mi = 0; mi < (unsigned int)mb_width; mi++) { |
602 |
{ |
|
603 |
avgMV = generate_GMCimageMB(gmc_data, pRef, mi, mj, |
avgMV = generate_GMCimageMB(gmc_data, pRef, mi, mj, |
604 |
stride, stride2, quarterpel, rounding, pGMC); |
stride, stride2, quarterpel, rounding, pGMC); |
605 |
|
|
651 |
|
|
652 |
Fj = gmc_data->Fo + dyF*mj*16 + dxF*mi*16; |
Fj = gmc_data->Fo + dyF*mj*16 + dxF*mi*16; |
653 |
Gj = gmc_data->Go + dyG*mj*16 + dxG*mi*16; |
Gj = gmc_data->Go + dyG*mj*16 + dxG*mi*16; |
654 |
for (J=16; J>0; --J) |
|
655 |
{ |
for (J = 16; J > 0; --J) { |
656 |
int32_t Fi, Gi; |
int32_t Fi, Gi; |
657 |
|
|
658 |
Fi = Fj; Fj += dyF; |
Fi = Fj; Fj += dyF; |
659 |
Gi = Gj; Gj += dyG; |
Gi = Gj; Gj += dyG; |
660 |
for (I=-16; I<0; ++I) |
for (I = -16; I < 0; ++I) { |
|
{ |
|
661 |
int32_t F, G; |
int32_t F, G; |
662 |
uint32_t ri, rj; |
uint32_t ri, rj; |
663 |
|
|
693 |
dstY[I] = (uint8_t)f0; |
dstY[I] = (uint8_t)f0; |
694 |
} |
} |
695 |
} |
} |
696 |
|
|
697 |
dstY += stride; |
dstY += stride; |
698 |
} |
} |
699 |
|
|
702 |
|
|
703 |
Fj = gmc_data->cFo + dyF*4 *mj*8 + dxF*4 *mi*8; |
Fj = gmc_data->cFo + dyF*4 *mj*8 + dxF*4 *mi*8; |
704 |
Gj = gmc_data->cGo + dyG*4 *mj*8 + dxG*4 *mi*8; |
Gj = gmc_data->cGo + dyG*4 *mj*8 + dxG*4 *mi*8; |
705 |
for (J=8; J>0; --J) |
|
706 |
{ |
for (J = 8; J > 0; --J) { |
707 |
int32_t Fi, Gi; |
int32_t Fi, Gi; |
708 |
Fi = Fj; Fj += 4*dyF; |
Fi = Fj; Fj += 4*dyF; |
709 |
Gi = Gj; Gj += 4*dyG; |
Gi = Gj; Gj += 4*dyG; |
710 |
|
|
711 |
for (I=-8; I<0; ++I) |
for (I = -8; I < 0; ++I) { |
|
{ |
|
712 |
int32_t F, G; |
int32_t F, G; |
713 |
uint32_t ri, rj; |
uint32_t ri, rj; |
714 |
|
|
878 |
return; |
return; |
879 |
} |
} |
880 |
|
|
|
|
|
|
|
|
881 |
void |
void |
882 |
generate_GMCimage( const GMC_DATA *const gmc_data, // [input] precalculated data |
generate_GMCimage( const GMC_DATA *const gmc_data, // [input] precalculated data |
883 |
const IMAGE *const pRef, // [input] |
const IMAGE *const pRef, // [input] |
897 |
VECTOR avgMV; |
VECTOR avgMV; |
898 |
|
|
899 |
for (mj=0;mj<mb_height;mj++) |
for (mj=0;mj<mb_height;mj++) |
900 |
for (mi=0;mi<mb_width; mi++) |
for (mi = 0;mi < mb_width; mi++) { |
901 |
{ |
|
902 |
avgMV = generate_GMCimageMB(gmc_data, pRef, mi, mj, |
avgMV = generate_GMCimageMB(gmc_data, pRef, mi, mj, |
903 |
stride, stride2, quarterpel, rounding, pGMC); |
stride, stride2, quarterpel, rounding, pGMC); |
904 |
|
|