4 |
* - Decoder Module - |
* - Decoder Module - |
5 |
* |
* |
6 |
* Copyright(C) 2002 MinChen <chenm001@163.com> |
* Copyright(C) 2002 MinChen <chenm001@163.com> |
7 |
* 2002-2003 Peter Ross <pross@xvid.org> |
* 2002-2004 Peter Ross <pross@xvid.org> |
8 |
* |
* |
9 |
* This program is free software ; you can redistribute it and/or modify |
* This program is free software ; you can redistribute it and/or modify |
10 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
48 |
#include "image/interpolate8x8.h" |
#include "image/interpolate8x8.h" |
49 |
#include "image/reduced.h" |
#include "image/reduced.h" |
50 |
#include "image/font.h" |
#include "image/font.h" |
51 |
|
#include "image/qpel.h" |
52 |
|
|
53 |
#include "bitstream/mbcoding.h" |
#include "bitstream/mbcoding.h" |
54 |
#include "prediction/mbprediction.h" |
#include "prediction/mbprediction.h" |
62 |
#include "image/postprocessing.h" |
#include "image/postprocessing.h" |
63 |
#include "utils/mem_align.h" |
#include "utils/mem_align.h" |
64 |
|
|
65 |
|
#ifdef ARCH_IS_IA32 |
66 |
|
#define interpolate16x16_quarterpel new_interpolate16x16_quarterpel |
67 |
|
#define interpolate8x8_quarterpel new_interpolate8x8_quarterpel |
68 |
|
#endif |
69 |
|
|
70 |
static int |
static int |
71 |
decoder_resize(DECODER * dec) |
decoder_resize(DECODER * dec) |
72 |
{ |
{ |
399 |
const int reduced_resolution, |
const int reduced_resolution, |
400 |
const MACROBLOCK * pMB) |
const MACROBLOCK * pMB) |
401 |
{ |
{ |
|
DECLARE_ALIGNED_MATRIX(block, 1, 64, int16_t, CACHE_LINE); |
|
402 |
DECLARE_ALIGNED_MATRIX(data, 6, 64, int16_t, CACHE_LINE); |
DECLARE_ALIGNED_MATRIX(data, 6, 64, int16_t, CACHE_LINE); |
403 |
|
|
404 |
int stride = dec->edged_width; |
int stride = dec->edged_width; |
407 |
int i; |
int i; |
408 |
const uint32_t iQuant = pMB->quant; |
const uint32_t iQuant = pMB->quant; |
409 |
const int direction = dec->alternate_vertical_scan ? 2 : 0; |
const int direction = dec->alternate_vertical_scan ? 2 : 0; |
410 |
const quant_interFuncPtr dequant = dec->quant_type == 0 ? dequant_h263_inter : dequant_mpeg_inter; |
typedef void (*get_inter_block_function_t)( |
411 |
|
Bitstream * bs, |
412 |
|
int16_t * block, |
413 |
|
int direction, |
414 |
|
const int quant, |
415 |
|
const uint16_t *matrix); |
416 |
|
|
417 |
|
const get_inter_block_function_t get_inter_block = (dec->quant_type == 0) |
418 |
|
? get_inter_block_h263 |
419 |
|
: get_inter_block_mpeg; |
420 |
|
|
421 |
|
memset(&data[0], 0, 6*64*sizeof(int16_t)); /* clear */ |
422 |
|
|
423 |
for (i = 0; i < 6; i++) { |
for (i = 0; i < 6; i++) { |
424 |
|
|
425 |
if (cbp & (1 << (5 - i))) { /* coded */ |
if (cbp & (1 << (5 - i))) { /* coded */ |
426 |
|
|
|
memset(block, 0, 64 * sizeof(int16_t)); /* clear */ |
|
427 |
|
|
428 |
|
/* Decode coeffs and dequantize on the fly */ |
429 |
start_timer(); |
start_timer(); |
430 |
get_inter_block(bs, block, direction); |
get_inter_block(bs, &data[i*64], direction, iQuant, get_inter_matrix(dec->mpeg_quant_matrices)); |
431 |
stop_coding_timer(); |
stop_coding_timer(); |
432 |
|
|
433 |
start_timer(); |
start_timer(); |
|
dequant(&data[i * 64], block, iQuant, dec->mpeg_quant_matrices); |
|
|
stop_iquant_timer(); |
|
|
|
|
|
start_timer(); |
|
434 |
idct(&data[i * 64]); |
idct(&data[i * 64]); |
435 |
stop_idct_timer(); |
stop_idct_timer(); |
436 |
} |
} |
472 |
stop_transfer_timer(); |
stop_transfer_timer(); |
473 |
} |
} |
474 |
|
|
|
static void __inline |
|
|
validate_vector(VECTOR * mv, unsigned int x_pos, unsigned int y_pos, const DECODER * dec) |
|
|
{ |
|
|
/* clip a vector to valid range |
|
|
prevents crashes if bitstream is broken |
|
|
*/ |
|
|
int shift = 5 + dec->quarterpel; |
|
|
int xborder_high = (int)(dec->mb_width - x_pos) << shift; |
|
|
int xborder_low = (-(int)x_pos-1) << shift; |
|
|
int yborder_high = (int)(dec->mb_height - y_pos) << shift; |
|
|
int yborder_low = (-(int)y_pos-1) << shift; |
|
|
|
|
|
#define CHECK_MV(mv) \ |
|
|
do { \ |
|
|
if ((mv).x > xborder_high) { \ |
|
|
DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \ |
|
|
(mv).x = xborder_high; \ |
|
|
} else if ((mv).x < xborder_low) { \ |
|
|
DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \ |
|
|
(mv).x = xborder_low; \ |
|
|
} \ |
|
|
if ((mv).y > yborder_high) { \ |
|
|
DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \ |
|
|
(mv).y = yborder_high; \ |
|
|
} else if ((mv).y < yborder_low) { \ |
|
|
DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \ |
|
|
(mv).y = yborder_low; \ |
|
|
} \ |
|
|
} while (0) |
|
|
|
|
|
CHECK_MV(mv[0]); |
|
|
CHECK_MV(mv[1]); |
|
|
CHECK_MV(mv[2]); |
|
|
CHECK_MV(mv[3]); |
|
|
} |
|
|
|
|
475 |
/* decode an inter macroblock */ |
/* decode an inter macroblock */ |
476 |
static void |
static void |
477 |
decoder_mbinter(DECODER * dec, |
decoder_mbinter(DECODER * dec, |
509 |
mv[i] = pMB->mvs[i]; |
mv[i] = pMB->mvs[i]; |
510 |
} |
} |
511 |
|
|
512 |
validate_vector(mv, x_pos, y_pos, dec); |
for (i = 0; i < 4; i++) { |
513 |
|
/* clip to valid range */ |
514 |
|
int border = (int)(dec->mb_width - x_pos) << (5 + dec->quarterpel); |
515 |
|
if (mv[i].x > border) { |
516 |
|
DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", mv[i].x, border, x_pos, y_pos); |
517 |
|
mv[i].x = border; |
518 |
|
} else { |
519 |
|
border = (-(int)x_pos-1) << (5 + dec->quarterpel); |
520 |
|
if (mv[i].x < border) { |
521 |
|
DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", mv[i].x, border, x_pos, y_pos); |
522 |
|
mv[i].x = border; |
523 |
|
} |
524 |
|
} |
525 |
|
|
526 |
|
border = (int)(dec->mb_height - y_pos) << (5 + dec->quarterpel); |
527 |
|
if (mv[i].y > border) { |
528 |
|
DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", mv[i].y, border, x_pos, y_pos); |
529 |
|
mv[i].y = border; |
530 |
|
} else { |
531 |
|
border = (-(int)y_pos-1) << (5 + dec->quarterpel); |
532 |
|
if (mv[i].y < border) { |
533 |
|
DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", mv[i].y, border, x_pos, y_pos); |
534 |
|
mv[i].y = border; |
535 |
|
} |
536 |
|
} |
537 |
|
} |
538 |
|
|
539 |
start_timer(); |
start_timer(); |
540 |
|
|
827 |
mb_height = (dec->height + 31) / 32; |
mb_height = (dec->height + 31) / 32; |
828 |
} |
} |
829 |
|
|
830 |
|
if (!dec->is_edged[0]) { |
831 |
start_timer(); |
start_timer(); |
832 |
image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height, |
image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height, |
833 |
dec->width, dec->height, dec->bs_version); |
dec->width, dec->height, dec->bs_version); |
834 |
|
dec->is_edged[0] = 1; |
835 |
stop_edges_timer(); |
stop_edges_timer(); |
836 |
|
} |
837 |
|
|
838 |
if (gmc_warp) { |
if (gmc_warp) { |
839 |
/* accuracy: 0==1/2, 1=1/4, 2=1/8, 3=1/16 */ |
/* accuracy: 0==1/2, 1=1/4, 2=1/8, 3=1/16 */ |
1024 |
decoder_bf_interpolate_mbinter(DECODER * dec, |
decoder_bf_interpolate_mbinter(DECODER * dec, |
1025 |
IMAGE forward, |
IMAGE forward, |
1026 |
IMAGE backward, |
IMAGE backward, |
1027 |
MACROBLOCK * pMB, |
const MACROBLOCK * pMB, |
1028 |
const uint32_t x_pos, |
const uint32_t x_pos, |
1029 |
const uint32_t y_pos, |
const uint32_t y_pos, |
1030 |
Bitstream * bs, |
Bitstream * bs, |
1041 |
pU_Cur = dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3); |
pU_Cur = dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3); |
1042 |
pV_Cur = dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3); |
pV_Cur = dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3); |
1043 |
|
|
|
validate_vector(pMB->mvs, x_pos, y_pos, dec); |
|
|
validate_vector(pMB->b_mvs, x_pos, y_pos, dec); |
|
|
|
|
1044 |
if (!direct) { |
if (!direct) { |
1045 |
uv_dx = pMB->mvs[0].x; |
uv_dx = pMB->mvs[0].x; |
1046 |
uv_dy = pMB->mvs[0].y; |
uv_dy = pMB->mvs[0].y; |
|
|
|
1047 |
b_uv_dx = pMB->b_mvs[0].x; |
b_uv_dx = pMB->b_mvs[0].x; |
1048 |
b_uv_dy = pMB->b_mvs[0].y; |
b_uv_dy = pMB->b_mvs[0].y; |
1049 |
|
|
1056 |
|
|
1057 |
uv_dx = (uv_dx >> 1) + roundtab_79[uv_dx & 0x3]; |
uv_dx = (uv_dx >> 1) + roundtab_79[uv_dx & 0x3]; |
1058 |
uv_dy = (uv_dy >> 1) + roundtab_79[uv_dy & 0x3]; |
uv_dy = (uv_dy >> 1) + roundtab_79[uv_dy & 0x3]; |
|
|
|
1059 |
b_uv_dx = (b_uv_dx >> 1) + roundtab_79[b_uv_dx & 0x3]; |
b_uv_dx = (b_uv_dx >> 1) + roundtab_79[b_uv_dx & 0x3]; |
1060 |
b_uv_dy = (b_uv_dy >> 1) + roundtab_79[b_uv_dy & 0x3]; |
b_uv_dy = (b_uv_dy >> 1) + roundtab_79[b_uv_dy & 0x3]; |
1061 |
|
|
1062 |
} else { |
} else { |
|
if(dec->quarterpel) { |
|
|
uv_dx = (pMB->mvs[0].x / 2) + (pMB->mvs[1].x / 2) + (pMB->mvs[2].x / 2) + (pMB->mvs[3].x / 2); |
|
|
uv_dy = (pMB->mvs[0].y / 2) + (pMB->mvs[1].y / 2) + (pMB->mvs[2].y / 2) + (pMB->mvs[3].y / 2); |
|
|
b_uv_dx = (pMB->b_mvs[0].x / 2) + (pMB->b_mvs[1].x / 2) + (pMB->b_mvs[2].x / 2) + (pMB->b_mvs[3].x / 2); |
|
|
b_uv_dy = (pMB->b_mvs[0].y / 2) + (pMB->b_mvs[1].y / 2) + (pMB->b_mvs[2].y / 2) + (pMB->b_mvs[3].y / 2); |
|
|
} else { |
|
1063 |
uv_dx = pMB->mvs[0].x + pMB->mvs[1].x + pMB->mvs[2].x + pMB->mvs[3].x; |
uv_dx = pMB->mvs[0].x + pMB->mvs[1].x + pMB->mvs[2].x + pMB->mvs[3].x; |
1064 |
uv_dy = pMB->mvs[0].y + pMB->mvs[1].y + pMB->mvs[2].y + pMB->mvs[3].y; |
uv_dy = pMB->mvs[0].y + pMB->mvs[1].y + pMB->mvs[2].y + pMB->mvs[3].y; |
1065 |
b_uv_dx = pMB->b_mvs[0].x + pMB->b_mvs[1].x + pMB->b_mvs[2].x + pMB->b_mvs[3].x; |
b_uv_dx = pMB->b_mvs[0].x + pMB->b_mvs[1].x + pMB->b_mvs[2].x + pMB->b_mvs[3].x; |
1066 |
b_uv_dy = pMB->b_mvs[0].y + pMB->b_mvs[1].y + pMB->b_mvs[2].y + pMB->b_mvs[3].y; |
b_uv_dy = pMB->b_mvs[0].y + pMB->b_mvs[1].y + pMB->b_mvs[2].y + pMB->b_mvs[3].y; |
1067 |
|
|
1068 |
|
if (dec->quarterpel) { |
1069 |
|
uv_dx /= 2; |
1070 |
|
uv_dy /= 2; |
1071 |
|
b_uv_dx /= 2; |
1072 |
|
b_uv_dy /= 2; |
1073 |
} |
} |
1074 |
|
|
1075 |
uv_dx = (uv_dx >> 3) + roundtab_76[uv_dx & 0xf]; |
uv_dx = (uv_dx >> 3) + roundtab_76[uv_dx & 0xf]; |
1153 |
interpolate8x8_avg2(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos, |
interpolate8x8_avg2(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos, |
1154 |
dec->cur.y + (16 * y_pos * stride) + 16 * x_pos, |
dec->cur.y + (16 * y_pos * stride) + 16 * x_pos, |
1155 |
dec->tmp.y + (16 * y_pos * stride) + 16 * x_pos, |
dec->tmp.y + (16 * y_pos * stride) + 16 * x_pos, |
1156 |
stride, 0, 8); |
stride, 1, 8); |
1157 |
|
|
1158 |
interpolate8x8_avg2(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
interpolate8x8_avg2(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
1159 |
dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
1160 |
dec->tmp.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
dec->tmp.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
1161 |
stride, 0, 8); |
stride, 1, 8); |
1162 |
|
|
1163 |
interpolate8x8_avg2(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
interpolate8x8_avg2(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
1164 |
dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
1165 |
dec->tmp.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
dec->tmp.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
1166 |
stride, 0, 8); |
stride, 1, 8); |
1167 |
|
|
1168 |
interpolate8x8_avg2(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
interpolate8x8_avg2(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
1169 |
dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
1170 |
dec->tmp.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
dec->tmp.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
1171 |
stride, 0, 8); |
stride, 1, 8); |
1172 |
|
|
1173 |
interpolate8x8_avg2(dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos, |
interpolate8x8_avg2(dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos, |
1174 |
dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos, |
dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos, |
1175 |
dec->tmp.u + (8 * y_pos * stride2) + 8 * x_pos, |
dec->tmp.u + (8 * y_pos * stride2) + 8 * x_pos, |
1176 |
stride2, 0, 8); |
stride2, 1, 8); |
1177 |
|
|
1178 |
interpolate8x8_avg2(dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos, |
interpolate8x8_avg2(dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos, |
1179 |
dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos, |
dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos, |
1180 |
dec->tmp.v + (8 * y_pos * stride2) + 8 * x_pos, |
dec->tmp.v + (8 * y_pos * stride2) + 8 * x_pos, |
1181 |
stride2, 0, 8); |
stride2, 1, 8); |
1182 |
|
|
1183 |
stop_comp_timer(); |
stop_comp_timer(); |
1184 |
|
|
1230 |
const VECTOR zeromv = {0,0}; |
const VECTOR zeromv = {0,0}; |
1231 |
int i; |
int i; |
1232 |
|
|
1233 |
|
if (!dec->is_edged[0]) { |
1234 |
start_timer(); |
start_timer(); |
1235 |
image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height, |
image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height, |
1236 |
dec->width, dec->height, dec->bs_version); |
dec->width, dec->height, dec->bs_version); |
1237 |
|
dec->is_edged[0] = 1; |
1238 |
|
stop_edges_timer(); |
1239 |
|
} |
1240 |
|
|
1241 |
|
if (!dec->is_edged[1]) { |
1242 |
|
start_timer(); |
1243 |
image_setedges(&dec->refn[1], dec->edged_width, dec->edged_height, |
image_setedges(&dec->refn[1], dec->edged_width, dec->edged_height, |
1244 |
dec->width, dec->height, dec->bs_version); |
dec->width, dec->height, dec->bs_version); |
1245 |
|
dec->is_edged[1] = 1; |
1246 |
stop_edges_timer(); |
stop_edges_timer(); |
1247 |
|
} |
1248 |
|
|
1249 |
for (y = 0; y < dec->mb_height; y++) { |
for (y = 0; y < dec->mb_height; y++) { |
1250 |
/* Initialize Pred Motion Vector */ |
/* Initialize Pred Motion Vector */ |
1383 |
xvid_dec_frame_t * frame, xvid_dec_stats_t * stats, |
xvid_dec_frame_t * frame, xvid_dec_stats_t * stats, |
1384 |
int coding_type, int quant) |
int coding_type, int quant) |
1385 |
{ |
{ |
1386 |
|
const int brightness = XVID_VERSION_MINOR(frame->version) >= 1 ? frame->brightness : 0; |
1387 |
|
|
1388 |
if (dec->cartoon_mode) |
if (dec->cartoon_mode) |
1389 |
frame->general &= ~XVID_FILMEFFECT; |
frame->general &= ~XVID_FILMEFFECT; |
1390 |
|
|
1391 |
if (frame->general & (XVID_DEBLOCKY|XVID_DEBLOCKUV|XVID_FILMEFFECT) && mbs != NULL) /* post process */ |
if ((frame->general & (XVID_DEBLOCKY|XVID_DEBLOCKUV|XVID_FILMEFFECT) || brightness!=0) |
1392 |
|
&& mbs != NULL) /* post process */ |
1393 |
{ |
{ |
1394 |
/* note: image is stored to tmp */ |
/* note: image is stored to tmp */ |
1395 |
image_copy(&dec->tmp, img, dec->edged_width, dec->height); |
image_copy(&dec->tmp, img, dec->edged_width, dec->height); |
1396 |
image_postproc(&dec->postproc, &dec->tmp, dec->edged_width, |
image_postproc(&dec->postproc, &dec->tmp, dec->edged_width, |
1397 |
mbs, dec->mb_width, dec->mb_height, dec->mb_width, |
mbs, dec->mb_width, dec->mb_height, dec->mb_width, |
1398 |
frame->general, dec->frames, (coding_type == B_VOP)); |
frame->general, brightness, dec->frames, (coding_type == B_VOP)); |
1399 |
img = &dec->tmp; |
img = &dec->tmp; |
1400 |
} |
} |
1401 |
|
|
1418 |
} |
} |
1419 |
} |
} |
1420 |
|
|
1421 |
|
|
1422 |
int |
int |
1423 |
decoder_decode(DECODER * dec, |
decoder_decode(DECODER * dec, |
1424 |
xvid_dec_frame_t * frame, xvid_dec_stats_t * stats) |
xvid_dec_frame_t * frame, xvid_dec_stats_t * stats) |
1569 |
} |
} |
1570 |
|
|
1571 |
image_swap(&dec->refn[0], &dec->refn[1]); |
image_swap(&dec->refn[0], &dec->refn[1]); |
1572 |
|
dec->is_edged[1] = dec->is_edged[0]; |
1573 |
image_swap(&dec->cur, &dec->refn[0]); |
image_swap(&dec->cur, &dec->refn[0]); |
1574 |
|
dec->is_edged[0] = 0; |
1575 |
SWAP(MACROBLOCK *, dec->mbs, dec->last_mbs); |
SWAP(MACROBLOCK *, dec->mbs, dec->last_mbs); |
1576 |
dec->last_reduced_resolution = reduced_resolution; |
dec->last_reduced_resolution = reduced_resolution; |
1577 |
dec->last_coding_type = coding_type; |
dec->last_coding_type = coding_type; |