--- mbcoding.c 2003/05/03 19:11:58 1.44.2.7 +++ mbcoding.c 2009/11/10 14:06:58 1.58 @@ -1,53 +1,27 @@ - /****************************************************************************** - * * - * This file is part of XviD, a free MPEG-4 video encoder/decoder * - * * - * XviD is an implementation of a part of one or more MPEG-4 Video tools * - * as specified in ISO/IEC 14496-2 standard. Those intending to use this * - * software module in hardware or software products are advised that its * - * use may infringe existing patents or copyrights, and any such use * - * would be at such party's own risk. The original developer of this * - * software module and his/her company, and subsequent editors and their * - * companies, will have no liability for use of this software or * - * modifications or derivatives thereof. * - * * - * XviD is free software; you can redistribute it and/or modify it * - * under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - * XviD is distributed in the hope that it will be useful, but * - * WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the Free Software * - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * * - ******************************************************************************/ - - /****************************************************************************** - * * - * mbcoding.c * - * * - * Copyright (C) 2002 - Michael Militzer * - * * - * For more information visit the XviD homepage: http://www.xvid.org * - * * - ******************************************************************************/ - - /****************************************************************************** - * * - * Revision history: * - * * - * 28.10.2002 GMC support - gruel * - * 28.06.2002 added check_resync_marker() * - * 14.04.2002 bframe encoding * - * 08.03.2002 initial version; isibaar * - * * - ******************************************************************************/ - +/***************************************************************************** + * + * XVID MPEG-4 VIDEO CODEC + * - MB coding - + * + * Copyright (C) 2002 Michael Militzer + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program ; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * $Id: mbcoding.c,v 1.58 2009/11/10 14:06:58 Isibaar Exp $ + * + ****************************************************************************/ #include #include @@ -62,23 +36,19 @@ #include "../utils/mbfunctions.h" -/* #define BIGLUT */ +#ifdef _DEBUG +# include "../motion/estimation.h" +# include "../motion/motion_inlines.h" +# include +#endif + -#ifdef BIGLUT -#define LEVELOFFSET 2048 -#else #define LEVELOFFSET 32 -#endif +/* Initialized once during xvid_global call + * RO access is thread safe */ static REVERSE_EVENT DCT3D[2][4096]; - -#ifdef BIGLUT -static VLC coeff_VLC[2][2][4096][64]; -VLC *intra_table; -static VLC *inter_table; -#else static VLC coeff_VLC[2][2][64][64]; -#endif /* not really MB related, but VLCs are only available here */ void bs_put_spritetrajectory(Bitstream * bs, const int val) @@ -88,8 +58,10 @@ const int code2 = sprite_trajectory_len[len].code; const int len2 = sprite_trajectory_len[len].len; -// printf("GMC=%d Code/Len = %d / %d ",val, code,len); -// printf("Code2 / Len2 = %d / %d \n",code2,len2); +#if 0 + printf("GMC=%d Code/Len = %d / %d ",val, code,len); + printf("Code2 / Len2 = %d / %d \n",code2,len2); +#endif BitstreamPutBits(bs, code2, len2); if (len) BitstreamPutBits(bs, code, len); @@ -115,41 +87,26 @@ uint32_t i, j, k, intra, last, run, run_esc, level, level_esc, escape, escape_len, offset; int32_t l; -#ifdef BIGLUT - intra_table = coeff_VLC[1]; - inter_table = coeff_VLC[0]; -#endif - - for (intra = 0; intra < 2; intra++) for (i = 0; i < 4096; i++) DCT3D[intra][i].event.level = 0; - for (intra = 0; intra < 2; intra++) - for (last = 0; last < 2; last++) - { - for (run = 0; run < 63 + last; run++) - for (level = 0; level < (uint32_t)(32 << intra); level++) - { -#ifdef BIGLUT - offset = LEVELOFFSET; -#else + for (intra = 0; intra < 2; intra++) { + for (last = 0; last < 2; last++) { + for (run = 0; run < 63 + last; run++) { + for (level = 0; level < (uint32_t)(32 << intra); level++) { offset = !intra * LEVELOFFSET; -#endif coeff_VLC[intra][last][level + offset][run].len = 128; } + } } + } - for (intra = 0; intra < 2; intra++) - for (i = 0; i < 102; i++) - { -#ifdef BIGLUT - offset = LEVELOFFSET; -#else + for (intra = 0; intra < 2; intra++) { + for (i = 0; i < 102; i++) { offset = !intra * LEVELOFFSET; -#endif - for (j = 0; j < (uint32_t)(1 << (12 - coeff_tab[intra][i].vlc.len)); j++) - { + + for (j = 0; j < (uint32_t)(1 << (12 - coeff_tab[intra][i].vlc.len)); j++) { DCT3D[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].len = coeff_tab[intra][i].vlc.len; DCT3D[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].event = coeff_tab[intra][i].event; } @@ -158,58 +115,39 @@ = coeff_tab[intra][i].vlc.code << 1; coeff_VLC[intra][coeff_tab[intra][i].event.last][coeff_tab[intra][i].event.level + offset][coeff_tab[intra][i].event.run].len = coeff_tab[intra][i].vlc.len + 1; -#ifndef BIGLUT - if (!intra) -#endif - { + + if (!intra) { coeff_VLC[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].code = (coeff_tab[intra][i].vlc.code << 1) | 1; coeff_VLC[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].len = coeff_tab[intra][i].vlc.len + 1; } } + } + + for (intra = 0; intra < 2; intra++) { + for (last = 0; last < 2; last++) { + for (run = 0; run < 63 + last; run++) { + for (level = 1; level < (uint32_t)(32 << intra); level++) { - for (intra = 0; intra < 2; intra++) - for (last = 0; last < 2; last++) - for (run = 0; run < 63 + last; run++) - { - for (level = 1; level < (uint32_t)(32 << intra); level++) - { if (level <= max_level[intra][last][run] && run <= max_run[intra][last][level]) continue; -#ifdef BIGLUT - offset = LEVELOFFSET; -#else offset = !intra * LEVELOFFSET; -#endif level_esc = level - max_level[intra][last][run]; run_esc = run - 1 - max_run[intra][last][level]; - /*use this test to use shorter esc2 codes when possible - if (level_esc <= max_level[intra][last][run] && run <= max_run[intra][last][level_esc] - && !(coeff_VLC[intra][last][level_esc + offset][run].len + 7 + 1 - > coeff_VLC[intra][last][level + offset][run_esc].code + 7 + 2))*/ - if (level_esc <= max_level[intra][last][run] && run <= max_run[intra][last][level_esc]) - { + if (level_esc <= max_level[intra][last][run] && run <= max_run[intra][last][level_esc]) { escape = ESCAPE1; escape_len = 7 + 1; run_esc = run; - } - else - { - if (run_esc <= max_run[intra][last][level] && level <= max_level[intra][last][run_esc]) - { + } else { + if (run_esc <= max_run[intra][last][level] && level <= max_level[intra][last][run_esc]) { escape = ESCAPE2; escape_len = 7 + 2; level_esc = level; - } - else - { -#ifndef BIGLUT - if (!intra) -#endif - { + } else { + if (!intra) { coeff_VLC[intra][last][level + offset][run].code = (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((level & 0xfff) << 1) | 1; coeff_VLC[intra][last][level + offset][run].len = 30; @@ -226,10 +164,8 @@ | coeff_VLC[intra][last][level_esc + offset][run_esc].code; coeff_VLC[intra][last][level + offset][run].len = coeff_VLC[intra][last][level_esc + offset][run_esc].len + escape_len; -#ifndef BIGLUT - if (!intra) -#endif - { + + if (!intra) { coeff_VLC[intra][last][offset - level][run].code = (escape << coeff_VLC[intra][last][level_esc + offset][run_esc].len) | coeff_VLC[intra][last][level_esc + offset][run_esc].code | 1; @@ -238,44 +174,30 @@ } } -#ifdef BIGLUT - for (level = 32 << intra; level < 2048; level++) - { - coeff_VLC[intra][last][level + offset][run].code - = (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((level & 0xfff) << 1) | 1; - coeff_VLC[intra][last][level + offset][run].len = 30; - - coeff_VLC[intra][last][offset - level][run].code - = (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((-level & 0xfff) << 1) | 1; - coeff_VLC[intra][last][offset - level][run].len = 30; - } -#else - if (!intra) - { + if (!intra) { coeff_VLC[intra][last][0][run].code = (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((-32 & 0xfff) << 1) | 1; coeff_VLC[intra][last][0][run].len = 30; } -#endif } -/* init sprite_trajectory tables */ -/* even if GMC is not specified (it might be used later...) */ + } + } + + /* init sprite_trajectory tables + * even if GMC is not specified (it might be used later...) */ sprite_trajectory_code[0+16384].code = 0; sprite_trajectory_code[0+16384].len = 0; - for (k=0;k<14;k++) - { + for (k=0;k<14;k++) { int limit = (1< (cmp - 1)) value -= 64 * scale_factor; - pStat->iMvSum += value * value; - pStat->iMvCount++; - if (value == 0) { BitstreamPutBits(bs, mb_motion_table[32].code, mb_motion_table[32].len); @@ -336,92 +254,6 @@ } -#ifdef BIGLUT - -static __inline void -CodeCoeff(Bitstream * bs, - const int16_t qcoeff[64], - VLC * table, - const uint16_t * zigzag, - uint16_t intra) -{ - - uint32_t j, last; - short v; - VLC *vlc; - - j = intra; - last = intra; - - while (j < 64 && (v = qcoeff[zigzag[j]]) == 0) - j++; - - do { - vlc = table + 64 * 2048 + (v << 6) + j - last; - last = ++j; - - /* count zeroes */ - while (j < 64 && (v = qcoeff[zigzag[j]]) == 0) - j++; - - /* write code */ - if (j != 64) { - BitstreamPutBits(bs, vlc->code, vlc->len); - } else { - vlc += 64 * 4096; - BitstreamPutBits(bs, vlc->code, vlc->len); - break; - } - } while (1); - -} - - - -/* returns the number of bits required to encode qcoeff */ -int -CodeCoeff_CalcBits(const int16_t qcoeff[64], - VLC * table, - const uint16_t * zigzag, - uint16_t intra) -{ - int bits = 0; - uint32_t j, last; - short v; - VLC *vlc; - - j = intra; - last = intra; - - while (j < 64 && (v = qcoeff[zigzag[j]]) == 0) - j++; - - if (j >= 64) return 0; /* empty block */ - - do { - vlc = table + 64 * 2048 + (v << 6) + j - last; - last = ++j; - - /* count zeroes */ - while (j < 64 && (v = qcoeff[zigzag[j]]) == 0) - j++; - - /* write code */ - if (j != 64) { - bits += vlc->len; - } else { - vlc += 64 * 4096; - bits += vlc->len; - break; - } - } while (1); - - return bits; -} - - -#else - static __inline void CodeCoeffInter(Bitstream * bs, const int16_t qcoeff[64], @@ -538,7 +370,7 @@ /* returns the number of bits required to encode qcoeff */ -int +int CodeCoeffIntra_CalcBits(const int16_t qcoeff[64], const uint16_t * zigzag) { int bits = 0; @@ -626,11 +458,7 @@ return bits; } - -#endif - - -static int iDQtab[5] = { +static const int iDQtab[5] = { 1, 0, -1 /* no change */, 2, 3 }; #define DQ_VALUE2INDEX(value) iDQtab[(value)+2] @@ -648,7 +476,7 @@ cbpy = pMB->cbp >> 2; - // write mcbpc + /* write mcbpc */ if (frame->coding_type == I_VOP) { mcbpc = ((pMB->mode >> 1) & 3) | ((pMB->cbp & 3) << 2); BitstreamPutBits(bs, mcbpc_intra_tab[mcbpc].code, @@ -659,24 +487,24 @@ mcbpc_inter_tab[mcbpc].len); } - // ac prediction flag + /* ac prediction flag */ if (pMB->acpred_directions[0]) BitstreamPutBits(bs, 1, 1); else BitstreamPutBits(bs, 0, 1); - // write cbpy + /* write cbpy */ BitstreamPutBits(bs, xvid_cbpy_tab[cbpy].code, xvid_cbpy_tab[cbpy].len); - // write dquant + /* write dquant */ if (pMB->mode == MODE_INTRA_Q) BitstreamPutBits(bs, DQ_VALUE2INDEX(pMB->dquant), 2); - // write interlacing + /* write interlacing */ if (frame->vol_flags & XVID_VOL_INTERLACING) { BitstreamPutBit(bs, pMB->field_dct); } - // code block coeffs + /* code block coeffs */ for (i = 0; i < 6; i++) { if (i < 4) BitstreamPutBits(bs, dcy_tab[qcoeff[i * 64 + 0] + 255].code, @@ -692,11 +520,7 @@ bits = BitstreamPos(bs); -#ifdef BIGLUT - CodeCoeff(bs, &qcoeff[i * 64], intra_table, scan_table, 1); -#else CodeCoeffIntra(bs, &qcoeff[i * 64], scan_table); -#endif bits = BitstreamPos(bs) - bits; pStat->iTextBits += bits; @@ -720,61 +544,73 @@ mcbpc = (pMB->mode & 7) | ((pMB->cbp & 3) << 3); cbpy = 15 - (pMB->cbp >> 2); - // write mcbpc + /* write mcbpc */ BitstreamPutBits(bs, mcbpc_inter_tab[mcbpc].code, mcbpc_inter_tab[mcbpc].len); if ( (frame->coding_type == S_VOP) && (pMB->mode == MODE_INTER || pMB->mode == MODE_INTER_Q) ) - BitstreamPutBit(bs, pMB->mcsel); // mcsel: '0'=local motion, '1'=GMC + BitstreamPutBit(bs, pMB->mcsel); /* mcsel: '0'=local motion, '1'=GMC */ - // write cbpy + /* write cbpy */ BitstreamPutBits(bs, xvid_cbpy_tab[cbpy].code, xvid_cbpy_tab[cbpy].len); - // write dquant + /* write dquant */ if (pMB->mode == MODE_INTER_Q) BitstreamPutBits(bs, DQ_VALUE2INDEX(pMB->dquant), 2); - // interlacing + /* interlacing */ if (frame->vol_flags & XVID_VOL_INTERLACING) { if (pMB->cbp) { BitstreamPutBit(bs, pMB->field_dct); - DPRINTF(DPRINTF_MB,"codep: field_dct: %i", pMB->field_dct); + DPRINTF(XVID_DEBUG_MB,"codep: field_dct: %i\n", pMB->field_dct); } - // if inter block, write field ME flag - if (pMB->mode == MODE_INTER || pMB->mode == MODE_INTER_Q) { - BitstreamPutBit(bs, pMB->field_pred); - DPRINTF(DPRINTF_MB,"codep: field_pred: %i", pMB->field_pred); + /* if inter block, write field ME flag */ + if ((pMB->mode == MODE_INTER || pMB->mode == MODE_INTER_Q) && (pMB->mcsel == 0)) { + BitstreamPutBit(bs, 0 /*pMB->field_pred*/); /* not implemented yet */ + DPRINTF(XVID_DEBUG_MB,"codep: field_pred: %i\n", pMB->field_pred); - // write field prediction references + /* write field prediction references */ +#if 0 /* Remove the #if once field_pred is supported */ if (pMB->field_pred) { BitstreamPutBit(bs, pMB->field_for_top); BitstreamPutBit(bs, pMB->field_for_bot); } +#endif } } - // code motion vector(s) if motion is local + + bits = BitstreamPos(bs); + + /* code motion vector(s) if motion is local */ if (!pMB->mcsel) for (i = 0; i < (pMB->mode == MODE_INTER4V ? 4 : 1); i++) { - CodeVector(bs, pMB->pmvs[i].x, frame->fcode, pStat); - CodeVector(bs, pMB->pmvs[i].y, frame->fcode, pStat); + CodeVector(bs, pMB->pmvs[i].x, frame->fcode); + CodeVector(bs, pMB->pmvs[i].y, frame->fcode); + +#if 0 /* #ifdef _DEBUG */ + if (i == 0) /* for simplicity */ { + int coded_length = BitstreamPos(bs) - bits; + int estimated_length = d_mv_bits(pMB->pmvs[i].x, pMB->pmvs[i].y, zeroMV, frame->fcode, 0); + assert(estimated_length == coded_length); + d_mv_bits(pMB->pmvs[i].x, pMB->pmvs[i].y, zeroMV, frame->fcode, 0); + } +#endif } + bits = BitstreamPos(bs) - bits; + pStat->iMVBits += bits; + bits = BitstreamPos(bs); - // code block coeffs + /* code block coeffs */ for (i = 0; i < 6; i++) - if (pMB->cbp & (1 << (5 - i))) - { + if (pMB->cbp & (1 << (5 - i))) { const uint16_t *scan_table = frame->vop_flags & XVID_VOP_ALTERNATESCAN ? scan_tables[2] : scan_tables[0]; -#ifdef BIGLUT - CodeCoeff(bs, &qcoeff[i * 64], inter_table, scan_table, 0); -#else CodeCoeffInter(bs, &qcoeff[i * 64], scan_table); -#endif } bits = BitstreamPos(bs) - bits; @@ -789,9 +625,15 @@ Bitstream * bs, Statistics * pStat) { - if (frame->coding_type != I_VOP) - BitstreamPutBit(bs, 0); // not_coded - + if (frame->coding_type != I_VOP) + BitstreamPutBit(bs, 0); /* not_coded */ + + if (frame->vop_flags & XVID_VOP_GREYSCALE) { + pMB->cbp &= 0x3C; /* keep only bits 5-2 */ + qcoeff[4*64+0]=0; /* for INTRA DC value is saved */ + qcoeff[5*64+0]=0; + } + if (pMB->mode == MODE_INTRA || pMB->mode == MODE_INTRA_Q) CodeBlockIntra(frame, pMB, qcoeff, bs, pStat); else @@ -799,15 +641,6 @@ } -/* -// moved to mbcoding.h so that in can be 'static __inline' -void -MBSkip(Bitstream * bs) -{ - BitstreamPutBit(bs, 1); // not coded -} -*/ - /*************************************************************** * bframe encoding start ***************************************************************/ @@ -864,40 +697,45 @@ BitstreamPutBit(bs, 1); return; - default:; // invalid + default:; /* invalid */ } } void -MBCodingBVOP(const MACROBLOCK * mb, +MBCodingBVOP(const FRAMEINFO * const frame, + const MACROBLOCK * mb, const int16_t qcoeff[6 * 64], const int32_t fcode, const int32_t bcode, Bitstream * bs, - Statistics * pStat, - int direction) + Statistics * pStat) { int vcode = fcode; unsigned int i; + const uint16_t *scan_table = + frame->vop_flags & XVID_VOP_ALTERNATESCAN ? + scan_tables[2] : scan_tables[0]; + int bits; + /* ------------------------------------------------------------------ when a block is skipped it is decoded DIRECT(0,0) hence is interpolated from forward & backward frames ------------------------------------------------------------------ */ if (mb->mode == MODE_DIRECT_NONE_MV) { - BitstreamPutBit(bs, 1); // skipped + BitstreamPutBit(bs, 1); /* skipped */ return; } - BitstreamPutBit(bs, 0); // not skipped + BitstreamPutBit(bs, 0); /* not skipped */ if (mb->cbp == 0) { - BitstreamPutBit(bs, 1); // cbp == 0 + BitstreamPutBit(bs, 1); /* cbp == 0 */ } else { - BitstreamPutBit(bs, 0); // cbp == xxx + BitstreamPutBit(bs, 0); /* cbp == xxx */ } put_bvop_mbtype(bs, mb->mode); @@ -907,34 +745,55 @@ } if (mb->mode != MODE_DIRECT && mb->cbp != 0) { - put_bvop_dbquant(bs, 0); // todo: mb->dquant = 0 + put_bvop_dbquant(bs, 0); /* todo: mb->dquant = 0 */ } + if (frame->vol_flags & XVID_VOL_INTERLACING) { + if (mb->cbp) { + BitstreamPutBit(bs, mb->field_dct); + DPRINTF(XVID_DEBUG_MB,"codep: field_dct: %i\n", mb->field_dct); + } + + /* if not direct block, write field ME flag */ + if (mb->mode != MODE_DIRECT) { + BitstreamPutBit(bs, 0 /*mb->field_pred*/); /* field ME not implemented */ + + /* write field prediction references */ +#if 0 /* Remove the #if once field_pred is supported */ + if (mb->field_pred) { + BitstreamPutBit(bs, mb->field_for_top); + BitstreamPutBit(bs, mb->field_for_bot); + } +#endif + } + } + + bits = BitstreamPos(bs); + switch (mb->mode) { case MODE_INTERPOLATE: - CodeVector(bs, mb->pmvs[1].x, vcode, pStat); //forward vector of interpolate mode - CodeVector(bs, mb->pmvs[1].y, vcode, pStat); + CodeVector(bs, mb->pmvs[1].x, vcode); /* forward vector of interpolate mode */ + CodeVector(bs, mb->pmvs[1].y, vcode); case MODE_BACKWARD: vcode = bcode; case MODE_FORWARD: - CodeVector(bs, mb->pmvs[0].x, vcode, pStat); - CodeVector(bs, mb->pmvs[0].y, vcode, pStat); + CodeVector(bs, mb->pmvs[0].x, vcode); + CodeVector(bs, mb->pmvs[0].y, vcode); break; case MODE_DIRECT: - CodeVector(bs, mb->pmvs[3].x, 1, pStat); // fcode is always 1 for delta vector - CodeVector(bs, mb->pmvs[3].y, 1, pStat); // prediction is always (0,0) + CodeVector(bs, mb->pmvs[3].x, 1); /* fcode is always 1 for delta vector */ + CodeVector(bs, mb->pmvs[3].y, 1); /* prediction is always (0,0) */ default: break; } + pStat->iMVBits += BitstreamPos(bs) - bits; + bits = BitstreamPos(bs); for (i = 0; i < 6; i++) { if (mb->cbp & (1 << (5 - i))) { -#ifdef BIGLUT - CodeCoeff(bs, &qcoeff[i * 64], inter_table, scan_tables[0], 0); -#else - CodeCoeffInter(bs, &qcoeff[i * 64], scan_tables[0]); -#endif + CodeCoeffInter(bs, &qcoeff[i * 64], scan_table); } } + pStat->iTextBits += BitstreamPos(bs) - bits; } @@ -944,11 +803,13 @@ ***************************************************************/ -// for IVOP addbits == 0 -// for PVOP addbits == fcode - 1 -// for BVOP addbits == max(fcode,bcode) - 1 -// returns true or false -int +/* + * for IVOP addbits == 0 + * for PVOP addbits == fcode - 1 + * for BVOP addbits == max(fcode,bcode) - 1 + * returns true or false + */ +int check_resync_marker(Bitstream * bs, int addbits) { uint32_t nbits; @@ -988,7 +849,7 @@ { uint32_t index; - + index = MIN(BitstreamShowBits(bs, 9), 256); BitstreamSkip(bs, mcbpc_inter_table[index].len); @@ -1124,6 +985,8 @@ } +#define GET_BITS(cache, n) ((cache)>>(32-(n))) + static __inline int get_coeff(Bitstream * bs, int *run, @@ -1136,11 +999,13 @@ int32_t level; REVERSE_EVENT *reverse_event; + uint32_t cache = BitstreamShowBits(bs, 32); + if (short_video_header) /* inter-VLCs will be used for both intra and inter blocks */ intra = 0; - if (BitstreamShowBits(bs, 7) != ESCAPE) { - reverse_event = &DCT3D[intra][BitstreamShowBits(bs, 12)]; + if (GET_BITS(cache, 7) != ESCAPE) { + reverse_event = &DCT3D[intra][GET_BITS(cache, 12)]; if ((level = reverse_event->event.level) == 0) goto error; @@ -1148,31 +1013,35 @@ *last = reverse_event->event.last; *run = reverse_event->event.run; - BitstreamSkip(bs, reverse_event->len); + /* Don't forget to update the bitstream position */ + BitstreamSkip(bs, reverse_event->len+1); - return BitstreamGetBits(bs, 1) ? -level : level; + return (GET_BITS(cache, reverse_event->len+1)&0x01) ? -level : level; } - BitstreamSkip(bs, 7); + /* flush 7bits of cache */ + cache <<= 7; if (short_video_header) { /* escape mode 4 - H.263 type, only used if short_video_header = 1 */ - *last = BitstreamGetBit(bs); - *run = BitstreamGetBits(bs, 6); - level = BitstreamGetBits(bs, 8); + *last = GET_BITS(cache, 1); + *run = (GET_BITS(cache, 7) &0x3f); + level = (GET_BITS(cache, 15)&0xff); if (level == 0 || level == 128) - DPRINTF(DPRINTF_ERROR, "Illegal LEVEL for ESCAPE mode 4: %d", level); + DPRINTF(XVID_DEBUG_ERROR, "Illegal LEVEL for ESCAPE mode 4: %d\n", level); + + /* We've "eaten" 22 bits */ + BitstreamSkip(bs, 22); return (level << 24) >> 24; } - mode = BitstreamShowBits(bs, 2); - - if (mode < 3) { - BitstreamSkip(bs, (mode == 2) ? 2 : 1); + if ((mode = GET_BITS(cache, 2)) < 3) { + const int skip[3] = {1, 1, 2}; + cache <<= skip[mode]; - reverse_event = &DCT3D[intra][BitstreamShowBits(bs, 12)]; + reverse_event = &DCT3D[intra][GET_BITS(cache, 12)]; if ((level = reverse_event->event.level) == 0) goto error; @@ -1180,28 +1049,33 @@ *last = reverse_event->event.last; *run = reverse_event->event.run; - BitstreamSkip(bs, reverse_event->len); - - if (mode < 2) /* first escape mode, level is offset */ + if (mode < 2) { + /* first escape mode, level is offset */ level += max_level[intra][*last][*run]; - else /* second escape mode, run is offset */ + } else { + /* second escape mode, run is offset */ *run += max_run[intra][*last][level] + 1; + } + + /* Update bitstream position */ + BitstreamSkip(bs, 7 + skip[mode] + reverse_event->len + 1); - return BitstreamGetBits(bs, 1) ? -level : level; + return (GET_BITS(cache, reverse_event->len+1)&0x01) ? -level : level; } /* third escape mode - fixed length codes */ - BitstreamSkip(bs, 2); - *last = BitstreamGetBits(bs, 1); - *run = BitstreamGetBits(bs, 6); - BitstreamSkip(bs, 1); /* marker */ - level = BitstreamGetBits(bs, 12); - BitstreamSkip(bs, 1); /* marker */ + cache <<= 2; + *last = GET_BITS(cache, 1); + *run = (GET_BITS(cache, 7)&0x3f); + level = (GET_BITS(cache, 20)&0xfff); + + /* Update bitstream position */ + BitstreamSkip(bs, 30); return (level << 20) >> 20; error: - *run = VLC_ERROR; + *run = 64; return 0; } @@ -1213,22 +1087,25 @@ { const uint16_t *scan = scan_tables[direction]; - int level, run, last; + int level, run, last = 0; do { level = get_coeff(bs, &run, &last, 1, 0); - if (run == -1) { - DPRINTF(DPRINTF_ERROR,"fatal: invalid run"); + coeff += run; + if (coeff & ~63) { + DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index"); break; } - coeff += run; + block[scan[coeff]] = level; - DPRINTF(DPRINTF_COEFF,"block[%i] %i", scan[coeff], level); - //DPRINTF(DPRINTF_COEFF,"block[%i] %i %08x", scan[coeff], level, BitstreamShowBits(bs, 32)); + DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i\n", scan[coeff], level); +#if 0 + DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i %08x\n", scan[coeff], level, BitstreamShowBits(bs, 32)); +#endif if (level < -2047 || level > 2047) { - DPRINTF(DPRINTF_ERROR,"warning: intra_overflow %i", level); + DPRINTF(XVID_DEBUG_ERROR,"warning: intra_overflow %i\n", level); } coeff++; } while (!last); @@ -1236,320 +1113,85 @@ } void -get_inter_block(Bitstream * bs, - int16_t * block, - int direction) +get_inter_block_h263( + Bitstream * bs, + int16_t * block, + int direction, + const int quant, + const uint16_t *matrix) { const uint16_t *scan = scan_tables[direction]; + const uint16_t quant_m_2 = quant << 1; + const uint16_t quant_add = (quant & 1 ? quant : quant - 1); int p; int level; int run; - int last; + int last = 0; p = 0; do { level = get_coeff(bs, &run, &last, 0, 0); - if (run == -1) { - DPRINTF(DPRINTF_ERROR,"fatal: invalid run"); + p += run; + if (p & ~63) { + DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index"); break; } - p += run; - - block[scan[p]] = level; - DPRINTF(DPRINTF_COEFF,"block[%i] %i", scan[p], level); - // DPRINTF(DPRINTF_COEFF,"block[%i] %i %08x", scan[p], level, BitstreamShowBits(bs, 32)); - - if (level < -2047 || level > 2047) { - DPRINTF(DPRINTF_ERROR,"warning: inter overflow %i", level); - } + if (level < 0) { + level = level*quant_m_2 - quant_add; + block[scan[p]] = (level >= -2048 ? level : -2048); + } else { + level = level * quant_m_2 + quant_add; + block[scan[p]] = (level <= 2047 ? level : 2047); + } p++; } while (!last); - } - - - - - - -/************************************************************************ - * Trellis based R-D optimal quantization * - * not really "bitstream" or "mbcoding" related, but needs VLC tables * - * * - ************************************************************************/ - - -int __inline -RunLevel_CalcBits_inter(const int16_t run, int16_t level) -{ - const int esc_length = 30; - - if (!((level+32) & -64)) - return coeff_VLC[0][0][level+32][run].len; - else - return esc_length; -} - -int __inline -RunLevelLast_CalcBits_inter(const int16_t run, const int16_t level) -{ - const int esc_length = 30; - - if (!((level+32) & -64)) - return coeff_VLC[0][1][level+32][run].len; - else - return esc_length; -} - - -int __inline -RunLevel_CalcBits_intra(const int16_t run, int16_t level) -{ - const int esc_length = 30; - int bits; - - level = abs(level); - if (!(level & -64)) { - bits = coeff_VLC[1][0][level][run].len; - if (bits!=128) - return bits; - } - return esc_length; -} - -int __inline -RunLevelLast_CalcBits_intra(const int16_t run, int16_t level) -{ - const int esc_length = 30; - int bits; - - level = abs(level); - if (!(level & -64)) { - bits = coeff_VLC[1][1][level][run].len; - if (bits!=128) - return bits; - } - return esc_length; -} - -/* based on ffmpeg's trellis quant, thanks! */ -/* (C) 2003 Michael Niedermayer */ - -int -dct_quantize_trellis_inter_h263_c (int16_t *qcoeff, const int16_t *data, int quant) +void +get_inter_block_mpeg( + Bitstream * bs, + int16_t * block, + int direction, + const int quant, + const uint16_t *matrix) { + const uint16_t *scan = scan_tables[direction]; + uint32_t sum = 0; + int p; + int level; + int run; + int last = 0; -/* input: original quantized DCT coefficients (to calc distorion)*/ -/* already quantized DCT coefficients */ -/* quantizer */ -/* output: modified table of quantized DCT coefficients */ - -/* maybe combining quantize&Trellis would be faster (even that it disables MMX quant) */ - - int run_tab[65]; - int level_tab[65]; - int score_tab[65]; - int last_run = 0; - int last_level = 0; - int last_score = 0; - int last_i = 0; - int coeff[64]; - int coeff_count[64]; /* is a table useful for this 0-1 (or 1-2) table? */ - int last_non_zero, i; - - const uint16_t *const zigzag = &scan_tables[0][0]; - /* ordinary zigzag order, so it's not INTERLACE compatible, yet */ - - const int qmul = 2*quant; - const int qadd = ((quant-1)|1); - -/* quant is not needed anymore after this */ - - int score_limit = 0; - int left_limit = 0; - - const int lambda = (quant * quant * 123 + 64) >> 7; // default lagrangian - -/* control lambda through a ENVIRONMENT variable (for automatic optmization) */ - -/* - const int lfact=123; // better control of the lagrangian lambda - int lambda = (quant * quant * 123 + 64) >> 7; // default lagrangian - - const char * const trellis_lambda = getenv("TRELLIS_LAMBDA"); - if(trellis_lambda) - lfact = atoi(trellis_lambda); - if (lfact < 1) - lfact = 123; // why this value? Who knows? But 123 seems better than 109 = 0.85<<7 - - lambda = (quant * quant * lfact + 64) >> 7; // lagrangian -*/ - - last_non_zero = -1; - for (i = 0; i < 64; i++) - { - const int level = qcoeff[zigzag[i]]; - - if (level) { - last_non_zero = i; - - if (level>0) { - if (level==1) { - coeff[i] = 1; - coeff_count[i] = 0; - } else { - coeff[i] = level; - coeff_count[i] = 1; - } - } else { - if (level==-1) { - coeff[i] = -1; - coeff_count[i] = 0; - } else { - coeff[i] = level+1; // because we check coeff[i] and coeff[i]-1 - coeff_count[i] = 1; - } - } - } else { - coeff[i] = ((data[zigzag[i]]>>31)|1); /* +- 1 because of gap */ - coeff_count[i] = 0; - } - } - - if (last_non_zero < 0) - return last_non_zero; - - score_tab[0] = 0; - - for (i = 0; i <= last_non_zero; i++) { - int level, run, j; - const int dct_coeff = data[zigzag[i]]; - const int zero_distortion = dct_coeff * dct_coeff; - int best_score = 256 * 256 * 256 * 120; - - int distortion; - int dequant_err; - - last_score += zero_distortion; - - -/****************** level loop unrolled: first check coeff[i] *********/ - level = coeff[i]; - - if (level > 0) // coeff[i]==0 is not possible here - dequant_err = level * qmul + qadd - dct_coeff; - else - dequant_err = level * qmul - qadd - dct_coeff; - - distortion = dequant_err*dequant_err; - - for (run = 0; run <= i - left_limit; run++) { - - int score = distortion + lambda*RunLevel_CalcBits_inter(run, level) + score_tab[i - run]; - - if (score < best_score) - { - best_score = score_tab[i + 1] = score; - run_tab[i + 1] = run; - level_tab[i + 1] = level; - } - } - - for (run = 0; run <= i - left_limit; run++) { - int score = distortion + lambda*RunLevelLast_CalcBits_inter(run, level) + score_tab[i - run]; - - if (score < last_score) - { - last_score = score; - last_run = run; - last_level = level; - last_i = i + 1; + p = 0; + do { + level = get_coeff(bs, &run, &last, 0, 0); + p += run; + if (p & ~63) { + DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index"); + break; } - } - -/****************** level loop unrolled: if possible, check coeff[i]-1 *********/ - - if (coeff_count[i]) { - level--; - dequant_err -= qmul; - distortion = dequant_err*dequant_err; - - for (run = 0; run <= i - left_limit; run++) { - int score = distortion + lambda*RunLevel_CalcBits_inter(run, level) + score_tab[i-run]; - - if (score < best_score) - { - best_score = score_tab[i + 1] = score; - run_tab[i + 1] = run; - level_tab[i + 1] = level; - } + if (level < 0) { + level = ((2 * -level + 1) * matrix[scan[p]] * quant) >> 4; + block[scan[p]] = (level <= 2048 ? -level : -2048); + } else { + level = ((2 * level + 1) * matrix[scan[p]] * quant) >> 4; + block[scan[p]] = (level <= 2047 ? level : 2047); } - for (run = 0; run <= i - left_limit; run++) { - int score = distortion + lambda*RunLevelLast_CalcBits_inter(run, level) + score_tab[i-run]; - - if (score < last_score) - { - last_score = score; - last_run = run; - last_level = level; - last_i = i + 1; - } - - } - } // of check coeff[i]-1 - - -/****************** checking coeff[i]-2 doesn't isn't supported *********/ - -/****************** add distorsion for higher RUN (-> coeff[i]==0) *******/ - for (j = left_limit; j <= i; j++) - score_tab[j] += zero_distortion; - - score_limit += zero_distortion; - - if (score_tab[i + 1] < score_limit) - score_limit = score_tab[i + 1]; - - // there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level - // so we finalize only if we have no chance of getting lower than score_limit + 1*lambda anymore - - while (score_tab[left_limit] > score_limit + lambda) - left_limit++; - - - } // end of (i=0;i<=last_non_zero;i++) - - last_non_zero = last_i - 1; - if (last_non_zero < 0) - return last_non_zero; - - i = last_i; - - memset(qcoeff,0x00,64*sizeof(int16_t)); - - qcoeff[zigzag[last_non_zero]] = last_level; - i -= last_run + 1; - - for (; i > 0; i -= run_tab[i] + 1) - { - qcoeff[zigzag[i-1]] = level_tab[i]; - } + sum ^= block[scan[p]]; + + p++; + } while (!last); - return last_non_zero; + /* mismatch control */ + if ((sum & 1) == 0) { + block[63] ^= 1; + } } -int -dct_quantize_trellis_inter_mpeg_c (int16_t *qcoeff, const int16_t *data, int quant) -{ return 64; } - - - - /***************************************************************************** * VLC tables and other constant arrays @@ -1835,7 +1477,7 @@ 0, 0, 0, 0, 0, 0, 0, 0, }, /* intra = 0, last = 1 */ - { + { 0, 40, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -1879,9 +1521,9 @@ VLC sprite_trajectory_code[32768]; VLC sprite_trajectory_len[15] = { - { 0x00 , 2}, - { 0x02 , 3}, { 0x03, 3}, { 0x04, 3}, { 0x05, 3}, { 0x06, 3}, - { 0x0E , 4}, { 0x1E, 5}, { 0x3E, 6}, { 0x7F, 7}, { 0xFE, 8}, + { 0x00 , 2}, + { 0x02 , 3}, { 0x03, 3}, { 0x04, 3}, { 0x05, 3}, { 0x06, 3}, + { 0x0E , 4}, { 0x1E, 5}, { 0x3E, 6}, { 0x7E, 7}, { 0xFE, 8}, { 0x1FE, 9}, {0x3FE,10}, {0x7FE,11}, {0xFFE,12} }; @@ -2327,14 +1969,14 @@ }; short const dc_threshold[] = { - 21514, 26984, 8307, 28531, 29798, 24951, 25970, 26912, - 8307, 25956, 26994, 25974, 8292, 29286, 28015, 29728, - 25960, 18208, 21838, 18208, 19536, 22560, 26998, 8260, - 28515, 25956, 8291, 25640, 30309, 27749, 11817, 22794, - 30063, 8306, 28531, 29798, 24951, 25970, 25632, 29545, - 29300, 25193, 29813, 29295, 26656, 29537, 29728, 8303, - 26983, 25974, 24864, 25443, 29541, 8307, 28532, 26912, - 29556, 29472, 30063, 25458, 8293, 28515, 25956, 2606 + 26708, 29545, 29472, 26223, 30580, 29281, 8293, 29545, + 25632, 29285, 30313, 25701, 26144, 28530, 8301, 26740, + 8293, 20039, 8277, 20551, 8268, 30296, 17513, 25376, + 25711, 25445, 10272, 11825, 11825, 10544, 2606, 28505, + 29301, 29472, 26223, 30580, 29281, 8293, 26980, 29811, + 26994, 30050, 28532, 8306, 24936, 8307, 28532, 26400, + 30313, 8293, 25441, 25955, 29555, 29728, 8303, 29801, + 8307, 28531, 29301, 25955, 25376, 25711, 11877, 10 }; VLC const dc_lum_tab[] = {