--- mbcoding.c 2003/10/03 13:47:00 1.44.2.15 +++ mbcoding.c 2009/11/10 14:06:58 1.58 @@ -19,7 +19,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: mbcoding.c,v 1.44.2.15 2003/10/03 13:47:00 syskin Exp $ + * $Id: mbcoding.c,v 1.58 2009/11/10 14:06:58 Isibaar Exp $ * ****************************************************************************/ @@ -36,23 +36,19 @@ #include "../utils/mbfunctions.h" -/* #define BIGLUT */ +#ifdef _DEBUG +# include "../motion/estimation.h" +# include "../motion/motion_inlines.h" +# include +#endif + -#ifdef BIGLUT -#define LEVELOFFSET 2048 -#else #define LEVELOFFSET 32 -#endif +/* Initialized once during xvid_global call + * RO access is thread safe */ static REVERSE_EVENT DCT3D[2][4096]; - -#ifdef BIGLUT -static VLC coeff_VLC[2][2][4096][64]; -VLC *intra_table; -static VLC *inter_table; -#else static VLC coeff_VLC[2][2][64][64]; -#endif /* not really MB related, but VLCs are only available here */ void bs_put_spritetrajectory(Bitstream * bs, const int val) @@ -91,41 +87,26 @@ uint32_t i, j, k, intra, last, run, run_esc, level, level_esc, escape, escape_len, offset; int32_t l; -#ifdef BIGLUT - intra_table = coeff_VLC[1]; - inter_table = coeff_VLC[0]; -#endif - - for (intra = 0; intra < 2; intra++) for (i = 0; i < 4096; i++) DCT3D[intra][i].event.level = 0; - for (intra = 0; intra < 2; intra++) - for (last = 0; last < 2; last++) - { - for (run = 0; run < 63 + last; run++) - for (level = 0; level < (uint32_t)(32 << intra); level++) - { -#ifdef BIGLUT - offset = LEVELOFFSET; -#else + for (intra = 0; intra < 2; intra++) { + for (last = 0; last < 2; last++) { + for (run = 0; run < 63 + last; run++) { + for (level = 0; level < (uint32_t)(32 << intra); level++) { offset = !intra * LEVELOFFSET; -#endif coeff_VLC[intra][last][level + offset][run].len = 128; } + } } + } - for (intra = 0; intra < 2; intra++) - for (i = 0; i < 102; i++) - { -#ifdef BIGLUT - offset = LEVELOFFSET; -#else + for (intra = 0; intra < 2; intra++) { + for (i = 0; i < 102; i++) { offset = !intra * LEVELOFFSET; -#endif - for (j = 0; j < (uint32_t)(1 << (12 - coeff_tab[intra][i].vlc.len)); j++) - { + + for (j = 0; j < (uint32_t)(1 << (12 - coeff_tab[intra][i].vlc.len)); j++) { DCT3D[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].len = coeff_tab[intra][i].vlc.len; DCT3D[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].event = coeff_tab[intra][i].event; } @@ -134,58 +115,39 @@ = coeff_tab[intra][i].vlc.code << 1; coeff_VLC[intra][coeff_tab[intra][i].event.last][coeff_tab[intra][i].event.level + offset][coeff_tab[intra][i].event.run].len = coeff_tab[intra][i].vlc.len + 1; -#ifndef BIGLUT - if (!intra) -#endif - { + + if (!intra) { coeff_VLC[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].code = (coeff_tab[intra][i].vlc.code << 1) | 1; coeff_VLC[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].len = coeff_tab[intra][i].vlc.len + 1; } } + } + + for (intra = 0; intra < 2; intra++) { + for (last = 0; last < 2; last++) { + for (run = 0; run < 63 + last; run++) { + for (level = 1; level < (uint32_t)(32 << intra); level++) { - for (intra = 0; intra < 2; intra++) - for (last = 0; last < 2; last++) - for (run = 0; run < 63 + last; run++) - { - for (level = 1; level < (uint32_t)(32 << intra); level++) - { if (level <= max_level[intra][last][run] && run <= max_run[intra][last][level]) continue; -#ifdef BIGLUT - offset = LEVELOFFSET; -#else offset = !intra * LEVELOFFSET; -#endif level_esc = level - max_level[intra][last][run]; run_esc = run - 1 - max_run[intra][last][level]; - /*use this test to use shorter esc2 codes when possible - if (level_esc <= max_level[intra][last][run] && run <= max_run[intra][last][level_esc] - && !(coeff_VLC[intra][last][level_esc + offset][run].len + 7 + 1 - > coeff_VLC[intra][last][level + offset][run_esc].code + 7 + 2))*/ - if (level_esc <= max_level[intra][last][run] && run <= max_run[intra][last][level_esc]) - { + if (level_esc <= max_level[intra][last][run] && run <= max_run[intra][last][level_esc]) { escape = ESCAPE1; escape_len = 7 + 1; run_esc = run; - } - else - { - if (run_esc <= max_run[intra][last][level] && level <= max_level[intra][last][run_esc]) - { + } else { + if (run_esc <= max_run[intra][last][level] && level <= max_level[intra][last][run_esc]) { escape = ESCAPE2; escape_len = 7 + 2; level_esc = level; - } - else - { -#ifndef BIGLUT - if (!intra) -#endif - { + } else { + if (!intra) { coeff_VLC[intra][last][level + offset][run].code = (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((level & 0xfff) << 1) | 1; coeff_VLC[intra][last][level + offset][run].len = 30; @@ -202,10 +164,8 @@ | coeff_VLC[intra][last][level_esc + offset][run_esc].code; coeff_VLC[intra][last][level + offset][run].len = coeff_VLC[intra][last][level_esc + offset][run_esc].len + escape_len; -#ifndef BIGLUT - if (!intra) -#endif - { + + if (!intra) { coeff_VLC[intra][last][offset - level][run].code = (escape << coeff_VLC[intra][last][level_esc + offset][run_esc].len) | coeff_VLC[intra][last][level_esc + offset][run_esc].code | 1; @@ -214,43 +174,29 @@ } } -#ifdef BIGLUT - for (level = 32 << intra; level < 2048; level++) - { - coeff_VLC[intra][last][level + offset][run].code - = (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((level & 0xfff) << 1) | 1; - coeff_VLC[intra][last][level + offset][run].len = 30; - - coeff_VLC[intra][last][offset - level][run].code - = (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((-level & 0xfff) << 1) | 1; - coeff_VLC[intra][last][offset - level][run].len = 30; - } -#else - if (!intra) - { + if (!intra) { coeff_VLC[intra][last][0][run].code = (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((-32 & 0xfff) << 1) | 1; coeff_VLC[intra][last][0][run].len = 30; } -#endif } -/* init sprite_trajectory tables */ -/* even if GMC is not specified (it might be used later...) */ + } + } + + /* init sprite_trajectory tables + * even if GMC is not specified (it might be used later...) */ sprite_trajectory_code[0+16384].code = 0; sprite_trajectory_code[0+16384].len = 0; - for (k=0;k<14;k++) - { + for (k=0;k<14;k++) { int limit = (1< (cmp - 1)) value -= 64 * scale_factor; - pStat->iMvSum += value * value; - pStat->iMvCount++; - if (value == 0) { BitstreamPutBits(bs, mb_motion_table[32].code, mb_motion_table[32].len); @@ -312,92 +254,6 @@ } -#ifdef BIGLUT - -static __inline void -CodeCoeff(Bitstream * bs, - const int16_t qcoeff[64], - VLC * table, - const uint16_t * zigzag, - uint16_t intra) -{ - - uint32_t j, last; - short v; - VLC *vlc; - - j = intra; - last = intra; - - while (j < 64 && (v = qcoeff[zigzag[j]]) == 0) - j++; - - do { - vlc = table + 64 * 2048 + (v << 6) + j - last; - last = ++j; - - /* count zeroes */ - while (j < 64 && (v = qcoeff[zigzag[j]]) == 0) - j++; - - /* write code */ - if (j != 64) { - BitstreamPutBits(bs, vlc->code, vlc->len); - } else { - vlc += 64 * 4096; - BitstreamPutBits(bs, vlc->code, vlc->len); - break; - } - } while (1); - -} - - - -/* returns the number of bits required to encode qcoeff */ -int -CodeCoeff_CalcBits(const int16_t qcoeff[64], - VLC * table, - const uint16_t * zigzag, - uint16_t intra) -{ - int bits = 0; - uint32_t j, last; - short v; - VLC *vlc; - - j = intra; - last = intra; - - while (j < 64 && (v = qcoeff[zigzag[j]]) == 0) - j++; - - if (j >= 64) return 0; /* empty block */ - - do { - vlc = table + 64 * 2048 + (v << 6) + j - last; - last = ++j; - - /* count zeroes */ - while (j < 64 && (v = qcoeff[zigzag[j]]) == 0) - j++; - - /* write code */ - if (j != 64) { - bits += vlc->len; - } else { - vlc += 64 * 4096; - bits += vlc->len; - break; - } - } while (1); - - return bits; -} - - -#else - static __inline void CodeCoeffInter(Bitstream * bs, const int16_t qcoeff[64], @@ -602,11 +458,7 @@ return bits; } - -#endif - - -static int iDQtab[5] = { +static const int iDQtab[5] = { 1, 0, -1 /* no change */, 2, 3 }; #define DQ_VALUE2INDEX(value) iDQtab[(value)+2] @@ -668,11 +520,7 @@ bits = BitstreamPos(bs); -#ifdef BIGLUT - CodeCoeff(bs, &qcoeff[i * 64], intra_table, scan_table, 1); -#else CodeCoeffIntra(bs, &qcoeff[i * 64], scan_table); -#endif bits = BitstreamPos(bs) - bits; pStat->iTextBits += bits; @@ -718,39 +566,51 @@ } /* if inter block, write field ME flag */ - if (pMB->mode == MODE_INTER || pMB->mode == MODE_INTER_Q) { - BitstreamPutBit(bs, pMB->field_pred); + if ((pMB->mode == MODE_INTER || pMB->mode == MODE_INTER_Q) && (pMB->mcsel == 0)) { + BitstreamPutBit(bs, 0 /*pMB->field_pred*/); /* not implemented yet */ DPRINTF(XVID_DEBUG_MB,"codep: field_pred: %i\n", pMB->field_pred); /* write field prediction references */ +#if 0 /* Remove the #if once field_pred is supported */ if (pMB->field_pred) { BitstreamPutBit(bs, pMB->field_for_top); BitstreamPutBit(bs, pMB->field_for_bot); } +#endif } } + + bits = BitstreamPos(bs); + /* code motion vector(s) if motion is local */ if (!pMB->mcsel) for (i = 0; i < (pMB->mode == MODE_INTER4V ? 4 : 1); i++) { - CodeVector(bs, pMB->pmvs[i].x, frame->fcode, pStat); - CodeVector(bs, pMB->pmvs[i].y, frame->fcode, pStat); + CodeVector(bs, pMB->pmvs[i].x, frame->fcode); + CodeVector(bs, pMB->pmvs[i].y, frame->fcode); + +#if 0 /* #ifdef _DEBUG */ + if (i == 0) /* for simplicity */ { + int coded_length = BitstreamPos(bs) - bits; + int estimated_length = d_mv_bits(pMB->pmvs[i].x, pMB->pmvs[i].y, zeroMV, frame->fcode, 0); + assert(estimated_length == coded_length); + d_mv_bits(pMB->pmvs[i].x, pMB->pmvs[i].y, zeroMV, frame->fcode, 0); + } +#endif } + bits = BitstreamPos(bs) - bits; + pStat->iMVBits += bits; + bits = BitstreamPos(bs); /* code block coeffs */ for (i = 0; i < 6; i++) - if (pMB->cbp & (1 << (5 - i))) - { + if (pMB->cbp & (1 << (5 - i))) { const uint16_t *scan_table = frame->vop_flags & XVID_VOP_ALTERNATESCAN ? scan_tables[2] : scan_tables[0]; -#ifdef BIGLUT - CodeCoeff(bs, &qcoeff[i * 64], inter_table, scan_table, 0); -#else CodeCoeffInter(bs, &qcoeff[i * 64], scan_table); -#endif } bits = BitstreamPos(bs) - bits; @@ -768,6 +628,12 @@ if (frame->coding_type != I_VOP) BitstreamPutBit(bs, 0); /* not_coded */ + if (frame->vop_flags & XVID_VOP_GREYSCALE) { + pMB->cbp &= 0x3C; /* keep only bits 5-2 */ + qcoeff[4*64+0]=0; /* for INTRA DC value is saved */ + qcoeff[5*64+0]=0; + } + if (pMB->mode == MODE_INTRA || pMB->mode == MODE_INTRA_Q) CodeBlockIntra(frame, pMB, qcoeff, bs, pStat); else @@ -852,7 +718,7 @@ const uint16_t *scan_table = frame->vop_flags & XVID_VOP_ALTERNATESCAN ? scan_tables[2] : scan_tables[0]; - + int bits; /* ------------------------------------------------------------------ when a block is skipped it is decoded DIRECT(0,0) @@ -893,39 +759,41 @@ BitstreamPutBit(bs, 0 /*mb->field_pred*/); /* field ME not implemented */ /* write field prediction references */ - /* if (mb->field_pred) { +#if 0 /* Remove the #if once field_pred is supported */ + if (mb->field_pred) { BitstreamPutBit(bs, mb->field_for_top); BitstreamPutBit(bs, mb->field_for_bot); - }*/ + } +#endif } } + bits = BitstreamPos(bs); switch (mb->mode) { case MODE_INTERPOLATE: - CodeVector(bs, mb->pmvs[1].x, vcode, pStat); /* forward vector of interpolate mode */ - CodeVector(bs, mb->pmvs[1].y, vcode, pStat); + CodeVector(bs, mb->pmvs[1].x, vcode); /* forward vector of interpolate mode */ + CodeVector(bs, mb->pmvs[1].y, vcode); case MODE_BACKWARD: vcode = bcode; case MODE_FORWARD: - CodeVector(bs, mb->pmvs[0].x, vcode, pStat); - CodeVector(bs, mb->pmvs[0].y, vcode, pStat); + CodeVector(bs, mb->pmvs[0].x, vcode); + CodeVector(bs, mb->pmvs[0].y, vcode); break; case MODE_DIRECT: - CodeVector(bs, mb->pmvs[3].x, 1, pStat); /* fcode is always 1 for delta vector */ - CodeVector(bs, mb->pmvs[3].y, 1, pStat); /* prediction is always (0,0) */ + CodeVector(bs, mb->pmvs[3].x, 1); /* fcode is always 1 for delta vector */ + CodeVector(bs, mb->pmvs[3].y, 1); /* prediction is always (0,0) */ default: break; } + pStat->iMVBits += BitstreamPos(bs) - bits; + bits = BitstreamPos(bs); for (i = 0; i < 6; i++) { if (mb->cbp & (1 << (5 - i))) { -#ifdef BIGLUT - CodeCoeff(bs, &qcoeff[i * 64], inter_table, scan_tables[0], 0); -#else - CodeCoeffInter(bs, &qcoeff[i * 64], scan_tables[0]); -#endif + CodeCoeffInter(bs, &qcoeff[i * 64], scan_table); } } + pStat->iTextBits += BitstreamPos(bs) - bits; } @@ -1117,6 +985,8 @@ } +#define GET_BITS(cache, n) ((cache)>>(32-(n))) + static __inline int get_coeff(Bitstream * bs, int *run, @@ -1129,11 +999,13 @@ int32_t level; REVERSE_EVENT *reverse_event; + uint32_t cache = BitstreamShowBits(bs, 32); + if (short_video_header) /* inter-VLCs will be used for both intra and inter blocks */ intra = 0; - if (BitstreamShowBits(bs, 7) != ESCAPE) { - reverse_event = &DCT3D[intra][BitstreamShowBits(bs, 12)]; + if (GET_BITS(cache, 7) != ESCAPE) { + reverse_event = &DCT3D[intra][GET_BITS(cache, 12)]; if ((level = reverse_event->event.level) == 0) goto error; @@ -1141,31 +1013,35 @@ *last = reverse_event->event.last; *run = reverse_event->event.run; - BitstreamSkip(bs, reverse_event->len); + /* Don't forget to update the bitstream position */ + BitstreamSkip(bs, reverse_event->len+1); - return BitstreamGetBits(bs, 1) ? -level : level; + return (GET_BITS(cache, reverse_event->len+1)&0x01) ? -level : level; } - BitstreamSkip(bs, 7); + /* flush 7bits of cache */ + cache <<= 7; if (short_video_header) { /* escape mode 4 - H.263 type, only used if short_video_header = 1 */ - *last = BitstreamGetBit(bs); - *run = BitstreamGetBits(bs, 6); - level = BitstreamGetBits(bs, 8); + *last = GET_BITS(cache, 1); + *run = (GET_BITS(cache, 7) &0x3f); + level = (GET_BITS(cache, 15)&0xff); if (level == 0 || level == 128) DPRINTF(XVID_DEBUG_ERROR, "Illegal LEVEL for ESCAPE mode 4: %d\n", level); + /* We've "eaten" 22 bits */ + BitstreamSkip(bs, 22); + return (level << 24) >> 24; } - mode = BitstreamShowBits(bs, 2); + if ((mode = GET_BITS(cache, 2)) < 3) { + const int skip[3] = {1, 1, 2}; + cache <<= skip[mode]; - if (mode < 3) { - BitstreamSkip(bs, (mode == 2) ? 2 : 1); - - reverse_event = &DCT3D[intra][BitstreamShowBits(bs, 12)]; + reverse_event = &DCT3D[intra][GET_BITS(cache, 12)]; if ((level = reverse_event->event.level) == 0) goto error; @@ -1173,28 +1049,33 @@ *last = reverse_event->event.last; *run = reverse_event->event.run; - BitstreamSkip(bs, reverse_event->len); - - if (mode < 2) /* first escape mode, level is offset */ + if (mode < 2) { + /* first escape mode, level is offset */ level += max_level[intra][*last][*run]; - else /* second escape mode, run is offset */ + } else { + /* second escape mode, run is offset */ *run += max_run[intra][*last][level] + 1; + } + + /* Update bitstream position */ + BitstreamSkip(bs, 7 + skip[mode] + reverse_event->len + 1); - return BitstreamGetBits(bs, 1) ? -level : level; + return (GET_BITS(cache, reverse_event->len+1)&0x01) ? -level : level; } /* third escape mode - fixed length codes */ - BitstreamSkip(bs, 2); - *last = BitstreamGetBits(bs, 1); - *run = BitstreamGetBits(bs, 6); - BitstreamSkip(bs, 1); /* marker */ - level = BitstreamGetBits(bs, 12); - BitstreamSkip(bs, 1); /* marker */ + cache <<= 2; + *last = GET_BITS(cache, 1); + *run = (GET_BITS(cache, 7)&0x3f); + level = (GET_BITS(cache, 20)&0xfff); + + /* Update bitstream position */ + BitstreamSkip(bs, 30); return (level << 20) >> 20; error: - *run = VLC_ERROR; + *run = 64; return 0; } @@ -1206,15 +1087,16 @@ { const uint16_t *scan = scan_tables[direction]; - int level, run, last; + int level, run, last = 0; do { level = get_coeff(bs, &run, &last, 1, 0); - if (run == -1) { - DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run"); + coeff += run; + if (coeff & ~63) { + DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index"); break; } - coeff += run; + block[scan[coeff]] = level; DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i\n", scan[coeff], level); @@ -1231,37 +1113,83 @@ } void -get_inter_block(Bitstream * bs, - int16_t * block, - int direction) +get_inter_block_h263( + Bitstream * bs, + int16_t * block, + int direction, + const int quant, + const uint16_t *matrix) { const uint16_t *scan = scan_tables[direction]; + const uint16_t quant_m_2 = quant << 1; + const uint16_t quant_add = (quant & 1 ? quant : quant - 1); int p; int level; int run; - int last; + int last = 0; p = 0; do { level = get_coeff(bs, &run, &last, 0, 0); - if (run == -1) { - DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run"); + p += run; + if (p & ~63) { + DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index"); break; } - p += run; - block[scan[p]] = level; + if (level < 0) { + level = level*quant_m_2 - quant_add; + block[scan[p]] = (level >= -2048 ? level : -2048); + } else { + level = level * quant_m_2 + quant_add; + block[scan[p]] = (level <= 2047 ? level : 2047); + } + p++; + } while (!last); +} + +void +get_inter_block_mpeg( + Bitstream * bs, + int16_t * block, + int direction, + const int quant, + const uint16_t *matrix) +{ + const uint16_t *scan = scan_tables[direction]; + uint32_t sum = 0; + int p; + int level; + int run; + int last = 0; - DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i\n", scan[p], level); - /* DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i %08x\n", scan[p], level, BitstreamShowBits(bs, 32)); */ + p = 0; + do { + level = get_coeff(bs, &run, &last, 0, 0); + p += run; + if (p & ~63) { + DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index"); + break; + } - if (level < -2047 || level > 2047) { - DPRINTF(XVID_DEBUG_ERROR,"warning: inter overflow %i\n", level); + if (level < 0) { + level = ((2 * -level + 1) * matrix[scan[p]] * quant) >> 4; + block[scan[p]] = (level <= 2048 ? -level : -2048); + } else { + level = ((2 * level + 1) * matrix[scan[p]] * quant) >> 4; + block[scan[p]] = (level <= 2047 ? level : 2047); } + + sum ^= block[scan[p]]; + p++; } while (!last); + /* mismatch control */ + if ((sum & 1) == 0) { + block[63] ^= 1; + } } @@ -2041,14 +1969,14 @@ }; short const dc_threshold[] = { - 21514, 26984, 8307, 28531, 29798, 24951, 25970, 26912, - 8307, 25956, 26994, 25974, 8292, 29286, 28015, 29728, - 25960, 18208, 21838, 18208, 19536, 22560, 26998, 8260, - 28515, 25956, 8291, 25640, 30309, 27749, 11817, 22794, - 30063, 8306, 28531, 29798, 24951, 25970, 25632, 29545, - 29300, 25193, 29813, 29295, 26656, 29537, 29728, 8303, - 26983, 25974, 24864, 25443, 29541, 8307, 28532, 26912, - 29556, 29472, 30063, 25458, 8293, 28515, 25956, 2606 + 26708, 29545, 29472, 26223, 30580, 29281, 8293, 29545, + 25632, 29285, 30313, 25701, 26144, 28530, 8301, 26740, + 8293, 20039, 8277, 20551, 8268, 30296, 17513, 25376, + 25711, 25445, 10272, 11825, 11825, 10544, 2606, 28505, + 29301, 29472, 26223, 30580, 29281, 8293, 26980, 29811, + 26994, 30050, 28532, 8306, 24936, 8307, 28532, 26400, + 30313, 8293, 25441, 25955, 29555, 29728, 8303, 29801, + 8307, 28531, 29301, 25955, 25376, 25711, 11877, 10 }; VLC const dc_lum_tab[] = {