--- mbcoding.c 2003/02/21 08:32:34 1.44 +++ mbcoding.c 2003/04/27 19:45:52 1.44.2.6 @@ -51,6 +51,8 @@ #include #include +#include + #include "../portab.h" #include "../global.h" #include "bitstream.h" @@ -498,7 +500,7 @@ { if ((level = qcoeff[zigzag[i++]]) != 0) { - abs_level = ABS(prev_level); + abs_level = abs(prev_level); abs_level = abs_level < 64 ? abs_level : 0; code = coeff_VLC[1][0][abs_level][prev_run].code; len = coeff_VLC[1][0][abs_level][prev_run].len; @@ -518,7 +520,7 @@ run++; } - abs_level = ABS(prev_level); + abs_level = abs(prev_level); abs_level = abs_level < 64 ? abs_level : 0; code = coeff_VLC[1][1][abs_level][prev_run].code; len = coeff_VLC[1][1][abs_level][prev_run].len; @@ -559,7 +561,7 @@ { if ((level = qcoeff[zigzag[i++]]) != 0) { - abs_level = ABS(prev_level); + abs_level = abs(prev_level); abs_level = abs_level < 64 ? abs_level : 0; len = coeff_VLC[1][0][abs_level][prev_run].len; bits += len!=128 ? len : 30; @@ -572,7 +574,7 @@ run++; } - abs_level = ABS(prev_level); + abs_level = abs(prev_level); abs_level = abs_level < 64 ? abs_level : 0; len = coeff_VLC[1][1][abs_level][prev_run].len; bits += len!=128 ? 
len : 30; @@ -627,6 +629,13 @@ #endif + +static int iDQtab[5] = { + 1, 0, -1 /* no change */, 2, 3 +}; +#define DQ_VALUE2INDEX(value) iDQtab[(value)+2] + + static __inline void CodeBlockIntra(const FRAMEINFO * const frame, const MACROBLOCK * pMB, @@ -657,14 +666,14 @@ BitstreamPutBits(bs, 0, 1); // write cbpy - BitstreamPutBits(bs, cbpy_tab[cbpy].code, cbpy_tab[cbpy].len); + BitstreamPutBits(bs, xvid_cbpy_tab[cbpy].code, xvid_cbpy_tab[cbpy].len); // write dquant if (pMB->mode == MODE_INTRA_Q) - BitstreamPutBits(bs, pMB->dquant, 2); + BitstreamPutBits(bs, DQ_VALUE2INDEX(pMB->dquant), 2); // write interlacing - if (frame->global_flags & XVID_INTERLACING) { + if (frame->vol_flags & XVID_VOL_INTERLACING) { BitstreamPutBit(bs, pMB->field_dct); } // code block coeffs @@ -678,7 +687,7 @@ if (pMB->cbp & (1 << (5 - i))) { const uint16_t *scan_table = - frame->global_flags & XVID_ALTERNATESCAN ? + frame->vop_flags & XVID_VOP_ALTERNATESCAN ? scan_tables[2] : scan_tables[pMB->acpred_directions[i]]; bits = BitstreamPos(bs); @@ -719,14 +728,14 @@ BitstreamPutBit(bs, pMB->mcsel); // mcsel: '0'=local motion, '1'=GMC // write cbpy - BitstreamPutBits(bs, cbpy_tab[cbpy].code, cbpy_tab[cbpy].len); + BitstreamPutBits(bs, xvid_cbpy_tab[cbpy].code, xvid_cbpy_tab[cbpy].len); // write dquant if (pMB->mode == MODE_INTER_Q) - BitstreamPutBits(bs, pMB->dquant, 2); + BitstreamPutBits(bs, DQ_VALUE2INDEX(pMB->dquant), 2); // interlacing - if (frame->global_flags & XVID_INTERLACING) { + if (frame->vol_flags & XVID_VOL_INTERLACING) { if (pMB->cbp) { BitstreamPutBit(bs, pMB->field_dct); DPRINTF(DPRINTF_MB,"codep: field_dct: %i", pMB->field_dct); @@ -758,7 +767,7 @@ if (pMB->cbp & (1 << (5 - i))) { const uint16_t *scan_table = - frame->global_flags & XVID_ALTERNATESCAN ? + frame->vop_flags & XVID_VOP_ALTERNATESCAN ? 
scan_tables[2] : scan_tables[0]; #ifdef BIGLUT @@ -1052,7 +1061,7 @@ return data; res = BitstreamGetBits(bs, fcode - 1); - mv = ((ABS(data) - 1) * scale_fac) + res + 1; + mv = ((abs(data) - 1) * scale_fac) + res + 1; return data < 0 ? -mv : mv; @@ -1260,6 +1269,305 @@ } + + + + + + +/************************************************************************ + * Trellis based R-D optimal quantization * + * not really "bitstream" or "mbcoding" related, but needs VLC tables * + * * + ************************************************************************/ + + +int __inline +RunLevel_CalcBits_inter(const int16_t run, int16_t level) +{ + const int esc_length = 30; + + if (!((level+32) & -64)) + return coeff_VLC[0][0][level+32][run].len; + else + return esc_length; +} + +int __inline +RunLevelLast_CalcBits_inter(const int16_t run, const int16_t level) +{ + const int esc_length = 30; + + if (!((level+32) & -64)) + return coeff_VLC[0][1][level+32][run].len; + else + return esc_length; +} + + +int __inline +RunLevel_CalcBits_intra(const int16_t run, int16_t level) +{ + const int esc_length = 30; + int bits; + + level = abs(level); + if (!(level & -64)) { + bits = coeff_VLC[1][0][level][run].len; + if (bits!=128) + return bits; + } + return esc_length; +} + +int __inline +RunLevelLast_CalcBits_intra(const int16_t run, int16_t level) +{ + const int esc_length = 30; + int bits; + + level = abs(level); + if (!(level & -64)) { + bits = coeff_VLC[1][1][level][run].len; + if (bits!=128) + return bits; + } + return esc_length; +} + +/* based on ffmpeg's trellis quant, thanks! 
*/ +/* (C) 2003 Michael Niedermayer */ + +int +dct_quantize_trellis_inter_h263_c (int16_t *qcoeff, const int16_t *data, int quant) +{ + +/* input: original quantized DCT coefficients (to calc distortion)*/ +/* already quantized DCT coefficients */ +/* quantizer */ +/* output: modified table of quantized DCT coefficients */ + +/* maybe combining quantize&Trellis would be faster (even though it disables MMX quant) */ + + int run_tab[65]; + int level_tab[65]; + int score_tab[65]; + int last_run = 0; + int last_level = 0; + int last_score = 0; + int last_i = 0; + int coeff[64]; + int coeff_count[64]; /* is a table useful for this 0-1 (or 1-2) table? */ + int last_non_zero, i; + + const uint16_t *const zigzag = &scan_tables[0][0]; + /* ordinary zigzag order, so it's not INTERLACE compatible, yet */ + + const int qmul = 2*quant; + const int qadd = ((quant-1)|1); + +/* quant is not needed anymore after this */ + + int score_limit = 0; + int left_limit = 0; + + const int lambda = (quant * quant * 123 + 64) >> 7; // default lagrangian + +/* control lambda through an ENVIRONMENT variable (for automatic optimization) */ + +/* + const int lfact=123; // better control of the lagrangian lambda + int lambda = (quant * quant * 123 + 64) >> 7; // default lagrangian + + const char * const trellis_lambda = getenv("TRELLIS_LAMBDA"); + if(trellis_lambda) + lfact = atoi(trellis_lambda); + if (lfact < 1) + lfact = 123; // why this value? Who knows? 
But 123 seems better than 109 = 0.85<<7 + + lambda = (quant * quant * lfact + 64) >> 7; // lagrangian +*/ + + last_non_zero = -1; + for (i = 0; i < 64; i++) + { + const int level = qcoeff[zigzag[i]]; + + if (level) { + last_non_zero = i; + + if (level>0) { + if (level==1) { + coeff[i] = 1; + coeff_count[i] = 0; + } else { + coeff[i] = level; + coeff_count[i] = 1; + } + } else { + if (level==-1) { + coeff[i] = -1; + coeff_count[i] = 0; + } else { + coeff[i] = level+1; // because we check coeff[i] and coeff[i]-1 + coeff_count[i] = 1; + } + } + } else { + coeff[i] = ((data[zigzag[i]]>>31)|1); /* +- 1 because of gap */ + coeff_count[i] = 0; + } + } + + if (last_non_zero < 0) + return last_non_zero; + + score_tab[0] = 0; + + for (i = 0; i <= last_non_zero; i++) { + int level, run, j; + const int dct_coeff = data[zigzag[i]]; + const int zero_distortion = dct_coeff * dct_coeff; + int best_score = 256 * 256 * 256 * 120; + + int distortion; + int dequant_err; + + last_score += zero_distortion; + + +/****************** level loop unrolled: first check coeff[i] *********/ + level = coeff[i]; + + if (level > 0) // coeff[i]==0 is not possible here + dequant_err = level * qmul + qadd - dct_coeff; + else + dequant_err = level * qmul - qadd - dct_coeff; + + distortion = dequant_err*dequant_err; + + for (run = 0; run <= i - left_limit; run++) { + + int score = distortion + lambda*RunLevel_CalcBits_inter(run, level) + score_tab[i - run]; + + if (score < best_score) + { + best_score = score_tab[i + 1] = score; + run_tab[i + 1] = run; + level_tab[i + 1] = level; + } + } + + for (run = 0; run <= i - left_limit; run++) { + int score = distortion + lambda*RunLevelLast_CalcBits_inter(run, level) + score_tab[i - run]; + + if (score < last_score) + { + last_score = score; + last_run = run; + last_level = level; + last_i = i + 1; + } + } + +/****************** level loop unrolled: if possible, check coeff[i]-1 *********/ + + if (coeff_count[i]) { + + level--; + dequant_err -= qmul; + 
distortion = dequant_err*dequant_err; + + for (run = 0; run <= i - left_limit; run++) { + int score = distortion + lambda*RunLevel_CalcBits_inter(run, level) + score_tab[i-run]; + + if (score < best_score) + { + best_score = score_tab[i + 1] = score; + run_tab[i + 1] = run; + level_tab[i + 1] = level; + } + } + + for (run = 0; run <= i - left_limit; run++) { + int score = distortion + lambda*RunLevelLast_CalcBits_inter(run, level) + score_tab[i-run]; + + if (score < last_score) + { + last_score = score; + last_run = run; + last_level = level; + last_i = i + 1; + } + + } + } // of check coeff[i]-1 + + +/****************** checking coeff[i]-2 isn't supported *********/ + +/****************** add distortion for higher RUN (-> coeff[i]==0) *******/ + for (j = left_limit; j <= i; j++) + score_tab[j] += zero_distortion; + + score_limit += zero_distortion; + + if (score_tab[i + 1] < score_limit) + score_limit = score_tab[i + 1]; + + // there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level + // so we finalize only if we have no chance of getting lower than score_limit + 1*lambda anymore + + while (score_tab[left_limit] > score_limit + lambda) + left_limit++; + + + } // end of (i=0;i<=last_non_zero;i++) + + + last_score = 256 * 256 * 256 * 120; + for (i = left_limit; i <= last_non_zero + 1; i++) + { + int score = score_tab[i]; + if (i) + score += 2*lambda; /* coded block means 2 extra bits (roughly) */ + + if (score < last_score) + { + last_score = score; + last_i = i; + last_level = level_tab[i]; + last_run = run_tab[i]; + } + } + + last_non_zero = last_i - 1; + if (last_non_zero < 0) + return last_non_zero; + + i = last_i; + + memset(qcoeff,0x00,64*sizeof(int16_t)); + + qcoeff[zigzag[last_non_zero]] = last_level; + i -= last_run + 1; + + for (; i > 0; i -= run_tab[i] + 1) + { + qcoeff[zigzag[i-1]] = level_tab[i]; + } + + return last_non_zero; +} + +int +dct_quantize_trellis_inter_mpeg_c (int16_t *qcoeff, const 
int16_t *data, int quant) +{ return 64; } + + + + + /***************************************************************************** * VLC tables and other constant arrays ****************************************************************************/ @@ -1618,7 +1926,7 @@ {5, 6}, {5, 9}, {5, 8}, {3, 7}, {2, 9} }; -const VLC cbpy_tab[16] = { +const VLC xvid_cbpy_tab[16] = { {3, 4}, {5, 5}, {4, 5}, {9, 4}, {3, 5}, {7, 4}, {2, 6}, {11, 4}, {2, 5}, {3, 6}, {5, 4}, {10, 4}, {4, 4}, {8, 4}, {6, 4}, {3, 2} };