--- mbtransquant.c 2003/09/10 00:54:27 1.21.2.16 +++ mbtransquant.c 2003/11/23 17:01:08 1.21.2.19 @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: mbtransquant.c,v 1.21.2.16 2003/09/10 00:54:27 edgomez Exp $ + * $Id: mbtransquant.c,v 1.21.2.19 2003/11/23 17:01:08 edgomez Exp $ * ****************************************************************************/ @@ -39,11 +39,11 @@ #include "../bitstream/zigzag.h" #include "../dct/fdct.h" #include "../dct/idct.h" -#include "../quant/quant_mpeg4.h" -#include "../quant/quant_h263.h" +#include "../quant/quant.h" #include "../encoder.h" #include "../image/reduced.h" +#include "../quant/quant_matrix.h" MBFIELDTEST_PTR MBFieldTest; @@ -126,10 +126,10 @@ int mpeg; int scaler_lum, scaler_chr; - quanth263_intraFuncPtr const quant[2] = + quant_intraFuncPtr const quant[2] = { - (quanth263_intraFuncPtr)quant_intra, - (quanth263_intraFuncPtr)quant4_intra + quant_h263_intra, + quant_mpeg_intra }; mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT); @@ -157,10 +157,10 @@ int mpeg; int scaler_lum, scaler_chr; - quanth263_intraFuncPtr const dequant[2] = + quant_intraFuncPtr const dequant[2] = { - (quanth263_intraFuncPtr)dequant_intra, - (quanth263_intraFuncPtr)dequant4_intra + dequant_h263_intra, + dequant_mpeg_intra }; mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT); @@ -177,26 +177,13 @@ stop_iquant_timer(); } - -typedef int (*trellis_func_ptr_t)(int16_t *const Out, - const int16_t *const In, - int Q, - const uint16_t * const Zigzag, - int Non_Zero); - -static int -dct_quantize_trellis_h263_c(int16_t *const Out, - const int16_t *const In, - int Q, - const uint16_t * const Zigzag, - int Non_Zero); - -static int -dct_quantize_trellis_mpeg_c(int16_t *const Out, - const int16_t *const In, - int Q, - const uint16_t * const Zigzag, - int Non_Zero); +static int +dct_quantize_trellis_c(int16_t *const Out, + const int16_t *const In, + int Q, + const uint16_t * const Zigzag, + const uint16_t * const QuantMatrix, + int Non_Zero); /* Quantize all blocks -- Inter mode */ static __inline uint8_t @@ -214,16 +201,10 @@ int sum; int code_block, mpeg; - quanth263_interFuncPtr const quant[2] = - { - (quanth263_interFuncPtr)quant_inter, - (quanth263_interFuncPtr)quant4_inter - }; - - trellis_func_ptr_t const trellis[2] = + quant_interFuncPtr const quant[2] = { - (trellis_func_ptr_t)dct_quantize_trellis_h263_c, - (trellis_func_ptr_t)dct_quantize_trellis_mpeg_c + quant_h263_inter, + quant_mpeg_inter }; mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT); @@ -236,7 +217,21 @@ sum = quant[mpeg](&qcoeff[i*64], &data[i*64], pMB->quant); if(sum && (frame->vop_flags & XVID_VOP_TRELLISQUANT)) { - sum = trellis[mpeg](&qcoeff[i*64], &data[i*64], pMB->quant, &scan_tables[0][0], 63); + const static uint16_t h263matrix[] = + { + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16 + }; + sum = dct_quantize_trellis_c(&qcoeff[i*64], &data[i*64], + pMB->quant, &scan_tables[0][0], + (mpeg)?(uint16_t*)get_inter_matrix():h263matrix, + 63); } stop_quant_timer(); @@ -277,10 +272,10 @@ { int mpeg; - quanth263_interFuncPtr const dequant[2] = + quant_interFuncPtr const dequant[2] = { - (quanth263_interFuncPtr)dequant_inter, - (quanth263_interFuncPtr)dequant4_inter + dequant_h263_inter, + dequant_mpeg_inter }; mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT); @@ -362,6 +357,7 @@ uint32_t cst; int vop_reduced; const IMAGE * const pCurrent = &frame->image; + /* Array of function pointers, indexed by [vop_reduced<<1+add] */ transfer_operation_16to8_t * const functions[4] = { @@ -370,7 +366,7 @@ (transfer_operation_16to8_t*)copy_upsampled_8x8_16to8, (transfer_operation_16to8_t*)add_upsampled_8x8_16to8 }; - + transfer_operation_16to8_t *transfer_op = NULL; if (pMB->field_dct) { @@ -449,10 +445,8 @@ uint8_t cbp; uint32_t limit; - /* - * There is no MBTrans8to16 for Inter block, that's done in motion compensation - * already - */ + /* There is no MBTrans8to16 for Inter block, that's done in motion compensation + * already */ /* Perform DCT (and field decision) */ MBfDCT(pParam, frame, pMB, x_pos, y_pos, data); @@ -490,10 +484,8 @@ uint8_t cbp; uint32_t limit; - /* - * There is no MBTrans8to16 for Inter block, that's done in motion compensation - * already - */ + /* There is no MBTrans8to16 for Inter block, that's done in motion compensation + * already */ /* Perform DCT (and field decision) */ MBfDCT(pParam, frame, pMB, x_pos, y_pos, data); @@ -511,8 +503,8 @@ * History comment: * We don't have to DeQuant, iDCT and Transfer back data for B-frames. * - * BUT some plugins require the original frame to be passed so we have - * to take care of that here + * BUT some plugins require the rebuilt original frame to be passed so we + * have to take care of that here */ if((pParam->plugin_flags & XVID_REQORIGINAL)) { @@ -637,10 +629,6 @@ MOVLINE(LINE(3, 3), tmp); } - - - - /***************************************************************************** * Trellis based R-D optimal quantization * @@ -648,19 +636,6 @@ * ****************************************************************************/ - -#if 0 -static int -dct_quantize_trellis_mpeg_c(int16_t *const Out, - const int16_t *const In, - int Q, - const uint16_t * const Zigzag, - int Non_Zero) -{ - return 63; -} -#endif - /*---------------------------------------------------------------------------- * * Trellis-Based quantization @@ -674,7 +649,7 @@ * we are at stake with a simplified Bellmand-Ford / Dijkstra Single * Source Shorted Path algo. But due to the underlying graph structure * ("Trellis"), it can be turned into a dynamic programming algo, - * partially saving the explicit graph's nodes representation. And + * partially saving the explicit graph's nodes representation. And * without using a heap, since the open frontier of the DAG is always * known, and of fixed sized. *--------------------------------------------------------------------------*/ @@ -773,10 +748,17 @@ }; static const uint8_t * const B16_17_Code_Len_Last[6] = { /* levels [1..6] */ - Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1, + Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1, }; -#define TL(q) 0xfe00/(q*q) +/* TL_SHIFT controls the precision of the RD optimizations in trellis + * valid range is [10..16]. The bigger, the more trellis is vulnerable + * to overflows in cost formulas. + * - 10 allows ac values up to 2^11 == 2048 + * - 16 allows ac values up to 2^8 == 256 + */ +#define TL_SHIFT 11 +#define TL(q) ((0xfe00>>(16-TL_SHIFT))/(q*q)) static const int Trellis_Lambda_Tabs[31] = { TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6), TL( 7), @@ -806,10 +788,15 @@ return(sum); } -/* this routine has been strippen of all debug code */ -static int -dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero) +/* this routine has been strippen of all debug code */ +static int +dct_quantize_trellis_c(int16_t *const Out, + const int16_t *const In, + int Q, + const uint16_t * const Zigzag, + const uint16_t * const QuantMatrix, + int Non_Zero) { /* @@ -819,32 +806,35 @@ * Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps. */ typedef struct { int16_t Run, Level; } NODE; - + NODE Nodes[65], Last; uint32_t Run_Costs0[64+1]; uint32_t * const Run_Costs = Run_Costs0 + 1; - const int Mult = 2*Q; - const int Bias = (Q-1) | 1; - const int Lev0 = Mult + Bias; + const int Lambda = Trellis_Lambda_Tabs[Q-1]; /* it's 1/lambda, actually */ int Run_Start = -1; - uint32_t Min_Cost = 2<<16; + uint32_t Min_Cost = 2<>4); + const int Mult = 2*q; + const int Bias = (q-1) | 1; + const int Lev0 = Mult + Bias; + const int AC = In[Zigzag[i]]; const int Level1 = Out[Zigzag[i]]; - const int Dist0 = Lambda* AC*AC; + const unsigned int Dist0 = Lambda* AC*AC; uint32_t Best_Cost = 0xf0000000; Last_Cost += Dist0; @@ -862,13 +852,13 @@ dQ = Lev0 - AC; } Cost0 = Lambda*dQ*dQ; - + Nodes[i].Run = 1; - Best_Cost = (Code_Len20[0]<<16) + Run_Costs[i-1]+Cost0; + Best_Cost = (Code_Len20[0]<0; --Run) { const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run]; - const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<16); - const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<16); + const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<1) { dQ1 = Level1*Mult-AC + Bias; dQ2 = dQ1 - Mult; @@ -933,12 +923,12 @@ * (? doesn't seem to have any effect -- gruel ) */ - Cost1 = Cost_Base + (Tbl_L1[Run-1]<<16); - Cost2 = Cost_Base + (Tbl_L2[Run-1]<<16) + dDist21; + Cost1 = Cost_Base + (Tbl_L1[Run-1]<Min_Cost+(1<<16) ) + while( Run_Costs[Run_Start]>Min_Cost+(1<=0 && C[Zigzag[Last]]==0) + + while(Last>=0 && C[Zigzag[Last]]==0) Last--; - + if (Last>=0) { int j=0, j0=0; int Run, Level; Bits = 2; /* CBP */ while(j=-24 && Level<=24) + if (Level>=-24 && Level<=24) Bits += B16_17_Code_Len[(Level<0) ? -Level-1 : Level-1][Run]; - else + else Bits += 30; } Level = C[Zigzag[Last]]; Run = j - j0; - if (Level>=-6 && Level<=6) + if (Level>=-6 && Level<=6) Bits += B16_17_Code_Len_Last[(Level<0) ? -Level-1 : Level-1][Run]; - else + else Bits += 30; } for(i=0; i<=Last; ++i) { int V = C[Zigzag[i]]*Mult; - if (V>0) + if (V>0) V += Bias; - else - if (V<0) + else + if (V<0) V -= Bias; V -= Ref[Zigzag[i]]; Dist += V*V; } - Cost = Lambda*Dist + (Bits<<16); + Cost = Lambda*Dist + (Bits<>12= %d ", Last,Max, Bits, Dist, Cost, Cost>>12 ); return Cost; @@ -1083,7 +1066,7 @@ } -static int +static int dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero) { @@ -1094,7 +1077,7 @@ * Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps. */ typedef struct { int16_t Run, Level; } NODE; - + NODE Nodes[65], Last; uint32_t Run_Costs0[64+1]; uint32_t * const Run_Costs = Run_Costs0 + 1; @@ -1104,8 +1087,8 @@ const int Lambda = Trellis_Lambda_Tabs[Q-1]; /* it's 1/lambda, actually */ int Run_Start = -1; - Run_Costs[-1] = 2<<16; /* source (w/ CBP penalty) */ - uint32_t Min_Cost = 2<<16; + Run_Costs[-1] = 2<0; --Run) { const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run]; - const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<16); - const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<16); + const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<1) { dQ1 = Level1*Mult-AC + Bias; dQ2 = dQ1 - Mult; @@ -1225,13 +1208,13 @@ * for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following: * if (Cost_Base>=Best_Cost) continue; */ - Cost1 = Cost_Base + (Tbl_L1[Run-1]<<16); - Cost2 = Cost_Base + (Tbl_L2[Run-1]<<16) + dDist21; + Cost1 = Cost_Base + (Tbl_L1[Run-1]<Min_Cost+(1<<16) ) + while( Run_Costs[Run_Start]>Min_Cost+(1<