[cvs] / xvidcore / src / utils / mbtransquant.c Repository:
ViewVC logotype

Diff of /xvidcore/src/utils/mbtransquant.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.21.2.12, Mon May 12 12:33:16 2003 UTC revision 1.21.2.20, Mon Nov 24 22:06:19 2003 UTC
# Line 39  Line 39 
39  #include "../bitstream/zigzag.h"  #include "../bitstream/zigzag.h"
40  #include "../dct/fdct.h"  #include "../dct/fdct.h"
41  #include "../dct/idct.h"  #include "../dct/idct.h"
42  #include "../quant/quant_mpeg4.h"  #include "../quant/quant.h"
 #include "../quant/quant_h263.h"  
43  #include "../encoder.h"  #include "../encoder.h"
44    
45  #include "../image/reduced.h"  #include "../image/reduced.h"
46    #include  "../quant/quant_matrix.h"
47    
48  MBFIELDTEST_PTR MBFieldTest;  MBFIELDTEST_PTR MBFieldTest;
49    
# Line 123  Line 123 
123                           int16_t qcoeff[6 * 64],                           int16_t qcoeff[6 * 64],
124                           int16_t data[6*64])                           int16_t data[6*64])
125  {  {
126          int i;          int mpeg;
127            int scaler_lum, scaler_chr;
128    
129          for (i = 0; i < 6; i++) {          quant_intraFuncPtr const quant[2] =
130                  uint32_t iDcScaler = get_dc_scaler(pMB->quant, i < 4);                  {
131                            quant_h263_intra,
132                            quant_mpeg_intra
133                    };
134    
135            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
136            scaler_lum = get_dc_scaler(pMB->quant, 1);
137            scaler_chr = get_dc_scaler(pMB->quant, 0);
138    
139                  /* Quantize the block */                  /* Quantize the block */
140                  start_timer();                  start_timer();
141                  if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT)) {          quant[mpeg](&data[0 * 64], &qcoeff[0 * 64], pMB->quant, scaler_lum);
142                          quant_intra(&data[i * 64], &qcoeff[i * 64], pMB->quant, iDcScaler);          quant[mpeg](&data[1 * 64], &qcoeff[1 * 64], pMB->quant, scaler_lum);
143                  } else {          quant[mpeg](&data[2 * 64], &qcoeff[2 * 64], pMB->quant, scaler_lum);
144                          quant4_intra(&data[i * 64], &qcoeff[i * 64], pMB->quant, iDcScaler);          quant[mpeg](&data[3 * 64], &qcoeff[3 * 64], pMB->quant, scaler_lum);
145                  }          quant[mpeg](&data[4 * 64], &qcoeff[4 * 64], pMB->quant, scaler_chr);
146            quant[mpeg](&data[5 * 64], &qcoeff[5 * 64], pMB->quant, scaler_chr);
147                  stop_quant_timer();                  stop_quant_timer();
148          }          }
 }  
149    
150  /* DeQuantize all blocks -- Intra mode */  /* DeQuantize all blocks -- Intra mode */
151  static __inline void  static __inline void
# Line 146  Line 154 
154                             int16_t qcoeff[6 * 64],                             int16_t qcoeff[6 * 64],
155                             int16_t data[6*64])                             int16_t data[6*64])
156  {  {
157          int i;          int mpeg;
158            int scaler_lum, scaler_chr;
159    
160          for (i = 0; i < 6; i++) {          quant_intraFuncPtr const dequant[2] =
161                  uint32_t iDcScaler = get_dc_scaler(iQuant, i < 4);                  {
162                            dequant_h263_intra,
163                            dequant_mpeg_intra
164                    };
165    
166            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
167            scaler_lum = get_dc_scaler(iQuant, 1);
168            scaler_chr = get_dc_scaler(iQuant, 0);
169    
170                  start_timer();                  start_timer();
171                  if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT))          dequant[mpeg](&qcoeff[0 * 64], &data[0 * 64], iQuant, scaler_lum);
172                          dequant_intra(&qcoeff[i * 64], &data[i * 64], iQuant, iDcScaler);          dequant[mpeg](&qcoeff[1 * 64], &data[1 * 64], iQuant, scaler_lum);
173                  else          dequant[mpeg](&qcoeff[2 * 64], &data[2 * 64], iQuant, scaler_lum);
174                          dequant4_intra(&qcoeff[i * 64], &data[i * 64], iQuant, iDcScaler);          dequant[mpeg](&qcoeff[3 * 64], &data[3 * 64], iQuant, scaler_lum);
175            dequant[mpeg](&qcoeff[4 * 64], &data[4 * 64], iQuant, scaler_chr);
176            dequant[mpeg](&qcoeff[5 * 64], &data[5 * 64], iQuant, scaler_chr);
177                  stop_iquant_timer();                  stop_iquant_timer();
178          }          }
 }  
   
   
 static int  
 dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero);  
179    
180  static int  static int
181  dct_quantize_trellis_mpeg_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero);  dct_quantize_trellis_c(int16_t *const Out,
182                                               const int16_t *const In,
183                                               int Q,
184                                               const uint16_t * const Zigzag,
185                                               const uint16_t * const QuantMatrix,
186                                               int Non_Zero);
187    
188  /* Quantize all blocks -- Inter mode */  /* Quantize all blocks -- Inter mode */
189  static __inline uint8_t  static __inline uint8_t
# Line 182  Line 199 
199          int i;          int i;
200          uint8_t cbp = 0;          uint8_t cbp = 0;
201          int sum;          int sum;
202          int code_block;          int code_block, mpeg;
203    
204            quant_interFuncPtr const quant[2] =
205                    {
206                            quant_h263_inter,
207                            quant_mpeg_inter
208                    };
209    
210            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
211    
212          for (i = 0; i < 6; i++) {          for (i = 0; i < 6; i++) {
213    
214                  /* Quantize the block */                  /* Quantize the block */
215                  start_timer();                  start_timer();
216                  if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT)) {  
217                          sum = quant_inter(&qcoeff[i*64], &data[i*64], pMB->quant);                  sum = quant[mpeg](&qcoeff[i*64], &data[i*64], pMB->quant);
218                          if ( (sum) && (frame->vop_flags & XVID_VOP_TRELLISQUANT) ) {  
219                                  sum = dct_quantize_trellis_h263_c(&qcoeff[i*64], &data[i*64], pMB->quant, &scan_tables[0][0], 63)+1;                  if(sum && (frame->vop_flags & XVID_VOP_TRELLISQUANT)) {
220                                  limit = 1;                          const static uint16_t h263matrix[] =
221                          }                                  {
222                  } else {                                          16, 16, 16, 16, 16, 16, 16, 16,
223                          sum = quant4_inter(&qcoeff[i * 64], &data[i * 64], pMB->quant);                                          16, 16, 16, 16, 16, 16, 16, 16,
224  //                      if ( (sum) && (frame->vop_flags & XVID_VOP_TRELLISQUANT) )                                          16, 16, 16, 16, 16, 16, 16, 16,
225  //                              sum = dct_quantize_trellis_mpeg_c (&qcoeff[i*64], &data[i*64], pMB->quant)+1;                                          16, 16, 16, 16, 16, 16, 16, 16,
226                                            16, 16, 16, 16, 16, 16, 16, 16,
227                                            16, 16, 16, 16, 16, 16, 16, 16,
228                                            16, 16, 16, 16, 16, 16, 16, 16,
229                                            16, 16, 16, 16, 16, 16, 16, 16
230                                    };
231                            sum = dct_quantize_trellis_c(&qcoeff[i*64], &data[i*64],
232                                                                                     pMB->quant, &scan_tables[0][0],
233                                                                                     (mpeg)?(uint16_t*)get_inter_matrix():h263matrix,
234                                                                                     63);
235                  }                  }
236                  stop_quant_timer();                  stop_quant_timer();
237    
# Line 236  Line 270 
270                             int16_t qcoeff[6 * 64],                             int16_t qcoeff[6 * 64],
271                             const uint8_t cbp)                             const uint8_t cbp)
272  {  {
273          int i;          int mpeg;
274    
275            quant_interFuncPtr const dequant[2] =
276                    {
277                            dequant_h263_inter,
278                            dequant_mpeg_inter
279                    };
280    
281            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
282    
         for (i = 0; i < 6; i++) {  
                 if (cbp & (1 << (5 - i))) {  
283                          start_timer();                          start_timer();
284                          if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT))          if(cbp & (1 << (5 - 0))) dequant[mpeg](&data[0 * 64], &qcoeff[0 * 64], iQuant);
285                                  dequant_inter(&data[i * 64], &qcoeff[i * 64], iQuant);          if(cbp & (1 << (5 - 1))) dequant[mpeg](&data[1 * 64], &qcoeff[1 * 64], iQuant);
286                          else          if(cbp & (1 << (5 - 2))) dequant[mpeg](&data[2 * 64], &qcoeff[2 * 64], iQuant);
287                                  dequant4_inter(&data[i * 64], &qcoeff[i * 64], iQuant);          if(cbp & (1 << (5 - 3))) dequant[mpeg](&data[3 * 64], &qcoeff[3 * 64], iQuant);
288            if(cbp & (1 << (5 - 4))) dequant[mpeg](&data[4 * 64], &qcoeff[4 * 64], iQuant);
289            if(cbp & (1 << (5 - 5))) dequant[mpeg](&data[5 * 64], &qcoeff[5 * 64], iQuant);
290                          stop_iquant_timer();                          stop_iquant_timer();
291                  }                  }
         }  
 }  
292    
293  typedef void (transfer_operation_8to16_t) (int16_t *Dst, const uint8_t *Src, int BpS);  typedef void (transfer_operation_8to16_t) (int16_t *Dst, const uint8_t *Src, int BpS);
294  typedef void (transfer_operation_16to8_t) (uint8_t *Dst, const int16_t *Src, int BpS);  typedef void (transfer_operation_16to8_t) (uint8_t *Dst, const int16_t *Src, int BpS);
# Line 266  Line 306 
306          uint32_t stride2 = stride / 2;          uint32_t stride2 = stride / 2;
307          uint32_t next_block = stride * 8;          uint32_t next_block = stride * 8;
308          int32_t cst;          int32_t cst;
309            int vop_reduced;
310          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
311          const IMAGE * const pCurrent = &frame->image;          const IMAGE * const pCurrent = &frame->image;
312            transfer_operation_8to16_t * const functions[2] =
313                    {
314                            (transfer_operation_8to16_t *)transfer_8to16copy,
315                            (transfer_operation_8to16_t *)filter_18x18_to_8x8
316                    };
317          transfer_operation_8to16_t *transfer_op = NULL;          transfer_operation_8to16_t *transfer_op = NULL;
318    
319          if ((frame->vop_flags & XVID_VOP_REDUCED)) {          vop_reduced = !!(frame->vop_flags & XVID_VOP_REDUCED);
   
                 /* Image pointers */  
                 pY_Cur = pCurrent->y + (y_pos << 5) * stride  + (x_pos << 5);  
                 pU_Cur = pCurrent->u + (y_pos << 4) * stride2 + (x_pos << 4);  
                 pV_Cur = pCurrent->v + (y_pos << 4) * stride2 + (x_pos << 4);  
   
                 /* Block size */  
                 cst = 16;  
   
                 /* Operation function */  
                 transfer_op = (transfer_operation_8to16_t*)filter_18x18_to_8x8;  
         } else {  
320    
321                  /* Image pointers */                  /* Image pointers */
322                  pY_Cur = pCurrent->y + (y_pos << 4) * stride  + (x_pos << 4);          pY_Cur = pCurrent->y + (y_pos << (4+vop_reduced)) * stride  + (x_pos << (4+vop_reduced));
323                  pU_Cur = pCurrent->u + (y_pos << 3) * stride2 + (x_pos << 3);          pU_Cur = pCurrent->u + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
324                  pV_Cur = pCurrent->v + (y_pos << 3) * stride2 + (x_pos << 3);          pV_Cur = pCurrent->v + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
325    
326                  /* Block size */                  /* Block size */
327                  cst = 8;          cst = 8<<vop_reduced;
328    
329                  /* Operation function */                  /* Operation function */
330                  transfer_op = (transfer_operation_8to16_t*)transfer_8to16copy;          transfer_op = functions[vop_reduced];
         }  
331    
332          /* Do the transfer */          /* Do the transfer */
333          start_timer();          start_timer();
# Line 314  Line 347 
347                           const uint32_t x_pos,                           const uint32_t x_pos,
348                           const uint32_t y_pos,                           const uint32_t y_pos,
349                           int16_t data[6 * 64],                           int16_t data[6 * 64],
350                           const uint32_t add,                           const uint32_t add, /* Must be 1 or 0 */
351                           const uint8_t cbp)                           const uint8_t cbp)
352  {  {
353          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
# Line 322  Line 355 
355          uint32_t stride2 = stride / 2;          uint32_t stride2 = stride / 2;
356          uint32_t next_block = stride * 8;          uint32_t next_block = stride * 8;
357          uint32_t cst;          uint32_t cst;
358            int vop_reduced;
359          const IMAGE * const pCurrent = &frame->image;          const IMAGE * const pCurrent = &frame->image;
360    
361            /* Array of function pointers, indexed by [vop_reduced<<1+add] */
362            transfer_operation_16to8_t  * const functions[4] =
363                    {
364                            (transfer_operation_16to8_t*)transfer_16to8copy,
365                            (transfer_operation_16to8_t*)transfer_16to8add,
366                            (transfer_operation_16to8_t*)copy_upsampled_8x8_16to8,
367                            (transfer_operation_16to8_t*)add_upsampled_8x8_16to8
368                    };
369    
370          transfer_operation_16to8_t *transfer_op = NULL;          transfer_operation_16to8_t *transfer_op = NULL;
371    
372          if (pMB->field_dct) {          if (pMB->field_dct) {
# Line 330  Line 374 
374                  stride *= 2;                  stride *= 2;
375          }          }
376    
377          if ((frame->vop_flags & XVID_VOP_REDUCED)) {          /* Makes this vars booleans */
378            vop_reduced = !!(frame->vop_flags & XVID_VOP_REDUCED);
                 /* Image pointers */  
                 pY_Cur = pCurrent->y + (y_pos << 5) * stride  + (x_pos << 5);  
                 pU_Cur = pCurrent->u + (y_pos << 4) * stride2 + (x_pos << 4);  
                 pV_Cur = pCurrent->v + (y_pos << 4) * stride2 + (x_pos << 4);  
   
                 /* Block size */  
                 cst = 16;  
   
                 /* Operation function */  
                 if(add)  
                         transfer_op = (transfer_operation_16to8_t*)add_upsampled_8x8_16to8;  
                 else  
                         transfer_op = (transfer_operation_16to8_t*)copy_upsampled_8x8_16to8;  
         } else {  
379    
380                  /* Image pointers */                  /* Image pointers */
381                  pY_Cur = pCurrent->y + (y_pos << 4) * stride  + (x_pos << 4);          pY_Cur = pCurrent->y + (y_pos << (4+vop_reduced)) * stride  + (x_pos << (4+vop_reduced));
382                  pU_Cur = pCurrent->u + (y_pos << 3) * stride2 + (x_pos << 3);          pU_Cur = pCurrent->u + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
383                  pV_Cur = pCurrent->v + (y_pos << 3) * stride2 + (x_pos << 3);          pV_Cur = pCurrent->v + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
384    
385                  /* Block size */                  /* Block size */
386                  cst = 8;          cst = 8<<vop_reduced;
387    
388                  /* Operation function */                  /* Operation function */
389                  if(add)          transfer_op = functions[(vop_reduced<<1) + add];
                         transfer_op = (transfer_operation_16to8_t*)transfer_16to8add;  
                 else  
                         transfer_op = (transfer_operation_16to8_t*)transfer_16to8copy;  
         }  
390    
391          /* Do the operation */          /* Do the operation */
392          start_timer();          start_timer();
# Line 419  Line 445 
445          uint8_t cbp;          uint8_t cbp;
446          uint32_t limit;          uint32_t limit;
447    
448          /*          /* There is no MBTrans8to16 for Inter block, that's done in motion compensation
449           * There is no MBTrans8to16 for Inter block, that's done in motion compensation           * already */
          * already  
          */  
450    
451          /* Perform DCT (and field decision) */          /* Perform DCT (and field decision) */
452          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
# Line 430  Line 454 
454          /* Set the limit threshold */          /* Set the limit threshold */
455          limit = PVOP_TOOSMALL_LIMIT + ((pMB->quant == 1)? 1 : 0);          limit = PVOP_TOOSMALL_LIMIT + ((pMB->quant == 1)? 1 : 0);
456    
457            if (frame->vop_flags & XVID_VOP_CARTOON)
458                    limit *= 3;
459    
460          /* Quantize the block */          /* Quantize the block */
461          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 0, limit);          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 0, limit);
462    
# Line 457  Line 484 
484          uint8_t cbp;          uint8_t cbp;
485          uint32_t limit;          uint32_t limit;
486    
487          /*          /* There is no MBTrans8to16 for Inter block, that's done in motion compensation
488           * There is no MBTrans8to16 for Inter block, that's done in motion compensation           * already */
          * already  
          */  
489    
490          /* Perform DCT (and field decision) */          /* Perform DCT (and field decision) */
491          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
# Line 468  Line 493 
493          /* Set the limit threshold */          /* Set the limit threshold */
494          limit = BVOP_TOOSMALL_LIMIT;          limit = BVOP_TOOSMALL_LIMIT;
495    
496            if (frame->vop_flags & XVID_VOP_CARTOON)
497                    limit *= 2;
498    
499          /* Quantize the block */          /* Quantize the block */
500          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 1, limit);          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 1, limit);
501    
# Line 475  Line 503 
503           * History comment:           * History comment:
504           * We don't have to DeQuant, iDCT and Transfer back data for B-frames.           * We don't have to DeQuant, iDCT and Transfer back data for B-frames.
505           *           *
506           * BUT some plugins require the original frame to be passed so we have           * BUT some plugins require the rebuilt original frame to be passed so we
507           * to take care of that here           * have to take care of that here
508           */           */
509          if((pParam->plugin_flags & XVID_REQORIGINAL)) {          if((pParam->plugin_flags & XVID_REQORIGINAL)) {
510    
# Line 546  Line 574 
574    
575          /* left blocks */          /* left blocks */
576    
577          // 1=2, 2=4, 4=8, 8=1          /* 1=2, 2=4, 4=8, 8=1 */
578          MOVLINE(tmp, LINE(0, 1));          MOVLINE(tmp, LINE(0, 1));
579          MOVLINE(LINE(0, 1), LINE(0, 2));          MOVLINE(LINE(0, 1), LINE(0, 2));
580          MOVLINE(LINE(0, 2), LINE(0, 4));          MOVLINE(LINE(0, 2), LINE(0, 4));
581          MOVLINE(LINE(0, 4), LINE(2, 0));          MOVLINE(LINE(0, 4), LINE(2, 0));
582          MOVLINE(LINE(2, 0), tmp);          MOVLINE(LINE(2, 0), tmp);
583    
584          // 3=6, 6=12, 12=9, 9=3          /* 3=6, 6=12, 12=9, 9=3 */
585          MOVLINE(tmp, LINE(0, 3));          MOVLINE(tmp, LINE(0, 3));
586          MOVLINE(LINE(0, 3), LINE(0, 6));          MOVLINE(LINE(0, 3), LINE(0, 6));
587          MOVLINE(LINE(0, 6), LINE(2, 4));          MOVLINE(LINE(0, 6), LINE(2, 4));
588          MOVLINE(LINE(2, 4), LINE(2, 1));          MOVLINE(LINE(2, 4), LINE(2, 1));
589          MOVLINE(LINE(2, 1), tmp);          MOVLINE(LINE(2, 1), tmp);
590    
591          // 5=10, 10=5          /* 5=10, 10=5 */
592          MOVLINE(tmp, LINE(0, 5));          MOVLINE(tmp, LINE(0, 5));
593          MOVLINE(LINE(0, 5), LINE(2, 2));          MOVLINE(LINE(0, 5), LINE(2, 2));
594          MOVLINE(LINE(2, 2), tmp);          MOVLINE(LINE(2, 2), tmp);
595    
596          // 7=14, 14=13, 13=11, 11=7          /* 7=14, 14=13, 13=11, 11=7 */
597          MOVLINE(tmp, LINE(0, 7));          MOVLINE(tmp, LINE(0, 7));
598          MOVLINE(LINE(0, 7), LINE(2, 6));          MOVLINE(LINE(0, 7), LINE(2, 6));
599          MOVLINE(LINE(2, 6), LINE(2, 5));          MOVLINE(LINE(2, 6), LINE(2, 5));
# Line 574  Line 602 
602    
603          /* right blocks */          /* right blocks */
604    
605          // 1=2, 2=4, 4=8, 8=1          /* 1=2, 2=4, 4=8, 8=1 */
606          MOVLINE(tmp, LINE(1, 1));          MOVLINE(tmp, LINE(1, 1));
607          MOVLINE(LINE(1, 1), LINE(1, 2));          MOVLINE(LINE(1, 1), LINE(1, 2));
608          MOVLINE(LINE(1, 2), LINE(1, 4));          MOVLINE(LINE(1, 2), LINE(1, 4));
609          MOVLINE(LINE(1, 4), LINE(3, 0));          MOVLINE(LINE(1, 4), LINE(3, 0));
610          MOVLINE(LINE(3, 0), tmp);          MOVLINE(LINE(3, 0), tmp);
611    
612          // 3=6, 6=12, 12=9, 9=3          /* 3=6, 6=12, 12=9, 9=3 */
613          MOVLINE(tmp, LINE(1, 3));          MOVLINE(tmp, LINE(1, 3));
614          MOVLINE(LINE(1, 3), LINE(1, 6));          MOVLINE(LINE(1, 3), LINE(1, 6));
615          MOVLINE(LINE(1, 6), LINE(3, 4));          MOVLINE(LINE(1, 6), LINE(3, 4));
616          MOVLINE(LINE(3, 4), LINE(3, 1));          MOVLINE(LINE(3, 4), LINE(3, 1));
617          MOVLINE(LINE(3, 1), tmp);          MOVLINE(LINE(3, 1), tmp);
618    
619          // 5=10, 10=5          /* 5=10, 10=5 */
620          MOVLINE(tmp, LINE(1, 5));          MOVLINE(tmp, LINE(1, 5));
621          MOVLINE(LINE(1, 5), LINE(3, 2));          MOVLINE(LINE(1, 5), LINE(3, 2));
622          MOVLINE(LINE(3, 2), tmp);          MOVLINE(LINE(3, 2), tmp);
623    
624          // 7=14, 14=13, 13=11, 11=7          /* 7=14, 14=13, 13=11, 11=7 */
625          MOVLINE(tmp, LINE(1, 7));          MOVLINE(tmp, LINE(1, 7));
626          MOVLINE(LINE(1, 7), LINE(3, 6));          MOVLINE(LINE(1, 7), LINE(3, 6));
627          MOVLINE(LINE(3, 6), LINE(3, 5));          MOVLINE(LINE(3, 6), LINE(3, 5));
# Line 601  Line 629 
629          MOVLINE(LINE(3, 3), tmp);          MOVLINE(LINE(3, 3), tmp);
630  }  }
631    
632    /*****************************************************************************
633     *               Trellis based R-D optimal quantization
634     *
635     *   Trellis Quant code (C) 2003 Pascal Massimino skal(at)planet-d.net
636     *
637     ****************************************************************************/
638    
639    /*----------------------------------------------------------------------------
640     *
641     *        Trellis-Based quantization
642     *
643     * So far I understand this paper:
644     *
645     *  "Trellis-Based R-D Optimal Quantization in H.263+"
646     *    J.Wen, M.Luttrell, J.Villasenor
647     *    IEEE Transactions on Image Processing, Vol.9, No.8, Aug. 2000.
648     *
649     * we are at stake with a simplified Bellmand-Ford / Dijkstra Single
650     * Source Shortest Path algo. But due to the underlying graph structure
651     * ("Trellis"), it can be turned into a dynamic programming algo,
652     * partially saving the explicit graph's nodes representation. And
653     * without using a heap, since the open frontier of the DAG is always
654     * known, and of fixed size.
655     *--------------------------------------------------------------------------*/
656    
657    
658    
659  /************************************************************************  /* Codes lengths for relevant levels. */
  *               Trellis based R-D optimal quantization                 *  
  *                                                                      *  
  *   Trellis Quant code (C) 2003 Pascal Massimino skal(at)planet-d.net  *  
  *                                                                      *  
  ************************************************************************/  
   
   
 static int  
 dct_quantize_trellis_mpeg_c(int16_t *const Out, const int16_t *const In, int Q,  
                 const uint16_t * const Zigzag, int Non_Zero)  
 { return 63; }  
   
   
 //////////////////////////////////////////////////////////  
 //  
 //        Trellis-Based quantization  
 //  
 // So far I understand this paper:  
 //  
 //  "Trellis-Based R-D Optimal Quantization in H.263+"  
 //    J.Wen, M.Luttrell, J.Villasenor  
 //    IEEE Transactions on Image Processing, Vol.9, No.8, Aug. 2000.  
 //  
 // we are at stake with a simplified Bellmand-Ford / Dijkstra Single  
 // Source Shorted Path algo. But due to the underlying graph structure  
 // ("Trellis"), it can be turned into a dynamic programming algo,  
 // partially saving the explicit graph's nodes representation. And  
 // without using a heap, since the open frontier of the DAG is always  
 // known, and of fixed sized.  
 //  
 //////////////////////////////////////////////////////////  
   
   
 //////////////////////////////////////////////////////////  
 // Codes lengths for relevant levels.  
660    
661    // let's factorize:  /* let's factorize: */
662  static const uint8_t Code_Len0[64] = {  static const uint8_t Code_Len0[64] = {
663    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
664    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
# Line 707  Line 723 
723     3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9,10,10,10,10,10,10,10,10,12,12,13,13,12,13,14,15,15,     3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9,10,10,10,10,10,10,10,10,12,12,13,13,12,13,14,15,15,
724    15,16,16,16,16,17,17,17,18,18,19,19,19,19,19,19,19,19,21,21,22,22,30,30,30,30,30,30,30,30,30,30 };    15,16,16,16,16,17,17,17,18,18,19,19,19,19,19,19,19,19,21,21,22,22,30,30,30,30,30,30,30,30,30,30 };
725    
726    // a few more table for LAST table:  /* a few more table for LAST table: */
727  static const uint8_t Code_Len21[64] = {  static const uint8_t Code_Len21[64] = {
728    13,20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,    13,20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
729    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
# Line 722  Line 738 
738    12,13,13,13,13,13,13,13,13,14,16,16,16,16,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19};    12,13,13,13,13,13,13,13,13,14,16,16,16,16,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19};
739    
740    
741  static const uint8_t * const B16_17_Code_Len[24] = { // levels [1..24]  static const uint8_t * const B16_17_Code_Len[24] = { /* levels [1..24] */
742    Code_Len20,Code_Len19,Code_Len18,Code_Len17,    Code_Len20,Code_Len19,Code_Len18,Code_Len17,
743    Code_Len16,Code_Len15,Code_Len14,Code_Len13,    Code_Len16,Code_Len15,Code_Len14,Code_Len13,
744    Code_Len12,Code_Len11,Code_Len10,Code_Len9,    Code_Len12,Code_Len11,Code_Len10,Code_Len9,
# Line 731  Line 747 
747    Code_Len2, Code_Len1, Code_Len1, Code_Len1,    Code_Len2, Code_Len1, Code_Len1, Code_Len1,
748  };  };
749    
750  static const uint8_t * const B16_17_Code_Len_Last[6] = { // levels [1..6]  static const uint8_t * const B16_17_Code_Len_Last[6] = { /* levels [1..6] */
751    Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1,    Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1,
752  };  };
753    
754  #define TL(q) 0xfe00/(q*q)  /* TL_SHIFT controls the precision of the RD optimizations in trellis
755     * valid range is [10..16]. The bigger, the more trellis is vulnerable
756     * to overflows in cost formulas.
757     *  - 10 allows ac values up to 2^11 == 2048
758     *  - 16 allows ac values up to 2^8 == 256
759     */
760    #define TL_SHIFT 11
761    #define TL(q) ((0xfe00>>(16-TL_SHIFT))/(q*q))
762    
763  static const int Trellis_Lambda_Tabs[31] = {  static const int Trellis_Lambda_Tabs[31] = {
764           TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6), TL( 7),           TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6), TL( 7),
# Line 745  Line 768 
768  };  };
769  #undef TL  #undef TL
770    
771  static __inline int Find_Last(const int16_t *C, const uint16_t *Zigzag, int i)  static int __inline
772    Find_Last(const int16_t *C, const uint16_t *Zigzag, int i)
773  {  {
774    while(i>=0)    while(i>=0)
775      if (C[Zigzag[i]])      if (C[Zigzag[i]])
# Line 754  Line 778 
778    return -1;    return -1;
779  }  }
780    
781  //////////////////////////////////////////////////////////  static int __inline
782  // this routine has been strippen of all debug code  Compute_Sum(const int16_t *C, int last)
783  //////////////////////////////////////////////////////////  {
784            int sum = 0;
785    
786            while(last--)
787                    sum += abs(C[last]);
788    
789            return(sum);
790    }
791    
792    /* this routine has been strippen of all debug code */
793  static int  static int
794  dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)  dct_quantize_trellis_c(int16_t *const Out,
795                                               const int16_t *const In,
796                                               int Q,
797                                               const uint16_t * const Zigzag,
798                                               const uint16_t * const QuantMatrix,
799                                               int Non_Zero)
800  {  {
801    
802      // Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]),          /* Note: We should search last non-zero coeffs on *real* DCT input coeffs
803      // not quantized one (Out[]). However, it only improves the result *very*           * (In[]), not quantized one (Out[]). However, it only improves the result
804      // slightly (~0.01dB), whereas speed drops to crawling level :)           * *very* slightly (~0.01dB), whereas speed drops to crawling level :)
805      // Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps,           * Well, actually, taking 1 more coeff past Non_Zero into account sometimes
806             * helps. */
807    typedef struct { int16_t Run, Level; } NODE;    typedef struct { int16_t Run, Level; } NODE;
808    
809    NODE Nodes[65], Last;    NODE Nodes[65], Last;
810    uint32_t Run_Costs0[64+1];    uint32_t Run_Costs0[64+1];
811    uint32_t * const Run_Costs = Run_Costs0 + 1;    uint32_t * const Run_Costs = Run_Costs0 + 1;
812    const int Mult = 2*Q;  
813    const int Bias = (Q-1) | 1;          /* it's 1/lambda, actually */
814    const int Lev0 = Mult + Bias;          const int Lambda = Trellis_Lambda_Tabs[Q-1];
   const int Lambda = Trellis_Lambda_Tabs[Q-1];    // it's 1/lambda, actually  
815    
816    int Run_Start = -1;    int Run_Start = -1;
817    uint32_t Min_Cost = 2<<16;          uint32_t Min_Cost = 2<<TL_SHIFT;
818    
819    int Last_Node = -1;    int Last_Node = -1;
820    uint32_t Last_Cost = 0;    uint32_t Last_Cost = 0;
821    
822    int i, j;          int i, j, sum;
823    Run_Costs[-1] = 2<<16;                          // source (w/ CBP penalty)  
824            /* source (w/ CBP penalty) */
825            Run_Costs[-1] = 2<<TL_SHIFT;
826    
827    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);
828    if (Non_Zero<0)    if (Non_Zero<0)
829        return -1;                  return 0; /* Sum is zero if there are only zero coeffs */
830    
831            for(i=0; i<=Non_Zero; i++) {
832                    const int q = ((Q*QuantMatrix[Zigzag[i]])>>4);
833                    const int Mult = 2*q;
834                    const int Bias = (q-1) | 1;
835                    const int Lev0 = Mult + Bias;
836    
   for(i=0; i<=Non_Zero; i++)  
   {  
837      const int AC = In[Zigzag[i]];      const int AC = In[Zigzag[i]];
838      const int Level1 = Out[Zigzag[i]];      const int Level1 = Out[Zigzag[i]];
839      const int Dist0 = Lambda* AC*AC;                  const unsigned int Dist0 = Lambda* AC*AC;
840      uint32_t Best_Cost = 0xf0000000;      uint32_t Best_Cost = 0xf0000000;
841      Last_Cost += Dist0;      Last_Cost += Dist0;
842    
843      if ((uint32_t)(Level1+1)<3)                 // very specialized loop for -1,0,+1                  /* very specialized loop for -1,0,+1 */
844      {                  if ((uint32_t)(Level1+1)<3) {
845          int dQ;          int dQ;
846                  int Run;                  int Run;
847        uint32_t Cost0;        uint32_t Cost0;
# Line 814  Line 856 
856                  Cost0 = Lambda*dQ*dQ;                  Cost0 = Lambda*dQ*dQ;
857    
858        Nodes[i].Run = 1;        Nodes[i].Run = 1;
859        Best_Cost = (Code_Len20[0]<<16) + Run_Costs[i-1]+Cost0;                          Best_Cost = (Code_Len20[0]<<TL_SHIFT) + Run_Costs[i-1]+Cost0;
860        for(Run=i-Run_Start; Run>0; --Run)                          for(Run=i-Run_Start; Run>0; --Run) {
       {  
861          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];
862          const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<16);                                  const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<TL_SHIFT);
863          const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<16);                                  const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<TL_SHIFT);
864    
865            // TODO: what about tie-breaks? Should we favor short runs or                                  /* TODO: what about tie-breaks? Should we favor short runs or
866            // long runs? Although the error is the same, it would not be                                   * long runs? Although the error is the same, it would not be
867            // spread the same way along high and low frequencies...                                   * spread the same way along high and low frequencies... */
868    
869                          // (I'd say: favour short runs => hifreq errors (HVS) -- gruel )                                  /* Gruel: I'd say, favour short runs => hifreq errors (HVS) */
870    
871          if (Cost<Best_Cost) {          if (Cost<Best_Cost) {
872            Best_Cost    = Cost;            Best_Cost    = Cost;
# Line 840  Line 881 
881        }        }
882        if (Last_Node==i)        if (Last_Node==i)
883                          Last.Level = Nodes[i].Level;                          Last.Level = Nodes[i].Level;
884      }                  } else if (51U>(uint32_t)(Level1+25)) {
885      else                      // "big" levels                          /* "big" levels (not less than ESC3, though) */
     {  
886        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;
887        int Level2;        int Level2;
888        int dQ1, dQ2;        int dQ1, dQ2;
# Line 858  Line 898 
898          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;
899          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;
900          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;
901        } else { // Level1<-1                          } else { /* Level1<-1 */
902          dQ1 = Level1*Mult-AC - Bias;          dQ1 = Level1*Mult-AC - Bias;
903          dQ2 = dQ1 + Mult;          dQ2 = dQ1 + Mult;
904          Level2 = Level1 + 1;          Level2 = Level1 + 1;
# Line 867  Line 907 
907          Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0;          Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0;
908          Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0;          Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0;
909        }        }
910    
911        Dist1 = Lambda*dQ1*dQ1;        Dist1 = Lambda*dQ1*dQ1;
912        Dist2 = Lambda*dQ2*dQ2;        Dist2 = Lambda*dQ2*dQ2;
913        dDist21 = Dist2-Dist1;        dDist21 = Dist2-Dist1;
# Line 877  Line 918 
918          uint32_t Cost1, Cost2;          uint32_t Cost1, Cost2;
919          int bLevel;          int bLevel;
920    
921  // for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:                                  /* for sub-optimal (but slightly worth it, speed-wise) search,
922  //        if (Cost_Base>=Best_Cost) continue;                                   * uncomment the following:
923  // (? doesn't seem to have any effect -- gruel )                                   *              if (Cost_Base>=Best_Cost) continue;
924                                     * (? doesn't seem to have any effect -- gruel ) */
925    
926          Cost1 = Cost_Base + (Tbl_L1[Run-1]<<16);                                  Cost1 = Cost_Base + (Tbl_L1[Run-1]<<TL_SHIFT);
927          Cost2 = Cost_Base + (Tbl_L2[Run-1]<<16) + dDist21;                                  Cost2 = Cost_Base + (Tbl_L2[Run-1]<<TL_SHIFT) + dDist21;
928    
929          if (Cost2<Cost1) {          if (Cost2<Cost1) {
930                           Cost1 = Cost2;                           Cost1 = Cost2;
931                           bLevel = Level2;                           bLevel = Level2;
932                    } else                                  } else {
933                           bLevel = Level1;                           bLevel = Level1;
934                                    }
935    
936          if (Cost1<Best_Cost) {          if (Cost1<Best_Cost) {
937            Best_Cost = Cost1;            Best_Cost = Cost1;
# Line 896  Line 939 
939            Nodes[i].Level = bLevel;            Nodes[i].Level = bLevel;
940          }          }
941    
942          Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<16);                                  Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<TL_SHIFT);
943          Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<16) + dDist21;                                  Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<TL_SHIFT) + dDist21;
944    
945          if (Cost2<Cost1) {          if (Cost2<Cost1) {
946                           Cost1 = Cost2;                           Cost1 = Cost2;
947                           bLevel = Level2;                           bLevel = Level2;
948                    } else                                  } else {
949                           bLevel = Level1;                           bLevel = Level1;
950                                    }
951    
952          if (Cost1<Last_Cost) {          if (Cost1<Last_Cost) {
953            Last_Cost  = Cost1;            Last_Cost  = Cost1;
# Line 911  Line 955 
955            Last.Level = bLevel;            Last.Level = bLevel;
956            Last_Node  = i;            Last_Node  = i;
957          }          }
958        } //end of "for Run"                          } /* end of "for Run" */
959                    } else {
960                            /* Very very high levels, with no chance of being optimizable
961                             * => Simply pick best Run. */
962                            int Run;
963                            for(Run=i-Run_Start; Run>0; --Run) {
964                                    /* 30 bits + no distortion */
965                                    const uint32_t Cost = (30<<TL_SHIFT) + Run_Costs[i-Run];
966                                    if (Cost<Best_Cost) {
967                                            Best_Cost = Cost;
968                                            Nodes[i].Run   = Run;
969                                            Nodes[i].Level = Level1;
970                                    }
971    
972                                    if (Cost<Last_Cost) {
973                                            Last_Cost  = Cost;
974                                            Last.Run   = Run;
975                                            Last.Level = Level1;
976                                            Last_Node  = i;
977                                    }
978                            }
979      }      }
980    
981    
982      Run_Costs[i] = Best_Cost;      Run_Costs[i] = Best_Cost;
983    
984      if (Best_Cost < Min_Cost + Dist0) {      if (Best_Cost < Min_Cost + Dist0) {
985        Min_Cost = Best_Cost;        Min_Cost = Best_Cost;
986        Run_Start = i;        Run_Start = i;
987      }                  } else {
988      else                          /* as noticed by Michael Niedermayer (michaelni at gmx.at),
989      {                           * there's a code shorter by 1 bit for a larger run (!), same
990          // as noticed by Michael Niedermayer (michaelni at gmx.at), there's                           * level. We give it a chance by not moving the left barrier too
991          // a code shorter by 1 bit for a larger run (!), same level. We give                           * much. */
992          // it a chance by not moving the left barrier too much.                          while( Run_Costs[Run_Start]>Min_Cost+(1<<TL_SHIFT) )
   
       while( Run_Costs[Run_Start]>Min_Cost+(1<<16) )  
993          Run_Start++;          Run_Start++;
994    
995          // spread on preceding coeffs the cost incurred by skipping this one                          /* spread on preceding coeffs the cost incurred by skipping this
996                             * one */
997        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;
998        Min_Cost += Dist0;        Min_Cost += Dist0;
999      }      }
1000    }    }
1001    
1002            /* It seems trellis doesn't give good results... just compute the Out sum
1003             * and quit */
1004    if (Last_Node<0)    if (Last_Node<0)
1005      return -1;                  return Compute_Sum(Out, Non_Zero);
1006    
1007         // reconstruct optimal sequence backward with surviving paths          /* reconstruct optimal sequence backward with surviving paths */
1008    memset(Out, 0x00, 64*sizeof(*Out));    memset(Out, 0x00, 64*sizeof(*Out));
1009    Out[Zigzag[Last_Node]] = Last.Level;    Out[Zigzag[Last_Node]] = Last.Level;
1010    i = Last_Node - Last.Run;    i = Last_Node - Last.Run;
1011            sum = 0;
1012    while(i>=0) {    while(i>=0) {
1013      Out[Zigzag[i]] = Nodes[i].Level;      Out[Zigzag[i]] = Nodes[i].Level;
1014                    sum += abs(Nodes[i].Level);
1015      i -= Nodes[i].Run;      i -= Nodes[i].Run;
1016    }    }
   return Last_Node;  
 }  
   
   
1017    
1018            return sum;
1019    }
1020    
1021    /* original version including heavy debugging info */
   
   
   
   
   
   
 //////////////////////////////////////////////////////////  
 // original version including heavy debugging info  
 //////////////////////////////////////////////////////////  
   
1022    
1023  #ifdef DBGTRELL  #ifdef DBGTRELL
1024    
# Line 987  Line 1042 
1042      int j=0, j0=0;      int j=0, j0=0;
1043      int Run, Level;      int Run, Level;
1044    
1045      Bits = 2;   // CBP                  Bits = 2;   /* CBP */
1046      while(j<Last) {      while(j<Last) {
1047        while(!C[Zigzag[j]])        while(!C[Zigzag[j]])
1048                          j++;                          j++;
# Line 1019  Line 1074 
1074      V -= Ref[Zigzag[i]];      V -= Ref[Zigzag[i]];
1075      Dist += V*V;      Dist += V*V;
1076    }    }
1077    Cost = Lambda*Dist + (Bits<<16);          Cost = Lambda*Dist + (Bits<<TL_SHIFT);
1078    if (DBG==1)    if (DBG==1)
1079      printf( " Last:%2d/%2d Cost = [(Bits=%5.0d) + Lambda*(Dist=%6.0d) = %d ] >>12= %d ", Last,Max, Bits, Dist, Cost, Cost>>12 );      printf( " Last:%2d/%2d Cost = [(Bits=%5.0d) + Lambda*(Dist=%6.0d) = %d ] >>12= %d ", Last,Max, Bits, Dist, Cost, Cost>>12 );
1080    return Cost;    return Cost;
# Line 1034  Line 1089 
1089  dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)  dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)
1090  {  {
1091    
1092      // Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]),      /*
1093      // not quantized one (Out[]). However, it only improves the result *very*           * Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]),
1094      // slightly (~0.01dB), whereas speed drops to crawling level :)           * not quantized one (Out[]). However, it only improves the result *very*
1095      // Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps,           * slightly (~0.01dB), whereas speed drops to crawling level :)
1096             * Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps.
1097             */
1098    typedef struct { int16_t Run, Level; } NODE;    typedef struct { int16_t Run, Level; } NODE;
1099    
1100    NODE Nodes[65], Last;    NODE Nodes[65], Last;
# Line 1047  Line 1103 
1103    const int Mult = 2*Q;    const int Mult = 2*Q;
1104    const int Bias = (Q-1) | 1;    const int Bias = (Q-1) | 1;
1105    const int Lev0 = Mult + Bias;    const int Lev0 = Mult + Bias;
1106    const int Lambda = Trellis_Lambda_Tabs[Q-1];    // it's 1/lambda, actually          const int Lambda = Trellis_Lambda_Tabs[Q-1];    /* it's 1/lambda, actually */
1107    
1108    int Run_Start = -1;    int Run_Start = -1;
1109    Run_Costs[-1] = 2<<16;                          // source (w/ CBP penalty)          Run_Costs[-1] = 2<<TL_SHIFT;                          /* source (w/ CBP penalty) */
1110    uint32_t Min_Cost = 2<<16;          uint32_t Min_Cost = 2<<TL_SHIFT;
1111    
1112    int Last_Node = -1;    int Last_Node = -1;
1113    uint32_t Last_Cost = 0;    uint32_t Last_Cost = 0;
# Line 1059  Line 1115 
1115    int i, j;    int i, j;
1116    
1117  #if (DBG>0)  #if (DBG>0)
1118    Last.Level = 0; Last.Run = -1; // just initialize to smthg          Last.Level = 0; Last.Run = -1; /* just initialize to smthg */
1119  #endif  #endif
1120    
1121    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);
# Line 1074  Line 1130 
1130      uint32_t Best_Cost = 0xf0000000;      uint32_t Best_Cost = 0xf0000000;
1131      Last_Cost += Dist0;      Last_Cost += Dist0;
1132    
1133      if ((uint32_t)(Level1+1)<3)                 // very specialized loop for -1,0,+1                  if ((uint32_t)(Level1+1)<3)                 /* very specialized loop for -1,0,+1 */
1134      {      {
1135          int dQ;          int dQ;
1136                  int Run;                  int Run;
# Line 1090  Line 1146 
1146                  Cost0 = Lambda*dQ*dQ;                  Cost0 = Lambda*dQ*dQ;
1147    
1148        Nodes[i].Run = 1;        Nodes[i].Run = 1;
1149        Best_Cost = (Code_Len20[0]<<16) + Run_Costs[i-1]+Cost0;                          Best_Cost = (Code_Len20[0]<<TL_SHIFT) + Run_Costs[i-1]+Cost0;
1150        for(Run=i-Run_Start; Run>0; --Run)        for(Run=i-Run_Start; Run>0; --Run)
1151        {        {
1152          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];
1153          const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<16);                                  const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<TL_SHIFT);
1154          const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<16);                                  const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<TL_SHIFT);
1155    
1156            // TODO: what about tie-breaks? Should we favor short runs or                                  /*
1157            // long runs? Although the error is the same, it would not be                                   * TODO: what about tie-breaks? Should we favor short runs or
1158            // spread the same way along high and low frequencies...                                   * long runs? Although the error is the same, it would not be
1159                                     * spread the same way along high and low frequencies...
1160                                     */
1161          if (Cost<Best_Cost) {          if (Cost<Best_Cost) {
1162            Best_Cost    = Cost;            Best_Cost    = Cost;
1163            Nodes[i].Run = Run;            Nodes[i].Run = Run;
# Line 1129  Line 1187 
1187          printf( "\n" );          printf( "\n" );
1188        }        }
1189      }      }
1190      else                      // "big" levels                  else                      /* "big" levels */
1191      {      {
1192        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;
1193        int Level2;        int Level2;
# Line 1146  Line 1204 
1204          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;
1205          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;
1206          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;
1207        } else { // Level1<-1                          } else { /* Level1<-1 */
1208          dQ1 = Level1*Mult-AC - Bias;          dQ1 = Level1*Mult-AC - Bias;
1209          dQ2 = dQ1 + Mult;          dQ2 = dQ1 + Mult;
1210          Level2 = Level1 + 1;          Level2 = Level1 + 1;
# Line 1165  Line 1223 
1223          uint32_t Cost1, Cost2;          uint32_t Cost1, Cost2;
1224          int bLevel;          int bLevel;
1225    
1226  // for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:  /*
1227  //        if (Cost_Base>=Best_Cost) continue;   * for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:
1228     *        if (Cost_Base>=Best_Cost) continue;
1229          Cost1 = Cost_Base + (Tbl_L1[Run-1]<<16);   */
1230          Cost2 = Cost_Base + (Tbl_L2[Run-1]<<16) + dDist21;                                  Cost1 = Cost_Base + (Tbl_L1[Run-1]<<TL_SHIFT);
1231                                    Cost2 = Cost_Base + (Tbl_L2[Run-1]<<TL_SHIFT) + dDist21;
1232    
1233          if (Cost2<Cost1) {          if (Cost2<Cost1) {
1234                           Cost1 = Cost2;                           Cost1 = Cost2;
# Line 1183  Line 1242 
1242            Nodes[i].Level = bLevel;            Nodes[i].Level = bLevel;
1243          }          }
1244    
1245          Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<16);                                  Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<TL_SHIFT);
1246          Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<16) + dDist21;                                  Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<TL_SHIFT) + dDist21;
1247    
1248          if (Cost2<Cost1) {          if (Cost2<Cost1) {
1249                           Cost1 = Cost2;                           Cost1 = Cost2;
# Line 1198  Line 1257 
1257            Last.Level = bLevel;            Last.Level = bLevel;
1258            Last_Node  = i;            Last_Node  = i;
1259          }          }
1260        } //end of "for Run"                          } /* end of "for Run" */
1261    
1262        if (DBG==1) {        if (DBG==1) {
1263          Run_Costs[i] = Best_Cost;          Run_Costs[i] = Best_Cost;
# Line 1224  Line 1283 
1283      }      }
1284      else      else
1285      {      {
1286          // as noticed by Michael Niedermayer (michaelni at gmx.at), there's                          /*
1287          // a code shorter by 1 bit for a larger run (!), same level. We give                           * as noticed by Michael Niedermayer (michaelni at gmx.at), there's
1288          // it a chance by not moving the left barrier too much.                           * a code shorter by 1 bit for a larger run (!), same level. We give
1289                             * it a chance by not moving the left barrier too much.
1290                             */
1291    
1292        while( Run_Costs[Run_Start]>Min_Cost+(1<<16) )                          while( Run_Costs[Run_Start]>Min_Cost+(1<<TL_SHIFT) )
1293          Run_Start++;          Run_Start++;
1294    
1295          // spread on preceding coeffs the cost incurred by skipping this one                          /* spread on preceding coeffs the cost incurred by skipping this one */
1296        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;
1297        Min_Cost += Dist0;        Min_Cost += Dist0;
1298      }      }
# Line 1249  Line 1310 
1310    if (Last_Node<0)    if (Last_Node<0)
1311      return -1;      return -1;
1312    
1313         // reconstruct optimal sequence backward with surviving paths          /* reconstruct optimal sequence backward with surviving paths */
1314    memset(Out, 0x00, 64*sizeof(*Out));    memset(Out, 0x00, 64*sizeof(*Out));
1315    Out[Zigzag[Last_Node]] = Last.Level;    Out[Zigzag[Last_Node]] = Last.Level;
1316    i = Last_Node - Last.Run;    i = Last_Node - Last.Run;

Legend:
Removed from v.1.21.2.12  
changed lines
  Added in v.1.21.2.20

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4