[cvs] / xvidcore / src / utils / mbtransquant.c Repository:
ViewVC logotype

Diff of /xvidcore/src/utils/mbtransquant.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.21.2.12, Mon May 12 12:33:16 2003 UTC revision 1.23.2.3, Sun Dec 19 12:04:27 2004 UTC
# Line 39  Line 39 
39  #include "../bitstream/zigzag.h"  #include "../bitstream/zigzag.h"
40  #include "../dct/fdct.h"  #include "../dct/fdct.h"
41  #include "../dct/idct.h"  #include "../dct/idct.h"
42  #include "../quant/quant_mpeg4.h"  #include "../quant/quant.h"
 #include "../quant/quant_h263.h"  
43  #include "../encoder.h"  #include "../encoder.h"
44    
45  #include "../image/reduced.h"  #include "../image/reduced.h"
46    #include  "../quant/quant_matrix.h"
47    
48  MBFIELDTEST_PTR MBFieldTest;  MBFIELDTEST_PTR MBFieldTest;
49    
# Line 123  Line 123 
123                           int16_t qcoeff[6 * 64],                           int16_t qcoeff[6 * 64],
124                           int16_t data[6*64])                           int16_t data[6*64])
125  {  {
126          int i;          int mpeg;
127            int scaler_lum, scaler_chr;
128    
129          for (i = 0; i < 6; i++) {          quant_intraFuncPtr const quant[2] =
130                  uint32_t iDcScaler = get_dc_scaler(pMB->quant, i < 4);                  {
131                            quant_h263_intra,
132                            quant_mpeg_intra
133                    };
134    
135            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
136            scaler_lum = get_dc_scaler(pMB->quant, 1);
137            scaler_chr = get_dc_scaler(pMB->quant, 0);
138    
139                  /* Quantize the block */                  /* Quantize the block */
140                  start_timer();                  start_timer();
141                  if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT)) {          quant[mpeg](&data[0 * 64], &qcoeff[0 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
142                          quant_intra(&data[i * 64], &qcoeff[i * 64], pMB->quant, iDcScaler);          quant[mpeg](&data[1 * 64], &qcoeff[1 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
143                  } else {          quant[mpeg](&data[2 * 64], &qcoeff[2 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
144                          quant4_intra(&data[i * 64], &qcoeff[i * 64], pMB->quant, iDcScaler);          quant[mpeg](&data[3 * 64], &qcoeff[3 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
145                  }          quant[mpeg](&data[4 * 64], &qcoeff[4 * 64], pMB->quant, scaler_chr, pParam->mpeg_quant_matrices);
146            quant[mpeg](&data[5 * 64], &qcoeff[5 * 64], pMB->quant, scaler_chr, pParam->mpeg_quant_matrices);
147                  stop_quant_timer();                  stop_quant_timer();
148          }          }
 }  
149    
150  /* DeQuantize all blocks -- Intra mode */  /* DeQuantize all blocks -- Intra mode */
151  static __inline void  static __inline void
# Line 146  Line 154 
154                             int16_t qcoeff[6 * 64],                             int16_t qcoeff[6 * 64],
155                             int16_t data[6*64])                             int16_t data[6*64])
156  {  {
157          int i;          int mpeg;
158            int scaler_lum, scaler_chr;
159    
160          for (i = 0; i < 6; i++) {          quant_intraFuncPtr const dequant[2] =
161                  uint32_t iDcScaler = get_dc_scaler(iQuant, i < 4);                  {
162                            dequant_h263_intra,
163                            dequant_mpeg_intra
164                    };
165    
166            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
167            scaler_lum = get_dc_scaler(iQuant, 1);
168            scaler_chr = get_dc_scaler(iQuant, 0);
169    
170                  start_timer();                  start_timer();
171                  if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT))          dequant[mpeg](&qcoeff[0 * 64], &data[0 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
172                          dequant_intra(&qcoeff[i * 64], &data[i * 64], iQuant, iDcScaler);          dequant[mpeg](&qcoeff[1 * 64], &data[1 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
173                  else          dequant[mpeg](&qcoeff[2 * 64], &data[2 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
174                          dequant4_intra(&qcoeff[i * 64], &data[i * 64], iQuant, iDcScaler);          dequant[mpeg](&qcoeff[3 * 64], &data[3 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
175            dequant[mpeg](&qcoeff[4 * 64], &data[4 * 64], iQuant, scaler_chr, pParam->mpeg_quant_matrices);
176            dequant[mpeg](&qcoeff[5 * 64], &data[5 * 64], iQuant, scaler_chr, pParam->mpeg_quant_matrices);
177                  stop_iquant_timer();                  stop_iquant_timer();
178          }          }
 }  
   
   
 static int  
 dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero);  
179    
180  static int  static int
181  dct_quantize_trellis_mpeg_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero);  dct_quantize_trellis_c(int16_t *const Out,
182                                               const int16_t *const In,
183                                               int Q,
184                                               const uint16_t * const Zigzag,
185                                               const uint16_t * const QuantMatrix,
186                                               int Non_Zero,
187                                               int Sum);
188    
189  /* Quantize all blocks -- Inter mode */  /* Quantize all blocks -- Inter mode */
190  static __inline uint8_t  static __inline uint8_t
# Line 182  Line 200 
200          int i;          int i;
201          uint8_t cbp = 0;          uint8_t cbp = 0;
202          int sum;          int sum;
203          int code_block;          int code_block, mpeg;
204    
205            quant_interFuncPtr const quant[2] =
206                    {
207                            quant_h263_inter,
208                            quant_mpeg_inter
209                    };
210    
211            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
212    
213          for (i = 0; i < 6; i++) {          for (i = 0; i < 6; i++) {
214    
215                  /* Quantize the block */                  /* Quantize the block */
216                  start_timer();                  start_timer();
217                  if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT)) {  
218                          sum = quant_inter(&qcoeff[i*64], &data[i*64], pMB->quant);                  sum = quant[mpeg](&qcoeff[i*64], &data[i*64], pMB->quant, pParam->mpeg_quant_matrices);
219                          if ( (sum) && (frame->vop_flags & XVID_VOP_TRELLISQUANT) ) {  
220                                  sum = dct_quantize_trellis_h263_c(&qcoeff[i*64], &data[i*64], pMB->quant, &scan_tables[0][0], 63)+1;                  if(sum && (pMB->quant > 2) && (frame->vop_flags & XVID_VOP_TRELLISQUANT)) {
221                                  limit = 1;                          const uint16_t *matrix;
222                          }                          const static uint16_t h263matrix[] =
223                  } else {                                  {
224                          sum = quant4_inter(&qcoeff[i * 64], &data[i * 64], pMB->quant);                                          16, 16, 16, 16, 16, 16, 16, 16,
225  //                      if ( (sum) && (frame->vop_flags & XVID_VOP_TRELLISQUANT) )                                          16, 16, 16, 16, 16, 16, 16, 16,
226  //                              sum = dct_quantize_trellis_mpeg_c (&qcoeff[i*64], &data[i*64], pMB->quant)+1;                                          16, 16, 16, 16, 16, 16, 16, 16,
227                                            16, 16, 16, 16, 16, 16, 16, 16,
228                                            16, 16, 16, 16, 16, 16, 16, 16,
229                                            16, 16, 16, 16, 16, 16, 16, 16,
230                                            16, 16, 16, 16, 16, 16, 16, 16,
231                                            16, 16, 16, 16, 16, 16, 16, 16
232                                    };
233    
234                            matrix = (mpeg)?get_inter_matrix(pParam->mpeg_quant_matrices):h263matrix;
235                            sum = dct_quantize_trellis_c(&qcoeff[i*64], &data[i*64],
236                                                                                     pMB->quant, &scan_tables[0][0],
237                                                                                     matrix,
238                                                                                     63,
239                                                                                     sum);
240                  }                  }
241                  stop_quant_timer();                  stop_quant_timer();
242    
# Line 236  Line 275 
275                             int16_t qcoeff[6 * 64],                             int16_t qcoeff[6 * 64],
276                             const uint8_t cbp)                             const uint8_t cbp)
277  {  {
278          int i;          int mpeg;
279    
280            quant_interFuncPtr const dequant[2] =
281                    {
282                            dequant_h263_inter,
283                            dequant_mpeg_inter
284                    };
285    
286            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
287    
         for (i = 0; i < 6; i++) {  
                 if (cbp & (1 << (5 - i))) {  
288                          start_timer();                          start_timer();
289                          if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT))          if(cbp & (1 << (5 - 0))) dequant[mpeg](&data[0 * 64], &qcoeff[0 * 64], iQuant, pParam->mpeg_quant_matrices);
290                                  dequant_inter(&data[i * 64], &qcoeff[i * 64], iQuant);          if(cbp & (1 << (5 - 1))) dequant[mpeg](&data[1 * 64], &qcoeff[1 * 64], iQuant, pParam->mpeg_quant_matrices);
291                          else          if(cbp & (1 << (5 - 2))) dequant[mpeg](&data[2 * 64], &qcoeff[2 * 64], iQuant, pParam->mpeg_quant_matrices);
292                                  dequant4_inter(&data[i * 64], &qcoeff[i * 64], iQuant);          if(cbp & (1 << (5 - 3))) dequant[mpeg](&data[3 * 64], &qcoeff[3 * 64], iQuant, pParam->mpeg_quant_matrices);
293            if(cbp & (1 << (5 - 4))) dequant[mpeg](&data[4 * 64], &qcoeff[4 * 64], iQuant, pParam->mpeg_quant_matrices);
294            if(cbp & (1 << (5 - 5))) dequant[mpeg](&data[5 * 64], &qcoeff[5 * 64], iQuant, pParam->mpeg_quant_matrices);
295                          stop_iquant_timer();                          stop_iquant_timer();
296                  }                  }
         }  
 }  
297    
298  typedef void (transfer_operation_8to16_t) (int16_t *Dst, const uint8_t *Src, int BpS);  typedef void (transfer_operation_8to16_t) (int16_t *Dst, const uint8_t *Src, int BpS);
299  typedef void (transfer_operation_16to8_t) (uint8_t *Dst, const int16_t *Src, int BpS);  typedef void (transfer_operation_16to8_t) (uint8_t *Dst, const int16_t *Src, int BpS);
# Line 266  Line 311 
311          uint32_t stride2 = stride / 2;          uint32_t stride2 = stride / 2;
312          uint32_t next_block = stride * 8;          uint32_t next_block = stride * 8;
313          int32_t cst;          int32_t cst;
314            int vop_reduced;
315          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
316          const IMAGE * const pCurrent = &frame->image;          const IMAGE * const pCurrent = &frame->image;
317            transfer_operation_8to16_t * const functions[2] =
318                    {
319                            (transfer_operation_8to16_t *)transfer_8to16copy,
320                            (transfer_operation_8to16_t *)filter_18x18_to_8x8
321                    };
322          transfer_operation_8to16_t *transfer_op = NULL;          transfer_operation_8to16_t *transfer_op = NULL;
323    
324          if ((frame->vop_flags & XVID_VOP_REDUCED)) {          vop_reduced = !!(frame->vop_flags & XVID_VOP_REDUCED);
   
                 /* Image pointers */  
                 pY_Cur = pCurrent->y + (y_pos << 5) * stride  + (x_pos << 5);  
                 pU_Cur = pCurrent->u + (y_pos << 4) * stride2 + (x_pos << 4);  
                 pV_Cur = pCurrent->v + (y_pos << 4) * stride2 + (x_pos << 4);  
   
                 /* Block size */  
                 cst = 16;  
   
                 /* Operation function */  
                 transfer_op = (transfer_operation_8to16_t*)filter_18x18_to_8x8;  
         } else {  
325    
326                  /* Image pointers */                  /* Image pointers */
327                  pY_Cur = pCurrent->y + (y_pos << 4) * stride  + (x_pos << 4);          pY_Cur = pCurrent->y + (y_pos << (4+vop_reduced)) * stride  + (x_pos << (4+vop_reduced));
328                  pU_Cur = pCurrent->u + (y_pos << 3) * stride2 + (x_pos << 3);          pU_Cur = pCurrent->u + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
329                  pV_Cur = pCurrent->v + (y_pos << 3) * stride2 + (x_pos << 3);          pV_Cur = pCurrent->v + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
330    
331                  /* Block size */                  /* Block size */
332                  cst = 8;          cst = 8<<vop_reduced;
333    
334                  /* Operation function */                  /* Operation function */
335                  transfer_op = (transfer_operation_8to16_t*)transfer_8to16copy;          transfer_op = functions[vop_reduced];
         }  
336    
337          /* Do the transfer */          /* Do the transfer */
338          start_timer();          start_timer();
# Line 314  Line 352 
352                           const uint32_t x_pos,                           const uint32_t x_pos,
353                           const uint32_t y_pos,                           const uint32_t y_pos,
354                           int16_t data[6 * 64],                           int16_t data[6 * 64],
355                           const uint32_t add,                           const uint32_t add, /* Must be 1 or 0 */
356                           const uint8_t cbp)                           const uint8_t cbp)
357  {  {
358          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
# Line 322  Line 360 
360          uint32_t stride2 = stride / 2;          uint32_t stride2 = stride / 2;
361          uint32_t next_block = stride * 8;          uint32_t next_block = stride * 8;
362          uint32_t cst;          uint32_t cst;
363            int vop_reduced;
364          const IMAGE * const pCurrent = &frame->image;          const IMAGE * const pCurrent = &frame->image;
365    
366            /* Array of function pointers, indexed by [vop_reduced<<1+add] */
367            transfer_operation_16to8_t  * const functions[4] =
368                    {
369                            (transfer_operation_16to8_t*)transfer_16to8copy,
370                            (transfer_operation_16to8_t*)transfer_16to8add,
371                            (transfer_operation_16to8_t*)copy_upsampled_8x8_16to8,
372                            (transfer_operation_16to8_t*)add_upsampled_8x8_16to8
373                    };
374    
375          transfer_operation_16to8_t *transfer_op = NULL;          transfer_operation_16to8_t *transfer_op = NULL;
376    
377            /* Makes this vars booleans */
378            vop_reduced = !!(frame->vop_flags & XVID_VOP_REDUCED);
379    
380            /* Image pointers */
381            pY_Cur = pCurrent->y + (y_pos << (4+vop_reduced)) * stride  + (x_pos << (4+vop_reduced));
382            pU_Cur = pCurrent->u + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
383            pV_Cur = pCurrent->v + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
384    
385          if (pMB->field_dct) {          if (pMB->field_dct) {
386                  next_block = stride;                  next_block = stride;
387                  stride *= 2;                  stride *= 2;
388          }          }
389    
         if ((frame->vop_flags & XVID_VOP_REDUCED)) {  
   
                 /* Image pointers */  
                 pY_Cur = pCurrent->y + (y_pos << 5) * stride  + (x_pos << 5);  
                 pU_Cur = pCurrent->u + (y_pos << 4) * stride2 + (x_pos << 4);  
                 pV_Cur = pCurrent->v + (y_pos << 4) * stride2 + (x_pos << 4);  
   
390                  /* Block size */                  /* Block size */
391                  cst = 16;          cst = 8<<vop_reduced;
392    
393                  /* Operation function */                  /* Operation function */
394                  if(add)          transfer_op = functions[(vop_reduced<<1) + add];
                         transfer_op = (transfer_operation_16to8_t*)add_upsampled_8x8_16to8;  
                 else  
                         transfer_op = (transfer_operation_16to8_t*)copy_upsampled_8x8_16to8;  
         } else {  
   
                 /* Image pointers */  
                 pY_Cur = pCurrent->y + (y_pos << 4) * stride  + (x_pos << 4);  
                 pU_Cur = pCurrent->u + (y_pos << 3) * stride2 + (x_pos << 3);  
                 pV_Cur = pCurrent->v + (y_pos << 3) * stride2 + (x_pos << 3);  
   
                 /* Block size */  
                 cst = 8;  
   
                 /* Operation function */  
                 if(add)  
                         transfer_op = (transfer_operation_16to8_t*)transfer_16to8add;  
                 else  
                         transfer_op = (transfer_operation_16to8_t*)transfer_16to8copy;  
         }  
395    
396          /* Do the operation */          /* Do the operation */
397          start_timer();          start_timer();
# Line 419  Line 450 
450          uint8_t cbp;          uint8_t cbp;
451          uint32_t limit;          uint32_t limit;
452    
453          /*          /* There is no MBTrans8to16 for Inter block, that's done in motion compensation
454           * There is no MBTrans8to16 for Inter block, that's done in motion compensation           * already */
          * already  
          */  
455    
456          /* Perform DCT (and field decision) */          /* Perform DCT (and field decision) */
457          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
# Line 430  Line 459 
459          /* Set the limit threshold */          /* Set the limit threshold */
460          limit = PVOP_TOOSMALL_LIMIT + ((pMB->quant == 1)? 1 : 0);          limit = PVOP_TOOSMALL_LIMIT + ((pMB->quant == 1)? 1 : 0);
461    
462            if (frame->vop_flags & XVID_VOP_CARTOON)
463                    limit *= 3;
464    
465          /* Quantize the block */          /* Quantize the block */
466          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 0, limit);          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 0, limit);
467    
# Line 457  Line 489 
489          uint8_t cbp;          uint8_t cbp;
490          uint32_t limit;          uint32_t limit;
491    
492          /*          /* There is no MBTrans8to16 for Inter block, that's done in motion compensation
493           * There is no MBTrans8to16 for Inter block, that's done in motion compensation           * already */
          * already  
          */  
494    
495          /* Perform DCT (and field decision) */          /* Perform DCT (and field decision) */
496          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
# Line 468  Line 498 
498          /* Set the limit threshold */          /* Set the limit threshold */
499          limit = BVOP_TOOSMALL_LIMIT;          limit = BVOP_TOOSMALL_LIMIT;
500    
501            if (frame->vop_flags & XVID_VOP_CARTOON)
502                    limit *= 2;
503    
504          /* Quantize the block */          /* Quantize the block */
505          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 1, limit);          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 1, limit);
506    
# Line 475  Line 508 
508           * History comment:           * History comment:
509           * We don't have to DeQuant, iDCT and Transfer back data for B-frames.           * We don't have to DeQuant, iDCT and Transfer back data for B-frames.
510           *           *
511           * BUT some plugins require the original frame to be passed so we have           * BUT some plugins require the rebuilt original frame to be passed so we
512           * to take care of that here           * have to take care of that here
513           */           */
514          if((pParam->plugin_flags & XVID_REQORIGINAL)) {          if((pParam->plugin_flags & XVID_REQORIGINAL)) {
515    
# Line 546  Line 579 
579    
580          /* left blocks */          /* left blocks */
581    
582          // 1=2, 2=4, 4=8, 8=1          /* 1=2, 2=4, 4=8, 8=1 */
583          MOVLINE(tmp, LINE(0, 1));          MOVLINE(tmp, LINE(0, 1));
584          MOVLINE(LINE(0, 1), LINE(0, 2));          MOVLINE(LINE(0, 1), LINE(0, 2));
585          MOVLINE(LINE(0, 2), LINE(0, 4));          MOVLINE(LINE(0, 2), LINE(0, 4));
586          MOVLINE(LINE(0, 4), LINE(2, 0));          MOVLINE(LINE(0, 4), LINE(2, 0));
587          MOVLINE(LINE(2, 0), tmp);          MOVLINE(LINE(2, 0), tmp);
588    
589          // 3=6, 6=12, 12=9, 9=3          /* 3=6, 6=12, 12=9, 9=3 */
590          MOVLINE(tmp, LINE(0, 3));          MOVLINE(tmp, LINE(0, 3));
591          MOVLINE(LINE(0, 3), LINE(0, 6));          MOVLINE(LINE(0, 3), LINE(0, 6));
592          MOVLINE(LINE(0, 6), LINE(2, 4));          MOVLINE(LINE(0, 6), LINE(2, 4));
593          MOVLINE(LINE(2, 4), LINE(2, 1));          MOVLINE(LINE(2, 4), LINE(2, 1));
594          MOVLINE(LINE(2, 1), tmp);          MOVLINE(LINE(2, 1), tmp);
595    
596          // 5=10, 10=5          /* 5=10, 10=5 */
597          MOVLINE(tmp, LINE(0, 5));          MOVLINE(tmp, LINE(0, 5));
598          MOVLINE(LINE(0, 5), LINE(2, 2));          MOVLINE(LINE(0, 5), LINE(2, 2));
599          MOVLINE(LINE(2, 2), tmp);          MOVLINE(LINE(2, 2), tmp);
600    
601          // 7=14, 14=13, 13=11, 11=7          /* 7=14, 14=13, 13=11, 11=7 */
602          MOVLINE(tmp, LINE(0, 7));          MOVLINE(tmp, LINE(0, 7));
603          MOVLINE(LINE(0, 7), LINE(2, 6));          MOVLINE(LINE(0, 7), LINE(2, 6));
604          MOVLINE(LINE(2, 6), LINE(2, 5));          MOVLINE(LINE(2, 6), LINE(2, 5));
# Line 574  Line 607 
607    
608          /* right blocks */          /* right blocks */
609    
610          // 1=2, 2=4, 4=8, 8=1          /* 1=2, 2=4, 4=8, 8=1 */
611          MOVLINE(tmp, LINE(1, 1));          MOVLINE(tmp, LINE(1, 1));
612          MOVLINE(LINE(1, 1), LINE(1, 2));          MOVLINE(LINE(1, 1), LINE(1, 2));
613          MOVLINE(LINE(1, 2), LINE(1, 4));          MOVLINE(LINE(1, 2), LINE(1, 4));
614          MOVLINE(LINE(1, 4), LINE(3, 0));          MOVLINE(LINE(1, 4), LINE(3, 0));
615          MOVLINE(LINE(3, 0), tmp);          MOVLINE(LINE(3, 0), tmp);
616    
617          // 3=6, 6=12, 12=9, 9=3          /* 3=6, 6=12, 12=9, 9=3 */
618          MOVLINE(tmp, LINE(1, 3));          MOVLINE(tmp, LINE(1, 3));
619          MOVLINE(LINE(1, 3), LINE(1, 6));          MOVLINE(LINE(1, 3), LINE(1, 6));
620          MOVLINE(LINE(1, 6), LINE(3, 4));          MOVLINE(LINE(1, 6), LINE(3, 4));
621          MOVLINE(LINE(3, 4), LINE(3, 1));          MOVLINE(LINE(3, 4), LINE(3, 1));
622          MOVLINE(LINE(3, 1), tmp);          MOVLINE(LINE(3, 1), tmp);
623    
624          // 5=10, 10=5          /* 5=10, 10=5 */
625          MOVLINE(tmp, LINE(1, 5));          MOVLINE(tmp, LINE(1, 5));
626          MOVLINE(LINE(1, 5), LINE(3, 2));          MOVLINE(LINE(1, 5), LINE(3, 2));
627          MOVLINE(LINE(3, 2), tmp);          MOVLINE(LINE(3, 2), tmp);
628    
629          // 7=14, 14=13, 13=11, 11=7          /* 7=14, 14=13, 13=11, 11=7 */
630          MOVLINE(tmp, LINE(1, 7));          MOVLINE(tmp, LINE(1, 7));
631          MOVLINE(LINE(1, 7), LINE(3, 6));          MOVLINE(LINE(1, 7), LINE(3, 6));
632          MOVLINE(LINE(3, 6), LINE(3, 5));          MOVLINE(LINE(3, 6), LINE(3, 5));
# Line 601  Line 634 
634          MOVLINE(LINE(3, 3), tmp);          MOVLINE(LINE(3, 3), tmp);
635  }  }
636    
637    /*****************************************************************************
638     *               Trellis based R-D optimal quantization
639     *
640     *   Trellis Quant code (C) 2003 Pascal Massimino skal(at)planet-d.net
641     *
642     ****************************************************************************/
643    
644    /*----------------------------------------------------------------------------
645     *
646     *        Trellis-Based quantization
647     *
648     * So far I understand this paper:
649     *
650     *  "Trellis-Based R-D Optimal Quantization in H.263+"
651     *    J.Wen, M.Luttrell, J.Villasenor
652     *    IEEE Transactions on Image Processing, Vol.9, No.8, Aug. 2000.
653     *
654     * we are at stake with a simplified Bellmand-Ford / Dijkstra Single
655     * Source Shortest Path algo. But due to the underlying graph structure
656     * ("Trellis"), it can be turned into a dynamic programming algo,
657     * partially saving the explicit graph's nodes representation. And
658     * without using a heap, since the open frontier of the DAG is always
659     * known, and of fixed size.
660     *--------------------------------------------------------------------------*/
661    
662    
663    
664  /************************************************************************  /* Codes lengths for relevant levels. */
  *               Trellis based R-D optimal quantization                 *  
  *                                                                      *  
  *   Trellis Quant code (C) 2003 Pascal Massimino skal(at)planet-d.net  *  
  *                                                                      *  
  ************************************************************************/  
   
   
 static int  
 dct_quantize_trellis_mpeg_c(int16_t *const Out, const int16_t *const In, int Q,  
                 const uint16_t * const Zigzag, int Non_Zero)  
 { return 63; }  
   
   
 //////////////////////////////////////////////////////////  
 //  
 //        Trellis-Based quantization  
 //  
 // So far I understand this paper:  
 //  
 //  "Trellis-Based R-D Optimal Quantization in H.263+"  
 //    J.Wen, M.Luttrell, J.Villasenor  
 //    IEEE Transactions on Image Processing, Vol.9, No.8, Aug. 2000.  
 //  
 // we are at stake with a simplified Bellmand-Ford / Dijkstra Single  
 // Source Shorted Path algo. But due to the underlying graph structure  
 // ("Trellis"), it can be turned into a dynamic programming algo,  
 // partially saving the explicit graph's nodes representation. And  
 // without using a heap, since the open frontier of the DAG is always  
 // known, and of fixed sized.  
 //  
 //////////////////////////////////////////////////////////  
   
   
 //////////////////////////////////////////////////////////  
 // Codes lengths for relevant levels.  
665    
666    // let's factorize:  /* let's factorize: */
667  static const uint8_t Code_Len0[64] = {  static const uint8_t Code_Len0[64] = {
668    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
669    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
# Line 707  Line 728 
728     3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9,10,10,10,10,10,10,10,10,12,12,13,13,12,13,14,15,15,     3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9,10,10,10,10,10,10,10,10,12,12,13,13,12,13,14,15,15,
729    15,16,16,16,16,17,17,17,18,18,19,19,19,19,19,19,19,19,21,21,22,22,30,30,30,30,30,30,30,30,30,30 };    15,16,16,16,16,17,17,17,18,18,19,19,19,19,19,19,19,19,21,21,22,22,30,30,30,30,30,30,30,30,30,30 };
730    
731    // a few more table for LAST table:  /* a few more table for LAST table: */
732  static const uint8_t Code_Len21[64] = {  static const uint8_t Code_Len21[64] = {
733    13,20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,    13,20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
734    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
# Line 722  Line 743 
743    12,13,13,13,13,13,13,13,13,14,16,16,16,16,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19};    12,13,13,13,13,13,13,13,13,14,16,16,16,16,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19};
744    
745    
746  static const uint8_t * const B16_17_Code_Len[24] = { // levels [1..24]  static const uint8_t * const B16_17_Code_Len[24] = { /* levels [1..24] */
747    Code_Len20,Code_Len19,Code_Len18,Code_Len17,    Code_Len20,Code_Len19,Code_Len18,Code_Len17,
748    Code_Len16,Code_Len15,Code_Len14,Code_Len13,    Code_Len16,Code_Len15,Code_Len14,Code_Len13,
749    Code_Len12,Code_Len11,Code_Len10,Code_Len9,    Code_Len12,Code_Len11,Code_Len10,Code_Len9,
# Line 731  Line 752 
752    Code_Len2, Code_Len1, Code_Len1, Code_Len1,    Code_Len2, Code_Len1, Code_Len1, Code_Len1,
753  };  };
754    
755  static const uint8_t * const B16_17_Code_Len_Last[6] = { // levels [1..6]  static const uint8_t * const B16_17_Code_Len_Last[6] = { /* levels [1..6] */
756    Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1,    Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1,
757  };  };
758    
759  #define TL(q) 0xfe00/(q*q)  /* TL_SHIFT controls the precision of the RD optimizations in trellis
760     * valid range is [10..16]. The bigger, the more trellis is vulnerable
761     * to overflows in cost formulas.
762     *  - 10 allows ac values up to 2^11 == 2048
763     *  - 16 allows ac values up to 2^8 == 256
764     */
765    #define TL_SHIFT 11
766    #define TL(q) ((0xfe00>>(16-TL_SHIFT))/(q*q))
767    
768  static const int Trellis_Lambda_Tabs[31] = {  static const int Trellis_Lambda_Tabs[31] = {
769           TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6), TL( 7),           TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6), TL( 7),
# Line 745  Line 773 
773  };  };
774  #undef TL  #undef TL
775    
776  static __inline int Find_Last(const int16_t *C, const uint16_t *Zigzag, int i)  static int __inline
777    Find_Last(const int16_t *C, const uint16_t *Zigzag, int i)
778  {  {
779    while(i>=0)    while(i>=0)
780      if (C[Zigzag[i]])      if (C[Zigzag[i]])
# Line 754  Line 783 
783    return -1;    return -1;
784  }  }
785    
786  //////////////////////////////////////////////////////////  /* this routine has been strippen of all debug code */
 // this routine has been strippen of all debug code  
 //////////////////////////////////////////////////////////  
   
787  static int  static int
788  dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)  dct_quantize_trellis_c(int16_t *const Out,
789                                               const int16_t *const In,
790                                               int Q,
791                                               const uint16_t * const Zigzag,
792                                               const uint16_t * const QuantMatrix,
793                                               int Non_Zero,
794                                               int Sum)
795  {  {
796    
797      // Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]),          /* Note: We should search last non-zero coeffs on *real* DCT input coeffs
798      // not quantized one (Out[]). However, it only improves the result *very*           * (In[]), not quantized one (Out[]). However, it only improves the result
799      // slightly (~0.01dB), whereas speed drops to crawling level :)           * *very* slightly (~0.01dB), whereas speed drops to crawling level :)
800      // Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps,           * Well, actually, taking 1 more coeff past Non_Zero into account sometimes
801             * helps. */
802    typedef struct { int16_t Run, Level; } NODE;    typedef struct { int16_t Run, Level; } NODE;
803    
804    NODE Nodes[65], Last;    NODE Nodes[65], Last;
805    uint32_t Run_Costs0[64+1];    uint32_t Run_Costs0[64+1];
806    uint32_t * const Run_Costs = Run_Costs0 + 1;    uint32_t * const Run_Costs = Run_Costs0 + 1;
807    const int Mult = 2*Q;  
808    const int Bias = (Q-1) | 1;          /* it's 1/lambda, actually */
809    const int Lev0 = Mult + Bias;          const int Lambda = Trellis_Lambda_Tabs[Q-1];
   const int Lambda = Trellis_Lambda_Tabs[Q-1];    // it's 1/lambda, actually  
810    
811    int Run_Start = -1;    int Run_Start = -1;
812    uint32_t Min_Cost = 2<<16;          uint32_t Min_Cost = 2<<TL_SHIFT;
813    
814    int Last_Node = -1;    int Last_Node = -1;
815    uint32_t Last_Cost = 0;    uint32_t Last_Cost = 0;
816    
817    int i, j;    int i, j;
818    Run_Costs[-1] = 2<<16;                          // source (w/ CBP penalty)  
819            /* source (w/ CBP penalty) */
820            Run_Costs[-1] = 2<<TL_SHIFT;
821    
822    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);
823    if (Non_Zero<0)    if (Non_Zero<0)
824        return -1;                  return 0; /* Sum is zero if there are only zero coeffs */
825    
826            for(i=0; i<=Non_Zero; i++) {
827                    const int q = ((Q*QuantMatrix[Zigzag[i]])>>4);
828                    const int Mult = 2*q;
829                    const int Bias = (q-1) | 1;
830                    const int Lev0 = Mult + Bias;
831    
   for(i=0; i<=Non_Zero; i++)  
   {  
832      const int AC = In[Zigzag[i]];      const int AC = In[Zigzag[i]];
833      const int Level1 = Out[Zigzag[i]];      const int Level1 = Out[Zigzag[i]];
834      const int Dist0 = Lambda* AC*AC;                  const unsigned int Dist0 = Lambda* AC*AC;
835      uint32_t Best_Cost = 0xf0000000;      uint32_t Best_Cost = 0xf0000000;
836      Last_Cost += Dist0;      Last_Cost += Dist0;
837    
838      if ((uint32_t)(Level1+1)<3)                 // very specialized loop for -1,0,+1                  /* very specialized loop for -1,0,+1 */
839      {                  if ((uint32_t)(Level1+1)<3) {
840          int dQ;          int dQ;
841                  int Run;                  int Run;
842        uint32_t Cost0;        uint32_t Cost0;
# Line 814  Line 851 
851                  Cost0 = Lambda*dQ*dQ;                  Cost0 = Lambda*dQ*dQ;
852    
853        Nodes[i].Run = 1;        Nodes[i].Run = 1;
854        Best_Cost = (Code_Len20[0]<<16) + Run_Costs[i-1]+Cost0;                          Best_Cost = (Code_Len20[0]<<TL_SHIFT) + Run_Costs[i-1]+Cost0;
855        for(Run=i-Run_Start; Run>0; --Run)                          for(Run=i-Run_Start; Run>0; --Run) {
       {  
856          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];
857          const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<16);                                  const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<TL_SHIFT);
858          const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<16);                                  const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<TL_SHIFT);
859    
860            // TODO: what about tie-breaks? Should we favor short runs or                                  /* TODO: what about tie-breaks? Should we favor short runs or
861            // long runs? Although the error is the same, it would not be                                   * long runs? Although the error is the same, it would not be
862            // spread the same way along high and low frequencies...                                   * spread the same way along high and low frequencies... */
863    
864                          // (I'd say: favour short runs => hifreq errors (HVS) -- gruel )                                  /* Gruel: I'd say, favour short runs => hifreq errors (HVS) */
865    
866          if (Cost<Best_Cost) {          if (Cost<Best_Cost) {
867            Best_Cost    = Cost;            Best_Cost    = Cost;
# Line 840  Line 876 
876        }        }
877        if (Last_Node==i)        if (Last_Node==i)
878                          Last.Level = Nodes[i].Level;                          Last.Level = Nodes[i].Level;
879      }                  } else if (51U>(uint32_t)(Level1+25)) {
880      else                      // "big" levels                          /* "big" levels (not less than ESC3, though) */
     {  
881        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;
882        int Level2;        int Level2;
883        int dQ1, dQ2;        int dQ1, dQ2;
# Line 858  Line 893 
893          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;
894          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;
895          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;
896        } else { // Level1<-1                          } else { /* Level1<-1 */
897          dQ1 = Level1*Mult-AC - Bias;          dQ1 = Level1*Mult-AC - Bias;
898          dQ2 = dQ1 + Mult;          dQ2 = dQ1 + Mult;
899          Level2 = Level1 + 1;          Level2 = Level1 + 1;
# Line 867  Line 902 
902          Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0;          Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0;
903          Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0;          Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0;
904        }        }
905    
906        Dist1 = Lambda*dQ1*dQ1;        Dist1 = Lambda*dQ1*dQ1;
907        Dist2 = Lambda*dQ2*dQ2;        Dist2 = Lambda*dQ2*dQ2;
908        dDist21 = Dist2-Dist1;        dDist21 = Dist2-Dist1;
# Line 877  Line 913 
913          uint32_t Cost1, Cost2;          uint32_t Cost1, Cost2;
914          int bLevel;          int bLevel;
915    
916  // for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:                                  /* for sub-optimal (but slightly worth it, speed-wise) search,
917  //        if (Cost_Base>=Best_Cost) continue;                                   * uncomment the following:
918  // (? doesn't seem to have any effect -- gruel )                                   *              if (Cost_Base>=Best_Cost) continue;
919                                     * (? doesn't seem to have any effect -- gruel ) */
920    
921          Cost1 = Cost_Base + (Tbl_L1[Run-1]<<16);                                  Cost1 = Cost_Base + (Tbl_L1[Run-1]<<TL_SHIFT);
922          Cost2 = Cost_Base + (Tbl_L2[Run-1]<<16) + dDist21;                                  Cost2 = Cost_Base + (Tbl_L2[Run-1]<<TL_SHIFT) + dDist21;
923    
924          if (Cost2<Cost1) {          if (Cost2<Cost1) {
925                           Cost1 = Cost2;                           Cost1 = Cost2;
926                           bLevel = Level2;                           bLevel = Level2;
927                    } else                                  } else {
928                           bLevel = Level1;                           bLevel = Level1;
929                                    }
930    
931          if (Cost1<Best_Cost) {          if (Cost1<Best_Cost) {
932            Best_Cost = Cost1;            Best_Cost = Cost1;
# Line 896  Line 934 
934            Nodes[i].Level = bLevel;            Nodes[i].Level = bLevel;
935          }          }
936    
937          Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<16);                                  Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<TL_SHIFT);
938          Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<16) + dDist21;                                  Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<TL_SHIFT) + dDist21;
939    
940          if (Cost2<Cost1) {          if (Cost2<Cost1) {
941                           Cost1 = Cost2;                           Cost1 = Cost2;
942                           bLevel = Level2;                           bLevel = Level2;
943                    } else                                  } else {
944                           bLevel = Level1;                           bLevel = Level1;
945                                    }
946    
947          if (Cost1<Last_Cost) {          if (Cost1<Last_Cost) {
948            Last_Cost  = Cost1;            Last_Cost  = Cost1;
# Line 911  Line 950 
950            Last.Level = bLevel;            Last.Level = bLevel;
951            Last_Node  = i;            Last_Node  = i;
952          }          }
953        } //end of "for Run"                          } /* end of "for Run" */
954                    } else {
955                            /* Very very high levels, with no chance of being optimizable
956                             * => Simply pick best Run. */
957                            int Run;
958                            for(Run=i-Run_Start; Run>0; --Run) {
959                                    /* 30 bits + no distortion */
960                                    const uint32_t Cost = (30<<TL_SHIFT) + Run_Costs[i-Run];
961                                    if (Cost<Best_Cost) {
962                                            Best_Cost = Cost;
963                                            Nodes[i].Run   = Run;
964                                            Nodes[i].Level = Level1;
965                                    }
966    
967                                    if (Cost<Last_Cost) {
968                                            Last_Cost  = Cost;
969                                            Last.Run   = Run;
970                                            Last.Level = Level1;
971                                            Last_Node  = i;
972                                    }
973                            }
974      }      }
975    
976    
977      Run_Costs[i] = Best_Cost;      Run_Costs[i] = Best_Cost;
978    
979      if (Best_Cost < Min_Cost + Dist0) {      if (Best_Cost < Min_Cost + Dist0) {
980        Min_Cost = Best_Cost;        Min_Cost = Best_Cost;
981        Run_Start = i;        Run_Start = i;
982      }                  } else {
983      else                          /* as noticed by Michael Niedermayer (michaelni at gmx.at),
984      {                           * there's a code shorter by 1 bit for a larger run (!), same
985          // as noticed by Michael Niedermayer (michaelni at gmx.at), there's                           * level. We give it a chance by not moving the left barrier too
986          // a code shorter by 1 bit for a larger run (!), same level. We give                           * much. */
987          // it a chance by not moving the left barrier too much.                          while( Run_Costs[Run_Start]>Min_Cost+(1<<TL_SHIFT) )
   
       while( Run_Costs[Run_Start]>Min_Cost+(1<<16) )  
988          Run_Start++;          Run_Start++;
989    
990          // spread on preceding coeffs the cost incurred by skipping this one                          /* spread on preceding coeffs the cost incurred by skipping this
991                             * one */
992        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;
993        Min_Cost += Dist0;        Min_Cost += Dist0;
994      }      }
995    }    }
996    
997            /* It seems trellis doesn't give good results... just leave the block untouched
998             * and return the original sum value */
999    if (Last_Node<0)    if (Last_Node<0)
1000      return -1;                  return Sum;
1001    
1002         // reconstruct optimal sequence backward with surviving paths          /* reconstruct optimal sequence backward with surviving paths */
1003    memset(Out, 0x00, 64*sizeof(*Out));    memset(Out, 0x00, 64*sizeof(*Out));
1004    Out[Zigzag[Last_Node]] = Last.Level;    Out[Zigzag[Last_Node]] = Last.Level;
1005    i = Last_Node - Last.Run;    i = Last_Node - Last.Run;
1006            Sum = abs(Last.Level);
1007    while(i>=0) {    while(i>=0) {
1008      Out[Zigzag[i]] = Nodes[i].Level;      Out[Zigzag[i]] = Nodes[i].Level;
1009                    Sum += abs(Nodes[i].Level);
1010      i -= Nodes[i].Run;      i -= Nodes[i].Run;
1011    }    }
   return Last_Node;  
 }  
   
   
1012    
1013            return Sum;
1014    }
1015    
1016    /* original version including heavy debugging info */
   
   
   
   
   
   
 //////////////////////////////////////////////////////////  
 // original version including heavy debugging info  
 //////////////////////////////////////////////////////////  
   
1017    
1018  #ifdef DBGTRELL  #ifdef DBGTRELL
1019    
# Line 987  Line 1037 
1037      int j=0, j0=0;      int j=0, j0=0;
1038      int Run, Level;      int Run, Level;
1039    
1040      Bits = 2;   // CBP                  Bits = 2;   /* CBP */
1041      while(j<Last) {      while(j<Last) {
1042        while(!C[Zigzag[j]])        while(!C[Zigzag[j]])
1043                          j++;                          j++;
# Line 1019  Line 1069 
1069      V -= Ref[Zigzag[i]];      V -= Ref[Zigzag[i]];
1070      Dist += V*V;      Dist += V*V;
1071    }    }
1072    Cost = Lambda*Dist + (Bits<<16);          Cost = Lambda*Dist + (Bits<<TL_SHIFT);
1073    if (DBG==1)    if (DBG==1)
1074      printf( " Last:%2d/%2d Cost = [(Bits=%5.0d) + Lambda*(Dist=%6.0d) = %d ] >>12= %d ", Last,Max, Bits, Dist, Cost, Cost>>12 );      printf( " Last:%2d/%2d Cost = [(Bits=%5.0d) + Lambda*(Dist=%6.0d) = %d ] >>12= %d ", Last,Max, Bits, Dist, Cost, Cost>>12 );
1075    return Cost;    return Cost;
# Line 1034  Line 1084 
1084  dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)  dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)
1085  {  {
1086    
1087      // Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]),      /*
1088      // not quantized one (Out[]). However, it only improves the result *very*           * Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]),
1089      // slightly (~0.01dB), whereas speed drops to crawling level :)           * not quantized one (Out[]). However, it only improves the result *very*
1090      // Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps,           * slightly (~0.01dB), whereas speed drops to crawling level :)
1091             * Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps.
1092             */
1093    typedef struct { int16_t Run, Level; } NODE;    typedef struct { int16_t Run, Level; } NODE;
1094    
1095    NODE Nodes[65], Last;    NODE Nodes[65], Last;
# Line 1047  Line 1098 
1098    const int Mult = 2*Q;    const int Mult = 2*Q;
1099    const int Bias = (Q-1) | 1;    const int Bias = (Q-1) | 1;
1100    const int Lev0 = Mult + Bias;    const int Lev0 = Mult + Bias;
1101    const int Lambda = Trellis_Lambda_Tabs[Q-1];    // it's 1/lambda, actually          const int Lambda = Trellis_Lambda_Tabs[Q-1];    /* it's 1/lambda, actually */
1102    
1103    int Run_Start = -1;    int Run_Start = -1;
1104    Run_Costs[-1] = 2<<16;                          // source (w/ CBP penalty)          Run_Costs[-1] = 2<<TL_SHIFT;                          /* source (w/ CBP penalty) */
1105    uint32_t Min_Cost = 2<<16;          uint32_t Min_Cost = 2<<TL_SHIFT;
1106    
1107    int Last_Node = -1;    int Last_Node = -1;
1108    uint32_t Last_Cost = 0;    uint32_t Last_Cost = 0;
# Line 1059  Line 1110 
1110    int i, j;    int i, j;
1111    
1112  #if (DBG>0)  #if (DBG>0)
1113    Last.Level = 0; Last.Run = -1; // just initialize to smthg          Last.Level = 0; Last.Run = -1; /* just initialize to smthg */
1114  #endif  #endif
1115    
1116    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);
# Line 1074  Line 1125 
1125      uint32_t Best_Cost = 0xf0000000;      uint32_t Best_Cost = 0xf0000000;
1126      Last_Cost += Dist0;      Last_Cost += Dist0;
1127    
1128      if ((uint32_t)(Level1+1)<3)                 // very specialized loop for -1,0,+1                  if ((uint32_t)(Level1+1)<3)                 /* very specialized loop for -1,0,+1 */
1129      {      {
1130          int dQ;          int dQ;
1131                  int Run;                  int Run;
# Line 1090  Line 1141 
1141                  Cost0 = Lambda*dQ*dQ;                  Cost0 = Lambda*dQ*dQ;
1142    
1143        Nodes[i].Run = 1;        Nodes[i].Run = 1;
1144        Best_Cost = (Code_Len20[0]<<16) + Run_Costs[i-1]+Cost0;                          Best_Cost = (Code_Len20[0]<<TL_SHIFT) + Run_Costs[i-1]+Cost0;
1145        for(Run=i-Run_Start; Run>0; --Run)        for(Run=i-Run_Start; Run>0; --Run)
1146        {        {
1147          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];
1148          const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<16);                                  const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<TL_SHIFT);
1149          const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<16);                                  const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<TL_SHIFT);
1150    
1151            // TODO: what about tie-breaks? Should we favor short runs or                                  /*
1152            // long runs? Although the error is the same, it would not be                                   * TODO: what about tie-breaks? Should we favor short runs or
1153            // spread the same way along high and low frequencies...                                   * long runs? Although the error is the same, it would not be
1154                                     * spread the same way along high and low frequencies...
1155                                     */
1156          if (Cost<Best_Cost) {          if (Cost<Best_Cost) {
1157            Best_Cost    = Cost;            Best_Cost    = Cost;
1158            Nodes[i].Run = Run;            Nodes[i].Run = Run;
# Line 1129  Line 1182 
1182          printf( "\n" );          printf( "\n" );
1183        }        }
1184      }      }
1185      else                      // "big" levels                  else                      /* "big" levels */
1186      {      {
1187        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;
1188        int Level2;        int Level2;
# Line 1146  Line 1199 
1199          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;
1200          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;
1201          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;
1202        } else { // Level1<-1                          } else { /* Level1<-1 */
1203          dQ1 = Level1*Mult-AC - Bias;          dQ1 = Level1*Mult-AC - Bias;
1204          dQ2 = dQ1 + Mult;          dQ2 = dQ1 + Mult;
1205          Level2 = Level1 + 1;          Level2 = Level1 + 1;
# Line 1165  Line 1218 
1218          uint32_t Cost1, Cost2;          uint32_t Cost1, Cost2;
1219          int bLevel;          int bLevel;
1220    
1221  // for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:  /*
1222  //        if (Cost_Base>=Best_Cost) continue;   * for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:
1223     *        if (Cost_Base>=Best_Cost) continue;
1224          Cost1 = Cost_Base + (Tbl_L1[Run-1]<<16);   */
1225          Cost2 = Cost_Base + (Tbl_L2[Run-1]<<16) + dDist21;                                  Cost1 = Cost_Base + (Tbl_L1[Run-1]<<TL_SHIFT);
1226                                    Cost2 = Cost_Base + (Tbl_L2[Run-1]<<TL_SHIFT) + dDist21;
1227    
1228          if (Cost2<Cost1) {          if (Cost2<Cost1) {
1229                           Cost1 = Cost2;                           Cost1 = Cost2;
# Line 1183  Line 1237 
1237            Nodes[i].Level = bLevel;            Nodes[i].Level = bLevel;
1238          }          }
1239    
1240          Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<16);                                  Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<TL_SHIFT);
1241          Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<16) + dDist21;                                  Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<TL_SHIFT) + dDist21;
1242    
1243          if (Cost2<Cost1) {          if (Cost2<Cost1) {
1244                           Cost1 = Cost2;                           Cost1 = Cost2;
# Line 1198  Line 1252 
1252            Last.Level = bLevel;            Last.Level = bLevel;
1253            Last_Node  = i;            Last_Node  = i;
1254          }          }
1255        } //end of "for Run"                          } /* end of "for Run" */
1256    
1257        if (DBG==1) {        if (DBG==1) {
1258          Run_Costs[i] = Best_Cost;          Run_Costs[i] = Best_Cost;
# Line 1224  Line 1278 
1278      }      }
1279      else      else
1280      {      {
1281          // as noticed by Michael Niedermayer (michaelni at gmx.at), there's                          /*
1282          // a code shorter by 1 bit for a larger run (!), same level. We give                           * as noticed by Michael Niedermayer (michaelni at gmx.at), there's
1283          // it a chance by not moving the left barrier too much.                           * a code shorter by 1 bit for a larger run (!), same level. We give
1284                             * it a chance by not moving the left barrier too much.
1285                             */
1286    
1287        while( Run_Costs[Run_Start]>Min_Cost+(1<<16) )                          while( Run_Costs[Run_Start]>Min_Cost+(1<<TL_SHIFT) )
1288          Run_Start++;          Run_Start++;
1289    
1290          // spread on preceding coeffs the cost incurred by skipping this one                          /* spread on preceding coeffs the cost incurred by skipping this one */
1291        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;
1292        Min_Cost += Dist0;        Min_Cost += Dist0;
1293      }      }
# Line 1249  Line 1305 
1305    if (Last_Node<0)    if (Last_Node<0)
1306      return -1;      return -1;
1307    
1308         // reconstruct optimal sequence backward with surviving paths          /* reconstruct optimal sequence backward with surviving paths */
1309    memset(Out, 0x00, 64*sizeof(*Out));    memset(Out, 0x00, 64*sizeof(*Out));
1310    Out[Zigzag[Last_Node]] = Last.Level;    Out[Zigzag[Last_Node]] = Last.Level;
1311    i = Last_Node - Last.Run;    i = Last_Node - Last.Run;

Legend:
Removed from v.1.21.2.12  
changed lines
  Added in v.1.23.2.3

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4