Parent Directory | Revision Log
Revision 1.21.2.19 - (view) (download)
1 : | edgomez | 1.21.2.6 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - MB Transfer/Quantization functions - | ||
5 : | * | ||
6 : | * Copyright(C) 2001-2003 Peter Ross <pross@xvid.org> | ||
7 : | * 2001-2003 Michael Militzer <isibaar@xvid.org> | ||
8 : | * 2003 Edouard Gomez <ed.gomez@free.fr> | ||
9 : | * | ||
10 : | * This program is free software ; you can redistribute it and/or modify | ||
11 : | * it under the terms of the GNU General Public License as published by | ||
12 : | * the Free Software Foundation ; either version 2 of the License, or | ||
13 : | * (at your option) any later version. | ||
14 : | * | ||
15 : | * This program is distributed in the hope that it will be useful, | ||
16 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
17 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 : | * GNU General Public License for more details. | ||
19 : | * | ||
20 : | * You should have received a copy of the GNU General Public License | ||
21 : | * along with this program ; if not, write to the Free Software | ||
22 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 : | * | ||
24 : | edgomez | 1.21.2.17 | * $Id$ |
25 : | edgomez | 1.21.2.6 | * |
26 : | ****************************************************************************/ | ||
27 : | Isibaar | 1.1 | |
28 : | chl | 1.21.2.11 | #include <stdio.h> |
29 : | edgomez | 1.21.2.7 | #include <stdlib.h> |
30 : | chl | 1.21.2.11 | #include <string.h> |
31 : | edgomez | 1.3 | |
32 : | Isibaar | 1.1 | #include "../portab.h" |
33 : | #include "mbfunctions.h" | ||
34 : | |||
35 : | #include "../global.h" | ||
36 : | #include "mem_transfer.h" | ||
37 : | #include "timer.h" | ||
38 : | chl | 1.21.2.9 | #include "../bitstream/mbcoding.h" |
39 : | chl | 1.21.2.10 | #include "../bitstream/zigzag.h" |
40 : | Isibaar | 1.1 | #include "../dct/fdct.h" |
41 : | #include "../dct/idct.h" | ||
42 : | edgomez | 1.21.2.18 | #include "../quant/quant.h" |
43 : | Isibaar | 1.1 | #include "../encoder.h" |
44 : | |||
45 : | edgomez | 1.21 | #include "../image/reduced.h" |
46 : | edgomez | 1.21.2.19 | #include "../quant/quant_matrix.h" |
47 : | Isibaar | 1.1 | |
/*
 * Field/frame DCT decision routine. MBDecideFieldDCT() calls through this
 * pointer with the macroblock data and treats a non-zero result as
 * "use field DCT".
 * NOTE(review): presumably bound during initialisation to MBFieldTest_c or a
 * platform-specific variant -- the assignment is not visible in this section.
 */
MBFIELDTEST_PTR MBFieldTest;
49 : | Isibaar | 1.1 | |
/*
 * Base thresholds for the "too small to code" test on inter blocks: a block
 * whose quantized coefficient sum stays below the limit is a candidate for
 * being left uncoded. The P-VOP path bumps its limit by one when the
 * macroblock quantizer equals 1, to avoid artifacts for quant==1.
 */
#define PVOP_TOOSMALL_LIMIT 1	/* P-VOP base limit */
#define BVOP_TOOSMALL_LIMIT 3	/* B-VOP base limit */
55 : | #define BVOP_TOOSMALL_LIMIT 3 | ||
56 : | |||
57 : | /***************************************************************************** | ||
58 : | * Local functions | ||
59 : | ****************************************************************************/ | ||
60 : | Isibaar | 1.1 | |
61 : | edgomez | 1.21.2.6 | /* permute block and return field dct choice */ |
62 : | static __inline uint32_t | ||
63 : | MBDecideFieldDCT(int16_t data[6 * 64]) | ||
64 : | Isibaar | 1.1 | { |
65 : | edgomez | 1.21.2.6 | uint32_t field = MBFieldTest(data); |
66 : | edgomez | 1.3 | |
67 : | edgomez | 1.21.2.6 | if (field) |
68 : | MBFrameToField(data); | ||
69 : | edgomez | 1.21 | |
70 : | edgomez | 1.21.2.6 | return field; |
71 : | } | ||
72 : | h | 1.2 | |
73 : | edgomez | 1.21.2.6 | /* Performs Forward DCT on all blocks */ |
74 : | static __inline void | ||
75 : | syskin | 1.21.2.8 | MBfDCT(const MBParam * const pParam, |
76 : | const FRAMEINFO * const frame, | ||
77 : | MACROBLOCK * const pMB, | ||
78 : | edgomez | 1.21.2.6 | uint32_t x_pos, |
79 : | uint32_t y_pos, | ||
80 : | int16_t data[6 * 64]) | ||
81 : | syskin | 1.21.2.8 | { |
82 : | edgomez | 1.21.2.6 | /* Handles interlacing */ |
83 : | h | 1.2 | start_timer(); |
84 : | pMB->field_dct = 0; | ||
85 : | edgomez | 1.21.2.5 | if ((frame->vol_flags & XVID_VOL_INTERLACING) && |
86 : | h | 1.11 | (x_pos>0) && (x_pos<pParam->mb_width-1) && |
87 : | (y_pos>0) && (y_pos<pParam->mb_height-1)) { | ||
88 : | h | 1.2 | pMB->field_dct = MBDecideFieldDCT(data); |
89 : | } | ||
90 : | stop_interlacing_timer(); | ||
91 : | |||
92 : | edgomez | 1.21.2.6 | /* Perform DCT */ |
93 : | start_timer(); | ||
94 : | fdct(&data[0 * 64]); | ||
95 : | fdct(&data[1 * 64]); | ||
96 : | fdct(&data[2 * 64]); | ||
97 : | fdct(&data[3 * 64]); | ||
98 : | fdct(&data[4 * 64]); | ||
99 : | fdct(&data[5 * 64]); | ||
100 : | stop_dct_timer(); | ||
101 : | } | ||
102 : | |||
103 : | /* Performs Inverse DCT on all blocks */ | ||
104 : | static __inline void | ||
105 : | MBiDCT(int16_t data[6 * 64], | ||
106 : | const uint8_t cbp) | ||
107 : | { | ||
108 : | start_timer(); | ||
109 : | if(cbp & (1 << (5 - 0))) idct(&data[0 * 64]); | ||
110 : | if(cbp & (1 << (5 - 1))) idct(&data[1 * 64]); | ||
111 : | if(cbp & (1 << (5 - 2))) idct(&data[2 * 64]); | ||
112 : | if(cbp & (1 << (5 - 3))) idct(&data[3 * 64]); | ||
113 : | if(cbp & (1 << (5 - 4))) idct(&data[4 * 64]); | ||
114 : | if(cbp & (1 << (5 - 5))) idct(&data[5 * 64]); | ||
115 : | stop_idct_timer(); | ||
116 : | } | ||
117 : | |||
118 : | /* Quantize all blocks -- Intra mode */ | ||
119 : | static __inline void | ||
120 : | MBQuantIntra(const MBParam * pParam, | ||
121 : | chl | 1.21.2.9 | const FRAMEINFO * const frame, |
122 : | edgomez | 1.21.2.6 | const MACROBLOCK * pMB, |
123 : | syskin | 1.21.2.8 | int16_t qcoeff[6 * 64], |
124 : | edgomez | 1.21.2.6 | int16_t data[6*64]) |
125 : | { | ||
126 : | edgomez | 1.21.2.16 | int mpeg; |
127 : | int scaler_lum, scaler_chr; | ||
128 : | edgomez | 1.21.2.6 | |
129 : | edgomez | 1.21.2.18 | quant_intraFuncPtr const quant[2] = |
130 : | edgomez | 1.21.2.16 | { |
131 : | edgomez | 1.21.2.18 | quant_h263_intra, |
132 : | quant_mpeg_intra | ||
133 : | edgomez | 1.21.2.16 | }; |
134 : | |||
135 : | mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT); | ||
136 : | scaler_lum = get_dc_scaler(pMB->quant, 1); | ||
137 : | scaler_chr = get_dc_scaler(pMB->quant, 0); | ||
138 : | edgomez | 1.21.2.6 | |
139 : | edgomez | 1.21.2.16 | /* Quantize the block */ |
140 : | start_timer(); | ||
141 : | quant[mpeg](&data[0 * 64], &qcoeff[0 * 64], pMB->quant, scaler_lum); | ||
142 : | quant[mpeg](&data[1 * 64], &qcoeff[1 * 64], pMB->quant, scaler_lum); | ||
143 : | quant[mpeg](&data[2 * 64], &qcoeff[2 * 64], pMB->quant, scaler_lum); | ||
144 : | quant[mpeg](&data[3 * 64], &qcoeff[3 * 64], pMB->quant, scaler_lum); | ||
145 : | quant[mpeg](&data[4 * 64], &qcoeff[4 * 64], pMB->quant, scaler_chr); | ||
146 : | quant[mpeg](&data[5 * 64], &qcoeff[5 * 64], pMB->quant, scaler_chr); | ||
147 : | stop_quant_timer(); | ||
148 : | edgomez | 1.21.2.6 | } |
149 : | |||
150 : | /* DeQuantize all blocks -- Intra mode */ | ||
151 : | static __inline void | ||
152 : | MBDeQuantIntra(const MBParam * pParam, | ||
153 : | const int iQuant, | ||
154 : | int16_t qcoeff[6 * 64], | ||
155 : | int16_t data[6*64]) | ||
156 : | { | ||
157 : | edgomez | 1.21.2.16 | int mpeg; |
158 : | int scaler_lum, scaler_chr; | ||
159 : | edgomez | 1.21.2.6 | |
160 : | edgomez | 1.21.2.18 | quant_intraFuncPtr const dequant[2] = |
161 : | edgomez | 1.21.2.16 | { |
162 : | edgomez | 1.21.2.18 | dequant_h263_intra, |
163 : | dequant_mpeg_intra | ||
164 : | edgomez | 1.21.2.16 | }; |
165 : | |||
166 : | mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT); | ||
167 : | scaler_lum = get_dc_scaler(iQuant, 1); | ||
168 : | scaler_chr = get_dc_scaler(iQuant, 0); | ||
169 : | Isibaar | 1.1 | |
170 : | edgomez | 1.21.2.16 | start_timer(); |
171 : | dequant[mpeg](&qcoeff[0 * 64], &data[0 * 64], iQuant, scaler_lum); | ||
172 : | dequant[mpeg](&qcoeff[1 * 64], &data[1 * 64], iQuant, scaler_lum); | ||
173 : | dequant[mpeg](&qcoeff[2 * 64], &data[2 * 64], iQuant, scaler_lum); | ||
174 : | dequant[mpeg](&qcoeff[3 * 64], &data[3 * 64], iQuant, scaler_lum); | ||
175 : | dequant[mpeg](&qcoeff[4 * 64], &data[4 * 64], iQuant, scaler_chr); | ||
176 : | dequant[mpeg](&qcoeff[5 * 64], &data[5 * 64], iQuant, scaler_chr); | ||
177 : | stop_iquant_timer(); | ||
178 : | edgomez | 1.21.2.6 | } |
179 : | Isibaar | 1.1 | |
180 : | edgomez | 1.21.2.17 | static int |
181 : | edgomez | 1.21.2.19 | dct_quantize_trellis_c(int16_t *const Out, |
182 : | const int16_t *const In, | ||
183 : | int Q, | ||
184 : | const uint16_t * const Zigzag, | ||
185 : | const uint16_t * const QuantMatrix, | ||
186 : | int Non_Zero); | ||
187 : | chl | 1.21.2.10 | |
188 : | edgomez | 1.21.2.6 | /* Quantize all blocks -- Inter mode */ |
189 : | static __inline uint8_t | ||
190 : | MBQuantInter(const MBParam * pParam, | ||
191 : | chl | 1.21.2.9 | const FRAMEINFO * const frame, |
192 : | edgomez | 1.21.2.6 | const MACROBLOCK * pMB, |
193 : | int16_t data[6 * 64], | ||
194 : | int16_t qcoeff[6 * 64], | ||
195 : | int bvop, | ||
196 : | int limit) | ||
197 : | { | ||
198 : | |||
199 : | int i; | ||
200 : | uint8_t cbp = 0; | ||
201 : | int sum; | ||
202 : | edgomez | 1.21.2.16 | int code_block, mpeg; |
203 : | |||
204 : | edgomez | 1.21.2.18 | quant_interFuncPtr const quant[2] = |
205 : | edgomez | 1.21.2.16 | { |
206 : | edgomez | 1.21.2.18 | quant_h263_inter, |
207 : | quant_mpeg_inter | ||
208 : | edgomez | 1.21.2.16 | }; |
209 : | |||
210 : | mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT); | ||
211 : | edgomez | 1.21.2.6 | |
212 : | for (i = 0; i < 6; i++) { | ||
213 : | syskin | 1.21.2.8 | |
214 : | edgomez | 1.21.2.6 | /* Quantize the block */ |
215 : | start_timer(); | ||
216 : | edgomez | 1.21.2.16 | |
217 : | sum = quant[mpeg](&qcoeff[i*64], &data[i*64], pMB->quant); | ||
218 : | |||
219 : | if(sum && (frame->vop_flags & XVID_VOP_TRELLISQUANT)) { | ||
220 : | edgomez | 1.21.2.19 | const static uint16_t h263matrix[] = |
221 : | { | ||
222 : | 16, 16, 16, 16, 16, 16, 16, 16, | ||
223 : | 16, 16, 16, 16, 16, 16, 16, 16, | ||
224 : | 16, 16, 16, 16, 16, 16, 16, 16, | ||
225 : | 16, 16, 16, 16, 16, 16, 16, 16, | ||
226 : | 16, 16, 16, 16, 16, 16, 16, 16, | ||
227 : | 16, 16, 16, 16, 16, 16, 16, 16, | ||
228 : | 16, 16, 16, 16, 16, 16, 16, 16, | ||
229 : | 16, 16, 16, 16, 16, 16, 16, 16 | ||
230 : | }; | ||
231 : | sum = dct_quantize_trellis_c(&qcoeff[i*64], &data[i*64], | ||
232 : | pMB->quant, &scan_tables[0][0], | ||
233 : | (mpeg)?(uint16_t*)get_inter_matrix():h263matrix, | ||
234 : | 63); | ||
235 : | chl | 1.21.2.9 | } |
236 : | edgomez | 1.21.2.6 | stop_quant_timer(); |
237 : | |||
238 : | /* | ||
239 : | * We code the block if the sum is higher than the limit and if the first | ||
240 : | * two AC coefficients in zig zag order are not zero. | ||
241 : | */ | ||
242 : | code_block = 0; | ||
243 : | if ((sum >= limit) || (qcoeff[i*64+1] != 0) || (qcoeff[i*64+8] != 0)) { | ||
244 : | code_block = 1; | ||
245 : | edgomez | 1.7 | } else { |
246 : | edgomez | 1.21 | |
247 : | edgomez | 1.21.2.6 | if (bvop && (pMB->mode == MODE_DIRECT || pMB->mode == MODE_DIRECT_NO4V)) { |
248 : | /* dark blocks prevention for direct mode */ | ||
249 : | if ((qcoeff[i*64] < -1) || (qcoeff[i*64] > 0)) | ||
250 : | code_block = 1; | ||
251 : | edgomez | 1.21 | } else { |
252 : | edgomez | 1.21.2.6 | /* not direct mode */ |
253 : | if (qcoeff[i*64] != 0) | ||
254 : | code_block = 1; | ||
255 : | edgomez | 1.21 | } |
256 : | } | ||
257 : | edgomez | 1.21.2.6 | |
258 : | /* Set the corresponding cbp bit */ | ||
259 : | cbp |= code_block << (5 - i); | ||
260 : | edgomez | 1.21 | } |
261 : | |||
262 : | edgomez | 1.21.2.6 | return(cbp); |
263 : | } | ||
264 : | Isibaar | 1.1 | |
265 : | edgomez | 1.21.2.6 | /* DeQuantize all blocks -- Inter mode */ |
266 : | syskin | 1.21.2.8 | static __inline void |
267 : | edgomez | 1.21.2.6 | MBDeQuantInter(const MBParam * pParam, |
268 : | const int iQuant, | ||
269 : | int16_t data[6 * 64], | ||
270 : | int16_t qcoeff[6 * 64], | ||
271 : | const uint8_t cbp) | ||
272 : | { | ||
273 : | edgomez | 1.21.2.16 | int mpeg; |
274 : | edgomez | 1.21.2.6 | |
275 : | edgomez | 1.21.2.18 | quant_interFuncPtr const dequant[2] = |
276 : | edgomez | 1.21.2.16 | { |
277 : | edgomez | 1.21.2.18 | dequant_h263_inter, |
278 : | dequant_mpeg_inter | ||
279 : | edgomez | 1.21.2.16 | }; |
280 : | |||
281 : | mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT); | ||
282 : | |||
283 : | start_timer(); | ||
284 : | if(cbp & (1 << (5 - 0))) dequant[mpeg](&data[0 * 64], &qcoeff[0 * 64], iQuant); | ||
285 : | if(cbp & (1 << (5 - 1))) dequant[mpeg](&data[1 * 64], &qcoeff[1 * 64], iQuant); | ||
286 : | if(cbp & (1 << (5 - 2))) dequant[mpeg](&data[2 * 64], &qcoeff[2 * 64], iQuant); | ||
287 : | if(cbp & (1 << (5 - 3))) dequant[mpeg](&data[3 * 64], &qcoeff[3 * 64], iQuant); | ||
288 : | if(cbp & (1 << (5 - 4))) dequant[mpeg](&data[4 * 64], &qcoeff[4 * 64], iQuant); | ||
289 : | if(cbp & (1 << (5 - 5))) dequant[mpeg](&data[5 * 64], &qcoeff[5 * 64], iQuant); | ||
290 : | stop_iquant_timer(); | ||
291 : | Isibaar | 1.1 | } |
292 : | |||
/* Function type of the 8-bit -> 16-bit block loaders selected in MBTrans8to16() */
typedef void transfer_operation_8to16_t(int16_t *Dst, const uint8_t *Src, int BpS);

/* Function type of the 16-bit -> 8-bit block writers selected in MBTrans16to8() */
typedef void transfer_operation_16to8_t(uint8_t *Dst, const int16_t *Src, int BpS);
295 : | Isibaar | 1.1 | |
296 : | edgomez | 1.3 | |
297 : | edgomez | 1.21.2.6 | static __inline void |
298 : | syskin | 1.21.2.8 | MBTrans8to16(const MBParam * const pParam, |
299 : | const FRAMEINFO * const frame, | ||
300 : | const MACROBLOCK * const pMB, | ||
301 : | edgomez | 1.21.2.6 | const uint32_t x_pos, |
302 : | const uint32_t y_pos, | ||
303 : | int16_t data[6 * 64]) | ||
304 : | { | ||
305 : | h | 1.4 | uint32_t stride = pParam->edged_width; |
306 : | uint32_t stride2 = stride / 2; | ||
307 : | edgomez | 1.21.2.6 | uint32_t next_block = stride * 8; |
308 : | syskin | 1.21.2.8 | int32_t cst; |
309 : | edgomez | 1.21.2.16 | int vop_reduced; |
310 : | Isibaar | 1.1 | uint8_t *pY_Cur, *pU_Cur, *pV_Cur; |
311 : | syskin | 1.21.2.8 | const IMAGE * const pCurrent = &frame->image; |
312 : | edgomez | 1.21.2.16 | transfer_operation_8to16_t * const functions[2] = |
313 : | { | ||
314 : | (transfer_operation_8to16_t *)transfer_8to16copy, | ||
315 : | (transfer_operation_8to16_t *)filter_18x18_to_8x8 | ||
316 : | }; | ||
317 : | edgomez | 1.21.2.6 | transfer_operation_8to16_t *transfer_op = NULL; |
318 : | edgomez | 1.7 | |
319 : | edgomez | 1.21.2.16 | vop_reduced = !!(frame->vop_flags & XVID_VOP_REDUCED); |
320 : | edgomez | 1.21.2.6 | |
321 : | edgomez | 1.21.2.16 | /* Image pointers */ |
322 : | pY_Cur = pCurrent->y + (y_pos << (4+vop_reduced)) * stride + (x_pos << (4+vop_reduced)); | ||
323 : | pU_Cur = pCurrent->u + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced)); | ||
324 : | pV_Cur = pCurrent->v + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced)); | ||
325 : | edgomez | 1.21.2.6 | |
326 : | edgomez | 1.21.2.16 | /* Block size */ |
327 : | cst = 8<<vop_reduced; | ||
328 : | edgomez | 1.21.2.6 | |
329 : | edgomez | 1.21.2.16 | /* Operation function */ |
330 : | transfer_op = functions[vop_reduced]; | ||
331 : | Isibaar | 1.1 | |
332 : | edgomez | 1.21.2.6 | /* Do the transfer */ |
333 : | h | 1.2 | start_timer(); |
334 : | edgomez | 1.21.2.6 | transfer_op(&data[0 * 64], pY_Cur, stride); |
335 : | transfer_op(&data[1 * 64], pY_Cur + cst, stride); | ||
336 : | transfer_op(&data[2 * 64], pY_Cur + next_block, stride); | ||
337 : | transfer_op(&data[3 * 64], pY_Cur + next_block + cst, stride); | ||
338 : | transfer_op(&data[4 * 64], pU_Cur, stride2); | ||
339 : | transfer_op(&data[5 * 64], pV_Cur, stride2); | ||
340 : | stop_transfer_timer(); | ||
341 : | syskin | 1.21.2.8 | } |
342 : | h | 1.2 | |
343 : | edgomez | 1.21.2.6 | static __inline void |
344 : | syskin | 1.21.2.8 | MBTrans16to8(const MBParam * const pParam, |
345 : | const FRAMEINFO * const frame, | ||
346 : | const MACROBLOCK * const pMB, | ||
347 : | edgomez | 1.21.2.6 | const uint32_t x_pos, |
348 : | const uint32_t y_pos, | ||
349 : | int16_t data[6 * 64], | ||
350 : | edgomez | 1.21.2.16 | const uint32_t add, /* Must be 1 or 0 */ |
351 : | edgomez | 1.21.2.6 | const uint8_t cbp) |
352 : | { | ||
353 : | uint8_t *pY_Cur, *pU_Cur, *pV_Cur; | ||
354 : | uint32_t stride = pParam->edged_width; | ||
355 : | uint32_t stride2 = stride / 2; | ||
356 : | uint32_t next_block = stride * 8; | ||
357 : | syskin | 1.21.2.8 | uint32_t cst; |
358 : | edgomez | 1.21.2.16 | int vop_reduced; |
359 : | syskin | 1.21.2.8 | const IMAGE * const pCurrent = &frame->image; |
360 : | edgomez | 1.21.2.18 | |
361 : | edgomez | 1.21.2.16 | /* Array of function pointers, indexed by [vop_reduced<<1+add] */ |
362 : | transfer_operation_16to8_t * const functions[4] = | ||
363 : | { | ||
364 : | (transfer_operation_16to8_t*)transfer_16to8copy, | ||
365 : | (transfer_operation_16to8_t*)transfer_16to8add, | ||
366 : | (transfer_operation_16to8_t*)copy_upsampled_8x8_16to8, | ||
367 : | (transfer_operation_16to8_t*)add_upsampled_8x8_16to8 | ||
368 : | }; | ||
369 : | edgomez | 1.21.2.17 | |
370 : | edgomez | 1.21.2.6 | transfer_operation_16to8_t *transfer_op = NULL; |
371 : | edgomez | 1.21 | |
372 : | edgomez | 1.21.2.6 | if (pMB->field_dct) { |
373 : | next_block = stride; | ||
374 : | stride *= 2; | ||
375 : | } | ||
376 : | Isibaar | 1.1 | |
377 : | edgomez | 1.21.2.16 | /* Makes this vars booleans */ |
378 : | vop_reduced = !!(frame->vop_flags & XVID_VOP_REDUCED); | ||
379 : | edgomez | 1.21.2.6 | |
380 : | edgomez | 1.21.2.16 | /* Image pointers */ |
381 : | pY_Cur = pCurrent->y + (y_pos << (4+vop_reduced)) * stride + (x_pos << (4+vop_reduced)); | ||
382 : | pU_Cur = pCurrent->u + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced)); | ||
383 : | pV_Cur = pCurrent->v + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced)); | ||
384 : | Isibaar | 1.1 | |
385 : | edgomez | 1.21.2.16 | /* Block size */ |
386 : | cst = 8<<vop_reduced; | ||
387 : | |||
388 : | /* Operation function */ | ||
389 : | transfer_op = functions[(vop_reduced<<1) + add]; | ||
390 : | h | 1.2 | |
391 : | edgomez | 1.21.2.6 | /* Do the operation */ |
392 : | h | 1.2 | start_timer(); |
393 : | edgomez | 1.21.2.16 | if (cbp&32) transfer_op(pY_Cur, &data[0 * 64], stride); |
394 : | if (cbp&16) transfer_op(pY_Cur + cst, &data[1 * 64], stride); | ||
395 : | if (cbp& 8) transfer_op(pY_Cur + next_block, &data[2 * 64], stride); | ||
396 : | edgomez | 1.21.2.6 | if (cbp& 4) transfer_op(pY_Cur + next_block + cst, &data[3 * 64], stride); |
397 : | edgomez | 1.21.2.16 | if (cbp& 2) transfer_op(pU_Cur, &data[4 * 64], stride2); |
398 : | if (cbp& 1) transfer_op(pV_Cur, &data[5 * 64], stride2); | ||
399 : | h | 1.2 | stop_transfer_timer(); |
400 : | } | ||
401 : | |||
402 : | edgomez | 1.21.2.6 | /***************************************************************************** |
403 : | * Module functions | ||
404 : | ****************************************************************************/ | ||
405 : | |||
406 : | syskin | 1.21.2.8 | void |
407 : | MBTransQuantIntra(const MBParam * const pParam, | ||
408 : | const FRAMEINFO * const frame, | ||
409 : | MACROBLOCK * const pMB, | ||
410 : | chl | 1.8 | const uint32_t x_pos, |
411 : | const uint32_t y_pos, | ||
412 : | int16_t data[6 * 64], | ||
413 : | int16_t qcoeff[6 * 64]) | ||
414 : | { | ||
415 : | |||
416 : | edgomez | 1.21.2.6 | /* Transfer data */ |
417 : | MBTrans8to16(pParam, frame, pMB, x_pos, y_pos, data); | ||
418 : | chl | 1.8 | |
419 : | edgomez | 1.21.2.6 | /* Perform DCT (and field decision) */ |
420 : | MBfDCT(pParam, frame, pMB, x_pos, y_pos, data); | ||
421 : | chl | 1.8 | |
422 : | edgomez | 1.21.2.6 | /* Quantize the block */ |
423 : | chl | 1.21.2.9 | MBQuantIntra(pParam, frame, pMB, data, qcoeff); |
424 : | edgomez | 1.21.2.6 | |
425 : | /* DeQuantize the block */ | ||
426 : | MBDeQuantIntra(pParam, pMB->quant, data, qcoeff); | ||
427 : | |||
428 : | /* Perform inverse DCT*/ | ||
429 : | MBiDCT(data, 0x3F); | ||
430 : | |||
431 : | /* Transfer back the data -- Don't add data */ | ||
432 : | MBTrans16to8(pParam, frame, pMB, x_pos, y_pos, data, 0, 0x3F); | ||
433 : | chl | 1.8 | } |
434 : | |||
435 : | edgomez | 1.21.2.6 | |
436 : | chl | 1.8 | uint8_t |
437 : | syskin | 1.21.2.8 | MBTransQuantInter(const MBParam * const pParam, |
438 : | const FRAMEINFO * const frame, | ||
439 : | MACROBLOCK * const pMB, | ||
440 : | edgomez | 1.21.2.2 | const uint32_t x_pos, |
441 : | const uint32_t y_pos, | ||
442 : | chl | 1.8 | int16_t data[6 * 64], |
443 : | int16_t qcoeff[6 * 64]) | ||
444 : | { | ||
445 : | uint8_t cbp; | ||
446 : | edgomez | 1.21.2.6 | uint32_t limit; |
447 : | chl | 1.8 | |
448 : | edgomez | 1.21.2.18 | /* There is no MBTrans8to16 for Inter block, that's done in motion compensation |
449 : | * already */ | ||
450 : | edgomez | 1.21.2.2 | |
451 : | edgomez | 1.21.2.6 | /* Perform DCT (and field decision) */ |
452 : | MBfDCT(pParam, frame, pMB, x_pos, y_pos, data); | ||
453 : | chl | 1.8 | |
454 : | edgomez | 1.21.2.6 | /* Set the limit threshold */ |
455 : | limit = PVOP_TOOSMALL_LIMIT + ((pMB->quant == 1)? 1 : 0); | ||
456 : | chl | 1.8 | |
457 : | Isibaar | 1.21.2.15 | if (frame->vop_flags & XVID_VOP_CARTOON) |
458 : | limit *= 3; | ||
459 : | |||
460 : | edgomez | 1.21.2.6 | /* Quantize the block */ |
461 : | chl | 1.21.2.9 | cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 0, limit); |
462 : | chl | 1.8 | |
463 : | edgomez | 1.21.2.6 | /* DeQuantize the block */ |
464 : | MBDeQuantInter(pParam, pMB->quant, data, qcoeff, cbp); | ||
465 : | chl | 1.8 | |
466 : | edgomez | 1.21.2.6 | /* Perform inverse DCT*/ |
467 : | MBiDCT(data, cbp); | ||
468 : | chl | 1.8 | |
469 : | edgomez | 1.21.2.6 | /* Transfer back the data -- Add the data */ |
470 : | MBTrans16to8(pParam, frame, pMB, x_pos, y_pos, data, 1, cbp); | ||
471 : | syskin | 1.21.2.8 | |
472 : | edgomez | 1.21.2.6 | return(cbp); |
473 : | chl | 1.8 | } |
474 : | |||
475 : | edgomez | 1.21.2.6 | uint8_t |
476 : | MBTransQuantInterBVOP(const MBParam * pParam, | ||
477 : | edgomez | 1.21.2.16 | FRAMEINFO * frame, |
478 : | MACROBLOCK * pMB, | ||
479 : | const uint32_t x_pos, | ||
480 : | const uint32_t y_pos, | ||
481 : | int16_t data[6 * 64], | ||
482 : | int16_t qcoeff[6 * 64]) | ||
483 : | chl | 1.8 | { |
484 : | edgomez | 1.21.2.6 | uint8_t cbp; |
485 : | uint32_t limit; | ||
486 : | syskin | 1.21.2.8 | |
487 : | edgomez | 1.21.2.18 | /* There is no MBTrans8to16 for Inter block, that's done in motion compensation |
488 : | * already */ | ||
489 : | chl | 1.8 | |
490 : | edgomez | 1.21.2.6 | /* Perform DCT (and field decision) */ |
491 : | MBfDCT(pParam, frame, pMB, x_pos, y_pos, data); | ||
492 : | chl | 1.8 | |
493 : | edgomez | 1.21.2.6 | /* Set the limit threshold */ |
494 : | limit = BVOP_TOOSMALL_LIMIT; | ||
495 : | Isibaar | 1.21.2.15 | |
496 : | if (frame->vop_flags & XVID_VOP_CARTOON) | ||
497 : | limit *= 2; | ||
498 : | chl | 1.8 | |
499 : | edgomez | 1.21.2.6 | /* Quantize the block */ |
500 : | chl | 1.21.2.9 | cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 1, limit); |
501 : | h | 1.2 | |
502 : | edgomez | 1.21.2.6 | /* |
503 : | * History comment: | ||
504 : | * We don't have to DeQuant, iDCT and Transfer back data for B-frames. | ||
505 : | * | ||
506 : | edgomez | 1.21.2.18 | * BUT some plugins require the rebuilt original frame to be passed so we |
507 : | * have to take care of that here | ||
508 : | edgomez | 1.21.2.6 | */ |
509 : | if((pParam->plugin_flags & XVID_REQORIGINAL)) { | ||
510 : | h | 1.2 | |
511 : | edgomez | 1.21.2.6 | /* DeQuantize the block */ |
512 : | MBDeQuantInter(pParam, pMB->quant, data, qcoeff, cbp); | ||
513 : | Isibaar | 1.1 | |
514 : | edgomez | 1.21.2.6 | /* Perform inverse DCT*/ |
515 : | MBiDCT(data, cbp); | ||
516 : | edgomez | 1.21 | |
517 : | edgomez | 1.21.2.6 | /* Transfer back the data -- Add the data */ |
518 : | MBTrans16to8(pParam, frame, pMB, x_pos, y_pos, data, 1, cbp); | ||
519 : | edgomez | 1.21 | } |
520 : | |||
521 : | edgomez | 1.21.2.6 | return(cbp); |
522 : | edgomez | 1.21 | } |
523 : | |||
/*
 * Frame/field DCT test on the four luma blocks of a macroblock.
 *
 * frame cost: sum of absolute differences between vertically adjacent lines
 *             inside each of the blocks 0..3.
 * field cost: the same measure taken along lines of equal parity. Rows
 *             0,2,4,6 of the upper block are followed by rows 0,2,4,6 of the
 *             block below it, and likewise for rows 1,3,5,7, for both the
 *             left (blocks 0/2) and right (blocks 1/3) halves.
 *
 * Returns 1 when frame cost >= field cost + 350 (use field DCT), else 0.
 * Blocks 4 and 5 are not read.
 */
uint32_t
MBFieldTest_c(int16_t data[6 * 64])
{
	int frame_cost = 0;
	int field_cost = 0;
	int blk, row, col, side, parity, step;

	/* Adjacent lines within every luma block */
	for (blk = 0; blk < 4; blk++) {
		const int16_t *const block = &data[blk * 64];

		for (row = 0; row < 7; row++) {
			for (col = 0; col < 8; col++) {
				frame_cost += abs(block[(row + 1) * 8 + col] - block[row * 8 + col]);
			}
		}
	}

	/* Same-parity lines, walking from the upper block into the lower one */
	for (side = 0; side < 2; side++) {
		for (parity = 0; parity < 2; parity++) {
			const int16_t *const base = &data[side * 64 + parity * 8];

			for (step = 0; step < 7; step++) {
				/* step 0..3 -> upper block, 4..7 -> lower block; 2 rows apart */
				const int here = (step >> 2) * (2 * 64) + (step & 3) * 16;
				const int next = ((step + 1) >> 2) * (2 * 64) + ((step + 1) & 3) * 16;

				for (col = 0; col < 8; col++) {
					field_cost += abs(base[next + col] - base[here + col]);
				}
			}
		}
	}

	return (frame_cost >= (field_cost + 350));
}
563 : | |||
564 : | |||
/* deinterlace Y blocks vertically */

/*
 * Helpers for MBFrameToField().
 *
 * MOVLINE(X,Y) copies one line from Y to X. The copy size is sizeof(tmp), so
 *              a local array named `tmp` holding one line must be in scope.
 * LINE(X,Y)    address of row Y of block X inside a local `data` buffer.
 *
 * Arguments and expansions are parenthesised so that expressions such as
 * LINE(blk + 1, row + 1) expand correctly.
 */
#define MOVLINE(X,Y) memcpy((X), (Y), sizeof(tmp))
#define LINE(X,Y)	(&data[(X)*64 + (Y)*8])
569 : | h | 1.2 | |
/*
 * Reorder the luma lines of a macroblock from frame order to field order,
 * in place.
 *
 * The 16 lines of each luma column (block 0 above block 2 on the left,
 * block 1 above block 3 on the right) are de-interleaved: the upper block
 * receives lines 0,2,4,...,14 and the lower block receives lines
 * 1,3,5,...,15. Blocks 4 and 5 are not touched.
 */
void
MBFrameToField(int16_t data[6 * 64])
{
	int16_t column[16][8];	/* snapshot of one 8-wide, 16-line luma column */
	int side, line;

	for (side = 0; side < 2; side++) {
		int16_t *const upper = &data[side * 64];
		int16_t *const lower = &data[(side + 2) * 64];

		/* Lines 0..7 come from the upper block, 8..15 from the lower one */
		memcpy(column[0], upper, 8 * sizeof(column[0]));
		memcpy(column[8], lower, 8 * sizeof(column[0]));

		for (line = 0; line < 8; line++) {
			memcpy(&upper[line * 8], column[2 * line], sizeof(column[0]));
			memcpy(&lower[line * 8], column[2 * line + 1], sizeof(column[0]));
		}
	}
}
631 : | chl | 1.21.2.10 | |
632 : | edgomez | 1.21.2.13 | /***************************************************************************** |
633 : | * Trellis based R-D optimal quantization | ||
634 : | * | ||
635 : | * Trellis Quant code (C) 2003 Pascal Massimino skal(at)planet-d.net | ||
636 : | * | ||
637 : | ****************************************************************************/ | ||
638 : | |||
639 : | /*---------------------------------------------------------------------------- | ||
640 : | * | ||
641 : | * Trellis-Based quantization | ||
642 : | * | ||
643 : | * So far I understand this paper: | ||
644 : | * | ||
645 : | * "Trellis-Based R-D Optimal Quantization in H.263+" | ||
646 : | * J.Wen, M.Luttrell, J.Villasenor | ||
647 : | * IEEE Transactions on Image Processing, Vol.9, No.8, Aug. 2000. | ||
648 : | * | ||
649 : | * we are dealing with a simplified Bellman-Ford / Dijkstra Single | ||
650 : | * Source Shortest Path algo. But due to the underlying graph structure | ||
651 : | * ("Trellis"), it can be turned into a dynamic programming algo, | ||
652 : | edgomez | 1.21.2.17 | * partially saving the explicit graph's nodes representation. And |
653 : | edgomez | 1.21.2.13 | * without using a heap, since the open frontier of the DAG is always |
654 : | * known, and of fixed sized. | ||
655 : | *--------------------------------------------------------------------------*/ | ||
656 : | |||
657 : | chl | 1.21.2.10 | |
658 : | |||
/*
 * Code lengths for relevant levels.
 *
 * Twelve 64-entry tables, factored out so they can be shared. Each one is
 * laid out below as 8 rows of 8 entries; only the first one or two entries
 * ever differ from 30.
 * NOTE(review): what the index stands for (presumably the run) and why 30 is
 * the filler value are not visible in this section -- confirm against the
 * code that consumes these tables.
 */
static const uint8_t Code_Len0[64] = {
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len1[64] = {
	20,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len2[64] = {
	19,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len3[64] = {
	18,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len4[64] = {
	17,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len5[64] = {
	16,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len6[64] = {
	15,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len7[64] = {
	13,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len8[64] = {
	11,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len9[64] = {
	12,21,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len10[64] = {
	12,20,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
static const uint8_t Code_Len11[64] = {
	12,19,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30,
	30,30,30,30,30,30,30,30 };
698 : | chl | 1.21.2.10 | static const uint8_t Code_Len12[64] = { |
699 : | edgomez | 1.21.2.16 | 11,17,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30, |
700 : | 30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 }; | ||
701 : | chl | 1.21.2.10 | static const uint8_t Code_Len13[64] = { |
702 : | edgomez | 1.21.2.16 | 11,15,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30, |
703 : | 30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 }; | ||
704 : | chl | 1.21.2.10 | static const uint8_t Code_Len14[64] = { |
705 : | edgomez | 1.21.2.16 | 10,12,19,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30, |
706 : | 30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 }; | ||
707 : | chl | 1.21.2.10 | static const uint8_t Code_Len15[64] = { |
708 : | edgomez | 1.21.2.16 | 10,13,17,19,21,21,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30, |
709 : | 30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 }; | ||
710 : | chl | 1.21.2.10 | static const uint8_t Code_Len16[64] = { |
711 : | edgomez | 1.21.2.16 | 9,12,13,18,18,19,19,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30, |
712 : | 30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30}; | ||
713 : | chl | 1.21.2.10 | static const uint8_t Code_Len17[64] = { |
714 : | edgomez | 1.21.2.16 | 8,11,13,14,14,14,15,19,19,19,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30, |
715 : | 30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 }; | ||
716 : | chl | 1.21.2.10 | static const uint8_t Code_Len18[64] = { |
717 : | edgomez | 1.21.2.16 | 7, 9,11,11,13,13,13,15,15,15,16,22,22,22,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30, |
718 : | 30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 }; | ||
719 : | chl | 1.21.2.10 | static const uint8_t Code_Len19[64] = { |
720 : | edgomez | 1.21.2.16 | 5, 7, 9,10,10,11,11,11,11,11,13,14,16,17,17,18,18,18,18,18,18,18,18,20,20,21,21,30,30,30,30,30, |
721 : | 30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 }; | ||
722 : | chl | 1.21.2.10 | static const uint8_t Code_Len20[64] = { |
723 : | edgomez | 1.21.2.16 | 3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9,10,10,10,10,10,10,10,10,12,12,13,13,12,13,14,15,15, |
724 : | 15,16,16,16,16,17,17,17,18,18,19,19,19,19,19,19,19,19,21,21,22,22,30,30,30,30,30,30,30,30,30,30 }; | ||
725 : | chl | 1.21.2.10 | |
/*
 * Additional code-length tables, referenced only by the LAST table
 * (B16_17_Code_Len_Last). Same layout as the other Code_LenN tables:
 * 64 entries each, 30 being the fallback length. LAST_PADn expands to
 * n entries equal to 30.
 */
#define LAST_PAD1   30
#define LAST_PAD2   LAST_PAD1,  LAST_PAD1
#define LAST_PAD4   LAST_PAD2,  LAST_PAD2
#define LAST_PAD8   LAST_PAD4,  LAST_PAD4
#define LAST_PAD16  LAST_PAD8,  LAST_PAD8
#define LAST_PAD32  LAST_PAD16, LAST_PAD16
#define LAST_PAD62  LAST_PAD32, LAST_PAD16, LAST_PAD8, LAST_PAD4, LAST_PAD2

/* two significant entries, followed by 62 x 30 */
static const uint8_t Code_Len21[64] = { 13, 20, LAST_PAD62 };
static const uint8_t Code_Len22[64] = { 12, 15, LAST_PAD62 };

/* 41 significant entries, followed by 23 x 30 */
static const uint8_t Code_Len23[64] = {
	10, 12, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17,
	17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20,
	20, 21, 21, 21, 21, 21, 21, 21, 21,
	LAST_PAD16, LAST_PAD4, LAST_PAD2, LAST_PAD1 };

/* fully tabulated: no fallback entry */
static const uint8_t Code_Len24[64] = {
	 5,  7,  7,  7,  7,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,
	 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12,
	12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 16, 16, 16, 16, 17, 17,
	17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19 };

#undef LAST_PAD62
#undef LAST_PAD32
#undef LAST_PAD16
#undef LAST_PAD8
#undef LAST_PAD4
#undef LAST_PAD2
#undef LAST_PAD1
739 : | chl | 1.21.2.10 | |
740 : | |||
741 : | edgomez | 1.21.2.13 | static const uint8_t * const B16_17_Code_Len[24] = { /* levels [1..24] */ |
742 : | edgomez | 1.21.2.16 | Code_Len20,Code_Len19,Code_Len18,Code_Len17, |
743 : | Code_Len16,Code_Len15,Code_Len14,Code_Len13, | ||
744 : | Code_Len12,Code_Len11,Code_Len10,Code_Len9, | ||
745 : | Code_Len8, Code_Len7 ,Code_Len6 ,Code_Len5, | ||
746 : | Code_Len4, Code_Len3, Code_Len3 ,Code_Len2, | ||
747 : | Code_Len2, Code_Len1, Code_Len1, Code_Len1, | ||
748 : | chl | 1.21.2.10 | }; |
749 : | |||
750 : | edgomez | 1.21.2.13 | static const uint8_t * const B16_17_Code_Len_Last[6] = { /* levels [1..6] */ |
751 : | edgomez | 1.21.2.17 | Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1, |
752 : | chl | 1.21.2.10 | }; |
753 : | |||
/* TL_SHIFT controls the precision of the RD optimizations in trellis.
 * Valid range is [10..16]. The bigger it is, the more vulnerable trellis
 * is to overflows in the cost formulas.
 *  - 10 allows ac values up to 2^11 == 2048
 *  - 16 allows ac values up to 2^8  == 256
 */
#define TL_SHIFT 11

/* Refuse to build with a precision outside the documented range. */
#if (TL_SHIFT < 10) || (TL_SHIFT > 16)
#error "TL_SHIFT must be in the range [10..16]"
#endif

/*
 * Lambda factor for quantizer q: a fixed-point constant scaled to TL_SHIFT
 * bits and divided by q squared. The argument is parenthesized so that an
 * expression argument expands correctly.
 */
#define TL(q) ((0xfe00>>(16-TL_SHIFT))/((q)*(q)))

/* Lambda factors for quantizers 1..31; entry [Q-1] belongs to quantizer Q. */
static const int Trellis_Lambda_Tabs[31] = {
	       TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6),TL( 7),
	TL( 8),TL( 9),TL(10),TL(11),TL(12),TL(13),TL(14),TL(15),
	TL(16),TL(17),TL(18),TL(19),TL(20),TL(21),TL(22),TL(23),
	TL(24),TL(25),TL(26),TL(27),TL(28),TL(29),TL(30),TL(31)
};
#undef TL
770 : | |||
/*
 * Scan backward, starting at scan position i, for the last non-zero
 * coefficient of C when read through the Zigzag table.
 *
 * Returns the scan position of that coefficient, or -1 when every
 * coefficient at positions [0..i] is zero (or when i is negative).
 */
static int __inline
Find_Last(const int16_t *C, const uint16_t *Zigzag, int i)
{
	int pos;

	for (pos = i; pos >= 0; --pos) {
		if (C[Zigzag[pos]] != 0)
			return pos;
	}
	return -1;
}
780 : | |||
/*
 * Sum of the absolute values of the first `last` entries of C,
 * i.e. C[0] .. C[last-1] taken in storage order.
 */
static int __inline
Compute_Sum(const int16_t *C, int last)
{
	int total = 0;
	int idx = last;

	while (idx != 0) {
		--idx;
		total += abs(C[idx]);
	}

	return total;
}
791 : | chl | 1.21.2.11 | |
792 : | edgomez | 1.21.2.19 | /* this routine has been strippen of all debug code */ |
793 : | edgomez | 1.21.2.17 | static int |
794 : | edgomez | 1.21.2.19 | dct_quantize_trellis_c(int16_t *const Out, |
795 : | const int16_t *const In, | ||
796 : | int Q, | ||
797 : | const uint16_t * const Zigzag, | ||
798 : | const uint16_t * const QuantMatrix, | ||
799 : | int Non_Zero) | ||
800 : | chl | 1.21.2.11 | { |
801 : | |||
802 : | edgomez | 1.21.2.13 | /* |
803 : | * Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]), | ||
804 : | * not quantized one (Out[]). However, it only improves the result *very* | ||
805 : | * slightly (~0.01dB), whereas speed drops to crawling level :) | ||
806 : | * Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps. | ||
807 : | */ | ||
808 : | edgomez | 1.21.2.16 | typedef struct { int16_t Run, Level; } NODE; |
809 : | edgomez | 1.21.2.17 | |
810 : | edgomez | 1.21.2.16 | NODE Nodes[65], Last; |
811 : | uint32_t Run_Costs0[64+1]; | ||
812 : | uint32_t * const Run_Costs = Run_Costs0 + 1; | ||
813 : | edgomez | 1.21.2.19 | |
814 : | edgomez | 1.21.2.16 | const int Lambda = Trellis_Lambda_Tabs[Q-1]; /* it's 1/lambda, actually */ |
815 : | |||
816 : | int Run_Start = -1; | ||
817 : | edgomez | 1.21.2.19 | uint32_t Min_Cost = 2<<TL_SHIFT; |
818 : | edgomez | 1.21.2.16 | |
819 : | int Last_Node = -1; | ||
820 : | uint32_t Last_Cost = 0; | ||
821 : | |||
822 : | int i, j, sum; | ||
823 : | edgomez | 1.21.2.19 | Run_Costs[-1] = 2<<TL_SHIFT; /* source (w/ CBP penalty) */ |
824 : | edgomez | 1.21.2.16 | |
825 : | Non_Zero = Find_Last(Out, Zigzag, Non_Zero); | ||
826 : | if (Non_Zero<0) | ||
827 : | return 0; /* Sum is zero if there are only zero coeffs */ | ||
828 : | |||
829 : | for(i=0; i<=Non_Zero; i++) { | ||
830 : | edgomez | 1.21.2.19 | const int q = ((Q*QuantMatrix[Zigzag[i]])>>4); |
831 : | const int Mult = 2*q; | ||
832 : | const int Bias = (q-1) | 1; | ||
833 : | const int Lev0 = Mult + Bias; | ||
834 : | |||
835 : | edgomez | 1.21.2.16 | const int AC = In[Zigzag[i]]; |
836 : | const int Level1 = Out[Zigzag[i]]; | ||
837 : | edgomez | 1.21.2.19 | const unsigned int Dist0 = Lambda* AC*AC; |
838 : | edgomez | 1.21.2.16 | uint32_t Best_Cost = 0xf0000000; |
839 : | Last_Cost += Dist0; | ||
840 : | |||
841 : | /* very specialized loop for -1,0,+1 */ | ||
842 : | if ((uint32_t)(Level1+1)<3) { | ||
843 : | int dQ; | ||
844 : | int Run; | ||
845 : | uint32_t Cost0; | ||
846 : | |||
847 : | if (AC<0) { | ||
848 : | Nodes[i].Level = -1; | ||
849 : | dQ = Lev0 + AC; | ||
850 : | } else { | ||
851 : | Nodes[i].Level = 1; | ||
852 : | dQ = Lev0 - AC; | ||
853 : | } | ||
854 : | Cost0 = Lambda*dQ*dQ; | ||
855 : | edgomez | 1.21.2.17 | |
856 : | edgomez | 1.21.2.16 | Nodes[i].Run = 1; |
857 : | edgomez | 1.21.2.19 | Best_Cost = (Code_Len20[0]<<TL_SHIFT) + Run_Costs[i-1]+Cost0; |
858 : | edgomez | 1.21.2.16 | for(Run=i-Run_Start; Run>0; --Run) { |
859 : | const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run]; | ||
860 : | edgomez | 1.21.2.19 | const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<TL_SHIFT); |
861 : | const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<TL_SHIFT); | ||
862 : | edgomez | 1.21.2.16 | |
863 : | /* | ||
864 : | * TODO: what about tie-breaks? Should we favor short runs or | ||
865 : | * long runs? Although the error is the same, it would not be | ||
866 : | * spread the same way along high and low frequencies... | ||
867 : | */ | ||
868 : | |||
869 : | /* (I'd say: favour short runs => hifreq errors (HVS) -- gruel ) */ | ||
870 : | |||
871 : | if (Cost<Best_Cost) { | ||
872 : | Best_Cost = Cost; | ||
873 : | Nodes[i].Run = Run; | ||
874 : | } | ||
875 : | |||
876 : | if (lCost<Last_Cost) { | ||
877 : | Last_Cost = lCost; | ||
878 : | Last.Run = Run; | ||
879 : | Last_Node = i; | ||
880 : | } | ||
881 : | } | ||
882 : | edgomez | 1.21.2.17 | if (Last_Node==i) |
883 : | edgomez | 1.21.2.16 | Last.Level = Nodes[i].Level; |
884 : | } else { /* "big" levels */ | ||
885 : | const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last; | ||
886 : | int Level2; | ||
887 : | int dQ1, dQ2; | ||
888 : | int Run; | ||
889 : | uint32_t Dist1,Dist2; | ||
890 : | int dDist21; | ||
891 : | edgomez | 1.21.2.17 | |
892 : | edgomez | 1.21.2.16 | if (Level1>1) { |
893 : | dQ1 = Level1*Mult-AC + Bias; | ||
894 : | dQ2 = dQ1 - Mult; | ||
895 : | Level2 = Level1-1; | ||
896 : | Tbl_L1 = (Level1<=24) ? B16_17_Code_Len[Level1-1] : Code_Len0; | ||
897 : | Tbl_L2 = (Level2<=24) ? B16_17_Code_Len[Level2-1] : Code_Len0; | ||
898 : | Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0; | ||
899 : | Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0; | ||
900 : | } else { /* Level1<-1 */ | ||
901 : | dQ1 = Level1*Mult-AC - Bias; | ||
902 : | dQ2 = dQ1 + Mult; | ||
903 : | Level2 = Level1 + 1; | ||
904 : | Tbl_L1 = (Level1>=-24) ? B16_17_Code_Len[Level1^-1] : Code_Len0; | ||
905 : | Tbl_L2 = (Level2>=-24) ? B16_17_Code_Len[Level2^-1] : Code_Len0; | ||
906 : | Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0; | ||
907 : | Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0; | ||
908 : | } | ||
909 : | chl | 1.21.2.11 | |
910 : | edgomez | 1.21.2.16 | Dist1 = Lambda*dQ1*dQ1; |
911 : | Dist2 = Lambda*dQ2*dQ2; | ||
912 : | dDist21 = Dist2-Dist1; | ||
913 : | |||
914 : | for(Run=i-Run_Start; Run>0; --Run) | ||
915 : | { | ||
916 : | const uint32_t Cost_Base = Dist1 + Run_Costs[i-Run]; | ||
917 : | uint32_t Cost1, Cost2; | ||
918 : | int bLevel; | ||
919 : | |||
920 : | /* | ||
921 : | * for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following: | ||
922 : | * if (Cost_Base>=Best_Cost) continue; | ||
923 : | * (? doesn't seem to have any effect -- gruel ) | ||
924 : | */ | ||
925 : | |||
926 : | edgomez | 1.21.2.19 | Cost1 = Cost_Base + (Tbl_L1[Run-1]<<TL_SHIFT); |
927 : | Cost2 = Cost_Base + (Tbl_L2[Run-1]<<TL_SHIFT) + dDist21; | ||
928 : | edgomez | 1.21.2.16 | |
929 : | edgomez | 1.21.2.17 | if (Cost2<Cost1) { |
930 : | Cost1 = Cost2; | ||
931 : | bLevel = Level2; | ||
932 : | edgomez | 1.21.2.16 | } else { |
933 : | bLevel = Level1; | ||
934 : | } | ||
935 : | |||
936 : | if (Cost1<Best_Cost) { | ||
937 : | Best_Cost = Cost1; | ||
938 : | Nodes[i].Run = Run; | ||
939 : | Nodes[i].Level = bLevel; | ||
940 : | } | ||
941 : | |||
942 : | edgomez | 1.21.2.19 | Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<TL_SHIFT); |
943 : | Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<TL_SHIFT) + dDist21; | ||
944 : | edgomez | 1.21.2.16 | |
945 : | edgomez | 1.21.2.17 | if (Cost2<Cost1) { |
946 : | Cost1 = Cost2; | ||
947 : | bLevel = Level2; | ||
948 : | edgomez | 1.21.2.16 | } else { |
949 : | bLevel = Level1; | ||
950 : | } | ||
951 : | edgomez | 1.21.2.17 | |
952 : | edgomez | 1.21.2.16 | if (Cost1<Last_Cost) { |
953 : | Last_Cost = Cost1; | ||
954 : | Last.Run = Run; | ||
955 : | Last.Level = bLevel; | ||
956 : | Last_Node = i; | ||
957 : | } | ||
958 : | } /* end of "for Run" */ | ||
959 : | chl | 1.21.2.11 | |
960 : | edgomez | 1.21.2.16 | } |
961 : | chl | 1.21.2.11 | |
962 : | edgomez | 1.21.2.16 | Run_Costs[i] = Best_Cost; |
963 : | chl | 1.21.2.11 | |
964 : | edgomez | 1.21.2.16 | if (Best_Cost < Min_Cost + Dist0) { |
965 : | Min_Cost = Best_Cost; | ||
966 : | Run_Start = i; | ||
967 : | } else { | ||
968 : | /* | ||
969 : | * as noticed by Michael Niedermayer (michaelni at gmx.at), there's | ||
970 : | * a code shorter by 1 bit for a larger run (!), same level. We give | ||
971 : | * it a chance by not moving the left barrier too much. | ||
972 : | */ | ||
973 : | |||
974 : | edgomez | 1.21.2.19 | while( Run_Costs[Run_Start]>Min_Cost+(1<<TL_SHIFT) ) |
975 : | edgomez | 1.21.2.16 | Run_Start++; |
976 : | |||
977 : | /* spread on preceding coeffs the cost incurred by skipping this one */ | ||
978 : | for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0; | ||
979 : | Min_Cost += Dist0; | ||
980 : | } | ||
981 : | } | ||
982 : | chl | 1.21.2.11 | |
983 : | edgomez | 1.21.2.16 | /* It seems trellis doesn't give good results... just compute the Out sum and |
984 : | * quit (even if we did not modify it, upperlayer relies on this data) */ | ||
985 : | if (Last_Node<0) | ||
986 : | return Compute_Sum(Out, Non_Zero); | ||
987 : | |||
988 : | /* reconstruct optimal sequence backward with surviving paths */ | ||
989 : | memset(Out, 0x00, 64*sizeof(*Out)); | ||
990 : | Out[Zigzag[Last_Node]] = Last.Level; | ||
991 : | i = Last_Node - Last.Run; | ||
992 : | sum = 0; | ||
993 : | while(i>=0) { | ||
994 : | Out[Zigzag[i]] = Nodes[i].Level; | ||
995 : | sum += abs(Nodes[i].Level); | ||
996 : | i -= Nodes[i].Run; | ||
997 : | } | ||
998 : | chl | 1.21.2.11 | |
999 : | edgomez | 1.21.2.16 | return sum; |
1000 : | } | ||
1001 : | chl | 1.21.2.11 | |
1002 : | edgomez | 1.21.2.13 | /* original version including heavy debugging info */ |
1003 : | chl | 1.21.2.11 | |
1004 : | #ifdef DBGTRELL | ||
1005 : | chl | 1.21.2.10 | |
1006 : | #define DBG 0 | ||
1007 : | |||
/*
 * Debug helper: rate-distortion cost of the level sequence stored in C.
 *
 * C      - quantized levels. The reference (unquantized) coefficients are
 *          read from C + 6*64 -- NOTE(review): this relies on the caller's
 *          buffer layout, confirm before reusing elsewhere.
 * Mult, Bias - dequantization parameters: a non-zero level L is
 *          reconstructed as L*Mult +/- Bias.
 * Zigzag - scan table mapping a scan position to an index in C.
 * Max    - highest scan position to consider.
 * Lambda - distortion weight.
 *
 * Returns Lambda*Dist + (Bits<<TL_SHIFT) when DBG>0, and 0 otherwise.
 */
static __inline uint32_t Evaluate_Cost(const int16_t *C, int Mult, int Bias,
									   const uint16_t * Zigzag, int Max, int Lambda)
{
#if (DBG>0)
	const int16_t * const Ref = C + 6*64;
	int Last = Max;
	int Bits = 0;
	int Dist = 0;
	int i;
	uint32_t Cost;

	/* locate the last non-zero level */
	while(Last>=0 && C[Zigzag[Last]]==0)
		Last--;

	if (Last>=0) {
		int j=0, j0=0;
		int Run, Level;

		Bits = 2;   /* CBP */
		/* every non-zero level before the last one */
		while(j<Last) {
			while(!C[Zigzag[j]])
				j++;
			if (j==Last)
				break;
			Level=C[Zigzag[j]];
			Run = j - j0;
			j0 = ++j;
			if (Level>=-24 && Level<=24)
				Bits += B16_17_Code_Len[(Level<0) ? -Level-1 : Level-1][Run];
			else
				Bits += 30;
		}
		/* the last level uses the LAST tables */
		Level = C[Zigzag[Last]];
		Run = j - j0;
		if (Level>=-6 && Level<=6)
			Bits += B16_17_Code_Len_Last[(Level<0) ? -Level-1 : Level-1][Run];
		else
			Bits += 30;
	}

	/* squared reconstruction error against the reference coefficients */
	for(i=0; i<=Last; ++i) {
		int V = C[Zigzag[i]]*Mult;
		if (V>0)
			V += Bias;
		else
			if (V<0)
				V -= Bias;
		V -= Ref[Zigzag[i]];
		Dist += V*V;
	}
	Cost = Lambda*Dist + (Bits<<TL_SHIFT);
	if (DBG==1)
		/* Cost is unsigned and scaled by 1<<TL_SHIFT: print it accordingly */
		printf( " Last:%2d/%2d Cost = [(Bits=%5.0d) + Lambda*(Dist=%6.0d) = %u ] >>TL_SHIFT= %u ",
				Last, Max, Bits, Dist,
				(unsigned int)Cost, (unsigned int)(Cost>>TL_SHIFT) );
	return Cost;

#else
	return 0;
#endif
}
1067 : | |||
1068 : | |||
1069 : | edgomez | 1.21.2.17 | static int |
1070 : | chl | 1.21.2.10 | dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero) |
1071 : | { | ||
1072 : | |||
1073 : | edgomez | 1.21.2.13 | /* |
1074 : | * Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]), | ||
1075 : | * not quantized one (Out[]). However, it only improves the result *very* | ||
1076 : | * slightly (~0.01dB), whereas speed drops to crawling level :) | ||
1077 : | * Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps. | ||
1078 : | */ | ||
1079 : | edgomez | 1.21.2.16 | typedef struct { int16_t Run, Level; } NODE; |
1080 : | edgomez | 1.21.2.17 | |
1081 : | edgomez | 1.21.2.16 | NODE Nodes[65], Last; |
1082 : | uint32_t Run_Costs0[64+1]; | ||
1083 : | uint32_t * const Run_Costs = Run_Costs0 + 1; | ||
1084 : | const int Mult = 2*Q; | ||
1085 : | const int Bias = (Q-1) | 1; | ||
1086 : | const int Lev0 = Mult + Bias; | ||
1087 : | const int Lambda = Trellis_Lambda_Tabs[Q-1]; /* it's 1/lambda, actually */ | ||
1088 : | |||
1089 : | int Run_Start = -1; | ||
1090 : | edgomez | 1.21.2.19 | Run_Costs[-1] = 2<<TL_SHIFT; /* source (w/ CBP penalty) */ |
1091 : | uint32_t Min_Cost = 2<<TL_SHIFT; | ||
1092 : | chl | 1.21.2.10 | |
1093 : | edgomez | 1.21.2.16 | int Last_Node = -1; |
1094 : | uint32_t Last_Cost = 0; | ||
1095 : | chl | 1.21.2.10 | |
1096 : | edgomez | 1.21.2.16 | int i, j; |
1097 : | chl | 1.21.2.11 | |
1098 : | chl | 1.21.2.10 | #if (DBG>0) |
1099 : | edgomez | 1.21.2.16 | Last.Level = 0; Last.Run = -1; /* just initialize to smthg */ |
1100 : | chl | 1.21.2.10 | #endif |
1101 : | |||
1102 : | edgomez | 1.21.2.16 | Non_Zero = Find_Last(Out, Zigzag, Non_Zero); |
1103 : | if (Non_Zero<0) | ||
1104 : | edgomez | 1.21.2.17 | return -1; |
1105 : | edgomez | 1.21.2.16 | |
1106 : | for(i=0; i<=Non_Zero; i++) | ||
1107 : | { | ||
1108 : | const int AC = In[Zigzag[i]]; | ||
1109 : | const int Level1 = Out[Zigzag[i]]; | ||
1110 : | const int Dist0 = Lambda* AC*AC; | ||
1111 : | uint32_t Best_Cost = 0xf0000000; | ||
1112 : | Last_Cost += Dist0; | ||
1113 : | |||
1114 : | if ((uint32_t)(Level1+1)<3) /* very specialized loop for -1,0,+1 */ | ||
1115 : | { | ||
1116 : | int dQ; | ||
1117 : | int Run; | ||
1118 : | uint32_t Cost0; | ||
1119 : | |||
1120 : | if (AC<0) { | ||
1121 : | Nodes[i].Level = -1; | ||
1122 : | dQ = Lev0 + AC; | ||
1123 : | } else { | ||
1124 : | Nodes[i].Level = 1; | ||
1125 : | dQ = Lev0 - AC; | ||
1126 : | } | ||
1127 : | Cost0 = Lambda*dQ*dQ; | ||
1128 : | edgomez | 1.21.2.17 | |
1129 : | edgomez | 1.21.2.16 | Nodes[i].Run = 1; |
1130 : | edgomez | 1.21.2.19 | Best_Cost = (Code_Len20[0]<<TL_SHIFT) + Run_Costs[i-1]+Cost0; |
1131 : | edgomez | 1.21.2.16 | for(Run=i-Run_Start; Run>0; --Run) |
1132 : | { | ||
1133 : | const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run]; | ||
1134 : | edgomez | 1.21.2.19 | const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<TL_SHIFT); |
1135 : | const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<TL_SHIFT); | ||
1136 : | edgomez | 1.21.2.16 | |
1137 : | /* | ||
1138 : | * TODO: what about tie-breaks? Should we favor short runs or | ||
1139 : | * long runs? Although the error is the same, it would not be | ||
1140 : | * spread the same way along high and low frequencies... | ||
1141 : | */ | ||
1142 : | if (Cost<Best_Cost) { | ||
1143 : | Best_Cost = Cost; | ||
1144 : | Nodes[i].Run = Run; | ||
1145 : | } | ||
1146 : | |||
1147 : | if (lCost<Last_Cost) { | ||
1148 : | Last_Cost = lCost; | ||
1149 : | Last.Run = Run; | ||
1150 : | Last_Node = i; | ||
1151 : | } | ||
1152 : | } | ||
1153 : | edgomez | 1.21.2.17 | if (Last_Node==i) |
1154 : | edgomez | 1.21.2.16 | Last.Level = Nodes[i].Level; |
1155 : | |||
1156 : | if (DBG==1) { | ||
1157 : | Run_Costs[i] = Best_Cost; | ||
1158 : | printf( "Costs #%2d: ", i); | ||
1159 : | for(j=-1;j<=Non_Zero;++j) { | ||
1160 : | if (j==Run_Start) printf( " %3.0d|", Run_Costs[j]>>12 ); | ||
1161 : | else if (j>Run_Start && j<i) printf( " %3.0d|", Run_Costs[j]>>12 ); | ||
1162 : | else if (j==i) printf( "(%3.0d)", Run_Costs[j]>>12 ); | ||
1163 : | else printf( " - |" ); | ||
1164 : | } | ||
1165 : | printf( "<%3.0d %2d %d>", Min_Cost>>12, Nodes[i].Level, Nodes[i].Run ); | ||
1166 : | printf( " Last:#%2d {%3.0d %2d %d}", Last_Node, Last_Cost>>12, Last.Level, Last.Run ); | ||
1167 : | printf( " AC:%3.0d Dist0:%3d Dist(%d)=%d", AC, Dist0>>12, Nodes[i].Level, Cost0>>12 ); | ||
1168 : | printf( "\n" ); | ||
1169 : | } | ||
1170 : | } | ||
1171 : | else /* "big" levels */ | ||
1172 : | { | ||
1173 : | const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last; | ||
1174 : | int Level2; | ||
1175 : | int dQ1, dQ2; | ||
1176 : | int Run; | ||
1177 : | uint32_t Dist1,Dist2; | ||
1178 : | int dDist21; | ||
1179 : | edgomez | 1.21.2.17 | |
1180 : | edgomez | 1.21.2.16 | if (Level1>1) { |
1181 : | dQ1 = Level1*Mult-AC + Bias; | ||
1182 : | dQ2 = dQ1 - Mult; | ||
1183 : | Level2 = Level1-1; | ||
1184 : | Tbl_L1 = (Level1<=24) ? B16_17_Code_Len[Level1-1] : Code_Len0; | ||
1185 : | Tbl_L2 = (Level2<=24) ? B16_17_Code_Len[Level2-1] : Code_Len0; | ||
1186 : | Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0; | ||
1187 : | Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0; | ||
1188 : | } else { /* Level1<-1 */ | ||
1189 : | dQ1 = Level1*Mult-AC - Bias; | ||
1190 : | dQ2 = dQ1 + Mult; | ||
1191 : | Level2 = Level1 + 1; | ||
1192 : | Tbl_L1 = (Level1>=-24) ? B16_17_Code_Len[Level1^-1] : Code_Len0; | ||
1193 : | Tbl_L2 = (Level2>=-24) ? B16_17_Code_Len[Level2^-1] : Code_Len0; | ||
1194 : | Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0; | ||
1195 : | Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0; | ||
1196 : | } | ||
1197 : | Dist1 = Lambda*dQ1*dQ1; | ||
1198 : | Dist2 = Lambda*dQ2*dQ2; | ||
1199 : | dDist21 = Dist2-Dist1; | ||
1200 : | |||
1201 : | for(Run=i-Run_Start; Run>0; --Run) | ||
1202 : | { | ||
1203 : | const uint32_t Cost_Base = Dist1 + Run_Costs[i-Run]; | ||
1204 : | uint32_t Cost1, Cost2; | ||
1205 : | int bLevel; | ||
1206 : | chl | 1.21.2.10 | |
1207 : | edgomez | 1.21.2.13 | /* |
1208 : | * for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following: | ||
1209 : | * if (Cost_Base>=Best_Cost) continue; | ||
1210 : | */ | ||
1211 : | edgomez | 1.21.2.19 | Cost1 = Cost_Base + (Tbl_L1[Run-1]<<TL_SHIFT); |
1212 : | Cost2 = Cost_Base + (Tbl_L2[Run-1]<<TL_SHIFT) + dDist21; | ||
1213 : | chl | 1.21.2.10 | |
1214 : | edgomez | 1.21.2.17 | if (Cost2<Cost1) { |
1215 : | Cost1 = Cost2; | ||
1216 : | bLevel = Level2; | ||
1217 : | } else | ||
1218 : | edgomez | 1.21.2.16 | bLevel = Level1; |
1219 : | |||
1220 : | if (Cost1<Best_Cost) { | ||
1221 : | Best_Cost = Cost1; | ||
1222 : | Nodes[i].Run = Run; | ||
1223 : | Nodes[i].Level = bLevel; | ||
1224 : | } | ||
1225 : | |||
1226 : | edgomez | 1.21.2.19 | Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<TL_SHIFT); |
1227 : | Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<TL_SHIFT) + dDist21; | ||
1228 : | edgomez | 1.21.2.16 | |
1229 : | edgomez | 1.21.2.17 | if (Cost2<Cost1) { |
1230 : | Cost1 = Cost2; | ||
1231 : | bLevel = Level2; | ||
1232 : | } else | ||
1233 : | edgomez | 1.21.2.16 | bLevel = Level1; |
1234 : | edgomez | 1.21.2.17 | |
1235 : | edgomez | 1.21.2.16 | if (Cost1<Last_Cost) { |
1236 : | Last_Cost = Cost1; | ||
1237 : | Last.Run = Run; | ||
1238 : | Last.Level = bLevel; | ||
1239 : | Last_Node = i; | ||
1240 : | } | ||
1241 : | } /* end of "for Run" */ | ||
1242 : | |||
1243 : | if (DBG==1) { | ||
1244 : | Run_Costs[i] = Best_Cost; | ||
1245 : | printf( "Costs #%2d: ", i); | ||
1246 : | for(j=-1;j<=Non_Zero;++j) { | ||
1247 : | if (j==Run_Start) printf( " %3.0d|", Run_Costs[j]>>12 ); | ||
1248 : | else if (j>Run_Start && j<i) printf( " %3.0d|", Run_Costs[j]>>12 ); | ||
1249 : | else if (j==i) printf( "(%3.0d)", Run_Costs[j]>>12 ); | ||
1250 : | else printf( " - |" ); | ||
1251 : | } | ||
1252 : | printf( "<%3.0d %2d %d>", Min_Cost>>12, Nodes[i].Level, Nodes[i].Run ); | ||
1253 : | printf( " Last:#%2d {%3.0d %2d %d}", Last_Node, Last_Cost>>12, Last.Level, Last.Run ); | ||
1254 : | printf( " AC:%3.0d Dist0:%3d Dist(%2d):%3d Dist(%2d):%3d", AC, Dist0>>12, Level1, Dist1>>12, Level2, Dist2>>12 ); | ||
1255 : | printf( "\n" ); | ||
1256 : | } | ||
1257 : | } | ||
1258 : | chl | 1.21.2.11 | |
1259 : | edgomez | 1.21.2.16 | Run_Costs[i] = Best_Cost; |
1260 : | chl | 1.21.2.10 | |
1261 : | edgomez | 1.21.2.16 | if (Best_Cost < Min_Cost + Dist0) { |
1262 : | Min_Cost = Best_Cost; | ||
1263 : | Run_Start = i; | ||
1264 : | } | ||
1265 : | else | ||
1266 : | { | ||
1267 : | /* | ||
1268 : | * as noticed by Michael Niedermayer (michaelni at gmx.at), there's | ||
1269 : | * a code shorter by 1 bit for a larger run (!), same level. We give | ||
1270 : | * it a chance by not moving the left barrier too much. | ||
1271 : | */ | ||
1272 : | |||
1273 : | edgomez | 1.21.2.19 | while( Run_Costs[Run_Start]>Min_Cost+(1<<TL_SHIFT) ) |
1274 : | edgomez | 1.21.2.16 | Run_Start++; |
1275 : | |||
1276 : | /* spread on preceding coeffs the cost incurred by skipping this one */ | ||
1277 : | for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0; | ||
1278 : | Min_Cost += Dist0; | ||
1279 : | } | ||
1280 : | } | ||
1281 : | |||
1282 : | if (DBG) { | ||
1283 : | Last_Cost = Evaluate_Cost(Out,Mult,Bias, Zigzag,Non_Zero, Lambda); | ||
1284 : | if (DBG==1) { | ||
1285 : | printf( "=> " ); | ||
1286 : | for(i=0; i<=Non_Zero; ++i) printf( "[%3.0d] ", Out[Zigzag[i]] ); | ||
1287 : | printf( "\n" ); | ||
1288 : | } | ||
1289 : | } | ||
1290 : | |||
1291 : | if (Last_Node<0) | ||
1292 : | return -1; | ||
1293 : | |||
1294 : | /* reconstruct optimal sequence backward with surviving paths */ | ||
1295 : | memset(Out, 0x00, 64*sizeof(*Out)); | ||
1296 : | Out[Zigzag[Last_Node]] = Last.Level; | ||
1297 : | i = Last_Node - Last.Run; | ||
1298 : | while(i>=0) { | ||
1299 : | Out[Zigzag[i]] = Nodes[i].Level; | ||
1300 : | i -= Nodes[i].Run; | ||
1301 : | } | ||
1302 : | |||
1303 : | if (DBG) { | ||
1304 : | uint32_t Cost = Evaluate_Cost(Out,Mult,Bias, Zigzag,Non_Zero, Lambda); | ||
1305 : | if (DBG==1) { | ||
1306 : | edgomez | 1.21.2.17 | printf( "<= " ); |
1307 : | edgomez | 1.21.2.16 | for(i=0; i<=Last_Node; ++i) printf( "[%3.0d] ", Out[Zigzag[i]] ); |
1308 : | printf( "\n--------------------------------\n" ); | ||
1309 : | } | ||
1310 : | if (Cost>Last_Cost) printf( "!!! %u > %u\n", Cost, Last_Cost ); | ||
1311 : | } | ||
1312 : | return Last_Node; | ||
1313 : | chl | 1.21.2.10 | } |
1314 : | |||
1315 : | #undef DBG | ||
1316 : | chl | 1.21.2.11 | |
1317 : | #endif |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |