1 |
/* idct.c, inverse fast discrete cosine transform */ |
/***************************************************************************** |
2 |
|
* |
3 |
|
* XVID MPEG-4 VIDEO CODEC |
4 |
|
* - Inverse DCT - |
5 |
|
* |
6 |
|
* These routines are from Independent JPEG Group's free JPEG software |
7 |
|
* Copyright (C) 1991-1998, Thomas G. Lane (see the file README.IJG) |
8 |
|
* |
9 |
|
* This program is free software ; you can redistribute it and/or modify |
10 |
|
* it under the terms of the GNU General Public License as published by |
11 |
|
* the Free Software Foundation ; either version 2 of the License, or |
12 |
|
* (at your option) any later version. |
13 |
|
* |
14 |
|
* This program is distributed in the hope that it will be useful, |
15 |
|
* but WITHOUT ANY WARRANTY ; without even the implied warranty of |
16 |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 |
|
* GNU General Public License for more details. |
18 |
|
* |
19 |
|
* You should have received a copy of the GNU General Public License |
20 |
|
* along with this program ; if not, write to the Free Software |
21 |
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 |
|
* |
23 |
|
* $Id$ |
24 |
|
* |
25 |
|
****************************************************************************/ |
26 |
|
|
27 |
/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */ |
/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */ |
28 |
|
|
76 |
/* this code assumes >> to be a two's-complement arithmetic */ |
/* this code assumes >> to be a two's-complement arithmetic */ |
77 |
/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */ |
/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */ |
78 |
|
|
|
//#include <windows.h> |
|
79 |
#include "idct.h" |
#include "idct.h" |
80 |
|
|
81 |
/*
 * Fixed-point cosine coefficients for the IDCT butterflies.  Each value is
 * cos(k*pi/16) scaled by 2048*sqrt(2), as the per-line formulas state.
 * NOTE(review): the code below also uses W3 and W5 (e.g. "W3 - W5"); their
 * definitions are not shown next to these lines -- confirm they exist.
 */
#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */ |
#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */ |
85 |
#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */ |
#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */ |
86 |
#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */ |
#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */ |
87 |
|
|
|
|
|
|
/* global declarations */ |
|
|
//void init_idct_int32 (void); |
|
|
//void idct_int32 (short *block); |
|
|
|
|
88 |
/* private data */ |
/* private data */ |
89 |
/*
 * iclip[] is the clipping lookup table.  iclp is the pointer the column pass
 * indexes with a signed value, iclp[(sum) >> 14], so it presumably points at
 * an interior element of iclip[] to allow negative indices.
 * NOTE(review): the initialisation is not visible here (presumably done by
 * idct_int32_init) -- confirm.
 */
static short iclip[1024]; /* clipping table */ |
static short iclip[1024]; /* clipping table */ |
90 |
static short *iclp; |
static short *iclp; |
91 |
|
|
92 |
/* private prototypes */ |
/* private prototypes */ |
|
//static void idctrow _ANSI_ARGS_((short *blk)); |
|
|
//static void idctcol _ANSI_ARGS_((short *blk)); |
|
93 |
|
|
94 |
/* row (horizontal) IDCT |
/* row (horizontal) IDCT |
95 |
* |
* |
101 |
* c[1..7] = 128*sqrt(2) |
* c[1..7] = 128*sqrt(2) |
102 |
*/ |
*/ |
103 |
|
|
104 |
/* |
#if 0 |
105 |
static void idctrow(blk) |
static void idctrow(blk) |
106 |
short *blk; |
short *blk; |
107 |
{ |
{ |
108 |
int X0, X1, X2, X3, X4, X5, X6, X7, X8; |
int X0, X1, X2, X3, X4, X5, X6, X7, X8; |
109 |
|
|
110 |
// shortcut |
/* shortcut */ |
111 |
if (!((X1 = blk[4]<<11) | (X2 = blk[6]) | (X3 = blk[2]) | |
if (!((X1 = blk[4]<<11) | (X2 = blk[6]) | (X3 = blk[2]) | |
112 |
(X4 = blk[1]) | (X5 = blk[7]) | (X6 = blk[5]) | (X7 = blk[3]))) |
(X4 = blk[1]) | (X5 = blk[7]) | (X6 = blk[5]) | (X7 = blk[3]))) |
113 |
{ |
{ |
115 |
return; |
return; |
116 |
} |
} |
117 |
|
|
118 |
X0 = (blk[0]<<11) + 128; // for proper rounding in the fourth stage |
X0 = (blk[0]<<11) + 128; /* for proper rounding in the fourth stage */ |
119 |
|
|
120 |
// first stage |
/* first stage */ |
121 |
X8 = W7*(X4+X5); |
X8 = W7*(X4+X5); |
122 |
X4 = X8 + (W1-W7)*X4; |
X4 = X8 + (W1-W7)*X4; |
123 |
X5 = X8 - (W1+W7)*X5; |
X5 = X8 - (W1+W7)*X5; |
125 |
X6 = X8 - (W3-W5)*X6; |
X6 = X8 - (W3-W5)*X6; |
126 |
X7 = X8 - (W3+W5)*X7; |
X7 = X8 - (W3+W5)*X7; |
127 |
|
|
128 |
// second stage |
/* second stage */ |
129 |
X8 = X0 + X1; |
X8 = X0 + X1; |
130 |
X0 -= X1; |
X0 -= X1; |
131 |
X1 = W6*(X3+X2); |
X1 = W6*(X3+X2); |
136 |
X6 = X5 + X7; |
X6 = X5 + X7; |
137 |
X5 -= X7; |
X5 -= X7; |
138 |
|
|
139 |
// third stage |
/* third stage */ |
140 |
X7 = X8 + X3; |
X7 = X8 + X3; |
141 |
X8 -= X3; |
X8 -= X3; |
142 |
X3 = X0 + X2; |
X3 = X0 + X2; |
144 |
X2 = (181*(X4+X5)+128)>>8; |
X2 = (181*(X4+X5)+128)>>8; |
145 |
X4 = (181*(X4-X5)+128)>>8; |
X4 = (181*(X4-X5)+128)>>8; |
146 |
|
|
147 |
// fourth stage |
/* fourth stage */ |
148 |
blk[0] = (X7+X1)>>8; |
blk[0] = (X7+X1)>>8; |
149 |
blk[1] = (X3+X2)>>8; |
blk[1] = (X3+X2)>>8; |
150 |
blk[2] = (X0+X4)>>8; |
blk[2] = (X0+X4)>>8; |
153 |
blk[5] = (X0-X4)>>8; |
blk[5] = (X0-X4)>>8; |
154 |
blk[6] = (X3-X2)>>8; |
blk[6] = (X3-X2)>>8; |
155 |
blk[7] = (X7-X1)>>8; |
blk[7] = (X7-X1)>>8; |
156 |
}*/ |
} |
157 |
|
#endif |
158 |
|
|
159 |
/* column (vertical) IDCT |
/* column (vertical) IDCT |
160 |
* |
* |
165 |
* where: c[0] = 1/1024 |
* where: c[0] = 1/1024 |
166 |
* c[1..7] = (1/1024)*sqrt(2) |
* c[1..7] = (1/1024)*sqrt(2) |
167 |
*/ |
*/ |
168 |
/* |
|
169 |
|
#if 0 |
170 |
static void idctcol(blk) |
static void idctcol(blk) |
171 |
short *blk; |
short *blk; |
172 |
{ |
{ |
173 |
int X0, X1, X2, X3, X4, X5, X6, X7, X8; |
int X0, X1, X2, X3, X4, X5, X6, X7, X8; |
174 |
|
|
175 |
// shortcut |
/* shortcut */ |
176 |
if (!((X1 = (blk[8*4]<<8)) | (X2 = blk[8*6]) | (X3 = blk[8*2]) | |
if (!((X1 = (blk[8*4]<<8)) | (X2 = blk[8*6]) | (X3 = blk[8*2]) | |
177 |
(X4 = blk[8*1]) | (X5 = blk[8*7]) | (X6 = blk[8*5]) | (X7 = blk[8*3]))) |
(X4 = blk[8*1]) | (X5 = blk[8*7]) | (X6 = blk[8*5]) | (X7 = blk[8*3]))) |
178 |
{ |
{ |
183 |
|
|
184 |
X0 = (blk[8*0]<<8) + 8192; |
X0 = (blk[8*0]<<8) + 8192; |
185 |
|
|
186 |
// first stage |
/* first stage */ |
187 |
X8 = W7*(X4+X5) + 4; |
X8 = W7*(X4+X5) + 4; |
188 |
X4 = (X8+(W1-W7)*X4)>>3; |
X4 = (X8+(W1-W7)*X4)>>3; |
189 |
X5 = (X8-(W1+W7)*X5)>>3; |
X5 = (X8-(W1+W7)*X5)>>3; |
191 |
X6 = (X8-(W3-W5)*X6)>>3; |
X6 = (X8-(W3-W5)*X6)>>3; |
192 |
X7 = (X8-(W3+W5)*X7)>>3; |
X7 = (X8-(W3+W5)*X7)>>3; |
193 |
|
|
194 |
// second stage |
/* second stage */ |
195 |
X8 = X0 + X1; |
X8 = X0 + X1; |
196 |
X0 -= X1; |
X0 -= X1; |
197 |
X1 = W6*(X3+X2) + 4; |
X1 = W6*(X3+X2) + 4; |
202 |
X6 = X5 + X7; |
X6 = X5 + X7; |
203 |
X5 -= X7; |
X5 -= X7; |
204 |
|
|
205 |
// third stage |
/* third stage */ |
206 |
X7 = X8 + X3; |
X7 = X8 + X3; |
207 |
X8 -= X3; |
X8 -= X3; |
208 |
X3 = X0 + X2; |
X3 = X0 + X2; |
210 |
X2 = (181*(X4+X5)+128)>>8; |
X2 = (181*(X4+X5)+128)>>8; |
211 |
X4 = (181*(X4-X5)+128)>>8; |
X4 = (181*(X4-X5)+128)>>8; |
212 |
|
|
213 |
// fourth stage |
/* fourth stage */ |
214 |
blk[8*0] = iclp[(X7+X1)>>14]; |
blk[8*0] = iclp[(X7+X1)>>14]; |
215 |
blk[8*1] = iclp[(X3+X2)>>14]; |
blk[8*1] = iclp[(X3+X2)>>14]; |
216 |
blk[8*2] = iclp[(X0+X4)>>14]; |
blk[8*2] = iclp[(X0+X4)>>14]; |
219 |
blk[8*5] = iclp[(X0-X4)>>14]; |
blk[8*5] = iclp[(X0-X4)>>14]; |
220 |
blk[8*6] = iclp[(X3-X2)>>14]; |
blk[8*6] = iclp[(X3-X2)>>14]; |
221 |
blk[8*7] = iclp[(X7-X1)>>14]; |
blk[8*7] = iclp[(X7-X1)>>14]; |
222 |
}*/ |
} |
223 |
|
#endif |
224 |
|
|
225 |
// function pointer |
/* function pointer */ |
226 |
/*
 * File-scope (non-static) function-pointer variable named idct.
 * NOTE(review): idctFuncPtr is not declared in this file; presumably it comes
 * from "idct.h", and the pointer is presumably assigned elsewhere to the IDCT
 * implementation to use (e.g. idct_int32) -- confirm.
 */
idctFuncPtr idct; |
idctFuncPtr idct; |
227 |
|
|
228 |
/* two dimensional inverse discrete cosine transform */ |
/* two dimensional inverse discrete cosine transform */ |
|
//void j_rev_dct(block) |
|
|
//short *block; |
|
229 |
void |
void |
230 |
idct_int32(short *const block) |
idct_int32(short *const block) |
231 |
{ |
{ |
232 |
|
|
233 |
// idct_int32_init() must be called before the first call to this function! |
/* |
234 |
|
* idct_int32_init() must be called before the first call to this |
235 |
|
* function! |
236 |
|
*/ |
237 |
|
|
238 |
|
|
239 |
/*int i; |
#if 0 |
240 |
|
int i; |
241 |
long i; |
long i; |
242 |
|
|
243 |
for (i=0; i<8; i++) |
for (i=0; i<8; i++) |
244 |
idctrow(block+8*i); |
idctrow(block+8*i); |
245 |
|
|
246 |
for (i=0; i<8; i++) |
for (i=0; i<8; i++) |
247 |
idctcol(block+i); */ |
idctcol(block+i); |
248 |
|
#endif |
249 |
|
|
250 |
static short *blk; |
static short *blk; |
251 |
static long i; |
static long i; |
252 |
static long X0, X1, X2, X3, X4, X5, X6, X7, X8; |
static long X0, X1, X2, X3, X4, X5, X6, X7, X8; |
253 |
|
|
254 |
|
|
255 |
for (i = 0; i < 8; i++) // idct rows |
for (i = 0; i < 8; i++) /* idct rows */ |
256 |
{ |
{ |
257 |
blk = block + (i << 3); |
blk = block + (i << 3); |
258 |
if (! |
if (! |
264 |
continue; |
continue; |
265 |
} |
} |
266 |
|
|
267 |
X0 = (blk[0] << 11) + 128; // for proper rounding in the fourth stage |
X0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */ |
268 |
|
|
269 |
// first stage |
/* first stage */ |
270 |
X8 = W7 * (X4 + X5); |
X8 = W7 * (X4 + X5); |
271 |
X4 = X8 + (W1 - W7) * X4; |
X4 = X8 + (W1 - W7) * X4; |
272 |
X5 = X8 - (W1 + W7) * X5; |
X5 = X8 - (W1 + W7) * X5; |
274 |
X6 = X8 - (W3 - W5) * X6; |
X6 = X8 - (W3 - W5) * X6; |
275 |
X7 = X8 - (W3 + W5) * X7; |
X7 = X8 - (W3 + W5) * X7; |
276 |
|
|
277 |
// second stage |
/* second stage */ |
278 |
X8 = X0 + X1; |
X8 = X0 + X1; |
279 |
X0 -= X1; |
X0 -= X1; |
280 |
X1 = W6 * (X3 + X2); |
X1 = W6 * (X3 + X2); |
285 |
X6 = X5 + X7; |
X6 = X5 + X7; |
286 |
X5 -= X7; |
X5 -= X7; |
287 |
|
|
288 |
// third stage |
/* third stage */ |
289 |
X7 = X8 + X3; |
X7 = X8 + X3; |
290 |
X8 -= X3; |
X8 -= X3; |
291 |
X3 = X0 + X2; |
X3 = X0 + X2; |
293 |
X2 = (181 * (X4 + X5) + 128) >> 8; |
X2 = (181 * (X4 + X5) + 128) >> 8; |
294 |
X4 = (181 * (X4 - X5) + 128) >> 8; |
X4 = (181 * (X4 - X5) + 128) >> 8; |
295 |
|
|
296 |
// fourth stage |
/* fourth stage */ |
297 |
|
|
298 |
blk[0] = (short) ((X7 + X1) >> 8); |
blk[0] = (short) ((X7 + X1) >> 8); |
299 |
blk[1] = (short) ((X3 + X2) >> 8); |
blk[1] = (short) ((X3 + X2) >> 8); |
304 |
blk[6] = (short) ((X3 - X2) >> 8); |
blk[6] = (short) ((X3 - X2) >> 8); |
305 |
blk[7] = (short) ((X7 - X1) >> 8); |
blk[7] = (short) ((X7 - X1) >> 8); |
306 |
|
|
307 |
} // end for ( i = 0; i < 8; ++i ) IDCT-rows |
} /* end for ( i = 0; i < 8; ++i ) IDCT-rows */ |
308 |
|
|
309 |
|
|
310 |
|
|
311 |
for (i = 0; i < 8; i++) // idct columns |
for (i = 0; i < 8; i++) /* idct columns */ |
312 |
{ |
{ |
313 |
blk = block + i; |
blk = block + i; |
314 |
// shortcut |
/* shortcut */ |
315 |
if (! |
if (! |
316 |
((X1 = (blk[8 * 4] << 8)) | (X2 = blk[8 * 6]) | (X3 = |
((X1 = (blk[8 * 4] << 8)) | (X2 = blk[8 * 6]) | (X3 = |
317 |
blk[8 * |
blk[8 * |
327 |
|
|
328 |
X0 = (blk[8 * 0] << 8) + 8192; |
X0 = (blk[8 * 0] << 8) + 8192; |
329 |
|
|
330 |
// first stage |
/* first stage */ |
331 |
X8 = W7 * (X4 + X5) + 4; |
X8 = W7 * (X4 + X5) + 4; |
332 |
X4 = (X8 + (W1 - W7) * X4) >> 3; |
X4 = (X8 + (W1 - W7) * X4) >> 3; |
333 |
X5 = (X8 - (W1 + W7) * X5) >> 3; |
X5 = (X8 - (W1 + W7) * X5) >> 3; |
335 |
X6 = (X8 - (W3 - W5) * X6) >> 3; |
X6 = (X8 - (W3 - W5) * X6) >> 3; |
336 |
X7 = (X8 - (W3 + W5) * X7) >> 3; |
X7 = (X8 - (W3 + W5) * X7) >> 3; |
337 |
|
|
338 |
// second stage |
/* second stage */ |
339 |
X8 = X0 + X1; |
X8 = X0 + X1; |
340 |
X0 -= X1; |
X0 -= X1; |
341 |
X1 = W6 * (X3 + X2) + 4; |
X1 = W6 * (X3 + X2) + 4; |
346 |
X6 = X5 + X7; |
X6 = X5 + X7; |
347 |
X5 -= X7; |
X5 -= X7; |
348 |
|
|
349 |
// third stage |
/* third stage */ |
350 |
X7 = X8 + X3; |
X7 = X8 + X3; |
351 |
X8 -= X3; |
X8 -= X3; |
352 |
X3 = X0 + X2; |
X3 = X0 + X2; |
354 |
X2 = (181 * (X4 + X5) + 128) >> 8; |
X2 = (181 * (X4 + X5) + 128) >> 8; |
355 |
X4 = (181 * (X4 - X5) + 128) >> 8; |
X4 = (181 * (X4 - X5) + 128) >> 8; |
356 |
|
|
357 |
// fourth stage |
/* fourth stage */ |
358 |
blk[8 * 0] = iclp[(X7 + X1) >> 14]; |
blk[8 * 0] = iclp[(X7 + X1) >> 14]; |
359 |
blk[8 * 1] = iclp[(X3 + X2) >> 14]; |
blk[8 * 1] = iclp[(X3 + X2) >> 14]; |
360 |
blk[8 * 2] = iclp[(X0 + X4) >> 14]; |
blk[8 * 2] = iclp[(X0 + X4) >> 14]; |
365 |
blk[8 * 7] = iclp[(X7 - X1) >> 14]; |
blk[8 * 7] = iclp[(X7 - X1) >> 14]; |
366 |
} |
} |
367 |
|
|
368 |
} // end function idct_int32(block) |
} /* end function idct_int32(block) */ |
369 |
|
|
370 |
|
|
|
//void |
|
|
//idct_int32_init() |
|
371 |
void |
void |
372 |
idct_int32_init() |
idct_int32_init() |
373 |
{ |
{ |