ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvs/xvidcore/src/dct/simple_idct.c
Revision: 1.6
Committed: Tue Mar 9 10:00:14 2010 UTC (14 years, 7 months ago) by Isibaar
Content type: text/plain
Branch: MAIN
CVS Tags: release-1_3_1, release-1_3_0, rc1_1_3_0, tag-branching-1_3_0, HEAD
Branch point for: release-1_3-branch
Changes since 1.5: +2 -2 lines
Log Message:
app-level multi-threading for xvid_encraw

File Contents

# Content
1 /*****************************************************************************
2 *
3 * XVID MPEG-4 VIDEO CODEC
4 * - Inverse DCT (More precise version) -
5 *
6 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
7 *
8 * Originally distributed under the GNU LGPL License (ffmpeg).
9 * It is licensed under the GNU GPL for the Xvid tree.
10 *
11 * This program is free software ; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation ; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program ; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 *
25 * $Id: simple_idct.c,v 1.5 2004/04/05 20:36:36 edgomez Exp $
26 *
27 ****************************************************************************/
28
29 /*
30 based upon some commented-out C code from mpeg2dec (idct_mmx.c
31 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
32 */
33
34 #include "../portab.h"
35 #include "idct.h"
36
/*
 * Fixed-point IDCT coefficients.  Wi approximates sqrt(2) * cos(i * pi / 16)
 * at the scale of the table selected below.  ROW_SHIFT and COL_SHIFT are the
 * right shifts applied to the results of the row pass and of the column
 * pass respectively.
 */
#if 1

/* Active table, 14 fractional bits: cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W1 22725 /* i = 1 */
#define W2 21407 /* i = 2 */
#define W3 19266 /* i = 3 */
/*
 * i = 4: the formula above yields 16384, the table uses 16383.
 * NOTE(review): presumably deliberate -- confirm before "correcting" it.
 */
#define W4 16383
#define W5 12873 /* i = 5 */
#define W6 8867  /* i = 6 */
#define W7 4520  /* i = 7 */
#define ROW_SHIFT 11
#define COL_SHIFT 20 /* 6 */

#else

/* Disabled table, 11 fractional bits: 2048*sqrt (2)*cos (i*pi/16) */
#define W1 2841 /* i = 1 */
#define W2 2676 /* i = 2 */
#define W3 2408 /* i = 3 */
#define W4 2048 /* i = 4 */
#define W5 1609 /* i = 5 */
#define W6 1108 /* i = 6 */
#define W7 565  /* i = 7 */
#define ROW_SHIFT 8
#define COL_SHIFT 17

#endif
58
/*
 * 16x16 -> 32 bit multiply helpers used by the IDCT passes.
 *
 * The first branch holds the PPC mac operation as inline assembly.  It is
 * disabled because it causes compile problems on newer ppc targets.
 *
 * Was originally: #if defined(ARCH_IS_PPC)
 */
#if 0

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));

#else

/*
 * Portable C versions.  Both the destination and the complete expansion are
 * parenthesized so that the macros behave as a single expression: without
 * the outer parentheses, `MUL16(x, a, b) + 1` would store a * b + 1 in x.
 * Each argument is evaluated exactly once.
 */

/* signed 16x16 -> 32 multiply add accumulate: rt += ra * rb */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply: rt = ra * rb */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif
83
84 static __inline void idctRowCondDC (int16_t * const row)
85 {
86 int a0, a1, a2, a3, b0, b1, b2, b3;
87 #ifdef FAST_64BIT
88 uint64_t temp;
89 #else
90 uint32_t temp;
91 #endif
92
93 #ifdef FAST_64BIT
94 #ifdef ARCH_IS_BIG_ENDIAN
95 #define ROW0_MASK 0xffff000000000000LL
96 #else
97 #define ROW0_MASK 0xffffLL
98 #endif
99 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
100 ((uint64_t *)row)[1]) == 0) {
101 temp = (row[0] << 3) & 0xffff;
102 temp += temp << 16;
103 temp += temp << 32;
104 ((uint64_t *)row)[0] = temp;
105 ((uint64_t *)row)[1] = temp;
106 return;
107 }
108 #else
109 if (!(((uint32_t*)row)[1] |
110 ((uint32_t*)row)[2] |
111 ((uint32_t*)row)[3] |
112 row[1])) {
113 temp = (row[0] << 3) & 0xffff;
114 temp += temp << 16;
115 ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
116 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
117 return;
118 }
119 #endif
120
121 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
122 a1 = a0;
123 a2 = a0;
124 a3 = a0;
125
126 /* no need to optimize : gcc does it */
127 a0 += W2 * row[2];
128 a1 += W6 * row[2];
129 a2 -= W6 * row[2];
130 a3 -= W2 * row[2];
131
132 MUL16(b0, W1, row[1]);
133 MAC16(b0, W3, row[3]);
134 MUL16(b1, W3, row[1]);
135 MAC16(b1, -W7, row[3]);
136 MUL16(b2, W5, row[1]);
137 MAC16(b2, -W1, row[3]);
138 MUL16(b3, W7, row[1]);
139 MAC16(b3, -W5, row[3]);
140
141 #ifdef FAST_64BIT
142 temp = ((uint64_t*)row)[1];
143 #else
144 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
145 #endif
146 if (temp != 0) {
147 a0 += W4*row[4] + W6*row[6];
148 a1 += - W4*row[4] - W2*row[6];
149 a2 += - W4*row[4] + W2*row[6];
150 a3 += W4*row[4] - W6*row[6];
151
152 MAC16(b0, W5, row[5]);
153 MAC16(b0, W7, row[7]);
154
155 MAC16(b1, -W1, row[5]);
156 MAC16(b1, -W5, row[7]);
157
158 MAC16(b2, W7, row[5]);
159 MAC16(b2, W3, row[7]);
160
161 MAC16(b3, W3, row[5]);
162 MAC16(b3, -W1, row[7]);
163 }
164
165 row[0] = (a0 + b0) >> ROW_SHIFT;
166 row[7] = (a0 - b0) >> ROW_SHIFT;
167 row[1] = (a1 + b1) >> ROW_SHIFT;
168 row[6] = (a1 - b1) >> ROW_SHIFT;
169 row[2] = (a2 + b2) >> ROW_SHIFT;
170 row[5] = (a2 - b2) >> ROW_SHIFT;
171 row[3] = (a3 + b3) >> ROW_SHIFT;
172 row[4] = (a3 - b3) >> ROW_SHIFT;
173 }
174
175
176 static __inline void idctSparseCol (int16_t * const col)
177 {
178 int a0, a1, a2, a3, b0, b1, b2, b3;
179
180 /* XXX: I did that only to give same values as previous code */
181 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
182 a1 = a0;
183 a2 = a0;
184 a3 = a0;
185
186 a0 += + W2*col[8*2];
187 a1 += + W6*col[8*2];
188 a2 += - W6*col[8*2];
189 a3 += - W2*col[8*2];
190
191 MUL16(b0, W1, col[8*1]);
192 MUL16(b1, W3, col[8*1]);
193 MUL16(b2, W5, col[8*1]);
194 MUL16(b3, W7, col[8*1]);
195
196 MAC16(b0, + W3, col[8*3]);
197 MAC16(b1, - W7, col[8*3]);
198 MAC16(b2, - W1, col[8*3]);
199 MAC16(b3, - W5, col[8*3]);
200
201 if(col[8*4]){
202 a0 += + W4*col[8*4];
203 a1 += - W4*col[8*4];
204 a2 += - W4*col[8*4];
205 a3 += + W4*col[8*4];
206 }
207
208 if (col[8*5]) {
209 MAC16(b0, + W5, col[8*5]);
210 MAC16(b1, - W1, col[8*5]);
211 MAC16(b2, + W7, col[8*5]);
212 MAC16(b3, + W3, col[8*5]);
213 }
214
215 if(col[8*6]){
216 a0 += + W6*col[8*6];
217 a1 += - W2*col[8*6];
218 a2 += + W2*col[8*6];
219 a3 += - W6*col[8*6];
220 }
221
222 if (col[8*7]) {
223 MAC16(b0, + W7, col[8*7]);
224 MAC16(b1, - W5, col[8*7]);
225 MAC16(b2, + W3, col[8*7]);
226 MAC16(b3, - W1, col[8*7]);
227 }
228
229 col[0 ] = ((a0 + b0) >> COL_SHIFT);
230 col[8 ] = ((a1 + b1) >> COL_SHIFT);
231 col[16] = ((a2 + b2) >> COL_SHIFT);
232 col[24] = ((a3 + b3) >> COL_SHIFT);
233 col[32] = ((a3 - b3) >> COL_SHIFT);
234 col[40] = ((a2 - b2) >> COL_SHIFT);
235 col[48] = ((a1 - b1) >> COL_SHIFT);
236 col[56] = ((a0 - b0) >> COL_SHIFT);
237 }
238
/*
 * In-place inverse DCT of an 8x8 block of coefficients.
 *
 * block: 64 int16_t values stored row by row.  Every row is transformed
 *        first, then every column.
 */
void simple_idct_c(int16_t * const block)
{
    int16_t *p;

    /* Row pass: consecutive rows start 8 elements apart. */
    for (p = block; p != block + 8*8; p += 8) {
        idctRowCondDC(p);
    }

    /* Column pass: consecutive columns start 1 element apart. */
    for (p = block; p != block + 8; p++) {
        idctSparseCol(p);
    }
}
247 }