Annotation of /xvidcore/src/dct/simple_idct.c

Revision 1.2 - (view) (download)

/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/*
 * Based upon some commented-out C code from mpeg2dec (idct_mmx.c,
 * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>).
 */
24 :			#include "../portab.h"
25 :			#include "idct.h"
26 :
/*
 * Fixed-point cosine factors and the per-pass descaling shifts.
 *
 * The disabled branch keeps an alternative 11-bit set of factors for
 * reference; the active branch uses 14-bit factors.
 */
#if 0
#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */
#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */
#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */
#define W4 2048 /* 2048*sqrt(2)*cos(4*pi/16) */
#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */
#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */
#define W7 565  /* 2048*sqrt(2)*cos(7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
/* Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, truncated to an integer. */
#define W1 22725
#define W2 21407
#define W3 19266
/*
 * NOTE(review): the formula above yields 16384 for i = 4; the value used
 * here is one less. Presumably deliberate (headroom) -- confirm before
 * "correcting" it, since it changes the output bits.
 */
#define W4 16383
#define W5 12873
#define W6 8867
#define W7 4520
#define ROW_SHIFT 11
#define COL_SHIFT 20 /* 6 */
#endif
48 :
/*
 * 16x16 -> 32 bit multiply helpers.
 *
 *   MUL16(rt, ra, rb)   rt  = ra * rb
 *   MAC16(rt, ra, rb)   rt += ra * rb
 *
 * rt must be a modifiable int lvalue. On PowerPC builds the helpers map to
 * the maclhw/mullhw instructions; everywhere else they are plain C.
 */
#if defined(ARCH_IS_PPC)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
	__asm__ ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
	__asm__ ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));

#else

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif
68 :
69 :			static __inline void idctRowCondDC (int16_t * const row)
70 :			{
71 :			int a0, a1, a2, a3, b0, b1, b2, b3;
72 :			#ifdef FAST_64BIT
73 :			uint64_t temp;
74 :			#else
75 :			uint32_t temp;
76 :			#endif
77 :
78 :			#ifdef FAST_64BIT
79 :			#ifdef ARCH_IS_BIG_ENDIAN
80 :			#define ROW0_MASK 0xffff000000000000LL
81 :			#else
82 :			#define ROW0_MASK 0xffffLL
83 :			#endif
84 :			if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) \|
85 :			((uint64_t *)row)[1]) == 0) {
86 :			temp = (row[0] << 3) & 0xffff;
87 :			temp += temp << 16;
88 :			temp += temp << 32;
89 :			((uint64_t *)row)[0] = temp;
90 :			((uint64_t *)row)[1] = temp;
91 :			return;
92 :			}
93 :			#else
94 :			if (!(((uint32_t*)row)[1] \|
95 :			((uint32_t*)row)[2] \|
96 :			((uint32_t*)row)[3] \|
97 :			row[1])) {
98 :			temp = (row[0] << 3) & 0xffff;
99 :			temp += temp << 16;
100 :			((uint32_t)row)[0]=((uint32_t)row)[1] =
101 :			((uint32_t)row)[2]=((uint32_t)row)[3] = temp;
102 :			return;
103 :			}
104 :			#endif
105 :
106 :			a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
107 :			a1 = a0;
108 :			a2 = a0;
109 :			a3 = a0;
110 :
111 :			/* no need to optimize : gcc does it */
112 :			a0 += W2 * row[2];
113 :			a1 += W6 * row[2];
114 :			a2 -= W6 * row[2];
115 :			a3 -= W2 * row[2];
116 :
117 :			MUL16(b0, W1, row[1]);
118 :			MAC16(b0, W3, row[3]);
119 :			MUL16(b1, W3, row[1]);
120 :			MAC16(b1, -W7, row[3]);
121 :			MUL16(b2, W5, row[1]);
122 :			MAC16(b2, -W1, row[3]);
123 :			MUL16(b3, W7, row[1]);
124 :			MAC16(b3, -W5, row[3]);
125 :
126 :			#ifdef FAST_64BIT
127 :			temp = ((uint64_t*)row)[1];
128 :			#else
129 :			temp = ((uint32_t)row)[2] \| ((uint32_t)row)[3];
130 :			#endif
131 :			if (temp != 0) {
132 :			a0 += W4row[4] + W6row[6];
133 :			a1 += - W4row[4] - W2row[6];
134 :			a2 += - W4row[4] + W2row[6];
135 :			a3 += W4row[4] - W6row[6];
136 :
137 :			MAC16(b0, W5, row[5]);
138 :			MAC16(b0, W7, row[7]);
139 :
140 :			MAC16(b1, -W1, row[5]);
141 :			MAC16(b1, -W5, row[7]);
142 :
143 :			MAC16(b2, W7, row[5]);
144 :			MAC16(b2, W3, row[7]);
145 :
146 :			MAC16(b3, W3, row[5]);
147 :			MAC16(b3, -W1, row[7]);
148 :			}
149 :
150 :			row[0] = (a0 + b0) >> ROW_SHIFT;
151 :			row[7] = (a0 - b0) >> ROW_SHIFT;
152 :			row[1] = (a1 + b1) >> ROW_SHIFT;
153 :			row[6] = (a1 - b1) >> ROW_SHIFT;
154 :			row[2] = (a2 + b2) >> ROW_SHIFT;
155 :			row[5] = (a2 - b2) >> ROW_SHIFT;
156 :			row[3] = (a3 + b3) >> ROW_SHIFT;
157 :			row[4] = (a3 - b3) >> ROW_SHIFT;
158 :			}
159 :
160 :
161 :			static __inline void idctSparseCol (int16_t * const col)
162 :			{
163 :			int a0, a1, a2, a3, b0, b1, b2, b3;
164 :
165 :			/* XXX: I did that only to give same values as previous code */
166 :			a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
167 :			a1 = a0;
168 :			a2 = a0;
169 :			a3 = a0;
170 :
171 :			a0 += + W2col[82];
172 :			a1 += + W6col[82];
173 :			a2 += - W6col[82];
174 :			a3 += - W2col[82];
175 :
176 :			MUL16(b0, W1, col[8*1]);
177 :			MUL16(b1, W3, col[8*1]);
178 :			MUL16(b2, W5, col[8*1]);
179 :			MUL16(b3, W7, col[8*1]);
180 :
181 :			MAC16(b0, + W3, col[8*3]);
182 :			MAC16(b1, - W7, col[8*3]);
183 :			MAC16(b2, - W1, col[8*3]);
184 :			MAC16(b3, - W5, col[8*3]);
185 :
186 :			if(col[8*4]){
187 :			a0 += + W4col[84];
188 :			a1 += - W4col[84];
189 :			a2 += - W4col[84];
190 :			a3 += + W4col[84];
191 :			}
192 :
193 :			if (col[8*5]) {
194 :			MAC16(b0, + W5, col[8*5]);
195 :			MAC16(b1, - W1, col[8*5]);
196 :			MAC16(b2, + W7, col[8*5]);
197 :			MAC16(b3, + W3, col[8*5]);
198 :			}
199 :
200 :			if(col[8*6]){
201 :			a0 += + W6col[86];
202 :			a1 += - W2col[86];
203 :			a2 += + W2col[86];
204 :			a3 += - W6col[86];
205 :			}
206 :
207 :			if (col[8*7]) {
208 :			MAC16(b0, + W7, col[8*7]);
209 :			MAC16(b1, - W5, col[8*7]);
210 :			MAC16(b2, + W3, col[8*7]);
211 :			MAC16(b3, - W1, col[8*7]);
212 :			}
213 :
214 :			col[0 ] = ((a0 + b0) >> COL_SHIFT);
215 :			col[8 ] = ((a1 + b1) >> COL_SHIFT);
216 :			col[16] = ((a2 + b2) >> COL_SHIFT);
217 :			col[24] = ((a3 + b3) >> COL_SHIFT);
218 :			col[32] = ((a3 - b3) >> COL_SHIFT);
219 :			col[40] = ((a2 - b2) >> COL_SHIFT);
220 :			col[48] = ((a1 - b1) >> COL_SHIFT);
221 :			col[56] = ((a0 - b0) >> COL_SHIFT);
222 :			}
223 :
/*
 * In-place inverse DCT of one 8x8 block of 64 coefficients stored row by
 * row: a row pass over each of the eight rows, then a column pass over
 * each of the eight columns.
 */
void simple_idct_c(int16_t * const block)
{
	int i;

	for (i = 0; i < 8; i++) {
		idctRowCondDC(block + i*8);
	}

	for (i = 0; i < 8; i++) {
		idctSparseCol(block + i);
	}
}
233 :
234 :
/*
 * Input permutation for the simple_idct_mmx: the coefficient at index i
 * of a block is moved to index simple_mmx_permutation[i] before the MMX
 * routine is called. Each value 0x00..0x3F appears exactly once.
 */
static const uint8_t simple_mmx_permutation[64] = {
	0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
	0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
	0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
	0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
	0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
	0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
	0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
	0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
246 :
#if defined(ARCH_IS_IA32)
/*
 * Wrapper function, as simple_idct_mmx expects data to be permutated.
 *
 * Copies block into a temporary buffer in the order given by
 * simple_mmx_permutation, runs simple_idct_mmx on the temporary, and
 * copies the 64 results back to block in natural order.
 */
void simple_idct_mmx2(int16_t * const block)
{
	int16_t tmp[64];
	int i;

	/* scatter the input into the layout the MMX routine expects */
	for (i = 0; i < 64; i++) {
		tmp[simple_mmx_permutation[i]] = block[i];
	}

	simple_idct_mmx(tmp);

	/* results are copied back without any reordering */
	for (i = 0; i < 64; i++) {
		block[i] = tmp[i];
	}
}
#endif

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4