[cvs] / xvidcore / src / dct / simple_idct.c Repository:
ViewVC logotype

Diff of /xvidcore/src/dct/simple_idct.c

Parent Directory | Revision Log | View Patch

revision 1.1, Tue Jan 21 12:51:16 2003 UTC revision 1.1.2.3, Tue Feb 11 12:45:52 2003 UTC
# Line 0  Line 1 
1    /*
2     * Simple IDCT
3     *
4     * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5     *
6     * This library is free software; you can redistribute it and/or
7     * modify it under the terms of the GNU Lesser General Public
8     * License as published by the Free Software Foundation; either
9     * version 2 of the License, or (at your option) any later version.
10     *
11     * This library is distributed in the hope that it will be useful,
12     * but WITHOUT ANY WARRANTY; without even the implied warranty of
13     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14     * Lesser General Public License for more details.
15     *
16     * You should have received a copy of the GNU Lesser General Public
17     * License along with this library; if not, write to the Free Software
18     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19     */
20    /*
21      based upon some commented-out C code from mpeg2dec (idct_mmx.c
22      written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
23     */
24    #include "../portab.h"
25    #include "idct.h"
26    
/*
 * Fixed-point IDCT coefficients and shift amounts.
 *
 * Wi is cos(i*pi/16)*sqrt(2) scaled by a power of two.  ROW_SHIFT and
 * COL_SHIFT are the right shifts applied to the results of the row pass
 * and the column pass respectively.
 */
#if 0
/* Disabled variant, scale factor 2048. */
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
/* Active variant: Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W1  22725  /* i = 1 */
#define W2  21407  /* i = 2 */
#define W3  19266  /* i = 3 */
/*
 * NOTE(review): the formula gives 16384 for i = 4; the value used is one
 * less.  Presumably deliberate -- confirm before "correcting" it.
 */
#define W4  16383  /* i = 4 */
#define W5  12873  /* i = 5 */
#define W6  8867   /* i = 6 */
#define W7  4520   /* i = 7 */
#define ROW_SHIFT 11
#define COL_SHIFT 20 /* 6 */
#endif

/*
 * MUL16(rt, ra, rb): rt  = ra * rb
 * MAC16(rt, ra, rb): rt += ra * rb
 *
 * Both are statement macros wrapped in do { } while (0), so every variant
 * needs exactly one trailing semicolon at the call site and is safe in an
 * unbraced if/else.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    do { asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); } while (0)

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    do { asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb)); } while (0)

#else

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) do { (rt) += (ra) * (rb); } while (0)

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) do { (rt) = (ra) * (rb); } while (0)

#endif
68    
69    static __inline void idctRowCondDC (int16_t * const row)
70    {
71            int a0, a1, a2, a3, b0, b1, b2, b3;
72    #ifdef FAST_64BIT
73            uint64_t temp;
74    #else
75            uint32_t temp;
76    #endif
77    
78    #ifdef FAST_64BIT
79    #ifdef WORDS_BIGENDIAN
80    #define ROW0_MASK 0xffff000000000000LL
81    #else
82    #define ROW0_MASK 0xffffLL
83    #endif
84            if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
85                  ((uint64_t *)row)[1]) == 0) {
86                temp = (row[0] << 3) & 0xffff;
87                temp += temp << 16;
88                temp += temp << 32;
89                ((uint64_t *)row)[0] = temp;
90                ((uint64_t *)row)[1] = temp;
91                return;
92            }
93    #else
94            if (!(((uint32_t*)row)[1] |
95                  ((uint32_t*)row)[2] |
96                  ((uint32_t*)row)[3] |
97                  row[1])) {
98                temp = (row[0] << 3) & 0xffff;
99                temp += temp << 16;
100                ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
101                    ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
102                    return;
103            }
104    #endif
105    
106            a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
107            a1 = a0;
108            a2 = a0;
109            a3 = a0;
110    
111            /* no need to optimize : gcc does it */
112            a0 += W2 * row[2];
113            a1 += W6 * row[2];
114            a2 -= W6 * row[2];
115            a3 -= W2 * row[2];
116    
117            MUL16(b0, W1, row[1]);
118            MAC16(b0, W3, row[3]);
119            MUL16(b1, W3, row[1]);
120            MAC16(b1, -W7, row[3]);
121            MUL16(b2, W5, row[1]);
122            MAC16(b2, -W1, row[3]);
123            MUL16(b3, W7, row[1]);
124            MAC16(b3, -W5, row[3]);
125    
126    #ifdef FAST_64BIT
127            temp = ((uint64_t*)row)[1];
128    #else
129            temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
130    #endif
131            if (temp != 0) {
132                a0 += W4*row[4] + W6*row[6];
133                a1 += - W4*row[4] - W2*row[6];
134                a2 += - W4*row[4] + W2*row[6];
135                a3 += W4*row[4] - W6*row[6];
136    
137                MAC16(b0, W5, row[5]);
138                MAC16(b0, W7, row[7]);
139    
140                MAC16(b1, -W1, row[5]);
141                MAC16(b1, -W5, row[7]);
142    
143                MAC16(b2, W7, row[5]);
144                MAC16(b2, W3, row[7]);
145    
146                MAC16(b3, W3, row[5]);
147                MAC16(b3, -W1, row[7]);
148            }
149    
150            row[0] = (a0 + b0) >> ROW_SHIFT;
151            row[7] = (a0 - b0) >> ROW_SHIFT;
152            row[1] = (a1 + b1) >> ROW_SHIFT;
153            row[6] = (a1 - b1) >> ROW_SHIFT;
154            row[2] = (a2 + b2) >> ROW_SHIFT;
155            row[5] = (a2 - b2) >> ROW_SHIFT;
156            row[3] = (a3 + b3) >> ROW_SHIFT;
157            row[4] = (a3 - b3) >> ROW_SHIFT;
158    }
159    
160    
161    static __inline void idctSparseCol (int16_t * const col)
162    {
163            int a0, a1, a2, a3, b0, b1, b2, b3;
164    
165            /* XXX: I did that only to give same values as previous code */
166            a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
167            a1 = a0;
168            a2 = a0;
169            a3 = a0;
170    
171            a0 +=  + W2*col[8*2];
172            a1 +=  + W6*col[8*2];
173            a2 +=  - W6*col[8*2];
174            a3 +=  - W2*col[8*2];
175    
176            MUL16(b0, W1, col[8*1]);
177            MUL16(b1, W3, col[8*1]);
178            MUL16(b2, W5, col[8*1]);
179            MUL16(b3, W7, col[8*1]);
180    
181            MAC16(b0, + W3, col[8*3]);
182            MAC16(b1, - W7, col[8*3]);
183            MAC16(b2, - W1, col[8*3]);
184            MAC16(b3, - W5, col[8*3]);
185    
186            if(col[8*4]){
187                a0 += + W4*col[8*4];
188                a1 += - W4*col[8*4];
189                a2 += - W4*col[8*4];
190                a3 += + W4*col[8*4];
191            }
192    
193            if (col[8*5]) {
194                MAC16(b0, + W5, col[8*5]);
195                MAC16(b1, - W1, col[8*5]);
196                MAC16(b2, + W7, col[8*5]);
197                MAC16(b3, + W3, col[8*5]);
198            }
199    
200            if(col[8*6]){
201                a0 += + W6*col[8*6];
202                a1 += - W2*col[8*6];
203                a2 += + W2*col[8*6];
204                a3 += - W6*col[8*6];
205            }
206    
207            if (col[8*7]) {
208                MAC16(b0, + W7, col[8*7]);
209                MAC16(b1, - W5, col[8*7]);
210                MAC16(b2, + W3, col[8*7]);
211                MAC16(b3, - W1, col[8*7]);
212            }
213    
214            col[0 ] = ((a0 + b0) >> COL_SHIFT);
215            col[8 ] = ((a1 + b1) >> COL_SHIFT);
216            col[16] = ((a2 + b2) >> COL_SHIFT);
217            col[24] = ((a3 + b3) >> COL_SHIFT);
218            col[32] = ((a3 - b3) >> COL_SHIFT);
219            col[40] = ((a2 - b2) >> COL_SHIFT);
220            col[48] = ((a1 - b1) >> COL_SHIFT);
221            col[56] = ((a0 - b0) >> COL_SHIFT);
222    }
223    
/*
 * In-place inverse DCT of an 8x8 block of 64 int16_t coefficients stored
 * row by row: all eight rows are transformed first, then all eight columns.
 */
void simple_idct_c(int16_t * const block)
{
    int16_t *row = block;
    int16_t * const rows_end = block + 8*8;
    int column;

    /* Row pass: each row is 8 consecutive coefficients. */
    while (row != rows_end) {
        idctRowCondDC(row);
        row += 8;
    }

    /* Column pass: column n starts at block[n]. */
    for (column = 0; column != 8; ++column) {
        idctSparseCol(&block[column]);
    }
}
233    
234    
/*
 * Input permutation for the simple_idct_mmx.
 *
 * Entry i is the position that coefficient i takes in the permuted buffer.
 * Every value 0x00..0x3f occurs exactly once.
 */
static const uint8_t simple_mmx_permutation[64] = {
        /*  0.. 7 */ 0x00, 0x08, 0x04, 0x09, 0x01, 0x0c, 0x05, 0x0d,
        /*  8..15 */ 0x10, 0x18, 0x14, 0x19, 0x11, 0x1c, 0x15, 0x1d,
        /* 16..23 */ 0x20, 0x28, 0x24, 0x29, 0x21, 0x2c, 0x25, 0x2d,
        /* 24..31 */ 0x12, 0x1a, 0x16, 0x1b, 0x13, 0x1e, 0x17, 0x1f,
        /* 32..39 */ 0x02, 0x0a, 0x06, 0x0b, 0x03, 0x0e, 0x07, 0x0f,
        /* 40..47 */ 0x30, 0x38, 0x34, 0x39, 0x31, 0x3c, 0x35, 0x3d,
        /* 48..55 */ 0x22, 0x2a, 0x26, 0x2b, 0x23, 0x2e, 0x27, 0x2f,
        /* 56..63 */ 0x32, 0x3a, 0x36, 0x3b, 0x33, 0x3e, 0x37, 0x3f,
};
246    
#if defined(ARCH_X86)
/*
 * Wrapper around simple_idct_mmx, which expects its input to be permuted.
 *
 * The 64 coefficients of block are scattered into a temporary buffer
 * according to simple_mmx_permutation, simple_idct_mmx runs on that buffer,
 * and the buffer is then copied back over block in plain order.
 */
void simple_idct_mmx2(int16_t * const block)
{
    int16_t permuted[64];
    int n;

    /* Scatter: coefficient n goes to slot simple_mmx_permutation[n]. */
    for (n = 0; n < 64; n++) {
        permuted[simple_mmx_permutation[n]] = block[n];
    }

    simple_idct_mmx(permuted);

    /* Copy the result back unpermuted. */
    for (n = 64; n-- > 0; ) {
        block[n] = permuted[n];
    }
}
#endif

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.1.2.3

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4