Parent Directory | Revision Log
Revision 1.1 - (view) (download)
/*
 * Copyright (c) 2001 Michel Lespinasse
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
19 : | |||
/*
 * XviD integration by Christoph Nägeli <chn@kbw.ch>
 *
 * This file is a direct copy of the altivec idct module from the libmpeg2
 * project with some minor changes to fit in XviD.
 */
26 : | |||
27 : | |||
28 : | #ifdef HAVE_ALTIVEC_H | ||
29 : | #include <altivec.h> | ||
30 : | #endif | ||
31 : | |||
32 : | #include "../../portab.h" | ||
33 : | |||
/*
 * Declares every local vector that IDCT() reads or writes.
 *
 * Expand it among a function's declarations and terminate it with ';'
 * (the final declaration is intentionally left without one), e.g.
 *     IDCT_Vectors;
 */
#define IDCT_Vectors                                                       \
    /* multipliers broadcast from constants[0], plus zero and bias */      \
    vector signed short   c4, mc4, a0, a1, a2, ma2, zero, bias;            \
    /* two banks of eight working vectors */                               \
    vector signed short   vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;          \
    vector signed short   vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;          \
    /* scratch values for the butterflies */                               \
    vector signed short   t0, t1, t2, t3, t4, t5, t6, t7, t8;              \
    /* shift count used for the final vec_sra */                           \
    vector unsigned short shift
40 : | |||
41 : | static const vector signed short constants [5] = { | ||
42 : | (vector signed short)AVV(23170, 13573, 6518, 21895, -23170, -21895, 32, 31), | ||
43 : | (vector signed short)AVV(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725), | ||
44 : | (vector signed short)AVV(16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289), | ||
45 : | (vector signed short)AVV(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692), | ||
46 : | (vector signed short)AVV(13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895) | ||
47 : | }; | ||
48 : | |||
/*
 * IDCT() -- 8x8 inverse DCT body operating on AltiVec vectors.
 *
 * Requirements at the expansion site:
 *   - the variables declared by IDCT_Vectors are in scope;
 *   - `block` designates eight `vector signed short` values, block[0..7].
 *
 * Input is read from block[0..7]; the eight result vectors are left in
 * vx0..vx7 (block itself is not written by the macro).
 *
 * Structure: a first butterfly pass over block[0..7] (scaled by the tables
 * in constants[1..4]), an 8x8 transpose of the 16-bit elements done with
 * three rounds of vec_mergeh/vec_mergel, a second butterfly pass on the
 * transposed data, and a final arithmetic right shift by 6.
 *
 * The body is wrapped in do { } while (0) so that `IDCT();` is a single
 * statement and stays correct under an unbraced if/else or loop.
 */
#define IDCT()                                                              \
    do {                                                                    \
        /* broadcast the scalar multipliers stored in constants[0] */      \
        c4  = vec_splat (constants[0], 0);                                  \
        a0  = vec_splat (constants[0], 1);                                  \
        a1  = vec_splat (constants[0], 2);                                  \
        a2  = vec_splat (constants[0], 3);                                  \
        mc4 = vec_splat (constants[0], 4);                                  \
        ma2 = vec_splat (constants[0], 5);                                  \
        /* 32-bit element 3 of constants[0], i.e. its last two shorts */    \
        bias = (vector signed short)vec_splat((vector signed int)constants[0], 3); \
                                                                            \
        zero = vec_splat_s16 (0);                                           \
                                                                            \
        /* ---- first pass: inputs taken straight from block[] ---- */      \
        vx0 = vec_adds (block[0], block[4]);                                \
        vx4 = vec_subs (block[0], block[4]);                                \
        t5 = vec_mradds (vx0, constants[1], zero);                          \
        t0 = vec_mradds (vx4, constants[1], zero);                          \
                                                                            \
        vx1 = vec_mradds (a1, block[7], block[1]);                          \
        vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7]));         \
        t1 = vec_mradds (vx1, constants[2], zero);                          \
        t8 = vec_mradds (vx7, constants[2], zero);                          \
                                                                            \
        vx2 = vec_mradds (a0, block[6], block[2]);                          \
        vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6]));         \
        t2 = vec_mradds (vx2, constants[3], zero);                          \
        t4 = vec_mradds (vx6, constants[3], zero);                          \
                                                                            \
        vx3 = vec_mradds (block[3], constants[4], zero);                    \
        vx5 = vec_mradds (block[5], constants[4], zero);                    \
        t7 = vec_mradds (a2, vx5, vx3);                                     \
        t3 = vec_mradds (ma2, vx3, vx5);                                    \
                                                                            \
        t6 = vec_adds (t8, t3);                                             \
        t3 = vec_subs (t8, t3);                                             \
        t8 = vec_subs (t1, t7);                                             \
        t1 = vec_adds (t1, t7);                                             \
        /* x + round(a0 * x): extra scaling applied in this pass only */    \
        t6 = vec_mradds (a0, t6, t6);                                       \
        t1 = vec_mradds (a0, t1, t1);                                       \
                                                                            \
        t7 = vec_adds (t5, t2);                                             \
        t2 = vec_subs (t5, t2);                                             \
        t5 = vec_adds (t0, t4);                                             \
        t0 = vec_subs (t0, t4);                                             \
        t4 = vec_subs (t8, t3);                                             \
        t3 = vec_adds (t8, t3);                                             \
                                                                            \
        vy0 = vec_adds (t7, t1);                                            \
        vy7 = vec_subs (t7, t1);                                            \
        vy1 = vec_adds (t5, t3);                                            \
        vy6 = vec_subs (t5, t3);                                            \
        vy2 = vec_adds (t0, t4);                                            \
        vy5 = vec_subs (t0, t4);                                            \
        vy3 = vec_adds (t2, t6);                                            \
        vy4 = vec_subs (t2, t6);                                            \
                                                                            \
        /* ---- transpose: three interleave rounds (vy->vx->vy->vx) ---- */ \
        vx0 = vec_mergeh (vy0, vy4);                                        \
        vx1 = vec_mergel (vy0, vy4);                                        \
        vx2 = vec_mergeh (vy1, vy5);                                        \
        vx3 = vec_mergel (vy1, vy5);                                        \
        vx4 = vec_mergeh (vy2, vy6);                                        \
        vx5 = vec_mergel (vy2, vy6);                                        \
        vx6 = vec_mergeh (vy3, vy7);                                        \
        vx7 = vec_mergel (vy3, vy7);                                        \
                                                                            \
        vy0 = vec_mergeh (vx0, vx4);                                        \
        vy1 = vec_mergel (vx0, vx4);                                        \
        vy2 = vec_mergeh (vx1, vx5);                                        \
        vy3 = vec_mergel (vx1, vx5);                                        \
        vy4 = vec_mergeh (vx2, vx6);                                        \
        vy5 = vec_mergel (vx2, vx6);                                        \
        vy6 = vec_mergeh (vx3, vx7);                                        \
        vy7 = vec_mergel (vx3, vx7);                                        \
                                                                            \
        vx0 = vec_mergeh (vy0, vy4);                                        \
        vx1 = vec_mergel (vy0, vy4);                                        \
        vx2 = vec_mergeh (vy1, vy5);                                        \
        vx3 = vec_mergel (vy1, vy5);                                        \
        vx4 = vec_mergeh (vy2, vy6);                                        \
        vx5 = vec_mergel (vy2, vy6);                                        \
        vx6 = vec_mergeh (vy3, vy7);                                        \
        vx7 = vec_mergel (vy3, vy7);                                        \
                                                                            \
        /* ---- second pass: operates on the transposed vx0..vx7 ---- */    \
        /* bias goes in before the butterflies; presumably the rounding */  \
        /* term for the final >> 6 -- TODO confirm */                       \
        vx0 = vec_adds (vx0, bias);                                         \
        t5 = vec_adds (vx0, vx4);                                           \
        t0 = vec_subs (vx0, vx4);                                           \
                                                                            \
        t1 = vec_mradds (a1, vx7, vx1);                                     \
        t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7));                    \
                                                                            \
        t2 = vec_mradds (a0, vx6, vx2);                                     \
        t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6));                    \
                                                                            \
        t7 = vec_mradds (a2, vx5, vx3);                                     \
        t3 = vec_mradds (ma2, vx3, vx5);                                    \
                                                                            \
        t6 = vec_adds (t8, t3);                                             \
        t3 = vec_subs (t8, t3);                                             \
        t8 = vec_subs (t1, t7);                                             \
        t1 = vec_adds (t1, t7);                                             \
                                                                            \
        t7 = vec_adds (t5, t2);                                             \
        t2 = vec_subs (t5, t2);                                             \
        t5 = vec_adds (t0, t4);                                             \
        t0 = vec_subs (t0, t4);                                             \
        t4 = vec_subs (t8, t3);                                             \
        t3 = vec_adds (t8, t3);                                             \
                                                                            \
        vy0 = vec_adds (t7, t1);                                            \
        vy7 = vec_subs (t7, t1);                                            \
        vy1 = vec_mradds (c4, t3, t5);                                      \
        vy6 = vec_mradds (mc4, t3, t5);                                     \
        vy2 = vec_mradds (c4, t4, t0);                                      \
        vy5 = vec_mradds (mc4, t4, t0);                                     \
        vy3 = vec_adds (t2, t6);                                            \
        vy4 = vec_subs (t2, t6);                                            \
                                                                            \
        /* ---- descale: arithmetic shift right by 6, results in vx ---- */ \
        shift = vec_splat_u16 (6);                                          \
        vx0 = vec_sra (vy0, shift);                                         \
        vx1 = vec_sra (vy1, shift);                                         \
        vx2 = vec_sra (vy2, shift);                                         \
        vx3 = vec_sra (vy3, shift);                                         \
        vx4 = vec_sra (vy4, shift);                                         \
        vx5 = vec_sra (vy5, shift);                                         \
        vx6 = vec_sra (vy6, shift);                                         \
        vx7 = vec_sra (vy7, shift);                                         \
    } while (0)
173 : | |||
174 : | void | ||
175 : | idct_altivec_c(vector short *const block) | ||
176 : | { | ||
177 : | int i; | ||
178 : | int j; | ||
179 : | short block2[64]; | ||
180 : | short *block_ptr; | ||
181 : | IDCT_Vectors; | ||
182 : | |||
183 : | block_ptr = (short*)block; | ||
184 : | for (i = 0; i < 64; i++) | ||
185 : | block2[i] = block_ptr[i]; | ||
186 : | |||
187 : | for (i = 0; i < 8; i++) | ||
188 : | for (j = 0; j < 8; j++) | ||
189 : | block_ptr[i+8*j] = block2[j+8*i] << 4; | ||
190 : | |||
191 : | IDCT(); | ||
192 : | |||
193 : | block[0] = vx0; | ||
194 : | block[1] = vx1; | ||
195 : | block[2] = vx2; | ||
196 : | block[3] = vx3; | ||
197 : | block[4] = vx4; | ||
198 : | block[5] = vx5; | ||
199 : | block[6] = vx6; | ||
200 : | block[7] = vx7; | ||
201 : | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |