[cvs] / xvidcore / src / dct / ppc_asm / idct_altivec.c Repository:
ViewVC logotype

Annotation of /xvidcore/src/dct/ppc_asm/idct_altivec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download)

1 : edgomez 1.1 /*
2 :     * Copyright (c) 2001 Michel Lespinasse
3 :     *
4 :     * This library is free software; you can redistribute it and/or
5 :     * modify it under the terms of the GNU Lesser General Public
6 :     * License as published by the Free Software Foundation; either
7 :     * version 2 of the License, or (at your option) any later version.
8 :     *
9 :     * This library is distributed in the hope that it will be useful,
10 :     * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 :     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 :     * Lesser General Public License for more details.
13 :     *
14 :     * You should have received a copy of the GNU Lesser General Public
15 :     * License along with this library; if not, write to the Free Software
16 :     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :     *
18 :     */
19 :    
20 :     /*
21 :     * XviD integration by Christoph Nägeli <chn@kbw.ch>
22 :     *
23 :     * This file is a direct copy of the altivec idct module from the libmpeg2
24 :     * project with some minor changes to fit in XviD.
25 :     */
26 :    
27 :    
28 :     #ifdef HAVE_ALTIVEC_H
29 :     #include <altivec.h>
30 :     #endif
31 :    
32 :     #include "../../portab.h"
33 :    
/* Declares the complete working set of vector registers used by the IDCT()
 * macro below: first-pass row inputs (vx0..vx7), per-pass outputs
 * (vy0..vy7), butterfly temporaries (t0..t8), the cosine scalars splatted
 * out of constants[0] (a0, a1, a2, ma2, c4, mc4), the rounding bias, a
 * zero vector, and the final descale shift count.  Must be expanded in any
 * scope that expands IDCT(). */
#define IDCT_Vectors \
vector signed short vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
vector signed short vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
vector signed short a0, a1, a2, ma2, c4, mc4, zero, bias; \
vector signed short t0, t1, t2, t3, t4, t5, t6, t7, t8; \
vector unsigned short shift
40 :    
/* 1.15 fixed-point cosine constants inherited from the libmpeg2 AltiVec
 * IDCT.  constants[0] packs individual scalars that IDCT() extracts with
 * vec_splat: elements 0..5 are c4, a0, a1, a2, -c4, -a2, and the trailing
 * {32, 31} pair is splatted as a 32-bit lane to build the rounding bias.
 * constants[1..4] are per-row scale vectors applied with vec_mradds.
 * AVV is XviD's portable vector-literal macro (from portab.h).  Do not
 * reorder or alter these values — IDCT() indexes them by position. */
static const vector signed short constants [5] = {
(vector signed short)AVV(23170, 13573, 6518, 21895, -23170, -21895, 32, 31),
(vector signed short)AVV(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725),
(vector signed short)AVV(16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289),
(vector signed short)AVV(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692),
(vector signed short)AVV(13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895)
};
48 :    
/*
 * Full 8x8 two-dimensional inverse DCT over the eight row vectors in
 * block[0..7] (each vector signed short holds one row of 8 coefficients).
 * Direct copy of the libmpeg2 AltiVec algorithm:
 *   1. first 1-D butterfly pass over the rows,
 *   2. 8x8 transpose via two rounds of vec_mergeh/vec_mergel,
 *   3. second 1-D pass (with rounding bias folded into vx0),
 *   4. final arithmetic >>6 descale into vx0..vx7.
 * vec_mradds(a, b, c) is a saturating 1.15 multiply-round-add:
 * ((a*b + 0x4000) >> 15) + c per element (per the AltiVec PIM).
 * Requires IDCT_Vectors to be expanded in the same scope, and expects the
 * caller to have pre-scaled the input by <<4 (see idct_altivec_c).
 */
#define IDCT()\
c4 = vec_splat (constants[0], 0); \
a0 = vec_splat (constants[0], 1); \
a1 = vec_splat (constants[0], 2); \
a2 = vec_splat (constants[0], 3); \
mc4 = vec_splat (constants[0], 4); \
ma2 = vec_splat (constants[0], 5); \
bias = (vector signed short)vec_splat((vector signed int)constants[0], 3); \
/* bias = repeated {32, 31} short pairs (element 3 of constants[0] as int) */ \
zero = vec_splat_s16 (0); \
/* ---- pass 1: rows ---- */ \
vx0 = vec_adds (block[0], block[4]); \
vx4 = vec_subs (block[0], block[4]); \
t5 = vec_mradds (vx0, constants[1], zero); \
t0 = vec_mradds (vx4, constants[1], zero); \
\
vx1 = vec_mradds (a1, block[7], block[1]); \
vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7])); \
t1 = vec_mradds (vx1, constants[2], zero); \
t8 = vec_mradds (vx7, constants[2], zero); \
\
vx2 = vec_mradds (a0, block[6], block[2]); \
vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6])); \
t2 = vec_mradds (vx2, constants[3], zero); \
t4 = vec_mradds (vx6, constants[3], zero); \
\
vx3 = vec_mradds (block[3], constants[4], zero); \
vx5 = vec_mradds (block[5], constants[4], zero); \
t7 = vec_mradds (a2, vx5, vx3); \
t3 = vec_mradds (ma2, vx3, vx5); \
\
t6 = vec_adds (t8, t3); \
t3 = vec_subs (t8, t3); \
t8 = vec_subs (t1, t7); \
t1 = vec_adds (t1, t7); \
t6 = vec_mradds (a0, t6, t6); \
t1 = vec_mradds (a0, t1, t1); \
\
t7 = vec_adds (t5, t2); \
t2 = vec_subs (t5, t2); \
t5 = vec_adds (t0, t4); \
t0 = vec_subs (t0, t4); \
t4 = vec_subs (t8, t3); \
t3 = vec_adds (t8, t3); \
\
vy0 = vec_adds (t7, t1); \
vy7 = vec_subs (t7, t1); \
vy1 = vec_adds (t5, t3); \
vy6 = vec_subs (t5, t3); \
vy2 = vec_adds (t0, t4); \
vy5 = vec_subs (t0, t4); \
vy3 = vec_adds (t2, t6); \
vy4 = vec_subs (t2, t6); \
/* ---- 8x8 transpose: two rounds of 16-bit merges ---- */ \
vx0 = vec_mergeh (vy0, vy4); \
vx1 = vec_mergel (vy0, vy4); \
vx2 = vec_mergeh (vy1, vy5); \
vx3 = vec_mergel (vy1, vy5); \
vx4 = vec_mergeh (vy2, vy6); \
vx5 = vec_mergel (vy2, vy6); \
vx6 = vec_mergeh (vy3, vy7); \
vx7 = vec_mergel (vy3, vy7); \
\
vy0 = vec_mergeh (vx0, vx4); \
vy1 = vec_mergel (vx0, vx4); \
vy2 = vec_mergeh (vx1, vx5); \
vy3 = vec_mergel (vx1, vx5); \
vy4 = vec_mergeh (vx2, vx6); \
vy5 = vec_mergel (vx2, vx6); \
vy6 = vec_mergeh (vx3, vx7); \
vy7 = vec_mergel (vx3, vx7); \
\
vx0 = vec_mergeh (vy0, vy4); \
vx1 = vec_mergel (vy0, vy4); \
vx2 = vec_mergeh (vy1, vy5); \
vx3 = vec_mergel (vy1, vy5); \
vx4 = vec_mergeh (vy2, vy6); \
vx5 = vec_mergel (vy2, vy6); \
vx6 = vec_mergeh (vy3, vy7); \
vx7 = vec_mergel (vy3, vy7); \
/* ---- pass 2: columns (bias added once so >>6 rounds correctly) ---- */ \
vx0 = vec_adds (vx0, bias); \
t5 = vec_adds (vx0, vx4); \
t0 = vec_subs (vx0, vx4); \
\
t1 = vec_mradds (a1, vx7, vx1); \
t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
\
t2 = vec_mradds (a0, vx6, vx2); \
t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \
\
t7 = vec_mradds (a2, vx5, vx3); \
t3 = vec_mradds (ma2, vx3, vx5); \
\
t6 = vec_adds (t8, t3); \
t3 = vec_subs (t8, t3); \
t8 = vec_subs (t1, t7); \
t1 = vec_adds (t1, t7); \
\
t7 = vec_adds (t5, t2); \
t2 = vec_subs (t5, t2); \
t5 = vec_adds (t0, t4); \
t0 = vec_subs (t0, t4); \
t4 = vec_subs (t8, t3); \
t3 = vec_adds (t8, t3); \
\
vy0 = vec_adds (t7, t1); \
vy7 = vec_subs (t7, t1); \
vy1 = vec_mradds (c4, t3, t5); \
vy6 = vec_mradds (mc4, t3, t5); \
vy2 = vec_mradds (c4, t4, t0); \
vy5 = vec_mradds (mc4, t4, t0); \
vy3 = vec_adds (t2, t6); \
vy4 = vec_subs (t2, t6); \
/* ---- descale: results land in vx0..vx7 ---- */ \
shift = vec_splat_u16 (6); \
vx0 = vec_sra (vy0, shift); \
vx1 = vec_sra (vy1, shift); \
vx2 = vec_sra (vy2, shift); \
vx3 = vec_sra (vy3, shift); \
vx4 = vec_sra (vy4, shift); \
vx5 = vec_sra (vy5, shift); \
vx6 = vec_sra (vy6, shift); \
vx7 = vec_sra (vy7, shift)
173 :    
174 :     void
175 :     idct_altivec_c(vector short *const block)
176 :     {
177 :     int i;
178 :     int j;
179 :     short block2[64];
180 :     short *block_ptr;
181 :     IDCT_Vectors;
182 :    
183 :     block_ptr = (short*)block;
184 :     for (i = 0; i < 64; i++)
185 :     block2[i] = block_ptr[i];
186 :    
187 :     for (i = 0; i < 8; i++)
188 :     for (j = 0; j < 8; j++)
189 :     block_ptr[i+8*j] = block2[j+8*i] << 4;
190 :    
191 :     IDCT();
192 :    
193 :     block[0] = vx0;
194 :     block[1] = vx1;
195 :     block[2] = vx2;
196 :     block[3] = vx3;
197 :     block[4] = vx4;
198 :     block[5] = vx5;
199 :     block[6] = vx6;
200 :     block[7] = vx7;
201 :     }

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4