Annotation of /xvidcore/src/dct/ppc_asm/idct_altivec.c

Revision 1.1 - (view) (download)

1 :	edgomez	1.1	/*
2 :			* Copyright (c) 2001 Michel Lespinasse
3 :			*
4 :			* This library is free software; you can redistribute it and/or
5 :			* modify it under the terms of the GNU Lesser General Public
6 :			* License as published by the Free Software Foundation; either
7 :			* version 2 of the License, or (at your option) any later version.
8 :			*
9 :			* This library is distributed in the hope that it will be useful,
10 :			* but WITHOUT ANY WARRANTY; without even the implied warranty of
11 :			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 :			* Lesser General Public License for more details.
13 :			*
14 :			* You should have received a copy of the GNU Lesser General Public
15 :			* License along with this library; if not, write to the Free Software
16 :			* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :			*
18 :			*/
19 :
20 :			/*
21 :			* XviD integration by Christoph Nägeli <chn@kbw.ch>
22 :			*
23 :			* This file is a direct copy of the altivec idct module from the libmpeg2
24 :			* project with some minor changes to fit in XviD.
25 :			*/
26 :
27 :
28 :			#ifdef HAVE_ALTIVEC_H
29 :			#include <altivec.h>
30 :			#endif
31 :
32 :			#include "../../portab.h"
33 :
34 :			#define IDCT_Vectors /* declares every local that IDCT() assigns; expand once in the calling function, followed by ';' */ \
35 :			vector signed short vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; /* working rows; IDCT() leaves its final results here */ \
36 :			vector signed short vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; /* second set of working rows */ \
37 :			vector signed short a0, a1, a2, ma2, c4, mc4, zero, bias; /* values splatted from constants[0], plus an all-zero vector */ \
38 :			vector signed short t0, t1, t2, t3, t4, t5, t6, t7, t8; /* butterfly temporaries */ \
39 :			vector unsigned short shift /* shift count for the final vec_sra in IDCT() */
40 :
41 :			static const vector signed short constants [5] = {
			/*
			 * Row 0 packs the scalars that IDCT() broadcasts with vec_splat:
			 * element 0 -> c4, 1 -> a0, 2 -> a1, 3 -> a2, 4 -> mc4, 5 -> ma2;
			 * elements 6-7 (32, 31) are splatted together as one 32-bit word to form bias.
			 * Rows 1-4 are the multipliers IDCT() applies in its first pass via vec_mradds:
			 * row 1 to the block[0]/block[4] sum and difference, row 2 to the
			 * block[1]/block[7] terms, row 3 to the block[2]/block[6] terms,
			 * row 4 to block[3] and block[5].
			 * AVV is not defined in this file; presumably it comes from portab.h.
			 */
42 :			(vector signed short)AVV(23170, 13573, 6518, 21895, -23170, -21895, 32, 31),
43 :			(vector signed short)AVV(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725),
44 :			(vector signed short)AVV(16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289),
45 :			(vector signed short)AVV(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692),
46 :			(vector signed short)AVV(13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895)
47 :			};
48 :
49 :			#define IDCT() /* reads block[0..7], leaves the eight result rows in vx0..vx7; needs the IDCT_Vectors locals in scope and does not store to block */ \
50 :			c4 = vec_splat (constants[0], 0); \
51 :			a0 = vec_splat (constants[0], 1); \
52 :			a1 = vec_splat (constants[0], 2); \
53 :			a2 = vec_splat (constants[0], 3); \
54 :			mc4 = vec_splat (constants[0], 4); \
55 :			ma2 = vec_splat (constants[0], 5); \
56 :			bias = (vector signed short)vec_splat((vector signed int)constants[0], 3); /* 32-bit splat of the (32, 31) halfword pair */ \
57 :			\
58 :			zero = vec_splat_s16 (0); \
59 :			/* pass 1: sum/difference of rows 0 and 4, scaled by constants[1] */ \
60 :			vx0 = vec_adds (block[0], block[4]); \
61 :			vx4 = vec_subs (block[0], block[4]); \
62 :			t5 = vec_mradds (vx0, constants[1], zero); \
63 :			t0 = vec_mradds (vx4, constants[1], zero); \
64 :			/* rows 1 and 7 combined through a1, then scaled by constants[2] */ \
65 :			vx1 = vec_mradds (a1, block[7], block[1]); \
66 :			vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7])); \
67 :			t1 = vec_mradds (vx1, constants[2], zero); \
68 :			t8 = vec_mradds (vx7, constants[2], zero); \
69 :			/* rows 2 and 6 combined through a0, then scaled by constants[3] */ \
70 :			vx2 = vec_mradds (a0, block[6], block[2]); \
71 :			vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6])); \
72 :			t2 = vec_mradds (vx2, constants[3], zero); \
73 :			t4 = vec_mradds (vx6, constants[3], zero); \
74 :			/* rows 3 and 5 scaled by constants[4] first, then combined through a2 / ma2 */ \
75 :			vx3 = vec_mradds (block[3], constants[4], zero); \
76 :			vx5 = vec_mradds (block[5], constants[4], zero); \
77 :			t7 = vec_mradds (a2, vx5, vx3); \
78 :			t3 = vec_mradds (ma2, vx3, vx5); \
79 :			/* butterflies on the terms derived from rows 1, 3, 5, 7; t6 and t1 get an extra a0 multiply-add */ \
80 :			t6 = vec_adds (t8, t3); \
81 :			t3 = vec_subs (t8, t3); \
82 :			t8 = vec_subs (t1, t7); \
83 :			t1 = vec_adds (t1, t7); \
84 :			t6 = vec_mradds (a0, t6, t6); \
85 :			t1 = vec_mradds (a0, t1, t1); \
86 :			/* butterflies on the terms derived from rows 0, 2, 4, 6, then on t8/t3 */ \
87 :			t7 = vec_adds (t5, t2); \
88 :			t2 = vec_subs (t5, t2); \
89 :			t5 = vec_adds (t0, t4); \
90 :			t0 = vec_subs (t0, t4); \
91 :			t4 = vec_subs (t8, t3); \
92 :			t3 = vec_adds (t8, t3); \
93 :			/* pass 1 outputs vy0..vy7 (saturating adds/subtracts) */ \
94 :			vy0 = vec_adds (t7, t1); \
95 :			vy7 = vec_subs (t7, t1); \
96 :			vy1 = vec_adds (t5, t3); \
97 :			vy6 = vec_subs (t5, t3); \
98 :			vy2 = vec_adds (t0, t4); \
99 :			vy5 = vec_subs (t0, t4); \
100 :			vy3 = vec_adds (t2, t6); \
101 :			vy4 = vec_subs (t2, t6); \
102 :			/* three rounds of mergeh/mergel interleaves; NOTE(review): this looks like the usual 8x8 halfword transpose - confirm */ \
103 :			vx0 = vec_mergeh (vy0, vy4); \
104 :			vx1 = vec_mergel (vy0, vy4); \
105 :			vx2 = vec_mergeh (vy1, vy5); \
106 :			vx3 = vec_mergel (vy1, vy5); \
107 :			vx4 = vec_mergeh (vy2, vy6); \
108 :			vx5 = vec_mergel (vy2, vy6); \
109 :			vx6 = vec_mergeh (vy3, vy7); \
110 :			vx7 = vec_mergel (vy3, vy7); \
111 :			\
112 :			vy0 = vec_mergeh (vx0, vx4); \
113 :			vy1 = vec_mergel (vx0, vx4); \
114 :			vy2 = vec_mergeh (vx1, vx5); \
115 :			vy3 = vec_mergel (vx1, vx5); \
116 :			vy4 = vec_mergeh (vx2, vx6); \
117 :			vy5 = vec_mergel (vx2, vx6); \
118 :			vy6 = vec_mergeh (vx3, vx7); \
119 :			vy7 = vec_mergel (vx3, vx7); \
120 :			\
121 :			vx0 = vec_mergeh (vy0, vy4); \
122 :			vx1 = vec_mergel (vy0, vy4); \
123 :			vx2 = vec_mergeh (vy1, vy5); \
124 :			vx3 = vec_mergel (vy1, vy5); \
125 :			vx4 = vec_mergeh (vy2, vy6); \
126 :			vx5 = vec_mergel (vy2, vy6); \
127 :			vx6 = vec_mergeh (vy3, vy7); \
128 :			vx7 = vec_mergel (vy3, vy7); \
129 :			/* pass 2 on the merged rows; bias goes into row 0, which feeds every output (presumably rounding for the final >> 6) */ \
130 :			vx0 = vec_adds (vx0, bias); \
131 :			t5 = vec_adds (vx0, vx4); \
132 :			t0 = vec_subs (vx0, vx4); \
133 :			\
134 :			t1 = vec_mradds (a1, vx7, vx1); \
135 :			t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
136 :			\
137 :			t2 = vec_mradds (a0, vx6, vx2); \
138 :			t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \
139 :			\
140 :			t7 = vec_mradds (a2, vx5, vx3); \
141 :			t3 = vec_mradds (ma2, vx3, vx5); \
142 :			\
143 :			t6 = vec_adds (t8, t3); \
144 :			t3 = vec_subs (t8, t3); \
145 :			t8 = vec_subs (t1, t7); \
146 :			t1 = vec_adds (t1, t7); \
147 :			\
148 :			t7 = vec_adds (t5, t2); \
149 :			t2 = vec_subs (t5, t2); \
150 :			t5 = vec_adds (t0, t4); \
151 :			t0 = vec_subs (t0, t4); \
152 :			t4 = vec_subs (t8, t3); \
153 :			t3 = vec_adds (t8, t3); \
154 :			/* pass 2 outputs; unlike pass 1, rows 1, 2, 5, 6 are formed with a c4 / mc4 multiply-add */ \
155 :			vy0 = vec_adds (t7, t1); \
156 :			vy7 = vec_subs (t7, t1); \
157 :			vy1 = vec_mradds (c4, t3, t5); \
158 :			vy6 = vec_mradds (mc4, t3, t5); \
159 :			vy2 = vec_mradds (c4, t4, t0); \
160 :			vy5 = vec_mradds (mc4, t4, t0); \
161 :			vy3 = vec_adds (t2, t6); \
162 :			vy4 = vec_subs (t2, t6); \
163 :			/* arithmetic right shift of every row by 6; the last statement has no ';' so the call site supplies it */ \
164 :			shift = vec_splat_u16 (6); \
165 :			vx0 = vec_sra (vy0, shift); \
166 :			vx1 = vec_sra (vy1, shift); \
167 :			vx2 = vec_sra (vy2, shift); \
168 :			vx3 = vec_sra (vy3, shift); \
169 :			vx4 = vec_sra (vy4, shift); \
170 :			vx5 = vec_sra (vy5, shift); \
171 :			vx6 = vec_sra (vy6, shift); \
172 :			vx7 = vec_sra (vy7, shift)
173 :
174 :			void
175 :			idct_altivec_c(vector short *const block)
176 :			{
			/*
			 * In-place inverse DCT of the 64 coefficients at `block`
			 * (eight vectors of eight shorts each).
			 *
			 * The coefficients are first transposed and multiplied by 16, then
			 * run through IDCT(), and the eight result rows are stored back
			 * over the input. Nothing is returned.
			 */
177 :			int i;
178 :			int j;
179 :			short block2[64];
180 :			short *block_ptr;
181 :			IDCT_Vectors;
182 :
183 :			block_ptr = (short*)block;
			/* snapshot the input so the transpose below can overwrite block in place */
184 :			for (i = 0; i < 64; i++)
185 :			block2[i] = block_ptr[i];
186 :
			/*
			 * Transposed, pre-scaled copy: element (i, j) of the snapshot goes to (j, i).
			 * The index expressions need explicit '*' operators ("8j" is not a valid token),
			 * and "* 16" replaces "<< 4" because left-shifting a negative value is undefined.
			 */
187 :			for (i = 0; i < 8; i++)
188 :			for (j = 0; j < 8; j++)
189 :			block_ptr[i+8*j] = (short)(block2[j+8*i] * 16);
190 :
			/* IDCT() reads block[0..7] and leaves its results in vx0..vx7 */
191 :			IDCT();
192 :
193 :			block[0] = vx0;
194 :			block[1] = vx1;
195 :			block[2] = vx2;
196 :			block[3] = vx3;
197 :			block[4] = vx4;
198 :			block[5] = vx5;
199 :			block[6] = vx6;
200 :			block[7] = vx7;
201 :			}

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4