Annotation of /xvidcore/src/motion/ppc_asm/sad_altivec.c

Revision 1.5 - (view) (download)

1 :	chl	1.4	/*****************************************************************************
2 :			*
3 :			* XVID MPEG-4 VIDEO CODEC
4 :			* - altivec sum of absolute difference (C version)
5 :			*
6 :			* Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
7 :			*
8 :	edgomez	1.5	* This file is part of XviD, a free MPEG-4 video encoder/decoder
9 :	chl	1.4	*
10 :	edgomez	1.5	* XviD is free software; you can redistribute it and/or modify it
11 :			* under the terms of the GNU General Public License as published by
12 :	chl	1.4	* the Free Software Foundation; either version 2 of the License, or
13 :			* (at your option) any later version.
14 :			*
15 :			* This program is distributed in the hope that it will be useful,
16 :			* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 :			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 :			* GNU General Public License for more details.
19 :			*
20 :			* You should have received a copy of the GNU General Public License
21 :			* along with this program; if not, write to the Free Software
22 :			* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 :			*
24 :	edgomez	1.5	* Under section 8 of the GNU General Public License, the copyright
25 :			* holders of XVID explicitly forbid distribution in the following
26 :			* countries:
27 :			*
28 :			* - Japan
29 :			* - United States of America
30 :			*
31 :			* Linking XviD statically or dynamically with other modules is making a
32 :			* combined work based on XviD. Thus, the terms and conditions of the
33 :			* GNU General Public License cover the whole combination.
34 :			*
35 :			* As a special exception, the copyright holders of XviD give you
36 :			* permission to link XviD with independent modules that communicate with
37 :			* XviD solely through the VFW1.1 and DShow interfaces, regardless of the
38 :			* license terms of these independent modules, and to copy and distribute
39 :			* the resulting combined work under terms of your choice, provided that
40 :			* every copy of the combined work is accompanied by a complete copy of
41 :			* the source code of XviD (the version of XviD used to produce the
42 :			* combined work), being distributed under the terms of the GNU General
43 :			* Public License plus this exception. An independent module is a module
44 :			* which is not derived from or based on XviD.
45 :			*
46 :			* Note that people who make modified versions of XviD are not obligated
47 :			* to grant this special exception for their modified versions; it is
48 :			* their choice whether to do so. The GNU General Public License gives
49 :			* permission to release a modified version without this exception; this
50 :			* exception also makes it possible to release a modified version which
51 :			* carries forward this exception.
52 :			*
53 :			* $Id$
54 :	chl	1.4	*
55 :			****************************************************************************/
56 :	canard	1.1
57 :	canard	1.2	#define G_REG
58 :
59 :			#ifdef G_REG
60 :	edgomez	1.3	register vector unsigned char perm0 asm("%v29");
61 :			register vector unsigned char perm1 asm("%v30");
62 :			register vector unsigned int zerovec asm("%v31");
63 :	canard	1.2	#endif
64 :
65 :	canard	1.1	#include <stdio.h>
66 :
67 :			#undef DEBUG
68 :
69 :	canard	1.2	static const vector unsigned char perms[2] = {
70 :	edgomez	1.3	(vector unsigned char) ( /* Used when cur is aligned */
71 :			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
72 :			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17),
73 :			(vector unsigned char) ( /* Used when cur is unaligned */
74 :			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
75 :			0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f),
76 :	canard	1.2	};
77 :
78 :			#ifdef G_REG
79 :	edgomez	1.3	void
80 :			sadInit_altivec(void)
81 :	canard	1.2	{
82 :			perm0 = perms[0];
83 :			perm1 = perms[1];
84 :	edgomez	1.3	zerovec = (vector unsigned int) (0);
85 :	canard	1.2	}
86 :	edgomez	1.3	static inline const vector unsigned char
87 :			get_perm(unsigned long i)
88 :	canard	1.2	{
89 :			return i ? perm1 : perm0;
90 :			}
91 :	edgomez	1.3
92 :	canard	1.2	#define ZERODEF
93 :			#define ZEROVEC zerovec
94 :			#else
95 :	edgomez	1.3	void
96 :			sadInit_altivec(void)
97 :			{
98 :			}
99 :			static inline const vector unsigned char
100 :			get_perm(unsigned long i)
101 :	canard	1.2	{
102 :			return perms[i];
103 :			}
104 :	edgomez	1.3
105 :	canard	1.2	#define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)
106 :			#define ZEROVEC zerovec
107 :			#endif
108 :
109 :
/*
 * Accumulate the absolute differences of one 16-byte row into `sad`, then
 * step both pointers to the next row.
 *
 * Operates on the caller's locals: cur, ref, stride (counted in vectors),
 * perm, sad and the temporaries t1..t4. The absolute difference of unsigned
 * bytes is computed as max(a, b) - min(a, b).
 */
#define SAD16() \
	do { \
		t1 = vec_perm(ref[0], ref[1], perm);	/* realign the reference row */ \
		t2 = vec_max(t1, *cur);					/* find largest of two */ \
		t3 = vec_min(t1, *cur);					/* find smaller of two */ \
		t4 = vec_sub(t2, t3);					/* find absolute difference */ \
		sad = vec_sum4s(t4, sad);				/* accumulate sum of differences */ \
		cur += stride; \
		ref += stride; \
	} while (0)
117 :
118 :			/*
119 :			* This function assumes cur and stride are 16 bytes aligned and ref is unaligned
120 :			*/
121 :			unsigned long
122 :	edgomez	1.3	sad16_altivec(const vector unsigned char *cur,
123 :			const vector unsigned char *ref,
124 :			unsigned long stride,
125 :			const unsigned long best_sad)
126 :	canard	1.1	{
127 :	edgomez	1.3	vector unsigned char perm;
128 :			vector unsigned char t1, t2, t3, t4;
129 :			vector unsigned int sad;
130 :			vector signed int sumdiffs, best_vec;
131 :			unsigned long result;
132 :
133 :			ZERODEF;
134 :
135 :	canard	1.1	#ifdef DEBUG
136 :	edgomez	1.3	if (((unsigned long) cur) & 0xf)
137 :			fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
138 :	canard	1.1	// if (((unsigned long)ref) & 0xf)
139 :	edgomez	1.3	// fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);
140 :			if (stride & 0xf)
141 :			fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
142 :			#endif
143 :			/* initialization */
144 :			sad = (vector unsigned int) (ZEROVEC);
145 :			stride >>= 4;
146 :			perm = vec_lvsl(0, (unsigned char *) ref);
147 :			((unsigned long ) &best_vec) = best_sad;
148 :			best_vec = vec_splat(best_vec, 0);
149 :
150 :			/* perform sum of differences between current and previous */
151 :			SAD16();
152 :			SAD16();
153 :			SAD16();
154 :			SAD16();
155 :			/* Temp sum for exit */
156 :			sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
157 :			if (vec_all_ge(sumdiffs, best_vec))
158 :			goto bail;
159 :			SAD16();
160 :			SAD16();
161 :			SAD16();
162 :			SAD16();
163 :			sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
164 :			if (vec_all_ge(sumdiffs, best_vec))
165 :			goto bail;
166 :			SAD16();
167 :			SAD16();
168 :			SAD16();
169 :			SAD16();
170 :			SAD16();
171 :			SAD16();
172 :			SAD16();
173 :			SAD16();
174 :
175 :			/* sum all parts of difference into one 32 bit quantity */
176 :			sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
177 :			bail:
178 :			/* copy vector sum into unaligned result */
179 :			sumdiffs = vec_splat(sumdiffs, 3);
180 :			vec_ste(sumdiffs, 0, (int *) &result);
181 :			return (result);
182 :	canard	1.1	}
183 :
/*
 * Accumulate the absolute differences of two 8-byte rows into `sad`, then
 * step both pointers two rows forward.
 *
 * Operates on the caller's locals: cur, ref, stride (counted in vectors),
 * perm_cur, perm_ref1, perm_ref2, sad and the temporaries t1..t5 and tp.
 */
#define SAD8() \
	do { \
		/* pack 8 bytes of this cur row and 8 of the next into one vector */ \
		t1 = vec_perm(cur[0], cur[stride], perm_cur); \
		/* realign this reference row and the next one */ \
		t2 = vec_perm(ref[0], ref[1], perm_ref1); \
		tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); \
		/* pack the two realigned reference rows the same way */ \
		t2 = vec_perm(t2, tp, perm_ref2); \
		t3 = vec_max(t1, t2);		/* larger of each byte pair */ \
		t4 = vec_min(t1, t2);		/* smaller of each byte pair */ \
		t5 = vec_sub(t3, t4);		/* absolute difference */ \
		sad = vec_sum4s(t5, sad);	/* add into the running sums */ \
		cur += stride << 1; \
		ref += stride << 1; \
	} while (0)
194 :
195 :			/*
196 :			* This function assumes cur is 8 bytes aligned, stride is 16 bytes
197 :			* aligned and ref is unaligned
198 :			*/
199 :			unsigned long
200 :	edgomez	1.3	sad8_altivec(const vector unsigned char *cur,
201 :			const vector unsigned char *ref,
202 :			unsigned long stride)
203 :	canard	1.1	{
204 :	edgomez	1.3	vector unsigned char t1, t2, t3, t4, t5, tp;
205 :			vector unsigned int sad;
206 :			vector signed int sumdiffs;
207 :			vector unsigned char perm_cur;
208 :			vector unsigned char perm_ref1, perm_ref2;
209 :			unsigned long result;
210 :
211 :			ZERODEF;
212 :	canard	1.1
213 :			#ifdef DEBUG
214 :	edgomez	1.3	if (((unsigned long) cur) & 0x7)
215 :			fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
216 :	canard	1.1	// if (((unsigned long)ref) & 0x7)
217 :	edgomez	1.3	// fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);
218 :			if (stride & 0xf)
219 :			fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
220 :			#endif
221 :
222 :			perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01);
223 :			perm_ref1 = vec_lvsl(0, (unsigned char *) ref);
224 :			perm_ref2 = get_perm(0);
225 :
226 :			/* initialization */
227 :			sad = (vector unsigned int) (ZEROVEC);
228 :			stride >>= 4;
229 :
230 :			/* perform sum of differences between current and previous */
231 :			SAD8();
232 :			SAD8();
233 :			SAD8();
234 :			SAD8();
235 :
236 :			/* sum all parts of difference into one 32 bit quantity */
237 :			sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
238 :
239 :			/* copy vector sum into unaligned result */
240 :			sumdiffs = vec_splat(sumdiffs, 3);
241 :			vec_ste(sumdiffs, 0, (int *) &result);
242 :			return (result);
243 :	canard	1.1	}
244 :
/*
 * MEAN16(i): load the current 16-byte row into the caller's local c<i>, add
 * its bytes into `mean` and step `cur` to the next row.
 */
#define MEAN16(i) \
	do { \
		c##i = *cur; \
		mean = vec_sum4s(c##i, mean); \
		cur += stride; \
	} while (0)

/*
 * DEV16(i): add the absolute differences between row c<i> and the vector
 * `mn` into `dev`. Uses the caller's temporaries t2..t4.
 */
#define DEV16(i) \
	do { \
		t2 = vec_max(c##i, mn);		/* larger of each byte pair */ \
		t3 = vec_min(c##i, mn);		/* smaller of each byte pair */ \
		t4 = vec_sub(t2, t3);		/* absolute difference */ \
		dev = vec_sum4s(t4, dev);	/* add into the running sums */ \
	} while (0)
255 :
256 :			unsigned long
257 :	edgomez	1.3	dev16_altivec(const vector unsigned char *cur,
258 :			unsigned long stride)
259 :	canard	1.1	{
260 :	edgomez	1.3	vector unsigned char t2, t3, t4, mn;
261 :			vector unsigned int mean, dev;
262 :			vector signed int sumdiffs;
263 :			vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
264 :			c13, c14, c15;
265 :			unsigned long result;
266 :
267 :			ZERODEF;
268 :
269 :			mean = (vector unsigned int) (ZEROVEC);
270 :			dev = (vector unsigned int) (ZEROVEC);
271 :			stride >>= 4;
272 :
273 :			MEAN16(0);
274 :			MEAN16(1);
275 :			MEAN16(2);
276 :			MEAN16(3);
277 :			MEAN16(4);
278 :			MEAN16(5);
279 :			MEAN16(6);
280 :			MEAN16(7);
281 :			MEAN16(8);
282 :			MEAN16(9);
283 :			MEAN16(10);
284 :			MEAN16(11);
285 :			MEAN16(12);
286 :			MEAN16(13);
287 :			MEAN16(14);
288 :			MEAN16(15);
289 :
290 :			sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);
291 :			mn = vec_perm((vector unsigned char) sumdiffs,
292 :			(vector unsigned char) sumdiffs, (vector unsigned char) (14,
293 :			14,
294 :			14,
295 :			14,
296 :			14,
297 :			14,
298 :			14,
299 :			14,
300 :			14,
301 :			14,
302 :			14,
303 :			14,
304 :			14,
305 :			14,
306 :			14,
307 :			14));
308 :			DEV16(0);
309 :			DEV16(1);
310 :			DEV16(2);
311 :			DEV16(3);
312 :			DEV16(4);
313 :			DEV16(5);
314 :			DEV16(6);
315 :			DEV16(7);
316 :			DEV16(8);
317 :			DEV16(9);
318 :			DEV16(10);
319 :			DEV16(11);
320 :			DEV16(12);
321 :			DEV16(13);
322 :			DEV16(14);
323 :			DEV16(15);
324 :
325 :			/* sum all parts of difference into one 32 bit quantity */
326 :			sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);
327 :
328 :			/* copy vector sum into unaligned result */
329 :			sumdiffs = vec_splat(sumdiffs, 3);
330 :			vec_ste(sumdiffs, 0, (int *) &result);
331 :			return (result);
332 :	canard	1.1	}

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4