Annotation of /xvidcore/src/motion/ppc_asm/sad_altivec.c

Revision 1.9 - (view) (download)

1 :	edgomez	1.6	/*
2 :
3 :			Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
4 :
5 :			This program is free software; you can redistribute it and/or modify
6 :			it under the terms of the GNU General Public License as published by
7 :			the Free Software Foundation; either version 2 of the License, or
8 :			(at your option) any later version.
9 :
10 :			This program is distributed in the hope that it will be useful,
11 :			but WITHOUT ANY WARRANTY; without even the implied warranty of
12 :			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 :			GNU General Public License for more details.
14 :
15 :			You should have received a copy of the GNU General Public License
16 :			along with this program; if not, write to the Free Software
17 :			Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 :
19 :
20 :	edgomez	1.9	$Id: sad_altivec.c,v 1.6.2.1 2003/06/09 13:55:03 edgomez Exp $
21 :	edgomez	1.8	$Source: /xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $
22 :	edgomez	1.9	$Date: 2003/06/09 13:55:03 $
23 :	edgomez	1.8	$Author: edgomez $
24 :	edgomez	1.6
25 :			*/
26 :	canard	1.1
27 :	edgomez	1.9	#ifdef HAVE_ALTIVEC_H
28 :			#include <altivec.h>
29 :			#endif
30 :	canard	1.2
31 :
32 :	edgomez	1.9	#include "../../portab.h"
33 :	canard	1.1
34 :	edgomez	1.9	/* no debugging by default */
35 :	canard	1.1	#undef DEBUG
36 :
37 :	edgomez	1.9	#include <stdio.h>
38 :	canard	1.2
/*
 * One row of the 16-pixel-wide SAD with early termination.
 *
 * Uses these names from the expanding scope: cur, ref, stride, perm, t1, t2,
 * sad, sumdiffs, best_vec, and a label named `bail`.  cur is dereferenced
 * directly; the reference row is rebuilt from two adjacent vectors through
 * perm.  cur and ref are advanced by stride vector elements, so stride must
 * already be expressed in 16-byte units when the macro is expanded.
 */
#define SAD16() \
	do { \
		t1 = vec_perm(ref[0], ref[1], perm);	/* align current vector */ \
		t2 = vec_max(t1, *cur);			/* find largest of two */ \
		t1 = vec_min(t1, *cur);			/* find smaller of two */ \
		t1 = vec_sub(t2, t1);			/* find absolute difference */ \
		sad = vec_sum4s(t1, vec_splat_u32(0));	/* sum of differences */ \
		sumdiffs = (vector unsigned int)vec_sums((vector signed int)sad, \
				(vector signed int)sumdiffs);	/* accumulate sumdiffs */ \
		if (vec_any_ge(sumdiffs, best_vec)) \
			goto bail;			/* stop: running sum reached best_vec */ \
		cur += stride; \
		ref += stride; \
	} while (0)
49 :
50 :			/*
51 :			* This function assumes cur and stride are 16 bytes aligned and ref is unaligned
52 :			*/
53 :			unsigned long
54 :	edgomez	1.9	sad16_altivec_c(const vector unsigned char *cur,
55 :	edgomez	1.3	const vector unsigned char *ref,
56 :			unsigned long stride,
57 :			const unsigned long best_sad)
58 :	canard	1.1	{
59 :	edgomez	1.3	vector unsigned char perm;
60 :	edgomez	1.9	vector unsigned char t1, t2;
61 :	edgomez	1.3	vector unsigned int sad;
62 :	edgomez	1.9	vector unsigned int sumdiffs;
63 :			vector unsigned int best_vec;
64 :	edgomez	1.3	unsigned long result;
65 :
66 :	edgomez	1.9
67 :	canard	1.1	#ifdef DEBUG
68 :	edgomez	1.9	/* print alignment errors if DEBUG is on */
69 :	edgomez	1.3	if (((unsigned long) cur) & 0xf)
70 :			fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
71 :			if (stride & 0xf)
72 :			fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
73 :			#endif
74 :			/* initialization */
75 :	edgomez	1.9	sad = vec_splat_u32(0);
76 :			sumdiffs = sad;
77 :	edgomez	1.3	stride >>= 4;
78 :			perm = vec_lvsl(0, (unsigned char *) ref);
79 :			((unsigned long ) &best_vec) = best_sad;
80 :			best_vec = vec_splat(best_vec, 0);
81 :
82 :			/* perform sum of differences between current and previous */
83 :			SAD16();
84 :			SAD16();
85 :			SAD16();
86 :			SAD16();
87 :	edgomez	1.9
88 :	edgomez	1.3	SAD16();
89 :			SAD16();
90 :			SAD16();
91 :			SAD16();
92 :	edgomez	1.9
93 :	edgomez	1.3	SAD16();
94 :			SAD16();
95 :			SAD16();
96 :			SAD16();
97 :	edgomez	1.9
98 :	edgomez	1.3	SAD16();
99 :			SAD16();
100 :			SAD16();
101 :			SAD16();
102 :
103 :			bail:
104 :			/* copy vector sum into unaligned result */
105 :			sumdiffs = vec_splat(sumdiffs, 3);
106 :	edgomez	1.9	vec_ste(sumdiffs, 0, (unsigned long *) &result);
107 :			return result;
108 :	canard	1.1	}
109 :
110 :	edgomez	1.9
/*
 * Two rows of the 8-pixel-wide SAD.
 *
 * Uses these names from the expanding scope: cur, ref, stride, perm_cur,
 * perm_ref1, perm_ref2, t1, t2, tp and sad.  Rows n and n+1 are packed into
 * a single vector for both operands, then cur and ref advance by two rows.
 */
#define SAD8() \
	do { \
		/* pack one half of each of two consecutive cur rows */ \
		t1 = vec_perm(cur[0], cur[stride], perm_cur); \
		/* realign reference row n */ \
		t2 = vec_perm(ref[0], ref[1], perm_ref1); \
		/* realign reference row n+1 */ \
		tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); \
		/* pack the two realigned reference rows into one vector */ \
		t2 = vec_perm(t2, tp, perm_ref2); \
		/* |t1 - t2| computed as max - min */ \
		tp = vec_max(t1, t2); \
		t1 = vec_min(t1, t2); \
		tp = vec_sub(tp, t1); \
		/* accumulate sum of differences */ \
		sad = vec_sum4s(tp, sad); \
		cur += stride << 1; \
		ref += stride << 1; \
	} while (0)
121 :
122 :			/*
123 :			* This function assumes cur is 8 bytes aligned, stride is 16 bytes
124 :			* aligned and ref is unaligned
125 :			*/
126 :			unsigned long
127 :	edgomez	1.9	sad8_altivec_c(const vector unsigned char *cur,
128 :	edgomez	1.3	const vector unsigned char *ref,
129 :			unsigned long stride)
130 :	canard	1.1	{
131 :	edgomez	1.9	vector unsigned char t1, t2, tp;
132 :	edgomez	1.3	vector unsigned int sad;
133 :	edgomez	1.9	vector unsigned int sumdiffs;
134 :	edgomez	1.3	vector unsigned char perm_cur;
135 :			vector unsigned char perm_ref1, perm_ref2;
136 :			unsigned long result;
137 :
138 :	canard	1.1	#ifdef DEBUG
139 :	edgomez	1.9	/* print alignment errors if DEBUG is on */
140 :	edgomez	1.3	if (((unsigned long) cur) & 0x7)
141 :			fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
142 :			if (stride & 0xf)
143 :			fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
144 :			#endif
145 :	edgomez	1.9
146 :			/* check if cur is 8 or 16 bytes aligned an create the perm_cur vector */
147 :			perm_ref1 = vec_lvsl(0, (unsigned char*)ref);
148 :			perm_ref2 = vec_add(vec_lvsl(0, (unsigned char*)NULL), vec_pack(vec_splat_u16(0), vec_splat_u16(8)));
149 :			perm_cur = vec_add(perm_ref2, vec_splat(vec_lvsl(0, (unsigned char*)cur), 0));
150 :
151 :	edgomez	1.3	/* initialization */
152 :	edgomez	1.9	sad = vec_splat_u32(0);
153 :	edgomez	1.3	stride >>= 4;
154 :
155 :			/* perform sum of differences between current and previous */
156 :			SAD8();
157 :			SAD8();
158 :			SAD8();
159 :			SAD8();
160 :
161 :			/* sum all parts of difference into one 32 bit quantity */
162 :	edgomez	1.9	sumdiffs = (vector unsigned int)vec_sums((vector signed int) sad, vec_splat_s32(0));
163 :	edgomez	1.3
164 :			/* copy vector sum into unaligned result */
165 :			sumdiffs = vec_splat(sumdiffs, 3);
166 :	edgomez	1.9	vec_ste(sumdiffs, 0, (unsigned int *) &result);
167 :			return result;
168 :	canard	1.1	}
169 :
170 :
/*
 * Add the 16 bytes of the row at ptr into the four partial sums held in
 * mean, then step ptr to the next row.  Uses ptr, mean and stride from the
 * expanding scope.
 */
#define MEAN16() \
	do { \
		mean = vec_sum4s(*ptr, mean); \
		ptr += stride; \
	} while (0)
174 :
/*
 * Accumulate into dev the absolute difference between each byte of the row
 * at ptr and the matching byte of mn, then step ptr to the next row.
 * Uses ptr, mn, t2, t3, dev and stride from the expanding scope.
 */
#define DEV16() \
	do { \
		t2 = vec_max(*ptr, mn);		/* find largest of two */ \
		t3 = vec_min(*ptr, mn);		/* find smaller of two */ \
		t2 = vec_sub(t2, t3);		/* find absolute difference */ \
		dev = vec_sum4s(t2, dev);	/* accumulate sum of differences */ \
		ptr += stride; \
	} while (0)
181 :
182 :			/*
183 :			* This function assumes cur is 16 bytes aligned and stride is 16 bytes
184 :			* aligned
185 :			*/
186 :	canard	1.1	unsigned long
187 :	edgomez	1.9	dev16_altivec_c(const vector unsigned char *cur,
188 :	edgomez	1.3	unsigned long stride)
189 :	canard	1.1	{
190 :	edgomez	1.9	vector unsigned char t2, t3, mn;
191 :	edgomez	1.3	vector unsigned int mean, dev;
192 :	edgomez	1.9	vector unsigned int sumdiffs;
193 :			const vector unsigned char *ptr;
194 :	edgomez	1.3	unsigned long result;
195 :
196 :	edgomez	1.9	#ifdef DEBUG
197 :			/* print alignment errors if DEBUG is on */
198 :			if(((unsigned long)cur) & 0x7)
199 :			fprintf(stderr, "dev16_altivec:incorrect align, cur: %x\n", cur);
200 :			if(stride & 0xf)
201 :			fprintf(stderr, "dev16_altivec:incorrect align, stride: %ld\n", stride);
202 :			#endif
203 :	edgomez	1.3
204 :	edgomez	1.9	dev = mean = vec_splat_u32(0);
205 :	edgomez	1.3	stride >>= 4;
206 :	edgomez	1.9
207 :			/* set pointer to iterate through cur */
208 :			ptr = cur;
209 :
210 :			MEAN16();
211 :			MEAN16();
212 :			MEAN16();
213 :			MEAN16();
214 :			MEAN16();
215 :			MEAN16();
216 :			MEAN16();
217 :			MEAN16();
218 :			MEAN16();
219 :			MEAN16();
220 :			MEAN16();
221 :			MEAN16();
222 :			MEAN16();
223 :			MEAN16();
224 :			MEAN16();
225 :			MEAN16();
226 :
227 :			/* Add all together in sumdiffs */
228 :			sumdiffs = (vector unsigned int)vec_sums((vector signed int) mean, vec_splat_s32(0));
229 :			/* teilen durch 16 * 16 */
230 :			mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, vec_splat_u8(14));
231 :
232 :			/* set pointer to iterate through cur */
233 :			ptr = cur;
234 :
235 :			DEV16();
236 :			DEV16();
237 :			DEV16();
238 :			DEV16();
239 :			DEV16();
240 :			DEV16();
241 :			DEV16();
242 :			DEV16();
243 :			DEV16();
244 :			DEV16();
245 :			DEV16();
246 :			DEV16();
247 :			DEV16();
248 :			DEV16();
249 :			DEV16();
250 :			DEV16();
251 :	edgomez	1.3
252 :			/* sum all parts of difference into one 32 bit quantity */
253 :	edgomez	1.9	sumdiffs = (vector unsigned int)vec_sums((vector signed int) dev, vec_splat_s32(0));
254 :	edgomez	1.3
255 :			/* copy vector sum into unaligned result */
256 :			sumdiffs = vec_splat(sumdiffs, 3);
257 :	edgomez	1.9	vec_ste(sumdiffs, 0, (unsigned int *) &result);
258 :			return result;
259 :			}
260 :
/*
 * One row of the bidirectional 16-pixel-wide SAD.
 *
 * Uses these names from the expanding scope: cur, ref1, ref2, stride, mask1,
 * mask2, t1, t2, sad and sum.  Both reference rows are realigned, averaged
 * with vec_avg, and the absolute difference against *cur is added to sum.
 */
#define SAD16BI() \
	do { \
		t1 = vec_perm(ref1[0], ref1[1], mask1);	/* realign first reference */ \
		t2 = vec_perm(ref2[0], ref2[1], mask2);	/* realign second reference */ \
		t1 = vec_avg(t1, t2);			/* average of both references */ \
		t2 = vec_max(t1, *cur);			/* larger of average and cur */ \
		t1 = vec_min(t1, *cur);			/* smaller of average and cur */ \
		sad = vec_sub(t2, t1);			/* absolute difference */ \
		sum = vec_sum4s(sad, sum);		/* accumulate */ \
		cur += stride; \
		ref1 += stride; \
		ref2 += stride; \
	} while (0)
272 :
273 :			/*
274 :			* This function assumes cur is 16 bytes aligned, stride is 16 bytes
275 :			* aligned and ref1 and ref2 is unaligned
276 :			*/
277 :			unsigned long
278 :			sad16bi_altivec_c(vector unsigned char *cur,
279 :			vector unsigned char *ref1,
280 :			vector unsigned char *ref2,
281 :			unsigned long stride)
282 :			{
283 :			vector unsigned char t1, t2;
284 :			vector unsigned char mask1, mask2;
285 :			vector unsigned char sad;
286 :			vector unsigned int sum;
287 :			unsigned long result;
288 :
289 :			#ifdef DEBUG
290 :			/* print alignment errors if this is on */
291 :			if(cur & 0xf)
292 :			fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %x\n", cur);
293 :			if(stride & 0xf)
294 :			fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %ld\n", stride);
295 :			#endif
296 :
297 :			/* Initialisation stuff */
298 :			stride >>= 4;
299 :			mask1 = vec_lvsl(0, (unsigned char*)ref1);
300 :			mask2 = vec_lvsl(0, (unsigned char*)ref2);
301 :			sad = vec_splat_u8(0);
302 :			sum = (vector unsigned int)sad;
303 :
304 :			SAD16BI();
305 :			SAD16BI();
306 :			SAD16BI();
307 :			SAD16BI();
308 :
309 :			SAD16BI();
310 :			SAD16BI();
311 :			SAD16BI();
312 :			SAD16BI();
313 :
314 :			SAD16BI();
315 :			SAD16BI();
316 :			SAD16BI();
317 :			SAD16BI();
318 :
319 :			SAD16BI();
320 :			SAD16BI();
321 :			SAD16BI();
322 :			SAD16BI();
323 :
324 :			sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));
325 :			sum = vec_splat(sum, 3);
326 :			vec_ste(sum, 0, (unsigned int*)&result);
327 :
328 :			return result;
329 :			}
330 :
331 :
/*
 * One row of the 8-coefficient sum of squared errors on 16-bit data.
 *
 * Uses these names from the expanding scope: b1, b2, stride, b1_vec, b2_vec,
 * diff and sum.  Each row is fetched with an unaligned-load sequence
 * (two vec_ld plus a vec_lvsl permute), the element-wise difference is
 * multiplied by itself and accumulated by vec_msum.  stride is applied as a
 * byte offset, hence the detour through a byte pointer.
 */
#define SSE8_16BIT() \
	do { \
		b1_vec = vec_perm(vec_ld(0, b1), vec_ld(16, b1), vec_lvsl(0, b1)); \
		b2_vec = vec_perm(vec_ld(0, b2), vec_ld(16, b2), vec_lvsl(0, b2)); \
		diff = vec_sub(b1_vec, b2_vec); \
		sum = vec_msum(diff, diff, sum); \
		b1 = (const int16_t *)((const int8_t *)b1 + stride); \
		b2 = (const int16_t *)((const int8_t *)b2 + stride); \
	} while (0)
339 :
340 :			uint32_t
341 :			sse8_16bit_altivec_c(const int16_t * b1,
342 :			const int16_t * b2,
343 :			const uint32_t stride)
344 :			{
345 :			register vector signed short b1_vec;
346 :			register vector signed short b2_vec;
347 :			register vector signed short diff;
348 :			register vector signed int sum;
349 :			uint32_t result;
350 :
351 :			/* initialize */
352 :			sum = vec_splat_s32(0);
353 :
354 :			SSE8_16BIT();
355 :			SSE8_16BIT();
356 :			SSE8_16BIT();
357 :			SSE8_16BIT();
358 :
359 :			SSE8_16BIT();
360 :			SSE8_16BIT();
361 :			SSE8_16BIT();
362 :			SSE8_16BIT();
363 :
364 :			/* sum the vector */
365 :			sum = vec_sums(sum, vec_splat_s32(0));
366 :			sum = vec_splat(sum,3);
367 :
368 :			vec_ste(sum,0,(int*)&result);
369 :
370 :			/* and return */
371 :			return result;
372 :	canard	1.1	}

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4