Annotation of /xvidcore/src/motion/ppc_asm/sad_altivec.c

Revision 1.11 - (view) (download)

1 :	edgomez	1.6	/*
2 :
3 :			Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
4 :
5 :			This program is free software; you can redistribute it and/or modify
6 :			it under the terms of the GNU General Public License as published by
7 :			the Free Software Foundation; either version 2 of the License, or
8 :			(at your option) any later version.
9 :
10 :			This program is distributed in the hope that it will be useful,
11 :			but WITHOUT ANY WARRANTY; without even the implied warranty of
12 :			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 :			GNU General Public License for more details.
14 :
15 :			You should have received a copy of the GNU General Public License
16 :			along with this program; if not, write to the Free Software
17 :			Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 :
19 :
20 :	edgomez	1.10	$Id$
21 :	edgomez	1.6	*/
22 :	canard	1.1
23 :	edgomez	1.9	#ifdef HAVE_ALTIVEC_H
24 :			#include <altivec.h>
25 :			#endif
26 :	canard	1.2
27 :
28 :	edgomez	1.9	#include "../../portab.h"
29 :	canard	1.1
30 :	edgomez	1.9	/* no debugging by default */
31 :	canard	1.1	#undef DEBUG
32 :
33 :	edgomez	1.9	#include <stdio.h>
34 :	canard	1.2
/*
 * SAD16: process one 16-byte row for sad16_altivec_c.
 *
 * The macro relies on these names from the expanding scope: cur and ref
 * (pointers to vector unsigned char), perm, t1, t2, sad, sumdiffs,
 * best_vec, stride (already converted to a count of 16-byte vectors) and
 * a label named "bail".
 *
 * The running total is kept in sumdiffs; as soon as any element of
 * sumdiffs is >= the matching element of best_vec the macro jumps to
 * "bail" without advancing the pointers.
 */
#define SAD16() \
	do { \
		t1 = vec_perm(ref[0], ref[1], perm);	/* realign the unaligned ref row */ \
		t2 = vec_max(t1, *cur);			/* find largest of two */ \
		t1 = vec_min(t1, *cur);			/* find smaller of two */ \
		t1 = vec_sub(t2, t1);			/* find absolute difference */ \
		sad = vec_sum4s(t1, vec_splat_u32(0));	/* sum of differences */ \
		sumdiffs = (vector unsigned int)vec_sums((vector signed int)sad, (vector signed int)sumdiffs); /* accumulate sumdiffs */ \
		if (vec_any_ge(sumdiffs, best_vec)) \
			goto bail; \
		cur += stride; \
		ref += stride; \
	} while (0)
45 :
46 :			/*
47 :			* This function assumes cur and stride are 16 bytes aligned and ref is unaligned
48 :			*/
49 :	edgomez	1.11
50 :			uint32_t
51 :			sad16_altivec_c(vector unsigned char *cur,
52 :			vector unsigned char *ref,
53 :			uint32_t stride,
54 :			const uint32_t best_sad)
55 :	canard	1.1	{
56 :	edgomez	1.3	vector unsigned char perm;
57 :	edgomez	1.9	vector unsigned char t1, t2;
58 :	edgomez	1.3	vector unsigned int sad;
59 :	edgomez	1.9	vector unsigned int sumdiffs;
60 :	edgomez	1.11	vector unsigned int best_vec;
61 :			uint32_t result;
62 :	edgomez	1.3
63 :	edgomez	1.9
64 :	canard	1.1	#ifdef DEBUG
65 :	edgomez	1.9	/* print alignment errors if DEBUG is on */
66 :	edgomez	1.3	if (((unsigned long) cur) & 0xf)
67 :	edgomez	1.11	fprintf(stderr, "sad16_altivec:incorrect align, cur: %lx\n", (long)cur);
68 :	edgomez	1.3	if (stride & 0xf)
69 :	edgomez	1.11	fprintf(stderr, "sad16_altivec:incorrect align, stride: %lu\n", stride);
70 :	edgomez	1.3	#endif
71 :			/* initialization */
72 :	edgomez	1.11	sad = vec_splat_u32(0);
73 :			sumdiffs = sad;
74 :	edgomez	1.3	stride >>= 4;
75 :			perm = vec_lvsl(0, (unsigned char *) ref);
76 :	edgomez	1.11	((uint32_t)&best_vec) = best_sad;
77 :	edgomez	1.3	best_vec = vec_splat(best_vec, 0);
78 :
79 :			/* perform sum of differences between current and previous */
80 :			SAD16();
81 :			SAD16();
82 :			SAD16();
83 :			SAD16();
84 :	edgomez	1.9
85 :	edgomez	1.3	SAD16();
86 :			SAD16();
87 :			SAD16();
88 :			SAD16();
89 :	edgomez	1.9
90 :	edgomez	1.3	SAD16();
91 :			SAD16();
92 :			SAD16();
93 :			SAD16();
94 :	edgomez	1.9
95 :	edgomez	1.3	SAD16();
96 :			SAD16();
97 :			SAD16();
98 :			SAD16();
99 :
100 :			bail:
101 :			/* copy vector sum into unaligned result */
102 :			sumdiffs = vec_splat(sumdiffs, 3);
103 :	edgomez	1.11	vec_ste(sumdiffs, 0, (uint32_t*) &result);
104 :	edgomez	1.9	return result;
105 :	canard	1.1	}
106 :
107 :	edgomez	1.9
/*
 * SAD8: accumulate the absolute differences of one row for
 * sad8_altivec_c.
 *
 * Both cur and ref are loaded with the unaligned-load idiom
 * (two vec_ld plus vec_perm with vec_lvsl). The per-word sums of all
 * 16 loaded bytes are added into sad; the caller decides which words
 * to keep. Uses c, r, sad, cur, ref and stride from the expanding scope.
 */
#define SAD8() \
	do { \
		c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \
		r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); \
		c = vec_sub(vec_max(c, r), vec_min(c, r)); \
		sad = vec_sum4s(c, sad); \
		cur += stride; \
		ref += stride; \
	} while (0)
115 :	canard	1.1
116 :			/*
117 :	edgomez	1.11	* This function assumes nothing
118 :	canard	1.1	*/
119 :	edgomez	1.11
120 :			uint32_t
121 :			sad8_altivec_c(const uint8_t * cur,
122 :			const uint8_t *ref,
123 :			const uint32_t stride)
124 :	canard	1.1	{
125 :	edgomez	1.11	uint32_t result = 0;
126 :
127 :			register vector unsigned int sad;
128 :			register vector unsigned char c;
129 :			register vector unsigned char r;
130 :
131 :			/* initialize */
132 :	edgomez	1.9	sad = vec_splat_u32(0);
133 :	edgomez	1.11
134 :			/* Perform sad operations */
135 :			SAD8();
136 :			SAD8();
137 :			SAD8();
138 :			SAD8();
139 :
140 :	edgomez	1.3	SAD8();
141 :			SAD8();
142 :			SAD8();
143 :			SAD8();
144 :	edgomez	1.11
145 :			/* finish addition, add the first 2 together */
146 :			sad = vec_and(sad, (vector unsigned int)vec_pack(vec_splat_u16(-1),vec_splat_u16(0)));
147 :			sad = (vector unsigned int)vec_sums((vector signed int)sad, vec_splat_s32(0));
148 :			sad = vec_splat(sad,3);
149 :			vec_ste(sad, 0, &result);
150 :
151 :			return result;
152 :			}
153 :	edgomez	1.3
154 :
155 :	canard	1.1
156 :
/*
 * MEAN16: add the bytes of the row at *ptr into the four 32-bit partial
 * sums held in mean, then step ptr to the next row. Uses ptr, mean and
 * stride (in 16-byte vectors) from the expanding scope.
 */
#define MEAN16() \
	do { \
		mean = vec_sum4s(*ptr, mean); \
		ptr += stride; \
	} while (0)
160 :
/*
 * DEV16: accumulate |row - mn| for the row at *ptr into the four 32-bit
 * partial sums held in dev, then step ptr to the next row. Uses ptr, mn,
 * t2, t3, dev and stride (in 16-byte vectors) from the expanding scope.
 */
#define DEV16() \
	do { \
		t2 = vec_max(*ptr, mn);		/* find largest of two */ \
		t3 = vec_min(*ptr, mn);		/* find smaller of two */ \
		t2 = vec_sub(t2, t3);		/* find absolute difference */ \
		dev = vec_sum4s(t2, dev); \
		ptr += stride; \
	} while (0)
167 :
168 :			/*
169 :			* This function assumes cur is 16 bytes aligned and stride is 16 bytes
170 :			* aligned
171 :			*/
172 :	edgomez	1.11
173 :			uint32_t
174 :			dev16_altivec_c(vector unsigned char *cur,
175 :			uint32_t stride)
176 :	canard	1.1	{
177 :	edgomez	1.9	vector unsigned char t2, t3, mn;
178 :	edgomez	1.3	vector unsigned int mean, dev;
179 :	edgomez	1.9	vector unsigned int sumdiffs;
180 :	edgomez	1.11	vector unsigned char *ptr;
181 :			uint32_t result;
182 :	edgomez	1.3
183 :	edgomez	1.9	#ifdef DEBUG
184 :			/* print alignment errors if DEBUG is on */
185 :			if(((unsigned long)cur) & 0x7)
186 :	edgomez	1.11	fprintf(stderr, "dev16_altivec:incorrect align, cur: %lx\n", (long)cur);
187 :	edgomez	1.9	if(stride & 0xf)
188 :	edgomez	1.11	fprintf(stderr, "dev16_altivec:incorrect align, stride: %lu\n", stride);
189 :	edgomez	1.9	#endif
190 :	edgomez	1.3
191 :	edgomez	1.9	dev = mean = vec_splat_u32(0);
192 :	edgomez	1.3	stride >>= 4;
193 :	edgomez	1.9
194 :	edgomez	1.11	/* set pointer to iterate through cur */
195 :			ptr = cur;
196 :	edgomez	1.9
197 :			MEAN16();
198 :			MEAN16();
199 :			MEAN16();
200 :			MEAN16();
201 :			MEAN16();
202 :			MEAN16();
203 :			MEAN16();
204 :			MEAN16();
205 :			MEAN16();
206 :			MEAN16();
207 :			MEAN16();
208 :			MEAN16();
209 :			MEAN16();
210 :			MEAN16();
211 :			MEAN16();
212 :			MEAN16();
213 :
214 :			/* Add all together in sumdiffs */
215 :			sumdiffs = (vector unsigned int)vec_sums((vector signed int) mean, vec_splat_s32(0));
216 :			/* teilen durch 16 * 16 */
217 :			mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, vec_splat_u8(14));
218 :
219 :			/* set pointer to iterate through cur */
220 :			ptr = cur;
221 :
222 :			DEV16();
223 :			DEV16();
224 :			DEV16();
225 :			DEV16();
226 :			DEV16();
227 :			DEV16();
228 :			DEV16();
229 :			DEV16();
230 :			DEV16();
231 :			DEV16();
232 :			DEV16();
233 :			DEV16();
234 :			DEV16();
235 :			DEV16();
236 :			DEV16();
237 :			DEV16();
238 :	edgomez	1.3
239 :			/* sum all parts of difference into one 32 bit quantity */
240 :	edgomez	1.9	sumdiffs = (vector unsigned int)vec_sums((vector signed int) dev, vec_splat_s32(0));
241 :	edgomez	1.3
242 :			/* copy vector sum into unaligned result */
243 :			sumdiffs = vec_splat(sumdiffs, 3);
244 :	edgomez	1.11	vec_ste(sumdiffs, 0, (uint32_t*) &result);
245 :	edgomez	1.9	return result;
246 :			}
247 :
/*
 * SAD16BI: process one 16-byte row for sad16bi_altivec_c.
 *
 * Realigns one row from each of ref1 and ref2, averages them with
 * vec_avg, and adds the absolute difference against *cur into the four
 * 32-bit partial sums in sum. Uses cur, ref1, ref2, mask1, mask2, t1, t2,
 * sad, sum and stride (in 16-byte vectors) from the expanding scope.
 */
#define SAD16BI() \
	do { \
		t1 = vec_perm(ref1[0], ref1[1], mask1); \
		t2 = vec_perm(ref2[0], ref2[1], mask2); \
		t1 = vec_avg(t1, t2); \
		t2 = vec_max(t1, *cur); \
		t1 = vec_min(t1, *cur); \
		sad = vec_sub(t2, t1); \
		sum = vec_sum4s(sad, sum); \
		cur += stride; \
		ref1 += stride; \
		ref2 += stride; \
	} while (0)
259 :
260 :			/*
261 :			* This function assumes cur is 16 bytes aligned, stride is 16 bytes
262 :			* aligned and ref1 and ref2 is unaligned
263 :			*/
264 :	edgomez	1.11
265 :			uint32_t
266 :	edgomez	1.9	sad16bi_altivec_c(vector unsigned char *cur,
267 :			vector unsigned char *ref1,
268 :			vector unsigned char *ref2,
269 :	edgomez	1.11	uint32_t stride)
270 :	edgomez	1.9	{
271 :			vector unsigned char t1, t2;
272 :			vector unsigned char mask1, mask2;
273 :			vector unsigned char sad;
274 :			vector unsigned int sum;
275 :	edgomez	1.11	uint32_t result;
276 :	edgomez	1.9
277 :			#ifdef DEBUG
278 :			/* print alignment errors if this is on */
279 :	edgomez	1.11	if((long)cur & 0xf)
280 :			fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lx\n", (long)cur);
281 :	edgomez	1.9	if(stride & 0xf)
282 :	edgomez	1.11	fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lu\n", stride);
283 :	edgomez	1.9	#endif
284 :
285 :			/* Initialisation stuff */
286 :			stride >>= 4;
287 :			mask1 = vec_lvsl(0, (unsigned char*)ref1);
288 :			mask2 = vec_lvsl(0, (unsigned char*)ref2);
289 :			sad = vec_splat_u8(0);
290 :			sum = (vector unsigned int)sad;
291 :
292 :			SAD16BI();
293 :			SAD16BI();
294 :			SAD16BI();
295 :			SAD16BI();
296 :
297 :			SAD16BI();
298 :			SAD16BI();
299 :			SAD16BI();
300 :			SAD16BI();
301 :
302 :			SAD16BI();
303 :			SAD16BI();
304 :			SAD16BI();
305 :			SAD16BI();
306 :
307 :			SAD16BI();
308 :			SAD16BI();
309 :			SAD16BI();
310 :			SAD16BI();
311 :
312 :			sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));
313 :			sum = vec_splat(sum, 3);
314 :	edgomez	1.11	vec_ste(sum, 0, (uint32_t*)&result);
315 :	edgomez	1.9
316 :			return result;
317 :			}
318 :
319 :
/*
 * SSE8_16BIT: process one row of eight 16-bit values for
 * sse8_16bit_altivec_c.
 *
 * Loads one (possibly unaligned) vector from each of b1 and b2, and adds
 * the squared differences into the four 32-bit accumulators in sum via
 * vec_msum. b1 and b2 are then advanced by stride BYTES, hence the
 * round-trip through a byte pointer. Uses b1, b2, b1_vec, b2_vec, diff,
 * sum and stride from the expanding scope.
 */
#define SSE8_16BIT() \
	do { \
		b1_vec = vec_perm(vec_ld(0, b1), vec_ld(16, b1), vec_lvsl(0, b1)); \
		b2_vec = vec_perm(vec_ld(0, b2), vec_ld(16, b2), vec_lvsl(0, b2)); \
		diff = vec_sub(b1_vec, b2_vec); \
		sum = vec_msum(diff, diff, sum); \
		b1 = (const int16_t *)((const int8_t *)b1 + stride); \
		b2 = (const int16_t *)((const int8_t *)b2 + stride); \
	} while (0)
327 :
328 :			uint32_t
329 :			sse8_16bit_altivec_c(const int16_t * b1,
330 :			const int16_t * b2,
331 :			const uint32_t stride)
332 :			{
333 :			register vector signed short b1_vec;
334 :			register vector signed short b2_vec;
335 :			register vector signed short diff;
336 :			register vector signed int sum;
337 :			uint32_t result;
338 :
339 :			/* initialize */
340 :			sum = vec_splat_s32(0);
341 :
342 :			SSE8_16BIT();
343 :			SSE8_16BIT();
344 :			SSE8_16BIT();
345 :			SSE8_16BIT();
346 :
347 :			SSE8_16BIT();
348 :			SSE8_16BIT();
349 :			SSE8_16BIT();
350 :			SSE8_16BIT();
351 :
352 :			/* sum the vector */
353 :			sum = vec_sums(sum, vec_splat_s32(0));
354 :			sum = vec_splat(sum,3);
355 :
356 :			vec_ste(sum,0,(int*)&result);
357 :
358 :			/* and return */
359 :			return result;
360 :	canard	1.1	}

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4