Parent Directory | Revision Log
Revision 1.9 - (view) (download)
/*

    Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


    $Id: sad_altivec.c,v 1.6.2.1 2003/06/09 13:55:03 edgomez Exp $
    $Source: /xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $
    $Date: 2003/06/09 13:55:03 $
    $Author: edgomez $

*/
26 : | canard | 1.1 | |
27 : | edgomez | 1.9 | #ifdef HAVE_ALTIVEC_H |
28 : | #include <altivec.h> | ||
29 : | #endif | ||
30 : | canard | 1.2 | |
31 : | |||
32 : | edgomez | 1.9 | #include "../../portab.h" |
33 : | canard | 1.1 | |
34 : | edgomez | 1.9 | /* no debugging by default */ |
35 : | canard | 1.1 | #undef DEBUG |
36 : | |||
37 : | edgomez | 1.9 | #include <stdio.h> |
38 : | canard | 1.2 | |
39 : | canard | 1.1 | #define SAD16() \ |
40 : | t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ | ||
41 : | t2 = vec_max(t1, *cur); /* find largest of two */ \ | ||
42 : | edgomez | 1.9 | t1 = vec_min(t1, *cur); /* find smaller of two */ \ |
43 : | t1 = vec_sub(t2, t1); /* find absolute difference */ \ | ||
44 : | sad = vec_sum4s(t1, vec_splat_u32(0)); /* sum of differences */ \ | ||
45 : | sumdiffs = (vector unsigned int)vec_sums((vector signed int)sad, (vector signed int)sumdiffs); /* accumulate sumdiffs */ \ | ||
46 : | if(vec_any_ge(sumdiffs, best_vec)) \ | ||
47 : | goto bail; \ | ||
48 : | canard | 1.1 | cur += stride; ref += stride; |
49 : | |||
50 : | /* | ||
51 : | * This function assumes cur and stride are 16 bytes aligned and ref is unaligned | ||
52 : | */ | ||
53 : | unsigned long | ||
54 : | edgomez | 1.9 | sad16_altivec_c(const vector unsigned char *cur, |
55 : | edgomez | 1.3 | const vector unsigned char *ref, |
56 : | unsigned long stride, | ||
57 : | const unsigned long best_sad) | ||
58 : | canard | 1.1 | { |
59 : | edgomez | 1.3 | vector unsigned char perm; |
60 : | edgomez | 1.9 | vector unsigned char t1, t2; |
61 : | edgomez | 1.3 | vector unsigned int sad; |
62 : | edgomez | 1.9 | vector unsigned int sumdiffs; |
63 : | vector unsigned int best_vec; | ||
64 : | edgomez | 1.3 | unsigned long result; |
65 : | |||
66 : | edgomez | 1.9 | |
67 : | canard | 1.1 | #ifdef DEBUG |
68 : | edgomez | 1.9 | /* print alignment errors if DEBUG is on */ |
69 : | edgomez | 1.3 | if (((unsigned long) cur) & 0xf) |
70 : | fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); | ||
71 : | if (stride & 0xf) | ||
72 : | fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); | ||
73 : | #endif | ||
74 : | /* initialization */ | ||
75 : | edgomez | 1.9 | sad = vec_splat_u32(0); |
76 : | sumdiffs = sad; | ||
77 : | edgomez | 1.3 | stride >>= 4; |
78 : | perm = vec_lvsl(0, (unsigned char *) ref); | ||
79 : | *((unsigned long *) &best_vec) = best_sad; | ||
80 : | best_vec = vec_splat(best_vec, 0); | ||
81 : | |||
82 : | /* perform sum of differences between current and previous */ | ||
83 : | SAD16(); | ||
84 : | SAD16(); | ||
85 : | SAD16(); | ||
86 : | SAD16(); | ||
87 : | edgomez | 1.9 | |
88 : | edgomez | 1.3 | SAD16(); |
89 : | SAD16(); | ||
90 : | SAD16(); | ||
91 : | SAD16(); | ||
92 : | edgomez | 1.9 | |
93 : | edgomez | 1.3 | SAD16(); |
94 : | SAD16(); | ||
95 : | SAD16(); | ||
96 : | SAD16(); | ||
97 : | edgomez | 1.9 | |
98 : | edgomez | 1.3 | SAD16(); |
99 : | SAD16(); | ||
100 : | SAD16(); | ||
101 : | SAD16(); | ||
102 : | |||
103 : | bail: | ||
104 : | /* copy vector sum into unaligned result */ | ||
105 : | sumdiffs = vec_splat(sumdiffs, 3); | ||
106 : | edgomez | 1.9 | vec_ste(sumdiffs, 0, (unsigned long *) &result); |
107 : | return result; | ||
108 : | canard | 1.1 | } |
109 : | |||
110 : | edgomez | 1.9 | |
111 : | canard | 1.1 | #define SAD8() \ |
112 : | t1 = vec_perm(cur[0], cur[stride], perm_cur); /* align current vector */ \ | ||
113 : | t2 = vec_perm(ref[0], ref[1], perm_ref1); /* align current vector */ \ | ||
114 : | tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); /* align current vector */ \ | ||
115 : | t2 = vec_perm(t2,tp,perm_ref2); \ | ||
116 : | edgomez | 1.9 | tp = vec_max(t1, t2); /* find largest of two */ \ |
117 : | t1 = vec_min(t1, t2); /* find smaller of two */ \ | ||
118 : | tp = vec_sub(tp, t1); /* find absolute difference */ \ | ||
119 : | sad = vec_sum4s(tp, sad); /* accumulate sum of differences */ \ | ||
120 : | canard | 1.1 | cur += stride<<1; ref += stride<<1; |
121 : | |||
122 : | /* | ||
123 : | * This function assumes cur is 8 bytes aligned, stride is 16 bytes | ||
124 : | * aligned and ref is unaligned | ||
125 : | */ | ||
126 : | unsigned long | ||
127 : | edgomez | 1.9 | sad8_altivec_c(const vector unsigned char *cur, |
128 : | edgomez | 1.3 | const vector unsigned char *ref, |
129 : | unsigned long stride) | ||
130 : | canard | 1.1 | { |
131 : | edgomez | 1.9 | vector unsigned char t1, t2, tp; |
132 : | edgomez | 1.3 | vector unsigned int sad; |
133 : | edgomez | 1.9 | vector unsigned int sumdiffs; |
134 : | edgomez | 1.3 | vector unsigned char perm_cur; |
135 : | vector unsigned char perm_ref1, perm_ref2; | ||
136 : | unsigned long result; | ||
137 : | |||
138 : | canard | 1.1 | #ifdef DEBUG |
139 : | edgomez | 1.9 | /* print alignment errors if DEBUG is on */ |
140 : | edgomez | 1.3 | if (((unsigned long) cur) & 0x7) |
141 : | fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); | ||
142 : | if (stride & 0xf) | ||
143 : | fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); | ||
144 : | #endif | ||
145 : | edgomez | 1.9 | |
146 : | /* check if cur is 8 or 16 bytes aligned an create the perm_cur vector */ | ||
147 : | perm_ref1 = vec_lvsl(0, (unsigned char*)ref); | ||
148 : | perm_ref2 = vec_add(vec_lvsl(0, (unsigned char*)NULL), vec_pack(vec_splat_u16(0), vec_splat_u16(8))); | ||
149 : | perm_cur = vec_add(perm_ref2, vec_splat(vec_lvsl(0, (unsigned char*)cur), 0)); | ||
150 : | |||
151 : | edgomez | 1.3 | /* initialization */ |
152 : | edgomez | 1.9 | sad = vec_splat_u32(0); |
153 : | edgomez | 1.3 | stride >>= 4; |
154 : | |||
155 : | /* perform sum of differences between current and previous */ | ||
156 : | SAD8(); | ||
157 : | SAD8(); | ||
158 : | SAD8(); | ||
159 : | SAD8(); | ||
160 : | |||
161 : | /* sum all parts of difference into one 32 bit quantity */ | ||
162 : | edgomez | 1.9 | sumdiffs = (vector unsigned int)vec_sums((vector signed int) sad, vec_splat_s32(0)); |
163 : | edgomez | 1.3 | |
164 : | /* copy vector sum into unaligned result */ | ||
165 : | sumdiffs = vec_splat(sumdiffs, 3); | ||
166 : | edgomez | 1.9 | vec_ste(sumdiffs, 0, (unsigned int *) &result); |
167 : | return result; | ||
168 : | canard | 1.1 | } |
169 : | |||
170 : | |||
171 : | edgomez | 1.9 | #define MEAN16() \ |
172 : | mean = vec_sum4s(*ptr,mean);\ | ||
173 : | ptr += stride | ||
174 : | |||
175 : | #define DEV16() \ | ||
176 : | t2 = vec_max(*ptr, mn); /* find largest of two */ \ | ||
177 : | t3 = vec_min(*ptr, mn); /* find smaller of two */ \ | ||
178 : | t2 = vec_sub(t2, t3); /* find absolute difference */ \ | ||
179 : | dev = vec_sum4s(t2, dev); \ | ||
180 : | ptr += stride | ||
181 : | |||
182 : | /* | ||
183 : | * This function assumes cur is 16 bytes aligned and stride is 16 bytes | ||
184 : | * aligned | ||
185 : | */ | ||
186 : | canard | 1.1 | unsigned long |
187 : | edgomez | 1.9 | dev16_altivec_c(const vector unsigned char *cur, |
188 : | edgomez | 1.3 | unsigned long stride) |
189 : | canard | 1.1 | { |
190 : | edgomez | 1.9 | vector unsigned char t2, t3, mn; |
191 : | edgomez | 1.3 | vector unsigned int mean, dev; |
192 : | edgomez | 1.9 | vector unsigned int sumdiffs; |
193 : | const vector unsigned char *ptr; | ||
194 : | edgomez | 1.3 | unsigned long result; |
195 : | |||
196 : | edgomez | 1.9 | #ifdef DEBUG |
197 : | /* print alignment errors if DEBUG is on */ | ||
198 : | if(((unsigned long)cur) & 0x7) | ||
199 : | fprintf(stderr, "dev16_altivec:incorrect align, cur: %x\n", cur); | ||
200 : | if(stride & 0xf) | ||
201 : | fprintf(stderr, "dev16_altivec:incorrect align, stride: %ld\n", stride); | ||
202 : | #endif | ||
203 : | edgomez | 1.3 | |
204 : | edgomez | 1.9 | dev = mean = vec_splat_u32(0); |
205 : | edgomez | 1.3 | stride >>= 4; |
206 : | edgomez | 1.9 | |
207 : | /* set pointer to iterate through cur */ | ||
208 : | ptr = cur; | ||
209 : | |||
210 : | MEAN16(); | ||
211 : | MEAN16(); | ||
212 : | MEAN16(); | ||
213 : | MEAN16(); | ||
214 : | MEAN16(); | ||
215 : | MEAN16(); | ||
216 : | MEAN16(); | ||
217 : | MEAN16(); | ||
218 : | MEAN16(); | ||
219 : | MEAN16(); | ||
220 : | MEAN16(); | ||
221 : | MEAN16(); | ||
222 : | MEAN16(); | ||
223 : | MEAN16(); | ||
224 : | MEAN16(); | ||
225 : | MEAN16(); | ||
226 : | |||
227 : | /* Add all together in sumdiffs */ | ||
228 : | sumdiffs = (vector unsigned int)vec_sums((vector signed int) mean, vec_splat_s32(0)); | ||
229 : | /* teilen durch 16 * 16 */ | ||
230 : | mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, vec_splat_u8(14)); | ||
231 : | |||
232 : | /* set pointer to iterate through cur */ | ||
233 : | ptr = cur; | ||
234 : | |||
235 : | DEV16(); | ||
236 : | DEV16(); | ||
237 : | DEV16(); | ||
238 : | DEV16(); | ||
239 : | DEV16(); | ||
240 : | DEV16(); | ||
241 : | DEV16(); | ||
242 : | DEV16(); | ||
243 : | DEV16(); | ||
244 : | DEV16(); | ||
245 : | DEV16(); | ||
246 : | DEV16(); | ||
247 : | DEV16(); | ||
248 : | DEV16(); | ||
249 : | DEV16(); | ||
250 : | DEV16(); | ||
251 : | edgomez | 1.3 | |
252 : | /* sum all parts of difference into one 32 bit quantity */ | ||
253 : | edgomez | 1.9 | sumdiffs = (vector unsigned int)vec_sums((vector signed int) dev, vec_splat_s32(0)); |
254 : | edgomez | 1.3 | |
255 : | /* copy vector sum into unaligned result */ | ||
256 : | sumdiffs = vec_splat(sumdiffs, 3); | ||
257 : | edgomez | 1.9 | vec_ste(sumdiffs, 0, (unsigned int *) &result); |
258 : | return result; | ||
259 : | } | ||
260 : | |||
261 : | #define SAD16BI() \ | ||
262 : | t1 = vec_perm(ref1[0], ref1[1], mask1); \ | ||
263 : | t2 = vec_perm(ref2[0], ref2[1], mask2); \ | ||
264 : | t1 = vec_avg(t1, t2); \ | ||
265 : | t2 = vec_max(t1, *cur); \ | ||
266 : | t1 = vec_min(t1, *cur); \ | ||
267 : | sad = vec_sub(t2, t1); \ | ||
268 : | sum = vec_sum4s(sad, sum); \ | ||
269 : | cur += stride; \ | ||
270 : | ref1 += stride; \ | ||
271 : | ref2 += stride | ||
272 : | |||
273 : | /* | ||
274 : | * This function assumes cur is 16 bytes aligned, stride is 16 bytes | ||
275 : | * aligned and ref1 and ref2 is unaligned | ||
276 : | */ | ||
277 : | unsigned long | ||
278 : | sad16bi_altivec_c(vector unsigned char *cur, | ||
279 : | vector unsigned char *ref1, | ||
280 : | vector unsigned char *ref2, | ||
281 : | unsigned long stride) | ||
282 : | { | ||
283 : | vector unsigned char t1, t2; | ||
284 : | vector unsigned char mask1, mask2; | ||
285 : | vector unsigned char sad; | ||
286 : | vector unsigned int sum; | ||
287 : | unsigned long result; | ||
288 : | |||
289 : | #ifdef DEBUG | ||
290 : | /* print alignment errors if this is on */ | ||
291 : | if(cur & 0xf) | ||
292 : | fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %x\n", cur); | ||
293 : | if(stride & 0xf) | ||
294 : | fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %ld\n", stride); | ||
295 : | #endif | ||
296 : | |||
297 : | /* Initialisation stuff */ | ||
298 : | stride >>= 4; | ||
299 : | mask1 = vec_lvsl(0, (unsigned char*)ref1); | ||
300 : | mask2 = vec_lvsl(0, (unsigned char*)ref2); | ||
301 : | sad = vec_splat_u8(0); | ||
302 : | sum = (vector unsigned int)sad; | ||
303 : | |||
304 : | SAD16BI(); | ||
305 : | SAD16BI(); | ||
306 : | SAD16BI(); | ||
307 : | SAD16BI(); | ||
308 : | |||
309 : | SAD16BI(); | ||
310 : | SAD16BI(); | ||
311 : | SAD16BI(); | ||
312 : | SAD16BI(); | ||
313 : | |||
314 : | SAD16BI(); | ||
315 : | SAD16BI(); | ||
316 : | SAD16BI(); | ||
317 : | SAD16BI(); | ||
318 : | |||
319 : | SAD16BI(); | ||
320 : | SAD16BI(); | ||
321 : | SAD16BI(); | ||
322 : | SAD16BI(); | ||
323 : | |||
324 : | sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0)); | ||
325 : | sum = vec_splat(sum, 3); | ||
326 : | vec_ste(sum, 0, (unsigned int*)&result); | ||
327 : | |||
328 : | return result; | ||
329 : | } | ||
330 : | |||
331 : | |||
332 : | #define SSE8_16BIT() \ | ||
333 : | b1_vec = vec_perm(vec_ld(0,b1), vec_ld(16,b1), vec_lvsl(0,b1)); \ | ||
334 : | b2_vec = vec_perm(vec_ld(0,b2), vec_ld(16,b2), vec_lvsl(0,b2)); \ | ||
335 : | diff = vec_sub(b1_vec,b2_vec); \ | ||
336 : | sum = vec_msum(diff,diff,sum); \ | ||
337 : | b1 = (const int16_t*)((int8_t*)b1+stride); \ | ||
338 : | b2 = (const int16_t*)((int8_t*)b2+stride) | ||
339 : | |||
340 : | uint32_t | ||
341 : | sse8_16bit_altivec_c(const int16_t * b1, | ||
342 : | const int16_t * b2, | ||
343 : | const uint32_t stride) | ||
344 : | { | ||
345 : | register vector signed short b1_vec; | ||
346 : | register vector signed short b2_vec; | ||
347 : | register vector signed short diff; | ||
348 : | register vector signed int sum; | ||
349 : | uint32_t result; | ||
350 : | |||
351 : | /* initialize */ | ||
352 : | sum = vec_splat_s32(0); | ||
353 : | |||
354 : | SSE8_16BIT(); | ||
355 : | SSE8_16BIT(); | ||
356 : | SSE8_16BIT(); | ||
357 : | SSE8_16BIT(); | ||
358 : | |||
359 : | SSE8_16BIT(); | ||
360 : | SSE8_16BIT(); | ||
361 : | SSE8_16BIT(); | ||
362 : | SSE8_16BIT(); | ||
363 : | |||
364 : | /* sum the vector */ | ||
365 : | sum = vec_sums(sum, vec_splat_s32(0)); | ||
366 : | sum = vec_splat(sum,3); | ||
367 : | |||
368 : | vec_ste(sum,0,(int*)&result); | ||
369 : | |||
370 : | /* and return */ | ||
371 : | return result; | ||
372 : | canard | 1.1 | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |