Parent Directory | Revision Log
Revision 1.5 - (view) (download)
1 : | chl | 1.4 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - altivec sum of absolute difference (C version) | ||
5 : | * | ||
6 : | * Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org> | ||
7 : | * | ||
8 : | edgomez | 1.5 | * This file is part of XviD, a free MPEG-4 video encoder/decoder |
9 : | chl | 1.4 | * |
10 : | edgomez | 1.5 | * XviD is free software; you can redistribute it and/or modify it |
11 : | * under the terms of the GNU General Public License as published by | ||
12 : | chl | 1.4 | * the Free Software Foundation; either version 2 of the License, or |
13 : | * (at your option) any later version. | ||
14 : | * | ||
15 : | * This program is distributed in the hope that it will be useful, | ||
16 : | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 : | * GNU General Public License for more details. | ||
19 : | * | ||
20 : | * You should have received a copy of the GNU General Public License | ||
21 : | * along with this program; if not, write to the Free Software | ||
22 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 : | * | ||
24 : | edgomez | 1.5 | * Under section 8 of the GNU General Public License, the copyright |
25 : | * holders of XVID explicitly forbid distribution in the following | ||
26 : | * countries: | ||
27 : | * | ||
28 : | * - Japan | ||
29 : | * - United States of America | ||
30 : | * | ||
31 : | * Linking XviD statically or dynamically with other modules is making a | ||
32 : | * combined work based on XviD. Thus, the terms and conditions of the | ||
33 : | * GNU General Public License cover the whole combination. | ||
34 : | * | ||
35 : | * As a special exception, the copyright holders of XviD give you | ||
36 : | * permission to link XviD with independent modules that communicate with | ||
37 : | * XviD solely through the VFW1.1 and DShow interfaces, regardless of the | ||
38 : | * license terms of these independent modules, and to copy and distribute | ||
39 : | * the resulting combined work under terms of your choice, provided that | ||
40 : | * every copy of the combined work is accompanied by a complete copy of | ||
41 : | * the source code of XviD (the version of XviD used to produce the | ||
42 : | * combined work), being distributed under the terms of the GNU General | ||
43 : | * Public License plus this exception. An independent module is a module | ||
44 : | * which is not derived from or based on XviD. | ||
45 : | * | ||
46 : | * Note that people who make modified versions of XviD are not obligated | ||
47 : | * to grant this special exception for their modified versions; it is | ||
48 : | * their choice whether to do so. The GNU General Public License gives | ||
49 : | * permission to release a modified version without this exception; this | ||
50 : | * exception also makes it possible to release a modified version which | ||
51 : | * carries forward this exception. | ||
52 : | * | ||
53 : | * $Id$ | ||
54 : | chl | 1.4 | * |
55 : | ****************************************************************************/ | ||
56 : | canard | 1.1 | |
57 : | canard | 1.2 | #define G_REG |
58 : | |||
59 : | #ifdef G_REG | ||
60 : | edgomez | 1.3 | register vector unsigned char perm0 asm("%v29"); |
61 : | register vector unsigned char perm1 asm("%v30"); | ||
62 : | register vector unsigned int zerovec asm("%v31"); | ||
63 : | canard | 1.2 | #endif |
64 : | |||
65 : | canard | 1.1 | #include <stdio.h> |
66 : | |||
67 : | #undef DEBUG | ||
68 : | |||
69 : | canard | 1.2 | static const vector unsigned char perms[2] = { |
70 : | edgomez | 1.3 | (vector unsigned char) ( /* Used when cur is aligned */ |
71 : | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | ||
72 : | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17), | ||
73 : | (vector unsigned char) ( /* Used when cur is unaligned */ | ||
74 : | 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, | ||
75 : | 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f), | ||
76 : | canard | 1.2 | }; |
77 : | |||
78 : | #ifdef G_REG | ||
79 : | edgomez | 1.3 | void |
80 : | sadInit_altivec(void) | ||
81 : | canard | 1.2 | { |
82 : | perm0 = perms[0]; | ||
83 : | perm1 = perms[1]; | ||
84 : | edgomez | 1.3 | zerovec = (vector unsigned int) (0); |
85 : | canard | 1.2 | } |
86 : | edgomez | 1.3 | static inline const vector unsigned char |
87 : | get_perm(unsigned long i) | ||
88 : | canard | 1.2 | { |
89 : | return i ? perm1 : perm0; | ||
90 : | } | ||
91 : | edgomez | 1.3 | |
92 : | canard | 1.2 | #define ZERODEF |
93 : | #define ZEROVEC zerovec | ||
94 : | #else | ||
95 : | edgomez | 1.3 | void |
96 : | sadInit_altivec(void) | ||
97 : | { | ||
98 : | } | ||
99 : | static inline const vector unsigned char | ||
100 : | get_perm(unsigned long i) | ||
101 : | canard | 1.2 | { |
102 : | return perms[i]; | ||
103 : | } | ||
104 : | edgomez | 1.3 | |
105 : | canard | 1.2 | #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0) |
106 : | #define ZEROVEC zerovec | ||
107 : | #endif | ||
108 : | |||
109 : | |||
110 : | canard | 1.1 | #define SAD16() \ |
111 : | t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ | ||
112 : | t2 = vec_max(t1, *cur); /* find largest of two */ \ | ||
113 : | t3 = vec_min(t1, *cur); /* find smaller of two */ \ | ||
114 : | t4 = vec_sub(t2, t3); /* find absolute difference */ \ | ||
115 : | sad = vec_sum4s(t4, sad); /* accumulate sum of differences */ \ | ||
116 : | cur += stride; ref += stride; | ||
117 : | |||
118 : | /* | ||
119 : | * This function assumes cur and stride are 16 bytes aligned and ref is unaligned | ||
120 : | */ | ||
121 : | unsigned long | ||
122 : | edgomez | 1.3 | sad16_altivec(const vector unsigned char *cur, |
123 : | const vector unsigned char *ref, | ||
124 : | unsigned long stride, | ||
125 : | const unsigned long best_sad) | ||
126 : | canard | 1.1 | { |
127 : | edgomez | 1.3 | vector unsigned char perm; |
128 : | vector unsigned char t1, t2, t3, t4; | ||
129 : | vector unsigned int sad; | ||
130 : | vector signed int sumdiffs, best_vec; | ||
131 : | unsigned long result; | ||
132 : | |||
133 : | ZERODEF; | ||
134 : | |||
135 : | canard | 1.1 | #ifdef DEBUG |
136 : | edgomez | 1.3 | if (((unsigned long) cur) & 0xf) |
137 : | fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); | ||
138 : | canard | 1.1 | // if (((unsigned long)ref) & 0xf) |
139 : | edgomez | 1.3 | // fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref); |
140 : | if (stride & 0xf) | ||
141 : | fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); | ||
142 : | #endif | ||
143 : | /* initialization */ | ||
144 : | sad = (vector unsigned int) (ZEROVEC); | ||
145 : | stride >>= 4; | ||
146 : | perm = vec_lvsl(0, (unsigned char *) ref); | ||
147 : | *((unsigned long *) &best_vec) = best_sad; | ||
148 : | best_vec = vec_splat(best_vec, 0); | ||
149 : | |||
150 : | /* perform sum of differences between current and previous */ | ||
151 : | SAD16(); | ||
152 : | SAD16(); | ||
153 : | SAD16(); | ||
154 : | SAD16(); | ||
155 : | /* Temp sum for exit */ | ||
156 : | sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); | ||
157 : | if (vec_all_ge(sumdiffs, best_vec)) | ||
158 : | goto bail; | ||
159 : | SAD16(); | ||
160 : | SAD16(); | ||
161 : | SAD16(); | ||
162 : | SAD16(); | ||
163 : | sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); | ||
164 : | if (vec_all_ge(sumdiffs, best_vec)) | ||
165 : | goto bail; | ||
166 : | SAD16(); | ||
167 : | SAD16(); | ||
168 : | SAD16(); | ||
169 : | SAD16(); | ||
170 : | SAD16(); | ||
171 : | SAD16(); | ||
172 : | SAD16(); | ||
173 : | SAD16(); | ||
174 : | |||
175 : | /* sum all parts of difference into one 32 bit quantity */ | ||
176 : | sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); | ||
177 : | bail: | ||
178 : | /* copy vector sum into unaligned result */ | ||
179 : | sumdiffs = vec_splat(sumdiffs, 3); | ||
180 : | vec_ste(sumdiffs, 0, (int *) &result); | ||
181 : | return (result); | ||
182 : | canard | 1.1 | } |
183 : | |||
184 : | #define SAD8() \ | ||
185 : | t1 = vec_perm(cur[0], cur[stride], perm_cur); /* align current vector */ \ | ||
186 : | t2 = vec_perm(ref[0], ref[1], perm_ref1); /* align current vector */ \ | ||
187 : | tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); /* align current vector */ \ | ||
188 : | t2 = vec_perm(t2,tp,perm_ref2); \ | ||
189 : | t3 = vec_max(t1, t2); /* find largest of two */ \ | ||
190 : | t4 = vec_min(t1, t2); /* find smaller of two */ \ | ||
191 : | t5 = vec_sub(t3, t4); /* find absolute difference */ \ | ||
192 : | sad = vec_sum4s(t5, sad); /* accumulate sum of differences */ \ | ||
193 : | cur += stride<<1; ref += stride<<1; | ||
194 : | |||
195 : | /* | ||
196 : | * This function assumes cur is 8 bytes aligned, stride is 16 bytes | ||
197 : | * aligned and ref is unaligned | ||
198 : | */ | ||
199 : | unsigned long | ||
200 : | edgomez | 1.3 | sad8_altivec(const vector unsigned char *cur, |
201 : | const vector unsigned char *ref, | ||
202 : | unsigned long stride) | ||
203 : | canard | 1.1 | { |
204 : | edgomez | 1.3 | vector unsigned char t1, t2, t3, t4, t5, tp; |
205 : | vector unsigned int sad; | ||
206 : | vector signed int sumdiffs; | ||
207 : | vector unsigned char perm_cur; | ||
208 : | vector unsigned char perm_ref1, perm_ref2; | ||
209 : | unsigned long result; | ||
210 : | |||
211 : | ZERODEF; | ||
212 : | canard | 1.1 | |
213 : | #ifdef DEBUG | ||
214 : | edgomez | 1.3 | if (((unsigned long) cur) & 0x7) |
215 : | fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); | ||
216 : | canard | 1.1 | // if (((unsigned long)ref) & 0x7) |
217 : | edgomez | 1.3 | // fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref); |
218 : | if (stride & 0xf) | ||
219 : | fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); | ||
220 : | #endif | ||
221 : | |||
222 : | perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01); | ||
223 : | perm_ref1 = vec_lvsl(0, (unsigned char *) ref); | ||
224 : | perm_ref2 = get_perm(0); | ||
225 : | |||
226 : | /* initialization */ | ||
227 : | sad = (vector unsigned int) (ZEROVEC); | ||
228 : | stride >>= 4; | ||
229 : | |||
230 : | /* perform sum of differences between current and previous */ | ||
231 : | SAD8(); | ||
232 : | SAD8(); | ||
233 : | SAD8(); | ||
234 : | SAD8(); | ||
235 : | |||
236 : | /* sum all parts of difference into one 32 bit quantity */ | ||
237 : | sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); | ||
238 : | |||
239 : | /* copy vector sum into unaligned result */ | ||
240 : | sumdiffs = vec_splat(sumdiffs, 3); | ||
241 : | vec_ste(sumdiffs, 0, (int *) &result); | ||
242 : | return (result); | ||
243 : | canard | 1.1 | } |
244 : | |||
245 : | #define MEAN16(i)\ | ||
246 : | c##i=*cur;\ | ||
247 : | mean = vec_sum4s(c##i,mean);\ | ||
248 : | cur += stride; | ||
249 : | |||
250 : | #define DEV16(i) \ | ||
251 : | t2 = vec_max(c##i, mn); /* find largest of two */ \ | ||
252 : | t3 = vec_min(c##i, mn); /* find smaller of two */ \ | ||
253 : | t4 = vec_sub(t2, t3); /* find absolute difference */ \ | ||
254 : | dev = vec_sum4s(t4, dev); | ||
255 : | |||
256 : | unsigned long | ||
257 : | edgomez | 1.3 | dev16_altivec(const vector unsigned char *cur, |
258 : | unsigned long stride) | ||
259 : | canard | 1.1 | { |
260 : | edgomez | 1.3 | vector unsigned char t2, t3, t4, mn; |
261 : | vector unsigned int mean, dev; | ||
262 : | vector signed int sumdiffs; | ||
263 : | vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, | ||
264 : | c13, c14, c15; | ||
265 : | unsigned long result; | ||
266 : | |||
267 : | ZERODEF; | ||
268 : | |||
269 : | mean = (vector unsigned int) (ZEROVEC); | ||
270 : | dev = (vector unsigned int) (ZEROVEC); | ||
271 : | stride >>= 4; | ||
272 : | |||
273 : | MEAN16(0); | ||
274 : | MEAN16(1); | ||
275 : | MEAN16(2); | ||
276 : | MEAN16(3); | ||
277 : | MEAN16(4); | ||
278 : | MEAN16(5); | ||
279 : | MEAN16(6); | ||
280 : | MEAN16(7); | ||
281 : | MEAN16(8); | ||
282 : | MEAN16(9); | ||
283 : | MEAN16(10); | ||
284 : | MEAN16(11); | ||
285 : | MEAN16(12); | ||
286 : | MEAN16(13); | ||
287 : | MEAN16(14); | ||
288 : | MEAN16(15); | ||
289 : | |||
290 : | sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC); | ||
291 : | mn = vec_perm((vector unsigned char) sumdiffs, | ||
292 : | (vector unsigned char) sumdiffs, (vector unsigned char) (14, | ||
293 : | 14, | ||
294 : | 14, | ||
295 : | 14, | ||
296 : | 14, | ||
297 : | 14, | ||
298 : | 14, | ||
299 : | 14, | ||
300 : | 14, | ||
301 : | 14, | ||
302 : | 14, | ||
303 : | 14, | ||
304 : | 14, | ||
305 : | 14, | ||
306 : | 14, | ||
307 : | 14)); | ||
308 : | DEV16(0); | ||
309 : | DEV16(1); | ||
310 : | DEV16(2); | ||
311 : | DEV16(3); | ||
312 : | DEV16(4); | ||
313 : | DEV16(5); | ||
314 : | DEV16(6); | ||
315 : | DEV16(7); | ||
316 : | DEV16(8); | ||
317 : | DEV16(9); | ||
318 : | DEV16(10); | ||
319 : | DEV16(11); | ||
320 : | DEV16(12); | ||
321 : | DEV16(13); | ||
322 : | DEV16(14); | ||
323 : | DEV16(15); | ||
324 : | |||
325 : | /* sum all parts of difference into one 32 bit quantity */ | ||
326 : | sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC); | ||
327 : | |||
328 : | /* copy vector sum into unaligned result */ | ||
329 : | sumdiffs = vec_splat(sumdiffs, 3); | ||
330 : | vec_ste(sumdiffs, 0, (int *) &result); | ||
331 : | return (result); | ||
332 : | canard | 1.1 | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |