Parent Directory | Revision Log
Revision 1.1 - (view) (download)
1 : | edgomez | 1.1 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - 8x8 block-based halfpel interpolation with altivec optimization - | ||
5 : | * | ||
6 : | * Copyright(C) 2004 Christoph Naegeli <chn@kbw.ch> | ||
7 : | * | ||
8 : | * This program is free software ; you can redistribute it and/or modify | ||
9 : | * it under the terms of the GNU General Public License as published by | ||
10 : | * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | * (at your option) any later version. | ||
12 : | * | ||
13 : | * This program is distributed in the hope that it will be useful, | ||
14 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | * GNU General Public License for more details. | ||
17 : | * | ||
18 : | * You should have received a copy of the GNU General Public License | ||
19 : | * along with this program ; if not, write to the Free Software | ||
20 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | * | ||
22 : | * $Id$ | ||
23 : | * | ||
24 : | ****************************************************************************/ | ||
25 : | |||
26 : | |||
27 : | #ifdef HAVE_ALTIVEC_H | ||
28 : | #include <altivec.h> | ||
29 : | #endif | ||
30 : | |||
31 : | #include "../../portab.h" | ||
32 : | |||
33 : | #undef DEBUG | ||
34 : | #include <stdio.h> | ||
35 : | |||
/* Build the 32-bit control word consumed by the vec_dst/vec_dstt
 * data-stream prefetch instructions.
 *
 * Field layout produced here:
 *   bits 24..31 : block size in 16-byte units (values > 31 are clamped
 *                 to 0 -- NOTE(review): presumably 0 encodes the maximum
 *                 block size to the hardware; confirm against the
 *                 AltiVec Programming Environments Manual)
 *   bits 16..23 : number of blocks to fetch
 *   bits  0..15 : signed byte stride between consecutive blocks
 *
 * block_size  : size of each block, in 16-byte units
 * block_count : how many blocks the stream touches
 * stride      : signed distance in bytes between blocks
 *
 * Returns the packed control word.
 */
static inline unsigned
build_prefetch(unsigned char block_size, unsigned char block_count, short stride)
{
	if(block_size > 31)
		block_size = 0;

	/* Mask the stride down to its 16-bit field: a negative short would
	 * otherwise be sign-extended to 32 bits by integer promotion and
	 * corrupt the block_count and block_size fields. */
	return ((unsigned)block_size << 24) |
	       ((unsigned)block_count << 16) |
	       ((unsigned)stride & 0xFFFFu);
}
44 : | |||
/* Rounding helpers for the halfpel interpolators below.
 *
 * vec_avg() computes (a + b + 1) >> 1, i.e. it always rounds up, which
 * is exactly what the rounding == 0 case needs -- so NO_ROUNDING is
 * intentionally empty.
 *
 * For rounding == 1 the result must be (a + b) >> 1 instead.  ROUNDING
 * isolates the low bit of (s1 + s2) -- set precisely when the sum is
 * odd, i.e. when vec_avg rounded up -- and subtracts it from the
 * average, turning the round-up average into a truncating one.
 */
#define NO_ROUNDING

#define ROUNDING \
	s1 = vec_and(vec_add(s1, s2), vec_splat_u8(1)); \
	d = vec_sub(d, s1);
50 : | |||
/* One row of horizontal halfpel interpolation.
 *
 * - s1: 16 bytes loaded from the possibly unaligned src with the usual
 *   two-load + vec_lvsl permute idiom.
 * - s2: s1 shifted left one byte via s2_mask (set up by the caller to
 *   vec_lvsl(1, 0)), i.e. the "src + 1" pixels.
 * - d = vec_avg(s1, s2), then 'round' expands to ROUNDING or
 *   NO_ROUNDING (see above).
 * - Only 8 of the 16 bytes belong to this block: the result is rotated
 *   to dst's alignment and merged with vec_sel/mask so the other half
 *   of the 16-byte destination slot is preserved before the store.
 * - Advances both pointers to the next row.
 */
#define INTERPLATE8X8_HALFPEL_H(round) \
	s1 = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \
	s2 = vec_perm(s1, s1, s2_mask); \
	d = vec_avg(s1, s2); \
	round; \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsl(0, dst)); \
	d = vec_perm(d, d, vec_lvsl(0, dst)); \
	d = vec_sel(d, vec_ld(0, dst), mask); \
	vec_st(d, 0, dst); \
	dst += stride; \
	src += stride
62 : | |||
63 : | |||
/* Horizontal halfpel interpolation of one 8x8 block:
 * dst[x] = (src[x] + src[x+1] + 1 - rounding) >> 1 for each of 8 rows.
 *
 * This function assumes:
 *	dst is 8 byte aligned
 *	src is unaligned
 *	stride is a multiple of 8
 */
void
interpolate8x8_halfpel_h_altivec_c( uint8_t *dst,
					uint8_t *src,
					const uint32_t stride,
					const uint32_t rounding)
{
	register vector unsigned char s1, s2;		/* row and its shifted-by-one copy */
	register vector unsigned char d;		/* interpolated row */
	register vector unsigned char mask;		/* per-row select mask for the masked store */
	register vector unsigned char s2_mask;		/* permute pattern shifting a vector left one byte */
	register vector unsigned char mask_stencil;	/* 8 x 0x00 followed by 8 x 0xff */

#ifdef DEBUG
	/* Dump alignment errors if DEBUG is defined */
	if(((unsigned long)dst) & 0x7)
		fprintf(stderr, "interpolate8x8_halfpel_h_altivec_c:incorrect align, dst: %x\n", dst);
	if(stride & 0x7)
		fprintf(stderr, "interpolate8x8_halfpel_h_altivec_c:incorrect stride, stride: %u\n", stride);
#endif

	/* vec_lvsl(1, NULL) = {1,2,...,16}: permuting a vector with this
	 * shifts it left one byte, producing the "src + 1" operand */
	s2_mask = vec_lvsl(1, (unsigned char*)0);
	/* low 8 bytes 0x00, high 8 bytes 0xff: used by vec_sel in the macro
	 * to keep the untouched half of the 16-byte destination slot */
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	/* 8 rows, fully unrolled, rounding mode resolved once up front */
	if(rounding) {
		INTERPLATE8X8_HALFPEL_H(ROUNDING);
		INTERPLATE8X8_HALFPEL_H(ROUNDING);
		INTERPLATE8X8_HALFPEL_H(ROUNDING);
		INTERPLATE8X8_HALFPEL_H(ROUNDING);

		INTERPLATE8X8_HALFPEL_H(ROUNDING);
		INTERPLATE8X8_HALFPEL_H(ROUNDING);
		INTERPLATE8X8_HALFPEL_H(ROUNDING);
		INTERPLATE8X8_HALFPEL_H(ROUNDING);
	}
	else {
		INTERPLATE8X8_HALFPEL_H(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_H(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_H(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_H(NO_ROUNDING);

		INTERPLATE8X8_HALFPEL_H(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_H(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_H(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_H(NO_ROUNDING);
	}
}
115 : | |||
/* One row of vertical halfpel interpolation.
 *
 * Loads the current row (s1) and the row below it (s2) -- both through
 * the unaligned-load permute idiom -- and averages them; 'round'
 * expands to ROUNDING or NO_ROUNDING (see above).  As in the horizontal
 * macro, only 8 bytes are written: the result is rotated to dst's
 * alignment and merged with vec_sel/mask so the other half of the
 * 16-byte destination slot survives the store.  Advances both pointers
 * to the next row.
 */
#define INTERPLATE8X8_HALFPEL_V(round) \
	s1 = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \
	s2 = vec_perm(vec_ld(0, src + stride), vec_ld(16, src + stride), vec_lvsl(0, src + stride)); \
	d = vec_avg(s1, s2); \
	round; \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsl(0, dst)); \
	d = vec_perm(d, d, vec_lvsl(0, dst)); \
	d = vec_sel(d, vec_ld(0, dst), mask); \
	vec_st(d, 0, dst); \
	dst += stride; \
	src += stride
127 : | |||
/* Vertical halfpel interpolation of one 8x8 block:
 * dst[x] = (src[x] + src[x+stride] + 1 - rounding) >> 1 for 8 rows.
 *
 * This function assumes
 *	dst is 8 byte aligned
 *	src is unaligned
 *	stride is a multiple of 8
 */
void
interpolate8x8_halfpel_v_altivec_c( uint8_t *dst,
					uint8_t *src,
					const uint32_t stride,
					const uint32_t rounding)
{
	vector unsigned char s1, s2;	/* current row and the row below it */
	vector unsigned char d;		/* interpolated row */
	vector unsigned char mask;	/* per-row select mask for the masked store */
	vector unsigned char mask_stencil; /* 8 x 0x00 followed by 8 x 0xff */

#ifdef DEBUG
	/* if this is on, print alignment errors */
	if(((unsigned long)dst) & 0x7)
		fprintf(stderr, "interpolate8x8_halfpel_v_altivec_c:incorrect align, dst: %x\n", dst);
	if(stride & 0x7)
		fprintf(stderr, "interpolate8x8_halfpel_v_altivec_c:incorrect stride, stride: %u\n", stride);
#endif

	/* low 8 bytes 0x00, high 8 bytes 0xff: used by vec_sel in the macro
	 * to keep the untouched half of the 16-byte destination slot */
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	/* 8 rows, fully unrolled, rounding mode resolved once up front */
	if(rounding) {
		INTERPLATE8X8_HALFPEL_V(ROUNDING);
		INTERPLATE8X8_HALFPEL_V(ROUNDING);
		INTERPLATE8X8_HALFPEL_V(ROUNDING);
		INTERPLATE8X8_HALFPEL_V(ROUNDING);

		INTERPLATE8X8_HALFPEL_V(ROUNDING);
		INTERPLATE8X8_HALFPEL_V(ROUNDING);
		INTERPLATE8X8_HALFPEL_V(ROUNDING);
		INTERPLATE8X8_HALFPEL_V(ROUNDING);
	}
	else {
		INTERPLATE8X8_HALFPEL_V(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_V(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_V(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_V(NO_ROUNDING);

		INTERPLATE8X8_HALFPEL_V(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_V(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_V(NO_ROUNDING);
		INTERPLATE8X8_HALFPEL_V(NO_ROUNDING);
	}
}
178 : | |||
179 : | |||
/* One row of diagonal (horizontal + vertical) halfpel interpolation.
 *
 * The four neighbours src[x], src[x+1], src[x+stride], src[x+stride+1]
 * are each loaded unaligned and zero-extended to 16 bits (vec_mergeh
 * with a zero vector widens the first 8 bytes), summed, biased by
 * 'adding' (1 when rounding, 2 when not -- i.e. 2 - rounding), and
 * shifted right by 2.  The result is packed back to bytes and merged
 * into dst with the usual vec_sel/mask 8-byte masked store.  Advances
 * both pointers to the next row.
 */
#define INTERPOLATE8X8_HALFPEL_HV(adding) \
	t = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \
	s1 = (vector unsigned short)vec_mergeh(zerovec, t); \
	t = vec_perm(vec_ld(1, src), vec_ld(17, src), vec_lvsl(1, src)); \
	s2 = (vector unsigned short)vec_mergeh(zerovec, t); \
	t = vec_perm(vec_ld(0, src + stride), vec_ld(16, src + stride), vec_lvsl(0, src + stride)); \
	s3 = (vector unsigned short)vec_mergeh(zerovec, t); \
	t = vec_perm(vec_ld(1, src + stride), vec_ld(17, src + stride), vec_lvsl(1, src + stride)); \
	s4 = (vector unsigned short)vec_mergeh(zerovec, t); \
	s1 = vec_add(s1, vec_add(s2, vec_add(s3, s4))); \
	s1 = vec_add(s1, adding); \
	s1 = vec_sr(s1, two); \
	t = vec_pack(s1, s1); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsl(0, dst)); \
	t = vec_sel(t, vec_ld(0, dst), mask); \
	vec_st(t, 0, dst); \
	dst += stride; \
	src += stride
198 : | |||
/* Diagonal halfpel interpolation of one 8x8 block:
 * dst[x] = (src[x] + src[x+1] + src[x+stride] + src[x+stride+1]
 *           + 2 - rounding) >> 2 for 8 rows.
 *
 * Same contract as the h/v variants: dst 8-byte aligned, src
 * unaligned, stride a multiple of 8.
 */
void
interpolate8x8_halfpel_hv_altivec_c(uint8_t *dst,
					uint8_t *src,
					const uint32_t stride,
					const uint32_t rounding)
{
	vector unsigned short s1, s2, s3, s4;	/* the four widened neighbour rows */
	vector unsigned char t;			/* scratch byte vector */
	vector unsigned short one, two;		/* rounding bias (1 or 2) / shift count */
	vector unsigned char zerovec;		/* all-zero, for byte -> short widening */
	vector unsigned char mask;		/* per-row select mask for the masked store */
	vector unsigned char mask_stencil;	/* 8 x 0x00 followed by 8 x 0xff */

	/* Initialisation stuff */
	zerovec = vec_splat_u8(0);
	one = vec_splat_u16(1);
	two = vec_splat_u16(2);
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	/* The bias is (2 - rounding): 1 with rounding, 2 without */
	if(rounding) {
		INTERPOLATE8X8_HALFPEL_HV(one);
		INTERPOLATE8X8_HALFPEL_HV(one);
		INTERPOLATE8X8_HALFPEL_HV(one);
		INTERPOLATE8X8_HALFPEL_HV(one);

		INTERPOLATE8X8_HALFPEL_HV(one);
		INTERPOLATE8X8_HALFPEL_HV(one);
		INTERPOLATE8X8_HALFPEL_HV(one);
		INTERPOLATE8X8_HALFPEL_HV(one);
	}
	else {
		INTERPOLATE8X8_HALFPEL_HV(two);
		INTERPOLATE8X8_HALFPEL_HV(two);
		INTERPOLATE8X8_HALFPEL_HV(two);
		INTERPOLATE8X8_HALFPEL_HV(two);

		INTERPOLATE8X8_HALFPEL_HV(two);
		INTERPOLATE8X8_HALFPEL_HV(two);
		INTERPOLATE8X8_HALFPEL_HV(two);
		INTERPOLATE8X8_HALFPEL_HV(two);
	}
}
241 : | |||
242 : | /* | ||
243 : | * This function assumes: | ||
244 : | * dst is 8 byte aligned | ||
245 : | * src1 is unaligned | ||
246 : | * src2 is unaligned | ||
 * stride is a multiple of 8
 * rounding is smaller than max signed short + 2
249 : | */ | ||
250 : | |||
/* Per-byte average of two 8-pixel-wide sources over 'height' rows:
 * dst[x] = (src1[x] + src2[x] + 1 - rounding) >> 1.
 * The sums are done in 16-bit lanes to avoid byte overflow.
 */
void
interpolate8x8_avg2_altivec_c(	uint8_t *dst,
				const uint8_t *src1,
				const uint8_t *src2,
				const uint32_t stride,
				const uint32_t rounding,
				const uint32_t height)
{
	uint32_t i;
	vector unsigned char t;			/* scratch byte vector */
	vector unsigned char mask;		/* per-row select mask for the masked store */
	vector unsigned char mask_stencil;	/* 8 x 0x00 followed by 8 x 0xff */
	vector unsigned char zerovec;		/* all-zero, for byte -> short widening */
	vector signed short s1, s2;		/* widened source rows */
	vector signed short d;			/* 16-bit accumulator */
	vector signed short round;		/* (1 - rounding) in every lane */

#ifdef DEBUG
	/* If this is on, print alignment errors */
	if(((unsigned long)dst) & 0x7)
		fprintf(stderr, "interpolate8x8_avg2_altivec_c:incorrect align, dst: %x\n", dst);
	if(stride & 0x7)
		fprintf(stderr, "interpolate8x8_avg2_altivec_c:incorrect stride, stride: %u\n", stride);
	if(rounding > (32767 + 2))
		fprintf(stderr, "interpolate8x8_avg2_altivec_c:incorrect rounding, rounding: %d\n", rounding);
#endif

	/* initialisation */
	zerovec = vec_splat_u8(0);
	/* NOTE(review): scalar store through a short* writes element 0 of
	 * the (otherwise uninitialized) vector, which vec_splat then
	 * broadcasts to all lanes -- relies on the vector living in memory
	 * at this point */
	*((short*)&round) = 1 - rounding;
	round = vec_splat(round, 0);
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	for(i = 0; i < height; i++) {

		/* widen the first 8 bytes of each source row to 16 bits and
		 * accumulate: d = (1 - rounding) + src1 + src2 */
		t = vec_perm(vec_ld(0, src1), vec_ld(16, src1), vec_lvsl(0, src1));
		d = vec_add((vector signed short)zerovec, round);
		s1 = (vector signed short)vec_mergeh(zerovec, t);

		t = vec_perm(vec_ld(0, src2), vec_ld(16, src2), vec_lvsl(0, src2));
		d = vec_add(d, s1);
		s2 = (vector signed short)vec_mergeh(zerovec, t);

		d = vec_add(d, s2);
		d = vec_sr(d, vec_splat_u16(1));

		/* pack back to bytes and merge the 8 result bytes into dst,
		 * preserving the other half of the 16-byte slot */
		t = vec_pack((vector unsigned short)d, (vector unsigned short)zerovec);
		mask = vec_perm(mask_stencil, mask_stencil, vec_lvsl(0, dst));
		t = vec_perm(t, t, vec_lvsl(0, dst));
		t = vec_sel(t, vec_ld(0, dst), mask);
		vec_st(t, 0, dst);

		dst += stride;
		src1 += stride;
		src2 += stride;
	}
}
308 : | |||
309 : | |||
/* One row of the 4-source average:
 * dst[x] = (src1[x] + src2[x] + src3[x] + src4[x] + 2 - rounding) >> 2.
 *
 * 'r' (set up by the caller to a splat of 2 - rounding) seeds the
 * accumulator; each source row is loaded unaligned, zero-extended to
 * 16 bits (vec_mergeh with the zero vector), and added.  The sum is
 * shifted right by 'shift' (2), packed back to bytes, and merged into
 * dst with the vec_sel/mask 8-byte masked store.  Advances all five
 * pointers to the next row.
 */
#define INTERPOLATE8X8_AVG4() \
	d = r; \
	\
	t = vec_perm(vec_ld(0, src1), vec_ld(16, src1), vec_lvsl(0, src1)); \
	s = (vector signed short)vec_mergeh(zerovec, t); \
	d = vec_add(d, s); \
	\
	t = vec_perm(vec_ld(0, src2), vec_ld(16, src2), vec_lvsl(0, src2)); \
	s = (vector signed short)vec_mergeh(zerovec, t); \
	d = vec_add(d, s); \
	\
	t = vec_perm(vec_ld(0, src3), vec_ld(16, src3), vec_lvsl(0, src3)); \
	s = (vector signed short)vec_mergeh(zerovec, t); \
	d = vec_add(d, s); \
	\
	t = vec_perm(vec_ld(0, src4), vec_ld(16, src4), vec_lvsl(0, src4)); \
	s = (vector signed short)vec_mergeh(zerovec, t); \
	d = vec_add(d, s); \
	\
	d = vec_sr(d, shift); \
	\
	t = vec_pack((vector unsigned short)d, (vector unsigned short)zerovec); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsl(0, dst)); \
	t = vec_perm(t, t, vec_lvsl(0, dst)); \
	t = vec_sel(t, vec_ld(0, dst), mask); \
	vec_st(t, 0, dst); \
	\
	dst += stride; \
	src1 += stride; \
	src2 += stride; \
	src3 += stride; \
	src4 += stride
342 : | |||
343 : | /* This function assumes: | ||
344 : | * dst is 8 byte aligned | ||
345 : | * src1, src2, src3, src4 are unaligned | ||
346 : | * stride is a multiple of 8 | ||
347 : | */ | ||
348 : | |||
/* Per-byte average of four 8-pixel-wide sources over 8 rows:
 * dst[x] = (src1[x] + src2[x] + src3[x] + src4[x] + 2 - rounding) >> 2.
 */
void
interpolate8x8_avg4_altivec_c(uint8_t *dst,
				const uint8_t *src1, const uint8_t *src2,
				const uint8_t *src3, const uint8_t *src4,
				const uint32_t stride, const uint32_t rounding)
{
	vector signed short r;			/* (2 - rounding) in every lane */
	register vector signed short s, d;	/* widened source row / accumulator */
	register vector unsigned short shift;	/* shift count, 2 */
	register vector unsigned char t;	/* scratch byte vector */
	register vector unsigned char zerovec;	/* all-zero, for byte -> short widening */
	register vector unsigned char mask;	/* per-row select mask for the masked store */
	register vector unsigned char mask_stencil; /* 8 x 0x00 followed by 8 x 0xff */

#ifdef DEBUG
	/* if debug is set, print alignment errors */
	if(((unsigned)dst) & 0x7)
		fprintf(stderr, "interpolate8x8_avg4_altivec_c:incorrect align, dst: %x\n", dst);
	if(stride & 0x7)
		fprintf(stderr, "interpolate8x8_avg4_altivec_c:incorrect stride, stride: %u\n", stride);
#endif

	/* Initialization */
	zerovec = vec_splat_u8(0);
	/* NOTE(review): scalar store into element 0 of the vector, then
	 * splat -- same in-memory initialisation idiom as avg2 above */
	*((short*)&r) = 2 - rounding;
	r = vec_splat(r, 0);
	shift = vec_splat_u16(2);
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	/* interpolate: 8 rows, fully unrolled */
	INTERPOLATE8X8_AVG4();
	INTERPOLATE8X8_AVG4();
	INTERPOLATE8X8_AVG4();
	INTERPOLATE8X8_AVG4();

	INTERPOLATE8X8_AVG4();
	INTERPOLATE8X8_AVG4();
	INTERPOLATE8X8_AVG4();
	INTERPOLATE8X8_AVG4();
}
389 : | |||
390 : | |||
391 : | |||
392 : | /************************************************************* | ||
393 : | * QPEL STUFF STARTS HERE * | ||
394 : | *************************************************************/ | ||
395 : | |||
396 : | |||
/* One row of the horizontal 6-tap qpel lowpass filter.
 *
 * Loads the 16 bytes starting at src - 2 and builds, for each output
 * pixel i, the shifted operands x[i-2] .. x[i+3] by permuting the same
 * vector (vec_lvsl(k, 0) = {k, k+1, ...} shifts left by k bytes) and
 * zero-extending to 16 bits.  The arithmetic works out to:
 *
 *   d = (x[i-2] + x[i+3])
 *     + 5 * (4*(x[i] + x[i+1]) - (x[i-1] + x[i+2]))
 *     = 1*x[i-2] - 5*x[i-1] + 20*x[i] + 20*x[i+1] - 5*x[i+2] + 1*x[i+3]
 *
 * then + (16 - rounding), arithmetic shift right 5, and vec_packsu
 * saturates the signed result to 0..255 before the 8-byte masked store.
 * Also keeps the prefetch stream (tag 0) running and advances both
 * pointers to the next row.
 */
#define INTERPOLATE8X8_6TAP_LOWPASS_H() \
	vec_dstt(src, prefetch_constant, 0); \
	data = vec_perm(vec_ld(-2, src), vec_ld(14, src), vec_lvsl(-2, src)); \
	s1 = (vector signed short)vec_mergeh(zerovec, data); \
	t = vec_perm(data, data, vec_lvsl(5, (unsigned char*)0)); \
	s2 = (vector signed short)vec_mergeh(zerovec, t); \
	d = vec_add(s1, s2); \
	\
	t = vec_perm(data, data, vec_lvsl(2, (unsigned char*)0)); \
	s1 = (vector signed short)vec_mergeh(zerovec, t); \
	t = vec_perm(data, data, vec_lvsl(3, (unsigned char*)0)); \
	s2 = (vector signed short)vec_mergeh(zerovec, t); \
	s1 = vec_add(s1,s2); \
	z = vec_sl(s1, vec_splat_u16(2)); \
	t = vec_perm(data, data, vec_lvsl(1, (unsigned char*)0)); \
	s1 = (vector signed short)vec_mergeh(zerovec, t); \
	t = vec_perm(data, data, vec_lvsl(4, (unsigned char*)0)); \
	s2 = (vector signed short)vec_mergeh(zerovec, t); \
	s1 = vec_add(s1, s2); \
	z = vec_sub(z, s1); \
	z = vec_add(vec_sl(z, vec_splat_u16(2)), z); \
	d = vec_add(d, z); \
	\
	d = vec_add(d, round_add); \
	d = vec_sra(d, vec_splat_u16(5)); \
	\
	t = vec_packsu(d, (vector signed short)zerovec); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsl(0, dst)); \
	t = vec_perm(t, t, vec_lvsl(0, dst)); \
	t = vec_sel(t, vec_ld(0, dst), mask); \
	vec_st(t, 0, dst); \
	\
	dst += stride; \
	src += stride
431 : | |||
/* Horizontal 6-tap lowpass (qpel) filter over an 8x8 block, taps
 * (1, -5, 20, 20, -5, 1) / 32, with (16 - rounding) as the bias
 * before the >> 5 (see the macro above for the derivation).
 *
 * This function assumes:
 *	dst is 8 byte aligned
 *	src is unaligned
 *	stride is a multiple of 8
 */
void
interpolate8x8_6tap_lowpass_h_altivec_c(uint8_t *dst, uint8_t *src, int32_t stride, int32_t rounding)
{
	vector signed short s1, s2;		/* widened shifted operands */
	vector signed short z;			/* partial filter sum */
	vector signed short d;			/* 16-bit accumulator */
	vector signed short round_add;		/* (16 - rounding) in every lane */
	vector unsigned char t;			/* scratch byte vector */
	vector unsigned char data;		/* raw row bytes, src[-2..13] */
	vector unsigned char mask;		/* per-row select mask for the masked store */
	vector unsigned char mask_stencil;	/* 8 x 0x00 followed by 8 x 0xff */
	vector unsigned char zerovec;		/* all-zero, for byte -> short widening */

	unsigned prefetch_constant;		/* control word for vec_dstt */

	zerovec = vec_splat_u8(0);
	/* NOTE(review): scalar store into element 0 of the vector, then
	 * splat -- same in-memory initialisation idiom as avg2/avg4 */
	*((short*)&round_add) = (short)(16 - rounding);
	round_add = vec_splat(round_add, 0);
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	/* prefetch stream 0: 4 blocks of 16 bytes, one per row of stride bytes */
	prefetch_constant = build_prefetch(1, 4, (short)stride);

	/* 8 rows, fully unrolled */
	INTERPOLATE8X8_6TAP_LOWPASS_H();
	INTERPOLATE8X8_6TAP_LOWPASS_H();
	INTERPOLATE8X8_6TAP_LOWPASS_H();
	INTERPOLATE8X8_6TAP_LOWPASS_H();

	INTERPOLATE8X8_6TAP_LOWPASS_H();
	INTERPOLATE8X8_6TAP_LOWPASS_H();
	INTERPOLATE8X8_6TAP_LOWPASS_H();
	INTERPOLATE8X8_6TAP_LOWPASS_H();

	/* stop the data stream started by vec_dstt (tag 0) */
	vec_dss(0);
}
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |