Parent Directory | Revision Log
Revision 1.2 - (view) (download)
1 : | edgomez | 1.1 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - Altivec 8bit<->16bit transfer - | ||
5 : | * | ||
6 : | * Copyright(C) 2004 Christoph Naegeli <chn@kbw.ch> | ||
7 : | * | ||
8 : | * This program is free software ; you can redistribute it and/or modify | ||
9 : | * it under the terms of the GNU General Public License as published by | ||
10 : | * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | * (at your option) any later version. | ||
12 : | * | ||
13 : | * This program is distributed in the hope that it will be useful, | ||
14 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | * GNU General Public License for more details. | ||
17 : | * | ||
18 : | * You should have received a copy of the GNU General Public License | ||
19 : | * along with this program ; if not, write to the Free Software | ||
20 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | * | ||
22 : | * $Id$ | ||
23 : | * | ||
24 : | ****************************************************************************/ | ||
25 : | |||
26 : | #ifdef HAVE_ALTIVEC_H | ||
27 : | #include <altivec.h> | ||
28 : | #endif | ||
29 : | |||
30 : | #include "../../portab.h" | ||
31 : | |||
32 : | |||
33 : | /* Turn this on if you like debugging the alignment */ | ||
34 : | #undef DEBUG | ||
35 : | |||
36 : | #include <stdio.h> | ||
37 : | |||
38 : | edgomez | 1.2 | /* This function assumes: |
39 : | * dst: 16 byte aligned | ||
40 : | edgomez | 1.1 | */ |
41 : | |||
42 : | #define COPY8TO16() \ | ||
43 : | edgomez | 1.2 | s = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\ |
44 : | vec_st((vector signed short)vec_mergeh(zerovec,s),0,dst);\ | ||
45 : | src += stride;\ | ||
46 : | dst += 8 | ||
47 : | edgomez | 1.1 | |
48 : | void | ||
49 : | edgomez | 1.2 | transfer_8to16copy_altivec_c(int16_t *dst, |
50 : | edgomez | 1.1 | uint8_t * src, |
51 : | uint32_t stride) | ||
52 : | { | ||
53 : | edgomez | 1.2 | register vector unsigned char s; |
54 : | register vector unsigned char zerovec; | ||
55 : | |||
56 : | edgomez | 1.1 | #ifdef DEBUG |
57 : | edgomez | 1.2 | /* Check the alignment */ |
58 : | if((long)dst & 0xf) | ||
59 : | fprintf(stderr, "transfer_8to16copy_altivec_c:incorrect align, dst: %lx\n", (long)dst); | ||
60 : | edgomez | 1.1 | #endif |
61 : | edgomez | 1.2 | |
62 : | /* initialization */ | ||
63 : | zerovec = vec_splat_u8(0); | ||
64 : | |||
65 : | COPY8TO16(); | ||
66 : | COPY8TO16(); | ||
67 : | COPY8TO16(); | ||
68 : | COPY8TO16(); | ||
69 : | |||
70 : | COPY8TO16(); | ||
71 : | COPY8TO16(); | ||
72 : | COPY8TO16(); | ||
73 : | COPY8TO16(); | ||
74 : | edgomez | 1.1 | } |
75 : | |||
76 : | |||
77 : | /* | ||
78 : | * This function assumes dst is 8 byte aligned and stride is a multiple of 8 | ||
79 : | * src may be unaligned | ||
80 : | */ | ||
81 : | |||
82 : | #define COPY16TO8() \ | ||
83 : | s = vec_perm(src[0], src[1], load_src_perm); \ | ||
84 : | packed = vec_packsu(s, vec_splat_s16(0)); \ | ||
85 : | mask = vec_perm(mask_stencil, mask_stencil, vec_lvsl(0, dst)); \ | ||
86 : | packed = vec_perm(packed, packed, vec_lvsl(0, dst)); \ | ||
87 : | packed = vec_sel(packed, vec_ld(0, dst), mask); \ | ||
88 : | vec_st(packed, 0, dst); \ | ||
89 : | src++; \ | ||
90 : | dst += stride | ||
91 : | |||
92 : | void transfer_16to8copy_altivec_c(uint8_t *dst, | ||
93 : | vector signed short *src, | ||
94 : | uint32_t stride) | ||
95 : | { | ||
96 : | register vector signed short s; | ||
97 : | register vector unsigned char packed; | ||
98 : | register vector unsigned char mask_stencil; | ||
99 : | register vector unsigned char mask; | ||
100 : | register vector unsigned char load_src_perm; | ||
101 : | |||
102 : | #ifdef DEBUG | ||
103 : | /* if this is on, print alignment errors */ | ||
104 : | if(((unsigned long) dst) & 0x7) | ||
105 : | edgomez | 1.2 | fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, dst %lx\n", (long)dst); |
106 : | edgomez | 1.1 | if(stride & 0x7) |
107 : | fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, stride %u\n", stride); | ||
108 : | #endif | ||
109 : | /* Initialisation stuff */ | ||
110 : | load_src_perm = vec_lvsl(0, (unsigned char*)src); | ||
111 : | mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); | ||
112 : | |||
113 : | COPY16TO8(); | ||
114 : | COPY16TO8(); | ||
115 : | COPY16TO8(); | ||
116 : | COPY16TO8(); | ||
117 : | |||
118 : | COPY16TO8(); | ||
119 : | COPY16TO8(); | ||
120 : | COPY16TO8(); | ||
121 : | COPY16TO8(); | ||
122 : | } | ||
123 : | |||
124 : | |||
125 : | |||
126 : | /* | ||
127 : | * This function assumes dst is 8 byte aligned and src is unaligned. Stride has | ||
128 : | * to be a multiple of 8 | ||
129 : | */ | ||
130 : | |||
131 : | #define COPY8TO8() \ | ||
132 : | tmp = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \ | ||
133 : | edgomez | 1.2 | t0 = vec_perm(tmp, tmp, vec_lvsl(0, dst));\ |
134 : | t1 = vec_perm(mask, mask, vec_lvsl(0, dst));\ | ||
135 : | tmp = vec_sel(t0, vec_ld(0, dst), t1);\ | ||
136 : | vec_st(tmp, 0, dst);\ | ||
137 : | dst += stride;\ | ||
138 : | edgomez | 1.1 | src += stride |
139 : | |||
140 : | void | ||
141 : | transfer8x8_copy_altivec_c( uint8_t * dst, | ||
142 : | uint8_t * src, | ||
143 : | uint32_t stride) | ||
144 : | { | ||
145 : | register vector unsigned char tmp; | ||
146 : | register vector unsigned char mask; | ||
147 : | edgomez | 1.2 | register vector unsigned char t0, t1; |
148 : | edgomez | 1.1 | |
149 : | #ifdef DEBUG | ||
150 : | if(((unsigned long)dst) & 0x7) | ||
151 : | edgomez | 1.2 | fprintf(stderr, "transfer8x8_copy_altivec:incorrect align, dst: %lx\n", (long)dst); |
152 : | edgomez | 1.1 | if(stride & 0x7) |
153 : | edgomez | 1.2 | fprintf(stderr, "transfer8x8_copy_altivec:incorrect stride, stride: %u\n", stride); |
154 : | edgomez | 1.1 | #endif |
155 : | mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); | ||
156 : | |||
157 : | COPY8TO8(); | ||
158 : | COPY8TO8(); | ||
159 : | COPY8TO8(); | ||
160 : | COPY8TO8(); | ||
161 : | |||
162 : | COPY8TO8(); | ||
163 : | COPY8TO8(); | ||
164 : | COPY8TO8(); | ||
165 : | COPY8TO8(); | ||
166 : | } | ||
167 : | |||
168 : | |||
169 : | edgomez | 1.2 | #define SUB8TO16() \ |
170 : | c = vec_perm(vec_ld(0,cur),vec_ld(16,cur),vec_lvsl(0,cur));\ | ||
171 : | r = vec_perm(vec_ld(0,ref),vec_ld(16,ref),vec_lvsl(0,ref));\ | ||
172 : | cs = (vector signed short)vec_mergeh(ox00,c);\ | ||
173 : | rs = (vector signed short)vec_mergeh(ox00,r);\ | ||
174 : | \ | ||
175 : | c = vec_lvsr(0,cur);\ | ||
176 : | mask = vec_perm(mask_00ff, mask_00ff, c);\ | ||
177 : | r = vec_perm(r, r, c);\ | ||
178 : | r = vec_sel(r, vec_ld(0,cur), mask);\ | ||
179 : | vec_st(r,0,cur);\ | ||
180 : | vec_st( vec_sub(cs,rs), 0, dct );\ | ||
181 : | \ | ||
182 : | dct += 8;\ | ||
183 : | cur += stride;\ | ||
184 : | ref += stride | ||
185 : | |||
186 : | |||
187 : | /* This function assumes: | ||
188 : | * dct: 16 Byte aligned | ||
189 : | * cur: 8 Byte aligned | ||
190 : | * stride: multiple of 8 | ||
191 : | edgomez | 1.1 | */ |
192 : | |||
193 : | void | ||
194 : | edgomez | 1.2 | transfer_8to16sub_altivec_c(int16_t * dct, |
195 : | uint8_t * cur, | ||
196 : | uint8_t * ref, | ||
197 : | const uint32_t stride) | ||
198 : | edgomez | 1.1 | { |
199 : | edgomez | 1.2 | register vector unsigned char c,r; |
200 : | register vector unsigned char ox00; | ||
201 : | register vector unsigned char mask_00ff; | ||
202 : | register vector unsigned char mask; | ||
203 : | register vector signed short cs,rs; | ||
204 : | |||
205 : | edgomez | 1.1 | #ifdef DEBUG |
206 : | edgomez | 1.2 | if((long)dct & 0xf) |
207 : | fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, dct: %lx\n", (long)dct); | ||
208 : | if((long)cur & 0x7) | ||
209 : | fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, cur: %lx\n", (long)cur); | ||
210 : | if(stride & 0x7) | ||
211 : | fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect stride, stride: %lu\n", (long)stride); | ||
212 : | edgomez | 1.1 | #endif |
213 : | edgomez | 1.2 | /* initialize */ |
214 : | ox00 = vec_splat_u8(0); | ||
215 : | mask_00ff = vec_pack((vector unsigned short)ox00,vec_splat_u16(-1)); | ||
216 : | |||
217 : | SUB8TO16(); | ||
218 : | SUB8TO16(); | ||
219 : | SUB8TO16(); | ||
220 : | SUB8TO16(); | ||
221 : | |||
222 : | SUB8TO16(); | ||
223 : | SUB8TO16(); | ||
224 : | SUB8TO16(); | ||
225 : | SUB8TO16(); | ||
226 : | edgomez | 1.1 | } |
227 : | |||
228 : | |||
229 : | #define SUBRO8TO16() \ | ||
230 : | edgomez | 1.2 | c = vec_perm(vec_ld(0,cur),vec_ld(16,cur),vec_lvsl(0,cur));\ |
231 : | r = vec_perm(vec_ld(0,ref),vec_ld(16,ref),vec_lvsl(0,ref));\ | ||
232 : | cs = (vector signed short)vec_mergeh(z,c);\ | ||
233 : | rs = (vector signed short)vec_mergeh(z,r);\ | ||
234 : | vec_st( vec_sub(cs,rs), 0, dct );\ | ||
235 : | dct += 8;\ | ||
236 : | cur += stride;\ | ||
237 : | ref += stride | ||
238 : | |||
239 : | |||
240 : | /* This function assumes: | ||
241 : | * dct: 16 Byte aligned | ||
242 : | */ | ||
243 : | edgomez | 1.1 | |
244 : | void | ||
245 : | edgomez | 1.2 | transfer_8to16subro_altivec_c(int16_t * dct, |
246 : | const uint8_t * cur, | ||
247 : | const uint8_t * ref, | ||
248 : | const uint32_t stride) | ||
249 : | edgomez | 1.1 | { |
250 : | edgomez | 1.2 | register vector unsigned char c; |
251 : | register vector unsigned char r; | ||
252 : | register vector unsigned char z; | ||
253 : | register vector signed short cs; | ||
254 : | register vector signed short rs; | ||
255 : | |||
256 : | edgomez | 1.1 | #ifdef DEBUG |
257 : | edgomez | 1.2 | /* Check the alignment assumptions if this is on */ |
258 : | if((long)dct & 0xf) | ||
259 : | fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, dct: %lx\n", (long)dct); | ||
260 : | edgomez | 1.1 | #endif |
261 : | edgomez | 1.2 | /* initialize */ |
262 : | z = vec_splat_u8(0); | ||
263 : | |||
264 : | SUBRO8TO16(); | ||
265 : | SUBRO8TO16(); | ||
266 : | SUBRO8TO16(); | ||
267 : | SUBRO8TO16(); | ||
268 : | |||
269 : | SUBRO8TO16(); | ||
270 : | SUBRO8TO16(); | ||
271 : | SUBRO8TO16(); | ||
272 : | SUBRO8TO16(); | ||
273 : | edgomez | 1.1 | } |
274 : | |||
275 : | /* | ||
276 : | * This function assumes: | ||
277 : | * dct: 16 bytes alignment | ||
278 : | * cur: 8 bytes alignment | ||
279 : | * ref1: unaligned | ||
280 : | * ref2: unaligned | ||
281 : | * stride: multiple of 8 | ||
282 : | */ | ||
283 : | |||
284 : | #define SUB28TO16() \ | ||
285 : | r1 = vec_perm(vec_ld(0, ref1), vec_ld(16, ref1), vec_lvsl(0, ref1)); \ | ||
286 : | r2 = vec_perm(vec_ld(0, ref2), vec_ld(16, ref2), vec_lvsl(0, ref2)); \ | ||
287 : | c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \ | ||
288 : | r = vec_avg(r1, r2); \ | ||
289 : | cs = (vector signed short)vec_mergeh(vec_splat_u8(0), c); \ | ||
290 : | rs = (vector signed short)vec_mergeh(vec_splat_u8(0), r); \ | ||
291 : | edgomez | 1.2 | c = vec_perm(mask, mask, vec_lvsl(0, cur));\ |
292 : | r = vec_sel(r, vec_ld(0, cur), c);\ | ||
293 : | vec_st(r, 0, cur); \ | ||
294 : | edgomez | 1.1 | *dct++ = vec_sub(cs, rs); \ |
295 : | cur += stride; \ | ||
296 : | ref1 += stride; \ | ||
297 : | ref2 += stride | ||
298 : | |||
299 : | void | ||
300 : | transfer_8to16sub2_altivec_c(vector signed short *dct, | ||
301 : | uint8_t *cur, | ||
302 : | uint8_t *ref1, | ||
303 : | uint8_t *ref2, | ||
304 : | const uint32_t stride) | ||
305 : | { | ||
306 : | vector unsigned char r1; | ||
307 : | vector unsigned char r2; | ||
308 : | vector unsigned char r; | ||
309 : | vector unsigned char c; | ||
310 : | vector unsigned char mask; | ||
311 : | vector signed short cs; | ||
312 : | vector signed short rs; | ||
313 : | |||
314 : | #ifdef DEBUG | ||
315 : | /* Dump alignment erros if DEBUG is set */ | ||
316 : | if(((unsigned long)dct) & 0xf) | ||
317 : | edgomez | 1.2 | fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %lx\n", (long)dct); |
318 : | edgomez | 1.1 | if(((unsigned long)cur) & 0x7) |
319 : | edgomez | 1.2 | fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, cur: %lx\n", (long)cur); |
320 : | edgomez | 1.1 | if(stride & 0x7) |
321 : | fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %u\n", stride); | ||
322 : | #endif | ||
323 : | |||
324 : | /* Initialisation */ | ||
325 : | mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); | ||
326 : | |||
327 : | SUB28TO16(); | ||
328 : | SUB28TO16(); | ||
329 : | SUB28TO16(); | ||
330 : | SUB28TO16(); | ||
331 : | |||
332 : | SUB28TO16(); | ||
333 : | SUB28TO16(); | ||
334 : | SUB28TO16(); | ||
335 : | SUB28TO16(); | ||
336 : | } | ||
337 : | |||
338 : | |||
339 : | |||
340 : | /* | ||
341 : | * This function assumes: | ||
342 : | * dst: 8 byte aligned | ||
343 : | * src: unaligned | ||
344 : | * stride: multiple of 8 | ||
345 : | */ | ||
346 : | |||
347 : | #define ADD16TO8() \ | ||
348 : | s = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \ | ||
349 : | d = vec_perm(vec_ld(0, dst), vec_ld(16, dst), vec_lvsl(0, dst)); \ | ||
350 : | ds = (vector signed short)vec_mergeh(vec_splat_u8(0), d); \ | ||
351 : | ds = vec_add(ds, s); \ | ||
352 : | packed = vec_packsu(ds, vec_splat_s16(0)); \ | ||
353 : | mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); \ | ||
354 : | mask = vec_perm(mask, mask, vec_lvsl(0, dst)); \ | ||
355 : | packed = vec_perm(packed, packed, vec_lvsl(0, dst)); \ | ||
356 : | packed = vec_sel(packed, vec_ld(0, dst), mask); \ | ||
357 : | vec_st(packed, 0, dst); \ | ||
358 : | src += 8; \ | ||
359 : | dst += stride | ||
360 : | |||
361 : | void | ||
362 : | transfer_16to8add_altivec_c(uint8_t *dst, | ||
363 : | int16_t *src, | ||
364 : | uint32_t stride) | ||
365 : | { | ||
366 : | vector signed short s; | ||
367 : | vector signed short ds; | ||
368 : | vector unsigned char d; | ||
369 : | vector unsigned char packed; | ||
370 : | vector unsigned char mask; | ||
371 : | |||
372 : | #ifdef DEBUG | ||
373 : | /* if this is set, dump alignment errors */ | ||
374 : | if(((unsigned long)dst) & 0x7) | ||
375 : | edgomez | 1.2 | fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %lx\n", (long)dst); |
376 : | edgomez | 1.1 | if(stride & 0x7) |
377 : | fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %u\n", stride); | ||
378 : | #endif | ||
379 : | |||
380 : | ADD16TO8(); | ||
381 : | ADD16TO8(); | ||
382 : | ADD16TO8(); | ||
383 : | ADD16TO8(); | ||
384 : | |||
385 : | ADD16TO8(); | ||
386 : | ADD16TO8(); | ||
387 : | ADD16TO8(); | ||
388 : | ADD16TO8(); | ||
389 : | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |