24 |
|
|
25 |
*/ |
*/ |
26 |
|
|
27 |
#define G_REG |
#ifdef HAVE_ALTIVEC_H |
28 |
|
#include <altivec.h> |
|
#ifdef G_REG |
|
|
register vector unsigned char perm0 asm("%v29"); |
|
|
register vector unsigned char perm1 asm("%v30"); |
|
|
register vector unsigned int zerovec asm("%v31"); |
|
29 |
#endif |
#endif |
30 |
|
|
|
#include <stdio.h> |
|
|
|
|
|
#undef DEBUG |
|
31 |
|
|
32 |
static const vector unsigned char perms[2] = { |
#include "../../portab.h" |
|
(vector unsigned char) ( /* Used when cur is aligned */ |
|
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
|
|
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17), |
|
|
(vector unsigned char) ( /* Used when cur is unaligned */ |
|
|
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
|
|
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f), |
|
|
}; |
|
|
|
|
|
#ifdef G_REG |
|
|
void |
|
|
sadInit_altivec(void) |
|
|
{ |
|
|
perm0 = perms[0]; |
|
|
perm1 = perms[1]; |
|
|
zerovec = (vector unsigned int) (0); |
|
|
} |
|
|
static inline const vector unsigned char |
|
|
get_perm(unsigned long i) |
|
|
{ |
|
|
return i ? perm1 : perm0; |
|
|
} |
|
|
|
|
|
#define ZERODEF |
|
|
#define ZEROVEC zerovec |
|
|
#else |
|
|
void |
|
|
sadInit_altivec(void) |
|
|
{ |
|
|
} |
|
|
static inline const vector unsigned char |
|
|
get_perm(unsigned long i) |
|
|
{ |
|
|
return perms[i]; |
|
|
} |
|
33 |
|
|
34 |
#define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0) |
/* no debugging by default */ |
35 |
#define ZEROVEC zerovec |
#undef DEBUG |
|
#endif |
|
36 |
|
|
37 |
|
#include <stdio.h> |
38 |
|
|
39 |
#define SAD16() \ |
#define SAD16() \ |
40 |
t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ |
t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ |
41 |
t2 = vec_max(t1, *cur); /* find largest of two */ \ |
t2 = vec_max(t1, *cur); /* find largest of two */ \ |
42 |
t3 = vec_min(t1, *cur); /* find smaller of two */ \ |
t1 = vec_min(t1, *cur); /* find smaller of two */ \ |
43 |
t4 = vec_sub(t2, t3); /* find absolute difference */ \ |
t1 = vec_sub(t2, t1); /* find absolute difference */ \ |
44 |
sad = vec_sum4s(t4, sad); /* accumulate sum of differences */ \ |
sad = vec_sum4s(t1, vec_splat_u32(0)); /* sum of differences */ \ |
45 |
|
sumdiffs = (vector unsigned int)vec_sums((vector signed int)sad, (vector signed int)sumdiffs); /* accumulate sumdiffs */ \ |
46 |
|
if(vec_any_ge(sumdiffs, best_vec)) \ |
47 |
|
goto bail; \ |
48 |
cur += stride; ref += stride; |
cur += stride; ref += stride; |
49 |
|
|
50 |
/* |
/* |
51 |
* This function assumes cur and stride are 16 bytes aligned and ref is unaligned |
* This function assumes cur and stride are 16 bytes aligned and ref is unaligned |
52 |
*/ |
*/ |
53 |
unsigned long |
unsigned long |
54 |
sad16_altivec(const vector unsigned char *cur, |
sad16_altivec_c(const vector unsigned char *cur, |
55 |
const vector unsigned char *ref, |
const vector unsigned char *ref, |
56 |
unsigned long stride, |
unsigned long stride, |
57 |
const unsigned long best_sad) |
const unsigned long best_sad) |
58 |
{ |
{ |
59 |
vector unsigned char perm; |
vector unsigned char perm; |
60 |
vector unsigned char t1, t2, t3, t4; |
vector unsigned char t1, t2; |
61 |
vector unsigned int sad; |
vector unsigned int sad; |
62 |
vector signed int sumdiffs, best_vec; |
vector unsigned int sumdiffs; |
63 |
|
vector unsigned int best_vec; |
64 |
unsigned long result; |
unsigned long result; |
65 |
|
|
|
ZERODEF; |
|
66 |
|
|
67 |
#ifdef DEBUG |
#ifdef DEBUG |
68 |
|
/* print alignment errors if DEBUG is on */ |
69 |
if (((unsigned long) cur) & 0xf) |
if (((unsigned long) cur) & 0xf) |
70 |
fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); |
fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); |
|
// if (((unsigned long)ref) & 0xf) |
|
|
// fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref); |
|
71 |
if (stride & 0xf) |
if (stride & 0xf) |
72 |
fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); |
fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); |
73 |
#endif |
#endif |
74 |
/* initialization */ |
/* initialization */ |
75 |
sad = (vector unsigned int) (ZEROVEC); |
sad = vec_splat_u32(0); |
76 |
|
sumdiffs = sad; |
77 |
stride >>= 4; |
stride >>= 4; |
78 |
perm = vec_lvsl(0, (unsigned char *) ref); |
perm = vec_lvsl(0, (unsigned char *) ref); |
79 |
*((unsigned long *) &best_vec) = best_sad; |
*((unsigned long *) &best_vec) = best_sad; |
84 |
SAD16(); |
SAD16(); |
85 |
SAD16(); |
SAD16(); |
86 |
SAD16(); |
SAD16(); |
87 |
/* Temp sum for exit */ |
|
|
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
|
|
if (vec_all_ge(sumdiffs, best_vec)) |
|
|
goto bail; |
|
88 |
SAD16(); |
SAD16(); |
89 |
SAD16(); |
SAD16(); |
90 |
SAD16(); |
SAD16(); |
91 |
SAD16(); |
SAD16(); |
92 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
|
|
if (vec_all_ge(sumdiffs, best_vec)) |
|
|
goto bail; |
|
93 |
SAD16(); |
SAD16(); |
94 |
SAD16(); |
SAD16(); |
95 |
SAD16(); |
SAD16(); |
96 |
SAD16(); |
SAD16(); |
97 |
|
|
98 |
SAD16(); |
SAD16(); |
99 |
SAD16(); |
SAD16(); |
100 |
SAD16(); |
SAD16(); |
101 |
SAD16(); |
SAD16(); |
102 |
|
|
|
/* sum all parts of difference into one 32 bit quantity */ |
|
|
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
|
103 |
bail: |
bail: |
104 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
105 |
sumdiffs = vec_splat(sumdiffs, 3); |
sumdiffs = vec_splat(sumdiffs, 3); |
106 |
vec_ste(sumdiffs, 0, (int *) &result); |
vec_ste(sumdiffs, 0, (unsigned long *) &result); |
107 |
return (result); |
return result; |
108 |
} |
} |
109 |
|
|
110 |
|
|
111 |
#define SAD8() \ |
#define SAD8() \ |
112 |
t1 = vec_perm(cur[0], cur[stride], perm_cur); /* align current vector */ \ |
t1 = vec_perm(cur[0], cur[stride], perm_cur); /* align current vector */ \ |
113 |
t2 = vec_perm(ref[0], ref[1], perm_ref1); /* align current vector */ \ |
t2 = vec_perm(ref[0], ref[1], perm_ref1); /* align current vector */ \ |
114 |
tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); /* align current vector */ \ |
tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); /* align current vector */ \ |
115 |
t2 = vec_perm(t2,tp,perm_ref2); \ |
t2 = vec_perm(t2,tp,perm_ref2); \ |
116 |
t3 = vec_max(t1, t2); /* find largest of two */ \ |
tp = vec_max(t1, t2); /* find largest of two */ \ |
117 |
t4 = vec_min(t1, t2); /* find smaller of two */ \ |
t1 = vec_min(t1, t2); /* find smaller of two */ \ |
118 |
t5 = vec_sub(t3, t4); /* find absolute difference */ \ |
tp = vec_sub(tp, t1); /* find absolute difference */ \ |
119 |
sad = vec_sum4s(t5, sad); /* accumulate sum of differences */ \ |
sad = vec_sum4s(tp, sad); /* accumulate sum of differences */ \ |
120 |
cur += stride<<1; ref += stride<<1; |
cur += stride<<1; ref += stride<<1; |
121 |
|
|
122 |
/* |
/* |
124 |
* aligned and ref is unaligned |
* aligned and ref is unaligned |
125 |
*/ |
*/ |
126 |
unsigned long |
unsigned long |
127 |
sad8_altivec(const vector unsigned char *cur, |
sad8_altivec_c(const vector unsigned char *cur, |
128 |
const vector unsigned char *ref, |
const vector unsigned char *ref, |
129 |
unsigned long stride) |
unsigned long stride) |
130 |
{ |
{ |
131 |
vector unsigned char t1, t2, t3, t4, t5, tp; |
vector unsigned char t1, t2, tp; |
132 |
vector unsigned int sad; |
vector unsigned int sad; |
133 |
vector signed int sumdiffs; |
vector unsigned int sumdiffs; |
134 |
vector unsigned char perm_cur; |
vector unsigned char perm_cur; |
135 |
vector unsigned char perm_ref1, perm_ref2; |
vector unsigned char perm_ref1, perm_ref2; |
136 |
unsigned long result; |
unsigned long result; |
137 |
|
|
|
ZERODEF; |
|
|
|
|
138 |
#ifdef DEBUG |
#ifdef DEBUG |
139 |
|
/* print alignment errors if DEBUG is on */ |
140 |
if (((unsigned long) cur) & 0x7) |
if (((unsigned long) cur) & 0x7) |
141 |
fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); |
fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); |
|
// if (((unsigned long)ref) & 0x7) |
|
|
// fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref); |
|
142 |
if (stride & 0xf) |
if (stride & 0xf) |
143 |
fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); |
fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); |
144 |
#endif |
#endif |
145 |
|
|
146 |
perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01); |
/* check if cur is 8 or 16 bytes aligned and create the perm_cur vector */ |
147 |
perm_ref1 = vec_lvsl(0, (unsigned char *) ref); |
perm_ref1 = vec_lvsl(0, (unsigned char *) ref); |
148 |
perm_ref2 = get_perm(0); |
perm_ref2 = vec_add(vec_lvsl(0, (unsigned char*)NULL), vec_pack(vec_splat_u16(0), vec_splat_u16(8))); |
149 |
|
perm_cur = vec_add(perm_ref2, vec_splat(vec_lvsl(0, (unsigned char*)cur), 0)); |
150 |
|
|
151 |
/* initialization */ |
/* initialization */ |
152 |
sad = (vector unsigned int) (ZEROVEC); |
sad = vec_splat_u32(0); |
153 |
stride >>= 4; |
stride >>= 4; |
154 |
|
|
155 |
/* perform sum of differences between current and previous */ |
/* perform sum of differences between current and previous */ |
159 |
SAD8(); |
SAD8(); |
160 |
|
|
161 |
/* sum all parts of difference into one 32 bit quantity */ |
/* sum all parts of difference into one 32 bit quantity */ |
162 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
sumdiffs = (vector unsigned int)vec_sums((vector signed int) sad, vec_splat_s32(0)); |
163 |
|
|
164 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
165 |
sumdiffs = vec_splat(sumdiffs, 3); |
sumdiffs = vec_splat(sumdiffs, 3); |
166 |
vec_ste(sumdiffs, 0, (int *) &result); |
vec_ste(sumdiffs, 0, (unsigned int *) &result); |
167 |
return (result); |
return result; |
168 |
} |
} |
169 |
|
|
|
#define MEAN16(i)\ |
|
|
c##i=*cur;\ |
|
|
mean = vec_sum4s(c##i,mean);\ |
|
|
cur += stride; |
|
|
|
|
|
#define DEV16(i) \ |
|
|
t2 = vec_max(c##i, mn); /* find largest of two */ \ |
|
|
t3 = vec_min(c##i, mn); /* find smaller of two */ \ |
|
|
t4 = vec_sub(t2, t3); /* find absolute difference */ \ |
|
|
dev = vec_sum4s(t4, dev); |
|
170 |
|
|
171 |
|
#define MEAN16() \ |
172 |
|
mean = vec_sum4s(*ptr,mean);\ |
173 |
|
ptr += stride |
174 |
|
|
175 |
|
#define DEV16() \ |
176 |
|
t2 = vec_max(*ptr, mn); /* find largest of two */ \ |
177 |
|
t3 = vec_min(*ptr, mn); /* find smaller of two */ \ |
178 |
|
t2 = vec_sub(t2, t3); /* find absolute difference */ \ |
179 |
|
dev = vec_sum4s(t2, dev); \ |
180 |
|
ptr += stride |
181 |
|
|
182 |
|
/* |
183 |
|
* This function assumes cur is 16 bytes aligned and stride is 16 bytes |
184 |
|
* aligned |
185 |
|
*/ |
186 |
unsigned long |
unsigned long |
187 |
dev16_altivec(const vector unsigned char *cur, |
dev16_altivec_c(const vector unsigned char *cur, |
188 |
unsigned long stride) |
unsigned long stride) |
189 |
{ |
{ |
190 |
vector unsigned char t2, t3, t4, mn; |
vector unsigned char t2, t3, mn; |
191 |
vector unsigned int mean, dev; |
vector unsigned int mean, dev; |
192 |
vector signed int sumdiffs; |
vector unsigned int sumdiffs; |
193 |
vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, |
const vector unsigned char *ptr; |
|
c13, c14, c15; |
|
194 |
unsigned long result; |
unsigned long result; |
195 |
|
|
196 |
ZERODEF; |
#ifdef DEBUG |
197 |
|
/* print alignment errors if DEBUG is on */ |
198 |
|
if(((unsigned long)cur) & 0x7) |
199 |
|
fprintf(stderr, "dev16_altivec:incorrect align, cur: %x\n", cur); |
200 |
|
if(stride & 0xf) |
201 |
|
fprintf(stderr, "dev16_altivec:incorrect align, stride: %ld\n", stride); |
202 |
|
#endif |
203 |
|
|
204 |
mean = (vector unsigned int) (ZEROVEC); |
dev = mean = vec_splat_u32(0); |
|
dev = (vector unsigned int) (ZEROVEC); |
|
205 |
stride >>= 4; |
stride >>= 4; |
206 |
|
|
207 |
MEAN16(0); |
/* set pointer to iterate through cur */ |
208 |
MEAN16(1); |
ptr = cur; |
209 |
MEAN16(2); |
|
210 |
MEAN16(3); |
MEAN16(); |
211 |
MEAN16(4); |
MEAN16(); |
212 |
MEAN16(5); |
MEAN16(); |
213 |
MEAN16(6); |
MEAN16(); |
214 |
MEAN16(7); |
MEAN16(); |
215 |
MEAN16(8); |
MEAN16(); |
216 |
MEAN16(9); |
MEAN16(); |
217 |
MEAN16(10); |
MEAN16(); |
218 |
MEAN16(11); |
MEAN16(); |
219 |
MEAN16(12); |
MEAN16(); |
220 |
MEAN16(13); |
MEAN16(); |
221 |
MEAN16(14); |
MEAN16(); |
222 |
MEAN16(15); |
MEAN16(); |
223 |
|
MEAN16(); |
224 |
sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC); |
MEAN16(); |
225 |
mn = vec_perm((vector unsigned char) sumdiffs, |
MEAN16(); |
226 |
(vector unsigned char) sumdiffs, (vector unsigned char) (14, |
|
227 |
14, |
/* Add all together in sumdiffs */ |
228 |
14, |
sumdiffs = (vector unsigned int)vec_sums((vector signed int) mean, vec_splat_s32(0)); |
229 |
14, |
/* divide by 16 * 16 */ |
230 |
14, |
mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, vec_splat_u8(14)); |
231 |
14, |
|
232 |
14, |
/* set pointer to iterate through cur */ |
233 |
14, |
ptr = cur; |
234 |
14, |
|
235 |
14, |
DEV16(); |
236 |
14, |
DEV16(); |
237 |
14, |
DEV16(); |
238 |
14, |
DEV16(); |
239 |
14, |
DEV16(); |
240 |
14, |
DEV16(); |
241 |
14)); |
DEV16(); |
242 |
DEV16(0); |
DEV16(); |
243 |
DEV16(1); |
DEV16(); |
244 |
DEV16(2); |
DEV16(); |
245 |
DEV16(3); |
DEV16(); |
246 |
DEV16(4); |
DEV16(); |
247 |
DEV16(5); |
DEV16(); |
248 |
DEV16(6); |
DEV16(); |
249 |
DEV16(7); |
DEV16(); |
250 |
DEV16(8); |
DEV16(); |
|
DEV16(9); |
|
|
DEV16(10); |
|
|
DEV16(11); |
|
|
DEV16(12); |
|
|
DEV16(13); |
|
|
DEV16(14); |
|
|
DEV16(15); |
|
251 |
|
|
252 |
/* sum all parts of difference into one 32 bit quantity */ |
/* sum all parts of difference into one 32 bit quantity */ |
253 |
sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC); |
sumdiffs = (vector unsigned int)vec_sums((vector signed int) dev, vec_splat_s32(0)); |
254 |
|
|
255 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
256 |
sumdiffs = vec_splat(sumdiffs, 3); |
sumdiffs = vec_splat(sumdiffs, 3); |
257 |
vec_ste(sumdiffs, 0, (int *) &result); |
vec_ste(sumdiffs, 0, (unsigned int *) &result); |
258 |
return (result); |
return result; |
259 |
|
} |
260 |
|
|
261 |
|
/*
 * SAD16BI: process one row of the bidirectional SAD.
 *
 * Uses these names from the expanding scope: cur, ref1, ref2 (vector
 * pointers), stride (pointer increment), mask1 / mask2 (permute masks used
 * to realign ref1 / ref2), t1, t2, sad (byte vectors) and sum (word vector).
 *
 * Each expansion realigns one row of ref1 and ref2, averages them, adds the
 * absolute difference against *cur into sum, and advances all three
 * pointers by stride.
 *
 * Wrapped in do { } while (0) so that "SAD16BI();" is exactly one statement,
 * even as the body of an unbraced if / for.
 */
#define SAD16BI() \
    do { \
        t1 = vec_perm(ref1[0], ref1[1], mask1); /* realign ref1 row */ \
        t2 = vec_perm(ref2[0], ref2[1], mask2); /* realign ref2 row */ \
        t1 = vec_avg(t1, t2);                   /* average of both references */ \
        t2 = vec_max(t1, *cur);                 /* larger of the two */ \
        t1 = vec_min(t1, *cur);                 /* smaller of the two */ \
        sad = vec_sub(t2, t1);                  /* absolute difference */ \
        sum = vec_sum4s(sad, sum);              /* accumulate into sum */ \
        cur += stride; \
        ref1 += stride; \
        ref2 += stride; \
    } while (0)
272 |
|
|
273 |
|
/* |
274 |
|
* This function assumes cur is 16 bytes aligned, stride is 16 bytes |
275 |
|
* aligned and ref1 and ref2 is unaligned |
276 |
|
*/ |
277 |
|
unsigned long |
278 |
|
sad16bi_altivec_c(vector unsigned char *cur, |
279 |
|
vector unsigned char *ref1, |
280 |
|
vector unsigned char *ref2, |
281 |
|
unsigned long stride) |
282 |
|
{ |
283 |
|
vector unsigned char t1, t2; |
284 |
|
vector unsigned char mask1, mask2; |
285 |
|
vector unsigned char sad; |
286 |
|
vector unsigned int sum; |
287 |
|
unsigned long result; |
288 |
|
|
289 |
|
#ifdef DEBUG |
290 |
|
/* print alignment errors if this is on */ |
291 |
|
if(cur & 0xf) |
292 |
|
fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %x\n", cur); |
293 |
|
if(stride & 0xf) |
294 |
|
fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %ld\n", stride); |
295 |
|
#endif |
296 |
|
|
297 |
|
/* Initialisation stuff */ |
298 |
|
stride >>= 4; |
299 |
|
mask1 = vec_lvsl(0, (unsigned char*)ref1); |
300 |
|
mask2 = vec_lvsl(0, (unsigned char*)ref2); |
301 |
|
sad = vec_splat_u8(0); |
302 |
|
sum = (vector unsigned int)sad; |
303 |
|
|
304 |
|
SAD16BI(); |
305 |
|
SAD16BI(); |
306 |
|
SAD16BI(); |
307 |
|
SAD16BI(); |
308 |
|
|
309 |
|
SAD16BI(); |
310 |
|
SAD16BI(); |
311 |
|
SAD16BI(); |
312 |
|
SAD16BI(); |
313 |
|
|
314 |
|
SAD16BI(); |
315 |
|
SAD16BI(); |
316 |
|
SAD16BI(); |
317 |
|
SAD16BI(); |
318 |
|
|
319 |
|
SAD16BI(); |
320 |
|
SAD16BI(); |
321 |
|
SAD16BI(); |
322 |
|
SAD16BI(); |
323 |
|
|
324 |
|
sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0)); |
325 |
|
sum = vec_splat(sum, 3); |
326 |
|
vec_ste(sum, 0, (unsigned int*)&result); |
327 |
|
|
328 |
|
return result; |
329 |
|
} |
330 |
|
|
331 |
|
|
332 |
|
/*
 * SSE8_16BIT: process one row of eight 16-bit values.
 *
 * Uses these names from the expanding scope: b1, b2 (const int16_t
 * pointers, possibly unaligned), stride (row distance in bytes), b1_vec,
 * b2_vec, diff (vector signed short) and sum (vector signed int).
 *
 * Each expansion loads one realigned 16-byte row from b1 and b2, adds the
 * squared differences into sum, and advances both pointers by stride bytes.
 * The pointer arithmetic goes through const int8_t * so the const
 * qualifier of b1 / b2 is not cast away.
 *
 * Wrapped in do { } while (0) so that "SSE8_16BIT();" is exactly one
 * statement.
 */
#define SSE8_16BIT() \
    do { \
        b1_vec = vec_perm(vec_ld(0, b1), vec_ld(16, b1), vec_lvsl(0, b1)); \
        b2_vec = vec_perm(vec_ld(0, b2), vec_ld(16, b2), vec_lvsl(0, b2)); \
        diff = vec_sub(b1_vec, b2_vec); \
        sum = vec_msum(diff, diff, sum); \
        b1 = (const int16_t *) ((const int8_t *) b1 + stride); \
        b2 = (const int16_t *) ((const int8_t *) b2 + stride); \
    } while (0)
339 |
|
|
340 |
|
uint32_t |
341 |
|
sse8_16bit_altivec_c(const int16_t * b1, |
342 |
|
const int16_t * b2, |
343 |
|
const uint32_t stride) |
344 |
|
{ |
345 |
|
register vector signed short b1_vec; |
346 |
|
register vector signed short b2_vec; |
347 |
|
register vector signed short diff; |
348 |
|
register vector signed int sum; |
349 |
|
uint32_t result; |
350 |
|
|
351 |
|
/* initialize */ |
352 |
|
sum = vec_splat_s32(0); |
353 |
|
|
354 |
|
SSE8_16BIT(); |
355 |
|
SSE8_16BIT(); |
356 |
|
SSE8_16BIT(); |
357 |
|
SSE8_16BIT(); |
358 |
|
|
359 |
|
SSE8_16BIT(); |
360 |
|
SSE8_16BIT(); |
361 |
|
SSE8_16BIT(); |
362 |
|
SSE8_16BIT(); |
363 |
|
|
364 |
|
/* sum the vector */ |
365 |
|
sum = vec_sums(sum, vec_splat_s32(0)); |
366 |
|
sum = vec_splat(sum,3); |
367 |
|
|
368 |
|
vec_ste(sum,0,(int*)&result); |
369 |
|
|
370 |
|
/* and return */ |
371 |
|
return result; |
372 |
} |
} |