24 |
|
|
25 |
*/ |
*/ |
26 |
|
|
27 |
|
#define G_REG |
28 |
|
|
29 |
|
#ifdef G_REG |
30 |
|
register vector unsigned char perm0 asm ("%v29"); |
31 |
|
register vector unsigned char perm1 asm ("%v30"); |
32 |
|
register vector unsigned int zerovec asm ("%v31"); |
33 |
|
#endif |
34 |
|
|
35 |
#include <stdio.h> |
#include <stdio.h> |
36 |
|
|
37 |
#undef DEBUG |
#undef DEBUG |
38 |
|
|
39 |
|
static const vector unsigned char perms[2] = { |
40 |
|
(vector unsigned char)( /* Used when cur is aligned */ |
41 |
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
42 |
|
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 |
43 |
|
), |
44 |
|
(vector unsigned char)( /* Used when cur is unaligned */ |
45 |
|
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
46 |
|
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f |
47 |
|
), |
48 |
|
}; |
49 |
|
|
50 |
|
#ifdef G_REG |
51 |
|
void sadInit_altivec(void) |
52 |
|
{ |
53 |
|
perm0 = perms[0]; |
54 |
|
perm1 = perms[1]; |
55 |
|
zerovec = (vector unsigned int)(0); |
56 |
|
} |
57 |
|
static inline const vector unsigned char get_perm(unsigned long i) |
58 |
|
{ |
59 |
|
return i ? perm1 : perm0; |
60 |
|
} |
61 |
|
#define ZERODEF |
62 |
|
#define ZEROVEC zerovec |
63 |
|
#else |
64 |
|
/* Variant used when G_REG is not defined: there is nothing to initialise. */
void sadInit_altivec(void)
{
}
65 |
|
static inline const vector unsigned char get_perm(unsigned long i) |
66 |
|
{ |
67 |
|
return perms[i]; |
68 |
|
} |
69 |
|
#define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0) |
70 |
|
#define ZEROVEC zerovec |
71 |
|
#endif |
72 |
|
|
73 |
|
|
74 |
#define SAD16() \ |
#define SAD16() \ |
75 |
t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ |
t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ |
76 |
t2 = vec_max(t1, *cur); /* find largest of two */ \ |
t2 = vec_max(t1, *cur); /* find largest of two */ \ |
90 |
{ |
{ |
91 |
vector unsigned char perm; |
vector unsigned char perm; |
92 |
vector unsigned char t1, t2, t3, t4 ; |
vector unsigned char t1, t2, t3, t4 ; |
93 |
vector unsigned int sad, zero; |
vector unsigned int sad; |
94 |
vector signed int sumdiffs, best_vec; |
vector signed int sumdiffs, best_vec; |
95 |
unsigned long result; |
unsigned long result; |
96 |
|
ZERODEF; |
97 |
|
|
98 |
#ifdef DEBUG |
#ifdef DEBUG |
99 |
if (((unsigned long)cur) & 0xf) |
if (((unsigned long)cur) & 0xf) |
104 |
fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); |
fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); |
105 |
#endif |
#endif |
106 |
/* initialization */ |
/* initialization */ |
107 |
zero = (vector unsigned int)(0); |
sad = (vector unsigned int)(ZEROVEC); |
|
sad = (vector unsigned int)(0); |
|
108 |
stride >>= 4; |
stride >>= 4; |
109 |
perm = vec_lvsl(0, (unsigned char *)ref); |
perm = vec_lvsl(0, (unsigned char *)ref); |
110 |
*((unsigned long *)&best_vec) = best_sad; |
*((unsigned long *)&best_vec) = best_sad; |
116 |
SAD16(); |
SAD16(); |
117 |
SAD16(); |
SAD16(); |
118 |
/* Temp sum for exit */ |
/* Temp sum for exit */ |
119 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); |
120 |
if (vec_all_ge(sumdiffs, best_vec)) |
if (vec_all_ge(sumdiffs, best_vec)) |
121 |
goto bail; |
goto bail; |
122 |
SAD16(); |
SAD16(); |
123 |
SAD16(); |
SAD16(); |
124 |
SAD16(); |
SAD16(); |
125 |
SAD16(); |
SAD16(); |
126 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); |
127 |
if (vec_all_ge(sumdiffs, best_vec)) |
if (vec_all_ge(sumdiffs, best_vec)) |
128 |
goto bail; |
goto bail; |
129 |
SAD16(); |
SAD16(); |
136 |
SAD16(); |
SAD16(); |
137 |
|
|
138 |
/* sum all parts of difference into one 32 bit quantity */ |
/* sum all parts of difference into one 32 bit quantity */ |
139 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); |
140 |
bail: |
bail: |
141 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
142 |
sumdiffs = vec_splat( sumdiffs, 3 ); |
sumdiffs = vec_splat( sumdiffs, 3 ); |
155 |
sad = vec_sum4s(t5, sad); /* accumulate sum of differences */ \ |
sad = vec_sum4s(t5, sad); /* accumulate sum of differences */ \ |
156 |
cur += stride<<1; ref += stride<<1; |
cur += stride<<1; ref += stride<<1; |
157 |
|
|
|
static const vector unsigned char perms[2] = { |
|
|
(vector unsigned char)( /* Used when cur is aligned */ |
|
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
|
|
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 |
|
|
), |
|
|
(vector unsigned char)( /* Used when cur is unaligned */ |
|
|
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
|
|
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f |
|
|
), |
|
|
}; |
|
|
|
|
158 |
/* |
/* |
159 |
* This function assumes cur is 8 bytes aligned, stride is 16 bytes |
* This function assumes cur is 8 bytes aligned, stride is 16 bytes |
160 |
* aligned and ref is unaligned |
* aligned and ref is unaligned |
165 |
unsigned long stride) |
unsigned long stride) |
166 |
{ |
{ |
167 |
vector unsigned char t1, t2, t3, t4, t5, tp ; |
vector unsigned char t1, t2, t3, t4, t5, tp ; |
168 |
vector unsigned int sad, zero; |
vector unsigned int sad; |
169 |
vector signed int sumdiffs; |
vector signed int sumdiffs; |
170 |
vector unsigned char perm_cur; |
vector unsigned char perm_cur; |
171 |
vector unsigned char perm_ref1, perm_ref2; |
vector unsigned char perm_ref1, perm_ref2; |
172 |
unsigned long result; |
unsigned long result; |
173 |
|
ZERODEF; |
174 |
|
|
175 |
#ifdef DEBUG |
#ifdef DEBUG |
176 |
if (((unsigned long)cur) & 0x7) |
if (((unsigned long)cur) & 0x7) |
181 |
fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); |
fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); |
182 |
#endif |
#endif |
183 |
|
|
184 |
perm_cur = perms[(((unsigned long)cur)>>3) & 0x01]; |
perm_cur = get_perm((((unsigned long)cur)>>3) & 0x01); |
185 |
perm_ref1 = vec_lvsl(0, (unsigned char *)ref); |
perm_ref1 = vec_lvsl(0, (unsigned char *)ref); |
186 |
perm_ref2 = perms[0]; |
perm_ref2 = get_perm(0); |
187 |
|
|
188 |
/* initialization */ |
/* initialization */ |
189 |
zero = (vector unsigned int)(0); |
sad = (vector unsigned int)(ZEROVEC); |
|
sad = (vector unsigned int)(0); |
|
190 |
stride >>= 4; |
stride >>= 4; |
191 |
|
|
192 |
/* perform sum of differences between current and previous */ |
/* perform sum of differences between current and previous */ |
196 |
SAD8(); |
SAD8(); |
197 |
|
|
198 |
/* sum all parts of difference into one 32 bit quantity */ |
/* sum all parts of difference into one 32 bit quantity */ |
199 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); |
200 |
|
|
201 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
202 |
sumdiffs = vec_splat( sumdiffs, 3 ); |
sumdiffs = vec_splat( sumdiffs, 3 ); |
220 |
unsigned long stride) |
unsigned long stride) |
221 |
{ |
{ |
222 |
vector unsigned char t2,t3,t4, mn; |
vector unsigned char t2,t3,t4, mn; |
223 |
vector unsigned int mean, dev, zero; |
vector unsigned int mean, dev; |
224 |
vector signed int sumdiffs; |
vector signed int sumdiffs; |
225 |
vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15; |
vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15; |
226 |
unsigned long result; |
unsigned long result; |
227 |
|
ZERODEF; |
228 |
|
|
229 |
zero = (vector unsigned int)(0); |
mean = (vector unsigned int)(ZEROVEC); |
230 |
mean = (vector unsigned int)(0); |
dev = (vector unsigned int)(ZEROVEC); |
|
dev = (vector unsigned int)(0); |
|
231 |
stride >>= 4; |
stride >>= 4; |
232 |
|
|
233 |
MEAN16(0); |
MEAN16(0); |
247 |
MEAN16(14); |
MEAN16(14); |
248 |
MEAN16(15); |
MEAN16(15); |
249 |
|
|
250 |
sumdiffs = vec_sums((vector signed int) mean, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC); |
251 |
mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, |
mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, |
252 |
(vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14)); |
(vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14)); |
253 |
DEV16(0); |
DEV16(0); |
268 |
DEV16(15); |
DEV16(15); |
269 |
|
|
270 |
/* sum all parts of difference into one 32 bit quantity */ |
/* sum all parts of difference into one 32 bit quantity */ |
271 |
sumdiffs = vec_sums((vector signed int) dev, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC); |
272 |
|
|
273 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
274 |
sumdiffs = vec_splat( sumdiffs, 3 ); |
sumdiffs = vec_splat( sumdiffs, 3 ); |