[cvs] / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Diff of /xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory | Revision Log | View Patch

revision 1.1, Wed Apr 3 14:17:05 2002 UTC revision 1.9, Mon Apr 5 20:39:49 2004 UTC
# Line 24  Line 24 
24    
25  */  */
26    
27  #include <stdio.h>  #ifdef HAVE_ALTIVEC_H
28    #include <altivec.h>
29    #endif
30    
31    
32    #include "../../portab.h"
33    
34    /* no debugging by default */
35  #undef DEBUG  #undef DEBUG
36    
37    #include <stdio.h>
38    
39  #define SAD16() \  #define SAD16() \
40  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \
41  t2  = vec_max(t1, *cur);         /* find largest of two           */ \  t2  = vec_max(t1, *cur);         /* find largest of two           */ \
42  t3  = vec_min(t1, *cur);                 /* find smaller of two           */ \  t1  = vec_min(t1, *cur);                 /* find smaller of two           */ \
43  t4  = vec_sub(t2, t3);                   /* find absolute difference      */ \  t1  = vec_sub(t2, t1);                   /* find absolute difference      */ \
44  sad = vec_sum4s(t4, sad);                /* accumulate sum of differences */ \  sad = vec_sum4s(t1, vec_splat_u32(0));                /* sum of differences */ \
45    sumdiffs = (vector unsigned int)vec_sums((vector signed int)sad, (vector signed int)sumdiffs);    /* accumulate sumdiffs */ \
46    if(vec_any_ge(sumdiffs, best_vec)) \
47        goto bail; \
48  cur += stride; ref += stride;  cur += stride; ref += stride;
49    
50  /*  /*
51   * This function assumes cur and stride are 16 bytes aligned and ref is unaligned   * This function assumes cur and stride are 16 bytes aligned and ref is unaligned
52   */   */
53  unsigned long  unsigned long
54  sad16_altivec(  const vector unsigned char * cur,  sad16_altivec_c(const vector unsigned char *cur,
55                  const vector unsigned char * ref,                  const vector unsigned char * ref,
56                  unsigned long stride,                  unsigned long stride,
57                  const unsigned long best_sad)                  const unsigned long best_sad)
58  {  {
59    vector unsigned char perm;    vector unsigned char perm;
60    vector unsigned char t1, t2, t3, t4 ;          vector unsigned char t1, t2;
61    vector unsigned int sad, zero;          vector unsigned int sad;
62    vector signed int sumdiffs, best_vec;          vector unsigned int sumdiffs;
63            vector unsigned int best_vec;
64    unsigned long result;    unsigned long result;
65    
66    
67  #ifdef DEBUG  #ifdef DEBUG
68            /* print alignment errors if DEBUG is on */
69    if (((unsigned long)cur) & 0xf)    if (((unsigned long)cur) & 0xf)
70          fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);          fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
 //  if (((unsigned long)ref) & 0xf)  
 //      fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);  
71    if (stride & 0xf)    if (stride & 0xf)
72          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
73  #endif  #endif
74    /* initialization */    /* initialization */
75    zero = (vector unsigned int)(0);          sad = vec_splat_u32(0);
76    sad  = (vector unsigned int)(0);          sumdiffs = sad;
77    stride >>= 4;    stride >>= 4;
78    perm = vec_lvsl(0, (unsigned char *)ref);    perm = vec_lvsl(0, (unsigned char *)ref);
79    *((unsigned long *)&best_vec) = best_sad;    *((unsigned long *)&best_vec) = best_sad;
# Line 72  Line 84 
84    SAD16();    SAD16();
85    SAD16();    SAD16();
86    SAD16();    SAD16();
87    /* Temp sum for exit */  
   sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);  
   if (vec_all_ge(sumdiffs, best_vec))  
         goto bail;  
88    SAD16();    SAD16();
89    SAD16();    SAD16();
90    SAD16();    SAD16();
91    SAD16();    SAD16();
92    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);  
   if (vec_all_ge(sumdiffs, best_vec))  
         goto bail;  
93    SAD16();    SAD16();
94    SAD16();    SAD16();
95    SAD16();    SAD16();
96    SAD16();    SAD16();
97    
98    SAD16();    SAD16();
99    SAD16();    SAD16();
100    SAD16();    SAD16();
101    SAD16();    SAD16();
102    
   /* sum all parts of difference into one 32 bit quantity */  
   sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);  
103  bail:  bail:
104    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
105    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
106    vec_ste( sumdiffs, 0, (int *)&result );          vec_ste(sumdiffs, 0, (unsigned long *) &result);
107    return( result );          return result;
108  }  }
109    
110    
111  #define SAD8() \  #define SAD8() \
112  t1  = vec_perm(cur[0], cur[stride], perm_cur);  /* align current vector  */ \  t1  = vec_perm(cur[0], cur[stride], perm_cur);  /* align current vector  */ \
113  t2  = vec_perm(ref[0], ref[1], perm_ref1);  /* align current vector  */ \  t2  = vec_perm(ref[0], ref[1], perm_ref1);  /* align current vector  */ \
114  tp  = vec_perm(ref[stride], ref[stride+1], perm_ref1);  /* align current vector  */ \  tp  = vec_perm(ref[stride], ref[stride+1], perm_ref1);  /* align current vector  */ \
115  t2  = vec_perm(t2,tp,perm_ref2); \  t2  = vec_perm(t2,tp,perm_ref2); \
116  t3  = vec_max(t1, t2);                  /* find largest of two           */ \  tp  = vec_max(t1, t2);                  /* find largest of two           */ \
117  t4  = vec_min(t1, t2);                   /* find smaller of two           */ \  t1  = vec_min(t1, t2);                   /* find smaller of two           */ \
118  t5  = vec_sub(t3, t4);                   /* find absolute difference      */ \  tp  = vec_sub(tp, t1);                   /* find absolute difference      */ \
119  sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \  sad = vec_sum4s(tp, sad);                /* accumulate sum of differences */ \
120  cur += stride<<1; ref += stride<<1;  cur += stride<<1; ref += stride<<1;
121    
 static const vector unsigned char perms[2] = {  
         (vector unsigned char)( /* Used when cur is aligned */  
                 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  
                 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17  
         ),  
         (vector unsigned char)( /* Used when cur is unaligned */  
                 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,  
                 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f  
         ),  
 };  
   
122  /*  /*
123   * This function assumes cur is 8 bytes aligned, stride is 16 bytes   * This function assumes cur is 8 bytes aligned, stride is 16 bytes
124   * aligned and ref is unaligned   * aligned and ref is unaligned
125   */   */
126  unsigned long  unsigned long
127  sad8_altivec(   const vector unsigned char * cur,  sad8_altivec_c(const vector unsigned char *cur,
128                  const vector unsigned char * ref,                  const vector unsigned char * ref,
129                  unsigned long stride)                  unsigned long stride)
130  {  {
131    vector unsigned char t1, t2, t3, t4, t5, tp ;          vector unsigned char t1, t2, tp;
132    vector unsigned int sad, zero;          vector unsigned int sad;
133    vector signed int sumdiffs;          vector unsigned int sumdiffs;
134    vector unsigned char perm_cur;    vector unsigned char perm_cur;
135    vector unsigned char perm_ref1, perm_ref2;    vector unsigned char perm_ref1, perm_ref2;
136    unsigned long result;    unsigned long result;
137    
138  #ifdef DEBUG  #ifdef DEBUG
139            /* print alignment errors if DEBUG is on */
140    if (((unsigned long)cur) & 0x7)    if (((unsigned long)cur) & 0x7)
141          fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);          fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
 //  if (((unsigned long)ref) & 0x7)  
 //      fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);  
142    if (stride & 0xf)    if (stride & 0xf)
143          fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);          fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
144  #endif  #endif
145    
146    perm_cur = perms[(((unsigned long)cur)>>3) & 0x01];          /* check if cur is 8 or 16 bytes aligned and create the perm_cur vector */
147    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);
148    perm_ref2 = perms[0];          perm_ref2 = vec_add(vec_lvsl(0, (unsigned char*)NULL), vec_pack(vec_splat_u16(0), vec_splat_u16(8)));
149            perm_cur = vec_add(perm_ref2, vec_splat(vec_lvsl(0, (unsigned char*)cur), 0));
150    
151    /* initialization */    /* initialization */
152    zero = (vector unsigned int)(0);          sad = vec_splat_u32(0);
   sad  = (vector unsigned int)(0);  
153    stride >>= 4;    stride >>= 4;
154    
155    /* perform sum of differences between current and previous */    /* perform sum of differences between current and previous */
# Line 164  Line 159 
159    SAD8();    SAD8();
160    
161    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
162    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);          sumdiffs = (vector unsigned int)vec_sums((vector signed int) sad, vec_splat_s32(0));
163    
164    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
165    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
166    vec_ste( sumdiffs, 0, (int *)&result );          vec_ste(sumdiffs, 0, (unsigned int *) &result);
167    return( result );          return result;
168  }  }
169    
 #define MEAN16(i)\  
 c##i=*cur;\  
 mean = vec_sum4s(c##i,mean);\  
 cur += stride;  
   
 #define DEV16(i) \  
 t2  = vec_max(c##i, mn);                /* find largest of two           */ \  
 t3  = vec_min(c##i, mn);                         /* find smaller of two           */ \  
 t4  = vec_sub(t2, t3);                   /* find absolute difference      */ \  
 dev = vec_sum4s(t4, dev);  
170    
171    #define MEAN16() \
172    mean = vec_sum4s(*ptr,mean);\
173    ptr += stride
174    
175    #define DEV16() \
176    t2  = vec_max(*ptr, mn);                    /* find largest of two           */ \
177    t3  = vec_min(*ptr, mn);                    /* find smaller of two           */ \
178    t2  = vec_sub(t2, t3);                      /* find absolute difference      */ \
179    dev = vec_sum4s(t2, dev); \
180    ptr += stride
181    
182    /*
183     * This function assumes cur is 16 bytes aligned and stride is 16 bytes
184     * aligned
185    */
186  unsigned long  unsigned long
187  dev16_altivec(  const vector unsigned char * cur,  dev16_altivec_c(const vector unsigned char *cur,
188                  unsigned long stride)                  unsigned long stride)
189  {  {
190    vector unsigned char t2,t3,t4, mn;          vector unsigned char t2, t3, mn;
191    vector unsigned int mean, dev, zero;          vector unsigned int mean, dev;
192    vector signed int sumdiffs;          vector unsigned int sumdiffs;
193    vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;          const vector unsigned char *ptr;
194    unsigned long result;    unsigned long result;
195    
196    zero = (vector unsigned int)(0);  #ifdef DEBUG
197    mean = (vector unsigned int)(0);          /* print alignment errors if DEBUG is on */
198    dev = (vector unsigned int)(0);          if(((unsigned long)cur) & 0x7)
199                fprintf(stderr, "dev16_altivec:incorrect align, cur: %x\n", cur);
200            if(stride & 0xf)
201                fprintf(stderr, "dev16_altivec:incorrect align, stride: %ld\n", stride);
202    #endif
203    
204            dev = mean = vec_splat_u32(0);
205    stride >>= 4;    stride >>= 4;
206    
207    MEAN16(0);          /* set pointer to iterate through cur */
208    MEAN16(1);          ptr = cur;
209    MEAN16(2);  
210    MEAN16(3);          MEAN16();
211    MEAN16(4);          MEAN16();
212    MEAN16(5);          MEAN16();
213    MEAN16(6);          MEAN16();
214    MEAN16(7);          MEAN16();
215    MEAN16(8);          MEAN16();
216    MEAN16(9);          MEAN16();
217    MEAN16(10);          MEAN16();
218    MEAN16(11);          MEAN16();
219    MEAN16(12);          MEAN16();
220    MEAN16(13);          MEAN16();
221    MEAN16(14);          MEAN16();
222    MEAN16(15);          MEAN16();
223            MEAN16();
224    sumdiffs = vec_sums((vector signed int) mean, (vector signed int) zero);          MEAN16();
225    mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs,          MEAN16();
226          (vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14));  
227    DEV16(0);          /* Add all together in sumdiffs */
228    DEV16(1);          sumdiffs = (vector unsigned int)vec_sums((vector signed int) mean, vec_splat_s32(0));
229    DEV16(2);          /* divide by 16 * 16 */
230    DEV16(3);          mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, vec_splat_u8(14));
231    DEV16(4);  
232    DEV16(5);          /* set pointer to iterate through cur */
233    DEV16(6);          ptr = cur;
234    DEV16(7);  
235    DEV16(8);          DEV16();
236    DEV16(9);          DEV16();
237    DEV16(10);          DEV16();
238    DEV16(11);          DEV16();
239    DEV16(12);          DEV16();
240    DEV16(13);          DEV16();
241    DEV16(14);          DEV16();
242    DEV16(15);          DEV16();
243            DEV16();
244            DEV16();
245            DEV16();
246            DEV16();
247            DEV16();
248            DEV16();
249            DEV16();
250            DEV16();
251    
252    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
253    sumdiffs = vec_sums((vector signed int) dev, (vector signed int) zero);          sumdiffs = (vector unsigned int)vec_sums((vector signed int) dev, vec_splat_s32(0));
254    
255    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
256    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
257    vec_ste( sumdiffs, 0, (int *)&result );          vec_ste(sumdiffs, 0, (unsigned int *) &result);
258    return( result );          return result;
259    }
260    
261    #define SAD16BI() \
262        t1 = vec_perm(ref1[0], ref1[1], mask1); \
263        t2 = vec_perm(ref2[0], ref2[1], mask2); \
264        t1 = vec_avg(t1, t2); \
265        t2 = vec_max(t1, *cur); \
266        t1 = vec_min(t1, *cur); \
267        sad = vec_sub(t2, t1); \
268        sum = vec_sum4s(sad, sum); \
269        cur += stride; \
270        ref1 += stride; \
271        ref2 += stride
272    
273    /*
274     * This function assumes cur is 16 bytes aligned, stride is 16 bytes
275     * aligned and ref1 and ref2 are unaligned
276    */
277    unsigned long
278    sad16bi_altivec_c(vector unsigned char *cur,
279                            vector unsigned char *ref1,
280                            vector unsigned char *ref2,
281                            unsigned long stride)
282    {
283        vector unsigned char t1, t2;
284        vector unsigned char mask1, mask2;
285        vector unsigned char sad;
286        vector unsigned int sum;
287        unsigned long result;
288    
289    #ifdef DEBUG
290        /* print alignment errors if DEBUG is on */
291        if(cur & 0xf)
292            fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %x\n", cur);
293        if(stride & 0xf)
294            fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %ld\n", stride);
295    #endif
296    
297        /* Initialisation stuff */
298        stride >>= 4;
299        mask1 = vec_lvsl(0, (unsigned char*)ref1);
300        mask2 = vec_lvsl(0, (unsigned char*)ref2);
301        sad = vec_splat_u8(0);
302        sum = (vector unsigned int)sad;
303    
304        SAD16BI();
305        SAD16BI();
306        SAD16BI();
307        SAD16BI();
308    
309        SAD16BI();
310        SAD16BI();
311        SAD16BI();
312        SAD16BI();
313    
314        SAD16BI();
315        SAD16BI();
316        SAD16BI();
317        SAD16BI();
318    
319        SAD16BI();
320        SAD16BI();
321        SAD16BI();
322        SAD16BI();
323    
324        sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));
325        sum = vec_splat(sum, 3);
326        vec_ste(sum, 0, (unsigned int*)&result);
327    
328        return result;
329    }
330    
331    
332    #define SSE8_16BIT() \
333    b1_vec = vec_perm(vec_ld(0,b1), vec_ld(16,b1), vec_lvsl(0,b1)); \
334    b2_vec = vec_perm(vec_ld(0,b2), vec_ld(16,b2), vec_lvsl(0,b2)); \
335    diff = vec_sub(b1_vec,b2_vec);  \
336    sum = vec_msum(diff,diff,sum);  \
337    b1 = (const int16_t*)((int8_t*)b1+stride);  \
338    b2 = (const int16_t*)((int8_t*)b2+stride)
339    
340    uint32_t
341    sse8_16bit_altivec_c(const int16_t * b1,
342                             const int16_t * b2,
343                             const uint32_t stride)
344    {
345        register vector signed short b1_vec;
346        register vector signed short b2_vec;
347        register vector signed short diff;
348        register vector signed int sum;
349        uint32_t result;
350    
351        /* initialize */
352        sum = vec_splat_s32(0);
353    
354        SSE8_16BIT();
355        SSE8_16BIT();
356        SSE8_16BIT();
357        SSE8_16BIT();
358    
359        SSE8_16BIT();
360        SSE8_16BIT();
361        SSE8_16BIT();
362        SSE8_16BIT();
363    
364        /* sum the vector */
365        sum = vec_sums(sum, vec_splat_s32(0));
366        sum = vec_splat(sum,3);
367    
368        vec_ste(sum,0,(int*)&result);
369    
370        /* and return */
371        return result;
372  }  }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.9

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4