[cvs] / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Diff of /xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Wed Apr 3 14:17:05 2002 UTC revision 1.10, Mon Apr 12 14:05:08 2004 UTC
# Line 18  Line 18 
18    
19    
20      $Id$      $Id$
     $Source$  
     $Date$  
     $Author$  
   
21  */  */
22    
23  #include <stdio.h>  #ifdef HAVE_ALTIVEC_H
24    #include <altivec.h>
25    #endif
26    
27    
28    #include "../../portab.h"
29    
30    /* no debugging by default */
31  #undef DEBUG  #undef DEBUG
32    
33    #include <stdio.h>
34    
/*
 * SAD16: one-row step of the 16x16 sum of absolute differences.
 * It expects the locals of the enclosing function to be in scope
 * (cur, ref, stride, perm, t1, t2, sad, sumdiffs, best_vec and a `bail` label).
 *
 * Both diff columns align the unaligned ref row with vec_perm and form
 * |ref - cur| as max - min (the operands are unsigned bytes).  The right-hand
 * (v1.10) column then sums that row with vec_sum4s, folds it into sumdiffs
 * with vec_sums, and jumps to `bail` as soon as any element of sumdiffs is
 * >= the matching element of best_vec.  Finally cur and ref advance by stride.
 */
35  #define SAD16() \  #define SAD16() \
36  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \
37  t2  = vec_max(t1, *cur);         /* find largest of two           */ \  t2  = vec_max(t1, *cur);         /* find largest of two           */ \
38  t3  = vec_min(t1, *cur);                 /* find smaller of two           */ \  t1  = vec_min(t1, *cur);                 /* find smaller of two           */ \
39  t4  = vec_sub(t2, t3);                   /* find absolute difference      */ \  t1  = vec_sub(t2, t1);                   /* find absolute difference      */ \
40  sad = vec_sum4s(t4, sad);                /* accumulate sum of differences */ \  sad = vec_sum4s(t1, vec_splat_u32(0));                /* sum of differences */ \
41    sumdiffs = (vector unsigned int)vec_sums((vector signed int)sad, (vector signed int)sumdiffs);    /* accumulate sumdiffs */ \
42    if(vec_any_ge(sumdiffs, best_vec)) \
43        goto bail; \
44  cur += stride; ref += stride;  cur += stride; ref += stride;
45    
46  /*  /*
47   * This function assumes cur and stride are 16 bytes aligned and ref is unaligned   * This function assumes cur and stride are 16 bytes aligned and ref is unaligned
48   */   */
49  unsigned long  unsigned long
50  sad16_altivec(  const vector unsigned char * cur,  sad16_altivec_c(const vector unsigned char *cur,
51                  const vector unsigned char * ref,                  const vector unsigned char * ref,
52                  unsigned long stride,                  unsigned long stride,
53                  const unsigned long best_sad)                  const unsigned long best_sad)
54  {  {
55    vector unsigned char perm;    vector unsigned char perm;
56    vector unsigned char t1, t2, t3, t4 ;          vector unsigned char t1, t2;
57    vector unsigned int sad, zero;          vector unsigned int sad;
58    vector signed int sumdiffs, best_vec;          vector unsigned int sumdiffs;
59            vector unsigned int best_vec;
60    unsigned long result;    unsigned long result;
61    
62    
63  #ifdef DEBUG  #ifdef DEBUG
64            /* print alignment errors if DEBUG is on */
65    if (((unsigned long)cur) & 0xf)    if (((unsigned long)cur) & 0xf)
66          fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);          fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
 //  if (((unsigned long)ref) & 0xf)  
 //      fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);  
67    if (stride & 0xf)    if (stride & 0xf)
68          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
69  #endif  #endif
70    /* initialization */    /* initialization */
71    zero = (vector unsigned int)(0);          sad = vec_splat_u32(0);
72    sad  = (vector unsigned int)(0);          sumdiffs = sad;
73    stride >>= 4;    stride >>= 4;
74    perm = vec_lvsl(0, (unsigned char *)ref);    perm = vec_lvsl(0, (unsigned char *)ref);
75    *((unsigned long *)&best_vec) = best_sad;    *((unsigned long *)&best_vec) = best_sad;
# Line 72  Line 80 
80    SAD16();    SAD16();
81    SAD16();    SAD16();
82    SAD16();    SAD16();
83    /* Temp sum for exit */  
   sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);  
   if (vec_all_ge(sumdiffs, best_vec))  
         goto bail;  
84    SAD16();    SAD16();
85    SAD16();    SAD16();
86    SAD16();    SAD16();
87    SAD16();    SAD16();
88    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);  
   if (vec_all_ge(sumdiffs, best_vec))  
         goto bail;  
89    SAD16();    SAD16();
90    SAD16();    SAD16();
91    SAD16();    SAD16();
92    SAD16();    SAD16();
93    
94    SAD16();    SAD16();
95    SAD16();    SAD16();
96    SAD16();    SAD16();
97    SAD16();    SAD16();
98    
   /* sum all parts of difference into one 32 bit quantity */  
   sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);  
99  bail:  bail:
100    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
101    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
102    vec_ste( sumdiffs, 0, (int *)&result );          vec_ste(sumdiffs, 0, (unsigned long *) &result);
103    return( result );          return result;
104  }  }
105    
106    
/*
 * SAD8: one step of the 8x8 sum of absolute differences, handling two rows
 * per invocation.  It expects the locals of the enclosing function to be in
 * scope (cur, ref, stride, perm_cur, perm_ref1, perm_ref2, t1, t2, tp, sad).
 *
 * t1 is built from cur[0] and cur[stride] through perm_cur.  The two ref rows
 * are each aligned with perm_ref1 and then merged into t2 through perm_ref2.
 * |t1 - t2| is formed as max - min and accumulated into sad with vec_sum4s.
 * cur and ref then advance by two rows (stride << 1).
 */
107  #define SAD8() \  #define SAD8() \
108  t1  = vec_perm(cur[0], cur[stride], perm_cur);  /* align current vector  */ \  t1  = vec_perm(cur[0], cur[stride], perm_cur);  /* align current vector  */ \
109  t2  = vec_perm(ref[0], ref[1], perm_ref1);  /* align current vector  */ \  t2  = vec_perm(ref[0], ref[1], perm_ref1);  /* align current vector  */ \
110  tp  = vec_perm(ref[stride], ref[stride+1], perm_ref1);  /* align current vector  */ \  tp  = vec_perm(ref[stride], ref[stride+1], perm_ref1);  /* align current vector  */ \
111  t2  = vec_perm(t2,tp,perm_ref2); \  t2  = vec_perm(t2,tp,perm_ref2); \
112  t3  = vec_max(t1, t2);                  /* find largest of two           */ \  tp  = vec_max(t1, t2);                  /* find largest of two           */ \
113  t4  = vec_min(t1, t2);                   /* find smaller of two           */ \  t1  = vec_min(t1, t2);                   /* find smaller of two           */ \
114  t5  = vec_sub(t3, t4);                   /* find absolute difference      */ \  tp  = vec_sub(tp, t1);                   /* find absolute difference      */ \
115  sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \  sad = vec_sum4s(tp, sad);                /* accumulate sum of differences */ \
116  cur += stride<<1; ref += stride<<1;  cur += stride<<1; ref += stride<<1;
117    
 static const vector unsigned char perms[2] = {  
         (vector unsigned char)( /* Used when cur is aligned */  
                 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  
                 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17  
         ),  
         (vector unsigned char)( /* Used when cur is unaligned */  
                 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,  
                 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f  
         ),  
 };  
   
118  /*  /*
119   * This function assumes cur is 8 bytes aligned, stride is 16 bytes   * This function assumes cur is 8 bytes aligned, stride is 16 bytes
120   * aligned and ref is unaligned   * aligned and ref is unaligned
121   */   */
122  unsigned long  unsigned long
123  sad8_altivec(   const vector unsigned char * cur,  sad8_altivec_c(const vector unsigned char *cur,
124                  const vector unsigned char * ref,                  const vector unsigned char * ref,
125                  unsigned long stride)                  unsigned long stride)
126  {  {
127    vector unsigned char t1, t2, t3, t4, t5, tp ;          vector unsigned char t1, t2, tp;
128    vector unsigned int sad, zero;          vector unsigned int sad;
129    vector signed int sumdiffs;          vector unsigned int sumdiffs;
130    vector unsigned char perm_cur;    vector unsigned char perm_cur;
131    vector unsigned char perm_ref1, perm_ref2;    vector unsigned char perm_ref1, perm_ref2;
132    unsigned long result;    unsigned long result;
133    
134  #ifdef DEBUG  #ifdef DEBUG
135            /* print alignment errors if DEBUG is on */
136    if (((unsigned long)cur) & 0x7)    if (((unsigned long)cur) & 0x7)
137          fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);          fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
 //  if (((unsigned long)ref) & 0x7)  
 //      fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);  
138    if (stride & 0xf)    if (stride & 0xf)
139          fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);          fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
140  #endif  #endif
141    
142    perm_cur = perms[(((unsigned long)cur)>>3) & 0x01];          /* check if cur is 8 or 16 bytes aligned and create the perm_cur vector */
143    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);
144    perm_ref2 = perms[0];          perm_ref2 = vec_add(vec_lvsl(0, (unsigned char*)NULL), vec_pack(vec_splat_u16(0), vec_splat_u16(8)));
145            perm_cur = vec_add(perm_ref2, vec_splat(vec_lvsl(0, (unsigned char*)cur), 0));
146    
147    /* initialization */    /* initialization */
148    zero = (vector unsigned int)(0);          sad = vec_splat_u32(0);
   sad  = (vector unsigned int)(0);  
149    stride >>= 4;    stride >>= 4;
150    
151    /* perform sum of differences between current and previous */    /* perform sum of differences between current and previous */
# Line 164  Line 155 
155    SAD8();    SAD8();
156    
157    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
158    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);          sumdiffs = (vector unsigned int)vec_sums((vector signed int) sad, vec_splat_s32(0));
159    
160    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
161    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
162    vec_ste( sumdiffs, 0, (int *)&result );          vec_ste(sumdiffs, 0, (unsigned int *) &result);
163    return( result );          return result;
164  }  }
165    
 #define MEAN16(i)\  
 c##i=*cur;\  
 mean = vec_sum4s(c##i,mean);\  
 cur += stride;  
   
 #define DEV16(i) \  
 t2  = vec_max(c##i, mn);                /* find largest of two           */ \  
 t3  = vec_min(c##i, mn);                         /* find smaller of two           */ \  
 t4  = vec_sub(t2, t3);                   /* find absolute difference      */ \  
 dev = vec_sum4s(t4, dev);  
166    
/*
 * MEAN16: fold the 16 pixels of the row at ptr into the four 32-bit partial
 * sums kept in `mean`, then advance ptr by one row.
 */
#define MEAN16() \
do { \
	mean = vec_sum4s(*ptr, mean); \
	ptr += stride; \
} while (0)

/*
 * DEV16: accumulate into `dev` the absolute differences between the 16
 * pixels of the row at ptr and the splatted mean `mn`, then advance ptr by
 * one row.  The operands are unsigned bytes, so |a - b| is formed as
 * max(a, b) - min(a, b).
 */
#define DEV16() \
do { \
	t2  = vec_max(*ptr, mn);    /* larger of the two   */ \
	t3  = vec_min(*ptr, mn);    /* smaller of the two  */ \
	t2  = vec_sub(t2, t3);      /* absolute difference */ \
	dev = vec_sum4s(t2, dev); \
	ptr += stride; \
} while (0)

/*
 * Sum of absolute deviations from the mean of a 16x16 block of bytes.
 *
 * cur    - first row of the block; must be 16 bytes aligned
 * stride - distance between rows in bytes; must be a multiple of 16
 *
 * Returns the sum over all 256 pixels of |pixel - mean|, where mean is the
 * pixel total divided by 256 (truncated).
 */
unsigned long
dev16_altivec_c(const vector unsigned char *cur,
				unsigned long stride)
{
	vector unsigned char t2, t3, mn;
	vector unsigned int mean, dev;
	vector unsigned int sumdiffs;
	const vector unsigned char *ptr;
	/*
	 * vec_ste() below stores exactly one 32-bit element.  Receiving it in a
	 * 32-bit object (instead of an unsigned long) keeps the returned value
	 * fully defined on targets where long is 64 bits wide.
	 */
	unsigned int result;

#ifdef DEBUG
	/* print alignment errors if DEBUG is on */
	if (((unsigned long) cur) & 0xf)
		fprintf(stderr, "dev16_altivec:incorrect align, cur: %lx\n", (unsigned long) cur);
	if (stride & 0xf)
		fprintf(stderr, "dev16_altivec:incorrect align, stride: %lu\n", stride);
#endif

	dev = mean = vec_splat_u32(0);
	stride >>= 4;				/* bytes -> 16-byte vectors */

	/* first pass: sum all pixels of the block */
	ptr = cur;

	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();
	MEAN16();

	/* add the four partial sums together; the total lands in element 3 */
	sumdiffs = (vector unsigned int) vec_sums((vector signed int) mean, vec_splat_s32(0));

	/*
	 * Divide by 16 * 16: the total is at most 255 * 256, so byte 14 of the
	 * vector (bits 8..15 of element 3) is total / 256.  Replicate that byte
	 * into every lane to obtain the mean vector.
	 */
	mn = vec_perm((vector unsigned char) sumdiffs, (vector unsigned char) sumdiffs, vec_splat_u8(14));

	/* second pass: accumulate |pixel - mean| over the same rows */
	ptr = cur;

	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();
	DEV16();

	/* sum all parts of difference into one 32 bit quantity */
	sumdiffs = (vector unsigned int) vec_sums((vector signed int) dev, vec_splat_s32(0));

	/* copy vector sum into unaligned result */
	sumdiffs = vec_splat(sumdiffs, 3);
	vec_ste(sumdiffs, 0, &result);
	return result;
}
256    
/*
 * SAD16BI: one-row step of the bidirectional 16x16 SAD.  Both reference rows
 * are aligned with their own permute mask and averaged with vec_avg; the
 * absolute difference against the cur row (max - min on unsigned bytes) is
 * added into `sum`.  All three pointers then advance by one row.
 */
#define SAD16BI() \
do { \
    t1 = vec_perm(ref1[0], ref1[1], mask1); \
    t2 = vec_perm(ref2[0], ref2[1], mask2); \
    t1 = vec_avg(t1, t2); \
    t2 = vec_max(t1, *cur); \
    t1 = vec_min(t1, *cur); \
    sad = vec_sub(t2, t1); \
    sum = vec_sum4s(sad, sum); \
    cur += stride; \
    ref1 += stride; \
    ref2 += stride; \
} while (0)

/*
 * Sum of absolute differences between a 16x16 block and the average of two
 * reference blocks.
 *
 * cur        - first row of the current block; must be 16 bytes aligned
 * ref1, ref2 - first rows of the two reference blocks; may be unaligned
 * stride     - distance between rows in bytes; must be a multiple of 16
 *
 * Returns the sum over all 256 pixels of |cur - avg(ref1, ref2)|.
 */
unsigned long
sad16bi_altivec_c(vector unsigned char *cur,
                        vector unsigned char *ref1,
                        vector unsigned char *ref2,
                        unsigned long stride)
{
    vector unsigned char t1, t2;
    vector unsigned char mask1, mask2;
    vector unsigned char sad;
    vector unsigned int sum;
    /*
     * vec_ste() below stores exactly one 32-bit element.  Receiving it in a
     * 32-bit object (instead of an unsigned long) keeps the returned value
     * fully defined on targets where long is 64 bits wide.
     */
    unsigned int result;

#ifdef DEBUG
    /* print alignment errors if DEBUG is on */
    if (((unsigned long) cur) & 0xf)
        fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lx\n", (unsigned long) cur);
    if (stride & 0xf)
        fprintf(stderr, "sad16bi_altivec:incorrect align, stride: %lu\n", stride);
#endif

    /* initialisation */
    stride >>= 4;                               /* bytes -> 16-byte vectors */
    mask1 = vec_lvsl(0, (unsigned char *) ref1);
    mask2 = vec_lvsl(0, (unsigned char *) ref2);
    sum = vec_splat_u32(0);

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();

    /* add the four partial sums; the total lands in element 3 */
    sum = (vector unsigned int) vec_sums((vector signed int) sum, vec_splat_s32(0));
    sum = vec_splat(sum, 3);
    vec_ste(sum, 0, &result);

    return result;
}
326    
327    
/*
 * SSE8_16BIT: one-row step of the 8x8 sum of squared errors.  Eight 16-bit
 * coefficients are loaded from each of b1 and b2 (which may be unaligned, so
 * two vec_ld results are merged through vec_lvsl/vec_perm), subtracted, and
 * the squared differences are accumulated into `acc` with vec_msum.  Both
 * pointers then advance by `stride` bytes.
 */
#define SSE8_16BIT() \
do { \
    row1 = vec_perm(vec_ld(0, b1), vec_ld(16, b1), vec_lvsl(0, b1)); \
    row2 = vec_perm(vec_ld(0, b2), vec_ld(16, b2), vec_lvsl(0, b2)); \
    delta = vec_sub(row1, row2); \
    acc = vec_msum(delta, delta, acc); \
    b1 = (const int16_t *) ((const int8_t *) b1 + stride); \
    b2 = (const int16_t *) ((const int8_t *) b2 + stride); \
} while (0)

/*
 * Sum of squared differences between two 8x8 blocks of 16-bit values.
 *
 * b1, b2 - first rows of the two blocks
 * stride - distance between rows in bytes
 *
 * Returns the sum over all 64 positions of (b1 - b2)^2.
 */
uint32_t
sse8_16bit_altivec_c(const int16_t * b1,
                         const int16_t * b2,
                         const uint32_t stride)
{
    vector signed short row1;
    vector signed short row2;
    vector signed short delta;
    vector signed int acc = vec_splat_s32(0);
    uint32_t result;

    /* eight rows, fully unrolled */
    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();

    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();

    /* collapse the four partial sums into element 3 and replicate it */
    acc = vec_sums(acc, vec_splat_s32(0));
    acc = vec_splat(acc, 3);

    /* store one 32-bit element into the scalar result */
    vec_ste(acc, 0, (int *) &result);

    return result;
}

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.10

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4