[cvs] / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Diff of /xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory | Revision Log | View Patch

revision 1.2, Thu Apr 11 10:18:40 2002 UTC revision 1.11, Thu Dec 9 23:02:54 2004 UTC
# Line 18  Line 18 
18    
19    
20      $Id$      $Id$
     $Source$  
     $Date$  
     $Author$  
   
21  */  */
22    
23  #define G_REG  #ifdef HAVE_ALTIVEC_H
24    #include <altivec.h>
 #ifdef G_REG  
 register vector unsigned char perm0 asm ("%v29");  
 register vector unsigned char perm1 asm ("%v30");  
 register vector unsigned int zerovec asm ("%v31");  
25  #endif  #endif
26    
 #include <stdio.h>  
   
 #undef DEBUG  
27    
28  static const vector unsigned char perms[2] = {  #include "../../portab.h"
         (vector unsigned char)( /* Used when cur is aligned */  
                 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  
                 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17  
         ),  
         (vector unsigned char)( /* Used when cur is unaligned */  
                 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,  
                 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f  
         ),  
 };  
29    
30  #ifdef G_REG  /* no debugging by default */
31  void sadInit_altivec(void)  #undef DEBUG
 {  
         perm0 = perms[0];  
         perm1 = perms[1];  
         zerovec = (vector unsigned int)(0);  
 }  
 static inline const vector unsigned char get_perm(unsigned long i)  
 {  
         return i ? perm1 : perm0;  
 }  
 #define ZERODEF  
 #define ZEROVEC zerovec  
 #else  
 void sadInit_altivec(void) { }  
 static inline const vector unsigned char get_perm(unsigned long i)  
 {  
         return perms[i];  
 }  
 #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)  
 #define ZEROVEC zerovec  
 #endif  
32    
33    #include <stdio.h>
34    
35  #define SAD16() \  #define SAD16() \
36  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \
37  t2  = vec_max(t1, *cur);         /* find largest of two           */ \  t2  = vec_max(t1, *cur);         /* find largest of two           */ \
38  t3  = vec_min(t1, *cur);                 /* find smaller of two           */ \  t1  = vec_min(t1, *cur);                 /* find smaller of two           */ \
39  t4  = vec_sub(t2, t3);                   /* find absolute difference      */ \  t1  = vec_sub(t2, t1);                   /* find absolute difference      */ \
40  sad = vec_sum4s(t4, sad);                /* accumulate sum of differences */ \  sad = vec_sum4s(t1, vec_splat_u32(0));                /* sum of differences */ \
41    sumdiffs = (vector unsigned int)vec_sums((vector signed int)sad, (vector signed int)sumdiffs);    /* accumulate sumdiffs */ \
42    if(vec_any_ge(sumdiffs, best_vec)) \
43        goto bail; \
44  cur += stride; ref += stride;  cur += stride; ref += stride;
45    
46  /*  /*
47   * This function assumes cur and stride are 16 bytes aligned and ref is unaligned   * This function assumes cur and stride are 16 bytes aligned and ref is unaligned
48   */   */
49  unsigned long  
50  sad16_altivec(  const vector unsigned char * cur,  uint32_t
51                  const vector unsigned char * ref,  sad16_altivec_c(vector unsigned char *cur,
52                  unsigned long stride,                            vector unsigned char *ref,
53                  const unsigned long best_sad)                            uint32_t stride,
54                              const uint32_t best_sad)
55  {  {
56    vector unsigned char perm;    vector unsigned char perm;
57    vector unsigned char t1, t2, t3, t4 ;          vector unsigned char t1, t2;
58    vector unsigned int sad;    vector unsigned int sad;
59    vector signed int sumdiffs, best_vec;          vector unsigned int sumdiffs;
60    unsigned long result;          vector unsigned int best_vec;
61    ZERODEF;          uint32_t result;
62    
63    
64  #ifdef DEBUG  #ifdef DEBUG
65            /* print alignment errors if DEBUG is on */
66    if (((unsigned long)cur) & 0xf)    if (((unsigned long)cur) & 0xf)
67          fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);                  fprintf(stderr, "sad16_altivec:incorrect align, cur: %lx\n", (long)cur);
 //  if (((unsigned long)ref) & 0xf)  
 //      fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);  
68    if (stride & 0xf)    if (stride & 0xf)
69          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);                  fprintf(stderr, "sad16_altivec:incorrect align, stride: %lu\n", stride);
70  #endif  #endif
71    /* initialization */    /* initialization */
72    sad  = (vector unsigned int)(ZEROVEC);          sad = vec_splat_u32(0);
73            sumdiffs = sad;
74    stride >>= 4;    stride >>= 4;
75    perm = vec_lvsl(0, (unsigned char *)ref);    perm = vec_lvsl(0, (unsigned char *)ref);
76    *((unsigned long *)&best_vec) = best_sad;          *((uint32_t*)&best_vec) = best_sad;
77    best_vec = vec_splat(best_vec, 0);    best_vec = vec_splat(best_vec, 0);
78    
79    /* perform sum of differences between current and previous */    /* perform sum of differences between current and previous */
# Line 115  Line 81 
81    SAD16();    SAD16();
82    SAD16();    SAD16();
83    SAD16();    SAD16();
84    /* Temp sum for exit */  
   sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);  
   if (vec_all_ge(sumdiffs, best_vec))  
         goto bail;  
85    SAD16();    SAD16();
86    SAD16();    SAD16();
87    SAD16();    SAD16();
88    SAD16();    SAD16();
89    sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);  
   if (vec_all_ge(sumdiffs, best_vec))  
         goto bail;  
90    SAD16();    SAD16();
91    SAD16();    SAD16();
92    SAD16();    SAD16();
93    SAD16();    SAD16();
94    
95    SAD16();    SAD16();
96    SAD16();    SAD16();
97    SAD16();    SAD16();
98    SAD16();    SAD16();
99    
   /* sum all parts of difference into one 32 bit quantity */  
   sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);  
100  bail:  bail:
101    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
102    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
103    vec_ste( sumdiffs, 0, (int *)&result );          vec_ste(sumdiffs, 0, (uint32_t*) &result);
104    return( result );          return result;
105  }  }
106    
107    
108  #define SAD8() \  #define SAD8() \
109  t1  = vec_perm(cur[0], cur[stride], perm_cur);  /* align current vector  */ \          c = vec_perm(vec_ld(0,cur),vec_ld(16,cur),vec_lvsl(0,cur));\
110  t2  = vec_perm(ref[0], ref[1], perm_ref1);  /* align current vector  */ \          r = vec_perm(vec_ld(0,ref),vec_ld(16,ref),vec_lvsl(0,ref));\
111  tp  = vec_perm(ref[stride], ref[stride+1], perm_ref1);  /* align current vector  */ \          c = vec_sub(vec_max(c,r),vec_min(c,r));\
112  t2  = vec_perm(t2,tp,perm_ref2); \          sad = vec_sum4s(c,sad);\
113  t3  = vec_max(t1, t2);                  /* find largest of two           */ \          cur += stride;\
114  t4  = vec_min(t1, t2);                   /* find smaller of two           */ \          ref += stride
 t5  = vec_sub(t3, t4);                   /* find absolute difference      */ \  
 sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \  
 cur += stride<<1; ref += stride<<1;  
115    
116  /*  /*
117   * This function assumes cur is 8 bytes aligned, stride is 16 bytes   * This function assumes nothing
  * aligned and ref is unaligned  
118   */   */
119  unsigned long  
120  sad8_altivec(   const vector unsigned char * cur,  uint32_t
121                  const vector unsigned char * ref,  sad8_altivec_c(const uint8_t * cur,
122                  unsigned long stride)             const uint8_t *ref,
123               const uint32_t stride)
124  {  {
125    vector unsigned char t1, t2, t3, t4, t5, tp ;          uint32_t result = 0;
   vector unsigned int sad;  
   vector signed int sumdiffs;  
   vector unsigned char perm_cur;  
   vector unsigned char perm_ref1, perm_ref2;  
   unsigned long result;  
   ZERODEF;  
126    
127  #ifdef DEBUG          register vector unsigned int sad;
128    if (((unsigned long)cur) & 0x7)          register vector unsigned char c;
129          fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);          register vector unsigned char r;
 //  if (((unsigned long)ref) & 0x7)  
 //      fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);  
   if (stride & 0xf)  
         fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);  
 #endif  
130    
131    perm_cur = get_perm((((unsigned long)cur)>>3) & 0x01);          /* initialize */
132    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);          sad = vec_splat_u32(0);
   perm_ref2 = get_perm(0);  
133    
134    /* initialization */          /* Perform sad operations */
135    sad  = (vector unsigned int)(ZEROVEC);          SAD8();
136    stride >>= 4;          SAD8();
137            SAD8();
138            SAD8();
139    
   /* perform sum of differences between current and previous */  
140    SAD8();    SAD8();
141    SAD8();    SAD8();
142    SAD8();    SAD8();
143    SAD8();    SAD8();
144    
145    /* sum all parts of difference into one 32 bit quantity */          /* finish addition, add the first 2 together */
146    sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);          sad = vec_and(sad, (vector unsigned int)vec_pack(vec_splat_u16(-1),vec_splat_u16(0)));
147            sad = (vector unsigned int)vec_sums((vector signed int)sad, vec_splat_s32(0));
148            sad = vec_splat(sad,3);
149            vec_ste(sad, 0, &result);
150    
151    /* copy vector sum into unaligned result */          return result;
   sumdiffs = vec_splat( sumdiffs, 3 );  
   vec_ste( sumdiffs, 0, (int *)&result );  
   return( result );  
152  }  }
153    
154  #define MEAN16(i)\  
155  c##i=*cur;\  
156  mean = vec_sum4s(c##i,mean);\  
157  cur += stride;  #define MEAN16() \
158    mean = vec_sum4s(*ptr,mean);\
159  #define DEV16(i) \  ptr += stride
160  t2  = vec_max(c##i, mn);                /* find largest of two           */ \  
161  t3  = vec_min(c##i, mn);                         /* find smaller of two           */ \  #define DEV16() \
162  t4  = vec_sub(t2, t3);                   /* find absolute difference      */ \  t2  = vec_max(*ptr, mn);                    /* find largest of two           */ \
163  dev = vec_sum4s(t4, dev);  t3  = vec_min(*ptr, mn);                    /* find smaller of two           */ \
164    t2  = vec_sub(t2, t3);                      /* find absolute difference      */ \
165  unsigned long  dev = vec_sum4s(t2, dev); \
166  dev16_altivec(  const vector unsigned char * cur,  ptr += stride
167                  unsigned long stride)  
168    /*
169     * This function assumes cur is 16 bytes aligned and stride is 16 bytes
170     * aligned
171    */
172    
173    uint32_t
174    dev16_altivec_c(vector unsigned char *cur,
175                              uint32_t stride)
176  {  {
177    vector unsigned char t2,t3,t4, mn;          vector unsigned char t2, t3, mn;
178    vector unsigned int mean, dev;    vector unsigned int mean, dev;
179    vector signed int sumdiffs;          vector unsigned int sumdiffs;
180    vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;          vector unsigned char *ptr;
181    unsigned long result;          uint32_t result;
   ZERODEF;  
182    
183    mean = (vector unsigned int)(ZEROVEC);  #ifdef DEBUG
184    dev = (vector unsigned int)(ZEROVEC);          /* print alignment errors if DEBUG is on */
185            if(((unsigned long)cur) & 0x7)
186                fprintf(stderr, "dev16_altivec:incorrect align, cur: %lx\n", (long)cur);
187            if(stride & 0xf)
188                fprintf(stderr, "dev16_altivec:incorrect align, stride: %lu\n", stride);
189    #endif
190    
191            dev = mean = vec_splat_u32(0);
192    stride >>= 4;    stride >>= 4;
193    
194    MEAN16(0);          /* set pointer to iterate through cur */
195    MEAN16(1);          ptr = cur;
196    MEAN16(2);  
197    MEAN16(3);          MEAN16();
198    MEAN16(4);          MEAN16();
199    MEAN16(5);          MEAN16();
200    MEAN16(6);          MEAN16();
201    MEAN16(7);          MEAN16();
202    MEAN16(8);          MEAN16();
203    MEAN16(9);          MEAN16();
204    MEAN16(10);          MEAN16();
205    MEAN16(11);          MEAN16();
206    MEAN16(12);          MEAN16();
207    MEAN16(13);          MEAN16();
208    MEAN16(14);          MEAN16();
209    MEAN16(15);          MEAN16();
210            MEAN16();
211    sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);          MEAN16();
212    mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs,          MEAN16();
213          (vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14));  
214    DEV16(0);          /* Add all together in sumdiffs */
215    DEV16(1);          sumdiffs = (vector unsigned int)vec_sums((vector signed int) mean, vec_splat_s32(0));
216    DEV16(2);          /* divide by 16 * 16 */
217    DEV16(3);          mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, vec_splat_u8(14));
218    DEV16(4);  
219    DEV16(5);          /* set pointer to iterate through cur */
220    DEV16(6);          ptr = cur;
221    DEV16(7);  
222    DEV16(8);          DEV16();
223    DEV16(9);          DEV16();
224    DEV16(10);          DEV16();
225    DEV16(11);          DEV16();
226    DEV16(12);          DEV16();
227    DEV16(13);          DEV16();
228    DEV16(14);          DEV16();
229    DEV16(15);          DEV16();
230            DEV16();
231            DEV16();
232            DEV16();
233            DEV16();
234            DEV16();
235            DEV16();
236            DEV16();
237            DEV16();
238    
239    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
240    sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);          sumdiffs = (vector unsigned int)vec_sums((vector signed int) dev, vec_splat_s32(0));
241    
242    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
243    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
244    vec_ste( sumdiffs, 0, (int *)&result );          vec_ste(sumdiffs, 0, (uint32_t*) &result);
245    return( result );          return result;
246    }
247    
248    #define SAD16BI() \
249        t1 = vec_perm(ref1[0], ref1[1], mask1); \
250        t2 = vec_perm(ref2[0], ref2[1], mask2); \
251        t1 = vec_avg(t1, t2); \
252        t2 = vec_max(t1, *cur); \
253        t1 = vec_min(t1, *cur); \
254        sad = vec_sub(t2, t1); \
255        sum = vec_sum4s(sad, sum); \
256        cur += stride; \
257        ref1 += stride; \
258        ref2 += stride
259    
260    /*
261     * This function assumes cur is 16 bytes aligned, stride is 16 bytes
262     * aligned and ref1 and ref2 are unaligned
263    */
264    
265    uint32_t
266    sad16bi_altivec_c(vector unsigned char *cur,
267                            vector unsigned char *ref1,
268                            vector unsigned char *ref2,
269                            uint32_t stride)
270    {
271        vector unsigned char t1, t2;
272        vector unsigned char mask1, mask2;
273        vector unsigned char sad;
274        vector unsigned int sum;
275        uint32_t result;
276    
277    #ifdef DEBUG
278        /* print alignment errors if DEBUG is on */
279        if((long)cur & 0xf)
280            fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lx\n", (long)cur);
281        if(stride & 0xf)
282            fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lu\n", stride);
283    #endif
284    
285        /* Initialisation stuff */
286        stride >>= 4;
287        mask1 = vec_lvsl(0, (unsigned char*)ref1);
288        mask2 = vec_lvsl(0, (unsigned char*)ref2);
289        sad = vec_splat_u8(0);
290        sum = (vector unsigned int)sad;
291    
292        SAD16BI();
293        SAD16BI();
294        SAD16BI();
295        SAD16BI();
296    
297        SAD16BI();
298        SAD16BI();
299        SAD16BI();
300        SAD16BI();
301    
302        SAD16BI();
303        SAD16BI();
304        SAD16BI();
305        SAD16BI();
306    
307        SAD16BI();
308        SAD16BI();
309        SAD16BI();
310        SAD16BI();
311    
312        sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));
313        sum = vec_splat(sum, 3);
314        vec_ste(sum, 0, (uint32_t*)&result);
315    
316        return result;
317    }
318    
319    
320    #define SSE8_16BIT() \
321    b1_vec = vec_perm(vec_ld(0,b1), vec_ld(16,b1), vec_lvsl(0,b1)); \
322    b2_vec = vec_perm(vec_ld(0,b2), vec_ld(16,b2), vec_lvsl(0,b2)); \
323    diff = vec_sub(b1_vec,b2_vec);  \
324    sum = vec_msum(diff,diff,sum);  \
325    b1 = (const int16_t*)((int8_t*)b1+stride);  \
326    b2 = (const int16_t*)((int8_t*)b2+stride)
327    
328    uint32_t
329    sse8_16bit_altivec_c(const int16_t * b1,
330                             const int16_t * b2,
331                             const uint32_t stride)
332    {
333        register vector signed short b1_vec;
334        register vector signed short b2_vec;
335        register vector signed short diff;
336        register vector signed int sum;
337        uint32_t result;
338    
339        /* initialize */
340        sum = vec_splat_s32(0);
341    
342        SSE8_16BIT();
343        SSE8_16BIT();
344        SSE8_16BIT();
345        SSE8_16BIT();
346    
347        SSE8_16BIT();
348        SSE8_16BIT();
349        SSE8_16BIT();
350        SSE8_16BIT();
351    
352        /* sum the vector */
353        sum = vec_sums(sum, vec_splat_s32(0));
354        sum = vec_splat(sum,3);
355    
356        vec_ste(sum,0,(int*)&result);
357    
358        /* and return */
359        return result;
360  }  }

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.11

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4