[cvs] / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Diff of /xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Fri Sep 6 16:59:47 2002 UTC revision 1.10, Mon Apr 12 14:05:08 2004 UTC
# Line 1  Line 1 
1  /*****************************************************************************  /*
  *  
  *  XVID MPEG-4 VIDEO CODEC  
  *  - altivec sum of absolute difference (C version)  
  *  
  *  Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>  
  *  
  *  This program is an implementation of a part of one or more MPEG-4  
  *  Video tools as specified in ISO/IEC 14496-2 standard.  Those intending  
  *  to use this software module in hardware or software products are  
  *  advised that its use may infringe existing patents or copyrights, and  
  *  any such use would be at such party's own risk.  The original  
  *  developer of this software module and his/her company, and subsequent  
  *  editors and their companies, will have no liability for use of this  
  *  software or modifications or derivatives thereof.  
  *  
  *  This program is free software; you can redistribute it and/or modify  
  *  it under the terms of the GNU General Public License as published by  
  *  the Free Software Foundation; either version 2 of the License, or  
  *  (at your option) any later version.  
  *  
  *  This program is distributed in the hope that it will be useful,  
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of  
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  
  *  GNU General Public License for more details.  
  *  
  *  You should have received a copy of the GNU General Public License  
  *  along with this program; if not, write to the Free Software  
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA  
  *  
  *  $Id$  
  *  
  ****************************************************************************/  
   
 #define G_REG  
   
 #ifdef G_REG  
 register vector unsigned char perm0 asm("%v29");  
 register vector unsigned char perm1 asm("%v30");  
 register vector unsigned int zerovec asm("%v31");  
 #endif  
2    
3  #include <stdio.h>      Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
4    
5  #undef DEBUG      This program is free software; you can redistribute it and/or modify
6        it under the terms of the GNU General Public License as published by
7        the Free Software Foundation; either version 2 of the License, or
8        (at your option) any later version.
9    
10  static const vector unsigned char perms[2] = {      This program is distributed in the hope that it will be useful,
11          (vector unsigned char) (        /* Used when cur is aligned */      but WITHOUT ANY WARRANTY; without even the implied warranty of
12                                                             0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13                                                             0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17),      GNU General Public License for more details.
         (vector unsigned char) (        /* Used when cur is unaligned */  
                                                            0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,  
                                                            0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f),  
 };  
   
 #ifdef G_REG  
 void  
 sadInit_altivec(void)  
 {  
         perm0 = perms[0];  
         perm1 = perms[1];  
         zerovec = (vector unsigned int) (0);  
 }  
 static inline const vector unsigned char  
 get_perm(unsigned long i)  
 {  
         return i ? perm1 : perm0;  
 }  
14    
15  #define ZERODEF      You should have received a copy of the GNU General Public License
16  #define ZEROVEC zerovec      along with this program; if not, write to the Free Software
17  #else      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  void  
19  sadInit_altivec(void)  
20  {      $Id$
21  }  */
 static inline const vector unsigned char  
 get_perm(unsigned long i)  
 {  
         return perms[i];  
 }  
22    
23  #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)  #ifdef HAVE_ALTIVEC_H
24  #define ZEROVEC zerovec  #include <altivec.h>
25  #endif  #endif
26    
27    
28    #include "../../portab.h"
29    
30    /* no debugging by default */
31    #undef DEBUG
32    
33    #include <stdio.h>
34    
35  #define SAD16() \  #define SAD16() \
36  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \
37  t2  = vec_max(t1, *cur);         /* find largest of two           */ \  t2  = vec_max(t1, *cur);         /* find largest of two           */ \
38  t3  = vec_min(t1, *cur);                 /* find smaller of two           */ \  t1  = vec_min(t1, *cur);                 /* find smaller of two           */ \
39  t4  = vec_sub(t2, t3);                   /* find absolute difference      */ \  t1  = vec_sub(t2, t1);                   /* find absolute difference      */ \
40  sad = vec_sum4s(t4, sad);                /* accumulate sum of differences */ \  sad = vec_sum4s(t1, vec_splat_u32(0));                /* sum of differences */ \
41    sumdiffs = (vector unsigned int)vec_sums((vector signed int)sad, (vector signed int)sumdiffs);    /* accumulate sumdiffs */ \
42    if(vec_any_ge(sumdiffs, best_vec)) \
43        goto bail; \
44  cur += stride; ref += stride;  cur += stride; ref += stride;
45    
46  /*  /*
47   * This function assumes cur and stride are 16 bytes aligned and ref is unaligned   * This function assumes cur and stride are 16 bytes aligned and ref is unaligned
48   */   */
49  unsigned long  unsigned long
50  sad16_altivec(const vector unsigned char *cur,  sad16_altivec_c(const vector unsigned char *cur,
51                            const vector unsigned char *ref,                            const vector unsigned char *ref,
52                            unsigned long stride,                            unsigned long stride,
53                            const unsigned long best_sad)                            const unsigned long best_sad)
54  {  {
55          vector unsigned char perm;          vector unsigned char perm;
56          vector unsigned char t1, t2, t3, t4;          vector unsigned char t1, t2;
57          vector unsigned int sad;          vector unsigned int sad;
58          vector signed int sumdiffs, best_vec;          vector unsigned int sumdiffs;
59            vector unsigned int best_vec;
60          unsigned long result;          unsigned long result;
61    
         ZERODEF;  
62    
63  #ifdef DEBUG  #ifdef DEBUG
64            /* print alignment errors if DEBUG is on */
65          if (((unsigned long) cur) & 0xf)          if (((unsigned long) cur) & 0xf)
66                  fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);                  fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
 //  if (((unsigned long)ref) & 0xf)  
 //      fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);  
67          if (stride & 0xf)          if (stride & 0xf)
68                  fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);                  fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
69  #endif  #endif
70          /* initialization */          /* initialization */
71          sad = (vector unsigned int) (ZEROVEC);          sad = vec_splat_u32(0);
72            sumdiffs = sad;
73          stride >>= 4;          stride >>= 4;
74          perm = vec_lvsl(0, (unsigned char *) ref);          perm = vec_lvsl(0, (unsigned char *) ref);
75          *((unsigned long *) &best_vec) = best_sad;          *((unsigned long *) &best_vec) = best_sad;
# Line 130  Line 80 
80          SAD16();          SAD16();
81          SAD16();          SAD16();
82          SAD16();          SAD16();
83          /* Temp sum for exit */  
         sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);  
         if (vec_all_ge(sumdiffs, best_vec))  
                 goto bail;  
84          SAD16();          SAD16();
85          SAD16();          SAD16();
86          SAD16();          SAD16();
87          SAD16();          SAD16();
88          sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);  
         if (vec_all_ge(sumdiffs, best_vec))  
                 goto bail;  
89          SAD16();          SAD16();
90          SAD16();          SAD16();
91          SAD16();          SAD16();
92          SAD16();          SAD16();
93    
94          SAD16();          SAD16();
95          SAD16();          SAD16();
96          SAD16();          SAD16();
97          SAD16();          SAD16();
98    
         /* sum all parts of difference into one 32 bit quantity */  
         sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);  
99    bail:    bail:
100          /* copy vector sum into unaligned result */          /* copy vector sum into unaligned result */
101          sumdiffs = vec_splat(sumdiffs, 3);          sumdiffs = vec_splat(sumdiffs, 3);
102          vec_ste(sumdiffs, 0, (int *) &result);          vec_ste(sumdiffs, 0, (unsigned long *) &result);
103          return (result);          return result;
104  }  }
105    
106    
107  #define SAD8() \  #define SAD8() \
108  t1  = vec_perm(cur[0], cur[stride], perm_cur);  /* align current vector  */ \  t1  = vec_perm(cur[0], cur[stride], perm_cur);  /* align current vector  */ \
109  t2  = vec_perm(ref[0], ref[1], perm_ref1);  /* align current vector  */ \  t2  = vec_perm(ref[0], ref[1], perm_ref1);  /* align current vector  */ \
110  tp  = vec_perm(ref[stride], ref[stride+1], perm_ref1);  /* align current vector  */ \  tp  = vec_perm(ref[stride], ref[stride+1], perm_ref1);  /* align current vector  */ \
111  t2  = vec_perm(t2,tp,perm_ref2); \  t2  = vec_perm(t2,tp,perm_ref2); \
112  t3  = vec_max(t1, t2);                  /* find largest of two           */ \  tp  = vec_max(t1, t2);                  /* find largest of two           */ \
113  t4  = vec_min(t1, t2);                   /* find smaller of two           */ \  t1  = vec_min(t1, t2);                   /* find smaller of two           */ \
114  t5  = vec_sub(t3, t4);                   /* find absolute difference      */ \  tp  = vec_sub(tp, t1);                   /* find absolute difference      */ \
115  sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \  sad = vec_sum4s(tp, sad);                /* accumulate sum of differences */ \
116  cur += stride<<1; ref += stride<<1;  cur += stride<<1; ref += stride<<1;
117    
118  /*  /*
# Line 175  Line 120 
120   * aligned and ref is unaligned   * aligned and ref is unaligned
121   */   */
122  unsigned long  unsigned long
123  sad8_altivec(const vector unsigned char *cur,  sad8_altivec_c(const vector unsigned char *cur,
124                           const vector unsigned char *ref,                           const vector unsigned char *ref,
125                           unsigned long stride)                           unsigned long stride)
126  {  {
127          vector unsigned char t1, t2, t3, t4, t5, tp;          vector unsigned char t1, t2, tp;
128          vector unsigned int sad;          vector unsigned int sad;
129          vector signed int sumdiffs;          vector unsigned int sumdiffs;
130          vector unsigned char perm_cur;          vector unsigned char perm_cur;
131          vector unsigned char perm_ref1, perm_ref2;          vector unsigned char perm_ref1, perm_ref2;
132          unsigned long result;          unsigned long result;
133    
         ZERODEF;  
   
134  #ifdef DEBUG  #ifdef DEBUG
135            /* print alignment errors if DEBUG is on */
136          if (((unsigned long) cur) & 0x7)          if (((unsigned long) cur) & 0x7)
137                  fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);                  fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
 //  if (((unsigned long)ref) & 0x7)  
 //      fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);  
138          if (stride & 0xf)          if (stride & 0xf)
139                  fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);                  fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
140  #endif  #endif
141    
142          perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01);          /* check if cur is 8 or 16 bytes aligned and create the perm_cur vector */
143          perm_ref1 = vec_lvsl(0, (unsigned char *) ref);          perm_ref1 = vec_lvsl(0, (unsigned char *) ref);
144          perm_ref2 = get_perm(0);          perm_ref2 = vec_add(vec_lvsl(0, (unsigned char*)NULL), vec_pack(vec_splat_u16(0), vec_splat_u16(8)));
145            perm_cur = vec_add(perm_ref2, vec_splat(vec_lvsl(0, (unsigned char*)cur), 0));
146    
147          /* initialization */          /* initialization */
148          sad = (vector unsigned int) (ZEROVEC);          sad = vec_splat_u32(0);
149          stride >>= 4;          stride >>= 4;
150    
151          /* perform sum of differences between current and previous */          /* perform sum of differences between current and previous */
# Line 212  Line 155 
155          SAD8();          SAD8();
156    
157          /* sum all parts of difference into one 32 bit quantity */          /* sum all parts of difference into one 32 bit quantity */
158          sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);          sumdiffs = (vector unsigned int)vec_sums((vector signed int) sad, vec_splat_s32(0));
159    
160          /* copy vector sum into unaligned result */          /* copy vector sum into unaligned result */
161          sumdiffs = vec_splat(sumdiffs, 3);          sumdiffs = vec_splat(sumdiffs, 3);
162          vec_ste(sumdiffs, 0, (int *) &result);          vec_ste(sumdiffs, 0, (unsigned int *) &result);
163          return (result);          return result;
164  }  }
165    
 #define MEAN16(i)\  
 c##i=*cur;\  
 mean = vec_sum4s(c##i,mean);\  
 cur += stride;  
   
 #define DEV16(i) \  
 t2  = vec_max(c##i, mn);                /* find largest of two           */ \  
 t3  = vec_min(c##i, mn);                         /* find smaller of two           */ \  
 t4  = vec_sub(t2, t3);                   /* find absolute difference      */ \  
 dev = vec_sum4s(t4, dev);  
166    
167    #define MEAN16() \
168    mean = vec_sum4s(*ptr,mean);\
169    ptr += stride
170    
171    #define DEV16() \
172    t2  = vec_max(*ptr, mn);                    /* find largest of two           */ \
173    t3  = vec_min(*ptr, mn);                    /* find smaller of two           */ \
174    t2  = vec_sub(t2, t3);                      /* find absolute difference      */ \
175    dev = vec_sum4s(t2, dev); \
176    ptr += stride
177    
178    /*
179     * This function assumes cur is 16 bytes aligned and stride is 16 bytes
180     * aligned
181    */
182  unsigned long  unsigned long
183  dev16_altivec(const vector unsigned char *cur,  dev16_altivec_c(const vector unsigned char *cur,
184                            unsigned long stride)                            unsigned long stride)
185  {  {
186          vector unsigned char t2, t3, t4, mn;          vector unsigned char t2, t3, mn;
187          vector unsigned int mean, dev;          vector unsigned int mean, dev;
188          vector signed int sumdiffs;          vector unsigned int sumdiffs;
189          vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,          const vector unsigned char *ptr;
                 c13, c14, c15;  
190          unsigned long result;          unsigned long result;
191    
192          ZERODEF;  #ifdef DEBUG
193            /* print alignment errors if DEBUG is on */
194            if(((unsigned long)cur) & 0x7)
195                fprintf(stderr, "dev16_altivec:incorrect align, cur: %x\n", cur);
196            if(stride & 0xf)
197                fprintf(stderr, "dev16_altivec:incorrect align, stride: %ld\n", stride);
198    #endif
199    
200          mean = (vector unsigned int) (ZEROVEC);          dev = mean = vec_splat_u32(0);
         dev = (vector unsigned int) (ZEROVEC);  
201          stride >>= 4;          stride >>= 4;
202    
203          MEAN16(0);          /* set pointer to iterate through cur */
204          MEAN16(1);          ptr = cur;
205          MEAN16(2);  
206          MEAN16(3);          MEAN16();
207          MEAN16(4);          MEAN16();
208          MEAN16(5);          MEAN16();
209          MEAN16(6);          MEAN16();
210          MEAN16(7);          MEAN16();
211          MEAN16(8);          MEAN16();
212          MEAN16(9);          MEAN16();
213          MEAN16(10);          MEAN16();
214          MEAN16(11);          MEAN16();
215          MEAN16(12);          MEAN16();
216          MEAN16(13);          MEAN16();
217          MEAN16(14);          MEAN16();
218          MEAN16(15);          MEAN16();
219            MEAN16();
220          sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);          MEAN16();
221          mn = vec_perm((vector unsigned char) sumdiffs,          MEAN16();
222                                    (vector unsigned char) sumdiffs, (vector unsigned char) (14,  
223                                                                                                                                                     14,          /* Add all together in sumdiffs */
224                                                                                                                                                     14,          sumdiffs = (vector unsigned int)vec_sums((vector signed int) mean, vec_splat_s32(0));
225                                                                                                                                                     14,          /* divide by 16 * 16 */
226                                                                                                                                                     14,          mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, vec_splat_u8(14));
227                                                                                                                                                     14,  
228                                                                                                                                                     14,          /* set pointer to iterate through cur */
229                                                                                                                                                     14,          ptr = cur;
230                                                                                                                                                     14,  
231                                                                                                                                                     14,          DEV16();
232                                                                                                                                                     14,          DEV16();
233                                                                                                                                                     14,          DEV16();
234                                                                                                                                                     14,          DEV16();
235                                                                                                                                                     14,          DEV16();
236                                                                                                                                                     14,          DEV16();
237                                                                                                                                                     14));          DEV16();
238          DEV16(0);          DEV16();
239          DEV16(1);          DEV16();
240          DEV16(2);          DEV16();
241          DEV16(3);          DEV16();
242          DEV16(4);          DEV16();
243          DEV16(5);          DEV16();
244          DEV16(6);          DEV16();
245          DEV16(7);          DEV16();
246          DEV16(8);          DEV16();
         DEV16(9);  
         DEV16(10);  
         DEV16(11);  
         DEV16(12);  
         DEV16(13);  
         DEV16(14);  
         DEV16(15);  
247    
248          /* sum all parts of difference into one 32 bit quantity */          /* sum all parts of difference into one 32 bit quantity */
249          sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);          sumdiffs = (vector unsigned int)vec_sums((vector signed int) dev, vec_splat_s32(0));
250    
251          /* copy vector sum into unaligned result */          /* copy vector sum into unaligned result */
252          sumdiffs = vec_splat(sumdiffs, 3);          sumdiffs = vec_splat(sumdiffs, 3);
253          vec_ste(sumdiffs, 0, (int *) &result);          vec_ste(sumdiffs, 0, (unsigned int *) &result);
254          return (result);          return result;
255    }
256    
257    #define SAD16BI() \
258        t1 = vec_perm(ref1[0], ref1[1], mask1); \
259        t2 = vec_perm(ref2[0], ref2[1], mask2); \
260        t1 = vec_avg(t1, t2); \
261        t2 = vec_max(t1, *cur); \
262        t1 = vec_min(t1, *cur); \
263        sad = vec_sub(t2, t1); \
264        sum = vec_sum4s(sad, sum); \
265        cur += stride; \
266        ref1 += stride; \
267        ref2 += stride
268    
269    /*
270     * This function assumes cur is 16 bytes aligned, stride is 16 bytes
271     * aligned and ref1 and ref2 are unaligned
272    */
273    unsigned long
274    sad16bi_altivec_c(vector unsigned char *cur,
275                            vector unsigned char *ref1,
276                            vector unsigned char *ref2,
277                            unsigned long stride)
278    {
279        vector unsigned char t1, t2;
280        vector unsigned char mask1, mask2;
281        vector unsigned char sad;
282        vector unsigned int sum;
283        unsigned long result;
284    
285    #ifdef DEBUG
286        /* print alignment errors if this is on */
287        if(cur & 0xf)
288            fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %x\n", cur);
289        if(stride & 0xf)
290            fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %ld\n", stride);
291    #endif
292    
293        /* Initialisation stuff */
294        stride >>= 4;
295        mask1 = vec_lvsl(0, (unsigned char*)ref1);
296        mask2 = vec_lvsl(0, (unsigned char*)ref2);
297        sad = vec_splat_u8(0);
298        sum = (vector unsigned int)sad;
299    
300        SAD16BI();
301        SAD16BI();
302        SAD16BI();
303        SAD16BI();
304    
305        SAD16BI();
306        SAD16BI();
307        SAD16BI();
308        SAD16BI();
309    
310        SAD16BI();
311        SAD16BI();
312        SAD16BI();
313        SAD16BI();
314    
315        SAD16BI();
316        SAD16BI();
317        SAD16BI();
318        SAD16BI();
319    
320        sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));
321        sum = vec_splat(sum, 3);
322        vec_ste(sum, 0, (unsigned int*)&result);
323    
324        return result;
325    }
326    
327    
328    #define SSE8_16BIT() \
329    b1_vec = vec_perm(vec_ld(0,b1), vec_ld(16,b1), vec_lvsl(0,b1)); \
330    b2_vec = vec_perm(vec_ld(0,b2), vec_ld(16,b2), vec_lvsl(0,b2)); \
331    diff = vec_sub(b1_vec,b2_vec);  \
332    sum = vec_msum(diff,diff,sum);  \
333    b1 = (const int16_t*)((int8_t*)b1+stride);  \
334    b2 = (const int16_t*)((int8_t*)b2+stride)
335    
336    uint32_t
337    sse8_16bit_altivec_c(const int16_t * b1,
338                             const int16_t * b2,
339                             const uint32_t stride)
340    {
341        register vector signed short b1_vec;
342        register vector signed short b2_vec;
343        register vector signed short diff;
344        register vector signed int sum;
345        uint32_t result;
346    
347        /* initialize */
348        sum = vec_splat_s32(0);
349    
350        SSE8_16BIT();
351        SSE8_16BIT();
352        SSE8_16BIT();
353        SSE8_16BIT();
354    
355        SSE8_16BIT();
356        SSE8_16BIT();
357        SSE8_16BIT();
358        SSE8_16BIT();
359    
360        /* sum the vector */
361        sum = vec_sums(sum, vec_splat_s32(0));
362        sum = vec_splat(sum,3);
363    
364        vec_ste(sum,0,(int*)&result);
365    
366        /* and return */
367        return result;
368  }  }

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.10

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4