1 |
/*****************************************************************************
|
* |
|
|
* XVID MPEG-4 VIDEO CODEC |
|
|
* - altivec sum of absolute difference (C version) |
|
|
* |
|
|
* Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org> |
|
|
* |
|
|
* This file is part of XviD, a free MPEG-4 video encoder/decoder |
|
|
* |
|
|
* XviD is free software; you can redistribute it and/or modify it |
|
|
* under the terms of the GNU General Public License as published by |
|
|
* the Free Software Foundation; either version 2 of the License, or |
|
|
* (at your option) any later version. |
|
|
* |
|
|
* This program is distributed in the hope that it will be useful, |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
|
* GNU General Public License for more details. |
|
|
* |
|
|
* You should have received a copy of the GNU General Public License |
|
|
* along with this program; if not, write to the Free Software |
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
|
* |
|
|
* Under section 8 of the GNU General Public License, the copyright |
|
|
* holders of XVID explicitly forbid distribution in the following |
|
|
* countries: |
|
|
* |
|
|
* - Japan |
|
|
* - United States of America |
|
|
* |
|
|
* Linking XviD statically or dynamically with other modules is making a |
|
|
* combined work based on XviD. Thus, the terms and conditions of the |
|
|
* GNU General Public License cover the whole combination. |
|
|
* |
|
|
* As a special exception, the copyright holders of XviD give you |
|
|
* permission to link XviD with independent modules that communicate with |
|
|
* XviD solely through the VFW1.1 and DShow interfaces, regardless of the |
|
|
* license terms of these independent modules, and to copy and distribute |
|
|
* the resulting combined work under terms of your choice, provided that |
|
|
* every copy of the combined work is accompanied by a complete copy of |
|
|
* the source code of XviD (the version of XviD used to produce the |
|
|
* combined work), being distributed under the terms of the GNU General |
|
|
* Public License plus this exception. An independent module is a module |
|
|
* which is not derived from or based on XviD. |
|
|
* |
|
|
* Note that people who make modified versions of XviD are not obligated |
|
|
* to grant this special exception for their modified versions; it is |
|
|
* their choice whether to do so. The GNU General Public License gives |
|
|
* permission to release a modified version without this exception; this |
|
|
* exception also makes it possible to release a modified version which |
|
|
* carries forward this exception. |
|
|
* |
|
|
* $Id$ |
|
|
* |
|
|
****************************************************************************/ |
|
|
|
|
|
/* When G_REG is defined, three AltiVec registers are dedicated to holding
 * the permutation tables and a zero vector across calls; they are filled
 * in once by sadInit_altivec(). */
#define G_REG

#ifdef G_REG
/* NOTE(review): this pins v29-v31 for the whole translation unit; assumes
 * the target ABI permits reserving these registers -- confirm for the
 * toolchain in use. */
register vector unsigned char perm0 asm("%v29");
register vector unsigned char perm1 asm("%v30");
register vector unsigned int zerovec asm("%v31");
#endif
|
2 |
|
|
3 |
#include <stdio.h> |
Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org> |
4 |
|
|
5 |
#undef DEBUG |
This program is free software; you can redistribute it and/or modify |
6 |
|
it under the terms of the GNU General Public License as published by |
7 |
|
the Free Software Foundation; either version 2 of the License, or |
8 |
|
(at your option) any later version. |
9 |
|
|
10 |
static const vector unsigned char perms[2] = { |
This program is distributed in the hope that it will be useful, |
11 |
(vector unsigned char) ( /* Used when cur is aligned */ |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 |
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 |
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17), |
GNU General Public License for more details. |
|
(vector unsigned char) ( /* Used when cur is unaligned */ |
|
|
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
|
|
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f), |
|
|
}; |
|
|
|
|
|
#ifdef G_REG |
|
|
void |
|
|
sadInit_altivec(void) |
|
|
{ |
|
|
perm0 = perms[0]; |
|
|
perm1 = perms[1]; |
|
|
zerovec = (vector unsigned int) (0); |
|
|
} |
|
|
static inline const vector unsigned char |
|
|
get_perm(unsigned long i) |
|
|
{ |
|
|
return i ? perm1 : perm0; |
|
|
} |
|
14 |
|
|
15 |
#define ZERODEF |
You should have received a copy of the GNU General Public License |
16 |
#define ZEROVEC zerovec |
along with this program; if not, write to the Free Software |
17 |
#else |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 |
void |
|
19 |
sadInit_altivec(void) |
|
20 |
{ |
$Id$ |
21 |
} |
*/ |
|
static inline const vector unsigned char |
|
|
get_perm(unsigned long i) |
|
|
{ |
|
|
return perms[i]; |
|
|
} |
|
22 |
|
|
23 |
#define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0) |
#ifdef HAVE_ALTIVEC_H |
24 |
#define ZEROVEC zerovec |
#include <altivec.h> |
25 |
#endif |
#endif |
#include "../../portab.h"	/* uint32_t, int16_t, ... */

/* no debugging by default */
#undef DEBUG

#include <stdio.h>	/* fprintf for the DEBUG alignment checks */
35 |
#define SAD16() \ |
#define SAD16() \ |
36 |
t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ |
t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ |
37 |
t2 = vec_max(t1, *cur); /* find largest of two */ \ |
t2 = vec_max(t1, *cur); /* find largest of two */ \ |
38 |
t3 = vec_min(t1, *cur); /* find smaller of two */ \ |
t1 = vec_min(t1, *cur); /* find smaller of two */ \ |
39 |
t4 = vec_sub(t2, t3); /* find absolute difference */ \ |
t1 = vec_sub(t2, t1); /* find absolute difference */ \ |
40 |
sad = vec_sum4s(t4, sad); /* accumulate sum of differences */ \ |
sad = vec_sum4s(t1, vec_splat_u32(0)); /* sum of differences */ \ |
41 |
|
sumdiffs = (vector unsigned int)vec_sums((vector signed int)sad, (vector signed int)sumdiffs); /* accumulate sumdiffs */ \ |
42 |
|
if(vec_any_ge(sumdiffs, best_vec)) \ |
43 |
|
goto bail; \ |
44 |
cur += stride; ref += stride; |
cur += stride; ref += stride; |
45 |
|
|
46 |
/* |
/* |
47 |
* This function assumes cur and stride are 16 bytes aligned and ref is unaligned |
* This function assumes cur and stride are 16 bytes aligned and ref is unaligned |
48 |
*/ |
*/ |
49 |
unsigned long |
|
50 |
sad16_altivec(const vector unsigned char *cur, |
uint32_t |
51 |
const vector unsigned char *ref, |
sad16_altivec_c(vector unsigned char *cur, |
52 |
unsigned long stride, |
vector unsigned char *ref, |
53 |
const unsigned long best_sad) |
uint32_t stride, |
54 |
|
const uint32_t best_sad) |
55 |
{ |
{ |
56 |
vector unsigned char perm; |
vector unsigned char perm; |
57 |
vector unsigned char t1, t2, t3, t4; |
vector unsigned char t1, t2; |
58 |
vector unsigned int sad; |
vector unsigned int sad; |
59 |
vector signed int sumdiffs, best_vec; |
vector unsigned int sumdiffs; |
60 |
unsigned long result; |
vector unsigned int best_vec; |
61 |
|
uint32_t result; |
62 |
|
|
|
ZERODEF; |
|
63 |
|
|
64 |
#ifdef DEBUG |
#ifdef DEBUG |
65 |
|
/* print alignment errors if DEBUG is on */ |
66 |
if (((unsigned long) cur) & 0xf) |
if (((unsigned long) cur) & 0xf) |
67 |
fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); |
fprintf(stderr, "sad16_altivec:incorrect align, cur: %lx\n", (long)cur); |
|
// if (((unsigned long)ref) & 0xf) |
|
|
// fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref); |
|
68 |
if (stride & 0xf) |
if (stride & 0xf) |
69 |
fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); |
fprintf(stderr, "sad16_altivec:incorrect align, stride: %lu\n", stride); |
70 |
#endif |
#endif |
71 |
/* initialization */ |
/* initialization */ |
72 |
sad = (vector unsigned int) (ZEROVEC); |
sad = vec_splat_u32(0); |
73 |
|
sumdiffs = sad; |
74 |
stride >>= 4; |
stride >>= 4; |
75 |
perm = vec_lvsl(0, (unsigned char *) ref); |
perm = vec_lvsl(0, (unsigned char *) ref); |
76 |
*((unsigned long *) &best_vec) = best_sad; |
*((uint32_t*)&best_vec) = best_sad; |
77 |
best_vec = vec_splat(best_vec, 0); |
best_vec = vec_splat(best_vec, 0); |
78 |
|
|
79 |
/* perform sum of differences between current and previous */ |
/* perform sum of differences between current and previous */ |
81 |
SAD16(); |
SAD16(); |
82 |
SAD16(); |
SAD16(); |
83 |
SAD16(); |
SAD16(); |
84 |
/* Temp sum for exit */ |
|
|
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
|
|
if (vec_all_ge(sumdiffs, best_vec)) |
|
|
goto bail; |
|
85 |
SAD16(); |
SAD16(); |
86 |
SAD16(); |
SAD16(); |
87 |
SAD16(); |
SAD16(); |
88 |
SAD16(); |
SAD16(); |
89 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
|
|
if (vec_all_ge(sumdiffs, best_vec)) |
|
|
goto bail; |
|
90 |
SAD16(); |
SAD16(); |
91 |
SAD16(); |
SAD16(); |
92 |
SAD16(); |
SAD16(); |
93 |
SAD16(); |
SAD16(); |
94 |
|
|
95 |
SAD16(); |
SAD16(); |
96 |
SAD16(); |
SAD16(); |
97 |
SAD16(); |
SAD16(); |
98 |
SAD16(); |
SAD16(); |
99 |
|
|
|
/* sum all parts of difference into one 32 bit quantity */ |
|
|
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
|
100 |
bail: |
bail: |
101 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
102 |
sumdiffs = vec_splat(sumdiffs, 3); |
sumdiffs = vec_splat(sumdiffs, 3); |
103 |
vec_ste(sumdiffs, 0, (int *) &result); |
vec_ste(sumdiffs, 0, (uint32_t*) &result); |
104 |
return (result); |
return result; |
105 |
} |
} |
106 |
|
|
107 |
|
|
108 |
#define SAD8() \ |
#define SAD8() \ |
109 |
t1 = vec_perm(cur[0], cur[stride], perm_cur); /* align current vector */ \ |
c = vec_perm(vec_ld(0,cur),vec_ld(16,cur),vec_lvsl(0,cur));\ |
110 |
t2 = vec_perm(ref[0], ref[1], perm_ref1); /* align current vector */ \ |
r = vec_perm(vec_ld(0,ref),vec_ld(16,ref),vec_lvsl(0,ref));\ |
111 |
tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); /* align current vector */ \ |
c = vec_sub(vec_max(c,r),vec_min(c,r));\ |
112 |
t2 = vec_perm(t2,tp,perm_ref2); \ |
sad = vec_sum4s(c,sad);\ |
113 |
t3 = vec_max(t1, t2); /* find largest of two */ \ |
cur += stride;\ |
114 |
t4 = vec_min(t1, t2); /* find smaller of two */ \ |
ref += stride |
|
t5 = vec_sub(t3, t4); /* find absolute difference */ \ |
|
|
sad = vec_sum4s(t5, sad); /* accumulate sum of differences */ \ |
|
|
cur += stride<<1; ref += stride<<1; |
|
115 |
|
|
116 |
/* |
/* |
117 |
* This function assumes cur is 8 bytes aligned, stride is 16 bytes |
* This function assumes nothing |
|
* aligned and ref is unaligned |
|
118 |
*/ |
*/ |
119 |
unsigned long |
|
120 |
sad8_altivec(const vector unsigned char *cur, |
uint32_t |
121 |
const vector unsigned char *ref, |
sad8_altivec_c(const uint8_t * cur, |
122 |
unsigned long stride) |
const uint8_t *ref, |
123 |
|
const uint32_t stride) |
124 |
{ |
{ |
125 |
vector unsigned char t1, t2, t3, t4, t5, tp; |
uint32_t result = 0; |
|
vector unsigned int sad; |
|
|
vector signed int sumdiffs; |
|
|
vector unsigned char perm_cur; |
|
|
vector unsigned char perm_ref1, perm_ref2; |
|
|
unsigned long result; |
|
126 |
|
|
127 |
ZERODEF; |
register vector unsigned int sad; |
128 |
|
register vector unsigned char c; |
129 |
|
register vector unsigned char r; |
130 |
|
|
131 |
#ifdef DEBUG |
/* initialize */ |
132 |
if (((unsigned long) cur) & 0x7) |
sad = vec_splat_u32(0); |
|
fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); |
|
|
// if (((unsigned long)ref) & 0x7) |
|
|
// fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref); |
|
|
if (stride & 0xf) |
|
|
fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); |
|
|
#endif |
|
133 |
|
|
134 |
perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01); |
/* Perform sad operations */ |
135 |
perm_ref1 = vec_lvsl(0, (unsigned char *) ref); |
SAD8(); |
136 |
perm_ref2 = get_perm(0); |
SAD8(); |
137 |
|
SAD8(); |
138 |
/* initialization */ |
SAD8(); |
|
sad = (vector unsigned int) (ZEROVEC); |
|
|
stride >>= 4; |
|
139 |
|
|
|
/* perform sum of differences between current and previous */ |
|
140 |
SAD8(); |
SAD8(); |
141 |
SAD8(); |
SAD8(); |
142 |
SAD8(); |
SAD8(); |
143 |
SAD8(); |
SAD8(); |
144 |
|
|
145 |
/* sum all parts of difference into one 32 bit quantity */ |
/* finish addition, add the first 2 together */ |
146 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
sad = vec_and(sad, (vector unsigned int)vec_pack(vec_splat_u16(-1),vec_splat_u16(0))); |
147 |
|
sad = (vector unsigned int)vec_sums((vector signed int)sad, vec_splat_s32(0)); |
148 |
|
sad = vec_splat(sad,3); |
149 |
|
vec_ste(sad, 0, &result); |
150 |
|
|
151 |
/* copy vector sum into unaligned result */ |
return result; |
|
sumdiffs = vec_splat(sumdiffs, 3); |
|
|
vec_ste(sumdiffs, 0, (int *) &result); |
|
|
return (result); |
|
152 |
} |
} |
153 |
|
|
154 |
#define MEAN16(i)\ |
|
155 |
c##i=*cur;\ |
|
156 |
mean = vec_sum4s(c##i,mean);\ |
|
157 |
cur += stride; |
#define MEAN16() \ |
158 |
|
mean = vec_sum4s(*ptr,mean);\ |
159 |
#define DEV16(i) \ |
ptr += stride |
160 |
t2 = vec_max(c##i, mn); /* find largest of two */ \ |
|
161 |
t3 = vec_min(c##i, mn); /* find smaller of two */ \ |
#define DEV16() \ |
162 |
t4 = vec_sub(t2, t3); /* find absolute difference */ \ |
t2 = vec_max(*ptr, mn); /* find largest of two */ \ |
163 |
dev = vec_sum4s(t4, dev); |
t3 = vec_min(*ptr, mn); /* find smaller of two */ \ |
164 |
|
t2 = vec_sub(t2, t3); /* find absolute difference */ \ |
165 |
unsigned long |
dev = vec_sum4s(t2, dev); \ |
166 |
dev16_altivec(const vector unsigned char *cur, |
ptr += stride |
167 |
unsigned long stride) |
|
168 |
|
/* |
169 |
|
* This function assumes cur is 16 bytes aligned and stride is 16 bytes |
170 |
|
* aligned |
171 |
|
*/ |
172 |
|
|
173 |
|
uint32_t |
174 |
|
dev16_altivec_c(vector unsigned char *cur, |
175 |
|
uint32_t stride) |
176 |
{ |
{ |
177 |
vector unsigned char t2, t3, t4, mn; |
vector unsigned char t2, t3, mn; |
178 |
vector unsigned int mean, dev; |
vector unsigned int mean, dev; |
179 |
vector signed int sumdiffs; |
vector unsigned int sumdiffs; |
180 |
vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, |
vector unsigned char *ptr; |
181 |
c13, c14, c15; |
uint32_t result; |
|
unsigned long result; |
|
182 |
|
|
183 |
ZERODEF; |
#ifdef DEBUG |
184 |
|
/* print alignment errors if DEBUG is on */ |
185 |
|
if(((unsigned long)cur) & 0x7) |
186 |
|
fprintf(stderr, "dev16_altivec:incorrect align, cur: %lx\n", (long)cur); |
187 |
|
if(stride & 0xf) |
188 |
|
fprintf(stderr, "dev16_altivec:incorrect align, stride: %lu\n", stride); |
189 |
|
#endif |
190 |
|
|
191 |
mean = (vector unsigned int) (ZEROVEC); |
dev = mean = vec_splat_u32(0); |
|
dev = (vector unsigned int) (ZEROVEC); |
|
192 |
stride >>= 4; |
stride >>= 4; |
193 |
|
|
194 |
MEAN16(0); |
/* set pointer to iterate through cur */ |
195 |
MEAN16(1); |
ptr = cur; |
196 |
MEAN16(2); |
|
197 |
MEAN16(3); |
MEAN16(); |
198 |
MEAN16(4); |
MEAN16(); |
199 |
MEAN16(5); |
MEAN16(); |
200 |
MEAN16(6); |
MEAN16(); |
201 |
MEAN16(7); |
MEAN16(); |
202 |
MEAN16(8); |
MEAN16(); |
203 |
MEAN16(9); |
MEAN16(); |
204 |
MEAN16(10); |
MEAN16(); |
205 |
MEAN16(11); |
MEAN16(); |
206 |
MEAN16(12); |
MEAN16(); |
207 |
MEAN16(13); |
MEAN16(); |
208 |
MEAN16(14); |
MEAN16(); |
209 |
MEAN16(15); |
MEAN16(); |
210 |
|
MEAN16(); |
211 |
sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC); |
MEAN16(); |
212 |
mn = vec_perm((vector unsigned char) sumdiffs, |
MEAN16(); |
213 |
(vector unsigned char) sumdiffs, (vector unsigned char) (14, |
|
214 |
14, |
/* Add all together in sumdiffs */ |
215 |
14, |
sumdiffs = (vector unsigned int)vec_sums((vector signed int) mean, vec_splat_s32(0)); |
216 |
14, |
/* teilen durch 16 * 16 */ |
217 |
14, |
mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, vec_splat_u8(14)); |
218 |
14, |
|
219 |
14, |
/* set pointer to iterate through cur */ |
220 |
14, |
ptr = cur; |
221 |
14, |
|
222 |
14, |
DEV16(); |
223 |
14, |
DEV16(); |
224 |
14, |
DEV16(); |
225 |
14, |
DEV16(); |
226 |
14, |
DEV16(); |
227 |
14, |
DEV16(); |
228 |
14)); |
DEV16(); |
229 |
DEV16(0); |
DEV16(); |
230 |
DEV16(1); |
DEV16(); |
231 |
DEV16(2); |
DEV16(); |
232 |
DEV16(3); |
DEV16(); |
233 |
DEV16(4); |
DEV16(); |
234 |
DEV16(5); |
DEV16(); |
235 |
DEV16(6); |
DEV16(); |
236 |
DEV16(7); |
DEV16(); |
237 |
DEV16(8); |
DEV16(); |
|
DEV16(9); |
|
|
DEV16(10); |
|
|
DEV16(11); |
|
|
DEV16(12); |
|
|
DEV16(13); |
|
|
DEV16(14); |
|
|
DEV16(15); |
|
238 |
|
|
239 |
/* sum all parts of difference into one 32 bit quantity */ |
/* sum all parts of difference into one 32 bit quantity */ |
240 |
sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC); |
sumdiffs = (vector unsigned int)vec_sums((vector signed int) dev, vec_splat_s32(0)); |
241 |
|
|
242 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
243 |
sumdiffs = vec_splat(sumdiffs, 3); |
sumdiffs = vec_splat(sumdiffs, 3); |
244 |
vec_ste(sumdiffs, 0, (int *) &result); |
vec_ste(sumdiffs, 0, (uint32_t*) &result); |
245 |
return (result); |
return result; |
246 |
|
} |
247 |
|
|
248 |
|
#define SAD16BI() \ |
249 |
|
t1 = vec_perm(ref1[0], ref1[1], mask1); \ |
250 |
|
t2 = vec_perm(ref2[0], ref2[1], mask2); \ |
251 |
|
t1 = vec_avg(t1, t2); \ |
252 |
|
t2 = vec_max(t1, *cur); \ |
253 |
|
t1 = vec_min(t1, *cur); \ |
254 |
|
sad = vec_sub(t2, t1); \ |
255 |
|
sum = vec_sum4s(sad, sum); \ |
256 |
|
cur += stride; \ |
257 |
|
ref1 += stride; \ |
258 |
|
ref2 += stride |
259 |
|
|
260 |
|
/* |
261 |
|
* This function assumes cur is 16 bytes aligned, stride is 16 bytes |
262 |
|
* aligned and ref1 and ref2 is unaligned |
263 |
|
*/ |
264 |
|
|
265 |
|
uint32_t |
266 |
|
sad16bi_altivec_c(vector unsigned char *cur, |
267 |
|
vector unsigned char *ref1, |
268 |
|
vector unsigned char *ref2, |
269 |
|
uint32_t stride) |
270 |
|
{ |
271 |
|
vector unsigned char t1, t2; |
272 |
|
vector unsigned char mask1, mask2; |
273 |
|
vector unsigned char sad; |
274 |
|
vector unsigned int sum; |
275 |
|
uint32_t result; |
276 |
|
|
277 |
|
#ifdef DEBUG |
278 |
|
/* print alignment errors if this is on */ |
279 |
|
if((long)cur & 0xf) |
280 |
|
fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lx\n", (long)cur); |
281 |
|
if(stride & 0xf) |
282 |
|
fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lu\n", stride); |
283 |
|
#endif |
284 |
|
|
285 |
|
/* Initialisation stuff */ |
286 |
|
stride >>= 4; |
287 |
|
mask1 = vec_lvsl(0, (unsigned char*)ref1); |
288 |
|
mask2 = vec_lvsl(0, (unsigned char*)ref2); |
289 |
|
sad = vec_splat_u8(0); |
290 |
|
sum = (vector unsigned int)sad; |
291 |
|
|
292 |
|
SAD16BI(); |
293 |
|
SAD16BI(); |
294 |
|
SAD16BI(); |
295 |
|
SAD16BI(); |
296 |
|
|
297 |
|
SAD16BI(); |
298 |
|
SAD16BI(); |
299 |
|
SAD16BI(); |
300 |
|
SAD16BI(); |
301 |
|
|
302 |
|
SAD16BI(); |
303 |
|
SAD16BI(); |
304 |
|
SAD16BI(); |
305 |
|
SAD16BI(); |
306 |
|
|
307 |
|
SAD16BI(); |
308 |
|
SAD16BI(); |
309 |
|
SAD16BI(); |
310 |
|
SAD16BI(); |
311 |
|
|
312 |
|
sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0)); |
313 |
|
sum = vec_splat(sum, 3); |
314 |
|
vec_ste(sum, 0, (uint32_t*)&result); |
315 |
|
|
316 |
|
return result; |
317 |
|
} |
318 |
|
|
319 |
|
|
320 |
|
#define SSE8_16BIT() \ |
321 |
|
b1_vec = vec_perm(vec_ld(0,b1), vec_ld(16,b1), vec_lvsl(0,b1)); \ |
322 |
|
b2_vec = vec_perm(vec_ld(0,b2), vec_ld(16,b2), vec_lvsl(0,b2)); \ |
323 |
|
diff = vec_sub(b1_vec,b2_vec); \ |
324 |
|
sum = vec_msum(diff,diff,sum); \ |
325 |
|
b1 = (const int16_t*)((int8_t*)b1+stride); \ |
326 |
|
b2 = (const int16_t*)((int8_t*)b2+stride) |
327 |
|
|
328 |
|
uint32_t |
329 |
|
sse8_16bit_altivec_c(const int16_t * b1, |
330 |
|
const int16_t * b2, |
331 |
|
const uint32_t stride) |
332 |
|
{ |
333 |
|
register vector signed short b1_vec; |
334 |
|
register vector signed short b2_vec; |
335 |
|
register vector signed short diff; |
336 |
|
register vector signed int sum; |
337 |
|
uint32_t result; |
338 |
|
|
339 |
|
/* initialize */ |
340 |
|
sum = vec_splat_s32(0); |
341 |
|
|
342 |
|
SSE8_16BIT(); |
343 |
|
SSE8_16BIT(); |
344 |
|
SSE8_16BIT(); |
345 |
|
SSE8_16BIT(); |
346 |
|
|
347 |
|
SSE8_16BIT(); |
348 |
|
SSE8_16BIT(); |
349 |
|
SSE8_16BIT(); |
350 |
|
SSE8_16BIT(); |
351 |
|
|
352 |
|
/* sum the vector */ |
353 |
|
sum = vec_sums(sum, vec_splat_s32(0)); |
354 |
|
sum = vec_splat(sum,3); |
355 |
|
|
356 |
|
vec_ste(sum,0,(int*)&result); |
357 |
|
|
358 |
|
/* and return */ |
359 |
|
return result; |
360 |
} |
} |