Diff of /xvidcore/src/motion/x86_asm/sad_xmm.asm

-revision 1.5, Sun Nov 17 00:32:06 2002 UTC
+revision 1.9, Sun Aug 22 11:46:10 2004 UTC
 Line 1
- ;/*****************************************************************************
+ ;/****************************************************************************
  ; *
  ; *  XVID MPEG-4 VIDEO CODEC
- ; *  xmm (extended mmx) sum of absolute difference
+ ; *  - K7 optimized SAD operators -
  ; *
- ; *  Copyright(C) 2002 Peter Ross <pross@xvid.org>
+ ; *  Copyright(C) 2001 Peter Ross <pross@xvid.org>
- ; *  Copyright(C) 2002 Michael Militzer <michael@xvid.org>
+ ; *               2001 Michael Militzer <isibaar@xvid.org>
- ; *  Copyright(C) 2002 Pascal Massimino <skal@planet-d.net>
+ ; *               2002 Pascal Massimino <skal@planet-d.net>
  ; *
- ; *  This file is part of XviD, a free MPEG-4 video encoder/decoder
+ ; *  This program is free software; you can redistribute it and/or modify it
- ; *
- ; *  XviD is free software; you can redistribute it and/or modify it
  ; *  under the terms of the GNU General Public License as published by
  ; *  the Free Software Foundation; either version 2 of the License, or
  ; *  (at your option) any later version.
-Line 23
+Line 21
  ; *  along with this program; if not, write to the Free Software
  ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  ; *
- ; *  Under section 8 of the GNU General Public License, the copyright
- ; *  holders of XVID explicitly forbid distribution in the following
- ; *  countries:
- ; *
- ; *    - Japan
- ; *    - United States of America
- ; *
- ; *  Linking XviD statically or dynamically with other modules is making a
- ; *  combined work based on XviD.  Thus, the terms and conditions of the
- ; *  GNU General Public License cover the whole combination.
- ; *
- ; *  As a special exception, the copyright holders of XviD give you
- ; *  permission to link XviD with independent modules that communicate with
- ; *  XviD solely through the VFW1.1 and DShow interfaces, regardless of the
- ; *  license terms of these independent modules, and to copy and distribute
- ; *  the resulting combined work under terms of your choice, provided that
- ; *  every copy of the combined work is accompanied by a complete copy of
- ; *  the source code of XviD (the version of XviD used to produce the
- ; *  combined work), being distributed under the terms of the GNU General
- ; *  Public License plus this exception.  An independent module is a module
- ; *  which is not derived from or based on XviD.
- ; *
- ; *  Note that people who make modified versions of XviD are not obligated
- ; *  to grant this special exception for their modified versions; it is
- ; *  their choice whether to do so.  The GNU General Public License gives
- ; *  permission to release a modified version without this exception; this
- ; *  exception also makes it possible to release a modified version which
- ; *  carries forward this exception.
- ; *
  ; * $Id$
  ; *
- ; *************************************************************************/
+ ; ***************************************************************************/
- bits 32
+ BITS 32
  %macro cglobal 1
          %ifdef PREFIX
+                 %ifdef MARK_FUNCS
+                         global _%1:function
+                         %define %1 _%1:function
+                 %else
                  global _%1
                  %define %1 _%1
+                 %endif
+         %else
+                 %ifdef MARK_FUNCS
+                         global %1:function
          %else
                  global %1
          %endif
+         %endif
+ %endmacro
+ ;=============================================================================
+ ; Read only data
+ ;=============================================================================
+ %ifdef FORMAT_COFF ; COFF builds: declare the section without an alignment attribute
+ SECTION .rodata
+ %else ; all other formats: request 16-byte section alignment
+ SECTION .rodata align=16
+ %endif
+ ALIGN 16 ; align the constant itself to 16 bytes
+ mmx_one: times 4 dw 1 ; four 16-bit words, each equal to 1 (8 bytes total)
+ ;=============================================================================
+ ; Helper macros
+ ;=============================================================================
+ %macro SAD_16x16_SSE 0 ; one 16-byte row: SAD of [eax] vs [edx]; bytes 0-7 add into mm5, bytes 8-15 into mm6; eax and edx advance by ecx; clobbers mm0, mm1
+   movq mm0, [eax] ; bytes 0-7 of the row at eax
+   psadbw mm0, [edx] ; mm0 = sum of |[eax+i] - [edx+i]| for i = 0..7
+   movq mm1, [eax+8] ; bytes 8-15 of the row at eax
+   add eax, ecx ; step eax by ecx (done here, after its last use, interleaved with the MMX ops)
+   psadbw mm1, [edx+8] ; same SAD for bytes 8-15 (edx has not been advanced yet)
+   paddusw mm5, mm0 ; accumulate the bytes 0-7 result (unsigned saturating word add)
+   add edx, ecx ; step edx by ecx
+   paddusw mm6, mm1 ; accumulate the bytes 8-15 result
+ %endmacro
+ %macro SAD_8x8_SSE 0 ; two 8-byte rows: SAD of [eax] vs [edx] into mm5 and of [eax+ecx] vs [edx+ecx] into mm6; eax and edx advance by ebx; clobbers mm0, mm1
+   movq mm0, [eax] ; 8 bytes at eax
+   movq mm1, [eax+ecx] ; 8 bytes at eax+ecx
+   psadbw mm0, [edx] ; mm0 = sum of absolute byte differences against [edx]
+   psadbw mm1, [edx+ecx] ; mm1 = sum of absolute byte differences against [edx+ecx]
+   add eax, ebx ; NOTE(review): ebx is presumably 2*stride, set up by the caller - confirm in sad8_xmm
+   add edx, ebx
+         paddusw mm5, mm0 ; accumulate first-row result (unsigned saturating word add)
+         paddusw mm6, mm1 ; accumulate second-row result
+ %endmacro
+ %macro SADBI_16x16_SSE 0 ; one 16-byte row: SAD of [eax] against the byte-wise average of [edx] and [ebx]; bytes 0-7 add into mm5, bytes 8-15 into mm6; eax, edx, ebx advance by ecx; clobbers mm0-mm3
+   movq mm0, [eax] ; bytes 0-7 at eax
+   movq mm1, [eax+8] ; bytes 8-15 at eax
+   movq mm2, [edx] ; bytes 0-7 at edx
+   movq mm3, [edx+8] ; bytes 8-15 at edx
+   pavgb mm2, [ebx] ; mm2 = rounded byte average of [edx] and [ebx], bytes 0-7
+   add edx, ecx ; step edx by ecx
+   pavgb mm3, [ebx+8] ; same average for bytes 8-15 (ebx not yet advanced)
+   add ebx, ecx ; step ebx by ecx
+   psadbw mm0, mm2 ; mm0 = sum of absolute differences between [eax] bytes 0-7 and the average
+   add eax, ecx ; step eax by ecx
+   psadbw mm1, mm3 ; same for bytes 8-15
+   paddusw mm5, mm0 ; accumulate bytes 0-7 result (unsigned saturating word add)
+   paddusw mm6, mm1 ; accumulate bytes 8-15 result
  %endmacro
- section .data
+ %macro SADBI_8x8_XMM 0 ; two 8-byte rows: SAD of [eax] against the byte-wise average of [edx] and [ebx]; first row adds into mm5, second (at +ecx) into mm6; eax, edx, ebx advance by 2*ecx; clobbers mm0-mm3
+   movq mm0, [eax] ; first row at eax
+   movq mm1, [eax+ecx] ; second row at eax+ecx
+   movq mm2, [edx] ; first row at edx
+   movq mm3, [edx+ecx] ; second row at edx+ecx
+   pavgb mm2, [ebx] ; mm2 = rounded byte average of [edx] and [ebx]
+   lea edx, [edx+2*ecx] ; step edx by 2*ecx; lea leaves the flags untouched
+   pavgb mm3, [ebx+ecx] ; same average for the second row (ebx not yet advanced)
+   lea ebx, [ebx+2*ecx] ; step ebx by 2*ecx
+   psadbw mm0, mm2 ; mm0 = sum of absolute differences, first row vs. average
+   lea eax, [eax+2*ecx] ; step eax by 2*ecx
+   psadbw mm1, mm3 ; mm1 = sum of absolute differences, second row vs. average
+   paddusw mm5, mm0 ; accumulate first-row result (unsigned saturating word add)
+   paddusw mm6, mm1 ; accumulate second-row result
+ %endmacro
- align 16
+ %macro MEAN_16x16_SSE 0 ; one 16-byte row at eax: psadbw against mm7, bytes 0-7 add into mm5, bytes 8-15 into mm6; eax advances by ecx; clobbers mm0, mm1
- mmx_one times 4 dw 1
+   movq mm0, [eax] ; bytes 0-7 at eax
+   movq mm1, [eax+8] ; bytes 8-15 at eax
+   psadbw mm0, mm7 ; NOTE(review): mm7 is presumably zero here, making this a plain sum of the 8 bytes - confirm in dev16_xmm
+   psadbw mm1, mm7
+   add eax, ecx ; step eax by ecx
+   paddw mm5, mm0 ; accumulate (wrapping word add, unlike the paddusw used by the SAD macros)
+   paddw mm6, mm1
+ %endmacro
- section .text
+ %macro ABS_16x16_SSE 0 ; one 16-byte row at eax: sum of absolute byte differences against mm4, bytes 0-7 add into mm5, bytes 8-15 into mm6; eax advances by ecx; clobbers mm0, mm1
+   movq mm0, [eax] ; bytes 0-7 at eax
+   movq mm1, [eax+8] ; bytes 8-15 at eax
+   psadbw mm0, mm4 ; NOTE(review): mm4 presumably holds the block mean replicated in every byte - confirm in dev16_xmm
+   psadbw mm1, mm4
+   lea eax, [eax+ecx] ; step eax by ecx; lea leaves the flags untouched
+   paddw mm5, mm0 ; accumulate (wrapping word add)
+   paddw mm6, mm1
+ %endmacro
+ ;=============================================================================
+ ; Code
+ ;=============================================================================
+ SECTION .text ; code follows; each entry point below is exported through the cglobal macro
  cglobal  sad16_xmm
  cglobal  sad8_xmm
  cglobal  sad16bi_xmm
  cglobal  sad8bi_xmm
  cglobal  dev16_xmm
+ cglobal sad16v_xmm ; export added together with the sad16v_xmm routine at the end of the file
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t sad16_xmm(const uint8_t * const cur,
  ;                                       const uint8_t * const ref,
  ;                                       const uint32_t stride,
  ;                                       const uint32_t best_sad);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- %macro SAD_16x16_SSE 0
-     movq mm0, [eax]
-     psadbw mm0, [edx]
-     movq mm1, [eax+8]
-     add eax, ecx
-     psadbw mm1, [edx+8]
-     paddusw mm5,mm0
-     add edx, ecx
-     paddusw mm6,mm1
- %endmacro
- align 16
+ ALIGN 16
  sad16_xmm:
      mov eax, [esp+ 4] ; Src1
-Line 133
+Line 191
      ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t sad8_xmm(const uint8_t * const cur,
  ;                                       const uint8_t * const ref,
  ;                                       const uint32_t stride);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- %macro SAD_8x8_SSE 0
-     movq mm0, [eax]
-     movq mm1, [eax+ecx]
-     psadbw mm0, [edx]
-     psadbw mm1, [edx+ecx]
-     add eax, ebx
-     add edx, ebx
-     paddusw mm5,mm0
-     paddusw mm6,mm1
- %endmacro
- align 16
+ ALIGN 16
  sad8_xmm:
      mov eax, [esp+ 4] ; Src1
-Line 186
+Line 231
      ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t sad16bi_xmm(const uint8_t * const cur,
  ;                                       const uint8_t * const ref1,
  ;                                       const uint8_t * const ref2,
  ;                                       const uint32_t stride);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- %macro SADBI_16x16_SSE 0
-     movq mm0, [eax]
-     movq mm1, [eax+8]
-     movq mm2, [edx]
-     movq mm3, [edx+8]
-     pavgb mm2, [ebx]
+ ALIGN 16
-     add edx, ecx
-     pavgb mm3, [ebx+8]
-     add ebx, ecx
-     psadbw mm0, mm2
-     add eax, ecx
-     psadbw mm1, mm3
-     paddusw mm5,mm0
-     paddusw mm6,mm1
- %endmacro
- align 16
  sad16bi_xmm:
      push ebx
      mov eax, [esp+4+ 4] ; Src
-Line 251
+Line 274
      pop ebx
      ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t sad8bi_xmm(const uint8_t * const cur,
  ; const uint8_t * const ref1,
  ; const uint8_t * const ref2,
  ; const uint32_t stride);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- %macro SADBI_8x8_XMM 0
-    movq mm0, [eax]
-    movq mm1, [eax+ecx]
-    movq mm2, [edx]
-    movq mm3, [edx+ecx]
-    pavgb mm2, [ebx]
-    lea edx, [edx+2*ecx]
-    pavgb mm3, [ebx+ecx]
-    lea ebx, [ebx+2*ecx]
-    psadbw mm0, mm2
+ ALIGN 16
-    lea eax, [eax+2*ecx]
-    psadbw mm1, mm3
-    paddusw mm5,mm0
-    paddusw mm6,mm1
- %endmacro
- align 16
  sad8bi_xmm:
     push ebx
     mov eax, [esp+4+ 4] ; Src
-Line 304
+Line 305
     ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dev16_xmm(const uint8_t * const cur,
  ;                                       const uint32_t stride);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- %macro MEAN_16x16_SSE 0
-     movq mm0, [eax]
-     movq mm1, [eax+8]
-     psadbw mm0, mm7
-     psadbw mm1, mm7
-     add eax, ecx
-     paddw mm5, mm0
-     paddw mm6, mm1
- %endmacro
- %macro ABS_16x16_SSE 0
+ ALIGN 16
-     movq mm0, [eax]
-     movq mm1, [eax+8]
-     psadbw mm0, mm4
-     psadbw mm1, mm4
-     lea eax,[eax+ecx]
-     paddw mm5, mm0
-     paddw mm6, mm1
- %endmacro
- align 16
  dev16_xmm:
      mov eax, [esp+ 4] ; Src
-Line 373
+Line 354
      mov eax, [esp+ 4] ; Src
      pxor mm5, mm5 ; sums
      pxor mm6, mm6
-Line 401
+Line 383
      movd eax, mm6
      ret
+ ;-----------------------------------------------------------------------------
+ ;int sad16v_xmm(const uint8_t * const cur,
+ ;               const uint8_t * const ref,
+ ;               const uint32_t stride,
+ ;               int* sad8);
+ ;  16 rows of 16 bytes are compared; four partial SADs are stored to sad8[0..3] and their sum is returned in eax. Arguments are read from the stack.
+ ALIGN 16
+ sad16v_xmm:
+   push ebx ; ebx is used as the sad8 pointer; saved here, restored before ret
+   mov eax, [esp+4+ 4] ; Src1 (the extra +4 accounts for the pushed ebx)
+   mov edx, [esp+4+ 8] ; Src2
+   mov ecx, [esp+4+12] ; Stride
+   mov ebx, [esp+4+16] ; sad ptr
+   pxor mm5, mm5 ; accum1
+   pxor mm6, mm6 ; accum2
+   pxor mm7, mm7 ; total
+   SAD_16x16_SSE ; rows 0-7: each expansion handles one row and advances eax/edx by ecx
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   paddusw mm7, mm5 ; total += SAD of rows 0-7, bytes 0-7 (saturating word add)
+   paddusw mm7, mm6 ; total += SAD of rows 0-7, bytes 8-15
+   movd [ebx], mm5 ; sad8[0] = rows 0-7, bytes 0-7 (low 32 bits of mm5)
+   movd [ebx+4], mm6 ; sad8[1] = rows 0-7, bytes 8-15
+   pxor mm5, mm5 ; accum1
+   pxor mm6, mm6 ; accum2
+   SAD_16x16_SSE ; rows 8-15, continuing from the advanced eax/edx
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   SAD_16x16_SSE
+   paddusw mm7, mm5 ; total += SAD of rows 8-15, bytes 0-7
+   paddusw mm7, mm6 ; total += SAD of rows 8-15, bytes 8-15
+   movd [ebx+8], mm5 ; sad8[2] = rows 8-15, bytes 0-7
+   movd [ebx+12], mm6 ; sad8[3] = rows 8-15, bytes 8-15
+   movd eax, mm7 ; return value: sum of the four partial SADs
+   pop ebx
+   ret

 Legend:



Removed from v.1.5
 


changed lines


 
Added in v.1.9
 Legend:



Removed from v.1.5
 


changed lines


 
Added in v.1.9
-Removed from v.1.5
+Added in v.1.9

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4