Diff of /xvidcore/src/quant/x86_asm/quantize_h263_3dne.asm

-revision 1.1.2.2, Thu Oct  9 18:50:22 2003 UTC
+revision 1.5, Sun Aug 29 10:02:38 2004 UTC
 Line 29
  ; enable dequant saturate [-2048,2047], test purposes only.
  %define SATURATE
- ; data/text alignment
+ BITS 32
- %define ALIGN 16
- bits 32
  %macro cglobal 1
          %ifdef PREFIX
+                 %ifdef MARK_FUNCS
+                         global _%1:function %1.endfunc-%1
+                         %define %1 _%1:function %1.endfunc-%1
+                 %else
                  global _%1
                  %define %1 _%1
+                 %endif
+         %else
+                 %ifdef MARK_FUNCS
+                         global %1:function %1.endfunc-%1
          %else
                  global %1
          %endif
+         %endif
  %endmacro
- ;***************************************************************************
+ ;=============================================================================
  ; Local data
- ;***************************************************************************
+ ;=============================================================================
  %ifdef FORMAT_COFF
- section .data data
+ SECTION .rodata
  %else
- section .data data align=16
+ SECTION .rodata align=16
  %endif
  align 4
-Line 62
+Line 68
          %assign i i+1
  %endrep
- align 16
+ ALIGN 16
  plus_one:
          times 8 dw 1
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- ;
  ; subtract by Q/2 table
- ;
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
- align 16
+ ALIGN 16
  mmx_sub:
  %assign i 1
  %rep 31
-Line 81
+Line 85
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; divide by 2Q table
  ;
-Line 89
+Line 93
  ; for q=1, _pmulhw_ will overflow so it is treated separately
  ; (3dnow2 provides _pmulhuw_ which won't cause overflow)
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
  mmx_div:
  %assign i 1
  %rep 31
-Line 99
+Line 103
          %assign i i+1
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- ;
  ; add by (odd(Q) ? Q : Q - 1) table
- ;
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
- align 16
+ ALIGN 16
  mmx_add:
  %assign i 1
  %rep 31
-Line 117
+Line 119
          %assign i i+1
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- ;
  ; multiply by 2Q table
- ;
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
- align 16
+ ALIGN 16
  mmx_mul:
  %assign i 1
  %rep 31
 Line 131
          %assign i i+1
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- ;
  ; saturation limits
- ;
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
- align 8
+ ALIGN 8
  mmx_32768_minus_2048:
          times 4 dw (32768-2048)
  mmx_32767_minus_2047:
          times 4 dw (32767-2047)
- align 16
+ ALIGN 16
  mmx_2047:
          times 4 dw 2047
- align 8
+ ALIGN 8
  mmzero:
          dd 0, 0
  int2047:
-Line 155
+Line 153
  int_2048:
          dd -2048
- ;***************************************************************************
+ ;=============================================================================
  ; Code
- ;***************************************************************************
+ ;=============================================================================
- section .text
+ SECTION .text
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t quant_h263_intra_3dne(int16_t * coeff,
  ;                                const int16_t * const data,
  ;                                const uint32_t quant,
- ;                                const uint32_t dcscalar);
+ ;                                const uint32_t dcscalar,
+ ;                                const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;This is Athlon-optimized code (ca 70 clk per call)
  %macro quant_intra1  1
          psubw   mm1, mm0        ;A3
          psubw   mm3, mm2        ;B3
  %if (%1)
-Line 181
+Line 179
          psubw   mm7, mm6        ;D8
  %endif
- align 8
+ ALIGN 8
          db      0Fh, 6Fh, 64h, 21h, (%1 * 32 +16)       ;movq   mm4, [ecx + %1 * 32 +16+32]     ;C1
          pmaxsw  mm1, mm0        ;A4
          db      0Fh, 6Fh, 74h, 21h, (%1 * 32 +24)       ;movq   mm6, [ecx + %1 * 32 +24+32]     ;D1
-Line 255
+Line 253
          psubw   mm7, mm6        ;D8
  %endif
- align 8
+ ALIGN 8
          db      0Fh, 6Fh, 64h, 21h, (%1 * 32 +16)       ;movq   mm4, [ecx + %1 * 32 +16+32]     ;C1
          pmaxsw  mm1, mm0        ;A4
          db      0Fh, 6Fh, 74h, 21h, (%1 * 32 +24)       ;movq   mm6, [ecx + %1 * 32 +24+32]     ;D1
-Line 322
+Line 320
  %endmacro
- align ALIGN
+ ALIGN 16
  cglobal quant_h263_intra_3dne
  quant_h263_intra_3dne:
-Line 377
+Line 375
          xor             eax, eax
          ret
-         align 16
+ ALIGN 16
  .q1loop
  quant_intra1 0
-Line 412
+Line 410
          xor             eax, eax
          ret
+ .endfunc
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t quant_h263_inter_3dne(int16_t * coeff,
  ;                                const int16_t * const data,
- ;                                const uint32_t quant);
+ ;                                const uint32_t quant,
+ ;                                const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;This is Athlon-optimized code (ca 90 clk per call)
  ;Optimized by Jaan, 30 Nov 2002
 Line 497
          movq    [edx + %1*16+8], mm4
  %endmacro
- align ALIGN
+ ALIGN 16
  cglobal quant_h263_inter_3dne
  quant_h263_inter_3dne:
          mov             edx, [esp  + 4]         ; coeff
 Line 514
          lea             eax, [mmzero]
          jz              near .q1loop
          cmp             esp, esp
- align 8
+ ALIGN 8
          movq    mm3, [ecx + 120]        ;B1
          pxor    mm4, mm4                ;B2
          psubw   mm4, mm3                ;B3
 Line 545
          ret
- align ALIGN
+ ALIGN 16
  .q1loop
          movq mm6, [byte ebx]
 Line 567
          pop ebx
          ret
+ .endfunc
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dequant_h263_intra_3dne(int16_t *data,
  ;                                  const int16_t * const coeff,
  ;                                  const uint32_t quant,
- ;                                  const uint32_t dcscalar);
+ ;                                  const uint32_t dcscalar,
+ ;                                  const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
    ; this is the same as dequant_inter_3dne, except that we're
    ; saturating using 'pminsw' (saves 2 cycles/loop => ~5% faster)
-Line 601
+Line 603
          movq    mm4, [esi]              ;C1 ;0
          mov             esp, esp
          pcmpeqw mm6, [ecx+%1*24]        ;A6 (c ==0) ? -1 : 0 (1st)
- align 4
+ ALIGN 4
          psraw   mm1, 15                 ; sign(c)       ;A7 (2nd)
  %if (%1)
          movq    [edx+%1*24+16-24], mm5  ; C14 (7th) 2later
-Line 647
+Line 649
  %endmacro
- align ALIGN
+ ALIGN 16
  cglobal dequant_h263_intra_3dne
  dequant_h263_intra_3dne:
          mov             ecx, [esp+ 8]                   ; coeff
-Line 674
+Line 676
          psraw   mm3, 15                         ; sign(c)       ;B7 (2nd)
          mov             edx, [esp+ 4+16]                ; data
- align 8
+ ALIGN 8
          dequant 0
          cmp             ebp, -2048
-Line 717
+Line 719
          xor             eax, eax
          ret
+ .endfunc
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dequant_h263_inter_3dne(int16_t * data,
  ;                                  const int16_t * const coeff,
- ;                                  const uint32_t quant);
+ ;                                  const uint32_t quant,
+ ;                                  const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ; this is the same as dequant_inter_3dne,
  ; except that we're saturating using 'pminsw' (saves 2 cycles/loop)
  ; This is Athlon-optimized code (ca 100 clk per call)
- align ALIGN
+ ALIGN 16
  cglobal dequant_h263_inter_3dne
  dequant_h263_inter_3dne:
          mov             ecx, [esp+ 8]                   ; coeff
-Line 753
+Line 757
          psraw   mm3, 15                         ; sign(c)       ;B7 (2nd)
          mov             edx, [dword esp+ 4+12]          ; data
- align 8
+ ALIGN 8
          dequant 0
          dequant 1
-Line 778
+Line 782
          xor             eax, eax
          ret
+ .endfunc

 Legend:



Removed from v.1.1.2.2
 


changed lines


 
Added in v.1.5
 Legend:



Removed from v.1.1.2.2
 


changed lines


 
Added in v.1.5
-Removed from v.1.1.2.2
+Added in v.1.5

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4