Diff of /xvidcore/src/quant/x86_asm/quantize_h263_3dne.asm

-revision 1.1.2.2, Thu Oct  9 18:50:22 2003 UTC
+revision 1.2.2.1, Sat Jul 24 11:38:12 2004 UTC
 Line 29
  ; enable dequant saturate [-2048,2047], test purposes only.
  %define SATURATE
- ; data/text alignment
+ BITS 32
- %define ALIGN 16
- bits 32
  %macro cglobal 1
          %ifdef PREFIX
-Line 43
+Line 40
          %endif
  %endmacro
- ;***************************************************************************
+ ;=============================================================================
  ; Local data
- ;***************************************************************************
+ ;=============================================================================
  %ifdef FORMAT_COFF
- section .data data
+ SECTION .rodata
  %else
- section .data data align=16
+ SECTION .rodata align=16
  %endif
  align 4
-Line 62
+Line 59
          %assign i i+1
  %endrep
- align 16
+ ALIGN 16
  plus_one:
          times 8 dw 1
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- ;
  ; subtract by Q/2 table
- ;
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
- align 16
+ ALIGN 16
  mmx_sub:
  %assign i 1
  %rep 31
-Line 81
+Line 76
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; divide by 2Q table
  ;
-Line 89
+Line 84
  ; for q=1, _pmulhw_ will overflow so it is treated separately
  ; (3dnow2 provides _pmulhuw_ which won't cause overflow)
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
  mmx_div:
  %assign i 1
  %rep 31
-Line 99
+Line 94
          %assign i i+1
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- ;
  ; add by (odd(Q) ? Q : Q - 1) table
- ;
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
- align 16
+ ALIGN 16
  mmx_add:
  %assign i 1
  %rep 31
-Line 117
+Line 110
          %assign i i+1
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- ;
  ; multiply by 2Q table
- ;
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
- align 16
+ ALIGN 16
  mmx_mul:
  %assign i 1
  %rep 31
-Line 131
+Line 122
          %assign i i+1
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- ;
  ; saturation limits
- ;
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
- align 8
+ ALIGN 8
  mmx_32768_minus_2048:
          times 4 dw (32768-2048)
  mmx_32767_minus_2047:
          times 4 dw (32767-2047)
- align 16
+ ALIGN 16
  mmx_2047:
          times 4 dw 2047
- align 8
+ ALIGN 8
  mmzero:
          dd 0, 0
  int2047:
-Line 155
+Line 144
  int_2048:
          dd -2048
- ;***************************************************************************
+ ;=============================================================================
  ; Code
- ;***************************************************************************
+ ;=============================================================================
- section .text
+ SECTION .text
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t quant_h263_intra_3dne(int16_t * coeff,
  ;                                const int16_t * const data,
  ;                                const uint32_t quant,
- ;                                const uint32_t dcscalar);
+ ;                                const uint32_t dcscalar,
+ ;                                const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;This is Athlon-optimized code (ca 70 clk per call)
  %macro quant_intra1  1
          psubw   mm1, mm0        ;A3
          psubw   mm3, mm2        ;B3
  %if (%1)
-Line 181
+Line 170
          psubw   mm7, mm6        ;D8
  %endif
- align 8
+ ALIGN 8
          db      0Fh, 6Fh, 64h, 21h, (%1 * 32 +16)       ;movq   mm4, [ecx + %1 * 32 +16+32]     ;C1
          pmaxsw  mm1, mm0        ;A4
          db      0Fh, 6Fh, 74h, 21h, (%1 * 32 +24)       ;movq   mm6, [ecx + %1 * 32 +24+32]     ;D1
-Line 255
+Line 244
          psubw   mm7, mm6        ;D8
  %endif
- align 8
+ ALIGN 8
          db      0Fh, 6Fh, 64h, 21h, (%1 * 32 +16)       ;movq   mm4, [ecx + %1 * 32 +16+32]     ;C1
          pmaxsw  mm1, mm0        ;A4
          db      0Fh, 6Fh, 74h, 21h, (%1 * 32 +24)       ;movq   mm6, [ecx + %1 * 32 +24+32]     ;D1
-Line 322
+Line 311
  %endmacro
- align ALIGN
+ ALIGN 16
  cglobal quant_h263_intra_3dne
  quant_h263_intra_3dne:
-Line 377
+Line 366
          xor             eax, eax
          ret
-         align 16
+ ALIGN 16
  .q1loop
  quant_intra1 0
-Line 416
+Line 405
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t quant_h263_inter_3dne(int16_t * coeff,
  ;                                const int16_t * const data,
- ;                                const uint32_t quant);
+ ;                                const uint32_t quant,
+ ;                                const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;This is Athlon-optimized code (ca 90 clk per call)
  ;Optimized by Jaan, 30 Nov 2002
-Line 497
+Line 487
          movq    [edx + %1*16+8], mm4
  %endmacro
- align ALIGN
+ ALIGN 16
  cglobal quant_h263_inter_3dne
  quant_h263_inter_3dne:
          mov             edx, [esp  + 4]         ; coeff
-Line 514
+Line 504
          lea             eax, [mmzero]
          jz              near .q1loop
          cmp             esp, esp
- align 8
+ ALIGN 8
          movq    mm3, [ecx + 120]        ;B1
          pxor    mm4, mm4                ;B2
          psubw   mm4, mm3                ;B3
-Line 545
+Line 535
          ret
- align ALIGN
+ ALIGN 16
  .q1loop
          movq mm6, [byte ebx]
-Line 568
+Line 558
          ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dequant_h263_intra_3dne(int16_t *data,
  ;                                  const int16_t * const coeff,
  ;                                  const uint32_t quant,
- ;                                  const uint32_t dcscalar);
+ ;                                  const uint32_t dcscalar,
+ ;                                  const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
    ; this is the same as dequant_inter_3dne, except that we're
    ; saturating using 'pminsw' (saves 2 cycles/loop => ~5% faster)
-Line 601
+Line 592
          movq    mm4, [esi]              ;C1 ;0
          mov             esp, esp
          pcmpeqw mm6, [ecx+%1*24]        ;A6 (c ==0) ? -1 : 0 (1st)
- align 4
+ ALIGN 4
          psraw   mm1, 15                 ; sign(c)       ;A7 (2nd)
  %if (%1)
          movq    [edx+%1*24+16-24], mm5  ; C14 (7th) 2later
-Line 647
+Line 638
  %endmacro
- align ALIGN
+ ALIGN 16
  cglobal dequant_h263_intra_3dne
  dequant_h263_intra_3dne:
          mov             ecx, [esp+ 8]                   ; coeff
-Line 674
+Line 665
          psraw   mm3, 15                         ; sign(c)       ;B7 (2nd)
          mov             edx, [esp+ 4+16]                ; data
- align 8
+ ALIGN 8
          dequant 0
          cmp             ebp, -2048
-Line 718
+Line 709
          xor             eax, eax
          ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dequant_h263_inter_3dne(int16_t * data,
  ;                                  const int16_t * const coeff,
- ;                                  const uint32_t quant);
+ ;                                  const uint32_t quant,
+ ;                                  const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ; this is the same as dequant_inter_3dne,
  ; except that we're saturating using 'pminsw' (saves 2 cycles/loop)
  ; This is Athlon-optimized code (ca 100 clk per call)
- align ALIGN
+ ALIGN 16
  cglobal dequant_h263_inter_3dne
  dequant_h263_inter_3dne:
          mov             ecx, [esp+ 8]                   ; coeff
-Line 753
+Line 745
          psraw   mm3, 15                         ; sign(c)       ;B7 (2nd)
          mov             edx, [dword esp+ 4+12]          ; data
- align 8
+ ALIGN 8
          dequant 0
          dequant 1

 Legend:



Removed from v.1.1.2.2
 


changed lines


 
Added in v.1.2.2.1
 Legend:



Removed from v.1.1.2.2
 


changed lines


 
Added in v.1.2.2.1
-Removed from v.1.1.2.2
+Added in v.1.2.2.1

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4