Diff of /xvidcore/src/quant/x86_asm/quantize_h263_mmx.asm

-revision 1.1.2.2, Thu Oct  9 18:50:22 2003 UTC
+revision 1.1.2.5, Sun Nov 30 16:13:16 2003 UTC
 Line 27
  ; enable dequant saturate [-2048,2047], test purposes only.
  %define SATURATE
- ; data/text alignment
+ BITS 32
- %define ALIGN 8
- bits 32
  %macro cglobal 1
         %ifdef PREFIX
-Line 41
+Line 38
          %endif
  %endmacro
- ;***************************************************************************
+ ;=============================================================================
- ; Local data
+ ; Read only Local data
- ;***************************************************************************
+ ;=============================================================================
  %ifdef FORMAT_COFF
- section .data data
+ SECTION .rodata data
  %else
- section .data data align=16
+ SECTION .rodata data align=16
  %endif
- align 16
+ ALIGN 16
  plus_one:
          times 8 dw 1
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; subtract by Q/2 table
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
  mmx_sub:
  %assign quant 1
  %rep 31
-Line 69
+Line 66
          %assign quant quant+1
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; divide by 2Q table
  ;
-Line 77
+Line 74
  ; for q=1, _pmulhw_ will overflow, so it is treated separately
  ; (3dnow2 provides _pmulhuw_, which won't cause overflow)
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
  mmx_div:
  %assign quant 1
  %rep 31
-Line 87
+Line 84
          %assign quant quant+1
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; add by (odd(Q) ? Q : Q - 1) table
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
  mmx_add:
  %assign quant 1
  %rep 31
-Line 105
+Line 102
          %assign quant quant+1
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; multiply by 2Q table
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
  mmx_mul:
  %assign quant 1
  %rep 31
-Line 119
+Line 116
          %assign quant quant+1
  %endrep
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; saturation limits
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
  sse2_2047:
          times 8 dw 2047
- align 16
+ ALIGN 16
  mmx_2047:
          times 4 dw 2047
- align 8
+ ALIGN 8
  mmx_32768_minus_2048:
          times 4 dw (32768-2048)
-Line 141
+Line 138
          times 4 dw (32767-2047)
- ;***************************************************************************
+ ;=============================================================================
  ; Code
- ;***************************************************************************
+ ;=============================================================================
- section .text
+ SECTION .text
+ cglobal quant_h263_intra_mmx
+ cglobal quant_h263_intra_sse2
+ cglobal quant_h263_inter_mmx
+ cglobal quant_h263_inter_sse2
+ cglobal dequant_h263_intra_mmx
+ cglobal dequant_h263_intra_xmm
+ cglobal dequant_h263_intra_sse2
+ cglobal dequant_h263_inter_mmx
+ cglobal dequant_h263_inter_xmm
+ cglobal dequant_h263_inter_sse2
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t quant_h263_intra_mmx(int16_t * coeff,
  ;                               const int16_t * const data,
  ;                               const uint32_t quant,
- ;                               const uint32_t dcscalar);
+ ;                               const uint32_t dcscalar,
+ ;                               const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
+ ALIGN 16
- cglobal quant_h263_intra_mmx
  quant_h263_intra_mmx:
          push    ecx
-Line 175
+Line 182
          movq    mm7, [mmx_div + eax * 8 - 8]
- align ALIGN
+ ALIGN 16
  .loop
          movq    mm0, [esi + 8*ecx]      ; mm0 = [1st]
          movq    mm3, [esi + 8*ecx + 8]
-Line 227
+Line 234
          ret
- align ALIGN
+ ALIGN 16
  .q1loop
          movq    mm0, [esi + 8*ecx]      ; mm0 = [1st]
          movq    mm3, [esi + 8*ecx + 8]
-Line 255
+Line 262
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t quant_h263_intra_sse2(int16_t * coeff,
  ;                                const int16_t * const data,
  ;                                const uint32_t quant,
- ;                                const uint32_t dcscalar);
+ ;                                const uint32_t dcscalar,
+ ;                                const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
+ ALIGN 16
- cglobal quant_h263_intra_sse2
  quant_h263_intra_sse2:
          push    esi
-Line 284
+Line 291
          movq2dq xmm7, mm7
          movlhps xmm7, xmm7
- align 16
+ ALIGN 16
  .qas2_loop
          movdqa  xmm0, [esi + ecx*8]                ; xmm0 = [1st]
          movdqa  xmm3, [esi + ecx*8 + 16]           ; xmm3 = [2nd]
-Line 335
+Line 342
          ret
- align 16
+ ALIGN 16
  .qas2_q1loop
          movdqa  xmm0, [esi + ecx*8]         ; xmm0 = [1st]
          movdqa  xmm3, [esi + ecx*8 + 16]    ; xmm3 = [2nd]
-Line 363
+Line 370
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t quant_h263_inter_mmx(int16_t * coeff,
  ;                               const int16_t * const data,
- ;                               const uint32_t quant);
+ ;                               const uint32_t quant,
+ ;                               const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
+ ALIGN 16
- cglobal quant_h263_inter_mmx
  quant_h263_inter_mmx:
          push    ecx
-Line 393
+Line 400
          movq    mm7, [mmx_div + eax * 8 - 8] ; divider
- align ALIGN
+ ALIGN 8
  .loop
          movq    mm0, [esi + 8*ecx]      ; mm0 = [1st]
          movq    mm3, [esi + 8*ecx + 8]
-Line 435
+Line 442
          ret
- align ALIGN
+ ALIGN 8
  .q1loop
          movq    mm0, [esi + 8*ecx]              ; mm0 = [1st]
          movq    mm3, [esi + 8*ecx+ 8]           ;
-Line 468
+Line 475
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t quant_h263_inter_sse2(int16_t * coeff,
  ;                                const int16_t * const data,
- ;                                const uint32_t quant);
+ ;                                const uint32_t quant,
+ ;                                const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
- cglobal quant_h263_inter_sse2
  quant_h263_inter_sse2:
          push    esi
-Line 503
+Line 510
          movq2dq xmm7, mm0
          movlhps xmm7, xmm7
- align 16
+ ALIGN 16
  .qes2_loop
          movdqa  xmm0, [esi + ecx*8]                ; xmm0 = [1st]
          movdqa  xmm3, [esi + ecx*8 + 16]           ; xmm3 = [2nd]
-Line 550
+Line 557
          ret
- align 16
+ ALIGN 16
  .qes2_q1loop
          movdqa  xmm0, [esi + ecx*8]             ; xmm0 = [1st]
          movdqa  xmm3, [esi + ecx*8 + 16]        ; xmm3 = [2nd]
-Line 581
+Line 588
          jmp             .qes2_done
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dequant_h263_intra_mmx(int16_t *data,
  ;                                 const int16_t * const coeff,
  ;                                 const uint32_t quant,
- ;                                 const uint32_t dcscalar);
+ ;                                 const uint32_t dcscalar,
+ ;                                 const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
    ; note: we only saturate to +2047 *before* restoring the sign.
    ; Hence, final clamp really is [-2048,2047]
- align ALIGN
+ ALIGN 16
- cglobal dequant_h263_intra_mmx
  dequant_h263_intra_mmx:
          mov             edx, [esp+ 4]                   ; data
-Line 604
+Line 611
          movq    mm7, [mmx_mul + eax*8 - 8]      ; 2*quant
          mov             eax, -16
- align ALIGN
+ ALIGN 16
  .loop
          movq    mm0, [ecx+8*eax+8*16]           ; c  = coeff[i]
          movq    mm3, [ecx+8*eax+8*16 + 8]       ; c' = coeff[i+1]
-Line 658
+Line 665
          xor              eax, eax                               ; return(0);
          ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dequant_h263_intra_xmm(int16_t *data,
  ;                                 const int16_t * const coeff,
  ;                                 const uint32_t quant,
- ;                                 const uint32_t dcscalar);
+ ;                                 const uint32_t dcscalar,
+ ;                                 const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
    ; this is the same as dequant_h263_intra_mmx, except that we're
    ; saturating using 'pminsw' (saves 2 cycles/loop => ~5% faster)
- align ALIGN
+ ALIGN 16
- cglobal dequant_h263_intra_xmm
  dequant_h263_intra_xmm:
          mov             edx, [esp+ 4]        ; data
-Line 681
+Line 688
          movq    mm7, [mmx_mul + eax*8 - 8]  ; 2*quant
          mov             eax, -16
- align ALIGN
+ ALIGN 16
  .loop
          movq    mm0, [ecx+8*eax+8*16]      ; c  = coeff[i]
          movq    mm3, [ecx+8*eax+8*16 + 8]  ; c' = coeff[i+1]
-Line 734
+Line 741
          ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dequant_h263_intra_sse2(int16_t *data,
  ;                                  const int16_t * const coeff,
  ;                                  const uint32_t quant,
- ;                                  const uint32_t dcscalar);
+ ;                                  const uint32_t dcscalar,
+ ;                                  const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
+ ALIGN 16
- cglobal dequant_h263_intra_sse2
  dequant_h263_intra_sse2:
          mov             edx, [esp+ 4]        ; data
          mov             ecx, [esp+ 8]        ; coeff
-Line 757
+Line 764
          movlhps xmm7, xmm7
          mov             eax, -16
- align ALIGN
+ ALIGN 16
  .loop
          movdqa  xmm0, [ecx + 8*16 + 8*eax]      ; c  = coeff[i]
          movdqa  xmm3, [ecx + 8*16 + 8*eax+ 16]
-Line 809
+Line 816
          xor             eax, eax
          ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dequant_h263_inter_mmx(int16_t * data,
  ;                                const int16_t * const coeff,
- ;                                const uint32_t quant);
+ ;                                const uint32_t quant,
+ ;                                const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
+ ALIGN 16
- cglobal dequant_h263_inter_mmx
  dequant_h263_inter_mmx:
          mov             edx, [esp+ 4]        ; data
-Line 828
+Line 835
          movq    mm7, [mmx_mul + eax*8 - 8]  ; 2*quant
          mov             eax, -16
- align ALIGN
+ ALIGN 16
  .loop
          movq    mm0, [ecx+8*eax+8*16]      ; c  = coeff[i]
          movq    mm3, [ecx+8*eax+8*16 + 8]  ; c' = coeff[i+1]
-Line 869
+Line 876
          xor             eax, eax
          ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dequant_h263_inter_xmm(int16_t * data,
  ;                                 const int16_t * const coeff,
- ;                                 const uint32_t quant);
+ ;                                 const uint32_t quant,
+ ;                                 const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
    ; this is the same as dequant_inter_mmx,
    ; except that we're saturating using 'pminsw' (saves 2 cycles/loop)
- align ALIGN
+ ALIGN 16
- cglobal dequant_h263_inter_xmm
  dequant_h263_inter_xmm:
          mov             edx, [esp+ 4]        ; data
-Line 891
+Line 898
          movq    mm7, [mmx_mul + eax*8 - 8]  ; 2*quant
          mov             eax, -16
- align ALIGN
+ ALIGN 16
  .loop
          movq    mm0, [ecx+8*eax+8*16]      ; c  = coeff[i]
          movq    mm3, [ecx+8*eax+8*16 + 8]  ; c' = coeff[i+1]
-Line 930
+Line 937
          xor             eax, eax
          ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; uint32_t dequant_h263_inter_sse2(int16_t * data,
  ;                                  const int16_t * const coeff,
- ;                                  const uint32_t quant);
+ ;                                  const uint32_t quant,
+ ;                                  const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
+ ALIGN 16
- cglobal dequant_h263_inter_sse2
  dequant_h263_inter_sse2:
          mov             edx, [esp + 4]  ; data
          mov             ecx, [esp + 8]  ; coeff
-Line 952
+Line 959
          movlhps xmm7, xmm7
          mov             eax, -16
- align ALIGN
+ ALIGN 16
  .loop
          movdqa  xmm0, [ecx + 8*16 + 8*eax]  ; c  = coeff[i]
          movdqa  xmm3, [ecx + 8*16 + 8*eax + 16]

 Legend:



Removed from v.1.1.2.2
 


changed lines


 
Added in v.1.1.2.5
 Legend:



Removed from v.1.1.2.2
 


changed lines


 
Added in v.1.1.2.5
-Removed from v.1.1.2.2
+Added in v.1.1.2.5

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4