Diff of /xvidcore/src/quant/x86_asm/quantize_h263_mmx.asm

-revision 1.1.2.1, Tue Oct  7 13:02:35 2003 UTC
+revision 1.1.2.5, Sun Nov 30 16:13:16 2003 UTC
 Line 1
- ;/**************************************************************************
+ ;/*****************************************************************************
  ; *
  ; *     XVID MPEG-4 VIDEO CODEC
- ; *     mmx quantization/dequantization
+ ; *  - MPEG4 Quantization H263 implementation / MMX optimized -
  ; *
- ; *     This program is an implementation of a part of one or more MPEG-4
+ ; *  Copyright(C) 2001-2003 Peter Ross <pross@xvid.org>
- ; *     Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
+ ; *               2002-2003 Pascal Massimino <skal@planet-d.net>
- ; *     to use this software module in hardware or software products are
- ; *     advised that its use may infringe existing patents or copyrights, and
- ; *     any such use would be at such party's own risk.  The original
- ; *     developer of this software module and his/her company, and subsequent
- ; *     editors and their companies, will have no liability for use of this
- ; *     software or modifications or derivatives thereof.
  ; *
  ; *     This program is free software; you can redistribute it and/or modify
  ; *     it under the terms of the GNU General Public License as published by
-Line 24
+Line 18
  ; *
  ; *     You should have received a copy of the GNU General Public License
  ; *     along with this program; if not, write to the Free Software
- ; *     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  ; *
- ; *************************************************************************/
+ ; * $Id$
- ;/**************************************************************************
- ; *
- ; *     History:
- ; *
- ; * 09.08.2002  sse2 dequant funcs revamped
- ; * 14.06.2002  mmx+xmm dequant_* funcs revamped  -Skal-
- ; * 24.02.2002  sse2 quant_intra / dequant_intra (have to use movdqu ???)
- ; * 17.04.2002  sse2 quant_inter / dequant_inter
- ; * 26.12.2001  minor bug fixes, dequant saturate, further optimization
- ; * 19.11.2001  quant_inter_mmx now returns sum of abs. coefficient values
- ; *     04.11.2001      nasm version; (c)2001 peter ross <pross@cs.rmit.edu.au>
  ; *
- ; *************************************************************************/
+ ; ****************************************************************************/
  ; enable dequant saturate [-2048,2047], test purposes only.
  %define SATURATE
- ; data/text alignment
+ BITS 32
- %define ALIGN 8
- bits 32
- section .data
  %macro cglobal 1
          %ifdef PREFIX
-Line 62
+Line 38
          %endif
  %endmacro
- align 16
+ ;=============================================================================
+ ; Read only Local data
+ ;=============================================================================
- plus_one times 8        dw       1
+ %ifdef FORMAT_COFF
+ SECTION .rodata data
+ %else
+ SECTION .rodata data align=16
+ %endif
- ;===========================================================================
+ ALIGN 16
+ plus_one:
+         times 8 dw 1
+ ;-----------------------------------------------------------------------------
  ;
  ; subtract by Q/2 table
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- %macro MMX_SUB  1
+ ALIGN 16
- times 4 dw %1 / 2
+ mmx_sub:
- %endmacro
+ %assign quant 1
+ %rep 31
+         times 4 dw  quant / 2
+         %assign quant quant+1
+ %endrep
- align 16
+ ;-----------------------------------------------------------------------------
- mmx_sub
-                 MMX_SUB 1
-                 MMX_SUB 2
-                 MMX_SUB 3
-                 MMX_SUB 4
-                 MMX_SUB 5
-                 MMX_SUB 6
-                 MMX_SUB 7
-                 MMX_SUB 8
-                 MMX_SUB 9
-                 MMX_SUB 10
-                 MMX_SUB 11
-                 MMX_SUB 12
-                 MMX_SUB 13
-                 MMX_SUB 14
-                 MMX_SUB 15
-                 MMX_SUB 16
-                 MMX_SUB 17
-                 MMX_SUB 18
-                 MMX_SUB 19
-                 MMX_SUB 20
-                 MMX_SUB 21
-                 MMX_SUB 22
-                 MMX_SUB 23
-                 MMX_SUB 24
-                 MMX_SUB 25
-                 MMX_SUB 26
-                 MMX_SUB 27
-                 MMX_SUB 28
-                 MMX_SUB 29
-                 MMX_SUB 30
-                 MMX_SUB 31
- ;===========================================================================
  ;
  ; divide by 2Q table
  ;
-Line 120
+Line 74
  ; for q=1, _pmulhw_ will overflow so it is treated separately
  ; (3dnow2 provides _pmulhuw_ which won't cause overflow)
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- %macro MMX_DIV  1
- times 4 dw  (1 << 16) / (%1 * 2) + 1
- %endmacro
- align 16
+ ALIGN 16
- mmx_div
+ mmx_div:
-                 MMX_DIV 1
+ %assign quant 1
-                 MMX_DIV 2
+ %rep 31
-                 MMX_DIV 3
+         times 4 dw  (1<<16) / (quant*2) + 1
-                 MMX_DIV 4
+         %assign quant quant+1
-                 MMX_DIV 5
+ %endrep
-                 MMX_DIV 6
-                 MMX_DIV 7
-                 MMX_DIV 8
-                 MMX_DIV 9
-                 MMX_DIV 10
-                 MMX_DIV 11
-                 MMX_DIV 12
-                 MMX_DIV 13
-                 MMX_DIV 14
-                 MMX_DIV 15
-                 MMX_DIV 16
-                 MMX_DIV 17
-                 MMX_DIV 18
-                 MMX_DIV 19
-                 MMX_DIV 20
-                 MMX_DIV 21
-                 MMX_DIV 22
-                 MMX_DIV 23
-                 MMX_DIV 24
-                 MMX_DIV 25
-                 MMX_DIV 26
-                 MMX_DIV 27
-                 MMX_DIV 28
-                 MMX_DIV 29
-                 MMX_DIV 30
-                 MMX_DIV 31
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
  ;
  ; add by (odd(Q) ? Q : Q - 1) table
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- %macro MMX_ADD  1
+ ALIGN 16
- %if %1 % 2 != 0
+ mmx_add:
- times 4 dw %1
+ %assign quant 1
+ %rep 31
+         %if quant % 2 != 0
+         times 4 dw  quant
  %else
- times 4 dw %1 - 1
+         times 4 dw quant - 1
  %endif
- %endmacro
+         %assign quant quant+1
+ %endrep
- align 16
- mmx_add
-                 MMX_ADD 1
-                 MMX_ADD 2
-                 MMX_ADD 3
-                 MMX_ADD 4
-                 MMX_ADD 5
-                 MMX_ADD 6
-                 MMX_ADD 7
-                 MMX_ADD 8
-                 MMX_ADD 9
-                 MMX_ADD 10
-                 MMX_ADD 11
-                 MMX_ADD 12
-                 MMX_ADD 13
-                 MMX_ADD 14
-                 MMX_ADD 15
-                 MMX_ADD 16
-                 MMX_ADD 17
-                 MMX_ADD 18
-                 MMX_ADD 19
-                 MMX_ADD 20
-                 MMX_ADD 21
-                 MMX_ADD 22
-                 MMX_ADD 23
-                 MMX_ADD 24
-                 MMX_ADD 25
-                 MMX_ADD 26
-                 MMX_ADD 27
-                 MMX_ADD 28
-                 MMX_ADD 29
-                 MMX_ADD 30
-                 MMX_ADD 31
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
  ;
  ; multiply by 2Q table
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- %macro MMX_MUL  1
- times 4 dw %1 * 2
- %endmacro
- align 16
+ ALIGN 16
- mmx_mul
+ mmx_mul:
-                 MMX_MUL 1
+ %assign quant 1
-                 MMX_MUL 2
+ %rep 31
-                 MMX_MUL 3
+         times 4 dw  quant*2
-                 MMX_MUL 4
+         %assign quant quant+1
-                 MMX_MUL 5
+ %endrep
-                 MMX_MUL 6
-                 MMX_MUL 7
-                 MMX_MUL 8
-                 MMX_MUL 9
-                 MMX_MUL 10
-                 MMX_MUL 11
-                 MMX_MUL 12
-                 MMX_MUL 13
-                 MMX_MUL 14
-                 MMX_MUL 15
-                 MMX_MUL 16
-                 MMX_MUL 17
-                 MMX_MUL 18
-                 MMX_MUL 19
-                 MMX_MUL 20
-                 MMX_MUL 21
-                 MMX_MUL 22
-                 MMX_MUL 23
-                 MMX_MUL 24
-                 MMX_MUL 25
-                 MMX_MUL 26
-                 MMX_MUL 27
-                 MMX_MUL 28
-                 MMX_MUL 29
-                 MMX_MUL 30
-                 MMX_MUL 31
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
  ;
  ; saturation limits
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
+ ALIGN 16
+ sse2_2047:
+         times 8 dw 2047
- align 16
+ ALIGN 16
- sse2_2047       times 8 dw 2047
+ mmx_2047:
+         times 4 dw 2047
- align 16
+ ALIGN 8
- mmx_2047        times 4 dw 2047
+ mmx_32768_minus_2048:
+         times 4 dw (32768-2048)
- align 8
+ mmx_32767_minus_2047:
- mmx_32768_minus_2048                            times 4 dw (32768-2048)
+         times 4 dw (32767-2047)
- mmx_32767_minus_2047                            times 4 dw (32767-2047)
- section .text
+ ;=============================================================================
+ ; Code
+ ;=============================================================================
+ SECTION .text
+ cglobal quant_h263_intra_mmx
+ cglobal quant_h263_intra_sse2
+ cglobal quant_h263_inter_mmx
+ cglobal quant_h263_inter_sse2
+ cglobal dequant_h263_intra_mmx
+ cglobal dequant_h263_intra_xmm
+ cglobal dequant_h263_intra_sse2
+ cglobal dequant_h263_inter_mmx
+ cglobal dequant_h263_inter_xmm
+ cglobal dequant_h263_inter_sse2
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
- ; void quant_intra_mmx(int16_t * coeff,
+ ; uint32_t quant_h263_intra_mmx(int16_t * coeff,
  ;                                       const int16_t const * data,
  ;                                       const uint32_t quant,
- ;                                       const uint32_t dcscalar);
+ ;                               const uint32_t dcscalar,
+ ;                               const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
+ ALIGN 16
- cglobal quant_h263_intra_mmx
  quant_h263_intra_mmx:
                  push    ecx
-Line 302
+Line 181
                  jz      .q1loop
                  movq    mm7, [mmx_div + eax * 8 - 8]
- align ALIGN
+ ALIGN 16
  .loop
                  movq    mm0, [esi + 8*ecx]              ; mm0 = [1st]
-                 movq    mm3, [esi + 8*ecx + 8]  ;
+   movq mm3, [esi + 8*ecx + 8]
                  pxor    mm1, mm1                ; mm1 = 0
                  pxor    mm4, mm4                ;
                  pcmpgtw mm1, mm0                ; mm1 = (0 > mm0)
-Line 328
+Line 208
                  jnz     .loop
  .done
-         ; calculate data[0] // (int32_t)dcscalar
+     ; calculate data[0] // (int32_t)dcscalar
                  mov     ecx, [esp + 12 + 16]    ; dcscalar
                  mov     edx, ecx
                  movsx   eax, word [esi] ; data[0]
-Line 339
+Line 219
                  sub             eax, edx
                  jmp             short .mul
  .gtzero
                  add             eax, edx
  .mul
                  cdq                             ; expand eax -> edx:eax
                  idiv    ecx                     ; eax = edx:eax / dcscalar
                  mov     [edi], ax               ; coeff[0] = ax
+   xor eax, eax      ; return(0);
                  pop     edi
                  pop     esi
                  pop     ecx
                  ret
- align ALIGN
+ ALIGN 16
  .q1loop
                  movq    mm0, [esi + 8*ecx]              ; mm0 = [1st]
-                 movq    mm3, [esi + 8*ecx + 8]  ;
+   movq mm3, [esi + 8*ecx + 8]
                  pxor    mm1, mm1                ; mm1 = 0
                  pxor    mm4, mm4                ;
                  pcmpgtw mm1, mm0                ; mm1 = (0 > mm0)
-Line 368
+Line 249
                  psrlw   mm0, 1                  ; mm0 >>= 1   (/2)
                  psrlw   mm3, 1                  ;
                  pxor    mm0, mm1                ; mm0 *= sign(mm0)
-                 pxor    mm3, mm4        ;
+   pxor mm3, mm4
                  psubw   mm0, mm1                ; undisplace
                  psubw   mm3, mm4                ;
                  movq    [edi + 8*ecx], mm0
-Line 381
+Line 262
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
- ; void quant_intra_sse2(int16_t * coeff,
+ ; uint32_t quant_h263_intra_sse2(int16_t * coeff,
  ;                                       const int16_t const * data,
  ;                                       const uint32_t quant,
- ;                                       const uint32_t dcscalar);
+ ;                                const uint32_t dcscalar,
+ ;                                const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
+ ALIGN 16
- cglobal quant_h263_intra_sse2
  quant_h263_intra_sse2:
                  push    esi
-Line 410
+Line 291
                  movq2dq xmm7, mm7
                  movlhps xmm7, xmm7
- align 16
+ ALIGN 16
  .qas2_loop
                  movdqa  xmm0, [esi + ecx*8]                     ; xmm0 = [1st]
                  movdqa  xmm3, [esi + ecx*8 + 16]        ; xmm3 = [2nd]
-Line 445
+Line 326
                  sub             eax, edx
                  jmp             short .qas2_mul
  .qas2_gtzero
                  add             eax, edx
  .qas2_mul
                  cdq
                  idiv    ecx
                  mov             [edi], ax
+   xor eax, eax      ; return(0);
                  pop             edi
                  pop             esi
                  ret
- align 16
+ ALIGN 16
  .qas2_q1loop
                  movdqa  xmm0, [esi + ecx*8]                     ; xmm0 = [1st]
                  movdqa  xmm3, [esi + ecx*8 + 16]        ; xmm3 = [2nd]
-Line 486
+Line 370
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
- ; uint32_t quant_inter_mmx(int16_t * coeff,
+ ; uint32_t quant_h263_inter_mmx(int16_t * coeff,
  ;                                       const int16_t const * data,
- ;                                       const uint32_t quant);
+ ;                               const uint32_t quant,
+ ;                               const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
+ ALIGN 16
- cglobal quant_h263_inter_mmx
  quant_h263_inter_mmx:
                  push    ecx
-Line 516
+Line 400
                  movq    mm7, [mmx_div + eax * 8 - 8]    ; divider
- align ALIGN
+ ALIGN 8
  .loop
                  movq    mm0, [esi + 8*ecx]              ; mm0 = [1st]
-                 movq    mm3, [esi + 8*ecx + 8]  ;
+   movq mm3, [esi + 8*ecx + 8]
                  pxor    mm1, mm1                ; mm1 = 0
                  pxor    mm4, mm4                ;
                  pcmpgtw mm1, mm0                ; mm1 = (0 > mm0)
-Line 550
+Line 434
                  movq    mm0, mm5
                  psrlq   mm5, 32
                  paddd   mm0, mm5
-                 movd    eax, mm0                ; return sum
+   movd eax, mm0     ; return sum
                  pop     edi
                  pop     esi
                  pop ecx
                  ret
- align ALIGN
+ ALIGN 8
  .q1loop
                  movq    mm0, [esi + 8*ecx]              ; mm0 = [1st]
                  movq    mm3, [esi + 8*ecx+ 8]           ;
-Line 591
+Line 475
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
- ; uint32_t quant_inter_sse2(int16_t * coeff,
+ ; uint32_t quant_h263_inter_sse2(int16_t * coeff,
  ;                                       const int16_t const * data,
- ;                                       const uint32_t quant);
+ ;                                const uint32_t quant,
+ ;                                const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
- cglobal quant_h263_inter_sse2
  quant_h263_inter_sse2:
                  push    esi
-Line 626
+Line 510
                  movq2dq xmm7, mm0
                  movlhps xmm7, xmm7
- align 16
+ ALIGN 16
  .qes2_loop
                  movdqa  xmm0, [esi + ecx*8]                     ; xmm0 = [1st]
                  movdqa  xmm3, [esi + ecx*8 + 16]        ; xmm3 = [2nd]
-Line 665
+Line 549
                  movq    mm5, mm0
                  psrlq   mm5, 32
                  paddd   mm0, mm5
                  movd    eax, mm0                                        ; return sum
                  pop             edi
-Line 672
+Line 557
                  ret
- align 16
+ ALIGN 16
  .qes2_q1loop
                  movdqa  xmm0, [esi + ecx*8]                     ; xmm0 = [1st]
                  movdqa  xmm3, [esi + ecx*8 + 16]        ; xmm3 = [2nd]
-Line 703
+Line 588
                  jmp             .qes2_done
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
- ; void dequant_intra_mmx(int16_t *data,
+ ; uint32_t dequant_h263_intra_mmx(int16_t *data,
  ;                                       const int16_t const *coeff,
  ;                                       const uint32_t quant,
- ;                                       const uint32_t dcscalar);
+ ;                                 const uint32_t dcscalar,
+ ;                                 const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
    ; note: we only saturate to +2047 *before* restoring the sign.
    ; Hence, final clamp really is [-2048,2047]
- align ALIGN
+ ALIGN 16
- cglobal dequant_h263_intra_mmx
  dequant_h263_intra_mmx:
    mov    edx, [esp+ 4]        ; data
-Line 726
+Line 611
    movq mm7, [mmx_mul + eax*8 - 8]  ; 2*quant
    mov eax, -16
- align ALIGN
+ ALIGN 16
  .loop
    movq mm0, [ecx+8*eax+8*16]      ; c  = coeff[i]
    movq mm3, [ecx+8*eax+8*16 + 8]  ; c' = coeff[i+1]
-Line 766
+Line 651
    jnz   near .loop
      ; deal with DC
    movd mm0, [ecx]
    pmullw mm0, [esp+16]    ; dcscalar
    movq mm2, [mmx_32767_minus_2047]
-Line 778
+Line 662
    movd eax, mm0
    mov [edx], ax
+   xor eax, eax              ; return(0);
    ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
- ; void dequant_intra_xmm(int16_t *data,
+ ; uint32_t dequant_h263_intra_xmm(int16_t *data,
  ;                                       const int16_t const *coeff,
  ;                                       const uint32_t quant,
- ;                                       const uint32_t dcscalar);
+ ;                                 const uint32_t dcscalar,
+ ;                                 const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
   ; this is the same as dequant_h263_intra_mmx, except that we're
   ; saturating using 'pminsw' (saves 2 cycles/loop => ~5% faster)
- align ALIGN
+ ALIGN 16
- cglobal dequant_h263_intra_xmm
  dequant_h263_intra_xmm:
    mov    edx, [esp+ 4]        ; data
-Line 803
+Line 688
    movq mm7, [mmx_mul + eax*8 - 8]  ; 2*quant
    mov eax, -16
- align ALIGN
+ ALIGN 16
  .loop
    movq mm0, [ecx+8*eax+8*16]      ; c  = coeff[i]
    movq mm3, [ecx+8*eax+8*16 + 8]  ; c' = coeff[i+1]
-Line 841
+Line 726
    jnz   near .loop
      ; deal with DC
    movd mm0, [ecx]
    pmullw mm0, [esp+16]    ; dcscalar
    movq mm2, [mmx_32767_minus_2047]
-Line 853
+Line 737
    movd eax, mm0
    mov [edx], ax
+   xor eax, eax
    ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
- ; void dequant_intra_sse2(int16_t *data,
+ ; uint32_t dequant_h263_intra_sse2(int16_t *data,
  ;                                       const int16_t const *coeff,
  ;                                       const uint32_t quant,
- ;                                       const uint32_t dcscalar);
+ ;                                  const uint32_t dcscalar,
+ ;                                  const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
- cglobal dequant_h263_intra_sse2
+ ALIGN 16
  dequant_h263_intra_sse2:
          mov edx, [esp+ 4]        ; data
          mov ecx, [esp+ 8]        ; coeff
-Line 878
+Line 764
          movlhps xmm7, xmm7
          mov eax, -16
- align ALIGN
+ ALIGN 16
  .loop
          movdqa xmm0, [ecx + 8*16 + 8*eax]      ; c  = coeff[i]
          movdqa xmm3, [ecx + 8*16 + 8*eax+ 16]
-Line 927
+Line 813
          movd eax, mm0
          mov [edx], ax
+   xor eax, eax
          ret
+ ;-----------------------------------------------------------------------------
- ;===========================================================================
  ;
- ; void dequant_inter_mmx(int16_t * data,
+ ; uint32_t dequant_h263_inter_mmx(int16_t * data,
  ;                                       const int16_t * const coeff,
- ;                                       const uint32_t quant);
+ ;                                const uint32_t quant,
+ ;                                const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
+ ALIGN 16
- cglobal dequant_h263_inter_mmx
  dequant_h263_inter_mmx:
    mov    edx, [esp+ 4]        ; data
-Line 951
+Line 835
    movq mm7, [mmx_mul + eax*8 - 8]  ; 2*quant
    mov eax, -16
- align ALIGN
+ ALIGN 16
  .loop
    movq mm0, [ecx+8*eax+8*16]      ; c  = coeff[i]
    movq mm3, [ecx+8*eax+8*16 + 8]  ; c' = coeff[i+1]
-Line 975
+Line 859
    paddw mm3, mm5 ; + offset
    paddw mm0, mm1 ; negate back
    paddw mm3, mm4 ; negate back
      ; saturates to +2047
    movq mm2, [mmx_32767_minus_2047]
    add eax, 2
-Line 990
+Line 873
    movq [edx + 8*eax + 8*16+8 - 2*8], mm3
    jnz   near .loop
+   xor eax, eax
    ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
- ; void dequant_inter_xmm(int16_t * data,
+ ; uint32_t dequant_h263_inter_xmm(int16_t * data,
  ;                                       const int16_t * const coeff,
- ;                                       const uint32_t quant);
+ ;                                 const uint32_t quant,
+ ;                                 const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
   ; this is the same as dequant_h263_inter_mmx,
   ; except that we're saturating using 'pminsw' (saves 2 cycles/loop)
- align ALIGN
+ ALIGN 16
- cglobal dequant_h263_inter_xmm
  dequant_h263_inter_xmm:
    mov    edx, [esp+ 4]        ; data
-Line 1014
+Line 898
    movq mm7, [mmx_mul + eax*8 - 8]  ; 2*quant
    mov eax, -16
- align ALIGN
+ ALIGN 16
  .loop
    movq mm0, [ecx+8*eax+8*16]      ; c  = coeff[i]
    movq mm3, [ecx+8*eax+8*16 + 8]  ; c' = coeff[i+1]
-Line 1038
+Line 922
    paddw mm3, mm5 ; + offset
    paddw mm0, mm1 ; start restoring sign
    paddw mm3, mm4 ; start restoring sign
        ; saturates to +2047
    movq mm2, [mmx_2047]
    pminsw mm0, mm2
-Line 1051
+Line 934
    movq [edx + 8*eax + 8*16+8 - 2*8], mm3
    jnz   near .loop
+   xor eax, eax
    ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
- ; void dequant_inter_sse2(int16_t * data,
+ ; uint32_t dequant_h263_inter_sse2(int16_t * data,
  ;                                       const int16_t * const coeff,
- ;                                       const uint32_t quant);
+ ;                                  const uint32_t quant,
+ ;                                  const uint16_t *mpeg_matrices);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align ALIGN
- cglobal dequant_h263_inter_sse2
+ ALIGN 16
  dequant_h263_inter_sse2:
          mov edx, [esp + 4]      ; data
          mov ecx, [esp + 8]      ; coeff
-Line 1074
+Line 959
          movlhps xmm7, xmm7
          mov eax, -16
- align ALIGN
+ ALIGN 16
  .loop
          movdqa xmm0, [ecx + 8*16 + 8*eax]  ; c  = coeff[i]
          movdqa xmm3, [ecx + 8*16 + 8*eax + 16]
-Line 1113
+Line 998
          movdqa [edx + 8*16 - 8*4 + 8*eax + 16], xmm3
          jnz     near .loop
+   xor eax, eax
          ret

 Legend:



Removed from v.1.1.2.1
 


changed lines


 
Added in v.1.1.2.5
 Legend:



Removed from v.1.1.2.1
 


changed lines


 
Added in v.1.1.2.5
-Removed from v.1.1.2.1
+Added in v.1.1.2.5

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4