343 |
%endif |
%endif |
344 |
|
|
345 |
push _EBX |
push _EBX |
346 |
mov _EBX, mmzero |
lea _EBX, [mmzero] |
347 |
push _EDI |
push _EDI |
348 |
jz near .q1loop |
jz near .q1loop |
349 |
|
|
351 |
mov _EBP, [_ESP + (4+4)*PTR_SIZE] ; dcscalar |
mov _EBP, [_ESP + (4+4)*PTR_SIZE] ; dcscalar |
352 |
; NB -- there are 3 pushes in the function preamble and one more |
; NB -- there are 3 pushes in the function preamble and one more |
353 |
; in "quant_intra 0", thus an added offset of 16 bytes |
; in "quant_intra 0", thus an added offset of 16 bytes |
354 |
XVID_MOVSX _EAX, word [byte _ECX] ; DC |
movsx _EAX, word [byte _ECX] ; DC |
355 |
|
|
356 |
quant_intra 1 |
quant_intra 1 |
357 |
mov _EDI, _EAX |
mov _EDI, _EAX |
398 |
.q1loop: |
.q1loop: |
399 |
quant_intra1 0 |
quant_intra1 0 |
400 |
mov _EBP, [_ESP + (4+4)*PTR_SIZE] ; dcscalar |
mov _EBP, [_ESP + (4+4)*PTR_SIZE] ; dcscalar |
401 |
XVID_MOVSX _EAX, word [byte _ECX] ; DC |
movsx _EAX, word [byte _ECX] ; DC |
402 |
|
|
403 |
quant_intra1 1 |
quant_intra1 1 |
404 |
mov _EDI, _EAX |
mov _EDI, _EAX |
752 |
lea _EDI, [mmx_mul + _EAX*8 - 8] ; 2*quant |
lea _EDI, [mmx_mul + _EAX*8 - 8] ; 2*quant |
753 |
%endif |
%endif |
754 |
push _EBP |
push _EBP |
755 |
mov _EBX, mmx_2047 |
lea _EBX, [mmx_2047] |
756 |
XVID_MOVSX _EBP, word [_ECX] |
movsx _EBP, word [_ECX] |
757 |
%ifdef ARCH_IS_X86_64 |
%ifdef ARCH_IS_X86_64 |
758 |
lea r9, [mmx_add] |
lea r9, [mmx_add] |
759 |
lea _EAX, [r9 + _EAX*8 - 8] ; quant or quant-1 |
lea _EAX, [r9 + _EAX*8 - 8] ; quant or quant-1 |
761 |
lea _EAX, [mmx_add + _EAX*8 - 8] ; quant or quant-1 |
lea _EAX, [mmx_add + _EAX*8 - 8] ; quant or quant-1 |
762 |
%endif |
%endif |
763 |
push _ESI |
push _ESI |
764 |
mov _ESI, mmzero |
lea _ESI, [mmzero] |
765 |
pxor mm7, mm7 |
pxor mm7, mm7 |
766 |
movq mm3, [_ECX+120] ;B2 ; c = coeff[i] |
movq mm3, [_ECX+120] ;B2 ; c = coeff[i] |
767 |
pcmpeqw mm7, [_ECX+120] ;B6 (c ==0) ? -1 : 0 (1st) |
pcmpeqw mm7, [_ECX+120] ;B6 (c ==0) ? -1 : 0 (1st) |
875 |
%else |
%else |
876 |
lea _EDI, [mmx_mul + _EAX*8 - 8] ; 2*quant |
lea _EDI, [mmx_mul + _EAX*8 - 8] ; 2*quant |
877 |
%endif |
%endif |
878 |
mov _EBX, mmx_2047 |
lea _EBX, [mmx_2047] |
879 |
pxor mm7, mm7 |
pxor mm7, mm7 |
880 |
movq mm3, [_ECX+120] ;B2 ; c = coeff[i] |
movq mm3, [_ECX+120] ;B2 ; c = coeff[i] |
881 |
pcmpeqw mm7, [_ECX+120] ;B6 (c ==0) ? -1 : 0 (1st) |
pcmpeqw mm7, [_ECX+120] ;B6 (c ==0) ? -1 : 0 (1st) |
886 |
lea _EAX, [mmx_add + _EAX*8 - 8] ; quant or quant-1 |
lea _EAX, [mmx_add + _EAX*8 - 8] ; quant or quant-1 |
887 |
%endif |
%endif |
888 |
psubw mm2, mm3 ;-c ;B3 (1st dep) |
psubw mm2, mm3 ;-c ;B3 (1st dep) |
889 |
mov _ESI, mmzero |
lea _ESI, [mmzero] |
890 |
pmaxsw mm2, mm3 ;|c| ;B4 (2nd) |
pmaxsw mm2, mm3 ;|c| ;B4 (2nd) |
891 |
pmullw mm2, [_EDI] ;*= 2Q ;B8 (3rd+) |
pmullw mm2, [_EDI] ;*= 2Q ;B8 (3rd+) |
892 |
psraw mm3, 15 ; sign(c) ;B7 (2nd) |
psraw mm3, 15 ; sign(c) ;B7 (2nd) |