--- quantize_mpeg_mmx.asm	2006/07/10 08:09:59	1.6
+++ quantize_mpeg_mmx.asm	2008/11/11 20:46:24	1.10
@@ -21,7 +21,7 @@
 ; *  along with this program ; if not, write to the Free Software
 ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 ; *
-; * $Id: quantize_mpeg_mmx.asm,v 1.6 2006/07/10 08:09:59 syskin Exp $
+; * $Id: quantize_mpeg_mmx.asm,v 1.10 2008/11/11 20:46:24 Isibaar Exp $
 ; *
 ; *************************************************************************/
 
@@ -34,15 +34,19 @@
 		%ifdef MARK_FUNCS
 			global _%1:function %1.endfunc-%1
 			%define %1 _%1:function %1.endfunc-%1
+			%define ENDFUNC .endfunc
 		%else
 			global _%1
 			%define %1 _%1
+			%define ENDFUNC
 		%endif
 	%else
 		%ifdef MARK_FUNCS
 			global %1:function %1.endfunc-%1
+			%define ENDFUNC .endfunc
 		%else
 			global %1
+			%define ENDFUNC
 		%endif
 	%endif
 %endmacro
@@ -221,27 +225,23 @@
   ; calculate DC
   movsx eax, word [eax]     ; data[0]
   mov ecx, [esp + 4 + 16]   ; dcscalar
-  mov edx, ecx
-  shr edx, 1                ; edx = dcscalar /2
-  mov edi, edx
-  neg edi
-  
-  cmp eax, 0
-  cmovg edx, edi
-  sub eax, edx
-
-  mov edi, [esp + 4 + 4]	; coeff again
-
-  cdq                       ; expand eax -> edx:eax
-  idiv ecx                  ; eax = edx:eax / dcscalar
-
-  mov [edi], ax             ; coeff[0] = ax
-
+  mov edx, eax              ; edx = data[0], about to be turned into a sign mask
+  mov edi, ecx              ; edi = dcscalar, kept intact as the idiv divisor
+  shr ecx, 1                ; ecx = dcscalar/2
+  sar edx, 31               ; edx = 0 if data[0] >= 0, -1 if negative (also the high dword for idiv)
+  xor ecx, edx              ; xor + sub with the mask: negates ecx when edx = -1, no-op when edx = 0
+  sub ecx, edx              ; ecx = (data[0] < 0) ? -(dcscalar/2) : dcscalar/2
+  add eax, ecx              ; eax = data[0] plus the rounding bias, which carries the sign of data[0]
+
+  mov ecx, [esp + 4 + 4]	; ecx = coeff pointer, reloaded because ecx was used as scratch above
+  idiv edi                  ; eax = edx:eax / dcscalar
+  mov [ecx], ax             ; coeff[0] = ax
+ 
   pop edi
 
   xor eax, eax              ; return(0);
   ret
-
+ENDFUNC
 
 
 ;-----------------------------------------------------------------------------
@@ -279,7 +279,7 @@
   movq mm7, [mmx_div + eax * 8 - 8] ; divider
 
 ALIGN 16
-.loop
+.loop:
   movq mm0, [esi + 8*ecx]       ; mm0 = [1st]
   movq mm3, [esi + 8*ecx + 8]   ;
   pxor mm1, mm1                 ; mm1 = 0
@@ -319,7 +319,7 @@
   cmp ecx, 16
   jnz near .loop
 
-.done
+.done:
   pmaddwd mm5, [mmx_one]
   movq mm0, mm5
   psrlq mm5, 32
@@ -334,7 +334,7 @@
   ret
 
 ALIGN 16
-.q1loop
+.q1loop:
   movq mm0, [esi + 8*ecx]       ; mm0 = [1st]
   movq mm3, [esi + 8*ecx+ 8]
   pxor mm1, mm1                 ; mm1 = 0
@@ -375,7 +375,7 @@
   jmp .done
 
 ALIGN 16
-.q2loop
+.q2loop:
   movq mm0, [esi + 8*ecx]       ; mm0 = [1st]
   movq mm3, [esi + 8*ecx+ 8]
   pxor mm1, mm1                 ; mm1 = 0
@@ -414,7 +414,7 @@
   jnz near .q2loop
 
   jmp .done
-.endfunc
+ENDFUNC
 
 
 ;-----------------------------------------------------------------------------
@@ -482,7 +482,7 @@
   pxor mm6, mm6     ; this is a NOP
 
 ALIGN 16
-.loop
+.loop:
   movq mm0, [ecx+8*eax + 8*16]   ; mm0 = c  = coeff[i]
   movq mm3, [ecx+8*eax + 8*16 +8]; mm3 = c' = coeff[i+1]
   pxor mm1, mm1
@@ -554,7 +554,7 @@
   pop ebx
 
   ret
-.endfunc
+ENDFUNC
 
 ;-----------------------------------------------------------------------------
 ;
@@ -586,7 +586,7 @@
   pxor mm6, mm6     ; mismatch sum
 
 ALIGN 16
-.loop
+.loop:
   movq mm0, [ecx+8*eax + 8*16   ]   ; mm0 = coeff[i]
   movq mm2, [ecx+8*eax + 8*16 +8]   ; mm2 = coeff[i+1]
   add eax, 2
@@ -668,5 +668,10 @@
   pop ebx
 
   ret
-.endfunc
+ENDFUNC
+; Emit a .note.GNU-stack section (noalloc, noexec) when assembling for the `elf` output format.
+; NOTE(review): only the exact format name `elf` is matched; -f elf32 / -f elf64 may skip this - confirm.
+%ifidn __OUTPUT_FORMAT__,elf
+section ".note.GNU-stack" noalloc noexec nowrite progbits
+%endif