--- interlacing_mmx.asm	2002/09/26 04:51:01	1.1.2.2
+++ interlacing_mmx.asm	2009/09/16 17:07:58	1.12
@@ -1,83 +1,71 @@
-;/**************************************************************************
+;/****************************************************************************
 ; *
-; *	XVID MPEG-4 VIDEO CODEC
-; *	mmx interlacing decision
+; *  XVID MPEG-4 VIDEO CODEC
+; *  - Interlacing Field test -
 ; *
-; *	This program is an implementation of a part of one or more MPEG-4
-; *	Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
-; *	to use this software module in hardware or software products are
-; *	advised that its use may infringe existing patents or copyrights, and
-; *	any such use would be at such party's own risk.  The original
-; *	developer of this software module and his/her company, and subsequent
-; *	editors and their companies, will have no liability for use of this
-; *	software or modifications or derivatives thereof.
+; *  Copyright(C) 2002 Daniel Smith <danielsmith@astroboymail.com>
 ; *
-; *	This program is free software; you can redistribute it and/or modify
-; *	it under the terms of the GNU General Public License as published by
-; *	the Free Software Foundation; either version 2 of the License, or
-; *	(at your option) any later version.
+; *  This program is free software ; you can redistribute it and/or modify
+; *  it under the terms of the GNU General Public License as published by
+; *  the Free Software Foundation ; either version 2 of the License, or
+; *  (at your option) any later version.
 ; *
-; *	This program is distributed in the hope that it will be useful,
-; *	but WITHOUT ANY WARRANTY; without even the implied warranty of
-; *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-; *	GNU General Public License for more details.
+; *  This program is distributed in the hope that it will be useful,
+; *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
+; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; *  GNU General Public License for more details.
 ; *
-; *	You should have received a copy of the GNU General Public License
-; *	along with this program; if not, write to the Free Software
-; *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+; *  You should have received a copy of the GNU General Public License
+; *  along with this program ; if not, write to the Free Software
+; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 ; *
-; *************************************************************************/
-
-;/**************************************************************************
-; *
-; *	History:
-; *
-; * 04.09.2002  initial version; (c)2002 daniel smith
+; * $Id: interlacing_mmx.asm,v 1.12 2009/09/16 17:07:58 Isibaar Exp $
 ; *
-; *************************************************************************/
+; ***************************************************************************/
 
+%include "nasm.inc"
 
-bits 32
-
-%macro cglobal 1 
-	%ifdef PREFIX
-		global _%1 
-		%define %1 _%1
-	%else
-		global %1
-	%endif
-%endmacro
+;=============================================================================
+; Read only data
+;=============================================================================
 
+DATA
 
-section .text
+; byte offsets added to _ESI/_EDI after each pass, indexed by loop counter 4..1 (entry 0 unused)
+ALIGN SECTION_ALIGN
+nexts:
+	dd 0, 0, 8, 120, 8
 
-cglobal MBFieldTest_mmx
+; multiply word sums into dwords
+ALIGN SECTION_ALIGN
+ones:
+	times 4 dw 1
+
+;=============================================================================
+; Code
+;=============================================================================
 
-; advances to next block on right 
-align 16
-nexts	dd 0, 0, 8, 120, 8
+TEXT
 
-; multiply word sums into dwords
-align 16
-ones	times 4 dw 1
+cglobal MBFieldTest_mmx
 
 ; neater
-%define	line0	esi
-%define	line1	esi+16
-%define	line2	esi+32
-%define	line3	esi+48
-%define	line4	esi+64
-%define	line5	esi+80
-%define	line6	esi+96
-%define	line7	esi+112
-%define	line8	edi
-%define	line9	edi+16
-%define	line10	edi+32
-%define	line11	edi+48
-%define	line12	edi+64
-%define	line13	edi+80
-%define	line14	edi+96
-%define	line15	edi+112
+%define	line0	_ESI
+%define	line1	_ESI+16
+%define	line2	_ESI+32
+%define	line3	_ESI+48
+%define	line4	_ESI+64
+%define	line5	_ESI+80
+%define	line6	_ESI+96
+%define	line7	_ESI+112
+%define	line8	_EDI
+%define	line9	_EDI+16
+%define	line10	_EDI+32
+%define	line11	_EDI+48
+%define	line12	_EDI+64
+%define	line13	_EDI+80
+%define	line14	_EDI+96
+%define	line15	_EDI+112
 
 ; keep from losing track which reg holds which line - these never overlap
 %define	m00		mm0
@@ -99,112 +87,115 @@
 
 ; gets diff between three lines low(%2),mid(%3),hi(%4): frame = mid-low, field = hi-low
 %macro ABS8 4
-	movq	%4, [%1]		; m02 = hi
-	movq	mm3, %2			; mm3 = low copy
+  movq %4, [%1]         ; %4 = hi line, loaded from [%1]
+  movq mm3, %2          ; mm3 = low copy
 
-	pxor	mm4, mm4		; mm4 = 0
-	pxor	mm5, mm5		; mm5 = 0
+  pxor mm4, mm4         ; mm4 = 0
+  pxor mm5, mm5         ; mm5 = 0
 
-	psubw	%2,  %3			; diff(med,low) for frame
-	psubw	mm3, %4			; diff(hi,low) for field
+  psubw %2,  %3         ; %2 = low - mid (frame diff)
+  psubw mm3, %4         ; mm3 = low - hi (field diff)
 
-	pcmpgtw	mm4, %2			; if (diff<0), mm4 will be all 1's, else all 0's
-	pcmpgtw	mm5, mm3
-	pxor	%2,  mm4		; this will get abs(), but off by 1 if (diff<0)
-	pxor	mm3, mm5
-	psubw	%2,  mm4		; correct abs being off by 1 when (diff<0)
-	psubw	mm3, mm5
+  pcmpgtw mm4, %2       ; per word: mm4 = all 1's where frame diff < 0, else all 0's
+  pcmpgtw mm5, mm3      ; per word: mm5 = all 1's where field diff < 0, else all 0's
+  pxor %2,  mm4         ; invert the negative words: abs(), but off by 1 if (diff<0)
+  pxor mm3, mm5         ; same inversion for the field diff
+  psubw %2,  mm4        ; subtract the mask (-1) to correct the off-by-1: %2 = abs(frame diff)
+  psubw mm3, mm5        ; mm3 = abs(field diff)
 
-	paddw	mm6, %2			; add to totals
-	paddw	mm7, mm3
+  paddw mm6, %2         ; mm6 += abs(frame diff)
+  paddw mm7, mm3        ; mm7 += abs(field diff)
 %endmacro
 
-section .text
-
-;===========================================================================
+;-----------------------------------------------------------------------------
 ;
 ; uint32_t MBFieldTest_mmx(int16_t * const data);
 ;
-;===========================================================================
+;-----------------------------------------------------------------------------
 
-align 16
+ALIGN SECTION_ALIGN
 MBFieldTest_mmx:
 
-	push	esi
-	push	edi
+  mov  _EAX, prm1               ; fetch the data pointer before the pushes below
+
+  push _ESI
+  push _EDI
+
+  mov _ESI, _EAX                ; _ESI = top left block
+  mov _EDI, _ESI
+  add _EDI, 256                 ; _EDI = bottom left block
+
+  pxor mm6, mm6                 ; frame total
+  pxor mm7, mm7                 ; field total
+
+  mov _EAX, 4                   ; we do left 8 bytes of data[0*64], then right 8 bytes
+                                ; then left 8 bytes of data[1*64], then last 8 bytes
+.loop:
+  movq m00, [line0]             ; line0
+  movq m01, [line1]             ; line1
+
+  ABS8 line2, m00, m01, m02     ; frame += (line2-line1), field += (line2-line0)
+  ABS8 line3, m01, m02, m03
+  ABS8 line4, m02, m03, m04
+  ABS8 line5, m03, m04, m05
+  ABS8 line6, m04, m05, m06
+  ABS8 line7, m05, m06, m07
+  ABS8 line8, m06, m07, m08
+
+  movq m09, [line9]             ; line9-line7, no frame comp for line9-line8!
+  pxor mm4, mm4
+  psubw m07, m09
+  pcmpgtw mm4, m07              ; sign mask of (line7-line9), taken from m07 itself
+  pxor m07, mm4
+  psubw m07, mm4                ; m07 = abs(line7-line9)
+  paddw mm7, m07                ; add to field total
+
+  ABS8 line10, m08, m09, m10    ; frame += (line10-line9), field += (line10-line8)
+  ABS8 line11, m09, m10, m11
+  ABS8 line12, m10, m11, m12
+  ABS8 line13, m11, m12, m13
+  ABS8 line14, m12, m13, m14
+  ABS8 line15, m13, m14, m15
+
+  pxor mm4, mm4                 ; line15-line14, we're done with field comps!
+  psubw m14, m15
+  pcmpgtw mm4, m14
+  pxor m14, mm4
+  psubw m14, mm4
+  paddw mm6, m14                ; add to frame total
+
+  lea TMP0, [nexts]
+  mov TMP0d, dword [TMP0+_EAX*4] ; step = nexts[_EAX]: 8, 120, 8, then 0 bytes
+  add _ESI, TMP0
+  add _EDI, TMP0
+
+  dec _EAX
+  jnz near .loop                ; falls through with _EAX = 0
+
+.decide:
+  movq mm0, [ones]              ; add packed words into single dwords
+  pmaddwd mm6, mm0
+  pmaddwd mm7, mm0
+
+  movq mm0, mm6                 ; TMP0 will be frame total, TMP1 field
+  movq mm1, mm7
+  psrlq mm0, 32
+  psrlq mm1, 32
+  paddd mm0, mm6
+  paddd mm1, mm7
+  movd TMP0d, mm0
+  movd TMP1d, mm1
+
+  add TMP1, 350                  ; add bias against field decision
+  cmp TMP0, TMP1
+  jb .end                       ; if frame < field+350, don't use field dct (return 0)
+  inc _EAX                       ; if frame >= field+350, use field dct (return 1)
+
+.end:
+  pop _EDI
+  pop _ESI
 
-	mov		esi, [esp+8+4]			; esi = top left block
-	mov		edi, esi
-	add		edi, 256				; edi = bottom left block
-
-	pxor	mm6, mm6				; frame total
-	pxor	mm7, mm7				; field total
-
-	mov		eax, 4					; we do left 8 bytes of data[0*64], then right 8 bytes
-									; then left 8 bytes of data[1*64], then last 8 bytes
-
-_loop:
-	movq	m00, [line0]			; line0
-	movq	m01, [line1]			; line1
-
-	ABS8	line2, m00, m01, m02	; frame += (line2-line1), field += (line2-line0)
-	ABS8	line3, m01, m02, m03
-	ABS8	line4, m02, m03, m04
-	ABS8	line5, m03, m04, m05
-	ABS8	line6, m04, m05, m06
-	ABS8	line7, m05, m06, m07
-	ABS8	line8, m06, m07, m08
-
-	movq	m09, [line9]			; line9-line7, no frame comp for line9-line8!
-	pxor	mm4, mm4
-	psubw	m07, m09
-	pcmpgtw	mm4, mm1
-	pxor	m07, mm4
-	psubw	m07, mm4
-	paddw	mm7, m07				; add to field total
-
-	ABS8	line10, m08, m09, m10	; frame += (line10-line9), field += (line10-line8)
-	ABS8	line11, m09, m10, m11
-	ABS8	line12, m10, m11, m12
-	ABS8	line13, m11, m12, m13
-	ABS8	line14, m12, m13, m14
-	ABS8	line15, m13, m14, m15
-
-	pxor	mm4, mm4				; line15-line14, we're done with field comps!
-	psubw	m14, m15
-	pcmpgtw	mm4, m14
-	pxor	m14, mm4
-	psubw	m14, mm4
-	paddw	mm6, m14				; add to frame total
-
-	mov		ecx, [nexts+eax*4]		; move esi/edi 8 pixels to the right
-	add		esi, ecx
-	add		edi, ecx
-
-	dec		eax
-	jnz		near _loop
-
-_decide:
-	movq	mm0, [ones]				; add packed words into single dwords
-	pmaddwd	mm6, mm0
-	pmaddwd mm7, mm0
-
-	movq	mm0, mm6				; ecx will be frame total, edx field
-	movq	mm1, mm7
-	psrlq	mm0, 32
-	psrlq	mm1, 32
-	paddd	mm0, mm6
-	paddd	mm1, mm7
-	movd	ecx, mm0
-	movd	edx, mm1
-
-	add		edx, 350				; add bias against field decision
-	cmp		ecx, edx
-	jb		_end					; if frame<field, don't use field dct
-	inc		eax						; if frame>=field, use field dct (return 1)
-
-_end:
-	pop		edi
-	pop		esi
+  ret
+ENDFUNC
 
-	ret
+NON_EXEC_STACK