1 |
;/************************************************************************** |
;/**************************************************************************** |
2 |
; * |
; * |
3 |
; * XVID MPEG-4 VIDEO CODEC |
; * XVID MPEG-4 VIDEO CODEC |
4 |
; * mmx interlacing decision |
; * - Interlacing Field test - |
5 |
; * |
; * |
6 |
; * This program is an implementation of a part of one or more MPEG-4 |
; * Copyright(C) 2002 Daniel Smith <danielsmith@astroboymail.com> |
|
; * Video tools as specified in ISO/IEC 14496-2 standard. Those intending |
|
|
; * to use this software module in hardware or software products are |
|
|
; * advised that its use may infringe existing patents or copyrights, and |
|
|
; * any such use would be at such party's own risk. The original |
|
|
; * developer of this software module and his/her company, and subsequent |
|
|
; * editors and their companies, will have no liability for use of this |
|
|
; * software or modifications or derivatives thereof. |
|
7 |
; * |
; * |
8 |
; * This program is free software; you can redistribute it and/or modify |
; * This program is free software; you can redistribute it and/or modify |
9 |
; * it under the terms of the GNU General Public License as published by |
; * it under the terms of the GNU General Public License as published by |
10 |
; * the Free Software Foundation; either version 2 of the License, or |
; * the Free Software Foundation; either version 2 of the License, or |
11 |
; * (at your option) any later version. |
; * (at your option) any later version. |
17 |
; * |
; * |
18 |
; * You should have received a copy of the GNU General Public License |
; * You should have received a copy of the GNU General Public License |
19 |
; * along with this program; if not, write to the Free Software |
; * along with this program; if not, write to the Free Software |
20 |
; * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
; * |
; * |
22 |
; *************************************************************************/ |
; * $Id$ |
|
|
|
|
;/************************************************************************** |
|
|
; * |
|
|
; * History: |
|
|
; * |
|
|
; * 04.09.2002 initial version; (c)2002 daniel smith |
|
23 |
; * |
; * |
24 |
; *************************************************************************/ |
; ***************************************************************************/ |
|
|
|
|
|
|
|
bits 32 |
|
25 |
|
|
26 |
; cglobal NAME: declare NAME as a global symbol. When PREFIX is defined the |
; exported symbol is _NAME and NAME is redefined to _NAME; otherwise NAME is |
; exported unchanged. |
%macro cglobal 1 |
%include "nasm.inc" |
|
%ifdef PREFIX |
|
|
global _%1 |
|
|
%define %1 _%1 |
|
|
%else |
|
|
global %1 |
|
|
%endif |
|
|
%endmacro |
|
27 |
|
|
28 |
|
;============================================================================= |
29 |
|
; Read only data |
30 |
|
;============================================================================= |
31 |
|
|
32 |
section .text |
DATA |
|
|
|
|
cglobal MBFieldTest_mmx |
|
33 |
|
|
34 |
; advances to next block on right |
; advances to next block on right |
; byte steps added to both row pointers at the end of each loop pass; the table |
; is read as nexts[counter] with counter = 4, 3, 2, 1, giving 8, 120, 8, 0 |
; (entry 0 is never read because the loop exits when the counter reaches 0) |
35 |
align 16 |
ALIGN SECTION_ALIGN |
36 |
nexts dd 0, 0, 8, 120, 8 |
nexts: |
37 |
|
dd 0, 0, 8, 120, 8 |
38 |
|
|
39 |
; multiply word sums into dwords |
; multiply word sums into dwords |
; four words of 1: pmaddwd against this constant adds each adjacent pair of |
; packed words into one dword, which is how the two totals are reduced |
40 |
align 16 |
ALIGN SECTION_ALIGN |
41 |
ones times 4 dw 1 |
ones: |
42 |
|
times 4 dw 1 |
43 |
43 |
|
|
44 |
|
;============================================================================= |
45 |
|
; Code |
46 |
|
;============================================================================= |
47 |
|
|
48 |
|
TEXT |
49 |
|
|
50 |
|
cglobal MBFieldTest_mmx |
51 |
|
|
52 |
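; neater: name each row address instead of repeating pointer+offset; line0-line7 are based on the top-block pointer, line8-line15 on the bottom-block pointer, with consecutive rows 16 bytes apart |
; neater: name each row address instead of repeating pointer+offset; line0-line7 are based on the top-block pointer, line8-line15 on the bottom-block pointer, with consecutive rows 16 bytes apart |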
53 |
%define line0 esi |
%define line0 _ESI |
54 |
%define line1 esi+16 |
%define line1 _ESI+16 |
55 |
%define line2 esi+32 |
%define line2 _ESI+32 |
56 |
%define line3 esi+48 |
%define line3 _ESI+48 |
57 |
%define line4 esi+64 |
%define line4 _ESI+64 |
58 |
%define line5 esi+80 |
%define line5 _ESI+80 |
59 |
%define line6 esi+96 |
%define line6 _ESI+96 |
60 |
%define line7 esi+112 |
%define line7 _ESI+112 |
61 |
%define line8 edi |
%define line8 _EDI |
62 |
%define line9 edi+16 |
%define line9 _EDI+16 |
63 |
%define line10 edi+32 |
%define line10 _EDI+32 |
64 |
%define line11 edi+48 |
%define line11 _EDI+48 |
65 |
%define line12 edi+64 |
%define line12 _EDI+64 |
66 |
%define line13 edi+80 |
%define line13 _EDI+80 |
67 |
%define line14 edi+96 |
%define line14 _EDI+96 |
68 |
%define line15 edi+112 |
%define line15 _EDI+112 |
69 |
|
|
70 |
; keep from losing track which reg holds which line - these never overlap |
; keep from losing track which reg holds which line - these never overlap |
71 |
%define m00 mm0 |
%define m00 mm0 |
107 |
paddw mm7, mm3 |
paddw mm7, mm3 |
108 |
%endmacro |
%endmacro |
109 |
|
|
110 |
section .text |
;----------------------------------------------------------------------------- |
|
|
|
|
;=========================================================================== |
|
111 |
; |
; |
112 |
; uint32_t MBFieldTest_mmx(int16_t * const data); |
; uint32_t MBFieldTest_mmx(int16_t * const data); |
113 |
; |
; |
114 |
;=========================================================================== |
;----------------------------------------------------------------------------- |
115 |
|
|
116 |
align 16 |
ALIGN SECTION_ALIGN |
117 |
MBFieldTest_mmx: |
MBFieldTest_mmx: |
118 |
|
|
119 |
push esi |
mov _EAX, prm1 |
|
push edi |
|
120 |
|
|
121 |
mov esi, [esp+8+4] ; esi = top left block |
push _ESI |
122 |
mov edi, esi |
push _EDI |
123 |
add edi, 256 ; edi = bottom left block |
|
124 |
|
mov _ESI, _EAX ; _ESI = top left block |
125 |
|
mov _EDI, _ESI |
126 |
|
add _EDI, 256 ; _EDI = bottom left block |
127 |
|
|
128 |
pxor mm6, mm6 ; frame total |
pxor mm6, mm6 ; frame total |
129 |
pxor mm7, mm7 ; field total |
pxor mm7, mm7 ; field total |
130 |
|
|
131 |
mov eax, 4 ; we do left 8 bytes of data[0*64], then right 8 bytes |
mov _EAX, 4 ; we do left 8 bytes of data[0*64], then right 8 bytes |
132 |
; then left 8 bytes of data[1*64], then last 8 bytes |
; then left 8 bytes of data[1*64], then last 8 bytes |
133 |
|
.loop: |
|
_loop: |
|
134 |
movq m00, [line0] ; line0 |
movq m00, [line0] ; line0 |
135 |
movq m01, [line1] ; line1 |
movq m01, [line1] ; line1 |
136 |
|
|
164 |
psubw m14, mm4 |
psubw m14, mm4 |
165 |
paddw mm6, m14 ; add to frame total |
paddw mm6, m14 ; add to frame total |
166 |
|
|
167 |
mov ecx, [nexts+eax*4] ; move esi/edi 8 pixels to the right |
lea TMP0, [nexts] |
168 |
add esi, ecx |
mov TMP0d, dword [TMP0+_EAX*4] ; move _ESI/_EDI 8 pixels to the right |
169 |
add edi, ecx |
add _ESI, TMP0 |
170 |
|
add _EDI, TMP0 |
171 |
|
|
172 |
dec eax |
dec _EAX |
173 |
jnz near _loop |
jnz near .loop |
174 |
|
|
175 |
_decide: |
.decide: |
176 |
movq mm0, [ones] ; add packed words into single dwords |
movq mm0, [ones] ; add packed words into single dwords |
177 |
pmaddwd mm6, mm0 |
pmaddwd mm6, mm0 |
178 |
pmaddwd mm7, mm0 |
pmaddwd mm7, mm0 |
179 |
|
|
180 |
movq mm0, mm6 ; ecx will be frame total, edx field |
movq mm0, mm6 ; TMP0 will be frame total, TMP1 field |
181 |
movq mm1, mm7 |
movq mm1, mm7 |
182 |
psrlq mm0, 32 |
psrlq mm0, 32 |
183 |
psrlq mm1, 32 |
psrlq mm1, 32 |
184 |
paddd mm0, mm6 |
paddd mm0, mm6 |
185 |
paddd mm1, mm7 |
paddd mm1, mm7 |
186 |
movd ecx, mm0 |
movd TMP0d, mm0 |
187 |
movd edx, mm1 |
movd TMP1d, mm1 |
188 |
|
|
189 |
add edx, 350 ; add bias against field decision |
add TMP1, 350 ; add bias against field decision |
190 |
cmp ecx, edx |
cmp TMP0, TMP1 |
191 |
jb _end ; if frame<field, don't use field dct |
jb .end ; if frame<field, don't use field dct |
192 |
inc eax ; if frame>=field, use field dct (return 1) |
inc _EAX ; if frame>=field, use field dct (return 1) |
193 |
|
|
194 |
_end: |
.end: |
195 |
pop edi |
pop _EDI |
196 |
pop esi |
pop _ESI |
197 |
|
|
198 |
ret |
ret |
199 |
|
ENDFUNC |
200 |
|
|
201 |
|
|
202 |
|
%ifidn __OUTPUT_FORMAT__,elf |
203 |
|
section ".note.GNU-stack" noalloc noexec nowrite progbits |
204 |
|
%endif |
205 |
|
|