5 |
; * |
; * |
6 |
; * Copyright(C) 2002 Daniel Smith <danielsmith@astroboymail.com> |
; * Copyright(C) 2002 Daniel Smith <danielsmith@astroboymail.com> |
7 |
; * |
; * |
8 |
; * This program is free software ; you can redistribute it and/or modify |
; * This program is free software ; you can redistribute it and/or modify |
9 |
; * it under the terms of the GNU General Public License as published by |
; * it under the terms of the GNU General Public License as published by |
10 |
; * the Free Software Foundation ; either version 2 of the License, or |
; * the Free Software Foundation ; either version 2 of the License, or |
11 |
; * (at your option) any later version. |
; * (at your option) any later version. |
23 |
; * |
; * |
24 |
; ***************************************************************************/ |
; ***************************************************************************/ |
25 |
|
|
26 |
BITS 32 |
%include "nasm.inc" |
|
|
|
|
; cglobal <name>
;   Declares <name> as a global (exported) symbol.
;   - If PREFIX is defined, the exported symbol is _<name>, and <name> is
;     redefined to expand to _<name>, so later uses of the bare name in this
;     file refer to the underscore-prefixed symbol.
;   - Otherwise <name> is exported unchanged.
%macro cglobal 1 |
|
|
%ifdef PREFIX |
|
|
global _%1 |
|
|
%define %1 _%1 |
|
|
%else |
|
|
global %1 |
|
|
%endif |
|
|
%endmacro |
|
27 |
|
|
28 |
;============================================================================= |
;============================================================================= |
29 |
; Read only data |
; Read only data |
30 |
;============================================================================= |
;============================================================================= |
31 |
|
|
32 |
%ifdef FORMAT_COFF |
DATA |
|
SECTION .rodata data |
|
|
%else |
|
|
SECTION .rodata data align=16 |
|
|
%endif |
|
33 |
|
|
34 |
; advances to next block on right |
; advances to next block on right |
35 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
36 |
nexts: |
nexts: |
37 |
dd 0, 0, 8, 120, 8 |
dd 0, 0, 8, 120, 8 |
38 |
|
|
39 |
; multiply word sums into dwords |
; multiply word sums into dwords |
40 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
41 |
ones: |
ones: |
42 |
times 4 dw 1 |
times 4 dw 1 |
43 |
|
|
45 |
; Code |
; Code |
46 |
;============================================================================= |
;============================================================================= |
47 |
|
|
48 |
SECTION .text |
TEXT |
49 |
|
|
50 |
cglobal MBFieldTest_mmx |
cglobal MBFieldTest_mmx |
51 |
|
|
52 |
; neater |
; neater |
53 |
%define line0 esi |
%define line0 _ESI |
54 |
%define line1 esi+16 |
%define line1 _ESI+16 |
55 |
%define line2 esi+32 |
%define line2 _ESI+32 |
56 |
%define line3 esi+48 |
%define line3 _ESI+48 |
57 |
%define line4 esi+64 |
%define line4 _ESI+64 |
58 |
%define line5 esi+80 |
%define line5 _ESI+80 |
59 |
%define line6 esi+96 |
%define line6 _ESI+96 |
60 |
%define line7 esi+112 |
%define line7 _ESI+112 |
61 |
%define line8 edi |
%define line8 _EDI |
62 |
%define line9 edi+16 |
%define line9 _EDI+16 |
63 |
%define line10 edi+32 |
%define line10 _EDI+32 |
64 |
%define line11 edi+48 |
%define line11 _EDI+48 |
65 |
%define line12 edi+64 |
%define line12 _EDI+64 |
66 |
%define line13 edi+80 |
%define line13 _EDI+80 |
67 |
%define line14 edi+96 |
%define line14 _EDI+96 |
68 |
%define line15 edi+112 |
%define line15 _EDI+112 |
69 |
|
|
70 |
; keep from losing track which reg holds which line - these never overlap |
; keep from losing track which reg holds which line - these never overlap |
71 |
%define m00 mm0 |
%define m00 mm0 |
113 |
; |
; |
114 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
115 |
|
|
116 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
117 |
MBFieldTest_mmx: |
MBFieldTest_mmx: |
118 |
|
|
119 |
push esi |
mov _EAX, prm1 |
|
push edi |
|
120 |
|
|
121 |
mov esi, [esp+8+4] ; esi = top left block |
push _ESI |
122 |
mov edi, esi |
push _EDI |
123 |
add edi, 256 ; edi = bottom left block |
|
124 |
|
mov _ESI, _EAX ; _ESI = top left block |
125 |
|
mov _EDI, _ESI |
126 |
|
add _EDI, 256 ; _EDI = bottom left block |
127 |
|
|
128 |
pxor mm6, mm6 ; frame total |
pxor mm6, mm6 ; frame total |
129 |
pxor mm7, mm7 ; field total |
pxor mm7, mm7 ; field total |
130 |
|
|
131 |
mov eax, 4 ; we do left 8 bytes of data[0*64], then right 8 bytes |
mov _EAX, 4 ; we do left 8 bytes of data[0*64], then right 8 bytes |
132 |
; then left 8 bytes of data[1*64], then last 8 bytes |
; then left 8 bytes of data[1*64], then last 8 bytes |
133 |
.loop: |
.loop: |
134 |
movq m00, [line0] ; line0 |
movq m00, [line0] ; line0 |
164 |
psubw m14, mm4 |
psubw m14, mm4 |
165 |
paddw mm6, m14 ; add to frame total |
paddw mm6, m14 ; add to frame total |
166 |
|
|
167 |
mov ecx, [nexts+eax*4] ; move esi/edi 8 pixels to the right |
lea TMP0, [nexts] |
168 |
add esi, ecx |
mov TMP0d, dword [TMP0+_EAX*4] ; move _ESI/_EDI 8 pixels to the right |
169 |
add edi, ecx |
add _ESI, TMP0 |
170 |
|
add _EDI, TMP0 |
171 |
|
|
172 |
dec eax |
dec _EAX |
173 |
jnz near .loop |
jnz near .loop |
174 |
|
|
175 |
.decide: |
.decide: |
177 |
pmaddwd mm6, mm0 |
pmaddwd mm6, mm0 |
178 |
pmaddwd mm7, mm0 |
pmaddwd mm7, mm0 |
179 |
|
|
180 |
movq mm0, mm6 ; ecx will be frame total, edx field |
movq mm0, mm6 ; TMP0 will be frame total, TMP1 field |
181 |
movq mm1, mm7 |
movq mm1, mm7 |
182 |
psrlq mm0, 32 |
psrlq mm0, 32 |
183 |
psrlq mm1, 32 |
psrlq mm1, 32 |
184 |
paddd mm0, mm6 |
paddd mm0, mm6 |
185 |
paddd mm1, mm7 |
paddd mm1, mm7 |
186 |
movd ecx, mm0 |
movd TMP0d, mm0 |
187 |
movd edx, mm1 |
movd TMP1d, mm1 |
188 |
|
|
189 |
add edx, 350 ; add bias against field decision |
add TMP1, 350 ; add bias against field decision |
190 |
cmp ecx, edx |
cmp TMP0, TMP1 |
191 |
jb .end ; if frame<field, don't use field dct |
jb .end ; if frame<field, don't use field dct |
192 |
inc eax ; if frame>=field, use field dct (return 1) |
inc _EAX ; if frame>=field, use field dct (return 1) |
193 |
|
|
194 |
.end: |
.end: |
195 |
pop edi |
pop _EDI |
196 |
pop esi |
pop _ESI |
197 |
|
|
198 |
ret |
ret |
199 |
|
ENDFUNC |
200 |
|
|
201 |
|
NON_EXEC_STACK |