1 |
|
;/************************************************************************** |
2 |
|
; * |
3 |
|
; * XVID MPEG-4 VIDEO CODEC |
4 |
|
; * mmx interlacing decision |
5 |
|
; * |
6 |
|
; * This program is an implementation of a part of one or more MPEG-4 |
7 |
|
; * Video tools as specified in ISO/IEC 14496-2 standard. Those intending |
8 |
|
; * to use this software module in hardware or software products are |
9 |
|
; * advised that its use may infringe existing patents or copyrights, and |
10 |
|
; * any such use would be at such party's own risk. The original |
11 |
|
; * developer of this software module and his/her company, and subsequent |
12 |
|
; * editors and their companies, will have no liability for use of this |
13 |
|
; * software or modifications or derivatives thereof. |
14 |
|
; * |
15 |
|
; * This program is free software; you can redistribute it and/or modify |
16 |
|
; * it under the terms of the GNU General Public License as published by |
17 |
|
; * the Free Software Foundation; either version 2 of the License, or |
18 |
|
; * (at your option) any later version. |
19 |
|
; * |
20 |
|
; * This program is distributed in the hope that it will be useful, |
21 |
|
; * but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 |
|
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 |
|
; * GNU General Public License for more details. |
24 |
|
; * |
25 |
|
; * You should have received a copy of the GNU General Public License |
26 |
|
; * along with this program; if not, write to the Free Software |
27 |
|
; * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
28 |
|
; * |
29 |
|
; *************************************************************************/ |
30 |
|
|
31 |
|
;/************************************************************************** |
32 |
|
; * |
33 |
|
; * History: |
34 |
|
; * |
35 |
|
; * 04.09.2002 initial version; (c)2002 daniel smith |
36 |
|
; * |
37 |
|
; *************************************************************************/ |
38 |
|
|
39 |
|
|
40 |
|
bits 32

;---------------------------------------------------------------------------
; cglobal <name>
;
; Exports <name> from this object file.
; When PREFIX is defined, the exported symbol is _<name> and <name> itself
; is %define'd to _<name>, so every later use of <name> in this file picks
; up the underscored spelling.
;---------------------------------------------------------------------------
%macro cglobal 1
    %ifndef PREFIX
        global %1
    %else
        global _%1
        %define %1 _%1
    %endif
%endmacro
50 |
|
|
51 |
|
|
52 |
|
section .text

cglobal MBFieldTest_mmx

; Byte step added to both line pointers at the end of each pass.
; MBFieldTest_mmx indexes this table with its pass counter, which runs
; 4, 3, 2, 1 - so the steps are applied in the order 8, 120, 8, 0.
; Entry [0] is not read by that loop.
align 16
nexts:
        dd      0                       ; [0]
        dd      0                       ; [1] after the last pass
        dd      8                       ; [2] after the third pass
        dd      120                     ; [3] after the second pass
        dd      8                       ; [4] after the first pass

; Four words of 1: pmaddwd against this adds neighbouring word sums
; together into dwords.
align 16
ones:
        dw      1, 1, 1, 1
63 |
|
|
64 |
|
; Address expressions for the 16 lines read in one pass.
; line0..line7 are taken relative to esi, line8..line15 relative to edi;
; neighbouring lines are LINE_STRIDE bytes apart.
%define LINE_STRIDE 16

%define line0   esi
%define line1   esi + 1*LINE_STRIDE
%define line2   esi + 2*LINE_STRIDE
%define line3   esi + 3*LINE_STRIDE
%define line4   esi + 4*LINE_STRIDE
%define line5   esi + 5*LINE_STRIDE
%define line6   esi + 6*LINE_STRIDE
%define line7   esi + 7*LINE_STRIDE

%define line8   edi
%define line9   edi + 1*LINE_STRIDE
%define line10  edi + 2*LINE_STRIDE
%define line11  edi + 3*LINE_STRIDE
%define line12  edi + 4*LINE_STRIDE
%define line13  edi + 5*LINE_STRIDE
%define line14  edi + 6*LINE_STRIDE
%define line15  edi + 7*LINE_STRIDE
81 |
|
|
82 |
|
; Symbolic names for the MMX register that carries each line.
; Line n is kept in mm(n mod 3), so any three consecutive lines always sit
; in three different registers.

; lines held in mm0
%define m00 mm0
%define m03 mm0
%define m06 mm0
%define m09 mm0
%define m12 mm0
%define m15 mm0

; lines held in mm1
%define m01 mm1
%define m04 mm1
%define m07 mm1
%define m10 mm1
%define m13 mm1

; lines held in mm2
%define m02 mm2
%define m05 mm2
%define m08 mm2
%define m11 mm2
%define m14 mm2
99 |
|
|
100 |
|
;---------------------------------------------------------------------------
; ABS8 addr, low, mid, hi
;
;   %1  address expression of the next line; its 8 bytes are loaded into %4
;   %2  register holding the "low" line  (destroyed: ends up as |low - mid|)
;   %3  register holding the "mid" line  (only read)
;   %4  register receiving the "hi" line (left loaded for the next step)
;
; Per 16-bit word:
;   mm6 (frame total) += |low - mid|
;   mm7 (field total) += |low - hi|
;
; Scratch registers: mm3, mm4, mm5.
;
; Absolute value is formed without branching: mask = (0 > d) ? 0xFFFF : 0,
; then (d xor mask) - mask.
; The frame and field computations are interleaved instruction by
; instruction; the order below is the original schedule.
;---------------------------------------------------------------------------
%macro ABS8 4
        movq    %4, [%1]                ; load the hi line
        movq    mm3, %2                 ; mm3 = copy of low, for the field diff

        pxor    mm4, mm4                ; zero, becomes the frame sign mask
        pxor    mm5, mm5                ; zero, becomes the field sign mask

        psubw   %2, %3                  ; frame diff: low - mid
        psubw   mm3, %4                 ; field diff: low - hi

        pcmpgtw mm4, %2                 ; mask = all ones where frame diff < 0
        pcmpgtw mm5, mm3                ; mask = all ones where field diff < 0
        pxor    %2, mm4                 ; one's complement of negative words
        pxor    mm3, mm5
        psubw   %2, mm4                 ; subtracting -1 completes the negation
        psubw   mm3, mm5

        paddw   mm6, %2                 ; accumulate frame
        paddw   mm7, mm3                ; accumulate field
%endmacro
121 |
|
|
122 |
|
section .text

;===========================================================================
;
; uint32_t MBFieldTest_mmx(int16_t * const data);
;
; In:     [esp+4] = data (single stack argument)
; Out:    eax = 1 when frame total >= field total (unsigned compare),
;         eax = 0 otherwise
; Saves:  esi, edi
; Clobb:  ecx, edx, flags, mm0-mm7 (no emms is executed here)
;
; Four passes are made. Each pass reads 4 words from each of 16 lines:
; 8 lines starting at esi and 8 lines starting at edi = esi + 256 bytes.
; After each pass both pointers advance by the step taken from nexts.
;
; Per pass, per word:
;   frame (mm6) += |l(n) - l(n+1)|  for n = 0..6 and n = 8..14
;   field (mm7) += |l(n) - l(n+2)|  for n = 0..13
; The difference between lines 7 and 8 is not part of the frame total.
;
;===========================================================================

align 16
MBFieldTest_mmx:

        push    esi
        push    edi

        mov     esi, [esp + 8 + 4]      ; data: skip two saved regs + return address
        lea     edi, [esi + 256]        ; base of lines 8..15

        pxor    mm6, mm6                ; frame total, 4 word lanes
        pxor    mm7, mm7                ; field total, 4 word lanes

        mov     eax, 4                  ; pass counter, also the index into nexts

.pass:
        movq    m00, [line0]
        movq    m01, [line1]

        ABS8    line2, m00, m01, m02    ; frame += |l0-l1|,  field += |l0-l2|
        ABS8    line3, m01, m02, m03    ; frame += |l1-l2|,  field += |l1-l3|
        ABS8    line4, m02, m03, m04    ; frame += |l2-l3|,  field += |l2-l4|
        ABS8    line5, m03, m04, m05    ; frame += |l3-l4|,  field += |l3-l5|
        ABS8    line6, m04, m05, m06    ; frame += |l4-l5|,  field += |l4-l6|
        ABS8    line7, m05, m06, m07    ; frame += |l5-l6|,  field += |l5-l7|
        ABS8    line8, m06, m07, m08    ; frame += |l6-l7|,  field += |l6-l8|

        ; field-only step: |l7-l9|; nothing is added to the frame total here
        movq    m09, [line9]
        pxor    mm4, mm4
        psubw   m07, m09                ; l7 - l9
        pcmpgtw mm4, m07                ; sign mask
        pxor    m07, mm4
        psubw   m07, mm4                ; |l7 - l9|
        paddw   mm7, m07

        ABS8    line10, m08, m09, m10   ; frame += |l8-l9|,   field += |l8-l10|
        ABS8    line11, m09, m10, m11   ; frame += |l9-l10|,  field += |l9-l11|
        ABS8    line12, m10, m11, m12   ; frame += |l10-l11|, field += |l10-l12|
        ABS8    line13, m11, m12, m13   ; frame += |l11-l12|, field += |l11-l13|
        ABS8    line14, m12, m13, m14   ; frame += |l12-l13|, field += |l12-l14|
        ABS8    line15, m13, m14, m15   ; frame += |l13-l14|, field += |l13-l15|

        ; frame-only step: |l14-l15|; all field terms are already in
        pxor    mm4, mm4
        psubw   m14, m15                ; l14 - l15
        pcmpgtw mm4, m14                ; sign mask
        pxor    m14, mm4
        psubw   m14, mm4                ; |l14 - l15|
        paddw   mm6, m14

        mov     ecx, [nexts + eax*4]    ; byte step for this pass
        add     esi, ecx
        add     edi, ecx

        dec     eax
        jnz     near .pass

.decide:
        ; eax is 0 here - it doubles as the "return 0" result
        movq    mm0, [ones]
        pmaddwd mm6, mm0                ; word lanes -> two dword partial sums
        pmaddwd mm7, mm0

        movq    mm0, mm6
        movq    mm1, mm7
        psrlq   mm0, 32                 ; bring the high dword down
        psrlq   mm1, 32
        paddd   mm0, mm6                ; low dword = full frame total
        paddd   mm1, mm7                ; low dword = full field total
        movd    ecx, mm0                ; ecx = frame
        movd    edx, mm1                ; edx = field

        cmp     ecx, edx
        jb      .done                   ; frame < field: keep eax = 0
        inc     eax                     ; frame >= field: return 1

.done:
        pop     edi
        pop     esi

        ret