1 |
|
;/************************************************************************** |
2 |
|
; * |
3 |
|
; * XVID MPEG-4 VIDEO CODEC |
4 |
|
; * colorspace |
5 |
|
; * |
6 |
|
; * This program is free software; you can redistribute it and/or modify |
7 |
|
; * it under the terms of the GNU General Public License as published by |
8 |
|
; * the Free Software Foundation; either version 2 of the License, or |
9 |
|
; * (at your option) any later version. |
10 |
|
; * |
11 |
|
; * This program is distributed in the hope that it will be useful, |
12 |
|
; * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
|
; * GNU General Public License for more details. |
15 |
|
; * |
16 |
|
; * You should have received a copy of the GNU General Public License |
17 |
|
; * along with this program; if not, write to the Free Software |
18 |
|
; * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 |
|
; * |
20 |
|
; *************************************************************************/ |
21 |
|
|
22 |
|
;/************************************************************************** |
23 |
|
; * |
24 |
|
; * History: |
25 |
|
; * |
26 |
|
; * 10.10.2001 initial version; (c)2002 peter ross <pross@xvid.org> |
27 |
|
; * |
28 |
|
; *************************************************************************/ |
29 |
|
|
30 |
|
|
31 |
|
bits 32 |
32 |
|
|
33 |
|
;--------------------------------------------------------------------
; cglobal NAME
;
; Exports NAME as a global symbol. When PREFIX is defined, the symbol
; that gets exported is _NAME, and NAME itself is %define'd to _NAME so
; that every later use of NAME in this file expands to the underscored
; symbol.
;--------------------------------------------------------------------
%macro cglobal 1
    %ifndef PREFIX
        global %1
    %else
        global _%1
        %define %1 _%1
    %endif
%endmacro
41 |
|
|
42 |
|
|
43 |
|
section .data
align 16

;===========================================================================
; Constants used by the YUYV/UYVY -> YV12 macros
;
; yuyv_mask  0x00ff in each of the four words. Applied directly it keeps
;            bytes 0,2,4,6 of a quadword; applied after a psrlw by 8 it
;            keeps what used to be bytes 1,3,5,7.
; mmx_one    1 in each of the four words: the rounding term added before
;            the >>1 in the plain-MMX chroma average.
;===========================================================================
yuyv_mask:  times 4 db 0xff, 0x00
mmx_one:    times 4 dw 1
54 |
|
|
55 |
|
|
56 |
|
|
57 |
|
section .text |
58 |
|
|
59 |
|
%include "colorspace_mmx.inc" |
60 |
|
|
61 |
|
|
62 |
|
;====================================================================
; YUYV_TO_YV12( TYPE, PAVG )
;
; TYPE  0=yuyv, 1=uyvy
; PAVG  0=plain mmx (add/shift average), otherwise the mnemonic of the
;       byte-average instruction to use: pavgusb=3dnow, pavgb=xmm
;
; Per invocation: bytes=2 (per packed pixel), pixels=8 (per row),
; vpixels=2 (rows)
;====================================================================
70 |
|
; YUYV_TO_YV12_INIT( TYPE, PAVG )
; Loads yuyv_mask (0x00ff in every word) into mm7. YUYV_TO_YV12 only ever
; reads mm7, so the mask stays valid for as long as nothing else writes
; that register. Neither macro argument is referenced here.
%macro YUYV_TO_YV12_INIT 2
    movq        mm7, qword [yuyv_mask]
%endmacro
73 |
|
|
74 |
|
|
75 |
|
;--------------------------------------------------------------------
; YUYV_TO_YV12( TYPE, PAVG )
;
; Converts one block of 8 pixels x 2 rows from packed 4:2:2 to planar
; 4:2:0. Both luma rows are copied out; the chroma samples of the two
; rows are averaged (rounding up) into one row of 4 U and 4 V bytes.
;
;   TYPE  0 = yuyv (luma in the even bytes), 1 = uyvy (luma in the odd bytes)
;   PAVG  0 = add/shift average in plain MMX, otherwise the mnemonic of
;         a packed byte-average instruction (pavgusb or pavgb)
;
; Reads:    16 bytes at [edi] and 16 bytes at [edi+edx] (x_ptr / x_stride)
;           mm7 = yuyv_mask (0x00ff in every word); never written here
; Writes:   8 bytes at [esi] and 8 bytes at [esi+eax]  (luma rows 0 and 1)
;           4 bytes at [ebx] (U) and 4 bytes at [ecx] (V)
; Clobbers: mm0-mm5, plus mm6 when PAVG is 0
;--------------------------------------------------------------------
%macro YUYV_TO_YV12 2
    movq        mm0, [edi]              ; row 0, pixels 0-3
    movq        mm1, [edi + 8]          ; row 0, pixels 4-7
    movq        mm2, [edi + edx]        ; row 1, pixels 0-3
    movq        mm3, [edi + edx + 8]    ; row 1, pixels 4-7

    ;---[ chroma: average row 0 with row 1 ]---------------------------
%ifnidn %2,0
    ; 3dnow/xmm: average every byte of the two rows in one instruction
    ; ((a+b+1)>>1 per byte), then keep only the chroma bytes as words.
    ; The averaged luma bytes are thrown away by the shift/mask.
    movq        mm4, mm0
    %2          mm4, mm2                ; pixels 0-3
    movq        mm5, mm1
    %2          mm5, mm3                ; pixels 4-7
  %if %1 == 0                           ; yuyv: chroma sits in the high bytes
    psrlw       mm4, 8
    psrlw       mm5, 8
  %endif
    pand        mm4, mm7
    pand        mm5, mm7
%else
    ; plain mmx: widen the chroma bytes to words, then (a + b + 1) >> 1

    ; pixels 0-3 -> mm4
    movq        mm4, mm0
    movq        mm5, mm2
  %if %1 == 0                           ; yuyv: chroma sits in the high bytes
    psrlw       mm4, 8
    psrlw       mm5, 8
  %endif
    pand        mm4, mm7
    pand        mm5, mm7
    paddw       mm4, mm5
    paddw       mm4, [mmx_one]          ; +1 so the shift rounds up
    psrlw       mm4, 1

    ; pixels 4-7 -> mm5 (mm6 is scratch)
    movq        mm5, mm1
    movq        mm6, mm3
  %if %1 == 0
    psrlw       mm5, 8
    psrlw       mm6, 8
  %endif
    pand        mm5, mm7
    pand        mm6, mm7
    paddw       mm5, mm6
    paddw       mm5, [mmx_one]
    psrlw       mm5, 1
%endif
    ; here mm4 = words u0 v0 u1 v1, mm5 = words u2 v2 u3 v3

    ;---[ luma: isolate as words, pack back to 8 bytes per row ]-------
%if %1 == 1                             ; uyvy: luma sits in the high bytes
    psrlw       mm0, 8
    psrlw       mm1, 8
%endif
    pand        mm0, mm7
    pand        mm1, mm7
    packuswb    mm0, mm1                ; row 0: y0..y7

%if %1 == 1
    psrlw       mm2, 8
    psrlw       mm3, 8
%endif
    pand        mm2, mm7
    pand        mm3, mm7
    packuswb    mm2, mm3                ; row 1: y0..y7

%ifnidn %2,pavgb                        ; plain mmx, 3dnow
    movq        [esi], mm0
    movq        [esi + eax], mm2
%else                                   ; xmm: non-temporal stores
    movntq      [esi], mm0
    movntq      [esi + eax], mm2
%endif

    ;---[ chroma: de-interleave u/v and store 4 bytes of each ]--------
    packuswb    mm4, mm5                ; bytes u0 v0 u1 v1 u2 v2 u3 v3
    movq        mm5, mm4
    pand        mm5, mm7                ; words u0 u1 u2 u3
    psrlq       mm4, 8
    pand        mm4, mm7                ; words v0 v1 v2 v3
    packuswb    mm5, mm5
    packuswb    mm4, mm4
    movd        [ebx], mm5
    movd        [ecx], mm4
%endmacro
163 |
|
;==================================================================== |
164 |
|
|
165 |
|
|
166 |
|
;------------------------------------------------------------------------------
; YV12_TO_YUYV( TYPE, unused )
;
; TYPE  0=yuyv, 1=uyvy
; The macro is declared with two arguments; the second is not referenced.
;
; Per invocation: bytes=2 (per packed pixel), pixels=8 (per row),
; vpixels=2 (rows)
;------------------------------------------------------------------------------
173 |
|
; YV12_TO_YUYV_INIT( TYPE, unused )
; Intentionally empty: YV12_TO_YUYV loads every MMX register it uses
; before reading it, so there is nothing to prepare.
%macro YV12_TO_YUYV_INIT 2
%endmacro
175 |
|
|
176 |
|
|
177 |
|
;------------------------------------------------------------------------------
; YV12_TO_YUYV( TYPE, unused )
;
; Converts one block of 8 pixels x 2 rows from planar 4:2:0 to packed 4:2:2.
; A single row of 4 U and 4 V samples is interleaved with both luma rows.
;
;   TYPE  0 = yuyv output, 1 = uyvy output
;
; Reads:    8 bytes at [esi] and 8 bytes at [esi+eax]  (luma rows 0 and 1)
;           4 bytes at [ebx] (U) and 4 bytes at [ecx] (V)
; Writes:   16 bytes at [edi] and 16 bytes at [edi+edx]
; Clobbers: TYPE 0: mm0-mm5      TYPE 1: mm0, mm1, mm4-mm7
;------------------------------------------------------------------------------
%macro YV12_TO_YUYV 2
    ; all loads happen before the first store
    movq        mm0, [esi]              ; luma row 0: y0..y7
    movq        mm1, [esi + eax]        ; luma row 1: y0..y7
    movd        mm4, [ebx]              ; u0..u3
    movd        mm5, [ecx]              ; v0..v3
    punpcklbw   mm4, mm5                ; u0 v0 u1 v1 u2 v2 u3 v3

%if %1 == 0     ; YUYV: luma byte first, chroma byte second
    ; row 0
    movq        mm2, mm0
    punpcklbw   mm0, mm4                ; y0 u0 y1 v0 y2 u1 y3 v1
    punpckhbw   mm2, mm4                ; y4 u2 y5 v2 y6 u3 y7 v3
    ; row 1
    movq        mm3, mm1
    punpcklbw   mm1, mm4
    punpckhbw   mm3, mm4

    movq        [edi], mm0
    movq        [edi + 8], mm2
    movq        [edi + edx], mm1
    movq        [edi + edx + 8], mm3
%else           ; UYVY: chroma byte first, luma byte second
    ; the chroma quadword is the unpack destination, so it needs one
    ; copy per output quadword
    movq        mm5, mm4
    movq        mm6, mm4
    movq        mm7, mm4
    ; row 0
    punpcklbw   mm4, mm0                ; u0 y0 v0 y1 u1 y2 v1 y3
    punpckhbw   mm5, mm0                ; u2 y4 v2 y5 u3 y6 v3 y7
    ; row 1
    punpcklbw   mm6, mm1
    punpckhbw   mm7, mm1

    movq        [edi], mm4
    movq        [edi + 8], mm5
    movq        [edi + edx], mm6
    movq        [edi + edx + 8], mm7
%endif
%endmacro
209 |
|
;------------------------------------------------------------------------------ |
210 |
|
|
211 |
|
|
212 |
|
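;------------------------------------------------------------------------------
; YV12_TO_YUYVI( TYPE, unused )
;
; Interlaced variant: luma rows 0 and 2 are combined with chroma row 0,
; luma rows 1 and 3 with chroma row 1.
;
; TYPE  0=yuyv, 1=uyvy
; The macro is declared with two arguments; the second is not referenced.
;
; Per invocation: bytes=2 (per packed pixel), pixels=8 (per row),
; vpixels=4 (rows)
;------------------------------------------------------------------------------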
219 |
|
; YV12_TO_YUYVI_INIT( TYPE, unused )
; Intentionally empty: YV12_TO_YUYVI loads every MMX register it uses
; before reading it, so there is nothing to prepare.
%macro YV12_TO_YUYVI_INIT 2
%endmacro
221 |
|
|
222 |
|
;------------------------------------------------------------------------------
; YV12_TO_YUYVI( TYPE, unused )
;
; Interlaced planar 4:2:0 -> packed 4:2:2 for one block of 8 pixels x 4 rows.
; Two chroma rows are read: luma rows 0 and 2 are interleaved with chroma
; row 0, luma rows 1 and 3 with chroma row 1.
;
;   TYPE  0 = yuyv output, 1 = uyvy output
;
; Reads:     8 bytes at esi + {0,1,2,3}*eax        (luma rows)
;            [ebx], [ebx+S] (U) and [ecx], [ecx+S] (V), S = dword [uv_stride]
; Writes:    16 bytes at edi + {0,1,2,3}*edx
; Preserves: ebp, esi, edi (each is modified and restored inside the macro)
; Clobbers:  mm0, mm1, mm4-mm7 (plus mm2, mm3 when TYPE is 1), eflags
;------------------------------------------------------------------------------
%macro YV12_TO_YUYVI 2
    ; ebp is borrowed as the chroma-stride index and must come back intact.
    ; Its value is parked in mm7 (which both branches below overwrite
    ; anyway) rather than exchanged with the stack slot: xchg with a memory
    ; operand is always executed with LOCK semantics, which is needlessly
    ; expensive twice per block.
    movd        mm7, ebp
    mov         ebp, [uv_stride]
    movd        mm0, [ebx]              ; u0..u3, chroma row 0
    movd        mm1, [ebx + ebp]        ; u0..u3, chroma row 1
    punpcklbw   mm0, [ecx]              ; u0 v0 u1 v1 u2 v2 u3 v3 ; chroma row 0
    punpcklbw   mm1, [ecx + ebp]        ; u0 v0 u1 v1 u2 v2 u3 v3 ; chroma row 1
    movd        ebp, mm7

%if %1 == 0     ; YUYV: luma byte first, chroma byte second
    ;---[ rows 0 and 1 ]-------------------------------------------------------
    movq        mm4, [esi]              ; luma row 0
    movq        mm6, [esi + eax]        ; luma row 1
    movq        mm5, mm4
    movq        mm7, mm6
    punpcklbw   mm4, mm0                ; row 0, pixels 0-3
    punpckhbw   mm5, mm0                ; row 0, pixels 4-7
    punpcklbw   mm6, mm1                ; row 1, pixels 0-3
    punpckhbw   mm7, mm1                ; row 1, pixels 4-7
    movq        [edi], mm4
    movq        [edi + 8], mm5
    movq        [edi + edx], mm6
    movq        [edi + edx + 8], mm7

    ;---[ rows 2 and 3 ]-------------------------------------------------------
    ; Row 3 would need base + 3*stride, which cannot be encoded, so both
    ; pointers are stepped down one row and stepped back afterwards.
    add         esi, eax
    add         edi, edx
    movq        mm4, [esi + eax]        ; luma row 2
    movq        mm6, [esi + 2*eax]      ; luma row 3
    movq        mm5, mm4
    movq        mm7, mm6
    punpcklbw   mm4, mm0                ; row 2, pixels 0-3
    punpckhbw   mm5, mm0                ; row 2, pixels 4-7
    punpcklbw   mm6, mm1                ; row 3, pixels 0-3
    punpckhbw   mm7, mm1                ; row 3, pixels 4-7
    movq        [edi + edx], mm4
    movq        [edi + edx + 8], mm5
    movq        [edi + 2*edx], mm6
    movq        [edi + 2*edx + 8], mm7
    sub         edi, edx
    sub         esi, eax
%else           ; UYVY: chroma byte first, luma byte second
    ;---[ rows 0 and 1 ]-------------------------------------------------------
    movq        mm2, [esi]              ; luma row 0
    movq        mm3, [esi + eax]        ; luma row 1
    movq        mm4, mm0
    movq        mm5, mm0
    movq        mm6, mm1
    movq        mm7, mm1
    punpcklbw   mm4, mm2                ; row 0, pixels 0-3
    punpckhbw   mm5, mm2                ; row 0, pixels 4-7
    punpcklbw   mm6, mm3                ; row 1, pixels 0-3
    punpckhbw   mm7, mm3                ; row 1, pixels 4-7
    movq        [edi], mm4
    movq        [edi + 8], mm5
    movq        [edi + edx], mm6
    movq        [edi + edx + 8], mm7

    ;---[ rows 2 and 3 ]-------------------------------------------------------
    ; Same pointer step as in the YUYV branch; undone at the end.
    add         esi, eax
    add         edi, edx
    movq        mm2, [esi + eax]        ; luma row 2
    movq        mm3, [esi + 2*eax]      ; luma row 3
    movq        mm4, mm0
    movq        mm5, mm0
    movq        mm6, mm1
    movq        mm7, mm1
    punpcklbw   mm4, mm2                ; row 2, pixels 0-3
    punpckhbw   mm5, mm2                ; row 2, pixels 4-7
    punpcklbw   mm6, mm3                ; row 3, pixels 0-3
    punpckhbw   mm7, mm3                ; row 3, pixels 4-7
    movq        [edi + edx], mm4
    movq        [edi + edx + 8], mm5
    movq        [edi + 2*edx], mm6
    movq        [edi + 2*edx + 8], mm7
    sub         edi, edx
    sub         esi, eax
%endif
%endmacro
300 |
|
;------------------------------------------------------------------------------ |
301 |
|
|
302 |
|
|
303 |
|
;------------------------------------------------------------------------------
; Exported conversion routines
;
; MAKE_COLORSPACE is not defined in this file (presumably it comes from
; colorspace_mmx.inc). Arguments as they are used below:
;   1    name of the generated routine
;   2    0 for every routine here (meaning is defined by MAKE_COLORSPACE)
;   3-5  bytes, pixels, vpixels -- the figures listed in each macro's header
;   6    per-block macro; an <macro>_INIT with the same prefix is defined above
;   7-8  presumably forwarded to that macro as its two arguments
;        (TYPE, then PAVG or -1 where the macro ignores it)
;------------------------------------------------------------------------------

; input: packed yuyv/uyvy -> planar yv12

MAKE_COLORSPACE  yuyv_to_yv12_mmx,  0,  2, 8, 2,  YUYV_TO_YV12,  0, 0
MAKE_COLORSPACE  yuyv_to_yv12_3dn,  0,  2, 8, 2,  YUYV_TO_YV12,  0, pavgusb
MAKE_COLORSPACE  yuyv_to_yv12_xmm,  0,  2, 8, 2,  YUYV_TO_YV12,  0, pavgb

MAKE_COLORSPACE  uyvy_to_yv12_mmx,  0,  2, 8, 2,  YUYV_TO_YV12,  1, 0
MAKE_COLORSPACE  uyvy_to_yv12_3dn,  0,  2, 8, 2,  YUYV_TO_YV12,  1, pavgusb
MAKE_COLORSPACE  uyvy_to_yv12_xmm,  0,  2, 8, 2,  YUYV_TO_YV12,  1, pavgb

; output: planar yv12 -> packed yuyv/uyvy (progressive, then interlaced)

MAKE_COLORSPACE  yv12_to_yuyv_mmx,  0,  2, 8, 2,  YV12_TO_YUYV,  0, -1
MAKE_COLORSPACE  yv12_to_uyvy_mmx,  0,  2, 8, 2,  YV12_TO_YUYV,  1, -1

MAKE_COLORSPACE  yv12_to_yuyvi_mmx, 0,  2, 8, 4,  YV12_TO_YUYVI, 0, -1
MAKE_COLORSPACE  yv12_to_uyvyi_mmx, 0,  2, 8, 4,  YV12_TO_YUYVI, 1, -1