Parent Directory
|
Revision Log
Revision 1.11.2.1 - (view) (download)
1 : | edgomez | 1.3 | ;/**************************************************************************** |
2 : | edgomez | 1.2 | ; * |
3 : | edgomez | 1.3 | ; * XVID MPEG-4 VIDEO CODEC |
4 : | ; * - 8<->16 bit transfer functions - | ||
5 : | edgomez | 1.2 | ; * |
6 : | edgomez | 1.3 | ; * Copyright (C) 2002 Jaan Kalda |
7 : | edgomez | 1.2 | ; * |
8 : | edgomez | 1.3 | ; * This program is free software ; you can redistribute it and/or modify |
9 : | ; * it under the terms of the GNU General Public License as published by | ||
10 : | ; * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | ; * (at your option) any later version. | ||
12 : | edgomez | 1.2 | ; * |
13 : | edgomez | 1.3 | ; * This program is distributed in the hope that it will be useful, |
14 : | ; * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | ; * GNU General Public License for more details. | ||
17 : | edgomez | 1.2 | ; * |
18 : | edgomez | 1.3 | ; * You should have received a copy of the GNU General Public License |
19 : | ; * along with this program ; if not, write to the Free Software | ||
20 : | ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | edgomez | 1.2 | ; * |
22 : | Isibaar | 1.11.2.1 | ; * $Id: mem_transfer_3dne.asm,v 1.11 2008/11/26 01:04:34 Isibaar Exp $ |
23 : | edgomez | 1.3 | ; * |
24 : | ; ***************************************************************************/ | ||
25 : | |||
26 : | ; these 3dne functions are compatible with iSSE, but are optimized specifically | ||
27 : | ; for K7 pipelines | ||
28 : | |||
29 : | Isibaar | 1.11 | %include "nasm.inc" |
30 : | edgomez | 1.2 | |
31 : | edgomez | 1.3 | ;============================================================================= |
32 : | ; Read only data | ||
33 : | ;============================================================================= | ||
34 : | edgomez | 1.2 | |
; Read-only data: mm_zero is two zero dwords (eight zero bytes).
; No instruction visible in this file references the label; the routines
; below clear mm7 with pxor instead.
35 : | Isibaar | 1.11 | DATA |
36 : | edgomez | 1.2 | |
37 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
38 : | edgomez | 1.2 | mm_zero: |
39 : | edgomez | 1.3 | dd 0,0 |
40 : | ;============================================================================= | ||
41 : | ; Macros | ||
42 : | ;============================================================================= | ||
43 : | edgomez | 1.2 | |
; nop4: four bytes of filler emitted as raw data in the instruction stream.
; The bytes 8D 74 26 00 encode "lea esi, [esi+0]", which changes no register
; contents in 32-bit mode.
; On x86_64 builds nop4 is defined empty and emits nothing.
; NOTE(review): presumably it is disabled there because the same bytes would
; write the 32-bit esi and so clear the upper half of rsi -- confirm.
44 : | Isibaar | 1.11 | %ifdef ARCH_IS_X86_64 |
45 : | %define nop4 | ||
46 : | %else | ||
47 : | edgomez | 1.2 | %macro nop4 0 |
48 : | edgomez | 1.3 | db 08Dh, 074h, 026h, 0 |
49 : | edgomez | 1.2 | %endmacro |
50 : | Isibaar | 1.11 | %endif |
51 : | edgomez | 1.2 | |
52 : | edgomez | 1.3 | ;============================================================================= |
53 : | ; Code | ||
54 : | ;============================================================================= | ||
55 : | |||
; Code section. One cglobal line per routine implemented below.
; TEXT and cglobal are not defined in this file (presumably they come from
; nasm.inc and make the symbols visible to the linker -- confirm there).
56 : | Isibaar | 1.11.2.1 | TEXT |
57 : | edgomez | 1.2 | |
58 : | cglobal transfer_8to16copy_3dne | ||
59 : | cglobal transfer_16to8copy_3dne | ||
60 : | cglobal transfer_8to16sub_3dne | ||
61 : | cglobal transfer_8to16subro_3dne | ||
62 : | cglobal transfer_8to16sub2_3dne | ||
63 : | cglobal transfer_16to8add_3dne | ||
64 : | cglobal transfer8x8_copy_3dne | ||
65 : | suxen_drol | 1.7 | cglobal transfer8x4_copy_3dne |
66 : | edgomez | 1.2 | |
67 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
68 : | edgomez | 1.2 | ; |
69 : | ; void transfer_8to16copy_3dne(int16_t * const dst, | ||
70 : | ; const uint8_t * const src, | ||
71 : | ; uint32_t stride); | ||
72 : | ; | ||
73 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
74 : | edgomez | 1.2 | |
; transfer_8to16copy_3dne: widen an 8x8 block of bytes to 16-bit words.
; Reads eight rows of 8 bytes from Src (rows are Stride bytes apart) and
; stores them as 128 contiguous bytes (64 words, 16 bytes per row) at Dst.
; Most rows use "punpcklbw mmN, [mem]" followed by "psrlw mmN, 8": the unpack
; puts each source byte in the upper half of a word (the lower half is
; whatever the register held before) and the shift moves it down, leaving the
; byte zero-extended. Row 1 instead uses movq plus an unpack against the
; zeroed mm7.
; The loads for the next rows are interleaved with the stores of the
; previous ones, so instruction order matters.
; NOTE(review): Stride is declared uint32_t but is copied with a full-width
; mov here, whereas transfer_8to16sub2_3dne loads its stride with the 32-bit
; forms (TMP1d, prm5d) -- confirm the upper half of the register is
; guaranteed clear on 64-bit targets.
75 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
76 : | edgomez | 1.2 | transfer_8to16copy_3dne: |
77 : | |||
78 : | Isibaar | 1.11 | mov _EAX, prm2 ; Src |
79 : | mov TMP1, prm3 ; Stride | ||
80 : | mov TMP0, prm1 ; Dst | ||
; row 0: bytes 0..3 -> mm0, bytes 4..7 -> mm1; row 1 -> mm2 (low) / mm3 (high)
81 : | punpcklbw mm0, [byte _EAX] | ||
82 : | punpcklbw mm1, [_EAX+4] | ||
83 : | movq mm2, [_EAX+TMP1] | ||
84 : | movq mm3, [_EAX+TMP1] | ||
85 : | edgomez | 1.3 | pxor mm7, mm7 |
; _EAX now points at row 2
86 : | Isibaar | 1.11 | lea _EAX, [_EAX+2*TMP1] |
87 : | edgomez | 1.3 | punpcklbw mm2, mm7 |
88 : | punpckhbw mm3, mm7 | ||
89 : | psrlw mm0, 8 | ||
90 : | psrlw mm1, 8 | ||
; rows 2 and 3 -> mm4..mm7 (mm4 = row 2 low, mm5 = row 3 high,
; mm6 = row 3 low, mm7 = row 2 high); mm7 is no longer zero after this
91 : | Isibaar | 1.11 | punpcklbw mm4, [_EAX] |
92 : | punpcklbw mm5, [_EAX+TMP1+4] | ||
93 : | movq [byte TMP0+0*64], mm0 | ||
94 : | movq [TMP0+0*64+8], mm1 | ||
95 : | punpcklbw mm6, [_EAX+TMP1] | ||
96 : | punpcklbw mm7, [_EAX+4] | ||
; _EAX now points at row 4
97 : | lea _EAX, [byte _EAX+2*TMP1] | ||
98 : | edgomez | 1.3 | psrlw mm4, 8 |
99 : | psrlw mm5, 8 | ||
; rows 4 and 5 -> mm0..mm3 (mm0 = row 4 low, mm1 = row 5 high,
; mm2 = row 5 low, mm3 = row 4 high); row 1 is stored before mm2/mm3 reload
100 : | Isibaar | 1.11 | punpcklbw mm0, [_EAX] |
101 : | punpcklbw mm1, [_EAX+TMP1+4] | ||
102 : | movq [TMP0+0*64+16], mm2 | ||
103 : | movq [TMP0+0*64+24], mm3 | ||
104 : | edgomez | 1.3 | psrlw mm6, 8 |
105 : | psrlw mm7, 8 | ||
106 : | Isibaar | 1.11 | punpcklbw mm2, [_EAX+TMP1] |
107 : | punpcklbw mm3, [_EAX+4] | ||
; _EAX now points at row 6
108 : | lea _EAX, [byte _EAX+2*TMP1] | ||
109 : | movq [byte TMP0+0*64+32], mm4 | ||
110 : | movq [TMP0+0*64+56], mm5 | ||
111 : | edgomez | 1.3 | psrlw mm0, 8 |
112 : | psrlw mm1, 8 | ||
; rows 6 and 7 -> mm4..mm7, same register-to-half mapping as rows 2 and 3
113 : | Isibaar | 1.11 | punpcklbw mm4, [_EAX] |
114 : | punpcklbw mm5, [_EAX+TMP1+4] | ||
115 : | movq [byte TMP0+0*64+48], mm6 | ||
116 : | movq [TMP0+0*64+40], mm7 | ||
117 : | edgomez | 1.3 | psrlw mm2, 8 |
118 : | psrlw mm3, 8 | ||
119 : | Isibaar | 1.11 | punpcklbw mm6, [_EAX+TMP1] |
120 : | punpcklbw mm7, [_EAX+4] | ||
; second half of the destination (rows 4..7) starts at Dst + 64 bytes
121 : | movq [byte TMP0+1*64], mm0 | ||
122 : | movq [TMP0+1*64+24], mm1 | ||
123 : | edgomez | 1.3 | psrlw mm4, 8 |
124 : | psrlw mm5, 8 | ||
125 : | Isibaar | 1.11 | movq [TMP0+1*64+16], mm2 |
126 : | movq [TMP0+1*64+8], mm3 | ||
127 : | edgomez | 1.3 | psrlw mm6, 8 |
128 : | psrlw mm7, 8 | ||
129 : | Isibaar | 1.11 | movq [byte TMP0+1*64+32], mm4 |
130 : | movq [TMP0+1*64+56], mm5 | ||
131 : | movq [byte TMP0+1*64+48], mm6 | ||
132 : | movq [TMP0+1*64+40], mm7 | ||
133 : | edgomez | 1.3 | ret |
134 : | Isibaar | 1.10 | ENDFUNC |
135 : | edgomez | 1.2 | |
136 : | edgomez | 1.3 | |
137 : | ;----------------------------------------------------------------------------- | ||
138 : | edgomez | 1.2 | ; |
139 : | ; void transfer_16to8copy_3dne(uint8_t * const dst, | ||
140 : | ; const int16_t * const src, | ||
141 : | ; uint32_t stride); | ||
142 : | ; | ||
143 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
144 : | edgomez | 1.2 | |
; transfer_16to8copy_3dne: narrow an 8x8 block of 16-bit words to bytes.
; Reads 128 contiguous bytes from Src (eight rows of eight words, 16 bytes
; per row) and writes eight 8-byte rows to Dst, Stride bytes apart.
; packuswb converts signed words to unsigned bytes with saturation.
; Register-to-row mapping after the loads: mm0..mm4 = rows 0..4,
; mm5 = row 5, mm6 = row 6, mm7 = row 7.
145 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
146 : | edgomez | 1.2 | transfer_16to8copy_3dne: |
147 : | |||
148 : | Isibaar | 1.11 | mov _EAX, prm2 ; Src |
149 : | mov TMP0, prm1 ; Dst | ||
150 : | mov TMP1, prm3 ; Stride | ||
151 : | |||
152 : | movq mm0, [byte _EAX+0*32] | ||
153 : | packuswb mm0, [_EAX+0*32+8] | ||
154 : | movq mm1, [_EAX+0*32+16] | ||
155 : | packuswb mm1, [_EAX+0*32+24] | ||
156 : | movq mm5, [_EAX+2*32+16] | ||
157 : | movq mm2, [_EAX+1*32] | ||
158 : | packuswb mm2, [_EAX+1*32+8] | ||
159 : | movq mm3, [_EAX+1*32+16] | ||
160 : | packuswb mm3, [_EAX+1*32+24] | ||
161 : | movq mm6, [_EAX+3*32] | ||
162 : | movq mm4, [_EAX+2*32] | ||
163 : | packuswb mm4, [_EAX+2*32+8] | ||
164 : | packuswb mm5, [_EAX+2*32+24] | ||
165 : | movq mm7, [_EAX+3*32+16] | ||
166 : | packuswb mm7, [_EAX+3*32+24] | ||
167 : | packuswb mm6, [_EAX+3*32+8] | ||
168 : | movq [TMP0], mm0 | ||
; all source data is in registers; _EAX is reused as Dst + 3*Stride
169 : | lea _EAX, [3*TMP1] | ||
170 : | add _EAX, TMP0 | ||
171 : | movq [TMP0+TMP1], mm1 | ||
172 : | movq [TMP0+2*TMP1], mm2 | ||
173 : | movq [byte _EAX], mm3 | ||
174 : | movq [TMP0+4*TMP1], mm4 | ||
; TMP0 becomes Dst + 4*Stride; the stores below hit rows 5, 7 and 6
175 : | lea TMP0, [byte TMP0+4*TMP1] | ||
176 : | movq [_EAX+2*TMP1], mm5 | ||
177 : | movq [_EAX+4*TMP1], mm7 | ||
178 : | movq [TMP0+2*TMP1], mm6 | ||
179 : | edgomez | 1.2 | ret |
180 : | Isibaar | 1.10 | ENDFUNC |
181 : | edgomez | 1.2 | |
182 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
183 : | edgomez | 1.2 | ; |
184 : | ; void transfer_8to16sub_3dne(int16_t * const dct, | ||
185 : | ; uint8_t * const cur, | ||
186 : | ; const uint8_t * const ref, | ||
187 : | ; const uint32_t stride); | ||
188 : | ; | ||
189 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
190 : | edgomez | 1.2 | |
; COPY_8_TO_16_SUB: handle two 8-pixel rows. Stores cur - ref as 16-bit words
; (signed saturating subtract, psubsw) in the 32-byte slot at _EDI + 32 * the
; first argument.
;   first argument:  row-pair index. Below 3, the cur (_EAX) and ref (TMP0)
;                    pointers advance by two rows. Otherwise one pointer-sized
;                    value is popped off the stack into TMP0 (both users in
;                    this file pushed _EDI before the first expansion).
;   second argument: when equal to 1, the reference rows are also written
;                    over the current rows.
; Expects mm7 == 0, _EAX = cur, TMP0 = ref, TMP1 = stride, _EDI = dst.
191 : | Isibaar | 1.11 | ; when second argument == 1, the reference (TMP0) block is copied to current (_EAX) |
192 : | edgomez | 1.2 | %macro COPY_8_TO_16_SUB 2 |
193 : | Isibaar | 1.11 | movq mm1, [_EAX] ; cur |
194 : | edgomez | 1.2 | movq mm0, mm1 |
195 : | Isibaar | 1.11 | movq mm4, [TMP0] ; ref |
196 : | edgomez | 1.2 | movq mm6, mm4 |
197 : | %if %2 == 1 | ||
198 : | Isibaar | 1.11 | movq [_EAX], mm4 |
199 : | edgomez | 1.2 | %endif |
200 : | punpckhbw mm1, mm7 | ||
201 : | punpckhbw mm6, mm7 | ||
202 : | punpcklbw mm4, mm7 | ||
; NOTE(review): alignment directive inside the instruction stream; any fill
; it emits is executed as part of the routine (presumably NOP padding --
; confirm against the ALIGN definition in use).
203 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
; second row of the pair: cur -> mm2 (low) / mm3 (high), ref -> mm5 / mm6
204 : | movq mm2, [byte _EAX+TMP1] | ||
205 : | edgomez | 1.2 | punpcklbw mm0, mm7 |
206 : | Isibaar | 1.11 | movq mm3, [byte _EAX+TMP1] |
207 : | edgomez | 1.2 | punpcklbw mm2, mm7 |
208 : | Isibaar | 1.11 | movq mm5, [byte TMP0+TMP1] ; ref |
209 : | edgomez | 1.2 | punpckhbw mm3, mm7 |
210 : | %if %2 == 1 | ||
211 : | Isibaar | 1.11 | movq [byte _EAX+TMP1], mm5 |
212 : | edgomez | 1.2 | %endif |
213 : | psubsw mm1, mm6 | ||
214 : | |||
215 : | movq mm6, mm5 | ||
216 : | psubsw mm0, mm4 | ||
217 : | edgomez | 1.3 | %if (%1 < 3) |
218 : | Isibaar | 1.11 | lea _EAX,[_EAX+2*TMP1] |
219 : | lea TMP0,[TMP0+2*TMP1] | ||
220 : | edgomez | 1.2 | %else |
; last pair: TMP0 (ref) is not read again, so it receives the value popped
; from the stack; _EDI itself is still needed for the stores below
221 : | Isibaar | 1.11 | mov TMP0,[_ESP] |
222 : | add _ESP,byte PTR_SIZE | ||
223 : | edgomez | 1.2 | %endif |
224 : | Isibaar | 1.11 | movq [_EDI+%1*32+ 8], mm1 |
225 : | movq [byte _EDI+%1*32+ 0], mm0 ; dst | ||
226 : | edgomez | 1.2 | punpcklbw mm5, mm7 |
227 : | punpckhbw mm6, mm7 | ||
228 : | psubsw mm2, mm5 | ||
229 : | psubsw mm3, mm6 | ||
230 : | Isibaar | 1.11 | movq [_EDI+%1*32+16], mm2 |
231 : | movq [_EDI+%1*32+24], mm3 | ||
232 : | edgomez | 1.2 | %endmacro |
233 : | |||
; transfer_8to16sub_3dne: for an 8x8 block, store cur - ref as 16-bit words
; at the destination (prm1) and overwrite cur with ref. The macro is expanded
; with its second argument set to 1.
; _EDI is saved with push and used as the destination pointer. The saved
; value is popped into TMP0 inside the last COPY_8_TO_16_SUB expansion and
; moved back into _EDI just before ret, so there is no explicit pop here.
234 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
235 : | edgomez | 1.2 | transfer_8to16sub_3dne: |
236 : | Isibaar | 1.11 | mov _EAX, prm2 ; Cur |
237 : | mov TMP0, prm3 ; Ref | ||
238 : | mov TMP1, prm4 ; Stride | ||
239 : | |||
240 : | push _EDI | ||
241 : | %ifdef ARCH_IS_X86_64 | ||
242 : | mov _EDI, prm1 | ||
243 : | %else | ||
; 32-bit: skip the saved _EDI (4) and the return address (4)
244 : | mov _EDI, [_ESP+4+4] ; Dst | ||
245 : | %endif | ||
246 : | |||
; mm7 = 0 is required by the macro; nop and ALIGN only pad the code
247 : | edgomez | 1.2 | pxor mm7, mm7 |
248 : | nop | ||
249 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
250 : | edgomez | 1.2 | COPY_8_TO_16_SUB 0, 1 |
251 : | COPY_8_TO_16_SUB 1, 1 | ||
252 : | COPY_8_TO_16_SUB 2, 1 | ||
253 : | COPY_8_TO_16_SUB 3, 1 | ||
; TMP0 holds the caller's _EDI, popped by the last expansion above
254 : | Isibaar | 1.11 | mov _EDI, TMP0 |
255 : | edgomez | 1.2 | ret |
256 : | Isibaar | 1.10 | ENDFUNC |
257 : | edgomez | 1.2 | |
; transfer_8to16subro_3dne: same parameter usage as transfer_8to16sub_3dne
; (prm1 = destination, prm2 = cur, prm3 = ref, prm4 = stride), but the macro
; is expanded with its second argument set to 0, so cur is only read and
; never written.
; _EDI is saved with push; the last COPY_8_TO_16_SUB expansion pops the saved
; value into TMP0 and it is moved back into _EDI just before ret.
258 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
259 : | edgomez | 1.2 | transfer_8to16subro_3dne: |
260 : | Isibaar | 1.11 | mov _EAX, prm2 ; Cur |
261 : | mov TMP0, prm3 ; Ref | ||
262 : | mov TMP1, prm4 ; Stride | ||
263 : | |||
264 : | push _EDI | ||
265 : | %ifdef ARCH_IS_X86_64 | ||
266 : | mov _EDI, prm1 | ||
267 : | %else | ||
; 32-bit: skip the saved _EDI (4) and the return address (4)
268 : | mov _EDI, [_ESP+4+ 4] ; Dst | ||
269 : | %endif | ||
270 : | |||
; mm7 = 0 is required by the macro; nop and ALIGN only pad the code
271 : | edgomez | 1.2 | pxor mm7, mm7 |
272 : | nop | ||
273 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
274 : | edgomez | 1.2 | COPY_8_TO_16_SUB 0, 0 |
275 : | COPY_8_TO_16_SUB 1, 0 | ||
276 : | COPY_8_TO_16_SUB 2, 0 | ||
277 : | COPY_8_TO_16_SUB 3, 0 | ||
; TMP0 holds the caller's _EDI, popped by the last expansion above
278 : | Isibaar | 1.11 | mov _EDI, TMP0 |
279 : | edgomez | 1.2 | ret |
280 : | Isibaar | 1.10 | ENDFUNC |
281 : | edgomez | 1.2 | |
282 : | |||
283 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
284 : | edgomez | 1.2 | ; |
285 : | ; void transfer_8to16sub2_3dne(int16_t * const dct, | ||
286 : | ; uint8_t * const cur, | ||
287 : | ; const uint8_t * ref1, | ||
288 : | ; const uint8_t * ref2, | ||
289 : | ; const uint32_t stride) | ||
290 : | ; | ||
291 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
292 : | edgomez | 1.2 | |
; COPY_8_TO_16_SUB2_SSE: handle two 8-pixel rows with two references.
; For each row, ref = pavgb(ref1, ref2), the byte-wise rounded average. That
; average is written over the cur row, and cur - ref is stored as 16-bit
; words (signed saturating subtract) in the 32-byte slot at TMP0 + 32 * the
; macro argument. The cur rows are loaded into registers before they are
; overwritten.
;   argument: row-pair index. Below 3, _ESI, _EBX and _EAX advance by two
;             rows. Otherwise _ESI and _EBX are reloaded from the two values
;             on top of the stack and the stack pointer moves past them.
; Expects mm7 == 0, _EAX = cur, _EBX = ref1, _ESI = ref2, TMP1 = stride,
; TMP0 = dst.
293 : | %macro COPY_8_TO_16_SUB2_SSE 1 | ||
; The two db lines are hand-encoded movq loads (opcode 0F 6F with a SIB byte
; and a zero 8-bit displacement); the instruction is named in the trailing
; comment. NOTE(review): presumably chosen for instruction length -- confirm.
294 : | Isibaar | 1.11 | db 0Fh, 6Fh, 44h, 20h, 00 ;movq mm0, [byte _EAX] ; cur |
295 : | edgomez | 1.2 | punpcklbw mm0, mm7 |
296 : | Isibaar | 1.11 | movq mm2, [byte _EAX+TMP1] |
297 : | edgomez | 1.2 | punpcklbw mm2, mm7 |
298 : | Isibaar | 1.11 | db 0Fh, 6Fh, 4ch, 20h, 00 ;movq mm1, [byte _EAX] |
299 : | edgomez | 1.2 | punpckhbw mm1, mm7 |
300 : | Isibaar | 1.11 | movq mm3, [byte _EAX+TMP1] |
301 : | edgomez | 1.2 | punpckhbw mm3, mm7 |
302 : | edgomez | 1.3 | |
303 : | Isibaar | 1.11 | movq mm4, [byte _EBX] ; ref1 |
304 : | pavgb mm4, [byte _ESI] ; ref2 | ||
305 : | movq [_EAX], mm4 | ||
306 : | movq mm5, [_EBX+TMP1] ; ref1 | ||
307 : | pavgb mm5, [_ESI+TMP1] ; ref2 | ||
308 : | movq [_EAX+TMP1], mm5 | ||
309 : | edgomez | 1.2 | movq mm6, mm4 |
310 : | punpcklbw mm4, mm7 | ||
311 : | punpckhbw mm6, mm7 | ||
312 : | %if (%1 < 3) | ||
313 : | Isibaar | 1.11 | lea _ESI,[_ESI+2*TMP1] |
314 : | lea _EBX,[byte _EBX+2*TMP1] | ||
315 : | lea _EAX,[_EAX+2*TMP1] | ||
316 : | edgomez | 1.2 | %else |
; last pair: restore the registers saved by the routine (most recently
; pushed first) and release both stack slots
317 : | Isibaar | 1.11 | mov _ESI,[_ESP] |
318 : | mov _EBX,[_ESP+PTR_SIZE] | ||
319 : | add _ESP,byte 2*PTR_SIZE | ||
320 : | edgomez | 1.3 | %endif |
321 : | edgomez | 1.2 | psubsw mm0, mm4 |
322 : | psubsw mm1, mm6 | ||
323 : | movq mm6, mm5 | ||
324 : | punpcklbw mm5, mm7 | ||
325 : | punpckhbw mm6, mm7 | ||
326 : | psubsw mm2, mm5 | ||
327 : | psubsw mm3, mm6 | ||
328 : | Isibaar | 1.11 | movq [byte TMP0+%1*32+ 0], mm0 ; dst |
329 : | movq [TMP0+%1*32+ 8], mm1 | ||
330 : | movq [TMP0+%1*32+16], mm2 | ||
331 : | movq [TMP0+%1*32+24], mm3 | ||
332 : | edgomez | 1.2 | %endmacro |
333 : | |||
; transfer_8to16sub2_3dne: for an 8x8 block, average ref1 and ref2 byte-wise,
; write that average over cur, and store cur - average as 16-bit words at the
; destination (prm1).
; _EBX and _ESI are saved with push. They are restored inside the last
; COPY_8_TO_16_SUB2_SSE expansion, which also releases the two stack slots,
; so there is no explicit pop before ret.
; The stride is loaded with the 32-bit register/parameter forms.
334 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
335 : | edgomez | 1.2 | transfer_8to16sub2_3dne: |
336 : | Isibaar | 1.11 | mov TMP1d, prm5d ; Stride |
337 : | mov TMP0, prm1 ; Dst | ||
338 : | mov _EAX, prm2 ; Cur | ||
339 : | push _EBX | ||
; loads _EBP with its own value: changes no state, acts as filler
340 : | lea _EBP,[byte _EBP] | ||
341 : | |||
342 : | %ifdef ARCH_IS_X86_64 | ||
343 : | mov _EBX, prm3 | ||
344 : | %else | ||
; 32-bit: one register pushed so far (4), then the offset of the third argument
345 : | mov _EBX, [_ESP+4+12] ; Ref1 | ||
346 : | %endif | ||
347 : | |||
348 : | push _ESI | ||
349 : | edgomez | 1.2 | pxor mm7, mm7 |
350 : | Isibaar | 1.11 | |
351 : | %ifdef ARCH_IS_X86_64 | ||
352 : | mov _ESI, prm4 | ||
353 : | %else | ||
; 32-bit: two registers pushed (8), then the offset of the fourth argument
354 : | mov _ESI, [_ESP+8+16] ; Ref2 | ||
355 : | %endif | ||
356 : | |||
357 : | edgomez | 1.3 | nop4 |
358 : | edgomez | 1.2 | COPY_8_TO_16_SUB2_SSE 0 |
359 : | COPY_8_TO_16_SUB2_SSE 1 | ||
360 : | COPY_8_TO_16_SUB2_SSE 2 | ||
361 : | COPY_8_TO_16_SUB2_SSE 3 | ||
362 : | |||
363 : | ret | ||
364 : | Isibaar | 1.10 | ENDFUNC |
365 : | edgomez | 1.2 | |
366 : | |||
367 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
368 : | edgomez | 1.2 | ; |
369 : | ; void transfer_16to8add_3dne(uint8_t * const dst, | ||
370 : | ; const int16_t * const src, | ||
371 : | ; uint32_t stride); | ||
372 : | ; | ||
373 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
374 : | edgomez | 1.2 | |
; COPY_16_TO_8_ADD: add two rows of 16-bit values onto two rows of bytes.
; Loads the 8-byte rows at TMP0 and TMP0+TMP1 and zero-extends them to words
; (needs mm7 == 0). It then adds the 32 bytes of 16-bit data at
; _EAX + 32 * the macro argument with signed saturation (paddsw), packs back
; to bytes with unsigned saturation (packuswb) and stores the result over the
; same two rows.
; TMP0 is not advanced here; the caller steps it between expansions.
375 : | %macro COPY_16_TO_8_ADD 1 | ||
376 : | Isibaar | 1.11 | movq mm0, [byte TMP0] |
377 : | edgomez | 1.2 | punpcklbw mm0, mm7 |
378 : | Isibaar | 1.11 | movq mm2, [byte TMP0+TMP1] |
379 : | edgomez | 1.2 | punpcklbw mm2, mm7 |
380 : | Isibaar | 1.11 | movq mm1, [byte TMP0] |
381 : | edgomez | 1.2 | punpckhbw mm1, mm7 |
382 : | Isibaar | 1.11 | movq mm3, [byte TMP0+TMP1] |
383 : | edgomez | 1.2 | punpckhbw mm3, mm7 |
384 : | Isibaar | 1.11 | paddsw mm0, [byte _EAX+%1*32+ 0] |
385 : | paddsw mm1, [_EAX+%1*32+ 8] | ||
386 : | paddsw mm2, [_EAX+%1*32+16] | ||
387 : | paddsw mm3, [_EAX+%1*32+24] | ||
388 : | edgomez | 1.2 | packuswb mm0, mm1 |
389 : | packuswb mm2, mm3 | ||
; copies the stack pointer onto itself: changes no state, acts as filler
390 : | Isibaar | 1.11 | mov _ESP, _ESP |
391 : | movq [byte TMP0], mm0 | ||
392 : | movq [TMP0+TMP1], mm2 | ||
393 : | edgomez | 1.2 | %endmacro |
394 : | |||
395 : | |||
; transfer_16to8add_3dne: add an 8x8 block of 16-bit values (Src, 128
; contiguous bytes) onto an 8x8 block of bytes at Dst (rows Stride bytes
; apart), in place, saturating to the unsigned byte range.
; Four COPY_16_TO_8_ADD expansions handle two rows each; Dst advances by two
; rows between them.
396 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
397 : | edgomez | 1.2 | transfer_16to8add_3dne: |
398 : | Isibaar | 1.11 | mov TMP0, prm1 ; Dst |
399 : | mov TMP1, prm3 ; Stride | ||
400 : | mov _EAX, prm2 ; Src | ||
; mm7 = 0 is required by the macro; nop only pads the code
401 : | edgomez | 1.2 | pxor mm7, mm7 |
402 : | nop | ||
403 : | |||
404 : | COPY_16_TO_8_ADD 0 | ||
405 : | Isibaar | 1.11 | lea TMP0,[byte TMP0+2*TMP1] |
406 : | edgomez | 1.2 | COPY_16_TO_8_ADD 1 |
407 : | Isibaar | 1.11 | lea TMP0,[byte TMP0+2*TMP1] |
408 : | edgomez | 1.2 | COPY_16_TO_8_ADD 2 |
409 : | Isibaar | 1.11 | lea TMP0,[byte TMP0+2*TMP1] |
410 : | edgomez | 1.2 | COPY_16_TO_8_ADD 3 |
411 : | ret | ||
412 : | Isibaar | 1.10 | ENDFUNC |
413 : | edgomez | 1.2 | |
414 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
415 : | edgomez | 1.2 | ; |
416 : | ; void transfer8x8_copy_3dne(uint8_t * const dst, | ||
417 : | ; const uint8_t * const src, | ||
418 : | ; const uint32_t stride); | ||
419 : | ; | ||
420 : | ; | ||
421 : | edgomez | 1.3 | ;----------------------------------------------------------------------------- |
422 : | edgomez | 1.2 | |
; COPY_8_TO_8: copy two 8-byte rows from _EAX to TMP0 (rows TMP1 bytes apart).
; The source pointer _EAX is advanced by two rows. TMP0 is left unchanged, so
; the caller must advance the destination between expansions.
423 : | %macro COPY_8_TO_8 0 | ||
424 : | Isibaar | 1.11 | movq mm0, [byte _EAX] |
425 : | movq mm1, [_EAX+TMP1] | ||
426 : | movq [byte TMP0], mm0 | ||
427 : | lea _EAX,[byte _EAX+2*TMP1] | ||
428 : | movq [TMP0+TMP1], mm1 | ||
429 : | edgomez | 1.2 | %endmacro |
430 : | |||
; transfer8x8_copy_3dne: copy eight rows of 8 bytes from Src to Dst; the same
; Stride is used for both. Four COPY_8_TO_8 expansions copy two rows each. The
; macro advances Src, and Dst is advanced here between expansions.
431 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
432 : | edgomez | 1.2 | transfer8x8_copy_3dne: |
433 : | Isibaar | 1.11 | mov _EAX, prm2 ; Src |
434 : | mov TMP1, prm3 ; Stride | ||
435 : | mov TMP0, prm1 ; Dst | ||
436 : | edgomez | 1.2 | |
437 : | COPY_8_TO_8 | ||
438 : | Isibaar | 1.11 | lea TMP0,[byte TMP0+2*TMP1] |
439 : | edgomez | 1.2 | COPY_8_TO_8 |
440 : | Isibaar | 1.11 | lea TMP0,[byte TMP0+2*TMP1] |
441 : | edgomez | 1.2 | COPY_8_TO_8 |
442 : | Isibaar | 1.11 | lea TMP0,[byte TMP0+2*TMP1] |
443 : | edgomez | 1.2 | COPY_8_TO_8 |
444 : | ret | ||
445 : | Isibaar | 1.10 | ENDFUNC |
446 : | edgomez | 1.6 | |
447 : | suxen_drol | 1.7 | ;----------------------------------------------------------------------------- |
448 : | ; | ||
449 : | ; void transfer8x4_copy_3dne(uint8_t * const dst, | ||
450 : | ; const uint8_t * const src, | ||
451 : | ; const uint32_t stride); | ||
452 : | ; | ||
453 : | ; | ||
454 : | ;----------------------------------------------------------------------------- | ||
455 : | |||
; transfer8x4_copy_3dne: copy four rows of 8 bytes from Src to Dst; the same
; Stride is used for both. Two COPY_8_TO_8 expansions copy two rows each. The
; macro advances Src, and Dst is advanced here between them.
456 : | Isibaar | 1.11 | ALIGN SECTION_ALIGN |
457 : | suxen_drol | 1.7 | transfer8x4_copy_3dne: |
458 : | Isibaar | 1.11 | mov _EAX, prm2 ; Src |
459 : | mov TMP1, prm3 ; Stride | ||
460 : | mov TMP0, prm1 ; Dst | ||
461 : | suxen_drol | 1.7 | |
462 : | COPY_8_TO_8 | ||
463 : | Isibaar | 1.11 | lea TMP0,[byte TMP0+2*TMP1] |
464 : | suxen_drol | 1.7 | COPY_8_TO_8 |
465 : | ret | ||
466 : | Isibaar | 1.10 | ENDFUNC |
467 : | suxen_drol | 1.7 | |
468 : | Isibaar | 1.9 | |
; Only when the output format is exactly "elf": emit an empty
; .note.GNU-stack section flagged noexec. This is the conventional marker
; telling GNU toolchains that the object does not need an executable stack.
; NOTE(review): the comparison does not match other ELF format names such as
; elf32 or elf64 -- confirm which format name the build passes.
469 : | %ifidn __OUTPUT_FORMAT__,elf | ||
470 : | section ".note.GNU-stack" noalloc noexec nowrite progbits | ||
471 : | %endif | ||
472 : |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |