42 |
; * respective work in order to have a nice/fast mmx fDCT. |
; * respective work in order to have a nice/fast mmx fDCT. |
43 |
; ***************************************************************************/ |
; ***************************************************************************/ |
44 |
|
|
|
BITS 32 |
|
|
|
|
45 |
;============================================================================= |
;============================================================================= |
46 |
; Macros and other preprocessor constants |
; Macros and other preprocessor constants |
47 |
;============================================================================= |
;============================================================================= |
48 |
|
|
49 |
%macro cglobal 1 |
%include "nasm.inc" |
|
%ifdef PREFIX |
|
|
%ifdef MARK_FUNCS |
|
|
global _%1:function %1.endfunc-%1 |
|
|
%define %1 _%1:function %1.endfunc-%1 |
|
|
%else |
|
|
global _%1 |
|
|
%define %1 _%1 |
|
|
%endif |
|
|
%else |
|
|
%ifdef MARK_FUNCS |
|
|
global %1:function %1.endfunc-%1 |
|
|
%else |
|
|
global %1 |
|
|
%endif |
|
|
%endif |
|
|
%endmacro |
|
50 |
|
|
51 |
;;; Define this if you want an unrolled version of the code |
;;; Define this if you want an unrolled version of the code |
52 |
%define UNROLLED_LOOP |
%define UNROLLED_LOOP |
61 |
; Local Data (Read Only) |
; Local Data (Read Only) |
62 |
;============================================================================= |
;============================================================================= |
63 |
|
|
64 |
%ifdef FORMAT_COFF |
DATA |
|
SECTION .rodata |
|
|
%else |
|
|
SECTION .rodata align=16 |
|
|
%endif |
|
65 |
|
|
66 |
ALIGN 8 |
ALIGN SECTION_ALIGN |
67 |
tab_frw_01234567: |
tab_frw_01234567: |
68 |
dw 16384, 16384, -8867, -21407 |
dw 16384, 16384, -8867, -21407 |
69 |
dw 16384, 16384, 21407, 8867 |
dw 16384, 16384, 21407, 8867 |
137 |
dw 17855, -31521, 26722, -31521 |
dw 17855, -31521, 26722, -31521 |
138 |
dw 6270, 26722, 6270, -17855 |
dw 6270, 26722, 6270, -17855 |
139 |
|
|
140 |
ALIGN 8 |
ALIGN SECTION_ALIGN |
141 |
fdct_one_corr: |
fdct_one_corr: |
142 |
dw 1, 1, 1, 1 |
dw 1, 1, 1, 1 |
143 |
|
|
144 |
ALIGN 8 |
ALIGN SECTION_ALIGN |
145 |
fdct_tg_all_16: |
fdct_tg_all_16: |
146 |
dw 13036, 13036, 13036, 13036 |
dw 13036, 13036, 13036, 13036 |
147 |
dw 27146, 27146, 27146, 27146 |
dw 27146, 27146, 27146, 27146 |
148 |
dw -21746, -21746, -21746, -21746 |
dw -21746, -21746, -21746, -21746 |
149 |
|
|
150 |
ALIGN 8 |
ALIGN SECTION_ALIGN |
151 |
cos_4_16: |
cos_4_16: |
152 |
dw -19195, -19195, -19195, -19195 |
dw -19195, -19195, -19195, -19195 |
153 |
|
|
154 |
ALIGN 8 |
ALIGN SECTION_ALIGN |
155 |
ocos_4_16: |
ocos_4_16: |
156 |
dw 23170, 23170, 23170, 23170 |
dw 23170, 23170, 23170, 23170 |
157 |
|
|
158 |
ALIGN 8 |
ALIGN SECTION_ALIGN |
159 |
fdct_r_row: |
fdct_r_row: |
160 |
dd RND_FRW_ROW, RND_FRW_ROW |
dd RND_FRW_ROW, RND_FRW_ROW |
161 |
|
|
349 |
%endmacro |
%endmacro |
350 |
|
|
351 |
%macro MAKE_FDCT_FUNC 2 |
%macro MAKE_FDCT_FUNC 2 |
352 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
353 |
cglobal %1 |
cglobal %1 |
354 |
%1: |
%1: |
355 |
;; Move the destination/source address to the eax register |
;; Move the destination/source address to the eax register |
356 |
mov eax, [esp + 4] |
mov _EAX, prm1 |
357 |
|
|
358 |
;; Process the columns (4 at a time) |
;; Process the columns (4 at a time) |
359 |
FDCT_COLUMN_COMMON eax, eax, 0 ; columns 0..3 |
FDCT_COLUMN_COMMON _EAX, _EAX, 0 ; columns 0..3 |
360 |
FDCT_COLUMN_COMMON eax, eax, 4 ; columns 4..7 |
FDCT_COLUMN_COMMON _EAX, _EAX, 4 ; columns 4..7 |
361 |
|
|
362 |
%ifdef UNROLLED_LOOP |
%ifdef UNROLLED_LOOP |
363 |
; Unrolled loop version |
; Unrolled loop version |
364 |
%assign i 0 |
%assign i 0 |
365 |
%rep 8 |
%rep 8 |
366 |
;; Process the 'i'th row |
;; Process the 'i'th row |
367 |
%2 eax+2*i*8, eax+2*i*8, tab_frw_01234567+2*32*i |
%2 _EAX+2*i*8, _EAX+2*i*8, tab_frw_01234567+2*32*i |
368 |
%assign i i+1 |
%assign i i+1 |
369 |
%endrep |
%endrep |
370 |
%else |
%else |
371 |
mov ecx, 8 |
mov _ECX, 8 |
372 |
mov edx, tab_frw_01234567 |
mov _EDX, tab_frw_01234567 |
373 |
ALIGN 8 |
ALIGN SECTION_ALIGN |
374 |
.loop |
.loop |
375 |
%2 eax, eax, edx |
%2 _EAX, _EAX,_EDX |
376 |
add eax, 2*8 |
add _EAX, 2*8 |
377 |
add edx, 2*32 |
add _EDX, 2*32 |
378 |
dec ecx |
dec _ECX |
379 |
jne .loop |
jne .loop |
380 |
%endif |
%endif |
381 |
|
|
382 |
ret |
ret |
383 |
.endfunc |
ENDFUNC |
384 |
%endmacro |
%endmacro |
385 |
|
|
386 |
;============================================================================= |
;============================================================================= |
387 |
; Code |
; Code |
388 |
;============================================================================= |
;============================================================================= |
389 |
|
|
390 |
SECTION .text |
SECTION .rotext align=SECTION_ALIGN |
391 |
|
|
392 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
393 |
; void fdct_mmx_ffmpeg(int16_t block[64]); |
; void fdct_mmx_ffmpeg(int16_t block[64]); |