23 |
; * |
; * |
24 |
; ***************************************************************************/ |
; ***************************************************************************/ |
25 |
|
|
26 |
BITS 32 |
%include "nasm.inc" |
|
|
|
|
;-----------------------------------------------------------------------------
; cglobal <name>
;
; Declares <name> as a global symbol, taking two build-time switches into
; account:
;
;   PREFIX      the exported symbol is "_<name>", and <name> is turned into a
;               single-line macro alias for "_<name>" so that the rest of the
;               file can keep using the unprefixed name.
;   MARK_FUNCS  the symbol is exported with the ":function <size>" qualifier,
;               where <size> is the distance from the function label to its
;               ".endfunc" local label.
;
; Side effect: (re)defines ENDFUNC. It expands to the local label ".endfunc"
; when MARK_FUNCS is set and to nothing otherwise.
; NOTE(review): presumably callers place ENDFUNC right after the function
; body so the size expression resolves -- confirm at the call sites.
;-----------------------------------------------------------------------------
%macro cglobal 1
  %ifdef PREFIX
    %ifdef MARK_FUNCS
      ; The label actually emitted is _<name> (because of the alias below),
      ; so its local end label is _<name>.endfunc; the size expression must
      ; use those underscored symbols.
      global _%1:function _%1.endfunc-_%1
      ; Alias only the bare symbol name. Including the ":function ..." text in
      ; the alias would corrupt every later use of <name>, e.g. the "<name>:"
      ; label definition itself.
      %define %1 _%1
      %define ENDFUNC .endfunc
    %else
      global _%1
      %define %1 _%1
      %define ENDFUNC
    %endif
  %else
    %ifdef MARK_FUNCS
      global %1:function %1.endfunc-%1
      %define ENDFUNC .endfunc
    %else
      global %1
      %define ENDFUNC
    %endif
  %endif
%endmacro
|
27 |
|
|
28 |
;;; Define this if you want an unrolled version of the code |
;;; Define this if you want an unrolled version of the code |
29 |
%define UNROLLED_LOOP |
%define UNROLLED_LOOP |
95 |
; Read only data |
; Read only data |
96 |
;============================================================================= |
;============================================================================= |
97 |
|
|
98 |
%ifdef FORMAT_COFF |
DATA |
|
SECTION .rodata |
|
|
%else |
|
|
SECTION .rodata align=16 |
|
|
%endif |
|
99 |
|
|
100 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
101 |
tan1: |
tan1: |
102 |
dw 0x32ec,0x32ec,0x32ec,0x32ec ; tan( pi/16) |
dw 0x32ec,0x32ec,0x32ec,0x32ec ; tan( pi/16) |
103 |
tan2: |
tan2: |
107 |
sqrt2: |
sqrt2: |
108 |
dw 0x5a82,0x5a82,0x5a82,0x5a82 ; 0.5/sqrt(2) |
dw 0x5a82,0x5a82,0x5a82,0x5a82 ; 0.5/sqrt(2) |
109 |
|
|
110 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
111 |
fdct_table: |
fdct_table: |
112 |
;fTab1: |
;fTab1: |
113 |
dw 0x4000, 0x4000, 0x58c5, 0x4b42 |
dw 0x4000, 0x4000, 0x58c5, 0x4b42 |
189 |
dw 0x300b, 0x8c04, 0x187e, 0xba41 |
dw 0x300b, 0x8c04, 0x187e, 0xba41 |
190 |
dw 0x73fc, 0xcff5, 0x6862, 0x84df |
dw 0x73fc, 0xcff5, 0x6862, 0x84df |
191 |
|
|
192 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
193 |
fdct_rounding_1: |
fdct_rounding_1: |
194 |
dw 6, 8, 8, 8 |
dw 6, 8, 8, 8 |
195 |
dw 10, 8, 8, 8 |
dw 10, 8, 8, 8 |
200 |
dw 8, 8, 8, 8 |
dw 8, 8, 8, 8 |
201 |
dw 8, 8, 8, 8 |
dw 8, 8, 8, 8 |
202 |
|
|
203 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
204 |
fdct_rounding_2: |
fdct_rounding_2: |
205 |
dw 6, 8, 8, 8 |
dw 6, 8, 8, 8 |
206 |
dw 8, 8, 8, 8 |
dw 8, 8, 8, 8 |
211 |
dw 8, 8, 8, 8 |
dw 8, 8, 8, 8 |
212 |
dw 8, 8, 8, 8 |
dw 8, 8, 8, 8 |
213 |
|
|
214 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
215 |
MMX_One: |
MMX_One: |
216 |
dw 1, 1, 1, 1 |
dw 1, 1, 1, 1 |
217 |
|
|
442 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
443 |
|
|
444 |
%macro MAKE_FDCT_FUNC 2 |
%macro MAKE_FDCT_FUNC 2 |
445 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
446 |
cglobal %1 |
cglobal %1 |
447 |
%1: |
%1: |
448 |
%ifdef UNROLLED_LOOP |
mov TMP0, prm1 |
449 |
mov ecx, [esp + 4] |
%ifndef UNROLLED_LOOP |
450 |
%else |
push _EBX |
451 |
push ebx |
push _EDI |
|
push edi |
|
|
mov ecx, [esp + 8 + 4] |
|
452 |
%endif |
%endif |
453 |
|
|
454 |
fLLM_PASS ecx+0, ecx+0, 3 |
fLLM_PASS TMP0+0, TMP0+0, 3 |
455 |
fLLM_PASS ecx+8, ecx+8, 3 |
fLLM_PASS TMP0+8, TMP0+8, 3 |
456 |
|
|
457 |
%ifdef UNROLLED_LOOP |
%ifdef UNROLLED_LOOP |
458 |
%assign i 0 |
%assign i 0 |
459 |
%rep 8 |
%rep 8 |
460 |
%2 ecx+i*16, ecx+i*16, fdct_table+i*64, fdct_rounding_1+i*8, fdct_rounding_2+i*8 |
%2 TMP0+i*16, TMP0+i*16, fdct_table+i*64, fdct_rounding_1+i*8, fdct_rounding_2+i*8 |
461 |
%assign i i+1 |
%assign i i+1 |
462 |
%endrep |
%endrep |
463 |
%else |
%else |
464 |
mov eax, 8 |
mov _EAX, 8 |
465 |
mov edx, fdct_table |
mov TMP1, fdct_table |
466 |
mov ebx, fdct_rounding_1 |
mov _EBX, fdct_rounding_1 |
467 |
mov edi, fdct_rounding_2 |
mov _EDI, fdct_rounding_2 |
468 |
.loop |
.loop |
469 |
%2 ecx, ecx, edx, ebx, edi |
%2 TMP0, TMP0, TMP1, _EBX, _EDI |
470 |
add ecx, 2*8 |
add TMP0, 2*8 |
471 |
add edx, 2*32 |
add TMP1, 2*32 |
472 |
add ebx, 2*4 |
add _EBX, 2*4 |
473 |
add edi, 2*4 |
add _EDI, 2*4 |
474 |
dec eax |
dec _EAX |
475 |
jne .loop |
jne .loop |
476 |
|
|
477 |
pop edi |
pop _EDI |
478 |
pop ebx |
pop _EBX |
479 |
%endif |
%endif |
480 |
|
|
481 |
ret |
ret |
486 |
; Code |
; Code |
487 |
;============================================================================= |
;============================================================================= |
488 |
|
|
489 |
SECTION .text |
SECTION .rotext align=SECTION_ALIGN |
490 |
|
|
491 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
492 |
; void fdct_mmx_skal(int16_t block[64]);
; void fdct_mmx_skal(int16_t block[64]);