44 |
; (for rows) and first stage DCT 8x8 (for columns) |
; (for rows) and first stage DCT 8x8 (for columns) |
45 |
; |
; |
46 |
|
|
|
; Select 32-bit code generation for the code that follows.
BITS 32 |
|
|
|
|
47 |
;============================================================================= |
;============================================================================= |
48 |
; Macros and other preprocessor constants |
; Macros and other preprocessor constants |
49 |
;============================================================================= |
;============================================================================= |
50 |
|
|
51 |
;-----------------------------------------------------------------------------
; cglobal <name>                                   (one macro parameter, %1)
;
; Declares <name> as a global symbol and defines ENDFUNC to match.
;
;   PREFIX defined     : the symbol is exported as _<name>, and <name> itself
;                        is %define'd so later uses expand to the underscored
;                        form (in the MARK_FUNCS case the %define text also
;                        carries the ":function ..." suffix, exactly as
;                        written below).
;   PREFIX not defined : the symbol is exported as <name>, no alias is made.
;
;   MARK_FUNCS defined     : the global declaration is written as
;                            "<sym>:function <name>.endfunc-<name>" and
;                            ENDFUNC expands to ".endfunc".
;   MARK_FUNCS not defined : plain "global <sym>", ENDFUNC expands to nothing.
;
; NOTE(review): the `%include "nasm.inc"` line sits directly after the
; `%macro` line, i.e. inside the macro body as written. This looks like the
; replacement side of a diff (the include superseding this macro) rather
; than intended macro content -- confirm which of the two should remain.
;-----------------------------------------------------------------------------
%macro cglobal 1 |
%include "nasm.inc" |

%ifdef PREFIX |
|
|
%ifdef MARK_FUNCS |
|
|
global _%1:function %1.endfunc-%1 |
|
|
%define %1 _%1:function %1.endfunc-%1 |
|
|
%define ENDFUNC .endfunc |
|
|
%else |
|
|
global _%1 |
|
|
%define %1 _%1 |
|
|
%define ENDFUNC |
|
|
%endif |
|
|
%else |
|
|
%ifdef MARK_FUNCS |
|
|
global %1:function %1.endfunc-%1 |
|
|
%define ENDFUNC .endfunc |
|
|
%else |
|
|
global %1 |
|
|
%define ENDFUNC |
|
|
%endif |
|
|
%endif |
|
|
%endmacro |
|
52 |
|
|
53 |
; Preprocessor constants.
;   BITS_INV_ACC  : set to 5 here (the existing note allows 4 or 5).
;   SHIFT_INV_ROW : expands textually to "16 - BITS_INV_ACC", i.e. "16 - 5"
;                   with the value above.
; NOTE(review): the SHIFT_INV_ROW expansion is not parenthesised, so using it
; inside a larger expression (e.g. next to * or a unary minus) would bind
; differently than "(16 - BITS_INV_ACC)" -- check the use sites.
; NOTE(review): each %define appears twice with identical text; this looks
; like the before/after sides of a diff -- confirm.
%define BITS_INV_ACC 5 ; 4 or 5 for IEEE |
%define BITS_INV_ACC 5 ; 4 or 5 for IEEE |
54 |
%define SHIFT_INV_ROW 16 - BITS_INV_ACC |
%define SHIFT_INV_ROW 16 - BITS_INV_ACC |
66 |
; Local Data (Read Only) |
; Local Data (Read Only) |
67 |
;============================================================================= |
;============================================================================= |
68 |
|
|
69 |
; Select the read-only data section for the constants that follow.
;   FORMAT_COFF defined : "SECTION .rodata" with no alignment attribute.
;   otherwise           : "SECTION .rodata align=16".
; NOTE(review): DATA is not defined anywhere in the visible source;
; presumably it is a section-selection macro supplied by an include, and this
; line is the replacement side of a diff for the whole %ifdef block --
; confirm.
%ifdef FORMAT_COFF |
DATA |

SECTION .rodata |
|
|
%else |
|
|
SECTION .rodata align=16 |
|
|
%endif |
|
70 |
|
|
71 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
72 |
; Various memory constants (trigonometric values or rounding values) |
; Various memory constants (trigonometric values or rounding values) |
73 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
74 |
|
|
75 |
; one_corr: four 16-bit words, each holding the value 1, placed on an aligned
; boundary.
; NOTE(review): the alignment is given both as the literal 16 and as
; SECTION_ALIGN (not defined in the visible source), and the label and data
; lines are each listed twice -- this looks like the before/after sides of a
; diff; confirm which form is intended.
ALIGN 16 |
ALIGN SECTION_ALIGN |
76 |
one_corr: |
one_corr: |
77 |
dw 1, 1, 1, 1 |
dw 1, 1, 1, 1 |
78 |
round_inv_row: |
round_inv_row: |
553 |
; Code |
; Code |
554 |
;============================================================================= |
;============================================================================= |
555 |
|
|
556 |
; Switch to the code section and export the two entry points, idct_mmx and
; idct_xmm, through the cglobal macro.
; NOTE(review): two section directives are listed (".text" and
; ".rotext align=SECTION_ALIGN"); as written the second one is the section in
; effect for the code below. SECTION_ALIGN is not defined in the visible
; source. This looks like the before/after sides of a diff -- confirm which
; directive should remain.
SECTION .text |
SECTION .rotext align=SECTION_ALIGN |
557 |
|
|
558 |
cglobal idct_mmx |
cglobal idct_mmx |
559 |
cglobal idct_xmm |
cglobal idct_xmm |
562 |
; void idct_mmx(uint16_t block[64]); |
; void idct_mmx(uint16_t block[64]); |
563 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
564 |
|
|
565 |
;-----------------------------------------------------------------------------
; idct_mmx
;
; Loads the block pointer into a base register, then transforms the block in
; place: every macro invocation below passes the same address as its first
; and second operand.
;
;   Row pass    : DCT_8_INV_ROW_MMX is invoked for k = 0..7 at byte offset
;                 k*16 from the base, each with its own rounder_k. The
;                 coefficient tables are paired as written below:
;                   k = 0,4 -> tab_i_04_mmx     k = 1,7 -> tab_i_17_mmx
;                   k = 2,6 -> tab_i_26_mmx     k = 3,5 -> tab_i_35_mmx
;   Column pass : DCT_8_INV_COL is invoked twice, at byte offsets 0 and 8.
;
; The row/column macros, the tables and the rounders are defined outside the
; visible source, so their register usage is not documented here.
;
; NOTE(review): every statement appears in two forms -- one using eax loaded
; from [esp + 4] (presumably the first stack argument), one using TMP0 loaded
; from prm1 (names not defined in the visible source). This looks like the
; before/after sides of a diff; confirm which set is meant to be assembled.
;-----------------------------------------------------------------------------
ALIGN 16 |
ALIGN SECTION_ALIGN |
566 |
idct_mmx: |
idct_mmx: |
567 |
mov eax, dword [esp + 4] |
mov TMP0, prm1 |
568 |
|
|
569 |
;; Process each row |
;; Process each row |
570 |
DCT_8_INV_ROW_MMX eax+0*16, eax+0*16, tab_i_04_mmx, rounder_0 |
DCT_8_INV_ROW_MMX TMP0+0*16, TMP0+0*16, tab_i_04_mmx, rounder_0 |
571 |
DCT_8_INV_ROW_MMX eax+1*16, eax+1*16, tab_i_17_mmx, rounder_1 |
DCT_8_INV_ROW_MMX TMP0+1*16, TMP0+1*16, tab_i_17_mmx, rounder_1 |
572 |
DCT_8_INV_ROW_MMX eax+2*16, eax+2*16, tab_i_26_mmx, rounder_2 |
DCT_8_INV_ROW_MMX TMP0+2*16, TMP0+2*16, tab_i_26_mmx, rounder_2 |
573 |
DCT_8_INV_ROW_MMX eax+3*16, eax+3*16, tab_i_35_mmx, rounder_3 |
DCT_8_INV_ROW_MMX TMP0+3*16, TMP0+3*16, tab_i_35_mmx, rounder_3 |
574 |
DCT_8_INV_ROW_MMX eax+4*16, eax+4*16, tab_i_04_mmx, rounder_4 |
DCT_8_INV_ROW_MMX TMP0+4*16, TMP0+4*16, tab_i_04_mmx, rounder_4 |
575 |
DCT_8_INV_ROW_MMX eax+5*16, eax+5*16, tab_i_35_mmx, rounder_5 |
DCT_8_INV_ROW_MMX TMP0+5*16, TMP0+5*16, tab_i_35_mmx, rounder_5 |
576 |
DCT_8_INV_ROW_MMX eax+6*16, eax+6*16, tab_i_26_mmx, rounder_6 |
DCT_8_INV_ROW_MMX TMP0+6*16, TMP0+6*16, tab_i_26_mmx, rounder_6 |
577 |
DCT_8_INV_ROW_MMX eax+7*16, eax+7*16, tab_i_17_mmx, rounder_7 |
DCT_8_INV_ROW_MMX TMP0+7*16, TMP0+7*16, tab_i_17_mmx, rounder_7 |
578 |
|
|
579 |
;; Process the columns (4 at a time) |
;; Process the columns (4 at a time) |
580 |
DCT_8_INV_COL eax+0, eax+0 |
DCT_8_INV_COL TMP0+0, TMP0+0 |
581 |
DCT_8_INV_COL eax+8, eax+8 |
DCT_8_INV_COL TMP0+8, TMP0+8 |
582 |
|
|
583 |
ret |
ret |
584 |
ENDFUNC |
ENDFUNC |
587 |
; void idct_xmm(uint16_t block[64]); |
; void idct_xmm(uint16_t block[64]); |
588 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
589 |
|
|
590 |
;-----------------------------------------------------------------------------
; idct_xmm
;
; Same structure as idct_mmx, but the row pass uses DCT_8_INV_ROW_XMM and the
; *_xmm coefficient tables. The block is transformed in place: every macro
; invocation below passes the same address as its first and second operand.
;
;   Row pass    : DCT_8_INV_ROW_XMM is invoked for k = 0..7 at byte offset
;                 k*16 from the base, each with its own rounder_k. The
;                 coefficient tables are paired as written below:
;                   k = 0,4 -> tab_i_04_xmm     k = 1,7 -> tab_i_17_xmm
;                   k = 2,6 -> tab_i_26_xmm     k = 3,5 -> tab_i_35_xmm
;   Column pass : DCT_8_INV_COL (the same macro idct_mmx uses) is invoked
;                 twice, at byte offsets 0 and 8.
;
; The row/column macros, the tables and the rounders are defined outside the
; visible source, so their register usage is not documented here.
;
; NOTE(review): every statement appears in two forms -- one using eax loaded
; from [esp + 4] (presumably the first stack argument), one using TMP0 loaded
; from prm1 (names not defined in the visible source). This looks like the
; before/after sides of a diff; confirm which set is meant to be assembled.
;-----------------------------------------------------------------------------
ALIGN 16 |
ALIGN SECTION_ALIGN |
591 |
idct_xmm: |
idct_xmm: |
592 |
mov eax, dword [esp + 4] |
mov TMP0, prm1 |
593 |
|
|
594 |
;; Process each row |
;; Process each row |
595 |
DCT_8_INV_ROW_XMM eax+0*16, eax+0*16, tab_i_04_xmm, rounder_0 |
DCT_8_INV_ROW_XMM TMP0+0*16, TMP0+0*16, tab_i_04_xmm, rounder_0 |
596 |
DCT_8_INV_ROW_XMM eax+1*16, eax+1*16, tab_i_17_xmm, rounder_1 |
DCT_8_INV_ROW_XMM TMP0+1*16, TMP0+1*16, tab_i_17_xmm, rounder_1 |
597 |
DCT_8_INV_ROW_XMM eax+2*16, eax+2*16, tab_i_26_xmm, rounder_2 |
DCT_8_INV_ROW_XMM TMP0+2*16, TMP0+2*16, tab_i_26_xmm, rounder_2 |
598 |
DCT_8_INV_ROW_XMM eax+3*16, eax+3*16, tab_i_35_xmm, rounder_3 |
DCT_8_INV_ROW_XMM TMP0+3*16, TMP0+3*16, tab_i_35_xmm, rounder_3 |
599 |
DCT_8_INV_ROW_XMM eax+4*16, eax+4*16, tab_i_04_xmm, rounder_4 |
DCT_8_INV_ROW_XMM TMP0+4*16, TMP0+4*16, tab_i_04_xmm, rounder_4 |
600 |
DCT_8_INV_ROW_XMM eax+5*16, eax+5*16, tab_i_35_xmm, rounder_5 |
DCT_8_INV_ROW_XMM TMP0+5*16, TMP0+5*16, tab_i_35_xmm, rounder_5 |
601 |
DCT_8_INV_ROW_XMM eax+6*16, eax+6*16, tab_i_26_xmm, rounder_6 |
DCT_8_INV_ROW_XMM TMP0+6*16, TMP0+6*16, tab_i_26_xmm, rounder_6 |
602 |
DCT_8_INV_ROW_XMM eax+7*16, eax+7*16, tab_i_17_xmm, rounder_7 |
DCT_8_INV_ROW_XMM TMP0+7*16, TMP0+7*16, tab_i_17_xmm, rounder_7 |
603 |
|
|
604 |
;; Process the columns (4 at a time) |
;; Process the columns (4 at a time) |
605 |
DCT_8_INV_COL eax+0, eax+0 |
DCT_8_INV_COL TMP0+0, TMP0+0 |
606 |
DCT_8_INV_COL eax+8, eax+8 |
DCT_8_INV_COL TMP0+8, TMP0+8 |
607 |
|
|
608 |
ret |
ret |
609 |
ENDFUNC |
ENDFUNC |