20 |
; * Ported to nasm by Peter Ross <pross@xvid.org> |
; * Ported to nasm by Peter Ross <pross@xvid.org> |
21 |
; */ |
; */ |
22 |
|
|
23 |
bits 32 |
BITS 32 |
24 |
|
|
25 |
|
;============================================================================= |
26 |
|
; Macros and other preprocessor constants |
27 |
|
;============================================================================= |
28 |
|
|
29 |
;=========================================================================== |
%macro cglobal 1 |
30 |
; data |
%ifdef PREFIX |
31 |
;=========================================================================== |
%ifdef MARK_FUNCS |
32 |
|
global _%1:function |
33 |
%ifdef FORMAT_COFF |
%define %1 _%1:function |
|
section .data |
|
|
align 8 |
|
34 |
%else |
%else |
35 |
section .data data align=8 |
global _%1 |
36 |
|
%define %1 _%1 |
37 |
%endif |
%endif |
38 |
|
%else |
39 |
wm1010 dw 0, 0xffff, 0, 0xffff |
%ifdef MARK_FUNCS |
40 |
d40000 dd 0x40000, 0 |
global %1:function |
41 |
|
%else |
42 |
|
global %1 |
43 |
|
%endif |
44 |
|
%endif |
45 |
|
%endmacro |
46 |
|
|
47 |
%define ROW_SHIFT 11 |
%define ROW_SHIFT 11 |
48 |
%define COL_SHIFT 20 |
%define COL_SHIFT 20 |
55 |
%define C6 8867 ;cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 = 8866.956905 |
%define C6 8867 ;cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 = 8866.956905 |
56 |
%define C7 4520 ;cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 = 4520.335430 |
%define C7 4520 ;cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 = 4520.335430 |
57 |
|
|
58 |
coeffs |
;=========================================================================== |
59 |
|
; Data (Read Only) |
60 |
|
;=========================================================================== |
61 |
|
|
62 |
|
%ifdef FORMAT_COFF |
63 |
|
SECTION .rodata |
64 |
|
%else |
65 |
|
SECTION .rodata align=16 |
66 |
|
%endif |
67 |
|
|
68 |
|
;----------------------------------------------------------------------------- |
69 |
|
; Trigonometric Tables |
70 |
|
;----------------------------------------------------------------------------- |
71 |
|
|
72 |
|
ALIGN 16 |
73 |
|
wm1010: |
74 |
|
dw 0, 0xffff, 0, 0xffff |
75 |
|
|
76 |
|
ALIGN 16 |
77 |
|
d40000: |
78 |
|
dd 0x40000, 0 |
79 |
|
|
80 |
|
ALIGN 16 |
81 |
|
coeffs: |
82 |
dw 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0, ; 0 |
dw 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0, ; 0 |
83 |
dw 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0, ; 8 |
dw 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0, ; 8 |
84 |
|
|
102 |
|
|
103 |
|
|
104 |
;=========================================================================== |
;=========================================================================== |
105 |
; text |
; Helper macros |
106 |
;=========================================================================== |
;=========================================================================== |
107 |
section .text |
|
108 |
|
;--------------------------------------------------------------------------- |
109 |
|
; DC_COND_IDCT |
110 |
|
;--------------------------------------------------------------------------- |
111 |
|
|
112 |
%macro DC_COND_IDCT 8 |
%macro DC_COND_IDCT 8 |
113 |
%define src0 %1 |
%define src0 %1 |
214 |
%undef shift |
%undef shift |
215 |
%endmacro |
%endmacro |
216 |
|
|
217 |
|
;--------------------------------------------------------------------------- |
218 |
|
; Z_COND_IDCT |
219 |
|
;--------------------------------------------------------------------------- |
220 |
|
|
221 |
%macro Z_COND_IDCT 9 |
%macro Z_COND_IDCT 9 |
222 |
%define src0 %1 |
%define src0 %1 |
313 |
%undef bt |
%undef bt |
314 |
%endmacro |
%endmacro |
315 |
|
|
316 |
|
;--------------------------------------------------------------------------- |
317 |
|
; IDCT0 |
318 |
|
;--------------------------------------------------------------------------- |
319 |
|
|
320 |
%macro IDCT0 8 |
%macro IDCT0 8 |
321 |
%define src0 %1 |
%define src0 %1 |
410 |
%undef shift |
%undef shift |
411 |
%endmacro |
%endmacro |
412 |
|
|
413 |
|
;--------------------------------------------------------------------------- |
414 |
|
; IDCT4 |
415 |
|
;--------------------------------------------------------------------------- |
416 |
|
|
417 |
%macro IDCT4 8 |
%macro IDCT4 8 |
418 |
%define src0 %1 |
%define src0 %1 |
495 |
%undef shift |
%undef shift |
496 |
%endmacro |
%endmacro |
497 |
|
|
498 |
|
;--------------------------------------------------------------------------- |
499 |
|
; IDCT6 |
500 |
|
;--------------------------------------------------------------------------- |
501 |
|
|
502 |
%macro IDCT6 8 |
%macro IDCT6 8 |
503 |
%define src0 %1 |
%define src0 %1 |
571 |
%undef shift |
%undef shift |
572 |
%endmacro |
%endmacro |
573 |
|
|
574 |
|
;--------------------------------------------------------------------------- |
575 |
|
; IDCT2 |
576 |
|
;--------------------------------------------------------------------------- |
577 |
|
|
578 |
%macro IDCT2 8 |
%macro IDCT2 8 |
579 |
%define src0 %1 |
%define src0 %1 |
659 |
%undef shift |
%undef shift |
660 |
%endmacro |
%endmacro |
661 |
|
|
662 |
|
;--------------------------------------------------------------------------- |
663 |
|
; IDCT3 |
664 |
|
;--------------------------------------------------------------------------- |
665 |
|
|
666 |
%macro IDCT3 8 |
%macro IDCT3 8 |
667 |
%define src0 %1 |
%define src0 %1 |
735 |
%undef shift |
%undef shift |
736 |
%endmacro |
%endmacro |
737 |
|
|
738 |
|
;--------------------------------------------------------------------------- |
739 |
|
; IDCT5 |
740 |
|
;--------------------------------------------------------------------------- |
741 |
|
|
742 |
%macro IDCT5 8 |
%macro IDCT5 8 |
743 |
%define src0 %1 |
%define src0 %1 |
813 |
%undef shift |
%undef shift |
814 |
%endmacro |
%endmacro |
815 |
|
|
816 |
|
;--------------------------------------------------------------------------- |
817 |
|
; IDCT1 |
818 |
|
;--------------------------------------------------------------------------- |
819 |
|
|
820 |
%macro IDCT1 8 |
%macro IDCT1 8 |
821 |
%define src0 %1 |
%define src0 %1 |
898 |
%undef shift |
%undef shift |
899 |
%endmacro |
%endmacro |
900 |
|
|
901 |
|
;--------------------------------------------------------------------------- |
902 |
|
; IDCT7 |
903 |
|
;--------------------------------------------------------------------------- |
904 |
|
|
905 |
%macro IDCT7 8 |
%macro IDCT7 8 |
906 |
%define src0 %1 |
%define src0 %1 |
950 |
%undef shift |
%undef shift |
951 |
%endmacro |
%endmacro |
952 |
|
|
953 |
|
;--------------------------------------------------------------------------- |
954 |
|
; Permutation helpers |
955 |
%macro cglobal 1 |
;--------------------------------------------------------------------------- |
|
%ifdef PREFIX |
|
|
global _%1 |
|
|
%define %1 _%1 |
|
|
%else |
|
|
global %1 |
|
|
%endif |
|
|
%endmacro |
|
|
|
|
|
|
|
|
; void simple_idct_mmx_P(int16_t * const block); |
|
|
; expects input data to be permutated |
|
|
; |
|
|
align 16 |
|
|
cglobal simple_idct_mmx_P |
|
|
simple_idct_mmx_P |
|
|
sub esp, 128 |
|
|
mov edx, [esp+128+4] |
|
|
|
|
|
; src0, src4, src1, src5, dst, rndop, rndarg, shift, bt |
|
|
|
|
|
DC_COND_IDCT edx+0, edx+8, edx+16, edx+24, esp, paddd, [coeffs+8], 11 |
|
|
Z_COND_IDCT edx+32, edx+40, edx+48, edx+56, esp+32, paddd, [coeffs], 11, .four |
|
|
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .two |
|
|
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .one |
|
|
IDCT0 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
|
|
IDCT0 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
|
|
IDCT0 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
|
|
IDCT0 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
|
|
jmp .ret |
|
|
|
|
|
align 16 |
|
|
.four |
|
|
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .six |
|
|
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .five |
|
|
IDCT4 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
|
|
IDCT4 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
|
|
IDCT4 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
|
|
IDCT4 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
|
|
jmp .ret |
|
|
|
|
|
align 16 |
|
|
.six |
|
|
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .seven |
|
|
IDCT6 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
|
|
IDCT6 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
|
|
IDCT6 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
|
|
IDCT6 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
|
|
jmp .ret |
|
|
|
|
|
align 16 |
|
|
.two |
|
|
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .three |
|
|
IDCT2 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
|
|
IDCT2 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
|
|
IDCT2 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
|
|
IDCT2 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
|
|
jmp .ret |
|
|
|
|
|
align 16 |
|
|
.three |
|
|
IDCT3 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
|
|
IDCT3 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
|
|
IDCT3 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
|
|
IDCT3 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
|
|
jmp .ret |
|
|
|
|
|
align 16 |
|
|
.five |
|
|
IDCT5 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
|
|
; IDCT5 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
|
|
IDCT5 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
|
|
; IDCT5 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
|
|
jmp .ret |
|
|
|
|
|
align 16 |
|
|
.one |
|
|
IDCT1 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
|
|
IDCT1 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
|
|
IDCT1 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
|
|
IDCT1 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
|
|
jmp .ret |
|
|
|
|
|
align 16 |
|
|
.seven |
|
|
IDCT7 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
|
|
; IDCT7 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
|
|
IDCT7 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
|
|
; IDCT7 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
|
|
|
|
|
.ret |
|
|
add esp, 128 |
|
|
ret |
|
|
|
|
|
|
|
|
;------------------ again with permuted parms -------- |
|
|
; |
|
|
; simple_idct_mmx is the same function as simple_idct_mmx_P above except that on entry it will |
|
|
; do a fast in-line and in-place permutation on the iDCT parm list. This means that same parm list |
|
|
; will also not have to be copied on the way out. - trbarry 6/2003 |
|
956 |
|
|
957 |
%macro XLODA 2 |
%macro XLODA 2 |
958 |
mov bx, [srcP+2*%2] ; get src contents |
mov bx, [srcP+2*%2] ; get src contents |
978 |
mov [srcP+2*%1], ax ; store dest val |
mov [srcP+2*%1], ax ; store dest val |
979 |
%endmacro |
%endmacro |
980 |
|
|
981 |
|
;--------------------------------------------------------------------------- |
982 |
|
; Permutation macro |
983 |
|
;--------------------------------------------------------------------------- |
984 |
|
|
985 |
%macro PERMUTEP 1 |
%macro PERMUTEP 1 |
986 |
%define srcP %1 |
%define srcP %1 |
987 |
push ebx |
push ebx |
1067 |
%undef srcP |
%undef srcP |
1068 |
%endmacro |
%endmacro |
1069 |
|
|
1070 |
; void simple_idct_mmx(int16_t * const block); |
;============================================================================= |
1071 |
align 16 |
; Code |
1072 |
|
;============================================================================= |
1073 |
|
|
1074 |
|
SECTION .text |
1075 |
|
|
1076 |
|
cglobal simple_idct_mmx_P |
1077 |
cglobal simple_idct_mmx |
cglobal simple_idct_mmx |
1078 |
|
|
1079 |
simple_idct_mmx |
;----------------------------------------------------------------------------- |
1080 |
|
; void simple_idct_mmx_P(int16_t * const block) |
1081 |
|
; expects input data to be permutated |
1082 |
|
;----------------------------------------------------------------------------- |
1083 |
|
|
1084 |
|
ALIGN 16 |
1085 |
|
simple_idct_mmx_P: |
1086 |
sub esp, 128 |
sub esp, 128 |
1087 |
mov edx, [esp+128+4] |
mov edx, [esp+128+4] |
|
PERMUTEP edx ; permute parm list in place |
|
1088 |
|
|
1089 |
; src0, src4, src1, src5, dst, rndop, rndarg, shift, bt |
; src0, src4, src1, src5, dst, rndop, rndarg, shift, bt |
1090 |
|
DC_COND_IDCT edx+0, edx+8, edx+16, edx+24, esp, paddd, [coeffs+8], 11 |
1091 |
|
Z_COND_IDCT edx+32, edx+40, edx+48, edx+56, esp+32, paddd, [coeffs], 11, .four |
1092 |
|
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .two |
1093 |
|
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .one |
1094 |
|
IDCT0 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1095 |
|
IDCT0 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1096 |
|
IDCT0 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1097 |
|
IDCT0 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1098 |
|
jmp .ret |
1099 |
|
|
1100 |
|
ALIGN 16 |
1101 |
|
.four |
1102 |
|
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .six |
1103 |
|
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .five |
1104 |
|
IDCT4 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1105 |
|
IDCT4 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1106 |
|
IDCT4 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1107 |
|
IDCT4 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1108 |
|
jmp .ret |
1109 |
|
|
1110 |
|
ALIGN 16 |
1111 |
|
.six |
1112 |
|
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .seven |
1113 |
|
IDCT6 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1114 |
|
IDCT6 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1115 |
|
IDCT6 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1116 |
|
IDCT6 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1117 |
|
jmp .ret |
1118 |
|
|
1119 |
|
ALIGN 16 |
1120 |
|
.two |
1121 |
|
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .three |
1122 |
|
IDCT2 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1123 |
|
IDCT2 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1124 |
|
IDCT2 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1125 |
|
IDCT2 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1126 |
|
jmp .ret |
1127 |
|
|
1128 |
|
ALIGN 16 |
1129 |
|
.three |
1130 |
|
IDCT3 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1131 |
|
IDCT3 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1132 |
|
IDCT3 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1133 |
|
IDCT3 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1134 |
|
jmp .ret |
1135 |
|
|
1136 |
|
ALIGN 16 |
1137 |
|
.five |
1138 |
|
IDCT5 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1139 |
|
; IDCT5 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1140 |
|
IDCT5 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1141 |
|
; IDCT5 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1142 |
|
jmp .ret |
1143 |
|
|
1144 |
|
ALIGN 16 |
1145 |
|
.one |
1146 |
|
IDCT1 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1147 |
|
IDCT1 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1148 |
|
IDCT1 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1149 |
|
IDCT1 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1150 |
|
jmp .ret |
1151 |
|
|
1152 |
|
ALIGN 16 |
1153 |
|
.seven |
1154 |
|
IDCT7 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1155 |
|
; IDCT7 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1156 |
|
IDCT7 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1157 |
|
; IDCT7 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1158 |
|
|
1159 |
|
.ret |
1160 |
|
add esp, 128 |
1161 |
|
|
1162 |
|
ret |
1163 |
|
|
1164 |
|
|
1165 |
|
;----------------------------------------------------------------------------- |
1166 |
|
; void simple_idct_mmx(int16_t * const block) |
1167 |
|
; |
1168 |
|
; simple_idct_mmx is the same function as simple_idct_mmx_P above except that |
1169 |
|
; on entry it will do a fast in-line and in-place permutation on the iDCT parm |
1170 |
|
; list. This means that same parm list will also not have to be copied on the |
1171 |
|
; way out. - trbarry 6/2003 |
1172 |
|
;----------------------------------------------------------------------------- |
1173 |
|
|
1174 |
|
ALIGN 16 |
1175 |
|
simple_idct_mmx: |
1176 |
|
sub esp, 128 |
1177 |
|
mov edx, [esp+128+4] |
1178 |
|
PERMUTEP edx ; permute parm list in place |
1179 |
|
|
1180 |
|
; src0, src4, src1, src5, dst, rndop, rndarg, shift, bt |
1181 |
DC_COND_IDCT edx+0, edx+8, edx+16, edx+24, esp, paddd, [coeffs+8], 11 |
DC_COND_IDCT edx+0, edx+8, edx+16, edx+24, esp, paddd, [coeffs+8], 11 |
1182 |
Z_COND_IDCT edx+32, edx+40, edx+48, edx+56, esp+32, paddd, [coeffs], 11, .fourP |
Z_COND_IDCT edx+32, edx+40, edx+48, edx+56, esp+32, paddd, [coeffs], 11, .fourP |
1183 |
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .twoP |
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .twoP |
1188 |
IDCT0 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
IDCT0 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1189 |
jmp .retP |
jmp .retP |
1190 |
|
|
1191 |
align 16 |
ALIGN 16 |
1192 |
.fourP |
.fourP |
1193 |
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .sixP |
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .sixP |
1194 |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .fiveP |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .fiveP |
1198 |
IDCT4 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
IDCT4 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1199 |
jmp .retP |
jmp .retP |
1200 |
|
|
1201 |
align 16 |
ALIGN 16 |
1202 |
.sixP |
.sixP |
1203 |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .sevenP |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .sevenP |
1204 |
IDCT6 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT6 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1207 |
IDCT6 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
IDCT6 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1208 |
jmp .retP |
jmp .retP |
1209 |
|
|
1210 |
align 16 |
ALIGN 16 |
1211 |
.twoP |
.twoP |
1212 |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .threeP |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .threeP |
1213 |
IDCT2 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT2 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1216 |
IDCT2 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
IDCT2 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1217 |
jmp .retP |
jmp .retP |
1218 |
|
|
1219 |
align 16 |
ALIGN 16 |
1220 |
.threeP |
.threeP |
1221 |
IDCT3 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT3 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1222 |
IDCT3 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
IDCT3 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1224 |
IDCT3 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
IDCT3 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1225 |
jmp .retP |
jmp .retP |
1226 |
|
|
1227 |
align 16 |
ALIGN 16 |
1228 |
.fiveP |
.fiveP |
1229 |
IDCT5 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT5 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1230 |
; IDCT5 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
; IDCT5 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1232 |
; IDCT5 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
; IDCT5 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1233 |
jmp .retP |
jmp .retP |
1234 |
|
|
1235 |
align 16 |
ALIGN 16 |
1236 |
.oneP |
.oneP |
1237 |
IDCT1 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT1 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1238 |
IDCT1 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
IDCT1 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1240 |
IDCT1 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
IDCT1 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1241 |
jmp .retP |
jmp .retP |
1242 |
|
|
1243 |
align 16 |
ALIGN 16 |
1244 |
.sevenP |
.sevenP |
1245 |
IDCT7 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT7 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1246 |
; IDCT7 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
; IDCT7 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1249 |
|
|
1250 |
.retP |
.retP |
1251 |
add esp, 128 |
add esp, 128 |
|
ret |
|
|
|
|
1252 |
|
|
1253 |
|
ret |