28 |
|
|
29 |
%macro cglobal 1 |
%macro cglobal 1 |
30 |
%ifdef PREFIX |
%ifdef PREFIX |
31 |
|
%ifdef MARK_FUNCS |
32 |
|
global _%1:function %1.endfunc-%1 |
33 |
|
%define %1 _%1:function %1.endfunc-%1 |
34 |
|
%define ENDFUNC .endfunc |
35 |
|
%else |
36 |
global _%1 |
global _%1 |
37 |
%define %1 _%1 |
%define %1 _%1 |
38 |
|
%define ENDFUNC |
39 |
|
%endif |
40 |
|
%else |
41 |
|
%ifdef MARK_FUNCS |
42 |
|
global %1:function %1.endfunc-%1 |
43 |
|
%define ENDFUNC .endfunc |
44 |
%else |
%else |
45 |
global %1 |
global %1 |
46 |
|
%define ENDFUNC |
47 |
|
%endif |
48 |
%endif |
%endif |
49 |
%endmacro |
%endmacro |
50 |
|
|
64 |
;=========================================================================== |
;=========================================================================== |
65 |
|
|
66 |
%ifdef FORMAT_COFF |
%ifdef FORMAT_COFF |
67 |
SECTION .rodata data |
SECTION .rodata |
68 |
%else |
%else |
69 |
SECTION .rodata data align=16 |
SECTION .rodata align=16 |
70 |
%endif |
%endif |
71 |
|
|
72 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
198 |
packssdw mm4,mm0 ; A2-B2 a2-b2 A3-B3 a3-b3 |
packssdw mm4,mm0 ; A2-B2 a2-b2 A3-B3 a3-b3 |
199 |
movq [ dst + 16],mm4 |
movq [ dst + 16],mm4 |
200 |
jmp short .skip2 |
jmp short .skip2 |
201 |
.skip1 |
.skip1: |
202 |
pslld mm0,16 |
pslld mm0,16 |
203 |
paddd mm0,[d40000] |
paddd mm0,[d40000] |
204 |
psrad mm0,13 |
psrad mm0,13 |
207 |
movq [ dst + 8],mm0 |
movq [ dst + 8],mm0 |
208 |
movq [ dst + 16],mm0 |
movq [ dst + 16],mm0 |
209 |
movq [ dst + 24],mm0 |
movq [ dst + 24],mm0 |
210 |
.skip2 |
.skip2: |
211 |
%undef src0 |
%undef src0 |
212 |
%undef src4 |
%undef src4 |
213 |
%undef src1 |
%undef src1 |
1102 |
jmp .ret |
jmp .ret |
1103 |
|
|
1104 |
ALIGN 16 |
ALIGN 16 |
1105 |
.four |
.four: |
1106 |
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .six |
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .six |
1107 |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .five |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .five |
1108 |
IDCT4 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT4 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1112 |
jmp .ret |
jmp .ret |
1113 |
|
|
1114 |
ALIGN 16 |
ALIGN 16 |
1115 |
.six |
.six: |
1116 |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .seven |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .seven |
1117 |
IDCT6 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT6 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1118 |
IDCT6 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
IDCT6 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1121 |
jmp .ret |
jmp .ret |
1122 |
|
|
1123 |
ALIGN 16 |
ALIGN 16 |
1124 |
.two |
.two: |
1125 |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .three |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .three |
1126 |
IDCT2 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT2 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1127 |
IDCT2 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
IDCT2 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1130 |
jmp .ret |
jmp .ret |
1131 |
|
|
1132 |
ALIGN 16 |
ALIGN 16 |
1133 |
.three |
.three: |
1134 |
IDCT3 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT3 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1135 |
IDCT3 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
IDCT3 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1136 |
IDCT3 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
IDCT3 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1138 |
jmp .ret |
jmp .ret |
1139 |
|
|
1140 |
ALIGN 16 |
ALIGN 16 |
1141 |
.five |
.five: |
1142 |
IDCT5 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT5 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1143 |
; IDCT5 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
; IDCT5 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1144 |
IDCT5 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
IDCT5 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1146 |
jmp .ret |
jmp .ret |
1147 |
|
|
1148 |
ALIGN 16 |
ALIGN 16 |
1149 |
.one |
.one: |
1150 |
IDCT1 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT1 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1151 |
IDCT1 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
IDCT1 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1152 |
IDCT1 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
IDCT1 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1154 |
jmp .ret |
jmp .ret |
1155 |
|
|
1156 |
ALIGN 16 |
ALIGN 16 |
1157 |
.seven |
.seven: |
1158 |
IDCT7 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT7 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1159 |
; IDCT7 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
; IDCT7 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1160 |
IDCT7 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
IDCT7 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1161 |
; IDCT7 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
; IDCT7 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1162 |
|
|
1163 |
.ret |
.ret: |
1164 |
add esp, 128 |
add esp, 128 |
1165 |
|
|
1166 |
ret |
ret |
1167 |
|
ENDFUNC |
1168 |
|
|
1169 |
|
|
1170 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
1194 |
jmp .retP |
jmp .retP |
1195 |
|
|
1196 |
ALIGN 16 |
ALIGN 16 |
1197 |
.fourP |
.fourP: |
1198 |
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .sixP |
Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .sixP |
1199 |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .fiveP |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .fiveP |
1200 |
IDCT4 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT4 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1204 |
jmp .retP |
jmp .retP |
1205 |
|
|
1206 |
ALIGN 16 |
ALIGN 16 |
1207 |
.sixP |
.sixP: |
1208 |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .sevenP |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .sevenP |
1209 |
IDCT6 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT6 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1210 |
IDCT6 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
IDCT6 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1213 |
jmp .retP |
jmp .retP |
1214 |
|
|
1215 |
ALIGN 16 |
ALIGN 16 |
1216 |
.twoP |
.twoP: |
1217 |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .threeP |
Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .threeP |
1218 |
IDCT2 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT2 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1219 |
IDCT2 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
IDCT2 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1222 |
jmp .retP |
jmp .retP |
1223 |
|
|
1224 |
ALIGN 16 |
ALIGN 16 |
1225 |
.threeP |
.threeP: |
1226 |
IDCT3 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT3 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1227 |
IDCT3 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
IDCT3 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1228 |
IDCT3 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
IDCT3 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1230 |
jmp .retP |
jmp .retP |
1231 |
|
|
1232 |
ALIGN 16 |
ALIGN 16 |
1233 |
.fiveP |
.fiveP: |
1234 |
IDCT5 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT5 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1235 |
; IDCT5 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
; IDCT5 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1236 |
IDCT5 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
IDCT5 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1238 |
jmp .retP |
jmp .retP |
1239 |
|
|
1240 |
ALIGN 16 |
ALIGN 16 |
1241 |
.oneP |
.oneP: |
1242 |
IDCT1 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT1 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1243 |
IDCT1 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
IDCT1 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1244 |
IDCT1 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
IDCT1 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1246 |
jmp .retP |
jmp .retP |
1247 |
|
|
1248 |
ALIGN 16 |
ALIGN 16 |
1249 |
.sevenP |
.sevenP: |
1250 |
IDCT7 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
IDCT7 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 |
1251 |
; IDCT7 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
; IDCT7 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 |
1252 |
IDCT7 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
IDCT7 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 |
1253 |
; IDCT7 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
; IDCT7 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 |
1254 |
|
|
1255 |
.retP |
.retP: |
1256 |
add esp, 128 |
add esp, 128 |
1257 |
|
|
1258 |
ret |
ret |
1259 |
|
ENDFUNC |
1260 |
|
|
1261 |
|
|
1262 |
|
%ifidn __OUTPUT_FORMAT__,elf |
1263 |
|
section ".note.GNU-stack" noalloc noexec nowrite progbits |
1264 |
|
%endif |
1265 |
|
|