--- simple_idct_mmx.asm 2003/11/03 15:51:50 1.2.2.4 +++ simple_idct_mmx.asm 2008/11/26 01:04:34 1.10 @@ -28,10 +28,23 @@ %macro cglobal 1 %ifdef PREFIX - global _%1 - %define %1 _%1 + %ifdef MARK_FUNCS + global _%1:function %1.endfunc-%1 + %define %1 _%1:function %1.endfunc-%1 + %define ENDFUNC .endfunc + %else + global _%1 + %define %1 _%1 + %define ENDFUNC + %endif %else - global %1 + %ifdef MARK_FUNCS + global %1:function %1.endfunc-%1 + %define ENDFUNC .endfunc + %else + global %1 + %define ENDFUNC + %endif %endif %endmacro @@ -51,9 +64,9 @@ ;=========================================================================== %ifdef FORMAT_COFF -SECTION .rodata data +SECTION .rodata %else -SECTION .rodata data align=16 +SECTION .rodata align=16 %endif ;----------------------------------------------------------------------------- @@ -185,7 +198,7 @@ packssdw mm4,mm0 ; A2-B2 a2-b2 A3-B3 a3-b3 movq [ dst + 16],mm4 jmp short .skip2 -.skip1 +.skip1: pslld mm0,16 paddd mm0,[d40000] psrad mm0,13 @@ -194,7 +207,7 @@ movq [ dst + 8],mm0 movq [ dst + 16],mm0 movq [ dst + 24],mm0 -.skip2 +.skip2: %undef src0 %undef src4 %undef src1 @@ -1089,7 +1102,7 @@ jmp .ret ALIGN 16 -.four +.four: Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .six Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .five IDCT4 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 @@ -1099,7 +1112,7 @@ jmp .ret ALIGN 16 -.six +.six: Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .seven IDCT6 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 IDCT6 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 @@ -1108,7 +1121,7 @@ jmp .ret ALIGN 16 -.two +.two: Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .three IDCT2 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 IDCT2 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 @@ -1117,7 +1130,7 @@ jmp .ret ALIGN 16 -.three +.three: IDCT3 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 IDCT3 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 IDCT3 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 @@ -1125,7 +1138,7 @@ jmp .ret ALIGN 16 -.five +.five: IDCT5 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 ; IDCT5 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 IDCT5 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 @@ -1133,7 +1146,7 @@ jmp .ret ALIGN 16 -.one +.one: IDCT1 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 IDCT1 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 IDCT1 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 @@ -1141,16 +1154,17 @@ jmp .ret ALIGN 16 -.seven +.seven: IDCT7 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 ; IDCT7 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 IDCT7 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 ; IDCT7 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 -.ret +.ret: add esp, 128 ret +ENDFUNC ;----------------------------------------------------------------------------- @@ -1180,7 +1194,7 @@ jmp .retP ALIGN 16 -.fourP +.fourP: Z_COND_IDCT edx+64, edx+72, edx+80, edx+88, esp+64, paddd, [coeffs], 11, .sixP Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .fiveP IDCT4 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 @@ -1190,7 +1204,7 @@ jmp .retP ALIGN 16 -.sixP +.sixP: Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .sevenP IDCT6 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 IDCT6 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 @@ -1199,7 +1213,7 @@ jmp .retP ALIGN 16 -.twoP +.twoP: Z_COND_IDCT edx+96, edx+104,edx+112,edx+120,esp+96, paddd, [coeffs], 11, .threeP IDCT2 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 IDCT2 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 @@ -1208,7 +1222,7 @@ jmp .retP ALIGN 16 -.threeP +.threeP: IDCT3 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 IDCT3 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 IDCT3 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 @@ -1216,7 +1230,7 @@ jmp .retP ALIGN 16 -.fiveP +.fiveP: IDCT5 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 ; IDCT5 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 IDCT5 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 @@ -1224,7 +1238,7 @@ jmp .retP ALIGN 16 -.oneP +.oneP: IDCT1 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 IDCT1 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 IDCT1 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 @@ -1232,13 +1246,20 @@ jmp .retP ALIGN 16 -.sevenP +.sevenP: IDCT7 esp, esp+64, esp+32, esp+96, edx, nop, 0, 20 ; IDCT7 esp+8, esp+72, esp+40, esp+104,edx+4, nop, 0, 20 IDCT7 esp+16, esp+80, esp+48, esp+112,edx+8, nop, 0, 20 ; IDCT7 esp+24, esp+88, esp+56, esp+120,edx+12, nop, 0, 20 -.retP +.retP: add esp, 128 ret +ENDFUNC + + +%ifidn __OUTPUT_FORMAT__,elf +section ".note.GNU-stack" noalloc noexec nowrite progbits +%endif +