; *
; *************************************************************************/
; Shared nasm abstraction layer: defines the prm*/TMP*/_E?? portability
; aliases, SECTION_ALIGN, DATA and ENDFUNC used below.
; NOTE(review): presumably it also selects BITS 32/64 per target arch,
; replacing the old explicit "BITS 32" -- verify against nasm.inc.
%include "nasm.inc"
;-----------------------------------------------------------------------
; cglobal NAME
; Export NAME with the platform's C symbol naming convention:
;   PREFIX      defined -> C symbols carry a leading underscore, so
;                          export _NAME and remap every later use of the
;                          bare NAME onto _NAME via %define.
;   MARK_FUNCS  defined -> attach ELF ":function sym.endfunc-sym"
;                          type/size annotation (requires a NAME.endfunc
;                          label after the body, cf. ENDFUNC in nasm.inc).
;-----------------------------------------------------------------------
%macro cglobal 1
	%ifdef PREFIX
		%ifdef MARK_FUNCS
			global _%1:function %1.endfunc-%1
			%define %1 _%1:function %1.endfunc-%1
		%else
			global _%1
			%define %1 _%1
		%endif
	%else
		%ifdef MARK_FUNCS
			global %1:function %1.endfunc-%1
		%else
			global %1
		%endif
	%endif
%endmacro
|
;===========================================================================
; read only data
;===========================================================================
; Read-only data section. The DATA macro (nasm.inc) picks the correct
; section name and alignment for the current output format, replacing the
; old hand-rolled "%ifdef FORMAT_COFF / SECTION .rodata / ..." selector.
DATA
|
; 0x80 replicated in all 8 bytes: bias used to flip unsigned pixels into
; the signed-byte domain so paddsb can saturate correctly, and back.
mmx_0x80:
	times 8 db 0x80
;=============================================================================
; Code
;=============================================================================

; .rotext with SECTION_ALIGN (both from nasm.inc) replaces the old plain
; "SECTION .text" so code-section flags/alignment match the target platform.
SECTION .rotext align=SECTION_ALIGN

cglobal image_brightness_mmx
;//////////////////////////////////////////////////////////////////////
;// image_brightness_mmx
;//
;// C signature (from the argument accesses below):
;//   void image_brightness_mmx(uint8_t *dst,   /* prm1          */
;//                             int      stride,/* prm2          */
;//                             int      width, /* prm3          */
;//                             int      height,/* prm4          */
;//                             int      offset)/* prm5, -128..127 */
;// Adds 'offset' with signed saturation to every byte of the plane.
;// Processes 16 bytes per iteration; assumes width is a multiple of 16.
;//////////////////////////////////////////////////////////////////////

align SECTION_ALIGN
image_brightness_mmx:

	movq	mm6, [mmx_0x80]		; 8 x 0x80 signed/unsigned bias

	mov	eax, prm5d		; offset (arg 5, 32-bit)
%ifdef ARCH_IS_X86_64
	lea	r9, [mmx_offset]	; table base in a reg: RIP-relative
					; addressing cannot take an index
	movq	mm7, [r9 + (_EAX + 128)*8]	; being lazy
%else
	movq	mm7, [mmx_offset + (_EAX + 128)*8]	; being lazy
%endif

	mov	TMP1, prm1		; Dst
	mov	TMP0, prm2		; stride

	push	_ESI			; callee-saved, restored before ret
	push	_EDI
%ifdef ARCH_IS_X86_64
	mov	_ESI, prm3		; width
	mov	_EDI, prm4		; height
%else
	mov	_ESI, [_ESP+8+12]	; width  (+8 accounts for the 2 pushes)
	mov	_EDI, [_ESP+8+16]	; height
%endif

.yloop:
	xor	_EAX, _EAX		; x = 0

.xloop:
	movq	mm0, [TMP1 + _EAX]
	movq	mm1, [TMP1 + _EAX + 8]	; mm0 = [dst]

	paddb	mm0, mm6		; unsigned -> signed domain
	paddb	mm1, mm6
	; NOTE(review): the two saturating adds below fall in a gap of the
	; reviewed chunk (orig. lines 93-94) and were restored here -- without
	; them mm7 (the offset) is loaded but never used. Confirm upstream.
	paddsb	mm0, mm7
	paddsb	mm1, mm7		; += offset, saturating
	psubb	mm0, mm6
	psubb	mm1, mm6		; signed -> unsigned domain

	movq	[TMP1 + _EAX], mm0
	movq	[TMP1 + _EAX + 8], mm1	; [dst] = mm0

	add	_EAX, 16
	cmp	_EAX, _ESI
	jl	.xloop

	add	TMP1, TMP0		; dst += stride
	sub	_EDI, 1
	jg	.yloop

	pop	_EDI
	pop	_ESI

	ret
ENDFUNC
;//////////////////////////////////////////////////////////////////////
|
|
116 |
%ifidn __OUTPUT_FORMAT__,elf |
%ifidn __OUTPUT_FORMAT__,elf |