22 |
; * |
; * |
23 |
; *************************************************************************/ |
; *************************************************************************/ |
24 |
|
|
25 |
BITS 32 |
%include "nasm.inc" |
|
|
|
|
%macro cglobal 1 |
|
|
%ifdef PREFIX |
|
|
%ifdef MARK_FUNCS |
|
|
global _%1:function %1.endfunc-%1 |
|
|
%define %1 _%1:function %1.endfunc-%1 |
|
|
%else |
|
|
global _%1 |
|
|
%define %1 _%1 |
|
|
%endif |
|
|
%else |
|
|
%ifdef MARK_FUNCS |
|
|
global %1:function %1.endfunc-%1 |
|
|
%else |
|
|
global %1 |
|
|
%endif |
|
|
%endif |
|
|
%endmacro |
|
26 |
|
|
27 |
;=========================================================================== |
;=========================================================================== |
28 |
; read only data |
; read only data |
29 |
;=========================================================================== |
;=========================================================================== |
30 |
|
|
31 |
%ifdef FORMAT_COFF |
DATA |
|
SECTION .rodata |
|
|
%else |
|
|
SECTION .rodata align=16 |
|
|
%endif |
|
32 |
|
|
33 |
xmm_0x80: |
xmm_0x80: |
34 |
times 16 db 0x80 |
times 16 db 0x80 |
37 |
; Code |
; Code |
38 |
;============================================================================= |
;============================================================================= |
39 |
|
|
40 |
SECTION .text |
SECTION .rotext align=SECTION_ALIGN |
41 |
|
|
42 |
cglobal image_brightness_sse2 |
cglobal image_brightness_sse2 |
43 |
|
|
64 |
mov [%1 + 15], %2 |
mov [%1 + 15], %2 |
65 |
%endmacro |
%endmacro |
66 |
|
|
67 |
ALIGN 16 |
ALIGN SECTION_ALIGN |
68 |
image_brightness_sse2: |
image_brightness_sse2: |
69 |
|
PUSH_XMM6_XMM7 |
70 |
|
%ifdef ARCH_IS_X86_64 |
71 |
|
movsx _EAX, prm5d |
72 |
|
%else |
73 |
|
mov eax, prm5 ; brightness offset value |
74 |
|
%endif |
75 |
|
mov TMP1, prm1 ; Dst |
76 |
|
mov TMP0, prm2 ; stride |
77 |
|
|
78 |
push esi |
push _ESI |
79 |
push edi ; 8 bytes offset for push |
push _EDI ; 8 bytes offset for push |
80 |
sub esp, 32 ; 32 bytes for local data (16bytes will be used, 16bytes more to align correctly mod 16) |
sub _ESP, 32 ; 32 bytes for local data (16bytes will be used, 16bytes more to align correctly mod 16) |
81 |
|
|
82 |
movdqa xmm6, [xmm_0x80] |
movdqa xmm6, [xmm_0x80] |
83 |
|
|
84 |
; Create a offset...offset vector |
; Create a offset...offset vector |
85 |
mov eax, [esp+8+32+20] ; brightness offset value |
mov _ESI, _ESP ; TMP1 will be esp aligned mod 16 |
86 |
mov edx, esp ; edx will be esp aligned mod 16 |
add _ESI, 15 ; TMP1 = esp + 15 |
87 |
add edx, 15 ; edx = esp + 15 |
and _ESI, ~15 ; TMP1 = (esp + 15)&(~15) |
88 |
and edx, ~15 ; edx = (esp + 15)&(~15) |
CREATE_OFFSET_VECTOR _ESI, al |
89 |
CREATE_OFFSET_VECTOR edx, al |
movdqa xmm7, [_ESI] |
90 |
movdqa xmm7, [edx] |
|
91 |
|
%ifdef ARCH_IS_X86_64 |
92 |
mov edx, [esp+8+32+4] ; Dst |
mov _ESI, prm3 |
93 |
mov ecx, [esp+8+32+8] ; stride |
mov _EDI, prm4 |
94 |
mov esi, [esp+8+32+12] ; width |
%else |
95 |
mov edi, [esp+8+32+16] ; height |
mov _ESI, [_ESP+8+32+12] ; width |
96 |
|
mov _EDI, [_ESP+8+32+16] ; height |
97 |
.yloop |
%endif |
98 |
xor eax, eax |
|
99 |
|
.yloop: |
100 |
.xloop |
xor _EAX, _EAX |
101 |
movdqa xmm0, [edx + eax] |
|
102 |
movdqa xmm1, [edx + eax + 16] ; xmm0 = [dst] |
.xloop: |
103 |
|
movdqa xmm0, [TMP1 + _EAX] |
104 |
|
movdqa xmm1, [TMP1 + _EAX + 16] ; xmm0 = [dst] |
105 |
|
|
106 |
paddb xmm0, xmm6 ; unsigned -> signed domain |
paddb xmm0, xmm6 ; unsigned -> signed domain |
107 |
paddb xmm1, xmm6 |
paddb xmm1, xmm6 |
110 |
psubb xmm0, xmm6 |
psubb xmm0, xmm6 |
111 |
psubb xmm1, xmm6 ; signed -> unsigned domain |
psubb xmm1, xmm6 ; signed -> unsigned domain |
112 |
|
|
113 |
movdqa [edx + eax], xmm0 |
movdqa [TMP1 + _EAX], xmm0 |
114 |
movdqa [edx + eax + 16], xmm1 ; [dst] = xmm0 |
movdqa [TMP1 + _EAX + 16], xmm1 ; [dst] = xmm0 |
115 |
|
|
116 |
add eax,32 |
add _EAX,32 |
117 |
cmp eax,esi |
cmp _EAX,_ESI |
118 |
jl .xloop |
jl .xloop |
119 |
|
|
120 |
add edx, ecx ; dst += stride |
add TMP1, TMP0 ; dst += stride |
121 |
sub edi, 1 |
sub _EDI, 1 |
122 |
jg .yloop |
jg .yloop |
123 |
|
|
124 |
add esp, 32 |
add _ESP, 32 |
125 |
pop edi |
pop _EDI |
126 |
pop esi |
pop _ESI |
127 |
|
|
128 |
|
POP_XMM6_XMM7 |
129 |
ret |
ret |
130 |
.endfunc |
ENDFUNC |
131 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
132 |
|
|
133 |
|
%ifidn __OUTPUT_FORMAT__,elf |
134 |
|
section ".note.GNU-stack" noalloc noexec nowrite progbits |
135 |
|
%endif |
136 |
|
|