26 |
|
|
27 |
;-----------------------------------------------------------------------------
; cglobal name
;
; Declares `name` as a global symbol.
;   PREFIX defined     : the exported symbol is `_name`, and `name` is
;                        %define'd as an alias for `_name`, so later uses of
;                        the bare name (the function label, references to it)
;                        resolve to the exported symbol.
;   MARK_FUNCS defined : the `:function` type suffix is appended to the
;                        `global` directive.
;
; The alias deliberately carries no `:function` suffix. It is substituted
; everywhere `name` appears, so with the suffix a label `name:` would expand
; to `_name:function:`.
;-----------------------------------------------------------------------------
%macro cglobal 1
  %ifdef PREFIX
    %ifdef MARK_FUNCS
      global _%1:function
    %else
      global _%1
    %endif
    %define %1 _%1
  %else
    %ifdef MARK_FUNCS
      global %1:function
    %else
      global %1
    %endif
  %endif
%endmacro
44 |
|
|
|
;-----------------------------------------------------------------------------
; FILLBYTES dst, src
;
; Emits 16 consecutive stores of `src` to [dst + 0] .. [dst + 15], in
; ascending address order.
;   %1  base address expression (the call site in this file passes the label
;       offset_xmm)
;   %2  source operand; its size determines the size of every store. The call
;       site passes al, which yields sixteen one-byte stores that replicate
;       al across a 16-byte vector.
;-----------------------------------------------------------------------------
%macro FILLBYTES 2
  %assign %%i 0                 ; macro-local byte index, unique per expansion
  %rep 16
    mov [%1 + %%i], %2
    %assign %%i %%i+1
  %endrep
%endmacro
|
|
|
|
|
|
|
45 |
;=========================================================================== |
;=========================================================================== |
46 |
; read only data |
; read only data |
47 |
;=========================================================================== |
;=========================================================================== |
55 |
; Sixteen bytes of 0x80. image_brightness_sse2 loads this into xmm6 with
; movdqa, which requires the label to sit on a 16-byte boundary.
; NOTE(review): the section/alignment directives are outside this excerpt --
; confirm the alignment is established there.
xmm_0x80:
  times 16 db 0x80

; Sixteen zero bytes used as scratch storage: `FILLBYTES offset_xmm, al`
; overwrites them at run time and the result is loaded with movdqa.
; NOTE(review): this label lives under the "read only data" banner yet is
; written by the code, and it is shared by every caller -- confirm the section
; is writable and that concurrent callers are not expected.
offset_xmm:
  times 16 db 0x00
|
|
|
|
58 |
;============================================================================= |
;============================================================================= |
59 |
; Code |
; Code |
60 |
;============================================================================= |
;============================================================================= |
63 |
|
|
64 |
cglobal image_brightness_sse2 |
cglobal image_brightness_sse2 |
65 |
|
|
|
|
|
66 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
67 |
;// image_brightness_sse2 |
;// image_brightness_sse2 |
68 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
69 |
|
|
70 |
align 16 |
;-----------------------------------------------------------------------------
; CREATE_OFFSET_VECTOR dst, src
;
; Emits 16 consecutive stores of `src` to [dst + 0] .. [dst + 15], in
; ascending address order.
;   %1  base address (the call site in this file passes edx, which it has
;       rounded up to a multiple of 16 inside the stack area it reserved)
;   %2  source operand; its size determines the size of every store. The call
;       site passes al, which yields sixteen one-byte stores that replicate
;       al across a 16-byte vector.
;-----------------------------------------------------------------------------
%macro CREATE_OFFSET_VECTOR 2
  %assign %%i 0                 ; macro-local byte index, unique per expansion
  %rep 16
    mov [%1 + %%i], %2
    %assign %%i %%i+1
  %endrep
%endmacro
88 |
|
|
89 |
|
ALIGN 16 |
90 |
image_brightness_sse2: |
image_brightness_sse2: |
91 |
|
|
92 |
push esi |
push esi |
93 |
push edi |
push edi ; 8 bytes offset for push |
94 |
|
sub esp, 32 ; 32 bytes for local data (16bytes will be used, 16bytes more to align correctly mod 16) |
95 |
|
|
96 |
movdqa xmm6, [xmm_0x80] |
movdqa xmm6, [xmm_0x80] |
97 |
|
|
98 |
mov eax, [esp+8+20] ; offset |
; Build a 16-byte vector with the offset byte replicated in every lane |
99 |
|
mov eax, [esp+8+32+20] ; brightness offset value |
100 |
FILLBYTES offset_xmm, al |
mov edx, esp ; edx will be esp aligned mod 16 |
101 |
|
add edx, 15 ; edx = esp + 15 |
102 |
movdqa xmm7, [offset_xmm] |
and edx, ~15 ; edx = (esp + 15)&(~15) |
103 |
|
CREATE_OFFSET_VECTOR edx, al |
104 |
mov edx, [esp+8+4] ; Dst |
movdqa xmm7, [edx] |
105 |
mov ecx, [esp+8+8] ; stride |
|
106 |
mov esi, [esp+8+12] ; width |
mov edx, [esp+8+32+4] ; Dst |
107 |
mov edi, [esp+8+16] ; height |
mov ecx, [esp+8+32+8] ; stride |
108 |
|
mov esi, [esp+8+32+12] ; width |
109 |
|
mov edi, [esp+8+32+16] ; height |
110 |
|
|
111 |
.yloop |
.yloop |
112 |
xor eax, eax |
xor eax, eax |
133 |
sub edi, 1 |
sub edi, 1 |
134 |
jg .yloop |
jg .yloop |
135 |
|
|
136 |
|
add esp, 32 |
137 |
pop edi |
pop edi |
138 |
pop esi |
pop esi |
139 |
|
|