29 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
30 |
global _%1:function %1.endfunc-%1 |
global _%1:function %1.endfunc-%1 |
31 |
%define %1 _%1:function %1.endfunc-%1 |
%define %1 _%1:function %1.endfunc-%1 |
32 |
|
%define ENDFUNC .endfunc |
33 |
%else |
%else |
34 |
global _%1 |
global _%1 |
35 |
%define %1 _%1 |
%define %1 _%1 |
36 |
|
%define ENDFUNC |
37 |
%endif |
%endif |
38 |
%else |
%else |
39 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
40 |
global %1:function %1.endfunc-%1 |
global %1:function %1.endfunc-%1 |
41 |
|
%define ENDFUNC .endfunc |
42 |
%else |
%else |
43 |
global %1 |
global %1 |
44 |
|
%define ENDFUNC |
45 |
%endif |
%endif |
46 |
%endif |
%endif |
47 |
%endmacro |
%endmacro |
116 |
%macro CONSIM_WRITEOUT 3 |
%macro CONSIM_WRITEOUT 3 |
117 |
mov eax,[esp + 16];lumo |
mov eax,[esp + 16];lumo |
118 |
mul eax; lumo^2 |
mul eax; lumo^2 |
119 |
|
add eax, 32 |
120 |
shr eax,6; 64*lum0^2 |
shr eax,6; 64*lum0^2 |
121 |
movd ecx,%1 |
movd ecx,%1 |
122 |
sub ecx,eax |
sub ecx,eax |
126 |
|
|
127 |
mov eax,[esp + 20];lumc |
mov eax,[esp + 20];lumc |
128 |
mul eax; lumc^2 |
mul eax; lumc^2 |
129 |
|
add eax, 32 |
130 |
shr eax,6; 64*lumc^2 |
shr eax,6; 64*lumc^2 |
131 |
movd ecx,%2 |
movd ecx,%2 |
132 |
sub ecx,eax |
sub ecx,eax |
136 |
|
|
137 |
mov eax,[esp + 16];lumo |
mov eax,[esp + 16];lumo |
138 |
mul dword [esp + 20]; lumo*lumc, should fit in eax |
mul dword [esp + 20]; lumo*lumc, should fit in eax |
139 |
|
add eax, 32 |
140 |
shr eax,6; 64*lumo*lumc |
shr eax,6; 64*lumo*lumc |
141 |
movd ecx,%3 |
movd ecx,%3 |
142 |
sub ecx,eax |
sub ecx,eax |
175 |
|
|
176 |
movd eax,mm1 |
movd eax,mm1 |
177 |
ret |
ret |
178 |
.endfunc |
ENDFUNC |
179 |
|
|
180 |
ALIGN 16 |
ALIGN 16 |
181 |
consim_sse2: |
consim_sse2: |
220 |
CONSIM_1x8_SSE2 |
CONSIM_1x8_SSE2 |
221 |
|
|
222 |
;accumulate xmm5-7 |
;accumulate xmm5-7 |
223 |
pshufd xmm0, xmm5, 0EH |
pshufd xmm0, xmm5, 0x0E |
224 |
paddd xmm5, xmm0 |
paddd xmm5, xmm0 |
225 |
pshufd xmm0, xmm5, 01H |
pshufd xmm0, xmm5, 0x01 |
226 |
paddd xmm5, xmm0 |
paddd xmm5, xmm0 |
227 |
|
|
228 |
pshufd xmm1, xmm6, 0EH |
pshufd xmm1, xmm6, 0x0E |
229 |
paddd xmm6, xmm1 |
paddd xmm6, xmm1 |
230 |
pshufd xmm1, xmm6, 01H |
pshufd xmm1, xmm6, 0x01 |
231 |
paddd xmm6, xmm1 |
paddd xmm6, xmm1 |
232 |
|
|
233 |
pshufd xmm2, xmm7, 0EH |
pshufd xmm2, xmm7, 0x0E |
234 |
paddd xmm7, xmm2 |
paddd xmm7, xmm2 |
235 |
pshufd xmm2, xmm7, 01H |
pshufd xmm2, xmm7, 0x01 |
236 |
paddd xmm7, xmm2 |
paddd xmm7, xmm2 |
237 |
|
|
238 |
CONSIM_WRITEOUT xmm5,xmm6,xmm7 |
CONSIM_WRITEOUT xmm5,xmm6,xmm7 |
239 |
ret |
ret |
240 |
.endfunc |
ENDFUNC |
241 |
|
|
242 |
|
|
243 |
|
|
288 |
|
|
289 |
CONSIM_WRITEOUT mm5,mm6,mm7 |
CONSIM_WRITEOUT mm5,mm6,mm7 |
290 |
ret |
ret |
291 |
.endfunc |
ENDFUNC |
292 |
|
|
293 |
|
; When the assembler's output format name is `elf`, declare an empty
; ".note.GNU-stack" section with the attributes noalloc, noexec, nowrite and
; progbits. Nothing is placed in the section; only its presence and flags
; matter. By GNU toolchain convention this marks the object as not needing
; an executable stack.
; NOTE(review): %ifidn is an exact text comparison, so this guard matches
; only when __OUTPUT_FORMAT__ expands to `elf` -- confirm whether builds
; that pass -f elf32 / -f elf64 also need to emit this section.
%ifidn __OUTPUT_FORMAT__,elf |
294 |

section ".note.GNU-stack" noalloc noexec nowrite progbits |
295 |

%endif |