78 |
paddusw xmm6,xmm1 |
paddusw xmm6,xmm1 |
79 |
%endmacro |
%endmacro |
80 |
|
|
81 |
align 16 |
ALIGN 16 |
82 |
sad16_sse2: |
sad16_sse2: |
83 |
mov eax, [esp+ 4] ; cur (assumed aligned) |
mov eax, [esp+ 4] ; cur (assumed aligned) |
84 |
mov edx, [esp+ 8] ; ref |
mov edx, [esp+ 8] ; ref |
116 |
%endmacro |
%endmacro |
117 |
|
|
118 |
|
|
119 |
align 16 |
ALIGN 16 |
120 |
dev16_sse2: |
dev16_sse2: |
121 |
mov eax, [esp+ 4] ; src |
mov eax, [esp+ 4] ; src |
122 |
mov ecx, [esp+ 8] ; stride |
mov ecx, [esp+ 8] ; stride |
136 |
|
|
137 |
mov eax, [esp+ 4] ; src again |
mov eax, [esp+ 4] ; src again |
138 |
|
|
139 |
pshufd xmm7, xmm6, 0010b |
pshufd xmm7, xmm6, 10b |
140 |
paddusw xmm7, xmm6 |
paddusw xmm7, xmm6 |
141 |
pxor xmm6, xmm6 ; zero accum |
pxor xmm6, xmm6 ; zero accum |
142 |
psrlw xmm7, 8 ; => Mean |
psrlw xmm7, 8 ; => Mean |
143 |
pshuflw xmm7, xmm7, 0 ; replicate Mean |
pshuflw xmm7, xmm7, 0 ; replicate Mean |
144 |
packuswb xmm7,xmm7 |
packuswb xmm7,xmm7 |
145 |
|
pshufd xmm7, xmm7, 00000000b |
146 |
|
|
147 |
MEAN_16x16_SSE2 |
MEAN_16x16_SSE2 |
148 |
MEAN_16x16_SSE2 |
MEAN_16x16_SSE2 |
154 |
MEAN_16x16_SSE2 |
MEAN_16x16_SSE2 |
155 |
MEAN_16x16_SSE2 |
MEAN_16x16_SSE2 |
156 |
|
|
157 |
pshufd xmm5, xmm6, 0010b |
pshufd xmm7, xmm6, 10b |
158 |
paddusw xmm6, xmm5 |
paddusw xmm7, xmm6 |
159 |
pextrw eax, xmm6, 0 |
pextrw eax, xmm7, 0 |
|
|
|
160 |
ret |
ret |