--- mem_transfer_mmx.asm 2004/04/13 20:06:53 1.12
+++ mem_transfer_mmx.asm 2004/12/19 13:16:50 1.16
@@ -21,7 +21,7 @@
 ; * along with this program ; if not, write to the Free Software
 ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ; *
-; * $Id: mem_transfer_mmx.asm,v 1.12 2004/04/13 20:06:53 edgomez Exp $
+; * $Id: mem_transfer_mmx.asm,v 1.16 2004/12/19 13:16:50 syskin Exp $
 ; *
 ; ***************************************************************************/
 
@@ -29,10 +29,19 @@
 
 %macro cglobal 1
   %ifdef PREFIX
-    global _%1
-    %define %1 _%1
+    %ifdef MARK_FUNCS
+      global _%1:function %1.endfunc-%1 ; NOTE(review): the define on the next line makes every later use of the bare name expand to this whole "_name:function size" text -- confirm PREFIX+MARK_FUNCS is ever built together
+      %define %1 _%1:function %1.endfunc-%1
+    %else
+      global _%1
+      %define %1 _%1
+    %endif
   %else
-    global %1
+    %ifdef MARK_FUNCS
+      global %1:function %1.endfunc-%1 ; size expression = distance from the entry label to its .endfunc label; presumably NASM's ELF type/size extension -- TODO confirm
+    %else
+      global %1
+    %endif
   %endif
 %endmacro
 
@@ -41,9 +50,9 @@
 ;=============================================================================
 
 %ifdef FORMAT_COFF
-SECTION .rodata data
+SECTION .rodata
 %else
-SECTION .rodata data align=16
+SECTION .rodata align=16
 %endif
 
 ALIGN 16
@@ -62,6 +71,7 @@
 cglobal transfer_8to16subro_mmx
 cglobal transfer_8to16sub2_mmx
 cglobal transfer_8to16sub2_xmm
+cglobal transfer_8to16sub2ro_xmm
 cglobal transfer_16to8add_mmx
 cglobal transfer8x8_copy_mmx
 
@@ -102,6 +112,7 @@
   COPY_8_TO_16 2
   COPY_8_TO_16 3
   ret
+.endfunc ; local end label; cglobal's MARK_FUNCS form references <name>.endfunc to size the symbol
 
 ;-----------------------------------------------------------------------------
 ;
@@ -137,6 +148,7 @@
   lea ecx,[ecx+2*edx]
   COPY_16_TO_8 3
   ret
+.endfunc
 
 ;-----------------------------------------------------------------------------
 ;
@@ -200,6 +212,7 @@
 
   pop ebx
   ret
+.endfunc
 
 
 ALIGN 16
@@ -218,6 +231,7 @@
 
   pop ebx
   ret
+.endfunc
 
 
 ;-----------------------------------------------------------------------------
@@ -316,6 +330,7 @@
   pop esi
   pop ebx
   ret
+.endfunc
 
 ;-----------------------------------------------------------------------------
 ;
@@ -383,6 +398,75 @@
   pop esi
   pop ebx
   ret
+.endfunc
+
+
+;-----------------------------------------------------------------------------
+; dct[] = cur[] - pavgb(ref1[], ref2[]), widened to 16-bit words, for 8 rows of 8 bytes.
+; void transfer_8to16sub2ro_xmm(int16_t * const dct,
+;                               const uint8_t * const cur,
+;                               const uint8_t * ref1,
+;                               const uint8_t * ref2,
+;                               const uint32_t stride)
+; Stack args; only dct is written. Uses eax/ecx/edx and mm0-mm7; ebx/esi are saved and restored. No emms is issued here.
+;-----------------------------------------------------------------------------
+; COPY_8_TO_16_SUB2RO_SSE n: two rows per call; expects mm7 = 0; stores 32 bytes at ecx+n*32; advances eax/ebx/esi by 2*edx.
+%macro COPY_8_TO_16_SUB2RO_SSE 1
+  movq mm0, [eax]             ; cur, row 0 (8 bytes)
+  movq mm2, [eax+edx]         ; cur, row 1 (edx = stride in bytes)
+  movq mm1, mm0               ; copies kept for the high halves
+  movq mm3, mm2
+
+  punpcklbw mm0, mm7          ; low 4 bytes -> words (interleave with zero)
+  punpcklbw mm2, mm7
+  movq mm4, [ebx]             ; ref1, row 0
+  pavgb mm4, [esi]            ; byte-wise average with ref2, row 0
+  punpckhbw mm1, mm7          ; high 4 bytes -> words
+  punpckhbw mm3, mm7
+  movq mm5, [ebx+edx]         ; ref1, row 1
+  pavgb mm5, [esi+edx]        ; byte-wise average with ref2, row 1
+
+  movq mm6, mm4               ; mm6 is scratch for the high half of each averaged row
+  punpcklbw mm4, mm7
+  punpckhbw mm6, mm7
+  psubsw mm0, mm4             ; row 0: cur - avg, low words
+  psubsw mm1, mm6             ; row 0: cur - avg, high words
+  lea esi, [esi+2*edx]        ; ref2 += 2 rows
+  movq mm6, mm5
+  punpcklbw mm5, mm7
+  punpckhbw mm6, mm7
+  psubsw mm2, mm5             ; row 1: cur - avg, low words
+  lea eax, [eax+2*edx]        ; cur += 2 rows
+  psubsw mm3, mm6             ; row 1: cur - avg, high words
+  lea ebx, [ebx+2*edx]        ; ref1 += 2 rows
+
+  movq [ecx+%1*32+ 0], mm0    ; dst: row 0, words 0-3
+  movq [ecx+%1*32+ 8], mm1    ; dst: row 0, words 4-7
+  movq [ecx+%1*32+16], mm2    ; dst: row 1, words 0-3
+  movq [ecx+%1*32+24], mm3    ; dst: row 1, words 4-7
+%endmacro
+
+ALIGN 16
+transfer_8to16sub2ro_xmm:
+  pxor mm7, mm7               ; mm7 = 0, the zero operand for every unpack in the macro
+  mov ecx, [esp + 4]          ; Dst
+  mov eax, [esp + 8]          ; Cur
+  push ebx
+  mov ebx, [esp+4+12]         ; Ref1 (+4: one push since entry)
+  push esi
+  mov esi, [esp+8+16]         ; Ref2 (+8: two pushes since entry)
+  mov edx, [esp+8+20]         ; Stride
+
+  COPY_8_TO_16_SUB2RO_SSE 0
+  COPY_8_TO_16_SUB2RO_SSE 1
+  COPY_8_TO_16_SUB2RO_SSE 2
+  COPY_8_TO_16_SUB2RO_SSE 3
+
+  pop esi
+  pop ebx
+  ret
+.endfunc ; end label referenced as <name>.endfunc by cglobal when MARK_FUNCS is defined
+
 
 ;-----------------------------------------------------------------------------
 ;
@@ -427,6 +511,7 @@
   lea ecx,[ecx+2*edx]
   COPY_16_TO_8_ADD 3
   ret
+.endfunc
 
 ;-----------------------------------------------------------------------------
 ;
@@ -459,3 +544,5 @@
   lea ecx,[ecx+2*edx]
   COPY_8_TO_8
   ret
+.endfunc
+