--- mem_transfer_mmx.asm	2004/07/24 11:46:08	1.13
+++ mem_transfer_mmx.asm	2005/09/13 12:12:15	1.17
@@ -21,7 +21,7 @@
 ; * along with this program ; if not, write to the Free Software
 ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ; *
-; * $Id: mem_transfer_mmx.asm,v 1.13 2004/07/24 11:46:08 edgomez Exp $
+; * $Id: mem_transfer_mmx.asm,v 1.17 2005/09/13 12:12:15 suxen_drol Exp $
 ; *
 ; ***************************************************************************/
 
@@ -29,10 +29,19 @@
 
 %macro cglobal 1
   %ifdef PREFIX
-    global _%1
-    %define %1 _%1
+    %ifdef MARK_FUNCS
+      global _%1:function %1.endfunc-%1
+      %define %1 _%1:function %1.endfunc-%1
+    %else
+      global _%1
+      %define %1 _%1
+    %endif
   %else
-    global %1
+    %ifdef MARK_FUNCS
+      global %1:function %1.endfunc-%1
+    %else
+      global %1
+    %endif
   %endif
 %endmacro
 
@@ -62,8 +71,10 @@
 cglobal transfer_8to16subro_mmx
 cglobal transfer_8to16sub2_mmx
 cglobal transfer_8to16sub2_xmm
+cglobal transfer_8to16sub2ro_xmm
 cglobal transfer_16to8add_mmx
 cglobal transfer8x8_copy_mmx
+cglobal transfer8x4_copy_mmx
 
 ;-----------------------------------------------------------------------------
 ;
@@ -102,6 +113,7 @@
   COPY_8_TO_16 2
   COPY_8_TO_16 3
   ret
+.endfunc
 
 ;-----------------------------------------------------------------------------
 ;
@@ -137,6 +149,7 @@
   lea ecx,[ecx+2*edx]
   COPY_16_TO_8 3
   ret
+.endfunc
 
 ;-----------------------------------------------------------------------------
 ;
@@ -200,6 +213,7 @@
 
   pop ebx
   ret
+.endfunc
 
 
 ALIGN 16
@@ -218,6 +232,7 @@
 
   pop ebx
   ret
+.endfunc
 
 
 ;-----------------------------------------------------------------------------
@@ -316,6 +331,7 @@
   pop esi
   pop ebx
   ret
+.endfunc
 
 ;-----------------------------------------------------------------------------
 ;
@@ -383,6 +399,75 @@
   pop esi
   pop ebx
   ret
+.endfunc
+
+
+;-----------------------------------------------------------------------------
+;
+; void transfer_8to16sub2ro_xmm(int16_t * const dct,
+;                               const uint8_t * const cur,
+;                               const uint8_t * ref1,
+;                               const uint8_t * ref2,
+;                               const uint32_t stride)
+;
+;-----------------------------------------------------------------------------
+
+%macro COPY_8_TO_16_SUB2RO_SSE 1
+  movq mm0, [eax]      ; cur
+  movq mm2, [eax+edx]
+  movq mm1, mm0
+  movq mm3, mm2
+
+  punpcklbw mm0, mm7
+  punpcklbw mm2, mm7
+  movq mm4, [ebx]      ; ref1
+  pavgb mm4, [esi]     ; ref2
+  punpckhbw mm1, mm7
+  punpckhbw mm3, mm7
+  movq mm5, [ebx+edx]  ; ref
+  pavgb mm5, [esi+edx] ; ref2
+
+  movq mm6, mm4
+  punpcklbw mm4, mm7
+  punpckhbw mm6, mm7
+  psubsw mm0, mm4
+  psubsw mm1, mm6
+  lea esi, [esi+2*edx]
+  movq mm6, mm5
+  punpcklbw mm5, mm7
+  punpckhbw mm6, mm7
+  psubsw mm2, mm5
+  lea eax, [eax+2*edx]
+  psubsw mm3, mm6
+  lea ebx, [ebx+2*edx]
+
+  movq [ecx+%1*32+ 0], mm0 ; dst
+  movq [ecx+%1*32+ 8], mm1
+  movq [ecx+%1*32+16], mm2
+  movq [ecx+%1*32+24], mm3
+%endmacro
+
+ALIGN 16
+transfer_8to16sub2ro_xmm:
+  pxor mm7, mm7
+  mov ecx, [esp + 4]  ; Dst
+  mov eax, [esp + 8]  ; Cur
+  push ebx
+  mov ebx, [esp+4+12] ; Ref1
+  push esi
+  mov esi, [esp+8+16] ; Ref2
+  mov edx, [esp+8+20] ; Stride
+
+  COPY_8_TO_16_SUB2RO_SSE 0
+  COPY_8_TO_16_SUB2RO_SSE 1
+  COPY_8_TO_16_SUB2RO_SSE 2
+  COPY_8_TO_16_SUB2RO_SSE 3
+
+  pop esi
+  pop ebx
+  ret
+.endfunc
+
 
 ;-----------------------------------------------------------------------------
 ;
@@ -427,6 +512,7 @@
   lea ecx,[ecx+2*edx]
   COPY_16_TO_8_ADD 3
   ret
+.endfunc
 
 ;-----------------------------------------------------------------------------
 ;
@@ -459,3 +545,26 @@
   lea ecx,[ecx+2*edx]
   COPY_8_TO_8
   ret
+.endfunc
+
+;-----------------------------------------------------------------------------
+;
+; void transfer8x4_copy_mmx(uint8_t * const dst,
+;                           const uint8_t * const src,
+;                           const uint32_t stride);
+;
+;
+;-----------------------------------------------------------------------------
+
+ALIGN 16
+transfer8x4_copy_mmx:
+  mov ecx, [esp+ 4] ; Dst
+  mov eax, [esp+ 8] ; Src
+  mov edx, [esp+12] ; Stride
+
+  COPY_8_TO_8
+  lea ecx,[ecx+2*edx]
+  COPY_8_TO_8
+  ret
+.endfunc
+
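
The `.endfunc` labels added after every `ret` exist so that, when MARK_FUNCS is defined, the reworked `cglobal` macro can use NASM's ELF extension `global sym:function size` (here `%1.endfunc-%1`) to record each routine's size in the symbol table. The new `transfer_8to16sub2ro_xmm` computes, for an 8x8 block, the difference between the current block and the rounded average of two predictions, leaving `cur` untouched (the `ro` suffix presumably marks the read-only variant, by analogy with `transfer_8to16subro_mmx`). Two details of the SIMD code are worth noting: `pavgb` averages unsigned bytes with upward rounding, i.e. (a + b + 1) >> 1, and `psubsw`, though saturating, can never actually saturate here because both operands are zero-extended bytes in 0..255. The following C model of the arithmetic is a minimal sketch for illustration; the function name is ours and is not part of the patch or of XviD's reference code:

    #include <stdint.h>

    /* Sketch of what transfer_8to16sub2ro_xmm computes:
     * dct = cur - round_avg(ref1, ref2) over an 8x8 block,
     * with cur read-only. Illustration only, not from the patch. */
    static void
    transfer_8to16sub2ro_model(int16_t * const dct,
                               const uint8_t * const cur,
                               const uint8_t * ref1,
                               const uint8_t * ref2,
                               const uint32_t stride)
    {
        uint32_t i, j;

        for (j = 0; j < 8; j++) {
            for (i = 0; i < 8; i++) {
                /* pavgb semantics: average rounded toward +infinity */
                int r = (ref1[j * stride + i] + ref2[j * stride + i] + 1) >> 1;
                /* psubsw cannot saturate: both operands are 0..255 */
                dct[j * 8 + i] = (int16_t) (cur[j * stride + i] - r);
            }
        }
    }

Each `COPY_8_TO_16_SUB2RO_SSE` invocation handles two rows (all three pointers advance by `2*edx`), so four invocations cover the 8x8 block, and the `%1*32` destination offset matches two rows of eight 16-bit coefficients per invocation.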
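
The other addition, `transfer8x4_copy_mmx`, is simply an 8-wide, 4-high variant of `transfer8x8_copy_mmx`: it invokes the existing `COPY_8_TO_8` macro twice instead of four times, each invocation moving two 8-byte rows. A rough C equivalent, again a sketch with a name of our choosing rather than code from the patch:

    #include <stdint.h>
    #include <string.h>

    /* Sketch of transfer8x4_copy_mmx: copy an 8x4 pixel block
     * between two strided buffers. Illustration only. */
    static void
    transfer8x4_copy_model(uint8_t * const dst,
                           const uint8_t * const src,
                           const uint32_t stride)
    {
        uint32_t j;

        for (j = 0; j < 4; j++)
            memcpy(dst + j * stride, src + j * stride, 8);
    }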