--- mem_transfer_mmx.asm	2004/08/29 10:02:38	1.15
+++ mem_transfer_mmx.asm	2005/09/13 12:12:15	1.17
@@ -21,7 +21,7 @@
 ; * along with this program ; if not, write to the Free Software
 ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ; *
-; * $Id: mem_transfer_mmx.asm,v 1.15 2004/08/29 10:02:38 edgomez Exp $
+; * $Id: mem_transfer_mmx.asm,v 1.17 2005/09/13 12:12:15 suxen_drol Exp $
 ; *
 ; ***************************************************************************/
 
@@ -71,8 +71,10 @@
 cglobal transfer_8to16subro_mmx
 cglobal transfer_8to16sub2_mmx
 cglobal transfer_8to16sub2_xmm
+cglobal transfer_8to16sub2ro_xmm
 cglobal transfer_16to8add_mmx
 cglobal transfer8x8_copy_mmx
+cglobal transfer8x4_copy_mmx
 
 ;-----------------------------------------------------------------------------
 ;
@@ -399,6 +401,74 @@
   ret
 .endfunc
 
+
+;-----------------------------------------------------------------------------
+;
+; void transfer_8to16sub2ro_xmm(int16_t * const dct,
+;                               const uint8_t * const cur,
+;                               const uint8_t * ref1,
+;                               const uint8_t * ref2,
+;                               const uint32_t stride)
+;
+;-----------------------------------------------------------------------------
+
+%macro COPY_8_TO_16_SUB2RO_SSE 1 ; %1: index of the 2-row pair (0-3)
+  movq mm0, [eax]      ; cur, row 2*%1
+  movq mm2, [eax+edx]  ; cur, row 2*%1+1
+  movq mm1, mm0
+  movq mm3, mm2
+
+  punpcklbw mm0, mm7
+  punpcklbw mm2, mm7
+  movq mm4, [ebx]      ; ref1
+  pavgb mm4, [esi]     ; rounded average with ref2
+  punpckhbw mm1, mm7
+  punpckhbw mm3, mm7
+  movq mm5, [ebx+edx]  ; ref1
+  pavgb mm5, [esi+edx] ; rounded average with ref2
+
+  movq mm6, mm4
+  punpcklbw mm4, mm7
+  punpckhbw mm6, mm7
+  psubsw mm0, mm4
+  psubsw mm1, mm6
+  lea esi, [esi+2*edx]
+  movq mm6, mm5
+  punpcklbw mm5, mm7
+  punpckhbw mm6, mm7
+  psubsw mm2, mm5
+  lea eax, [eax+2*edx]
+  psubsw mm3, mm6
+  lea ebx, [ebx+2*edx]
+
+  movq [ecx+%1*32+ 0], mm0 ; dst
+  movq [ecx+%1*32+ 8], mm1
+  movq [ecx+%1*32+16], mm2
+  movq [ecx+%1*32+24], mm3
+%endmacro
+
+ALIGN 16
+transfer_8to16sub2ro_xmm:
+  pxor mm7, mm7       ; zero register used to unpack bytes to words
+  mov ecx, [esp + 4]  ; Dst
+  mov eax, [esp + 8]  ; Cur
+  push ebx
+  mov ebx, [esp+4+12] ; Ref1 (+4 for the pushed ebx)
+  push esi
+  mov esi, [esp+8+16] ; Ref2 (+8 for the two pushed registers)
+  mov edx, [esp+8+20] ; Stride
+
+  COPY_8_TO_16_SUB2RO_SSE 0
+  COPY_8_TO_16_SUB2RO_SSE 1
+  COPY_8_TO_16_SUB2RO_SSE 2
+  COPY_8_TO_16_SUB2RO_SSE 3
+
+  pop esi
+  pop ebx
+  ret
+.endfunc
+
+
 ;-----------------------------------------------------------------------------
 ;
 ; void transfer_16to8add_mmx(uint8_t * const dst,
@@ -477,3 +547,24 @@
   ret
 .endfunc
 
+;-----------------------------------------------------------------------------
+;
+; void transfer8x4_copy_mmx(uint8_t * const dst,
+;                           const uint8_t * const src,
+;                           const uint32_t stride);
+;
+;
+;-----------------------------------------------------------------------------
+
+ALIGN 16
+transfer8x4_copy_mmx:
+  mov ecx, [esp+ 4] ; Dst
+  mov eax, [esp+ 8] ; Src
+  mov edx, [esp+12] ; Stride
+
+  COPY_8_TO_8          ; rows 0-1
+  lea ecx, [ecx+2*edx] ; dst += 2*stride
+  COPY_8_TO_8          ; rows 2-3
+  ret
+.endfunc
+
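
Note on the new transfer_8to16sub2ro_xmm routine: it boils down to the scalar C sketch below, given only for illustration (the name transfer_8to16sub2ro_c is hypothetical, not part of this patch). The sketch assumes an 8x8 pixel block, a contiguous 8x8 int16_t dct as laid out by the [ecx+%1*32+...] stores, and pavgb's rounding-up byte average (a + b + 1) >> 1. Judging by the existing sub/subro pairs in this file, the "ro" suffix means read-only: cur is only read, never overwritten with the averaged prediction.

#include <stdint.h>

/* Hypothetical C reference for the SSE routine, illustration only.
 * Computes dct[] = cur[] - avg(ref1[], ref2[]) over an 8x8 block,
 * where avg() is pavgb's rounding-up average (a + b + 1) >> 1.
 * cur is never written ("ro" = read-only variant). */
static void transfer_8to16sub2ro_c(int16_t *const dct,
                                   const uint8_t *const cur,
                                   const uint8_t *ref1,
                                   const uint8_t *ref2,
                                   const uint32_t stride)
{
    for (int j = 0; j < 8; j++) {
        for (int i = 0; i < 8; i++) {
            const unsigned avg =
                (ref1[j * stride + i] + ref2[j * stride + i] + 1) >> 1;
            /* psubsw saturates, but cur - avg always lies in [-255, 255],
             * so a plain subtraction matches the SIMD result exactly. */
            dct[j * 8 + i] = (int16_t)(cur[j * stride + i] - avg);
        }
    }
}

The other addition, transfer8x4_copy_mmx, is simply the 8x4 counterpart of the existing transfer8x8_copy_mmx: two COPY_8_TO_8 invocations cover the four 8-byte rows (evidently two rows per invocation, with the lea advancing the destination between the pairs), so only the half-height loop body differs.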