;------------------------------------------------------------------------------
;
; This file is part of XviD, a free MPEG-4 video encoder/decoder
;
; This program is free software; you can redistribute it and/or modify it
; under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
;
;------------------------------------------------------------------------------

;------------------------------------------------------------------------------
;
; yuv_to_yuv.asm, MMX optimized color conversion
;
; Copyright (C) 2001 - Michael Militzer <isibaar@xvid.org>
;
; For more information visit the XviD homepage: http://www.xvid.org
;
;------------------------------------------------------------------------------

;------------------------------------------------------------------------------
;
; Revision history:
;
; 24.11.2001 initial version (Isibaar)
; 23.07.2002 thread safe (edgomez)
;
; $Id$
;
;------------------------------------------------------------------------------

BITS 32
; cglobal — declare a global symbol for export to C.
; When PREFIX is defined (platforms whose C symbols carry a leading
; underscore, e.g. Win32 COFF), export _%1 and redefine %1 so that all
; later references in this file resolve to the prefixed name.
%macro cglobal 1
	%ifdef PREFIX
		global _%1
		%define %1 _%1
	%else
		global %1
	%endif
%endmacro
SECTION .text

ALIGN 64
;------------------------------------------------------------------------------
; PLANE_COPY ( DST, DST_DIF, SRC, SRC_DIF, WIDTH, HEIGHT, OPT )
; DST      dst buffer
; DST_DIF  dst stride difference (e.g. stride - width)
; SRC      src buffer
; SRC_DIF  src stride difference (e.g. stride - width)
; WIDTH    width
; HEIGHT   height
; OPT      0=plain mmx, 1=xmm (prefetchnta + movntq non-temporal stores)
;
; Copies a WIDTH x HEIGHT plane one row at a time: 64-byte chunks first,
; then 16-byte chunks, then the remaining WIDTH %% 16 bytes via rep movsb.
; Register use: esi = src, edi = dst, ebp = rows remaining,
;               eax = width/64, ebx = (width%%64)/16, edx = width%%16,
;               ecx = inner chunk counter / movsb count.
; Clobbers: eax, ebx, ecx, edx, esi, edi, ebp, mm0-mm7, flags.
; NOTE(review): rep movsb assumes the direction flag is clear (standard
; C ABI state on entry) — confirm no caller leaves DF set.
;------------------------------------------------------------------------------
%macro PLANE_COPY 7
%define DST      %1
%define DST_DIF  %2
%define SRC      %3
%define SRC_DIF  %4
%define WIDTH    %5
%define HEIGHT   %6
%define OPT      %7

	mov eax, WIDTH
	mov ebp, HEIGHT           ; ebp = height (row counter)
	mov esi, SRC
	mov edi, DST

	mov ebx, eax
	shr eax, 6                ; eax = width / 64
	and ebx, 63               ; remainder = width % 64
	mov edx, ebx
	shr ebx, 4                ; ebx = remainder / 16
	and edx, 15               ; edx = remainder % 16

%%loop64_start:               ; top of the per-row loop
	or eax, eax               ; any 64-byte chunks in this row?
	jz %%loop16_start
	mov ecx, eax              ; ecx = number of 64-byte chunks
%%loop64:
%if OPT == 1                  ; xmm
	prefetchnta [esi + 64]    ; non temporal prefetch of the next chunk
	prefetchnta [esi + 96]
%endif
	movq mm1, [esi]           ; read 64 bytes from src
	movq mm2, [esi + 8]
	movq mm3, [esi + 16]
	movq mm4, [esi + 24]
	movq mm5, [esi + 32]
	movq mm6, [esi + 40]
	movq mm7, [esi + 48]
	movq mm0, [esi + 56]

%if OPT == 0                  ; plain mmx: cached stores
	movq [edi], mm1           ; write to y_out
	movq [edi + 8], mm2
	movq [edi + 16], mm3
	movq [edi + 24], mm4
	movq [edi + 32], mm5
	movq [edi + 40], mm6
	movq [edi + 48], mm7
	movq [edi + 56], mm0
%else                         ; xmm: non-temporal stores (bypass cache)
	movntq [edi], mm1         ; write to y_out
	movntq [edi + 8], mm2
	movntq [edi + 16], mm3
	movntq [edi + 24], mm4
	movntq [edi + 32], mm5
	movntq [edi + 40], mm6
	movntq [edi + 48], mm7
	movntq [edi + 56], mm0
%endif

	add esi, 64
	add edi, 64
	dec ecx
	jnz %%loop64

%%loop16_start:
	or ebx, ebx               ; any 16-byte chunks left in this row?
	jz %%loop1_start
	mov ecx, ebx              ; ecx = number of 16-byte chunks
%%loop16:
	movq mm1, [esi]
	movq mm2, [esi + 8]
%if OPT == 0                  ; plain mmx
	movq [edi], mm1
	movq [edi + 8], mm2
%else
	movntq [edi], mm1
	movntq [edi + 8], mm2
%endif

	add esi, 16
	add edi, 16
	dec ecx
	jnz %%loop16

%%loop1_start:
	mov ecx, edx              ; last width % 16 bytes of the row
	rep movsb

	add esi, SRC_DIF          ; skip to the next row
	add edi, DST_DIF
	dec ebp
	jnz near %%loop64_start
%endmacro
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
; MAKE_YV12_TO_YV12( NAME, OPT )
; NAME    function name
; OPT     0=plain mmx, 1=xmm
;
; Emits (cdecl, 32-bit):
; yv12_to_yv12_mmx(uint8_t * y_dst, uint8_t * u_dst, uint8_t * v_dst,
;                  int y_dst_stride, int uv_dst_stride,
;                  uint8_t * y_src, uint8_t * u_src, uint8_t * v_src,
;                  int y_src_stride, int uv_src_stride,
;                  int width, int height, int vflip)
;
; Copies the three YV12 planes with PLANE_COPY. When vflip != 0 the
; source is read bottom-up: each src pointer is moved to its last row
; and the src strides are negated.
; NOTE(review): mm0-mm7 are used and no emms is executed before ret;
; presumably XviD issues emms at a higher level — confirm callers do.
;------------------------------------------------------------------------------
%macro MAKE_YV12_TO_YV12 2
%define NAME %1
%define OPT  %2
align 16
cglobal NAME
NAME:
%define pushsize 16
%define localsize 24

; incoming cdecl stack arguments, addressed past locals + saved regs
%define vflip           esp + localsize + pushsize + 52
%define height          esp + localsize + pushsize + 48
%define width           esp + localsize + pushsize + 44
%define uv_src_stride   esp + localsize + pushsize + 40
%define y_src_stride    esp + localsize + pushsize + 36
%define v_src           esp + localsize + pushsize + 32
%define u_src           esp + localsize + pushsize + 28
%define y_src           esp + localsize + pushsize + 24
%define uv_dst_stride   esp + localsize + pushsize + 20
%define y_dst_stride    esp + localsize + pushsize + 16
%define v_dst           esp + localsize + pushsize + 12
%define u_dst           esp + localsize + pushsize + 8
%define y_dst           esp + localsize + pushsize + 4
%define _ip             esp + localsize + pushsize + 0

	push ebx                  ; esp + localsize + 12
	push esi                  ; esp + localsize + 8
	push edi                  ; esp + localsize + 4
	push ebp                  ; esp + localsize + 0

; locals
%define width2      esp + localsize - 4
%define height2     esp + localsize - 8
%define y_src_dif   esp + localsize - 12
%define y_dst_dif   esp + localsize - 16
%define uv_src_dif  esp + localsize - 20
%define uv_dst_dif  esp + localsize - 24

	sub esp, localsize

	mov eax, [width]
	mov ebx, [height]
	shr eax, 1                ; calculate width/2, height/2 (chroma size)
	shr ebx, 1
	mov [width2], eax
	mov [height2], ebx

	mov ebp, [vflip]
	or ebp, ebp
	jz near .dont_flip

	; flipping support: point src at its last row, negate the stride
	mov eax, [height]
	sub eax, 1                ; eax = height - 1 (was missing: without it
	                          ; y_src pointed one row past the buffer,
	                          ; unlike the chroma path below)
	mov esi, [y_src]
	mov edx, [y_src_stride]
	push edx                  ; mul clobbers edx with the high half
	mul edx
	pop edx
	add esi, eax              ; y_src += (height-1) * y_src_stride
	neg edx
	mov [y_src], esi
	mov [y_src_stride], edx   ; y_src_stride = -y_src_stride

	mov eax, [height2]
	mov esi, [u_src]
	mov edi, [v_src]
	mov edx, [uv_src_stride]
	sub eax, 1                ; eax = height2 - 1
	push edx
	mul edx
	pop edx
	add esi, eax              ; u_src += (height2-1) * uv_src_stride
	add edi, eax              ; v_src += (height2-1) * uv_src_stride
	neg edx
	mov [u_src], esi
	mov [v_src], edi
	mov [uv_src_stride], edx  ; uv_src_stride = -uv_src_stride

.dont_flip:

	mov eax, [y_src_stride]
	mov ebx, [y_dst_stride]
	mov ecx, [uv_src_stride]
	mov edx, [uv_dst_stride]
	sub eax, [width]
	sub ebx, [width]
	sub ecx, [width2]
	sub edx, [width2]
	mov [y_src_dif], eax      ; y_src_dif = y_src_stride - width
	mov [y_dst_dif], ebx      ; y_dst_dif = y_dst_stride - width
	mov [uv_src_dif], ecx     ; uv_src_dif = uv_src_stride - width2
	mov [uv_dst_dif], edx     ; uv_dst_dif = uv_dst_stride - width2

	PLANE_COPY [y_dst], [y_dst_dif], [y_src], [y_src_dif], [width], [height], OPT
	PLANE_COPY [u_dst], [uv_dst_dif], [u_src], [uv_src_dif], [width2], [height2], OPT
	PLANE_COPY [v_dst], [uv_dst_dif], [v_src], [uv_src_dif], [width2], [height2], OPT

	add esp, localsize
	pop ebp
	pop edi
	pop esi
	pop ebx

	ret
%endmacro
;------------------------------------------------------------------------------
; Instantiate both variants of the converter.
MAKE_YV12_TO_YV12 yv12_to_yv12_mmx, 0     ; plain MMX (cached stores)
MAKE_YV12_TO_YV12 yv12_to_yv12_xmm, 1     ; prefetchnta + movntq stores