--- mem_transfer.h 2002/11/17 00:51:11 1.11 +++ mem_transfer.h 2004/04/05 20:36:37 1.14 @@ -3,54 +3,23 @@ * XVID MPEG-4 VIDEO CODEC * - 8<->16 bit buffer transfer header - * - * Copyright(C) 2002 Michael Militzer + * Copyright(C) 2001-2003 Peter Ross * - * This file is part of XviD, a free MPEG-4 video encoder/decoder - * - * XviD is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of + * but WITHOUT ANY WARRANTY ; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * Under section 8 of the GNU General Public License, the copyright - * holders of XVID explicitly forbid distribution in the following - * countries: - * - * - Japan - * - United States of America - * - * Linking XviD statically or dynamically with other modules is making a - * combined work based on XviD. Thus, the terms and conditions of the - * GNU General Public License cover the whole combination. - * - * As a special exception, the copyright holders of XviD give you - * permission to link XviD with independent modules that communicate with - * XviD solely through the VFW1.1 and DShow interfaces, regardless of the - * license terms of these independent modules, and to copy and distribute - * the resulting combined work under terms of your choice, provided that - * every copy of the combined work is accompanied by a complete copy of - * the source code of XviD (the version of XviD used to produce the - * combined work), being distributed under the terms of the GNU General - * Public License plus this exception. An independent module is a module - * which is not derived from or based on XviD. - * - * Note that people who make modified versions of XviD are not obligated - * to grant this special exception for their modified versions; it is - * their choice whether to do so. The GNU General Public License gives - * permission to release a modified version without this exception; this - * exception also makes it possible to release a modified version which - * carries forward this exception. - * - * $Id: mem_transfer.h,v 1.11 2002/11/17 00:51:11 edgomez Exp $ + * $Id: mem_transfer.h,v 1.14 2004/04/05 20:36:37 edgomez Exp $ * ****************************************************************************/ @@ -71,9 +40,20 @@ extern TRANSFER_8TO16COPY_PTR transfer_8to16copy; /* Implemented functions */ -TRANSFER_8TO16COPY transfer_8to16copy_c; -TRANSFER_8TO16COPY transfer_8to16copy_mmx; -TRANSFER_8TO16COPY transfer_8to16copy_ia64; +extern TRANSFER_8TO16COPY transfer_8to16copy_c; + +#ifdef ARCH_IS_IA32 +extern TRANSFER_8TO16COPY transfer_8to16copy_mmx; +extern TRANSFER_8TO16COPY transfer_8to16copy_3dne; +#endif + +#ifdef ARCH_IS_IA64 +extern TRANSFER_8TO16COPY transfer_8to16copy_ia64; +#endif + +#ifdef ARCH_IS_PPC +extern TRANSFER_8TO16COPY transfer_8to16copy_altivec_c; +#endif /***************************************************************************** * transfer16to8 API @@ -88,12 +68,23 @@ extern TRANSFER_16TO8COPY_PTR transfer_16to8copy; /* Implemented functions */ -TRANSFER_16TO8COPY transfer_16to8copy_c; -TRANSFER_16TO8COPY transfer_16to8copy_mmx; -TRANSFER_16TO8COPY transfer_16to8copy_ia64; +extern TRANSFER_16TO8COPY transfer_16to8copy_c; + +#ifdef ARCH_IS_IA32 +extern TRANSFER_16TO8COPY transfer_16to8copy_mmx; +extern TRANSFER_16TO8COPY transfer_16to8copy_3dne; +#endif + +#ifdef ARCH_IS_IA64 +extern TRANSFER_16TO8COPY transfer_16to8copy_ia64; +#endif + +#ifdef ARCH_IS_PPC +extern TRANSFER_16TO8COPY transfer_16to8copy_altivec_c; +#endif /***************************************************************************** - * transfer8to16 + substraction op API + * transfer8to16 + substraction *writeback* op API ****************************************************************************/ typedef void (TRANSFER_8TO16SUB) (int16_t * const dct, @@ -107,9 +98,46 @@ extern TRANSFER_8TO16SUB_PTR transfer_8to16sub; /* Implemented functions */ -TRANSFER_8TO16SUB transfer_8to16sub_c; -TRANSFER_8TO16SUB transfer_8to16sub_mmx; -TRANSFER_8TO16SUB transfer_8to16sub_ia64; +extern TRANSFER_8TO16SUB transfer_8to16sub_c; + +#ifdef ARCH_IS_IA32 +extern TRANSFER_8TO16SUB transfer_8to16sub_mmx; +extern TRANSFER_8TO16SUB transfer_8to16sub_3dne; +#endif + +#ifdef ARCH_IS_IA64 +extern TRANSFER_8TO16SUB transfer_8to16sub_ia64; +#endif + +#ifdef ARCH_IS_PPC +extern TRANSFER_8TO16SUB transfer_8to16sub_altivec_c; +#endif + +/***************************************************************************** + * transfer8to16 + substraction *readonly* op API + ****************************************************************************/ + +typedef void (TRANSFER_8TO16SUBRO) (int16_t * const dct, + const uint8_t * const cur, + const uint8_t * ref, + const uint32_t stride); + +typedef TRANSFER_8TO16SUBRO *TRANSFER_8TO16SUBRO_PTR; + +/* Our global function pointer - Initialized in xvid.c */ +extern TRANSFER_8TO16SUBRO_PTR transfer_8to16subro; + +/* Implemented functions */ +extern TRANSFER_8TO16SUBRO transfer_8to16subro_c; + +#ifdef ARCH_IS_IA32 +extern TRANSFER_8TO16SUBRO transfer_8to16subro_mmx; +extern TRANSFER_8TO16SUBRO transfer_8to16subro_3dne; +#endif + +#ifdef ARCH_IS_PPC +extern TRANSFER_8TO16SUBRO transfer_8to16subro_altivec_c; +#endif /***************************************************************************** * transfer8to16 + substraction op API - Bidirectionnal Version @@ -128,10 +156,20 @@ /* Implemented functions */ TRANSFER_8TO16SUB2 transfer_8to16sub2_c; -TRANSFER_8TO16SUB2 transfer_8to16sub2_mmx; -TRANSFER_8TO16SUB2 transfer_8to16sub2_xmm; -TRANSFER_8TO16SUB2 transfer_8to16sub2_ia64; +#ifdef ARCH_IS_IA32 +extern TRANSFER_8TO16SUB2 transfer_8to16sub2_mmx; +extern TRANSFER_8TO16SUB2 transfer_8to16sub2_xmm; +extern TRANSFER_8TO16SUB2 transfer_8to16sub2_3dne; +#endif + +#ifdef ARCH_IS_IA64 +extern TRANSFER_8TO16SUB2 transfer_8to16sub2_ia64; +#endif + +#ifdef ARCH_IS_PPC +extern TRANSFER_8TO16SUB2 transfer_8to16sub2_altivec_c; +#endif /***************************************************************************** * transfer16to8 + addition op API @@ -147,9 +185,20 @@ extern TRANSFER_16TO8ADD_PTR transfer_16to8add; /* Implemented functions */ -TRANSFER_16TO8ADD transfer_16to8add_c; -TRANSFER_16TO8ADD transfer_16to8add_mmx; -TRANSFER_16TO8ADD transfer_16to8add_ia64; +extern TRANSFER_16TO8ADD transfer_16to8add_c; + +#ifdef ARCH_IS_IA32 +extern TRANSFER_16TO8ADD transfer_16to8add_mmx; +extern TRANSFER_16TO8ADD transfer_16to8add_3dne; +#endif + +#ifdef ARCH_IS_IA64 +extern TRANSFER_16TO8ADD transfer_16to8add_ia64; +#endif + +#ifdef ARCH_IS_PPC +extern TRANSFER_16TO8ADD transfer_16to8add_altivec_c; +#endif /***************************************************************************** * transfer8to8 + no op @@ -165,8 +214,42 @@ extern TRANSFER8X8_COPY_PTR transfer8x8_copy; /* Implemented functions */ -TRANSFER8X8_COPY transfer8x8_copy_c; -TRANSFER8X8_COPY transfer8x8_copy_mmx; -TRANSFER8X8_COPY transfer8x8_copy_ia64; +extern TRANSFER8X8_COPY transfer8x8_copy_c; + +#ifdef ARCH_IS_IA32 +extern TRANSFER8X8_COPY transfer8x8_copy_mmx; +extern TRANSFER8X8_COPY transfer8x8_copy_3dne; +#endif + +#ifdef ARCH_IS_IA64 +extern TRANSFER8X8_COPY transfer8x8_copy_ia64; +#endif + +#ifdef ARCH_IS_PPC +extern TRANSFER8X8_COPY transfer8x8_copy_altivec_c; +#endif + +static __inline void +transfer16x16_copy(uint8_t * const dst, + const uint8_t * const src, + const uint32_t stride) +{ + transfer8x8_copy(dst, src, stride); + transfer8x8_copy(dst + 8, src + 8, stride); + transfer8x8_copy(dst + 8*stride, src + 8*stride, stride); + transfer8x8_copy(dst + 8*stride + 8, src + 8*stride + 8, stride); +} + +static __inline void +transfer32x32_copy(uint8_t * const dst, + const uint8_t * const src, + const uint32_t stride) +{ + transfer16x16_copy(dst, src, stride); + transfer16x16_copy(dst + 16, src + 16, stride); + transfer16x16_copy(dst + 16*stride, src + 16*stride, stride); + transfer16x16_copy(dst + 16*stride + 16, src + 16*stride + 16, stride); +} + #endif