--- qpel.h	2004/10/12 21:06:34	1.2.2.1
+++ qpel.h	2008/11/26 01:04:34	1.8
@@ -19,13 +19,14 @@
  * along with this program ; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  *
- * $Id: qpel.h,v 1.2.2.1 2004/10/12 21:06:34 edgomez Exp $
+ * $Id: qpel.h,v 1.8 2008/11/26 01:04:34 Isibaar Exp $
  *
  ****************************************************************************/
 
 #ifndef _XVID_QPEL_H_
 #define _XVID_QPEL_H_
 
+#include "interpolate8x8.h"
 #include "../utils/mem_transfer.h"
 
 /*****************************************************************************
@@ -66,14 +67,22 @@
 ****************************************************************************/
 
 extern void xvid_Init_QP();
 
+extern XVID_QP_FUNCS xvid_QP_Funcs_C_ref;	/* for P-frames */
+extern XVID_QP_FUNCS xvid_QP_Add_Funcs_C_ref;	/* for B-frames */
+
 extern XVID_QP_FUNCS xvid_QP_Funcs_C;	/* for P-frames */
 extern XVID_QP_FUNCS xvid_QP_Add_Funcs_C;	/* for B-frames */
 
-#ifdef ARCH_IS_IA32
+#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
 extern XVID_QP_FUNCS xvid_QP_Funcs_mmx;
 extern XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx;
 #endif
 
+#ifdef ARCH_IS_PPC
+extern XVID_QP_FUNCS xvid_QP_Funcs_Altivec_C;
+extern XVID_QP_FUNCS xvid_QP_Add_Funcs_Altivec_C;
+#endif
+
 extern XVID_QP_FUNCS *xvid_QP_Funcs;	/* <- main pointer for enc/dec structure */
 extern XVID_QP_FUNCS *xvid_QP_Add_Funcs;	/* <- main pointer for enc/dec structure */
 
@@ -97,8 +106,8 @@
 ****************************************************************************/
 
 
-static void __inline 
-new_interpolate16x16_quarterpel(uint8_t * const cur,
+static void __inline
+interpolate16x16_quarterpel(uint8_t * const cur,
 				uint8_t * const refn,
 				uint8_t * const refh,
 				uint8_t * const refv,
@@ -119,16 +128,11 @@
 	const int32_t xRef = (int)x*4 + dx;
 	const int32_t yRef = (int)y*4 + dy;
 
-	Ops = xvid_QP_Funcs; /* TODO: pass as argument */
+	Ops = xvid_QP_Funcs;
 	quads = (dx&3) | ((dy&3)<<2);
 
-	x_int = xRef/4;
-	if (xRef < 0 && xRef % 4)
-		x_int--;
-
-	y_int = yRef/4;
-	if (yRef < 0 && yRef % 4)
-		y_int--;
+	x_int = xRef >> 2;
+	y_int = yRef >> 2;
 
 	dst = cur + y * stride + x;
 	src = refn + y_int * (int)stride + x_int;
@@ -137,10 +141,10 @@
 	tmp = refh; /* we need at least a 16 x stride scratch block */
 	switch(quads) {
 	case 0:
-		transfer8x8_copy( dst, src, stride);
-		transfer8x8_copy( dst+8, src+8, stride);
-		transfer8x8_copy( dst+8*stride, src+8*stride, stride);
-		transfer8x8_copy( dst+8*stride+8, src+8*stride+8, stride);
+		transfer8x8_copy(dst, src, stride);
+		transfer8x8_copy(dst+8, src+8, stride);
+		transfer8x8_copy(dst+8*stride, src+8*stride, stride);
+		transfer8x8_copy(dst+8*stride+8, src+8*stride+8, stride);
 		break;
 	case 1:
 		Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
@@ -199,8 +203,109 @@
 	}
 }
 
+static void __inline
+interpolate16x16_add_quarterpel(uint8_t * const cur,
+				uint8_t * const refn,
+				uint8_t * const refh,
+				uint8_t * const refv,
+				uint8_t * const refhv,
+				const uint32_t x, const uint32_t y,
+				const int32_t dx, const int32_t dy,
+				const uint32_t stride,
+				const uint32_t rounding)
+{
+	const uint8_t *src;
+	uint8_t *dst;
+	uint8_t *tmp;
+	int32_t quads;
+	const XVID_QP_FUNCS *Ops;
+	const XVID_QP_FUNCS *Ops_Copy;
+
+	int32_t x_int, y_int;
+
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+
+	Ops = xvid_QP_Add_Funcs;
+	Ops_Copy = xvid_QP_Funcs;
+	quads = (dx&3) | ((dy&3)<<2);
+
+	x_int = xRef >> 2;
+	y_int = yRef >> 2;
+
+	dst = cur + y * stride + x;
+	src = refn + y_int * (int)stride + x_int;
+
+	tmp = refh; /* we need at least a 16 x stride scratch block */
+
+	switch(quads) {
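+	/* quads packs the quarter-pel phase: bits 0-1 = dx&3 (horizontal),
+	 * bits 2-3 = dy&3 (vertical). Pure-horizontal phases take a single
+	 * H pass, pure-vertical ones a single V pass, and mixed phases do
+	 * an H pass into tmp followed by a V pass. */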
+	case 0:
+		/* NB: there is no halfpel involved here! the function's name
+		 * can be misleading */
+		interpolate8x8_halfpel_add(dst, src, stride, rounding);
+		interpolate8x8_halfpel_add(dst+8, src+8, stride, rounding);
+		interpolate8x8_halfpel_add(dst+8*stride, src+8*stride, stride, rounding);
+		interpolate8x8_halfpel_add(dst+8*stride+8, src+8*stride+8, stride, rounding);
+		break;
+	case 1:
+		Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
+		break;
+	case 2:
+		Ops->H_Pass(dst, src, 16, stride, rounding);
+		break;
+	case 3:
+		Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding);
+		break;
+	case 4:
+		Ops->V_Pass_Avrg(dst, src, 16, stride, rounding);
+		break;
+	case 5:
+		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+		break;
+	case 6:
+		Ops_Copy->H_Pass(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+		break;
+	case 7:
+		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+		break;
+	case 8:
+		Ops->V_Pass(dst, src, 16, stride, rounding);
+		break;
+	case 9:
+		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+		Ops->V_Pass(dst, tmp, 16, stride, rounding);
+		break;
+	case 10:
+		Ops_Copy->H_Pass(tmp, src, 17, stride, rounding);
+		Ops->V_Pass(dst, tmp, 16, stride, rounding);
+		break;
+	case 11:
+		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+		Ops->V_Pass(dst, tmp, 16, stride, rounding);
+		break;
+	case 12:
+		Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding);
+		break;
+	case 13:
+		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+		break;
+	case 14:
+		Ops_Copy->H_Pass(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+		break;
+	case 15:
+		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+		break;
+	}
+}
+
 static void __inline
-new_interpolate16x8_quarterpel(uint8_t * const cur,
+interpolate16x8_quarterpel(uint8_t * const cur,
 				uint8_t * const refn,
 				uint8_t * const refh,
 				uint8_t * const refv,
@@ -221,16 +326,11 @@
 	const int32_t xRef = (int)x*4 + dx;
 	const int32_t yRef = (int)y*4 + dy;
 
-	Ops = xvid_QP_Funcs; /* TODO: pass as argument */
+	Ops = xvid_QP_Funcs;
 	quads = (dx&3) | ((dy&3)<<2);
 
-	x_int = xRef/4;
-	if (xRef < 0 && xRef % 4)
-		x_int--;
-
-	y_int = yRef/4;
-	if (yRef < 0 && yRef % 4)
-		y_int--;
+	x_int = xRef >> 2;
+	y_int = yRef >> 2;
 
 	dst = cur + y * stride + x;
 	src = refn + y_int * (int)stride + x_int;
@@ -300,7 +400,7 @@
 }
 
 static void __inline
-new_interpolate8x8_quarterpel(uint8_t * const cur,
+interpolate8x8_quarterpel(uint8_t * const cur,
 				uint8_t * const refn,
 				uint8_t * const refh,
 				uint8_t * const refv,
@@ -321,16 +421,11 @@
 	const int32_t xRef = (int)x*4 + dx;
 	const int32_t yRef = (int)y*4 + dy;
 
-	Ops = xvid_QP_Funcs; /* TODO: pass as argument */
+	Ops = xvid_QP_Funcs;
 	quads = (dx&3) | ((dy&3)<<2);
 
-	x_int = xRef/4;
-	if (xRef < 0 && xRef % 4)
-		x_int--;
-
-	y_int = yRef/4;
-	if (yRef < 0 && yRef % 4)
-		y_int--;
+	x_int = xRef >> 2;
+	y_int = yRef >> 2;
 
 	dst = cur + y * stride + x;
 	src = refn + y_int * (int)stride + x_int;
@@ -398,4 +493,102 @@
 	}
 }
 
+static void __inline
+interpolate8x8_add_quarterpel(uint8_t * const cur,
+				uint8_t * const refn,
+				uint8_t * const refh,
+				uint8_t * const refv,
+				uint8_t * const refhv,
+				const uint32_t x, const uint32_t y,
+				const int32_t dx, const int32_t dy,
+				const uint32_t stride,
+				const uint32_t rounding)
+{
+	const uint8_t *src;
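+	/* src will point at the integer-pel position in refn, dst at the
+	 * target block in cur; tmp aliases refh as scratch for the
+	 * two-pass (H then V) cases */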
+	uint8_t *dst;
+	uint8_t *tmp;
+	int32_t quads;
+	const XVID_QP_FUNCS *Ops;
+	const XVID_QP_FUNCS *Ops_Copy;
+
+	int32_t x_int, y_int;
+
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+
+	Ops = xvid_QP_Add_Funcs;
+	Ops_Copy = xvid_QP_Funcs;
+	quads = (dx&3) | ((dy&3)<<2);
+
+	x_int = xRef >> 2;
+	y_int = yRef >> 2;
+
+	dst = cur + y * stride + x;
+	src = refn + y_int * (int)stride + x_int;
+
+	tmp = refh; /* we need at least a 16 x stride scratch block */
+
+	switch(quads) {
+	case 0:
+		/* Misleading function name: there is no halfpel involved,
+		 * just dst and src averaging with rounding=0 */
+		interpolate8x8_halfpel_add(dst, src, stride, rounding);
+		break;
+	case 1:
+		Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding);
+		break;
+	case 2:
+		Ops->H_Pass_8(dst, src, 8, stride, rounding);
+		break;
+	case 3:
+		Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
+		break;
+	case 4:
+		Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding);
+		break;
+	case 5:
+		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 6:
+		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 7:
+		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 8:
+		Ops->V_Pass_8(dst, src, 8, stride, rounding);
+		break;
+	case 9:
+		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 10:
+		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 11:
+		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 12:
+		Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
+		break;
+	case 13:
+		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 14:
+		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 15:
+		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+		break;
+	}
+}
+
 #endif /* _XVID_QPEL_H_ */
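
Editorial usage note (not part of the patch): each helper writes a motion-compensated prediction into cur at block position (x,y), reading from the reference plane refn with a motion vector (dx,dy) given in quarter-pel units. Below is a minimal sketch of a caller; the wrapper name and buffer handling are hypothetical. It assumes xvid_Init_QP() has run once so that xvid_QP_Funcs points at a valid function table, as the externs above suggest. refv and refhv are accepted for signature compatibility but never read by these helpers (only refh is used, as scratch), so null pointers are passed for them here.

	#include <stddef.h>
	#include <stdint.h>
	#include "qpel.h"

	/* hypothetical wrapper: predict one 16x16 block of 'cur' at (x,y)
	 * from 'ref', motion vector (dx,dy) in quarter-pel units.
	 * 'scratch' must hold at least 17*stride bytes: the horizontal
	 * pass emits a 17-row intermediate for the combined h+v cases. */
	static void qpel_predict_16x16(uint8_t *cur, uint8_t *ref,
	                               uint8_t *scratch,
	                               uint32_t x, uint32_t y,
	                               int32_t dx, int32_t dy,
	                               uint32_t stride, uint32_t rounding)
	{
		interpolate16x16_quarterpel(cur, ref, scratch, NULL, NULL,
		                            x, y, dx, dy, stride, rounding);
	}

The _add variants (interpolate16x16_add_quarterpel, interpolate8x8_add_quarterpel) are called the same way, but average the filtered prediction into the pixels already present in cur, which is why they dispatch through xvid_QP_Add_Funcs instead of xvid_QP_Funcs.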