--- qpel.h	2003/08/23 15:11:23	1.1.4.2
+++ qpel.h	2004/10/17 10:20:15	1.5
@@ -19,13 +19,14 @@
  * along with this program ; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  *
- * $Id: qpel.h,v 1.1.4.2 2003/08/23 15:11:23 edgomez Exp $
+ * $Id: qpel.h,v 1.5 2004/10/17 10:20:15 edgomez Exp $
  *
  ****************************************************************************/

 #ifndef _XVID_QPEL_H_
 #define _XVID_QPEL_H_

+#include "interpolate8x8.h"
 #include "../utils/mem_transfer.h"

 /*****************************************************************************
@@ -51,7 +52,7 @@
 	XVID_QP_PASS *V_Pass_Avrg;
 	XVID_QP_PASS *V_Pass_Avrg_Up;

-	// filter for QPel 8x? prediction
+	/* filter for QPel 8x? prediction */

 	XVID_QP_PASS *H_Pass_8;
 	XVID_QP_PASS *H_Pass_Avrg_8;
@@ -66,16 +67,24 @@
  ****************************************************************************/

 extern void xvid_Init_QP();

-extern XVID_QP_FUNCS xvid_QP_Funcs_C;			// for P-frames
-extern XVID_QP_FUNCS xvid_QP_Add_Funcs_C;		// for B-frames
+extern XVID_QP_FUNCS xvid_QP_Funcs_C_ref;		/* for P-frames */
+extern XVID_QP_FUNCS xvid_QP_Add_Funcs_C_ref;	/* for B-frames */
+
+extern XVID_QP_FUNCS xvid_QP_Funcs_C;			/* for P-frames */
+extern XVID_QP_FUNCS xvid_QP_Add_Funcs_C;		/* for B-frames */

 #ifdef ARCH_IS_IA32
 extern XVID_QP_FUNCS xvid_QP_Funcs_mmx;
 extern XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx;
 #endif

-extern XVID_QP_FUNCS *xvid_QP_Funcs;		// <- main pointer for enc/dec structure
-extern XVID_QP_FUNCS *xvid_QP_Add_Funcs;	// <- main pointer for enc/dec structure
+#ifdef ARCH_IS_PPC
+extern XVID_QP_FUNCS xvid_QP_Funcs_Altivec_C;
+extern XVID_QP_FUNCS xvid_QP_Add_Funcs_Altivec_C;
+#endif
+
+extern XVID_QP_FUNCS *xvid_QP_Funcs;		/* <- main pointer for enc/dec structure */
+extern XVID_QP_FUNCS *xvid_QP_Add_Funcs;	/* <- main pointer for enc/dec structure */

 /*****************************************************************************
  * macros
@@ -97,8 +106,8 @@
  ****************************************************************************/

-static void __inline
-new_interpolate16x16_quarterpel(uint8_t * const cur,
+static void __inline
+interpolate16x16_quarterpel(uint8_t * const cur,
				uint8_t * const refn,
				uint8_t * const refh,
				uint8_t * const refv,
@@ -116,31 +125,31 @@

 	int32_t x_int, y_int;

-	const int32_t xRef = x*4 + dx;
-	const int32_t yRef = y*4 + dy;
-
-	Ops = xvid_QP_Funcs; // TODO: pass as argument
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+
+	Ops = xvid_QP_Funcs;
 	quads = (dx&3) | ((dy&3)<<2);
-
+
 	x_int = xRef/4;
 	if (xRef < 0 && xRef % 4)
 		x_int--;
-
+
 	y_int = yRef/4;
 	if (yRef < 0 && yRef % 4)
 		y_int--;
-
+
 	dst = cur + y * stride + x;
-	src = refn + y_int * stride + x_int;
+	src = refn + y_int * (int)stride + x_int;

-	tmp = refh; // we need at least a 16 x stride scratch block
+	tmp = refh; /* we need at least a 16 x stride scratch block */

 	switch(quads) {
 	case 0:
-		transfer8x8_copy( dst, src, stride);
-		transfer8x8_copy( dst+8, src+8, stride);
-		transfer8x8_copy( dst+8*stride, src+8*stride, stride);
-		transfer8x8_copy( dst+8*stride+8, src+8*stride+8, stride);
+		transfer8x8_copy(dst, src, stride);
+		transfer8x8_copy(dst+8, src+8, stride);
+		transfer8x8_copy(dst+8*stride, src+8*stride, stride);
+		transfer8x8_copy(dst+8*stride+8, src+8*stride+8, stride);
 		break;
 	case 1:
 		Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
 		break;
@@ -199,8 +208,114 @@
 	}
 }
+static void __inline
+interpolate16x16_add_quarterpel(uint8_t * const cur,
+				uint8_t * const refn,
+				uint8_t * const refh,
+				uint8_t * const refv,
+				uint8_t * const refhv,
+				const uint32_t x, const uint32_t y,
+				const int32_t dx, const int dy,
+				const uint32_t stride,
+				const uint32_t rounding)
+{
+	const uint8_t *src;
+	uint8_t *dst;
+	uint8_t *tmp;
+	int32_t quads;
+	const XVID_QP_FUNCS *Ops;
+	const XVID_QP_FUNCS *Ops_Copy;
+
+	int32_t x_int, y_int;
+
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+
+	Ops = xvid_QP_Add_Funcs;
+	Ops_Copy = xvid_QP_Funcs;
+	quads = (dx&3) | ((dy&3)<<2);
+
+	x_int = xRef/4;
+	if (xRef < 0 && xRef % 4)
+		x_int--;
+
+	y_int = yRef/4;
+	if (yRef < 0 && yRef % 4)
+		y_int--;
+
+	dst = cur + y * stride + x;
+	src = refn + y_int * (int)stride + x_int;
+
+	tmp = refh; /* we need at least a 16 x stride scratch block */
+
+	switch(quads) {
+	case 0:
+		/* NB: there is no halfpel involved! the function's name can be
+		 * misleading */
+		interpolate8x8_halfpel_add(dst, src, stride, rounding);
+		interpolate8x8_halfpel_add(dst+8, src+8, stride, rounding);
+		interpolate8x8_halfpel_add(dst+8*stride, src+8*stride, stride, rounding);
+		interpolate8x8_halfpel_add(dst+8*stride+8, src+8*stride+8, stride, rounding);
+		break;
+	case 1:
+		Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
+		break;
+	case 2:
+		Ops->H_Pass(dst, src, 16, stride, rounding);
+		break;
+	case 3:
+		Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding);
+		break;
+	case 4:
+		Ops->V_Pass_Avrg(dst, src, 16, stride, rounding);
+		break;
+	case 5:
+		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+		break;
+	case 6:
+		Ops_Copy->H_Pass(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+		break;
+	case 7:
+		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+		break;
+	case 8:
+		Ops->V_Pass(dst, src, 16, stride, rounding);
+		break;
+	case 9:
+		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+		Ops->V_Pass(dst, tmp, 16, stride, rounding);
+		break;
+	case 10:
+		Ops_Copy->H_Pass(tmp, src, 17, stride, rounding);
+		Ops->V_Pass(dst, tmp, 16, stride, rounding);
+		break;
+	case 11:
+		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+		Ops->V_Pass(dst, tmp, 16, stride, rounding);
+		break;
+	case 12:
+		Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding);
+		break;
+	case 13:
+		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+		break;
+	case 14:
+		Ops_Copy->H_Pass(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg_Up( dst, tmp, 16, stride, rounding);
+		break;
+	case 15:
+		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+		break;
+	}
+}
+

 static void __inline
-new_interpolate16x8_quarterpel(uint8_t * const cur,
+interpolate16x8_quarterpel(uint8_t * const cur,
				uint8_t * const refn,
				uint8_t * const refh,
				uint8_t * const refv,
@@ -218,24 +333,24 @@

 	int32_t x_int, y_int;

-	const int32_t xRef = x*4 + dx;
-	const int32_t yRef = y*4 + dy;
-
-	Ops = xvid_QP_Funcs; // TODO: pass as argument
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+
+	Ops = xvid_QP_Funcs;
 	quads = (dx&3) | ((dy&3)<<2);
-
+
 	x_int = xRef/4;
 	if (xRef < 0 && xRef % 4)
 		x_int--;
-
+
 	y_int = yRef/4;
 	if (yRef < 0 && yRef % 4)
 		y_int--;
-
+
 	dst = cur + y * stride + x;
-	src = refn + y_int * stride + x_int;
+	src = refn + y_int * (int)stride + x_int;

-	tmp = refh; // we need at least a 16 x stride scratch block
+	tmp = refh; /* we need at least a 16 x stride scratch block */

 	switch(quads) {
 	case 0:
@@ -300,7 +415,7 @@
 }

 static void __inline
-new_interpolate8x8_quarterpel(uint8_t * const cur,
+interpolate8x8_quarterpel(uint8_t * const cur,
				uint8_t * const refn,
				uint8_t * const refh,
				uint8_t * const refv,
@@ -318,24 +433,24 @@

 	int32_t x_int, y_int;

-	const int32_t xRef = x*4 + dx;
-	const int32_t yRef = y*4 + dy;
-
-	Ops = xvid_QP_Funcs; // TODO: pass as argument
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+
+	Ops = xvid_QP_Funcs;
 	quads = (dx&3) | ((dy&3)<<2);
-
+
 	x_int = xRef/4;
 	if (xRef < 0 && xRef % 4)
 		x_int--;
-
+
 	y_int = yRef/4;
 	if (yRef < 0 && yRef % 4)
 		y_int--;
-
+
 	dst = cur + y * stride + x;
-	src = refn + y_int * stride + x_int;
+	src = refn + y_int * (int)stride + x_int;

-	tmp = refh; // we need at least a 16 x stride scratch block
+	tmp = refh; /* we need at least a 16 x stride scratch block */

 	switch(quads) {
 	case 0:
@@ -398,4 +513,107 @@
 	}
 }
+static void __inline
+interpolate8x8_add_quarterpel(uint8_t * const cur,
+				uint8_t * const refn,
+				uint8_t * const refh,
+				uint8_t * const refv,
+				uint8_t * const refhv,
+				const uint32_t x, const uint32_t y,
+				const int32_t dx, const int dy,
+				const uint32_t stride,
+				const uint32_t rounding)
+{
+	const uint8_t *src;
+	uint8_t *dst;
+	uint8_t *tmp;
+	int32_t quads;
+	const XVID_QP_FUNCS *Ops;
+	const XVID_QP_FUNCS *Ops_Copy;
+
+	int32_t x_int, y_int;
+
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+
+	Ops = xvid_QP_Add_Funcs;
+	Ops_Copy = xvid_QP_Funcs;
+	quads = (dx&3) | ((dy&3)<<2);
+
+	x_int = xRef/4;
+	if (xRef < 0 && xRef % 4)
+		x_int--;
+
+	y_int = yRef/4;
+	if (yRef < 0 && yRef % 4)
+		y_int--;
+
+	dst = cur + y * stride + x;
+	src = refn + y_int * (int)stride + x_int;
+
+	tmp = refh; /* we need at least a 16 x stride scratch block */
+
+	switch(quads) {
+	case 0:
+		/* Misleading function name, there is no halfpel involved
+		 * just dst and src averaging with rounding=0 */
+		interpolate8x8_halfpel_add(dst, src, stride, rounding);
+		break;
+	case 1:
+		Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding);
+		break;
+	case 2:
+		Ops->H_Pass_8(dst, src, 8, stride, rounding);
+		break;
+	case 3:
+		Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
+		break;
+	case 4:
+		Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding);
+		break;
+	case 5:
+		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 6:
+		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 7:
+		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 8:
+		Ops->V_Pass_8(dst, src, 8, stride, rounding);
+		break;
+	case 9:
+		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 10:
+		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 11:
+		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 12:
+		Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
+		break;
+	case 13:
+		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 14:
+		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8( dst, tmp, 8, stride, rounding);
+		break;
+	case 15:
+		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+		break;
+	}
+}
+
 
 #endif /* _XVID_QPEL_H_ */