/***************************************************************************** * * XVID MPEG-4 VIDEO CODEC * - QPel interpolation - * * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation ; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY ; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *****************************************************************************/ /************************************************************************** * * History: * * 22.10.2002 initial coding - Skal - * *************************************************************************/ #ifndef _XVID_QPEL_H_ #define _XVID_QPEL_H_ #include "../utils/mem_transfer.h" /***************************************************************************** * Signatures ****************************************************************************/ #define XVID_QP_PASS_SIGNATURE(NAME) \ void (NAME)(uint8_t *dst, const uint8_t *src, int32_t length, int32_t BpS, int32_t rounding) typedef XVID_QP_PASS_SIGNATURE(XVID_QP_PASS); // We put everything in a single struct so it can easily be passed // to prediction functions as a whole... struct XVID_QP_FUNCS { // filter for QPel 16x? prediction XVID_QP_PASS *H_Pass; XVID_QP_PASS *H_Pass_Avrg; XVID_QP_PASS *H_Pass_Avrg_Up; XVID_QP_PASS *V_Pass; XVID_QP_PASS *V_Pass_Avrg; XVID_QP_PASS *V_Pass_Avrg_Up; // filter for QPel 8x? prediction XVID_QP_PASS *H_Pass_8; XVID_QP_PASS *H_Pass_Avrg_8; XVID_QP_PASS *H_Pass_Avrg_Up_8; XVID_QP_PASS *V_Pass_8; XVID_QP_PASS *V_Pass_Avrg_8; XVID_QP_PASS *V_Pass_Avrg_Up_8; }; typedef struct XVID_QP_FUNCS XVID_QP_FUNCS; /***************************************************************************** * fwd dcl ****************************************************************************/ extern XVID_QP_FUNCS xvid_QP_Funcs_C; // for P-frames extern XVID_QP_FUNCS xvid_QP_Add_Funcs_C; // for B-frames extern XVID_QP_FUNCS xvid_QP_Funcs_mmx; extern XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx; extern void xvid_Init_QP_mmx(); // should be called at mmx initialization extern XVID_QP_FUNCS *xvid_QP_Funcs; // <- main pointer for enc/dec structure extern XVID_QP_FUNCS *xvid_QP_Add_Funcs; // <- main pointer for enc/dec structure /***************************************************************************** * macros ****************************************************************************/ /***************************************************************************** Passes to be performed case 0: copy case 2: h-pass case 1/3: h-pass + h-avrg case 8: v-pass case 10: h-pass + v-pass case 9/11: h-pass + h-avrg + v-pass case 4/12: v-pass + v-avrg case 6/14: h-pass + v-pass + v-avrg case 5/13/7/15: h-pass + h-avrg + v-pass + v-avrg ****************************************************************************/ static __inline void new_interpolate16x16_quarterpel( uint8_t * const cur, uint8_t * const refn, uint8_t * const refh, uint8_t * const refv, uint8_t * const refhv, const uint32_t x, const uint32_t y, const int32_t dx, const int dy, const uint32_t stride, const uint32_t rounding) { const uint8_t *src; uint8_t *dst; uint8_t *tmp; int32_t quads; const XVID_QP_FUNCS *Ops; int32_t x_int, y_int; const int32_t xRef = x*4 + dx; const int32_t yRef = y*4 + dy; Ops = xvid_QP_Funcs; // TODO: pass as argument quads = (dx&3) | ((dy&3)<<2); x_int = xRef/4; if (xRef < 0 && xRef % 4) x_int--; y_int = yRef/4; if (yRef < 0 && yRef % 4) y_int--; dst = cur + y * stride + x; src = refn + y_int * stride + x_int; tmp = refh; // we need at least a 16 x stride scratch block switch(quads) { case 0: transfer8x8_copy( dst, src, stride); transfer8x8_copy( dst+8, src+8, stride); transfer8x8_copy( dst+8*stride, src+8*stride, stride); transfer8x8_copy( dst+8*stride+8, src+8*stride+8, stride); break; case 1: Ops->H_Pass_Avrg(dst, src, 16, stride, rounding); break; case 2: Ops->H_Pass(dst, src, 16, stride, rounding); break; case 3: Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding); break; case 4: Ops->V_Pass_Avrg(dst, src, 16, stride, rounding); break; case 5: Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding); Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding); break; case 6: Ops->H_Pass(tmp, src, 17, stride, rounding); Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding); break; case 7: Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding); Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding); break; case 8: Ops->V_Pass(dst, src, 16, stride, rounding); break; case 9: Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding); Ops->V_Pass(dst, tmp, 16, stride, rounding); break; case 10: Ops->H_Pass(tmp, src, 17, stride, rounding); Ops->V_Pass(dst, tmp, 16, stride, rounding); break; case 11: Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding); Ops->V_Pass(dst, tmp, 16, stride, rounding); break; case 12: Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding); break; case 13: Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding); Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding); break; case 14: Ops->H_Pass(tmp, src, 17, stride, rounding); Ops->V_Pass_Avrg_Up( dst, tmp, 16, stride, rounding); break; case 15: Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding); Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding); break; } } static __inline void new_interpolate16x8_quarterpel( uint8_t * const cur, uint8_t * const refn, uint8_t * const refh, uint8_t * const refv, uint8_t * const refhv, const uint32_t x, const uint32_t y, const int32_t dx, const int dy, const uint32_t stride, const uint32_t rounding) { const uint8_t *src; uint8_t *dst; uint8_t *tmp; int32_t quads; const XVID_QP_FUNCS *Ops; int32_t x_int, y_int; const int32_t xRef = x*4 + dx; const int32_t yRef = y*4 + dy; Ops = xvid_QP_Funcs; // TODO: pass as argument quads = (dx&3) | ((dy&3)<<2); x_int = xRef/4; if (xRef < 0 && xRef % 4) x_int--; y_int = yRef/4; if (yRef < 0 && yRef % 4) y_int--; dst = cur + y * stride + x; src = refn + y_int * stride + x_int; tmp = refh; // we need at least a 16 x stride scratch block switch(quads) { case 0: transfer8x8_copy( dst, src, stride); transfer8x8_copy( dst+8, src+8, stride); break; case 1: Ops->H_Pass_Avrg(dst, src, 8, stride, rounding); break; case 2: Ops->H_Pass(dst, src, 8, stride, rounding); break; case 3: Ops->H_Pass_Avrg_Up(dst, src, 8, stride, rounding); break; case 4: Ops->V_Pass_Avrg_8(dst, src, 16, stride, rounding); break; case 5: Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding); break; case 6: Ops->H_Pass(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding); break; case 7: Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding); break; case 8: Ops->V_Pass_8(dst, src, 16, stride, rounding); break; case 9: Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding); Ops->V_Pass_8(dst, tmp, 16, stride, rounding); break; case 10: Ops->H_Pass(tmp, src, 9, stride, rounding); Ops->V_Pass_8(dst, tmp, 16, stride, rounding); break; case 11: Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding); Ops->V_Pass_8(dst, tmp, 16, stride, rounding); break; case 12: Ops->V_Pass_Avrg_Up_8(dst, src, 16, stride, rounding); break; case 13: Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding); break; case 14: Ops->H_Pass(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_Up_8( dst, tmp, 16, stride, rounding); break; case 15: Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding); break; } } static __inline void new_interpolate8x8_quarterpel( uint8_t * const cur, uint8_t * const refn, uint8_t * const refh, uint8_t * const refv, uint8_t * const refhv, const uint32_t x, const uint32_t y, const int32_t dx, const int dy, const uint32_t stride, const uint32_t rounding) { const uint8_t *src; uint8_t *dst; uint8_t *tmp; int32_t quads; const XVID_QP_FUNCS *Ops; int32_t x_int, y_int; const int32_t xRef = x*4 + dx; const int32_t yRef = y*4 + dy; Ops = xvid_QP_Funcs; // TODO: pass as argument quads = (dx&3) | ((dy&3)<<2); x_int = xRef/4; if (xRef < 0 && xRef % 4) x_int--; y_int = yRef/4; if (yRef < 0 && yRef % 4) y_int--; dst = cur + y * stride + x; src = refn + y_int * stride + x_int; tmp = refh; // we need at least a 16 x stride scratch block switch(quads) { case 0: transfer8x8_copy( dst, src, stride); break; case 1: Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding); break; case 2: Ops->H_Pass_8(dst, src, 8, stride, rounding); break; case 3: Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding); break; case 4: Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding); break; case 5: Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding); break; case 6: Ops->H_Pass_8(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding); break; case 7: Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding); break; case 8: Ops->V_Pass_8(dst, src, 8, stride, rounding); break; case 9: Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding); Ops->V_Pass_8(dst, tmp, 8, stride, rounding); break; case 10: Ops->H_Pass_8(tmp, src, 9, stride, rounding); Ops->V_Pass_8(dst, tmp, 8, stride, rounding); break; case 11: Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding); Ops->V_Pass_8(dst, tmp, 8, stride, rounding); break; case 12: Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding); break; case 13: Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding); break; case 14: Ops->H_Pass_8(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_Up_8( dst, tmp, 8, stride, rounding); break; case 15: Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding); Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding); break; } } /*****************************************************************************/ #endif /* _XVID_QPEL_H_ */