--- qpel.h	2003/05/03 23:26:35	1.1
+++ qpel.h	2003/05/03 23:26:35	1.1.2.1
@@ -0,0 +1,405 @@
+/*****************************************************************************
+ *
+ *  XVID MPEG-4 VIDEO CODEC
+ *  - QPel interpolation -
+ *
+ *  This program is free software ; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation ; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program ; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *****************************************************************************/
+
+/**************************************************************************
+ *
+ *  History:
+ *
+ *  22.10.2002  initial coding  - Skal -
+ *
+ *************************************************************************/
+
+#ifndef _XVID_QPEL_H_
+#define _XVID_QPEL_H_
+
+#include "../utils/mem_transfer.h"
+
+/*****************************************************************************
+ * Signatures
+ ****************************************************************************/
+
+#define XVID_QP_PASS_SIGNATURE(NAME) \
+  void (NAME)(uint8_t *dst, const uint8_t *src, int32_t length, int32_t BpS, int32_t rounding)
+
+typedef XVID_QP_PASS_SIGNATURE(XVID_QP_PASS);
+
+  // We put everything in a single struct so it can easily be passed
+  // to prediction functions as a whole...
+
+struct XVID_QP_FUNCS {
+
+    // filters for QPel 16x? prediction
+
+  XVID_QP_PASS *H_Pass;
+  XVID_QP_PASS *H_Pass_Avrg;
+  XVID_QP_PASS *H_Pass_Avrg_Up;
+  XVID_QP_PASS *V_Pass;
+  XVID_QP_PASS *V_Pass_Avrg;
+  XVID_QP_PASS *V_Pass_Avrg_Up;
+
+    // filters for QPel 8x? prediction
+
+  XVID_QP_PASS *H_Pass_8;
+  XVID_QP_PASS *H_Pass_Avrg_8;
+  XVID_QP_PASS *H_Pass_Avrg_Up_8;
+  XVID_QP_PASS *V_Pass_8;
+  XVID_QP_PASS *V_Pass_Avrg_8;
+  XVID_QP_PASS *V_Pass_Avrg_Up_8;
+};
+typedef struct XVID_QP_FUNCS XVID_QP_FUNCS;
+
+/*****************************************************************************
+ * fwd dcl
+ ****************************************************************************/
+
+extern XVID_QP_FUNCS xvid_QP_Funcs_C;      // for P-frames
+extern XVID_QP_FUNCS xvid_QP_Add_Funcs_C;  // for B-frames
+
+extern XVID_QP_FUNCS xvid_QP_Funcs_mmx;
+extern XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx;
+extern void xvid_Init_QP_mmx(void);        // should be called at mmx initialization
+
+extern XVID_QP_FUNCS *xvid_QP_Funcs;       // <- main pointer for enc/dec structure
+extern XVID_QP_FUNCS *xvid_QP_Add_Funcs;   // <- main pointer for enc/dec structure
+
+/*****************************************************************************
+ * macros
+ ****************************************************************************/
+
+/*****************************************************************************
+
+  Passes to be performed, depending on quads = (dx&3) | ((dy&3)<<2):
+
+  case 0:         copy
+  case 2:         h-pass
+  case 1/3:       h-pass + h-avrg
+  case 8:         v-pass
+  case 10:        h-pass + v-pass
+  case 9/11:      h-pass + h-avrg + v-pass
+  case 4/12:      v-pass + v-avrg
+  case 6/14:      h-pass + v-pass + v-avrg
+  case 5/13/7/15: h-pass + h-avrg + v-pass + v-avrg
+
+ ****************************************************************************/
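+// For illustration: quarter-pel fractions dx&3 == 3 and dy&3 == 2 select
+// quads = 3 | (2<<2) = 11, i.e. an "Avrg_Up" horizontal pass followed by a
+// plain vertical pass. Note that whenever a vertical pass consumes the
+// output of a horizontal one, the call sites below request one extra line
+// from the horizontal pass (17 instead of 16, 9 instead of 8).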
+static __inline void new_interpolate16x16_quarterpel(
+  uint8_t * const cur,
+  uint8_t * const refn,
+  uint8_t * const refh,
+  uint8_t * const refv,
+  uint8_t * const refhv,
+  const uint32_t x, const uint32_t y,
+  const int32_t dx, const int32_t dy,
+  const uint32_t stride,
+  const uint32_t rounding)
+{
+  const uint8_t *src;
+  uint8_t *dst;
+  uint8_t *tmp;
+  int32_t quads;
+  const XVID_QP_FUNCS *Ops;
+
+  int32_t x_int, y_int;
+
+  const int32_t xRef = x*4 + dx;
+  const int32_t yRef = y*4 + dy;
+
+  Ops = xvid_QP_Funcs; // TODO: pass as argument
+  quads = (dx&3) | ((dy&3)<<2);
+
+  // fullpel position, rounded toward -infinity
+  x_int = xRef/4;
+  if (xRef < 0 && xRef % 4)
+    x_int--;
+
+  y_int = yRef/4;
+  if (yRef < 0 && yRef % 4)
+    y_int--;
+
+  dst = cur + y * stride + x;
+  src = refn + y_int * stride + x_int;
+
+  tmp = refh; // we need at least a 16 x stride scratch block
+
+  switch(quads) {
+  case 0:
+    transfer8x8_copy(dst, src, stride);
+    transfer8x8_copy(dst+8, src+8, stride);
+    transfer8x8_copy(dst+8*stride, src+8*stride, stride);
+    transfer8x8_copy(dst+8*stride+8, src+8*stride+8, stride);
+    break;
+  case 1:
+    Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
+    break;
+  case 2:
+    Ops->H_Pass(dst, src, 16, stride, rounding);
+    break;
+  case 3:
+    Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding);
+    break;
+  case 4:
+    Ops->V_Pass_Avrg(dst, src, 16, stride, rounding);
+    break;
+  case 5:
+    Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+    break;
+  case 6:
+    Ops->H_Pass(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+    break;
+  case 7:
+    Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+    break;
+  case 8:
+    Ops->V_Pass(dst, src, 16, stride, rounding);
+    break;
+  case 9:
+    Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+    Ops->V_Pass(dst, tmp, 16, stride, rounding);
+    break;
+  case 10:
+    Ops->H_Pass(tmp, src, 17, stride, rounding);
+    Ops->V_Pass(dst, tmp, 16, stride, rounding);
+    break;
+  case 11:
+    Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+    Ops->V_Pass(dst, tmp, 16, stride, rounding);
+    break;
+  case 12:
+    Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding);
+    break;
+  case 13:
+    Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+    break;
+  case 14:
+    Ops->H_Pass(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+    break;
+  case 15:
+    Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+    break;
+  }
+}
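+// A worked trace, for illustration: x = 16, y = 0, (dx,dy) = (-2,3) gives
+// xRef = 62 and yRef = 3, hence quads = (-2&3) | ((3&3)<<2) = 2 | 12 = 14;
+// src then points at fullpel (15,0), and case 14 runs H_Pass into the
+// scratch block followed by V_Pass_Avrg_Up into dst.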
+static __inline void new_interpolate16x8_quarterpel(
+  uint8_t * const cur,
+  uint8_t * const refn,
+  uint8_t * const refh,
+  uint8_t * const refv,
+  uint8_t * const refhv,
+  const uint32_t x, const uint32_t y,
+  const int32_t dx, const int32_t dy,
+  const uint32_t stride,
+  const uint32_t rounding)
+{
+  const uint8_t *src;
+  uint8_t *dst;
+  uint8_t *tmp;
+  int32_t quads;
+  const XVID_QP_FUNCS *Ops;
+
+  int32_t x_int, y_int;
+
+  const int32_t xRef = x*4 + dx;
+  const int32_t yRef = y*4 + dy;
+
+  Ops = xvid_QP_Funcs; // TODO: pass as argument
+  quads = (dx&3) | ((dy&3)<<2);
+
+  // fullpel position, rounded toward -infinity
+  x_int = xRef/4;
+  if (xRef < 0 && xRef % 4)
+    x_int--;
+
+  y_int = yRef/4;
+  if (yRef < 0 && yRef % 4)
+    y_int--;
+
+  dst = cur + y * stride + x;
+  src = refn + y_int * stride + x_int;
+
+  tmp = refh; // we need at least a 16 x stride scratch block
+
+  switch(quads) {
+  case 0:
+    transfer8x8_copy(dst, src, stride);
+    transfer8x8_copy(dst+8, src+8, stride);
+    break;
+  case 1:
+    Ops->H_Pass_Avrg(dst, src, 8, stride, rounding);
+    break;
+  case 2:
+    Ops->H_Pass(dst, src, 8, stride, rounding);
+    break;
+  case 3:
+    Ops->H_Pass_Avrg_Up(dst, src, 8, stride, rounding);
+    break;
+  case 4:
+    Ops->V_Pass_Avrg_8(dst, src, 16, stride, rounding);
+    break;
+  case 5:
+    Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 6:
+    Ops->H_Pass(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 7:
+    Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 8:
+    Ops->V_Pass_8(dst, src, 16, stride, rounding);
+    break;
+  case 9:
+    Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 10:
+    Ops->H_Pass(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 11:
+    Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 12:
+    Ops->V_Pass_Avrg_Up_8(dst, src, 16, stride, rounding);
+    break;
+  case 13:
+    Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 14:
+    Ops->H_Pass(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 15:
+    Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding);
+    break;
+  }
+}
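+// Note on the x_int/y_int computation used by all three helpers: it is a
+// floor division of the quarter-pel position by 4. C integer division
+// truncates toward zero, so e.g. xRef = -3 gives -3/4 == 0, and the
+// correction step then yields x_int = -1 == floor(-3/4.0).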
+static __inline void new_interpolate8x8_quarterpel(
+  uint8_t * const cur,
+  uint8_t * const refn,
+  uint8_t * const refh,
+  uint8_t * const refv,
+  uint8_t * const refhv,
+  const uint32_t x, const uint32_t y,
+  const int32_t dx, const int32_t dy,
+  const uint32_t stride,
+  const uint32_t rounding)
+{
+  const uint8_t *src;
+  uint8_t *dst;
+  uint8_t *tmp;
+  int32_t quads;
+  const XVID_QP_FUNCS *Ops;
+
+  int32_t x_int, y_int;
+
+  const int32_t xRef = x*4 + dx;
+  const int32_t yRef = y*4 + dy;
+
+  Ops = xvid_QP_Funcs; // TODO: pass as argument
+  quads = (dx&3) | ((dy&3)<<2);
+
+  // fullpel position, rounded toward -infinity
+  x_int = xRef/4;
+  if (xRef < 0 && xRef % 4)
+    x_int--;
+
+  y_int = yRef/4;
+  if (yRef < 0 && yRef % 4)
+    y_int--;
+
+  dst = cur + y * stride + x;
+  src = refn + y_int * stride + x_int;
+
+  tmp = refh; // we need at least a 16 x stride scratch block
+
+  switch(quads) {
+  case 0:
+    transfer8x8_copy(dst, src, stride);
+    break;
+  case 1:
+    Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding);
+    break;
+  case 2:
+    Ops->H_Pass_8(dst, src, 8, stride, rounding);
+    break;
+  case 3:
+    Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
+    break;
+  case 4:
+    Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding);
+    break;
+  case 5:
+    Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 6:
+    Ops->H_Pass_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 7:
+    Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 8:
+    Ops->V_Pass_8(dst, src, 8, stride, rounding);
+    break;
+  case 9:
+    Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 10:
+    Ops->H_Pass_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 11:
+    Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 12:
+    Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
+    break;
+  case 13:
+    Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 14:
+    Ops->H_Pass_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 15:
+    Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+    break;
+  }
+}
+
+/*****************************************************************************/
+
+#endif /* _XVID_QPEL_H_ */
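For reference, a minimal sketch of how these helpers are meant to be driven.
The globals and interpolation functions are the ones declared in the patch;
the wrapper function, its buffer arguments, and the motion-vector values are
made up for illustration only:

    #include "qpel.h"

    static void example(uint8_t *cur, uint8_t *refn,
                        uint8_t *refh, uint8_t *refv, uint8_t *refhv,
                        uint32_t stride)
    {
        /* select the plain C filters; an MMX build would call
           xvid_Init_QP_mmx() once and install the _mmx tables instead */
        xvid_QP_Funcs = &xvid_QP_Funcs_C;
        xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_C;

        /* 16x16 block at (16,32) with quarter-pel vector (5,-2):
           fullpel offset (1,-1) plus fractions (1,2) -> quads == 9,
           i.e. h-pass + h-avrg into the refh scratch block, then
           v-pass into cur */
        new_interpolate16x16_quarterpel(cur, refn, refh, refv, refhv,
                                        16, 32, 5, -2, stride, 0);
    }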