--- qpel.h	2003/05/03 23:26:35	1.1
+++ qpel.h	2003/05/03 23:26:35	1.1.2.1
@@ -0,0 +1,405 @@
+/*****************************************************************************
+ *
+ *  XVID MPEG-4 VIDEO CODEC
+ *  - QPel interpolation -
+ *
+ *  This program is free software ; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation ; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program ; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *****************************************************************************/
+
+/**************************************************************************
+ *
+ *  History:
+ *
+ *  22.10.2002  initial coding  - Skal -
+ *
+ *************************************************************************/
+
+#ifndef _XVID_QPEL_H_
+#define _XVID_QPEL_H_
+
+#include "../utils/mem_transfer.h"
+
+/*****************************************************************************
+ * Signatures
+ ****************************************************************************/
+
+#define XVID_QP_PASS_SIGNATURE(NAME) \
+  void (NAME)(uint8_t *dst, const uint8_t *src, int32_t length, int32_t BpS, int32_t rounding)
+
+typedef XVID_QP_PASS_SIGNATURE(XVID_QP_PASS);
+
+  // We put everything in a single struct so it can easily be passed
+  // to prediction functions as a whole...
+
+struct XVID_QP_FUNCS {
+
+    // filters for QPel 16x? prediction
+
+  XVID_QP_PASS *H_Pass;
+  XVID_QP_PASS *H_Pass_Avrg;
+  XVID_QP_PASS *H_Pass_Avrg_Up;
+  XVID_QP_PASS *V_Pass;
+  XVID_QP_PASS *V_Pass_Avrg;
+  XVID_QP_PASS *V_Pass_Avrg_Up;
+
+    // filters for QPel 8x? prediction
+
+  XVID_QP_PASS *H_Pass_8;
+  XVID_QP_PASS *H_Pass_Avrg_8;
+  XVID_QP_PASS *H_Pass_Avrg_Up_8;
+  XVID_QP_PASS *V_Pass_8;
+  XVID_QP_PASS *V_Pass_Avrg_8;
+  XVID_QP_PASS *V_Pass_Avrg_Up_8;
+};
+typedef struct XVID_QP_FUNCS XVID_QP_FUNCS;
+
+/*****************************************************************************
+ * fwd dcl
+ ****************************************************************************/
+
+extern XVID_QP_FUNCS xvid_QP_Funcs_C;      // for P-frames
+extern XVID_QP_FUNCS xvid_QP_Add_Funcs_C;  // for B-frames
+
+extern XVID_QP_FUNCS xvid_QP_Funcs_mmx;
+extern XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx;
+extern void xvid_Init_QP_mmx(void);        // should be called at mmx initialization
+
+extern XVID_QP_FUNCS *xvid_QP_Funcs;       // <- main pointer for enc/dec structure
+extern XVID_QP_FUNCS *xvid_QP_Add_Funcs;   // <- main pointer for enc/dec structure
+
+/*****************************************************************************
+ * macros
+ ****************************************************************************/
+
+/*****************************************************************************
+
+  Passes to be performed, depending on quads = (dx&3) | ((dy&3)<<2):
+
+  case 0:         copy
+  case 2:         h-pass
+  case 1/3:       h-pass + h-avrg
+  case 8:         v-pass
+  case 10:        h-pass + v-pass
+  case 9/11:      h-pass + h-avrg + v-pass
+  case 4/12:      v-pass + v-avrg
+  case 6/14:      h-pass + v-pass + v-avrg
+  case 5/13/7/15: h-pass + h-avrg + v-pass + v-avrg
+
+ ****************************************************************************/
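+// For illustration: quarter-pel fractions dx&3 == 3 and dy&3 == 2 select
+// quads = 3 | (2<<2) = 11, i.e. an "Avrg_Up" horizontal pass followed by a
+// plain vertical pass. Note that whenever a vertical pass consumes the
+// output of a horizontal one, the call sites below request one extra line
+// from the horizontal pass (17 instead of 16, 9 instead of 8).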
+static __inline void new_interpolate16x16_quarterpel(
+  uint8_t * const cur,
+  uint8_t * const refn,
+  uint8_t * const refh,
+  uint8_t * const refv,
+  uint8_t * const refhv,
+  const uint32_t x, const uint32_t y,
+  const int32_t dx, const int32_t dy,
+  const uint32_t stride,
+  const uint32_t rounding)
+{
+  const uint8_t *src;
+  uint8_t *dst;
+  uint8_t *tmp;
+  int32_t quads;
+  const XVID_QP_FUNCS *Ops;
+
+  int32_t x_int, y_int;
+
+  const int32_t xRef = x*4 + dx;
+  const int32_t yRef = y*4 + dy;
+
+  Ops = xvid_QP_Funcs; // TODO: pass as argument
+  quads = (dx&3) | ((dy&3)<<2);
+
+  // fullpel position, rounded toward -infinity
+  x_int = xRef/4;
+  if (xRef < 0 && xRef % 4)
+    x_int--;
+
+  y_int = yRef/4;
+  if (yRef < 0 && yRef % 4)
+    y_int--;
+
+  dst = cur + y * stride + x;
+  src = refn + y_int * stride + x_int;
+
+  tmp = refh; // we need at least a 16 x stride scratch block
+
+  switch(quads) {
+  case 0:
+    transfer8x8_copy(dst, src, stride);
+    transfer8x8_copy(dst+8, src+8, stride);
+    transfer8x8_copy(dst+8*stride, src+8*stride, stride);
+    transfer8x8_copy(dst+8*stride+8, src+8*stride+8, stride);
+    break;
+  case 1:
+    Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
+    break;
+  case 2:
+    Ops->H_Pass(dst, src, 16, stride, rounding);
+    break;
+  case 3:
+    Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding);
+    break;
+  case 4:
+    Ops->V_Pass_Avrg(dst, src, 16, stride, rounding);
+    break;
+  case 5:
+    Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+    break;
+  case 6:
+    Ops->H_Pass(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+    break;
+  case 7:
+    Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+    break;
+  case 8:
+    Ops->V_Pass(dst, src, 16, stride, rounding);
+    break;
+  case 9:
+    Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+    Ops->V_Pass(dst, tmp, 16, stride, rounding);
+    break;
+  case 10:
+    Ops->H_Pass(tmp, src, 17, stride, rounding);
+    Ops->V_Pass(dst, tmp, 16, stride, rounding);
+    break;
+  case 11:
+    Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+    Ops->V_Pass(dst, tmp, 16, stride, rounding);
+    break;
+  case 12:
+    Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding);
+    break;
+  case 13:
+    Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+    break;
+  case 14:
+    Ops->H_Pass(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+    break;
+  case 15:
+    Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+    Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+    break;
+  }
+}
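+// A worked trace, for illustration: x = 16, y = 0, (dx,dy) = (-2,3) gives
+// xRef = 62 and yRef = 3, hence quads = (-2&3) | ((3&3)<<2) = 2 | 12 = 14;
+// src then points at fullpel (15,0), and case 14 runs H_Pass into the
+// scratch block followed by V_Pass_Avrg_Up into dst.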
+static __inline void new_interpolate16x8_quarterpel(
+  uint8_t * const cur,
+  uint8_t * const refn,
+  uint8_t * const refh,
+  uint8_t * const refv,
+  uint8_t * const refhv,
+  const uint32_t x, const uint32_t y,
+  const int32_t dx, const int32_t dy,
+  const uint32_t stride,
+  const uint32_t rounding)
+{
+  const uint8_t *src;
+  uint8_t *dst;
+  uint8_t *tmp;
+  int32_t quads;
+  const XVID_QP_FUNCS *Ops;
+
+  int32_t x_int, y_int;
+
+  const int32_t xRef = x*4 + dx;
+  const int32_t yRef = y*4 + dy;
+
+  Ops = xvid_QP_Funcs; // TODO: pass as argument
+  quads = (dx&3) | ((dy&3)<<2);
+
+  // fullpel position, rounded toward -infinity
+  x_int = xRef/4;
+  if (xRef < 0 && xRef % 4)
+    x_int--;
+
+  y_int = yRef/4;
+  if (yRef < 0 && yRef % 4)
+    y_int--;
+
+  dst = cur + y * stride + x;
+  src = refn + y_int * stride + x_int;
+
+  tmp = refh; // we need at least a 16 x stride scratch block
+
+  switch(quads) {
+  case 0:
+    transfer8x8_copy(dst, src, stride);
+    transfer8x8_copy(dst+8, src+8, stride);
+    break;
+  case 1:
+    Ops->H_Pass_Avrg(dst, src, 8, stride, rounding);
+    break;
+  case 2:
+    Ops->H_Pass(dst, src, 8, stride, rounding);
+    break;
+  case 3:
+    Ops->H_Pass_Avrg_Up(dst, src, 8, stride, rounding);
+    break;
+  case 4:
+    Ops->V_Pass_Avrg_8(dst, src, 16, stride, rounding);
+    break;
+  case 5:
+    Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 6:
+    Ops->H_Pass(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 7:
+    Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 8:
+    Ops->V_Pass_8(dst, src, 16, stride, rounding);
+    break;
+  case 9:
+    Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 10:
+    Ops->H_Pass(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 11:
+    Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 12:
+    Ops->V_Pass_Avrg_Up_8(dst, src, 16, stride, rounding);
+    break;
+  case 13:
+    Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 14:
+    Ops->H_Pass(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding);
+    break;
+  case 15:
+    Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding);
+    break;
+  }
+}
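+// Note on the x_int/y_int computation used by all three helpers: it is a
+// floor division of the quarter-pel position by 4. C integer division
+// truncates toward zero, so e.g. xRef = -3 gives -3/4 == 0, and the
+// correction step then yields x_int = -1 == floor(-3/4.0).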
+static __inline void new_interpolate8x8_quarterpel(
+  uint8_t * const cur,
+  uint8_t * const refn,
+  uint8_t * const refh,
+  uint8_t * const refv,
+  uint8_t * const refhv,
+  const uint32_t x, const uint32_t y,
+  const int32_t dx, const int32_t dy,
+  const uint32_t stride,
+  const uint32_t rounding)
+{
+  const uint8_t *src;
+  uint8_t *dst;
+  uint8_t *tmp;
+  int32_t quads;
+  const XVID_QP_FUNCS *Ops;
+
+  int32_t x_int, y_int;
+
+  const int32_t xRef = x*4 + dx;
+  const int32_t yRef = y*4 + dy;
+
+  Ops = xvid_QP_Funcs; // TODO: pass as argument
+  quads = (dx&3) | ((dy&3)<<2);
+
+  // fullpel position, rounded toward -infinity
+  x_int = xRef/4;
+  if (xRef < 0 && xRef % 4)
+    x_int--;
+
+  y_int = yRef/4;
+  if (yRef < 0 && yRef % 4)
+    y_int--;
+
+  dst = cur + y * stride + x;
+  src = refn + y_int * stride + x_int;
+
+  tmp = refh; // we need at least a 16 x stride scratch block
+
+  switch(quads) {
+  case 0:
+    transfer8x8_copy(dst, src, stride);
+    break;
+  case 1:
+    Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding);
+    break;
+  case 2:
+    Ops->H_Pass_8(dst, src, 8, stride, rounding);
+    break;
+  case 3:
+    Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
+    break;
+  case 4:
+    Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding);
+    break;
+  case 5:
+    Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 6:
+    Ops->H_Pass_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 7:
+    Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 8:
+    Ops->V_Pass_8(dst, src, 8, stride, rounding);
+    break;
+  case 9:
+    Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 10:
+    Ops->H_Pass_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 11:
+    Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 12:
+    Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
+    break;
+  case 13:
+    Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 14:
+    Ops->H_Pass_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+    break;
+  case 15:
+    Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+    Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+    break;
+  }
+}
+
+/*****************************************************************************/
+
+#endif /* _XVID_QPEL_H_ */
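For reference, a minimal sketch of how these helpers are meant to be driven.
The globals and interpolation functions are the ones declared in the patch;
the wrapper function, its buffer arguments, and the motion-vector values are
made up for illustration only:

    #include "qpel.h"

    static void example(uint8_t *cur, uint8_t *refn,
                        uint8_t *refh, uint8_t *refv, uint8_t *refhv,
                        uint32_t stride)
    {
        /* select the plain C filters; an MMX build would call
           xvid_Init_QP_mmx() once and install the _mmx tables instead */
        xvid_QP_Funcs = &xvid_QP_Funcs_C;
        xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_C;

        /* 16x16 block at (16,32) with quarter-pel vector (5,-2):
           fullpel offset (1,-1) plus fractions (1,2) -> quads == 9,
           i.e. h-pass + h-avrg into the refh scratch block, then
           v-pass into cur */
        new_interpolate16x16_quarterpel(cur, refn, refh, refv, refhv,
                                        16, 32, 5, -2, stride, 0);
    }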