--- xvid.c	2002/12/08 05:38:56	1.33.2.15
+++ xvid.c	2003/02/09 19:32:52	1.40
@@ -3,41 +3,54 @@
  *  XVID MPEG-4 VIDEO CODEC
  *  - Native API implementation  -
  *
- *  This program is an implementation of a part of one or more MPEG-4
- *  Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
- *  to use this software module in hardware or software products are
- *  advised that its use may infringe existing patents or copyrights, and
- *  any such use would be at such party's own risk.  The original
- *  developer of this software module and his/her company, and subsequent
- *  editors and their companies, will have no liability for use of this
- *  software or modifications or derivatives thereof.
- *
- *  This program is free software ; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation ; either version 2 of the License, or
+ *  Copyright(C) 2001-2002 Peter Ross <pross@xvid.org>
+ *
+ *  This file is part of XviD, a free MPEG-4 video encoder/decoder
+ *
+ *  XviD is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
- *  along with this program ; if not, write to the Free Software
+ *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
- ****************************************************************************/
-
-/*****************************************************************************
- *
- *  History
+ *  Under section 8 of the GNU General Public License, the copyright
+ *  holders of XVID explicitly forbid distribution in the following
+ *  countries:
+ *
+ *    - Japan
+ *    - United States of America
+ *
+ *  Linking XviD statically or dynamically with other modules is making a
+ *  combined work based on XviD.  Thus, the terms and conditions of the
+ *  GNU General Public License cover the whole combination.
+ *
+ *  As a special exception, the copyright holders of XviD give you
+ *  permission to link XviD with independent modules that communicate with
+ *  XviD solely through the VFW1.1 and DShow interfaces, regardless of the
+ *  license terms of these independent modules, and to copy and distribute
+ *  the resulting combined work under terms of your choice, provided that
+ *  every copy of the combined work is accompanied by a complete copy of
+ *  the source code of XviD (the version of XviD used to produce the
+ *  combined work), being distributed under the terms of the GNU General
+ *  Public License plus this exception.  An independent module is a module
+ *  which is not derived from or based on XviD.
+ *
+ *  Note that people who make modified versions of XviD are not obligated
+ *  to grant this special exception for their modified versions; it is
+ *  their choice whether to do so.  The GNU General Public License gives
+ *  permission to release a modified version without this exception; this
+ *  exception also makes it possible to release a modified version which
+ *  carries forward this exception.
  *
- *	- 23.06.2002	added XVID_CPU_CHKONLY
- *  - 17.03.2002	Added interpolate8x8_halfpel_hv_xmm
- *  - 22.12.2001  API change: added xvid_init() - Isibaar
- *  - 16.12.2001	inital version; (c)2001 peter ross <pross@cs.rmit.edu.au>
- *
- *  $Id: xvid.c,v 1.33.2.15 2002/12/08 05:38:56 suxen_drol Exp $
+ * $Id: xvid.c,v 1.40 2003/02/09 19:32:52 edgomez Exp $
  *
  ****************************************************************************/
 
@@ -49,9 +62,7 @@
 #include "dct/fdct.h"
 #include "image/colorspace.h"
 #include "image/interpolate8x8.h"
-#include "image/reduced.h"
 #include "utils/mem_transfer.h"
-#include "utils/mbfunctions.h"
 #include "quant/quant_h263.h"
 #include "quant/quant_mpeg4.h"
 #include "motion/motion.h"
@@ -60,9 +71,9 @@
 #include "utils/timer.h"
 #include "bitstream/mbcoding.h"
 
-#if defined(ARCH_X86) && defined(EXPERIMENTAL_SSE2_CODE)
+#if defined(ARCH_IS_IA32) && defined(EXPERIMENTAL_SSE2_CODE)
 
-#ifdef WIN32
+#ifdef _MSC_VER
 #include <windows.h>
 #else
 #include <signal.h>
@@ -70,7 +81,7 @@
 #endif
 
 
-#ifndef WIN32
+#ifndef _MSC_VER
 
 static jmp_buf mark;
 
@@ -83,17 +94,17 @@
 
 
 /*
-calls the funcptr, and returns whether SIGILL (illegal instruction) was signalled
-return values:
--1 : could not determine
-0  : SIGILL was *not* signalled
-1  : SIGILL was signalled
-*/
+ * Calls func, and returns whether SIGILL (illegal instruction) was signalled.
+ * Return values:
+ * -1 : could not determine
+ * 0  : SIGILL was *not* signalled
+ * 1  : SIGILL was signalled
+ */
 
 int
 sigill_check(void (*func)())
 {
-#ifdef WIN32
+#ifdef _MSC_VER
 	_try {
 		func();
 	}
@@ -141,11 +152,16 @@
  *
  ****************************************************************************/
 
-
-static 
-int xvid_init_init(XVID_INIT_PARAM * init_param)
+int
+xvid_init(void *handle,
+		  int opt,
+		  void *param1,
+		  void *param2)
 {
 	int cpu_flags;
+	XVID_INIT_PARAM *init_param;
+
+	init_param = (XVID_INIT_PARAM *) param1;
 
 	/* Inform the client the API version */
 	init_param->api_version = API_VERSION;
@@ -162,7 +178,7 @@
 
 		cpu_flags = check_cpu_features();
 
-#if defined(ARCH_X86) && defined(EXPERIMENTAL_SSE2_CODE)
+#if defined(ARCH_IS_IA32) && defined(EXPERIMENTAL_SSE2_CODE)
 		if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))
 			cpu_flags &= ~XVID_CPU_SSE;
 
@@ -213,82 +229,31 @@
 	transfer_16to8add  = transfer_16to8add_c;
 	transfer8x8_copy   = transfer8x8_copy_c;
 
-	/* Interlacing functions */
-	MBFieldTest = MBFieldTest_c;
-
 	/* Image interpolation related functions */
 	interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_c;
 	interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_c;
 	interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_c;
 
-	interpolate16x16_lowpass_h = interpolate16x16_lowpass_h_c;
-	interpolate16x16_lowpass_v = interpolate16x16_lowpass_v_c;
-	interpolate16x16_lowpass_hv = interpolate16x16_lowpass_hv_c;
-
-	interpolate8x8_lowpass_h = interpolate8x8_lowpass_h_c;
-	interpolate8x8_lowpass_v = interpolate8x8_lowpass_v_c;
-	interpolate8x8_lowpass_hv = interpolate8x8_lowpass_hv_c;
-
-	interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_c;
-	interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_c;
-
-	interpolate8x8_avg2 = interpolate8x8_avg2_c;
-	interpolate8x8_avg4 = interpolate8x8_avg4_c;
-
-	/* reduced resoltuion */
-
-#ifdef ARCH_X86
-	vfilter_31 = xvid_VFilter_31_x86;
-	hfilter_31 = xvid_HFilter_31_x86;
-#else
-	copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_C;
-	add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_C;
-	vfilter_31 = xvid_VFilter_31_C;
-	hfilter_31 = xvid_HFilter_31_C;
-#endif
-
 	/* Initialize internal colorspace transformation tables */
 	colorspace_init();
 
 	/* All colorspace transformation functions User Format->YV12 */
-	yv12_to_yv12    = yv12_to_yv12_c;
-	rgb555_to_yv12  = rgb555_to_yv12_c;
-	rgb565_to_yv12  = rgb565_to_yv12_c;
-	bgr_to_yv12     = bgr_to_yv12_c;
-	bgra_to_yv12    = bgra_to_yv12_c;
-	abgr_to_yv12    = abgr_to_yv12_c;
-	rgba_to_yv12    = rgba_to_yv12_c;
-	yuyv_to_yv12    = yuyv_to_yv12_c;
-	uyvy_to_yv12    = uyvy_to_yv12_c;
-
-	rgb555i_to_yv12 = rgb555i_to_yv12_c;
-	rgb565i_to_yv12 = rgb565i_to_yv12_c;
-	bgri_to_yv12    = bgri_to_yv12_c;
-	bgrai_to_yv12   = bgrai_to_yv12_c;
-	abgri_to_yv12   = abgri_to_yv12_c;
-	rgbai_to_yv12   = rgbai_to_yv12_c;
-	yuyvi_to_yv12   = yuyvi_to_yv12_c;
-	uyvyi_to_yv12   = uyvyi_to_yv12_c;
-
+	rgb555_to_yv12 = rgb555_to_yv12_c;
+	rgb565_to_yv12 = rgb565_to_yv12_c;
+	rgb24_to_yv12  = rgb24_to_yv12_c;
+	rgb32_to_yv12  = rgb32_to_yv12_c;
+	yuv_to_yv12    = yuv_to_yv12_c;
+	yuyv_to_yv12   = yuyv_to_yv12_c;
+	uyvy_to_yv12   = uyvy_to_yv12_c;
 
 	/* All colorspace transformation functions YV12->User format */
-	yv12_to_rgb555  = yv12_to_rgb555_c;
-	yv12_to_rgb565  = yv12_to_rgb565_c;
-	yv12_to_bgr     = yv12_to_bgr_c;
-	yv12_to_bgra    = yv12_to_bgra_c;
-	yv12_to_abgr    = yv12_to_abgr_c;
-	yv12_to_rgba    = yv12_to_rgba_c;
-	yv12_to_yuyv    = yv12_to_yuyv_c;
-	yv12_to_uyvy    = yv12_to_uyvy_c;
- 
-	yv12_to_rgb555i = yv12_to_rgb555i_c;
-	yv12_to_rgb565i = yv12_to_rgb565i_c;
-	yv12_to_bgri    = yv12_to_bgri_c;
-	yv12_to_bgrai   = yv12_to_bgrai_c;
-	yv12_to_abgri   = yv12_to_abgri_c;
-	yv12_to_rgbai   = yv12_to_rgbai_c;
-	yv12_to_yuyvi   = yv12_to_yuyvi_c;
-	yv12_to_uyvyi   = yv12_to_uyvyi_c;
+	yv12_to_rgb555 = yv12_to_rgb555_c;
+	yv12_to_rgb565 = yv12_to_rgb565_c;
+	yv12_to_rgb24  = yv12_to_rgb24_c;
+	yv12_to_rgb32  = yv12_to_rgb32_c;
+	yv12_to_yuv    = yv12_to_yuv_c;
+	yv12_to_yuyv   = yv12_to_yuyv_c;
+	yv12_to_uyvy   = yv12_to_uyvy_c;
 
 	/* Functions used in motion estimation algorithms */
 	calc_cbp = calc_cbp_c;
@@ -297,26 +262,19 @@
 	sad16bi  = sad16bi_c;
 	sad8bi   = sad8bi_c;
 	dev16    = dev16_c;
-	sad16v	 = sad16v_c;
 	
-//	Halfpel8_Refine = Halfpel8_Refine_c;
-
-#ifdef ARCH_X86
-
-	if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||
-		(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||
-		(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2))
-	{
-		/* Restore FPU context : emms_c is a nop functions */
-		emms = emms_mmx;
-	}
+	Halfpel8_Refine = Halfpel8_Refine_c;
 
+#ifdef ARCH_IS_IA32
 	if ((cpu_flags & XVID_CPU_MMX) > 0) {
 
 		/* Forward and Inverse Discrete Cosine Transformation functions */
 		fdct = fdct_mmx;
 		idct = idct_mmx;
 
+		/* To restore FPU context after mmx use */
+		emms = emms_mmx;
+
 		/* Quantization related functions */
 		quant_intra   = quant_intra_mmx;
 		dequant_intra = dequant_intra_mmx;
@@ -336,40 +294,24 @@
 		transfer_16to8add  = transfer_16to8add_mmx;
 		transfer8x8_copy   = transfer8x8_copy_mmx;
 
-		/* Interlacing Functions */
-		MBFieldTest = MBFieldTest_mmx;
 
 		/* Image Interpolation related functions */
 		interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_mmx;
 		interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_mmx;
 		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_mmx;
 
-		interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_mmx;
-		interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_mmx;
-
-		interpolate8x8_avg2 = interpolate8x8_avg2_mmx;
-		interpolate8x8_avg4 = interpolate8x8_avg4_mmx;
-
-		/* reduced resolution */
-		copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_mmx;
-		add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_mmx;
-		hfilter_31 = xvid_HFilter_31_mmx;
-
-		/* image input xxx_to_yv12 related functions */
-		yv12_to_yv12  = yv12_to_yv12_mmx;
-		bgr_to_yv12   = bgr_to_yv12_mmx;
-		bgra_to_yv12  = bgra_to_yv12_mmx;
+		/* Image input (RGB/YUV)->YV12 related functions */
+		rgb24_to_yv12 = rgb24_to_yv12_mmx;
+		rgb32_to_yv12 = rgb32_to_yv12_mmx;
+		yuv_to_yv12   = yuv_to_yv12_mmx;
 		yuyv_to_yv12  = yuyv_to_yv12_mmx;
 		uyvy_to_yv12  = uyvy_to_yv12_mmx;
 
-		/* image output yv12_to_xxx related functions */
-		yv12_to_bgr   = yv12_to_bgr_mmx;
-		yv12_to_bgra  = yv12_to_bgra_mmx;
+		/* Image output YV12->(RGB/YUV) related functions */
+		yv12_to_rgb24 = yv12_to_rgb24_mmx;
+		yv12_to_rgb32 = yv12_to_rgb32_mmx;
 		yv12_to_yuyv  = yv12_to_yuyv_mmx;
 		yv12_to_uyvy  = yv12_to_uyvy_mmx;
-		
-		yv12_to_yuyvi = yv12_to_yuyvi_mmx;
-		yv12_to_uyvyi = yv12_to_uyvyi_mmx;
 
 		/* Motion estimation related functions */
 		calc_cbp = calc_cbp_mmx;
@@ -378,7 +320,6 @@
 		sad16bi = sad16bi_mmx;
 		sad8bi  = sad8bi_mmx;
 		dev16    = dev16_mmx;
-		sad16v	 = sad16v_mmx;
 
 	}
 
@@ -388,9 +329,6 @@
 		/* ME functions */
 		sad16bi = sad16bi_3dn;
 		sad8bi  = sad8bi_3dn;
-
-		yuyv_to_yv12  = yuyv_to_yv12_3dn;
-		uyvy_to_yv12  = uyvy_to_yv12_3dn;
 	}
 
 
@@ -404,10 +342,6 @@
 		interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_xmm;
 		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_xmm;
 
-		/* reduced resolution */
-		copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_xmm;
-		add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_xmm;
-
 		/* Quantization */
 		dequant_intra = dequant_intra_xmm;
 		dequant_inter = dequant_inter_xmm;
@@ -416,9 +350,7 @@
 		transfer_8to16sub2 = transfer_8to16sub2_xmm;
 
 		/* Colorspace transformation */
-		yv12_to_yv12  = yv12_to_yv12_xmm;
-		yuyv_to_yv12  = yuyv_to_yv12_xmm;
-		uyvy_to_yv12  = uyvy_to_yv12_xmm;
+		yuv_to_yv12 = yuv_to_yv12_xmm;
 
 		/* ME functions */
 		sad16 = sad16_xmm;
@@ -426,7 +358,7 @@
 		sad16bi = sad16bi_xmm;
 		sad8bi  = sad8bi_xmm;
 		dev16 = dev16_xmm;
-		sad16v	 = sad16v_xmm;
+
 	}
 
 	if ((cpu_flags & XVID_CPU_3DNOW) > 0) {
@@ -460,11 +392,11 @@
 
 #endif
 
-#ifdef ARCH_IA64
+#ifdef ARCH_IS_IA64
 	if ((cpu_flags & XVID_CPU_IA64) > 0) { //use assembler routines?
 	  idct_ia64_init();
 	  fdct = fdct_ia64;
-	  idct = idct_ia64;   //not yet working, crashes
+	  idct = idct_ia64;
 	  interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_ia64;
 	  interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_ia64;
 	  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_ia64;
@@ -472,7 +404,7 @@
 	  sad16bi = sad16bi_ia64;
 	  sad8 = sad8_ia64;
 	  dev16 = dev16_ia64;
-//	  Halfpel8_Refine = Halfpel8_Refine_ia64;
+	  Halfpel8_Refine = Halfpel8_Refine_ia64;
 	  quant_intra = quant_intra_ia64;
 	  dequant_intra = dequant_intra_ia64;
 	  quant_inter = quant_inter_ia64;
@@ -483,12 +415,12 @@
 	  transfer_8to16sub2 = transfer_8to16sub2_ia64;
 	  transfer_16to8add = transfer_16to8add_ia64;
 	  transfer8x8_copy = transfer8x8_copy_ia64;
-	  DEBUG("Using IA-64 assembler routines.\n");
+//	  DEBUG("Using IA-64 assembler routines.\n");
 	}
 #endif 
 
-#ifdef ARCH_PPC
-#ifdef ARCH_PPC_ALTIVEC
+#ifdef ARCH_IS_PPC
+#ifdef ARCH_IS_PPC_ALTIVEC
 	calc_cbp = calc_cbp_altivec;
 	fdct = fdct_altivec;
 	idct = idct_altivec;
@@ -504,58 +436,6 @@
 	return XVID_ERR_OK;
 }
 
-
-
-static int
-xvid_init_convert(XVID_INIT_CONVERTINFO* convert)
-{
-	const int flip1 = (convert->input.colorspace & XVID_CSP_VFLIP) ^ (convert->output.colorspace & XVID_CSP_VFLIP);
-	const int width = convert->width;
-	const int height = convert->height;
-	const int width2 = convert->width/2;
-	const int height2 = convert->height/2;
-	IMAGE img;
-
-	switch (convert->input.colorspace & ~XVID_CSP_VFLIP)
-	{
-		case XVID_CSP_YV12 :
-			img.y = convert->input.y;
-			img.v = (uint8_t*)convert->input.y + width*height; 
-			img.u = (uint8_t*)convert->input.y + width*height + width2*height2;
-			image_output(&img, width, height, width,
-						convert->output.y, convert->output.y_stride,
-						convert->output.colorspace, convert->interlacing);
-			break;
-
-		default :
-			return XVID_ERR_FORMAT;
-	}
-
-
-	emms();
-	return XVID_ERR_OK;
-}
-
-
-int
-xvid_init(void *handle,
-		  int opt,
-		  void *param1,
-		  void *param2)
-{
-	switch(opt)
-	{
-		case XVID_INIT_INIT :
-			return xvid_init_init((XVID_INIT_PARAM*)param1);
-
-		case XVID_INIT_CONVERT :
-			return xvid_init_convert((XVID_INIT_CONVERTINFO*)param1);
-
-		default :
-			return XVID_ERR_FAIL;
-	}
-}
-
 /*****************************************************************************
  * XviD Native decoder entry point
  *
@@ -574,7 +454,7 @@
 {
 	switch (opt) {
 	case XVID_DEC_DECODE:
-		return decoder_decode((DECODER *) handle, (XVID_DEC_FRAME *) param1, (XVID_DEC_STATS*) param2);
+		return decoder_decode((DECODER *) handle, (XVID_DEC_FRAME *) param1);
 
 	case XVID_DEC_CREATE:
 		return decoder_create((XVID_DEC_PARAM *) param1);
@@ -606,11 +486,6 @@
 {
 	switch (opt) {
 	case XVID_ENC_ENCODE:
-
-		if (((Encoder *) handle)->mbParam.max_bframes >= 0)
-		return encoder_encode_bframes((Encoder *) handle, (XVID_ENC_FRAME *) param1,
-							  (XVID_ENC_STATS *) param2);
-		else 
 		return encoder_encode((Encoder *) handle, (XVID_ENC_FRAME *) param1,
 							  (XVID_ENC_STATS *) param2);