Diff of /xvidcore/src/xvid.c

-revision 1.40, Sun Feb  9 19:32:52 2003 UTC
+revision 1.41, Sat Feb 15 15:22:17 2003 UTC
 Line 3
   *  XVID MPEG-4 VIDEO CODEC
   *  - Native API implementation  -
   *
-  *  Copyright(C) 2001-2002 Peter Ross <pross@xvid.org>
+  *  This program is an implementation of a part of one or more MPEG-4
+  *  Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
+  *  to use this software module in hardware or software products are
+  *  advised that its use may infringe existing patents or copyrights, and
+  *  any such use would be at such party's own risk.  The original
+  *  developer of this software module and his/her company, and subsequent
+  *  editors and their companies, will have no liability for use of this
+  *  software or modifications or derivatives thereof.
   *
-  *  This file is part of XviD, a free MPEG-4 video encoder/decoder
+  *  This program is free software ; you can redistribute it and/or modify
-  *
+  *  it under the terms of the GNU General Public License as published by
-  *  XviD is free software; you can redistribute it and/or modify it
-  *  under the terms of the GNU General Public License as published by
   *  the Free Software Foundation; either version 2 of the License, or
   *  (at your option) any later version.
   *
-Line 21
+Line 26
   *  along with this program; if not, write to the Free Software
   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
   *
-  *  Under section 8 of the GNU General Public License, the copyright
+  ****************************************************************************/
-  *  holders of XVID explicitly forbid distribution in the following
-  *  countries:
+ /*****************************************************************************
   *
-  *    - Japan
+  *  History
-  *    - United States of America
+  *
-  *
+  *      - 23.06.2002    added XVID_CPU_CHKONLY
-  *  Linking XviD statically or dynamically with other modules is making a
+  *  - 17.03.2002        Added interpolate8x8_halfpel_hv_xmm
-  *  combined work based on XviD.  Thus, the terms and conditions of the
+  *  - 22.12.2001  API change: added xvid_init() - Isibaar
-  *  GNU General Public License cover the whole combination.
+  *  - 16.12.2001        initial version; (c)2001 peter ross <pross@cs.rmit.edu.au>
-  *
-  *  As a special exception, the copyright holders of XviD give you
-  *  permission to link XviD with independent modules that communicate with
-  *  XviD solely through the VFW1.1 and DShow interfaces, regardless of the
-  *  license terms of these independent modules, and to copy and distribute
-  *  the resulting combined work under terms of your choice, provided that
-  *  every copy of the combined work is accompanied by a complete copy of
-  *  the source code of XviD (the version of XviD used to produce the
-  *  combined work), being distributed under the terms of the GNU General
-  *  Public License plus this exception.  An independent module is a module
-  *  which is not derived from or based on XviD.
-  *
-  *  Note that people who make modified versions of XviD are not obligated
-  *  to grant this special exception for their modified versions; it is
-  *  their choice whether to do so.  The GNU General Public License gives
-  *  permission to release a modified version without this exception; this
-  *  exception also makes it possible to release a modified version which
-  *  carries forward this exception.
   *
   * $Id$
   *
   ****************************************************************************/
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <time.h>
  #include "xvid.h"
  #include "decoder.h"
  #include "encoder.h"
-Line 62
+Line 54
  #include "dct/fdct.h"
  #include "image/colorspace.h"
  #include "image/interpolate8x8.h"
+ #include "image/reduced.h"
  #include "utils/mem_transfer.h"
+ #include "utils/mbfunctions.h"
  #include "quant/quant_h263.h"
  #include "quant/quant_mpeg4.h"
  #include "motion/motion.h"
-Line 71
+Line 65
  #include "utils/timer.h"
  #include "bitstream/mbcoding.h"
- #if defined(ARCH_IS_IA32) && defined(EXPERIMENTAL_SSE2_CODE)
+ #if defined(ARCH_IS_IA32)
- #ifdef _MSC_VER
+ #if defined(_MSC_VER)
  #include <windows.h>
  #else
  #include <signal.h>
  #include <setjmp.h>
- #endif
- #ifndef _MSC_VER
  static jmp_buf mark;
-Line 94
+Line 84
  /*
-  * Calls the funcptr, and returns whether SIGILL (illegal instruction) was signalled
+ calls the funcptr, and returns whether SIGILL (illegal instruction) was signalled
-  * Return values:
+ return values:
-  * -1 : could not determine
+ -1 : could not determine
-  * 0  : SIGILL was *not* signalled
+ 0  : SIGILL was *not* signalled
-  * 1  : SIGILL was signalled
+ 1  : SIGILL was signalled
   */
  int
  sigill_check(void (*func)())
  {
- #ifdef _MSC_VER
+ #if defined(_MSC_VER)
          _try {
                  func();
          }
-Line 138
+Line 128
  }
  #endif
+ /*
+  * Detect the cpu flags of the host.
+  *
+  * Returns a bitmask of XVID_CPU_* values.  XVID_CPU_ASM is always set.  On
+  * IA32 the result of check_cpu_features() is OR-ed in, then SSE and SSE2 are
+  * each cleared again when sigill_check() returns non-zero for the matching
+  * trigger routine.  On PowerPC builds compiled with ARCH_IS_PPC_ALTIVEC,
+  * XVID_CPU_ALTIVEC is added.
+  */
+ static unsigned int
+ detect_cpu_flags(void)
+ {
+         /* enable native assembly optimizations by default */
+         unsigned int cpu_flags = XVID_CPU_ASM;
+ #if defined(ARCH_IS_IA32)
+         cpu_flags |= check_cpu_features();
+         /*
+          * Any non-zero sigill_check() result drops the flag; that includes -1
+          * ("could not determine"), so an inconclusive probe disables the
+          * extension rather than risking an illegal instruction later.
+          * NOTE(review): the *_os_trigger routines presumably execute one
+          * SSE/SSE2 instruction to probe OS support -- confirm.
+          */
+         if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))
+                 cpu_flags &= ~XVID_CPU_SSE;
+         if ((cpu_flags & XVID_CPU_SSE2) && sigill_check(sse2_os_trigger))
+                 cpu_flags &= ~XVID_CPU_SSE2;
+ #endif
+ #if defined(ARCH_IS_PPC) && defined(ARCH_IS_PPC_ALTIVEC)
+         /* no runtime probe here: AltiVec is selected purely at compile time */
+         cpu_flags |= XVID_CPU_ALTIVEC;
+ #endif
+         return cpu_flags;
+ }
  /*****************************************************************************
   * XviD Init Entry point
   *
-Line 152
+Line 169
   *
   ****************************************************************************/
- int
- xvid_init(void *handle,
+ static
-                   int opt,
+ int xvid_init_init(XVID_INIT_PARAM * init_param)
-                   void *param1,
-                   void *param2)
  {
          int cpu_flags;
-         XVID_INIT_PARAM *init_param;
-         init_param = (XVID_INIT_PARAM *) param1;
          /* Inform the client the API version */
          init_param->api_version = API_VERSION;
-Line 176
+Line 188
          } else {
-                 cpu_flags = check_cpu_features();
+                 cpu_flags = detect_cpu_flags();
- #if defined(ARCH_IS_IA32) && defined(EXPERIMENTAL_SSE2_CODE)
-                 if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))
-                         cpu_flags &= ~XVID_CPU_SSE;
-                 if ((cpu_flags & XVID_CPU_SSE2) && sigill_check(sse2_os_trigger))
-                         cpu_flags &= ~XVID_CPU_SSE2;
- #endif
          }
          if ((init_param->cpu_flags & XVID_CPU_CHKONLY))
-Line 225
+Line 229
          transfer_8to16copy = transfer_8to16copy_c;
          transfer_16to8copy = transfer_16to8copy_c;
          transfer_8to16sub  = transfer_8to16sub_c;
+         transfer_8to16subro  = transfer_8to16subro_c;
          transfer_8to16sub2 = transfer_8to16sub2_c;
          transfer_16to8add  = transfer_16to8add_c;
          transfer8x8_copy   = transfer8x8_copy_c;
+         /* Interlacing functions */
+         MBFieldTest = MBFieldTest_c;
          /* Image interpolation related functions */
          interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_c;
          interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_c;
          interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_c;
+         interpolate16x16_lowpass_h = interpolate16x16_lowpass_h_c;
+         interpolate16x16_lowpass_v = interpolate16x16_lowpass_v_c;
+         interpolate16x16_lowpass_hv = interpolate16x16_lowpass_hv_c;
+         interpolate8x8_lowpass_h = interpolate8x8_lowpass_h_c;
+         interpolate8x8_lowpass_v = interpolate8x8_lowpass_v_c;
+         interpolate8x8_lowpass_hv = interpolate8x8_lowpass_hv_c;
+         interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_c;
+         interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_c;
+         interpolate8x8_avg2 = interpolate8x8_avg2_c;
+         interpolate8x8_avg4 = interpolate8x8_avg4_c;
+         /* reduced resolution */
+         copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_C;
+         add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_C;
+         vfilter_31 = xvid_VFilter_31_C;
+         hfilter_31 = xvid_HFilter_31_C;
+         filter_18x18_to_8x8 = xvid_Filter_18x18_To_8x8_C;
+         filter_diff_18x18_to_8x8 = xvid_Filter_Diff_18x18_To_8x8_C;
          /* Initialize internal colorspace transformation tables */
          colorspace_init();
          /* All colorspace transformation functions User Format->YV12 */
+         yv12_to_yv12    = yv12_to_yv12_c;
          rgb555_to_yv12 = rgb555_to_yv12_c;
          rgb565_to_yv12 = rgb565_to_yv12_c;
-         rgb24_to_yv12  = rgb24_to_yv12_c;
+         bgr_to_yv12     = bgr_to_yv12_c;
-         rgb32_to_yv12  = rgb32_to_yv12_c;
+         bgra_to_yv12    = bgra_to_yv12_c;
-         yuv_to_yv12    = yuv_to_yv12_c;
+         abgr_to_yv12    = abgr_to_yv12_c;
+         rgba_to_yv12    = rgba_to_yv12_c;
          yuyv_to_yv12   = yuyv_to_yv12_c;
          uyvy_to_yv12   = uyvy_to_yv12_c;
+         rgb555i_to_yv12 = rgb555i_to_yv12_c;
+         rgb565i_to_yv12 = rgb565i_to_yv12_c;
+         bgri_to_yv12    = bgri_to_yv12_c;
+         bgrai_to_yv12   = bgrai_to_yv12_c;
+         abgri_to_yv12   = abgri_to_yv12_c;
+         rgbai_to_yv12   = rgbai_to_yv12_c;
+         yuyvi_to_yv12   = yuyvi_to_yv12_c;
+         uyvyi_to_yv12   = uyvyi_to_yv12_c;
          /* All colorspace transformation functions YV12->User format */
          yv12_to_rgb555 = yv12_to_rgb555_c;
          yv12_to_rgb565 = yv12_to_rgb565_c;
-         yv12_to_rgb24  = yv12_to_rgb24_c;
+         yv12_to_bgr     = yv12_to_bgr_c;
-         yv12_to_rgb32  = yv12_to_rgb32_c;
+         yv12_to_bgra    = yv12_to_bgra_c;
-         yv12_to_yuv    = yv12_to_yuv_c;
+         yv12_to_abgr    = yv12_to_abgr_c;
+         yv12_to_rgba    = yv12_to_rgba_c;
          yv12_to_yuyv   = yv12_to_yuyv_c;
          yv12_to_uyvy   = yv12_to_uyvy_c;
+         yv12_to_rgb555i = yv12_to_rgb555i_c;
+         yv12_to_rgb565i = yv12_to_rgb565i_c;
+         yv12_to_bgri    = yv12_to_bgri_c;
+         yv12_to_bgrai   = yv12_to_bgrai_c;
+         yv12_to_abgri   = yv12_to_abgri_c;
+         yv12_to_rgbai   = yv12_to_rgbai_c;
+         yv12_to_yuyvi   = yv12_to_yuyvi_c;
+         yv12_to_uyvyi   = yv12_to_uyvyi_c;
          /* Functions used in motion estimation algorithms */
          calc_cbp = calc_cbp_c;
          sad16    = sad16_c;
-Line 262
+Line 314
          sad16bi  = sad16bi_c;
          sad8bi   = sad8bi_c;
          dev16    = dev16_c;
+         sad16v   = sad16v_c;
-         Halfpel8_Refine = Halfpel8_Refine_c;
+ //      Halfpel8_Refine = Halfpel8_Refine_c;
+ #if defined(ARCH_IS_IA32)
+         if ((cpu_flags & XVID_CPU_ASM))
+         {
+                 vfilter_31 = xvid_VFilter_31_x86;
+                 hfilter_31 = xvid_HFilter_31_x86;
+         }
+         if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||
+                 (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||
+                 (cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2))
+         {
+                 /* Restore FPU context : emms_c is a nop function */
+                 emms = emms_mmx;
+         }
- #ifdef ARCH_IS_IA32
+         if ((cpu_flags & XVID_CPU_MMX)) {
-         if ((cpu_flags & XVID_CPU_MMX) > 0) {
                  /* Forward and Inverse Discrete Cosine Transformation functions */
                  fdct = fdct_mmx;
                  idct = idct_mmx;
-                 /* To restore FPU context after mmx use */
-                 emms = emms_mmx;
                  /* Quantization related functions */
                  quant_intra   = quant_intra_mmx;
                  dequant_intra = dequant_intra_mmx;
-Line 290
+Line 355
                  transfer_8to16copy = transfer_8to16copy_mmx;
                  transfer_16to8copy = transfer_16to8copy_mmx;
                  transfer_8to16sub  = transfer_8to16sub_mmx;
+                 transfer_8to16subro  = transfer_8to16subro_mmx;
                  transfer_8to16sub2 = transfer_8to16sub2_mmx;
                  transfer_16to8add  = transfer_16to8add_mmx;
                  transfer8x8_copy   = transfer8x8_copy_mmx;
+                 /* Interlacing Functions */
+                 MBFieldTest = MBFieldTest_mmx;
                  /* Image Interpolation related functions */
                  interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_mmx;
                  interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_mmx;
                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_mmx;
-                 /* Image RGB->YV12 related functions */
+                 interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_mmx;
-                 rgb24_to_yv12 = rgb24_to_yv12_mmx;
+                 interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_mmx;
-                 rgb32_to_yv12 = rgb32_to_yv12_mmx;
-                 yuv_to_yv12   = yuv_to_yv12_mmx;
+                 interpolate8x8_avg2 = interpolate8x8_avg2_mmx;
+                 interpolate8x8_avg4 = interpolate8x8_avg4_mmx;
+                 /* reduced resolution */
+                 copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_mmx;
+                 add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_mmx;
+                 hfilter_31 = xvid_HFilter_31_mmx;
+                 filter_18x18_to_8x8 = xvid_Filter_18x18_To_8x8_mmx;
+                 filter_diff_18x18_to_8x8 = xvid_Filter_Diff_18x18_To_8x8_mmx;
+                 /* image input xxx_to_yv12 related functions */
+                 yv12_to_yv12  = yv12_to_yv12_mmx;
+                 bgr_to_yv12   = bgr_to_yv12_mmx;
+                 bgra_to_yv12  = bgra_to_yv12_mmx;
                  yuyv_to_yv12  = yuyv_to_yv12_mmx;
                  uyvy_to_yv12  = uyvy_to_yv12_mmx;
-                 /* Image YV12->RGB related functions */
+                 /* image output yv12_to_xxx related functions */
-                 yv12_to_rgb24 = yv12_to_rgb24_mmx;
+                 yv12_to_bgr   = yv12_to_bgr_mmx;
-                 yv12_to_rgb32 = yv12_to_rgb32_mmx;
+                 yv12_to_bgra  = yv12_to_bgra_mmx;
                  yv12_to_yuyv  = yv12_to_yuyv_mmx;
                  yv12_to_uyvy  = yv12_to_uyvy_mmx;
+                 yv12_to_yuyvi = yv12_to_yuyvi_mmx;
+                 yv12_to_uyvyi = yv12_to_uyvyi_mmx;
                  /* Motion estimation related functions */
                  calc_cbp = calc_cbp_mmx;
                  sad16    = sad16_mmx;
-Line 320
+Line 404
                  sad16bi = sad16bi_mmx;
                  sad8bi  = sad8bi_mmx;
                  dev16    = dev16_mmx;
+                 sad16v   = sad16v_mmx;
          }
          /* these 3dnow functions are faster than mmx, but slower than xmm. */
-         if ((cpu_flags & XVID_CPU_3DNOW) > 0) {
+         if ((cpu_flags & XVID_CPU_3DNOW)) {
+                 emms = emms_3dn;
                  /* ME functions */
                  sad16bi = sad16bi_3dn;
                  sad8bi  = sad8bi_3dn;
+                 yuyv_to_yv12  = yuyv_to_yv12_3dn;
+                 uyvy_to_yv12  = uyvy_to_yv12_3dn;
          }
-         if ((cpu_flags & XVID_CPU_MMXEXT) > 0) {
+         if ((cpu_flags & XVID_CPU_MMXEXT)) {
                  /* Inverse DCT */
                  idct = idct_xmm;
-Line 342
+Line 431
                  interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_xmm;
                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_xmm;
+                 /* reduced resolution */
+                 copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_xmm;
+                 add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_xmm;
                  /* Quantization */
+                 quant4_intra = quant4_intra_xmm;
+                 quant4_inter = quant4_inter_xmm;
                  dequant_intra = dequant_intra_xmm;
                  dequant_inter = dequant_inter_xmm;
-Line 350
+Line 446
                  transfer_8to16sub2 = transfer_8to16sub2_xmm;
                  /* Colorspace transformation */
-                 yuv_to_yv12 = yuv_to_yv12_xmm;
+                 yv12_to_yv12  = yv12_to_yv12_xmm;
+                 yuyv_to_yv12  = yuyv_to_yv12_xmm;
+                 uyvy_to_yv12  = uyvy_to_yv12_xmm;
                  /* ME functions */
                  sad16 = sad16_xmm;
-Line 358
+Line 456
                  sad16bi = sad16bi_xmm;
                  sad8bi  = sad8bi_xmm;
                  dev16 = dev16_xmm;
+                 sad16v   = sad16v_xmm;
          }
-         if ((cpu_flags & XVID_CPU_3DNOW) > 0) {
+         if ((cpu_flags & XVID_CPU_3DNOW)) {
                  /* Interpolation */
                  interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn;
-Line 369
+Line 467
                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn;
          }
-         if ((cpu_flags & XVID_CPU_SSE2) > 0) {
+         if ((cpu_flags & XVID_CPU_3DNOWEXT)) {
- #ifdef EXPERIMENTAL_SSE2_CODE
+                 /* Inverse DCT */
+                 idct =  idct_3dne;
+                 /* Buffer transfer */
+                 transfer_8to16copy =  transfer_8to16copy_3dne;
+                 transfer_16to8copy = transfer_16to8copy_3dne;
+                 transfer_8to16sub =  transfer_8to16sub_3dne;
+                 transfer_8to16subro =  transfer_8to16subro_3dne;
+                 transfer_8to16sub2 =  transfer_8to16sub2_3dne;
+                 transfer_16to8add = transfer_16to8add_3dne;
+                 transfer8x8_copy = transfer8x8_copy_3dne;
+                 /* Quantization */
+                 dequant4_intra = dequant4_intra_3dne;
+                 dequant4_inter = dequant4_inter_3dne;
+                 quant_intra = quant_intra_3dne;
+                 quant_inter = quant_inter_3dne;
+                 dequant_intra = dequant_intra_3dne;
+                 dequant_inter = dequant_inter_3dne;
+                 /* ME functions */
+                 calc_cbp = calc_cbp_3dne;
+                 sad16 = sad16_3dne;
+                 sad8 = sad8_3dne;
+                 sad16bi = sad16bi_3dne;
+                 sad8bi = sad8bi_3dne;
+                 dev16 = dev16_3dne;
+                 /* Interpolation */
+                 interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dne;
+                 interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dne;
+                 interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne;
+         }
+         if ((cpu_flags & XVID_CPU_SSE2)) {
                  calc_cbp = calc_cbp_sse2;
-Line 380
+Line 514
                  quant_inter   = quant_inter_sse2;
                  dequant_inter = dequant_inter_sse2;
-                 /* ME */
+ #if defined(EXPERIMENTAL_SSE2_CODE)
+                 /* ME; slower than xmm */
                  sad16    = sad16_sse2;
                  dev16    = dev16_sse2;
+ #endif
                  /* Forward and Inverse DCT */
                  idct  = idct_sse2;
                  fdct = fdct_sse2;
- #endif
          }
  #endif
- #ifdef ARCH_IS_IA64
+ #if defined(ARCH_IS_IA64)
-         if ((cpu_flags & XVID_CPU_IA64) > 0) { //use assembler routines?
+         if ((cpu_flags & XVID_CPU_ASM)) { //use assembler routines?
            idct_ia64_init();
            fdct = fdct_ia64;
-           idct = idct_ia64;
+           idct = idct_ia64;   //not yet working, crashes
            interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_ia64;
            interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_ia64;
            interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_ia64;
-Line 404
+Line 537
            sad16bi = sad16bi_ia64;
            sad8 = sad8_ia64;
            dev16 = dev16_ia64;
-           Halfpel8_Refine = Halfpel8_Refine_ia64;
+ //        Halfpel8_Refine = Halfpel8_Refine_ia64;
            quant_intra = quant_intra_ia64;
            dequant_intra = dequant_intra_ia64;
            quant_inter = quant_inter_ia64;
-Line 415
+Line 548
            transfer_8to16sub2 = transfer_8to16sub2_ia64;
            transfer_16to8add = transfer_16to8add_ia64;
            transfer8x8_copy = transfer8x8_copy_ia64;
- //        DEBUG("Using IA-64 assembler routines.\n");
+           DEBUG("Using IA-64 assembler routines.\n");
          }
  #endif
- #ifdef ARCH_IS_PPC
+ #if defined(ARCH_IS_PPC)
- #ifdef ARCH_IS_PPC_ALTIVEC
+         if ((cpu_flags & XVID_CPU_ASM))
+         {
+                 calc_cbp = calc_cbp_ppc;
+         }
+         if ((cpu_flags & XVID_CPU_ALTIVEC))
+         {
          calc_cbp = calc_cbp_altivec;
          fdct = fdct_altivec;
          idct = idct_altivec;
-Line 428
+Line 567
          sad16 = sad16_altivec;
          sad8 = sad8_altivec;
          dev16 = dev16_altivec;
- #else
+         }
-         calc_cbp = calc_cbp_ppc;
  #endif
+         return XVID_ERR_OK;
+ }
+ /*
+  * Convert a YV12 picture to the colorspace requested in convert->output.
+  *
+  * The input must be XVID_CSP_YV12 (the XVID_CSP_VFLIP bit is masked off
+  * before the check); any other input colorspace yields XVID_ERR_FORMAT.
+  * The three input planes are taken from one contiguous buffer starting at
+  * convert->input.y: Y (width*height bytes), then V, then U, each chroma
+  * plane being (width/2)*(height/2) bytes.
+  *
+  * Returns XVID_ERR_OK after the conversion, XVID_ERR_FORMAT otherwise.
+  */
+ static int
+ xvid_init_convert(XVID_INIT_CONVERTINFO* convert)
+ {
+         const int width = convert->width;
+         const int height = convert->height;
+         const int luma_size = width * height;
+         const int chroma_size = (width / 2) * (height / 2);
+         IMAGE img;
+         /* guard: only YV12 input is supported, with or without the flip bit */
+         if ((convert->input.colorspace & ~XVID_CSP_VFLIP) != XVID_CSP_YV12)
+                 return XVID_ERR_FORMAT;
+         img.y = convert->input.y;
+         img.v = (uint8_t*)convert->input.y + luma_size;
+         img.u = (uint8_t*)convert->input.y + luma_size + chroma_size;
+         /* `width` is passed twice; the second is presumably the source stride -- TODO confirm */
+         image_output(&img, width, height, width,
+                                 convert->output.y, convert->output.y_stride,
+                                 convert->output.colorspace, convert->interlacing);
+         /* run the emms hook once the conversion routine has finished */
+         emms();
+         return XVID_ERR_OK;
+ }
+ /*
+  * Store `value` (converted to uint8_t) into each of the first `size` bytes
+  * of `block`.  A `size` of zero or less leaves `block` untouched.
+  */
+ void fill8(uint8_t * block, int size, int value)
+ {
+         int left;
+         for (left = size; left > 0; left--)
+                 *block++ = value;
+ }
+ /*
+  * Store `value` (converted to int16_t) into each of the first `size`
+  * elements of `block`.  A `size` of zero or less leaves `block` untouched.
+  */
+ void fill16(int16_t * block, int size, int value)
+ {
+         int left;
+         for (left = size; left > 0; left--)
+                 *block++ = value;
+ }
+ /*
+  * Pseudo-random integer in [min, max), drawn with rand().  Requires
+  * max > min (max == min would take a modulo by zero).  The arguments and
+  * the whole expansion are parenthesised so the macro can be used inside
+  * larger expressions and with compound arguments.  `min` is evaluated
+  * twice, so pass side-effect-free expressions.
+  */
+ #define RANDOM(min,max) ((min) + (rand() % ((max) - (min))))
+ /*
+  * Fill the first `size` bytes of `block` with values produced by
+  * RANDOM(min,max), one rand() draw per byte, each converted to uint8_t.
+  * Nothing is written when `size` is zero or less.
+  */
+ void random8(uint8_t * block, int size, int min, int max)
+ {
+         uint8_t *cursor = block;
+         int left = size;
+         while (left > 0) {
+                 *cursor++ = RANDOM(min,max);
+                 left--;
+         }
+ }
+ /*
+  * Fill the first `size` elements of `block` with values produced by
+  * RANDOM(min,max), one rand() draw per element, each converted to int16_t.
+  * Nothing is written when `size` is zero or less.
+  */
+ void random16(int16_t * block, int size, int min, int max)
+ {
+         int16_t *cursor = block;
+         int left = size;
+         while (left > 0) {
+                 *cursor++ = RANDOM(min,max);
+                 left--;
+         }
+ }
+ /*
+  * Compare the first `size` elements of two int16_t blocks.
+  * Returns 1 at the first mismatch, 0 when all compared elements are equal
+  * (including when `size` is zero or less).
+  */
+ int compare16(const int16_t * blockA, const int16_t * blockB, int size)
+ {
+         int pos = 0;
+         while (pos < size) {
+                 if (blockA[pos] != blockB[pos])
+                         return 1;
+                 pos++;
+         }
+         return 0;
+ }
+ /*
+  * Accumulate ABS(blockA[k] - blockB[k]) over the first `size` elements and
+  * return the total; the result is 0 when `size` is zero or less.
+  */
+ int diff16(const int16_t * blockA, const int16_t * blockB, int size)
+ {
+         int total = 0;
+         int pos = 0;
+         while (pos < size) {
+                 total += ABS(blockA[pos]-blockB[pos]);
+                 pos++;
+         }
+         return total;
+ }
+ /* bits of the `flags` argument taken by the self-test routines */
+ #define XVID_TEST_RANDOM        0x00000001      /* random input data */
+ #define XVID_TEST_VERBOSE       0x00000002      /* verbose error output */
+ /* `test` selectors for test_transform() */
+ #define TEST_FORWARD    0x00000001      /* forward transform; when clear the inverse transform is tested */
+ #define TEST_FDCT  (TEST_FORWARD)
+ #define TEST_IDCT  (0)
+ /*
+  * Time two DCT routines on identical input blocks and report how far their
+  * outputs differ.
+  *
+  * funcA : first routine, passed as void* and cast to fdctFunc* or idctFunc*
+  *         depending on `test`
+  * funcB : second routine, cast the same way; its timing is the one printed
+  * nameB : label printed in front of the results
+  * test  : TEST_FDCT or TEST_IDCT; selects the cast and the random input range
+  * flags : with XVID_TEST_RANDOM each block is filled by random16(), otherwise
+  *         every element of the block is set to the iteration index
+  *
+  * Runs 64*64 iterations on one 64-element block.  Prints nameB, the average
+  * read_counter() delta of funcB per iteration, and the smallest and largest
+  * per-block error.  Always returns 0.
+  */
+ int test_transform(void * funcA, void * funcB, const char * nameB,
+                                    int test, int flags)
+ {
+         int i;
+         int64_t timeSTART;
+         /* NOTE: timeA is accumulated below but never printed */
+         int64_t timeA = 0;
+         int64_t timeB = 0;
+         DECLARE_ALIGNED_MATRIX(arrayA, 1, 64, int16_t, CACHE_LINE);
+         DECLARE_ALIGNED_MATRIX(arrayB, 1, 64, int16_t, CACHE_LINE);
+         int min, max;
+         int count = 0;
+         int tmp;
+         /* sentinel start value; replaced by the first block whose error is smaller */
+         int min_error = 0x10000*64;
+         int max_error = 0;
+         /* random input range; only used when XVID_TEST_RANDOM is set */
+         if ((test & TEST_FORWARD))      /* forward */
+         {
+                 min = -256;
+                 max = 255;
+         }else{          /* inverse */
+                 min = -2048;
+                 max = 2047;
+         }
+         for (i = 0; i < 64*64; i++)
+         {
+                 if ((flags & XVID_TEST_RANDOM))
+                 {
+                         random16(arrayA, 64, min, max);
+                 }else{
+                         /* constant block: every element equals the iteration index */
+                         fill16(arrayA, 64, i);
+                 }
+                 /* both routines transform in place, so each gets its own copy of the input */
+                 memcpy(arrayB, arrayA, 64*sizeof(int16_t));
+                 if ((test & TEST_FORWARD))
+                 {
+                         timeSTART = read_counter();
+                         ((fdctFunc*)funcA)(arrayA);
+                         timeA += read_counter() - timeSTART;
+                         timeSTART = read_counter();
+                         ((fdctFunc*)funcB)(arrayB);
+                         timeB += read_counter() - timeSTART;
+                 }
+                 else
+                 {
+                         timeSTART = read_counter();
+                         ((idctFunc*)funcA)(arrayA);
+                         timeA += read_counter() - timeSTART;
+                         timeSTART = read_counter();
+                         ((idctFunc*)funcB)(arrayB);
+                         timeB += read_counter() - timeSTART;
+                 }
+                 /* diff16() total divided by the 64 elements (integer division truncates) */
+                 tmp = diff16(arrayA, arrayB, 64) / 64;
+                 if (tmp > max_error)
+                         max_error = tmp;
+                 if (tmp < min_error)
+                         min_error = tmp;
+                 count++;
+         }
+         /* print the "average difference" of best/worst transforms */
+         printf("%s:\t%i\t(min_error:%i, max_error:%i)\n", nameB, (int)(timeB / count), min_error, max_error);
+         return 0;
+ }
+ /* `test` selectors for test_quant(): two independent bits and their four combinations */
+ #define TEST_QUANT      0x00000001      /* forward quantization */
+ #define TEST_INTRA      0x00000002      /* intra */
+ #define TEST_QUANT_INTRA        (TEST_QUANT|TEST_INTRA)
+ #define TEST_QUANT_INTER        (TEST_QUANT)
+ #define TEST_DEQUANT_INTRA      (TEST_INTRA)
+ #define TEST_DEQUANT_INTER      (0)
+ /*
+  * Time two (de)quantization routines on identical input and count the
+  * inputs for which their results differ.
+  *
+  * funcA : first routine, passed as void* and cast to quanth263_intraFunc* or
+  *         quanth263_interFunc* depending on TEST_INTRA
+  * funcB : second routine, cast the same way; its timing is the one printed
+  * nameB : label printed in front of the results
+  * test  : one of TEST_QUANT_INTRA, TEST_QUANT_INTER, TEST_DEQUANT_INTRA,
+  *         TEST_DEQUANT_INTER
+  * flags : XVID_TEST_RANDOM selects random input blocks; XVID_TEST_VERBOSE
+  *         prints one line per mismatch
+  *
+  * Loops over every quantizer 1..31 and every value i in [min, max).  Prints
+  * nameB, the average read_counter() delta of funcB per call, and the error
+  * count when it is non-zero.  Always returns 0.
+  */
+ int test_quant(void * funcA, void * funcB, const char * nameB,
+                            int test, int flags)
+ {
+         int q,i;
+         int64_t timeSTART;
+         /* NOTE: timeA is accumulated below but never printed */
+         int64_t timeA = 0;
+         int64_t timeB = 0;
+         /* only assigned on the inter path; only read for quant-inter tests */
+         int retA, retB;
+         DECLARE_ALIGNED_MATRIX(arrayX, 1, 64, int16_t, CACHE_LINE);
+         DECLARE_ALIGNED_MATRIX(arrayA, 1, 64, int16_t, CACHE_LINE);
+         DECLARE_ALIGNED_MATRIX(arrayB, 1, 64, int16_t, CACHE_LINE);
+         int min, max;
+         int count = 0;
+         int errors = 0;
+         /* input value range; also bounds the inner loop below */
+         if ((test & TEST_QUANT))        /* quant */
+         {
+                 min = -2048;
+                 max = 2047;
+         }else{          /* dequant */
+                 min = -256;
+                 max = 255;
+         }
+         for (q = 1; q <= 31; q++)       /* quantizer */
+         {
+                 for (i = min; i < max; i++)     /* input coeff */
+                 {
+                         if ((flags & XVID_TEST_RANDOM))
+                         {
+                                 random16(arrayX, 64, min, max);
+                         }else{
+                                 fill16(arrayX, 64, i);
+                         }
+                         /* arrayX is the shared input; each routine writes its own output array */
+                         if ((test & TEST_INTRA))        /* intra */
+                         {
+                                 /* q is passed twice; the last argument is presumably the DC scaler -- TODO confirm */
+                                 timeSTART = read_counter();
+                                 ((quanth263_intraFunc*)funcA)(arrayA, arrayX, q, q);
+                                 timeA += read_counter() - timeSTART;
+                                 timeSTART = read_counter();
+                                 ((quanth263_intraFunc*)funcB)(arrayB, arrayX, q, q);
+                                 timeB += read_counter() - timeSTART;
+                         }
+                         else    /* inter */
+                         {
+                                 timeSTART = read_counter();
+                                 retA = ((quanth263_interFunc*)funcA)(arrayA, arrayX, q);
+                                 timeA += read_counter() - timeSTART;
+                                 timeSTART = read_counter();
+                                 retB = ((quanth263_interFunc*)funcB)(arrayB, arrayX, q);
+                                 timeB += read_counter() - timeSTART;
+                         }
+                         /* compare return value from quant_inter, and compare (de)quantiz'd arrays */
+                         /* NOTE(review): arrayA/arrayB are not cleared first, so this assumes both routines write all 64 outputs */
+                         if ( ((test&TEST_QUANT) && !(test&TEST_INTRA) && retA != retB ) ||
+                                 compare16(arrayA, arrayB, 64))
+                         {
+                                 errors++;
+                                 if ((flags & XVID_TEST_VERBOSE))
+                                         printf("%s error: q=%i, i=%i\n", nameB, q, i);
+                         }
+                         count++;
+                 }
+         }
+         printf("%s:\t%i", nameB, (int)(timeB / count));
+         if (errors>0)
+                 printf("\t(%i errors out of %i)", errors, count);
+         printf("\n");
+         return 0;
+ }
+ int xvid_init_test(int flags)
+ {
+         int cpu_flags;
+         srand(time(0));
+         printf("xvid_init_test\n");
+ #if defined(ARCH_X86)
+         cpu_flags = detect_cpu_flags();
+         idct_int32_init();
+         emms_mmx();
+         printf("--- fdct ---\n");
+                 test_transform(fdct_int32, fdct_int32, "c", TEST_FDCT, flags);
+         if (cpu_flags & XVID_CPU_MMX)
+                 test_transform(fdct_int32, fdct_mmx, "mmx", TEST_FDCT, flags);
+         if (cpu_flags & XVID_CPU_SSE2)
+                 test_transform(fdct_int32, fdct_sse2, "sse2", TEST_FDCT, flags);
+         printf("\n--- idct ---\n");
+                 test_transform(idct_int32, idct_int32, "c", TEST_IDCT, flags);
+         if (cpu_flags & XVID_CPU_MMX)
+                 test_transform(idct_int32, idct_mmx, "mmx", TEST_IDCT, flags);
+         if (cpu_flags & XVID_CPU_MMXEXT)
+                 test_transform(idct_int32, idct_xmm, "xmm", TEST_IDCT, flags);
+         if (cpu_flags & XVID_CPU_3DNOWEXT)
+                 test_transform(idct_int32, idct_3dne, "3dne", TEST_IDCT, flags);
+         if (cpu_flags & XVID_CPU_SSE2)
+                 test_transform(idct_int32, idct_sse2, "sse2", TEST_IDCT, flags);
+         printf("\n--- quant intra ---\n");
+                 test_quant(quant_intra_c, quant_intra_c, "c", TEST_QUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_MMX)
+                 test_quant(quant_intra_c, quant_intra_mmx, "mmx", TEST_QUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_3DNOWEXT)
+                 test_quant(quant_intra_c, quant_intra_3dne, "3dne", TEST_QUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_SSE2)
+                 test_quant(quant_intra_c, quant_intra_sse2, "sse2", TEST_QUANT_INTRA, flags);
+         printf("\n--- quant inter ---\n");
+                 test_quant(quant_inter_c, quant_inter_c, "c", TEST_QUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_MMX)
+                 test_quant(quant_inter_c, quant_inter_mmx, "mmx", TEST_QUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_3DNOWEXT)
+                 test_quant(quant_inter_c, quant_inter_3dne, "3dne", TEST_QUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_SSE2)
+                 test_quant(quant_inter_c, quant_inter_sse2, "sse2", TEST_QUANT_INTER, flags);
+         printf("\n--- dequant intra ---\n");
+                 test_quant(dequant_intra_c, dequant_intra_c, "c", TEST_DEQUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_MMX)
+                 test_quant(dequant_intra_c, dequant_intra_mmx, "mmx", TEST_DEQUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_MMXEXT)
+                 test_quant(dequant_intra_c, dequant_intra_xmm, "xmm", TEST_DEQUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_3DNOWEXT)
+                 test_quant(dequant_intra_c, dequant_intra_3dne, "3dne", TEST_DEQUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_SSE2)
+                 test_quant(dequant_intra_c, dequant_intra_sse2, "sse2", TEST_DEQUANT_INTRA, flags);
+         printf("\n--- dequant inter ---\n");
+                 test_quant(dequant_inter_c, dequant_inter_c, "c", TEST_DEQUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_MMX)
+                 test_quant(dequant_inter_c, dequant_inter_mmx, "mmx", TEST_DEQUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_MMXEXT)
+                 test_quant(dequant_inter_c, dequant_inter_xmm, "xmm", TEST_DEQUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_3DNOWEXT)
+                 test_quant(dequant_inter_c, dequant_inter_3dne, "3dne", TEST_DEQUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_SSE2)
+                 test_quant(dequant_inter_c, dequant_inter_sse2, "sse2", TEST_DEQUANT_INTER, flags);
+         printf("\n--- quant4_intra ---\n");
+                 test_quant(quant4_intra_c, quant4_intra_c, "c", TEST_QUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_MMX)
+                 test_quant(quant4_intra_c, quant4_intra_mmx, "mmx", TEST_QUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_MMXEXT)
+                 test_quant(quant4_intra_c, quant4_intra_xmm, "xmm", TEST_QUANT_INTRA, flags);
+         printf("\n--- quant4_inter ---\n");
+                 test_quant(quant4_inter_c, quant4_inter_c, "c", TEST_QUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_MMX)
+                 test_quant(quant4_inter_c, quant4_inter_mmx, "mmx", TEST_QUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_MMXEXT)
+                 test_quant(quant4_inter_c, quant4_inter_xmm, "xmm", TEST_QUANT_INTER, flags);
+         printf("\n--- dequant4_intra ---\n");
+                 test_quant(dequant4_intra_c, dequant4_intra_c, "c", TEST_DEQUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_MMX)
+                 test_quant(dequant4_intra_c, dequant4_intra_mmx, "mmx", TEST_DEQUANT_INTRA, flags);
+         if (cpu_flags & XVID_CPU_3DNOWEXT)
+                 test_quant(dequant4_intra_c, dequant4_intra_3dne, "3dne", TEST_DEQUANT_INTRA, flags);
+         printf("\n--- dequant4_inter ---\n");
+                 test_quant(dequant4_inter_c, dequant4_inter_c, "c", TEST_DEQUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_MMX)
+                 test_quant(dequant4_inter_c, dequant4_inter_mmx, "mmx", TEST_DEQUANT_INTER, flags);
+         if (cpu_flags & XVID_CPU_3DNOWEXT)
+                 test_quant(dequant4_inter_c, dequant4_inter_3dne, "3dne", TEST_DEQUANT_INTER, flags);
+         emms_mmx();
  #endif
          return XVID_ERR_OK;
  }
+ /*****************************************************************************
+  * XviD init entry point
+  *
+  * Routes an init request to the matching worker according to `opt`:
+  *   XVID_INIT_INIT    -> xvid_init_init(),    param1 as XVID_INIT_PARAM *
+  *   XVID_INIT_CONVERT -> xvid_init_convert(), param1 as XVID_INIT_CONVERTINFO *
+  *   XVID_INIT_TEST    -> xvid_init_test(),    param1 converted to an int
+  *
+  * `handle` and `param2` are not used by this function.
+  *
+  * Returns whatever the selected worker returns, or XVID_ERR_FAIL when `opt`
+  * is not one of the operations listed above.
+  ****************************************************************************/
+ int
+ xvid_init(void *handle,
+                   int opt,
+                   void *param1,
+                   void *param2)
+ {
+         if (opt == XVID_INIT_INIT) {
+                 return xvid_init_init((XVID_INIT_PARAM*)param1);
+         }
+
+         if (opt == XVID_INIT_CONVERT) {
+                 return xvid_init_convert((XVID_INIT_CONVERTINFO*)param1);
+         }
+
+         if (opt == XVID_INIT_TEST) {
+                 /* param1 carries an integer value here, not an address.
+                  * NOTE(review): a direct pointer-to-int cast can truncate where
+                  * pointers are wider than int -- confirm callers only pass
+                  * small values. */
+                 return xvid_init_test((int)param1);
+         }
+
+         /* unrecognised operation */
+         return XVID_ERR_FAIL;
+ }
  /*****************************************************************************
   * XviD Native decoder entry point
   *
-Line 454
+Line 970
  {
          switch (opt) {
          case XVID_DEC_DECODE:
-                 return decoder_decode((DECODER *) handle, (XVID_DEC_FRAME *) param1);
+                 return decoder_decode((DECODER *) handle, (XVID_DEC_FRAME *) param1, (XVID_DEC_STATS*) param2);
          case XVID_DEC_CREATE:
                  return decoder_create((XVID_DEC_PARAM *) param1);
-Line 486
+Line 1002
  {
          switch (opt) {
          case XVID_ENC_ENCODE:
+                 if (((Encoder *) handle)->mbParam.max_bframes >= 0)
+                 return encoder_encode_bframes((Encoder *) handle, (XVID_ENC_FRAME *) param1,
+                                                           (XVID_ENC_STATS *) param2);
+                 else
                  return encoder_encode((Encoder *) handle, (XVID_ENC_FRAME *) param1,
                                                            (XVID_ENC_STATS *) param2);

 Legend:



Removed from v.1.40
 


changed lines


 
Added in v.1.41
 Legend:



Removed from v.1.40
 


changed lines


 
Added in v.1.41
-Removed from v.1.40
+Added in v.1.41

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4