[cvs] / xvidcore / src / xvid.c Repository:
ViewVC logotype

Diff of /xvidcore/src/xvid.c

Parent Directory | Revision Log | View Patch

revision 1.64, Tue Sep 13 12:12:15 2005 UTC revision 1.81, Tue Jun 2 13:06:49 2009 UTC
# Line 28  Line 28 
28  #include <string.h>  #include <string.h>
29  #include <time.h>  #include <time.h>
30    
31    #if !defined(_WIN32)
32    #include <unistd.h>
33    #endif
34    
35    #if defined(__APPLE__) && defined(__MACH__) && !defined(_SC_NPROCESSORS_CONF)
36    #include <sys/types.h>
37    #include <sys/sysctl.h>
38    #ifdef MAX
39    #undef MAX
40    #endif
41    #ifdef MIN
42    #undef MIN
43    #endif
44    #endif
45    
46  #include "xvid.h"  #include "xvid.h"
47  #include "decoder.h"  #include "decoder.h"
48  #include "encoder.h"  #include "encoder.h"
# Line 40  Line 55 
55  #include "utils/mbfunctions.h"  #include "utils/mbfunctions.h"
56  #include "quant/quant.h"  #include "quant/quant.h"
57  #include "motion/motion.h"  #include "motion/motion.h"
58    #include "motion/gmc.h"
59  #include "motion/sad.h"  #include "motion/sad.h"
60  #include "utils/emms.h"  #include "utils/emms.h"
61  #include "utils/timer.h"  #include "utils/timer.h"
# Line 126  Line 142 
142    
143  /* detect cpu flags  */  /* detect cpu flags  */
144  static unsigned int  static unsigned int
145  detect_cpu_flags()  detect_cpu_flags(void)
146  {  {
147          /* enable native assembly optimizations by default */          /* enable native assembly optimizations by default */
148          unsigned int cpu_flags = XVID_CPU_ASM;          unsigned int cpu_flags = XVID_CPU_ASM;
# Line 136  Line 152 
152          if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))          if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))
153                  cpu_flags &= ~XVID_CPU_SSE;                  cpu_flags &= ~XVID_CPU_SSE;
154    
155          if ((cpu_flags & XVID_CPU_SSE2) && sigill_check(sse2_os_trigger))          if ((cpu_flags & (XVID_CPU_SSE2|XVID_CPU_SSE3|XVID_CPU_SSE41)) && sigill_check(sse2_os_trigger))
156                  cpu_flags &= ~XVID_CPU_SSE2;                  cpu_flags &= ~(XVID_CPU_SSE2|XVID_CPU_SSE3|XVID_CPU_SSE41);
157  #endif  #endif
158    
159  #if defined(ARCH_IS_PPC)  #if defined(ARCH_IS_PPC)
# Line 183  Line 199 
199          idct = idct_int32;          idct = idct_int32;
200    
201          /* Only needed on PPC Altivec archs */          /* Only needed on PPC Altivec archs */
202          sadInit = 0;          sadInit = NULL;
203    
204          /* Restore FPU context : emms_c is a no-op function */          /* Restore FPU context : emms_c is a no-op function */
205          emms = emms_c;          emms = emms_c;
# Line 256  Line 272 
272          yv12_to_yv12    = yv12_to_yv12_c;          yv12_to_yv12    = yv12_to_yv12_c;
273          rgb555_to_yv12  = rgb555_to_yv12_c;          rgb555_to_yv12  = rgb555_to_yv12_c;
274          rgb565_to_yv12  = rgb565_to_yv12_c;          rgb565_to_yv12  = rgb565_to_yv12_c;
275            rgb_to_yv12     = rgb_to_yv12_c;
276          bgr_to_yv12     = bgr_to_yv12_c;          bgr_to_yv12     = bgr_to_yv12_c;
277          bgra_to_yv12    = bgra_to_yv12_c;          bgra_to_yv12    = bgra_to_yv12_c;
278          abgr_to_yv12    = abgr_to_yv12_c;          abgr_to_yv12    = abgr_to_yv12_c;
# Line 277  Line 294 
294          /* All colorspace transformation functions YV12->User format */          /* All colorspace transformation functions YV12->User format */
295          yv12_to_rgb555  = yv12_to_rgb555_c;          yv12_to_rgb555  = yv12_to_rgb555_c;
296          yv12_to_rgb565  = yv12_to_rgb565_c;          yv12_to_rgb565  = yv12_to_rgb565_c;
297            yv12_to_rgb     = yv12_to_rgb_c;
298          yv12_to_bgr     = yv12_to_bgr_c;          yv12_to_bgr     = yv12_to_bgr_c;
299          yv12_to_bgra    = yv12_to_bgra_c;          yv12_to_bgra    = yv12_to_bgra_c;
300          yv12_to_abgr    = yv12_to_abgr_c;          yv12_to_abgr    = yv12_to_abgr_c;
# Line 306  Line 324 
324          sse8_16bit = sse8_16bit_c;          sse8_16bit = sse8_16bit_c;
325          sse8_8bit  = sse8_8bit_c;          sse8_8bit  = sse8_8bit_c;
326    
327  #if defined(ARCH_IS_IA32)          init_GMC(cpu_flags);
328    
329    #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
330    
331          if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||          if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||
332                  (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||                  (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||
333                  (cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2))                  (cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2) ||
334            (cpu_flags & XVID_CPU_SSE3) || (cpu_flags & XVID_CPU_SSE41))
335          {          {
336                  /* Restore FPU context : emms_mmx replaces the no-op emms_c */                  /* Restore FPU context : emms_mmx replaces the no-op emms_c */
337                  emms = emms_mmx;                  emms = emms_mmx;
# Line 331  Line 352 
352                  quant_h263_inter   = quant_h263_inter_mmx;                  quant_h263_inter   = quant_h263_inter_mmx;
353                  dequant_h263_intra = dequant_h263_intra_mmx;                  dequant_h263_intra = dequant_h263_intra_mmx;
354                  dequant_h263_inter = dequant_h263_inter_mmx;                  dequant_h263_inter = dequant_h263_inter_mmx;
   
355                  quant_mpeg_intra   = quant_mpeg_intra_mmx;                  quant_mpeg_intra   = quant_mpeg_intra_mmx;
356                  quant_mpeg_inter   = quant_mpeg_inter_mmx;                  quant_mpeg_inter   = quant_mpeg_inter_mmx;
357                  dequant_mpeg_intra = dequant_mpeg_intra_mmx;                  dequant_mpeg_intra = dequant_mpeg_intra_mmx;
358                  dequant_mpeg_inter = dequant_mpeg_inter_mmx;                  dequant_mpeg_inter = dequant_mpeg_inter_mmx;
359    
360    
361                  /* Block related functions */                  /* Block related functions */
362                  transfer_8to16copy = transfer_8to16copy_mmx;                  transfer_8to16copy = transfer_8to16copy_mmx;
363                  transfer_16to8copy = transfer_16to8copy_mmx;                  transfer_16to8copy = transfer_16to8copy_mmx;
# Line 374  Line 395 
395                  image_brightness = image_brightness_mmx;                  image_brightness = image_brightness_mmx;
396    
397                  /* image input xxx_to_yv12 related functions */                  /* image input xxx_to_yv12 related functions */
398    
399                  yv12_to_yv12  = yv12_to_yv12_mmx;                  yv12_to_yv12  = yv12_to_yv12_mmx;
400    
401                  bgr_to_yv12   = bgr_to_yv12_mmx;                  bgr_to_yv12   = bgr_to_yv12_mmx;
402                    rgb_to_yv12   = rgb_to_yv12_mmx;
403                  bgra_to_yv12  = bgra_to_yv12_mmx;                  bgra_to_yv12  = bgra_to_yv12_mmx;
404                    rgba_to_yv12  = rgba_to_yv12_mmx;
405                  yuyv_to_yv12  = yuyv_to_yv12_mmx;                  yuyv_to_yv12  = yuyv_to_yv12_mmx;
406                  uyvy_to_yv12  = uyvy_to_yv12_mmx;                  uyvy_to_yv12  = uyvy_to_yv12_mmx;
407    
# Line 412  Line 437 
437    
438                  yuyv_to_yv12  = yuyv_to_yv12_3dn;                  yuyv_to_yv12  = yuyv_to_yv12_3dn;
439                  uyvy_to_yv12  = uyvy_to_yv12_3dn;                  uyvy_to_yv12  = uyvy_to_yv12_3dn;
440    
441          }          }
442    
443    
# Line 436  Line 462 
462                  interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_xmm;                  interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_xmm;
463    
464                  /* Quantization */                  /* Quantization */
                 quant_mpeg_intra = quant_mpeg_intra_xmm;  
465                  quant_mpeg_inter = quant_mpeg_inter_xmm;                  quant_mpeg_inter = quant_mpeg_inter_xmm;
466    
467                  dequant_h263_intra = dequant_h263_intra_xmm;                  dequant_h263_intra = dequant_h263_intra_xmm;
# Line 447  Line 472 
472                  transfer_8to16sub2ro = transfer_8to16sub2ro_xmm;                  transfer_8to16sub2ro = transfer_8to16sub2ro_xmm;
473    
474                  /* Colorspace transformation */                  /* Colorspace transformation */
475                  yv12_to_yv12  = yv12_to_yv12_xmm;                  /* yv12_to_yv12  = yv12_to_yv12_xmm; */ /* appears to be slow on many machines */
476                  yuyv_to_yv12  = yuyv_to_yv12_xmm;                  yuyv_to_yv12  = yuyv_to_yv12_xmm;
477                  uyvy_to_yv12  = uyvy_to_yv12_xmm;                  uyvy_to_yv12  = uyvy_to_yv12_xmm;
478    
# Line 508  Line 533 
533                          dequant_h263_inter = dequant_h263_inter_3dne;                          dequant_h263_inter = dequant_h263_inter_3dne;
534    
535                          /* ME functions */                          /* ME functions */
                         calc_cbp = calc_cbp_3dne;  
   
536                          sad16 = sad16_3dne;                          sad16 = sad16_3dne;
537                          sad8 = sad8_3dne;                          sad8 = sad8_3dne;
538                          sad16bi = sad16bi_3dne;                          sad16bi = sad16bi_3dne;
# Line 534  Line 557 
557    
558                  /* DCT operators */                  /* DCT operators */
559                  fdct = fdct_sse2_skal;                  fdct = fdct_sse2_skal;
560      /* idct = idct_sse2_skal; */   /* Is now IEEE1180 and Walken compliant. Disabled until fully tested. */                  idct = idct_sse2_skal;   /* Is now IEEE1180 and Walken compliant. */
561    
562                  /* postprocessing */                  /* postprocessing */
563                  image_brightness = image_brightness_sse2;                  image_brightness = image_brightness_sse2;
564    
565            }
566    
567            if ((cpu_flags & XVID_CPU_SSE3)) {
568    
569                    /* SAD operators */
570                    sad16    = sad16_sse3;
571                    dev16    = dev16_sse3;
572          }          }
573    
574  #endif /* ARCH_IS_IA32 */  #endif /* ARCH_IS_IA32 || ARCH_IS_X86_64 */
575    
576  #if defined(ARCH_IS_IA64)  #if defined(ARCH_IS_IA64)
# Line 629  Line 661 
661          }          }
662  #endif  #endif
663    
 #if defined(ARCH_IS_X86_64)  
         /* For now, only XVID_CPU_ASM is looked for, so user can still  
          * disable asm usage the usual way. When Intel EMT64 cpus will  
          * be out, maybe we'll have to check more precisely what cpu  
          * features there really are. */  
         if (cpu_flags & XVID_CPU_ASM) {  
                 /* SIMD state flusher */  
                 emms = emms_3dn;  
   
                 /* DCT operators */  
                 fdct = fdct_skal_x86_64;  
                 idct = idct_x86_64;  
   
                 /* SAD operators */  
                 sad16      = sad16_x86_64;  
                 sad8       = sad8_x86_64;  
                 sad16bi    = sad16bi_x86_64;  
                 sad8bi     = sad8bi_x86_64;  
                 dev16      = dev16_x86_64;  
                 sad16v     = sad16v_x86_64;  
                 sse8_16bit = sse8_16bit_x86_64;  
                 sse8_8bit  = sse8_8bit_x86_64;  
   
                 /* Interpolation operators */  
                 interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_x86_64;  
                 interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_x86_64;  
                 interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_x86_64;  
   
                 interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_x86_64;  
                 interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_x86_64;  
                 interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_x86_64;  
                 interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_x86_64;  
   
                 interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_x86_64;  
                 interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_x86_64;  
   
                 interpolate8x8_avg2 = interpolate8x8_avg2_x86_64;  
                 interpolate8x8_avg4 = interpolate8x8_avg4_x86_64;  
   
                 /* Quantization related functions */  
                 quant_h263_intra   = quant_h263_intra_x86_64;  
                 quant_h263_inter   = quant_h263_inter_x86_64;  
                 dequant_h263_intra = dequant_h263_intra_x86_64;  
                 dequant_h263_inter = dequant_h263_inter_x86_64;  
                 quant_mpeg_intra   = quant_mpeg_intra_x86_64;  
                 quant_mpeg_inter   = quant_mpeg_inter_x86_64;  
                 dequant_mpeg_intra   = dequant_mpeg_intra_x86_64;  
                 dequant_mpeg_inter   = dequant_mpeg_inter_x86_64;  
   
                 /* Block related functions */  
                 transfer_8to16copy  = transfer_8to16copy_x86_64;  
                 transfer_16to8copy  = transfer_16to8copy_x86_64;  
                 transfer_8to16sub   = transfer_8to16sub_x86_64;  
                 transfer_8to16subro = transfer_8to16subro_x86_64;  
                 transfer_8to16sub2  = transfer_8to16sub2_x86_64;  
                 transfer_8to16sub2ro= transfer_8to16sub2ro_x86_64;  
                 transfer_16to8add   = transfer_16to8add_x86_64;  
                 transfer8x8_copy    = transfer8x8_copy_x86_64;  
   
                 /* Qpel stuff */  
                 xvid_QP_Funcs = &xvid_QP_Funcs_x86_64;  
                 xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_x86_64;  
   
                 /* Interlacing Functions */  
                 MBFieldTest = MBFieldTest_x86_64;  
         }  
 #endif  
   
664  #if defined(_DEBUG)  #if defined(_DEBUG)
665      xvid_debug = init->debug;      xvid_debug = init->debug;
666  #endif  #endif
# Line 712  Line 676 
676                  return XVID_ERR_VERSION;                  return XVID_ERR_VERSION;
677    
678          info->actual_version = XVID_VERSION;          info->actual_version = XVID_VERSION;
679          info->build = "xvid-1.1.0-beta2";          info->build = "xvid-1.3.0-dev";
680          info->cpu_flags = detect_cpu_flags();          info->cpu_flags = detect_cpu_flags();
681            info->num_threads = 0; /* single-thread */
682    
683    #if defined(_WIN32)
684    
685      {
686            SYSTEM_INFO siSysInfo;
687            GetSystemInfo(&siSysInfo);
688            info->num_threads = siSysInfo.dwNumberOfProcessors; /* number of _logical_ cores */
689      }
690    
691    #elif defined(_SC_NPROCESSORS_CONF) /* should be available on Apple too actually */
692    
693      info->num_threads = sysconf(_SC_NPROCESSORS_CONF);
694    
695    #elif defined(__APPLE__) && defined(__MACH__)
696    
697      {
698        size_t len;
699        int    mib[2], ncpu;
700    
701        mib[0] = CTL_HW;
702        mib[1] = HW_NCPU;
703        len    = sizeof(ncpu);
704        if (sysctl(mib, 2, &ncpu, &len, NULL, 0) == 0)
705          info -> num_threads = ncpu;
706        else
707          info -> num_threads = 1;
708      }
709    
 #if defined(_SMP) && defined(WIN32)  
         info->num_threads = pthread_num_processors_np();;  
 #else  
         info->num_threads = 0;  
710  #endif  #endif
711    
712          return 0;          return 0;

Legend:
Removed from v.1.64  
changed lines
  Added in v.1.81

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4