[cvs] / xvidcore / src / xvid.c Repository:
ViewVC logotype

Diff of /xvidcore/src/xvid.c

Parent Directory | Revision Log | View Patch

revision 1.62, Mon Apr 4 23:49:37 2005 UTC | revision 1.76, Thu Nov 27 00:47:03 2008 UTC
# Line 40  Line 40 
40  #include "utils/mbfunctions.h"  #include "utils/mbfunctions.h"
41  #include "quant/quant.h"  #include "quant/quant.h"
42  #include "motion/motion.h"  #include "motion/motion.h"
43    #include "motion/gmc.h"
44  #include "motion/sad.h"  #include "motion/sad.h"
45  #include "utils/emms.h"  #include "utils/emms.h"
46  #include "utils/timer.h"  #include "utils/timer.h"
# Line 126  Line 127 
127    
128  /* detect cpu flags  */  /* detect cpu flags  */
129  static unsigned int  static unsigned int
130  detect_cpu_flags()  detect_cpu_flags(void)
131  {  {
132          /* enable native assembly optimizations by default */          /* enable native assembly optimizations by default */
133          unsigned int cpu_flags = XVID_CPU_ASM;          unsigned int cpu_flags = XVID_CPU_ASM;
# Line 136  Line 137 
137          if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))          if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))
138                  cpu_flags &= ~XVID_CPU_SSE;                  cpu_flags &= ~XVID_CPU_SSE;
139    
140          if ((cpu_flags & XVID_CPU_SSE2) && sigill_check(sse2_os_trigger))          if ((cpu_flags & (XVID_CPU_SSE2|XVID_CPU_SSE3|XVID_CPU_SSE41)) && sigill_check(sse2_os_trigger))
141                  cpu_flags &= ~XVID_CPU_SSE2;                  cpu_flags &= ~(XVID_CPU_SSE2|XVID_CPU_SSE3|XVID_CPU_SSE41);
142  #endif  #endif
143    
144  #if defined(ARCH_IS_PPC)  #if defined(ARCH_IS_PPC)
# Line 183  Line 184 
184          idct = idct_int32;          idct = idct_int32;
185    
186          /* Only needed on PPC Altivec archs */          /* Only needed on PPC Altivec archs */
187          sadInit = 0;          sadInit = NULL;
188    
189          /* Restore FPU context : emms_c is a nop functions */          /* Restore FPU context : emms_c is a nop functions */
190          emms = emms_c;          emms = emms_c;
# Line 213  Line 214 
214          transfer_8to16sub2ro = transfer_8to16sub2ro_c;          transfer_8to16sub2ro = transfer_8to16sub2ro_c;
215          transfer_16to8add  = transfer_16to8add_c;          transfer_16to8add  = transfer_16to8add_c;
216          transfer8x8_copy   = transfer8x8_copy_c;          transfer8x8_copy   = transfer8x8_copy_c;
217            transfer8x4_copy   = transfer8x4_copy_c;
218    
219          /* Interlacing functions */          /* Interlacing functions */
220          MBFieldTest = MBFieldTest_c;          MBFieldTest = MBFieldTest_c;
# Line 222  Line 224 
224          interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_c;          interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_c;
225          interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_c;          interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_c;
226    
227            interpolate8x4_halfpel_h  = interpolate8x4_halfpel_h_c;
228            interpolate8x4_halfpel_v  = interpolate8x4_halfpel_v_c;
229            interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_c;
230    
231          interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_c;          interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_c;
232          interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_c;          interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_c;
233          interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_c;          interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_c;
# Line 251  Line 257 
257          yv12_to_yv12    = yv12_to_yv12_c;          yv12_to_yv12    = yv12_to_yv12_c;
258          rgb555_to_yv12  = rgb555_to_yv12_c;          rgb555_to_yv12  = rgb555_to_yv12_c;
259          rgb565_to_yv12  = rgb565_to_yv12_c;          rgb565_to_yv12  = rgb565_to_yv12_c;
260            rgb_to_yv12     = rgb_to_yv12_c;
261          bgr_to_yv12     = bgr_to_yv12_c;          bgr_to_yv12     = bgr_to_yv12_c;
262          bgra_to_yv12    = bgra_to_yv12_c;          bgra_to_yv12    = bgra_to_yv12_c;
263          abgr_to_yv12    = abgr_to_yv12_c;          abgr_to_yv12    = abgr_to_yv12_c;
# Line 272  Line 279 
279          /* All colorspace transformation functions YV12->User format */          /* All colorspace transformation functions YV12->User format */
280          yv12_to_rgb555  = yv12_to_rgb555_c;          yv12_to_rgb555  = yv12_to_rgb555_c;
281          yv12_to_rgb565  = yv12_to_rgb565_c;          yv12_to_rgb565  = yv12_to_rgb565_c;
282            yv12_to_rgb     = yv12_to_rgb_c;
283          yv12_to_bgr     = yv12_to_bgr_c;          yv12_to_bgr     = yv12_to_bgr_c;
284          yv12_to_bgra    = yv12_to_bgra_c;          yv12_to_bgra    = yv12_to_bgra_c;
285          yv12_to_abgr    = yv12_to_abgr_c;          yv12_to_abgr    = yv12_to_abgr_c;
# Line 301  Line 309 
309          sse8_16bit = sse8_16bit_c;          sse8_16bit = sse8_16bit_c;
310          sse8_8bit  = sse8_8bit_c;          sse8_8bit  = sse8_8bit_c;
311    
312  #if defined(ARCH_IS_IA32)          init_GMC(cpu_flags);
313    
314    #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
315    
316          if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||          if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||
317                  (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||                  (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||
318                  (cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2))                  (cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2) ||
319            (cpu_flags & XVID_CPU_SSE3) || (cpu_flags & XVID_CPU_SSE41))
320          {          {
321                  /* Restore FPU context : emms_c is a nop functions */                  /* Restore FPU context : emms_c is a nop functions */
322                  emms = emms_mmx;                  emms = emms_mmx;
# Line 326  Line 337 
337                  quant_h263_inter   = quant_h263_inter_mmx;                  quant_h263_inter   = quant_h263_inter_mmx;
338                  dequant_h263_intra = dequant_h263_intra_mmx;                  dequant_h263_intra = dequant_h263_intra_mmx;
339                  dequant_h263_inter = dequant_h263_inter_mmx;                  dequant_h263_inter = dequant_h263_inter_mmx;
   
340                  quant_mpeg_intra   = quant_mpeg_intra_mmx;                  quant_mpeg_intra   = quant_mpeg_intra_mmx;
341                  quant_mpeg_inter   = quant_mpeg_inter_mmx;                  quant_mpeg_inter   = quant_mpeg_inter_mmx;
342                  dequant_mpeg_intra = dequant_mpeg_intra_mmx;                  dequant_mpeg_intra = dequant_mpeg_intra_mmx;
343                  dequant_mpeg_inter = dequant_mpeg_inter_mmx;                  dequant_mpeg_inter = dequant_mpeg_inter_mmx;
344    
345    
346                  /* Block related functions */                  /* Block related functions */
347                  transfer_8to16copy = transfer_8to16copy_mmx;                  transfer_8to16copy = transfer_8to16copy_mmx;
348                  transfer_16to8copy = transfer_16to8copy_mmx;                  transfer_16to8copy = transfer_16to8copy_mmx;
# Line 340  Line 351 
351                  transfer_8to16sub2 = transfer_8to16sub2_mmx;                  transfer_8to16sub2 = transfer_8to16sub2_mmx;
352                  transfer_16to8add  = transfer_16to8add_mmx;                  transfer_16to8add  = transfer_16to8add_mmx;
353                  transfer8x8_copy   = transfer8x8_copy_mmx;                  transfer8x8_copy   = transfer8x8_copy_mmx;
354                    transfer8x4_copy   = transfer8x4_copy_mmx;
355    
356                  /* Interlacing Functions */                  /* Interlacing Functions */
357                  MBFieldTest = MBFieldTest_mmx;                  MBFieldTest = MBFieldTest_mmx;
# Line 349  Line 361 
361                  interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_mmx;                  interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_mmx;
362                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_mmx;                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_mmx;
363    
364                    interpolate8x4_halfpel_h  = interpolate8x4_halfpel_h_mmx;
365                    interpolate8x4_halfpel_v  = interpolate8x4_halfpel_v_mmx;
366                    interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_mmx;
367    
368                  interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_mmx;                  interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_mmx;
369                  interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_mmx;                  interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_mmx;
370                  interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_mmx;                  interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_mmx;
# Line 364  Line 380 
380                  image_brightness = image_brightness_mmx;                  image_brightness = image_brightness_mmx;
381    
382                  /* image input xxx_to_yv12 related functions */                  /* image input xxx_to_yv12 related functions */
383    
384                  yv12_to_yv12  = yv12_to_yv12_mmx;                  yv12_to_yv12  = yv12_to_yv12_mmx;
385    
386                  bgr_to_yv12   = bgr_to_yv12_mmx;                  bgr_to_yv12   = bgr_to_yv12_mmx;
387                    rgb_to_yv12   = rgb_to_yv12_mmx;
388                  bgra_to_yv12  = bgra_to_yv12_mmx;                  bgra_to_yv12  = bgra_to_yv12_mmx;
389                    rgba_to_yv12  = rgba_to_yv12_mmx;
390                  yuyv_to_yv12  = yuyv_to_yv12_mmx;                  yuyv_to_yv12  = yuyv_to_yv12_mmx;
391                  uyvy_to_yv12  = uyvy_to_yv12_mmx;                  uyvy_to_yv12  = uyvy_to_yv12_mmx;
392    
# Line 402  Line 422 
422    
423                  yuyv_to_yv12  = yuyv_to_yv12_3dn;                  yuyv_to_yv12  = yuyv_to_yv12_3dn;
424                  uyvy_to_yv12  = uyvy_to_yv12_3dn;                  uyvy_to_yv12  = uyvy_to_yv12_3dn;
425    
426          }          }
427    
428    
# Line 416  Line 437 
437                  interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_xmm;                  interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_xmm;
438                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_xmm;                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_xmm;
439    
440                    interpolate8x4_halfpel_h  = interpolate8x4_halfpel_h_xmm;
441                    interpolate8x4_halfpel_v  = interpolate8x4_halfpel_v_xmm;
442                    interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_xmm;
443    
444                  interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_xmm;                  interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_xmm;
445                  interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_xmm;                  interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_xmm;
446                  interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_xmm;                  interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_xmm;
447                  interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_xmm;                  interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_xmm;
448    
449                  /* Quantization */                  /* Quantization */
                 quant_mpeg_intra = quant_mpeg_intra_xmm;  
450                  quant_mpeg_inter = quant_mpeg_inter_xmm;                  quant_mpeg_inter = quant_mpeg_inter_xmm;
451    
452                  dequant_h263_intra = dequant_h263_intra_xmm;                  dequant_h263_intra = dequant_h263_intra_xmm;
# Line 433  Line 457 
457                  transfer_8to16sub2ro = transfer_8to16sub2ro_xmm;                  transfer_8to16sub2ro = transfer_8to16sub2ro_xmm;
458    
459                  /* Colorspace transformation */                  /* Colorspace transformation */
460                  yv12_to_yv12  = yv12_to_yv12_xmm;                  /* yv12_to_yv12  = yv12_to_yv12_xmm; */ /* appears to be slow on many machines */
461                  yuyv_to_yv12  = yuyv_to_yv12_xmm;                  yuyv_to_yv12  = yuyv_to_yv12_xmm;
462                  uyvy_to_yv12  = uyvy_to_yv12_xmm;                  uyvy_to_yv12  = uyvy_to_yv12_xmm;
463    
# Line 452  Line 476 
476                  interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn;                  interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn;
477                  interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dn;                  interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dn;
478                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn;                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn;
479    
480                    interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_3dn;
481                    interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_3dn;
482                    interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_3dn;
483          }          }
484    
485          if ((cpu_flags & XVID_CPU_3DNOWEXT)) {          if ((cpu_flags & XVID_CPU_3DNOWEXT)) {
# Line 463  Line 491 
491                  transfer_8to16subro =  transfer_8to16subro_3dne;                  transfer_8to16subro =  transfer_8to16subro_3dne;
492                  transfer_16to8add = transfer_16to8add_3dne;                  transfer_16to8add = transfer_16to8add_3dne;
493                  transfer8x8_copy = transfer8x8_copy_3dne;                  transfer8x8_copy = transfer8x8_copy_3dne;
494                    transfer8x4_copy = transfer8x4_copy_3dne;
495    
496                  if ((cpu_flags & XVID_CPU_MMXEXT)) {                  if ((cpu_flags & XVID_CPU_MMXEXT)) {
497                          /* Inverse DCT */                          /* Inverse DCT */
# Line 476  Line 505 
505                          interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dne;                          interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dne;
506                          interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne;                          interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne;
507    
508                            interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_3dne;
509                            interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_3dne;
510                            interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_3dne;
511    
512                          /* Quantization */                          /* Quantization */
513                          quant_h263_intra = quant_h263_intra_3dne;               /* cmov only */                          quant_h263_intra = quant_h263_intra_3dne;               /* cmov only */
514                          quant_h263_inter = quant_h263_inter_3dne;                          quant_h263_inter = quant_h263_inter_3dne;
# Line 485  Line 518 
518                          dequant_h263_inter = dequant_h263_inter_3dne;                          dequant_h263_inter = dequant_h263_inter_3dne;
519    
520                          /* ME functions */                          /* ME functions */
                         calc_cbp = calc_cbp_3dne;  
   
521                          sad16 = sad16_3dne;                          sad16 = sad16_3dne;
522                          sad8 = sad8_3dne;                          sad8 = sad8_3dne;
523                          sad16bi = sad16bi_3dne;                          sad16bi = sad16bi_3dne;
# Line 509  Line 540 
540                  sad16    = sad16_sse2;                  sad16    = sad16_sse2;
541                  dev16    = dev16_sse2;                  dev16    = dev16_sse2;
542    
543                  /* DCT operators                  /* DCT operators */
                  * no iDCT because it's not "Walken matching" */  
544                  fdct = fdct_sse2_skal;                  fdct = fdct_sse2_skal;
545                    idct = idct_sse2_skal;   /* Is now IEEE1180 and Walken compliant. */
546    
547                  /* postprocessing */                  /* postprocessing */
548                  image_brightness = image_brightness_sse2;                  image_brightness = image_brightness_sse2;
549    
550          }          }
551    
552            if ((cpu_flags & XVID_CPU_SSE3)) {
553    
554                    /* SAD operators */
555                    sad16    = sad16_sse3;
556                    dev16    = dev16_sse3;
557            }
558    
559  #endif /* ARCH_IS_IA32 */  #endif /* ARCH_IS_IA32 */
560    
561  #if defined(ARCH_IS_IA64)  #if defined(ARCH_IS_IA64)
# Line 606  Line 646 
646          }          }
647  #endif  #endif
648    
 #if defined(ARCH_IS_X86_64)  
         /* For now, only XVID_CPU_ASM is looked for, so user can still  
          * disable asm usage the usual way. When Intel EMT64 cpus will  
          * be out, maybe we'll have to check more precisely what cpu  
          * features there really are. */  
         if (cpu_flags & XVID_CPU_ASM) {  
                 /* SIMD state flusher */  
                 emms = emms_3dn;  
   
                 /* DCT operators */  
                 fdct = fdct_skal_x86_64;  
                 idct = idct_x86_64;  
   
                 /* SAD operators */  
                 sad16      = sad16_x86_64;  
                 sad8       = sad8_x86_64;  
                 sad16bi    = sad16bi_x86_64;  
                 sad8bi     = sad8bi_x86_64;  
                 dev16      = dev16_x86_64;  
                 sad16v     = sad16v_x86_64;  
                 sse8_16bit = sse8_16bit_x86_64;  
                 sse8_8bit  = sse8_8bit_x86_64;  
   
                 /* Interpolation operators */  
                 interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_x86_64;  
                 interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_x86_64;  
                 interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_x86_64;  
   
                 interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_x86_64;  
                 interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_x86_64;  
                 interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_x86_64;  
                 interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_x86_64;  
   
                 interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_x86_64;  
                 interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_x86_64;  
   
                 interpolate8x8_avg2 = interpolate8x8_avg2_x86_64;  
                 interpolate8x8_avg4 = interpolate8x8_avg4_x86_64;  
   
                 /* Quantization related functions */  
                 quant_h263_intra   = quant_h263_intra_x86_64;  
                 quant_h263_inter   = quant_h263_inter_x86_64;  
                 dequant_h263_intra = dequant_h263_intra_x86_64;  
                 dequant_h263_inter = dequant_h263_inter_x86_64;  
                 quant_mpeg_intra   = quant_mpeg_intra_x86_64;  
                 quant_mpeg_inter   = quant_mpeg_inter_x86_64;  
                 dequant_mpeg_intra   = dequant_mpeg_intra_x86_64;  
                 dequant_mpeg_inter   = dequant_mpeg_inter_x86_64;  
   
                 /* Block related functions */  
                 transfer_8to16copy  = transfer_8to16copy_x86_64;  
                 transfer_16to8copy  = transfer_16to8copy_x86_64;  
                 transfer_8to16sub   = transfer_8to16sub_x86_64;  
                 transfer_8to16subro = transfer_8to16subro_x86_64;  
                 transfer_8to16sub2  = transfer_8to16sub2_x86_64;  
                 transfer_8to16sub2ro= transfer_8to16sub2ro_x86_64;  
                 transfer_16to8add   = transfer_16to8add_x86_64;  
                 transfer8x8_copy    = transfer8x8_copy_x86_64;  
   
                 /* Qpel stuff */  
                 xvid_QP_Funcs = &xvid_QP_Funcs_x86_64;  
                 xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_x86_64;  
   
                 /* Interlacing Functions */  
                 MBFieldTest = MBFieldTest_x86_64;  
         }  
 #endif  
   
649  #if defined(_DEBUG)  #if defined(_DEBUG)
650      xvid_debug = init->debug;      xvid_debug = init->debug;
651  #endif  #endif
# Line 689  Line 661 
661                  return XVID_ERR_VERSION;                  return XVID_ERR_VERSION;
662    
663          info->actual_version = XVID_VERSION;          info->actual_version = XVID_VERSION;
664          info->build = "xvid-1.1.0-beta2";          info->build = "xvid-1.2.0-dev";
665          info->cpu_flags = detect_cpu_flags();          info->cpu_flags = detect_cpu_flags();
666      info->num_threads = 0;
667    
668  #if defined(_SMP) && defined(WIN32)  #if defined(_WIN32)
669          info->num_threads = pthread_num_processors_np();;    {
670        DWORD dwProcessAffinityMask, dwSystemAffinityMask;
671        if (GetProcessAffinityMask(GetCurrentProcess(), (PDWORD_PTR) &dwProcessAffinityMask, (PDWORD_PTR) &dwSystemAffinityMask)) {
672          int i;
673          for(i=0; i<32; i++) {
674            if ((dwProcessAffinityMask & (1<<i)))
675              info->num_threads++;
676          }
677        }
678      }
679  #else  #else
680          info->num_threads = 0;  
681      #include <unistd.h>
682      info->num_threads = sysconf(_SC_NPROCESSORS_CONF);
683    
684  #endif  #endif
685    
686          return 0;          return 0;

Legend:
Removed from v.1.62  
changed lines
  Added in v.1.76

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4