[cvs] / xvidcore / src / xvid.c Repository:
ViewVC logotype

Diff of /xvidcore/src/xvid.c

Parent Directory | Revision Log | View Patch

revision 1.33.2.20, Thu Jan 2 13:58:54 2003 UTC revision 1.33.2.23, Wed Feb 12 11:46:18 2003 UTC
# Line 41  Line 41 
41   *   *
42   ****************************************************************************/   ****************************************************************************/
43    
44    #include <stdio.h>
45  #include <stdlib.h>  #include <stdlib.h>
46    #include <string.h>
47  #include <time.h>  #include <time.h>
48    
49  #include "xvid.h"  #include "xvid.h"
# Line 63  Line 65 
65  #include "utils/timer.h"  #include "utils/timer.h"
66  #include "bitstream/mbcoding.h"  #include "bitstream/mbcoding.h"
67    
68  #if defined(ARCH_X86) && defined(EXPERIMENTAL_SSE2_CODE)  #if defined(ARCH_X86)
69    
70  #ifdef WIN32  #if defined(_MSC_VER)
71  #include <windows.h>  #include <windows.h>
72  #else  #else
73  #include <signal.h>  #include <signal.h>
74  #include <setjmp.h>  #include <setjmp.h>
 #endif  
   
   
 #ifndef WIN32  
75    
76  static jmp_buf mark;  static jmp_buf mark;
77    
# Line 96  Line 94 
94  int  int
95  sigill_check(void (*func)())  sigill_check(void (*func)())
96  {  {
97  #ifdef WIN32  #if defined(_MSC_VER)
98          _try {          _try {
99                  func();                  func();
100          }          }
# Line 130  Line 128 
128  }  }
129  #endif  #endif
130    
131    
132    /* detect cpu flags  */
133    static unsigned int
134    detect_cpu_flags()
135    {
136            /* enable native assembly optimizations by default */
137            unsigned int cpu_flags = XVID_CPU_ASM;
138    
139    #if defined(ARCH_X86)
140            cpu_flags |= check_cpu_features();
141            if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))
142                    cpu_flags &= ~XVID_CPU_SSE;
143    
144            if ((cpu_flags & XVID_CPU_SSE2) && sigill_check(sse2_os_trigger))
145                    cpu_flags &= ~XVID_CPU_SSE2;
146    #endif
147    
148    #if defined(ARCH_PPC)
149    #if defined(ARCH_PPC_ALTIVEC)
150            cpu_flags |= XVID_CPU_ALTIVEC;
151    #endif
152    #endif
153    
154            return cpu_flags;
155    }
156    
157    
158  /*****************************************************************************  /*****************************************************************************
159   * XviD Init Entry point   * XviD Init Entry point
160   *   *
# Line 163  Line 188 
188    
189          } else {          } else {
190    
191                  cpu_flags = check_cpu_features();                  cpu_flags = detect_cpu_flags();
   
 #if defined(ARCH_X86) && defined(EXPERIMENTAL_SSE2_CODE)  
                 if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))  
                         cpu_flags &= ~XVID_CPU_SSE;  
   
                 if ((cpu_flags & XVID_CPU_SSE2) && sigill_check(sse2_os_trigger))  
                         cpu_flags &= ~XVID_CPU_SSE2;  
 #endif  
192          }          }
193    
194          if ((init_param->cpu_flags & XVID_CPU_CHKONLY))          if ((init_param->cpu_flags & XVID_CPU_CHKONLY))
# Line 212  Line 229 
229          transfer_8to16copy = transfer_8to16copy_c;          transfer_8to16copy = transfer_8to16copy_c;
230          transfer_16to8copy = transfer_16to8copy_c;          transfer_16to8copy = transfer_16to8copy_c;
231          transfer_8to16sub  = transfer_8to16sub_c;          transfer_8to16sub  = transfer_8to16sub_c;
232            transfer_8to16subro  = transfer_8to16subro_c;
233          transfer_8to16sub2 = transfer_8to16sub2_c;          transfer_8to16sub2 = transfer_8to16sub2_c;
234          transfer_16to8add  = transfer_16to8add_c;          transfer_16to8add  = transfer_16to8add_c;
235          transfer8x8_copy   = transfer8x8_copy_c;          transfer8x8_copy   = transfer8x8_copy_c;
# Line 239  Line 257 
257          interpolate8x8_avg4 = interpolate8x8_avg4_c;          interpolate8x8_avg4 = interpolate8x8_avg4_c;
258    
259          /* reduced resolution */          /* reduced resolution */
   
260          copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_C;          copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_C;
261          add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_C;          add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_C;
 #ifdef ARCH_X86  
         vfilter_31 = xvid_VFilter_31_x86;  
         hfilter_31 = xvid_HFilter_31_x86;  
 #else  
262          vfilter_31 = xvid_VFilter_31_C;          vfilter_31 = xvid_VFilter_31_C;
263          hfilter_31 = xvid_HFilter_31_C;          hfilter_31 = xvid_HFilter_31_C;
 #endif  
264          filter_18x18_to_8x8 = xvid_Filter_18x18_To_8x8_C;          filter_18x18_to_8x8 = xvid_Filter_18x18_To_8x8_C;
265          filter_diff_18x18_to_8x8 = xvid_Filter_Diff_18x18_To_8x8_C;          filter_diff_18x18_to_8x8 = xvid_Filter_Diff_18x18_To_8x8_C;
266    
# Line 306  Line 318 
318    
319  //      Halfpel8_Refine = Halfpel8_Refine_c;  //      Halfpel8_Refine = Halfpel8_Refine_c;
320    
321  #ifdef ARCH_X86  #if defined(ARCH_X86)
322    
323            if ((cpu_flags & XVID_CPU_ASM))
324            {
325                    vfilter_31 = xvid_VFilter_31_x86;
326                    hfilter_31 = xvid_HFilter_31_x86;
327            }
328    
329          if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||          if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||
330                  (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||                  (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||
# Line 316  Line 334 
334                  emms = emms_mmx;                  emms = emms_mmx;
335          }          }
336    
337          if ((cpu_flags & XVID_CPU_MMX) > 0) {          if ((cpu_flags & XVID_CPU_MMX)) {
338    
339                  /* Forward and Inverse Discrete Cosine Transformation functions */                  /* Forward and Inverse Discrete Cosine Transformation functions */
340                  fdct = fdct_mmx;                  fdct = fdct_mmx;
# Line 337  Line 355 
355                  transfer_8to16copy = transfer_8to16copy_mmx;                  transfer_8to16copy = transfer_8to16copy_mmx;
356                  transfer_16to8copy = transfer_16to8copy_mmx;                  transfer_16to8copy = transfer_16to8copy_mmx;
357                  transfer_8to16sub  = transfer_8to16sub_mmx;                  transfer_8to16sub  = transfer_8to16sub_mmx;
358                    transfer_8to16subro  = transfer_8to16subro_mmx;
359                  transfer_8to16sub2 = transfer_8to16sub2_mmx;                  transfer_8to16sub2 = transfer_8to16sub2_mmx;
360                  transfer_16to8add  = transfer_16to8add_mmx;                  transfer_16to8add  = transfer_16to8add_mmx;
361                  transfer8x8_copy   = transfer8x8_copy_mmx;                  transfer8x8_copy   = transfer8x8_copy_mmx;
# Line 386  Line 405 
405                  sad8bi  = sad8bi_mmx;                  sad8bi  = sad8bi_mmx;
406                  dev16    = dev16_mmx;                  dev16    = dev16_mmx;
407                  sad16v   = sad16v_mmx;                  sad16v   = sad16v_mmx;
   
408          }          }
409    
410          /* these 3dnow functions are faster than mmx, but slower than xmm. */          /* these 3dnow functions are faster than mmx, but slower than xmm. */
411          if ((cpu_flags & XVID_CPU_3DNOW) > 0) {          if ((cpu_flags & XVID_CPU_3DNOW)) {
412    
413                    emms = emms_3dn;
414    
415                  /* ME functions */                  /* ME functions */
416                  sad16bi = sad16bi_3dn;                  sad16bi = sad16bi_3dn;
# Line 401  Line 421 
421          }          }
422    
423    
424          if ((cpu_flags & XVID_CPU_MMXEXT) > 0) {          if ((cpu_flags & XVID_CPU_MMXEXT)) {
425    
426                  /* Inverse DCT */                  /* Inverse DCT */
427                  idct = idct_xmm;                  idct = idct_xmm;
# Line 439  Line 459 
459                  sad16v   = sad16v_xmm;                  sad16v   = sad16v_xmm;
460          }          }
461    
462          if ((cpu_flags & XVID_CPU_3DNOW) > 0) {          if ((cpu_flags & XVID_CPU_3DNOW)) {
463    
464                  /* Interpolation */                  /* Interpolation */
465                  interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn;                  interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn;
# Line 447  Line 467 
467                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn;                  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn;
468          }          }
469    
470          if ((cpu_flags & XVID_CPU_3DNOWEXT) > 0) {          if ((cpu_flags & XVID_CPU_3DNOWEXT)) {
471    
472                  /* Inverse DCT */                  /* Inverse DCT */
473                  idct =  idct_3dne;                  idct =  idct_3dne;
# Line 456  Line 476 
476                  transfer_8to16copy =  transfer_8to16copy_3dne;                  transfer_8to16copy =  transfer_8to16copy_3dne;
477                  transfer_16to8copy = transfer_16to8copy_3dne;                  transfer_16to8copy = transfer_16to8copy_3dne;
478                  transfer_8to16sub =  transfer_8to16sub_3dne;                  transfer_8to16sub =  transfer_8to16sub_3dne;
479                    transfer_8to16subro =  transfer_8to16subro_3dne;
480                  transfer_8to16sub2 =  transfer_8to16sub2_3dne;                  transfer_8to16sub2 =  transfer_8to16sub2_3dne;
481                  transfer_16to8add = transfer_16to8add_3dne;                  transfer_16to8add = transfer_16to8add_3dne;
482                  transfer8x8_copy = transfer8x8_copy_3dne;                  transfer8x8_copy = transfer8x8_copy_3dne;
# Line 483  Line 504 
504          }          }
505    
506    
507          if ((cpu_flags & XVID_CPU_SSE2) > 0) {          if ((cpu_flags & XVID_CPU_SSE2)) {
 #ifdef EXPERIMENTAL_SSE2_CODE  
508    
509                  calc_cbp = calc_cbp_sse2;                  calc_cbp = calc_cbp_sse2;
510    
# Line 494  Line 514 
514                  quant_inter   = quant_inter_sse2;                  quant_inter   = quant_inter_sse2;
515                  dequant_inter = dequant_inter_sse2;                  dequant_inter = dequant_inter_sse2;
516    
517                  /* ME */  #if defined(EXPERIMENTAL_SSE2_CODE)
518                    /* ME; slower than xmm */
519                  sad16    = sad16_sse2;                  sad16    = sad16_sse2;
520                  dev16    = dev16_sse2;                  dev16    = dev16_sse2;
521    #endif
522                  /* Forward and Inverse DCT */                  /* Forward and Inverse DCT */
523                  idct  = idct_sse2;                  idct  = idct_sse2;
524                  fdct = fdct_sse2;                  fdct = fdct_sse2;
 #endif  
525          }          }
   
526  #endif  #endif
527    
528  #ifdef ARCH_IA64  #if defined(ARCH_IA64)
529          if ((cpu_flags & XVID_CPU_IA64) > 0) { //use assembler routines?          if ((cpu_flags & XVID_CPU_ASM)) { //use assembler routines?
530            idct_ia64_init();            idct_ia64_init();
531            fdct = fdct_ia64;            fdct = fdct_ia64;
532            idct = idct_ia64;   //not yet working, crashes            idct = idct_ia64;   //not yet working, crashes
# Line 533  Line 552 
552          }          }
553  #endif  #endif
554    
555  #ifdef ARCH_PPC  #if defined(ARCH_PPC)
556  #ifdef ARCH_PPC_ALTIVEC          if ((cpu_flags & XVID_CPU_ASM))
557            {
558                    calc_cbp = calc_cbp_ppc;
559            }
560    
561            if ((cpu_flags & XVID_CPU_ALTIVEC))
562            {
563          calc_cbp = calc_cbp_altivec;          calc_cbp = calc_cbp_altivec;
564          fdct = fdct_altivec;          fdct = fdct_altivec;
565          idct = idct_altivec;          idct = idct_altivec;
# Line 542  Line 567 
567          sad16 = sad16_altivec;          sad16 = sad16_altivec;
568          sad8 = sad8_altivec;          sad8 = sad8_altivec;
569          dev16 = dev16_altivec;          dev16 = dev16_altivec;
570  #else          }
         calc_cbp = calc_cbp_ppc;  
 #endif  
571  #endif  #endif
572    
573          return XVID_ERR_OK;          return XVID_ERR_OK;
# Line 708  Line 731 
731          }          }
732    
733          /* print the "average difference" of best/worst transforms */          /* print the "average difference" of best/worst transforms */
734          printf("%s:\t%I64i\t(min_error:%i, max_error:%i)\n", nameB, timeB / count, min_error, max_error);          printf("%s:\t%i\t(min_error:%i, max_error:%i)\n", nameB, (int)(timeB / count), min_error, max_error);
735    
736          return 0;          return 0;
737  }  }
# Line 790  Line 813 
813                  }                  }
814          }          }
815    
816          printf("%s:\t%I64i", nameB, timeB / count);          printf("%s:\t%i", nameB, (int)(timeB / count));
817          if (errors>0)          if (errors>0)
818                  printf("\t(%i errors out of %i)", errors, count);                  printf("\t(%i errors out of %i)", errors, count);
819          printf("\n");          printf("\n");
# Line 809  Line 832 
832          printf("xvid_init_test\n");          printf("xvid_init_test\n");
833    
834  #if defined(ARCH_X86)  #if defined(ARCH_X86)
835          cpu_flags = check_cpu_features();          cpu_flags = detect_cpu_flags();
836          idct_int32_init();          idct_int32_init();
837          emms_mmx();          emms_mmx();
838    

Legend:
Removed from v.1.33.2.20  
changed lines
  Added in v.1.33.2.23

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4