46 |
#include "utils/timer.h" |
#include "utils/timer.h" |
47 |
#include "bitstream/mbcoding.h" |
#include "bitstream/mbcoding.h" |
48 |
#include "image/qpel.h" |
#include "image/qpel.h" |
49 |
|
#include "image/postprocessing.h" |
50 |
|
|
51 |
#if defined(_DEBUG) |
#if defined(_DEBUG) |
52 |
unsigned int xvid_debug = 0; /* xvid debug mask */ |
unsigned int xvid_debug = 0; /* xvid debug mask */ |
171 |
/* Initialize the function pointers */ |
/* Initialize the function pointers */ |
172 |
idct_int32_init(); |
idct_int32_init(); |
173 |
init_vlc_tables(); |
init_vlc_tables(); |
174 |
|
init_postproc(); |
175 |
|
|
176 |
/* Fixed Point Forward/Inverse DCT transformations */ |
/* Fixed Point Forward/Inverse DCT transformations */ |
177 |
fdct = fdct_int32; |
fdct = fdct_int32; |
289 |
sad8bi = sad8bi_c; |
sad8bi = sad8bi_c; |
290 |
dev16 = dev16_c; |
dev16 = dev16_c; |
291 |
sad16v = sad16v_c; |
sad16v = sad16v_c; |
292 |
|
sse8_16bit = sse8_16bit_c; |
|
/* Halfpel8_Refine = Halfpel8_Refine_c; */ |
|
293 |
|
|
294 |
#if defined(ARCH_IS_IA32) |
#if defined(ARCH_IS_IA32) |
295 |
|
|
309 |
if ((cpu_flags & XVID_CPU_MMX)) { |
if ((cpu_flags & XVID_CPU_MMX)) { |
310 |
|
|
311 |
/* Forward and Inverse Discrete Cosine Transformation functions */ |
/* Forward and Inverse Discrete Cosine Transformation functions */ |
312 |
fdct = fdct_mmx; |
fdct = fdct_mmx_skal; |
313 |
idct = idct_mmx; |
idct = idct_mmx; |
314 |
|
|
315 |
/* Qpel stuff */ |
/* Qpel stuff */ |
381 |
sad8bi = sad8bi_mmx; |
sad8bi = sad8bi_mmx; |
382 |
dev16 = dev16_mmx; |
dev16 = dev16_mmx; |
383 |
sad16v = sad16v_mmx; |
sad16v = sad16v_mmx; |
384 |
|
sse8_16bit = sse8_16bit_mmx; |
385 |
} |
} |
386 |
|
|
387 |
/* these 3dnow functions are faster than mmx, but slower than xmm. */ |
/* these 3dnow functions are faster than mmx, but slower than xmm. */ |
400 |
|
|
401 |
if ((cpu_flags & XVID_CPU_MMXEXT)) { |
if ((cpu_flags & XVID_CPU_MMXEXT)) { |
402 |
|
|
403 |
/* Inverse DCT */ |
/* DCT */ |
404 |
|
fdct = fdct_xmm_skal; |
405 |
idct = idct_xmm; |
idct = idct_xmm; |
406 |
|
|
407 |
/* Interpolation */ |
/* Interpolation */ |
481 |
interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne; |
interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne; |
482 |
} |
} |
483 |
|
|
|
#if defined(EXPERIMENTAL_SSE2_CODE) /* mark the whole SSE2 stuff as experimental. At least on |
|
|
my P4, it crashes... */ |
|
484 |
if ((cpu_flags & XVID_CPU_SSE2)) { |
if ((cpu_flags & XVID_CPU_SSE2)) { |
485 |
|
|
486 |
calc_cbp = calc_cbp_sse2; |
calc_cbp = calc_cbp_sse2; |
491 |
dequant_h263_intra = dequant_h263_intra_sse2; |
dequant_h263_intra = dequant_h263_intra_sse2; |
492 |
dequant_h263_inter = dequant_h263_inter_sse2; |
dequant_h263_inter = dequant_h263_inter_sse2; |
493 |
|
|
494 |
/* ME; slower than xmm */ |
/* SAD operators */ |
495 |
sad16 = sad16_sse2; |
sad16 = sad16_sse2; |
496 |
dev16 = dev16_sse2; |
dev16 = dev16_sse2; |
497 |
/* Forward and Inverse DCT */ |
|
498 |
#if 0 /* Both functions are known to be imprecise, better keep them deactivated */ |
/* DCT operators |
499 |
idct = idct_sse2; |
* no iDCT because it's not "Walken matching" */ |
500 |
fdct = fdct_sse2; |
fdct = fdct_sse2_skal; |
|
#endif |
|
501 |
} |
} |
502 |
#endif |
#endif /* ARCH_IS_IA32 */ |
|
#endif |
|
503 |
|
|
504 |
#if defined(ARCH_IS_IA64) |
#if defined(ARCH_IS_IA64) |
505 |
if ((cpu_flags & XVID_CPU_ASM)) { /* use assembler routines? */ |
if ((cpu_flags & XVID_CPU_ASM)) { /* use assembler routines? */ |
560 |
return XVID_ERR_VERSION; |
return XVID_ERR_VERSION; |
561 |
|
|
562 |
info->actual_version = XVID_VERSION; |
info->actual_version = XVID_VERSION; |
563 |
info->build = "dev-api-4"; |
info->build = "xvid-1.0.0"; |
564 |
info->cpu_flags = detect_cpu_flags(); |
info->cpu_flags = detect_cpu_flags(); |
565 |
|
|
566 |
#if defined(_SMP) && defined(WIN32) |
#if defined(_SMP) && defined(WIN32) |