46 |
#include "utils/timer.h" |
#include "utils/timer.h" |
47 |
#include "bitstream/mbcoding.h" |
#include "bitstream/mbcoding.h" |
48 |
#include "image/qpel.h" |
#include "image/qpel.h" |
49 |
|
#include "image/postprocessing.h" |
50 |
|
|
51 |
#if defined(_DEBUG) |
#if defined(_DEBUG) |
52 |
unsigned int xvid_debug = 0; /* xvid debug mask */ |
unsigned int xvid_debug = 0; /* xvid debug mask */ |
288 |
sad8bi = sad8bi_c; |
sad8bi = sad8bi_c; |
289 |
dev16 = dev16_c; |
dev16 = dev16_c; |
290 |
sad16v = sad16v_c; |
sad16v = sad16v_c; |
291 |
|
sse8_16bit = sse8_16bit_c; |
|
/* Halfpel8_Refine = Halfpel8_Refine_c; */ |
|
292 |
|
|
293 |
#if defined(ARCH_IS_IA32) |
#if defined(ARCH_IS_IA32) |
294 |
|
|
308 |
if ((cpu_flags & XVID_CPU_MMX)) { |
if ((cpu_flags & XVID_CPU_MMX)) { |
309 |
|
|
310 |
/* Forward and Inverse Discrete Cosine Transformation functions */ |
/* Forward and Inverse Discrete Cosine Transformation functions */ |
311 |
fdct = fdct_mmx; |
fdct = fdct_mmx_skal; |
312 |
idct = idct_mmx; |
idct = idct_mmx; |
313 |
|
|
314 |
/* Qpel stuff */ |
/* Qpel stuff */ |
380 |
sad8bi = sad8bi_mmx; |
sad8bi = sad8bi_mmx; |
381 |
dev16 = dev16_mmx; |
dev16 = dev16_mmx; |
382 |
sad16v = sad16v_mmx; |
sad16v = sad16v_mmx; |
383 |
|
sse8_16bit = sse8_16bit_mmx; |
384 |
} |
} |
385 |
|
|
386 |
/* these 3dnow functions are faster than mmx, but slower than xmm. */ |
/* these 3dnow functions are faster than mmx, but slower than xmm. */ |
399 |
|
|
400 |
if ((cpu_flags & XVID_CPU_MMXEXT)) { |
if ((cpu_flags & XVID_CPU_MMXEXT)) { |
401 |
|
|
402 |
/* Inverse DCT */ |
/* DCT */ |
403 |
|
fdct = fdct_xmm_skal; |
404 |
idct = idct_xmm; |
idct = idct_xmm; |
405 |
|
|
406 |
/* Interpolation */ |
/* Interpolation */ |
480 |
interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne; |
interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne; |
481 |
} |
} |
482 |
|
|
|
#if defined(EXPERIMENTAL_SSE2_CODE) /* mark the whole SSE2 stuff as experimental. At least on |
|
|
my P4, it crashes... */ |
|
483 |
if ((cpu_flags & XVID_CPU_SSE2)) { |
if ((cpu_flags & XVID_CPU_SSE2)) { |
484 |
|
|
485 |
calc_cbp = calc_cbp_sse2; |
calc_cbp = calc_cbp_sse2; |
490 |
dequant_h263_intra = dequant_h263_intra_sse2; |
dequant_h263_intra = dequant_h263_intra_sse2; |
491 |
dequant_h263_inter = dequant_h263_inter_sse2; |
dequant_h263_inter = dequant_h263_inter_sse2; |
492 |
|
|
493 |
/* ME; slower than xmm */ |
/* SAD operators */ |
494 |
sad16 = sad16_sse2; |
sad16 = sad16_sse2; |
495 |
dev16 = dev16_sse2; |
dev16 = dev16_sse2; |
496 |
/* Forward and Inverse DCT */ |
|
497 |
#if 0 /* Both function are known to be unprecise, better keep them deactivated */ |
/* DCT operators |
498 |
idct = idct_sse2; |
* no iDCT because it's not "Walken matching" */ |
499 |
fdct = fdct_sse2; |
fdct = fdct_sse2_skal; |
|
#endif |
|
500 |
} |
} |
501 |
#endif |
#endif /* ARCH_IS_IA32 */ |
|
#endif |
|
502 |
|
|
503 |
#if defined(ARCH_IS_IA64) |
#if defined(ARCH_IS_IA64) |
504 |
if ((cpu_flags & XVID_CPU_ASM)) { /* use assembler routines? */ |
if ((cpu_flags & XVID_CPU_ASM)) { /* use assembler routines? */ |
559 |
return XVID_ERR_VERSION; |
return XVID_ERR_VERSION; |
560 |
|
|
561 |
info->actual_version = XVID_VERSION; |
info->actual_version = XVID_VERSION; |
562 |
info->build = "dev-api-4"; |
info->build = "xvid-1.0.0"; |
563 |
info->cpu_flags = detect_cpu_flags(); |
info->cpu_flags = detect_cpu_flags(); |
564 |
|
|
565 |
#if defined(_SMP) && defined(WIN32) |
#if defined(_SMP) && defined(WIN32) |