27 |
#include "../global.h" |
#include "../global.h" |
28 |
#include "../encoder.h" |
#include "../encoder.h" |
29 |
#include "gmc.h" |
#include "gmc.h" |
30 |
|
#include "../utils/emms.h" |
31 |
|
|
32 |
#include <stdio.h> |
#include <stdio.h> |
33 |
|
|
400 |
extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets, |
extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets, |
401 |
const uint8_t * const Src0, const int BpS, const int Rounder); |
const uint8_t * const Src0, const int BpS, const int Rounder); |
402 |
|
|
403 |
|
extern void xvid_GMC_Core_Lin_8_sse41(uint8_t *Dst, const uint16_t * Offsets, |
404 |
|
const uint8_t * const Src0, const int BpS, const int Rounder); |
405 |
|
|
406 |
/* *************************************************************/ |
/* *************************************************************/ |
407 |
|
|
408 |
static void GMC_Core_Non_Lin_8(uint8_t *Dst, |
static void GMC_Core_Non_Lin_8(uint8_t *Dst, |
463 |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
464 |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
465 |
{ |
{ |
466 |
|
uint32_t UV1, UV2; |
467 |
for(i=0; i<16; ++i) |
for(i=0; i<16; ++i) |
468 |
{ |
{ |
469 |
uint32_t u = ( U >> 16 ) << rho; |
uint32_t u = ( U >> 16 ) << rho; |
472 |
Offsets[ i] = u; |
Offsets[ i] = u; |
473 |
Offsets[16+i] = v; |
Offsets[16+i] = v; |
474 |
} |
} |
|
|
|
|
{ |
|
475 |
// batch 8 input pixels when linearity says it's ok |
// batch 8 input pixels when linearity says it's ok |
476 |
uint32_t UV1, UV2; |
|
477 |
UV1 = (Offsets[0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
UV1 = (Offsets[0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
478 |
UV2 = (Offsets[7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
UV2 = (Offsets[7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
479 |
if (UV1+7*16==UV2) |
if (UV1+7*16==UV2) |
487 |
else |
else |
488 |
GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder); |
GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder); |
489 |
} |
} |
|
} |
|
490 |
else |
else |
491 |
{ |
{ |
492 |
for(i=0; i<16; ++i) |
for(i=0; i<16; ++i) |
537 |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
538 |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
539 |
{ |
{ |
540 |
|
uint32_t UV1, UV2; |
541 |
for(i=0; i<8; ++i) |
for(i=0; i<8; ++i) |
542 |
{ |
{ |
543 |
int32_t u = ( U >> 16 ) << rho; |
int32_t u = ( U >> 16 ) << rho; |
547 |
Offsets[16+i] = v; |
Offsets[16+i] = v; |
548 |
} |
} |
549 |
|
|
|
{ |
|
550 |
// batch 8 input pixels when linearity says it's ok |
// batch 8 input pixels when linearity says it's ok |
551 |
const uint32_t UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
552 |
const uint32_t UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
553 |
if (UV1+7*16==UV2) |
if (UV1+7*16==UV2) |
554 |
{ |
{ |
555 |
const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride; |
const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride; |
561 |
GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder); |
GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder); |
562 |
} |
} |
563 |
} |
} |
|
} |
|
564 |
else |
else |
565 |
{ |
{ |
566 |
for(i=0; i<8; ++i) |
for(i=0; i<8; ++i) |
593 |
#if defined(ARCH_IS_IA32) |
#if defined(ARCH_IS_IA32) |
594 |
if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || |
if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || |
595 |
(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || |
(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || |
596 |
(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2)) |
(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2) || |
597 |
|
(cpu_flags & XVID_CPU_SSE3) || (cpu_flags & XVID_CPU_SSE41)) |
598 |
{ |
{ |
599 |
Predict_16x16_func = Predict_16x16_mmx; |
Predict_16x16_func = Predict_16x16_mmx; |
600 |
Predict_8x8_func = Predict_8x8_mmx; |
Predict_8x8_func = Predict_8x8_mmx; |
601 |
|
#if 0 |
602 |
|
if (cpu_flags & XVID_CPU_SSE41) |
603 |
|
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse41; |
604 |
|
else |
605 |
|
#endif |
606 |
if (cpu_flags & XVID_CPU_SSE2) |
if (cpu_flags & XVID_CPU_SSE2) |
607 |
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2; |
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2; |
608 |
else |
else |
747 |
|
|
748 |
pMBs[mbnum].mcsel = 0; /* until mode decision */ |
pMBs[mbnum].mcsel = 0; /* until mode decision */ |
749 |
} |
} |
750 |
|
emms(); |
751 |
} |
} |