29 |
#include "../encoder.h" |
#include "../encoder.h" |
30 |
#include "../utils/mbfunctions.h" |
#include "../utils/mbfunctions.h" |
31 |
#include "../image/interpolate8x8.h" |
#include "../image/interpolate8x8.h" |
32 |
|
#include "../image/qpel.h" |
33 |
#include "../image/reduced.h" |
#include "../image/reduced.h" |
34 |
#include "../utils/timer.h" |
#include "../utils/timer.h" |
35 |
#include "motion.h" |
#include "motion.h" |
66 |
#endif |
#endif |
67 |
} |
} |
68 |
|
|
69 |
|
/*
 * get_ref: compute the reference-image pointer for motion compensation.
 *
 * The low bit of dx and dy selects which pre-interpolated plane is read
 * (refn = plain image, refh = horizontally interpolated, refv = vertically
 * interpolated, refhv = both); the remaining bits give the integer-pel
 * displacement added to the (x, y) block position.
 */
static __inline const uint8_t *
get_ref(const uint8_t * const refn,
		const uint8_t * const refh,
		const uint8_t * const refv,
		const uint8_t * const refhv,
		const uint32_t x,
		const uint32_t y,
		const uint32_t block,
		const int32_t dx,
		const int32_t dy,
		const int32_t stride)
{
	const int half_x = dx & 1;	/* horizontal half-pel component present? */
	const int half_y = dy & 1;	/* vertical half-pel component present? */

	if (!half_x) {
		if (!half_y)	/* integer-pel in both directions: plain image */
			return refn + (int) ((x * block + dx / 2) + (y * block + dy / 2) * stride);
		/* vertical half-pel only */
		return refv + (int) ((x * block + dx / 2) + (y * block + (dy - 1) / 2) * stride);
	}
	if (!half_y)	/* horizontal half-pel only */
		return refh + (int) ((x * block + (dx - 1) / 2) + (y * block + dy / 2) * stride);
	/* half-pel in both directions */
	return refhv + (int) ((x * block + (dx - 1) / 2) + (y * block + (dy - 1) / 2) * stride);
}
98 |
|
|
99 |
static __inline void |
static __inline void |
100 |
compensate16x16_interpolate(int16_t * const dct_codes, |
compensate16x16_interpolate(int16_t * const dct_codes, |
119 |
|
|
120 |
if(quarterpel) { |
if(quarterpel) { |
121 |
if ((dx&3) | (dy&3)) { |
if ((dx&3) | (dy&3)) { |
122 |
|
#if defined(ARCH_IS_IA32) /* new_interpolate is only faster on x86 (MMX) machines */ |
123 |
|
new_interpolate16x16_quarterpel(tmp - y * stride - x, |
124 |
|
(uint8_t *) ref, tmp + 32, |
125 |
|
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
126 |
|
#else |
127 |
interpolate16x16_quarterpel(tmp - y * stride - x, |
interpolate16x16_quarterpel(tmp - y * stride - x, |
128 |
(uint8_t *) ref, tmp + 32, |
(uint8_t *) ref, tmp + 32, |
129 |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
130 |
|
#endif |
131 |
ptr = tmp; |
ptr = tmp; |
132 |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; /* fullpixel position */ |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; /* fullpixel position */ |
133 |
|
|
187 |
|
|
188 |
if(quarterpel) { |
if(quarterpel) { |
189 |
if ((dx&3) | (dy&3)) { |
if ((dx&3) | (dy&3)) { |
190 |
|
#if defined(ARCH_IS_IA32) /* new_interpolate is only faster on x86 (MMX) machines */ |
191 |
|
new_interpolate8x8_quarterpel(tmp - y*stride - x, |
192 |
|
(uint8_t *) ref, tmp + 32, |
193 |
|
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
194 |
|
#else |
195 |
interpolate8x8_quarterpel(tmp - y*stride - x, |
interpolate8x8_quarterpel(tmp - y*stride - x, |
196 |
(uint8_t *) ref, tmp + 32, |
(uint8_t *) ref, tmp + 32, |
197 |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
198 |
|
#endif |
199 |
ptr = tmp; |
ptr = tmp; |
200 |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; /* fullpixel position */ |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; /* fullpixel position */ |
201 |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |