29 |
#include "../encoder.h" |
#include "../encoder.h" |
30 |
#include "../utils/mbfunctions.h" |
#include "../utils/mbfunctions.h" |
31 |
#include "../image/interpolate8x8.h" |
#include "../image/interpolate8x8.h" |
32 |
|
#include "../image/qpel.h" |
33 |
#include "../image/reduced.h" |
#include "../image/reduced.h" |
34 |
#include "../utils/timer.h" |
#include "../utils/timer.h" |
35 |
#include "motion.h" |
#include "motion.h" |
66 |
#endif |
#endif |
67 |
} |
} |
68 |
|
|
69 |
|
/* |
70 |
|
* getref: calculate reference image pointer |
71 |
|
* the decision to use interpolation h/v/hv or the normal image is |
72 |
|
* based on dx & dy. |
73 |
|
*/ |
74 |
|
|
75 |
|
/*
 * get_ref: calculate the reference image pointer for one block.
 *
 * The decision to use the h / v / hv half-pel interpolated planes or
 * the plain reference plane is based on the low bits of dx & dy:
 * an odd dx selects horizontal interpolation (refh / refhv), an odd dy
 * selects vertical interpolation (refv / refhv).  The integer part of
 * the displacement is d/2 for an even component and (d-1)/2 for an odd
 * one (truncating C division, so this also holds for negative vectors).
 *
 * refn/refh/refv/refhv: reference plane and its three interpolations
 * x, y:   block coordinates (in units of `block` pixels)
 * block:  block size in pixels (e.g. 8 or 16)
 * dx, dy: motion vector in half-pel units
 * stride: line stride of every reference plane
 *
 * Returns a pointer to the top-left pixel of the displaced block in
 * the selected plane.
 */
static __inline const uint8_t *
get_ref(const uint8_t * const refn,
		const uint8_t * const refh,
		const uint8_t * const refv,
		const uint8_t * const refhv,
		const uint32_t x,
		const uint32_t y,
		const uint32_t block,
		const int32_t dx,
		const int32_t dy,
		const int32_t stride)
{
	switch (((dx & 1) << 1) + (dy & 1)) {
	case 0:		/* full-pel in both directions */
		return refn + (int) (((int)x * (int)block + dx / 2) + ((int)y * (int)block + dy / 2) * (int)stride);
	case 1:		/* vertical half-pel only */
		return refv + (int) (((int)x * (int)block + dx / 2) + ((int)y * (int)block + (dy - 1) / 2) * (int)stride);
	case 2:		/* horizontal half-pel only */
		return refh + (int) (((int)x * (int)block + (dx - 1) / 2) + ((int)y * (int)block + dy / 2) * (int)stride);
	default:	/* case 3: half-pel in both directions */
		return refhv + (int) (((int)x * (int)block + (dx - 1) / 2) + ((int)y * (int)block + (dy - 1) / 2) * (int)stride);
	}
}
98 |
|
|
99 |
static __inline void |
static __inline void |
100 |
compensate16x16_interpolate(int16_t * const dct_codes, |
compensate16x16_interpolate(int16_t * const dct_codes, |
123 |
(uint8_t *) ref, tmp + 32, |
(uint8_t *) ref, tmp + 32, |
124 |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
125 |
ptr = tmp; |
ptr = tmp; |
126 |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; /* fullpixel position */ |
} else ptr = ref + ((int)y + dy/4)*(int)stride + (int)x + dx/4; /* fullpixel position */ |
127 |
|
|
128 |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
129 |
|
|
185 |
(uint8_t *) ref, tmp + 32, |
(uint8_t *) ref, tmp + 32, |
186 |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
187 |
ptr = tmp; |
ptr = tmp; |
188 |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; /* fullpixel position */ |
} else ptr = ref + ((int)y + dy/4)*(int)stride + (int)x + dx/4; /* fullpixel position */ |
189 |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
190 |
|
|
191 |
transfer_8to16sub(dct_codes, cur + y * stride + x, ptr, stride); |
transfer_8to16sub(dct_codes, cur + y * stride + x, ptr, stride); |
451 |
(uint8_t *) f_ref->y, tmp + 32, |
(uint8_t *) f_ref->y, tmp + 32, |
452 |
tmp + 64, tmp + 96, 16*i, 16*j, dx, dy, edged_width, 0); |
tmp + 64, tmp + 96, 16*i, 16*j, dx, dy, edged_width, 0); |
453 |
ptr1 = tmp; |
ptr1 = tmp; |
454 |
} else ptr1 = f_ref->y + (16*j + dy/4)*edged_width + 16*i + dx/4; /* fullpixel position */ |
} else ptr1 = f_ref->y + (16*(int)j + dy/4)*(int)edged_width + 16*(int)i + dx/4; /* fullpixel position */ |
455 |
|
|
456 |
if ((b_dx&3) | (b_dy&3)) { |
if ((b_dx&3) | (b_dy&3)) { |
457 |
interpolate16x16_quarterpel(tmp - i * 16 - j * 16 * edged_width + 16, |
interpolate16x16_quarterpel(tmp - i * 16 - j * 16 * edged_width + 16, |
458 |
(uint8_t *) b_ref->y, tmp + 32, |
(uint8_t *) b_ref->y, tmp + 32, |
459 |
tmp + 64, tmp + 96, 16*i, 16*j, b_dx, b_dy, edged_width, 0); |
tmp + 64, tmp + 96, 16*i, 16*j, b_dx, b_dy, edged_width, 0); |
460 |
ptr2 = tmp + 16; |
ptr2 = tmp + 16; |
461 |
} else ptr2 = b_ref->y + (16*j + b_dy/4)*edged_width + 16*i + b_dx/4; /* fullpixel position */ |
} else ptr2 = b_ref->y + (16*(int)j + b_dy/4)*(int)edged_width + 16*(int)i + b_dx/4; /* fullpixel position */ |
462 |
|
|
463 |
b_dx /= 2; |
b_dx /= 2; |
464 |
b_dy /= 2; |
b_dy /= 2; |
505 |
tmp + 32, tmp + 64, tmp + 96, |
tmp + 32, tmp + 64, tmp + 96, |
506 |
16*i + (k&1)*8, 16*j + (k>>1)*8, dx, dy, edged_width, 0); |
16*i + (k&1)*8, 16*j + (k>>1)*8, dx, dy, edged_width, 0); |
507 |
ptr1 = tmp; |
ptr1 = tmp; |
508 |
} else ptr1 = f_ref->y + (16*j + (k>>1)*8 + dy/4)*edged_width + 16*i + (k&1)*8 + dx/4; |
} else ptr1 = f_ref->y + (16*(int)j + (k>>1)*8 + dy/4)*(int)edged_width + 16*(int)i + (k&1)*8 + dx/4; |
509 |
|
|
510 |
if ((b_dx&3) | (b_dy&3)) { |
if ((b_dx&3) | (b_dy&3)) { |
511 |
interpolate8x8_quarterpel(tmp - (i * 16+(k&1)*8) - (j * 16+((k>>1)*8)) * edged_width + 16, |
interpolate8x8_quarterpel(tmp - (i * 16+(k&1)*8) - (j * 16+((k>>1)*8)) * edged_width + 16, |
513 |
tmp + 16, tmp + 32, tmp + 48, |
tmp + 16, tmp + 32, tmp + 48, |
514 |
16*i + (k&1)*8, 16*j + (k>>1)*8, b_dx, b_dy, edged_width, 0); |
16*i + (k&1)*8, 16*j + (k>>1)*8, b_dx, b_dy, edged_width, 0); |
515 |
ptr2 = tmp + 16; |
ptr2 = tmp + 16; |
516 |
} else ptr2 = b_ref->y + (16*j + (k>>1)*8 + b_dy/4)*edged_width + 16*i + (k&1)*8 + b_dx/4; |
} else ptr2 = b_ref->y + (16*(int)j + (k>>1)*8 + b_dy/4)*(int)edged_width + 16*(int)i + (k&1)*8 + b_dx/4; |
517 |
} else { |
} else { |
518 |
sumx += dx; sumy += dy; |
sumx += dx; sumy += dy; |
519 |
b_sumx += b_dx; b_sumy += b_dy; |
b_sumx += b_dx; b_sumy += b_dy; |