34 |
#include "interpolate8x8.h" |
#include "interpolate8x8.h" |
35 |
#include "../utils/mem_align.h" |
#include "../utils/mem_align.h" |
36 |
#include "../motion/sad.h" |
#include "../motion/sad.h" |
37 |
|
#include "../utils/emms.h" |
38 |
|
|
39 |
#include "font.h" /* XXX: remove later */ |
#include "font.h" /* XXX: remove later */ |
40 |
|
|
237 |
} |
} |
238 |
} |
} |
239 |
|
|
|
/* bframe encoding requires image-based u,v interpolation */ |
|
240 |
void |
void |
241 |
image_interpolate(const IMAGE * refn, |
image_interpolate(const uint8_t * refn, |
242 |
IMAGE * refh, |
uint8_t * refh, |
243 |
IMAGE * refv, |
uint8_t * refv, |
244 |
IMAGE * refhv, |
uint8_t * refhv, |
245 |
uint32_t edged_width, |
uint32_t edged_width, |
246 |
uint32_t edged_height, |
uint32_t edged_height, |
247 |
uint32_t quarterpel, |
uint32_t quarterpel, |
249 |
{ |
{ |
250 |
const uint32_t offset = EDGE_SIZE2 * (edged_width + 1); /* we only interpolate half of the edge area */ |
const uint32_t offset = EDGE_SIZE2 * (edged_width + 1); /* we only interpolate half of the edge area */ |
251 |
const uint32_t stride_add = 7 * edged_width; |
const uint32_t stride_add = 7 * edged_width; |
|
#if 0 |
|
|
const uint32_t edged_width2 = edged_width / 2; |
|
|
const uint32_t edged_height2 = edged_height / 2; |
|
|
const uint32_t offset2 = EDGE_SIZE2 * (edged_width2 + 1); |
|
|
const uint32_t stride_add2 = 7 * edged_width2; |
|
|
#endif |
|
|
uint8_t *n_ptr, *h_ptr, *v_ptr, *hv_ptr; |
|
|
uint32_t x, y; |
|
252 |
|
|
253 |
|
uint8_t *n_ptr; |
254 |
|
uint8_t *h_ptr, *v_ptr, *hv_ptr; |
255 |
|
uint32_t x, y; |
256 |
|
|
257 |
n_ptr = refn->y; |
n_ptr = (uint8_t*)refn; |
258 |
h_ptr = refh->y; |
h_ptr = refh; |
259 |
v_ptr = refv->y; |
v_ptr = refv; |
260 |
|
|
261 |
n_ptr -= offset; |
n_ptr -= offset; |
262 |
h_ptr -= offset; |
h_ptr -= offset; |
285 |
n_ptr += stride_add; |
n_ptr += stride_add; |
286 |
} |
} |
287 |
|
|
288 |
h_ptr = refh->y + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2; |
h_ptr = refh + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2; |
289 |
hv_ptr = refhv->y + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2; |
hv_ptr = refhv + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2; |
290 |
|
|
291 |
for (y = 0; y < (edged_height - EDGE_SIZE); y = y + 8) { |
for (y = 0; y < (edged_height - EDGE_SIZE); y = y + 8) { |
292 |
hv_ptr -= stride_add; |
hv_ptr -= stride_add; |
302 |
} |
} |
303 |
} else { |
} else { |
304 |
|
|
305 |
hv_ptr = refhv->y; |
hv_ptr = refhv; |
306 |
hv_ptr -= offset; |
hv_ptr -= offset; |
307 |
|
|
308 |
for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) { |
for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) { |
328 |
n_ptr += stride_add; |
n_ptr += stride_add; |
329 |
} |
} |
330 |
} |
} |
|
/* |
|
|
#ifdef BFRAMES |
|
|
n_ptr = refn->u; |
|
|
h_ptr = refh->u; |
|
|
v_ptr = refv->u; |
|
|
hv_ptr = refhv->u; |
|
|
|
|
|
n_ptr -= offset2; |
|
|
h_ptr -= offset2; |
|
|
v_ptr -= offset2; |
|
|
hv_ptr -= offset2; |
|
|
|
|
|
for (y = 0; y < edged_height2; y += 8) { |
|
|
for (x = 0; x < edged_width2; x += 8) { |
|
|
interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width2, rounding); |
|
|
interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width2, rounding); |
|
|
interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width2, rounding); |
|
|
|
|
|
n_ptr += 8; |
|
|
h_ptr += 8; |
|
|
v_ptr += 8; |
|
|
hv_ptr += 8; |
|
|
} |
|
|
h_ptr += stride_add2; |
|
|
v_ptr += stride_add2; |
|
|
hv_ptr += stride_add2; |
|
|
n_ptr += stride_add2; |
|
|
} |
|
|
|
|
|
n_ptr = refn->v; |
|
|
h_ptr = refh->v; |
|
|
v_ptr = refv->v; |
|
|
hv_ptr = refhv->v; |
|
|
|
|
|
n_ptr -= offset2; |
|
|
h_ptr -= offset2; |
|
|
v_ptr -= offset2; |
|
|
hv_ptr -= offset2; |
|
|
|
|
|
for (y = 0; y < edged_height2; y = y + 8) { |
|
|
for (x = 0; x < edged_width2; x = x + 8) { |
|
|
interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width2, rounding); |
|
|
interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width2, rounding); |
|
|
interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width2, rounding); |
|
|
|
|
|
n_ptr += 8; |
|
|
h_ptr += 8; |
|
|
v_ptr += 8; |
|
|
hv_ptr += 8; |
|
|
} |
|
|
h_ptr += stride_add2; |
|
|
v_ptr += stride_add2; |
|
|
hv_ptr += stride_add2; |
|
|
n_ptr += stride_add2; |
|
|
} |
|
|
#endif |
|
|
*/ |
|
|
/* |
|
|
interpolate_halfpel_h( |
|
|
refh->y - offset, |
|
|
refn->y - offset, |
|
|
edged_width, edged_height, |
|
|
rounding); |
|
|
|
|
|
interpolate_halfpel_v( |
|
|
refv->y - offset, |
|
|
refn->y - offset, |
|
|
edged_width, edged_height, |
|
|
rounding); |
|
|
|
|
|
interpolate_halfpel_hv( |
|
|
refhv->y - offset, |
|
|
refn->y - offset, |
|
|
edged_width, edged_height, |
|
|
rounding); |
|
|
*/ |
|
|
|
|
|
/* uv-image-based compensation |
|
|
offset = EDGE_SIZE2 * (edged_width / 2 + 1); |
|
|
|
|
|
interpolate_halfpel_h( |
|
|
refh->u - offset, |
|
|
refn->u - offset, |
|
|
edged_width / 2, edged_height / 2, |
|
|
rounding); |
|
|
|
|
|
interpolate_halfpel_v( |
|
|
refv->u - offset, |
|
|
refn->u - offset, |
|
|
edged_width / 2, edged_height / 2, |
|
|
rounding); |
|
|
|
|
|
interpolate_halfpel_hv( |
|
|
refhv->u - offset, |
|
|
refn->u - offset, |
|
|
edged_width / 2, edged_height / 2, |
|
|
rounding); |
|
|
|
|
|
|
|
|
interpolate_halfpel_h( |
|
|
refh->v - offset, |
|
|
refn->v - offset, |
|
|
edged_width / 2, edged_height / 2, |
|
|
rounding); |
|
|
|
|
|
interpolate_halfpel_v( |
|
|
refv->v - offset, |
|
|
refn->v - offset, |
|
|
edged_width / 2, edged_height / 2, |
|
|
rounding); |
|
|
|
|
|
interpolate_halfpel_hv( |
|
|
refhv->v - offset, |
|
|
refn->v - offset, |
|
|
edged_width / 2, edged_height / 2, |
|
|
rounding); |
|
|
*/ |
|
331 |
} |
} |
332 |
|
|
333 |
|
|
473 |
interlacing?abgri_to_yv12_c:abgr_to_yv12_c, 4); |
interlacing?abgri_to_yv12_c:abgr_to_yv12_c, 4); |
474 |
break; |
break; |
475 |
|
|
476 |
|
case XVID_CSP_RGB: |
477 |
|
safe_packed_conv( |
478 |
|
src[0], src_stride[0], image->y, image->u, image->v, |
479 |
|
edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), |
480 |
|
interlacing?rgbi_to_yv12 :rgb_to_yv12, |
481 |
|
interlacing?rgbi_to_yv12_c:rgb_to_yv12_c, 3); |
482 |
|
break; |
483 |
|
|
484 |
case XVID_CSP_RGBA : |
case XVID_CSP_RGBA : |
485 |
safe_packed_conv( |
safe_packed_conv( |
486 |
src[0], src_stride[0], image->y, image->u, image->v, |
src[0], src_stride[0], image->y, image->u, image->v, |
657 |
interlacing?yv12_to_abgri_c:yv12_to_abgr_c, 4); |
interlacing?yv12_to_abgri_c:yv12_to_abgr_c, 4); |
658 |
return 0; |
return 0; |
659 |
|
|
660 |
|
case XVID_CSP_RGB: |
661 |
|
safe_packed_conv( |
662 |
|
dst[0], dst_stride[0], image->y, image->u, image->v, |
663 |
|
edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), |
664 |
|
interlacing?yv12_to_rgbi :yv12_to_rgb, |
665 |
|
interlacing?yv12_to_rgbi_c:yv12_to_rgb_c, 3); |
666 |
|
return 0; |
667 |
|
|
668 |
case XVID_CSP_RGBA: |
case XVID_CSP_RGBA: |
669 |
safe_packed_conv( |
safe_packed_conv( |
670 |
dst[0], dst_stride[0], image->y, image->u, image->v, |
dst[0], dst_stride[0], image->y, image->u, image->v, |
1031 |
p += edged_width/2; |
p += edged_width/2; |
1032 |
} |
} |
1033 |
} |
} |
1034 |
|
|
1035 |
|
/****************************************************************************/ |
1036 |
|
|
1037 |
|
/*
 * Alternative deinterlacing core with the same signature as deinterlace_c().
 * Only declared here; its implementation is not part of this file.
 */
extern void xvid_deinterlace_sse(uint8_t *pix, int width, int height, const int stride);

/*
 * Deinterlacing core currently in use.  Starts out as 0 and is filled in
 * by xvid_image_deinterlace() the first time that function needs it.
 */
static void (*deintl_core)(uint8_t *pix, int width, int height, const int stride) = 0;
1039 |
|
|
1040 |
|
/*
 * Clamp an intermediate filter value to the 0..255 range of an 8-bit sample.
 *
 * Plain comparisons are used instead of the former CLIP_255 macro, which
 * depended on an arithmetic right shift of a negative int and evaluated its
 * argument more than once.
 */
static int
deinterlace_clip_255(int v)
{
	if (v < 0)
		return 0;
	if (v > 255)
		return 255;
	return v;
}

/*
 * Plain C deinterlacing core; filters one plane in place.
 *
 *   pix    - first sample of the first row to process
 *   width  - number of columns to process
 *   height - number of rows in the plane
 *   bps    - distance in bytes from one row to the next; may be negative,
 *            in which case rows are walked backwards starting from pix
 *
 * Columns are handled one at a time.  Within a column only rows 1, 3, 5, ...
 * (counted from pix in steps of bps) are rewritten; rows 0, 2, 4, ... are
 * read but never modified.  Each rewritten sample becomes
 *
 *   avg(row above, row below) + (current - avg(prev same-parity row,
 *                                              next same-parity row)) / 4
 *
 * using the original (unfiltered) values of the same-parity rows.  For the
 * first rewritten row the "previous" same-parity row is the row itself, and
 * for the last rewritten row the row above stands in for the missing row
 * below and the row itself for the missing "next" row.
 *
 * Planes with fewer than two rows are left untouched.
 */
static void deinterlace_c(uint8_t *pix, int width, int height, const int bps)
{
	/*
	 * Every column reads row 0 and row 1 and writes row 1.  With fewer than
	 * two rows that access would fall outside the plane (this happens for
	 * the chroma planes of a two-row image), so there is nothing to do.
	 */
	if (height < 2)
		return;

	pix += bps;	/* start on row 1, the first row that gets rewritten */

	while (width-- > 0) {
		int above = pix[-bps];	/* row directly above the current one */
		int cur = pix[0];	/* current row, original value */
		int prev = cur;		/* previous same-parity row; none yet */
		int pairs = (height >> 1) - 1;	/* rows that still have two rows below them */
		int v;
		uint8_t *p = pix++;

		while (pairs-- > 0) {
			const int below = p[bps];
			const int next = p[2 * bps];

			/*
			 * NOTE: the difference may be negative; ">> 2" is kept from
			 * the original code and rounds toward negative infinity on
			 * compilers that implement arithmetic right shift.
			 */
			v = ((above + below + 1) >> 1) + ((cur - ((prev + next + 1) >> 1)) >> 2);
			p[0] = (uint8_t)deinterlace_clip_255(v);

			prev = cur;
			above = below;
			cur = next;
			p += 2 * bps;
		}

		/* Last rewritten row: no rows below, so reuse 'above' and 'cur'. */
		v = ((above + above + 1) >> 1) + ((cur - ((prev + cur + 1) >> 1)) >> 2);
		p[0] = (uint8_t)deinterlace_clip_255(v);
	}
}
1069 |
|
|
1070 |
|
/*
 * Deinterlace the three planes of a planar image in place.
 *
 *   img          - image whose plane[0..2] / stride[0..2] are processed
 *   width/height - dimensions of plane 0; planes 1 and 2 are processed at
 *                  width>>1 by height>>1
 *   bottom_first - when non-zero, each plane is handed to the core starting
 *                  at its last row with a negated stride
 *
 * Returns 0 without touching the image when height is odd or when img->csp
 * is none of XVID_CSP_PLANAR, XVID_CSP_I420, XVID_CSP_YV12; returns 1 after
 * all three planes have been processed.
 *
 * The core routine is chosen on the first call and cached in deintl_core.
 */
int xvid_image_deinterlace(xvid_image_t* img, int width, int height, int bottom_first)
{
	int plane;

	if ((height & 1) != 0)
		return 0;

	if (!(img->csp == XVID_CSP_PLANAR || img->csp == XVID_CSP_I420 || img->csp == XVID_CSP_YV12))
		return 0; /* not yet supported */

	/* One-time selection of the core routine. */
	if (deintl_core == 0) {
		deintl_core = deinterlace_c;
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
		{
			/*
			 * NOTE(review): the check is against XVID_CPU_MMX while the
			 * routine selected is named *_sse -- confirm the flag matches
			 * what that routine actually requires.
			 */
			const int cpu_flags = check_cpu_features();
			if (cpu_flags & XVID_CPU_MMX)
				deintl_core = xvid_deinterlace_sse;
		}
#endif
	}

	for (plane = 0; plane < 3; plane++) {
		/* plane 0 is full size, planes 1 and 2 are halved in both directions */
		const int w = (plane == 0) ? width : (width >> 1);
		const int h = (plane == 0) ? height : (height >> 1);

		if (bottom_first) {
			/* begin at the last row and walk upwards */
			deintl_core((uint8_t *)img->plane[plane] + (h - 1)*img->stride[plane], w, h, -img->stride[plane]);
		} else {
			deintl_core(img->plane[plane], w, h, img->stride[plane]);
		}
	}

	emms();

	return 1;
}
1100 |
|
|