 *  XVID MPEG-4 VIDEO CODEC
 *  - Image management functions -
 *
 *  Copyright(C) 2001-2004 Peter Ross <pross@xvid.org>
 *
 *  This program is free software ; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
#include "interpolate8x8.h"
#include "reduced.h"
#include "../utils/mem_align.h"
#include "../motion/sad.h"

#include "font.h"	/* XXX: remove later */
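/* ../motion/sad.h is pulled in for the sse8_8bit() helper used by
 * plane_sse() further down in this file */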
{
	const uint32_t edged_width2 = edged_width / 2;
	const uint32_t edged_height2 = edged_height / 2;

	image->y =
		xvid_malloc(edged_width * (edged_height + 1) + SAFETY, CACHE_LINE);
	if (image->y == NULL) {
		return -1;
	}
	memset(image->y, 0, edged_width * (edged_height + 1) + SAFETY);
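	/* the memset covers the full allocation, SAFETY guard bytes included,
	 * so the edge padding never starts from uninitialized memory */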

	image->u = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
	if (image->u == NULL) {
		xvid_free(image->y);
		image->y = NULL;
		return -1;
	}
	memset(image->u, 0, edged_width2 * edged_height2 + SAFETY);

	image->v = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
	if (image->v == NULL) {
		xvid_free(image->u);
		image->u = NULL;
		xvid_free(image->y);
		image->y = NULL;
		return -1;
	}
	memset(image->v, 0, edged_width2 * edged_height2 + SAFETY);

	image->y += EDGE_SIZE * edged_width + EDGE_SIZE;
	image->u += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
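	/* the plane pointers are advanced past the top/left edge padding so they
	 * address pixel (0,0) of the visible frame; image_destroy() subtracts
	 * the same offsets again before handing the pointers back to xvid_free() */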

	if (image->y) {
		xvid_free(image->y - (EDGE_SIZE * edged_width + EDGE_SIZE));
		image->y = NULL;
	}
	if (image->u) {
		xvid_free(image->u - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
		image->u = NULL;
	}
	if (image->v) {
		xvid_free(image->v - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
		image->v = NULL;
	}
}

	memcpy(image1->v, image2->v, edged_width * height / 4);
}

/* setedges bug was fixed in this BS version */
#define SETEDGES_BUG_BEFORE 18

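/* SETEDGES_BUG_BEFORE refers to the xvid build number carried in the
 * bitstream; bs_version == 0 presumably means "unknown / not an old xvid
 * stream", so it also gets the spec-conforming padding below */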
void
image_setedges(IMAGE * image,
			   uint32_t edged_width,
			   uint32_t edged_height,
			   uint32_t width,
			   uint32_t height,
			   int bs_version)
{
	const uint32_t edged_width2 = edged_width / 2;
	uint32_t width2;
	uint32_t i;
	uint8_t *dst;
	uint8_t *src;

	dst = image->y - (EDGE_SIZE + EDGE_SIZE * edged_width);
	src = image->y;

	/* According to the Standard Clause 7.6.4, padding is done starting at 16
	 * pixel width and height multiples. This was not respected in old xvids */
	if (bs_version == 0 || bs_version >= SETEDGES_BUG_BEFORE) {
		width  = (width+15)&~15;
		height = (height+15)&~15;
	}
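	/* (width+15)&~15 rounds up to the next multiple of 16: a width of 354
	 * becomes 368, while an already aligned 352 stays 352 */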

	width2 = width/2;

	for (i = 0; i < EDGE_SIZE; i++) {
		memset(dst, *src, EDGE_SIZE);
		memcpy(dst + EDGE_SIZE, src, width);
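		/* each padding row above the frame replicates luma row 0: the
		 * top-left corner is flooded with the first pixel's value and the
		 * span over the image copies the row itself */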
	n_ptr = refn->y;
	h_ptr = refh->y;
	v_ptr = refv->y;

	n_ptr -= offset;
	h_ptr -= offset;
	v_ptr -= offset;

	/* Note we initialize the hv pointer later, as we can optimize code a bit
	 * doing it down to up in quarterpel and up to down in halfpel */
	if(quarterpel) {

		for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) {
			n_ptr += stride_add;
		}

		h_ptr = refh->y + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2;
		hv_ptr = refhv->y + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2;
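		/* both pointers start past the bottom-right corner of the padded
		 * plane; the loop below decrements them, filtering 8x8 blocks
		 * right-to-left and bottom-up */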

		for (y = 0; y < (edged_height - EDGE_SIZE); y = y + 8) {
			hv_ptr -= stride_add;
			h_ptr -= stride_add;
			hv_ptr -= EDGE_SIZE;
			h_ptr -= EDGE_SIZE;

			for (x = 0; x < (edged_width - EDGE_SIZE); x = x + 8) {
				hv_ptr -= 8;
				h_ptr -= 8;
				interpolate8x8_6tap_lowpass_v(hv_ptr, h_ptr, edged_width, rounding);
			}
		}
	} else {

		hv_ptr = refhv->y;
		hv_ptr -= offset;

		for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) {
			for (x = 0; x < (edged_width - EDGE_SIZE); x += 8) {
				interlacing?rgbai_to_yv12_c:rgba_to_yv12_c, 4);
		break;

	case XVID_CSP_ARGB:
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?argbi_to_yv12 :argb_to_yv12,
			interlacing?argbi_to_yv12_c:argb_to_yv12_c, 4);
		break;

	case XVID_CSP_YUY2:
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,

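	/* YVYU is handled with the same converters as YUYV; only the u and v
	 * destination planes are exchanged */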
	case XVID_CSP_YVYU:		/* u/v swapped */
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->v, image->u,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yuyvi_to_yv12 :yuyv_to_yv12,
			interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2);
			interlacing?uyvyi_to_yv12_c:uyvy_to_yv12_c, 2);
		break;

	case XVID_CSP_I420:	/* YCbCr == YUV == internal colorspace for MPEG */
		yv12_to_yv12(image->y, image->u, image->v, edged_width, edged_width2,
			src[0], src[0] + src_stride[0]*height, src[0] + src_stride[0]*height + (src_stride[0]/2)*height2,
			src_stride[0], src_stride[0]/2, width, height, (csp & XVID_CSP_VFLIP));
		break;

	case XVID_CSP_YV12:	/* YCrCb == YVU == U and V plane swapped */
		yv12_to_yv12(image->y, image->v, image->u, edged_width, edged_width2,
			src[0], src[0] + src_stride[0]*height, src[0] + src_stride[0]*height + (src_stride[0]/2)*height2,
			src_stride[0], src_stride[0]/2, width, height, (csp & XVID_CSP_VFLIP));
		break;

	case XVID_CSP_PLANAR:	/* YCbCr with arbitrary pointers and different strides for Y and UV */
		yv12_to_yv12(image->y, image->u, image->v, edged_width, edged_width2,
			src[0], src[1], src[2], src_stride[0], src_stride[1],	/* v: src_stride[2] not yet supported */
			width, height, (csp & XVID_CSP_VFLIP));
		break;

				interlacing?yv12_to_rgbai_c:yv12_to_rgba_c, 4);
		return 0;

	case XVID_CSP_ARGB:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_argbi :yv12_to_argb,
			interlacing?yv12_to_argbi_c:yv12_to_argb_c, 4);
		return 0;

	case XVID_CSP_YUY2:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			interlacing?yv12_to_uyvyi_c:yv12_to_uyvy_c, 2);
		return 0;

	case XVID_CSP_I420:	/* YCbCr == YUV == internal colorspace for MPEG */
		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
			dst_stride[0], dst_stride[0]/2,
			image->y, image->u, image->v, edged_width, edged_width2,
			width, height, (csp & XVID_CSP_VFLIP));
		return 0;
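	/* in the single-buffer layouts above, the chroma planes follow the luma
	 * plane directly: u starts at dst[0] + dst_stride[0]*height, and v starts
	 * height2 half-stride rows after u */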

	case XVID_CSP_YV12:	/* YCrCb == YVU == U and V plane swapped */
		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
			dst_stride[0], dst_stride[0]/2,
			image->y, image->v, image->u, edged_width, edged_width2,
			width, height, (csp & XVID_CSP_VFLIP));
		return 0;

	case XVID_CSP_PLANAR:	/* YCbCr with arbitrary pointers and different strides for Y and UV */
		yv12_to_yv12(dst[0], dst[1], dst[2],
			dst_stride[0], dst_stride[1],	/* v: dst_stride[2] not yet supported */
			image->y, image->u, image->v, edged_width, edged_width2,
			width, height, (csp & XVID_CSP_VFLIP));
		return 0;

			 uint16_t width,
			 uint16_t height)
{
	int y, bwidth, bheight;
	long sse = 0;

	bwidth  = width  & (~0x07);
	bheight = height & (~0x07);
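	/* masking with ~0x07 rounds down to a multiple of 8, splitting the plane
	 * into a region coverable by whole 8x8 blocks plus right-edge and
	 * bottom-edge remainders handled pixel by pixel */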

	/* Compute the 8x8 integer part */
	for (y = 0; y < bheight; y += 8) {
		int x;

		/* Compute sse for the band */
		for (x = 0; x < bwidth; x += 8)
			sse += sse8_8bit(orig + x, recon + x, stride);

		/* remaining pixels of the 8-pixel-high band */
		for (x = bwidth; x < width; x++) {
			int diff;
			diff = *(orig + 0*stride + x) - *(recon + 0*stride + x);
			sse += diff * diff;
			diff = *(orig + 1*stride + x) - *(recon + 1*stride + x);
			sse += diff * diff;
			diff = *(orig + 2*stride + x) - *(recon + 2*stride + x);
			sse += diff * diff;
			diff = *(orig + 3*stride + x) - *(recon + 3*stride + x);
			sse += diff * diff;
			diff = *(orig + 4*stride + x) - *(recon + 4*stride + x);
			sse += diff * diff;
			diff = *(orig + 5*stride + x) - *(recon + 5*stride + x);
			sse += diff * diff;
			diff = *(orig + 6*stride + x) - *(recon + 6*stride + x);
			sse += diff * diff;
			diff = *(orig + 7*stride + x) - *(recon + 7*stride + x);
			sse += diff * diff;
		}

		orig += 8*stride;
		recon += 8*stride;
	}

	/* Compute the sse over the remaining bottom rows */
	for (y = bheight; y < height; y++) {
		int x;
		for (x = 0; x < width; x++) {
			int diff;
			diff = *(orig + x) - *(recon + x);
			sse += diff * diff;
		}
		orig += stride;
		recon += stride;
	}

	return (sse);
}

#if 0