--- image.c	2004/01/30 18:53:50	1.26.2.14
+++ image.c	2005/09/09 12:18:10	1.32
@@ -3,7 +3,7 @@
  *  XVID MPEG-4 VIDEO CODEC
  *  - Image management functions -
  *
- *  Copyright(C) 2001-2003 Peter Ross <pross@xvid.org>
+ *  Copyright(C) 2001-2004 Peter Ross <pross@xvid.org>
  *
  *  This program is free software ; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -19,22 +19,21 @@
  *  along with this program ; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
- * $Id: image.c,v 1.26.2.14 2004/01/30 18:53:50 chl Exp $
+ * $Id: image.c,v 1.32 2005/09/09 12:18:10 suxen_drol Exp $
  *
  ****************************************************************************/
 
 #include <stdlib.h>
 #include <string.h>				/* memcpy, memset */
 #include <math.h>
-
 #include "../portab.h"
 #include "../global.h"			/* XVID_CSP_XXX's */
 #include "../xvid.h"			/* XVID_CSP_XXX's */
 #include "image.h"
 #include "colorspace.h"
 #include "interpolate8x8.h"
-#include "reduced.h"
 #include "../utils/mem_align.h"
+#include "../motion/sad.h"
 
 #include "font.h"		/* XXX: remove later */
 
@@ -127,13 +126,16 @@
 	memcpy(image1->v, image2->v, edged_width * height / 4);
 }
 
+/* setedges bug was fixed in this BS version */
+#define SETEDGES_BUG_BEFORE		18
 
 void
 image_setedges(IMAGE * image,
 			   uint32_t edged_width,
 			   uint32_t edged_height,
 			   uint32_t width,
-			   uint32_t height)
+			   uint32_t height,
+			   int bs_version)
 {
 	const uint32_t edged_width2 = edged_width / 2;
 	uint32_t width2;
@@ -141,14 +143,16 @@
 	uint8_t *dst;
 	uint8_t *src;
 
-
 	dst = image->y - (EDGE_SIZE + EDGE_SIZE * edged_width);
 	src = image->y;
 
 	/* According to the Standard Clause 7.6.4, padding is done starting at 16
-	 * pixel width and height multiples */
-	width  = (width+15)&~15;
-	height = (height+15)&~15;
+	 * pixel width and height multiples. This was not respected in old xvids */
+	if (bs_version == 0 || bs_version >= SETEDGES_BUG_BEFORE) {
+		width  = (width+15)&~15;
+		height = (height+15)&~15;
+	}
+
 	width2 = width/2;
 
 	for (i = 0; i < EDGE_SIZE; i++) {
@@ -617,7 +621,7 @@
 
 	case XVID_CSP_YVYU:		/* u/v swapped */
 		safe_packed_conv(
-			src[0], src_stride[0], image->y, image->v, image->y,
+			src[0], src_stride[0], image->y, image->v, image->u,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
 			interlacing?yuyvi_to_yv12  :yuyv_to_yv12,
 			interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2);
@@ -631,13 +635,13 @@
 			interlacing?uyvyi_to_yv12_c:uyvy_to_yv12_c, 2);
 		break;
 
-	case XVID_CSP_YV12:	/* YCrCb == internal colorspace for MPEG */
+	case XVID_CSP_I420:	/* YCbCr == YUV == internal colorspace for MPEG */
 		yv12_to_yv12(image->y, image->u, image->v, edged_width, edged_width2,
 			src[0], src[0] + src_stride[0]*height, src[0] + src_stride[0]*height + (src_stride[0]/2)*height2,
 			src_stride[0], src_stride[0]/2, width, height, (csp & XVID_CSP_VFLIP));
 		break;
 
-	case XVID_CSP_I420: /* YCbCr == U and V plane swapped */
+	case XVID_CSP_YV12: /* YCrCb == YVU == U and V plane swapped */
 		yv12_to_yv12(image->y, image->v, image->u, edged_width, edged_width2,
 			src[0], src[0] + src_stride[0]*height, src[0] + src_stride[0]*height + (src_stride[0]/2)*height2,
 			src_stride[0], src_stride[0]/2, width, height, (csp & XVID_CSP_VFLIP));
@@ -713,7 +717,7 @@
 			 int height,
 			 uint32_t edged_width,
 			 uint8_t * dst[4],
-			 uint32_t dst_stride[4],
+			 int dst_stride[4],
 			 int csp,
 			 int interlacing)
 {
@@ -807,14 +811,14 @@
 			interlacing?yv12_to_uyvyi_c:yv12_to_uyvy_c, 2);
 		return 0;
 
-	case XVID_CSP_YV12: /* YCbCr == internal colorspace for MPEG */
+	case XVID_CSP_I420: /* YCbCr == YUV == internal colorspace for MPEG */
 		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
 			dst_stride[0], dst_stride[0]/2,
 			image->y, image->u, image->v, edged_width, edged_width2,
 			width, height, (csp & XVID_CSP_VFLIP));
 		return 0;
 
-	case XVID_CSP_I420:	/* YCrCb == U and V plane swapped */
+	case XVID_CSP_YV12:	/* YCrCb == YVU == U and V plane swapped */
 		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
 			dst_stride[0], dst_stride[0]/2,
 			image->y, image->v, image->u, edged_width, edged_width2,
@@ -888,24 +892,64 @@
 
 }
 
-long plane_sse(uint8_t * orig,
-		   uint8_t * recon,
-		   uint16_t stride,
-		   uint16_t width,
-		   uint16_t height)
+long plane_sse(uint8_t *orig,
+			   uint8_t *recon,
+			   uint16_t stride,
+			   uint16_t width,
+			   uint16_t height)
 {
-	int diff, x, y;
-	long sse=0;
+	int y, bwidth, bheight;
+	long sse = 0;
 
-	for (y = 0; y < height; y++) {
+	bwidth  = width  & (~0x07);
+	bheight = height & (~0x07);
+
+	/* Compute the 8x8 integer part */
+	for (y = 0; y<bheight; y += 8) {
+		int x;
+
+		/* Compute sse for the band */
+		for (x = 0; x<bwidth; x += 8)
+			sse += sse8_8bit(orig  + x, recon + x, stride);
+
+		/* remaining pixels of the 8 pixels high band */
+		for (x = bwidth; x < width; x++) {
+			int diff;
+			diff = *(orig + 0*stride + x) - *(recon + 0*stride + x);
+			sse += diff * diff;
+			diff = *(orig + 1*stride + x) - *(recon + 1*stride + x);
+			sse += diff * diff;
+			diff = *(orig + 2*stride + x) - *(recon + 2*stride + x);
+			sse += diff * diff;
+			diff = *(orig + 3*stride + x) - *(recon + 3*stride + x);
+			sse += diff * diff;
+			diff = *(orig + 4*stride + x) - *(recon + 4*stride + x);
+			sse += diff * diff;
+			diff = *(orig + 5*stride + x) - *(recon + 5*stride + x);
+			sse += diff * diff;
+			diff = *(orig + 6*stride + x) - *(recon + 6*stride + x);
+			sse += diff * diff;
+			diff = *(orig + 7*stride + x) - *(recon + 7*stride + x);
+			sse += diff * diff;
+		}
+
+		orig  += 8*stride;
+		recon += 8*stride;
+	}
+
+	/* Compute the sse of the bottom rows left over below the 8-pixel bands */
+	for (y = bheight; y < height; y++) {
+		int x;
 		for (x = 0; x < width; x++) {
+			int diff;
 			diff = *(orig + x) - *(recon + x);
 			sse += diff * diff;
 		}
 		orig += stride;
 		recon += stride;
 	}
-	return sse;
+
+	return (sse);
 }
 
 #if 0
@@ -1034,9 +1078,9 @@
 }
 
 void
-output_slice(IMAGE * cur, int std, int width, xvid_image_t* out_frm, int mbx, int mby,int mbl) {
+output_slice(IMAGE * cur, int stride, int width, xvid_image_t* out_frm, int mbx, int mby,int mbl) {
   uint8_t *dY,*dU,*dV,*sY,*sU,*sV;
-  int std2 = std >> 1;
+  int stride2 = stride >> 1;
   int w = mbl << 4, w2,i;
 
   if(w > width)
@@ -1046,24 +1090,24 @@
   dY = (uint8_t*)out_frm->plane[0] + (mby << 4) * out_frm->stride[0] + (mbx << 4);
   dU = (uint8_t*)out_frm->plane[1] + (mby << 3) * out_frm->stride[1] + (mbx << 3);
   dV = (uint8_t*)out_frm->plane[2] + (mby << 3) * out_frm->stride[2] + (mbx << 3);
-  sY = cur->y + (mby << 4) * std + (mbx << 4);
-  sU = cur->u + (mby << 3) * std2 + (mbx << 3);
-  sV = cur->v + (mby << 3) * std2 + (mbx << 3);
+  sY = cur->y + (mby << 4) * stride + (mbx << 4);
+  sU = cur->u + (mby << 3) * stride2 + (mbx << 3);
+  sV = cur->v + (mby << 3) * stride2 + (mbx << 3);
 
   for(i = 0 ; i < 16 ; i++) {
     memcpy(dY,sY,w);
     dY += out_frm->stride[0];
-    sY += std;
+    sY += stride;
   }
   for(i = 0 ; i < 8 ; i++) {
     memcpy(dU,sU,w2);
     dU += out_frm->stride[1];
-    sU += std2;
+    sU += stride2;
   }
   for(i = 0 ; i < 8 ; i++) {
     memcpy(dV,sV,w2);
     dV += out_frm->stride[2];
-    sV += std2;
+    sV += stride2;
   }
 }
 
@@ -1093,78 +1137,3 @@
 		p += edged_width/2;
 	}
 }
-
-
-/* reduced resolution deblocking filter
-	block = block size (16=rrv, 8=full resolution)
-	flags = XVID_DEC_YDEBLOCK|XVID_DEC_UVDEBLOCK
-*/
-void
-image_deblock_rrv(IMAGE * img, int edged_width,
-				const MACROBLOCK * mbs, int mb_width, int mb_height, int mb_stride,
-				int block, int flags)
-{
-	const int edged_width2 = edged_width /2;
-	const int nblocks = block / 8;	/* skals code uses 8pixel block uints */
-	int i,j;
-
-	/* luma: j,i in block units */
-
-		for (j = 1; j < mb_height*2; j++)		/* horizontal deblocking */
-		for (i = 0; i < mb_width*2; i++)
-		{
-			if (mbs[(j-1)/2*mb_stride + (i/2)].mode != MODE_NOT_CODED ||
-				mbs[(j+0)/2*mb_stride + (i/2)].mode != MODE_NOT_CODED)
-			{
-				hfilter_31(img->y + (j*block - 1)*edged_width + i*block,
-								  img->y + (j*block + 0)*edged_width + i*block, nblocks);
-			}
-		}
-
-		for (j = 0; j < mb_height*2; j++)		/* vertical deblocking */
-		for (i = 1; i < mb_width*2; i++)
-		{
-			if (mbs[(j/2)*mb_stride + (i-1)/2].mode != MODE_NOT_CODED ||
-				mbs[(j/2)*mb_stride + (i+0)/2].mode != MODE_NOT_CODED)
-			{
-				vfilter_31(img->y + (j*block)*edged_width + i*block - 1,
-						   img->y + (j*block)*edged_width + i*block + 0,
-						   edged_width, nblocks);
-			}
-		}
-
-
-
-	/* chroma */
-
-		for (j = 1; j < mb_height; j++)		/* horizontal deblocking */
-		for (i = 0; i < mb_width; i++)
-		{
-			if (mbs[(j-1)*mb_stride + i].mode != MODE_NOT_CODED ||
-				mbs[(j+0)*mb_stride + i].mode != MODE_NOT_CODED)
-			{
-				hfilter_31(img->u + (j*block - 1)*edged_width2 + i*block,
-						   img->u + (j*block + 0)*edged_width2 + i*block, nblocks);
-				hfilter_31(img->v + (j*block - 1)*edged_width2 + i*block,
-						   img->v + (j*block + 0)*edged_width2 + i*block, nblocks);
-			}
-		}
-
-		for (j = 0; j < mb_height; j++)		/* vertical deblocking */
-		for (i = 1; i < mb_width; i++)
-		{
-			if (mbs[j*mb_stride + i - 1].mode != MODE_NOT_CODED ||
-				mbs[j*mb_stride + i + 0].mode != MODE_NOT_CODED)
-			{
-				vfilter_31(img->u + (j*block)*edged_width2 + i*block - 1,
-						   img->u + (j*block)*edged_width2 + i*block + 0,
-						   edged_width2, nblocks);
-				vfilter_31(img->v + (j*block)*edged_width2 + i*block - 1,
-						   img->v + (j*block)*edged_width2 + i*block + 0,
-						   edged_width2, nblocks);
-			}
-		}
-
-
-}
-