--- decoder.c	2003/11/13 22:35:30	1.49.2.19
+++ decoder.c	2004/06/04 11:54:41	1.51.2.5
@@ -20,7 +20,7 @@
  *  along with this program ; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
- * $Id: decoder.c,v 1.49.2.19 2003/11/13 22:35:30 edgomez Exp $
+ * $Id: decoder.c,v 1.51.2.5 2004/06/04 11:54:41 edgomez Exp $
  *
  ****************************************************************************/
 
@@ -41,6 +41,7 @@
 #include "bitstream/mbcoding.h"
 
 #include "quant/quant.h"
+#include "quant/quant_matrix.h"
 #include "dct/idct.h"
 #include "dct/fdct.h"
 #include "utils/mem_transfer.h"
@@ -57,6 +58,7 @@
 
 #include "image/image.h"
 #include "image/colorspace.h"
+#include "image/postprocessing.h"
 #include "utils/mem_align.h"
 
 static int
@@ -75,6 +77,8 @@
 		xvid_free(dec->last_mbs);
 	if (dec->mbs)
 		xvid_free(dec->mbs);
+	if (dec->qscale)
+		xvid_free(dec->qscale);
 
 	/* realloc */
 	dec->mb_width = (dec->width + 15) / 16;
@@ -159,6 +163,13 @@
 
 	memset(dec->last_mbs, 0, sizeof(MACROBLOCK) * dec->mb_width * dec->mb_height);
 
+	/* qscale is optional: if this allocation fails it stays NULL and is skipped */
+	dec->qscale =
+		xvid_malloc(sizeof(int) * dec->mb_width * dec->mb_height, CACHE_LINE);
+	
+	if (dec->qscale)
+		memset(dec->qscale, 0, sizeof(int) * dec->mb_width * dec->mb_height);
+
 	return 0;
 }
 
@@ -175,8 +186,15 @@
 	if (dec == NULL) {
 		return XVID_ERR_MEMORY;
 	}
+
 	memset(dec, 0, sizeof(DECODER));
 
+	dec->mpeg_quant_matrices = xvid_malloc(sizeof(uint16_t) * 64 * 8, CACHE_LINE);
+	if (dec->mpeg_quant_matrices == NULL) {
+		xvid_free(dec);
+		return XVID_ERR_MEMORY;
+	}
+
 	create->handle = dec;
 
 	dec->width = create->width;
@@ -191,17 +209,20 @@
 	/* image based GMC */
 	image_null(&dec->gmc);
 
-
 	dec->mbs = NULL;
 	dec->last_mbs = NULL;
+	dec->qscale = NULL;
 
 	init_timer();
+	init_postproc(&dec->postproc);
+	init_mpeg_matrix(dec->mpeg_quant_matrices);
 
 	/* For B-frame support (used to save reference frame's time */
 	dec->frames = 0;
 	dec->time = dec->time_base = dec->last_time_base = 0;
 	dec->low_delay = 0;
 	dec->packed_mode = 0;
+	dec->time_inc_resolution = 1; /* until VOL header says otherwise */
 
 	dec->fixed_dimensions = (dec->width > 0 && dec->height > 0);
 
@@ -217,6 +238,7 @@
 {
 	xvid_free(dec->last_mbs);
 	xvid_free(dec->mbs);
+	xvid_free(dec->qscale);
 
 	/* image based GMC */
 	image_destroy(&dec->gmc, dec->edged_width, dec->edged_height);
@@ -226,6 +248,7 @@
 	image_destroy(&dec->tmp, dec->edged_width, dec->edged_height);
 	image_destroy(&dec->qtmp, dec->edged_width, dec->edged_height);
 	image_destroy(&dec->cur, dec->edged_width, dec->edged_height);
+	xvid_free(dec->mpeg_quant_matrices);
 	xvid_free(dec);
 
 	write_timer();
@@ -316,14 +339,14 @@
 		stop_coding_timer();
 
 		start_timer();
-		add_acdc(pMB, i, &block[i * 64], iDcScaler, predictors);
+		add_acdc(pMB, i, &block[i * 64], iDcScaler, predictors, dec->bs_version);
 		stop_prediction_timer();
 
 		start_timer();
 		if (dec->quant_type == 0) {
-			dequant_h263_intra(&data[i * 64], &block[i * 64], iQuant, iDcScaler);
+			dequant_h263_intra(&data[i * 64], &block[i * 64], iQuant, iDcScaler, dec->mpeg_quant_matrices);
 		} else {
-			dequant_mpeg_intra(&data[i * 64], &block[i * 64], iQuant, iDcScaler);
+			dequant_mpeg_intra(&data[i * 64], &block[i * 64], iQuant, iDcScaler, dec->mpeg_quant_matrices);
 		}
 		stop_iquant_timer();
 
@@ -392,7 +415,7 @@
 			stop_coding_timer();
 
 			start_timer();
-			dequant(&data[i * 64], block, iQuant);
+			dequant(&data[i * 64], block, iQuant, dec->mpeg_quant_matrices);
 			stop_iquant_timer();
 
 			start_timer();
@@ -474,6 +497,33 @@
 			mv[i] = pMB->mvs[i];
 	}
 
+	for (i = 0; i < 4; i++) {
+		/* clip to valid range */
+		int border = (int)(dec->mb_width - x_pos) << (5 + dec->quarterpel);
+		if (mv[i].x > border) {
+			DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", mv[i].x, border, x_pos, y_pos);
+			mv[i].x = border;
+		} else {
+			border = (-(int)x_pos-1) << (5 + dec->quarterpel);
+			if (mv[i].x < border) {
+				DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", mv[i].x, border, x_pos, y_pos);
+				mv[i].x = border;
+			}
+		}
+
+		border = (int)(dec->mb_height - y_pos) << (5 + dec->quarterpel);
+		if (mv[i].y >  border) {
+			DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", mv[i].y, border, x_pos, y_pos);
+			mv[i].y = border;
+		} else {
+			border = (-(int)y_pos-1) << (5 + dec->quarterpel);
+			if (mv[i].y < border) {
+				DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", mv[i].y, border, x_pos, y_pos);
+				mv[i].y = border;
+			}
+		}
+	}
+
 	start_timer();
 
 	if (pMB->mode != MODE_INTER4V) { /* INTER, INTER_Q, NOT_CODED, FORWARD, BACKWARD */
@@ -767,7 +817,7 @@
 
 	start_timer();
 	image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height,
-					dec->width, dec->height);
+					dec->width, dec->height, dec->bs_version);
 	stop_edges_timer();
 
 	if (gmc_warp) {
@@ -839,12 +889,12 @@
 				mb->quant = quant;
 
 				if (dec->interlacing) {
-					if ((cbp || intra) && !mcsel) {
+					if (cbp || intra) {
 						mb->field_dct = BitstreamGetBit(bs);
 						DPRINTF(XVID_DEBUG_MB,"decp: field_dct: %i\n", mb->field_dct);
 					}
 
-					if (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) {
+					if ((mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) && !mcsel) {
 						mb->field_pred = BitstreamGetBit(bs);
 						DPRINTF(XVID_DEBUG_MB, "decp: field_pred: %i\n", mb->field_pred);
 
@@ -888,6 +938,7 @@
 
 			} else if (gmc_warp) {	/* a not coded S(GMC)-VOP macroblock */
 				mb->mode = MODE_NOT_CODED_GMC;
+				mb->quant = quant;
 				decoder_mbgmc(dec, mb, x, y, fcode, 0x00, bs, rounding);
 
 				if(dec->out_frm && cp_mb > 0) {
@@ -897,6 +948,7 @@
 				st_mb = x+1;
 			} else {	/* not coded P_VOP macroblock */
 				mb->mode = MODE_NOT_CODED;
+				mb->quant = quant;
 
 				mb->mvs[0].x = mb->mvs[1].x = mb->mvs[2].x = mb->mvs[3].x = 0;
 				mb->mvs[0].y = mb->mvs[1].y = mb->mvs[2].y = mb->mvs[3].y = 0;
@@ -923,7 +975,9 @@
 get_b_motion_vector(Bitstream * bs,
 					VECTOR * mv,
 					int fcode,
-					const VECTOR pmv)
+					const VECTOR pmv,
+					const DECODER * const dec,
+					const int x, const int y)
 {
 	const int scale_fac = 1 << (fcode - 1);
 	const int high = (32 * scale_fac) - 1;
@@ -1038,8 +1092,8 @@
 							pMB->mvs[1].x, pMB->mvs[1].y, stride, 0);
 		interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos, 16 * y_pos + 8,
 							pMB->mvs[2].x, pMB->mvs[2].y, stride, 0);
-		interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos + 8,
-							16 * y_pos + 8, pMB->mvs[3].x, pMB->mvs[3].y, stride, 0);
+		interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos + 8, 16 * y_pos + 8,
+							pMB->mvs[3].x, pMB->mvs[3].y, stride, 0);
 	}
 
 	interpolate8x8_switch(dec->cur.u, forward.u, 8 * x_pos, 8 * y_pos, uv_dx,
@@ -1161,14 +1215,13 @@
 	uint32_t x, y;
 	VECTOR mv;
 	const VECTOR zeromv = {0,0};
-	const int64_t TRB = dec->time_pp - dec->time_bp, TRD = dec->time_pp;
 	int i;
 
 	start_timer();
 	image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height,
-					dec->width, dec->height);
+					dec->width, dec->height, dec->bs_version);
 	image_setedges(&dec->refn[1], dec->edged_width, dec->edged_height,
-					dec->width, dec->height);
+					dec->width, dec->height, dec->bs_version);
 	stop_edges_timer();
 
 	for (y = 0; y < dec->mb_height; y++) {
@@ -1252,18 +1305,19 @@
 
 			switch (mb->mode) {
 			case MODE_DIRECT:
-				get_b_motion_vector(bs, &mv, 1, zeromv);
+				get_b_motion_vector(bs, &mv, 1, zeromv, dec, x, y);
 
 			case MODE_DIRECT_NONE_MV:
 				for (i = 0; i < 4; i++) {
-					mb->mvs[i].x = (int32_t) ((TRB * last_mb->mvs[i].x) / TRD + mv.x);
-					mb->b_mvs[i].x = (int32_t) ((mv.x == 0)
-									? ((TRB - TRD) * last_mb->mvs[i].x) / TRD
-									: mb->mvs[i].x - last_mb->mvs[i].x);
-					mb->mvs[i].y = (int32_t) ((TRB * last_mb->mvs[i].y) / TRD + mv.y);
-					mb->b_mvs[i].y = (int32_t) ((mv.y == 0)
-									? ((TRB - TRD) * last_mb->mvs[i].y) / TRD
-									: mb->mvs[i].y - last_mb->mvs[i].y);
+					mb->mvs[i].x = last_mb->mvs[i].x*dec->time_bp/dec->time_pp + mv.x;
+					mb->mvs[i].y = last_mb->mvs[i].y*dec->time_bp/dec->time_pp + mv.y;
+					
+					mb->b_mvs[i].x = (mv.x)
+						?  mb->mvs[i].x - last_mb->mvs[i].x
+						: last_mb->mvs[i].x*(dec->time_bp - dec->time_pp)/dec->time_pp;
+					mb->b_mvs[i].y = (mv.y)
+						? mb->mvs[i].y - last_mb->mvs[i].y
+						: last_mb->mvs[i].y*(dec->time_bp - dec->time_pp)/dec->time_pp;
 				}
 
 				decoder_bf_interpolate_mbinter(dec, dec->refn[1], dec->refn[0],
@@ -1271,10 +1325,10 @@
 				break;
 
 			case MODE_INTERPOLATE:
-				get_b_motion_vector(bs, &mb->mvs[0], fcode_forward, dec->p_fmv);
+				get_b_motion_vector(bs, &mb->mvs[0], fcode_forward, dec->p_fmv, dec, x, y);
 				dec->p_fmv = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] =	mb->mvs[0];
 
-				get_b_motion_vector(bs, &mb->b_mvs[0], fcode_backward, dec->p_bmv);
+				get_b_motion_vector(bs, &mb->b_mvs[0], fcode_backward, dec->p_bmv, dec, x, y);
 				dec->p_bmv = mb->b_mvs[1] = mb->b_mvs[2] = mb->b_mvs[3] = mb->b_mvs[0];
 
 				decoder_bf_interpolate_mbinter(dec, dec->refn[1], dec->refn[0],
@@ -1282,14 +1336,14 @@
 				break;
 
 			case MODE_BACKWARD:
-				get_b_motion_vector(bs, &mb->mvs[0], fcode_backward, dec->p_bmv);
+				get_b_motion_vector(bs, &mb->mvs[0], fcode_backward, dec->p_bmv, dec, x, y);
 				dec->p_bmv = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] =	mb->mvs[0];
 
 				decoder_mbinter(dec, mb, x, y, mb->cbp, bs, 0, 0, 0);
 				break;
 
 			case MODE_FORWARD:
-				get_b_motion_vector(bs, &mb->mvs[0], fcode_forward, dec->p_fmv);
+				get_b_motion_vector(bs, &mb->mvs[0], fcode_forward, dec->p_fmv, dec, x, y);
 				dec->p_fmv = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] =	mb->mvs[0];
 
 				decoder_mbinter(dec, mb, x, y, mb->cbp, bs, 0, 0, 1);
@@ -1304,8 +1358,22 @@
 
 /* perform post processing if necessary, and output the image */
 void decoder_output(DECODER * dec, IMAGE * img, MACROBLOCK * mbs,
-					xvid_dec_frame_t * frame, xvid_dec_stats_t * stats, int coding_type)
+					xvid_dec_frame_t * frame, xvid_dec_stats_t * stats,
+					int coding_type, int quant)
 {
+	if (dec->cartoon_mode)
+		frame->general &= ~XVID_FILMEFFECT;
+
+	if (frame->general & (XVID_DEBLOCKY|XVID_DEBLOCKUV|XVID_FILMEFFECT) && mbs != NULL)	/* post process */
+	{
+		/* note: image is stored to tmp */
+		image_copy(&dec->tmp, img, dec->edged_width, dec->height);
+		image_postproc(&dec->postproc, &dec->tmp, dec->edged_width, 
+					   mbs, dec->mb_width, dec->mb_height, dec->mb_width,
+					   frame->general, dec->frames, (coding_type == B_VOP));
+		img = &dec->tmp;
+	}
+
 	image_output(img, dec->width, dec->height,
 				 dec->edged_width, (uint8_t**)frame->output.plane, frame->output.stride,
 				 frame->output.csp, dec->interlacing);
@@ -1314,6 +1382,14 @@
 		stats->type = coding2type(coding_type);
 		stats->data.vop.time_base = (int)dec->time_base;
 		stats->data.vop.time_increment = 0;	/* XXX: todo */
+		stats->data.vop.qscale_stride = dec->mb_width;
+		stats->data.vop.qscale = dec->qscale;
+		if (stats->data.vop.qscale != NULL && mbs != NULL) {
+			int i;
+			for (i = 0; i < dec->mb_width*dec->mb_height; i++)
+				stats->data.vop.qscale[i] = mbs[i].quant;
+		} else 
+			stats->data.vop.qscale = NULL;
 	}
 }
 
@@ -1326,7 +1402,7 @@
 	Bitstream bs;
 	uint32_t rounding;
 	uint32_t reduced_resolution;
-	uint32_t quant;
+	uint32_t quant = 2;
 	uint32_t fcode_forward;
 	uint32_t fcode_backward;
 	uint32_t intra_dc_threshold;
@@ -1349,7 +1425,7 @@
 		/* if not decoding "low_delay/packed", and this isn't low_delay and
 			we have a reference frame, then outout the reference frame */
 		if (!(dec->low_delay_default && dec->packed_mode) && !dec->low_delay && dec->frames>0) {
-			decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type);
+			decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 			dec->frames = 0;
 			ret = 0;
 		} else {
@@ -1414,12 +1490,17 @@
 		goto repeat;
 	}
 
+	if(dec->frames == 0 && coding_type != I_VOP) {
+		/* 1st frame is not an i-vop */
+		goto repeat;
+	}
+
 	dec->p_bmv.x = dec->p_bmv.y = dec->p_fmv.y = dec->p_fmv.y = 0;	/* init pred vector to 0 */
 
 	/* packed_mode: special-N_VOP treament */
 	if (dec->packed_mode && coding_type == N_VOP) {
 		if (dec->low_delay_default && dec->frames > 0) {
-			decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type);
+			decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 			output = 1;
 		}
 		/* ignore otherwise */
@@ -1440,6 +1521,7 @@
 			/* XXX: not_coded vops are not used for forward prediction */
 			/* we should not swap(last_mbs,mbs) */
 			image_copy(&dec->cur, &dec->refn[0], dec->edged_width, dec->height);
+			SWAP(MACROBLOCK *, dec->mbs, dec->last_mbs); /* it will be swapped back */
 			break;
 		}
 
@@ -1452,11 +1534,11 @@
 		/* note: for packed_mode, output is performed when the special-N_VOP is decoded */
 		if (!(dec->low_delay_default && dec->packed_mode)) {
 			if (dec->low_delay) {
-				decoder_output(dec, &dec->cur, dec->mbs, frame, stats, coding_type);
+				decoder_output(dec, &dec->cur, dec->mbs, frame, stats, coding_type, quant);
 				output = 1;
 			} else if (dec->frames > 0)	{ /* is the reference frame valid? */
 				/* output the reference frame */
-				decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type);
+				decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 				output = 1;
 			}
 		}
@@ -1481,21 +1563,25 @@
 			/* attemping to decode a bvop without atleast 2 reference frames */
 			image_printf(&dec->cur, dec->edged_width, dec->height, 16, 16,
 						"broken b-frame, mising ref frames");
+			if (stats) stats->type = XVID_TYPE_NOTHING;
 		} else if (dec->time_pp <= dec->time_bp) {
 			/* this occurs when dx50_bvop_compatibility==0 sequences are
 			decoded in vfw. */
 			image_printf(&dec->cur, dec->edged_width, dec->height, 16, 16,
 						"broken b-frame, tpp=%i tbp=%i", dec->time_pp, dec->time_bp);
+			if (stats) stats->type = XVID_TYPE_NOTHING;
 		} else {
 			decoder_bframe(dec, &bs, quant, fcode_forward, fcode_backward);
+			decoder_output(dec, &dec->cur, dec->mbs, frame, stats, coding_type, quant);
 		}
 
-		decoder_output(dec, &dec->cur, dec->mbs, frame, stats, coding_type);
 		output = 1;
 		dec->frames++;
 	}
 
-	BitstreamByteAlign(&bs);
+#if 0 /* Avoids reading too much data because of the 32-bit reads in our BS functions */
+	 BitstreamByteAlign(&bs);
+#endif
 
 	/* low_delay_default mode: repeat in packed_mode */
 	if (dec->low_delay_default && dec->packed_mode && output == 0 && success == 0) {
@@ -1510,7 +1596,7 @@
 	if (dec->low_delay_default && output == 0) {
 		if (dec->packed_mode && seen_something) {
 			/* output the recently decoded frame */
-			decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type);
+			decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 		} else {
 			image_clear(&dec->cur, dec->width, dec->height, dec->edged_width, 0, 128, 128);
 			image_printf(&dec->cur, dec->edged_width, dec->height, 16, 16,
@@ -1518,7 +1604,7 @@
 			image_printf(&dec->cur, dec->edged_width, dec->height, 16, 64,
 				"bframe decoder lag");
 
-			decoder_output(dec, &dec->cur, NULL, frame, stats, P_VOP);
+			decoder_output(dec, &dec->cur, NULL, frame, stats, P_VOP, quant);
 			if (stats) stats->type = XVID_TYPE_NOTHING;
 		}
 	}
@@ -1526,5 +1612,5 @@
 	emms();
 	stop_global_timer();
 
-	return BitstreamPos(&bs) / 8;	/* number of bytes consumed */
+	return (BitstreamPos(&bs) + 7) / 8;	/* number of bytes consumed */
 }