--- postprocessing.c	2003/12/17 17:07:38	1.1.4.3
+++ postprocessing.c	2011/04/07 19:07:36	1.6.2.1
@@ -3,7 +3,8 @@
  *  XVID MPEG-4 VIDEO CODEC
  *  - Postprocessing  functions -
  *
- *  Copyright(C) 2003 Michael Militzer <isibaar@xvid.org>
+ *  Copyright(C) 2003-2010 Michael Militzer <isibaar@xvid.org>
+ *                    2004 Marc Fauconneau
  *
  *  This program is free software ; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@
  *  along with this program ; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
- * $Id: postprocessing.c,v 1.1.4.3 2003/12/17 17:07:38 Isibaar Exp $
+ * $Id: postprocessing.c,v 1.6.2.1 2011/04/07 19:07:36 Isibaar Exp $
  *
  ****************************************************************************/
 
@@ -33,10 +34,9 @@
 #include "../utils/emms.h"
 #include "postprocessing.h"
 
-/* Filtering thresholds */
+/* function pointer through which image_postproc() calls the brightness filter */
+IMAGEBRIGHTNESS_PTR image_brightness;
 
-#define THR1 2
-#define THR2 6
 
 /* Some useful (and fast) macros
    Note that the MIN/MAX macros assume signed shift - if your compiler
@@ -47,80 +47,178 @@
 #define FAST_ABS(x) ((((int)(x)) >> 31) ^ ((int)(x))) - (((int)(x)) >> 31)
 #define ABS(X)    (((X)>0)?(X):-(X)) 
 
-void init_postproc(void)
+void init_postproc(XVID_POSTPROC *tbls)
 {
-	init_deblock();
-	init_noise();
+	init_deblock(tbls);
+	init_noise(tbls);
 }
 
-void
-image_postproc(IMAGE * img, int edged_width,
-				const MACROBLOCK * mbs, int mb_width, int mb_height, int mb_stride,
-				int flags, int frame_num)
+void 
+stripe_deblock_h(SMPDeblock *h)
 {
-	const int edged_width2 = edged_width /2;
+	const int stride = h->stride;
+	const int stride2 = stride /2;
+
 	int i,j;
 	int quant;
 
 	/* luma: j,i in block units */
-	if ((flags & XVID_DEBLOCKY))
+	if ((h->flags & XVID_DEBLOCKY))
 	{
-		for (j = 1; j < mb_height*2; j++)		/* horizontal deblocking */
-		for (i = 0; i < mb_width*2; i++)
+		int dering = h->flags & XVID_DERINGY;
+
+		for (j = 1; j < h->stop_y; j++)		/* horizontal luma deblocking */
+		for (i = h->start_x; i < h->stop_x; i++)
 		{
-			quant = mbs[(j+0)/2*mb_stride + (i/2)].quant;
-			deblock8x8_h(img->y + j*8*edged_width + i*8, edged_width, quant);
+			quant = h->mbs[(j+0)/2*h->mb_stride + (i/2)].quant;
+			deblock8x8_h(h->tbls, h->img->y + j*8*stride + i*8, stride, quant, dering);
 		}
+	}
+
+	/* chroma */
+	if ((h->flags & XVID_DEBLOCKUV))
+	{
+		int dering = h->flags & XVID_DERINGUV;
 
-		for (j = 0; j < mb_height*2; j++)		/* vertical deblocking */
-		for (i = 1; i < mb_width*2; i++)
+		for (j = 1; j < h->stop_y/2; j++)		/* horizontal chroma deblocking */
+		for (i = h->start_x/2; i < h->stop_x/2; i++)
 		{
-			quant = mbs[(j+0)/2*mb_stride + (i/2)].quant;
-			deblock8x8_v(img->y + j*8*edged_width + i*8, edged_width, quant);
+			quant = h->mbs[(j+0)*h->mb_stride + i].quant;
+			deblock8x8_h(h->tbls, h->img->u + j*8*stride2 + i*8, stride2, quant, dering);
+			deblock8x8_h(h->tbls, h->img->v + j*8*stride2 + i*8, stride2, quant, dering);
 		}
 	}
+}
 
+void 
+stripe_deblock_v(SMPDeblock *h)
+{
+	const int stride = h->stride;
+	const int stride2 = stride /2;
 
-	/* chroma */
-	if ((flags & XVID_DEBLOCKUV))
+	int i,j;
+	int quant;
+
+	/* luma: j,i in block units */
+	if ((h->flags & XVID_DEBLOCKY))
 	{
-		for (j = 1; j < mb_height; j++)		/* horizontal deblocking */
-		for (i = 0; i < mb_width; i++)
+		int dering = h->flags & XVID_DERINGY;
+
+		for (j = h->start_y; j < h->stop_y; j++)		/* vertical luma deblocking */
+		for (i = 1; i < h->stop_x; i++)
 		{
-			quant = mbs[(j+0)*mb_stride + i].quant;
-			deblock8x8_h(img->u + j*8*edged_width2 + i*8, edged_width2, quant);
-			deblock8x8_h(img->v + j*8*edged_width2 + i*8, edged_width2, quant);
+			quant = h->mbs[(j+0)/2*h->mb_stride + (i/2)].quant;
+			deblock8x8_v(h->tbls, h->img->y + j*8*stride + i*8, stride, quant, dering);
 		}
+	}
+
+	/* chroma */
+	if ((h->flags & XVID_DEBLOCKUV))
+	{
+		int dering = h->flags & XVID_DERINGUV;
 
-		for (j = 0; j < mb_height; j++)		/* vertical deblocking */	
-		for (i = 1; i < mb_width; i++)
+		for (j = h->start_y/2; j < h->stop_y/2; j++)		/* vertical chroma deblocking */	
+		for (i = 1; i < h->stop_x/2; i++)
 		{
-			quant = mbs[(j+0)*mb_stride + i].quant;
-			deblock8x8_v(img->u + j*8*edged_width2 + i*8, edged_width2, quant);
-			deblock8x8_v(img->v + j*8*edged_width2 + i*8, edged_width2, quant);
+			quant = h->mbs[(j+0)*h->mb_stride + i].quant;
+			deblock8x8_v(h->tbls, h->img->u + j*8*stride2 + i*8, stride2, quant, dering);
+			deblock8x8_v(h->tbls, h->img->v + j*8*stride2 + i*8, stride2, quant, dering);
 		}
 	}
+}
+
+void
+image_postproc(XVID_POSTPROC *tbls, IMAGE * img, int edged_width,
+				const MACROBLOCK * mbs, int mb_width, int mb_height, int mb_stride,
+				int flags, int brightness, int frame_num, int bvop, int threads)
+{
+	int k;
+#ifndef HAVE_PTHREAD
+	int num_threads = 1;
+#else
+	int num_threads = MAX(1, MIN(threads, 4));
+	void *status = NULL;
+#endif
+	SMPDeblock data[4];
+
+	/* horizontal deblocking: give each thread its own column stripe of the frame */
+	for (k = 0; k < num_threads; k++) {
+		data[k].flags = flags;
+		data[k].img = img;
+		data[k].mb_stride = mb_stride;
+		data[k].mbs = mbs;
+		data[k].stride = edged_width;
+		data[k].tbls = tbls;
+
+		data[k].start_x = (k*mb_width / num_threads)*2;
+		data[k].stop_x = ((k+1)*mb_width / num_threads)*2;
+
+		data[k].stop_y = mb_height*2;
+	}
+#ifdef HAVE_PTHREAD
+	/* create threads */
+	for (k = 1; k < num_threads; k++) {
+		pthread_create(&data[k].handle, NULL, 
+		               (void*)stripe_deblock_h, (void*)&data[k]);
+	}
+#endif
+	stripe_deblock_h(&data[0]);
+
+#ifdef HAVE_PTHREAD
+	/* wait until all threads are finished */
+	for (k = 1; k < num_threads; k++) {
+		pthread_join(data[k].handle, &status);
+	}
+#endif
+
+	/* vertical deblocking: give each thread its own row stripe, full frame width */
+	for (k = 0; k < num_threads; k++) {
+		data[k].start_y = (k*mb_height / num_threads)*2;
+		data[k].stop_y = ((k+1)*mb_height / num_threads)*2;
+		data[k].stop_x = mb_width*2;
+	}
+
+#ifdef HAVE_PTHREAD
+	/* create threads */
+	for (k = 1; k < num_threads; k++) {
+		pthread_create(&data[k].handle, NULL, 
+		               (void*)stripe_deblock_v, (void*)&data[k]);
+	}
+#endif
+	stripe_deblock_v(&data[0]);
+
+#ifdef HAVE_PTHREAD
+	/* wait until all threads are finished */
+	for (k = 1; k < num_threads; k++) {
+		pthread_join(data[k].handle, &status);
+	}
+#endif
+
+	if (!bvop)
+		tbls->prev_quant = mbs->quant;
 
 	if ((flags & XVID_FILMEFFECT))
 	{
-		add_noise(img->y, img->y, edged_width, mb_width*16, mb_height*16, frame_num % 3);
+		add_noise(tbls, img->y, img->y, edged_width, mb_width*16,
+				  mb_height*16, frame_num % 3, tbls->prev_quant);
+	}
+
+	if (brightness != 0) {
+		image_brightness(img->y, edged_width, mb_width*16, mb_height*16, brightness);
 	}
 }
 
 /******************************************************************************/
 
-static int8_t xvid_thresh_tbl[510];
-static int8_t xvid_abs_tbl[510];
-
-void init_deblock(void)
+void init_deblock(XVID_POSTPROC *tbls)
 {
 	int i;
 
 	for(i = -255; i < 256; i++) {
-		xvid_thresh_tbl[i + 255] = 0;
+		tbls->xvid_thresh_tbl[i + 255] = 0;
 		if(ABS(i) < THR1)
-			xvid_thresh_tbl[i + 255] = 1;
-		xvid_abs_tbl[i + 255] = ABS(i);
+			tbls->xvid_thresh_tbl[i + 255] = 1;
+		tbls->xvid_abs_tbl[i + 255] = ABS(i);
 	}
 }
 
@@ -150,30 +248,39 @@
 		s[8] = *(v[8] = img + x*stride + 3); \
 		s[9] = *(v[9] = img + x*stride + 4);
 
+#define APPLY_DERING(x) \
+		*v[x] = (e[x] == 0) ? (			\
+			(e[x-1] == 0) ? (			\
+			(e[x+1] == 0) ? 			\
+			((s[x-1]+s[x]*2+s[x+1])>>2)	\
+			: ((s[x-1]+s[x])>>1) )		\
+			: ((s[x]+s[x+1])>>1) )		\
+			: s[x];	
+
 #define APPLY_FILTER_CORE \
 		/* First, decide whether to use default or DC-offset mode */ \
 		\
 		eq_cnt = 0; \
 		\
-		eq_cnt += xvid_thresh_tbl[s[0] - s[1] + 255]; \
-		eq_cnt += xvid_thresh_tbl[s[1] - s[2] + 255]; \
-		eq_cnt += xvid_thresh_tbl[s[2] - s[3] + 255]; \
-		eq_cnt += xvid_thresh_tbl[s[3] - s[4] + 255]; \
-		eq_cnt += xvid_thresh_tbl[s[4] - s[5] + 255]; \
-		eq_cnt += xvid_thresh_tbl[s[5] - s[6] + 255]; \
-		eq_cnt += xvid_thresh_tbl[s[6] - s[7] + 255]; \
-		eq_cnt += xvid_thresh_tbl[s[7] - s[8] + 255]; \
+		eq_cnt += tbls->xvid_thresh_tbl[s[0] - s[1] + 255]; \
+		eq_cnt += tbls->xvid_thresh_tbl[s[1] - s[2] + 255]; \
+		eq_cnt += tbls->xvid_thresh_tbl[s[2] - s[3] + 255]; \
+		eq_cnt += tbls->xvid_thresh_tbl[s[3] - s[4] + 255]; \
+		eq_cnt += tbls->xvid_thresh_tbl[s[4] - s[5] + 255]; \
+		eq_cnt += tbls->xvid_thresh_tbl[s[5] - s[6] + 255]; \
+		eq_cnt += tbls->xvid_thresh_tbl[s[6] - s[7] + 255]; \
+		eq_cnt += tbls->xvid_thresh_tbl[s[7] - s[8] + 255]; \
 		\
 		if(eq_cnt < THR2) { /* Default mode */  \
 			int a30, a31, a32;					\
 			int diff, limit;					\
 												\
-			if(xvid_abs_tbl[(s[4] - s[5]) + 255] < quant) {			\
+			if(tbls->xvid_abs_tbl[(s[4] - s[5]) + 255] < quant) {			\
 				a30 = ((s[3]<<1) - s[4] * 5 + s[5] * 5 - (s[6]<<1));	\
 				a31 = ((s[1]<<1) - s[2] * 5 + s[3] * 5 - (s[4]<<1));	\
 				a32 = ((s[5]<<1) - s[6] * 5 + s[7] * 5 - (s[8]<<1));	\
 																		\
-				diff = (5 * ((SIGN(a30) * MIN(xvid_abs_tbl[a30 + 255], MIN(xvid_abs_tbl[a31 + 255], xvid_abs_tbl[a32 + 255]))) - a30) + 32) >> 6;	\
+				diff = (5 * ((SIGN(a30) * MIN(FAST_ABS(a30), MIN(FAST_ABS(a31), FAST_ABS(a32)))) - a30) + 32) >> 6;	\
 				limit = (s[4] - s[5]) / 2;	\
 				\
 				if (limit > 0)				\
@@ -184,6 +291,36 @@
 				*v[4] -= diff;	\
 				*v[5] += diff;	\
 			}	\
+			if (dering) {	\
+				e[0] = (tbls->xvid_abs_tbl[(s[0] - s[1]) + 255] > quant + DERING_STRENGTH) ? 1 : 0;	\
+				e[1] = (tbls->xvid_abs_tbl[(s[1] - s[2]) + 255] > quant + DERING_STRENGTH) ? 1 : 0;	\
+				e[2] = (tbls->xvid_abs_tbl[(s[2] - s[3]) + 255] > quant + DERING_STRENGTH) ? 1 : 0;	\
+				e[3] = (tbls->xvid_abs_tbl[(s[3] - s[4]) + 255] > quant + DERING_STRENGTH) ? 1 : 0;	\
+				e[4] = (tbls->xvid_abs_tbl[(s[4] - s[5]) + 255] > quant + DERING_STRENGTH) ? 1 : 0;	\
+				e[5] = (tbls->xvid_abs_tbl[(s[5] - s[6]) + 255] > quant + DERING_STRENGTH) ? 1 : 0;	\
+				e[6] = (tbls->xvid_abs_tbl[(s[6] - s[7]) + 255] > quant + DERING_STRENGTH) ? 1 : 0;	\
+				e[7] = (tbls->xvid_abs_tbl[(s[7] - s[8]) + 255] > quant + DERING_STRENGTH) ? 1 : 0;	\
+				e[8] = (tbls->xvid_abs_tbl[(s[8] - s[9]) + 255] > quant + DERING_STRENGTH) ? 1 : 0;	\
+				\
+				e[1] |= e[0];	\
+				e[2] |= e[1];	\
+				e[3] |= e[2];	\
+				e[4] |= e[3];	\
+				e[5] |= e[4];	\
+				e[6] |= e[5];	\
+				e[7] |= e[6];	\
+				e[8] |= e[7];	\
+				e[9]  = e[8];	\
+				\
+				APPLY_DERING(1)	\
+				APPLY_DERING(2)	\
+				APPLY_DERING(3)	\
+				APPLY_DERING(4)	\
+				APPLY_DERING(5)	\
+				APPLY_DERING(6)	\
+				APPLY_DERING(7)	\
+				APPLY_DERING(8) \
+			}	\
 		}	\
 		else {	/* DC-offset mode */	\
 			uint8_t p0, p9;	\
@@ -196,8 +333,8 @@
 			if(((max-min)) < 2*quant) {	\
 										\
 				/* Choose edge pixels */	\
-				p0 = (xvid_abs_tbl[(s[1] - s[0]) + 255] < quant) ? s[0] : s[1];	\
-				p9 = (xvid_abs_tbl[(s[8] - s[9]) + 255] < quant) ? s[9] : s[8];	\
+				p0 = (tbls->xvid_abs_tbl[(s[1] - s[0]) + 255] < quant) ? s[0] : s[1];	\
+				p9 = (tbls->xvid_abs_tbl[(s[8] - s[9]) + 255] < quant) ? s[9] : s[8];	\
 																\
 				*v[1] = (uint8_t) ((6*p0 + (s[1]<<2) + (s[2]<<1) + (s[3]<<1) + s[4] + s[5] + 8) >> 4);	\
 				*v[2] = (uint8_t) (((p0<<2) + (s[1]<<1) + (s[2]<<2) + (s[3]<<1) + (s[4]<<1) + s[5] + s[6] + 8) >> 4);	\
@@ -210,11 +347,12 @@
 			}	\
 		}	
 
-void deblock8x8_h(uint8_t *img, int stride, int quant)
+void deblock8x8_h(XVID_POSTPROC *tbls, uint8_t *img, int stride, int quant, int dering)
 {
 	int eq_cnt;
 	uint8_t *v[10];
-	int32_t s[10];
+	int s[10];
+	int e[10];
 
 	LOAD_DATA_HOR(0)
 	APPLY_FILTER_CORE
@@ -242,11 +380,12 @@
 }
 
 
-void deblock8x8_v(uint8_t *img, int stride, int quant)
+void deblock8x8_v(XVID_POSTPROC *tbls, uint8_t *img, int stride, int quant, int dering)
 {
 	int eq_cnt;
 	uint8_t *v[10];
 	int s[10];
+	int e[10];
 
 	LOAD_DATA_VER(0)
 	APPLY_FILTER_CORE
@@ -280,18 +419,11 @@
  *																			  *
  ******************************************************************************/
 
-#define MAX_NOISE 4096
-#define MAX_SHIFT 1024
-#define MAX_RES (MAX_NOISE - MAX_SHIFT)
-
 #define RAND_N(range) ((int) ((double)range * rand() / (RAND_MAX + 1.0)))
+#define STRENGTH1 12
+#define STRENGTH2 8
 
-#define STRENGTH 13
-
-static int8_t xvid_noise[MAX_NOISE * sizeof(int8_t)];
-static int8_t *xvid_prev_shift[MAX_RES][3];
-
-void init_noise(void)
+void init_noise(XVID_POSTPROC *tbls)
 {
 	int i, j;
 	int patt[4] = { -1,0,1,0 };
@@ -302,7 +434,7 @@
 
 	for(i = 0, j = 0; i < MAX_NOISE; i++, j++)
 	{
-		double x1, x2, w, y1;
+		double x1, x2, w, y1, y2;
 		
 		do {
 			x1 = 2.0 * rand() / (float) RAND_MAX - 1.0;
@@ -312,10 +444,15 @@
 		
 		w = sqrt((-2.0 * log(w)) / w);
 		y1 = x1 * w;
-		y1 *= STRENGTH / sqrt(3.0);
+		y2 = x1 * w;
+
+		y1 *= STRENGTH1 / sqrt(3.0);
+		y2 *= STRENGTH2 / sqrt(3.0);
 
 	    y1 /= 2;
-	    y1 += patt[j%4] * STRENGTH * 0.35;
+		y2 /= 2;
+	    y1 += patt[j%4] * STRENGTH1 * 0.35;
+		y2 += patt[j%4] * STRENGTH2 * 0.35;
 
 		if (y1 < -128) {
 			y1=-128;
@@ -324,8 +461,17 @@
 			y1= 127;
 		}
 
+		if (y2 < -128) {
+			y2=-128;
+		}
+		else if (y2 > 127) {
+			y2= 127;
+		}
+
 		y1 /= 3.0;
-		xvid_noise[i] = (int) y1;
+		y2 /= 3.0;
+		tbls->xvid_noise1[i] = (int) y1;
+		tbls->xvid_noise2[i] = (int) y2;
 	
 		if (RAND_N(6) == 0) {
 			j--;
@@ -334,14 +480,17 @@
 	
 	for (i = 0; i < MAX_RES; i++)
 		for (j = 0; j < 3; j++) {
-			xvid_prev_shift[i][j] = xvid_noise + (rand() & (MAX_SHIFT - 1));
+			tbls->xvid_prev_shift[i][j] = tbls->xvid_noise1 + (rand() & (MAX_SHIFT - 1));
+			tbls->xvid_prev_shift[i][3 + j] = tbls->xvid_noise2 + (rand() & (MAX_SHIFT - 1));
 		}
 }
 
-void add_noise(uint8_t *dst, uint8_t *src, int stride, int width, int height, int shiftptr)
+void add_noise(XVID_POSTPROC *tbls, uint8_t *dst, uint8_t *src, int stride, int width, int height, int shiftptr, int quant)
 {
 	int x, y;
 	int shift = 0;
+	int add = (quant < 5) ? 3 : 0;
+	int8_t *noise = (quant < 5) ? tbls->xvid_noise2 : tbls->xvid_noise1;
 
 	for(y = 0; y < height; y++)
 	{
@@ -352,15 +501,30 @@
 		shift &= ~7;
 		for(x = 0; x < width; x++)
 		{
-			const int n = xvid_prev_shift[y][0][x] + xvid_prev_shift[y][1][x] + 
-				          xvid_prev_shift[y][2][x];
+			const int n = tbls->xvid_prev_shift[y][0 + add][x] + tbls->xvid_prev_shift[y][1 + add][x] + 
+				          tbls->xvid_prev_shift[y][2 + add][x];
 
 			dst[x] = src2[x] + ((n * src2[x]) >> 7);
 		}
 
-		xvid_prev_shift[y][shiftptr] = xvid_noise + shift;
+		tbls->xvid_prev_shift[y][shiftptr + add] = noise + shift;
 
 		dst += stride;
 		src += stride;
 	}
 }
+
+
+void image_brightness_c(uint8_t *dst, int stride, int width, int height, int offset)
+{
+	int x,y;
+
+	for(y = 0; y < height; y++)
+	{
+		for(x = 0; x < width; x++)
+		{
+			int p = dst[y*stride + x];
+			dst[y*stride + x] = CLIP( p + offset, 0, 255);
+		}
+	}
+}