--- postprocessing.c 2003/12/17 17:07:38 1.1.4.3 +++ postprocessing.c 2011/04/07 19:07:36 1.6.2.1 @@ -3,7 +3,8 @@ * XVID MPEG-4 VIDEO CODEC * - Postprocessing functions - * - * Copyright(C) 2003 Michael Militzer + * Copyright(C) 2003-2010 Michael Militzer + * 2004 Marc Fauconneau * * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: postprocessing.c,v 1.1.4.3 2003/12/17 17:07:38 Isibaar Exp $ + * $Id: postprocessing.c,v 1.6.2.1 2011/04/07 19:07:36 Isibaar Exp $ * ****************************************************************************/ @@ -33,10 +34,9 @@ #include "../utils/emms.h" #include "postprocessing.h" -/* Filtering thresholds */ +/* function pointers */ +IMAGEBRIGHTNESS_PTR image_brightness; -#define THR1 2 -#define THR2 6 /* Some useful (and fast) macros Note that the MIN/MAX macros assume signed shift - if your compiler @@ -47,80 +47,178 @@ #define FAST_ABS(x) ((((int)(x)) >> 31) ^ ((int)(x))) - (((int)(x)) >> 31) #define ABS(X) (((X)>0)?(X):-(X)) -void init_postproc(void) +void init_postproc(XVID_POSTPROC *tbls) { - init_deblock(); - init_noise(); + init_deblock(tbls); + init_noise(tbls); } -void -image_postproc(IMAGE * img, int edged_width, - const MACROBLOCK * mbs, int mb_width, int mb_height, int mb_stride, - int flags, int frame_num) +void +stripe_deblock_h(SMPDeblock *h) { - const int edged_width2 = edged_width /2; + const int stride = h->stride; + const int stride2 = stride /2; + int i,j; int quant; /* luma: j,i in block units */ - if ((flags & XVID_DEBLOCKY)) + if ((h->flags & XVID_DEBLOCKY)) { - for (j = 1; j < mb_height*2; j++) /* horizontal deblocking */ - for (i = 0; i < mb_width*2; i++) + int dering = h->flags & XVID_DERINGY; + + for (j = 1; j < h->stop_y; j++) /* horizontal luma deblocking */ + for (i = h->start_x; i < h->stop_x; i++) { - quant = mbs[(j+0)/2*mb_stride + (i/2)].quant; - deblock8x8_h(img->y + j*8*edged_width + i*8, edged_width, quant); + quant = h->mbs[(j+0)/2*h->mb_stride + (i/2)].quant; + deblock8x8_h(h->tbls, h->img->y + j*8*stride + i*8, stride, quant, dering); } + } + + /* chroma */ + if ((h->flags & XVID_DEBLOCKUV)) + { + int dering = h->flags & XVID_DERINGUV; - for (j = 0; j < mb_height*2; j++) /* vertical deblocking */ - for (i = 1; i < mb_width*2; i++) + for (j = 1; j < h->stop_y/2; j++) /* horizontal deblocking */ + for (i = h->start_x/2; i < h->stop_x/2; i++) { - quant = mbs[(j+0)/2*mb_stride + (i/2)].quant; - deblock8x8_v(img->y + j*8*edged_width + i*8, edged_width, quant); + quant = h->mbs[(j+0)*h->mb_stride + i].quant; + deblock8x8_h(h->tbls, h->img->u + j*8*stride2 + i*8, stride2, quant, dering); + deblock8x8_h(h->tbls, h->img->v + j*8*stride2 + i*8, stride2, quant, dering); } } +} +void +stripe_deblock_v(SMPDeblock *h) +{ + const int stride = h->stride; + const int stride2 = stride /2; - /* chroma */ - if ((flags & XVID_DEBLOCKUV)) + int i,j; + int quant; + + /* luma: j,i in block units */ + if ((h->flags & XVID_DEBLOCKY)) { - for (j = 1; j < mb_height; j++) /* horizontal deblocking */ - for (i = 0; i < mb_width; i++) + int dering = h->flags & XVID_DERINGY; + + for (j = h->start_y; j < h->stop_y; j++) /* vertical deblocking */ + for (i = 1; i < h->stop_x; i++) { - quant = mbs[(j+0)*mb_stride + i].quant; - deblock8x8_h(img->u + j*8*edged_width2 + i*8, edged_width2, quant); - deblock8x8_h(img->v + j*8*edged_width2 + i*8, edged_width2, quant); + quant = h->mbs[(j+0)/2*h->mb_stride + (i/2)].quant; + deblock8x8_v(h->tbls, h->img->y + j*8*stride + i*8, stride, quant, dering); } + } + + /* chroma */ + if ((h->flags & XVID_DEBLOCKUV)) + { + int dering = h->flags & XVID_DERINGUV; - for (j = 0; j < mb_height; j++) /* vertical deblocking */ - for (i = 1; i < mb_width; i++) + for (j = h->start_y/2; j < h->stop_y/2; j++) /* vertical deblocking */ + for (i = 1; i < h->stop_x/2; i++) { - quant = mbs[(j+0)*mb_stride + i].quant; - deblock8x8_v(img->u + j*8*edged_width2 + i*8, edged_width2, quant); - deblock8x8_v(img->v + j*8*edged_width2 + i*8, edged_width2, quant); + quant = h->mbs[(j+0)*h->mb_stride + i].quant; + deblock8x8_v(h->tbls, h->img->u + j*8*stride2 + i*8, stride2, quant, dering); + deblock8x8_v(h->tbls, h->img->v + j*8*stride2 + i*8, stride2, quant, dering); } } +} + +void +image_postproc(XVID_POSTPROC *tbls, IMAGE * img, int edged_width, + const MACROBLOCK * mbs, int mb_width, int mb_height, int mb_stride, + int flags, int brightness, int frame_num, int bvop, int threads) +{ + int k; +#ifndef HAVE_PTHREAD + int num_threads = 1; +#else + int num_threads = MAX(1, MIN(threads, 4)); + void *status = NULL; +#endif + SMPDeblock data[4]; + + /* horizontal deblocking, dispatch threads */ + for (k = 0; k < num_threads; k++) { + data[k].flags = flags; + data[k].img = img; + data[k].mb_stride = mb_stride; + data[k].mbs = mbs; + data[k].stride = edged_width; + data[k].tbls = tbls; + + data[k].start_x = (k*mb_width / num_threads)*2; + data[k].stop_x = ((k+1)*mb_width / num_threads)*2; + + data[k].stop_y = mb_height*2; + } +#ifdef HAVE_PTHREAD + /* create threads */ + for (k = 1; k < num_threads; k++) { + pthread_create(&data[k].handle, NULL, + (void*)stripe_deblock_h, (void*)&data[k]); + } +#endif + stripe_deblock_h(&data[0]); + +#ifdef HAVE_PTHREAD + /* wait until all threads are finished */ + for (k = 1; k < num_threads; k++) { + pthread_join(data[k].handle, &status); + } +#endif + + /* vertical deblocking, dispatch threads */ + for (k = 0; k < num_threads; k++) { + data[k].start_y = (k*mb_height / num_threads)*2; + data[k].stop_y = ((k+1)*mb_height / num_threads)*2; + data[k].stop_x = mb_width*2; + } + +#ifdef HAVE_PTHREAD + /* create threads */ + for (k = 1; k < num_threads; k++) { + pthread_create(&data[k].handle, NULL, + (void*)stripe_deblock_v, (void*)&data[k]); + } +#endif + stripe_deblock_v(&data[0]); + +#ifdef HAVE_PTHREAD + /* wait until all threads are finished */ + for (k = 1; k < num_threads; k++) { + pthread_join(data[k].handle, &status); + } +#endif + + if (!bvop) + tbls->prev_quant = mbs->quant; if ((flags & XVID_FILMEFFECT)) { - add_noise(img->y, img->y, edged_width, mb_width*16, mb_height*16, frame_num % 3); + add_noise(tbls, img->y, img->y, edged_width, mb_width*16, + mb_height*16, frame_num % 3, tbls->prev_quant); + } + + if (brightness != 0) { + image_brightness(img->y, edged_width, mb_width*16, mb_height*16, brightness); } } /******************************************************************************/ -static int8_t xvid_thresh_tbl[510]; -static int8_t xvid_abs_tbl[510]; - -void init_deblock(void) +void init_deblock(XVID_POSTPROC *tbls) { int i; for(i = -255; i < 256; i++) { - xvid_thresh_tbl[i + 255] = 0; + tbls->xvid_thresh_tbl[i + 255] = 0; if(ABS(i) < THR1) - xvid_thresh_tbl[i + 255] = 1; - xvid_abs_tbl[i + 255] = ABS(i); + tbls->xvid_thresh_tbl[i + 255] = 1; + tbls->xvid_abs_tbl[i + 255] = ABS(i); } } @@ -150,30 +248,39 @@ s[8] = *(v[8] = img + x*stride + 3); \ s[9] = *(v[9] = img + x*stride + 4); +#define APPLY_DERING(x) \ + *v[x] = (e[x] == 0) ? ( \ + (e[x-1] == 0) ? ( \ + (e[x+1] == 0) ? \ + ((s[x-1]+s[x]*2+s[x+1])>>2) \ + : ((s[x-1]+s[x])>>1) ) \ + : ((s[x]+s[x+1])>>1) ) \ + : s[x]; + #define APPLY_FILTER_CORE \ /* First, decide whether to use default or DC-offset mode */ \ \ eq_cnt = 0; \ \ - eq_cnt += xvid_thresh_tbl[s[0] - s[1] + 255]; \ - eq_cnt += xvid_thresh_tbl[s[1] - s[2] + 255]; \ - eq_cnt += xvid_thresh_tbl[s[2] - s[3] + 255]; \ - eq_cnt += xvid_thresh_tbl[s[3] - s[4] + 255]; \ - eq_cnt += xvid_thresh_tbl[s[4] - s[5] + 255]; \ - eq_cnt += xvid_thresh_tbl[s[5] - s[6] + 255]; \ - eq_cnt += xvid_thresh_tbl[s[6] - s[7] + 255]; \ - eq_cnt += xvid_thresh_tbl[s[7] - s[8] + 255]; \ + eq_cnt += tbls->xvid_thresh_tbl[s[0] - s[1] + 255]; \ + eq_cnt += tbls->xvid_thresh_tbl[s[1] - s[2] + 255]; \ + eq_cnt += tbls->xvid_thresh_tbl[s[2] - s[3] + 255]; \ + eq_cnt += tbls->xvid_thresh_tbl[s[3] - s[4] + 255]; \ + eq_cnt += tbls->xvid_thresh_tbl[s[4] - s[5] + 255]; \ + eq_cnt += tbls->xvid_thresh_tbl[s[5] - s[6] + 255]; \ + eq_cnt += tbls->xvid_thresh_tbl[s[6] - s[7] + 255]; \ + eq_cnt += tbls->xvid_thresh_tbl[s[7] - s[8] + 255]; \ \ if(eq_cnt < THR2) { /* Default mode */ \ int a30, a31, a32; \ int diff, limit; \ \ - if(xvid_abs_tbl[(s[4] - s[5]) + 255] < quant) { \ + if(tbls->xvid_abs_tbl[(s[4] - s[5]) + 255] < quant) { \ a30 = ((s[3]<<1) - s[4] * 5 + s[5] * 5 - (s[6]<<1)); \ a31 = ((s[1]<<1) - s[2] * 5 + s[3] * 5 - (s[4]<<1)); \ a32 = ((s[5]<<1) - s[6] * 5 + s[7] * 5 - (s[8]<<1)); \ \ - diff = (5 * ((SIGN(a30) * MIN(xvid_abs_tbl[a30 + 255], MIN(xvid_abs_tbl[a31 + 255], xvid_abs_tbl[a32 + 255]))) - a30) + 32) >> 6; \ + diff = (5 * ((SIGN(a30) * MIN(FAST_ABS(a30), MIN(FAST_ABS(a31), FAST_ABS(a32)))) - a30) + 32) >> 6; \ limit = (s[4] - s[5]) / 2; \ \ if (limit > 0) \ @@ -184,6 +291,36 @@ *v[4] -= diff; \ *v[5] += diff; \ } \ + if (dering) { \ + e[0] = (tbls->xvid_abs_tbl[(s[0] - s[1]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ + e[1] = (tbls->xvid_abs_tbl[(s[1] - s[2]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ + e[2] = (tbls->xvid_abs_tbl[(s[2] - s[3]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ + e[3] = (tbls->xvid_abs_tbl[(s[3] - s[4]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ + e[4] = (tbls->xvid_abs_tbl[(s[4] - s[5]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ + e[5] = (tbls->xvid_abs_tbl[(s[5] - s[6]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ + e[6] = (tbls->xvid_abs_tbl[(s[6] - s[7]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ + e[7] = (tbls->xvid_abs_tbl[(s[7] - s[8]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ + e[8] = (tbls->xvid_abs_tbl[(s[8] - s[9]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ + \ + e[1] |= e[0]; \ + e[2] |= e[1]; \ + e[3] |= e[2]; \ + e[4] |= e[3]; \ + e[5] |= e[4]; \ + e[6] |= e[5]; \ + e[7] |= e[6]; \ + e[8] |= e[7]; \ + e[9] = e[8]; \ + \ + APPLY_DERING(1) \ + APPLY_DERING(2) \ + APPLY_DERING(3) \ + APPLY_DERING(4) \ + APPLY_DERING(5) \ + APPLY_DERING(6) \ + APPLY_DERING(7) \ + APPLY_DERING(8) \ + } \ } \ else { /* DC-offset mode */ \ uint8_t p0, p9; \ @@ -196,8 +333,8 @@ if(((max-min)) < 2*quant) { \ \ /* Choose edge pixels */ \ - p0 = (xvid_abs_tbl[(s[1] - s[0]) + 255] < quant) ? s[0] : s[1]; \ - p9 = (xvid_abs_tbl[(s[8] - s[9]) + 255] < quant) ? s[9] : s[8]; \ + p0 = (tbls->xvid_abs_tbl[(s[1] - s[0]) + 255] < quant) ? s[0] : s[1]; \ + p9 = (tbls->xvid_abs_tbl[(s[8] - s[9]) + 255] < quant) ? s[9] : s[8]; \ \ *v[1] = (uint8_t) ((6*p0 + (s[1]<<2) + (s[2]<<1) + (s[3]<<1) + s[4] + s[5] + 8) >> 4); \ *v[2] = (uint8_t) (((p0<<2) + (s[1]<<1) + (s[2]<<2) + (s[3]<<1) + (s[4]<<1) + s[5] + s[6] + 8) >> 4); \ @@ -210,11 +347,12 @@ } \ } -void deblock8x8_h(uint8_t *img, int stride, int quant) +void deblock8x8_h(XVID_POSTPROC *tbls, uint8_t *img, int stride, int quant, int dering) { int eq_cnt; uint8_t *v[10]; - int32_t s[10]; + int s[10]; + int e[10]; LOAD_DATA_HOR(0) APPLY_FILTER_CORE @@ -242,11 +380,12 @@ } -void deblock8x8_v(uint8_t *img, int stride, int quant) +void deblock8x8_v(XVID_POSTPROC *tbls, uint8_t *img, int stride, int quant, int dering) { int eq_cnt; uint8_t *v[10]; int s[10]; + int e[10]; LOAD_DATA_VER(0) APPLY_FILTER_CORE @@ -280,18 +419,11 @@ * * ******************************************************************************/ -#define MAX_NOISE 4096 -#define MAX_SHIFT 1024 -#define MAX_RES (MAX_NOISE - MAX_SHIFT) - #define RAND_N(range) ((int) ((double)range * rand() / (RAND_MAX + 1.0))) +#define STRENGTH1 12 +#define STRENGTH2 8 -#define STRENGTH 13 - -static int8_t xvid_noise[MAX_NOISE * sizeof(int8_t)]; -static int8_t *xvid_prev_shift[MAX_RES][3]; - -void init_noise(void) +void init_noise(XVID_POSTPROC *tbls) { int i, j; int patt[4] = { -1,0,1,0 }; @@ -302,7 +434,7 @@ for(i = 0, j = 0; i < MAX_NOISE; i++, j++) { - double x1, x2, w, y1; + double x1, x2, w, y1, y2; do { x1 = 2.0 * rand() / (float) RAND_MAX - 1.0; @@ -312,10 +444,15 @@ w = sqrt((-2.0 * log(w)) / w); y1 = x1 * w; - y1 *= STRENGTH / sqrt(3.0); + y2 = x1 * w; + + y1 *= STRENGTH1 / sqrt(3.0); + y2 *= STRENGTH2 / sqrt(3.0); y1 /= 2; - y1 += patt[j%4] * STRENGTH * 0.35; + y2 /= 2; + y1 += patt[j%4] * STRENGTH1 * 0.35; + y2 += patt[j%4] * STRENGTH2 * 0.35; if (y1 < -128) { y1=-128; @@ -324,8 +461,17 @@ y1= 127; } + if (y2 < -128) { + y2=-128; + } + else if (y2 > 127) { + y2= 127; + } + y1 /= 3.0; - xvid_noise[i] = (int) y1; + y2 /= 3.0; + tbls->xvid_noise1[i] = (int) y1; + tbls->xvid_noise2[i] = (int) y2; if (RAND_N(6) == 0) { j--; @@ -334,14 +480,17 @@ for (i = 0; i < MAX_RES; i++) for (j = 0; j < 3; j++) { - xvid_prev_shift[i][j] = xvid_noise + (rand() & (MAX_SHIFT - 1)); + tbls->xvid_prev_shift[i][j] = tbls->xvid_noise1 + (rand() & (MAX_SHIFT - 1)); + tbls->xvid_prev_shift[i][3 + j] = tbls->xvid_noise2 + (rand() & (MAX_SHIFT - 1)); } } -void add_noise(uint8_t *dst, uint8_t *src, int stride, int width, int height, int shiftptr) +void add_noise(XVID_POSTPROC *tbls, uint8_t *dst, uint8_t *src, int stride, int width, int height, int shiftptr, int quant) { int x, y; int shift = 0; + int add = (quant < 5) ? 3 : 0; + int8_t *noise = (quant < 5) ? tbls->xvid_noise2 : tbls->xvid_noise1; for(y = 0; y < height; y++) { @@ -352,15 +501,30 @@ shift &= ~7; for(x = 0; x < width; x++) { - const int n = xvid_prev_shift[y][0][x] + xvid_prev_shift[y][1][x] + - xvid_prev_shift[y][2][x]; + const int n = tbls->xvid_prev_shift[y][0 + add][x] + tbls->xvid_prev_shift[y][1 + add][x] + + tbls->xvid_prev_shift[y][2 + add][x]; dst[x] = src2[x] + ((n * src2[x]) >> 7); } - xvid_prev_shift[y][shiftptr] = xvid_noise + shift; + tbls->xvid_prev_shift[y][shiftptr + add] = noise + shift; dst += stride; src += stride; } } + + +void image_brightness_c(uint8_t *dst, int stride, int width, int height, int offset) +{ + int x,y; + + for(y = 0; y < height; y++) + { + for(x = 0; x < width; x++) + { + int p = dst[y*stride + x]; + dst[y*stride + x] = CLIP( p + offset, 0, 255); + } + } +}