3 |
* XVID MPEG-4 VIDEO CODEC |
* XVID MPEG-4 VIDEO CODEC |
4 |
* - Postprocessing functions - |
* - Postprocessing functions - |
5 |
* |
* |
6 |
* Copyright(C) 2003 Michael Militzer <isibaar@xvid.org> |
* Copyright(C) 2003-2010 Michael Militzer <isibaar@xvid.org> |
7 |
|
* 2004 Marc Fauconneau |
8 |
* |
* |
9 |
* This program is free software ; you can redistribute it and/or modify |
* This program is free software ; you can redistribute it and/or modify |
10 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
54 |
} |
} |
55 |
|
|
56 |
void |
void |
57 |
image_postproc(XVID_POSTPROC *tbls, IMAGE * img, int edged_width, |
stripe_deblock_h(SMPDeblock *h) |
|
const MACROBLOCK * mbs, int mb_width, int mb_height, int mb_stride, |
|
|
int flags, int brightness, int frame_num, int bvop) |
|
58 |
{ |
{ |
59 |
const int edged_width2 = edged_width /2; |
const int stride = h->stride; |
60 |
|
const int stride2 = stride /2; |
61 |
|
|
62 |
int i,j; |
int i,j; |
63 |
int quant; |
int quant; |
64 |
|
|
65 |
/* luma: j,i in block units */ |
/* luma: j,i in block units */ |
66 |
if ((flags & XVID_DEBLOCKY)) |
if ((h->flags & XVID_DEBLOCKY)) |
67 |
{ |
{ |
68 |
for (j = 1; j < mb_height*2; j++) /* horizontal deblocking */ |
int dering = h->flags & XVID_DERINGY; |
69 |
for (i = 0; i < mb_width*2; i++) |
|
70 |
|
for (j = 1; j < h->stop_y; j++) /* horizontal luma deblocking */ |
71 |
|
for (i = h->start_x; i < h->stop_x; i++) |
72 |
{ |
{ |
73 |
quant = mbs[(j+0)/2*mb_stride + (i/2)].quant; |
quant = h->mbs[(j+0)/2*h->mb_stride + (i/2)].quant; |
74 |
deblock8x8_h(tbls, img->y + j*8*edged_width + i*8, edged_width, quant); |
deblock8x8_h(h->tbls, h->img->y + j*8*stride + i*8, stride, quant, dering); |
75 |
|
} |
76 |
} |
} |
77 |
|
|
78 |
for (j = 0; j < mb_height*2; j++) /* vertical deblocking */ |
/* chroma */ |
79 |
for (i = 1; i < mb_width*2; i++) |
if ((h->flags & XVID_DEBLOCKUV)) |
80 |
|
{ |
81 |
|
int dering = h->flags & XVID_DERINGUV; |
82 |
|
|
83 |
|
for (j = 1; j < h->stop_y/2; j++) /* horizontal deblocking */ |
84 |
|
for (i = h->start_x/2; i < h->stop_x/2; i++) |
85 |
{ |
{ |
86 |
quant = mbs[(j+0)/2*mb_stride + (i/2)].quant; |
quant = h->mbs[(j+0)*h->mb_stride + i].quant; |
87 |
deblock8x8_v(tbls, img->y + j*8*edged_width + i*8, edged_width, quant); |
deblock8x8_h(h->tbls, h->img->u + j*8*stride2 + i*8, stride2, quant, dering); |
88 |
|
deblock8x8_h(h->tbls, h->img->v + j*8*stride2 + i*8, stride2, quant, dering); |
89 |
|
} |
90 |
} |
} |
91 |
} |
} |
92 |
|
|
93 |
|
void |
94 |
|
stripe_deblock_v(SMPDeblock *h) |
95 |
|
{ |
96 |
|
const int stride = h->stride; |
97 |
|
const int stride2 = stride /2; |
98 |
|
|
99 |
|
int i,j; |
100 |
|
int quant; |
101 |
|
|
102 |
|
/* luma: j,i in block units */ |
103 |
|
if ((h->flags & XVID_DEBLOCKY)) |
104 |
|
{ |
105 |
|
int dering = h->flags & XVID_DERINGY; |
106 |
|
|
107 |
|
for (j = h->start_y; j < h->stop_y; j++) /* vertical deblocking */ |
108 |
|
for (i = 1; i < h->stop_x; i++) |
109 |
|
{ |
110 |
|
quant = h->mbs[(j+0)/2*h->mb_stride + (i/2)].quant; |
111 |
|
deblock8x8_v(h->tbls, h->img->y + j*8*stride + i*8, stride, quant, dering); |
112 |
|
} |
113 |
|
} |
114 |
|
|
115 |
/* chroma */ |
/* chroma */ |
116 |
if ((flags & XVID_DEBLOCKUV)) |
if ((h->flags & XVID_DEBLOCKUV)) |
117 |
{ |
{ |
118 |
for (j = 1; j < mb_height; j++) /* horizontal deblocking */ |
int dering = h->flags & XVID_DERINGUV; |
119 |
for (i = 0; i < mb_width; i++) |
|
120 |
|
for (j = h->start_y/2; j < h->stop_y/2; j++) /* vertical deblocking */ |
121 |
|
for (i = 1; i < h->stop_x/2; i++) |
122 |
{ |
{ |
123 |
quant = mbs[(j+0)*mb_stride + i].quant; |
quant = h->mbs[(j+0)*h->mb_stride + i].quant; |
124 |
deblock8x8_h(tbls, img->u + j*8*edged_width2 + i*8, edged_width2, quant); |
deblock8x8_v(h->tbls, h->img->u + j*8*stride2 + i*8, stride2, quant, dering); |
125 |
deblock8x8_h(tbls, img->v + j*8*edged_width2 + i*8, edged_width2, quant); |
deblock8x8_v(h->tbls, h->img->v + j*8*stride2 + i*8, stride2, quant, dering); |
126 |
|
} |
127 |
|
} |
128 |
} |
} |
129 |
|
|
130 |
for (j = 0; j < mb_height; j++) /* vertical deblocking */ |
void |
131 |
for (i = 1; i < mb_width; i++) |
image_postproc(XVID_POSTPROC *tbls, IMAGE * img, int edged_width, |
132 |
|
const MACROBLOCK * mbs, int mb_width, int mb_height, int mb_stride, |
133 |
|
int flags, int brightness, int frame_num, int bvop, int threads) |
134 |
{ |
{ |
135 |
quant = mbs[(j+0)*mb_stride + i].quant; |
int k, num_threads = MAX(1, MIN(threads, 4)); |
136 |
deblock8x8_v(tbls, img->u + j*8*edged_width2 + i*8, edged_width2, quant); |
SMPDeblock data[4]; |
137 |
deblock8x8_v(tbls, img->v + j*8*edged_width2 + i*8, edged_width2, quant); |
void *status = NULL; |
138 |
|
|
139 |
|
/* horizontal deblocking, dispatch threads */ |
140 |
|
for (k = 0; k < num_threads; k++) { |
141 |
|
data[k].flags = flags; |
142 |
|
data[k].img = img; |
143 |
|
data[k].mb_stride = mb_stride; |
144 |
|
data[k].mbs = mbs; |
145 |
|
data[k].stride = edged_width; |
146 |
|
data[k].tbls = tbls; |
147 |
|
|
148 |
|
data[k].start_x = (k*mb_width / num_threads)*2; |
149 |
|
data[k].stop_x = ((k+1)*mb_width / num_threads)*2; |
150 |
|
|
151 |
|
data[k].stop_y = mb_height*2; |
152 |
|
} |
153 |
|
|
154 |
|
/* create threads */ |
155 |
|
for (k = 1; k < num_threads; k++) { |
156 |
|
pthread_create(&data[k].handle, NULL, |
157 |
|
(void*)stripe_deblock_h, (void*)&data[k]); |
158 |
} |
} |
159 |
|
|
160 |
|
stripe_deblock_h(&data[0]); |
161 |
|
|
162 |
|
/* wait until all threads are finished */ |
163 |
|
for (k = 1; k < num_threads; k++) { |
164 |
|
pthread_join(data[k].handle, &status); |
165 |
} |
} |
166 |
|
|
167 |
|
|
168 |
|
/* vertical deblocking, dispatch threads */ |
169 |
|
for (k = 0; k < num_threads; k++) { |
170 |
|
data[k].start_y = (k*mb_height / num_threads)*2; |
171 |
|
data[k].stop_y = ((k+1)*mb_height / num_threads)*2; |
172 |
|
data[k].stop_x = mb_width*2; |
173 |
|
} |
174 |
|
|
175 |
|
/* create threads */ |
176 |
|
for (k = 1; k < num_threads; k++) { |
177 |
|
pthread_create(&data[k].handle, NULL, |
178 |
|
(void*)stripe_deblock_v, (void*)&data[k]); |
179 |
|
} |
180 |
|
|
181 |
|
stripe_deblock_v(&data[0]); |
182 |
|
|
183 |
|
/* wait until all threads are finished */ |
184 |
|
for (k = 1; k < num_threads; k++) { |
185 |
|
pthread_join(data[k].handle, &status); |
186 |
|
} |
187 |
|
|
188 |
|
|
189 |
if (!bvop) |
if (!bvop) |
190 |
tbls->prev_quant = mbs->quant; |
tbls->prev_quant = mbs->quant; |
191 |
|
|
240 |
s[8] = *(v[8] = img + x*stride + 3); \ |
s[8] = *(v[8] = img + x*stride + 3); \ |
241 |
s[9] = *(v[9] = img + x*stride + 4); |
s[9] = *(v[9] = img + x*stride + 4); |
242 |
|
|
243 |
|
#define APPLY_DERING(x) \ |
244 |
|
*v[x] = (e[x] == 0) ? ( \ |
245 |
|
(e[x-1] == 0) ? ( \ |
246 |
|
(e[x+1] == 0) ? \ |
247 |
|
((s[x-1]+s[x]*2+s[x+1])>>2) \ |
248 |
|
: ((s[x-1]+s[x])>>1) ) \ |
249 |
|
: ((s[x]+s[x+1])>>1) ) \ |
250 |
|
: s[x]; |
251 |
|
|
252 |
#define APPLY_FILTER_CORE \ |
#define APPLY_FILTER_CORE \ |
253 |
/* First, decide whether to use default or DC-offset mode */ \ |
/* First, decide whether to use default or DC-offset mode */ \ |
254 |
\ |
\ |
283 |
*v[4] -= diff; \ |
*v[4] -= diff; \ |
284 |
*v[5] += diff; \ |
*v[5] += diff; \ |
285 |
} \ |
} \ |
286 |
|
if (dering) { \ |
287 |
|
e[0] = (tbls->xvid_abs_tbl[(s[0] - s[1]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ |
288 |
|
e[1] = (tbls->xvid_abs_tbl[(s[1] - s[2]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ |
289 |
|
e[2] = (tbls->xvid_abs_tbl[(s[2] - s[3]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ |
290 |
|
e[3] = (tbls->xvid_abs_tbl[(s[3] - s[4]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ |
291 |
|
e[4] = (tbls->xvid_abs_tbl[(s[4] - s[5]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ |
292 |
|
e[5] = (tbls->xvid_abs_tbl[(s[5] - s[6]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ |
293 |
|
e[6] = (tbls->xvid_abs_tbl[(s[6] - s[7]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ |
294 |
|
e[7] = (tbls->xvid_abs_tbl[(s[7] - s[8]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ |
295 |
|
e[8] = (tbls->xvid_abs_tbl[(s[8] - s[9]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \ |
296 |
|
\ |
297 |
|
e[1] |= e[0]; \ |
298 |
|
e[2] |= e[1]; \ |
299 |
|
e[3] |= e[2]; \ |
300 |
|
e[4] |= e[3]; \ |
301 |
|
e[5] |= e[4]; \ |
302 |
|
e[6] |= e[5]; \ |
303 |
|
e[7] |= e[6]; \ |
304 |
|
e[8] |= e[7]; \ |
305 |
|
e[9] = e[8]; \ |
306 |
|
\ |
307 |
|
APPLY_DERING(1) \ |
308 |
|
APPLY_DERING(2) \ |
309 |
|
APPLY_DERING(3) \ |
310 |
|
APPLY_DERING(4) \ |
311 |
|
APPLY_DERING(5) \ |
312 |
|
APPLY_DERING(6) \ |
313 |
|
APPLY_DERING(7) \ |
314 |
|
APPLY_DERING(8) \ |
315 |
|
} \ |
316 |
} \ |
} \ |
317 |
else { /* DC-offset mode */ \ |
else { /* DC-offset mode */ \ |
318 |
uint8_t p0, p9; \ |
uint8_t p0, p9; \ |
339 |
} \ |
} \ |
340 |
} |
} |
341 |
|
|
342 |
void deblock8x8_h(XVID_POSTPROC *tbls, uint8_t *img, int stride, int quant) |
void deblock8x8_h(XVID_POSTPROC *tbls, uint8_t *img, int stride, int quant, int dering) |
343 |
{ |
{ |
344 |
int eq_cnt; |
int eq_cnt; |
345 |
uint8_t *v[10]; |
uint8_t *v[10]; |
346 |
int32_t s[10]; |
int s[10]; |
347 |
|
int e[10]; |
348 |
|
|
349 |
LOAD_DATA_HOR(0) |
LOAD_DATA_HOR(0) |
350 |
APPLY_FILTER_CORE |
APPLY_FILTER_CORE |
372 |
} |
} |
373 |
|
|
374 |
|
|
375 |
void deblock8x8_v(XVID_POSTPROC *tbls, uint8_t *img, int stride, int quant) |
void deblock8x8_v(XVID_POSTPROC *tbls, uint8_t *img, int stride, int quant, int dering) |
376 |
{ |
{ |
377 |
int eq_cnt; |
int eq_cnt; |
378 |
uint8_t *v[10]; |
uint8_t *v[10]; |
379 |
int s[10]; |
int s[10]; |
380 |
|
int e[10]; |
381 |
|
|
382 |
LOAD_DATA_VER(0) |
LOAD_DATA_VER(0) |
383 |
APPLY_FILTER_CORE |
APPLY_FILTER_CORE |
515 |
{ |
{ |
516 |
for(x = 0; x < width; x++) |
for(x = 0; x < width; x++) |
517 |
{ |
{ |
518 |
dst[y*stride + x] = CLIP( dst[y*stride + x] + offset, 0, 255); |
int p = dst[y*stride + x]; |
519 |
|
dst[y*stride + x] = CLIP( p + offset, 0, 255); |
520 |
} |
} |
521 |
} |
} |
522 |
} |
} |