/* Internal plugin state, accumulated across frames so that an
 * average PSNR-HVS-M can be reported when the plugin is destroyed
 * (sums are divided by frame_cnt at XVID_PLG_DESTROY time). */
typedef struct {
	uint64_t mse_sum_y;	/* running sum of per-frame masked MSE, luma plane */
	uint64_t mse_sum_u;	/* running sum of per-frame masked MSE, chroma U */
	uint64_t mse_sum_v;	/* running sum of per-frame masked MSE, chroma V */
	long frame_cnt;		/* number of frames accumulated into the sums */
} psnrhvsm_data_t;	/* internal plugin data */
78 |
0.019290f, 0.011815f, 0.011080f, 0.010412f, 0.007972f, 0.010000f, 0.009426f, 0.010203f |
0.019290f, 0.011815f, 0.011080f, 0.010412f, 0.007972f, 0.010000f, 0.009426f, 0.010203f |
79 |
}; |
}; |
80 |
|
|
81 |
#if 1 /* Floating-point implementation */ |
#if 0 /* Floating-point implementation */ |
82 |
|
|
83 |
static uint32_t Calc_MSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride) |
static uint32_t Calc_MSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride) |
84 |
{ |
{ |
87 |
uint32_t Local[8] = {0, 0, 0, 0, 0, 0, 0, 0}; |
uint32_t Local[8] = {0, 0, 0, 0, 0, 0, 0, 0}; |
88 |
uint32_t Local_Square[8] = {0, 0, 0, 0, 0, 0, 0, 0}; |
uint32_t Local_Square[8] = {0, 0, 0, 0, 0, 0, 0, 0}; |
89 |
float MASK_A = 0.f, MASK_B = 0.f; |
float MASK_A = 0.f, MASK_B = 0.f; |
90 |
|
float Mult1 = 1.f, Mult2 = 1.f; |
91 |
uint32_t MSE_H = 0; |
uint32_t MSE_H = 0; |
92 |
|
|
93 |
/* Step 1: Calculate CSF weighted energy of DCT coefficients */ |
/* Step 1: Calculate CSF weighted energy of DCT coefficients */ |
101 |
/* Step 2: Determine local variances compared to entire block variance */ |
/* Step 2: Determine local variances compared to entire block variance */ |
102 |
for (y = 0; y < 2; y++) { |
for (y = 0; y < 2; y++) { |
103 |
for (x = 0; x < 2; x++) { |
for (x = 0; x < 2; x++) { |
104 |
|
for (j = 0; j < 4; j++) { |
105 |
for (i = 0; i < 4; i++) { |
for (i = 0; i < 4; i++) { |
106 |
uint8_t A = IMG_A[y*4*stride + 4*x + i]; |
uint8_t A = IMG_A[(y*4+j)*stride + 4*x + i]; |
107 |
uint8_t B = IMG_B[y*4*stride + 4*x + i]; |
uint8_t B = IMG_B[(y*4+j)*stride + 4*x + i]; |
108 |
|
|
109 |
Local[y*2 + x] += A; |
Local[y*2 + x] += A; |
110 |
Local[y*2 + x + 4] += B; |
Local[y*2 + x + 4] += B; |
113 |
} |
} |
114 |
} |
} |
115 |
} |
} |
116 |
|
} |
117 |
|
|
118 |
Global_A = Local[0] + Local[1] + Local[2] + Local[3]; |
Global_A = Local[0] + Local[1] + Local[2] + Local[3]; |
119 |
Global_B = Local[4] + Local[5] + Local[6] + Local[7]; |
Global_B = Local[4] + Local[5] + Local[6] + Local[7]; |
128 |
Global_B = (Local_Square[4]<<6) - Global_B*Global_B; /* 64*Var(D) */ |
Global_B = (Local_Square[4]<<6) - Global_B*Global_B; /* 64*Var(D) */ |
129 |
|
|
130 |
/* Step 3: Calculate contrast masking threshold */ |
/* Step 3: Calculate contrast masking threshold */ |
131 |
MASK_A = (float)sqrt(MASK_A*((float)(Local[0]+Local[1]+Local[2]+Local[3])/((float)Global_A/4.f)))/32.f; |
if (Global_A) |
132 |
MASK_B = (float)sqrt(MASK_B*((float)(Local[4]+Local[5]+Local[6]+Local[7])/((float)Global_B/4.f)))/32.f; |
Mult1 = (float)(Local[0]+Local[1]+Local[2]+Local[3])/((float)(Global_A)/4.f); |
133 |
|
|
134 |
|
if (Global_B) |
135 |
|
Mult2 = (float)(Local[4]+Local[5]+Local[6]+Local[7])/((float)(Global_B)/4.f); |
136 |
|
|
137 |
|
MASK_A = (float)sqrt(MASK_A * Mult1) / 32.f; |
138 |
|
MASK_B = (float)sqrt(MASK_B * Mult2) / 32.f; |
139 |
|
|
140 |
if (MASK_B > MASK_A) MASK_A = MASK_B; /* MAX(MASK_A, MASK_B) */ |
if (MASK_B > MASK_A) MASK_A = MASK_B; /* MAX(MASK_A, MASK_B) */ |
141 |
|
|
157 |
return MSE_H; /* Fixed-point value right-shifted by eight */ |
return MSE_H; /* Fixed-point value right-shifted by eight */ |
158 |
} |
} |
159 |
|
|
160 |
#else /* First draft of a fixed-point implementation. |
#else |
161 |
|
|
162 |
|
/* First draft of a fixed-point implementation. |
163 |
Might serve as a template for MMX/SSE code */ |
Might serve as a template for MMX/SSE code */ |
164 |
|
|
165 |
static const uint16_t iMask_Coeff[64] = |
static const uint16_t iMask_Coeff[64] = |
195 |
366, 286, 277, 269, 235, 264, 256, 266 |
366, 286, 277, 269, 235, 264, 256, 266 |
196 |
}; |
}; |
197 |
|
|
198 |
|
/* Integer square root: returns floor(sqrt(n)) for values in the
 * 32-bit range, computed bit-by-bit without floating point.
 * The result fits in 16 bits, so the squared candidate below can
 * never overflow uint32_t arithmetic. */
static __inline uint32_t isqrt(unsigned long n)
{
	uint32_t root = 0;	/* best root accumulated so far */
	uint32_t bit;		/* candidate result bit, highest first */

	/* Try every result bit from 15 down to 0 and keep it whenever
	   the squared candidate does not exceed n. */
	for (bit = 0x8000; bit != 0; bit >>= 1) {
		uint32_t trial = root | bit;

		if (trial * trial <= n)
			root = trial;
	}
	return root;
}
212 |
|
|
213 |
static uint32_t Calc_MSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride) |
static uint32_t Calc_MSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride) |
214 |
{ |
{ |
215 |
int x, y, i, j; |
int x, y, i, j; |
233 |
/* Step 2: Determine local variances compared to entire block variance */ |
/* Step 2: Determine local variances compared to entire block variance */ |
234 |
for (y = 0; y < 2; y++) { |
for (y = 0; y < 2; y++) { |
235 |
for (x = 0; x < 2; x++) { |
for (x = 0; x < 2; x++) { |
236 |
|
for (j = 0; j < 4; j++) { |
237 |
for (i = 0; i < 4; i++) { |
for (i = 0; i < 4; i++) { |
238 |
uint8_t A = IMG_A[y*4*stride + 4*x + i]; |
uint8_t A = IMG_A[(y*4+j)*stride + 4*x + i]; |
239 |
uint8_t B = IMG_B[y*4*stride + 4*x + i]; |
uint8_t B = IMG_B[(y*4+j)*stride + 4*x + i]; |
240 |
|
|
241 |
Local[y*2 + x] += A; |
Local[y*2 + x] += A; |
242 |
Local[y*2 + x + 4] += B; |
Local[y*2 + x + 4] += B; |
245 |
} |
} |
246 |
} |
} |
247 |
} |
} |
248 |
|
} |
249 |
|
|
250 |
Global_A = Local[0] + Local[1] + Local[2] + Local[3]; |
Global_A = Local[0] + Local[1] + Local[2] + Local[3]; |
251 |
Global_B = Local[4] + Local[5] + Local[6] + Local[7]; |
Global_B = Local[4] + Local[5] + Local[6] + Local[7]; |
261 |
|
|
262 |
/* Step 3: Calculate contrast masking threshold */ |
/* Step 3: Calculate contrast masking threshold */ |
263 |
{ |
{ |
264 |
float MASK_A, MASK_B; /* TODO */ |
uint32_t MASK_A, MASK_B; |
265 |
|
uint32_t Mult1 = 64, Mult2 = 64; |
266 |
|
|
267 |
|
if (Global_A) |
268 |
|
Mult1 = ((Local[0]+Local[1]+Local[2]+Local[3])<<8) / Global_A; |
269 |
|
|
270 |
MASK_A = (float)((double)(Sum_A + 4) / 8.); |
if (Global_B) |
271 |
MASK_B = (float)((double)(Sum_B + 4) / 8.); |
Mult2 = ((Local[4]+Local[5]+Local[6]+Local[7])<<8) / Global_B; |
272 |
|
|
273 |
MASK_A = sqrt(MASK_A*((float)(Local[0]+Local[1]+Local[2]+Local[3])/((float)Global_A/4.f)))/32.f; |
MASK_A = isqrt(2*Sum_A*Mult1) + 16; |
274 |
MASK_B = sqrt(MASK_B*((float)(Local[4]+Local[5]+Local[6]+Local[7])/((float)Global_B/4.f)))/32.f; |
MASK_B = isqrt(2*Sum_B*Mult2) + 16; |
275 |
|
|
276 |
if (MASK_B > MASK_A) /* MAX(MASK_A, MASK_B) */ |
if (MASK_B > MASK_A) /* MAX(MASK_A, MASK_B) */ |
277 |
MASK = (uint32_t) (MASK_B * 1024.f + 0.5f); |
MASK = (uint32_t) MASK_B; |
278 |
else |
else |
279 |
MASK = (uint32_t) (MASK_A * 1024.f + 0.5f); |
MASK = (uint32_t) MASK_A; |
280 |
} |
} |
281 |
|
|
282 |
/* Step 4: Calculate MSE of DCT coeffs reduced by masking effect */ |
/* Step 4: Calculate MSE of DCT coeffs reduced by masking effect */ |
306 |
static void psnrhvsm_after(xvid_plg_data_t *data, psnrhvsm_data_t *psnrhvsm) |
static void psnrhvsm_after(xvid_plg_data_t *data, psnrhvsm_data_t *psnrhvsm) |
307 |
{ |
{ |
308 |
DECLARE_ALIGNED_MATRIX(DCT, 2, 64, int16_t, CACHE_LINE); |
DECLARE_ALIGNED_MATRIX(DCT, 2, 64, int16_t, CACHE_LINE); |
309 |
int x, y, stride = data->original.stride[0]; |
int32_t x, y, u, v; |
310 |
int16_t *DCT_A = &DCT[0], *DCT_B = &DCT[64]; |
int16_t *DCT_A = &DCT[0], *DCT_B = &DCT[64]; |
311 |
|
uint64_t sse_y = 0, sse_u = 0, sse_v = 0; |
312 |
|
|
313 |
|
for (y = 0; y < data->height>>3; y++) { |
314 |
uint8_t *IMG_A = (uint8_t *) data->original.plane[0]; |
uint8_t *IMG_A = (uint8_t *) data->original.plane[0]; |
315 |
uint8_t *IMG_B = (uint8_t *) data->current.plane[0]; |
uint8_t *IMG_B = (uint8_t *) data->current.plane[0]; |
316 |
uint32_t MSE_H = 0; |
uint32_t stride = data->original.stride[0]; |
|
|
|
|
psnrhvsm->pixels = data->width * data->height; |
|
317 |
|
|
318 |
for (y = 0; y < data->height; y += 8) { |
for (x = 0; x < data->width>>3; x++) { /* non multiple of 8 handling ?? */ |
319 |
for (x = 0; x < data->width; x += 8) { |
int offset = (y<<3)*stride + (x<<3); |
|
int offset = y*stride + x; |
|
320 |
|
|
321 |
emms(); |
emms(); |
322 |
|
|
331 |
emms(); |
emms(); |
332 |
|
|
333 |
/* Calculate MSE_H reduced by contrast masking effect */ |
/* Calculate MSE_H reduced by contrast masking effect */ |
334 |
MSE_H += Calc_MSE_H(DCT_A, DCT_B, IMG_A + offset, IMG_B + offset, stride); |
sse_y += Calc_MSE_H(DCT_A, DCT_B, IMG_A + offset, IMG_B + offset, stride); |
335 |
|
} |
336 |
|
} |
337 |
|
|
338 |
|
for (y = 0; y < data->height>>4; y++) { |
339 |
|
uint8_t *U_A = (uint8_t *) data->original.plane[1]; |
340 |
|
uint8_t *V_A = (uint8_t *) data->original.plane[2]; |
341 |
|
uint8_t *U_B = (uint8_t *) data->current.plane[1]; |
342 |
|
uint8_t *V_B = (uint8_t *) data->current.plane[2]; |
343 |
|
uint32_t stride_uv = data->current.stride[1]; |
344 |
|
|
345 |
|
for (x = 0; x < data->width>>4; x++) { /* non multiple of 8 handling ?? */ |
346 |
|
int offset = (y<<3)*stride_uv + (x<<3); |
347 |
|
|
348 |
|
emms(); |
349 |
|
|
350 |
|
/* Transfer data */ |
351 |
|
transfer_8to16copy(DCT_A, U_A + offset, stride_uv); |
352 |
|
transfer_8to16copy(DCT_B, U_B + offset, stride_uv); |
353 |
|
|
354 |
|
/* Perform DCT */ |
355 |
|
fdct(DCT_A); |
356 |
|
fdct(DCT_B); |
357 |
|
|
358 |
|
emms(); |
359 |
|
|
360 |
|
/* Calculate MSE_H reduced by contrast masking effect */ |
361 |
|
sse_u += Calc_MSE_H(DCT_A, DCT_B, U_A + offset, U_B + offset, stride_uv); |
362 |
|
|
363 |
|
emms(); |
364 |
|
|
365 |
|
/* Transfer data */ |
366 |
|
transfer_8to16copy(DCT_A, V_A + offset, stride_uv); |
367 |
|
transfer_8to16copy(DCT_B, V_B + offset, stride_uv); |
368 |
|
|
369 |
|
/* Perform DCT */ |
370 |
|
fdct(DCT_A); |
371 |
|
fdct(DCT_B); |
372 |
|
|
373 |
|
emms(); |
374 |
|
|
375 |
|
/* Calculate MSE_H reduced by contrast masking effect */ |
376 |
|
sse_v += Calc_MSE_H(DCT_A, DCT_B, V_A + offset, V_B + offset, stride_uv); |
377 |
} |
} |
378 |
} |
} |
379 |
|
|
380 |
psnrhvsm->mse_sum += MSE_H; |
y = (int32_t) ( 4*sse_y / (data->width * data->height)); |
381 |
|
u = (int32_t) (16*sse_u / (data->width * data->height)); |
382 |
|
v = (int32_t) (16*sse_v / (data->width * data->height)); |
383 |
|
|
384 |
|
psnrhvsm->mse_sum_y += y; |
385 |
|
psnrhvsm->mse_sum_u += u; |
386 |
|
psnrhvsm->mse_sum_v += v; |
387 |
psnrhvsm->frame_cnt++; |
psnrhvsm->frame_cnt++; |
388 |
|
|
389 |
printf(" psnrhvsm: %2.2f\n", sse_to_PSNR(MSE_H, 256*psnrhvsm->pixels)); |
printf(" psnrhvsm y: %2.2f, psnrhvsm u: %2.2f, psnrhvsm v: %2.2f\n", sse_to_PSNR(y, 1024), sse_to_PSNR(u, 1024), sse_to_PSNR(v, 1024)); |
390 |
} |
} |
391 |
|
|
392 |
static int psnrhvsm_create(xvid_plg_create_t *create, void **handle) |
static int psnrhvsm_create(xvid_plg_create_t *create, void **handle) |
394 |
psnrhvsm_data_t *psnrhvsm; |
psnrhvsm_data_t *psnrhvsm; |
395 |
psnrhvsm = (psnrhvsm_data_t *) malloc(sizeof(psnrhvsm_data_t)); |
psnrhvsm = (psnrhvsm_data_t *) malloc(sizeof(psnrhvsm_data_t)); |
396 |
|
|
397 |
psnrhvsm->mse_sum = 0; |
psnrhvsm->mse_sum_y = 0; |
398 |
psnrhvsm->frame_cnt = 0; |
psnrhvsm->mse_sum_u = 0; |
399 |
|
psnrhvsm->mse_sum_v = 0; |
400 |
|
|
401 |
psnrhvsm->pixels = 0; |
psnrhvsm->frame_cnt = 0; |
402 |
|
|
403 |
*(handle) = (void*) psnrhvsm; |
*(handle) = (void*) psnrhvsm; |
404 |
return 0; |
return 0; |
421 |
break; |
break; |
422 |
case(XVID_PLG_DESTROY): |
case(XVID_PLG_DESTROY): |
423 |
{ |
{ |
424 |
uint32_t MSE_H; |
uint32_t y, u, v; |
425 |
psnrhvsm_data_t *psnrhvsm = (psnrhvsm_data_t *)handle; |
psnrhvsm_data_t *psnrhvsm = (psnrhvsm_data_t *)handle; |
426 |
|
|
427 |
if (psnrhvsm) { |
if (psnrhvsm) { |
428 |
MSE_H = (uint32_t) (psnrhvsm->mse_sum / psnrhvsm->frame_cnt); |
y = (uint32_t) (psnrhvsm->mse_sum_y / psnrhvsm->frame_cnt); |
429 |
|
u = (uint32_t) (psnrhvsm->mse_sum_u / psnrhvsm->frame_cnt); |
430 |
|
v = (uint32_t) (psnrhvsm->mse_sum_v / psnrhvsm->frame_cnt); |
431 |
|
|
432 |
emms(); |
emms(); |
433 |
printf("Average psnrhvsm: %2.2f\n", sse_to_PSNR(MSE_H, 256*psnrhvsm->pixels)); |
printf("Average psnrhvsm y: %2.2f, psnrhvsm u: %2.2f, psnrhvsm v: %2.2f\n", |
434 |
|
sse_to_PSNR(y, 1024), sse_to_PSNR(u, 1024), sse_to_PSNR(v, 1024)); |
435 |
free(psnrhvsm); |
free(psnrhvsm); |
436 |
} |
} |
437 |
} |
} |