--- plugin_ssim.c	2006/11/08 06:55:27	1.6
+++ plugin_ssim.c	2008/11/27 20:34:53	1.12
@@ -23,7 +23,7 @@
  *
  ****************************************************************************/
 
-#include
+#include
 #include
 #include
 #include "../portab.h"
@@ -39,10 +39,20 @@
 
 /*dev 1.0 gaussian weighting. the weight for the pixel x,y is w(x)*w(y)*/
 static float mask8[8] = {
-	0.0069815, 0.1402264, 1.0361408, 2.8165226,
-	2.8165226, 1.0361408, 0.1402264, 0.0069815
+	0.0069815f, 0.1402264f, 1.0361408f, 2.8165226f,
+	2.8165226f, 1.0361408f, 0.1402264f, 0.0069815f
 };
 
+/* integer version. Norm: coeffs sums up to 4096.
+   Define USE_INT_GAUSSIAN to use it as replacement to float version */
+
+/* #define USE_INT_GAUSSIAN */
+static const uint16_t imask8[8] = {
+	4, 72, 530, 1442, 1442, 530, 72, 4
+};
+#define GACCUM(X) ( ((X)+(1<<11)) >> 12 )
+
+
 struct framestat_t{
 	int type;
 	int quant;
@@ -238,7 +248,25 @@
 		sum *=mask8[i];
 		mean += sum;
 	}
-	return (int) mean + 0.5;
+	return (int) (mean + 0.5);
+}
+
+/*integer gaussian-weighted luminance of an 8x8 block: every row is weighted
+  with imask8 and renormalised with GACCUM, then the row results are weighted
+  with imask8 and renormalised again. stride is the distance between rows*/
+int lum_8x8_gaussian_int(uint8_t* ptr, int stride){
+	uint32_t mean;
+	int i,j;
+	mean = 0;
+	for(i=0;i<8;i++){
+		uint32_t sum = 0;
+		for(j=0;j<8;j++)
+			sum += ptr[i*stride + j]*imask8[j];
+
+		sum = GACCUM(sum) * imask8[i];
+		mean += sum;
+	}
+	return (int)GACCUM(mean);
 }
 
 /*calculate the difference between two blocks next to each other on a row*/
@@ -279,9 +307,50 @@
 		ptrc += str;
 	}
 
-	*pdevo = (int) (devo - ((lumo*lumo + 32) >> 6)) + 0.5;
-	*pdevc = (int) (devc - ((lumc*lumc + 32) >> 6)) + 0.5;
-	*pcorr = (int) (corr - ((lumo*lumc + 32) >> 6)) + 0.5;
+	*pdevo = (int) ((devo - ((lumo*lumo + 32) >> 6)) + 0.5);
+	*pdevc = (int) ((devc - ((lumc*lumc + 32) >> 6)) + 0.5);
+	*pcorr = (int) ((corr - ((lumo*lumc + 32) >> 6)) + 0.5);
+};
+
+/*integer variant selected instead of consim_gaussian when USE_INT_GAUSSIAN
+  is defined: accumulates the imask8-weighted sums of valo*valo, valc*valc
+  and valo*valc over two 8x8 blocks, subtracts the (lum product + 32) >> 6
+  terms and stores the results in *pdevo, *pdevc and *pcorr*/
+void consim_gaussian_int(uint8_t* ptro, uint8_t* ptrc, int stride, int lumo, int lumc, int* pdevo, int* pdevc, int* pcorr)
+{
+	unsigned int valo, valc,i,j,str;
+	uint32_t devo=0, devc=0, corr=0;
+	str = stride - 8;
+	for(i=0;i< 8;i++){
+		uint32_t sumo = 0;
+		uint32_t sumc = 0;
+		uint32_t sumcorr = 0;
+		for(j=0;j< 8;j++){
+			valo = *ptro;
+			valc = *ptrc;
+			sumo += valo*valo*imask8[j];
+			sumc += valc*valc*imask8[j];
+			sumcorr += valo*valc*imask8[j];
+			ptro++;
+			ptrc++;
+		}
+
+		devo += GACCUM(sumo)*imask8[i];
+		devc += GACCUM(sumc)*imask8[i];
+		corr += GACCUM(sumcorr)*imask8[i];
+		ptro += str;
+		ptrc += str;
+	}
+
+	devo = GACCUM(devo);
+	devc = GACCUM(devc);
+	corr = GACCUM(corr);
+	/*subtract in signed ints: the accumulators are unsigned, so a result
+	  below zero (e.g. corr smaller than the lumo*lumc term) would wrap
+	  around to a huge value instead of going negative*/
+	*pdevo = (int) devo - ((lumo*lumo + 32) >> 6);
+	*pdevc = (int) devc - ((lumc*lumc + 32) >> 6);
+	*pcorr = (int) corr - ((lumo*lumc + 32) >> 6);
 };
 
 /*calculate contrast and correlation of the two blocks*/
@@ -309,10 +378,10 @@
 
 /*calculate the final ssim value*/
 static float calc_ssim(float meano, float meanc, float devo, float devc, float corr){
-	static const float c1 = (0.01*255)*(0.01*255);
-	static const float c2 = (0.03*255)*(0.03*255);
+	static const float c1 = (0.01f*255)*(0.01f*255);
+	static const float c2 = (0.03f*255)*(0.03f*255);
 	/*printf("meano: %f meanc: %f devo: %f devc: %f corr: %f\n",meano,meanc,devo,devc,corr);*/
-	return ((2.0*meano*meanc + c1)*(corr/32.0 + c2))/((meano*meano + meanc*meanc + c1)*(devc/64.0 + devo/64.0 + c2));
+	return ((2.0f*meano*meanc + c1)*(corr/32.0f + c2))/((meano*meano + meanc*meanc + c1)*(devc/64.0f + devo/64.0f + c2));
 }
 
 static void ssim_after(xvid_plg_data_t* data, ssim_data_t* ssim){
@@ -407,13 +476,10 @@
 static int ssim_create(xvid_plg_create_t* create, void** handle){
 	ssim_data_t* ssim;
 	plg_ssim_param_t* param;
-	int cpu_flags;
 	param = (plg_ssim_param_t*) malloc(sizeof(plg_ssim_param_t));
 	*param = *((plg_ssim_param_t*) create->param);
 	ssim = (ssim_data_t*) malloc(sizeof(ssim_data_t));
 
-	cpu_flags = check_cpu_features();
-
 	ssim->func8x8 = lum_8x8_c;
 	ssim->func2x8 = lum_2x8_c;
 	ssim->consim = consim_c;
@@ -422,23 +488,36 @@
 
 	ssim->grid = param->acc;
 
-#if defined(ARCH_IS_IA32)
-	if((cpu_flags & XVID_CPU_MMX) && (param->acc > 0)){
-		ssim->func8x8 = lum_8x8_mmx;
-		ssim->consim = consim_mmx;
-	}
-	if((cpu_flags & XVID_CPU_SSE2) && (param->acc > 0)){
-		ssim->consim = consim_sse2;
+#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
+	{
+		int cpu_flags = (param->cpu_flags & XVID_CPU_FORCE) ? param->cpu_flags : check_cpu_features();
+
+		if((cpu_flags & XVID_CPU_MMX) && (param->acc > 0)){
+			ssim->func8x8 = lum_8x8_mmx;
+			ssim->consim = consim_mmx;
+		}
+		if((cpu_flags & XVID_CPU_SSE2) && (param->acc > 0)){
+			ssim->consim = consim_sse2;
+		}
 	}
 #endif
 
 	/*gaussian weigthing not implemented*/
+#if !defined(USE_INT_GAUSSIAN)
 	if(ssim->grid == 0){
 		ssim->grid = 1;
 		ssim->func8x8 = lum_8x8_gaussian;
 		ssim->func2x8 = NULL;
 		ssim->consim = consim_gaussian;
 	}
+#else
+	if(ssim->grid == 0){
+		ssim->grid = 1;
+		ssim->func8x8 = lum_8x8_gaussian_int;
+		ssim->func2x8 = NULL;
+		ssim->consim = consim_gaussian_int;
+	}
+#endif
 	if(ssim->grid > 4) ssim->grid = 4;
 
 	ssim->ssim_sum = 0.0;