872 |
DECLARE_ALIGNED_MATRIX(squares, 1, 4, uint32_t, CACHE_LINE); |
DECLARE_ALIGNED_MATRIX(squares, 1, 4, uint32_t, CACHE_LINE); |
873 |
|
|
874 |
int x, y, i, j; |
int x, y, i, j; |
875 |
uint8_t *orig = orig_image->y; |
uint8_t *orig_y = orig_image->y; |
876 |
|
uint8_t *orig_u = orig_image->u; |
877 |
|
uint8_t *orig_v = orig_image->v; |
878 |
|
|
879 |
for (y = 0; y < mb_height; y++) { |
for (y = 0; y < mb_height; y++) { |
880 |
for (x = 0; x < mb_width; x++) { |
for (x = 0; x < mb_width; x++) { |
882 |
uint32_t var4[4]; |
uint32_t var4[4]; |
883 |
uint32_t sum = 0, square = 0; |
uint32_t sum = 0, square = 0; |
884 |
|
|
885 |
|
/* y-blocks */ |
886 |
for (j = 0; j < 2; j++) { |
for (j = 0; j < 2; j++) { |
887 |
for (i = 0; i < 2; i++) { |
for (i = 0; i < 2; i++) { |
888 |
int lsum = blocksum8(orig + ((y<<4) + (j<<3))*stride + (x<<4) + (i<<3), |
int lsum = blocksum8(orig_y + ((y<<4) + (j<<3))*stride + (x<<4) + (i<<3), |
889 |
stride, sums, squares); |
stride, sums, squares); |
890 |
int lsquare = (squares[0] + squares[1] + squares[2] + squares[3]); |
int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6; |
891 |
|
|
892 |
sum += lsum; |
sum += lsum; |
893 |
square += lsquare; |
square += lsquare; |
898 |
var4[3] = (squares[3]<<4) - sums[3]*sums[3]; |
var4[3] = (squares[3]<<4) - sums[3]*sums[3]; |
899 |
|
|
900 |
pMB->rel_var8[j*2 + i] = lsquare - lsum*lsum; |
pMB->rel_var8[j*2 + i] = lsquare - lsum*lsum; |
|
|
|
901 |
if (pMB->rel_var8[j*2 + i]) |
if (pMB->rel_var8[j*2 + i]) |
902 |
pMB->rel_var8[j*2 + i] = 256*(var4[0] + var4[1] + var4[2] + var4[3]) / |
pMB->rel_var8[j*2 + i] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) / |
903 |
pMB->rel_var8[j*2 + i]; /* 4*(Var(Di)/Var(D)) */ |
pMB->rel_var8[j*2 + i]; /* 4*(Var(Di)/Var(D)) */ |
904 |
else |
else |
905 |
pMB->rel_var8[j*2 + i] = 64; |
pMB->rel_var8[j*2 + i] = 64; |
906 |
} |
} |
907 |
} |
} |
908 |
|
|
909 |
|
/* u */ |
910 |
|
{ |
911 |
|
int lsum = blocksum8(orig_u + (y<<3)*(stride>>1) + (x<<3), |
912 |
|
stride, sums, squares); |
913 |
|
int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6; |
914 |
|
|
915 |
|
sum += lsum; |
916 |
|
square += lsquare; |
917 |
|
|
918 |
|
var4[0] = (squares[0]<<4) - sums[0]*sums[0]; |
919 |
|
var4[1] = (squares[1]<<4) - sums[1]*sums[1]; |
920 |
|
var4[2] = (squares[2]<<4) - sums[2]*sums[2]; |
921 |
|
var4[3] = (squares[3]<<4) - sums[3]*sums[3]; |
922 |
|
|
923 |
|
pMB->rel_var8[4] = lsquare - lsum*lsum; |
924 |
|
if (pMB->rel_var8[4]) |
925 |
|
pMB->rel_var8[4] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) / |
926 |
|
pMB->rel_var8[4]; /* 4*(Var(Di)/Var(D)) */ |
927 |
|
else |
928 |
|
pMB->rel_var8[4] = 64; |
929 |
|
} |
930 |
|
|
931 |
|
/* v */ |
932 |
|
{ |
933 |
|
int lsum = blocksum8(orig_v + (y<<3)*(stride>>1) + (x<<3), |
934 |
|
stride, sums, squares); |
935 |
|
int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6; |
936 |
|
|
937 |
|
sum += lsum; |
938 |
|
square += lsquare; |
939 |
|
|
940 |
|
var4[0] = (squares[0]<<4) - sums[0]*sums[0]; |
941 |
|
var4[1] = (squares[1]<<4) - sums[1]*sums[1]; |
942 |
|
var4[2] = (squares[2]<<4) - sums[2]*sums[2]; |
943 |
|
var4[3] = (squares[3]<<4) - sums[3]*sums[3]; |
944 |
|
|
945 |
|
pMB->rel_var8[5] = lsquare - lsum*lsum; |
946 |
|
if (pMB->rel_var8[5]) |
947 |
|
pMB->rel_var8[5] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) / |
948 |
|
pMB->rel_var8[5]; /* 4*(Var(Di)/Var(D)) */ |
949 |
|
else |
950 |
|
pMB->rel_var8[5] = 64; |
951 |
|
} |
952 |
|
|
953 |
} |
} |
954 |
} |
} |
955 |
} |
} |