108 |
const uint32_t stride, |
const uint32_t stride, |
109 |
const uint32_t rounding) |
const uint32_t rounding) |
110 |
{ |
{ |
111 |
uint32_t i, j; |
intptr_t j; |
112 |
|
|
113 |
for (j = 0; j < 8; j++) { |
if (rounding) |
114 |
for (i = 0; i < 8; i++) { |
for (j = 7*stride; j >= 0; j-=stride) |
115 |
|
{ |
116 |
int16_t tot = |
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] )>>1); |
117 |
(int32_t) src[j * stride + i] + (int32_t) src[j * stride + i + |
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] )>>1); |
118 |
1]; |
dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] )>>1); |
119 |
|
dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] )>>1); |
120 |
tot = (int32_t) ((tot + 1 - rounding) >> 1); |
dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] )>>1); |
121 |
dst[j * stride + i] = (uint8_t) tot; |
dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] )>>1); |
122 |
} |
dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] )>>1); |
123 |
|
dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] )>>1); |
124 |
|
} |
125 |
|
else |
126 |
|
for (j = 0; j < 8*stride; j+=stride) /* forward or backwards? Who knows ... */ |
127 |
|
{ |
128 |
|
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] + 1)>>1); |
129 |
|
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] + 1)>>1); |
130 |
|
dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] + 1)>>1); |
131 |
|
dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] + 1)>>1); |
132 |
|
dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] + 1)>>1); |
133 |
|
dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] + 1)>>1); |
134 |
|
dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] + 1)>>1); |
135 |
|
dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] + 1)>>1); |
136 |
} |
} |
137 |
} |
} |
138 |
|
|
144 |
const uint32_t stride, |
const uint32_t stride, |
145 |
const uint32_t rounding) |
const uint32_t rounding) |
146 |
{ |
{ |
147 |
uint32_t i, j; |
intptr_t j; |
148 |
|
|
|
for (j = 0; j < 8; j++) { |
|
|
for (i = 0; i < 8; i++) { |
|
|
int16_t tot = src[j * stride + i] + src[j * stride + i + stride]; |
|
149 |
|
|
150 |
tot = ((tot + 1 - rounding) >> 1); |
if (rounding) |
151 |
dst[j * stride + i] = (uint8_t) tot; |
for (j = 0; j < 8*stride; j+=stride) /* forward is better. Some automatic prefetch perhaps. */ |
152 |
} |
{ |
153 |
|
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] )>>1); |
154 |
|
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] )>>1); |
155 |
|
dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] )>>1); |
156 |
|
dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] )>>1); |
157 |
|
dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] )>>1); |
158 |
|
dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] )>>1); |
159 |
|
dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] )>>1); |
160 |
|
dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] )>>1); |
161 |
|
} |
162 |
|
else |
163 |
|
for (j = 0; j < 8*stride; j+=stride) |
164 |
|
{ |
165 |
|
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] + 1)>>1); |
166 |
|
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] + 1)>>1); |
167 |
|
dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] + 1)>>1); |
168 |
|
dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] + 1)>>1); |
169 |
|
dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] + 1)>>1); |
170 |
|
dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] + 1)>>1); |
171 |
|
dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] + 1)>>1); |
172 |
|
dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] + 1)>>1); |
173 |
} |
} |
174 |
} |
} |
175 |
|
|
180 |
const uint32_t stride, |
const uint32_t stride, |
181 |
const uint32_t rounding) |
const uint32_t rounding) |
182 |
{ |
{ |
183 |
uint32_t i, j; |
intptr_t j; |
184 |
|
|
185 |
for (j = 0; j < 8; j++) { |
if (rounding) |
186 |
for (i = 0; i < 8; i++) { |
for (j = 7*stride; j >= 0; j-=stride) |
187 |
int16_t tot = |
{ |
188 |
src[j * stride + i] + src[j * stride + i + 1] + |
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2); |
189 |
src[j * stride + i + stride] + src[j * stride + i + stride + |
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2); |
190 |
1]; |
dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +1)>>2); |
191 |
tot = ((tot + 2 - rounding) >> 2); |
dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +1)>>2); |
192 |
dst[j * stride + i] = (uint8_t) tot; |
dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +1)>>2); |
193 |
} |
dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +1)>>2); |
194 |
|
dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +1)>>2); |
195 |
|
dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2); |
196 |
|
} |
197 |
|
else |
198 |
|
for (j = 7*stride; j >= 0; j-=stride) |
199 |
|
{ |
200 |
|
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2); |
201 |
|
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2); |
202 |
|
dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +2)>>2); |
203 |
|
dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +2)>>2); |
204 |
|
dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +2)>>2); |
205 |
|
dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +2)>>2); |
206 |
|
dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +2)>>2); |
207 |
|
dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +2)>>2); |
208 |
} |
} |
209 |
} |
} |
210 |
|
|