67 |
****************************************************************************/ |
****************************************************************************/ |
68 |
|
|
69 |
#define XVID_AUTO_INCLUDE |
#define XVID_AUTO_INCLUDE |
70 |
|
/* First auto include this file to generate reference code for SIMD versions |
71 |
|
* This set of functions are good for educational purpose, because they're |
72 |
|
* straightforward to understand, use loops and so on... But obviously they |
73 |
|
* sux when it comes to speed */ |
74 |
|
#define REFERENCE_CODE |
75 |
|
|
76 |
/* 16x? filters */ |
/* 16x? filters */ |
77 |
|
|
79 |
#define TABLE FIR_Tab_16 |
#define TABLE FIR_Tab_16 |
80 |
|
|
81 |
#define STORE(d,s) (d) = (s) |
#define STORE(d,s) (d) = (s) |
82 |
|
#define FUNC_H H_Pass_16_C_ref |
83 |
|
#define FUNC_V V_Pass_16_C_ref |
84 |
|
#define FUNC_HA H_Pass_Avrg_16_C_ref |
85 |
|
#define FUNC_VA V_Pass_Avrg_16_C_ref |
86 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C_ref |
87 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C_ref |
88 |
|
|
89 |
|
#include "qpel.c" /* self-include ourself */ |
90 |
|
|
91 |
|
/* note: B-frame always uses Rnd=0... */ |
92 |
|
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
93 |
|
#define FUNC_H H_Pass_16_Add_C_ref |
94 |
|
#define FUNC_V V_Pass_16_Add_C_ref |
95 |
|
#define FUNC_HA H_Pass_Avrg_16_Add_C_ref |
96 |
|
#define FUNC_VA V_Pass_Avrg_16_Add_C_ref |
97 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C_ref |
98 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C_ref |
99 |
|
|
100 |
|
#include "qpel.c" /* self-include ourself */ |
101 |
|
|
102 |
|
#undef SIZE |
103 |
|
#undef TABLE |
104 |
|
|
105 |
|
/* 8x? filters */ |
106 |
|
|
107 |
|
#define SIZE 8 |
108 |
|
#define TABLE FIR_Tab_8 |
109 |
|
|
110 |
|
#define STORE(d,s) (d) = (s) |
111 |
|
#define FUNC_H H_Pass_8_C_ref |
112 |
|
#define FUNC_V V_Pass_8_C_ref |
113 |
|
#define FUNC_HA H_Pass_Avrg_8_C_ref |
114 |
|
#define FUNC_VA V_Pass_Avrg_8_C_ref |
115 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C_ref |
116 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C_ref |
117 |
|
|
118 |
|
#include "qpel.c" /* self-include ourself */ |
119 |
|
|
120 |
|
/* note: B-frame always uses Rnd=0... */ |
121 |
|
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
122 |
|
#define FUNC_H H_Pass_8_Add_C_ref |
123 |
|
#define FUNC_V V_Pass_8_Add_C_ref |
124 |
|
#define FUNC_HA H_Pass_Avrg_8_Add_C_ref |
125 |
|
#define FUNC_VA V_Pass_Avrg_8_Add_C_ref |
126 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C_ref |
127 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C_ref |
128 |
|
|
129 |
|
#include "qpel.c" /* self-include ourself */ |
130 |
|
|
131 |
|
#undef SIZE |
132 |
|
#undef TABLE |
133 |
|
|
134 |
|
/* Then we define more optimized C version where loops are unrolled, where |
135 |
|
* FIR coeffcients are not read from memory but are hardcoded in instructions |
136 |
|
* They should be faster */ |
137 |
|
#undef REFERENCE_CODE |
138 |
|
|
139 |
|
/* 16x? filters */ |
140 |
|
|
141 |
|
#define SIZE 16 |
142 |
|
|
143 |
|
#define STORE(d,s) (d) = (s) |
144 |
#define FUNC_H H_Pass_16_C |
#define FUNC_H H_Pass_16_C |
145 |
#define FUNC_V V_Pass_16_C |
#define FUNC_V V_Pass_16_C |
146 |
#define FUNC_HA H_Pass_Avrg_16_C |
#define FUNC_HA H_Pass_Avrg_16_C |
148 |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C |
149 |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C |
150 |
|
|
151 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
152 |
|
|
153 |
/* note: B-frame always uses Rnd=0... */ |
/* note: B-frame always uses Rnd=0... */ |
154 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
159 |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C |
160 |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C |
161 |
|
|
162 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
163 |
|
|
164 |
#undef SIZE |
#undef SIZE |
165 |
#undef TABLE |
#undef TABLE |
177 |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C |
178 |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C |
179 |
|
|
180 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
181 |
|
|
182 |
/* note: B-frame always uses Rnd=0... */ |
/* note: B-frame always uses Rnd=0... */ |
183 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
188 |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C |
189 |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C |
190 |
|
|
191 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
192 |
|
|
193 |
#undef SIZE |
#undef SIZE |
194 |
#undef TABLE |
#undef TABLE |
|
|
|
195 |
#undef XVID_AUTO_INCLUDE |
#undef XVID_AUTO_INCLUDE |
196 |
|
|
197 |
/* general-purpose hooks |
/* Global scope hooks |
|
* TODO: embed in enc/dec structure? |
|
198 |
****************************************************************************/ |
****************************************************************************/ |
199 |
|
|
200 |
XVID_QP_FUNCS *xvid_QP_Funcs = 0; |
XVID_QP_FUNCS *xvid_QP_Funcs = 0; |
201 |
XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0; |
XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0; |
202 |
|
|
203 |
/* plain C impl. declaration |
/* Reference plain C impl. declaration |
204 |
* TODO: should be declared elsewhere? |
****************************************************************************/ |
205 |
|
|
206 |
|
XVID_QP_FUNCS xvid_QP_Funcs_C_ref = { |
207 |
|
H_Pass_16_C_ref, H_Pass_Avrg_16_C_ref, H_Pass_Avrg_Up_16_C_ref, |
208 |
|
V_Pass_16_C_ref, V_Pass_Avrg_16_C_ref, V_Pass_Avrg_Up_16_C_ref, |
209 |
|
|
210 |
|
H_Pass_8_C_ref, H_Pass_Avrg_8_C_ref, H_Pass_Avrg_Up_8_C_ref, |
211 |
|
V_Pass_8_C_ref, V_Pass_Avrg_8_C_ref, V_Pass_Avrg_Up_8_C_ref |
212 |
|
}; |
213 |
|
|
214 |
|
XVID_QP_FUNCS xvid_QP_Add_Funcs_C_ref = { |
215 |
|
H_Pass_16_Add_C_ref, H_Pass_Avrg_16_Add_C_ref, H_Pass_Avrg_Up_16_Add_C_ref, |
216 |
|
V_Pass_16_Add_C_ref, V_Pass_Avrg_16_Add_C_ref, V_Pass_Avrg_Up_16_Add_C_ref, |
217 |
|
|
218 |
|
H_Pass_8_Add_C_ref, H_Pass_Avrg_8_Add_C_ref, H_Pass_Avrg_Up_8_Add_C_ref, |
219 |
|
V_Pass_8_Add_C_ref, V_Pass_Avrg_8_Add_C_ref, V_Pass_Avrg_Up_8_Add_C_ref |
220 |
|
}; |
221 |
|
|
222 |
|
/* Plain C impl. declaration (faster than ref one) |
223 |
****************************************************************************/ |
****************************************************************************/ |
224 |
|
|
225 |
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
239 |
}; |
}; |
240 |
|
|
241 |
/* mmx impl. declaration (see. qpel_mmx.asm |
/* mmx impl. declaration (see. qpel_mmx.asm |
|
* TODO: should be declared elsewhere? |
|
242 |
****************************************************************************/ |
****************************************************************************/ |
243 |
|
|
244 |
#ifdef ARCH_IS_IA32 |
#ifdef ARCH_IS_IA32 |
256 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx); |
257 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx); |
258 |
|
|
|
XVID_QP_FUNCS xvid_QP_Funcs_mmx = { |
|
|
xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx, |
|
|
xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx, |
|
|
|
|
|
xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx, |
|
|
xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx |
|
|
}; |
|
|
|
|
259 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx); |
260 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx); |
261 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx); |
270 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx); |
271 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx); |
272 |
|
|
273 |
|
XVID_QP_FUNCS xvid_QP_Funcs_mmx = { |
274 |
|
xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx, |
275 |
|
xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx, |
276 |
|
|
277 |
|
xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx, |
278 |
|
xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx |
279 |
|
}; |
280 |
|
|
281 |
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = { |
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = { |
282 |
xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx, |
xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx, |
283 |
xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx, |
xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx, |
287 |
}; |
}; |
288 |
#endif /* ARCH_IS_IA32 */ |
#endif /* ARCH_IS_IA32 */ |
289 |
|
|
290 |
|
|
291 |
|
/* altivec impl. declaration (see qpel_altivec.c) |
292 |
|
****************************************************************************/ |
293 |
|
|
294 |
|
#ifdef ARCH_IS_PPC |
295 |
|
|
296 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_16_Altivec_C); |
297 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_16_Altivec_C); |
298 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_16_Altivec_C); |
299 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_16_Altivec_C); |
300 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_16_Altivec_C); |
301 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_16_Altivec_C); |
302 |
|
|
303 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_8_Altivec_C); |
304 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_8_Altivec_C); |
305 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_8_Altivec_C); |
306 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_8_Altivec_C); |
307 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_8_Altivec_C); |
308 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_8_Altivec_C); |
309 |
|
|
310 |
|
|
311 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_16_Add_Altivec_C); |
312 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_16_Add_Altivec_C); |
313 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_16_Add_Altivec_C); |
314 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_16_Add_Altivec_C); |
315 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_16_Add_Altivec_C); |
316 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_16_Add_Altivec_C); |
317 |
|
|
318 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_8_Add_Altivec_C); |
319 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_8_Add_Altivec_C); |
320 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_8_Add_Altivec_C); |
321 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_8_Add_Altivec_C); |
322 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_8_Add_Altivec_C); |
323 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_8_Add_Altivec_C); |
324 |
|
|
325 |
|
XVID_QP_FUNCS xvid_QP_Funcs_Altivec_C = { |
326 |
|
H_Pass_16_Altivec_C, H_Pass_Avrg_16_Altivec_C, H_Pass_Avrg_Up_16_Altivec_C, |
327 |
|
V_Pass_16_Altivec_C, V_Pass_Avrg_16_Altivec_C, V_Pass_Avrg_Up_16_Altivec_C, |
328 |
|
|
329 |
|
H_Pass_8_Altivec_C, H_Pass_Avrg_8_Altivec_C, H_Pass_Avrg_Up_8_Altivec_C, |
330 |
|
V_Pass_8_Altivec_C, V_Pass_Avrg_8_Altivec_C, V_Pass_Avrg_Up_8_Altivec_C |
331 |
|
}; |
332 |
|
|
333 |
|
XVID_QP_FUNCS xvid_QP_Add_Funcs_Altivec_C = { |
334 |
|
H_Pass_16_Add_Altivec_C, H_Pass_Avrg_16_Add_Altivec_C, H_Pass_Avrg_Up_16_Add_Altivec_C, |
335 |
|
V_Pass_16_Add_Altivec_C, V_Pass_Avrg_16_Add_Altivec_C, V_Pass_Avrg_Up_16_Add_Altivec_C, |
336 |
|
|
337 |
|
H_Pass_8_Add_Altivec_C, H_Pass_Avrg_8_Add_Altivec_C, H_Pass_Avrg_Up_8_Add_Altivec_C, |
338 |
|
V_Pass_8_Add_Altivec_C, V_Pass_Avrg_8_Add_Altivec_C, V_Pass_Avrg_Up_8_Add_Altivec_C |
339 |
|
}; |
340 |
|
|
341 |
|
#endif /* ARCH_IS_PPC */ |
342 |
|
|
343 |
|
/* mmx impl. (for 64bit bus) declaration (see. qpel_mmx.asm |
344 |
|
****************************************************************************/ |
345 |
|
|
346 |
|
#ifdef ARCH_IS_X86_64 |
347 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_x86_64); |
348 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_x86_64); |
349 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_x86_64); |
350 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_16_x86_64); |
351 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_16_x86_64); |
352 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_16_x86_64); |
353 |
|
|
354 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_x86_64); |
355 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_x86_64); |
356 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_x86_64); |
357 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_x86_64); |
358 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_x86_64); |
359 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_x86_64); |
360 |
|
|
361 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_x86_64); |
362 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_x86_64); |
363 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_x86_64); |
364 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Add_16_x86_64); |
365 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Add_16_x86_64); |
366 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_Add_16_x86_64); |
367 |
|
|
368 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_Add_x86_64); |
369 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_Add_x86_64); |
370 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_Add_x86_64); |
371 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_Add_x86_64); |
372 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_x86_64); |
373 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_x86_64); |
374 |
|
|
375 |
|
XVID_QP_FUNCS xvid_QP_Funcs_x86_64 = { |
376 |
|
xvid_H_Pass_16_x86_64, xvid_H_Pass_Avrg_16_x86_64, xvid_H_Pass_Avrg_Up_16_x86_64, |
377 |
|
xvid_V_Pass_16_x86_64, xvid_V_Pass_Avrg_16_x86_64, xvid_V_Pass_Avrg_Up_16_x86_64, |
378 |
|
|
379 |
|
xvid_H_Pass_8_x86_64, xvid_H_Pass_Avrg_8_x86_64, xvid_H_Pass_Avrg_Up_8_x86_64, |
380 |
|
xvid_V_Pass_8_x86_64, xvid_V_Pass_Avrg_8_x86_64, xvid_V_Pass_Avrg_Up_8_x86_64 |
381 |
|
}; |
382 |
|
|
383 |
|
XVID_QP_FUNCS xvid_QP_Add_Funcs_x86_64 = { |
384 |
|
xvid_H_Pass_Add_16_x86_64, xvid_H_Pass_Avrg_Add_16_x86_64, xvid_H_Pass_Avrg_Up_Add_16_x86_64, |
385 |
|
xvid_V_Pass_Add_16_x86_64, xvid_V_Pass_Avrg_Add_16_x86_64, xvid_V_Pass_Avrg_Up_Add_16_x86_64, |
386 |
|
|
387 |
|
xvid_H_Pass_8_Add_x86_64, xvid_H_Pass_Avrg_8_Add_x86_64, xvid_H_Pass_Avrg_Up_8_Add_x86_64, |
388 |
|
xvid_V_Pass_8_Add_x86_64, xvid_V_Pass_Avrg_8_Add_x86_64, xvid_V_Pass_Avrg_Up_8_Add_x86_64, |
389 |
|
}; |
390 |
|
#endif /* ARCH_IS_X86_64 */ |
391 |
|
|
392 |
/* tables for ASM |
/* tables for ASM |
393 |
****************************************************************************/ |
****************************************************************************/ |
394 |
|
|
396 |
uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */ |
uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */ |
397 |
#endif |
#endif |
398 |
|
|
399 |
|
#ifdef ARCH_IS_X86_64 |
400 |
|
extern uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */ |
401 |
|
#endif |
402 |
|
|
403 |
/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) |
/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) |
404 |
* |
* |
405 |
* 17 tables, 2K each => 34K |
* 17 tables, 2K each => 34K |
407 |
* (for instance: (23,19,-6,3)=(20,20,-6,3)+(3,-1,0,0) |
* (for instance: (23,19,-6,3)=(20,20,-6,3)+(3,-1,0,0) |
408 |
* Using Symmetries (and bswap) could reduce further |
* Using Symmetries (and bswap) could reduce further |
409 |
* the memory to 7 tables (->14K). */ |
* the memory to 7 tables (->14K). */ |
410 |
|
#ifdef ARCH_IS_X86_64 |
411 |
|
extern int16_t xvid_FIR_1_0_0_0[256][4]; |
412 |
|
extern int16_t xvid_FIR_3_1_0_0[256][4]; |
413 |
|
extern int16_t xvid_FIR_6_3_1_0[256][4]; |
414 |
|
extern int16_t xvid_FIR_14_3_2_1[256][4]; |
415 |
|
extern int16_t xvid_FIR_20_6_3_1[256][4]; |
416 |
|
extern int16_t xvid_FIR_20_20_6_3[256][4]; |
417 |
|
extern int16_t xvid_FIR_23_19_6_3[256][4]; |
418 |
|
extern int16_t xvid_FIR_7_20_20_6[256][4]; |
419 |
|
extern int16_t xvid_FIR_6_20_20_6[256][4]; |
420 |
|
extern int16_t xvid_FIR_6_20_20_7[256][4]; |
421 |
|
extern int16_t xvid_FIR_3_6_20_20[256][4]; |
422 |
|
extern int16_t xvid_FIR_3_6_19_23[256][4]; |
423 |
|
extern int16_t xvid_FIR_1_3_6_20[256][4]; |
424 |
|
extern int16_t xvid_FIR_1_2_3_14[256][4]; |
425 |
|
extern int16_t xvid_FIR_0_1_3_6[256][4]; |
426 |
|
extern int16_t xvid_FIR_0_0_1_3[256][4]; |
427 |
|
extern int16_t xvid_FIR_0_0_0_1[256][4]; |
428 |
|
#else |
429 |
int16_t xvid_FIR_1_0_0_0[256][4]; |
int16_t xvid_FIR_1_0_0_0[256][4]; |
430 |
int16_t xvid_FIR_3_1_0_0[256][4]; |
int16_t xvid_FIR_3_1_0_0[256][4]; |
431 |
int16_t xvid_FIR_6_3_1_0[256][4]; |
int16_t xvid_FIR_6_3_1_0[256][4]; |
443 |
int16_t xvid_FIR_0_1_3_6[256][4]; |
int16_t xvid_FIR_0_1_3_6[256][4]; |
444 |
int16_t xvid_FIR_0_0_1_3[256][4]; |
int16_t xvid_FIR_0_0_1_3[256][4]; |
445 |
int16_t xvid_FIR_0_0_0_1[256][4]; |
int16_t xvid_FIR_0_0_0_1[256][4]; |
446 |
|
#endif |
447 |
|
|
448 |
static void Init_FIR_Table(int16_t Tab[][4], |
static void Init_FIR_Table(int16_t Tab[][4], |
449 |
int A, int B, int C, int D) |
int A, int B, int C, int D) |
495 |
|
|
496 |
#endif /* !XVID_AUTO_INCLUDE */ |
#endif /* !XVID_AUTO_INCLUDE */ |
497 |
|
|
498 |
|
#if defined(XVID_AUTO_INCLUDE) && defined(REFERENCE_CODE) |
499 |
|
|
500 |
/***************************************************************************** |
/***************************************************************************** |
501 |
* "reference" filters impl. in plain C |
* "reference" filters impl. in plain C |
502 |
****************************************************************************/ |
****************************************************************************/ |
503 |
|
|
|
#ifdef XVID_AUTO_INCLUDE |
|
|
|
|
504 |
static |
static |
505 |
void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
506 |
{ |
{ |
652 |
#undef FUNC_HA_UP |
#undef FUNC_HA_UP |
653 |
#undef FUNC_VA_UP |
#undef FUNC_VA_UP |
654 |
|
|
655 |
#endif /* XVID_AUTO_INCLUDE */ |
#elif defined(XVID_AUTO_INCLUDE) && !defined(REFERENCE_CODE) |
656 |
|
|
657 |
|
/***************************************************************************** |
658 |
|
* "fast" filters impl. in plain C |
659 |
|
****************************************************************************/ |
660 |
|
|
661 |
|
#define CLIP_STORE(D,C) \ |
662 |
|
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \ |
663 |
|
STORE(D, C) |
664 |
|
|
665 |
|
static void |
666 |
|
FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND) |
667 |
|
{ |
668 |
|
#if (SIZE==16) |
669 |
|
while(H-->0) { |
670 |
|
int C; |
671 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
672 |
|
CLIP_STORE(Dst[ 0],C); |
673 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
674 |
|
CLIP_STORE(Dst[ 1],C); |
675 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
676 |
|
CLIP_STORE(Dst[ 2],C); |
677 |
|
C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]); |
678 |
|
CLIP_STORE(Dst[ 3],C); |
679 |
|
C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]); |
680 |
|
CLIP_STORE(Dst[ 4],C); |
681 |
|
C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]); |
682 |
|
CLIP_STORE(Dst[ 5],C); |
683 |
|
C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]); |
684 |
|
CLIP_STORE(Dst[ 6],C); |
685 |
|
C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]); |
686 |
|
CLIP_STORE(Dst[ 7],C); |
687 |
|
C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]); |
688 |
|
CLIP_STORE(Dst[ 8],C); |
689 |
|
C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]); |
690 |
|
CLIP_STORE(Dst[ 9],C); |
691 |
|
C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]); |
692 |
|
CLIP_STORE(Dst[10],C); |
693 |
|
C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]); |
694 |
|
CLIP_STORE(Dst[11],C); |
695 |
|
C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]); |
696 |
|
CLIP_STORE(Dst[12],C); |
697 |
|
C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16]; |
698 |
|
CLIP_STORE(Dst[13],C); |
699 |
|
C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15]; |
700 |
|
CLIP_STORE(Dst[14],C); |
701 |
|
C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16]; |
702 |
|
CLIP_STORE(Dst[15],C); |
703 |
|
Src += BpS; |
704 |
|
Dst += BpS; |
705 |
|
} |
706 |
|
#else |
707 |
|
while(H-->0) { |
708 |
|
int C; |
709 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
710 |
|
CLIP_STORE(Dst[0],C); |
711 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
712 |
|
CLIP_STORE(Dst[1],C); |
713 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
714 |
|
CLIP_STORE(Dst[2],C); |
715 |
|
C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]); |
716 |
|
CLIP_STORE(Dst[3],C); |
717 |
|
C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]); |
718 |
|
CLIP_STORE(Dst[4],C); |
719 |
|
C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8]; |
720 |
|
CLIP_STORE(Dst[5],C); |
721 |
|
C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7]; |
722 |
|
CLIP_STORE(Dst[6],C); |
723 |
|
C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8]; |
724 |
|
CLIP_STORE(Dst[7],C); |
725 |
|
Src += BpS; |
726 |
|
Dst += BpS; |
727 |
|
} |
728 |
|
#endif |
729 |
|
} |
730 |
|
#undef CLIP_STORE |
731 |
|
|
732 |
|
#define CLIP_STORE(i,C) \ |
733 |
|
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \ |
734 |
|
C = (C+Src[i]+1-RND) >> 1; \ |
735 |
|
STORE(Dst[i], C) |
736 |
|
|
737 |
|
static void |
738 |
|
FUNC_HA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND) |
739 |
|
{ |
740 |
|
#if (SIZE==16) |
741 |
|
while(H-->0) { |
742 |
|
int C; |
743 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
744 |
|
CLIP_STORE(0,C); |
745 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
746 |
|
CLIP_STORE( 1,C); |
747 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
748 |
|
CLIP_STORE( 2,C); |
749 |
|
C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]); |
750 |
|
CLIP_STORE( 3,C); |
751 |
|
C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]); |
752 |
|
CLIP_STORE( 4,C); |
753 |
|
C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]); |
754 |
|
CLIP_STORE( 5,C); |
755 |
|
C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]); |
756 |
|
CLIP_STORE( 6,C); |
757 |
|
C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]); |
758 |
|
CLIP_STORE( 7,C); |
759 |
|
C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]); |
760 |
|
CLIP_STORE( 8,C); |
761 |
|
C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]); |
762 |
|
CLIP_STORE( 9,C); |
763 |
|
C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]); |
764 |
|
CLIP_STORE(10,C); |
765 |
|
C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]); |
766 |
|
CLIP_STORE(11,C); |
767 |
|
C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]); |
768 |
|
CLIP_STORE(12,C); |
769 |
|
C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16]; |
770 |
|
CLIP_STORE(13,C); |
771 |
|
C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15]; |
772 |
|
CLIP_STORE(14,C); |
773 |
|
C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16]; |
774 |
|
CLIP_STORE(15,C); |
775 |
|
Src += BpS; |
776 |
|
Dst += BpS; |
777 |
|
} |
778 |
|
#else |
779 |
|
while(H-->0) { |
780 |
|
int C; |
781 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
782 |
|
CLIP_STORE(0,C); |
783 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
784 |
|
CLIP_STORE(1,C); |
785 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
786 |
|
CLIP_STORE(2,C); |
787 |
|
C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]); |
788 |
|
CLIP_STORE(3,C); |
789 |
|
C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]); |
790 |
|
CLIP_STORE(4,C); |
791 |
|
C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8]; |
792 |
|
CLIP_STORE(5,C); |
793 |
|
C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7]; |
794 |
|
CLIP_STORE(6,C); |
795 |
|
C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8]; |
796 |
|
CLIP_STORE(7,C); |
797 |
|
Src += BpS; |
798 |
|
Dst += BpS; |
799 |
|
} |
800 |
|
#endif |
801 |
|
} |
802 |
|
#undef CLIP_STORE |
803 |
|
|
804 |
|
#define CLIP_STORE(i,C) \ |
805 |
|
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \ |
806 |
|
C = (C+Src[i+1]+1-RND) >> 1; \ |
807 |
|
STORE(Dst[i], C) |
808 |
|
|
809 |
|
static void |
810 |
|
FUNC_HA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND) |
811 |
|
{ |
812 |
|
#if (SIZE==16) |
813 |
|
while(H-->0) { |
814 |
|
int C; |
815 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
816 |
|
CLIP_STORE(0,C); |
817 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
818 |
|
CLIP_STORE( 1,C); |
819 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
820 |
|
CLIP_STORE( 2,C); |
821 |
|
C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]); |
822 |
|
CLIP_STORE( 3,C); |
823 |
|
C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]); |
824 |
|
CLIP_STORE( 4,C); |
825 |
|
C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]); |
826 |
|
CLIP_STORE( 5,C); |
827 |
|
C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]); |
828 |
|
CLIP_STORE( 6,C); |
829 |
|
C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]); |
830 |
|
CLIP_STORE( 7,C); |
831 |
|
C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]); |
832 |
|
CLIP_STORE( 8,C); |
833 |
|
C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]); |
834 |
|
CLIP_STORE( 9,C); |
835 |
|
C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]); |
836 |
|
CLIP_STORE(10,C); |
837 |
|
C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]); |
838 |
|
CLIP_STORE(11,C); |
839 |
|
C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]); |
840 |
|
CLIP_STORE(12,C); |
841 |
|
C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16]; |
842 |
|
CLIP_STORE(13,C); |
843 |
|
C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15]; |
844 |
|
CLIP_STORE(14,C); |
845 |
|
C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16]; |
846 |
|
CLIP_STORE(15,C); |
847 |
|
Src += BpS; |
848 |
|
Dst += BpS; |
849 |
|
} |
850 |
|
#else |
851 |
|
while(H-->0) { |
852 |
|
int C; |
853 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
854 |
|
CLIP_STORE(0,C); |
855 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
856 |
|
CLIP_STORE(1,C); |
857 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
858 |
|
CLIP_STORE(2,C); |
859 |
|
C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]); |
860 |
|
CLIP_STORE(3,C); |
861 |
|
C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]); |
862 |
|
CLIP_STORE(4,C); |
863 |
|
C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8]; |
864 |
|
CLIP_STORE(5,C); |
865 |
|
C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7]; |
866 |
|
CLIP_STORE(6,C); |
867 |
|
C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8]; |
868 |
|
CLIP_STORE(7,C); |
869 |
|
Src += BpS; |
870 |
|
Dst += BpS; |
871 |
|
} |
872 |
|
#endif |
873 |
|
} |
874 |
|
#undef CLIP_STORE |
875 |
|
|
876 |
|
////////////////////////////////////////////////////////// |
877 |
|
// vertical passes |
878 |
|
////////////////////////////////////////////////////////// |
879 |
|
// Note: for vertical passes, width (W) needs only be 8 or 16. |
880 |
|
|
881 |
|
#define CLIP_STORE(D,C) \ |
882 |
|
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \ |
883 |
|
STORE(D, C) |
884 |
|
|
885 |
|
static void |
886 |
|
FUNC_V(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND) |
887 |
|
{ |
888 |
|
#if (SIZE==16) |
889 |
|
while(H-->0) { |
890 |
|
int C; |
891 |
|
C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4]; |
892 |
|
CLIP_STORE(Dst[BpS* 0],C); |
893 |
|
C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5]; |
894 |
|
CLIP_STORE(Dst[BpS* 1],C); |
895 |
|
C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6]; |
896 |
|
CLIP_STORE(Dst[BpS* 2],C); |
897 |
|
C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]); |
898 |
|
CLIP_STORE(Dst[BpS* 3],C); |
899 |
|
C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]); |
900 |
|
CLIP_STORE(Dst[BpS* 4],C); |
901 |
|
C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]); |
902 |
|
CLIP_STORE(Dst[BpS* 5],C); |
903 |
|
C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]); |
904 |
|
CLIP_STORE(Dst[BpS* 6],C); |
905 |
|
C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]); |
906 |
|
CLIP_STORE(Dst[BpS* 7],C); |
907 |
|
C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]); |
908 |
|
CLIP_STORE(Dst[BpS* 8],C); |
909 |
|
C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]); |
910 |
|
CLIP_STORE(Dst[BpS* 9],C); |
911 |
|
C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]); |
912 |
|
CLIP_STORE(Dst[BpS*10],C); |
913 |
|
C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]); |
914 |
|
CLIP_STORE(Dst[BpS*11],C); |
915 |
|
C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]); |
916 |
|
CLIP_STORE(Dst[BpS*12],C); |
917 |
|
C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16]; |
918 |
|
CLIP_STORE(Dst[BpS*13],C); |
919 |
|
C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15]; |
920 |
|
CLIP_STORE(Dst[BpS*14],C); |
921 |
|
C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16]; |
922 |
|
CLIP_STORE(Dst[BpS*15],C); |
923 |
|
Src += 1; |
924 |
|
Dst += 1; |
925 |
|
} |
926 |
|
#else |
927 |
|
while(H-->0) { |
928 |
|
int C; |
929 |
|
C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4]; |
930 |
|
CLIP_STORE(Dst[BpS*0],C); |
931 |
|
C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5]; |
932 |
|
CLIP_STORE(Dst[BpS*1],C); |
933 |
|
C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6]; |
934 |
|
CLIP_STORE(Dst[BpS*2],C); |
935 |
|
C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]); |
936 |
|
CLIP_STORE(Dst[BpS*3],C); |
937 |
|
C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]); |
938 |
|
CLIP_STORE(Dst[BpS*4],C); |
939 |
|
C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8]; |
940 |
|
CLIP_STORE(Dst[BpS*5],C); |
941 |
|
C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7]; |
942 |
|
CLIP_STORE(Dst[BpS*6],C); |
943 |
|
C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8]; |
944 |
|
CLIP_STORE(Dst[BpS*7],C); |
945 |
|
Src += 1; |
946 |
|
Dst += 1; |
947 |
|
} |
948 |
|
#endif |
949 |
|
} |
950 |
|
#undef CLIP_STORE

/* Clip-and-average store used by FUNC_VA below.
 * C arrives as a FIR accumulator scaled by 32 (hence the 255<<5 clamp):
 * clamp to [0..255], downshift, then average with the co-located source
 * pixel Src[BpS*i] (rounding controlled by RND) before handing the result
 * to STORE (plain store, or add-average for the B-frame "_Add" variants
 * selected by the including file). */
#define CLIP_STORE(i,C) \
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
C = (C+Src[BpS*i]+1-RND) >> 1; \
STORE(Dst[BpS*i], C)

/* Vertical qpel FIR pass, averaged with the co-located source pixels
 * (the "Avrg" variant).  Template-expanded per SIZE (16 or 8 rows).
 *   Dst, Src : top of the first pixel column; both advance by 1 per
 *              loop iteration, so the loop walks H columns left→right
 *   H        : number of columns to filter (block width)
 *   BpS      : bytes per row = vertical stride between column samples
 *   RND      : rounding flag, 0 or 1 — folded into the 16-RND bias of
 *              the accumulator and the +1-RND of the averaging step
 * The filter is fully unrolled.  Interior rows use the symmetric taps
 * (-1, 3, -6, 20, 20, -6, 3, -1) (sum 32); the first and last three
 * rows use asymmetric tap sets because pixels above/below the block
 * are never read.  NOTE(review): taps appear to be the MPEG-4 ASP
 * quarter-pel interpolation filter — confirm against the spec. */
static void
FUNC_VA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		/* top edge: rows 0..2 use boundary-adjusted taps */
		C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
		CLIP_STORE( 1,C);
		C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
		CLIP_STORE( 2,C);
		/* interior rows 3..12: full symmetric 8-tap window */
		C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
		CLIP_STORE( 3,C);
		C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
		CLIP_STORE( 4,C);
		C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
		CLIP_STORE( 5,C);
		C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
		CLIP_STORE( 6,C);
		C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
		CLIP_STORE( 7,C);
		C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
		CLIP_STORE( 8,C);
		C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
		CLIP_STORE( 9,C);
		C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
		CLIP_STORE(10,C);
		C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
		CLIP_STORE(11,C);
		C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
		CLIP_STORE(12,C);
		/* bottom edge: rows 13..15 use boundary-adjusted taps */
		C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
		CLIP_STORE(13,C);
		C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
		CLIP_STORE(14,C);
		C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
		CLIP_STORE(15,C);
		/* next column */
		Src += 1;
		Dst += 1;
	}
#else
	/* SIZE==8: same structure, 8 output rows per column */
	while(H-->0) {
		int C;
		/* top edge: rows 0..2 */
		C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
		CLIP_STORE(1,C);
		C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
		CLIP_STORE(2,C);
		/* interior rows 3..4 */
		C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
		CLIP_STORE(3,C);
		C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
		CLIP_STORE(4,C);
		/* bottom edge: rows 5..7 */
		C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
		CLIP_STORE(5,C);
		C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
		CLIP_STORE(6,C);
		C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
		CLIP_STORE(7,C);
		/* next column */
		Src += 1;
		Dst += 1;
	}
#endif
}
#undef CLIP_STORE

/* Clip-and-average store used by FUNC_VA_UP below.
 * Identical to the FUNC_VA variant except the averaging partner is
 * Src[BpS*i+BpS] — the source pixel one row BELOW the output row.
 * NOTE(review): this offset is what makes the "_Up" (rounded-up
 * half-pel position) variant — confirm against the qpel position map. */
#define CLIP_STORE(i,C) \
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
C = (C+Src[BpS*i+BpS]+1-RND) >> 1; \
STORE(Dst[BpS*i], C)

/* Vertical qpel FIR pass, averaged with the NEXT source row
 * (the "Avrg_Up" variant).  Template-expanded per SIZE (16 or 8 rows).
 *   Dst, Src : top of the first pixel column; both advance by 1 per
 *              loop iteration, so the loop walks H columns left→right
 *   H        : number of columns to filter (block width)
 *   BpS      : bytes per row = vertical stride between column samples
 *   RND      : rounding flag, 0 or 1
 * The FIR accumulator code is byte-for-byte the same as FUNC_VA; only
 * the CLIP_STORE averaging offset above differs.  Interior rows use
 * the symmetric taps (-1, 3, -6, 20, 20, -6, 3, -1); the first/last
 * three rows use boundary-adjusted taps. */
static void
FUNC_VA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		/* top edge: rows 0..2 use boundary-adjusted taps */
		C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
		CLIP_STORE( 1,C);
		C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
		CLIP_STORE( 2,C);
		/* interior rows 3..12: full symmetric 8-tap window */
		C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
		CLIP_STORE( 3,C);
		C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
		CLIP_STORE( 4,C);
		C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
		CLIP_STORE( 5,C);
		C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
		CLIP_STORE( 6,C);
		C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
		CLIP_STORE( 7,C);
		C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
		CLIP_STORE( 8,C);
		C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
		CLIP_STORE( 9,C);
		C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
		CLIP_STORE(10,C);
		C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
		CLIP_STORE(11,C);
		C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
		CLIP_STORE(12,C);
		/* bottom edge: rows 13..15 use boundary-adjusted taps */
		C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
		CLIP_STORE(13,C);
		C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
		CLIP_STORE(14,C);
		C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
		CLIP_STORE(15,C);
		/* next column */
		Src += 1;
		Dst += 1;
	}
#else
	/* SIZE==8: same structure, 8 output rows per column */
	while(H-->0) {
		int C;
		/* top edge: rows 0..2 */
		C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
		CLIP_STORE(1,C);
		C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
		CLIP_STORE(2,C);
		/* interior rows 3..4 */
		C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
		CLIP_STORE(3,C);
		C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
		CLIP_STORE(4,C);
		/* bottom edge: rows 5..7 */
		C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
		CLIP_STORE(5,C);
		C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
		CLIP_STORE(6,C);
		C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
		CLIP_STORE(7,C);
		/* next column */
		Src += 1;
		Dst += 1;
	}
#endif
}
#undef CLIP_STORE

/* End of one template instantiation: tear down every per-instance macro
 * so this file can safely #include itself again with a different STORE
 * and a different set of FUNC_* names (see the self-include setup at the
 * top of the file). */
#undef STORE
#undef FUNC_H
#undef FUNC_V
#undef FUNC_HA
#undef FUNC_VA
#undef FUNC_HA_UP
#undef FUNC_VA_UP


#endif /* XVID_AUTO_INCLUDE && !defined(REF) */