25 |
|
|
26 |
#ifndef XVID_AUTO_INCLUDE |
#ifndef XVID_AUTO_INCLUDE |
27 |
|
|
28 |
|
#include <stdio.h> |
29 |
|
|
30 |
#include "../portab.h" |
#include "../portab.h" |
31 |
#include "qpel.h" |
#include "qpel.h" |
32 |
|
|
69 |
****************************************************************************/ |
****************************************************************************/ |
70 |
|
|
71 |
#define XVID_AUTO_INCLUDE |
#define XVID_AUTO_INCLUDE |
72 |
|
/* First auto include this file to generate reference code for SIMD versions |
73 |
|
* This set of functions are good for educational purpose, because they're |
74 |
|
* straightforward to understand, use loops and so on... But obviously they |
75 |
|
* sux when it comes to speed */ |
76 |
|
#define REFERENCE_CODE |
77 |
|
|
78 |
/* 16x? filters */ |
/* 16x? filters */ |
79 |
|
|
81 |
#define TABLE FIR_Tab_16 |
#define TABLE FIR_Tab_16 |
82 |
|
|
83 |
#define STORE(d,s) (d) = (s) |
#define STORE(d,s) (d) = (s) |
84 |
|
#define FUNC_H H_Pass_16_C_ref |
85 |
|
#define FUNC_V V_Pass_16_C_ref |
86 |
|
#define FUNC_HA H_Pass_Avrg_16_C_ref |
87 |
|
#define FUNC_VA V_Pass_Avrg_16_C_ref |
88 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C_ref |
89 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C_ref |
90 |
|
|
91 |
|
#include "qpel.c" /* self-include ourself */ |
92 |
|
|
93 |
|
/* note: B-frame always uses Rnd=0... */ |
94 |
|
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
95 |
|
#define FUNC_H H_Pass_16_Add_C_ref |
96 |
|
#define FUNC_V V_Pass_16_Add_C_ref |
97 |
|
#define FUNC_HA H_Pass_Avrg_16_Add_C_ref |
98 |
|
#define FUNC_VA V_Pass_Avrg_16_Add_C_ref |
99 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C_ref |
100 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C_ref |
101 |
|
|
102 |
|
#include "qpel.c" /* self-include ourself */ |
103 |
|
|
104 |
|
#undef SIZE |
105 |
|
#undef TABLE |
106 |
|
|
107 |
|
/* 8x? filters */ |
108 |
|
|
109 |
|
#define SIZE 8 |
110 |
|
#define TABLE FIR_Tab_8 |
111 |
|
|
112 |
|
#define STORE(d,s) (d) = (s) |
113 |
|
#define FUNC_H H_Pass_8_C_ref |
114 |
|
#define FUNC_V V_Pass_8_C_ref |
115 |
|
#define FUNC_HA H_Pass_Avrg_8_C_ref |
116 |
|
#define FUNC_VA V_Pass_Avrg_8_C_ref |
117 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C_ref |
118 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C_ref |
119 |
|
|
120 |
|
#include "qpel.c" /* self-include ourself */ |
121 |
|
|
122 |
|
/* note: B-frame always uses Rnd=0... */ |
123 |
|
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
124 |
|
#define FUNC_H H_Pass_8_Add_C_ref |
125 |
|
#define FUNC_V V_Pass_8_Add_C_ref |
126 |
|
#define FUNC_HA H_Pass_Avrg_8_Add_C_ref |
127 |
|
#define FUNC_VA V_Pass_Avrg_8_Add_C_ref |
128 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C_ref |
129 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C_ref |
130 |
|
|
131 |
|
#include "qpel.c" /* self-include ourself */ |
132 |
|
|
133 |
|
#undef SIZE |
134 |
|
#undef TABLE |
135 |
|
|
136 |
|
/* Then we define more optimized C version where loops are unrolled, where |
137 |
|
* FIR coeffcients are not read from memory but are hardcoded in instructions |
138 |
|
* They should be faster */ |
139 |
|
#undef REFERENCE_CODE |
140 |
|
|
141 |
|
/* 16x? filters */ |
142 |
|
|
143 |
|
#define SIZE 16 |
144 |
|
|
145 |
|
#define STORE(d,s) (d) = (s) |
146 |
#define FUNC_H H_Pass_16_C |
#define FUNC_H H_Pass_16_C |
147 |
#define FUNC_V V_Pass_16_C |
#define FUNC_V V_Pass_16_C |
148 |
#define FUNC_HA H_Pass_Avrg_16_C |
#define FUNC_HA H_Pass_Avrg_16_C |
150 |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C |
151 |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C |
152 |
|
|
153 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
154 |
|
|
155 |
/* note: B-frame always uses Rnd=0... */ |
/* note: B-frame always uses Rnd=0... */ |
156 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
161 |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C |
162 |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C |
163 |
|
|
164 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
165 |
|
|
166 |
#undef SIZE |
#undef SIZE |
167 |
#undef TABLE |
#undef TABLE |
179 |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C |
180 |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C |
181 |
|
|
182 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
183 |
|
|
184 |
/* note: B-frame always uses Rnd=0... */ |
/* note: B-frame always uses Rnd=0... */ |
185 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
190 |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C |
191 |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C |
192 |
|
|
193 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
194 |
|
|
195 |
#undef SIZE |
#undef SIZE |
196 |
#undef TABLE |
#undef TABLE |
|
|
|
197 |
#undef XVID_AUTO_INCLUDE |
#undef XVID_AUTO_INCLUDE |
198 |
|
|
199 |
/* general-purpose hooks |
/* Global scope hooks |
|
* TODO: embed in enc/dec structure? |
|
200 |
****************************************************************************/ |
****************************************************************************/ |
201 |
|
|
202 |
XVID_QP_FUNCS *xvid_QP_Funcs = 0; |
XVID_QP_FUNCS *xvid_QP_Funcs = NULL; |
203 |
XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0; |
XVID_QP_FUNCS *xvid_QP_Add_Funcs = NULL; |
204 |
|
|
205 |
|
/* Reference plain C impl. declaration |
206 |
|
****************************************************************************/ |
207 |
|
|
208 |
|
XVID_QP_FUNCS xvid_QP_Funcs_C_ref = { |
209 |
|
H_Pass_16_C_ref, H_Pass_Avrg_16_C_ref, H_Pass_Avrg_Up_16_C_ref, |
210 |
|
V_Pass_16_C_ref, V_Pass_Avrg_16_C_ref, V_Pass_Avrg_Up_16_C_ref, |
211 |
|
|
212 |
|
H_Pass_8_C_ref, H_Pass_Avrg_8_C_ref, H_Pass_Avrg_Up_8_C_ref, |
213 |
|
V_Pass_8_C_ref, V_Pass_Avrg_8_C_ref, V_Pass_Avrg_Up_8_C_ref |
214 |
|
}; |
215 |
|
|
216 |
|
XVID_QP_FUNCS xvid_QP_Add_Funcs_C_ref = { |
217 |
|
H_Pass_16_Add_C_ref, H_Pass_Avrg_16_Add_C_ref, H_Pass_Avrg_Up_16_Add_C_ref, |
218 |
|
V_Pass_16_Add_C_ref, V_Pass_Avrg_16_Add_C_ref, V_Pass_Avrg_Up_16_Add_C_ref, |
219 |
|
|
220 |
/* plain C impl. declaration |
H_Pass_8_Add_C_ref, H_Pass_Avrg_8_Add_C_ref, H_Pass_Avrg_Up_8_Add_C_ref, |
221 |
* TODO: should be declared elsewhere? |
V_Pass_8_Add_C_ref, V_Pass_Avrg_8_Add_C_ref, V_Pass_Avrg_Up_8_Add_C_ref |
222 |
|
}; |
223 |
|
|
224 |
|
/* Plain C impl. declaration (faster than ref one) |
225 |
****************************************************************************/ |
****************************************************************************/ |
226 |
|
|
227 |
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
241 |
}; |
}; |
242 |
|
|
243 |
/* mmx impl. declaration (see. qpel_mmx.asm |
/* mmx impl. declaration (see. qpel_mmx.asm |
|
* TODO: should be declared elsewhere? |
|
244 |
****************************************************************************/ |
****************************************************************************/ |
245 |
|
|
246 |
#ifdef ARCH_IS_IA32 |
#ifdef ARCH_IS_IA32 |
258 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx); |
259 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx); |
260 |
|
|
|
XVID_QP_FUNCS xvid_QP_Funcs_mmx = { |
|
|
xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx, |
|
|
xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx, |
|
|
|
|
|
xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx, |
|
|
xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx |
|
|
}; |
|
|
|
|
261 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx); |
262 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx); |
263 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx); |
272 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx); |
273 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx); |
274 |
|
|
275 |
|
XVID_QP_FUNCS xvid_QP_Funcs_mmx = { |
276 |
|
xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx, |
277 |
|
xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx, |
278 |
|
|
279 |
|
xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx, |
280 |
|
xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx |
281 |
|
}; |
282 |
|
|
283 |
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = { |
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = { |
284 |
xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx, |
xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx, |
285 |
xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx, |
xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx, |
289 |
}; |
}; |
290 |
#endif /* ARCH_IS_IA32 */ |
#endif /* ARCH_IS_IA32 */ |
291 |
|
|
292 |
|
|
293 |
|
/* altivec impl. declaration (see qpel_altivec.c) |
294 |
|
****************************************************************************/ |
295 |
|
|
296 |
|
#ifdef ARCH_IS_PPC |
297 |
|
|
298 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_16_Altivec_C); |
299 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_16_Altivec_C); |
300 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_16_Altivec_C); |
301 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_16_Altivec_C); |
302 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_16_Altivec_C); |
303 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_16_Altivec_C); |
304 |
|
|
305 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_8_Altivec_C); |
306 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_8_Altivec_C); |
307 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_8_Altivec_C); |
308 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_8_Altivec_C); |
309 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_8_Altivec_C); |
310 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_8_Altivec_C); |
311 |
|
|
312 |
|
|
313 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_16_Add_Altivec_C); |
314 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_16_Add_Altivec_C); |
315 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_16_Add_Altivec_C); |
316 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_16_Add_Altivec_C); |
317 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_16_Add_Altivec_C); |
318 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_16_Add_Altivec_C); |
319 |
|
|
320 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_8_Add_Altivec_C); |
321 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_8_Add_Altivec_C); |
322 |
|
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_8_Add_Altivec_C); |
323 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_8_Add_Altivec_C); |
324 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_8_Add_Altivec_C); |
325 |
|
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_8_Add_Altivec_C); |
326 |
|
|
327 |
|
XVID_QP_FUNCS xvid_QP_Funcs_Altivec_C = { |
328 |
|
H_Pass_16_Altivec_C, H_Pass_Avrg_16_Altivec_C, H_Pass_Avrg_Up_16_Altivec_C, |
329 |
|
V_Pass_16_Altivec_C, V_Pass_Avrg_16_Altivec_C, V_Pass_Avrg_Up_16_Altivec_C, |
330 |
|
|
331 |
|
H_Pass_8_Altivec_C, H_Pass_Avrg_8_Altivec_C, H_Pass_Avrg_Up_8_Altivec_C, |
332 |
|
V_Pass_8_Altivec_C, V_Pass_Avrg_8_Altivec_C, V_Pass_Avrg_Up_8_Altivec_C |
333 |
|
}; |
334 |
|
|
335 |
|
XVID_QP_FUNCS xvid_QP_Add_Funcs_Altivec_C = { |
336 |
|
H_Pass_16_Add_Altivec_C, H_Pass_Avrg_16_Add_Altivec_C, H_Pass_Avrg_Up_16_Add_Altivec_C, |
337 |
|
V_Pass_16_Add_Altivec_C, V_Pass_Avrg_16_Add_Altivec_C, V_Pass_Avrg_Up_16_Add_Altivec_C, |
338 |
|
|
339 |
|
H_Pass_8_Add_Altivec_C, H_Pass_Avrg_8_Add_Altivec_C, H_Pass_Avrg_Up_8_Add_Altivec_C, |
340 |
|
V_Pass_8_Add_Altivec_C, V_Pass_Avrg_8_Add_Altivec_C, V_Pass_Avrg_Up_8_Add_Altivec_C |
341 |
|
}; |
342 |
|
|
343 |
|
#endif /* ARCH_IS_PPC */ |
344 |
|
|
345 |
|
/* mmx impl. (for 64bit bus) declaration (see. qpel_mmx.asm |
346 |
|
****************************************************************************/ |
347 |
|
|
348 |
|
#ifdef ARCH_IS_X86_64 |
349 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_x86_64); |
350 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_x86_64); |
351 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_x86_64); |
352 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_16_x86_64); |
353 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_16_x86_64); |
354 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_16_x86_64); |
355 |
|
|
356 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_x86_64); |
357 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_x86_64); |
358 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_x86_64); |
359 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_x86_64); |
360 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_x86_64); |
361 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_x86_64); |
362 |
|
|
363 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_x86_64); |
364 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_x86_64); |
365 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_x86_64); |
366 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Add_16_x86_64); |
367 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Add_16_x86_64); |
368 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_Add_16_x86_64); |
369 |
|
|
370 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_Add_x86_64); |
371 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_Add_x86_64); |
372 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_Add_x86_64); |
373 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_Add_x86_64); |
374 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_x86_64); |
375 |
|
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_x86_64); |
376 |
|
|
377 |
|
XVID_QP_FUNCS xvid_QP_Funcs_x86_64 = { |
378 |
|
xvid_H_Pass_16_x86_64, xvid_H_Pass_Avrg_16_x86_64, xvid_H_Pass_Avrg_Up_16_x86_64, |
379 |
|
xvid_V_Pass_16_x86_64, xvid_V_Pass_Avrg_16_x86_64, xvid_V_Pass_Avrg_Up_16_x86_64, |
380 |
|
|
381 |
|
xvid_H_Pass_8_x86_64, xvid_H_Pass_Avrg_8_x86_64, xvid_H_Pass_Avrg_Up_8_x86_64, |
382 |
|
xvid_V_Pass_8_x86_64, xvid_V_Pass_Avrg_8_x86_64, xvid_V_Pass_Avrg_Up_8_x86_64 |
383 |
|
}; |
384 |
|
|
385 |
|
XVID_QP_FUNCS xvid_QP_Add_Funcs_x86_64 = { |
386 |
|
xvid_H_Pass_Add_16_x86_64, xvid_H_Pass_Avrg_Add_16_x86_64, xvid_H_Pass_Avrg_Up_Add_16_x86_64, |
387 |
|
xvid_V_Pass_Add_16_x86_64, xvid_V_Pass_Avrg_Add_16_x86_64, xvid_V_Pass_Avrg_Up_Add_16_x86_64, |
388 |
|
|
389 |
|
xvid_H_Pass_8_Add_x86_64, xvid_H_Pass_Avrg_8_Add_x86_64, xvid_H_Pass_Avrg_Up_8_Add_x86_64, |
390 |
|
xvid_V_Pass_8_Add_x86_64, xvid_V_Pass_Avrg_8_Add_x86_64, xvid_V_Pass_Avrg_Up_8_Add_x86_64, |
391 |
|
}; |
392 |
|
#endif /* ARCH_IS_X86_64 */ |
393 |
|
|
394 |
/* tables for ASM |
/* tables for ASM |
395 |
****************************************************************************/ |
****************************************************************************/ |
396 |
|
|
397 |
|
|
398 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
399 |
|
/* These symbols will be used outsie this file, so tell the compiler |
400 |
|
* they're global. Only ia32 will define them in this file, x86_64 |
401 |
|
* will do in the assembly files */ |
402 |
|
extern uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */ |
403 |
|
|
404 |
|
extern int16_t xvid_FIR_1_0_0_0[256][4]; |
405 |
|
extern int16_t xvid_FIR_3_1_0_0[256][4]; |
406 |
|
extern int16_t xvid_FIR_6_3_1_0[256][4]; |
407 |
|
extern int16_t xvid_FIR_14_3_2_1[256][4]; |
408 |
|
extern int16_t xvid_FIR_20_6_3_1[256][4]; |
409 |
|
extern int16_t xvid_FIR_20_20_6_3[256][4]; |
410 |
|
extern int16_t xvid_FIR_23_19_6_3[256][4]; |
411 |
|
extern int16_t xvid_FIR_7_20_20_6[256][4]; |
412 |
|
extern int16_t xvid_FIR_6_20_20_6[256][4]; |
413 |
|
extern int16_t xvid_FIR_6_20_20_7[256][4]; |
414 |
|
extern int16_t xvid_FIR_3_6_20_20[256][4]; |
415 |
|
extern int16_t xvid_FIR_3_6_19_23[256][4]; |
416 |
|
extern int16_t xvid_FIR_1_3_6_20[256][4]; |
417 |
|
extern int16_t xvid_FIR_1_2_3_14[256][4]; |
418 |
|
extern int16_t xvid_FIR_0_1_3_6[256][4]; |
419 |
|
extern int16_t xvid_FIR_0_0_1_3[256][4]; |
420 |
|
extern int16_t xvid_FIR_0_0_0_1[256][4]; |
421 |
|
#endif |
422 |
|
|
423 |
|
/* Arrays definitions, according to the target platform */ |
424 |
#ifdef ARCH_IS_IA32 |
#ifdef ARCH_IS_IA32 |
425 |
uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */ |
uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */ |
426 |
#endif |
#endif |
427 |
|
|
428 |
/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) |
#if !defined(ARCH_IS_X86_64) |
429 |
* |
/* Only ia32 will use these tables outside this file so mark them |
430 |
* 17 tables, 2K each => 34K |
* static for all other archs */ |
431 |
* Mirroring can be acheived composing 11 basic tables |
#if defined(ARCH_IS_IA32) |
432 |
* (for instance: (23,19,-6,3)=(20,20,-6,3)+(3,-1,0,0) |
#define __SCOPE |
433 |
* Using Symmetries (and bswap) could reduce further |
#else |
434 |
* the memory to 7 tables (->14K). */ |
#define __SCOPE static |
435 |
|
#endif |
436 |
int16_t xvid_FIR_1_0_0_0[256][4]; |
__SCOPE int16_t xvid_FIR_1_0_0_0[256][4]; |
437 |
int16_t xvid_FIR_3_1_0_0[256][4]; |
__SCOPE int16_t xvid_FIR_3_1_0_0[256][4]; |
438 |
int16_t xvid_FIR_6_3_1_0[256][4]; |
__SCOPE int16_t xvid_FIR_6_3_1_0[256][4]; |
439 |
int16_t xvid_FIR_14_3_2_1[256][4]; |
__SCOPE int16_t xvid_FIR_14_3_2_1[256][4]; |
440 |
int16_t xvid_FIR_20_6_3_1[256][4]; |
__SCOPE int16_t xvid_FIR_20_6_3_1[256][4]; |
441 |
int16_t xvid_FIR_20_20_6_3[256][4]; |
__SCOPE int16_t xvid_FIR_20_20_6_3[256][4]; |
442 |
int16_t xvid_FIR_23_19_6_3[256][4]; |
__SCOPE int16_t xvid_FIR_23_19_6_3[256][4]; |
443 |
int16_t xvid_FIR_7_20_20_6[256][4]; |
__SCOPE int16_t xvid_FIR_7_20_20_6[256][4]; |
444 |
int16_t xvid_FIR_6_20_20_6[256][4]; |
__SCOPE int16_t xvid_FIR_6_20_20_6[256][4]; |
445 |
int16_t xvid_FIR_6_20_20_7[256][4]; |
__SCOPE int16_t xvid_FIR_6_20_20_7[256][4]; |
446 |
int16_t xvid_FIR_3_6_20_20[256][4]; |
__SCOPE int16_t xvid_FIR_3_6_20_20[256][4]; |
447 |
int16_t xvid_FIR_3_6_19_23[256][4]; |
__SCOPE int16_t xvid_FIR_3_6_19_23[256][4]; |
448 |
int16_t xvid_FIR_1_3_6_20[256][4]; |
__SCOPE int16_t xvid_FIR_1_3_6_20[256][4]; |
449 |
int16_t xvid_FIR_1_2_3_14[256][4]; |
__SCOPE int16_t xvid_FIR_1_2_3_14[256][4]; |
450 |
int16_t xvid_FIR_0_1_3_6[256][4]; |
__SCOPE int16_t xvid_FIR_0_1_3_6[256][4]; |
451 |
int16_t xvid_FIR_0_0_1_3[256][4]; |
__SCOPE int16_t xvid_FIR_0_0_1_3[256][4]; |
452 |
int16_t xvid_FIR_0_0_0_1[256][4]; |
__SCOPE int16_t xvid_FIR_0_0_0_1[256][4]; |
453 |
|
#endif |
454 |
|
|
455 |
static void Init_FIR_Table(int16_t Tab[][4], |
static void Init_FIR_Table(int16_t Tab[][4], |
456 |
int A, int B, int C, int D) |
int A, int B, int C, int D) |
465 |
} |
} |
466 |
|
|
467 |
|
|
468 |
void xvid_Init_QP() |
void xvid_Init_QP(void) |
469 |
{ |
{ |
470 |
#ifdef ARCH_IS_IA32 |
#ifdef ARCH_IS_IA32 |
471 |
int i; |
int i; |
502 |
|
|
503 |
#endif /* !XVID_AUTO_INCLUDE */ |
#endif /* !XVID_AUTO_INCLUDE */ |
504 |
|
|
505 |
|
#if defined(XVID_AUTO_INCLUDE) && defined(REFERENCE_CODE) |
506 |
|
|
507 |
/***************************************************************************** |
/***************************************************************************** |
508 |
* "reference" filters impl. in plain C |
* "reference" filters impl. in plain C |
509 |
****************************************************************************/ |
****************************************************************************/ |
510 |
|
|
|
#ifdef XVID_AUTO_INCLUDE |
|
|
|
|
511 |
static |
static |
512 |
void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
513 |
{ |
{ |
659 |
#undef FUNC_HA_UP |
#undef FUNC_HA_UP |
660 |
#undef FUNC_VA_UP |
#undef FUNC_VA_UP |
661 |
|
|
662 |
#endif /* XVID_AUTO_INCLUDE */ |
#elif defined(XVID_AUTO_INCLUDE) && !defined(REFERENCE_CODE) |
663 |
|
|
664 |
|
/***************************************************************************** |
665 |
|
* "fast" filters impl. in plain C |
666 |
|
****************************************************************************/ |
667 |
|
|
668 |
|
#define CLIP_STORE(D,C) \ |
669 |
|
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \ |
670 |
|
STORE(D, C) |
671 |
|
|
672 |
|
static void |
673 |
|
FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND) |
674 |
|
{ |
675 |
|
#if (SIZE==16) |
676 |
|
while(H-->0) { |
677 |
|
int C; |
678 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
679 |
|
CLIP_STORE(Dst[ 0],C); |
680 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
681 |
|
CLIP_STORE(Dst[ 1],C); |
682 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
683 |
|
CLIP_STORE(Dst[ 2],C); |
684 |
|
C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]); |
685 |
|
CLIP_STORE(Dst[ 3],C); |
686 |
|
C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]); |
687 |
|
CLIP_STORE(Dst[ 4],C); |
688 |
|
C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]); |
689 |
|
CLIP_STORE(Dst[ 5],C); |
690 |
|
C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]); |
691 |
|
CLIP_STORE(Dst[ 6],C); |
692 |
|
C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]); |
693 |
|
CLIP_STORE(Dst[ 7],C); |
694 |
|
C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]); |
695 |
|
CLIP_STORE(Dst[ 8],C); |
696 |
|
C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]); |
697 |
|
CLIP_STORE(Dst[ 9],C); |
698 |
|
C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]); |
699 |
|
CLIP_STORE(Dst[10],C); |
700 |
|
C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]); |
701 |
|
CLIP_STORE(Dst[11],C); |
702 |
|
C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]); |
703 |
|
CLIP_STORE(Dst[12],C); |
704 |
|
C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16]; |
705 |
|
CLIP_STORE(Dst[13],C); |
706 |
|
C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15]; |
707 |
|
CLIP_STORE(Dst[14],C); |
708 |
|
C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16]; |
709 |
|
CLIP_STORE(Dst[15],C); |
710 |
|
Src += BpS; |
711 |
|
Dst += BpS; |
712 |
|
} |
713 |
|
#else |
714 |
|
while(H-->0) { |
715 |
|
int C; |
716 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
717 |
|
CLIP_STORE(Dst[0],C); |
718 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
719 |
|
CLIP_STORE(Dst[1],C); |
720 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
721 |
|
CLIP_STORE(Dst[2],C); |
722 |
|
C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]); |
723 |
|
CLIP_STORE(Dst[3],C); |
724 |
|
C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]); |
725 |
|
CLIP_STORE(Dst[4],C); |
726 |
|
C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8]; |
727 |
|
CLIP_STORE(Dst[5],C); |
728 |
|
C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7]; |
729 |
|
CLIP_STORE(Dst[6],C); |
730 |
|
C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8]; |
731 |
|
CLIP_STORE(Dst[7],C); |
732 |
|
Src += BpS; |
733 |
|
Dst += BpS; |
734 |
|
} |
735 |
|
#endif |
736 |
|
} |
737 |
|
#undef CLIP_STORE |
738 |
|
|
739 |
|
#define CLIP_STORE(i,C) \ |
740 |
|
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \ |
741 |
|
C = (C+Src[i]+1-RND) >> 1; \ |
742 |
|
STORE(Dst[i], C) |
743 |
|
|
744 |
|
static void |
745 |
|
FUNC_HA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND) |
746 |
|
{ |
747 |
|
#if (SIZE==16) |
748 |
|
while(H-->0) { |
749 |
|
int C; |
750 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
751 |
|
CLIP_STORE(0,C); |
752 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
753 |
|
CLIP_STORE( 1,C); |
754 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
755 |
|
CLIP_STORE( 2,C); |
756 |
|
C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]); |
757 |
|
CLIP_STORE( 3,C); |
758 |
|
C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]); |
759 |
|
CLIP_STORE( 4,C); |
760 |
|
C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]); |
761 |
|
CLIP_STORE( 5,C); |
762 |
|
C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]); |
763 |
|
CLIP_STORE( 6,C); |
764 |
|
C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]); |
765 |
|
CLIP_STORE( 7,C); |
766 |
|
C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]); |
767 |
|
CLIP_STORE( 8,C); |
768 |
|
C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]); |
769 |
|
CLIP_STORE( 9,C); |
770 |
|
C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]); |
771 |
|
CLIP_STORE(10,C); |
772 |
|
C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]); |
773 |
|
CLIP_STORE(11,C); |
774 |
|
C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]); |
775 |
|
CLIP_STORE(12,C); |
776 |
|
C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16]; |
777 |
|
CLIP_STORE(13,C); |
778 |
|
C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15]; |
779 |
|
CLIP_STORE(14,C); |
780 |
|
C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16]; |
781 |
|
CLIP_STORE(15,C); |
782 |
|
Src += BpS; |
783 |
|
Dst += BpS; |
784 |
|
} |
785 |
|
#else |
786 |
|
while(H-->0) { |
787 |
|
int C; |
788 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
789 |
|
CLIP_STORE(0,C); |
790 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
791 |
|
CLIP_STORE(1,C); |
792 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
793 |
|
CLIP_STORE(2,C); |
794 |
|
C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]); |
795 |
|
CLIP_STORE(3,C); |
796 |
|
C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]); |
797 |
|
CLIP_STORE(4,C); |
798 |
|
C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8]; |
799 |
|
CLIP_STORE(5,C); |
800 |
|
C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7]; |
801 |
|
CLIP_STORE(6,C); |
802 |
|
C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8]; |
803 |
|
CLIP_STORE(7,C); |
804 |
|
Src += BpS; |
805 |
|
Dst += BpS; |
806 |
|
} |
807 |
|
#endif |
808 |
|
} |
809 |
|
#undef CLIP_STORE |
810 |
|
|
811 |
|
#define CLIP_STORE(i,C) \ |
812 |
|
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \ |
813 |
|
C = (C+Src[i+1]+1-RND) >> 1; \ |
814 |
|
STORE(Dst[i], C) |
815 |
|
|
816 |
|
static void |
817 |
|
FUNC_HA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND) |
818 |
|
{ |
819 |
|
#if (SIZE==16) |
820 |
|
while(H-->0) { |
821 |
|
int C; |
822 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
823 |
|
CLIP_STORE(0,C); |
824 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
825 |
|
CLIP_STORE( 1,C); |
826 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
827 |
|
CLIP_STORE( 2,C); |
828 |
|
C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]); |
829 |
|
CLIP_STORE( 3,C); |
830 |
|
C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]); |
831 |
|
CLIP_STORE( 4,C); |
832 |
|
C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]); |
833 |
|
CLIP_STORE( 5,C); |
834 |
|
C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]); |
835 |
|
CLIP_STORE( 6,C); |
836 |
|
C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]); |
837 |
|
CLIP_STORE( 7,C); |
838 |
|
C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]); |
839 |
|
CLIP_STORE( 8,C); |
840 |
|
C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]); |
841 |
|
CLIP_STORE( 9,C); |
842 |
|
C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]); |
843 |
|
CLIP_STORE(10,C); |
844 |
|
C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]); |
845 |
|
CLIP_STORE(11,C); |
846 |
|
C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]); |
847 |
|
CLIP_STORE(12,C); |
848 |
|
C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16]; |
849 |
|
CLIP_STORE(13,C); |
850 |
|
C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15]; |
851 |
|
CLIP_STORE(14,C); |
852 |
|
C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16]; |
853 |
|
CLIP_STORE(15,C); |
854 |
|
Src += BpS; |
855 |
|
Dst += BpS; |
856 |
|
} |
857 |
|
#else |
858 |
|
while(H-->0) { |
859 |
|
int C; |
860 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
861 |
|
CLIP_STORE(0,C); |
862 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
863 |
|
CLIP_STORE(1,C); |
864 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
865 |
|
CLIP_STORE(2,C); |
866 |
|
C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]); |
867 |
|
CLIP_STORE(3,C); |
868 |
|
C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]); |
869 |
|
CLIP_STORE(4,C); |
870 |
|
C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8]; |
871 |
|
CLIP_STORE(5,C); |
872 |
|
C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7]; |
873 |
|
CLIP_STORE(6,C); |
874 |
|
C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8]; |
875 |
|
CLIP_STORE(7,C); |
876 |
|
Src += BpS; |
877 |
|
Dst += BpS; |
878 |
|
} |
879 |
|
#endif |
880 |
|
} |
881 |
|
#undef CLIP_STORE

//////////////////////////////////////////////////////////
// vertical passes
//////////////////////////////////////////////////////////
// Note: for vertical passes, width (W) needs only be 8 or 16.

/* Clamp the accumulator C (kept scaled by 32, hence the 255<<5 bound
 * and the final >>5) to [0,255], then hand it to the current STORE
 * policy (plain store, or add/average for the *_Add instantiations). */
#define CLIP_STORE(D,C) \
  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
  STORE(D, C)

/* Vertical quarter-pel interpolation pass (plain filtering, no source
 * averaging). Each iteration of the loop filters one SIZE-tall column
 * with the qpel FIR kernel (-1,3,-6,20,20,-6,3,-1) — edge rows use
 * folded coefficients (e.g. 14,23,-7,3,-1) since samples outside the
 * block are mirrored into the kernel.
 *
 * Dst: destination pixels          Src: source pixels
 * H:   number of columns to process (Src/Dst advance by 1 per column)
 * BpS: bytes per row (vertical stride between samples)
 * RND: rounding control (folded into the 16-RND bias before >>5)
 */
static void
FUNC_V(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(Dst[BpS* 0],C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(Dst[BpS* 1],C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(Dst[BpS* 2],C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE(Dst[BpS* 3],C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE(Dst[BpS* 4],C);
    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE(Dst[BpS* 5],C);
    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE(Dst[BpS* 6],C);
    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE(Dst[BpS* 7],C);
    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE(Dst[BpS* 8],C);
    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE(Dst[BpS* 9],C);
    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(Dst[BpS*10],C);
    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(Dst[BpS*11],C);
    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(Dst[BpS*12],C);
    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
    CLIP_STORE(Dst[BpS*13],C);
    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(Dst[BpS*14],C);
    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(Dst[BpS*15],C);
    Src += 1;   /* next column */
    Dst += 1;
  }
#else
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(Dst[BpS*0],C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(Dst[BpS*1],C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(Dst[BpS*2],C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(Dst[BpS*3],C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(Dst[BpS*4],C);
    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
    CLIP_STORE(Dst[BpS*5],C);
    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(Dst[BpS*6],C);
    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(Dst[BpS*7],C);
    Src += 1;   /* next column */
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
958 |
|
|
959 |
|
/* Same clamp as the plain vertical pass, but additionally averages the
 * filtered value with the co-located source pixel Src[BpS*i]
 * (rounding controlled by RND) before handing it to STORE. */
#define CLIP_STORE(i,C) \
  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
  C = (C+Src[BpS*i]+1-RND) >> 1; \
  STORE(Dst[BpS*i], C)

/* Vertical quarter-pel interpolation pass, averaged with the source
 * pixel at the same vertical offset. One SIZE-tall column is produced
 * per loop iteration using the qpel FIR kernel
 * (-1,3,-6,20,20,-6,3,-1), with folded coefficients at the edges.
 *
 * Dst: destination pixels          Src: source pixels
 * H:   number of columns to process (Src/Dst advance by 1 per column)
 * BpS: bytes per row (vertical stride between samples)
 * RND: rounding control (used in both the filter bias and the average)
 */
static void
FUNC_VA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE( 0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE( 2,C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(12,C);
    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(15,C);
    Src += 1;   /* next column */
    Dst += 1;
  }
#else
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(2,C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(4,C);
    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(7,C);
    Src += 1;   /* next column */
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
1030 |
|
|
1031 |
|
/* Same clamp as the plain vertical pass, but averages the filtered
 * value with the NEXT source pixel, Src[BpS*i+BpS] (one row further
 * down), before handing it to STORE — this is the "Up" averaging
 * variant used for the opposite quarter-pel offset. */
#define CLIP_STORE(i,C) \
  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
  C = (C+Src[BpS*i+BpS]+1-RND) >> 1; \
  STORE(Dst[BpS*i], C)

/* Vertical quarter-pel interpolation pass, averaged with the source
 * pixel one row below each output. One SIZE-tall column is produced
 * per loop iteration using the qpel FIR kernel
 * (-1,3,-6,20,20,-6,3,-1), with folded coefficients at the edges.
 *
 * Dst: destination pixels          Src: source pixels
 * H:   number of columns to process (Src/Dst advance by 1 per column)
 * BpS: bytes per row (vertical stride between samples)
 * RND: rounding control (used in both the filter bias and the average)
 */
static void
FUNC_VA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE( 0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE( 2,C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(12,C);
    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(15,C);
    Src += 1;   /* next column */
    Dst += 1;
  }
#else
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(2,C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(4,C);
    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(7,C);
    Src += 1;   /* next column */
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
1102 |
|
|
1103 |
|
/* Tear down the per-instantiation macros so this file can be
 * self-included again with a different STORE/FUNC_* configuration. */
#undef STORE
#undef FUNC_H
#undef FUNC_V
#undef FUNC_HA
#undef FUNC_VA
#undef FUNC_HA_UP
#undef FUNC_VA_UP


#endif /* XVID_AUTO_INCLUDE && !defined(REF) */