Annotation of /xvidcore/src/image/qpel.c

Revision 1.8 - (view) (download)

1 :	edgomez	1.2	/*****************************************************************************
2 :			*
3 :			* XVID MPEG-4 VIDEO CODEC
4 :			* - QPel interpolation -
5 :			*
6 :			* Copyright(C) 2003 Pascal Massimino <skal@planet-d.net>
7 :			*
8 :			* This program is free software ; you can redistribute it and/or modify
9 :			* it under the terms of the GNU General Public License as published by
10 :			* the Free Software Foundation ; either version 2 of the License, or
11 :			* (at your option) any later version.
12 :			*
13 :			* This program is distributed in the hope that it will be useful,
14 :			* but WITHOUT ANY WARRANTY ; without even the implied warranty of
15 :			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 :			* GNU General Public License for more details.
17 :			*
18 :			* You should have received a copy of the GNU General Public License
19 :			* along with this program ; if not, write to the Free Software
20 :			* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 :			*
22 :	suxen_drol	1.8	* $Id: qpel.c,v 1.7 2005/08/01 10:53:46 Isibaar Exp $
23 :	edgomez	1.2	*
24 :			****************************************************************************/
25 :
26 :			#ifndef XVID_AUTO_INCLUDE
27 :
28 :	suxen_drol	1.8	#include <stdio.h>
29 :
30 :	edgomez	1.2	#include "../portab.h"
31 :			#include "qpel.h"
32 :
33 :			/* Quarterpel FIR definition
34 :			****************************************************************************/
35 :
/* Quarter-pel FIR coefficients for 8-pixel-wide blocks.
 * FIR_Tab_8[i][k] is the weight applied to input sample i (0..8) when
 * accumulating output sample k (0..7). Each column sums to 32. */
static const int32_t FIR_Tab_8[9][8] = {
	{ 14, -3,  2, -1,  0,  0,  0,  0 },
	{ 23, 19, -6,  3, -1,  0,  0,  0 },
	{ -7, 20, 20, -6,  3, -1,  0,  0 },
	{  3, -6, 20, 20, -6,  3, -1,  0 },
	{ -1,  3, -6, 20, 20, -6,  3, -1 },
	{  0, -1,  3, -6, 20, 20, -6,  3 },
	{  0,  0, -1,  3, -6, 20, 20, -7 },
	{  0,  0,  0, -1,  3, -6, 19, 23 },
	{  0,  0,  0,  0, -1,  2, -3, 14 }
};
47 :
/* Quarter-pel FIR coefficients for 16-pixel-wide blocks.
 * FIR_Tab_16[i][k] is the weight applied to input sample i (0..16) when
 * accumulating output sample k (0..15). Each column sums to 32. */
static const int32_t FIR_Tab_16[17][16] = {
	{ 14, -3,  2, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{ 23, 19, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{ -7, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{ -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0 },
	{  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0 },
	{  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0 },
	{  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0 },
	{  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0 },
	{  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0 },
	{  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0 },
	{  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0 },
	{  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -7 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 19, 23 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  2, -3, 14 }
};
67 :
68 :			/* Implementation
69 :			****************************************************************************/
70 :
71 :			#define XVID_AUTO_INCLUDE
72 :	edgomez	1.3	/* First auto include this file to generate reference code for SIMD versions
73 :			* This set of functions are good for educational purpose, because they're
74 :			* straightforward to understand, use loops and so on... But obviously they
75 :			* sux when it comes to speed */
76 :			#define REFERENCE_CODE
77 :	edgomez	1.2
78 :			/* 16x? filters */
79 :
80 :			#define SIZE 16
81 :			#define TABLE FIR_Tab_16
82 :
83 :			#define STORE(d,s) (d) = (s)
84 :	edgomez	1.3	#define FUNC_H H_Pass_16_C_ref
85 :			#define FUNC_V V_Pass_16_C_ref
86 :			#define FUNC_HA H_Pass_Avrg_16_C_ref
87 :			#define FUNC_VA V_Pass_Avrg_16_C_ref
88 :			#define FUNC_HA_UP H_Pass_Avrg_Up_16_C_ref
89 :			#define FUNC_VA_UP V_Pass_Avrg_Up_16_C_ref
90 :
91 :			#include "qpel.c"
92 :
93 :			/* note: B-frame always uses Rnd=0... */
94 :			#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
95 :			#define FUNC_H H_Pass_16_Add_C_ref
96 :			#define FUNC_V V_Pass_16_Add_C_ref
97 :			#define FUNC_HA H_Pass_Avrg_16_Add_C_ref
98 :			#define FUNC_VA V_Pass_Avrg_16_Add_C_ref
99 :			#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C_ref
100 :			#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C_ref
101 :
102 :			#include "qpel.c"
103 :
104 :			#undef SIZE
105 :			#undef TABLE
106 :
107 :			/* 8x? filters */
108 :
109 :			#define SIZE 8
110 :			#define TABLE FIR_Tab_8
111 :
112 :			#define STORE(d,s) (d) = (s)
113 :			#define FUNC_H H_Pass_8_C_ref
114 :			#define FUNC_V V_Pass_8_C_ref
115 :			#define FUNC_HA H_Pass_Avrg_8_C_ref
116 :			#define FUNC_VA V_Pass_Avrg_8_C_ref
117 :			#define FUNC_HA_UP H_Pass_Avrg_Up_8_C_ref
118 :			#define FUNC_VA_UP V_Pass_Avrg_Up_8_C_ref
119 :
120 :			#include "qpel.c"
121 :
122 :			/* note: B-frame always uses Rnd=0... */
123 :			#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
124 :			#define FUNC_H H_Pass_8_Add_C_ref
125 :			#define FUNC_V V_Pass_8_Add_C_ref
126 :			#define FUNC_HA H_Pass_Avrg_8_Add_C_ref
127 :			#define FUNC_VA V_Pass_Avrg_8_Add_C_ref
128 :			#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C_ref
129 :			#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C_ref
130 :
131 :			#include "qpel.c"
132 :
133 :			#undef SIZE
134 :			#undef TABLE
135 :
136 :			/* Then we define more optimized C version where loops are unrolled, where
137 :			* FIR coeffcients are not read from memory but are hardcoded in instructions
138 :			* They should be faster */
139 :			#undef REFERENCE_CODE
140 :
141 :			/* 16x? filters */
142 :
143 :			#define SIZE 16
144 :
145 :			#define STORE(d,s) (d) = (s)
146 :	edgomez	1.2	#define FUNC_H H_Pass_16_C
147 :			#define FUNC_V V_Pass_16_C
148 :			#define FUNC_HA H_Pass_Avrg_16_C
149 :			#define FUNC_VA V_Pass_Avrg_16_C
150 :			#define FUNC_HA_UP H_Pass_Avrg_Up_16_C
151 :			#define FUNC_VA_UP V_Pass_Avrg_Up_16_C
152 :
153 :	edgomez	1.3	#include "qpel.c"
154 :	edgomez	1.2
155 :			/* note: B-frame always uses Rnd=0... */
156 :			#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
157 :			#define FUNC_H H_Pass_16_Add_C
158 :			#define FUNC_V V_Pass_16_Add_C
159 :			#define FUNC_HA H_Pass_Avrg_16_Add_C
160 :			#define FUNC_VA V_Pass_Avrg_16_Add_C
161 :			#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C
162 :			#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C
163 :
164 :	edgomez	1.3	#include "qpel.c"
165 :	edgomez	1.2
166 :			#undef SIZE
167 :			#undef TABLE
168 :
169 :			/* 8x? filters */
170 :
171 :			#define SIZE 8
172 :			#define TABLE FIR_Tab_8
173 :
174 :			#define STORE(d,s) (d) = (s)
175 :			#define FUNC_H H_Pass_8_C
176 :			#define FUNC_V V_Pass_8_C
177 :			#define FUNC_HA H_Pass_Avrg_8_C
178 :			#define FUNC_VA V_Pass_Avrg_8_C
179 :			#define FUNC_HA_UP H_Pass_Avrg_Up_8_C
180 :			#define FUNC_VA_UP V_Pass_Avrg_Up_8_C
181 :
182 :	edgomez	1.3	#include "qpel.c"
183 :	edgomez	1.2
184 :			/* note: B-frame always uses Rnd=0... */
185 :			#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
186 :			#define FUNC_H H_Pass_8_Add_C
187 :			#define FUNC_V V_Pass_8_Add_C
188 :			#define FUNC_HA H_Pass_Avrg_8_Add_C
189 :			#define FUNC_VA V_Pass_Avrg_8_Add_C
190 :			#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C
191 :			#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C
192 :
193 :	edgomez	1.3	#include "qpel.c"
194 :	edgomez	1.2
195 :			#undef SIZE
196 :			#undef TABLE
197 :			#undef XVID_AUTO_INCLUDE
198 :
199 :	edgomez	1.3	/* Global scope hooks
200 :	edgomez	1.2	****************************************************************************/
201 :
202 :	suxen_drol	1.8	XVID_QP_FUNCS *xvid_QP_Funcs = NULL;
203 :			XVID_QP_FUNCS *xvid_QP_Add_Funcs = NULL;
204 :	edgomez	1.2
205 :	edgomez	1.3	/* Reference plain C impl. declaration
206 :			****************************************************************************/
207 :
208 :			XVID_QP_FUNCS xvid_QP_Funcs_C_ref = {
209 :			H_Pass_16_C_ref, H_Pass_Avrg_16_C_ref, H_Pass_Avrg_Up_16_C_ref,
210 :			V_Pass_16_C_ref, V_Pass_Avrg_16_C_ref, V_Pass_Avrg_Up_16_C_ref,
211 :
212 :			H_Pass_8_C_ref, H_Pass_Avrg_8_C_ref, H_Pass_Avrg_Up_8_C_ref,
213 :			V_Pass_8_C_ref, V_Pass_Avrg_8_C_ref, V_Pass_Avrg_Up_8_C_ref
214 :			};
215 :
216 :			XVID_QP_FUNCS xvid_QP_Add_Funcs_C_ref = {
217 :			H_Pass_16_Add_C_ref, H_Pass_Avrg_16_Add_C_ref, H_Pass_Avrg_Up_16_Add_C_ref,
218 :			V_Pass_16_Add_C_ref, V_Pass_Avrg_16_Add_C_ref, V_Pass_Avrg_Up_16_Add_C_ref,
219 :
220 :			H_Pass_8_Add_C_ref, H_Pass_Avrg_8_Add_C_ref, H_Pass_Avrg_Up_8_Add_C_ref,
221 :			V_Pass_8_Add_C_ref, V_Pass_Avrg_8_Add_C_ref, V_Pass_Avrg_Up_8_Add_C_ref
222 :			};
223 :
224 :			/* Plain C impl. declaration (faster than ref one)
225 :	edgomez	1.2	****************************************************************************/
226 :
227 :			XVID_QP_FUNCS xvid_QP_Funcs_C = {
228 :			H_Pass_16_C, H_Pass_Avrg_16_C, H_Pass_Avrg_Up_16_C,
229 :			V_Pass_16_C, V_Pass_Avrg_16_C, V_Pass_Avrg_Up_16_C,
230 :
231 :			H_Pass_8_C, H_Pass_Avrg_8_C, H_Pass_Avrg_Up_8_C,
232 :			V_Pass_8_C, V_Pass_Avrg_8_C, V_Pass_Avrg_Up_8_C
233 :			};
234 :
235 :			XVID_QP_FUNCS xvid_QP_Add_Funcs_C = {
236 :			H_Pass_16_Add_C, H_Pass_Avrg_16_Add_C, H_Pass_Avrg_Up_16_Add_C,
237 :			V_Pass_16_Add_C, V_Pass_Avrg_16_Add_C, V_Pass_Avrg_Up_16_Add_C,
238 :
239 :			H_Pass_8_Add_C, H_Pass_Avrg_8_Add_C, H_Pass_Avrg_Up_8_Add_C,
240 :			V_Pass_8_Add_C, V_Pass_Avrg_8_Add_C, V_Pass_Avrg_Up_8_Add_C
241 :			};
242 :
243 :			/* mmx impl. declaration (see. qpel_mmx.asm
244 :			****************************************************************************/
245 :
#ifdef ARCH_IS_IA32
/* Filters implemented outside this file (see qpel_mmx.asm). Note that the
 * 16-wide "Add" symbols are spelled ..._Add_16_mmx while the 8-wide ones are
 * spelled ..._8_Add_mmx; the names must stay exactly as declared here. */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_16_mmx);

extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx);

extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_Add_16_mmx);

extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx);

/* Dispatch table for the plain mmx filters. */
XVID_QP_FUNCS xvid_QP_Funcs_mmx = {
	xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx,
	xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx,

	xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx,
	xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx
};

/* Dispatch table for the mmx "Add" filters. */
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = {
	xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx,
	xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx,

	xvid_H_Pass_8_Add_mmx, xvid_H_Pass_Avrg_8_Add_mmx, xvid_H_Pass_Avrg_Up_8_Add_mmx,
	xvid_V_Pass_8_Add_mmx, xvid_V_Pass_Avrg_8_Add_mmx, xvid_V_Pass_Avrg_Up_8_Add_mmx,
};
#endif /* ARCH_IS_IA32 */
291 :
292 :	edgomez	1.4
293 :			/* altivec impl. declaration (see qpel_altivec.c)
294 :			****************************************************************************/
295 :
#ifdef ARCH_IS_PPC

/* Filters implemented outside this file (see qpel_altivec.c). */
extern XVID_QP_PASS_SIGNATURE(H_Pass_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_16_Altivec_C);

extern XVID_QP_PASS_SIGNATURE(H_Pass_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_8_Altivec_C);


extern XVID_QP_PASS_SIGNATURE(H_Pass_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_16_Add_Altivec_C);

extern XVID_QP_PASS_SIGNATURE(H_Pass_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_8_Add_Altivec_C);

/* Dispatch table for the plain altivec filters. */
XVID_QP_FUNCS xvid_QP_Funcs_Altivec_C = {
	H_Pass_16_Altivec_C, H_Pass_Avrg_16_Altivec_C, H_Pass_Avrg_Up_16_Altivec_C,
	V_Pass_16_Altivec_C, V_Pass_Avrg_16_Altivec_C, V_Pass_Avrg_Up_16_Altivec_C,

	H_Pass_8_Altivec_C, H_Pass_Avrg_8_Altivec_C, H_Pass_Avrg_Up_8_Altivec_C,
	V_Pass_8_Altivec_C, V_Pass_Avrg_8_Altivec_C, V_Pass_Avrg_Up_8_Altivec_C
};

/* Dispatch table for the altivec "Add" filters. */
XVID_QP_FUNCS xvid_QP_Add_Funcs_Altivec_C = {
	H_Pass_16_Add_Altivec_C, H_Pass_Avrg_16_Add_Altivec_C, H_Pass_Avrg_Up_16_Add_Altivec_C,
	V_Pass_16_Add_Altivec_C, V_Pass_Avrg_16_Add_Altivec_C, V_Pass_Avrg_Up_16_Add_Altivec_C,

	H_Pass_8_Add_Altivec_C, H_Pass_Avrg_8_Add_Altivec_C, H_Pass_Avrg_Up_8_Add_Altivec_C,
	V_Pass_8_Add_Altivec_C, V_Pass_Avrg_8_Add_Altivec_C, V_Pass_Avrg_Up_8_Add_Altivec_C
};

#endif /* ARCH_IS_PPC */
344 :
345 :	edgomez	1.5	/* mmx impl. (for 64bit bus) declaration (see. qpel_mmx.asm
346 :			****************************************************************************/
347 :
#ifdef ARCH_IS_X86_64
/* Filters implemented outside this file (assembly). As with the mmx set, the
 * 16-wide "Add" symbols are spelled ..._Add_16_x86_64 while the 8-wide ones
 * are spelled ..._8_Add_x86_64. */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_16_x86_64);

extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_x86_64);

extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_Add_16_x86_64);

extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_Add_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_Add_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_Add_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_Add_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_x86_64);

/* Dispatch table for the plain x86_64 filters. */
XVID_QP_FUNCS xvid_QP_Funcs_x86_64 = {
	xvid_H_Pass_16_x86_64, xvid_H_Pass_Avrg_16_x86_64, xvid_H_Pass_Avrg_Up_16_x86_64,
	xvid_V_Pass_16_x86_64, xvid_V_Pass_Avrg_16_x86_64, xvid_V_Pass_Avrg_Up_16_x86_64,

	xvid_H_Pass_8_x86_64, xvid_H_Pass_Avrg_8_x86_64, xvid_H_Pass_Avrg_Up_8_x86_64,
	xvid_V_Pass_8_x86_64, xvid_V_Pass_Avrg_8_x86_64, xvid_V_Pass_Avrg_Up_8_x86_64
};

/* Dispatch table for the x86_64 "Add" filters. */
XVID_QP_FUNCS xvid_QP_Add_Funcs_x86_64 = {
	xvid_H_Pass_Add_16_x86_64, xvid_H_Pass_Avrg_Add_16_x86_64, xvid_H_Pass_Avrg_Up_Add_16_x86_64,
	xvid_V_Pass_Add_16_x86_64, xvid_V_Pass_Avrg_Add_16_x86_64, xvid_V_Pass_Avrg_Up_Add_16_x86_64,

	xvid_H_Pass_8_Add_x86_64, xvid_H_Pass_Avrg_8_Add_x86_64, xvid_H_Pass_Avrg_Up_8_Add_x86_64,
	xvid_V_Pass_8_Add_x86_64, xvid_V_Pass_Avrg_8_Add_x86_64, xvid_V_Pass_Avrg_Up_8_Add_x86_64,
};
#endif /* ARCH_IS_X86_64 */
393 :
394 :	edgomez	1.2	/* tables for ASM
395 :			****************************************************************************/
396 :
397 :	suxen_drol	1.8
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
/* These symbols will be used outside this file, so tell the compiler
 * they're global. Only ia32 will define them in this file, x86_64
 * will do in the assembly files */
extern uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */

extern int16_t xvid_FIR_1_0_0_0[256][4];
extern int16_t xvid_FIR_3_1_0_0[256][4];
extern int16_t xvid_FIR_6_3_1_0[256][4];
extern int16_t xvid_FIR_14_3_2_1[256][4];
extern int16_t xvid_FIR_20_6_3_1[256][4];
extern int16_t xvid_FIR_20_20_6_3[256][4];
extern int16_t xvid_FIR_23_19_6_3[256][4];
extern int16_t xvid_FIR_7_20_20_6[256][4];
extern int16_t xvid_FIR_6_20_20_6[256][4];
extern int16_t xvid_FIR_6_20_20_7[256][4];
extern int16_t xvid_FIR_3_6_20_20[256][4];
extern int16_t xvid_FIR_3_6_19_23[256][4];
extern int16_t xvid_FIR_1_3_6_20[256][4];
extern int16_t xvid_FIR_1_2_3_14[256][4];
extern int16_t xvid_FIR_0_1_3_6[256][4];
extern int16_t xvid_FIR_0_0_1_3[256][4];
extern int16_t xvid_FIR_0_0_0_1[256][4];
#endif
422 :
423 :			/* Arrays definitions, according to the target platform */
#ifdef ARCH_IS_IA32
uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */
#endif

#if !defined(ARCH_IS_X86_64)
/* Only ia32 will use these tables outside this file so mark them
 * static for all other archs. On x86_64 nothing is defined here at all. */
#if defined(ARCH_IS_IA32)
#define __SCOPE
#else
#define __SCOPE static
#endif
/* One 256-entry table per coefficient quadruple; the digits in each name are
 * the magnitudes of the four coefficients it is initialised with. */
__SCOPE int16_t xvid_FIR_1_0_0_0[256][4];
__SCOPE int16_t xvid_FIR_3_1_0_0[256][4];
__SCOPE int16_t xvid_FIR_6_3_1_0[256][4];
__SCOPE int16_t xvid_FIR_14_3_2_1[256][4];
__SCOPE int16_t xvid_FIR_20_6_3_1[256][4];
__SCOPE int16_t xvid_FIR_20_20_6_3[256][4];
__SCOPE int16_t xvid_FIR_23_19_6_3[256][4];
__SCOPE int16_t xvid_FIR_7_20_20_6[256][4];
__SCOPE int16_t xvid_FIR_6_20_20_6[256][4];
__SCOPE int16_t xvid_FIR_6_20_20_7[256][4];
__SCOPE int16_t xvid_FIR_3_6_20_20[256][4];
__SCOPE int16_t xvid_FIR_3_6_19_23[256][4];
__SCOPE int16_t xvid_FIR_1_3_6_20[256][4];
__SCOPE int16_t xvid_FIR_1_2_3_14[256][4];
__SCOPE int16_t xvid_FIR_0_1_3_6[256][4];
__SCOPE int16_t xvid_FIR_0_0_1_3[256][4];
__SCOPE int16_t xvid_FIR_0_0_0_1[256][4];
#endif
454 :	Isibaar	1.7
/* Fill a 256-entry lookup table with the products of every byte value and
 * four filter coefficients.
 *
 * Tab        - destination table; rows 0..255 are written.
 * A, B, C, D - coefficients; Tab[i] becomes { i*A, i*B, i*C, i*D }.
 *
 * The products are narrowed to int16_t; the cast only makes explicit the
 * conversion that the assignment performs anyway. */
static void Init_FIR_Table(int16_t Tab[][4],
                           int A, int B, int C, int D)
{
	int i;
	for(i=0; i<256; ++i) {
		Tab[i][0] = (int16_t)(i*A);
		Tab[i][1] = (int16_t)(i*B);
		Tab[i][2] = (int16_t)(i*C);
		Tab[i][3] = (int16_t)(i*D);
	}
}
466 :
467 :
468 :	suxen_drol	1.8	void xvid_Init_QP(void)
469 :	edgomez	1.2	{
470 :			#ifdef ARCH_IS_IA32
471 :			int i;
472 :
473 :			for(i=0; i<256; ++i) {
474 :			xvid_Expand_mmx[i][0] = i;
475 :			xvid_Expand_mmx[i][1] = i;
476 :			xvid_Expand_mmx[i][2] = i;
477 :			xvid_Expand_mmx[i][3] = i;
478 :			}
479 :			#endif
480 :
481 :			/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) */
482 :
483 :			Init_FIR_Table(xvid_FIR_1_0_0_0, -1, 0, 0, 0);
484 :			Init_FIR_Table(xvid_FIR_3_1_0_0, 3, -1, 0, 0);
485 :			Init_FIR_Table(xvid_FIR_6_3_1_0, -6, 3, -1, 0);
486 :			Init_FIR_Table(xvid_FIR_14_3_2_1, 14, -3, 2, -1);
487 :			Init_FIR_Table(xvid_FIR_20_6_3_1, 20, -6, 3, -1);
488 :			Init_FIR_Table(xvid_FIR_20_20_6_3, 20, 20, -6, 3);
489 :			Init_FIR_Table(xvid_FIR_23_19_6_3, 23, 19, -6, 3);
490 :			Init_FIR_Table(xvid_FIR_7_20_20_6, -7, 20, 20, -6);
491 :			Init_FIR_Table(xvid_FIR_6_20_20_6, -6, 20, 20, -6);
492 :			Init_FIR_Table(xvid_FIR_6_20_20_7, -6, 20, 20, -7);
493 :			Init_FIR_Table(xvid_FIR_3_6_20_20, 3, -6, 20, 20);
494 :			Init_FIR_Table(xvid_FIR_3_6_19_23, 3, -6, 19, 23);
495 :			Init_FIR_Table(xvid_FIR_1_3_6_20, -1, 3, -6, 20);
496 :			Init_FIR_Table(xvid_FIR_1_2_3_14, -1, 2, -3, 14);
497 :			Init_FIR_Table(xvid_FIR_0_1_3_6, 0, -1, 3, -6);
498 :			Init_FIR_Table(xvid_FIR_0_0_1_3, 0, 0, -1, 3);
499 :			Init_FIR_Table(xvid_FIR_0_0_0_1, 0, 0, 0, -1);
500 :
501 :			}
502 :
503 :			#endif /* !XVID_AUTO_INCLUDE */
504 :
505 :	edgomez	1.3	#if defined(XVID_AUTO_INCLUDE) && defined(REFERENCE_CODE)
506 :
507 :	edgomez	1.2	/*****************************************************************************
508 :			* "reference" filters impl. in plain C
509 :			****************************************************************************/
510 :
511 :			static
512 :			void FUNC_H(uint8_t Dst, const uint8_t Src, int32_t H, int32_t BpS, int32_t Rnd)
513 :			{
514 :			while(H-->0) {
515 :			int32_t i, k;
516 :			int32_t Sums[SIZE] = { 0 };
517 :			for(i=0; i<=SIZE; ++i)
518 :			for(k=0; k<SIZE; ++k)
519 :			Sums[k] += TABLE[i][k] * Src[i];
520 :
521 :			for(i=0; i<SIZE; ++i) {
522 :			int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
523 :			if (C<0) C = 0; else if (C>255) C = 255;
524 :			STORE(Dst[i], C);
525 :			}
526 :			Src += BpS;
527 :			Dst += BpS;
528 :			}
529 :			}
530 :
531 :			static
532 :			void FUNC_V(uint8_t Dst, const uint8_t Src, int32_t W, int32_t BpS, int32_t Rnd)
533 :			{
534 :			while(W-->0) {
535 :			int32_t i, k;
536 :			int32_t Sums[SIZE] = { 0 };
537 :			const uint8_t *S = Src++;
538 :			uint8_t *D = Dst++;
539 :			for(i=0; i<=SIZE; ++i) {
540 :			for(k=0; k<SIZE; ++k)
541 :			Sums[k] += TABLE[i][k] * S[0];
542 :			S += BpS;
543 :			}
544 :
545 :			for(i=0; i<SIZE; ++i) {
546 :			int32_t C = ( Sums[i] + 16-Rnd )>>5;
547 :			if (C<0) C = 0; else if (C>255) C = 255;
548 :			STORE(D[0], C);
549 :			D += BpS;
550 :			}
551 :			}
552 :			}
553 :
554 :			static
555 :			void FUNC_HA(uint8_t Dst, const uint8_t Src, int32_t H, int32_t BpS, int32_t Rnd)
556 :			{
557 :			while(H-->0) {
558 :			int32_t i, k;
559 :			int32_t Sums[SIZE] = { 0 };
560 :			for(i=0; i<=SIZE; ++i)
561 :			for(k=0; k<SIZE; ++k)
562 :			Sums[k] += TABLE[i][k] * Src[i];
563 :
564 :			for(i=0; i<SIZE; ++i) {
565 :			int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
566 :			if (C<0) C = 0; else if (C>255) C = 255;
567 :			C = (C+Src[i]+1-Rnd) >> 1;
568 :			STORE(Dst[i], C);
569 :			}
570 :			Src += BpS;
571 :			Dst += BpS;
572 :			}
573 :			}
574 :
575 :			static
576 :			void FUNC_HA_UP(uint8_t Dst, const uint8_t Src, int32_t H, int32_t BpS, int32_t Rnd)
577 :			{
578 :			while(H-->0) {
579 :			int32_t i, k;
580 :			int32_t Sums[SIZE] = { 0 };
581 :			for(i=0; i<=SIZE; ++i)
582 :			for(k=0; k<SIZE; ++k)
583 :			Sums[k] += TABLE[i][k] * Src[i];
584 :
585 :			for(i=0; i<SIZE; ++i) {
586 :			int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
587 :			if (C<0) C = 0; else if (C>255) C = 255;
588 :			C = (C+Src[i+1]+1-Rnd) >> 1;
589 :			STORE(Dst[i], C);
590 :			}
591 :			Src += BpS;
592 :			Dst += BpS;
593 :			}
594 :			}
595 :
596 :			static
597 :			void FUNC_VA(uint8_t Dst, const uint8_t Src, int32_t W, int32_t BpS, int32_t Rnd)
598 :			{
599 :			while(W-->0) {
600 :			int32_t i, k;
601 :			int32_t Sums[SIZE] = { 0 };
602 :			const uint8_t *S = Src;
603 :			uint8_t *D = Dst;
604 :
605 :			for(i=0; i<=SIZE; ++i) {
606 :			for(k=0; k<SIZE; ++k)
607 :			Sums[k] += TABLE[i][k] * S[0];
608 :			S += BpS;
609 :			}
610 :
611 :			S = Src;
612 :			for(i=0; i<SIZE; ++i) {
613 :			int32_t C = ( Sums[i] + 16-Rnd )>>5;
614 :			if (C<0) C = 0; else if (C>255) C = 255;
615 :			C = ( C+S[0]+1-Rnd ) >> 1;
616 :			STORE(D[0], C);
617 :			D += BpS;
618 :			S += BpS;
619 :			}
620 :			Src++;
621 :			Dst++;
622 :			}
623 :			}
624 :
625 :			static
626 :			void FUNC_VA_UP(uint8_t Dst, const uint8_t Src, int32_t W, int32_t BpS, int32_t Rnd)
627 :			{
628 :			while(W-->0) {
629 :			int32_t i, k;
630 :			int32_t Sums[SIZE] = { 0 };
631 :			const uint8_t *S = Src;
632 :			uint8_t *D = Dst;
633 :
634 :			for(i=0; i<=SIZE; ++i) {
635 :			for(k=0; k<SIZE; ++k)
636 :			Sums[k] += TABLE[i][k] * S[0];
637 :			S += BpS;
638 :			}
639 :
640 :			S = Src + BpS;
641 :			for(i=0; i<SIZE; ++i) {
642 :			int32_t C = ( Sums[i] + 16-Rnd )>>5;
643 :			if (C<0) C = 0; else if (C>255) C = 255;
644 :			C = ( C+S[0]+1-Rnd ) >> 1;
645 :			STORE(D[0], C);
646 :			D += BpS;
647 :			S += BpS;
648 :			}
649 :			Dst++;
650 :			Src++;
651 :			}
652 :			}
653 :
/* Drop the per-instantiation names so the next pass can redefine them. */
#undef STORE
#undef FUNC_H
#undef FUNC_V
#undef FUNC_HA
#undef FUNC_VA
#undef FUNC_HA_UP
#undef FUNC_VA_UP
661 :
662 :	edgomez	1.3	#elif defined(XVID_AUTO_INCLUDE) && !defined(REFERENCE_CODE)
663 :
664 :			/*****************************************************************************
665 :			* "fast" filters impl. in plain C
666 :			****************************************************************************/
667 :
/* Clamp the un-normalised sum C to the 8-bit range (after the >>5 scaling)
 * and write it to D through STORE. C must be a modifiable lvalue.
 * Wrapped in do/while(0) so it behaves as a single statement. */
#define CLIP_STORE(D,C) \
	do { \
		if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
		STORE(D, C); \
	} while (0)

/* Unrolled horizontal pass.
 * For each of H rows, computes SIZE output pixels from Src[0..SIZE] with the
 * FIR coefficients written out as constants (rounding term 16-RND, result
 * scaled by >>5 and clamped in CLIP_STORE). Src and Dst advance by BpS per
 * row. */
static void
FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(Dst[ 0],C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(Dst[ 1],C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(Dst[ 2],C);
		C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
		CLIP_STORE(Dst[ 3],C);
		C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
		CLIP_STORE(Dst[ 4],C);
		C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
		CLIP_STORE(Dst[ 5],C);
		C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
		CLIP_STORE(Dst[ 6],C);
		C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
		CLIP_STORE(Dst[ 7],C);
		C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
		CLIP_STORE(Dst[ 8],C);
		C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
		CLIP_STORE(Dst[ 9],C);
		C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
		CLIP_STORE(Dst[10],C);
		C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
		CLIP_STORE(Dst[11],C);
		C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
		CLIP_STORE(Dst[12],C);
		C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
		CLIP_STORE(Dst[13],C);
		C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
		CLIP_STORE(Dst[14],C);
		C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
		CLIP_STORE(Dst[15],C);
		Src += BpS;
		Dst += BpS;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(Dst[0],C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(Dst[1],C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(Dst[2],C);
		C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(Dst[3],C);
		C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(Dst[4],C);
		C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
		CLIP_STORE(Dst[5],C);
		C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(Dst[6],C);
		C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(Dst[7],C);
		Src += BpS;
		Dst += BpS;
	}
#endif
}
#undef CLIP_STORE
738 :
/* Clamp the un-normalised sum C to the 8-bit range (after the >>5 scaling),
 * average it with the source pixel Src[i], and write it to Dst[i] through
 * STORE. Relies on Src, Dst and RND being in scope at the point of use.
 * Wrapped in do/while(0) so it behaves as a single statement. */
#define CLIP_STORE(i,C) \
	do { \
		if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
		C = (C+Src[i]+1-RND) >> 1; \
		STORE(Dst[i], C); \
	} while (0)

/* Unrolled horizontal pass with averaging.
 * Same filtering as the unrolled plain horizontal pass; each clamped result
 * is additionally averaged with the source pixel at the same index. Src and
 * Dst advance by BpS per row. */
static void
FUNC_HA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE( 1,C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE( 2,C);
		C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
		CLIP_STORE( 3,C);
		C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
		CLIP_STORE( 4,C);
		C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
		CLIP_STORE( 5,C);
		C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
		CLIP_STORE( 6,C);
		C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
		CLIP_STORE( 7,C);
		C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
		CLIP_STORE( 8,C);
		C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
		CLIP_STORE( 9,C);
		C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
		CLIP_STORE(10,C);
		C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
		CLIP_STORE(11,C);
		C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
		CLIP_STORE(12,C);
		C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
		CLIP_STORE(13,C);
		C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
		CLIP_STORE(14,C);
		C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
		CLIP_STORE(15,C);
		Src += BpS;
		Dst += BpS;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(1,C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(2,C);
		C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(3,C);
		C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(4,C);
		C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
		CLIP_STORE(5,C);
		C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(6,C);
		C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(7,C);
		Src += BpS;
		Dst += BpS;
	}
#endif
}
#undef CLIP_STORE
810 :
/*
 * CLIP_STORE(i,C): same clamp as the other row-filter variants (the value
 * after the >>5 lies in 0..255), but the clamped sample is averaged with
 * the source pixel one position further along the row, Src[i+1]
 * (rounding term 1-RND), before being handed to STORE for Dst[i].
 * STORE is defined outside this part of the file.
 */
#define CLIP_STORE(i,C) \
  do { \
    if ((C)<0) (C) = 0; else if ((C)>(255<<5)) (C) = 255; else (C) = (C)>>5; \
    (C) = ((C)+Src[(i)+1]+1-RND) >> 1; \
    STORE(Dst[(i)], (C)); \
  } while (0)

/*
 * Row filter + average with the next source pixel.
 *
 * For each of the H rows, SIZE (16 or 8) output samples are computed from
 * the SIZE+1 consecutive source bytes Src[0..SIZE] with the unrolled tap
 * weights below (rounding term 16-RND, result scaled by 32).  Each sample is
 * then clipped, averaged with Src[i+1] and stored through CLIP_STORE.
 *
 *   Dst  destination row pointer, advanced by BpS after every row
 *   Src  source row pointer, advanced by BpS after every row
 *   H    number of rows to process
 *   BpS  distance in bytes between two consecutive rows
 *   RND  rounding control, subtracted from both rounding terms
 */
static void
FUNC_HA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    /* left border: asymmetric taps */
    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
    CLIP_STORE( 0,C);
    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
    CLIP_STORE( 2,C);
    /* interior: symmetric (-1, 3, -6, 20, 20, -6, 3, -1) taps */
    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
    CLIP_STORE(12,C);
    /* right border: mirrored asymmetric taps */
    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
    CLIP_STORE(15,C);
    Src += BpS;
    Dst += BpS;
  }
#else
  while(H-->0) {
    int C;
    /* left border: asymmetric taps */
    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
    CLIP_STORE(2,C);
    /* interior: symmetric taps */
    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
    CLIP_STORE(4,C);
    /* right border: mirrored asymmetric taps */
    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
    CLIP_STORE(7,C);
    Src += BpS;
    Dst += BpS;
  }
#endif
}
#undef CLIP_STORE
882 :
883 :			//////////////////////////////////////////////////////////
884 :			// vertical passes
885 :			//////////////////////////////////////////////////////////
886 :			// Note: for vertical passes, width (W) needs only be 8 or 16.
887 :
/*
 * CLIP_STORE(D,C): clamp the filter sum C (scaled by 32) so that the value
 * after the >>5 lies in 0..255, then hand it to STORE for the destination
 * lvalue D.  No averaging with a source pixel is done in this variant.
 * STORE is defined outside this part of the file.
 */
#define CLIP_STORE(D,C) \
  do { \
    if ((C)<0) (C) = 0; else if ((C)>(255<<5)) (C) = 255; else (C) = (C)>>5; \
    STORE(D, (C)); \
  } while (0)

/*
 * Column filter.
 *
 * Each loop iteration handles one column: SIZE (16 or 8) output samples
 * Dst[BpS*0 .. BpS*(SIZE-1)] are computed from the SIZE+1 source samples
 * Src[BpS*0 .. BpS*SIZE] with the unrolled tap weights below (rounding term
 * 16-RND, result scaled by 32), clipped and stored through CLIP_STORE.
 * Src and Dst then move one byte to the right.
 *
 *   Dst  pointer to the top of the first destination column
 *   Src  pointer to the top of the first source column
 *   H    number of iterations, i.e. of columns processed
 *   BpS  distance in bytes between two consecutive rows
 *   RND  rounding control, subtracted from the rounding term
 */
static void
FUNC_V(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    /* top border: asymmetric taps */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
    CLIP_STORE(Dst[BpS* 0],C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(Dst[BpS* 1],C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(Dst[BpS* 2],C);
    /* interior: symmetric (-1, 3, -6, 20, 20, -6, 3, -1) taps */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE(Dst[BpS* 3],C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE(Dst[BpS* 4],C);
    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE(Dst[BpS* 5],C);
    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE(Dst[BpS* 6],C);
    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE(Dst[BpS* 7],C);
    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE(Dst[BpS* 8],C);
    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE(Dst[BpS* 9],C);
    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(Dst[BpS*10],C);
    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(Dst[BpS*11],C);
    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(Dst[BpS*12],C);
    /* bottom border: mirrored asymmetric taps */
    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
    CLIP_STORE(Dst[BpS*13],C);
    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(Dst[BpS*14],C);
    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(Dst[BpS*15],C);
    Src += 1;
    Dst += 1;
  }
#else
  while(H-->0) {
    int C;
    /* top border: asymmetric taps */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
    CLIP_STORE(Dst[BpS*0],C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(Dst[BpS*1],C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(Dst[BpS*2],C);
    /* interior: symmetric taps */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(Dst[BpS*3],C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(Dst[BpS*4],C);
    /* bottom border: mirrored asymmetric taps */
    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
    CLIP_STORE(Dst[BpS*5],C);
    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(Dst[BpS*6],C);
    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(Dst[BpS*7],C);
    Src += 1;
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
958 :
/*
 * CLIP_STORE(i,C): finish one output sample of the averaging column filter.
 * C (filter sum scaled by 32) is clamped so that the value after the >>5
 * lies in 0..255, averaged with the source pixel of the same row,
 * Src[BpS*i] (rounding term 1-RND), and handed to STORE for Dst[BpS*i].
 * STORE is defined outside this part of the file.
 */
#define CLIP_STORE(i,C) \
  do { \
    if ((C)<0) (C) = 0; else if ((C)>(255<<5)) (C) = 255; else (C) = (C)>>5; \
    (C) = ((C)+Src[BpS*(i)]+1-RND) >> 1; \
    STORE(Dst[BpS*(i)], (C)); \
  } while (0)

/*
 * Column filter + average with the co-located source pixel.
 *
 * Each loop iteration handles one column: SIZE (16 or 8) output samples are
 * computed from the SIZE+1 source samples Src[BpS*0 .. BpS*SIZE] with the
 * unrolled tap weights below (rounding term 16-RND, result scaled by 32).
 * Each sample is clipped, averaged with Src[BpS*i] and stored through
 * CLIP_STORE.  Src and Dst then move one byte to the right.
 *
 *   Dst  pointer to the top of the first destination column
 *   Src  pointer to the top of the first source column
 *   H    number of iterations, i.e. of columns processed
 *   BpS  distance in bytes between two consecutive rows
 *   RND  rounding control, subtracted from both rounding terms
 */
static void
FUNC_VA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    /* top border: asymmetric taps */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
    CLIP_STORE( 0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE( 2,C);
    /* interior: symmetric (-1, 3, -6, 20, 20, -6, 3, -1) taps */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(12,C);
    /* bottom border: mirrored asymmetric taps */
    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(15,C);
    Src += 1;
    Dst += 1;
  }
#else
  while(H-->0) {
    int C;
    /* top border: asymmetric taps */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(2,C);
    /* interior: symmetric taps */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(4,C);
    /* bottom border: mirrored asymmetric taps */
    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(7,C);
    Src += 1;
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
1030 :
/*
 * CLIP_STORE(i,C): same clamp as the other column-filter variants (the
 * value after the >>5 lies in 0..255), but the clamped sample is averaged
 * with the source pixel one row further down, Src[BpS*i+BpS] (rounding term
 * 1-RND), before being handed to STORE for Dst[BpS*i].
 * STORE is defined outside this part of the file.
 */
#define CLIP_STORE(i,C) \
  do { \
    if ((C)<0) (C) = 0; else if ((C)>(255<<5)) (C) = 255; else (C) = (C)>>5; \
    (C) = ((C)+Src[BpS*(i)+BpS]+1-RND) >> 1; \
    STORE(Dst[BpS*(i)], (C)); \
  } while (0)

/*
 * Column filter + average with the source pixel of the next row.
 *
 * Each loop iteration handles one column: SIZE (16 or 8) output samples are
 * computed from the SIZE+1 source samples Src[BpS*0 .. BpS*SIZE] with the
 * unrolled tap weights below (rounding term 16-RND, result scaled by 32).
 * Each sample is clipped, averaged with Src[BpS*(i+1)] and stored through
 * CLIP_STORE.  Src and Dst then move one byte to the right.
 *
 *   Dst  pointer to the top of the first destination column
 *   Src  pointer to the top of the first source column
 *   H    number of iterations, i.e. of columns processed
 *   BpS  distance in bytes between two consecutive rows
 *   RND  rounding control, subtracted from both rounding terms
 */
static void
FUNC_VA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    /* top border: asymmetric taps */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
    CLIP_STORE( 0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE( 2,C);
    /* interior: symmetric (-1, 3, -6, 20, 20, -6, 3, -1) taps */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(12,C);
    /* bottom border: mirrored asymmetric taps */
    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(15,C);
    Src += 1;
    Dst += 1;
  }
#else
  while(H-->0) {
    int C;
    /* top border: asymmetric taps */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(2,C);
    /* interior: symmetric taps */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(4,C);
    /* bottom border: mirrored asymmetric taps */
    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(7,C);
    Src += 1;
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
1102 :
1103 :			#undef STORE
1104 :			#undef FUNC_H
1105 :			#undef FUNC_V
1106 :			#undef FUNC_HA
1107 :			#undef FUNC_VA
1108 :			#undef FUNC_HA_UP
1109 :			#undef FUNC_VA_UP
1110 :
1111 :
1112 :			#endif /* XVID_AUTO_INCLUDE && !defined(REF) */

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4