/*****************************************************************************
 *
 *  XVID MPEG-4 VIDEO CODEC
 *  - QPel interpolation -
 *
 *  Copyright(C) 2003 Pascal Massimino <skal@planet-d.net>
 *
 *  This program is free software ; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation ; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program ; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 * $Id$
 *
 ****************************************************************************/

#ifndef XVID_AUTO_INCLUDE |
27 |
|
|
28 |
|
#include "../portab.h" |
29 |
|
#include "qpel.h" |
30 |
|
|
31 |
|
/* Quarterpel FIR definition
 ****************************************************************************/

/*
 * Tap weights for the 8-wide filters: FIR_Tab_8[i][k] is the weight applied
 * to source sample i (0..8) when accumulating output sample k (0..7).
 * Every column sums to 32, which matches the ">> 5" normalisation used by
 * the filter passes in the XVID_AUTO_INCLUDE section of this file.
 */
static const int32_t FIR_Tab_8[9][8] = {
	{ 14, -3,  2, -1,  0,  0,  0,  0 },
	{ 23, 19, -6,  3, -1,  0,  0,  0 },
	{ -7, 20, 20, -6,  3, -1,  0,  0 },
	{  3, -6, 20, 20, -6,  3, -1,  0 },
	{ -1,  3, -6, 20, 20, -6,  3, -1 },
	{  0, -1,  3, -6, 20, 20, -6,  3 },
	{  0,  0, -1,  3, -6, 20, 20, -7 },
	{  0,  0,  0, -1,  3, -6, 19, 23 },
	{  0,  0,  0,  0, -1,  2, -3, 14 }
};

/*
 * Tap weights for the 16-wide filters: FIR_Tab_16[i][k] is the weight applied
 * to source sample i (0..16) when accumulating output sample k (0..15).
 * Same layout as FIR_Tab_8; every column sums to 32.
 */
static const int32_t FIR_Tab_16[17][16] = {
	{ 14, -3,  2, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{ 23, 19, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{ -7, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{ -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0 },
	{  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0 },
	{  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0 },
	{  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0 },
	{  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0 },
	{  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0 },
	{  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0 },
	{  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0 },
	{  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -7 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 19, 23 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  2, -3, 14 }
};

/* Implementation
 ****************************************************************************/

/*
 * The six filter functions are written once, in the "#ifdef XVID_AUTO_INCLUDE"
 * section at the end of this file, in terms of the macros SIZE, TABLE, STORE
 * and FUNC_*.  Each "#include __FILE__" below re-reads this file with
 * XVID_AUTO_INCLUDE defined, so only that section is compiled, producing one
 * set of functions per macro configuration.  That section #undef's STORE and
 * the FUNC_* names when it finishes, which is why they can be defined again
 * before the next inclusion; SIZE and TABLE are undefined here explicitly.
 */
#define XVID_AUTO_INCLUDE

/* 16x? filters */

#define SIZE  16
#define TABLE FIR_Tab_16

/* plain variants: the filtered value overwrites the destination */
#define STORE(d,s)  (d) = (s)
#define FUNC_H      H_Pass_16_C
#define FUNC_V      V_Pass_16_C
#define FUNC_HA     H_Pass_Avrg_16_C
#define FUNC_VA     V_Pass_Avrg_16_C
#define FUNC_HA_UP  H_Pass_Avrg_Up_16_C
#define FUNC_VA_UP  V_Pass_Avrg_Up_16_C

#include __FILE__   /* self-include ourself */

/* "Add" variants: the filtered value is averaged (rounding up) with what is
 * already stored in the destination. */
/* note: B-frame always uses Rnd=0... */
#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
#define FUNC_H      H_Pass_16_Add_C
#define FUNC_V      V_Pass_16_Add_C
#define FUNC_HA     H_Pass_Avrg_16_Add_C
#define FUNC_VA     V_Pass_Avrg_16_Add_C
#define FUNC_HA_UP  H_Pass_Avrg_Up_16_Add_C
#define FUNC_VA_UP  V_Pass_Avrg_Up_16_Add_C

#include __FILE__   /* self-include ourself */

#undef SIZE
#undef TABLE

/* 8x? filters */

#define SIZE  8
#define TABLE FIR_Tab_8

/* plain variants: the filtered value overwrites the destination */
#define STORE(d,s)  (d) = (s)
#define FUNC_H      H_Pass_8_C
#define FUNC_V      V_Pass_8_C
#define FUNC_HA     H_Pass_Avrg_8_C
#define FUNC_VA     V_Pass_Avrg_8_C
#define FUNC_HA_UP  H_Pass_Avrg_Up_8_C
#define FUNC_VA_UP  V_Pass_Avrg_Up_8_C

#include __FILE__   /* self-include ourself */

/* "Add" variants: the filtered value is averaged (rounding up) with what is
 * already stored in the destination. */
/* note: B-frame always uses Rnd=0... */
#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
#define FUNC_H      H_Pass_8_Add_C
#define FUNC_V      V_Pass_8_Add_C
#define FUNC_HA     H_Pass_Avrg_8_Add_C
#define FUNC_VA     V_Pass_Avrg_8_Add_C
#define FUNC_HA_UP  H_Pass_Avrg_Up_8_Add_C
#define FUNC_VA_UP  V_Pass_Avrg_Up_8_Add_C

#include __FILE__   /* self-include ourself */

#undef SIZE
#undef TABLE

#undef XVID_AUTO_INCLUDE

/* general-purpose hooks |
132 |
|
* TODO: embed in enc/dec structure? |
133 |
|
****************************************************************************/ |
134 |
|
|
135 |
|
XVID_QP_FUNCS *xvid_QP_Funcs = 0; |
136 |
|
XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0; |
137 |
|
|
138 |
|
/* plain C impl. declaration |
139 |
|
* TODO: should be declared elsewhere? |
140 |
|
****************************************************************************/ |
141 |
|
|
142 |
|
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
143 |
|
H_Pass_16_C, H_Pass_Avrg_16_C, H_Pass_Avrg_Up_16_C, |
144 |
|
V_Pass_16_C, V_Pass_Avrg_16_C, V_Pass_Avrg_Up_16_C, |
145 |
|
|
146 |
|
H_Pass_8_C, H_Pass_Avrg_8_C, H_Pass_Avrg_Up_8_C, |
147 |
|
V_Pass_8_C, V_Pass_Avrg_8_C, V_Pass_Avrg_Up_8_C |
148 |
|
}; |
149 |
|
|
150 |
|
XVID_QP_FUNCS xvid_QP_Add_Funcs_C = { |
151 |
|
H_Pass_16_Add_C, H_Pass_Avrg_16_Add_C, H_Pass_Avrg_Up_16_Add_C, |
152 |
|
V_Pass_16_Add_C, V_Pass_Avrg_16_Add_C, V_Pass_Avrg_Up_16_Add_C, |
153 |
|
|
154 |
|
H_Pass_8_Add_C, H_Pass_Avrg_8_Add_C, H_Pass_Avrg_Up_8_Add_C, |
155 |
|
V_Pass_8_Add_C, V_Pass_Avrg_8_Add_C, V_Pass_Avrg_Up_8_Add_C |
156 |
|
}; |
157 |
|
|
158 |
|
/* mmx impl. declaration (see qpel_mmx.asm)
 * TODO: should be declared elsewhere?
 ****************************************************************************/

#ifdef ARCH_IS_IA32
/* Externally defined filters, same entry order as xvid_QP_Funcs_C. */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_16_mmx);

extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx);

XVID_QP_FUNCS xvid_QP_Funcs_mmx = {
	xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx,
	xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx,

	xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx,
	xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx
};

/* "Add" counterparts.  Note that the 16-wide symbols are spelled
 * "..._Add_16_mmx" while the 8-wide ones are spelled "..._8_Add_mmx"; the
 * names must match the external definitions, so they are kept as they are. */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_Add_16_mmx);

extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx);

XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = {
	xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx,
	xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx,

	xvid_H_Pass_8_Add_mmx, xvid_H_Pass_Avrg_8_Add_mmx, xvid_H_Pass_Avrg_Up_8_Add_mmx,
	xvid_V_Pass_8_Add_mmx, xvid_V_Pass_Avrg_8_Add_mmx, xvid_V_Pass_Avrg_Up_8_Add_mmx
};
#endif /* ARCH_IS_IA32 */

/* tables for ASM
 ****************************************************************************/

#ifdef ARCH_IS_IA32
uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */
#endif

/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm)
 *
 * 17 tables, 2K each => 34K
 * Mirroring can be achieved by composing 11 basic tables
 * (for instance: (23,19,-6,3)=(20,20,-6,3)+(3,-1,0,0))
 * Using symmetries (and bswap) could further reduce
 * the memory to 7 tables (->14K).
 *
 * Each table name lists the absolute values of its four multipliers; the
 * signed multipliers are supplied when xvid_Init_QP() fills the tables. */

int16_t xvid_FIR_1_0_0_0[256][4];
int16_t xvid_FIR_3_1_0_0[256][4];
int16_t xvid_FIR_6_3_1_0[256][4];
int16_t xvid_FIR_14_3_2_1[256][4];
int16_t xvid_FIR_20_6_3_1[256][4];
int16_t xvid_FIR_20_20_6_3[256][4];
int16_t xvid_FIR_23_19_6_3[256][4];
int16_t xvid_FIR_7_20_20_6[256][4];
int16_t xvid_FIR_6_20_20_6[256][4];
int16_t xvid_FIR_6_20_20_7[256][4];
int16_t xvid_FIR_3_6_20_20[256][4];
int16_t xvid_FIR_3_6_19_23[256][4];
int16_t xvid_FIR_1_3_6_20[256][4];
int16_t xvid_FIR_1_2_3_14[256][4];
int16_t xvid_FIR_0_1_3_6[256][4];
int16_t xvid_FIR_0_0_1_3[256][4];
int16_t xvid_FIR_0_0_0_1[256][4];

/*
 * Fill a 256-row lookup table with products: row i receives
 * { i*A, i*B, i*C, i*D }.
 *
 * Tab        - destination; must provide at least 256 rows of 4 entries
 * A, B, C, D - multipliers for columns 0..3
 *
 * The products are computed as int and narrowed to int16_t on store (now
 * spelled out with a cast).  With i <= 255 the result is exact as long as
 * each multiplier's magnitude is at most 128; the largest one passed by
 * xvid_Init_QP() is 23.
 */
static void Init_FIR_Table(int16_t Tab[][4],
                           int A, int B, int C, int D)
{
	int i;
	for(i=0; i<256; ++i) {
		Tab[i][0] = (int16_t)(i*A);
		Tab[i][1] = (int16_t)(i*B);
		Tab[i][2] = (int16_t)(i*C);
		Tab[i][3] = (int16_t)(i*D);
	}
}


void xvid_Init_QP() |
255 |
|
{ |
256 |
|
#ifdef ARCH_IS_IA32 |
257 |
|
int i; |
258 |
|
|
259 |
|
for(i=0; i<256; ++i) { |
260 |
|
xvid_Expand_mmx[i][0] = i; |
261 |
|
xvid_Expand_mmx[i][1] = i; |
262 |
|
xvid_Expand_mmx[i][2] = i; |
263 |
|
xvid_Expand_mmx[i][3] = i; |
264 |
|
} |
265 |
|
#endif |
266 |
|
|
267 |
|
/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) */ |
268 |
|
|
269 |
|
Init_FIR_Table(xvid_FIR_1_0_0_0, -1, 0, 0, 0); |
270 |
|
Init_FIR_Table(xvid_FIR_3_1_0_0, 3, -1, 0, 0); |
271 |
|
Init_FIR_Table(xvid_FIR_6_3_1_0, -6, 3, -1, 0); |
272 |
|
Init_FIR_Table(xvid_FIR_14_3_2_1, 14, -3, 2, -1); |
273 |
|
Init_FIR_Table(xvid_FIR_20_6_3_1, 20, -6, 3, -1); |
274 |
|
Init_FIR_Table(xvid_FIR_20_20_6_3, 20, 20, -6, 3); |
275 |
|
Init_FIR_Table(xvid_FIR_23_19_6_3, 23, 19, -6, 3); |
276 |
|
Init_FIR_Table(xvid_FIR_7_20_20_6, -7, 20, 20, -6); |
277 |
|
Init_FIR_Table(xvid_FIR_6_20_20_6, -6, 20, 20, -6); |
278 |
|
Init_FIR_Table(xvid_FIR_6_20_20_7, -6, 20, 20, -7); |
279 |
|
Init_FIR_Table(xvid_FIR_3_6_20_20, 3, -6, 20, 20); |
280 |
|
Init_FIR_Table(xvid_FIR_3_6_19_23, 3, -6, 19, 23); |
281 |
|
Init_FIR_Table(xvid_FIR_1_3_6_20, -1, 3, -6, 20); |
282 |
|
Init_FIR_Table(xvid_FIR_1_2_3_14, -1, 2, -3, 14); |
283 |
|
Init_FIR_Table(xvid_FIR_0_1_3_6, 0, -1, 3, -6); |
284 |
|
Init_FIR_Table(xvid_FIR_0_0_1_3, 0, 0, -1, 3); |
285 |
|
Init_FIR_Table(xvid_FIR_0_0_0_1, 0, 0, 0, -1); |
286 |
|
|
287 |
|
} |
288 |
|
|
289 |
|
#endif /* !XVID_AUTO_INCLUDE */ |

/*****************************************************************************
 * "reference" filters impl. in plain C
 ****************************************************************************/

#ifdef XVID_AUTO_INCLUDE |
296 |
|
|
297 |
|
static |
298 |
|
void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
299 |
|
{ |
300 |
|
while(H-->0) { |
301 |
|
int32_t i, k; |
302 |
|
int32_t Sums[SIZE] = { 0 }; |
303 |
|
for(i=0; i<=SIZE; ++i) |
304 |
|
for(k=0; k<SIZE; ++k) |
305 |
|
Sums[k] += TABLE[i][k] * Src[i]; |
306 |
|
|
307 |
|
for(i=0; i<SIZE; ++i) { |
308 |
|
int32_t C = ( Sums[i] + 16-Rnd ) >> 5; |
309 |
|
if (C<0) C = 0; else if (C>255) C = 255; |
310 |
|
STORE(Dst[i], C); |
311 |
|
} |
312 |
|
Src += BpS; |
313 |
|
Dst += BpS; |
314 |
|
} |
315 |
|
} |
316 |
|
|
317 |
|
static |
318 |
|
void FUNC_V(uint8_t *Dst, const uint8_t *Src, int32_t W, int32_t BpS, int32_t Rnd) |
319 |
|
{ |
320 |
|
while(W-->0) { |
321 |
|
int32_t i, k; |
322 |
|
int32_t Sums[SIZE] = { 0 }; |
323 |
|
const uint8_t *S = Src++; |
324 |
|
uint8_t *D = Dst++; |
325 |
|
for(i=0; i<=SIZE; ++i) { |
326 |
|
for(k=0; k<SIZE; ++k) |
327 |
|
Sums[k] += TABLE[i][k] * S[0]; |
328 |
|
S += BpS; |
329 |
|
} |
330 |
|
|
331 |
|
for(i=0; i<SIZE; ++i) { |
332 |
|
int32_t C = ( Sums[i] + 16-Rnd )>>5; |
333 |
|
if (C<0) C = 0; else if (C>255) C = 255; |
334 |
|
STORE(D[0], C); |
335 |
|
D += BpS; |
336 |
|
} |
337 |
|
} |
338 |
|
} |
339 |
|
|
340 |
|
static |
341 |
|
void FUNC_HA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
342 |
|
{ |
343 |
|
while(H-->0) { |
344 |
|
int32_t i, k; |
345 |
|
int32_t Sums[SIZE] = { 0 }; |
346 |
|
for(i=0; i<=SIZE; ++i) |
347 |
|
for(k=0; k<SIZE; ++k) |
348 |
|
Sums[k] += TABLE[i][k] * Src[i]; |
349 |
|
|
350 |
|
for(i=0; i<SIZE; ++i) { |
351 |
|
int32_t C = ( Sums[i] + 16-Rnd ) >> 5; |
352 |
|
if (C<0) C = 0; else if (C>255) C = 255; |
353 |
|
C = (C+Src[i]+1-Rnd) >> 1; |
354 |
|
STORE(Dst[i], C); |
355 |
|
} |
356 |
|
Src += BpS; |
357 |
|
Dst += BpS; |
358 |
|
} |
359 |
|
} |
360 |
|
|
361 |
|
static |
362 |
|
void FUNC_HA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
363 |
|
{ |
364 |
|
while(H-->0) { |
365 |
|
int32_t i, k; |
366 |
|
int32_t Sums[SIZE] = { 0 }; |
367 |
|
for(i=0; i<=SIZE; ++i) |
368 |
|
for(k=0; k<SIZE; ++k) |
369 |
|
Sums[k] += TABLE[i][k] * Src[i]; |
370 |
|
|
371 |
|
for(i=0; i<SIZE; ++i) { |
372 |
|
int32_t C = ( Sums[i] + 16-Rnd ) >> 5; |
373 |
|
if (C<0) C = 0; else if (C>255) C = 255; |
374 |
|
C = (C+Src[i+1]+1-Rnd) >> 1; |
375 |
|
STORE(Dst[i], C); |
376 |
|
} |
377 |
|
Src += BpS; |
378 |
|
Dst += BpS; |
379 |
|
} |
380 |
|
} |
381 |
|
|
382 |
|
static |
383 |
|
void FUNC_VA(uint8_t *Dst, const uint8_t *Src, int32_t W, int32_t BpS, int32_t Rnd) |
384 |
|
{ |
385 |
|
while(W-->0) { |
386 |
|
int32_t i, k; |
387 |
|
int32_t Sums[SIZE] = { 0 }; |
388 |
|
const uint8_t *S = Src; |
389 |
|
uint8_t *D = Dst; |
390 |
|
|
391 |
|
for(i=0; i<=SIZE; ++i) { |
392 |
|
for(k=0; k<SIZE; ++k) |
393 |
|
Sums[k] += TABLE[i][k] * S[0]; |
394 |
|
S += BpS; |
395 |
|
} |
396 |
|
|
397 |
|
S = Src; |
398 |
|
for(i=0; i<SIZE; ++i) { |
399 |
|
int32_t C = ( Sums[i] + 16-Rnd )>>5; |
400 |
|
if (C<0) C = 0; else if (C>255) C = 255; |
401 |
|
C = ( C+S[0]+1-Rnd ) >> 1; |
402 |
|
STORE(D[0], C); |
403 |
|
D += BpS; |
404 |
|
S += BpS; |
405 |
|
} |
406 |
|
Src++; |
407 |
|
Dst++; |
408 |
|
} |
409 |
|
} |
410 |
|
|
411 |
|
static |
412 |
|
void FUNC_VA_UP(uint8_t *Dst, const uint8_t *Src, int32_t W, int32_t BpS, int32_t Rnd) |
413 |
|
{ |
414 |
|
while(W-->0) { |
415 |
|
int32_t i, k; |
416 |
|
int32_t Sums[SIZE] = { 0 }; |
417 |
|
const uint8_t *S = Src; |
418 |
|
uint8_t *D = Dst; |
419 |
|
|
420 |
|
for(i=0; i<=SIZE; ++i) { |
421 |
|
for(k=0; k<SIZE; ++k) |
422 |
|
Sums[k] += TABLE[i][k] * S[0]; |
423 |
|
S += BpS; |
424 |
|
} |
425 |
|
|
426 |
|
S = Src + BpS; |
427 |
|
for(i=0; i<SIZE; ++i) { |
428 |
|
int32_t C = ( Sums[i] + 16-Rnd )>>5; |
429 |
|
if (C<0) C = 0; else if (C>255) C = 255; |
430 |
|
C = ( C+S[0]+1-Rnd ) >> 1; |
431 |
|
STORE(D[0], C); |
432 |
|
D += BpS; |
433 |
|
S += BpS; |
434 |
|
} |
435 |
|
Dst++; |
436 |
|
Src++; |
437 |
|
} |
438 |
|
} |
439 |
|
|
440 |
|
/* Release the per-instantiation macros so that the including section can
 * define them again before its next "#include __FILE__". */
#undef STORE
#undef FUNC_H
#undef FUNC_V
#undef FUNC_HA
#undef FUNC_VA
#undef FUNC_HA_UP
#undef FUNC_VA_UP

#endif /* XVID_AUTO_INCLUDE */ |