1 |
/************************************************************************** |
/***************************************************************************** |
2 |
* |
* |
3 |
* XVID MPEG-4 VIDEO CODEC |
* XVID MPEG-4 VIDEO CODEC |
4 |
* - QPel interpolation - |
* - QPel interpolation - |
5 |
* |
* |
6 |
|
* Copyright(C) 2003 Pascal Massimino <skal@planet-d.net> |
7 |
|
* |
8 |
* This program is free software; you can redistribute it and/or modify |
* This program is free software; you can redistribute it and/or modify |
9 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
10 |
* the Free Software Foundation; either version 2 of the License, or |
* the Free Software Foundation; either version 2 of the License, or |
17 |
* |
* |
18 |
* You should have received a copy of the GNU General Public License |
* You should have received a copy of the GNU General Public License |
19 |
* along with this program; if not, write to the Free Software |
* along with this program; if not, write to the Free Software |
20 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
* |
|
|
*************************************************************************/ |
|
|
|
|
|
/************************************************************************** |
|
|
* |
|
|
* History: |
|
21 |
* |
* |
22 |
* 22.10.2002 initial coding - Skal - |
* $Id$ |
23 |
* |
* |
24 |
*************************************************************************/ |
****************************************************************************/ |
25 |
|
|
26 |
#ifndef XVID_AUTO_INCLUDE |
#ifndef XVID_AUTO_INCLUDE |
27 |
|
|
28 |
#include "../portab.h" |
#include "../portab.h" |
29 |
#include "./qpel.h" |
#include "qpel.h" |
30 |
|
|
31 |
////////////////////////////////////////////////////////// |
/* Quarterpel FIR definition |
32 |
|
****************************************************************************/ |
33 |
|
|
34 |
static const int32_t FIR_Tab_8[9][8] = { |
static const int32_t FIR_Tab_8[9][8] = { |
35 |
{ 14, -3, 2, -1, 0, 0, 0, 0 } |
{ 14, -3, 2, -1, 0, 0, 0, 0 }, |
36 |
, { 23, 19, -6, 3, -1, 0, 0, 0 } |
{ 23, 19, -6, 3, -1, 0, 0, 0 }, |
37 |
, { -7, 20, 20, -6, 3, -1, 0, 0 } |
{ -7, 20, 20, -6, 3, -1, 0, 0 }, |
38 |
, { 3, -6, 20, 20, -6, 3, -1, 0 } |
{ 3, -6, 20, 20, -6, 3, -1, 0 }, |
39 |
, { -1, 3, -6, 20, 20, -6, 3, -1 } |
{ -1, 3, -6, 20, 20, -6, 3, -1 }, |
40 |
, { 0, -1, 3, -6, 20, 20, -6, 3 } |
{ 0, -1, 3, -6, 20, 20, -6, 3 }, |
41 |
, { 0, 0, -1, 3, -6, 20, 20, -7 } |
{ 0, 0, -1, 3, -6, 20, 20, -7 }, |
42 |
, { 0, 0, 0, -1, 3, -6, 19, 23 } |
{ 0, 0, 0, -1, 3, -6, 19, 23 }, |
43 |
, { 0, 0, 0, 0, -1, 2, -3, 14 } |
{ 0, 0, 0, 0, -1, 2, -3, 14 } |
44 |
}; |
}; |
45 |
|
|
46 |
static const int32_t FIR_Tab_16[17][16] = { |
static const int32_t FIR_Tab_16[17][16] = { |
47 |
{ 14, -3, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } |
{ 14, -3, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
48 |
, { 23, 19, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } |
{ 23, 19, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
49 |
, { -7, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } |
{ -7, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
50 |
, { 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 } |
{ 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
51 |
, { -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0 } |
{ -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0 }, |
52 |
, { 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0 } |
{ 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0 }, |
53 |
, { 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0 } |
{ 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0 }, |
54 |
, { 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0 } |
{ 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0 }, |
55 |
, { 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0 } |
{ 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0 }, |
56 |
, { 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0 } |
{ 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0 }, |
57 |
, { 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0 } |
{ 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0 }, |
58 |
, { 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0 } |
{ 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0 }, |
59 |
, { 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1 } |
{ 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1 }, |
60 |
, { 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3 } |
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3 }, |
61 |
, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -7 } |
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -7 }, |
62 |
, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 19, 23 } |
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 19, 23 }, |
63 |
, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 2, -3, 14 } |
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 2, -3, 14 } |
64 |
}; |
}; |
65 |
|
|
66 |
////////////////////////////////////////////////////////// |
/* Implementation |
67 |
// Implementation |
****************************************************************************/ |
68 |
|
|
69 |
#define XVID_AUTO_INCLUDE |
#define XVID_AUTO_INCLUDE |
70 |
|
/* First auto include this file to generate reference code for SIMD versions |
71 |
|
* This set of functions are good for educational purpose, because they're |
72 |
|
* straightforward to understand, use loops and so on... But obviously they |
73 |
|
* sux when it comes to speed */ |
74 |
|
#define REFERENCE_CODE |
75 |
|
|
76 |
// 16x? filters |
/* 16x? filters */ |
77 |
|
|
78 |
#define SIZE 16 |
#define SIZE 16 |
79 |
#define TABLE FIR_Tab_16 |
#define TABLE FIR_Tab_16 |
80 |
|
|
81 |
#define STORE(d,s) (d) = (s) |
#define STORE(d,s) (d) = (s) |
82 |
|
#define FUNC_H H_Pass_16_C_ref |
83 |
|
#define FUNC_V V_Pass_16_C_ref |
84 |
|
#define FUNC_HA H_Pass_Avrg_16_C_ref |
85 |
|
#define FUNC_VA V_Pass_Avrg_16_C_ref |
86 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C_ref |
87 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C_ref |
88 |
|
|
89 |
|
#include "qpel.c" /* self-include ourself */ |
90 |
|
|
91 |
|
/* note: B-frame always uses Rnd=0... */ |
92 |
|
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
93 |
|
#define FUNC_H H_Pass_16_Add_C_ref |
94 |
|
#define FUNC_V V_Pass_16_Add_C_ref |
95 |
|
#define FUNC_HA H_Pass_Avrg_16_Add_C_ref |
96 |
|
#define FUNC_VA V_Pass_Avrg_16_Add_C_ref |
97 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C_ref |
98 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C_ref |
99 |
|
|
100 |
|
#include "qpel.c" /* self-include ourself */ |
101 |
|
|
102 |
|
#undef SIZE |
103 |
|
#undef TABLE |
104 |
|
|
105 |
|
/* 8x? filters */ |
106 |
|
|
107 |
|
#define SIZE 8 |
108 |
|
#define TABLE FIR_Tab_8 |
109 |
|
|
110 |
|
#define STORE(d,s) (d) = (s) |
111 |
|
#define FUNC_H H_Pass_8_C_ref |
112 |
|
#define FUNC_V V_Pass_8_C_ref |
113 |
|
#define FUNC_HA H_Pass_Avrg_8_C_ref |
114 |
|
#define FUNC_VA V_Pass_Avrg_8_C_ref |
115 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C_ref |
116 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C_ref |
117 |
|
|
118 |
|
#include "qpel.c" /* self-include ourself */ |
119 |
|
|
120 |
|
/* note: B-frame always uses Rnd=0... */ |
121 |
|
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
122 |
|
#define FUNC_H H_Pass_8_Add_C_ref |
123 |
|
#define FUNC_V V_Pass_8_Add_C_ref |
124 |
|
#define FUNC_HA H_Pass_Avrg_8_Add_C_ref |
125 |
|
#define FUNC_VA V_Pass_Avrg_8_Add_C_ref |
126 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C_ref |
127 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C_ref |
128 |
|
|
129 |
|
#include "qpel.c" /* self-include ourself */ |
130 |
|
|
131 |
|
#undef SIZE |
132 |
|
#undef TABLE |
133 |
|
|
134 |
|
/* Then we define more optimized C version where loops are unrolled, where |
135 |
|
* FIR coeffcients are not read from memory but are hardcoded in instructions |
136 |
|
* They should be faster */ |
137 |
|
#undef REFERENCE_CODE |
138 |
|
|
139 |
|
/* 16x? filters */ |
140 |
|
|
141 |
|
#define SIZE 16 |
142 |
|
|
143 |
|
#define STORE(d,s) (d) = (s) |
144 |
#define FUNC_H H_Pass_16_C |
#define FUNC_H H_Pass_16_C |
145 |
#define FUNC_V V_Pass_16_C |
#define FUNC_V V_Pass_16_C |
146 |
#define FUNC_HA H_Pass_Avrg_16_C |
#define FUNC_HA H_Pass_Avrg_16_C |
148 |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C |
149 |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C |
150 |
|
|
151 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
152 |
|
|
153 |
// note: B-frame always uses Rnd=0... |
/* note: B-frame always uses Rnd=0... */ |
154 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
155 |
#define FUNC_H H_Pass_16_Add_C |
#define FUNC_H H_Pass_16_Add_C |
156 |
#define FUNC_V V_Pass_16_Add_C |
#define FUNC_V V_Pass_16_Add_C |
159 |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C |
160 |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C |
161 |
|
|
162 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
163 |
|
|
164 |
#undef SIZE |
#undef SIZE |
165 |
#undef TABLE |
#undef TABLE |
166 |
|
|
167 |
// 8x? filters |
/* 8x? filters */ |
168 |
|
|
169 |
#define SIZE 8 |
#define SIZE 8 |
170 |
#define TABLE FIR_Tab_8 |
#define TABLE FIR_Tab_8 |
177 |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C |
178 |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C |
179 |
|
|
180 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
181 |
|
|
182 |
// note: B-frame always uses Rnd=0... |
/* note: B-frame always uses Rnd=0... */ |
183 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
184 |
#define FUNC_H H_Pass_8_Add_C |
#define FUNC_H H_Pass_8_Add_C |
185 |
#define FUNC_V V_Pass_8_Add_C |
#define FUNC_V V_Pass_8_Add_C |
188 |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C |
189 |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C |
190 |
|
|
191 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
192 |
|
|
193 |
#undef SIZE |
#undef SIZE |
194 |
#undef TABLE |
#undef TABLE |
|
|
|
195 |
#undef XVID_AUTO_INCLUDE |
#undef XVID_AUTO_INCLUDE |
196 |
|
|
197 |
////////////////////////////////////////////////////////// |
/* Global scope hooks |
198 |
// general-purpose hooks |
****************************************************************************/ |
|
// TODO: embed in enc/dec structure? |
|
199 |
|
|
200 |
XVID_QP_FUNCS *xvid_QP_Funcs = 0; |
XVID_QP_FUNCS *xvid_QP_Funcs = 0; |
201 |
XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0; |
XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0; |
202 |
|
|
203 |
////////////////////////////////////////////////////////// |
/* Reference plain C impl. declaration |
204 |
// plain C impl. declaration |
****************************************************************************/ |
205 |
// TODO: should be declared elsewhere? |
|
206 |
|
XVID_QP_FUNCS xvid_QP_Funcs_C_ref = { |
207 |
|
H_Pass_16_C_ref, H_Pass_Avrg_16_C_ref, H_Pass_Avrg_Up_16_C_ref, |
208 |
|
V_Pass_16_C_ref, V_Pass_Avrg_16_C_ref, V_Pass_Avrg_Up_16_C_ref, |
209 |
|
|
210 |
|
H_Pass_8_C_ref, H_Pass_Avrg_8_C_ref, H_Pass_Avrg_Up_8_C_ref, |
211 |
|
V_Pass_8_C_ref, V_Pass_Avrg_8_C_ref, V_Pass_Avrg_Up_8_C_ref |
212 |
|
}; |
213 |
|
|
214 |
|
XVID_QP_FUNCS xvid_QP_Add_Funcs_C_ref = { |
215 |
|
H_Pass_16_Add_C_ref, H_Pass_Avrg_16_Add_C_ref, H_Pass_Avrg_Up_16_Add_C_ref, |
216 |
|
V_Pass_16_Add_C_ref, V_Pass_Avrg_16_Add_C_ref, V_Pass_Avrg_Up_16_Add_C_ref, |
217 |
|
|
218 |
|
H_Pass_8_Add_C_ref, H_Pass_Avrg_8_Add_C_ref, H_Pass_Avrg_Up_8_Add_C_ref, |
219 |
|
V_Pass_8_Add_C_ref, V_Pass_Avrg_8_Add_C_ref, V_Pass_Avrg_Up_8_Add_C_ref |
220 |
|
}; |
221 |
|
|
222 |
|
/* Plain C impl. declaration (faster than ref one) |
223 |
|
****************************************************************************/ |
224 |
|
|
225 |
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
226 |
H_Pass_16_C, H_Pass_Avrg_16_C, H_Pass_Avrg_Up_16_C |
H_Pass_16_C, H_Pass_Avrg_16_C, H_Pass_Avrg_Up_16_C, |
227 |
, V_Pass_16_C, V_Pass_Avrg_16_C, V_Pass_Avrg_Up_16_C |
V_Pass_16_C, V_Pass_Avrg_16_C, V_Pass_Avrg_Up_16_C, |
228 |
|
|
229 |
, H_Pass_8_C,H_Pass_Avrg_8_C,H_Pass_Avrg_Up_8_C |
H_Pass_8_C, H_Pass_Avrg_8_C, H_Pass_Avrg_Up_8_C, |
230 |
, V_Pass_8_C,V_Pass_Avrg_8_C,V_Pass_Avrg_Up_8_C |
V_Pass_8_C, V_Pass_Avrg_8_C, V_Pass_Avrg_Up_8_C |
231 |
}; |
}; |
232 |
|
|
233 |
XVID_QP_FUNCS xvid_QP_Add_Funcs_C = { |
XVID_QP_FUNCS xvid_QP_Add_Funcs_C = { |
234 |
H_Pass_16_Add_C, H_Pass_Avrg_16_Add_C, H_Pass_Avrg_Up_16_Add_C |
H_Pass_16_Add_C, H_Pass_Avrg_16_Add_C, H_Pass_Avrg_Up_16_Add_C, |
235 |
, V_Pass_16_Add_C, V_Pass_Avrg_16_Add_C, V_Pass_Avrg_Up_16_Add_C |
V_Pass_16_Add_C, V_Pass_Avrg_16_Add_C, V_Pass_Avrg_Up_16_Add_C, |
236 |
|
|
237 |
, H_Pass_8_Add_C,H_Pass_Avrg_8_Add_C,H_Pass_Avrg_Up_8_Add_C |
H_Pass_8_Add_C, H_Pass_Avrg_8_Add_C, H_Pass_Avrg_Up_8_Add_C, |
238 |
, V_Pass_8_Add_C,V_Pass_Avrg_8_Add_C,V_Pass_Avrg_Up_8_Add_C |
V_Pass_8_Add_C, V_Pass_Avrg_8_Add_C, V_Pass_Avrg_Up_8_Add_C |
239 |
}; |
}; |
240 |
|
|
241 |
////////////////////////////////////////////////////////// |
/* mmx impl. declaration (see. qpel_mmx.asm |
242 |
// mmx impl. declaration (cf. qpel_mmx.asm) |
****************************************************************************/ |
|
// TODO: should be declared elsewhere? |
|
243 |
|
|
244 |
|
#ifdef ARCH_IS_IA32 |
245 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_mmx); |
246 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_mmx); |
247 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_mmx); |
256 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx); |
257 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx); |
258 |
|
|
|
XVID_QP_FUNCS xvid_QP_Funcs_mmx = { |
|
|
xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx |
|
|
, xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx |
|
|
|
|
|
, xvid_H_Pass_8_mmx,xvid_H_Pass_Avrg_8_mmx,xvid_H_Pass_Avrg_Up_8_mmx |
|
|
, xvid_V_Pass_8_mmx,xvid_V_Pass_Avrg_8_mmx,xvid_V_Pass_Avrg_Up_8_mmx |
|
|
}; |
|
|
|
|
259 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx); |
260 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx); |
261 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx); |
270 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx); |
271 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx); |
272 |
|
|
273 |
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = { |
XVID_QP_FUNCS xvid_QP_Funcs_mmx = { |
274 |
xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx |
xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx, |
275 |
, xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx |
xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx, |
276 |
|
|
277 |
, xvid_H_Pass_8_Add_mmx,xvid_H_Pass_Avrg_8_Add_mmx,xvid_H_Pass_Avrg_Up_8_Add_mmx |
xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx, |
278 |
, xvid_V_Pass_8_Add_mmx,xvid_V_Pass_Avrg_8_Add_mmx,xvid_V_Pass_Avrg_Up_8_Add_mmx |
xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx |
279 |
}; |
}; |
280 |
|
|
281 |
////////////////////////////////////////////////////////// |
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = { |
282 |
// tables for ASM |
xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx, |
283 |
|
xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx, |
284 |
|
|
285 |
extern uint16_t xvid_Expand_mmx[256][4]; // 8b -> 64b expansion table |
xvid_H_Pass_8_Add_mmx, xvid_H_Pass_Avrg_8_Add_mmx, xvid_H_Pass_Avrg_Up_8_Add_mmx, |
286 |
|
xvid_V_Pass_8_Add_mmx, xvid_V_Pass_Avrg_8_Add_mmx, xvid_V_Pass_Avrg_Up_8_Add_mmx, |
287 |
|
}; |
288 |
|
#endif /* ARCH_IS_IA32 */ |
289 |
|
|
290 |
// Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) |
/* tables for ASM |
291 |
// |
****************************************************************************/ |
|
// 17 tables, 2K each => 34K |
|
|
// Mirroring can be acheived composing 11 basic tables |
|
|
// (for instance: (23,19,-6,3)=(20,20,-6,3)+(3,-1,0,0) |
|
|
// Using Symmetries (and bswap) could reduce further |
|
|
// the memory to 7 tables (->14K). |
|
|
|
|
|
extern int16_t xvid_FIR_1_0_0_0[256][4]; |
|
|
extern int16_t xvid_FIR_3_1_0_0[256][4]; |
|
|
extern int16_t xvid_FIR_6_3_1_0[256][4]; |
|
|
extern int16_t xvid_FIR_14_3_2_1[256][4]; |
|
|
extern int16_t xvid_FIR_20_6_3_1[256][4]; |
|
|
extern int16_t xvid_FIR_20_20_6_3[256][4]; |
|
|
extern int16_t xvid_FIR_23_19_6_3[256][4]; |
|
|
extern int16_t xvid_FIR_7_20_20_6[256][4]; |
|
|
extern int16_t xvid_FIR_6_20_20_6[256][4]; |
|
|
extern int16_t xvid_FIR_6_20_20_7[256][4]; |
|
|
extern int16_t xvid_FIR_3_6_20_20[256][4]; |
|
|
extern int16_t xvid_FIR_3_6_19_23[256][4]; |
|
|
extern int16_t xvid_FIR_1_3_6_20[256][4]; |
|
|
extern int16_t xvid_FIR_1_2_3_14[256][4]; |
|
|
extern int16_t xvid_FIR_0_1_3_6[256][4]; |
|
|
extern int16_t xvid_FIR_0_0_1_3[256][4]; |
|
|
extern int16_t xvid_FIR_0_0_0_1[256][4]; |
|
292 |
|
|
293 |
////////////////////////////////////////////////////////// |
#ifdef ARCH_IS_IA32 |
294 |
|
uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */ |
295 |
|
#endif |
296 |
|
|
297 |
uint16_t xvid_Expand_mmx[256][4]; // 8b -> 64b expansion table |
/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) |
298 |
|
* |
299 |
|
* 17 tables, 2K each => 34K |
300 |
|
* Mirroring can be acheived composing 11 basic tables |
301 |
|
* (for instance: (23,19,-6,3)=(20,20,-6,3)+(3,-1,0,0) |
302 |
|
* Using Symmetries (and bswap) could reduce further |
303 |
|
* the memory to 7 tables (->14K). */ |
304 |
|
|
305 |
int16_t xvid_FIR_1_0_0_0[256][4]; |
int16_t xvid_FIR_1_0_0_0[256][4]; |
306 |
int16_t xvid_FIR_3_1_0_0[256][4]; |
int16_t xvid_FIR_3_1_0_0[256][4]; |
333 |
} |
} |
334 |
|
|
335 |
|
|
336 |
void xvid_Init_QP_mmx() |
void xvid_Init_QP() |
337 |
{ |
{ |
338 |
|
#ifdef ARCH_IS_IA32 |
339 |
int i; |
int i; |
340 |
|
|
341 |
for(i=0; i<256; ++i) { |
for(i=0; i<256; ++i) { |
342 |
xvid_Expand_mmx[i][0] = i; |
xvid_Expand_mmx[i][0] = i; |
343 |
xvid_Expand_mmx[i][1] = i; |
xvid_Expand_mmx[i][1] = i; |
344 |
xvid_Expand_mmx[i][2] = i; |
xvid_Expand_mmx[i][2] = i; |
345 |
xvid_Expand_mmx[i][3] = i; |
xvid_Expand_mmx[i][3] = i; |
346 |
} |
} |
347 |
|
#endif |
348 |
|
|
349 |
// Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) |
/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) */ |
350 |
|
|
351 |
Init_FIR_Table(xvid_FIR_1_0_0_0, -1, 0, 0, 0); |
Init_FIR_Table(xvid_FIR_1_0_0_0, -1, 0, 0, 0); |
352 |
Init_FIR_Table(xvid_FIR_3_1_0_0, 3, -1, 0, 0); |
Init_FIR_Table(xvid_FIR_3_1_0_0, 3, -1, 0, 0); |
370 |
|
|
371 |
#endif /* !XVID_AUTO_INCLUDE */ |
#endif /* !XVID_AUTO_INCLUDE */ |
372 |
|
|
373 |
////////////////////////////////////////////////////////// |
#if defined(XVID_AUTO_INCLUDE) && defined(REFERENCE_CODE) |
|
// "reference" filters impl. in plain C |
|
|
////////////////////////////////////////////////////////// |
|
374 |
|
|
375 |
#ifdef XVID_AUTO_INCLUDE |
/***************************************************************************** |
376 |
|
* "reference" filters impl. in plain C |
377 |
|
****************************************************************************/ |
378 |
|
|
379 |
static |
static |
380 |
void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
527 |
#undef FUNC_HA_UP |
#undef FUNC_HA_UP |
528 |
#undef FUNC_VA_UP |
#undef FUNC_VA_UP |
529 |
|
|
530 |
#endif /* XVID_AUTO_INCLUDE */ |
#elif defined(XVID_AUTO_INCLUDE) && !defined(REFERENCE_CODE) |
531 |
|
|
532 |
|
/***************************************************************************** |
533 |
|
* "fast" filters impl. in plain C |
534 |
|
****************************************************************************/ |
535 |
|
|
536 |
|
#define CLIP_STORE(D,C) \ |
537 |
|
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \ |
538 |
|
STORE(D, C) |
539 |
|
|
540 |
|
static void |
541 |
|
FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND) |
542 |
|
{ |
543 |
|
#if (SIZE==16) |
544 |
|
while(H-->0) { |
545 |
|
int C; |
546 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
547 |
|
CLIP_STORE(Dst[ 0],C); |
548 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
549 |
|
CLIP_STORE(Dst[ 1],C); |
550 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
551 |
|
CLIP_STORE(Dst[ 2],C); |
552 |
|
C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]); |
553 |
|
CLIP_STORE(Dst[ 3],C); |
554 |
|
C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]); |
555 |
|
CLIP_STORE(Dst[ 4],C); |
556 |
|
C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]); |
557 |
|
CLIP_STORE(Dst[ 5],C); |
558 |
|
C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]); |
559 |
|
CLIP_STORE(Dst[ 6],C); |
560 |
|
C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]); |
561 |
|
CLIP_STORE(Dst[ 7],C); |
562 |
|
C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]); |
563 |
|
CLIP_STORE(Dst[ 8],C); |
564 |
|
C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]); |
565 |
|
CLIP_STORE(Dst[ 9],C); |
566 |
|
C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]); |
567 |
|
CLIP_STORE(Dst[10],C); |
568 |
|
C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]); |
569 |
|
CLIP_STORE(Dst[11],C); |
570 |
|
C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]); |
571 |
|
CLIP_STORE(Dst[12],C); |
572 |
|
C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16]; |
573 |
|
CLIP_STORE(Dst[13],C); |
574 |
|
C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15]; |
575 |
|
CLIP_STORE(Dst[14],C); |
576 |
|
C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16]; |
577 |
|
CLIP_STORE(Dst[15],C); |
578 |
|
Src += BpS; |
579 |
|
Dst += BpS; |
580 |
|
} |
581 |
|
#else |
582 |
|
while(H-->0) { |
583 |
|
int C; |
584 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
585 |
|
CLIP_STORE(Dst[0],C); |
586 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
587 |
|
CLIP_STORE(Dst[1],C); |
588 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
589 |
|
CLIP_STORE(Dst[2],C); |
590 |
|
C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]); |
591 |
|
CLIP_STORE(Dst[3],C); |
592 |
|
C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]); |
593 |
|
CLIP_STORE(Dst[4],C); |
594 |
|
C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8]; |
595 |
|
CLIP_STORE(Dst[5],C); |
596 |
|
C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7]; |
597 |
|
CLIP_STORE(Dst[6],C); |
598 |
|
C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8]; |
599 |
|
CLIP_STORE(Dst[7],C); |
600 |
|
Src += BpS; |
601 |
|
Dst += BpS; |
602 |
|
} |
603 |
|
#endif |
604 |
|
} |
605 |
|
#undef CLIP_STORE |
606 |
|
|
607 |
|
#define CLIP_STORE(i,C) \ |
608 |
|
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \ |
609 |
|
C = (C+Src[i]+1-RND) >> 1; \ |
610 |
|
STORE(Dst[i], C) |
611 |
|
|
612 |
|
static void |
613 |
|
FUNC_HA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND) |
614 |
|
{ |
615 |
|
#if (SIZE==16) |
616 |
|
while(H-->0) { |
617 |
|
int C; |
618 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
619 |
|
CLIP_STORE(0,C); |
620 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
621 |
|
CLIP_STORE( 1,C); |
622 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
623 |
|
CLIP_STORE( 2,C); |
624 |
|
C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]); |
625 |
|
CLIP_STORE( 3,C); |
626 |
|
C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]); |
627 |
|
CLIP_STORE( 4,C); |
628 |
|
C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]); |
629 |
|
CLIP_STORE( 5,C); |
630 |
|
C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]); |
631 |
|
CLIP_STORE( 6,C); |
632 |
|
C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]); |
633 |
|
CLIP_STORE( 7,C); |
634 |
|
C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]); |
635 |
|
CLIP_STORE( 8,C); |
636 |
|
C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]); |
637 |
|
CLIP_STORE( 9,C); |
638 |
|
C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]); |
639 |
|
CLIP_STORE(10,C); |
640 |
|
C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]); |
641 |
|
CLIP_STORE(11,C); |
642 |
|
C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]); |
643 |
|
CLIP_STORE(12,C); |
644 |
|
C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16]; |
645 |
|
CLIP_STORE(13,C); |
646 |
|
C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15]; |
647 |
|
CLIP_STORE(14,C); |
648 |
|
C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16]; |
649 |
|
CLIP_STORE(15,C); |
650 |
|
Src += BpS; |
651 |
|
Dst += BpS; |
652 |
|
} |
653 |
|
#else |
654 |
|
while(H-->0) { |
655 |
|
int C; |
656 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
657 |
|
CLIP_STORE(0,C); |
658 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
659 |
|
CLIP_STORE(1,C); |
660 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
661 |
|
CLIP_STORE(2,C); |
662 |
|
C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]); |
663 |
|
CLIP_STORE(3,C); |
664 |
|
C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]); |
665 |
|
CLIP_STORE(4,C); |
666 |
|
C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8]; |
667 |
|
CLIP_STORE(5,C); |
668 |
|
C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7]; |
669 |
|
CLIP_STORE(6,C); |
670 |
|
C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8]; |
671 |
|
CLIP_STORE(7,C); |
672 |
|
Src += BpS; |
673 |
|
Dst += BpS; |
674 |
|
} |
675 |
|
#endif |
676 |
|
} |
677 |
|
#undef CLIP_STORE |
678 |
|
|
679 |
|
#define CLIP_STORE(i,C) \ |
680 |
|
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \ |
681 |
|
C = (C+Src[i+1]+1-RND) >> 1; \ |
682 |
|
STORE(Dst[i], C) |
683 |
|
|
684 |
|
static void |
685 |
|
FUNC_HA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND) |
686 |
|
{ |
687 |
|
#if (SIZE==16) |
688 |
|
while(H-->0) { |
689 |
|
int C; |
690 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
691 |
|
CLIP_STORE(0,C); |
692 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
693 |
|
CLIP_STORE( 1,C); |
694 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
695 |
|
CLIP_STORE( 2,C); |
696 |
|
C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]); |
697 |
|
CLIP_STORE( 3,C); |
698 |
|
C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]); |
699 |
|
CLIP_STORE( 4,C); |
700 |
|
C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]); |
701 |
|
CLIP_STORE( 5,C); |
702 |
|
C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]); |
703 |
|
CLIP_STORE( 6,C); |
704 |
|
C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]); |
705 |
|
CLIP_STORE( 7,C); |
706 |
|
C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]); |
707 |
|
CLIP_STORE( 8,C); |
708 |
|
C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]); |
709 |
|
CLIP_STORE( 9,C); |
710 |
|
C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]); |
711 |
|
CLIP_STORE(10,C); |
712 |
|
C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]); |
713 |
|
CLIP_STORE(11,C); |
714 |
|
C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]); |
715 |
|
CLIP_STORE(12,C); |
716 |
|
C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16]; |
717 |
|
CLIP_STORE(13,C); |
718 |
|
C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15]; |
719 |
|
CLIP_STORE(14,C); |
720 |
|
C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16]; |
721 |
|
CLIP_STORE(15,C); |
722 |
|
Src += BpS; |
723 |
|
Dst += BpS; |
724 |
|
} |
725 |
|
#else |
726 |
|
while(H-->0) { |
727 |
|
int C; |
728 |
|
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4]; |
729 |
|
CLIP_STORE(0,C); |
730 |
|
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5]; |
731 |
|
CLIP_STORE(1,C); |
732 |
|
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6]; |
733 |
|
CLIP_STORE(2,C); |
734 |
|
C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]); |
735 |
|
CLIP_STORE(3,C); |
736 |
|
C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]); |
737 |
|
CLIP_STORE(4,C); |
738 |
|
C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8]; |
739 |
|
CLIP_STORE(5,C); |
740 |
|
C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7]; |
741 |
|
CLIP_STORE(6,C); |
742 |
|
C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8]; |
743 |
|
CLIP_STORE(7,C); |
744 |
|
Src += BpS; |
745 |
|
Dst += BpS; |
746 |
|
} |
747 |
|
#endif |
748 |
|
} |
749 |
|
#undef CLIP_STORE |
750 |
|
|
751 |
|
//////////////////////////////////////////////////////////
// vertical passes
//////////////////////////////////////////////////////////

// Note: for vertical passes, width (W) needs only be 8 or 16.

/* Clamp the 32x-scaled FIR accumulator C to the valid pixel range and
 * emit it through the includer-supplied STORE() macro.  Every tap set
 * used below sums to 32, so in-range values are normalized by >>5;
 * values outside [0, 255<<5] saturate to 0 or 255. */
#define CLIP_STORE(D,C) \
  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
  STORE(D, C)

/* Vertical quarter-pel interpolation pass.
 *
 *  Dst : destination pixels          Src : source pixels
 *  H   : number of columns to filter (block width, 8 or 16 -- see note)
 *  BpS : bytes per row (row stride for both Src and Dst)
 *  RND : rounding mode (0 or 1); the bias added before >>5 is 16-RND
 *
 * Each loop iteration filters one whole column -- column samples are
 * addressed as Src[BpS*i] -- then steps one pixel to the right
 * (Src += 1, Dst += 1).  Interior rows use the symmetric 8-tap kernel
 * (-1, 3, -6, 20, 20, -6, 3, -1); the three rows nearest each border
 * use border-adapted tap sets (each still summing to 32).  The source
 * column supplies SIZE+1 samples (indices 0..SIZE).
 * NOTE(review): FUNC_V, STORE and SIZE are #defined by the including
 * file -- this header is a template expanded once per qpel variant. */
static void
FUNC_V(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    /* top-border rows 0..2 (border-adapted taps) */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(Dst[BpS* 0],C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(Dst[BpS* 1],C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(Dst[BpS* 2],C);
    /* interior rows 3..12: full symmetric 8-tap kernel */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE(Dst[BpS* 3],C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE(Dst[BpS* 4],C);
    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE(Dst[BpS* 5],C);
    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE(Dst[BpS* 6],C);
    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE(Dst[BpS* 7],C);
    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE(Dst[BpS* 8],C);
    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE(Dst[BpS* 9],C);
    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(Dst[BpS*10],C);
    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(Dst[BpS*11],C);
    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(Dst[BpS*12],C);
    /* bottom-border rows 13..15 (border-adapted taps) */
    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
    CLIP_STORE(Dst[BpS*13],C);
    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(Dst[BpS*14],C);
    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(Dst[BpS*15],C);
    /* advance one pixel to the right */
    Src += 1;
    Dst += 1;
  }
#else
  /* 8-tall variant: same kernel scheme on rows 0..7 (source rows 0..8) */
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(Dst[BpS*0],C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(Dst[BpS*1],C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(Dst[BpS*2],C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(Dst[BpS*3],C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(Dst[BpS*4],C);
    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
    CLIP_STORE(Dst[BpS*5],C);
    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(Dst[BpS*6],C);
    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(Dst[BpS*7],C);
    Src += 1;
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
826 |
|
|
827 |
|
/* Clamp the 32x-scaled FIR accumulator C to [0..255] (taps sum to 32,
 * so >>5 normalizes), then average it with the co-located source pixel
 * Src[BpS*i] -- rounding controlled by 1-RND -- and store via STORE().
 * Note the argument here is the row index i, not a destination lvalue. */
#define CLIP_STORE(i,C) \
  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
  C = (C+Src[BpS*i]+1-RND) >> 1; \
  STORE(Dst[BpS*i], C)

/* Vertical quarter-pel interpolation, averaged with the co-located
 * source pixel (the "interpolate-and-average" variant).
 * The FIR kernel, border handling and column-by-column loop structure
 * are identical to FUNC_V above; only CLIP_STORE differs (it folds in
 * the rounded average with Src[BpS*i] before storing).
 * Parameters as in FUNC_V: H is the number of columns (block width),
 * BpS the row stride, RND the rounding mode (0 or 1).
 * NOTE(review): FUNC_VA, STORE and SIZE are #defined by the includer. */
static void
FUNC_VA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    /* top-border rows 0..2 */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE( 2,C);
    /* interior rows 3..12: symmetric 8-tap kernel */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(12,C);
    /* bottom-border rows 13..15 */
    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(15,C);
    /* advance one pixel to the right */
    Src += 1;
    Dst += 1;
  }
#else
  /* 8-tall variant: rows 0..7 (source rows 0..8) */
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(2,C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(4,C);
    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(7,C);
    Src += 1;
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
898 |
|
|
899 |
|
/* Clamp the 32x-scaled FIR accumulator C to [0..255] (taps sum to 32,
 * so >>5 normalizes), then average it with the source pixel ONE ROW
 * BELOW the destination row, Src[BpS*i+BpS] -- rounding controlled by
 * 1-RND -- and store via STORE().  This is what distinguishes the
 * "_UP" variant from FUNC_VA, which averages with Src[BpS*i]. */
#define CLIP_STORE(i,C) \
  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
  C = (C+Src[BpS*i+BpS]+1-RND) >> 1; \
  STORE(Dst[BpS*i], C)

/* Vertical quarter-pel interpolation, averaged with the source pixel
 * one row below (Src[BpS*i+BpS]).  The FIR kernel, border handling and
 * column-by-column loop are identical to FUNC_V/FUNC_VA above; only
 * CLIP_STORE differs.  Parameters as in FUNC_V: H is the number of
 * columns (block width), BpS the row stride, RND the rounding mode.
 * NOTE(review): FUNC_VA_UP, STORE and SIZE are #defined by the
 * includer. */
static void
FUNC_VA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    /* top-border rows 0..2 */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE( 2,C);
    /* interior rows 3..12: symmetric 8-tap kernel */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(12,C);
    /* bottom-border rows 13..15 */
    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(15,C);
    /* advance one pixel to the right */
    Src += 1;
    Dst += 1;
  }
#else
  /* 8-tall variant: rows 0..7 (source rows 0..8) */
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(2,C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(4,C);
    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(7,C);
    Src += 1;
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
970 |
|
|
971 |
|
/* Template epilogue: undefine the per-variant macros so this header can
 * be #included again with a different STORE/FUNC_* configuration. */
#undef STORE
#undef FUNC_H
#undef FUNC_V
#undef FUNC_HA
#undef FUNC_VA
#undef FUNC_HA_UP
#undef FUNC_VA_UP

#endif /* XVID_AUTO_INCLUDE && !defined(REF) */