1 |
/***************************************************************************** |
/***************************************************************************** |
2 |
* |
* |
3 |
* XVID MPEG-4 VIDEO CODEC |
* XVID MPEG-4 VIDEO CODEC |
4 |
* - SAD Routines header - |
* - Sum Of Absolute Difference header - |
5 |
* |
* |
6 |
* Copyright(C) 2002 Michael Militzer <isibaar@xvid.org> |
* Copyright(C) 2001-2010 Peter Ross <pross@xvid.org> |
|
* |
|
|
* This program is an implementation of a part of one or more MPEG-4 |
|
|
* Video tools as specified in ISO/IEC 14496-2 standard. Those intending |
|
|
* to use this software module in hardware or software products are |
|
|
* advised that its use may infringe existing patents or copyrights, and |
|
|
* any such use would be at such party's own risk. The original |
|
|
* developer of this software module and his/her company, and subsequent |
|
|
* editors and their companies, will have no liability for use of this |
|
|
* software or modifications or derivatives thereof. |
|
7 |
* |
* |
8 |
* This program is free software; you can redistribute it and/or modify |
* This program is free software; you can redistribute it and/or modify |
9 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
19 |
* along with this program; if not, write to the Free Software |
* along with this program; if not, write to the Free Software |
20 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
* |
* |
22 |
* Copyright(C) 2002 Michael Militzer <isibaar@xvid.org> |
* $Id$ |
|
* Copyright(C) 2002 Peter Ross <pross@xvid.org> |
|
23 |
* |
* |
24 |
****************************************************************************/ |
****************************************************************************/ |
25 |
|
|
34 |
extern sadInitFuncPtr sadInit; |
extern sadInitFuncPtr sadInit; |
35 |
sadInitFunc sadInit_altivec; |
sadInitFunc sadInit_altivec; |
36 |
|
|
|
|
|
37 |
typedef uint32_t(sad16Func) (const uint8_t * const cur, |
typedef uint32_t(sad16Func) (const uint8_t * const cur, |
38 |
const uint8_t * const ref, |
const uint8_t * const ref, |
39 |
const uint32_t stride, |
const uint32_t stride, |
41 |
typedef sad16Func *sad16FuncPtr; |
typedef sad16Func *sad16FuncPtr; |
42 |
extern sad16FuncPtr sad16; |
extern sad16FuncPtr sad16; |
43 |
sad16Func sad16_c; |
sad16Func sad16_c; |
44 |
|
|
45 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
46 |
sad16Func sad16_mmx; |
sad16Func sad16_mmx; |
47 |
sad16Func sad16_xmm; |
sad16Func sad16_xmm; |
48 |
|
sad16Func sad16_3dne; |
49 |
sad16Func sad16_sse2; |
sad16Func sad16_sse2; |
50 |
sad16Func sad16_altivec; |
sad16Func sad16_sse3; |
51 |
|
#endif |
52 |
|
|
53 |
|
#ifdef ARCH_IS_IA64 |
54 |
sad16Func sad16_ia64; |
sad16Func sad16_ia64; |
55 |
|
#endif |
56 |
|
|
57 |
sad16Func mrsad16_c; |
#ifdef ARCH_IS_PPC |
58 |
|
sad16Func sad16_altivec_c; |
59 |
|
#endif |
60 |
|
|
61 |
|
sad16Func mrsad16_c; |
62 |
|
|
63 |
typedef uint32_t(sad8Func) (const uint8_t * const cur, |
typedef uint32_t(sad8Func) (const uint8_t * const cur, |
64 |
const uint8_t * const ref, |
const uint8_t * const ref, |
66 |
typedef sad8Func *sad8FuncPtr; |
typedef sad8Func *sad8FuncPtr; |
67 |
extern sad8FuncPtr sad8; |
extern sad8FuncPtr sad8; |
68 |
sad8Func sad8_c; |
sad8Func sad8_c; |
69 |
|
|
70 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
71 |
sad8Func sad8_mmx; |
sad8Func sad8_mmx; |
72 |
sad8Func sad8_xmm; |
sad8Func sad8_xmm; |
73 |
sad8Func sad8_altivec; |
sad8Func sad8_3dne; |
74 |
|
#endif |
75 |
|
|
76 |
|
#ifdef ARCH_IS_IA64 |
77 |
sad8Func sad8_ia64; |
sad8Func sad8_ia64; |
78 |
|
#endif |
79 |
|
|
80 |
|
#ifdef ARCH_IS_PPC |
81 |
|
sad8Func sad8_altivec_c; |
82 |
|
#endif |
83 |
|
|
84 |
typedef uint32_t(sad16biFunc) (const uint8_t * const cur, |
typedef uint32_t(sad16biFunc) (const uint8_t * const cur, |
85 |
const uint8_t * const ref1, |
const uint8_t * const ref1, |
88 |
typedef sad16biFunc *sad16biFuncPtr; |
typedef sad16biFunc *sad16biFuncPtr; |
89 |
extern sad16biFuncPtr sad16bi; |
extern sad16biFuncPtr sad16bi; |
90 |
sad16biFunc sad16bi_c; |
sad16biFunc sad16bi_c; |
91 |
sad16biFunc sad16bi_ia64; |
|
92 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
93 |
sad16biFunc sad16bi_mmx; |
sad16biFunc sad16bi_mmx; |
94 |
sad16biFunc sad16bi_xmm; |
sad16biFunc sad16bi_xmm; |
95 |
|
sad16biFunc sad16bi_3dne; |
96 |
sad16biFunc sad16bi_3dn; |
sad16biFunc sad16bi_3dn; |
97 |
|
#endif |
98 |
|
|
99 |
|
#ifdef ARCH_IS_IA64 |
100 |
|
sad16biFunc sad16bi_ia64; |
101 |
|
#endif |
102 |
|
|
103 |
|
#ifdef ARCH_IS_PPC |
104 |
|
sad16biFunc sad16bi_altivec_c; |
105 |
|
#endif |
106 |
|
|
107 |
typedef uint32_t(sad8biFunc) (const uint8_t * const cur, |
typedef uint32_t(sad8biFunc) (const uint8_t * const cur, |
108 |
const uint8_t * const ref1, |
const uint8_t * const ref1, |
111 |
typedef sad8biFunc *sad8biFuncPtr; |
typedef sad8biFunc *sad8biFuncPtr; |
112 |
extern sad8biFuncPtr sad8bi; |
extern sad8biFuncPtr sad8bi; |
113 |
sad8biFunc sad8bi_c; |
sad8biFunc sad8bi_c; |
114 |
|
|
115 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
116 |
sad8biFunc sad8bi_mmx; |
sad8biFunc sad8bi_mmx; |
117 |
sad8biFunc sad8bi_xmm; |
sad8biFunc sad8bi_xmm; |
118 |
|
sad8biFunc sad8bi_3dne; |
119 |
sad8biFunc sad8bi_3dn; |
sad8biFunc sad8bi_3dn; |
120 |
|
#endif |
121 |
|
|
122 |
typedef uint32_t(dev16Func) (const uint8_t * const cur, |
typedef uint32_t(dev16Func) (const uint8_t * const cur, |
123 |
const uint32_t stride); |
const uint32_t stride); |
124 |
typedef dev16Func *dev16FuncPtr; |
typedef dev16Func *dev16FuncPtr; |
125 |
extern dev16FuncPtr dev16; |
extern dev16FuncPtr dev16; |
126 |
dev16Func dev16_c; |
dev16Func dev16_c; |
|
dev16Func dev16_mmx; |
|
|
dev16Func dev16_xmm; |
|
|
dev16Func dev16_sse2; |
|
|
dev16Func dev16_altivec; |
|
|
dev16Func dev16_ia64; |
|
127 |
|
|
128 |
/* plain c */ |
typedef uint32_t (sad16vFunc)( const uint8_t * const cur, |
|
/* |
|
|
|
|
|
uint32_t sad16(const uint8_t * const cur, |
|
129 |
const uint8_t * const ref, |
const uint8_t * const ref, |
130 |
const uint32_t stride, |
const uint32_t stride, int32_t *sad8); |
131 |
const uint32_t best_sad); |
typedef sad16vFunc *sad16vFuncPtr; |
132 |
|
extern sad16vFuncPtr sad16v; |
133 |
|
|
134 |
uint32_t sad8(const uint8_t * const cur, |
sad16vFunc sad16v_c; |
135 |
const uint8_t * const ref, |
sad16vFunc sad32v_c; |
|
const uint32_t stride); |
|
136 |
|
|
137 |
uint32_t dev16(const uint8_t * const cur, |
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
138 |
const uint32_t stride); |
dev16Func dev16_mmx; |
139 |
*/ |
dev16Func dev16_xmm; |
140 |
/* mmx */ |
dev16Func dev16_3dne; |
141 |
/* |
dev16Func dev16_sse2; |
142 |
|
dev16Func dev16_sse3; |
143 |
uint32_t sad16_mmx(const uint8_t * const cur, |
sad16vFunc sad16v_xmm; |
144 |
const uint8_t * const ref, |
sad16vFunc sad16v_mmx; |
145 |
const uint32_t stride, |
#endif |
|
const uint32_t best_sad); |
|
|
|
|
|
uint32_t sad8_mmx(const uint8_t * const cur, |
|
|
const uint8_t * const ref, |
|
|
const uint32_t stride); |
|
|
|
|
|
|
|
|
uint32_t dev16_mmx(const uint8_t * const cur, |
|
|
const uint32_t stride); |
|
|
|
|
|
*/ |
|
|
/* xmm */ |
|
|
/* |
|
|
uint32_t sad16_xmm(const uint8_t * const cur, |
|
|
const uint8_t * const ref, |
|
|
const uint32_t stride, |
|
|
const uint32_t best_sad); |
|
146 |
|
|
147 |
uint32_t sad8_xmm(const uint8_t * const cur, |
#ifdef ARCH_IS_IA64 |
148 |
const uint8_t * const ref, |
dev16Func dev16_ia64; |
149 |
const uint32_t stride); |
#endif |
150 |
|
|
151 |
uint32_t dev16_xmm(const uint8_t * const cur, |
#ifdef ARCH_IS_PPC |
152 |
const uint32_t stride); |
dev16Func dev16_altivec_c; |
153 |
*/ |
#endif |
154 |
|
|
155 |
|
/* This function assumes blocks use 16bit signed elements */ |
156 |
|
typedef uint32_t (sse8Func_16bit)(const int16_t * cur, |
157 |
|
const int16_t * ref, |
158 |
|
const uint32_t stride); |
159 |
|
typedef sse8Func_16bit *sse8Func_16bitPtr; |
160 |
|
extern sse8Func_16bitPtr sse8_16bit; |
161 |
|
|
162 |
|
sse8Func_16bit sse8_16bit_c; |
163 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
164 |
|
sse8Func_16bit sse8_16bit_mmx; |
165 |
|
#endif |
166 |
|
|
167 |
|
#ifdef ARCH_IS_PPC |
168 |
|
sse8Func_16bit sse8_16bit_altivec_c; |
169 |
|
#endif |
170 |
|
|
171 |
|
/* This function assumes blocks use 8bit *un*signed elements */ |
172 |
|
typedef uint32_t (sse8Func_8bit)(const uint8_t * cur, |
173 |
|
const uint8_t * ref, |
174 |
|
const uint32_t stride); |
175 |
|
typedef sse8Func_8bit *sse8Func_8bitPtr; |
176 |
|
extern sse8Func_8bitPtr sse8_8bit; |
177 |
|
|
178 |
|
sse8Func_8bit sse8_8bit_c; |
179 |
|
|
180 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
181 |
|
sse8Func_8bit sse8_8bit_mmx; |
182 |
|
#endif |
183 |
|
|
184 |
|
typedef uint32_t (sseh8Func_16bit)(const int16_t * cur, |
185 |
|
const int16_t * ref, |
186 |
|
uint16_t mask); |
187 |
|
typedef sseh8Func_16bit *sseh8Func_16bitPtr; |
188 |
|
extern sseh8Func_16bitPtr sseh8_16bit; |
189 |
|
|
190 |
|
sseh8Func_16bit sseh8_16bit_c; |
191 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
192 |
|
sseh8Func_16bit sseh8_16bit_sse2; |
193 |
|
#endif |
194 |
|
|
195 |
|
typedef uint32_t (coeff8_energyFunc)(const int16_t * cur); |
196 |
|
typedef coeff8_energyFunc *coeff8_energyFunc_Ptr; |
197 |
|
extern coeff8_energyFunc_Ptr coeff8_energy; |
198 |
|
|
199 |
|
coeff8_energyFunc coeff8_energy_c; |
200 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
201 |
|
coeff8_energyFunc coeff8_energy_sse2; |
202 |
|
#endif |
203 |
|
|
204 |
|
typedef uint32_t (blocksum8Func)(const uint8_t * cur, int stride, |
205 |
|
uint16_t sums[4], uint32_t squares[4]); |
206 |
|
typedef blocksum8Func *blocksum8Func_Ptr; |
207 |
|
extern blocksum8Func_Ptr blocksum8; |
208 |
|
|
209 |
|
blocksum8Func blocksum8_c; |
210 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
211 |
|
blocksum8Func blocksum8_sse2; |
212 |
|
#endif |
213 |
|
|
214 |
|
/* Coeffs for MSE_H calculation */ |
215 |
|
|
216 |
|
static const int16_t Inv_iMask_Coeff[64] = { |
217 |
|
0, 155, 128, 328, 737, 2048, 3329, 4763, |
218 |
|
184, 184, 251, 462, 865, 4306, 4608, 3872, |
219 |
|
251, 216, 328, 737, 2048, 4159, 6094, 4014, |
220 |
|
251, 370, 620, 1076, 3329, 9688, 8192, 4920, |
221 |
|
415, 620, 1752, 4014, 5919, 15207, 13579, 7589, |
222 |
|
737, 1568, 3872, 5243, 8398, 13844, 16345, 10834, |
223 |
|
3073, 5243, 7787, 9688, 13579, 18741, 18433, 13057, |
224 |
|
6636, 10834, 11552, 12294, 16056, 12800, 13579, 12545 |
225 |
|
}; |
226 |
|
|
227 |
|
static const uint16_t iCSF_Coeff[64] = { |
228 |
|
26353, 38331, 42164, 26353, 17568, 10541, 8268, 6912, |
229 |
|
35137, 35137, 30117, 22192, 16217, 7270, 7027, 7666, |
230 |
|
30117, 32434, 26353, 17568, 10541, 7397, 6111, 7529, |
231 |
|
30117, 24803, 19166, 14539, 8268, 4846, 5271, 6801, |
232 |
|
23425, 19166, 11396, 7529, 6201, 3868, 4094, 5476, |
233 |
|
17568, 12047, 7666, 6588, 5205, 4054, 3731, 4583, |
234 |
|
8605, 6588, 5406, 4846, 4094, 3485, 3514, 4175, |
235 |
|
5856, 4583, 4438, 4302, 3765, 4216, 4094, 4259 |
236 |
|
}; |
237 |
|
|
238 |
|
static const uint16_t iCSF_Round[64] = { |
239 |
|
1, 1, 1, 1, 2, 3, 4, 5, |
240 |
|
1, 1, 1, 1, 2, 5, 5, 4, |
241 |
|
1, 1, 1, 2, 3, 4, 5, 4, |
242 |
|
1, 1, 2, 2, 4, 7, 6, 5, |
243 |
|
1, 2, 3, 4, 5, 8, 8, 6, |
244 |
|
2, 3, 4, 5, 6, 8, 9, 7, |
245 |
|
4, 5, 6, 7, 8, 9, 9, 8, |
246 |
|
6, 7, 7, 8, 9, 8, 8, 8 |
247 |
|
}; |
248 |
|
|
249 |
#endif /* _ENCODER_SAD_H_ */ |
#endif /* _ENCODER_SAD_H_ */ |