3 |
* XVID MPEG-4 VIDEO CODEC |
* XVID MPEG-4 VIDEO CODEC |
4 |
* - Sum Of Absolute Difference header - |
* - Sum Of Absolute Difference header - |
5 |
* |
* |
6 |
* Copyright(C) 2001-2003 Peter Ross <pross@xvid.org> |
* Copyright(C) 2001-2010 Peter Ross <pross@xvid.org> |
7 |
* |
* |
8 |
* This program is free software ; you can redistribute it and/or modify |
* This program is free software ; you can redistribute it and/or modify |
9 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
42 |
extern sad16FuncPtr sad16; |
extern sad16FuncPtr sad16; |
43 |
sad16Func sad16_c; |
sad16Func sad16_c; |
44 |
|
|
45 |
#ifdef ARCH_IS_IA32 |
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
46 |
sad16Func sad16_mmx; |
sad16Func sad16_mmx; |
47 |
sad16Func sad16_xmm; |
sad16Func sad16_xmm; |
48 |
sad16Func sad16_3dne; |
sad16Func sad16_3dne; |
49 |
sad16Func sad16_sse2; |
sad16Func sad16_sse2; |
50 |
|
sad16Func sad16_sse3; |
51 |
#endif |
#endif |
52 |
|
|
53 |
#ifdef ARCH_IS_IA64 |
#ifdef ARCH_IS_IA64 |
58 |
sad16Func sad16_altivec_c; |
sad16Func sad16_altivec_c; |
59 |
#endif |
#endif |
60 |
|
|
|
#ifdef ARCH_IS_X86_64 |
|
|
sad16Func sad16_x86_64; |
|
|
#endif |
|
|
|
|
61 |
sad16Func mrsad16_c; |
sad16Func mrsad16_c; |
62 |
|
|
63 |
typedef uint32_t(sad8Func) (const uint8_t * const cur, |
typedef uint32_t(sad8Func) (const uint8_t * const cur, |
67 |
extern sad8FuncPtr sad8; |
extern sad8FuncPtr sad8; |
68 |
sad8Func sad8_c; |
sad8Func sad8_c; |
69 |
|
|
70 |
#ifdef ARCH_IS_IA32 |
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
71 |
sad8Func sad8_mmx; |
sad8Func sad8_mmx; |
72 |
sad8Func sad8_xmm; |
sad8Func sad8_xmm; |
73 |
sad8Func sad8_3dne; |
sad8Func sad8_3dne; |
81 |
sad8Func sad8_altivec_c; |
sad8Func sad8_altivec_c; |
82 |
#endif |
#endif |
83 |
|
|
|
#ifdef ARCH_IS_X86_64 |
|
|
sad8Func sad8_x86_64; |
|
|
#endif |
|
|
|
|
84 |
typedef uint32_t(sad16biFunc) (const uint8_t * const cur, |
typedef uint32_t(sad16biFunc) (const uint8_t * const cur, |
85 |
const uint8_t * const ref1, |
const uint8_t * const ref1, |
86 |
const uint8_t * const ref2, |
const uint8_t * const ref2, |
89 |
extern sad16biFuncPtr sad16bi; |
extern sad16biFuncPtr sad16bi; |
90 |
sad16biFunc sad16bi_c; |
sad16biFunc sad16bi_c; |
91 |
|
|
92 |
#ifdef ARCH_IS_IA32 |
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
93 |
sad16biFunc sad16bi_mmx; |
sad16biFunc sad16bi_mmx; |
94 |
sad16biFunc sad16bi_xmm; |
sad16biFunc sad16bi_xmm; |
95 |
sad16biFunc sad16bi_3dne; |
sad16biFunc sad16bi_3dne; |
104 |
sad16biFunc sad16bi_altivec_c; |
sad16biFunc sad16bi_altivec_c; |
105 |
#endif |
#endif |
106 |
|
|
|
#ifdef ARCH_IS_X86_64 |
|
|
sad16biFunc sad16bi_x86_64; |
|
|
#endif |
|
|
|
|
107 |
typedef uint32_t(sad8biFunc) (const uint8_t * const cur, |
typedef uint32_t(sad8biFunc) (const uint8_t * const cur, |
108 |
const uint8_t * const ref1, |
const uint8_t * const ref1, |
109 |
const uint8_t * const ref2, |
const uint8_t * const ref2, |
112 |
extern sad8biFuncPtr sad8bi; |
extern sad8biFuncPtr sad8bi; |
113 |
sad8biFunc sad8bi_c; |
sad8biFunc sad8bi_c; |
114 |
|
|
115 |
#ifdef ARCH_IS_IA32 |
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
116 |
sad8biFunc sad8bi_mmx; |
sad8biFunc sad8bi_mmx; |
117 |
sad8biFunc sad8bi_xmm; |
sad8biFunc sad8bi_xmm; |
118 |
sad8biFunc sad8bi_3dne; |
sad8biFunc sad8bi_3dne; |
119 |
sad8biFunc sad8bi_3dn; |
sad8biFunc sad8bi_3dn; |
120 |
#endif |
#endif |
121 |
|
|
|
#ifdef ARCH_IS_X86_64 |
|
|
sad8biFunc sad8bi_x86_64; |
|
|
#endif |
|
|
|
|
122 |
typedef uint32_t(dev16Func) (const uint8_t * const cur, |
typedef uint32_t(dev16Func) (const uint8_t * const cur, |
123 |
const uint32_t stride); |
const uint32_t stride); |
124 |
typedef dev16Func *dev16FuncPtr; |
typedef dev16Func *dev16FuncPtr; |
134 |
sad16vFunc sad16v_c; |
sad16vFunc sad16v_c; |
135 |
sad16vFunc sad32v_c; |
sad16vFunc sad32v_c; |
136 |
|
|
137 |
#ifdef ARCH_IS_IA32 |
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
138 |
dev16Func dev16_mmx; |
dev16Func dev16_mmx; |
139 |
dev16Func dev16_xmm; |
dev16Func dev16_xmm; |
140 |
dev16Func dev16_3dne; |
dev16Func dev16_3dne; |
141 |
dev16Func dev16_sse2; |
dev16Func dev16_sse2; |
142 |
|
dev16Func dev16_sse3; |
143 |
sad16vFunc sad16v_xmm; |
sad16vFunc sad16v_xmm; |
144 |
sad16vFunc sad16v_mmx; |
sad16vFunc sad16v_mmx; |
145 |
#endif |
#endif |
152 |
dev16Func dev16_altivec_c; |
dev16Func dev16_altivec_c; |
153 |
#endif |
#endif |
154 |
|
|
|
#ifdef ARCH_IS_X86_64 |
|
|
dev16Func dev16_x86_64; |
|
|
sad16vFunc sad16v_x86_64; |
|
|
#endif |
|
|
|
|
155 |
/* This function assumes blocks use 16bit signed elements */ |
/* This function assumes blocks use 16bit signed elements */ |
156 |
typedef uint32_t (sse8Func_16bit)(const int16_t * cur, |
typedef uint32_t (sse8Func_16bit)(const int16_t * cur, |
157 |
const int16_t * ref, |
const int16_t * ref, |
160 |
extern sse8Func_16bitPtr sse8_16bit; |
extern sse8Func_16bitPtr sse8_16bit; |
161 |
|
|
162 |
sse8Func_16bit sse8_16bit_c; |
sse8Func_16bit sse8_16bit_c; |
163 |
#ifdef ARCH_IS_IA32 |
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
164 |
sse8Func_16bit sse8_16bit_mmx; |
sse8Func_16bit sse8_16bit_mmx; |
165 |
#endif |
#endif |
166 |
|
|
168 |
sse8Func_16bit sse8_16bit_altivec_c; |
sse8Func_16bit sse8_16bit_altivec_c; |
169 |
#endif |
#endif |
170 |
|
|
|
#ifdef ARCH_IS_X86_64 |
|
|
sse8Func_16bit sse8_16bit_x86_64; |
|
|
#endif |
|
|
|
|
171 |
/* This function assumes blocks use 8bit *un*signed elements */ |
/* This function assumes blocks use 8bit *un*signed elements */ |
172 |
typedef uint32_t (sse8Func_8bit)(const uint8_t * cur, |
typedef uint32_t (sse8Func_8bit)(const uint8_t * cur, |
173 |
const uint8_t * ref, |
const uint8_t * ref, |
177 |
|
|
178 |
sse8Func_8bit sse8_8bit_c; |
sse8Func_8bit sse8_8bit_c; |
179 |
|
|
180 |
#ifdef ARCH_IS_IA32 |
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
181 |
sse8Func_8bit sse8_8bit_mmx; |
sse8Func_8bit sse8_8bit_mmx; |
182 |
#endif |
#endif |
183 |
|
|
184 |
#ifdef ARCH_IS_X86_64 |
typedef uint32_t (sseh8Func_16bit)(const int16_t * cur, |
185 |
sse8Func_8bit sse8_8bit_x86_64; |
const int16_t * ref, |
186 |
#endif |
uint16_t mask); |
187 |
|
typedef sseh8Func_16bit *sseh8Func_16bitPtr; |
188 |
|
extern sseh8Func_16bitPtr sseh8_16bit; |
189 |
|
|
190 |
|
sseh8Func_16bit sseh8_16bit_c; |
191 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
192 |
|
sseh8Func_16bit sseh8_16bit_sse2; |
193 |
|
#endif |
194 |
|
|
195 |
|
typedef uint32_t (coeff8_energyFunc)(const int16_t * cur); |
196 |
|
typedef coeff8_energyFunc *coeff8_energyFunc_Ptr; |
197 |
|
extern coeff8_energyFunc_Ptr coeff8_energy; |
198 |
|
|
199 |
|
coeff8_energyFunc coeff8_energy_c; |
200 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
201 |
|
coeff8_energyFunc coeff8_energy_sse2; |
202 |
|
#endif |
203 |
|
|
204 |
|
typedef uint32_t (blocksum8Func)(const int8_t * cur, int stride, |
205 |
|
uint16_t sums[4], uint32_t squares[4]); |
206 |
|
typedef blocksum8Func *blocksum8Func_Ptr; |
207 |
|
extern blocksum8Func_Ptr blocksum8; |
208 |
|
|
209 |
|
blocksum8Func blocksum8_c; |
210 |
|
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
211 |
|
blocksum8Func blocksum8_sse2; |
212 |
|
#endif |
213 |
|
|
214 |
|
/* Coeffs for MSE_H calculation */ |
215 |
|
|
216 |
|
static const int16_t Inv_iMask_Coeff[64] = { |
217 |
|
0, 155, 128, 328, 737, 2048, 3329, 4763, |
218 |
|
184, 184, 251, 462, 865, 4306, 4608, 3872, |
219 |
|
251, 216, 328, 737, 2048, 4159, 6094, 4014, |
220 |
|
251, 370, 620, 1076, 3329, 9688, 8192, 4920, |
221 |
|
415, 620, 1752, 4014, 5919, 15207, 13579, 7589, |
222 |
|
737, 1568, 3872, 5243, 8398, 13844, 16345, 10834, |
223 |
|
3073, 5243, 7787, 9688, 13579, 18741, 18433, 13057, |
224 |
|
6636, 10834, 11552, 12294, 16056, 12800, 13579, 12545 |
225 |
|
}; |
226 |
|
|
227 |
|
static const uint16_t iCSF_Coeff[64] = { |
228 |
|
26353, 38331, 42164, 26353, 17568, 10541, 8268, 6912, |
229 |
|
35137, 35137, 30117, 22192, 16217, 7270, 7027, 7666, |
230 |
|
30117, 32434, 26353, 17568, 10541, 7397, 6111, 7529, |
231 |
|
30117, 24803, 19166, 14539, 8268, 4846, 5271, 6801, |
232 |
|
23425, 19166, 11396, 7529, 6201, 3868, 4094, 5476, |
233 |
|
17568, 12047, 7666, 6588, 5205, 4054, 3731, 4583, |
234 |
|
8605, 6588, 5406, 4846, 4094, 3485, 3514, 4175, |
235 |
|
5856, 4583, 4438, 4302, 3765, 4216, 4094, 4259 |
236 |
|
}; |
237 |
|
|
238 |
|
static const uint16_t iCSF_Round[64] = { |
239 |
|
1, 1, 1, 1, 2, 3, 4, 5, |
240 |
|
1, 1, 1, 1, 2, 5, 5, 4, |
241 |
|
1, 1, 1, 2, 3, 4, 5, 4, |
242 |
|
1, 1, 2, 2, 4, 7, 6, 5, |
243 |
|
1, 2, 3, 4, 5, 8, 8, 6, |
244 |
|
2, 3, 4, 5, 6, 8, 9, 7, |
245 |
|
4, 5, 6, 7, 8, 9, 9, 8, |
246 |
|
6, 7, 7, 8, 9, 8, 8, 8 |
247 |
|
}; |
248 |
|
|
249 |
#endif /* _ENCODER_SAD_H_ */ |
#endif /* _ENCODER_SAD_H_ */ |