Parent Directory
|
Revision Log
Revision 1.27.2.1 - (view) (download)
1 : | edgomez | 1.11 | /***************************************************************************** |
2 : | Isibaar | 1.1 | * |
3 : | edgomez | 1.11 | * XVID MPEG-4 VIDEO CODEC |
4 : | * - Unit tests and benches - | ||
5 : | Isibaar | 1.1 | * |
6 : | edgomez | 1.11 | * Copyright(C) 2002 Pascal Massimino <skal@planet-d.net> |
7 : | Isibaar | 1.1 | * |
8 : | edgomez | 1.11 | * This program is free software; you can redistribute it and/or modify |
9 : | * it under the terms of the GNU General Public License as published by | ||
10 : | * the Free Software Foundation; either version 2 of the License, or | ||
11 : | * (at your option) any later version. | ||
12 : | Isibaar | 1.1 | * |
13 : | edgomez | 1.11 | * This program is distributed in the hope that it will be useful, |
14 : | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | * GNU General Public License for more details. | ||
17 : | Isibaar | 1.1 | * |
18 : | edgomez | 1.11 | * You should have received a copy of the GNU General Public License |
19 : | * along with this program; if not, write to the Free Software | ||
20 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | * | ||
22 : | Isibaar | 1.27.2.1 | * $Id: xvid_bench.c,v 1.27 2005/11/03 05:44:07 Skal Exp $ |
23 : | edgomez | 1.11 | * |
24 : | ****************************************************************************/ | ||
25 : | Isibaar | 1.1 | |
26 : | edgomez | 1.11 | /***************************************************************************** |
27 : | Isibaar | 1.1 | * |
28 : | * 'Reference' output is at the end of file. | ||
29 : | * | ||
30 : | edgomez | 1.11 | * compiles with something like: |
31 : | * gcc -o xvid_bench xvid_bench.c -I../src/ -lxvidcore -lm | ||
32 : | Isibaar | 1.1 | * |
33 : | edgomez | 1.11 | ****************************************************************************/ |
34 : | Isibaar | 1.1 | |
35 : | #include <stdio.h> | ||
36 : | #include <stdlib.h> | ||
37 : | edgomez | 1.11 | #include <string.h> /* for memset */ |
38 : | Isibaar | 1.1 | #include <assert.h> |
39 : | |||
40 : | suxen_drol | 1.9 | #ifndef WIN32 |
41 : | edgomez | 1.11 | #include <sys/time.h> /* for gettimeofday */ |
42 : | suxen_drol | 1.9 | #else |
43 : | #include <time.h> | ||
44 : | #endif | ||
45 : | |||
46 : | |||
47 : | Isibaar | 1.1 | #include "xvid.h" |
48 : | |||
49 : | edgomez | 1.8 | // inner guts |
50 : | Isibaar | 1.1 | #include "dct/idct.h" |
51 : | #include "dct/fdct.h" | ||
52 : | #include "image/colorspace.h" | ||
53 : | #include "image/interpolate8x8.h" | ||
54 : | #include "utils/mem_transfer.h" | ||
55 : | edgomez | 1.11 | #include "quant/quant.h" |
56 : | Isibaar | 1.1 | #include "motion/sad.h" |
57 : | #include "utils/emms.h" | ||
58 : | #include "utils/timer.h" | ||
59 : | #include "quant/quant_matrix.c" | ||
60 : | #include "bitstream/cbp.h" | ||
61 : | Isibaar | 1.27.2.1 | #include "bitstream/bitstream.h" |
62 : | Isibaar | 1.1 | |
63 : | Isibaar | 1.3 | #include <math.h> |
64 : | suxen_drol | 1.9 | |
65 : | #ifndef M_PI | ||
66 : | #define M_PI 3.14159265358979323846 | ||
67 : | #endif | ||
68 : | |||
/* Scale factor for the iteration count of every test (each test computes
 * nb_tests as a constant times speed_ref); on slow machines, decrease this
 * value. */
int speed_ref = 100;
/* Verbosity flag. Not read by the code in this part of the file --
 * presumably set from the command line (TODO confirm). */
int verbose = 0;
/* CPU feature mask. Not referenced in this part of the file --
 * NOTE(review): confirm where it is set and used. */
unsigned int cpu_mask;
72 : | Isibaar | 1.1 | |
73 : | /********************************************************************* | ||
74 : | * misc | ||
75 : | *********************************************************************/ | ||
76 : | |||
/*
 * Returns a timestamp in microseconds, as a double.
 *
 * Non-WIN32 builds read gettimeofday(); WIN32 builds use clock().
 * Callers only ever subtract two timestamps, so the origin of the time
 * scale does not matter, but the unit must be the same on every platform.
 */
double gettime_usec(void)
{
#ifndef WIN32
	struct timeval tv;
	gettimeofday(&tv, 0);
	return tv.tv_sec*1.0e6 + tv.tv_usec;
#else
	clock_t clk;
	clk = clock();
	/* clock() counts ticks: ticks/CLOCKS_PER_SEC is seconds, so scale by 1e6
	 * to get microseconds. The former factor of 1000 yielded milliseconds,
	 * which every caller then printed as "usec". */
	return clk * 1.0e6 / CLOCKS_PER_SEC;
#endif
}
90 : | |||
/* Variance of the 64 samples of an 8x8 block: mean(v*v) - mean(v)^2 */
double sqr_dev(uint8_t v[8*8])
{
	const int count = 8*8;
	const uint8_t *p = v;
	const uint8_t *const end = v + count;
	double total = 0.;
	double total_sq = 0.;

	/* accumulate the plain sum and the sum of squares in one pass */
	while (p != end) {
		const int sample = *p++;
		total    += sample;
		total_sq += sample*sample;
	}

	total_sq /= count;
	total    /= count;
	return total_sq - total*total;
}
106 : | |||
107 : | /********************************************************************* | ||
108 : | * cpu init | ||
109 : | *********************************************************************/ | ||
110 : | |||
/* One CPU feature set to run the tests with (an entry of cpu_list). */
typedef struct {
	const char *name;	/* label printed at the start of each result line */
	unsigned int cpu;	/* XVID_CPU_* flag bits that init_cpu() requests from libxvidcore */
} CPU;
115 : | |||
/*
 * Feature sets that every test iterates over, in this order.
 * Architecture-specific entries are compiled in only when the matching
 * ARCH_IS_* macro is defined. The list is terminated by an entry whose
 * name is 0; the test loops stop on that sentinel.
 */
CPU cpu_list[] = {
	{ "PLAINC ", 0 },
#ifdef ARCH_IS_IA32
	{ "MMX ", XVID_CPU_MMX },
	{ "MMXEXT ", XVID_CPU_MMXEXT | XVID_CPU_MMX },
	{ "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX },
	{ "3DNOW ", XVID_CPU_3DNOW },
	{ "3DNOWE ", XVID_CPU_3DNOW | XVID_CPU_3DNOWEXT },
#endif
#ifdef ARCH_IS_PPC
	{ "ALTIVEC", XVID_CPU_ALTIVEC },
#endif
#ifdef ARCH_IS_X86_64
	{ "X86_64 ", XVID_CPU_ASM},
#endif
#ifdef ARCH_IS_IA64
//	{ "IA64 ", XVID_CPU_IA64 },
#endif
//	{ "TSC ", XVID_CPU_TSC },
	{ 0, 0 }	/* end-of-list sentinel */
};
137 : | Isibaar | 1.1 | |
138 : | |||
139 : | int init_cpu(CPU *cpu) | ||
140 : | { | ||
141 : | edgomez | 1.11 | xvid_gbl_info_t xinfo; |
142 : | |||
143 : | /* Get the available CPU flags */ | ||
144 : | memset(&xinfo, 0, sizeof(xinfo)); | ||
145 : | xinfo.version = XVID_VERSION; | ||
146 : | xvid_global(NULL, XVID_GBL_INFO, &xinfo, NULL); | ||
147 : | |||
148 : | /* Are we trying to test a subset of the host CPU features */ | ||
149 : | if ((xinfo.cpu_flags & cpu->cpu) == cpu->cpu) { | ||
150 : | int xerr; | ||
151 : | xvid_gbl_init_t xinit; | ||
152 : | memset(&xinit, 0, sizeof(xinit)); | ||
153 : | xinit.cpu_flags = cpu->cpu | XVID_CPU_FORCE; | ||
154 : | xinit.version = XVID_VERSION; | ||
155 : | xerr = xvid_global(NULL, XVID_GBL_INIT, &xinit, NULL); | ||
156 : | if (xerr==XVID_ERR_FAIL) { | ||
157 : | /* libxvidcore failed to init */ | ||
158 : | return 0; | ||
159 : | } | ||
160 : | } else { | ||
161 : | /* The host CPU doesn't support some required feature for this test */ | ||
162 : | return(0); | ||
163 : | } | ||
164 : | return 1; | ||
165 : | } | ||
166 : | |||
#define CRC32_REMAINDER 0xCBF43926
/* Starting value that callers pass as the `crc` argument of calc_crc() */
#define CRC32_INITIAL 0xffffffff

/* DO1 folds one byte into `crc` (most significant bit first) and advances
 * the byte pointer `c`; DO2/DO4/DO8 repeat it 2, 4 and 8 times. `c` and
 * `crc` must be plain lvalues, since both are modified. */
#define DO1(c, crc) ((crc) = crc32tab[((unsigned int)((crc)>>24) ^ (*c++)) & 0xff] ^ ((crc) << 8))
#define DO2(c, crc) DO1(c, crc); DO1(c, crc);
#define DO4(c, crc) DO2(c, crc); DO2(c, crc);
#define DO8(c, crc) DO4(c, crc); DO4(c, crc);

/******************************************************************************
* Precomputed AAL5 CRC32 lookup table
******************************************************************************/

/* Read-only, and exactly 32 bits per entry whatever the width of `long` */
static const uint32_t crc32tab[256] = {

	0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L,
	0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L,
	0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L,
	0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL,
	0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L,
	0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L,
	0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L,
	0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL,
	0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L,
	0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L,
	0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L,
	0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL,
	0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L,
	0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L,
	0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L,
	0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL,
	0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL,
	0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L,
	0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L,
	0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL,
	0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL,
	0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L,
	0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L,
	0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL,
	0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL,
	0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L,
	0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L,
	0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL,
	0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL,
	0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L,
	0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L,
	0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL,
	0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L,
	0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL,
	0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL,
	0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L,
	0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L,
	0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL,
	0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL,
	0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L,
	0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L,
	0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL,
	0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL,
	0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L,
	0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L,
	0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL,
	0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL,
	0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L,
	0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L,
	0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL,
	0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L,
	0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L,
	0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L,
	0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL,
	0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L,
	0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L,
	0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L,
	0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL,
	0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L,
	0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L,
	0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L,
	0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL,
	0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L,
	0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L

};

/*
 * Folds `len` bytes starting at `mem` into the running CRC `crc` and
 * returns the updated value. No final XOR is applied, so the result of one
 * call can be passed as `crc` to the next to checksum data in pieces.
 * Pass CRC32_INITIAL to start a new checksum.
 * A zero or negative `len` returns `crc` unchanged.
 */
uint32_t
calc_crc(uint8_t *mem, int len, uint32_t crc)
{
	/* eight bytes per iteration while enough input remains */
	while( len >= 8) {
		DO8(mem, crc);
		len -= 8;
	}

	/* leftover bytes; "> 0" (not "!= 0") so that a negative length cannot
	 * send the loop running through memory */
	while( len > 0 ) {
		DO1(mem, crc);
		len--;
	}

	return crc;
}
263 : | |||
/*
 * Reverses, in place, the byte order of every element of a buffer of `len`
 * bytes made of `element_size`-byte elements.
 *
 * The body is compiled only when ARCH_IS_BIG_ENDIAN is defined; otherwise
 * the function does nothing. Element sizes 1 (nothing to do), 2 and 4 are
 * handled; any other size prints an error and leaves the buffer untouched.
 */
void byte_swap(uint8_t *mem, int len, int element_size) {
#ifdef ARCH_IS_BIG_ENDIAN
	uint8_t *p = mem;
	int remaining;

	switch (element_size) {
	case 1:
		/* No need to swap */
		break;

	case 2:
		for (remaining = len/2; remaining > 0; --remaining, p += 2) {
			const uint8_t b0 = p[0];
			p[0] = p[1];
			p[1] = b0;
		}
		break;

	case 4:
		for (remaining = len/4; remaining > 0; --remaining, p += 4) {
			const uint8_t b0 = p[0];
			const uint8_t b1 = p[1];
			p[0] = p[3];
			p[1] = p[2];
			p[2] = b1;
			p[3] = b0;
		}
		break;

	default:
		printf("ERROR: byte_swap unsupported element_size(%u)\n", element_size);
		break;
	}
#endif
}
301 : | |||
302 : | Isibaar | 1.1 | /********************************************************************* |
303 : | * test DCT | ||
304 : | *********************************************************************/ | ||
305 : | |||
306 : | #define ABS(X) ((X)<0 ? -(X) : (X)) | ||
307 : | |||
308 : | void test_dct() | ||
309 : | { | ||
310 : | edgomez | 1.11 | const int nb_tests = 300*speed_ref; |
311 : | int tst; | ||
312 : | CPU *cpu; | ||
313 : | int i; | ||
314 : | DECLARE_ALIGNED_MATRIX(iDst0, 8, 8, short, 16); | ||
315 : | DECLARE_ALIGNED_MATRIX(iDst, 8, 8, short, 16); | ||
316 : | DECLARE_ALIGNED_MATRIX(fDst, 8, 8, short, 16); | ||
317 : | double overhead; | ||
318 : | |||
319 : | printf( "\n ===== test fdct/idct =====\n" ); | ||
320 : | |||
321 : | for(i=0; i<8*8; ++i) iDst0[i] = (i*7-i*i) & 0x7f; | ||
322 : | overhead = gettime_usec(); | ||
323 : | for(tst=0; tst<nb_tests; ++tst) | ||
324 : | { | ||
325 : | for(i=0; i<8*8; ++i) fDst[i] = iDst0[i]; | ||
326 : | for(i=0; i<8*8; ++i) iDst[i] = fDst[i]; | ||
327 : | } | ||
328 : | overhead = gettime_usec() - overhead; | ||
329 : | |||
330 : | for(cpu = cpu_list; cpu->name!=0; ++cpu) | ||
331 : | { | ||
332 : | double t, PSNR, MSE; | ||
333 : | |||
334 : | if (!init_cpu(cpu)) | ||
335 : | continue; | ||
336 : | |||
337 : | t = gettime_usec(); | ||
338 : | emms(); | ||
339 : | for(tst=0; tst<nb_tests; ++tst) | ||
340 : | { | ||
341 : | for(i=0; i<8*8; ++i) fDst[i] = iDst0[i]; | ||
342 : | fdct(fDst); | ||
343 : | for(i=0; i<8*8; ++i) iDst[i] = fDst[i]; | ||
344 : | idct(iDst); | ||
345 : | } | ||
346 : | emms(); | ||
347 : | t = (gettime_usec() - t - overhead) / nb_tests; | ||
348 : | MSE = 0.; | ||
349 : | for(i=0; i<8*8; ++i) { | ||
350 : | double delta = 1.0*(iDst[i] - iDst0[i]); | ||
351 : | MSE += delta*delta; | ||
352 : | } | ||
353 : | PSNR = (MSE==0.) ? 1.e6 : -4.3429448*log( MSE/64. ); | ||
354 : | printf( "%s - %.3f usec PSNR=%.3f MSE=%.3f %s\n", | ||
355 : | cpu->name, t, PSNR, MSE, | ||
356 : | (ABS(MSE)>=64)? "| ERROR" :""); | ||
357 : | } | ||
358 : | Isibaar | 1.1 | } |
359 : | |||
360 : | /********************************************************************* | ||
361 : | * test SAD | ||
362 : | *********************************************************************/ | ||
363 : | |||
364 : | void test_sad() | ||
365 : | { | ||
366 : | edgomez | 1.11 | const int nb_tests = 2000*speed_ref; |
367 : | int tst; | ||
368 : | CPU *cpu; | ||
369 : | int i; | ||
370 : | DECLARE_ALIGNED_MATRIX(Cur, 16, 16, uint8_t, 16); | ||
371 : | DECLARE_ALIGNED_MATRIX(Ref1, 16, 16, uint8_t, 16); | ||
372 : | DECLARE_ALIGNED_MATRIX(Ref2, 16, 16, uint8_t, 16); | ||
373 : | |||
374 : | printf( "\n ====== test SAD ======\n" ); | ||
375 : | for(i=0; i<16*16;++i) { | ||
376 : | Cur[i] = (i/5) ^ 0x05; | ||
377 : | Ref1[i] = (i + 0x0b) & 0xff; | ||
378 : | Ref2[i] = i ^ 0x76; | ||
379 : | } | ||
380 : | Isibaar | 1.1 | |
381 : | edgomez | 1.11 | for(cpu = cpu_list; cpu->name!=0; ++cpu) |
382 : | { | ||
383 : | double t; | ||
384 : | uint32_t s; | ||
385 : | if (!init_cpu(cpu)) | ||
386 : | continue; | ||
387 : | |||
388 : | t = gettime_usec(); | ||
389 : | emms(); | ||
390 : | for(tst=0; tst<nb_tests; ++tst) s = sad8(Cur, Ref1, 16); | ||
391 : | emms(); | ||
392 : | t = (gettime_usec() - t) / nb_tests; | ||
393 : | printf("%s - sad8 %.3f usec sad=%d %s\n", | ||
394 : | cpu->name, t, s, | ||
395 : | (s!=3776)?"| ERROR": "" ); | ||
396 : | |||
397 : | t = gettime_usec(); | ||
398 : | emms(); | ||
399 : | for(tst=0; tst<nb_tests; ++tst) s = sad16(Cur, Ref1, 16, -1); | ||
400 : | emms(); | ||
401 : | t = (gettime_usec() - t) / nb_tests; | ||
402 : | printf("%s - sad16 %.3f usec sad=%d %s\n", | ||
403 : | cpu->name, t, s, | ||
404 : | (s!=27214)?"| ERROR": "" ); | ||
405 : | |||
406 : | t = gettime_usec(); | ||
407 : | emms(); | ||
408 : | for(tst=0; tst<nb_tests; ++tst) s = sad16bi(Cur, Ref1, Ref2, 16); | ||
409 : | emms(); | ||
410 : | t = (gettime_usec() - t) / nb_tests; | ||
411 : | printf( "%s - sad16bi %.3f usec sad=%d %s\n", | ||
412 : | cpu->name, t, s, | ||
413 : | (s!=26274)?"| ERROR": "" ); | ||
414 : | |||
415 : | t = gettime_usec(); | ||
416 : | emms(); | ||
417 : | for(tst=0; tst<nb_tests; ++tst) s = dev16(Cur, 16); | ||
418 : | emms(); | ||
419 : | t = (gettime_usec() - t) / nb_tests; | ||
420 : | printf( "%s - dev16 %.3f usec sad=%d %s\n", | ||
421 : | cpu->name, t, s, | ||
422 : | (s!=3344)?"| ERROR": "" ); | ||
423 : | |||
424 : | printf( " --- \n" ); | ||
425 : | } | ||
426 : | Isibaar | 1.1 | } |
427 : | |||
428 : | /********************************************************************* | ||
429 : | * test interpolation | ||
430 : | *********************************************************************/ | ||
431 : | |||
/*
 * Helper macros for test_mb(). They are not self-contained: they use the
 * caller's variables i, t, tst, nb_tests, iCrc, Dst and Src0 by name.
 */

/* Prologue of one timed test: clears the 16*8 bytes of Dst, stores the start
 * time in t, then calls emms(). Ends with a ';', so no semicolon follows it
 * where it is used below. */
#define ENTER \
	for(i=0; i<16*8; ++i) Dst[i] = 0; \
	t = gettime_usec(); \
	emms();

/* Epilogue of one timed test: calls emms(), turns t into the mean time per
 * iteration, and stores the CRC of the whole Dst array in iCrc. sizeof(Dst)
 * requires Dst to be a real array, not a pointer. The final ';' is supplied
 * by the user of the macro. */
#define LEAVE \
	emms(); \
	t = (gettime_usec() - t) / nb_tests; \
	iCrc = calc_crc((uint8_t*)Dst, sizeof(Dst), CRC32_INITIAL)

/* Runs FUNC(Dst, Src0, 16, R) nb_tests times between ENTER and LEAVE. */
#define TEST_MB(FUNC, R) \
	ENTER \
	for(tst=0; tst<nb_tests; ++tst) (FUNC)(Dst, Src0, 16, (R)); \
	LEAVE

/* Same as TEST_MB for a FUNC that takes no fourth argument. */
#define TEST_MB2(FUNC) \
	ENTER \
	for(tst=0; tst<nb_tests; ++tst) (FUNC)(Dst, Src0, 16); \
	LEAVE
451 : | Isibaar | 1.1 | |
452 : | |||
453 : | void test_mb() | ||
454 : | { | ||
455 : | edgomez | 1.11 | const int nb_tests = 2000*speed_ref; |
456 : | CPU *cpu; | ||
457 : | const uint8_t Src0[16*9] = { | ||
458 : | /* try to have every possible combinaison of rounding... */ | ||
459 : | 0, 0, 1, 0, 2, 0, 3, 0, 4 ,0,0,0, 0,0,0,0, | ||
460 : | 0, 1, 1, 1, 2, 1, 3, 1, 3 ,0,0,0, 0,0,0,0, | ||
461 : | 0, 2, 1, 2, 2, 2, 3, 2, 2 ,0,0,0, 0,0,0,0, | ||
462 : | 0, 3, 1, 3, 2, 3, 3, 3, 1 ,0,0,0, 0,0,0,0, | ||
463 : | 1, 3, 0, 2, 1, 0, 2, 3, 4 ,0,0,0, 0,0,0,0, | ||
464 : | 2, 2, 1, 2, 0, 1, 3, 5, 3 ,0,0,0, 0,0,0,0, | ||
465 : | 3, 1, 2, 3, 1, 2, 2, 6, 2 ,0,0,0, 0,0,0,0, | ||
466 : | 1, 0, 1, 3, 0, 3, 1, 6, 1 ,0,0,0, 0,0,0,0, | ||
467 : | 4, 3, 2, 1, 2, 3, 4, 0, 3 ,0,0,0, 0,0,0,0 | ||
468 : | }; | ||
469 : | uint8_t Dst[16*8] = {0}; | ||
470 : | |||
471 : | printf( "\n === test block motion ===\n" ); | ||
472 : | |||
473 : | for(cpu = cpu_list; cpu->name!=0; ++cpu) | ||
474 : | { | ||
475 : | double t; | ||
476 : | int tst, i, iCrc; | ||
477 : | |||
478 : | if (!init_cpu(cpu)) | ||
479 : | continue; | ||
480 : | |||
481 : | TEST_MB(interpolate8x8_halfpel_h, 0); | ||
482 : | printf("%s - interp- h-round0 %.3f usec crc32=0x%08x %s\n", | ||
483 : | cpu->name, t, iCrc, | ||
484 : | (iCrc!=0x115381ba)?"| ERROR": "" ); | ||
485 : | |||
486 : | TEST_MB(interpolate8x8_halfpel_h, 1); | ||
487 : | printf("%s - round1 %.3f usec crc32=0x%08x %s\n", | ||
488 : | cpu->name, t, iCrc, | ||
489 : | (iCrc!=0x2b1f528f)?"| ERROR": "" ); | ||
490 : | |||
491 : | |||
492 : | TEST_MB(interpolate8x8_halfpel_v, 0); | ||
493 : | printf("%s - interp- v-round0 %.3f usec crc32=0x%08x %s\n", | ||
494 : | cpu->name, t, iCrc, | ||
495 : | (iCrc!=0x423cdcc7)?"| ERROR": "" ); | ||
496 : | |||
497 : | TEST_MB(interpolate8x8_halfpel_v, 1); | ||
498 : | printf("%s - round1 %.3f usec crc32=0x%08x %s\n", | ||
499 : | cpu->name, t, iCrc, | ||
500 : | (iCrc!=0x42202efe)?"| ERROR": "" ); | ||
501 : | |||
502 : | |||
503 : | TEST_MB(interpolate8x8_halfpel_hv, 0); | ||
504 : | printf("%s - interp-hv-round0 %.3f usec crc32=0x%08x %s\n", | ||
505 : | cpu->name, t, iCrc, | ||
506 : | (iCrc!=0xd198d387)?"| ERROR": "" ); | ||
507 : | |||
508 : | TEST_MB(interpolate8x8_halfpel_hv, 1); | ||
509 : | printf("%s - round1 %.3f usec crc32=0x%08x %s\n", | ||
510 : | cpu->name, t, iCrc, | ||
511 : | (iCrc!=0x9ecfd921)?"| ERROR": "" ); | ||
512 : | Isibaar | 1.1 | |
513 : | Isibaar | 1.3 | |
514 : | edgomez | 1.11 | /* this is a new function, as of 06.06.2002 */ |
515 : | Isibaar | 1.3 | #if 0 |
516 : | edgomez | 1.11 | TEST_MB2(interpolate8x8_avrg); |
517 : | printf("%s - interpolate8x8_c %.3f usec crc32=0x%08x %s\n", | ||
518 : | cpu->name, t, iCrc, | ||
519 : | (iCrc!=8107)?"| ERROR": "" ); | ||
520 : | Isibaar | 1.3 | #endif |
521 : | |||
522 : | Skal | 1.26 | /* New functions for field prediction by CK 1.10.2005 */ |
523 : | #pragma NEW8X4 | ||
524 : | TEST_MB(interpolate8x4_halfpel_h, 0); | ||
525 : | printf("%s - interpfield-h -round0 %.3f usec crc32=0x%08x %s\n", | ||
526 : | cpu->name, t, iCrc, | ||
527 : | (iCrc!=0x9538d6df)?"| ERROR": "" ); | ||
528 : | |||
529 : | TEST_MB(interpolate8x4_halfpel_h, 1); | ||
530 : | printf("%s - round1 %.3f usec crc32=0x%08x %s\n", | ||
531 : | cpu->name, t, iCrc, | ||
532 : | (iCrc!=0xde5f1db4)?"| ERROR": "" ); | ||
533 : | |||
534 : | |||
535 : | TEST_MB(interpolate8x4_halfpel_v, 0); | ||
536 : | printf("%s - interpfield- v-round0 %.3f usec crc32=0x%08x %s\n", | ||
537 : | cpu->name, t, iCrc, | ||
538 : | (iCrc!=0xea5a69ef)?"| ERROR": "" ); | ||
539 : | |||
540 : | TEST_MB(interpolate8x4_halfpel_v, 1); | ||
541 : | printf("%s - round1 %.3f usec crc32=0x%08x %s\n", | ||
542 : | cpu->name, t, iCrc, | ||
543 : | (iCrc!=0x4f10ec0f)?"| ERROR": "" ); | ||
544 : | |||
545 : | |||
546 : | TEST_MB(interpolate8x4_halfpel_hv, 0); | ||
547 : | printf("%s - interpfield-hv-round0 %.3f usec crc32=0x%08x %s\n", | ||
548 : | cpu->name, t, iCrc, | ||
549 : | (iCrc!=0xf97ee367)?"| ERROR": "" ); | ||
550 : | |||
551 : | TEST_MB(interpolate8x4_halfpel_hv, 1); | ||
552 : | printf("%s - round1 %.3f usec crc32=0x%08x %s\n", | ||
553 : | cpu->name, t, iCrc, | ||
554 : | (iCrc!=0xb6a9f581)?"| ERROR": "" ); | ||
555 : | /* End of 8x4 functions */ | ||
556 : | |||
557 : | edgomez | 1.11 | printf( " --- \n" ); |
558 : | } | ||
559 : | Isibaar | 1.1 | } |
560 : | |||
561 : | /********************************************************************* | ||
562 : | * test transfer | ||
563 : | *********************************************************************/ | ||
564 : | |||
/*
 * Helper macros for test_transfer(). They are not self-contained: they use
 * the caller's variables i, tst, t, overhead, s, nb_tests, Src8, Src16,
 * Dst8, Dst16, Ref1 and Ref2 by name.
 */

/* Resets the first 8*32 elements of all six buffers to fixed patterns. */
#define INIT_TRANSFER \
	for(i=0; i<8*32; ++i) { \
		Src8[i] = i; Src16[i] = i; \
		Dst8[i] = 0; Dst16[i] = 0; \
		Ref1[i] = i^0x27; \
		Ref2[i] = i^0x51; \
	}

/* Opens one timed test. Runs INIT_TRANSFER, measures in `overhead` the time
 * of nb_tests passes that only refill DST with i^0x6a, then starts the
 * timer t, calls emms() and opens the timed loop, whose first statement is
 * the same refill. The loop body is left open: the closing brace comes from
 * TEST_TRANSFER_END. */
#define TEST_TRANSFER_BEGIN(DST) \
	INIT_TRANSFER \
	overhead = -gettime_usec(); \
	for(tst=0; tst<nb_tests; ++tst) { \
		for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;\
	} \
	overhead += gettime_usec(); \
	t = gettime_usec(); \
	emms(); \
	for(tst=0; tst<nb_tests; ++tst) { \
		for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;


/* Closes the loop opened by TEST_TRANSFER_BEGIN, calls emms(), turns t into
 * the mean time per iteration with the refill overhead removed, passes the
 * 8*32 elements of DST through byte_swap(), and stores their CRC in s. */
#define TEST_TRANSFER_END(DST) \
	} \
	emms(); \
	t = (gettime_usec()-t -overhead) / nb_tests;\
	byte_swap((uint8_t*)(DST), 8*32*sizeof((DST)[0]), sizeof((DST)[0])); \
	s = calc_crc((uint8_t*)(DST), 8*32*sizeof((DST)[0]), CRC32_INITIAL)

/* Times FUNC(DST, SRC, 32). */
#define TEST_TRANSFER(FUNC, DST, SRC) \
	TEST_TRANSFER_BEGIN(DST); \
	(FUNC)((DST), (SRC), 32); \
	TEST_TRANSFER_END(DST)


/* Like TEST_TRANSFER_BEGIN, but every iteration (in the overhead pass and
 * in the timed pass alike) also refills SRC with i^0x3e. */
#define TEST_TRANSFER2_BEGIN(DST, SRC) \
	INIT_TRANSFER \
	overhead = -gettime_usec(); \
	for(tst=0; tst<nb_tests; ++tst) { \
		for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;\
		for(i=0; i<8*32; ++i) (SRC)[i] = i^0x3e;\
	} \
	overhead += gettime_usec(); \
	t = gettime_usec(); \
	emms(); \
	for(tst=0; tst<nb_tests; ++tst) { \
		for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;\
		for(i=0; i<8*32; ++i) (SRC)[i] = i^0x3e;

/* Counterpart of TEST_TRANSFER2_BEGIN; same statements as TEST_TRANSFER_END. */
#define TEST_TRANSFER2_END(DST) \
	} \
	emms(); \
	t = (gettime_usec()-t -overhead) / nb_tests;\
	byte_swap((uint8_t*)(DST), 8*32*sizeof((DST)[0]), sizeof((DST)[0])); \
	s = calc_crc((uint8_t*)(DST), 8*32*sizeof((DST)[0]), CRC32_INITIAL)

/* Times FUNC(DST, SRC, R1, 32), refilling SRC before every call. */
#define TEST_TRANSFER2(FUNC, DST, SRC, R1) \
	TEST_TRANSFER2_BEGIN(DST,SRC); \
	(FUNC)((DST), (SRC), (R1), 32); \
	TEST_TRANSFER2_END(DST)

/* Times FUNC(DST, SRC, R1, R2, 32); SRC is not refilled between calls. */
#define TEST_TRANSFER3(FUNC, DST, SRC, R1, R2)\
	TEST_TRANSFER_BEGIN(DST); \
	(FUNC)((DST), (SRC), (R1), (R2), 32); \
	TEST_TRANSFER_END(DST)
629 : | Isibaar | 1.1 | |
630 : | void test_transfer() | ||
631 : | { | ||
632 : | edgomez | 1.11 | const int nb_tests = 4000*speed_ref; |
633 : | int i; | ||
634 : | CPU *cpu; | ||
635 : | Skal | 1.21 | // uint8_t Src8[8*32], Dst8[8*32], Ref1[8*32], Ref2[8*32]; |
636 : | // int16_t Src16[8*32], Dst16[8*32]; | ||
637 : | DECLARE_ALIGNED_MATRIX(Src8, 8, 32, uint8_t, CACHE_LINE); | ||
638 : | DECLARE_ALIGNED_MATRIX(Dst8, 8, 32, uint8_t, CACHE_LINE); | ||
639 : | DECLARE_ALIGNED_MATRIX(Ref1, 8, 32, uint8_t, CACHE_LINE); | ||
640 : | DECLARE_ALIGNED_MATRIX(Ref2, 8, 32, uint8_t, CACHE_LINE); | ||
641 : | DECLARE_ALIGNED_MATRIX(Src16, 8, 32, uint16_t, CACHE_LINE); | ||
642 : | DECLARE_ALIGNED_MATRIX(Dst16, 8, 32, uint16_t, CACHE_LINE); | ||
643 : | edgomez | 1.11 | |
644 : | printf( "\n === test transfer ===\n" ); | ||
645 : | |||
646 : | for(cpu = cpu_list; cpu->name!=0; ++cpu) | ||
647 : | { | ||
648 : | double t, overhead; | ||
649 : | int tst, s; | ||
650 : | |||
651 : | if (!init_cpu(cpu)) | ||
652 : | continue; | ||
653 : | |||
654 : | TEST_TRANSFER(transfer_8to16copy, Dst16, Src8); | ||
655 : | printf("%s - 8to16 %.3f usec crc32=0x%08x %s\n", | ||
656 : | cpu->name, t, s, | ||
657 : | (s!=0x115814bb)?"| ERROR": ""); | ||
658 : | |||
659 : | TEST_TRANSFER(transfer_16to8copy, Dst8, Src16); | ||
660 : | printf( "%s - 16to8 %.3f usec crc32=0x%08x %s\n", | ||
661 : | cpu->name, t, s, | ||
662 : | (s!=0xee7ccbb4)?"| ERROR": ""); | ||
663 : | |||
664 : | Skal | 1.26 | /* New functions for field prediction by CK 1.10.2005 */ |
665 : | #pragma NEW8X4 | ||
666 : | TEST_TRANSFER(transfer8x4_copy, Dst8, Src8); | ||
667 : | printf("%s - 8to4 %.3f usec crc32=0x%08x %s\n", | ||
668 : | cpu->name, t, s, | ||
669 : | (s!=0xbb9c3db5)?"| ERROR": ""); | ||
670 : | /* End of new functions */ | ||
671 : | |||
672 : | edgomez | 1.11 | TEST_TRANSFER(transfer8x8_copy, Dst8, Src8); |
673 : | printf("%s - 8to8 %.3f usec crc32=0x%08x %s\n", | ||
674 : | cpu->name, t, s, | ||
675 : | (s!=0xd37b3295)?"| ERROR": ""); | ||
676 : | |||
677 : | TEST_TRANSFER(transfer_16to8add, Dst8, Src16); | ||
678 : | printf("%s - 16to8add %.3f usec crc32=0x%08x %s\n", | ||
679 : | cpu->name, t, s, | ||
680 : | (s!=0xdd817bf4)?"| ERROR": "" ); | ||
681 : | |||
682 : | TEST_TRANSFER2(transfer_8to16sub, Dst16, Src8, Ref1); | ||
683 : | { | ||
684 : | int s1, s2; | ||
685 : | Skal | 1.21 | s1 = calc_crc((uint8_t*)Dst16, 8*32*sizeof(Dst16[0]), CRC32_INITIAL); |
686 : | s2 = calc_crc((uint8_t*)Src8, 8*32*sizeof(Src8[0]), CRC32_INITIAL); | ||
687 : | edgomez | 1.11 | printf("%s - 8to16sub %.3f usec crc32(1)=0x%08x crc32(2)=0x%08x %s %s\n", |
688 : | cpu->name, t, s1, s2, | ||
689 : | (s1!=0xa1e07163)?"| ERROR1": "", | ||
690 : | (s2!=0xd86c5d23)?"| ERROR2": "" ); | ||
691 : | } | ||
692 : | |||
693 : | TEST_TRANSFER3(transfer_8to16sub2, Dst16, Src8, Ref1, Ref2); | ||
694 : | printf("%s - 8to16sub2 %.3f usec crc32=0x%08x %s\n", | ||
695 : | cpu->name, t, s, | ||
696 : | (s!=0x99b6c4c7)?"| ERROR": "" ); | ||
697 : | |||
698 : | printf( " --- \n" ); | ||
699 : | } | ||
700 : | Isibaar | 1.1 | } |
701 : | |||
702 : | /********************************************************************* | ||
703 : | * test quantization | ||
704 : | *********************************************************************/ | ||
705 : | |||
706 : | Isibaar | 1.3 | #define TEST_QUANT(FUNC, DST, SRC) \ |
707 : | edgomez | 1.11 | t = gettime_usec(); \ |
708 : | for(s=CRC32_INITIAL,qm=1; qm<=255; ++qm) { \ | ||
709 : | for(i=0; i<8*8; ++i) Quant[i] = qm; \ | ||
710 : | set_inter_matrix( mpeg_quant_matrices, Quant ); \ | ||
711 : | emms(); \ | ||
712 : | for(q=1; q<=max_Q; ++q) { \ | ||
713 : | for(tst=0; tst<nb_tests; ++tst) \ | ||
714 : | (FUNC)((DST), (SRC), q, mpeg_quant_matrices); \ | ||
715 : | Skal | 1.22 | byte_swap((uint8_t*)(DST), 64*sizeof((DST)[0]), sizeof((DST)[0])); \ |
716 : | s = calc_crc((uint8_t*)(DST), 64*sizeof((DST)[0]), s); \ | ||
717 : | edgomez | 1.11 | } \ |
718 : | emms(); \ | ||
719 : | } \ | ||
720 : | t = (gettime_usec()-t-overhead)/nb_tests/qm | ||
721 : | Isibaar | 1.3 | |
722 : | #define TEST_QUANT2(FUNC, DST, SRC) \ | ||
723 : | edgomez | 1.11 | t = gettime_usec(); \ |
724 : | for(s=CRC32_INITIAL,qm=1; qm<=255; ++qm) { \ | ||
725 : | for(i=0; i<8*8; ++i) Quant[i] = qm; \ | ||
726 : | set_intra_matrix( mpeg_quant_matrices, Quant ); \ | ||
727 : | emms(); \ | ||
728 : | for(q=1; q<=max_Q; ++q) { \ | ||
729 : | for(tst=0; tst<nb_tests; ++tst) \ | ||
730 : | (FUNC)((DST), (SRC), q, q, mpeg_quant_matrices); \ | ||
731 : | Skal | 1.22 | byte_swap((uint8_t*)(DST), 64*sizeof((DST)[0]), sizeof((DST)[0])); \ |
732 : | s = calc_crc((uint8_t*)(DST), 64*sizeof((DST)[0]), s); \ | ||
733 : | edgomez | 1.11 | } \ |
734 : | emms(); \ | ||
735 : | } \ | ||
736 : | t = (gettime_usec()-t-overhead)/nb_tests/qm | ||
737 : | Isibaar | 1.1 | |
738 : | edgomez | 1.12 | #define TEST_INTRA(REFFUNC, NEWFUNC, RANGE) \ |
739 : | { int i,q,s;\ | ||
740 : | DECLARE_ALIGNED_MATRIX(Src, 8, 8, int16_t, 16); \ | ||
741 : | DECLARE_ALIGNED_MATRIX(Dst, 8, 8, int16_t, 16); \ | ||
742 : | DECLARE_ALIGNED_MATRIX(Dst2,8, 8, int16_t, 16); \ | ||
743 : | for(q=1;q<=max_Q;q++) \ | ||
744 : | for(s=-RANGE;s<RANGE;s++) { \ | ||
745 : | for(i=0;i<64;i++) Src[i]=s; \ | ||
746 : | (REFFUNC)((Dst),(Src),q,q,mpeg_quant_matrices); \ | ||
747 : | (NEWFUNC)((Dst2),(Src),q,q,mpeg_quant_matrices); \ | ||
748 : | for(i=0;i<64;i++) \ | ||
749 : | if(Dst[i]!=Dst2[i]) printf("ERROR : " #NEWFUNC " i%d quant:%d input:%d C_result:%d ASM_result:%d\n",i,q,s,Dst[i],Dst2[i]); \ | ||
750 : | } \ | ||
751 : | } | ||
752 : | |||
/*
 * TEST_INTER(REFFUNC, NEWFUNC, RANGE)
 *
 * Exhaustive comparison of two routines called as
 *     f(dst, src, q, mpeg_quant_matrices)
 * For every q in [1,max_Q] and every input value s in [-RANGE, RANGE)
 * a block filled with s is run through both functions; any coefficient
 * that differs is reported (REFFUNC's value is printed as "C_result",
 * NEWFUNC's as "ASM_result").
 *
 * The macro owns its working buffers and loop counters, so it only
 * needs max_Q and mpeg_quant_matrices from the expansion site.
 *
 * RANGE is parenthesized so that an expression argument negates as a
 * whole, and the body is a do { } while(0) so it behaves as one
 * statement.
 */
#define TEST_INTER(REFFUNC, NEWFUNC, RANGE)                                  \
do {                                                                         \
	int i,q,s;                                                               \
	DECLARE_ALIGNED_MATRIX(Src, 8, 8, int16_t, 16);                          \
	DECLARE_ALIGNED_MATRIX(Dst, 8, 8, int16_t, 16);                          \
	DECLARE_ALIGNED_MATRIX(Dst2,8, 8, int16_t, 16);                          \
	for(q=1;q<=max_Q;q++)                                                    \
		for(s=-(RANGE);s<(RANGE);s++) {                                      \
			for(i=0;i<64;i++) Src[i]=s;                                      \
			(REFFUNC)((Dst),(Src),q,mpeg_quant_matrices);                    \
			(NEWFUNC)((Dst2),(Src),q,mpeg_quant_matrices);                   \
			emms();                                                          \
			for(i=0;i<64;i++)                                                \
				if(Dst[i]!=Dst2[i]) printf("ERROR : " #NEWFUNC " i%d quant:%d input:%d C_result:%d ASM_result:%d\n",i,q,s,Dst[i],Dst2[i]); \
		}                                                                    \
} while(0)
768 : | |||
769 : | Isibaar | 1.1 | void test_quant() |
770 : | { | ||
771 : | edgomez | 1.11 | const int nb_tests = 1*speed_ref; |
772 : | const int max_Q = 31; | ||
773 : | DECLARE_ALIGNED_MATRIX(mpeg_quant_matrices, 8, 64, uint16_t, 16); | ||
774 : | |||
775 : | int i, qm; | ||
776 : | CPU *cpu; | ||
777 : | DECLARE_ALIGNED_MATRIX(Src, 8, 8, int16_t, 16); | ||
778 : | DECLARE_ALIGNED_MATRIX(Dst, 8, 8, int16_t, 16); | ||
779 : | edgomez | 1.12 | DECLARE_ALIGNED_MATRIX(Dst2,8, 8, int16_t, 16); |
780 : | edgomez | 1.11 | uint8_t Quant[8*8]; |
781 : | |||
782 : | printf( "\n ===== test quant =====\n" ); | ||
783 : | |||
784 : | /* we deliberately enfringe the norm's specified range [-127,127], */ | ||
785 : | /* to test the robustness of the iquant module */ | ||
786 : | for(i=0; i<64; ++i) { | ||
787 : | Src[i] = 1 + (i-32) * (i&6); | ||
788 : | Dst[i] = 0; | ||
789 : | } | ||
790 : | Isibaar | 1.1 | |
791 : | edgomez | 1.11 | for(cpu = cpu_list; cpu->name!=0; ++cpu) |
792 : | { | ||
793 : | double t, overhead; | ||
794 : | int tst, q; | ||
795 : | uint32_t s; | ||
796 : | |||
797 : | if (!init_cpu(cpu)) | ||
798 : | edgomez | 1.12 | continue; |
799 : | |||
800 : | // exhaustive tests to compare against the (ref) C-version | ||
801 : | TEST_INTRA(quant_h263_intra_c, quant_h263_intra, 2048); | ||
802 : | TEST_INTRA(dequant_h263_intra_c, dequant_h263_intra , 512 ); | ||
803 : | TEST_INTER(quant_h263_inter_c, quant_h263_inter , 2048); | ||
804 : | TEST_INTER(dequant_h263_inter_c, dequant_h263_inter , 512 ); | ||
805 : | edgomez | 1.11 | |
806 : | overhead = -gettime_usec(); | ||
807 : | for(s=0,qm=1; qm<=255; ++qm) { | ||
808 : | for(i=0; i<8*8; ++i) Quant[i] = qm; | ||
809 : | set_inter_matrix(mpeg_quant_matrices, Quant ); | ||
810 : | for(q=1; q<=max_Q; ++q) | ||
811 : | for(i=0; i<64; ++i) s+=Dst[i]^i^qm; | ||
812 : | } | ||
813 : | overhead += gettime_usec(); | ||
814 : | |||
815 : | TEST_QUANT2(quant_mpeg_intra, Dst, Src); | ||
816 : | printf("%s - quant_mpeg_intra %.3f usec crc32=0x%08x %s\n", | ||
817 : | cpu->name, t, s, | ||
818 : | (s!=0xfd6a21a4)? "| ERROR": ""); | ||
819 : | |||
820 : | TEST_QUANT(quant_mpeg_inter, Dst, Src); | ||
821 : | printf("%s - quant_mpeg_inter %.3f usec crc32=0x%08x %s\n", | ||
822 : | cpu->name, t, s, | ||
823 : | (s!=0xf6de7757)?"| ERROR": ""); | ||
824 : | |||
825 : | TEST_QUANT2(dequant_mpeg_intra, Dst, Src); | ||
826 : | printf("%s - dequant_mpeg_intra %.3f usec crc32=0x%08x %s\n", | ||
827 : | cpu->name, t, s, | ||
828 : | (s!=0x2def7bc7)?"| ERROR": ""); | ||
829 : | |||
830 : | TEST_QUANT(dequant_mpeg_inter, Dst, Src); | ||
831 : | printf("%s - dequant_mpeg_inter %.3f usec crc32=0x%08x %s\n", | ||
832 : | cpu->name, t, s, | ||
833 : | (s!=0xd878c722)?"| ERROR": ""); | ||
834 : | |||
835 : | TEST_QUANT2(quant_h263_intra, Dst, Src); | ||
836 : | printf("%s - quant_h263_intra %.3f usec crc32=0x%08x %s\n", | ||
837 : | cpu->name, t, s, | ||
838 : | (s!=0x2eba9d43)?"| ERROR": ""); | ||
839 : | |||
840 : | TEST_QUANT(quant_h263_inter, Dst, Src); | ||
841 : | printf("%s - quant_h263_inter %.3f usec crc32=0x%08x %s\n", | ||
842 : | cpu->name, t, s, | ||
843 : | (s!=0xbd315a7e)?"| ERROR": ""); | ||
844 : | |||
845 : | TEST_QUANT2(dequant_h263_intra, Dst, Src); | ||
846 : | printf("%s - dequant_h263_intra %.3f usec crc32=0x%08x %s\n", | ||
847 : | cpu->name, t, s, | ||
848 : | (s!=0x9841212a)?"| ERROR": ""); | ||
849 : | |||
850 : | TEST_QUANT(dequant_h263_inter, Dst, Src); | ||
851 : | printf("%s - dequant_h263_inter %.3f usec crc32=0x%08x %s\n", | ||
852 : | cpu->name, t, s, | ||
853 : | (s!=0xe7df8fba)?"| ERROR": ""); | ||
854 : | |||
855 : | printf( " --- \n" ); | ||
856 : | } | ||
857 : | Isibaar | 1.1 | } |
858 : | |||
859 : | /********************************************************************* | ||
860 : | edgomez | 1.14 | * test distortion operators |
861 : | *********************************************************************/ | ||
862 : | |||
/* The IEEE-1180 style generator is defined further down in this file; */
/* it is declared here because test_sse() already needs it.            */
static void ieee_reseed(long s);
static long ieee_rand(int Min, int Max);

/*
 * TEST_SSE(FUNCTION, SRC1, SRC2, STRIDE)
 *
 * Calls FUNCTION(SRC1, SRC2, STRIDE) nb_tests times, keeping the last
 * return value in 'sse', and leaves the mean time per call in 't'.
 * Uses t, tst, sse and nb_tests from the expansion site.
 */
#define TEST_SSE(FUNCTION, SRC1, SRC2, STRIDE)                  \
do {                                                            \
	t = gettime_usec();                                         \
	for(tst = nb_tests; (tst--)>0; )                            \
		sse = (FUNCTION)((SRC1), (SRC2), (STRIDE));             \
	emms();                                                     \
	t = (gettime_usec() - t)/(double)nb_tests;                  \
} while(0)
874 : | |||
875 : | |||
876 : | void test_sse() | ||
877 : | { | ||
878 : | const int nb_tests = 100000*speed_ref; | ||
879 : | int i; | ||
880 : | CPU *cpu; | ||
881 : | DECLARE_ALIGNED_MATRIX(Src1, 8, 8, int16_t, 16); | ||
882 : | DECLARE_ALIGNED_MATRIX(Src2, 8, 8, int16_t, 16); | ||
883 : | DECLARE_ALIGNED_MATRIX(Src3, 8, 8, int16_t, 16); | ||
884 : | DECLARE_ALIGNED_MATRIX(Src4, 8, 8, int16_t, 16); | ||
885 : | |||
886 : | printf( "\n ===== test sse =====\n" ); | ||
887 : | |||
888 : | ieee_reseed(1); | ||
889 : | for(i=0; i<64; ++i) { | ||
890 : | Src1[i] = ieee_rand(-2048, 2047); | ||
891 : | Src2[i] = ieee_rand(-2048, 2047); | ||
892 : | Src3[i] = ieee_rand(-2048, 2047); | ||
893 : | Src4[i] = ieee_rand(-2048, 2047); | ||
894 : | } | ||
895 : | |||
896 : | for(cpu = cpu_list; cpu->name!=0; ++cpu) | ||
897 : | { | ||
898 : | double t; | ||
899 : | int tst, sse; | ||
900 : | |||
901 : | if (!init_cpu(cpu)) | ||
902 : | continue; | ||
903 : | |||
904 : | /* 16 bit element blocks */ | ||
905 : | TEST_SSE(sse8_16bit, Src1, Src2, 16); | ||
906 : | printf("%s - sse8_16bit#1 %.3f usec sse=%d %s\n", | ||
907 : | cpu->name, t, sse, (sse!=182013834)?"| ERROR": ""); | ||
908 : | TEST_SSE(sse8_16bit, Src1, Src3, 16); | ||
909 : | printf("%s - sse8_16bit#2 %.3f usec sse=%d %s\n", | ||
910 : | cpu->name, t, sse, (sse!=142545203)?"| ERROR": ""); | ||
911 : | TEST_SSE(sse8_16bit, Src1, Src4, 16); | ||
912 : | printf("%s - sse8_16bit#3 %.3f usec sse=%d %s\n", | ||
913 : | cpu->name, t, sse, (sse!=146340935)?"| ERROR": ""); | ||
914 : | TEST_SSE(sse8_16bit, Src2, Src3, 16); | ||
915 : | printf("%s - sse8_16bit#4 %.3f usec sse=%d %s\n", | ||
916 : | cpu->name, t, sse, (sse!=130136661)?"| ERROR": ""); | ||
917 : | TEST_SSE(sse8_16bit, Src2, Src4, 16); | ||
918 : | printf("%s - sse8_16bit#5 %.3f usec sse=%d %s\n", | ||
919 : | cpu->name, t, sse, (sse!=136870353)?"| ERROR": ""); | ||
920 : | TEST_SSE(sse8_16bit, Src3, Src4, 16); | ||
921 : | printf("%s - sse8_16bit#6 %.3f usec sse=%d %s\n", | ||
922 : | cpu->name, t, sse, (sse!=164107772)?"| ERROR": ""); | ||
923 : | |||
924 : | /* 8 bit element blocks */ | ||
925 : | TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src2, 8); | ||
926 : | printf("%s - sse8_8bit#1 %.3f usec sse=%d %s\n", | ||
927 : | cpu->name, t, sse, (sse!=1356423)?"| ERROR": ""); | ||
928 : | TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src3, 8); | ||
929 : | printf("%s - sse8_8bit#2 %.3f usec sse=%d %s\n", | ||
930 : | cpu->name, t, sse, (sse!=1173074)?"| ERROR": ""); | ||
931 : | TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src4, 8); | ||
932 : | printf("%s - sse8_8bit#3 %.3f usec sse=%d %s\n", | ||
933 : | cpu->name, t, sse, (sse!=1092357)?"| ERROR": ""); | ||
934 : | TEST_SSE(sse8_8bit, (int8_t*)Src2, (int8_t*)Src3, 8); | ||
935 : | printf("%s - sse8_8bit#4 %.3f usec sse=%d %s\n", | ||
936 : | cpu->name, t, sse, (sse!=1360239)?"| ERROR": ""); | ||
937 : | TEST_SSE(sse8_8bit, (int8_t*)Src2, (int8_t*)Src4, 8); | ||
938 : | printf("%s - sse8_8bit#5 %.3f usec sse=%d %s\n", | ||
939 : | cpu->name, t, sse, (sse!=1208414)?"| ERROR": ""); | ||
940 : | TEST_SSE(sse8_8bit, (int8_t*)Src3, (int8_t*)Src4, 8); | ||
941 : | printf("%s - sse8_8bit#6 %.3f usec sse=%d %s\n", | ||
942 : | cpu->name, t, sse, (sse!=1099285)?"| ERROR": ""); | ||
943 : | |||
944 : | printf(" ---\n"); | ||
945 : | } | ||
946 : | } | ||
947 : | |||
948 : | /********************************************************************* | ||
949 : | Isibaar | 1.1 | * test non-zero AC counting |
950 : | *********************************************************************/ | ||
951 : | |||
/*
 * TEST_CBP(FUNC, SRC, NB)
 *
 * Calls FUNC(SRC) NB times, keeping the last return value in 'cbp', and
 * leaves the mean time per call in 't'.  Uses t, tst and cbp from the
 * expansion site.
 *
 * The elapsed time is divided by NB, the number of calls this expansion
 * actually makes, so the figure is right whatever count is passed.
 * The body is a do { } while(0) so the macro is a single statement.
 */
#define TEST_CBP(FUNC, SRC, NB)                     \
do {                                                \
	t = gettime_usec();                             \
	emms();                                         \
	for(tst=0; tst<(NB); ++tst) {                   \
		cbp = (FUNC)((SRC));                        \
	}                                               \
	emms();                                         \
	t = (gettime_usec()-t ) / (NB);                 \
} while(0)
960 : | Isibaar | 1.1 | |
961 : | void test_cbp() | ||
962 : | { | ||
963 : | edgomez | 1.11 | const int nb_tests = 10000*speed_ref; |
964 : | Skal | 1.23 | int i, n, m; |
965 : | edgomez | 1.11 | CPU *cpu; |
966 : | DECLARE_ALIGNED_MATRIX(Src1, 6, 64, int16_t, 16); | ||
967 : | DECLARE_ALIGNED_MATRIX(Src2, 6, 64, int16_t, 16); | ||
968 : | DECLARE_ALIGNED_MATRIX(Src3, 6, 64, int16_t, 16); | ||
969 : | DECLARE_ALIGNED_MATRIX(Src4, 6, 64, int16_t, 16); | ||
970 : | suxen_drol | 1.25 | DECLARE_ALIGNED_MATRIX(Src5, 6, 64, int16_t, 16); |
971 : | edgomez | 1.11 | |
972 : | printf( "\n ===== test cbp =====\n" ); | ||
973 : | |||
974 : | for(i=0; i<6*64; ++i) { | ||
975 : | Src1[i] = (i*i*3/8192)&(i/64)&1; /* 'random' */ | ||
976 : | Src2[i] = (i<3*64); /* half-full */ | ||
977 : | Src3[i] = ((i+32)>3*64); | ||
978 : | Src4[i] = (i==(3*64+2) || i==(5*64+9)); | ||
979 : | suxen_drol | 1.25 | Src5[i] = ieee_rand(0,1) ? -1 : 1; /* +/- test */ |
980 : | edgomez | 1.11 | } |
981 : | |||
982 : | for(cpu = cpu_list; cpu->name!=0; ++cpu) | ||
983 : | { | ||
984 : | double t; | ||
985 : | int tst, cbp; | ||
986 : | |||
987 : | if (!init_cpu(cpu)) | ||
988 : | continue; | ||
989 : | |||
990 : | Skal | 1.23 | TEST_CBP(calc_cbp, Src1, nb_tests); |
991 : | edgomez | 1.14 | printf("%s - calc_cbp#1 %.3f usec cbp=0x%02x %s\n", |
992 : | edgomez | 1.11 | cpu->name, t, cbp, (cbp!=0x15)?"| ERROR": ""); |
993 : | Skal | 1.23 | TEST_CBP(calc_cbp, Src2, nb_tests); |
994 : | edgomez | 1.14 | printf("%s - calc_cbp#2 %.3f usec cbp=0x%02x %s\n", |
995 : | edgomez | 1.11 | cpu->name, t, cbp, (cbp!=0x38)?"| ERROR": ""); |
996 : | Skal | 1.23 | TEST_CBP(calc_cbp, Src3, nb_tests); |
997 : | edgomez | 1.14 | printf("%s - calc_cbp#3 %.3f usec cbp=0x%02x %s\n", |
998 : | edgomez | 1.11 | cpu->name, t, cbp, (cbp!=0x0f)?"| ERROR": "" ); |
999 : | Skal | 1.23 | TEST_CBP(calc_cbp, Src4, nb_tests); |
1000 : | edgomez | 1.14 | printf("%s - calc_cbp#4 %.3f usec cbp=0x%02x %s\n", |
1001 : | edgomez | 1.11 | cpu->name, t, cbp, (cbp!=0x05)?"| ERROR": "" ); |
1002 : | suxen_drol | 1.25 | TEST_CBP(calc_cbp, Src5, nb_tests); |
1003 : | printf("%s - calc_cbp#4 %.3f usec cbp=0x%02x %s\n", | ||
1004 : | cpu->name, t, cbp, (cbp!=0x3f)?"| ERROR": "" ); | ||
1005 : | edgomez | 1.11 | printf( " --- \n" ); |
1006 : | } | ||
1007 : | Skal | 1.23 | |
1008 : | for(cpu = cpu_list; cpu->name!=0; ++cpu) /* bench suggested by Carlo (carlo dot bramix at libero dot it) */ | ||
1009 : | { | ||
1010 : | double t; | ||
1011 : | int tst, cbp, err; | ||
1012 : | |||
1013 : | if (!init_cpu(cpu)) | ||
1014 : | continue; | ||
1015 : | |||
1016 : | err = 0; | ||
1017 : | for(n=0; n<6; ++n) | ||
1018 : | { | ||
1019 : | for(m=0; m<64; ++m) | ||
1020 : | { | ||
1021 : | for(i=0; i<6*64; ++i) | ||
1022 : | Src1[i] = (i== (m + n*64)); | ||
1023 : | |||
1024 : | TEST_CBP(calc_cbp, Src1, 1); | ||
1025 : | if (cbp!= (((m!=0)<<(5-n)))) | ||
1026 : | { | ||
1027 : | printf( "%s - calc_cbp#5: ERROR at pos %d / %d!\n", cpu->name, n, m); | ||
1028 : | err = 1; | ||
1029 : | break; | ||
1030 : | } | ||
1031 : | } | ||
1032 : | } | ||
1033 : | if (!err) | ||
1034 : | printf( " %s - calc_cbp#5 : OK\n", cpu->name ); | ||
1035 : | |||
1036 : | } | ||
1037 : | Isibaar | 1.1 | } |
1038 : | |||
1039 : | /********************************************************************* | ||
1040 : | Isibaar | 1.3 | * fdct/idct IEEE1180 compliance |
1041 : | *********************************************************************/ | ||
1042 : | |||
/* Per-coefficient error accumulators for a series of 8x8 blocks. */
typedef struct {
	long Errors[64];		/* running sum of (Blk - Ref)          */
	long Sqr_Errors[64];	/* running sum of (Blk - Ref)^2        */
	long Max_Errors[64];	/* largest |Blk - Ref| seen so far     */
	long Nb;				/* number of blocks accumulated        */
} STATS_8x8;

/* Reset every accumulator of *S to zero. */
void init_stats(STATS_8x8 *S)
{
	int i;
	for(i=0; i<64; ++i) {
		S->Errors[i]     = 0;
		S->Sqr_Errors[i] = 0;
		S->Max_Errors[i] = 0;
	}
	S->Nb = 0;
}

/*
 * Accumulate into *S the coefficient-wise difference between the block
 * under test 'Blk' and the reference block 'Ref' (64 entries each).
 *
 * The difference is computed in 'long': the difference of two shorts
 * does not always fit in a short, and storing it in one would silently
 * truncate large errors.
 */
void store_stats(STATS_8x8 *S, short Blk[64], short Ref[64])
{
	int i;
	for(i=0; i<64; ++i)
	{
		const long Err = (long)Blk[i] - (long)Ref[i];
		const long Abs_Err = (Err<0) ? -Err : Err;

		S->Errors[i]     += Err;
		S->Sqr_Errors[i] += Err * Err;
		if (S->Max_Errors[i]<Abs_Err)
			S->Max_Errors[i] = Abs_Err;
	}
	S->Nb++;
}
1075 : | |||
1076 : | void print_stats(STATS_8x8 *S) | ||
1077 : | { | ||
1078 : | edgomez | 1.11 | int i; |
1079 : | double Norm; | ||
1080 : | Isibaar | 1.3 | |
1081 : | edgomez | 1.11 | assert(S->Nb>0); |
1082 : | Norm = 1. / (double)S->Nb; | ||
1083 : | printf("\n== Max absolute values of errors ==\n"); | ||
1084 : | for(i=0; i<64; i++) { | ||
1085 : | printf(" %4ld", S->Max_Errors[i]); | ||
1086 : | if ((i&7)==7) printf("\n"); | ||
1087 : | } | ||
1088 : | |||
1089 : | printf("\n== Mean square errors ==\n"); | ||
1090 : | for(i=0; i<64; i++) | ||
1091 : | { | ||
1092 : | double Err = Norm * (double)S->Sqr_Errors[i]; | ||
1093 : | printf(" %.3f", Err); | ||
1094 : | if ((i&7)==7) printf("\n"); | ||
1095 : | } | ||
1096 : | |||
1097 : | printf("\n== Mean errors ==\n"); | ||
1098 : | for(i=0; i<64; i++) | ||
1099 : | { | ||
1100 : | double Err = Norm * (double)S->Errors[i]; | ||
1101 : | printf(" %.3f", Err); | ||
1102 : | if ((i&7)==7) printf("\n"); | ||
1103 : | } | ||
1104 : | printf("\n"); | ||
1105 : | Isibaar | 1.3 | } |
1106 : | |||
/* Verdict string for a measured value: "ok" when |v| does not exceed */
/* the limit l, "FAIL!" otherwise.                                     */
static const char *CHECK(double v, double l) {
	return (fabs(v)<=l) ? "ok" : "FAIL!";
}
1111 : | |||
1112 : | void report_stats(STATS_8x8 *S, const double *Limits) | ||
1113 : | { | ||
1114 : | edgomez | 1.11 | int i; |
1115 : | double Norm, PE, PMSE, OMSE, PME, OME; | ||
1116 : | |||
1117 : | assert(S->Nb>0); | ||
1118 : | Norm = 1. / (double)S->Nb; | ||
1119 : | PE = 0.; | ||
1120 : | for(i=0; i<64; i++) { | ||
1121 : | if (PE<S->Max_Errors[i]) | ||
1122 : | PE = S->Max_Errors[i]; | ||
1123 : | } | ||
1124 : | Isibaar | 1.3 | |
1125 : | edgomez | 1.11 | PMSE = 0.; |
1126 : | OMSE = 0.; | ||
1127 : | for(i=0; i<64; i++) | ||
1128 : | { | ||
1129 : | double Err = Norm * (double)S->Sqr_Errors[i]; | ||
1130 : | OMSE += Err; | ||
1131 : | if (PMSE < Err) PMSE = Err; | ||
1132 : | } | ||
1133 : | OMSE /= 64.; | ||
1134 : | |||
1135 : | PME = 0.; | ||
1136 : | OME = 0.; | ||
1137 : | for(i=0; i<64; i++) | ||
1138 : | { | ||
1139 : | double Err = Norm * (double)S->Errors[i]; | ||
1140 : | OME += Err; | ||
1141 : | Err = fabs(Err); | ||
1142 : | if (PME < Err) PME = Err; | ||
1143 : | } | ||
1144 : | OME /= 64.; | ||
1145 : | |||
1146 : | printf( "Peak error: %4.4f\n", PE ); | ||
1147 : | printf( "Peak MSE: %4.4f\n", PMSE ); | ||
1148 : | printf( "Overall MSE: %4.4f\n", OMSE ); | ||
1149 : | printf( "Peak ME: %4.4f\n", PME ); | ||
1150 : | printf( "Overall ME: %4.4f\n", OME ); | ||
1151 : | |||
1152 : | if (Limits!=0) | ||
1153 : | { | ||
1154 : | printf( "[PE<=%.4f %s] ", Limits[0], CHECK(PE, Limits[0]) ); | ||
1155 : | printf( "\n" ); | ||
1156 : | printf( "[PMSE<=%.4f %s]", Limits[1], CHECK(PMSE, Limits[1]) ); | ||
1157 : | printf( "[OMSE<=%.4f %s]", Limits[2], CHECK(OMSE, Limits[2]) ); | ||
1158 : | printf( "\n" ); | ||
1159 : | printf( "[PME<=%.4f %s] ", Limits[3], CHECK(PME , Limits[3]) ); | ||
1160 : | printf( "[OME<=%.4f %s] ", Limits[4], CHECK(OME , Limits[4]) ); | ||
1161 : | printf( "\n" ); | ||
1162 : | } | ||
1163 : | Isibaar | 1.3 | } |
1164 : | |||
1165 : | edgomez | 1.11 | ///* ////////////////////////////////////////////////////// */ |
1166 : | Isibaar | 1.3 | /* Pseudo-random generator specified by IEEE 1180 */ |
1167 : | |||
/* Current state of the generator; only its low 31 bits are significant. */
static long ieee_seed = 1;

/* Restart the pseudo-random sequence from seed 's'. */
static void ieee_reseed(long s) {
	ieee_seed = s;
}

/*
 * Return the next pseudo-random integer in [Min, Max] (both inclusive).
 *
 * The state advances with the linear congruential step
 *     seed = seed * 1103515245 + 12345
 * which is carried out in unsigned arithmetic: the product overflows a
 * signed long almost immediately, which is undefined behaviour in C.
 * Only the low 31 bits of the state are ever used (and the low bits of
 * the next state depend on nothing else), so the state is reduced to
 * those bits; the sequence produced is the one a wrapping signed
 * multiply would give.
 *
 * The result is passed through a (short) cast before being returned,
 * exactly as before.
 */
static long ieee_rand(int Min, int Max)
{
	static double z = (double) 0x7fffffff;

	unsigned long state;
	long i,j;
	double x;

	state = (unsigned long)ieee_seed * 1103515245UL + 12345UL;
	ieee_seed = (long)(state & 0x7fffffffUL);
	i = ieee_seed & 0x7ffffffe;
	x = ((double) i) / z;		/* 0 <= x < 1 */
	x *= (Max-Min+1);
	j = (long)x;
	j = j + Min;
	assert(j>=Min && j<=Max);
	return (short)j;
}
1188 : | |||
/*
 * CLAMP(x, M): saturate the lvalue x in place to the range [-M, M-1].
 * x and M are evaluated more than once, so they must be free of side
 * effects.  The whole expansion is parenthesized so that the macro can
 * be used safely inside a larger expression.
 */
#define CLAMP(x, M)   ((x) = ((x)<-(M)) ? (-(M)) : ((x)>=(M) ? ((M)-1) : (x)))
1190 : | |||
1191 : | static double Cos[8][8]; | ||
1192 : | static void init_ref_dct() | ||
1193 : | { | ||
1194 : | edgomez | 1.11 | int i, j; |
1195 : | for(i=0; i<8; i++) | ||
1196 : | { | ||
1197 : | double scale = (i == 0) ? sqrt(0.125) : 0.5; | ||
1198 : | for (j=0; j<8; j++) | ||
1199 : | Cos[i][j] = scale*cos( (M_PI/8.0)*i*(j + 0.5) ); | ||
1200 : | } | ||
1201 : | Isibaar | 1.3 | } |
1202 : | |||
1203 : | void ref_idct(short *M) | ||
1204 : | { | ||
1205 : | edgomez | 1.11 | int i, j, k; |
1206 : | double Tmp[8][8]; | ||
1207 : | Isibaar | 1.3 | |
1208 : | edgomez | 1.11 | for(i=0; i<8; i++) { |
1209 : | for(j=0; j<8; j++) | ||
1210 : | { | ||
1211 : | double Sum = 0.0; | ||
1212 : | for (k=0; k<8; k++) Sum += Cos[k][j]*M[8*i+k]; | ||
1213 : | Tmp[i][j] = Sum; | ||
1214 : | } | ||
1215 : | } | ||
1216 : | for(i=0; i<8; i++) { | ||
1217 : | for(j=0; j<8; j++) { | ||
1218 : | double Sum = 0.0; | ||
1219 : | for (k=0; k<8; k++) Sum += Cos[k][i]*Tmp[k][j]; | ||
1220 : | M[8*i+j] = (short)floor(Sum + .5); | ||
1221 : | } | ||
1222 : | } | ||
1223 : | Isibaar | 1.3 | } |
1224 : | |||
1225 : | void ref_fdct(short *M) | ||
1226 : | { | ||
1227 : | edgomez | 1.11 | int i, j, k; |
1228 : | double Tmp[8][8]; | ||
1229 : | Isibaar | 1.3 | |
1230 : | edgomez | 1.11 | for(i=0; i<8; i++) { |
1231 : | for(j=0; j<8; j++) | ||
1232 : | { | ||
1233 : | double Sum = 0.0; | ||
1234 : | for (k=0; k<8; k++) Sum += Cos[j][k]*M[8*i+k]; | ||
1235 : | Tmp[i][j] = Sum; | ||
1236 : | } | ||
1237 : | } | ||
1238 : | for(i=0; i<8; i++) { | ||
1239 : | for(j=0; j<8; j++) { | ||
1240 : | double Sum = 0.0; | ||
1241 : | for (k=0; k<8; k++) Sum += Cos[i][k]*Tmp[k][j]; | ||
1242 : | M[8*i+j] = (short)floor(Sum + 0.5); | ||
1243 : | } | ||
1244 : | } | ||
1245 : | Isibaar | 1.3 | } |
1246 : | |||
1247 : | void test_IEEE1180_compliance(int Min, int Max, int Sign) | ||
1248 : | { | ||
1249 : | edgomez | 1.11 | static const double ILimits[5] = { 1., 0.06, 0.02, 0.015, 0.0015 }; |
1250 : | int Loops = 10000; | ||
1251 : | int i, m, n; | ||
1252 : | DECLARE_ALIGNED_MATRIX(Blk0, 8, 8, short, 16); /* reference */ | ||
1253 : | DECLARE_ALIGNED_MATRIX(Blk, 8, 8, short, 16); | ||
1254 : | DECLARE_ALIGNED_MATRIX(iBlk, 8, 8, short, 16); | ||
1255 : | DECLARE_ALIGNED_MATRIX(Ref_FDCT, 8, 8, short, 16); | ||
1256 : | DECLARE_ALIGNED_MATRIX(Ref_IDCT, 8, 8, short, 16); | ||
1257 : | Isibaar | 1.3 | |
1258 : | edgomez | 1.11 | STATS_8x8 FStats; /* forward dct stats */ |
1259 : | STATS_8x8 IStats; /* inverse dct stats */ | ||
1260 : | Isibaar | 1.3 | |
1261 : | edgomez | 1.11 | CPU *cpu; |
1262 : | Isibaar | 1.3 | |
1263 : | edgomez | 1.11 | init_ref_dct(); |
1264 : | Isibaar | 1.3 | |
1265 : | edgomez | 1.11 | for(cpu = cpu_list; cpu->name!=0; ++cpu) |
1266 : | { | ||
1267 : | if (!init_cpu(cpu)) | ||
1268 : | continue; | ||
1269 : | Isibaar | 1.3 | |
1270 : | edgomez | 1.11 | printf( "\n===== IEEE test for %s ==== (Min=%d Max=%d Sign=%d Loops=%d)\n", |
1271 : | cpu->name, Min, Max, Sign, Loops); | ||
1272 : | Isibaar | 1.3 | |
1273 : | edgomez | 1.11 | init_stats(&IStats); |
1274 : | init_stats(&FStats); | ||
1275 : | Isibaar | 1.3 | |
1276 : | edgomez | 1.11 | ieee_reseed(1); |
1277 : | for(n=0; n<Loops; ++n) | ||
1278 : | { | ||
1279 : | for(i=0; i<64; ++i) | ||
1280 : | Blk0[i] = (short)ieee_rand(Min,Max) * Sign; | ||
1281 : | Isibaar | 1.3 | |
1282 : | edgomez | 1.11 | /* hmm, I'm not quite sure this is exactly */ |
1283 : | /* the tests described in the norm. check... */ | ||
1284 : | Isibaar | 1.3 | |
1285 : | edgomez | 1.11 | memcpy(Ref_FDCT, Blk0, 64*sizeof(short)); |
1286 : | ref_fdct(Ref_FDCT); | ||
1287 : | for(i=0; i<64; i++) CLAMP( Ref_FDCT[i], 2048 ); | ||
1288 : | Isibaar | 1.3 | |
1289 : | edgomez | 1.11 | memcpy(Blk, Blk0, 64*sizeof(short)); |
1290 : | emms(); fdct(Blk); emms(); | ||
1291 : | for(i=0; i<64; i++) CLAMP( Blk[i], 2048 ); | ||
1292 : | Isibaar | 1.3 | |
1293 : | edgomez | 1.11 | store_stats(&FStats, Blk, Ref_FDCT); |
1294 : | Isibaar | 1.3 | |
1295 : | |||
1296 : | edgomez | 1.11 | memcpy(Ref_IDCT, Ref_FDCT, 64*sizeof(short)); |
1297 : | ref_idct(Ref_IDCT); | ||
1298 : | for (i=0; i<64; i++) CLAMP( Ref_IDCT[i], 256 ); | ||
1299 : | Isibaar | 1.3 | |
1300 : | edgomez | 1.11 | memcpy(iBlk, Ref_FDCT, 64*sizeof(short)); |
1301 : | emms(); idct(iBlk); emms(); | ||
1302 : | for(i=0; i<64; i++) CLAMP( iBlk[i], 256 ); | ||
1303 : | Isibaar | 1.3 | |
1304 : | edgomez | 1.11 | store_stats(&IStats, iBlk, Ref_IDCT); |
1305 : | } | ||
1306 : | Isibaar | 1.3 | |
1307 : | |||
1308 : | edgomez | 1.11 | printf( "\n -- FDCT report --\n" ); |
1309 : | edgomez | 1.8 | // print_stats(&FStats); |
1310 : | edgomez | 1.11 | report_stats(&FStats, 0); /* so far I know, IEEE1180 says nothing for fdct */ |
1311 : | Isibaar | 1.3 | |
1312 : | edgomez | 1.11 | for(i=0; i<64; i++) Blk[i] = 0; |
1313 : | emms(); fdct(Blk); emms(); | ||
1314 : | for(m=i=0; i<64; i++) if (Blk[i]!=0) m++; | ||
1315 : | printf( "FDCT(0) == 0 ? %s\n", (m!=0) ? "NOPE!" : "yup." ); | ||
1316 : | Isibaar | 1.3 | |
1317 : | edgomez | 1.11 | printf( "\n -- IDCT report --\n" ); |
1318 : | edgomez | 1.8 | // print_stats(&IStats); |
1319 : | edgomez | 1.11 | report_stats(&IStats, ILimits); |
1320 : | Isibaar | 1.3 | |
1321 : | |||
1322 : | edgomez | 1.11 | for(i=0; i<64; i++) Blk[i] = 0; |
1323 : | emms(); idct(Blk); emms(); | ||
1324 : | for(m=i=0; i<64; i++) if (Blk[i]!=0) m++; | ||
1325 : | printf( "IDCT(0) == 0 ? %s\n", (m!=0) ? "NOPE!" : "yup." ); | ||
1326 : | } | ||
1327 : | Isibaar | 1.3 | } |
1328 : | |||
1329 : | |||
1330 : | void test_dct_saturation(int Min, int Max) | ||
1331 : | { | ||
1332 : | edgomez | 1.11 | /* test behaviour on input range fringe */ |
1333 : | Isibaar | 1.3 | |
1334 : | edgomez | 1.11 | int i, n, p; |
1335 : | CPU *cpu; | ||
1336 : | // const short IDCT_MAX = 2047; /* 12bits input */ | ||
1337 : | edgomez | 1.8 | // const short IDCT_MIN = -2048; |
1338 : | edgomez | 1.11 | // const short IDCT_OUT = 256; /* 9bits ouput */ |
1339 : | const int Partitions = 4; | ||
1340 : | const int Loops = 10000 / Partitions; | ||
1341 : | |||
1342 : | init_ref_dct(); | ||
1343 : | |||
1344 : | for(cpu = cpu_list; cpu->name!=0; ++cpu) | ||
1345 : | { | ||
1346 : | short Blk0[64], Blk[64]; | ||
1347 : | STATS_8x8 Stats; | ||
1348 : | |||
1349 : | if (!init_cpu(cpu)) | ||
1350 : | continue; | ||
1351 : | |||
1352 : | printf( "\n===== IEEE test for %s Min=%d Max=%d =====\n", | ||
1353 : | cpu->name, Min, Max ); | ||
1354 : | |||
1355 : | /* FDCT tests // */ | ||
1356 : | |||
1357 : | init_stats(&Stats); | ||
1358 : | |||
1359 : | /* test each computation channels separately */ | ||
1360 : | for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? Max : 0; | ||
1361 : | ref_fdct(Blk0); | ||
1362 : | emms(); fdct(Blk); emms(); | ||
1363 : | store_stats(&Stats, Blk, Blk0); | ||
1364 : | |||
1365 : | for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? Min : 0; | ||
1366 : | ref_fdct(Blk0); | ||
1367 : | emms(); fdct(Blk); emms(); | ||
1368 : | store_stats(&Stats, Blk, Blk0); | ||
1369 : | |||
1370 : | /* randomly saturated inputs */ | ||
1371 : | for(p=0; p<Partitions; ++p) | ||
1372 : | { | ||
1373 : | for(n=0; n<Loops; ++n) | ||
1374 : | { | ||
1375 : | for(i=0; i<64; ++i) | ||
1376 : | Blk0[i] = Blk[i] = (ieee_rand(0,Partitions)>=p)? Max : Min; | ||
1377 : | ref_fdct(Blk0); | ||
1378 : | emms(); fdct(Blk); emms(); | ||
1379 : | store_stats(&Stats, Blk, Blk0); | ||
1380 : | } | ||
1381 : | } | ||
1382 : | printf( "\n -- FDCT saturation report --\n" ); | ||
1383 : | report_stats(&Stats, 0); | ||
1384 : | Isibaar | 1.3 | |
1385 : | |||
1386 : | edgomez | 1.11 | /* IDCT tests // */ |
1387 : | Isibaar | 1.3 | #if 0 |
1388 : | edgomez | 1.11 | /* no finished yet */ |
1389 : | Isibaar | 1.3 | |
1390 : | edgomez | 1.11 | init_stats(&Stats); |
1391 : | Isibaar | 1.3 | |
1392 : | edgomez | 1.11 | /* test each computation channel separately */ |
1393 : | for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? IDCT_MAX : 0; | ||
1394 : | ref_idct(Blk0); | ||
1395 : | emms(); idct(Blk); emms(); | ||
1396 : | for(i=0; i<64; i++) { CLAMP(Blk0[i], IDCT_OUT); CLAMP(Blk[i], IDCT_OUT); } | ||
1397 : | store_stats(&Stats, Blk, Blk0); | ||
1398 : | |||
1399 : | for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? IDCT_MIN : 0; | ||
1400 : | ref_idct(Blk0); | ||
1401 : | emms(); idct(Blk); emms(); | ||
1402 : | for(i=0; i<64; i++) { CLAMP(Blk0[i], IDCT_OUT); CLAMP(Blk[i], IDCT_OUT); } | ||
1403 : | store_stats(&Stats, Blk, Blk0); | ||
1404 : | |||
1405 : | /* randomly saturated inputs */ | ||
1406 : | for(p=0; p<Partitions; ++p) | ||
1407 : | { | ||
1408 : | for(n=0; n<Loops; ++n) | ||
1409 : | { | ||
1410 : | for(i=0; i<64; ++i) | ||
1411 : | Blk0[i] = Blk[i] = (ieee_rand(0,Partitions)>=p)? IDCT_MAX : IDCT_MIN; | ||
1412 : | ref_idct(Blk0); | ||
1413 : | emms(); idct(Blk); emms(); | ||
1414 : | for(i=0; i<64; i++) { CLAMP(Blk0[i],IDCT_OUT); CLAMP(Blk[i],IDCT_OUT); } | ||
1415 : | store_stats(&Stats, Blk, Blk0); | ||
1416 : | } | ||
1417 : | } | ||
1418 : | |||
1419 : | printf( "\n -- IDCT saturation report --\n" ); | ||
1420 : | print_stats(&Stats); | ||
1421 : | report_stats(&Stats, 0); | ||
1422 : | Isibaar | 1.3 | #endif |
1423 : | edgomez | 1.11 | } |
1424 : | Isibaar | 1.3 | } |
1425 : | |||
1426 : | /********************************************************************* | ||
1427 : | Isibaar | 1.1 | * measure raw decoding speed |
1428 : | *********************************************************************/ | ||
1429 : | |||
1430 : | Skal | 1.17 | void test_dec(const char *name, int width, int height, int ref_chksum) |
1431 : | Isibaar | 1.1 | { |
1432 : | edgomez | 1.11 | FILE *f = 0; |
1433 : | void *dechandle = 0; | ||
1434 : | int xerr; | ||
1435 : | xvid_gbl_init_t xinit; | ||
1436 : | xvid_dec_create_t xparam; | ||
1437 : | xvid_dec_frame_t xframe; | ||
1438 : | Isibaar | 1.1 | double t = 0.; |
1439 : | int nb = 0; | ||
1440 : | edgomez | 1.11 | uint8_t *buf = 0; |
1441 : | Skal | 1.17 | uint8_t *yuv_out = 0; |
1442 : | edgomez | 1.11 | int buf_size, pos; |
1443 : | uint32_t chksum = 0; | ||
1444 : | Skal | 1.17 | int bps = (width+31) & ~31; |
1445 : | Isibaar | 1.1 | |
1446 : | edgomez | 1.11 | memset(&xinit, 0, sizeof(xinit)); |
1447 : | Skal | 1.17 | xinit.cpu_flags = cpu_mask; |
1448 : | edgomez | 1.11 | xinit.version = XVID_VERSION; |
1449 : | xvid_global(NULL, 0, &xinit, NULL); | ||
1450 : | Isibaar | 1.1 | |
1451 : | edgomez | 1.11 | memset(&xparam, 0, sizeof(xparam)); |
1452 : | Skal | 1.17 | xparam.width = width; |
1453 : | Isibaar | 1.1 | xparam.height = height; |
1454 : | edgomez | 1.11 | xparam.version = XVID_VERSION; |
1455 : | Isibaar | 1.1 | xerr = xvid_decore(NULL, XVID_DEC_CREATE, &xparam, NULL); |
1456 : | edgomez | 1.11 | if (xerr==XVID_ERR_FAIL) { |
1457 : | Skal | 1.17 | printf("ERROR: can't init decoder (err=%d)\n", xerr); |
1458 : | edgomez | 1.11 | return; |
1459 : | Isibaar | 1.1 | } |
1460 : | dechandle = xparam.handle; | ||
1461 : | |||
1462 : | |||
1463 : | f = fopen(name, "rb"); | ||
1464 : | edgomez | 1.11 | if (f==0) { |
1465 : | Skal | 1.17 | printf( "ERROR: can't open file '%s'\n", name); |
1466 : | edgomez | 1.11 | return; |
1467 : | } | ||
1468 : | fseek(f, 0, SEEK_END); | ||
1469 : | buf_size = ftell(f); | ||
1470 : | fseek(f, 0, SEEK_SET); | ||
1471 : | if (buf_size<=0) { | ||
1472 : | Skal | 1.17 | printf("ERROR: error while stating file\n"); |
1473 : | edgomez | 1.11 | goto End; |
1474 : | } | ||
1475 : | |||
1476 : | Skal | 1.17 | buf = malloc(buf_size); |
1477 : | yuv_out = calloc(1, bps*height*3/2 + 15); | ||
1478 : | if (buf==0 || yuv_out==0) { | ||
1479 : | printf( "ERROR: malloc failed!\n" ); | ||
1480 : | edgomez | 1.11 | goto End; |
1481 : | } | ||
1482 : | |||
1483 : | if (fread(buf, buf_size, 1, f)!=1) { | ||
1484 : | Skal | 1.17 | printf( "ERROR: file-read failed\n" ); |
1485 : | edgomez | 1.11 | goto End; |
1486 : | } | ||
1487 : | |||
1488 : | nb = 0; | ||
1489 : | pos = 0; | ||
1490 : | t = -gettime_usec(); | ||
1491 : | while(1) { | ||
1492 : | Skal | 1.17 | int y; |
1493 : | |||
1494 : | edgomez | 1.11 | memset(&xframe, 0, sizeof(xframe)); |
1495 : | xframe.version = XVID_VERSION; | ||
1496 : | xframe.bitstream = buf + pos; | ||
1497 : | xframe.length = buf_size - pos; | ||
1498 : | Skal | 1.17 | xframe.output.plane[0] = (uint8_t*)(((size_t)yuv_out + 15) & ~15); |
1499 : | suxen_drol | 1.24 | xframe.output.plane[1] = (uint8_t*)xframe.output.plane[0] + bps*height; |
1500 : | xframe.output.plane[2] = (uint8_t*)xframe.output.plane[1] + bps/2; | ||
1501 : | Skal | 1.17 | xframe.output.stride[0] = bps; |
1502 : | xframe.output.stride[1] = bps; | ||
1503 : | xframe.output.stride[2] = bps; | ||
1504 : | xframe.output.csp = XVID_CSP_I420; | ||
1505 : | edgomez | 1.11 | xerr = xvid_decore(dechandle, XVID_DEC_DECODE, &xframe, 0); |
1506 : | Skal | 1.17 | if (xerr<0) { |
1507 : | printf("ERROR: decoding failed for frame #%d (err=%d)!\n", nb, xerr); | ||
1508 : | break; | ||
1509 : | } | ||
1510 : | else if (xerr==0) | ||
1511 : | break; | ||
1512 : | else if (verbose>0) printf("#%d %d\n", nb, xerr ); | ||
1513 : | |||
1514 : | pos += xerr; | ||
1515 : | edgomez | 1.11 | nb++; |
1516 : | Skal | 1.17 | |
1517 : | for(y=0; y<height/2; ++y) { | ||
1518 : | suxen_drol | 1.24 | chksum = calc_crc((uint8_t*)xframe.output.plane[0] + (2*y+0)*bps, width, chksum); |
1519 : | chksum = calc_crc((uint8_t*)xframe.output.plane[0] + (2*y+1)*bps, width, chksum); | ||
1520 : | chksum = calc_crc((uint8_t*)xframe.output.plane[1] + y*bps, width/2, chksum); | ||
1521 : | chksum = calc_crc((uint8_t*)xframe.output.plane[2] + y*bps, width/2, chksum); | ||
1522 : | edgomez | 1.11 | } |
1523 : | if (pos==buf_size) | ||
1524 : | break; | ||
1525 : | } | ||
1526 : | t += gettime_usec(); | ||
1527 : | Skal | 1.17 | if (ref_chksum==0) { |
1528 : | if (t>0.) | ||
1529 : | printf( "%d frames decoded in %.3f s -> %.1f FPS Checksum:0x%.8x\n", nb, t*1.e-6f, (float)(nb*1.e6f/t), chksum ); | ||
1530 : | } | ||
1531 : | else { | ||
1532 : | printf("FPS:%.1f Checksum: 0x%.8x Expected:0x%.8x | %s\n", | ||
1533 : | t>0. ? (float)(nb*1.e6f/t) : 0.f, chksum, ref_chksum, (chksum==ref_chksum) ? "OK" : "ERROR"); | ||
1534 : | } | ||
1535 : | edgomez | 1.11 | |
1536 : | End: | ||
1537 : | Skal | 1.17 | if (yuv_out!=0) free(yuv_out); |
1538 : | edgomez | 1.11 | if (buf!=0) free(buf); |
1539 : | if (dechandle!=0) { | ||
1540 : | xerr= xvid_decore(dechandle, XVID_DEC_DESTROY, NULL, NULL); | ||
1541 : | if (xerr==XVID_ERR_FAIL) | ||
1542 : | Skal | 1.17 | printf("ERROR: destroy-decoder failed (err=%d)!\n", xerr); |
1543 : | edgomez | 1.11 | } |
1544 : | if (f!=0) fclose(f); | ||
1545 : | Isibaar | 1.1 | } |
1546 : | |||
1547 : | /********************************************************************* | ||
1548 : | * non-regression tests | ||
1549 : | *********************************************************************/ | ||
1550 : | |||
1551 : | void test_bugs1() | ||
1552 : | { | ||
1553 : | edgomez | 1.11 | CPU *cpu; |
1554 : | uint16_t mpeg_quant_matrices[64*8]; | ||
1555 : | |||
1556 : | printf( "\n ===== (de)quant4_intra saturation bug? =====\n" ); | ||
1557 : | Isibaar | 1.1 | |
1558 : | edgomez | 1.11 | for(cpu = cpu_list; cpu->name!=0; ++cpu) |
1559 : | { | ||
1560 : | int i; | ||
1561 : | int16_t Src[8*8], Dst[8*8]; | ||
1562 : | |||
1563 : | if (!init_cpu(cpu)) | ||
1564 : | continue; | ||
1565 : | |||
1566 : | for(i=0; i<64; ++i) Src[i] = i-32; | ||
1567 : | set_intra_matrix( mpeg_quant_matrices, get_default_intra_matrix() ); | ||
1568 : | dequant_mpeg_intra(Dst, Src, 31, 5, mpeg_quant_matrices); | ||
1569 : | printf( "dequant_mpeg_intra with CPU=%s: ", cpu->name); | ||
1570 : | printf( " Out[]= " ); | ||
1571 : | for(i=0; i<64; ++i) printf( "[%d]", Dst[i]); | ||
1572 : | printf( "\n" ); | ||
1573 : | } | ||
1574 : | |||
1575 : | printf( "\n ===== (de)quant4_inter saturation bug? =====\n" ); | ||
1576 : | Isibaar | 1.1 | |
1577 : | edgomez | 1.11 | for(cpu = cpu_list; cpu->name!=0; ++cpu) |
1578 : | { | ||
1579 : | int i; | ||
1580 : | int16_t Src[8*8], Dst[8*8]; | ||
1581 : | |||
1582 : | if (!init_cpu(cpu)) | ||
1583 : | continue; | ||
1584 : | |||
1585 : | for(i=0; i<64; ++i) Src[i] = i-32; | ||
1586 : | set_inter_matrix( mpeg_quant_matrices, get_default_inter_matrix() ); | ||
1587 : | dequant_mpeg_inter(Dst, Src, 31, mpeg_quant_matrices); | ||
1588 : | printf( "dequant_mpeg_inter with CPU=%s: ", cpu->name); | ||
1589 : | printf( " Out[]= " ); | ||
1590 : | for(i=0; i<64; ++i) printf( "[%d]", Dst[i]); | ||
1591 : | printf( "\n" ); | ||
1592 : | } | ||
1593 : | Isibaar | 1.1 | } |
1594 : | |||
1595 : | void test_dct_precision_diffs() | ||
1596 : | { | ||
1597 : | edgomez | 1.11 | CPU *cpu; |
1598 : | DECLARE_ALIGNED_MATRIX(Blk, 8, 8, int16_t, 16); | ||
1599 : | DECLARE_ALIGNED_MATRIX(Blk0, 8, 8, int16_t, 16); | ||
1600 : | |||
1601 : | printf( "\n ===== fdct/idct precision diffs =====\n" ); | ||
1602 : | |||
1603 : | for(cpu = cpu_list; cpu->name!=0; ++cpu) | ||
1604 : | { | ||
1605 : | int i; | ||
1606 : | |||
1607 : | if (!init_cpu(cpu)) | ||
1608 : | continue; | ||
1609 : | |||
1610 : | for(i=0; i<8*8; ++i) { | ||
1611 : | Blk0[i] = (i*7-i*i) & 0x7f; | ||
1612 : | Blk[i] = Blk0[i]; | ||
1613 : | } | ||
1614 : | |||
1615 : | fdct(Blk); | ||
1616 : | idct(Blk); | ||
1617 : | printf( " fdct+idct diffs with CPU=%s: \n", cpu->name ); | ||
1618 : | for(i=0; i<8; ++i) { | ||
1619 : | int j; | ||
1620 : | for(j=0; j<8; ++j) printf( " %d ", Blk[i*8+j]-Blk0[i*8+j]); | ||
1621 : | printf("\n"); | ||
1622 : | } | ||
1623 : | printf("\n"); | ||
1624 : | } | ||
1625 : | Isibaar | 1.1 | } |
1626 : | |||
1627 : | Isibaar | 1.3 | void test_quant_bug() |
1628 : | { | ||
1629 : | edgomez | 1.11 | const int max_Q = 31; |
1630 : | int i, n, qm, q; | ||
1631 : | CPU *cpu; | ||
1632 : | DECLARE_ALIGNED_MATRIX(Src, 8, 8, int16_t, 16); | ||
1633 : | DECLARE_ALIGNED_MATRIX(Dst, 8, 8, int16_t, 16); | ||
1634 : | uint8_t Quant[8*8]; | ||
1635 : | CPU cpu_bug_list[] = { { "PLAINC", 0 }, { "MMX ", XVID_CPU_MMX }, {0,0} }; | ||
1636 : | uint16_t Crcs_Inter[2][32]; | ||
1637 : | uint16_t Crcs_Intra[2][32]; | ||
1638 : | DECLARE_ALIGNED_MATRIX(mpeg_quant_matrices, 8, 64, uint16_t, 16); | ||
1639 : | Isibaar | 1.3 | |
1640 : | edgomez | 1.11 | printf( "\n ===== test MPEG4-quantize bug =====\n" ); |
1641 : | |||
1642 : | for(i=0; i<64; ++i) Src[i] = 2048*(i-32)/32; | ||
1643 : | Isibaar | 1.3 | |
1644 : | #if 1 | ||
1645 : | edgomez | 1.11 | for(qm=1; qm<=255; ++qm) |
1646 : | { | ||
1647 : | for(i=0; i<8*8; ++i) Quant[i] = qm; | ||
1648 : | set_inter_matrix( mpeg_quant_matrices, Quant ); | ||
1649 : | |||
1650 : | for(n=0, cpu = cpu_bug_list; cpu->name!=0; ++cpu, ++n) | ||
1651 : | { | ||
1652 : | uint16_t s; | ||
1653 : | |||
1654 : | if (!init_cpu(cpu)) | ||
1655 : | continue; | ||
1656 : | |||
1657 : | for(q=1; q<=max_Q; ++q) { | ||
1658 : | emms(); | ||
1659 : | quant_mpeg_inter( Dst, Src, q, mpeg_quant_matrices ); | ||
1660 : | emms(); | ||
1661 : | for(s=0, i=0; i<64; ++i) s+=((uint16_t)Dst[i])^i; | ||
1662 : | Crcs_Inter[n][q] = s; | ||
1663 : | } | ||
1664 : | } | ||
1665 : | |||
1666 : | for(q=1; q<=max_Q; ++q) | ||
1667 : | for(i=0; i<n-1; ++i) | ||
1668 : | if (Crcs_Inter[i][q]!=Crcs_Inter[i+1][q]) | ||
1669 : | printf( "Discrepancy Inter: qm=%d, q=%d -> %d/%d !\n", | ||
1670 : | qm, q, Crcs_Inter[i][q], Crcs_Inter[i+1][q]); | ||
1671 : | } | ||
1672 : | Isibaar | 1.3 | #endif |
1673 : | |||
1674 : | #if 1 | ||
1675 : | edgomez | 1.11 | for(qm=1; qm<=255; ++qm) |
1676 : | { | ||
1677 : | for(i=0; i<8*8; ++i) Quant[i] = qm; | ||
1678 : | set_intra_matrix( mpeg_quant_matrices, Quant ); | ||
1679 : | |||
1680 : | for(n=0, cpu = cpu_bug_list; cpu->name!=0; ++cpu, ++n) | ||
1681 : | { | ||
1682 : | uint16_t s; | ||
1683 : | |||
1684 : | if (!init_cpu(cpu)) | ||
1685 : | continue; | ||
1686 : | |||
1687 : | for(q=1; q<=max_Q; ++q) { | ||
1688 : | emms(); | ||
1689 : | quant_mpeg_intra( Dst, Src, q, q, mpeg_quant_matrices); | ||
1690 : | emms(); | ||
1691 : | for(s=0, i=0; i<64; ++i) s+=((uint16_t)Dst[i])^i; | ||
1692 : | Crcs_Intra[n][q] = s; | ||
1693 : | } | ||
1694 : | } | ||
1695 : | |||
1696 : | for(q=1; q<=max_Q; ++q) | ||
1697 : | for(i=0; i<n-1; ++i) | ||
1698 : | if (Crcs_Intra[i][q]!=Crcs_Intra[i+1][q]) | ||
1699 : | printf( "Discrepancy Intra: qm=%d, q=%d -> %d/%d!\n", | ||
1700 : | qm, q, Crcs_Inter[i][q], Crcs_Inter[i+1][q]); | ||
1701 : | } | ||
1702 : | Isibaar | 1.3 | #endif |
1703 : | } | ||
1704 : | Skal | 1.18 | /*********************************************************************/ |
1705 : | |||
/*
 * Reference implementation: number of significant bits in 'value',
 * i.e. the position of the highest set bit plus one (0 for value==0).
 * Shifts one bit at a time.
 */
static uint32_t __inline log2bin_v1(uint32_t value)
{
    uint32_t bits;

    for (bits = 0; value != 0; value >>= 1)
        ++bits;

    return bits;
}
1715 : | |||
/* Number of significant bits of every 4-bit value (index 0..15). */
static const uint8_t log2_tab_16[16] = {
    0, 1, 2, 2, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4
};

/*
 * Table-assisted variant of log2bin_v1: narrows 'value' down to its top
 * non-zero nibble in three steps (16, 8 and 4 bits), then finishes with
 * a lookup in log2_tab_16. Returns 0 for value==0.
 */
static uint32_t __inline log2bin_v2(uint32_t value)
{
    uint32_t base = 0;

    if ((value >> 16) != 0) {       /* something in the upper half-word */
        base = 16;
        value >>= 16;
    }
    if ((value >> 8) != 0) {        /* value fits in 16 bits from here on */
        base += 8;
        value >>= 8;
    }
    if ((value >> 4) != 0) {        /* value fits in 8 bits from here on */
        base += 4;
        value >>= 4;
    }

    return base + log2_tab_16[value];
}
1735 : | |||
1736 : | void test_log2bin() | ||
1737 : | { | ||
1738 : | const int nb_tests = 3000*speed_ref; | ||
1739 : | int n, crc1=0, crc2=0; | ||
1740 : | uint32_t s, s0; | ||
1741 : | double t1, t2; | ||
1742 : | |||
1743 : | t1 = gettime_usec(); | ||
1744 : | s0 = (int)(t1*31.241); | ||
1745 : | for(s=s0, n=0; n<nb_tests; ++n, s=(s*12363+31)&0x7fffffff) | ||
1746 : | crc1 += log2bin_v1(s); | ||
1747 : | t1 = (gettime_usec()-t1) / nb_tests; | ||
1748 : | |||
1749 : | t2 = gettime_usec(); | ||
1750 : | for(s=s0, n=0; n<nb_tests; ++n, s=(s*12363+31)&0x7fffffff) | ||
1751 : | crc2 += log2bin_v2(s); | ||
1752 : | t2 = (gettime_usec() - t2) / nb_tests; | ||
1753 : | Skal | 1.19 | |
1754 : | Skal | 1.18 | printf( "log2bin_v1: %.3f sec crc=%d\n", t1, crc1 ); |
1755 : | printf( "log2bin_v2: %.3f sec crc=%d\n", t2, crc2 ); | ||
1756 : | if (crc1!=crc2) printf( " CRC ERROR !\n" ); | ||
1757 : | } | ||
1758 : | Isibaar | 1.1 | |
1759 : | Skal | 1.19 | /*********************************************************************/ |
1760 : | |||
/*
 * Legacy fraction reduction by trial division.
 *
 * Tries candidate divisors from *num downwards; whenever one divides
 * both *num and *den, both are divided in place and the search restarts
 * from the new *num. Nothing happens when *num <= 1.
 */
static void __inline old_gcd(int *num, int *den)
{
    int cand;

    for (cand = *num; cand > 1; ) {
        if ((*num % cand) != 0 || (*den % cand) != 0) {
            /* not a common divisor: try the next smaller one */
            --cand;
        } else {
            *num /= cand;
            *den /= cand;
            cand = *num;
        }
    }
}
1774 : | |||
/*
 * Greatest common divisor of 'num' and 'den' (Euclid's algorithm).
 *
 * gcd(x, 0) returns x, so a zero denominator no longer triggers a
 * division by zero in the modulo; in particular gcd(0, 0) is 0.
 */
static uint32_t gcd(int num, int den)
{
    int tmp;
    if (den == 0)
        return num;
    while( (tmp=num%den) ) { num = den; den = tmp; }
    return den;
}

/*
 * Reduces the fraction *num / *den in place by dividing both terms by
 * their greatest common divisor. The values are left untouched when the
 * divisor is 0 (both terms are 0).
 */
static void __inline new_gcd(int *num, int *den)
{
    const int div = gcd(*num, *den);
    /* guard on the divisor itself: the previous check tested the
     * (already dereferenced) pointer and was therefore always true */
    if (div) {
        *num /= div;
        *den /= div;
    }
}
1789 : | |||
1790 : | void test_gcd() | ||
1791 : | { | ||
1792 : | const int nb_tests = 10*speed_ref; | ||
1793 : | int i; | ||
1794 : | uint32_t crc1=0, crc2=0; | ||
1795 : | uint32_t n0, n, d0, d; | ||
1796 : | double t1, t2; | ||
1797 : | |||
1798 : | t1 = gettime_usec(); | ||
1799 : | n0 = 0xfffff & (int)(t1*31.241); | ||
1800 : | d0 = 0xfffff & (int)( ((n0*4123)%17) | 1 ); | ||
1801 : | for(n=n0, d=d0, i=0; i<nb_tests; ++i) { | ||
1802 : | old_gcd(&n, &d); | ||
1803 : | crc1 = (((crc1>>4)^d) + ((crc1<<2)^n) ) & 0xffffff; | ||
1804 : | n = d; | ||
1805 : | d = (d*12363+31) & 0xffff; | ||
1806 : | d |= !d; | ||
1807 : | } | ||
1808 : | t1 = (gettime_usec()-t1) / nb_tests; | ||
1809 : | |||
1810 : | t2 = gettime_usec(); | ||
1811 : | for(n=n0, d=d0, i=0; i<nb_tests; ++i) { | ||
1812 : | new_gcd(&n, &d); | ||
1813 : | crc2 = (((crc2>>4)^d) + ((crc2<<2)^n) ) & 0xffffff; | ||
1814 : | n = d; | ||
1815 : | d = (d*12363+31) & 0xffff; | ||
1816 : | d |= !d; | ||
1817 : | } | ||
1818 : | t2 = (gettime_usec() - t2) / nb_tests; | ||
1819 : | |||
1820 : | printf( "old_gcd: %.3f sec crc=%d\n", t1, crc1 ); | ||
1821 : | printf( "new_gcd: %.3f sec crc=%d\n", t2, crc2 ); | ||
1822 : | if (crc1!=crc2) printf( " CRC ERROR !\n" ); | ||
1823 : | } | ||
1824 : | |||
1825 : | Isibaar | 1.1 | /********************************************************************* |
1826 : | Skal | 1.26 | * test compiler |
1827 : | *********************************************************************/ | ||
1828 : | |||
/*
 * Sanity-checks assumptions the codec makes about the compiler:
 * the sizes of the fixed-width integer types, and that '>>' on a
 * signed value behaves like a division by 4 rounded towards minus
 * infinity. Prints one line per failed size check plus a final
 * request to report the platform if anything failed.
 */
void test_compiler() {
    int32_t v;
    int nb_err = 0;

    if (sizeof(uint16_t) < 2) {
        nb_err++;
        printf( "ERROR: sizeof(uint16_t)<2 !!\n" );
    }
    if (sizeof(int16_t) < 2) {
        nb_err++;
        printf( "ERROR: sizeof(int16_t)<2 !!\n" );
    }
    if (sizeof(uint8_t) != 1) {
        nb_err++;
        printf( "ERROR: sizeof(uint8_t)!=1 !!\n" );
    }
    if (sizeof(int8_t) != 1) {
        nb_err++;
        printf( "ERROR: sizeof(int8_t)!=1 !!\n" );
    }
    if (sizeof(uint32_t) < 4) {
        nb_err++;
        printf( "ERROR: sizeof(uint32_t)<4 !!\n" );
    }
    if (sizeof(int32_t) < 4) {
        nb_err++;
        printf( "ERROR: sizeof(int32_t)<4 !!\n" );
    }

    /* non-negative values 0..1000: shift must equal plain division */
    for (v = 0; v <= 1000; ++v) {
        if ((v >> 2) != v / 4)
            nb_err++;
    }

    /* negative values -1000..-2: shift must round towards -infinity */
    for (v = -2; v >= -1000; --v) {
        const int32_t floored = (v / 4) - !!(v % 4);
        if ((v >> 2) != floored)
            nb_err++;
    }

    if (nb_err != 0) {
        printf( "ERROR! please post your platform/compiler specs to xvid-devel@xvid.org !\n" );
    }
}
1869 : | |||
1870 : | /********************************************************************* | ||
1871 : | Isibaar | 1.27.2.1 | * test bitstream functions |
1872 : | *********************************************************************/ | ||
1873 : | |||
1874 : | #define BIT_BUF_SIZE 2000 | ||
1875 : | |||
1876 : | static void test_bits() | ||
1877 : | { | ||
1878 : | const int nb_tests = 50*speed_ref; | ||
1879 : | int tst; | ||
1880 : | uint32_t Crc; | ||
1881 : | uint8_t Buf[BIT_BUF_SIZE]; | ||
1882 : | uint32_t Extracted[BIT_BUF_SIZE*8]; /* worst case: bits read 1 by 1 */ | ||
1883 : | int Lens[BIT_BUF_SIZE*8]; | ||
1884 : | double t1; | ||
1885 : | |||
1886 : | |||
1887 : | printf( "\n === test bitstream ===\n" ); | ||
1888 : | ieee_reseed(1); | ||
1889 : | Crc = 0; | ||
1890 : | |||
1891 : | t1 = gettime_usec(); | ||
1892 : | for(tst=0; tst<nb_tests; ++tst) { | ||
1893 : | Bitstream bs; | ||
1894 : | int m, m2, l, l2; | ||
1895 : | |||
1896 : | for(l=0; l<BIT_BUF_SIZE; ++l) | ||
1897 : | Buf[l] = (uint8_t)ieee_rand(0,255); | ||
1898 : | |||
1899 : | l = BIT_BUF_SIZE - ieee_rand(1,BIT_BUF_SIZE/10); | ||
1900 : | BitstreamInit(&bs, (void*)(Buf+BIT_BUF_SIZE-l), l); | ||
1901 : | |||
1902 : | |||
1903 : | BitstreamReset(&bs); | ||
1904 : | for(l2=l*8, m=0; l2>0; m++) { | ||
1905 : | const int b = ieee_rand(1,32); | ||
1906 : | Lens[m] = b; | ||
1907 : | l2 -= b; | ||
1908 : | if (l2<0) break; | ||
1909 : | Extracted[m] = BitstreamShowBits(&bs, b); | ||
1910 : | BitstreamSkip(&bs, b); | ||
1911 : | // printf( "<= %d: %d 0x%x\n", m, b, Extracted[m]); | ||
1912 : | } | ||
1913 : | |||
1914 : | BitstreamReset(&bs); | ||
1915 : | for(m2=0; m2<m; ++m2) { | ||
1916 : | const int b = Lens[m2]; | ||
1917 : | const uint32_t v = BitstreamGetBits(&bs, b); | ||
1918 : | Crc |= (v!=Extracted[m2]); | ||
1919 : | // printf( "=> %d: %d 0x%x %c\n", m2, b, v, " *"[Crc]); | ||
1920 : | } | ||
1921 : | } | ||
1922 : | t1 = (gettime_usec() - t1) / nb_tests; | ||
1923 : | printf(" test_bits %.3f usec %s\n", t1, (Crc!=0)?"| ERROR": "" ); | ||
1924 : | } | ||
1925 : | |||
1926 : | /********************************************************************* | ||
1927 : | Isibaar | 1.1 | * main |
1928 : | *********************************************************************/ | ||
1929 : | |||
/*
 * Command-line helper: reports that option 'opt' was given without its
 * mandatory argument and terminates the program with exit status -1.
 * Does not return.
 */
static void arg_missing(const char *opt)
{
    printf( "missing argument after option '%s'\n", opt);

    exit(-1);
}
1935 : | |||
1936 : | int main(int argc, const char *argv[]) | ||
1937 : | { | ||
1938 : | int c, what = 0; | ||
1939 : | int width, height; | ||
1940 : | uint32_t chksum = 0; | ||
1941 : | const char * test_bitstream = 0; | ||
1942 : | |||
1943 : | cpu_mask = 0; // default => will use autodectect | ||
1944 : | for(c=1; c<argc; ++c) | ||
1945 : | { | ||
1946 : | if (!strcmp(argv[c], "-v")) verbose++; | ||
1947 : | else if (!strcmp(argv[c], "-c")) cpu_mask = 0 /* PLAIN_C */ | XVID_CPU_FORCE; | ||
1948 : | else if (!strcmp(argv[c], "-mmx")) cpu_mask = XVID_CPU_MMX | XVID_CPU_FORCE; | ||
1949 : | else if (!strcmp(argv[c], "-mmxext")) cpu_mask = XVID_CPU_MMXEXT | XVID_CPU_MMX | XVID_CPU_FORCE; | ||
1950 : | Skal | 1.20 | else if (!strcmp(argv[c], "-sse2")) cpu_mask = XVID_CPU_SSE2 | XVID_CPU_MMXEXT | XVID_CPU_MMX | XVID_CPU_FORCE; |
1951 : | Skal | 1.17 | else if (!strcmp(argv[c], "-3dnow")) cpu_mask = XVID_CPU_3DNOW | XVID_CPU_FORCE; |
1952 : | else if (!strcmp(argv[c], "-3dnowe")) cpu_mask = XVID_CPU_3DNOW | XVID_CPU_3DNOWEXT | XVID_CPU_FORCE; | ||
1953 : | else if (!strcmp(argv[c], "-altivec")) cpu_mask = XVID_CPU_ALTIVEC | XVID_CPU_FORCE; | ||
1954 : | else if (!strcmp(argv[c], "-spd")) { | ||
1955 : | if (++c==argc) arg_missing( argv[argc-1] ); | ||
1956 : | speed_ref = atoi(argv[c]); | ||
1957 : | } | ||
1958 : | else if (argv[c][0]!='-') { | ||
1959 : | what = atoi(argv[c]); | ||
1960 : | if (what==9) { | ||
1961 : | if (c+4>argc) { | ||
1962 : | printf("usage: %s %d bitstream width height (checksum)\n", argv[0], what); | ||
1963 : | exit(-1); | ||
1964 : | } | ||
1965 : | test_bitstream = argv[++c]; | ||
1966 : | width = atoi(argv[++c]); | ||
1967 : | height = atoi(argv[++c]); | ||
1968 : | if (c+1<argc && argv[c+1][0]!='-') { | ||
1969 : | if (sscanf(argv[c+1], "0x%x", &chksum)!=1) { | ||
1970 : | printf( "can't read checksum value.\n" ); | ||
1971 : | exit(-1); | ||
1972 : | } | ||
1973 : | else c++; | ||
1974 : | } | ||
1975 : | // printf( "[%s] %dx%d (0x%.8x)\n", test_bitstream, width, height, chksum); | ||
1976 : | } | ||
1977 : | } | ||
1978 : | else { | ||
1979 : | printf( "unrecognized option '%s'\n", argv[c]); | ||
1980 : | exit(-1); | ||
1981 : | } | ||
1982 : | } | ||
1983 : | |||
1984 : | |||
1985 : | edgomez | 1.11 | if (what==0 || what==1) test_dct(); |
1986 : | if (what==0 || what==2) test_mb(); | ||
1987 : | if (what==0 || what==3) test_sad(); | ||
1988 : | if (what==0 || what==4) test_transfer(); | ||
1989 : | if (what==0 || what==5) test_quant(); | ||
1990 : | if (what==0 || what==6) test_cbp(); | ||
1991 : | edgomez | 1.14 | if (what==0 || what==10) test_sse(); |
1992 : | Skal | 1.18 | if (what==0 || what==11) test_log2bin(); |
1993 : | Skal | 1.19 | if (what==0 || what==12) test_gcd(); |
1994 : | Skal | 1.26 | if (what==0 || what==13) test_compiler(); |
1995 : | Isibaar | 1.27.2.1 | if (what==0 || what==17) test_bits(); |
1996 : | Skal | 1.18 | |
1997 : | edgomez | 1.11 | |
1998 : | if (what==7) { | ||
1999 : | test_IEEE1180_compliance(-256, 255, 1); | ||
2000 : | test_IEEE1180_compliance(-256, 255,-1); | ||
2001 : | test_IEEE1180_compliance( -5, 5, 1); | ||
2002 : | test_IEEE1180_compliance( -5, 5,-1); | ||
2003 : | test_IEEE1180_compliance(-300, 300, 1); | ||
2004 : | test_IEEE1180_compliance(-300, 300,-1); | ||
2005 : | } | ||
2006 : | if (what==8) test_dct_saturation(-256, 255); | ||
2007 : | |||
2008 : | Skal | 1.17 | if (test_bitstream) |
2009 : | test_dec(test_bitstream, width, height, chksum); | ||
2010 : | edgomez | 1.11 | if (what==-1) { |
2011 : | test_dct_precision_diffs(); | ||
2012 : | test_bugs1(); | ||
2013 : | } | ||
2014 : | if (what==-2) | ||
2015 : | test_quant_bug(); | ||
2016 : | |||
2017 : | edgomez | 1.14 | if ((what >= 0 && what <= 6) || what == 10) { |
2018 : | edgomez | 1.11 | printf("\n\n" |
2019 : | "NB: If a function isn't optimised for a specific set of intructions,\n" | ||
2020 : | " a C function is used instead. So don't panic if some functions\n" | ||
2021 : | " may appear to be slow.\n"); | ||
2022 : | } | ||
2023 : | |||
2024 : | #ifdef ARCH_IS_IA32 | ||
2025 : | if (what == 0 || what == 5) { | ||
2026 : | printf("\n" | ||
2027 : | "NB: MMX mpeg4 quantization is known to have very small errors (+/-1 magnitude)\n" | ||
2028 : | " for 1 or 2 coefficients a block. This is mainly caused by the fact the unit\n" | ||
2029 : | " test goes far behind the usual limits of real encoding. Please do not report\n" | ||
2030 : | " this error to the developers.\n"); | ||
2031 : | } | ||
2032 : | Isibaar | 1.3 | #endif |
2033 : | |||
2034 : | edgomez | 1.11 | return 0; |
2035 : | Isibaar | 1.1 | } |
2036 : | |||
2037 : | Skal | 1.19 | /*********************************************************************/ |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |