Parent Directory | Revision Log
Revision 1.5 - (view) (download)
1 : | ia64p | 1.5 | //******************************************************************************* |
2 : | //* * | ||
3 : | //* functions quant_inter and dequant_inter have been softwarepipelined * | ||
4 : | //* use was made of the pmpyshr2 instruction * | ||
5 : | //* * | ||
6 : | //* by Christian Engel and Hans-Joachim Daniels * | ||
7 : | //* christian.engel@ira.uka.de hans-joachim.daniels@ira.uka.de * | ||
8 : | //* * | ||
9 : | //* This was made for the ia64 DivX laboratory (yes, it was really called * | ||
10 : | //* this way, originally OpenDivX was intended, but died shortly before our * | ||
11 : | //* work started (you will probably already know ...)) * | ||
12 : | //* at the Universitat Karlsruhe (TH) held between April and July 2002 * | ||
13 : | //* http://www.info.uni-karlsruhe.de/~rubino/ia64p/ * | ||
14 : | //* * | ||
15 : | //******************************************************************************* | ||
16 : | ia64p | 1.4 | .file "quant_h263_ia64.s" |
17 : | ia64p | 1.3 | .pred.safe_across_calls p1-p5,p16-p63 |
18 : | .section .rodata | ||
19 : | .align 4 | ||
20 : | .type multipliers#,@object | ||
21 : | .size multipliers#,128 | ||
22 : | multipliers: | ||
23 : | data4 0 | ||
24 : | data4 32769 | ||
25 : | data4 16385 | ||
26 : | data4 10923 | ||
27 : | data4 8193 | ||
28 : | data4 6554 | ||
29 : | data4 5462 | ||
30 : | data4 4682 | ||
31 : | data4 4097 | ||
32 : | data4 3641 | ||
33 : | data4 3277 | ||
34 : | data4 2979 | ||
35 : | data4 2731 | ||
36 : | data4 2521 | ||
37 : | data4 2341 | ||
38 : | data4 2185 | ||
39 : | data4 2049 | ||
40 : | data4 1928 | ||
41 : | data4 1821 | ||
42 : | data4 1725 | ||
43 : | data4 1639 | ||
44 : | data4 1561 | ||
45 : | data4 1490 | ||
46 : | data4 1425 | ||
47 : | data4 1366 | ||
48 : | data4 1311 | ||
49 : | data4 1261 | ||
50 : | data4 1214 | ||
51 : | data4 1171 | ||
52 : | data4 1130 | ||
53 : | data4 1093 | ||
54 : | data4 1058 | ||
55 : | .global __divdi3# | ||
56 : | .text | ||
57 : | .align 16 | ||
58 : | .global quant_intra_ia64# | ||
59 : | .proc quant_intra_ia64# | ||
60 : | quant_intra_ia64: | ||
61 : | ia64p | 1.4 | .prologue |
62 : | ia64p | 1.3 | .save ar.pfs, r38 |
63 : | alloc r38 = ar.pfs, 4, 3, 2, 0 | ||
64 : | adds r16 = -8, r12 | ||
65 : | .fframe 32 | ||
66 : | adds r12 = -32, r12 | ||
67 : | mov r17 = ar.lc | ||
68 : | addl r14 = @ltoff(multipliers#), gp | ||
69 : | ld2 r15 = [r33] | ||
70 : | ;; | ||
71 : | .savesp ar.lc, 24 | ||
72 : | st8 [r16] = r17, 8 | ||
73 : | ld8 r14 = [r14] | ||
74 : | sxt2 r15 = r15 | ||
75 : | ;; | ||
76 : | .save.f 0x1 | ||
77 : | stf.spill [r16] = f2 | ||
78 : | .save rp, r37 | ||
79 : | mov r37 = b0 | ||
80 : | .body | ||
81 : | dep.z r36 = r34, 1, 15 | ||
82 : | dep.z r16 = r34, 2, 32 | ||
83 : | cmp4.ge p6, p7 = 0, r15 | ||
84 : | ;; | ||
85 : | add r16 = r16, r14 | ||
86 : | ;; | ||
87 : | ld4 r16 = [r16] | ||
88 : | ;; | ||
89 : | setf.sig f2 = r16 | ||
90 : | (p6) br.cond.dptk .L8 | ||
91 : | extr r39 = r35, 1, 31 | ||
92 : | sxt4 r40 = r35 | ||
93 : | ;; | ||
94 : | add r39 = r39, r15 | ||
95 : | br .L21 | ||
96 : | ;; | ||
97 : | .L8: | ||
98 : | extr r39 = r35, 1, 31 | ||
99 : | sxt4 r40 = r35 | ||
100 : | ;; | ||
101 : | sub r39 = r15, r39 | ||
102 : | ;; | ||
103 : | .L21: | ||
104 : | sxt4 r39 = r39 | ||
105 : | br.call.sptk.many b0 = __divdi3# | ||
106 : | ;; | ||
107 : | addl r14 = 62, r0 | ||
108 : | st2 [r32] = r8 | ||
109 : | addl r19 = 1, r0 | ||
110 : | ;; | ||
111 : | mov ar.lc = r14 | ||
112 : | ;; | ||
113 : | .L20: | ||
114 : | dep.z r17 = r19, 1, 32 | ||
115 : | ;; | ||
116 : | add r15 = r17, r33 | ||
117 : | adds r19 = 1, r19 | ||
118 : | ;; | ||
119 : | ld2 r14 = [r15] | ||
120 : | ;; | ||
121 : | sxt2 r14 = r14 | ||
122 : | ;; | ||
123 : | mov r16 = r14 | ||
124 : | mov r18 = r14 | ||
125 : | ;; | ||
126 : | sub r15 = r0, r16 | ||
127 : | cmp4.le p8, p9 = r36, r16 | ||
128 : | cmp4.le p6, p7 = r0, r16 | ||
129 : | ;; | ||
130 : | sxt2 r14 = r15 | ||
131 : | (p6) br.cond.dptk .L14 | ||
132 : | ;; | ||
133 : | mov r16 = r14 | ||
134 : | add r18 = r17, r32 | ||
135 : | ;; | ||
136 : | setf.sig f6 = r16 | ||
137 : | cmp4.le p6, p7 = r36, r16 | ||
138 : | mov r15 = r18 | ||
139 : | ;; | ||
140 : | xma.l f6 = f6, f2, f0 | ||
141 : | (p7) st2 [r18] = r0 | ||
142 : | ;; | ||
143 : | getf.sig r14 = f6 | ||
144 : | ;; | ||
145 : | extr r14 = r14, 16, 16 | ||
146 : | ;; | ||
147 : | sub r14 = r0, r14 | ||
148 : | ;; | ||
149 : | (p6) st2 [r15] = r14 | ||
150 : | br .L12 | ||
151 : | .L14: | ||
152 : | ia64p | 1.4 | .pred.rel "mutex", p8, p9 |
153 : | ia64p | 1.3 | setf.sig f6 = r18 |
154 : | add r16 = r17, r32 | ||
155 : | ;; | ||
156 : | xma.l f6 = f6, f2, f0 | ||
157 : | mov r15 = r16 | ||
158 : | (p9) st2 [r16] = r0 | ||
159 : | ;; | ||
160 : | getf.sig r14 = f6 | ||
161 : | ;; | ||
162 : | extr r14 = r14, 16, 16 | ||
163 : | ;; | ||
164 : | (p8) st2 [r15] = r14 | ||
165 : | .L12: | ||
166 : | br.cloop.sptk.few .L20 | ||
167 : | adds r18 = 24, r12 | ||
168 : | ;; | ||
169 : | ld8 r19 = [r18], 8 | ||
170 : | mov ar.pfs = r38 | ||
171 : | mov b0 = r37 | ||
172 : | ;; | ||
173 : | mov ar.lc = r19 | ||
174 : | ldf.fill f2 = [r18] | ||
175 : | .restore sp | ||
176 : | adds r12 = 32, r12 | ||
177 : | br.ret.sptk.many b0 | ||
178 : | .endp quant_intra_ia64# | ||
179 : | .common quant_intra#,8,8 | ||
180 : | .common dequant_intra#,8,8 | ||
181 : | .align 16 | ||
182 : | .global dequant_intra_ia64# | ||
183 : | .proc dequant_intra_ia64# | ||
184 : | dequant_intra_ia64: | ||
185 : | .prologue | ||
186 : | ld2 r14 = [r33] | ||
187 : | andcm r15 = 1, r34 | ||
188 : | setf.sig f8 = r35 | ||
189 : | ;; | ||
190 : | sxt2 r14 = r14 | ||
191 : | sub r15 = r34, r15 | ||
192 : | addl r16 = -2048, r0 | ||
193 : | ;; | ||
194 : | setf.sig f6 = r14 | ||
195 : | setf.sig f7 = r15 | ||
196 : | shladd r34 = r34, 1, r0 | ||
197 : | ;; | ||
198 : | xma.l f8 = f6, f8, f0 | ||
199 : | .save ar.lc, r2 | ||
200 : | mov r2 = ar.lc | ||
201 : | ;; | ||
202 : | .body | ||
203 : | getf.sig r14 = f8 | ||
204 : | setf.sig f6 = r34 | ||
205 : | ;; | ||
206 : | sxt2 r15 = r14 | ||
207 : | st2 [r32] = r14 | ||
208 : | ;; | ||
209 : | cmp4.le p6, p7 = r16, r15 | ||
210 : | ;; | ||
211 : | (p7) st2 [r32] = r16 | ||
212 : | (p7) br.cond.dptk .L32 | ||
213 : | addl r14 = 2047, r0 | ||
214 : | ;; | ||
215 : | cmp4.ge p6, p7 = r14, r15 | ||
216 : | ;; | ||
217 : | (p7) st2 [r32] = r14 | ||
218 : | .L32: | ||
219 : | addl r14 = 62, r0 | ||
220 : | addl r19 = 1, r0 | ||
221 : | addl r22 = 2048, r0 | ||
222 : | addl r21 = -2048, r0 | ||
223 : | addl r20 = 2047, r0 | ||
224 : | ;; | ||
225 : | mov ar.lc = r14 | ||
226 : | ;; | ||
227 : | .L56: | ||
228 : | dep.z r16 = r19, 1, 32 | ||
229 : | ;; | ||
230 : | add r14 = r16, r33 | ||
231 : | add r17 = r16, r32 | ||
232 : | adds r19 = 1, r19 | ||
233 : | ;; | ||
234 : | ld2 r15 = [r14] | ||
235 : | ;; | ||
236 : | sxt2 r15 = r15 | ||
237 : | ;; | ||
238 : | cmp4.ne p6, p7 = 0, r15 | ||
239 : | cmp4.le p8, p9 = r0, r15 | ||
240 : | ;; | ||
241 : | (p7) st2 [r17] = r0 | ||
242 : | (p7) br.cond.dpnt .L36 | ||
243 : | add r18 = r16, r32 | ||
244 : | sub r17 = r0, r15 | ||
245 : | ;; | ||
246 : | mov r14 = r18 | ||
247 : | (p8) br.cond.dptk .L40 | ||
248 : | setf.sig f8 = r17 | ||
249 : | ;; | ||
250 : | xma.l f8 = f6, f8, f7 | ||
251 : | ;; | ||
252 : | getf.sig r15 = f8 | ||
253 : | ;; | ||
254 : | cmp4.lt p6, p7 = r22, r15 | ||
255 : | sub r16 = r0, r15 | ||
256 : | ;; | ||
257 : | (p7) st2 [r14] = r16 | ||
258 : | (p6) st2 [r14] = r21 | ||
259 : | br .L36 | ||
260 : | .L40: | ||
261 : | setf.sig f8 = r15 | ||
262 : | ;; | ||
263 : | xma.l f8 = f6, f8, f7 | ||
264 : | ;; | ||
265 : | getf.sig r15 = f8 | ||
266 : | ;; | ||
267 : | cmp4.le p6, p7 = r20, r15 | ||
268 : | ;; | ||
269 : | (p6) mov r14 = r20 | ||
270 : | (p7) mov r14 = r15 | ||
271 : | ;; | ||
272 : | st2 [r18] = r14 | ||
273 : | .L36: | ||
274 : | br.cloop.sptk.few .L56 | ||
275 : | ;; | ||
276 : | mov ar.lc = r2 | ||
277 : | br.ret.sptk.many b0 | ||
278 : | .endp dequant_intra_ia64# | ||
279 : | |||
280 : | |||
281 : | |||
282 : | //uint32_t quant_inter_ia64(int16_t *coeff, const int16_t *data, const uint32_t quant) | ||
283 : | |||
284 : | |||
285 : | |||
286 : | .common quant_inter#,8,8 | ||
287 : | .align 16 | ||
288 : | .global quant_inter_ia64# | ||
289 : | .proc quant_inter_ia64# | ||
290 : | quant_inter_ia64: | ||
291 : | |||
292 : | |||
293 : | ia64p | 1.4 | //******************************************************* |
294 : | //* * | ||
295 : | //* const uint32_t mult = multipliers[quant]; * | ||
296 : | //* const uint16_t quant_m_2 = quant << 1; * | ||
297 : | //* const uint16_t quant_d_2 = quant >> 1; * | ||
298 : | //* int sum = 0; * | ||
299 : | //* uint32_t i; * | ||
300 : | //* int16_t acLevel,acL; * | ||
301 : | //* * | ||
302 : | //*******************************************************/ | ||
303 : | ia64p | 1.3 | |
304 : | |||
305 : | |||
306 : | LL=3 // LL = load latency | ||
307 : | ia64p | 1.4 | //if LL is changed, you'll also have to change the .pred.rel... parts below! |
308 : | ia64p | 1.3 | .prologue |
309 : | addl r14 = @ltoff(multipliers#), gp | ||
310 : | dep.z r15 = r34, 2, 32 | ||
311 : | .save ar.lc, r2 | ||
312 : | mov r2 = ar.lc | ||
313 : | ;; | ||
314 : | .body | ||
315 : | alloc r9=ar.pfs,0,24,0,24 | ||
316 : | mov r17 = ar.ec | ||
317 : | mov r10 = pr | ||
318 : | ld8 r14 = [r14] | ||
319 : | extr.u r16 = r34, 1, 16 //r16 = quant_d_2 | ||
320 : | dep.z r20 = r34, 1, 15 //r20 = quant_m_2 | ||
321 : | ;; | ||
322 : | add r15 = r15, r14 | ||
323 : | mov r21 = r16 //r21 = quant_d_2 | ||
324 : | mov r8 = r0 //r8 = sum = 0 | ||
325 : | mov pr.rot = 0 //p16-p63 = 0 | ||
326 : | ;; | ||
327 : | ld4 r15 = [r15] | ||
328 : | addl r14 = 63, r0 | ||
329 : | mov pr.rot = 1 << 16 //p16=1 | ||
330 : | ;; | ||
331 : | mov ar.lc = r14 | ||
332 : | mov ar.ec = LL+9 | ||
333 : | mov r29 = r15 | ||
334 : | ;; | ||
335 : | mov r15 = r33 //r15 = data | ||
336 : | mov r18 = r32 //r18 = coeff | ||
337 : | ;; | ||
338 : | |||
339 : | |||
340 : | .rotr ac1[LL+3], ac2[8], ac3[2] | ||
341 : | .rotp p[LL+9], cmp1[8], cmp1neg[8],cmp2[5], cmp2neg[2] | ||
342 : | |||
343 : | |||
344 : | |||
345 : | ia64p | 1.4 | //******************************************************************************* |
346 : | //* * | ||
347 : | //* for (i = 0; i < 64; i++) { * | ||
348 : | //* acL=acLevel = data[i]; * | ||
349 : | //* acLevel = ((acLevel < 0)?-acLevel:acLevel) - quant_d_2; * | ||
350 : | //* if (acLevel < quant_m_2){ * | ||
351 : | //* acLevel = 0; * | ||
352 : | //* } * | ||
353 : | //* acLevel = (acLevel * mult) >> SCALEBITS; * | ||
354 : | //* sum += acLevel; * | ||
355 : | //* coeff[i] = ((acL < 0)?-acLevel:acLevel); * | ||
356 : | //* } * | ||
357 : | //* * | ||
358 : | //*******************************************************************************/ | ||
359 : | ia64p | 1.3 | |
360 : | |||
361 : | |||
362 : | .explicit | ||
363 : | .L58: | ||
364 : | ia64p | 1.4 | .pred.rel "clear", p29, p37 |
365 : | .pred.rel "mutex", p29, p37 | ||
366 : | ia64p | 1.5 | |
367 : | ia64p | 1.3 | //pipeline stage |
368 : | {.mmi | ||
369 : | ia64p | 1.5 | (p[0]) ld2 ac1[0] = [r15],2 // 0 acL=acLevel = data[i]; |
370 : | ia64p | 1.3 | (p[LL+1]) sub ac2[0] = r0, ac1[LL+1] // LL+1 ac2=-acLevel |
371 : | (p[LL]) sxt2 ac1[LL] = ac1[LL] // LL | ||
372 : | } | ||
373 : | ia64p | 1.5 | {.mmi |
374 : | (p[LL+1]) cmp4.le cmp1[0], cmp1neg[0] = r0, ac1[LL+1] // LL+1 cmp1 = (0<=acLevel) ; cmp1neg = !(0<=acLevel) | ||
375 : | (p[LL+4]) cmp4.le cmp2[0], cmp2neg[0] = r20, ac2[3] // LL+4 cmp2 = (quant_m_2 < acLevel) ; cmp2neg = !(quant_m_2 < acLevel) | ||
376 : | (cmp1[1]) sub ac2[1] = ac1[LL+2], r21 // LL+2 acLevel = acLevel - quant_d_2; | ||
377 : | ia64p | 1.3 | } |
378 : | {.mmi | ||
379 : | (cmp2neg[1]) mov ac2[4] = r0 // LL+5 if (acLevel < quant_m_2) acLevel=0; | ||
380 : | (cmp1neg[1]) sub ac2[1] = ac2[1], r21 // LL+2 acLevel = ac2 - quant_d_2; | ||
381 : | (p[LL+3]) sxt2 ac2[2] = ac2[2] // LL+3 | ||
382 : | ia64p | 1.5 | } |
383 : | ia64p | 1.3 | {.mmi |
384 : | ia64p | 1.4 | .pred.rel "mutex", p34, p42 |
385 : | ia64p | 1.3 | (cmp1[6]) mov ac3[0] = ac2[6] // LL+7 ac3 = acLevel; |
386 : | (cmp1neg[6]) sub ac3[0] = r0, ac2[6] // LL+7 ac3 = -acLevel; | ||
387 : | (p[LL+6]) pmpyshr2.u ac2[5] = r29, ac2[5], 16 // LL+6 acLevel = (acLevel * mult) >> SCALEBITS; | ||
388 : | } | ||
389 : | {.mib | ||
390 : | (p[LL+8]) st2 [r18] = ac3[1] , 2 // LL+8 coeff[i] = ac3; | ||
391 : | (cmp2[4]) add r8 = r8, ac2[7] // LL+8 sum += acLevel; | ||
392 : | br.ctop.sptk.few .L58 | ||
393 : | ;; | ||
394 : | } | ||
395 : | ia64p | 1.4 | |
396 : | .pred.rel "clear", p29, p37 | ||
397 : | ia64p | 1.3 | .default |
398 : | mov ar.ec = r17 | ||
399 : | ;; | ||
400 : | mov ar.lc = r2 | ||
401 : | mov pr = r10, -1 | ||
402 : | mov ar.pfs = r9 | ||
403 : | br.ret.sptk.many b0 | ||
404 : | .endp quant_inter_ia64# | ||
405 : | |||
406 : | |||
407 : | |||
408 : | |||
409 : | |||
410 : | |||
411 : | |||
412 : | // void dequant_inter_ia64(int16_t *data, const int16_t *coeff, const uint32_t quant) | ||
413 : | |||
414 : | .common dequant_inter#,8,8 | ||
415 : | .align 16 | ||
416 : | .global dequant_inter_ia64# | ||
417 : | .proc dequant_inter_ia64# | ||
418 : | dequant_inter_ia64: | ||
419 : | |||
420 : | //*********************************************************************** | ||
421 : | ia64p | 1.4 | //* * |
422 : | //* const uint16_t quant_m_2 = quant << 1; * | ||
423 : | //* const uint16_t quant_add = (quant & 1 ? quant : quant - 1); * | ||
424 : | //* uint32_t i; * | ||
425 : | //* * | ||
426 : | //*********************************************************************** | ||
427 : | ia64p | 1.3 | |
428 : | |||
429 : | |||
430 : | |||
431 : | .prologue | ||
432 : | andcm r14 = 1, r34 | ||
433 : | dep.z r29 = r34, 1, 15 | ||
434 : | alloc r9=ar.pfs,0,32,0,32 | ||
435 : | .save ar.lc, r2 | ||
436 : | mov r2 = ar.lc | ||
437 : | ;; | ||
438 : | .body | ||
439 : | sub r15 = r34, r14 // r15 = quant | ||
440 : | addl r14 = 63, r0 | ||
441 : | addl r21 = -2048, r0 | ||
442 : | addl r20 = 2047, r0 | ||
443 : | mov r16 = ar.ec | ||
444 : | mov r17 = pr | ||
445 : | ;; | ||
446 : | zxt2 r15 = r15 | ||
447 : | mov ar.lc = r14 | ||
448 : | mov pr.rot = 0 | ||
449 : | ;; | ||
450 : | adds r14 = 0, r33 // r14 = coeff | ||
451 : | mov r18 = r32 // r18 = data | ||
452 : | mov ar.ec = LL+10 | ||
453 : | mov pr.rot = 1 << 16 | ||
454 : | ;; | ||
455 : | |||
456 : | ia64p | 1.4 | //******************************************************************************* |
457 : | //* * | ||
458 : | //*for (i = 0; i < 64; i++) { * | ||
459 : | //* int16_t acLevel = coeff[i]; * | ||
460 : | //* * | ||
461 : | //* if (acLevel == 0) * | ||
462 : | //* { * | ||
463 : | //* data[i] = 0; * | ||
464 : | //* } * | ||
465 : | //* else if (acLevel < 0) * | ||
466 : | //* { * | ||
467 : | //* acLevel = acLevel * quant_m_2 - quant_add; * | ||
468 : | //* data[i] = (acLevel >= -2048 ? acLevel : -2048); * | ||
469 : | //* } * | ||
470 : | //* else // if (acLevel > 0) * | ||
471 : | //* { * | ||
472 : | //* acLevel = acLevel * quant_m_2 + quant_add; * | ||
473 : | //* data[i] = (acLevel <= 2047 ? acLevel : 2047); * | ||
474 : | //* } * | ||
475 : | //* } * | ||
476 : | //* * | ||
477 : | //*******************************************************************************/ | ||
478 : | ia64p | 1.3 | |
479 : | |||
480 : | |||
481 : | LL=2 // LL := load latency | ||
482 : | ia64p | 1.4 | //if LL is changed, you'll also have to change the .pred.rel... parts below! |
483 : | ia64p | 1.3 | |
484 : | |||
485 : | .rotr ac1[LL+10], x[5], y1[3], y2[3] | ||
486 : | .rotp p[LL+10] , cmp1neg[8], cmp2[5], cmp2neg[5],cmp3[2], cmp3neg[2] | ||
487 : | |||
488 : | .explicit | ||
489 : | //pipeline stage | ||
490 : | |||
491 : | .L60: | ||
492 : | ia64p | 1.4 | .pred.rel "clear", p36 |
493 : | .pred.rel "mutex", p47, p49 | ||
494 : | .pred.rel "mutex", p46, p48 | ||
495 : | .pred.rel "mutex", p40, p45 | ||
496 : | .pred.rel "mutex", p39, p44 | ||
497 : | .pred.rel "mutex", p38, p43 | ||
498 : | .pred.rel "mutex", p37, p42 | ||
499 : | .pred.rel "mutex", p36, p41 | ||
500 : | ia64p | 1.3 | {.mmi |
501 : | (p[0])ld2 ac1[0] = [r14] ,2 // 0 acLevel = coeff[i]; | ||
502 : | (p[LL+1])cmp4.ne p6, cmp1neg[0] = 0, ac1[LL+1] // LL+1 | ||
503 : | (p[LL])sxt2 ac1[LL] = ac1[LL] // LL | ||
504 : | |||
505 : | } | ||
506 : | {.mmi | ||
507 : | (p[LL+1])cmp4.le cmp2[0], cmp2neg[0] = r0, ac1[LL+1] // LL+1 | ||
508 : | (cmp2[1]) mov x[0] = r20 // LL+2 | ||
509 : | (p[LL+2])pmpyshr2.u ac1[LL+2] = r29, ac1[LL+2], 0 // LL+2 | ||
510 : | } | ||
511 : | {.mmi | ||
512 : | (cmp2neg[1]) mov x[0] = r21 // LL+2 | ||
513 : | (cmp2[2]) add ac1[LL+3] = ac1[LL+3], r15 // LL+3 | ||
514 : | (cmp2neg[2]) sub ac1[LL+3] = ac1[LL+3], r15 // LL+3 | ||
515 : | |||
516 : | } | ||
517 : | {.mmi | ||
518 : | (cmp2neg[4]) mov y1[0] = ac1[LL+5] // LL+5 | ||
519 : | (cmp2neg[4]) mov y2[0] = x[3] // LL+5 | ||
520 : | (p[LL+4])sxt2 ac1[LL+4] = ac1[LL+4] // LL+4 | ||
521 : | } | ||
522 : | {.mmi | ||
523 : | ia64p | 1.4 | (cmp2[4]) mov y1[0] = x[3] // LL+5 |
524 : | (cmp2[4]) mov y2[0] = ac1[LL+5] // LL+5 | ||
525 : | ia64p | 1.3 | (p[LL+6])cmp4.le cmp3[0], cmp3neg[0] = x[4], ac1[LL+6] // LL+6 |
526 : | } | ||
527 : | {.mmi | ||
528 : | (cmp3[1]) mov ac1[LL+7] = y1[2] // LL+7 | ||
529 : | (cmp3neg[1]) mov ac1[LL+7] = y2[2] // LL+7 | ||
530 : | (cmp1neg[7]) mov ac1[LL+8] = r0 // LL+8 | ||
531 : | } | ||
532 : | {.mbb | ||
533 : | (p[LL+9])st2 [r18] = ac1[LL+9] ,2 // LL+9 | ||
534 : | nop.b 0x0 | ||
535 : | br.ctop.sptk.few .L60 | ||
536 : | ;; | ||
537 : | } | ||
538 : | ia64p | 1.4 | .pred.rel "clear", p36 |
539 : | ia64p | 1.3 | .default |
540 : | mov ar.lc = r2 | ||
541 : | mov ar.pfs = r9 | ||
542 : | mov ar.ec = r16 | ||
543 : | mov pr = r17, -1 | ||
544 : | ;; | ||
545 : | mov ar.lc = r2 | ||
546 : | br.ret.sptk.many b0 | ||
547 : | .endp dequant_inter_ia64# | ||
548 : | .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |