--- quant_h263_ia64.s 2002/06/14 08:26:04 1.1 +++ quant_h263_ia64.s 2002/06/26 15:56:35 1.3 @@ -1,4 +1,4 @@ - .file "quant_h263.c" + .file "quant_h263.1.c" .pred.safe_across_calls p1-p5,p16-p63 .section .rodata .align 4 @@ -43,7 +43,7 @@ .global quant_intra_ia64# .proc quant_intra_ia64# quant_intra_ia64: - .prologue //12, 37 + .prologue 12, 37 .save ar.pfs, r38 alloc r38 = ar.pfs, 4, 3, 2, 0 adds r16 = -8, r12 @@ -72,187 +72,83 @@ ld4 r16 = [r16] ;; setf.sig f2 = r16 - (p6) br.cond.dptk .L4 + (p6) br.cond.dptk .L8 extr r39 = r35, 1, 31 sxt4 r40 = r35 ;; add r39 = r39, r15 - br .L38 + br .L21 ;; -.L4: +.L8: extr r39 = r35, 1, 31 sxt4 r40 = r35 ;; sub r39 = r15, r39 ;; -.L38: +.L21: sxt4 r39 = r39 br.call.sptk.many b0 = __divdi3# ;; - addl r16 = 2, r0 + addl r14 = 62, r0 st2 [r32] = r8 - addl r17 = 1, r0 - ;; - add r14 = r33, r16 - ;; - ld2 r15 = [r14] - ;; - sxt2 r15 = r15 - ;; - mov r14 = r15 - ;; - cmp4.le p6, p7 = r0, r14 - (p6) br.cond.dptk .L21 - sub r14 = r0, r14 - ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r36, r14 - ;; - (p7) add r14 = r32, r16 - (p6) add r15 = r32, r16 - (p6) setf.sig f6 = r14 - ;; - (p7) st2 [r14] = r0 - (p6) xma.l f6 = f6, f2, f0 - ;; - (p6) getf.sig r14 = f6 - ;; - (p6) extr r14 = r14, 16, 16 - ;; - (p6) sub r14 = r0, r14 - br .L39 - ;; -.L21: - cmp4.le p6, p7 = r36, r14 - ;; - (p7) add r14 = r32, r16 - (p6) setf.sig f6 = r15 - ;; - (p7) st2 [r14] = r0 - (p6) xma.l f6 = f6, f2, f0 - (p6) add r15 = r32, r16 - ;; - (p6) getf.sig r14 = f6 - ;; - (p6) extr r14 = r14, 16, 16 -.L39: - //.pred.rel.mutex p6, p7 - ;; - (p6) st2 [r15] = r14 - adds r17 = 1, r17 - ;; - cmp4.geu p6, p7 = 63, r17 - (p7) br.cond.dptk .L16 - addl r14 = 30, r0 + addl r19 = 1, r0 ;; mov ar.lc = r14 ;; -.L37: - dep.z r16 = r17, 1, 32 - ;; - add r14 = r16, r33 - ;; - ld2 r15 = [r14] - ;; - sxt2 r15 = r15 - ;; - mov r14 = r15 +.L20: + dep.z r17 = r19, 1, 32 ;; - cmp4.le p6, p7 = r0, r14 - (p6) br.cond.dptk .L27 - sub r14 = r0, r14 - ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r36, r14 - ;; - (p7) add r14 = r16, r32 - (p6) add r15 = r16, r32 - (p6) setf.sig f6 = r14 - ;; - (p7) st2 [r14] = r0 - (p6) xma.l f6 = f6, f2, f0 - ;; - (p6) getf.sig r14 = f6 - ;; - (p6) extr r14 = r14, 16, 16 - ;; - (p6) sub r14 = r0, r14 - br .L40 - ;; -.L27: - cmp4.le p6, p7 = r36, r14 - ;; - (p7) add r14 = r16, r32 - (p6) setf.sig f6 = r15 - ;; - (p7) st2 [r14] = r0 - (p6) xma.l f6 = f6, f2, f0 - (p6) add r15 = r16, r32 - ;; - (p6) getf.sig r14 = f6 - ;; - (p6) extr r14 = r14, 16, 16 -.L40: - //.pred.rel.mutex p6, p7 - ;; - (p6) st2 [r15] = r14 - adds r14 = 1, r17 - ;; - dep.z r16 = r14, 1, 32 - ;; - add r15 = r16, r33 + add r15 = r17, r33 + adds r19 = 1, r19 ;; ld2 r14 = [r15] ;; sxt2 r14 = r14 ;; - mov r15 = r14 - ;; - cmp4.le p6, p7 = r0, r15 - (p6) br.cond.dptk .L33 - sub r14 = r0, r15 - ;; - sxt2 r14 = r14 - ;; - mov r15 = r14 - ;; - cmp4.le p6, p7 = r36, r15 - ;; - (p7) add r14 = r16, r32 - (p6) setf.sig f6 = r15 + mov r16 = r14 + mov r18 = r14 ;; - (p7) st2 [r14] = r0 - (p6) xma.l f6 = f6, f2, f0 - (p6) add r15 = r16, r32 + sub r15 = r0, r16 + cmp4.le p8, p9 = r36, r16 + cmp4.le p6, p7 = r0, r16 ;; - (p6) getf.sig r14 = f6 + sxt2 r14 = r15 + (p6) br.cond.dptk .L14 ;; - (p6) extr r14 = r14, 16, 16 + mov r16 = r14 + add r18 = r17, r32 ;; - (p6) sub r14 = r0, r14 - br .L41 -.L33: - cmp4.le p6, p7 = r36, r15 + setf.sig f6 = r16 + cmp4.le p6, p7 = r36, r16 + mov r15 = r18 ;; - (p7) add r14 = r16, r32 - (p6) add r15 = r16, r32 - (p6) setf.sig f6 = r14 + xma.l f6 = f6, f2, f0 + (p7) st2 [r18] = r0 ;; - (p7) st2 [r14] = r0 - (p6) xma.l f6 = f6, f2, f0 + getf.sig r14 = f6 ;; - (p6) getf.sig r14 = f6 + extr r14 = r14, 16, 16 ;; - (p6) extr r14 = r14, 16, 16 -.L41: - //.pred.rel.mutex p6, p7 + sub r14 = r0, r14 ;; (p6) st2 [r15] = r14 - adds r17 = 2, r17 - br.cloop.sptk.few .L37 -.L16: + br .L12 +.L14: + .pred.rel.mutex p8, p9 + setf.sig f6 = r18 + add r16 = r17, r32 + ;; + xma.l f6 = f6, f2, f0 + mov r15 = r16 + (p9) st2 [r16] = r0 + ;; + getf.sig r14 = f6 + ;; + extr r14 = r14, 16, 16 + ;; + (p8) st2 [r15] = r14 +.L12: + br.cloop.sptk.few .L20 adds r18 = 24, r12 ;; ld8 r19 = [r18], 8 @@ -265,153 +161,6 @@ adds r12 = 32, r12 br.ret.sptk.many b0 .endp quant_intra_ia64# - .align 16 - .global quant_inter_ia64# - .proc quant_inter_ia64# -quant_inter_ia64: - .prologue - addl r14 = @ltoff(multipliers#), gp - dep.z r15 = r34, 2, 32 - .save ar.lc, r2 - mov r2 = ar.lc - ;; - .body - ld8 r14 = [r14] - extr.u r16 = r34, 1, 16 - dep.z r17 = r34, 1, 15 - ;; - add r15 = r15, r14 - mov r18 = r16 - mov r8 = r0 - ;; - ld4 r15 = [r15] - addl r14 = 31, r0 - mov r19 = r0 - ;; - setf.sig f6 = r15 - mov ar.lc = r14 - ;; -.L65: - dep.z r16 = r19, 1, 32 - ;; - add r14 = r16, r33 - ;; - ld2 r15 = [r14] - ;; - sxt2 r15 = r15 - ;; - mov r14 = r15 - ;; - cmp4.le p6, p7 = r0, r14 - (p6) br.cond.dptk .L55 - sub r14 = r0, r14 - ;; - sub r14 = r14, r18 - ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r17, r14 - ;; - (p7) add r14 = r16, r32 - (p6) setf.sig f7 = r14 - ;; - (p7) st2 [r14] = r0 - (p6) add r16 = r16, r32 - (p6) xma.l f7 = f7, f6, f0 - ;; - (p6) getf.sig r14 = f7 - ;; - (p6) extr r14 = r14, 16, 16 - ;; - (p6) sub r15 = r0, r14 - (p6) add r8 = r8, r14 - ;; - (p6) st2 [r16] = r15 - br .L53 -.L55: - sub r14 = r14, r18 - ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r17, r14 - ;; - (p7) add r14 = r16, r32 - (p6) add r15 = r16, r32 - (p6) setf.sig f7 = r14 - ;; - (p7) st2 [r14] = r0 - (p6) xma.l f7 = f7, f6, f0 - ;; - (p6) getf.sig r14 = f7 - ;; - (p6) extr r14 = r14, 16, 16 - ;; - (p6) st2 [r15] = r14 - (p6) add r8 = r8, r14 -.L53: - adds r14 = 1, r19 - ;; - dep.z r16 = r14, 1, 32 - ;; - add r15 = r16, r33 - ;; - ld2 r14 = [r15] - ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r0, r14 - (p6) br.cond.dptk .L61 - sub r14 = r0, r14 - ;; - sub r14 = r14, r18 - ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r17, r14 - ;; - (p7) add r14 = r16, r32 - (p6) setf.sig f7 = r14 - ;; - (p7) st2 [r14] = r0 - (p6) add r16 = r16, r32 - (p6) xma.l f7 = f7, f6, f0 - ;; - (p6) getf.sig r14 = f7 - ;; - (p6) extr r14 = r14, 16, 16 - ;; - (p6) sub r15 = r0, r14 - (p6) add r8 = r8, r14 - ;; - (p6) st2 [r16] = r15 - br .L59 -.L61: - sub r14 = r14, r18 - ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r17, r14 - ;; - (p7) add r14 = r16, r32 - (p6) add r15 = r16, r32 - (p6) setf.sig f7 = r14 - ;; - (p7) st2 [r14] = r0 - (p6) xma.l f7 = f7, f6, f0 - ;; - (p6) getf.sig r14 = f7 - ;; - (p6) extr r14 = r14, 16, 16 - ;; - (p6) st2 [r15] = r14 - (p6) add r8 = r8, r14 -.L59: - adds r19 = 2, r19 - br.cloop.sptk.few .L65 - ;; - mov ar.lc = r2 - br.ret.sptk.many b0 - .endp quant_inter_ia64# .common quant_intra#,8,8 .common dequant_intra#,8,8 .align 16 @@ -445,307 +194,322 @@ cmp4.le p6, p7 = r16, r15 ;; (p7) st2 [r32] = r16 - (p7) br.cond.dptk .L68 + (p7) br.cond.dptk .L32 addl r14 = 2047, r0 ;; cmp4.ge p6, p7 = r14, r15 ;; (p7) st2 [r32] = r14 -.L68: - addl r14 = 20, r0 +.L32: + addl r14 = 62, r0 addl r19 = 1, r0 - addl r21 = 2048, r0 - addl r20 = -2048, r0 - addl r18 = 2047, r0 + addl r22 = 2048, r0 + addl r21 = -2048, r0 + addl r20 = 2047, r0 ;; mov ar.lc = r14 ;; -.L110: +.L56: dep.z r16 = r19, 1, 32 ;; add r14 = r16, r33 + add r17 = r16, r32 + adds r19 = 1, r19 ;; ld2 r15 = [r14] ;; sxt2 r15 = r15 ;; cmp4.ne p6, p7 = 0, r15 + cmp4.le p8, p9 = r0, r15 ;; - (p7) add r14 = r16, r32 - ;; - (p7) st2 [r14] = r0 - (p7) br.cond.dpnt .L92 - cmp4.le p6, p7 = r0, r15 - (p6) br.cond.dptk .L95 - sub r14 = r0, r15 - add r17 = r16, r32 - ;; - setf.sig f8 = r14 + (p7) st2 [r17] = r0 + (p7) br.cond.dpnt .L36 + add r18 = r16, r32 + sub r17 = r0, r15 + ;; + mov r14 = r18 + (p8) br.cond.dptk .L40 + setf.sig f8 = r17 ;; xma.l f8 = f6, f8, f7 ;; getf.sig r15 = f8 ;; - cmp4.lt p6, p7 = r21, r15 - ;; - (p7) sub r14 = r0, r15 + cmp4.lt p6, p7 = r22, r15 + sub r16 = r0, r15 ;; - (p7) st2 [r17] = r14 - (p6) st2 [r17] = r20 - br .L92 -.L95: + (p7) st2 [r14] = r16 + (p6) st2 [r14] = r21 + br .L36 +.L40: setf.sig f8 = r15 - add r14 = r16, r32 ;; xma.l f8 = f6, f8, f7 ;; getf.sig r15 = f8 ;; - cmp4.le p6, p7 = r18, r15 - ;; - (p6) mov r15 = r18 - ;; - st2 [r14] = r15 -.L92: - adds r14 = 1, r19 - ;; - dep.z r17 = r14, 1, 32 - ;; - add r15 = r17, r33 - ;; - ld2 r14 = [r15] - ;; - sxt2 r14 = r14 - ;; - mov r16 = r14 - ;; - cmp4.ne p6, p7 = 0, r16 - ;; - (p7) add r14 = r17, r32 - ;; - (p7) st2 [r14] = r0 - (p7) br.cond.dpnt .L98 - cmp4.le p6, p7 = r0, r16 - (p6) br.cond.dptk .L101 - sub r14 = r0, r16 - add r17 = r17, r32 - ;; - setf.sig f8 = r14 - ;; - xma.l f8 = f6, f8, f7 - ;; - getf.sig r16 = f8 - ;; - cmp4.lt p6, p7 = r21, r16 + cmp4.le p6, p7 = r20, r15 ;; - (p7) sub r14 = r0, r16 + (p6) mov r14 = r20 + (p7) mov r14 = r15 ;; - (p7) st2 [r17] = r14 - (p6) st2 [r17] = r20 - br .L98 -.L101: - setf.sig f8 = r16 - add r14 = r17, r32 + st2 [r18] = r14 +.L36: + br.cloop.sptk.few .L56 ;; - xma.l f8 = f6, f8, f7 - ;; - getf.sig r16 = f8 - ;; - cmp4.le p6, p7 = r18, r16 - ;; - (p6) mov r15 = r18 - (p7) mov r15 = r16 - ;; - st2 [r14] = r15 -.L98: - adds r14 = 2, r19 - ;; - dep.z r17 = r14, 1, 32 - ;; - add r15 = r17, r33 - ;; - ld2 r14 = [r15] - ;; - sxt2 r14 = r14 - ;; - mov r16 = r14 - ;; - cmp4.ne p6, p7 = 0, r16 - ;; - (p7) add r14 = r17, r32 - ;; - (p7) st2 [r14] = r0 - (p7) br.cond.dpnt .L104 - cmp4.le p6, p7 = r0, r16 - (p6) br.cond.dptk .L107 - sub r14 = r0, r16 - add r17 = r17, r32 - ;; - setf.sig f8 = r14 - ;; - xma.l f8 = f6, f8, f7 - ;; - getf.sig r16 = f8 - ;; - cmp4.lt p6, p7 = r21, r16 - ;; - (p7) sub r14 = r0, r16 + mov ar.lc = r2 + br.ret.sptk.many b0 + .endp dequant_intra_ia64# + + + +//uint32_t quant_inter_ia64(int16_t *coeff, const int16_t *data, const uint32_t quant) + + + + .common quant_inter#,8,8 + .align 16 + .global quant_inter_ia64# + .proc quant_inter_ia64# +quant_inter_ia64: + + +/******************************************************** + * * + * const uint32_t mult = multipliers[quant]; * + * const uint16_t quant_m_2 = quant << 1; * + * const uint16_t quant_d_2 = quant >> 1; * + * int sum = 0; * + * uint32_t i; * + * int16_t acLevel,acL; * + * * + ********************************************************/ + + + + LL=3 // LL = load latency + + .prologue + addl r14 = @ltoff(multipliers#), gp + dep.z r15 = r34, 2, 32 + .save ar.lc, r2 + mov r2 = ar.lc ;; - (p7) st2 [r17] = r14 - (p6) st2 [r17] = r20 - br .L104 -.L107: - setf.sig f8 = r16 - add r14 = r17, r32 + .body + alloc r9=ar.pfs,0,24,0,24 + mov r17 = ar.ec + mov r10 = pr + ld8 r14 = [r14] + extr.u r16 = r34, 1, 16 //r16 = quant_d_2 + dep.z r20 = r34, 1, 15 //r20 = quant_m_2 ;; - xma.l f8 = f6, f8, f7 + add r15 = r15, r14 + mov r21 = r16 //r21 = quant_d_2 + mov r8 = r0 //r8 = sum = 0 + mov pr.rot = 0 //p16-p63 = 0 ;; - getf.sig r16 = f8 + ld4 r15 = [r15] + addl r14 = 63, r0 + mov pr.rot = 1 << 16 //p16=1 ;; - cmp4.le p6, p7 = r18, r16 + mov ar.lc = r14 + mov ar.ec = LL+9 + mov r29 = r15 ;; - (p6) mov r15 = r18 - (p7) mov r15 = r16 + mov r15 = r33 //r15 = data + mov r18 = r32 //r18 = coeff ;; - st2 [r14] = r15 -.L104: - adds r19 = 3, r19 - br.cloop.sptk.few .L110 + + + .rotr ac1[LL+3], ac2[8], ac3[2] + .rotp p[LL+9], cmp1[8], cmp1neg[8],cmp2[5], cmp2neg[2] + + + +/******************************************************************************** + * * + * for (i = 0; i < 64; i++) { * + * acL=acLevel = data[i]; * + * acLevel = ((acLevel < 0)?-acLevel:acLevel) - quant_d_2; * + * if (acLevel < quant_m_2){ * + * acLevel = 0; * + * } * + * acLevel = (acLevel * mult) >> SCALEBITS; * + * sum += acLevel; * + * coeff[i] = ((acL < 0)?-acLevel:acLevel); * + * } * + * * + ********************************************************************************/ + + + +.explicit +.L58: + //pipeline stage +{.mmi + (p[0]) ld2 ac1[0] = [r15],2 // 0 acL=acLevel = data[i]; + (p[LL+1]) sub ac2[0] = r0, ac1[LL+1] // LL+1 ac2=-acLevel + (p[LL]) sxt2 ac1[LL] = ac1[LL] // LL +} +{.mmi + (p[LL+1]) cmp4.le cmp1[0], cmp1neg[0] = r0, ac1[LL+1] // LL+1 cmp1 = (0<=acLevel) ; cmp1neg = !(0<=acLevel) + (p[LL+4]) cmp4.le cmp2[0], cmp2neg[0] = r20, ac2[3] // LL+4 cmp2 = (quant_m_2 < acLevel) ; cmp2neg = !(quant_m_2 < acLevel) + (cmp1[1]) sub ac2[1] = ac1[LL+2], r21 // LL+2 acLevel = acLevel - quant_d_2; +} +{.mmi + (cmp2neg[1]) mov ac2[4] = r0 // LL+5 if (acLevel < quant_m_2) acLevel=0; + (cmp1neg[1]) sub ac2[1] = ac2[1], r21 // LL+2 acLevel = ac2 - quant_d_2; + (p[LL+3]) sxt2 ac2[2] = ac2[2] // LL+3 +} +{.mmi + (cmp1[6]) mov ac3[0] = ac2[6] // LL+7 ac3 = acLevel; + (cmp1neg[6]) sub ac3[0] = r0, ac2[6] // LL+7 ac3 = -acLevel; + (p[LL+6]) pmpyshr2.u ac2[5] = r29, ac2[5], 16 // LL+6 acLevel = (acLevel * mult) >> SCALEBITS; +} +{.mib + (p[LL+8]) st2 [r18] = ac3[1] , 2 // LL+8 coeff[i] = ac3; + (cmp2[4]) add r8 = r8, ac2[7] // LL+8 sum += acLevel; + br.ctop.sptk.few .L58 + ;; +} +.default + mov ar.ec = r17 ;; mov ar.lc = r2 + mov pr = r10, -1 + mov ar.pfs = r9 br.ret.sptk.many b0 - .endp dequant_intra_ia64# - .common quant_inter#,8,8 + .endp quant_inter_ia64# + + + + + + + +// void dequant_inter_ia64(int16_t *data, const int16_t *coeff, const uint32_t quant) + .common dequant_inter#,8,8 .align 16 .global dequant_inter_ia64# .proc dequant_inter_ia64# dequant_inter_ia64: + +//*********************************************************************** +// * +// const uint16_t quant_m_2 = quant << 1; * +// const uint16_t quant_add = (quant & 1 ? quant : quant - 1); * +// uint32_t i; * +// * +//*********************************************************************** * + + + + .prologue andcm r14 = 1, r34 - dep.z r15 = r34, 1, 15 + dep.z r29 = r34, 1, 15 + alloc r9=ar.pfs,0,32,0,32 .save ar.lc, r2 mov r2 = ar.lc ;; .body - sub r34 = r34, r14 - setf.sig f6 = r15 - mov r19 = r0 - addl r14 = 31, r0 - addl r18 = -2048, r0 - addl r17 = 2047, r0 + sub r15 = r34, r14 // r15 = quant + addl r14 = 63, r0 + addl r21 = -2048, r0 + addl r20 = 2047, r0 + mov r16 = ar.ec + mov r17 = pr ;; - zxt2 r34 = r34 + zxt2 r15 = r15 mov ar.lc = r14 + mov pr.rot = 0 ;; -.L122: - dep.z r16 = r19, 1, 32 - ;; - add r14 = r16, r33 - ;; - ld2 r15 = [r14] - ;; - sxt2 r15 = r15 - ;; - mov r14 = r15 - ;; - cmp4.ne p6, p7 = 0, r14 - ;; - (p7) add r14 = r16, r32 - ;; - (p7) st2 [r14] = r0 - (p7) br.cond.dpnt .L112 - cmp4.le p6, p7 = r0, r14 - (p6) br.cond.dptk .L115 - setf.sig f7 = r14 - add r15 = r16, r32 - ;; - xma.l f7 = f7, f6, f0 - ;; - getf.sig r14 = f7 - ;; - sub r14 = r14, r34 - ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r18, r14 - ;; - (p7) mov r14 = r18 - br .L123 -.L115: - setf.sig f8 = r15 - setf.sig f7 = r34 - ;; - xma.l f8 = f8, f6, f7 - add r15 = r16, r32 - ;; - getf.sig r14 = f8 + adds r14 = 0, r33 // r14 = coeff + mov r18 = r32 // r18 = data + mov ar.ec = LL+10 + mov pr.rot = 1 << 16 + ;; + +/******************************************************************************** + * * + *for (i = 0; i < 64; i++) { * + * int16_t acLevel = coeff[i]; * + * * + * if (acLevel == 0) * + * { * + * data[i] = 0; * + * } * + * else if (acLevel < 0) * + * { * + * acLevel = acLevel * quant_m_2 - quant_add; * + * data[i] = (acLevel >= -2048 ? acLevel : -2048); * + * } * + * else // if (acLevel > 0) * + * { * + * acLevel = acLevel * quant_m_2 + quant_add; * + * data[i] = (acLevel <= 2047 ? acLevel : 2047); * + * } * + * } * + * * + ********************************************************************************/ + + + + LL=2 // LL := load latency + + + .rotr ac1[LL+10], x[5], y1[3], y2[3] + .rotp p[LL+10] , cmp1neg[8], cmp2[5], cmp2neg[5],cmp3[2], cmp3neg[2] + +.explicit + //pipeline stage + +.L60: +{.mmi + (p[0])ld2 ac1[0] = [r14] ,2 // 0 acLevel = coeff[i]; + (p[LL+1])cmp4.ne p6, cmp1neg[0] = 0, ac1[LL+1] // LL+1 + (p[LL])sxt2 ac1[LL] = ac1[LL] // LL + +} +{.mmi + (p[LL+1])cmp4.le cmp2[0], cmp2neg[0] = r0, ac1[LL+1] // LL+1 + (cmp2[1]) mov x[0] = r20 // LL+2 + (p[LL+2])pmpyshr2.u ac1[LL+2] = r29, ac1[LL+2], 0 // LL+2 +} +{.mmi + (cmp2neg[1]) mov x[0] = r21 // LL+2 + (cmp2[2]) add ac1[LL+3] = ac1[LL+3], r15 // LL+3 + (cmp2neg[2]) sub ac1[LL+3] = ac1[LL+3], r15 // LL+3 + +} +{.mmi + (cmp2neg[4]) mov y1[0] = ac1[LL+5] // LL+5 + (cmp2neg[4]) mov y2[0] = x[3] // LL+5 + (p[LL+4])sxt2 ac1[LL+4] = ac1[LL+4] // LL+4 +} +{.mmi + (cmp2[4]) mov y1[0] = x[3] // LL+4 + (cmp2[4]) mov y2[0] = ac1[LL+5] // LL+4 + (p[LL+6])cmp4.le cmp3[0], cmp3neg[0] = x[4], ac1[LL+6] // LL+6 +} +{.mmi + (cmp3[1]) mov ac1[LL+7] = y1[2] // LL+7 + (cmp3neg[1]) mov ac1[LL+7] = y2[2] // LL+7 + (cmp1neg[7]) mov ac1[LL+8] = r0 // LL+8 +} +{.mbb + (p[LL+9])st2 [r18] = ac1[LL+9] ,2 // LL+9 + nop.b 0x0 + br.ctop.sptk.few .L60 ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r17, r14 - ;; - (p6) mov r14 = r17 - ;; -.L123: - st2 [r15] = r14 -.L112: - adds r14 = 1, r19 - ;; - dep.z r16 = r14, 1, 32 - ;; - add r15 = r16, r33 - ;; - ld2 r14 = [r15] - ;; - sxt2 r14 = r14 - ;; - mov r15 = r14 - ;; - cmp4.ne p6, p7 = 0, r15 - ;; - (p7) add r14 = r16, r32 - ;; - (p7) st2 [r14] = r0 - (p7) br.cond.dpnt .L117 - cmp4.le p6, p7 = r0, r15 - (p6) br.cond.dptk .L120 - setf.sig f8 = r15 - ;; - xma.l f8 = f8, f6, f0 - add r15 = r16, r32 - ;; - getf.sig r14 = f8 - ;; - sub r14 = r14, r34 - ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r18, r14 - ;; - (p7) mov r14 = r18 - br .L124 - ;; -.L120: - setf.sig f7 = r14 - setf.sig f8 = r34 - add r15 = r16, r32 - ;; - xma.l f7 = f7, f6, f8 - ;; - getf.sig r14 = f7 - ;; - sxt2 r14 = r14 - ;; - cmp4.le p6, p7 = r17, r14 - ;; - (p6) mov r14 = r17 - ;; -.L124: - st2 [r15] = r14 -.L117: - adds r19 = 2, r19 - br.cloop.sptk.few .L122 +} +.default + mov ar.lc = r2 + mov ar.pfs = r9 + mov ar.ec = r16 + mov pr = r17, -1 ;; mov ar.lc = r2 br.ret.sptk.many b0