44 |
.rotp aldp[LL], sh1p[SL], or1p[OL], addp[AL], pavg1p[AVL],stp[STL] |
.rotp aldp[LL], sh1p[SL], or1p[OL], addp[AL], pavg1p[AVL],stp[STL] |
45 |
|
|
46 |
|
|
47 |
loop_interpolate: |
.Lloop_interpolate: |
48 |
(aldp[0]) ld8 ald1[0] = [r14],r16 // load aligned src |
(aldp[0]) ld8 ald1[0] = [r14],r16 // load aligned src |
49 |
(aldp[0]) ld8 ald2[0] = [r18],r16 // and aligned src+8 |
(aldp[0]) ld8 ald2[0] = [r18],r16 // and aligned src+8 |
50 |
|
|
66 |
|
|
67 |
|
|
68 |
|
|
69 |
br.ctop.sptk.few loop_interpolate |
br.ctop.sptk.few .Lloop_interpolate |
70 |
;; |
;; |
71 |
mov ar.lc = r20 |
mov ar.lc = r20 |
72 |
mov pr = r21,-1 |
mov pr = r21,-1 |
116 |
.rotp aldp[LL], sh1p[SL], or1p[OL], addp[AL], pavg1p[AVL],stp[STL] |
.rotp aldp[LL], sh1p[SL], or1p[OL], addp[AL], pavg1p[AVL],stp[STL] |
117 |
|
|
118 |
|
|
119 |
loop_interpolate2: |
.Lloop_interpolate2: |
120 |
(aldp[0]) ld8 ald1[0] = [r14],r16 |
(aldp[0]) ld8 ald1[0] = [r14],r16 |
121 |
(aldp[0]) ld8 ald2[0] = [r18],r16 |
(aldp[0]) ld8 ald2[0] = [r18],r16 |
122 |
(aldp[0]) ld8 ald3[0] = [r19],r16 |
(aldp[0]) ld8 ald3[0] = [r19],r16 |
140 |
|
|
141 |
|
|
142 |
|
|
143 |
br.ctop.sptk.few loop_interpolate2 |
br.ctop.sptk.few .Lloop_interpolate2 |
144 |
;; |
;; |
145 |
mov ar.lc = r20 |
mov ar.lc = r20 |
146 |
mov pr = r21,-1 |
mov pr = r21,-1 |
192 |
.rotp aldp[LL], sh1p[SL], or1p[OL], addp[AL],pavg1p[AVL],pavg2p[AVL],stp[STL] |
.rotp aldp[LL], sh1p[SL], or1p[OL], addp[AL],pavg1p[AVL],pavg2p[AVL],stp[STL] |
193 |
|
|
194 |
|
|
195 |
loop_interpolate3: |
.Lloop_interpolate3: |
196 |
(aldp[0]) ld8 ald1[0] = [r14],r16 |
(aldp[0]) ld8 ald1[0] = [r14],r16 |
197 |
(aldp[0]) ld8 ald2[0] = [r18],r16 |
(aldp[0]) ld8 ald2[0] = [r18],r16 |
198 |
(aldp[0]) ld8 ald3[0] = [r19],r16 |
(aldp[0]) ld8 ald3[0] = [r19],r16 |
226 |
|
|
227 |
|
|
228 |
|
|
229 |
br.ctop.sptk.few loop_interpolate3 |
br.ctop.sptk.few .Lloop_interpolate3 |
230 |
;; |
;; |
231 |
mov ar.lc = r20 |
mov ar.lc = r20 |
232 |
mov pr = r21,-1 |
mov pr = r21,-1 |