[cvs] / xvidcore / src / dct / ia64_asm / genidct.py Repository:
ViewVC logotype

View of /xvidcore/src/dct/ia64_asm/genidct.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Wed Jul 10 14:24:56 2002 UTC (22 years, 2 months ago) by ia64p
Branch: MAIN
CVS Tags: cvs-head, release-1_1_3-final, release-1_0_0, release-1_0_1, release-1_0_2, release-1_0_3, rc1_1_3_0, release-1_1_0, release-1_1_3, release-1_1_2, tag-branching-1_3_0, release-1_2_2, release-1_3_1, release-1_3_0, release-1_1_1-final, tag-branching-20020904, release-0_9_0, release-1_2_0, tag-merging-20040322, release-0_9_1, branch-release-1-0, release-0_9_2, release-1_1_0_final, tag-branching-1_2_0, merged-dev-api-3, HEAD
Branch point for: release-1_1-branch, release-0_9_1-fixes, release-1_3-branch, release-1_0-branch, dev-api-4, release-1_2-branch, dev-api-3, Isibaar
Changes since 1.1: +13 -0 lines
itanium idct generator
#! /usr/bin/python

# Generate the Linzer-Feig multiply-add IDCT for IA-64
# (c) 2002 Christian Schwarz <schwarz@ira.uka.de>,
#          Haiko Gaisser <haiko@gaisser.de>,
#          Sebastian Hack <mail@s-hack.de>


import math

# Row permutations handed to shuffle_column() by column_idct(): one is
# applied before the butterfly network of a column, the other after it.
pre_shuffle = [0, 4, 2, 6, 1, 7, 3, 5]
post_shuffle = [0, 1, 6, 3, 7, 2, 5, 4]

# Number of named constants c0 .. c<constants-1> emitted by gen_consts().
constants = 16

# Floating-point register numbers handed out to the constants, in order.
# gen_consts() consumes this list with pop(0), so it has to be a real list.
float_scratch = list(range(32, 32 + constants))

# First floating-point register number available for the data matrix,
# directly after the registers reserved for the constants.
regbase = max(float_scratch) + 1

# First integer register number used by gen_load() and gen_store().
intregbase = 33

def print_matrix(matrix,s=''):
    if s != '':
        print "\n\t// %s" % s
    for i in range(0, 8):
        print "\t// ",
        for j in range(0, 4):
            print "%2d" % matrix[i*4+j],
        print ""

def exchange_elements(list, a, b):
    """ Swap the entries stored at positions a and b, in place.

    Nothing is returned; the sequence passed in is modified.
    """
    held = list[b]
    list[b] = list[a]
    list[a] = held

def alloc_regs(matrix, n):
    """ Pick the n lowest register numbers that the matrix does not use.

    The search starts at regbase.  The result is a list in ascending
    order; it is empty when n is not positive.  The matrix itself is
    not modified.
    """
    picked = []
    candidate = regbase
    # One ascending scan is enough: everything below the current
    # candidate is either held by the matrix or already picked.
    while len(picked) < n:
        if candidate not in matrix:
            picked.append(candidate)
        candidate = candidate + 1
    return picked

def transpose_2x2_submatrix(matrix, i, j):
    """ Exchange two register pairs of the matrix, mirrored at the diagonal.

    matrix[i*8 + j] trades places with matrix[j*8 + i], and the entries
    four positions further on trade places likewise.  With four entries
    per row this means rows 2*i and 2*i+1 of column j are exchanged with
    rows 2*j and 2*j+1 of column i.  The matrix is changed in place.
    """
    for offset in (0, 4):
        here = i * 8 + j + offset
        there = j * 8 + i + offset
        matrix[here], matrix[there] = matrix[there], matrix[here]


def transpose(matrix):
    """ register renaming for transpose
    """
    regs = alloc_regs(matrix, 16)
    save_regs = regs[:]

    # emit code ...
    for i in range(1,8,2):
        for j in range(0,4):
            r1 = matrix[(i-1)*4+j]
            r2 = matrix[i*4+j]
            print '\tfmix.r  f%d = f%d, f%d' % (save_regs.pop(0), r1, r2)

    print '\t;;'

    for i in range(0,8,2):
        for j in range(0,4):
            r1 = matrix[i*4+j]
            r2 = matrix[(i+1)*4+j]
            print '\tfmix.l  f%d = f%d, f%d' % (r1, r1, r2)

    print '\t;;'

    # first stage, transpose the 2x2 matrices
    for i in range(1,8,2):
        for j in range(0,4):
            r = matrix[i*4+j]
            matrix[i*4+j] = regs.pop(0)

#    print_matrix(matrix)

    # exchange the 2x2 matrices by renaming the registers
    for i in range(0, 4):
        for j in range(i+1, 4):
            transpose_2x2_submatrix(matrix, i, j)

#    print ''
#    print_matrix(matrix)
#    print "transpose"
#    print_matrix(matrix)

def shuffle_column(matrix, col, permutation):
    """ Register renaming for the 8 registers that hold one column.

    After the call, row i of column col refers to the register that row
    permutation[i] referred to before.  Only the entries
    matrix[i*4 + col] for i = 0..7 are touched; the matrix is changed
    in place.
    """
    slots = [row * 4 + col for row in range(0, 8)]
    before = [matrix[slot] for slot in slots]
    for row in range(0, 8):
        matrix[slots[row]] = before[permutation[row]]

def butterfly(matrix, col, i, j, c1, c2):
    """ register renaming for a butterfly operation in a column
    """
    ri = matrix[i*4+col]
    rj = matrix[j*4+col]
    regs = alloc_regs(matrix, 1)

    print '\t// (f%d, f%d) = (f%d, f%d) $ (%s, %s), (line %d, %d)' % \
          (regs[0], rj, ri, rj, c1, c2, i, j)
    print '\tfpma    f%d = f%d, %s, f%d' % (regs[0], rj, c1, ri)
    print '\tfpnma   f%d = f%d, %s, f%d' % (rj, rj, c2, ri)
    print '\t;;'
    
    matrix[i*4+col] = regs[0]


def column_idct(matrix, col):

    print_matrix(matrix, "before pre shuffle")
    shuffle_column(matrix, col, pre_shuffle)
    print_matrix(matrix, "after pre shuffle")

    butterfly(matrix, col, 0, 1, 'c0', 'c0')
    butterfly(matrix, col, 2, 3, 'c1', 'c2')
    butterfly(matrix, col, 4, 5, 'c3', 'c4')
    butterfly(matrix, col, 6, 7, 'c5', 'c6')
    print '\t;;'
    butterfly(matrix, col, 0, 3, 'c7', 'c7')
    butterfly(matrix, col, 1, 2, 'c8', 'c8')
    butterfly(matrix, col, 4, 6, 'c9', 'c9')
    butterfly(matrix, col, 5, 7, 'c10', 'c10')
    print '\t;;'
    butterfly(matrix, col, 5, 6, 'c11', 'c11')
    butterfly(matrix, col, 0, 4, 'c12', 'c12')
    butterfly(matrix, col, 3, 7, 'c14', 'c14')
    print '\t;;'
    butterfly(matrix, col, 1, 5, 'c13', 'c13')
    butterfly(matrix, col, 2, 6, 'c13', 'c13')
    
    print_matrix(matrix, "before post shuffle")
    shuffle_column(matrix, col, post_shuffle)
    print_matrix(matrix, "after post shuffle")

def gen_idct(matrix):

    for j in range(0, 2):
        for i in range(0, 4):
           print '\tfpma    f%d = f%d, c0, f0' \
                 % (2 * (matrix[i],))
        print '\t;;'
        for i in range(0,4):
            column_idct(matrix, i)
        print '\t;;'
        transpose(matrix)

def gen_consts():
    print 'addreg1 = r14'
    print 'addreg2 = r15'
    
    for i in range(0, constants):
        print 'c%d = f%d' % (i, float_scratch.pop(0))

    sqrt2 = math.sqrt(2.0)
    t = [ ]
    s = [ ]
    c = [ ]
    for i in range(0,5):
        t.append(math.tan(i * math.pi / 16))
        s.append(math.sin(i * math.pi / 16))
        c.append(math.cos(i * math.pi / 16))
        
    consts = [ ]
    consts.append(1.0 / (2.0 * sqrt2))
    consts.append(-1 / t[2])
    consts.append(-t[2])
    consts.append(t[1])
    consts.append(1 / t[1])
    consts.append(t[3])
    consts.append(1 / t[3])
    consts.append(0.5 * c[2])
    consts.append(0.5 * s[2])
    consts.append(c[3] / c[1])
    consts.append(s[3] / s[1])
    consts.append(c[1] / s[1])
    consts.append(0.5 * c[1])
    consts.append(0.5 * s[1] * c[4])
    consts.append(0.5 * s[1])
    consts.append(1.0)

    print '.sdata'
    for i in range(0, constants):
        if i % 2 == 0:
            print '.align 16'
        print '.data_c%d:' % i
        print '.single %.30f, %.30f' % (consts[i], consts[i])
    print ''

def gen_load(matrix):
    
    for i in range(0, 64, 2):
        print '\tld2  r%d = [addreg1], 4' % (intregbase+i)
        print '\tld2  r%d = [addreg2], 4' % (intregbase+i+1)
        print '\t;;'

    for i in range(0, 64, 2):
        print '\tsxt2  r%d = r%d' % (2*(intregbase+i,))
        print '\tsxt2  r%d = r%d' % (2*(intregbase+i+1,))
    print '\t;;'
        
    for i in range(0, 64, 2):
        print '\tsetf.sig  f%d = r%d' % (regbase+i, intregbase+i)
        print '\tsetf.sig  f%d = r%d' % (regbase+i+1, intregbase+i+1)
    print '\t;;'

    for i in range(0, 64, 2):
        print '\tfcvt.xf  f%d = f%d' % (2*(regbase+i,))
        print '\tfcvt.xf  f%d = f%d' % (2*(regbase+i+1,))
    print '\t;;'

    for i in range(0, 32):
        print '\tfpack    f%d = f%d, f%d' \
              % (regbase+i, regbase+2*i, regbase+2*i+1)
        print '\t;;'

    """
    for i in range(0, len(matrix)):
        print '\tld2  r18 = [addreg1], 4' 
        print '\tld2  r19 = [addreg2], 4'
        print '\t;;'
        print '\tsxt2 r18 = r18'
        print '\tsxt2 r19 = r19'
        print '\t;;'
        print '\tsetf.sig f18 = r18'
        print '\tsetf.sig f19 = r19'
        print '\t;;'
        print '\tfcvt.xf  f18 = f18'
        print '\tfcvt.xf  f19 = f19'
        print '\t;;'
        print '\tfpack      f%d = f18, f19' % (matrix[i])
        print '\t;;'
    """
        
def gen_store(matrix):
    print '\tmov   addreg1 = in0'
    print '\tadd   addreg2 = 4, in0'
    print '\t;;'

    for i in range(0, len(matrix)):
        print '\tfpcvt.fx f%d = f%d' % (2*(matrix[i],))
    print '\t;;'

    for i in range(0, len(matrix)):
        print '\tgetf.sig r%d = f%d' % (intregbase+i, matrix[i])
    print '\t;;'

    for i in range(0, len(matrix)):
        print '\tshl      r%d = r%d, 7' % (2*(intregbase+i,))
    print '\t;;'

    for i in range(0, len(matrix)):
        print '\tpack4.sss r%d = r%d, r0' % (2*(intregbase+i,))
    print '\t;;'

    for i in range(0, len(matrix)):
        print '\tpshr2    r%d = r%d, 7' % (2*(intregbase+i,))
    print '\t;;'

    for i in range(0, len(matrix)):
        print '\tmux2     r%d = r%d, 0xe1' % (2*(intregbase+i,))
    print '\t;;'

    for i in range(0, len(matrix), 2):
        print '\tst4   [addreg1] = r%d, 8' % (intregbase+i)
        print '\tst4   [addreg2] = r%d, 8' % (intregbase+i+1)
	print '\t;;'
    
def main():
    gen_consts()

    print '.text'
    print '.global idct_ia64'
    print '.global idct_ia64_init'
    print '.align 16'
    print '.proc idct_ia64_init'
    print 'idct_ia64_init:'
    print 'br.ret.sptk.few b0'
    print '.endp'
    print '.align 16'
    print '.proc idct_ia64'
    print 'idct_ia64:'

    f = open('idct_init.s')
    print f.read()
    f.close()
    
    matrix = [ ]
    for i in range(0,32):
        matrix.append(regbase + i)

    gen_load(matrix)
#    print_matrix(matrix)
    gen_idct(matrix)
#    transpose(matrix)
    print_matrix(matrix)
    gen_store(matrix)

    f = open('idct_fini.s')
    print f.read()
    f.close()
    
    print '.endp'


# Generate the assembler source only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4