diff options
Diffstat (limited to 'sys/arch/hppa/spmath/impys.S')
-rw-r--r-- | sys/arch/hppa/spmath/impys.S | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/sys/arch/hppa/spmath/impys.S b/sys/arch/hppa/spmath/impys.S new file mode 100644 index 00000000000..7f5251337fc --- /dev/null +++ b/sys/arch/hppa/spmath/impys.S @@ -0,0 +1,323 @@ +/* + * Copyright 1996 1995 by Open Software Foundation, Inc. + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appears in all copies and + * that both the copyright notice and this permission notice appear in + * supporting documentation. + * + * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. + * + * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, + * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ +/* + * pmk1.1 + */ +/* + * (c) Copyright 1986 HEWLETT-PACKARD COMPANY + * + * To anyone who acknowledges that this file is provided "AS IS" + * without any express or implied warranty: + * permission to use, copy, modify, and distribute this file + * for any purpose is hereby granted without fee, provided that + * the above copyright notice and this notice appears in all + * copies, and that the name of Hewlett-Packard Company not be + * used in advertising or publicity pertaining to distribution + * of the software without specific, written prior permission. + * Hewlett-Packard Company makes no representations about the + * suitability of this software for any purpose. 
+ */ +/* $Source: /cvs/OpenBSD/src/sys/arch/hppa/spmath/impys.S,v $ + * $Revision: 1.1 $ $Author: mickey $ + * $State: Exp $ $Locker: $ + * $Date: 1998/06/23 20:34:01 $ + */ + +#include <machine/asm.h> + +/**************************************************************************** + * + * Implement an integer multiply routine for 32-bit operands and 64-bit product + * with operand values of zero (multiplicand only) and -2**31 treated specially. + * The algorithm uses the absolute value of the multiplier, four bits at a time, + * from right to left, to generate partial products. Execution speed is more + * important than program size in this implementation. + * + ***************************************************************************/ +# NOTE(review): comments below start with '#'; PA-RISC assemblers conventionally use ';' -- confirm the build accepts '#' +# Definitions - General registers +# +gr0 .equ 0 # General register zero +pu .equ 3 # upper part of product +pl .equ 4 # lower part of product +op2 .equ 4 # multiplier (same register as pl: multiplier bits are shifted out as product bits shift in) +op1 .equ 5 # multiplicand +cnt .equ 6 # count in multiply (4-bit groups left to process) +brindex .equ 7 # index into the br. table +sign .equ 8 # sign of product +pc .equ 9 # carry bit of product, = 00...01 +pm .equ 10 # value of -1 used in shifting + +#***************************************************************************** + .export impys,entry + .space $TEXT$ + .subspa $CODE$ + .align 4 + .proc + .callinfo +# impys: arg0 -> multiplicand word, arg1 -> multiplier word, arg2 -> two-word product (high word at 0, low word at 4) +#**************************************************************************** +impys stws,ma pu,4(sp) # save registers on stack (restored in reverse order before return) + stws,ma pl,4(sp) # save pl (same register as op2) + stws,ma op1,4(sp) # save op1 + stws,ma cnt,4(sp) # save cnt + stws,ma brindex,4(sp) # save brindex + stws,ma sign,4(sp) # save sign + stws,ma pc,4(sp) # save pc + stws,ma pm,4(sp) # save pm +# +# Start multiply process +# + ldws 0(arg1),op2 # get multiplier + ldws 0(arg0),op1 # get multiplicand + addi -1,gr0,pm # initialize pm to 111...1 + comb,< op2,gr0,mpyb # br. 
if multiplier < 0 + xor op2,op1,sign # sign(0) = sign of product +mpy1 comb,< op1,gr0,mpya # br. if multiplicand < 0 + addi 0,gr0,pu # clear product + addib,= 0,op1,fini0 # op1 = 0, product = 0 +mpy2 addi 1,gr0,pc # initialize pc to 00...01 + movib,tr 8,cnt,mloop # set count for mpy loop (8 groups of 4 bits) + extru op2,31,4,brindex # 4 bits as index into table +# + .align 8 +# the next three words are entered from the branch table at sh4n-4, sh4n and sh4n+4 + b sh4c # br. if sign overflow +sh4n shd pu,pl,4,pl # shift product right 4 bits + addib,<= -1,cnt,mulend # reduce count by 1, exit if + extru pu,27,28,pu # <= zero +# mloop: dispatch on the current 4 multiplier bits and fetch the next 4 +mloop blr brindex,gr0 # br. into table + # entries of 2 words + extru op2,27,4,brindex # next 4 bits into index +# +# brindex can be 16 here when a previous entry added 1 to it, hence the extra entry for bits = 10000 +# branch table for the multiplication process with four multiplier bits +# +mtable # two words per entry +# +# ---- bits = 0000 ---- shift product 4 bits ------------------------------- +# + b sh4n+4 # just shift partial + shd pu,pl,4,pl # product right 4 bits +# +# ---- bits = 0001 ---- add op1, then shift 4 bits +# + addb,tr op1,pu,sh4n+4 # add op1 to product, to shift + shd pu,pl,4,pl # product right 4 bits +# +# ---- bits = 0010 ---- add op1, add op1, then shift 4 bits +# + addb,tr op1,pu,sh4n # add 2*op1, to shift + addb,uv op1,pu,sh4c # product right 4 bits +# +# ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits +# + addb,tr op1,pu,sh4n-4 # add op1 & 2*op1, shift + sh1add,nsv op1,pu,pu # product right 4 bits +# +# ---- bits = 0100 ---- shift 2, add op1, shift 2 +# + b sh2sa + shd pu,pl,2,pl # shift product 2 bits +# +# ---- bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again +# + addb,tr op1,pu,sh2us # add op1 to product + shd pu,pl,2,pl # shift 2 bits +# +# ---- bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again +# + addb,tr op1,pu,sh2c # add 2*op1, to shift 2 bits + addb,nuv op1,pu,sh2us # br. if not overflow +# +# ---- bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1 +# + b sh3s + sub pu,op1,pu # subtract op1, br. 
to sh3s + +# +# ---- bits = 1000 ---- shift 3, add op1, shift 1 +# + b sh3sa + shd pu,pl,3,pl # shift product right 3 bits +# +# ---- bits = 1001 ---- add op1, shift 3, add op1, shift 1 +# + addb,tr op1,pu,sh3us # add op1, to shift 3, add op1, + shd pu,pl,3,pl # and shift 1 +# +# ---- bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1 +# + addb,tr op1,pu,sh3c # add 2*op1, to shift 3 bits + addb,nuv op1,pu,sh3us # br. if no overflow +# +# ---- bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index +# + addib,tr 1,brindex,sh2s # add 1 to index, subtract op1, + sub pu,op1,pu # shift 2 with minus sign +# +# ---- bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index +# + addib,tr 1,brindex,sh2sb # add 1 to index, to shift + shd pu,pl,2,pl # shift right 2 bits signed +# +# ---- bits = 1101 ---- add op1, shift 2, add -op1, shift 2 +# + addb,tr op1,pu,sh2ns # add op1, to shift 2 + shd pu,pl,2,pl # right 2 unsigned, etc. +# +# ---- bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed +# + addib,tr 1,brindex,sh1sa # add 1 to index, to shift + shd pu,pl,1,pl # shift 1 bit +# +# ---- bits = 1111 ---- add -op1, shift 4 signed +# + addib,tr 1,brindex,sh4s # add 1 to index, subtract op1, + sub pu,op1,pu # to shift 4 signed + +# +# ---- bits = 10000 ---- shift 4 signed +# + addib,tr 1,brindex,sh4s+4 # add 1 to index + shd pu,pl,4,pl # shift 4 signed +# +# ---- end of table --------------------------------------------------------- +# +sh4s shd pu,pl,4,pl + addib,tr -1,cnt,mloop # loop (count > 0 always here) + shd pm,pu,4,pu # shift 4, minus signed +# +sh4c addib,> -1,cnt,mloop # decrement count, loop if > 0 + shd pc,pu,4,pu # shift 4 with overflow + b signs # end of multiply + bb,>=,n sign,0,fini # test sign of product +# mpyb: multiplier is negative; op2 + op2 = 0 singles out -2**31 +mpyb add,= op2,op2,gr0 # if <> 0, back to main sect. 
+ b mpy1 + sub 0,op2,op2 # op2 = |multiplier| + add,>= op1,gr0,gr0 # if op1 < 0, invert sign, + xor pm,sign,sign # for correct result +# +# special case for multiplier = -2**31, op1 = signed multiplicand +# or multiplicand = -2**31, op1 = signed multiplier +# + shd op1,0,1,pl # shift op1 left 31 bits +mmax extrs op1,30,31,pu + b signs # negate product (if needed) + bb,>=,n sign,0,fini # test sign of product +# +mpya add,= op1,op1,gr0 # op1 = -2**31, special case + b mpy2 + sub 0,op1,op1 # op1 = |multiplicand| + add,>= op2,gr0,gr0 # if op2 < 0, invert sign, + xor pm,sign,sign # for correct result + movb,tr op2,op1,mmax # use op2 as multiplicand + shd op1,0,1,pl # shift it left 31 bits +# +sh3c shd pu,pl,3,pl # shift product 3 bits + shd pc,pu,3,pu # shift 3 with carry (pc = 00...01) + addb,tr op1,pu,sh1 # add op1, to shift 1 bit + shd pu,pl,1,pl +# +sh3us extru pu,28,29,pu # shift 3 unsigned + addb,tr op1,pu,sh1 # add op1, to shift 1 bit + shd pu,pl,1,pl +# +sh3sa extrs pu,28,29,pu # shift 3 signed + addb,tr op1,pu,sh1 # add op1, to shift 1 bit + shd pu,pl,1,pl +# +sh3s shd pu,pl,3,pl # shift 3 minus signed + shd pm,pu,3,pu + addb,tr op1,pu,sh1 # add op1, to shift 1 bit + shd pu,pl,1,pl +# +sh1 addib,> -1,cnt,mloop # loop if count > 0 + extru pu,30,31,pu + b signs # end of multiply + bb,>=,n sign,0,fini # test sign of product +# +sh2ns addib,tr 1,brindex,sh2sb+4 # increment index + extru pu,29,30,pu # shift unsigned +# +sh2s shd pu,pl,2,pl # shift with minus sign + shd pm,pu,2,pu # (pm = 111...1 supplies the sign bits) + sub pu,op1,pu # subtract op1 + shd pu,pl,2,pl # shift with minus sign + addib,tr -1,cnt,mloop # decrement count, loop + shd pm,pu,2,pu # shift with minus sign + # count never reaches 0 here +# +sh2sb extrs pu,29,30,pu # shift 2 signed + sub pu,op1,pu # subtract op1 from product + shd pu,pl,2,pl # shift with minus sign + addib,tr -1,cnt,mloop # decrement count, loop + shd pm,pu,2,pu # shift with minus sign + # count never reaches 0 here +# +sh1sa extrs pu,30,31,pu # signed + sub pu,op1,pu # subtract op1 
from product + shd pu,pl,3,pl # shift 3 with minus sign + addib,tr -1,cnt,mloop # dec. count, to loop + shd pm,pu,3,pu # count never reaches 0 here +# +fini0 movib,tr,n 0,pl,fini # product = 0 as op1 = 0 (pu was already cleared after mpy1) +# +sh2us extru pu,29,30,pu # shift 2 unsigned + addb,tr op1,pu,sh2a # add op1 + shd pu,pl,2,pl # shift 2 bits +# +sh2c shd pu,pl,2,pl + shd pc,pu,2,pu # shift with carry + addb,tr op1,pu,sh2a # add op1 to product + shd pu,pl,2,pl # br. to sh2a to shift pu +# +sh2sa extrs pu,29,30,pu # shift with sign + addb,tr op1,pu,sh2a # add op1 to product + shd pu,pl,2,pl # br. to sh2a to shift pu +# +sh2a addib,> -1,cnt,mloop # loop if count > 0 + extru pu,29,30,pu +# +mulend bb,>=,n sign,0,fini # test sign of product +signs sub 0,pl,pl # negate product if sign + subb 0,pu,pu # is negative +# +# finish: store the 64-bit product through arg2, then restore the saved registers +# +fini stws pu,0(arg2) # save high part of result + stws pl,4(arg2) # save low part of result + + ldws,mb -4(sp),pm # restore registers (reverse order of the saves) + ldws,mb -4(sp),pc # restore registers + ldws,mb -4(sp),sign # restore registers + ldws,mb -4(sp),brindex # restore registers + ldws,mb -4(sp),cnt # restore registers + ldws,mb -4(sp),op1 # restore registers + ldws,mb -4(sp),pl # restore registers + bv 0(rp) # return + ldws,mb -4(sp),pu # restore registers (last one, placed after the return branch) + + .procend + .end |