diff options
author | Niels Provos <provos@cvs.openbsd.org> | 1997-06-24 21:28:32 +0000 |
---|---|---|
committer | Niels Provos <provos@cvs.openbsd.org> | 1997-06-24 21:28:32 +0000 |
commit | 2797d26f259dcf820a086e4110ca53c2db78bc5d (patch) | |
tree | fa8f850d5b10a0f6fab21154cc60c759de02ddb2 /gnu/lib/libgmp/mpn | |
parent | 6d71925e98a49e80463b5f54135da67389b7ddea (diff) |
import of libgmp-2.0.2. Makefile.bsd-wrappers need to be worked on.
Diffstat (limited to 'gnu/lib/libgmp/mpn')
208 files changed, 21828 insertions, 0 deletions
diff --git a/gnu/lib/libgmp/mpn/Makefile.bsd-wrapper b/gnu/lib/libgmp/mpn/Makefile.bsd-wrapper new file mode 100644 index 00000000000..abcf8c45a3e --- /dev/null +++ b/gnu/lib/libgmp/mpn/Makefile.bsd-wrapper @@ -0,0 +1,27 @@ +# $OpenBSD: Makefile.bsd-wrapper,v 1.1 1997/06/24 21:28:15 provos Exp $ + +GNUCFLAGS= CFLAGS="${CFLAGS} ${COPTS}" + +all: config.status + ${MAKE} ${GNUCFLAGS} BISON=yacc CC=${CC} LDFLAGS=${LDSTATIC} + +.FORCE: .IGNORE + +config: .FORCE + -rm -f config.cache + /bin/sh ${.CURDIR}/configure --with-gnu-as --with-gnu-ld \ + --prefix=/usr --local-prefix=/usr + +config.status: Makefile.in configure + /bin/sh ${.CURDIR}/configure --with-gnu-as --with-gnu-ld \ + --prefix=/usr --local-prefix=/usr && touch config.status + +clean cleandir: + -@if [ -e Makefile ]; then ${MAKE} distclean; fi + +depend: + # Nothing here so far... + +.include <bsd.obj.mk> +.include <bsd.subdir.mk> +.include <bsd.man.mk> diff --git a/gnu/lib/libgmp/mpn/Makefile.in b/gnu/lib/libgmp/mpn/Makefile.in new file mode 100644 index 00000000000..132159b9465 --- /dev/null +++ b/gnu/lib/libgmp/mpn/Makefile.in @@ -0,0 +1,92 @@ +# Makefile for GNU MP/mpn functions +# Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + +srcdir = . + +MPN_OBJECTS = This gets filled in by configure.in. +MPN_LINKS = This gets filled in by configure.in. +CC = gcc +CPP = $(CC) -E +CFLAGS = -g -O +INCLUDES = -I. -I.. -I$(srcdir) -I$(srcdir)/.. +AR = ar +AR_FLAGS = rc +SFLAGS= + +#### host and target specific makefile fragments come in here. +### + +libmpn.a: Makefile mp_bases.o $(MPN_OBJECTS) + rm -f $@ + $(AR) $(AR_FLAGS) $@ mp_bases.o $(MPN_OBJECTS) + +.SUFFIXES: .c .s .S + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(XCFLAGS) $< + +.s.o: + $(CC) -c $(CFLAGS) $< + +.S.o: + $(CPP) $(SFLAGS) $(INCLUDES) $(CFLAGS) $< | grep -v '^#' >tmp-$*.s + $(CC) -c tmp-$*.s -o $@ + rm -f tmp-$*.s + +clean mostlyclean: + rm -f *.o tmp-* libmpn.a + #-cd tests; $(MAKE) $@ +distclean maintainer-clean: clean + rm -f asm-syntax.h Makefile config.status $(MPN_LINKS) + -cd tests; $(MAKE) $@ + +Makefile: $(srcdir)/Makefile.in + $(SHELL) ./config.status + + +# Maybe configure could add dependencies here..? 
+ +H = $(srcdir)/../gmp.h $(srcdir)/../gmp-impl.h gmp-mparam.h +L = $(srcdir)/../longlong.h + +mp_bases.o: $(srcdir)/mp_bases.c $(H) +bdivmod.o: bdivmod.c $(H) $(L) +cmp.o: cmp.c $(H) +divmod_1.o: divmod_1.c $(H) $(L) +divrem.o: divrem.c $(H) $(L) +divrem_1.o: divrem_1.c $(H) $(L) +dump.o: dump.c $(H) +gcd.o: gcd.c $(H) $(L) +gcd_1.o: gcd_1.c $(H) $(L) +gcdext.o: gcdext.c $(H) $(L) +get_str.o: get_str.c $(H) $(L) +hamdist.o: hamdist.c $(H) +inlines.o: inlines.c $(srcdir)/../gmp.h +mod_1.o: mod_1.c $(H) $(L) +mul.o: mul.c $(H) +mul_n.o: mul_n.c $(H) +perfsqr.o: perfsqr.c $(H) $(L) +popcount.o: popcount.c $(H) +pre_mod_1.o: pre_mod_1.c $(H) $(L) +random2.o: random2.c $(H) +scan0.o: scan0.c $(H) $(L) +scan1.o: scan1.c $(H) $(L) +set_str.o: set_str.c $(H) +sqrtrem.o: sqrtrem.c $(H) $(L) diff --git a/gnu/lib/libgmp/mpn/README b/gnu/lib/libgmp/mpn/README new file mode 100644 index 00000000000..3da559e509a --- /dev/null +++ b/gnu/lib/libgmp/mpn/README @@ -0,0 +1,15 @@ +This directory contains all code for the mpn layer of GMP. + +Most subdirectories contain machine-dependent code, written in assembly or +C. The `generic' subdirectory contains default code, used when there is no +machine-dependent replacement for a particular machine. + +There is one subdirectory for each architecture. Note that e.g., 32-bit +sparc and 64-bit sparc cannot share any code, and are therefore considered +completely different architectures. + +A particular machine will only use code from one such subdirectory, and the +`generic' subdirectory. The architecture-specific subdirectory contains a +hierarchy of directories for various architecture variants and +implementations; the top-most level contains code that runs correctly on all +variants. 
diff --git a/gnu/lib/libgmp/mpn/a29k/add_n.s b/gnu/lib/libgmp/mpn/a29k/add_n.s new file mode 100644 index 00000000000..74c20e3f70e --- /dev/null +++ b/gnu/lib/libgmp/mpn/a29k/add_n.s @@ -0,0 +1,120 @@ +; 29000 __mpn_add -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; s2_ptr lr4 +; size lr5 + +; We use the loadm/storem instructions and operate on chunks of 8 +; limbs/per iteration, until less than 8 limbs remain. + +; The 29k has no addition or subtraction instructions that doesn't +; affect carry, so we need to save and restore that as soon as we +; adjust the pointers. gr116 is used for this purpose. Note that +; gr116==0 means that carry should be set. + + .sect .lit,lit + .text + .align 4 + .global ___mpn_add_n + .word 0x60000 +___mpn_add_n: + srl gr117,lr5,3 + sub gr118,gr117,1 + jmpt gr118,Ltail + constn gr116,-1 ; init cy reg + sub gr117,gr117,2 ; count for jmpfdec + +; Main loop working 8 limbs/iteration. 
+Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + mtsrim cr,(8-1) + loadm 0,0,gr104,lr4 + add lr4,lr4,32 + + subr gr116,gr116,0 ; restore carry + addc gr96,gr96,gr104 + addc gr97,gr97,gr105 + addc gr98,gr98,gr106 + addc gr99,gr99,gr107 + addc gr100,gr100,gr108 + addc gr101,gr101,gr109 + addc gr102,gr102,gr110 + addc gr103,gr103,gr111 + subc gr116,gr116,gr116 ; gr116 = not(cy) + + mtsrim cr,(8-1) + storem 0,0,gr96,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +; Code for the last up-to-7 limbs. +; This code might look very strange, but it's hard to write it +; differently without major slowdown. + + and lr5,lr5,(8-1) +Ltail: sub gr118,lr5,1 ; count for CR + jmpt gr118,Lend + sub gr117,lr5,2 ; count for jmpfdec + + mtsr cr,gr118 + loadm 0,0,gr96,lr3 + mtsr cr,gr118 + loadm 0,0,gr104,lr4 + + subr gr116,gr116,0 ; restore carry + + jmpfdec gr117,L1 + addc gr96,gr96,gr104 + jmp Lstore + mtsr cr,gr118 +L1: jmpfdec gr117,L2 + addc gr97,gr97,gr105 + jmp Lstore + mtsr cr,gr118 +L2: jmpfdec gr117,L3 + addc gr98,gr98,gr106 + jmp Lstore + mtsr cr,gr118 +L3: jmpfdec gr117,L4 + addc gr99,gr99,gr107 + jmp Lstore + mtsr cr,gr118 +L4: jmpfdec gr117,L5 + addc gr100,gr100,gr108 + jmp Lstore + mtsr cr,gr118 +L5: jmpfdec gr117,L6 + addc gr101,gr101,gr109 + jmp Lstore + mtsr cr,gr118 +L6: addc gr102,gr102,gr110 + +Lstore: storem 0,0,gr96,lr2 + subc gr116,gr116,gr116 ; gr116 = not(cy) + +Lend: jmpi lr0 + add gr96,gr116,1 diff --git a/gnu/lib/libgmp/mpn/a29k/addmul_1.s b/gnu/lib/libgmp/mpn/a29k/addmul_1.s new file mode 100644 index 00000000000..8c0ec96ce5d --- /dev/null +++ b/gnu/lib/libgmp/mpn/a29k/addmul_1.s @@ -0,0 +1,113 @@ +; 29000 __mpn_addmul_1 -- Multiply a limb vector with a single limb and +; add the product to a second limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. 
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; size lr4 +; s2_limb lr5 + + .cputype 29050 + .sect .lit,lit + .text + .align 4 + .global ___mpn_addmul_1 + .word 0x60000 +___mpn_addmul_1: + sub lr4,lr4,8 + jmpt lr4,Ltail + const gr120,0 ; init cylimb reg + + srl gr117,lr4,3 ; divide by 8 + sub gr117,gr117,1 ; count for jmpfdec + +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + + multiplu gr104,gr96,lr5 + multmu gr96,gr96,lr5 + multiplu gr105,gr97,lr5 + multmu gr97,gr97,lr5 + multiplu gr106,gr98,lr5 + multmu gr98,gr98,lr5 + multiplu gr107,gr99,lr5 + multmu gr99,gr99,lr5 + multiplu gr108,gr100,lr5 + multmu gr100,gr100,lr5 + multiplu gr109,gr101,lr5 + multmu gr101,gr101,lr5 + multiplu gr110,gr102,lr5 + multmu gr102,gr102,lr5 + multiplu gr111,gr103,lr5 + multmu gr103,gr103,lr5 + + add gr104,gr104,gr120 + addc gr105,gr105,gr96 + addc gr106,gr106,gr97 + addc gr107,gr107,gr98 + addc gr108,gr108,gr99 + addc gr109,gr109,gr100 + addc gr110,gr110,gr101 + addc gr111,gr111,gr102 + addc gr120,gr103,0 + + mtsrim cr,(8-1) + loadm 0,0,gr96,lr2 + + add gr104,gr96,gr104 + addc gr105,gr97,gr105 + addc gr106,gr98,gr106 + addc gr107,gr99,gr107 + addc gr108,gr100,gr108 + addc gr109,gr101,gr109 + 
addc gr110,gr102,gr110 + addc gr111,gr103,gr111 + addc gr120,gr120,0 + + mtsrim cr,(8-1) + storem 0,0,gr104,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +Ltail: and lr4,lr4,(8-1) + sub gr118,lr4,1 ; count for CR + jmpt gr118,Lend + sub lr4,lr4,2 + sub lr2,lr2,4 ; offset res_ptr by one limb + +Loop2: load 0,0,gr116,lr3 + add lr3,lr3,4 + multiplu gr117,gr116,lr5 + multmu gr118,gr116,lr5 + add lr2,lr2,4 + load 0,0,gr119,lr2 + add gr117,gr117,gr120 + addc gr118,gr118,0 + add gr117,gr117,gr119 + store 0,0,gr117,lr2 + jmpfdec lr4,Loop2 + addc gr120,gr118,0 + +Lend: jmpi lr0 + or gr96,gr120,0 ; copy diff --git a/gnu/lib/libgmp/mpn/a29k/lshift.s b/gnu/lib/libgmp/mpn/a29k/lshift.s new file mode 100644 index 00000000000..7554e2cbb3a --- /dev/null +++ b/gnu/lib/libgmp/mpn/a29k/lshift.s @@ -0,0 +1,93 @@ +; 29000 __mpn_lshift -- + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s_ptr lr3 +; size lr4 +; cnt lr5 + +; We use the loadm/storem instructions and operate on chunks of 8 +; limbs/per iteration, until less than 8 limbs remain. 
+ + .sect .lit,lit + .text + .align 4 + .global ___mpn_lshift + .word 0x60000 +___mpn_lshift: + sll gr116,lr4,2 + add lr3,gr116,lr3 + add lr2,gr116,lr2 + sub lr3,lr3,4 + load 0,0,gr119,lr3 + + subr gr116,lr5,32 + srl gr96,gr119,gr116 ; return value + sub lr4,lr4,1 ; actual loop count is SIZE - 1 + + srl gr117,lr4,3 ; chuck count = (actual count) / 8 + cpeq gr118,gr117,0 + jmpt gr118,Ltail + mtsr fc,lr5 + + sub gr117,gr117,2 ; count for jmpfdec + +; Main loop working 8 limbs/iteration. +Loop: sub lr3,lr3,32 + mtsrim cr,(8-1) + loadm 0,0,gr100,lr3 + + extract gr109,gr119,gr107 + extract gr108,gr107,gr106 + extract gr107,gr106,gr105 + extract gr106,gr105,gr104 + extract gr105,gr104,gr103 + extract gr104,gr103,gr102 + extract gr103,gr102,gr101 + extract gr102,gr101,gr100 + + sub lr2,lr2,32 + mtsrim cr,(8-1) + storem 0,0,gr102,lr2 + jmpfdec gr117,Loop + or gr119,gr100,0 + +; Code for the last up-to-7 limbs. + + and lr4,lr4,(8-1) +Ltail: cpeq gr118,lr4,0 + jmpt gr118,Lend + sub lr4,lr4,2 ; count for jmpfdec + +Loop2: sub lr3,lr3,4 + load 0,0,gr116,lr3 + extract gr117,gr119,gr116 + sub lr2,lr2,4 + store 0,0,gr117,lr2 + jmpfdec lr4,Loop2 + or gr119,gr116,0 + +Lend: extract gr117,gr119,0 + sub lr2,lr2,4 + jmpi lr0 + store 0,0,gr117,lr2 diff --git a/gnu/lib/libgmp/mpn/a29k/mul_1.s b/gnu/lib/libgmp/mpn/a29k/mul_1.s new file mode 100644 index 00000000000..5d120f48ef9 --- /dev/null +++ b/gnu/lib/libgmp/mpn/a29k/mul_1.s @@ -0,0 +1,97 @@ +; 29000 __mpn_mul_1 -- Multiply a limb vector with a single limb and +; store the product in a second limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. 
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; size lr4 +; s2_limb lr5 + + .cputype 29050 + .sect .lit,lit + .text + .align 4 + .global ___mpn_mul_1 + .word 0x60000 +___mpn_mul_1: + sub lr4,lr4,8 + jmpt lr4,Ltail + const gr120,0 ; init cylimb reg + + srl gr117,lr4,3 ; divide by 8 + sub gr117,gr117,1 ; count for jmpfdec + +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + + multiplu gr104,gr96,lr5 + multmu gr96,gr96,lr5 + multiplu gr105,gr97,lr5 + multmu gr97,gr97,lr5 + multiplu gr106,gr98,lr5 + multmu gr98,gr98,lr5 + multiplu gr107,gr99,lr5 + multmu gr99,gr99,lr5 + multiplu gr108,gr100,lr5 + multmu gr100,gr100,lr5 + multiplu gr109,gr101,lr5 + multmu gr101,gr101,lr5 + multiplu gr110,gr102,lr5 + multmu gr102,gr102,lr5 + multiplu gr111,gr103,lr5 + multmu gr103,gr103,lr5 + + add gr104,gr104,gr120 + addc gr105,gr105,gr96 + addc gr106,gr106,gr97 + addc gr107,gr107,gr98 + addc gr108,gr108,gr99 + addc gr109,gr109,gr100 + addc gr110,gr110,gr101 + addc gr111,gr111,gr102 + addc gr120,gr103,0 + + mtsrim cr,(8-1) + storem 0,0,gr104,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +Ltail: and lr4,lr4,(8-1) + sub gr118,lr4,1 ; count for CR + jmpt gr118,Lend + sub lr4,lr4,2 + sub lr2,lr2,4 ; offset res_ptr by one limb + +Loop2: load 0,0,gr116,lr3 + add lr3,lr3,4 + multiplu gr117,gr116,lr5 + multmu gr118,gr116,lr5 + add lr2,lr2,4 + add gr117,gr117,gr120 + store 0,0,gr117,lr2 + jmpfdec lr4,Loop2 + addc gr120,gr118,0 + +Lend: jmpi lr0 + or gr96,gr120,0 ; copy 
diff --git a/gnu/lib/libgmp/mpn/a29k/rshift.s b/gnu/lib/libgmp/mpn/a29k/rshift.s new file mode 100644 index 00000000000..fe53b71e246 --- /dev/null +++ b/gnu/lib/libgmp/mpn/a29k/rshift.s @@ -0,0 +1,89 @@ +; 29000 __mpn_rshift -- + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s_ptr lr3 +; size lr4 +; cnt lr5 + +; We use the loadm/storem instructions and operate on chunks of 8 +; limbs/per iteration, until less than 8 limbs remain. + + .sect .lit,lit + .text + .align 4 + .global ___mpn_rshift + .word 0x60000 +___mpn_rshift: + load 0,0,gr119,lr3 + add lr3,lr3,4 + + subr gr116,lr5,32 + sll gr96,gr119,gr116 ; return value + sub lr4,lr4,1 ; actual loop count is SIZE - 1 + + srl gr117,lr4,3 ; chunk count = (actual count) / 8 + cpeq gr118,gr117,0 + jmpt gr118,Ltail + mtsr fc,gr116 + + sub gr117,gr117,2 ; count for jmpfdec + +; Main loop working 8 limbs/iteration. 
+Loop: mtsrim cr,(8-1) + loadm 0,0,gr100,lr3 + add lr3,lr3,32 + + extract gr98,gr100,gr119 + extract gr99,gr101,gr100 + extract gr100,gr102,gr101 + extract gr101,gr103,gr102 + extract gr102,gr104,gr103 + extract gr103,gr105,gr104 + extract gr104,gr106,gr105 + extract gr105,gr107,gr106 + + mtsrim cr,(8-1) + storem 0,0,gr98,lr2 + add lr2,lr2,32 + jmpfdec gr117,Loop + or gr119,gr107,0 + +; Code for the last up-to-7 limbs. + + and lr4,lr4,(8-1) +Ltail: cpeq gr118,lr4,0 + jmpt gr118,Lend + sub lr4,lr4,2 ; count for jmpfdec + +Loop2: load 0,0,gr100,lr3 + add lr3,lr3,4 + extract gr117,gr100,gr119 + store 0,0,gr117,lr2 + add lr2,lr2,4 + jmpfdec lr4,Loop2 + or gr119,gr100,0 + +Lend: srl gr117,gr119,lr5 + jmpi lr0 + store 0,0,gr117,lr2 diff --git a/gnu/lib/libgmp/mpn/a29k/sub_n.s b/gnu/lib/libgmp/mpn/a29k/sub_n.s new file mode 100644 index 00000000000..3c8d6106573 --- /dev/null +++ b/gnu/lib/libgmp/mpn/a29k/sub_n.s @@ -0,0 +1,120 @@ +; 29000 __mpn_sub -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; s2_ptr lr4 +; size lr5 + +; We use the loadm/storem instructions and operate on chunks of 8 +; limbs/per iteration, until less than 8 limbs remain. + +; The 29k has no addition or subtraction instructions that doesn't +; affect carry, so we need to save and restore that as soon as we +; adjust the pointers. gr116 is used for this purpose. Note that +; gr116==0 means that carry should be set. + + .sect .lit,lit + .text + .align 4 + .global ___mpn_sub_n + .word 0x60000 +___mpn_sub_n: + srl gr117,lr5,3 + sub gr118,gr117,1 + jmpt gr118,Ltail + constn gr116,-1 ; init cy reg + sub gr117,gr117,2 ; count for jmpfdec + +; Main loop working 8 limbs/iteration. +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + mtsrim cr,(8-1) + loadm 0,0,gr104,lr4 + add lr4,lr4,32 + + subr gr116,gr116,0 ; restore carry + subc gr96,gr96,gr104 + subc gr97,gr97,gr105 + subc gr98,gr98,gr106 + subc gr99,gr99,gr107 + subc gr100,gr100,gr108 + subc gr101,gr101,gr109 + subc gr102,gr102,gr110 + subc gr103,gr103,gr111 + subc gr116,gr116,gr116 ; gr116 = not(cy) + + mtsrim cr,(8-1) + storem 0,0,gr96,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +; Code for the last up-to-7 limbs. +; This code might look very strange, but it's hard to write it +; differently without major slowdown. 
+ + and lr5,lr5,(8-1) +Ltail: sub gr118,lr5,1 ; count for CR + jmpt gr118,Lend + sub gr117,lr5,2 ; count for jmpfdec + + mtsr cr,gr118 + loadm 0,0,gr96,lr3 + mtsr cr,gr118 + loadm 0,0,gr104,lr4 + + subr gr116,gr116,0 ; restore carry + + jmpfdec gr117,L1 + subc gr96,gr96,gr104 + jmp Lstore + mtsr cr,gr118 +L1: jmpfdec gr117,L2 + subc gr97,gr97,gr105 + jmp Lstore + mtsr cr,gr118 +L2: jmpfdec gr117,L3 + subc gr98,gr98,gr106 + jmp Lstore + mtsr cr,gr118 +L3: jmpfdec gr117,L4 + subc gr99,gr99,gr107 + jmp Lstore + mtsr cr,gr118 +L4: jmpfdec gr117,L5 + subc gr100,gr100,gr108 + jmp Lstore + mtsr cr,gr118 +L5: jmpfdec gr117,L6 + subc gr101,gr101,gr109 + jmp Lstore + mtsr cr,gr118 +L6: subc gr102,gr102,gr110 + +Lstore: storem 0,0,gr96,lr2 + subc gr116,gr116,gr116 ; gr116 = not(cy) + +Lend: jmpi lr0 + add gr96,gr116,1 diff --git a/gnu/lib/libgmp/mpn/a29k/submul_1.s b/gnu/lib/libgmp/mpn/a29k/submul_1.s new file mode 100644 index 00000000000..ca2ef72a995 --- /dev/null +++ b/gnu/lib/libgmp/mpn/a29k/submul_1.s @@ -0,0 +1,116 @@ +; 29000 __mpn_submul_1 -- Multiply a limb vector with a single limb and +; subtract the product from a second limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; size lr4 +; s2_limb lr5 + + .cputype 29050 + .sect .lit,lit + .text + .align 4 + .global ___mpn_submul_1 + .word 0x60000 +___mpn_submul_1: + sub lr4,lr4,8 + jmpt lr4,Ltail + const gr120,0 ; init cylimb reg + + srl gr117,lr4,3 ; divide by 8 + sub gr117,gr117,1 ; count for jmpfdec + +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + + multiplu gr104,gr96,lr5 + multmu gr96,gr96,lr5 + multiplu gr105,gr97,lr5 + multmu gr97,gr97,lr5 + multiplu gr106,gr98,lr5 + multmu gr98,gr98,lr5 + multiplu gr107,gr99,lr5 + multmu gr99,gr99,lr5 + multiplu gr108,gr100,lr5 + multmu gr100,gr100,lr5 + multiplu gr109,gr101,lr5 + multmu gr101,gr101,lr5 + multiplu gr110,gr102,lr5 + multmu gr102,gr102,lr5 + multiplu gr111,gr103,lr5 + multmu gr103,gr103,lr5 + + add gr104,gr104,gr120 + addc gr105,gr105,gr96 + addc gr106,gr106,gr97 + addc gr107,gr107,gr98 + addc gr108,gr108,gr99 + addc gr109,gr109,gr100 + addc gr110,gr110,gr101 + addc gr111,gr111,gr102 + addc gr120,gr103,0 + + mtsrim cr,(8-1) + loadm 0,0,gr96,lr2 + + sub gr96,gr96,gr104 + subc gr97,gr97,gr105 + subc gr98,gr98,gr106 + subc gr99,gr99,gr107 + subc gr100,gr100,gr108 + subc gr101,gr101,gr109 + subc gr102,gr102,gr110 + subc gr103,gr103,gr111 + + add gr104,gr103,gr111 ; invert carry from previus sub + addc gr120,gr120,0 + + mtsrim cr,(8-1) + storem 0,0,gr96,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +Ltail: and lr4,lr4,(8-1) + sub gr118,lr4,1 ; count for CR + jmpt gr118,Lend + sub lr4,lr4,2 + sub lr2,lr2,4 ; offset res_ptr by one limb + +Loop2: load 0,0,gr116,lr3 + add lr3,lr3,4 + multiplu gr117,gr116,lr5 + multmu gr118,gr116,lr5 + add lr2,lr2,4 + load 0,0,gr119,lr2 + add gr117,gr117,gr120 + addc gr118,gr118,0 + sub gr119,gr119,gr117 + add gr104,gr119,gr117 ; invert carry from previus sub + store 0,0,gr119,lr2 + jmpfdec lr4,Loop2 + addc gr120,gr118,0 + 
+Lend: jmpi lr0 + or gr96,gr120,0 ; copy diff --git a/gnu/lib/libgmp/mpn/alpha/README b/gnu/lib/libgmp/mpn/alpha/README new file mode 100644 index 00000000000..55c0a2917cc --- /dev/null +++ b/gnu/lib/libgmp/mpn/alpha/README @@ -0,0 +1,53 @@ +This directory contains mpn functions optimized for DEC Alpha processors. + +RELEVANT OPTIMIZATION ISSUES + +EV4 + +1. This chip has very limited store bandwidth. The on-chip L1 cache is +write-through, and a cache line is transferred from the store buffer to the +off-chip L2 in as much as 15 cycles on most systems. This delay hurts +mpn_add_n, mpn_sub_n, mpn_lshift, and mpn_rshift. + +2. Pairing is possible between memory instructions and integer arithmetic +instructions. + +3. mulq and umulh are documented to have a latency of 23 cycles, but 2 of +these cycles are pipelined. Thus, multiply instructions can be issued at a +rate of one each 21st cycle. + +EV5 + +1. The memory bandwidth of this chip seems excellent, both for loads and +stores. Even when the working set is larger than the on-chip L1 and L2 +caches, the performance remains almost unaffected. + +2. mulq has a measured latency of 13 cycles and an issue rate of 1 each 8th +cycle. umulh has a measured latency of 15 cycles and an issue rate of 1 +each 10th cycle. But the exact timing is somewhat confusing. + +3. mpn_add_n. With 4-fold unrolling, we need 37 instructions, whereof 12 + are memory operations. This will take at least + ceil(37/2) [dual issue] + 1 [taken branch] = 20 cycles + We have 12 memory cycles, plus 4 after-store conflict cycles, or 16 data + cache cycles, which should be completely hidden in the 20 issue cycles. + The computation is inherently serial, with these dependencies: + addq + / \ + addq cmpult + | | + cmpult | + \ / + or + I.e., there is a 4 cycle path for each limb, making 16 cycles the absolute + minimum. We could replace the `or' with a cmoveq/cmovne, which would save + a cycle on EV5, but that might waste a cycle on EV4. 
Also, cmov takes 2 + cycles. + addq + / \ + addq cmpult + | \ + cmpult -> cmovne + +STATUS + diff --git a/gnu/lib/libgmp/mpn/alpha/add_n.s b/gnu/lib/libgmp/mpn/alpha/add_n.s new file mode 100644 index 00000000000..426556e3986 --- /dev/null +++ b/gnu/lib/libgmp/mpn/alpha/add_n.s @@ -0,0 +1,120 @@ + # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # s2_ptr $18 + # size $19 + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_add_n + .ent __mpn_add_n +__mpn_add_n: + .frame $30,0,$26,0 + + ldq $3,0($17) + ldq $4,0($18) + + subq $19,1,$19 + and $19,4-1,$2 # number of limbs in first loop + bis $31,$31,$0 + beq $2,.L0 # if multiple of 4 limbs, skip first loop + + subq $19,$2,$19 + +.Loop0: subq $2,1,$2 + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + + addq $17,8,$17 + addq $18,8,$18 + bis $5,$5,$3 + bis $6,$6,$4 + addq $16,8,$16 + bne $2,.Loop0 + +.L0: beq $19,.Lend + + .align 3 +.Loop: subq $19,4,$19 + + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + + ldq $3,16($17) + addq $6,$0,$6 + ldq $4,16($18) + cmpult $6,$0,$1 + addq $5,$6,$6 + cmpult $6,$5,$0 + stq $6,8($16) + or $0,$1,$0 + + ldq $5,24($17) + addq $4,$0,$4 + ldq $6,24($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,16($16) + or $0,$1,$0 + + ldq $3,32($17) + addq $6,$0,$6 + ldq $4,32($18) + cmpult $6,$0,$1 + addq $5,$6,$6 + cmpult $6,$5,$0 + stq $6,24($16) + or $0,$1,$0 + + addq $17,32,$17 + addq $18,32,$18 + addq $16,32,$16 + bne $19,.Loop + +.Lend: addq $4,$0,$4 + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + ret $31,($26),1 + + .end __mpn_add_n diff --git a/gnu/lib/libgmp/mpn/alpha/addmul_1.s b/gnu/lib/libgmp/mpn/alpha/addmul_1.s new file mode 100644 index 00000000000..048238ae9d6 --- /dev/null +++ b/gnu/lib/libgmp/mpn/alpha/addmul_1.s @@ -0,0 +1,92 @@ + # Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + # the result to a second limb vector. + + # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. 
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr      r16
+ # s1_ptr       r17
+ # size         r18
+ # s2_limb      r19
+
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+
+        .set    noreorder
+        .set    noat
+.text
+        .align  3
+        .globl  __mpn_addmul_1
+        .ent    __mpn_addmul_1 2
+__mpn_addmul_1:
+        .frame  $30,0,$26
+
+        ldq     $2,0($17)       # $2 = s1_limb
+        addq    $17,8,$17       # s1_ptr++
+        subq    $18,1,$18       # size--
+        mulq    $2,$19,$3       # $3 = prod_low
+        ldq     $5,0($16)       # $5 = *res_ptr
+        umulh   $2,$19,$0       # $0 = prod_high
+        beq     $18,.Lend1      # jump if size was == 1
+        ldq     $2,0($17)       # $2 = s1_limb
+        addq    $17,8,$17       # s1_ptr++
+        subq    $18,1,$18       # size--
+        addq    $5,$3,$3        # $3 = *res_ptr + prod_low
+        cmpult  $3,$5,$4        # $4 = carry from that add
+        stq     $3,0($16)
+        addq    $16,8,$16       # res_ptr++
+        beq     $18,.Lend2      # jump if size was == 2
+
+        .align  3
+.Loop:  mulq    $2,$19,$3       # $3 = prod_low
+        ldq     $5,0($16)       # $5 = *res_ptr
+        addq    $4,$0,$0        # cy_limb = cy_limb + 'cy'
+        subq    $18,1,$18       # size--
+        umulh   $2,$19,$4       # $4 = cy_limb
+        ldq     $2,0($17)       # $2 = s1_limb
+        addq    $17,8,$17       # s1_ptr++
+        addq    $3,$0,$3        # $3 = cy_limb + prod_low
+        cmpult  $3,$0,$0        # $0 = carry from (cy_limb + prod_low)
+        addq    $5,$3,$3        # add the old *res_ptr
+        cmpult  $3,$5,$5        # $5 = carry from adding *res_ptr
+        stq     $3,0($16)
+        addq    $16,8,$16       # res_ptr++
+        addq    $5,$0,$0        # combine carries
+        bne     $18,.Loop
+
+.Lend2: mulq    $2,$19,$3       # $3 = prod_low
+        ldq     $5,0($16)       # $5 = *res_ptr
+        addq    $4,$0,$0        # cy_limb = cy_limb + 'cy'
+        umulh   $2,$19,$4       # $4 = cy_limb
+        addq    $3,$0,$3        # $3 = cy_limb + prod_low
+        cmpult  $3,$0,$0        # $0 = carry from (cy_limb + prod_low)
+        addq    $5,$3,$3        # add the old *res_ptr
+        cmpult  $3,$5,$5        # $5 = carry from adding *res_ptr
+        stq     $3,0($16)
+        addq    $5,$0,$0        # combine carries
+        addq    $4,$0,$0        # cy_limb = prod_high + cy
+        ret     $31,($26),1
+.Lend1: addq    $5,$3,$3        # size == 1: *res_ptr + prod_low
+        cmpult  $3,$5,$5        # $5 = carry from that add
+        stq     $3,0($16)
+        addq    $0,$5,$0        # $0 = prod_high + carry
+        ret     $31,($26),1
+
+        .end    __mpn_addmul_1
diff --git a/gnu/lib/libgmp/mpn/alpha/ev5/add_n.s b/gnu/lib/libgmp/mpn/alpha/ev5/add_n.s
new file mode 100644
index 00000000000..1251a1fb716
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/ev5/add_n.s
@@ -0,0 +1,148 @@
+ # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr      $16
+ # s1_ptr       $17
+ # s2_ptr       $18
+ # size         $19
+
+        .set    noreorder
+        .set    noat
+.text
+        .align  3
+        .globl  __mpn_add_n
+        .ent    __mpn_add_n
+__mpn_add_n:
+        .frame  $30,0,$26,0
+
+        or      $31,$31,$25     # clear cy
+        subq    $19,4,$19       # decr loop cnt
+        blt     $19,.Lend2      # if less than 4 limbs, goto 2nd loop
+        # Start software pipeline for 1st loop
+        ldq     $0,0($18)       # $0..$3 = four s2 limbs
+        ldq     $1,8($18)
+        ldq     $4,0($17)       # $4..$7 = four s1 limbs
+        ldq     $5,8($17)
+        addq    $17,32,$17      # update s1_ptr
+        ldq     $2,16($18)
+        addq    $0,$4,$20       # 1st main add
+        ldq     $3,24($18)
+        subq    $19,4,$19       # decr loop cnt
+        ldq     $6,-16($17)     # negative offsets: s1_ptr was already advanced
+        cmpult  $20,$0,$25      # compute cy from last add
+        ldq     $7,-8($17)
+        addq    $1,$25,$28      # cy add
+        addq    $18,32,$18      # update s2_ptr
+        addq    $5,$28,$21      # 2nd main add
+        cmpult  $28,$25,$8      # compute cy from last add
+        blt     $19,.Lend1      # if less than 4 limbs remain, jump
+        # 1st loop handles groups of 4 limbs in a software pipeline
+        .align  4
+.Loop:  cmpult  $21,$28,$25     # compute cy from last add
+        ldq     $0,0($18)       # loads for the next group of four begin here
+        or      $8,$25,$25      # combine cy from the two adds
+        ldq     $1,8($18)
+        addq    $2,$25,$28      # cy add
+        ldq     $4,0($17)
+        addq    $28,$6,$22      # 3rd main add
+        ldq     $5,8($17)
+        cmpult  $28,$25,$8      # compute cy from last add
+        cmpult  $22,$28,$25     # compute cy from last add
+        stq     $20,0($16)      # store 1st sum of the current group
+        or      $8,$25,$25      # combine cy from the two adds
+        stq     $21,8($16)      # store 2nd sum
+        addq    $3,$25,$28      # cy add
+        addq    $28,$7,$23      # 4th main add
+        cmpult  $28,$25,$8      # compute cy from last add
+        cmpult  $23,$28,$25     # compute cy from last add
+        addq    $17,32,$17      # update s1_ptr
+        or      $8,$25,$25      # combine cy from the two adds
+        addq    $16,32,$16      # update res_ptr
+        addq    $0,$25,$28      # cy add
+        ldq     $2,16($18)
+        addq    $4,$28,$20      # 1st main add
+        ldq     $3,24($18)
+        cmpult  $28,$25,$8      # compute cy from last add
+        ldq     $6,-16($17)
+        cmpult  $20,$28,$25     # compute cy from last add
+        ldq     $7,-8($17)
+        or      $8,$25,$25      # combine cy from the two adds
+        subq    $19,4,$19       # decr loop cnt
+        stq     $22,-16($16)    # store 3rd sum (res_ptr already advanced)
+        addq    $1,$25,$28      # cy add
+        stq     $23,-8($16)     # store 4th sum
+        addq    $5,$28,$21      # 2nd main add
+        addq    $18,32,$18      # update s2_ptr
+        cmpult  $28,$25,$8      # compute cy from last add
+        bge     $19,.Loop
+        # Finish software pipeline for 1st loop
+.Lend1: cmpult  $21,$28,$25     # compute cy from last add
+        or      $8,$25,$25      # combine cy from the two adds
+        addq    $2,$25,$28      # cy add
+        addq    $28,$6,$22      # 3rd main add
+        cmpult  $28,$25,$8      # compute cy from last add
+        cmpult  $22,$28,$25     # compute cy from last add
+        stq     $20,0($16)
+        or      $8,$25,$25      # combine cy from the two adds
+        stq     $21,8($16)
+        addq    $3,$25,$28      # cy add
+        addq    $28,$7,$23      # 4th main add
+        cmpult  $28,$25,$8      # compute cy from last add
+        cmpult  $23,$28,$25     # compute cy from last add
+        or      $8,$25,$25      # combine cy from the two adds
+        addq    $16,32,$16      # update res_ptr
+        stq     $22,-16($16)
+        stq     $23,-8($16)
+.Lend2: addq    $19,4,$19       # restore loop cnt
+        beq     $19,.Lret
+        # Start software pipeline for 2nd loop
+        ldq     $0,0($18)
+        ldq     $4,0($17)
+        subq    $19,1,$19
+        beq     $19,.Lend0
+        # 2nd loop handles remaining 1-3 limbs
+        .align  4
+.Loop0: addq    $0,$25,$28      # cy add
+        ldq     $0,8($18)       # preload next s2 limb
+        addq    $4,$28,$20      # main add
+        ldq     $4,8($17)       # preload next s1 limb
+        addq    $18,8,$18
+        cmpult  $28,$25,$8      # compute cy from last add
+        addq    $17,8,$17
+        stq     $20,0($16)
+        cmpult  $20,$28,$25     # compute cy from last add
+        subq    $19,1,$19       # decr loop cnt
+        or      $8,$25,$25      # combine cy from the two adds
+        addq    $16,8,$16
+        bne     $19,.Loop0
+.Lend0: addq    $0,$25,$28      # cy add
+        addq    $4,$28,$20      # main add
+        cmpult  $28,$25,$8      # compute cy from last add
+        cmpult  $20,$28,$25     # compute cy from last add
+        stq     $20,0($16)
+        or      $8,$25,$25      # combine cy from the two adds
+
+.Lret:  or      $25,$31,$0      # return cy
+        ret     $31,($26),1
+        .end    __mpn_add_n
diff --git a/gnu/lib/libgmp/mpn/alpha/ev5/lshift.s b/gnu/lib/libgmp/mpn/alpha/ev5/lshift.s
new file mode 100644
index 00000000000..ced55b72034
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/ev5/lshift.s
@@ -0,0 +1,174 @@
+ # Alpha EV5 __mpn_lshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr      r16
+ # s1_ptr       r17
+ # size         r18
+ # cnt          r19
+
+ # This code runs at 3.25 cycles/limb on the EV5.
+
+        .set    noreorder
+        .set    noat
+.text
+        .align  3
+        .globl  __mpn_lshift
+        .ent    __mpn_lshift
+__mpn_lshift:
+        .frame  $30,0,$26,0
+
+        s8addq  $18,$17,$17     # make r17 point at end of s1
+        ldq     $4,-8($17)      # load first limb
+        subq    $31,$19,$20     # $20 = 0 - cnt, used as the srl count (NOTE(review): presumably the shifter takes the count mod 64 -- confirm)
+        s8addq  $18,$16,$16     # make r16 point at end of RES
+        subq    $18,1,$18       # size - 1
+        and     $18,4-1,$28     # number of limbs in first loop
+        srl     $4,$20,$0       # compute function result
+
+        beq     $28,.L0
+        subq    $18,$28,$18     # limbs left for the unrolled code
+
+        .align  3
+.Loop0: ldq     $3,-16($17)     # next lower limb
+        subq    $16,8,$16
+        sll     $4,$19,$5       # current limb shifted left by cnt
+        subq    $17,8,$17
+        subq    $28,1,$28
+        srl     $3,$20,$6       # bits carried in from the next lower limb
+        or      $3,$3,$4        # next limb becomes current limb
+        or      $5,$6,$8
+        stq     $8,0($16)
+        bne     $28,.Loop0
+
+.L0:    sll     $4,$19,$24      # $24 = pending high part of the current limb
+        beq     $18,.Lend
+        # warm up phase 1
+        ldq     $1,-16($17)
+        subq    $18,4,$18
+        ldq     $2,-24($17)
+        ldq     $3,-32($17)
+        ldq     $4,-40($17)
+        beq     $18,.Lend1
+        # warm up phase 2
+        srl     $1,$20,$7
+        sll     $1,$19,$21
+        srl     $2,$20,$8
+        ldq     $1,-48($17)
+        sll     $2,$19,$22
+        ldq     $2,-56($17)
+        srl     $3,$20,$5
+        or      $7,$24,$7
+        sll     $3,$19,$23
+        or      $8,$21,$8
+        srl     $4,$20,$6
+        ldq     $3,-64($17)
+        sll     $4,$19,$24
+        ldq     $4,-72($17)
+        subq    $18,4,$18
+        beq     $18,.Lend2
+        .align  4
+        # main loop
+.Loop:  stq     $7,-8($16)      # store results completed in the previous round
+        or      $5,$22,$5
+        stq     $8,-16($16)
+        or      $6,$23,$6
+
+        srl     $1,$20,$7
+        subq    $18,4,$18       # four limbs per iteration
+        sll     $1,$19,$21
+        unop                    # ldq $31,-96($17)
+
+        srl     $2,$20,$8
+        ldq     $1,-80($17)
+        sll     $2,$19,$22
+        ldq     $2,-88($17)
+
+        stq     $5,-24($16)
+        or      $7,$24,$7
+        stq     $6,-32($16)
+        or      $8,$21,$8
+
+        srl     $3,$20,$5
+        unop                    # ldq $31,-96($17)
+        sll     $3,$19,$23
+        subq    $16,32,$16      # res_ptr moves down only after this round's four stores
+
+        srl     $4,$20,$6
+        ldq     $3,-96($17)
+        sll     $4,$19,$24
+        ldq     $4,-104($17)
+
+        subq    $17,32,$17
+        bne     $18,.Loop
+        # cool down phase 2/1
+.Lend2: stq     $7,-8($16)
+        or      $5,$22,$5
+        stq     $8,-16($16)
+        or      $6,$23,$6
+        srl     $1,$20,$7
+        sll     $1,$19,$21
+        srl     $2,$20,$8
+        sll     $2,$19,$22
+        stq     $5,-24($16)
+        or      $7,$24,$7
+        stq     $6,-32($16)
+        or      $8,$21,$8
+        srl     $3,$20,$5
+        sll     $3,$19,$23
+        srl     $4,$20,$6
+        sll     $4,$19,$24
+        # cool down phase 2/2
+        stq     $7,-40($16)
+        or      $5,$22,$5
+        stq     $8,-48($16)
+        or      $6,$23,$6
+        stq     $5,-56($16)
+        stq     $6,-64($16)
+        # cool down phase 2/3
+        stq     $24,-72($16)    # lowest result limb: only the sll part
+        ret     $31,($26),1
+
+        # cool down phase 1/1
+.Lend1: srl     $1,$20,$7
+        sll     $1,$19,$21
+        srl     $2,$20,$8
+        sll     $2,$19,$22
+        srl     $3,$20,$5
+        or      $7,$24,$7
+        sll     $3,$19,$23
+        or      $8,$21,$8
+        srl     $4,$20,$6
+        sll     $4,$19,$24
+        # cool down phase 1/2
+        stq     $7,-8($16)
+        or      $5,$22,$5
+        stq     $8,-16($16)
+        or      $6,$23,$6
+        stq     $5,-24($16)
+        stq     $6,-32($16)
+        stq     $24,-40($16)    # lowest result limb: only the sll part
+        ret     $31,($26),1
+
+.Lend:  stq     $24,-8($16)     # nothing left for the unrolled code: store the pending limb
+        ret     $31,($26),1
+        .end    __mpn_lshift
diff --git a/gnu/lib/libgmp/mpn/alpha/ev5/rshift.s b/gnu/lib/libgmp/mpn/alpha/ev5/rshift.s
new file mode 100644
index 00000000000..6e24fef9658
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/ev5/rshift.s
@@ -0,0 +1,172 @@
+ # Alpha EV5 __mpn_rshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr      r16
+ # s1_ptr       r17
+ # size         r18
+ # cnt          r19
+
+ # This code runs at 3.25 cycles/limb on the EV5.
+
+        .set    noreorder
+        .set    noat
+.text
+        .align  3
+        .globl  __mpn_rshift
+        .ent    __mpn_rshift
+__mpn_rshift:
+        .frame  $30,0,$26,0
+
+        ldq     $4,0($17)       # load first limb
+        subq    $31,$19,$20     # $20 = 0 - cnt, used as the sll count (NOTE(review): presumably the shifter takes the count mod 64 -- confirm)
+        subq    $18,1,$18       # size - 1
+        and     $18,4-1,$28     # number of limbs in first loop
+        sll     $4,$20,$0       # compute function result
+
+        beq     $28,.L0
+        subq    $18,$28,$18     # limbs left for the unrolled code
+
+        .align  3
+.Loop0: ldq     $3,8($17)       # next higher limb
+        addq    $16,8,$16
+        srl     $4,$19,$5       # current limb shifted right by cnt
+        addq    $17,8,$17
+        subq    $28,1,$28
+        sll     $3,$20,$6       # bits carried in from the next higher limb
+        or      $3,$3,$4        # next limb becomes current limb
+        or      $5,$6,$8
+        stq     $8,-8($16)
+        bne     $28,.Loop0
+
+.L0:    srl     $4,$19,$24      # $24 = pending low part of the current limb
+        beq     $18,.Lend
+        # warm up phase 1
+        ldq     $1,8($17)
+        subq    $18,4,$18
+        ldq     $2,16($17)
+        ldq     $3,24($17)
+        ldq     $4,32($17)
+        beq     $18,.Lend1
+        # warm up phase 2
+        sll     $1,$20,$7
+        srl     $1,$19,$21
+        sll     $2,$20,$8
+        ldq     $1,40($17)
+        srl     $2,$19,$22
+        ldq     $2,48($17)
+        sll     $3,$20,$5
+        or      $7,$24,$7
+        srl     $3,$19,$23
+        or      $8,$21,$8
+        sll     $4,$20,$6
+        ldq     $3,56($17)
+        srl     $4,$19,$24
+        ldq     $4,64($17)
+        subq    $18,4,$18
+        beq     $18,.Lend2
+        .align  4
+        # main loop
+.Loop:  stq     $7,0($16)       # store results completed in the previous round
+        or      $5,$22,$5
+        stq     $8,8($16)
+        or      $6,$23,$6
+
+        sll     $1,$20,$7
+        subq    $18,4,$18       # four limbs per iteration
+        srl     $1,$19,$21
+        unop                    # ldq $31,-96($17)
+
+        sll     $2,$20,$8
+        ldq     $1,72($17)
+        srl     $2,$19,$22
+        ldq     $2,80($17)
+
+        stq     $5,16($16)
+        or      $7,$24,$7
+        stq     $6,24($16)
+        or      $8,$21,$8
+
+        sll     $3,$20,$5
+        unop                    # ldq $31,-96($17)
+        srl     $3,$19,$23
+        addq    $16,32,$16      # res_ptr moves up only after this round's four stores
+
+        sll     $4,$20,$6
+        ldq     $3,88($17)
+        srl     $4,$19,$24
+        ldq     $4,96($17)
+
+        addq    $17,32,$17
+        bne     $18,.Loop
+        # cool down phase 2/1
+.Lend2: stq     $7,0($16)
+        or      $5,$22,$5
+        stq     $8,8($16)
+        or      $6,$23,$6
+        sll     $1,$20,$7
+        srl     $1,$19,$21
+        sll     $2,$20,$8
+        srl     $2,$19,$22
+        stq     $5,16($16)
+        or      $7,$24,$7
+        stq     $6,24($16)
+        or      $8,$21,$8
+        sll     $3,$20,$5
+        srl     $3,$19,$23
+        sll     $4,$20,$6
+        srl     $4,$19,$24
+        # cool down phase 2/2
+        stq     $7,32($16)
+        or      $5,$22,$5
+        stq     $8,40($16)
+        or      $6,$23,$6
+        stq     $5,48($16)
+        stq     $6,56($16)
+        # cool down phase 2/3
+        stq     $24,64($16)     # highest result limb: only the srl part
+        ret     $31,($26),1
+
+        # cool down phase 1/1
+.Lend1: sll     $1,$20,$7
+        srl     $1,$19,$21
+        sll     $2,$20,$8
+        srl     $2,$19,$22
+        sll     $3,$20,$5
+        or      $7,$24,$7
+        srl     $3,$19,$23
+        or      $8,$21,$8
+        sll     $4,$20,$6
+        srl     $4,$19,$24
+        # cool down phase 1/2
+        stq     $7,0($16)
+        or      $5,$22,$5
+        stq     $8,8($16)
+        or      $6,$23,$6
+        stq     $5,16($16)
+        stq     $6,24($16)
+        stq     $24,32($16)     # highest result limb: only the srl part
+        ret     $31,($26),1
+
+.Lend:  stq     $24,0($16)      # nothing left for the unrolled code: store the pending limb
+        ret     $31,($26),1
+        .end    __mpn_rshift
diff --git a/gnu/lib/libgmp/mpn/alpha/ev5/sub_n.s b/gnu/lib/libgmp/mpn/alpha/ev5/sub_n.s
new file mode 100644
index 00000000000..6743af50b8f
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/ev5/sub_n.s
@@ -0,0 +1,149 @@
+ # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr      $16
+ # s1_ptr       $17
+ # s2_ptr       $18
+ # size         $19
+
+        .set    noreorder
+        .set    noat
+.text
+        .align  3
+        .globl  __mpn_sub_n
+        .ent    __mpn_sub_n
+__mpn_sub_n:
+        .frame  $30,0,$26,0
+
+        or      $31,$31,$25     # clear cy
+        subq    $19,4,$19       # decr loop cnt
+        blt     $19,.Lend2      # if less than 4 limbs, goto 2nd loop
+        # Start software pipeline for 1st loop
+        ldq     $0,0($18)       # $0..$3 = four s2 limbs
+        ldq     $1,8($18)
+        ldq     $4,0($17)       # $4..$7 = four s1 limbs
+        ldq     $5,8($17)
+        addq    $17,32,$17      # update s1_ptr
+        ldq     $2,16($18)
+        subq    $4,$0,$20       # 1st main sub
+        ldq     $3,24($18)
+        subq    $19,4,$19       # decr loop cnt
+        ldq     $6,-16($17)     # negative offsets: s1_ptr was already advanced
+        cmpult  $4,$20,$25      # compute cy from last sub
+        ldq     $7,-8($17)
+        addq    $1,$25,$28      # cy add
+        addq    $18,32,$18      # update s2_ptr
+        subq    $5,$28,$21      # 2nd main sub
+        cmpult  $28,$25,$8      # compute cy from last add
+        blt     $19,.Lend1      # if less than 4 limbs remain, jump
+        # 1st loop handles groups of 4 limbs in a software pipeline
+        .align  4
+.Loop:  cmpult  $5,$21,$25      # compute cy from last add
+        ldq     $0,0($18)       # loads for the next group of four begin here
+        or      $8,$25,$25      # combine cy from the two adds
+        ldq     $1,8($18)
+        addq    $2,$25,$28      # cy add
+        ldq     $4,0($17)
+        subq    $6,$28,$22      # 3rd main sub
+        ldq     $5,8($17)
+        cmpult  $28,$25,$8      # compute cy from last add
+        cmpult  $6,$22,$25      # compute cy from last add
+        stq     $20,0($16)      # store 1st difference of the current group
+        or      $8,$25,$25      # combine cy from the two adds
+        stq     $21,8($16)      # store 2nd difference
+        addq    $3,$25,$28      # cy add
+        subq    $7,$28,$23      # 4th main sub
+        cmpult  $28,$25,$8      # compute cy from last add
+        cmpult  $7,$23,$25      # compute cy from last add
+        addq    $17,32,$17      # update s1_ptr
+        or      $8,$25,$25      # combine cy from the two adds
+        addq    $16,32,$16      # update res_ptr
+        addq    $0,$25,$28      # cy add
+        ldq     $2,16($18)
+        subq    $4,$28,$20      # 1st main sub
+        ldq     $3,24($18)
+        cmpult  $28,$25,$8      # compute cy from last add
+        ldq     $6,-16($17)
+        cmpult  $4,$20,$25      # compute cy from last add
+        ldq     $7,-8($17)
+        or      $8,$25,$25      # combine cy from the two adds
+        subq    $19,4,$19       # decr loop cnt
+        stq     $22,-16($16)    # store 3rd difference (res_ptr already advanced)
+        addq    $1,$25,$28      # cy add
+        stq     $23,-8($16)     # store 4th difference
+        subq    $5,$28,$21      # 2nd main sub
+        addq    $18,32,$18      # update s2_ptr
+        cmpult  $28,$25,$8      # compute cy from last add
+        bge     $19,.Loop
+        # Finish software pipeline for 1st loop
+.Lend1: cmpult  $5,$21,$25      # compute cy from last add
+        or      $8,$25,$25      # combine cy from the two adds
+        addq    $2,$25,$28      # cy add
+        subq    $6,$28,$22      # 3rd main sub
+        cmpult  $28,$25,$8      # compute cy from last add
+        cmpult  $6,$22,$25      # compute cy from last add
+        stq     $20,0($16)
+        or      $8,$25,$25      # combine cy from the two adds
+        stq     $21,8($16)
+        addq    $3,$25,$28      # cy add
+        subq    $7,$28,$23      # 4th main sub
+        cmpult  $28,$25,$8      # compute cy from last add
+        cmpult  $7,$23,$25      # compute cy from last add
+        or      $8,$25,$25      # combine cy from the two adds
+        addq    $16,32,$16      # update res_ptr
+        stq     $22,-16($16)
+        stq     $23,-8($16)
+.Lend2: addq    $19,4,$19       # restore loop cnt
+        beq     $19,.Lret
+        # Start software pipeline for 2nd loop
+        ldq     $0,0($18)
+        ldq     $4,0($17)
+        subq    $19,1,$19
+        beq     $19,.Lend0
+        # 2nd loop handles remaining 1-3 limbs
+        .align  4
+.Loop0: addq    $0,$25,$28      # cy add
+        ldq     $0,8($18)       # preload next s2 limb
+        subq    $4,$28,$20      # main sub
+        ldq     $1,8($17)       # next s1 limb goes to $1: $4 is still needed for the borrow compare
+        addq    $18,8,$18
+        cmpult  $28,$25,$8      # compute cy from last add
+        addq    $17,8,$17
+        stq     $20,0($16)
+        cmpult  $4,$20,$25      # compute cy from last add
+        subq    $19,1,$19       # decr loop cnt
+        or      $8,$25,$25      # combine cy from the two adds
+        addq    $16,8,$16
+        or      $1,$31,$4       # move the preloaded s1 limb into $4
+        bne     $19,.Loop0
+.Lend0: addq    $0,$25,$28      # cy add
+        subq    $4,$28,$20      # main sub
+        cmpult  $28,$25,$8      # compute cy from last add
+        cmpult  $4,$20,$25      # compute cy from last add
+        stq     $20,0($16)
+        or      $8,$25,$25      # combine cy from the two adds
+
+.Lret:  or      $25,$31,$0      # return cy
+        ret     $31,($26),1
+        .end    __mpn_sub_n
diff --git a/gnu/lib/libgmp/mpn/alpha/gmp-mparam.h b/gnu/lib/libgmp/mpn/alpha/gmp-mparam.h
new file mode 100644
index 00000000000..a3c66974ded
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/gnu/lib/libgmp/mpn/alpha/lshift.s b/gnu/lib/libgmp/mpn/alpha/lshift.s
new file mode 100644
index 00000000000..13bd24a4271
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/lshift.s
@@ -0,0 +1,109 @@
+ # Alpha 21064 __mpn_lshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr      r16
+ # s1_ptr       r17
+ # size         r18
+ # cnt          r19
+
+ # This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
+ # it would take 4 cycles/limb. It should be possible to get down to 3
+ # cycles/limb since both ldq and stq can be paired with the other used
+ # instructions. But there are many restrictions in the 21064 pipeline that
+ # makes it hard, if not impossible, to get down to 3 cycles/limb:
+
+ # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+ # 2. Only aligned instruction pairs can be paired.
+ # 3. The store buffer or silo might not be able to deal with the bandwidth.
+
+        .set    noreorder
+        .set    noat
+.text
+        .align  3
+        .globl  __mpn_lshift
+        .ent    __mpn_lshift
+__mpn_lshift:
+        .frame  $30,0,$26,0
+
+        s8addq  $18,$17,$17     # make r17 point at end of s1
+        ldq     $4,-8($17)      # load first limb
+        subq    $17,8,$17       # step past the limb just loaded
+        subq    $31,$19,$7      # $7 = 0 - cnt, used as the srl count (NOTE(review): presumably the shifter takes the count mod 64 -- confirm)
+        s8addq  $18,$16,$16     # make r16 point at end of RES
+        subq    $18,1,$18       # size - 1
+        and     $18,4-1,$20     # number of limbs in first loop
+        srl     $4,$7,$0        # compute function result
+
+        beq     $20,.L0
+        subq    $18,$20,$18     # limbs left for the unrolled loop
+
+        .align  3
+.Loop0:
+        ldq     $3,-8($17)      # next lower limb
+        subq    $16,8,$16
+        subq    $17,8,$17
+        subq    $20,1,$20
+        sll     $4,$19,$5       # current limb shifted left by cnt
+        srl     $3,$7,$6        # bits carried in from the next lower limb
+        bis     $3,$3,$4        # next limb becomes current limb
+        bis     $5,$6,$8
+        stq     $8,0($16)
+        bne     $20,.Loop0
+
+.L0:    beq     $18,.Lend
+
+        .align  3
+.Loop:  ldq     $3,-8($17)
+        subq    $16,32,$16      # four result limbs per iteration
+        subq    $18,4,$18
+        sll     $4,$19,$5
+        srl     $3,$7,$6
+
+        ldq     $4,-16($17)
+        sll     $3,$19,$1
+        bis     $5,$6,$8
+        stq     $8,24($16)
+        srl     $4,$7,$2
+
+        ldq     $3,-24($17)
+        sll     $4,$19,$5
+        bis     $1,$2,$8
+        stq     $8,16($16)
+        srl     $3,$7,$6
+
+        ldq     $4,-32($17)
+        sll     $3,$19,$1
+        bis     $5,$6,$8
+        stq     $8,8($16)
+        srl     $4,$7,$2
+
+        subq    $17,32,$17
+        bis     $1,$2,$8
+        stq     $8,0($16)
+
+        bgt     $18,.Loop
+
+.Lend:  sll     $4,$19,$8       # lowest result limb: only the sll part
+        stq     $8,-8($16)
+        ret     $31,($26),1
+        .end    __mpn_lshift
diff --git a/gnu/lib/libgmp/mpn/alpha/mul_1.s b/gnu/lib/libgmp/mpn/alpha/mul_1.s
new file mode 100644
index 00000000000..a1f5a94b9e6
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/mul_1.s
@@ -0,0 +1,85 @@
+ # Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+ # the result in a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr      r16
+ # s1_ptr       r17
+ # size         r18
+ # s2_limb      r19
+
+ # This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
+
+ # To improve performance for long multiplications, we would use
+ # 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
+ # these instructions without slowing down the general code: 1. We can
+ # only have two prefetches in operation at any time in the Alpha
+ # architecture. 2. There will seldom be any special alignment
+ # between RES_PTR and S1_PTR. Maybe we can simply divide the current
+ # loop into an inner and outer loop, having the inner loop handle
+ # exactly one prefetch block?
+
+        .set    noreorder
+        .set    noat
+.text
+        .align  3
+        .globl  __mpn_mul_1
+        .ent    __mpn_mul_1 2
+__mpn_mul_1:
+        .frame  $30,0,$26
+
+        ldq     $2,0($17)       # $2 = s1_limb
+        subq    $18,1,$18       # size--
+        mulq    $2,$19,$3       # $3 = prod_low
+        bic     $31,$31,$4      # clear cy_limb
+        umulh   $2,$19,$0       # $0 = prod_high
+        beq     $18,.Lend1      # jump if size was == 1
+        ldq     $2,8($17)       # $2 = s1_limb
+        subq    $18,1,$18       # size--
+        stq     $3,0($16)       # store the first product limb
+        beq     $18,.Lend2      # jump if size was == 2
+
+        .align  3
+.Loop:  mulq    $2,$19,$3       # $3 = prod_low
+        addq    $4,$0,$0        # cy_limb = cy_limb + 'cy'
+        subq    $18,1,$18       # size--
+        umulh   $2,$19,$4       # $4 = cy_limb
+        ldq     $2,16($17)      # $2 = s1_limb
+        addq    $17,8,$17       # s1_ptr++
+        addq    $3,$0,$3        # $3 = cy_limb + prod_low
+        stq     $3,8($16)       # offset 8: res_ptr is bumped two lines below
+        cmpult  $3,$0,$0        # $0 = carry from (cy_limb + prod_low)
+        addq    $16,8,$16       # res_ptr++
+        bne     $18,.Loop
+
+.Lend2: mulq    $2,$19,$3       # $3 = prod_low
+        addq    $4,$0,$0        # cy_limb = cy_limb + 'cy'
+        umulh   $2,$19,$4       # $4 = cy_limb
+        addq    $3,$0,$3        # $3 = cy_limb + prod_low
+        cmpult  $3,$0,$0        # $0 = carry from (cy_limb + prod_low)
+        stq     $3,8($16)       # store the last product limb
+        addq    $4,$0,$0        # cy_limb = prod_high + cy
+        ret     $31,($26),1
+.Lend1: stq     $3,0($16)       # size == 1: store prod_low; $0 already holds prod_high
+        ret     $31,($26),1
+
+        .end    __mpn_mul_1
diff --git a/gnu/lib/libgmp/mpn/alpha/rshift.s b/gnu/lib/libgmp/mpn/alpha/rshift.s
new file mode 100644
index 00000000000..389054ab0e9
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/rshift.s
@@ -0,0 +1,107 @@
+ # Alpha 21064 __mpn_rshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr      r16
+ # s1_ptr       r17
+ # size         r18
+ # cnt          r19
+
+ # This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
+ # it would take 4 cycles/limb. It should be possible to get down to 3
+ # cycles/limb since both ldq and stq can be paired with the other used
+ # instructions. But there are many restrictions in the 21064 pipeline that
+ # makes it hard, if not impossible, to get down to 3 cycles/limb:
+
+ # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+ # 2. Only aligned instruction pairs can be paired.
+ # 3. The store buffer or silo might not be able to deal with the bandwidth.
+
+        .set    noreorder
+        .set    noat
+.text
+        .align  3
+        .globl  __mpn_rshift
+        .ent    __mpn_rshift
+__mpn_rshift:
+        .frame  $30,0,$26,0
+
+        ldq     $4,0($17)       # load first limb
+        addq    $17,8,$17       # step past the limb just loaded
+        subq    $31,$19,$7      # $7 = 0 - cnt, used as the sll count (NOTE(review): presumably the shifter takes the count mod 64 -- confirm)
+        subq    $18,1,$18       # size - 1
+        and     $18,4-1,$20     # number of limbs in first loop
+        sll     $4,$7,$0        # compute function result
+
+        beq     $20,.L0
+        subq    $18,$20,$18     # limbs left for the unrolled loop
+
+        .align  3
+.Loop0:
+        ldq     $3,0($17)       # next higher limb
+        addq    $16,8,$16
+        addq    $17,8,$17
+        subq    $20,1,$20
+        srl     $4,$19,$5       # current limb shifted right by cnt
+        sll     $3,$7,$6        # bits carried in from the next higher limb
+        bis     $3,$3,$4        # next limb becomes current limb
+        bis     $5,$6,$8
+        stq     $8,-8($16)
+        bne     $20,.Loop0
+
+.L0:    beq     $18,.Lend
+
+        .align  3
+.Loop:  ldq     $3,0($17)
+        addq    $16,32,$16      # four result limbs per iteration
+        subq    $18,4,$18
+        srl     $4,$19,$5
+        sll     $3,$7,$6
+
+        ldq     $4,8($17)
+        srl     $3,$19,$1
+        bis     $5,$6,$8
+        stq     $8,-32($16)
+        sll     $4,$7,$2
+
+        ldq     $3,16($17)
+        srl     $4,$19,$5
+        bis     $1,$2,$8
+        stq     $8,-24($16)
+        sll     $3,$7,$6
+
+        ldq     $4,24($17)
+        srl     $3,$19,$1
+        bis     $5,$6,$8
+        stq     $8,-16($16)
+        sll     $4,$7,$2
+
+        addq    $17,32,$17
+        bis     $1,$2,$8
+        stq     $8,-8($16)
+
+        bgt     $18,.Loop
+
+.Lend:  srl     $4,$19,$8       # highest result limb: only the srl part
+        stq     $8,0($16)
+        ret     $31,($26),1
+        .end    __mpn_rshift
diff --git a/gnu/lib/libgmp/mpn/alpha/sub_n.s b/gnu/lib/libgmp/mpn/alpha/sub_n.s
new file mode 100644
index 00000000000..3c90c116973
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/sub_n.s
@@ -0,0 +1,120 @@
+ # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr      $16
+ # s1_ptr       $17
+ # s2_ptr       $18
+ # size         $19
+
+        .set    noreorder
+        .set    noat
+.text
+        .align  3
+        .globl  __mpn_sub_n
+        .ent    __mpn_sub_n
+__mpn_sub_n:                   # limb-wise res = s1 - s2; running borrow is kept in $0
+        .frame  $30,0,$26,0
+
+        ldq     $3,0($17)       # $3 = first s1 limb
+        ldq     $4,0($18)       # $4 = first s2 limb
+
+        subq    $19,1,$19       # size - 1; the last limb is handled at .Lend
+        and     $19,4-1,$2      # number of limbs in first loop
+        bis     $31,$31,$0      # clear borrow
+        beq     $2,.L0          # if multiple of 4 limbs, skip first loop
+
+        subq    $19,$2,$19      # limbs left for the 4-way unrolled loop
+
+.Loop0: subq    $2,1,$2
+        ldq     $5,8($17)       # preload next s1 limb
+        addq    $4,$0,$4        # s2 limb + borrow-in
+        ldq     $6,8($18)       # preload next s2 limb
+        cmpult  $4,$0,$1        # $1 = carry out of the borrow-in add
+        subq    $3,$4,$4        # main sub
+        cmpult  $3,$4,$0        # $0 = borrow out of the main sub
+        stq     $4,0($16)
+        or      $0,$1,$0        # combine the two borrows
+
+        addq    $17,8,$17
+        addq    $18,8,$18
+        bis     $5,$5,$3        # move preloaded s1 limb into place
+        bis     $6,$6,$4        # move preloaded s2 limb into place
+        addq    $16,8,$16
+        bne     $2,.Loop0
+
+.L0:    beq     $19,.Lend
+
+        .align  3
+.Loop:  subq    $19,4,$19       # four limbs per iteration
+
+        ldq     $5,8($17)
+        addq    $4,$0,$4
+        ldq     $6,8($18)
+        cmpult  $4,$0,$1
+        subq    $3,$4,$4
+        cmpult  $3,$4,$0
+        stq     $4,0($16)
+        or      $0,$1,$0
+
+        ldq     $3,16($17)
+        addq    $6,$0,$6
+        ldq     $4,16($18)
+        cmpult  $6,$0,$1
+        subq    $5,$6,$6
+        cmpult  $5,$6,$0
+        stq     $6,8($16)
+        or      $0,$1,$0
+
+        ldq     $5,24($17)
+        addq    $4,$0,$4
+        ldq     $6,24($18)
+        cmpult  $4,$0,$1
+        subq    $3,$4,$4
+        cmpult  $3,$4,$0
+        stq     $4,16($16)
+        or      $0,$1,$0
+
+        ldq     $3,32($17)
+        addq    $6,$0,$6
+        ldq     $4,32($18)
+        cmpult  $6,$0,$1
+        subq    $5,$6,$6
+        cmpult  $5,$6,$0
+        stq     $6,24($16)
+        or      $0,$1,$0
+
+        addq    $17,32,$17
+        addq    $18,32,$18
+        addq    $16,32,$16
+        bne     $19,.Loop
+
+.Lend:  addq    $4,$0,$4        # last limb: operands are already in $3/$4
+        cmpult  $4,$0,$1
+        subq    $3,$4,$4
+        cmpult  $3,$4,$0
+        stq     $4,0($16)
+        or      $0,$1,$0        # $0 = final borrow, left in place at return
+        ret     $31,($26),1
+
+        .end    __mpn_sub_n
diff --git a/gnu/lib/libgmp/mpn/alpha/submul_1.s b/gnu/lib/libgmp/mpn/alpha/submul_1.s
new file mode 100644
index 00000000000..1ed0c6a8d9e
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/submul_1.s
@@ -0,0 +1,92 @@
+ # Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and
+ # subtract the result from a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr      r16
+ # s1_ptr       r17
+ # size         r18
+ # s2_limb      r19
+
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+
+        .set    noreorder
+        .set    noat
+.text
+        .align  3
+        .globl  __mpn_submul_1
+        .ent    __mpn_submul_1 2
+__mpn_submul_1:
+        .frame  $30,0,$26
+
+        ldq     $2,0($17)       # $2 = s1_limb
+        addq    $17,8,$17       # s1_ptr++
+        subq    $18,1,$18       # size--
+        mulq    $2,$19,$3       # $3 = prod_low
+        ldq     $5,0($16)       # $5 = *res_ptr
+        umulh   $2,$19,$0       # $0 = prod_high
+        beq     $18,.Lend1      # jump if size was == 1
+        ldq     $2,0($17)       # $2 = s1_limb
+        addq    $17,8,$17       # s1_ptr++
+        subq    $18,1,$18       # size--
+        subq    $5,$3,$3        # $3 = *res_ptr - prod_low
+        cmpult  $5,$3,$4        # $4 = borrow from that sub
+        stq     $3,0($16)
+        addq    $16,8,$16       # res_ptr++
+        beq     $18,.Lend2      # jump if size was == 2
+
+        .align  3
+.Loop:  mulq    $2,$19,$3       # $3 = prod_low
+        ldq     $5,0($16)       # $5 = *res_ptr
+        addq    $4,$0,$0        # cy_limb = cy_limb + 'cy'
+        subq    $18,1,$18       # size--
+        umulh   $2,$19,$4       # $4 = cy_limb
+        ldq     $2,0($17)       # $2 = s1_limb
+        addq    $17,8,$17       # s1_ptr++
+        addq    $3,$0,$3        # $3 = cy_limb + prod_low
+        cmpult  $3,$0,$0        # $0 = carry from (cy_limb + prod_low)
+        subq    $5,$3,$3        # subtract from the old *res_ptr
+        cmpult  $5,$3,$5        # $5 = borrow from that sub
+        stq     $3,0($16)
+        addq    $16,8,$16       # res_ptr++
+        addq    $5,$0,$0        # combine carries
+        bne     $18,.Loop
+
+.Lend2: mulq    $2,$19,$3       # $3 = prod_low
+        ldq     $5,0($16)       # $5 = *res_ptr
+        addq    $4,$0,$0        # cy_limb = cy_limb + 'cy'
+        umulh   $2,$19,$4       # $4 = cy_limb
+        addq    $3,$0,$3        # $3 = cy_limb + prod_low
+        cmpult  $3,$0,$0        # $0 = carry from (cy_limb + prod_low)
+        subq    $5,$3,$3        # subtract from the old *res_ptr
+        cmpult  $5,$3,$5        # $5 = borrow from that sub
+        stq     $3,0($16)
+        addq    $5,$0,$0        # combine carries
+        addq    $4,$0,$0        # cy_limb = prod_high + cy
+        ret     $31,($26),1
+.Lend1: subq    $5,$3,$3        # size == 1: *res_ptr - prod_low
+        cmpult  $5,$3,$5        # $5 = borrow from that sub
+        stq     $3,0($16)
+        addq    $0,$5,$0        # $0 = prod_high + borrow
+        ret     $31,($26),1
+
+        .end    __mpn_submul_1
diff --git a/gnu/lib/libgmp/mpn/alpha/udiv_qrnnd.S b/gnu/lib/libgmp/mpn/alpha/udiv_qrnnd.S
new file mode 100644
index 00000000000..d3d2cee93d1
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/alpha/udiv_qrnnd.S
@@ -0,0 +1,151 @@
+ # Alpha 21064 __udiv_qrnnd
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+ + + .set noreorder + .set noat +.text + .align 3 + .globl __udiv_qrnnd + .ent __udiv_qrnnd +__udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 + + ldiq cnt,16 + blt d,.Largedivisor + +.Loop1: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,.Loop1 + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +.Largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addq $5,$6,$5 + +.Loop2: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,.Loop2 + + addq n1,n1,n1 + addq $4,n1,n1 + bne $6,.LOdd + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +.LOdd: + /* q' in n0. 
r' in n1 */ + addq n1,n0,n1 + cmpult n1,n0,tmp # tmp := carry from addq + beq tmp,.LLp6 + addq n0,1,n0 + subq n1,d,n1 +.LLp6: cmpult n1,d,tmp + bne tmp,.LLp7 + addq n0,1,n0 + subq n1,d,n1 +.LLp7: + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + + .end __udiv_qrnnd diff --git a/gnu/lib/libgmp/mpn/bsd.h b/gnu/lib/libgmp/mpn/bsd.h new file mode 100644 index 00000000000..8a0cbced256 --- /dev/null +++ b/gnu/lib/libgmp/mpn/bsd.h @@ -0,0 +1,5 @@ +#if __STDC__ +#define C_SYMBOL_NAME(name) _##name +#else +#define C_SYMBOL_NAME(name) _/**/name +#endif diff --git a/gnu/lib/libgmp/mpn/clipper/add_n.s b/gnu/lib/libgmp/mpn/clipper/add_n.s new file mode 100644 index 00000000000..8d9b9867309 --- /dev/null +++ b/gnu/lib/libgmp/mpn/clipper/add_n.s @@ -0,0 +1,48 @@ +; Clipper __mpn_add_n -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ +.text + .align 16 +.globl ___mpn_add_n +___mpn_add_n: + subq $8,sp + storw r6,(sp) + loadw 12(sp),r2 + loadw 16(sp),r3 + loadq $0,r6 ; clear carry-save register + +.Loop: loadw (r1),r4 + loadw (r2),r5 + addwc r6,r6 ; restore carry from r6 + addwc r5,r4 + storw r4,(r0) + subwc r6,r6 ; save carry in r6 + addq $4,r0 + addq $4,r1 + addq $4,r2 + subq $1,r3 + brne .Loop + + negw r6,r0 + loadw (sp),r6 + addq $8,sp + ret sp diff --git a/gnu/lib/libgmp/mpn/clipper/mul_1.s b/gnu/lib/libgmp/mpn/clipper/mul_1.s new file mode 100644 index 00000000000..44d92c3d5b6 --- /dev/null +++ b/gnu/lib/libgmp/mpn/clipper/mul_1.s @@ -0,0 +1,47 @@ +; Clipper __mpn_mul_1 -- Multiply a limb vector with a limb and store +; the result in a second limb vector. + +; Copyright (C) 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ +.text + .align 16 +.globl ___mpn_mul_1 +___mpn_mul_1: + subq $8,sp + storw r6,(sp) + loadw 12(sp),r2 + loadw 16(sp),r3 + loadq $0,r6 ; clear carry limb + +.Loop: loadw (r1),r4 + mulwux r3,r4 + addw r6,r4 ; add old carry limb into low product limb + loadq $0,r6 + addwc r5,r6 ; propagate cy into high product limb + storw r4,(r0) + addq $4,r0 + addq $4,r1 + subq $1,r2 + brne .Loop + + movw r6,r0 + loadw 0(sp),r6 + addq $8,sp + ret sp diff --git a/gnu/lib/libgmp/mpn/clipper/sub_n.s b/gnu/lib/libgmp/mpn/clipper/sub_n.s new file mode 100644 index 00000000000..882c99104e5 --- /dev/null +++ b/gnu/lib/libgmp/mpn/clipper/sub_n.s @@ -0,0 +1,48 @@ +; Clipper __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ +.text + .align 16 +.globl ___mpn_sub_n +___mpn_sub_n: + subq $8,sp + storw r6,(sp) + loadw 12(sp),r2 + loadw 16(sp),r3 + loadq $0,r6 ; clear carry-save register + +.Loop: loadw (r1),r4 + loadw (r2),r5 + addwc r6,r6 ; restore carry from r6 + subwc r5,r4 + storw r4,(r0) + subwc r6,r6 ; save carry in r6 + addq $4,r0 + addq $4,r1 + addq $4,r2 + subq $1,r3 + brne .Loop + + negw r6,r0 + loadw (sp),r6 + addq $8,sp + ret sp diff --git a/gnu/lib/libgmp/mpn/config/t-oldgas b/gnu/lib/libgmp/mpn/config/t-oldgas new file mode 100644 index 00000000000..ba02fa76847 --- /dev/null +++ b/gnu/lib/libgmp/mpn/config/t-oldgas @@ -0,0 +1 @@ +SFLAGS=-DBROKEN_ALIGN diff --git a/gnu/lib/libgmp/mpn/config/t-ppc-aix b/gnu/lib/libgmp/mpn/config/t-ppc-aix new file mode 100644 index 00000000000..a31ce0d7093 --- /dev/null +++ b/gnu/lib/libgmp/mpn/config/t-ppc-aix @@ -0,0 +1 @@ +SFLAGS=-Wa,-mppc diff --git a/gnu/lib/libgmp/mpn/config/t-pwr-aix b/gnu/lib/libgmp/mpn/config/t-pwr-aix new file mode 100644 index 00000000000..3e11d9f9d9f --- /dev/null +++ b/gnu/lib/libgmp/mpn/config/t-pwr-aix @@ -0,0 +1 @@ +SFLAGS=-Wa,-mpwr diff --git a/gnu/lib/libgmp/mpn/configure.in b/gnu/lib/libgmp/mpn/configure.in new file mode 100644 index 00000000000..90c26263811 --- /dev/null +++ b/gnu/lib/libgmp/mpn/configure.in @@ -0,0 +1,178 @@ +# This file is a shell script fragment that supplies the information +# necessary for a configure script to process the program in +# this directory. For more information, look at ../configure. 
+ +configdirs= +srctrigger=powerpc32 +srcname="GNU Multi-Precision library/mpn" + +# per-host: + +# per-target: + +case "${target}" in + sparc9*-*-* | sparc64*-*-* | ultrasparc*-*-*) + #path="sparc64" ;; Don't use this until compilers are ready + path="sparc32/v8 sparc" ;; + sparc8*-*-* | microsparc*-*-*) + path="sparc32/v8 sparc" ;; + supersparc*-*-*) + path="sparc32/v8/supersparc sparc32/v8 sparc" + extra_functions="udiv" ;; + sparc*-*-*) path="sparc32" + if [ x$floating_point = xno ] + then extra_functions="udiv_nfp" + else extra_functions="udiv_fp" + fi + ;; + hppa7000*-*-*) path="hppa/hppa1_1 hppa"; extra_functions="udiv_qrnnd" ;; + hppa1.0*-*-*) path="hppa"; extra_functions="udiv_qrnnd" ;; + hppa*-*-*) # assume pa7100 + path="hppa/hppa1_1/pa7100 hppa/hppa1_1 hppa" + extra_functions="udiv_qrnnd" ;; + cray2-cray-unicos* | [xy]mp-cray-unicos* | [ctj]90-cray-unicos*) + path="cray" ;; + alphaev5-*-*) path="alpha/ev5 alpha"; extra_functions="udiv_qrnnd" ;; + alpha*-*-*) path="alpha"; extra_functions="udiv_qrnnd" ;; + am29000*-*-*) path="am29000" ;; + a29k*-*-*) path="am29000" ;; + +# Intel x86 configurations + i[34]86*-*-linuxaout* | i[34]86*-*-linuxoldld* | \ + i[34]86*-*-*bsd*) # x86 running BSD or Linux with a.out + echo '#define BSD_SYNTAX' >asm-syntax.h + echo '#include "'$srcdir'/x86/syntax.h"' >>asm-syntax.h + path="x86" ;; + i[56]86*-*-linuxaout* | pentium-*-linuxaout* | pentiumpro-*-linuxaout* | \ + i[56]86*-*-linuxoldld* | pentium-*-linuxoldld* | pentiumpro-*-linuxoldld* | \ + i[56]86*-*-*bsd* | pentium-*-*bsd* | pentiumpro-*-*bsd*) + echo '#define BSD_SYNTAX' >asm-syntax.h + echo '#include "'$srcdir'/x86/syntax.h"' >>asm-syntax.h + path="x86/pentium x86" ;; + i[34]86*-*-*) # x86 with ELF/SysV format + echo '#define ELF_SYNTAX' >asm-syntax.h + echo '#include "'$srcdir'/x86/syntax.h"' >>asm-syntax.h + path="x86" ;; + i[56]86*-*-* | pentium-*-* | pentiumpro-*-*) # x86 with ELF/SysV format + echo '#define ELF_SYNTAX' >asm-syntax.h + echo '#include 
"'$srcdir'/x86/syntax.h"' >>asm-syntax.h + path="x86/pentium x86" ;; + +# Motorola 68k configurations. Let m68k mean 68020-68040. +# mc68000 or mc68060 configurations need to be specified explicitly + m680[234]0*-*-linuxaout* | m68k*-*-linuxaout*) + echo '#define MIT_SYNTAX' >asm-syntax.h + echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h + path="m68k/mc68020 m68k" ;; + m68060*-*-linuxaout*) + echo '#define MIT_SYNTAX' >asm-syntax.h + echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h + path="m68k" ;; + m680[234]0*-*-linux* | m68k*-*-linux*) + echo '#define ELF_SYNTAX' >asm-syntax.h + echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h + path="m68k/mc68020 m68k" ;; + m68060*-*-linux*) + echo '#define ELF_SYNTAX' >asm-syntax.h + echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h + path="m68k" ;; + m68000*-*-* | m68060*-*-*) + echo '#define MIT_SYNTAX' >asm-syntax.h + echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h + path="m68k/mc68000" ;; + m680[234]0*-*-* | m68k*-*-*) + echo '#define MIT_SYNTAX' >asm-syntax.h + echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h + path="m68k/mc68020 m68k" ;; + + i960*-*-*) path="i960" ;; + m88k*-*-* | m88k*-*-*) path="m88k" ;; + m88110*-*-*) path="m88k/mc88110 m88k" ;; + ns32k*-*-*) path="n32k" ;; + ppc601-*-*) path="power powerpc32" ;; + ppc620-*-* | powerpc64*-*-*) path="powerpc64" ;; + ppc60[234]*-*-* | powerpc*-*-*) path="powerpc32" ;; + pyramid-*-*) path="pyr" ;; + rs6000-*-* | power-*-* | power2-*-*) + path="power"; extra_functions="udiv_w_sdiv" ;; + sh-*-*) path="sh" ;; + sh2-*-*) path="sh/sh2 sh" ;; + mips[34]*-*-*) path="mips3" ;; + mips*-*-irix6*) path="mips3" ;; + mips*-*-*) path="mips2" ;; + vax*-*-*) path="vax"; extra_functions="udiv_w_sdiv" ;; + z8000x*-*-*) path="z8000x"; extra_functions="udiv_w_sdiv" ;; + z8000*-*-*) path="z8000"; extra_functions="udiv_w_sdiv" ;; + clipper*-*-*) path="clipper" ;; + *-*-*) ;; +esac + +case "${target}" in + *-*-linuxaout* | *-*-linuxoldld*) 
config=bsd.h ;; + *-sysv* | *-solaris* | *-*-linux*) config="sysv.h" ;; + *) config="bsd.h" ;; +esac + +case "${target}" in + i[3456]86*-*-*bsd* | i[3456]86*-*-linuxaout* | i[3456]86*-*-linuxoldld* | \ + pentium-*-*bsd* | pentium-*-linuxaout* | pentium-*-linuxoldld* | \ + pentiumpro-*-*bsd* | pentiumpro-*-linuxaout* | pentiumpro-*-linuxoldld*) + target_makefile_frag=config/t-oldgas ;; + rs6000-*-aix[456789]* | rs6000-*-aix3.2.[456789]) + target_makefile_frag=config/t-pwr-aix ;; + ppc601-*-aix[456789]* | ppc601-*-aix3.2.[456789] | \ + ppc60[234]*-*-aix[456789]* | ppc60[234]*-*-aix3.2.[456789] | \ + powerpc*-*-aix[456789]* | powerpc*-*-aix3.2.[456789]) + target_makefile_frag=config/t-ppc-aix ;; +esac + + +functions="${extra_functions} inlines add_n addmul_1 cmp divmod_1 \ + divrem divrem_1 dump lshift mod_1 mul mul_1 mul_n random2 rshift sqrtrem \ + sub_n submul_1 get_str set_str scan0 scan1 popcount hamdist gcd_1 \ + pre_mod_1 perfsqr bdivmod gcd gcdext" + +path="$path generic" +mpn_objects= + +for fn in $functions ; do + mpn_objects="$mpn_objects $fn.o" + for dir in $path ; do + rm -f $fn.[Ssc] + if test -f $srcdir/$dir/$fn.S ; then + files="$files $dir/$fn.S" + links="$links $fn.S" + break + elif test -f $srcdir/$dir/$fn.s ; then + files="$files $dir/$fn.s" + links="$links $fn.s" + break + elif test -f $srcdir/$dir/$fn.c ; then + files="$files $dir/$fn.c" + links="$links $fn.c" + break + fi + done +done + +for dir in $path ; do + rm -f gmp-mparam.h + if test -f $srcdir/$dir/gmp-mparam.h ; then + files="$files $dir/gmp-mparam.h" + links="$links gmp-mparam.h" + break + fi +done + +links="sysdep.h $links" +files="$config $files" + +mpn_links=$links + +# post-target: + +sed <Makefile >Makefile.tmp \ + -e "s/MPN_LINKS = .*/MPN_LINKS =${mpn_links}/" \ + -e "s/MPN_OBJECTS = .*/MPN_OBJECTS =${mpn_objects}/" + +mv Makefile.tmp Makefile diff --git a/gnu/lib/libgmp/mpn/cray/gmp-mparam.h b/gnu/lib/libgmp/mpn/cray/gmp-mparam.h new file mode 100644 index 
00000000000..349c812d45a --- /dev/null +++ b/gnu/lib/libgmp/mpn/cray/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 64 +#define BITS_PER_SHORTINT 32 +#define BITS_PER_CHAR 8 diff --git a/gnu/lib/libgmp/mpn/generic/add_n.c b/gnu/lib/libgmp/mpn/generic/add_n.c new file mode 100644 index 00000000000..9d71df110c5 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/add_n.c @@ -0,0 +1,62 @@ +/* mpn_add_n -- Add two limb vectors of equal, non-zero length. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +#if __STDC__ +mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +mpn_add_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = (y < cy); /* get out carry from that addition */ + y = x + y; /* add other addend */ + cy = (y < x) + cy; /* get out carry from that add, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} diff --git a/gnu/lib/libgmp/mpn/generic/addmul_1.c b/gnu/lib/libgmp/mpn/generic/addmul_1.c new file mode 100644 index 00000000000..3a5e21400ad --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/addmul_1.c @@ -0,0 +1,65 @@ +/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR + by S2_LIMB, add the S1_SIZE least significant limbs of the product to the + limb vector pointed to by RES_PTR. Return the most significant limb of + the product, adjusted for carry-out from the addition. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. 
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + register mp_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + x = res_ptr[j]; + prod_low = x + prod_low; + cy_limb += (prod_low < x); + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} diff --git a/gnu/lib/libgmp/mpn/generic/bdivmod.c b/gnu/lib/libgmp/mpn/generic/bdivmod.c new file mode 100644 index 00000000000..f095288b8bb --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/bdivmod.c @@ -0,0 +1,129 @@ +/* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d. 
+ +Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d). + + Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and + returns the high d%BITS_PER_MP_LIMB bits of Q as the result. + + Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up. Since the + low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows + the limb vectors at qp to overwrite the low limbs at up, provided qp <= up. + + Preconditions: + 1. V is odd. + 2. usize * BITS_PER_MP_LIMB >= d. + 3. If Q and U overlap, qp <= up. + + Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu) + + Funding for this work has been partially provided by Conselho Nacional + de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant + 301314194-2, and was done while I was a visiting researcher in the Instituto + de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS). + + References: + T. Jebelean, An algorithm for exact division, Journal of Symbolic + Computation, v. 15, 1993, pp. 169-180. + + K. Weber, The accelerated integer GCD algorithm, ACM Transactions on + Mathematical Software, v.
21 (March), 1995, pp. 111-122. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +#if __STDC__ +mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize, + mp_srcptr vp, mp_size_t vsize, unsigned long int d) +#else +mpn_bdivmod (qp, up, usize, vp, vsize, d) + mp_ptr qp; + mp_ptr up; + mp_size_t usize; + mp_srcptr vp; + mp_size_t vsize; + unsigned long int d; +#endif +{ + /* Cache for v_inv is used to make mpn_accelgcd faster. */ + static mp_limb_t previous_low_vlimb = 0; + static mp_limb_t v_inv; /* 1/V mod 2^BITS_PER_MP_LIMB. */ + + if (vp[0] != previous_low_vlimb) /* Cache miss; compute v_inv. */ + { + mp_limb_t v = previous_low_vlimb = vp[0]; + mp_limb_t make_zero = 1; + mp_limb_t two_i = 1; + v_inv = 0; + do + { + while ((two_i & make_zero) == 0) + two_i <<= 1, v <<= 1; + v_inv += two_i; + make_zero -= v; + } + while (make_zero); + } + + /* Need faster computation for some common cases in mpn_accelgcd. */ + if (usize == 2 && vsize == 2 && + (d == BITS_PER_MP_LIMB || d == 2*BITS_PER_MP_LIMB)) + { + mp_limb_t hi, lo; + mp_limb_t q = up[0] * v_inv; + umul_ppmm (hi, lo, q, vp[0]); + up[0] = 0, up[1] -= hi + q*vp[1], qp[0] = q; + if (d == 2*BITS_PER_MP_LIMB) + q = up[1] * v_inv, up[1] = 0, qp[1] = q; + return 0; + } + + /* Main loop. 
*/ + while (d >= BITS_PER_MP_LIMB) + { + mp_limb_t q = up[0] * v_inv; + mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q); + if (usize > vsize) + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + d -= BITS_PER_MP_LIMB; + up += 1, usize -= 1; + *qp++ = q; + } + + if (d) + { + mp_limb_t b; + mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1); + switch (q) + { + case 0: return 0; + case 1: b = mpn_sub_n (up, up, vp, MIN (usize, vsize)); break; + default: b = mpn_submul_1 (up, vp, MIN (usize, vsize), q); break; + } + if (usize > vsize) + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + return q; + } + + return 0; +} diff --git a/gnu/lib/libgmp/mpn/generic/cmp.c b/gnu/lib/libgmp/mpn/generic/cmp.c new file mode 100644 index 00000000000..4e9c60d86e5 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/cmp.c @@ -0,0 +1,56 @@ +/* mpn_cmp -- Compare two low-level natural-number integers. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Compare OP1_PTR/SIZE with OP2_PTR/SIZE. + Both operands are exactly SIZE limbs long; limbs are compared + from index SIZE-1 downwards.
+ Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. */ + +int +#if __STDC__ +mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size) +#else +mpn_cmp (op1_ptr, op2_ptr, size) + mp_srcptr op1_ptr; + mp_srcptr op2_ptr; + mp_size_t size; +#endif +{ + mp_size_t i; + mp_limb_t op1_word, op2_word; + + for (i = size - 1; i >= 0; i--) + { + op1_word = op1_ptr[i]; + op2_word = op2_ptr[i]; + if (op1_word != op2_word) + goto diff; + } + return 0; + diff: + /* This can *not* be simplified to + op1_word - op2_word + since that expression might give signed overflow. */ + return (op1_word > op2_word) ? 1 : -1; +} diff --git a/gnu/lib/libgmp/mpn/generic/divmod_1.c b/gnu/lib/libgmp/mpn/generic/divmod_1.c new file mode 100644 index 00000000000..f93841f63fe --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/divmod_1.c @@ -0,0 +1,208 @@ +/* mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + Return the single-limb remainder. + There are no constraints on the value of the divisor. + + QUOT_PTR and DIVIDEND_PTR might point to the same limb. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB.
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +/* FIXME: We should be using invert_limb (or invert_normalized_limb) + here (not udiv_qrnnd). */ + +mp_limb_t +#if __STDC__ +mpn_divmod_1 (mp_ptr quot_ptr, + mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb) +#else +mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb) + mp_ptr quot_ptr; + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; +#endif +{ + mp_size_t i; + mp_limb_t n1, n0, r; + mp_limb_t dummy; /* discarded remainder of the udiv_qrnnd inversion; limb-typed like every other remainder operand here */ + + /* ??? Should this be handled at all? Rely on callers? */ + if (dividend_size == 0) + return 0; + + /* If multiplication is much faster than division, and the + dividend is large, pre-invert the divisor, and use + only multiplications in the inner loop. */ + + /* This test should be read: + Does it ever help to use udiv_qrnnd_preinv? + && Does what we save compensate for the inversion overhead? */ + if (UDIV_TIME > (2 * UMUL_TIME + 6) + && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) + { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if (normalization_steps != 0) + { + mp_limb_t divisor_limb_inverted; + + divisor_limb <<= normalization_steps; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + most significant bit (with weight 2**N) implicit. */ + + /* Special case for DIVISOR_LIMB == 100...000.
*/ + if (divisor_limb << 1 == 0) + divisor_limb_inverted = ~(mp_limb_t) 0; + else + udiv_qrnnd (divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); + + /* Possible optimization: + if (r == 0 + && divisor_limb > ((n1 << normalization_steps) + | (dividend_ptr[dividend_size - 2] >> ...))) + ...one division less... */ + + for (i = dividend_size - 2; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (quot_ptr[i + 1], r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), + divisor_limb, divisor_limb_inverted); + n1 = n0; + } + udiv_qrnnd_preinv (quot_ptr[0], r, r, + n1 << normalization_steps, + divisor_limb, divisor_limb_inverted); + return r >> normalization_steps; + } + else + { + mp_limb_t divisor_limb_inverted; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + most significant bit (with weight 2**N) implicit. */ + + /* Special case for DIVISOR_LIMB == 100...000.
*/ + if (divisor_limb << 1 == 0) + divisor_limb_inverted = ~(mp_limb_t) 0; + else + udiv_qrnnd (divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + { + quot_ptr[i] = 0; + i--; + } + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (quot_ptr[i], r, r, + n0, divisor_limb, divisor_limb_inverted); + } + return r; + } + } + else + { + if (UDIV_NEEDS_NORMALIZATION) + { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if (normalization_steps != 0) + { + divisor_limb <<= normalization_steps; + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); + + /* Possible optimization: + if (r == 0 + && divisor_limb > ((n1 << normalization_steps) + | (dividend_ptr[dividend_size - 2] >> ...))) + ...one division less... */ + + for (i = dividend_size - 2; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd (quot_ptr[i + 1], r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), + divisor_limb); + n1 = n0; + } + udiv_qrnnd (quot_ptr[0], r, r, + n1 << normalization_steps, + divisor_limb); + return r >> normalization_steps; + } + } + /* No normalization needed, either because udiv_qrnnd doesn't require + it, or because DIVISOR_LIMB is already normalized. */ + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + { + quot_ptr[i] = 0; + i--; + } + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd (quot_ptr[i], r, r, n0, divisor_limb); + } + return r; + } +} diff --git a/gnu/lib/libgmp/mpn/generic/divrem.c b/gnu/lib/libgmp/mpn/generic/divrem.c new file mode 100644 index 00000000000..1fe865a10bd --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/divrem.c @@ -0,0 +1,245 @@ +/* mpn_divrem -- Divide natural numbers, producing both remainder and + quotient.
 + +Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write + the NSIZE-DSIZE least significant quotient limbs at QP + and the DSIZE long remainder at NP. If QEXTRA_LIMBS is + non-zero, generate that many fraction limbs and append them after the + other quotient limbs. + Return the most significant limb of the quotient; this is always 0 or 1. + + Preconditions: + 0. NSIZE >= DSIZE. + 1. The most significant bit of the divisor must be set. + 2. QP must either not overlap with the input operands at all, or + QP + DSIZE >= NP must hold true. (This means that it's + possible to put the quotient in the high part of NUM, right after the + remainder in NUM.) + 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. 
*/ + +mp_limb_t +#if __STDC__ +mpn_divrem (mp_ptr qp, mp_size_t qextra_limbs, + mp_ptr np, mp_size_t nsize, + mp_srcptr dp, mp_size_t dsize) +#else +mpn_divrem (qp, qextra_limbs, np, nsize, dp, dsize) + mp_ptr qp; + mp_size_t qextra_limbs; + mp_ptr np; + mp_size_t nsize; + mp_srcptr dp; + mp_size_t dsize; +#endif +{ + mp_limb_t most_significant_q_limb = 0; /* Return value; set to 1 when the top DSIZE limbs of NUM are >= DEN. */ + + switch (dsize) + { + case 0: + /* We are asked to divide by zero, so go ahead and do it! (To make + the compiler not remove this statement, return the value.) */ + return 1 / dsize; + + case 1: /* One-limb divisor: limb-by-limb long division. */ + { + mp_size_t i; + mp_limb_t n1; + mp_limb_t d; + + d = dp[0]; + n1 = np[nsize - 1]; + + if (n1 >= d) + { + n1 -= d; + most_significant_q_limb = 1; + } + + qp += qextra_limbs; /* Integer quotient limbs are stored above the fraction limbs. */ + for (i = nsize - 2; i >= 0; i--) + udiv_qrnnd (qp[i], n1, n1, np[i], d); + qp -= qextra_limbs; + + for (i = qextra_limbs - 1; i >= 0; i--) /* Fraction limbs: keep dividing with zero limbs as the low dividend limb. */ + udiv_qrnnd (qp[i], n1, n1, 0, d); + + np[0] = n1; /* The one-limb remainder. */ + } + break; + + case 2: /* Two-limb divisor. */ + { + mp_size_t i; + mp_limb_t n1, n0, n2; + mp_limb_t d1, d0; + + np += nsize - 2; + d1 = dp[1]; + d0 = dp[0]; + n1 = np[1]; + n0 = np[0]; + + if (n1 >= d1 && (n1 > d1 || n0 >= d0)) + { + sub_ddmmss (n1, n0, n1, n0, d1, d0); + most_significant_q_limb = 1; + } + + for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) + { + mp_limb_t q; + mp_limb_t r; + + if (i >= qextra_limbs) + np--; + else + np[0] = 0; /* Fraction limb: the dividend is exhausted, so bring in a zero limb. */ + + if (n1 == d1) + { + /* Q should be either 111..111 or 111..110. Need special + treatment of this rare case as normal division would + give overflow. */ + q = ~(mp_limb_t) 0; + + r = n0 + d1; + if (r < d1) /* Carry in the addition? */ + { + add_ssaaaa (n1, n0, r - d0, np[0], 0, d0); + qp[i] = q; + continue; + } + n1 = d0 - (d0 != 0); + n0 = -d0; + } + else + { + udiv_qrnnd (q, r, n1, n0, d1); + umul_ppmm (n1, n0, d0, q); + } + + n2 = np[0]; + q_test: + if (n1 > r || (n1 == r && n0 > n2)) + { + /* The estimated Q was too large. 
*/ + q--; + + sub_ddmmss (n1, n0, n1, n0, 0, d0); + r += d1; + if (r >= d1) /* If not carry, test Q again. */ + goto q_test; + } + + qp[i] = q; + sub_ddmmss (n1, n0, r, n2, n1, n0); + } + np[1] = n1; /* Store the two-limb remainder. */ + np[0] = n0; + } + break; + + default: /* Divisor of three or more limbs. */ + { + mp_size_t i; + mp_limb_t dX, d1, n0; + + np += nsize - dsize; + dX = dp[dsize - 1]; + d1 = dp[dsize - 2]; + n0 = np[dsize - 1]; + + if (n0 >= dX) + { + if (n0 > dX || mpn_cmp (np, dp, dsize - 1) >= 0) + { + mpn_sub_n (np, np, dp, dsize); + n0 = np[dsize - 1]; + most_significant_q_limb = 1; + } + } + + for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) + { + mp_limb_t q; + mp_limb_t n1, n2; + mp_limb_t cy_limb; + + if (i >= qextra_limbs) + { + np--; + n2 = np[dsize]; + } + else + { + n2 = np[dsize - 1]; + MPN_COPY_DECR (np + 1, np, dsize); + np[0] = 0; + } + + if (n0 == dX) + /* This might over-estimate q, but it's probably not worth + the extra code here to find out. */ + q = ~(mp_limb_t) 0; + else + { + mp_limb_t r; + + udiv_qrnnd (q, r, n0, np[dsize - 1], dX); + umul_ppmm (n1, n0, d1, q); + + while (n1 > r || (n1 == r && n0 > np[dsize - 2])) + { + q--; + r += dX; + if (r < dX) /* I.e. "carry in previous addition?" */ + break; + n1 -= n0 < d1; + n0 -= d1; + } + } + + /* Possible optimization: We already have (q * n0) and (1 * n1) + after the calculation of q. Taking advantage of that, we + could make this loop make two iterations less. */ + + cy_limb = mpn_submul_1 (np, dp, dsize, q); + + if (n2 != cy_limb) /* Borrow mismatch: Q was one too large, so add the divisor back. */ + { + mpn_add_n (np, np, dp, dsize); + q--; + } + + qp[i] = q; + n0 = np[dsize - 1]; + } + } + } + + return most_significant_q_limb; +} diff --git a/gnu/lib/libgmp/mpn/generic/divrem_1.c b/gnu/lib/libgmp/mpn/generic/divrem_1.c new file mode 100644 index 00000000000..d2132673882 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/divrem_1.c @@ -0,0 +1,58 @@ +/* mpn_divrem_1(quot_ptr, qsize, dividend_ptr, dividend_size, divisor_limb) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. 
+ Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + Return the single-limb remainder. + There are no constraints on the value of the divisor. + + QUOT_PTR and DIVIDEND_PTR might point to the same limb. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +#if __STDC__ +mpn_divrem_1 (mp_ptr qp, mp_size_t qsize, + mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb) +#else +mpn_divrem_1 (qp, qsize, dividend_ptr, dividend_size, divisor_limb) + mp_ptr qp; + mp_size_t qsize; + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; +#endif +{ + mp_limb_t rlimb; + long i; + + /* Develop integer part of quotient. 
*/ + rlimb = mpn_divmod_1 (qp + qsize, dividend_ptr, dividend_size, divisor_limb); + + if (qsize != 0) + { + for (i = qsize - 1; i >= 0; i--) + udiv_qrnnd (qp[i], rlimb, rlimb, 0, divisor_limb); + } + return rlimb; +} diff --git a/gnu/lib/libgmp/mpn/generic/dump.c b/gnu/lib/libgmp/mpn/generic/dump.c new file mode 100644 index 00000000000..a5831c4cc95 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/dump.c @@ -0,0 +1,20 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" + +void +mpn_dump (ptr, size) + mp_srcptr ptr; + mp_size_t size; +{ + if (size == 0) + printf ("0\n"); + { + while (size) + { + size--; + printf ("%0*lX", (int) (2 * BYTES_PER_MP_LIMB), ptr[size]); + } + printf ("\n"); + } +} diff --git a/gnu/lib/libgmp/mpn/generic/gcd.c b/gnu/lib/libgmp/mpn/generic/gcd.c new file mode 100644 index 00000000000..8c2bbf0bea8 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/gcd.c @@ -0,0 +1,402 @@ +/* mpn/gcd.c: mpn_gcd for gcd of two odd integers. + +Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* Integer greatest common divisor of two unsigned integers, using + the accelerated algorithm (see reference below). 
 + + mp_size_t mpn_gcd (gp, vp, vsize, up, usize). + + Preconditions [U = (up, usize) and V = (vp, vsize)]: + + 1. V is odd. + 2. numbits(U) >= numbits(V). + + Both U and V are destroyed by the operation. The result is stored at gp, + and its size is returned. + + Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu) + + Funding for this work has been partially provided by Conselho Nacional + de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant + 301314194-2, and was done while I was a visiting researcher in the Instituto + de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS). + + Refer to + K. Weber, The accelerated integer GCD algorithm, ACM Transactions on + Mathematical Software, v. 21 (March), 1995, pp. 111-122. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* If MIN (usize, vsize) > ACCEL_THRESHOLD, then the accelerated algorithm is + used, otherwise the binary algorithm is used. This may be adjusted for + different architectures. */ +#ifndef ACCEL_THRESHOLD +#define ACCEL_THRESHOLD 4 +#endif + +/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated + algorithm reduces using the bmod operation. Otherwise, the k-ary reduction + is used. 0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB. */ +enum + { + BMOD_THRESHOLD = BITS_PER_MP_LIMB/2 + }; + +#define SIGN_BIT (~(~(mp_limb_t)0 >> 1)) /* Only the most significant bit of a limb set. */ + + +#define SWAP_LIMB(UL, VL) do{mp_limb_t __l=(UL);(UL)=(VL);(VL)=__l;}while(0) +#define SWAP_PTR(UP, VP) do{mp_ptr __p=(UP);(UP)=(VP);(VP)=__p;}while(0) +#define SWAP_SZ(US, VS) do{mp_size_t __s=(US);(US)=(VS);(VS)=__s;}while(0) +#define SWAP_MPN(UP, US, VP, VS) do{SWAP_PTR(UP,VP);SWAP_SZ(US,VS);}while(0) + +/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2. + Both U and V must be odd. 
*/ +static __gmp_inline mp_size_t +#if __STDC__ +gcd_2 (mp_ptr vp, mp_srcptr up) +#else +gcd_2 (vp, up) + mp_ptr vp; + mp_srcptr up; +#endif +{ + mp_limb_t u0, u1, v0, v1; + mp_size_t vsize; + + u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1]; + + while (u1 != v1 && u0 != v0) + { + unsigned long int r; + if (u1 > v1) + { + u1 -= v1 + (u0 < v0), u0 -= v0; + count_trailing_zeros (r, u0); + u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r; + u1 >>= r; + } + else /* u1 < v1. */ + { + v1 -= u1 + (v0 < u0), v0 -= u0; + count_trailing_zeros (r, v0); + v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r; + v1 >>= r; + } + } + + vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0); /* Store V; it is one limb long when its high limb is zero. */ + + /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */ + if (u1 == v1 && u0 == v0) + return vsize; + + v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0; + vp[0] = mpn_gcd_1 (vp, vsize, v0); + + return 1; /* The mpn_gcd_1 result is a single limb. */ +} + +/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists + 0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB). + In the reference article, D was computed along with N, but it is better to + compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating + the result as a twos' complement signed integer. + + Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB). According to the reference + article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use + 2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double + precision. If N2 > N1 initially, the first iteration of the while loop + will swap them. In all other situations, N1 >= N2 is maintained. */ + +static __gmp_inline mp_limb_t +#if __STDC__ +find_a (mp_srcptr cp) +#else +find_a (cp) + mp_srcptr cp; +#endif +{ + unsigned long int leading_zero_bits = 0; + + mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l. */ + mp_limb_t n1_h = cp[1]; + + mp_limb_t n2_l = -n1_l; /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l. 
*/ + mp_limb_t n2_h = ~n1_h; + + /* Main loop. */ + while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */ + { + /* N1 <-- N1 % N2. */ + if ((SIGN_BIT >> leading_zero_bits & n2_h) == 0) + { + unsigned long int i; + count_leading_zeros (i, n2_h); + i -= leading_zero_bits, leading_zero_bits += i; + n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i; + do + { + if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) + n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; + n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1; + i -= 1; + } + while (i); + } + if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) + n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; + + SWAP_LIMB (n1_h, n2_h); /* (N1, N2) <-- (N2, reduced N1). */ + SWAP_LIMB (n1_l, n2_l); + } + + return n2_l; +} + +mp_size_t +#if __STDC__ +mpn_gcd (mp_ptr gp, mp_ptr vp, mp_size_t vsize, mp_ptr up, mp_size_t usize) +#else +mpn_gcd (gp, vp, vsize, up, usize) + mp_ptr gp; + mp_ptr vp; + mp_size_t vsize; + mp_ptr up; + mp_size_t usize; +#endif +{ + mp_ptr orig_vp = vp; + mp_size_t orig_vsize = vsize; + int binary_gcd_ctr; /* Number of times binary gcd will execute. */ + TMP_DECL (marker); + + TMP_MARK (marker); + + /* Use accelerated algorithm if vsize is over ACCEL_THRESHOLD. + Two EXTRA limbs for U and V are required for kary reduction. */ + if (vsize > ACCEL_THRESHOLD) + { + unsigned long int vbitsize, d; + mp_ptr orig_up = up; + mp_size_t orig_usize = usize; + mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB); + + MPN_COPY (anchor_up, orig_up, usize); /* Reduce a scratch copy of U; ORIG_U is used again by the final binary pass. */ + up = anchor_up; + + count_leading_zeros (d, up[usize-1]); + d = usize * BITS_PER_MP_LIMB - d; /* d = numbits(U). */ + count_leading_zeros (vbitsize, vp[vsize-1]); + vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; /* vbitsize = numbits(V). */ + d = d - vbitsize + 1; + + /* Use bmod reduction to quickly discover whether V divides U. */ + up[usize++] = 0; /* Insert leading zero. */ + mpn_bdivmod (up, up, usize, vp, vsize, d); + + /* Now skip U/V mod 2^d and any low zero limbs. 
*/ + d /= BITS_PER_MP_LIMB, up += d, usize -= d; + while (usize != 0 && up[0] == 0) + up++, usize--; + + if (usize == 0) /* GCD == ORIG_V. */ + goto done; + + vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB); + MPN_COPY (vp, orig_vp, vsize); /* Reduce a scratch copy of V; ORIG_V is used again by the final binary pass. */ + + do /* Main loop. */ + { + if (up[usize-1] & SIGN_BIT) /* U < 0; take twos' compl. */ + { + mp_size_t i; + anchor_up[0] = -up[0]; + for (i = 1; i < usize; i++) + anchor_up[i] = ~up[i]; + up = anchor_up; + } + + MPN_NORMALIZE_NOT_ZERO (up, usize); + + if ((up[0] & 1) == 0) /* Result even; remove twos. */ + { + unsigned long int r; + count_trailing_zeros (r, up[0]); + mpn_rshift (anchor_up, up, usize, r); + usize -= (anchor_up[usize-1] == 0); + } + else if (anchor_up != up) + MPN_COPY (anchor_up, up, usize); + + SWAP_MPN (anchor_up, usize, vp, vsize); /* The reduced value becomes the new V; the old V becomes U. */ + up = anchor_up; + + if (vsize <= 2) /* Kary can't handle < 2 limbs and */ + break; /* isn't efficient for == 2 limbs. */ + + d = vbitsize; + count_leading_zeros (vbitsize, vp[vsize-1]); + vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + d = d - vbitsize + 1; + + if (d > BMOD_THRESHOLD) /* Bmod reduction. */ + { + up[usize++] = 0; + mpn_bdivmod (up, up, usize, vp, vsize, d); + d /= BITS_PER_MP_LIMB, up += d, usize -= d; + } + else /* Kary reduction. */ + { + mp_limb_t bp[2], cp[2]; + + /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB). */ + cp[0] = vp[0], cp[1] = vp[1]; + mpn_bdivmod (cp, cp, 2, up, 2, 2*BITS_PER_MP_LIMB); + + /* U <-- find_a (C) * U. */ + up[usize] = mpn_mul_1 (up, up, usize, find_a (cp)); + usize++; + + /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1). + bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and + bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2 */ + bp[0] = up[0], bp[1] = up[1]; + mpn_bdivmod (bp, bp, 2, vp, 2, BITS_PER_MP_LIMB); + bp[1] &= 1; /* Since V is odd, division is unnecessary. */ + + up[usize++] = 0; + if (bp[1]) /* B < 0: U <-- U + (-B) * V. 
*/ + { + mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]); + mpn_add_1 (up + vsize, up + vsize, usize - vsize, c); + } + else /* B >= 0: U <-- U - B * V. */ + { + mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]); + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + } + + up += 2, usize -= 2; /* At least two low limbs are zero. */ + } + + /* Must remove low zero limbs before complementing. */ + while (usize != 0 && up[0] == 0) + up++, usize--; + } + while (usize); + + /* Compute GCD (ORIG_V, GCD (ORIG_U, V)). Binary will execute twice. */ + up = orig_up, usize = orig_usize; + binary_gcd_ctr = 2; + } + else + binary_gcd_ctr = 1; + + /* Finish up with the binary algorithm. Executes once or twice. */ + for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize) + { + if (usize > 2) /* First make U close to V in size. */ + { + unsigned long int vbitsize, d; + count_leading_zeros (d, up[usize-1]); + d = usize * BITS_PER_MP_LIMB - d; + count_leading_zeros (vbitsize, vp[vsize-1]); + vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + d = d - vbitsize - 1; /* d is unsigned: it wraps to -1 when U and V have the same bit length. */ + if (d != -(unsigned long int)1 && d > 2) + { + mpn_bdivmod (up, up, usize, vp, vsize, d); /* Result > 0. */ + d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d; + } + } + + /* Start binary GCD. */ + do + { + mp_size_t zeros; + + /* Make sure U is odd. */ + MPN_NORMALIZE (up, usize); + while (up[0] == 0) + up += 1, usize -= 1; + if ((up[0] & 1) == 0) + { + unsigned long int r; + count_trailing_zeros (r, up[0]); + mpn_rshift (up, up, usize, r); + usize -= (up[usize-1] == 0); + } + + /* Keep usize >= vsize. */ + if (usize < vsize) + SWAP_MPN (up, usize, vp, vsize); + + if (usize <= 2) /* Double precision. */ + { + if (vsize == 1) + vp[0] = mpn_gcd_1 (up, usize, vp[0]); + else + vsize = gcd_2 (vp, up); + break; /* Binary GCD done. */ + } + + /* Count number of low zero limbs of U - V. 
*/ + for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; ) + continue; + + /* If U < V, swap U and V; in any case, subtract V from U. */ + if (zeros == vsize) /* Subtract done. */ + up += zeros, usize -= zeros; + else if (usize == vsize) + { + mp_size_t size = vsize; + do /* Find the most significant limb in which U and V differ. */ + size--; + while (up[size] == vp[size]); + if (up[size] < vp[size]) /* usize == vsize. */ + SWAP_PTR (up, vp); + up += zeros, usize = size + 1 - zeros; + mpn_sub_n (up, up, vp + zeros, usize); + } + else + { + mp_size_t size = vsize - zeros; + up += zeros, usize -= zeros; + if (mpn_sub_n (up, up, vp + zeros, size)) + { + while (up[size] == 0) /* Propagate borrow. */ + up[size++] = -(mp_limb_t)1; + up[size] -= 1; + } + } + } + while (usize); /* End binary GCD. */ + } + +done: + if (vp != gp) /* Copy the result to GP unless it is already there. */ + MPN_COPY (gp, vp, vsize); + TMP_FREE (marker); + return vsize; +} diff --git a/gnu/lib/libgmp/mpn/generic/gcd_1.c b/gnu/lib/libgmp/mpn/generic/gcd_1.c new file mode 100644 index 00000000000..ebcdfb59159 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/gcd_1.c @@ -0,0 +1,73 @@ +/* mpn_gcd_1 -- + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Does not work for U == 0 or V == 0. It would be tough to make it work for + V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t. */ + +mp_limb_t +mpn_gcd_1 (up, size, vlimb) + mp_srcptr up; + mp_size_t size; + mp_limb_t vlimb; +{ + mp_limb_t ulimb; + unsigned long int u_low_zero_bits, v_low_zero_bits; + + if (size > 1) /* Multi-limb U: replace it by U mod VLIMB, which has the same gcd with VLIMB. */ + { + ulimb = mpn_mod_1 (up, size, vlimb); + if (ulimb == 0) + return vlimb; /* VLIMB divides U. */ + } + else + ulimb = up[0]; + + /* Need to eliminate low zero bits. */ + count_trailing_zeros (u_low_zero_bits, ulimb); + ulimb >>= u_low_zero_bits; + + count_trailing_zeros (v_low_zero_bits, vlimb); + vlimb >>= v_low_zero_bits; + + while (ulimb != vlimb) /* Both values are odd here: subtract the smaller from the larger, then shift out the low zero bits of the difference. */ + { + if (ulimb > vlimb) + { + ulimb -= vlimb; + do + ulimb >>= 1; + while ((ulimb & 1) == 0); + } + else /* vlimb > ulimb. */ + { + vlimb -= ulimb; + do + vlimb >>= 1; + while ((vlimb & 1) == 0); + } + } + + return ulimb << MIN (u_low_zero_bits, v_low_zero_bits); /* Put back the power of two common to both inputs. */ +} diff --git a/gnu/lib/libgmp/mpn/generic/gcdext.c b/gnu/lib/libgmp/mpn/generic/gcdext.c new file mode 100644 index 00000000000..245e20a4d52 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/gcdext.c @@ -0,0 +1,441 @@ +/* mpn_gcdext -- Extended Greatest Common Divisor. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. 
 + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef EXTEND +#define EXTEND 1 +#endif + +#if STAT +int arr[BITS_PER_MP_LIMB]; +#endif + +#define SGN(A) (((A) < 0) ? -1 : ((A) > 0)) + +/* Idea 1: After we have performed a full division, don't shift operands back, + but instead account for the extra factors-of-2 thus introduced. + Idea 2: Simple generalization to use divide-and-conquer would give us an + algorithm that runs faster than O(n^2). + Idea 3: The input numbers need less space as the computation progresses, + while the s0 and s1 variables need more space. To save space, we + could make them share space, and have the latter variables grow + into the former. */ + +/* Precondition: U >= V. */ + +mp_size_t +#if EXTEND +#if __STDC__ +mpn_gcdext (mp_ptr gp, mp_ptr s0p, + mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize) +#else +mpn_gcdext (gp, s0p, up, size, vp, vsize) + mp_ptr gp; + mp_ptr s0p; + mp_ptr up; + mp_size_t size; + mp_ptr vp; + mp_size_t vsize; +#endif +#else +#if __STDC__ +mpn_gcd (mp_ptr gp, + mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize) +#else +mpn_gcd (gp, up, size, vp, vsize) + mp_ptr gp; + mp_ptr up; + mp_size_t size; + mp_ptr vp; + mp_size_t vsize; +#endif +#endif +{ + mp_limb_t uh, vh; + mp_limb_signed_t A, B, C, D; + int cnt; + mp_ptr tp, wp; +#if RECORD + mp_limb_signed_t min = 0, max = 0; +#endif +#if EXTEND + mp_ptr s1p; + mp_ptr orig_s0p = s0p; + mp_size_t ssize, orig_size = size; + TMP_DECL (mark); + + TMP_MARK (mark); + + tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); /* NOTE(review): tp, wp and mark are only set up under EXTEND, yet they are used outside it below; the !EXTEND build looks incomplete -- confirm. */ + wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); + s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB); + + MPN_ZERO (s0p, size); + MPN_ZERO (s1p, size); + + s0p[0] 
= 1; + s1p[0] = 0; + ssize = 1; +#endif + + if (size > vsize) + { + /* Normalize V (and shift up U the same amount). */ + count_leading_zeros (cnt, vp[vsize - 1]); + if (cnt != 0) + { + mp_limb_t cy; + mpn_lshift (vp, vp, vsize, cnt); + cy = mpn_lshift (up, up, size, cnt); + up[size] = cy; /* NOTE(review): stores one limb past SIZE; presumably the caller must provide SIZE + 1 limbs at UP -- confirm. */ + size += cy != 0; + } + + mpn_divmod (up + vsize, up, size, vp, vsize); +#if EXTEND + /* This is really what it boils down to in this case... */ + s0p[0] = 0; + s1p[0] = 1; +#endif + size = vsize; + if (cnt != 0) + { + mpn_rshift (up, up, size, cnt); + mpn_rshift (vp, vp, size, cnt); + } + { + mp_ptr xp; + xp = up; up = vp; vp = xp; + } + } + + for (;;) + { + /* Figure out exact size of V. */ + vsize = size; + MPN_NORMALIZE (vp, vsize); + if (vsize <= 1) + break; + + /* Make UH be the most significant limb of U, and make VH be + corresponding bits from V. */ + uh = up[size - 1]; + vh = vp[size - 1]; + count_leading_zeros (cnt, uh); + if (cnt != 0) + { + uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt)); + vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt)); + } + +#if 0 + /* For now, only handle BITS_PER_MP_LIMB-1 bits. This makes + room for sign bit. */ + uh >>= 1; + vh >>= 1; +#endif + A = 1; /* (A, B; C, D) starts out as the identity matrix. */ + B = 0; + C = 0; + D = 1; + + for (;;) /* Run division steps on UH and VH alone for as long as both quotient estimates agree. */ + { + mp_limb_signed_t q, T; + if (vh + C == 0 || vh + D == 0) + break; + + q = (uh + A) / (vh + C); + if (q != (uh + B) / (vh + D)) + break; + + T = A - q * C; + A = C; + C = T; + T = B - q * D; + B = D; + D = T; + T = uh - q * vh; + uh = vh; + vh = T; + } + +#if RECORD + min = MIN (A, min); min = MIN (B, min); + min = MIN (C, min); min = MIN (D, min); + max = MAX (A, max); max = MAX (B, max); + max = MAX (C, max); max = MAX (D, max); +#endif + + if (B == 0) /* B is still 0 (presumably no single-limb step was taken above): do one full division step instead. */ + { + mp_limb_t qh; + mp_size_t i; + + /* This is quite rare. I.e., optimize something else! */ + + /* Normalize V (and shift up U the same amount). 
*/ + count_leading_zeros (cnt, vp[vsize - 1]); + if (cnt != 0) + { + mp_limb_t cy; + mpn_lshift (vp, vp, vsize, cnt); + cy = mpn_lshift (up, up, size, cnt); + up[size] = cy; + size += cy != 0; + } + + qh = mpn_divmod (up + vsize, up, size, vp, vsize); +#if EXTEND + MPN_COPY (tp, s0p, ssize); + for (i = 0; i < size - vsize; i++) + { + mp_limb_t cy; + cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]); + if (cy != 0) + tp[ssize++] = cy; + } + if (qh != 0) + { + mp_limb_t cy; + abort (); /* NOTE(review): a non-zero high quotient limb is not handled; the statements after this call are never reached. */ + /* XXX since qh == 1, mpn_addmul_1 is overkill */ + cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh); + if (cy != 0) + tp[ssize++] = cy; + } +#if 0 + MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */ + MPN_COPY (s1p, tp, ssize); +#else + { + mp_ptr xp; /* Rotate the buffers: (s0p, s1p, tp) <-- (s1p, tp, s0p). */ + xp = s0p; s0p = s1p; s1p = xp; + xp = s1p; s1p = tp; tp = xp; + } +#endif +#endif + size = vsize; + if (cnt != 0) + { + mpn_rshift (up, up, size, cnt); + mpn_rshift (vp, vp, size, cnt); + } + + { + mp_ptr xp; + xp = up; up = vp; vp = xp; + } + MPN_NORMALIZE (up, size); + } + else + { + /* T = U*A + V*B + W = U*C + V*D + U = T + V = W */ + + if (SGN(A) == SGN(B)) /* should be different sign */ + abort (); + if (SGN(C) == SGN(D)) /* should be different sign */ + abort (); +#if STAT + { mp_limb_t x; + x = ABS (A) | ABS (B) | ABS (C) | ABS (D); + count_leading_zeros (cnt, x); + arr[BITS_PER_MP_LIMB - cnt]++; } +#endif + if (A == 0) + { + if (B != 1) abort (); + MPN_COPY (tp, vp, size); + } + else + { + if (A < 0) + { + mpn_mul_1 (tp, vp, size, B); + mpn_submul_1 (tp, up, size, -A); + } + else + { + mpn_mul_1 (tp, up, size, A); + mpn_submul_1 (tp, vp, size, -B); + } + } + if (C < 0) + { + mpn_mul_1 (wp, vp, size, D); + mpn_submul_1 (wp, up, size, -C); + } + else + { + mpn_mul_1 (wp, up, size, C); + mpn_submul_1 (wp, vp, size, -D); + } + + { + mp_ptr xp; /* The new U and V are in tp and wp; swap so the old buffers become scratch. */ + xp = tp; tp = up; up = xp; + xp = wp; wp = vp; vp = xp; + } + +#if EXTEND + { mp_limb_t cy; + MPN_ZERO (tp, orig_size); + if (A == 0) + { + if (B != 1) abort (); + 
MPN_COPY (tp, s1p, ssize); + } + else + { + if (A < 0) + { + cy = mpn_mul_1 (tp, s1p, ssize, B); + cy += mpn_addmul_1 (tp, s0p, ssize, -A); + } + else + { + cy = mpn_mul_1 (tp, s0p, ssize, A); + cy += mpn_addmul_1 (tp, s1p, ssize, -B); + } + if (cy != 0) + tp[ssize++] = cy; + } + MPN_ZERO (wp, orig_size); + if (C < 0) + { + cy = mpn_mul_1 (wp, s1p, ssize, D); + cy += mpn_addmul_1 (wp, s0p, ssize, -C); + } + else + { + cy = mpn_mul_1 (wp, s0p, ssize, C); + cy += mpn_addmul_1 (wp, s1p, ssize, -D); + } + if (cy != 0) + wp[ssize++] = cy; /* NOTE(review): ssize may already have been incremented for tp above -- confirm the double bump is intended. */ + } + { + mp_ptr xp; + xp = tp; tp = s0p; s0p = xp; + xp = wp; wp = s1p; s1p = xp; + } +#endif +#if 0 /* Is it a win to remove multiple zeros here? */ + MPN_NORMALIZE (up, size); +#else + if (up[size - 1] == 0) + size--; +#endif + } + } + +#if RECORD + printf ("min: %ld\n", min); + printf ("max: %ld\n", max); +#endif + + if (vsize == 0) /* V is zero, so the GCD is U. */ + { + if (gp != up) + MPN_COPY (gp, up, size); +#if EXTEND + if (orig_s0p != s0p) + MPN_COPY (orig_s0p, s0p, ssize); +#endif + TMP_FREE (mark); + return size; + } + else /* V is a single limb: finish with single-limb arithmetic. */ + { + mp_limb_t vl, ul, t; +#if EXTEND + mp_limb_t cy; + mp_size_t i; +#endif + vl = vp[0]; +#if EXTEND + t = mpn_divmod_1 (wp, up, size, vl); + MPN_COPY (tp, s0p, ssize); + for (i = 0; i < size; i++) + { + cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]); + if (cy != 0) + tp[ssize++] = cy; + } +#if 0 + MPN_COPY (s0p, s1p, ssize); + MPN_COPY (s1p, tp, ssize); +#else + { + mp_ptr xp; + xp = s0p; s0p = s1p; s1p = xp; + xp = s1p; s1p = tp; tp = xp; + } +#endif +#else + t = mpn_mod_1 (up, size, vl); +#endif + ul = vl; + vl = t; + while (vl != 0) + { + mp_limb_t t; +#if EXTEND + mp_limb_t q, cy; + q = ul / vl; + t = ul - q*vl; + + MPN_COPY (tp, s0p, ssize); + cy = mpn_addmul_1 (tp, s1p, ssize, q); + if (cy != 0) + tp[ssize++] = cy; +#if 0 + MPN_COPY (s0p, s1p, ssize); + MPN_COPY (s1p, tp, ssize); +#else + { + mp_ptr xp; + xp = s0p; s0p = s1p; s1p = xp; + xp = s1p; s1p = tp; tp = xp; + } +#endif + +#else + t = ul % vl; +#endif + ul = vl; + vl = 
t; + } + gp[0] = ul; /* The GCD fits in a single limb here. */ +#if EXTEND + if (orig_s0p != s0p) + MPN_COPY (orig_s0p, s0p, ssize); +#endif + TMP_FREE (mark); + return 1; + } +} diff --git a/gnu/lib/libgmp/mpn/generic/get_str.c b/gnu/lib/libgmp/mpn/generic/get_str.c new file mode 100644 index 00000000000..0e7fc60ef66 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/get_str.c @@ -0,0 +1,211 @@ +/* mpn_get_str -- Convert a MSIZE long limb vector pointed to by MPTR + to a printable string in STR in base BASE. + +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Convert the limb vector pointed to by MPTR and MSIZE long to a + char array, using base BASE for the result array. Store the + result in the character array STR. STR must point to an array with + space for the largest possible number represented by a MSIZE long + limb vector + 1 extra character. + + The result is NOT in Ascii, to convert it to printable format, add + '0' or 'A' depending on the base and range. + + Return the number of digits in the result string. + This may include some leading zeros. + + The limb vector pointed to by MPTR is clobbered. 
*/ + +size_t +mpn_get_str (str, base, mptr, msize) + unsigned char *str; + int base; + mp_ptr mptr; + mp_size_t msize; +{ + mp_limb_t big_base; +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + int normalization_steps; +#endif +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb_t big_base_inverted; +#endif + unsigned int dig_per_u; + mp_size_t out_len; + register unsigned char *s; + + big_base = __mp_bases[base].big_base; + + s = str; + + /* Special case zero, as the code below doesn't handle it. */ + if (msize == 0) + { + s[0] = 0; + return 1; + } + + if ((base & (base - 1)) == 0) + { + /* The base is a power of 2. Make conversion from most + significant side. */ + mp_limb_t n1, n0; + register int bits_per_digit = big_base; + register int x; + register int bit_pos; + register int i; + + n1 = mptr[msize - 1]; + count_leading_zeros (x, n1); + + /* BIT_POS should be R when input ends in least sign. nibble, + R + bits_per_digit * n when input ends in n:th least significant + nibble. */ + + { + int bits; + + bits = BITS_PER_MP_LIMB * msize - x; + x = bits % bits_per_digit; + if (x != 0) + bits += bits_per_digit - x; + bit_pos = bits - (msize - 1) * BITS_PER_MP_LIMB; + } + + /* Fast loop for bit output. */ + i = msize - 1; + for (;;) + { + bit_pos -= bits_per_digit; + while (bit_pos >= 0) + { + *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1); + bit_pos -= bits_per_digit; + } + i--; + if (i < 0) + break; + n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1); + n1 = mptr[i]; + bit_pos += BITS_PER_MP_LIMB; + *s++ = n0 | (n1 >> bit_pos); + } + + *s = 0; + + return s - str; + } + else + { + /* General case. The base is not a power of 2. Make conversion + from least significant end. */ + + /* If udiv_qrnnd only handles divisors with the most significant bit + set, prepare BIG_BASE for being a divisor by shifting it to the + left exactly enough to set the most significant bit. 
*/ +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + count_leading_zeros (normalization_steps, big_base); + big_base <<= normalization_steps; +#if UDIV_TIME > 2 * UMUL_TIME + /* Get the fixed-point approximation to 1/(BIG_BASE << NORMALIZATION_STEPS). */ + big_base_inverted = __mp_bases[base].big_base_inverted; +#endif +#endif + + dig_per_u = __mp_bases[base].chars_per_limb; + out_len = ((size_t) msize * BITS_PER_MP_LIMB + * __mp_bases[base].chars_per_bit_exactly) + 1; + s += out_len; + + while (msize != 0) + { + int i; + mp_limb_t n0, n1; + +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + /* If we shifted BIG_BASE above, shift the dividend too, to get + the right quotient. We need to do this every loop, + since the intermediate quotients are OK, but the quotient from + one turn in the loop is going to be the dividend in the + next turn, and the dividend needs to be up-shifted. */ + if (normalization_steps != 0) + { + n0 = mpn_lshift (mptr, mptr, msize, normalization_steps); + + /* If the shifting gave a carry out limb, store it and + increase the length. */ + if (n0 != 0) + { + mptr[msize] = n0; + msize++; + } + } +#endif + + /* Divide the number at TP with BIG_BASE to get a quotient and a + remainder. The remainder is our new digit in base BIG_BASE. */ + i = msize - 1; + n1 = mptr[i]; + + if (n1 >= big_base) + n1 = 0; + else + { + msize--; + i--; + } + + for (; i >= 0; i--) + { + n0 = mptr[i]; +#if UDIV_TIME > 2 * UMUL_TIME + udiv_qrnnd_preinv (mptr[i], n1, n1, n0, big_base, big_base_inverted); +#else + udiv_qrnnd (mptr[i], n1, n1, n0, big_base); +#endif + } + +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + /* If we shifted above (at previous UDIV_NEEDS_NORMALIZATION tests) + the remainder will be up-shifted here. Compensate. */ + n1 >>= normalization_steps; +#endif + + /* Convert N1 from BIG_BASE to a string of digits in BASE + using single precision operations. 
*/ + for (i = dig_per_u - 1; i >= 0; i--) + { + *--s = n1 % base; + n1 /= base; + if (n1 == 0 && msize == 0) + break; + } + } + + while (s != str) + *--s = 0; + return out_len; + } +} diff --git a/gnu/lib/libgmp/mpn/generic/gmp-mparam.h b/gnu/lib/libgmp/mpn/generic/gmp-mparam.h new file mode 100644 index 00000000000..7c885575b33 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 diff --git a/gnu/lib/libgmp/mpn/generic/hamdist.c b/gnu/lib/libgmp/mpn/generic/hamdist.c new file mode 100644 index 00000000000..2190b636f90 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/hamdist.c @@ -0,0 +1,88 @@ +/* mpn_hamdist -- + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#if defined __GNUC__ +#if defined __sparc_v9__ && BITS_PER_MP_LIMB == 64 +#define popc_limb(a) \ + ({ \ + DItype __res; \ + asm ("popc %1,%0" : "=r" (__res) : "rI" (a)); \ + __res; \ + }) +#endif +#endif + +#ifndef popc_limb + +/* Cool population count of a mp_limb_t. + You have to figure out how this works, I won't tell you! */ + +static inline unsigned int +popc_limb (x) + mp_limb_t x; +{ +#if BITS_PER_MP_LIMB == 64 + /* We have to go into some trouble to define these constants. + (For mp_limb_t being `long long'.) 
*/ + mp_limb_t cnst; + cnst = 0x55555555L | ((mp_limb_t) 0x55555555L << BITS_PER_MP_LIMB/2); + x = ((x & ~cnst) >> 1) + (x & cnst); + cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2); + x = ((x & ~cnst) >> 2) + (x & cnst); + cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2); + x = ((x >> 4) + x) & cnst; + x = ((x >> 8) + x); + x = ((x >> 16) + x); + x = ((x >> 32) + x) & 0xff; +#endif +#if BITS_PER_MP_LIMB == 32 + x = ((x >> 1) & 0x55555555L) + (x & 0x55555555L); + x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L); + x = ((x >> 4) + x) & 0x0f0f0f0fL; + x = ((x >> 8) + x); + x = ((x >> 16) + x) & 0xff; +#endif + return x; +} +#endif + +unsigned long int +#if __STDC__ +mpn_hamdist (mp_srcptr up, mp_srcptr vp, mp_size_t size) +#else +mpn_hamdist (up, vp, size) + register mp_srcptr up; + register mp_srcptr vp; + register mp_size_t size; +#endif +{ + unsigned long int hamdist; + mp_size_t i; + + hamdist = 0; + for (i = 0; i < size; i++) + hamdist += popc_limb (up[i] ^ vp[i]); + + return hamdist; +} diff --git a/gnu/lib/libgmp/mpn/generic/inlines.c b/gnu/lib/libgmp/mpn/generic/inlines.c new file mode 100644 index 00000000000..dca305e6e49 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/inlines.c @@ -0,0 +1,3 @@ +#define _FORCE_INLINES +#define _EXTERN_INLINE /* empty */ +#include "gmp.h" diff --git a/gnu/lib/libgmp/mpn/generic/lshift.c b/gnu/lib/libgmp/mpn/generic/lshift.c new file mode 100644 index 00000000000..e244bc52f10 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/lshift.c @@ -0,0 +1,87 @@ +/* mpn_lshift -- Shift left low level. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left + and store the USIZE least significant digits of the result at WP. + Return the bits shifted out from the most significant digit. + + Argument constraints: + 1. 0 < CNT < BITS_PER_MP_LIMB + 2. If the result is to be written over the input, WP must be >= UP. +*/ + +mp_limb_t +#if __STDC__ +mpn_lshift (register mp_ptr wp, + register mp_srcptr up, mp_size_t usize, + register unsigned int cnt) +#else +mpn_lshift (wp, up, usize, cnt) + register mp_ptr wp; + register mp_srcptr up; + mp_size_t usize; + register unsigned int cnt; +#endif +{ + register mp_limb_t high_limb, low_limb; + register unsigned sh_1, sh_2; + register mp_size_t i; + mp_limb_t retval; + +#ifdef DEBUG + if (usize == 0 || cnt == 0) + abort (); +#endif + + sh_1 = cnt; +#if 0 + if (sh_1 == 0) + { + if (wp != up) + { + /* Copy from high end to low end, to allow specified input/output + overlapping. 
*/ + for (i = usize - 1; i >= 0; i--) + wp[i] = up[i]; + } + return 0; + } +#endif + + wp += 1; + sh_2 = BITS_PER_MP_LIMB - sh_1; + i = usize - 1; + low_limb = up[i]; + retval = low_limb >> sh_2; + high_limb = low_limb; + while (--i >= 0) + { + low_limb = up[i]; + wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); + high_limb = low_limb; + } + wp[i] = high_limb << sh_1; + + return retval; +} diff --git a/gnu/lib/libgmp/mpn/generic/mod_1.c b/gnu/lib/libgmp/mpn/generic/mod_1.c new file mode 100644 index 00000000000..314d11b3013 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/mod_1.c @@ -0,0 +1,197 @@ +/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + Return the single-limb remainder. + There are no constraints on the value of the divisor. + +Copyright (C) 1991, 1993, 1994, Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +/* FIXME: We should be using invert_limb (or invert_normalized_limb) + here (not udiv_qrnnd). 
*/ + +mp_limb_t +#if __STDC__ +mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb) +#else +mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb) + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; +#endif +{ + mp_size_t i; + mp_limb_t n1, n0, r; + int dummy; + + /* Botch: Should this be handled at all? Rely on callers? */ + if (dividend_size == 0) + return 0; + + /* If multiplication is much faster than division, and the + dividend is large, pre-invert the divisor, and use + only multiplications in the inner loop. */ + + /* This test should be read: + Does it ever help to use udiv_qrnnd_preinv? + && Does what we save compensate for the inversion overhead? */ + if (UDIV_TIME > (2 * UMUL_TIME + 6) + && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) + { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if (normalization_steps != 0) + { + mp_limb_t divisor_limb_inverted; + + divisor_limb <<= normalization_steps; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + most significant bit (with weight 2**N) implicit. */ + + /* Special case for DIVISOR_LIMB == 100...000. */ + if (divisor_limb << 1 == 0) + divisor_limb_inverted = ~(mp_limb_t) 0; + else + udiv_qrnnd (divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); + + /* Possible optimization: + if (r == 0 + && divisor_limb > ((n1 << normalization_steps) + | (dividend_ptr[dividend_size - 2] >> ...))) + ...one division less... 
*/ + + for (i = dividend_size - 2; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (dummy, r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), + divisor_limb, divisor_limb_inverted); + n1 = n0; + } + udiv_qrnnd_preinv (dummy, r, r, + n1 << normalization_steps, + divisor_limb, divisor_limb_inverted); + return r >> normalization_steps; + } + else + { + mp_limb_t divisor_limb_inverted; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + most significant bit (with weight 2**N) implicit. */ + + /* Special case for DIVISOR_LIMB == 100...000. */ + if (divisor_limb << 1 == 0) + divisor_limb_inverted = ~(mp_limb_t) 0; + else + udiv_qrnnd (divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + i--; + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (dummy, r, r, + n0, divisor_limb, divisor_limb_inverted); + } + return r; + } + } + else + { + if (UDIV_NEEDS_NORMALIZATION) + { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if (normalization_steps != 0) + { + divisor_limb <<= normalization_steps; + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); + + /* Possible optimization: + if (r == 0 + && divisor_limb > ((n1 << normalization_steps) + | (dividend_ptr[dividend_size - 2] >> ...))) + ...one division less... 
*/ + + for (i = dividend_size - 2; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd (dummy, r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), + divisor_limb); + n1 = n0; + } + udiv_qrnnd (dummy, r, r, + n1 << normalization_steps, + divisor_limb); + return r >> normalization_steps; + } + } + /* No normalization needed, either because udiv_qrnnd doesn't require + it, or because DIVISOR_LIMB is already normalized. */ + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + i--; + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd (dummy, r, r, n0, divisor_limb); + } + return r; + } +} diff --git a/gnu/lib/libgmp/mpn/generic/mul.c b/gnu/lib/libgmp/mpn/generic/mul.c new file mode 100644 index 00000000000..dcf8cb4da07 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/mul.c @@ -0,0 +1,152 @@ +/* mpn_mul -- Multiply two natural numbers. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) + and v (pointed to by VP, with VSIZE limbs), and store the result at + PRODP. USIZE + VSIZE limbs are always stored, but if the input + operands are normalized. Return the most significant limb of the + result. + + NOTE: The space pointed to by PRODP is overwritten before finished + with U and V, so overlap is an error. + + Argument constraints: + 1. USIZE >= VSIZE. + 2. PRODP != UP and PRODP != VP, i.e. the destination + must be distinct from the multiplier and the multiplicand. */ + +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + value which is good on most machines. */ +#ifndef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 32 +#endif + +mp_limb_t +#if __STDC__ +mpn_mul (mp_ptr prodp, + mp_srcptr up, mp_size_t usize, + mp_srcptr vp, mp_size_t vsize) +#else +mpn_mul (prodp, up, usize, vp, vsize) + mp_ptr prodp; + mp_srcptr up; + mp_size_t usize; + mp_srcptr vp; + mp_size_t vsize; +#endif +{ + mp_ptr prod_endp = prodp + usize + vsize - 1; + mp_limb_t cy; + mp_ptr tspace; + TMP_DECL (marker); + + if (vsize < KARATSUBA_THRESHOLD) + { + /* Handle simple cases with traditional multiplication. + + This is the most critical code of the entire function. All + multiplies rely on this, both small and huge. Small ones arrive + here immediately. Huge ones arrive here as this is the base case + for Karatsuba's recursive algorithm below. */ + mp_size_t i; + mp_limb_t cy_limb; + mp_limb_t v_limb; + + if (vsize == 0) + return 0; + + /* Multiply by the first limb in V separately, as the result can be + stored (not added) to PROD. We also avoid a loop for zeroing. 
*/ + v_limb = vp[0]; + if (v_limb <= 1) + { + if (v_limb == 1) + MPN_COPY (prodp, up, usize); + else + MPN_ZERO (prodp, usize); + cy_limb = 0; + } + else + cy_limb = mpn_mul_1 (prodp, up, usize, v_limb); + + prodp[usize] = cy_limb; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + U with one limb from V, and add it to PROD. */ + for (i = 1; i < vsize; i++) + { + v_limb = vp[i]; + if (v_limb <= 1) + { + cy_limb = 0; + if (v_limb == 1) + cy_limb = mpn_add_n (prodp, prodp, up, usize); + } + else + cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb); + + prodp[usize] = cy_limb; + prodp++; + } + return cy_limb; + } + + TMP_MARK (marker); + + tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); + MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace); + + prodp += vsize; + up += vsize; + usize -= vsize; + if (usize >= vsize) + { + mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); + do + { + MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace); + cy = mpn_add_n (prodp, prodp, tp, vsize); + mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy); + prodp += vsize; + up += vsize; + usize -= vsize; + } + while (usize >= vsize); + } + + /* True: usize < vsize. */ + + /* Make life simple: Recurse. */ + + if (usize != 0) + { + mpn_mul (tspace, vp, vsize, up, usize); + cy = mpn_add_n (prodp, prodp, tspace, vsize); + mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy); + } + + TMP_FREE (marker); + return *prod_endp; +} diff --git a/gnu/lib/libgmp/mpn/generic/mul_1.c b/gnu/lib/libgmp/mpn/generic/mul_1.c new file mode 100644 index 00000000000..2de680a64b0 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/mul_1.c @@ -0,0 +1,59 @@ +/* mpn_mul_1 -- Multiply a limb vector with a single limb and + store the product in a second limb vector. + +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + + /* The loop counter and index J goes from -S1_SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + res_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} diff --git a/gnu/lib/libgmp/mpn/generic/mul_n.c b/gnu/lib/libgmp/mpn/generic/mul_n.c new file mode 100644 index 00000000000..b38e8ad17ea --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/mul_n.c @@ -0,0 +1,401 @@ +/* mpn_mul_n -- Multiply two natural numbers of length n. + +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), + both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are + always stored. Return the most significant limb. + + Argument constraints: + 1. PRODP != UP and PRODP != VP, i.e. the destination + must be distinct from the multiplier and the multiplicand. */ + +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + value which is good on most machines. */ +#ifndef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 32 +#endif + +/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ +#if KARATSUBA_THRESHOLD < 2 +#undef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 2 +#endif + +/* Handle simple cases with traditional multiplication. + + This is the most critical code of multiplication. All multiplies rely + on this, both small and huge. Small ones arrive here immediately. Huge + ones arrive here as this is the base case for Karatsuba's recursive + algorithm below. 
*/ + +void +#if __STDC__ +impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) +#else +impn_mul_n_basecase (prodp, up, vp, size) + mp_ptr prodp; + mp_srcptr up; + mp_srcptr vp; + mp_size_t size; +#endif +{ + mp_size_t i; + mp_limb_t cy_limb; + mp_limb_t v_limb; + + /* Multiply by the first limb in V separately, as the result can be + stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = vp[0]; + if (v_limb <= 1) + { + if (v_limb == 1) + MPN_COPY (prodp, up, size); + else + MPN_ZERO (prodp, size); + cy_limb = 0; + } + else + cy_limb = mpn_mul_1 (prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + U with one limb from V, and add it to PROD. */ + for (i = 1; i < size; i++) + { + v_limb = vp[i]; + if (v_limb <= 1) + { + cy_limb = 0; + if (v_limb == 1) + cy_limb = mpn_add_n (prodp, prodp, up, size); + } + else + cy_limb = mpn_addmul_1 (prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + } +} + +void +#if __STDC__ +impn_mul_n (mp_ptr prodp, + mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace) +#else +impn_mul_n (prodp, up, vp, size, tspace) + mp_ptr prodp; + mp_srcptr up; + mp_srcptr vp; + mp_size_t size; + mp_ptr tspace; +#endif +{ + if ((size & 1) != 0) + { + /* The size is odd, the code code below doesn't handle that. + Multiply the least significant (size - 1) limbs with a recursive + call, and handle the most significant limb of S1 and S2 + separately. */ + /* A slightly faster way to do this would be to make the Karatsuba + code below behave as if the size were even, and let it check for + odd size in the end. I.e., in essence move this code to the end. + Doing so would save us a recursive call, and potentially make the + stack grow a lot less. 
*/ + + mp_size_t esize = size - 1; /* even size */ + mp_limb_t cy_limb; + + MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace); + cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]); + prodp[esize + esize] = cy_limb; + cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]); + + prodp[esize + size] = cy_limb; + } + else + { + /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. + + Split U in two pieces, U1 and U0, such that + U = U0 + U1*(B**n), + and V in V1 and V0, such that + V = V0 + V1*(B**n). + + UV is then computed recursively using the identity + + 2n n n n + UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V + 1 1 1 0 0 1 0 0 + + Where B = 2**BITS_PER_MP_LIMB. */ + + mp_size_t hsize = size >> 1; + mp_limb_t cy; + int negflg; + + /*** Product H. ________________ ________________ + |_____U1 x V1____||____U0 x V0_____| */ + /* Put result in upper part of PROD and pass low part of TSPACE + as new TSPACE. */ + MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace); + + /*** Product M. ________________ + |_(U1-U0)(V0-V1)_| */ + if (mpn_cmp (up + hsize, up, hsize) >= 0) + { + mpn_sub_n (prodp, up + hsize, up, hsize); + negflg = 0; + } + else + { + mpn_sub_n (prodp, up, up + hsize, hsize); + negflg = 1; + } + if (mpn_cmp (vp + hsize, vp, hsize) >= 0) + { + mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize); + negflg ^= 1; + } + else + { + mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize); + /* No change of NEGFLG. */ + } + /* Read temporary operands from low part of PROD. + Put result in low part of TSPACE using upper part of TSPACE + as new TSPACE. */ + MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size); + + /*** Add/copy product H. */ + MPN_COPY (prodp + hsize, prodp + size, hsize); + cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); + + /*** Add product M (if NEGFLG M is a negative number). 
*/ + if (negflg) + cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); + else + cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); + + /*** Product L. ________________ ________________ + |________________||____U0 x V0_____| */ + /* Read temporary operands from low part of PROD. + Put result in low part of TSPACE using upper part of TSPACE + as new TSPACE. */ + MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size); + + /*** Add/copy Product L (twice). */ + + cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); + if (cy) + mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); + + MPN_COPY (prodp, tspace, hsize); + cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); + if (cy) + mpn_add_1 (prodp + size, prodp + size, size, 1); + } +} + /* impn_sqr_n_basecase -- square {UP,SIZE} by the schoolbook method. Each limb of U in turn multiplies the whole of U and the row is accumulated one limb higher in PROD, so prodp[0 .. 2*SIZE-1] is written. up[0] is read unconditionally, hence SIZE is assumed to be at least 1. */ +void +#if __STDC__ +impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size) +#else +impn_sqr_n_basecase (prodp, up, size) + mp_ptr prodp; + mp_srcptr up; + mp_size_t size; +#endif +{ + mp_size_t i; + mp_limb_t cy_limb; + mp_limb_t v_limb; + + /* Multiply by the first limb of U separately (when squaring, U also plays the role of V), as the result can be + stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = up[0]; + if (v_limb <= 1) /* multipliers 0 and 1 need no real multiplication */ + { + if (v_limb == 1) + MPN_COPY (prodp, up, size); + else + MPN_ZERO (prodp, size); + cy_limb = 0; + } + else + cy_limb = mpn_mul_1 (prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; /* the next row is accumulated one limb higher */ + + /* For each iteration in the outer loop, multiply one limb from + U with one limb from V, and add it to PROD.
*/ + for (i = 1; i < size; i++) + { + v_limb = up[i]; + if (v_limb <= 1) + { + cy_limb = 0; + if (v_limb == 1) + cy_limb = mpn_add_n (prodp, prodp, up, size); + } + else + cy_limb = mpn_addmul_1 (prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + } +} + /* impn_sqr_n -- square {UP,SIZE} into PROD by Karatsuba recursion, using the caller-provided scratch area TSPACE. An odd SIZE is handled by recursing on the low SIZE-1 limbs and folding in the top limb afterwards. NOTE(review): the required size of TSPACE is not stated here; the caller mpn_mul_n passes 2*SIZE limbs. */ +void +#if __STDC__ +impn_sqr_n (mp_ptr prodp, + mp_srcptr up, mp_size_t size, mp_ptr tspace) +#else +impn_sqr_n (prodp, up, size, tspace) + mp_ptr prodp; + mp_srcptr up; + mp_size_t size; + mp_ptr tspace; +#endif +{ + if ((size & 1) != 0) + { + /* The size is odd, the code below does not handle that. + Multiply the least significant (size - 1) limbs with a recursive + call, and handle the most significant limb of the operand + separately. */ + /* A slightly faster way to do this would be to make the Karatsuba + code below behave as if the size were even, and let it check for + odd size in the end. I.e., in essence move this code to the end. + Doing so would save us a recursive call, and potentially make the + stack grow a lot less. */ + + mp_size_t esize = size - 1; /* even size */ + mp_limb_t cy_limb; + + MPN_SQR_N_RECURSE (prodp, up, esize, tspace); + cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]); /* first copy of the cross term: low ESIZE limbs times the top limb */ + prodp[esize + esize] = cy_limb; + cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]); /* this pass spans SIZE limbs, so it adds the cross term a second time and also the square of the top limb */ + + prodp[esize + size] = cy_limb; + } + else + { + mp_size_t hsize = size >> 1; + mp_limb_t cy; + + /*** Product H. ________________ ________________ + |_____U1 x U1____||____U0 x U0_____| */ + /* Put result in upper part of PROD and pass low part of TSPACE + as new TSPACE. */ + MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace); + + /*** Product M. ________________ + |_(U1-U0)(U0-U1)_| */ + if (mpn_cmp (up + hsize, up, hsize) >= 0) /* always subtract the smaller half from the larger, leaving the absolute difference in low PROD */ + { + mpn_sub_n (prodp, up + hsize, up, hsize); + } + else + { + mpn_sub_n (prodp, up, up + hsize, hsize); + } + + /* Read temporary operands from low part of PROD. + Put result in low part of TSPACE using upper part of TSPACE + as new TSPACE.
*/ + MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size); + + /*** Add/copy product H. */ + MPN_COPY (prodp + hsize, prodp + size, hsize); + cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); + + /*** Subtract the squared difference computed above: product M = (U1-U0)(U0-U1) is never positive when squaring, so no NEGFLG test is needed here. */ + cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); + + /*** Product L. ________________ ________________ + |________________||____U0 x U0_____| */ + /* Read temporary operands from low part of PROD. + Put result in low part of TSPACE using upper part of TSPACE + as new TSPACE. */ + MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size); + + /*** Add/copy Product L (twice). */ + + cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); + if (cy) + mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); + + MPN_COPY (prodp, tspace, hsize); + cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); + if (cy) + mpn_add_1 (prodp + size, prodp + size, size, 1); + } +} + +/* This should be made into an inline function in gmp.h.
*/ +inline void +#if __STDC__ +mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) +#else +mpn_mul_n (prodp, up, vp, size) + mp_ptr prodp; + mp_srcptr up; + mp_srcptr vp; + mp_size_t size; +#endif +{ /* Multiply {UP,SIZE} by {VP,SIZE} into PRODP. The routine is picked from SIZE (basecase below KARATSUBA_THRESHOLD, recursive otherwise) and from whether both operands are the same vector. */ + TMP_DECL (marker); + TMP_MARK (marker); + if (up == vp) /* identical pointers: use the dedicated squaring routines */ + { + if (size < KARATSUBA_THRESHOLD) + { + impn_sqr_n_basecase (prodp, up, size); + } + else + { + mp_ptr tspace; + tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB); /* scratch of 2*SIZE limbs handed to the recursive routine */ + impn_sqr_n (prodp, up, size, tspace); + } + } + else + { + if (size < KARATSUBA_THRESHOLD) + { + impn_mul_n_basecase (prodp, up, vp, size); + } + else + { + mp_ptr tspace; + tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB); /* same 2*SIZE limb scratch as in the squaring branch */ + impn_mul_n (prodp, up, vp, size, tspace); + } + } + TMP_FREE (marker); +} diff --git a/gnu/lib/libgmp/mpn/generic/perfsqr.c b/gnu/lib/libgmp/mpn/generic/perfsqr.c new file mode 100644 index 00000000000..5a6e2afe286 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/perfsqr.c @@ -0,0 +1,138 @@ +/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square, + zero otherwise. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA.
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +#if BITS_PER_MP_LIMB == 32 +#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x 13 x ... x 29 */ +#define PP_INVERTED 0x53E5645CL +#endif + +#if BITS_PER_MP_LIMB == 64 +#define PP 0xE221F97C30E94E1DL /* 3 x 5 x 7 x 11 x 13 x ... x 53 */ +#define PP_INVERTED 0x21CFE6CFC938B36BL +#endif + +/* sq_res_0x100[x mod 0x100] == 1 iff x mod 0x100 is a quadratic residue + modulo 0x100. Exactly 44 of the 256 entries are 1. */ +static unsigned char const sq_res_0x100[0x100] = +{ + 1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +}; + /* Three-stage test, cheapest first: low-byte table lookup, residues modulo small primes taken from A mod PP, then an exact square root. up[0] is read unconditionally, so USIZE is assumed to be at least 1. */ +int +#if __STDC__ +mpn_perfect_square_p (mp_srcptr up, mp_size_t usize) +#else +mpn_perfect_square_p (up, usize) + mp_srcptr up; + mp_size_t usize; +#endif +{ + mp_limb_t rem; + mp_ptr root_ptr; + int res; + TMP_DECL (marker); + + /* The first test excludes 212/256 (82.8%) of the perfect square candidates + in O(1) time. */ + if ((sq_res_0x100[(unsigned int) up[0] % 0x100] & 1) == 0) + return 0; + +#if defined (PP) + /* The second test excludes 30652543/30808063 (99.5%) of the remaining + perfect square candidates in O(n) time. */ + + /* Firstly, compute REM = A mod PP. */ + if (UDIV_TIME > (2 * UMUL_TIME + 6)) /* compile-time constants: use the pre-inverted divisor only where division is much slower than multiplication */ + rem = mpn_preinv_mod_1 (up, usize, (mp_limb_t) PP, (mp_limb_t) PP_INVERTED); + else + rem = mpn_mod_1 (up, usize, (mp_limb_t) PP); + + /* Now decide if REM is a quadratic residue modulo the factors in PP.
*/ + + /* If A is just a few limbs, computing the square root does not take long + time, so things might run faster if we limit this loop according to the + size of A. Each constant below is a bit mask in which bit k is set iff k is a square modulo the prime used on that line. */ + +#if BITS_PER_MP_LIMB == 64 + if (((0x12DD703303AED3L >> rem % 53) & 1) == 0) + return 0; + if (((0x4351B2753DFL >> rem % 47) & 1) == 0) + return 0; + if (((0x35883A3EE53L >> rem % 43) & 1) == 0) + return 0; + if (((0x1B382B50737L >> rem % 41) & 1) == 0) + return 0; + if (((0x165E211E9BL >> rem % 37) & 1) == 0) + return 0; + if (((0x121D47B7L >> rem % 31) & 1) == 0) + return 0; +#endif + if (((0x13D122F3L >> rem % 29) & 1) == 0) + return 0; + if (((0x5335FL >> rem % 23) & 1) == 0) + return 0; + if (((0x30AF3L >> rem % 19) & 1) == 0) + return 0; + if (((0x1A317L >> rem % 17) & 1) == 0) + return 0; + if (((0x161BL >> rem % 13) & 1) == 0) + return 0; + if (((0x23BL >> rem % 11) & 1) == 0) + return 0; + if (((0x017L >> rem % 7) & 1) == 0) + return 0; + if (((0x13L >> rem % 5) & 1) == 0) + return 0; + if (((0x3L >> rem % 3) & 1) == 0) + return 0; +#endif + + TMP_MARK (marker); + + /* For the third and last test, we finally compute the square root, + to make sure we've really got a perfect square. */ + root_ptr = (mp_ptr) TMP_ALLOC ((usize + 1) / 2 * BYTES_PER_MP_LIMB); /* the root needs half as many limbs as the operand, rounded up */ + + /* Iff mpn_sqrtrem returns zero, the square is perfect. */ + res = ! mpn_sqrtrem (root_ptr, NULL, up, usize); + TMP_FREE (marker); + return res; +} diff --git a/gnu/lib/libgmp/mpn/generic/popcount.c b/gnu/lib/libgmp/mpn/generic/popcount.c new file mode 100644 index 00000000000..c48573a4799 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/popcount.c @@ -0,0 +1,87 @@ +/* popcount.c + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#if defined __GNUC__ +#if defined __sparc_v9__ && BITS_PER_MP_LIMB == 64 +#define popc_limb(a) \ + ({ \ + DItype __res; \ + asm ("popc %1,%0" : "=r" (__res) : "rI" (a)); \ + __res; \ + }) +#endif +#endif + +#ifndef popc_limb + +/* Population count of a mp_limb_t by parallel summation: adjacent 1-bit fields are added into 2-bit sums, + then into 4-bit sums, then into byte sums, and finally the byte sums are folded into the low byte. */ + +static inline unsigned int +popc_limb (x) + mp_limb_t x; +{ +#if BITS_PER_MP_LIMB == 64 + /* We have to go into some trouble to define these constants. + (For mp_limb_t being `long long'.)
*/ + mp_limb_t cnst; + cnst = 0x55555555L | ((mp_limb_t) 0x55555555L << BITS_PER_MP_LIMB/2); /* build the 64-bit mask from two 32-bit halves */ + x = ((x & ~cnst) >> 1) + (x & cnst); /* 2-bit sums */ + cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2); + x = ((x & ~cnst) >> 2) + (x & cnst); /* 4-bit sums */ + cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2); + x = ((x >> 4) + x) & cnst; /* one sum per byte */ + x = ((x >> 8) + x); + x = ((x >> 16) + x); + x = ((x >> 32) + x) & 0xff; /* only the low byte holds the total; the other bytes are partial sums */ +#endif +#if BITS_PER_MP_LIMB == 32 + x = ((x >> 1) & 0x55555555L) + (x & 0x55555555L); + x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L); + x = ((x >> 4) + x) & 0x0f0f0f0fL; + x = ((x >> 8) + x); + x = ((x >> 16) + x) & 0xff; +#endif + return x; +} +#endif + /* Return the number of set bits in the SIZE limbs starting at P; zero when SIZE is not positive. */ +unsigned long int +#if __STDC__ +mpn_popcount (register mp_srcptr p, register mp_size_t size) +#else +mpn_popcount (p, size) + register mp_srcptr p; + register mp_size_t size; +#endif +{ + unsigned long int popcnt; + mp_size_t i; + + popcnt = 0; + for (i = 0; i < size; i++) + popcnt += popc_limb (p[i]); + + return popcnt; +} diff --git a/gnu/lib/libgmp/mpn/generic/pre_mod_1.c b/gnu/lib/libgmp/mpn/generic/pre_mod_1.c new file mode 100644 index 00000000000..92d413b1342 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/pre_mod_1.c @@ -0,0 +1,69 @@ +/* mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb, + divisor_limb_inverted) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by the normalized DIVISOR_LIMB. + DIVISOR_LIMB_INVERTED should be 2^(2*BITS_PER_MP_LIMB) / DIVISOR_LIMB + + - 2^BITS_PER_MP_LIMB. + Return the single-limb remainder. + +Copyright (C) 1991, 1993, 1994, Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +mp_limb_t +#if __STDC__ +mpn_preinv_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb, mp_limb_t divisor_limb_inverted) +#else +mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb, divisor_limb_inverted) + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; + mp_limb_t divisor_limb_inverted; +#endif +{ /* Return {DIVIDEND_PTR,DIVIDEND_SIZE} mod DIVISOR_LIMB using the precomputed inverse. dividend_ptr[dividend_size - 1] is read unconditionally, so DIVIDEND_SIZE must be at least 1. */ + mp_size_t i; + mp_limb_t n0, r; + mp_limb_t dummy; /* sink for the unused quotient limb; limb-typed like N0 and R so that storing the quotient cannot truncate */ + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; /* top limb is not yet reduced: start from a zero remainder and let the loop divide it as well */ + else + i--; /* top limb is already a valid remainder: skip it */ + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (dummy, r, r, n0, divisor_limb, divisor_limb_inverted); /* divide the two-limb value R,N0 and keep only the remainder in R */ + } + return r; +} diff --git a/gnu/lib/libgmp/mpn/generic/random2.c b/gnu/lib/libgmp/mpn/generic/random2.c new file mode 100644 index 00000000000..29546083fcd --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/random2.c @@ -0,0 +1,93 @@ +/* mpn_random2 -- Generate random numbers with relatively long strings + of ones and zeroes. Suitable for border testing. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#if defined (__hpux) || defined (alpha__) || defined (__svr4__) || defined (__SVR4) +/* HPUX lacks random(). DEC OSF/1 1.2 random() returns a double. */ +long mrand48 (); +static inline long +random () +{ + return mrand48 (); +} +#else +long random (); +#endif + +/* It's a bit tricky to get this right, so please test the code well + if you hack with it. Some early versions of the function produced + random numbers with the leading limb == 0, and some versions never + made the most significant bit set. */ + /* Fill {RES_PTR,SIZE} from the most significant limb downwards with runs of one bits or zero bits. Bit 0 of RAN selects the kind of run and the remaining bits give a length of 1 to BITS_PER_MP_LIMB. */ +void +mpn_random2 (res_ptr, size) + mp_ptr res_ptr; + mp_size_t size; +{ + int n_bits; + int bit_pos; + mp_size_t limb_pos; + unsigned int ran; + mp_limb_t limb; + + limb = 0; + + /* Start off in a random bit position in the most significant limb. */ + bit_pos = random () & (BITS_PER_MP_LIMB - 1); + + /* Least significant bit of RAN chooses string of ones/string of zeroes. + Make most significant limb be non-zero by setting bit 0 of RAN. */ + ran = random () | 1; + + for (limb_pos = size - 1; limb_pos >= 0; ) /* LIMB_POS only moves when a limb is completed and stored */ + { + n_bits = (ran >> 1) % BITS_PER_MP_LIMB + 1; + if ((ran & 1) != 0) + { + /* Generate a string of ones.
*/ + if (n_bits >= bit_pos) /* the run reaches the bottom of the current limb: finish this limb and carry the rest of the run into the next one */ + { + res_ptr[limb_pos--] = limb | ((((mp_limb_t) 2) << bit_pos) - 1); + bit_pos += BITS_PER_MP_LIMB; + limb = (~(mp_limb_t) 0) << (bit_pos - n_bits); /* NOTE(review): when n_bits equalled bit_pos on entry, this shift count is exactly BITS_PER_MP_LIMB, which C leaves undefined for a limb-wide operand -- confirm and consider a strict comparison above */ + } + else + { + limb |= ((((mp_limb_t) 1) << n_bits) - 1) << (bit_pos - n_bits + 1); + } + } + else + { + /* Generate a string of zeroes. */ + if (n_bits >= bit_pos) + { + res_ptr[limb_pos--] = limb; + limb = 0; + bit_pos += BITS_PER_MP_LIMB; + } + } + bit_pos -= n_bits; + ran = random (); + } +} diff --git a/gnu/lib/libgmp/mpn/generic/rshift.c b/gnu/lib/libgmp/mpn/generic/rshift.c new file mode 100644 index 00000000000..804f9be582e --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/rshift.c @@ -0,0 +1,88 @@ +/* mpn_rshift -- Shift right a low-level natural-number integer. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right + and store the USIZE least significant limbs of the result at WP. + The bits shifted out to the right are returned. + + Argument constraints: + 1. 0 < CNT < BITS_PER_MP_LIMB + 2. If the result is to be written over the input, WP must be <= UP.
+*/ + +mp_limb_t +#if __STDC__ +mpn_rshift (register mp_ptr wp, + register mp_srcptr up, mp_size_t usize, + register unsigned int cnt) +#else +mpn_rshift (wp, up, usize, cnt) + register mp_ptr wp; + register mp_srcptr up; + mp_size_t usize; + register unsigned int cnt; +#endif +{ + register mp_limb_t high_limb, low_limb; + register unsigned sh_1, sh_2; + register mp_size_t i; + mp_limb_t retval; + +#ifdef DEBUG + if (usize == 0 || cnt == 0) /* both cases violate the preconditions; they are only checked in DEBUG builds */ + abort (); +#endif + + sh_1 = cnt; + +#if 0 /* disabled: handling of CNT == 0, which the argument constraints exclude */ + if (sh_1 == 0) + { + if (wp != up) + { + /* Copy from low end to high end, to allow specified input/output + overlapping. */ + for (i = 0; i < usize; i++) + wp[i] = up[i]; + } + return usize; + } +#endif + + wp -= 1; /* bias WP so that wp[i] below addresses result limb i - 1 */ + sh_2 = BITS_PER_MP_LIMB - sh_1; + high_limb = up[0]; /* read unconditionally, so USIZE must be at least 1 */ + retval = high_limb << sh_2; /* the CNT bits shifted out of up[0], placed at the top of the returned limb */ + low_limb = high_limb; + + for (i = 1; i < usize; i++) /* ascending order: each source limb is read before the result limb below it is written */ + { + high_limb = up[i]; + wp[i] = (low_limb >> sh_1) | (high_limb << sh_2); + low_limb = high_limb; + } + wp[i] = low_limb >> sh_1; /* most significant result limb: zero bits are shifted in from above */ + + return retval; +} diff --git a/gnu/lib/libgmp/mpn/generic/scan0.c b/gnu/lib/libgmp/mpn/generic/scan0.c new file mode 100644 index 00000000000..d6f6580d047 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/scan0.c @@ -0,0 +1,62 @@ +/* mpn_scan0 -- Scan from a given bit position for the next clear bit. + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details.
+ +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Design issues: + 1. What if starting_bit is not within U? Caller's problem? + 2. Bit index should be 'unsigned'? + + Argument constraints: + 1. U must sooner or later have a limb with a clear bit. + */ + +unsigned long int +#if __STDC__ +mpn_scan0 (register mp_srcptr up, + register unsigned long int starting_bit) +#else +mpn_scan0 (up, starting_bit) + register mp_srcptr up; + register unsigned long int starting_bit; +#endif +{ /* Return the index of the first clear bit of U at or above STARTING_BIT. No limb count is passed in, so the search is unbounded and relies on the argument constraint stated above. */ + mp_size_t starting_word; + mp_limb_t alimb; + int cnt; + mp_srcptr p; + + /* Start at the word implied by STARTING_BIT. */ + starting_word = starting_bit / BITS_PER_MP_LIMB; + p = up + starting_word; + alimb = ~*p++; /* complement, so that clear bits of U become the set bits searched for */ + + /* Mask off any bits before STARTING_BIT in the first limb. */ + alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB); /* unary minus binds first: an all-ones limb shifted left */ + + while (alimb == 0) + alimb = ~*p++; + + count_leading_zeros (cnt, alimb & -alimb); /* alimb & -alimb isolates the lowest set bit */ + return (p - up) * BITS_PER_MP_LIMB - 1 - cnt; /* P already points one limb past the limb in which the bit was found */ +} diff --git a/gnu/lib/libgmp/mpn/generic/scan1.c b/gnu/lib/libgmp/mpn/generic/scan1.c new file mode 100644 index 00000000000..c95d090daf6 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/scan1.c @@ -0,0 +1,62 @@ +/* mpn_scan1 -- Scan from a given bit position for the next set bit. + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version.
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Design issues: + 1. What if starting_bit is not within U? Caller's problem? + 2. Bit index should be 'unsigned'? + + Argument constraints: + 1. U must sooner or later have a limb != 0. + */ + +unsigned long int +#if __STDC__ +mpn_scan1 (register mp_srcptr up, + register unsigned long int starting_bit) +#else +mpn_scan1 (up, starting_bit) + register mp_srcptr up; + register unsigned long int starting_bit; +#endif +{ /* Return the index of the first set bit of U at or above STARTING_BIT. No limb count is passed in, so the search is unbounded and relies on the argument constraint stated above. */ + mp_size_t starting_word; + mp_limb_t alimb; + int cnt; + mp_srcptr p; + + /* Start at the word implied by STARTING_BIT. */ + starting_word = starting_bit / BITS_PER_MP_LIMB; + p = up + starting_word; + alimb = *p++; + + /* Mask off any bits before STARTING_BIT in the first limb. */ + alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB); /* unary minus binds first: an all-ones limb shifted left */ + + while (alimb == 0) + alimb = *p++; + + count_leading_zeros (cnt, alimb & -alimb); /* alimb & -alimb isolates the lowest set bit */ + return (p - up) * BITS_PER_MP_LIMB - 1 - cnt; /* P already points one limb past the limb in which the bit was found */ +} diff --git a/gnu/lib/libgmp/mpn/generic/set_str.c b/gnu/lib/libgmp/mpn/generic/set_str.c new file mode 100644 index 00000000000..424fad30e16 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/set_str.c @@ -0,0 +1,154 @@ +/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base) + -- Convert a STR_LEN long base BASE byte string pointed to by STR to a + limb vector pointed to by RES_PTR. Return the number of limbs in + RES_PTR.
+ +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + /* The bytes of STR are used directly as digit values (no character decoding happens here), most significant digit first. The return value is the number of limbs stored at XP. */ +mp_size_t +mpn_set_str (xp, str, str_len, base) + mp_ptr xp; + const unsigned char *str; + size_t str_len; + int base; +{ + mp_size_t size; + mp_limb_t big_base; + int indigits_per_limb; + mp_limb_t res_digit; + + big_base = __mp_bases[base].big_base; + indigits_per_limb = __mp_bases[base].chars_per_limb; + +/* size = str_len / indigits_per_limb + 1; */ + + size = 0; + + if ((base & (base - 1)) == 0) + { + /* The base is a power of 2. Read the input string from + least to most significant character/digit. */ + + const unsigned char *s; + int next_bitpos; + int bits_per_indigit = big_base; /* for power-of-2 bases the big_base table entry is used as the number of bits per digit */ + + res_digit = 0; + next_bitpos = 0; + + for (s = str + str_len - 1; s >= str; s--) + { + int inp_digit = *s; + + res_digit |= (mp_limb_t) inp_digit << next_bitpos; + next_bitpos += bits_per_indigit; + if (next_bitpos >= BITS_PER_MP_LIMB) + { + xp[size++] = res_digit; + next_bitpos -= BITS_PER_MP_LIMB; + res_digit = inp_digit >> (bits_per_indigit - next_bitpos); /* carry over the bits of this digit that did not fit into the limb just stored */ + } + } + + if (res_digit != 0) + xp[size++] = res_digit; + } + else + { + /* General case.
The base is not a power of 2. */ + + size_t i; + int j; + mp_limb_t cy_limb; + + for (i = indigits_per_limb; i < str_len; i += indigits_per_limb) /* consume full groups of INDIGITS_PER_LIMB digits while at least one more digit remains after the group */ + { + res_digit = *str++; + if (base == 10) + { /* This is a common case. + Help the compiler to avoid multiplication. */ + for (j = 1; j < indigits_per_limb; j++) + res_digit = res_digit * 10 + *str++; + } + else + { + for (j = 1; j < indigits_per_limb; j++) + res_digit = res_digit * base + *str++; + } + + if (size == 0) + { + if (res_digit != 0) + { + xp[0] = res_digit; + size = 1; + } + } + else + { + cy_limb = mpn_mul_1 (xp, xp, size, big_base); + cy_limb += mpn_add_1 (xp, xp, size, res_digit); + if (cy_limb != 0) + xp[size++] = cy_limb; + } + } + + big_base = base; /* rebuild the multiplier for the final, possibly shorter, group of digits */ + res_digit = *str++; /* NOTE(review): read unconditionally, so this path assumes STR_LEN is at least 1 -- confirm against callers */ + if (base == 10) + { /* This is a common case. + Help the compiler to avoid multiplication. */ + for (j = 1; j < str_len - (i - indigits_per_limb); j++) + { + res_digit = res_digit * 10 + *str++; + big_base *= 10; + } + } + else + { + for (j = 1; j < str_len - (i - indigits_per_limb); j++) + { + res_digit = res_digit * base + *str++; + big_base *= base; + } + } + + if (size == 0) + { + if (res_digit != 0) + { + xp[0] = res_digit; + size = 1; + } + } + else + { + cy_limb = mpn_mul_1 (xp, xp, size, big_base); + cy_limb += mpn_add_1 (xp, xp, size, res_digit); + if (cy_limb != 0) + xp[size++] = cy_limb; + } + } + + return size; +} diff --git a/gnu/lib/libgmp/mpn/generic/sqrtrem.c b/gnu/lib/libgmp/mpn/generic/sqrtrem.c new file mode 100644 index 00000000000..539480d37a2 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/sqrtrem.c @@ -0,0 +1,498 @@ +/* mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) + + Write the square root of {OP_PTR, OP_SIZE} at ROOT_PTR. + Write the remainder at REM_PTR, if REM_PTR != NULL. + Return the size of the remainder. + (The size of the root is always half of the size of the operand.) + + OP_PTR and ROOT_PTR may not point to the same object. + OP_PTR and REM_PTR may point to the same object.
+ + If REM_PTR is NULL, only the root is computed and the return value of + the function is 0 if OP is a perfect square, and *any* non-zero number + otherwise. + +Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* This code is just correct if "unsigned char" has at least 8 bits. It + doesn't help to use CHAR_BIT from limits.h, as the real problem is + the static arrays. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Square root algorithm: + + 1. Shift OP (the input) to the left an even number of bits s.t. there + are an even number of words and either (or both) of the most + significant bits are set. This way, sqrt(OP) has exactly half as + many words as OP, and has its most significant bit set. + + 2. Get a 9-bit approximation to sqrt(OP) using the pre-computed tables. + This approximation is used for the first single-precision + iterations of Newton's method, yielding a full-word approximation + to sqrt(OP). + + 3. Perform multiple-precision Newton iteration until we have the + exact result. 
Only about half of the input operand is used in + this calculation, as the square root is perfectly determinable + from just the higher half of a number. */ + +/* Define this macro for IEEE P854 machines with a fast sqrt instruction. */ +#if defined __GNUC__ && ! defined __SOFT_FLOAT + +#if defined __sparc__ +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrtd %1,%0" : "=f" (__sqrt_res) : "f" (a)); \ + __sqrt_res; \ + }) +#endif + +#if defined __HAVE_68881__ +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrtx %1,%0" : "=f" (__sqrt_res) : "f" (a)); \ + __sqrt_res; \ + }) +#endif + +#if defined __hppa +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrt,dbl %1,%0" : "=fx" (__sqrt_res) : "fx" (a)); \ + __sqrt_res; \ + }) +#endif + +#if defined _ARCH_PWR2 +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrt %0,%1" : "=f" (__sqrt_res) : "f" (a)); \ + __sqrt_res; \ + }) +#endif + +#endif + +#ifndef SQRT + +/* Tables for initial approximation of the square root. These are + indexed with bits 1-8 of the operand for which the square root is + calculated, where bit 0 is the most significant non-zero bit. I.e. + the most significant one-bit is not used, since that per definition + is one. Likewise, the tables don't return the highest bit of the + result. That bit must be inserted by or:ing the returned value with + 0x100. This way, we get a 9-bit approximation from 8-bit tables! */ + +/* Table to be used for operands with an even total number of bits. + (Exactly as in the decimal system there are similarities between the + square root of numbers with the same initial digits and an even + difference in the total number of digits. Consider the square root + of 1, 10, 100, 1000, ...) 
*/ +static unsigned char even_approx_tab[256] = +{ + 0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e, + 0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74, + 0x75, 0x75, 0x76, 0x77, 0x77, 0x78, 0x79, 0x79, + 0x7a, 0x7b, 0x7b, 0x7c, 0x7d, 0x7d, 0x7e, 0x7f, + 0x80, 0x80, 0x81, 0x81, 0x82, 0x83, 0x83, 0x84, + 0x85, 0x85, 0x86, 0x87, 0x87, 0x88, 0x89, 0x89, + 0x8a, 0x8b, 0x8b, 0x8c, 0x8d, 0x8d, 0x8e, 0x8f, + 0x8f, 0x90, 0x90, 0x91, 0x92, 0x92, 0x93, 0x94, + 0x94, 0x95, 0x96, 0x96, 0x97, 0x97, 0x98, 0x99, + 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9c, 0x9d, 0x9e, + 0x9e, 0x9f, 0xa0, 0xa0, 0xa1, 0xa1, 0xa2, 0xa3, + 0xa3, 0xa4, 0xa4, 0xa5, 0xa6, 0xa6, 0xa7, 0xa7, + 0xa8, 0xa9, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xac, + 0xad, 0xad, 0xae, 0xaf, 0xaf, 0xb0, 0xb0, 0xb1, + 0xb2, 0xb2, 0xb3, 0xb3, 0xb4, 0xb5, 0xb5, 0xb6, + 0xb6, 0xb7, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xba, + 0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf, + 0xc0, 0xc0, 0xc1, 0xc1, 0xc2, 0xc2, 0xc3, 0xc3, + 0xc4, 0xc5, 0xc5, 0xc6, 0xc6, 0xc7, 0xc7, 0xc8, + 0xc9, 0xc9, 0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcc, + 0xcd, 0xce, 0xce, 0xcf, 0xcf, 0xd0, 0xd0, 0xd1, + 0xd1, 0xd2, 0xd3, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5, + 0xd6, 0xd6, 0xd7, 0xd7, 0xd8, 0xd9, 0xd9, 0xda, + 0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde, + 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe1, 0xe2, 0xe2, + 0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6, 0xe6, + 0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb, + 0xeb, 0xec, 0xec, 0xed, 0xed, 0xee, 0xee, 0xef, + 0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3, + 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, + 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, + 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff, +}; + +/* Table to be used for operands with an odd total number of bits. + (Further comments before previous table.) 
*/ +static unsigned char odd_approx_tab[256] = +{ + 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, + 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, + 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, + 0x0f, 0x10, 0x10, 0x10, 0x11, 0x11, 0x12, 0x12, + 0x13, 0x13, 0x14, 0x14, 0x15, 0x15, 0x16, 0x16, + 0x16, 0x17, 0x17, 0x18, 0x18, 0x19, 0x19, 0x1a, + 0x1a, 0x1b, 0x1b, 0x1b, 0x1c, 0x1c, 0x1d, 0x1d, + 0x1e, 0x1e, 0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21, + 0x21, 0x22, 0x22, 0x23, 0x23, 0x23, 0x24, 0x24, + 0x25, 0x25, 0x26, 0x26, 0x27, 0x27, 0x27, 0x28, + 0x28, 0x29, 0x29, 0x2a, 0x2a, 0x2a, 0x2b, 0x2b, + 0x2c, 0x2c, 0x2d, 0x2d, 0x2d, 0x2e, 0x2e, 0x2f, + 0x2f, 0x30, 0x30, 0x30, 0x31, 0x31, 0x32, 0x32, + 0x32, 0x33, 0x33, 0x34, 0x34, 0x35, 0x35, 0x35, + 0x36, 0x36, 0x37, 0x37, 0x37, 0x38, 0x38, 0x39, + 0x39, 0x39, 0x3a, 0x3a, 0x3b, 0x3b, 0x3b, 0x3c, + 0x3c, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3f, 0x3f, + 0x40, 0x40, 0x40, 0x41, 0x41, 0x41, 0x42, 0x42, + 0x43, 0x43, 0x43, 0x44, 0x44, 0x45, 0x45, 0x45, + 0x46, 0x46, 0x47, 0x47, 0x47, 0x48, 0x48, 0x49, + 0x49, 0x49, 0x4a, 0x4a, 0x4b, 0x4b, 0x4b, 0x4c, + 0x4c, 0x4c, 0x4d, 0x4d, 0x4e, 0x4e, 0x4e, 0x4f, + 0x4f, 0x50, 0x50, 0x50, 0x51, 0x51, 0x51, 0x52, + 0x52, 0x53, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55, + 0x55, 0x56, 0x56, 0x56, 0x57, 0x57, 0x57, 0x58, + 0x58, 0x59, 0x59, 0x59, 0x5a, 0x5a, 0x5a, 0x5b, + 0x5b, 0x5b, 0x5c, 0x5c, 0x5d, 0x5d, 0x5d, 0x5e, + 0x5e, 0x5e, 0x5f, 0x5f, 0x60, 0x60, 0x60, 0x61, + 0x61, 0x61, 0x62, 0x62, 0x62, 0x63, 0x63, 0x63, + 0x64, 0x64, 0x65, 0x65, 0x65, 0x66, 0x66, 0x66, + 0x67, 0x67, 0x67, 0x68, 0x68, 0x68, 0x69, 0x69, +}; +#endif + + +mp_size_t +#if __STDC__ +mpn_sqrtrem (mp_ptr root_ptr, mp_ptr rem_ptr, mp_srcptr op_ptr, mp_size_t op_size) +#else +mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) + mp_ptr root_ptr; + mp_ptr rem_ptr; + mp_srcptr op_ptr; + mp_size_t op_size; +#endif +{ + /* R (root result) */ + mp_ptr rp; /* Pointer to least significant word */ + 
mp_size_t rsize; /* The size in words */ + + /* T (OP shifted to the left a.k.a. normalized) */ + mp_ptr tp; /* Pointer to least significant word */ + mp_size_t tsize; /* The size in words */ + mp_ptr t_end_ptr; /* Pointer right beyond most sign. word */ + mp_limb_t t_high0, t_high1; /* The two most significant words */ + + /* TT (temporary for numerator/remainder) */ + mp_ptr ttp; /* Pointer to least significant word */ + + /* X (temporary for quotient in main loop) */ + mp_ptr xp; /* Pointer to least significant word */ + mp_size_t xsize; /* The size in words */ + + unsigned cnt; + mp_limb_t initial_approx; /* Initially made approximation */ + mp_size_t tsizes[BITS_PER_MP_LIMB]; /* Successive calculation precisions */ + mp_size_t tmp; + mp_size_t i; + + mp_limb_t cy_limb; + TMP_DECL (marker); + + /* If OP is zero, both results are zero. */ + if (op_size == 0) + return 0; + + count_leading_zeros (cnt, op_ptr[op_size - 1]); + tsize = op_size; + if ((tsize & 1) != 0) + { + cnt += BITS_PER_MP_LIMB; + tsize++; + } + + rsize = tsize / 2; + rp = root_ptr; + + TMP_MARK (marker); + + /* Shift OP an even number of bits into T, such that either the most or + the second most significant bit is set, and such that the number of + words in T becomes even. This way, the number of words in R=sqrt(OP) + is exactly half as many as in OP, and the most significant bit of R + is set. + + Also, the initial approximation is simplified by this up-shifted OP. + + Finally, the Newtonian iteration which is the main part of this + program performs division by R. The fast division routine expects + the divisor to be "normalized" in exactly the sense of having the + most significant bit set. 
*/ + + tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB); + + if ((cnt & ~1) % BITS_PER_MP_LIMB != 0) + t_high0 = mpn_lshift (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size, + (cnt & ~1) % BITS_PER_MP_LIMB); + else + MPN_COPY (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size); + + if (cnt >= BITS_PER_MP_LIMB) + tp[0] = 0; + + t_high0 = tp[tsize - 1]; + t_high1 = tp[tsize - 2]; /* Never stray. TSIZE is >= 2. */ + +/* Is there a fast sqrt instruction defined for this machine? */ +#ifdef SQRT + { + initial_approx = SQRT (t_high0 * 2.0 + * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)) + + t_high1); + /* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have + become incorrect due to overflow in the conversion from double to + mp_limb_t above. It will typically be zero in that case, but might be + a small number on some machines. The most significant bit of + INITIAL_APPROX should be set, so that bit is a good overflow + indication. */ + if ((mp_limb_signed_t) initial_approx >= 0) + initial_approx = ~(mp_limb_t)0; + } +#else + /* Get a 9 bit approximation from the tables. The tables expect to + be indexed with the 8 high bits right below the highest bit. + Also, the highest result bit is not returned by the tables, and + must be or:ed into the result. The scheme gives 9 bits of start + approximation with just 256-entry 8 bit tables. */ + + if ((cnt & 1) == 0) + { + /* The most sign bit of t_high0 is set. */ + initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1); + initial_approx &= 0xff; + initial_approx = even_approx_tab[initial_approx]; + } + else + { + /* The most significant bit of T_HIGH0 is unset, + the second most significant is set. */ + initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2); + initial_approx &= 0xff; + initial_approx = odd_approx_tab[initial_approx]; + } + initial_approx |= 0x100; + initial_approx <<= BITS_PER_MP_LIMB - 8 - 1; + + /* Perform small precision Newtonian iterations to get a full word + approximation. 
For small operands, these iteration will make the + entire job. */ + if (t_high0 == ~(mp_limb_t)0) + initial_approx = t_high0; + else + { + mp_limb_t quot; + + if (t_high0 >= initial_approx) + initial_approx = t_high0 + 1; + + /* First get about 18 bits with pure C arithmetics. */ + quot = t_high0 / (initial_approx >> BITS_PER_MP_LIMB/2) << BITS_PER_MP_LIMB/2; + initial_approx = (initial_approx + quot) / 2; + initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + + /* Now get a full word by one (or for > 36 bit machines) several + iterations. */ + for (i = 16; i < BITS_PER_MP_LIMB; i <<= 1) + { + mp_limb_t ignored_remainder; + + udiv_qrnnd (quot, ignored_remainder, + t_high0, t_high1, initial_approx); + initial_approx = (initial_approx + quot) / 2; + initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + } + } +#endif + + rp[0] = initial_approx; + rsize = 1; + +#ifdef DEBUG + printf ("\n\nT = "); + mpn_dump (tp, tsize); +#endif + + if (tsize > 2) + { + /* Determine the successive precisions to use in the iteration. We + minimize the precisions, beginning with the highest (i.e. last + iteration) to the lowest (i.e. first iteration). */ + + xp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB); + ttp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB); + + t_end_ptr = tp + tsize; + + tmp = tsize / 2; + for (i = 0;; i++) + { + tsize = (tmp + 1) / 2; + if (tmp == tsize) + break; + tsizes[i] = tsize + tmp; + tmp = tsize; + } + + /* Main Newton iteration loop. For big arguments, most of the + time is spent here. */ + + /* It is possible to do a great optimization here. The successive + divisors in the mpn_divmod call below has more and more leading + words equal to its predecessor. Therefore the beginning of + each division will repeat the same work as did the last + division. If we could guarantee that the leading words of two + consecutive divisors are the same (i.e. 
in this case, a later + divisor has just more digits at the end) it would be a simple + matter of just using the old remainder of the last division in + a subsequent division, to take care of this optimization. This + idea would surely make a difference even for small arguments. */ + + /* Loop invariants: + + R <= shiftdown_to_same_size(floor(sqrt(OP))) < R + 1. + X - 1 < shiftdown_to_same_size(floor(sqrt(OP))) <= X. + R <= shiftdown_to_same_size(X). */ + + while (--i >= 0) + { + mp_limb_t cy; +#ifdef DEBUG + mp_limb_t old_least_sign_r = rp[0]; + mp_size_t old_rsize = rsize; + + printf ("R = "); + mpn_dump (rp, rsize); +#endif + tsize = tsizes[i]; + + /* Need to copy the numerator into temporary space, as + mpn_divmod overwrites its numerator argument with the + remainder (which we currently ignore). */ + MPN_COPY (ttp, t_end_ptr - tsize, tsize); + cy = mpn_divmod (xp, ttp, tsize, rp, rsize); + xsize = tsize - rsize; + +#ifdef DEBUG + printf ("X =%d ", cy); + mpn_dump (xp, xsize); +#endif + + /* Add X and R with the most significant limbs aligned, + temporarily ignoring at least one limb at the low end of X. */ + tmp = xsize - rsize; + cy += mpn_add_n (xp + tmp, rp, xp + tmp, rsize); + + /* If T begins with more than 2 x BITS_PER_MP_LIMB of ones, we get + intermediate roots that'd need an extra bit. We don't want to + handle that since it would make the subsequent divisor + non-normalized, so round such roots down to be only ones in the + current precision. */ + if (cy == 2) + { + mp_size_t j; + for (j = xsize; j >= 0; j--) + xp[j] = ~(mp_limb_t)0; + } + + /* Divide X by 2 and put the result in R. This is the new + approximation. Shift in the carry from the addition. 
*/ + mpn_rshift (rp, xp, xsize, 1); + rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)); + rsize = xsize; +#ifdef DEBUG + if (old_least_sign_r != rp[rsize - old_rsize]) + printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n", + i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r, + 2 * BYTES_PER_MP_LIMB, rp[rsize - old_rsize]); +#endif + } + } + +#ifdef DEBUG + printf ("(final) R = "); + mpn_dump (rp, rsize); +#endif + + /* We computed the square root of OP * 2**(2*floor(cnt/2)). + This has resulted in R being 2**floor(cnt/2) to large. + Shift it down here to fix that. */ + if (cnt / 2 != 0) + { + mpn_rshift (rp, rp, rsize, cnt/2); + rsize -= rp[rsize - 1] == 0; + } + + /* Calculate the remainder. */ + mpn_mul_n (tp, rp, rp, rsize); + tsize = rsize + rsize; + tsize -= tp[tsize - 1] == 0; + if (op_size < tsize + || (op_size == tsize && mpn_cmp (op_ptr, tp, op_size) < 0)) + { + /* R is too large. Decrement it. */ + + /* These operations can't overflow. */ + cy_limb = mpn_sub_n (tp, tp, rp, rsize); + cy_limb += mpn_sub_n (tp, tp, rp, rsize); + mpn_sub_1 (tp + rsize, tp + rsize, tsize - rsize, cy_limb); + mpn_add_1 (tp, tp, tsize, (mp_limb_t) 1); + + mpn_sub_1 (rp, rp, rsize, (mp_limb_t) 1); + +#ifdef DEBUG + printf ("(adjusted) R = "); + mpn_dump (rp, rsize); +#endif + } + + if (rem_ptr != NULL) + { + cy_limb = mpn_sub (rem_ptr, op_ptr, op_size, tp, tsize); + MPN_NORMALIZE (rem_ptr, op_size); + TMP_FREE (marker); + return op_size; + } + else + { + int res; + res = op_size != tsize || mpn_cmp (op_ptr, tp, op_size); + TMP_FREE (marker); + return res; + } +} diff --git a/gnu/lib/libgmp/mpn/generic/sub_n.c b/gnu/lib/libgmp/mpn/generic/sub_n.c new file mode 100644 index 00000000000..9d4b216758b --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/sub_n.c @@ -0,0 +1,62 @@ +/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +#if __STDC__ +mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. 
*/ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to subtrahend */ + cy = (y < cy); /* get out carry from that addition */ + y = x - y; /* main subtract */ + cy = (y > x) + cy; /* get out carry from the subtract, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} diff --git a/gnu/lib/libgmp/mpn/generic/submul_1.c b/gnu/lib/libgmp/mpn/generic/submul_1.c new file mode 100644 index 00000000000..b144283bd98 --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/submul_1.c @@ -0,0 +1,65 @@ +/* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR + by S2_LIMB, subtract the S1_SIZE least significant limbs of the product + from the limb vector pointed to by RES_PTR. Return the most significant + limb of the product, adjusted for carry-out from the subtraction. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + register mp_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + x = res_ptr[j]; + prod_low = x - prod_low; + cy_limb += (prod_low > x); + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} diff --git a/gnu/lib/libgmp/mpn/generic/udiv_w_sdiv.c b/gnu/lib/libgmp/mpn/generic/udiv_w_sdiv.c new file mode 100644 index 00000000000..d9e71b78a0f --- /dev/null +++ b/gnu/lib/libgmp/mpn/generic/udiv_w_sdiv.c @@ -0,0 +1,125 @@ +/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed + division. + + Contributed by Peter L. Montgomery. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +mpn_udiv_w_sdiv (rp, a1, a0, d) + mp_limb_t *rp, a1, a0, d; +{ + mp_limb_t q, r; + mp_limb_t c0, c1, b1; + + if ((mp_limb_signed_t) d >= 0) + { + if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1))) + { + /* dividend, divisor, and quotient are nonnegative */ + sdiv_qrnnd (q, r, a1, a0, d); + } + else + { + /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */ + sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1)); + /* Divide (c1*2^32 + c0) by d */ + sdiv_qrnnd (q, r, c1, c0, d); + /* Add 2^31 to quotient */ + q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + } + } + else + { + b1 = d >> 1; /* d/2, between 2^30 and 2^31 - 1 */ + c1 = a1 >> 1; /* A/2 */ + c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1); + + if (a1 < b1) /* A < 2^32*b1, so A/2 < 2^31*b1 */ + { + sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */ + + r = 2*r + (a0 & 1); /* Remainder from A/(2*b1) */ + if ((d & 1) != 0) + { + if (r >= q) + r = r - q; + else if (q - r <= d) + { + r = r - q + d; + q--; + } + else + { + r = r - q + 2*d; + q -= 2; + } + } + } + else if (c1 < b1) /* So 2^31 <= (A/2)/b1 < 2^32 */ + { + c1 = (b1 - 1) - c1; + c0 = ~c0; /* logical NOT */ + + sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */ + + q = ~q; /* (A/2)/b1 */ + r = (b1 - 1) - r; + + r = 2*r + (a0 & 1); /* A/(2*b1) */ + + if ((d & 1) != 0) + { + if (r >= q) + r = r - q; + else if (q - r <= d) + { + r = r - q + d; + q--; + } + else + { + r = r - q + 2*d; + q -= 2; + } + } + } + else /* Implies c1 = b1 */ + { /* Hence a1 = d - 1 = 2*b1 - 1 */ + if (a0 >= -d) + { + q = -1; + r = a0 + d; + } + else + { + q = -2; + r = a0 + 2*d; + } + } + } + + *rp = r; + return q; +} diff --git a/gnu/lib/libgmp/mpn/hppa/README b/gnu/lib/libgmp/mpn/hppa/README new file mode 100644 index 00000000000..5a2d5fd9704 --- /dev/null +++ 
b/gnu/lib/libgmp/mpn/hppa/README @@ -0,0 +1,84 @@ +This directory contains mpn functions for various HP PA-RISC chips. Code +that runs faster on the PA7100 and later implementations, is in the pa7100 +directory. + +RELEVANT OPTIMIZATION ISSUES + + Load and Store timing + +On the PA7000 no memory instructions can issue the two cycles after a store. +For the PA7100, this is reduced to one cycle. + +The PA7100 has a lockup-free cache, so it helps to schedule loads and the +dependent instruction really far from each other. + +STATUS + +1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the + instructions below (but some sw pipelining is needed to avoid the + xmpyu-fstds delay): + + fldds s1_ptr + + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + + addc + stws res_ptr + addc + stws res_ptr + + addib Loop + +2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb + (asymptotically) on the PA7100, using the instructions below. With proper + sw pipelining and the unrolling level below, the speed becomes 8 + cycles/limb. + + fldds s1_ptr + fldds s1_ptr + + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + addc + addc + addc + addc + addc %r0,%r0,cy-limb + + ldws res_ptr + ldws res_ptr + ldws res_ptr + ldws res_ptr + add + stws res_ptr + addc + stws res_ptr + addc + stws res_ptr + addc + stws res_ptr + + addib diff --git a/gnu/lib/libgmp/mpn/hppa/add_n.s b/gnu/lib/libgmp/mpn/hppa/add_n.s new file mode 100644 index 00000000000..b4a14283625 --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/add_n.s @@ -0,0 +1,58 @@ +; HP-PA __mpn_add_n -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. 
+ +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; One might want to unroll this as for other processors, but it turns +; out that the data cache contention after a store makes such +; unrolling useless. We can't come under 5 cycles/limb anyway. + + .code + .export __mpn_add_n +__mpn_add_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,= -1,%r23,L$end ; check for (SIZE == 1) + add %r20,%r19,%r28 ; add first limbs ignoring cy + +L$loop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,<> -1,%r23,L$loop + addc %r20,%r19,%r28 + +L$end stws %r28,0(0,%r26) + bv 0(%r2) + addc %r0,%r0,%r28 + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/hppa1_1/addmul_1.s b/gnu/lib/libgmp/mpn/hppa/hppa1_1/addmul_1.s new file mode 100644 index 00000000000..0fdcb3cb207 --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/hppa1_1/addmul_1.s @@ -0,0 +1,102 @@ +; HP-PA-1.1 __mpn_addmul_1 -- Multiply a limb vector with a limb and +; add the result to a second limb vector. + +; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. 
+ +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r26 +; s1_ptr r25 +; size r24 +; s2_limb r23 + +; This runs at 11 cycles/limb on a PA7000. With the used instructions, it +; can not become faster due to data cache contention after a store. On the +; PA7100 it runs at 10 cycles/limb, and that can not be improved either, +; since only the xmpyu does not need the integer pipeline, so the only +; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb +; on the PA7100. + +; There are some ideas described in mul_1.s that applies to this code too. + + .code + .export __mpn_addmul_1 +__mpn_addmul_1 + .proc + .callinfo frame=64,no_calls + .entry + + ldo 64(%r30),%r30 + fldws,ma 4(%r25),%fr5 + stw %r23,-16(%r30) ; move s2_limb ... + addib,= -1,%r24,L$just_one_limb + fldws -16(%r30),%fr4 ; ... 
into fr4 + add %r0,%r0,%r0 ; clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 ; least significant limb in product + ldw -16(%r30),%r28 + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L$end + ldw -12(%r30),%r1 + +; Main loop +L$loop ldws 0(%r26),%r29 + fldws,ma 4(%r25),%fr5 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addc %r0,%r28,%r28 + addib,<> -1,%r24,L$loop + ldw -12(%r30),%r1 + +L$end ldw 0(%r26),%r29 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + ldws 0(%r26),%r29 + addc %r0,%r28,%r28 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + +L$just_one_limb + xmpyu %fr4,%fr5,%fr6 + ldw 0(%r26),%r29 + fstds %fr6,-16(%r30) + ldw -12(%r30),%r1 + ldw -16(%r30),%r28 + add %r29,%r1,%r19 + stw %r19,0(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/hppa1_1/mul_1.s b/gnu/lib/libgmp/mpn/hppa/hppa1_1/mul_1.s new file mode 100644 index 00000000000..cdd0c1d7fac --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/hppa1_1/mul_1.s @@ -0,0 +1,98 @@ +; HP-PA-1.1 __mpn_mul_1 -- Multiply a limb vector with a limb and store +; the result in a second limb vector. + +; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r26 +; s1_ptr r25 +; size r24 +; s2_limb r23 + +; This runs at 9 cycles/limb on a PA7000. With the used instructions, it can +; not become faster due to data cache contention after a store. On the +; PA7100 it runs at 7 cycles/limb, and that can not be improved either, since +; only the xmpyu does not need the integer pipeline, so the only dual-issue +; we will get are addc+xmpyu. Unrolling would not help either CPU. + +; We could use fldds to read two limbs at a time from the S1 array, and that +; could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and +; PA7100, respectively. We don't do that since it does not seem worth the +; (alignment) troubles... + +; At least the PA7100 is rumored to be able to deal with cache-misses +; without stalling instruction issue. If this is true, and the cache is +; actually also lockup-free, we should use a deeper software pipeline, and +; load from S1 very early! (The loads and stores to -12(sp) will surely be +; in the cache.) + + .code + .export __mpn_mul_1 +__mpn_mul_1 + .proc + .callinfo frame=64,no_calls + .entry + + ldo 64(%r30),%r30 + fldws,ma 4(%r25),%fr5 + stw %r23,-16(%r30) ; move s2_limb ... + addib,= -1,%r24,L$just_one_limb + fldws -16(%r30),%fr4 ; ... 
into fr4 + add %r0,%r0,%r0 ; clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 ; least significant limb in product + ldw -16(%r30),%r28 + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L$end + ldw -12(%r30),%r1 + +; Main loop +L$loop fldws,ma 4(%r25),%fr5 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addib,<> -1,%r24,L$loop + ldw -12(%r30),%r1 + +L$end stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + stws,ma %r19,4(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + +L$just_one_limb + xmpyu %fr4,%fr5,%fr6 + fstds %fr6,-16(%r30) + ldw -16(%r30),%r28 + ldo -64(%r30),%r30 + bv 0(%r2) + fstws %fr6R,0(%r26) + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/add_n.s b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/add_n.s new file mode 100644 index 00000000000..21fe1615474 --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/add_n.s @@ -0,0 +1,75 @@ +; HP-PA __mpn_add_n -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. +; This is optimized for the PA7100, where is runs at 4.25 cycles/limb + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + + .code + .export __mpn_add_n +__mpn_add_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,<= -5,%r23,L$rest + add %r20,%r19,%r28 ; add first limbs ignoring cy + +L$loop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addc %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addc %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addc %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -4,%r23,L$loop + addc %r20,%r19,%r28 + +L$rest addib,= 4,%r23,L$end + nop +L$eloop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -1,%r23,L$eloop + addc %r20,%r19,%r28 + +L$end stws %r28,0(0,%r26) + bv 0(%r2) + addc %r0,%r0,%r28 + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S new file mode 100644 index 00000000000..eb1d12bf6bc --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S @@ -0,0 +1,189 @@ +; HP-PA 7100/7200 __mpn_addmul_1 -- Multiply a limb vector with a limb and +; add the result to a second limb vector. + +; Copyright (C) 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. 
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + +; INPUT PARAMETERS +#define res_ptr %r26 +#define s1_ptr %r25 +#define size %r24 +#define s2_limb %r23 + +#define cylimb %r28 +#define s0 %r19 +#define s1 %r20 +#define s2 %r3 +#define s3 %r4 +#define lo0 %r21 +#define lo1 %r5 +#define lo2 %r6 +#define lo3 %r7 +#define hi0 %r22 +#define hi1 %r23 /* safe to reuse */ +#define hi2 %r29 +#define hi3 %r1 + + .code + .export __mpn_addmul_1 +__mpn_addmul_1 + .proc + .callinfo frame=128,no_calls + .entry + + ldo 128(%r30),%r30 + stws s2_limb,-16(%r30) + add %r0,%r0,cylimb ; clear cy and cylimb + addib,< -4,size,L$few_limbs + fldws -16(%r30),%fr31R + + ldo -112(%r30),%r31 + stw %r3,-96(%r30) + stw %r4,-92(%r30) + stw %r5,-88(%r30) + stw %r6,-84(%r30) + stw %r7,-80(%r30) + + bb,>=,n s1_ptr,29,L$0 + + fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r31) + ldws -16(%r31),cylimb + ldws -12(%r31),lo0 + add s0,lo0,s0 + addib,< -1,size,L$few_limbs + stws,ma s0,4(res_ptr) + +; start software pipeline ---------------------------------------------------- +L$0 fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + xmpyu %fr4L,%fr31R,%fr5 + xmpyu %fr4R,%fr31R,%fr6 + xmpyu %fr8L,%fr31R,%fr9 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + fstds %fr6,-8(%r31) + fstds %fr9,0(%r31) + fstds %fr10,8(%r31) + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + 
addc lo1,hi0,lo1 + addc lo2,hi1,lo2 + addc lo3,hi2,lo3 + + addib,< -4,size,L$end + addc %r0,hi3,cylimb ; propagate carry into cylimb +; main loop ------------------------------------------------------------------ +L$loop fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + ldws 0(res_ptr),s0 + xmpyu %fr4L,%fr31R,%fr5 + ldws 4(res_ptr),s1 + xmpyu %fr4R,%fr31R,%fr6 + ldws 8(res_ptr),s2 + xmpyu %fr8L,%fr31R,%fr9 + ldws 12(res_ptr),s3 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + add s0,lo0,s0 + fstds %fr6,-8(%r31) + addc s1,lo1,s1 + fstds %fr9,0(%r31) + addc s2,lo2,s2 + fstds %fr10,8(%r31) + addc s3,lo3,s3 + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + stws,ma s0,4(res_ptr) + addc lo1,hi0,lo1 + stws,ma s1,4(res_ptr) + addc lo2,hi1,lo2 + stws,ma s2,4(res_ptr) + addc lo3,hi2,lo3 + stws,ma s3,4(res_ptr) + + addib,>= -4,size,L$loop + addc %r0,hi3,cylimb ; propagate carry into cylimb +; finish software pipeline --------------------------------------------------- +L$end ldws 0(res_ptr),s0 + ldws 4(res_ptr),s1 + ldws 8(res_ptr),s2 + ldws 12(res_ptr),s3 + + add s0,lo0,s0 + stws,ma s0,4(res_ptr) + addc s1,lo1,s1 + stws,ma s1,4(res_ptr) + addc s2,lo2,s2 + stws,ma s2,4(res_ptr) + addc s3,lo3,s3 + stws,ma s3,4(res_ptr) + +; restore callee-saves registers --------------------------------------------- + ldw -96(%r30),%r3 + ldw -92(%r30),%r4 + ldw -88(%r30),%r5 + ldw -84(%r30),%r6 + ldw -80(%r30),%r7 + +L$few_limbs + addib,=,n 4,size,L$ret +L$loop2 fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r30) + ldws -16(%r30),hi0 + ldws -12(%r30),lo0 + addc lo0,cylimb,lo0 + addc %r0,hi0,cylimb + add s0,lo0,s0 + stws,ma s0,4(res_ptr) + addib,<> -1,size,L$loop2 + nop + +L$ret addc %r0,cylimb,cylimb + bv 0(%r2) + ldo -128(%r30),%r30 + + .exit + .procend diff --git 
a/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/lshift.s b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/lshift.s new file mode 100644 index 00000000000..4c74a505ae8 --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/lshift.s @@ -0,0 +1,83 @@ +; HP-PA __mpn_lshift -- +; This is optimized for the PA7100, where it runs at 3.25 cycles/limb + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA.
+ + +; INPUT PARAMETERS +; res_ptr gr26 +; s_ptr gr25 +; size gr24 +; cnt gr23 + +; __mpn_lshift shifts the size-limb vector at s_ptr left by cnt bits and stores +; the result at res_ptr. Both pointers are first advanced by size limbs and the +; vector is then walked from the most significant limb downward. The bits +; shifted out of the top limb are left in %r28. The main loop is unrolled four +; times; L$eloop handles the remaining limbs. + + .code + .export __mpn_lshift +__mpn_lshift + .proc + .callinfo frame=64,no_calls + .entry + + sh2add %r24,%r25,%r25 + sh2add %r24,%r26,%r26 + ldws,mb -4(0,%r25),%r22 + subi 32,%r23,%r1 + mtsar %r1 ; shift amount register = 32 - cnt, so each vshd below acts as a left shift by cnt + addib,= -1,%r24,L$0004 + vshd %r0,%r22,%r28 ; compute carry out limb + ldws,mb -4(0,%r25),%r29 + addib,<= -5,%r24,L$rest + vshd %r22,%r29,%r20 + +; %r22 and %r29 alternate as the newer and older source limb; %r20 holds the +; result limb waiting to be stored. +L$loop ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + vshd %r22,%r29,%r20 + ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + addib,> -4,%r24,L$loop + vshd %r22,%r29,%r20 + +L$rest addib,= 4,%r24,L$end1 + nop +L$eloop ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + addib,<= -1,%r24,L$end2 + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + addib,> -1,%r24,L$eloop + vshd %r22,%r29,%r20 + +; L$end1 is used when the last limb loaded is in %r29, L$end2/L$0004 when it is +; in %r22; the lowest result limb is that limb shifted with zeros from %r0. +L$end1 stws,mb %r20,-4(0,%r26) + vshd %r29,%r0,%r20 + bv 0(%r2) + stw %r20,-4(0,%r26) +L$end2 stws,mb %r20,-4(0,%r26) +L$0004 vshd %r22,%r0,%r20 + bv 0(%r2) + stw %r20,-4(0,%r26) + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/rshift.s b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/rshift.s new file mode 100644 index 00000000000..845418c5363 --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/rshift.s @@ -0,0 +1,80 @@ +; HP-PA __mpn_rshift -- +; This is optimized for the PA7100, where it runs at 3.25 cycles/limb + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version.
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s_ptr gr25 +; size gr24 +; cnt gr23 + +; __mpn_rshift shifts the size-limb vector at s_ptr right by cnt bits and stores +; the result at res_ptr, walking upward from the least significant limb. The +; bits shifted out of the lowest limb are left in the high end of %r28. The +; main loop is unrolled four times; L$eloop handles the remaining limbs. + + .code + .export __mpn_rshift +__mpn_rshift + .proc + .callinfo frame=64,no_calls + .entry + + ldws,ma 4(0,%r25),%r22 + mtsar %r23 ; shift amount register = cnt for every vshd below + addib,= -1,%r24,L$0004 + vshd %r22,%r0,%r28 ; compute carry out limb + ldws,ma 4(0,%r25),%r29 + addib,<= -5,%r24,L$rest + vshd %r29,%r22,%r20 + +; %r22 and %r29 alternate as the newer and older source limb; %r20 holds the +; result limb waiting to be stored. +L$loop ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + vshd %r29,%r22,%r20 + ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,> -4,%r24,L$loop + vshd %r29,%r22,%r20 + +L$rest addib,= 4,%r24,L$end1 + nop +L$eloop ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + addib,<= -1,%r24,L$end2 + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,> -1,%r24,L$eloop + vshd %r29,%r22,%r20 + +; L$end1 is used when the last limb loaded is in %r29, L$end2/L$0004 when it is +; in %r22; the top result limb is that limb shifted with zeros from %r0. +L$end1 stws,ma %r20,4(0,%r26) + vshd %r0,%r29,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) +L$end2 stws,ma %r20,4(0,%r26) +L$0004 vshd %r0,%r22,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/sub_n.s b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/sub_n.s new file mode 100644 index 00000000000..1e1ebcf911c --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/sub_n.s @@ -0,0 +1,76 @@ +; HP-PA __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector.
+; This is optimized for the PA7100, where it runs at 4.25 cycles/limb + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; The difference limb for position i is computed one step ahead and held in +; %r28 until it is stored at the start of the next step; the borrow travels in +; the carry bit between the sub/subb instructions, so nothing that changes the +; carry bit may be placed between them. The main loop is unrolled four times; +; L$eloop handles the remaining limbs. On return %r28 holds 1 minus the final +; carry bit. + + .code + .export __mpn_sub_n +__mpn_sub_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,<= -5,%r23,L$rest + sub %r20,%r19,%r28 ; subtract first limbs ignoring cy + +L$loop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -4,%r23,L$loop + subb %r20,%r19,%r28 + +L$rest addib,= 4,%r23,L$end + nop +L$eloop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -1,%r23,L$eloop + subb %r20,%r19,%r28 + +L$end stws %r28,0(0,%r26) + addc %r0,%r0,%r28 ; %r28 = carry bit left by the last sub/subb + bv 0(%r2) + subi 1,%r28,%r28 ; delay slot: return 1 - carry (PA-RISC leaves carry set when no borrow occurred) + + .exit + .procend diff --git
a/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/submul_1.S b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/submul_1.S new file mode 100644 index 00000000000..a71176e686d --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/hppa1_1/pa7100/submul_1.S @@ -0,0 +1,195 @@ +; HP-PA 7100/7200 __mpn_submul_1 -- Multiply a limb vector with a limb and +; subtract the result from a second limb vector. + +; Copyright (C) 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ +; INPUT PARAMETERS +#define res_ptr %r26 +#define s1_ptr %r25 +#define size %r24 +#define s2_limb %r23 + +#define cylimb %r28 +#define s0 %r19 +#define s1 %r20 +#define s2 %r3 +#define s3 %r4 +#define lo0 %r21 +#define lo1 %r5 +#define lo2 %r6 +#define lo3 %r7 +#define hi0 %r22 +#define hi1 %r23 /* safe to reuse */ +#define hi2 %r29 +#define hi3 %r1 + +; __mpn_submul_1: multiply each of the size limbs at s1_ptr by s2_limb and +; subtract the products from the limbs at res_ptr in place. The running high +; limb and borrow are accumulated in cylimb (%r28), which is still live at the +; return. Because sub/subb leave the carry bit set when no borrow occurred, +; each group of subtractions is followed by an invert-cy step so that the addc +; chain that follows adds the borrow into the next product. +; Four limbs are processed per pass of L$loop; leftover limbs go through +; L$loop2 one at a time. + + .code + .export __mpn_submul_1 +__mpn_submul_1 + .proc + .callinfo frame=128,no_calls + .entry + + ldo 128(%r30),%r30 + stws s2_limb,-16(%r30) + add %r0,%r0,cylimb ; clear cy and cylimb + addib,< -4,size,L$few_limbs + fldws -16(%r30),%fr31R ; delay slot: s2_limb reaches %fr31R through the stack slot + +; %r31 points at the 32-byte product scratch area; %r3-%r7 are saved below +; because they are used as s2, s3, lo1, lo2 and lo3. + ldo -112(%r30),%r31 + stw %r3,-96(%r30) + stw %r4,-92(%r30) + stw %r5,-88(%r30) + stw %r6,-84(%r30) + stw %r7,-80(%r30) + + bb,>=,n s1_ptr,29,L$0 ; bit 29 of s1_ptr clear: skip the one-limb prologue (presumably so the fldds loads below are 8-byte aligned -- confirm) + + fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r31) + ldws -16(%r31),cylimb + ldws -12(%r31),lo0 + sub s0,lo0,s0 + add s0,lo0,%r0 ; invert cy + addib,< -1,size,L$few_limbs + stws,ma s0,4(res_ptr) + +; start software pipeline ---------------------------------------------------- +L$0 fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + xmpyu %fr4L,%fr31R,%fr5 + xmpyu %fr4R,%fr31R,%fr6 + xmpyu %fr8L,%fr31R,%fr9 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + fstds %fr6,-8(%r31) + fstds %fr9,0(%r31) + fstds %fr10,8(%r31) + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + addc lo1,hi0,lo1 + addc lo2,hi1,lo2 + addc lo3,hi2,lo3 + + addib,< -4,size,L$end + addc %r0,hi3,cylimb ; propagate carry into cylimb +; main loop ------------------------------------------------------------------ +L$loop fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + ldws 0(res_ptr),s0 + xmpyu %fr4L,%fr31R,%fr5 + ldws 4(res_ptr),s1 + xmpyu %fr4R,%fr31R,%fr6 + ldws 8(res_ptr),s2 + xmpyu %fr8L,%fr31R,%fr9 + ldws 12(res_ptr),s3 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31)
+ sub s0,lo0,s0 + fstds %fr6,-8(%r31) + subb s1,lo1,s1 + fstds %fr9,0(%r31) + subb s2,lo2,s2 + fstds %fr10,8(%r31) + subb s3,lo3,s3 + subb %r0,%r0,lo0 ; these two insns ... + add lo0,lo0,%r0 ; ... just invert cy + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + stws,ma s0,4(res_ptr) + addc lo1,hi0,lo1 + stws,ma s1,4(res_ptr) + addc lo2,hi1,lo2 + stws,ma s2,4(res_ptr) + addc lo3,hi2,lo3 + stws,ma s3,4(res_ptr) + + addib,>= -4,size,L$loop + addc %r0,hi3,cylimb ; propagate carry into cylimb +; finish software pipeline --------------------------------------------------- +L$end ldws 0(res_ptr),s0 + ldws 4(res_ptr),s1 + ldws 8(res_ptr),s2 + ldws 12(res_ptr),s3 + + sub s0,lo0,s0 + stws,ma s0,4(res_ptr) + subb s1,lo1,s1 + stws,ma s1,4(res_ptr) + subb s2,lo2,s2 + stws,ma s2,4(res_ptr) + subb s3,lo3,s3 + stws,ma s3,4(res_ptr) + subb %r0,%r0,lo0 ; these two insns ... + add lo0,lo0,%r0 ; ... invert cy + +; restore callee-saves registers --------------------------------------------- + ldw -96(%r30),%r3 + ldw -92(%r30),%r4 + ldw -88(%r30),%r5 + ldw -84(%r30),%r6 + ldw -80(%r30),%r7 + +L$few_limbs +; size is biased by -4 at this point; adding 4 back gives the limbs still to +; do, and zero means nothing is left. + addib,=,n 4,size,L$ret +L$loop2 fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r30) + ldws -16(%r30),hi0 + ldws -12(%r30),lo0 + addc lo0,cylimb,lo0 + addc %r0,hi0,cylimb + sub s0,lo0,s0 + add s0,lo0,%r0 ; invert cy + stws,ma s0,4(res_ptr) + addib,<> -1,size,L$loop2 + nop + +L$ret addc %r0,cylimb,cylimb ; fold the last pending borrow into cylimb + bv 0(%r2) + ldo -128(%r30),%r30 ; delay slot: release the 128-byte frame + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/hppa1_1/submul_1.s b/gnu/lib/libgmp/mpn/hppa/hppa1_1/submul_1.s new file mode 100644 index 00000000000..a4a385467e8 --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/hppa1_1/submul_1.s @@ -0,0 +1,111 @@ +; HP-PA-1.1 __mpn_submul_1 -- Multiply a limb vector with a limb and +; subtract the result from a second limb vector.
+ +; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r26 +; s1_ptr r25 +; size r24 +; s2_limb r23 + +; This runs at 12 cycles/limb on a PA7000. With the used instructions, it +; can not become faster due to data cache contention after a store. On the +; PA7100 it runs at 11 cycles/limb, and that can not be improved either, +; since only the xmpyu does not need the integer pipeline, so the only +; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb +; on the PA7100. + +; There are some ideas described in mul_1.s that apply to this code too. + +; It seems possible to make this run as fast as __mpn_addmul_1, if we use +; sub,>>= %r29,%r19,%r22 +; addi 1,%r28,%r28 +; but that requires reworking the hairy software pipeline... + +; Register use in the code below: %fr4 holds s2_limb, %r19 the low product limb +; (plus incoming carry) to subtract next, %r28 the running high limb, %r29 the +; limb loaded from res_ptr and %r22 the difference that is stored back. +; Products are moved from the FPU to integer registers through the 8-byte +; stack slot at -16(%r30). + + .code + .export __mpn_submul_1 +__mpn_submul_1 + .proc + .callinfo frame=64,no_calls + .entry + + ldo 64(%r30),%r30 + fldws,ma 4(%r25),%fr5 + stw %r23,-16(%r30) ; move s2_limb ... + addib,= -1,%r24,L$just_one_limb + fldws -16(%r30),%fr4 ; ...
into fr4 + add %r0,%r0,%r0 ; clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 ; least significant limb in product + ldw -16(%r30),%r28 + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L$end + ldw -12(%r30),%r1 + +; Main loop +; The add that follows each sub discards its result in %r0; it is there only to +; set the carry bit exactly when the sub borrowed, so the addc instructions can +; add the borrow into the next product. +L$loop ldws 0(%r26),%r29 + fldws,ma 4(%r25),%fr5 + sub %r29,%r19,%r22 + add %r22,%r19,%r0 + stws,ma %r22,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addc %r0,%r28,%r28 + addib,<> -1,%r24,L$loop + ldw -12(%r30),%r1 + +; L$end drains the pipeline: it stores the last two result limbs and leaves the +; final high limb plus borrow in %r28. +L$end ldw 0(%r26),%r29 + sub %r29,%r19,%r22 + add %r22,%r19,%r0 + stws,ma %r22,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + ldws 0(%r26),%r29 + addc %r0,%r28,%r28 + sub %r29,%r19,%r22 + add %r22,%r19,%r0 + stws,ma %r22,4(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + +; size == 1: a single product, no pipelining needed. +L$just_one_limb + xmpyu %fr4,%fr5,%fr6 + ldw 0(%r26),%r29 + fstds %fr6,-16(%r30) + ldw -12(%r30),%r1 + ldw -16(%r30),%r28 + sub %r29,%r1,%r22 + add %r22,%r1,%r0 + stw %r22,0(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/hppa1_1/udiv_qrnnd.s b/gnu/lib/libgmp/mpn/hppa/hppa1_1/udiv_qrnnd.s new file mode 100644 index 00000000000..bf7dc70cd7c --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/hppa1_1/udiv_qrnnd.s @@ -0,0 +1,75 @@ +; HP-PA __udiv_qrnnd division support, used from longlong.h. +; This version runs fast on PA 7000 and later. + +; Copyright (C) 1993, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version.
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; rem_ptr gr26 +; n1 gr25 +; n0 gr24 +; d gr23 + +; Divides the two-limb value (n1,n0) by d using the floating-point unit: both +; operands are converted to double, divided, and the quotient is converted +; back to an integer. The remainder is then recomputed as n - q*d with integer +; arithmetic, and if that subtraction leaves a nonzero high word the quotient +; is decremented and d is added back to the remainder. The quotient is left in +; %r28 and the remainder is stored through rem_ptr. +; L$0000 is the double-precision constant 2^64 (0x43f00000 00000000); it is +; added when n1 has its top bit set, because the integer-to-double conversion +; treats the 64-bit value as signed. + + .code +L$0000 .word 0x43f00000 + .word 0x0 + .export __udiv_qrnnd +__udiv_qrnnd + .proc + .callinfo frame=64,no_calls + .entry + ldo 64(%r30),%r30 + + stws %r25,-16(0,%r30) ; n_hi + stws %r24,-12(0,%r30) ; n_lo + ldil L'L$0000,%r19 + ldo R'L$0000(%r19),%r19 + fldds -16(0,%r30),%fr5 + stws %r23,-12(0,%r30) ; d, reloaded below as the low word of %fr6 + comib,<= 0,%r25,L$1 ; skip the 2^64 correction when n1 is non-negative as a signed word + fcnvxf,dbl,dbl %fr5,%fr5 + fldds 0(0,%r19),%fr4 + fadd,dbl %fr4,%fr5,%fr5 +L$1 + fcpy,sgl %fr0,%fr6L ; presumably zeroes the high word so %fr6 holds d as a 64-bit integer -- confirm %fr0 reads as zero here + fldws -12(0,%r30),%fr6R + fcnvxf,dbl,dbl %fr6,%fr4 + + fdiv,dbl %fr5,%fr4,%fr5 + + fcnvfx,dbl,dbl %fr5,%fr4 + fstws %fr4R,-16(%r30) + xmpyu %fr4R,%fr6R,%fr6 ; q * d, used to recompute the remainder + ldws -16(%r30),%r28 + fstds %fr6,-16(0,%r30) + ldws -12(0,%r30),%r21 + ldws -16(0,%r30),%r20 + sub %r24,%r21,%r22 + subb %r25,%r20,%r19 + comib,= 0,%r19,L$2 ; high word of n - q*d is zero: q and the remainder are final + ldo -64(%r30),%r30 + + add %r22,%r23,%r22 + ldo -1(%r28),%r28 +L$2 bv 0(%r2) + stws %r22,0(0,%r26) + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/lshift.s b/gnu/lib/libgmp/mpn/hppa/lshift.s new file mode 100644 index 00000000000..abac6ec201b --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/lshift.s @@ -0,0 +1,66 @@ +; HP-PA __mpn_lshift -- + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library.
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s_ptr gr25 +; size gr24 +; cnt gr23 + +; __mpn_lshift shifts the size-limb vector at s_ptr left by cnt bits and stores +; the result at res_ptr. Both pointers are first advanced by size limbs and the +; vector is then walked from the most significant limb downward, two limbs per +; loop pass. The bits shifted out of the top limb are left in %r28. + + .code + .export __mpn_lshift +__mpn_lshift + .proc + .callinfo frame=64,no_calls + .entry + + sh2add %r24,%r25,%r25 + sh2add %r24,%r26,%r26 + ldws,mb -4(0,%r25),%r22 + subi 32,%r23,%r1 + mtsar %r1 ; shift amount register = 32 - cnt, so each vshd below acts as a left shift by cnt + addib,= -1,%r24,L$0004 + vshd %r0,%r22,%r28 ; compute carry out limb + ldws,mb -4(0,%r25),%r29 + addib,= -1,%r24,L$0002 + vshd %r22,%r29,%r20 + +; %r22 and %r29 alternate as the newer and older source limb; %r20 holds the +; result limb waiting to be stored. +L$loop ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + addib,= -1,%r24,L$0003 + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + addib,<> -1,%r24,L$loop + vshd %r22,%r29,%r20 + +; L$0002 is used when the last limb loaded is in %r29, L$0003/L$0004 when it is +; in %r22; the lowest result limb is that limb shifted with zeros from %r0. +L$0002 stws,mb %r20,-4(0,%r26) + vshd %r29,%r0,%r20 + bv 0(%r2) + stw %r20,-4(0,%r26) +L$0003 stws,mb %r20,-4(0,%r26) +L$0004 vshd %r22,%r0,%r20 + bv 0(%r2) + stw %r20,-4(0,%r26) + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/rshift.s b/gnu/lib/libgmp/mpn/hppa/rshift.s new file mode 100644 index 00000000000..c1480e5abff --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/rshift.s @@ -0,0 +1,63 @@ +; HP-PA __mpn_rshift -- + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library.
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s_ptr gr25 +; size gr24 +; cnt gr23 + +; __mpn_rshift shifts the size-limb vector at s_ptr right by cnt bits and stores +; the result at res_ptr, walking upward from the least significant limb, two +; limbs per loop pass. The bits shifted out of the lowest limb are left in the +; high end of %r28. + + .code + .export __mpn_rshift +__mpn_rshift + .proc + .callinfo frame=64,no_calls + .entry + + ldws,ma 4(0,%r25),%r22 + mtsar %r23 ; shift amount register = cnt for every vshd below + addib,= -1,%r24,L$0004 + vshd %r22,%r0,%r28 ; compute carry out limb + ldws,ma 4(0,%r25),%r29 + addib,= -1,%r24,L$0002 + vshd %r29,%r22,%r20 + +; %r22 and %r29 alternate as the newer and older source limb; %r20 holds the +; result limb waiting to be stored. +L$loop ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + addib,= -1,%r24,L$0003 + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,<> -1,%r24,L$loop + vshd %r29,%r22,%r20 + +; L$0002 is used when the last limb loaded is in %r29, L$0003/L$0004 when it is +; in %r22; the top result limb is that limb shifted with zeros from %r0. +L$0002 stws,ma %r20,4(0,%r26) + vshd %r0,%r29,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) +L$0003 stws,ma %r20,4(0,%r26) +L$0004 vshd %r0,%r22,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/sub_n.s b/gnu/lib/libgmp/mpn/hppa/sub_n.s new file mode 100644 index 00000000000..04fa3e1e33d --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/sub_n.s @@ -0,0 +1,59 @@ +; HP-PA __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library.
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; One might want to unroll this as for other processors, but it turns +; out that the data cache contention after a store makes such +; unrolling useless. We can't come under 5 cycles/limb anyway. + +; The difference limb for position i is computed one step ahead and held in +; %r28 until it is stored at the start of the next step; the borrow travels in +; the carry bit from sub to subb, so nothing that changes the carry bit may be +; placed between them. On return %r28 holds 1 minus the final carry bit. + + .code + .export __mpn_sub_n +__mpn_sub_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,= -1,%r23,L$end ; check for (SIZE == 1) + sub %r20,%r19,%r28 ; subtract first limbs ignoring cy + +L$loop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,<> -1,%r23,L$loop + subb %r20,%r19,%r28 + +L$end stws %r28,0(0,%r26) + addc %r0,%r0,%r28 ; %r28 = carry bit left by the last sub/subb + bv 0(%r2) + subi 1,%r28,%r28 ; delay slot: return 1 - carry (PA-RISC leaves carry set when no borrow occurred) + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/hppa/udiv_qrnnd.s b/gnu/lib/libgmp/mpn/hppa/udiv_qrnnd.s new file mode 100644 index 00000000000..9b45eb40df1 --- /dev/null +++ b/gnu/lib/libgmp/mpn/hppa/udiv_qrnnd.s @@ -0,0 +1,286 @@ +; HP-PA __udiv_qrnnd division support, used from longlong.h. +; This version runs fast on pre-PA7000 CPUs. + +; Copyright (C) 1993, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library.
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; rem_ptr gr26 +; n1 gr25 +; n0 gr24 +; d gr23 + +; The code size is a bit excessive. We could merge the last two ds;addc +; sequences by simply moving the "bb,< Odd" instruction down. The only +; trouble is the FFFFFFFF code that would need some hacking. + +; Divides the two-limb value (n1,n0) by d with fully unrolled divide-step +; sequences: each ds instruction is paired with an addc that shifts n0 left by +; one bit and shifts the previous step result in at the bottom. The quotient is +; left in %r28 and the remainder is stored through rem_ptr. +; Three paths exist. Divisors that are non-negative as signed words use the +; first sequence directly. Divisors with the top bit set branch to +; L$largedivisor, which halves the dividend and divides by d >> 1 (even d) or, +; at L$odd, by d / 2 + 1 followed by a correction. L$FF.. handles +; d == 0xFFFFFFFF, where computing d / 2 + 1 overflows. +; Do not reorder or change the number of ds/addc pairs: the carry bit links +; every instruction in each sequence.
+ + .code + .export __udiv_qrnnd +__udiv_qrnnd + .proc + .callinfo frame=0,no_calls + .entry + + comb,< %r23,0,L$largedivisor + sub %r0,%r23,%r1 ; clear cy as side-effect + ds %r0,%r1,%r0 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r28 + ds %r25,%r23,%r25 + comclr,>= %r25,%r0,%r0 + addl %r25,%r23,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r28,%r28,%r28 + +L$largedivisor +; reached when d is negative as a signed word, i.e. its top bit is set; the sub +; in the delay slot of the comb above has already left -d in %r1 + extru %r24,31,1,%r19 ; r19 = n0 & 1 + bb,< %r23,31,L$odd + extru %r23,30,31,%r22 ; r22 = d >> 1 + shd %r25,%r24,1,%r24 ; r24 = new n0 + extru %r25,30,31,%r25 ; r25 = new n1 + sub %r0,%r22,%r21 + ds %r0,%r21,%r0 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc
%r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + comclr,>= %r25,%r0,%r0 + addl %r25,%r22,%r25 + sh1addl %r25,%r19,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r24,%r24,%r28 + +L$odd addib,sv,n 1,%r22,L$FF..
; r22 = (d / 2 + 1) + shd %r25,%r24,1,%r24 ; r24 = new n0 + extru %r25,30,31,%r25 ; r25 = new n1 + sub %r0,%r22,%r21 + ds %r0,%r21,%r0 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r28 + comclr,>= %r25,%r0,%r0 + addl %r25,%r22,%r25 + sh1addl %r25,%r19,%r25 +; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25 + add,nuv %r28,%r25,%r25 + addl %r25,%r1,%r25 + addc %r0,%r28,%r28 + sub,<< %r25,%r23,%r0 + addl %r25,%r1,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r0,%r28,%r28 + +; This is just a special case of the code above. +; We come here when d == 0xFFFFFFFF +L$FF..
add,uv %r25,%r24,%r24 + sub,<< %r24,%r23,%r0 + ldo 1(%r24),%r24 + stws %r24,0(0,%r26) + bv 0(%r2) + addc %r0,%r25,%r28 + + .exit + .procend diff --git a/gnu/lib/libgmp/mpn/i960/README b/gnu/lib/libgmp/mpn/i960/README new file mode 100644 index 00000000000..d68a0a83eb2 --- /dev/null +++ b/gnu/lib/libgmp/mpn/i960/README @@ -0,0 +1,9 @@ +This directory contains mpn functions for Intel i960 processors. + +RELEVANT OPTIMIZATION ISSUES + +The code in this directory is not well optimized. + +STATUS + +The code in this directory has not been tested. diff --git a/gnu/lib/libgmp/mpn/i960/add_n.s b/gnu/lib/libgmp/mpn/i960/add_n.s new file mode 100644 index 00000000000..6e674822c58 --- /dev/null +++ b/gnu/lib/libgmp/mpn/i960/add_n.s @@ -0,0 +1,43 @@ +# I960 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +# sum in a third limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA.
+ +.text + .align 4 + .globl ___mpn_add_n +___mpn_add_n: + mov 0,g6 # clear carry-save register + cmpo 1,0 # clear cy + +# Register use, as established by the loop below: g0 = res_ptr, g1 = s1_ptr, +# g2 = s2_ptr, g3 = size (counted down to zero). g4/g5 hold the two source +# limbs. The loop-closing cmpobne disturbs the condition code, so the carry is +# parked in g6 by subc and re-established by cmpo at the top of each pass. +# NOTE(review): the i960 README states this code has not been tested; verify +# the g6 carry-save encoding and the value returned in g0 before relying on it. +Loop: subo 1,g3,g3 # update loop counter + ld (g1),g5 # load from s1_ptr + addo 4,g1,g1 # s1_ptr++ + ld (g2),g4 # load from s2_ptr + addo 4,g2,g2 # s2_ptr++ + cmpo g6,1 # restore cy from g6, relies on cy being 0 + addc g4,g5,g4 # main add + subc 0,0,g6 # save cy in g6 + st g4,(g0) # store result to res_ptr + addo 4,g0,g0 # res_ptr++ + cmpobne 0,g3,Loop # when branch is taken, clears C bit + + mov g6,g0 # return the saved carry word + ret diff --git a/gnu/lib/libgmp/mpn/i960/addmul_1.s b/gnu/lib/libgmp/mpn/i960/addmul_1.s new file mode 100644 index 00000000000..db53f64e21f --- /dev/null +++ b/gnu/lib/libgmp/mpn/i960/addmul_1.s @@ -0,0 +1,48 @@ +# I960 __mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA.
+# In: g0 = res_ptr, g1 = s1_ptr, g2 = size, g3 = s2_limb.  Out: g0 = carry limb.
+.text
+	.align 4
+	.globl ___mpn_addmul_1	# was ___mpn_mul_1: clashed with mul_1.s and left __mpn_addmul_1 undefined
+___mpn_addmul_1:
+	subo	g2,0,g2		# g2 = -size; the index counts up towards zero
+	shlo	2,g2,g4		# g4 = -size * 4 (byte offset)
+	subo	g4,g1,g1	# g1 = s1_ptr + size * 4
+	subo	g4,g0,g13	# g13 = res_ptr + size * 4
+	mov	0,g0		# g0 = running carry limb, initially 0
+
+	cmpo	1,0		# clear C bit on AC.cc
+
+Loop:	ld	(g1)[g2*4],g5	# next source limb
+	emul	g3,g5,g6	# 64-bit product s2_limb * limb, low word g6, high word g7
+	ld	(g13)[g2*4],g5	# current result limb
+
+	addc	g0,g6,g6	# relies on that C bit is clear
+	addc	0,g7,g7
+	addc	g5,g6,g6	# relies on that C bit is clear
+	st	g6,(g13)[g2*4]
+	addc	0,g7,g0		# next carry limb = high word plus carry
+
+	addo	g2,1,g2
+	cmpobne	0,g2,Loop	# when branch is taken, clears C bit
+
+	ret
diff --git a/gnu/lib/libgmp/mpn/i960/mul_1.s b/gnu/lib/libgmp/mpn/i960/mul_1.s
new file mode 100644
index 00000000000..4ccaeabc43f
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/i960/mul_1.s
@@ -0,0 +1,45 @@
+# I960 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+# In: g0 = res_ptr, g1 = s1_ptr, g2 = size, g3 = s2_limb.  Out: g0 = carry limb.
+.text
+	.align 4
+	.globl ___mpn_mul_1
+___mpn_mul_1:
+	subo	g2,0,g2		# g2 = -size; the index counts up towards zero
+	shlo	2,g2,g4		# g4 = -size * 4 (byte offset)
+	subo	g4,g1,g1	# g1 = s1_ptr + size * 4
+	subo	g4,g0,g13	# g13 = res_ptr + size * 4
+	mov	0,g0		# g0 = running carry limb, initially 0
+
+	cmpo	1,0		# clear C bit on AC.cc
+
+Loop:	ld	(g1)[g2*4],g5	# next source limb
+	emul	g3,g5,g6	# 64-bit product s2_limb * limb, low word g6, high word g7
+
+	addc	g0,g6,g6	# relies on that C bit is clear
+	st	g6,(g13)[g2*4]
+	addc	0,g7,g0		# next carry limb = high word plus carry
+
+	addo	g2,1,g2
+	cmpobne	0,g2,Loop	# when branch is taken, clears C bit
+
+	ret
diff --git a/gnu/lib/libgmp/mpn/i960/sub_n.s b/gnu/lib/libgmp/mpn/i960/sub_n.s
new file mode 100644
index 00000000000..01b94a17261
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/i960/sub_n.s
@@ -0,0 +1,43 @@
+# I960 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright (C) 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+# Register use, per the comments below: g0 = res_ptr, g1 = s1_ptr, g2 = s2_ptr, g3 = remaining limb count.
+.text
+	.align 4
+	.globl ___mpn_sub_n
+___mpn_sub_n:
+	mov	1,g6		# set carry-save register
+	cmpo	1,0		# clear cy
+
+Loop:	subo	1,g3,g3		# update loop counter
+	ld	(g1),g5		# load from s1_ptr
+	addo	4,g1,g1		# s1_ptr++
+	ld	(g2),g4		# load from s2_ptr
+	addo	4,g2,g2		# s2_ptr++
+	cmpo	g6,1		# restore cy from g6, relies on cy being 0
+	subc	g4,g5,g4	# main subtract
+	subc	0,0,g6		# save cy in g6
+	st	g4,(g0)		# store result to res_ptr
+	addo	4,g0,g0		# res_ptr++
+	cmpobne	0,g3,Loop	# when branch is taken, cy will be 0
+
+	mov	g6,g0		# copy the saved carry into g0 before returning (presumably the return value)
+	ret
diff --git a/gnu/lib/libgmp/mpn/m68k/add_n.S b/gnu/lib/libgmp/mpn/m68k/add_n.S
new file mode 100644
index 00000000000..7ca5b95bccd
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/m68k/add_n.S
@@ -0,0 +1,80 @@
+/* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+   sum in a third limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+ */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s2_ptr	(sp + 12)
+  size		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_add_n)
+
+C_SYMBOL_NAME(__mpn_add_n:)
+PROLOG(__mpn_add_n)
+/* Save used registers on the stack. */
+	movel	R(d2),MEM_PREDEC(sp)
+	movel	R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. Better use movem? */
+	movel	MEM_DISP(sp,12),R(a2)	/* res_ptr; offsets are 8 larger than on entry (two saved registers) */
+	movel	MEM_DISP(sp,16),R(a0)	/* s1_ptr */
+	movel	MEM_DISP(sp,20),R(a1)	/* s2_ptr */
+	movel	MEM_DISP(sp,24),R(d2)	/* size */
+
+	eorw	#1,R(d2)
+	lsrl	#1,R(d2)
+	bcc	L(L1)
+	subql	#1,R(d2)	/* clears cy as side effect */
+
+L(Loop:)
+	movel	MEM_POSTINC(a0),R(d0)
+	movel	MEM_POSTINC(a1),R(d1)
+	addxl	R(d1),R(d0)
+	movel	R(d0),MEM_POSTINC(a2)
+L(L1:)	movel	MEM_POSTINC(a0),R(d0)
+	movel	MEM_POSTINC(a1),R(d1)
+	addxl	R(d1),R(d0)
+	movel	R(d0),MEM_POSTINC(a2)
+
+	dbf	R(d2),L(Loop)	/* loop until 16 lsb of d2 == -1 */
+	subxl	R(d0),R(d0)	/* d0 <= -cy; save cy as 0 or -1 in d0 */
+	subl	#0x10000,R(d2)	/* step the upper 16 bits of the counter */
+	bcs	L(L2)
+	addl	R(d0),R(d0)	/* restore cy */
+	bra	L(Loop)
+
+L(L2:)
+	negl	R(d0)	/* turn the saved 0 / -1 into 0 / 1 */
+
+/* Restore used registers from stack frame. */
+	movel	MEM_POSTINC(sp),R(a2)
+	movel	MEM_POSTINC(sp),R(d2)
+
+	rts
+EPILOG(__mpn_add_n)
diff --git a/gnu/lib/libgmp/mpn/m68k/lshift.S b/gnu/lib/libgmp/mpn/m68k/lshift.S
new file mode 100644
index 00000000000..77184d6ee57
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/m68k/lshift.S
@@ -0,0 +1,151 @@
+/* mc68020 __mpn_lshift -- Shift left a low-level natural-number integer.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s_ptr		(sp + 8)
+  s_size	(sp + 12)
+  cnt		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_lshift)
+
+C_SYMBOL_NAME(__mpn_lshift:)
+PROLOG(__mpn_lshift)
+
+/* Save used registers on the stack. */
+	moveml	R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)	/* 6 registers = 24 bytes */
+
+/* Copy the arguments to registers. */
+	movel	MEM_DISP(sp,28),R(res_ptr)
+	movel	MEM_DISP(sp,32),R(s_ptr)
+	movel	MEM_DISP(sp,36),R(s_size)
+	movel	MEM_DISP(sp,40),R(cnt)
+
+	moveql	#1,R(d5)
+	cmpl	R(d5),R(cnt)
+	bne	L(Lnormal)	/* only cnt == 1 may use the add-with-carry path */
+	cmpl	R(s_ptr),R(res_ptr)
+	bls	L(Lspecial)	/* jump if s_ptr >= res_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+	lea	MEM_INDX1(s_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+	movel	R(s_size),R(d0)
+	asll	#2,R(d0)
+	lea	MEM_INDX(s_ptr,d0,l),R(a2)
+#endif
+	cmpl	R(res_ptr),R(a2)
+	bls	L(Lspecial)	/* jump if res_ptr >= s_ptr + s_size */
+
+L(Lnormal:)
+	moveql	#32,R(d5)
+	subl	R(cnt),R(d5)	/* d5 = 32 - cnt */
+
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+	lea	MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+	lea	MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68020 */
+	movel	R(s_size),R(d0)
+	asll	#2,R(d0)	/* d0 = s_size * 4 (bytes) */
+	addl	R(d0),R(s_ptr)	/* advance by the byte count; the unscaled s_size was added before */
+	addl	R(d0),R(res_ptr)
+#endif
+	movel	MEM_PREDEC(s_ptr),R(d2)
+	movel	R(d2),R(d0)
+	lsrl	R(d5),R(d0)	/* compute carry limb */
+
+	lsll	R(cnt),R(d2)
+	movel	R(d2),R(d1)
+	subql	#1,R(s_size)
+	beq	L(Lend)
+	lsrl	#1,R(s_size)
+	bcs	L(L1)
+	subql	#1,R(s_size)
+
+L(Loop:)
+	movel	MEM_PREDEC(s_ptr),R(d2)
+	movel	R(d2),R(d3)
+	lsrl	R(d5),R(d3)
+	orl	R(d3),R(d1)
+	movel	R(d1),MEM_PREDEC(res_ptr)
+	lsll	R(cnt),R(d2)
+L(L1:)
+	movel	MEM_PREDEC(s_ptr),R(d1)
+	movel	R(d1),R(d3)
+	lsrl	R(d5),R(d3)
+	orl	R(d3),R(d2)
+	movel	R(d2),MEM_PREDEC(res_ptr)
+	lsll	R(cnt),R(d1)
+
+	dbf	R(s_size),L(Loop)	/* counts down the low 16 bits of s_size */
+	subl	#0x10000,R(s_size)	/* then step the upper 16 bits */
+	bcc	L(Loop)
+
+L(Lend:)
+	movel	R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */
+
+/* Restore used registers from stack frame. */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+	rts
+
+/* We loop from least significant end of the arrays, which is only
+   permissible if the source and destination don't overlap, since the
+   function is documented to work for overlapping source and destination. */
+
+L(Lspecial:)
+	clrl	R(d0)	/* initialize carry */
+	eorw	#1,R(s_size)
+	lsrl	#1,R(s_size)
+	bcc	L(LL1)
+	subql	#1,R(s_size)
+
+L(LLoop:)
+	movel	MEM_POSTINC(s_ptr),R(d2)
+	addxl	R(d2),R(d2)	/* limb + limb + carry: shift left by one through the extend bit */
+	movel	R(d2),MEM_POSTINC(res_ptr)
+L(LL1:)
+	movel	MEM_POSTINC(s_ptr),R(d2)
+	addxl	R(d2),R(d2)
+	movel	R(d2),MEM_POSTINC(res_ptr)
+
+	dbf	R(s_size),L(LLoop)
+	addxl	R(d0),R(d0)	/* save cy in lsb */
+	subl	#0x10000,R(s_size)
+	bcs	L(LLend)
+	lsrl	#1,R(d0)	/* restore cy */
+	bra	L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame. */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+	rts
+EPILOG(__mpn_lshift)
diff --git a/gnu/lib/libgmp/mpn/m68k/mc68020/addmul_1.S b/gnu/lib/libgmp/mpn/m68k/mc68020/addmul_1.S
new file mode 100644
index 00000000000..4b99c21f844
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/m68k/mc68020/addmul_1.S
@@ -0,0 +1,84 @@
+/* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+   the result to a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s1_size	(sp + 12)
+  s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_addmul_1)
+
+C_SYMBOL_NAME(__mpn_addmul_1:)
+PROLOG(__mpn_addmul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack. */
+	moveml	R(d2)-R(d5),MEM_PREDEC(sp)	/* 4 registers = 16 bytes, so arguments start at sp + 20 */
+
+/* Copy the arguments to registers. Better use movem? */
+	movel	MEM_DISP(sp,20),R(res_ptr)
+	movel	MEM_DISP(sp,24),R(s1_ptr)
+	movel	MEM_DISP(sp,28),R(s1_size)
+	movel	MEM_DISP(sp,32),R(s2_limb)
+
+	eorw	#1,R(s1_size)
+	clrl	R(d1)
+	clrl	R(d5)	/* d5 stays zero; used to add just the carry */
+	lsrl	#1,R(s1_size)
+	bcc	L(L1)
+	subql	#1,R(s1_size)
+	subl	R(d0),R(d0)	/* (d0,cy) <= (0,0) */
+
+L(Loop:)
+	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d1):R(d3)	/* d1:d3 = limb * s2_limb */
+	addxl	R(d0),R(d3)	/* add previous high word and pending carry */
+	addxl	R(d5),R(d1)
+	addl	R(d3),MEM_POSTINC(res_ptr)
+L(L1:)	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d0):R(d3)	/* d0:d3 = limb * s2_limb */
+	addxl	R(d1),R(d3)
+	addxl	R(d5),R(d0)
+	addl	R(d3),MEM_POSTINC(res_ptr)
+
+	dbf	R(s1_size),L(Loop)	/* counts down the low 16 bits of s1_size */
+	addxl	R(d5),R(d0)	/* fold the last carry into the carry limb */
+	subl	#0x10000,R(s1_size)	/* then step the upper 16 bits */
+	bcc	L(Loop)
+
+/* Restore used registers from stack frame. */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d5)
+
+	rts
+EPILOG(__mpn_addmul_1)
diff --git a/gnu/lib/libgmp/mpn/m68k/mc68020/mul_1.S b/gnu/lib/libgmp/mpn/m68k/mc68020/mul_1.S
new file mode 100644
index 00000000000..ef7d93721f7
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/m68k/mc68020/mul_1.S
@@ -0,0 +1,91 @@
+/* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+   the result in a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s1_size	(sp + 12)
+  s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_mul_1)
+
+C_SYMBOL_NAME(__mpn_mul_1:)
+PROLOG(__mpn_mul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack. */
+	moveml	R(d2)-R(d4),MEM_PREDEC(sp)	/* 3 registers = 12 bytes, so arguments start at sp + 16 */
+#if 0
+	movel	R(d2),MEM_PREDEC(sp)
+	movel	R(d3),MEM_PREDEC(sp)
+	movel	R(d4),MEM_PREDEC(sp)
+#endif
+
+/* Copy the arguments to registers. Better use movem? */
+	movel	MEM_DISP(sp,16),R(res_ptr)
+	movel	MEM_DISP(sp,20),R(s1_ptr)
+	movel	MEM_DISP(sp,24),R(s1_size)
+	movel	MEM_DISP(sp,28),R(s2_limb)
+
+	eorw	#1,R(s1_size)
+	clrl	R(d1)
+	lsrl	#1,R(s1_size)
+	bcc	L(L1)
+	subql	#1,R(s1_size)
+	subl	R(d0),R(d0)	/* (d0,cy) <= (0,0) */
+
+L(Loop:)
+	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d1):R(d3)	/* d1:d3 = limb * s2_limb */
+	addxl	R(d0),R(d3)	/* add previous high word and pending carry */
+	movel	R(d3),MEM_POSTINC(res_ptr)
+L(L1:)	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d0):R(d3)	/* d0:d3 = limb * s2_limb */
+	addxl	R(d1),R(d3)
+	movel	R(d3),MEM_POSTINC(res_ptr)
+
+	dbf	R(s1_size),L(Loop)	/* counts down the low 16 bits of s1_size */
+	clrl	R(d3)
+	addxl	R(d3),R(d0)	/* fold the last carry into the carry limb */
+	subl	#0x10000,R(s1_size)	/* then step the upper 16 bits */
+	bcc	L(Loop)
+
+/* Restore used registers from stack frame. */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d4)
+#if 0
+	movel	MEM_POSTINC(sp),R(d4)
+	movel	MEM_POSTINC(sp),R(d3)
+	movel	MEM_POSTINC(sp),R(d2)
+#endif
+	rts
+EPILOG(__mpn_mul_1)
diff --git a/gnu/lib/libgmp/mpn/m68k/mc68020/submul_1.S b/gnu/lib/libgmp/mpn/m68k/mc68020/submul_1.S
new file mode 100644
index 00000000000..9770c6cd6b4
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/m68k/mc68020/submul_1.S
@@ -0,0 +1,84 @@
+/* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+   the result from a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.
If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s1_size	(sp + 12)
+  s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_submul_1)
+
+C_SYMBOL_NAME(__mpn_submul_1:)
+PROLOG(__mpn_submul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack. */
+	moveml	R(d2)-R(d5),MEM_PREDEC(sp)	/* 4 registers = 16 bytes, so arguments start at sp + 20 */
+
+/* Copy the arguments to registers. Better use movem? */
+	movel	MEM_DISP(sp,20),R(res_ptr)
+	movel	MEM_DISP(sp,24),R(s1_ptr)
+	movel	MEM_DISP(sp,28),R(s1_size)
+	movel	MEM_DISP(sp,32),R(s2_limb)
+
+	eorw	#1,R(s1_size)
+	clrl	R(d1)
+	clrl	R(d5)	/* d5 stays zero; used to add just the carry */
+	lsrl	#1,R(s1_size)
+	bcc	L(L1)
+	subql	#1,R(s1_size)
+	subl	R(d0),R(d0)	/* (d0,cy) <= (0,0) */
+
+L(Loop:)
+	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d1):R(d3)	/* d1:d3 = limb * s2_limb */
+	addxl	R(d0),R(d3)	/* add previous high word and pending borrow */
+	addxl	R(d5),R(d1)
+	subl	R(d3),MEM_POSTINC(res_ptr)
+L(L1:)	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d0):R(d3)	/* d0:d3 = limb * s2_limb */
+	addxl	R(d1),R(d3)
+	addxl	R(d5),R(d0)
+	subl	R(d3),MEM_POSTINC(res_ptr)
+
+	dbf	R(s1_size),L(Loop)	/* counts down the low 16 bits of s1_size */
+	addxl	R(d5),R(d0)	/* fold the last borrow into the carry limb */
+	subl	#0x10000,R(s1_size)	/* then step the upper 16 bits */
+	bcc	L(Loop)
+
+/* Restore used registers from stack frame. */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d5)
+
+	rts
+EPILOG(__mpn_submul_1)
diff --git a/gnu/lib/libgmp/mpn/m68k/rshift.S b/gnu/lib/libgmp/mpn/m68k/rshift.S
new file mode 100644
index 00000000000..2ca5c7946c9
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/m68k/rshift.S
@@ -0,0 +1,150 @@
+/* mc68020 __mpn_rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s_ptr		(sp + 8)
+  s_size	(sp + 12)
+  cnt		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_rshift)
+
+C_SYMBOL_NAME(__mpn_rshift:)
+PROLOG(__mpn_rshift)
+/* Save used registers on the stack. */
+	moveml	R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)	/* 6 registers = 24 bytes */
+
+/* Copy the arguments to registers. */
+	movel	MEM_DISP(sp,28),R(res_ptr)
+	movel	MEM_DISP(sp,32),R(s_ptr)
+	movel	MEM_DISP(sp,36),R(s_size)
+	movel	MEM_DISP(sp,40),R(cnt)
+
+	moveql	#1,R(d5)
+	cmpl	R(d5),R(cnt)
+	bne	L(Lnormal)	/* only cnt == 1 may use the rotate-through-carry path */
+	cmpl	R(res_ptr),R(s_ptr)
+	bls	L(Lspecial)	/* jump if res_ptr >= s_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+	lea	MEM_INDX1(res_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+	movel	R(s_size),R(d0)
+	asll	#2,R(d0)
+	lea	MEM_INDX(res_ptr,d0,l),R(a2)
+#endif
+	cmpl	R(s_ptr),R(a2)
+	bls	L(Lspecial)	/* jump if s_ptr >= res_ptr + s_size */
+
+L(Lnormal:)
+	moveql	#32,R(d5)
+	subl	R(cnt),R(d5)	/* d5 = 32 - cnt */
+	movel	MEM_POSTINC(s_ptr),R(d2)
+	movel	R(d2),R(d0)
+	lsll	R(d5),R(d0)	/* compute carry limb */
+
+	lsrl	R(cnt),R(d2)
+	movel	R(d2),R(d1)
+	subql	#1,R(s_size)
+	beq	L(Lend)
+	lsrl	#1,R(s_size)
+	bcs	L(L1)
+	subql	#1,R(s_size)
+
+L(Loop:)
+	movel	MEM_POSTINC(s_ptr),R(d2)
+	movel	R(d2),R(d3)
+	lsll	R(d5),R(d3)
+	orl	R(d3),R(d1)
+	movel	R(d1),MEM_POSTINC(res_ptr)
+	lsrl	R(cnt),R(d2)
+L(L1:)
+	movel	MEM_POSTINC(s_ptr),R(d1)
+	movel	R(d1),R(d3)
+	lsll	R(d5),R(d3)
+	orl	R(d3),R(d2)
+	movel	R(d2),MEM_POSTINC(res_ptr)
+	lsrl	R(cnt),R(d1)
+
+	dbf	R(s_size),L(Loop)	/* counts down the low 16 bits of s_size */
+	subl	#0x10000,R(s_size)	/* then step the upper 16 bits */
+	bcc	L(Loop)
+
+L(Lend:)
+	movel	R(d1),MEM(res_ptr) /* store most significant limb */
+
+/* Restore used registers from stack frame. */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+	rts
+
+/* We loop from most significant end of the arrays, which is only
+   permissible if the source and destination don't overlap, since the
+   function is documented to work for overlapping source and destination. */
+
+L(Lspecial:)
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+	lea	MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+	lea	MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68020 */
+	movel	R(s_size),R(d0)
+	asll	#2,R(d0)	/* d0 = s_size * 4 (bytes) */
+	addl	R(d0),R(s_ptr)	/* advance by the byte count; the unscaled s_size was added before */
+	addl	R(d0),R(res_ptr)
+#endif
+
+	clrl	R(d0)	/* initialize carry */
+	eorw	#1,R(s_size)
+	lsrl	#1,R(s_size)
+	bcc	L(LL1)
+	subql	#1,R(s_size)
+
+L(LLoop:)
+	movel	MEM_PREDEC(s_ptr),R(d2)
+	roxrl	#1,R(d2)	/* shift right by one through the extend bit */
+	movel	R(d2),MEM_PREDEC(res_ptr)
+L(LL1:)
+	movel	MEM_PREDEC(s_ptr),R(d2)
+	roxrl	#1,R(d2)
+	movel	R(d2),MEM_PREDEC(res_ptr)
+
+	dbf	R(s_size),L(LLoop)
+	roxrl	#1,R(d0)	/* save cy in msb */
+	subl	#0x10000,R(s_size)
+	bcs	L(LLend)
+	addl	R(d0),R(d0)	/* restore cy */
+	bra	L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame. */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+	rts
+EPILOG(__mpn_rshift)
diff --git a/gnu/lib/libgmp/mpn/m68k/sub_n.S b/gnu/lib/libgmp/mpn/m68k/sub_n.S
new file mode 100644
index 00000000000..f94b0c72822
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/m68k/sub_n.S
@@ -0,0 +1,80 @@
+/* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+   store difference in a third limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.
If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s2_ptr	(sp + 12)
+  size		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_sub_n)
+
+C_SYMBOL_NAME(__mpn_sub_n:)
+PROLOG(__mpn_sub_n)
+/* Save used registers on the stack. */
+	movel	R(d2),MEM_PREDEC(sp)
+	movel	R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. Better use movem? */
+	movel	MEM_DISP(sp,12),R(a2)	/* res_ptr; offsets are 8 larger than on entry (two saved registers) */
+	movel	MEM_DISP(sp,16),R(a0)	/* s1_ptr */
+	movel	MEM_DISP(sp,20),R(a1)	/* s2_ptr */
+	movel	MEM_DISP(sp,24),R(d2)	/* size */
+
+	eorw	#1,R(d2)
+	lsrl	#1,R(d2)
+	bcc	L(L1)
+	subql	#1,R(d2)	/* clears cy as side effect */
+
+L(Loop:)
+	movel	MEM_POSTINC(a0),R(d0)
+	movel	MEM_POSTINC(a1),R(d1)
+	subxl	R(d1),R(d0)
+	movel	R(d0),MEM_POSTINC(a2)
+L(L1:)	movel	MEM_POSTINC(a0),R(d0)
+	movel	MEM_POSTINC(a1),R(d1)
+	subxl	R(d1),R(d0)
+	movel	R(d0),MEM_POSTINC(a2)
+
+	dbf	R(d2),L(Loop)	/* loop until 16 lsb of d2 == -1 */
+	subxl	R(d0),R(d0)	/* d0 <= -cy; save cy as 0 or -1 in d0 */
+	subl	#0x10000,R(d2)	/* step the upper 16 bits of the counter */
+	bcs	L(L2)
+	addl	R(d0),R(d0)	/* restore cy */
+	bra	L(Loop)
+
+L(L2:)
+	negl	R(d0)	/* turn the saved 0 / -1 into 0 / 1 */
+
+/* Restore used registers from stack frame. */
+	movel	MEM_POSTINC(sp),R(a2)
+	movel	MEM_POSTINC(sp),R(d2)
+
+	rts
+EPILOG(__mpn_sub_n)
diff --git a/gnu/lib/libgmp/mpn/m68k/syntax.h b/gnu/lib/libgmp/mpn/m68k/syntax.h
new file mode 100644
index 00000000000..9d6f3522bff
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/m68k/syntax.h
@@ -0,0 +1,177 @@
+/* asm.h -- Definitions for 68k syntax variations.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#undef ALIGN
+/* Each section below maps the generic operand/directive macros onto one assembler dialect; presumably exactly one *_SYNTAX symbol is defined by the build -- TODO confirm. */
+#ifdef MIT_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)base@
+#define MEM_DISP(base,displacement)base@(displacement)
+#define MEM_INDX(base,idx,size_suffix)base@(idx:size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)base@(idx:size_suffix:scale)
+#define MEM_PREDEC(memory_base)memory_base@-
+#define MEM_POSTINC(memory_base)memory_base@+
+#define L(label) label
+#define TEXT .text
+#define ALIGN .even
+#define GLOBL .globl
+#define moveql moveq
+/* Use variable sized opcodes. */
+#define bcc jcc
+#define bcs jcs
+#define bls jls
+#define beq jeq
+#define bne jne
+#define bra jra
+#endif /* MIT_SYNTAX */
+
+#ifdef SONY_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)(base)
+#define MEM_DISP(base,displacement)(displacement,base)
+#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
+#define MEM_PREDEC(memory_base)-(memory_base)
+#define MEM_POSTINC(memory_base)(memory_base)+
+#define L(label) label
+#define TEXT .text
+#define ALIGN .even
+#define GLOBL .globl
+#endif /* SONY_SYNTAX */
+
+#ifdef MOTOROLA_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)(base)
+#define MEM_DISP(base,displacement)(displacement,base)
+#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
+#define MEM_PREDEC(memory_base)-(memory_base)
+#define MEM_POSTINC(memory_base)(memory_base)+
+#define L(label) label
+#define TEXT
+#define ALIGN
+#define GLOBL XDEF
+#define lea LEA
+#define movel MOVE.L
+#define moveml MOVEM.L
+#define moveql MOVEQ.L
+#define cmpl CMP.L
+#define orl OR.L
+#define clrl CLR.L
+#define eorw EOR.W
+#define lsrl LSR.L
+#define lsll LSL.L
+#define roxrl ROXR.L
+#define roxll ROXL.L
+#define addl ADD.L
+#define addxl ADDX.L
+#define addql ADDQ.L
+#define subl SUB.L
+#define subxl SUBX.L
+#define subql SUBQ.L
+#define negl NEG.L
+#define mulul MULU.L
+#define bcc BCC
+#define bcs BCS
+#define bls BLS
+#define beq BEQ
+#define bne BNE
+#define bra BRA
+#define dbf DBF
+#define rts RTS
+#define d0 D0
+#define d1 D1
+#define d2 D2
+#define d3 D3
+#define d4 D4
+#define d5 D5
+#define d6 D6
+#define d7 D7
+#define a0 A0
+#define a1 A1
+#define a2 A2
+#define a3 A3
+#define a4 A4
+#define a5 A5
+#define a6 A6
+#define a7 A7
+#define sp SP
+#endif /* MOTOROLA_SYNTAX */
+
+#ifdef ELF_SYNTAX
+#define PROLOG(name) .type name,@function
+#define EPILOG(name) .size name,.-name
+#define MEM(base)(R(base))
+#define MEM_DISP(base,displacement)(displacement,R(base))
+#define MEM_PREDEC(memory_base)-(R(memory_base))
+#define MEM_POSTINC(memory_base)(R(memory_base))+
+#ifdef __STDC__
+#define R_(r)%##r
+#define R(r)R_(r)
+#define MEM_INDX_(base,idx,size_suffix)(R(base),R(idx##.##size_suffix))
+#define MEM_INDX(base,idx,size_suffix)MEM_INDX_(base,idx,size_suffix)
+#define MEM_INDX1_(base,idx,size_suffix,scale)(R(base),R(idx##.##size_suffix*scale))
+#define MEM_INDX1(base,idx,size_suffix,scale)MEM_INDX1_(base,idx,size_suffix,scale)
+#define L(label) .##label
+#else /* not __STDC__ */
+#define R(r)%/**/r
+#define MEM_INDX(base,idx,size_suffix)(R(base),R(idx).size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(R(base),R(idx).size_suffix*scale)
+#define L(label) ./**/label
+#endif /* __STDC__ */
+#define TEXT .text
+#define ALIGN .align 2
+#define GLOBL .globl
+#define bcc jbcc
+#define bcs jbcs
+#define bls jbls
+#define beq jbeq
+#define bne jbne
+#define bra jbra
+#endif /* ELF_SYNTAX */
+
+#if defined (SONY_SYNTAX) || defined (ELF_SYNTAX)
+#define movel move.l
+#define moveml movem.l
+#define moveql moveq.l
+#define cmpl cmp.l
+#define orl or.l
+#define clrl clr.l
+#define eorw eor.w
+#define lsrl lsr.l
+#define lsll lsl.l
+#define roxrl roxr.l
+#define roxll roxl.l
+#define addl add.l
+#define addxl addx.l
+#define addql addq.l
+#define subl sub.l
+#define subxl subx.l
+#define subql subq.l
+#define negl neg.l
+#define mulul mulu.l
+#endif /* SONY_SYNTAX || ELF_SYNTAX */
diff --git a/gnu/lib/libgmp/mpn/m88k/add_n.s b/gnu/lib/libgmp/mpn/m88k/add_n.s
new file mode 100644
index 00000000000..1b09ccef8f3
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/m88k/add_n.s
@@ -0,0 +1,104 @@
+; mc88100 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; s2_ptr r4 +; size r5 + +; This code has been optimized to run one instruction per clock, avoiding +; load stalls and writeback contention. As a result, the instruction +; order is not always natural. + +; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100, +; but on the 88110, it seems to run much slower, 6.6 clocks/limb. 
+
+	text
+	align	16
+	global	___mpn_add_n
+___mpn_add_n:
+	ld	r6,r3,0		; read first limb from s1_ptr
+	extu	r10,r5,3	; presumably r10 = size / 8, the count of 8-limb blocks -- TODO confirm
+	ld	r7,r4,0		; read first limb from s2_ptr
+
+	subu.co	r5,r0,r5	; (clear carry as side effect)
+	mak	r5,r5,3<4>	; presumably ((-size) & 7) << 4, a byte offset into the unrolled loop -- TODO confirm
+	bcnd	eq0,r5,Lzero
+
+	or	r12,r0,lo16(Lbase)
+	or.u	r12,r12,hi16(Lbase)
+	addu	r12,r12,r5	; r12 is address for entering in loop
+
+	extu	r5,r5,2		; divide by 4
+	subu	r2,r2,r5	; adjust res_ptr
+	subu	r3,r3,r5	; adjust s1_ptr
+	subu	r4,r4,r5	; adjust s2_ptr
+
+	or	r8,r6,r0
+
+	jmp.n	r12
+	or	r9,r7,r0	; follows jmp.n, so it executes before the jump takes effect
+
+Loop:	addu	r3,r3,32
+	st	r8,r2,28
+	addu	r4,r4,32
+	ld	r6,r3,0
+	addu	r2,r2,32
+	ld	r7,r4,0
+Lzero:	subu	r10,r10,1	; add 0 + 8r limbs (adj loop cnt)
+Lbase:	ld	r8,r3,4
+	addu.cio r6,r6,r7
+	ld	r9,r4,4
+	st	r6,r2,0
+	ld	r6,r3,8		; add 7 + 8r limbs
+	addu.cio r8,r8,r9
+	ld	r7,r4,8
+	st	r8,r2,4
+	ld	r8,r3,12	; add 6 + 8r limbs
+	addu.cio r6,r6,r7
+	ld	r9,r4,12
+	st	r6,r2,8
+	ld	r6,r3,16	; add 5 + 8r limbs
+	addu.cio r8,r8,r9
+	ld	r7,r4,16
+	st	r8,r2,12
+	ld	r8,r3,20	; add 4 + 8r limbs
+	addu.cio r6,r6,r7
+	ld	r9,r4,20
+	st	r6,r2,16
+	ld	r6,r3,24	; add 3 + 8r limbs
+	addu.cio r8,r8,r9
+	ld	r7,r4,24
+	st	r8,r2,20
+	ld	r8,r3,28	; add 2 + 8r limbs
+	addu.cio r6,r6,r7
+	ld	r9,r4,28
+	st	r6,r2,24
+	bcnd.n	ne0,r10,Loop	; add 1 + 8r limbs
+	addu.cio r8,r8,r9
+
+	st	r8,r2,28	; store most significant limb
+
+	jmp.n	r1
+	addu.ci	r2,r0,r0	; return carry-out from most sign. limb
diff --git a/gnu/lib/libgmp/mpn/m88k/mc88110/add_n.S b/gnu/lib/libgmp/mpn/m88k/mc88110/add_n.S
new file mode 100644
index 00000000000..39a44e55795
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/m88k/mc88110/add_n.S
@@ -0,0 +1,200 @@
+; mc88110 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +#define res_ptr r2 +#define s1_ptr r3 +#define s2_ptr r4 +#define size r5 + +#include "sysdep.h" + + text + align 16 + global C_SYMBOL_NAME(__mpn_add_n) +C_SYMBOL_NAME(__mpn_add_n): + addu.co r0,r0,r0 ; clear cy flag + xor r12,s2_ptr,res_ptr + bb1 2,r12,L1 +; ** V1a ** +L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned? 
+/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + addu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s1_ptr,0 + ld r12,s1_ptr,4 + ld.d r8,s2_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1 +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop1: subu size,size,8 + addu.cio r6,r10,r8 + ld r10,s1_ptr,8 + addu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + addu.cio r6,r10,r8 + ld r10,s1_ptr,16 + addu.cio r7,r12,r9 + ld r12,s1_ptr,20 + ld.d r8,s2_ptr,16 + st.d r6,res_ptr,8 + addu.cio r6,r10,r8 + ld r10,s1_ptr,24 + addu.cio r7,r12,r9 + ld r12,s1_ptr,28 + ld.d r8,s2_ptr,24 + st.d r6,res_ptr,16 + addu.cio r6,r10,r8 + ld r10,s1_ptr,32 + addu.cio r7,r12,r9 + ld r12,s1_ptr,36 + addu s1_ptr,s1_ptr,32 + ld.d r8,s2_ptr,32 + addu s2_ptr,s2_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1 + +Lfin1: addu size,size,8-2 + bcnd lt0,size,Lend1 +/* Add blocks of 2 limbs until less than 2 limbs remain */ +Loope1: addu.cio r6,r10,r8 + ld r10,s1_ptr,8 + addu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1 +Lend1: addu.cio r6,r10,r8 + addu.cio r7,r12,r9 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1 +/* Add last limb */ + ld r10,s1_ptr,8 + ld r8,s2_ptr,8 + addu.cio r6,r10,r8 + st r6,res_ptr,8 + +Lret1: jmp.n r1 + addu.ci r2,r0,r0 ; return carry-out from most sign. limb + +L1: xor r12,s1_ptr,res_ptr + bb1 2,r12,L2 +; ** V1b ** + or r12,r0,s2_ptr + or s2_ptr,r0,s1_ptr + or s1_ptr,r0,r12 + br L0 + +; ** V2 ** +/* If we come here, the alignment of s1_ptr and res_ptr as well as the + alignment of s2_ptr and res_ptr differ. 
Since there are only two ways + things can be aligned (that we care about) we now know that the alignment + of s1_ptr and s2_ptr are the same. */ + +L2: cmp r12,size,1 + bb1 eq,r12,Ljone + bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned +/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + addu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 + +L_v2: subu size,size,8 + bcnd lt0,size,Lfin2 +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop2: subu size,size,8 + ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + addu.cio r8,r8,r6 + st r8,res_ptr,0 + addu.cio r9,r9,r7 + st r9,res_ptr,4 + ld.d r8,s1_ptr,8 + ld.d r6,s2_ptr,8 + addu.cio r8,r8,r6 + st r8,res_ptr,8 + addu.cio r9,r9,r7 + st r9,res_ptr,12 + ld.d r8,s1_ptr,16 + ld.d r6,s2_ptr,16 + addu.cio r8,r8,r6 + st r8,res_ptr,16 + addu.cio r9,r9,r7 + st r9,res_ptr,20 + ld.d r8,s1_ptr,24 + ld.d r6,s2_ptr,24 + addu.cio r8,r8,r6 + st r8,res_ptr,24 + addu.cio r9,r9,r7 + st r9,res_ptr,28 + addu s1_ptr,s1_ptr,32 + addu s2_ptr,s2_ptr,32 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop2 + +Lfin2: addu size,size,8-2 + bcnd lt0,size,Lend2 +Loope2: ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + addu.cio r8,r8,r6 + st r8,res_ptr,0 + addu.cio r9,r9,r7 + st r9,res_ptr,4 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope2 +Lend2: bb0 0,size,Lret2 +/* Add last limb */ +Ljone: ld r10,s1_ptr,0 + ld r8,s2_ptr,0 + addu.cio r6,r10,r8 + st r6,res_ptr,0 + +Lret2: jmp.n r1 + addu.ci r2,r0,r0 ; return carry-out from most sign. 
limb diff --git a/gnu/lib/libgmp/mpn/m88k/mc88110/addmul_1.s b/gnu/lib/libgmp/mpn/m88k/mc88110/addmul_1.s new file mode 100644 index 00000000000..2bd6f21aff8 --- /dev/null +++ b/gnu/lib/libgmp/mpn/m88k/mc88110/addmul_1.s @@ -0,0 +1,61 @@ +; mc88110 __mpn_addmul_1 -- Multiply a limb vector with a single limb and +; add the product to a second limb vector. + +; Copyright (C) 1996 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA.
+ + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; size r4 +; s2_limb r5 + + text + align 16 + global ___mpn_addmul_1 +___mpn_addmul_1: + lda r3,r3[r4] + lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval + subu r4,r0,r4 + addu.co r2,r0,r0 ; r2 = cy = 0 + + ld r6,r3[r4] + addu r4,r4,1 + subu r8,r8,4 + bcnd.n eq0,r4,Lend + mulu.d r10,r6,r5 + +Loop: ld r7,r8[r4] + ld r6,r3[r4] + addu.cio r9,r11,r2 + addu.ci r2,r10,r0 + addu.co r9,r9,r7 + st r9,r8[r4] + addu r4,r4,1 + mulu.d r10,r6,r5 + bcnd ne0,r4,Loop + +Lend: ld r7,r8,0 + addu.cio r9,r11,r2 + addu.ci r2,r10,r0 + addu.co r9,r9,r7 + st r9,r8,0 + jmp.n r1 + addu.ci r2,r2,r0 diff --git a/gnu/lib/libgmp/mpn/m88k/mc88110/mul_1.s b/gnu/lib/libgmp/mpn/m88k/mc88110/mul_1.s new file mode 100644 index 00000000000..151890060d4 --- /dev/null +++ b/gnu/lib/libgmp/mpn/m88k/mc88110/mul_1.s @@ -0,0 +1,59 @@ +; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and +; store the product in a second limb vector. + +; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; size r4 +; s2_limb r5 + + text + align 16 + global ___mpn_mul_1 +___mpn_mul_1: + ; Make S1_PTR and RES_PTR point at the end of their blocks + ; and negate SIZE. + lda r3,r3[r4] + lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval + subu r4,r0,r4 + + addu.co r2,r0,r0 ; r2 = cy = 0 + + ld r6,r3[r4] + addu r4,r4,1 + mulu.d r10,r6,r5 + bcnd.n eq0,r4,Lend + subu r8,r8,8 + +Loop: ld r6,r3[r4] + addu.cio r9,r11,r2 + or r2,r10,r0 ; could be avoided if unrolled + addu r4,r4,1 + mulu.d r10,r6,r5 + bcnd.n ne0,r4,Loop + st r9,r8[r4] + +Lend: addu.cio r9,r11,r2 + st r9,r8,4 + jmp.n r1 + addu.ci r2,r10,r0 diff --git a/gnu/lib/libgmp/mpn/m88k/mc88110/sub_n.S b/gnu/lib/libgmp/mpn/m88k/mc88110/sub_n.S new file mode 100644 index 00000000000..685f024fd47 --- /dev/null +++ b/gnu/lib/libgmp/mpn/m88k/mc88110/sub_n.S @@ -0,0 +1,276 @@ +; mc88110 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ + +; INPUT PARAMETERS +#define res_ptr r2 +#define s1_ptr r3 +#define s2_ptr r4 +#define size r5 + +#include "sysdep.h" + + text + align 16 + global C_SYMBOL_NAME(__mpn_sub_n) +C_SYMBOL_NAME(__mpn_sub_n): + subu.co r0,r0,r0 ; set cy flag + xor r12,s2_ptr,res_ptr + bb1 2,r12,L1 +; ** V1a ** +L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned +/* Subtract least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + subu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s1_ptr,0 + ld r12,s1_ptr,4 + ld.d r8,s2_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1 +/* Subtract blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop1: subu size,size,8 + subu.cio r6,r10,r8 + ld r10,s1_ptr,8 + subu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu.cio r6,r10,r8 + ld r10,s1_ptr,16 + subu.cio r7,r12,r9 + ld r12,s1_ptr,20 + ld.d r8,s2_ptr,16 + st.d r6,res_ptr,8 + subu.cio r6,r10,r8 + ld r10,s1_ptr,24 + subu.cio r7,r12,r9 + ld r12,s1_ptr,28 + ld.d r8,s2_ptr,24 + st.d r6,res_ptr,16 + subu.cio r6,r10,r8 + ld r10,s1_ptr,32 + subu.cio r7,r12,r9 + ld r12,s1_ptr,36 + addu s1_ptr,s1_ptr,32 + ld.d r8,s2_ptr,32 + addu s2_ptr,s2_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1 + +Lfin1: addu size,size,8-2 + bcnd lt0,size,Lend1 +/* Subtract blocks of 2 limbs until less than 2 limbs remain */ +Loope1: subu.cio r6,r10,r8 + ld r10,s1_ptr,8 + subu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1 +Lend1: subu.cio r6,r10,r8 + subu.cio r7,r12,r9 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1 +/* Subtract last limb */ + ld r10,s1_ptr,8 + ld r8,s2_ptr,8 + subu.cio r6,r10,r8 + st r6,res_ptr,8 + +Lret1: addu.ci r2,r0,r0 ; return carry-out from most sign.
limb + jmp.n r1 + xor r2,r2,1 + +L1: xor r12,s1_ptr,res_ptr + bb1 2,r12,L2 +; ** V1b ** + bb0 2,res_ptr,L_v1b ; branch if res_ptr is aligned +/* Subtract least significant limb separately to align res_ptr and s1_ptr */ + ld r10,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + ld r8,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + subu size,size,1 + subu.co r6,r8,r10 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1b: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s2_ptr,0 + ld r12,s2_ptr,4 + ld.d r8,s1_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1b +/* Subtract blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop1b: subu size,size,8 + subu.cio r6,r8,r10 + ld r10,s2_ptr,8 + subu.cio r7,r9,r12 + ld r12,s2_ptr,12 + ld.d r8,s1_ptr,8 + st.d r6,res_ptr,0 + subu.cio r6,r8,r10 + ld r10,s2_ptr,16 + subu.cio r7,r9,r12 + ld r12,s2_ptr,20 + ld.d r8,s1_ptr,16 + st.d r6,res_ptr,8 + subu.cio r6,r8,r10 + ld r10,s2_ptr,24 + subu.cio r7,r9,r12 + ld r12,s2_ptr,28 + ld.d r8,s1_ptr,24 + st.d r6,res_ptr,16 + subu.cio r6,r8,r10 + ld r10,s2_ptr,32 + subu.cio r7,r9,r12 + ld r12,s2_ptr,36 + addu s2_ptr,s2_ptr,32 + ld.d r8,s1_ptr,32 + addu s1_ptr,s1_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1b + +Lfin1b: addu size,size,8-2 + bcnd lt0,size,Lend1b +/* Subtract blocks of 2 limbs until less than 2 limbs remain */ +Loope1b:subu.cio r6,r8,r10 + ld r10,s2_ptr,8 + subu.cio r7,r9,r12 + ld r12,s2_ptr,12 + ld.d r8,s1_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1b +Lend1b: subu.cio r6,r8,r10 + subu.cio r7,r9,r12 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1b +/* Subtract last limb */ + ld r10,s2_ptr,8 + ld r8,s1_ptr,8 + subu.cio r6,r8,r10 + st r6,res_ptr,8 + +Lret1b: addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 + +; ** V2 ** +/* If we come here, the alignment of s1_ptr and res_ptr as well as the + alignment of s2_ptr and res_ptr differ.
Since there are only two ways + things can be aligned (that we care about) we now know that the alignment + of s1_ptr and s2_ptr are the same. */ + +L2: cmp r12,size,1 + bb1 eq,r12,Ljone + bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned +/* Subtract least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + subu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 + +L_v2: subu size,size,8 + bcnd lt0,size,Lfin2 +/* Subtract blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop2: subu size,size,8 + ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + subu.cio r8,r8,r6 + st r8,res_ptr,0 + subu.cio r9,r9,r7 + st r9,res_ptr,4 + ld.d r8,s1_ptr,8 + ld.d r6,s2_ptr,8 + subu.cio r8,r8,r6 + st r8,res_ptr,8 + subu.cio r9,r9,r7 + st r9,res_ptr,12 + ld.d r8,s1_ptr,16 + ld.d r6,s2_ptr,16 + subu.cio r8,r8,r6 + st r8,res_ptr,16 + subu.cio r9,r9,r7 + st r9,res_ptr,20 + ld.d r8,s1_ptr,24 + ld.d r6,s2_ptr,24 + subu.cio r8,r8,r6 + st r8,res_ptr,24 + subu.cio r9,r9,r7 + st r9,res_ptr,28 + addu s1_ptr,s1_ptr,32 + addu s2_ptr,s2_ptr,32 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop2 + +Lfin2: addu size,size,8-2 + bcnd lt0,size,Lend2 +Loope2: ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + subu.cio r8,r8,r6 + st r8,res_ptr,0 + subu.cio r9,r9,r7 + st r9,res_ptr,4 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope2 +Lend2: bb0 0,size,Lret2 +/* Subtract last limb */ +Ljone: ld r10,s1_ptr,0 + ld r8,s2_ptr,0 + subu.cio r6,r10,r8 + st r6,res_ptr,0 + +Lret2: addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 diff --git a/gnu/lib/libgmp/mpn/m88k/mul_1.s b/gnu/lib/libgmp/mpn/m88k/mul_1.s new file mode 100644 index 00000000000..26626bf9591 --- /dev/null +++ b/gnu/lib/libgmp/mpn/m88k/mul_1.s @@ -0,0 +1,127 @@ +; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and +; store the product in a second limb vector.
+ +; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; size r4 +; s2_limb r5 + +; Common overhead is about 11 cycles/invocation. + +; The speed for S2_LIMB >= 0x10000 is approximately 21 cycles/limb. (The +; pipeline stalls 2 cycles due to WB contention.) + +; The speed for S2_LIMB < 0x10000 is approximately 16 cycles/limb. (The +; pipeline stalls 2 cycles due to WB contention and 1 cycle due to latency.) + +; To enhance speed: +; 1. Unroll main loop 4-8 times. +; 2. Schedule code to avoid WB contention. It might be tempting to move the +; ld instruction in the loops down to save 2 cycles (less WB contention), +; but that loses because the ultimate value will be read from outside +; the allocated space. But if we handle the ultimate multiplication in +; the tail, we can do this. +; 3. Make the multiplication with fewer instructions. I think the code for +; (S2_LIMB >= 0x10000) is not minimal. +; With these techniques the (S2_LIMB >= 0x10000) case would run in 17 or +; fewer cycles/limb; the (S2_LIMB < 0x10000) case would run in 11 +; cycles/limb. (Assuming infinite unrolling.)
+ + text + align 16 + global ___mpn_mul_1 +___mpn_mul_1: + + ; Make S1_PTR and RES_PTR point at the end of their blocks + ; and negate SIZE. + lda r3,r3[r4] + lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval + subu r4,r0,r4 + + addu.co r2,r0,r0 ; r2 = cy = 0 + ld r9,r3[r4] + mask r7,r5,0xffff ; r7 = lo(S2_LIMB) + extu r8,r5,16 ; r8 = hi(S2_LIMB) + bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0) + subu r6,r6,4 + +; General code for any value of S2_LIMB. + + ; Make a stack frame and save r25 and r26 + subu r31,r31,16 + st.d r25,r31,8 + + ; Enter the loop in the middle + br.n L1 + addu r4,r4,1 + +Loop: ld r9,r3[r4] + st r26,r6[r4] +; bcnd ne0,r0,0 ; bubble + addu r4,r4,1 +L1: mul r26,r9,r5 ; low word of product mul_1 WB ld + mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1 + mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1 + mul r10,r12,r8 ; r10 = prod_1a mul_3 + extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1 + mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1 + mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2 + extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3 + addu r10,r10,r11 ; addu_1 WB extu_2 +; bcnd ne0,r0,0 ; bubble WB addu_1 + addu.co r10,r10,r12 ; WB mul_4 + mask.u r10,r10,0xffff ; move the 16 most significant bits... + addu.ci r10,r10,r0 ; ...to the low half of the word... + rot r10,r10,16 ; ...and put carry in pos 16. 
+ addu.co r26,r26,r2 ; add old carry limb + bcnd.n ne0,r4,Loop + addu.ci r2,r25,r10 ; compute new carry limb + + st r26,r6[r4] + ld.d r25,r31,8 + jmp.n r1 + addu r31,r31,16 + +; Fast code for S2_LIMB < 0x10000 +Lsmall: + ; Enter the loop in the middle + br.n SL1 + addu r4,r4,1 + +SLoop: ld r9,r3[r4] ; + st r8,r6[r4] ; + addu r4,r4,1 ; +SL1: mul r8,r9,r5 ; low word of product + mask r12,r9,0xffff ; r12 = lo(s1_limb) + extu r13,r9,16 ; r13 = hi(s1_limb) + mul r11,r12,r7 ; r11 = prod_0 + mul r12,r13,r7 ; r12 = prod_1b + addu.cio r8,r8,r2 ; add old carry limb + extu r10,r11,16 ; r10 = hi(prod_0) + addu r10,r10,r12 ; + bcnd.n ne0,r4,SLoop + extu r2,r10,16 ; r2 = new carry limb + + jmp.n r1 + st r8,r6[r4] diff --git a/gnu/lib/libgmp/mpn/m88k/sub_n.s b/gnu/lib/libgmp/mpn/m88k/sub_n.s new file mode 100644 index 00000000000..7dfffc980bb --- /dev/null +++ b/gnu/lib/libgmp/mpn/m88k/sub_n.s @@ -0,0 +1,106 @@ +; mc88100 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA.
+ + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; s2_ptr r4 +; size r5 + +; This code has been optimized to run one instruction per clock, avoiding +; load stalls and writeback contention. As a result, the instruction +; order is not always natural. + +; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100, +; but on the 88110, it seems to run much slower, 6.6 clocks/limb. + + text + align 16 + global ___mpn_sub_n +___mpn_sub_n: + ld r6,r3,0 ; read first limb from s1_ptr + extu r10,r5,3 + ld r7,r4,0 ; read first limb from s2_ptr + + subu r5,r0,r5 + mak r5,r5,3<4> + bcnd.n eq0,r5,Lzero + subu.co r0,r0,r0 ; initialize carry + + or r12,r0,lo16(Lbase) + or.u r12,r12,hi16(Lbase) + addu r12,r12,r5 ; r12 is address for entering in loop + + extu r5,r5,2 ; divide by 4 + subu r2,r2,r5 ; adjust res_ptr + subu r3,r3,r5 ; adjust s1_ptr + subu r4,r4,r5 ; adjust s2_ptr + + or r8,r6,r0 + + jmp.n r12 + or r9,r7,r0 + +Loop: addu r3,r3,32 + st r8,r2,28 + addu r4,r4,32 + ld r6,r3,0 + addu r2,r2,32 + ld r7,r4,0 +Lzero: subu r10,r10,1 ; subtract 0 + 8r limbs (adj loop cnt) +Lbase: ld r8,r3,4 + subu.cio r6,r6,r7 + ld r9,r4,4 + st r6,r2,0 + ld r6,r3,8 ; subtract 7 + 8r limbs + subu.cio r8,r8,r9 + ld r7,r4,8 + st r8,r2,4 + ld r8,r3,12 ; subtract 6 + 8r limbs + subu.cio r6,r6,r7 + ld r9,r4,12 + st r6,r2,8 + ld r6,r3,16 ; subtract 5 + 8r limbs + subu.cio r8,r8,r9 + ld r7,r4,16 + st r8,r2,12 + ld r8,r3,20 ; subtract 4 + 8r limbs + subu.cio r6,r6,r7 + ld r9,r4,20 + st r6,r2,16 + ld r6,r3,24 ; subtract 3 + 8r limbs + subu.cio r8,r8,r9 + ld r7,r4,24 + st r8,r2,20 + ld r8,r3,28 ; subtract 2 + 8r limbs + subu.cio r6,r6,r7 + ld r9,r4,28 + st r6,r2,24 + bcnd.n ne0,r10,Loop ; subtract 1 + 8r limbs + subu.cio r8,r8,r9 + + st r8,r2,28 ; store most significant limb + + addu.ci r2,r0,r0 ; return carry-out from most sign. 
limb + jmp.n r1 + xor r2,r2,1 diff --git a/gnu/lib/libgmp/mpn/mips2/add_n.s b/gnu/lib/libgmp/mpn/mips2/add_n.s new file mode 100644 index 00000000000..f5525cec46b --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips2/add_n.s @@ -0,0 +1,120 @@ + # MIPS2 __mpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # s2_ptr $6 + # size $7 + + .text + .align 2 + .globl __mpn_add_n + .ent __mpn_add_n +__mpn_add_n: + .set noreorder + .set nomacro + + lw $10,0($5) + lw $11,0($6) + + addiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 + + subu $7,$7,$9 + +.Loop0: addiu $9,$9,-1 + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,0($4) + or $2,$2,$8 + + addiu $5,$5,4 + addiu $6,$6,4 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + addiu $4,$4,4 + +.L0: beq $7,$0,.Lend + nop + +.Loop: addiu $7,$7,-4 + + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,0($4) + or $2,$2,$8 + + lw $10,8($5) + addu $13,$13,$2 + lw $11,8($6) + sltu $8,$13,$2 + addu $13,$12,$13 + sltu $2,$13,$12 + sw $13,4($4) + or $2,$2,$8 + + lw $12,12($5) + addu $11,$11,$2 + lw $13,12($6) + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,8($4) + or $2,$2,$8 + + lw $10,16($5) + addu $13,$13,$2 + lw $11,16($6) + sltu $8,$13,$2 + addu $13,$12,$13 + sltu $2,$13,$12 + sw $13,12($4) + or $2,$2,$8 + + addiu $5,$5,16 + addiu $6,$6,16 + + bne $7,$0,.Loop + addiu $4,$4,16 + +.Lend: addu $11,$11,$2 + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,0($4) + j $31 + or $2,$2,$8 + + .end __mpn_add_n diff --git a/gnu/lib/libgmp/mpn/mips2/addmul_1.s b/gnu/lib/libgmp/mpn/mips2/addmul_1.s new file mode 100644 index 00000000000..6145771e396 --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips2/addmul_1.s @@ -0,0 +1,97 @@ + # MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and + # add the product to a second limb vector. + + # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. 
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __mpn_addmul_1 + .ent __mpn_addmul_1 +__mpn_addmul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + lw $8,0($5) + + # warm up phase 1 + addiu $5,$5,4 + multu $8,$7 + + addiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + addiu $6,$6,-1 + beq $6,$0,$LC1 + lw $8,0($5) # load new s1 limb as early as possible + +Loop: lw $10,0($4) + mflo $3 + mfhi $9 + addiu $5,$5,4 + addu $3,$3,$2 # add old carry limb to low product limb + multu $8,$7 + lw $8,0($5) # load new s1 limb as early as possible + addiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + addu $3,$10,$3 + sltu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + bne $6,$0,Loop + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: lw $10,0($4) + mflo $3 + mfhi $9 + addu $3,$3,$2 + sltu $2,$3,$2 + multu $8,$7 + addu $3,$10,$3 + sltu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: lw $10,0($4) + mflo $3 + mfhi $9 + addu 
$3,$3,$2 + sltu $2,$3,$2 + addu $3,$10,$3 + sltu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + j $31 + addu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_addmul_1 diff --git a/gnu/lib/libgmp/mpn/mips2/lshift.s b/gnu/lib/libgmp/mpn/mips2/lshift.s new file mode 100644 index 00000000000..ee92d7916fd --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips2/lshift.s @@ -0,0 +1,95 @@ + # MIPS2 __mpn_lshift -- + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $4 + # src_ptr $5 + # size $6 + # cnt $7 + + .text + .align 2 + .globl __mpn_lshift + .ent __mpn_lshift +__mpn_lshift: + .set noreorder + .set nomacro + + sll $2,$6,2 + addu $5,$5,$2 # make r5 point at end of src + lw $10,-4($5) # load first limb + subu $13,$0,$7 + addu $4,$4,$2 # make r4 point at end of res + addiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + srl $2,$10,$13 # compute function result + + subu $6,$6,$9 + +.Loop0: lw $3,-8($5) + addiu $4,$4,-4 + addiu $5,$5,-4 + addiu $9,$9,-1 + sll $11,$10,$7 + srl $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sw $8,0($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: lw $3,-8($5) + addiu $4,$4,-16 + addiu $6,$6,-4 + sll $11,$10,$7 + srl $12,$3,$13 + + lw $10,-12($5) + sll $14,$3,$7 + or $8,$11,$12 + sw $8,12($4) + srl $9,$10,$13 + + lw $3,-16($5) + sll $11,$10,$7 + or $8,$14,$9 + sw $8,8($4) + srl $12,$3,$13 + + lw $10,-20($5) + sll $14,$3,$7 + or $8,$11,$12 + sw $8,4($4) + srl $9,$10,$13 + + addiu $5,$5,-16 + or $8,$14,$9 + bgtz $6,.Loop + sw $8,0($4) + +.Lend: sll $8,$10,$7 + j $31 + sw $8,-4($4) + .end __mpn_lshift diff --git a/gnu/lib/libgmp/mpn/mips2/mul_1.s b/gnu/lib/libgmp/mpn/mips2/mul_1.s new file mode 100644 index 00000000000..d006fa12219 --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips2/mul_1.s @@ -0,0 +1,85 @@ + # MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and + # store the product in a second limb vector. + + # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. 
+ + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __mpn_mul_1 + .ent __mpn_mul_1 +__mpn_mul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + lw $8,0($5) + + # warm up phase 1 + addiu $5,$5,4 + multu $8,$7 + + addiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + addiu $6,$6,-1 + beq $6,$0,$LC1 + lw $8,0($5) # load new s1 limb as early as possible + +Loop: mflo $10 + mfhi $9 + addiu $5,$5,4 + addu $10,$10,$2 # add old carry limb to low product limb + multu $8,$7 + lw $8,0($5) # load new s1 limb as early as possible + addiu $6,$6,-1 # decrement loop counter + sltu $2,$10,$2 # carry from previous addition -> $2 + sw $10,0($4) + addiu $4,$4,4 + bne $6,$0,Loop + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: mflo $10 + mfhi $9 + addu $10,$10,$2 + sltu $2,$10,$2 + multu $8,$7 + sw $10,0($4) + addiu $4,$4,4 + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: mflo $10 + mfhi $9 + addu $10,$10,$2 + sltu $2,$10,$2 + sw $10,0($4) + j $31 + addu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_mul_1 diff --git a/gnu/lib/libgmp/mpn/mips2/rshift.s b/gnu/lib/libgmp/mpn/mips2/rshift.s new file mode 100644 index 00000000000..a8beb405771 --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips2/rshift.s @@ -0,0 +1,92 @@ + # MIPS2 __mpn_rshift -- + + # Copyright (C) 1995 Free Software Foundation, 
Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 (destination limb vector, filled from the lowest address upwards) + # src_ptr $5 (source limb vector) + # size $6 (limb count; the first limb is loaded before size is tested, so it must be at least 1) + # cnt $7 (right-shift count in bits) + + .text + .align 2 + .globl __mpn_rshift + .ent __mpn_rshift +__mpn_rshift: + .set noreorder # branch delay slots below are filled by hand + .set nomacro + + lw $10,0($5) # load first limb + subu $13,$0,$7 # $13 = 0 - cnt, the count used by the left shifts below + addiu $6,$6,-1 # the last limb is finished separately at .Lend + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop + sll $2,$10,$13 # compute function result (delay slot, executed on both paths) + + subu $6,$6,$9 # limbs left for the unrolled loop + +.Loop0: lw $3,4($5) # $3 = next source limb + addiu $4,$4,4 + addiu $5,$5,4 + addiu $9,$9,-1 + srl $11,$10,$7 # current limb shifted right by cnt + sll $12,$3,$13 # next limb shifted left by $13 + move $10,$3 # next limb becomes the current one + or $8,$11,$12 # merge the two parts + bne $9,$0,.Loop0 + sw $8,-4($4) # delay slot: res_ptr was already stepped, hence -4 + +.L0: beq $6,$0,.Lend # nothing left for the unrolled loop + nop + +.Loop: lw $3,4($5) # four limbs per pass; $10 and $3 alternate as current/next + addiu $4,$4,16 + addiu $6,$6,-4 + srl $11,$10,$7 + sll $12,$3,$13 + + lw $10,8($5) + srl $14,$3,$7 + or $8,$11,$12 + sw $8,-16($4) + sll $9,$10,$13 + + lw $3,12($5) + srl $11,$10,$7 + or $8,$14,$9 + sw $8,-12($4) + sll $12,$3,$13 + + lw $10,16($5) + srl $14,$3,$7 + or $8,$11,$12 + sw $8,-8($4) + sll $9,$10,$13 + + addiu $5,$5,16 + or $8,$14,$9 + bgtz $6,.Loop + sw $8,-4($4) # delay slot: fourth store of the pass + +.Lend: srl $8,$10,$7 # last limb: only the right-shifted part + j $31 # return; the store below is its delay slot + sw $8,0($4) + .end __mpn_rshift diff --git a/gnu/lib/libgmp/mpn/mips2/sub_n.s
b/gnu/lib/libgmp/mpn/mips2/sub_n.s new file mode 100644 index 00000000000..3368ef29d66 --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips2/sub_n.s @@ -0,0 +1,120 @@ + # MIPS2 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + # store difference in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $4 (destination limb vector) + # s1_ptr $5 (minuend limb vector) + # s2_ptr $6 (subtrahend limb vector) + # size $7 (limb count; the first limbs are loaded before size is tested; borrow-out is left in $2) + + .text + .align 2 + .globl __mpn_sub_n + .ent __mpn_sub_n +__mpn_sub_n: + .set noreorder # branch delay slots below are filled by hand + .set nomacro + + lw $10,0($5) # $10 = first s1 limb + lw $11,0($6) # $11 = first s2 limb + + addiu $7,$7,-1 # the last limb is finished separately at .Lend + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop + move $2,$0 # delay slot: clear the borrow + + subu $7,$7,$9 # limbs left for the unrolled loop + +.Loop0: addiu $9,$9,-1 + lw $12,4($5) # fetch the following s1 limb early + addu $11,$11,$2 # s2 limb plus incoming borrow + lw $13,4($6) # fetch the following s2 limb early + sltu $8,$11,$2 # $8 = 1 if that addition wrapped + subu $11,$10,$11 # difference limb + sltu $2,$10,$11 # $2 = 1 if the subtraction wrapped + sw $11,0($4) # store the difference limb + or $2,$2,$8 # outgoing borrow + + addiu $5,$5,4 + addiu $6,$6,4 + move $10,$12 # following limbs become the current pair + move $11,$13 + bne $9,$0,.Loop0 + addiu $4,$4,4 # delay slot: step res_ptr + +.L0: beq $7,$0,.Lend # nothing left for the unrolled loop + nop + +.Loop: addiu $7,$7,-4 # four limbs per pass; $10/$11 and $12/$13 alternate + + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,0($4) + or $2,$2,$8 + + lw $10,8($5) + addu $13,$13,$2 + lw $11,8($6) + sltu $8,$13,$2 + subu $13,$12,$13 + sltu $2,$12,$13 + sw $13,4($4) + or $2,$2,$8 + + lw $12,12($5) + addu $11,$11,$2 + lw $13,12($6) + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,8($4) + or $2,$2,$8 + + lw $10,16($5) + addu $13,$13,$2 + lw $11,16($6) + sltu $8,$13,$2 + subu $13,$12,$13 + sltu $2,$12,$13 + sw $13,12($4) + or $2,$2,$8 + + addiu $5,$5,16 + addiu $6,$6,16 + + bne $7,$0,.Loop + addiu $4,$4,16 # delay slot: step res_ptr past the four stored limbs + +.Lend: addu $11,$11,$2 # last limb pair, already in $10/$11 + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,0($4) + j $31 # return; the borrow-out is completed in the delay slot + or $2,$2,$8 + + .end __mpn_sub_n diff --git a/gnu/lib/libgmp/mpn/mips2/submul_1.s b/gnu/lib/libgmp/mpn/mips2/submul_1.s new file mode 100644 index 00000000000..1324b660904 --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips2/submul_1.s @@ -0,0 +1,97 @@ + # MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and + # subtract the product from a second limb vector. + + # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library.
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 (limb vector that is read, reduced by the product, and written back) + # s1_ptr $5 (source limb vector) + # size $6 (limb count; the first limb is loaded before size is tested, so it must be at least 1) + # s2_limb $7 (multiplier limb; the final carry limb is left in $2) + + .text + .align 4 + .globl __mpn_submul_1 + .ent __mpn_submul_1 +__mpn_submul_1: + .set noreorder # branch delay slots below are filled by hand + .set nomacro + + # warm up phase 0 + lw $8,0($5) # $8 = first s1 limb + + # warm up phase 1 + addiu $5,$5,4 # step s1_ptr to the next limb + multu $8,$7 # start the first product + + addiu $6,$6,-1 + beq $6,$0,$LC0 # size was 1: only the final phase remains + move $2,$0 # zero cy2 (delay slot, executed on both paths) + + addiu $6,$6,-1 + beq $6,$0,$LC1 # size was 2: skip the loop + lw $8,0($5) # load new s1 limb as early as possible + +Loop: lw $10,0($4) # $10 = current res limb + mflo $3 # $3 = low word of the pending product + mfhi $9 # $9 = high word of the pending product + addiu $5,$5,4 + addu $3,$3,$2 # add old carry limb to low product limb + multu $8,$7 # start the next product + lw $8,0($5) # load new s1 limb as early as possible + addiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + subu $3,$10,$3 # res limb minus (low product limb plus carry) + sgtu $10,$3,$10 # $10 = 1 if the subtraction wrapped + addu $2,$2,$10 # fold that borrow into the carry + sw $3,0($4) # write the updated res limb back + addiu $4,$4,4 # step res_ptr + bne $6,$0,Loop + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: lw $10,0($4) # same step as the loop body, but no further limb is loaded + mflo $3 + mfhi $9 + addu $3,$3,$2 + sltu $2,$3,$2 + multu $8,$7 # start the last product + subu $3,$10,$3 + sgtu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: lw $10,0($4) # last res limb; no further multiply is started + mflo $3 + mfhi $9 + addu
$3,$3,$2 + sltu $2,$3,$2 + subu $3,$10,$3 + sgtu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + j $31 + addu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_submul_1 diff --git a/gnu/lib/libgmp/mpn/mips3/README b/gnu/lib/libgmp/mpn/mips3/README new file mode 100644 index 00000000000..e94b2c74607 --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips3/README @@ -0,0 +1,23 @@ +This directory contains mpn functions optimized for MIPS3. Examples of +processors that implement MIPS3 are R4000, R4400, R4600, R4700, and R8000. + +RELEVANT OPTIMIZATION ISSUES + +1. On the R4000 and R4400, branches, both the plain and the "likely" ones, + take 3 cycles to execute. (The fastest possible loop will take 4 cycles, + because of the delay insn.) + + On the R4600, branches take a single cycle. + + On the R8000, branches often take no noticeable cycles, as they are + executed in a separate function unit. + +2. The R4000 and R4400 have a load latency of 4 cycles. + +3. On the R4000 and R4400, multiplies take a data-dependent number of + cycles, contrary to the SGI documentation. There seem to be 3 or 4 + possible latencies. + +STATUS + +Good... diff --git a/gnu/lib/libgmp/mpn/mips3/add_n.s b/gnu/lib/libgmp/mpn/mips3/add_n.s new file mode 100644 index 00000000000..996a449ebdb --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips3/add_n.s @@ -0,0 +1,120 @@ + # MIPS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version.
+ + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 (destination limb vector, 8-byte limbs) + # s1_ptr $5 (first addend limb vector) + # s2_ptr $6 (second addend limb vector) + # size $7 (limb count; the first limbs are loaded before size is tested; carry-out is left in $2) + + .text + .align 2 + .globl __mpn_add_n + .ent __mpn_add_n +__mpn_add_n: + .set noreorder # branch delay slots below are filled by hand + .set nomacro + + ld $10,0($5) # $10 = first s1 limb + ld $11,0($6) # $11 = first s2 limb + + daddiu $7,$7,-1 # the last limb is finished separately at .Lend + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop + move $2,$0 # delay slot: clear the carry + + dsubu $7,$7,$9 # limbs left for the unrolled loop + +.Loop0: daddiu $9,$9,-1 + ld $12,8($5) # fetch the following s1 limb early + daddu $11,$11,$2 # s2 limb plus incoming carry + ld $13,8($6) # fetch the following s2 limb early + sltu $8,$11,$2 # $8 = 1 if that addition wrapped + daddu $11,$10,$11 # sum limb + sltu $2,$11,$10 # $2 = 1 if the sum wrapped + sd $11,0($4) # store the sum limb + or $2,$2,$8 # outgoing carry + + daddiu $5,$5,8 + daddiu $6,$6,8 + move $10,$12 # following limbs become the current pair + move $11,$13 + bne $9,$0,.Loop0 + daddiu $4,$4,8 # delay slot: step res_ptr + +.L0: beq $7,$0,.Lend # nothing left for the unrolled loop + nop + +.Loop: daddiu $7,$7,-4 # four limbs per pass; $10/$11 and $12/$13 alternate + + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + or $2,$2,$8 + + ld $10,16($5) + daddu $13,$13,$2 + ld $11,16($6) + sltu $8,$13,$2 + daddu $13,$12,$13 + sltu $2,$13,$12 + sd $13,8($4) + or $2,$2,$8 + + ld $12,24($5) + daddu $11,$11,$2 + ld $13,24($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,16($4) + or $2,$2,$8 + + ld $10,32($5) + daddu $13,$13,$2 + ld $11,32($6) + sltu $8,$13,$2 + daddu $13,$12,$13 + sltu $2,$13,$12 + sd $13,24($4) + or $2,$2,$8 + + daddiu $5,$5,32 + daddiu $6,$6,32 + + bne $7,$0,.Loop + daddiu $4,$4,32 # delay slot: step res_ptr past the four stored limbs + +.Lend: daddu $11,$11,$2 # last limb pair, already in $10/$11 + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + j $31 # return; the carry-out is completed in the delay slot + or $2,$2,$8 + + .end __mpn_add_n diff --git
a/gnu/lib/libgmp/mpn/mips3/addmul_1.s b/gnu/lib/libgmp/mpn/mips3/addmul_1.s new file mode 100644 index 00000000000..cd75c180133 --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips3/addmul_1.s @@ -0,0 +1,97 @@ + # MIPS3 __mpn_addmul_1 -- Multiply a limb vector with a single limb and + # add the product to a second limb vector. + + # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $4 (limb vector that is read, increased by the product, and written back) + # s1_ptr $5 (source limb vector, 8-byte limbs) + # size $6 (limb count; the first limb is loaded before size is tested, so it must be at least 1) + # s2_limb $7 (multiplier limb; the final carry limb is left in $2) + + .text + .align 4 + .globl __mpn_addmul_1 + .ent __mpn_addmul_1 +__mpn_addmul_1: + .set noreorder # branch delay slots below are filled by hand + .set nomacro + + # warm up phase 0 + ld $8,0($5) # $8 = first s1 limb + + # warm up phase 1 + daddiu $5,$5,8 # step s1_ptr to the next limb + dmultu $8,$7 # start the first product + + daddiu $6,$6,-1 + beq $6,$0,$LC0 # size was 1: only the final phase remains + move $2,$0 # zero cy2 (delay slot, executed on both paths) + + daddiu $6,$6,-1 + beq $6,$0,$LC1 # size was 2: skip the loop + ld $8,0($5) # load new s1 limb as early as possible + +Loop: ld $10,0($4) # $10 = current res limb + mflo $3 # $3 = low half of the pending product + mfhi $9 # $9 = high half of the pending product + daddiu $5,$5,8 + daddu $3,$3,$2 # add old carry limb to low product limb + dmultu $8,$7 # start the next product + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + daddu $3,$10,$3 # res limb plus (low product limb plus carry) + sltu $10,$3,$10 # $10 = 1 if that addition wrapped + daddu $2,$2,$10 # fold that carry into the running carry + sd $3,0($4) # write the updated res limb back + daddiu $4,$4,8 # step res_ptr + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: ld $10,0($4) # same step as the loop body, but no further limb is loaded + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dmultu $8,$7 # start the last product + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: ld $10,0($4) # last res limb; no further multiply is started + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + j $31 # return; the next insn is its delay slot + daddu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_addmul_1 diff --git a/gnu/lib/libgmp/mpn/mips3/gmp-mparam.h b/gnu/lib/libgmp/mpn/mips3/gmp-mparam.h new file mode 100644 index 00000000000..f3df7ff6e0e --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips3/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 /* limb width in bits; equals 8 * BYTES_PER_MP_LIMB */ +#define BYTES_PER_MP_LIMB 8 /* limb width in bytes */ +#define BITS_PER_LONGINT 32 /* half of BITS_PER_MP_LIMB in this configuration */ +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 diff --git a/gnu/lib/libgmp/mpn/mips3/lshift.s b/gnu/lib/libgmp/mpn/mips3/lshift.s new file mode 100644 index 00000000000..324a6020c8f --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips3/lshift.s @@ -0,0 +1,95 @@ + # MIPS3 __mpn_lshift -- + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB.
If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 (destination limb vector, filled from the highest address downwards) + # src_ptr $5 (source limb vector, 8-byte limbs) + # size $6 (limb count; a limb is loaded before size is tested, so it must be at least 1) + # cnt $7 (left-shift count in bits) + + .text + .align 2 + .globl __mpn_lshift + .ent __mpn_lshift +__mpn_lshift: + .set noreorder # branch delay slots below are filled by hand + .set nomacro + + dsll $2,$6,3 # $2 = size * 8, the vector length in bytes + daddu $5,$5,$2 # make r5 point at end of src + ld $10,-8($5) # load first limb (the one just below the end of src) + dsubu $13,$0,$7 # $13 = 0 - cnt, the count used by the right shifts below + daddu $4,$4,$2 # make r4 point at end of res + daddiu $6,$6,-1 # the last limb is finished separately at .Lend + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop + dsrl $2,$10,$13 # compute function result (delay slot, executed on both paths) + + dsubu $6,$6,$9 # limbs left for the unrolled loop + +.Loop0: ld $3,-16($5) # $3 = next source limb, one step lower in memory + daddiu $4,$4,-8 + daddiu $5,$5,-8 + daddiu $9,$9,-1 + dsll $11,$10,$7 # current limb shifted left by cnt + dsrl $12,$3,$13 # next limb shifted right by $13 + move $10,$3 # next limb becomes the current one + or $8,$11,$12 # merge the two parts + bne $9,$0,.Loop0 + sd $8,0($4) # delay slot: res_ptr was already stepped down + +.L0: beq $6,$0,.Lend # nothing left for the unrolled loop + nop + +.Loop: ld $3,-16($5) # four limbs per pass; $10 and $3 alternate as current/next + daddiu $4,$4,-32 + daddiu $6,$6,-4 + dsll $11,$10,$7 + dsrl $12,$3,$13 + + ld $10,-24($5) + dsll $14,$3,$7 + or $8,$11,$12 + sd $8,24($4) + dsrl $9,$10,$13 + + ld $3,-32($5) + dsll $11,$10,$7 + or $8,$14,$9 + sd $8,16($4) + dsrl $12,$3,$13 + + ld $10,-40($5) + dsll $14,$3,$7 + or $8,$11,$12 + sd $8,8($4) + dsrl $9,$10,$13 + + daddiu $5,$5,-32 + or $8,$14,$9 + bgtz $6,.Loop + sd $8,0($4) # delay slot: fourth store of the pass + +.Lend: dsll $8,$10,$7 # last limb: only the left-shifted part + j $31 # return; the store below is its delay slot + sd $8,-8($4) + .end __mpn_lshift diff --git a/gnu/lib/libgmp/mpn/mips3/mul_1.s b/gnu/lib/libgmp/mpn/mips3/mul_1.s new file mode 100644 index 00000000000..281d0574aa8 --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips3/mul_1.s @@ -0,0 +1,85 @@ + # MIPS3 __mpn_mul_1 -- Multiply a limb vector with a single limb and + # store the product in a second limb vector. + + # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library.
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 (destination; one 8-byte limb is stored per source limb) + # s1_ptr $5 (source limb vector) + # size $6 (limb count; the first limb is loaded before size is tested, so it must be at least 1) + # s2_limb $7 (multiplier limb; the final carry limb is left in $2) + + .text + .align 4 + .globl __mpn_mul_1 + .ent __mpn_mul_1 +__mpn_mul_1: + .set noreorder # branch delay slots below are filled by hand + .set nomacro + + # warm up phase 0 + ld $8,0($5) # $8 = first s1 limb + + # warm up phase 1 + daddiu $5,$5,8 # step s1_ptr to the next limb + dmultu $8,$7 # start the first product + + daddiu $6,$6,-1 + beq $6,$0,$LC0 # size was 1: only the final phase remains + move $2,$0 # zero cy2 (delay slot, executed on both paths) + + daddiu $6,$6,-1 + beq $6,$0,$LC1 # size was 2: skip the loop + ld $8,0($5) # load new s1 limb as early as possible + +Loop: mflo $10 # $10 = low half of the pending product + mfhi $9 # $9 = high half of the pending product + daddiu $5,$5,8 + daddu $10,$10,$2 # add old carry limb to low product limb + dmultu $8,$7 # start the next product + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$10,$2 # carry from previous addition -> $2 + sd $10,0($4) # store the result limb + daddiu $4,$4,8 # step res_ptr + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: mflo $10 # same step as the loop body, but no further limb is loaded + mfhi $9 + daddu $10,$10,$2 + sltu $2,$10,$2 + dmultu $8,$7 # start the last product + sd $10,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: mflo $10 # last product; no further multiply is started + mfhi $9 + daddu $10,$10,$2 + sltu $2,$10,$2 + sd $10,0($4) # store the last result limb + j $31 # return; the next insn is its delay slot + daddu $2,$9,$2 # add high product limb and carry from addition + + .end
__mpn_mul_1 diff --git a/gnu/lib/libgmp/mpn/mips3/rshift.s b/gnu/lib/libgmp/mpn/mips3/rshift.s new file mode 100644 index 00000000000..9920e1a9e3a --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips3/rshift.s @@ -0,0 +1,92 @@ + # MIPS3 __mpn_rshift -- + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $4 (destination limb vector, filled from the lowest address upwards) + # src_ptr $5 (source limb vector, 8-byte limbs) + # size $6 (limb count; the first limb is loaded before size is tested, so it must be at least 1) + # cnt $7 (right-shift count in bits) + + .text + .align 2 + .globl __mpn_rshift + .ent __mpn_rshift +__mpn_rshift: + .set noreorder # branch delay slots below are filled by hand + .set nomacro + + ld $10,0($5) # load first limb + dsubu $13,$0,$7 # $13 = 0 - cnt, the count used by the left shifts below + daddiu $6,$6,-1 # the last limb is finished separately at .Lend + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop + dsll $2,$10,$13 # compute function result (delay slot, executed on both paths) + + dsubu $6,$6,$9 # limbs left for the unrolled loop + +.Loop0: ld $3,8($5) # $3 = next source limb + daddiu $4,$4,8 + daddiu $5,$5,8 + daddiu $9,$9,-1 + dsrl $11,$10,$7 # current limb shifted right by cnt + dsll $12,$3,$13 # next limb shifted left by $13 + move $10,$3 # next limb becomes the current one + or $8,$11,$12 # merge the two parts + bne $9,$0,.Loop0 + sd $8,-8($4) # delay slot: res_ptr was already stepped, hence -8 + +.L0: beq $6,$0,.Lend # nothing left for the unrolled loop + nop + +.Loop: ld $3,8($5) # four limbs per pass; $10 and $3 alternate as current/next + daddiu $4,$4,32 + daddiu $6,$6,-4 + dsrl $11,$10,$7 + dsll $12,$3,$13 + + ld $10,16($5) + dsrl $14,$3,$7 + or $8,$11,$12 + sd $8,-32($4) + dsll $9,$10,$13 + + ld $3,24($5) + dsrl $11,$10,$7 + or $8,$14,$9 + sd $8,-24($4) + dsll $12,$3,$13 + + ld $10,32($5) + dsrl $14,$3,$7 + or $8,$11,$12 + sd $8,-16($4) + dsll $9,$10,$13 + + daddiu $5,$5,32 + or $8,$14,$9 + bgtz $6,.Loop + sd $8,-8($4) # delay slot: fourth store of the pass + +.Lend: dsrl $8,$10,$7 # last limb: only the right-shifted part + j $31 # return; the store below is its delay slot + sd $8,0($4) + .end __mpn_rshift diff --git a/gnu/lib/libgmp/mpn/mips3/sub_n.s b/gnu/lib/libgmp/mpn/mips3/sub_n.s new file mode 100644 index 00000000000..56c77d8bc4e --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips3/sub_n.s @@ -0,0 +1,120 @@ + # MIPS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + # store difference in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version.
+ + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 (destination limb vector, 8-byte limbs) + # s1_ptr $5 (minuend limb vector) + # s2_ptr $6 (subtrahend limb vector) + # size $7 (limb count; the first limbs are loaded before size is tested; borrow-out is left in $2) + + .text + .align 2 + .globl __mpn_sub_n + .ent __mpn_sub_n +__mpn_sub_n: + .set noreorder # branch delay slots below are filled by hand + .set nomacro + + ld $10,0($5) # $10 = first s1 limb + ld $11,0($6) # $11 = first s2 limb + + daddiu $7,$7,-1 # the last limb is finished separately at .Lend + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop + move $2,$0 # delay slot: clear the borrow + + dsubu $7,$7,$9 # limbs left for the unrolled loop + +.Loop0: daddiu $9,$9,-1 + ld $12,8($5) # fetch the following s1 limb early + daddu $11,$11,$2 # s2 limb plus incoming borrow + ld $13,8($6) # fetch the following s2 limb early + sltu $8,$11,$2 # $8 = 1 if that addition wrapped + dsubu $11,$10,$11 # difference limb + sltu $2,$10,$11 # $2 = 1 if the subtraction wrapped + sd $11,0($4) # store the difference limb + or $2,$2,$8 # outgoing borrow + + daddiu $5,$5,8 + daddiu $6,$6,8 + move $10,$12 # following limbs become the current pair + move $11,$13 + bne $9,$0,.Loop0 + daddiu $4,$4,8 # delay slot: step res_ptr + +.L0: beq $7,$0,.Lend # nothing left for the unrolled loop + nop + +.Loop: daddiu $7,$7,-4 # four limbs per pass; $10/$11 and $12/$13 alternate + + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + or $2,$2,$8 + + ld $10,16($5) + daddu $13,$13,$2 + ld $11,16($6) + sltu $8,$13,$2 + dsubu $13,$12,$13 + sltu $2,$12,$13 + sd $13,8($4) + or $2,$2,$8 + + ld $12,24($5) + daddu $11,$11,$2 + ld $13,24($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,16($4) + or $2,$2,$8 + + ld $10,32($5) + daddu $13,$13,$2 + ld $11,32($6) + sltu $8,$13,$2 + dsubu $13,$12,$13 + sltu $2,$12,$13 + sd $13,24($4) + or $2,$2,$8 + + daddiu $5,$5,32 + daddiu $6,$6,32 + + bne $7,$0,.Loop + daddiu $4,$4,32 # delay slot: step res_ptr past the four stored limbs + +.Lend: daddu $11,$11,$2 # last limb pair, already in $10/$11 + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + j $31 # return; the borrow-out is completed in the delay slot + or $2,$2,$8 + + .end __mpn_sub_n diff --git
a/gnu/lib/libgmp/mpn/mips3/submul_1.s b/gnu/lib/libgmp/mpn/mips3/submul_1.s new file mode 100644 index 00000000000..a9c9fa25149 --- /dev/null +++ b/gnu/lib/libgmp/mpn/mips3/submul_1.s @@ -0,0 +1,97 @@ + # MIPS3 __mpn_submul_1 -- Multiply a limb vector with a single limb and + # subtract the product from a second limb vector. + + # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $4 (limb vector that is read, reduced by the product, and written back) + # s1_ptr $5 (source limb vector, 8-byte limbs) + # size $6 (limb count; the first limb is loaded before size is tested, so it must be at least 1) + # s2_limb $7 (multiplier limb; the final carry limb is left in $2) + + .text + .align 4 + .globl __mpn_submul_1 + .ent __mpn_submul_1 +__mpn_submul_1: + .set noreorder # branch delay slots below are filled by hand + .set nomacro + + # warm up phase 0 + ld $8,0($5) # $8 = first s1 limb + + # warm up phase 1 + daddiu $5,$5,8 # step s1_ptr to the next limb + dmultu $8,$7 # start the first product + + daddiu $6,$6,-1 + beq $6,$0,$LC0 # size was 1: only the final phase remains + move $2,$0 # zero cy2 (delay slot, executed on both paths) + + daddiu $6,$6,-1 + beq $6,$0,$LC1 # size was 2: skip the loop + ld $8,0($5) # load new s1 limb as early as possible + +Loop: ld $10,0($4) # $10 = current res limb + mflo $3 # $3 = low half of the pending product + mfhi $9 # $9 = high half of the pending product + daddiu $5,$5,8 + daddu $3,$3,$2 # add old carry limb to low product limb + dmultu $8,$7 # start the next product + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + dsubu $3,$10,$3 # res limb minus (low product limb plus carry) + sgtu $10,$3,$10 # $10 = 1 if the subtraction wrapped + daddu $2,$2,$10 # fold that borrow into the carry + sd $3,0($4) # write the updated res limb back + daddiu $4,$4,8 # step res_ptr + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: ld $10,0($4) # same step as the loop body, but no further limb is loaded + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dmultu $8,$7 # start the last product + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: ld $10,0($4) # last res limb; no further multiply is started + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + j $31 # return; the next insn is its delay slot + daddu $2,$9,$2 # add high product limb and carry from addition + + .end __mpn_submul_1 diff --git a/gnu/lib/libgmp/mpn/mp_bases.c b/gnu/lib/libgmp/mpn/mp_bases.c new file mode 100644 index 00000000000..bbe39b02976 --- /dev/null +++ b/gnu/lib/libgmp/mpn/mp_bases.c @@ -0,0 +1,549 @@ +/* __mp_bases -- Structure for conversion between internal binary + format and strings in base 2..255. The fields are explained in + gmp-impl.h. + + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#if BITS_PER_MP_LIMB == 32 +const struct bases __mp_bases[256] = +{ + /* 0 */ {0, 0.0, 0, 0}, + /* 1 */ {0, 1e38, 0, 0}, + /* 2 */ {32, 1.00000000, 0x1, 0x0}, + /* 3 */ {20, 0.63092975, 0xcfd41b91, 0x3b563c24}, + /* 4 */ {16, 0.50000000, 0x2, 0x0}, + /* 5 */ {13, 0.43067656, 0x48c27395, 0xc25c2684}, + /* 6 */ {12, 0.38685281, 0x81bf1000, 0xf91bd1b6}, + /* 7 */ {11, 0.35620719, 0x75db9c97, 0x1607a2cb}, + /* 8 */ {10, 0.33333333, 0x3, 0x0}, + /* 9 */ {10, 0.31546488, 0xcfd41b91, 0x3b563c24}, + /* 10 */ {9, 0.30103000, 0x3b9aca00, 0x12e0be82}, + /* 11 */ {9, 0.28906483, 0x8c8b6d2b, 0xd24cde04}, + /* 12 */ {8, 0.27894295, 0x19a10000, 0x3fa39ab5}, + /* 13 */ {8, 0.27023815, 0x309f1021, 0x50f8ac5f}, + /* 14 */ {8, 0.26264954, 0x57f6c100, 0x74843b1e}, + /* 15 */ {8, 0.25595802, 0x98c29b81, 0xad0326c2}, + /* 16 */ {8, 0.25000000, 0x4, 0x0}, + /* 17 */ {7, 0.24465054, 0x18754571, 0x4ef0b6bd}, + /* 18 */ {7, 0.23981247, 0x247dbc80, 0xc0fc48a1}, + /* 19 */ {7, 0.23540891, 0x3547667b, 0x33838942}, + /* 20 */ {7, 0.23137821, 0x4c4b4000, 0xad7f29ab}, + /* 21 */ {7, 0.22767025, 0x6b5a6e1d, 0x313c3d15}, + /* 22 */ {7, 0.22424382, 0x94ace180, 0xb8cca9e0}, + /* 23 */ {7, 0.22106473, 
0xcaf18367, 0x42ed6de9}, + /* 24 */ {6, 0.21810429, 0xb640000, 0x67980e0b}, + /* 25 */ {6, 0.21533828, 0xe8d4a51, 0x19799812}, + /* 26 */ {6, 0.21274605, 0x1269ae40, 0xbce85396}, + /* 27 */ {6, 0.21030992, 0x17179149, 0x62c103a9}, + /* 28 */ {6, 0.20801460, 0x1cb91000, 0x1d353d43}, + /* 29 */ {6, 0.20584683, 0x23744899, 0xce1decea}, + /* 30 */ {6, 0.20379505, 0x2b73a840, 0x790fc511}, + /* 31 */ {6, 0.20184909, 0x34e63b41, 0x35b865a0}, + /* 32 */ {6, 0.20000000, 0x5, 0x0}, + /* 33 */ {6, 0.19823986, 0x4cfa3cc1, 0xa9aed1b3}, + /* 34 */ {6, 0.19656163, 0x5c13d840, 0x63dfc229}, + /* 35 */ {6, 0.19495902, 0x6d91b519, 0x2b0fee30}, + /* 36 */ {6, 0.19342640, 0x81bf1000, 0xf91bd1b6}, + /* 37 */ {6, 0.19195872, 0x98ede0c9, 0xac89c3a9}, + /* 38 */ {6, 0.19055141, 0xb3773e40, 0x6d2c32fe}, + /* 39 */ {6, 0.18920036, 0xd1bbc4d1, 0x387907c9}, + /* 40 */ {6, 0.18790182, 0xf4240000, 0xc6f7a0b}, + /* 41 */ {5, 0.18665241, 0x6e7d349, 0x28928154}, + /* 42 */ {5, 0.18544902, 0x7ca30a0, 0x6e8629d}, + /* 43 */ {5, 0.18428883, 0x8c32bbb, 0xd373dca0}, + /* 44 */ {5, 0.18316925, 0x9d46c00, 0xa0b17895}, + /* 45 */ {5, 0.18208790, 0xaffacfd, 0x746811a5}, + /* 46 */ {5, 0.18104260, 0xc46bee0, 0x4da6500f}, + /* 47 */ {5, 0.18003133, 0xdab86ef, 0x2ba23582}, + /* 48 */ {5, 0.17905223, 0xf300000, 0xdb20a88}, + /* 49 */ {5, 0.17810359, 0x10d63af1, 0xe68d5ce4}, + /* 50 */ {5, 0.17718382, 0x12a05f20, 0xb7cdfd9d}, + /* 51 */ {5, 0.17629143, 0x1490aae3, 0x8e583933}, + /* 52 */ {5, 0.17542506, 0x16a97400, 0x697cc3ea}, + /* 53 */ {5, 0.17458343, 0x18ed2825, 0x48a5ca6c}, + /* 54 */ {5, 0.17376534, 0x1b5e4d60, 0x2b52db16}, + /* 55 */ {5, 0.17296969, 0x1dff8297, 0x111586a6}, + /* 56 */ {5, 0.17219543, 0x20d38000, 0xf31d2b36}, + /* 57 */ {5, 0.17144160, 0x23dd1799, 0xc8d76d19}, + /* 58 */ {5, 0.17070728, 0x271f35a0, 0xa2cb1eb4}, + /* 59 */ {5, 0.16999162, 0x2a9ce10b, 0x807c3ec3}, + /* 60 */ {5, 0.16929381, 0x2e593c00, 0x617ec8bf}, + /* 61 */ {5, 0.16861310, 0x3257844d, 0x45746cbe}, + /* 62 */ {5, 
0.16794878, 0x369b13e0, 0x2c0aa273}, + /* 63 */ {5, 0.16730018, 0x3b27613f, 0x14f90805}, + /* 64 */ {5, 0.16666667, 0x6, 0x0}, + /* 65 */ {5, 0.16604765, 0x4528a141, 0xd9cf0829}, + /* 66 */ {5, 0.16544255, 0x4aa51420, 0xb6fc4841}, + /* 67 */ {5, 0.16485086, 0x50794633, 0x973054cb}, + /* 68 */ {5, 0.16427205, 0x56a94400, 0x7a1dbe4b}, + /* 69 */ {5, 0.16370566, 0x5d393975, 0x5f7fcd7f}, + /* 70 */ {5, 0.16315122, 0x642d7260, 0x47196c84}, + /* 71 */ {5, 0.16260831, 0x6b8a5ae7, 0x30b43635}, + /* 72 */ {5, 0.16207652, 0x73548000, 0x1c1fa5f6}, + /* 73 */ {5, 0.16155547, 0x7b908fe9, 0x930634a}, + /* 74 */ {5, 0.16104477, 0x84435aa0, 0xef7f4a3c}, + /* 75 */ {5, 0.16054409, 0x8d71d25b, 0xcf5552d2}, + /* 76 */ {5, 0.16005307, 0x97210c00, 0xb1a47c8e}, + /* 77 */ {5, 0.15957142, 0xa1563f9d, 0x9634b43e}, + /* 78 */ {5, 0.15909881, 0xac16c8e0, 0x7cd3817d}, + /* 79 */ {5, 0.15863496, 0xb768278f, 0x65536761}, + /* 80 */ {5, 0.15817959, 0xc3500000, 0x4f8b588e}, + /* 81 */ {5, 0.15773244, 0xcfd41b91, 0x3b563c24}, + /* 82 */ {5, 0.15729325, 0xdcfa6920, 0x28928154}, + /* 83 */ {5, 0.15686177, 0xeac8fd83, 0x1721bfb0}, + /* 84 */ {5, 0.15643779, 0xf9461400, 0x6e8629d}, + /* 85 */ {4, 0.15602107, 0x31c84b1, 0x491cc17c}, + /* 86 */ {4, 0.15561139, 0x342ab10, 0x3a11d83b}, + /* 87 */ {4, 0.15520856, 0x36a2c21, 0x2be074cd}, + /* 88 */ {4, 0.15481238, 0x3931000, 0x1e7a02e7}, + /* 89 */ {4, 0.15442266, 0x3bd5ee1, 0x11d10edd}, + /* 90 */ {4, 0.15403922, 0x3e92110, 0x5d92c68}, + /* 91 */ {4, 0.15366189, 0x4165ef1, 0xf50dbfb2}, + /* 92 */ {4, 0.15329049, 0x4452100, 0xdf9f1316}, + /* 93 */ {4, 0.15292487, 0x4756fd1, 0xcb52a684}, + /* 94 */ {4, 0.15256487, 0x4a75410, 0xb8163e97}, + /* 95 */ {4, 0.15221035, 0x4dad681, 0xa5d8f269}, + /* 96 */ {4, 0.15186115, 0x5100000, 0x948b0fcd}, + /* 97 */ {4, 0.15151715, 0x546d981, 0x841e0215}, + /* 98 */ {4, 0.15117821, 0x57f6c10, 0x74843b1e}, + /* 99 */ {4, 0.15084420, 0x5b9c0d1, 0x65b11e6e}, + /* 100 */ {4, 0.15051500, 0x5f5e100, 0x5798ee23}, + /* 101 */ {4, 
0.15019048, 0x633d5f1, 0x4a30b99b}, + /* 102 */ {4, 0.14987054, 0x673a910, 0x3d6e4d94}, + /* 103 */ {4, 0.14955506, 0x6b563e1, 0x314825b0}, + /* 104 */ {4, 0.14924394, 0x6f91000, 0x25b55f2e}, + /* 105 */ {4, 0.14893706, 0x73eb721, 0x1aadaccb}, + /* 106 */ {4, 0.14863434, 0x7866310, 0x10294ba2}, + /* 107 */ {4, 0.14833567, 0x7d01db1, 0x620f8f6}, + /* 108 */ {4, 0.14804096, 0x81bf100, 0xf91bd1b6}, + /* 109 */ {4, 0.14775011, 0x869e711, 0xe6d37b2a}, + /* 110 */ {4, 0.14746305, 0x8ba0a10, 0xd55cff6e}, + /* 111 */ {4, 0.14717969, 0x90c6441, 0xc4ad2db2}, + /* 112 */ {4, 0.14689994, 0x9610000, 0xb4b985cf}, + /* 113 */ {4, 0.14662372, 0x9b7e7c1, 0xa5782bef}, + /* 114 */ {4, 0.14635096, 0xa112610, 0x96dfdd2a}, + /* 115 */ {4, 0.14608158, 0xa6cc591, 0x88e7e509}, + /* 116 */ {4, 0.14581551, 0xacad100, 0x7b8813d3}, + /* 117 */ {4, 0.14555268, 0xb2b5331, 0x6eb8b595}, + /* 118 */ {4, 0.14529302, 0xb8e5710, 0x627289db}, + /* 119 */ {4, 0.14503647, 0xbf3e7a1, 0x56aebc07}, + /* 120 */ {4, 0.14478295, 0xc5c1000, 0x4b66dc33}, + /* 121 */ {4, 0.14453241, 0xcc6db61, 0x4094d8a3}, + /* 122 */ {4, 0.14428479, 0xd345510, 0x3632f7a5}, + /* 123 */ {4, 0.14404003, 0xda48871, 0x2c3bd1f0}, + /* 124 */ {4, 0.14379807, 0xe178100, 0x22aa4d5f}, + /* 125 */ {4, 0.14355885, 0xe8d4a51, 0x19799812}, + /* 126 */ {4, 0.14332233, 0xf05f010, 0x10a523e5}, + /* 127 */ {4, 0.14308844, 0xf817e01, 0x828a237}, + /* 128 */ {4, 0.14285714, 0x7, 0x0}, + /* 129 */ {4, 0.14262838, 0x10818201, 0xf04ec452}, + /* 130 */ {4, 0.14240211, 0x11061010, 0xe136444a}, + /* 131 */ {4, 0.14217828, 0x118db651, 0xd2af9589}, + /* 132 */ {4, 0.14195685, 0x12188100, 0xc4b42a83}, + /* 133 */ {4, 0.14173777, 0x12a67c71, 0xb73dccf5}, + /* 134 */ {4, 0.14152100, 0x1337b510, 0xaa4698c5}, + /* 135 */ {4, 0.14130649, 0x13cc3761, 0x9dc8f729}, + /* 136 */ {4, 0.14109421, 0x14641000, 0x91bf9a30}, + /* 137 */ {4, 0.14088412, 0x14ff4ba1, 0x86257887}, + /* 138 */ {4, 0.14067617, 0x159df710, 0x7af5c98c}, + /* 139 */ {4, 0.14047033, 0x16401f31, 
0x702c01a0}, + /* 140 */ {4, 0.14026656, 0x16e5d100, 0x65c3ceb1}, + /* 141 */ {4, 0.14006482, 0x178f1991, 0x5bb91502}, + /* 142 */ {4, 0.13986509, 0x183c0610, 0x5207ec23}, + /* 143 */ {4, 0.13966731, 0x18eca3c1, 0x48ac9c19}, + /* 144 */ {4, 0.13947147, 0x19a10000, 0x3fa39ab5}, + /* 145 */ {4, 0.13927753, 0x1a592841, 0x36e98912}, + /* 146 */ {4, 0.13908545, 0x1b152a10, 0x2e7b3140}, + /* 147 */ {4, 0.13889521, 0x1bd51311, 0x2655840b}, + /* 148 */ {4, 0.13870677, 0x1c98f100, 0x1e7596ea}, + /* 149 */ {4, 0.13852011, 0x1d60d1b1, 0x16d8a20d}, + /* 150 */ {4, 0.13833519, 0x1e2cc310, 0xf7bfe87}, + /* 151 */ {4, 0.13815199, 0x1efcd321, 0x85d2492}, + /* 152 */ {4, 0.13797047, 0x1fd11000, 0x179a9f4}, + /* 153 */ {4, 0.13779062, 0x20a987e1, 0xf59e80eb}, + /* 154 */ {4, 0.13761241, 0x21864910, 0xe8b768db}, + /* 155 */ {4, 0.13743580, 0x226761f1, 0xdc39d6d5}, + /* 156 */ {4, 0.13726078, 0x234ce100, 0xd021c5d1}, + /* 157 */ {4, 0.13708732, 0x2436d4d1, 0xc46b5e37}, + /* 158 */ {4, 0.13691539, 0x25254c10, 0xb912f39c}, + /* 159 */ {4, 0.13674498, 0x26185581, 0xae150294}, + /* 160 */ {4, 0.13657605, 0x27100000, 0xa36e2eb1}, + /* 161 */ {4, 0.13640859, 0x280c5a81, 0x991b4094}, + /* 162 */ {4, 0.13624257, 0x290d7410, 0x8f19241e}, + /* 163 */ {4, 0.13607797, 0x2a135bd1, 0x8564e6b7}, + /* 164 */ {4, 0.13591477, 0x2b1e2100, 0x7bfbb5b4}, + /* 165 */ {4, 0.13575295, 0x2c2dd2f1, 0x72dadcc8}, + /* 166 */ {4, 0.13559250, 0x2d428110, 0x69ffc498}, + /* 167 */ {4, 0.13543338, 0x2e5c3ae1, 0x6167f154}, + /* 168 */ {4, 0.13527558, 0x2f7b1000, 0x5911016e}, + /* 169 */ {4, 0.13511908, 0x309f1021, 0x50f8ac5f}, + /* 170 */ {4, 0.13496386, 0x31c84b10, 0x491cc17c}, + /* 171 */ {4, 0.13480991, 0x32f6d0b1, 0x417b26d8}, + /* 172 */ {4, 0.13465720, 0x342ab100, 0x3a11d83b}, + /* 173 */ {4, 0.13450572, 0x3563fc11, 0x32dee622}, + /* 174 */ {4, 0.13435545, 0x36a2c210, 0x2be074cd}, + /* 175 */ {4, 0.13420637, 0x37e71341, 0x2514bb58}, + /* 176 */ {4, 0.13405847, 0x39310000, 0x1e7a02e7}, + /* 177 */ {4, 0.13391173, 
0x3a8098c1, 0x180ea5d0}, + /* 178 */ {4, 0.13376614, 0x3bd5ee10, 0x11d10edd}, + /* 179 */ {4, 0.13362168, 0x3d311091, 0xbbfb88e}, + /* 180 */ {4, 0.13347832, 0x3e921100, 0x5d92c68}, + /* 181 */ {4, 0.13333607, 0x3ff90031, 0x1c024c}, + /* 182 */ {4, 0.13319491, 0x4165ef10, 0xf50dbfb2}, + /* 183 */ {4, 0.13305481, 0x42d8eea1, 0xea30efa3}, + /* 184 */ {4, 0.13291577, 0x44521000, 0xdf9f1316}, + /* 185 */ {4, 0.13277777, 0x45d16461, 0xd555c0c9}, + /* 186 */ {4, 0.13264079, 0x4756fd10, 0xcb52a684}, + /* 187 */ {4, 0.13250483, 0x48e2eb71, 0xc193881f}, + /* 188 */ {4, 0.13236988, 0x4a754100, 0xb8163e97}, + /* 189 */ {4, 0.13223591, 0x4c0e0f51, 0xaed8b724}, + /* 190 */ {4, 0.13210292, 0x4dad6810, 0xa5d8f269}, + /* 191 */ {4, 0.13197089, 0x4f535d01, 0x9d15039d}, + /* 192 */ {4, 0.13183981, 0x51000000, 0x948b0fcd}, + /* 193 */ {4, 0.13170967, 0x52b36301, 0x8c394d1d}, + /* 194 */ {4, 0.13158046, 0x546d9810, 0x841e0215}, + /* 195 */ {4, 0.13145216, 0x562eb151, 0x7c3784f8}, + /* 196 */ {4, 0.13132477, 0x57f6c100, 0x74843b1e}, + /* 197 */ {4, 0.13119827, 0x59c5d971, 0x6d02985d}, + /* 198 */ {4, 0.13107265, 0x5b9c0d10, 0x65b11e6e}, + /* 199 */ {4, 0.13094791, 0x5d796e61, 0x5e8e5c64}, + /* 200 */ {4, 0.13082402, 0x5f5e1000, 0x5798ee23}, + /* 201 */ {4, 0.13070099, 0x614a04a1, 0x50cf7bde}, + /* 202 */ {4, 0.13057879, 0x633d5f10, 0x4a30b99b}, + /* 203 */ {4, 0.13045743, 0x65383231, 0x43bb66bd}, + /* 204 */ {4, 0.13033688, 0x673a9100, 0x3d6e4d94}, + /* 205 */ {4, 0.13021715, 0x69448e91, 0x374842ee}, + /* 206 */ {4, 0.13009822, 0x6b563e10, 0x314825b0}, + /* 207 */ {4, 0.12998007, 0x6d6fb2c1, 0x2b6cde75}, + /* 208 */ {4, 0.12986271, 0x6f910000, 0x25b55f2e}, + /* 209 */ {4, 0.12974613, 0x71ba3941, 0x2020a2c5}, + /* 210 */ {4, 0.12963031, 0x73eb7210, 0x1aadaccb}, + /* 211 */ {4, 0.12951524, 0x7624be11, 0x155b891f}, + /* 212 */ {4, 0.12940092, 0x78663100, 0x10294ba2}, + /* 213 */ {4, 0.12928734, 0x7aafdeb1, 0xb160fe9}, + /* 214 */ {4, 0.12917448, 0x7d01db10, 0x620f8f6}, + /* 215 */ {4, 
0.12906235, 0x7f5c3a21, 0x14930ef}, + /* 216 */ {4, 0.12895094, 0x81bf1000, 0xf91bd1b6}, + /* 217 */ {4, 0.12884022, 0x842a70e1, 0xefdcb0c7}, + /* 218 */ {4, 0.12873021, 0x869e7110, 0xe6d37b2a}, + /* 219 */ {4, 0.12862089, 0x891b24f1, 0xddfeb94a}, + /* 220 */ {4, 0.12851224, 0x8ba0a100, 0xd55cff6e}, + /* 221 */ {4, 0.12840428, 0x8e2ef9d1, 0xcceced50}, + /* 222 */ {4, 0.12829698, 0x90c64410, 0xc4ad2db2}, + /* 223 */ {4, 0.12819034, 0x93669481, 0xbc9c75f9}, + /* 224 */ {4, 0.12808435, 0x96100000, 0xb4b985cf}, + /* 225 */ {4, 0.12797901, 0x98c29b81, 0xad0326c2}, + /* 226 */ {4, 0.12787431, 0x9b7e7c10, 0xa5782bef}, + /* 227 */ {4, 0.12777024, 0x9e43b6d1, 0x9e1771a9}, + /* 228 */ {4, 0.12766680, 0xa1126100, 0x96dfdd2a}, + /* 229 */ {4, 0.12756398, 0xa3ea8ff1, 0x8fd05c41}, + /* 230 */ {4, 0.12746176, 0xa6cc5910, 0x88e7e509}, + /* 231 */ {4, 0.12736016, 0xa9b7d1e1, 0x8225759d}, + /* 232 */ {4, 0.12725915, 0xacad1000, 0x7b8813d3}, + /* 233 */ {4, 0.12715874, 0xafac2921, 0x750eccf9}, + /* 234 */ {4, 0.12705891, 0xb2b53310, 0x6eb8b595}, + /* 235 */ {4, 0.12695967, 0xb5c843b1, 0x6884e923}, + /* 236 */ {4, 0.12686100, 0xb8e57100, 0x627289db}, + /* 237 */ {4, 0.12676290, 0xbc0cd111, 0x5c80c07b}, + /* 238 */ {4, 0.12666537, 0xbf3e7a10, 0x56aebc07}, + /* 239 */ {4, 0.12656839, 0xc27a8241, 0x50fbb19b}, + /* 240 */ {4, 0.12647197, 0xc5c10000, 0x4b66dc33}, + /* 241 */ {4, 0.12637609, 0xc91209c1, 0x45ef7c7c}, + /* 242 */ {4, 0.12628075, 0xcc6db610, 0x4094d8a3}, + /* 243 */ {4, 0.12618595, 0xcfd41b91, 0x3b563c24}, + /* 244 */ {4, 0.12609168, 0xd3455100, 0x3632f7a5}, + /* 245 */ {4, 0.12599794, 0xd6c16d31, 0x312a60c3}, + /* 246 */ {4, 0.12590471, 0xda488710, 0x2c3bd1f0}, + /* 247 */ {4, 0.12581200, 0xdddab5a1, 0x2766aa45}, + /* 248 */ {4, 0.12571980, 0xe1781000, 0x22aa4d5f}, + /* 249 */ {4, 0.12562811, 0xe520ad61, 0x1e06233c}, + /* 250 */ {4, 0.12553692, 0xe8d4a510, 0x19799812}, + /* 251 */ {4, 0.12544622, 0xec940e71, 0x15041c33}, + /* 252 */ {4, 0.12535601, 0xf05f0100, 0x10a523e5}, + 
/* 253 */ {4, 0.12526629, 0xf4359451, 0xc5c2749}, + /* 254 */ {4, 0.12517705, 0xf817e010, 0x828a237}, + /* 255 */ {4, 0.12508829, 0xfc05fc01, 0x40a1423}, +}; +#endif +#if BITS_PER_MP_LIMB == 64 +const struct bases __mp_bases[256] = +{ + /* 0 */ {0, 0.0, 0, 0}, + /* 1 */ {0, 1e38, 0, 0}, + /* 2 */ {64, 1.00000000, 0x1, 0x0}, + /* 3 */ {40, 0.63092975, 0xa8b8b452291fe821L, 0x846d550e37b5063dL}, + /* 4 */ {32, 0.50000000, 0x2L, 0x0L}, + /* 5 */ {27, 0.43067656, 0x6765c793fa10079dL, 0x3ce9a36f23c0fc90L}, + /* 6 */ {24, 0.38685281, 0x41c21cb8e1000000L, 0xf24f62335024a295L}, + /* 7 */ {22, 0.35620719, 0x3642798750226111L, 0x2df495ccaa57147bL}, + /* 8 */ {21, 0.33333333, 0x3L, 0x0L}, + /* 9 */ {20, 0.31546488, 0xa8b8b452291fe821L, 0x846d550e37b5063dL}, + /* 10 */ {19, 0.30103000, 0x8ac7230489e80000L, 0xd83c94fb6d2ac34aL}, + /* 11 */ {18, 0.28906483, 0x4d28cb56c33fa539L, 0xa8adf7ae45e7577bL}, + /* 12 */ {17, 0.27894295, 0x1eca170c00000000L, 0xa10c2bec5da8f8fL}, + /* 13 */ {17, 0.27023815, 0x780c7372621bd74dL, 0x10f4becafe412ec3L}, + /* 14 */ {16, 0.26264954, 0x1e39a5057d810000L, 0xf08480f672b4e86L}, + /* 15 */ {16, 0.25595802, 0x5b27ac993df97701L, 0x6779c7f90dc42f48L}, + /* 16 */ {16, 0.25000000, 0x4L, 0x0L}, + /* 17 */ {15, 0.24465054, 0x27b95e997e21d9f1L, 0x9c71e11bab279323L}, + /* 18 */ {15, 0.23981247, 0x5da0e1e53c5c8000L, 0x5dfaa697ec6f6a1cL}, + /* 19 */ {15, 0.23540891, 0xd2ae3299c1c4aedbL, 0x3711783f6be7e9ecL}, + /* 20 */ {14, 0.23137821, 0x16bcc41e90000000L, 0x6849b86a12b9b01eL}, + /* 21 */ {14, 0.22767025, 0x2d04b7fdd9c0ef49L, 0x6bf097ba5ca5e239L}, + /* 22 */ {14, 0.22424382, 0x5658597bcaa24000L, 0x7b8015c8d7af8f08L}, + /* 23 */ {14, 0.22106473, 0xa0e2073737609371L, 0x975a24b3a3151b38L}, + /* 24 */ {13, 0.21810429, 0xc29e98000000000L, 0x50bd367972689db1L}, + /* 25 */ {13, 0.21533828, 0x14adf4b7320334b9L, 0x8c240c4aecb13bb5L}, + /* 26 */ {13, 0.21274605, 0x226ed36478bfa000L, 0xdbd2e56854e118c9L}, + /* 27 */ {13, 0.21030992, 0x383d9170b85ff80bL, 
0x2351ffcaa9c7c4aeL}, + /* 28 */ {13, 0.20801460, 0x5a3c23e39c000000L, 0x6b24188ca33b0636L}, + /* 29 */ {13, 0.20584683, 0x8e65137388122bcdL, 0xcc3dceaf2b8ba99dL}, + /* 30 */ {13, 0.20379505, 0xdd41bb36d259e000L, 0x2832e835c6c7d6b6L}, + /* 31 */ {12, 0.20184909, 0xaee5720ee830681L, 0x76b6aa272e1873c5L}, + /* 32 */ {12, 0.20000000, 0x5L, 0x0L}, + /* 33 */ {12, 0.19823986, 0x172588ad4f5f0981L, 0x61eaf5d402c7bf4fL}, + /* 34 */ {12, 0.19656163, 0x211e44f7d02c1000L, 0xeeb658123ffb27ecL}, + /* 35 */ {12, 0.19495902, 0x2ee56725f06e5c71L, 0x5d5e3762e6fdf509L}, + /* 36 */ {12, 0.19342640, 0x41c21cb8e1000000L, 0xf24f62335024a295L}, + /* 37 */ {12, 0.19195872, 0x5b5b57f8a98a5dd1L, 0x66ae7831762efb6fL}, + /* 38 */ {12, 0.19055141, 0x7dcff8986ea31000L, 0x47388865a00f544L}, + /* 39 */ {12, 0.18920036, 0xabd4211662a6b2a1L, 0x7d673c33a123b54cL}, + /* 40 */ {12, 0.18790182, 0xe8d4a51000000000L, 0x19799812dea11197L}, + /* 41 */ {11, 0.18665241, 0x7a32956ad081b79L, 0xc27e62e0686feaeL}, + /* 42 */ {11, 0.18544902, 0x9f49aaff0e86800L, 0x9b6e7507064ce7c7L}, + /* 43 */ {11, 0.18428883, 0xce583bb812d37b3L, 0x3d9ac2bf66cfed94L}, + /* 44 */ {11, 0.18316925, 0x109b79a654c00000L, 0xed46bc50ce59712aL}, + /* 45 */ {11, 0.18208790, 0x1543beff214c8b95L, 0x813d97e2c89b8d46L}, + /* 46 */ {11, 0.18104260, 0x1b149a79459a3800L, 0x2e81751956af8083L}, + /* 47 */ {11, 0.18003133, 0x224edfb5434a830fL, 0xdd8e0a95e30c0988L}, + /* 48 */ {11, 0.17905223, 0x2b3fb00000000000L, 0x7ad4dd48a0b5b167L}, + /* 49 */ {11, 0.17810359, 0x3642798750226111L, 0x2df495ccaa57147bL}, + /* 50 */ {11, 0.17718382, 0x43c33c1937564800L, 0xe392010175ee5962L}, + /* 51 */ {11, 0.17629143, 0x54411b2441c3cd8bL, 0x84eaf11b2fe7738eL}, + /* 52 */ {11, 0.17542506, 0x6851455acd400000L, 0x3a1e3971e008995dL}, + /* 53 */ {11, 0.17458343, 0x80a23b117c8feb6dL, 0xfd7a462344ffce25L}, + /* 54 */ {11, 0.17376534, 0x9dff7d32d5dc1800L, 0x9eca40b40ebcef8aL}, + /* 55 */ {11, 0.17296969, 0xc155af6faeffe6a7L, 0x52fa161a4a48e43dL}, + /* 56 */ {11, 
0.17219543, 0xebb7392e00000000L, 0x1607a2cbacf930c1L}, + /* 57 */ {10, 0.17144160, 0x50633659656d971L, 0x97a014f8e3be55f1L}, + /* 58 */ {10, 0.17070728, 0x5fa8624c7fba400L, 0x568df8b76cbf212cL}, + /* 59 */ {10, 0.16999162, 0x717d9faa73c5679L, 0x20ba7c4b4e6ef492L}, + /* 60 */ {10, 0.16929381, 0x86430aac6100000L, 0xe81ee46b9ef492f5L}, + /* 61 */ {10, 0.16861310, 0x9e64d9944b57f29L, 0x9dc0d10d51940416L}, + /* 62 */ {10, 0.16794878, 0xba5ca5392cb0400L, 0x5fa8ed2f450272a5L}, + /* 63 */ {10, 0.16730018, 0xdab2ce1d022cd81L, 0x2ba9eb8c5e04e641L}, + /* 64 */ {10, 0.16666667, 0x6L, 0x0L}, + /* 65 */ {10, 0.16604765, 0x12aeed5fd3e2d281L, 0xb67759cc00287bf1L}, + /* 66 */ {10, 0.16544255, 0x15c3da1572d50400L, 0x78621feeb7f4ed33L}, + /* 67 */ {10, 0.16485086, 0x194c05534f75ee29L, 0x43d55b5f72943bc0L}, + /* 68 */ {10, 0.16427205, 0x1d56299ada100000L, 0x173decb64d1d4409L}, + /* 69 */ {10, 0.16370566, 0x21f2a089a4ff4f79L, 0xe29fb54fd6b6074fL}, + /* 70 */ {10, 0.16315122, 0x2733896c68d9a400L, 0xa1f1f5c210d54e62L}, + /* 71 */ {10, 0.16260831, 0x2d2cf2c33b533c71L, 0x6aac7f9bfafd57b2L}, + /* 72 */ {10, 0.16207652, 0x33f506e440000000L, 0x3b563c2478b72ee2L}, + /* 73 */ {10, 0.16155547, 0x3ba43bec1d062211L, 0x12b536b574e92d1bL}, + /* 74 */ {10, 0.16104477, 0x4455872d8fd4e400L, 0xdf86c03020404fa5L}, + /* 75 */ {10, 0.16054409, 0x4e2694539f2f6c59L, 0xa34adf02234eea8eL}, + /* 76 */ {10, 0.16005307, 0x5938006c18900000L, 0x6f46eb8574eb59ddL}, + /* 77 */ {10, 0.15957142, 0x65ad9912474aa649L, 0x42459b481df47cecL}, + /* 78 */ {10, 0.15909881, 0x73ae9ff4241ec400L, 0x1b424b95d80ca505L}, + /* 79 */ {10, 0.15863496, 0x836612ee9c4ce1e1L, 0xf2c1b982203a0dacL}, + /* 80 */ {10, 0.15817959, 0x9502f90000000000L, 0xb7cdfd9d7bdbab7dL}, + /* 81 */ {10, 0.15773244, 0xa8b8b452291fe821L, 0x846d550e37b5063dL}, + /* 82 */ {10, 0.15729325, 0xbebf59a07dab4400L, 0x57931eeaf85cf64fL}, + /* 83 */ {10, 0.15686177, 0xd7540d4093bc3109L, 0x305a944507c82f47L}, + /* 84 */ {10, 0.15643779, 0xf2b96616f1900000L, 
0xe007ccc9c22781aL}, + /* 85 */ {9, 0.15602107, 0x336de62af2bca35L, 0x3e92c42e000eeed4L}, + /* 86 */ {9, 0.15561139, 0x39235ec33d49600L, 0x1ebe59130db2795eL}, + /* 87 */ {9, 0.15520856, 0x3f674e539585a17L, 0x268859e90f51b89L}, + /* 88 */ {9, 0.15481238, 0x4645b6958000000L, 0xd24cde0463108cfaL}, + /* 89 */ {9, 0.15442266, 0x4dcb74afbc49c19L, 0xa536009f37adc383L}, + /* 90 */ {9, 0.15403922, 0x56064e1d18d9a00L, 0x7cea06ce1c9ace10L}, + /* 91 */ {9, 0.15366189, 0x5f04fe2cd8a39fbL, 0x58db032e72e8ba43L}, + /* 92 */ {9, 0.15329049, 0x68d74421f5c0000L, 0x388cc17cae105447L}, + /* 93 */ {9, 0.15292487, 0x738df1f6ab4827dL, 0x1b92672857620ce0L}, + /* 94 */ {9, 0.15256487, 0x7f3afbc9cfb5e00L, 0x18c6a9575c2ade4L}, + /* 95 */ {9, 0.15221035, 0x8bf187fba88f35fL, 0xd44da7da8e44b24fL}, + /* 96 */ {9, 0.15186115, 0x99c600000000000L, 0xaa2f78f1b4cc6794L}, + /* 97 */ {9, 0.15151715, 0xa8ce21eb6531361L, 0x843c067d091ee4ccL}, + /* 98 */ {9, 0.15117821, 0xb92112c1a0b6200L, 0x62005e1e913356e3L}, + /* 99 */ {9, 0.15084420, 0xcad7718b8747c43L, 0x4316eed01dedd518L}, + /* 100 */ {9, 0.15051500, 0xde0b6b3a7640000L, 0x2725dd1d243aba0eL}, + /* 101 */ {9, 0.15019048, 0xf2d8cf5fe6d74c5L, 0xddd9057c24cb54fL}, + /* 102 */ {9, 0.14987054, 0x1095d25bfa712600L, 0xedeee175a736d2a1L}, + /* 103 */ {9, 0.14955506, 0x121b7c4c3698faa7L, 0xc4699f3df8b6b328L}, + /* 104 */ {9, 0.14924394, 0x13c09e8d68000000L, 0x9ebbe7d859cb5a7cL}, + /* 105 */ {9, 0.14893706, 0x15876ccb0b709ca9L, 0x7c828b9887eb2179L}, + /* 106 */ {9, 0.14863434, 0x17723c2976da2a00L, 0x5d652ab99001adcfL}, + /* 107 */ {9, 0.14833567, 0x198384e9c259048bL, 0x4114f1754e5d7b32L}, + /* 108 */ {9, 0.14804096, 0x1bbde41dfeec0000L, 0x274b7c902f7e0188L}, + /* 109 */ {9, 0.14775011, 0x1e241d6e3337910dL, 0xfc9e0fbb32e210cL}, + /* 110 */ {9, 0.14746305, 0x20b91cee9901ee00L, 0xf4afa3e594f8ea1fL}, + /* 111 */ {9, 0.14717969, 0x237ff9079863dfefL, 0xcd85c32e9e4437b0L}, + /* 112 */ {9, 0.14689994, 0x267bf47000000000L, 0xa9bbb147e0dd92a8L}, + /* 113 */ {9, 
0.14662372, 0x29b08039fbeda7f1L, 0x8900447b70e8eb82L}, + /* 114 */ {9, 0.14635096, 0x2d213df34f65f200L, 0x6b0a92adaad5848aL}, + /* 115 */ {9, 0.14608158, 0x30d201d957a7c2d3L, 0x4f990ad8740f0ee5L}, + /* 116 */ {9, 0.14581551, 0x34c6d52160f40000L, 0x3670a9663a8d3610L}, + /* 117 */ {9, 0.14555268, 0x3903f855d8f4c755L, 0x1f5c44188057be3cL}, + /* 118 */ {9, 0.14529302, 0x3d8de5c8ec59b600L, 0xa2bea956c4e4977L}, + /* 119 */ {9, 0.14503647, 0x4269541d1ff01337L, 0xed68b23033c3637eL}, + /* 120 */ {9, 0.14478295, 0x479b38e478000000L, 0xc99cf624e50549c5L}, + /* 121 */ {9, 0.14453241, 0x4d28cb56c33fa539L, 0xa8adf7ae45e7577bL}, + /* 122 */ {9, 0.14428479, 0x5317871fa13aba00L, 0x8a5bc740b1c113e5L}, + /* 123 */ {9, 0.14404003, 0x596d2f44de9fa71bL, 0x6e6c7efb81cfbb9bL}, + /* 124 */ {9, 0.14379807, 0x602fd125c47c0000L, 0x54aba5c5cada5f10L}, + /* 125 */ {9, 0.14355885, 0x6765c793fa10079dL, 0x3ce9a36f23c0fc90L}, + /* 126 */ {9, 0.14332233, 0x6f15be069b847e00L, 0x26fb43de2c8cd2a8L}, + /* 127 */ {9, 0.14308844, 0x7746b3e82a77047fL, 0x12b94793db8486a1L}, + /* 128 */ {9, 0.14285714, 0x7L, 0x0L}, + /* 129 */ {9, 0.14262838, 0x894953f7ea890481L, 0xdd5deca404c0156dL}, + /* 130 */ {9, 0.14240211, 0x932abffea4848200L, 0xbd51373330291de0L}, + /* 131 */ {9, 0.14217828, 0x9dacb687d3d6a163L, 0x9fa4025d66f23085L}, + /* 132 */ {9, 0.14195685, 0xa8d8102a44840000L, 0x842530ee2db4949dL}, + /* 133 */ {9, 0.14173777, 0xb4b60f9d140541e5L, 0x6aa7f2766b03dc25L}, + /* 134 */ {9, 0.14152100, 0xc15065d4856e4600L, 0x53035ba7ebf32e8dL}, + /* 135 */ {9, 0.14130649, 0xceb1363f396d23c7L, 0x3d12091fc9fb4914L}, + /* 136 */ {9, 0.14109421, 0xdce31b2488000000L, 0x28b1cb81b1ef1849L}, + /* 137 */ {9, 0.14088412, 0xebf12a24bca135c9L, 0x15c35be67ae3e2c9L}, + /* 138 */ {9, 0.14067617, 0xfbe6f8dbf88f4a00L, 0x42a17bd09be1ff0L}, + /* 139 */ {8, 0.14047033, 0x1ef156c084ce761L, 0x8bf461f03cf0bbfL}, + /* 140 */ {8, 0.14026656, 0x20c4e3b94a10000L, 0xf3fbb43f68a32d05L}, + /* 141 */ {8, 0.14006482, 0x22b0695a08ba421L, 
0xd84f44c48564dc19L}, + /* 142 */ {8, 0.13986509, 0x24b4f35d7a4c100L, 0xbe58ebcce7956abeL}, + /* 143 */ {8, 0.13966731, 0x26d397284975781L, 0xa5fac463c7c134b7L}, + /* 144 */ {8, 0.13947147, 0x290d74100000000L, 0x8f19241e28c7d757L}, + /* 145 */ {8, 0.13927753, 0x2b63b3a37866081L, 0x799a6d046c0ae1aeL}, + /* 146 */ {8, 0.13908545, 0x2dd789f4d894100L, 0x6566e37d746a9e40L}, + /* 147 */ {8, 0.13889521, 0x306a35e51b58721L, 0x526887dbfb5f788fL}, + /* 148 */ {8, 0.13870677, 0x331d01712e10000L, 0x408af3382b8efd3dL}, + /* 149 */ {8, 0.13852011, 0x35f14200a827c61L, 0x2fbb374806ec05f1L}, + /* 150 */ {8, 0.13833519, 0x38e858b62216100L, 0x1fe7c0f0afce87feL}, + /* 151 */ {8, 0.13815199, 0x3c03b2c13176a41L, 0x11003d517540d32eL}, + /* 152 */ {8, 0.13797047, 0x3f44c9b21000000L, 0x2f5810f98eff0dcL}, + /* 153 */ {8, 0.13779062, 0x42ad23cef3113c1L, 0xeb72e35e7840d910L}, + /* 154 */ {8, 0.13761241, 0x463e546b19a2100L, 0xd27de19593dc3614L}, + /* 155 */ {8, 0.13743580, 0x49f9fc3f96684e1L, 0xbaf391fd3e5e6fc2L}, + /* 156 */ {8, 0.13726078, 0x4de1c9c5dc10000L, 0xa4bd38c55228c81dL}, + /* 157 */ {8, 0.13708732, 0x51f77994116d2a1L, 0x8fc5a8de8e1de782L}, + /* 158 */ {8, 0.13691539, 0x563cd6bb3398100L, 0x7bf9265bea9d3a3bL}, + /* 159 */ {8, 0.13674498, 0x5ab3bb270beeb01L, 0x69454b325983dccdL}, + /* 160 */ {8, 0.13657605, 0x5f5e10000000000L, 0x5798ee2308c39df9L}, + /* 161 */ {8, 0.13640859, 0x643dce0ec16f501L, 0x46e40ba0fa66a753L}, + /* 162 */ {8, 0.13624257, 0x6954fe21e3e8100L, 0x3717b0870b0db3a7L}, + /* 163 */ {8, 0.13607797, 0x6ea5b9755f440a1L, 0x2825e6775d11cdebL}, + /* 164 */ {8, 0.13591477, 0x74322a1c0410000L, 0x1a01a1c09d1b4dacL}, + /* 165 */ {8, 0.13575295, 0x79fc8b6ae8a46e1L, 0xc9eb0a8bebc8f3eL}, + /* 166 */ {8, 0.13559250, 0x80072a66d512100L, 0xffe357ff59e6a004L}, + /* 167 */ {8, 0.13543338, 0x86546633b42b9c1L, 0xe7dfd1be05fa61a8L}, + /* 168 */ {8, 0.13527558, 0x8ce6b0861000000L, 0xd11ed6fc78f760e5L}, + /* 169 */ {8, 0.13511908, 0x93c08e16a022441L, 0xbb8db609dd29ebfeL}, + /* 170 */ {8, 
0.13496386, 0x9ae49717f026100L, 0xa71aec8d1813d532L}, + /* 171 */ {8, 0.13480991, 0xa25577ae24c1a61L, 0x93b612a9f20fbc02L}, + /* 172 */ {8, 0.13465720, 0xaa15f068e610000L, 0x814fc7b19a67d317L}, + /* 173 */ {8, 0.13450572, 0xb228d6bf7577921L, 0x6fd9a03f2e0a4b7cL}, + /* 174 */ {8, 0.13435545, 0xba91158ef5c4100L, 0x5f4615a38d0d316eL}, + /* 175 */ {8, 0.13420637, 0xc351ad9aec0b681L, 0x4f8876863479a286L}, + /* 176 */ {8, 0.13405847, 0xcc6db6100000000L, 0x4094d8a3041b60ebL}, + /* 177 */ {8, 0.13391173, 0xd5e85d09025c181L, 0x32600b8ed883a09bL}, + /* 178 */ {8, 0.13376614, 0xdfc4e816401c100L, 0x24df8c6eb4b6d1f1L}, + /* 179 */ {8, 0.13362168, 0xea06b4c72947221L, 0x18097a8ee151acefL}, + /* 180 */ {8, 0.13347832, 0xf4b139365210000L, 0xbd48cc8ec1cd8e3L}, + /* 181 */ {8, 0.13333607, 0xffc80497d520961L, 0x3807a8d67485fbL}, + /* 182 */ {8, 0.13319491, 0x10b4ebfca1dee100L, 0xea5768860b62e8d8L}, + /* 183 */ {8, 0.13305481, 0x117492de921fc141L, 0xd54faf5b635c5005L}, + /* 184 */ {8, 0.13291577, 0x123bb2ce41000000L, 0xc14a56233a377926L}, + /* 185 */ {8, 0.13277777, 0x130a8b6157bdecc1L, 0xae39a88db7cd329fL}, + /* 186 */ {8, 0.13264079, 0x13e15dede0e8a100L, 0x9c10bde69efa7ab6L}, + /* 187 */ {8, 0.13250483, 0x14c06d941c0ca7e1L, 0x8ac36c42a2836497L}, + /* 188 */ {8, 0.13236988, 0x15a7ff487a810000L, 0x7a463c8b84f5ef67L}, + /* 189 */ {8, 0.13223591, 0x169859ddc5c697a1L, 0x6a8e5f5ad090fd4bL}, + /* 190 */ {8, 0.13210292, 0x1791c60f6fed0100L, 0x5b91a2943596fc56L}, + /* 191 */ {8, 0.13197089, 0x18948e8c0e6fba01L, 0x4d4667b1c468e8f0L}, + /* 192 */ {8, 0.13183981, 0x19a1000000000000L, 0x3fa39ab547994dafL}, + /* 193 */ {8, 0.13170967, 0x1ab769203dafc601L, 0x32a0a9b2faee1e2aL}, + /* 194 */ {8, 0.13158046, 0x1bd81ab557f30100L, 0x26357ceac0e96962L}, + /* 195 */ {8, 0.13145216, 0x1d0367a69fed1ba1L, 0x1a5a6f65caa5859eL}, + /* 196 */ {8, 0.13132477, 0x1e39a5057d810000L, 0xf08480f672b4e86L}, + /* 197 */ {8, 0.13119827, 0x1f7b2a18f29ac3e1L, 0x4383340615612caL}, + /* 198 */ {8, 0.13107265, 
0x20c850694c2aa100L, 0xf3c77969ee4be5a2L}, + /* 199 */ {8, 0.13094791, 0x222173cc014980c1L, 0xe00993cc187c5ec9L}, + /* 200 */ {8, 0.13082402, 0x2386f26fc1000000L, 0xcd2b297d889bc2b6L}, + /* 201 */ {8, 0.13070099, 0x24f92ce8af296d41L, 0xbb214d5064862b22L}, + /* 202 */ {8, 0.13057879, 0x2678863cd0ece100L, 0xa9e1a7ca7ea10e20L}, + /* 203 */ {8, 0.13045743, 0x280563f0a9472d61L, 0x99626e72b39ea0cfL}, + /* 204 */ {8, 0.13033688, 0x29a02e1406210000L, 0x899a5ba9c13fafd9L}, + /* 205 */ {8, 0.13021715, 0x2b494f4efe6d2e21L, 0x7a80a705391e96ffL}, + /* 206 */ {8, 0.13009822, 0x2d0134ef21cbc100L, 0x6c0cfe23de23042aL}, + /* 207 */ {8, 0.12998007, 0x2ec84ef4da2ef581L, 0x5e377df359c944ddL}, + /* 208 */ {8, 0.12986271, 0x309f102100000000L, 0x50f8ac5fc8f53985L}, + /* 209 */ {8, 0.12974613, 0x3285ee02a1420281L, 0x44497266278e35b7L}, + /* 210 */ {8, 0.12963031, 0x347d6104fc324100L, 0x382316831f7ee175L}, + /* 211 */ {8, 0.12951524, 0x3685e47dade53d21L, 0x2c7f377833b8946eL}, + /* 212 */ {8, 0.12940092, 0x389ff6bb15610000L, 0x2157c761ab4163efL}, + /* 213 */ {8, 0.12928734, 0x3acc1912ebb57661L, 0x16a7071803cc49a9L}, + /* 214 */ {8, 0.12917448, 0x3d0acff111946100L, 0xc6781d80f8224fcL}, + /* 215 */ {8, 0.12906235, 0x3f5ca2e692eaf841L, 0x294092d370a900bL}, + /* 216 */ {8, 0.12895094, 0x41c21cb8e1000000L, 0xf24f62335024a295L}, + /* 217 */ {8, 0.12884022, 0x443bcb714399a5c1L, 0xe03b98f103fad6d2L}, + /* 218 */ {8, 0.12873021, 0x46ca406c81af2100L, 0xcee3d32cad2a9049L}, + /* 219 */ {8, 0.12862089, 0x496e106ac22aaae1L, 0xbe3f9df9277fdadaL}, + /* 220 */ {8, 0.12851224, 0x4c27d39fa5410000L, 0xae46f0d94c05e933L}, + /* 221 */ {8, 0.12840428, 0x4ef825c296e43ca1L, 0x9ef2280fb437a33dL}, + /* 222 */ {8, 0.12829698, 0x51dfa61f5ad88100L, 0x9039ff426d3f284bL}, + /* 223 */ {8, 0.12819034, 0x54def7a6d2f16901L, 0x82178c6d6b51f8f4L}, + /* 224 */ {8, 0.12808435, 0x57f6c10000000000L, 0x74843b1ee4c1e053L}, + /* 225 */ {8, 0.12797901, 0x5b27ac993df97701L, 0x6779c7f90dc42f48L}, + /* 226 */ {8, 0.12787431, 
0x5e7268b9bbdf8100L, 0x5af23c74f9ad9fe9L}, + /* 227 */ {8, 0.12777024, 0x61d7a7932ff3d6a1L, 0x4ee7eae2acdc617eL}, + /* 228 */ {8, 0.12766680, 0x65581f53c8c10000L, 0x43556aa2ac262a0bL}, + /* 229 */ {8, 0.12756398, 0x68f48a385b8320e1L, 0x3835949593b8ddd1L}, + /* 230 */ {8, 0.12746176, 0x6cada69ed07c2100L, 0x2d837fbe78458762L}, + /* 231 */ {8, 0.12736016, 0x70843718cdbf27c1L, 0x233a7e150a54a555L}, + /* 232 */ {8, 0.12725915, 0x7479027ea1000000L, 0x19561984a50ff8feL}, + /* 233 */ {8, 0.12715874, 0x788cd40268f39641L, 0xfd211159fe3490fL}, + /* 234 */ {8, 0.12705891, 0x7cc07b437ecf6100L, 0x6aa563e655033e3L}, + /* 235 */ {8, 0.12695967, 0x8114cc6220762061L, 0xfbb614b3f2d3b14cL}, + /* 236 */ {8, 0.12686100, 0x858aa0135be10000L, 0xeac0f8837fb05773L}, + /* 237 */ {8, 0.12676290, 0x8a22d3b53c54c321L, 0xda6e4c10e8615ca5L}, + /* 238 */ {8, 0.12666537, 0x8ede496339f34100L, 0xcab755a8d01fa67fL}, + /* 239 */ {8, 0.12656839, 0x93bde80aec3a1481L, 0xbb95a9ae71aa3e0cL}, + /* 240 */ {8, 0.12647197, 0x98c29b8100000000L, 0xad0326c296b4f529L}, + /* 241 */ {8, 0.12637609, 0x9ded549671832381L, 0x9ef9f21eed31b7c1L}, + /* 242 */ {8, 0.12628075, 0xa33f092e0b1ac100L, 0x91747422be14b0b2L}, + /* 243 */ {8, 0.12618595, 0xa8b8b452291fe821L, 0x846d550e37b5063dL}, + /* 244 */ {8, 0.12609168, 0xae5b564ac3a10000L, 0x77df79e9a96c06f6L}, + /* 245 */ {8, 0.12599794, 0xb427f4b3be74c361L, 0x6bc6019636c7d0c2L}, + /* 246 */ {8, 0.12590471, 0xba1f9a938041e100L, 0x601c4205aebd9e47L}, + /* 247 */ {8, 0.12581200, 0xc0435871d1110f41L, 0x54ddc59756f05016L}, + /* 248 */ {8, 0.12571980, 0xc694446f01000000L, 0x4a0648979c838c18L}, + /* 249 */ {8, 0.12562811, 0xcd137a5b57ac3ec1L, 0x3f91b6e0bb3a053dL}, + /* 250 */ {8, 0.12553692, 0xd3c21bcecceda100L, 0x357c299a88ea76a5L}, + /* 251 */ {8, 0.12544622, 0xdaa150410b788de1L, 0x2bc1e517aecc56e3L}, + /* 252 */ {8, 0.12535601, 0xe1b24521be010000L, 0x225f56ceb3da9f5dL}, + /* 253 */ {8, 0.12526629, 0xe8f62df12777c1a1L, 0x1951136d53ad63acL}, + /* 254 */ {8, 0.12517705, 
0xf06e445906fc0100L, 0x1093d504b3cd7d93L}, + /* 255 */ {8, 0.12508829, 0xf81bc845c81bf801L, 0x824794d1ec1814fL}, +}; +#endif diff --git a/gnu/lib/libgmp/mpn/msdos/asm-syntax.h b/gnu/lib/libgmp/mpn/msdos/asm-syntax.h new file mode 100644 index 00000000000..e6327e07466 --- /dev/null +++ b/gnu/lib/libgmp/mpn/msdos/asm-syntax.h @@ -0,0 +1,2 @@ +#define ELF_SYNTAX +#include "x86/syntax.h" diff --git a/gnu/lib/libgmp/mpn/ns32k/add_n.s b/gnu/lib/libgmp/mpn/ns32k/add_n.s new file mode 100644 index 00000000000..dde2e15b51c --- /dev/null +++ b/gnu/lib/libgmp/mpn/ns32k/add_n.s @@ -0,0 +1,46 @@ +# ns32000 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +# sum in a third limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + + .align 1 +.globl ___mpn_add_n +___mpn_add_n: + save [r3,r4,r5] + negd 28(sp),r3 + movd r3,r0 + lshd 2,r0 + movd 24(sp),r4 + subd r0,r4 # r4 -> to end of S2 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r2 + subd r0,r2 # r2 -> to end of RES + subd r0,r0 # cy = 0 + +Loop: movd r5[r3:d],r0 + addcd r4[r3:d],r0 + movd r0,r2[r3:d] + acbd 1,r3,Loop + + scsd r0 # r0 = cy. 
+ restore [r5,r4,r3] + ret 0 diff --git a/gnu/lib/libgmp/mpn/ns32k/addmul_1.s b/gnu/lib/libgmp/mpn/ns32k/addmul_1.s new file mode 100644 index 00000000000..205bfe3b34d --- /dev/null +++ b/gnu/lib/libgmp/mpn/ns32k/addmul_1.s @@ -0,0 +1,48 @@ +# ns32000 __mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + + .align 1 +.globl ___mpn_addmul_1 +___mpn_addmul_1: + save [r3,r4,r5,r6,r7] + negd 24(sp),r4 + movd r4,r0 + lshd 2,r0 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r6 + subd r0,r6 # r6 -> to end of RES + subd r0,r0 # r0 = 0, cy = 0 + movd 28(sp),r7 # r7 = s2_limb + +Loop: movd r5[r4:d],r2 + meid r7,r2 # r2 = low_prod, r3 = high_prod + addcd r0,r2 # r2 = low_prod + cy_limb + movd r3,r0 # r0 = new cy_limb + addcd 0,r0 + addd r2,r6[r4:d] + acbd 1,r4,Loop + + addcd 0,r0 + restore [r7,r6,r5,r4,r3] + ret 0 diff --git a/gnu/lib/libgmp/mpn/ns32k/mul_1.s b/gnu/lib/libgmp/mpn/ns32k/mul_1.s new file mode 100644 index 00000000000..64e4abbba41 --- /dev/null +++ b/gnu/lib/libgmp/mpn/ns32k/mul_1.s @@ -0,0 +1,47 @@ +# ns32000 __mpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + + .align 1 +.globl ___mpn_mul_1 +___mpn_mul_1: + save [r3,r4,r5,r6,r7] + negd 24(sp),r4 + movd r4,r0 + lshd 2,r0 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r6 + subd r0,r6 # r6 -> to end of RES + subd r0,r0 # r0 = 0, cy = 0 + movd 28(sp),r7 # r7 = s2_limb + +Loop: movd r5[r4:d],r2 + meid r7,r2 # r2 = low_prod, r3 = high_prod + addcd r0,r2 # r2 = low_prod + cy_limb + movd r3,r0 # r0 = new cy_limb + movd r2,r6[r4:d] + acbd 1,r4,Loop + + addcd 0,r0 + restore [r7,r6,r5,r4,r3] + ret 0 diff --git a/gnu/lib/libgmp/mpn/ns32k/sub_n.s b/gnu/lib/libgmp/mpn/ns32k/sub_n.s new file mode 100644 index 00000000000..ef6c889c598 --- /dev/null +++ b/gnu/lib/libgmp/mpn/ns32k/sub_n.s @@ -0,0 +1,46 @@ +# ns32000 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +# store difference in a third limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + + .align 1 +.globl ___mpn_sub_n +___mpn_sub_n: + save [r3,r4,r5] + negd 28(sp),r3 + movd r3,r0 + lshd 2,r0 + movd 24(sp),r4 + subd r0,r4 # r4 -> to end of S2 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r2 + subd r0,r2 # r2 -> to end of RES + subd r0,r0 # cy = 0 + +Loop: movd r5[r3:d],r0 # r0 = next S1 limb + subcd r4[r3:d],r0 # r0 -= S2 limb + cy + movd r0,r2[r3:d] # store difference limb in RES + acbd 1,r3,Loop # r3 += 1, loop while r3 != 0 + + scsd r0 # r0 = cy. + restore [r5,r4,r3] + ret 0 diff --git a/gnu/lib/libgmp/mpn/ns32k/submul_1.s b/gnu/lib/libgmp/mpn/ns32k/submul_1.s new file mode 100644 index 00000000000..50930953213 --- /dev/null +++ b/gnu/lib/libgmp/mpn/ns32k/submul_1.s @@ -0,0 +1,48 @@ +# ns32000 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + + .align 1 +.globl ___mpn_submul_1 +___mpn_submul_1: + save [r3,r4,r5,r6,r7] + negd 24(sp),r4 + movd r4,r0 + lshd 2,r0 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r6 + subd r0,r6 # r6 -> to end of RES + subd r0,r0 # r0 = 0, cy = 0 + movd 28(sp),r7 # r7 = s2_limb + +Loop: movd r5[r4:d],r2 # r2 = next S1 limb + meid r7,r2 # r2 = low_prod, r3 = high_prod + addcd r0,r2 # r2 = low_prod + cy_limb + movd r3,r0 # r0 = new cy_limb + addcd 0,r0 # fold carry into cy_limb + subd r2,r6[r4:d] # subtract from RES limb, sets cy + acbd 1,r4,Loop # r4 += 1, loop while r4 != 0 + + addcd 0,r0 # fold last carry into cy_limb + restore [r7,r6,r5,r4,r3] + ret 0 diff --git a/gnu/lib/libgmp/mpn/power/add_n.s b/gnu/lib/libgmp/mpn/power/add_n.s new file mode 100644 index 00000000000..9e1c9489745 --- /dev/null +++ b/gnu/lib/libgmp/mpn/power/add_n.s @@ -0,0 +1,81 @@ +# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length. + +# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + + .toc + .extern __mpn_add_n[DS] + .extern .__mpn_add_n +.csect [PR] + .align 2 + .globl __mpn_add_n + .globl .__mpn_add_n + .csect __mpn_add_n[DS] +__mpn_add_n: + .long .__mpn_add_n, TOC[tc0], 0 + .csect [PR] +.__mpn_add_n: + andil. 10,6,1 # odd or even number of limbs? + l 8,0(4) # load least significant s1 limb + l 0,0(5) # load least significant s2 limb + cal 3,-4(3) # offset res_ptr, it's updated before it's used + sri 10,6,1 # count for unrolled loop + a 7,0,8 # add least significant limbs, set cy + mtctr 10 # copy count into CTR + beq 0,Leven # branch if even # of limbs (# of limbs >= 2) + +# We have an odd # of limbs. Add the first limbs separately. + cmpi 1,10,0 # is count for unrolled loop zero? + bne 1,L1 # branch if not + st 7,4(3) # store the only result limb + aze 3,10 # use the fact that r10 is zero... + br # return + +# We added least significant limbs. Now reload the next limbs to enter loop. +L1: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + stu 7,4(3) # store first result limb and update res_ptr + ae 7,0,8 # add limbs, set cy +Leven: lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + bdz Lend # If done, skip loop + +Loop: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + ae 11,9,10 # add previous limbs with cy, set cy + stu 7,4(3) # store result limb and update res_ptr + lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + ae 7,0,8 # add previous limbs with cy, set cy + stu 11,4(3) # store result limb and update res_ptr + bdn Loop # decrement CTR and loop back + +Lend: ae 11,9,10 # add limbs with cy, set cy + st 7,4(3) # store next-to-last result limb + st 11,8(3) # store last result limb + lil 3,0 # load cy into ... + aze 3,3 # ... 
return value register + br diff --git a/gnu/lib/libgmp/mpn/power/addmul_1.s b/gnu/lib/libgmp/mpn/power/addmul_1.s new file mode 100644 index 00000000000..2db69841c75 --- /dev/null +++ b/gnu/lib/libgmp/mpn/power/addmul_1.s @@ -0,0 +1,123 @@ +# IBM POWER __mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. 
+ + .toc + .csect .__mpn_addmul_1[PR] + .align 2 + .globl __mpn_addmul_1 + .globl .__mpn_addmul_1 + .csect __mpn_addmul_1[DS] +__mpn_addmul_1: + .long .__mpn_addmul_1[PR], TOC[tc0], 0 + .csect .__mpn_addmul_1[PR] +.__mpn_addmul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + cax 9,9,7 + l 7,4(3) + a 8,8,7 # add res_limb + blt Lneg # branch if s2_limb (r6) compared negative above +Lpos: bdz Lend + +Lploop: lu 0,4(4) # load next s1 limb and update s1_ptr + stu 8,4(3) # store result limb and update res_ptr + cmpi 0,0,0 # compare s1 limb with 0 for the bge below + mul 10,0,6 + mfmq 0 + ae 8,0,9 # low limb + old_cy_limb + old cy + l 7,4(3) # load next res_limb + aze 10,10 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) # load next s1 limb and update s1_ptr + stu 8,4(3) # store result limb and update res_ptr + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 # low limb + old_cy_limb + old cy + l 7,4(3) # load next res_limb + aze 9,9 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 # adjust high limb for negative s2_limb + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 8,7,9 + l 7,4(3) + ae 10,10,0 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 8,7,10 + l 7,4(3) + ae 9,9,0 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) # move cy_limb from r10 to r9 +Lend: st 8,4(3) # store last result limb + aze 3,9 # return value = cy_limb + cy + br diff --git a/gnu/lib/libgmp/mpn/power/lshift.s 
b/gnu/lib/libgmp/mpn/power/lshift.s new file mode 100644 index 00000000000..38169bf53ff --- /dev/null +++ b/gnu/lib/libgmp/mpn/power/lshift.s @@ -0,0 +1,59 @@ +# IBM POWER __mpn_lshift -- + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. 
+ +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s_ptr r4 +# size r5 +# cnt r6 + + .toc + .extern __mpn_lshift[DS] + .extern .__mpn_lshift +.csect [PR] + .align 2 + .globl __mpn_lshift + .globl .__mpn_lshift + .csect __mpn_lshift[DS] +__mpn_lshift: + .long .__mpn_lshift, TOC[tc0], 0 + .csect [PR] +.__mpn_lshift: + sli 0,5,2 + cax 9,3,0 + cax 4,4,0 + sfi 8,6,32 + mtctr 5 # put limb count in CTR loop register + lu 0,-4(4) # read most significant limb + sre 3,0,8 # compute carry out limb, and init MQ register + bdz Lend2 # if just one limb, skip loop + lu 0,-4(4) # read 2:nd most significant limb + sreq 7,0,8 # compute most significant limb of result + bdz Lend # if just two limb, skip loop +Loop: lu 0,-4(4) # load next lower limb + stu 7,-4(9) # store previous result during read latency + sreq 7,0,8 # compute result limb + bdn Loop # loop back until CTR is zero +Lend: stu 7,-4(9) # store 2:nd least significant limb +Lend2: sle 7,0,6 # compute least significant limb + st 7,-4(9) # store it + br diff --git a/gnu/lib/libgmp/mpn/power/mul_1.s b/gnu/lib/libgmp/mpn/power/mul_1.s new file mode 100644 index 00000000000..a72bce660c7 --- /dev/null +++ b/gnu/lib/libgmp/mpn/power/mul_1.s @@ -0,0 +1,110 @@ +# IBM 
POWER __mpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. 
+ + .toc + .csect .__mpn_mul_1[PR] + .align 2 + .globl __mpn_mul_1 + .globl .__mpn_mul_1 + .csect __mpn_mul_1[DS] +__mpn_mul_1: + .long .__mpn_mul_1[PR], TOC[tc0], 0 + .csect .__mpn_mul_1[PR] +.__mpn_mul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + ai 0,0,0 # reset carry + cax 9,9,7 + blt Lneg # branch if s2_limb (r6) compared negative above +Lpos: bdz Lend +Lploop: lu 0,4(4) # load next s1 limb and update s1_ptr + stu 8,4(3) # store result limb and update res_ptr + cmpi 0,0,0 # compare s1 limb with 0 for the bge below + mul 10,0,6 + mfmq 0 + ae 8,0,9 # low limb + old_cy_limb + old cy + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) # load next s1 limb and update s1_ptr + stu 8,4(3) # store result limb and update res_ptr + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 # low limb + old_cy_limb + old cy + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + b Lend + +Lneg: cax 9,9,0 # adjust high limb for negative s2_limb + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + cax 10,10,0 # adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,9 + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + cax 9,9,0 # adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,10 + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) # move cy_limb from r10 to r9 +Lend: st 8,4(3) # store last result limb + aze 3,9 # return value = cy_limb + cy + br diff --git a/gnu/lib/libgmp/mpn/power/rshift.s b/gnu/lib/libgmp/mpn/power/rshift.s new file mode 100644 index 00000000000..30d408a24b0 --- /dev/null +++ b/gnu/lib/libgmp/mpn/power/rshift.s @@ -0,0 +1,57 @@ +# IBM POWER __mpn_rshift -- + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. 
+ +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. 
+ +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s_ptr r4 +# size r5 +# cnt r6 + + .toc + .extern __mpn_rshift[DS] + .extern .__mpn_rshift +.csect [PR] + .align 2 + .globl __mpn_rshift + .globl .__mpn_rshift + .csect __mpn_rshift[DS] +__mpn_rshift: + .long .__mpn_rshift, TOC[tc0], 0 + .csect [PR] +.__mpn_rshift: + sfi 8,6,32 + mtctr 5 # put limb count in CTR loop register + l 0,0(4) # read least significant limb + ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s + sle 3,0,8 # compute carry limb, and init MQ register + bdz Lend2 # if just one limb, skip loop + lu 0,4(4) # read 2:nd least significant limb + sleq 7,0,8 # compute least significant limb of result + bdz Lend # if just two limb, skip loop +Loop: lu 0,4(4) # load next higher limb + stu 7,4(9) # store previous result during read latency + sleq 7,0,8 # compute result limb + bdn Loop # loop back until CTR is zero +Lend: stu 7,4(9) # store 2:nd most significant limb +Lend2: sre 7,0,6 # compute most significant limb + st 7,4(9) # store it + br diff --git a/gnu/lib/libgmp/mpn/power/sub_n.s b/gnu/lib/libgmp/mpn/power/sub_n.s new file mode 100644 index 00000000000..30d4fee861a --- /dev/null +++ b/gnu/lib/libgmp/mpn/power/sub_n.s @@ -0,0 +1,82 @@ +# IBM POWER __mpn_sub_n -- Subtract two limb vectors of equal, non-zero length. + +# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. 
+ +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + + .toc + .extern __mpn_sub_n[DS] + .extern .__mpn_sub_n +.csect [PR] + .align 2 + .globl __mpn_sub_n + .globl .__mpn_sub_n + .csect __mpn_sub_n[DS] +__mpn_sub_n: + .long .__mpn_sub_n, TOC[tc0], 0 + .csect [PR] +.__mpn_sub_n: + andil. 10,6,1 # odd or even number of limbs? + l 8,0(4) # load least significant s1 limb + l 0,0(5) # load least significant s2 limb + cal 3,-4(3) # offset res_ptr, it's updated before it's used + sri 10,6,1 # count for unrolled loop + sf 7,0,8 # subtract least significant limbs, set cy + mtctr 10 # copy count into CTR + beq 0,Leven # branch if even # of limbs (# of limbs >= 2) + +# We have an odd # of limbs. Subtract the first limbs separately. + cmpi 1,10,0 # is count for unrolled loop zero? + bne 1,L1 # branch if not + st 7,4(3) # store the only result limb + sfe 3,0,0 # load !cy into ... + sfi 3,3,0 # ... return value register + br # return + +# We subtracted the least significant limbs. Now reload the next limbs to enter loop. 
+L1: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + stu 7,4(3) # store first result limb and update res_ptr + sfe 7,0,8 # subtract limbs, set cy +Leven: lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + bdz Lend # If done, skip loop + +Loop: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + sfe 11,10,9 # subtract previous limbs with cy, set cy + stu 7,4(3) # store result limb and update res_ptr + lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + sfe 7,0,8 # subtract previous limbs with cy, set cy + stu 11,4(3) # store result limb and update res_ptr + bdn Loop # decrement CTR and loop back + +Lend: sfe 11,10,9 # subtract limbs with cy, set cy + st 7,4(3) # store next-to-last result limb + st 11,8(3) # store last result limb + sfe 3,0,0 # load !cy into ... + sfi 3,3,0 # ... return value register + br diff --git a/gnu/lib/libgmp/mpn/power/submul_1.s b/gnu/lib/libgmp/mpn/power/submul_1.s new file mode 100644 index 00000000000..8e5946fe18c --- /dev/null +++ b/gnu/lib/libgmp/mpn/power/submul_1.s @@ -0,0 +1,128 @@ +# IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. + + .toc + .csect .__mpn_submul_1[PR] + .align 2 + .globl __mpn_submul_1 + .globl .__mpn_submul_1 + .csect __mpn_submul_1[DS] +__mpn_submul_1: + .long .__mpn_submul_1[PR], TOC[tc0], 0 + .csect .__mpn_submul_1[PR] +.__mpn_submul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 11 + cax 9,9,7 + l 7,4(3) + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + blt Lneg +Lpos: bdz Lend + +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 11,0,9 # low limb + old_cy_limb + old cy + l 7,4(3) + aze 10,10 # propagate cy to new cy_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 11,0,10 # low limb + old_cy_limb + old cy + l 7,4(3) + aze 9,9 # propagate cy to new cy_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 # adjust 
high limb for negative s2_limb + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 11,7,9 + l 7,4(3) + ae 
10,10,0 # propagate cy to new cy_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 11,7,10 + l 7,4(3) + ae 9,9,0 # propagate cy to new cy_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) # move cy_limb from r10 to r9 +Lend: st 8,4(3) # store last result limb + aze 3,9 # return value = cy_limb + cy + br diff --git a/gnu/lib/libgmp/mpn/powerpc32/add_n.s b/gnu/lib/libgmp/mpn/powerpc32/add_n.s new file mode 100644 index 00000000000..7739a4a8ac4 --- /dev/null +++ b/gnu/lib/libgmp/mpn/powerpc32/add_n.s @@ -0,0 +1,55 @@ +# PowerPC-32 __mpn_add_n -- Add two limb vectors of equal, non-zero length. + +# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + + .toc + .extern __mpn_add_n[DS] + .extern .__mpn_add_n +.csect [PR] + .align 2 + .globl __mpn_add_n + .globl .__mpn_add_n + .csect __mpn_add_n[DS] +__mpn_add_n: + .long .__mpn_add_n, TOC[tc0], 0 + .csect [PR] +.__mpn_add_n: + mtctr 6 # copy size into CTR + lwz 8,0(4) # load least significant s1 limb + lwz 0,0(5) # load least significant s2 limb + addi 3,3,-4 # offset res_ptr, it's updated before used + addc 7,0,8 # add least significant limbs, set cy + bdz Lend # If done, skip loop +Loop: lwzu 8,4(4) # load s1 limb and update s1_ptr + lwzu 0,4(5) # load s2 limb and update s2_ptr + stwu 7,4(3) # store previous limb in load latency slot + adde 7,0,8 # add new limbs with cy, set cy + bdnz Loop # decrement CTR and loop back +Lend: stw 7,4(3) # store ultimate result limb + li 3,0 # load cy into ... + addze 3,3 # ... return value register + blr # return diff --git a/gnu/lib/libgmp/mpn/powerpc32/addmul_1.s b/gnu/lib/libgmp/mpn/powerpc32/addmul_1.s new file mode 100644 index 00000000000..6ecd53bbcdd --- /dev/null +++ b/gnu/lib/libgmp/mpn/powerpc32/addmul_1.s @@ -0,0 +1,68 @@ +# PowerPC-32 __mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. 
+ +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# This is a fairly straightforward implementation. The timing of the PC601 +# is hard to understand, so I will wait to optimize this until I have some +# hardware to play with. + +# The code trivially generalizes to 64 bit limbs for the PC620. + + .toc + .csect .__mpn_addmul_1[PR] + .align 2 + .globl __mpn_addmul_1 + .globl .__mpn_addmul_1 + .csect __mpn_addmul_1[DS] +__mpn_addmul_1: + .long .__mpn_addmul_1[PR], TOC[tc0], 0 + .csect .__mpn_addmul_1[PR] +.__mpn_addmul_1: + mtctr 5 # copy size into CTR + + lwz 0,0(4) # load least significant s1 limb + mullw 7,0,6 # low word of s1 limb * s2_limb + mulhwu 10,0,6 # high word of the unsigned product (cy_limb) + lwz 9,0(3) # load res limb + addc 8,7,9 # add res limb to low word, set cy + addi 3,3,-4 # offset res_ptr, it's updated before used + bdz Lend # If done, skip loop + +Loop: lwzu 0,4(4) # load s1 limb and update s1_ptr + stwu 8,4(3) # store previous result limb and update res_ptr + mullw 8,0,6 # low word of the new product + adde 7,8,10 # low word + old cy_limb + cy, set cy + mulhwu 10,0,6 # high word of the new product (new cy_limb) + lwz 9,4(3) # load next res limb + addze 10,10 # propagate cy to new cy_limb + addc 8,7,9 # add res limb, set cy + bdnz Loop # decrement CTR and loop back + +Lend: stw 8,4(3) # store ultimate result limb + addze 3,10 # return value = cy_limb + cy + blr diff --git a/gnu/lib/libgmp/mpn/powerpc32/lshift.s b/gnu/lib/libgmp/mpn/powerpc32/lshift.s new file mode 100644 index 00000000000..9eef2ee0b3f --- /dev/null +++ b/gnu/lib/libgmp/mpn/powerpc32/lshift.s @@ -0,0 +1,67 @@ +# PowerPC-32 __mpn_lshift -- + +# Copyright (C) 1995 Free Software Foundation, 
Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. 
+ +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# cnt r6 + + .toc +.csect .text[PR] + .align 2 + .globl __mpn_lshift + .globl .__mpn_lshift + .csect __mpn_lshift[DS] +__mpn_lshift: + .long .__mpn_lshift, TOC[tc0], 0 + .csect .text[PR] +.__mpn_lshift: + mtctr 5 # copy size into CTR + slwi 0,5,2 # r0 = size * 4 (byte length) + add 7,3,0 # make r7 point at end of res + add 4,4,0 # make r4 point at end of s1 + subfic 8,6,32 # r8 = 32 - cnt + lwzu 11,-4(4) # load first s1 limb + srw 3,11,8 # compute function return value + bdz Lend1 # if just one limb, skip loop + +Loop: lwzu 10,-4(4) # load next lower s1 limb + slw 9,11,6 # previous limb << cnt + srw 12,10,8 # new limb >> (32 - cnt) + or 9,9,12 # combine into result limb + stwu 9,-4(7) # store result limb and update r7 + bdz Lend2 + lwzu 11,-4(4) # load next lower s1 limb + slw 9,10,6 # previous limb << cnt + srw 12,11,8 # new limb >> (32 - cnt) + or 9,9,12 # combine into result limb + stwu 9,-4(7) # store result limb and update r7 + bdnz Loop # decrement CTR and loop back + +Lend1: slw 0,11,6 # compute least significant result limb + stw 0,-4(7) # store it + blr + +Lend2: slw 0,10,6 # compute least significant result limb + stw 0,-4(7) # store it + blr diff --git a/gnu/lib/libgmp/mpn/powerpc32/mul_1.s b/gnu/lib/libgmp/mpn/powerpc32/mul_1.s new file mode 100644 index 00000000000..dc13cac0da8 --- /dev/null +++ b/gnu/lib/libgmp/mpn/powerpc32/mul_1.s @@ -0,0 +1,64 @@ +# PowerPC-32 __mpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. 
+ +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. 
+ +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# This is a fairly straightforward implementation. The timing of the PC601 +# is hard to understand, so I will wait to optimize this until I have some +# hardware to play with. + +# The code trivially generalizes to 64 bit limbs for the PC620. + + .toc + .csect .__mpn_mul_1[PR] + .align 2 + .globl __mpn_mul_1 + .globl .__mpn_mul_1 + .csect __mpn_mul_1[DS] +__mpn_mul_1: + .long .__mpn_mul_1[PR], TOC[tc0], 0 + .csect .__mpn_mul_1[PR] +.__mpn_mul_1: + mtctr 5 # copy size into CTR + + lwz 0,0(4) # load least significant s1 limb + mullw 7,0,6 # low word of s1 limb * s2_limb + mulhwu 10,0,6 # high word of the unsigned product (cy_limb) + addi 3,3,-4 # adjust res_ptr + addic 5,5,0 # clear cy with dummy insn + bdz Lend # If done, skip loop + +Loop: lwzu 0,4(4) # load s1 limb and update s1_ptr + stwu 7,4(3) # store previous result limb and update res_ptr + mullw 8,0,6 # low word of the new product + adde 7,8,10 # low word + old cy_limb + cy, set cy + mulhwu 10,0,6 # high word of the new product (new cy_limb) + bdnz Loop # decrement CTR and loop back + +Lend: stw 7,4(3) # store ultimate result limb + addze 3,10 # return value = cy_limb + cy + blr diff --git a/gnu/lib/libgmp/mpn/powerpc32/rshift.s b/gnu/lib/libgmp/mpn/powerpc32/rshift.s new file mode 100644 index 00000000000..4059270818e --- /dev/null +++ b/gnu/lib/libgmp/mpn/powerpc32/rshift.s @@ -0,0 +1,65 @@ +# PowerPC-32 __mpn_rshift -- + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. 
+ +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. 
+ +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# cnt r6 + + .toc +.csect .text[PR] + .align 2 + .globl __mpn_rshift + .globl .__mpn_rshift + .csect __mpn_rshift[DS] +__mpn_rshift: + .long .__mpn_rshift, TOC[tc0], 0 + .csect .text[PR] +.__mpn_rshift: + mtctr 5 # copy size into CTR + addi 7,3,-4 # move adjusted res_ptr to free return reg + subfic 8,6,32 # r8 = 32 - cnt + lwz 11,0(4) # load first s1 limb + slw 3,11,8 # compute function return value + bdz Lend1 # if just one limb, skip loop + +Loop: lwzu 10,4(4) # load next higher s1 limb + srw 9,11,6 # previous limb >> cnt + slw 12,10,8 # new limb << (32 - cnt) + or 9,9,12 # combine into result limb + stwu 9,4(7) # store result limb and update r7 + bdz Lend2 + lwzu 11,4(4) # load next higher s1 limb + srw 9,10,6 # previous limb >> cnt + slw 12,11,8 # new limb << (32 - cnt) + or 9,9,12 # combine into result limb + stwu 9,4(7) # store result limb and update r7 + bdnz Loop # decrement CTR and loop back + +Lend1: srw 0,11,6 # compute most significant result limb + stw 0,4(7) # store it + blr + +Lend2: srw 0,10,6 # compute most significant result limb + stw 0,4(7) # store it + blr diff --git a/gnu/lib/libgmp/mpn/powerpc32/sub_n.s b/gnu/lib/libgmp/mpn/powerpc32/sub_n.s new file mode 100644 index 00000000000..2d00d3668f2 --- /dev/null +++ b/gnu/lib/libgmp/mpn/powerpc32/sub_n.s @@ -0,0 +1,56 @@ +# PowerPC-32 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 +# and store difference in a third limb vector. + +# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. 
+ +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. 
+ +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + + .toc + .extern __mpn_sub_n[DS] + .extern .__mpn_sub_n +.csect [PR] + .align 2 + .globl __mpn_sub_n + .globl .__mpn_sub_n + .csect __mpn_sub_n[DS] +__mpn_sub_n: + .long .__mpn_sub_n, TOC[tc0], 0 + .csect [PR] +.__mpn_sub_n: + mtctr 6 # copy size into CTR + lwz 8,0(4) # load least significant s1 limb + lwz 0,0(5) # load least significant s2 limb + addi 3,3,-4 # offset res_ptr, it's updated before used + subfc 7,0,8 # add least significant limbs, set cy + bdz Lend # If done, skip loop +Loop: lwzu 8,4(4) # load s1 limb and update s1_ptr + lwzu 0,4(5) # load s2 limb and update s2_ptr + stwu 7,4(3) # store previous limb in load latency slot + subfe 7,0,8 # add new limbs with cy, set cy + bdnz Loop # decrement CTR and loop back +Lend: stw 7,4(3) # store ultimate result limb + subfe 3,0,0 # load !cy into ... + subfic 3,3,0 # ... return value register + blr diff --git a/gnu/lib/libgmp/mpn/powerpc32/submul_1.s b/gnu/lib/libgmp/mpn/powerpc32/submul_1.s new file mode 100644 index 00000000000..78467cc4b82 --- /dev/null +++ b/gnu/lib/libgmp/mpn/powerpc32/submul_1.s @@ -0,0 +1,70 @@ +# PowerPC-32 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. 
+ +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# This is a fairly straightforward implementation. The timing of the PC601 +# is hard to understand, so I will wait to optimize this until I have some +# hardware to play with. + +# The code trivially generalizes to 64 bit limbs for the PC620. + + .toc + .csect .__mpn_submul_1[PR] + .align 2 + .globl __mpn_submul_1 + .globl .__mpn_submul_1 + .csect __mpn_submul_1[DS] +__mpn_submul_1: + .long .__mpn_submul_1[PR], TOC[tc0], 0 + .csect .__mpn_submul_1[PR] +.__mpn_submul_1: + mtctr 5 + + lwz 0,0(4) + mullw 7,0,6 + mulhwu 10,0,6 + lwz 9,0(3) + subfc 8,7,9 + addc 7,7,8 # invert cy (r7 is junk) + addi 3,3,-4 + bdz Lend + +Loop: lwzu 0,4(4) + stwu 8,4(3) + mullw 8,0,6 + adde 7,8,10 + mulhwu 10,0,6 + lwz 9,4(3) + addze 10,10 + subfc 8,7,9 + addc 7,7,8 # invert cy (r7 is junk) + bdnz Loop + +Lend: stw 8,4(3) + addze 3,10 + blr diff --git a/gnu/lib/libgmp/mpn/powerpc64/gmp-mparam.h b/gnu/lib/libgmp/mpn/powerpc64/gmp-mparam.h new file mode 100644 index 00000000000..48eb85de699 --- /dev/null +++ b/gnu/lib/libgmp/mpn/powerpc64/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 diff --git a/gnu/lib/libgmp/mpn/pyr/add_n.s b/gnu/lib/libgmp/mpn/pyr/add_n.s new file mode 100644 index 00000000000..416c6602058 --- /dev/null +++ b/gnu/lib/libgmp/mpn/pyr/add_n.s @@ -0,0 +1,76 @@ +# Pyramid __mpn_add_n -- Add two limb vectors of the same length > 0 and store +# sum in a third limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. 
+ +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + +.text + .align 2 +.globl ___mpn_add_n +___mpn_add_n: + movw $-1,tr0 # representation for carry clear + + movw pr3,tr2 + andw $3,tr2 + beq Lend0 + subw tr2,pr3 + +Loop0: rsubw $0,tr0 # restore carry bit from carry-save register + + movw (pr1),tr1 + addwc (pr2),tr1 + movw tr1,(pr0) + + subwb tr0,tr0 + addw $4,pr0 + addw $4,pr1 + addw $4,pr2 + addw $-1,tr2 + bne Loop0 + + mtstw pr3,pr3 + beq Lend +Lend0: +Loop: rsubw $0,tr0 # restore carry bit from carry-save register + + movw (pr1),tr1 + addwc (pr2),tr1 + movw tr1,(pr0) + + movw 4(pr1),tr1 + addwc 4(pr2),tr1 + movw tr1,4(pr0) + + movw 8(pr1),tr1 + addwc 8(pr2),tr1 + movw tr1,8(pr0) + + movw 12(pr1),tr1 + addwc 12(pr2),tr1 + movw tr1,12(pr0) + + subwb tr0,tr0 + addw $16,pr0 + addw $16,pr1 + addw $16,pr2 + addw $-4,pr3 + bne Loop +Lend: + mnegw tr0,pr0 + ret diff --git a/gnu/lib/libgmp/mpn/pyr/addmul_1.s b/gnu/lib/libgmp/mpn/pyr/addmul_1.s new file mode 100644 index 00000000000..a1495cac8f4 --- /dev/null +++ b/gnu/lib/libgmp/mpn/pyr/addmul_1.s @@ -0,0 +1,45 @@ +# Pyramid __mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + +.text + .align 2 +.globl ___mpn_addmul_1 +___mpn_addmul_1: + mova (pr0)[pr2*4],pr0 + mova (pr1)[pr2*4],pr1 + mnegw pr2,pr2 + movw $0,tr3 + +Loop: movw (pr1)[pr2*4],tr1 + uemul pr3,tr0 + addw tr3,tr1 + movw $0,tr3 + addwc tr0,tr3 + movw (pr0)[pr2*0x4],tr0 + addw tr0,tr1 + addwc $0,tr3 + movw tr1,(pr0)[pr2*4] + addw $1,pr2 + bne Loop + + movw tr3,pr0 + ret diff --git a/gnu/lib/libgmp/mpn/pyr/mul_1.s b/gnu/lib/libgmp/mpn/pyr/mul_1.s new file mode 100644 index 00000000000..e6b97910f9d --- /dev/null +++ b/gnu/lib/libgmp/mpn/pyr/mul_1.s @@ -0,0 +1,42 @@ +# Pyramid __mpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ +.text + .align 2 +.globl ___mpn_mul_1 +___mpn_mul_1: + mova (pr0)[pr2*4],pr0 + mova (pr1)[pr2*4],pr1 + mnegw pr2,pr2 + movw $0,tr3 + +Loop: movw (pr1)[pr2*4],tr1 + uemul pr3,tr0 + addw tr3,tr1 + movw $0,tr3 + addwc tr0,tr3 + movw tr1,(pr0)[pr2*4] + addw $1,pr2 + bne Loop + + movw tr3,pr0 + ret diff --git a/gnu/lib/libgmp/mpn/pyr/sub_n.s b/gnu/lib/libgmp/mpn/pyr/sub_n.s new file mode 100644 index 00000000000..5664859cf00 --- /dev/null +++ b/gnu/lib/libgmp/mpn/pyr/sub_n.s @@ -0,0 +1,76 @@ +# Pyramid __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +# store difference in a third limb vector. + +# Copyright (C) 1995 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ +.text + .align 2 +.globl ___mpn_sub_n +___mpn_sub_n: + movw $-1,tr0 # representation for carry clear + + movw pr3,tr2 + andw $3,tr2 + beq Lend0 + subw tr2,pr3 + +Loop0: rsubw $0,tr0 # restore carry bit from carry-save register + + movw (pr1),tr1 + subwb (pr2),tr1 + movw tr1,(pr0) + + subwb tr0,tr0 + addw $4,pr0 + addw $4,pr1 + addw $4,pr2 + addw $-1,tr2 + bne Loop0 + + mtstw pr3,pr3 + beq Lend +Lend0: +Loop: rsubw $0,tr0 # restore carry bit from carry-save register + + movw (pr1),tr1 + subwb (pr2),tr1 + movw tr1,(pr0) + + movw 4(pr1),tr1 + subwb 4(pr2),tr1 + movw tr1,4(pr0) + + movw 8(pr1),tr1 + subwb 8(pr2),tr1 + movw tr1,8(pr0) + + movw 12(pr1),tr1 + subwb 12(pr2),tr1 + movw tr1,12(pr0) + + subwb tr0,tr0 + addw $16,pr0 + addw $16,pr1 + addw $16,pr2 + addw $-4,pr3 + bne Loop +Lend: + mnegw tr0,pr0 + ret diff --git a/gnu/lib/libgmp/mpn/sh/add_n.s b/gnu/lib/libgmp/mpn/sh/add_n.s new file mode 100644 index 00000000000..93dad51e48f --- /dev/null +++ b/gnu/lib/libgmp/mpn/sh/add_n.s @@ -0,0 +1,47 @@ +! SH __mpn_add_n -- Add two limb vectors of the same length > 0 and store +! sum in a third limb vector. + +! Copyright (C) 1995 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! 
MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r4 +! s1_ptr r5 +! s2_ptr r6 +! size r7 + + .text + .align 2 + .global ___mpn_add_n +___mpn_add_n: + mov #0,r3 ! clear cy save reg + +Loop: mov.l @r5+,r1 + mov.l @r6+,r2 + shlr r3 ! restore cy + addc r2,r1 + movt r3 ! save cy + mov.l r1,@r4 + dt r7 + bf.s Loop + add #4,r4 + + rts + movt r0 ! return carry-out from most sign. limb diff --git a/gnu/lib/libgmp/mpn/sh/sh2/addmul_1.s b/gnu/lib/libgmp/mpn/sh/sh2/addmul_1.s new file mode 100644 index 00000000000..19d81da3d64 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sh/sh2/addmul_1.s @@ -0,0 +1,53 @@ +! SH2 __mpn_addmul_1 -- Multiply a limb vector with a limb and add +! the result to a second limb vector. + +! Copyright (C) 1995 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r4 +! s1_ptr r5 +! size r6 +! s2_limb r7 + + .text + .align 1 + .global ___mpn_addmul_1 +___mpn_addmul_1: + mov #0,r2 ! cy_limb = 0 + mov #0,r0 ! Keep r0 = 0 for entire loop + clrt + +Loop: mov.l @r5+,r3 + dmulu.l r3,r7 + sts macl,r1 + addc r2,r1 ! lo_prod += old cy_limb + sts mach,r2 ! new cy_limb = hi_prod + mov.l @r4,r3 + addc r0,r2 ! 
cy_limb += T, T = 0 + addc r3,r1 + addc r0,r2 ! cy_limb += T, T = 0 + dt r6 + mov.l r1,@r4 + bf.s Loop + add #4,r4 + + rts + mov r2,r0 diff --git a/gnu/lib/libgmp/mpn/sh/sh2/mul_1.s b/gnu/lib/libgmp/mpn/sh/sh2/mul_1.s new file mode 100644 index 00000000000..7ca275671f0 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sh/sh2/mul_1.s @@ -0,0 +1,50 @@ +! SH2 __mpn_mul_1 -- Multiply a limb vector with a limb and store +! the result in a second limb vector. + +! Copyright (C) 1995 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r4 +! s1_ptr r5 +! size r6 +! s2_limb r7 + + .text + .align 1 + .global ___mpn_mul_1 +___mpn_mul_1: + mov #0,r2 ! cy_limb = 0 + mov #0,r0 ! Keep r0 = 0 for entire loop + clrt + +Loop: mov.l @r5+,r3 + dmulu.l r3,r7 + sts macl,r1 + addc r2,r1 + sts mach,r2 + addc r0,r2 ! propagate carry to cy_limb (dt clobbers T) + dt r6 + mov.l r1,@r4 + bf.s Loop + add #4,r4 + + rts + mov r2,r0 diff --git a/gnu/lib/libgmp/mpn/sh/sh2/submul_1.s b/gnu/lib/libgmp/mpn/sh/sh2/submul_1.s new file mode 100644 index 00000000000..9ef380ced92 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sh/sh2/submul_1.s @@ -0,0 +1,53 @@ +! 
SH2 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract +! the result from a second limb vector. + +! Copyright (C) 1995 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r4 +! s1_ptr r5 +! size r6 +! s2_limb r7 + + .text + .align 1 + .global ___mpn_submul_1 +___mpn_submul_1: + mov #0,r2 ! cy_limb = 0 + mov #0,r0 ! Keep r0 = 0 for entire loop + clrt + +Loop: mov.l @r5+,r3 + dmulu.l r3,r7 + sts macl,r1 + addc r2,r1 ! lo_prod += old cy_limb + sts mach,r2 ! new cy_limb = hi_prod + mov.l @r4,r3 + addc r0,r2 ! cy_limb += T, T = 0 + subc r3,r1 + addc r0,r2 ! cy_limb += T, T = 0 + dt r6 + mov.l r1,@r4 + bf.s Loop + add #4,r4 + + rts + mov r2,r0 diff --git a/gnu/lib/libgmp/mpn/sh/sub_n.s b/gnu/lib/libgmp/mpn/sh/sub_n.s new file mode 100644 index 00000000000..6b201f60fed --- /dev/null +++ b/gnu/lib/libgmp/mpn/sh/sub_n.s @@ -0,0 +1,47 @@ +! SH __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store +! difference in a third limb vector. + +! Copyright (C) 1995 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! 
The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r4 +! s1_ptr r5 +! s2_ptr r6 +! size r7 + + .text + .align 2 + .global ___mpn_sub_n +___mpn_sub_n: + mov #0,r3 ! clear cy save reg + +Loop: mov.l @r5+,r1 + mov.l @r6+,r2 + shlr r3 ! restore cy + subc r2,r1 + movt r3 ! save cy + mov.l r1,@r4 + dt r7 + bf.s Loop + add #4,r4 + + rts + movt r0 ! return carry-out from most sign. limb diff --git a/gnu/lib/libgmp/mpn/sparc32/README b/gnu/lib/libgmp/mpn/sparc32/README new file mode 100644 index 00000000000..7c19df7bc42 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc32/README @@ -0,0 +1,36 @@ +This directory contains mpn functions for various SPARC chips. Code that +runs only on version 8 SPARC implementations is in the v8 subdirectory. + +RELEVANT OPTIMIZATION ISSUES + + Load and Store timing + +On most early SPARC implementations, the ST instruction takes multiple +cycles, while a STD takes just a single cycle more than an ST. For the CPUs +in SPARCstation I and II, the times are 3 and 4 cycles, respectively. +Therefore, combining two ST instructions into a STD when possible is a +significant optimization. + +Later SPARC implementations have single cycle ST.
+ +For SuperSPARC, we can perform just one memory instruction per cycle, even +if up to two integer instructions can be executed in its pipeline. For +programs that perform so many memory operations that there are not enough +non-memory operations to issue in parallel with all memory operations, using +LDD and STD when possible helps. + +STATUS + +1. On a SuperSPARC, mpn_lshift and mpn_rshift run at 3 cycles/limb, or 2.5 + cycles/limb asymptotically. We could optimize speed for special counts + by using ADDXCC. + +2. On a SuperSPARC, mpn_add_n and mpn_sub_n run at 2.5 cycles/limb, or 2 + cycles/limb asymptotically. + +3. mpn_mul_1 runs at what is believed to be optimal speed. + +4. On SuperSPARC, mpn_addmul_1 and mpn_submul_1 could both be improved by a + cycle by avoiding one of the add instructions. See a29k/addmul_1. + +The speed of the code for other SPARC implementations is uncertain. diff --git a/gnu/lib/libgmp/mpn/sparc32/add_n.S b/gnu/lib/libgmp/mpn/sparc32/add_n.S new file mode 100644 index 00000000000..9852c256aad --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc32/add_n.S @@ -0,0 +1,226 @@ +! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store +! sum in a third limb vector. + +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB.
If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +#define res_ptr %o0 +#define s1_ptr %o1 +#define s2_ptr %o2 +#define size %o3 + +#include "sysdep.h" + + .text + .align 4 + .global C_SYMBOL_NAME(__mpn_add_n) +C_SYMBOL_NAME(__mpn_add_n): + xor s2_ptr,res_ptr,%g1 + andcc %g1,4,%g0 + bne L1 ! branch if alignment differs + nop +! ** V1a ** +L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0 + be L_v1 ! if no, branch + nop +/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld [s1_ptr],%g4 + add s1_ptr,4,s1_ptr + ld [s2_ptr],%g2 + add s2_ptr,4,s2_ptr + add size,-1,size + addcc %g4,%g2,%o4 + st %o4,[res_ptr] + add res_ptr,4,res_ptr +L_v1: addx %g0,%g0,%o4 ! save cy in register + cmp size,2 ! if size < 2 ... + bl Lend2 ! ... branch to tail code + subcc %g0,%o4,%g0 ! restore cy + + ld [s1_ptr+0],%g4 + addcc size,-10,size + ld [s1_ptr+4],%g1 + ldd [s2_ptr+0],%g2 + blt Lfin1 + subcc %g0,%o4,%g0 ! restore cy +/* Add blocks of 8 limbs until less than 8 limbs remain */ +Loop1: addxcc %g4,%g2,%o4 + ld [s1_ptr+8],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+12],%g1 + ldd [s2_ptr+8],%g2 + std %o4,[res_ptr+0] + addxcc %g4,%g2,%o4 + ld [s1_ptr+16],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+20],%g1 + ldd [s2_ptr+16],%g2 + std %o4,[res_ptr+8] + addxcc %g4,%g2,%o4 + ld [s1_ptr+24],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+28],%g1 + ldd [s2_ptr+24],%g2 + std %o4,[res_ptr+16] + addxcc %g4,%g2,%o4 + ld [s1_ptr+32],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+36],%g1 + ldd [s2_ptr+32],%g2 + std %o4,[res_ptr+24] + addx %g0,%g0,%o4 ! save cy in register + addcc size,-8,size + add s1_ptr,32,s1_ptr + add s2_ptr,32,s2_ptr + add res_ptr,32,res_ptr + bge Loop1 + subcc %g0,%o4,%g0 ! restore cy + +Lfin1: addcc size,8-2,size + blt Lend1 + subcc %g0,%o4,%g0 ! 
restore cy +/* Add blocks of 2 limbs until less than 2 limbs remain */ +Loope1: addxcc %g4,%g2,%o4 + ld [s1_ptr+8],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+12],%g1 + ldd [s2_ptr+8],%g2 + std %o4,[res_ptr+0] + addx %g0,%g0,%o4 ! save cy in register + addcc size,-2,size + add s1_ptr,8,s1_ptr + add s2_ptr,8,s2_ptr + add res_ptr,8,res_ptr + bge Loope1 + subcc %g0,%o4,%g0 ! restore cy +Lend1: addxcc %g4,%g2,%o4 + addxcc %g1,%g3,%o5 + std %o4,[res_ptr+0] + addx %g0,%g0,%o4 ! save cy in register + + andcc size,1,%g0 + be Lret1 + subcc %g0,%o4,%g0 ! restore cy +/* Add last limb */ + ld [s1_ptr+8],%g4 + ld [s2_ptr+8],%g2 + addxcc %g4,%g2,%o4 + st %o4,[res_ptr+8] + +Lret1: retl + addx %g0,%g0,%o0 ! return carry-out from most sign. limb + +L1: xor s1_ptr,res_ptr,%g1 + andcc %g1,4,%g0 + bne L2 + nop +! ** V1b ** + mov s2_ptr,%g1 + mov s1_ptr,s2_ptr + b L0 + mov %g1,s1_ptr + +! ** V2 ** +/* If we come here, the alignment of s1_ptr and res_ptr as well as the + alignment of s2_ptr and res_ptr differ. Since there are only two ways + things can be aligned (that we care about) we now know that the alignment + of s1_ptr and s2_ptr are the same. */ + +L2: cmp size,1 + be Ljone + nop + andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0 + be L_v2 ! if no, branch + nop +/* Add least significant limb separately to align s1_ptr and s2_ptr */ + ld [s1_ptr],%g4 + add s1_ptr,4,s1_ptr + ld [s2_ptr],%g2 + add s2_ptr,4,s2_ptr + add size,-1,size + addcc %g4,%g2,%o4 + st %o4,[res_ptr] + add res_ptr,4,res_ptr + +L_v2: addx %g0,%g0,%o4 ! save cy in register + addcc size,-8,size + blt Lfin2 + subcc %g0,%o4,%g0 ! 
restore cy +/* Add blocks of 8 limbs until less than 8 limbs remain */ +Loop2: ldd [s1_ptr+0],%g2 + ldd [s2_ptr+0],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+0] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+4] + ldd [s1_ptr+8],%g2 + ldd [s2_ptr+8],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+8] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+12] + ldd [s1_ptr+16],%g2 + ldd [s2_ptr+16],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+16] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+20] + ldd [s1_ptr+24],%g2 + ldd [s2_ptr+24],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+24] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+28] + addx %g0,%g0,%o4 ! save cy in register + addcc size,-8,size + add s1_ptr,32,s1_ptr + add s2_ptr,32,s2_ptr + add res_ptr,32,res_ptr + bge Loop2 + subcc %g0,%o4,%g0 ! restore cy + +Lfin2: addcc size,8-2,size + blt Lend2 + subcc %g0,%o4,%g0 ! restore cy +Loope2: ldd [s1_ptr+0],%g2 + ldd [s2_ptr+0],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+0] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+4] + addx %g0,%g0,%o4 ! save cy in register + addcc size,-2,size + add s1_ptr,8,s1_ptr + add s2_ptr,8,s2_ptr + add res_ptr,8,res_ptr + bge Loope2 + subcc %g0,%o4,%g0 ! restore cy +Lend2: andcc size,1,%g0 + be Lret2 + subcc %g0,%o4,%g0 ! restore cy +/* Add last limb */ +Ljone: ld [s1_ptr],%g4 + ld [s2_ptr],%g2 + addxcc %g4,%g2,%o4 + st %o4,[res_ptr] + +Lret2: retl + addx %g0,%g0,%o0 ! return carry-out from most sign. limb diff --git a/gnu/lib/libgmp/mpn/sparc32/addmul_1.S b/gnu/lib/libgmp/mpn/sparc32/addmul_1.S new file mode 100644 index 00000000000..375d25db6b0 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc32/addmul_1.S @@ -0,0 +1,147 @@ +! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add +! the result to a second limb vector. + +! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! 
it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(__mpn_addmul_1) +C_SYMBOL_NAME(__mpn_addmul_1): + ! Make S1_PTR and RES_PTR point at the end of their blocks + ! and put (- 4 x SIZE) in index/loop counter. + sll %o2,2,%o2 + add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval + add %o1,%o2,%o1 + sub %g0,%o2,%o2 + + cmp %o3,0xfff + bgu Large + nop + + ld [%o1+%o2],%o5 + mov 0,%o0 + b L0 + add %o4,-4,%o4 +Loop0: + addcc %o5,%g1,%g1 + ld [%o1+%o2],%o5 + addx %o0,%g0,%o0 + st %g1,[%o4+%o2] +L0: wr %g0,%o3,%y + sra %o5,31,%g2 + and %o3,%g2,%g2 + andcc %g1,0,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,0,%g1 + sra %g1,20,%g4 + sll %g1,12,%g1 + rd %y,%g3 + srl %g3,20,%g3 + or %g1,%g3,%g1 + + addcc %g1,%o0,%g1 + addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb + addcc %o2,4,%o2 ! loop counter + bne Loop0 + ld [%o4+%o2],%o5 + + addcc %o5,%g1,%g1 + addx %o0,%g0,%o0 + retl + st %g1,[%o4+%o2] + + +Large: ld [%o1+%o2],%o5 + mov 0,%o0 + sra %o3,31,%g4 ! 
g4 = mask of ones iff S2_LIMB < 0 + b L1 + add %o4,-4,%o4 +Loop: + addcc %o5,%g3,%g3 + ld [%o1+%o2],%o5 + addx %o0,%g0,%o0 + st %g3,[%o4+%o2] +L1: wr %g0,%o5,%y + and %o5,%g4,%g2 + andcc %g0,%g0,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%g0,%g1 + rd %y,%g3 + addcc %g3,%o0,%g3 + addx %g2,%g1,%o0 + addcc %o2,4,%o2 + bne Loop + ld [%o4+%o2],%o5 + + addcc %o5,%g3,%g3 + addx %o0,%g0,%o0 + retl + st %g3,[%o4+%o2] diff --git a/gnu/lib/libgmp/mpn/sparc32/lshift.S b/gnu/lib/libgmp/mpn/sparc32/lshift.S new file mode 100644 index 00000000000..4f0595f2fb6 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc32/lshift.S @@ -0,0 +1,95 @@ +! sparc __mpn_lshift -- + +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! 
You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! src_ptr	%o1
+! size		%o2
+! cnt		%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global	C_SYMBOL_NAME(__mpn_lshift)
+C_SYMBOL_NAME(__mpn_lshift):
+	sll	%o2,2,%g1	! g1 = 4 * size (byte count)
+	add	%o1,%g1,%o1	! make %o1 point at end of src
+	ld	[%o1-4],%g2	! load first limb (the most significant one)
+	sub	%g0,%o3,%o5	! negate shift count (shifts use only the low 5 bits)
+	add	%o0,%g1,%o0	! make %o0 point at end of res
+	add	%o2,-1,%o2	! size-1 limbs are combined with a lower neighbour
+	andcc	%o2,4-1,%g4	! number of limbs in first loop
+	srl	%g2,%o5,%g1	! compute function result
+	be	L0		! if multiple of 4 limbs, skip first loop
+	st	%g1,[%sp+80]	! (delay slot) park the function result on the stack
+
+	sub	%o2,%g4,%o2	! adjust count for main loop
+
+Loop0:	ld	[%o1-8],%g3	! one limb per pass, walking downwards
+	add	%o0,-4,%o0
+	add	%o1,-4,%o1
+	addcc	%g4,-1,%g4
+	sll	%g2,%o3,%o4	! high part from the current limb
+	srl	%g3,%o5,%g1	! low part from the next lower limb
+	mov	%g3,%g2
+	or	%o4,%g1,%o4
+	bne	Loop0
+	st	%o4,[%o0+0]	! (delay slot)
+
+L0:	tst	%o2
+	be	Lend		! no limbs left for the unrolled loop
+	nop
+
+Loop:	ld	[%o1-8],%g3	! four limbs per pass
+	add	%o0,-16,%o0
+	addcc	%o2,-4,%o2	! sets the condition tested by bne below
+	sll	%g2,%o3,%o4
+	srl	%g3,%o5,%g1
+
+	ld	[%o1-12],%g2
+	sll	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0+12]
+	srl	%g2,%o5,%g1
+
+	ld	[%o1-16],%g3
+	sll	%g2,%o3,%o4
+	or	%g4,%g1,%g4
+	st	%g4,[%o0+8]
+	srl	%g3,%o5,%g1
+
+	ld	[%o1-20],%g2
+	sll	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0+4]
+	srl	%g2,%o5,%g1
+
+	add	%o1,-16,%o1
+	or	%g4,%g1,%g4
+	bne	Loop
+	st	%g4,[%o0+0]	! (delay slot)
+
+Lend:	sll	%g2,%o3,%g2	! last limb stored: nothing lower to merge in
+	st	%g2,[%o0-4]
+	retl
+	ld	[%sp+80],%o0	! (delay slot) reload the function result
diff --git a/gnu/lib/libgmp/mpn/sparc32/mul_1.S b/gnu/lib/libgmp/mpn/sparc32/mul_1.S
new file mode 100644
index 00000000000..142fd8ba2a8
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/sparc32/mul_1.S
@@ -0,0 +1,199 @@
+! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! 
The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+! ADD CODE FOR SMALL MULTIPLIERS!
+!1:	ld
+!	st
+!
+!2:	ld	,a
+!	addxcc	a,a,x
+!	st	x,
+!
+!3_unrolled:
+!	ld	,a
+!	addxcc	a,a,x1		! 2a + cy
+!	addx	%g0,%g0,x2
+!	addcc	a,x1,x		! 3a + c
+!	st	x,
+!
+!	ld	,a
+!	addxcc	a,a,y1
+!	addx	%g0,%g0,y2
+!	addcc	a,y1,x
+!	st	x,
+!
+!4_unrolled:
+!	ld	,a
+!	srl	a,2,x1		! 4a
+!	addxcc	y2,x1,x
+!	sll	a,30,x2
+!	st	x,
+!
+!	ld	,a
+!	srl	a,2,y1
+!	addxcc	x2,y1,y
+!	sll	a,30,y2
+!	st	x,
+!
+!5_unrolled:
+!	ld	,a
+!	srl	a,2,x1		! 4a
+!	addxcc	a,x1,x		! 5a + c
+!	sll	a,30,x2
+!	addx	%g0,x2,x2
+!	st	x,
+!
+!	ld	,a
+!	srl	a,2,y1
+!	addxcc	a,y1,x
+!	sll	a,30,y2
+!	addx	%g0,y2,y2
+!	st	x,
+!
+!8_unrolled:
+!	ld	,a
+!	srl	a,3,x1		! 8a
+!	addxcc	y2,x1,x
+!	sll	a,29,x2
+!	st	x,
+!
+!	ld	,a
+!	srl	a,3,y1
+!	addxcc	x2,y1,y
+!	sll	a,29,y2
+!	st	x,
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1):
+	! Make S1_PTR and RES_PTR point at the end of their blocks
+	! and put (- 4 x SIZE) in index/loop counter.
+	sll	%o2,2,%o2	! o2 = 4 * size (byte count)
+	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
+	add	%o1,%o2,%o1	! s1_ptr -> end of its block
+	sub	%g0,%o2,%o2	! o2 = -4 * size, counts up towards 0
+
+	cmp	%o3,0xfff
+	bgu	Large		! s2_limb > 0xfff (unsigned): take the 32-step path
+	nop
+
+	ld	[%o1+%o2],%o5	! first s1 limb
+	mov	0,%o0		! cy_limb = 0
+	b	L0
+	add	%o4,-4,%o4	! (delay slot) bias res pointer by one limb
+Loop0:
+	st	%g1,[%o4+%o2]	! store the limb finished in the previous pass
+L0:	wr	%g0,%o3,%y	! Y = s2_limb, the multiplier consumed by mulscc
+	sra	%o5,31,%g2
+	and	%o3,%g2,%g2	! g2 = s2_limb iff bit 31 of the s1 limb is set, else 0
+	andcc	%g1,0,%g1	! g1 = 0, condition codes cleared
+	mulscc	%g1,%o5,%g1	! 12 multiply steps (s2_limb <= 0xfff on this path)
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,0,%g1	! final step with a zero operand
+	sra	%g1,20,%g4	! g4 = high limb of the product
+	sll	%g1,12,%g1
+	rd	%y,%g3
+	srl	%g3,20,%g3
+	or	%g1,%g3,%g1	! g1 = low limb of the product
+
+	addcc	%g1,%o0,%g1	! add incoming cy_limb
+	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne,a	Loop0
+	ld	[%o1+%o2],%o5	! (annulled delay slot) next s1 limb, only when looping
+
+	retl
+	st	%g1,[%o4+%o2]	! (delay slot) store most significant limb
+
+
+Large:	ld	[%o1+%o2],%o5	! first s1 limb
+	mov	0,%o0		! cy_limb = 0
+	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
+	b	L1
+	add	%o4,-4,%o4	! (delay slot) bias res pointer by one limb
+Loop:
+	st	%g3,[%o4+%o2]	! store the limb finished in the previous pass
+L1:	wr	%g0,%o5,%y	! Y = s1 limb; s2_limb is the mulscc operand here
+	and	%o5,%g4,%g2	! g2 = S1_LIMB iff S2_LIMB < 0, else 0
+	andcc	%g0,%g0,%g1	! g1 = 0, condition codes cleared
+	mulscc	%g1,%o3,%g1	! 32 multiply steps
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%g0,%g1	! final step with a zero operand
+	rd	%y,%g3		! g3 = low limb of the product
+	addcc	%g3,%o0,%g3	! add incoming cy_limb
+	addx	%g2,%g1,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne,a	Loop
+	ld	[%o1+%o2],%o5	! (annulled delay slot) next s1 limb, only when looping
+
+	retl
+	st	%g3,[%o4+%o2]	! (delay slot) store most significant limb
diff --git a/gnu/lib/libgmp/mpn/sparc32/rshift.S b/gnu/lib/libgmp/mpn/sparc32/rshift.S
new file mode 100644
index 00000000000..fea4f3b9268
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/sparc32/rshift.S
@@ -0,0 +1,92 @@
+! sparc __mpn_rshift -- Shift a limb vector right by cnt bits.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! src_ptr	%o1
+! size		%o2
+! cnt		%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global	C_SYMBOL_NAME(__mpn_rshift)
+C_SYMBOL_NAME(__mpn_rshift):
+	ld	[%o1],%g2	! load first limb (the least significant one)
+	sub	%g0,%o3,%o5	! negate shift count (shifts use only the low 5 bits)
+	add	%o2,-1,%o2	! size-1 limbs are combined with a higher neighbour
+	andcc	%o2,4-1,%g4	! number of limbs in first loop
+	sll	%g2,%o5,%g1	! compute function result
+	be	L0		! if multiple of 4 limbs, skip first loop
+	st	%g1,[%sp+80]	! (delay slot) park the function result on the stack
+
+	sub	%o2,%g4,%o2	! adjust count for main loop
+
+Loop0:	ld	[%o1+4],%g3	! one limb per pass, walking upwards
+	add	%o0,4,%o0
+	add	%o1,4,%o1
+	addcc	%g4,-1,%g4
+	srl	%g2,%o3,%o4	! low part from the current limb
+	sll	%g3,%o5,%g1	! high part from the next higher limb
+	mov	%g3,%g2
+	or	%o4,%g1,%o4
+	bne	Loop0
+	st	%o4,[%o0-4]	! (delay slot)
+
+L0:	tst	%o2
+	be	Lend		! no limbs left for the unrolled loop
+	nop
+
+Loop:	ld	[%o1+4],%g3	! four limbs per pass
+	add	%o0,16,%o0
+	addcc	%o2,-4,%o2	! sets the condition tested by bne below
+	srl	%g2,%o3,%o4
+	sll	%g3,%o5,%g1
+
+	ld	[%o1+8],%g2
+	srl	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0-16]
+	sll	%g2,%o5,%g1
+
+	ld	[%o1+12],%g3
+	srl	%g2,%o3,%o4
+	or	%g4,%g1,%g4
+	st	%g4,[%o0-12]
+	sll	%g3,%o5,%g1
+
+	ld	[%o1+16],%g2
+	srl	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0-8]
+	sll	%g2,%o5,%g1
+
+	add	%o1,16,%o1
+	or	%g4,%g1,%g4
+	bne	Loop
+	st	%g4,[%o0-4]	! (delay slot)
+
+Lend:	srl	%g2,%o3,%g2	! last limb stored: nothing higher to merge in
+	st	%g2,[%o0-0]
+	retl
+	ld	[%sp+80],%o0	! (delay slot) reload the function result
diff --git a/gnu/lib/libgmp/mpn/sparc32/sub_n.S b/gnu/lib/libgmp/mpn/sparc32/sub_n.S
new file mode 100644
index 00000000000..b7a11958e25
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/sparc32/sub_n.S
@@ -0,0 +1,311 @@
+! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! 
INPUT PARAMETERS
+#define res_ptr	%o0
+#define s1_ptr	%o1
+#define s2_ptr	%o2
+#define size	%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global	C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n):
+	xor	s2_ptr,res_ptr,%g1
+	andcc	%g1,4,%g0
+	bne	L1			! branch if alignment differs
+	nop
+! **  V1a  **  (s2_ptr and res_ptr agree in bit 2: ldd from s2, std to res)
+	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
+	be	L_v1			! if no, branch
+	nop
+/* Subtract least significant limb separately to align res_ptr and s2_ptr */
+	ld	[s1_ptr],%g4
+	add	s1_ptr,4,s1_ptr
+	ld	[s2_ptr],%g2
+	add	s2_ptr,4,s2_ptr
+	add	size,-1,size
+	subcc	%g4,%g2,%o4
+	st	%o4,[res_ptr]
+	add	res_ptr,4,res_ptr
+L_v1:	addx	%g0,%g0,%o4		! save cy in register
+	cmp	size,2			! if size < 2 ...
+	bl	Lend2			! ... branch to tail code
+	subcc	%g0,%o4,%g0		! restore cy
+
+	ld	[s1_ptr+0],%g4
+	addcc	size,-10,size
+	ld	[s1_ptr+4],%g1
+	ldd	[s2_ptr+0],%g2
+	blt	Lfin1
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
+Loop1:	subxcc	%g4,%g2,%o4
+	ld	[s1_ptr+8],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[s1_ptr+12],%g1
+	ldd	[s2_ptr+8],%g2
+	std	%o4,[res_ptr+0]
+	subxcc	%g4,%g2,%o4
+	ld	[s1_ptr+16],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[s1_ptr+20],%g1
+	ldd	[s2_ptr+16],%g2
+	std	%o4,[res_ptr+8]
+	subxcc	%g4,%g2,%o4
+	ld	[s1_ptr+24],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[s1_ptr+28],%g1
+	ldd	[s2_ptr+24],%g2
+	std	%o4,[res_ptr+16]
+	subxcc	%g4,%g2,%o4
+	ld	[s1_ptr+32],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[s1_ptr+36],%g1
+	ldd	[s2_ptr+32],%g2
+	std	%o4,[res_ptr+24]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	add	s1_ptr,32,s1_ptr
+	add	s2_ptr,32,s2_ptr
+	add	res_ptr,32,res_ptr
+	bge	Loop1
+	subcc	%g0,%o4,%g0		! restore cy
+
+Lfin1:	addcc	size,8-2,size
+	blt	Lend1
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
+Loope1:	subxcc	%g4,%g2,%o4
+	ld	[s1_ptr+8],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[s1_ptr+12],%g1
+	ldd	[s2_ptr+8],%g2
+	std	%o4,[res_ptr+0]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-2,size
+	add	s1_ptr,8,s1_ptr
+	add	s2_ptr,8,s2_ptr
+	add	res_ptr,8,res_ptr
+	bge	Loope1
+	subcc	%g0,%o4,%g0		! restore cy
+Lend1:	subxcc	%g4,%g2,%o4
+	subxcc	%g1,%g3,%o5
+	std	%o4,[res_ptr+0]
+	addx	%g0,%g0,%o4		! save cy in register
+
+	andcc	size,1,%g0
+	be	Lret1
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract last limb */
+	ld	[s1_ptr+8],%g4
+	ld	[s2_ptr+8],%g2
+	subxcc	%g4,%g2,%o4
+	st	%o4,[res_ptr+8]
+
+Lret1:	retl
+	addx	%g0,%g0,%o0	! return borrow-out from most sign. limb
+
+L1:	xor	s1_ptr,res_ptr,%g1
+	andcc	%g1,4,%g0
+	bne	L2
+	nop
+! **  V1b  **  (s1_ptr and res_ptr agree in bit 2: ldd from s1, std to res)
+	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
+	be	L_v1b			! if no, branch
+	nop
+/* Subtract least significant limb separately to align res_ptr and s1_ptr */
+	ld	[s2_ptr],%g4
+	add	s2_ptr,4,s2_ptr
+	ld	[s1_ptr],%g2
+	add	s1_ptr,4,s1_ptr
+	add	size,-1,size
+	subcc	%g2,%g4,%o4
+	st	%o4,[res_ptr]
+	add	res_ptr,4,res_ptr
+L_v1b:	addx	%g0,%g0,%o4		! save cy in register
+	cmp	size,2			! if size < 2 ...
+	bl	Lend2			! ... branch to tail code
+	subcc	%g0,%o4,%g0		! restore cy
+
+	ld	[s2_ptr+0],%g4
+	addcc	size,-10,size
+	ld	[s2_ptr+4],%g1
+	ldd	[s1_ptr+0],%g2
+	blt	Lfin1b
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
+Loop1b:	subxcc	%g2,%g4,%o4
+	ld	[s2_ptr+8],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[s2_ptr+12],%g1
+	ldd	[s1_ptr+8],%g2
+	std	%o4,[res_ptr+0]
+	subxcc	%g2,%g4,%o4
+	ld	[s2_ptr+16],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[s2_ptr+20],%g1
+	ldd	[s1_ptr+16],%g2
+	std	%o4,[res_ptr+8]
+	subxcc	%g2,%g4,%o4
+	ld	[s2_ptr+24],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[s2_ptr+28],%g1
+	ldd	[s1_ptr+24],%g2
+	std	%o4,[res_ptr+16]
+	subxcc	%g2,%g4,%o4
+	ld	[s2_ptr+32],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[s2_ptr+36],%g1
+	ldd	[s1_ptr+32],%g2
+	std	%o4,[res_ptr+24]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	add	s1_ptr,32,s1_ptr
+	add	s2_ptr,32,s2_ptr
+	add	res_ptr,32,res_ptr
+	bge	Loop1b
+	subcc	%g0,%o4,%g0		! restore cy
+
+Lfin1b:	addcc	size,8-2,size
+	blt	Lend1b
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
+Loope1b:subxcc	%g2,%g4,%o4
+	ld	[s2_ptr+8],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[s2_ptr+12],%g1
+	ldd	[s1_ptr+8],%g2
+	std	%o4,[res_ptr+0]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-2,size
+	add	s1_ptr,8,s1_ptr
+	add	s2_ptr,8,s2_ptr
+	add	res_ptr,8,res_ptr
+	bge	Loope1b
+	subcc	%g0,%o4,%g0		! restore cy
+Lend1b:	subxcc	%g2,%g4,%o4
+	subxcc	%g3,%g1,%o5
+	std	%o4,[res_ptr+0]
+	addx	%g0,%g0,%o4		! save cy in register
+
+	andcc	size,1,%g0
+	be	Lret1b
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract last limb */
+	ld	[s2_ptr+8],%g4
+	ld	[s1_ptr+8],%g2
+	subxcc	%g2,%g4,%o4
+	st	%o4,[res_ptr+8]
+
+Lret1b:	retl
+	addx	%g0,%g0,%o0	! return borrow-out from most sign. limb
+
+! **  V2  **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr are the same.  */
+
+L2:	cmp	size,1
+	be	Ljone
+	nop
+	andcc	s1_ptr,4,%g0		! s1_ptr unaligned? Side effect: cy=0
+	be	L_v2			! if no, branch
+	nop
+/* Subtract least significant limb separately to align s1_ptr and s2_ptr */
+	ld	[s1_ptr],%g4
+	add	s1_ptr,4,s1_ptr
+	ld	[s2_ptr],%g2
+	add	s2_ptr,4,s2_ptr
+	add	size,-1,size
+	subcc	%g4,%g2,%o4
+	st	%o4,[res_ptr]
+	add	res_ptr,4,res_ptr
+
+L_v2:	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	blt	Lfin2
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
+Loop2:	ldd	[s1_ptr+0],%g2
+	ldd	[s2_ptr+0],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+0]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+4]
+	ldd	[s1_ptr+8],%g2
+	ldd	[s2_ptr+8],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+8]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+12]
+	ldd	[s1_ptr+16],%g2
+	ldd	[s2_ptr+16],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+16]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+20]
+	ldd	[s1_ptr+24],%g2
+	ldd	[s2_ptr+24],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+24]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+28]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	add	s1_ptr,32,s1_ptr
+	add	s2_ptr,32,s2_ptr
+	add	res_ptr,32,res_ptr
+	bge	Loop2
+	subcc	%g0,%o4,%g0		! restore cy
+
+Lfin2:	addcc	size,8-2,size
+	blt	Lend2
+	subcc	%g0,%o4,%g0		! restore cy
+Loope2:	ldd	[s1_ptr+0],%g2
+	ldd	[s2_ptr+0],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+0]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+4]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-2,size
+	add	s1_ptr,8,s1_ptr
+	add	s2_ptr,8,s2_ptr
+	add	res_ptr,8,res_ptr
+	bge	Loope2
+	subcc	%g0,%o4,%g0		! restore cy
+Lend2:	andcc	size,1,%g0
+	be	Lret2
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract last limb */
+Ljone:	ld	[s1_ptr],%g4
+	ld	[s2_ptr],%g2
+	subxcc	%g4,%g2,%o4
+	st	%o4,[res_ptr]
+
+Lret2:	retl
+	addx	%g0,%g0,%o0	! return borrow-out from most sign. limb
diff --git a/gnu/lib/libgmp/mpn/sparc32/submul_1.S b/gnu/lib/libgmp/mpn/sparc32/submul_1.S
new file mode 100644
index 00000000000..a8ebd501a7c
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/sparc32/submul_1.S
@@ -0,0 +1,147 @@
+! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+! the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! 
it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1):
+	! Make S1_PTR and RES_PTR point at the end of their blocks
+	! and put (- 4 x SIZE) in index/loop counter.
+	sll	%o2,2,%o2	! o2 = 4 * size (byte count)
+	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
+	add	%o1,%o2,%o1	! s1_ptr -> end of its block
+	sub	%g0,%o2,%o2	! o2 = -4 * size, counts up towards 0
+
+	cmp	%o3,0xfff
+	bgu	Large		! s2_limb > 0xfff (unsigned): take the 32-step path
+	nop
+
+	ld	[%o1+%o2],%o5	! first s1 limb
+	mov	0,%o0		! cy_limb = 0
+	b	L0
+	add	%o4,-4,%o4	! (delay slot) bias res pointer by one limb
+Loop0:
+	subcc	%o5,%g1,%g1	! res limb - low product limb
+	ld	[%o1+%o2],%o5	! next s1 limb
+	addx	%o0,%g0,%o0	! fold that borrow into cy_limb
+	st	%g1,[%o4+%o2]	! store the limb finished in the previous pass
+L0:	wr	%g0,%o3,%y	! Y = s2_limb, the multiplier consumed by mulscc
+	sra	%o5,31,%g2
+	and	%o3,%g2,%g2	! g2 = s2_limb iff bit 31 of the s1 limb is set, else 0
+	andcc	%g1,0,%g1	! g1 = 0, condition codes cleared
+	mulscc	%g1,%o5,%g1	! 12 multiply steps (s2_limb <= 0xfff on this path)
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,0,%g1	! final step with a zero operand
+	sra	%g1,20,%g4	! g4 = high limb of the product
+	sll	%g1,12,%g1
+	rd	%y,%g3
+	srl	%g3,20,%g3
+	or	%g1,%g3,%g1	! g1 = low limb of the product
+
+	addcc	%g1,%o0,%g1	! add incoming cy_limb
+	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne	Loop0
+	ld	[%o4+%o2],%o5	! (delay slot) res limb that g1 is subtracted from
+
+	subcc	%o5,%g1,%g1	! last limb: same steps as at Loop0
+	addx	%o0,%g0,%o0	! o0 = final cy_limb (return value)
+	retl
+	st	%g1,[%o4+%o2]	! (delay slot) store most significant limb
+
+
+Large:	ld	[%o1+%o2],%o5	! first s1 limb
+	mov	0,%o0		! cy_limb = 0
+	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
+	b	L1
+	add	%o4,-4,%o4	! (delay slot) bias res pointer by one limb
+Loop:
+	subcc	%o5,%g3,%g3	! res limb - low product limb
+	ld	[%o1+%o2],%o5	! next s1 limb
+	addx	%o0,%g0,%o0	! fold that borrow into cy_limb
+	st	%g3,[%o4+%o2]	! store the limb finished in the previous pass
+L1:	wr	%g0,%o5,%y	! Y = s1 limb; s2_limb is the mulscc operand here
+	and	%o5,%g4,%g2	! g2 = S1_LIMB iff S2_LIMB < 0, else 0
+	andcc	%g0,%g0,%g1	! g1 = 0, condition codes cleared
+	mulscc	%g1,%o3,%g1	! 32 multiply steps
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%g0,%g1	! final step with a zero operand
+	rd	%y,%g3		! g3 = low limb of the product
+	addcc	%g3,%o0,%g3	! add incoming cy_limb
+	addx	%g2,%g1,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne	Loop
+	ld	[%o4+%o2],%o5	! (delay slot) res limb that g3 is subtracted from
+
+	subcc	%o5,%g3,%g3	! last limb: same steps as at Loop
+	addx	%o0,%g0,%o0	! o0 = final cy_limb (return value)
+	retl
+	st	%g3,[%o4+%o2]	! (delay slot) store most significant limb
diff --git a/gnu/lib/libgmp/mpn/sparc32/udiv_fp.S b/gnu/lib/libgmp/mpn/sparc32/udiv_fp.S
new file mode 100644
index 00000000000..d11227dff4b
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/sparc32/udiv_fp.S
@@ -0,0 +1,145 @@
+! SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+! This is for v7 CPUs with a floating-point unit.
+
+! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr	i0
+! n1		i1
+! n0		i2
+! d		i3
+
+#include "sysdep.h"
+#undef ret	/* Kludge for glibc */
+
+	.text
+	.align	8
+LC0:	.double	0r4294967296	! 2^32
+LC1:	.double	0r2147483648	! 2^31
+
+	.align	4
+	.global	C_SYMBOL_NAME(__udiv_qrnnd)
+C_SYMBOL_NAME(__udiv_qrnnd):
+	!#PROLOGUE# 0
+	save	%sp,-104,%sp
+	!#PROLOGUE# 1
+	st	%i1,[%fp-8]	! n1 goes to the FPU through memory
+	ld	[%fp-8],%f10
+	sethi	%hi(LC0),%o7
+	fitod	%f10,%f4	! f4 = (double) n1, read as signed
+	ldd	[%o7+%lo(LC0)],%f8	! f8 = 2^32
+	cmp	%i1,0
+	bge	L248
+	mov	%i0,%i5		! (delay slot) keep rem_ptr in i5; i0 is reused below
+	faddd	%f4,%f8,%f4	! n1 had bit 31 set: add 2^32 to get the unsigned value
+L248:
+	st	%i2,[%fp-8]
+	ld	[%fp-8],%f10
+	fmuld	%f4,%f8,%f6	! f6 = n1 * 2^32
+	cmp	%i2,0
+	bge	L249
+	fitod	%f10,%f2	! (delay slot) f2 = (double) n0, read as signed
+	faddd	%f2,%f8,%f2	! same unsigned fix-up for n0
+L249:
+	st	%i3,[%fp-8]
+	faddd	%f6,%f2,%f2	! f2 = n1 * 2^32 + n0
+	ld	[%fp-8],%f10
+	cmp	%i3,0
+	bge	L250
+	fitod	%f10,%f4	! (delay slot) f4 = (double) d, read as signed
+	faddd	%f4,%f8,%f4	! same unsigned fix-up for d
+L250:
+	fdivd	%f2,%f4,%f2	! f2 = quotient estimate
+	sethi	%hi(LC1),%o7
+	ldd	[%o7+%lo(LC1)],%f4	! f4 = 2^31
+	fcmped	%f2,%f4
+	nop
+	fbge,a	L251		! estimate >= 2^31: convert it minus 2^31
+	fsubd	%f2,%f4,%f2	! (annulled delay slot) runs only when the branch is taken
+	fdtoi	%f2,%f2
+	st	%f2,[%fp-8]
+	b	L252
+	ld	[%fp-8],%i4	! (delay slot) i4 = q estimate
+L251:
+	fdtoi	%f2,%f2
+	st	%f2,[%fp-8]
+	ld	[%fp-8],%i4
+	sethi	%hi(-2147483648),%g2
+	xor	%i4,%g2,%i4	! put the 2^31 back by flipping bit 31
+L252:
+	wr	%g0,%i4,%y	! Y = q; d is the mulscc operand
+	sra	%i3,31,%g2
+	and	%i4,%g2,%g2	! g2 = q iff d has bit 31 set, else 0
+	andcc	%g0,0,%g1	! g1 = 0, condition codes cleared
+	mulscc	%g1,%i3,%g1	! 32 multiply steps
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,0,%g1	! final step with a zero operand
+	add	%g1,%g2,%i0	! i0 = high word of q * d
+	rd	%y,%g3		! g3 = low word of q * d
+	subcc	%i2,%g3,%o7	! o7 = n0 - low word (candidate remainder)
+	subxcc	%i1,%i0,%g0	! n1 - high word - borrow, for the flags only
+	be	L253		! high words match
+	cmp	%o7,%i3		! (delay slot) compare candidate remainder with d
+
+	add	%i4,-1,%i0	! otherwise return q - 1 ...
+	add	%o7,%i3,%o7	! ... with remainder + d
+	st	%o7,[%i5]
+	ret
+	restore
+L253:
+	blu	L246		! remainder < d: q is already right
+	mov	%i4,%i0		! (delay slot) return value = q
+	add	%i4,1,%i0	! remainder >= d: return q + 1 ...
+	sub	%o7,%i3,%o7	! ... with remainder - d
+L246:
+	st	%o7,[%i5]	! *rem_ptr = remainder
+	ret
+	restore
diff --git a/gnu/lib/libgmp/mpn/sparc32/udiv_nfp.S b/gnu/lib/libgmp/mpn/sparc32/udiv_nfp.S
new file mode 100644
index 00000000000..118d8a4a26c
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/sparc32/udiv_nfp.S
@@ -0,0 +1,188 @@
+! SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+! This is for v7 CPUs without a floating-point unit.
+
+! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr	o0
+! n1		o1
+! n0		o2
+! 
d		o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global	C_SYMBOL_NAME(__udiv_qrnnd)
+C_SYMBOL_NAME(__udiv_qrnnd):
+	tst	%o3
+	bneg	Largedivisor	! d has bit 31 set: use the halved-operand path
+	mov	8,%g1		! (delay slot) 8 passes of 4 steps each
+
+	b	Lp1
+	addxcc	%o2,%o2,%o2
+
+Lplop:	bcc	Ln1		! Lp* steps keep the remainder in %o1
+	addxcc	%o2,%o2,%o2
+Lp1:	addx	%o1,%o1,%o1
+	subcc	%o1,%o3,%o4	! trial subtraction of d into %o4
+	bcc	Ln2		! no borrow: continue on the Ln side
+	addxcc	%o2,%o2,%o2	! (delay slot) shift n0 left, carry flag shifted in
+Lp2:	addx	%o1,%o1,%o1
+	subcc	%o1,%o3,%o4
+	bcc	Ln3
+	addxcc	%o2,%o2,%o2
+Lp3:	addx	%o1,%o1,%o1
+	subcc	%o1,%o3,%o4
+	bcc	Ln4
+	addxcc	%o2,%o2,%o2
+Lp4:	addx	%o1,%o1,%o1
+	addcc	%g1,-1,%g1	! pass counter
+	bne	Lplop
+	subcc	%o1,%o3,%o4
+	bcc	Ln5
+	addxcc	%o2,%o2,%o2
+Lp5:	st	%o1,[%o0]	! *rem_ptr = remainder
+	retl
+	xnor	%g0,%o2,%o0	! (delay slot) return the complement of %o2
+
+Lnlop:	bcc	Lp1		! Ln* steps keep the remainder in %o4
+	addxcc	%o2,%o2,%o2
+Ln1:	addx	%o4,%o4,%o4
+	subcc	%o4,%o3,%o1	! trial subtraction of d into %o1
+	bcc	Lp2		! no borrow: continue on the Lp side
+	addxcc	%o2,%o2,%o2
+Ln2:	addx	%o4,%o4,%o4
+	subcc	%o4,%o3,%o1
+	bcc	Lp3
+	addxcc	%o2,%o2,%o2
+Ln3:	addx	%o4,%o4,%o4
+	subcc	%o4,%o3,%o1
+	bcc	Lp4
+	addxcc	%o2,%o2,%o2
+Ln4:	addx	%o4,%o4,%o4
+	addcc	%g1,-1,%g1	! pass counter
+	bne	Lnlop
+	subcc	%o4,%o3,%o1
+	bcc	Lp5
+	addxcc	%o2,%o2,%o2
+Ln5:	st	%o4,[%o0]	! *rem_ptr = remainder
+	retl
+	xnor	%g0,%o2,%o0	! (delay slot) return the complement of %o2
+
+Largedivisor:
+	and	%o2,1,%o5	! %o5 = n0 & 1
+
+	srl	%o2,1,%o2
+	sll	%o1,31,%g2
+	or	%g2,%o2,%o2	! %o2 = lo(n1n0 >> 1)
+	srl	%o1,1,%o1	! %o1 = hi(n1n0 >> 1)
+
+	and	%o3,1,%g2	! %g2 = d & 1
+	srl	%o3,1,%g3	! %g3 = floor(d / 2)
+	add	%g3,%g2,%g3	! %g3 = ceil(d / 2)
+
+	b	LLp1
+	addxcc	%o2,%o2,%o2
+
+LLplop:	bcc	LLn1		! same ladder as above, dividing by %g3
+	addxcc	%o2,%o2,%o2
+LLp1:	addx	%o1,%o1,%o1
+	subcc	%o1,%g3,%o4
+	bcc	LLn2
+	addxcc	%o2,%o2,%o2
+LLp2:	addx	%o1,%o1,%o1
+	subcc	%o1,%g3,%o4
+	bcc	LLn3
+	addxcc	%o2,%o2,%o2
+LLp3:	addx	%o1,%o1,%o1
+	subcc	%o1,%g3,%o4
+	bcc	LLn4
+	addxcc	%o2,%o2,%o2
+LLp4:	addx	%o1,%o1,%o1
+	addcc	%g1,-1,%g1	! pass counter
+	bne	LLplop
+	subcc	%o1,%g3,%o4
+	bcc	LLn5
+	addxcc	%o2,%o2,%o2
+LLp5:	add	%o1,%o1,%o1	! << 1
+	tst	%g2
+	bne	Oddp		! d was odd: correct q and r
+	add	%o5,%o1,%o1	! (delay slot) add back the bit shifted out of n0
+	st	%o1,[%o0]
+	retl
+	xnor	%g0,%o2,%o0	! (delay slot) return the complement of %o2
+
+LLnlop:	bcc	LLp1
+	addxcc	%o2,%o2,%o2
+LLn1:	addx	%o4,%o4,%o4
+	subcc	%o4,%g3,%o1
+	bcc	LLp2
+	addxcc	%o2,%o2,%o2
+LLn2:	addx	%o4,%o4,%o4
+	subcc	%o4,%g3,%o1
+	bcc	LLp3
+	addxcc	%o2,%o2,%o2
+LLn3:	addx	%o4,%o4,%o4
+	subcc	%o4,%g3,%o1
+	bcc	LLp4
+	addxcc	%o2,%o2,%o2
+LLn4:	addx	%o4,%o4,%o4
+	addcc	%g1,-1,%g1	! pass counter
+	bne	LLnlop
+	subcc	%o4,%g3,%o1
+	bcc	LLp5
+	addxcc	%o2,%o2,%o2
+LLn5:	add	%o4,%o4,%o4	! << 1
+	tst	%g2
+	bne	Oddn		! d was odd: correct q and r
+	add	%o5,%o4,%o4	! (delay slot) add back the bit shifted out of n0
+	st	%o4,[%o0]
+	retl
+	xnor	%g0,%o2,%o0	! (delay slot) return the complement of %o2
+
+Oddp:	xnor	%g0,%o2,%o2
+	! q' in %o2. r' in %o1
+	addcc	%o1,%o2,%o1	! r' + q'
+	bcc	LLp6
+	addx	%o2,0,%o2	! (delay slot) q' += carry
+	sub	%o1,%o3,%o1	! carry out: subtract d
+LLp6:	subcc	%o1,%o3,%g0	! compare with d
+	bcs	LLp7		! below d: done
+	subx	%o2,-1,%o2	! (delay slot) q' += 1 when there was no borrow
+	sub	%o1,%o3,%o1
+LLp7:	st	%o1,[%o0]	! *rem_ptr = remainder
+	retl
+	mov	%o2,%o0		! (delay slot) return quotient
+
+Oddn:	xnor	%g0,%o2,%o2
+	! q' in %o2. r' in %o4
+	addcc	%o4,%o2,%o4	! r' + q'
+	bcc	LLn6
+	addx	%o2,0,%o2	! (delay slot) q' += carry
+	sub	%o4,%o3,%o4	! carry out: subtract d
+LLn6:	subcc	%o4,%o3,%g0	! compare with d
+	bcs	LLn7		! below d: done
+	subx	%o2,-1,%o2	! (delay slot) q' += 1 when there was no borrow
+	sub	%o4,%o3,%o4
+LLn7:	st	%o4,[%o0]	! *rem_ptr = remainder
+	retl
+	mov	%o2,%o0		! (delay slot) return quotient
diff --git a/gnu/lib/libgmp/mpn/sparc32/v8/addmul_1.S b/gnu/lib/libgmp/mpn/sparc32/v8/addmul_1.S
new file mode 100644
index 00000000000..fb9ea7cf0eb
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/sparc32/v8/addmul_1.S
@@ -0,0 +1,124 @@
+! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
+! add the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! 
License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1):
+	orcc	%g0,%g0,%g2	! g2 (cy_limb) = 0, carry flag cleared
+	ld	[%o1+0],%o4	! 1
+
+	sll	%o2,4,%g1
+	and	%g1,(4-1)<<4,%g1	! g1 = (size mod 4) * 16 = offset of a 4-insn stub
+#if PIC
+	mov	%o7,%g4		! Save return address register
+0:	call	1f		! call leaves the address of 0b (the call) in %o7
+	add	%o7,LL-0b,%g3	! (delay slot) g3 = LL; offset is taken from the call
+1:	mov	%g4,%o7		! Restore return address register
+#else
+	sethi	%hi(LL),%g3
+	or	%g3,%lo(LL),%g3
+#endif
+	jmp	%g3+%g1		! enter the unrolled loop according to size mod 4
+	nop
+LL:
+LL00:	add	%o0,-4,%o0
+	b	Loop00		/* 4, 8, 12, ... */
+	add	%o1,-4,%o1
+	nop
+LL01:	b	Loop01		/* 1, 5, 9, ... */
+	nop
+	nop
+	nop
+LL10:	add	%o0,-12,%o0	/* 2, 6, 10, ... */
+	b	Loop10
+	add	%o1,4,%o1
+	nop
+LL11:	add	%o0,-8,%o0	/* 3, 7, 11, ... */
+	b	Loop11
+	add	%o1,-8,%o1
+	nop
+
+1:	addcc	%g3,%g2,%g3	! 1
+	ld	[%o1+4],%o4	! 2
+	rd	%y,%g2		! 1
+	addx	%g0,%g2,%g2
+	ld	[%o0+0],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+0]	! 1
+Loop00:	umul	%o4,%o3,%g3	! 2
+	ld	[%o0+4],%g1	! 2
+	addxcc	%g3,%g2,%g3	! 2
+	ld	[%o1+8],%o4	! 3
+	rd	%y,%g2		! 2
+	addx	%g0,%g2,%g2
+	nop
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+4]	! 2
+Loop11:	umul	%o4,%o3,%g3	! 3
+	addxcc	%g3,%g2,%g3	! 3
+	ld	[%o1+12],%o4	! 4
+	rd	%y,%g2		! 3
+	add	%o1,16,%o1
+	addx	%g0,%g2,%g2
+	ld	[%o0+8],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+8]	! 3
+Loop10:	umul	%o4,%o3,%g3	! 4
+	addxcc	%g3,%g2,%g3	! 4
+	ld	[%o1+0],%o4	! 1
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2
+	ld	[%o0+12],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+12]	! 4
+	add	%o0,16,%o0
+	addx	%g0,%g2,%g2
+Loop01:	addcc	%o2,-4,%o2	! four limbs consumed per trip around the loop
+	bg	1b
+	umul	%o4,%o3,%g3	! 1
+
+	addcc	%g3,%g2,%g3	! 4
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2
+	ld	[%o0+0],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+0]	! 4
+	addx	%g0,%g2,%o0	! o0 = final cy_limb (return value)
+
+	retl
+	nop
+
+
+! 
umul, ld, addxcc, rd, st
+
+! umul, ld, addxcc, rd, ld, addcc, st, addx
+
diff --git a/gnu/lib/libgmp/mpn/sparc32/v8/mul_1.S b/gnu/lib/libgmp/mpn/sparc32/v8/mul_1.S
new file mode 100644
index 00000000000..b641feb4530
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/sparc32/v8/mul_1.S
@@ -0,0 +1,99 @@
+! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align	8
+	.global	C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1):
+	sll	%o2,4,%g1
+	and	%g1,(4-1)<<4,%g1	! g1 = (size mod 4) * 16 = offset of an entry stub
+#if PIC
+	mov	%o7,%g4		! Save return address register
+0:	call	1f		! call leaves the address of 0b (the call) in %o7
+	add	%o7,LL-0b,%g3	! (delay slot) g3 = LL; offset is taken from the call
+1:	mov	%g4,%o7		! Restore return address register
+#else
+	sethi	%hi(LL),%g3
+	or	%g3,%lo(LL),%g3
+#endif
+	jmp	%g3+%g1		! enter the unrolled loop according to size mod 4
+	ld	[%o1+0],%o4	! 1
+LL:
+LL00:	add	%o0,-4,%o0
+	add	%o1,-4,%o1
+	b	Loop00		/* 4, 8, 12, ... */
+	orcc	%g0,%g0,%g2	! (delay slot) cy_limb = 0, carry flag cleared
+LL01:	b	Loop01		/* 1, 5, 9, ... */
+	orcc	%g0,%g0,%g2
+	nop
+	nop
+LL10:	add	%o0,-12,%o0	/* 2, 6, 10, ... */
+	add	%o1,4,%o1
+	b	Loop10
+	orcc	%g0,%g0,%g2
+	nop			! 5th insn of this stub: offset 48 (size mod 4 == 3) lands here and falls into LL11
+LL11:	add	%o0,-8,%o0	/* 3, 7, 11, ... */
+	add	%o1,-8,%o1
+	b	Loop11
+	orcc	%g0,%g0,%g2
+
+Loop:	addcc	%g3,%g2,%g3	! 1
+	ld	[%o1+4],%o4	! 2
+	st	%g3,[%o0+0]	! 1
+	rd	%y,%g2		! 1
+Loop00:	umul	%o4,%o3,%g3	! 2
+	addxcc	%g3,%g2,%g3	! 2
+	ld	[%o1+8],%o4	! 3
+	st	%g3,[%o0+4]	! 2
+	rd	%y,%g2		! 2
+Loop11:	umul	%o4,%o3,%g3	! 3
+	addxcc	%g3,%g2,%g3	! 3
+	ld	[%o1+12],%o4	! 4
+	add	%o1,16,%o1
+	st	%g3,[%o0+8]	! 3
+	rd	%y,%g2		! 3
+Loop10:	umul	%o4,%o3,%g3	! 4
+	addxcc	%g3,%g2,%g3	! 4
+	ld	[%o1+0],%o4	! 1
+	st	%g3,[%o0+12]	! 4
+	add	%o0,16,%o0
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2
+Loop01:	addcc	%o2,-4,%o2	! four limbs consumed per trip around the loop
+	bg	Loop
+	umul	%o4,%o3,%g3	! 1
+
+	addcc	%g3,%g2,%g3	! 4
+	st	%g3,[%o0+0]	! 4
+	rd	%y,%g2		! 4
+
+	retl
+	addx	%g0,%g2,%o0	! (delay slot) o0 = final cy_limb (return value)
diff --git a/gnu/lib/libgmp/mpn/sparc32/v8/submul_1.S b/gnu/lib/libgmp/mpn/sparc32/v8/submul_1.S
new file mode 100644
index 00000000000..e40119d0119
--- /dev/null
+++ b/gnu/lib/libgmp/mpn/sparc32/v8/submul_1.S
@@ -0,0 +1,58 @@
+! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
+! subtract the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! 
MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(__mpn_submul_1) +C_SYMBOL_NAME(__mpn_submul_1): + sub %g0,%o2,%o2 ! negate ... + sll %o2,2,%o2 ! ... and scale size + sub %o1,%o2,%o1 ! o1 is offset s1_ptr + sub %o0,%o2,%g1 ! g1 is offset res_ptr + + mov 0,%o0 ! clear cy_limb + +Loop: ld [%o1+%o2],%o4 + ld [%g1+%o2],%g2 + umul %o4,%o3,%o5 + rd %y,%g3 + addcc %o5,%o0,%o5 + addx %g3,0,%o0 + subcc %g2,%o5,%g2 + addx %o0,0,%o0 + st %g2,[%g1+%o2] + + addcc %o2,4,%o2 + bne Loop + nop + + retl + nop diff --git a/gnu/lib/libgmp/mpn/sparc32/v8/supersparc/udiv.S b/gnu/lib/libgmp/mpn/sparc32/v8/supersparc/udiv.S new file mode 100644 index 00000000000..ed688ee1d24 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc32/v8/supersparc/udiv.S @@ -0,0 +1,109 @@ +! SuperSPARC __udiv_qrnnd division support, used from longlong.h. +! This is for SuperSPARC only, to compensate for its semi-functional +! udiv instruction. + +! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! rem_ptr i0 +! n1 i1 +! n0 i2 +! 
d i3 + +#include "sysdep.h" +#undef ret /* Kludge for glibc */ + + .text + .align 8 +LC0: .double 0r4294967296 +LC1: .double 0r2147483648 + + .align 4 + .global C_SYMBOL_NAME(__udiv_qrnnd) +C_SYMBOL_NAME(__udiv_qrnnd): + !#PROLOGUE# 0 + save %sp,-104,%sp + !#PROLOGUE# 1 + st %i1,[%fp-8] + ld [%fp-8],%f10 + sethi %hi(LC0),%o7 + fitod %f10,%f4 + ldd [%o7+%lo(LC0)],%f8 + cmp %i1,0 + bge L248 + mov %i0,%i5 + faddd %f4,%f8,%f4 +L248: + st %i2,[%fp-8] + ld [%fp-8],%f10 + fmuld %f4,%f8,%f6 + cmp %i2,0 + bge L249 + fitod %f10,%f2 + faddd %f2,%f8,%f2 +L249: + st %i3,[%fp-8] + faddd %f6,%f2,%f2 + ld [%fp-8],%f10 + cmp %i3,0 + bge L250 + fitod %f10,%f4 + faddd %f4,%f8,%f4 +L250: + fdivd %f2,%f4,%f2 + sethi %hi(LC1),%o7 + ldd [%o7+%lo(LC1)],%f4 + fcmped %f2,%f4 + nop + fbge,a L251 + fsubd %f2,%f4,%f2 + fdtoi %f2,%f2 + st %f2,[%fp-8] + b L252 + ld [%fp-8],%i4 +L251: + fdtoi %f2,%f2 + st %f2,[%fp-8] + ld [%fp-8],%i4 + sethi %hi(-2147483648),%g2 + xor %i4,%g2,%i4 +L252: + umul %i3,%i4,%g3 + rd %y,%i0 + subcc %i2,%g3,%o7 + subxcc %i1,%i0,%g0 + be L253 + cmp %o7,%i3 + + add %i4,-1,%i0 + add %o7,%i3,%o7 + st %o7,[%i5] + ret + restore +L253: + blu L246 + mov %i4,%i0 + add %i4,1,%i0 + sub %o7,%i3,%o7 +L246: + st %o7,[%i5] + ret + restore diff --git a/gnu/lib/libgmp/mpn/sparc64/add_n.s b/gnu/lib/libgmp/mpn/sparc64/add_n.s new file mode 100644 index 00000000000..01d1f49564b --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc64/add_n.s @@ -0,0 +1,58 @@ +! SPARC v9 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +! sum in a third limb vector. + +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! 
The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! s1_ptr %o1 +! s2_ptr %o2 +! size %o3 + +.section ".text" + .align 4 + .global __mpn_add_n + .type __mpn_add_n,#function + .proc 04 +__mpn_add_n: + sub %g0,%o3,%g3 + sllx %o3,3,%g1 + add %o1,%g1,%o1 ! make s1_ptr point at end + add %o2,%g1,%o2 ! make s2_ptr point at end + add %o0,%g1,%o0 ! make res_ptr point at end + mov 0,%o4 ! clear carry variable + sllx %g3,3,%o5 ! compute initial address index + +.Loop: ldx [%o2+%o5],%g1 ! load s2 limb + add %g3,1,%g3 ! increment loop count + ldx [%o1+%o5],%g2 ! load s1 limb + addcc %g1,%o4,%g1 ! add s2 limb and carry variable + movcc %xcc,0,%o4 ! if carry-out, o4 was 1; clear it + addcc %g1,%g2,%g1 ! add s1 limb to sum + stx %g1,[%o0+%o5] ! store result + add %o5,8,%o5 ! increment address index + brnz,pt %g3,.Loop + movcs %xcc,1,%o4 ! if s1 add gave carry, record it + + retl + mov %o4,%o0 +.LLfe1: + .size __mpn_add_n,.LLfe1-__mpn_add_n diff --git a/gnu/lib/libgmp/mpn/sparc64/addmul_1.s b/gnu/lib/libgmp/mpn/sparc64/addmul_1.s new file mode 100644 index 00000000000..8d86390808d --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc64/addmul_1.s @@ -0,0 +1,89 @@ +! SPARC v9 __mpn_addmul_1 -- Multiply a limb vector with a single limb and +! add the product to a second limb vector. + +! Copyright (C) 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! 
it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +.section ".text" + .align 4 + .global __mpn_addmul_1 + .type __mpn_addmul_1,#function + .proc 016 +__mpn_addmul_1: + !#PROLOGUE# 0 + save %sp,-160,%sp + !#PROLOGUE# 1 + sub %g0,%i2,%o7 ! o7 = -size, loop counter that counts up to zero + sllx %o7,3,%g5 ! g5 = -size scaled to bytes + sub %i1,%g5,%o3 ! o3 = s1_ptr advanced past its last limb + sub %i0,%g5,%o4 ! o4 = res_ptr advanced past its last limb + mov 0,%o0 ! zero cy_limb + + srl %i3,0,%o1 ! extract low 32 bits of s2_limb + srlx %i3,32,%i3 ! extract high 32 bits of s2_limb + mov 1,%o2 + sllx %o2,32,%o2 ! o2 = 0x100000000 + + ! hi ! + ! mid-1 ! + ! mid-2 ! + ! lo ! +.Loop: + sllx %o7,3,%g1 ! g1 = negative byte offset of the current limb + ldx [%o3+%g1],%g5 ! load s1 limb + srl %g5,0,%i0 ! zero hi bits + srlx %g5,32,%g5 ! g5 = high 32 bits of s1 limb + mulx %o1,%i0,%i4 ! lo product + mulx %i3,%i0,%i1 ! mid-1 product + mulx %o1,%g5,%l2 ! mid-2 product + mulx %i3,%g5,%i5 ! hi product + srlx %i4,32,%i0 ! extract high 32 bits of lo product... + add %i1,%i0,%i1 ! ...and add it to the mid-1 product + addcc %i1,%l2,%i1 ! add mid products + mov 0,%l0 ! we need the carry from that add... + movcs %xcc,%o2,%l0 ! ...compute it and... + add %i5,%l0,%i5 ! ...add to bit 32 of the hi product + sllx %i1,32,%i0 ! align low bits of mid product + srl %i4,0,%g5 ! zero high 32 bits of lo product + add %i0,%g5,%i0 ! combine into low 64 bits of result + srlx %i1,32,%i1 ! extract high bits of mid product... + add %i5,%i1,%i1 ! 
...and add them to the high result + addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result + add %i1,1,%g5 ! g5 = high result plus one (add leaves the condition codes alone) + movcs %xcc,%g5,%i1 ! fold the carry of the cy_limb add into the high result + add %o7,1,%o7 ! step loop counter toward zero + ldx [%o4+%g1],%l1 ! load res limb + addcc %l1,%i0,%i0 ! add it; this can carry even when the cy_limb add already did + add %i1,1,%o0 ! provisional cy_limb for the carry case; delay-slot movcc replaces it otherwise + stx %i0,[%o4+%g1] ! store result limb + brnz %o7,.Loop + movcc %xcc,%i1,%o0 ! no carry from the res add: cy_limb is the high result unchanged + + mov %o0,%i0 ! return cy_limb + ret + restore +.LLfe1: + .size __mpn_addmul_1,.LLfe1-__mpn_addmul_1 diff --git a/gnu/lib/libgmp/mpn/sparc64/gmp-mparam.h b/gnu/lib/libgmp/mpn/sparc64/gmp-mparam.h new file mode 100644 index 00000000000..a3c66974ded --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc64/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 diff --git a/gnu/lib/libgmp/mpn/sparc64/lshift.s b/gnu/lib/libgmp/mpn/sparc64/lshift.s new file mode 100644 index 00000000000..ad1f667fa35 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc64/lshift.s @@ -0,0 +1,96 @@ +! SPARC v9 __mpn_lshift -- + +! Copyright (C) 1996 Free Software Foundation, Inc. + +! 
This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! src_ptr %o1 +! size %o2 +! cnt %o3 + +.section ".text" + .align 4 + .global __mpn_lshift + .type __mpn_lshift,#function + .proc 04 +__mpn_lshift: + sllx %o2,3,%g1 + add %o1,%g1,%o1 ! make %o1 point at end of src + ldx [%o1-8],%g2 ! load first limb + sub %g0,%o3,%o5 ! negate shift count + add %o0,%g1,%o0 ! make %o0 point at end of res + add %o2,-1,%o2 + and %o2,4-1,%g4 ! number of limbs in first loop + srlx %g2,%o5,%g1 ! compute function result + brz,pn %g4,.L0 ! if multiple of 4 limbs, skip first loop + stx %g1,[%sp+80] + + sub %o2,%g4,%o2 ! 
adjust count for main loop + +.Loop0: ldx [%o1-16],%g3 + add %o0,-8,%o0 + add %o1,-8,%o1 + add %g4,-1,%g4 + sllx %g2,%o3,%o4 + srlx %g3,%o5,%g1 + mov %g3,%g2 + or %o4,%g1,%o4 + brnz,pt %g4,.Loop0 + stx %o4,[%o0+0] + +.L0: brz,pn %o2,.Lend + nop + +.Loop: ldx [%o1-16],%g3 + add %o0,-32,%o0 + add %o2,-4,%o2 + sllx %g2,%o3,%o4 + srlx %g3,%o5,%g1 + + ldx [%o1-24],%g2 + sllx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0+24] + srlx %g2,%o5,%g1 + + ldx [%o1-32],%g3 + sllx %g2,%o3,%o4 + or %g4,%g1,%g4 + stx %g4,[%o0+16] + srlx %g3,%o5,%g1 + + ldx [%o1-40],%g2 + sllx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0+8] + srlx %g2,%o5,%g1 + + add %o1,-32,%o1 + or %g4,%g1,%g4 + brnz,pt %o2,.Loop + stx %g4,[%o0+0] + +.Lend: sllx %g2,%o3,%g2 + stx %g2,[%o0-8] + retl + ldx [%sp+80],%o0 +.LLfe1: + .size __mpn_lshift,.LLfe1-__mpn_lshift diff --git a/gnu/lib/libgmp/mpn/sparc64/mul_1.s b/gnu/lib/libgmp/mpn/sparc64/mul_1.s new file mode 100644 index 00000000000..91d6eb01b83 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc64/mul_1.s @@ -0,0 +1,86 @@ +! SPARC v9 __mpn_mul_1 -- Multiply a limb vector with a single limb and +! store the product in a second limb vector. + +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! 
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +.section ".text" + .align 4 + .global __mpn_mul_1 + .type __mpn_mul_1,#function + .proc 016 +__mpn_mul_1: + !#PROLOGUE# 0 + save %sp,-160,%sp + !#PROLOGUE# 1 + sub %g0,%i2,%o7 + sllx %o7,3,%g5 + sub %i1,%g5,%o3 + sub %i0,%g5,%o4 + mov 0,%o0 ! zero cy_limb + + srl %i3,0,%o1 ! extract low 32 bits of s2_limb + srlx %i3,32,%i3 ! extract high 32 bits of s2_limb + mov 1,%o2 + sllx %o2,32,%o2 ! o2 = 0x100000000 + + ! hi ! + ! mid-1 ! + ! mid-2 ! + ! lo ! +.Loop: + sllx %o7,3,%g1 + ldx [%o3+%g1],%g5 + srl %g5,0,%i0 ! zero hi bits + srlx %g5,32,%g5 + mulx %o1,%i0,%i4 ! lo product + mulx %i3,%i0,%i1 ! mid-1 product + mulx %o1,%g5,%l2 ! mid-2 product + mulx %i3,%g5,%i5 ! hi product + srlx %i4,32,%i0 ! extract high 32 bits of lo product... + add %i1,%i0,%i1 ! ...and add it to the mid-1 product + addcc %i1,%l2,%i1 ! add mid products + mov 0,%l0 ! we need the carry from that add... + movcs %xcc,%o2,%l0 ! ...compute it and... + add %i5,%l0,%i5 ! ...add to bit 32 of the hi product + sllx %i1,32,%i0 ! align low bits of mid product + srl %i4,0,%g5 ! zero high 32 bits of lo product + add %i0,%g5,%i0 ! combine into low 64 bits of result + srlx %i1,32,%i1 ! extract high bits of mid product... + add %i5,%i1,%i1 ! ...and add them to the high result + addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result + mov 0,%g5 + movcs %xcc,1,%g5 + add %o7,1,%o7 + stx %i0,[%o4+%g1] + brnz %o7,.Loop + add %i1,%g5,%o0 ! compute new cy_limb + + mov %o0,%i0 + ret + restore +.LLfe1: + .size __mpn_mul_1,.LLfe1-__mpn_mul_1 diff --git a/gnu/lib/libgmp/mpn/sparc64/rshift.s b/gnu/lib/libgmp/mpn/sparc64/rshift.s new file mode 100644 index 00000000000..ff6a3801602 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc64/rshift.s @@ -0,0 +1,93 @@ +! SPARC v9 __mpn_rshift -- + +! Copyright (C) 1996 Free Software Foundation, Inc. + +! 
This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! src_ptr %o1 +! size %o2 +! cnt %o3 + +.section ".text" + .align 4 + .global __mpn_rshift + .type __mpn_rshift,#function + .proc 04 +__mpn_rshift: + ldx [%o1],%g2 ! load first limb + sub %g0,%o3,%o5 ! negate shift count + add %o2,-1,%o2 + and %o2,4-1,%g4 ! number of limbs in first loop + sllx %g2,%o5,%g1 ! compute function result + brz,pn %g4,.L0 ! if multiple of 4 limbs, skip first loop + stx %g1,[%sp+80] + + sub %o2,%g4,%o2 ! 
adjust count for main loop + +.Loop0: ldx [%o1+8],%g3 + add %o0,8,%o0 + add %o1,8,%o1 + add %g4,-1,%g4 + srlx %g2,%o3,%o4 + sllx %g3,%o5,%g1 + mov %g3,%g2 + or %o4,%g1,%o4 + brnz,pt %g4,.Loop0 + stx %o4,[%o0-8] + +.L0: brz,pn %o2,.Lend + nop + +.Loop: ldx [%o1+8],%g3 + add %o0,32,%o0 + add %o2,-4,%o2 + srlx %g2,%o3,%o4 + sllx %g3,%o5,%g1 + + ldx [%o1+16],%g2 + srlx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0-32] + sllx %g2,%o5,%g1 + + ldx [%o1+24],%g3 + srlx %g2,%o3,%o4 + or %g4,%g1,%g4 + stx %g4,[%o0-24] + sllx %g3,%o5,%g1 + + ldx [%o1+32],%g2 + srlx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0-16] + sllx %g2,%o5,%g1 + + add %o1,32,%o1 + or %g4,%g1,%g4 + brnz %o2,.Loop + stx %g4,[%o0-8] + +.Lend: srlx %g2,%o3,%g2 + stx %g2,[%o0-0] + retl + ldx [%sp+80],%o0 +.LLfe1: + .size __mpn_rshift,.LLfe1-__mpn_rshift diff --git a/gnu/lib/libgmp/mpn/sparc64/sub_n.s b/gnu/lib/libgmp/mpn/sparc64/sub_n.s new file mode 100644 index 00000000000..d4842b8cdde --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc64/sub_n.s @@ -0,0 +1,58 @@ +! SPARC v9 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +! store difference in a third limb vector. + +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! 
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! s1_ptr %o1 +! s2_ptr %o2 +! size %o3 + +.section ".text" + .align 4 + .global __mpn_sub_n + .type __mpn_sub_n,#function + .proc 04 +__mpn_sub_n: + sub %g0,%o3,%g3 ! g3 = -size, loop counter that counts up to zero + sllx %o3,3,%g1 ! g1 = size scaled to bytes + add %o1,%g1,%o1 ! make s1_ptr point at end + add %o2,%g1,%o2 ! make s2_ptr point at end + add %o0,%g1,%o0 ! make res_ptr point at end + mov 0,%o4 ! clear carry variable + sllx %g3,3,%o5 ! compute initial address index + +.Loop: ldx [%o2+%o5],%g1 ! load s2 limb + add %g3,1,%g3 ! increment loop count + ldx [%o1+%o5],%g2 ! load s1 limb + addcc %g1,%o4,%g1 ! add s2 limb and carry variable + movcc %xcc,0,%o4 ! no carry-out: clear o4 (a carry-out implies o4 was 1, so keep it) + subcc %g2,%g1,%g1 ! subtract that sum from the s1 limb (s1 minus s2 minus borrow) + stx %g1,[%o0+%o5] ! store result + add %o5,8,%o5 ! increment address index + brnz,pt %g3,.Loop + movcs %xcc,1,%o4 ! if the subtract borrowed, record it + + retl + mov %o4,%o0 ! return the final borrow +.LLfe1: + .size __mpn_sub_n,.LLfe1-__mpn_sub_n diff --git a/gnu/lib/libgmp/mpn/sparc64/submul_1.s b/gnu/lib/libgmp/mpn/sparc64/submul_1.s new file mode 100644 index 00000000000..e7962434708 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sparc64/submul_1.s @@ -0,0 +1,89 @@ +! SPARC v9 __mpn_submul_1 -- Multiply a limb vector with a single limb and +! subtract the product from a second limb vector. + +! Copyright (C) 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! 
License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +.section ".text" + .align 4 + .global __mpn_submul_1 + .type __mpn_submul_1,#function + .proc 016 +__mpn_submul_1: + !#PROLOGUE# 0 + save %sp,-160,%sp + !#PROLOGUE# 1 + sub %g0,%i2,%o7 ! o7 = -size, loop counter that counts up to zero + sllx %o7,3,%g5 ! g5 = -size scaled to bytes + sub %i1,%g5,%o3 ! o3 = s1_ptr advanced past its last limb + sub %i0,%g5,%o4 ! o4 = res_ptr advanced past its last limb + mov 0,%o0 ! zero cy_limb + + srl %i3,0,%o1 ! extract low 32 bits of s2_limb + srlx %i3,32,%i3 ! extract high 32 bits of s2_limb + mov 1,%o2 + sllx %o2,32,%o2 ! o2 = 0x100000000 + + ! hi ! + ! mid-1 ! + ! mid-2 ! + ! lo ! +.Loop: + sllx %o7,3,%g1 ! g1 = negative byte offset of the current limb + ldx [%o3+%g1],%g5 ! load s1 limb + srl %g5,0,%i0 ! zero hi bits + srlx %g5,32,%g5 ! g5 = high 32 bits of s1 limb + mulx %o1,%i0,%i4 ! lo product + mulx %i3,%i0,%i1 ! mid-1 product + mulx %o1,%g5,%l2 ! mid-2 product + mulx %i3,%g5,%i5 ! hi product + srlx %i4,32,%i0 ! extract high 32 bits of lo product... + add %i1,%i0,%i1 ! ...and add it to the mid-1 product + addcc %i1,%l2,%i1 ! add mid products + mov 0,%l0 ! we need the carry from that add... + movcs %xcc,%o2,%l0 ! ...compute it and... + add %i5,%l0,%i5 ! ...add to bit 32 of the hi product + sllx %i1,32,%i0 ! align low bits of mid product + srl %i4,0,%g5 ! zero high 32 bits of lo product + add %i0,%g5,%i0 ! combine into low 64 bits of result + srlx %i1,32,%i1 ! extract high bits of mid product... + add %i5,%i1,%i1 ! ...and add them to the high result + addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result + add %i1,1,%g5 ! g5 = high result plus one (add leaves the condition codes alone) + movcs %xcc,%g5,%i1 ! fold the carry of the cy_limb add into the high result + add %o7,1,%o7 ! step loop counter toward zero + ldx [%o4+%g1],%l1 ! load res limb + subcc %l1,%i0,%i0 ! subtract; this can borrow even when the cy_limb add carried + add %i1,1,%o0 ! provisional cy_limb for the borrow case; delay-slot movcc replaces it otherwise + stx %i0,[%o4+%g1] ! store result limb + brnz %o7,.Loop + movcc %xcc,%i1,%o0 ! 
compute new cy_limb + + mov %o0,%i0 + ret + restore +.LLfe1: + .size __mpn_submul_1,.LLfe1-__mpn_submul_1 diff --git a/gnu/lib/libgmp/mpn/sysv.h b/gnu/lib/libgmp/mpn/sysv.h new file mode 100644 index 00000000000..87c250902a5 --- /dev/null +++ b/gnu/lib/libgmp/mpn/sysv.h @@ -0,0 +1 @@ +#define C_SYMBOL_NAME(name) name diff --git a/gnu/lib/libgmp/mpn/tests/add_n.c b/gnu/lib/libgmp/mpn/tests/add_n.c new file mode 100644 index 00000000000..c27d34710a1 --- /dev/null +++ b/gnu/lib/libgmp/mpn/tests/add_n.c @@ -0,0 +1,211 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" + +#ifndef USG +#include <sys/time.h> +#include <sys/resource.h> + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include <time.h> + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 10000000 +#endif +#ifndef SIZE +#define SIZE 328 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + + +mp_limb_t +#if __STDC__ +refmpn_add_n (mp_ptr res_ptr, + mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +refmpn_add_n 
(res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = (y < cy); /* get out carry from that addition */ + y = x + y; /* add other addend */ + cy = (y < x) + cy; /* get out carry from that add, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t s2[SIZE]; + mp_limb_t dx[SIZE+1]; + mp_limb_t dy[SIZE+1]; + int cyx, cyy; + int i; + long t0, t; + int test; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + + mpn_random2 (s1, size); + mpn_random2 (s2, size); + + dx[size] = 0x12345678; + dy[size] = 0x12345678; + +#ifdef PRINT + mpn_print (s1, size); + mpn_print (s2, size); +#endif + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_add_n (dx, s1, s2, size); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_add_n: %ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%d ", cyx); mpn_print (dx, size); +#endif + + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = mpn_add_n (dx, s1, s2, size); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_add_n: %ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%d ", cyx); mpn_print (dx, size); +#endif + +#ifndef NOCHECK + /* Put garbage in the destination. 
*/ + for (i = 0; i < size; i++) + { + dx[i] = 0x7654321; + dy[i] = 0x1234567; + } + + cyx = refmpn_add_n (dx, s1, s2, size); + cyy = mpn_add_n (dy, s1, s2, size); + if (cyx != cyy || mpn_cmp (dx, dy, size) != 0 + || dx[size] != 0x12345678 || dy[size] != 0x12345678) + { +#ifndef PRINT + printf ("%d ", cyx); mpn_print (dx, size); + printf ("%d ", cyy); mpn_print (dy, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/gnu/lib/libgmp/mpn/tests/addmul_1.c b/gnu/lib/libgmp/mpn/tests/addmul_1.c new file mode 100644 index 00000000000..23952a1cc99 --- /dev/null +++ b/gnu/lib/libgmp/mpn/tests/addmul_1.c @@ -0,0 +1,223 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include <sys/time.h> +#include <sys/resource.h> + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include <time.h> + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK 
of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 20000000 +#endif +#ifndef SIZE +#define SIZE 496 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +mp_limb_t +refmpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + register mp_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + x = res_ptr[j]; + prod_low = x + prod_low; + cy_limb += (prod_low < x); + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t dx[SIZE+2]; + mp_limb_t dy[SIZE+2]; + mp_limb_t cyx, cyy; + int i; + long t0, t; + int test; + mp_limb_t xlimb; + mp_size_t size; + double cyc; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + + mpn_random2 (s1, size); + mpn_random2 (dy+1, size); + + if (random () % 0x100 == 0) + xlimb = 0; + else + mpn_random2 (&xlimb, 1); + + dy[size+1] = 0x12345678; + dy[0] = 0x87654321; + +#if defined (PRINT) || defined (XPRINT) + printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb); +#endif +#ifdef PRINT + mpn_print (dy+1, size); + mpn_print (s1, size); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + cyc = ((double) t * CLOCK) / (OPS * 1000.0); + printf ("refmpn_addmul_1: %5ldms (%.2f 
cycles/limb) [%.2f Gb/s]\n", + t, + cyc, + CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyy = mpn_addmul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + cyc = ((double) t * CLOCK) / (OPS * 1000.0); + printf ("mpn_addmul_1: %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n", + t, + cyc, + CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB); +#endif + + MPN_COPY (dx, dy, size+2); + cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb); + cyy = mpn_addmul_1 (dy+1, s1, size, xlimb); + +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + +#ifndef NOCHECK + if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0 + || dx[size+1] != 0x12345678 || dx[0] != 0x87654321) + { +#ifndef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/gnu/lib/libgmp/mpn/tests/divmod_1.c b/gnu/lib/libgmp/mpn/tests/divmod_1.c new file mode 100644 index 00000000000..f6b541ee472 --- /dev/null +++ b/gnu/lib/libgmp/mpn/tests/divmod_1.c @@ -0,0 +1,120 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include <sys/time.h> +#include <sys/resource.h> + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include <time.h> + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / 
(CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 20000000 +#endif +#ifndef SIZE +#define SIZE 1000 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +main () +{ + mp_limb_t nptr[SIZE]; + mp_limb_t qptr[SIZE]; + mp_limb_t pptr[SIZE]; + mp_limb_t dlimb, rlimb, plimb; + mp_size_t nsize, qsize, psize; + int test; + + for (test = 0; ; test++) + { +#ifdef RANDOM + nsize = random () % SIZE + 1; +#else + nsize = SIZE; +#endif + + mpn_random2 (nptr, nsize); + + mpn_random2 (&dlimb, 1); + if (dlimb == 0) + abort (); + + rlimb = mpn_divmod_1 (qptr, nptr, nsize, dlimb); + qsize = nsize - (qptr[nsize - 1] == 0); + if (qsize == 0) + { + plimb = rlimb; + psize = qsize; + } + else + { + plimb = mpn_mul_1 (pptr, qptr, qsize, dlimb); + psize = qsize; + plimb += mpn_add_1 (pptr, pptr, psize, rlimb); + } + if (plimb != 0) + pptr[psize++] = plimb; + + + if (nsize != psize || mpn_cmp (nptr, pptr, nsize) != 0) + abort (); + } +} diff --git a/gnu/lib/libgmp/mpn/tests/divrem.c b/gnu/lib/libgmp/mpn/tests/divrem.c new file mode 100644 index 00000000000..6eafc99e8da --- /dev/null +++ b/gnu/lib/libgmp/mpn/tests/divrem.c @@ -0,0 +1,129 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include 
<sys/time.h> +#include <sys/resource.h> + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include <time.h> + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 20000000 +#endif +#ifndef SIZE +#define SIZE 100 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +main () +{ + mp_limb_t nptr[2 * SIZE]; + mp_limb_t dptr[SIZE]; + mp_limb_t qptr[2 * SIZE]; + mp_limb_t pptr[2 * SIZE]; + mp_limb_t rptr[2 * SIZE]; + mp_size_t nsize, dsize, qsize, rsize, psize; + int test; + mp_limb_t qlimb; + + for (test = 0; ; test++) + { +#ifdef RANDOM + nsize = random () % (2 * SIZE) + 1; + dsize = random () % nsize + 1; +#else + nsize = 2 * SIZE; + dsize = SIZE; +#endif + + mpn_random2 (nptr, nsize); + mpn_random2 (dptr, dsize); + dptr[dsize - 1] |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + + MPN_COPY (rptr, nptr, nsize); + qlimb = mpn_divrem (qptr, (mp_size_t) 0, rptr, nsize, dptr, dsize); + rsize = dsize; + qsize = nsize - dsize; + qptr[qsize] = qlimb; + qsize += qlimb; + if (qsize == 0 || qsize > 2 * SIZE) + { + 
continue; /* bogus */ + } + else + { + mp_limb_t cy; + if (qsize > dsize) + mpn_mul (pptr, qptr, qsize, dptr, dsize); + else + mpn_mul (pptr, dptr, dsize, qptr, qsize); + psize = qsize + dsize; + psize -= pptr[psize - 1] == 0; + cy = mpn_add (pptr, pptr, psize, rptr, rsize); + pptr[psize] = cy; + psize += cy; + } + + if (nsize != psize || mpn_cmp (nptr, pptr, nsize) != 0) + abort (); + } +} diff --git a/gnu/lib/libgmp/mpn/tests/lshift.c b/gnu/lib/libgmp/mpn/tests/lshift.c new file mode 100644 index 00000000000..f50c5dcebed --- /dev/null +++ b/gnu/lib/libgmp/mpn/tests/lshift.c @@ -0,0 +1,226 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include <sys/time.h> +#include <sys/resource.h> + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include <time.h> + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 10000000 +#endif +#ifndef SIZE +#define SIZE 496 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +mp_limb_t +refmpn_lshift (wp, up, 
usize, cnt) + register mp_ptr wp; + register mp_srcptr up; + mp_size_t usize; + register unsigned int cnt; +{ + register mp_limb_t high_limb, low_limb; + register unsigned sh_1, sh_2; + register mp_size_t i; + mp_limb_t retval; + +#ifdef DEBUG + if (usize == 0 || cnt == 0) + abort (); +#endif + + sh_1 = cnt; +#if 0 + if (sh_1 == 0) + { + if (wp != up) + { + /* Copy from high end to low end, to allow specified input/output + overlapping. */ + for (i = usize - 1; i >= 0; i--) + wp[i] = up[i]; + } + return 0; + } +#endif + + wp += 1; + sh_2 = BITS_PER_MP_LIMB - sh_1; + i = usize - 1; + low_limb = up[i]; + retval = low_limb >> sh_2; + high_limb = low_limb; + while (--i >= 0) + { + low_limb = up[i]; + wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); + high_limb = low_limb; + } + wp[i] = high_limb << sh_1; + + return retval; +} + +#ifndef CNT +#define CNT 4 +#endif + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t dx[SIZE+2]; + mp_limb_t dy[SIZE+2]; + mp_limb_t cyx, cyy; + int i; + long t0, t; + int test; + int cnt = CNT; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + mpn_random2 (s1, size); + + dx[size+1] = 0x12345678; + dy[size+1] = 0x12345678; + dx[0] = 0x87654321; + dy[0] = 0x87654321; + +#ifdef PRINT + mpn_print (s1, size); +#endif + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_lshift (dx+1, s1, size, cnt); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_lshift: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); mpn_print (dx+1, size); +#endif + + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyy = mpn_lshift (dx+1, s1, size, cnt); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_lshift: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%*lX ", (int) (2 * 
sizeof(mp_limb_t)), cyy); mpn_print (dx+1, size); +#endif + +#ifndef NOCHECK + /* Put garbage in the destination. */ + for (i = 1; i <= size; i++) + { + dx[i] = 0x7654321; + dy[i] = 0x1234567; + } + + cyx = refmpn_lshift (dx+1, s1, size, cnt); + cyy = mpn_lshift (dy+1, s1, size, cnt); + + if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0 + || dx[size+1] != 0x12345678 || dx[0] != 0x87654321) + { +#ifndef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/gnu/lib/libgmp/mpn/tests/mul_1.c b/gnu/lib/libgmp/mpn/tests/mul_1.c new file mode 100644 index 00000000000..2b522fa7929 --- /dev/null +++ b/gnu/lib/libgmp/mpn/tests/mul_1.c @@ -0,0 +1,212 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include <sys/time.h> +#include <sys/resource.h> + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include <time.h> + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) 
+#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 20000000 +#endif +#ifndef SIZE +#define SIZE 496 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +mp_limb_t +refmpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + + /* The loop counter and index J goes from -S1_SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + res_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t dx[SIZE+2]; + mp_limb_t dy[SIZE+2]; + mp_limb_t cyx, cyy; + int i; + long t0, t; + int test; + mp_limb_t xlimb; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + + mpn_random2 (s1, size); + mpn_random2 (dy+1, size); + + if (random () % 0x100 == 0) + xlimb = 0; + else + mpn_random2 (&xlimb, 1); + + dy[size+1] = 0x12345678; + dy[0] = 0x87654321; + +#if defined (PRINT) || defined (XPRINT) + printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb); +#endif +#ifdef PRINT + mpn_print (s1, size); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_mul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_mul_1: %5ldms (%.2f 
cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyy = mpn_mul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_mul_1: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif + + cyx = refmpn_mul_1 (dx+1, s1, size, xlimb); + cyy = mpn_mul_1 (dy+1, s1, size, xlimb); + +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + +#ifndef NOCHECK + if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0 + || dx[size+1] != 0x12345678 || dx[0] != 0x87654321) + { +#ifndef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/gnu/lib/libgmp/mpn/tests/rshift.c b/gnu/lib/libgmp/mpn/tests/rshift.c new file mode 100644 index 00000000000..2482bf3407e --- /dev/null +++ b/gnu/lib/libgmp/mpn/tests/rshift.c @@ -0,0 +1,227 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include <sys/time.h> +#include <sys/resource.h> + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include <time.h> + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return 
CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 10000000 +#endif +#ifndef SIZE +#define SIZE 496 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +mp_limb_t +refmpn_rshift (wp, up, usize, cnt) + register mp_ptr wp; + register mp_srcptr up; + mp_size_t usize; + register unsigned int cnt; +{ + register mp_limb_t high_limb, low_limb; + register unsigned sh_1, sh_2; + register mp_size_t i; + mp_limb_t retval; + +#ifdef DEBUG + if (usize == 0 || cnt == 0) + abort (); +#endif + + sh_1 = cnt; +#if 0 + if (sh_1 == 0) + { + if (wp != up) + { + /* Copy from low end to high end, to allow specified input/output + overlapping. 
*/ + for (i = 0; i < usize; i++) + wp[i] = up[i]; + } + return 0; + } +#endif + + wp -= 1; + sh_2 = BITS_PER_MP_LIMB - sh_1; + high_limb = up[0]; + retval = high_limb << sh_2; + low_limb = high_limb; + + for (i = 1; i < usize; i++) + { + high_limb = up[i]; + wp[i] = (low_limb >> sh_1) | (high_limb << sh_2); + low_limb = high_limb; + } + low_limb >>= sh_1; + wp[i] = low_limb; + + return retval; +} + +#ifndef CNT +#define CNT 4 +#endif + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t dx[SIZE+2]; + mp_limb_t dy[SIZE+2]; + mp_limb_t cyx, cyy; + int i; + long t0, t; + int test; + int cnt = CNT; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + mpn_random2 (s1, size); + + dx[size+1] = 0x12345678; + dy[size+1] = 0x12345678; + dx[0] = 0x87654321; + dy[0] = 0x87654321; + +#ifdef PRINT + mpn_print (s1, size); +#endif + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_rshift (dx+1, s1, size, cnt); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_rshift: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); mpn_print (dx+1, size); +#endif + + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyy = mpn_rshift (dx+1, s1, size, cnt); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_rshift: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); mpn_print (dx+1, size); +#endif + +#ifndef NOCHECK + /* Put garbage in the destination. 
*/ + for (i = 1; i <= size; i++) + { + dx[i] = 0x7654321; + dy[i] = 0x1234567; + } + + cyx = refmpn_rshift (dx+1, s1, size, cnt); + cyy = mpn_rshift (dy+1, s1, size, cnt); + + if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0 + || dx[size+1] != 0x12345678 || dx[0] != 0x87654321) + { +#ifndef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/gnu/lib/libgmp/mpn/tests/sub_n.c b/gnu/lib/libgmp/mpn/tests/sub_n.c new file mode 100644 index 00000000000..2b9031bbfef --- /dev/null +++ b/gnu/lib/libgmp/mpn/tests/sub_n.c @@ -0,0 +1,211 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" + +#ifndef USG +#include <sys/time.h> +#include <sys/resource.h> + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include <time.h> + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) +#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) 
+#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 10000000 +#endif +#ifndef SIZE +#define SIZE 328 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + + +mp_limb_t +#if __STDC__ +refmpn_sub_n (mp_ptr res_ptr, + mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +refmpn_sub_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to subtrahend */ + cy = (y < cy); /* get out carry from that addition */ + y = x - y; /* main subtract */ + cy = (y > x) + cy; /* get out carry from the subtract, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t s2[SIZE]; + mp_limb_t dx[SIZE+1]; + mp_limb_t dy[SIZE+1]; + int cyx, cyy; + int i; + long t0, t; + int test; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + + mpn_random2 (s1, size); + mpn_random2 (s2, size); + + dx[size] = 0x12345678; + dy[size] = 0x12345678; + +#ifdef PRINT + mpn_print (s1, size); + mpn_print (s2, size); +#endif + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_sub_n (dx, s1, s2, size); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_sub_n: %ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%d ", cyx); mpn_print (dx, size); +#endif + + t0 = cputime(); + for (i = 0; i < 
TIMES; i++) + cyx = mpn_sub_n (dx, s1, s2, size); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_sub_n: %ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif +#ifdef PRINT + printf ("%d ", cyx); mpn_print (dx, size); +#endif + +#ifndef NOCHECK + /* Put garbage in the destination. */ + for (i = 0; i < size; i++) + { + dx[i] = 0x7654321; + dy[i] = 0x1234567; + } + + cyx = refmpn_sub_n (dx, s1, s2, size); + cyy = mpn_sub_n (dy, s1, s2, size); + if (cyx != cyy || mpn_cmp (dx, dy, size) != 0 + || dx[size] != 0x12345678 || dy[size] != 0x12345678) + { +#ifndef PRINT + printf ("%d ", cyx); mpn_print (dx, size); + printf ("%d ", cyy); mpn_print (dy, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/gnu/lib/libgmp/mpn/tests/submul_1.c b/gnu/lib/libgmp/mpn/tests/submul_1.c new file mode 100644 index 00000000000..0e464e61328 --- /dev/null +++ b/gnu/lib/libgmp/mpn/tests/submul_1.c @@ -0,0 +1,218 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef USG +#include <sys/time.h> +#include <sys/resource.h> + +unsigned long +cputime () +{ + struct rusage rus; + + getrusage (0, &rus); + return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; +} +#else +#include <time.h> + +#ifndef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC 1000000 +#endif + +#if CLOCKS_PER_SEC >= 10000 +#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000)) +#else +#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC) +#endif + +unsigned long +cputime () +{ + return CLOCK_TO_MILLISEC (clock ()); +} +#endif + +#define M * 1000000 + +#ifndef CLOCK +#if defined (__m88k__) +#define CLOCK 20 M +#elif defined (__i386__) +#define CLOCK (16.666667 M) +#elif defined (__m68k__) +#define CLOCK (20 M) 
+#elif defined (_IBMR2) +#define CLOCK (25 M) +#elif defined (__sparc__) +#define CLOCK (20 M) +#elif defined (__sun__) +#define CLOCK (20 M) +#elif defined (__mips) +#define CLOCK (40 M) +#elif defined (__hppa__) +#define CLOCK (50 M) +#elif defined (__alpha) +#define CLOCK (133 M) +#else +#error "Don't know CLOCK of your machine" +#endif +#endif + +#ifndef OPS +#define OPS 20000000 +#endif +#ifndef SIZE +#define SIZE 496 +#endif +#ifndef TIMES +#define TIMES OPS/SIZE +#else +#undef OPS +#define OPS (SIZE*TIMES) +#endif + +mp_limb_t +refmpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + register mp_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. 
*/ + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + x = res_ptr[j]; + prod_low = x - prod_low; + cy_limb += (prod_low > x); + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} + +main (argc, argv) + int argc; + char **argv; +{ + mp_limb_t s1[SIZE]; + mp_limb_t dx[SIZE+2]; + mp_limb_t dy[SIZE+2]; + mp_limb_t cyx, cyy; + int i; + long t0, t; + int test; + mp_limb_t xlimb; + mp_size_t size; + + for (test = 0; ; test++) + { +#ifdef RANDOM + size = (random () % SIZE + 1); +#else + size = SIZE; +#endif + + mpn_random2 (s1, size); + mpn_random2 (dy+1, size); + + if (random () % 0x100 == 0) + xlimb = 0; + else + mpn_random2 (&xlimb, 1); + + dy[size+1] = 0x12345678; + dy[0] = 0x87654321; + +#if defined (PRINT) || defined (XPRINT) + printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb); +#endif +#ifdef PRINT + mpn_print (dy+1, size); + mpn_print (s1, size); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyx = refmpn_submul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + printf ("refmpn_submul_1: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif + + MPN_COPY (dx, dy, size+2); + t0 = cputime(); + for (i = 0; i < TIMES; i++) + cyy = mpn_submul_1 (dx+1, s1, size, xlimb); + t = cputime() - t0; +#if TIMES != 1 + printf ("mpn_submul_1: %5ldms (%.2f cycles/limb)\n", + t, + ((double) t * CLOCK) / (OPS * 1000.0)); +#endif + + MPN_COPY (dx, dy, size+2); + cyx = refmpn_submul_1 (dx+1, s1, size, xlimb); + cyy = mpn_submul_1 (dy+1, s1, size, xlimb); + +#ifdef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + +#ifndef NOCHECK + if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0 + || dx[size+1] != 0x12345678 
|| dx[0] != 0x87654321) + { +#ifndef PRINT + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); + mpn_print (dx+1, size); + printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); + mpn_print (dy+1, size); +#endif + abort(); + } +#endif + } +} + +mpn_print (mp_ptr p, mp_size_t size) +{ + mp_size_t i; + + for (i = size - 1; i >= 0; i--) + { + printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]); +#ifdef SPACE + if (i != 0) + printf (" "); +#endif + } + puts (""); +} diff --git a/gnu/lib/libgmp/mpn/tests/tst-addsub.c b/gnu/lib/libgmp/mpn/tests/tst-addsub.c new file mode 100644 index 00000000000..e02b9d5cc76 --- /dev/null +++ b/gnu/lib/libgmp/mpn/tests/tst-addsub.c @@ -0,0 +1,164 @@ +#include <stdio.h> +#include <stdlib.h> +#include "gmp.h" +#include "gmp-impl.h" + +#define ADD 1 +#define SUB 2 + +#ifndef METHOD +#define METHOD ADD +#endif + +#if METHOD == ADD +#define REFCALL refmpn_add_n +#define TESTCALL mpn_add_n +#endif + +#if METHOD == SUB +#define REFCALL refmpn_sub_n +#define TESTCALL mpn_sub_n +#endif + +mp_limb_t refmpn_add_n (); +mp_limb_t refmpn_sub_n (); + +#define SIZE 100 + +/* Run TESTCALL and REFCALL on n_passes sets of random operands placed at + random offsets inside their buffers, and abort () on the first pass where + the returned limbs or any limb of the two destination buffers differ. + An optional first command line argument replaces the default maximum + operand size SIZE. */ +main (argc, argv) + int argc; + char **argv; +{ + mp_size_t alloc_size, max_size, size, i, cumul_size; + mp_ptr s1, s2, dx, dy; + int s1_align, s2_align, d_align; + long pass, n_passes; + mp_limb_t cx, cy; + + max_size = SIZE; + n_passes = 1000000; + + argc--; argv++; + if (argc) + { + max_size = atol (*argv); + argc--; argv++; + } + + /* 32 spare limbs leave room for the random offsets (0..31) used below. */ + alloc_size = max_size + 32; + s1 = malloc (alloc_size * BYTES_PER_MP_LIMB); + s2 = malloc (alloc_size * BYTES_PER_MP_LIMB); + dx = malloc (alloc_size * BYTES_PER_MP_LIMB); + dy = malloc (alloc_size * BYTES_PER_MP_LIMB); + + cumul_size = 0; + /* The loop adds the previous pass's size to cumul_size before choosing a + new one, so size needs a defined value on the first pass. */ + size = 0; + for (pass = 0; pass < n_passes; pass++) + { + /* Progress indicator: print the pass number roughly every million + limbs processed. */ + cumul_size += size; + if (cumul_size >= 1000000) + { + cumul_size -= 1000000; + printf ("%ld ", pass); fflush (stdout); + } + s1_align = random () % 32; + s2_align = random () % 32; + d_align = random () % 32; + + size = random () % max_size + 1; + + mpn_random2 (s1 + 
s1_align, size); + mpn_random2 (s2 + s2_align, size); + + /* Fill both destinations with the same pattern so that a stray write + outside the result area shows up in the full-buffer compare. */ + for (i = 0; i < alloc_size; i++) + dx[i] = dy[i] = i + 0x9876500; + + cx = TESTCALL (dx + d_align, s1 + s1_align, s2 + s2_align, size); + cy = REFCALL (dy + d_align, s1 + s1_align, s2 + s2_align, size); + + if (cx != cy || mpn_cmp (dx, dy, alloc_size) != 0) + abort (); + } + + /* n_passes is a long, so it is printed with %ld. */ + printf ("%ld passes OK\n", n_passes); + exit (0); +} + +/* Reference addition: store {s1_ptr,size} + {s2_ptr,size} at res_ptr and + return the carry out. The do-while loop runs at least once. */ +mp_limb_t +#if __STDC__ +refmpn_add_n (mp_ptr res_ptr, + mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +refmpn_add_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = (y < cy); /* get out carry from that addition */ + y = x + y; /* add other addend */ + cy = (y < x) + cy; /* get out carry from that add, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} + +mp_limb_t +#if __STDC__ +refmpn_sub_n (mp_ptr res_ptr, + mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +refmpn_sub_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. 
*/ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to subtrahend */ + cy = (y < cy); /* get out carry from that addition */ + y = x - y; /* main subtract */ + cy = (y > x) + cy; /* get out carry from the subtract, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} diff --git a/gnu/lib/libgmp/mpn/vax/add_n.s b/gnu/lib/libgmp/mpn/vax/add_n.s new file mode 100644 index 00000000000..d4764e23a2f --- /dev/null +++ b/gnu/lib/libgmp/mpn/vax/add_n.s @@ -0,0 +1,48 @@ +# VAX __mpn_add_n -- Add two limb vectors of the same length > 0 and store +# sum in a third limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr (sp + 4) +# s1_ptr (sp + 8) +# s2_ptr (sp + 12) +# size (sp + 16) +# +# The code fetches the four arguments through the argument pointer, at +# 4(ap), 8(ap), 12(ap) and 16(ap). The value left in r0 at the ret is the +# carry out of the last limb addition. + +.text + .align 1 +.globl ___mpn_add_n +___mpn_add_n: +# Entry mask 0x0: no register-save bits are set. + .word 0x0 +# r0 = size (loop counter), r1 = s2_ptr, r2 = s1_ptr, r3 = res_ptr. + movl 16(ap),r0 + movl 12(ap),r1 + movl 8(ap),r2 + movl 4(ap),r3 +# r4 = 0. NOTE(review): presumably this is here to clear the carry flag +# before the first adwc -- confirm against the VAX instruction reference. + subl2 r4,r4 + +# Per limb: load *s1_ptr++, add *s2_ptr++ plus the incoming carry, store to +# *res_ptr++. The carry left by adwc has to survive the store and the loop +# branch, because the next iteration's adwc consumes it. +Loop: + movl (r2)+,r4 + adwc (r1)+,r4 + movl r4,(r3)+ + jsobgtr r0,Loop + +# r0 has counted down to zero here, so this computes r0 = 0 + 0 + carry. + adwc r0,r0 + ret diff --git a/gnu/lib/libgmp/mpn/vax/addmul_1.s b/gnu/lib/libgmp/mpn/vax/addmul_1.s new file mode 100644 index 00000000000..746d95ba7a0 --- /dev/null +++ b/gnu/lib/libgmp/mpn/vax/addmul_1.s @@ -0,0 +1,126 @@ +# VAX __mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr (ap + 4) +# s1_ptr (ap + 8) +# size (ap + 12) +# s2_limb (ap + 16) + +.text + .align 1 +.globl ___mpn_addmul_1 +___mpn_addmul_1: + .word 0xfc0 # entry mask: save r6-r11 + movl 12(ap),r4 # r4 = size + movl 8(ap),r8 # r8 = s1_ptr + movl 4(ap),r9 # r9 = res_ptr + movl 16(ap),r6 # r6 = s2_limb + jlss s2_big # s2_limb has its top bit set + + clrl r3 + incl r4 + ashl $-1,r4,r7 # r7 = (size + 1) >> 1 passes of the two-limb loop + jlbc r4,L1 # size odd: enter at the second half of the loop + clrl r11 + +# Loop for S2_LIMB < 0x80000000 +Loop1: movl (r8)+,r1 + jlss L1n0 # s1 limb has its top bit set + emul r1,r6,$0,r2 # signed multiply: r2 = low limb, r3 = high limb + addl2 r11,r2 # add carry limb from the previous step + adwc $0,r3 + addl2 r2,(r9)+ # add low product limb into *res_ptr++ + adwc $0,r3 # carry from that add goes into the high limb +L1: movl (r8)+,r1 + jlss L1n1 +L1p1: emul r1,r6,$0,r10 # signed multiply: r10 = low limb, r11 = high limb + addl2 r3,r10 + adwc $0,r11 + addl2 r10,(r9)+ + adwc $0,r11 + + jsobgtr r7,Loop1 + movl r11,r0 # return the carry limb + ret + +L1n0: emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r6,r3 # s1 limb negative as signed: add s2_limb to fix the high limb + addl2 r2,(r9)+ + adwc $0,r3 + movl (r8)+,r1 + jgeq L1p1 +L1n1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r6,r11 + addl2 r10,(r9)+ + adwc $0,r11 + + jsobgtr r7,Loop1 + movl r11,r0 + ret + + +s2_big: clrl r3 + incl r4 + ashl $-1,r4,r7 + jlbc r4,L2 + clrl r11 + +# Loop for S2_LIMB >= 0x80000000 +Loop2: movl (r8)+,r1 + jlss L2n0 + emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r1,r3 # s2_limb negative as signed: add s1 limb to fix the high limb + addl2 r2,(r9)+ + adwc $0,r3 +L2: movl (r8)+,r1 + jlss L2n1 +L2p1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r1,r11 + addl2 r10,(r9)+ + adwc $0,r11 + + jsobgtr r7,Loop2 + movl r11,r0 + ret + +L2n0: emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r6,r3 # both operands negative as signed: apply both corrections + addl2 r2,(r9)+ + adwc r1,r3 + movl (r8)+,r1 + jgeq L2p1 +L2n1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r6,r11 + addl2 r10,(r9)+ + adwc r1,r11 + + jsobgtr r7,Loop2 + movl r11,r0 + ret diff --git a/gnu/lib/libgmp/mpn/vax/mul_1.s b/gnu/lib/libgmp/mpn/vax/mul_1.s new file mode 100644 index 00000000000..e2ff5a1bc1c --- /dev/null +++ b/gnu/lib/libgmp/mpn/vax/mul_1.s @@ -0,0 +1,123 @@ +# VAX __mpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library.
+ +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr (ap + 4) +# s1_ptr (ap + 8) +# size (ap + 12) +# s2_limb (ap + 16) + +.text + .align 1 +.globl ___mpn_mul_1 +___mpn_mul_1: + .word 0xfc0 # entry mask: save r6-r11 + movl 12(ap),r4 # r4 = size + movl 8(ap),r8 # r8 = s1_ptr + movl 4(ap),r9 # r9 = res_ptr + movl 16(ap),r6 # r6 = s2_limb + jlss s2_big # s2_limb has its top bit set + +# One might want to combine the addl2 and the store below, but that +# is actually just slower according to my timing tests.
(VAX 3600) + + clrl r3 + incl r4 + ashl $-1,r4,r7 # r7 = (size + 1) >> 1 passes of the two-limb loop + jlbc r4,L1 # size odd: enter at the second half of the loop + clrl r11 + +# Loop for S2_LIMB < 0x80000000 +Loop1: movl (r8)+,r1 + jlss L1n0 # s1 limb has its top bit set + emul r1,r6,$0,r2 # signed multiply: r2 = low limb, r3 = high limb + addl2 r11,r2 # add carry limb from the previous step + adwc $0,r3 + movl r2,(r9)+ # store low product limb to *res_ptr++ +L1: movl (r8)+,r1 + jlss L1n1 +L1p1: emul r1,r6,$0,r10 # signed multiply: r10 = low limb, r11 = high limb + addl2 r3,r10 + adwc $0,r11 + movl r10,(r9)+ + + jsobgtr r7,Loop1 + movl r11,r0 # return the carry limb + ret + +L1n0: emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r6,r3 # s1 limb negative as signed: add s2_limb to fix the high limb + movl r2,(r9)+ + movl (r8)+,r1 + jgeq L1p1 +L1n1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r6,r11 + movl r10,(r9)+ + + jsobgtr r7,Loop1 + movl r11,r0 + ret + + +s2_big: clrl r3 + incl r4 + ashl $-1,r4,r7 + jlbc r4,L2 + clrl r11 + +# Loop for S2_LIMB >= 0x80000000 +Loop2: movl (r8)+,r1 + jlss L2n0 + emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r1,r3 # s2_limb negative as signed: add s1 limb to fix the high limb + movl r2,(r9)+ +L2: movl (r8)+,r1 + jlss L2n1 +L2p1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r1,r11 + movl r10,(r9)+ + + jsobgtr r7,Loop2 + movl r11,r0 + ret + +L2n0: emul r1,r6,$0,r2 + addl2 r1,r3 # both operands negative as signed: apply both corrections + addl2 r11,r2 + adwc r6,r3 + movl r2,(r9)+ + movl (r8)+,r1 + jgeq L2p1 +L2n1: emul r1,r6,$0,r10 + addl2 r1,r11 + addl2 r3,r10 + adwc r6,r11 + movl r10,(r9)+ + + jsobgtr r7,Loop2 + movl r11,r0 + ret diff --git a/gnu/lib/libgmp/mpn/vax/sub_n.s b/gnu/lib/libgmp/mpn/vax/sub_n.s new file mode 100644 index 00000000000..a891c4425a1 --- /dev/null +++ b/gnu/lib/libgmp/mpn/vax/sub_n.s @@ -0,0 +1,48 @@ +# VAX __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store +# difference in a third limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version.
+ +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr (ap + 4) +# s1_ptr (ap + 8) +# s2_ptr (ap + 12) +# size (ap + 16) + +.text + .align 1 +.globl ___mpn_sub_n +___mpn_sub_n: + .word 0x0 # entry mask: no registers saved + movl 16(ap),r0 # r0 = size (loop counter) + movl 12(ap),r1 # r1 = s2_ptr + movl 8(ap),r2 # r2 = s1_ptr + movl 4(ap),r3 # r3 = res_ptr + subl2 r4,r4 # r4 - r4: start the chain with borrow clear + +Loop: + movl (r2)+,r4 # fetch next s1 limb + sbwc (r1)+,r4 # subtract next s2 limb and borrow + movl r4,(r3)+ # store difference limb + jsobgtr r0,Loop # count down size; loop while r0 > 0 + + adwc r0,r0 # r0 is 0 after the loop (size > 0), so r0 = borrow out + ret diff --git a/gnu/lib/libgmp/mpn/vax/submul_1.s b/gnu/lib/libgmp/mpn/vax/submul_1.s new file mode 100644 index 00000000000..c473937caf9 --- /dev/null +++ b/gnu/lib/libgmp/mpn/vax/submul_1.s @@ -0,0 +1,126 @@ +# VAX __mpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +# License for more details. + +# You should have received a copy of the GNU Library General Public License +# along with the GNU MP Library; see the file COPYING.LIB.
If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr (ap + 4) +# s1_ptr (ap + 8) +# size (ap + 12) +# s2_limb (ap + 16) + +.text + .align 1 +.globl ___mpn_submul_1 +___mpn_submul_1: + .word 0xfc0 # entry mask: save r6-r11 + movl 12(ap),r4 # r4 = size + movl 8(ap),r8 # r8 = s1_ptr + movl 4(ap),r9 # r9 = res_ptr + movl 16(ap),r6 # r6 = s2_limb + jlss s2_big # s2_limb has its top bit set + + clrl r3 + incl r4 + ashl $-1,r4,r7 # r7 = (size + 1) >> 1 passes of the two-limb loop + jlbc r4,L1 # size odd: enter at the second half of the loop + clrl r11 + +# Loop for S2_LIMB < 0x80000000 +Loop1: movl (r8)+,r1 + jlss L1n0 # s1 limb has its top bit set + emul r1,r6,$0,r2 # signed multiply: r2 = low limb, r3 = high limb + addl2 r11,r2 # add carry limb from the previous step + adwc $0,r3 + subl2 r2,(r9)+ # subtract low product limb from *res_ptr++ + adwc $0,r3 # borrow from that subtract goes into the high limb +L1: movl (r8)+,r1 + jlss L1n1 +L1p1: emul r1,r6,$0,r10 # signed multiply: r10 = low limb, r11 = high limb + addl2 r3,r10 + adwc $0,r11 + subl2 r10,(r9)+ + adwc $0,r11 + + jsobgtr r7,Loop1 + movl r11,r0 # return the carry limb + ret + +L1n0: emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r6,r3 # s1 limb negative as signed: add s2_limb to fix the high limb + subl2 r2,(r9)+ + adwc $0,r3 + movl (r8)+,r1 + jgeq L1p1 +L1n1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r6,r11 + subl2 r10,(r9)+ + adwc $0,r11 + + jsobgtr r7,Loop1 + movl r11,r0 + ret + + +s2_big: clrl r3 + incl r4 + ashl $-1,r4,r7 + jlbc r4,L2 + clrl r11 + +# Loop for S2_LIMB >= 0x80000000 +Loop2: movl (r8)+,r1 + jlss L2n0 + emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r1,r3 # s2_limb negative as signed: add s1 limb to fix the high limb + subl2 r2,(r9)+ + adwc $0,r3 +L2: movl (r8)+,r1 + jlss L2n1 +L2p1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r1,r11 + subl2 r10,(r9)+ + adwc $0,r11 + + jsobgtr r7,Loop2 + movl r11,r0 + ret + +L2n0: emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r6,r3 # both operands negative as signed: apply both corrections + subl2 r2,(r9)+ + adwc r1,r3 + movl (r8)+,r1 + jgeq L2p1 +L2n1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r6,r11 + subl2 r10,(r9)+ + adwc r1,r11 + + jsobgtr r7,Loop2 + movl r11,r0 + ret diff --git a/gnu/lib/libgmp/mpn/x86/add_n.S b/gnu/lib/libgmp/mpn/x86/add_n.S new file mode 100644 index 00000000000..5bbbd0d88c0 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/add_n.S @@ -0,0 +1,106 @@ +/* i80386 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +sum in a third limb vector. + +Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 12) + size (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_add_n) +C_SYMBOL_NAME(__mpn_add_n:) + pushl %edi /* saved here, restored before ret */ + pushl %esi /* saved here, restored before ret */ + + movl 12(%esp),%edi /* res_ptr */ + movl 16(%esp),%esi /* s1_ptr */ + movl 20(%esp),%edx /* s2_ptr */ + movl 24(%esp),%ecx /* size */ + + movl %ecx,%eax + shrl $3,%ecx /* compute count for unrolled loop */ + negl %eax + andl $7,%eax /* get index where to start loop */ + jz Loop /* necessary special case for 0 */ + incl %ecx /* adjust loop count */ + shll $2,%eax /* adjustment for pointers... */ + subl %eax,%edi /* ... since they are offset ... */ + subl %eax,%esi /* ... by a constant when we ... */ + subl %eax,%edx /* ... enter the loop */ + shrl $2,%eax /* restore previous value */ +#ifdef PIC +/* Calculate start address in loop for PIC. Due to limitations in some + assemblers, Loop-L0-3 cannot be put into the leal */ + call L0 +L0: leal (%eax,%eax,8),%eax /* eax = 9 * start index */ + addl (%esp),%eax /* add address of L0 (pushed by the call) */ + addl $(Loop-L0-3),%eax + addl $4,%esp /* discard the address pushed by the call */ +#else +/* Calculate start address in loop for non-PIC.
*/ + leal (Loop - 3)(%eax,%eax,8),%eax /* eax = Loop - 3 + 9 * start index */ +#endif + jmp *%eax /* jump into loop */ + ALIGN (3) +Loop: movl (%esi),%eax + adcl (%edx),%eax + movl %eax,(%edi) + movl 4(%esi),%eax + adcl 4(%edx),%eax + movl %eax,4(%edi) + movl 8(%esi),%eax + adcl 8(%edx),%eax + movl %eax,8(%edi) + movl 12(%esi),%eax + adcl 12(%edx),%eax + movl %eax,12(%edi) + movl 16(%esi),%eax + adcl 16(%edx),%eax + movl %eax,16(%edi) + movl 20(%esi),%eax + adcl 20(%edx),%eax + movl %eax,20(%edi) + movl 24(%esi),%eax + adcl 24(%edx),%eax + movl %eax,24(%edi) + movl 28(%esi),%eax + adcl 28(%edx),%eax + movl %eax,28(%edi) + leal 32(%edi),%edi /* advance pointers by 8 limbs */ + leal 32(%esi),%esi + leal 32(%edx),%edx + decl %ecx + jnz Loop + + sbbl %eax,%eax /* eax = 0 - carry */ + negl %eax /* return carry out as 0 or 1 */ + + popl %esi + popl %edi + ret diff --git a/gnu/lib/libgmp/mpn/x86/addmul_1.S b/gnu/lib/libgmp/mpn/x86/addmul_1.S new file mode 100644 index 00000000000..c11209d925e --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/addmul_1.S @@ -0,0 +1,76 @@ +/* i80386 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + the result to a second limb vector. + +Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA.
*/ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_addmul_1) +C_SYMBOL_NAME(__mpn_addmul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* arguments start at esp+20 after the four pushes */ + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* res_ptr += 4 * size */ + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) /* s1_ptr += 4 * size */ + INSN1(neg,l ,R(size)) /* index counts from -size up to 0 */ + INSN2(xor,l ,R(ebx),R(ebx)) /* ebx = carry limb, initially 0 */ + ALIGN (3) +Loop: + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb * s2_limb */ + INSN2(add,l ,R(eax),R(ebx)) /* add carry limb from the previous step */ + INSN2(adc,l ,R(edx),$0) + INSN2(add,l ,MEM_INDEX(res_ptr,size,4),R(eax)) /* add low limb into the result vector */ + INSN2(adc,l ,R(edx),$0) + INSN2(mov,l ,R(ebx),R(edx)) /* high limb becomes the next carry limb */ + + INSN1(inc,l ,R(size)) + INSN1(jnz, ,Loop) + INSN2(mov,l ,R(eax),R(ebx)) /* return the final carry limb */ + + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/gnu/lib/libgmp/mpn/x86/lshift.S b/gnu/lib/libgmp/mpn/x86/lshift.S new file mode 100644 index 00000000000..8173b92cbe4 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/lshift.S @@ -0,0 +1,85 @@ +/* i80386 __mpn_lshift -- + +Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + size (sp + 12) + cnt (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_lshift) +C_SYMBOL_NAME(__mpn_lshift:) + pushl %edi + pushl %esi + pushl %ebx + + movl 16(%esp),%edi /* res_ptr */ + movl 20(%esp),%esi /* s_ptr */ + movl 24(%esp),%edx /* size */ + movl 28(%esp),%ecx /* cnt */ + + subl $4,%esi /* adjust s_ptr */ + + movl (%esi,%edx,4),%ebx /* read most significant limb */ + xorl %eax,%eax + shldl %cl,%ebx,%eax /* compute carry limb */ + decl %edx + jz Lend /* size was 1: only the low limb remains */ + pushl %eax /* push carry limb onto stack */ + testb $1,%dl /* bit 0 of size-1; byte test needs the byte register */ + jnz L1 /* enter loop in the middle */ + movl %ebx,%eax + + ALIGN (3) +Loop: movl (%esi,%edx,4),%ebx /* load next lower limb */ + shldl %cl,%ebx,%eax /* compute result limb */ + movl %eax,(%edi,%edx,4) /* store it */ + decl %edx +L1: movl (%esi,%edx,4),%eax + shldl %cl,%eax,%ebx + movl %ebx,(%edi,%edx,4) + decl %edx + jnz Loop + + shll %cl,%eax /* compute least significant limb */ + movl %eax,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebx + popl %esi + popl %edi + ret + +Lend: shll %cl,%ebx /* compute least significant limb */ + movl %ebx,(%edi) /* store it */ + + popl %ebx + popl %esi + popl %edi + ret diff --git a/gnu/lib/libgmp/mpn/x86/mul_1.S b/gnu/lib/libgmp/mpn/x86/mul_1.S new file mode 100644 index 00000000000..b2713999801 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/mul_1.S @@ -0,0 +1,75 @@ +/* i80386 __mpn_mul_1 -- Multiply a limb vector with a limb and store + the result in a second limb vector. + +Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_mul_1) +C_SYMBOL_NAME(__mpn_mul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* arguments start at esp+20 after the four pushes */ + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* res_ptr += 4 * size */ + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) /* s1_ptr += 4 * size */ + INSN1(neg,l ,R(size)) /* index counts from -size up to 0 */ + INSN2(xor,l ,R(ebx),R(ebx)) /* ebx = carry limb, initially 0 */ + ALIGN (3) +Loop: + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb * s2_limb */ + INSN2(add,l ,R(eax),R(ebx)) /* add carry limb from the previous step */ + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(eax)) /* store low limb; mov leaves the carry flag alone */ + INSN2(adc,l ,R(edx),$0) + INSN2(mov,l ,R(ebx),R(edx)) /* high limb becomes the next carry limb */ + + INSN1(inc,l ,R(size)) + INSN1(jnz, ,Loop) + INSN2(mov,l ,R(eax),R(ebx)) /* return the final carry limb */ + + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git
a/gnu/lib/libgmp/mpn/x86/pentium/README b/gnu/lib/libgmp/mpn/x86/pentium/README new file mode 100644 index 00000000000..d73b0826848 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/pentium/README @@ -0,0 +1,26 @@ +This directory contains mpn functions optimized for Intel Pentium +processors. + +RELEVANT OPTIMIZATION ISSUES + +1. Pentium doesn't allocate cache lines on writes, unlike most other modern +processors. Since the functions in the mpn class do array writes, we have to +handle allocating the destination cache lines by reading a word from them in the +loops, to achieve the best performance. + +2. Pairing of memory operations requires that the two issued operations refer +to different cache banks. The simplest way to ensure this is to read/write +two words from the same object. If we make operations on different objects, +they might or might not be to the same cache bank. + +STATUS + +1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium +documentation indicates that they should take only 43/8 = 5.375 cycles/limb, +or 5 cycles/limb asymptotically. + +2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb. Due to loop +overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb. + +3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they +should... diff --git a/gnu/lib/libgmp/mpn/x86/pentium/add_n.S b/gnu/lib/libgmp/mpn/x86/pentium/add_n.S new file mode 100644 index 00000000000..ac6f2819b25 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/pentium/add_n.S @@ -0,0 +1,130 @@ +/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store + sum in a third limb vector. + +Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 12) + size (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_add_n) +C_SYMBOL_NAME(__mpn_add_n:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ + movl 32(%esp),%ecx /* size */ + + movl (%ebp),%ebx /* preload first s2 limb */ + + decl %ecx /* the last limb is handled separately at Lend2 */ + movl %ecx,%edx + shrl $3,%ecx /* ecx = number of 8-limb passes */ + andl $7,%edx /* edx = leftover limbs for Loop2 */ + testl %ecx,%ecx /* zero carry flag */ + jz Lend + pushl %edx /* edx is reused as scratch inside Loop */ + + ALIGN (3) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + adcl %ebx,%eax + movl 4(%ebp),%ebx + adcl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + adcl %ebx,%eax + movl 12(%ebp),%ebx + adcl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + adcl %ebx,%eax + movl 20(%ebp),%ebx + adcl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %ebx,%eax + movl
28(%ebp),%ebx + adcl %ebx,%edx + movl 32(%ebp),%ebx /* preload s2 limb for the next pass or the tail */ + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp + decl %ecx + jnz Loop + + popl %edx /* recover leftover limb count */ +Lend: + decl %edx /* test %edx w/o clobbering carry */ + js Lend2 + incl %edx +Loop2: + leal 4(%edi),%edi + movl (%esi),%eax + adcl %ebx,%eax + movl 4(%ebp),%ebx /* preload next s2 limb */ + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz Loop2 +Lend2: + movl (%esi),%eax /* final limb */ + adcl %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax /* eax = 0 - carry */ + negl %eax /* return carry out as 0 or 1 */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret diff --git a/gnu/lib/libgmp/mpn/x86/pentium/addmul_1.S b/gnu/lib/libgmp/mpn/x86/pentium/addmul_1.S new file mode 100644 index 00000000000..7cfa5db6877 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/pentium/addmul_1.S @@ -0,0 +1,83 @@ +/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add + the result to a second limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA.
*/ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_addmul_1) +C_SYMBOL_NAME(__mpn_addmul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* arguments start at esp+20 after the four pushes */ + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* res_ptr += 4 * size */ + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) /* s1_ptr += 4 * size */ + INSN1(neg,l ,R(size)) /* index counts from -size up to 0 */ + INSN2(xor,l ,R(ebx),R(ebx)) /* ebx = carry limb, initially 0; also clears the carry flag */ + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) /* fold in the carry left by the previous pass */ + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb * s2_limb */ + + INSN2(add,l ,R(eax),R(ebx)) /* add carry limb */ + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) /* load the result limb to update */ + + INSN2(adc,l ,R(edx),$0) + INSN2(add,l ,R(ebx),R(eax)) /* carry from this add is consumed at the top of Loop */ + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) /* high limb becomes the next carry limb */ + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) /* fold in the carry from the last pass */ + INSN2(mov,l ,R(eax),R(ebx)) /* return the final carry limb */ + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/gnu/lib/libgmp/mpn/x86/pentium/lshift.S b/gnu/lib/libgmp/mpn/x86/pentium/lshift.S new file mode 100644 index 00000000000..b2989835631 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/pentium/lshift.S @@ -0,0 +1,217 @@ +/* Pentium optimized __mpn_lshift -- + +Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version.
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + size (sp + 12) + cnt (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_lshift) +C_SYMBOL_NAME(__mpn_lshift:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s_ptr */ + movl 28(%esp),%ebp /* size */ + movl 32(%esp),%ecx /* cnt */ + +/* We can use faster code for shift-by-1 under certain conditions. */ + cmp $1,%ecx + jne Lnormal + leal 4(%esi),%eax + cmpl %edi,%eax + jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */ + leal (%esi,%ebp,4),%eax + cmpl %eax,%edi + jnc Lspecial /* jump if res_ptr >= s_ptr + size */ + +Lnormal: + leal -4(%edi,%ebp,4),%edi /* point at the most significant result limb */ + leal -4(%esi,%ebp,4),%esi /* point at the most significant source limb */ + + movl (%esi),%edx + subl $4,%esi + xorl %eax,%eax + shldl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + + decl %ebp + pushl %ebp /* keep size - 1 for the cleanup loop */ + shrl $3,%ebp /* ebp = number of 8-limb passes */ + jz Lend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +Loop: movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + shldl %cl,%eax,%ebx + shldl %cl,%edx,%eax + movl %ebx,(%edi) + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + shldl %cl,%ebx,%edx + shldl %cl,%eax,%ebx + movl %edx,-8(%edi) + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + shldl %cl,%edx,%eax + shldl %cl,%ebx,%edx + movl %eax,-16(%edi) + movl
%edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + shldl %cl,%eax,%ebx + shldl %cl,%edx,%eax + movl %ebx,-24(%edi) + movl %eax,-28(%edi) + + subl $32,%esi + subl $32,%edi + decl %ebp + jnz Loop + +Lend: popl %ebp + andl $7,%ebp /* leftover limbs after the 8-limb passes */ + jz Lend2 +Loop2: movl (%esi),%eax + shldl %cl,%eax,%edx + movl %edx,(%edi) + movl %eax,%edx + subl $4,%esi + subl $4,%edi + decl %ebp + jnz Loop2 + +Lend2: shll %cl,%edx /* compute least significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* We loop from least significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. +*/ + +Lspecial: + movl (%esi),%edx + addl $4,%esi + + decl %ebp + pushl %ebp /* keep size - 1 for the cleanup loop */ + shrl $3,%ebp /* ebp = number of 8-limb passes */ + + addl %edx,%edx /* shift first limb left by 1; carry = bit shifted out */ + incl %ebp /* inc/dec pair tests ebp for zero ... */ + decl %ebp /* ... without touching the carry flag */ + jz LLend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +LLoop: movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + adcl %eax,%eax + movl %ebx,(%edi) + adcl %edx,%edx + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + adcl %ebx,%ebx + movl %edx,8(%edi) + adcl %eax,%eax + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + adcl %edx,%edx + movl %eax,16(%edi) + adcl %ebx,%ebx + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %eax,%eax + movl %ebx,24(%edi) + adcl %edx,%edx + movl %eax,28(%edi) + + leal 32(%esi),%esi /* use leal not to clobber carry */ + leal 32(%edi),%edi + decl %ebp + jnz LLoop + +LLend: popl %ebp + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebp + jz LLend2 + addl %eax,%eax /* restore carry from eax */ +LLoop2: movl %edx,%ebx + movl (%esi),%edx + adcl %edx,%edx + movl %ebx,(%edi) + + leal 4(%esi),%esi /* use leal not to clobber carry */ + leal 4(%edi),%edi + decl %ebp + jnz LLoop2 + + jmp LL1 +LLend2:
addl %eax,%eax /* restore carry from eax */ +LL1: movl %edx,(%edi) /* store last limb */ + + sbbl %eax,%eax /* eax = 0 - carry */ + negl %eax /* return the bit shifted out as 0 or 1 */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret diff --git a/gnu/lib/libgmp/mpn/x86/pentium/mul_1.S b/gnu/lib/libgmp/mpn/x86/pentium/mul_1.S new file mode 100644 index 00000000000..4ac3050a611 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/pentium/mul_1.S @@ -0,0 +1,79 @@ +/* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store + the result in a second limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA.
*/ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_mul_1) +C_SYMBOL_NAME(__mpn_mul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* arguments start at esp+20 after the four pushes */ + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* res_ptr += 4 * size */ + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) /* s1_ptr += 4 * size */ + INSN1(neg,l ,R(size)) /* index counts from -size up to 0 */ + INSN2(xor,l ,R(ebx),R(ebx)) /* ebx = carry limb, initially 0; also clears the carry flag */ + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) /* fold in the carry left by the previous pass */ + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb * s2_limb */ + + INSN2(add,l ,R(ebx),R(eax)) /* carry from this add is consumed at the top of Loop */ + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) /* store result limb */ + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) /* high limb becomes the next carry limb */ + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) /* fold in the carry from the last pass */ + INSN2(mov,l ,R(eax),R(ebx)) /* return the final carry limb */ + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/gnu/lib/libgmp/mpn/x86/pentium/rshift.S b/gnu/lib/libgmp/mpn/x86/pentium/rshift.S new file mode 100644 index 00000000000..38398edb13b --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/pentium/rshift.S @@ -0,0 +1,217 @@ +/* Pentium optimized __mpn_rshift -- + +Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version.
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + size (sp + 12) + cnt (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_rshift) +C_SYMBOL_NAME(__mpn_rshift:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s_ptr */ + movl 28(%esp),%ebp /* size */ + movl 32(%esp),%ecx /* cnt */ + +/* We can use faster code for shift-by-1 under certain conditions. */ + cmp $1,%ecx + jne Lnormal + leal 4(%edi),%eax + cmpl %esi,%eax + jnc Lspecial /* jump if res_ptr + 1 >= s_ptr */ + leal (%edi,%ebp,4),%eax + cmpl %eax,%esi + jnc Lspecial /* jump if s_ptr >= res_ptr + size */ + +Lnormal: + movl (%esi),%edx + addl $4,%esi + xorl %eax,%eax + shrdl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + + decl %ebp + pushl %ebp + shrl $3,%ebp + jz Lend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + shrdl %cl,%eax,%ebx + shrdl %cl,%edx,%eax + movl %ebx,(%edi) + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + shrdl %cl,%ebx,%edx + shrdl %cl,%eax,%ebx + movl %edx,8(%edi) + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + shrdl %cl,%edx,%eax + shrdl %cl,%ebx,%edx + movl %eax,16(%edi) + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + shrdl 
%cl,%eax,%ebx + shrdl %cl,%edx,%eax + movl %ebx,24(%edi) + movl %eax,28(%edi) + + addl $32,%esi + addl $32,%edi + decl %ebp + jnz Loop + +Lend: popl %ebp + andl $7,%ebp + jz Lend2 +Loop2: movl (%esi),%eax + shrdl %cl,%eax,%edx /* compute result limb */ + movl %edx,(%edi) + movl %eax,%edx + addl $4,%esi + addl $4,%edi + decl %ebp + jnz Loop2 + +Lend2: shrl %cl,%edx /* compute most significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* We loop from most significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. +*/ + +Lspecial: + leal -4(%edi,%ebp,4),%edi + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + + decl %ebp + pushl %ebp + shrl $3,%ebp + + shrl $1,%edx + incl %ebp + decl %ebp + jz LLend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +LLoop: movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + rcrl $1,%eax + movl %ebx,(%edi) + rcrl $1,%edx + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + rcrl $1,%ebx + movl %edx,-8(%edi) + rcrl $1,%eax + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + rcrl $1,%edx + movl %eax,-16(%edi) + rcrl $1,%ebx + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + rcrl $1,%eax + movl %ebx,-24(%edi) + rcrl $1,%edx + movl %eax,-28(%edi) + + leal -32(%esi),%esi /* use leal not to clobber carry */ + leal -32(%edi),%edi + decl %ebp + jnz LLoop + +LLend: popl %ebp + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebp + jz LLend2 + addl %eax,%eax /* restore carry from eax */ +LLoop2: movl %edx,%ebx + movl (%esi),%edx + rcrl $1,%edx + movl %ebx,(%edi) + + leal -4(%esi),%esi /* use leal not to clobber carry */ + leal -4(%edi),%edi + decl %ebp + jnz LLoop2 + + jmp LL1
+LLend2: addl %eax,%eax /* restore carry from eax */ +LL1: movl %edx,(%edi) /* store last limb */ + + movl $0,%eax + rcrl $1,%eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret diff --git a/gnu/lib/libgmp/mpn/x86/pentium/sub_n.S b/gnu/lib/libgmp/mpn/x86/pentium/sub_n.S new file mode 100644 index 00000000000..d1a2bc08403 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/pentium/sub_n.S @@ -0,0 +1,130 @@ +/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0 + and store difference in a third limb vector. + +Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 12) + size (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_sub_n) +C_SYMBOL_NAME(__mpn_sub_n:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ + movl 32(%esp),%ecx /* size */ + + movl (%ebp),%ebx + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx /* zero carry flag */ + jz Lend + pushl %edx + + ALIGN (3) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + sbbl %ebx,%eax + movl 4(%ebp),%ebx + sbbl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + sbbl %ebx,%eax + movl 12(%ebp),%ebx + sbbl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + sbbl %ebx,%eax + movl 20(%ebp),%ebx + sbbl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 28(%esi),%edx + sbbl %ebx,%eax + movl 28(%ebp),%ebx + sbbl %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp + decl %ecx + jnz Loop + + popl %edx +Lend: + decl %edx /* test %edx w/o clobbering carry */ + js Lend2 + incl %edx +Loop2: + leal 4(%edi),%edi + movl (%esi),%eax + sbbl %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz Loop2 +Lend2: + movl (%esi),%eax + sbbl %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret diff --git a/gnu/lib/libgmp/mpn/x86/pentium/submul_1.S b/gnu/lib/libgmp/mpn/x86/pentium/submul_1.S new file mode 100644 index 00000000000..adf2d63e684 --- /dev/null +++ 
b/gnu/lib/libgmp/mpn/x86/pentium/submul_1.S @@ -0,0 +1,83 @@ +/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract + the result from a second limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_submul_1) +C_SYMBOL_NAME(__mpn_submul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) + + INSN2(add,l ,R(eax),R(ebx)) + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) + + INSN2(adc,l ,R(edx),$0) + INSN2(sub,l ,R(ebx),R(eax)) + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/gnu/lib/libgmp/mpn/x86/rshift.S b/gnu/lib/libgmp/mpn/x86/rshift.S new file mode 100644 index 00000000000..9abbf9a45d2 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/rshift.S @@ -0,0 +1,87 @@ +/* i80386 __mpn_rshift -- + +Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + size (sp + 12) + cnt (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_rshift) +C_SYMBOL_NAME(__mpn_rshift:) + pushl %edi + pushl %esi + pushl %ebx + + movl 16(%esp),%edi /* res_ptr */ + movl 20(%esp),%esi /* s_ptr */ + movl 24(%esp),%edx /* size */ + movl 28(%esp),%ecx /* cnt */ + + leal -4(%edi,%edx,4),%edi + leal (%esi,%edx,4),%esi + negl %edx + + movl (%esi,%edx,4),%ebx /* read least significant limb */ + xorl %eax,%eax + shrdl %cl,%ebx,%eax /* compute carry limb */ + incl %edx + jz Lend + pushl %eax /* push carry limb onto stack */ + testb $1,%dl /* byte-sized test, so name the byte register */ + jnz L1 /* enter loop in the middle */ + movl %ebx,%eax + + ALIGN (3) +Loop: movl (%esi,%edx,4),%ebx /* load next higher limb */ + shrdl %cl,%ebx,%eax /* compute result limb */ + movl %eax,(%edi,%edx,4) /* store it */ + incl %edx +L1: movl (%esi,%edx,4),%eax + shrdl %cl,%eax,%ebx + movl %ebx,(%edi,%edx,4) + incl %edx + jnz Loop + + shrl %cl,%eax /* compute most significant limb */ + movl %eax,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebx + popl %esi + popl %edi + ret + +Lend: shrl %cl,%ebx /* compute most significant limb */ + movl %ebx,(%edi) /* store it */ + + popl %ebx + popl %esi + popl %edi + ret diff --git a/gnu/lib/libgmp/mpn/x86/sub_n.S b/gnu/lib/libgmp/mpn/x86/sub_n.S new file mode 100644 index 00000000000..1774f3abe60 --- /dev/null +++
b/gnu/lib/libgmp/mpn/x86/sub_n.S @@ -0,0 +1,106 @@ +/* i80386 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + store difference in a third limb vector. + +Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 12) + size (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_sub_n) +C_SYMBOL_NAME(__mpn_sub_n:) + pushl %edi + pushl %esi + + movl 12(%esp),%edi /* res_ptr */ + movl 16(%esp),%esi /* s1_ptr */ + movl 20(%esp),%edx /* s2_ptr */ + movl 24(%esp),%ecx /* size */ + + movl %ecx,%eax + shrl $3,%ecx /* compute count for unrolled loop */ + negl %eax + andl $7,%eax /* get index where to start loop */ + jz Loop /* necessary special case for 0 */ + incl %ecx /* adjust loop count */ + shll $2,%eax /* adjustment for pointers... */ + subl %eax,%edi /* ... since they are offset ... */ + subl %eax,%esi /* ... by a constant when we ... */ + subl %eax,%edx /* ... enter the loop */ + shrl $2,%eax /* restore previous value */ +#ifdef PIC +/* Calculate start address in loop for PIC.
Due to limitations in some + assemblers, Loop-L0-3 cannot be put into the leal */ + call L0 +L0: leal (%eax,%eax,8),%eax + addl (%esp),%eax + addl $(Loop-L0-3),%eax + addl $4,%esp +#else +/* Calculate start address in loop for non-PIC. */ + leal (Loop - 3)(%eax,%eax,8),%eax +#endif + jmp *%eax /* jump into loop */ + ALIGN (3) +Loop: movl (%esi),%eax + sbbl (%edx),%eax + movl %eax,(%edi) + movl 4(%esi),%eax + sbbl 4(%edx),%eax + movl %eax,4(%edi) + movl 8(%esi),%eax + sbbl 8(%edx),%eax + movl %eax,8(%edi) + movl 12(%esi),%eax + sbbl 12(%edx),%eax + movl %eax,12(%edi) + movl 16(%esi),%eax + sbbl 16(%edx),%eax + movl %eax,16(%edi) + movl 20(%esi),%eax + sbbl 20(%edx),%eax + movl %eax,20(%edi) + movl 24(%esi),%eax + sbbl 24(%edx),%eax + movl %eax,24(%edi) + movl 28(%esi),%eax + sbbl 28(%edx),%eax + movl %eax,28(%edi) + leal 32(%edi),%edi + leal 32(%esi),%esi + leal 32(%edx),%edx + decl %ecx + jnz Loop + + sbbl %eax,%eax + negl %eax + + popl %esi + popl %edi + ret diff --git a/gnu/lib/libgmp/mpn/x86/submul_1.S b/gnu/lib/libgmp/mpn/x86/submul_1.S new file mode 100644 index 00000000000..730e732045c --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/submul_1.S @@ -0,0 +1,76 @@ +/* i80386 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract + the result from a second limb vector. + +Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. 
+ +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_submul_1) +C_SYMBOL_NAME(__mpn_submul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) +Loop: + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + INSN1(mul,l ,R(s2_limb)) + INSN2(add,l ,R(eax),R(ebx)) + INSN2(adc,l ,R(edx),$0) + INSN2(sub,l ,MEM_INDEX(res_ptr,size,4),R(eax)) + INSN2(adc,l ,R(edx),$0) + INSN2(mov,l ,R(ebx),R(edx)) + + INSN1(inc,l ,R(size)) + INSN1(jnz, ,Loop) + INSN2(mov,l ,R(eax),R(ebx)) + + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/gnu/lib/libgmp/mpn/x86/syntax.h b/gnu/lib/libgmp/mpn/x86/syntax.h new file mode 100644 index 00000000000..c53c73c03f3 --- /dev/null +++ b/gnu/lib/libgmp/mpn/x86/syntax.h @@ -0,0 +1,62 @@ +/* syntax.h -- Definitions for x86 syntax variations. + +Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + + +#undef ALIGN + +#if defined (BSD_SYNTAX) || defined (ELF_SYNTAX) +#define R(r) %r +#define MEM(base)(base) +#define MEM_DISP(base,displacement)displacement(R(base)) +#define MEM_INDEX(base,index,size)(R(base),R(index),size) +#ifdef __STDC__ +#define INSN1(mnemonic,size_suffix,dst)mnemonic##size_suffix dst +#define INSN2(mnemonic,size_suffix,dst,src)mnemonic##size_suffix src,dst +#else +#define INSN1(mnemonic,size_suffix,dst)mnemonic/**/size_suffix dst +#define INSN2(mnemonic,size_suffix,dst,src)mnemonic/**/size_suffix src,dst +#endif +#define TEXT .text +#if defined (BSD_SYNTAX) +#define ALIGN(log) .align log +#endif +#if defined (ELF_SYNTAX) +#define ALIGN(log) .align 1<<(log) +#endif +#define GLOBL .globl +#endif + +#ifdef INTEL_SYNTAX +#define R(r) r +#define MEM(base)[base] +#define MEM_DISP(base,displacement)[base+(displacement)] +#define MEM_INDEX(base,index,size)[base+index*size] +#define INSN1(mnemonic,size_suffix,dst)mnemonic dst +#define INSN2(mnemonic,size_suffix,dst,src)mnemonic dst,src +#define TEXT .text +#define ALIGN(log) .align log +#define GLOBL .globl +#endif + +#ifdef BROKEN_ALIGN +#undef ALIGN +#define ALIGN(log) .align log,0x90 +#endif diff --git a/gnu/lib/libgmp/mpn/z8000/add_n.s 
b/gnu/lib/libgmp/mpn/z8000/add_n.s new file mode 100644 index 00000000000..a50fc3ef5f5 --- /dev/null +++ b/gnu/lib/libgmp/mpn/z8000/add_n.s @@ -0,0 +1,53 @@ +! Z8000 __mpn_add_n -- Add two limb vectors of equal, non-zero length. + +! Copyright (C) 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r7 +! s1_ptr r6 +! s2_ptr r5 +! size r4 + +! If we are really crazy, we can use push to write a few result words +! backwards, using push just because it is faster than reg+disp. We'd +! then add 2x the number of words written to r7... + + unseg + .text + even + global ___mpn_add_n +___mpn_add_n: + pop r0,@r6 + pop r1,@r5 + add r0,r1 + ld @r7,r0 + dec r4 + jr eq,Lend +Loop: pop r0,@r6 + pop r1,@r5 + adc r0,r1 + inc r7,#2 + ld @r7,r0 + dec r4 + jr ne,Loop +Lend: ld r2,r4 ! use 0 already in r4 + adc r2,r2 + ret t diff --git a/gnu/lib/libgmp/mpn/z8000/gmp-mparam.h b/gnu/lib/libgmp/mpn/z8000/gmp-mparam.h new file mode 100644 index 00000000000..e0a303e9799 --- /dev/null +++ b/gnu/lib/libgmp/mpn/z8000/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. 
+ +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 16 +#define BYTES_PER_MP_LIMB 2 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 16 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 diff --git a/gnu/lib/libgmp/mpn/z8000/mul_1.s b/gnu/lib/libgmp/mpn/z8000/mul_1.s new file mode 100644 index 00000000000..f1126b5ab3d --- /dev/null +++ b/gnu/lib/libgmp/mpn/z8000/mul_1.s @@ -0,0 +1,68 @@ +! Z8000 __mpn_mul_1 -- Multiply a limb vector with a limb and store +! the result in a second limb vector. + +! Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! 
You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r7 +! s1_ptr r6 +! size r5 +! s2_limb r4 + + unseg + .text + even + global ___mpn_mul_1 +___mpn_mul_1: + sub r2,r2 ! zero carry limb + and r4,r4 + jr mi,Lneg + +Lpos: pop r1,@r6 + ld r9,r1 + mult rr8,r4 + and r1,r1 ! shift msb of loaded limb into cy + jr mi,Lp ! branch if loaded limb's msb is set + add r8,r4 ! hi_limb += sign_comp2 +Lp: add r9,r2 ! lo_limb += cy_limb + xor r2,r2 + adc r2,r8 + ld @r7,r9 + inc r7,#2 + dec r5 + jr ne,Lpos + ret t + +Lneg: pop r1,@r6 + ld r9,r1 + mult rr8,r4 + add r8,r1 ! hi_limb += sign_comp1 + and r1,r1 + jr mi,Ln + add r8,r4 ! hi_limb += sign_comp2 +Ln: add r9,r2 ! lo_limb += cy_limb + xor r2,r2 + adc r2,r8 + ld @r7,r9 + inc r7,#2 + dec r5 + jr ne,Lneg + ret t diff --git a/gnu/lib/libgmp/mpn/z8000/sub_n.s b/gnu/lib/libgmp/mpn/z8000/sub_n.s new file mode 100644 index 00000000000..272c671bd19 --- /dev/null +++ b/gnu/lib/libgmp/mpn/z8000/sub_n.s @@ -0,0 +1,54 @@ +! Z8000 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +! store difference in a third limb vector. + +! Copyright (C) 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! 
You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r7 +! s1_ptr r6 +! s2_ptr r5 +! size r4 + +! If we are really crazy, we can use push to write a few result words +! backwards, using push just because it is faster than reg+disp. We'd +! then add 2x the number of words written to r7... + + unseg + .text + even + global ___mpn_sub_n +___mpn_sub_n: + pop r0,@r6 + pop r1,@r5 + sub r0,r1 + ld @r7,r0 + dec r4 + jr eq,Lend +Loop: pop r0,@r6 + pop r1,@r5 + sbc r0,r1 + inc r7,#2 + ld @r7,r0 + dec r4 + jr ne,Loop +Lend: ld r2,r4 ! use 0 already in r4 + adc r2,r2 + ret t diff --git a/gnu/lib/libgmp/mpn/z8000x/add_n.s b/gnu/lib/libgmp/mpn/z8000x/add_n.s new file mode 100644 index 00000000000..c5c0d42759b --- /dev/null +++ b/gnu/lib/libgmp/mpn/z8000x/add_n.s @@ -0,0 +1,56 @@ +! Z8000 (32 bit limb version) __mpn_add_n -- Add two limb vectors of equal, +! non-zero length. + +! Copyright (C) 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. 
+ + +! INPUT PARAMETERS +! res_ptr r7 +! s1_ptr r6 +! s2_ptr r5 +! size r4 + +! If we are really crazy, we can use push to write a few result words +! backwards, using push just because it is faster than reg+disp. We'd +! then add 2x the number of words written to r7... + + segm + .text + even + global ___mpn_add_n +___mpn_add_n: + popl rr0,@r6 + popl rr8,@r5 + addl rr0,rr8 + ldl @r7,rr0 + dec r4 + jr eq,Lend +Loop: popl rr0,@r6 + popl rr8,@r5 + adc r1,r9 + adc r0,r8 + inc r7,#4 + ldl @r7,rr0 + dec r4 + jr ne,Loop +Lend: ld r2,r4 ! use 0 already in r4 + ld r3,r4 + adc r2,r2 + ret t diff --git a/gnu/lib/libgmp/mpn/z8000x/sub_n.s b/gnu/lib/libgmp/mpn/z8000x/sub_n.s new file mode 100644 index 00000000000..9eeece69d49 --- /dev/null +++ b/gnu/lib/libgmp/mpn/z8000x/sub_n.s @@ -0,0 +1,56 @@ +! Z8000 (32 bit limb version) __mpn_sub_n -- Subtract two limb vectors of the +! same length > 0 and store difference in a third limb vector. + +! Copyright (C) 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r7 +! s1_ptr r6 +! s2_ptr r5 +! size r4 + +! If we are really crazy, we can use push to write a few result words +! 
backwards, using push just because it is faster than reg+disp. We'd +! then add 2x the number of words written to r7... + + segm + .text + even + global ___mpn_sub_n +___mpn_sub_n: + popl rr0,@r6 + popl rr8,@r5 + subl rr0,rr8 + ldl @r7,rr0 + dec r4 + jr eq,Lend +Loop: popl rr0,@r6 + popl rr8,@r5 + sbc r1,r9 + sbc r0,r8 + inc r7,#4 + ldl @r7,rr0 + dec r4 + jr ne,Loop +Lend: ld r2,r4 ! use 0 already in r4 + ld r3,r4 + adc r2,r2 + ret t |