sys/lib/libkern/arch/alpha/divrem.m4


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198

/*	$OpenBSD: divrem.m4,v 1.3 1996/10/31 00:43:17 niklas Exp $	*/
/*	$NetBSD: divrem.m4,v 1.5 1996/10/17 04:26:25 cgd Exp $	*/

/*
 * Copyright (c) 1994, 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Chris G. Demetriou
 * 
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 * 
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 * 
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Division and remainder.
 *
 * The use of m4 is modeled after the sparc code, but the algorithm is
 * simple binary long division.
 *
 * Note that the loops could probably benefit from unrolling.
 */

/*
 * M4 Parameters
 * NAME		name of function to generate
 * OP		OP=div: t10 / t11 -> t12; OP=rem: t10 % t11 -> t12
 * S		S=true: signed; S=false: unsigned
 * WORDSIZE	total number of bits
 */

define(A, `t10')
define(B, `t11')
define(RESULT, `t12')

define(BIT, `t0')
define(I, `t1')
define(CC, `t2')
define(T_0, `t3')
ifelse(S, `true', `define(NEG, `t4')')

#include <machine/asm.h>

LEAF(NAME, 0)					/* XXX */
	lda	sp, -64(sp)
	stq	BIT, 0(sp)
	stq	I, 8(sp)
	stq	CC, 16(sp)
	stq	T_0, 24(sp)
ifelse(S, `true',
`	stq	NEG, 32(sp)')
	stq	A, 40(sp)
	stq	B, 48(sp)
	mov	zero, RESULT			/* Initialize result to zero */

ifelse(S, `true',
`
	/* Compute sign of result.  If either is negative, this is easy.  */
	or	A, B, NEG			/* not the sign, but... */
	srl	NEG, WORDSIZE - 1, NEG		/* rather, or of high bits */
	blbc	NEG, Ldoit			/* neither negative? do it! */

ifelse(OP, `div',
`	xor	A, B, NEG			/* THIS is the sign! */
', `	mov	A, NEG				/* sign follows A. */
')
	srl	NEG, WORDSIZE - 1, NEG		/* make negation the low bit. */

	srl	A, WORDSIZE - 1, I		/* is A negative? */
	blbc	I, LnegB			/* no. */
	/* A is negative; flip it. */
ifelse(WORDSIZE, `32', `
	/* top 32 bits may be random junk */
	zap	A, 0xf0, A
')
	subq	zero, A, A
	srl	B, WORDSIZE - 1, I		/* is B negative? */
	blbc	I, Ldoit			/* no. */
LnegB:
	/* B is definitely negative, no matter how we got here. */
ifelse(WORDSIZE, `32', `
	/* top 32 bits may be random junk */
	zap	B, 0xf0, B
')
	subq	zero, B, B
Ldoit:
')
ifelse(WORDSIZE, `32', `
	/*
	 * Clear the top 32 bits of each operand, as they may
	 * sign extension (if negated above), or random junk.
	 */
	zap	A, 0xf0, A
	zap	B, 0xf0, B
')

	/* kill the special cases. */
	beq	B, Ldotrap			/* division by zero! */

	cmpult	A, B, CC			/* A < B? */
	/* RESULT is already zero, from above.  A is untouched. */
	bne	CC, Lret_result

	cmpeq	A, B, CC			/* A == B? */
	cmovne	CC, 1, RESULT
	cmovne	CC, zero, A
	bne	CC, Lret_result

	/*
	 * Find out how many bits of zeros are at the beginning of the divisor.
	 */
LBbits:
	ldiq	T_0, 1				/* I = 0; BIT = 1<<WORDSIZE-1 */
	mov	zero, I
	sll	T_0, WORDSIZE-1, BIT
LBloop:
	and	B, BIT, CC			/* if bit in B is set, done. */
	bne	CC, LAbits
	addq	I, 1, I				/* increment I, shift bit */
	srl	BIT, 1, BIT
	cmplt	I, WORDSIZE-1, CC		/* if I leaves one bit, done. */
	bne	CC, LBloop

LAbits:
	beq	I, Ldodiv			/* If I = 0, divide now.  */
	ldiq	T_0, 1				/* BIT = 1<<WORDSIZE-1 */
	sll	T_0, WORDSIZE-1, BIT

LAloop:
	and	A, BIT, CC			/* if bit in A is set, done. */
	bne	CC, Ldodiv
	subq	I, 1, I				/* decrement I, shift bit */
	srl     BIT, 1, BIT 
	bne	I, LAloop			/* If I != 0, loop again */

Ldodiv:
	sll	B, I, B				/* B <<= i */
	ldiq	T_0, 1
	sll	T_0, I, BIT

Ldivloop:
	cmpult	A, B, CC
	or	RESULT, BIT, T_0
	cmoveq	CC, T_0, RESULT
	subq	A, B, T_0
	cmoveq	CC, T_0, A
	srl	BIT, 1, BIT	
	srl	B, 1, B
	beq	A, Lret_result
	bne	BIT, Ldivloop

Lret_result:
ifelse(OP, `div',
`', `	mov	A, RESULT
')
ifelse(S, `true',
`
	/* Check to see if we should negate it. */
	subqv	zero, RESULT, T_0
	cmovlbs	NEG, T_0, RESULT
')

	ldq	BIT, 0(sp)
	ldq	I, 8(sp)
	ldq	CC, 16(sp)
	ldq	T_0, 24(sp)
ifelse(S, `true',
`	ldq	NEG, 32(sp)')
	ldq	A, 40(sp)
	ldq	B, 48(sp)
	lda	sp, 64(sp)
	ret	zero, (t9), 1

Ldotrap:
	ldiq	a0, -2			/* This is the signal to SIGFPE! */
	call_pal PAL_gentrap
ifelse(OP, `div',
`', `	mov	zero, A			/* so that zero will be returned */
')
	br	zero, Lret_result

END(NAME)