; unsigned long division and modulus routines
;
; written by Kai-Uwe Bloem (I5110401@dbstu1.bitnet).
;
;
; Revision 1.1, kub 03-90
; first version, replaces the appropriate routine from fixnum.s.
; Should be faster in more common cases. Division is done by 68000 divu
; operations if divisor is only 16 bits wide. Otherwise the normal division
; algorithm as described in various papers takes place. The division routine
; delivers the quotient in d0 and the remainder in d1, thus the implementation
; of the modulo operation is trivial.

	text
	even
	xdef	__CXD33

;-----------------------------------------------------------------------
; __CXD33 -- unsigned 32-bit divide and modulus (68000)
;
; In:	d0.l = dividend
;	d1.l = divisor
; Out:	d0.l = quotient  (dividend / divisor)
;	d1.l = remainder (dividend % divisor)
; Regs:	d2/d3 are used as scratch and restored before returning.
;	Condition codes are not preserved.
; Stack: 8 bytes (saved d2/d3).
;
; A zero divisor executes a divu by zero so the CPU raises its
; zero-divide exception; if the handler returns, d0 = d1 = 0.
;
; NOTE(review): both operands are treated as unsigned throughout. If
; callers expect a signed divide under this symbol name, negative
; operands give wrong results -- confirm against the compiler's
; runtime conventions.
;-----------------------------------------------------------------------
__CXD33:
	movem.l	d2-d3,-(sp)	; scratch registers, restored at 8$
	move.l	d1,d2		; d2 = divisor (the move sets Z if it is zero)
	beq.s	9$		; divisor = 0 causes a division trap
	move.l	d0,d1		; d1 = dividend
	moveq.l	#0,d0		; prepare result
;== case 1) dividend < divisor
	cmp.l	d2,d1		; is dividend smaller than divisor ?
	bcs.s	8$		; yes: quotient 0, remainder = dividend
;== case 2) divisor has <= 16 significant bits
	cmp.l	#$FFFF,d2
	bhi.s	2$		; no, divisor needs more than 16 bits
	move.w	d1,d3		; save dvd.l
	clr.w	d1		; divide dvd.h by dvs
	swap	d1		; d1 = 0000:dvd.h, Z set if dvd.h is zero
	beq.s	1$		; (no division necessary if dvd.h is zero)
	divu.w	d2,d1		; d1 = remainder:quotient.h
1$:	move.w	d1,d0		; save quotient.h
	swap	d0
	move.w	d3,d1		; (d1.h = remainder of prev divu)
	divu.w	d2,d1		; divide remainder:dvd.l by dvs
	move.w	d1,d0		; save quotient.l
	clr.w	d1		; get remainder
	swap	d1
	bra.s	8$		; and return
;== case 3) divisor > 16 bits (corollary is dividend > 16 bits, see case 1)
; Bitwise shift-and-subtract: d0 collects the partial remainder while
; the quotient bits are shifted into the low end of d1 behind the
; departing dividend bits. Before each shift d0 is no larger than the
; (at most 31) dividend bits consumed so far, so the addx never carries
; out of d0.
2$:
	moveq.l	#31,d3		; loop count (dbra runs 32 iterations)
3$:
	add.l	d1,d1		; shift dividend ...
	addx.l	d0,d0		;  ... into d0
	cmp.l	d2,d0		; compare with divisor
	bcs.s	4$		; too small, quotient bit stays 0
	sub.l	d2,d0		; big enough, subtract
	addq.w	#1,d1		; and note bit in result (bit 0 was just shifted in as 0)
4$:
	dbra	d3,3$
	exg	d0,d1		; put quotient and remainder in their registers
8$:
	movem.l	(sp)+,d2-d3
	rts
9$:
	moveq.l	#0,d0		; quotient 0 (d1 is the zero divisor itself)
	divu.w	d2,d1		; cause division trap
	bra.s	8$		; back to user
	end
