/* --- Copyright University of Sussex 1994. All rights reserved. ----------
 * File:	    S.axpvms/src/amove.s
 * Purpose:
 * Author:          John Gibson, Sep 12 1994
 */

	.title	"amove.o"	;;; must be the object file name

;;; ---------------- ROUTINES TO MOVE MEMORY, ETC ------------------------

#_<

#_INCLUDE 'asm.ph'

>_#


ASM_CODE_PSECT


;;; --- COMPARE ROUTINES ------------------------------------------------

	;;; Compare two byte regions of equal nonzero length.
	;;; Works a quadword (8 bytes) at a time using unaligned quad loads;
	;;; neither pointer need be aligned.
	;;; Args: rt0 = len (bytes, must be > 0), rt1 = bptr1, rt2 = bptr2
	;;; Result: rt0 = 1 if same, 0 if not
	;;; Uses rt3 - rt5. Internal subroutine: return address in rchain.

	.align quad
cmpbytes:
	;;; load unaligned quad from (rt1) into rt3
	ldq_u	rt3, (rt1)
	ldq_u	rt4, 7(rt1)
	extql	rt3, rt1, rt3
	extqh	rt4, rt1, rt4
	or	rt3, rt4, rt3	;;; quad1 in rt3

	;;; load unaligned quad from (rt2) into rt4
	ldq_u	rt4, (rt2)
	ldq_u	rt5, 7(rt2)
	extql	rt4, rt2, rt4
	extqh	rt5, rt2, rt5
	or	rt4, rt5, rt4	;;; quad2 in rt4

	;;; compare rt3 and rt4
	xor	rt3, rt4, rt3	;;; zero if equal, nonzero if not
	subl	rt0, #8, rt0	;;; decrement count by 1 quad
	bne	rt3, 2$		;;; stop if bytes not all equal
	lda	rt1, 8(rt1)	;;; step ptrs
	lda	rt2, 8(rt2)
	bgt	rt0, cmpbytes	;;; loop if more to do

	;;; same
	mov	#1, rt0		;;; return 1
	ret	(rchain)

	;;; bytes not all equal -- could be trailing bytes off end of strings
	;;; (the last quad loaded may extend past the region end when len
	;;; is not a multiple of 8)
2$:	bge	rt0, 3$		;;; count nonneg -- all within strings
	mskql	rt3, rt0, rt3	;;; zero the bytes off end of strings
				;;; (mskql uses low 3 bits of the negative
				;;; count = number of valid trailing bytes)
3$:	cmpeq	rt3, #0, rt0	;;; result 1 if rt3 now zero
	ret	(rchain)


	;;; _bcmp(_______boffs, _______bptr1, _______bptr2) -> ____bool
	;;; _scmp(_______soffs, _______sptr1, _______sptr2) -> ____bool
	;;; compare two byte/short regions of the same length
	.align quad
DEF_C_LAB (_bcmp)
DEF_C_LAB (_scmp)
	ldl	rt0, 8(rusp)	;;; ______offs
	ldl	rt1, 4(rusp)	;;; ______ptr1
	ldl	rt2, (rusp)	;;; ______ptr2
	lda	rusp, 8(rusp)
	beq	rt0, 1$		;;; return true if zero length
	bsr	rchain, cmpbytes ;;; do compare
	blbc	rt0, 2$		;;; not same
	;;; same
1$:	lda	rt0, _TRUEOFFS(rfalse)
	stl	rt0, (rusp)
	ret	(rret)
2$:	stl	rfalse, (rusp)	;;; else return false
	ret	(rret)


	;;; _cmp(_______woffs, _______wptr1, _______wptr2) -> ____bool
	;;; compare two word regions of the same length
	.align quad
DEF_C_LAB (_cmp)
	ldl	rt0, 8(rusp)	;;; ______offs
	ldl	rt1, 4(rusp)	;;; ______ptr1
	ldl	rt2, (rusp)	;;; ______ptr2
	lda	rusp, 8(rusp)
	beq	rt0, 2$		;;; return if zero length

	.align quad
1$:	ldl	rt3, (rt1)
	ldl	rt4, (rt2)
	lda	rt1, 4(rt1)
	lda	rt2, 4(rt2)
	cmpeq	rt3, rt4, rt3
	subl	rt0, #4, rt0
	blbc	rt3, 3$
	bgt	rt0, 1$

	;;; same
2$:	lda	rt0, _TRUEOFFS(rfalse)
	stl	rt0, (rusp)
	ret	(rret)

3$:	;;; not same
	stl	rfalse, (rusp)	;;; else return false
	ret	(rret)


;;; --- LOCATE CHARACTER ----------------------------------------------------

	;;; _skpc(______bptr, _______boffs, ______byte) -> _____________boffs_or_-1
	;;; skip string for character
	.align quad
DEF_C_LAB (_skpc)
	mov	#255, rt3	;;; all 1s mask for XOR
	br	locskp

	;;; _locc(______bptr, _______boffs, ______byte) -> _____________boffs_or_-1
	;;; search string for character
	.align quad
DEF_C_LAB (_locc)
	clr	rt3		;;; all 0s mask for XOR

locskp:
	ldl	rt1, 8(rusp)	;;; ______bptr
	ldl	rt0, 4(rusp)	;;; _______boffs
	ldl	rt2, (rusp)	;;; ______byte
	lda	rusp, 8(rusp)
	beq	rt0, 2$		;;; return not found if zero length

	;;; dup byte across quadword
	sll	rt2, #8, rt4
	or	rt2, rt4, rt2
	sll	rt2, #16, rt4
	or	rt2, rt4, rt2
	sll	rt2, #32, rt4
	or	rt2, rt4, rt2

1$:	ldq_u	rt4, (rt1)	;;; load src quadword into rt4
	ldq_u	rt5, 7(rt1)
	extql	rt4, rt1, rt4
	extqh	rt5, rt1, rt5
	or	rt4, rt5, rt4

	cmpbge	rt2, rt4, rt5	;;; compare with byte quad in rt2
	cmpbge	rt4, rt2, rt4
	lda	rt1, 8(rt1)
	and	rt4, rt5, rt4	;;; AND results to get 1s for equal bytes
	xor	rt4, rt3, rt4	;;; then XOR with mask for equal/not equal
	subl	rt0, #8, rt0	;;; decrement count
	bne	rt4, 3$		;;; finished if some equal/not equal
	bgt	rt0, 1$		;;; loop if more to compare

	;;; not found/ nothing else found
2$:	mov	#-1, rt0	;;; return -1
	stl	rt0, (rusp)
	ret	(rret)

	;;; find first bit 0 - 7 set in rt4
3$:	mov	#-8, rt5	;;; -8 because rt1 was stepped
	and	rt4, #15, rt6
	bne	rt6, 4$
	addl	rt5, #4, rt5
	srl	rt4, #4, rt4
4$:	and	rt4, #3, rt6
	bne	rt6, 5$
	addl	rt5, #2, rt5
	srl	rt4, #2, rt4
5$:	blbs	rt4, 6$
	addl	rt5, #1, rt5
	;;; (offset from quad start) - 8 now in rt5
6$:	cmple	rt0, rt5, rt0	;;; if -ve overshoot on count <= rt5,
	blbs	rt0, 2$		;;; equal byte was not in string

	;;; char/something else found
	ldl	rt0, (rusp)	;;; initial ptr again
	addl	rt1, rt5, rt1	;;; ptr to char
	subl	rt1, rt0, rt0	;;; offset to char
	stl	rt0, (rusp)	;;; return it
	ret	(rret)

;;;---------------------------------------------------------------------------

	;;; _mtchc(______bptr, _______boffs, ________subptr, _________suboffs) -> _____________boffs_or_-1
	;;; search string for substring
	.align quad
DEF_C_LAB (_mtchc)
	ldl	rt1, 12(rusp)	;;; ______bptr
	lda	rusp, 12(rusp)	;;; set stack for 1 result
	stl	rt1, -24(rusp)	;;; save original ______bptr
	ldl	rt3, -12(rusp)	;;; _________suboffs
	ldl	rt0, -4(rusp)	;;; _______boffs
	beq	rt3, 9$		;;; return 0 offset if substring 0 length
	subl	rt0, rt3, rt0
	ldl	rt2, -8(rusp)	;;; ________subptr
	addl	rt0, #1, rt0	;;; _______boffs - _________suboffs + 1 = len to use
	ble	rt0, 3$		;;; return if substring too long
	;;; dup first byte of substring across quadword in rt6
	ldq_u	rt3, (rt2)
	extbl	rt3, rt2, rt3	;;; first byte
	sll	rt3, #8, rt2
	or	rt3, rt2, rt3
	sll	rt3, #16, rt2
	or	rt3, rt2, rt3
	sll	rt3, #32, rt2
	or	rt3, rt2, rt6

1$:	ldq_u	rt2, (rt1)	;;; load next ______bptr quadword into rt2
	ldq_u	rt3, 7(rt1)
	extql	rt2, rt1, rt2
	extqh	rt3, rt1, rt3
	or	rt2, rt3, rt2

	cmpbge	rt2, rt6, rt3	;;; compare with byte quad in rt6
	cmpbge	rt6, rt2, rt2
	lda	rt1, 8(rt1)	;;; step ______bptr
	and	rt2, rt3, rt2	;;; AND results to get 1s for equal bytes
	subl	rt0, #8, rt0	;;; decrement _______boffs
	bne	rt2, 4$		;;; break if some equal
2$:	bgt	rt0, 1$		;;; else loop if more to compare

	;;; not found
3$:	mov	#-1, rt0	;;; return -1
	stl	rt0, (rusp)
	ret	(rret)

4$:	mov	#-8, rt3	;;; -8 because rt1 was stepped
	;;; find first bit 0 - 7 set in rt2
5$:	and	rt2, #15, rt4
	bne	rt4, 6$
	addl	rt3, #4, rt3
	srl	rt2, #4, rt2
6$:	and	rt2, #3, rt4
	bne	rt4, 7$
	addl	rt3, #2, rt3
	srl	rt2, #2, rt2
7$:	blbs	rt2, 8$
	addl	rt3, #1, rt3
	srl	rt2, #1, rt2

	;;; (offset from quad start) - 8 now in rt3
8$:	cmple	rt0, rt3, rt4	;;; _______boffs neg and <= rt3?
	blbs	rt4, 3$		;;; if so equal byte was not in string
	;;; first char of substring found -- compare with substring
	stl	rt2, -16(rusp)	;;; save bitmask
	stl	rt3, -20(rusp)	;;; save offset within quad
	stl	rt0, -4(rusp)	;;; save _______boffs
	stl	rt1, (rusp)	;;; save ______bptr
	ldl	rt0, -12(rusp)	;;; _________suboffs is length to compare
	addl	rt1, rt3, rt1	;;; ptr to char
	ldl	rt2, -8(rusp)	;;; ________subptr to compare with
	bsr	rchain, cmpbytes ;;; do compare -- args in rt0,rt1,rt2
	ldl	rt1, (rusp)	;;; recover ______bptr
	ldl	rt3, -20(rusp)	;;; recover offset within quad
	blbs	rt0, 9$		;;; br if found substring
	;;; didn't match -- try next first char found
	ldl	rt2, -16(rusp)	;;; recover bitmask
	ldl	rt0, -4(rusp)	;;; recover _______boffs
	bic	rt2, #1, rt2	;;; clear last bit
	beq	rt2, 2$		;;; br if no more bits in this quad
	br	5$		;;; else find next bit set

9$:	;;; found substring -- return offset of after substring
	ldl	rt0, -12(rusp)	;;; _________suboffs
	ldl	rt2, -24(rusp)	;;; original ______bptr
	addl	rt1, rt3, rt1	;;; ptr to first char
	addl	rt1, rt0, rt1	;;; ptr to after last char
	subl	rt1, rt2, rt1	;;; offset to after last char
	stl	rt1, (rusp)	;;; return it
	ret	(rret)


;;; --- MOVE ROUTINES ------------------------------------------------------

	;;; _bmove(_______boffs, __________bptr_src, __________bptr_dst) -> __________next_dst
	;;; _smove(_______soffs, __________sptr_src, __________sptr_dst) -> __________next_dst
	;;; move bytes or shorts
	.align quad
DEF_C_LAB (_bmove)
DEF_C_LAB (_smove)
	ldl	rt2, (rusp)	;;; _________ptr_dst
	ldl	rt0, 8(rusp)	;;; ______offs
	ldl	rt1, 4(rusp)	;;; _________ptr_src
	addl	rt2, rt0, rt4	;;; next destination
	lda	rusp, 8(rusp)
	subl	rt2, rt1, rt3	;;; dst - src
	stl	rt4, (rusp)	;;; next destination result
	beq	rt0, 3$		;;; return if nothing to move
	blt	rt3, 5$ 	;;; br if moving down
	beq	rt3, 3$		;;; return if source = destination

	;;; moving up
	addl	rt1, rt0, rt1	;;; src lim
	mov	rt4, rt2	;;; dest lim

1$:	subl	rt0, #8, rt0	;;; decrement count by 1 quad
	lda	rt1, -8(rt1)	;;; step ptrs back 1 quad
	lda	rt2, -8(rt2)
	blt	rt0, 7$		;;; br if count neg -- odd bytes at end

	;;; load unaligned quad from (rt1) into rt3
	ldq_u	rt3, (rt1)
	ldq_u	rt4, 7(rt1)
	extql	rt3, rt1, rt3
	extqh	rt4, rt1, rt4
	or	rt3, rt4, rt3	;;; the src quad in rt3

	;;; store src quad rt3 at unaligned dst (rt2)
2$:	ldq_u	rt5, 7(rt2)
	ldq_u	rt4, (rt2)
	insqh	rt3, rt2, rt6
	insql	rt3, rt2, rt3
	mskqh	rt5, rt2, rt5
	mskql	rt4, rt2, rt4
	or	rt5, rt6, rt5
	or	rt3, rt4, rt3
	stq_u	rt5, 7(rt2)
	stq_u	rt3, (rt2)

	bgt	rt0, 1$		;;; loop if more to go
3$:	ret	(rret)


	;;; moving down
5$:	subl	rt0, #8, rt0	;;; decrement count by 1 quad
	blt	rt0, 8$		;;; odd bytes at end

	;;; load unaligned quad from (rt1) into rt3
	ldq_u	rt3, (rt1)
	ldq_u	rt4, 7(rt1)
	extql	rt3, rt1, rt3
	extqh	rt4, rt1, rt4
	or	rt3, rt4, rt3	;;; the src quad in rt3

6$:	;;; store src quad rt3 at unaligned dst (rt2)
	ldq_u	rt5, 7(rt2)
	ldq_u	rt4, (rt2)
	insqh	rt3, rt2, rt6
	insql	rt3, rt2, rt3
	mskqh	rt5, rt2, rt5
	mskql	rt4, rt2, rt4
	or	rt5, rt6, rt5
	or	rt3, rt4, rt3
	stq_u	rt5, 7(rt2)
	stq_u	rt3, (rt2)

	lda	rt1, 8(rt1)	;;; step ptrs
	lda	rt2, 8(rt2)
	bgt	rt0, 5$		;;; loop if more to go
	ret	(rret)


	;;; move odd bytes at end (moving up)
7$:	subq	rt1, rt0, rt1	;;; get ptrs to start of bytes
	subq	rt2, rt0, rt2

	;;; move 1 - 7 bytes at end
8$:	addl	rt0, #8, rt0	;;; _N = 1 - 7

	addl	rt1, rt0, rt3
	ldq_u	rt4, (rt1)
	ldq_u	rt5, -1(rt3)
	extql	rt4, rt1, rt4
	extqh	rt5, rt1, rt5
	or	rt4, rt5, rt4
	mskql	rt4, rt0, rt3	;;; the _N src bytes aligned at the bottom

	addl	rt2, rt0, rt1
	mov	#-1, rt4
	mskql	rt4, rt0, rt0	;;; mask for _N bytes

	ldq_u	rt4, -1(rt1)
	ldq_u	rt5, (rt2)

	insqh	rt0, rt2, rchain
	insqh	rt3, rt2, rt6
	bic	rt4, rchain, rt4
	or	rt4, rt6, rt4
	stq_u	rt4, -1(rt1)

	insql	rt0, rt2, rchain
	insql	rt3, rt2, rt6
	bic	rt5, rchain, rt5
	or	rt5, rt6, rt5
	stq_u	rt5, (rt2)

	ret	(rret)



	;;; _move(_______woffs, __________wptr_src, __________wptr_dst) -> __________next_dst
	;;; move words (i.e. Alpha longwords). _moveq is the same.

	.align quad
DEF_C_LAB (_move)
DEF_C_LAB (_moveq)
	ldl	rt2, (rusp)	;;; __________wptr_dst
	ldl	rt0, 8(rusp)	;;; _______woffs
	ldl	rt1, 4(rusp)	;;; __________wptr_src
	addl	rt2, rt0, rt4	;;; next destination
	lda	rusp, 8(rusp)
	stl	rt4, (rusp)	;;; next destination result
	;;; drop thru to movwords

	;;; Do word move.
	;;; word offset to move in rt0, source in rt1, dest in rt2
	;;; Uses rt3, rt4, rt5
movwords:
	cmple	rt0, #4, rt3	;;; less than 2 words to move?
	subl	rt2, rt1, rt4	;;; dst - src
	blbs	rt3, 8$		;;; br if less than 2 words

	;;; can use forward move if moving down or dest beyond end of src
	;;; (mem system anticipates moving forward thru mem rather than back)
	cmpule	rt0, rt4, rt3 	;;; woffs unsigned <= dst - src ?
	blbc	rt3, movback	;;; no -- do backward move

	;;; NO OVERLAP -- DO THE MOVE FORWARDS
	and	rt2, #4, rt3	;;; test alignment on dst
	beq	rt3, 1$		;;; br if dst is quad aligned
	;;; dst not quad aligned -- move 1 word first
	ldl	rt3, (rt1)
	lda	rt2, 4(rt2)
	lda	rt1, 4(rt1)
	stl	rt3, -4(rt2)	;;; rt2 now aligned
	subl	rt0, #4, rt0
1$:	and	rt1, #4, rt3	;;; test alignment on src
	beq	rt3, 6$		;;; br if src also now quad aligned

	;;; dst aligned, src not
	ldl	rt3, (rt1)	;;; initial lo part
	subl	rt0, #8, rt0
	extll	rt3, #0, rt3	;;; zero hi part
	br	3$

	.align quad
2$:	ldq	rt4, 4(rt1)
	subl	rt0, #8, rt0
	lda	rt2, 8(rt2)
	sll	rt4, #32, rt5	;;; lo -> hi
	lda	rt1, 8(rt1)
	or	rt5, rt3, rt5
	srl	rt4, #32, rt3	;;; previous hi -> next lo
	stq	rt5, -8(rt2)
3$:	bgt	rt0, 2$

	blt	rt0, 4$
	ldl	rt4, 4(rt1)
	stl	rt4, 4(rt2)
4$:	stl	rt3, (rt2)
	ret	(rret)

	;;; both quad aligned
	.align quad
5$:	stq	rt3, -8(rt2)
	lda	rt1, 8(rt1)
6$:	subl	rt0, #8, rt0
	ldq	rt3, (rt1)
	lda	rt2, 8(rt2)
	bgt	rt0, 5$

	blt	rt0, 7$
	stq	rt3, -8(rt2)
	ret	(rret)
7$:	stl	rt3, -8(rt2)
	ret	(rret)

	;;; LESS THAN 2 WORDS TO MOVE
8$:	beq	rt0, 9$		;;; return if nothing to move
	ldl	rt3, (rt1)	;;; else move 1 word and return
	stl	rt3, (rt2)
9$:	ret	(rret)


	;;; SRC AND DST OVERLAP -- DO THE MOVE BACKWARDS
	.align quad
movback:
	beq	rt4, 10$	;;; return if source = destination
	addl	rt1, rt0, rt1	;;; src lim
	addl	rt2, rt0, rt2	;;; dst lim
	and	rt2, #4, rt3	;;; test alignment on dst lim
	beq	rt3, 3$		;;; br if dst lim is quad aligned
	;;; dst lim not quad aligned -- move 1 word first
	ldl	rt3, -4(rt1)
	lda	rt2, -4(rt2)
	lda	rt1, -4(rt1)
	stl	rt3, (rt2)	;;; rt2 now aligned
	subl	rt0, #4, rt0
3$:	and	rt1, #4, rt3	;;; test alignment on src lim
	beq	rt3, 8$		;;; br if src lim also now quad aligned

	;;; dst lim aligned, src not
	ldl	rt3, -4(rt1)	;;; initial lo part
	subl	rt0, #8, rt0
	sll	rt3, #32, rt3	;;; previous lo -> next hi
	br	5$

	.align quad
4$:	ldq	rt4, -12(rt1)
	subl	rt0, #8, rt0
	lda	rt2, -8(rt2)
	srl	rt4, #32, rt5	;;; hi -> lo
	lda	rt1, -8(rt1)
	or	rt5, rt3, rt5	;;; or with hi
	sll	rt4, #32, rt3	;;; previous lo -> next hi
	stq	rt5, (rt2)
5$:	bgt	rt0, 4$

	srl	rt3, #32, rt4
	blt	rt0, 6$
	ldl	rt3, -8(rt1)
	stl	rt3, -8(rt2)
6$:	stl	rt4, -4(rt2)
	ret	(rret)

	;;; both quad aligned
	.align quad
7$:	stq	rt3, (rt2)
	lda	rt1, -8(rt1)
8$:	subl	rt0, #8, rt0
	ldq	rt3, -8(rt1)
	lda	rt2, -8(rt2)
	bgt	rt0, 7$

	blt	rt0, 9$
	stq	rt3, (rt2)
	ret	(rret)
9$:	srl	rt3, #32, rt4
	stl	rt4, 4(rt2)
10$:	ret	(rret)


;;; --- FILL ROUTINES ----------------------------------------------------

	;;; _bfill(______byte, _______boffs, ______bptr)
	;;; fill a region of bytes with a given byte

	.align quad
DEF_C_LAB (_bfill)
	ldl	rt1, 8(rusp)	;;; ______byte
	ldl	rt0, 4(rusp)	;;; _______boffs
	ldl	rt2, (rusp)	;;; ______bptr
	lda	rusp, 12(rusp)
	beq	rt0, 3$		;;; return if nothing to do
	;;; dup byte across quadword
	sll	rt1, #8, rt3
	or	rt1, rt3, rt1
	sll	rt1, #16, rt3
	or	rt1, rt3, rt1
	sll	rt1, #32, rt3
	or	rt1, rt3, rt1
	insql	rt1, rt2, rt3	;;; split to rt3, rt4
	insqh	rt1, rt2, rt4

1$:	subl	rt0, #8, rt0	;;; decrement count by 1 quad
	blt	rt0, 4$		;;; not end or no odd bytes at end

2$:	;;; store src quad rt1 split in rt3, rt4 at unaligned dst (rt2)
	ldq_u	rt6, 7(rt2)
	ldq_u	rt5, (rt2)
	mskqh	rt6, rt2, rt6
	mskql	rt5, rt2, rt5
	or	rt6, rt4, rt6
	or	rt5, rt3, rt5
	stq_u	rt6, 7(rt2)
	stq_u	rt5, (rt2)

	lda	rt2, 8(rt2)	;;; step ptr
	bgt	rt0, 1$		;;; loop if more to go
3$:	ret	(rret)

	;;; insert odd trailing dst bytes in src quad
4$:	ldq_u	rt5, (rt2)
	ldq_u	rt6, 7(rt2)
	extql	rt5, rt2, rt5
	extqh	rt6, rt2, rt6
	or	rt5, rt6, rt5	;;; last dst quad in rt5
	mskql	rt1, rt0, rt1	;;; src bytes wanted
	mskqh	rt5, rt0, rt5	;;; dst bytes wanted
	or	rt1, rt5, rt1	;;; correct quad to store
	insql	rt1, rt2, rt3	;;; split it to rt3, rt4
	insqh	rt1, rt2, rt4
	br	2$		;;; store it


	;;; _fill(______word, _______woffs, ______wptr)
	;;; fill a region of words with a given word

	.align quad
DEF_C_LAB (_fill)
	ldl	rt2, (rusp)	;;; ______wptr
	ldl	rt1, 4(rusp)	;;; _______woffs
	ldl	rt0, 8(rusp)	;;; ______word
	br	2$

	.align quad
1$:	stl	rt0, (rt2)	;;; store word at dst
	subl	rt1, #4, rt1
	lda	rt2, 4(rt2)
2$:	bne	rt1, 1$

	lda	rusp, 12(rusp)
	ret	(rret)

;;;--------------------------------------------------------------------------


	;;; _move_userstack(_______woffs)
	;;; move the user stack up or down by word offset _______woffs

	.align quad
DEF_C_LAB (_move_userstack)
	ldl	rt2, (rusp)		;;; _______woffs
	ldl	rt3, _SVB_OFFS(_userhi)(rsvb)	;;; _userhi value
	lda	rusp, 4(rusp)
	subl	rt3, rusp, rt0		;;; length of u/s to move in rt0
	mov	rusp, rt1		;;; u/s top -- src for move
	addl	rt3, rt2, rt3		;;; _userhi + _______woffs = new _userhi
	addl	rusp, rt2, rt2		;;; top + _______woffs = destination for move
	stl	rt3, _SVB_OFFS(_userhi)(rsvb)	;;; update _userhi
	mov	rt2, rusp		;;; and new stack top
	br	movwords		;;; do the move (return to caller)


;;; --- ROUTINES TO HANDLE USERSTACK SWAPPING -------------------------------

	;;; _ussave(_______woffs, _________saveptr)
	;;; save and erase a given offset at the end of the userstack

	.align quad
DEF_C_LAB (_ussave)
	;;; copy the area to be saved
	ldl	rt2, (rusp)		;;; _________saveptr = dst for move
	ldl	rt0, 4(rusp)		;;; _______woffs = offset to move
	ldl	rt6, _SVB_OFFS(_userhi)(rsvb)	;;; stack base
	lda	rusp, 8(rusp)
	mov	rret, rchain		;;; save return
	subl	rt6, rt0, rt1		;;; _userhi - _______woffs = src for move ...
	mov	rt1, rt6		;;; ... remembered in rt6
	bsr	rret, movwords		;;; save the area

	;;; now shift up the remaining stack contents
	ldl	rt2, _SVB_OFFS(_userhi)(rsvb)	;;; _userhi again
	mov	rusp, rt1		;;; stack top = src for move
	subl	rt6, rt1, rt0		;;; save src - stack top = offs to move
	subl	rt2, rt0, rt2		;;; _userhi - offs = dst for move ...
	mov	rt2, rusp		;;; ... and new stack top afterwards
	mov	rchain, rret		;;; set return to caller
	br	movwords		;;; do the move


	;;; _usrestore(_______woffs, _________restptr)
	;;; restore a given offset at the end of the userstack

	.align quad
DEF_C_LAB (_usrestore)
	;;; shift down the current stack contents to make room
	ldl	rt0, _SVB_OFFS(_userhi)(rsvb)	;;; stack base
	ldl	rt6, 4(rusp)		;;; _______woffs (keep for next part)
	mov	rret, rchain		;;; save return
	subl	rt0, rusp, rt0		;;; _userhi - top = offset to move
	mov	rusp, rt1		;;; stack top = src for move
	subl	rusp, rt6, rt2		;;; top - _______woffs = dst for move ...
	mov	rt2, rusp		;;; ... and new stack top afterwards
	bsr	rret, movwords		;;; save the area

	;;; now move in the area to be restored
	ldl	rt2, _SVB_OFFS(_userhi)(rsvb)	;;; stack base again
	ldl	rt1, (rusp)		;;; _________restptr = src for move
	lda	rusp, 8(rusp)		;;; remove args
	mov	rt6, rt0		;;; _______woffs = offset for move
	subl	rt2, rt0, rt2		;;; _userhi - _______woffs = dst for move
	mov	rchain, rret		;;; set return to caller
	br	movwords		;;; do the move


	;;; _userasund(_______woffs)
	;;; erase a given offset at the end of the userstack

	.align quad
DEF_C_LAB (_userasund)
	ldl	rt2, (rusp)		;;; _______woffs
	ldl	rt0, _SVB_OFFS(_userhi)(rsvb)	;;; stack base
	lda	rt1, 4(rusp)		;;; stack top + 4 = src for move
	addl	rt1, rt2, rt2		;;; plus _______woffs = dst for move ...
	mov	rt2, rusp		;;; ... and new stack top afterwards
	subl	rt0, rt2, rt0		;;; _userhi - new top = size to move
	br	movwords		;;; do the move


;;; ------------------------------------------------------------------------

	;;; _move_callstack(_______woffs, _________limaddr)
	;;; move the callstack from sp to _________limaddr by a signed offset _______woffs

	.align quad
DEF_C_LAB(_move_callstack)
	ldl	rt0, (rusp)	;;; _________limaddr
	ldl	rt2, 4(rusp)	;;; _______woffs
	lda	rusp, 8(rusp)
	subl	rt0, sp, rt0	;;; _________limaddr - sp = offset to move in rt0
	mov	sp, rt1		;;; sp = src for move in rt1
	addl	sp, rt2, rt2	;;; sp + _______woffs = destination in rt2 ...
	mov	rt2, sp		;;; ... and new sp afterwards
	br	movwords	;;; do the move


;;; --- BITFIELD ROUTINES ------------------------------------------------


	;;; _bfield: access an unsigned bit field.
	;;; rt0 = struct address, rt1 = bitoffset, rt2 = field width W
	;;;   (assumed 1 <= W <= 64; the field must not span a quadword
	;;;   boundary beyond the 8 bytes loaded)
	;;; returns field value (zero-extended) in rt0.
	;;; Technique: load the quad containing the field, shift the field
	;;; to the top of the register, then shift back down logically.
	.align quad
DEF_C_LAB (_bfield)
	mov	#64, rt4
	subl	rt4, rt2, rt4		;;; 64-W = field shift-down
	and	rt1, #7, rt3		;;; bitoffs within start byte
	subl	rt4, rt3, rt3		;;; 64-W-bitoffs = field shift-up
	sra	rt1, #3, rt1		;;; trunc bit offset to byte offset
	addl	rt0, rt1, rt0		;;; add to struct addr in rt0
	;;; load quadword from unaligned address in rt0
	ldq_u	rt1, (rt0)
	ldq_u	rt2, 7(rt0)
	extql	rt1, rt0, rt1
	extqh	rt2, rt0, rt2
	or	rt1, rt2, rt0		;;; the quadword containing the field
	;;; align the bitfield
	sll	rt0, rt3, rt0		;;; shift up to reg top
	srl	rt0, rt4, rt0		;;; logical shift down (zero-extend)
	ret	(rret)


	;;; _sbfield: access a signed bit field (identical to _bfield
	;;; except the final shift is arithmetic, sign-extending the field)
	;;; rt0 = struct address, rt1 = bitoffset, rt2 = field width W,
	;;; returns field value (sign-extended) in rt0.
	.align quad
DEF_C_LAB (_sbfield)
	mov	#64, rt4
	subl	rt4, rt2, rt4		;;; 64-W = field shift-down
	and	rt1, #7, rt3		;;; bitoffs within start byte
	subl	rt4, rt3, rt3		;;; 64-W-bitoffs = field shift-up
	sra	rt1, #3, rt1		;;; trunc bit offset to byte offset
	addl	rt0, rt1, rt0		;;; add to struct addr in rt0
	;;; load quadword from unaligned address in rt0
	ldq_u	rt1, (rt0)
	ldq_u	rt2, 7(rt0)
	extql	rt1, rt0, rt1
	extqh	rt2, rt0, rt2
	or	rt1, rt2, rt0		;;; the quadword
	;;; align the bitfield
	sll	rt0, rt3, rt0		;;; shift up to reg top
	sra	rt0, rt4, rt0		;;; arithmetic shift down (sign-extend)
	ret	(rret)


	;;; _ubfield: update a bit field.
	;;; rt0 = struct address, rt1 = bitoffset, rt2 = field width W;
	;;; field updated from the new value popped off the user stack.
	;;; Loads the quad containing the field, clears the old field with
	;;; a computed mask, inserts the new value, and stores back through
	;;; the two unaligned-quad halves.
	.align quad
DEF_C_LAB (_ubfield)
	mov	#64, rt4
	subl	rt4, rt2, rt4		;;; 64-W = field shift-down
	and	rt1, #7, rt3		;;; bitoffs within start byte
	subl	rt4, rt3, rt3		;;; 64-W-bitoffs = field shift-up
	sra	rt1, #3, rt1		;;; trunc bit offset to byte offset
	addl	rt0, rt1, rt0		;;; add to struct addr in rt0
	;;; load quadword from unaligned address in rt0
	ldq_u	rt1, (rt0)
	ldq_u	rt2, 7(rt0)
	extql	rt1, rt0, rt5
	extqh	rt2, rt0, rt6
	or	rt5, rt6, rt5		;;; the quadword containing the field
	;;; clear old field and insert new
	mov	#-1, rt6		;;; all 1s for mask
	sll	rt6, rt4, rt6		;;; shift up by 64-W
	srl	rt6, rt3, rt6		;;; shift down by 64-W-bitoffs = mask
	subl	rt4, rt3, rt4		;;; (64-W)-(64-W-bitoffs) = bitoffs
	ldl	rt3, (rusp)		;;; new value for field
	bic	rt5, rt6, rt5		;;; clear old field in quadword
	lda	rusp, 4(rusp)		;;; pop the value
	sll	rt3, rt4, rt3		;;; shift up new field by bitoffs
	and	rt3, rt6, rt3		;;; mask new field
	or	rt5, rt3, rt5		;;; insert new field in quadword
	;;; store rt5 quadword at unaligned address (loads still in rt1, rt2)
	insqh	rt5, rt0, rt4
	insql	rt5, rt0, rt3
	;;; BUGFIX: mask on the address register rt0, not r0 -- the original
	;;; code masked with r0, a different register, so the ins/msk byte
	;;; selections disagreed and adjacent bytes could be corrupted.
	;;; Every other ins/msk store sequence in this file keys both on
	;;; the same (destination address) register.
	mskqh	rt2, rt0, rt2
	mskql	rt1, rt0, rt1
	or	rt2, rt4, rt2
	or	rt1, rt3, rt1
	stq_u	rt2, 7(rt0)		;;; must store high first
	stq_u	rt1, (rt0)
	ret	(rret)


	.end
