summaryrefslogtreecommitdiff
path: root/sys/arch/sparc64
diff options
context:
space:
mode:
authorMark Kettenis <kettenis@cvs.openbsd.org>2013-06-13 19:33:05 +0000
committerMark Kettenis <kettenis@cvs.openbsd.org>2013-06-13 19:33:05 +0000
commit81690cea09a348595634b39f1055aa090f221b36 (patch)
tree007bcdf0e045ac39044922a2f513a37834ff8553 /sys/arch/sparc64
parentf03fa0019e0c57aa68c92b8aeab4c1f60b0568a4 (diff)
Get rid of the VIS-optimized bcopy/bzero code. This has never been enabled,
but did get compiled in. Made locore.s even more intimidating. ok deraadt@
Diffstat (limited to 'sys/arch/sparc64')
-rw-r--r--sys/arch/sparc64/sparc64/locore.s1303
1 files changed, 1 insertions, 1302 deletions
diff --git a/sys/arch/sparc64/sparc64/locore.s b/sys/arch/sparc64/sparc64/locore.s
index 57d646f3774..820ca6311ac 100644
--- a/sys/arch/sparc64/sparc64/locore.s
+++ b/sys/arch/sparc64/sparc64/locore.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.s,v 1.172 2013/06/13 19:11:13 kettenis Exp $ */
+/* $OpenBSD: locore.s,v 1.173 2013/06/13 19:33:04 kettenis Exp $ */
/* $NetBSD: locore.s,v 1.137 2001/08/13 06:10:10 jdolecek Exp $ */
/*
@@ -300,69 +300,6 @@ _C_LABEL(sun4u_mtp_patch_end):
movz %icc, %o0, %sp
.endm
-/*
- * The following routines allow fpu use in the kernel.
- *
- * They allocate a stack frame and use all local regs. Extra
- * local storage can be requested by setting the siz parameter,
- * and can be accessed at %sp+CC64FSZ.
- */
-
- .macro ENABLE_FPU siz
- save %sp, -(CC64FSZ), %sp; ! Allocate a stack frame
- GET_CPUINFO_VA(%l1);
- add %fp, BIAS-FS_SIZE, %l0; ! Allocate a fpstate
- ldx [%l1 + CI_FPPROC], %l2; ! Load fpproc
- andn %l0, BLOCK_SIZE, %l0; ! Align it
- clr %l3; ! NULL fpstate
- brz,pt %l2, 1f; ! fpproc == NULL?
- add %l0, -BIAS-CC64FSZ-(\siz), %sp; ! Set proper %sp
- ldx [%l2 + P_FPSTATE], %l3;
- brz,pn %l3, 1f; ! Make sure we have an fpstate
- mov %l3, %o0;
- call _C_LABEL(savefpstate); ! Save the old fpstate
-1:
- set EINTSTACK-BIAS, %l4; ! Are we on intr stack?
- cmp %sp, %l4;
- bgu,pt %xcc, 1f;
- set INTSTACK-BIAS, %l4;
- cmp %sp, %l4;
- blu %xcc, 1f;
-0:
- sethi %hi(_C_LABEL(proc0)), %l4; ! Yes, use proc0
- ba,pt %xcc, 2f; ! XXXX needs to change to CPUs idle proc
- or %l4, %lo(_C_LABEL(proc0)), %l5;
-1:
- GET_CURPROC(%l5); ! Use curproc
- brz,pn %l5, 0b; nop; ! If curproc is NULL need to use proc0
-2:
- ldx [%l5 + P_FPSTATE], %l6; ! Save old fpstate
- stx %l0, [%l5 + P_FPSTATE]; ! Insert new fpstate
- stx %l5, [%l1 + CI_FPPROC]; ! Set new fpproc
- wr %g0, FPRS_FEF, %fprs ! Enable FPU
- .endm
-
-/*
- * We've saved our possible fpstate, now disable the fpu
- * and continue with life.
- */
-
- .macro RESTORE_FPU
-#ifdef DEBUG
- ldx [%l5 + P_FPSTATE], %l7
- cmp %l7, %l0
- tnz 1
-#endif /* DEBUG */
- stx %l2, [%l1 + CI_FPPROC] ! Restore old fproc
- wr %g0, 0, %fprs ! Disable fpu
- brz,pt %l3, 1f ! Skip if no fpstate
- stx %l6, [%l5 + P_FPSTATE] ! Restore old fpstate
-
- call _C_LABEL(loadfpstate) ! Reload orig fpstate
- mov %l3, %o0
-1:
- .endm
-
.data
.globl _C_LABEL(data_start)
@@ -6481,22 +6418,6 @@ ENTRY(pseg_set)
mov 1, %o0
-/*
- * Use block_disable to turn off block instructions for
- * bcopy/memset
- */
- .data
- .align 8
- .globl block_disable
-block_disable: .xword 1
- .text
-
-#if 0
-#define ASI_STORE ASI_BLK_COMMIT_P
-#else /* 0 */
-#define ASI_STORE ASI_BLK_P
-#endif /* 0 */
-
#if 1
/*
* kernel bcopy/memcpy
@@ -6582,11 +6503,6 @@ Lovbcopy:
* Plenty of data to copy, so try to do it optimally.
*/
2:
-#if 0
- ! If it is big enough, use VIS instructions
- bge Lbcopy_block
- nop
-#endif /* 0 */
Lbcopy_fancy:
!!
@@ -6910,1134 +6826,10 @@ Lbcopy_finish:
Lbcopy_complete:
ret
restore %i1, %g0, %o0
-
-#if 1
-
-/*
- * Block copy. Useful for >256 byte copies.
- *
- * Benchmarking has shown this always seems to be slower than
- * the integer version, so this is disabled. Maybe someone will
- * figure out why sometime.
- */
-
-Lbcopy_block:
- sethi %hi(block_disable), %o3
- ldx [ %o3 + %lo(block_disable) ], %o3
- brnz,pn %o3, Lbcopy_fancy
- !! Make sure our trap table is installed
- set _C_LABEL(trapbase), %o5
- rdpr %tba, %o3
- sub %o3, %o5, %o3
- brnz,pn %o3, Lbcopy_fancy ! No, then don't use block load/store
- nop
-#ifdef _KERNEL
-/*
- * Kernel:
- *
- * Here we use VIS instructions to do a block copy.
- * But before we can do that we need to save and enable the FPU.
- * The last owner of the FPU registers is fpproc, and
- * fpproc->p_md.md_fpstate is the current fpstate. If that's not
- * null, call savefpstate() with it to store our current fp state.
- *
- * Next, allocate an aligned fpstate on the stack. We will properly
- * nest calls on a particular stack so this should not be a problem.
- *
- * Now we grab either curproc (or if we're on the interrupt stack
- * proc0). We stash its existing fpstate in a local register and
- * put our new fpstate in curproc->p_md.md_fpstate. We point
- * fpproc at curproc (or proc0) and enable the FPU.
- *
- * If we are ever preempted, our FPU state will be saved in our
- * fpstate. Then, when we're resumed and we take an FPDISABLED
- * trap, the trap handler will be able to fish our FPU state out
- * of curproc (or proc0).
- *
- * On exiting this routine we undo the damage: restore the original
- * pointer to curproc->p_md.md_fpstate, clear our fpproc, and disable
- * the FPU.
- *
- *
- * Register usage, Kernel only (after save):
- *
- * %i0 src
- * %i1 dest
- * %i2 size
- *
- * %l0 XXXX DEBUG old fpstate
- * %l1 fpproc (hi bits only)
- * %l2 orig fpproc
- * %l3 orig fpstate
- * %l5 curproc
- * %l6 old fpstate
- *
- * Register usage, Kernel and user:
- *
- * %g1 src (retval for memcpy)
- *
- * %o0 src
- * %o1 dest
- * %o2 end dest
- * %o5 last safe fetchable address
- */
-
- ENABLE_FPU 0
- mov %i0, %o0 ! Src addr.
- mov %i1, %o1 ! Store our dest ptr here.
- mov %i2, %o2 ! Len counter
-#endif /* _KERNEL */
-
- !!
- !! First align the output to a 64-bit entity
- !!
-
- mov %o1, %g1 ! memcpy retval
- add %o0, %o2, %o5 ! End of source block
-
- andn %o0, 7, %o3 ! Start of block
- dec %o5
- fzero %f0
-
- andn %o5, BLOCK_ALIGN, %o5 ! Last safe addr.
- ldd [%o3], %f2 ! Load 1st word
-
- dec 8, %o3 ! Move %o3 1 word back
- btst 1, %o1
- bz 4f
-
- mov -7, %o4 ! Lowest src addr possible
- alignaddr %o0, %o4, %o4 ! Base addr for load.
-
- cmp %o3, %o4
- be,pt %xcc, 1f ! Already loaded?
- mov %o4, %o3
- fmovd %f2, %f0 ! No. Shift
- ldd [%o3+8], %f2 ! And load
-1:
-
- faligndata %f0, %f2, %f4 ! Isolate 1st byte
-
- stda %f4, [%o1] ASI_FL8_P ! Store 1st byte
- inc 1, %o1 ! Update address
- inc 1, %o0
- dec 1, %o2
-4:
- btst 2, %o1
- bz 4f
-
- mov -6, %o4 ! Calculate src - 6
- alignaddr %o0, %o4, %o4 ! calculate shift mask and dest.
-
- cmp %o3, %o4 ! Addresses same?
- be,pt %xcc, 1f
- mov %o4, %o3
- fmovd %f2, %f0 ! Shuffle data
- ldd [%o3+8], %f2 ! Load word 0
-1:
- faligndata %f0, %f2, %f4 ! Move 1st short low part of f8
-
- stda %f4, [%o1] ASI_FL16_P ! Store 1st short
- dec 2, %o2
- inc 2, %o1
- inc 2, %o0
-4:
- brz,pn %o2, Lbcopy_blockfinish ! XXXX
-
- btst 4, %o1
- bz 4f
-
- mov -4, %o4
- alignaddr %o0, %o4, %o4 ! calculate shift mask and dest.
-
- cmp %o3, %o4 ! Addresses same?
- beq,pt %xcc, 1f
- mov %o4, %o3
- fmovd %f2, %f0 ! Shuffle data
- ldd [%o3+8], %f2 ! Load word 0
-1:
- faligndata %f0, %f2, %f4 ! Move 1st short low part of f8
-
- st %f5, [%o1] ! Store word
- dec 4, %o2
- inc 4, %o1
- inc 4, %o0
-4:
- brz,pn %o2, Lbcopy_blockfinish ! XXXX
- !!
- !! We are now 32-bit aligned in the dest.
- !!
-Lbcopy_block_common:
-
- mov -0, %o4
- alignaddr %o0, %o4, %o4 ! base - shift
-
- cmp %o3, %o4 ! Addresses same?
- beq,pt %xcc, 1f
- mov %o4, %o3
- fmovd %f2, %f0 ! Shuffle data
- ldd [%o3+8], %f2 ! Load word 0
-1:
- add %o3, 8, %o0 ! now use %o0 for src
-
- !!
- !! Continue until our dest is block aligned
- !!
-Lbcopy_block_aligned8:
-1:
- brz %o2, Lbcopy_blockfinish
- btst BLOCK_ALIGN, %o1 ! Block aligned?
- bz 1f
-
- faligndata %f0, %f2, %f4 ! Generate result
- deccc 8, %o2
- ble,pn %icc, Lbcopy_blockfinish ! Should never happen
- fmovd %f4, %f48
-
- std %f4, [%o1] ! Store result
- inc 8, %o1
-
- fmovd %f2, %f0
- inc 8, %o0
- ba,pt %xcc, 1b ! Not yet.
- ldd [%o0], %f2 ! Load next part
-Lbcopy_block_aligned64:
-1:
-
-/*
- * 64-byte aligned -- ready for block operations.
- *
- * Here we have the destination block aligned, but the
- * source pointer may not be. Sub-word alignment will
- * be handled by faligndata instructions. But the source
- * can still be potentially aligned to 8 different words
- * in our 64-bit block, so we have 8 different copy routines.
- *
- * Once we figure out our source alignment, we branch
- * to the appropriate copy routine, which sets up the
- * alignment for faligndata and loads (sets) the values
- * into the source registers and does the copy loop.
- *
- * When we're down to less than 1 block to store, we
- * exit the copy loop and execute cleanup code.
- *
- * Block loads and stores are not properly interlocked.
- * Stores save one reg/cycle, so you can start overwriting
- * registers the cycle after the store is issued.
- *
- * Block loads require a block load to a different register
- * block or a membar #Sync before accessing the loaded
- * data.
- *
- * Since the faligndata instructions may be offset as far
- * as 7 registers into a block (if you are shifting source
- * 7 -> dest 0), you need 3 source register blocks for full
- * performance: one you are copying, one you are loading,
- * and one for interlocking. Otherwise, we would need to
- * sprinkle the code with membar #Sync and lose the advantage
- * of running faligndata in parallel with block stores. This
- * means we are fetching a full 128 bytes ahead of the stores.
- * We need to make sure the prefetch does not inadvertently
- * cross a page boundary and fault on data that we will never
- * store.
- *
- */
-#if 1
- and %o0, BLOCK_ALIGN, %o3
- srax %o3, 3, %o3 ! Isolate the offset
-
- brz %o3, L100 ! 0->0
- btst 4, %o3
- bnz %xcc, 4f
- btst 2, %o3
- bnz %xcc, 2f
- btst 1, %o3
- ba,pt %xcc, L101 ! 0->1
- nop /* XXX spitfire bug */
-2:
- bz %xcc, L102 ! 0->2
- nop
- ba,pt %xcc, L103 ! 0->3
- nop /* XXX spitfire bug */
-4:
- bnz %xcc, 2f
- btst 1, %o3
- bz %xcc, L104 ! 0->4
- nop
- ba,pt %xcc, L105 ! 0->5
- nop /* XXX spitfire bug */
-2:
- bz %xcc, L106 ! 0->6
- nop
- ba,pt %xcc, L107 ! 0->7
- nop /* XXX spitfire bug */
-#else /* 1 */
-
- !!
- !! Isolate the word offset, which just happens to be
- !! the slot in our jump table.
- !!
- !! This is 6 instructions, most of which cannot be paired,
- !! which is about the same as the above version.
- !!
- rd %pc, %o4
-1:
- and %o0, 0x31, %o3
- add %o3, (Lbcopy_block_jmp - 1b), %o3
- jmpl %o4 + %o3, %g0
- nop
-
- !!
- !! Jump table
- !!
-
-Lbcopy_block_jmp:
- ba,a,pt %xcc, L100
- nop
- ba,a,pt %xcc, L101
- nop
- ba,a,pt %xcc, L102
- nop
- ba,a,pt %xcc, L103
- nop
- ba,a,pt %xcc, L104
- nop
- ba,a,pt %xcc, L105
- nop
- ba,a,pt %xcc, L106
- nop
- ba,a,pt %xcc, L107
- nop
-#endif /* 1 */
-
- !!
- !! Source is block aligned.
- !!
- !! Just load a block and go.
- !!
-L100:
-#ifdef RETURN_NAME
- sethi %hi(1f), %g1
- ba,pt %icc, 2f
- or %g1, %lo(1f), %g1
-1:
- .asciz "L100"
- .align 8
-2:
-#endif /* RETURN_NAME */
- fmovd %f0 , %f62
- ldda [%o0] ASI_BLK_P, %f0
- inc BLOCK_SIZE, %o0
- cmp %o0, %o5
- bleu,a,pn %icc, 3f
- ldda [%o0] ASI_BLK_P, %f16
- ba,pt %icc, 3f
- membar #Sync
-
- .align 32 ! ICache align.
-3:
- faligndata %f62, %f0, %f32
- inc BLOCK_SIZE, %o0
- faligndata %f0, %f2, %f34
- dec BLOCK_SIZE, %o2
- faligndata %f2, %f4, %f36
- cmp %o0, %o5
- faligndata %f4, %f6, %f38
- faligndata %f6, %f8, %f40
- faligndata %f8, %f10, %f42
- faligndata %f10, %f12, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f12, %f14, %f46
-
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f48
- membar #Sync
-2:
- stda %f32, [%o1] ASI_STORE
- faligndata %f14, %f16, %f32
- inc BLOCK_SIZE, %o0
- faligndata %f16, %f18, %f34
- inc BLOCK_SIZE, %o1
- faligndata %f18, %f20, %f36
- dec BLOCK_SIZE, %o2
- faligndata %f20, %f22, %f38
- cmp %o0, %o5
- faligndata %f22, %f24, %f40
- faligndata %f24, %f26, %f42
- faligndata %f26, %f28, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f28, %f30, %f46
-
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f0
- membar #Sync
-2:
- stda %f32, [%o1] ASI_STORE
- faligndata %f30, %f48, %f32
- inc BLOCK_SIZE, %o0
- faligndata %f48, %f50, %f34
- inc BLOCK_SIZE, %o1
- faligndata %f50, %f52, %f36
- dec BLOCK_SIZE, %o2
- faligndata %f52, %f54, %f38
- cmp %o0, %o5
- faligndata %f54, %f56, %f40
- faligndata %f56, %f58, %f42
- faligndata %f58, %f60, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f60, %f62, %f46
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16 ! Increment is at top
- membar #Sync
-2:
- stda %f32, [%o1] ASI_STORE
- ba 3b
- inc BLOCK_SIZE, %o1
-
- !!
- !! Source at BLOCK_ALIGN+8
- !!
- !! We need to load almost 1 complete block by hand.
- !!
-L101:
-#ifdef RETURN_NAME
- sethi %hi(1f), %g1
- ba,pt %icc, 2f
- or %g1, %lo(1f), %g1
-1:
- .asciz "L101"
- .align 8
-2:
-#endif /* RETURN_NAME */
-! fmovd %f0, %f0 ! Hoist fmovd
- ldd [%o0], %f2
- inc 8, %o0
- ldd [%o0], %f4
- inc 8, %o0
- ldd [%o0], %f6
- inc 8, %o0
- ldd [%o0], %f8
- inc 8, %o0
- ldd [%o0], %f10
- inc 8, %o0
- ldd [%o0], %f12
- inc 8, %o0
- ldd [%o0], %f14
- inc 8, %o0
-
- cmp %o0, %o5
- bleu,a,pn %icc, 3f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-3:
- faligndata %f0, %f2, %f32
- inc BLOCK_SIZE, %o0
- faligndata %f2, %f4, %f34
- cmp %o0, %o5
- faligndata %f4, %f6, %f36
- dec BLOCK_SIZE, %o2
- faligndata %f6, %f8, %f38
- faligndata %f8, %f10, %f40
- faligndata %f10, %f12, %f42
- faligndata %f12, %f14, %f44
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f48
- membar #Sync
-2:
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f14, %f16, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f16, %f18, %f32
- inc BLOCK_SIZE, %o0
- faligndata %f18, %f20, %f34
- inc BLOCK_SIZE, %o1
- faligndata %f20, %f22, %f36
- cmp %o0, %o5
- faligndata %f22, %f24, %f38
- dec BLOCK_SIZE, %o2
- faligndata %f24, %f26, %f40
- faligndata %f26, %f28, %f42
- faligndata %f28, %f30, %f44
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f0
- membar #Sync
-2:
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f30, %f48, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f48, %f50, %f32
- inc BLOCK_SIZE, %o0
- faligndata %f50, %f52, %f34
- inc BLOCK_SIZE, %o1
- faligndata %f52, %f54, %f36
- cmp %o0, %o5
- faligndata %f54, %f56, %f38
- dec BLOCK_SIZE, %o2
- faligndata %f56, %f58, %f40
- faligndata %f58, %f60, %f42
- faligndata %f60, %f62, %f44
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f62, %f0, %f46
-
- stda %f32, [%o1] ASI_STORE
- ba 3b
- inc BLOCK_SIZE, %o1
-
- !!
- !! Source at BLOCK_ALIGN+16
- !!
- !! We need to load 6 doubles by hand.
- !!
-L102:
-#ifdef RETURN_NAME
- sethi %hi(1f), %g1
- ba,pt %icc, 2f
- or %g1, %lo(1f), %g1
-1:
- .asciz "L102"
- .align 8
-2:
-#endif /* RETURN_NAME */
- ldd [%o0], %f4
- inc 8, %o0
- fmovd %f0, %f2 ! Hoist fmovd
- ldd [%o0], %f6
- inc 8, %o0
-
- ldd [%o0], %f8
- inc 8, %o0
- ldd [%o0], %f10
- inc 8, %o0
- ldd [%o0], %f12
- inc 8, %o0
- ldd [%o0], %f14
- inc 8, %o0
-
- cmp %o0, %o5
- bleu,a,pn %icc, 3f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-3:
- faligndata %f2, %f4, %f32
- inc BLOCK_SIZE, %o0
- faligndata %f4, %f6, %f34
- cmp %o0, %o5
- faligndata %f6, %f8, %f36
- dec BLOCK_SIZE, %o2
- faligndata %f8, %f10, %f38
- faligndata %f10, %f12, %f40
- faligndata %f12, %f14, %f42
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f48
- membar #Sync
-2:
- faligndata %f14, %f16, %f44
-
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f16, %f18, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f18, %f20, %f32
- inc BLOCK_SIZE, %o0
- faligndata %f20, %f22, %f34
- inc BLOCK_SIZE, %o1
- faligndata %f22, %f24, %f36
- cmp %o0, %o5
- faligndata %f24, %f26, %f38
- dec BLOCK_SIZE, %o2
- faligndata %f26, %f28, %f40
- faligndata %f28, %f30, %f42
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f0
- membar #Sync
-2:
- faligndata %f30, %f48, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f48, %f50, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f50, %f52, %f32
- inc BLOCK_SIZE, %o0
- faligndata %f52, %f54, %f34
- inc BLOCK_SIZE, %o1
- faligndata %f54, %f56, %f36
- cmp %o0, %o5
- faligndata %f56, %f58, %f38
- dec BLOCK_SIZE, %o2
- faligndata %f58, %f60, %f40
- faligndata %f60, %f62, %f42
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- faligndata %f62, %f0, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f0, %f2, %f46
-
- stda %f32, [%o1] ASI_STORE
- ba 3b
- inc BLOCK_SIZE, %o1
- !!
- !! Source at BLOCK_ALIGN+24
- !!
- !! We need to load 5 doubles by hand.
- !!
-L103:
-#ifdef RETURN_NAME
- sethi %hi(1f), %g1
- ba,pt %icc, 2f
- or %g1, %lo(1f), %g1
-1:
- .asciz "L103"
- .align 8
-2:
-#endif /* RETURN_NAME */
- fmovd %f0, %f4
- ldd [%o0], %f6
- inc 8, %o0
- ldd [%o0], %f8
- inc 8, %o0
- ldd [%o0], %f10
- inc 8, %o0
- ldd [%o0], %f12
- inc 8, %o0
- ldd [%o0], %f14
- inc 8, %o0
-
- cmp %o0, %o5
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- inc BLOCK_SIZE, %o0
-3:
- faligndata %f4, %f6, %f32
- cmp %o0, %o5
- faligndata %f6, %f8, %f34
- dec BLOCK_SIZE, %o2
- faligndata %f8, %f10, %f36
- faligndata %f10, %f12, %f38
- faligndata %f12, %f14, %f40
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f48
- membar #Sync
-2:
- faligndata %f14, %f16, %f42
- inc BLOCK_SIZE, %o0
- faligndata %f16, %f18, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f18, %f20, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f20, %f22, %f32
- cmp %o0, %o5
- faligndata %f22, %f24, %f34
- dec BLOCK_SIZE, %o2
- faligndata %f24, %f26, %f36
- inc BLOCK_SIZE, %o1
- faligndata %f26, %f28, %f38
- faligndata %f28, %f30, %f40
- ble,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f0
- membar #Sync
-2:
- faligndata %f30, %f48, %f42
- inc BLOCK_SIZE, %o0
- faligndata %f48, %f50, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f50, %f52, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f52, %f54, %f32
- cmp %o0, %o5
- faligndata %f54, %f56, %f34
- dec BLOCK_SIZE, %o2
- faligndata %f56, %f58, %f36
- faligndata %f58, %f60, %f38
- inc BLOCK_SIZE, %o1
- faligndata %f60, %f62, %f40
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- faligndata %f62, %f0, %f42
- inc BLOCK_SIZE, %o0
- faligndata %f0, %f2, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f2, %f4, %f46
-
- stda %f32, [%o1] ASI_STORE
- ba 3b
- inc BLOCK_SIZE, %o1
-
- !!
- !! Source at BLOCK_ALIGN+32
- !!
- !! We need to load 4 doubles by hand.
- !!
-L104:
-#ifdef RETURN_NAME
- sethi %hi(1f), %g1
- ba,pt %icc, 2f
- or %g1, %lo(1f), %g1
-1:
- .asciz "L104"
- .align 8
-2:
-#endif /* RETURN_NAME */
- fmovd %f0, %f6
- ldd [%o0], %f8
- inc 8, %o0
- ldd [%o0], %f10
- inc 8, %o0
- ldd [%o0], %f12
- inc 8, %o0
- ldd [%o0], %f14
- inc 8, %o0
-
- cmp %o0, %o5
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- inc BLOCK_SIZE, %o0
-3:
- faligndata %f6, %f8, %f32
- cmp %o0, %o5
- faligndata %f8, %f10, %f34
- dec BLOCK_SIZE, %o2
- faligndata %f10, %f12, %f36
- faligndata %f12, %f14, %f38
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f48
- membar #Sync
-2:
- faligndata %f14, %f16, %f40
- faligndata %f16, %f18, %f42
- inc BLOCK_SIZE, %o0
- faligndata %f18, %f20, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f20, %f22, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f22, %f24, %f32
- cmp %o0, %o5
- faligndata %f24, %f26, %f34
- faligndata %f26, %f28, %f36
- inc BLOCK_SIZE, %o1
- faligndata %f28, %f30, %f38
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f0
- membar #Sync
-2:
- faligndata %f30, %f48, %f40
- dec BLOCK_SIZE, %o2
- faligndata %f48, %f50, %f42
- inc BLOCK_SIZE, %o0
- faligndata %f50, %f52, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f52, %f54, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f54, %f56, %f32
- cmp %o0, %o5
- faligndata %f56, %f58, %f34
- faligndata %f58, %f60, %f36
- inc BLOCK_SIZE, %o1
- faligndata %f60, %f62, %f38
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- faligndata %f62, %f0, %f40
- dec BLOCK_SIZE, %o2
- faligndata %f0, %f2, %f42
- inc BLOCK_SIZE, %o0
- faligndata %f2, %f4, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f4, %f6, %f46
-
- stda %f32, [%o1] ASI_STORE
- ba 3b
- inc BLOCK_SIZE, %o1
-
- !!
- !! Source at BLOCK_ALIGN+40
- !!
- !! We need to load 3 doubles by hand.
- !!
-L105:
-#ifdef RETURN_NAME
- sethi %hi(1f), %g1
- ba,pt %icc, 2f
- or %g1, %lo(1f), %g1
-1:
- .asciz "L105"
- .align 8
-2:
-#endif /* RETURN_NAME */
- fmovd %f0, %f8
- ldd [%o0], %f10
- inc 8, %o0
- ldd [%o0], %f12
- inc 8, %o0
- ldd [%o0], %f14
- inc 8, %o0
-
- cmp %o0, %o5
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- inc BLOCK_SIZE, %o0
-3:
- faligndata %f8, %f10, %f32
- cmp %o0, %o5
- faligndata %f10, %f12, %f34
- faligndata %f12, %f14, %f36
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f48
- membar #Sync
-2:
- faligndata %f14, %f16, %f38
- dec BLOCK_SIZE, %o2
- faligndata %f16, %f18, %f40
- inc BLOCK_SIZE, %o0
- faligndata %f18, %f20, %f42
- faligndata %f20, %f22, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f22, %f24, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f24, %f26, %f32
- cmp %o0, %o5
- faligndata %f26, %f28, %f34
- dec BLOCK_SIZE, %o2
- faligndata %f28, %f30, %f36
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f0
- membar #Sync
-2:
- faligndata %f30, %f48, %f38
- inc BLOCK_SIZE, %o1
- faligndata %f48, %f50, %f40
- inc BLOCK_SIZE, %o0
- faligndata %f50, %f52, %f42
- faligndata %f52, %f54, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f54, %f56, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f56, %f58, %f32
- cmp %o0, %o5
- faligndata %f58, %f60, %f34
- dec BLOCK_SIZE, %o2
- faligndata %f60, %f62, %f36
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- faligndata %f62, %f0, %f38
- inc BLOCK_SIZE, %o1
- faligndata %f0, %f2, %f40
- inc BLOCK_SIZE, %o0
- faligndata %f2, %f4, %f42
- faligndata %f4, %f6, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f6, %f8, %f46
-
- stda %f32, [%o1] ASI_STORE
- ba 3b
- inc BLOCK_SIZE, %o1
-
-
- !!
- !! Source at BLOCK_ALIGN+48
- !!
- !! We need to load 2 doubles by hand.
- !!
-L106:
-#ifdef RETURN_NAME
- sethi %hi(1f), %g1
- ba,pt %icc, 2f
- or %g1, %lo(1f), %g1
-1:
- .asciz "L106"
- .align 8
-2:
-#endif /* RETURN_NAME */
- fmovd %f0, %f10
- ldd [%o0], %f12
- inc 8, %o0
- ldd [%o0], %f14
- inc 8, %o0
-
- cmp %o0, %o5
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- inc BLOCK_SIZE, %o0
-3:
- faligndata %f10, %f12, %f32
- cmp %o0, %o5
- faligndata %f12, %f14, %f34
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f48
- membar #Sync
-2:
- faligndata %f14, %f16, %f36
- dec BLOCK_SIZE, %o2
- faligndata %f16, %f18, %f38
- inc BLOCK_SIZE, %o0
- faligndata %f18, %f20, %f40
- faligndata %f20, %f22, %f42
- faligndata %f22, %f24, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f24, %f26, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f26, %f28, %f32
- cmp %o0, %o5
- faligndata %f28, %f30, %f34
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f0
- membar #Sync
-2:
- faligndata %f30, %f48, %f36
- dec BLOCK_SIZE, %o2
- faligndata %f48, %f50, %f38
- inc BLOCK_SIZE, %o1
- faligndata %f50, %f52, %f40
- faligndata %f52, %f54, %f42
- inc BLOCK_SIZE, %o0
- faligndata %f54, %f56, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f56, %f58, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f58, %f60, %f32
- cmp %o0, %o5
- faligndata %f60, %f62, %f34
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- faligndata %f62, %f0, %f36
- dec BLOCK_SIZE, %o2
- faligndata %f0, %f2, %f38
- inc BLOCK_SIZE, %o1
- faligndata %f2, %f4, %f40
- faligndata %f4, %f6, %f42
- inc BLOCK_SIZE, %o0
- faligndata %f6, %f8, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f8, %f10, %f46
-
- stda %f32, [%o1] ASI_STORE
- ba 3b
- inc BLOCK_SIZE, %o1
-
-
- !!
- !! Source at BLOCK_ALIGN+56
- !!
- !! We need to load 1 double by hand.
- !!
-L107:
-#ifdef RETURN_NAME
- sethi %hi(1f), %g1
- ba,pt %icc, 2f
- or %g1, %lo(1f), %g1
-1:
- .asciz "L107"
- .align 8
-2:
-#endif /* RETURN_NAME */
- fmovd %f0, %f12
- ldd [%o0], %f14
- inc 8, %o0
-
- cmp %o0, %o5
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- inc BLOCK_SIZE, %o0
-3:
- faligndata %f12, %f14, %f32
- cmp %o0, %o5
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f48
- membar #Sync
-2:
- faligndata %f14, %f16, %f34
- dec BLOCK_SIZE, %o2
- faligndata %f16, %f18, %f36
- inc BLOCK_SIZE, %o0
- faligndata %f18, %f20, %f38
- faligndata %f20, %f22, %f40
- faligndata %f22, %f24, %f42
- faligndata %f24, %f26, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f26, %f28, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f28, %f30, %f32
- cmp %o0, %o5
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f0
- membar #Sync
-2:
- faligndata %f30, %f48, %f34
- dec BLOCK_SIZE, %o2
- faligndata %f48, %f50, %f36
- inc BLOCK_SIZE, %o1
- faligndata %f50, %f52, %f38
- faligndata %f52, %f54, %f40
- inc BLOCK_SIZE, %o0
- faligndata %f54, %f56, %f42
- faligndata %f56, %f58, %f44
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f58, %f60, %f46
-
- stda %f32, [%o1] ASI_STORE
-
- faligndata %f60, %f62, %f32
- cmp %o0, %o5
- bleu,a,pn %icc, 2f
- ldda [%o0] ASI_BLK_P, %f16
- membar #Sync
-2:
- faligndata %f62, %f0, %f34
- dec BLOCK_SIZE, %o2
- faligndata %f0, %f2, %f36
- inc BLOCK_SIZE, %o1
- faligndata %f2, %f4, %f38
- faligndata %f4, %f6, %f40
- inc BLOCK_SIZE, %o0
- faligndata %f6, %f8, %f42
- faligndata %f8, %f10, %f44
-
- brlez,pn %o2, Lbcopy_blockdone
- faligndata %f10, %f12, %f46
-
- stda %f32, [%o1] ASI_STORE
- ba 3b
- inc BLOCK_SIZE, %o1
-
-Lbcopy_blockdone:
- inc BLOCK_SIZE, %o2 ! Fixup our overcommit
- membar #Sync ! Finish any pending loads
-#define FINISH_REG(f) \
- deccc 8, %o2; \
- bl,a Lbcopy_blockfinish; \
- fmovd f, %f48; \
- std f, [%o1]; \
- inc 8, %o1
-
- FINISH_REG(%f32)
- FINISH_REG(%f34)
- FINISH_REG(%f36)
- FINISH_REG(%f38)
- FINISH_REG(%f40)
- FINISH_REG(%f42)
- FINISH_REG(%f44)
- FINISH_REG(%f46)
- FINISH_REG(%f48)
-#undef FINISH_REG
- !!
- !! The low 3 bits have the sub-word bits needed to be
- !! stored [because (x-8)&0x7 == x].
- !!
-Lbcopy_blockfinish:
- brz,pn %o2, 2f ! 100% complete?
- fmovd %f48, %f4
- cmp %o2, 8 ! Exactly 8 bytes?
- bz,a,pn %xcc, 2f
- std %f4, [%o1]
-
- btst 4, %o2 ! Word store?
- bz %xcc, 1f
- nop
- st %f4, [%o1]
- inc 4, %o1
-1:
- btst 2, %o2
- fzero %f0
- bz 1f
-
- mov -6, %o4
- alignaddr %o1, %o4, %g0
-
- faligndata %f0, %f4, %f8
-
- stda %f8, [%o1] ASI_FL16_P ! Store short
- inc 2, %o1
-1:
- btst 1, %o2 ! Byte aligned?
- bz 2f
-
- mov -7, %o0 ! Calculate dest - 7
- alignaddr %o1, %o0, %g0 ! Calculate shift mask and dest.
-
- faligndata %f0, %f4, %f8 ! Move 1st byte to low part of f8
-
- stda %f8, [%o1] ASI_FL8_P ! Store 1st byte
- inc 1, %o1 ! Update address
-2:
- membar #Sync
-#ifdef _KERNEL
-
-/*
- * We've saved our possible fpstate, now disable the fpu
- * and continue with life.
- */
- RESTORE_FPU
- ret
- restore %g1, 0, %o0 ! Return DEST for memcpy
-#endif /* _KERNEL */
- retl
- mov %g1, %o0
-#endif /* 1 */
-
-
-#if 1
-/*
- * XXXXXXXXXXXXXXXXXXXX
- * We need to make sure that this doesn't use floating point
- * before our trap handlers are installed or we could panic
- * XXXXXXXXXXXXXXXXXXXX
- */
/*
* bzero(addr, len)
*
- * We want to use VIS instructions if we're clearing out more than
- * 256 bytes, but to do that we need to properly save and restore the
- * FP registers. Unfortunately the code to do that in the kernel needs
- * to keep track of the current owner of the FPU, hence the different
- * code.
- *
* XXXXX To produce more efficient code, we do not allow lengths
* greater than 0x7fffffffffffffff, which are negative numbers.
* This should not really be an issue since the VA hole should
@@ -8079,11 +6871,6 @@ Lbzero_internal:
sllx %o1, 32, %o3
or %o1, %o3, %o1
1:
-#if 0
- !! Now we are 64-bit aligned
- cmp %o2, 256 ! Use block clear if len > 256
- bge,pt %xcc, Lbzero_block ! use block store instructions
-#endif /* 0 */
deccc 8, %o2
Lbzero_longs:
bl,pn %xcc, Lbzero_cleanup ! Less than 8 bytes left
@@ -8118,94 +6905,6 @@ Lbzero_done:
retl
mov %o4, %o0 ! Restore pointer for memset (ugh)
-#if 1
-Lbzero_block:
- sethi %hi(block_disable), %o3
- ldx [ %o3 + %lo(block_disable) ], %o3
- brnz,pn %o3, Lbzero_longs
- !! Make sure our trap table is installed
- set _C_LABEL(trapbase), %o5
- rdpr %tba, %o3
- sub %o3, %o5, %o3
- brnz,pn %o3, Lbzero_longs ! No, then don't use block load/store
- nop
-/*
- * Kernel:
- *
- * Here we use VIS instructions to do a block clear of a page.
- * But before we can do that we need to save and enable the FPU.
- * The last owner of the FPU registers is fpproc, and
- * fpproc->p_md.md_fpstate is the current fpstate. If that's not
- * null, call savefpstate() with it to store our current fp state.
- *
- * Next, allocate an aligned fpstate on the stack. We will properly
- * nest calls on a particular stack so this should not be a problem.
- *
- * Now we grab either curproc (or if we're on the interrupt stack
- * proc0). We stash its existing fpstate in a local register and
- * put our new fpstate in curproc->p_md.md_fpstate. We point
- * fpproc at curproc (or proc0) and enable the FPU.
- *
- * If we are ever preempted, our FPU state will be saved in our
- * fpstate. Then, when we're resumed and we take an FPDISABLED
- * trap, the trap handler will be able to fish our FPU state out
- * of curproc (or proc0).
- *
- * On exiting this routine we undo the damage: restore the original
- * pointer to curproc->p_md.md_fpstate, clear our fpproc, and disable
- * the FPU.
- *
- */
-
- ENABLE_FPU 0
- !! We are now 8-byte aligned. We need to become 64-byte aligned.
- btst 63, %i0
- bz,pt %xcc, 2f
- nop
-1:
- stx %i1, [%i0]
- inc 8, %i0
- btst 63, %i0
- bnz,pt %xcc, 1b
- dec 8, %i2
-
-2:
- brz %i1, 3f ! Skip the memory op
- fzero %f0 ! for bzero
-
- stx %i1, [%i0] ! Flush this puppy to RAM
- membar #StoreLoad
- ldd [%i0], %f0
-
-3:
- fmovd %f0, %f2 ! Duplicate the pattern
- fmovd %f0, %f4
- fmovd %f0, %f6
- fmovd %f0, %f8
- fmovd %f0, %f10
- fmovd %f0, %f12
- fmovd %f0, %f14
-
- !! Remember: we were 8 bytes too far
- dec 56, %i2 ! Go one iteration too far
-5:
- stda %f0, [%i0] ASI_BLK_P ! Store 64 bytes
- deccc BLOCK_SIZE, %i2
- bg,pt %icc, 5b
- inc BLOCK_SIZE, %i0
-
- membar #Sync
-/*
- * We've saved our possible fpstate, now disable the fpu
- * and continue with life.
- */
- RESTORE_FPU
- addcc %i2, 56, %i2 ! Restore the count
- ba,pt %xcc, Lbzero_longs ! Finish up the remainder
- restore
-#endif /* 1 */
-#endif /* 1 */
-
/*
* kcopy() is exactly like bcopy except that it set pcb_onfault such that
* when a fault occurs, it is able to return EFAULT to indicate this to the