summaryrefslogtreecommitdiff
path: root/sys/arch
diff options
context:
space:
mode:
author Mark Kettenis <kettenis@cvs.openbsd.org> 2007-10-17 21:23:29 +0000
committer Mark Kettenis <kettenis@cvs.openbsd.org> 2007-10-17 21:23:29 +0000
commit 5146d2857f2bf42c8a2ee2426137d95fdfe312bd (patch)
tree 95933c85e493d33705bcce402b278bc594f3ebd4 /sys/arch
parent 3e3404572ddd39b26b8c2bfe81515d858cba68c9 (diff)
Spin up secondary CPUs on MULTIPROCESSOR kernels. Works on UltraSPARC-III
CPUs.
Diffstat (limited to 'sys/arch')
-rw-r--r-- sys/arch/sparc64/include/pmap.h 2
-rw-r--r-- sys/arch/sparc64/sparc64/autoconf.c 123
-rw-r--r-- sys/arch/sparc64/sparc64/clock.c 4
-rw-r--r-- sys/arch/sparc64/sparc64/cpu.c 193
-rw-r--r-- sys/arch/sparc64/sparc64/locore.s 391
-rw-r--r-- sys/arch/sparc64/sparc64/pmap.c 10
6 files changed, 650 insertions, 73 deletions
diff --git a/sys/arch/sparc64/include/pmap.h b/sys/arch/sparc64/include/pmap.h
index c2507a3e958..9d8007d7fa0 100644
--- a/sys/arch/sparc64/include/pmap.h
+++ b/sys/arch/sparc64/include/pmap.h
@@ -165,7 +165,7 @@ int pmap_count_res(pmap_t pmap);
#define pmap_proc_iflush(p,va,len) /* nothing */
-void pmap_bootstrap(u_long kernelstart, u_long kernelend, u_int numctx);
+void pmap_bootstrap(u_long, u_long, u_int, u_int);
/* make sure all page mappings are modulo 16K to prevent d$ aliasing */
#define PMAP_PREFER(pa, va) (*(va) += (((*(va)) ^ (pa)) & VA_ALIAS_MASK))
diff --git a/sys/arch/sparc64/sparc64/autoconf.c b/sys/arch/sparc64/sparc64/autoconf.c
index 76a2706f3fe..772465ae151 100644
--- a/sys/arch/sparc64/sparc64/autoconf.c
+++ b/sys/arch/sparc64/sparc64/autoconf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: autoconf.c,v 1.70 2007/10/14 18:31:29 kettenis Exp $ */
+/* $OpenBSD: autoconf.c,v 1.71 2007/10/17 21:23:28 kettenis Exp $ */
/* $NetBSD: autoconf.c,v 1.51 2001/07/24 19:32:11 eeh Exp $ */
/*
@@ -111,6 +111,7 @@ static int mbprint(void *, const char *);
void sync_crash(void);
int mainbus_match(struct device *, void *, void *);
static void mainbus_attach(struct device *, struct device *, void *);
+int get_ncpus(void);
struct device *booted_device;
struct bootpath bootpath[8];
@@ -195,6 +196,46 @@ str2hex(char *str, long *vp)
return (str);
}
+/*
+ * Count the "cpu" nodes in the OpenFirmware device tree.  bootstrap()
+ * passes the result to pmap_bootstrap(), which uses it to size the
+ * physical memory reserved for the per-CPU cpu_info areas.
+ *
+ * Kernels built without MULTIPROCESSOR always report a single CPU.
+ */
+int
+get_ncpus(void)
+{
+#ifdef MULTIPROCESSOR
+	int node0, node, child, ncpus;
+	char buf[32];
+
+	ncpus = 0;
+	node0 = 0;
+	for (node = OF_child(findroot()); node; node = OF_peer(node)) {
+		/*
+		 * UltraSPARC-IV cpus appear as two "cpu" nodes below
+		 * a "cmp" node.  Go down one level, but remember
+		 * where we came from, such that we can go up again
+		 * after we've handled both "cpu" nodes.
+		 *
+		 * Only descend when the "cmp" node really has a child;
+		 * otherwise we would continue the walk from node 0.
+		 */
+		if (OF_getprop(node, "name", buf, sizeof(buf)) > 0) {
+			if (strcmp(buf, "cmp") == 0 &&
+			    (child = OF_child(node)) != 0) {
+				node0 = node;
+				node = child;
+			}
+
+			if (OF_getprop(node, "device_type", buf,
+			    sizeof(buf)) > 0 && strcmp(buf, "cpu") == 0)
+				ncpus++;
+		}
+
+		/*
+		 * This check must run for every node, including ones
+		 * whose properties could not be read.  Skipping it
+		 * (e.g. with "continue") for the last child of a "cmp"
+		 * node would end the walk there and miss every
+		 * remaining top-level node.
+		 */
+		if (node0 && OF_peer(node) == 0) {
+			node = node0;
+			node0 = 0;
+		}
+	}
+
+	return (ncpus);
+#else
+	return (1);
+#endif
+}
/*
* locore.s code calls bootstrap() just before calling main().
*
@@ -215,6 +256,7 @@ bootstrap(nctx)
int nctx;
{
extern int end; /* End of kernel */
+ int ncpus;
/*
* Initialize ddb first and register OBP callbacks.
@@ -238,7 +280,8 @@ bootstrap(nctx)
OF_set_symbol_lookup(OF_sym2val, OF_val2sym);
#endif
- pmap_bootstrap(KERNBASE, (u_long)&end, nctx);
+ ncpus = get_ncpus();
+ pmap_bootstrap(KERNBASE, (u_long)&end, nctx, ncpus);
}
void
@@ -619,7 +662,7 @@ extern bus_space_tag_t mainbus_space_tag;
struct mainbus_attach_args ma;
char buf[32], *p;
const char *const *ssp, *sp = NULL;
- int node0, node, rv, len;
+ int node0, node, rv, len, ncpus;
static const char *const openboot_special[] = {
/* ignore these (end with NULL) */
@@ -657,64 +700,48 @@ extern bus_space_tag_t mainbus_space_tag;
*p = '\0';
}
- /*
- * Locate and configure the ``early'' devices. These must be
- * configured before we can do the rest. For instance, the
- * EEPROM contains the Ethernet address for the LANCE chip.
- * If the device cannot be located or configured, panic.
- */
-
-/*
- * The rest of this routine is for OBP machines exclusively.
- */
-
- node = findroot();
-
/* Establish the first component of the boot path */
bootpath_store(1, bootpath);
- /* the first early device to be configured is the cpu */
+ /* We configure the CPUs first. */
- {
+ node = findroot();
+
+ ncpus = 0;
+ for (node = OF_child(node), node0 = 0; node; node = OF_peer(node)) {
/*
* UltraSPARC-IV cpus appear as two "cpu" nodes below
- * a "cmp" node. Lookup the first "cmp" node, such
- * that we find the "cpu" node in the code below.
+ * a "cmp" node. Go down one level, but remember
+ * where we came from, such that we can go up again
+ * after we've handled both "cpu" nodes.
*/
-
- for (node = OF_child(node); node; node = OF_peer(node)) {
- if (OF_getprop(node, "name", buf, sizeof(buf)) <= 0)
- continue;
- if (strcmp(buf, "cmp") == 0)
- break;
+ if (OF_getprop(node, "name", buf, sizeof(buf)) <= 0)
+ continue;
+ if (strcmp(buf, "cmp") == 0) {
+ node0 = node;
+ node = OF_child(node0);
}
- if (node == 0)
- node = findroot();
- }
-
- {
- int found = 0;
-
- for (node = OF_child(node); node; node = OF_peer(node)) {
- if (OF_getprop(node, "device_type",
- buf, sizeof(buf)) <= 0)
- continue;
- if (strcmp(buf, "cpu") == 0) {
- bzero(&ma, sizeof(ma));
- ma.ma_bustag = mainbus_space_tag;
- ma.ma_dmatag = &mainbus_dma_tag;
- ma.ma_node = node;
- ma.ma_name = "cpu";
- config_found(dev, (void *)&ma, mbprint);
- found++;
- }
+ if (OF_getprop(node, "device_type", buf, sizeof(buf)) <= 0)
+ continue;
+ if (strcmp(buf, "cpu") == 0) {
+ bzero(&ma, sizeof(ma));
+ ma.ma_bustag = mainbus_space_tag;
+ ma.ma_dmatag = &mainbus_dma_tag;
+ ma.ma_node = node;
+ ma.ma_name = "cpu";
+ config_found(dev, (void *)&ma, mbprint);
+ ncpus++;
}
- if (!found)
- panic("None of the CPUs found");
+ if (node0 && OF_peer(node) == 0) {
+ node = node0;
+ node0 = 0;
+ }
}
+ if (ncpus == 0)
+ panic("None of the CPUs found");
node = findroot(); /* re-init root node */
diff --git a/sys/arch/sparc64/sparc64/clock.c b/sys/arch/sparc64/sparc64/clock.c
index 09672a03c68..e68c6e014f3 100644
--- a/sys/arch/sparc64/sparc64/clock.c
+++ b/sys/arch/sparc64/sparc64/clock.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: clock.c,v 1.33 2007/05/06 14:52:36 kettenis Exp $ */
+/* $OpenBSD: clock.c,v 1.34 2007/10/17 21:23:28 kettenis Exp $ */
/* $NetBSD: clock.c,v 1.41 2001/07/24 19:29:25 eeh Exp $ */
/*
@@ -126,7 +126,7 @@ struct timecounter tick_timecounter = {
int statvar = 8192;
int statmin; /* statclock interval - 1/2*variance */
-static long tick_increment;
+long tick_increment;
int schedintr(void *);
static struct intrhand level10 = { clockintr };
diff --git a/sys/arch/sparc64/sparc64/cpu.c b/sys/arch/sparc64/sparc64/cpu.c
index c3c8b0acf00..a6320524dc6 100644
--- a/sys/arch/sparc64/sparc64/cpu.c
+++ b/sys/arch/sparc64/sparc64/cpu.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.c,v 1.23 2007/09/09 12:57:40 kettenis Exp $ */
+/* $OpenBSD: cpu.c,v 1.24 2007/10/17 21:23:28 kettenis Exp $ */
/* $NetBSD: cpu.c,v 1.13 2001/05/26 21:27:15 chs Exp $ */
/*
@@ -63,6 +63,7 @@
#include <machine/reg.h>
#include <machine/trap.h>
#include <machine/pmap.h>
+#include <machine/sparc64.h>
#include <sparc64/sparc64/cache.h>
@@ -74,10 +75,15 @@ struct cacheinfo cacheinfo = {
/* Linked list of all CPUs in system. */
struct cpu_info *cpus = NULL;
+struct cpu_info *alloc_cpuinfo(int);
+
/* The following are used externally (sysctl_hw). */
char machine[] = MACHINE; /* from <machine/param.h> */
char cpu_model[100];
+void cpu_reset_fpustate(void);
+void cpu_hatch(void);
+
/* The CPU configuration driver. */
static void cpu_attach(struct device *, struct device *, void *);
int cpu_match(struct device *, void *, void *);
@@ -91,6 +97,81 @@ extern struct cfdriver cpu_cd;
#define IU_IMPL(v) ((((u_int64_t)(v))&VER_IMPL) >> VER_IMPL_SHIFT)
#define IU_VERS(v) ((((u_int64_t)(v))&VER_MASK) >> VER_MASK_SHIFT)
+/*
+ * Return the cpu_info for the CPU described by OpenFirmware node `node',
+ * creating it if necessary.
+ *
+ * The CPU is identified by its "portid" property, falling back to
+ * "upa-portid"; if neither exists we panic.  If an entry with the same
+ * ci_upaid is already on the `cpus' list, that entry is returned as is.
+ *
+ * Otherwise a new 8-page area is set up: kernel virtual space is
+ * allocated, backed by physical memory taken from cpu0paddr (which is
+ * advanced past it), zeroed, and the cpu_info inside it is initialized
+ * and appended to the tail of the `cpus' list.
+ *
+ * NOTE(review): nothing here checks that cpu0paddr stays within the
+ * region reserved for it; presumably the reservation made in
+ * pmap_bootstrap() is always large enough -- confirm.
+ */
+struct cpu_info *
+alloc_cpuinfo(int node)
+{
+ paddr_t pa0, pa;
+ vaddr_t va, va0, kstack;
+ vsize_t sz = 8 * PAGE_SIZE;
+ int portid;
+ struct cpu_info *cpi, *ci;
+ extern paddr_t cpu0paddr;
+
+ portid = getpropint(node, "portid", -1);
+ if (portid == -1)
+ portid = getpropint(node, "upa-portid", -1);
+ if (portid == -1)
+ panic("alloc_cpuinfo: portid");
+
+ /* Reuse an existing entry for this port id, if there is one. */
+ for (cpi = cpus; cpi != NULL; cpi = cpi->ci_next)
+ if (cpi->ci_upaid == portid)
+ return cpi;
+
+ va = uvm_km_valloc_align(kernel_map, sz, 8 * PAGE_SIZE);
+ if (va == 0)
+ panic("alloc_cpuinfo: no virtual space");
+ va0 = va;
+
+ /* Claim the next `sz' bytes of the physical cpu_info reserve. */
+ pa0 = cpu0paddr;
+ cpu0paddr += sz;
+
+ for (pa = pa0; pa < cpu0paddr; pa += PAGE_SIZE, va += PAGE_SIZE)
+ pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE);
+
+ pmap_update(pmap_kernel());
+
+ /* The cpu_info sits at offset (CPUINFO_VA - INTSTACK) in the area. */
+ cpi = (struct cpu_info *)(va0 + CPUINFO_VA - INTSTACK);
+
+ memset((void *)va0, 0, sz);
+
+ kstack = uvm_km_alloc (kernel_map, USPACE);
+ if (kstack == 0)
+ panic("alloc_cpuinfo: unable to allocate pcb");
+
+ /*
+ * Initialize cpuinfo structure.
+ *
+ * Arrange pcb, idle stack and interrupt stack in the same
+ * way as is done for the boot CPU in pmap.c.
+ */
+ cpi->ci_next = NULL;
+ cpi->ci_curproc = NULL;
+ cpi->ci_number = ncpus++;
+ cpi->ci_upaid = portid;
+ cpi->ci_fpproc = NULL;
+#ifdef MULTIPROCESSOR
+ cpi->ci_spinup = cpu_hatch; /* XXX */
+#else
+ cpi->ci_spinup = NULL;
+#endif
+
+ /* The pcb is at the bottom of the USPACE area, the stack top at its end. */
+ cpi->ci_initstack = (void *)(kstack + USPACE);
+ cpi->ci_paddr = pa0;
+ cpi->ci_self = cpi;
+ cpi->ci_node = node;
+ cpi->ci_cpcb = (struct pcb *)kstack;
+
+ sched_init_cpu(cpi);
+
+ /*
+ * Finally, add itself to the list of active cpus.
+ *
+ * NOTE(review): this dereferences `cpus' unconditionally, so the
+ * list must already hold at least one entry (presumably the boot
+ * CPU's) -- confirm.
+ */
+ for (ci = cpus; ci->ci_next != NULL; ci = ci->ci_next)
+ ;
+ ci->ci_next = cpi;
+ return (cpi);
+}
+
int
cpu_match(parent, vcf, aux)
struct device *parent;
@@ -103,6 +184,27 @@ cpu_match(parent, vcf, aux)
return (strcmp(cf->cf_driver->cd_name, ma->ma_name) == 0);
}
+/*
+ * Save the FPU state of the calling CPU into a throw-away buffer on
+ * the stack.  The saved contents are discarded; the save itself is
+ * what matters.
+ */
+void
+cpu_reset_fpustate(void)
+{
+	struct fpstate64 scratch[2];
+	struct fpstate64 *fs;
+
+	/*
+	 * The save area needs to be aligned; two elements are declared,
+	 * presumably so ALIGNFPSTATE() has room to adjust the pointer.
+	 */
+	fs = ALIGNFPSTATE(&scratch[1]);
+
+	/*
+	 * Get the FSR and clear any exceptions.  If we do not unload
+	 * the queue here and it is left over from a previous crash, we
+	 * will panic in the first loadfpstate(), due to a sequence error,
+	 * so we need to dump the whole state anyway.
+	 *
+	 * If there is no FPU, trap.c will advance over all the stores,
+	 * so we initialize fs_fsr here.
+	 */
+	fs->fs_fsr = 7 << FSR_VER_SHIFT;	/* 7 is reserved for "none" */
+	savefpstate(fs);
+}
+
/*
* Attach the CPU.
* Discover interesting goop about the virtual address cache
@@ -119,26 +221,12 @@ cpu_attach(parent, dev, aux)
int impl, vers;
char *cpuname;
struct mainbus_attach_args *ma = aux;
- struct fpstate64 *fpstate;
- struct fpstate64 fps[2];
- char *sep;
+ struct cpu_info *ci;
+ const char *sep;
register int i, l;
u_int64_t ver;
extern u_int64_t cpu_clockrate[];
- /* This needs to be 64-bit aligned */
- fpstate = ALIGNFPSTATE(&fps[1]);
- /*
- * Get the FSR and clear any exceptions. If we do not unload
- * the queue here and it is left over from a previous crash, we
- * will panic in the first loadfpstate(), due to a sequence error,
- * so we need to dump the whole state anyway.
- *
- * If there is no FPU, trap.c will advance over all the stores,
- * so we initialize fs_fsr here.
- */
- fpstate->fs_fsr = 7 << FSR_VER_SHIFT; /* 7 is reserved for "none" */
- savefpstate(fpstate);
ver = getver();
impl = IU_IMPL(ver);
vers = IU_VERS(ver);
@@ -146,6 +234,19 @@ cpu_attach(parent, dev, aux)
/* tell them what we have */
node = ma->ma_node;
+ /*
+ * Allocate cpu_info structure if needed.
+ */
+ ci = alloc_cpuinfo(node);
+
+ /*
+ * Only do this on the boot cpu. Other cpu's call
+ * cpu_reset_fpustate() from cpu_hatch() before they
+ * call into the idle loop.
+ */
+ if (ci->ci_number == 0)
+ cpu_reset_fpustate();
+
clk = getpropint(node, "clock-frequency", 0);
if (clk == 0) {
/*
@@ -280,6 +381,64 @@ struct cfdriver cpu_cd = {
NULL, "cpu", DV_DULL
};
+#ifdef MULTIPROCESSOR
+void cpu_mp_startup(void);
+volatile int cpu_mp_started;
+
+/*
+ * Ask the PROM to start every CPU on the `cpus' list except the one
+ * whose ci_upaid equals CPU_UPAID.  Each CPU is started at
+ * cpu_mp_startup with the physical address of its cpu_info area as
+ * argument, and we wait for it to set cpu_mp_started before moving on
+ * to the next one.
+ */
+void
+cpu_boot_secondary_processors(void)
+{
+	struct cpu_info *ci;
+	int cpuid, tries;
+
+	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
+		if (ci->ci_upaid == CPU_UPAID)
+			continue;
+
+		/* Start by "cpuid" when the node has one, else by node. */
+		cpuid = getpropint(ci->ci_node, "cpuid", -1);
+		if (cpuid != -1) {
+			prom_start_cpu_by_cpuid(cpuid,
+			    (void *)cpu_mp_startup, ci->ci_paddr);
+		} else {
+			prom_start_cpu(ci->ci_node,
+			    (void *)cpu_mp_startup, ci->ci_paddr);
+		}
+
+		/*
+		 * Poll for the hatch flag, at most 2000 times with a
+		 * delay(10000) between polls.  A CPU that never reports
+		 * in is silently given up on.
+		 */
+		tries = 0;
+		while (tries < 2000) {
+			sparc_membar(Sync);
+			if (cpu_mp_started == 1)
+				break;
+			delay(10000);
+			tries++;
+		}
+
+		/* Re-arm the flag for the next CPU. */
+		cpu_mp_started = 0;
+		sparc_membar(Sync);
+	}
+}
+
+/*
+ * C entry point for a secondary CPU; called from cpu_mp_startup once
+ * the MMU, stack and trap table have been set up.  Announces the CPU,
+ * resets its FPU state, signals cpu_boot_secondary_processors() through
+ * cpu_mp_started, arms the tick timer and switches to the process
+ * picked by sched_chooseproc().
+ */
+void
+cpu_hatch(void)
+{
+	extern long tick_increment;
+	int s;
+
+	printf("cpu%d running\n", cpu_number());
+
+	cpu_reset_fpustate();
+
+	/* Tell the boot CPU that we made it this far. */
+	cpu_mp_started = 1;
+	sparc_membar(Sync);
+
+	s = splhigh();
+	microuptime(&curcpu()->ci_schedstate.spc_runtime);
+	splx(s);
+
+	next_tick(tick_increment);
+
+	SCHED_LOCK(s);
+	cpu_switchto(NULL, sched_chooseproc());
+}
+#endif
+
void
need_resched(struct cpu_info *ci)
{
diff --git a/sys/arch/sparc64/sparc64/locore.s b/sys/arch/sparc64/sparc64/locore.s
index 9658e9de666..b1a06dc9786 100644
--- a/sys/arch/sparc64/sparc64/locore.s
+++ b/sys/arch/sparc64/sparc64/locore.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.s,v 1.88 2007/10/17 20:16:11 kettenis Exp $ */
+/* $OpenBSD: locore.s,v 1.89 2007/10/17 21:23:28 kettenis Exp $ */
/* $NetBSD: locore.s,v 1.137 2001/08/13 06:10:10 jdolecek Exp $ */
/*
@@ -4573,6 +4573,392 @@ dlflush2:
_ALIGN
.text
+#ifdef MULTIPROCESSOR
+/*
+ * cpu_mp_startup
+ *
+ * Entry point handed to the PROM by cpu_boot_secondary_processors()
+ * for each secondary CPU, together with the physical address of that
+ * CPU's cpu_info area (presumably delivered in %o0; it is saved in %g2
+ * right away).
+ *
+ * Installs locked DMMU/IMMU mappings for the kernel data and text
+ * segments, maps the cpu_info/interrupt stack area at INTSTACK, sets
+ * up the stack, the TSB pointers and the trap base register, and
+ * finally calls cpu_hatch().
+ */
+ENTRY(cpu_mp_startup)
+ mov %o0, %g2 ! Save our argument; used below for the INTSTACK mapping
+ wrpr %g0, 0, %cleanwin
+ wrpr %g0, 13, %pil
+ wrpr %g0, PSTATE_INTR|PSTATE_PEF, %pstate
+ wr %o0, FPRS_FEF, %fprs ! Turn on FPU (NOTE(review): rs1 is %o0, which still holds the argument -- confirm %g0 was not intended)
+
+ wrpr %g0, 0, %tl ! Make sure we're not in NUCLEUS mode
+
+ flushw
+
+#if 0
+ /*
+ * Disable the DCACHE entirely for debug.
+ */
+ ldxa [%g0] ASI_MCCR, %o1
+ andn %o1, MCCR_DCACHE_EN, %o1
+ stxa %o1, [%g0] ASI_MCCR
+ membar #Sync
+#endif /* 0 */
+
+ sethi %hi(KERNBASE), %l0 ! Find our xlation
+ sethi %hi(DATA_START), %l3
+
+ set _C_LABEL(ktextp), %l2 ! Find phys addr
+ ldx [%l2], %l2 ! The following gets ugly: We need to load the following mask
+ set _C_LABEL(kdatap), %l5
+ ldx [%l5], %l5
+
+ set _C_LABEL(ektext), %l1 ! And the ends...
+ ldx [%l1], %l1
+ set _C_LABEL(ekdata), %l4
+ ldx [%l4], %l4
+
+ sethi %hi(0xe0000000), %o0 ! V=1|SZ=11|NFO=0|IE=0
+ sllx %o0, 32, %o0 ! Shift it into place
+
+ sethi %hi(0x400000), %l6 ! Create a 4MB mask
+ add %l6, -1, %l7
+
+ mov -1, %o1 ! Create a nice mask
+ sllx %o1, 41, %o1 ! Mask off high bits
+ or %o1, 0xfff, %o1 ! We can just load this in 12 (of 13) bits
+
+ andn %l2, %o1, %l2 ! Mask the phys page number
+ andn %l5, %o1, %l5 ! Mask the phys page number
+
+ or %l2, %o0, %l2 ! Now take care of the high bits
+ or %l5, %o0, %l5 ! Now take care of the high bits
+
+ wrpr %g0, PSTATE_KERN, %pstate ! Disable interrupts
+
+#ifdef DEBUG
+ set 1f, %o0 ! Debug printf for TEXT page
+ srlx %l0, 32, %o1
+ srl %l0, 0, %o2
+ or %l2, TLB_L|TLB_CP|TLB_CV|TLB_P, %o4 ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=0|G=0
+ srlx %o4, 32, %o3
+ call _C_LABEL(prom_printf)
+ srl %o4, 0, %o4
+
+ set 1f, %o0 ! Debug printf for DATA page
+ srlx %l3, 32, %o1
+ srl %l3, 0, %o2
+ or %l5, TLB_L|TLB_CP|TLB_CV|TLB_P|TLB_W, %o4 ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=1(ugh)|G=0
+ srlx %o4, 32, %o3
+ call _C_LABEL(prom_printf)
+ srl %o4, 0, %o4
+ .data
+1:
+ .asciz "Setting DTLB entry %08x %08x data %08x %08x\r\n"
+ _ALIGN
+ .text
+#endif /* DEBUG */
+ mov %l0, %o0 ! Demap all of kernel dmmu text segment
+ mov %l3, %o1
+ set 0x2000, %o2 ! 8K page size
+ add %l1, %l7, %o5 ! Extend to 4MB boundary
+ andn %o5, %l7, %o5
+0:
+ stxa %o0, [%o0] ASI_DMMU_DEMAP ! Demap text segment
+ membar #Sync
+ cmp %o0, %o5
+ bleu 0b
+ add %o0, %o2, %o0
+
+ add %l4, %l7, %o5 ! Extend to 4MB boundary
+ andn %o5, %l7, %o5
+0:
+ stxa %o1, [%o1] ASI_DMMU_DEMAP ! Demap data segment
+ membar #Sync
+ cmp %o1, %o5
+ bleu 0b
+ add %o1, %o2, %o1
+
+ set (1<<14)-8, %o0 ! Clear out DCACHE
+1:
+dlflush2a:
+ stxa %g0, [%o0] ASI_DCACHE_TAG ! clear DCACHE line
+ membar #Sync
+ brnz,pt %o0, 1b
+ dec 8, %o0
+
+ /*
+ * First map data segment into the DMMU.
+ */
+ set TLB_TAG_ACCESS, %o0 ! Now map it back in with a locked TTE
+ mov %l3, %o1
+#ifdef NO_VCACHE
+ ! And low bits: L=1|CP=1|CV=0(ugh)|E=0|P=1|W=1|G=0
+ or %l5, TLB_L|TLB_CP|TLB_P|TLB_W, %o2
+#else /* NO_VCACHE */
+ ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=1|G=0
+ or %l5, TLB_L|TLB_CP|TLB_CV|TLB_P|TLB_W, %o2
+#endif /* NO_VCACHE */
+ set 1f, %o5
+2:
+ stxa %o1, [%o0] ASI_DMMU ! Set VA for DSEG
+ membar #Sync ! We may need more membar #Sync in here
+ stxa %o2, [%g0] ASI_DMMU_DATA_IN ! Store TTE for DSEG
+ membar #Sync ! We may need more membar #Sync in here
+ flush %o5 ! Make IMMU see this too
+1:
+ add %o1, %l6, %o1 ! increment VA
+ cmp %o1, %l4 ! Next 4MB mapping....
+ blu,pt %xcc, 2b
+ add %o2, %l6, %o2 ! Increment tag
+
+ /*
+ * Next map the text segment into the DMMU so we can get at RODATA.
+ */
+ mov %l0, %o1
+#ifdef NO_VCACHE
+ ! And low bits: L=1|CP=1|CV=0(ugh)|E=0|P=1|W=0|G=0
+ or %l2, TLB_L|TLB_CP|TLB_P, %o2
+#else /* NO_VCACHE */
+ ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=0|G=0
+ or %l2, TLB_L|TLB_CP|TLB_CV|TLB_P, %o2
+#endif /* NO_VCACHE */
+2:
+ stxa %o1, [%o0] ASI_DMMU ! Set VA for text segment
+ membar #Sync ! We may need more membar #Sync in here
+ stxa %o2, [%g0] ASI_DMMU_DATA_IN ! Store TTE for text segment
+ membar #Sync ! We may need more membar #Sync in here
+ flush %o5 ! Make IMMU see this too
+ add %o1, %l6, %o1 ! increment VA
+ cmp %o1, %l1 ! Next 4MB mapping....
+ blu,pt %xcc, 2b
+ add %o2, %l6, %o2 ! Increment tag
+
+#ifdef DEBUG
+ set 1f, %o0 ! Debug printf
+ srlx %l0, 32, %o1
+ srl %l0, 0, %o2
+ or %l2, TLB_L|TLB_CP|TLB_CV|TLB_P, %o4
+ srlx %o4, 32, %o3
+ call _C_LABEL(prom_printf)
+ srl %o4, 0, %o4
+ .data
+1:
+ .asciz "Setting ITLB entry %08x %08x data %08x %08x\r\n"
+ _ALIGN
+ .text
+#endif /* DEBUG */
+ /*
+ * Finished the DMMU, now we need to do the IMMU which is more
+ * difficult because we're executing instructions through the IMMU
+ * while we're flushing it. We need to remap the entire kernel
+ * to a new context, flush the entire context 0 IMMU, map it back
+ * into context 0, switch to context 0, and flush context 1.
+ *
+ * Another interesting issue is that the flush instructions are
+ * translated through the DMMU, therefore we need to enter the
+ * mappings both in the IMMU and the DMMU so we can flush them
+ * correctly.
+ *
+ * Start by mapping in the kernel text as context==1
+ */
+ set TLB_TAG_ACCESS, %o0
+ or %l0, 1, %o1 ! Context = 1
+ or %l2, TLB_CP|TLB_P, %o2 ! And low bits: L=0|CP=1|CV=0|E=0|P=1|G=0
+2:
+ stxa %o1, [%o0] ASI_DMMU ! Make DMMU point to it
+ membar #Sync ! We may need more membar #Sync in here
+ stxa %o2, [%g0] ASI_DMMU_DATA_IN ! Store it
+ membar #Sync ! We may need more membar #Sync in here
+ stxa %o1, [%o0] ASI_IMMU ! Make IMMU point to it
+ membar #Sync ! We may need more membar #Sync in here
+ flush %o1-1 ! Make IMMU see this too
+ stxa %o2, [%g0] ASI_IMMU_DATA_IN ! Store it
+ membar #Sync ! We may need more membar #Sync in here
+ flush %o5 ! Make IMMU see this too
+ add %o1, %l6, %o1 ! increment VA
+ cmp %o1, %l1 ! Next 4MB mapping....
+ blu,pt %xcc, 2b
+ add %o2, %l6, %o2 ! Increment tag
+
+ !!
+ !! Load 1 as primary context
+ !!
+ mov 1, %o0
+ mov CTX_PRIMARY, %o1
+ stxa %o0, [%o1] ASI_DMMU
+ wrpr %g0, 0, %tl ! Make SURE we're not in nucleus mode (TL=0, as at entry)
+ membar #Sync ! This probably should be a flush, but it works
+ flush %o5 ! This should be KERNBASE
+
+ !!
+ !! Demap entire context 0 kernel
+ !!
+ or %l0, DEMAP_PAGE_NUCLEUS, %o0 ! Context = Nucleus
+ add %l1, %l7, %o1 ! Demap all of kernel text seg
+ andn %o1, %l7, %o1 ! rounded up to 4MB.
+ set 0x2000, %o2 ! 8K page size
+0:
+ stxa %o0, [%o0] ASI_IMMU_DEMAP ! Demap it
+ membar #Sync
+ flush %o5 ! Assume low bits are benign
+ cmp %o0, %o1
+ bleu,pt %xcc, 0b ! Next page
+ add %o0, %o2, %o0
+
+ or %l3, DEMAP_PAGE_NUCLEUS, %o0 ! Context = Nucleus
+ add %l4, %l7, %o1 ! Demap all of kernel data seg
+ andn %o1, %l7, %o1 ! rounded up to 4MB.
+0:
+ stxa %o0, [%o0] ASI_IMMU_DEMAP ! Demap it
+ membar #Sync
+ flush %o5 ! Assume low bits are benign
+ cmp %o0, %o1
+ bleu,pt %xcc, 0b ! Next page
+ add %o0, %o2, %o0
+
+ !!
+ !! Now, map in the kernel text as context==0
+ !!
+ set TLB_TAG_ACCESS, %o0
+ mov %l0, %o1 ! Context = 0
+#ifdef NO_VCACHE
+ ! And low bits: L=1|CP=1|CV=0(ugh)|E=0|P=1|W=0|G=0
+ or %l2, TLB_L|TLB_CP|TLB_P, %o2
+#else /* NO_VCACHE */
+ ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=0|G=0
+ or %l2, TLB_L|TLB_CP|TLB_CV|TLB_P, %o2
+#endif /* NO_VCACHE */
+2:
+ stxa %o1, [%o0] ASI_IMMU ! Make IMMU point to it
+ membar #Sync ! We may need more membar #Sync in here
+ stxa %o2, [%g0] ASI_IMMU_DATA_IN ! Store it
+ membar #Sync ! We may need more membar #Sync in here
+ flush %o5 ! Make IMMU see this too
+ add %o1, %l6, %o1 ! increment VA
+ cmp %o1, %l1 ! Next 4MB mapping....
+ blu,pt %xcc, 2b
+ add %o2, %l6, %o2 ! Increment tag
+
+ !!
+ !! Restore 0 as primary context
+ !!
+ mov CTX_PRIMARY, %o0
+ stxa %g0, [%o0] ASI_DMMU
+ membar #Sync ! No real reason for this XXXX
+ flush %o5
+
+ !!
+ !! Demap context 1
+ !!
+ mov 1, %o1
+ mov CTX_SECONDARY, %o0
+ stxa %o1, [%o0] ASI_DMMU
+ membar #Sync ! This probably should be a flush, but it works
+ flush %l0
+ mov DEMAP_CTX_SECONDARY, %o4
+ stxa %o4, [%o4] ASI_DMMU_DEMAP
+ membar #Sync
+ stxa %o4, [%o4] ASI_IMMU_DEMAP
+ membar #Sync
+ flush %l0
+ stxa %g0, [%o0] ASI_DMMU
+ membar #Sync
+ flush %l0
+
+#ifdef DEBUG
+ set 1f, %o0 ! Debug printf
+ call _C_LABEL(prom_printf)
+ .data
+1:
+ .asciz "Setting CPUINFO mappings...\r\n"
+ _ALIGN
+ .text
+#endif /* DEBUG */
+
+ /*
+ * Get pointer to our cpu_info struct
+ */
+
+ mov %g2, %l1 ! Load the interrupt stack's PA
+
+ sethi %hi(0xa0000000), %l2 ! V=1|SZ=01|NFO=0|IE=0
+ sllx %l2, 32, %l2 ! Shift it into place
+
+ mov -1, %l3 ! Create a nice mask
+ sllx %l3, 41, %l4 ! Mask off high bits
+ or %l4, 0xfff, %l4 ! We can just load this in 12 (of 13) bits
+
+ andn %l1, %l4, %l1 ! Mask the phys page number
+
+ or %l2, %l1, %l1 ! Now take care of the high bits
+#ifdef NO_VCACHE
+ or %l1, TLB_L|TLB_CP|TLB_P|TLB_W, %l2 ! And low bits: L=1|CP=1|CV=0|E=0|P=1|W=1|G=0
+#else /* NO_VCACHE */
+ or %l1, TLB_L|TLB_CP|TLB_CV|TLB_P|TLB_W, %l2 ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=1|G=0
+#endif /* NO_VCACHE */
+
+ !!
+ !! Now, map in the interrupt stack as context==0
+ !!
+ set TLB_TAG_ACCESS, %l5
+ sethi %hi(INTSTACK), %l0
+ stxa %l0, [%l5] ASI_DMMU ! Make DMMU point to it
+ membar #Sync ! We may need more membar #Sync in here
+ stxa %l2, [%g0] ASI_DMMU_DATA_IN ! Store it
+ membar #Sync ! We may need more membar #Sync in here
+ flush %o5
+
+ !!
+ !! Set 0 as primary context XXX
+ !!
+ mov CTX_PRIMARY, %o0
+ stxa %g0, [%o0] ASI_DMMU
+ flush %o5
+
+!!! Make sure our stack's OK.
+ sethi %hi(CPUINFO_VA+CI_INITSTACK), %l0
+ ldx [%l0 + %lo(CPUINFO_VA+CI_INITSTACK)], %l0
+ add %l0, - CC64FSZ - 80, %l0
+ andn %l0, 0x0f, %l0 ! Needs to be 16-byte aligned
+ sub %l0, BIAS, %l0 ! and biased
+ mov %l0, %sp
+ set 1, %fp
+ clr %i7
+
+ /*
+ * Step 7: change the trap base register, and install our TSBs
+ */
+
+ /* Set the dmmu tsb */
+ sethi %hi(0x1fff), %l2
+ set _C_LABEL(tsb_dmmu), %l0
+ ldx [%l0], %l0
+ set _C_LABEL(tsbsize), %l1
+ or %l2, %lo(0x1fff), %l2
+ ld [%l1], %l1
+ andn %l0, %l2, %l0 ! Mask off size and split bits
+ or %l0, %l1, %l0 ! Make a TSB pointer
+ set TSB, %l2
+ stxa %l0, [%l2] ASI_DMMU ! Install data TSB pointer
+ membar #Sync
+
+
+ /* Set the immu tsb */
+ sethi %hi(0x1fff), %l2
+ set _C_LABEL(tsb_immu), %l0
+ ldx [%l0], %l0
+ set _C_LABEL(tsbsize), %l1
+ or %l2, %lo(0x1fff), %l2
+ ld [%l1], %l1
+ andn %l0, %l2, %l0 ! Mask off size and split bits
+ or %l0, %l1, %l0 ! Make a TSB pointer
+ set TSB, %l2
+ stxa %l0, [%l2] ASI_IMMU ! Install instruction TSB pointer
+ membar #Sync ! We may need more membar #Sync in here
+
+ /* Change the trap base register */
+ set _C_LABEL(trapbase), %l1
+ !call _C_LABEL(prom_set_trap_table) ! Now we should be running 100% from our handlers
+ ! mov %l1, %o0
+ wrpr %l1, 0, %tba ! Make sure the PROM didn't foul up.
+ wrpr %g0, WSTATE_KERN, %wstate
+
+ call _C_LABEL(cpu_hatch)
+ nop
+ NOTREACHED
+#endif
+
/*
* openfirmware(cell* param);
*
@@ -9269,6 +9655,9 @@ _C_LABEL(proc0paddr):
_C_LABEL(dlflush_start):
.xword dlflush1
.xword dlflush2
+#ifdef MULTIPROCESSOR
+ .xword dlflush2a
+#endif
.xword dlflush3
.xword dlflush4
.xword dlflush5
diff --git a/sys/arch/sparc64/sparc64/pmap.c b/sys/arch/sparc64/sparc64/pmap.c
index e2900c78a79..0c03c10aa44 100644
--- a/sys/arch/sparc64/sparc64/pmap.c
+++ b/sys/arch/sparc64/sparc64/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.43 2007/09/09 14:59:37 kettenis Exp $ */
+/* $OpenBSD: pmap.c,v 1.44 2007/10/17 21:23:28 kettenis Exp $ */
/* $NetBSD: pmap.c,v 1.107 2001/08/31 16:47:41 eeh Exp $ */
#undef NO_VCACHE /* Don't forget the locked TLB in dostart */
/*
@@ -560,9 +560,9 @@ pmap_calculate_colors() {
*/
void
-pmap_bootstrap(kernelstart, kernelend, maxctx)
+pmap_bootstrap(kernelstart, kernelend, maxctx, numcpus)
u_long kernelstart, kernelend;
- u_int maxctx;
+ u_int maxctx, numcpus;
{
extern int data_start[], end[]; /* start of data segment */
extern int msgbufmapped;
@@ -993,7 +993,7 @@ remap_data:
/*
* Allocate a 64KB page for the cpu_info structure now.
*/
- if ((cpu0paddr = prom_alloc_phys(8*NBPG, 8*NBPG)) == 0 ) {
+ if ((cpu0paddr = prom_alloc_phys(numcpus * 8*NBPG, 8*NBPG)) == 0 ) {
prom_printf("Cannot allocate new cpu_info\r\n");
OF_exit();
}
@@ -1407,6 +1407,8 @@ remap_data:
cpus->ci_paddr = cpu0paddr;
proc0paddr = cpus->ci_cpcb;
+ cpu0paddr += 64 * KB;
+
/* The rest will be done at CPU attach time. */
BDPRINTF(PDB_BOOT1,
("Done inserting cpu_info into pmap_kernel()\r\n"));