diff options
author | Mark Kettenis <kettenis@cvs.openbsd.org> | 2007-10-17 21:23:29 +0000 |
---|---|---|
committer | Mark Kettenis <kettenis@cvs.openbsd.org> | 2007-10-17 21:23:29 +0000 |
commit | 5146d2857f2bf42c8a2ee2426137d95fdfe312bd (patch) | |
tree | 95933c85e493d33705bcce402b278bc594f3ebd4 /sys/arch | |
parent | 3e3404572ddd39b26b8c2bfe81515d858cba68c9 (diff) |
Spin up secondary CPUs on MULTIPROCESSOR kernels. Works on UltraSPARC-III
CPUs.
Diffstat (limited to 'sys/arch')
-rw-r--r-- | sys/arch/sparc64/include/pmap.h | 2 | ||||
-rw-r--r-- | sys/arch/sparc64/sparc64/autoconf.c | 123 | ||||
-rw-r--r-- | sys/arch/sparc64/sparc64/clock.c | 4 | ||||
-rw-r--r-- | sys/arch/sparc64/sparc64/cpu.c | 193 | ||||
-rw-r--r-- | sys/arch/sparc64/sparc64/locore.s | 391 | ||||
-rw-r--r-- | sys/arch/sparc64/sparc64/pmap.c | 10 |
6 files changed, 650 insertions, 73 deletions
diff --git a/sys/arch/sparc64/include/pmap.h b/sys/arch/sparc64/include/pmap.h index c2507a3e958..9d8007d7fa0 100644 --- a/sys/arch/sparc64/include/pmap.h +++ b/sys/arch/sparc64/include/pmap.h @@ -165,7 +165,7 @@ int pmap_count_res(pmap_t pmap); #define pmap_proc_iflush(p,va,len) /* nothing */ -void pmap_bootstrap(u_long kernelstart, u_long kernelend, u_int numctx); +void pmap_bootstrap(u_long, u_long, u_int, u_int); /* make sure all page mappings are modulo 16K to prevent d$ aliasing */ #define PMAP_PREFER(pa, va) (*(va) += (((*(va)) ^ (pa)) & VA_ALIAS_MASK)) diff --git a/sys/arch/sparc64/sparc64/autoconf.c b/sys/arch/sparc64/sparc64/autoconf.c index 76a2706f3fe..772465ae151 100644 --- a/sys/arch/sparc64/sparc64/autoconf.c +++ b/sys/arch/sparc64/sparc64/autoconf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: autoconf.c,v 1.70 2007/10/14 18:31:29 kettenis Exp $ */ +/* $OpenBSD: autoconf.c,v 1.71 2007/10/17 21:23:28 kettenis Exp $ */ /* $NetBSD: autoconf.c,v 1.51 2001/07/24 19:32:11 eeh Exp $ */ /* @@ -111,6 +111,7 @@ static int mbprint(void *, const char *); void sync_crash(void); int mainbus_match(struct device *, void *, void *); static void mainbus_attach(struct device *, struct device *, void *); +int get_ncpus(void); struct device *booted_device; struct bootpath bootpath[8]; @@ -195,6 +196,46 @@ str2hex(char *str, long *vp) return (str); } +int +get_ncpus(void) +{ +#ifdef MULTIPROCESSOR + int node0, node,ncpus; + char buf[32]; + + node = findroot(); + + ncpus = 0; + for (node = OF_child(node), node0 = 0; node; node = OF_peer(node)) { + /* + * UltraSPARC-IV cpus appear as two "cpu" nodes below + * a "cmp" node. Go down one level, but remember + * where we came from, such that we can go up again + * after we've handled both "cpu" nodes. + */ + if (OF_getprop(node, "name", buf, sizeof(buf)) <= 0) + continue; + if (strcmp(buf, "cmp") == 0) { + node0 = node; + node = OF_child(node0); + } + + if (OF_getprop(node, "device_type", buf, sizeof(buf)) <= 0) + continue; + if (strcmp(buf, "cpu") == 0) + ncpus++; + + if (node0 && OF_peer(node) == 0) { + node = node0; + node0 = 0; + } + } + + return (ncpus); +#else + return (1); +#endif +} /* * locore.s code calls bootstrap() just before calling main(). * @@ -215,6 +256,7 @@ bootstrap(nctx) int nctx; { extern int end; /* End of kernel */ + int ncpus; /* * Initialize ddb first and register OBP callbacks. @@ -238,7 +280,8 @@ bootstrap(nctx) OF_set_symbol_lookup(OF_sym2val, OF_val2sym); #endif - pmap_bootstrap(KERNBASE, (u_long)&end, nctx); + ncpus = get_ncpus(); + pmap_bootstrap(KERNBASE, (u_long)&end, nctx, ncpus); } void @@ -619,7 +662,7 @@ extern bus_space_tag_t mainbus_space_tag; struct mainbus_attach_args ma; char buf[32], *p; const char *const *ssp, *sp = NULL; - int node0, node, rv, len; + int node0, node, rv, len, ncpus; static const char *const openboot_special[] = { /* ignore these (end with NULL) */ @@ -657,64 +700,48 @@ extern bus_space_tag_t mainbus_space_tag; *p = '\0'; } - /* - * Locate and configure the ``early'' devices. These must be - * configured before we can do the rest. For instance, the - * EEPROM contains the Ethernet address for the LANCE chip. - * If the device cannot be located or configured, panic. - */ - -/* - * The rest of this routine is for OBP machines exclusively. - */ - - node = findroot(); - /* Establish the first component of the boot path */ bootpath_store(1, bootpath); - /* the first early device to be configured is the cpu */ + /* We configure the CPUs first. */ - { + node = findroot(); + + ncpus = 0; + for (node = OF_child(node), node0 = 0; node; node = OF_peer(node)) { /* * UltraSPARC-IV cpus appear as two "cpu" nodes below - * a "cmp" node. Lookup the first "cmp" node, such - * that we find the "cpu" node in the code below. + * a "cmp" node. Go down one level, but remember + * where we came from, such that we can go up again + * after we've handled both "cpu" nodes. */ - - for (node = OF_child(node); node; node = OF_peer(node)) { - if (OF_getprop(node, "name", buf, sizeof(buf)) <= 0) - continue; - if (strcmp(buf, "cmp") == 0) - break; + if (OF_getprop(node, "name", buf, sizeof(buf)) <= 0) + continue; + if (strcmp(buf, "cmp") == 0) { + node0 = node; + node = OF_child(node0); } - if (node == 0) - node = findroot(); - } - - { - int found = 0; - - for (node = OF_child(node); node; node = OF_peer(node)) { - if (OF_getprop(node, "device_type", - buf, sizeof(buf)) <= 0) - continue; - if (strcmp(buf, "cpu") == 0) { - bzero(&ma, sizeof(ma)); - ma.ma_bustag = mainbus_space_tag; - ma.ma_dmatag = &mainbus_dma_tag; - ma.ma_node = node; - ma.ma_name = "cpu"; - config_found(dev, (void *)&ma, mbprint); - found++; - } + if (OF_getprop(node, "device_type", buf, sizeof(buf)) <= 0) + continue; + if (strcmp(buf, "cpu") == 0) { + bzero(&ma, sizeof(ma)); + ma.ma_bustag = mainbus_space_tag; + ma.ma_dmatag = &mainbus_dma_tag; + ma.ma_node = node; + ma.ma_name = "cpu"; + config_found(dev, (void *)&ma, mbprint); + ncpus++; } - if (!found) - panic("None of the CPUs found"); + if (node0 && OF_peer(node) == 0) { + node = node0; + node0 = 0; + } } + if (ncpus == 0) + panic("None of the CPUs found"); node = findroot(); /* re-init root node */ diff --git a/sys/arch/sparc64/sparc64/clock.c b/sys/arch/sparc64/sparc64/clock.c index 09672a03c68..e68c6e014f3 100644 --- a/sys/arch/sparc64/sparc64/clock.c +++ b/sys/arch/sparc64/sparc64/clock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: clock.c,v 1.33 2007/05/06 14:52:36 kettenis Exp $ */ +/* $OpenBSD: clock.c,v 1.34 2007/10/17 21:23:28 kettenis Exp $ */ /* $NetBSD: clock.c,v 1.41 2001/07/24 19:29:25 eeh Exp $ */ /* @@ -126,7 +126,7 @@ struct timecounter tick_timecounter = { int statvar = 8192; int statmin; /* statclock interval - 1/2*variance */ -static long tick_increment; +long tick_increment; int schedintr(void *); static struct intrhand level10 = { clockintr }; diff --git a/sys/arch/sparc64/sparc64/cpu.c b/sys/arch/sparc64/sparc64/cpu.c index c3c8b0acf00..a6320524dc6 100644 --- a/sys/arch/sparc64/sparc64/cpu.c +++ b/sys/arch/sparc64/sparc64/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.23 2007/09/09 12:57:40 kettenis Exp $ */ +/* $OpenBSD: cpu.c,v 1.24 2007/10/17 21:23:28 kettenis Exp $ */ /* $NetBSD: cpu.c,v 1.13 2001/05/26 21:27:15 chs Exp $ */ /* @@ -63,6 +63,7 @@ #include <machine/reg.h> #include <machine/trap.h> #include <machine/pmap.h> +#include <machine/sparc64.h> #include <sparc64/sparc64/cache.h> @@ -74,10 +75,15 @@ struct cacheinfo cacheinfo = { /* Linked list of all CPUs in system. */ struct cpu_info *cpus = NULL; +struct cpu_info *alloc_cpuinfo(int); + /* The following are used externally (sysctl_hw). */ char machine[] = MACHINE; /* from <machine/param.h> */ char cpu_model[100]; +void cpu_reset_fpustate(void); +void cpu_hatch(void); + /* The CPU configuration driver. */ static void cpu_attach(struct device *, struct device *, void *); int cpu_match(struct device *, void *, void *); @@ -91,6 +97,81 @@ extern struct cfdriver cpu_cd; #define IU_IMPL(v) ((((u_int64_t)(v))&VER_IMPL) >> VER_IMPL_SHIFT) #define IU_VERS(v) ((((u_int64_t)(v))&VER_MASK) >> VER_MASK_SHIFT) +struct cpu_info * +alloc_cpuinfo(int node) +{ + paddr_t pa0, pa; + vaddr_t va, va0, kstack; + vsize_t sz = 8 * PAGE_SIZE; + int portid; + struct cpu_info *cpi, *ci; + extern paddr_t cpu0paddr; + + portid = getpropint(node, "portid", -1); + if (portid == -1) + portid = getpropint(node, "upa-portid", -1); + if (portid == -1) + panic("alloc_cpuinfo: portid"); + + for (cpi = cpus; cpi != NULL; cpi = cpi->ci_next) + if (cpi->ci_upaid == portid) + return cpi; + + va = uvm_km_valloc_align(kernel_map, sz, 8 * PAGE_SIZE); + if (va == 0) + panic("alloc_cpuinfo: no virtual space"); + va0 = va; + + pa0 = cpu0paddr; + cpu0paddr += sz; + + for (pa = pa0; pa < cpu0paddr; pa += PAGE_SIZE, va += PAGE_SIZE) + pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE); + + pmap_update(pmap_kernel()); + + cpi = (struct cpu_info *)(va0 + CPUINFO_VA - INTSTACK); + + memset((void *)va0, 0, sz); + + kstack = uvm_km_alloc (kernel_map, USPACE); + if (kstack == 0) + panic("alloc_cpuinfo: unable to allocate pcb"); + + /* + * Initialize cpuinfo structure. + * + * Arrange pcb, idle stack and interrupt stack in the same + * way as is done for the boot CPU in pmap.c. + */ + cpi->ci_next = NULL; + cpi->ci_curproc = NULL; + cpi->ci_number = ncpus++; + cpi->ci_upaid = portid; + cpi->ci_fpproc = NULL; +#ifdef MULTIPROCESSOR + cpi->ci_spinup = cpu_hatch; /* XXX */ +#else + cpi->ci_spinup = NULL; +#endif + + cpi->ci_initstack = (void *)(kstack + USPACE); + cpi->ci_paddr = pa0; + cpi->ci_self = cpi; + cpi->ci_node = node; + cpi->ci_cpcb = (struct pcb *)kstack; + + sched_init_cpu(cpi); + + /* + * Finally, add itself to the list of active cpus. + */ + for (ci = cpus; ci->ci_next != NULL; ci = ci->ci_next) + ; + ci->ci_next = cpi; + return (cpi); +} + int cpu_match(parent, vcf, aux) struct device *parent; @@ -103,6 +184,27 @@ cpu_match(parent, vcf, aux) return (strcmp(cf->cf_driver->cd_name, ma->ma_name) == 0); } +void +cpu_reset_fpustate(void) +{ + struct fpstate64 *fpstate; + struct fpstate64 fps[2]; + + /* This needs to be 64-bit aligned */ + fpstate = ALIGNFPSTATE(&fps[1]); + /* + * Get the FSR and clear any exceptions. If we do not unload + * the queue here and it is left over from a previous crash, we + * will panic in the first loadfpstate(), due to a sequence error, + * so we need to dump the whole state anyway. + * + * If there is no FPU, trap.c will advance over all the stores, + * so we initialize fs_fsr here. + */ + fpstate->fs_fsr = 7 << FSR_VER_SHIFT; /* 7 is reserved for "none" */ + savefpstate(fpstate); +} + /* * Attach the CPU. * Discover interesting goop about the virtual address cache @@ -119,26 +221,12 @@ cpu_attach(parent, dev, aux) int impl, vers; char *cpuname; struct mainbus_attach_args *ma = aux; - struct fpstate64 *fpstate; - struct fpstate64 fps[2]; - char *sep; + struct cpu_info *ci; + const char *sep; register int i, l; u_int64_t ver; extern u_int64_t cpu_clockrate[]; - /* This needs to be 64-bit aligned */ - fpstate = ALIGNFPSTATE(&fps[1]); - /* - * Get the FSR and clear any exceptions. If we do not unload - * the queue here and it is left over from a previous crash, we - * will panic in the first loadfpstate(), due to a sequence error, - * so we need to dump the whole state anyway. - * - * If there is no FPU, trap.c will advance over all the stores, - * so we initialize fs_fsr here. - */ - fpstate->fs_fsr = 7 << FSR_VER_SHIFT; /* 7 is reserved for "none" */ - savefpstate(fpstate); ver = getver(); impl = IU_IMPL(ver); vers = IU_VERS(ver); @@ -146,6 +234,19 @@ cpu_attach(parent, dev, aux) /* tell them what we have */ node = ma->ma_node; + /* + * Allocate cpu_info structure if needed. + */ + ci = alloc_cpuinfo(node); + + /* + * Only do this on the boot cpu. Other cpu's call + * cpu_reset_fpustate() from cpu_hatch() before they + * call into the idle loop. + */ + if (ci->ci_number == 0) + cpu_reset_fpustate(); + clk = getpropint(node, "clock-frequency", 0); if (clk == 0) { /* @@ -280,6 +381,64 @@ struct cfdriver cpu_cd = { NULL, "cpu", DV_DULL }; +#ifdef MULTIPROCESSOR +void cpu_mp_startup(void); +volatile int cpu_mp_started; + +void +cpu_boot_secondary_processors(void) +{ + struct cpu_info *ci; + int cpuid, i; + + for (ci = cpus; ci != NULL; ci = ci->ci_next) { + if (ci->ci_upaid == CPU_UPAID) + continue; + + cpuid = getpropint(ci->ci_node, "cpuid", -1); + if (cpuid == -1) { + prom_start_cpu(ci->ci_node, + (void *)cpu_mp_startup, ci->ci_paddr); + } else { + prom_start_cpu_by_cpuid(cpuid, + (void *)cpu_mp_startup, ci->ci_paddr); + } + + for (i = 0; i < 2000; i++) { + sparc_membar(Sync); + if (cpu_mp_started == 1) + break; + delay(10000); + } + + cpu_mp_started = 0; + sparc_membar(Sync); + } +} + +void +cpu_hatch(void) +{ + int s; + + printf("cpu%d running\n", cpu_number()); + + cpu_reset_fpustate(); + cpu_mp_started = 1; + sparc_membar(Sync); + + s = splhigh(); + microuptime(&curcpu()->ci_schedstate.spc_runtime); + splx(s); + + extern long tick_increment; + next_tick(tick_increment); + + SCHED_LOCK(s); + cpu_switchto(NULL, sched_chooseproc()); +} +#endif + void need_resched(struct cpu_info *ci) { diff --git a/sys/arch/sparc64/sparc64/locore.s b/sys/arch/sparc64/sparc64/locore.s index 9658e9de666..b1a06dc9786 100644 --- a/sys/arch/sparc64/sparc64/locore.s +++ b/sys/arch/sparc64/sparc64/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.88 2007/10/17 20:16:11 kettenis Exp $ */ +/* $OpenBSD: locore.s,v 1.89 2007/10/17 21:23:28 kettenis Exp $ */ /* $NetBSD: locore.s,v 1.137 2001/08/13 06:10:10 jdolecek Exp $ */ /* @@ -4573,6 +4573,392 @@ dlflush2: _ALIGN .text +#ifdef MULTIPROCESSOR +ENTRY(cpu_mp_startup) + mov %o0, %g2 + wrpr %g0, 0, %cleanwin + wrpr %g0, 13, %pil + wrpr %g0, PSTATE_INTR|PSTATE_PEF, %pstate + wr %o0, FPRS_FEF, %fprs ! Turn on FPU + + wrpr %g0, 0, %tl ! Make sure we're not in NUCLEUS mode + + flushw + +#if 0 + /* + * Disable the DCACHE entirely for debug. + */ + ldxa [%g0] ASI_MCCR, %o1 + andn %o1, MCCR_DCACHE_EN, %o1 + stxa %o1, [%g0] ASI_MCCR + membar #Sync +#endif /* 0 */ + + sethi %hi(KERNBASE), %l0 ! Find our xlation + sethi %hi(DATA_START), %l3 + + set _C_LABEL(ktextp), %l2 ! Find phys addr + ldx [%l2], %l2 ! The following gets ugly: We need to load the following mask + set _C_LABEL(kdatap), %l5 + ldx [%l5], %l5 + + set _C_LABEL(ektext), %l1 ! And the ends... + ldx [%l1], %l1 + set _C_LABEL(ekdata), %l4 + ldx [%l4], %l4 + + sethi %hi(0xe0000000), %o0 ! V=1|SZ=11|NFO=0|IE=0 + sllx %o0, 32, %o0 ! Shift it into place + + sethi %hi(0x400000), %l6 ! Create a 4MB mask + add %l6, -1, %l7 + + mov -1, %o1 ! Create a nice mask + sllx %o1, 41, %o1 ! Mask off high bits + or %o1, 0xfff, %o1 ! We can just load this in 12 (of 13) bits + + andn %l2, %o1, %l2 ! Mask the phys page number + andn %l5, %o1, %l5 ! Mask the phys page number + + or %l2, %o0, %l2 ! Now take care of the high bits + or %l5, %o0, %l5 ! Now take care of the high bits + + wrpr %g0, PSTATE_KERN, %pstate ! Disable interrupts + +#ifdef DEBUG + set 1f, %o0 ! Debug printf for TEXT page + srlx %l0, 32, %o1 + srl %l0, 0, %o2 + or %l2, TLB_L|TLB_CP|TLB_CV|TLB_P, %o4 ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=1(ugh)|G=0 + srlx %o4, 32, %o3 + call _C_LABEL(prom_printf) + srl %o4, 0, %o4 + + set 1f, %o0 ! Debug printf for DATA page + srlx %l3, 32, %o1 + srl %l3, 0, %o2 + or %l5, TLB_L|TLB_CP|TLB_CV|TLB_P|TLB_W, %o4 ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=1(ugh)|G=0 + srlx %o4, 32, %o3 + call _C_LABEL(prom_printf) + srl %o4, 0, %o4 + .data +1: + .asciz "Setting DTLB entry %08x %08x data %08x %08x\r\n" + _ALIGN + .text +#endif /* DEBUG */ + mov %l0, %o0 ! Demap all of kernel dmmu text segment + mov %l3, %o1 + set 0x2000, %o2 ! 8K page size + add %l1, %l7, %o5 ! Extend to 4MB boundary + andn %o5, %l7, %o5 +0: + stxa %o0, [%o0] ASI_DMMU_DEMAP ! Demap text segment + membar #Sync + cmp %o0, %o5 + bleu 0b + add %o0, %o2, %o0 + + add %l4, %l7, %o5 ! Extend to 4MB boundary + andn %o5, %l7, %o5 +0: + stxa %o1, [%o1] ASI_DMMU_DEMAP ! Demap data segment + membar #Sync + cmp %o1, %o5 + bleu 0b + add %o1, %o2, %o1 + + set (1<<14)-8, %o0 ! Clear out DCACHE +1: +dlflush2a: + stxa %g0, [%o0] ASI_DCACHE_TAG ! clear DCACHE line + membar #Sync + brnz,pt %o0, 1b + dec 8, %o0 + + /* + * First map data segment into the DMMU. + */ + set TLB_TAG_ACCESS, %o0 ! Now map it back in with a locked TTE + mov %l3, %o1 +#ifdef NO_VCACHE + ! And low bits: L=1|CP=1|CV=0(ugh)|E=0|P=1|W=1|G=0 + or %l5, TLB_L|TLB_CP|TLB_P|TLB_W, %o2 +#else /* NO_VCACHE */ + ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=1|G=0 + or %l5, TLB_L|TLB_CP|TLB_CV|TLB_P|TLB_W, %o2 +#endif /* NO_VCACHE */ + set 1f, %o5 +2: + stxa %o1, [%o0] ASI_DMMU ! Set VA for DSEG + membar #Sync ! We may need more membar #Sync in here + stxa %o2, [%g0] ASI_DMMU_DATA_IN ! Store TTE for DSEG + membar #Sync ! We may need more membar #Sync in here + flush %o5 ! Make IMMU see this too +1: + add %o1, %l6, %o1 ! increment VA + cmp %o1, %l4 ! Next 4MB mapping.... + blu,pt %xcc, 2b + add %o2, %l6, %o2 ! Increment tag + + /* + * Next map the text segment into the DMMU so we can get at RODATA. + */ + mov %l0, %o1 +#ifdef NO_VCACHE + ! And low bits: L=1|CP=1|CV=0(ugh)|E=0|P=1|W=0|G=0 + or %l2, TLB_L|TLB_CP|TLB_P, %o2 +#else /* NO_VCACHE */ + ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=0|G=0 + or %l2, TLB_L|TLB_CP|TLB_CV|TLB_P, %o2 +#endif /* NO_VCACHE */ +2: + stxa %o1, [%o0] ASI_DMMU ! Set VA for DSEG + membar #Sync ! We may need more membar #Sync in here + stxa %o2, [%g0] ASI_DMMU_DATA_IN ! Store TTE for DSEG + membar #Sync ! We may need more membar #Sync in here + flush %o5 ! Make IMMU see this too + add %o1, %l6, %o1 ! increment VA + cmp %o1, %l1 ! Next 4MB mapping.... + blu,pt %xcc, 2b + add %o2, %l6, %o2 ! Increment tag + +#ifdef DEBUG + set 1f, %o0 ! Debug printf + srlx %l0, 32, %o1 + srl %l0, 0, %o2 + or %l2, TLB_L|TLB_CP|TLB_CV|TLB_P, %o4 + srlx %o4, 32, %o3 + call _C_LABEL(prom_printf) + srl %o4, 0, %o4 + .data +1: + .asciz "Setting ITLB entry %08x %08x data %08x %08x\r\n" + _ALIGN + .text +#endif /* DEBUG */ + /* + * Finished the DMMU, now we need to do the IMMU which is more + * difficult because we're execting instructions through the IMMU + * while we're flushing it. We need to remap the entire kernel + * to a new context, flush the entire context 0 IMMU, map it back + * into context 0, switch to context 0, and flush context 1. + * + * Another interesting issue is that the flush instructions are + * translated through the DMMU, therefore we need to enter the + * mappings both in the IMMU and the DMMU so we can flush them + * correctly. + * + * Start by mapping in the kernel text as context==1 + */ + set TLB_TAG_ACCESS, %o0 + or %l0, 1, %o1 ! Context = 1 + or %l2, TLB_CP|TLB_P, %o2 ! And low bits: L=0|CP=1|CV=0|E=0|P=1|G=0 +2: + stxa %o1, [%o0] ASI_DMMU ! Make DMMU point to it + membar #Sync ! We may need more membar #Sync in here + stxa %o2, [%g0] ASI_DMMU_DATA_IN ! Store it + membar #Sync ! We may need more membar #Sync in here + stxa %o1, [%o0] ASI_IMMU ! Make IMMU point to it + membar #Sync ! We may need more membar #Sync in here + flush %o1-1 ! Make IMMU see this too + stxa %o2, [%g0] ASI_IMMU_DATA_IN ! Store it + membar #Sync ! We may need more membar #Sync in here + flush %o5 ! Make IMMU see this too + add %o1, %l6, %o1 ! increment VA + cmp %o1, %l1 ! Next 4MB mapping.... + blu,pt %xcc, 2b + add %o2, %l6, %o2 ! Increment tag + + !! + !! Load 1 as primary context + !! + mov 1, %o0 + mov CTX_PRIMARY, %o1 + stxa %o0, [%o1] ASI_DMMU + wrpr %g0, 0, %tl ! Make SURE we're nucleus mode + membar #Sync ! This probably should be a flush, but it works + flush %o5 ! This should be KERNBASE + + !! + !! Demap entire context 0 kernel + !! + or %l0, DEMAP_PAGE_NUCLEUS, %o0 ! Context = Nucleus + add %l1, %l7, %o1 ! Demap all of kernel text seg + andn %o1, %l7, %o1 ! rounded up to 4MB. + set 0x2000, %o2 ! 8K page size +0: + stxa %o0, [%o0] ASI_IMMU_DEMAP ! Demap it + membar #Sync + flush %o5 ! Assume low bits are benign + cmp %o0, %o1 + bleu,pt %xcc, 0b ! Next page + add %o0, %o2, %o0 + + or %l3, DEMAP_PAGE_NUCLEUS, %o0 ! Context = Nucleus + add %l4, %l7, %o1 ! Demap all of kernel data seg + andn %o1, %l7, %o1 ! rounded up to 4MB. +0: + stxa %o0, [%o0] ASI_IMMU_DEMAP ! Demap it + membar #Sync + flush %o5 ! Assume low bits are benign + cmp %o0, %o1 + bleu,pt %xcc, 0b ! Next page + add %o0, %o2, %o0 + + !! + !! Now, map in the kernel text as context==0 + !! + set TLB_TAG_ACCESS, %o0 + mov %l0, %o1 ! Context = 0 +#ifdef NO_VCACHE + ! And low bits: L=1|CP=1|CV=0(ugh)|E=0|P=1|W=1|G=0 + or %l2, TLB_L|TLB_CP|TLB_P, %o2 +#else /* NO_VCACHE */ + ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=1|G=0 + or %l2, TLB_L|TLB_CP|TLB_CV|TLB_P, %o2 +#endif /* NO_VCACHE */ +2: + stxa %o1, [%o0] ASI_IMMU ! Make IMMU point to it + membar #Sync ! We may need more membar #Sync in here + stxa %o2, [%g0] ASI_IMMU_DATA_IN ! Store it + membar #Sync ! We may need more membar #Sync in here + flush %o5 ! Make IMMU see this too + add %o1, %l6, %o1 ! increment VA + cmp %o1, %l1 ! Next 4MB mapping.... + blu,pt %xcc, 2b + add %o2, %l6, %o2 ! Increment tag + + !! + !! Restore 0 as primary context + !! + mov CTX_PRIMARY, %o0 + stxa %g0, [%o0] ASI_DMMU + membar #Sync ! No real reason for this XXXX + flush %o5 + + !! + !! Demap context 1 + !! + mov 1, %o1 + mov CTX_SECONDARY, %o0 + stxa %o1, [%o0] ASI_DMMU + membar #Sync ! This probably should be a flush, but it works + flush %l0 + mov DEMAP_CTX_SECONDARY, %o4 + stxa %o4, [%o4] ASI_DMMU_DEMAP + membar #Sync + stxa %o4, [%o4] ASI_IMMU_DEMAP + membar #Sync + flush %l0 + stxa %g0, [%o0] ASI_DMMU + membar #Sync + flush %l0 + +#ifdef DEBUG + set 1f, %o0 ! Debug printf + call _C_LABEL(prom_printf) + .data +1: + .asciz "Setting CPUINFO mappings...\r\n" + _ALIGN + .text +#endif /* DEBUG */ + + /* + * Get pointer to our cpu_info struct + */ + + mov %g2, %l1 ! Load the interrupt stack's PA + + sethi %hi(0xa0000000), %l2 ! V=1|SZ=01|NFO=0|IE=0 + sllx %l2, 32, %l2 ! Shift it into place + + mov -1, %l3 ! Create a nice mask + sllx %l3, 41, %l4 ! Mask off high bits + or %l4, 0xfff, %l4 ! We can just load this in 12 (of 13) bits + + andn %l1, %l4, %l1 ! Mask the phys page number + + or %l2, %l1, %l1 ! Now take care of the high bits +#ifdef NO_VCACHE + or %l1, TLB_L|TLB_CP|TLB_P|TLB_W, %l2 ! And low bits: L=1|CP=1|CV=0|E=0|P=1|W=0|G=0 +#else /* NO_VCACHE */ + or %l1, TLB_L|TLB_CP|TLB_CV|TLB_P|TLB_W, %l2 ! And low bits: L=1|CP=1|CV=1|E=0|P=1|W=0|G=0 +#endif /* NO_VCACHE */ + + !! + !! Now, map in the interrupt stack as context==0 + !! + set TLB_TAG_ACCESS, %l5 + sethi %hi(INTSTACK), %l0 + stxa %l0, [%l5] ASI_DMMU ! Make DMMU point to it + membar #Sync ! We may need more membar #Sync in here + stxa %l2, [%g0] ASI_DMMU_DATA_IN ! Store it + membar #Sync ! We may need more membar #Sync in here + flush %o5 + + !! + !! Set 0 as primary context XXX + !! + mov CTX_PRIMARY, %o0 + stxa %g0, [%o0] ASI_DMMU + flush %o5 + +!!! Make sure our stack's OK. + sethi %hi(CPUINFO_VA+CI_INITSTACK), %l0 + ldx [%l0 + %lo(CPUINFO_VA+CI_INITSTACK)], %l0 + add %l0, - CC64FSZ - 80, %l0 + andn %l0, 0x0f, %l0 ! Needs to be 16-byte aligned + sub %l0, BIAS, %l0 ! and biased + mov %l0, %sp + set 1, %fp + clr %i7 + + /* + * Step 7: change the trap base register, and install our TSBs + */ + + /* Set the dmmu tsb */ + sethi %hi(0x1fff), %l2 + set _C_LABEL(tsb_dmmu), %l0 + ldx [%l0], %l0 + set _C_LABEL(tsbsize), %l1 + or %l2, %lo(0x1fff), %l2 + ld [%l1], %l1 + andn %l0, %l2, %l0 ! Mask off size and split bits + or %l0, %l1, %l0 ! Make a TSB pointer + set TSB, %l2 + stxa %l0, [%l2] ASI_DMMU ! Install data TSB pointer + membar #Sync + + + /* Set the immu tsb */ + sethi %hi(0x1fff), %l2 + set _C_LABEL(tsb_immu), %l0 + ldx [%l0], %l0 + set _C_LABEL(tsbsize), %l1 + or %l2, %lo(0x1fff), %l2 + ld [%l1], %l1 + andn %l0, %l2, %l0 ! Mask off size and split bits + or %l0, %l1, %l0 ! Make a TSB pointer + set TSB, %l2 + stxa %l0, [%l2] ASI_IMMU ! Install instruction TSB pointer + membar #Sync ! We may need more membar #Sync in here + + /* Change the trap base register */ + set _C_LABEL(trapbase), %l1 + !call _C_LABEL(prom_set_trap_table) ! Now we should be running 100% from our handlers + ! mov %l1, %o0 + wrpr %l1, 0, %tba ! Make sure the PROM didn't foul up. + wrpr %g0, WSTATE_KERN, %wstate + + call _C_LABEL(cpu_hatch) + nop + NOTREACHED +#endif + /* * openfirmware(cell* param); * @@ -9269,6 +9655,9 @@ _C_LABEL(proc0paddr): _C_LABEL(dlflush_start): .xword dlflush1 .xword dlflush2 +#ifdef MULTIPROCESSOR + .xword dlflush2a +#endif .xword dlflush3 .xword dlflush4 .xword dlflush5 diff --git a/sys/arch/sparc64/sparc64/pmap.c b/sys/arch/sparc64/sparc64/pmap.c index e2900c78a79..0c03c10aa44 100644 --- a/sys/arch/sparc64/sparc64/pmap.c +++ b/sys/arch/sparc64/sparc64/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.43 2007/09/09 14:59:37 kettenis Exp $ */ +/* $OpenBSD: pmap.c,v 1.44 2007/10/17 21:23:28 kettenis Exp $ */ /* $NetBSD: pmap.c,v 1.107 2001/08/31 16:47:41 eeh Exp $ */ #undef NO_VCACHE /* Don't forget the locked TLB in dostart */ /* @@ -560,9 +560,9 @@ pmap_calculate_colors() { */ void -pmap_bootstrap(kernelstart, kernelend, maxctx) +pmap_bootstrap(kernelstart, kernelend, maxctx, numcpus) u_long kernelstart, kernelend; - u_int maxctx; + u_int maxctx, numcpus; { extern int data_start[], end[]; /* start of data segment */ extern int msgbufmapped; @@ -993,7 +993,7 @@ remap_data: /* * Allocate a 64KB page for the cpu_info structure now. */ - if ((cpu0paddr = prom_alloc_phys(8*NBPG, 8*NBPG)) == 0 ) { + if ((cpu0paddr = prom_alloc_phys(numcpus * 8*NBPG, 8*NBPG)) == 0 ) { prom_printf("Cannot allocate new cpu_info\r\n"); OF_exit(); } @@ -1407,6 +1407,8 @@ remap_data: cpus->ci_paddr = cpu0paddr; proc0paddr = cpus->ci_cpcb; + cpu0paddr += 64 * KB; + /* The rest will be done at CPU attach time. */ BDPRINTF(PDB_BOOT1, ("Done inserting cpu_info into pmap_kernel()\r\n")); |