diff options
author | Niklas Hallqvist <niklas@cvs.openbsd.org> | 2004-06-13 21:49:30 +0000 |
---|---|---|
committer | Niklas Hallqvist <niklas@cvs.openbsd.org> | 2004-06-13 21:49:30 +0000 |
commit | 2dd254afa61a7c0cc5ae920b463d3d4266852804 (patch) | |
tree | 7adbebef3be24ba910fd83ee1ba09e1577ae21a8 /sys | |
parent | 4d62e331dcde739b4067d712dd602c0927ce11b3 (diff) |
debranch SMP, have fun
Diffstat (limited to 'sys')
144 files changed, 10647 insertions, 1668 deletions
diff --git a/sys/arch/alpha/alpha/cpu.c b/sys/arch/alpha/alpha/cpu.c index a8045be3246..6d5c8a9d347 100644 --- a/sys/arch/alpha/alpha/cpu.c +++ b/sys/arch/alpha/alpha/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.18 2004/06/08 20:13:21 miod Exp $ */ +/* $OpenBSD: cpu.c,v 1.19 2004/06/13 21:49:11 niklas Exp $ */ /* $NetBSD: cpu.c,v 1.44 2000/05/23 05:12:53 thorpej Exp $ */ /*- @@ -565,7 +565,8 @@ cpu_iccb_send(cpu_id, msg) strlcpy(pcsp->pcs_iccb.iccb_rxbuf, msg, sizeof pcsp->pcs_iccb.iccb_rxbuf); pcsp->pcs_iccb.iccb_rxlen = strlen(msg); - atomic_setbits_ulong(&hwrpb->rpb_rxrdy, cpumask); + /* XXX cast to __volatile */ + atomic_setbits_ulong((__volatile u_long *)&hwrpb->rpb_rxrdy, cpumask); /* Wait for the message to be received. */ for (timeout = 10000; timeout != 0; timeout--) { diff --git a/sys/arch/alpha/alpha/genassym.cf b/sys/arch/alpha/alpha/genassym.cf index 27d0faa330b..2a8802256f3 100644 --- a/sys/arch/alpha/alpha/genassym.cf +++ b/sys/arch/alpha/alpha/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.5 2003/06/02 23:27:43 millert Exp $ +# $OpenBSD: genassym.cf,v 1.6 2004/06/13 21:49:11 niklas Exp $ # Copyright (c) 1994, 1995 Gordon W. Ross # Copyright (c) 1993 Adam Glass @@ -120,6 +120,8 @@ struct prochd member ph_link member ph_rlink +export SONPROC + # offsets needed by cpu_switch() to switch mappings. define VM_MAP_PMAP offsetof(struct vmspace, vm_map.pmap) diff --git a/sys/arch/alpha/alpha/locore.s b/sys/arch/alpha/alpha/locore.s index d30e1f646f1..48849130727 100644 --- a/sys/arch/alpha/alpha/locore.s +++ b/sys/arch/alpha/alpha/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.21 2003/10/18 20:14:40 jmc Exp $ */ +/* $OpenBSD: locore.s,v 1.22 2004/06/13 21:49:11 niklas Exp $ */ /* $NetBSD: locore.s,v 1.94 2001/04/26 03:10:44 ross Exp $ */ /*- @@ -891,7 +891,6 @@ cpu_switch_queuescan: * * Note: GET_CPUINFO clobbers v0, t0, t8...t11. 
*/ -#if 0 #ifdef __alpha_bwx__ ldiq t0, SONPROC /* p->p_stat = SONPROC */ stb t0, P_STAT(s2) @@ -904,7 +903,6 @@ cpu_switch_queuescan: or t0, t1, t0 stq_u t0, 0(t3) #endif /* __alpha_bwx__ */ -#endif GET_CPUINFO /* p->p_cpu initialized in fork1() for single-processor */ diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c index d8860e84b38..484eb661c93 100644 --- a/sys/arch/alpha/alpha/pmap.c +++ b/sys/arch/alpha/alpha/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.41 2004/06/08 20:13:21 miod Exp $ */ +/* $OpenBSD: pmap.c,v 1.42 2004/06/13 21:49:11 niklas Exp $ */ /* $NetBSD: pmap.c,v 1.154 2000/12/07 22:18:55 thorpej Exp $ */ /*- @@ -388,11 +388,6 @@ struct lock pmap_main_lock; struct simplelock pmap_all_pmaps_slock; struct simplelock pmap_growkernel_slock; -#ifdef __OpenBSD__ -#define spinlockinit(lock, name, flags) lockinit(lock, 0, name, 0, flags) -#define spinlockmgr(lock, flags, slock) lockmgr(lock, flags, slock, curproc) -#endif - #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) #define PMAP_MAP_TO_HEAD_LOCK() \ spinlockmgr(&pmap_main_lock, LK_SHARED, NULL) diff --git a/sys/arch/alpha/include/cpu.h b/sys/arch/alpha/include/cpu.h index 7bfc81c4624..c793d217035 100644 --- a/sys/arch/alpha/include/cpu.h +++ b/sys/arch/alpha/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.24 2004/06/08 20:13:23 miod Exp $ */ +/* $OpenBSD: cpu.h,v 1.25 2004/06/13 21:49:12 niklas Exp $ */ /* $NetBSD: cpu.h,v 1.45 2000/08/21 02:03:12 thorpej Exp $ */ /*- @@ -105,6 +105,7 @@ typedef union alpha_t_float { #ifdef _KERNEL #include <machine/bus.h> +#include <sys/device.h> struct pcb; struct proc; @@ -177,6 +178,7 @@ struct mchkinfo { }; struct cpu_info { + struct device *ci_dev; /* pointer to our device */ /* * Public members. 
*/ @@ -185,12 +187,13 @@ struct cpu_info { u_long ci_simple_locks; /* # of simple locks held */ #endif struct proc *ci_curproc; /* current owner of the processor */ + struct simplelock ci_slock; /* lock on this data structure */ + cpuid_t ci_cpuid; /* our CPU ID */ /* * Private members. */ struct mchkinfo ci_mcinfo; /* machine check info */ - cpuid_t ci_cpuid; /* our CPU ID */ struct proc *ci_fpcurproc; /* current owner of the FPU */ paddr_t ci_curpcb; /* PA of current HW PCB */ struct pcb *ci_idle_pcb; /* our idle PCB */ @@ -265,15 +268,20 @@ struct clockframe { /* * Preempt the current process if in interrupt from user mode, * or after the current trap/syscall if in system mode. - * - * XXXSMP - * need_resched() needs to take a cpu_info *. */ -#define need_resched() \ +#ifdef MULTIPROCESSOR +#define need_resched(ci) \ +do { \ + ci->ci_want_resched = 1; \ + aston(curcpu()); \ +} while (/*CONSTCOND*/0) +#else +#define need_resched(ci) \ do { \ curcpu()->ci_want_resched = 1; \ aston(curcpu()); \ } while (/*CONSTCOND*/0) +#endif /* * Give a profiling tick to the current process when the user profiling diff --git a/sys/arch/alpha/include/intr.h b/sys/arch/alpha/include/intr.h index a5813c6e1bf..25fdcba079d 100644 --- a/sys/arch/alpha/include/intr.h +++ b/sys/arch/alpha/include/intr.h @@ -1,4 +1,4 @@ -/* $OpenBSD: intr.h,v 1.16 2004/04/16 04:52:05 pvalchev Exp $ */ +/* $OpenBSD: intr.h,v 1.17 2004/06/13 21:49:12 niklas Exp $ */ /* $NetBSD: intr.h,v 1.26 2000/06/03 20:47:41 thorpej Exp $ */ /*- @@ -159,6 +159,7 @@ extern ipifunc_t ipifuncs[ALPHA_NIPIS]; void alpha_send_ipi(unsigned long, unsigned long); void alpha_broadcast_ipi(unsigned long); +void alpha_multicast_ipi(unsigned long, unsigned long); /* * Alpha shared-interrupt-line common code. 
diff --git a/sys/arch/alpha/include/pmap.h b/sys/arch/alpha/include/pmap.h index e61bba6497d..1a5c4784915 100644 --- a/sys/arch/alpha/include/pmap.h +++ b/sys/arch/alpha/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.17 2004/06/09 20:17:23 tedu Exp $ */ +/* $OpenBSD: pmap.h,v 1.18 2004/06/13 21:49:12 niklas Exp $ */ /* $NetBSD: pmap.h,v 1.37 2000/11/19 03:16:35 thorpej Exp $ */ /*- @@ -178,7 +178,7 @@ struct pv_head { #if defined(MULTIPROCESSOR) void pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t); -void pmap_do_tlb_shootdown(void); +void pmap_do_tlb_shootdown(struct cpu_info *, struct trapframe *); void pmap_tlb_shootdown_q_drain(u_long, boolean_t); #define PMAP_TLB_SHOOTDOWN(pm, va, pte) \ pmap_tlb_shootdown((pm), (va), (pte)) diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf index de6ab55ca81..a56769705b4 100644 --- a/sys/arch/amd64/amd64/genassym.cf +++ b/sys/arch/amd64/amd64/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.3 2004/05/13 20:20:24 sturm Exp $ +# $OpenBSD: genassym.cf,v 1.4 2004/06/13 21:49:12 niklas Exp $ # Written by Artur Grabowski art@openbsd.org, Public Domain @@ -17,6 +17,7 @@ include <machine/vmparam.h> include <machine/intr.h> export SRUN +export SONPROC export L4_SLOT_KERNBASE define L3_SLOT_KERNBASE pl3_pi(KERNBASE) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index a61e1719e61..732eea0a0df 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.S,v 1.10 2004/05/13 20:20:24 sturm Exp $ */ +/* $OpenBSD: locore.S,v 1.11 2004/06/13 21:49:12 niklas Exp $ */ /* $NetBSD: locore.S,v 1.2 2003/04/26 19:34:45 fvdl Exp $ */ /* @@ -848,9 +848,7 @@ switch_resume: movq %rax,P_BACK(%r12) /* Record new proc. 
*/ -#ifdef MULTIPROCESSOR - movb $SONPROC,P_STAT(%r12) # l->l_stat = SONPROC -#endif + movb $SONPROC,P_STAT(%r12) # p->p_stat = SONPROC SET_CURPROC(%r12,%rcx) sti diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h index cc688f39aa6..9caf05b561d 100644 --- a/sys/arch/amd64/include/cpu.h +++ b/sys/arch/amd64/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.6 2004/03/09 23:05:13 deraadt Exp $ */ +/* $OpenBSD: cpu.h,v 1.7 2004/06/13 21:49:13 niklas Exp $ */ /* $NetBSD: cpu.h,v 1.1 2003/04/26 18:39:39 fvdl Exp $ */ /*- @@ -191,25 +191,15 @@ extern struct cpu_info cpu_info_primary; * or after the current trap/syscall if in system mode. */ -#ifdef MULTIPROCESSOR #define need_resched(ci) \ do { \ - struct cpu_info *__ci = (ci); \ + struct cpu_info *__ci = curcpu(); \ __ci->ci_want_resched = 1; \ if (__ci->ci_curproc != NULL) \ aston(__ci->ci_curproc); \ } while (/*CONSTCOND*/0) -#else -#define need_resched() \ -do { \ - struct cpu_info *__ci = curcpu(); \ - __ci->ci_want_resched = 1; \ - if (__ci->ci_curproc != NULL) \ - aston(__ci->ci_curproc); \ -} while (/*CONSTCOND*/0) -#endif -#endif +#endif /* MULTIPROCESSOR */ #define aston(p) ((p)->p_md.md_astpending = 1) diff --git a/sys/arch/amd64/include/param.h b/sys/arch/amd64/include/param.h index ea59b9e5273..09e070357a6 100644 --- a/sys/arch/amd64/include/param.h +++ b/sys/arch/amd64/include/param.h @@ -1,4 +1,4 @@ -/* $OpenBSD: param.h,v 1.3 2004/04/19 22:55:48 deraadt Exp $ */ +/* $OpenBSD: param.h,v 1.4 2004/06/13 21:49:13 niklas Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. @@ -162,9 +162,3 @@ #define btop(x) x86_btop(x) #define ptob(x) x86_ptob(x) #define round_pdr(x) x86_round_pdr(x) - -/* XXX - oh, the horror.. Just for now. 
*/ -#define KERNEL_PROC_LOCK(p) -#define KERNEL_PROC_UNLOCK(p) -#define KERNEL_LOCK(i) -#define KERNEL_UNLOCK() diff --git a/sys/arch/amd64/isa/isa_machdep.c b/sys/arch/amd64/isa/isa_machdep.c index f86a49a00ed..2b3e249e7ae 100644 --- a/sys/arch/amd64/isa/isa_machdep.c +++ b/sys/arch/amd64/isa/isa_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: isa_machdep.c,v 1.1 2004/01/28 01:39:39 mickey Exp $ */ +/* $OpenBSD: isa_machdep.c,v 1.2 2004/06/13 21:49:13 niklas Exp $ */ /* $NetBSD: isa_machdep.c,v 1.22 1997/06/12 23:57:32 thorpej Exp $ */ #define ISA_DMA_STATS @@ -132,6 +132,7 @@ #include <machine/intr.h> #include <machine/pio.h> #include <machine/cpufunc.h> +#include <machine/i8259.h> #include <dev/isa/isareg.h> #include <dev/isa/isavar.h> @@ -139,7 +140,6 @@ #include <dev/isa/isadmavar.h> #endif #include <i386/isa/isa_machdep.h> -#include <i386/isa/icu.h> #include "isadma.h" diff --git a/sys/arch/arm/arm/cpuswitch.S b/sys/arch/arm/arm/cpuswitch.S index e7f673a8a45..813bb1c813f 100644 --- a/sys/arch/arm/arm/cpuswitch.S +++ b/sys/arch/arm/arm/cpuswitch.S @@ -1,4 +1,4 @@ -/* $OpenBSD: cpuswitch.S,v 1.2 2004/02/01 06:10:33 drahn Exp $ */ +/* $OpenBSD: cpuswitch.S,v 1.3 2004/06/13 21:49:13 niklas Exp $ */ /* $NetBSD: cpuswitch.S,v 1.41 2003/11/15 08:44:18 scw Exp $ */ /* @@ -537,11 +537,9 @@ ENTRY(cpu_switch) /* l->l_cpu initialized in fork1() for single-processor */ #endif -#if 0 /* Process is now on a processor. 
*/ - mov r0, #LSONPROC /* l->l_stat = LSONPROC */ - str r0, [r6, #(P_STAT)] -#endif + mov r0, #SONPROC /* p->p_stat = SONPROC */ + strb r0, [r6, #(P_STAT)] /* We have a new curproc now so make a note it */ ldr r7, .Lcurproc diff --git a/sys/arch/arm/arm/genassym.cf b/sys/arch/arm/arm/genassym.cf index 6803c524393..83733107879 100644 --- a/sys/arch/arm/arm/genassym.cf +++ b/sys/arch/arm/arm/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.2 2004/02/14 15:34:31 miod Exp $ +# $OpenBSD: genassym.cf,v 1.3 2004/06/13 21:49:13 niklas Exp $ # $NetBSD: genassym.cf,v 1.27 2003/11/04 10:33:16 dsl Exp$ # Copyright (c) 1982, 1990 The Regents of the University of California. @@ -83,6 +83,8 @@ export PAGE_SHIFT export P_TRACED export P_PROFIL +export SONPROC + struct proc member p_forw member p_back diff --git a/sys/arch/hp300/include/cpu.h b/sys/arch/hp300/include/cpu.h index b304d633999..183dbb8ded8 100644 --- a/sys/arch/hp300/include/cpu.h +++ b/sys/arch/hp300/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.23 2004/05/20 09:20:41 kettenis Exp $ */ +/* $OpenBSD: cpu.h,v 1.24 2004/06/13 21:49:13 niklas Exp $ */ /* $NetBSD: cpu.h,v 1.28 1998/02/13 07:41:51 scottr Exp $ */ /* @@ -91,7 +91,7 @@ struct clockframe { * or after the current trap/syscall if in system mode. 
*/ extern int want_resched; /* resched() was called */ -#define need_resched() { want_resched++; aston(); } +#define need_resched(ci) { want_resched++; aston(); } /* * Give a profiling tick to the current process when the user profiling diff --git a/sys/arch/hppa/hppa/genassym.cf b/sys/arch/hppa/hppa/genassym.cf index ee718b10e25..c9f494d4e20 100644 --- a/sys/arch/hppa/hppa/genassym.cf +++ b/sys/arch/hppa/hppa/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.25 2004/06/08 22:00:25 mickey Exp $ +# $OpenBSD: genassym.cf,v 1.26 2004/06/13 21:49:14 niklas Exp $ # # Copyright (c) 1982, 1990, 1993 @@ -142,6 +142,7 @@ member P_MD_FLAGS p_md.md_flags member P_MD_REGS p_md.md_regs export SRUN +export SONPROC struct pcb member pcb_fpregs diff --git a/sys/arch/hppa/hppa/locore.S b/sys/arch/hppa/hppa/locore.S index 883c4d3eab8..218c95974b2 100644 --- a/sys/arch/hppa/hppa/locore.S +++ b/sys/arch/hppa/hppa/locore.S @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.S,v 1.133 2004/06/08 22:00:25 mickey Exp $ */ +/* $OpenBSD: locore.S,v 1.134 2004/06/13 21:49:14 niklas Exp $ */ /* * Copyright (c) 1998-2004 Michael Shalayeff @@ -2824,6 +2824,8 @@ link_ok copy arg1, t2 kstack_ok #endif + ldi SONPROC, t1 + stb t1, P_STAT(arg1) /* Skip context switch if same process. 
*/ comb,=,n arg1, arg2, switch_return diff --git a/sys/arch/hppa/include/cpu.h b/sys/arch/hppa/include/cpu.h index eff31c9d750..5eb720ab0e9 100644 --- a/sys/arch/hppa/include/cpu.h +++ b/sys/arch/hppa/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.42 2004/04/08 17:10:18 mickey Exp $ */ +/* $OpenBSD: cpu.h,v 1.43 2004/06/13 21:49:14 niklas Exp $ */ /* * Copyright (c) 2000-2004 Michael Shalayeff @@ -124,7 +124,7 @@ extern int cpu_hvers; #define CLKF_SYSCALL(framep) ((framep)->tf_flags & TFF_SYS) #define signotify(p) (setsoftast()) -#define need_resched() (want_resched = 1, setsoftast()) +#define need_resched(ci) (want_resched = 1, setsoftast()) #define need_proftick(p) ((p)->p_flag |= P_OWEUPC, setsoftast()) #ifndef _LOCORE diff --git a/sys/arch/i386/compile/.cvsignore b/sys/arch/i386/compile/.cvsignore index bf45b1f61e5..07993a81a78 100644 --- a/sys/arch/i386/compile/.cvsignore +++ b/sys/arch/i386/compile/.cvsignore @@ -1,5 +1,6 @@ DISKLESS GENERIC +GENERIC.MP RAMDISK RAMDISKB RAMDISKC diff --git a/sys/arch/i386/conf/GENERIC b/sys/arch/i386/conf/GENERIC index 7e6a393b230..19025bed6e7 100644 --- a/sys/arch/i386/conf/GENERIC +++ b/sys/arch/i386/conf/GENERIC @@ -1,4 +1,4 @@ -# $OpenBSD: GENERIC,v 1.370 2004/06/06 04:50:57 pvalchev Exp $ +# $OpenBSD: GENERIC,v 1.371 2004/06/13 21:49:15 niklas Exp $ # # GENERIC -- everything that's currently supported # @@ -39,7 +39,8 @@ config bsd swap generic mainbus0 at root -bios0 at mainbus0 +cpu0 at mainbus? apid ? +bios0 at mainbus0 apid ? 
apm0 at bios0 flags 0x0000 # flags 0x0101 to force protocol version 1.1 pcibios0 at bios0 flags 0x0000 # use 0x30 for a total verbose diff --git a/sys/arch/i386/conf/GENERIC.MP b/sys/arch/i386/conf/GENERIC.MP new file mode 100644 index 00000000000..4bebe33f96d --- /dev/null +++ b/sys/arch/i386/conf/GENERIC.MP @@ -0,0 +1,12 @@ +# $OpenBSD: GENERIC.MP,v 1.2 2004/06/13 21:49:15 niklas Exp $ +# +# GENERIC.MP - sample multiprocessor kernel +# + +include "arch/i386/conf/GENERIC" + +option MULTIPROCESSOR # Multiple processor support +option CPU + +cpu* at mainbus? apid ? +ioapic* at mainbus? apid ? diff --git a/sys/arch/i386/conf/RAMDISK b/sys/arch/i386/conf/RAMDISK index b7f528ea3d3..c377ae6ce8f 100644 --- a/sys/arch/i386/conf/RAMDISK +++ b/sys/arch/i386/conf/RAMDISK @@ -1,4 +1,4 @@ -# $OpenBSD: RAMDISK,v 1.129 2004/06/03 18:22:21 grange Exp $ +# $OpenBSD: RAMDISK,v 1.130 2004/06/13 21:49:15 niklas Exp $ machine i386 # architecture, used by config; REQUIRED @@ -42,7 +42,8 @@ config bsd root on rd0a swap on rd0b and wd0b and sd0b mainbus0 at root -bios0 at mainbus0 +cpu0 at mainbus? apid ? +bios0 at mainbus0 apid ? apm0 at bios0 flags 0x0000 # flags 0x0101 to force protocol version 1.1 #option APMDEBUG pcibios0 at bios0 flags 0x0000 diff --git a/sys/arch/i386/conf/RAMDISKB b/sys/arch/i386/conf/RAMDISKB index c7a0d727224..53184d25203 100644 --- a/sys/arch/i386/conf/RAMDISKB +++ b/sys/arch/i386/conf/RAMDISKB @@ -1,4 +1,4 @@ -# $OpenBSD: RAMDISKB,v 1.70 2004/06/03 18:22:21 grange Exp $ +# $OpenBSD: RAMDISKB,v 1.71 2004/06/13 21:49:15 niklas Exp $ machine i386 # architecture, used by config; REQUIRED @@ -42,7 +42,8 @@ config bsd root on rd0a swap on rd0b and wd0b and sd0b mainbus0 at root -bios0 at mainbus0 +cpu0 at mainbus? apid ? +bios0 at mainbus0 apid ? 
apm0 at bios0 flags 0x0000 # flags 0x0101 to force protocol version 1.1 #option APMDEBUG pcibios0 at bios0 flags 0x0000 diff --git a/sys/arch/i386/conf/RAMDISKC b/sys/arch/i386/conf/RAMDISKC index 76e9db57d6f..81764b62557 100644 --- a/sys/arch/i386/conf/RAMDISKC +++ b/sys/arch/i386/conf/RAMDISKC @@ -1,4 +1,4 @@ -# $OpenBSD: RAMDISKC,v 1.39 2004/06/03 18:22:21 grange Exp $ +# $OpenBSD: RAMDISKC,v 1.40 2004/06/13 21:49:15 niklas Exp $ machine i386 # architecture, used by config; REQUIRED @@ -42,7 +42,8 @@ config bsd root on rd0a swap on rd0b and wd0b and sd0b mainbus0 at root -bios0 at mainbus0 +cpu0 at mainbus? apid ? +bios0 at mainbus0 apid ? apm0 at bios0 flags 0x0000 # flags 0x0101 to force protocol version 1.1 #option APMDEBUG pcibios0 at bios0 flags 0x0000 diff --git a/sys/arch/i386/conf/RAMDISK_CD b/sys/arch/i386/conf/RAMDISK_CD index 830c763eabc..1ea5cf46878 100644 --- a/sys/arch/i386/conf/RAMDISK_CD +++ b/sys/arch/i386/conf/RAMDISK_CD @@ -1,4 +1,4 @@ -# $OpenBSD: RAMDISK_CD,v 1.75 2004/06/06 04:50:57 pvalchev Exp $ +# $OpenBSD: RAMDISK_CD,v 1.76 2004/06/13 21:49:15 niklas Exp $ machine i386 # architecture, used by config; REQUIRED @@ -43,7 +43,8 @@ config bsd root on rd0a swap on rd0b and wd0b and sd0b mainbus0 at root -bios0 at mainbus0 +cpu0 at mainbus? apid ? +bios0 at mainbus0 apid ? 
apm0 at bios0 flags 0x0000 # flags 0x0101 to force protocol version 1.1 #option APMDEBUG pcibios0 at bios0 flags 0x0000 diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386 index 02b5d8ee2c5..a4cc18bd5e9 100644 --- a/sys/arch/i386/conf/files.i386 +++ b/sys/arch/i386/conf/files.i386 @@ -1,4 +1,4 @@ -# $OpenBSD: files.i386,v 1.122 2004/06/05 15:06:22 grange Exp $ +# $OpenBSD: files.i386,v 1.123 2004/06/13 21:49:15 niklas Exp $ # # new style config file for i386 architecture # @@ -44,6 +44,10 @@ file arch/i386/i386/dkcsum.c bios file lib/libz/adler32.c !ppp_deflate & !ipsec & !crypto file dev/cons.c file dev/cninit.c +file arch/i386/i386/mptramp.s multiprocessor +file arch/i386/i386/lock_machdep.c multiprocessor +file arch/i386/i386/ipifuncs.c multiprocessor +file arch/i386/i386/db_mp.c multiprocessor file arch/i386/i386/wscons_machdep.c wsdisplay major {vnd = 14} @@ -78,7 +82,7 @@ include "../../../dev/ata/files.ata" # System bus types # -define mainbus { } +define mainbus { apid = -1 } device mainbus: isabus, eisabus, pcibus, mainbus attach mainbus at root file arch/i386/i386/mainbus.c mainbus @@ -272,6 +276,28 @@ attach apm at bios file arch/i386/i386/apm.c apm needs-count file arch/i386/i386/apmcall.S apm +# Intel SMP specification 1.4 +define mpbios +file arch/i386/i386/mpbios.c mpbios needs-flag + +# CPUS +define cpu { apid = -1 } +device cpu +attach cpu at mainbus +file arch/i386/i386/cpu.c cpu + +# Common APIC support routines +file arch/i386/i386/apic.c ioapic | lapic + +# Local APIC (required for multiprocessor) +define lapic +file arch/i386/i386/lapic.c lapic needs-flag + +# I/O APICs +device ioapic: mpbios, lapic +attach ioapic at mainbus +file arch/i386/i386/ioapic.c ioapic needs-flag + device pcibios attach pcibios at bios file arch/i386/pci/pcibios.c pcibios needs-count diff --git a/sys/arch/i386/eisa/eisa_machdep.c b/sys/arch/i386/eisa/eisa_machdep.c index 178950e9e86..e086091fd75 100644 --- a/sys/arch/i386/eisa/eisa_machdep.c +++ 
b/sys/arch/i386/eisa/eisa_machdep.c @@ -1,5 +1,5 @@ -/* $OpenBSD: eisa_machdep.c,v 1.7 2004/06/09 20:13:10 deraadt Exp $ */ -/* $NetBSD: eisa_machdep.c,v 1.6 1997/06/06 23:12:52 thorpej Exp $ */ +/* $OpenBSD: eisa_machdep.c,v 1.8 2004/06/13 21:49:15 niklas Exp $ */ +/* $NetBSD: eisa_machdep.c,v 1.10.22.2 2000/06/25 19:36:58 sommerfeld Exp $ */ /*- * Copyright (c) 1997 The NetBSD Foundation, Inc. @@ -81,8 +81,8 @@ #define _I386_BUS_DMA_PRIVATE #include <machine/bus.h> +#include <machine/i8259.h> -#include <i386/isa/icu.h> #include <dev/isa/isavar.h> #include <dev/eisa/eisavar.h> @@ -133,6 +133,9 @@ eisa_intr_map(ec, irq, ihp) u_int irq; eisa_intr_handle_t *ihp; { +#if NIOAPIC > 0 + struct mp_intr_map *mip; +#endif if (irq >= ICU_LEN) { printf("eisa_intr_map: bad IRQ %d\n", irq); @@ -144,6 +147,26 @@ eisa_intr_map(ec, irq, ihp) irq = 9; } +#if NIOAPIC > 0 + if (mp_busses != NULL) { + /* + * Assumes 1:1 mapping between PCI bus numbers and + * the numbers given by the MP bios. + * XXX Is this a valid assumption? 
+ */ + + for (mip = mp_busses[bus].mb_intrs; mip != NULL; + mip = mip->next) { + if (mip->bus_pin == irq) { + *ihp = mip->ioapic_ih | irq; + return (0); + } + } + if (mip == NULL) + printf("eisa_intr_map: no MP mapping found\n"); + } +#endif + *ihp = irq; return (0); } @@ -155,9 +178,17 @@ eisa_intr_string(ec, ih) { static char irqstr[8]; /* 4 + 2 + NUL + sanity */ - if (ih == 0 || ih >= ICU_LEN || ih == 2) + if (ih == 0 || (ih & 0xff) >= ICU_LEN || ih == 2) panic("eisa_intr_string: bogus handle 0x%x", ih); +#if NIOAPIC > 0 + if (ih & APIC_INT_VIA_APIC) { + sprintf(irqstr, "apic %d int %d (irq %d)", + APIC_IRQ_APIC(ih), APIC_IRQ_PIN(ih), ih & 0xff); + return (irqstr); + } +#endif + snprintf(irqstr, sizeof irqstr, "irq %d", ih); return (irqstr); @@ -171,7 +202,14 @@ eisa_intr_establish(ec, ih, type, level, func, arg, what) void *arg; char *what; { - +#if NIOAPIC > 0 + if (ih != -1) { + if (ih != -1 && (ih & APIC_INT_VIA_APIC)) { + return (apic_intr_establish(ih, type, level, func, arg, + what)); + } + } +#endif if (ih == 0 || ih >= ICU_LEN || ih == 2) panic("eisa_intr_establish: bogus handle 0x%x", ih); @@ -183,6 +221,5 @@ eisa_intr_disestablish(ec, cookie) eisa_chipset_tag_t ec; void *cookie; { - return (isa_intr_disestablish(NULL, cookie)); } diff --git a/sys/arch/i386/eisa/eisa_machdep.h b/sys/arch/i386/eisa/eisa_machdep.h index a313d805fae..fccdd343245 100644 --- a/sys/arch/i386/eisa/eisa_machdep.h +++ b/sys/arch/i386/eisa/eisa_machdep.h @@ -1,4 +1,4 @@ -/* $OpenBSD: eisa_machdep.h,v 1.5 2002/03/14 03:15:53 millert Exp $ */ +/* $OpenBSD: eisa_machdep.h,v 1.6 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: eisa_machdep.h,v 1.4 1997/06/06 23:12:52 thorpej Exp $ */ /* @@ -45,6 +45,9 @@ extern struct i386_bus_dma_tag eisa_bus_dma_tag; +#define ELCR0 0x4d0 /* eisa irq 0-7 */ +#define ELCR1 0x4d1 /* eisa irq 8-15 */ + /* * Types provided to machine-independent EISA code. 
*/ diff --git a/sys/arch/i386/i386/apic.c b/sys/arch/i386/i386/apic.c new file mode 100644 index 00000000000..6ed294770f3 --- /dev/null +++ b/sys/arch/i386/i386/apic.c @@ -0,0 +1,79 @@ +/* $OpenBSD: apic.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */ +/* $NetBSD: apic.c,v 1.1.2.2 2000/02/21 18:51:00 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/systm.h> + +#include <machine/i82489reg.h> +#include <machine/i82489var.h> +#include <machine/apicvar.h> + + +const char redirlofmt[] = "\177\20" + "f\0\10vector\0" + "f\10\3delmode\0" + "b\13logical\0" + "b\14pending\0" + "b\15actlo\0" + "b\16irrpending\0" + "b\17level\0" + "b\20masked\0" + "f\22\1dest\0" "=\1self" "=\2all" "=\3all-others"; + +const char redirhifmt[] = "\177\20" + "f\30\10target\0"; + +void +apic_format_redir(where1, where2, idx, redirhi, redirlo) + char *where1; + char *where2; + int idx; + u_int32_t redirhi; + u_int32_t redirlo; +{ + printf("%s: %s%d 0x%x", where1, where2, idx, redirlo); + + if ((redirlo & LAPIC_DEST_MASK) == 0) + printf(" 0x%x", redirhi); + + printf("\n"); +} + diff --git a/sys/arch/i386/i386/apicvec.s b/sys/arch/i386/i386/apicvec.s new file mode 100644 index 00000000000..c2fadf429be --- /dev/null +++ b/sys/arch/i386/i386/apicvec.s @@ -0,0 +1,250 @@ +/* $OpenBSD: apicvec.s,v 1.2 2004/06/13 21:49:15 niklas Exp $ */ +/* $NetBSD: apicvec.s,v 1.1.2.2 2000/02/21 21:54:01 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. 
+ * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <machine/i82093reg.h> +#include <machine/i82489reg.h> + +#ifdef __ELF__ +#define XINTR(vec) Xintr/**/vec +#else +#define XINTR(vec) _Xintr/**/vec +#endif + +#ifdef MULTIPROCESSOR + .globl XINTR(ipi) +XINTR(ipi): + pushl $0 + pushl $T_ASTFLT + INTRENTRY + MAKE_FRAME + pushl CPL + movl _C_LABEL(lapic_ppr),%eax + movl %eax,CPL + ioapic_asm_ack() + sti /* safe to take interrupts.. */ + call _C_LABEL(i386_ipi_handler) + jmp _C_LABEL(Xdoreti) +#endif + + /* + * Interrupt from the local APIC timer. + */ + .globl XINTR(ltimer) +XINTR(ltimer): + pushl $0 + pushl $T_ASTFLT + INTRENTRY + MAKE_FRAME + pushl CPL + movl _C_LABEL(lapic_ppr),%eax + movl %eax,CPL + ioapic_asm_ack() + sti +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintlock) +#endif + movl %esp,%eax + pushl %eax + call _C_LABEL(lapic_clockintr) + addl $4,%esp +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintunlock) +#endif + jmp _C_LABEL(Xdoreti) + + .globl XINTR(softclock), XINTR(softnet), XINTR(softtty) +XINTR(softclock): + pushl $0 + pushl $T_ASTFLT + INTRENTRY + MAKE_FRAME + pushl CPL + movl $IPL_SOFTCLOCK,CPL + andl $~(1<<SIR_CLOCK),_C_LABEL(ipending) + ioapic_asm_ack() + sti +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintlock) +#endif + call _C_LABEL(softclock) +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintunlock) +#endif + jmp _C_LABEL(Xdoreti) + +#define DONETISR(s, c) \ + .globl _C_LABEL(c) ;\ + testl $(1 << s),%edi ;\ + jz 1f ;\ + call _C_LABEL(c) ;\ +1: + +XINTR(softnet): + pushl $0 + pushl $T_ASTFLT + INTRENTRY + MAKE_FRAME + pushl CPL + movl $IPL_SOFTNET,CPL + andl $~(1<<SIR_NET),_C_LABEL(ipending) + ioapic_asm_ack() + sti +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintlock) +#endif + xorl %edi,%edi + xchgl _C_LABEL(netisr),%edi +#include <net/netisr_dispatch.h> +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintunlock) +#endif + jmp _C_LABEL(Xdoreti) +#undef DONETISR + +XINTR(softtty): + pushl $0 + pushl $T_ASTFLT + INTRENTRY + MAKE_FRAME + pushl CPL + movl 
$IPL_SOFTTTY,CPL + andl $~(1<<SIR_TTY),_C_LABEL(ipending) + ioapic_asm_ack() + sti +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintlock) +#endif + call _C_LABEL(comsoft) +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintunlock) +#endif + jmp _C_LABEL(Xdoreti) + +#if NIOAPIC > 0 + +#define voidop(num) + + /* + * I/O APIC interrupt. + * We sort out which one is which based on the value of + * the processor priority register. + * + * XXX no stray interrupt mangling stuff.. + * XXX use cmove when appropriate. + */ + +#define APICINTR(name, num, early_ack, late_ack, mask, unmask, level_mask) \ +_C_LABEL(Xintr_/**/name/**/num): \ + pushl $0 ;\ + pushl $T_ASTFLT ;\ + INTRENTRY ;\ + MAKE_FRAME ;\ + pushl CPL ;\ + movl _C_LABEL(lapic_ppr),%eax ;\ + movl %eax,CPL ;\ + mask(num) /* mask it in hardware */ ;\ + early_ack(num) /* and allow other intrs */ ;\ + incl MY_COUNT+V_INTR /* statistical info */ ;\ + sti ;\ + orl $num,%eax ;\ + incl _C_LABEL(apic_intrcount)(,%eax,4) ;\ + movl _C_LABEL(apic_intrhand)(,%eax,4),%ebx /* chain head */ ;\ + testl %ebx,%ebx ;\ + jz 8f /* oops, no handlers.. 
*/ ;\ +7: \ + LOCK_KERNEL ;\ + movl IH_ARG(%ebx),%eax /* get handler arg */ ;\ + testl %eax,%eax ;\ + jnz 6f ;\ + movl %esp,%eax /* 0 means frame pointer */ ;\ +6: \ + pushl %eax ;\ + call *IH_FUN(%ebx) /* call it */ ;\ + addl $4,%esp /* toss the arg */ ;\ + UNLOCK_KERNEL ;\ + incl IH_COUNT(%ebx) /* count the intrs */ ;\ + movl IH_NEXT(%ebx),%ebx /* next handler in chain */ ;\ + testl %ebx,%ebx ;\ + jnz 7b ;\ +8: \ + unmask(num) /* unmask it in hardware */ ;\ + late_ack(num) ;\ + jmp _C_LABEL(Xdoreti) + +APICINTR(ioapic,0, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,1, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,2, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,3, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,4, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,5, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,6, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,7, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,8, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,9, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,10, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,11, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,12, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,13, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,14, voidop, ioapic_asm_ack, voidop, voidop, voidop) +APICINTR(ioapic,15, voidop, ioapic_asm_ack, voidop, voidop, voidop) + + .globl _C_LABEL(Xintr_ioapic0),_C_LABEL(Xintr_ioapic1) + .globl _C_LABEL(Xintr_ioapic2),_C_LABEL(Xintr_ioapic3) + .globl _C_LABEL(Xintr_ioapic4),_C_LABEL(Xintr_ioapic5) + .globl _C_LABEL(Xintr_ioapic6),_C_LABEL(Xintr_ioapic7) + .globl _C_LABEL(Xintr_ioapic8),_C_LABEL(Xintr_ioapic9) + .globl _C_LABEL(Xintr_ioapic10),_C_LABEL(Xintr_ioapic11) + .globl 
_C_LABEL(Xintr_ioapic12),_C_LABEL(Xintr_ioapic13) + .globl _C_LABEL(Xintr_ioapic14),_C_LABEL(Xintr_ioapic15) + .globl _C_LABEL(apichandler) + +_C_LABEL(apichandler): + .long _C_LABEL(Xintr_ioapic0),_C_LABEL(Xintr_ioapic1) + .long _C_LABEL(Xintr_ioapic2),_C_LABEL(Xintr_ioapic3) + .long _C_LABEL(Xintr_ioapic4),_C_LABEL(Xintr_ioapic5) + .long _C_LABEL(Xintr_ioapic6),_C_LABEL(Xintr_ioapic7) + .long _C_LABEL(Xintr_ioapic8),_C_LABEL(Xintr_ioapic9) + .long _C_LABEL(Xintr_ioapic10),_C_LABEL(Xintr_ioapic11) + .long _C_LABEL(Xintr_ioapic12),_C_LABEL(Xintr_ioapic13) + .long _C_LABEL(Xintr_ioapic14),_C_LABEL(Xintr_ioapic15) + +#endif + diff --git a/sys/arch/i386/i386/apm.c b/sys/arch/i386/i386/apm.c index 1ade5dfb400..f996be58a37 100644 --- a/sys/arch/i386/i386/apm.c +++ b/sys/arch/i386/i386/apm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: apm.c,v 1.62 2004/05/27 08:19:59 tedu Exp $ */ +/* $OpenBSD: apm.c,v 1.63 2004/06/13 21:49:15 niklas Exp $ */ /*- * Copyright (c) 1998-2001 Michael Shalayeff. All rights reserved. 
@@ -777,7 +777,6 @@ apmattach(parent, self, aux) struct device *parent, *self; void *aux; { - extern union descriptor *dynamic_gdt; struct bios_attach_args *ba = aux; bios_apminfo_t *ap = ba->bios_apmp; struct apm_softc *sc = (void *)self; @@ -843,12 +842,12 @@ apmattach(parent, self, aux) else ch16 += ap->apm_code16_base - cbase; - setsegment(&dynamic_gdt[GAPM32CODE_SEL].sd, (void *)ch32, - ap->apm_code_len, SDT_MEMERA, SEL_KPL, 1, 0); - setsegment(&dynamic_gdt[GAPM16CODE_SEL].sd, (void *)ch16, - ap->apm_code16_len, SDT_MEMERA, SEL_KPL, 0, 0); - setsegment(&dynamic_gdt[GAPMDATA_SEL].sd, (void *)dh, - ap->apm_data_len, SDT_MEMRWA, SEL_KPL, 1, 0); + setgdt(GAPM32CODE_SEL, (void *)ch32, ap->apm_code_len, + SDT_MEMERA, SEL_KPL, 1, 0); + setgdt(GAPM16CODE_SEL, (void *)ch16, ap->apm_code16_len, + SDT_MEMERA, SEL_KPL, 0, 0); + setgdt(GAPMDATA_SEL, (void *)dh, ap->apm_data_len, SDT_MEMRWA, + SEL_KPL, 1, 0); DPRINTF((": flags %x code 32:%x/%x[%x] 16:%x/%x[%x] " "data %x/%x/%x ep %x (%x:%x)\n%s", apm_flags, ap->apm_code32_base, ch32, ap->apm_code_len, @@ -890,9 +889,9 @@ apmattach(parent, self, aux) } else kthread_create_deferred(apm_thread_create, sc); } else { - dynamic_gdt[GAPM32CODE_SEL] = dynamic_gdt[GNULL_SEL]; - dynamic_gdt[GAPM16CODE_SEL] = dynamic_gdt[GNULL_SEL]; - dynamic_gdt[GAPMDATA_SEL] = dynamic_gdt[GNULL_SEL]; + setgdt(GAPM32CODE_SEL, NULL, 0, 0, 0, 0, 0); + setgdt(GAPM16CODE_SEL, NULL, 0, 0, 0, 0, 0); + setgdt(GAPMDATA_SEL, NULL, 0, 0, 0, 0, 0); } } @@ -901,6 +900,15 @@ apm_thread_create(v) void *v; { struct apm_softc *sc = v; + +#ifdef MULTIPROCESSOR + if (ncpus > 1) { + apm_disconnect(sc); + apm_dobusy = apm_doidle = 0; + return; + } +#endif + if (kthread_create(apm_thread, sc, &sc->sc_thread, "%s", sc->sc_dev.dv_xname)) { apm_disconnect(sc); diff --git a/sys/arch/i386/i386/autoconf.c b/sys/arch/i386/i386/autoconf.c index f0a5b2f1de0..95a3059984b 100644 --- a/sys/arch/i386/i386/autoconf.c +++ b/sys/arch/i386/i386/autoconf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: 
autoconf.c,v 1.52 2003/10/15 03:56:21 david Exp $ */ +/* $OpenBSD: autoconf.c,v 1.53 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: autoconf.c,v 1.20 1996/05/03 19:41:56 christos Exp $ */ /*- @@ -61,6 +61,12 @@ #include <dev/cons.h> +#include "ioapic.h" + +#if NIOAPIC > 0 +#include <machine/i82093var.h> +#endif + int findblkmajor(struct device *dv); char *findblkname(int); @@ -109,6 +115,14 @@ cpu_configure() printf("biomask %x netmask %x ttymask %x\n", (u_short)IMASK(IPL_BIO), (u_short)IMASK(IPL_NET), (u_short)IMASK(IPL_TTY)); +#if NIOAPIC > 0 + ioapic_enable(); +#endif + +#ifdef MULTIPROCESSOR + /* propagate TSS and LDT configuration to the idle pcb's. */ + cpu_init_idle_pcbs(); +#endif spl0(); /* diff --git a/sys/arch/i386/i386/bios.c b/sys/arch/i386/i386/bios.c index 74691de83d9..a9576ccd41f 100644 --- a/sys/arch/i386/i386/bios.c +++ b/sys/arch/i386/i386/bios.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bios.c,v 1.54 2004/01/29 01:36:13 tom Exp $ */ +/* $OpenBSD: bios.c,v 1.55 2004/06/13 21:49:15 niklas Exp $ */ /* * Copyright (c) 1997-2001 Michael Shalayeff @@ -85,9 +85,12 @@ bios_apminfo_t *apm; bios_pciinfo_t *bios_pciinfo; #endif bios_diskinfo_t *bios_diskinfo; -bios_memmap_t *bios_memmap; +bios_memmap_t *bios_memmap; u_int32_t bios_cksumlen; struct bios32_entry bios32_entry; +#ifdef MULTIPROCESSOR +void *bios_smpinfo; +#endif bios_diskinfo_t *bios_getdiskinfo(dev_t); @@ -324,6 +327,12 @@ bios_getopt() cnset(cdp->consdev); } break; +#ifdef MULTIPROCESSOR + case BOOTARG_SMPINFO: + bios_smpinfo = q->ba_arg; + printf(" smpinfo %p", bios_smpinfo); + break; +#endif default: #ifdef BIOS_DEBUG @@ -356,9 +365,6 @@ bios32_service(service, e, ei) bios32_entry_t e; bios32_entry_info_t ei; { - extern union descriptor *dynamic_gdt; - extern int gdt_get_slot(void); - u_long pa, endpa; vaddr_t va, sva; u_int32_t base, count, off, ent; @@ -388,8 +394,7 @@ bios32_service(service, e, ei) return (0); slot = gdt_get_slot(); - setsegment(&dynamic_gdt[slot].sd, (caddr_t)va, BIOS32_END, - 
SDT_MEMERA, SEL_KPL, 1, 0); + setgdt(slot, (caddr_t)va, BIOS32_END, SDT_MEMERA, SEL_KPL, 1, 0); for (pa = i386_trunc_page(BIOS32_START), va += i386_trunc_page(BIOS32_START); diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c new file mode 100644 index 00000000000..c447da4e533 --- /dev/null +++ b/sys/arch/i386/i386/cpu.c @@ -0,0 +1,606 @@ +/* $OpenBSD: cpu.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */ +/* $NetBSD: cpu.c,v 1.1.2.7 2000/06/26 02:04:05 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1999 Stefan Grefen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "lapic.h" +#include "ioapic.h" + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/user.h> +#include <sys/systm.h> +#include <sys/device.h> + +#include <uvm/uvm_extern.h> + +#include <machine/cpu.h> +#include <machine/cpufunc.h> +#include <machine/cpuvar.h> +#include <machine/pmap.h> +#include <machine/vmparam.h> +#include <machine/mpbiosvar.h> +#include <machine/npx.h> +#include <machine/pcb.h> +#include <machine/specialreg.h> +#include <machine/segments.h> +#include <machine/gdt.h> +#include <machine/pio.h> + +#if NLAPIC > 0 +#include <machine/apicvar.h> +#include <machine/i82489reg.h> +#include <machine/i82489var.h> +#endif + +#if NIOAPIC > 0 +#include <machine/i82093var.h> +#endif + +#include <dev/ic/mc146818reg.h> +#include <i386/isa/nvram.h> +#include <dev/isa/isareg.h> + +int cpu_match(struct device *, void *, void *); +void cpu_attach(struct device *, struct device *, void *); + +#ifdef MULTIPROCESSOR +int mp_cpu_start(struct cpu_info *); +void mp_cpu_start_cleanup(struct cpu_info *); +struct cpu_functions mp_cpu_funcs = + { mp_cpu_start, NULL, mp_cpu_start_cleanup }; +#endif + +/* + * Statically-allocated CPU info for the primary CPU (or the only + * CPU, on uniprocessors). The CPU info list is initialized to + * point at it. 
+ */ +struct cpu_info cpu_info_primary; +struct cpu_info *cpu_info_list = &cpu_info_primary; + +void cpu_init_tss(struct i386tss *, void *, void *); +void cpu_set_tss_gates(struct cpu_info *); + +#ifdef MULTIPROCESSOR +/* + * Array of CPU info structures. Must be statically-allocated because + * curproc, etc. are used early. + */ + +struct cpu_info *cpu_info[I386_MAXPROCS] = { &cpu_info_primary }; + +void cpu_hatch(void *); +void cpu_boot_secondary(struct cpu_info *); +void cpu_copy_trampoline(void); + +/* + * Runs once per boot once multiprocessor goo has been detected and + * the local APIC has been mapped. + * Called from mpbios_scan(); + */ +void +cpu_init_first() +{ + int cpunum = cpu_number(); + + if (cpunum != 0) { + cpu_info[0] = NULL; + cpu_info[cpunum] = &cpu_info_primary; + } + + cpu_copy_trampoline(); +} +#endif + +struct cfattach cpu_ca = { + sizeof(struct cpu_info), cpu_match, cpu_attach +}; + +struct cfdriver cpu_cd = { + NULL, "cpu", DV_DULL /* XXX DV_CPU */ +}; + +int +cpu_match(parent, matchv, aux) + struct device *parent; + void *matchv; + void *aux; +{ + struct cfdata *match = (struct cfdata *)matchv; + struct cpu_attach_args *caa = (struct cpu_attach_args *)aux; + + if (strcmp(caa->caa_name, match->cf_driver->cd_name) == 0) + return (1); + return (0); +} + +void +cpu_attach(parent, self, aux) + struct device *parent, *self; + void *aux; +{ + struct cpu_info *ci = (struct cpu_info *)self; + struct cpu_attach_args *caa = (struct cpu_attach_args *)aux; + +#ifdef MULTIPROCESSOR + int cpunum = caa->cpu_number; + vaddr_t kstack; + struct pcb *pcb; + + if (caa->cpu_role != CPU_ROLE_AP) { + if (cpunum != cpu_number()) { + panic("%s: running cpu is at apic %d" + " instead of at expected %d\n", + self->dv_xname, cpu_number(), cpunum); + } + + ci = &cpu_info_primary; + bcopy(self, &ci->ci_dev, sizeof *self); + + /* special-case boot CPU */ /* XXX */ + if (cpu_info[cpunum] == &cpu_info_primary) { /* XXX */ + cpu_info[cpunum] = NULL; /* XXX */ + } /* XXX */ 
+ } + if (cpu_info[cpunum] != NULL) + panic("cpu at apic id %d already attached?", cpunum); + + cpu_info[cpunum] = ci; +#endif + + ci->ci_self = ci; + ci->ci_apicid = caa->cpu_number; +#ifdef MULTIPROCESSOR + ci->ci_cpuid = ci->ci_apicid; +#else + ci->ci_cpuid = 0; /* False for APs, so what, they're not used */ +#endif + ci->ci_signature = caa->cpu_signature; + ci->ci_feature_flags = caa->feature_flags; + ci->ci_func = caa->cpu_func; + +#ifdef MULTIPROCESSOR + /* + * Allocate UPAGES contiguous pages for the idle PCB and stack. + */ + + kstack = uvm_km_alloc(kernel_map, USPACE); + if (kstack == 0) { + if (cpunum == 0) { /* XXX */ + panic("cpu_attach: unable to allocate idle stack for" + " primary"); + } + printf("%s: unable to allocate idle stack\n", + ci->ci_dev.dv_xname); + return; + } + pcb = ci->ci_idle_pcb = (struct pcb *)kstack; + memset(pcb, 0, USPACE); + + pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); + pcb->pcb_tss.tss_esp0 = kstack + USPACE - 16 - + sizeof (struct trapframe); + pcb->pcb_tss.tss_esp = kstack + USPACE - 16 - + sizeof (struct trapframe); + pcb->pcb_pmap = pmap_kernel(); + pcb->pcb_cr3 = vtophys(pcb->pcb_pmap->pm_pdir); + /* pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdir - KERNBASE; XXX ??? */ +#endif + + /* further PCB init done later. 
*/ + +/* XXXSMP: must be shared with UP */ +#ifdef MULTIPROCESSOR + printf(": "); + + switch (caa->cpu_role) { + case CPU_ROLE_SP: + printf("(uniprocessor)\n"); + ci->ci_flags |= CPUF_PRESENT | CPUF_SP | CPUF_PRIMARY; + identifycpu(ci); + cpu_init(ci); + break; + + case CPU_ROLE_BP: + printf("apid %d (", caa->cpu_number); + printf("boot processor"); + ci->ci_flags |= CPUF_PRESENT | CPUF_BSP | CPUF_PRIMARY; + printf(")\n"); + identifycpu(ci); + cpu_init(ci); + +#if NLAPIC > 0 + /* + * Enable local apic + */ + lapic_enable(); + lapic_calibrate_timer(ci); +#endif +#if NIOAPIC > 0 + ioapic_bsp_id = caa->cpu_number; +#endif + break; + + case CPU_ROLE_AP: + /* + * report on an AP + */ + printf("apid %d (application processor)\n", caa->cpu_number); + +#ifdef MULTIPROCESSOR + gdt_alloc_cpu(ci); + ci->ci_flags |= CPUF_PRESENT | CPUF_AP; + identifycpu(ci); + ci->ci_next = cpu_info_list->ci_next; + cpu_info_list->ci_next = ci; + ncpus++; +#else + printf("%s: not started\n", ci->ci_dev.dv_xname); +#endif + break; + + default: + panic("unknown processor type??\n"); + } +#else /* MULTIPROCESSOR */ + printf("\n"); +#endif /* !MULTIPROCESSOR */ + +#ifdef MULTIPROCESSOR + if (mp_verbose) { + printf("%s: kstack at 0x%lx for %d bytes\n", + ci->ci_dev.dv_xname, kstack, USPACE); + printf("%s: idle pcb at %p, idle sp at 0x%x\n", + ci->ci_dev.dv_xname, pcb, pcb->pcb_esp); + } +#endif +} + +/* + * Initialize the processor appropriately. + */ + +void +cpu_init(ci) + struct cpu_info *ci; +{ + /* configure the CPU if needed */ + if (ci->cpu_setup != NULL) + (*ci->cpu_setup)(NULL, 0, 0); + +#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) + /* + * On a 486 or above, enable ring 0 write protection. 
+ */ + if (ci->cpu_class >= CPUCLASS_486) + lcr0(rcr0() | CR0_WP); +#endif + if (cpu_feature & CPUID_PGE) + lcr4(rcr4() | CR4_PGE); /* enable global TLB caching */ + + ci->ci_flags |= CPUF_RUNNING; +} + + +#ifdef MULTIPROCESSOR + +void +cpu_boot_secondary_processors() +{ + struct cpu_info *ci; + u_long i; + + for (i = 0; i < I386_MAXPROCS; i++) { + ci = cpu_info[i]; + if (ci == NULL) + continue; + if (ci->ci_idle_pcb == NULL) + continue; + if ((ci->ci_flags & CPUF_PRESENT) == 0) + continue; + if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) + continue; + cpu_boot_secondary(ci); + } +} + +void +cpu_init_idle_pcbs() +{ + struct cpu_info *ci; + u_long i; + + for (i=0; i < I386_MAXPROCS; i++) { + ci = cpu_info[i]; + if (ci == NULL) + continue; + if (ci->ci_idle_pcb == NULL) + continue; + if ((ci->ci_flags & CPUF_PRESENT) == 0) + continue; + i386_init_pcb_tss_ldt(ci); + } +} + +void +cpu_boot_secondary (ci) + struct cpu_info *ci; +{ + struct pcb *pcb; + int i; + struct pmap *kpm = pmap_kernel(); + extern u_int32_t mp_pdirpa; + + printf("%s: starting", ci->ci_dev.dv_xname); + + /* XXX move elsewhere, not per CPU. */ + mp_pdirpa = vtophys(kpm->pm_pdir); + + pcb = ci->ci_idle_pcb; + + if (mp_verbose) + printf(", init idle stack ptr is 0x%x", pcb->pcb_esp); + + printf("\n"); + + CPU_STARTUP(ci); + + /* + * wait for it to become ready + */ + for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) { + delay(10); + } + if (!(ci->ci_flags & CPUF_RUNNING)) { + printf("cpu failed to become ready\n"); + Debugger(); + } + + CPU_START_CLEANUP(ci); +} + +/* + * The CPU ends up here when its ready to run + * XXX should share some of this with init386 in machdep.c + * for now it jumps into an infinite loop. 
+ */ +void +cpu_hatch(void *v) +{ + struct cpu_info *ci = (struct cpu_info *)v; + int s; + + cpu_init_idt(); + lapic_enable(); + lapic_initclocks(); + lapic_set_lvt(); + gdt_init_cpu(ci); + npxinit(ci); + + lldt(GSEL(GLDT_SEL, SEL_KPL)); + + cpu_init(ci); + + s = splhigh(); /* XXX prevent softints from running here.. */ + lapic_tpr = 0; + enable_intr(); + printf("%s: CPU %ld running\n", ci->ci_dev.dv_xname, ci->ci_cpuid); + splx(s); +} + +void +cpu_copy_trampoline() +{ + /* + * Copy boot code. + */ + extern u_char cpu_spinup_trampoline[]; + extern u_char cpu_spinup_trampoline_end[]; + + pmap_kenter_pa((vaddr_t)MP_TRAMPOLINE, /* virtual */ + (paddr_t)MP_TRAMPOLINE, /* physical */ + VM_PROT_ALL); /* protection */ + bcopy(cpu_spinup_trampoline, (caddr_t)MP_TRAMPOLINE, + cpu_spinup_trampoline_end - cpu_spinup_trampoline); +} + +#endif + +#ifdef notyet +void +cpu_init_tss(struct i386tss *tss, void *stack, void *func) +{ + memset(tss, 0, sizeof *tss); + tss->tss_esp0 = tss->tss_esp = (int)((char *)stack + USPACE - 16); + tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); + tss->__tss_cs = GSEL(GCODE_SEL, SEL_KPL); + tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL); + tss->tss_gs = tss->__tss_es = tss->__tss_ds = + tss->__tss_ss = GSEL(GDATA_SEL, SEL_KPL); + tss->tss_cr3 = pmap_kernel()->pm_pdirpa; + tss->tss_esp = (int)((char *)stack + USPACE - 16); + tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL); + tss->__tss_eflags = PSL_MBO | PSL_NT; /* XXX not needed? 
*/ + tss->__tss_eip = (int)func; +} + +/* XXX */ +#define IDTVEC(name) __CONCAT(X, name) +typedef void (vector)(void); +extern vector IDTVEC(tss_trap08); +#ifdef DDB +extern vector Xintrddbipi; +extern int ddb_vec; +#endif + +void +cpu_set_tss_gates(struct cpu_info *ci) +{ + struct segment_descriptor sd; + + ci->ci_doubleflt_stack = (char *)uvm_km_alloc(kernel_map, USPACE); + cpu_init_tss(&ci->ci_doubleflt_tss, ci->ci_doubleflt_stack, + IDTVEC(tss_trap08)); + setsegment(&sd, &ci->ci_doubleflt_tss, sizeof(struct i386tss) - 1, + SDT_SYS386TSS, SEL_KPL, 0, 0); + ci->ci_gdt[GTRAPTSS_SEL].sd = sd; + setgate(&idt[8], NULL, 0, SDT_SYSTASKGT, SEL_KPL, + GSEL(GTRAPTSS_SEL, SEL_KPL)); + +#if defined(DDB) && defined(MULTIPROCESSOR) + /* + * Set up seperate handler for the DDB IPI, so that it doesn't + * stomp on a possibly corrupted stack. + * + * XXX overwriting the gate set in db_machine_init. + * Should rearrange the code so that it's set only once. + */ + ci->ci_ddbipi_stack = (char *)uvm_km_alloc(kernel_map, USPACE); + cpu_init_tss(&ci->ci_ddbipi_tss, ci->ci_ddbipi_stack, + Xintrddbipi); + + setsegment(&sd, &ci->ci_ddbipi_tss, sizeof(struct i386tss) - 1, + SDT_SYS386TSS, SEL_KPL, 0, 0); + ci->ci_gdt[GIPITSS_SEL].sd = sd; + + setgate(&idt[ddb_vec], NULL, 0, SDT_SYSTASKGT, SEL_KPL, + GSEL(GIPITSS_SEL, SEL_KPL)); +#endif +} +#endif + +#ifdef MULTIPROCESSOR +int +mp_cpu_start(struct cpu_info *ci) +{ +#if NLAPIC > 0 + int error; +#endif + unsigned short dwordptr[2]; + + /* + * "The BSP must initialize CMOS shutdown code to 0Ah ..." + */ + + outb(IO_RTC, NVRAM_RESET); + outb(IO_RTC+1, NVRAM_RESET_JUMP); + + /* + * "and the warm reset vector (DWORD based at 40:67) to point + * to the AP startup code ..." + */ + + dwordptr[0] = 0; + dwordptr[1] = MP_TRAMPOLINE >> 4; + + pmap_kenter_pa(0, 0, VM_PROT_READ|VM_PROT_WRITE); + memcpy((u_int8_t *)0x467, dwordptr, 4); + pmap_kremove(0, PAGE_SIZE); + +#if NLAPIC > 0 + /* + * ... 
prior to executing the following sequence:" + */ + + if (ci->ci_flags & CPUF_AP) { + if ((error = i386_ipi_init(ci->ci_apicid)) != 0) + return (error); + + delay(10000); + + if (cpu_feature & CPUID_APIC) { + if ((error = i386_ipi(MP_TRAMPOLINE / PAGE_SIZE, + ci->ci_apicid, LAPIC_DLMODE_STARTUP)) != 0) + return (error); + delay(200); + + if ((error = i386_ipi(MP_TRAMPOLINE / PAGE_SIZE, + ci->ci_apicid, LAPIC_DLMODE_STARTUP)) != 0) + return (error); + delay(200); + } + } +#endif + return (0); +} + +void +mp_cpu_start_cleanup(struct cpu_info *ci) +{ + /* + * Ensure the NVRAM reset byte contains something vaguely sane. + */ + + outb(IO_RTC, NVRAM_RESET); + outb(IO_RTC+1, NVRAM_RESET_RST); +} +#endif diff --git a/sys/arch/i386/i386/db_interface.c b/sys/arch/i386/i386/db_interface.c index c9d55df065e..44e09f1c914 100644 --- a/sys/arch/i386/i386/db_interface.c +++ b/sys/arch/i386/i386/db_interface.c @@ -1,4 +1,4 @@ -/* $OpenBSD: db_interface.c,v 1.12 2003/05/18 02:43:12 andreas Exp $ */ +/* $OpenBSD: db_interface.c,v 1.13 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: db_interface.c,v 1.22 1996/05/03 19:42:00 christos Exp $ */ /* @@ -53,11 +53,24 @@ extern label_t *db_recover; extern char *trap_type[]; extern int trap_types; +extern boolean_t db_cmd_loop_done; + +#ifdef MULTIPROCESSOR +extern boolean_t db_switch_cpu; +extern long db_switch_to_cpu; + +#endif int db_active = 0; void kdbprinttrap(int, int); void db_sysregs_cmd(db_expr_t, int, db_expr_t, char *); +#ifdef MULTIPROCESSOR +void db_cpuinfo_cmd(db_expr_t, int, db_expr_t, char *); +void db_startproc_cmd(db_expr_t, int, db_expr_t, char *); +void db_stopproc_cmd(db_expr_t, int, db_expr_t, char *); +void db_ddbproc_cmd(db_expr_t, int, db_expr_t, char *); +#endif /* MULTIPROCESSOR */ /* * Print trap reason. 
@@ -109,7 +122,7 @@ kdb_trap(type, code, regs) * Kernel mode - esp and ss not saved */ ddb_regs.tf_esp = (int)&regs->tf_esp; /* kernel stack pointer */ - asm("movw %%ss,%w0" : "=r" (ddb_regs.tf_ss)); + __asm__("movw %%ss,%w0" : "=r" (ddb_regs.tf_ss)); } s = splhigh(); @@ -179,20 +192,141 @@ db_sysregs_cmd(addr, have_addr, count, modif) db_printf("cr4: 0x%08x\n", cr); } +#ifdef MULTIPROCESSOR +void db_cpuinfo_cmd(addr, have_addr, count, modif) + db_expr_t addr; + int have_addr; + db_expr_t count; + char *modif; +{ + int i; + + for (i = 0; i < I386_MAXPROCS; i++) { + if (cpu_info[i] != NULL) { + db_printf("%c%4d: ", (i == cpu_number()) ? '*' : ' ', + i); + switch(cpu_info[i]->ci_ddb_paused) { + case CI_DDB_RUNNING: + db_printf("running\n"); + break; + case CI_DDB_SHOULDSTOP: + db_printf("stopping\n"); + break; + case CI_DDB_STOPPED: + db_printf("stopped\n"); + break; + case CI_DDB_ENTERDDB: + db_printf("entering ddb\n"); + break; + case CI_DDB_INDDB: + db_printf("ddb\n"); + break; + default: + db_printf("? 
(%d)\n", + cpu_info[i]->ci_ddb_paused); + break; + } + } + } +} + +void db_startproc_cmd(addr, have_addr, count, modif) + db_expr_t addr; + int have_addr; + db_expr_t count; + char *modif; +{ + int i; + + if (have_addr) { + if (addr >= 0 && addr < I386_MAXPROCS + && cpu_info[addr] != NULL && addr != cpu_number()) + db_startcpu(addr); + else + db_printf("Invalid cpu %d\n", (int)addr); + } else { + for (i = 0; i < I386_MAXPROCS; i++) { + if (cpu_info[i] != NULL && i != cpu_number()) { + db_startcpu(i); + } + } + } +} + +void db_stopproc_cmd(addr, have_addr, count, modif) + db_expr_t addr; + int have_addr; + db_expr_t count; + char *modif; +{ + int i; + + if (have_addr) { + if (addr >= 0 && addr < I386_MAXPROCS + && cpu_info[addr] != NULL && addr != cpu_number()) + db_stopcpu(addr); + else + db_printf("Invalid cpu %d\n", (int)addr); + } else { + for (i = 0; i < I386_MAXPROCS; i++) { + if (cpu_info[i] != NULL && i != cpu_number()) { + db_stopcpu(i); + } + } + } +} + +void db_ddbproc_cmd(addr, have_addr, count, modif) + db_expr_t addr; + int have_addr; + db_expr_t count; + char *modif; +{ + if (have_addr) { + if (addr >= 0 && addr < I386_MAXPROCS + && cpu_info[addr] != NULL && addr != cpu_number()) { + db_switch_to_cpu = addr; + db_switch_cpu = 1; + db_cmd_loop_done = 1; + } else { + db_printf("Invalid cpu %d\n", (int)addr); + } + } else { + db_printf("CPU not specified\n"); + } +} +#endif /* MULTIPROCESSOR */ + struct db_command db_machine_command_table[] = { { "sysregs", db_sysregs_cmd, 0, 0 }, +#ifdef MULTIPROCESSOR + { "cpuinfo", db_cpuinfo_cmd, 0, 0 }, + { "startcpu", db_startproc_cmd, 0, 0 }, + { "stopcpu", db_stopproc_cmd, 0, 0 }, + { "ddbcpu", db_ddbproc_cmd, 0, 0 }, +#endif /* MULTIPROCESSOR */ { (char *)0, } }; void db_machine_init() { +#ifdef MULTIPROCESSOR + int i; +#endif /* MULTIPROCESSOR */ db_machine_commands_install(db_machine_command_table); +#ifdef MULTIPROCESSOR + for (i = 0; i < I386_MAXPROCS; i++) { + if (cpu_info[i] != NULL) + 
cpu_info[i]->ci_ddb_paused = CI_DDB_RUNNING; + } + SIMPLE_LOCK_INIT(&ddb_mp_slock); +#endif /* MULTIPROCESSOR */ } void Debugger() { - asm("int $3"); + __asm__("int $3"); } diff --git a/sys/arch/i386/i386/db_mp.c b/sys/arch/i386/i386/db_mp.c new file mode 100644 index 00000000000..0a859f08360 --- /dev/null +++ b/sys/arch/i386/i386/db_mp.c @@ -0,0 +1,187 @@ +/* $OpenBSD: db_mp.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */ + +/* + * Copyright (c) 2003 Andreas Gunnarsson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/simplelock.h> + +#include <machine/db_machdep.h> + +#include <ddb/db_output.h> + +#define DDB_STATE_NOT_RUNNING 0 +#define DDB_STATE_RUNNING 1 + +struct SIMPLELOCK ddb_mp_slock; + +volatile int ddb_state = DDB_STATE_NOT_RUNNING; /* protected by ddb_mp_slock */ +volatile cpuid_t ddb_active_cpu; /* protected by ddb_mp_slock */ + +/* + * ddb_enter_ddb() is called when ddb is entered to stop the other + * CPUs. If another cpu is already in ddb we'll wait until it's finished. + */ +void +db_enter_ddb() +{ + int s, i; + + s = splhigh(); + SIMPLE_LOCK(&ddb_mp_slock); + + while (ddb_state == DDB_STATE_RUNNING + && ddb_active_cpu != cpu_number()) { + db_printf("CPU %d waiting to enter ddb\n", cpu_number()); + SIMPLE_UNLOCK(&ddb_mp_slock); + splx(s); + + /* Busy wait without locking, we'll confirm with lock later */ + while (ddb_state == DDB_STATE_RUNNING + && ddb_active_cpu != cpu_number()) + ; /* Do nothing */ + + s = splhigh(); + SIMPLE_LOCK(&ddb_mp_slock); + } + + ddb_state = DDB_STATE_RUNNING; + ddb_active_cpu = cpu_number(); + + for (i = 0; i < I386_MAXPROCS; i++) { + if (cpu_info[i] != NULL) { + if (i == cpu_number()) + cpu_info[i]->ci_ddb_paused = CI_DDB_INDDB; + else if (cpu_info[i]->ci_ddb_paused + != CI_DDB_STOPPED) { + cpu_info[i]->ci_ddb_paused = CI_DDB_SHOULDSTOP; + db_printf("Sending IPI to cpu %d\n", i); + i386_send_ipi(cpu_info[i], I386_IPI_DDB); + } + } + } + db_printf("CPU %d entering ddb\n", cpu_number()); + SIMPLE_UNLOCK(&ddb_mp_slock); + splx(s); +} + +void +db_leave_ddb() +{ + int s, i; + + s = splhigh(); + SIMPLE_LOCK(&ddb_mp_slock); + db_printf("CPU %d leaving ddb\n", cpu_number()); + for (i = 0; i < I386_MAXPROCS; i++) { + if (cpu_info[i] != NULL) { + cpu_info[i]->ci_ddb_paused = CI_DDB_RUNNING; + } + } + ddb_state = DDB_STATE_NOT_RUNNING; + SIMPLE_UNLOCK(&ddb_mp_slock); + splx(s); +} + +void +db_startcpu(int cpu) +{ + int s; + + if (cpu != cpu_number() && cpu_info[cpu] != NULL) { + s = splhigh(); + 
SIMPLE_LOCK(&ddb_mp_slock); + cpu_info[cpu]->ci_ddb_paused = CI_DDB_RUNNING; + SIMPLE_UNLOCK(&ddb_mp_slock); + splx(s); + } +} + +void +db_stopcpu(int cpu) +{ + int s; + + if (cpu != cpu_number() && cpu_info[cpu] != NULL) { + s = splhigh(); + SIMPLE_LOCK(&ddb_mp_slock); + cpu_info[cpu]->ci_ddb_paused = CI_DDB_SHOULDSTOP; + db_printf("Sending IPI to cpu %d\n", cpu); + SIMPLE_UNLOCK(&ddb_mp_slock); + splx(s); + i386_send_ipi(cpu_info[cpu], I386_IPI_DDB); + } +} + +void +db_movetocpu(int cpu) +{ + int s; + + s = splhigh(); + SIMPLE_LOCK(&ddb_mp_slock); + cpu_info[cpu]->ci_ddb_paused = CI_DDB_ENTERDDB; + db_printf("Sending IPI to cpu %d\n", cpu); + SIMPLE_UNLOCK(&ddb_mp_slock); + splx(s); + /* XXX If other CPU was running and IPI is lost, we lose. */ + i386_send_ipi(cpu_info[cpu], I386_IPI_DDB); +} + +void +i386_ipi_db(struct cpu_info *ci) +{ + int s; + + s = splhigh(); + SIMPLE_LOCK(&ddb_mp_slock); + db_printf("CPU %d received ddb IPI\n", cpu_number()); + while (ci->ci_ddb_paused == CI_DDB_SHOULDSTOP + || ci->ci_ddb_paused == CI_DDB_STOPPED) { + if (ci->ci_ddb_paused == CI_DDB_SHOULDSTOP) + ci->ci_ddb_paused = CI_DDB_STOPPED; + SIMPLE_UNLOCK(&ddb_mp_slock); + while (ci->ci_ddb_paused == CI_DDB_STOPPED) + ; /* Do nothing */ + SIMPLE_LOCK(&ddb_mp_slock); + } + if (ci->ci_ddb_paused == CI_DDB_ENTERDDB) { + ddb_state = DDB_STATE_RUNNING; + ddb_active_cpu = cpu_number(); + ci->ci_ddb_paused = CI_DDB_INDDB; + db_printf("CPU %d grabbing ddb\n", cpu_number()); + SIMPLE_UNLOCK(&ddb_mp_slock); + Debugger(); + SIMPLE_LOCK(&ddb_mp_slock); + ci->ci_ddb_paused = CI_DDB_RUNNING; + } + db_printf("CPU %d leaving ddb IPI handler\n", cpu_number()); + SIMPLE_UNLOCK(&ddb_mp_slock); + splx(s); +} diff --git a/sys/arch/i386/i386/gdt.c b/sys/arch/i386/i386/gdt.c index 3bced92013f..5bb2775f9be 100644 --- a/sys/arch/i386/i386/gdt.c +++ b/sys/arch/i386/i386/gdt.c @@ -1,8 +1,8 @@ -/* $OpenBSD: gdt.c,v 1.20 2003/11/08 05:38:33 nordin Exp $ */ -/* $NetBSD: gdt.c,v 1.8 1996/05/03 19:42:06 christos 
Exp $ */ +/* $OpenBSD: gdt.c,v 1.21 2004/06/13 21:49:15 niklas Exp $ */ +/* $NetBSD: gdt.c,v 1.28 2002/12/14 09:38:50 junyoung Exp $ */ /*- - * Copyright (c) 1996 The NetBSD Foundation, Inc. + * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -27,8 +27,8 @@ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN @@ -37,27 +37,49 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/* + * The GDT handling has two phases. During the early lifetime of the + * kernel there is a static gdt which will be stored in bootstrap_gdt. + * Later, when the virtual memory is initialized, this will be + * replaced with a dynamically resizable GDT (although, we will only + * ever be growing it, there is almost no gain at all to compact it, + * and it has proven to be a complicated thing to do, considering + * parallel access, so it's just not worth the effort. + * + * The static GDT area will hold the initial requirement of NGDT descriptors. + * The dynamic GDT will have a statically sized virtual memory area of size + * GDTMAXPAGES, the physical area backing this will be allocated as needed + * starting with the size needed for holding a copy of the bootstrap gdt. 
+ * + * Every CPU in a system has its own copy of the GDT. The only real difference + * between the two are currently that there is a cpu-specific segment holding + * the struct cpu_info of the processor, for simplicity at getting cpu_info + * fields from assembly. The boot processor will actually refer to the global + * copy of the GDT as pointed to by the gdt variable. + */ + +#include <sys/cdefs.h> + #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> +#include <sys/lock.h> #include <sys/user.h> -#include <uvm/uvm_extern.h> +#include <uvm/uvm.h> #include <machine/gdt.h> -#define MINGDTSIZ 512 -#define MAXGDTSIZ 8192 +union descriptor bootstrap_gdt[NGDT]; +union descriptor *gdt = bootstrap_gdt; -union descriptor *dynamic_gdt = gdt; -int gdt_size = NGDT; /* total number of GDT entries */ -int gdt_count = NGDT; /* number of GDT entries in use */ -int gdt_next = NGDT; /* next available slot for sweeping */ -int gdt_free = GNULL_SEL; /* next free slot; terminated with GNULL_SEL */ +int gdt_size; /* total number of GDT entries */ +int gdt_count; /* number of GDT entries in use */ +int gdt_next; /* next available slot for sweeping */ +int gdt_free; /* next free slot; terminated with GNULL_SEL */ -int gdt_flags; -#define GDT_LOCKED 0x1 -#define GDT_WANTED 0x2 +struct simplelock gdt_simplelock; +struct lock gdt_lock_store; static __inline void gdt_lock(void); static __inline void gdt_unlock(void); @@ -72,23 +94,31 @@ void gdt_put_slot(int); static __inline void gdt_lock() { - - while ((gdt_flags & GDT_LOCKED) != 0) { - gdt_flags |= GDT_WANTED; - tsleep(&gdt_flags, PZERO, "gdtlck", 0); - } - gdt_flags |= GDT_LOCKED; + if (curproc != NULL) + lockmgr(&gdt_lock_store, LK_EXCLUSIVE, &gdt_simplelock, + curproc); } static __inline void gdt_unlock() { + if (curproc != NULL) + lockmgr(&gdt_lock_store, LK_RELEASE, &gdt_simplelock, curproc); +} - gdt_flags &= ~GDT_LOCKED; - if ((gdt_flags & GDT_WANTED) != 0) { - gdt_flags &= ~GDT_WANTED; - wakeup(&gdt_flags); - } 
+/* XXX needs spinlocking if we ever mean to go finegrained. */ +void +setgdt(int sel, void *base, size_t limit, int type, int dpl, int def32, + int gran) +{ + struct segment_descriptor *sd = &gdt[sel].sd; + CPU_INFO_ITERATOR cii; + struct cpu_info *ci; + + setsegment(sd, base, limit, type, dpl, def32, gran); + for (CPU_INFO_FOREACH(cii, ci)) + if (ci->ci_gdt != NULL && ci->ci_gdt != gdt) + ci->ci_gdt[sel].sd = *sd; } /* @@ -98,18 +128,69 @@ void gdt_init() { size_t max_len, min_len; - struct region_descriptor region; + struct vm_page *pg; + vaddr_t va; + struct cpu_info *ci = &cpu_info_primary; + + simple_lock_init(&gdt_simplelock); + lockinit(&gdt_lock_store, PZERO, "gdtlck", 0, 0); max_len = MAXGDTSIZ * sizeof(union descriptor); min_len = MINGDTSIZ * sizeof(union descriptor); + gdt_size = MINGDTSIZ; + gdt_count = NGDT; + gdt_next = NGDT; + gdt_free = GNULL_SEL; + + gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len); + for (va = (vaddr_t)gdt; va < (vaddr_t)gdt + min_len; va += PAGE_SIZE) { + pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); + if (pg == NULL) + panic("gdt_init: no pages"); + pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), + VM_PROT_READ | VM_PROT_WRITE); + } + bcopy(bootstrap_gdt, gdt, NGDT * sizeof(union descriptor)); + ci->ci_gdt = gdt; + setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1, + SDT_MEMRWA, SEL_KPL, 1, 1); - dynamic_gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len); - uvm_map_pageable(kernel_map, (vaddr_t)dynamic_gdt, - (vaddr_t)dynamic_gdt + min_len, FALSE, FALSE); - bcopy(gdt, dynamic_gdt, NGDT * sizeof(union descriptor)); + gdt_init_cpu(ci); +} - setregion(&region, dynamic_gdt, max_len - 1); +#ifdef MULTIPROCESSOR +/* + * Allocate shadow GDT for a slave cpu. 
+ */ +void +gdt_alloc_cpu(struct cpu_info *ci) +{ + int max_len = MAXGDTSIZ * sizeof(union descriptor); + int min_len = MINGDTSIZ * sizeof(union descriptor); + + ci->ci_gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len); + uvm_map_pageable(kernel_map, (vaddr_t)ci->ci_gdt, + (vaddr_t)ci->ci_gdt + min_len, FALSE, FALSE); + bzero(ci->ci_gdt, min_len); + bcopy(gdt, ci->ci_gdt, gdt_count * sizeof(union descriptor)); + setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1, + SDT_MEMRWA, SEL_KPL, 1, 1); +} +#endif /* MULTIPROCESSOR */ + + +/* + * Load appropriate gdt descriptor; we better be running on *ci + * (for the most part, this is how a cpu knows who it is). + */ +void +gdt_init_cpu(struct cpu_info *ci) +{ + struct region_descriptor region; + + setregion(&region, ci->ci_gdt, + MAXGDTSIZ * sizeof(union descriptor) - 1); lgdt(&region); } @@ -120,13 +201,29 @@ void gdt_grow() { size_t old_len, new_len; + CPU_INFO_ITERATOR cii; + struct cpu_info *ci; + struct vm_page *pg; + vaddr_t va; old_len = gdt_size * sizeof(union descriptor); gdt_size <<= 1; new_len = old_len << 1; - uvm_map_pageable(kernel_map, (vaddr_t)dynamic_gdt + old_len, - (vaddr_t)dynamic_gdt + new_len, FALSE, FALSE); + for (CPU_INFO_FOREACH(cii, ci)) { + for (va = (vaddr_t)(ci->ci_gdt) + old_len; + va < (vaddr_t)(ci->ci_gdt) + new_len; + va += PAGE_SIZE) { + while ( + (pg = + uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) == + NULL) { + uvm_wait("gdt_grow"); + } + pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), + VM_PROT_READ | VM_PROT_WRITE); + } + } } /* @@ -146,15 +243,13 @@ gdt_get_slot() if (gdt_free != GNULL_SEL) { slot = gdt_free; - gdt_free = dynamic_gdt[slot].gd.gd_selector; + gdt_free = gdt[slot].gd.gd_selector; } else { if (gdt_next != gdt_count) panic("gdt_get_slot: gdt_next != gdt_count"); if (gdt_next >= gdt_size) { if (gdt_size >= MAXGDTSIZ) panic("gdt_get_slot: out of GDT descriptors"); - if (dynamic_gdt == gdt) - panic("gdt_get_slot called before gdt_init"); gdt_grow(); } slot = 
gdt_next++; @@ -169,65 +264,56 @@ gdt_get_slot() * Deallocate a GDT slot, putting it on the free list. */ void -gdt_put_slot(slot) - int slot; +gdt_put_slot(int slot) { gdt_lock(); gdt_count--; - dynamic_gdt[slot].gd.gd_type = SDT_SYSNULL; - dynamic_gdt[slot].gd.gd_selector = gdt_free; + gdt[slot].gd.gd_type = SDT_SYSNULL; + gdt[slot].gd.gd_selector = gdt_free; gdt_free = slot; gdt_unlock(); } -void -tss_alloc(pcb) - struct pcb *pcb; +int +tss_alloc(struct pcb *pcb) { int slot; slot = gdt_get_slot(); - setsegment(&dynamic_gdt[slot].sd, &pcb->pcb_tss, sizeof(struct pcb) - 1, + setgdt(slot, &pcb->pcb_tss, sizeof(struct pcb) - 1, SDT_SYS386TSS, SEL_KPL, 0, 0); - pcb->pcb_tss_sel = GSEL(slot, SEL_KPL); + return GSEL(slot, SEL_KPL); } void -tss_free(pcb) - struct pcb *pcb; +tss_free(int sel) { - gdt_put_slot(IDXSEL(pcb->pcb_tss_sel)); + gdt_put_slot(IDXSEL(sel)); } +/* + * Caller must have pmap locked for both of these functions. + */ void -ldt_alloc(pmap, ldt, len) - struct pmap *pmap; - union descriptor *ldt; - size_t len; +ldt_alloc(struct pmap *pmap, union descriptor *ldt, size_t len) { int slot; slot = gdt_get_slot(); - setsegment(&dynamic_gdt[slot].sd, ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0, - 0); - simple_lock(&pmap->pm_lock); + setgdt(slot, ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0, 0); pmap->pm_ldt_sel = GSEL(slot, SEL_KPL); - simple_unlock(&pmap->pm_lock); } void -ldt_free(pmap) - struct pmap *pmap; +ldt_free(struct pmap *pmap) { int slot; - simple_lock(&pmap->pm_lock); slot = IDXSEL(pmap->pm_ldt_sel); - simple_unlock(&pmap->pm_lock); gdt_put_slot(slot); } diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf index e74f22cae5f..cc5149eb722 100644 --- a/sys/arch/i386/i386/genassym.cf +++ b/sys/arch/i386/i386/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.13 2003/06/02 23:27:47 millert Exp $ +# $OpenBSD: genassym.cf,v 1.14 2004/06/13 21:49:15 niklas Exp $ # # Copyright (c) 1982, 1990 The Regents of the University of California. 
# All rights reserved. @@ -57,12 +57,17 @@ if COMPAT_FREEBSD > 0 include <machine/freebsd_machdep.h> endif +if MULTIPROCESSOR +include <machine/cpu.h> +endif + include "isa.h" if NISA > 0 include <i386/isa/isa_machdep.h> endif export SRUN +export SONPROC # values for page tables export PDSLOT_KERN @@ -84,6 +89,7 @@ member p_stat member p_wchan member p_vmspace member p_flag +member p_cpu export P_SYSTEM @@ -101,8 +107,8 @@ member pcb_fs member pcb_gs member pcb_cr0 member pcb_ldt_sel -member pcb_tss_sel member pcb_onfault +member pcb_fpcpu # frame definitions struct trapframe @@ -169,3 +175,26 @@ member ih_arg member ih_count member ih_next endif + +define P_MD_TSS_SEL offsetof(struct proc, p_md.md_tss_sel) + +define CPU_INFO_SELF offsetof(struct cpu_info, ci_self) +define CPU_INFO_CURPROC offsetof(struct cpu_info, ci_curproc) +define CPU_INFO_CURPCB offsetof(struct cpu_info, ci_curpcb) +define CPU_INFO_NAME offsetof(struct cpu_info, ci_dev.dv_xname) +define CPU_INFO_IDLE_PCB offsetof(struct cpu_info, ci_idle_pcb) +define CPU_INFO_IDLE_TSS_SEL offsetof(struct cpu_info, ci_idle_tss_sel) +define CPU_INFO_LEVEL offsetof(struct cpu_info, ci_level) +define CPU_INFO_VENDOR offsetof(struct cpu_info, ci_vendor[0]) +define CPU_INFO_SIGNATURE offsetof(struct cpu_info, ci_signature) +define CPU_INFO_RESCHED offsetof(struct cpu_info, ci_want_resched) +define CPU_INFO_ASTPENDING offsetof(struct cpu_info, ci_astpending) +define CPU_INFO_GDT offsetof(struct cpu_info, ci_gdt) +define CPU_INFO_IPENDING offsetof(struct cpu_info, ci_ipending) +define CPU_INFO_IMASK offsetof(struct cpu_info, ci_imask) +define CPU_INFO_IUNMASK offsetof(struct cpu_info, ci_iunmask) +define CPU_INFO_ILEVEL offsetof(struct cpu_info, ci_ilevel) +define CPU_INFO_IDEPTH offsetof(struct cpu_info, ci_idepth) +define CPU_INFO_ISOURCES offsetof(struct cpu_info, ci_isources) + +define SIZEOF_CPU_INFO sizeof(struct cpu_info) diff --git a/sys/arch/i386/i386/ioapic.c b/sys/arch/i386/i386/ioapic.c new file mode 
100644 index 00000000000..e5b329372f3 --- /dev/null +++ b/sys/arch/i386/i386/ioapic.c @@ -0,0 +1,736 @@ +/* $OpenBSD: ioapic.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */ +/* $NetBSD: ioapic.c,v 1.7 2003/07/14 22:32:40 lukem Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +/* + * Copyright (c) 1999 Stefan Grefen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/device.h> +#include <sys/malloc.h> + +#include <machine/bus.h> +#include <machine/psl.h> + +#include <uvm/uvm_extern.h> + +#include <machine/i82093reg.h> +#include <machine/i82093var.h> + +#include <machine/i82489reg.h> +#include <machine/i82489var.h> + +#include <machine/pmap.h> + +#include <machine/mpbiosvar.h> + +#include "isa.h" + +/* + * XXX locking + */ + +int ioapic_match(struct device *, void *, void *); +void ioapic_attach(struct device *, struct device *, void *); + +/* XXX */ +extern int bus_mem_add_mapping(bus_addr_t, bus_size_t, int, + bus_space_handle_t *); + +void apic_set_redir(struct ioapic_softc *, int); +void apic_vectorset(struct ioapic_softc *, int, int, int); + +int apic_verbose = 0; + +int ioapic_bsp_id = 0; +int ioapic_cold = 1; + +struct ioapic_softc *ioapics; /* head of linked list */ +int nioapics = 0; /* number attached */ + +/* + * Register read/write routines. 
+ */ +static __inline u_int32_t +ioapic_read(struct ioapic_softc *sc, int regid) +{ + u_int32_t val; + + /* + * XXX lock apic + */ + *(sc->sc_reg) = regid; + val = *sc->sc_data; + + return (val); + +} + +static __inline void +ioapic_write(struct ioapic_softc *sc, int regid, int val) +{ + /* + * XXX lock apic + */ + *(sc->sc_reg) = regid; + *(sc->sc_data) = val; +} + +struct ioapic_softc * +ioapic_find(int apicid) +{ + struct ioapic_softc *sc; + + if (apicid == MPS_ALL_APICS) { /* XXX mpbios-specific */ + /* + * XXX kludge for all-ioapics interrupt support + * on single ioapic systems + */ + if (nioapics <= 1) + return (ioapics); + panic("unsupported: all-ioapics interrupt with >1 ioapic"); + } + + for (sc = ioapics; sc != NULL; sc = sc->sc_next) + if (sc->sc_apicid == apicid) + return (sc); + + return (NULL); +} + +static __inline void +ioapic_add(struct ioapic_softc *sc) +{ + sc->sc_next = ioapics; + ioapics = sc; + nioapics++; +} + +void +ioapic_print_redir(struct ioapic_softc *sc, char *why, int pin) +{ + u_int32_t redirlo = ioapic_read(sc, IOAPIC_REDLO(pin)); + u_int32_t redirhi = ioapic_read(sc, IOAPIC_REDHI(pin)); + + apic_format_redir(sc->sc_dev.dv_xname, why, pin, redirhi, redirlo); +} + +struct cfattach ioapic_ca = { + sizeof(struct ioapic_softc), ioapic_match, ioapic_attach +}; + +struct cfdriver ioapic_cd = { + NULL, "ioapic", DV_DULL /* XXX DV_CPU ? */ +}; + +int +ioapic_match(struct device *parent, void *matchv, void *aux) +{ + struct cfdata *match = (struct cfdata *)matchv; + struct apic_attach_args * aaa = (struct apic_attach_args *)aux; + + if (strcmp(aaa->aaa_name, match->cf_driver->cd_name) == 0) + return (1); + return (0); +} + + +/* + * can't use bus_space_xxx as we don't have a bus handle ... 
+ */ +void +ioapic_attach(struct device *parent, struct device *self, void *aux) +{ + struct ioapic_softc *sc = (struct ioapic_softc *)self; + struct apic_attach_args *aaa = (struct apic_attach_args *)aux; + int apic_id; + bus_space_handle_t bh; + u_int32_t ver_sz; + int i; + + sc->sc_flags = aaa->flags; + sc->sc_apicid = aaa->apic_id; + + printf(" apid %d", aaa->apic_id); + + if (ioapic_find(aaa->apic_id) != NULL) { + printf(": duplicate apic id (ignored)\n"); + return; + } + + ioapic_add(sc); + + printf(": pa 0x%lx", aaa->apic_address); + + if (bus_mem_add_mapping(aaa->apic_address, PAGE_SIZE, 0, &bh) != 0) { + printf(", map failed\n"); + return; + } + sc->sc_reg = (volatile u_int32_t *)(bh + IOAPIC_REG); + sc->sc_data = (volatile u_int32_t *)(bh + IOAPIC_DATA); + + apic_id = (ioapic_read(sc,IOAPIC_ID) & IOAPIC_ID_MASK) >> + IOAPIC_ID_SHIFT; + ver_sz = ioapic_read(sc, IOAPIC_VER); + + sc->sc_apic_vers = (ver_sz & IOAPIC_VER_MASK) >> IOAPIC_VER_SHIFT; + sc->sc_apic_sz = (ver_sz & IOAPIC_MAX_MASK) >> IOAPIC_MAX_SHIFT; + sc->sc_apic_sz++; + + if (mp_verbose) { + printf(", %s mode", + aaa->flags & IOAPIC_PICMODE ? "PIC" : "virtual wire"); + } + + printf(", version %x, %d pins\n", sc->sc_apic_vers, sc->sc_apic_sz); + + sc->sc_pins = malloc(sizeof(struct ioapic_pin) * sc->sc_apic_sz, + M_DEVBUF, M_WAITOK); + + for (i=0; i<sc->sc_apic_sz; i++) { + sc->sc_pins[i].ip_handler = NULL; + sc->sc_pins[i].ip_next = NULL; + sc->sc_pins[i].ip_map = NULL; + sc->sc_pins[i].ip_vector = 0; + sc->sc_pins[i].ip_type = 0; + sc->sc_pins[i].ip_minlevel = 0xff; /* XXX magic*/ + sc->sc_pins[i].ip_maxlevel = 0; /* XXX magic */ + } + + /* + * In case the APIC is not initialized to the correct ID + * do it now. + * Maybe we should record the original ID for interrupt + * mapping later ... 
+ */ + if (apic_id != sc->sc_apicid) { + printf("%s: misconfigured as apic %d", sc->sc_dev.dv_xname, + apic_id); + + ioapic_write(sc, IOAPIC_ID, + (ioapic_read(sc, IOAPIC_ID) & ~IOAPIC_ID_MASK) + | (sc->sc_apicid << IOAPIC_ID_SHIFT)); + + apic_id = (ioapic_read(sc,IOAPIC_ID) & IOAPIC_ID_MASK) >> + IOAPIC_ID_SHIFT; + + if (apic_id != sc->sc_apicid) + printf(", can't remap to apid %d\n", sc->sc_apicid); + else + printf(", remapped to apic %d\n", sc->sc_apicid); + } +#if 0 + /* output of this was boring. */ + if (mp_verbose) + for (i=0; i<sc->sc_apic_sz; i++) + ioapic_print_redir(sc, "boot", i); +#endif +} + +/* + * Interrupt mapping. + * + * Multiple handlers may exist for each pin, so there's an + * intrhand chain for each pin. + * + * Ideally, each pin maps to a single vector at the priority of the + * highest level interrupt for that pin. + * + * XXX in the event that there are more than 16 interrupt sources at a + * single level, some doubling-up may be needed. This is not yet + * implemented. + * + * XXX we are wasting some space here because we only use a limited + * range of the vectors here. (0x30..0xef) + */ + +struct intrhand *apic_intrhand[256]; +int apic_intrcount[256]; + + +/* XXX should check vs. 
softc max int number */ +#define LEGAL_IRQ(x) ((x) >= 0 && (x) < APIC_ICU_LEN && (x) != 2) + +void +apic_set_redir(struct ioapic_softc *sc, int pin) +{ + u_int32_t redlo; + u_int32_t redhi = 0; + int delmode; + + struct ioapic_pin *pp; + struct mp_intr_map *map; + + pp = &sc->sc_pins[pin]; + map = pp->ip_map; + if (map == NULL) { + redlo = IOAPIC_REDLO_MASK; + } else { + redlo = map->redir; + } + delmode = (redlo & IOAPIC_REDLO_DEL_MASK) >> IOAPIC_REDLO_DEL_SHIFT; + + /* XXX magic numbers */ + if ((delmode != 0) && (delmode != 1)) + ; + else if (pp->ip_handler == NULL) { + redlo |= IOAPIC_REDLO_MASK; + } else { + redlo |= (pp->ip_vector & 0xff); + redlo |= (IOAPIC_REDLO_DEL_FIXED << IOAPIC_REDLO_DEL_SHIFT); + redlo &= ~IOAPIC_REDLO_DSTMOD; + + /* + * Destination: BSP CPU + * + * XXX will want to distribute interrupts across cpu's + * eventually. most likely, we'll want to vector each + * interrupt to a specific CPU and load-balance across + * cpu's. but there's no point in doing that until after + * most interrupts run without the kernel lock. + */ + redhi |= (ioapic_bsp_id << IOAPIC_REDHI_DEST_SHIFT); + + /* XXX derive this bit from BIOS info */ + if (pp->ip_type == IST_LEVEL) + redlo |= IOAPIC_REDLO_LEVEL; + else + redlo &= ~IOAPIC_REDLO_LEVEL; + if (map != NULL && ((map->flags & 3) == MPS_INTPO_DEF)) { + if (pp->ip_type == IST_LEVEL) + redlo |= IOAPIC_REDLO_ACTLO; + else + redlo &= ~IOAPIC_REDLO_ACTLO; + } + } + /* Do atomic write */ + ioapic_write(sc, IOAPIC_REDLO(pin), IOAPIC_REDLO_MASK); + ioapic_write(sc, IOAPIC_REDHI(pin), redhi); + ioapic_write(sc, IOAPIC_REDLO(pin), redlo); + if (mp_verbose) + ioapic_print_redir(sc, "int", pin); +} + +/* + * XXX To be really correct an NISA > 0 condition should check for these. + * However, the i386 port pretty much assumes isa is there anyway. + * For example, pci_intr_establish calls isa_intr_establish unconditionally. 
+ */ +extern int fakeintr(void *); /* XXX headerify */ +extern char *isa_intr_typename(int); /* XXX headerify */ + +/* + * apic_vectorset: allocate a vector for the given pin, based on + * the levels of the interrupts on that pin. + * + * XXX if the level of the pin changes while the pin is + * masked, need to do something special to prevent pending + * interrupts from being lost. + * (the answer may be to hang the interrupt chain off of both vectors + * until any interrupts from the old source have been handled. the trouble + * is that we don't have a global view of what interrupts are pending. + * + * Deferring for now since MP systems are more likely servers rather + * than laptops or desktops, and thus will have relatively static + * interrupt configuration. + */ + +void +apic_vectorset(struct ioapic_softc *sc, int pin, int minlevel, int maxlevel) +{ + struct ioapic_pin *pp = &sc->sc_pins[pin]; + int ovector = 0; + int nvector = 0; + + ovector = pp->ip_vector; + + if (maxlevel == 0) { + /* no vector needed. */ + pp->ip_minlevel = 0xff; /* XXX magic */ + pp->ip_maxlevel = 0; /* XXX magic */ + pp->ip_vector = 0; + } else if (maxlevel != pp->ip_maxlevel) { + if (minlevel != maxlevel) { + printf("%s: WARNING: sharing interrupt " + "between different IPLs (currently broken)\n", + sc->sc_dev.dv_xname); + printf("%s: pin %d, ipls %x..%x\n", + sc->sc_dev.dv_xname, + pin, minlevel, maxlevel); + } + + /* + * Allocate interrupt vector at the *lowest* priority level + * of any of the handlers invoked by this pin. + * + * The interrupt handler will raise ipl higher than this + * as appropriate. + */ + nvector = idt_vec_alloc(maxlevel, maxlevel+15); + + if (nvector == 0) { + /* + * XXX XXX we should be able to deal here.. + * need to double-up an existing vector + * and install a slightly different handler. 
+ */ + panic("%s: can't alloc vector for pin %d at level %x", + sc->sc_dev.dv_xname, pin, maxlevel); + } + /* + * XXX want special handler for the maxlevel != minlevel + * case here! + */ + idt_vec_set(nvector, apichandler[nvector & 0xf]); + pp->ip_vector = nvector; + pp->ip_minlevel = minlevel; + pp->ip_maxlevel = maxlevel; + } + apic_intrhand[pp->ip_vector] = pp->ip_handler; + + if (ovector) { + /* + * XXX should defer this until we're sure the old vector + * doesn't have a pending interrupt on any processor. + * do this by setting a counter equal to the number of CPU's, + * and firing off a low-priority broadcast IPI to all cpu's. + * each cpu then decrements the counter; when it + * goes to zero, free the vector.. + * i.e., defer until all processors have run with a CPL + * less than the level of the interrupt.. + * + * this is only an issue for dynamic interrupt configuration + * (e.g., cardbus or pcmcia). + */ + apic_intrhand[ovector] = NULL; + idt_vec_free(ovector); + printf("freed vector %x\n", ovector); + } + + apic_set_redir(sc, pin); +} + +/* + * Throw the switch and enable interrupts.. + */ + +void +ioapic_enable(void) +{ + int p, maxlevel, minlevel; + struct ioapic_softc *sc; + struct intrhand *q; + extern void intr_calculatemasks(void); /* XXX */ + + intr_calculatemasks(); /* for softints, AST's */ + + ioapic_cold = 0; + + if (ioapics == NULL) + return; + +#if 1 /* XXX Will probably get removed */ + lapic_set_softvectors(); + lapic_set_lvt(); +#endif + + if (ioapics->sc_flags & IOAPIC_PICMODE) { + printf("%s: writing to IMCR to disable pics\n", + ioapics->sc_dev.dv_xname); + outb(IMCR_ADDR, IMCR_REGISTER); + outb(IMCR_DATA, IMCR_APIC); + } + +#if 0 /* XXX Will be removed when we have intrsource. 
*/ + isa_nodefaultirq(); +#endif + + for (sc = ioapics; sc != NULL; sc = sc->sc_next) { + if (mp_verbose) + printf("%s: enabling\n", sc->sc_dev.dv_xname); + + for (p=0; p<sc->sc_apic_sz; p++) { + maxlevel = 0; /* magic */ + minlevel = 0xff; /* magic */ + + for (q = sc->sc_pins[p].ip_handler; q != NULL; + q = q->ih_next) { + if (q->ih_level > maxlevel) + maxlevel = q->ih_level; + if (q->ih_level < minlevel) + minlevel = q->ih_level; + } + apic_vectorset(sc, p, minlevel, maxlevel); + } + } +} + +/* + * Interrupt handler management with the apic is radically different from the + * good old 8259. + * + * The APIC adds an additional level of indirection between interrupt + * signals and interrupt vectors in the IDT. + * It also encodes a priority into the high-order 4 bits of the IDT vector + * number. + * + * + * interrupt establishment: + * -> locate interrupt pin. + * -> locate or allocate vector for pin. + * -> locate or allocate handler chain for vector. + * -> chain interrupt into handler chain. + * #ifdef notyet + * -> if level of handler chain increases, reallocate vector, move chain. + * #endif + */ + +void * +apic_intr_establish(int irq, int type, int level, int (*ih_fun)(void *), + void *ih_arg, char *what) +{ + unsigned int ioapic = APIC_IRQ_APIC(irq); + unsigned int intr = APIC_IRQ_PIN(irq); + struct ioapic_softc *sc = ioapic_find(ioapic); + struct ioapic_pin *pin; + struct intrhand **p, *q, *ih; + static struct intrhand fakehand = {fakeintr}; + extern int cold; + int minlevel, maxlevel; + + if (sc == NULL) + panic("apic_intr_establish: unknown ioapic %d", ioapic); + + if ((irq & APIC_INT_VIA_APIC) == 0) + panic("apic_intr_establish of non-apic interrupt 0x%x", irq); + + pin = &sc->sc_pins[intr]; + if (intr >= sc->sc_apic_sz || type == IST_NONE) + panic("apic_intr_establish: bogus intr or type"); + + /* no point in sleeping unless someone can free memory. */ + ih = malloc(sizeof *ih, M_DEVBUF, cold ? 
M_NOWAIT : M_WAITOK); + if (ih == NULL) + panic("apic_intr_establish: can't malloc handler info"); + + + switch (pin->ip_type) { + case IST_NONE: + pin->ip_type = type; + break; + case IST_EDGE: + case IST_LEVEL: + if (type == pin->ip_type) + break; + case IST_PULSE: + if (type != IST_NONE) + /* XXX should not panic here! */ + panic("apic_intr_establish: " + "intr %d can't share %s with %s", + intr, + isa_intr_typename(sc->sc_pins[intr].ip_type), + isa_intr_typename(type)); + break; + } + + /* + * Figure out where to put the handler. + * This is O(N^2) to establish N interrupts, but we want to + * preserve the order, and N is generally small. + */ + maxlevel = level; + minlevel = level; + for (p = &pin->ip_handler; (q = *p) != NULL; p = &q->ih_next) { + if (q->ih_level > maxlevel) + maxlevel = q->ih_level; + if (q->ih_level < minlevel) + minlevel = q->ih_level; + } + + /* + * Actually install a fake handler momentarily, since we might be doing + * this with interrupts enabled and don't want the real routine called + * until masking is set up. + */ + fakehand.ih_level = level; + *p = &fakehand; + + /* + * Fix up the vector for this pin. + * (if cold, defer this until most interrupts have been established, + * to avoid too much thrashing of the idt..) + */ + + if (!ioapic_cold) + apic_vectorset(sc, intr, minlevel, maxlevel); + +#if 0 + apic_calculatemasks(); +#endif + + /* + * Poke the real handler in now. + */ + ih->ih_fun = ih_fun; + ih->ih_arg = ih_arg; + ih->ih_count = 0; + ih->ih_next = NULL; + ih->ih_level = level; + ih->ih_irq = irq; + ih->ih_what = what; + *p = ih; + + return (ih); +} + +/* + * apic disestablish: + * locate handler chain. + * dechain intrhand from handler chain + * if chain empty { + * reprogram apic for "safe" vector. + * free vector (point at stray handler). + * } + * #ifdef notyet + * else { + * recompute level for current chain. + * if changed, reallocate vector, move chain. 
+ * } + * #endif + */ + +void +apic_intr_disestablish(void *arg) +{ + struct intrhand *ih = arg; + int irq = ih->ih_irq; + unsigned int ioapic = APIC_IRQ_APIC(irq); + unsigned int intr = APIC_IRQ_PIN(irq); + struct ioapic_softc *sc = ioapic_find(ioapic); + struct ioapic_pin *pin = &sc->sc_pins[intr]; + struct intrhand **p, *q; + int minlevel, maxlevel; + + if (sc == NULL) + panic("apic_intr_disestablish: unknown ioapic %d", ioapic); + + if (intr >= sc->sc_apic_sz) + panic("apic_intr_disestablish: bogus irq"); + + /* + * Remove the handler from the chain. + * This is O(n^2), too. + */ + maxlevel = 0; + minlevel = 0xff; + for (p = &pin->ip_handler; (q = *p) != NULL && q != ih; + p = &q->ih_next) { + if (q->ih_level > maxlevel) + maxlevel = q->ih_level; + if (q->ih_level < minlevel) + minlevel = q->ih_level; + } + + if (q) + *p = q->ih_next; + else + panic("intr_disestablish: handler not registered"); + for (; q != NULL; q = q->ih_next) { + if (q->ih_level > maxlevel) + maxlevel = q->ih_level; + if (q->ih_level < minlevel) + minlevel = q->ih_level; + } + + if (!ioapic_cold) + apic_vectorset(sc, intr, minlevel, maxlevel); + + free(ih, M_DEVBUF); +} + +#ifdef DDB +void ioapic_dump(void); + +void +ioapic_dump(void) +{ + struct ioapic_softc *sc; + struct ioapic_pin *ip; + int p; + + for (sc = ioapics; sc != NULL; sc = sc->sc_next) { + for (p = 0; p < sc->sc_apic_sz; p++) { + ip = &sc->sc_pins[p]; + if (ip->ip_type != IST_NONE) + ioapic_print_redir(sc, "dump", p); + } + } +} +#endif diff --git a/sys/arch/i386/i386/ipifuncs.c b/sys/arch/i386/i386/ipifuncs.c new file mode 100644 index 00000000000..10256ca631a --- /dev/null +++ b/sys/arch/i386/i386/ipifuncs.c @@ -0,0 +1,175 @@ +/* $OpenBSD: ipifuncs.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */ +/* $NetBSD: ipifuncs.c,v 1.1.2.3 2000/06/26 02:04:06 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ + +/* + * Interprocessor interrupt handlers. + */ + +#include "npx.h" + +#include <sys/param.h> +#include <sys/device.h> +#include <sys/systm.h> + +#include <machine/cpufunc.h> +#include <machine/cpuvar.h> +#include <machine/intr.h> +#include <machine/atomic.h> +#include <machine/i82093var.h> +#include <machine/db_machdep.h> + +#include <uvm/uvm_extern.h> + +void i386_ipi_halt(struct cpu_info *); + +#if NNPX > 0 +void i386_ipi_synch_fpu(struct cpu_info *); +void i386_ipi_flush_fpu(struct cpu_info *); +#else +#define i386_ipi_synch_fpu 0 +#define i386_ipi_flush_fpu 0 +#endif + +void (*ipifunc[I386_NIPI])(struct cpu_info *) = +{ + i386_ipi_halt, +#if 0 && (defined(I586_CPU) || defined(I686_CPU)) + cc_microset, +#else + 0, +#endif + i386_ipi_flush_fpu, + i386_ipi_synch_fpu, + pmap_do_tlb_shootdown, +#if 0 + i386_reload_mtrr, + gdt_reload_cpu, +#else + 0, + 0, +#endif + i386_ipi_db, +}; + +void +i386_ipi_halt(struct cpu_info *ci) +{ + disable_intr(); + + printf("%s: shutting down\n", ci->ci_dev.dv_xname); + for(;;) { + asm volatile("hlt"); + } +} + +#if NNPX > 0 +void +i386_ipi_flush_fpu(struct cpu_info *ci) +{ + npxsave_cpu(ci, 0); +} + +void +i386_ipi_synch_fpu(struct cpu_info *ci) +{ + npxsave_cpu(ci, 1); +} +#endif + +void +i386_spurious(void) +{ + printf("spurious intr\n"); +} + +int +i386_send_ipi(struct cpu_info *ci, int ipimask) +{ + int ret; + + i386_atomic_setbits_l(&ci->ci_ipis, ipimask); + + /* Don't send IPI to cpu which isn't (yet) running. 
*/ + if (!(ci->ci_flags & CPUF_RUNNING)) + return ENOENT; + + ret = i386_ipi(LAPIC_IPI_VECTOR, ci->ci_cpuid, LAPIC_DLMODE_FIXED); + if (ret != 0) { + printf("ipi of %x from %s to %s failed\n", + ipimask, curcpu()->ci_dev.dv_xname, ci->ci_dev.dv_xname); + } + + return ret; +} + +void +i386_self_ipi(int vector) +{ + i82489_writereg(LAPIC_ICRLO, + vector | LAPIC_DLMODE_FIXED | LAPIC_LVL_ASSERT | LAPIC_DEST_SELF); +} + + +void +i386_broadcast_ipi(int ipimask) +{ + panic("broadcast_ipi not implemented"); +} + +void +i386_ipi_handler(void) +{ + struct cpu_info *ci = curcpu(); + u_int32_t pending; + int bit; + + pending = i386_atomic_testset_ul(&ci->ci_ipis, 0); + + for (bit = 0; bit < I386_NIPI && pending; bit++) { + if (pending & (1<<bit)) { + pending &= ~(1<<bit); + (*ipifunc[bit])(ci); + } + } +} diff --git a/sys/arch/i386/i386/lapic.c b/sys/arch/i386/i386/lapic.c new file mode 100644 index 00000000000..b4e4c36b5fa --- /dev/null +++ b/sys/arch/i386/i386/lapic.c @@ -0,0 +1,474 @@ +/* $OpenBSD: lapic.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */ +/* $NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/cpu.h>
+#include <machine/cpufunc.h>
+#include <machine/cpuvar.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/mpbiosvar.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+#include <machine/segments.h>
+
+#include <machine/apicvar.h>
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+
+#include <i386/isa/timerreg.h>		/* XXX for TIMER_FREQ */
+
+void lapic_delay(int);
+void lapic_microtime(struct timeval *);
+static __inline u_int32_t lapic_gettick(void);
+void lapic_clockintr(void *);
+void lapic_initclocks(void);
+void lapic_map(paddr_t);
+/*
+ * Map the local APIC found at physical address lapic_base onto the
+ * kernel virtual address of the local_apic symbol.
+ *
+ * The PTE is written directly and the stale TLB entry dropped with
+ * invlpg.  Interrupts are disabled for the duration, and the value read
+ * from lapic_tpr beforehand is written back once the new mapping is in
+ * place.
+ */
+void
+lapic_map(lapic_base)
+	paddr_t lapic_base;
+{
+	int s;
+	pt_entry_t *pte;
+	vaddr_t va = (vaddr_t)&local_apic;
+
+	disable_intr();
+	s = lapic_tpr;		/* restored below, after the remap */
+
+	/*
+	 * Map local apic. If we have a local apic, it's safe to assume
+	 * we're on a 486 or better and can use invlpg and non-cacheable PTE's
+	 *
+	 * Whap the PTE "by hand" rather than calling pmap_kenter_pa because
+	 * the latter will attempt to invoke TLB shootdown code just as we
+	 * might have changed the value of cpu_number()..
+	 */
+
+	pte = kvtopte(va);
+	*pte = lapic_base | PG_RW | PG_V | PG_N;
+	invlpg(va);
+
+#ifdef MULTIPROCESSOR
+	cpu_init_first();	/* catch up to changed cpu_number() */
+#endif
+
+	lapic_tpr = s;
+	enable_intr();
+}
+
+/*
+ * enable local apic
+ *
+ * Writes LAPIC_SVR_ENABLE together with LAPIC_SPURIOUS_VECTOR to the
+ * LAPIC_SVR register in a single store.
+ */
+void
+lapic_enable()
+{
+	i82489_writereg(LAPIC_SVR, LAPIC_SVR_ENABLE | LAPIC_SPURIOUS_VECTOR);
+}
+
+extern struct mp_intr_map *lapic_ints[]; /* XXX header file? 
*/
+/*
+ * Point the three LAPIC soft interrupt vectors (softclock, softnet,
+ * softtty) at their Xintrsoft* entry points in the IDT.
+ */
+void
+lapic_set_softvectors()
+{
+	idt_vec_set(LAPIC_SOFTCLOCK_VECTOR, Xintrsoftclock);
+	idt_vec_set(LAPIC_SOFTNET_VECTOR, Xintrsoftnet);
+	idt_vec_set(LAPIC_SOFTTTY_VECTOR, Xintrsofttty);
+}
+/*
+ * Program the LINT0 and LINT1 local vector table entries from
+ * lapic_ints[0] and lapic_ints[1]; an entry whose lapic_ints[] pointer is
+ * null is left untouched.
+ *
+ * On MULTIPROCESSOR kernels with mp_verbose set, the LINT registers are
+ * passed to apic_format_redir() before the update ("prelint"), and the
+ * timer, pcint, lint and err entries afterwards.
+ */
+void
+lapic_set_lvt()
+{
+#ifdef MULTIPROCESSOR
+	struct cpu_info *ci = curcpu();
+
+	if (mp_verbose) {
+		apic_format_redir(ci->ci_dev.dv_xname, "prelint", 0, 0,
+		    i82489_readreg(LAPIC_LVINT0));
+		apic_format_redir(ci->ci_dev.dv_xname, "prelint", 1, 0,
+		    i82489_readreg(LAPIC_LVINT1));
+	}
+#endif
+	if (lapic_ints[0])
+		i82489_writereg(LAPIC_LVINT0, lapic_ints[0]->redir);
+	if (lapic_ints[1])
+		i82489_writereg(LAPIC_LVINT1, lapic_ints[1]->redir);
+
+#ifdef MULTIPROCESSOR
+	if (mp_verbose) {
+		apic_format_redir(ci->ci_dev.dv_xname, "timer", 0, 0,
+		    i82489_readreg(LAPIC_LVTT));
+		apic_format_redir(ci->ci_dev.dv_xname, "pcint", 0, 0,
+		    i82489_readreg(LAPIC_PCINT));
+		apic_format_redir(ci->ci_dev.dv_xname, "lint", 0, 0,
+		    i82489_readreg(LAPIC_LVINT0));
+		apic_format_redir(ci->ci_dev.dv_xname, "lint", 1, 0,
+		    i82489_readreg(LAPIC_LVINT1));
+		apic_format_redir(ci->ci_dev.dv_xname, "err", 0, 0,
+		    i82489_readreg(LAPIC_LVERR));
+	}
+#endif
+}
+
+/*
+ * Initialize fixed idt vectors for use by local apic.
+ *
+ * Maps the local APIC at lapic_base first (lapic_map()), then installs
+ * the spurious and timer vectors and, on MULTIPROCESSOR kernels, the IPI
+ * vector.
+ */
+void
+lapic_boot_init(lapic_base)
+	paddr_t lapic_base;
+{
+	lapic_map(lapic_base);
+
+#ifdef MULTIPROCESSOR
+	idt_vec_set(LAPIC_IPI_VECTOR, Xintripi);
+#endif
+	idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
+	idt_vec_set(LAPIC_TIMER_VECTOR, Xintrltimer);
+}
+/*
+ * Return the value currently held in the LAPIC_CCR_TIMER register.
+ */
+static __inline u_int32_t
+lapic_gettick()
+{
+	return (i82489_readreg(LAPIC_CCR_TIMER));
+}
+
+#include <sys/kernel.h>		/* for hz */
+
+int lapic_timer = 0;
+u_int32_t lapic_tval;		/* count written to LAPIC_ICR_TIMER */
+
+/*
+ * this gets us up to a 4GHz busclock.... 
+ */
+u_int32_t lapic_per_second;		/* measured timer counts per second */
+u_int32_t lapic_frac_usec_per_cycle;	/* (10^6 << 32) / lapic_per_second */
+u_int64_t lapic_frac_cycle_per_usec;	/* (lapic_per_second << 32) / 10^6 */
+u_int32_t lapic_delaytab[26];		/* timer counts for 0..25 usec */
+/*
+ * Local APIC timer interrupt: pass the clock frame in arg to hardclock().
+ */
+void
+lapic_clockintr(arg)
+	void *arg;
+{
+	struct clockframe *frame = arg;
+
+	hardclock(frame);
+}
+/*
+ * Start the local APIC timer with an initial count of lapic_tval.
+ * lapic_calibrate_timer() installs this routine as initclock_func.
+ */
+void
+lapic_initclocks()
+{
+	/*
+	 * Start local apic countdown timer running, in repeated mode.
+	 *
+	 * Mask the clock interrupt and set mode,
+	 * then set divisor,
+	 * then unmask and set the vector.
+	 */
+	i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_LVTT_M);
+	i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
+	i82489_writereg(LAPIC_ICR_TIMER, lapic_tval);
+	i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_TIMER_VECTOR);
+}
+
+extern int gettick(void);		/* XXX put in header file */
+extern void (*initclock_func)(void);	/* XXX put in header file */
+
+/*
+ * Calibrate the local apic count-down timer (which is running at
+ * bus-clock speed) vs. the i8254 counter/timer (which is running at
+ * a fixed rate).
+ *
+ * The Intel MP spec says: "An MP operating system may use the IRQ8
+ * real-time clock as a reference to determine the actual APIC timer clock
+ * speed."
+ *
+ * We're actually using the IRQ0 timer. Hmm.
+ *
+ * Results: lapic_per_second receives the measured rate, which is also
+ * printed for cpu ci.  If the rate is non-zero, lapic_tval, the two
+ * fixed-point conversion factors and lapic_delaytab[] are derived from
+ * it, the timer is reprogrammed (still masked), and delay_func,
+ * microtime_func and initclock_func are switched to the lapic routines.
+ */
+void
+lapic_calibrate_timer(ci)
+	struct cpu_info *ci;
+{
+	unsigned int starttick, tick1, tick2, endtick;
+	unsigned int startapic, apic1, apic2, endapic;
+	u_int64_t dtick, dapic, tmp;
+	int i;
+	char tbuf[9];
+
+	if (mp_verbose)
+		printf("%s: calibrating local timer\n", ci->ci_dev.dv_xname);
+
+	/*
+	 * Configure timer to one-shot, interrupt masked,
+	 * large positive number.
+	 */
+	i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_M);
+	i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
+	i82489_writereg(LAPIC_ICR_TIMER, 0x80000000);
+
+	starttick = gettick();
+	startapic = lapic_gettick();
+
+	DELAY(2);	/* using "old" delay here.. */
+	/*
+	 * NOTE(review): each pass appears to wait for gettick() to go all
+	 * the way round and come back to starttick, so the hz passes would
+	 * span about one second -- confirm against gettick().
+	 */
+	for (i=0; i<hz; i++) {
+		do {
+			tick1 = gettick();
+			apic1 = lapic_gettick();
+		} while (tick1 < starttick);
+
+		do {
+			tick2 = gettick();
+			apic2 = lapic_gettick();
+		} while (tick2 > starttick);
+	}
+
+	endtick = gettick();
+	endapic = lapic_gettick();
+
+	dtick = hz * TIMER_DIV(hz) + (starttick-endtick);
+	dapic = startapic-endapic;
+
+	/*
+	 * there are TIMER_FREQ ticks per second.
+	 * in dtick ticks, there are dapic bus clocks.
+	 */
+	tmp = (TIMER_FREQ * dapic) / dtick;
+
+	lapic_per_second = tmp;
+
+#if 0
+	humanize_number(tbuf, sizeof(tbuf), tmp, "Hz", 1000);
+#else /* XXX: from NetBSD sources... sigh. */
+	{
+		/* prefixes are: (none), Kilo, Mega, Giga, Tera, Peta, Exa */
+		static const char prefixes[] = " KMGTPE";
+
+		int i;
+		u_int64_t max;
+		size_t suffixlen;
+
+		if (tbuf == NULL)
+			goto out;
+		if (sizeof(tbuf) > 0)
+			tbuf[0] = '\0';
+		suffixlen = sizeof "Hz" - 1;
+		/* check if enough room for `x y' + suffix + `\0' */
+		if (sizeof(tbuf) < 4 + suffixlen)
+			goto out;
+
+		/* max = 10^(digits that fit in tbuf beside the suffix) */
+		max = 1;
+		for (i = 0; i < sizeof(tbuf) - suffixlen - 3; i++)
+			max *= 10;
+		/* scale tmp down by 1000 until it fits; i picks the prefix */
+		for (i = 0; tmp >= max && i < sizeof(prefixes); i++)
+			tmp /= 1000;
+
+		snprintf(tbuf, sizeof(tbuf), "%qu%s%c%s",
+		    (unsigned long long)tmp, i == 0 ? "" : " ", prefixes[i],
+		    "Hz");
+	out:
+		;
+	}
+#endif
+
+	printf("%s: apic clock running at %s\n", ci->ci_dev.dv_xname, tbuf);
+
+	if (lapic_per_second != 0) {
+		/*
+		 * reprogram the apic timer to run in periodic mode.
+		 * XXX need to program timer on other cpu's, too.
+		 */
+		/* lapic_per_second / hz, rounded to nearest */
+		lapic_tval = (lapic_per_second * 2) / hz;
+		lapic_tval = (lapic_tval / 2) + (lapic_tval & 0x1);
+
+		i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM | LAPIC_LVTT_M |
+		    LAPIC_TIMER_VECTOR);
+		i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
+		i82489_writereg(LAPIC_ICR_TIMER, lapic_tval);
+
+		/*
+		 * Compute fixed-point ratios between cycles and
+		 * microseconds to avoid having to do any division
+		 * in lapic_delay and lapic_microtime.
+		 */
+
+		tmp = (1000000 * (u_int64_t)1 << 32) / lapic_per_second;
+		lapic_frac_usec_per_cycle = tmp;
+
+		tmp = (lapic_per_second * (u_int64_t)1 << 32) / 1000000;
+
+		lapic_frac_cycle_per_usec = tmp;
+
+		/*
+		 * Compute delay in cycles for likely short delays in usec.
+		 */
+		for (i = 0; i < 26; i++)
+			lapic_delaytab[i] = (lapic_frac_cycle_per_usec * i) >>
+			    32;
+
+		/*
+		 * Now that the timer's calibrated, use the apic timer routines
+		 * for all our timing needs..
+		 */
+		delay_func = lapic_delay;
+		microtime_func = lapic_microtime;
+		initclock_func = lapic_initclocks;
+	}
+}
+
+/*
+ * delay for N usec.
+ *
+ * Converts usec to timer counts (table lookup up to 25 usec, fixed-point
+ * multiply beyond that) and polls lapic_gettick() until that many counts
+ * have gone by.  A reading higher than the previous one is taken to mean
+ * the counter was reloaded from lapic_tval in between.  Returns at once
+ * for usec <= 0.
+ */
+
+void lapic_delay(usec)
+	int usec;
+{
+	int32_t tick, otick;
+	int64_t deltat;		/* XXX may want to be 64bit */
+
+	otick = lapic_gettick();
+
+	if (usec <= 0)
+		return;
+	if (usec <= 25)
+		deltat = lapic_delaytab[usec];
+	else
+		deltat = (lapic_frac_cycle_per_usec * usec) >> 32;
+
+	while (deltat > 0) {
+		tick = lapic_gettick();
+		if (tick > otick)
+			deltat -= lapic_tval - (tick - otick);
+		else
+			deltat -= otick - tick;
+		otick = tick;
+	}
+}
+
+#define LAPIC_TICK_THRESH 200	/* NOTE(review): unused in the code shown here */
+
+/*
+ * XXX need to make work correctly on other than cpu 0.
+ *
+ * Store the current time in *tv: the global `time' plus the microseconds
+ * corresponding to the counts the timer has run down from lapic_tval.
+ * The counter and `time' are sampled together with interrupts disabled.
+ */
+
+void lapic_microtime(tv)
+	struct timeval *tv;
+{
+	struct timeval now;
+	u_int32_t tick;
+	u_int32_t usec;
+	u_int32_t tmp;
+
+	disable_intr();
+	tick = lapic_gettick();
+	now = time;
+	enable_intr();
+
+	tmp = lapic_tval - tick;
+	usec = ((u_int64_t)tmp * lapic_frac_usec_per_cycle) >> 32;
+
+	now.tv_usec += usec;
+	while (now.tv_usec >= 1000000) {
+		now.tv_sec += 1;
+		now.tv_usec -= 1000000;
+	}
+
+	*tv = now;
+}
+
+/*
+ * XXX the following belong mostly or partly elsewhere.. 
+ */
+/*
+ * Send an INIT IPI to target.
+ *
+ * If target has no LAPIC_DEST_MASK bits set it is shifted into
+ * LAPIC_ICRHI first (presumably it is then a destination APIC id).  The
+ * INIT is asserted, the delivery-status bit is polled (at most 100000
+ * reads), and after delay(10000) a level-triggered de-assert is sent and
+ * polled for the same way.
+ *
+ * Returns EBUSY if LAPIC_ICRLO still shows LAPIC_DLSTAT_BUSY at the end,
+ * otherwise 0.
+ */
+int
+i386_ipi_init(target)
+	int target;
+{
+	unsigned j;
+
+	if ((target & LAPIC_DEST_MASK) == 0) {
+		i82489_writereg(LAPIC_ICRHI, target << LAPIC_ID_SHIFT);
+	}
+
+	i82489_writereg(LAPIC_ICRLO, (target & LAPIC_DEST_MASK) |
+	    LAPIC_DLMODE_INIT | LAPIC_LVL_ASSERT );
+
+	for (j = 100000; j > 0; j--)
+		if ((i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) == 0)
+			break;
+
+	delay(10000);
+
+	i82489_writereg(LAPIC_ICRLO, (target & LAPIC_DEST_MASK) |
+	    LAPIC_DLMODE_INIT | LAPIC_LVL_TRIG | LAPIC_LVL_DEASSERT);
+
+	for (j = 100000; j > 0; j--)
+		if ((i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) == 0)
+			break;
+
+	return (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY)?EBUSY:0;
+}
+/*
+ * Send interrupt vector vec to target using delivery mode dl.
+ *
+ * The destination is set up as in i386_ipi_init().  After the command is
+ * written to LAPIC_ICRLO the delivery-status bit is polled for at most
+ * 100000 reads.
+ *
+ * Returns EBUSY if the bit is still set afterwards, otherwise 0.
+ */
+int
+i386_ipi(vec,target,dl)
+	int vec,target,dl;
+{
+	unsigned j;
+
+	if ((target & LAPIC_DEST_MASK) == 0)
+		i82489_writereg(LAPIC_ICRHI, target << LAPIC_ID_SHIFT);
+
+	i82489_writereg(LAPIC_ICRLO,
+	    (target & LAPIC_DEST_MASK) | vec | dl | LAPIC_LVL_ASSERT);
+
+	for (j = 100000;
+	     j > 0 && (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY);
+	     j--)
+		;
+
+	return (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) ? EBUSY : 0;
+}
diff --git a/sys/arch/i386/i386/lock_machdep.c b/sys/arch/i386/i386/lock_machdep.c
new file mode 100644
index 00000000000..0672d9eced4
--- /dev/null
+++ b/sys/arch/i386/i386/lock_machdep.c
@@ -0,0 +1,112 @@
+/* $OpenBSD: lock_machdep.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: lock_machdep.c,v 1.1.2.3 2000/05/03 14:40:30 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 1998, 1999 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
+ * NASA Ames Research Center.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ + +/* + * Machine-dependent spin lock operations. 
+ */
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/systm.h>
+
+#include <machine/atomic.h>
+#include <machine/lock.h>
+
+#include <ddb/db_output.h>
+
+#ifdef LOCKDEBUG
+/*
+ * Out-of-line spin lock primitives; only compiled when LOCKDEBUG is
+ * defined.
+ *
+ * __cpu_simple_lock_init: put *lockp into the unlocked state.
+ */
+void
+__cpu_simple_lock_init(lockp)
+	__cpu_simple_lock_t *lockp;
+{
+	*lockp = __SIMPLELOCK_UNLOCKED;
+}
+
+#if defined (DEBUG) && defined(DDB)
+int spin_limit = 10000000;	/* spins before __cpu_simple_lock() complains */
+#endif
+/*
+ * Acquire *lockp, spinning until i386_atomic_testset_i() stops returning
+ * __SIMPLELOCK_LOCKED (this relies on it returning the previous value of
+ * the lock word).
+ *
+ * With DEBUG and DDB, the spin that brings the count to spin_limit prints
+ * "spundry" and enters the debugger, unless db_active says we are already
+ * in it; spinning resumes afterwards.
+ */
+void
+__cpu_simple_lock(lockp)
+	__cpu_simple_lock_t *lockp;
+{
+#if defined (DEBUG) && defined(DDB)
+	int spincount = 0;
+#endif
+
+	while (i386_atomic_testset_i(lockp, __SIMPLELOCK_LOCKED)
+	    == __SIMPLELOCK_LOCKED) {
+#if defined(DEBUG) && defined(DDB)
+		spincount++;
+		if (spincount == spin_limit) {
+			extern int db_active;
+			db_printf("spundry\n");
+			if (db_active) {
+				db_printf("but already in debugger\n");
+			} else {
+				Debugger();
+			}
+		}
+#endif
+	}
+}
+/*
+ * Make one attempt to acquire *lockp.  Returns 1 if the test-and-set
+ * found it unlocked (the lock is now held), 0 otherwise.
+ */
+int
+__cpu_simple_lock_try(lockp)
+	__cpu_simple_lock_t *lockp;
+{
+
+	if (i386_atomic_testset_i(lockp, __SIMPLELOCK_LOCKED)
+	    == __SIMPLELOCK_UNLOCKED)
+		return (1);
+	return (0);
+}
+/*
+ * Release *lockp with a plain store of __SIMPLELOCK_UNLOCKED.
+ */
+void
+__cpu_simple_unlock(lockp)
+	__cpu_simple_lock_t *lockp;
+{
+	*lockp = __SIMPLELOCK_UNLOCKED;
+}
+
+#endif
diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s
index 883e7f837c4..c738cee27c1 100644
--- a/sys/arch/i386/i386/locore.s
+++ b/sys/arch/i386/i386/locore.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.s,v 1.77 2004/02/01 19:05:23 deraadt Exp $ */
+/* $OpenBSD: locore.s,v 1.78 2004/06/13 21:49:15 niklas Exp $ */
 /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */
 
 /*-
@@ -39,6 +39,8 @@
 #include "npx.h"
 #include "assym.h"
 #include "apm.h"
+#include "lapic.h"
+#include "ioapic.h"
 #include "pctr.h"
 #include "ksyms.h"
 
@@ -63,6 +65,10 @@
 
 #include <dev/isa/isareg.h>
 
+#if NLAPIC > 0
+#include <machine/i82489reg.h>
+#endif
+
 /*
  * override user-land alignment before including asm.h
  */
@@ -83,6 +89,57 @@
 #define NOP pushl %eax ; inb $0x84,%al ; inb $0x84,%al ; popl %eax
 #endif
 
+#define CPL _C_LABEL(lapic_tpr)
+
+#if 
defined(MULTIPROCESSOR) +#include <machine/i82489reg.h> + +#define GET_CPUINFO(reg) \ + movl _C_LABEL(lapic_id),reg ; \ + shrl $LAPIC_ID_SHIFT,reg ; \ + movl _C_LABEL(cpu_info)(,reg,4),reg +#else +#define GET_CPUINFO(reg) \ + leal _C_LABEL(cpu_info_primary),reg +#endif + +#define GET_CURPROC(reg, treg) \ + GET_CPUINFO(treg) ; \ + movl CPU_INFO_CURPROC(treg),reg + +#define PUSH_CURPROC(treg) \ + GET_CPUINFO(treg) ; \ + pushl CPU_INFO_CURPROC(treg) + +#define CLEAR_CURPROC(treg) \ + GET_CPUINFO(treg) ; \ + movl $0,CPU_INFO_CURPROC(treg) + +#define SET_CURPROC(proc,cpu) \ + GET_CPUINFO(cpu) ; \ + movl proc,CPU_INFO_CURPROC(cpu) ; \ + movl cpu,P_CPU(proc) + +#define GET_CURPCB(reg) \ + GET_CPUINFO(reg) ; \ + movl CPU_INFO_CURPCB(reg),reg + +#define SET_CURPCB(reg,treg) \ + GET_CPUINFO(treg) ; \ + movl reg,CPU_INFO_CURPCB(treg) + +#define CLEAR_RESCHED(treg) \ + GET_CPUINFO(treg) ; \ + xorl %eax,%eax ; \ + movl %eax,CPU_INFO_RESCHED(treg) + +#define CHECK_ASTPENDING(treg) \ + GET_CPUINFO(treg) ; \ + cmpl $0,CPU_INFO_ASTPENDING(treg) + +#define CLEAR_ASTPENDING(cireg) \ + movl $0,CPU_INFO_ASTPENDING(cireg) + /* * These are used on interrupt or trap entry or exit. 
*/ @@ -143,11 +200,39 @@ .globl _C_LABEL(cpu_feature), _C_LABEL(cpu_ecxfeature) .globl _C_LABEL(cpu_cache_eax), _C_LABEL(cpu_cache_ebx) .globl _C_LABEL(cpu_cache_ecx), _C_LABEL(cpu_cache_edx) - .globl _C_LABEL(cold), _C_LABEL(esym) + .globl _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem) + .globl _C_LABEL(esym) .globl _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase) - .globl _C_LABEL(proc0paddr), _C_LABEL(curpcb), _C_LABEL(PTDpaddr) - .globl _C_LABEL(dynamic_gdt) + .globl _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr) + .globl _C_LABEL(gdt) .globl _C_LABEL(bootapiver), _C_LABEL(bootargc), _C_LABEL(bootargv) +#ifndef MULTIPROCESSOR + .globl _C_LABEL(curpcb) +#endif + .globl _C_LABEL(lapic_tpr) + +#if NLAPIC > 0 +#ifdef __ELF__ + .align NBPG +#else + .align 12 +#endif + .globl _C_LABEL(local_apic), _C_LABEL(lapic_id) +_C_LABEL(local_apic): + .space LAPIC_ID +_C_LABEL(lapic_id): + .long 0x00000000 + .space LAPIC_TPRI-(LAPIC_ID+4) +_C_LABEL(lapic_tpr): + .space LAPIC_PPRI-LAPIC_TPRI +_C_LABEL(lapic_ppr): + .space LAPIC_ISR-LAPIC_PPRI +_C_LABEL(lapic_isr): + .space NBPG-LAPIC_ISR +#else +_C_LABEL(lapic_tpr): + .long 0 +#endif _C_LABEL(cpu): .long 0 # are we 386, 386sx, 486, 586 or 686 _C_LABEL(cpu_id): .long 0 # saved from 'cpuid' instruction @@ -162,6 +247,8 @@ _C_LABEL(cpu_vendor): .space 16 # vendor string returned by 'cpuid' instruction _C_LABEL(cpu_brandstr): .space 48 # brand string returned by 'cpuid' _C_LABEL(cold): .long 1 # cold till we are not _C_LABEL(esym): .long 0 # ptr to end of syms +_C_LABEL(cnvmem): .long 0 # conventional memory size +_C_LABEL(extmem): .long 0 # extended memory size _C_LABEL(boothowto): .long 0 # boot flags _C_LABEL(atdevbase): .long 0 # location of start of iomem in virtual _C_LABEL(bootapiver): .long 0 # /boot API version @@ -625,6 +712,10 @@ begin: call _C_LABEL(main) NENTRY(proc_trampoline) +#ifdef MULTIPROCESSOR + call _C_LABEL(proc_trampoline_mp) +#endif + movl $IPL_NONE,CPL pushl %ebx call *%esi addl $4,%esp @@ -759,20 
+850,32 @@ ENTRY(fillw) popl %edi ret + +/* Frame pointer reserve on stack. */ +#ifdef DDB +#define FPADD 4 +#else +#define FPADD 0 +#endif + /* * kcopy(caddr_t from, caddr_t to, size_t len); * Copy len bytes, abort on fault. */ ENTRY(kcopy) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif pushl %esi pushl %edi - movl _C_LABEL(curpcb),%eax # load curpcb into eax and set on-fault + GET_CURPCB(%eax) # load curpcb into eax and set on-fault pushl PCB_ONFAULT(%eax) movl $_C_LABEL(copy_fault), PCB_ONFAULT(%eax) - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%ecx + movl 16+FPADD(%esp),%esi + movl 20+FPADD(%esp),%edi + movl 24+FPADD(%esp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax # overlapping? @@ -781,16 +884,19 @@ ENTRY(kcopy) shrl $2,%ecx # copy by 32-bit words rep movsl - movl 24(%esp),%ecx + movl 24+FPADD(%esp),%ecx andl $3,%ecx # any bytes left? rep movsb - movl _C_LABEL(curpcb),%edx + GET_CURPCB(%edx) # XXX save curpcb? popl PCB_ONFAULT(%edx) popl %edi popl %esi xorl %eax,%eax +#ifdef DDB + leave +#endif ret ALIGN_TEXT @@ -802,7 +908,7 @@ ENTRY(kcopy) decl %esi rep movsb - movl 24(%esp),%ecx # copy remainder by 32-bit words + movl 24+FPADD(%esp),%ecx # copy remainder by 32-bit words shrl $2,%ecx subl $3,%esi subl $3,%edi @@ -810,11 +916,14 @@ ENTRY(kcopy) movsl cld - movl _C_LABEL(curpcb),%edx + GET_CURPCB(%edx) popl PCB_ONFAULT(%edx) popl %edi popl %esi xorl %eax,%eax +#ifdef DDB + leave +#endif ret /* @@ -885,13 +994,17 @@ ENTRY(memcpy) * Copy len bytes into the user's address space. */ ENTRY(copyout) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif pushl %esi pushl %edi pushl $0 - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%eax + movl 16+FPADD(%esp),%esi + movl 20+FPADD(%esp),%edi + movl 24+FPADD(%esp),%eax /* * We check that the end of the destination buffer is not past the end @@ -930,7 +1043,7 @@ ENTRY(copyout) /* Compute PTE offset for start address. 
*/ shrl $PGSHIFT,%edi - movl _C_LABEL(curpcb), %edx + GET_CURPCB(%edx) movl $2f, PCB_ONFAULT(%edx) 1: /* Check PTE for each page. */ @@ -941,8 +1054,8 @@ ENTRY(copyout) decl %ecx jns 1b - movl 20(%esp),%edi - movl 24(%esp),%eax + movl 20+FPADD(%esp),%edi + movl 24+FPADD(%esp),%eax jmp 3f 2: /* Simulate a trap. */ @@ -958,7 +1071,7 @@ ENTRY(copyout) jmp _C_LABEL(copy_fault) #endif /* I386_CPU */ -3: movl _C_LABEL(curpcb),%edx +3: GET_CURPCB(%edx) movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx) /* bcopy(%esi, %edi, %eax); */ @@ -976,6 +1089,9 @@ ENTRY(copyout) popl %edi popl %esi xorl %eax,%eax +#ifdef DDB + leave +#endif ret /* @@ -983,15 +1099,19 @@ ENTRY(copyout) * Copy len bytes from the user's address space. */ ENTRY(copyin) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif pushl %esi pushl %edi - movl _C_LABEL(curpcb),%eax + GET_CURPCB(%eax) pushl $0 movl $_C_LABEL(copy_fault),PCB_ONFAULT(%eax) - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%eax + movl 16+FPADD(%esp),%esi + movl 20+FPADD(%esp),%edi + movl 24+FPADD(%esp),%eax /* * We check that the end of the destination buffer is not past the end @@ -1015,19 +1135,25 @@ ENTRY(copyin) rep movsb - movl _C_LABEL(curpcb),%edx + GET_CURPCB(%edx) popl PCB_ONFAULT(%edx) popl %edi popl %esi xorl %eax,%eax +#ifdef DDB + leave +#endif ret ENTRY(copy_fault) - movl _C_LABEL(curpcb),%edx + GET_CURPCB(%edx) popl PCB_ONFAULT(%edx) popl %edi popl %esi movl $EFAULT,%eax +#ifdef DDB + leave +#endif ret /* @@ -1038,12 +1164,16 @@ ENTRY(copy_fault) * return 0 or EFAULT. 
*/ ENTRY(copyoutstr) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif pushl %esi pushl %edi - movl 12(%esp),%esi # esi = from - movl 16(%esp),%edi # edi = to - movl 20(%esp),%edx # edx = maxlen + movl 12+FPADD(%esp),%esi # esi = from + movl 16+FPADD(%esp),%edi # edi = to + movl 20+FPADD(%esp),%edx # edx = maxlen #if defined(I386_CPU) #if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) @@ -1057,8 +1187,8 @@ ENTRY(copyoutstr) movl $NBPG,%ecx subl %eax,%ecx # ecx = NBPG - (src % NBPG) - movl _C_LABEL(curpcb), %eax - movl $6f, PCB_ONFAULT(%eax) + GET_CURPCB(%eax) # %eax is scratch; %ecx still holds the byte count + movl $6f, PCB_ONFAULT(%eax) 1: /* * Once per page, check that we are still within the bounds of user @@ -1112,7 +1242,7 @@ ENTRY(copyoutstr) #endif /* I386_CPU */ #if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) -5: movl _C_LABEL(curpcb), %eax +5: GET_CURPCB(%eax) movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax) /* * Get min(%edx, VM_MAXUSER_ADDRESS-%edi). @@ -1122,7 +1252,7 @@ ENTRY(copyoutstr) cmpl %edx,%eax jae 1f movl %eax,%edx - movl %eax,20(%esp) + movl %eax,20+FPADD(%esp) 1: incl %edx cld @@ -1154,14 +1284,18 @@ ENTRY(copyoutstr) * return 0 or EFAULT. */ ENTRY(copyinstr) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif pushl %esi pushl %edi - movl _C_LABEL(curpcb),%ecx + GET_CURPCB(%ecx) movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx) - movl 12(%esp),%esi # %esi = from - movl 16(%esp),%edi # %edi = to - movl 20(%esp),%edx # %edx = maxlen + movl 12+FPADD(%esp),%esi # %esi = from + movl 16+FPADD(%esp),%edi # %edi = to + movl 20+FPADD(%esp),%edx # %edx = maxlen /* * Get min(%edx, VM_MAXUSER_ADDRESS-%esi). @@ -1171,7 +1305,7 @@ ENTRY(copyinstr) cmpl %edx,%eax jae 1f movl %eax,%edx - movl %eax,20(%esp) + movl %eax,20+FPADD(%esp) 1: incl %edx cld @@ -1199,17 +1333,20 @@ ENTRY(copystr_fault) copystr_return: /* Set *lencopied and return %eax. 
*/ - movl _C_LABEL(curpcb),%ecx + GET_CURPCB(%ecx) movl $0,PCB_ONFAULT(%ecx) - movl 20(%esp),%ecx + movl 20+FPADD(%esp),%ecx subl %edx,%ecx - movl 24(%esp),%edx + movl 24+FPADD(%esp),%edx testl %edx,%edx jz 8f movl %ecx,(%edx) 8: popl %edi popl %esi +#ifdef DDB + leave +#endif ret /* @@ -1219,12 +1356,16 @@ copystr_return: * string is too long, return ENAMETOOLONG; else return 0. */ ENTRY(copystr) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif pushl %esi pushl %edi - movl 12(%esp),%esi # esi = from - movl 16(%esp),%edi # edi = to - movl 20(%esp),%edx # edx = maxlen + movl 12+FPADD(%esp),%esi # esi = from + movl 16+FPADD(%esp),%edi # edi = to + movl 20+FPADD(%esp),%edx # edx = maxlen incl %edx cld @@ -1244,15 +1385,18 @@ ENTRY(copystr) movl $ENAMETOOLONG,%eax 6: /* Set *lencopied and return %eax. */ - movl 20(%esp),%ecx + movl 20+FPADD(%esp),%ecx subl %edx,%ecx - movl 24(%esp),%edx + movl 24+FPADD(%esp),%edx testl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi +#ifdef DDB + leave +#endif ret /*****************************************************************************/ @@ -1277,6 +1421,8 @@ NENTRY(lgdt) movw %ax,%ds movw %ax,%es movw %ax,%ss + movl $GSEL(GCPU_SEL, SEL_KPL),%eax + movw %ax,%fs /* Reload code selector by doing intersegment return. */ popl %eax pushl $GSEL(GCODE_SEL, SEL_KPL) @@ -1389,11 +1535,85 @@ NENTRY(remrunqueue) * something to come ready. */ ENTRY(idle) + /* Skip context saving if we have none. */ + testl %esi,%esi + jz 1f + + /* + * idling: save old context. + * + * Registers: + * %eax, %ebx, %ecx - scratch + * %esi - old proc, then old pcb + * %edi - idle pcb + * %edx - idle TSS selector + */ + + pushl %esi + call _C_LABEL(pmap_deactivate) # pmap_deactivate(oldproc) + addl $4,%esp + + movl P_ADDR(%esi),%esi + + /* Save stack pointers. 
*/ + movl %esp,PCB_ESP(%esi) + movl %ebp,PCB_EBP(%esi) + + /* Find idle PCB for this CPU */ +#ifndef MULTIPROCESSOR + movl $_C_LABEL(proc0),%ebx + movl P_ADDR(%ebx),%edi + movl P_MD_TSS_SEL(%ebx),%edx +#else + GET_CPUINFO(%ebx) + movl CPU_INFO_IDLE_PCB(%ebx),%edi + movl CPU_INFO_IDLE_TSS_SEL(%ebx),%edx +#endif + + /* Restore the idle context (avoid interrupts) */ cli - movl _C_LABEL(whichqs),%ecx - testl %ecx,%ecx - jnz sw1 + + /* Restore stack pointers. */ + movl PCB_ESP(%edi),%esp + movl PCB_EBP(%edi),%ebp + + + /* Switch address space. */ + movl PCB_CR3(%edi),%ecx + movl %ecx,%cr3 + + /* Switch TSS. Reset "task busy" flag before loading. */ +#ifdef MULTIPROCESSOR + movl CPU_INFO_GDT(%ebx),%eax +#else + movl _C_LABEL(gdt),%eax +#endif + andl $~0x0200,4-SEL_KPL(%eax,%edx,1) + ltr %dx + + /* We're always in the kernel, so we don't need the LDT. */ + + /* Restore cr0 (including FPU state). */ + movl PCB_CR0(%edi),%ecx + movl %ecx,%cr0 + + /* Record new pcb. */ + SET_CURPCB(%edi,%ecx) + + xorl %esi,%esi sti + +1: +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) + call _C_LABEL(sched_unlock_idle) +#endif + + movl $IPL_NONE,CPL # spl0() + call _C_LABEL(Xspllower) # process pending interrupts + +ENTRY(idle_loop) + cmpl $0,_C_LABEL(whichqs) + jnz _C_LABEL(idle_exit) #if NAPM > 0 call _C_LABEL(apm_cpu_idle) cmpl $0,_C_LABEL(apm_dobusy) @@ -1407,8 +1627,24 @@ ENTRY(idle) #else hlt #endif - jmp _C_LABEL(idle) + jmp _C_LABEL(idle_loop) +ENTRY(idle_exit) + movl $IPL_HIGH,CPL # splhigh +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) + call _C_LABEL(sched_lock_idle) +#endif +#if 0 + GET_CPUINFO(%ebx) + leal CPU_INFO_NAME(%ebx),%ebx + pushl %ebx + pushl $1f + call _C_LABEL(printf) + addl $8,%esp +#endif + jmp switch_search +1: .asciz "%s: unidle\n" + #ifdef DIAGNOSTIC NENTRY(switch_error) pushl $1f @@ -1427,9 +1663,9 @@ ENTRY(cpu_switch) pushl %ebx pushl %esi pushl %edi - pushl _C_LABEL(cpl) + pushl CPL - movl _C_LABEL(curproc),%esi + GET_CURPROC(%esi,%ecx) /* * Clear 
curproc so that we don't accumulate system time while idle. @@ -1438,10 +1674,7 @@ ENTRY(cpu_switch) * below and changes the priority. (See corresponding comment in * userret()). */ - movl $0,_C_LABEL(curproc) - - movl $IPL_NONE,_C_LABEL(cpl) # spl0() - call _C_LABEL(Xspllower) # process pending interrupts + CLEAR_CURPROC(%ecx) switch_search: /* @@ -1457,14 +1690,10 @@ switch_search: */ /* Wait for new process. */ - cli # splhigh doesn't do a cli movl _C_LABEL(whichqs),%ecx - -sw1: bsfl %ecx,%ebx # find a full q + bsfl %ecx,%ebx # find a full q jz _C_LABEL(idle) # if none, idle - leal _C_LABEL(qs)(,%ebx,8),%eax # select q - movl P_FORW(%eax),%edi # unlink from front of process q #ifdef DIAGNOSTIC cmpl %edi,%eax # linked to self (i.e. nothing queued)? @@ -1481,8 +1710,7 @@ sw1: bsfl %ecx,%ebx # find a full q movl %ecx,_C_LABEL(whichqs) # update q status 3: /* We just did it. */ - xorl %eax,%eax - movl %eax,_C_LABEL(want_resched) + CLEAR_RESCHED(%ecx) #ifdef DIAGNOSTIC cmpl %eax,P_WCHAN(%edi) # Waiting for something? @@ -1495,10 +1723,8 @@ sw1: bsfl %ecx,%ebx # find a full q movl %eax,P_BACK(%edi) /* Record new process. */ - movl %edi,_C_LABEL(curproc) - - /* It's okay to take interrupts here. */ - sti + movb $SONPROC,P_STAT(%edi) # p->p_stat = SONPROC + SET_CURPROC(%edi,%ecx) /* Skip context switch if same process. */ cmpl %edi,%esi @@ -1517,6 +1743,10 @@ sw1: bsfl %ecx,%ebx # find a full q * %edi - new process */ + pushl %esi + call _C_LABEL(pmap_deactivate) + addl $4,%esp + movl P_ADDR(%esi),%esi /* Save segment registers. */ @@ -1557,13 +1787,24 @@ switch_exited: jnz switch_restored #endif + /* + * Activate the address space. We're curproc, so %cr3 will + * be reloaded, but we're not yet curpcb, so the LDT won't + * be reloaded, although the PCB copy of the selector will + * be refreshed from the pmap. + */ + pushl %edi + call _C_LABEL(pmap_activate) + addl $4,%esp + /* Load TSS info. 
*/ - movl _C_LABEL(dynamic_gdt),%eax - movl PCB_TSS_SEL(%esi),%edx - - /* Switch address space. */ - movl PCB_CR3(%esi),%ecx - movl %ecx,%cr3 +#ifdef MULTIPROCESSOR + GET_CPUINFO(%ebx) + movl CPU_INFO_GDT(%ebx),%eax +#else + movl _C_LABEL(gdt),%eax +#endif + movl P_MD_TSS_SEL(%edi),%edx /* Switch TSS. */ andl $~0x0200,4-SEL_KPL(%eax,%edx,1) @@ -1590,51 +1831,87 @@ switch_exited: switch_restored: /* Restore cr0 (including FPU state). */ movl PCB_CR0(%esi),%ecx +#ifdef MULTIPROCESSOR + /* + * If our floating point registers are on a different CPU, + * clear CR0_TS so we'll trap rather than reuse bogus state. + */ + GET_CPUINFO(%ebx) + cmpl PCB_FPCPU(%esi),%ebx + jz 1f + orl $CR0_TS,%ecx +1: +#endif movl %ecx,%cr0 /* Record new pcb. */ - movl %esi,_C_LABEL(curpcb) + SET_CURPCB(%esi, %ecx) /* Interrupts are okay again. */ sti switch_return: +#if 0 + pushl %edi + GET_CPUINFO(%ebx) + leal CPU_INFO_NAME(%ebx),%ebx + pushl %ebx + pushl $1f + call _C_LABEL(printf) + addl $0xc,%esp +#endif +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) + call _C_LABEL(sched_unlock_idle) +#endif /* * Restore old cpl from stack. Note that this is always an increase, * due to the spl0() on entry. */ - popl _C_LABEL(cpl) + popl CPL movl %edi,%eax # return (p); popl %edi popl %esi popl %ebx ret - +1: .asciz "%s: scheduled %x\n" /* * switch_exit(struct proc *p); - * Switch to proc0's saved context and deallocate the address space and kernel - * stack for p. Then jump into cpu_switch(), as if we were in proc0 all along. + * Switch to the appropriate idle context (proc0's if uniprocessor; the cpu's if + * multiprocessor) and deallocate the address space and kernel stack for p. + * Then jump into cpu_switch(), as if we were in the idle proc all along. 
*/ +#ifndef MULTIPROCESSOR .globl _C_LABEL(proc0) +#endif ENTRY(switch_exit) movl 4(%esp),%edi # old process +#ifndef MULTIPROCESSOR movl $_C_LABEL(proc0),%ebx + movl P_ADDR(%ebx),%esi + movl P_MD_TSS_SEL(%ebx),%edx +#else + GET_CPUINFO(%ebx) + movl CPU_INFO_IDLE_PCB(%ebx),%esi + movl CPU_INFO_IDLE_TSS_SEL(%ebx),%edx +#endif /* In case we fault... */ - movl $0,_C_LABEL(curproc) + CLEAR_CURPROC(%ecx) - /* Restore proc0's context. */ + /* Restore the idle context. */ cli - movl P_ADDR(%ebx),%esi /* Restore stack pointers. */ movl PCB_ESP(%esi),%esp movl PCB_EBP(%esi),%ebp /* Load TSS info. */ - movl _C_LABEL(dynamic_gdt),%eax - movl PCB_TSS_SEL(%esi),%edx +#ifdef MULTIPROCESSOR + movl CPU_INFO_GDT(%ebx),%eax +#else + movl _C_LABEL(gdt),%eax +#endif /* Switch address space. */ movl PCB_CR3(%esi),%ecx @@ -1656,7 +1933,7 @@ ENTRY(switch_exit) movl %ecx,%cr0 /* Record new pcb. */ - movl %esi,_C_LABEL(curpcb) + SET_CURPCB(%esi, %ecx) /* Interrupts are okay again. */ sti @@ -1669,8 +1946,8 @@ ENTRY(switch_exit) addl $4,%esp /* Jump into cpu_switch() with the right state. */ - movl %ebx,%esi - movl $0,_C_LABEL(curproc) + xorl %esi,%esi + CLEAR_CURPROC(%ecx) jmp switch_search /* @@ -1746,7 +2023,12 @@ IDTVEC(dna) pushl $0 # dummy error code pushl $T_DNA INTRENTRY - pushl _C_LABEL(curproc) +#ifdef MULTIPROCESSOR + GET_CPUINFO(%eax) + pushl %eax +#else + pushl $_C_LABEL(cpu_info_primary) +#endif call *_C_LABEL(npxdna_func) addl $4,%esp testl %eax,%eax @@ -1784,6 +2066,16 @@ IDTVEC(page) TRAP(T_PAGEFLT) IDTVEC(rsvd) ZTRAP(T_RESERVED) +IDTVEC(intrspurious) + /* + * The Pentium Pro local APIC may erroneously call this vector for a + * default IR7. Just ignore it. + * + * (The local APIC does this when CPL is raised while it's on the + * way to delivering an interrupt.. presumably enough has been set + * up that it's inconvenient to abort delivery completely..) 
+ */ + iret IDTVEC(fpu) #if NNPX > 0 /* @@ -1794,7 +2086,7 @@ IDTVEC(fpu) pushl $0 # dummy error code pushl $T_ASTFLT INTRENTRY - pushl _C_LABEL(cpl) # if_ppl in intrframe + pushl CPL # if_ppl in intrframe pushl %esp # push address of intrframe incl _C_LABEL(uvmexp)+V_TRAP call _C_LABEL(npxintr) @@ -1826,12 +2118,12 @@ NENTRY(alltraps) INTRENTRY calltrap: #ifdef DIAGNOSTIC - movl _C_LABEL(cpl),%ebx + movl CPL,%ebx #endif /* DIAGNOSTIC */ call _C_LABEL(trap) 2: /* Check for ASTs on exit to user mode. */ cli - cmpb $0,_C_LABEL(astpending) + CHECK_ASTPENDING(%ecx) je 1f testb $SEL_RPL,TF_CS(%esp) #ifdef VM86 @@ -1839,15 +2131,15 @@ calltrap: testl $PSL_VM,TF_EFLAGS(%esp) #endif jz 1f -5: movb $0,_C_LABEL(astpending) +5: CLEAR_ASTPENDING(%ecx) sti movl $T_ASTFLT,TF_TRAPNO(%esp) call _C_LABEL(trap) jmp 2b #ifndef DIAGNOSTIC 1: INTRFASTEXIT -#else /* DIAGNOSTIC */ -1: cmpl _C_LABEL(cpl),%ebx +#else +1: cmpl CPL,%ebx jne 3f INTRFASTEXIT 3: sti @@ -1857,7 +2149,7 @@ calltrap: #if defined(DDB) && 0 int $3 #endif /* DDB */ - movl %ebx,_C_LABEL(cpl) + movl %ebx,CPL jmp 2b 4: .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT\n" #endif /* DIAGNOSTIC */ @@ -1888,17 +2180,17 @@ syscall1: call _C_LABEL(syscall) 2: /* Check for ASTs on exit to user mode. */ cli - cmpb $0,_C_LABEL(astpending) + CHECK_ASTPENDING(%ecx) je 1f /* Always returning to user mode here. */ - movb $0,_C_LABEL(astpending) + CLEAR_ASTPENDING(%ecx) sti /* Pushed T_ASTFLT into tf_trapno on entry. 
*/ call _C_LABEL(trap) jmp 2b 1: INTRFASTEXIT -#include <i386/isa/vector.s> +#include <i386/i386/vector.s> #include <i386/isa/icu.s> /* @@ -2039,3 +2331,7 @@ ENTRY(i686_pagezero) popl %edi ret #endif + +#if NLAPIC > 0 +#include <i386/i386/apicvec.s> +#endif diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index 07ac282f32d..2bd03934ac6 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.296 2004/06/09 16:01:48 tedu Exp $ */ +/* $OpenBSD: machdep.c,v 1.297 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -115,6 +115,7 @@ #include <machine/cpu.h> #include <machine/cpufunc.h> +#include <machine/cpuvar.h> #include <machine/gdt.h> #include <machine/pio.h> #include <machine/bus.h> @@ -248,6 +249,9 @@ int p4_model; int p3_step; int setperf_prio = 0; /* for concurrent handlers */ +void (*delay_func)(int) = i8254_delay; +void (*microtime_func)(struct timeval *) = i8254_microtime; +void (*initclock_func)(void) = i8254_initclocks; void (*update_cpuspeed)(void) = NULL; /* @@ -272,7 +276,7 @@ caddr_t allocsys(caddr_t); void setup_buffers(vaddr_t *); void dumpsys(void); int cpu_dump(void); -void identifycpu(void); +void old_identifycpu(void); void init386(paddr_t); void consinit(void); void (*cpuresetfn)(void); @@ -397,7 +401,8 @@ cpu_startup() printf("%s", version); startrtclock(); - identifycpu(); + /* XXX Merge with identifycpu */ + old_identifycpu(); printf("real mem = %u (%uK)\n", ctob(physmem), ctob(physmem)/1024); /* @@ -458,10 +463,11 @@ cpu_startup() void i386_proc0_tss_ldt_init() { - struct pcb *pcb; int x; + struct pcb *pcb; curpcb = pcb = &proc0.p_addr->u_pcb; + pcb->pcb_tss.tss_ioopt = ((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16; for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++) @@ -472,14 +478,33 @@ i386_proc0_tss_ldt_init() pcb->pcb_cr0 = rcr0(); pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); 
pcb->pcb_tss.tss_esp0 = (int)proc0.p_addr + USPACE - 16; - tss_alloc(pcb); + proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_esp0 - 1; + proc0.p_md.md_tss_sel = tss_alloc(pcb); - ltr(pcb->pcb_tss_sel); + ltr(proc0.p_md.md_tss_sel); lldt(pcb->pcb_ldt_sel); - - proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_esp0 - 1; } +#ifdef MULTIPROCESSOR +void +i386_init_pcb_tss_ldt(struct cpu_info *ci) +{ + int x; + struct pcb *pcb = ci->ci_idle_pcb; + + pcb->pcb_tss.tss_ioopt = + ((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16; + for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++) + pcb->pcb_iomap[x] = 0xffffffff; + pcb->pcb_iomap_pad = 0xff; + + pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); + pcb->pcb_cr0 = rcr0(); + ci->ci_idle_tss_sel = tss_alloc(pcb); +} +#endif /* MULTIPROCESSOR */ + + /* * Allocate space for system data structures. We are given * a starting virtual address and we return a final virtual @@ -1102,13 +1127,12 @@ winchip_cpu_setup(cpu_device, model, step) { #if defined(I586_CPU) - switch (model) { + switch ((curcpu()->ci_signature >> 4) & 15) { /* model */ case 4: /* WinChip C6 */ - cpu_feature &= ~CPUID_TSC; + curcpu()->ci_feature_flags &= ~CPUID_TSC; /* Disable RDTSC instruction from user-level. 
*/ lcr4(rcr4() | CR4_TSD); - - printf("%s: TSC disabled\n", cpu_device); + printf("%s: TSC disabled\n", curcpu()->ci_dev.dv_xname); break; } #endif @@ -1212,7 +1236,7 @@ cyrix6x86_cpu_setup(cpu_device, model, step) #if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) extern int clock_broken_latch; - switch (model) { + switch ((curcpu()->ci_signature >> 4) & 15) { /* model */ case -1: /* M1 w/o cpuid */ case 2: /* M1 */ /* set up various cyrix registers */ @@ -1228,11 +1252,13 @@ cyrix6x86_cpu_setup(cpu_device, model, step) /* disable access to ccr4/ccr5 */ cyrix_write_reg(0xC3, cyrix_read_reg(0xC3) & ~0x10); - printf("%s: xchg bug workaround performed\n", cpu_device); + printf("%s: xchg bug workaround performed\n", + curcpu()->ci_dev.dv_xname); break; /* fallthrough? */ - case 4: + case 4: /* GXm */ + /* Unset the TSC bit until calibrate_delay() gets fixed. */ clock_broken_latch = 1; - cpu_feature &= ~CPUID_TSC; + curcpu()->ci_feature_flags &= ~CPUID_TSC; printf("%s: TSC disabled\n", cpu_device); break; } @@ -1278,8 +1304,11 @@ intel586_cpu_setup(cpu_device, model, step) int model, step; { #if defined(I586_CPU) - fix_f00f(); - printf("%s: F00F bug workaround installed\n", cpu_device); + if (!cpu_f00f_bug) { + fix_f00f(); + printf("%s: F00F bug workaround installed\n", + curcpu()->ci_dev.dv_xname); + } #endif } @@ -1365,6 +1394,9 @@ intel686_common_cpu_setup(const char *cpu_device, int model, int step) void intel686_cpu_setup(const char *cpu_device, int model, int step) { + struct cpu_info *ci = curcpu(); + /* XXX SMP int model = (ci->ci_signature >> 4) & 15; */ + /* XXX SMP int step = ci->ci_signature & 15; */ u_quad_t msr119; intel686_common_cpu_setup(cpu_device, model, step); @@ -1374,19 +1406,20 @@ intel686_cpu_setup(const char *cpu_device, int model, int step) * From Intel Application Note #485. */ if ((model == 1) && (step < 3)) - cpu_feature &= ~CPUID_SEP; + ci->ci_feature_flags &= ~CPUID_SEP; /* * Disable the Pentium3 serial number. 
*/ - if ((model == 7) && (cpu_feature & CPUID_SER)) { + if ((model == 7) && (ci->ci_feature_flags & CPUID_SER)) { msr119 = rdmsr(MSR_BBL_CR_CTL); msr119 |= 0x0000000000200000LL; wrmsr(MSR_BBL_CR_CTL, msr119); - printf("%s: disabling processor serial number\n", cpu_device); - cpu_feature &= ~CPUID_SER; - cpuid_level = 2; + printf("%s: disabling processor serial number\n", + ci->ci_dev.dv_xname); + ci->ci_feature_flags &= ~CPUID_SER; + ci->ci_level = 2; } #if !defined(SMALL_KERNEL) && defined(I686_CPU) @@ -1474,6 +1507,249 @@ cyrix3_cpu_name(model, step) return name; } +/* XXXSMP: must be shared with UP */ +#ifdef MULTIPROCESSOR +/* + * Print identification for the given CPU. + * XXX XXX + * This is not as clean as one might like, because it references + * + * the "cpuid_level" and "cpu_vendor" globals. + * cpuid_level isn't so bad, since both CPU's will hopefully + * be of the same level. + * + * The Intel multiprocessor spec doesn't give us the cpu_vendor + * information; however, the chance of multi-vendor SMP actually + * ever *working* is sufficiently low that it's probably safe to assume + * all processors are of the same vendor. 
+ */ + +void +identifycpu(struct cpu_info *ci) +{ + extern char cpu_vendor[]; +#ifdef CPUDEBUG + extern int cpu_cache_eax, cpu_cache_ebx, cpu_cache_ecx, cpu_cache_edx; +#else + extern int cpu_cache_edx; +#endif + const char *name, *modifier, *vendorname, *token; + int class = CPUCLASS_386, vendor, i, max; + int family, model, step, modif, cachesize; + const struct cpu_cpuid_nameclass *cpup = NULL; + + char *cpu_device = ci->ci_dev.dv_xname; + /* XXX SMP XXX void (*cpu_setup)(const char *, int, int); */ + + if (cpuid_level == -1) { +#ifdef DIAGNOSTIC + if (cpu < 0 || cpu >= + (sizeof i386_nocpuid_cpus/sizeof(struct cpu_nocpuid_nameclass))) + panic("unknown cpu type %d", cpu); +#endif + name = i386_nocpuid_cpus[cpu].cpu_name; + vendor = i386_nocpuid_cpus[cpu].cpu_vendor; + vendorname = i386_nocpuid_cpus[cpu].cpu_vendorname; + model = -1; + step = -1; + class = i386_nocpuid_cpus[cpu].cpu_class; + ci->cpu_setup = i386_nocpuid_cpus[cpu].cpu_setup; + modifier = ""; + token = ""; + } else { + max = sizeof (i386_cpuid_cpus) / sizeof (i386_cpuid_cpus[0]); + modif = (ci->ci_signature >> 12) & 3; + family = (ci->ci_signature >> 8) & 15; + if (family < CPU_MINFAMILY) + panic("identifycpu: strange family value"); + model = (ci->ci_signature >> 4) & 15; + step = ci->ci_signature & 15; +#ifdef CPUDEBUG + printf("%s: family %x model %x step %x\n", cpu_device, family, + model, step); + printf("%s: cpuid level %d cache eax %x ebx %x ecx %x edx %x\n", + cpu_device, cpuid_level, cpu_cache_eax, cpu_cache_ebx, + cpu_cache_ecx, cpu_cache_edx); +#endif + + for (i = 0; i < max; i++) { + if (!strncmp(cpu_vendor, + i386_cpuid_cpus[i].cpu_id, 12)) { + cpup = &i386_cpuid_cpus[i]; + break; + } + } + + if (cpup == NULL) { + vendor = CPUVENDOR_UNKNOWN; + if (cpu_vendor[0] != '\0') + vendorname = &cpu_vendor[0]; + else + vendorname = "Unknown"; + if (family > CPU_MAXFAMILY) + family = CPU_MAXFAMILY; + class = family - 3; + modifier = ""; + name = ""; + token = ""; + ci->cpu_setup = NULL; + } else 
{ + token = cpup->cpu_id; + vendor = cpup->cpu_vendor; + vendorname = cpup->cpu_vendorname; + modifier = modifiers[modif]; + if (family > CPU_MAXFAMILY) { + family = CPU_MAXFAMILY; + model = CPU_DEFMODEL; + } else if (model > CPU_MAXMODEL) + model = CPU_DEFMODEL; + i = family - CPU_MINFAMILY; + + /* Special hack for the PentiumII/III series. */ + if (vendor == CPUVENDOR_INTEL && family == 6 && + (model == 5 || model == 7)) { + name = intel686_cpu_name(model); + } else + name = cpup->cpu_family[i].cpu_models[model]; + if (name == NULL) + name = cpup->cpu_family[i].cpu_models[CPU_DEFMODEL]; + class = cpup->cpu_family[i].cpu_class; + ci->cpu_setup = cpup->cpu_family[i].cpu_setup; + } + } + + /* Find the amount of on-chip L2 cache. Add support for AMD K6-3...*/ + cachesize = -1; + if (vendor == CPUVENDOR_INTEL && cpuid_level >= 2 && family < 0xf) { + int intel_cachetable[] = { 0, 128, 256, 512, 1024, 2048 }; + if ((cpu_cache_edx & 0xFF) >= 0x40 && + (cpu_cache_edx & 0xFF) <= 0x45) + cachesize = intel_cachetable[(cpu_cache_edx & 0xFF) - 0x40]; + } + + if ((ci->ci_flags & CPUF_BSP) == 0) { + if (cachesize > -1) { + snprintf(cpu_model, sizeof(cpu_model), + "%s %s%s (%s%s%s%s-class, %dKB L2 cache)", + vendorname, modifier, name, + ((*token) ? "\"" : ""), ((*token) ? token : ""), + ((*token) ? "\" " : ""), classnames[class], cachesize); + } else { + snprintf(cpu_model, sizeof(cpu_model), + "%s %s%s (%s%s%s%s-class)", + vendorname, modifier, name, + ((*token) ? "\"" : ""), ((*token) ? token : ""), + ((*token) ? 
"\" " : ""), classnames[class]); + } + + printf("%s: %s", cpu_device, cpu_model); + } + +#if defined(I586_CPU) || defined(I686_CPU) + if (ci->ci_feature_flags && (ci->ci_feature_flags & CPUID_TSC)) { + /* Has TSC */ + calibrate_cyclecounter(); + if (pentium_mhz > 994) { + int ghz, fr; + + ghz = (pentium_mhz + 9) / 1000; + fr = ((pentium_mhz + 9) / 10 ) % 100; + if ((ci->ci_flags & CPUF_BSP) == 0) { + if (fr) + printf(" %d.%02d GHz", ghz, fr); + else + printf(" %d GHz", ghz); + } + } else { + if ((ci->ci_flags & CPUF_BSP) == 0) { + printf(" %d MHz", pentium_mhz); + } + } + } +#endif + if ((ci->ci_flags & CPUF_BSP) == 0) { + printf("\n"); + + if (ci->ci_feature_flags) { + int numbits = 0; + + printf("%s: ", cpu_device); + max = sizeof(i386_cpuid_features) + / sizeof(i386_cpuid_features[0]); + for (i = 0; i < max; i++) { + if (ci->ci_feature_flags & + i386_cpuid_features[i].feature_bit) { + printf("%s%s", (numbits == 0 ? "" : ","), + i386_cpuid_features[i].feature_name); + numbits++; + } + } + printf("\n"); + } + } + + cpu_class = class; + ci->cpu_class = class; + + /* + * Now that we have told the user what they have, + * let them know if that machine type isn't configured. + */ + switch (cpu_class) { +#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU) +#error No CPU classes configured. 
+#endif +#ifndef I686_CPU + case CPUCLASS_686: + printf("NOTICE: this kernel does not support Pentium Pro CPU class\n"); +#ifdef I586_CPU + printf("NOTICE: lowering CPU class to i586\n"); + cpu_class = CPUCLASS_586; + break; +#endif +#endif +#ifndef I586_CPU + case CPUCLASS_586: + printf("NOTICE: this kernel does not support Pentium CPU class\n"); +#ifdef I486_CPU + printf("NOTICE: lowering CPU class to i486\n"); + cpu_class = CPUCLASS_486; + break; +#endif +#endif +#ifndef I486_CPU + case CPUCLASS_486: + printf("NOTICE: this kernel does not support i486 CPU class\n"); +#ifdef I386_CPU + printf("NOTICE: lowering CPU class to i386\n"); + cpu_class = CPUCLASS_386; + break; +#endif +#endif +#ifndef I386_CPU + case CPUCLASS_386: + printf("NOTICE: this kernel does not support i386 CPU class\n"); + panic("no appropriate CPU class available"); +#endif + default: + break; + } + + if (cpu == CPU_486DLC) { +#ifndef CYRIX_CACHE_WORKS + printf("WARNING: CYRIX 486DLC CACHE UNCHANGED.\n"); +#else +#ifndef CYRIX_CACHE_REALLY_WORKS + printf("WARNING: CYRIX 486DLC CACHE ENABLED IN HOLD-FLUSH MODE.\n"); +#else + printf("WARNING: CYRIX 486DLC CACHE ENABLED.\n"); +#endif +#endif + } + +} +#endif /* MULTIPROCESSOR */ + char * tm86_cpu_name(model) int model; @@ -1495,7 +1771,7 @@ tm86_cpu_name(model) } void -identifycpu() +old_identifycpu() { extern char cpu_vendor[]; extern char cpu_brandstr[]; @@ -2402,8 +2678,8 @@ setregs(p, pack, stack, retval) #if NNPX > 0 /* If we were using the FPU, forget about it. 
*/ - if (npxproc == p) - npxdrop(); + if (pcb->pcb_fpcpu != NULL) + npxsave_proc(p, 0); #endif #ifdef USER_LDT @@ -2437,7 +2713,6 @@ setregs(p, pack, stack, retval) * Initialize segments and descriptor tables */ -union descriptor gdt[NGDT]; union descriptor ldt[NLDT]; struct gate_descriptor idt_region[NIDT]; struct gate_descriptor *idt = idt_region; @@ -2462,6 +2737,20 @@ setgate(gd, func, args, type, dpl, seg) } void +unsetgate(gd) + struct gate_descriptor *gd; +{ + gd->gd_p = 0; + gd->gd_hioffset = 0; + gd->gd_looffset = 0; + gd->gd_selector = 0; + gd->gd_xx = 0; + gd->gd_stkcpy = 0; + gd->gd_type = 0; + gd->gd_dpl = 0; +} + +void setregion(rd, base, limit) struct region_descriptor *rd; void *base; @@ -2521,8 +2810,8 @@ fix_f00f(void) idt = p; /* Fix up paging redirect */ - setgate(&idt[ 14], &IDTVEC(f00f_redirect), 0, SDT_SYS386TGT, - SEL_KPL, GCODE_SEL); + setgate(&idt[ 14], &IDTVEC(f00f_redirect), 0, SDT_SYS386TGT, SEL_KPL, + GCODE_SEL); /* Map first page RO */ pte = PTE_BASE + i386_btop(va); @@ -2537,6 +2826,16 @@ fix_f00f(void) } #endif +#ifdef MULTIPROCESSOR +void +cpu_init_idt() +{ + struct region_descriptor region; + setregion(&region, idt, NIDT * sizeof(idt[0]) - 1); + lidt(&region); +} +#endif /* MULTIPROCESSOR */ + void init386(paddr_t first_avail) { @@ -2565,18 +2864,20 @@ init386(paddr_t first_avail) (caddr_t)iomem_ex_storage, sizeof(iomem_ex_storage), EX_NOCOALESCE|EX_NOWAIT); - /* make gdt gates and memory segments */ + /* make bootstrap gdt gates and memory segments */ setsegment(&gdt[GCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1); setsegment(&gdt[GICODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1); setsegment(&gdt[GDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 1); - setsegment(&gdt[GLDT_SEL].sd, ldt, sizeof(ldt) - 1, SDT_SYSLDT, SEL_KPL, - 0, 0); + setsegment(&gdt[GLDT_SEL].sd, ldt, sizeof(ldt) - 1, SDT_SYSLDT, + SEL_KPL, 0, 0); setsegment(&gdt[GUCODE1_SEL].sd, 0, i386_btop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 1); 
setsegment(&gdt[GUCODE_SEL].sd, 0, i386_btop(I386_MAX_EXE_ADDR) - 1, SDT_MEMERA, SEL_UPL, 1, 1); setsegment(&gdt[GUDATA_SEL].sd, 0, i386_btop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1); + setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary, + sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 1, 1); /* make ldt gates and memory segments */ setgate(&ldt[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1, SDT_SYS386CGT, @@ -2606,11 +2907,13 @@ init386(paddr_t first_avail) setgate(&idt[ 16], &IDTVEC(fpu), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); setgate(&idt[ 17], &IDTVEC(align), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); setgate(&idt[ 18], &IDTVEC(rsvd), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - for (i = 19; i < NIDT; i++) + for (i = 19; i < NRSVIDT; i++) setgate(&idt[i], &IDTVEC(rsvd), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); + for (i = NRSVIDT; i < NIDT; i++) + unsetgate(&idt[i]); setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386TGT, SEL_UPL, GCODE_SEL); - setregion(&region, gdt, sizeof(gdt) - 1); + setregion(&region, gdt, NGDT * sizeof(union descriptor) - 1); lgdt(&region); setregion(&region, idt, sizeof(idt_region) - 1); lidt(&region); @@ -2661,6 +2964,14 @@ init386(paddr_t first_avail) panic("no BIOS memory map supplied"); #endif +#if defined(MULTIPROCESSOR) + /* install the page after boot args as PT page for first 4M */ + pmap_enter(pmap_kernel(), (u_long)vtopte(0), + i386_round_page(bootargv + bootargc), VM_PROT_READ|VM_PROT_WRITE, + VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED); + memset(vtopte(0), 0, NBPG); /* make sure it is clean before using */ +#endif + /* * account all the memory passed in the map from /boot * calculate avail_end and count the physmem. 
@@ -2677,8 +2988,8 @@ init386(paddr_t first_avail) a = i386_round_page(im->addr); e = i386_trunc_page(im->addr + im->size); /* skip first four pages */ - if (a < 4 * NBPG) - a = 4 * NBPG; + if (a < 5 * NBPG) + a = 5 * NBPG; #ifdef DEBUG printf(" %u-%u", a, e); #endif @@ -2937,6 +3248,48 @@ cpu_reset() for (;;); } +void +cpu_initclocks() +{ + (*initclock_func)(); +} + +void +need_resched(struct cpu_info *ci) +{ + ci->ci_want_resched = 1; + ci->ci_astpending = 1; +} + +#ifdef MULTIPROCESSOR +/* Allocate an IDT vector slot within the given range. + * XXX needs locking to avoid MP allocation races. + */ + +int +idt_vec_alloc(int low, int high) +{ + int vec; + + for (vec = low; vec <= high; vec++) + if (idt[vec].gd_p == 0) + return (vec); + return (0); +} + +void +idt_vec_set(int vec, void (*function)(void)) +{ + setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); +} + +void +idt_vec_free(int vec) +{ + unsetgate(&idt[vec]); +} +#endif /* MULTIPROCESSOR */ + /* * machine dependent system variables. 
*/ @@ -2998,7 +3351,7 @@ cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) case CPU_CPUID: return (sysctl_rdint(oldp, oldlenp, newp, cpu_id)); case CPU_CPUFEATURE: - return (sysctl_rdint(oldp, oldlenp, newp, cpu_feature)); + return (sysctl_rdint(oldp, oldlenp, newp, curcpu()->ci_feature_flags)); #if NAPM > 0 case CPU_APMWARN: return (sysctl_int(oldp, oldlenp, newp, newlen, &cpu_apmwarn)); @@ -3204,6 +3557,9 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp) vaddr_t va; pt_entry_t *pte; bus_size_t map_size; +#ifdef MULTIPROCESSOR + u_int32_t cpumask = 0; +#endif pa = i386_trunc_page(bpa); endpa = i386_round_page(bpa + size); @@ -3236,9 +3592,17 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp) *pte &= ~PG_N; else *pte |= PG_N; +#ifdef MULTIPROCESSOR + pmap_tlb_shootdown(pmap_kernel(), va, *pte, + &cpumask); +#else pmap_update_pg(va); +#endif } } +#ifdef MULTIPROCESSOR + pmap_tlb_shootnow(cpumask); +#endif pmap_update(pmap_kernel()); return 0; @@ -3945,9 +4309,52 @@ _bus_dmamem_alloc_range(t, size, alignment, boundary, segs, nsegs, rsegs, void splassert_check(int wantipl, const char *func) { - if (cpl < wantipl) { - splassert_fail(wantipl, cpl, func); - } + if (lapic_tpr < wantipl) + splassert_fail(wantipl, lapic_tpr, func); +} +#endif + +#ifdef MULTIPROCESSOR +void +i386_intlock(struct intrframe iframe) +{ + if (iframe.if_ppl < IPL_SCHED) +#ifdef notdef + spinlockmgr(&kernel_lock, LK_EXCLUSIVE|LK_CANRECURSE, 0); +#else + __mp_lock(&kernel_lock); +#endif +} + +void +i386_intunlock(struct intrframe iframe) +{ + if (iframe.if_ppl < IPL_SCHED) +#ifdef notdef + spinlockmgr(&kernel_lock, LK_RELEASE, 0); +#else + __mp_unlock(&kernel_lock); +#endif +} + +void +i386_softintlock(void) +{ +#ifdef notdef + spinlockmgr(&kernel_lock, LK_EXCLUSIVE|LK_CANRECURSE, 0); +#else + __mp_lock(&kernel_lock); +#endif +} + +void +i386_softintunlock(void) +{ +#ifdef notdef + spinlockmgr(&kernel_lock, LK_RELEASE, 0); +#else + __mp_unlock(&kernel_lock); +#endif } #endif @@ -3957,11 
+4364,15 @@ splassert_check(int wantipl, const char *func) * We hand-code this to ensure that it's atomic. */ void -softintr(mask) - int mask; +softintr(sir, vec) + int sir; + int vec; { - __asm __volatile("orl %1, %0" : "=m"(ipending) : "ir" (mask)); - + __asm __volatile("orl %1, %0" : "=m" (ipending) : "ir" (sir)); +#ifdef MULTIPROCESSOR + i82489_writereg(LAPIC_ICRLO, + vec | LAPIC_DLMODE_FIXED | LAPIC_LVL_ASSERT | LAPIC_DEST_SELF); +#endif } /* @@ -3971,10 +4382,10 @@ int splraise(ncpl) int ncpl; { - int ocpl = cpl; + int ocpl = lapic_tpr; if (ncpl > ocpl) - cpl = ncpl; + lapic_tpr = ncpl; return (ocpl); } @@ -3986,7 +4397,7 @@ void splx(ncpl) int ncpl; { - cpl = ncpl; + lapic_tpr = ncpl; if (ipending & IUNMASK(ncpl)) Xspllower(); } @@ -3999,8 +4410,9 @@ int spllower(ncpl) int ncpl; { - int ocpl = cpl; + int ocpl = lapic_tpr; splx(ncpl); return (ocpl); } + diff --git a/sys/arch/i386/i386/mainbus.c b/sys/arch/i386/i386/mainbus.c index 47577e0a0b9..c242f14e5da 100644 --- a/sys/arch/i386/i386/mainbus.c +++ b/sys/arch/i386/i386/mainbus.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mainbus.c,v 1.15 2002/03/14 01:26:32 millert Exp $ */ +/* $OpenBSD: mainbus.c,v 1.16 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: mainbus.c,v 1.21 1997/06/06 23:14:20 thorpej Exp $ */ /* @@ -49,11 +49,22 @@ #include "isa.h" #include "apm.h" #include "bios.h" +#include "mpbios.h" + +#include <machine/cpuvar.h> +#include <machine/i82093var.h> +#include <machine/mpbiosvar.h> #if NBIOS > 0 #include <machine/biosvar.h> #endif +#if 0 +#ifdef SMP /* XXX MULTIPROCESSOR */ +#include <machine/mp.h> +#endif +#endif + int mainbus_match(struct device *, void *, void *); void mainbus_attach(struct device *, struct device *, void *); @@ -75,6 +86,8 @@ union mainbus_attach_args { #if NBIOS > 0 struct bios_attach_args mba_bios; #endif + struct cpu_attach_args mba_caa; + struct apic_attach_args aaa_caa; }; /* @@ -104,6 +117,7 @@ mainbus_attach(parent, self, aux) void *aux; { union mainbus_attach_args mba; + extern int 
cpu_id, cpu_feature; printf("\n"); @@ -116,6 +130,40 @@ mainbus_attach(parent, self, aux) } #endif +#if NMPBIOS > 0 + if (mpbios_probe(self)) + mpbios_scan(self); + else +#endif + { + struct cpu_attach_args caa; + + memset(&caa, 0, sizeof(caa)); + caa.caa_name = "cpu"; + caa.cpu_number = 0; + caa.cpu_role = CPU_ROLE_SP; + caa.cpu_func = 0; + caa.cpu_signature = cpu_id; + caa.feature_flags = cpu_feature; + + config_found(self, &caa, mainbus_print); + } + +#if 0 +#ifdef SMP + if (bios_smpinfo != NULL) { + struct mp_float *mp = bios_smpinfo; + + printf("%s: MP 1.%d configuration %d\n", self->dv_xname, + mp->revision, mp->feature1); + } +#ifdef CPU_DEBUG + else + printf ("%s: No MP configuration found.", self->dv_xname); +#endif +#endif +#endif + /* * XXX Note also that the presence of a PCI bus should * XXX _always_ be checked, and if present the bus should be diff --git a/sys/arch/i386/i386/microtime.s b/sys/arch/i386/i386/microtime.s index 7523e48b73a..ac1b1742cb4 100644 --- a/sys/arch/i386/i386/microtime.s +++ b/sys/arch/i386/i386/microtime.s @@ -1,4 +1,4 @@ -/* $OpenBSD: microtime.s,v 1.19 2003/06/04 16:36:14 deraadt Exp $ */ +/* $OpenBSD: microtime.s,v 1.20 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: microtime.s,v 1.16 1995/04/17 12:06:47 cgd Exp $ */ /*- @@ -42,7 +42,7 @@ * overridden (i.e. it is 100Hz). */ #ifndef HZ -ENTRY(microtime) +ENTRY(i8254_microtime) #if defined(I586_CPU) || defined(I686_CPU) movl _C_LABEL(pentium_mhz), %ecx diff --git a/sys/arch/i386/i386/mpbios.c b/sys/arch/i386/i386/mpbios.c new file mode 100644 index 00000000000..689b9ace3f5 --- /dev/null +++ b/sys/arch/i386/i386/mpbios.c @@ -0,0 +1,1105 @@ +/* $OpenBSD: mpbios.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */ +/* $NetBSD: mpbios.c,v 1.2 2002/10/01 12:56:57 fvdl Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. 
+ * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1999 Stefan Grefen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Derived from FreeBSD's mp_machdep.c + */ +/* + * Copyright (c) 1996, by Steve Passe + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * The Intel MP-stuff is just one way of x86 SMP systems + * so only Intel MP specific stuff is here. 
+ */ + +#include "mpbios.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/device.h> +#include <sys/malloc.h> + +#include <uvm/uvm_extern.h> + +#include <machine/specialreg.h> +#include <machine/cputypes.h> +#include <machine/cpuvar.h> +#include <machine/bus.h> +#include <machine/mpbiosreg.h> +#include <machine/mpbiosvar.h> + +#include <machine/i82093reg.h> +#include <machine/i82093var.h> +#include <machine/i82489reg.h> +#include <machine/i82489var.h> +#include <dev/isa/isareg.h> + +#include <dev/eisa/eisavar.h> /* for ELCR* def'ns */ + + +static struct mpbios_ioapic default_ioapic = { + 2, 0, 1, IOAPICENTRY_FLAG_EN, (caddr_t)IOAPIC_BASE_DEFAULT +}; + +/* descriptions of MP basetable entries */ +struct mpbios_baseentry { + u_int8_t type; + u_int8_t length; + u_int16_t count; + const char *name; +}; + +static const char *loc_where[] = { + "extended bios data area", + "last page of base memory", + "bios" +}; + +struct mp_map +{ + vaddr_t baseva; + int vsize; + paddr_t pa; + paddr_t pg; + int psize; +}; + +int mp_print(void *, const char *); +int mp_match(struct device *, void *, void *); +int mpbios_cpu_start(struct cpu_info *); +const void *mpbios_search(struct device *, paddr_t, int, + struct mp_map *); +static __inline int mpbios_cksum(const void *, int); + +void mp_cfg_special_intr(const struct mpbios_int *, u_int32_t *); +void mp_cfg_pci_intr(const struct mpbios_int *, u_int32_t *); +void mp_cfg_eisa_intr(const struct mpbios_int *, u_int32_t *); +void mp_cfg_isa_intr(const struct mpbios_int *, u_int32_t *); +void mp_print_special_intr (int); +void mp_print_pci_intr (int); +void mp_print_eisa_intr (int); +void mp_print_isa_intr (int); + +void mpbios_cpu(const u_int8_t *, struct device *); +void mpbios_bus(const u_int8_t *, struct device *); +void mpbios_ioapic(const u_int8_t *, struct device *); +void mpbios_int(const u_int8_t *, int, struct mp_intr_map *); + +const void *mpbios_map(paddr_t, int, struct mp_map *); 
+static __inline void mpbios_unmap(struct mp_map *); + +/* + * globals to help us bounce our way through parsing the config table. + */ + +static struct mp_map mp_cfg_table_map; +static struct mp_map mp_fp_map; +const struct mpbios_cth *mp_cth; +const struct mpbios_fps *mp_fps; + +#ifdef MPVERBOSE +int mp_verbose = 1; +#else +int mp_verbose = 0; +#endif + +int +mp_print(aux, pnp) + void *aux; + const char *pnp; +{ + struct cpu_attach_args * caa = (struct cpu_attach_args *) aux; + if (pnp) + printf("%s at %s:", caa->caa_name, pnp); + return (UNCONF); +} + +int +mp_match(parent, cfv, aux) + struct device *parent; + void *cfv; + void *aux; +{ + struct cfdata *cf = (struct cfdata *)cfv; + struct cpu_attach_args * caa = (struct cpu_attach_args *) aux; + if (strcmp(caa->caa_name, cf->cf_driver->cd_name)) + return 0; + + return ((*cf->cf_attach->ca_match)(parent, cf, aux)); +} + +/* + * Map a chunk of memory read-only and return an appropriately + * const'ed pointer. + */ +const void * +mpbios_map(pa, len, handle) + paddr_t pa; + int len; + struct mp_map *handle; +{ + paddr_t pgpa = i386_trunc_page(pa); + paddr_t endpa = i386_round_page(pa + len); + vaddr_t va = uvm_km_valloc(kernel_map, endpa - pgpa); + vaddr_t retva = va + (pa & PGOFSET); + + handle->pa = pa; + handle->pg = pgpa; + handle->psize = len; + handle->baseva = va; + handle->vsize = endpa-pgpa; + + do { +#if 1 + pmap_kenter_pa(va, pgpa, VM_PROT_READ); +#else + pmap_enter(pmap_kernel(), va, pgpa, VM_PROT_READ, TRUE, + VM_PROT_READ); +#endif + va += NBPG; + pgpa += NBPG; + } while (pgpa < endpa); + + return ((const void *)retva); +} + +static __inline void +mpbios_unmap(handle) + struct mp_map *handle; +{ +#if 1 + pmap_kremove(handle->baseva, handle->vsize); +#else + pmap_extract(pmap_kernel(), handle->baseva, NULL); +#endif + uvm_km_free(kernel_map, handle->baseva, handle->vsize); +} + +/* + * Look for an Intel MP spec table, indicating SMP capable hardware. 
+ */ +int +mpbios_probe(self) + struct device *self; +{ + paddr_t ebda, memtop; + + paddr_t cthpa; + int cthlen; + const u_int8_t *mpbios_page; + int scan_loc; + + struct mp_map t; + + /* see if EBDA exists */ + + mpbios_page = mpbios_map(0, NBPG, &t); + + /* XXX Ugly magic constants below. */ + ebda = *(const u_int16_t *)(&mpbios_page[0x40e]); + ebda <<= 4; + + memtop = *(const u_int16_t *)(&mpbios_page[0x413]); + memtop <<= 10; + + mpbios_page = NULL; + mpbios_unmap(&t); + + scan_loc = 0; + + if (ebda && ebda < IOM_BEGIN ) { + mp_fps = mpbios_search(self, ebda, 1024, &mp_fp_map); + if (mp_fps != NULL) + goto found; + } + + scan_loc = 1; + + if (memtop && memtop <= IOM_BEGIN ) { + mp_fps = mpbios_search(self, memtop - 1024, 1024, &mp_fp_map); + if (mp_fps != NULL) + goto found; + } + + scan_loc = 2; + + mp_fps = mpbios_search(self, BIOS_BASE, BIOS_COUNT, &mp_fp_map); + if (mp_fps != NULL) + goto found; + + /* nothing found */ + return (0); + + found: + if (mp_verbose) + printf("%s: MP floating pointer found in %s at 0x%lx\n", + self->dv_xname, loc_where[scan_loc], mp_fp_map.pa); + + if (mp_fps->pap == 0) { + if (mp_fps->mpfb1 == 0) { + printf("%s: MP fps invalid: " + "no default config and no configuration table\n", + self->dv_xname); + + goto err; + } + printf("%s: MP default configuration %d\n", + self->dv_xname, mp_fps->mpfb1); + return (10); + } + + cthpa = mp_fps->pap; + + mp_cth = mpbios_map(cthpa, sizeof (*mp_cth), &mp_cfg_table_map); + cthlen = mp_cth->base_len; + mpbios_unmap(&mp_cfg_table_map); + + mp_cth = mpbios_map(cthpa, cthlen, &mp_cfg_table_map); + + if (mp_verbose) + printf("%s: MP config table at 0x%lx, %d bytes long\n", + self->dv_xname, cthpa, cthlen); + + if (mp_cth->signature != MP_CT_SIG) { + printf("%s: MP signature mismatch (%x vs %x)\n", + self->dv_xname, + MP_CT_SIG, mp_cth->signature); + goto err; + } + + if (mpbios_cksum(mp_cth, cthlen)) { + printf ("%s: MP Configuration Table checksum mismatch\n", + self->dv_xname); + goto err; + } + 
return (10); + + err: + if (mp_fps) { + mp_fps = NULL; + mpbios_unmap(&mp_fp_map); + } + if (mp_cth) { + mp_cth = NULL; + mpbios_unmap(&mp_cfg_table_map); + } + return (0); +} + + +/* + * Simple byte checksum used on config tables. + */ + +static __inline int +mpbios_cksum (start, len) + const void *start; + int len; +{ + unsigned char res=0; + const char *p = start; + const char *end = p + len; + + while (p < end) + res += *p++; + + return res; +} + + +/* + * Look for the MP floating pointer signature in the given physical + * address range. + * + * We map the memory, scan through it, and unmap it. + * If we find it, remap the floating pointer structure and return it. + */ + +const void * +mpbios_search (self, start, count, map) + struct device *self; + paddr_t start; + int count; + struct mp_map *map; +{ + struct mp_map t; + + int i, len; + const struct mpbios_fps *m; + int end = count - sizeof(*m); + const u_int8_t *base = mpbios_map(start, count, &t); + + if (mp_verbose) + printf("%s: scanning 0x%lx to 0x%lx for MP signature\n", + self->dv_xname, start, start + count - sizeof(*m)); + + for (i = 0; i <= end; i += 4) { + m = (struct mpbios_fps *)&base[i]; + + if ((m->signature == MP_FP_SIG) && + ((len = m->length << 4) != 0) && + mpbios_cksum(m, (m->length << 4)) == 0) { + mpbios_unmap(&t); + + return (mpbios_map(start + i, len, map)); + } + } + mpbios_unmap(&t); + + return (0); +} + +/* + * MP configuration table parsing. 
+ */ + +static struct mpbios_baseentry mp_conf[] = +{ + {0, 20, 0, "cpu"}, + {1, 8, 0, "bus"}, + {2, 8, 0, "ioapic"}, + {3, 8, 0, "ioint"}, + {4, 8, 0, "lint"}, +}; + +struct mp_bus *mp_busses; +int mp_nbus; +struct mp_intr_map *mp_intrs; + +struct mp_intr_map *lapic_ints[2]; /* XXX */ +int mp_isa_bus = -1; /* XXX */ +int mp_eisa_bus = -1; /* XXX */ + +static struct mp_bus extint_bus = { + "ExtINT", + -1, + mp_print_special_intr, + mp_cfg_special_intr, + 0 +}; +static struct mp_bus smi_bus = { + "SMI", + -1, + mp_print_special_intr, + mp_cfg_special_intr, + 0 +}; +static struct mp_bus nmi_bus = { + "NMI", + -1, + mp_print_special_intr, + mp_cfg_special_intr, + 0 +}; + + +/* + * 1st pass on BIOS's Intel MP specification table. + * + * initializes: + * mp_ncpus = 1 + * + * determines: + * cpu_apic_address (common to all CPUs) + * ioapic_address[N] + * mp_naps + * mp_nbusses + * mp_napics + * nintrs + */ +void +mpbios_scan(self) + struct device *self; +{ + const u_int8_t *position, *end; + int count; + int type; + int intr_cnt, cur_intr; + paddr_t lapic_base; + + printf("%s: Intel MP Specification ", self->dv_xname); + + switch (mp_fps->spec_rev) { + case 1: + printf("(Version 1.1)"); + break; + case 4: + printf("(Version 1.4)"); + break; + default: + printf("(unrecognized rev %d)", mp_fps->spec_rev); + } + + /* + * looks like we've got a MP system. start setting up + * infrastructure.. + * XXX is this the right place?? 
+ */ + + lapic_base = LAPIC_BASE; + if (mp_cth != NULL) + lapic_base = (paddr_t)mp_cth->apic_address; + + lapic_boot_init(lapic_base); + + /* check for use of 'default' configuration */ + if (mp_fps->mpfb1 != 0) { + struct mpbios_proc pe; + + printf("\n%s: MP default configuration %d\n", + self->dv_xname, mp_fps->mpfb1); + + /* use default addresses */ + pe.apic_id = cpu_number(); + pe.cpu_flags = PROCENTRY_FLAG_EN|PROCENTRY_FLAG_BP; + pe.cpu_signature = cpu_info_primary.ci_signature; + pe.feature_flags = cpu_info_primary.ci_feature_flags; + + mpbios_cpu((u_int8_t *)&pe, self); + + pe.apic_id = 1 - cpu_number(); + pe.cpu_flags = PROCENTRY_FLAG_EN; + + mpbios_cpu((u_int8_t *)&pe, self); + + mpbios_ioapic((u_int8_t *)&default_ioapic, self); + + /* XXX */ + printf("%s: WARNING: interrupts not configured\n", + self->dv_xname); + panic("lazy bum"); + return; + } else { + /* + * should not happen; mp_probe returns 0 in this case, + * but.. + */ + if (mp_cth == NULL) + panic("mpbios_scan: no config (can't happen?)"); + + printf(" (%8.8s %12.12s)\n", + mp_cth->oem_id, mp_cth->product_id); + + /* + * Walk the table once, counting items + */ + position = (const u_int8_t *)(mp_cth); + end = position + mp_cth->base_len; + position += sizeof(*mp_cth); + + count = mp_cth->entry_count; + intr_cnt = 0; + + while ((count--) && (position < end)) { + type = *position; + if (type >= MPS_MCT_NTYPES) { + printf("%s: unknown entry type %x" + " in MP config table\n", + self->dv_xname, type); + break; + } + mp_conf[type].count++; + if (type == MPS_MCT_BUS) { + const struct mpbios_bus *bp = + (const struct mpbios_bus *)position; + if (bp->bus_id >= mp_nbus) + mp_nbus = bp->bus_id + 1; + } + /* + * Count actual interrupt instances. + * dst_apic_id of MPS_ALL_APICS means "wired to all + * apics of this type". 
+ */ + if ((type == MPS_MCT_IOINT) || + (type == MPS_MCT_LINT)) { + const struct mpbios_int *ie = + (const struct mpbios_int *)position; + if (ie->dst_apic_id != MPS_ALL_APICS) + intr_cnt++; + else if (type == MPS_MCT_IOINT) + intr_cnt += + mp_conf[MPS_MCT_IOAPIC].count; + else + intr_cnt += mp_conf[MPS_MCT_CPU].count; + } + position += mp_conf[type].length; + } + + mp_busses = malloc(sizeof(struct mp_bus) * mp_nbus, + M_DEVBUF, M_NOWAIT); + memset(mp_busses, 0, sizeof(struct mp_bus) * mp_nbus); + mp_intrs = malloc(sizeof(struct mp_intr_map) * intr_cnt, + M_DEVBUF, M_NOWAIT); + + /* re-walk the table, recording info of interest */ + position = (const u_int8_t *)mp_cth + sizeof(*mp_cth); + count = mp_cth->entry_count; + cur_intr = 0; + + while ((count--) && (position < end)) { + switch (type = *(u_char *)position) { + case MPS_MCT_CPU: + mpbios_cpu(position, self); + break; + case MPS_MCT_BUS: + mpbios_bus(position, self); + break; + case MPS_MCT_IOAPIC: + mpbios_ioapic(position, self); + break; + case MPS_MCT_IOINT: + case MPS_MCT_LINT: + mpbios_int(position, type, + &mp_intrs[cur_intr]); + cur_intr++; + break; + default: + printf("%s: unknown entry type %x " + "in MP config table\n", + self->dv_xname, type); + /* NOTREACHED */ + return; + } + + (u_char*)position += mp_conf[type].length; + } + if (mp_verbose && mp_cth->ext_len) + printf("%s: MP WARNING: %d " + "bytes of extended entries not examined\n", + self->dv_xname, mp_cth->ext_len); + } + /* Clean up. */ + mp_fps = NULL; + mpbios_unmap(&mp_fp_map); + if (mp_cth != NULL) { + mp_cth = NULL; + mpbios_unmap(&mp_cfg_table_map); + } +} + +void +mpbios_cpu(ent, self) + const u_int8_t *ent; + struct device *self; +{ + const struct mpbios_proc *entry = (const struct mpbios_proc *)ent; + struct cpu_attach_args caa; + + /* XXX move this into the CPU attachment goo. 
*/ + /* check for usability */ + if (!(entry->cpu_flags & PROCENTRY_FLAG_EN)) + return; + + /* check for BSP flag */ + if (entry->cpu_flags & PROCENTRY_FLAG_BP) + caa.cpu_role = CPU_ROLE_BP; + else + caa.cpu_role = CPU_ROLE_AP; + + caa.caa_name = "cpu"; + caa.cpu_number = entry->apic_id; + caa.cpu_func = &mp_cpu_funcs; +#if 1 /* XXX Will be removed when the real stuff is probed */ + caa.cpu_signature = entry->cpu_signature; + + /* + * XXX this is truncated to just contain the low-order 16 bits + * of the flags on at least some MP bioses + */ + caa.feature_flags = entry->feature_flags; +#endif + + config_found_sm(self, &caa, mp_print, mp_match); +} + +/* + * The following functions conspire to compute base ioapic redirection + * table entry for a given interrupt line. + * + * Fill in: trigger mode, polarity, and possibly delivery mode. + */ +void +mp_cfg_special_intr(entry, redir) + const struct mpbios_int *entry; + u_int32_t *redir; +{ + + /* + * All of these require edge triggered, zero vector, + * appropriate delivery mode. + * see page 13 of the 82093AA datasheet. + */ + *redir &= ~IOAPIC_REDLO_DEL_MASK; + *redir &= ~IOAPIC_REDLO_VECTOR_MASK; + *redir &= ~IOAPIC_REDLO_LEVEL; + + switch (entry->int_type) { + case MPS_INTTYPE_NMI: + *redir |= (IOAPIC_REDLO_DEL_NMI<<IOAPIC_REDLO_DEL_SHIFT); + break; + + case MPS_INTTYPE_SMI: + *redir |= (IOAPIC_REDLO_DEL_SMI<<IOAPIC_REDLO_DEL_SHIFT); + break; + case MPS_INTTYPE_ExtINT: + /* + * We are using the ioapic in "native" mode. + * This indicates where the 8259 is wired to the ioapic + * and/or local apic.. + */ + *redir |= (IOAPIC_REDLO_DEL_EXTINT<<IOAPIC_REDLO_DEL_SHIFT); + *redir |= (IOAPIC_REDLO_MASK); + break; + default: + panic("unknown MPS interrupt type %d", entry->int_type); + } +} + +/* XXX too much duplicated code here. 
*/ + +void +mp_cfg_pci_intr(entry, redir) + const struct mpbios_int *entry; + u_int32_t *redir; +{ + int mpspo = entry->int_flags & 0x03; /* XXX magic */ + int mpstrig = (entry->int_flags >> 2) & 0x03; /* XXX magic */ + + *redir &= ~IOAPIC_REDLO_DEL_MASK; + switch (mpspo) { + case MPS_INTPO_ACTHI: + *redir &= ~IOAPIC_REDLO_ACTLO; + break; + case MPS_INTPO_DEF: + case MPS_INTPO_ACTLO: + *redir |= IOAPIC_REDLO_ACTLO; + break; + default: + panic("unknown MPS interrupt polarity %d", mpspo); + } + + if (entry->int_type != MPS_INTTYPE_INT) { + mp_cfg_special_intr(entry, redir); + return; + } + *redir |= (IOAPIC_REDLO_DEL_LOPRI<<IOAPIC_REDLO_DEL_SHIFT); + + switch (mpstrig) { + case MPS_INTTR_DEF: + case MPS_INTTR_LEVEL: + *redir |= IOAPIC_REDLO_LEVEL; + break; + case MPS_INTTR_EDGE: + *redir &= ~IOAPIC_REDLO_LEVEL; + break; + default: + panic("unknown MPS interrupt trigger %d", mpstrig); + } +} + +void +mp_cfg_eisa_intr (entry, redir) + const struct mpbios_int *entry; + u_int32_t *redir; +{ + int mpspo = entry->int_flags & 0x03; /* XXX magic */ + int mpstrig = (entry->int_flags >> 2) & 0x03; /* XXX magic */ + + *redir &= ~IOAPIC_REDLO_DEL_MASK; + switch (mpspo) { + case MPS_INTPO_DEF: + case MPS_INTPO_ACTHI: + *redir &= ~IOAPIC_REDLO_ACTLO; + break; + case MPS_INTPO_ACTLO: + *redir |= IOAPIC_REDLO_ACTLO; + break; + default: + panic("unknown MPS interrupt polarity %d", mpspo); + } + + if (entry->int_type != MPS_INTTYPE_INT) { + mp_cfg_special_intr(entry, redir); + return; + } + *redir |= (IOAPIC_REDLO_DEL_LOPRI<<IOAPIC_REDLO_DEL_SHIFT); + + switch (mpstrig) { + case MPS_INTTR_LEVEL: + *redir |= IOAPIC_REDLO_LEVEL; + break; + case MPS_INTTR_EDGE: + *redir &= ~IOAPIC_REDLO_LEVEL; + break; + case MPS_INTTR_DEF: + /* + * Set "default" setting based on ELCR value snagged + * earlier. 
+ */ + if (mp_busses[entry->src_bus_id].mb_data & + (1<<entry->src_bus_irq)) { + *redir |= IOAPIC_REDLO_LEVEL; + } else { + *redir &= ~IOAPIC_REDLO_LEVEL; + } + break; + default: + panic("unknown MPS interrupt trigger %d", mpstrig); + } +} + + +void +mp_cfg_isa_intr(entry, redir) + const struct mpbios_int *entry; + u_int32_t *redir; +{ + int mpspo = entry->int_flags & 0x03; /* XXX magic */ + int mpstrig = (entry->int_flags >> 2) & 0x03; /* XXX magic */ + + *redir &= ~IOAPIC_REDLO_DEL_MASK; + switch (mpspo) { + case MPS_INTPO_DEF: + case MPS_INTPO_ACTHI: + *redir &= ~IOAPIC_REDLO_ACTLO; + break; + case MPS_INTPO_ACTLO: + *redir |= IOAPIC_REDLO_ACTLO; + break; + default: + panic("unknown MPS interrupt polarity %d", mpspo); + } + + if (entry->int_type != MPS_INTTYPE_INT) { + mp_cfg_special_intr(entry, redir); + return; + } + *redir |= (IOAPIC_REDLO_DEL_LOPRI << IOAPIC_REDLO_DEL_SHIFT); + + switch (mpstrig) { + case MPS_INTTR_LEVEL: + *redir |= IOAPIC_REDLO_LEVEL; + break; + case MPS_INTTR_DEF: + case MPS_INTTR_EDGE: + *redir &= ~IOAPIC_REDLO_LEVEL; + break; + default: + panic("unknown MPS interrupt trigger %d", mpstrig); + } +} + + +void +mp_print_special_intr(intr) + int intr; +{ +} + +void +mp_print_pci_intr(intr) + int intr; +{ + printf(" device %d INT_%c", (intr >> 2) & 0x1f, 'A' + (intr & 0x3)); +} + +void +mp_print_isa_intr(intr) + int intr; +{ + printf(" irq %d", intr); +} + +void +mp_print_eisa_intr(intr) + int intr; +{ + printf(" EISA irq %d", intr); +} + + + +#define TAB_UNIT 4 +#define TAB_ROUND(a) _TAB_ROUND(a, TAB_UNIT) + +#define _TAB_ROUND(a,u) (((a) + (u - 1)) & ~(u - 1)) +#define EXTEND_TAB(a,u) (!(_TAB_ROUND(a, u) == _TAB_ROUND((a + 1), u))) + +void +mpbios_bus(ent, self) + const u_int8_t *ent; + struct device *self; +{ + const struct mpbios_bus *entry = (const struct mpbios_bus *)ent; + int bus_id = entry->bus_id; + + printf("%s: bus %d is type %6.6s\n", self->dv_xname, + bus_id, entry->bus_type); + +#ifdef DIAGNOSTIC + /* + * This "should not 
happen" unless the table changes out + * from underneath us + */ + if (bus_id >= mp_nbus) { + panic("%s: bus number %d out of range?? (type %6.6s)\n", + self->dv_xname, bus_id, entry->bus_type); + } +#endif + + mp_busses[bus_id].mb_intrs = NULL; + + if (memcmp(entry->bus_type, "PCI ", 6) == 0) { + mp_busses[bus_id].mb_name = "pci"; + mp_busses[bus_id].mb_idx = bus_id; + mp_busses[bus_id].mb_intr_print = mp_print_pci_intr; + mp_busses[bus_id].mb_intr_cfg = mp_cfg_pci_intr; + } else if (memcmp(entry->bus_type, "EISA ", 6) == 0) { + mp_busses[bus_id].mb_name = "eisa"; + mp_busses[bus_id].mb_idx = bus_id; + mp_busses[bus_id].mb_intr_print = mp_print_eisa_intr; + mp_busses[bus_id].mb_intr_cfg = mp_cfg_eisa_intr; + + mp_busses[bus_id].mb_data = inb(ELCR0) | (inb(ELCR1) << 8); + + if (mp_eisa_bus != -1) + printf("%s: multiple eisa busses?\n", + self->dv_xname); + else + mp_eisa_bus = bus_id; + } else if (memcmp(entry->bus_type, "ISA ", 6) == 0) { + mp_busses[bus_id].mb_name = "isa"; + mp_busses[bus_id].mb_idx = 0; /* XXX */ + mp_busses[bus_id].mb_intr_print = mp_print_isa_intr; + mp_busses[bus_id].mb_intr_cfg = mp_cfg_isa_intr; + if (mp_isa_bus != -1) + printf("%s: multiple isa busses?\n", + self->dv_xname); + else + mp_isa_bus = bus_id; + } else { + printf("%s: unsupported bus type %6.6s\n", self->dv_xname, + entry->bus_type); + } +} + + +void +mpbios_ioapic(ent, self) + const u_int8_t *ent; + struct device *self; +{ + const struct mpbios_ioapic *entry = (const struct mpbios_ioapic *)ent; + struct apic_attach_args aaa; + + /* XXX let flags checking happen in ioapic driver.. */ + if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN)) + return; + + aaa.aaa_name = "ioapic"; + aaa.apic_id = entry->apic_id; + aaa.apic_version = entry->apic_version; + aaa.apic_address = (paddr_t)entry->apic_address; + aaa.flags = (mp_fps->mpfb2 & 0x80) ? 
IOAPIC_PICMODE : IOAPIC_VWIRE; + + config_found_sm(self, &aaa, mp_print, mp_match); +} + +static const char inttype_fmt[] = "\177\020" + "f\0\2type\0" "=\1NMI\0" "=\2SMI\0" "=\3ExtINT\0"; + +static const char flagtype_fmt[] = "\177\020" + "f\0\2pol\0" "=\1Act Hi\0" "=\3Act Lo\0" + "f\2\2trig\0" "=\1Edge\0" "=\3Level\0"; + +void +mpbios_int(ent, enttype, mpi) + const u_int8_t *ent; + int enttype; + struct mp_intr_map *mpi; +{ + const struct mpbios_int *entry = (const struct mpbios_int *)ent; + struct ioapic_softc *sc = NULL; + + struct mp_intr_map *altmpi; + struct mp_bus *mpb; + + u_int32_t id = entry->dst_apic_id; + u_int32_t pin = entry->dst_apic_int; + u_int32_t bus = entry->src_bus_id; + u_int32_t dev = entry->src_bus_irq; + u_int32_t type = entry->int_type; + u_int32_t flags = entry->int_flags; + + switch (type) { + case MPS_INTTYPE_INT: + mpb = &(mp_busses[bus]); + break; + case MPS_INTTYPE_ExtINT: + mpb = &extint_bus; + break; + case MPS_INTTYPE_SMI: + mpb = &smi_bus; + break; + case MPS_INTTYPE_NMI: + mpb = &nmi_bus; + break; + } + mpi->next = mpb->mb_intrs; + mpb->mb_intrs = mpi; + mpi->bus = mpb; + mpi->bus_pin = dev; + + mpi->ioapic_ih = APIC_INT_VIA_APIC | + ((id << APIC_INT_APIC_SHIFT) | ((pin << APIC_INT_PIN_SHIFT))); + + mpi->type = type; + mpi->flags = flags; + mpi->redir = 0; + if (mpb->mb_intr_cfg == NULL) { + printf("mpbios: can't find bus %d for apic %d pin %d\n", + bus, id, pin); + return; + } + + (*mpb->mb_intr_cfg)(entry, &mpi->redir); + + if (enttype == MPS_MCT_IOINT) { + sc = ioapic_find(id); + if (sc == NULL) { + printf("mpbios: can't find ioapic %d\n", id); + return; + } + + mpi->ioapic = sc; + mpi->ioapic_pin = pin; + + altmpi = sc->sc_pins[pin].ip_map; + + if (altmpi != NULL) { + if ((altmpi->type != type) || + (altmpi->flags != flags)) { + printf( + "%s: conflicting map entries for pin %d\n", + sc->sc_dev.dv_xname, pin); + } + } else { + sc->sc_pins[pin].ip_map = mpi; + } + } else { + if (id != MPS_ALL_APICS) + panic("can't deal with 
not-all-lapics interrupt yet!"); + if (pin >= 2) + printf("pin %d of local apic doesn't exist!\n", pin); + else { + mpi->ioapic = NULL; + mpi->ioapic_pin = pin; + lapic_ints[pin] = mpi; + } + } + if (mp_verbose) { + printf("%s: int%d attached to %s", + sc ? sc->sc_dev.dv_xname : "local apic", pin, + mpb->mb_name); + if (mpb->mb_idx != -1) + printf("%d", mpb->mb_idx); + + if (mpb != NULL) + + (*(mpb->mb_intr_print))(dev); + + printf(" (type 0x%x flags 0x%x)\n", type, flags); + } +} diff --git a/sys/arch/i386/i386/mptramp.s b/sys/arch/i386/i386/mptramp.s new file mode 100644 index 00000000000..d35b6560c89 --- /dev/null +++ b/sys/arch/i386/i386/mptramp.s @@ -0,0 +1,265 @@ +/* $OpenBSD: mptramp.s,v 1.2 2004/06/13 21:49:15 niklas Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1999 Stefan Grefen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * MP startup ... + * the stuff from cpu_spinup_trampoline to mp_startup + * is copied into the first 640 KB + * + * We startup the processors now when the kthreads become ready. + * The steps are: + * 1) Get the processors running kernel-code from a special + * page-table and stack page, do chip identification. + * 2) halt the processors waiting for them to be enabled + * by a idle-thread + */ + +#include "assym.h" +#include <machine/param.h> +#include <machine/asm.h> +#include <machine/specialreg.h> +#include <machine/segments.h> +#include <machine/gdt.h> +#include <machine/mpbiosvar.h> +#include <machine/i82489reg.h> + +#define GDTE(a,b) .byte 0xff,0xff,0x0,0x0,0x0,a,b,0x0 +#define _RELOC(x) ((x) - KERNBASE) +#define RELOC(x) _RELOC(_C_LABEL(x)) + +#define _TRMP_LABEL(a) a = . - _C_LABEL(cpu_spinup_trampoline) + MP_TRAMPOLINE + +/* + * Debug code to stop aux. processors in various stages based on the + * value in cpu_trace. + * + * %edi points at cpu_trace; cpu_trace[0] is the "hold point"; + * cpu_trace[1] is the point which the cpu has reached. + * cpu_trace[2] is the last value stored by HALTT. 
+ */ + + +#ifdef MPDEBUG +#define HALT(x) 1: movl (%edi),%ebx;cmpl $ x,%ebx ; jle 1b ; movl $x,4(%edi) +#define HALTT(x,y) movl y,8(%edi); HALT(x) +#else +#define HALT(x) /**/ +#define HALTT(x,y) /**/ +#endif + + .globl _C_LABEL(cpu),_C_LABEL(cpu_id),_C_LABEL(cpu_vendor) + .globl _C_LABEL(cpuid_level),_C_LABEL(cpu_feature) + + .global _C_LABEL(cpu_spinup_trampoline) + .global _C_LABEL(cpu_spinup_trampoline_end) + .global _C_LABEL(cpu_hatch) + .global _C_LABEL(mp_pdirpa) + .global _C_LABEL(gdt), _C_LABEL(local_apic) + + .text + .align 4,0x0 + .code16 +_C_LABEL(cpu_spinup_trampoline): + cli + xorw %ax, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + data32 addr32 lgdt (gdt_desc) # load flat descriptor table + movl %cr0, %eax # get cr0 + orl $0x1, %eax # enable protected mode + movl %eax, %cr0 # doit + ljmp $0x8, $mp_startup + +_TRMP_LABEL(mp_startup) + .code32 + + movl $0x10, %eax # data segment + movw %ax, %ds + movw %ax, %ss + movw %ax, %es + movw %ax, %fs + movw %ax, %gs + movl $(MP_TRAMPOLINE+NBPG-16),%esp # bootstrap stack end, + # with scratch space.. + +#ifdef MPDEBUG + leal RELOC(cpu_trace),%edi +#endif + + HALT(0x1) + /* First, reset the PSL. */ + pushl $PSL_MBO + popfl + + movl RELOC(mp_pdirpa),%ecx + HALTT(0x5,%ecx) + + /* Load base of page directory and enable mapping. */ + movl %ecx,%cr3 # load ptd addr into mmu + movl %cr0,%eax # get control word + # enable paging & NPX emulation + orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_EM|CR0_MP|CR0_WP),%eax + movl %eax,%cr0 # and let's page NOW! + +#ifdef MPDEBUG + leal _C_LABEL(cpu_trace),%edi +#endif + HALT(0x6) + +# ok, we're now running with paging enabled and sharing page tables with cpu0. +# figure out which processor we really are, what stack we should be on, etc. + + movl _C_LABEL(local_apic)+LAPIC_ID,%ecx + shrl $LAPIC_ID_SHIFT,%ecx + leal 0(,%ecx,4),%ecx + movl _C_LABEL(cpu_info)(%ecx),%ecx + + HALTT(0x7, %ecx) + +# %ecx points at our cpu_info structure.. 
+ + movw $((MAXGDTSIZ*8) - 1), 6(%esp) # prepare segment descriptor + movl CPU_INFO_GDT(%ecx), %eax # for real gdt + movl %eax, 8(%esp) + HALTT(0x8, %eax) + lgdt 6(%esp) + HALT(0x9) + jmp 1f + nop +1: + HALT(0xa) + movl $GSEL(GDATA_SEL, SEL_KPL),%eax #switch to new segment + HALTT(0x10, %eax) + movw %ax,%ds + HALT(0x11) + movw %ax,%es + HALT(0x12) + movw %ax,%ss + HALT(0x13) + pushl $GSEL(GCODE_SEL, SEL_KPL) + pushl $mp_cont + HALT(0x14) + lret + .align 4,0x0 +_TRMP_LABEL(gdt_table) + .word 0x0,0x0,0x0,0x0 # null GDTE + GDTE(0x9f,0xcf) # Kernel text + GDTE(0x93,0xcf) # Kernel data +_TRMP_LABEL(gdt_desc) + .word 0x17 # limit 3 entries + .long gdt_table # where is is gdt + +_C_LABEL(cpu_spinup_trampoline_end): #end of code copied to MP_TRAMPOLINE +mp_cont: + + movl CPU_INFO_IDLE_PCB(%ecx),%esi + +# %esi now points at our PCB. + + HALTT(0x19, %esi) + + movl PCB_ESP(%esi),%esp + movl PCB_EBP(%esi),%ebp + + HALT(0x20) + /* Switch address space. */ + movl PCB_CR3(%esi),%eax + HALTT(0x22, %eax) + movl %eax,%cr3 + HALT(0x25) + /* Load segment registers. */ + movl $GSEL(GCPU_SEL, SEL_KPL),%eax + HALTT(0x26,%eax) + movl %eax,%fs + xorl %eax,%eax + HALTT(0x27,%eax) + movl %eax,%gs + movl PCB_CR0(%esi),%eax + HALTT(0x28,%eax) + movl %eax,%cr0 + HALTT(0x30,%ecx) + pushl %ecx + call _C_LABEL(cpu_hatch) + HALT(0x33) + xorl %esi,%esi + jmp _C_LABEL(idle_loop) + + .data +_C_LABEL(mp_pdirpa): + .long 0 +#ifdef MPDEBUG + .global _C_LABEL(cpu_trace) +_C_LABEL(cpu_trace): + .long 0x40 + .long 0xff + .long 0xff +#endif diff --git a/sys/arch/i386/i386/pctr.c b/sys/arch/i386/i386/pctr.c index 9ed49ebe0bd..c1c63ceb2f5 100644 --- a/sys/arch/i386/i386/pctr.c +++ b/sys/arch/i386/i386/pctr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pctr.c,v 1.18 2003/09/29 17:33:01 mickey Exp $ */ +/* $OpenBSD: pctr.c,v 1.19 2004/06/13 21:49:15 niklas Exp $ */ /* * Pentium performance counter driver for OpenBSD. @@ -9,6 +9,7 @@ * OpenBSD project by leaving this copyright notice intact. 
*/ +#include <sys/param.h> #include <sys/types.h> #include <sys/errno.h> #include <sys/fcntl.h> diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index 8dc8115009e..dd34d9b7625 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.75 2004/02/01 12:26:45 grange Exp $ */ +/* $OpenBSD: pmap.c,v 1.76 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* @@ -70,6 +70,7 @@ #include <uvm/uvm.h> +#include <machine/atomic.h> #include <machine/cpu.h> #include <machine/specialreg.h> #include <machine/gdt.h> @@ -128,8 +129,6 @@ * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's. * if we run out of pv_entry's we allocate a new pv_page and free * its pv_entrys. - * - pmap_remove_record: a list of virtual addresses whose mappings - * have been changed. used for TLB flushing. */ /* @@ -225,35 +224,18 @@ * this lock protects the list of active pmaps (headed by "pmaps"). * we lock it when adding or removing pmaps from this list. 
* - * - pmap_copy_page_lock - * locks the tmp kernel PTE mappings we used to copy data - * - * - pmap_zero_page_lock - * locks the tmp kernel PTE mapping we use to zero a page - * - * - pmap_tmpptp_lock - * locks the tmp kernel PTE mapping we use to look at a PTP - * in another process - * - * XXX: would be nice to have per-CPU VAs for the above 4 */ /* * locking data structures */ -#ifdef __OpenBSD__ -/* XXX */ -#define spinlockinit(lock, name, flags) /* nada */ -#define spinlockmgr(lock, flags, slock) /* nada */ -#endif - -struct lock pmap_main_lock; struct simplelock pvalloc_lock; struct simplelock pmaps_lock; -struct simplelock pmap_copy_page_lock; -struct simplelock pmap_zero_page_lock; -struct simplelock pmap_tmpptp_lock; + +#if defined(MULTIPROCESSOR) && 0 + +struct lock pmap_main_lock; #define PMAP_MAP_TO_HEAD_LOCK() \ spinlockmgr(&pmap_main_lock, LK_SHARED, (void *) 0) @@ -265,6 +247,59 @@ struct simplelock pmap_tmpptp_lock; #define PMAP_HEAD_TO_MAP_UNLOCK() \ spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0) +#else + +#define PMAP_MAP_TO_HEAD_LOCK() /* null */ +#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */ + +#define PMAP_HEAD_TO_MAP_LOCK() /* null */ +#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */ + +#endif + +/* + * TLB Shootdown: + * + * When a mapping is changed in a pmap, the TLB entry corresponding to + * the virtual address must be invalidated on all processors. In order + * to accomplish this on systems with multiple processors, messages are + * sent from the processor which performs the mapping change to all + * processors on which the pmap is active. For other processors, the + * ASN generation numbers for that processor is invalidated, so that + * the next time the pmap is activated on that processor, a new ASN + * will be allocated (which implicitly invalidates all TLB entries). + * + * Shootdown job queue entries are allocated using a simple special- + * purpose allocator for speed. 
+ */ +struct pmap_tlb_shootdown_job { + TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list; + vaddr_t pj_va; /* virtual address */ + pmap_t pj_pmap; /* the pmap which maps the address */ + pt_entry_t pj_pte; /* the PTE bits */ + struct pmap_tlb_shootdown_job *pj_nextfree; +}; + +struct pmap_tlb_shootdown_q { + TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head; + int pq_pte; /* aggregate PTE bits */ + int pq_count; /* number of pending requests */ + struct SIMPLELOCK pq_slock; /* spin lock on queue */ + int pq_flushg; /* pending flush global */ + int pq_flushu; /* pending flush user */ +} pmap_tlb_shootdown_q[I386_MAXPROCS]; + +#define PMAP_TLB_MAXJOBS 16 + +void pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *); +struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get( + struct pmap_tlb_shootdown_q *); +void pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *, + struct pmap_tlb_shootdown_job *); + +struct SIMPLELOCK pmap_tlb_shootdown_job_lock; +struct pmap_tlb_shootdown_job *pj_page, *pj_free; + /* * global data structures */ @@ -348,6 +383,20 @@ static struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */ struct pool pmap_pmap_pool; /* + * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a + * I386_MAXPROCS*NPTECL array of PTE's, to avoid cache line thrashing + * due to false sharing. 
+ */ + +#ifdef MULTIPROCESSOR +#define PTESLEW(pte, id) ((pte)+(id)*NPTECL) +#define VASLEW(va,id) ((va)+(id)*NPTECL*NBPG) +#else +#define PTESLEW(pte, id) (pte) +#define VASLEW(va,id) (va) +#endif + +/* * special VAs and the PTEs that map them */ @@ -373,41 +422,42 @@ extern vaddr_t pentium_idt_vaddr; * local prototypes */ -static struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t); -static struct vm_page *pmap_alloc_ptp(struct pmap *, int, boolean_t); -static struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */ +struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t); +struct vm_page *pmap_alloc_ptp(struct pmap *, int, boolean_t); +struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */ #define ALLOCPV_NEED 0 /* need PV now */ #define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ #define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ -static struct pv_entry *pmap_alloc_pvpage(struct pmap *, int); -static void pmap_enter_pv(struct pv_head *, +struct pv_entry *pmap_alloc_pvpage(struct pmap *, int); +void pmap_enter_pv(struct pv_head *, struct pv_entry *, struct pmap *, vaddr_t, struct vm_page *); -static void pmap_free_pv(struct pmap *, struct pv_entry *); -static void pmap_free_pvs(struct pmap *, struct pv_entry *); -static void pmap_free_pv_doit(struct pv_entry *); -static void pmap_free_pvpage(void); -static struct vm_page *pmap_get_ptp(struct pmap *, int, boolean_t); -static boolean_t pmap_is_curpmap(struct pmap *); -static pt_entry_t *pmap_map_ptes(struct pmap *); -static struct pv_entry *pmap_remove_pv(struct pv_head *, struct pmap *, - vaddr_t); -static boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, - pt_entry_t *, vaddr_t); -static void pmap_remove_ptes(struct pmap *, - struct pmap_remove_record *, - struct vm_page *, vaddr_t, - vaddr_t, vaddr_t); -static struct vm_page *pmap_steal_ptp(struct uvm_object *, +void pmap_free_pv(struct pmap *, struct pv_entry *); +void 
pmap_free_pvs(struct pmap *, struct pv_entry *); +void pmap_free_pv_doit(struct pv_entry *); +void pmap_free_pvpage(void); +struct vm_page *pmap_get_ptp(struct pmap *, int, boolean_t); +boolean_t pmap_is_curpmap(struct pmap *); +boolean_t pmap_is_active(struct pmap *, int); +pt_entry_t *pmap_map_ptes(struct pmap *); +struct pv_entry *pmap_remove_pv(struct pv_head *, struct pmap *, vaddr_t); -static vaddr_t pmap_tmpmap_pa(paddr_t); -static pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *); -static void pmap_tmpunmap_pa(void); -static void pmap_tmpunmap_pvepte(struct pv_entry *); -static boolean_t pmap_try_steal_pv(struct pv_head *, +boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *, + vaddr_t, int32_t *); +void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, + vaddr_t, vaddr_t, int32_t *); +struct vm_page *pmap_steal_ptp(struct uvm_object *, vaddr_t); +vaddr_t pmap_tmpmap_pa(paddr_t); +pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *); +void pmap_tmpunmap_pa(void); +void pmap_tmpunmap_pvepte(struct pv_entry *); +void pmap_apte_flush(struct pmap *); +boolean_t pmap_try_steal_pv(struct pv_head *, struct pv_entry *, struct pv_entry *); -static void pmap_unmap_ptes(struct pmap *); +void pmap_unmap_ptes(struct pmap *); +void pmap_exec_account(struct pmap *, vaddr_t, pt_entry_t, + pt_entry_t); void pmap_pinit(pmap_t); void pmap_release(pmap_t); @@ -423,7 +473,7 @@ void pmap_zero_phys(paddr_t); * of course the kernel is always loaded */ -__inline static boolean_t +boolean_t pmap_is_curpmap(pmap) struct pmap *pmap; { @@ -432,50 +482,72 @@ pmap_is_curpmap(pmap) } /* + * pmap_is_active: is this pmap loaded into the specified processor's %cr3? 
+ */ + +boolean_t +pmap_is_active(pmap, cpu_id) + struct pmap *pmap; + int cpu_id; +{ + + return (pmap == pmap_kernel() || + (pmap->pm_cpus & (1U << cpu_id)) != 0); +} + +/* * pmap_tmpmap_pa: map a page in for tmp usage - * - * => returns with pmap_tmpptp_lock held */ -__inline static vaddr_t +vaddr_t pmap_tmpmap_pa(pa) paddr_t pa; { - simple_lock(&pmap_tmpptp_lock); +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *ptpte = PTESLEW(ptp_pte, id); + caddr_t ptpva = VASLEW(ptpp, id); #if defined(DIAGNOSTIC) - if (*ptp_pte) + if (*ptpte) panic("pmap_tmpmap_pa: ptp_pte in use?"); #endif - *ptp_pte = PG_V | PG_RW | pa; /* always a new mapping */ - return((vaddr_t)ptpp); + *ptpte = PG_V | PG_RW | pa; /* always a new mapping */ + return((vaddr_t)ptpva); } /* * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) - * - * => we release pmap_tmpptp_lock */ -__inline static void +void pmap_tmpunmap_pa() { +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *ptpte = PTESLEW(ptp_pte, id); + caddr_t ptpva = VASLEW(ptpp, id); #if defined(DIAGNOSTIC) - if (!pmap_valid_entry(*ptp_pte)) + if (!pmap_valid_entry(*ptpte)) panic("pmap_tmpunmap_pa: our pte invalid?"); #endif - *ptp_pte = 0; /* zap! */ - pmap_update_pg((vaddr_t)ptpp); - simple_unlock(&pmap_tmpptp_lock); + *ptpte = 0; /* zap! */ + pmap_update_pg((vaddr_t)ptpva); +#ifdef MULTIPROCESSOR + /* + * No need for tlb shootdown here, since ptp_pte is per-CPU. + */ +#endif } /* * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry * * => do NOT use this on kernel mappings [why? 
because pv_ptp may be NULL] - * => we may grab pmap_tmpptp_lock and return with it held */ -__inline static pt_entry_t * +pt_entry_t * pmap_tmpmap_pvepte(pve) struct pv_entry *pve; { @@ -494,11 +566,9 @@ pmap_tmpmap_pvepte(pve) /* * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte - * - * => we will release pmap_tmpptp_lock if we hold it */ -__inline static void +void pmap_tmpunmap_pvepte(pve) struct pv_entry *pve; { @@ -509,6 +579,41 @@ pmap_tmpunmap_pvepte(pve) pmap_tmpunmap_pa(); } +void +pmap_apte_flush(struct pmap *pmap) +{ +#if defined(MULTIPROCESSOR) + struct pmap_tlb_shootdown_q *pq; + struct cpu_info *ci, *self = curcpu(); + CPU_INFO_ITERATOR cii; + int s; +#endif + + tlbflush(); /* flush TLB on current processor */ +#if defined(MULTIPROCESSOR) + /* + * Flush the APTE mapping from all other CPUs that + * are using the pmap we are using (whose APTE space + * is the one we've just modified). + * + * XXXthorpej -- find a way to defer the IPI. + */ + for (CPU_INFO_FOREACH(cii, ci)) { + if (ci == self) + continue; + if (pmap_is_active(pmap, ci->ci_cpuid)) { + pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; + s = splipi(); + SIMPLE_LOCK(&pq->pq_slock); + pq->pq_flushu++; + SIMPLE_UNLOCK(&pq->pq_slock); + splx(s); + i386_send_ipi(ci, I386_IPI_TLB); + } + } +#endif +} + /* * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in * @@ -516,7 +621,7 @@ pmap_tmpunmap_pvepte(pve) * => must be undone with pmap_unmap_ptes before returning */ -__inline static pt_entry_t * +pt_entry_t * pmap_map_ptes(pmap) struct pmap *pmap; { @@ -547,7 +652,7 @@ pmap_map_ptes(pmap) if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) { *APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V); if (pmap_valid_entry(opde)) - tlbflush(); + pmap_apte_flush(pmap); } return(APTE_BASE); } @@ -556,7 +661,7 @@ pmap_map_ptes(pmap) * pmap_unmap_ptes: unlock the PTE mapping of "pmap" */ -__inline static void +void pmap_unmap_ptes(pmap) struct pmap *pmap; { @@
-566,12 +671,16 @@ pmap_unmap_ptes(pmap) if (pmap_is_curpmap(pmap)) { simple_unlock(&pmap->pm_obj.vmobjlock); } else { +#if defined(MULTIPROCESSOR) + *APDP_PDE = 0; + pmap_apte_flush(curpcb->pcb_pmap); +#endif simple_unlock(&pmap->pm_obj.vmobjlock); simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock); } } -__inline static void +void pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte) { @@ -579,9 +688,18 @@ pmap_exec_account(struct pmap *pm, vaddr_t va, pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) return; - if ((opte ^ npte) & PG_X) + if ((opte ^ npte) & PG_X) { +#ifdef MULTIPROCESSOR + int32_t cpumask = 0; + + pmap_tlb_shootdown(pm, va, opte, &cpumask); + pmap_tlb_shootnow(cpumask); +#else + /* Don't bother deferring in the single CPU case. */ pmap_update_pg(va); - +#endif + } + /* * Executability was removed on the last executable change. * Reset the code segment to something conservative and @@ -656,14 +774,22 @@ pmap_kenter_pa(va, pa, prot) paddr_t pa; vm_prot_t prot; { - pt_entry_t *pte, opte; + pt_entry_t *pte, opte, npte; pte = vtopte(va); - opte = *pte; - *pte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | - PG_V | pmap_pg_g; /* zap! */ - if (pmap_valid_entry(opte)) + npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V | pmap_pg_g; + opte = i386_atomic_testset_ul(pte, npte); /* zap! */ + if (pmap_valid_entry(opte)) { +#ifdef MULTIPROCESSOR + int32_t cpumask = 0; + + pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask); + pmap_tlb_shootnow(cpumask); +#else + /* Don't bother deferring in the single CPU case. 
*/ pmap_update_pg(va); +#endif + } } /* @@ -673,8 +799,6 @@ pmap_kenter_pa(va, pa, prot) * => caller must dispose of any vm_page mapped in the va range * => note: not an inline function * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE - * => we assume kernel only unmaps valid addresses and thus don't bother - * checking the valid bit before doing TLB flushing */ void @@ -682,25 +806,31 @@ pmap_kremove(va, len) vaddr_t va; vsize_t len; { - pt_entry_t *pte; + pt_entry_t *pte, opte; +#ifdef MULTIPROCESSOR + int32_t cpumask = 0; +#endif len >>= PAGE_SHIFT; - for ( /* null */ ; len ; len--, va += NBPG) { - pte = vtopte(va); + for ( /* null */ ; len ; len--, va += PAGE_SIZE) { + if (va < VM_MIN_KERNEL_ADDRESS) + pte = vtopte(va); + else + pte = kvtopte(va); + opte = i386_atomic_testset_ul(pte, 0); /* zap! */ #ifdef DIAGNOSTIC - if (*pte & PG_PVLIST) - panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", - va); -#endif - *pte = 0; /* zap! */ -#if defined(I386_CPU) - if (cpu_class != CPUCLASS_386) + if (opte & PG_PVLIST) + panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", va); #endif + if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) +#ifdef MULTIPROCESSOR + pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask); +#else pmap_update_pg(va); +#endif } -#if defined(I386_CPU) - if (cpu_class == CPUCLASS_386) - tlbflush(); +#ifdef MULTIPROCESSOR + pmap_tlb_shootnow(cpumask); #endif } @@ -729,6 +859,7 @@ pmap_bootstrap(kva_start) struct pmap *kpm; vaddr_t kva; pt_entry_t *pte; + int i; /* * set the page size (default value is 4K which is ok) @@ -831,6 +962,26 @@ pmap_bootstrap(kva_start) pte = PTE_BASE + i386_btop(virtual_avail); +#ifdef MULTIPROCESSOR + /* + * Waste some VA space to avoid false sharing of cache lines + * for page table pages: Give each possible CPU a cache line + * of PTE's (8) to play with, though we only need 4. 
We could + * recycle some of this waste by putting the idle stacks here + * as well; we could waste less space if we knew the largest + * CPU ID beforehand. + */ + csrcp = (caddr_t) virtual_avail; csrc_pte = pte; + + cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; + + zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; + + ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; + + virtual_avail += PAGE_SIZE * I386_MAXPROCS * NPTECL; + pte += I386_MAXPROCS * NPTECL; +#else csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ virtual_avail += PAGE_SIZE; pte++; /* advance */ @@ -842,10 +993,11 @@ pmap_bootstrap(kva_start) ptpp = (caddr_t) virtual_avail; ptp_pte = pte; virtual_avail += PAGE_SIZE; pte++; +#endif /* XXX: vmmap used by mem.c... should be uvm_map_reserve */ vmmap = (char *)virtual_avail; /* don't need pte */ - virtual_avail += PAGE_SIZE; pte++; + virtual_avail += PAGE_SIZE; #ifdef __NetBSD msgbuf_vaddr = virtual_avail; /* don't need pte */ @@ -883,12 +1035,11 @@ pmap_bootstrap(kva_start) * init the static-global locks and global lists. */ +#if defined(MULTIPROCESSOR) && 0 spinlockinit(&pmap_main_lock, "pmaplk", 0); +#endif simple_lock_init(&pvalloc_lock); simple_lock_init(&pmaps_lock); - simple_lock_init(&pmap_copy_page_lock); - simple_lock_init(&pmap_zero_page_lock); - simple_lock_init(&pmap_tmpptp_lock); LIST_INIT(&pmaps); TAILQ_INIT(&pv_freepages); TAILQ_INIT(&pv_unusedpgs); @@ -900,6 +1051,17 @@ pmap_bootstrap(kva_start) pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", &pool_allocator_nointr); + /* + * Initialize the TLB shootdown queues. 
+ */ + + SIMPLE_LOCK_INIT(&pmap_tlb_shootdown_job_lock); + + for (i = 0; i < I386_MAXPROCS; i++) { + TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head); + SIMPLE_LOCK_INIT(&pmap_tlb_shootdown_q[i].pq_slock); + } + #ifdef __NetBSD__ /* * we must call uvm_page_physload() after we are done playing with @@ -1009,6 +1171,15 @@ pmap_init() pv_nfpvents = 0; (void) pmap_add_pvpage(pv_initpage, FALSE); + pj_page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE); + if (pj_page == NULL) + panic("pmap_init: pj_page"); + + for (i = 0; i < PAGE_SIZE / sizeof *pj_page - 1; i++) + pj_page[i].pj_nextfree = &pj_page[i + 1]; + pj_page[i].pj_nextfree = NULL; + pj_free = &pj_page[0]; + /* * done: pmap module is up (and ready for business) */ @@ -1043,7 +1214,7 @@ pmap_init() * "try" is for optional functions like pmap_copy(). */ -__inline static struct pv_entry * +struct pv_entry * pmap_alloc_pv(pmap, mode) struct pmap *pmap; int mode; @@ -1100,7 +1271,7 @@ pmap_alloc_pv(pmap, mode) * => we assume that the caller holds pvalloc_lock */ -static struct pv_entry * +struct pv_entry * pmap_alloc_pvpage(pmap, mode) struct pmap *pmap; int mode; @@ -1233,12 +1404,15 @@ steal_one: * => return true if we did it! */ -static boolean_t +boolean_t pmap_try_steal_pv(pvh, cpv, prevpv) struct pv_head *pvh; struct pv_entry *cpv, *prevpv; { - pt_entry_t *ptep; /* pointer to a PTE */ + pt_entry_t *ptep, opte; +#ifdef MULTIPROCESSOR + int32_t cpumask = 0; +#endif /* * we never steal kernel mappings or mappings from pmaps we can't lock @@ -1257,9 +1431,15 @@ pmap_try_steal_pv(pvh, cpv, prevpv) if (*ptep & PG_W) { ptep = NULL; /* wired page, avoid stealing this one */ } else { - *ptep = 0; /* zap! */ + opte = i386_atomic_testset_ul(ptep, 0); /* zap! */ +#ifdef MULTIPROCESSOR + pmap_tlb_shootdown(cpv->pv_pmap, cpv->pv_va, opte, &cpumask); + pmap_tlb_shootnow(cpumask); +#else + /* Don't bother deferring in the single CPU case. 
*/ if (pmap_is_curpmap(cpv->pv_pmap)) pmap_update_pg(cpv->pv_va); +#endif pmap_tmpunmap_pvepte(cpv); } if (ptep == NULL) { @@ -1296,7 +1476,7 @@ pmap_try_steal_pv(pvh, cpv, prevpv) * => if need_entry is true, we allocate and return one pv_entry */ -static struct pv_entry * +struct pv_entry * pmap_add_pvpage(pvp, need_entry) struct pv_page *pvp; boolean_t need_entry; @@ -1329,7 +1509,7 @@ pmap_add_pvpage(pvp, need_entry) * => we must be holding pvalloc_lock */ -__inline static void +void pmap_free_pv_doit(pv) struct pv_entry *pv; { @@ -1364,7 +1544,7 @@ pmap_free_pv_doit(pv) * => we gain the pvalloc_lock */ -__inline static void +void pmap_free_pv(pmap, pv) struct pmap *pmap; struct pv_entry *pv; @@ -1389,7 +1569,7 @@ pmap_free_pv(pmap, pv) * => we gain the pvalloc_lock */ -__inline static void +void pmap_free_pvs(pmap, pvs) struct pmap *pmap; struct pv_entry *pvs; @@ -1426,7 +1606,7 @@ pmap_free_pvs(pmap, pvs) * holding kmem_object's lock. */ -static void +void pmap_free_pvpage() { int s; @@ -1488,7 +1668,7 @@ pmap_free_pvpage() * => caller should adjust ptp's wire_count before calling */ -__inline static void +void pmap_enter_pv(pvh, pve, pmap, va, ptp) struct pv_head *pvh; struct pv_entry *pve; /* preallocated pve for us to use */ @@ -1515,7 +1695,7 @@ pmap_enter_pv(pvh, pve, pmap, va, ptp) * => we return the removed pve */ -__inline static struct pv_entry * +struct pv_entry * pmap_remove_pv(pvh, pmap, va) struct pv_head *pvh; struct pmap *pmap; @@ -1554,7 +1734,7 @@ pmap_remove_pv(pvh, pmap, va) * from another pmap (e.g. 
during optional functions like pmap_copy) */ -__inline static struct vm_page * +struct vm_page * pmap_alloc_ptp(pmap, pde_index, just_try) struct pmap *pmap; int pde_index; @@ -1596,7 +1776,7 @@ pmap_alloc_ptp(pmap, pde_index, just_try) * enough free pages around for us to allocate off the free page list] */ -static struct vm_page * +struct vm_page * pmap_steal_ptp(obj, offset) struct uvm_object *obj; vaddr_t offset; @@ -1607,6 +1787,7 @@ pmap_steal_ptp(obj, offset) pt_entry_t *ptes; int idx, lcv; boolean_t caller_locked, we_locked; + int32_t cpumask = 0; simple_lock(&pmaps_lock); if (pmaps_hand == NULL) @@ -1644,10 +1825,9 @@ pmap_steal_ptp(obj, offset) (PG_V|PG_W)) break; if (lcv == PTES_PER_PTP) - pmap_remove_ptes(pmaps_hand, NULL, ptp, - (vaddr_t)ptes, - ptp_i2v(idx), - ptp_i2v(idx+1)); + pmap_remove_ptes(pmaps_hand, ptp, + (vaddr_t)ptes, ptp_i2v(idx), + ptp_i2v(idx+1), &cpumask); pmap_tmpunmap_pa(); if (lcv != PTES_PER_PTP) @@ -1660,14 +1840,17 @@ pmap_steal_ptp(obj, offset) pmaps_hand->pm_pdir[idx] = 0; /* zap! */ pmaps_hand->pm_stats.resident_count--; +#ifdef MULTIPROCESSOR + pmap_apte_flush(pmaps_hand); +#else if (pmap_is_curpmap(pmaps_hand)) - tlbflush(); + pmap_apte_flush(pmaps_hand); else if (pmap_valid_entry(*APDP_PDE) && - (*APDP_PDE & PG_FRAME) == - pmaps_hand->pm_pdirpa) { + (*APDP_PDE & PG_FRAME) == + pmaps_hand->pm_pdirpa) pmap_update_pg(((vaddr_t)APTE_BASE) + ptp->offset); - } +#endif /* put it in our pmap! */ uvm_pagerealloc(ptp, obj, offset); @@ -1687,6 +1870,7 @@ pmap_steal_ptp(obj, offset) } while (ptp == NULL && pmaps_hand != firstpmap); simple_unlock(&pmaps_lock); + pmap_tlb_shootnow(cpumask); return(ptp); } @@ -1697,7 +1881,7 @@ pmap_steal_ptp(obj, offset) * => pmap should be locked */ -static struct vm_page * +struct vm_page * pmap_get_ptp(pmap, pde_index, just_try) struct pmap *pmap; int pde_index; @@ -1873,7 +2057,10 @@ pmap_release(pmap) uvm_pagefree(pg); } - /* XXX: need to flush it out of other processor's APTE space? 
*/ + /* + * MULTIPROCESSOR -- no need to flush out of other processors' + * APTE space because we do that in pmap_unmap_ptes(). + */ uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG); #ifdef USER_LDT @@ -1993,22 +2180,31 @@ pmap_activate(p) pcb->pcb_pmap = pmap; pcb->pcb_ldt_sel = pmap->pm_ldt_sel; pcb->pcb_cr3 = pmap->pm_pdirpa; - if (p == curproc) + if (p == curproc) { lcr3(pcb->pcb_cr3); - if (pcb == curpcb) lldt(pcb->pcb_ldt_sel); + + /* + * mark the pmap in use by this processor. + */ + i386_atomic_setbits_l(&pmap->pm_cpus, (1U << cpu_number())); + } } /* * pmap_deactivate: deactivate a process' pmap - * - * => XXX: what should this do, if anything? */ void pmap_deactivate(p) struct proc *p; { + struct pmap *pmap = p->p_vmspace->vm_map.pmap; + + /* + * mark the pmap no longer in use by this processor. + */ + i386_atomic_clearbits_l(&pmap->pm_cpus, (1U << cpu_number())); } /* @@ -2065,19 +2261,7 @@ void (*pagezero)(void *, size_t) = bzero; void pmap_zero_page(struct vm_page *pg) { - paddr_t pa = VM_PAGE_TO_PHYS(pg); - - simple_lock(&pmap_zero_page_lock); -#ifdef DIAGNOSTIC - if (*zero_pte) - panic("pmap_zero_page: lock botch"); -#endif - - *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ - pagezero(zerop, PAGE_SIZE); /* zero */ - *zero_pte = 0; /* zap! */ - pmap_update_pg((vaddr_t)zerop); /* flush TLB */ - simple_unlock(&pmap_zero_page_lock); + pmap_zero_phys(VM_PAGE_TO_PHYS(pg)); } /* @@ -2087,17 +2271,21 @@ pmap_zero_page(struct vm_page *pg) void pmap_zero_phys(paddr_t pa) { - simple_lock(&pmap_zero_page_lock); +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *zpte = PTESLEW(zero_pte, id); + caddr_t zerova = VASLEW(zerop, id); + #ifdef DIAGNOSTIC - if (*zero_pte) - panic("pmap_zero_page: lock botch"); + if (*zpte) + panic("pmap_zero_phys: lock botch"); #endif - *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ - pagezero(zerop, PAGE_SIZE); /* zero */ - *zero_pte = 0; /* zap! 
*/ - pmap_update_pg((vaddr_t)zerop); /* flush TLB */ - simple_unlock(&pmap_zero_page_lock); + *zpte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ + pmap_update_pg((vaddr_t)zerova); /* flush TLB */ + pagezero(zerova, PAGE_SIZE); /* zero */ + *zpte = 0; /* zap! */ } /* @@ -2108,18 +2296,22 @@ boolean_t pmap_zero_page_uncached(pa) paddr_t pa; { - simple_lock(&pmap_zero_page_lock); +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *zpte = PTESLEW(zero_pte, id); + caddr_t zerova = VASLEW(zerop, id); + #ifdef DIAGNOSTIC - if (*zero_pte) + if (*zpte) panic("pmap_zero_page_uncached: lock botch"); #endif - *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW | /* map in */ + *zpte = (pa & PG_FRAME) | PG_V | PG_RW | /* map in */ ((cpu_class != CPUCLASS_386) ? PG_N : 0); - pagezero(zerop, PAGE_SIZE); /* zero */ - *zero_pte = 0; /* zap! */ - pmap_update_pg((vaddr_t)zerop); /* flush TLB */ - simple_unlock(&pmap_zero_page_lock); + pmap_update_pg((vaddr_t)zerova); /* flush TLB */ + pagezero(zerova, PAGE_SIZE); /* zero */ + *zpte = 0; /* zap! */ return (TRUE); } @@ -2133,19 +2325,28 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) { paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg); paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg); +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *spte = PTESLEW(csrc_pte,id); + pt_entry_t *dpte = PTESLEW(cdst_pte,id); + caddr_t csrcva = VASLEW(csrcp, id); + caddr_t cdstva = VASLEW(cdstp, id); - simple_lock(&pmap_copy_page_lock); #ifdef DIAGNOSTIC - if (*csrc_pte || *cdst_pte) + if (*spte || *dpte) panic("pmap_copy_page: lock botch"); #endif - *csrc_pte = (srcpa & PG_FRAME) | PG_V | PG_RW; - *cdst_pte = (dstpa & PG_FRAME) | PG_V | PG_RW; - bcopy(csrcp, cdstp, PAGE_SIZE); - *csrc_pte = *cdst_pte = 0; /* zap! 
*/ - pmap_update_2pg((vaddr_t)csrcp, (vaddr_t)cdstp); - simple_unlock(&pmap_copy_page_lock); + *spte = (srcpa & PG_FRAME) | PG_V | PG_RW; + *dpte = (dstpa & PG_FRAME) | PG_V | PG_RW; + pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva); + bcopy(csrcva, cdstva, PAGE_SIZE); + *spte = *dpte = 0; /* zap! */ + pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva); +#ifdef MULTIPROCESSOR + /* Using per-cpu VA; no shootdown required here. */ +#endif } /* @@ -2163,13 +2364,13 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) * => PTP should be null if pmap == pmap_kernel() */ -static void -pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva) +void +pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp) struct pmap *pmap; - struct pmap_remove_record *pmap_rr; struct vm_page *ptp; vaddr_t ptpva; vaddr_t startva, endva; + int32_t *cpumaskp; { struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ struct pv_entry *pve; @@ -2191,29 +2392,23 @@ pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva) if (!pmap_valid_entry(*pte)) continue; /* VA not mapped */ - opte = *pte; /* save the old PTE */ - *pte = 0; /* zap! */ + /* atomically save the old PTE and zap! it */ + opte = i386_atomic_testset_ul(pte, 0); + if (opte & PG_W) pmap->pm_stats.wired_count--; pmap->pm_stats.resident_count--; - if (pmap_rr) { /* worried about tlb flushing? 
*/ - if (opte & PG_G) { - /* PG_G requires this */ - pmap_update_pg(startva); - } else { - if (pmap_rr->prr_npages < PMAP_RR_MAX) { - pmap_rr->prr_vas[pmap_rr->prr_npages++] - = startva; - } else { - if (pmap_rr->prr_npages == PMAP_RR_MAX) - /* signal an overflow */ - pmap_rr->prr_npages++; - } - } - } - if (ptp) + if (opte & PG_U) + pmap_tlb_shootdown(pmap, startva, opte, cpumaskp); + + if (ptp) { ptp->wire_count--; /* dropping a PTE */ + /* Make sure that the PDE is flushed */ + if ((ptp->wire_count <= 1) && !(opte & PG_U)) + pmap_tlb_shootdown(pmap, startva, opte, + cpumaskp); + } /* * if we are not on a pv_head list we are done. @@ -2266,12 +2461,13 @@ pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva) * => returns true if we removed a mapping */ -static boolean_t -pmap_remove_pte(pmap, ptp, pte, va) +boolean_t +pmap_remove_pte(pmap, ptp, pte, va, cpumaskp) struct pmap *pmap; struct vm_page *ptp; pt_entry_t *pte; vaddr_t va; + int32_t *cpumaskp; { pt_entry_t opte; int bank, off; @@ -2289,11 +2485,16 @@ pmap_remove_pte(pmap, ptp, pte, va) pmap->pm_stats.wired_count--; pmap->pm_stats.resident_count--; - if (ptp) + if (opte & PG_U) + pmap_tlb_shootdown(pmap, va, opte, cpumaskp); + + if (ptp) { ptp->wire_count--; /* dropping a PTE */ + /* Make sure that the PDE is flushed */ + if ((ptp->wire_count <= 1) && !(opte & PG_U)) + pmap_tlb_shootdown(pmap, va, opte, cpumaskp); - if (pmap_is_curpmap(pmap)) - pmap_update_pg(va); /* flush TLB */ + } /* * if we are not on a pv_head list we are done. 
@@ -2338,18 +2539,18 @@ pmap_remove(pmap, sva, eva) struct pmap *pmap; vaddr_t sva, eva; { - pt_entry_t *ptes; + pt_entry_t *ptes, opte; boolean_t result; paddr_t ptppa; vaddr_t blkendva; struct vm_page *ptp; - struct pmap_remove_record pmap_rr, *prr; + int32_t cpumask = 0; /* * we lock in the pmap => pv_head direction */ - PMAP_MAP_TO_HEAD_LOCK(); + PMAP_MAP_TO_HEAD_LOCK(); ptes = pmap_map_ptes(pmap); /* locks pmap */ /* @@ -2385,7 +2586,7 @@ pmap_remove(pmap, sva, eva) /* do it! */ result = pmap_remove_pte(pmap, ptp, - &ptes[i386_btop(sva)], sva); + &ptes[i386_btop(sva)], sva, &cpumask); /* * if mapping removed and the PTP is no longer @@ -2393,15 +2594,30 @@ pmap_remove(pmap, sva, eva) */ if (result && ptp && ptp->wire_count <= 1) { - pmap->pm_pdir[pdei(sva)] = 0; /* zap! */ -#if defined(I386_CPU) - /* already dumped whole TLB on i386 */ - if (cpu_class != CPUCLASS_386) + /* zap! */ + opte = i386_atomic_testset_ul( + &pmap->pm_pdir[pdei(sva)], 0); +#ifdef MULTIPROCESSOR + /* + * XXXthorpej Redundant shootdown can happen + * here if we're using APTE space. + */ +#endif + pmap_tlb_shootdown(curpcb->pcb_pmap, + ((vaddr_t)ptes) + ptp->offset, opte, + &cpumask); +#ifdef MULTIPROCESSOR + /* + * Always shoot down the pmap's self-mapping + * of the PTP. + * XXXthorpej Redundant shootdown can happen + * here if pmap == curpcb->pcb_pmap (not APTE + * space). 
+ */ + pmap_tlb_shootdown(pmap, + ((vaddr_t)PTE_BASE) + ptp->offset, opte, + &cpumask); #endif - { - pmap_update_pg(((vaddr_t) ptes) + - ptp->offset); - } pmap->pm_stats.resident_count--; if (pmap->pm_ptphint == ptp) pmap->pm_ptphint = @@ -2410,26 +2626,12 @@ pmap_remove(pmap, sva, eva) uvm_pagefree(ptp); } } - + pmap_tlb_shootnow(cpumask); pmap_unmap_ptes(pmap); /* unlock pmap */ PMAP_MAP_TO_HEAD_UNLOCK(); return; } - /* - * removing a range of pages: we unmap in PTP sized blocks (4MB) - * - * if we are the currently loaded pmap, we use prr to keep track - * of the VAs we unload so that we can flush them out of the tlb. - */ - - if (pmap_is_curpmap(pmap)) { - prr = &pmap_rr; - prr->prr_npages = 0; - } else { - prr = NULL; - } - for (/* null */ ; sva < eva ; sva = blkendva) { /* determine range of block */ @@ -2479,17 +2681,31 @@ pmap_remove(pmap, sva, eva) #endif } } - pmap_remove_ptes(pmap, prr, ptp, - (vaddr_t)&ptes[i386_btop(sva)], sva, blkendva); + pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[i386_btop(sva)], + sva, blkendva, &cpumask); /* if PTP is no longer being used, free it! */ if (ptp && ptp->wire_count <= 1) { - pmap->pm_pdir[pdei(sva)] = 0; /* zap! */ - pmap_update_pg( ((vaddr_t) ptes) + ptp->offset); -#if defined(I386_CPU) - /* cancel possible pending pmap update on i386 */ - if (cpu_class == CPUCLASS_386 && prr) - prr->prr_npages = 0; + /* zap! */ + opte = i386_atomic_testset_ul( + &pmap->pm_pdir[pdei(sva)], 0); +#if defined(MULTIPROCESSOR) + /* + * XXXthorpej Redundant shootdown can happen here + * if we're using APTE space. + */ +#endif + pmap_tlb_shootdown(curpcb->pcb_pmap, + ((vaddr_t)ptes) + ptp->offset, opte, &cpumask); +#if defined(MULTIPROCESSOR) + /* + * Always shoot down the pmap's self-mapping + * of the PTP. + * XXXthorpej Redundant shootdown can happen here + * if pmap == curpcb->pcb_pmap (not APTE space). 
+ */ + pmap_tlb_shootdown(pmap, + ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask); #endif pmap->pm_stats.resident_count--; if (pmap->pm_ptphint == ptp) /* update hint? */ @@ -2500,27 +2716,7 @@ pmap_remove(pmap, sva, eva) } } - /* - * if we kept a removal record and removed some pages update the TLB - */ - - if (prr && prr->prr_npages) { -#if defined(I386_CPU) - if (cpu_class == CPUCLASS_386) { - tlbflush(); - } else -#endif - { /* not I386 */ - if (prr->prr_npages > PMAP_RR_MAX) { - tlbflush(); - } else { - while (prr->prr_npages) { - pmap_update_pg( - prr->prr_vas[--prr->prr_npages]); - } - } - } /* not I386 */ - } + pmap_tlb_shootnow(cpumask); pmap_unmap_ptes(pmap); PMAP_MAP_TO_HEAD_UNLOCK(); } @@ -2540,9 +2736,7 @@ pmap_page_remove(pg) struct pv_head *pvh; struct pv_entry *pve; pt_entry_t *ptes, opte; -#if defined(I386_CPU) - boolean_t needs_update = FALSE; -#endif + int32_t cpumask = 0; /* XXX: vm_page should either contain pv_head or have a pointer to it */ bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); @@ -2590,14 +2784,10 @@ pmap_page_remove(pg) pve->pv_pmap->pm_stats.wired_count--; pve->pv_pmap->pm_stats.resident_count--; - if (pmap_is_curpmap(pve->pv_pmap)) { -#if defined(I386_CPU) - if (cpu_class == CPUCLASS_386) - needs_update = TRUE; - else -#endif - pmap_update_pg(pve->pv_va); - } + /* Shootdown only if referenced */ + if (opte & PG_U) + pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte, + &cpumask); /* sync R/M bits */ vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); @@ -2606,12 +2796,29 @@ pmap_page_remove(pg) if (pve->pv_ptp) { pve->pv_ptp->wire_count--; if (pve->pv_ptp->wire_count <= 1) { + /* + * Do we have to shootdown the page just to + * get the pte out of the TLB ? + */ + if(!(opte & PG_U)) + pmap_tlb_shootdown(pve->pv_pmap, + pve->pv_va, opte, &cpumask); + /* zap! 
*/ - pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] = 0; - pmap_update_pg(((vaddr_t)ptes) + - pve->pv_ptp->offset); -#if defined(I386_CPU) - needs_update = FALSE; + opte = i386_atomic_testset_ul( + &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)], + 0); + pmap_tlb_shootdown(curpcb->pcb_pmap, + ((vaddr_t)ptes) + pve->pv_ptp->offset, + opte, &cpumask); +#if defined(MULTIPROCESSOR) + /* + * Always shoot down the other pmap's + * self-mapping of the PTP. + */ + pmap_tlb_shootdown(pve->pv_pmap, + ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset, + opte, &cpumask); #endif pve->pv_pmap->pm_stats.resident_count--; /* update hint? */ @@ -2628,10 +2835,7 @@ pmap_page_remove(pg) pvh->pvh_list = NULL; simple_unlock(&pvh->pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); -#if defined(I386_CPU) - if (needs_update) - tlbflush(); -#endif + pmap_tlb_shootnow(cpumask); } /* @@ -2719,11 +2923,9 @@ pmap_change_attrs(pg, setbits, clearbits) int bank, off; struct pv_head *pvh; struct pv_entry *pve; - pt_entry_t *ptes, npte; + pt_entry_t *ptes, npte, opte; char *myattrs; -#if defined(I386_CPU) - boolean_t needs_update = FALSE; -#endif + int32_t cpumask = 0; /* XXX: vm_page should either contain pv_head or have a pointer to it */ bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); @@ -2753,27 +2955,18 @@ pmap_change_attrs(pg, setbits, clearbits) result |= (npte & clearbits); npte = (npte | setbits) & ~clearbits; if (ptes[i386_btop(pve->pv_va)] != npte) { - ptes[i386_btop(pve->pv_va)] = npte; /* zap! 
*/ - - if (pmap_is_curpmap(pve->pv_pmap)) { -#if defined(I386_CPU) - if (cpu_class == CPUCLASS_386) - needs_update = TRUE; - else -#endif - pmap_update_pg(pve->pv_va); - } + opte = i386_atomic_testset_ul( + &ptes[i386_btop(pve->pv_va)], npte); + pmap_tlb_shootdown(pve->pv_pmap, + i386_btop(pve->pv_va), opte, &cpumask); } pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ } simple_unlock(&pvh->pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); + pmap_tlb_shootnow(cpumask); -#if defined(I386_CPU) - if (needs_update) - tlbflush(); -#endif return(result != 0); } @@ -2809,20 +3002,12 @@ pmap_write_protect(pmap, sva, eva, prot) vm_prot_t prot; { pt_entry_t *ptes, *spte, *epte, npte; - struct pmap_remove_record pmap_rr, *prr; - vaddr_t blockend, va; + vaddr_t blockend; u_int32_t md_prot; + int32_t cpumask = 0; ptes = pmap_map_ptes(pmap); /* locks pmap */ - /* need to worry about TLB? [TLB stores protection bits] */ - if (pmap_is_curpmap(pmap)) { - prr = &pmap_rr; - prr->prr_npages = 0; - } else { - prr = NULL; - } - /* should be ok, but just in case ... */ sva &= PG_FRAME; eva &= PG_FRAME; @@ -2869,53 +3054,13 @@ pmap_write_protect(pmap, sva, eva, prot) if (npte != *spte) { pmap_exec_account(pmap, sva, *spte, npte); - - *spte = npte; /* zap! */ - - if (prr) { /* worried about tlb flushing? 
*/ - va = i386_ptob(spte - ptes); - if (npte & PG_G) { - /* PG_G requires this */ - pmap_update_pg(va); - } else { - if (prr->prr_npages < - PMAP_RR_MAX) { - prr->prr_vas[ - prr->prr_npages++] = - va; - } else { - if (prr->prr_npages == - PMAP_RR_MAX) - /* signal an overflow */ - prr->prr_npages++; - } - } - } /* if (prr) */ - } /* npte != *spte */ - } /* for loop */ - } - - /* - * if we kept a removal record and removed some pages update the TLB - */ - - if (prr && prr->prr_npages) { -#if defined(I386_CPU) - if (cpu_class == CPUCLASS_386) { - tlbflush(); - } else -#endif - { /* not I386 */ - if (prr->prr_npages > PMAP_RR_MAX) { - tlbflush(); - } else { - while (prr->prr_npages) { - pmap_update_pg(prr->prr_vas[ - --prr->prr_npages]); - } + i386_atomic_testset_ul(spte, npte); /* zap! */ + pmap_tlb_shootdown(pmap, sva, *spte, &cpumask); } - } /* not I386 */ + } } + + pmap_tlb_shootnow(cpumask); pmap_unmap_ptes(pmap); /* unlocks pmap */ } @@ -3179,8 +3324,18 @@ enter_now: ptes[i386_btop(va)] = npte; /* zap! */ - if ((opte & ~(PG_M|PG_U)) != npte && pmap_is_curpmap(pmap)) - pmap_update_pg(va); + if ((opte & ~(PG_M|PG_U)) != npte) { +#ifdef MULTIPROCESSOR + int32_t cpumask = 0; + + pmap_tlb_shootdown(pmap, va, opte, &cpumask); + pmap_tlb_shootnow(cpumask); +#else + /* Don't bother deferring in the single CPU case. */ + if (pmap_is_curpmap(pmap)) + pmap_update_pg(va); +#endif + } error = 0; @@ -3330,3 +3485,295 @@ pmap_dump(pmap, sva, eva) PMAP_MAP_TO_HEAD_UNLOCK(); } #endif + + +/******************** TLB shootdown code ********************/ + +void +pmap_tlb_shootnow(int32_t cpumask) +{ +#ifdef MULTIPROCESSOR + struct cpu_info *ci, *self; + CPU_INFO_ITERATOR cii; + int s; +#ifdef DIAGNOSTIC + int count = 0; +#endif +#endif + + if (cpumask == 0) + return; + +#ifdef MULTIPROCESSOR + self = curcpu(); + s = splipi(); + self->ci_tlb_ipi_mask = cpumask; +#endif + + pmap_do_tlb_shootdown(0); /* do *our* work. 
*/ + +#ifdef MULTIPROCESSOR + splx(s); + + /* + * Send the TLB IPI to other CPUs pending shootdowns. + */ + for (CPU_INFO_FOREACH(cii, ci)) { + if (ci == self) + continue; + if (cpumask & (1U << ci->ci_cpuid)) + if (i386_send_ipi(ci, I386_IPI_TLB) != 0) + i386_atomic_clearbits_l(&self->ci_tlb_ipi_mask, + (1U << ci->ci_cpuid)); + } + + while (self->ci_tlb_ipi_mask != 0) +#ifdef DIAGNOSTIC + if (count++ > 100000000) + panic("TLB IPI rendezvous failed (mask %x)", + self->ci_tlb_ipi_mask); +#else + /* XXX insert pause instruction */ + ; +#endif +#endif +} + +/* + * pmap_tlb_shootdown: + * + * Cause the TLB entry for pmap/va to be shot down. + */ +void +pmap_tlb_shootdown(pmap, va, pte, cpumaskp) + pmap_t pmap; + vaddr_t va; + pt_entry_t pte; + int32_t *cpumaskp; +{ + struct cpu_info *ci, *self; + struct pmap_tlb_shootdown_q *pq; + struct pmap_tlb_shootdown_job *pj; + CPU_INFO_ITERATOR cii; + int s; + + if (pmap_initialized == FALSE) { + pmap_update_pg(va); + return; + } + + self = curcpu(); + + s = splipi(); +#if 0 + printf("dshootdown %lx\n", va); +#endif + + for (CPU_INFO_FOREACH(cii, ci)) { + /* Note: we queue shootdown events for ourselves here! */ + if (pmap_is_active(pmap, ci->ci_cpuid) == 0) + continue; + if (ci != self && !(ci->ci_flags & CPUF_RUNNING)) + continue; + pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; + SIMPLE_LOCK(&pq->pq_slock); + + /* + * If there's a global flush already queued, or a + * non-global flush, and this pte doesn't have the G + * bit set, don't bother. + */ + if (pq->pq_flushg > 0 || + (pq->pq_flushu > 0 && (pte & pmap_pg_g) == 0)) { + SIMPLE_UNLOCK(&pq->pq_slock); + continue; + } + +#ifdef I386_CPU + /* + * i386 CPUs can't invalidate a single VA, only + * flush the entire TLB, so don't bother allocating + * jobs for them -- just queue a `flushu'. 
+ * + * XXX note that this can be executed for non-i386 + * when called early (before identifycpu() has set + * cpu_class) + */ + if (cpu_class == CPUCLASS_386) { + pq->pq_flushu++; + *cpumaskp |= 1U << ci->ci_cpuid; + SIMPLE_UNLOCK(&pq->pq_slock); + continue; + } +#endif + + pj = pmap_tlb_shootdown_job_get(pq); + pq->pq_pte |= pte; + if (pj == NULL) { + /* + * Couldn't allocate a job entry. + * Kill it now for this cpu, unless the failure + * was due to too many pending flushes; otherwise, + * tell other cpus to kill everything.. + */ + if (ci == self && pq->pq_count < PMAP_TLB_MAXJOBS) { + pmap_update_pg(va); + SIMPLE_UNLOCK(&pq->pq_slock); + continue; + } else { + if (pq->pq_pte & pmap_pg_g) + pq->pq_flushg++; + else + pq->pq_flushu++; + /* + * Since we've nailed the whole thing, + * drain the job entries pending for that + * processor. + */ + pmap_tlb_shootdown_q_drain(pq); + *cpumaskp |= 1U << ci->ci_cpuid; + } + } else { + pj->pj_pmap = pmap; + pj->pj_va = va; + pj->pj_pte = pte; + TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list); + *cpumaskp |= 1U << ci->ci_cpuid; + } + SIMPLE_UNLOCK(&pq->pq_slock); + } + splx(s); +} + +/* + * pmap_do_tlb_shootdown: + * + * Process pending TLB shootdown operations for this processor. + */ +void +pmap_do_tlb_shootdown(struct cpu_info *self) +{ + u_long cpu_id = cpu_number(); + struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; + struct pmap_tlb_shootdown_job *pj; + int s; +#ifdef MULTIPROCESSOR + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; +#endif + + s = splipi(); + + SIMPLE_LOCK(&pq->pq_slock); + + if (pq->pq_flushg) { + tlbflushg(); + pq->pq_flushg = 0; + pq->pq_flushu = 0; + pmap_tlb_shootdown_q_drain(pq); + } else { + /* + * TLB flushes for PTEs with PG_G set may be in the queue + * after a flushu, they need to be dealt with. 
+ */ + if (pq->pq_flushu) { + tlbflush(); + } + while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) { + TAILQ_REMOVE(&pq->pq_head, pj, pj_list); + + if ((!pq->pq_flushu && pmap_is_curpmap(pj->pj_pmap)) || + (pj->pj_pte & pmap_pg_g)) + pmap_update_pg(pj->pj_va); + + pmap_tlb_shootdown_job_put(pq, pj); + } + + pq->pq_flushu = pq->pq_pte = 0; + } + +#ifdef MULTIPROCESSOR + for (CPU_INFO_FOREACH(cii, ci)) + i386_atomic_clearbits_l(&ci->ci_tlb_ipi_mask, + (1U << cpu_id)); +#endif + SIMPLE_UNLOCK(&pq->pq_slock); + + splx(s); +} + +/* + * pmap_tlb_shootdown_q_drain: + * + * Drain a processor's TLB shootdown queue. We do not perform + * the shootdown operations. This is merely a convenience + * function. + * + * Note: We expect the queue to be locked. + */ +void +pmap_tlb_shootdown_q_drain(pq) + struct pmap_tlb_shootdown_q *pq; +{ + struct pmap_tlb_shootdown_job *pj; + + while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) { + TAILQ_REMOVE(&pq->pq_head, pj, pj_list); + pmap_tlb_shootdown_job_put(pq, pj); + } + pq->pq_pte = 0; +} + +/* + * pmap_tlb_shootdown_job_get: + * + * Get a TLB shootdown job queue entry. This places a limit on + * the number of outstanding jobs a processor may have. + * + * Note: We expect the queue to be locked. + */ +struct pmap_tlb_shootdown_job * +pmap_tlb_shootdown_job_get(pq) + struct pmap_tlb_shootdown_q *pq; +{ + struct pmap_tlb_shootdown_job *pj; + + if (pq->pq_count >= PMAP_TLB_MAXJOBS) + return (NULL); + + SIMPLE_LOCK(&pmap_tlb_shootdown_job_lock); + if (pj_free == NULL) { + SIMPLE_UNLOCK(&pmap_tlb_shootdown_job_lock); + return NULL; + } + pj = pj_free; + pj_free = pj_free->pj_nextfree; + SIMPLE_UNLOCK(&pmap_tlb_shootdown_job_lock); + + pq->pq_count++; + return (pj); +} + +/* + * pmap_tlb_shootdown_job_put: + * + * Put a TLB shootdown job queue entry onto the free list. + * + * Note: We expect the queue to be locked. 
+ */ +void +pmap_tlb_shootdown_job_put(pq, pj) + struct pmap_tlb_shootdown_q *pq; + struct pmap_tlb_shootdown_job *pj; +{ +#ifdef DIAGNOSTIC + if (pq->pq_count == 0) + panic("pmap_tlb_shootdown_job_put: queue length inconsistency"); +#endif + SIMPLE_LOCK(&pmap_tlb_shootdown_job_lock); + pj->pj_nextfree = pj_free; + pj_free = pj; + SIMPLE_UNLOCK(&pmap_tlb_shootdown_job_lock); + + pq->pq_count--; +} diff --git a/sys/arch/i386/i386/process_machdep.c b/sys/arch/i386/i386/process_machdep.c index 1667b8e223d..fbf8c250220 100644 --- a/sys/arch/i386/i386/process_machdep.c +++ b/sys/arch/i386/i386/process_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: process_machdep.c,v 1.15 2004/02/05 01:06:33 deraadt Exp $ */ +/* $OpenBSD: process_machdep.c,v 1.16 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: process_machdep.c,v 1.22 1996/05/03 19:42:25 christos Exp $ */ /* @@ -211,8 +211,7 @@ process_read_fpregs(p, regs) union savefpu *frame = process_fpframe(p); #if NNPX > 0 - if (npxproc == p) - npxsave(); + npxsave_proc(p, 1); #endif if (i386_use_fxsave) { @@ -249,21 +248,22 @@ process_write_regs(p, regs) } else #endif { +#if 0 extern int gdt_size; - extern union descriptor *dynamic_gdt; #define verr_ldt(slot) (slot < pcb->pcb_ldt_len && \ (pcb->pcb_ldt[slot].sd.sd_type & SDT_MEMRO) != 0 && \ pcb->pcb_ldt[slot].sd.sd_dpl == SEL_UPL && \ pcb->pcb_ldt[slot].sd.sd_p == 1) #define verr_gdt(slot) (slot < gdt_size && \ - (dynamic_gdt[slot].sd.sd_type & SDT_MEMRO) != 0 && \ - dynamic_gdt[slot].sd.sd_dpl == SEL_UPL && \ - dynamic_gdt[slot].sd.sd_p == 1) + (gdt[slot].sd.sd_type & SDT_MEMRO) != 0 && \ + gdt[slot].sd.sd_dpl == SEL_UPL && \ + gdt[slot].sd.sd_p == 1) #define verr(sel) (ISLDT(sel) ? verr_ldt(IDXSEL(sel)) : \ verr_gdt(IDXSEL(sel))) #define valid_sel(sel) (ISPL(sel) == SEL_UPL && verr(sel)) #define null_sel(sel) (!ISLDT(sel) && IDXSEL(sel) == 0) +#endif /* * Check for security violations. 
@@ -272,11 +272,14 @@ process_write_regs(p, regs) !USERMODE(regs->r_cs, regs->r_eflags)) return (EINVAL); + /* XXX Is this safe to remove. */ +#if 0 if ((regs->r_gs != pcb->pcb_gs && \ !valid_sel(regs->r_gs) && !null_sel(regs->r_gs)) || (regs->r_fs != pcb->pcb_fs && \ !valid_sel(regs->r_fs) && !null_sel(regs->r_fs))) return (EINVAL); +#endif pcb->pcb_gs = regs->r_gs & 0xffff; pcb->pcb_fs = regs->r_fs & 0xffff; @@ -308,8 +311,7 @@ process_write_fpregs(p, regs) if (p->p_md.md_flags & MDP_USEDFPU) { #if NNPX > 0 - if (npxproc == p) - npxdrop(); + npxsave_proc(p, 0); #endif } else p->p_md.md_flags |= MDP_USEDFPU; diff --git a/sys/arch/i386/i386/trap.c b/sys/arch/i386/i386/trap.c index 5c25f2ba8da..223c6bf3544 100644 --- a/sys/arch/i386/i386/trap.c +++ b/sys/arch/i386/i386/trap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: trap.c,v 1.62 2004/04/15 00:22:42 tedu Exp $ */ +/* $OpenBSD: trap.c,v 1.63 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $ */ /*- @@ -132,7 +132,7 @@ userret(p, pc, oticks) addupc_task(p, pc, (int)(p->p_sticks - oticks) * psratio); } - curpriority = p->p_priority; + p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority; } char *trap_type[] = { @@ -186,6 +186,7 @@ trap(frame) vm_prot_t vftype, ftype; union sigval sv; caddr_t onfault; + uint32_t cr2; uvmexp.traps++; @@ -200,7 +201,7 @@ trap(frame) if (trapdebug) { printf("trap %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n", frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs, - frame.tf_eflags, rcr2(), cpl); + frame.tf_eflags, rcr2(), lapic_tpr); printf("curproc %p\n", curproc); } #endif @@ -261,7 +262,7 @@ trap(frame) printf("unknown trap %d", frame.tf_trapno); printf(" in %s mode\n", (type & T_USER) ? 
"user" : "supervisor"); printf("trap type %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n", - type, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags, rcr2(), cpl); + type, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags, rcr2(), lapic_tpr); panic("trap type %d, code=%x, pc=%x", type, frame.tf_err, frame.tf_eip); @@ -335,35 +336,47 @@ trap(frame) case T_TSSFLT|T_USER: sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, SIGBUS, vftype, BUS_OBJERR, sv); + KERNEL_PROC_UNLOCK(p); goto out; case T_SEGNPFLT|T_USER: case T_STKFLT|T_USER: sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv); + KERNEL_PROC_UNLOCK(p); goto out; case T_ALIGNFLT|T_USER: sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, SIGBUS, vftype, BUS_ADRALN, sv); + KERNEL_PROC_UNLOCK(p); goto out; case T_PRIVINFLT|T_USER: /* privileged instruction fault */ sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, SIGILL, type &~ T_USER, ILL_PRVOPC, sv); + KERNEL_PROC_UNLOCK(p); goto out; case T_FPOPFLT|T_USER: /* coprocessor operand fault */ sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, SIGILL, type &~ T_USER, ILL_COPROC, sv); + KERNEL_PROC_UNLOCK(p); goto out; case T_ASTFLT|T_USER: /* Allow process switch */ uvmexp.softs++; if (p->p_flag & P_OWEUPC) { p->p_flag &= ~P_OWEUPC; + KERNEL_PROC_LOCK(p); ADDUPROF(p); + KERNEL_PROC_UNLOCK(p); } goto out; @@ -376,55 +389,84 @@ trap(frame) return; } sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, rv, type &~ T_USER, FPE_FLTINV, sv); + KERNEL_PROC_UNLOCK(p); goto out; #else printf("pid %d killed due to lack of floating point\n", p->p_pid); sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, SIGKILL, type &~ T_USER, FPE_FLTINV, sv); + KERNEL_PROC_UNLOCK(p); goto out; #endif } case T_BOUND|T_USER: sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv); + 
KERNEL_PROC_UNLOCK(p); goto out; case T_OFLOW|T_USER: sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv); + KERNEL_PROC_UNLOCK(p); goto out; case T_DIVIDE|T_USER: sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTDIV, sv); + KERNEL_PROC_UNLOCK(p); goto out; case T_ARITHTRAP|T_USER: sv.sival_int = frame.tf_eip; + KERNEL_PROC_LOCK(p); trapsignal(p, SIGFPE, frame.tf_err, FPE_INTOVF, sv); + KERNEL_PROC_UNLOCK(p); goto out; case T_PAGEFLT: /* allow page faults in kernel mode */ if (p == 0 || p->p_addr == 0) goto we_re_toast; +#ifdef LOCKDEBUG + /* If we page-fault while in scheduler, we're doomed. */ +#ifdef notyet + if (simple_lock_held(&sched_lock)) +#else + if (__mp_lock_held(&sched_lock)) +#endif + goto we_re_toast; +#endif + pcb = &p->p_addr->u_pcb; #if 0 /* XXX - check only applies to 386's and 486's with WP off */ if (frame.tf_err & PGEX_P) goto we_re_toast; #endif - /* FALLTHROUGH */ + cr2 = rcr2(); + KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE); + goto faultcommon; + case T_PAGEFLT|T_USER: { /* page fault */ vaddr_t va, fa; - struct vmspace *vm = p->p_vmspace; + struct vmspace *vm; struct vm_map *map; int rv; unsigned nss; + cr2 = rcr2(); + KERNEL_PROC_LOCK(p); + faultcommon: + vm = p->p_vmspace; if (vm == NULL) goto we_re_toast; - fa = (vaddr_t)rcr2(); + fa = (vaddr_t)cr2; va = trunc_page(fa); /* * It is only a kernel address space fault iff: @@ -472,20 +514,26 @@ trap(frame) if (rv == 0) { if (nss > vm->vm_ssize) vm->vm_ssize = nss; - if (type == T_PAGEFLT) + if (type == T_PAGEFLT) { + KERNEL_UNLOCK(); return; + } + KERNEL_PROC_UNLOCK(p); goto out; } if (type == T_PAGEFLT) { - if (pcb->pcb_onfault != 0) + if (pcb->pcb_onfault != 0) { + KERNEL_UNLOCK(); goto copyfault; + } printf("uvm_fault(%p, 0x%lx, 0, %d) -> %x\n", map, va, ftype, rv); goto we_re_toast; } sv.sival_int = fa; trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv); + KERNEL_PROC_UNLOCK(p); break; } @@ 
-500,14 +548,18 @@ trap(frame) case T_BPTFLT|T_USER: /* bpt instruction fault */ sv.sival_int = rcr2(); + KERNEL_PROC_LOCK(p); trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_BRKPT, sv); + KERNEL_PROC_UNLOCK(p); break; case T_TRCTRAP|T_USER: /* trace trap */ #if defined(GPL_MATH_EMULATE) trace: #endif sv.sival_int = rcr2(); + KERNEL_PROC_LOCK(p); trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_TRACE, sv); + KERNEL_PROC_UNLOCK(p); break; #if NISA > 0 @@ -591,7 +643,7 @@ syscall(frame) register_t code, args[8], rval[2]; u_quad_t sticks; #ifdef DIAGNOSTIC - int ocpl = cpl; + int ocpl = lapic_tpr; #endif uvmexp.syscalls++; @@ -716,12 +768,14 @@ syscall(frame) goto bad; rval[0] = 0; rval[1] = frame.tf_edx; + KERNEL_PROC_LOCK(p); #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE)) orig_error = error = systrace_redirect(code, p, args, rval); else #endif orig_error = error = (*callp->sy_call)(p, args, rval); + KERNEL_PROC_UNLOCK(p); switch (error) { case 0: /* @@ -758,15 +812,18 @@ syscall(frame) #endif userret(p, frame.tf_eip, sticks); #ifdef KTRACE - if (KTRPOINT(p, KTR_SYSRET)) + if (KTRPOINT(p, KTR_SYSRET)) { + KERNEL_PROC_LOCK(p); ktrsysret(p, code, orig_error, rval[0]); + KERNEL_PROC_UNLOCK(p); + } #endif #ifdef DIAGNOSTIC - if (cpl != ocpl) { + if (lapic_tpr != ocpl) { printf("WARNING: SPL (0x%x) NOT LOWERED ON " "syscall(0x%x, 0x%x, 0x%x, 0x%x...) 
EXIT, PID %d\n", - cpl, code, args[0], args[1], args[2], p->p_pid); - cpl = ocpl; + lapic_tpr, code, args[0], args[1], args[2], p->p_pid); + lapic_tpr = ocpl; } #endif } @@ -781,9 +838,15 @@ child_return(arg) tf->tf_eax = 0; tf->tf_eflags &= ~PSL_C; +#ifdef notyet + KERNEL_PROC_UNLOCK(p); +#endif + userret(p, tf->tf_eip, 0); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) + KERNEL_PROC_LOCK(p); ktrsysret(p, SYS_fork, 0, 0); + KERNEL_PROC_UNLOCK(p); #endif } diff --git a/sys/arch/i386/isa/vector.s b/sys/arch/i386/i386/vector.s index 93a7bab51da..b7ddd3f0021 100644 --- a/sys/arch/i386/isa/vector.s +++ b/sys/arch/i386/i386/vector.s @@ -1,4 +1,4 @@ -/* $OpenBSD: vector.s,v 1.16 2003/04/17 03:42:14 drahn Exp $ */ +/* $OpenBSD: vector.s,v 1.2 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: vector.s,v 1.32 1996/01/07 21:29:47 mycroft Exp $ */ /* @@ -30,85 +30,12 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <i386/isa/icu.h> +#include <machine/i8259.h> #include <dev/isa/isareg.h> -#define ICU_HARDWARE_MASK - #define MY_COUNT _C_LABEL(uvmexp) /* - * These macros are fairly self explanatory. If ICU_SPECIAL_MASK_MODE is - * defined, we try to take advantage of the ICU's `special mask mode' by only - * EOIing the interrupts on return. This avoids the requirement of masking and - * unmasking. We can't do this without special mask mode, because the ICU - * would also hold interrupts that it thinks are of lower priority. - * - * Many machines do not support special mask mode, so by default we don't try - * to use it. 
- */ - -#define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) -#define IRQ_BYTE(irq_num) ((irq_num) / 8) - -#ifdef ICU_SPECIAL_MASK_MODE - -#define ACK1(irq_num) -#define ACK2(irq_num) \ - movb $(0x60|IRQ_SLAVE),%al /* specific EOI for IRQ2 */ ;\ - outb %al,$IO_ICU1 -#define MASK(irq_num, icu) -#define UNMASK(irq_num, icu) \ - movb $(0x60|(irq_num%8)),%al /* specific EOI */ ;\ - outb %al,$icu - -#else /* ICU_SPECIAL_MASK_MODE */ - -#ifndef AUTO_EOI_1 -#define ACK1(irq_num) \ - movb $(0x60|(irq_num%8)),%al /* specific EOI */ ;\ - outb %al,$IO_ICU1 -#else -#define ACK1(irq_num) -#endif - -#ifndef AUTO_EOI_2 -#define ACK2(irq_num) \ - movb $(0x60|(irq_num%8)),%al /* specific EOI */ ;\ - outb %al,$IO_ICU2 /* do the second ICU first */ ;\ - movb $(0x60|IRQ_SLAVE),%al /* specific EOI for IRQ2 */ ;\ - outb %al,$IO_ICU1 -#else -#define ACK2(irq_num) -#endif - -#ifdef ICU_HARDWARE_MASK - -#define MASK(irq_num, icu) \ - movb _C_LABEL(imen) + IRQ_BYTE(irq_num),%al ;\ - orb $IRQ_BIT(irq_num),%al ;\ - movb %al,_C_LABEL(imen) + IRQ_BYTE(irq_num) ;\ - FASTER_NOP ;\ - outb %al,$(icu+1) -#define UNMASK(irq_num, icu) \ - cli ;\ - movb _C_LABEL(imen) + IRQ_BYTE(irq_num),%al ;\ - andb $~IRQ_BIT(irq_num),%al ;\ - movb %al,_C_LABEL(imen) + IRQ_BYTE(irq_num) ;\ - FASTER_NOP ;\ - outb %al,$(icu+1) ;\ - sti - -#else /* ICU_HARDWARE_MASK */ - -#define MASK(irq_num, icu) -#define UNMASK(irq_num, icu) - -#endif /* ICU_HARDWARE_MASK */ - -#endif /* ICU_SPECIAL_MASK_MODE */ - -/* * Macros for interrupt entry, call to handler, and exit. * * XXX @@ -132,6 +59,16 @@ .globl _C_LABEL(isa_strayintr) +#ifdef MULTIPROCESSOR +#define LOCK_KERNEL call _C_LABEL(i386_intlock) +#define UNLOCK_KERNEL call _C_LABEL(i386_intunlock) +#else +#define LOCK_KERNEL +#define UNLOCK_KERNEL +#endif + +#define voidop(num) + /* * Normal vectors. * @@ -148,34 +85,35 @@ * * On exit, we jump to Xdoreti(), to process soft interrupts and ASTs. 
*/ -#define INTR(irq_num, icu, ack) \ -IDTVEC(recurse/**/irq_num) ;\ +#define INTRSTUB(name, num, early_ack, late_ack, mask, unmask, level_mask) \ +IDTVEC(recurse_/**/name/**/num) ;\ pushfl ;\ pushl %cs ;\ pushl %esi ;\ cli ;\ -_C_LABEL(Xintr)/**/irq_num/**/: ;\ +_C_LABEL(Xintr_/**/name/**/num): ;\ pushl $0 /* dummy error code */ ;\ pushl $T_ASTFLT /* trap # for doing ASTs */ ;\ INTRENTRY ;\ MAKE_FRAME ;\ - MASK(irq_num, icu) /* mask it in hardware */ ;\ - ack(irq_num) /* and allow other intrs */ ;\ + mask(num) /* mask it in hardware */ ;\ + early_ack(num) /* and allow other intrs */ ;\ incl MY_COUNT+V_INTR /* statistical info */ ;\ - movl _C_LABEL(iminlevel) + (irq_num) * 4, %eax ;\ - movzbl _C_LABEL(cpl),%ebx ;\ + movl _C_LABEL(iminlevel) + (num) * 4, %eax ;\ + movl CPL,%ebx ;\ cmpl %eax,%ebx ;\ - jae _C_LABEL(Xhold/**/irq_num)/* currently masked; hold it */;\ -_C_LABEL(Xresume)/**/irq_num/**/: ;\ - movzbl _C_LABEL(cpl),%eax /* cpl to restore on exit */ ;\ + jae _C_LABEL(Xhold_/**/name/**/num)/* currently masked; hold it */;\ +Xresume_/**/name/**/num/**/: ;\ + movl CPL,%eax /* cpl to restore on exit */ ;\ pushl %eax ;\ - movl _C_LABEL(imaxlevel) + (irq_num) * 4,%eax ;\ - movl %eax,_C_LABEL(cpl) /* block enough for this irq */ ;\ + movl _C_LABEL(imaxlevel) + (num) * 4,%eax ;\ + movl %eax,CPL /* block enough for this irq */ ;\ sti /* safe to take intrs now */ ;\ - movl _C_LABEL(intrhand) + (irq_num) * 4,%ebx /* head of chain */ ;\ + movl _C_LABEL(intrhand) + (num) * 4,%ebx /* head of chain */ ;\ testl %ebx,%ebx ;\ - jz _C_LABEL(Xstray)/**/irq_num /* no handlears; we're stray */ ;\ + jz _C_LABEL(Xstray_/**/name/**/num) /* no handlears; we're stray */ ;\ STRAY_INITIALIZE /* nobody claimed it yet */ ;\ + LOCK_KERNEL ;\ 7: movl IH_ARG(%ebx),%eax /* get handler arg */ ;\ testl %eax,%eax ;\ jnz 4f ;\ @@ -190,16 +128,18 @@ _C_LABEL(Xresume)/**/irq_num/**/: ;\ 5: movl IH_NEXT(%ebx),%ebx /* next handler in chain */ ;\ testl %ebx,%ebx ;\ jnz 7b ;\ + UNLOCK_KERNEL ;\ 
STRAY_TEST /* see if it's a stray */ ;\ -6: UNMASK(irq_num, icu) /* unmask it in hardware */ ;\ +6: unmask(num) /* unmask it in hardware */ ;\ + late_ack(num) ;\ jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\ -IDTVEC(stray/**/irq_num) ;\ - pushl $irq_num ;\ +IDTVEC(stray_/**/name/**/num) ;\ + pushl $num ;\ call _C_LABEL(isa_strayintr) ;\ addl $4,%esp ;\ jmp 6b ;\ -IDTVEC(hold/**/irq_num) ;\ - orb $IRQ_BIT(irq_num),_C_LABEL(ipending) + IRQ_BYTE(irq_num) ;\ +IDTVEC(hold_/**/name/**/num) ;\ + orb $IRQ_BIT(num),_C_LABEL(ipending) + IRQ_BYTE(num) ;\ INTRFASTEXIT #if defined(DEBUG) && defined(notdef) @@ -209,7 +149,7 @@ IDTVEC(hold/**/irq_num) ;\ orl %eax,%esi #define STRAY_TEST \ testl %esi,%esi ;\ - jz _C_LABEL(Xstray)/**/irq_num + jz _C_LABEL(Xstray_/**/name/**/num) #else /* !DEBUG */ #define STRAY_INITIALIZE #define STRAY_INTEGRATE @@ -223,66 +163,92 @@ IDTVEC(hold/**/irq_num) ;\ #define MAKE_FRAME #endif /* DDB */ -INTR(0, IO_ICU1, ACK1) -INTR(1, IO_ICU1, ACK1) -INTR(2, IO_ICU1, ACK1) -INTR(3, IO_ICU1, ACK1) -INTR(4, IO_ICU1, ACK1) -INTR(5, IO_ICU1, ACK1) -INTR(6, IO_ICU1, ACK1) -INTR(7, IO_ICU1, ACK1) -INTR(8, IO_ICU2, ACK2) -INTR(9, IO_ICU2, ACK2) -INTR(10, IO_ICU2, ACK2) -INTR(11, IO_ICU2, ACK2) -INTR(12, IO_ICU2, ACK2) -INTR(13, IO_ICU2, ACK2) -INTR(14, IO_ICU2, ACK2) -INTR(15, IO_ICU2, ACK2) +#define ICUADDR IO_ICU1 + +INTRSTUB(legacy,0, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,1, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,2, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,3, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,4, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,5, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,6, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,7, 
i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) + +#undef ICUADDR +#define ICUADDR IO_ICU2 + +INTRSTUB(legacy,8, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,9, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,10, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,11, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,12, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,13, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,14, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) +INTRSTUB(legacy,15, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask, + voidop) /* * These tables are used by the ISA configuration code. */ /* interrupt service routine entry points */ IDTVEC(intr) - .long _C_LABEL(Xintr0), _C_LABEL(Xintr1), _C_LABEL(Xintr2) - .long _C_LABEL(Xintr3), _C_LABEL(Xintr4), _C_LABEL(Xintr5) - .long _C_LABEL(Xintr6), _C_LABEL(Xintr7), _C_LABEL(Xintr8) - .long _C_LABEL(Xintr9), _C_LABEL(Xintr10), _C_LABEL(Xintr11) - .long _C_LABEL(Xintr12), _C_LABEL(Xintr13) - .long _C_LABEL(Xintr14), _C_LABEL(Xintr15) + .long _C_LABEL(Xintr_legacy0), _C_LABEL(Xintr_legacy1) + .long _C_LABEL(Xintr_legacy2), _C_LABEL(Xintr_legacy3) + .long _C_LABEL(Xintr_legacy4), _C_LABEL(Xintr_legacy5) + .long _C_LABEL(Xintr_legacy6), _C_LABEL(Xintr_legacy7) + .long _C_LABEL(Xintr_legacy8), _C_LABEL(Xintr_legacy9) + .long _C_LABEL(Xintr_legacy10), _C_LABEL(Xintr_legacy11) + .long _C_LABEL(Xintr_legacy12), _C_LABEL(Xintr_legacy13) + .long _C_LABEL(Xintr_legacy14), _C_LABEL(Xintr_legacy15) /* * These tables are used by Xdoreti() and Xspllower(). 
*/ /* resume points for suspended interrupts */ IDTVEC(resume) - .long _C_LABEL(Xresume0), _C_LABEL(Xresume1) - .long _C_LABEL(Xresume2), _C_LABEL(Xresume3) - .long _C_LABEL(Xresume4), _C_LABEL(Xresume5) - .long _C_LABEL(Xresume6), _C_LABEL(Xresume7) - .long _C_LABEL(Xresume8), _C_LABEL(Xresume9) - .long _C_LABEL(Xresume10), _C_LABEL(Xresume11) - .long _C_LABEL(Xresume12), _C_LABEL(Xresume13) - .long _C_LABEL(Xresume14), _C_LABEL(Xresume15) + .long _C_LABEL(Xresume_legacy0), _C_LABEL(Xresume_legacy1) + .long _C_LABEL(Xresume_legacy2), _C_LABEL(Xresume_legacy3) + .long _C_LABEL(Xresume_legacy4), _C_LABEL(Xresume_legacy5) + .long _C_LABEL(Xresume_legacy6), _C_LABEL(Xresume_legacy7) + .long _C_LABEL(Xresume_legacy8), _C_LABEL(Xresume_legacy9) + .long _C_LABEL(Xresume_legacy10), _C_LABEL(Xresume_legacy11) + .long _C_LABEL(Xresume_legacy12), _C_LABEL(Xresume_legacy13) + .long _C_LABEL(Xresume_legacy14), _C_LABEL(Xresume_legacy15) /* for soft interrupts */ - .long 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 .long _C_LABEL(Xsofttty), _C_LABEL(Xsoftnet) .long _C_LABEL(Xsoftclock) + .long 0, 0 /* fake interrupts to resume from splx() */ IDTVEC(recurse) - .long _C_LABEL(Xrecurse0), _C_LABEL(Xrecurse1) - .long _C_LABEL(Xrecurse2), _C_LABEL(Xrecurse3) - .long _C_LABEL(Xrecurse4), _C_LABEL(Xrecurse5) - .long _C_LABEL(Xrecurse6), _C_LABEL(Xrecurse7) - .long _C_LABEL(Xrecurse8), _C_LABEL(Xrecurse9) - .long _C_LABEL(Xrecurse10), _C_LABEL(Xrecurse11) - .long _C_LABEL(Xrecurse12), _C_LABEL(Xrecurse13) - .long _C_LABEL(Xrecurse14), _C_LABEL(Xrecurse15) + .long _C_LABEL(Xrecurse_legacy0), _C_LABEL(Xrecurse_legacy1) + .long _C_LABEL(Xrecurse_legacy2), _C_LABEL(Xrecurse_legacy3) + .long _C_LABEL(Xrecurse_legacy4), _C_LABEL(Xrecurse_legacy5) + .long _C_LABEL(Xrecurse_legacy6), _C_LABEL(Xrecurse_legacy7) + .long _C_LABEL(Xrecurse_legacy8), _C_LABEL(Xrecurse_legacy9) + .long _C_LABEL(Xrecurse_legacy10), _C_LABEL(Xrecurse_legacy11) + .long 
_C_LABEL(Xrecurse_legacy12), _C_LABEL(Xrecurse_legacy13) + .long _C_LABEL(Xrecurse_legacy14), _C_LABEL(Xrecurse_legacy15) /* for soft interrupts */ - .long 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 .long _C_LABEL(Xsofttty), _C_LABEL(Xsoftnet) .long _C_LABEL(Xsoftclock) + .long 0, 0 /* Some bogus data, to keep vmstat happy, for now. */ .globl _C_LABEL(intrnames), _C_LABEL(eintrnames) diff --git a/sys/arch/i386/i386/vm_machdep.c b/sys/arch/i386/i386/vm_machdep.c index 20cda037a0b..3f5eeb74d62 100644 --- a/sys/arch/i386/i386/vm_machdep.c +++ b/sys/arch/i386/i386/vm_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_machdep.c,v 1.39 2003/06/02 23:27:47 millert Exp $ */ +/* $OpenBSD: vm_machdep.c,v 1.40 2004/06/13 21:49:15 niklas Exp $ */ /* $NetBSD: vm_machdep.c,v 1.61 1996/05/03 19:42:35 christos Exp $ */ /*- @@ -62,9 +62,6 @@ #include <machine/specialreg.h> #include "npx.h" -#if NNPX > 0 -extern struct proc *npxproc; -#endif /* * Finish a fork operation, with process p2 nearly set up. @@ -88,37 +85,36 @@ cpu_fork(p1, p2, stack, stacksize, func, arg) struct switchframe *sf; #if NNPX > 0 - /* - * If npxproc != p1, then the npx h/w state is irrelevant and the - * state had better already be in the pcb. This is true for forks - * but not for dumps. - * - * If npxproc == p1, then we have to save the npx h/w state to - * p1's pcb so that we can copy it. - */ - if (npxproc == p1) - npxsave(); + npxsave_proc(p1, 1); #endif p2->p_md.md_flags = p1->p_md.md_flags; - /* Sync curpcb (which is presumably p1's PCB) and copy it to p2. */ - savectx(curpcb); + /* Copy pcb from proc p1 to p2. */ + if (p1 == curproc) { + /* Sync the PCB before we copy it. */ + savectx(curpcb); + } +#ifdef DIAGNOSTIC + else if (p1 != &proc0) + panic("cpu_fork: curproc"); +#endif *pcb = p1->p_addr->u_pcb; + /* * Preset these so that gdt_compact() doesn't get confused if called * during the allocations below. 
+ * + * Note: pcb_ldt_sel is handled in the pmap_activate() call when + * we run the new process. */ - pcb->pcb_tss_sel = GSEL(GNULL_SEL, SEL_KPL); - /* - * Activate the addres space. Note this will refresh pcb_ldt_sel. - */ - pmap_activate(p2); + p2->p_md.md_tss_sel = GSEL(GNULL_SEL, SEL_KPL); /* Fix up the TSS. */ pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); pcb->pcb_tss.tss_esp0 = (int)p2->p_addr + USPACE - 16; - tss_alloc(pcb); + + p2->p_md.md_tss_sel = tss_alloc(pcb); /* * Copy the trapframe, and arrange for the child to return directly @@ -150,8 +146,7 @@ cpu_swapout(p) /* * Make sure we save the FP state before the user area vanishes. */ - if (npxproc == p) - npxsave(); + npxsave_proc(p, 1); #endif } @@ -169,8 +164,8 @@ cpu_exit(p) { #if NNPX > 0 /* If we were using the FPU, forget about it. */ - if (npxproc == p) - npxproc = 0; + if (p->p_addr->u_pcb.pcb_fpcpu != NULL) + npxsave_proc(p, 0); #endif uvmexp.swtch++; @@ -181,10 +176,7 @@ void cpu_wait(p) struct proc *p; { - struct pcb *pcb; - - pcb = &p->p_addr->u_pcb; - tss_free(pcb); + tss_free(p->p_md.md_tss_sel); } /* @@ -251,6 +243,9 @@ pagemove(from, to, size) { pt_entry_t *fpte, *tpte; pt_entry_t ofpte, otpte; +#ifdef MULTIPROCESSOR + u_int32_t cpumask = 0; +#endif #ifdef DIAGNOSTIC if ((size & PAGE_MASK) != 0) @@ -263,24 +258,38 @@ pagemove(from, to, size) otpte = *tpte; *tpte++ = *fpte; *fpte++ = 0; -#if defined(I386_CPU) +#if defined(I386_CPU) && !defined(MULTIPROCESSOR) if (cpu_class != CPUCLASS_386) #endif { if (otpte & PG_V) - pmap_update_pg((vaddr_t) to); +#ifdef MULTIPROCESSOR + pmap_tlb_shootdown(pmap_kernel(), (vaddr_t)to, + otpte, &cpumask); +#else + pmap_update_pg((vaddr_t)to); +#endif if (ofpte & PG_V) - pmap_update_pg((vaddr_t) from); +#ifdef MULTIPROCESSOR + pmap_tlb_shootdown(pmap_kernel(), + (vaddr_t)from, ofpte, &cpumask); +#else + pmap_update_pg((vaddr_t)from); +#endif } from += PAGE_SIZE; to += PAGE_SIZE; size -= PAGE_SIZE; } +#ifdef MULTIPROCESSOR + pmap_tlb_shootnow(cpumask); 
+#else #if defined(I386_CPU) if (cpu_class == CPUCLASS_386) tlbflush(); #endif +#endif } /* diff --git a/sys/arch/i386/include/apicvar.h b/sys/arch/i386/include/apicvar.h new file mode 100644 index 00000000000..aac452fca40 --- /dev/null +++ b/sys/arch/i386/include/apicvar.h @@ -0,0 +1,57 @@ +/* $OpenBSD: apicvar.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: apicvar.h,v 1.1.2.3 2000/02/27 20:25:00 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _I386_APICVAR_H_ +#define _I386_APICVAR_H_ + +struct apic_attach_args { + const char *aaa_name; + int apic_id; + int apic_version; + int flags; +#define IOAPIC_PICMODE 0x01 +#define IOAPIC_VWIRE 0x02 + paddr_t apic_address; +}; + +void apic_format_redir(char *, char *, int, u_int32_t, u_int32_t); + +#endif /* !_I386_APICVAR_H_ */ diff --git a/sys/arch/i386/include/asm.h b/sys/arch/i386/include/asm.h index 2126271a519..f2da5755a33 100644 --- a/sys/arch/i386/include/asm.h +++ b/sys/arch/i386/include/asm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: asm.h,v 1.7 2003/06/02 23:27:47 millert Exp $ */ +/* $OpenBSD: asm.h,v 1.8 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: asm.h,v 1.7 1994/10/27 04:15:56 cgd Exp $ */ /*- @@ -61,6 +61,16 @@ #define _C_LABEL(name) name #define _ASM_LABEL(x) x +#define CVAROFF(x, y) _C_LABEL(x) + y + +#ifdef __STDC__ +# define __CONCAT(x,y) x ## y +# define __STRING(x) #x +#else +# define __CONCAT(x,y) x/**/y +# define __STRING(x) "x" +#endif + /* * WEAK ALIAS: create a weak alias */ diff --git a/sys/arch/i386/include/atomic.h b/sys/arch/i386/include/atomic.h new file mode 100644 index 00000000000..e3be6b68b1b --- /dev/null +++ b/sys/arch/i386/include/atomic.h @@ -0,0 +1,72 @@ +/* $OpenBSD: atomic.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: atomic.h,v 1.1.2.2 2000/02/21 18:54:07 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _ATOMIC_H_ +#define _ATOMIC_H_ + +#ifndef _LOCORE + +static __inline u_int32_t +i386_atomic_testset_ul (volatile u_int32_t *ptr, unsigned long val) { + __asm__ volatile ("xchgl %0,(%2)" :"=r" (val):"0" (val),"r" (ptr)); + return val; +} + +static __inline int +i386_atomic_testset_i (volatile int *ptr, unsigned long val) { + __asm__ volatile ("xchgl %0,(%2)" :"=r" (val):"0" (val),"r" (ptr)); + return val; +} + +static __inline void +i386_atomic_setbits_l (volatile u_int32_t *ptr, unsigned long bits) { + __asm __volatile("lock ; orl %1,%0" : "=m" (*ptr) : "ir" (bits)); +} + +static __inline void +i386_atomic_clearbits_l (volatile u_int32_t *ptr, unsigned long bits) { + bits = ~bits; + __asm __volatile("lock ; and %1,%0" : "=m" (*ptr) : "ir" (bits)); +} + +#endif +#endif + diff --git a/sys/arch/i386/include/biosvar.h b/sys/arch/i386/include/biosvar.h index 4fc00866e87..7e4d98933b4 100644 --- a/sys/arch/i386/include/biosvar.h +++ b/sys/arch/i386/include/biosvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: biosvar.h,v 1.42 2003/11/27 00:31:55 espie Exp $ */ +/* $OpenBSD: biosvar.h,v 1.43 2004/06/13 21:49:16 niklas Exp $ */ /* * Copyright (c) 1997-1999 Michael Shalayeff @@ -192,6 +192,8 @@ typedef struct _bios_consdev { int conspeed; } bios_consdev_t; +#define BOOTARG_SMPINFO 6 /* struct mp_float[] */ + #if defined(_KERNEL) || defined (_STANDALONE) #ifdef _LOCORE @@ -244,6 +246,8 @@ int bios32_service(u_int32_t, bios32_entry_t, bios32_entry_info_t); extern u_int bootapiver; extern bios_memmap_t *bios_memmap; +extern void *bios_smpinfo; +extern bios_pciinfo_t *bios_pciinfo; #endif /* _KERNEL */ #endif /* _LOCORE */ diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h index cc690de9219..9d66a9c496c 100644 --- a/sys/arch/i386/include/cpu.h +++ b/sys/arch/i386/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.60 2004/06/06 17:34:37 grange Exp $ */ +/* $OpenBSD: cpu.h,v 1.61 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: cpu.h,v 1.35 1996/05/05 
19:29:26 christos Exp $ */ /*- @@ -41,9 +41,19 @@ /* * Definitions unique to i386 cpu support. */ -#include <machine/psl.h> #include <machine/frame.h> +#include <machine/psl.h> #include <machine/segments.h> +#include <machine/intrdefs.h> + +#ifdef MULTIPROCESSOR +#include <machine/i82489reg.h> +#include <machine/i82489var.h> + +/* XXX for now... */ +#define NLAPIC 1 + +#endif /* * definitions of cpu-dependent requirements @@ -60,16 +70,156 @@ */ #define clockframe intrframe -#define CLKF_USERMODE(frame) USERMODE((frame)->if_cs, (frame)->if_eflags) -#define CLKF_PC(frame) ((frame)->if_eip) -#define CLKF_INTR(frame) (IDXSEL((frame)->if_cs) == GICODE_SEL) +#include <sys/device.h> +#include <sys/lock.h> /* will also get LOCKDEBUG */ +#include <sys/proc.h> + +struct intrsource; + +/* XXX stuff to move to cpuvar.h later */ +struct cpu_info { + struct device ci_dev; /* our device */ + struct cpu_info *ci_self; /* pointer to this structure */ + struct schedstate_percpu ci_schedstate; /* scheduler state */ + struct cpu_info *ci_next; /* next cpu */ + + /* + * Public members. + */ + struct proc *ci_curproc; /* current owner of the processor */ + struct simplelock ci_slock; /* lock on this data structure */ + cpuid_t ci_cpuid; /* our CPU ID */ + u_int ci_apicid; /* our APIC ID */ +#if defined(DIAGNOSTIC) || defined(LOCKDEBUG) + u_long ci_spin_locks; /* # of spin locks held */ + u_long ci_simple_locks; /* # of simple locks held */ +#endif + + /* + * Private members. 
+ */ + struct proc *ci_fpcurproc; /* current owner of the FPU */ + int ci_fpsaving; /* save in progress */ + + volatile u_int32_t ci_tlb_ipi_mask; + + struct pcb *ci_curpcb; /* VA of current HW PCB */ + struct pcb *ci_idle_pcb; /* VA of current PCB */ + int ci_idle_tss_sel; /* TSS selector of idle PCB */ + + struct intrsource *ci_isources[MAX_INTR_SOURCES]; + u_int32_t ci_ipending; + int ci_ilevel; + int ci_idepth; + u_int32_t ci_imask[NIPL]; + u_int32_t ci_iunmask[NIPL]; + + paddr_t ci_idle_pcb_paddr; /* PA of idle PCB */ + u_long ci_flags; /* flags; see below */ + u_int32_t ci_ipis; /* interprocessor interrupts pending */ + int sc_apic_version; /* local APIC version */ + + u_int32_t ci_level; + u_int32_t ci_vendor[4]; + u_int32_t ci_signature; /* X86 cpuid type */ + u_int32_t ci_feature_flags; /* X86 CPUID feature bits */ + u_int32_t cpu_class; /* CPU class */ + + struct cpu_functions *ci_func; /* start/stop functions */ + void (*cpu_setup)(const char *, int, int); /* proc-dependent init */ + + int ci_want_resched; + int ci_astpending; + + union descriptor *ci_gdt; + + volatile int ci_ddb_paused; /* paused due to other proc in ddb */ +#define CI_DDB_RUNNING 0 +#define CI_DDB_SHOULDSTOP 1 +#define CI_DDB_STOPPED 2 +#define CI_DDB_ENTERDDB 3 +#define CI_DDB_INDDB 4 +}; + +/* + * Processor flag notes: The "primary" CPU has certain MI-defined + * roles (mostly relating to hardclock handling); we distinguish + * between the processor which booted us, and the processor currently + * holding the "primary" role just to give us the flexibility later to + * change primaries should we be sufficiently twisted. 
+ */ + +#define CPUF_BSP 0x0001 /* CPU is the original BSP */ +#define CPUF_AP 0x0002 /* CPU is an AP */ +#define CPUF_SP 0x0004 /* CPU is only processor */ +#define CPUF_PRIMARY 0x0008 /* CPU is active primary processor */ +#define CPUF_APIC_CD 0x0010 /* CPU has apic configured */ + +#define CPUF_PRESENT 0x1000 /* CPU is present */ +#define CPUF_RUNNING 0x2000 /* CPU is running */ /* - * Preempt the current process if in interrupt from user mode, + * We statically allocate the CPU info for the primary CPU (or, + * the only CPU on uniprocessors), and the primary CPU is the + * first CPU on the CPU info list. + */ +extern struct cpu_info cpu_info_primary; +extern struct cpu_info *cpu_info_list; + +#define CPU_INFO_ITERATOR int +#define CPU_INFO_FOREACH(cii, ci) cii = 0, ci = cpu_info_list; \ + ci != NULL; ci = ci->ci_next + +#ifdef MULTIPROCESSOR + +#define I386_MAXPROCS 32 /* because we use a bitmask */ + +#define CPU_STARTUP(_ci) ((_ci)->ci_func->start(_ci)) +#define CPU_STOP(_ci) ((_ci)->ci_func->stop(_ci)) +#define CPU_START_CLEANUP(_ci) ((_ci)->ci_func->cleanup(_ci)) + +#define cpu_number() (i82489_readreg(LAPIC_ID)>>LAPIC_ID_SHIFT) +#define curcpu() (cpu_info[cpu_number()]) + +#define CPU_IS_PRIMARY(ci) ((ci)->ci_flags & CPUF_PRIMARY) + +extern struct cpu_info *cpu_info[I386_MAXPROCS]; +extern u_long cpus_running; + +extern void cpu_boot_secondary_processors(void); +extern void cpu_init_idle_pcbs(void); + +#else /* MULTIPROCESSOR */ + +#define I386_MAXPROCS 1 + +#define cpu_number() 0 +#define curcpu() (&cpu_info_primary) + +#define CPU_IS_PRIMARY(ci) 1 + +/* + * definitions of cpu-dependent requirements + * referenced in generic code + */ +#define cpu_swapin(p) /* nothing */ + +#endif + +#define curpcb curcpu()->ci_curpcb + +#define want_resched (curcpu()->ci_want_resched) +#define astpending (curcpu()->ci_astpending) + +/* + * Preempt the current process if in interrupt from user mode, * or after the current trap/syscall if in system mode. 
*/ -int want_resched; /* resched() was called */ -#define need_resched() (want_resched = 1, setsoftast()) +extern void need_resched(struct cpu_info *); + +#define CLKF_USERMODE(frame) USERMODE((frame)->if_cs, (frame)->if_eflags) +#define CLKF_PC(frame) ((frame)->if_eip) +#define CLKF_INTR(frame) (IDXSEL((frame)->if_cs) == GICODE_SEL) /* * Give a profiling tick to the current process when the user profiling @@ -87,8 +237,13 @@ int want_resched; /* resched() was called */ /* * We need a machine-independent name for this. */ -#define DELAY(x) delay(x) -void delay(int); +extern void (*delay_func)(int); +struct timeval; +extern void (*microtime_func)(struct timeval *); + +#define DELAY(x) (*delay_func)(x) +#define delay(x) (*delay_func)(x) +#define microtime(tv) (*microtime_func)(tv) #if defined(I586_CPU) || defined(I686_CPU) /* @@ -173,6 +328,7 @@ extern void (*update_cpuspeed)(void); void dumpconf(void); void cpu_reset(void); void i386_proc0_tss_ldt_init(void); +void i386_init_pcb_tss_ldt(struct cpu_info *); void cpuid(u_int32_t, u_int32_t *); /* locore.s */ @@ -189,6 +345,9 @@ void proc_trampoline(void); void initrtclock(void); void startrtclock(void); void rtcdrain(void *); +void i8254_delay(int); +void i8254_microtime(struct timeval *); +void i8254_initclocks(void); /* est.c */ #if !defined(SMALL_KERNEL) && defined(I686_CPU) @@ -213,8 +372,9 @@ int k6_powernow_setperf(int); /* npx.c */ -void npxdrop(void); -void npxsave(void); +void npxdrop(struct proc *); +void npxsave_proc(struct proc *, int); +void npxsave_cpu(struct cpu_info *, int); #if defined(GPL_MATH_EMULATE) /* math_emulate.c */ @@ -230,6 +390,7 @@ int i386_set_ldt(struct proc *, void *, register_t *); /* isa_machdep.c */ void isa_defaultirq(void); +void isa_nodefaultirq(void); int isa_nmi(void); /* pmap.c */ @@ -291,4 +452,12 @@ void setconf(void); { "xcrypt", CTLTYPE_INT }, \ } +/* + * This needs to be included late since it relies on definitions higher + * up in this file. 
+ */ +#if defined(MULTIPROCESSOR) && defined(_KERNEL) +#include <sys/mplock.h> +#endif + #endif /* !_I386_CPU_H_ */ diff --git a/sys/arch/i386/include/cpufunc.h b/sys/arch/i386/include/cpufunc.h index d90a120429b..832e4271142 100644 --- a/sys/arch/i386/include/cpufunc.h +++ b/sys/arch/i386/include/cpufunc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpufunc.h,v 1.11 2003/10/28 13:22:44 avsm Exp $ */ +/* $OpenBSD: cpufunc.h,v 1.12 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: cpufunc.h,v 1.8 1994/10/27 04:15:59 cgd Exp $ */ /* @@ -43,6 +43,8 @@ #include <sys/cdefs.h> #include <sys/types.h> +#include <machine/specialreg.h> + static __inline void invlpg(u_int); static __inline void lidt(void *); static __inline void lldt(u_short); @@ -55,6 +57,7 @@ static __inline u_int rcr3(void); static __inline void lcr4(u_int); static __inline u_int rcr4(void); static __inline void tlbflush(void); +static __inline void tlbflushg(void); static __inline void disable_intr(void); static __inline void enable_intr(void); static __inline u_int read_eflags(void); @@ -146,6 +149,39 @@ tlbflush(void) __asm __volatile("movl %0,%%cr3" : : "r" (val)); } +static __inline void +tlbflushg(void) +{ + /* + * Big hammer: flush all TLB entries, including ones from PTE's + * with the G bit set. This should only be necessary if TLB + * shootdown falls far behind. + * + * Intel Architecture Software Developer's Manual, Volume 3, + * System Programming, section 9.10, "Invalidating the + * Translation Lookaside Buffers (TLBS)": + * "The following operations invalidate all TLB entries, irrespective + * of the setting of the G flag: + * ... + * "(P6 family processors only): Writing to control register CR4 to + * modify the PSE, PGE, or PAE flag." + * + * (the alternatives not quoted above are not an option here.) + * + * If PGE is not in use, we reload CR3 for the benefit of + * pre-P6-family processors. 
+ */ + +#if defined(I686_CPU) + if (cpu_feature & CPUID_PGE) { + u_int cr4 = rcr4(); + lcr4(cr4 & ~CR4_PGE); + lcr4(cr4); + } else +#endif + tlbflush(); +} + #ifdef notyet void setidt(int idx, /*XXX*/caddr_t func, int typ, int dpl); #endif diff --git a/sys/arch/i386/include/cpuvar.h b/sys/arch/i386/include/cpuvar.h new file mode 100644 index 00000000000..5bf9f3342ae --- /dev/null +++ b/sys/arch/i386/include/cpuvar.h @@ -0,0 +1,108 @@ +/* $OpenBSD: cpuvar.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: cpuvar.h,v 1.1.2.3 2000/02/21 18:54:07 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1999 Stefan Grefen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +struct cpu_functions { + int (*start)(struct cpu_info *); + int (*stop)(struct cpu_info *); + void (*cleanup)(struct cpu_info *); +}; + +extern struct cpu_functions mp_cpu_funcs; + +#define CPU_ROLE_SP 0 +#define CPU_ROLE_BP 1 +#define CPU_ROLE_AP 2 + +struct cpu_attach_args { + const char *caa_name; + int cpu_number; + int cpu_role; + struct cpu_functions *cpu_func; + int cpu_signature; + int feature_flags; +}; + +#define MP_PICMODE 0x00000001 /* System booted in picmode */ + +#ifdef _KERNEL + +int i386_ipi(int,int,int); +void i386_self_ipi(int); +int i386_ipi_init(int); + +void identifycpu(struct cpu_info *); +void cpu_init(struct cpu_info *); +void cpu_init_first(void); + +#endif + diff --git a/sys/arch/i386/include/db_machdep.h b/sys/arch/i386/include/db_machdep.h index 69fb1d2cb30..c1f5b9b6dfe 100644 --- a/sys/arch/i386/include/db_machdep.h +++ b/sys/arch/i386/include/db_machdep.h @@ -1,4 +1,4 @@ -/* $OpenBSD: db_machdep.h,v 1.9 2003/05/18 02:43:13 andreas Exp $ */ +/* $OpenBSD: db_machdep.h,v 1.10 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: db_machdep.h,v 1.9 1996/05/03 19:23:59 christos Exp $ */ /* @@ -120,5 +120,13 @@ void db_task_name(/* task_t */); int kdb_trap(int, int, db_regs_t *); void db_machine_init(void); +void db_enter_ddb(void); +void db_leave_ddb(void); +void db_startcpu(int cpu); +void db_stopcpu(int cpu); +void db_movetocpu(int cpu); +void i386_ipi_db(struct cpu_info *); + +extern struct SIMPLELOCK 
ddb_mp_slock; #endif /* _I386_DB_MACHDEP_H_ */ diff --git a/sys/arch/i386/include/gdt.h b/sys/arch/i386/include/gdt.h index 1568b573a86..ba741e2c637 100644 --- a/sys/arch/i386/include/gdt.h +++ b/sys/arch/i386/include/gdt.h @@ -1,8 +1,8 @@ -/* $OpenBSD: gdt.h,v 1.9 2002/03/14 01:26:33 millert Exp $ */ -/* $NetBSD: gdt.h,v 1.3 1996/02/27 22:32:11 jtc Exp $ */ +/* $OpenBSD: gdt.h,v 1.10 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: gdt.h,v 1.7.10.6 2002/08/19 01:22:36 sommerfeld Exp $ */ /*- - * Copyright (c) 1996 The NetBSD Foundation, Inc. + * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -37,10 +37,35 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifdef _KERNEL +#ifndef _LOCORE + +struct cpu_info; +struct pcb; +struct pmap; +union descriptor; + +void gdt_alloc_cpu(struct cpu_info *); +int gdt_get_slot(void); void gdt_init(void); -void tss_alloc(struct pcb *); -void tss_free(struct pcb *); +void gdt_init_cpu(struct cpu_info *); +void gdt_reload_cpu(/* XXX struct cpu_info * */ void); void ldt_alloc(struct pmap *, union descriptor *, size_t); void ldt_free(struct pmap *); +int tss_alloc(struct pcb *); +void tss_free(int); +void setgdt(int, void *, size_t, int, int, int, int); #endif + +/* + * The initial GDT size (as a descriptor count), and the maximum + * GDT size possible. + * + * These are actually not arbitrary. To start with, they have to be + * multiples of 512 and at least 512, in order to work with the + * allocation strategy set forth by gdt_init and gdt_grow. Then, the + * max cannot exceed 65536 since the selector field of a descriptor is + * just 16 bits, and used as free list link. 
+ */ + +#define MINGDTSIZ 512 +#define MAXGDTSIZ 8192 diff --git a/sys/arch/i386/include/i82093reg.h b/sys/arch/i386/include/i82093reg.h new file mode 100644 index 00000000000..07ec03d8991 --- /dev/null +++ b/sys/arch/i386/include/i82093reg.h @@ -0,0 +1,124 @@ +/* $OpenBSD: i82093reg.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: i82093reg.h,v 1.1.2.2 2000/02/21 18:54:07 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Typically, the first apic lives here. + */ +#define IOAPIC_BASE_DEFAULT 0xfec00000 + +/* + * Memory-space registers. + */ + +/* + * The externally visible registers are all 32 bits wide; + * store the register number of interest in IOAPIC_REG, and store/fetch + * the real value in IOAPIC_DATA. + */ + + + +#define IOAPIC_REG 0x0000 +#define IOAPIC_DATA 0x0010 + +/* + * Internal I/O APIC registers. + */ + +#define IOAPIC_ID 0x00 + +#define IOAPIC_ID_SHIFT 24 +#define IOAPIC_ID_MASK 0x0f000000 + +/* Version, and maximum interrupt pin number. */ + +#define IOAPIC_VER 0x01 + +#define IOAPIC_VER_SHIFT 0 +#define IOAPIC_VER_MASK 0x000000ff + +#define IOAPIC_MAX_SHIFT 16 +#define IOAPIC_MAX_MASK 0x00ff0000 + +/* + * Arbitration ID. Same format as IOAPIC_ID register. + */ +#define IOAPIC_ARB 0x02 + +/* + * Redirection table registers. + */ + +#define IOAPIC_REDHI(pin) (0x11 + ((pin)<<1)) +#define IOAPIC_REDLO(pin) (0x10 + ((pin)<<1)) + +#define IOAPIC_REDHI_DEST_SHIFT 24 /* destination. */ +#define IOAPIC_REDHI_DEST_MASK 0xff000000 + +#define IOAPIC_REDLO_MASK 0x00010000 /* 0=enabled; 1=masked */ + +#define IOAPIC_REDLO_LEVEL 0x00008000 /* 0=edge, 1=level */ +#define IOAPIC_REDLO_RIRR 0x00004000 /* remote IRR; read only */ +#define IOAPIC_REDLO_ACTLO 0x00002000 /* 0=act. hi; 1=act. 
lo */ +#define IOAPIC_REDLO_DELSTS 0x00001000 /* 0=idle; 1=send pending */ +#define IOAPIC_REDLO_DSTMOD 0x00000800 /* 0=physical; 1=logical */ + +#define IOAPIC_REDLO_DEL_MASK 0x00000700 /* del. mode mask */ +#define IOAPIC_REDLO_DEL_SHIFT 8 + +#define IOAPIC_REDLO_DEL_FIXED 0 +#define IOAPIC_REDLO_DEL_LOPRI 1 +#define IOAPIC_REDLO_DEL_SMI 2 +#define IOAPIC_REDLO_DEL_NMI 4 +#define IOAPIC_REDLO_DEL_INIT 5 +#define IOAPIC_REDLO_DEL_EXTINT 7 + +#define IOAPIC_REDLO_VECTOR_MASK 0x000000ff /* delivery vector */ + +#define IMCR_ADDR 0x22 +#define IMCR_DATA 0x23 + +#define IMCR_REGISTER 0x70 +#define IMCR_PIC 0x00 +#define IMCR_APIC 0x01 + +#define ioapic_asm_ack(num) \ + movl $0,_C_LABEL(local_apic) + LAPIC_EOI diff --git a/sys/arch/i386/include/i82093var.h b/sys/arch/i386/include/i82093var.h new file mode 100644 index 00000000000..c3d7ce586f3 --- /dev/null +++ b/sys/arch/i386/include/i82093var.h @@ -0,0 +1,103 @@ +/* $OpenBSD: i82093var.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: i82093var.h,v 1.1 2003/02/26 21:26:10 fvdl Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. 
and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _I386_I82093VAR_H_ +#define _I386_I82093VAR_H_ + +#include <machine/apicvar.h> + +struct ioapic_pin +{ + struct intrhand *ip_handler; + struct ioapic_pin *ip_next; /* next pin on this vector */ + struct mp_intr_map *ip_map; + int ip_vector; /* IDT vector */ + int ip_type; + int ip_minlevel; + int ip_maxlevel; +}; + +struct ioapic_softc { + struct device sc_dev; /* generic device glue */ + struct ioapic_softc *sc_next; + int sc_apicid; + int sc_apic_vers; + int sc_apic_sz; /* apic size*/ + int sc_flags; + paddr_t sc_pa; /* PA of ioapic */ + volatile u_int32_t *sc_reg; /* KVA of ioapic addr */ + volatile u_int32_t *sc_data; /* KVA of ioapic data */ + struct ioapic_pin *sc_pins; /* sc_apic_sz entries */ +}; + +/* + * MP: intr_handle_t is bitfielded. + * ih&0xff -> line number. + * ih&0x10000000 -> if 0, old-style isa irq; if 1, routed via ioapic. + * (ih&0xff0000)>>16 -> ioapic id. + * (ih&0x00ff00)>>8 -> ioapic line. 
+ */ + +#define APIC_INT_VIA_APIC 0x10000000 +#define APIC_INT_APIC_MASK 0x00ff0000 +#define APIC_INT_APIC_SHIFT 16 +#define APIC_INT_PIN_MASK 0x0000ff00 +#define APIC_INT_PIN_SHIFT 8 + +#define APIC_IRQ_APIC(x) (((x) & APIC_INT_APIC_MASK) >> APIC_INT_APIC_SHIFT) /* ioapic id field; argument parenthesized so compound expressions expand safely */ +#define APIC_IRQ_PIN(x) (((x) & APIC_INT_PIN_MASK) >> APIC_INT_PIN_SHIFT) /* ioapic line (pin) field; argument parenthesized likewise */ + +void *apic_intr_establish(int, int, int, int (*)(void *), void *, char *); +void apic_intr_disestablish(void *); + +void ioapic_print_redir(struct ioapic_softc *, char *, int); +void ioapic_format_redir(char *, char *, int, u_int32_t, u_int32_t); +struct ioapic_softc *ioapic_find(int); +struct ioapic_softc *ioapic_find_bybase(int); + +void ioapic_enable(void); +void lapic_vectorset(void); /* XXX */ + +extern int ioapic_bsp_id; +extern int nioapics; +extern struct ioapic_softc *ioapics; + +#endif /* !_I386_I82093VAR_H_ */ diff --git a/sys/arch/i386/include/i82489reg.h b/sys/arch/i386/include/i82489reg.h new file mode 100644 index 00000000000..9b850519370 --- /dev/null +++ b/sys/arch/i386/include/i82489reg.h @@ -0,0 +1,150 @@ +/* $OpenBSD: i82489reg.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: i82489reg.h,v 1.1.2.1 2000/02/20 16:30:27 sommerfeld Exp $ */ + +/*- + * Copyright (c) 1998 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Frank van der Linden. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3.
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +/* + * Registers and constants for the 82489DX and Pentium (and up) integrated + * "local" APIC. + */ + +#define LAPIC_ID 0x020 /* ID. RW */ +# define LAPIC_ID_MASK 0x0f000000 +# define LAPIC_ID_SHIFT 24 + +#define LAPIC_VERS 0x030 /* Version. R */ +# define LAPIC_VERSION_MASK 0x000000ff +# define LAPIC_VERSION_LVT_MASK 0x00ff0000 +# define LAPIC_VERSION_LVT_SHIFT 16 + +#define LAPIC_TPRI 0x080 /* Task Prio. RW */ +# define LAPIC_TPRI_MASK 0x000000ff +# define LAPIC_TPRI_INT_MASK 0x000000f0 +# define LAPIC_TPRI_SUB_MASK 0x0000000f + +#define LAPIC_APRI 0x090 /* Arbitration prio R */ +# define LAPIC_APRI_MASK 0x000000ff + +#define LAPIC_PPRI 0x0a0 /* Processor prio. R */ +#define LAPIC_EOI 0x0b0 /* End Int. 
W */ +#define LAPIC_RRR 0x0c0 /* Remote read R */ +#define LAPIC_LDR 0x0d0 /* Logical dest. RW */ +#define LAPIC_DFR 0x0e0 /* Dest. format RW */ + +#define LAPIC_SVR 0x0f0 /* Spurious intvec RW */ +# define LAPIC_SVR_VECTOR_MASK 0x000000ff +# define LAPIC_SVR_VEC_FIX 0x0000000f +# define LAPIC_SVR_VEC_PROG 0x000000f0 +# define LAPIC_SVR_ENABLE 0x00000100 +# define LAPIC_SVR_SWEN 0x00000100 +# define LAPIC_SVR_FOCUS 0x00000200 +# define LAPIC_SVR_FDIS 0x00000200 + +#define LAPIC_ISR 0x100 /* Int. status. R */ +#define LAPIC_TMR 0x180 +#define LAPIC_IRR 0x200 +#define LAPIC_ESR 0x280 /* Err status. R */ + +#define LAPIC_ICRLO 0x300 /* Int. cmd. RW */ +# define LAPIC_DLMODE_MASK 0x00000700 +# define LAPIC_DLMODE_FIXED 0x00000000 +# define LAPIC_DLMODE_LOW 0x00000100 +# define LAPIC_DLMODE_SMI 0x00000200 +# define LAPIC_DLMODE_RR 0x00000300 +# define LAPIC_DLMODE_NMI 0x00000400 +# define LAPIC_DLMODE_INIT 0x00000500 +# define LAPIC_DLMODE_STARTUP 0x00000600 + +# define LAPIC_DSTMODE_LOG 0x00000800 + +# define LAPIC_DLSTAT_BUSY 0x00001000 + +# define LAPIC_LVL_ASSERT 0x00004000 +# define LAPIC_LVL_DEASSERT 0x00000000 + +# define LAPIC_LVL_TRIG 0x00008000 + +# define LAPIC_RRSTAT_MASK 0x00030000 +# define LAPIC_RRSTAT_INPROG 0x00010000 +# define LAPIC_RRSTAT_VALID 0x00020000 + +# define LAPIC_DEST_MASK 0x000c0000 +# define LAPIC_DEST_SELF 0x00040000 +# define LAPIC_DEST_ALLINCL 0x00080000 +# define LAPIC_DEST_ALLEXCL 0x000c0000 + +# define LAPIC_RESV2_MASK 0xfff00000 + + +#define LAPIC_ICRHI 0x310 /* Int. cmd. 
RW */ +# define LAPIC_ID_MASK 0x0f000000 +# define LAPIC_ID_SHIFT 24 + +#define LAPIC_LVTT 0x320 /* Loc.vec.(timer) RW */ +# define LAPIC_LVTT_VEC_MASK 0x000000ff +# define LAPIC_LVTT_DS 0x00001000 +# define LAPIC_LVTT_M 0x00010000 +# define LAPIC_LVTT_TM 0x00020000 + +#define LAPIC_PCINT 0x340 +#define LAPIC_LVINT0 0x350 /* Loc.vec (LINT0) RW */ +# define LAPIC_LVT_PERIODIC 0x00020000 +# define LAPIC_LVT_MASKED 0x00010000 +# define LAPIC_LVT_LEVTRIG 0x00008000 +# define LAPIC_LVT_REMOTE_IRR 0x00004000 +# define LAPIC_INP_POL 0x00002000 +# define LAPIC_PEND_SEND 0x00001000 + +#define LAPIC_LVINT1 0x360 /* Loc.vec (LINT1) RW */ +#define LAPIC_LVERR 0x370 /* Loc.vec (ERROR) RW */ +#define LAPIC_ICR_TIMER 0x380 /* Initial count RW */ +#define LAPIC_CCR_TIMER 0x390 /* Current count RO */ + +#define LAPIC_DCR_TIMER 0x3e0 /* Divisor config register */ +# define LAPIC_DCRT_DIV1 0x0b +# define LAPIC_DCRT_DIV2 0x00 +# define LAPIC_DCRT_DIV4 0x01 +# define LAPIC_DCRT_DIV8 0x02 +# define LAPIC_DCRT_DIV16 0x03 +# define LAPIC_DCRT_DIV32 0x08 +# define LAPIC_DCRT_DIV64 0x09 +# define LAPIC_DCRT_DIV128 0x0a + +#define LAPIC_BASE 0xfee00000 + +#define LAPIC_IRQ_MASK(i) (1 << ((i) + 1)) diff --git a/sys/arch/i386/include/i82489var.h b/sys/arch/i386/include/i82489var.h new file mode 100644 index 00000000000..3c4632872ce --- /dev/null +++ b/sys/arch/i386/include/i82489var.h @@ -0,0 +1,113 @@ +/* $OpenBSD: i82489var.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: i82489var.h,v 1.1.2.2 2000/02/21 18:46:14 sommerfeld Exp $ */ + +/*- + * Copyright (c) 1998 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Frank van der Linden. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _I386_I82489VAR_H_ +#define _I386_I82489VAR_H_ + +static __inline__ u_int32_t i82489_readreg(int); +static __inline__ void i82489_writereg(int, u_int32_t); + +#ifdef _KERNEL +extern volatile u_int32_t local_apic[]; +#endif /* NOTE(review): the inlines below use local_apic unconditionally although it is only declared under _KERNEL; presumably this header is kernel-only -- confirm */ + +static __inline__ u_int32_t +i82489_readreg(reg) + int reg; /* byte offset from the start of local_apic */ +{ /* returns the 32-bit word read through a volatile pointer at that byte offset */ + return *((volatile u_int32_t *)(((volatile u_int8_t *)local_apic) + + reg)); +} + +static __inline__ void +i82489_writereg(reg, val) + int reg; /* byte offset from the start of local_apic */ + u_int32_t val; /* 32-bit value to store */ +{ /* stores val through a volatile pointer at that byte offset */ + *((volatile u_int32_t *)(((volatile u_int8_t *)local_apic) + reg)) = + val; +} + +/* + * "spurious interrupt vector"; vector used by interrupt which was + * aborted because the CPU masked it after it happened but before it + * was delivered. "Oh, sorry, I caught you at a bad time". + * Low-order 4 bits must be all ones. + */ +extern void i386_spurious(void); +extern void Xintrspurious(void); +#define LAPIC_SPURIOUS_VECTOR 0xef + +/* + * Vector used for inter-processor interrupts. + */ +extern void Xintripi(void); +#define LAPIC_IPI_VECTOR IPL_IPI + +/* + * Vector used for local apic timer interrupts. + */ + +extern void Xintrltimer(void); +#define LAPIC_TIMER_VECTOR IPL_CLOCK + +/* + * Vectors to be used for self-soft-interrupts.
+ */ + +#define LAPIC_SOFTCLOCK_VECTOR IPL_SOFTCLOCK +#define LAPIC_SOFTNET_VECTOR IPL_SOFTNET +#define LAPIC_SOFTTTY_VECTOR IPL_SOFTTTY + +extern void Xintrsoftclock(void); +extern void Xintrsoftnet(void); +extern void Xintrsofttty(void); + +extern void (*apichandler[])(void); + +struct cpu_info; + +extern void lapic_boot_init(paddr_t); +extern void lapic_initclocks(void); +extern void lapic_set_lvt(void); +extern void lapic_set_softvectors(void); +extern void lapic_enable(void); +extern void lapic_calibrate_timer(struct cpu_info *); + +#endif diff --git a/sys/arch/i386/include/i8259.h b/sys/arch/i386/include/i8259.h new file mode 100644 index 00000000000..f1c4462e4d0 --- /dev/null +++ b/sys/arch/i386/include/i8259.h @@ -0,0 +1,154 @@ +/* $OpenBSD: i8259.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: i8259.h,v 1.3 2003/05/04 22:01:56 fvdl Exp $ */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)icu.h 5.6 (Berkeley) 5/9/91 + */ + +#ifndef _I386_I8259_H_ +#define _I386_I8259_H_ + +#include <dev/isa/isareg.h> + +#ifndef _LOCORE + +/* + * Interrupt "level" mechanism variables, masks, and macros + */ +extern unsigned imen; /* interrupt mask enable */ +extern unsigned i8259_setmask(unsigned); + +#define SET_ICUS() (outb(IO_ICU1 + 1, imen), outb(IO_ICU2 + 1, imen >> 8)) + +extern void i8259_default_setup(void); +extern void i8259_reinit(void); + +#endif /* !_LOCORE */ + +/* + * Interrupt enable bits -- in order of priority + */ +#define IRQ_SLAVE 2 + +/* + * Interrupt Control offset into Interrupt descriptor table (IDT) + */ +#define ICU_OFFSET 32 /* 0-31 are processor exceptions */ +#define ICU_LEN 16 /* 32-47 are ISA interrupts */ + + +#define ICU_HARDWARE_MASK + +/* + * These macros are fairly self explanatory. If ICU_SPECIAL_MASK_MODE is + * defined, we try to take advantage of the ICU's `special mask mode' by only + * EOIing the interrupts on return. This avoids the requirement of masking and + * unmasking. We can't do this without special mask mode, because the ICU + * would also hold interrupts that it thinks are of lower priority. 
+ * + * Many machines do not support special mask mode, so by default we don't try + * to use it. + */ + +#define IRQ_BIT(num) (1 << ((num) % 8)) +#define IRQ_BYTE(num) ((num) >> 3) + +#define i8259_late_ack(num) + +#ifdef ICU_SPECIAL_MASK_MODE + +#define i8259_asm_ack1(num) +#define i8259_asm_ack2(num) \ + movb $(0x60|IRQ_SLAVE),%al /* specific EOI for IRQ2 */ ;\ + outb %al,$IO_ICU1 +#define i8259_asm_mask(num) +#define i8259_asm_unmask(num) \ + movb $(0x60|(num%8)),%al /* specific EOI */ ;\ + outb %al,$ICUADDR + +#else /* ICU_SPECIAL_MASK_MODE */ + +#ifndef AUTO_EOI_1 +#define i8259_asm_ack1(num) \ + movb $(0x60|(num%8)),%al /* specific EOI */ ;\ + outb %al,$IO_ICU1 +#else +#define i8259_asm_ack1(num) +#endif + +#ifndef AUTO_EOI_2 +#define i8259_asm_ack2(num) \ + movb $(0x60|(num%8)),%al /* specific EOI */ ;\ + outb %al,$IO_ICU2 /* do the second ICU first */ ;\ + movb $(0x60|IRQ_SLAVE),%al /* specific EOI for IRQ2 */ ;\ + outb %al,$IO_ICU1 +#else +#define i8259_asm_ack2(num) +#endif + +#ifndef DUMMY_NOPS +#define PIC_MASKDELAY +#endif + +#ifdef PIC_MASKDELAY +#define MASKDELAY pushl %eax ; inb $0x84,%al ; popl %eax +#else +#define MASKDELAY +#endif + +#ifdef ICU_HARDWARE_MASK + +#define i8259_asm_mask(num) \ + movb CVAROFF(imen, IRQ_BYTE(num)),%al ;\ + orb $IRQ_BIT(num),%al ;\ + movb %al,CVAROFF(imen, IRQ_BYTE(num)) ;\ + MASKDELAY ;\ + outb %al,$(ICUADDR+1) +#define i8259_asm_unmask(num) \ + cli ;\ + movb CVAROFF(imen, IRQ_BYTE(num)),%al ;\ + andb $~IRQ_BIT(num),%al ;\ + movb %al,CVAROFF(imen, IRQ_BYTE(num)) ;\ + MASKDELAY ;\ + outb %al,$(ICUADDR+1) ;\ + sti + +#else /* ICU_HARDWARE_MASK */ + +#define i8259_asm_mask(num) +#define i8259_asm_unmask(num) + +#endif /* ICU_HARDWARE_MASK */ +#endif /* ICU_SPECIAL_MASK_MODE */ + +#endif /* !_I386_I8259_H_ */ diff --git a/sys/arch/i386/include/intr.h b/sys/arch/i386/include/intr.h index 00ba2cea665..9d7a7816cf0 100644 --- a/sys/arch/i386/include/intr.h +++ b/sys/arch/i386/include/intr.h @@ -1,4 +1,4 @@ -/* $OpenBSD:
intr.h,v 1.20 2004/05/23 00:06:01 tedu Exp $ */ +/* $OpenBSD: intr.h,v 1.21 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: intr.h,v 1.5 1996/05/13 06:11:28 mycroft Exp $ */ /* @@ -33,85 +33,31 @@ #ifndef _I386_INTR_H_ #define _I386_INTR_H_ -/* - * Intel APICs (advanced programmable interrupt controllers) have - * bytesized priority registers where the upper nibble is the actual - * interrupt priority level (a.k.a. IPL). Interrupt vectors are - * closely tied to these levels as interrupts whose vectors' upper - * nibble is lower than or equal to the current level are blocked. - * Not all 256 possible vectors are available for interrupts in - * APIC systems, only - * - * For systems where instead the older ICU (interrupt controlling - * unit, a.k.a. PIC or 82C59) is used, the IPL is not directly useful, - * since the interrupt blocking is handled via interrupt masks instead - * of levels. However the IPL is easily used as an offset into arrays - * of masks. - */ -#define IPLSHIFT 4 /* The upper nibble of vectors is the IPL. */ -#define NIPL 16 /* Four bits of information gives as much. */ -#define IPL(level) ((level) >> IPLSHIFT) /* Extract the IPL. */ -/* XXX Maybe this IDTVECOFF definition should be elsewhere? */ -#define IDTVECOFF 0x20 /* The lower 32 IDT vectors are reserved. */ +#include <machine/intrdefs.h> -/* - * This macro is only defined for 0 <= x < 14, i.e. there are fourteen - * distinct priority levels available for interrupts. - */ -#define MAKEIPL(priority) (IDTVECOFF + ((priority) << IPLSHIFT)) +#ifndef _LOCORE -/* - * Interrupt priority levels. - * XXX We are somewhat sloppy about what we mean by IPLs, sometimes - * XXX we refer to the eight-bit value suitable for storing into APICs' - * XXX priority registers, other times about the four-bit entity found - * XXX in the former values' upper nibble, which can be used as offsets - * XXX in various arrays of our implementation. 
We are hoping that - * XXX the context will provide enough information to not make this - * XXX sloppy naming a real problem. - */ -#define IPL_NONE 0 /* nothing */ -#define IPL_SOFTCLOCK MAKEIPL(0) /* timeouts */ -#define IPL_SOFTNET MAKEIPL(1) /* protocol stacks */ -#define IPL_BIO MAKEIPL(2) /* block I/O */ -#define IPL_NET MAKEIPL(3) /* network */ -#define IPL_SOFTTTY MAKEIPL(4) /* delayed terminal handling */ -#define IPL_TTY MAKEIPL(5) /* terminal */ -#define IPL_VM MAKEIPL(6) /* memory allocation */ -#define IPL_IMP IPL_VM /* XXX - should not be here. */ -#define IPL_AUDIO MAKEIPL(7) /* audio */ -#define IPL_CLOCK MAKEIPL(8) /* clock */ -#define IPL_STATCLOCK MAKEIPL(9) /* statclock */ -#define IPL_HIGH MAKEIPL(9) /* everything */ - -/* Interrupt sharing types. */ -#define IST_NONE 0 /* none */ -#define IST_PULSE 1 /* pulsed */ -#define IST_EDGE 2 /* edge-triggered */ -#define IST_LEVEL 3 /* level-triggered */ - -/* Soft interrupt masks. */ -#define SIR_CLOCK 31 -#define SIR_NET 30 -#define SIR_TTY 29 +#ifdef MULTIPROCESSOR +#include <machine/i82489reg.h> +#include <machine/i82489var.h> +#include <machine/cpu.h> +#endif -#ifndef _LOCORE +extern volatile u_int32_t lapic_tpr; /* Current interrupt priority level. */ -volatile int cpl; /* Current interrupt priority level. */ -volatile int ipending; /* Interrupts pending. */ -volatile int astpending;/* Asynchronous software traps (softints) pending. */ -int imask[NIPL]; /* Bitmasks telling what interrupts are blocked. */ -int iunmask[NIPL]; /* Bitmasks telling what interrupts are accepted. */ +extern volatile u_int32_t ipending; /* Interrupts pending. */ +extern int imask[]; /* Bitmasks telling what interrupts are blocked. */ +extern int iunmask[]; /* Bitmasks telling what interrupts are accepted. 
*/ #define IMASK(level) imask[IPL(level)] #define IUNMASK(level) iunmask[IPL(level)] extern void Xspllower(void); -int splraise(int); -int spllower(int); -void splx(int); -void softintr(int); +extern int splraise(int); +extern int spllower(int); +extern void splx(int); +extern void softintr(int, int); /* SPL asserts */ #ifdef DIAGNOSTIC @@ -140,6 +86,7 @@ void splassert_check(int, const char *); #define splaudio() splraise(IPL_AUDIO) #define splclock() splraise(IPL_CLOCK) #define splstatclock() splhigh() +#define splipi() splraise(IPL_IPI) /* * Software interrupt masks @@ -158,12 +105,40 @@ void splassert_check(int, const char *); #define splvm() splraise(IPL_VM) #define splimp() splvm() #define splhigh() splraise(IPL_HIGH) +#define splsched() splraise(IPL_SCHED) +#define spllock() splhigh() #define spl0() spllower(IPL_NONE) #define setsoftast() (astpending = 1) -#define setsoftclock() softintr(1 << SIR_CLOCK) -#define setsoftnet() softintr(1 << SIR_NET) -#define setsofttty() softintr(1 << SIR_TTY) +#define setsoftclock() softintr(1 << SIR_CLOCK, IPL_SOFTCLOCK) +#define setsoftnet() softintr(1 << SIR_NET, IPL_SOFTNET) +#define setsofttty() softintr(1 << SIR_TTY, IPL_SOFTTTY) + +#define I386_IPI_HALT 0x00000001 +#define I386_IPI_MICROSET 0x00000002 +#define I386_IPI_FLUSH_FPU 0x00000004 +#define I386_IPI_SYNCH_FPU 0x00000008 +#define I386_IPI_TLB 0x00000010 +#define I386_IPI_MTRR 0x00000020 +#define I386_IPI_GDT 0x00000040 +#define I386_IPI_DDB 0x00000080 /* synchronize while in ddb */ + +#define I386_NIPI 8 + +struct cpu_info; + +#ifdef MULTIPROCESSOR +int i386_send_ipi(struct cpu_info *, int); +void i386_broadcast_ipi(int); +void i386_multicast_ipi(int, int); +void i386_ipi_handler(void); +void i386_intlock(struct intrframe); +void i386_intunlock(struct intrframe); +void i386_softintlock(void); +void i386_softintunlock(void); + +extern void (*ipifunc[I386_NIPI])(struct cpu_info *); +#endif #endif /* !_LOCORE */ diff --git a/sys/arch/i386/include/intrdefs.h 
b/sys/arch/i386/include/intrdefs.h new file mode 100644 index 00000000000..c2c998b76b3 --- /dev/null +++ b/sys/arch/i386/include/intrdefs.h @@ -0,0 +1,127 @@ +/* $OpenBSD: intrdefs.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: intrdefs.h,v 1.2 2003/05/04 22:01:56 fvdl Exp $ */ + +#ifndef _i386_INTRDEFS_H +#define _i386_INTRDEFS_H + +/* + * Intel APICs (advanced programmable interrupt controllers) have + * bytesized priority registers where the upper nibble is the actual + * interrupt priority level (a.k.a. IPL). Interrupt vectors are + * closely tied to these levels as interrupts whose vectors' upper + * nibble is lower than or equal to the current level are blocked. + * Not all 256 possible vectors are available for interrupts in + * APIC systems. + * + * For systems where instead the older ICU (interrupt controlling + * unit, a.k.a. PIC or 82C59) is used, the IPL is not directly useful, + * since the interrupt blocking is handled via interrupt masks instead + * of levels. However the IPL is easily used as an offset into arrays + * of masks. + */ +#define IPLSHIFT 4 /* The upper nibble of vectors is the IPL. */ +#define NIPL 16 /* Four bits of information gives as much. */ +#define IPL(level) ((level) >> IPLSHIFT) /* Extract the IPL. */ +/* XXX Maybe this IDTVECOFF definition should be elsewhere? */ +#define IDTVECOFF 0x20 /* The lower 32 IDT vectors are reserved. */ + +/* + * This macro is only defined for 0 <= priority < 14, i.e. there are fourteen + * distinct priority levels available for interrupts. + */ +#define MAKEIPL(priority) (IDTVECOFF + ((priority) << IPLSHIFT)) + +/* + * Interrupt priority levels. + * + * XXX We are somewhat sloppy about what we mean by IPLs, sometimes + * XXX we refer to the eight-bit value suitable for storing into APICs' + * XXX priority registers, other times about the four-bit entity found + * XXX in the former values' upper nibble, which can be used as offsets + * XXX in various arrays of our implementation.
We are hoping that + * XXX the context will provide enough information to not make this + * XXX sloppy naming a real problem. + * + * There are tty, network and disk drivers that use free() at interrupt + * time, so imp > (tty | net | bio). + * + * Since run queues may be manipulated by both the statclock and tty, + * network, and disk drivers, clock > imp. + * + * IPL_HIGH must block everything that can manipulate a run queue. + * + * XXX Ultimately we may need serial drivers to run at the absolute highest + * XXX priority to avoid overruns, then we must make serial > high. + * + * The level numbers are picked to fit into APIC vector priorities. + */ +#define IPL_NONE 0 /* nothing */ +#define IPL_SOFTCLOCK MAKEIPL(0) /* timeouts */ +#define IPL_SOFTNET MAKEIPL(1) /* protocol stacks */ +#define IPL_BIO MAKEIPL(2) /* block I/O */ +#define IPL_NET MAKEIPL(3) /* network */ +#define IPL_SOFTTTY MAKEIPL(4) /* delayed terminal handling */ +#define IPL_TTY MAKEIPL(5) /* terminal */ +#define IPL_VM MAKEIPL(6) /* memory allocation */ +#define IPL_IMP IPL_VM /* XXX - should not be here. */ +#define IPL_AUDIO MAKEIPL(7) /* audio */ +#define IPL_CLOCK MAKEIPL(8) /* clock */ +#define IPL_SCHED IPL_CLOCK +#define IPL_STATCLOCK MAKEIPL(9) /* statclock */ +#define IPL_HIGH MAKEIPL(9) /* everything */ +#define IPL_IPI MAKEIPL(10) /* interprocessor interrupt */ + +/* Interrupt sharing types. */ +#define IST_NONE 0 /* none */ +#define IST_PULSE 1 /* pulsed */ +#define IST_EDGE 2 /* edge-triggered */ +#define IST_LEVEL 3 /* level-triggered */ + +/* + * Local APIC masks. Must not conflict with SIR_* below, and must + * be >= NUM_LEGACY_IRQs. Note that LIR_IPI must be first. + */ +#define LIR_IPI 31 +#define LIR_TIMER 30 + +/* Soft interrupt masks. */ +#define SIR_CLOCK 29 +#define SIR_NET 28 +#define SIR_TTY 27 + + +/* + * Maximum # of interrupt sources per CPU. 32 to fit in one word. + * ioapics can theoretically produce more, but it's not likely to + * happen. 
For multiple ioapics, things can be routed to different + * CPUs. + */ +#define MAX_INTR_SOURCES 32 +#define NUM_LEGACY_IRQS 16 + +/* + * Low and high boundaries between which interrupt gates will + * be allocated in the IDT. + */ +#define IDT_INTR_LOW (0x20 + NUM_LEGACY_IRQS) +#define IDT_INTR_HIGH 0xef + +#define I386_IPI_HALT 0x00000001 +#define I386_IPI_MICROSET 0x00000002 +#define I386_IPI_FLUSH_FPU 0x00000004 +#define I386_IPI_SYNCH_FPU 0x00000008 +#define I386_IPI_TLB 0x00000010 +#define I386_IPI_MTRR 0x00000020 +#define I386_IPI_GDT 0x00000040 +#define I386_IPI_DDB 0x00000080 /* synchronize while in ddb */ + +#define I386_NIPI 8 /* number of I386_IPI_* bits defined above */ + +#define I386_IPI_NAMES { "halt IPI", "timeset IPI", "FPU flush IPI", \ + "FPU synch IPI", "TLB shootdown IPI", \ + "MTRR update IPI", "GDT update IPI", \ + "DDB IPI" } + +#define IREENT_MAGIC 0x18041969 + +#endif /* _i386_INTRDEFS_H */ diff --git a/sys/arch/i386/include/lock.h b/sys/arch/i386/include/lock.h new file mode 100644 index 00000000000..a647999a532 --- /dev/null +++ b/sys/arch/i386/include/lock.h @@ -0,0 +1,119 @@ +/* $OpenBSD: lock.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: lock.h,v 1.1.2.2 2000/05/03 14:40:55 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Jason R. Thorpe. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3.
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Machine-dependent spin lock operations. + */ + +#ifndef _I386_LOCK_H_ +#define _I386_LOCK_H_ + +typedef __volatile int __cpu_simple_lock_t; + +#define __SIMPLELOCK_LOCKED 1 +#define __SIMPLELOCK_UNLOCKED 0 + +/* + * compiler barrier: prevent reordering of instructions. + * XXX something similar will move to <sys/cdefs.h> + * or thereabouts. + * This prevents the compiler from reordering code around + * this "instruction", acting as a sequence point for code generation. 
+ */ + +#define __lockbarrier() __asm __volatile("": : :"memory") + +#ifdef LOCKDEBUG + +extern void __cpu_simple_lock_init(__cpu_simple_lock_t *); +extern void __cpu_simple_lock(__cpu_simple_lock_t *); +extern int __cpu_simple_lock_try(__cpu_simple_lock_t *); +extern void __cpu_simple_unlock(__cpu_simple_lock_t *); + +#else + +#include <machine/atomic.h> + +static __inline void __cpu_simple_lock_init(__cpu_simple_lock_t *) + __attribute__((__unused__)); +static __inline void __cpu_simple_lock(__cpu_simple_lock_t *) + __attribute__((__unused__)); +static __inline int __cpu_simple_lock_try(__cpu_simple_lock_t *) + __attribute__((__unused__)); +static __inline void __cpu_simple_unlock(__cpu_simple_lock_t *) + __attribute__((__unused__)); + +static __inline void +__cpu_simple_lock_init(__cpu_simple_lock_t *lockp) +{ + *lockp = __SIMPLELOCK_UNLOCKED; + __lockbarrier(); +} + +static __inline void +__cpu_simple_lock(__cpu_simple_lock_t *lockp) +{ + while (i386_atomic_testset_i(lockp, __SIMPLELOCK_LOCKED) + == __SIMPLELOCK_LOCKED) { + continue; /* spin */ + } + __lockbarrier(); +} + +static __inline int +__cpu_simple_lock_try(__cpu_simple_lock_t *lockp) +{ + int r = (i386_atomic_testset_i(lockp, __SIMPLELOCK_LOCKED) + == __SIMPLELOCK_UNLOCKED); + + __lockbarrier(); + + return (r); +} + +static __inline void +__cpu_simple_unlock(__cpu_simple_lock_t *lockp) +{ + __lockbarrier(); + *lockp = __SIMPLELOCK_UNLOCKED; +} + +#endif /* !LOCKDEBUG */ + +#endif /* _I386_LOCK_H_ */ diff --git a/sys/arch/i386/include/mp.h b/sys/arch/i386/include/mp.h new file mode 100644 index 00000000000..d12e8a54aca --- /dev/null +++ b/sys/arch/i386/include/mp.h @@ -0,0 +1,222 @@ +/* $OpenBSD: mp.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ + +/*- + * Copyright (c) 1996 SigmaSoft, Th. Lockert <tholo@sigmasoft.com> + * Copyright (c) 2000 Niklas Hallqvist. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by SigmaSoft, Th. Lockert. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MACHINE_MP_H +#define _MACHINE_MP_H + +/* + * Configuration structures as defined in the Intel MP specification, + * version 1.4 + */ + +/* + * MP Floating Pointer structure; must be located on 16-byte boundary + */ +struct mp_float { + u_int8_t signature[4]; +#define MPF_SIGNATURE "_MP_" + u_int32_t pointer; + u_int8_t length; + u_int8_t revision; + u_int8_t checksum; + u_int8_t feature1; +#define MP_CONF_EXTENDED 0 +#define MP_CONF_2_ISA 1 +#define MP_CONF_2_EISA_NO_8 2 +#define MP_CONF_2_EISA 3 +#define MP_CONF_2_MCA 4 +#define MP_CONF_2_ISA_PCI 5 +#define MP_CONF_2_EISA_PCI 6 +#define MP_CONF_2_MCA_PCI 7 + u_int8_t feature2; +#define MP_IMCR 0x80 + u_int8_t feature3; + u_int8_t feature4; + u_int8_t feature5; +}; + +/* + * MP configuration table header + */ +struct mp_conf { + u_int8_t signature[4]; +#define MPC_SIGNATURE "PCMP" + u_int16_t length; + u_int8_t revision; + u_int8_t checksum; + u_int8_t oem[8]; + u_int8_t product[12]; + u_int32_t oem_pointer; + u_int16_t oem_length; + u_int16_t entry_count; + u_int32_t local_apic; + u_int16_t ext_length; + u_int8_t et_checksum; + u_int8_t reserved; +}; + +/* + * Processor entry + */ +struct mp_proc { + u_int8_t type; +#define MP_PROCESSOR 0 + u_int8_t local_apic; + u_int8_t apic_version; + u_int8_t flags; +#define MP_ENABLE 0x01 +#define MP_BOOTCPU 0x02 + u_int32_t cpu_signature; +#define MP_STEPPING 0x0000000F +#define MP_MODEL 0x000000F0 +#define MP_FAMILY 0x00000F00 + u_int32_t feature_flags; +#define MP_FP 0x00000001 +#define MP_MCE 0x00000080 +#define MP_CX8 0x00000100 +#define MP_APIC 0x00000200 + u_int32_t reserved1; + u_int32_t reserved2; +}; + +/* + * Bus entry + */ +struct mp_bus { + u_int8_t type; +#define MP_BUS 1 + u_int8_t bus_id; + u_int8_t bustype[6] __attribute((packed)); +#define MP_BUS_CBUS "CBUS " +#define MP_BUS_CBUSII "CBUSII" +#define MP_BUS_EISA "EISA " +#define MP_BUS_FUTURE "FUTURE" +#define MP_BUS_INTERN "INTERN" +#define MP_BUS_ISA "ISA " +#define MP_BUS_MBI "MBI " 
+#define MP_BUS_MBII "MBII " +#define MP_BUS_MCA "MCA " +#define MP_BUS_MPI "MPI " +#define MP_BUS_MPSA "MPSA " +#define MP_BUS_NUBUS "NUBUS " +#define MP_BUS_PCI "PCI " +#define MP_BUS_PCCARD "PCMCIA" +#define MP_BUS_TC "TC " +#define MP_BUS_VLB "VL " +#define MP_BUS_VME "VME " +#define MP_BUS_XPRESS "XPRESS" +}; + +/* + * I/O APIC entry + */ +struct mp_apic { + u_int8_t type; +#define MP_IOAPIC 2 + u_int8_t apic_id; + u_int8_t apic_version; + u_int8_t apic_flags; +#define MP_APIC_ENABLE 0x80 + u_int32_t apic_address; +}; + +/* + * I/O Interrupt Assignment entry + * Local Interrupt Assignment entry + */ +struct mp_irq { + u_int8_t type; +#define MP_INTSRC 3 +#define MP_LOCINTSRC 4 + u_int8_t irqtype; +#define MP_INT_NORMAL 0 +#define MP_INT_NMI 1 +#define MP_INT_SMI 2 +#define MP_INT_EXT 3 + u_int16_t irqflags; + u_int8_t bus_id; + u_int8_t source_irq; + u_int8_t destination_apic; +#define MP_ALL_APIC 0xFF + u_int8_t apic_intr; +}; + +/* + * System Address Space Mapping entry + */ +struct mp_map { + u_int8_t type; +#define MP_SYSMAP 128 + u_int8_t length; + u_int8_t bus; + u_int8_t address_type; +#define MP_ADDR_IO 0 +#define MP_ADDR_MEM 1 +#define MP_ADDR_PRE 2 + u_int64_t address_base; + u_int64_t address_length; +}; + +/* + * Bus Hierarchy Descriptor entry + */ +struct mp_bushier { + u_int8_t type; +#define MP_BUSHIER 129 + u_int8_t length; + u_int8_t bus_id; + u_int8_t bus_info; +#define MP_BUS_SUB 0x01 + u_int8_t parent; + u_int8_t reserved1; + u_int16_t reserved2; +}; + +/* + * Compatibility Bus Address Space Modifier entry + */ +struct mp_buscompat { + u_int8_t type; +#define MP_BUSCOMPAT 130 + u_int8_t length; + u_int8_t bus_id; + u_int8_t modifier; +#define MP_COMPAT_SUB 0x01 + u_int32_t range; +}; + +#ifdef _KERNEL +extern int napics; +#endif /* _KERNEL */ + +#endif /* _MACHINE_MP_H */ diff --git a/sys/arch/i386/include/mpbiosreg.h b/sys/arch/i386/include/mpbiosreg.h new file mode 100644 index 00000000000..d42aed04cdc --- /dev/null +++ 
b/sys/arch/i386/include/mpbiosreg.h @@ -0,0 +1,155 @@ +/* $OpenBSD: mpbiosreg.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: mpbiosreg.h,v 1.1.2.3 2000/02/29 13:17:51 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _I386_MPBIOSREG_H_ +#define _I386_MPBIOSREG_H_ + +#define BIOS_BASE (0xf0000) +#define BIOS_SIZE (0x10000) +#define BIOS_COUNT (BIOS_SIZE) + +/* + * Multiprocessor config table entry types. + */ + +#define MPS_MCT_CPU 0 +#define MPS_MCT_BUS 1 +#define MPS_MCT_IOAPIC 2 +#define MPS_MCT_IOINT 3 +#define MPS_MCT_LINT 4 + +#define MPS_MCT_NTYPES 5 + +/* + * Interrupt types + */ + +#define MPS_INTTYPE_INT 0 +#define MPS_INTTYPE_NMI 1 +#define MPS_INTTYPE_SMI 2 +#define MPS_INTTYPE_ExtINT 3 + +#define MPS_INTPO_DEF 0 +#define MPS_INTPO_ACTHI 1 +#define MPS_INTPO_ACTLO 3 + +#define MPS_INTTR_DEF 0 +#define MPS_INTTR_EDGE 1 +#define MPS_INTTR_LEVEL 3 + + +/* MP Floating Pointer Structure */ +struct mpbios_fps { + u_int32_t signature; +/* string defined by the Intel MP Spec as identifying the MP table */ +#define MP_FP_SIG 0x5f504d5f /* _MP_ */ + + u_int32_t pap; + u_int8_t length; + u_int8_t spec_rev; + u_int8_t checksum; + u_int8_t mpfb1; /* system configuration */ + u_int8_t mpfb2; /* flags */ +#define MPFPS_FLAG_IMCR 0x80 /* IMCR present */ + u_int8_t mpfb3; /* unused */ + u_int8_t mpfb4; /* unused */ + u_int8_t mpfb5; /* unused */ +}; + +/* MP Configuration Table Header */ +struct mpbios_cth { + u_int32_t signature; +#define MP_CT_SIG 0x504d4350 /* PCMP */ + + u_int16_t base_len; + u_int8_t spec_rev; + u_int8_t checksum; + u_int8_t oem_id[8]; + u_int8_t product_id[12]; + u_int32_t oem_table_pointer; + u_int16_t
oem_table_size; + u_int16_t entry_count; + u_int32_t apic_address; + u_int16_t ext_len; + u_int8_t ext_cksum; + u_int8_t reserved; +}; + +struct mpbios_proc { + u_int8_t type; + u_int8_t apic_id; + u_int8_t apic_version; + u_int8_t cpu_flags; +#define PROCENTRY_FLAG_EN 0x01 +#define PROCENTRY_FLAG_BP 0x02 + u_long cpu_signature; + u_long feature_flags; + u_long reserved1; + u_long reserved2; +}; + +struct mpbios_bus { + u_int8_t type; + u_int8_t bus_id; + char bus_type[6]; +}; + +struct mpbios_ioapic { + u_int8_t type; + u_int8_t apic_id; + u_int8_t apic_version; + u_int8_t apic_flags; +#define IOAPICENTRY_FLAG_EN 0x01 + void *apic_address; +}; + +struct mpbios_int { + u_int8_t type; + u_int8_t int_type; + u_int16_t int_flags; + u_int8_t src_bus_id; + u_int8_t src_bus_irq; + u_int8_t dst_apic_id; +#define MPS_ALL_APICS 0xff + u_int8_t dst_apic_int; +}; + +#endif /* !_I386_MPBIOSREG_H_ */ diff --git a/sys/arch/i386/include/mpbiosvar.h b/sys/arch/i386/include/mpbiosvar.h new file mode 100644 index 00000000000..b617041b4d9 --- /dev/null +++ b/sys/arch/i386/include/mpbiosvar.h @@ -0,0 +1,88 @@ +/* $OpenBSD: mpbiosvar.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $NetBSD: mpbiosvar.h,v 1.1.2.3 2000/02/29 13:17:20 sommerfeld Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by RedBack Networks Inc. + * + * Author: Bill Sommerfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _I386_MPBIOSVAR_H_ +#define _I386_MPBIOSVAR_H_ + +#define MP_TRAMPOLINE (2 * PAGE_SIZE) + +#if !defined(_LOCORE) + +#include <machine/mpbiosreg.h> + +struct mp_bus +{ + char *mb_name; /* XXX bus name */ + int mb_idx; /* XXX bus index */ + void (*mb_intr_print) (int); + void (*mb_intr_cfg)(const struct mpbios_int *, u_int32_t *); + struct mp_intr_map *mb_intrs; + u_int32_t mb_data; /* random bus-specific datum. 
*/ +}; + +struct mp_intr_map +{ + struct mp_intr_map *next; + struct mp_bus *bus; + int bus_pin; + struct ioapic_softc *ioapic; + int ioapic_pin; + int ioapic_ih; /* int handle, for apic_intr_est */ + int type; /* from mp spec intr record */ + int flags; /* from mp spec intr record */ + u_int32_t redir; +}; + +#if defined(_KERNEL) +extern int mp_verbose; +extern struct mp_bus *mp_busses; +extern struct mp_intr_map *mp_intrs; +extern int mp_isa_bus; +extern int mp_eisa_bus; + +void mpbios_scan(struct device *); +int mpbios_probe(struct device *); +#endif + +#endif + +#endif /* !_I386_MPBIOSVAR_H_ */ diff --git a/sys/arch/i386/include/npx.h b/sys/arch/i386/include/npx.h index d6f906efb46..810c7817bee 100644 --- a/sys/arch/i386/include/npx.h +++ b/sys/arch/i386/include/npx.h @@ -1,4 +1,4 @@ -/* $OpenBSD: npx.h,v 1.6 2004/02/01 19:05:23 deraadt Exp $ */ +/* $OpenBSD: npx.h,v 1.7 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: npx.h,v 1.11 1994/10/27 04:16:11 cgd Exp $ */ /*- @@ -180,5 +180,8 @@ struct emcsts { void process_xmm_to_s87(const struct savexmm *, struct save87 *); void process_s87_to_xmm(const struct save87 *, struct savexmm *); +struct cpu_info; + +void npxinit(struct cpu_info *); #endif /* !_I386_NPX_H_ */ diff --git a/sys/arch/i386/include/param.h b/sys/arch/i386/include/param.h index 0d1dfb68fd9..335d7737cbb 100644 --- a/sys/arch/i386/include/param.h +++ b/sys/arch/i386/include/param.h @@ -1,4 +1,4 @@ -/* $OpenBSD: param.h,v 1.27 2004/04/19 22:55:49 deraadt Exp $ */ +/* $OpenBSD: param.h,v 1.28 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: param.h,v 1.29 1996/03/04 05:04:26 cgd Exp $ */ /*- @@ -99,7 +99,7 @@ #define USPACE (UPAGES * NBPG) /* total size of u-area */ #ifndef MSGBUFSIZE -#define MSGBUFSIZE 2*NBPG /* default message buffer size */ +#define MSGBUFSIZE 4*NBPG /* default message buffer size */ #endif /* diff --git a/sys/arch/i386/include/pcb.h b/sys/arch/i386/include/pcb.h index 611ab5996a8..641f8874f17 100644 --- 
a/sys/arch/i386/include/pcb.h +++ b/sys/arch/i386/include/pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pcb.h,v 1.11 2004/02/01 19:05:23 deraadt Exp $ */ +/* $OpenBSD: pcb.h,v 1.12 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: pcb.h,v 1.21 1996/01/08 13:51:42 mycroft Exp $ */ /*- @@ -75,7 +75,8 @@ struct pcb { int vm86_eflags; /* virtual eflags for vm86 mode */ int vm86_flagmask; /* flag mask for vm86 mode */ void *vm86_userp; /* XXX performance hack */ - struct pmap *pcb_pmap; /* back pointer to our pmap */ + struct pmap *pcb_pmap; /* back pointer to our pmap */ + struct cpu_info *pcb_fpcpu; /* cpu holding our fpu state */ u_long pcb_iomap[NIOPORTS/32]; /* I/O bitmap */ u_char pcb_iomap_pad; /* required; must be 0xff, says intel */ }; @@ -88,8 +89,4 @@ struct md_coredump { long md_pad[8]; }; -#ifdef _KERNEL -struct pcb *curpcb; /* our current running pcb */ -#endif - #endif /* _I386_PCB_H_ */ diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index 74013d8824c..a7089d958fd 100644 --- a/sys/arch/i386/include/pmap.h +++ b/sys/arch/i386/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.34 2004/05/20 09:20:42 kettenis Exp $ */ +/* $OpenBSD: pmap.h,v 1.35 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */ /* @@ -236,6 +236,12 @@ #define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */ #define PG_X PG_AVAIL3 /* executable mapping */ +/* + * Number of PTE's per cache line. 4 byte pte, 32-byte cache line + * Used to avoid false sharing of cache lines. + */ +#define NPTECL 8 + #ifdef _KERNEL /* * pmap data structures: see pmap.c for details of locking. 
@@ -272,6 +278,7 @@ struct pmap { union descriptor *pm_ldt; /* user-set LDT */ int pm_ldt_len; /* number of LDT entries */ int pm_ldt_sel; /* LDT selector */ + uint32_t pm_cpus; /* mask of CPUs using map */ }; /* pm_flags */ @@ -330,19 +337,7 @@ struct pv_page { }; /* - * pmap_remove_record: a record of VAs that have been unmapped, used to - * flush TLB. If we have more than PMAP_RR_MAX then we stop recording. - */ - -#define PMAP_RR_MAX 16 /* max of 16 pages (64K) */ - -struct pmap_remove_record { - int prr_npages; - vaddr_t prr_vas[PMAP_RR_MAX]; -}; - -/* - * Global kernel variables + * global kernel variables */ extern pd_entry_t PTD[]; @@ -394,6 +389,10 @@ int pmap_exec_fixup(struct vm_map *, struct trapframe *, vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */ +void pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, int32_t *); +void pmap_tlb_shootnow(int32_t); +void pmap_do_tlb_shootdown(struct cpu_info *); + #define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */ /* diff --git a/sys/arch/i386/include/proc.h b/sys/arch/i386/include/proc.h index ff0bc4b1bb3..e5393cd22e4 100644 --- a/sys/arch/i386/include/proc.h +++ b/sys/arch/i386/include/proc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: proc.h,v 1.3 2003/06/02 23:27:47 millert Exp $ */ +/* $OpenBSD: proc.h,v 1.4 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: proc.h,v 1.10 1995/08/06 05:33:23 mycroft Exp $ */ /* @@ -38,6 +38,7 @@ struct mdproc { struct trapframe *md_regs; /* registers on current frame */ int md_flags; /* machine-dependent flags */ + int md_tss_sel; /* TSS selector */ }; /* md_flags */ diff --git a/sys/arch/i386/include/segments.h b/sys/arch/i386/include/segments.h index ac01a2a2e26..3519f0a42ed 100644 --- a/sys/arch/i386/include/segments.h +++ b/sys/arch/i386/include/segments.h @@ -1,4 +1,4 @@ -/* $OpenBSD: segments.h,v 1.12 2003/11/16 20:30:06 avsm Exp $ */ +/* $OpenBSD: segments.h,v 1.13 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: segments.h,v 1.23 1996/02/01 22:31:03 mycroft Exp $
*/ /*- @@ -124,7 +124,7 @@ struct region_descriptor { #endif #ifdef _KERNEL -extern union descriptor gdt[], ldt[]; +extern union descriptor *gdt, ldt[]; extern struct gate_descriptor idt_region[]; extern struct gate_descriptor *idt; @@ -132,6 +132,13 @@ void setgate(struct gate_descriptor *, void *, int, int, int, int); void setregion(struct region_descriptor *, void *, size_t); void setsegment(struct segment_descriptor *, void *, size_t, int, int, int, int); +void unsetgate(struct gate_descriptor *); +void cpu_init_idt(void); + +int idt_vec_alloc(int, int); +void idt_vec_set(int, void (*)(void)); +void idt_vec_free(int); + #endif /* _KERNEL */ #endif /* !_LOCORE */ @@ -220,7 +227,8 @@ void setsegment(struct segment_descriptor *, void *, size_t, int, int, #define GAPM16CODE_SEL 8 /* 16 bit APM code descriptor */ #define GAPMDATA_SEL 9 /* APM data descriptor */ #define GICODE_SEL 10 /* Interrupt code descriptor (same as Kernel code) */ -#define NGDT 11 +#define GCPU_SEL 11 /* per-CPU segment */ +#define NGDT 12 /* * Entries in the Local Descriptor Table (LDT) diff --git a/sys/arch/i386/include/types.h b/sys/arch/i386/include/types.h index 0da0bdc06e2..53f8a187136 100644 --- a/sys/arch/i386/include/types.h +++ b/sys/arch/i386/include/types.h @@ -1,5 +1,5 @@ /* $NetBSD: types.h,v 1.12 1995/12/24 01:08:03 mycroft Exp $ */ -/* $OpenBSD: types.h,v 1.13 2003/06/02 23:27:47 millert Exp $ */ +/* $OpenBSD: types.h,v 1.14 2004/06/13 21:49:16 niklas Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. 
@@ -72,5 +72,6 @@ typedef unsigned long long uint64_t; typedef int32_t register_t; #define __HAVE_NWSCONS +#define __HAVE_CPUINFO #endif /* _MACHTYPES_H_ */ diff --git a/sys/arch/i386/isa/clock.c b/sys/arch/i386/isa/clock.c index 2d932ad788f..fa18701776d 100644 --- a/sys/arch/i386/isa/clock.c +++ b/sys/arch/i386/isa/clock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: clock.c,v 1.31 2004/02/27 21:07:49 grange Exp $ */ +/* $OpenBSD: clock.c,v 1.32 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: clock.c,v 1.39 1996/05/12 23:11:54 mycroft Exp $ */ /*- @@ -331,7 +331,7 @@ gettick() * wave' mode counts at 2:1). */ void -delay(n) +i8254_delay(n) int n; { int limit, tick, otick; @@ -458,7 +458,7 @@ calibrate_cyclecounter() #endif void -cpu_initclocks() +i8254_initclocks() { static struct timeout rtcdrain_timeout; stathz = 128; diff --git a/sys/arch/i386/isa/icu.h b/sys/arch/i386/isa/icu.h deleted file mode 100644 index 9b68025526b..00000000000 --- a/sys/arch/i386/isa/icu.h +++ /dev/null @@ -1,71 +0,0 @@ -/* $OpenBSD: icu.h,v 1.8 2003/06/02 23:27:47 millert Exp $ */ -/* $NetBSD: icu.h,v 1.19 1996/02/01 22:31:21 mycroft Exp $ */ - -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)icu.h 5.6 (Berkeley) 5/9/91 - */ - -/* - * AT/386 Interrupt Control constants - * W. Jolitz 8/89 - */ - -#ifndef _I386_ISA_ICU_H_ -#define _I386_ISA_ICU_H_ - -#ifndef _LOCORE - -/* - * Interrupt "level" mechanism variables, masks, and macros - */ -extern unsigned imen; /* interrupt mask enable */ - -#define SET_ICUS() (outb(IO_ICU1 + 1, imen), outb(IO_ICU2 + 1, imen >> 8)) - -#endif /* !_LOCORE */ - -/* - * Interrupt enable bits -- in order of priority - */ -#define IRQ_SLAVE 2 - -/* - * Interrupt Control offset into Interrupt descriptor table (IDT) - * XXX ICU_OFFSET is actually a property of our architecture not of the ICU - * XXX and therefore ought to use the architecture manifest constant IDTVECOFF - * XXX for its definition instead. 
- */ -#define ICU_OFFSET 32 /* 0-31 are processor exceptions */ -#define ICU_LEN 16 /* 32-47 are ISA interrupts */ - -#endif /* !_I386_ISA_ICU_H_ */ diff --git a/sys/arch/i386/isa/icu.s b/sys/arch/i386/isa/icu.s index 8bd2af133ca..d3f749b4a05 100644 --- a/sys/arch/i386/isa/icu.s +++ b/sys/arch/i386/isa/icu.s @@ -1,4 +1,4 @@ -/* $OpenBSD: icu.s,v 1.20 2003/11/06 21:09:34 mickey Exp $ */ +/* $OpenBSD: icu.s,v 1.21 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: icu.s,v 1.45 1996/01/07 03:59:34 mycroft Exp $ */ /*- @@ -33,10 +33,11 @@ #include <net/netisr.h> .data - .globl _C_LABEL(imen), _C_LABEL(cpl), _C_LABEL(ipending) - .globl _C_LABEL(astpending), _C_LABEL(netisr) + .globl _C_LABEL(imen),_C_LABEL(ipending),_C_LABEL(netisr) _C_LABEL(imen): .long 0xffff # interrupt mask enable (all off) +_C_LABEL(ipending): + .long 0 # interrupts pending _C_LABEL(netisr): .long 0 # scheduling bits for network @@ -48,13 +49,13 @@ _C_LABEL(netisr): ALIGN_TEXT _C_LABEL(splhigh): movl $IPL_HIGH,%eax - xchgl %eax,_C_LABEL(cpl) + xchgl %eax,CPL ret ALIGN_TEXT _C_LABEL(splx): movl 4(%esp),%eax - movl %eax,_C_LABEL(cpl) + movl %eax,CPL testl %eax,%eax jnz _C_LABEL(Xspllower) ret @@ -72,12 +73,12 @@ IDTVEC(spllower) pushl %ebx pushl %esi pushl %edi - movl _C_LABEL(cpl),%ebx # save priority + movl CPL,%ebx # save priority movl $1f,%esi # address to resume loop at 1: movl %ebx,%eax # get cpl shrl $4,%eax # find its mask. movl _C_LABEL(iunmask)(,%eax,4),%eax - andl _C_LABEL(ipending),%eax + andl _C_LABEL(ipending),%eax # any non-masked bits left?
jz 2f bsfl %eax,%eax btrl %eax,_C_LABEL(ipending) @@ -98,10 +99,10 @@ IDTVEC(spllower) */ IDTVEC(doreti) popl %ebx # get previous priority - movl %ebx,_C_LABEL(cpl) + movl %ebx,CPL movl $1f,%esi # address to resume loop at -1: movl %ebx,%eax # get cpl - shrl $4,%eax # find its mask +1: movl %ebx,%eax + shrl $4,%eax movl _C_LABEL(iunmask)(,%eax,4),%eax andl _C_LABEL(ipending),%eax jz 2f @@ -111,8 +112,8 @@ IDTVEC(doreti) cli jmp *_C_LABEL(Xresume)(,%eax,4) 2: /* Check for ASTs on exit to user mode. */ + CHECK_ASTPENDING(%ecx) cli - cmpb $0,_C_LABEL(astpending) je 3f testb $SEL_RPL,TF_CS(%esp) #ifdef VM86 @@ -120,10 +121,12 @@ IDTVEC(doreti) testl $PSL_VM,TF_EFLAGS(%esp) #endif jz 3f -4: movb $0,_C_LABEL(astpending) +4: CLEAR_ASTPENDING(%ecx) sti + movl $T_ASTFLT,TF_TRAPNO(%esp) /* XXX undo later. */ /* Pushed T_ASTFLT into tf_trapno on entry. */ call _C_LABEL(trap) + cli jmp 2b 3: INTRFASTEXIT @@ -137,9 +140,16 @@ IDTVEC(doreti) IDTVEC(softtty) #if NPCCOM > 0 movl $IPL_SOFTTTY,%eax - movl %eax,_C_LABEL(cpl) + movl %eax,CPL + sti +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintlock) +#endif call _C_LABEL(comsoft) - movl %ebx,_C_LABEL(cpl) +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintunlock) +#endif + movl %ebx,CPL #endif jmp *%esi @@ -152,18 +162,32 @@ IDTVEC(softtty) IDTVEC(softnet) movl $IPL_SOFTNET,%eax - movl %eax,_C_LABEL(cpl) + movl %eax,CPL + sti +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintlock) +#endif xorl %edi,%edi xchgl _C_LABEL(netisr),%edi #include <net/netisr_dispatch.h> - movl %ebx,_C_LABEL(cpl) +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintunlock) +#endif + movl %ebx,CPL jmp *%esi #undef DONETISR IDTVEC(softclock) movl $IPL_SOFTCLOCK,%eax - movl %eax,_C_LABEL(cpl) + movl %eax,CPL + sti +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintlock) +#endif call _C_LABEL(softclock) - movl %ebx,_C_LABEL(cpl) +#ifdef MULTIPROCESSOR + call _C_LABEL(i386_softintunlock) +#endif + movl %ebx,CPL jmp *%esi diff --git 
a/sys/arch/i386/isa/isa_machdep.c b/sys/arch/i386/isa/isa_machdep.c index 0a8b580c6f4..0df077cdb19 100644 --- a/sys/arch/i386/isa/isa_machdep.c +++ b/sys/arch/i386/isa/isa_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: isa_machdep.c,v 1.48 2003/06/02 23:27:47 millert Exp $ */ +/* $OpenBSD: isa_machdep.c,v 1.49 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: isa_machdep.c,v 1.22 1997/06/12 23:57:32 thorpej Exp $ */ #define ISA_DMA_STATS @@ -122,18 +122,25 @@ #include <uvm/uvm_extern.h> +#include "ioapic.h" + +#if NIOAPIC > 0 +#include <machine/i82093var.h> +#include <machine/mpbiosvar.h> +#endif + #define _I386_BUS_DMA_PRIVATE #include <machine/bus.h> #include <machine/intr.h> #include <machine/pio.h> #include <machine/cpufunc.h> +#include <machine/i8259.h> #include <dev/isa/isareg.h> #include <dev/isa/isavar.h> #include <dev/isa/isadmavar.h> #include <i386/isa/isa_machdep.h> -#include <i386/isa/icu.h> #include "isadma.h" @@ -250,6 +257,16 @@ isa_defaultirq() outb(IO_ICU2, 0x0a); /* Read IRR by default. */ } +void +isa_nodefaultirq() +{ + int i; + + /* icu vectors */ + for (i = 0; i < ICU_LEN; i++) + unsetgate(&idt[ICU_OFFSET + i]); +} + /* * Handle a NMI, possibly a machine check. * return true to panic system, false to ignore. @@ -286,6 +303,9 @@ int intrtype[ICU_LEN], intrmask[ICU_LEN], intrlevel[ICU_LEN]; int iminlevel[ICU_LEN], imaxlevel[ICU_LEN]; struct intrhand *intrhand[ICU_LEN]; +int imask[NIPL]; /* Bitmask telling what interrupts are blocked. */ +int iunmask[NIPL]; /* Bitmask telling what interrupts are accepted. */ + /* * Recalculate the interrupt masks from scratch. * We could code special registry and deregistry versions of this function that @@ -295,24 +315,27 @@ struct intrhand *intrhand[ICU_LEN]; void intr_calculatemasks() { - int irq, level; + int irq, level, unusedirqs; struct intrhand *q; /* First, figure out which levels each IRQ uses. 
*/ + unusedirqs = 0xffff; for (irq = 0; irq < ICU_LEN; irq++) { - register int levels = 0; + int levels = 0; for (q = intrhand[irq]; q; q = q->ih_next) levels |= 1 << IPL(q->ih_level); intrlevel[irq] = levels; + if (levels) + unusedirqs &= ~(1 << irq); } /* Then figure out which IRQs use each level. */ for (level = 0; level < NIPL; level++) { - register int irqs = 0; + int irqs = 0; for (irq = 0; irq < ICU_LEN; irq++) if (intrlevel[irq] & (1 << level)) irqs |= 1 << irq; - imask[level] = irqs; + imask[level] = irqs | unusedirqs; } /* @@ -331,25 +354,40 @@ intr_calculatemasks() /* And eventually calculate the complete masks. */ for (irq = 0; irq < ICU_LEN; irq++) { - register int irqs = 1 << irq; + int irqs = 1 << irq; int minlevel = IPL_NONE; int maxlevel = IPL_NONE; - for (q = intrhand[irq]; q; q = q->ih_next) { - irqs |= IMASK(q->ih_level); - if (minlevel == IPL_NONE || q->ih_level < minlevel) - minlevel = q->ih_level; - if (q->ih_level > maxlevel) - maxlevel = q->ih_level; + if (intrhand[irq] == NULL) { + maxlevel = IPL_HIGH; + irqs = IMASK(IPL_HIGH); + } else { + for (q = intrhand[irq]; q; q = q->ih_next) { + irqs |= IMASK(q->ih_level); + if (minlevel == IPL_NONE || + q->ih_level < minlevel) + minlevel = q->ih_level; + if (q->ih_level > maxlevel) + maxlevel = q->ih_level; + } } + if (irqs != IMASK(maxlevel)) + panic("irq %d level %x mask mismatch: %x vs %x", irq, + maxlevel, irqs, IMASK(maxlevel)); + intrmask[irq] = irqs; iminlevel[irq] = minlevel; imaxlevel[irq] = maxlevel; + +#if 0 + printf("irq %d: level %x, mask 0x%x (%x)\n", irq, + imaxlevel[irq], intrmask[irq], IMASK(imaxlevel[irq])); +#endif } /* Lastly, determine which IRQs are actually in use. 
*/ { - register int irqs = 0; + int irqs = 0; for (irq = 0; irq < ICU_LEN; irq++) if (intrhand[irq]) irqs |= 1 << irq; @@ -496,17 +534,48 @@ isa_intr_establish(ic, irq, type, level, ih_fun, ih_arg, ih_what) struct intrhand **p, *q, *ih; static struct intrhand fakehand = {fakeintr}; +#if NIOAPIC > 0 + struct mp_intr_map *mip; + + if (mp_busses != NULL) { + int mpspec_pin = irq; + int bus = mp_isa_bus; + int airq; + + for (mip = mp_busses[bus].mb_intrs; mip != NULL; + mip = mip->next) { + if (mip->bus_pin == mpspec_pin) { + airq = mip->ioapic_ih | irq; + break; + } + } + if (mip == NULL && mp_eisa_bus != -1) { + for (mip = mp_busses[mp_eisa_bus].mb_intrs; + mip != NULL; mip=mip->next) { + if (mip->bus_pin == mpspec_pin) { + airq = mip->ioapic_ih | irq; + break; + } + } + } + if (mip == NULL) + printf("isa_intr_establish: no MP mapping found\n"); + else + return (apic_intr_establish(airq, type, level, ih_fun, + ih_arg, ih_what)); + } +#endif /* no point in sleeping unless someone can free memory. */ ih = malloc(sizeof *ih, M_DEVBUF, cold ? 
M_NOWAIT : M_WAITOK); if (ih == NULL) { printf("%s: isa_intr_establish: can't malloc handler info\n", ih_what); - return NULL; + return (NULL); } if (!LEGAL_IRQ(irq) || type == IST_NONE) { - printf("%s: intr_establish: bogus irq or type\n", ih_what); - return NULL; + printf("%s: isa_intr_establish: bogus irq or type\n", ih_what); + return (NULL); } switch (intrtype[irq]) { case IST_NONE: @@ -521,7 +590,7 @@ isa_intr_establish(ic, irq, type, level, ih_fun, ih_arg, ih_what) /*printf("%s: intr_establish: can't share %s with %s, irq %d\n", ih_what, isa_intr_typename(intrtype[irq]), isa_intr_typename(type), irq);*/ - return NULL; + return (NULL); } break; } @@ -571,8 +640,15 @@ isa_intr_disestablish(ic, arg) int irq = ih->ih_irq; struct intrhand **p, *q; +#if NIOAPIC > 0 + if (irq & APIC_INT_VIA_APIC) { + apic_intr_disestablish(arg); + return; + } +#endif + if (!LEGAL_IRQ(irq)) - panic("intr_disestablish: bogus irq"); + panic("intr_disestablish: bogus irq %d", irq); /* * Remove the handler from the chain. 
diff --git a/sys/arch/i386/isa/mms.c b/sys/arch/i386/isa/mms.c index 1b84aecf965..137c18384a5 100644 --- a/sys/arch/i386/isa/mms.c +++ b/sys/arch/i386/isa/mms.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mms.c,v 1.16 2002/03/14 01:26:33 millert Exp $ */ +/* $OpenBSD: mms.c,v 1.17 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: mms.c,v 1.35 2000/01/08 02:57:25 takemura Exp $ */ /*- diff --git a/sys/arch/i386/isa/npx.c b/sys/arch/i386/isa/npx.c index b56a17f3875..4111ba0ab90 100644 --- a/sys/arch/i386/isa/npx.c +++ b/sys/arch/i386/isa/npx.c @@ -1,10 +1,10 @@ -/* $OpenBSD: npx.c,v 1.31 2004/02/01 19:05:21 deraadt Exp $ */ +/* $OpenBSD: npx.c,v 1.32 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: npx.c,v 1.57 1996/05/12 23:12:24 mycroft Exp $ */ #if 0 -#define iprintf(x) printf x +#define IPRINTF(x) printf x #else -#define iprintf(x) +#define IPRINTF(x) #endif /*- @@ -54,21 +54,16 @@ #include <machine/cpu.h> #include <machine/intr.h> +#include <machine/npx.h> #include <machine/pio.h> #include <machine/cpufunc.h> #include <machine/pcb.h> #include <machine/trap.h> #include <machine/specialreg.h> +#include <machine/i8259.h> #include <dev/isa/isareg.h> #include <dev/isa/isavar.h> -#include <i386/isa/icu.h> - -#if 0 -#define IPRINTF(x) printf x -#else -#define IPRINTF(x) -#endif /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 
@@ -104,7 +99,6 @@ int npxintr(void *); static int npxprobe1(struct isa_attach_args *); -static void npxsave1(void); struct npx_softc { struct device sc_dev; @@ -129,10 +123,7 @@ enum npx_type { NPX_BROKEN, }; -struct proc *npxproc; - static enum npx_type npx_type; -static int npx_nointr; static volatile u_int npx_intrs_while_probing; static volatile u_int npx_traps_while_probing; @@ -160,15 +151,15 @@ fpu_save(union savefpu *addr) } static int -npxdna_notset(struct proc *p) +npxdna_notset(struct cpu_info *ci) { panic("npxdna vector not initialized"); } -int (*npxdna_func)(struct proc *) = npxdna_notset; -int npxdna_s87(struct proc *); +int (*npxdna_func)(struct cpu_info *) = npxdna_notset; +int npxdna_s87(struct cpu_info *); #ifdef I686_CPU -int npxdna_xmm(struct proc *); +int npxdna_xmm(struct cpu_info *); #endif /* I686_CPU */ void npxexit(void); @@ -350,6 +341,19 @@ asm (".text\n\t" "popl %eax\n\t" "ret\n\t"); +void +npxinit(struct cpu_info *ci) +{ + lcr0(rcr0() & ~(CR0_EM|CR0_TS)); + fninit(); + if (npx586bug1(4195835, 3145727) != 0) { + i386_fpu_fdivbug = 1; + printf("%s: WARNING: Pentium FDIV bug detected!\n", + ci->ci_dev.dv_xname); + } + lcr0(rcr0() | (CR0_TS)); +} + /* * Attach routine - announce which it is, and wire into system */ @@ -379,13 +383,7 @@ npxattach(parent, self, aux) return; } - lcr0(rcr0() & ~(CR0_EM|CR0_TS)); - fninit(); - if (npx586bug1(4195835, 3145727) != 0) { - i386_fpu_fdivbug = 1; - printf("WARNING: Pentium FDIV bug detected!\n"); - } - lcr0(rcr0() | (CR0_TS)); + npxinit(&cpu_info_primary); i386_fpu_present = 1; #ifdef I686_CPU @@ -415,16 +413,17 @@ int npxintr(arg) void *arg; { - register struct proc *p = npxproc; + struct cpu_info *ci = curcpu(); + struct proc *p = ci->ci_fpcurproc; union savefpu *addr; struct intrframe *frame = arg; int code; union sigval sv; uvmexp.traps++; - iprintf(("Intr")); + IPRINTF(("%s: fp intr\n", ci->ci_dev.dv_xname)); - if (p == 0 || npx_type == NPX_NONE) { + if (p == NULL || npx_type == NPX_NONE) { /* 
XXX no %p in stand/printf.c. Cast to quiet gcc -Wall. */ printf("npxintr: p = %lx, curproc = %lx, npx_type = %d\n", (u_long) p, (u_long) curproc, npx_type); @@ -438,11 +437,21 @@ npxintr(arg) * If we're saving, ignore the interrupt. The FPU will happily * generate another one when we restore the state later. */ - if (npx_nointr != 0) + if (ci->ci_fpsaving) return (1); + +#ifdef DIAGNOSTIC /* - * Find the address of npxproc's savefpu. This is not necessarily - * the one in curpcb. + * At this point, fpcurproc should be curproc. If it wasn't, the TS + * bit should be set, and we should have gotten a DNA exception. + */ + if (p != curproc) + panic("npxintr: wrong process"); +#endif + + /* + * Find the address of fpcurproc's saved FPU state. (Given the + * invariant above, this is always the one in curpcb.) */ addr = &p->p_addr->u_pcb.pcb_savefpu; /* @@ -504,7 +513,7 @@ npxintr(arg) if (addr->sv_87.sv_ex_sw & EN_SW_IE) code = FPE_FLTINV; #ifdef notyet - else if (addr->sv_ex_sw & EN_SW_DE) + else if (addr->sv_87.sv_ex_sw & EN_SW_DE) code = FPE_FLTDEN; #endif else if (addr->sv_87.sv_ex_sw & EN_SW_ZE) @@ -541,115 +550,147 @@ npxintr(arg) } /* - * Wrapper for fnsave instruction to handle h/w bugs. If there is an error - * pending, then fnsave generates a bogus IRQ13 on some systems. Force any - * IRQ13 to be handled immediately, and then ignore it. - * - * This routine is always called at spl0. If it might called with the NPX - * interrupt masked, it would be necessary to forcibly unmask the NPX interrupt - * so that it could succeed. - */ -static __inline void -npxsave1(void) -{ - register struct pcb *pcb; - - npx_nointr = 1; - pcb = &npxproc->p_addr->u_pcb; - fpu_save(&pcb->pcb_savefpu); - pcb->pcb_cr0 |= CR0_TS; - fwait(); - npx_nointr = 0; -} - -/* * Implement device not available (DNA) exception * - * If the we were the last process to use the FPU, we can simply return. + * If we were the last process to use the FPU, we can simply return. 
* Otherwise, we save the previous state, if necessary, and restore our last * saved state. */ + +/* + * XXX It is unclear if the code below is correct in the multiprocessor + * XXX case. Check the NetBSD sources once again to be sure. + */ #ifdef I686_CPU int -npxdna_xmm(struct proc *p) +npxdna_xmm(struct cpu_info *ci) { + struct proc *p; + int s; -#ifdef DIAGNOSTIC - if (cpl != 0 || npx_nointr != 0) - panic("npxdna: masked"); + if (ci->ci_fpsaving) { + printf("recursive npx trap; cr0=%x\n", rcr0()); + return (0); + } + + s = splipi(); /* lock out IPI's while we clean house.. */ + +#ifdef MULTIPROCESSOR + p = ci->ci_curproc; +#else + p = curproc; #endif - p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS; - clts(); + IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev.dv_xname, (u_long)p, + (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : "")); /* + * XXX should have a fast-path here when no save/restore is necessary + */ + /* * Initialize the FPU state to clear any exceptions. If someone else * was using the FPU, save their state (which does an implicit * initialization). 
*/ - npx_nointr = 1; - if (npxproc != 0 && npxproc != p) { - IPRINTF(("Save")); - npxsave1(); + if (ci->ci_fpcurproc != NULL) { + IPRINTF(("%s: fp save %lx\n", ci->ci_dev.dv_xname, + (u_long)ci->ci_fpcurproc)); + npxsave_cpu(ci, 1); } else { - IPRINTF(("Init")); - fninit(); - fwait(); + clts(); + IPRINTF(("%s: fp init\n", ci->ci_dev.dv_xname)); + fninit(); + fwait(); + stts(); } - npx_nointr = 0; - npxproc = p; + splx(s); + + IPRINTF(("%s: done saving\n", ci->ci_dev.dv_xname)); + KDASSERT(ci->ci_fpcurproc == NULL); +#ifndef MULTIPROCESSOR + KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL); +#else + if (p->p_addr->u_pcb.pcb_fpcpu != NULL) + npxsave_proc(p, 1); +#endif + p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS; + clts(); + s = splipi(); + ci->ci_fpcurproc = p; + p->p_addr->u_pcb.pcb_fpcpu = ci; + splx(s); if ((p->p_md.md_flags & MDP_USEDFPU) == 0) { fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm.sv_env.en_cw); p->p_md.md_flags |= MDP_USEDFPU; - } else + } else { fxrstor(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm); + } return (1); } #endif /* I686_CPU */ int -npxdna_s87(struct proc *p) +npxdna_s87(struct cpu_info *ci) { - static u_short control = __INITIAL_NPXCW__; + struct proc *p; + int s; + + KDASSERT(i386_use_fxsave == 0); - if (npx_type == NPX_NONE) { - iprintf(("Emul")); + if (ci->ci_fpsaving) { + printf("recursive npx trap; cr0=%x\n", rcr0()); return (0); } -#ifdef DIAGNOSTIC - if (cpl != IPL_NONE || npx_nointr != 0) - panic("npxdna: masked"); + s = splipi(); /* lock out IPI's while we clean house.. */ +#ifdef MULTIPROCESSOR + p = ci->ci_curproc; +#else + p = curproc; #endif + IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev.dv_xname, (u_long)p, + (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : "")); + + /* + * If someone else was using our FPU, save their state (which does an + * implicit initialization); otherwise, initialize the FPU state to + * clear any exceptions. 
+ */ + if (ci->ci_fpcurproc != NULL) { + IPRINTF(("%s: fp save %lx\n", ci->ci_dev.dv_xname, + (u_long)ci->ci_fpcurproc)); + npxsave_cpu(ci, 1); + } else { + clts(); + IPRINTF(("%s: fp init\n", ci->ci_dev.dv_xname)); + fninit(); + fwait(); + stts(); + } + splx(s); + + IPRINTF(("%s: done saving\n", ci->ci_dev.dv_xname)); + KDASSERT(ci->ci_fpcurproc == NULL); +#ifndef MULTIPROCESSOR + KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL); +#else + if (p->p_addr->u_pcb.pcb_fpcpu != NULL) + npxsave_proc(p, 1); +#endif p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS; clts(); + s = splipi(); + ci->ci_fpcurproc = p; + p->p_addr->u_pcb.pcb_fpcpu = ci; + splx(s); if ((p->p_md.md_flags & MDP_USEDFPU) == 0) { + fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_87.sv_env.en_cw); p->p_md.md_flags |= MDP_USEDFPU; - iprintf(("Init")); - if (npxproc != 0 && npxproc != p) - npxsave1(); - else { - npx_nointr = 1; - fninit(); - fwait(); - npx_nointr = 0; - } - npxproc = p; - fldcw(&control); } else { - if (npxproc != 0) { -#ifdef DIAGNOSTIC - if (npxproc == p) - panic("npxdna: same process"); -#endif - iprintf(("Save")); - npxsave1(); - } - npxproc = p; /* * The following frstor may cause an IRQ13 when the state being * restored has a pending error. The error will appear to have @@ -670,37 +711,121 @@ npxdna_s87(struct proc *p) } /* - * Drop the current FPU state on the floor. + * The FNSAVE instruction clears the FPU state. Rather than reloading the FPU + * immediately, we clear fpcurproc and turn on CR0_TS to force a DNA and a + * reload of the FPU state the next time we try to use it. This routine + * is only called when forking, core dumping, or debugging, or swapping, + * so the lazy reload at worst forces us to trap once per fork(), and at best + * saves us a reload once per fork(). 
*/ void -npxdrop() +npxsave_cpu(struct cpu_info *ci, int save) { + struct proc *p; + int s; + + KDASSERT(ci == curcpu()); + + p = ci->ci_fpcurproc; + if (p == NULL) + return; + + IPRINTF(("%s: fp cpu %s %lx\n", ci->ci_dev.dv_xname, + save ? "save" : "flush", (u_long)p)); + + if (save) { +#ifdef DIAGNOSTIC + if (ci->ci_fpsaving != 0) + panic("npxsave_cpu: recursive save!"); +#endif + /* + * Set ci->ci_fpsaving, so that any pending exception will be + * thrown away. (It will be caught again if/when the FPU + * state is restored.) + * + * XXX on i386 and earlier, this routine should always be + * called at spl0; if it might called with the NPX interrupt + * masked, it would be necessary to forcibly unmask the NPX + * interrupt so that it could succeed. + * XXX this is irrelevant on 486 and above (systems + * which report FP failures via traps rather than irq13). + * XXX punting for now.. + */ + clts(); + ci->ci_fpsaving = 1; + fpu_save(&p->p_addr->u_pcb.pcb_savefpu); + ci->ci_fpsaving = 0; + /* It is unclear if this is needed. */ + fwait(); + } + /* + * We set the TS bit in the saved CR0 for this process, so that it + * will get a DNA exception on any FPU instruction and force a reload. + */ stts(); - npxproc->p_addr->u_pcb.pcb_cr0 |= CR0_TS; - npxproc = 0; + p->p_addr->u_pcb.pcb_cr0 |= CR0_TS; + + s = splipi(); + p->p_addr->u_pcb.pcb_fpcpu = NULL; + ci->ci_fpcurproc = NULL; + splx(s); } /* - * Save npxproc's FPU state. - * - * The FNSAVE instruction clears the FPU state. Rather than reloading the FPU - * immediately, we clear npxproc and turn on CR0_TS to force a DNA and a reload - * of the FPU state the next time we try to use it. This routine is only - * called when forking or core dump, so this algorithm at worst forces us to - * trap once per fork(), and at best saves us a reload once per fork(). + * Save p's FPU state, which may be on this processor or another processor. 
*/ -void -npxsave() + void +npxsave_proc(struct proc *p, int save) { + struct cpu_info *ci = curcpu(); + struct cpu_info *oci; + + KDASSERT(p->p_addr != NULL); + KDASSERT(p->p_flag & P_INMEM); + oci = p->p_addr->u_pcb.pcb_fpcpu; + if (oci == NULL) + return; + + IPRINTF(("%s: fp proc %s %lx\n", ci->ci_dev.dv_xname, + save ? "save" : "flush", (u_long)p)); + +#if defined(MULTIPROCESSOR) + if (oci == ci) { + int s = splipi(); + npxsave_cpu(ci, save); + splx(s); + } else { #ifdef DIAGNOSTIC - if (cpl != IPL_NONE || npx_nointr != 0) - panic("npxsave: masked"); + int spincount; +#endif + + IPRINTF(("%s: fp ipi to %s %s %lx\n", ci->ci_dev.dv_xname, + oci->ci_dev.dv_xname, save ? "save" : "flush", (u_long)p)); + + i386_send_ipi(oci, + save ? I386_IPI_SYNCH_FPU : I386_IPI_FLUSH_FPU); + +#ifdef DIAGNOSTIC + spincount = 0; +#endif + while (p->p_addr->u_pcb.pcb_fpcpu != NULL) +#ifdef DIAGNOSTIC + { + spincount++; + if (spincount > 100000000) { + panic("fp_save ipi didn't"); + } + } +#else + __splbarrier(); /* XXX replace by generic barrier */ + ; +#endif + } +#else + KASSERT(ci->ci_fpcurproc == p); + npxsave_cpu(ci, save); #endif - iprintf(("Fork")); - clts(); - npxsave1(); - stts(); - npxproc = 0; } + diff --git a/sys/arch/i386/pci/pci_intr_fixup.c b/sys/arch/i386/pci/pci_intr_fixup.c index 30b47f83807..ea0cfd009f1 100644 --- a/sys/arch/i386/pci/pci_intr_fixup.c +++ b/sys/arch/i386/pci/pci_intr_fixup.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pci_intr_fixup.c,v 1.32 2004/02/24 19:30:00 markus Exp $ */ +/* $OpenBSD: pci_intr_fixup.c,v 1.33 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: pci_intr_fixup.c,v 1.10 2000/08/10 21:18:27 soda Exp $ */ /* @@ -100,12 +100,12 @@ #include <machine/bus.h> #include <machine/intr.h> +#include <machine/i8259.h> #include <dev/pci/pcireg.h> #include <dev/pci/pcivar.h> #include <dev/pci/pcidevs.h> -#include <i386/isa/icu.h> #include <i386/pci/pcibiosvar.h> struct pciintr_link_map { diff --git a/sys/arch/i386/pci/pci_machdep.c 
b/sys/arch/i386/pci/pci_machdep.c index 1d05bf4a202..4ccb6f0e296 100644 --- a/sys/arch/i386/pci/pci_machdep.c +++ b/sys/arch/i386/pci/pci_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pci_machdep.c,v 1.24 2003/05/04 08:01:08 deraadt Exp $ */ +/* $OpenBSD: pci_machdep.c,v 1.25 2004/06/13 21:49:16 niklas Exp $ */ /* $NetBSD: pci_machdep.c,v 1.28 1997/06/06 23:29:17 thorpej Exp $ */ /*- @@ -92,6 +92,7 @@ #define _I386_BUS_DMA_PRIVATE #include <machine/bus.h> #include <machine/pio.h> +#include <machine/i8259.h> #include "bios.h" #if NBIOS > 0 @@ -99,11 +100,17 @@ extern bios_pciinfo_t *bios_pciinfo; #endif -#include <i386/isa/icu.h> #include <dev/isa/isavar.h> #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> +#include "ioapic.h" + +#if NIOAPIC > 0 +#include <machine/i82093var.h> +#include <machine/mpbiosvar.h> +#endif + #include "pcibios.h" #if NPCIBIOS > 0 #include <i386/pci/pcibiosvar.h> @@ -408,6 +415,11 @@ pci_intr_map(pa, ihp) struct pci_attach_args *pa; pci_intr_handle_t *ihp; { +#if NIOAPIC > 0 + struct mp_intr_map *mip; + int bus, dev, func; +#endif + #if NPCIBIOS > 0 pci_chipset_tag_t pc = pa->pa_pc; pcitag_t intrtag = pa->pa_intrtag; @@ -459,6 +471,49 @@ pci_intr_map(pa, ihp) line = 9; } } +#if NIOAPIC > 0 + pci_decompose_tag (pc, intrtag, &bus, &dev, &func); + + if (mp_busses != NULL) { + /* + * Assumes 1:1 mapping between PCI bus numbers and + * the numbers given by the MP bios. + * XXX Is this a valid assumption? 
+ */ + int mpspec_pin = (dev<<2)|(pin-1); + + for (mip = mp_busses[bus].mb_intrs; mip != NULL; mip=mip->next) { + if (mip->bus_pin == mpspec_pin) { + ihp->line = mip->ioapic_ih | line; + return 0; + } + } + if (mip == NULL && mp_isa_bus != -1) { + for (mip = mp_busses[mp_isa_bus].mb_intrs; mip != NULL; + mip=mip->next) { + if (mip->bus_pin == line) { + ihp->line = mip->ioapic_ih | line; + return 0; + } + } + } + if (mip == NULL && mp_eisa_bus != -1) { + for (mip = mp_busses[mp_eisa_bus].mb_intrs; + mip != NULL; mip=mip->next) { + if (mip->bus_pin == line) { + ihp->line = mip->ioapic_ih | line; + return 0; + } + } + } + if (mip == NULL) { + printf("pci_intr_map: " + "bus %d dev %d func %d pin %d; line %d\n", + bus, dev, func, pin, line); + printf("pci_intr_map: no MP mapping found\n"); + } + } +#endif return 0; @@ -472,14 +527,22 @@ pci_intr_string(pc, ih) pci_chipset_tag_t pc; pci_intr_handle_t ih; { - static char irqstr[8]; /* 4 + 2 + NULL + sanity */ + static char irqstr[64]; - if (ih.line == 0 || ih.line >= ICU_LEN || ih.line == 2) + if (ih.line == 0 || (ih.line & 0xff) >= ICU_LEN || ih.line == 2) panic("pci_intr_string: bogus handle 0x%x", ih.line); +#if NIOAPIC > 0 + if (ih.line & APIC_INT_VIA_APIC) { + snprintf(irqstr, sizeof irqstr, "apic %d int %d (irq %d)", + APIC_IRQ_APIC(ih.line), APIC_IRQ_PIN(ih.line), + ih.line & 0xff); + return (irqstr); + } +#endif + snprintf(irqstr, sizeof irqstr, "irq %d", ih.line); return (irqstr); - } void * @@ -492,16 +555,21 @@ pci_intr_establish(pc, ih, level, func, arg, what) { void *ret; +#if NIOAPIC > 0 + if (ih.line != -1 && ih.line & APIC_INT_VIA_APIC) + return (apic_intr_establish(ih.line, IST_LEVEL, level, func, + arg, what)); +#endif if (ih.line == 0 || ih.line >= ICU_LEN || ih.line == 2) panic("pci_intr_establish: bogus handle 0x%x", ih.line); - ret = isa_intr_establish(NULL, ih.line, - IST_LEVEL, level, func, arg, what); + ret = isa_intr_establish(NULL, ih.line, IST_LEVEL, level, func, arg, + what); #if NPCIBIOS > 0 
if (ret) pci_intr_route_link(pc, &ih); #endif - return ret; + return (ret); } void @@ -510,5 +578,5 @@ pci_intr_disestablish(pc, cookie) void *cookie; { /* XXX oh, unroute the pci int link? */ - return isa_intr_disestablish(NULL, cookie); + return (isa_intr_disestablish(NULL, cookie)); } diff --git a/sys/arch/m88k/include/cpu.h b/sys/arch/m88k/include/cpu.h index 213c4a7d672..992d11f0cb5 100644 --- a/sys/arch/m88k/include/cpu.h +++ b/sys/arch/m88k/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.1 2004/04/26 12:34:05 miod Exp $ */ +/* $OpenBSD: cpu.h,v 1.2 2004/06/13 21:49:17 niklas Exp $ */ /* * Copyright (c) 1996 Nivas Madhur * Copyright (c) 1992, 1993 @@ -103,7 +103,7 @@ extern int want_ast; * or after the current trap/syscall if in system mode. */ extern int want_resched; /* resched() was called */ -#define need_resched() (want_resched = 1, want_ast = 1) +#define need_resched(ci) (want_resched = 1, want_ast = 1) /* * Give a profiling tick to the current process when the user profiling diff --git a/sys/arch/m88k/m88k/genassym.cf b/sys/arch/m88k/m88k/genassym.cf index 9c7329013c3..1f533586cd7 100644 --- a/sys/arch/m88k/m88k/genassym.cf +++ b/sys/arch/m88k/m88k/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.1 2004/04/29 14:33:27 miod Exp $ +# $OpenBSD: genassym.cf,v 1.2 2004/06/13 21:49:17 niklas Exp $ # # Copyright (c) 1982, 1990 The Regents of the University of California. # All rights reserved. @@ -28,7 +28,7 @@ # SUCH DAMAGE. 
# # @(#)genassym.c 7.8 (Berkeley) 5/7/91 -# $Id: genassym.cf,v 1.1 2004/04/29 14:33:27 miod Exp $ +# $Id: genassym.cf,v 1.2 2004/06/13 21:49:17 niklas Exp $ # include <sys/param.h> @@ -53,6 +53,7 @@ member p_stat member p_wchan export SRUN +export SONPROC # general constants export UPAGES diff --git a/sys/arch/m88k/m88k/process.S b/sys/arch/m88k/m88k/process.S index 9c460e544da..aef28aca221 100644 --- a/sys/arch/m88k/m88k/process.S +++ b/sys/arch/m88k/m88k/process.S @@ -1,4 +1,4 @@ -/* $OpenBSD: process.S,v 1.1 2004/04/29 14:33:27 miod Exp $ */ +/* $OpenBSD: process.S,v 1.2 2004/06/13 21:49:17 niklas Exp $ */ /* * Copyright (c) 1996 Nivas Madhur * All rights reserved. @@ -215,6 +215,8 @@ ASLOCAL(Ldoneloop) or.u r11, r0, hi16(_C_LABEL(curproc)) st r9, r11,lo16(_C_LABEL(curproc)) /* curproc = p */ + or r2, r0, SONPROC + st.b r2, r9, P_STAT /* r9 is curproc */ st r0, r9, P_BACK /* p->p_back = 0 */ diff --git a/sys/arch/mac68k/include/cpu.h b/sys/arch/mac68k/include/cpu.h index ed14fadfadf..5fdded9e731 100644 --- a/sys/arch/mac68k/include/cpu.h +++ b/sys/arch/mac68k/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.29 2004/05/20 09:20:42 kettenis Exp $ */ +/* $OpenBSD: cpu.h,v 1.30 2004/06/13 21:49:17 niklas Exp $ */ /* $NetBSD: cpu.h,v 1.45 1997/02/10 22:13:40 scottr Exp $ */ /* @@ -106,7 +106,7 @@ struct clockframe { * or after the current trap/syscall if in system mode. 
*/ extern int want_resched; /* resched() was called */ -#define need_resched() { want_resched++; aston(); } +#define need_resched(ci) { want_resched++; aston(); } /* * Give a profiling tick to the current process from the softclock diff --git a/sys/arch/macppc/macppc/genassym.cf b/sys/arch/macppc/macppc/genassym.cf index 45d05ddaa5e..a8bce6e5b4f 100644 --- a/sys/arch/macppc/macppc/genassym.cf +++ b/sys/arch/macppc/macppc/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.9 2003/10/16 05:03:22 deraadt Exp $ +# $OpenBSD: genassym.cf,v 1.10 2004/06/13 21:49:18 niklas Exp $ # # Copyright (c) 1982, 1990 The Regents of the University of California. # All rights reserved. @@ -71,3 +71,6 @@ struct proc member p_forw member p_back member p_addr +member p_stat + +export SONPROC diff --git a/sys/arch/macppc/macppc/locore.S b/sys/arch/macppc/macppc/locore.S index fe21293bff7..4536ffd14d6 100644 --- a/sys/arch/macppc/macppc/locore.S +++ b/sys/arch/macppc/macppc/locore.S @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.S,v 1.15 2004/01/03 00:57:06 pvalchev Exp $ */ +/* $OpenBSD: locore.S,v 1.16 2004/06/13 21:49:18 niklas Exp $ */ /* $NetBSD: locore.S,v 1.2 1996/10/16 19:33:09 ws Exp $ */ /* @@ -247,6 +247,9 @@ _C_LABEL(sw1): lis %r4,_C_LABEL(curproc)@ha stw %r31,_C_LABEL(curproc)@l(%r4) /* record new process */ + li %r3,SONPROC + stb %r3,P_STAT(%r31) + mfmsr %r3 ori %r3,%r3,PSL_EE /* Now we can interrupt again */ mtmsr %r3 diff --git a/sys/arch/mvme68k/include/cpu.h b/sys/arch/mvme68k/include/cpu.h index 091cd1aec50..5f3aeb08da3 100644 --- a/sys/arch/mvme68k/include/cpu.h +++ b/sys/arch/mvme68k/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.17 2004/05/20 09:20:42 kettenis Exp $ */ +/* $OpenBSD: cpu.h,v 1.18 2004/06/13 21:49:18 niklas Exp $ */ /* * Copyright (c) 1995 Theo de Raadt @@ -127,7 +127,7 @@ struct clockframe { * or after the current trap/syscall if in system mode. 
*/ extern int want_resched; -#define need_resched() { want_resched = 1; aston(); } +#define need_resched(ci) { want_resched = 1; aston(); } /* * Give a profiling tick to the current process when the user profiling diff --git a/sys/arch/powerpc/include/cpu.h b/sys/arch/powerpc/include/cpu.h index f80c925b2a6..9fcae7e00b8 100644 --- a/sys/arch/powerpc/include/cpu.h +++ b/sys/arch/powerpc/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.18 2004/02/14 15:09:22 grange Exp $ */ +/* $OpenBSD: cpu.h,v 1.19 2004/06/13 21:49:19 niklas Exp $ */ /* $NetBSD: cpu.h,v 1.1 1996/09/30 16:34:21 ws Exp $ */ /* @@ -51,7 +51,7 @@ void delay(unsigned); extern volatile int want_resched; extern volatile int astpending; -#define need_resched() (want_resched = 1, astpending = 1) +#define need_resched(ci) (want_resched = 1, astpending = 1) #define need_proftick(p) ((p)->p_flag |= P_OWEUPC, astpending = 1) #define signotify(p) (astpending = 1) diff --git a/sys/arch/sparc/include/cpu.h b/sys/arch/sparc/include/cpu.h index a958a69c803..a1ca691938a 100644 --- a/sys/arch/sparc/include/cpu.h +++ b/sys/arch/sparc/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.20 2003/06/02 23:27:54 millert Exp $ */ +/* $OpenBSD: cpu.h,v 1.21 2004/06/13 21:49:19 niklas Exp $ */ /* $NetBSD: cpu.h,v 1.24 1997/03/15 22:25:15 pk Exp $ */ /* @@ -129,7 +129,7 @@ extern void raise(int, int); * or after the current trap/syscall if in system mode. 
*/ extern int want_resched; /* resched() was called */ -#define need_resched() (want_resched = 1, want_ast = 1) +#define need_resched(ci) (want_resched = 1, want_ast = 1) extern int want_ast; /* diff --git a/sys/arch/sparc/include/psl.h b/sys/arch/sparc/include/psl.h index 6bb6785e66b..af175bba370 100644 --- a/sys/arch/sparc/include/psl.h +++ b/sys/arch/sparc/include/psl.h @@ -1,4 +1,4 @@ -/* $OpenBSD: psl.h,v 1.18 2003/06/02 23:27:54 millert Exp $ */ +/* $OpenBSD: psl.h,v 1.19 2004/06/13 21:49:20 niklas Exp $ */ /* $NetBSD: psl.h,v 1.12 1997/03/10 21:49:11 pk Exp $ */ /* @@ -95,6 +95,7 @@ */ #define IPL_AUHARD 13 /* hard audio interrupts */ #define IPL_STATCLOCK 14 /* statclock() */ +#define IPL_HIGH 15 /* splhigh() */ #if defined(_KERNEL) && !defined(_LOCORE) diff --git a/sys/arch/sparc/sparc/genassym.cf b/sys/arch/sparc/sparc/genassym.cf index 3281b9e502d..71660d9efde 100644 --- a/sys/arch/sparc/sparc/genassym.cf +++ b/sys/arch/sparc/sparc/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.14 2003/06/02 23:27:55 millert Exp $ +# $OpenBSD: genassym.cf,v 1.15 2004/06/13 21:49:20 niklas Exp $ # $NetBSD: genassym.cf,v 1.2 1997/06/28 19:59:04 pk Exp $ # @@ -84,6 +84,7 @@ member p_wchan member p_vmspace export SRUN +export SONPROC # VM structure fields define VM_PMAP offsetof(struct vmspace, vm_map.pmap) diff --git a/sys/arch/sparc/sparc/locore.s b/sys/arch/sparc/sparc/locore.s index 5dbc64a1ab3..026c076cde2 100644 --- a/sys/arch/sparc/sparc/locore.s +++ b/sys/arch/sparc/sparc/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.58 2004/06/08 18:06:53 art Exp $ */ +/* $OpenBSD: locore.s,v 1.59 2004/06/13 21:49:20 niklas Exp $ */ /* $NetBSD: locore.s,v 1.73 1997/09/13 20:36:48 pk Exp $ */ /* @@ -4638,6 +4638,8 @@ Lsw_scan: * Committed to running process p. * It may be the same as the one we were running before. */ + mov SONPROC, %o0 ! p->p_stat = SONPROC + stb %o0, [%g3 + P_STAT] sethi %hi(_C_LABEL(want_resched)), %o0 st %g0, [%o0 + %lo(_C_LABEL(want_resched))] ! 
want_resched = 0; ld [%g3 + P_ADDR], %g5 ! newpcb = p->p_addr; diff --git a/sys/arch/sparc64/include/cpu.h b/sys/arch/sparc64/include/cpu.h index 9d73cd48b84..278f7a3031b 100644 --- a/sys/arch/sparc64/include/cpu.h +++ b/sys/arch/sparc64/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.24 2003/07/10 15:26:54 jason Exp $ */ +/* $OpenBSD: cpu.h,v 1.25 2004/06/13 21:49:20 niklas Exp $ */ /* $NetBSD: cpu.h,v 1.28 2001/06/14 22:56:58 thorpej Exp $ */ /* @@ -75,33 +75,7 @@ #include <machine/reg.h> #include <machine/intr.h> -/*#include <sys/sched.h> */ - -/* - * CPU states. - * XXX Not really scheduler state, but no other good place to put - * it right now, and it really is per-CPU. - */ -#define CP_USER 0 -#define CP_NICE 1 -#define CP_SYS 2 -#define CP_INTR 3 -#define CP_IDLE 4 -#define CPUSTATES 5 - -/* - * Per-CPU scheduler state. - */ -struct schedstate_percpu { - struct timeval spc_runtime; /* time curproc started running */ - __volatile int spc_flags; /* flags; see below */ - u_int spc_schedticks; /* ticks for schedclock() */ - u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */ - u_char spc_curpriority; /* usrpri of curproc */ - int spc_rrticks; /* ticks until roundrobin() */ - int spc_pscnt; /* prof/stat counter */ - int spc_psdiv; /* prof/stat divisor */ -}; +#include <sys/proc.h> /* * The cpu_info structure is part of a 64KB structure mapped both the kernel @@ -203,7 +177,7 @@ extern int want_ast; * or after the current trap/syscall if in system mode. 
*/ extern int want_resched; /* resched() was called */ -#define need_resched() (want_resched = 1, want_ast = 1) +#define need_resched(ci) (want_resched = 1, want_ast = 1) /* * Give a profiling tick to the current process when the user profiling diff --git a/sys/arch/sparc64/sparc64/genassym.cf b/sys/arch/sparc64/sparc64/genassym.cf index f2c55d313b1..cde690d07d1 100644 --- a/sys/arch/sparc64/sparc64/genassym.cf +++ b/sys/arch/sparc64/sparc64/genassym.cf @@ -132,6 +132,7 @@ member p_pid member P_FPSTATE p_md.md_fpstate export SRUN +export SONPROC # user structure fields define USIZ sizeof(struct user) diff --git a/sys/arch/sparc64/sparc64/locore.s b/sys/arch/sparc64/sparc64/locore.s index 2705d2b30cc..b6fdf4746aa 100644 --- a/sys/arch/sparc64/sparc64/locore.s +++ b/sys/arch/sparc64/sparc64/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.44 2004/04/23 04:18:17 marc Exp $ */ +/* $OpenBSD: locore.s,v 1.45 2004/06/13 21:49:21 niklas Exp $ */ /* $NetBSD: locore.s,v 1.137 2001/08/13 06:10:10 jdolecek Exp $ */ /* @@ -5879,10 +5879,8 @@ Lsw_scan: * p->p_cpu = curcpu(); */ #endif /* defined(MULTIPROCESSOR) */ -#ifdef notyet mov SONPROC, %o0 ! p->p_stat = SONPROC stb %o0, [%l3 + P_STAT] -#endif /* notyet */ sethi %hi(_C_LABEL(want_resched)), %o0 st %g0, [%o0 + %lo(_C_LABEL(want_resched))] ! want_resched = 0; ldx [%l3 + P_ADDR], %l1 ! newpcb = p->p_addr; diff --git a/sys/arch/vax/include/cpu.h b/sys/arch/vax/include/cpu.h index 03bd14a720d..a0df493ff3e 100644 --- a/sys/arch/vax/include/cpu.h +++ b/sys/arch/vax/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.15 2003/11/10 21:05:06 miod Exp $ */ +/* $OpenBSD: cpu.h,v 1.16 2004/06/13 21:49:21 niklas Exp $ */ /* $NetBSD: cpu.h,v 1.41 1999/10/21 20:01:36 ragge Exp $ */ /* @@ -88,7 +88,7 @@ extern int bootdev; * or after the current trap/syscall if in system mode. 
*/ -#define need_resched(){ \ +#define need_resched(ci){ \ want_resched++; \ mtpr(AST_OK,PR_ASTLVL); \ } diff --git a/sys/arch/vax/vax/genassym.cf b/sys/arch/vax/vax/genassym.cf index 7e2b63bd89b..209fb80ecd1 100644 --- a/sys/arch/vax/vax/genassym.cf +++ b/sys/arch/vax/vax/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.4 2002/01/23 23:24:40 miod Exp $ +# $OpenBSD: genassym.cf,v 1.5 2004/06/13 21:49:22 niklas Exp $ # $NetBSD: genassym.cf,v 1.10 1999/11/19 22:09:55 ragge Exp $ # # Copyright (c) 1997 Ludd, University of Lule}, Sweden. @@ -48,6 +48,9 @@ struct proc member p_priority member p_addr member p_vmspace +member p_stat + +export SONPROC struct pcb member P0BR diff --git a/sys/arch/vax/vax/subr.s b/sys/arch/vax/vax/subr.s index 89d6e993326..0efb63df499 100644 --- a/sys/arch/vax/vax/subr.s +++ b/sys/arch/vax/vax/subr.s @@ -1,4 +1,4 @@ -/* $OpenBSD: subr.s,v 1.19 2003/11/07 10:16:45 jmc Exp $ */ +/* $OpenBSD: subr.s,v 1.20 2004/06/13 21:49:22 niklas Exp $ */ /* $NetBSD: subr.s,v 1.32 1999/03/25 00:41:48 mrg Exp $ */ /* @@ -306,6 +306,7 @@ noque: .asciz "swtch" bbsc r3,_whichqs,2f # no, clear bit in whichqs 2: clrl 4(r2) # clear proc backpointer clrl _want_resched # we are now changing process + movb $SONPROC,P_STAT(r2) # p->p_stat = SONPROC movl r2,_curproc # set new process running cmpl r0,r2 # Same process? 
bneq 1f # No, continue diff --git a/sys/compat/linux/linux_sched.c b/sys/compat/linux/linux_sched.c index 4d47196b37f..c5b1b008db9 100644 --- a/sys/compat/linux/linux_sched.c +++ b/sys/compat/linux/linux_sched.c @@ -1,4 +1,4 @@ -/* $OpenBSD: linux_sched.c,v 1.3 2001/11/06 18:41:10 art Exp $ */ +/* $OpenBSD: linux_sched.c,v 1.4 2004/06/13 21:49:23 niklas Exp $ */ /* $NetBSD: linux_sched.c,v 1.6 2000/05/28 05:49:05 thorpej Exp $ */ /*- @@ -272,7 +272,7 @@ linux_sys_sched_yield(cp, v, retval) void *v; register_t *retval; { - need_resched(); + need_resched(curcpu()); return (0); } diff --git a/sys/ddb/db_command.c b/sys/ddb/db_command.c index f62fff34850..12477e0b084 100644 --- a/sys/ddb/db_command.c +++ b/sys/ddb/db_command.c @@ -1,4 +1,4 @@ -/* $OpenBSD: db_command.c,v 1.35 2004/04/25 03:21:50 itojun Exp $ */ +/* $OpenBSD: db_command.c,v 1.36 2004/06/13 21:49:23 niklas Exp $ */ /* $NetBSD: db_command.c,v 1.20 1996/03/30 22:30:05 christos Exp $ */ /* @@ -60,6 +60,11 @@ boolean_t db_cmd_loop_done; label_t *db_recover; +#ifdef MULTIPROCESSOR +boolean_t db_switch_cpu; +long db_switch_to_cpu; +#endif + /* * if 'ed' style: 'dot' is set at start of last item printed, * and '+' points to next line. @@ -504,6 +509,11 @@ db_command_loop() label_t *savejmp; extern int db_output_line; +#ifdef MULTIPROCESSOR + db_switch_cpu = 0; + db_enter_ddb(); +#endif /* MULTIPROCESSOR */ + /* * Initialize 'prev' and 'next' to dot. 
*/ @@ -517,17 +527,35 @@ db_command_loop() (void) setjmp(&db_jmpbuf); while (!db_cmd_loop_done) { + if (db_print_position() != 0) db_printf("\n"); db_output_line = 0; +#ifdef MULTIPROCESSOR + db_printf("ddb{%ld}> ", (long)cpu_number()); +#else db_printf("ddb> "); +#endif (void) db_read_line(); db_command(&db_last_command, db_command_table); } db_recover = savejmp; + +#ifdef MULTIPROCESSOR + if (db_switch_cpu) { + db_printf("Moving ddb to cpu %d\n", db_switch_to_cpu); + curcpu()->ci_ddb_paused = CI_DDB_STOPPED; + db_movetocpu(db_switch_to_cpu); + while (curcpu()->ci_ddb_paused == CI_DDB_SHOULDSTOP + || curcpu()->ci_ddb_paused == CI_DDB_STOPPED) + ; /* Do nothing */ + } else { + db_leave_ddb(); + } +#endif /* MULTIPROCESSOR */ } void diff --git a/sys/dev/isa/aria.c b/sys/dev/isa/aria.c index 2da86c3bb26..a73d9805f4b 100644 --- a/sys/dev/isa/aria.c +++ b/sys/dev/isa/aria.c @@ -1,4 +1,4 @@ -/* $OpenBSD: aria.c,v 1.10 2004/01/09 21:32:23 brad Exp $ */ +/* $OpenBSD: aria.c,v 1.11 2004/06/13 21:49:24 niklas Exp $ */ /* * Copyright (c) 1995, 1996 Roland C. Dowdeswell. All rights reserved. 
@@ -76,7 +76,6 @@ #include <dev/mulaw.h> #include <dev/isa/isavar.h> #include <dev/isa/isadmavar.h> -#include <i386/isa/icu.h> #include <dev/isa/ariareg.h> diff --git a/sys/dev/isa/gus.c b/sys/dev/isa/gus.c index 649e7c11d7d..ff9e0ffef32 100644 --- a/sys/dev/isa/gus.c +++ b/sys/dev/isa/gus.c @@ -1,4 +1,4 @@ -/* $OpenBSD: gus.c,v 1.26 2003/06/08 00:41:47 miod Exp $ */ +/* $OpenBSD: gus.c,v 1.27 2004/06/13 21:49:24 niklas Exp $ */ /* $NetBSD: gus.c,v 1.51 1998/01/25 23:48:06 mycroft Exp $ */ /*- @@ -119,7 +119,6 @@ #include <dev/isa/isavar.h> #include <dev/isa/isadmavar.h> -#include <i386/isa/icu.h> #include <dev/ic/ics2101reg.h> #include <dev/ic/cs4231reg.h> diff --git a/sys/dev/isa/gus_isa.c b/sys/dev/isa/gus_isa.c index 5144a526691..5140bda769b 100644 --- a/sys/dev/isa/gus_isa.c +++ b/sys/dev/isa/gus_isa.c @@ -1,4 +1,4 @@ -/* $OpenBSD: gus_isa.c,v 1.3 2002/03/14 01:26:56 millert Exp $ */ +/* $OpenBSD: gus_isa.c,v 1.4 2004/06/13 21:49:24 niklas Exp $ */ /* $NetBSD: gus.c,v 1.51 1998/01/25 23:48:06 mycroft Exp $ */ /*- @@ -119,7 +119,6 @@ #include <dev/isa/isavar.h> #include <dev/isa/isadmavar.h> -#include <i386/isa/icu.h> #include <dev/ic/ics2101reg.h> #include <dev/ic/cs4231reg.h> diff --git a/sys/dev/isa/gus_isapnp.c b/sys/dev/isa/gus_isapnp.c index 0700ce4745f..37c5415a643 100644 --- a/sys/dev/isa/gus_isapnp.c +++ b/sys/dev/isa/gus_isapnp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: gus_isapnp.c,v 1.3 2002/03/14 01:26:56 millert Exp $ */ +/* $OpenBSD: gus_isapnp.c,v 1.4 2004/06/13 21:49:24 niklas Exp $ */ /* $NetBSD: gus.c,v 1.51 1998/01/25 23:48:06 mycroft Exp $ */ /*- @@ -119,7 +119,6 @@ #include <dev/isa/isavar.h> #include <dev/isa/isadmavar.h> -#include <i386/isa/icu.h> #include <dev/ic/ics2101reg.h> #include <dev/ic/cs4231reg.h> diff --git a/sys/dev/isa/if_hp.c b/sys/dev/isa/if_hp.c index 2d31185b352..5e05b2296a8 100644 --- a/sys/dev/isa/if_hp.c +++ b/sys/dev/isa/if_hp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_hp.c,v 1.11 2004/05/12 06:35:11 tedu Exp $ */ +/* $OpenBSD: 
if_hp.c,v 1.12 2004/06/13 21:49:24 niklas Exp $ */ /* $NetBSD: if_hp.c,v 1.21 1995/12/24 02:31:31 mycroft Exp $ */ /* XXX THIS DRIVER IS BROKEN. IT WILL NOT EVEN COMPILE. */ @@ -85,7 +85,6 @@ #include <i386/isa/isa_device.h> /* XXX BROKEN */ #include <dev/isa/if_nereg.h> -#include <i386/isa/icu.h> /* XXX BROKEN */ int hpprobe(), hpattach(), hpintr(); int hpstart(), hpinit(), ether_output(), hpioctl(); diff --git a/sys/dev/isa/opti.c b/sys/dev/isa/opti.c index ce2f3b0b00f..e55d4151361 100644 --- a/sys/dev/isa/opti.c +++ b/sys/dev/isa/opti.c @@ -1,4 +1,4 @@ -/* $OpenBSD: opti.c,v 1.7 2003/06/02 19:24:22 mickey Exp $ */ +/* $OpenBSD: opti.c,v 1.8 2004/06/13 21:49:24 niklas Exp $ */ /* * Copyright (c) 1996 Michael Shalayeff @@ -41,7 +41,6 @@ #include <machine/pio.h> -#include <i386/isa/icu.h> #include <dev/isa/isavar.h> #include <dev/isa/opti.h> diff --git a/sys/dev/pci/cy82c693.c b/sys/dev/pci/cy82c693.c index e50bd84b545..6cf96d3f740 100644 --- a/sys/dev/pci/cy82c693.c +++ b/sys/dev/pci/cy82c693.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cy82c693.c,v 1.4 2001/02/26 16:21:46 art Exp $ */ +/* $OpenBSD: cy82c693.c,v 1.5 2004/06/13 21:49:25 niklas Exp $ */ /* $NetBSD: cy82c693.c,v 1.1 2000/06/06 03:07:39 thorpej Exp $ */ /*- @@ -60,7 +60,7 @@ static struct cy82c693_handle cyhc_handle; static int cyhc_initialized; -struct simplelock cyhc_slock = SLOCK_INITIALIZER; +struct simplelock cyhc_slock; #define CYHC_LOCK(s) \ do { \ @@ -81,6 +81,8 @@ cy82c693_init(bus_space_tag_t iot) int s; int error; + simple_lock_init(&cyhc_slock); + CYHC_LOCK(s); if (cyhc_initialized) { diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 55dad5457b4..a8d052d0a05 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: init_main.c,v 1.115 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: init_main.c,v 1.116 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */ /* @@ -125,7 +125,6 @@ struct timeval boottime; #ifndef 
__HAVE_CPUINFO struct timeval runtime; #endif - int ncpus = 1; #if !defined(NO_PROPOLICE) @@ -188,6 +187,7 @@ main(framep) int s, i; register_t rval[2]; extern struct pdevinit pdevinit[]; + extern struct SIMPLELOCK kprintf_slock; extern void scheduler_start(void); extern void disk_init(void); extern void endtsleep(void *); @@ -213,8 +213,13 @@ main(framep) */ config_init(); /* init autoconfiguration data structures */ consinit(); + + SIMPLE_LOCK_INIT(&kprintf_slock); + printf("%s\n", copyright); + KERNEL_LOCK_INIT(); + uvm_init(); disk_init(); /* must come before autoconfiguration */ tty_init(); /* initialise tty's */ @@ -270,7 +275,7 @@ main(framep) session0.s_leader = p; p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT; - p->p_stat = SRUN; + p->p_stat = SONPROC; p->p_nice = NZERO; p->p_emul = &emul_native; bcopy("swapper", p->p_comm, sizeof ("swapper")); @@ -344,6 +349,9 @@ main(framep) /* Start real time and statistics clocks. */ initclocks(); + /* Lock the kernel on behalf of proc0. */ + KERNEL_PROC_LOCK(p); + #ifdef SYSVSHM /* Initialize System V style shared memory. */ shminit(); @@ -413,8 +421,6 @@ main(framep) VOP_UNLOCK(rootvnode, 0, p); p->p_fd->fd_rdir = NULL; - uvm_swap_init(); - /* * Now can look at time, having had a chance to verify the time * from the file system. Reset p->p_rtime as it may have been @@ -424,10 +430,12 @@ main(framep) p->p_stats->p_start = mono_time = boottime = time; p->p_cpu->ci_schedstate.spc_runtime = time; #else - p->p_stats->p_start = runtime = mono_time = boottime = time; + p->p_stats->p_start = runtime = mono_time = boottime = time; #endif p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0; + uvm_swap_init(); + /* Create process 1 (init(8)). */ if (fork1(p, SIGCHLD, FORK_FORK, NULL, 0, start_init, NULL, rval)) panic("fork init"); @@ -465,6 +473,12 @@ main(framep) srandom((u_long)(rtv.tv_sec ^ rtv.tv_usec)); randompid = 1; + +#if defined(MULTIPROCESSOR) + /* Boot the secondary processors. 
*/ + cpu_boot_secondary_processors(); +#endif + /* The scheduler is an infinite loop. */ uvm_scheduler(); /* NOTREACHED */ @@ -623,8 +637,10 @@ start_init(arg) * Now try to exec the program. If can't for any reason * other than it doesn't exist, complain. */ - if ((error = sys_execve(p, &args, retval)) == 0) + if ((error = sys_execve(p, &args, retval)) == 0) { + KERNEL_PROC_UNLOCK(p); return; + } if (error != ENOENT) printf("exec %s: error %d\n", path, error); } diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 8a34e63d16c..5a3df5ecec4 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_clock.c,v 1.43 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: kern_clock.c,v 1.44 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $ */ /*- @@ -192,9 +192,16 @@ hardclock(struct clockframe *frame) if (stathz == 0) statclock(frame); -#ifdef __HAVE_CPUINFO +#if defined(__HAVE_CPUINFO) if (--ci->ci_schedstate.spc_rrticks <= 0) roundrobin(ci); + + /* + * If we are not the primary CPU, we're not allowed to do + * any more work. 
+ */ + if (CPU_IS_PRIMARY(ci) == 0) + return; #endif /* @@ -420,9 +427,10 @@ statclock(struct clockframe *frame) if (psdiv == 1) { setstatclockrate(stathz); } else { - setstatclockrate(profhz); + setstatclockrate(profhz); } } + /* XXX Kludgey */ #define pscnt spc->spc_pscnt #define cp_time spc->spc_cp_time @@ -483,7 +491,7 @@ statclock(struct clockframe *frame) pscnt = psdiv; #ifdef __HAVE_CPUINFO -#undef pscnt +#undef psdiv #undef cp_time #endif @@ -495,7 +503,8 @@ statclock(struct clockframe *frame) */ if (schedhz == 0) { #ifdef __HAVE_CPUINFO - if ((++curcpu()->ci_schedstate.spc_schedticks & 3) == 0) + if ((++curcpu()->ci_schedstate.spc_schedticks & 3) == + 0) schedclock(p); #else if ((++schedclk & 3) == 0) diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 68d77771271..1587724bc08 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exit.c,v 1.50 2004/05/27 20:48:46 tedu Exp $ */ +/* $OpenBSD: kern_exit.c,v 1.51 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: kern_exit.c,v 1.39 1996/04/22 01:38:25 christos Exp $ */ /* @@ -285,6 +285,9 @@ exit1(p, rv) limfree(p->p_limit); p->p_limit = NULL; + /* This process no longer needs to hold the kernel lock. */ + KERNEL_PROC_UNLOCK(p); + /* * If emulation has process exit hook, call it now. */ @@ -319,12 +322,15 @@ void exit2(p) struct proc *p; { + int s; - simple_lock(&deadproc_slock); + SIMPLE_LOCK(&deadproc_slock); LIST_INSERT_HEAD(&deadproc, p, p_hash); - simple_unlock(&deadproc_slock); + SIMPLE_UNLOCK(&deadproc_slock); wakeup(&deadproc); + + SCHED_LOCK(s); } /* @@ -337,19 +343,22 @@ reaper(void) { struct proc *p; + KERNEL_PROC_UNLOCK(curproc); + for (;;) { - simple_lock(&deadproc_slock); + SIMPLE_LOCK(&deadproc_slock); p = LIST_FIRST(&deadproc); if (p == NULL) { /* No work for us; go to sleep until someone exits. 
*/ - simple_unlock(&deadproc_slock); + SIMPLE_UNLOCK(&deadproc_slock); (void) tsleep(&deadproc, PVM, "reaper", 0); continue; } /* Remove us from the deadproc list. */ LIST_REMOVE(p, p_hash); - simple_unlock(&deadproc_slock); + SIMPLE_UNLOCK(&deadproc_slock); + KERNEL_PROC_LOCK(curproc); /* * Give machine-dependent code a chance to free any @@ -377,6 +386,9 @@ reaper(void) /* Noone will wait for us. Just zap the process now */ proc_zap(p); } + /* XXXNJW where should this be with respect to + * the wakeup() above? */ + KERNEL_PROC_UNLOCK(curproc); } } diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index a103c391634..e33ea08e005 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_fork.c,v 1.68 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: kern_fork.c,v 1.69 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: kern_fork.c,v 1.29 1996/02/09 18:59:34 christos Exp $ */ /* @@ -204,7 +204,7 @@ fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize, timeout_set(&p2->p_sleep_to, endtsleep, p2); timeout_set(&p2->p_realit_to, realitexpire, p2); -#ifdef __HAVE_CPUINFO +#if defined(__HAVE_CPUINFO) p2->p_cpu = NULL; #endif @@ -339,12 +339,12 @@ fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize, /* * Make child runnable, set start time, and add to run queue. */ - s = splstatclock(); + SCHED_LOCK(s); p2->p_stats->p_start = time; p2->p_acflag = AFORK; p2->p_stat = SRUN; setrunqueue(p2); - splx(s); + SCHED_UNLOCK(s); /* * Now can be swapped. @@ -399,3 +399,20 @@ pidtaken(pid_t pid) return (1); return (0); } + +#if defined(MULTIPROCESSOR) +/* + * XXX This is a slight hack to get newly-formed processes to + * XXX acquire the kernel lock as soon as they run. 
+ */ +void +proc_trampoline_mp(void) +{ + struct proc *p; + + p = curproc; + + SCHED_ASSERT_UNLOCKED(); + KERNEL_PROC_LOCK(p); +} +#endif diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index b811644d403..efb904c589a 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_ktrace.c,v 1.32 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: kern_ktrace.c,v 1.33 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $ */ /* @@ -37,6 +37,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> +#include <sys/sched.h> #include <sys/file.h> #include <sys/namei.h> #include <sys/vnode.h> diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c index d75d09acc94..8ea70f3a097 100644 --- a/sys/kern/kern_lock.c +++ b/sys/kern/kern_lock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_lock.c,v 1.15 2003/06/02 23:28:05 millert Exp $ */ +/* $OpenBSD: kern_lock.c,v 1.16 2004/06/13 21:49:26 niklas Exp $ */ /* * Copyright (c) 1995 @@ -39,9 +39,20 @@ #include <sys/proc.h> #include <sys/lock.h> #include <sys/systm.h> +#include <sys/sched.h> #include <machine/cpu.h> +#ifndef spllock +#define spllock() splhigh() +#endif + +#ifdef MULTIPROCESSOR +#define CPU_NUMBER() cpu_number() +#else +#define CPU_NUMBER() 0 +#endif + void record_stacktrace(int *, int); void playback_stacktrace(int *, int); @@ -50,67 +61,243 @@ void playback_stacktrace(int *, int); * Locks provide shared/exclusive sychronization. */ -#if 0 -#ifdef DEBUG -#define COUNT(p, x) if (p) (p)->p_locks += (x) +/* + * Locking primitives implementation. + * Locks provide shared/exclusive synchronization. 
+ */ + +#if defined(LOCKDEBUG) || defined(DIAGNOSTIC) /* { */ +#if defined(MULTIPROCESSOR) /* { */ +#define COUNT_CPU(cpu_id, x) \ + curcpu()->ci_spin_locks += (x) #else -#define COUNT(p, x) +u_long spin_locks; +#define COUNT_CPU(cpu_id, x) spin_locks += (x) +#endif /* MULTIPROCESSOR */ /* } */ + +#define COUNT(lkp, p, cpu_id, x) \ +do { \ + if ((lkp)->lk_flags & LK_SPIN) \ + COUNT_CPU((cpu_id), (x)); \ + else \ + (p)->p_locks += (x); \ +} while (/*CONSTCOND*/0) +#else +#define COUNT(lkp, p, cpu_id, x) +#define COUNT_CPU(cpu_id, x) +#endif /* LOCKDEBUG || DIAGNOSTIC */ /* } */ + +#ifndef SPINLOCK_SPIN_HOOK /* from <machine/lock.h> */ +#define SPINLOCK_SPIN_HOOK /* nothing */ #endif + +#define INTERLOCK_ACQUIRE(lkp, flags, s) \ +do { \ + if ((flags) & LK_SPIN) \ + s = spllock(); \ + simple_lock(&(lkp)->lk_interlock); \ +} while (/*CONSTCOND*/ 0) + +#define INTERLOCK_RELEASE(lkp, flags, s) \ +do { \ + simple_unlock(&(lkp)->lk_interlock); \ + if ((flags) & LK_SPIN) \ + splx(s); \ +} while (/*CONSTCOND*/ 0) + +#ifdef DDB /* { */ +#ifdef MULTIPROCESSOR +int simple_lock_debugger = 1; /* more serious on MP */ +#else +int simple_lock_debugger = 0; #endif +#define SLOCK_DEBUGGER() if (simple_lock_debugger) Debugger() +#define SLOCK_TRACE() \ + db_stack_trace_print((db_expr_t)__builtin_frame_address(0), \ + TRUE, 65535, "", lock_printf); +#else +#define SLOCK_DEBUGGER() /* nothing */ +#define SLOCK_TRACE() /* nothing */ +#endif /* } */ -#define COUNT(p, x) +#if defined(LOCKDEBUG) +#if defined(DDB) +#define SPINLOCK_SPINCHECK_DEBUGGER Debugger() +#else +#define SPINLOCK_SPINCHECK_DEBUGGER /* nothing */ +#endif + +#define SPINLOCK_SPINCHECK_DECL \ + /* 32-bits of count -- wrap constitutes a "spinout" */ \ + uint32_t __spinc = 0 -#if NCPUS > 1 +#define SPINLOCK_SPINCHECK \ +do { \ + if (++__spinc == 0) { \ + lock_printf("LK_SPIN spinout, excl %d, share %d\n", \ + lkp->lk_exclusivecount, lkp->lk_sharecount); \ + if (lkp->lk_exclusivecount) \ + lock_printf("held by CPU %lu\n", \ 
+ (u_long) lkp->lk_cpu); \ + if (lkp->lk_lock_file) \ + lock_printf("last locked at %s:%d\n", \ + lkp->lk_lock_file, lkp->lk_lock_line); \ + if (lkp->lk_unlock_file) \ + lock_printf("last unlocked at %s:%d\n", \ + lkp->lk_unlock_file, lkp->lk_unlock_line); \ + SLOCK_TRACE(); \ + SPINLOCK_SPINCHECK_DEBUGGER; \ + } \ +} while (/*CONSTCOND*/ 0) +#else +#define SPINLOCK_SPINCHECK_DECL /* nothing */ +#define SPINLOCK_SPINCHECK /* nothing */ +#endif /* LOCKDEBUG && DDB */ /* - * For multiprocessor system, try spin lock first. - * - * This should be inline expanded below, but we cannot have #if - * inside a multiline define. + * Acquire a resource. */ -int lock_wait_time = 100; -#define PAUSE(lkp, wanted) \ - if (lock_wait_time > 0) { \ - int i; \ +#define ACQUIRE(lkp, error, extflags, drain, wanted) \ + if ((extflags) & LK_SPIN) { \ + int interlocked; \ + SPINLOCK_SPINCHECK_DECL; \ \ - simple_unlock(&lkp->lk_interlock); \ - for (i = lock_wait_time; i > 0; i--) \ - if (!(wanted)) \ - break; \ - simple_lock(&lkp->lk_interlock); \ + if ((drain) == 0) \ + (lkp)->lk_waitcount++; \ + for (interlocked = 1;;) { \ + SPINLOCK_SPINCHECK; \ + if (wanted) { \ + if (interlocked) { \ + INTERLOCK_RELEASE((lkp), \ + LK_SPIN, s); \ + interlocked = 0; \ + } \ + SPINLOCK_SPIN_HOOK; \ + } else if (interlocked) { \ + break; \ + } else { \ + INTERLOCK_ACQUIRE((lkp), LK_SPIN, s); \ + interlocked = 1; \ + } \ } \ - if (!(wanted)) \ - break; + if ((drain) == 0) \ + (lkp)->lk_waitcount--; \ + KASSERT((wanted) == 0); \ + error = 0; /* sanity */ \ + } else { \ + for (error = 0; wanted; ) { \ + if ((drain)) \ + (lkp)->lk_flags |= LK_WAITDRAIN; \ + else \ + (lkp)->lk_waitcount++; \ + /* XXX Cast away volatile. */ \ + error = ltsleep((drain) ? 
\ + (void *)&(lkp)->lk_flags : \ + (void *)(lkp), (lkp)->lk_prio, \ + (lkp)->lk_wmesg, (lkp)->lk_timo, \ + &(lkp)->lk_interlock); \ + if ((drain) == 0) \ + (lkp)->lk_waitcount--; \ + if (error) \ + break; \ + if ((extflags) & LK_SLEEPFAIL) { \ + error = ENOLCK; \ + break; \ + } \ + } \ + } -#else /* NCPUS == 1 */ +#define SETHOLDER(lkp, pid, cpu_id) \ +do { \ + if ((lkp)->lk_flags & LK_SPIN) \ + (lkp)->lk_cpu = cpu_id; \ + else \ + (lkp)->lk_lockholder = pid; \ +} while (/*CONSTCOND*/0) -/* - * It is an error to spin on a uniprocessor as nothing will ever cause - * the simple lock to clear while we are executing. - */ -#define PAUSE(lkp, wanted) +#define WEHOLDIT(lkp, pid, cpu_id) \ + (((lkp)->lk_flags & LK_SPIN) != 0 ? \ + ((lkp)->lk_cpu == (cpu_id)) : \ + ((lkp)->lk_lockholder == (pid))) + +#define WAKEUP_WAITER(lkp) \ +do { \ + if (((lkp)->lk_flags & LK_SPIN) == 0 && (lkp)->lk_waitcount) { \ + /* XXX Cast away volatile. */ \ + wakeup((void *)(lkp)); \ + } \ +} while (/*CONSTCOND*/0) + +#if defined(LOCKDEBUG) /* { */ +#if defined(MULTIPROCESSOR) /* { */ +struct simplelock spinlock_list_slock = SIMPLELOCK_INITIALIZER; -#endif /* NCPUS == 1 */ +#define SPINLOCK_LIST_LOCK() \ + __cpu_simple_lock(&spinlock_list_slock.lock_data) + +#define SPINLOCK_LIST_UNLOCK() \ + __cpu_simple_unlock(&spinlock_list_slock.lock_data) +#else +#define SPINLOCK_LIST_LOCK() /* nothing */ +#define SPINLOCK_LIST_UNLOCK() /* nothing */ +#endif /* MULTIPROCESSOR */ /* } */ + +TAILQ_HEAD(, lock) spinlock_list = + TAILQ_HEAD_INITIALIZER(spinlock_list); + +#define HAVEIT(lkp) \ +do { \ + if ((lkp)->lk_flags & LK_SPIN) { \ + int s = spllock(); \ + SPINLOCK_LIST_LOCK(); \ + /* XXX Cast away volatile. 
*/ \ + TAILQ_INSERT_TAIL(&spinlock_list, (struct lock *)(lkp), \ + lk_list); \ + SPINLOCK_LIST_UNLOCK(); \ + splx(s); \ + } \ +} while (/*CONSTCOND*/0) + +#define DONTHAVEIT(lkp) \ +do { \ + if ((lkp)->lk_flags & LK_SPIN) { \ + int s = spllock(); \ + SPINLOCK_LIST_LOCK(); \ + /* XXX Cast away volatile. */ \ + TAILQ_REMOVE(&spinlock_list, (struct lock *)(lkp), \ + lk_list); \ + SPINLOCK_LIST_UNLOCK(); \ + splx(s); \ + } \ +} while (/*CONSTCOND*/0) +#else +#define HAVEIT(lkp) /* nothing */ + +#define DONTHAVEIT(lkp) /* nothing */ +#endif /* LOCKDEBUG */ /* } */ + +#if defined(LOCKDEBUG) /* - * Acquire a resource. + * Lock debug printing routine; can be configured to print to console + * or log to syslog. */ -#define ACQUIRE(lkp, error, extflags, wanted) \ - PAUSE(lkp, wanted); \ - for (error = 0; wanted; ) { \ - (lkp)->lk_waitcount++; \ - simple_unlock(&(lkp)->lk_interlock); \ - error = tsleep((void *)lkp, (lkp)->lk_prio, \ - (lkp)->lk_wmesg, (lkp)->lk_timo); \ - simple_lock(&(lkp)->lk_interlock); \ - (lkp)->lk_waitcount--; \ - if (error) \ - break; \ - if ((extflags) & LK_SLEEPFAIL) { \ - error = ENOLCK; \ - break; \ - } \ +void +lock_printf(const char *fmt, ...) +{ + char b[150]; + va_list ap; + + va_start(ap, fmt); + if (lock_debug_syslog) + vlog(LOG_DEBUG, fmt, ap); + else { + vsnprintf(b, sizeof(b), fmt, ap); + printf_nolog("%s", b); } + va_end(ap); +} +#endif /* LOCKDEBUG */ /* * Initialize a lock; required before use. 
@@ -127,10 +314,18 @@ lockinit(lkp, prio, wmesg, timo, flags) bzero(lkp, sizeof(struct lock)); simple_lock_init(&lkp->lk_interlock); lkp->lk_flags = flags & LK_EXTFLG_MASK; - lkp->lk_prio = prio; - lkp->lk_timo = timo; - lkp->lk_wmesg = wmesg; - lkp->lk_lockholder = LK_NOPROC; + if (flags & LK_SPIN) + lkp->lk_cpu = LK_NOCPU; + else { + lkp->lk_lockholder = LK_NOPROC; + lkp->lk_prio = prio; + lkp->lk_timo = timo; + } + lkp->lk_wmesg = wmesg; /* just a name for spin locks */ +#if defined(LOCKDEBUG) + lkp->lk_lock_file = NULL; + lkp->lk_unlock_file = NULL; +#endif } /* @@ -140,14 +335,14 @@ int lockstatus(lkp) struct lock *lkp; { - int lock_type = 0; + int s = 0, lock_type = 0; - simple_lock(&lkp->lk_interlock); + INTERLOCK_ACQUIRE(lkp, lkp->lk_flags, s); if (lkp->lk_exclusivecount != 0) lock_type = LK_EXCLUSIVE; else if (lkp->lk_sharecount != 0) lock_type = LK_SHARED; - simple_unlock(&lkp->lk_interlock); + INTERLOCK_RELEASE(lkp, lkp->lk_flags, s); return (lock_type); } @@ -168,17 +363,33 @@ lockmgr(lkp, flags, interlkp, p) int error; pid_t pid; int extflags; + cpuid_t cpu_id; + int s = 0; error = 0; - if (p) - pid = p->p_pid; - else - pid = LK_KERNPROC; - simple_lock(&lkp->lk_interlock); + + INTERLOCK_ACQUIRE(lkp, lkp->lk_flags, s); if (flags & LK_INTERLOCK) simple_unlock(interlkp); extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK; -#ifdef DIAGNOSTIC + +#ifdef DIAGNOSTIC /* { */ + /* + * Don't allow spins on sleep locks and don't allow sleeps + * on spin locks. + */ + if ((flags ^ lkp->lk_flags) & LK_SPIN) + panic("lockmgr: sleep/spin mismatch"); +#endif /* } */ + + if (extflags & LK_SPIN) { + pid = LK_KERNPROC; + } else { + /* Process context required. */ + pid = p->p_pid; + } + cpu_id = CPU_NUMBER(); + /* * Once a lock has drained, the LK_DRAINING flag is set and an * exclusive lock is returned. The only valid operation thereafter @@ -191,12 +402,14 @@ lockmgr(lkp, flags, interlkp, p) * the lock by specifying LK_REENABLE. 
*/ if (lkp->lk_flags & (LK_DRAINING|LK_DRAINED)) { +#ifdef DIAGNOSTIC if (lkp->lk_flags & LK_DRAINED) panic("lockmgr: using decommissioned lock"); if ((flags & LK_TYPE_MASK) != LK_RELEASE || - lkp->lk_lockholder != pid) + WEHOLDIT(lkp, pid, cpu_id) == 0) panic("lockmgr: non-release on draining lock: %d", flags & LK_TYPE_MASK); +#endif /* DIAGNOSTIC */ lkp->lk_flags &= ~LK_DRAINING; if ((flags & LK_REENABLE) == 0) lkp->lk_flags |= LK_DRAINED; @@ -208,12 +421,11 @@ lockmgr(lkp, flags, interlkp, p) if ((lkp->lk_flags & (LK_CANRECURSE|LK_RECURSEFAIL)) == (LK_CANRECURSE|LK_RECURSEFAIL)) panic("lockmgr: make up your mind"); -#endif /* DIAGNOSTIC */ switch (flags & LK_TYPE_MASK) { case LK_SHARED: - if (lkp->lk_lockholder != pid) { + if (WEHOLDIT(lkp, pid, cpu_id) == 0) { /* * If just polling, check to see if we will block. */ @@ -225,12 +437,12 @@ lockmgr(lkp, flags, interlkp, p) /* * Wait for exclusive locks and upgrades to clear. */ - ACQUIRE(lkp, error, extflags, lkp->lk_flags & + ACQUIRE(lkp, error, extflags, 0, lkp->lk_flags & (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)); if (error) break; lkp->lk_sharecount++; - COUNT(p, 1); + COUNT(lkp, p, cpu_id, 1); break; } /* @@ -238,18 +450,24 @@ lockmgr(lkp, flags, interlkp, p) * An alternative would be to fail with EDEADLK. 
*/ lkp->lk_sharecount++; - COUNT(p, 1); + COUNT(lkp, p, cpu_id, 1); /* fall into downgrade */ case LK_DOWNGRADE: - if (lkp->lk_lockholder != pid || lkp->lk_exclusivecount == 0) + if (WEHOLDIT(lkp, pid, cpu_id) == 0 || + lkp->lk_exclusivecount == 0) panic("lockmgr: not holding exclusive lock"); lkp->lk_sharecount += lkp->lk_exclusivecount; lkp->lk_exclusivecount = 0; + lkp->lk_recurselevel = 0; lkp->lk_flags &= ~LK_HAVE_EXCL; - lkp->lk_lockholder = LK_NOPROC; - if (lkp->lk_waitcount) - wakeup((void *)lkp); + SETHOLDER(lkp, LK_NOPROC, LK_NOCPU); +#if defined(LOCKDEBUG) + lkp->lk_unlock_file = file; + lkp->lk_unlock_line = line; +#endif + DONTHAVEIT(lkp); + WAKEUP_WAITER(lkp); break; case LK_EXCLUPGRADE: @@ -260,7 +478,7 @@ lockmgr(lkp, flags, interlkp, p) */ if (lkp->lk_flags & LK_WANT_UPGRADE) { lkp->lk_sharecount--; - COUNT(p, -1); + COUNT(lkp, p, cpu_id, -1); error = EBUSY; break; } @@ -275,10 +493,10 @@ lockmgr(lkp, flags, interlkp, p) * after the upgrade). If we return an error, the file * will always be unlocked. */ - if (lkp->lk_lockholder == pid || lkp->lk_sharecount <= 0) + if (WEHOLDIT(lkp, pid, cpu_id) || lkp->lk_sharecount <= 0) panic("lockmgr: upgrade exclusive lock"); lkp->lk_sharecount--; - COUNT(p, -1); + COUNT(lkp, p, cpu_id, -1); /* * If we are just polling, check to see if we will block. */ @@ -295,16 +513,23 @@ lockmgr(lkp, flags, interlkp, p) * drop to zero, then take exclusive lock. 
*/ lkp->lk_flags |= LK_WANT_UPGRADE; - ACQUIRE(lkp, error, extflags, lkp->lk_sharecount); + ACQUIRE(lkp, error, extflags, 0, lkp->lk_sharecount); lkp->lk_flags &= ~LK_WANT_UPGRADE; if (error) break; lkp->lk_flags |= LK_HAVE_EXCL; - lkp->lk_lockholder = pid; + SETHOLDER(lkp, pid, cpu_id); +#if defined(LOCKDEBUG) + lkp->lk_lock_file = file; + lkp->lk_lock_line = line; +#endif + HAVEIT(lkp); if (lkp->lk_exclusivecount != 0) panic("lockmgr: non-zero exclusive count"); lkp->lk_exclusivecount = 1; - COUNT(p, 1); + if (extflags & LK_SETRECURSE) + lkp->lk_recurselevel = 1; + COUNT(lkp, p, cpu_id, 1); break; } /* @@ -312,24 +537,28 @@ lockmgr(lkp, flags, interlkp, p) * lock, awaken upgrade requestor if we are the last shared * lock, then request an exclusive lock. */ - if (lkp->lk_sharecount == 0 && lkp->lk_waitcount) - wakeup((void *)lkp); + if (lkp->lk_sharecount == 0) + WAKEUP_WAITER(lkp); /* fall into exclusive request */ case LK_EXCLUSIVE: - if (lkp->lk_lockholder == pid && pid != LK_KERNPROC) { + if (WEHOLDIT(lkp, pid, cpu_id)) { /* - * Recursive lock. + * Recursive lock. */ - if ((extflags & LK_CANRECURSE) == 0) { + if ((extflags & LK_CANRECURSE) == 0 && + lkp->lk_recurselevel == 0) { if (extflags & LK_RECURSEFAIL) { error = EDEADLK; break; - } - panic("lockmgr: locking against myself"); + } else + panic("lockmgr: locking against myself"); } lkp->lk_exclusivecount++; - COUNT(p, 1); + if (extflags & LK_SETRECURSE && + lkp->lk_recurselevel == 0) + lkp->lk_recurselevel = lkp->lk_exclusivecount; + COUNT(lkp, p, cpu_id, 1); break; } /* @@ -344,7 +573,7 @@ lockmgr(lkp, flags, interlkp, p) /* * Try to acquire the want_exclusive flag. */ - ACQUIRE(lkp, error, extflags, lkp->lk_flags & + ACQUIRE(lkp, error, extflags, 0, lkp->lk_flags & (LK_HAVE_EXCL | LK_WANT_EXCL)); if (error) break; @@ -352,38 +581,62 @@ lockmgr(lkp, flags, interlkp, p) /* * Wait for shared locks and upgrades to finish. 
*/ - ACQUIRE(lkp, error, extflags, lkp->lk_sharecount != 0 || + ACQUIRE(lkp, error, extflags, 0, lkp->lk_sharecount != 0 || (lkp->lk_flags & LK_WANT_UPGRADE)); lkp->lk_flags &= ~LK_WANT_EXCL; if (error) break; lkp->lk_flags |= LK_HAVE_EXCL; - lkp->lk_lockholder = pid; + SETHOLDER(lkp, pid, cpu_id); +#if defined(LOCKDEBUG) + lkp->lk_lock_file = file; + lkp->lk_lock_line = line; +#endif + HAVEIT(lkp); if (lkp->lk_exclusivecount != 0) panic("lockmgr: non-zero exclusive count"); lkp->lk_exclusivecount = 1; - COUNT(p, 1); + if (extflags & LK_SETRECURSE) + lkp->lk_recurselevel = 1; + COUNT(lkp, p, cpu_id, 1); break; case LK_RELEASE: if (lkp->lk_exclusivecount != 0) { - if (pid != lkp->lk_lockholder) - panic("lockmgr: pid %d, not %s %d unlocking", - pid, "exclusive lock holder", - lkp->lk_lockholder); + if (WEHOLDIT(lkp, pid, cpu_id) == 0) { + if (lkp->lk_flags & LK_SPIN) { + panic("lockmgr: processor %lu, not " + "exclusive lock holder %lu " + "unlocking", cpu_id, lkp->lk_cpu); + } else { + panic("lockmgr: pid %d, not " + "exclusive lock holder %d " + "unlocking", pid, + lkp->lk_lockholder); + } + } + if (lkp->lk_exclusivecount == lkp->lk_recurselevel) + lkp->lk_recurselevel = 0; lkp->lk_exclusivecount--; - COUNT(p, -1); + COUNT(lkp, p, cpu_id, -1); if (lkp->lk_exclusivecount == 0) { lkp->lk_flags &= ~LK_HAVE_EXCL; - lkp->lk_lockholder = LK_NOPROC; + SETHOLDER(lkp, LK_NOPROC, LK_NOCPU); +#if defined(LOCKDEBUG) + lkp->lk_unlock_file = file; + lkp->lk_unlock_line = line; +#endif + DONTHAVEIT(lkp); } } else if (lkp->lk_sharecount != 0) { lkp->lk_sharecount--; - COUNT(p, -1); - } else - panic("lockmgr: LK_RELEASE of unlocked lock"); - if (lkp->lk_waitcount) - wakeup((void *)lkp); + COUNT(lkp, p, cpu_id, -1); + } +#ifdef DIAGNOSTIC + else + panic("lockmgr: release of unlocked lock!"); +#endif + WAKEUP_WAITER(lkp); break; case LK_DRAIN: @@ -393,7 +646,7 @@ lockmgr(lkp, flags, interlkp, p) * check for holding a shared lock, but at least we can * check for an exclusive one. 
*/ - if (lkp->lk_lockholder == pid) + if (WEHOLDIT(lkp, pid, cpu_id)) panic("lockmgr: draining against myself"); /* * If we are just polling, check to see if we will sleep. @@ -404,66 +657,228 @@ lockmgr(lkp, flags, interlkp, p) error = EBUSY; break; } - PAUSE(lkp, ((lkp->lk_flags & + ACQUIRE(lkp, error, extflags, 1, + ((lkp->lk_flags & (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || - lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0)); - for (error = 0; ((lkp->lk_flags & - (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || - lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0); ) { - lkp->lk_flags |= LK_WAITDRAIN; - simple_unlock(&lkp->lk_interlock); - if ((error = tsleep((void *)&lkp->lk_flags, lkp->lk_prio, - lkp->lk_wmesg, lkp->lk_timo)) != 0) - return (error); - if ((extflags) & LK_SLEEPFAIL) - return (ENOLCK); - simple_lock(&lkp->lk_interlock); - } + lkp->lk_sharecount != 0 || + lkp->lk_waitcount != 0)); + if (error) + break; lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL; - lkp->lk_lockholder = pid; + SETHOLDER(lkp, pid, cpu_id); +#if defined(LOCKDEBUG) + lkp->lk_lock_file = file; + lkp->lk_lock_line = line; +#endif + HAVEIT(lkp); lkp->lk_exclusivecount = 1; - COUNT(p, 1); + /* XXX unlikely that we'd want this */ + if (extflags & LK_SETRECURSE) + lkp->lk_recurselevel = 1; + COUNT(lkp, p, cpu_id, 1); break; default: - simple_unlock(&lkp->lk_interlock); + INTERLOCK_RELEASE(lkp, lkp->lk_flags, s); panic("lockmgr: unknown locktype request %d", flags & LK_TYPE_MASK); /* NOTREACHED */ } - if ((lkp->lk_flags & LK_WAITDRAIN) && ((lkp->lk_flags & - (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) == 0 && - lkp->lk_sharecount == 0 && lkp->lk_waitcount == 0)) { + if ((lkp->lk_flags & (LK_WAITDRAIN | LK_SPIN)) == LK_WAITDRAIN && + ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) == 0 && + lkp->lk_sharecount == 0 && lkp->lk_waitcount == 0)) { lkp->lk_flags &= ~LK_WAITDRAIN; wakeup((void *)&lkp->lk_flags); } - simple_unlock(&lkp->lk_interlock); + 
INTERLOCK_RELEASE(lkp, lkp->lk_flags, s); return (error); } /* + * For a recursive spinlock held one or more times by the current CPU, + * release all N locks, and return N. + * Intended for use in mi_switch() shortly before context switching. + */ + +#ifdef notyet +int +#if defined(LOCKDEBUG) +_spinlock_release_all(__volatile struct lock *lkp, const char *file, int line) +#else +spinlock_release_all(__volatile struct lock *lkp) +#endif +{ + int s, count; + cpuid_t cpu_id; + + KASSERT(lkp->lk_flags & LK_SPIN); + + INTERLOCK_ACQUIRE(lkp, LK_SPIN, s); + + cpu_id = CPU_NUMBER(); + count = lkp->lk_exclusivecount; + + if (count != 0) { +#ifdef DIAGNOSTIC + if (WEHOLDIT(lkp, 0, cpu_id) == 0) { + panic("spinlock_release_all: processor %lu, not " + "exclusive lock holder %lu " + "unlocking", (long)cpu_id, lkp->lk_cpu); + } +#endif + lkp->lk_recurselevel = 0; + lkp->lk_exclusivecount = 0; + COUNT_CPU(cpu_id, -count); + lkp->lk_flags &= ~LK_HAVE_EXCL; + SETHOLDER(lkp, LK_NOPROC, LK_NOCPU); +#if defined(LOCKDEBUG) + lkp->lk_unlock_file = file; + lkp->lk_unlock_line = line; +#endif + DONTHAVEIT(lkp); + } +#ifdef DIAGNOSTIC + else if (lkp->lk_sharecount != 0) + panic("spinlock_release_all: release of shared lock!"); + else + panic("spinlock_release_all: release of unlocked lock!"); +#endif + INTERLOCK_RELEASE(lkp, LK_SPIN, s); + + return (count); +} +#endif + +/* + * For a recursive spinlock held one or more times by the current CPU, + * release all N locks, and return N. + * Intended for use in mi_switch() right after resuming execution. 
+ */ + +#ifdef notyet +void +#if defined(LOCKDEBUG) +_spinlock_acquire_count(__volatile struct lock *lkp, int count, + const char *file, int line) +#else +spinlock_acquire_count(__volatile struct lock *lkp, int count) +#endif +{ + int s, error; + cpuid_t cpu_id; + + KASSERT(lkp->lk_flags & LK_SPIN); + + INTERLOCK_ACQUIRE(lkp, LK_SPIN, s); + + cpu_id = CPU_NUMBER(); + +#ifdef DIAGNOSTIC + if (WEHOLDIT(lkp, LK_NOPROC, cpu_id)) + panic("spinlock_acquire_count: processor %lu already holds lock", (long)cpu_id); +#endif + /* + * Try to acquire the want_exclusive flag. + */ + ACQUIRE(lkp, error, LK_SPIN, 0, lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL)); + lkp->lk_flags |= LK_WANT_EXCL; + /* + * Wait for shared locks and upgrades to finish. + */ + ACQUIRE(lkp, error, LK_SPIN, 0, lkp->lk_sharecount != 0 || + (lkp->lk_flags & LK_WANT_UPGRADE)); + lkp->lk_flags &= ~LK_WANT_EXCL; + lkp->lk_flags |= LK_HAVE_EXCL; + SETHOLDER(lkp, LK_NOPROC, cpu_id); +#if defined(LOCKDEBUG) + lkp->lk_lock_file = file; + lkp->lk_lock_line = line; +#endif + HAVEIT(lkp); + if (lkp->lk_exclusivecount != 0) + panic("lockmgr: non-zero exclusive count"); + lkp->lk_exclusivecount = count; + lkp->lk_recurselevel = 1; + COUNT_CPU(cpu_id, count); + + INTERLOCK_RELEASE(lkp, lkp->lk_flags, s); +} +#endif + +/* * Print out information about state of a lock. Used by VOP_PRINT * routines to display ststus about contained locks. 
*/ void lockmgr_printinfo(lkp) - struct lock *lkp; + __volatile struct lock *lkp; { if (lkp->lk_sharecount) printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg, lkp->lk_sharecount); - else if (lkp->lk_flags & LK_HAVE_EXCL) - printf(" lock type %s: EXCL (count %d) by pid %d", - lkp->lk_wmesg, lkp->lk_exclusivecount, lkp->lk_lockholder); - if (lkp->lk_waitcount > 0) + else if (lkp->lk_flags & LK_HAVE_EXCL) { + printf(" lock type %s: EXCL (count %d) by ", + lkp->lk_wmesg, lkp->lk_exclusivecount); + if (lkp->lk_flags & LK_SPIN) + printf("processor %lu", lkp->lk_cpu); + else + printf("pid %d", lkp->lk_lockholder); + } else + printf(" not locked"); + if ((lkp->lk_flags & LK_SPIN) == 0 && lkp->lk_waitcount > 0) printf(" with %d pending", lkp->lk_waitcount); } #if defined(LOCKDEBUG) +TAILQ_HEAD(, simplelock) simplelock_list = + TAILQ_HEAD_INITIALIZER(simplelock_list); + +#if defined(MULTIPROCESSOR) /* { */ +struct simplelock simplelock_list_slock = SIMPLELOCK_INITIALIZER; + +#define SLOCK_LIST_LOCK() \ + __cpu_simple_lock(&simplelock_list_slock.lock_data) + +#define SLOCK_LIST_UNLOCK() \ + __cpu_simple_unlock(&simplelock_list_slock.lock_data) + +#define SLOCK_COUNT(x) \ + curcpu()->ci_simple_locks += (x) +#else +u_long simple_locks; + +#define SLOCK_LIST_LOCK() /* nothing */ + +#define SLOCK_LIST_UNLOCK() /* nothing */ -int lockdebug_print = 0; -int lockdebug_debugger = 0; +#define SLOCK_COUNT(x) simple_locks += (x) +#endif /* MULTIPROCESSOR */ /* } */ + +#ifdef MULTIPROCESSOR +#define SLOCK_MP() lock_printf("on cpu %ld\n", \ + (u_long) cpu_number()) +#else +#define SLOCK_MP() /* nothing */ +#endif + +#define SLOCK_WHERE(str, alp, id, l) \ +do { \ + lock_printf("\n"); \ + lock_printf(str); \ + lock_printf("lock: %p, currently at: %s:%d\n", (alp), (id), (l)); \ + SLOCK_MP(); \ + if ((alp)->lock_file != NULL) \ + lock_printf("last locked: %s:%d\n", (alp)->lock_file, \ + (alp)->lock_line); \ + if ((alp)->unlock_file != NULL) \ + lock_printf("last unlocked: %s:%d\n", 
(alp)->unlock_file, \ + (alp)->unlock_line); \ + SLOCK_TRACE() \ + SLOCK_DEBUGGER(); \ +} while (/*CONSTCOND*/0) /* * Simple lock functions so that the debugger can see from whence @@ -474,7 +889,16 @@ simple_lock_init(lkp) struct simplelock *lkp; { - lkp->lock_data = SLOCK_UNLOCKED; +#if defined(MULTIPROCESSOR) /* { */ + __cpu_simple_lock_init(&alp->lock_data); +#else + alp->lock_data = __SIMPLELOCK_UNLOCKED; +#endif /* } */ + alp->lock_file = NULL; + alp->lock_line = 0; + alp->unlock_file = NULL; + alp->unlock_line = 0; + alp->lock_holder = LK_NOCPU; } void @@ -483,16 +907,80 @@ _simple_lock(lkp, id, l) const char *id; int l; { + cpuid_t cpu_id = CPU_NUMBER(); + int s; + + s = spllock(); + + /* + * MULTIPROCESSOR case: This is `safe' since if it's not us, we + * don't take any action, and just fall into the normal spin case. + */ + if (alp->lock_data == __SIMPLELOCK_LOCKED) { +#if defined(MULTIPROCESSOR) /* { */ + if (alp->lock_holder == cpu_id) { + SLOCK_WHERE("simple_lock: locking against myself\n", + alp, id, l); + goto out; + } +#else + SLOCK_WHERE("simple_lock: lock held\n", alp, id, l); + goto out; +#endif /* MULTIPROCESSOR */ /* } */ + } + +#if defined(MULTIPROCESSOR) /* { */ + /* Acquire the lock before modifying any fields. 
*/ + splx(s); + __cpu_simple_lock(&alp->lock_data); + s = spllock(); +#else + alp->lock_data = __SIMPLELOCK_LOCKED; +#endif /* } */ - if (lkp->lock_data == SLOCK_LOCKED) { - if (lockdebug_print) - printf("%s:%d simple_lock: lock held...\n", id, l); - if (lockdebug_debugger) - Debugger(); + if (alp->lock_holder != LK_NOCPU) { + SLOCK_WHERE("simple_lock: uninitialized lock\n", + alp, id, l); } - lkp->lock_data = SLOCK_LOCKED; + alp->lock_file = id; + alp->lock_line = l; + alp->lock_holder = cpu_id; + + SLOCK_LIST_LOCK(); + /* XXX Cast away volatile */ + TAILQ_INSERT_TAIL(&simplelock_list, (struct simplelock *)alp, list); + SLOCK_LIST_UNLOCK(); + + SLOCK_COUNT(1); + + out: + splx(s); } +int +_simple_lock_held(__volatile struct simplelock *alp) +{ + cpuid_t cpu_id = CPU_NUMBER(); + int s, locked = 0; + + s = spllock(); + +#if defined(MULTIPROCESSOR) + if (__cpu_simple_lock_try(&alp->lock_data) == 0) + locked = (alp->lock_holder == cpu_id); + else + __cpu_simple_unlock(&alp->lock_data); +#else + if (alp->lock_data == __SIMPLELOCK_LOCKED) { + locked = 1; + KASSERT(alp->lock_holder == cpu_id); + } +#endif + + splx(s); + + return (locked); +} int _simple_lock_try(lkp, id, l) @@ -500,14 +988,50 @@ _simple_lock_try(lkp, id, l) const char *id; int l; { + cpuid_t cpu_id = CPU_NUMBER(); + int s, rv = 0; + + s = spllock(); - if (lkp->lock_data == SLOCK_LOCKED) { - if (lockdebug_print) - printf("%s:%d simple_lock: lock held...\n", id, l); - if (lockdebug_debugger) - Debugger(); + /* + * MULTIPROCESSOR case: This is `safe' since if it's not us, we + * don't take any action. 
+ */ +#if defined(MULTIPROCESSOR) /* { */ + if ((rv = __cpu_simple_lock_try(&alp->lock_data)) == 0) { + if (alp->lock_holder == cpu_id) + SLOCK_WHERE("simple_lock_try: locking against myself\n", + alp, id, l); + goto out; } - return lkp->lock_data = SLOCK_LOCKED; +#else + if (alp->lock_data == __SIMPLELOCK_LOCKED) { + SLOCK_WHERE("simple_lock_try: lock held\n", alp, id, l); + goto out; + } + alp->lock_data = __SIMPLELOCK_LOCKED; +#endif /* MULTIPROCESSOR */ /* } */ + + /* + * At this point, we have acquired the lock. + */ + + rv = 1; + + alp->lock_file = id; + alp->lock_line = l; + alp->lock_holder = cpu_id; + + SLOCK_LIST_LOCK(); + /* XXX Cast away volatile. */ + TAILQ_INSERT_TAIL(&simplelock_list, (struct simplelock *)alp, list); + SLOCK_LIST_UNLOCK(); + + SLOCK_COUNT(1); + + out: + splx(s); + return (rv); } void @@ -516,30 +1040,239 @@ _simple_unlock(lkp, id, l) const char *id; int l; { + int s; - if (lkp->lock_data == SLOCK_UNLOCKED) { - if (lockdebug_print) - printf("%s:%d simple_unlock: lock not held...\n", - id, l); - if (lockdebug_debugger) - Debugger(); + s = spllock(); + + /* + * MULTIPROCESSOR case: This is `safe' because we think we hold + * the lock, and if we don't, we don't take any action. + */ + if (alp->lock_data == __SIMPLELOCK_UNLOCKED) { + SLOCK_WHERE("simple_unlock: lock not held\n", + alp, id, l); + goto out; } - lkp->lock_data = SLOCK_UNLOCKED; + + SLOCK_LIST_LOCK(); + TAILQ_REMOVE(&simplelock_list, alp, list); + SLOCK_LIST_UNLOCK(); + + SLOCK_COUNT(-1); + + alp->list.tqe_next = NULL; /* sanity */ + alp->list.tqe_prev = NULL; /* sanity */ + + alp->unlock_file = id; + alp->unlock_line = l; + +#if defined(MULTIPROCESSOR) /* { */ + alp->lock_holder = LK_NOCPU; + /* Now that we've modified all fields, release the lock. 
*/ + __cpu_simple_unlock(&alp->lock_data); +#else + alp->lock_data = __SIMPLELOCK_UNLOCKED; + KASSERT(alp->lock_holder == CPU_NUMBER()); + alp->lock_holder = LK_NOCPU; +#endif /* } */ + + out: + splx(s); } void -_simple_lock_assert(lkp, state, id, l) - __volatile struct simplelock *lkp; - int state; - const char *id; - int l; +simple_lock_dump(void) +{ + struct simplelock *alp; + int s; + + s = spllock(); + SLOCK_LIST_LOCK(); + lock_printf("all simple locks:\n"); + TAILQ_FOREACH(alp, &simplelock_list, list) { + lock_printf("%p CPU %lu %s:%d\n", alp, alp->lock_holder, + alp->lock_file, alp->lock_line); + } + SLOCK_LIST_UNLOCK(); + splx(s); +} + +void +simple_lock_freecheck(void *start, void *end) +{ + struct simplelock *alp; + int s; + + s = spllock(); + SLOCK_LIST_LOCK(); + TAILQ_FOREACH(alp, &simplelock_list, list) { + if ((void *)alp >= start && (void *)alp < end) { + lock_printf("freeing simple_lock %p CPU %lu %s:%d\n", + alp, alp->lock_holder, alp->lock_file, + alp->lock_line); + SLOCK_DEBUGGER(); + } + } + SLOCK_LIST_UNLOCK(); + splx(s); + } + +/* + * We must be holding exactly one lock: the sched_lock. 
+ */ + +#ifdef notyet +void +simple_lock_switchcheck(void) +{ + + simple_lock_only_held(&sched_lock, "switching"); +} +#endif + +void +simple_lock_only_held(volatile struct simplelock *lp, const char *where) { - if (lkp->lock_data != state) { - if (lockdebug_print) - printf("%s:%d simple_lock_assert: wrong state: %d", - id, l, lkp->lock_data); - if (lockdebug_debugger) - Debugger(); + struct simplelock *alp; + cpuid_t cpu_id = CPU_NUMBER(); + int s; + + if (lp) { + LOCK_ASSERT(simple_lock_held(lp)); + } + s = spllock(); + SLOCK_LIST_LOCK(); + TAILQ_FOREACH(alp, &simplelock_list, list) { + if (alp == lp) + continue; + if (alp->lock_holder == cpu_id) + break; + } + SLOCK_LIST_UNLOCK(); + splx(s); + + if (alp != NULL) { + lock_printf("\n%s with held simple_lock %p " + "CPU %lu %s:%d\n", + where, alp, alp->lock_holder, alp->lock_file, + alp->lock_line); + SLOCK_TRACE(); + SLOCK_DEBUGGER(); } } #endif /* LOCKDEBUG */ + +#if defined(MULTIPROCESSOR) +/* + * Functions for manipulating the kernel_lock. We put them here + * so that they show up in profiles. + */ + +/* + * XXX Instead of using struct lock for the kernel lock and thus requiring us + * XXX to implement simplelocks, causing all sorts of fine-grained locks all + * XXX over our tree getting activated consuming both time and potentially + * XXX introducing locking protocol bugs. + */ +#ifdef notyet + +struct lock kernel_lock; + +void +_kernel_lock_init(void) +{ + spinlockinit(&kernel_lock, "klock", 0); +} + +/* + * Acquire/release the kernel lock. Intended for use in the scheduler + * and the lower half of the kernel. + */ +void +_kernel_lock(int flag) +{ + SCHED_ASSERT_UNLOCKED(); + spinlockmgr(&kernel_lock, flag, 0); +} + +void +_kernel_unlock(void) +{ + spinlockmgr(&kernel_lock, LK_RELEASE, 0); +} + +/* + * Acquire/release the kernel_lock on behalf of a process. Intended for + * use in the top half of the kernel. 
+ */ +void +_kernel_proc_lock(struct proc *p) +{ + SCHED_ASSERT_UNLOCKED(); + spinlockmgr(&kernel_lock, LK_EXCLUSIVE, 0); + p->p_flag |= P_BIGLOCK; +} + +void +_kernel_proc_unlock(struct proc *p) +{ + p->p_flag &= ~P_BIGLOCK; + spinlockmgr(&kernel_lock, LK_RELEASE, 0); +} + +#else + +struct __mp_lock kernel_lock; + +void +_kernel_lock_init(void) +{ + __mp_lock_init(&kernel_lock); +} + +/* + * Acquire/release the kernel lock. Intended for use in the scheduler + * and the lower half of the kernel. + */ + +/* XXX The flag should go, all callers want equal behaviour. */ +void +_kernel_lock(int flag) +{ + SCHED_ASSERT_UNLOCKED(); + __mp_lock(&kernel_lock); +} + +void +_kernel_unlock(void) +{ + __mp_unlock(&kernel_lock); +} + +/* + * Acquire/release the kernel_lock on behalf of a process. Intended for + * use in the top half of the kernel. + */ +void +_kernel_proc_lock(struct proc *p) +{ + SCHED_ASSERT_UNLOCKED(); + __mp_lock(&kernel_lock); + p->p_flag |= P_BIGLOCK; +} + +void +_kernel_proc_unlock(struct proc *p) +{ + p->p_flag &= ~P_BIGLOCK; + __mp_unlock(&kernel_lock); +} + +#endif + +#ifdef MP_LOCKDEBUG +/* CPU-dependent timing, needs this to be settable from ddb. */ +int __mp_lock_spinout = 200000000; +#endif + +#endif /* MULTIPROCESSOR */ diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 84519c2b60e..aa7ec306c56 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_proc.c,v 1.18 2004/01/29 17:19:42 millert Exp $ */ +/* $OpenBSD: kern_proc.c,v 1.19 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: kern_proc.c,v 1.14 1996/02/09 18:59:41 christos Exp $ */ /* @@ -85,7 +85,7 @@ struct pool pcred_pool; * proclist. Processes on this proclist are also on zombproc; * we use the p_hash member to linkup to deadproc. 
*/ -struct simplelock deadproc_slock; +struct SIMPLELOCK deadproc_slock; struct proclist deadproc; /* dead, but not yet undead */ static void orphanpg(struct pgrp *); @@ -104,7 +104,7 @@ procinit() LIST_INIT(&zombproc); LIST_INIT(&deadproc); - simple_lock_init(&deadproc_slock); + SIMPLE_LOCK_INIT(&deadproc_slock); pidhashtbl = hashinit(maxproc / 4, M_PROC, M_WAITOK, &pidhash); pgrphashtbl = hashinit(maxproc / 4, M_PROC, M_WAITOK, &pgrphash); diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 1e868518989..6d7af1fd136 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_resource.c,v 1.26 2003/12/11 23:02:30 millert Exp $ */ +/* $OpenBSD: kern_resource.c,v 1.27 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: kern_resource.c,v 1.38 1996/10/23 07:19:38 matthias Exp $ */ /*- @@ -44,6 +44,7 @@ #include <sys/resourcevar.h> #include <sys/pool.h> #include <sys/proc.h> +#include <sys/sched.h> #include <sys/mount.h> #include <sys/syscallargs.h> @@ -184,6 +185,7 @@ donice(curp, chgp, n) register int n; { register struct pcred *pcred = curp->p_cred; + int s; if (pcred->pc_ucred->cr_uid && pcred->p_ruid && pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid && @@ -197,7 +199,9 @@ donice(curp, chgp, n) if (n < chgp->p_nice && suser(curp, 0)) return (EACCES); chgp->p_nice = n; + SCHED_LOCK(s); (void)resetpriority(chgp); + SCHED_UNLOCK(s); return (0); } diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 494f6878d80..0913d2b2a1f 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sig.c,v 1.70 2004/04/06 17:24:11 mickey Exp $ */ +/* $OpenBSD: kern_sig.c,v 1.71 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: kern_sig.c,v 1.54 1996/04/22 01:38:32 christos Exp $ */ /* @@ -62,6 +62,7 @@ #include <sys/malloc.h> #include <sys/pool.h> #include <sys/ptrace.h> +#include <sys/sched.h> #include <sys/mount.h> #include <sys/syscallargs.h> @@ -805,19 +806,30 @@ trapsignal(p, 
signum, code, type, sigval) * regardless of the signal action (eg, blocked or ignored). * * Other ignored signals are discarded immediately. + * + * XXXSMP: Invoked as psignal() or sched_psignal(). */ void -psignal(p, signum) +psignal1(p, signum, dolock) register struct proc *p; register int signum; + int dolock; /* XXXSMP: works, but icky */ { register int s, prop; register sig_t action; int mask; +#ifdef DIAGNOSTIC if ((u_int)signum >= NSIG || signum == 0) panic("psignal signal number"); + /* XXXSMP: works, but icky */ + if (dolock) + SCHED_ASSERT_UNLOCKED(); + else + SCHED_ASSERT_LOCKED(); +#endif + /* Ignore signal if we are exiting */ if (p->p_flag & P_WEXIT) return; @@ -879,7 +891,10 @@ psignal(p, signum) */ if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) return; - s = splhigh(); + /* XXXSMP: works, but icky */ + if (dolock) + SCHED_LOCK(s); + switch (p->p_stat) { case SSLEEP: @@ -921,7 +936,11 @@ psignal(p, signum) p->p_siglist &= ~mask; p->p_xstat = signum; if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0) - psignal(p->p_pptr, SIGCHLD); + /* + * XXXSMP: recursive call; don't lock + * the second time around. 
+ */ + sched_psignal(p->p_pptr, SIGCHLD); proc_stop(p); goto out; } @@ -1009,7 +1028,9 @@ runfast: run: setrunnable(p); out: - splx(s); + /* XXXSMP: works, but icky */ + if (dolock) + SCHED_UNLOCK(s); } /* @@ -1054,7 +1075,7 @@ issignal(struct proc *p) */ p->p_xstat = signum; - s = splstatclock(); /* protect mi_switch */ + SCHED_LOCK(s); /* protect mi_switch */ if (p->p_flag & P_FSTRACE) { #ifdef PROCFS /* procfs debugging */ @@ -1070,6 +1091,7 @@ issignal(struct proc *p) proc_stop(p); mi_switch(); } + SCHED_ASSERT_UNLOCKED(); splx(s); /* @@ -1130,8 +1152,9 @@ issignal(struct proc *p) if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0) psignal(p->p_pptr, SIGCHLD); proc_stop(p); - s = splstatclock(); + SCHED_LOCK(s); mi_switch(); + SCHED_ASSERT_UNLOCKED(); splx(s); break; } else if (prop & SA_IGNORE) { @@ -1179,6 +1202,9 @@ void proc_stop(p) struct proc *p; { +#ifdef MULTIPROCESSOR + SCHED_ASSERT_LOCKED(); +#endif p->p_stat = SSTOP; p->p_flag &= ~P_WAITED; @@ -1205,6 +1231,9 @@ postsig(signum) if (signum == 0) panic("postsig"); #endif + + KERNEL_PROC_LOCK(p); + mask = sigmask(signum); p->p_siglist &= ~mask; action = ps->ps_sigact[signum]; @@ -1254,7 +1283,11 @@ postsig(signum) * mask from before the sigpause is what we want * restored after the signal processing is completed. 
*/ +#ifdef MULTIPROCESSOR + s = splsched(); +#else s = splhigh(); +#endif if (ps->ps_flags & SAS_OLDMASK) { returnmask = ps->ps_oldmask; ps->ps_flags &= ~SAS_OLDMASK; @@ -1279,6 +1312,8 @@ postsig(signum) (*p->p_emul->e_sendsig)(action, signum, returnmask, code, type, sigval); } + + KERNEL_PROC_UNLOCK(p); } /* @@ -1308,7 +1343,6 @@ sigexit(p, signum) register struct proc *p; int signum; { - /* Mark process as going away */ p->p_flag |= P_WEXIT; diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c index 52432e05522..32e659af713 100644 --- a/sys/kern/kern_subr.c +++ b/sys/kern/kern_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_subr.c,v 1.27 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: kern_subr.c,v 1.28 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: kern_subr.c,v 1.15 1996/04/09 17:21:56 ragge Exp $ */ /* @@ -40,6 +40,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> +#include <sys/sched.h> #include <sys/malloc.h> #include <sys/queue.h> #include <sys/kernel.h> diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index baba311f668..ab7ca8f7ed3 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_synch.c,v 1.55 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: kern_synch.c,v 1.56 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */ /*- @@ -59,15 +59,19 @@ u_char curpriority; /* usrpri of curproc */ #endif int lbolt; /* once a second sleep address */ #ifdef __HAVE_CPUINFO -int rrticks_init; /* # of harclock ticks per roundrobin */ +int rrticks_init; /* # of hardclock ticks per roundrobin() */ #endif int whichqs; /* Bit mask summary of non-empty Q's. 
*/ struct prochd qs[NQS]; +struct SIMPLELOCK sched_lock; + void scheduler_start(void); -#ifndef __HAVE_CPUINFO +#ifdef __HAVE_CPUINFO +void roundrobin(struct cpu_info *); +#else void roundrobin(void *); #endif void schedcpu(void *); @@ -85,11 +89,13 @@ scheduler_start() /* * We avoid polluting the global namespace by keeping the scheduler * timeouts static in this function. - * We setup the timeouts here and kick roundrobin and schedcpu once to + * We setup the timeouts here and kick schedcpu and roundrobin once to * make them do their job. */ - timeout_set(&roundrobin_to, roundrobin, &roundrobin_to); +#ifndef __HAVE_CPUINFO + timeout_set(&roundrobin_to, schedcpu, &roundrobin_to); +#endif timeout_set(&schedcpu_to, schedcpu, &schedcpu_to); #ifdef __HAVE_CPUINFO @@ -103,6 +109,7 @@ scheduler_start() /* * Force switch among equal priority processes every 100ms. */ +/* ARGSUSED */ #ifdef __HAVE_CPUINFO void roundrobin(struct cpu_info *ci) @@ -122,7 +129,7 @@ roundrobin(struct cpu_info *ci) */ spc->spc_schedflags |= SPCF_SHOULDYIELD; } else { - spc->spc_schedflags |= SPCF_SEENRR; + spc->spc_schedflags |= SPCF_SEENRR; } splx(s); } @@ -130,7 +137,6 @@ roundrobin(struct cpu_info *ci) need_resched(curcpu()); } #else -/* ARGSUSED */ void roundrobin(void *arg) { @@ -152,7 +158,8 @@ roundrobin(void *arg) } splx(s); } - need_resched(); + + need_resched(0); timeout_add(to, hz / 10); } #endif @@ -298,6 +305,8 @@ schedcpu(arg) p->p_cpticks = 0; newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu); p->p_estcpu = newcpu; + splx(s); + SCHED_LOCK(s); resetpriority(p); if (p->p_priority >= PUSER) { if ((p != curproc) && @@ -310,7 +319,7 @@ schedcpu(arg) } else p->p_priority = p->p_usrpri; } - splx(s); + SCHED_UNLOCK(s); } uvm_meter(); wakeup((caddr_t)&lbolt); @@ -329,6 +338,8 @@ updatepri(p) register unsigned int newcpu = p->p_estcpu; register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); + SCHED_ASSERT_LOCKED(); + if (p->p_slptime > 5 * loadfac) p->p_estcpu = 0; else { @@ -392,11 
+403,6 @@ ltsleep(ident, priority, wmesg, timo, interlock) int catch = priority & PCATCH; int relock = (priority & PNORELOCK) == 0; -#ifdef KTRACE - if (KTRPOINT(p, KTR_CSW)) - ktrcsw(p, 1, 0); -#endif - s = splhigh(); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, @@ -404,16 +410,26 @@ ltsleep(ident, priority, wmesg, timo, interlock) * don't run any other procs or panic below, * in case this is the idle process and already asleep. */ + s = splhigh(); splx(safepri); splx(s); if (interlock != NULL && relock == 0) simple_unlock(interlock); return (0); } + +#ifdef KTRACE + if (KTRPOINT(p, KTR_CSW)) + ktrcsw(p, 1, 0); +#endif + + SCHED_LOCK(s); + #ifdef DIAGNOSTIC - if (ident == NULL || p->p_stat != SRUN || p->p_back) + if (ident == NULL || p->p_stat != SONPROC || p->p_back != NULL) panic("tsleep"); #endif + p->p_wchan = ident; p->p_wmesg = wmesg; p->p_slptime = 0; @@ -452,29 +468,39 @@ ltsleep(ident, priority, wmesg, timo, interlock) if ((sig = CURSIG(p)) != 0) { if (p->p_wchan) unsleep(p); - p->p_stat = SRUN; + p->p_stat = SONPROC; + SCHED_UNLOCK(s); goto resume; } if (p->p_wchan == 0) { catch = 0; + SCHED_UNLOCK(s); goto resume; } } else sig = 0; p->p_stat = SSLEEP; p->p_stats->p_ru.ru_nvcsw++; + SCHED_ASSERT_LOCKED(); mi_switch(); #ifdef DDB /* handy breakpoint location after process "wakes" */ __asm(".globl bpendtsleep\nbpendtsleep:"); #endif + + SCHED_ASSERT_UNLOCKED(); + /* + * Note! this splx belongs to the SCHED_LOCK(s) above, mi_switch + * releases the scheduler lock, but does not lower the spl. 
+ */ + splx(s); + resume: #ifdef __HAVE_CPUINFO p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri; #else curpriority = p->p_usrpri; #endif - splx(s); p->p_flag &= ~P_SINTR; if (p->p_flag & P_TIMEOUT) { p->p_flag &= ~P_TIMEOUT; @@ -504,6 +530,7 @@ resume: if (KTRPOINT(p, KTR_CSW)) ktrcsw(p, 0, 0); #endif + if (interlock != NULL && relock) simple_lock(interlock); return (0); @@ -523,7 +550,7 @@ endtsleep(arg) int s; p = (struct proc *)arg; - s = splhigh(); + SCHED_LOCK(s); if (p->p_wchan) { if (p->p_stat == SSLEEP) setrunnable(p); @@ -531,75 +558,7 @@ endtsleep(arg) unsleep(p); p->p_flag |= P_TIMEOUT; } - splx(s); -} - -/* - * Short-term, non-interruptable sleep. - */ -void -sleep(ident, priority) - void *ident; - int priority; -{ - register struct proc *p = curproc; - register struct slpque *qp; - register int s; - -#ifdef DIAGNOSTIC - if (priority > PZERO) { - printf("sleep called with priority %d > PZERO, wchan: %p\n", - priority, ident); - panic("old sleep"); - } -#endif - s = splhigh(); - if (cold || panicstr) { - /* - * After a panic, or during autoconfiguration, - * just give interrupts a chance, then just return; - * don't run any other procs or panic below, - * in case this is the idle process and already asleep. 
- */ - splx(safepri); - splx(s); - return; - } -#ifdef DIAGNOSTIC - if (ident == NULL || p->p_stat != SRUN || p->p_back) - panic("sleep"); -#endif - p->p_wchan = ident; - p->p_wmesg = NULL; - p->p_slptime = 0; - p->p_priority = priority; - qp = &slpque[LOOKUP(ident)]; - if (qp->sq_head == 0) - qp->sq_head = p; - else - *qp->sq_tailp = p; - *(qp->sq_tailp = &p->p_forw) = 0; - p->p_stat = SSLEEP; - p->p_stats->p_ru.ru_nvcsw++; -#ifdef KTRACE - if (KTRPOINT(p, KTR_CSW)) - ktrcsw(p, 1, 0); -#endif - mi_switch(); -#ifdef DDB - /* handy breakpoint location after process "wakes" */ - __asm(".globl bpendsleep\nbpendsleep:"); -#endif -#ifdef KTRACE - if (KTRPOINT(p, KTR_CSW)) - ktrcsw(p, 0, 0); -#endif -#ifdef __HAVE_CPUINFO - p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri; -#else - curpriority = p->p_usrpri; -#endif - splx(s); + SCHED_UNLOCK(s); } /* @@ -611,9 +570,15 @@ unsleep(p) { register struct slpque *qp; register struct proc **hp; +#if 0 int s; - s = splhigh(); + /* + * XXX we cannot do recursive SCHED_LOCKing yet. All callers lock + * anyhow. + */ + SCHED_LOCK(s); +#endif if (p->p_wchan) { hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head; while (*hp != p) @@ -623,9 +588,25 @@ unsleep(p) qp->sq_tailp = hp; p->p_wchan = 0; } - splx(s); +#if 0 + SCHED_UNLOCK(s); +#endif +} + +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) +void +sched_unlock_idle(void) +{ + SIMPLE_UNLOCK(&sched_lock); } +void +sched_lock_idle(void) +{ + SIMPLE_LOCK(&sched_lock); +} +#endif /* MULTIPROCESSOR || LOCKDEBUG */ + /* * Make all processes sleeping on the specified identifier runnable. */ @@ -638,7 +619,7 @@ wakeup_n(ident, n) struct proc *p, **q; int s; - s = splhigh(); + SCHED_LOCK(s); qp = &slpque[LOOKUP(ident)]; restart: for (q = &qp->sq_head; (p = *q) != NULL; ) { @@ -662,15 +643,19 @@ restart: /* * Since curpriority is a user priority, * p->p_priority is always better than - * curpriority. + * curpriority on the last CPU on + * which it ran. 
+ * + * XXXSMP See affinity comment in + * resched_proc(). */ - if ((p->p_flag & P_INMEM) != 0) { setrunqueue(p); #ifdef __HAVE_CPUINFO + KASSERT(p->p_cpu != NULL); need_resched(p->p_cpu); #else - need_resched(); + need_resched(0); #endif } else { wakeup((caddr_t)&proc0); @@ -685,7 +670,7 @@ restart: } else q = &p->p_forw; } - splx(s); + SCHED_UNLOCK(s); } void @@ -705,11 +690,12 @@ yield() struct proc *p = curproc; int s; - s = splstatclock(); + SCHED_LOCK(s); p->p_priority = p->p_usrpri; setrunqueue(p); p->p_stats->p_ru.ru_nvcsw++; mi_switch(); + SCHED_ASSERT_UNLOCKED(); splx(s); } @@ -732,11 +718,13 @@ preempt(newp) if (newp != NULL) panic("preempt: cpu_preempt not yet implemented"); - s = splstatclock(); + SCHED_LOCK(s); p->p_priority = p->p_usrpri; + p->p_stat = SRUN; setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; mi_switch(); + SCHED_ASSERT_UNLOCKED(); splx(s); } @@ -750,11 +738,28 @@ mi_switch() struct proc *p = curproc; /* XXX */ struct rlimit *rlim; struct timeval tv; +#if defined(MULTIPROCESSOR) + int hold_count; +#endif #ifdef __HAVE_CPUINFO struct schedstate_percpu *spc = &p->p_cpu->ci_schedstate; #endif - splassert(IPL_STATCLOCK); + SCHED_ASSERT_LOCKED(); + +#if defined(MULTIPROCESSOR) + /* + * Release the kernel_lock, as we are about to yield the CPU. + * The scheduler lock is still held until cpu_switch() + * selects a new process and removes it from the run queue. + */ + if (p->p_flag & P_BIGLOCK) +#ifdef notyet + hold_count = spinlock_release_all(&kernel_lock); +#else + hold_count = __mp_release_all(&kernel_lock); +#endif +#endif /* * Compute the amount of time during which the current @@ -765,19 +770,19 @@ mi_switch() if (timercmp(&tv, &spc->spc_runtime, <)) { #if 0 printf("time is not monotonic! 
" - "tv=%ld.%06ld, runtime=%ld.%06ld\n", + "tv=%lu.%06lu, runtime=%lu.%06lu\n", tv.tv_sec, tv.tv_usec, spc->spc_runtime.tv_sec, spc->spc_runtime.tv_usec); #endif } else { - timersub(&tv, &spc->runtime, &tv); + timersub(&tv, &spc->spc_runtime, &tv); timeradd(&p->p_rtime, &tv, &p->p_rtime); } #else if (timercmp(&tv, &runtime, <)) { #if 0 printf("time is not monotonic! " - "tv=%ld.%06ld, runtime=%ld.%06ld\n", + "tv=%lu.%06lu, runtime=%lu.%06lu\n", tv.tv_sec, tv.tv_usec, runtime.tv_sec, runtime.tv_usec); #endif } else { @@ -817,12 +822,38 @@ mi_switch() uvmexp.swtch++; cpu_switch(p); + /* + * Make sure that MD code released the scheduler lock before + * resuming us. + */ + SCHED_ASSERT_UNLOCKED(); + + /* + * We're running again; record our new start time. We might + * be running on a new CPU now, so don't use the cache'd + * schedstate_percpu pointer. + */ #ifdef __HAVE_CPUINFO - /* p->p_cpu might have changed in cpu_switch() */ + KDASSERT(p->p_cpu != NULL); + KDASSERT(p->p_cpu == curcpu()); microtime(&p->p_cpu->ci_schedstate.spc_runtime); #else microtime(&runtime); #endif + +#if defined(MULTIPROCESSOR) + /* + * Reacquire the kernel_lock now. We do this after we've + * released the scheduler lock to avoid deadlock, and before + * we reacquire the interlock. + */ + if (p->p_flag & P_BIGLOCK) +#ifdef notyet + spinlock_acquire_count(&kernel_lock, hold_count); +#else + __mp_acquire_count(&kernel_lock, hold_count); +#endif +#endif } /* @@ -836,6 +867,7 @@ rqinit() for (i = 0; i < NQS; i++) qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; + SIMPLE_LOCK_INIT(&sched_lock); } static __inline void @@ -845,13 +877,35 @@ resched_proc(struct proc *p, u_char pri) struct cpu_info *ci; #endif + /* + * XXXSMP + * Since p->p_cpu persists across a context switch, + * this gives us *very weak* processor affinity, in + * that we notify the CPU on which the process last + * ran that it should try to switch. 
+ * + * This does not guarantee that the process will run on + * that processor next, because another processor might + * grab it the next time it performs a context switch. + * + * This also does not handle the case where its last + * CPU is running a higher-priority process, but every + * other CPU is running a lower-priority process. There + * are ways to handle this situation, but they're not + * currently very pretty, and we also need to weigh the + * cost of moving a process from one CPU to another. + * + * XXXSMP + * There is also the issue of locking the other CPU's + * sched state, which we currently do not do. + */ #ifdef __HAVE_CPUINFO ci = (p->p_cpu != NULL) ? p->p_cpu : curcpu(); if (pri < ci->ci_schedstate.spc_curpriority) need_resched(ci); #else if (pri < curpriority) - need_resched(); + need_resched(0); #endif } @@ -864,12 +918,12 @@ void setrunnable(p) register struct proc *p; { - register int s; + SCHED_ASSERT_LOCKED(); - s = splhigh(); switch (p->p_stat) { case 0: case SRUN: + case SONPROC: case SZOMB: case SDEAD: default: @@ -890,7 +944,6 @@ setrunnable(p) p->p_stat = SRUN; if (p->p_flag & P_INMEM) setrunqueue(p); - splx(s); if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; @@ -911,6 +964,8 @@ resetpriority(p) { register unsigned int newpriority; + SCHED_ASSERT_LOCKED(); + newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO); newpriority = min(newpriority, MAXPRI); p->p_usrpri = newpriority; @@ -936,8 +991,12 @@ void schedclock(p) struct proc *p; { + int s; + p->p_estcpu = ESTCPULIM(p->p_estcpu + 1); + SCHED_LOCK(s); resetpriority(p); + SCHED_UNLOCK(s); if (p->p_priority >= PUSER) p->p_priority = p->p_usrpri; } diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index a517ba2951c..603a354b76b 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sysctl.c,v 1.111 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: kern_sysctl.c,v 1.112 2004/06/13 21:49:26 niklas Exp $ */ /* 
$NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */ /*- @@ -429,6 +429,20 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) return (sysctl_malloc(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p)); case KERN_CPTIME: +#ifdef MULTIPROCESSOR + { + CPU_INFO_ITERATOR cii; + struct cpu_info *ci; + int i; + + bzero(cp_time, sizeof(cp_time)); + + for (CPU_INFO_FOREACH(cii, ci)) { + for (i = 0; i < CPUSTATES; i++) + cp_time[i] += ci->ci_schedstate.spc_cp_time[i]; + } + } +#endif return (sysctl_rdstruct(oldp, oldlenp, newp, &cp_time, sizeof(cp_time))); case KERN_NCHSTATS: @@ -1317,6 +1331,11 @@ fill_kproc2(struct proc *p, struct kinfo_proc2 *ki) &p->p_stats->p_cru.ru_stime, &ut); ki->p_uctime_sec = ut.tv_sec; ki->p_uctime_usec = ut.tv_usec; + ki->p_cpuid = KI_NOCPU; +#ifdef MULTIPROCESSOR + if (p->p_cpu != NULL) + ki->p_cpuid = p->p_cpu->ci_cpuid; +#endif PRELE(p); } } diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index f40928b2824..1ed5f182d36 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_time.c,v 1.40 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: kern_time.c,v 1.41 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: kern_time.c,v 1.20 1996/02/18 11:57:06 fvdl Exp $ */ /* @@ -99,7 +99,7 @@ settime(struct timeval *tv) timersub(tv, &time, &delta); time = *tv; timeradd(&boottime, &delta, &boottime); -#ifdef __HAVE_CURCPU +#ifdef __HAVE_CPUINFO /* * XXXSMP * This is wrong. We should traverse a list of all diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c index e9114b34990..db01a8a868c 100644 --- a/sys/kern/subr_pool.c +++ b/sys/kern/subr_pool.c @@ -1,4 +1,4 @@ -/* $OpenBSD: subr_pool.c,v 1.41 2004/06/02 22:17:22 tedu Exp $ */ +/* $OpenBSD: subr_pool.c,v 1.42 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ /*- @@ -54,7 +54,6 @@ /* * XXX - for now. 
*/ -#define SIMPLELOCK_INITIALIZER { SLOCK_UNLOCKED } #ifdef LOCKDEBUG #define simple_lock_freecheck(a, s) do { /* nothing */ } while (0) #define simple_lock_only_held(lkp, str) do { /* nothing */ } while (0) @@ -86,7 +85,7 @@ int pool_inactive_time = 10; static struct pool *drainpp; /* This spin lock protects both pool_head and drainpp. */ -struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER; +struct simplelock pool_head_slock; struct pool_item_header { /* Page headers */ @@ -529,6 +528,8 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 0, "pcgpool", NULL); } + simple_lock_init(&pool_head_slock); + /* Insert this into the list of all pools. */ simple_lock(&pool_head_slock); TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist); @@ -2062,9 +2063,9 @@ pool_allocator_drain(struct pool_allocator *pa, struct pool *org, int need) TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list); if (pp == org) continue; - simple_unlock(&pa->pa_list); - freed = pool_reclaim(pp) - simple_lock(&pa->pa_list); + simple_unlock(&pa->pa_slock); + freed = pool_reclaim(pp); + simple_lock(&pa->pa_slock); } while ((pp = TAILQ_FIRST(&pa->pa_list)) != start && (freed < need)); if (!freed) { diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c index 906ce028770..a843c2f1dc2 100644 --- a/sys/kern/subr_prf.c +++ b/sys/kern/subr_prf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: subr_prf.c,v 1.58 2004/01/03 14:08:53 espie Exp $ */ +/* $OpenBSD: subr_prf.c,v 1.59 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: subr_prf.c,v 1.45 1997/10/24 18:14:25 chuck Exp $ */ /*- @@ -47,6 +47,7 @@ #include <sys/ioctl.h> #include <sys/vnode.h> #include <sys/file.h> +#include <sys/simplelock.h> #include <sys/tty.h> #include <sys/tprintf.h> #include <sys/syslog.h> @@ -97,6 +98,50 @@ extern int uvm_doswapencrypt; int kprintf(const char *, int, void *, char *, va_list); void kputchar(int, int, struct tty *); +#ifdef MULTIPROCESSOR + +#ifdef notdef + +struct simplelock kprintf_slock; + +#define 
KPRINTF_MUTEX_ENTER(s) \ +do { \ + (s) = splhigh(); \ + simple_lock(&kprintf_slock); \ +} while (/*CONSTCOND*/0) + +#define KPRINTF_MUTEX_EXIT(s) \ +do { \ + simple_unlock(&kprintf_slock); \ + splx((s)); \ +} while (/*CONSTCOND*/0) + +#else + +struct __mp_lock kprintf_slock; + +#define KPRINTF_MUTEX_ENTER(s) \ +do { \ + (s) = splhigh(); \ + __mp_lock(&kprintf_slock); \ +} while (/*CONSTCOND*/0) + +#define KPRINTF_MUTEX_EXIT(s) \ +do { \ + __mp_unlock(&kprintf_slock); \ + splx((s)); \ +} while (/*CONSTCOND*/0) + +#endif + +#else + +struct simplelock kprintf_slock; +#define KPRINTF_MUTEX_ENTER(s) (s) = splhigh() +#define KPRINTF_MUTEX_EXIT(s) splx((s)) + +#endif /* MULTIPROCESSOR */ + /* * globals */ @@ -506,6 +551,9 @@ printf(const char *fmt, ...) { va_list ap; int savintr, retval; + int s; + + KPRINTF_MUTEX_ENTER(s); savintr = consintr; /* disable interrupts */ consintr = 0; @@ -515,6 +563,9 @@ printf(const char *fmt, ...) if (!panicstr) logwakeup(); consintr = savintr; /* reenable interrupts */ + + KPRINTF_MUTEX_EXIT(s); + return(retval); } diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index a6fdd09c95e..c512b9a40dd 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_generic.c,v 1.47 2003/12/10 23:10:08 millert Exp $ */ +/* $OpenBSD: sys_generic.c,v 1.48 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ /* @@ -55,6 +55,7 @@ #ifdef KTRACE #include <sys/ktrace.h> #endif +#include <sys/sched.h> #include <sys/mount.h> #include <sys/syscallargs.h> @@ -852,7 +853,7 @@ selwakeup(sip) p = pfind(sip->si_selpid); sip->si_selpid = 0; if (p != NULL) { - s = splhigh(); + SCHED_LOCK(s); if (p->p_wchan == (caddr_t)&selwait) { if (p->p_stat == SSLEEP) setrunnable(p); @@ -860,7 +861,7 @@ selwakeup(sip) unsleep(p); } else if (p->p_flag & P_SELECT) p->p_flag &= ~P_SELECT; - splx(s); + SCHED_UNLOCK(s); } } diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index 
37664becb7d..21180cda141 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_process.c,v 1.27 2004/02/08 00:04:21 deraadt Exp $ */ +/* $OpenBSD: sys_process.c,v 1.28 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: sys_process.c,v 1.55 1996/05/15 06:17:47 tls Exp $ */ /*- @@ -57,6 +57,7 @@ #include <sys/ptrace.h> #include <sys/uio.h> #include <sys/user.h> +#include <sys/sched.h> #include <sys/mount.h> #include <sys/syscallargs.h> @@ -91,6 +92,7 @@ sys_ptrace(p, v, retval) #endif int error, write; int temp; + int s; /* "A foolish consistency..." XXX */ if (SCARG(uap, req) == PT_TRACE_ME) @@ -353,7 +355,9 @@ sys_ptrace(p, v, retval) /* Finally, deliver the requested signal (or none). */ if (t->p_stat == SSTOP) { t->p_xstat = SCARG(uap, data); + SCHED_LOCK(s); setrunnable(t); + SCHED_UNLOCK(s); } else { if (SCARG(uap, data) != 0) psignal(t, SCARG(uap, data)); diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 0aca0394f7a..6dec67d20fe 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tty.c,v 1.64 2004/03/19 19:03:07 deraadt Exp $ */ +/* $OpenBSD: tty.c,v 1.65 2004/06/13 21:49:26 niklas Exp $ */ /* $NetBSD: tty.c,v 1.68.4.2 1996/06/06 16:04:52 thorpej Exp $ */ /*- @@ -2098,7 +2098,8 @@ ttyinfo(tp) pick = p; ttyprintf(tp, " cmd: %s %d [%s] ", pick->p_comm, pick->p_pid, - pick->p_stat == SRUN ? "running" : + pick->p_stat == SONPROC ? "running" : + pick->p_stat == SRUN ? "runnable" : pick->p_wmesg ? 
pick->p_wmesg : "iowait"); calcru(pick, &utime, &stime, NULL); diff --git a/sys/net/netisr_dispatch.h b/sys/net/netisr_dispatch.h index 822577ad843..96493b324db 100644 --- a/sys/net/netisr_dispatch.h +++ b/sys/net/netisr_dispatch.h @@ -1,4 +1,4 @@ -/* $OpenBSD: netisr_dispatch.h,v 1.3 2001/06/09 06:16:38 angelos Exp $ */ +/* $OpenBSD: netisr_dispatch.h,v 1.4 2004/06/13 21:49:27 niklas Exp $ */ /* $NetBSD: netisr_dispatch.h,v 1.2 2000/07/02 04:40:47 cgd Exp $ */ /* @@ -16,20 +16,23 @@ * } */ -#ifndef _NET_NETISR_DISPATCH_H_ -#define _NET_NETISR_DISPATCH_H_ - #ifndef _NET_NETISR_H_ #error <net/netisr.h> must be included before <net/netisr_dispatch.h> #endif +#ifndef _NET_NETISR_DISPATCH_H_ +#define _NET_NETISR_DISPATCH_H_ +#include "ether.h" +#include "ppp.h" +#include "bridge.h" +#endif + /* * When adding functions to this list, be sure to add headers to provide * their prototypes in <net/netisr.h> (if necessary). */ #ifdef INET -#include "ether.h" #if NETHER > 0 DONETISR(NETISR_ARP,arpintr); #endif @@ -59,12 +62,9 @@ #ifdef NATM DONETISR(NETISR_NATM,natmintr); #endif -#include "ppp.h" #if NPPP > 0 DONETISR(NETISR_PPP,pppintr); #endif -#include "bridge.h" #if NBRIDGE > 0 DONETISR(NETISR_BRIDGE,bridgeintr); #endif -#endif /* _NET_NETISR_DISPATCH_H_ */ diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h index b3c92ca019b..c2bc3965541 100644 --- a/sys/sys/kernel.h +++ b/sys/sys/kernel.h @@ -1,4 +1,4 @@ -/* $OpenBSD: kernel.h,v 1.9 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: kernel.h,v 1.10 2004/06/13 21:49:28 niklas Exp $ */ /* $NetBSD: kernel.h,v 1.11 1995/03/03 01:24:16 cgd Exp $ */ /*- @@ -49,7 +49,7 @@ extern int domainnamelen; /* 1.2 */ extern volatile struct timeval mono_time; extern struct timeval boottime; -#ifndef __HAVE_CURCPU +#ifndef __HAVE_CPUINFO extern struct timeval runtime; #endif extern volatile struct timeval time; diff --git a/sys/sys/lock.h b/sys/sys/lock.h index e5d1899d35a..5da1c1bfd73 100644 --- a/sys/sys/lock.h +++ b/sys/sys/lock.h @@ -1,4 +1,4 
@@ -/* $OpenBSD: lock.h,v 1.11 2003/06/02 23:28:21 millert Exp $ */ +/* $OpenBSD: lock.h,v 1.12 2004/06/13 21:49:28 niklas Exp $ */ /* * Copyright (c) 1995 @@ -56,11 +56,51 @@ struct lock { int lk_sharecount; /* # of accepted shared locks */ int lk_waitcount; /* # of processes sleeping for lock */ short lk_exclusivecount; /* # of recursive exclusive locks */ - short lk_prio; /* priority at which to sleep */ + short lk_recurselevel; /* lvl above which recursion ok */ + + /* + * This is the sleep message for sleep locks, and a simple name + * for spin locks. + */ char *lk_wmesg; /* resource sleeping (for tsleep) */ - int lk_timo; /* maximum sleep time (for tsleep) */ - pid_t lk_lockholder; /* pid of exclusive lock holder */ + + union { + struct { + /* pid of exclusive lock holder */ + pid_t lk_sleep_lockholder; + + /* priority at which to sleep */ + int lk_sleep_prio; + + /* maximum sleep time (for tsleep) */ + int lk_sleep_timo; + } lk_un_sleep; + struct { + /* CPU ID of exclusive lock holder */ + cpuid_t lk_spin_cpu; +#if defined(LOCKDEBUG) + TAILQ_ENTRY(lock) lk_spin_list; +#endif + } lk_un_spin; + } lk_un; + +#define lk_lockholder lk_un.lk_un_sleep.lk_sleep_lockholder +#define lk_prio lk_un.lk_un_sleep.lk_sleep_prio +#define lk_timo lk_un.lk_un_sleep.lk_sleep_timo + +#define lk_cpu lk_un.lk_un_spin.lk_spin_cpu +#if defined(LOCKDEBUG) +#define lk_list lk_un.lk_un_spin.lk_spin_list +#endif + +#if defined(LOCKDEBUG) + const char *lk_lock_file; + const char *lk_unlock_file; + int lk_lock_line; + int lk_unlock_line; +#endif }; + /* * Lock request types: * LK_SHARED - get one of many possible shared locks. If a process @@ -109,12 +149,14 @@ struct lock { * or passed in as arguments to the lock manager. The LK_REENABLE flag may be * set only at the release of a lock obtained by drain. 
*/ -#define LK_EXTFLG_MASK 0x00000770 /* mask of external flags */ +#define LK_EXTFLG_MASK 0x00700070 /* mask of external flags */ #define LK_NOWAIT 0x00000010 /* do not sleep to await lock */ #define LK_SLEEPFAIL 0x00000020 /* sleep, then return failure */ #define LK_CANRECURSE 0x00000040 /* allow recursive exclusive lock */ #define LK_REENABLE 0x00000080 /* lock is be reenabled after drain */ -#define LK_RECURSEFAIL 0x00000100 /* fail if recursive exclusive lock */ +#define LK_SETRECURSE 0x00100000 /* other locks while we have it OK */ +#define LK_RECURSEFAIL 0x00200000 /* fail if recursive exclusive lock */ +#define LK_SPIN 0x00400000 /* lock spins instead of sleeps */ /* * Internal lock flags. * @@ -131,9 +173,9 @@ struct lock { * * Non-persistent external flags. */ -#define LK_INTERLOCK 0x00100000 /* unlock passed simple lock after +#define LK_INTERLOCK 0x00010000 /* unlock passed simple lock after getting lk_interlock */ -#define LK_RETRY 0x00200000 /* vn_lock: retry until locked */ +#define LK_RETRY 0x00020000 /* vn_lock: retry until locked */ /* * Lock return status. 
@@ -157,6 +199,7 @@ struct lock { */ #define LK_KERNPROC ((pid_t) -2) #define LK_NOPROC ((pid_t) -1) +#define LK_NOCPU ((cpuid_t) -1) struct proc; @@ -164,14 +207,69 @@ void lockinit(struct lock *, int prio, char *wmesg, int timo, int flags); int lockmgr(__volatile struct lock *, u_int flags, struct simplelock *, struct proc *p); -void lockmgr_printinfo(struct lock *); +void lockmgr_printinfo(__volatile struct lock *); int lockstatus(struct lock *); +#if (0 && defined(MULTIPROCESSOR)) || defined(LOCKDEBUG) +#define spinlockinit(lkp, name, flags) \ + lockinit((lkp), 0, (name), 0, (flags) | LK_SPIN) +#define spinlockmgr(lkp, flags, intrlk) \ + lockmgr((lkp), (flags) | LK_SPIN, (intrlk), curproc) +#else +#define spinlockinit(lkp, name, flags) (void)(lkp) +#define spinlockmgr(lkp, flags, intrlk) (0) +#endif + +#if defined(LOCKDEBUG) +int _spinlock_release_all(__volatile struct lock *, const char *, int); +void _spinlock_acquire_count(__volatile struct lock *, int, const char *, + int); + +#define spinlock_release_all(l) _spinlock_release_all((l), __FILE__, __LINE__) +#define spinlock_acquire_count(l, c) _spinlock_acquire_count((l), (c), \ + __FILE__, __LINE__) +#else +int spinlock_release_all(__volatile struct lock *); +void spinlock_acquire_count(__volatile struct lock *, int); +#endif + #ifdef LOCKDEBUG #define LOCK_ASSERT(x) KASSERT(x) #else #define LOCK_ASSERT(x) /* nothing */ #endif -#endif /* !_LOCK_H_ */ +#if defined(MULTIPROCESSOR) +/* + * XXX Instead of using struct lock for the kernel lock and thus requiring us + * XXX to implement simplelocks, causing all sorts of fine-grained locks all + * XXX over our tree getting activated consuming both time and potentially + * XXX introducing locking protocol bugs. + */ +#ifdef notyet + +extern struct lock kernel_lock; + +/* + * XXX Simplelock macros used at "trusted" places. 
+ */ +#define SIMPLELOCK simplelock +#define SIMPLE_LOCK_INIT simple_lock_init +#define SIMPLE_LOCK simple_lock +#define SIMPLE_UNLOCK simple_unlock + +#endif +#else + +/* + * XXX Simplelock macros used at "trusted" places. + */ +#define SIMPLELOCK simplelock +#define SIMPLE_LOCK_INIT simple_lock_init +#define SIMPLE_LOCK simple_lock +#define SIMPLE_UNLOCK simple_unlock + +#endif + +#endif /* !_LOCK_H_ */ diff --git a/sys/sys/mplock.h b/sys/sys/mplock.h new file mode 100644 index 00000000000..73f3c0b1e40 --- /dev/null +++ b/sys/sys/mplock.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2004 Niklas Hallqvist. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MPLOCK_H_ +#define _MPLOCK_H_ + +/* + * Really simple spinlock implementation with recursive capabilities. + * Correctness is paramount, no fanciness allowed. + */ + +struct __mp_lock { + __cpu_simple_lock_t mpl_lock; /* underlying spin lock */ + cpuid_t mpl_cpu; /* owning CPU, LK_NOCPU when free */ + int mpl_count; /* recursion depth, 0 when free */ +}; + +static __inline void __mp_lock_init(struct __mp_lock *); +static __inline void __mp_lock(struct __mp_lock *); +static __inline void __mp_unlock(struct __mp_lock *); +static __inline int __mp_release_all(struct __mp_lock *); +static __inline void __mp_acquire_count(struct __mp_lock *, int); +static __inline int __mp_lock_held(struct __mp_lock *); + +/* + * XXX Simplelock macros used at "trusted" places. + */ +#define SIMPLELOCK __mp_lock +#define SIMPLE_LOCK_INIT __mp_lock_init +#define SIMPLE_LOCK __mp_lock +#define SIMPLE_UNLOCK __mp_unlock + +static __inline void +__mp_lock_init(struct __mp_lock *lock) +{ + __cpu_simple_lock_init(&lock->mpl_lock); + lock->mpl_cpu = LK_NOCPU; + lock->mpl_count = 0; +} + +#if defined(MP_LOCKDEBUG) +#ifndef DDB +#error "MP_LOCKDEBUG requires DDB" +#endif + +extern void Debugger(void); +extern int db_printf(const char *, ...) + __attribute__((__format__(__kprintf__,1,2))); + +/* CPU-dependent timing, needs this to be settable from ddb.
*/ +extern int __mp_lock_spinout; +#endif + +static __inline void +__mp_lock(struct __mp_lock *lock) +{ + int s = spllock(); + + if (lock->mpl_cpu != cpu_number()) { /* not already ours: really acquire */ +#ifndef MP_LOCKDEBUG + __cpu_simple_lock(&lock->mpl_lock); +#else + { + int got_it; + do { + int ticks = __mp_lock_spinout; + + do { + got_it = __cpu_simple_lock_try( + &lock->mpl_lock); + } while (!got_it && ticks-- > 0); + if (!got_it) { + db_printf( + "__mp_lock(%p): lock spun out\n", + lock); + Debugger(); + } + } while (!got_it); + } +#endif + lock->mpl_cpu = cpu_number(); + } + lock->mpl_count++; /* one more (possibly recursive) hold */ + splx(s); +} + +static __inline void +__mp_unlock(struct __mp_lock *lock) +{ + int s = spllock(); + +#ifdef MP_LOCKDEBUG + if (lock->mpl_count == 0 || lock->mpl_cpu == LK_NOCPU) { + db_printf("__mp_unlock(%p): releasing not locked lock\n", + lock); + Debugger(); + } +#endif + + if (--lock->mpl_count == 0) { /* last hold dropped: release for real */ + lock->mpl_cpu = LK_NOCPU; + __cpu_simple_unlock(&lock->mpl_lock); + } + splx(s); +} + +static __inline int +__mp_release_all(struct __mp_lock *lock) { + int s = spllock(); + int rv = lock->mpl_count; /* depth handed back to the caller */ + +#ifdef MP_LOCKDEBUG + if (lock->mpl_count == 0 || lock->mpl_cpu == LK_NOCPU) { + db_printf( + "__mp_release_all(%p): releasing not locked lock\n", + lock); + Debugger(); + } +#endif + + lock->mpl_cpu = LK_NOCPU; + lock->mpl_count = 0; + __cpu_simple_unlock(&lock->mpl_lock); + splx(s); + return (rv); +} + +static __inline void +__mp_acquire_count(struct __mp_lock *lock, int count) { + int s = spllock(); + + __cpu_simple_lock(&lock->mpl_lock); + lock->mpl_cpu = cpu_number(); + lock->mpl_count = count; /* set depth to count in one step */ + splx(s); +} + +static __inline int +__mp_lock_held(struct __mp_lock *lock) { + return lock->mpl_count; /* NOTE: owner (mpl_cpu) is not checked */ +} + +extern struct __mp_lock kernel_lock; + +/* XXX Should really be in proc.h but then __mp_lock is not defined.
*/ +extern struct SIMPLELOCK deadproc_slock; + +#endif /* !_MPLOCK_H */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index c06b14eda43..9f64ed24839 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: proc.h,v 1.70 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: proc.h,v 1.71 2004/06/13 21:49:28 niklas Exp $ */ /* $NetBSD: proc.h,v 1.44 1996/04/22 01:23:21 christos Exp $ */ /*- @@ -46,7 +46,6 @@ #include <sys/timeout.h> /* For struct timeout. */ #include <sys/event.h> /* For struct klist */ -#ifdef __HAVE_CPUINFO /* * CPU states. * XXX Not really scheduler state, but no other good place to put @@ -60,24 +59,25 @@ #define CPUSTATES 5 /* - * Per-CPU scheduler state. XXX - this should be in sys/sched.h + * Per-CPU scheduler state. */ struct schedstate_percpu { struct timeval spc_runtime; /* time curproc started running */ __volatile int spc_schedflags; /* flags; see below */ u_int spc_schedticks; /* ticks for schedclock() */ u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */ - u_char spc_curpriority; /* usrpri of curproc */ + u_char spc_curpriority; /* usrpri of curproc */ int spc_rrticks; /* ticks until roundrobin() */ int spc_pscnt; /* prof/stat counter */ - int spc_psdiv; /* prof/stat divisor */ + int spc_psdiv; /* prof/stat divisor */ }; /* spc_flags */ -#define SPCF_SEENRR 0x0001 /* process has seen roundrobin() */ -#define SPCF_SHOULDYIELD 0x0002 /* process should yield the CPU */ -#define SPCF_SWITCHCLEAR (SPCF_SEENRR|SPCF_SHOULDYIELD) +#define SPCF_SEENRR 0x0001 /* process has seen roundrobin() */ +#define SPCF_SHOULDYIELD 0x0002 /* process should yield the CPU */ +#define SPCF_SWITCHCLEAR (SPCF_SEENRR|SPCF_SHOULDYIELD) +#ifdef __HAVE_CPUINFO /* * These are the fields we require in struct cpu_info that we get from * curcpu(): @@ -208,7 +208,7 @@ struct proc { u_int p_swtime; /* Time swapped in or out. */ u_int p_slptime; /* Time since last blocked. 
*/ #ifdef __HAVE_CPUINFO - struct cpu_info * __volatile p_cpu; + struct cpu_info * __volatile p_cpu; /* CPU we're running on. */ #else int p_schedflags; /* PSCHED_* flags */ #endif @@ -264,6 +264,7 @@ struct proc { u_short p_xstat; /* Exit status for wait; also stop signal. */ u_short p_acflag; /* Accounting flags. */ struct rusage *p_ru; /* Exit information. XXX */ + int p_locks; /* DEBUG: lockmgr count of held locks */ }; #define p_session p_pgrp->pg_session @@ -276,6 +277,7 @@ struct proc { #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ #define SDEAD 6 /* Process is almost a zombie. */ +#define SONPROC 7 /* Process is currently on a CPU. */ #define P_ZOMBIE(p) ((p)->p_stat == SZOMB || (p)->p_stat == SDEAD) @@ -311,12 +313,13 @@ struct proc { #define P_SYSTRACE 0x400000 /* Process system call tracing active*/ #define P_CONTINUED 0x800000 /* Proc has continued from a stopped state. */ #define P_SWAPIN 0x1000000 /* Swapping in right now */ +#define P_BIGLOCK 0x2000000 /* Process needs kernel "big lock" to run */ #define P_BITS \ ("\20\01ADVLOCK\02CTTY\03INMEM\04NOCLDSTOP\05PPWAIT\06PROFIL\07SELECT" \ "\010SINTR\011SUGID\012SYSTEM\013TIMEOUT\014TRACED\015WAITED\016WEXIT" \ "\017EXEC\020PWEUPC\021FSTRACE\022SSTEP\023SUGIDEXEC\024NOCLDWAIT" \ - "\025NOZOMBIE\026INEXEC\027SYSTRACE\030CONTINUED") + "\025NOZOMBIE\026INEXEC\027SYSTRACE\030CONTINUED\031SWAPIN\032BIGLOCK") /* Macro to compute the exit signal to be delivered. */ #define P_EXITSIG(p) \ @@ -404,7 +407,6 @@ extern struct proclist allproc; /* List of all processes. */ extern struct proclist zombproc; /* List of zombie processes. */ extern struct proclist deadproc; /* List of dead processes. */ -extern struct simplelock deadproc_slock; extern struct proc *initproc; /* Process slots for init, pager. 
*/ extern struct proc *syncerproc; /* filesystem syncer daemon */ @@ -448,7 +450,6 @@ void setrunnable(struct proc *); #if !defined(setrunqueue) void setrunqueue(struct proc *); #endif -void sleep(void *chan, int pri); void uvm_swapin(struct proc *); /* XXX: uvm_extern.h? */ int ltsleep(void *chan, int pri, const char *wmesg, int timo, volatile struct simplelock *); @@ -476,5 +477,9 @@ void child_return(void *); int proc_cansugid(struct proc *); void proc_zap(struct proc *); + +#if defined(MULTIPROCESSOR) +void proc_trampoline_mp(void); /* XXX */ +#endif #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */ diff --git a/sys/sys/sched.h b/sys/sys/sched.h index 08b1fb49904..50625e84149 100644 --- a/sys/sys/sched.h +++ b/sys/sys/sched.h @@ -1,4 +1,4 @@ -/* $OpenBSD: sched.h,v 1.5 2004/06/09 20:18:28 art Exp $ */ +/* $OpenBSD: sched.h,v 1.6 2004/06/13 21:49:28 niklas Exp $ */ /* $NetBSD: sched.h,v 1.2 1999/02/28 18:14:58 ross Exp $ */ /*- @@ -86,10 +86,11 @@ #define NICE_WEIGHT 2 /* priorities per nice level */ #define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - PPQ) -extern int schedhz; /* ideally: 16 */ +extern int schedhz; /* ideally: 16 */ +extern int rrticks_init; /* ticks per roundrobin() */ #ifdef _SYS_PROC_H_ -void schedclock(struct proc *p); +void schedclock(struct proc *); #ifdef __HAVE_CPUINFO void roundrobin(struct cpu_info *); #endif @@ -118,5 +119,75 @@ scheduler_wait_hook(parent, child) parent->p_estcpu = ESTCPULIM(parent->p_estcpu + child->p_estcpu); } #endif /* _SYS_PROC_H_ */ + +#ifndef splsched +#define splsched() splhigh() +#endif +#ifndef IPL_SCHED +#define IPL_SCHED IPL_HIGH +#endif + +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) +#include <sys/lock.h> + +/* + * XXX Instead of using struct lock for the kernel lock and thus requiring us + * XXX to implement simplelocks, causing all sorts of fine-grained locks all + * XXX over our tree getting activated consuming both time and potentially + * XXX introducing locking protocol bugs. 
+ */ +#ifdef notyet + +extern struct simplelock sched_lock; + +#define SCHED_ASSERT_LOCKED() LOCK_ASSERT(simple_lock_held(&sched_lock)) +#define SCHED_ASSERT_UNLOCKED() LOCK_ASSERT(simple_lock_held(&sched_lock) == 0) + +#define SCHED_LOCK(s) \ +do { \ + (s) = splsched(); \ + simple_lock(&sched_lock); \ +} while (/* CONSTCOND */ 0) + +#define SCHED_UNLOCK(s) \ +do { \ + simple_unlock(&sched_lock); \ + splx(s); \ +} while (/* CONSTCOND */ 0) + +#else + +extern struct __mp_lock sched_lock; + +#define SCHED_ASSERT_LOCKED() LOCK_ASSERT(__mp_lock_held(&sched_lock)) +#define SCHED_ASSERT_UNLOCKED() LOCK_ASSERT(__mp_lock_held(&sched_lock) == 0) + +#define SCHED_LOCK(s) \ +do { \ + (s) = splsched(); \ + __mp_lock(&sched_lock); \ +} while (/* CONSTCOND */ 0) + +#define SCHED_UNLOCK(s) \ +do { \ + __mp_unlock(&sched_lock); \ + splx(s); \ +} while (/* CONSTCOND */ 0) + +#endif + +void sched_lock_idle(void); +void sched_unlock_idle(void); + +#else /* !(MULTIPROCESSOR || LOCKDEBUG) */ + +#define SCHED_ASSERT_LOCKED() splassert(IPL_SCHED) /* no ';': caller supplies it */ +#define SCHED_ASSERT_UNLOCKED() /* nothing */ + +#define SCHED_LOCK(s) ((s) = splsched()) +#define SCHED_UNLOCK(s) splx(s) + +#endif /* MULTIPROCESSOR || LOCKDEBUG */ + #endif /* _KERNEL */ #endif /* _SYS_SCHED_H_ */ diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h index cac591c71d5..9db5681a21d 100644 --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: signalvar.h,v 1.11 2004/04/06 17:24:12 mickey Exp $ */ +/* $OpenBSD: signalvar.h,v 1.12 2004/06/13 21:49:28 niklas Exp $ */ /* $NetBSD: signalvar.h,v 1.17 1996/04/22 01:23:31 christos Exp $ */ /* @@ -159,7 +159,9 @@ void csignal(pid_t pgid, int signum, uid_t uid, uid_t euid); int issignal(struct proc *p); void pgsignal(struct pgrp *pgrp, int sig, int checkctty); void postsig(int sig); -void psignal(struct proc *p, int sig); +void psignal1(struct proc *p, int sig, int dolock); +#define psignal(p, sig) psignal1((p), (sig), 1) +#define sched_psignal(p, sig) psignal1((p), 
(sig), 0) void siginit(struct proc *p); void trapsignal(struct proc *p, int sig, u_long code, int type, union sigval val); diff --git a/sys/sys/simplelock.h b/sys/sys/simplelock.h index cfc6fdd8ed0..fbd39ef4d4e 100644 --- a/sys/sys/simplelock.h +++ b/sys/sys/simplelock.h @@ -1,7 +1,12 @@ -/* $OpenBSD: simplelock.h,v 1.10 2002/03/14 01:27:14 millert Exp $ */ +/* $OpenBSD: simplelock.h,v 1.11 2004/06/13 21:49:28 niklas Exp $ */ #ifndef _SIMPLELOCK_H_ #define _SIMPLELOCK_H_ + +#ifdef MULTIPROCESSOR +#include <machine/lock.h> +#endif + /* * A simple spin lock. * @@ -12,28 +17,26 @@ * of these locks while a process is sleeping. */ struct simplelock { +#ifdef MULTIPROCESSOR + __cpu_simple_lock_t lock_data; +#else int lock_data; +#endif }; #ifdef _KERNEL -#ifndef NCPUS -#define NCPUS 1 -#endif - -#define SLOCK_LOCKED 1 -#define SLOCK_UNLOCKED 0 - -#define SLOCK_INITIALIZER { SLOCK_UNLOCKED } - /* * We can't debug locks when we use them in real life. */ -#if (NCPUS != 1) && defined(LOCKDEBUG) +#if defined(MULTIPROCESSOR) && defined(LOCKDEBUG) #undef LOCKDEBUG #endif -#if NCPUS == 1 +#if !defined(MULTIPROCESSOR) || 1 + +#define SLOCK_LOCKED 1 +#define SLOCK_UNLOCKED 0 #ifndef LOCKDEBUG @@ -42,11 +45,7 @@ struct simplelock { #define simple_unlock(lkp) #define simple_lock_assert(lkp) -static __inline void simple_lock_init(struct simplelock *); - -static __inline void -simple_lock_init(lkp) - struct simplelock *lkp; +static __inline void simple_lock_init(struct simplelock *lkp) { lkp->lock_data = SLOCK_UNLOCKED; @@ -67,7 +66,7 @@ void simple_lock_init(struct simplelock *); #endif /* !defined(LOCKDEBUG) */ -#else /* NCPUS > 1 */ +#else /* MULTIPROCESSOR */ /* * The simple-lock routines are the primitives out of which the lock @@ -78,31 +77,26 @@ void simple_lock_init(struct simplelock *); * only be used for exclusive locks. 
*/ -static __inline void -simple_lock(lkp) - __volatile struct simplelock *lkp; +static __inline void simple_lock_init(struct simplelock *lkp) { - - while (test_and_set(&lkp->lock_data)) - continue; + __cpu_simple_lock_init(&lkp->lock_data); } -static __inline int -simple_lock_try(lkp) - __volatile struct simplelock *lkp; +static __inline void simple_lock(__volatile struct simplelock *lkp) { - - return (!test_and_set(&lkp->lock_data)) + __cpu_simple_lock(&lkp->lock_data); } -static __inline void -simple_unlock(lkp) - __volatile struct simplelock *lkp; +static __inline int simple_lock_try(__volatile struct simplelock *lkp) { + return (__cpu_simple_lock_try(&lkp->lock_data)); +} - lkp->lock_data = 0; +static __inline void simple_unlock(__volatile struct simplelock *lkp) +{ + __cpu_simple_unlock(&lkp->lock_data); } -#endif /* NCPUS > 1 */ +#endif /* MULTIPROCESSOR */ #endif /* _KERNEL */ diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 40f921050c3..b0bf65341a1 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: systm.h,v 1.61 2004/06/08 18:09:31 marc Exp $ */ +/* $OpenBSD: systm.h,v 1.62 2004/06/13 21:49:28 niklas Exp $ */ /* $NetBSD: systm.h,v 1.50 1996/06/09 04:55:09 briggs Exp $ */ /*- @@ -85,7 +85,11 @@ extern int nchrdev; /* number of entries in cdevsw */ extern int selwait; /* select timeout address */ +#ifdef MULTIPROCESSOR +#define curpriority (curcpu()->ci_schedstate.spc_curpriority) +#else extern u_char curpriority; /* priority of current process */ +#endif extern int maxmem; /* max memory per process */ extern int physmem; /* physical memory */ @@ -294,4 +298,27 @@ int read_symtab_from_file(struct proc *,struct vnode *,const char *); void user_config(void); #endif +#if defined(MULTIPROCESSOR) +void _kernel_lock_init(void); +void _kernel_lock(int); +void _kernel_unlock(void); +void _kernel_proc_lock(struct proc *); +void _kernel_proc_unlock(struct proc *); + +#define KERNEL_LOCK_INIT() _kernel_lock_init() +#define 
KERNEL_LOCK(flag) _kernel_lock((flag)) +#define KERNEL_UNLOCK() _kernel_unlock() +#define KERNEL_PROC_LOCK(p) _kernel_proc_lock((p)) +#define KERNEL_PROC_UNLOCK(p) _kernel_proc_unlock((p)) + +#else /* ! MULTIPROCESSOR */ + +#define KERNEL_LOCK_INIT() /* nothing */ +#define KERNEL_LOCK(flag) /* nothing */ +#define KERNEL_UNLOCK() /* nothing */ +#define KERNEL_PROC_LOCK(p) /* nothing */ +#define KERNEL_PROC_UNLOCK(p) /* nothing */ + +#endif /* MULTIPROCESSOR */ + #endif /* __SYSTM_H__ */ diff --git a/sys/uvm/uvm_meter.c b/sys/uvm/uvm_meter.c index 5e2a1ed6ef9..15bc3cd8651 100644 --- a/sys/uvm/uvm_meter.c +++ b/sys/uvm/uvm_meter.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_meter.c,v 1.18 2004/05/17 13:08:42 pedro Exp $ */ +/* $OpenBSD: uvm_meter.c,v 1.19 2004/06/13 21:49:29 niklas Exp $ */ /* $NetBSD: uvm_meter.c,v 1.21 2001/07/14 06:36:03 matt Exp $ */ /* @@ -110,6 +110,7 @@ uvm_loadav(avg) /* fall through */ case SRUN: case SIDL: + case SONPROC: nrun++; } } @@ -263,6 +264,7 @@ uvm_total(totalp) case SRUN: case SIDL: + case SONPROC: if (p->p_flag & P_INMEM) totalp->t_rq++; else |