summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiklas Hallqvist <niklas@cvs.openbsd.org>2004-06-13 21:49:30 +0000
committerNiklas Hallqvist <niklas@cvs.openbsd.org>2004-06-13 21:49:30 +0000
commit2dd254afa61a7c0cc5ae920b463d3d4266852804 (patch)
tree7adbebef3be24ba910fd83ee1ba09e1577ae21a8
parent4d62e331dcde739b4067d712dd602c0927ce11b3 (diff)
debranch SMP, have fun
-rw-r--r--sys/arch/alpha/alpha/cpu.c5
-rw-r--r--sys/arch/alpha/alpha/genassym.cf4
-rw-r--r--sys/arch/alpha/alpha/locore.s4
-rw-r--r--sys/arch/alpha/alpha/pmap.c7
-rw-r--r--sys/arch/alpha/include/cpu.h20
-rw-r--r--sys/arch/alpha/include/intr.h3
-rw-r--r--sys/arch/alpha/include/pmap.h4
-rw-r--r--sys/arch/amd64/amd64/genassym.cf3
-rw-r--r--sys/arch/amd64/amd64/locore.S6
-rw-r--r--sys/arch/amd64/include/cpu.h16
-rw-r--r--sys/arch/amd64/include/param.h8
-rw-r--r--sys/arch/amd64/isa/isa_machdep.c4
-rw-r--r--sys/arch/arm/arm/cpuswitch.S8
-rw-r--r--sys/arch/arm/arm/genassym.cf4
-rw-r--r--sys/arch/hp300/include/cpu.h4
-rw-r--r--sys/arch/hppa/hppa/genassym.cf3
-rw-r--r--sys/arch/hppa/hppa/locore.S4
-rw-r--r--sys/arch/hppa/include/cpu.h4
-rw-r--r--sys/arch/i386/compile/.cvsignore1
-rw-r--r--sys/arch/i386/conf/GENERIC5
-rw-r--r--sys/arch/i386/conf/GENERIC.MP12
-rw-r--r--sys/arch/i386/conf/RAMDISK5
-rw-r--r--sys/arch/i386/conf/RAMDISKB5
-rw-r--r--sys/arch/i386/conf/RAMDISKC5
-rw-r--r--sys/arch/i386/conf/RAMDISK_CD5
-rw-r--r--sys/arch/i386/conf/files.i38630
-rw-r--r--sys/arch/i386/eisa/eisa_machdep.c49
-rw-r--r--sys/arch/i386/eisa/eisa_machdep.h5
-rw-r--r--sys/arch/i386/i386/apic.c79
-rw-r--r--sys/arch/i386/i386/apicvec.s250
-rw-r--r--sys/arch/i386/i386/apm.c30
-rw-r--r--sys/arch/i386/i386/autoconf.c16
-rw-r--r--sys/arch/i386/i386/bios.c19
-rw-r--r--sys/arch/i386/i386/cpu.c606
-rw-r--r--sys/arch/i386/i386/db_interface.c140
-rw-r--r--sys/arch/i386/i386/db_mp.c187
-rw-r--r--sys/arch/i386/i386/gdt.c210
-rw-r--r--sys/arch/i386/i386/genassym.cf33
-rw-r--r--sys/arch/i386/i386/ioapic.c736
-rw-r--r--sys/arch/i386/i386/ipifuncs.c175
-rw-r--r--sys/arch/i386/i386/lapic.c474
-rw-r--r--sys/arch/i386/i386/lock_machdep.c112
-rw-r--r--sys/arch/i386/i386/locore.s488
-rw-r--r--sys/arch/i386/i386/machdep.c508
-rw-r--r--sys/arch/i386/i386/mainbus.c50
-rw-r--r--sys/arch/i386/i386/microtime.s4
-rw-r--r--sys/arch/i386/i386/mpbios.c1105
-rw-r--r--sys/arch/i386/i386/mptramp.s265
-rw-r--r--sys/arch/i386/i386/pctr.c3
-rw-r--r--sys/arch/i386/i386/pmap.c1131
-rw-r--r--sys/arch/i386/i386/process_machdep.c20
-rw-r--r--sys/arch/i386/i386/trap.c91
-rw-r--r--sys/arch/i386/i386/vector.s (renamed from sys/arch/i386/isa/vector.s)236
-rw-r--r--sys/arch/i386/i386/vm_machdep.c75
-rw-r--r--sys/arch/i386/include/apicvar.h57
-rw-r--r--sys/arch/i386/include/asm.h12
-rw-r--r--sys/arch/i386/include/atomic.h72
-rw-r--r--sys/arch/i386/include/biosvar.h6
-rw-r--r--sys/arch/i386/include/cpu.h193
-rw-r--r--sys/arch/i386/include/cpufunc.h38
-rw-r--r--sys/arch/i386/include/cpuvar.h108
-rw-r--r--sys/arch/i386/include/db_machdep.h10
-rw-r--r--sys/arch/i386/include/gdt.h37
-rw-r--r--sys/arch/i386/include/i82093reg.h124
-rw-r--r--sys/arch/i386/include/i82093var.h103
-rw-r--r--sys/arch/i386/include/i82489reg.h150
-rw-r--r--sys/arch/i386/include/i82489var.h113
-rw-r--r--sys/arch/i386/include/i8259.h154
-rw-r--r--sys/arch/i386/include/intr.h121
-rw-r--r--sys/arch/i386/include/intrdefs.h127
-rw-r--r--sys/arch/i386/include/lock.h119
-rw-r--r--sys/arch/i386/include/mp.h222
-rw-r--r--sys/arch/i386/include/mpbiosreg.h155
-rw-r--r--sys/arch/i386/include/mpbiosvar.h88
-rw-r--r--sys/arch/i386/include/npx.h5
-rw-r--r--sys/arch/i386/include/param.h4
-rw-r--r--sys/arch/i386/include/pcb.h9
-rw-r--r--sys/arch/i386/include/pmap.h27
-rw-r--r--sys/arch/i386/include/proc.h3
-rw-r--r--sys/arch/i386/include/segments.h14
-rw-r--r--sys/arch/i386/include/types.h3
-rw-r--r--sys/arch/i386/isa/clock.c6
-rw-r--r--sys/arch/i386/isa/icu.h71
-rw-r--r--sys/arch/i386/isa/icu.s60
-rw-r--r--sys/arch/i386/isa/isa_machdep.c114
-rw-r--r--sys/arch/i386/isa/mms.c2
-rw-r--r--sys/arch/i386/isa/npx.c361
-rw-r--r--sys/arch/i386/pci/pci_intr_fixup.c4
-rw-r--r--sys/arch/i386/pci/pci_machdep.c86
-rw-r--r--sys/arch/m88k/include/cpu.h4
-rw-r--r--sys/arch/m88k/m88k/genassym.cf5
-rw-r--r--sys/arch/m88k/m88k/process.S4
-rw-r--r--sys/arch/mac68k/include/cpu.h4
-rw-r--r--sys/arch/macppc/macppc/genassym.cf5
-rw-r--r--sys/arch/macppc/macppc/locore.S5
-rw-r--r--sys/arch/mvme68k/include/cpu.h4
-rw-r--r--sys/arch/powerpc/include/cpu.h4
-rw-r--r--sys/arch/sparc/include/cpu.h4
-rw-r--r--sys/arch/sparc/include/psl.h3
-rw-r--r--sys/arch/sparc/sparc/genassym.cf3
-rw-r--r--sys/arch/sparc/sparc/locore.s4
-rw-r--r--sys/arch/sparc64/include/cpu.h32
-rw-r--r--sys/arch/sparc64/sparc64/genassym.cf1
-rw-r--r--sys/arch/sparc64/sparc64/locore.s4
-rw-r--r--sys/arch/vax/include/cpu.h4
-rw-r--r--sys/arch/vax/vax/genassym.cf5
-rw-r--r--sys/arch/vax/vax/subr.s3
-rw-r--r--sys/compat/linux/linux_sched.c4
-rw-r--r--sys/ddb/db_command.c30
-rw-r--r--sys/dev/isa/aria.c3
-rw-r--r--sys/dev/isa/gus.c3
-rw-r--r--sys/dev/isa/gus_isa.c3
-rw-r--r--sys/dev/isa/gus_isapnp.c3
-rw-r--r--sys/dev/isa/if_hp.c3
-rw-r--r--sys/dev/isa/opti.c3
-rw-r--r--sys/dev/pci/cy82c693.c6
-rw-r--r--sys/kern/init_main.c30
-rw-r--r--sys/kern/kern_clock.c19
-rw-r--r--sys/kern/kern_exit.c24
-rw-r--r--sys/kern/kern_fork.c25
-rw-r--r--sys/kern/kern_ktrace.c3
-rw-r--r--sys/kern/kern_lock.c1047
-rw-r--r--sys/kern/kern_proc.c6
-rw-r--r--sys/kern/kern_resource.c6
-rw-r--r--sys/kern/kern_sig.c50
-rw-r--r--sys/kern/kern_subr.c3
-rw-r--r--sys/kern/kern_synch.c269
-rw-r--r--sys/kern/kern_sysctl.c21
-rw-r--r--sys/kern/kern_time.c4
-rw-r--r--sys/kern/subr_pool.c13
-rw-r--r--sys/kern/subr_prf.c53
-rw-r--r--sys/kern/sys_generic.c7
-rw-r--r--sys/kern/sys_process.c6
-rw-r--r--sys/kern/tty.c5
-rw-r--r--sys/net/netisr_dispatch.h16
-rw-r--r--sys/sys/kernel.h4
-rw-r--r--sys/sys/lock.h118
-rw-r--r--sys/sys/mplock.h169
-rw-r--r--sys/sys/proc.h29
-rw-r--r--sys/sys/sched.h77
-rw-r--r--sys/sys/signalvar.h6
-rw-r--r--sys/sys/simplelock.h62
-rw-r--r--sys/sys/systm.h29
-rw-r--r--sys/uvm/uvm_meter.c4
144 files changed, 10647 insertions, 1668 deletions
diff --git a/sys/arch/alpha/alpha/cpu.c b/sys/arch/alpha/alpha/cpu.c
index a8045be3246..6d5c8a9d347 100644
--- a/sys/arch/alpha/alpha/cpu.c
+++ b/sys/arch/alpha/alpha/cpu.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.c,v 1.18 2004/06/08 20:13:21 miod Exp $ */
+/* $OpenBSD: cpu.c,v 1.19 2004/06/13 21:49:11 niklas Exp $ */
/* $NetBSD: cpu.c,v 1.44 2000/05/23 05:12:53 thorpej Exp $ */
/*-
@@ -565,7 +565,8 @@ cpu_iccb_send(cpu_id, msg)
strlcpy(pcsp->pcs_iccb.iccb_rxbuf, msg,
sizeof pcsp->pcs_iccb.iccb_rxbuf);
pcsp->pcs_iccb.iccb_rxlen = strlen(msg);
- atomic_setbits_ulong(&hwrpb->rpb_rxrdy, cpumask);
+ /* XXX cast to __volatile */
+ atomic_setbits_ulong((__volatile u_long *)&hwrpb->rpb_rxrdy, cpumask);
/* Wait for the message to be received. */
for (timeout = 10000; timeout != 0; timeout--) {
diff --git a/sys/arch/alpha/alpha/genassym.cf b/sys/arch/alpha/alpha/genassym.cf
index 27d0faa330b..2a8802256f3 100644
--- a/sys/arch/alpha/alpha/genassym.cf
+++ b/sys/arch/alpha/alpha/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.5 2003/06/02 23:27:43 millert Exp $
+# $OpenBSD: genassym.cf,v 1.6 2004/06/13 21:49:11 niklas Exp $
# Copyright (c) 1994, 1995 Gordon W. Ross
# Copyright (c) 1993 Adam Glass
@@ -120,6 +120,8 @@ struct prochd
member ph_link
member ph_rlink
+export SONPROC
+
# offsets needed by cpu_switch() to switch mappings.
define VM_MAP_PMAP offsetof(struct vmspace, vm_map.pmap)
diff --git a/sys/arch/alpha/alpha/locore.s b/sys/arch/alpha/alpha/locore.s
index d30e1f646f1..48849130727 100644
--- a/sys/arch/alpha/alpha/locore.s
+++ b/sys/arch/alpha/alpha/locore.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.s,v 1.21 2003/10/18 20:14:40 jmc Exp $ */
+/* $OpenBSD: locore.s,v 1.22 2004/06/13 21:49:11 niklas Exp $ */
/* $NetBSD: locore.s,v 1.94 2001/04/26 03:10:44 ross Exp $ */
/*-
@@ -891,7 +891,6 @@ cpu_switch_queuescan:
*
* Note: GET_CPUINFO clobbers v0, t0, t8...t11.
*/
-#if 0
#ifdef __alpha_bwx__
ldiq t0, SONPROC /* p->p_stat = SONPROC */
stb t0, P_STAT(s2)
@@ -904,7 +903,6 @@ cpu_switch_queuescan:
or t0, t1, t0
stq_u t0, 0(t3)
#endif /* __alpha_bwx__ */
-#endif
GET_CPUINFO
/* p->p_cpu initialized in fork1() for single-processor */
diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c
index d8860e84b38..484eb661c93 100644
--- a/sys/arch/alpha/alpha/pmap.c
+++ b/sys/arch/alpha/alpha/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.41 2004/06/08 20:13:21 miod Exp $ */
+/* $OpenBSD: pmap.c,v 1.42 2004/06/13 21:49:11 niklas Exp $ */
/* $NetBSD: pmap.c,v 1.154 2000/12/07 22:18:55 thorpej Exp $ */
/*-
@@ -388,11 +388,6 @@ struct lock pmap_main_lock;
struct simplelock pmap_all_pmaps_slock;
struct simplelock pmap_growkernel_slock;
-#ifdef __OpenBSD__
-#define spinlockinit(lock, name, flags) lockinit(lock, 0, name, 0, flags)
-#define spinlockmgr(lock, flags, slock) lockmgr(lock, flags, slock, curproc)
-#endif
-
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define PMAP_MAP_TO_HEAD_LOCK() \
spinlockmgr(&pmap_main_lock, LK_SHARED, NULL)
diff --git a/sys/arch/alpha/include/cpu.h b/sys/arch/alpha/include/cpu.h
index 7bfc81c4624..c793d217035 100644
--- a/sys/arch/alpha/include/cpu.h
+++ b/sys/arch/alpha/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.24 2004/06/08 20:13:23 miod Exp $ */
+/* $OpenBSD: cpu.h,v 1.25 2004/06/13 21:49:12 niklas Exp $ */
/* $NetBSD: cpu.h,v 1.45 2000/08/21 02:03:12 thorpej Exp $ */
/*-
@@ -105,6 +105,7 @@ typedef union alpha_t_float {
#ifdef _KERNEL
#include <machine/bus.h>
+#include <sys/device.h>
struct pcb;
struct proc;
@@ -177,6 +178,7 @@ struct mchkinfo {
};
struct cpu_info {
+ struct device *ci_dev; /* pointer to our device */
/*
* Public members.
*/
@@ -185,12 +187,13 @@ struct cpu_info {
u_long ci_simple_locks; /* # of simple locks held */
#endif
struct proc *ci_curproc; /* current owner of the processor */
+ struct simplelock ci_slock; /* lock on this data structure */
+ cpuid_t ci_cpuid; /* our CPU ID */
/*
* Private members.
*/
struct mchkinfo ci_mcinfo; /* machine check info */
- cpuid_t ci_cpuid; /* our CPU ID */
struct proc *ci_fpcurproc; /* current owner of the FPU */
paddr_t ci_curpcb; /* PA of current HW PCB */
struct pcb *ci_idle_pcb; /* our idle PCB */
@@ -265,15 +268,20 @@ struct clockframe {
/*
* Preempt the current process if in interrupt from user mode,
* or after the current trap/syscall if in system mode.
- *
- * XXXSMP
- * need_resched() needs to take a cpu_info *.
*/
-#define need_resched() \
+#ifdef MULTIPROCESSOR
+#define need_resched(ci) \
+do { \
+ ci->ci_want_resched = 1; \
+ aston(curcpu()); \
+} while (/*CONSTCOND*/0)
+#else
+#define need_resched(ci) \
do { \
curcpu()->ci_want_resched = 1; \
aston(curcpu()); \
} while (/*CONSTCOND*/0)
+#endif
/*
* Give a profiling tick to the current process when the user profiling
diff --git a/sys/arch/alpha/include/intr.h b/sys/arch/alpha/include/intr.h
index a5813c6e1bf..25fdcba079d 100644
--- a/sys/arch/alpha/include/intr.h
+++ b/sys/arch/alpha/include/intr.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: intr.h,v 1.16 2004/04/16 04:52:05 pvalchev Exp $ */
+/* $OpenBSD: intr.h,v 1.17 2004/06/13 21:49:12 niklas Exp $ */
/* $NetBSD: intr.h,v 1.26 2000/06/03 20:47:41 thorpej Exp $ */
/*-
@@ -159,6 +159,7 @@ extern ipifunc_t ipifuncs[ALPHA_NIPIS];
void alpha_send_ipi(unsigned long, unsigned long);
void alpha_broadcast_ipi(unsigned long);
+void alpha_multicast_ipi(unsigned long, unsigned long);
/*
* Alpha shared-interrupt-line common code.
diff --git a/sys/arch/alpha/include/pmap.h b/sys/arch/alpha/include/pmap.h
index e61bba6497d..1a5c4784915 100644
--- a/sys/arch/alpha/include/pmap.h
+++ b/sys/arch/alpha/include/pmap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.h,v 1.17 2004/06/09 20:17:23 tedu Exp $ */
+/* $OpenBSD: pmap.h,v 1.18 2004/06/13 21:49:12 niklas Exp $ */
/* $NetBSD: pmap.h,v 1.37 2000/11/19 03:16:35 thorpej Exp $ */
/*-
@@ -178,7 +178,7 @@ struct pv_head {
#if defined(MULTIPROCESSOR)
void pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t);
-void pmap_do_tlb_shootdown(void);
+void pmap_do_tlb_shootdown(struct cpu_info *, struct trapframe *);
void pmap_tlb_shootdown_q_drain(u_long, boolean_t);
#define PMAP_TLB_SHOOTDOWN(pm, va, pte) \
pmap_tlb_shootdown((pm), (va), (pte))
diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf
index de6ab55ca81..a56769705b4 100644
--- a/sys/arch/amd64/amd64/genassym.cf
+++ b/sys/arch/amd64/amd64/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.3 2004/05/13 20:20:24 sturm Exp $
+# $OpenBSD: genassym.cf,v 1.4 2004/06/13 21:49:12 niklas Exp $
# Written by Artur Grabowski art@openbsd.org, Public Domain
@@ -17,6 +17,7 @@ include <machine/vmparam.h>
include <machine/intr.h>
export SRUN
+export SONPROC
export L4_SLOT_KERNBASE
define L3_SLOT_KERNBASE pl3_pi(KERNBASE)
diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S
index a61e1719e61..732eea0a0df 100644
--- a/sys/arch/amd64/amd64/locore.S
+++ b/sys/arch/amd64/amd64/locore.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.S,v 1.10 2004/05/13 20:20:24 sturm Exp $ */
+/* $OpenBSD: locore.S,v 1.11 2004/06/13 21:49:12 niklas Exp $ */
/* $NetBSD: locore.S,v 1.2 2003/04/26 19:34:45 fvdl Exp $ */
/*
@@ -848,9 +848,7 @@ switch_resume:
movq %rax,P_BACK(%r12)
/* Record new proc. */
-#ifdef MULTIPROCESSOR
- movb $SONPROC,P_STAT(%r12) # l->l_stat = SONPROC
-#endif
+ movb $SONPROC,P_STAT(%r12) # p->p_stat = SONPROC
SET_CURPROC(%r12,%rcx)
sti
diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index cc688f39aa6..9caf05b561d 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.6 2004/03/09 23:05:13 deraadt Exp $ */
+/* $OpenBSD: cpu.h,v 1.7 2004/06/13 21:49:13 niklas Exp $ */
/* $NetBSD: cpu.h,v 1.1 2003/04/26 18:39:39 fvdl Exp $ */
/*-
@@ -191,25 +191,15 @@ extern struct cpu_info cpu_info_primary;
* or after the current trap/syscall if in system mode.
*/
-#ifdef MULTIPROCESSOR
#define need_resched(ci) \
do { \
- struct cpu_info *__ci = (ci); \
+ struct cpu_info *__ci = curcpu(); \
__ci->ci_want_resched = 1; \
if (__ci->ci_curproc != NULL) \
aston(__ci->ci_curproc); \
} while (/*CONSTCOND*/0)
-#else
-#define need_resched() \
-do { \
- struct cpu_info *__ci = curcpu(); \
- __ci->ci_want_resched = 1; \
- if (__ci->ci_curproc != NULL) \
- aston(__ci->ci_curproc); \
-} while (/*CONSTCOND*/0)
-#endif
-#endif
+#endif /* MULTIPROCESSOR */
#define aston(p) ((p)->p_md.md_astpending = 1)
diff --git a/sys/arch/amd64/include/param.h b/sys/arch/amd64/include/param.h
index ea59b9e5273..09e070357a6 100644
--- a/sys/arch/amd64/include/param.h
+++ b/sys/arch/amd64/include/param.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: param.h,v 1.3 2004/04/19 22:55:48 deraadt Exp $ */
+/* $OpenBSD: param.h,v 1.4 2004/06/13 21:49:13 niklas Exp $ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
@@ -162,9 +162,3 @@
#define btop(x) x86_btop(x)
#define ptob(x) x86_ptob(x)
#define round_pdr(x) x86_round_pdr(x)
-
-/* XXX - oh, the horror.. Just for now. */
-#define KERNEL_PROC_LOCK(p)
-#define KERNEL_PROC_UNLOCK(p)
-#define KERNEL_LOCK(i)
-#define KERNEL_UNLOCK()
diff --git a/sys/arch/amd64/isa/isa_machdep.c b/sys/arch/amd64/isa/isa_machdep.c
index f86a49a00ed..2b3e249e7ae 100644
--- a/sys/arch/amd64/isa/isa_machdep.c
+++ b/sys/arch/amd64/isa/isa_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: isa_machdep.c,v 1.1 2004/01/28 01:39:39 mickey Exp $ */
+/* $OpenBSD: isa_machdep.c,v 1.2 2004/06/13 21:49:13 niklas Exp $ */
/* $NetBSD: isa_machdep.c,v 1.22 1997/06/12 23:57:32 thorpej Exp $ */
#define ISA_DMA_STATS
@@ -132,6 +132,7 @@
#include <machine/intr.h>
#include <machine/pio.h>
#include <machine/cpufunc.h>
+#include <machine/i8259.h>
#include <dev/isa/isareg.h>
#include <dev/isa/isavar.h>
@@ -139,7 +140,6 @@
#include <dev/isa/isadmavar.h>
#endif
#include <i386/isa/isa_machdep.h>
-#include <i386/isa/icu.h>
#include "isadma.h"
diff --git a/sys/arch/arm/arm/cpuswitch.S b/sys/arch/arm/arm/cpuswitch.S
index e7f673a8a45..813bb1c813f 100644
--- a/sys/arch/arm/arm/cpuswitch.S
+++ b/sys/arch/arm/arm/cpuswitch.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpuswitch.S,v 1.2 2004/02/01 06:10:33 drahn Exp $ */
+/* $OpenBSD: cpuswitch.S,v 1.3 2004/06/13 21:49:13 niklas Exp $ */
/* $NetBSD: cpuswitch.S,v 1.41 2003/11/15 08:44:18 scw Exp $ */
/*
@@ -537,11 +537,9 @@ ENTRY(cpu_switch)
/* l->l_cpu initialized in fork1() for single-processor */
#endif
-#if 0
/* Process is now on a processor. */
- mov r0, #LSONPROC /* l->l_stat = LSONPROC */
- str r0, [r6, #(P_STAT)]
-#endif
+ mov r0, #SONPROC /* p->p_stat = SONPROC */
+ strb r0, [r6, #(P_STAT)]
/* We have a new curproc now so make a note it */
ldr r7, .Lcurproc
diff --git a/sys/arch/arm/arm/genassym.cf b/sys/arch/arm/arm/genassym.cf
index 6803c524393..83733107879 100644
--- a/sys/arch/arm/arm/genassym.cf
+++ b/sys/arch/arm/arm/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.2 2004/02/14 15:34:31 miod Exp $
+# $OpenBSD: genassym.cf,v 1.3 2004/06/13 21:49:13 niklas Exp $
# $NetBSD: genassym.cf,v 1.27 2003/11/04 10:33:16 dsl Exp$
# Copyright (c) 1982, 1990 The Regents of the University of California.
@@ -83,6 +83,8 @@ export PAGE_SHIFT
export P_TRACED
export P_PROFIL
+export SONPROC
+
struct proc
member p_forw
member p_back
diff --git a/sys/arch/hp300/include/cpu.h b/sys/arch/hp300/include/cpu.h
index b304d633999..183dbb8ded8 100644
--- a/sys/arch/hp300/include/cpu.h
+++ b/sys/arch/hp300/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.23 2004/05/20 09:20:41 kettenis Exp $ */
+/* $OpenBSD: cpu.h,v 1.24 2004/06/13 21:49:13 niklas Exp $ */
/* $NetBSD: cpu.h,v 1.28 1998/02/13 07:41:51 scottr Exp $ */
/*
@@ -91,7 +91,7 @@ struct clockframe {
* or after the current trap/syscall if in system mode.
*/
extern int want_resched; /* resched() was called */
-#define need_resched() { want_resched++; aston(); }
+#define need_resched(ci) { want_resched++; aston(); }
/*
* Give a profiling tick to the current process when the user profiling
diff --git a/sys/arch/hppa/hppa/genassym.cf b/sys/arch/hppa/hppa/genassym.cf
index ee718b10e25..c9f494d4e20 100644
--- a/sys/arch/hppa/hppa/genassym.cf
+++ b/sys/arch/hppa/hppa/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.25 2004/06/08 22:00:25 mickey Exp $
+# $OpenBSD: genassym.cf,v 1.26 2004/06/13 21:49:14 niklas Exp $
#
# Copyright (c) 1982, 1990, 1993
@@ -142,6 +142,7 @@ member P_MD_FLAGS p_md.md_flags
member P_MD_REGS p_md.md_regs
export SRUN
+export SONPROC
struct pcb
member pcb_fpregs
diff --git a/sys/arch/hppa/hppa/locore.S b/sys/arch/hppa/hppa/locore.S
index 883c4d3eab8..218c95974b2 100644
--- a/sys/arch/hppa/hppa/locore.S
+++ b/sys/arch/hppa/hppa/locore.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.S,v 1.133 2004/06/08 22:00:25 mickey Exp $ */
+/* $OpenBSD: locore.S,v 1.134 2004/06/13 21:49:14 niklas Exp $ */
/*
* Copyright (c) 1998-2004 Michael Shalayeff
@@ -2824,6 +2824,8 @@ link_ok
copy arg1, t2
kstack_ok
#endif
+ ldi SONPROC, t1
+ stb t1, P_STAT(arg1)
/* Skip context switch if same process. */
comb,=,n arg1, arg2, switch_return
diff --git a/sys/arch/hppa/include/cpu.h b/sys/arch/hppa/include/cpu.h
index eff31c9d750..5eb720ab0e9 100644
--- a/sys/arch/hppa/include/cpu.h
+++ b/sys/arch/hppa/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.42 2004/04/08 17:10:18 mickey Exp $ */
+/* $OpenBSD: cpu.h,v 1.43 2004/06/13 21:49:14 niklas Exp $ */
/*
* Copyright (c) 2000-2004 Michael Shalayeff
@@ -124,7 +124,7 @@ extern int cpu_hvers;
#define CLKF_SYSCALL(framep) ((framep)->tf_flags & TFF_SYS)
#define signotify(p) (setsoftast())
-#define need_resched() (want_resched = 1, setsoftast())
+#define need_resched(ci) (want_resched = 1, setsoftast())
#define need_proftick(p) ((p)->p_flag |= P_OWEUPC, setsoftast())
#ifndef _LOCORE
diff --git a/sys/arch/i386/compile/.cvsignore b/sys/arch/i386/compile/.cvsignore
index bf45b1f61e5..07993a81a78 100644
--- a/sys/arch/i386/compile/.cvsignore
+++ b/sys/arch/i386/compile/.cvsignore
@@ -1,5 +1,6 @@
DISKLESS
GENERIC
+GENERIC.MP
RAMDISK
RAMDISKB
RAMDISKC
diff --git a/sys/arch/i386/conf/GENERIC b/sys/arch/i386/conf/GENERIC
index 7e6a393b230..19025bed6e7 100644
--- a/sys/arch/i386/conf/GENERIC
+++ b/sys/arch/i386/conf/GENERIC
@@ -1,4 +1,4 @@
-# $OpenBSD: GENERIC,v 1.370 2004/06/06 04:50:57 pvalchev Exp $
+# $OpenBSD: GENERIC,v 1.371 2004/06/13 21:49:15 niklas Exp $
#
# GENERIC -- everything that's currently supported
#
@@ -39,7 +39,8 @@ config bsd swap generic
mainbus0 at root
-bios0 at mainbus0
+cpu0 at mainbus? apid ?
+bios0 at mainbus0 apid ?
apm0 at bios0 flags 0x0000 # flags 0x0101 to force protocol version 1.1
pcibios0 at bios0 flags 0x0000 # use 0x30 for a total verbose
diff --git a/sys/arch/i386/conf/GENERIC.MP b/sys/arch/i386/conf/GENERIC.MP
new file mode 100644
index 00000000000..4bebe33f96d
--- /dev/null
+++ b/sys/arch/i386/conf/GENERIC.MP
@@ -0,0 +1,12 @@
+# $OpenBSD: GENERIC.MP,v 1.2 2004/06/13 21:49:15 niklas Exp $
+#
+# GENERIC.MP - sample multiprocessor kernel
+#
+
+include "arch/i386/conf/GENERIC"
+
+option MULTIPROCESSOR # Multiple processor support
+option CPU
+
+cpu* at mainbus? apid ?
+ioapic* at mainbus? apid ?
diff --git a/sys/arch/i386/conf/RAMDISK b/sys/arch/i386/conf/RAMDISK
index b7f528ea3d3..c377ae6ce8f 100644
--- a/sys/arch/i386/conf/RAMDISK
+++ b/sys/arch/i386/conf/RAMDISK
@@ -1,4 +1,4 @@
-# $OpenBSD: RAMDISK,v 1.129 2004/06/03 18:22:21 grange Exp $
+# $OpenBSD: RAMDISK,v 1.130 2004/06/13 21:49:15 niklas Exp $
machine i386 # architecture, used by config; REQUIRED
@@ -42,7 +42,8 @@ config bsd root on rd0a swap on rd0b and wd0b and sd0b
mainbus0 at root
-bios0 at mainbus0
+cpu0 at mainbus? apid ?
+bios0 at mainbus0 apid ?
apm0 at bios0 flags 0x0000 # flags 0x0101 to force protocol version 1.1
#option APMDEBUG
pcibios0 at bios0 flags 0x0000
diff --git a/sys/arch/i386/conf/RAMDISKB b/sys/arch/i386/conf/RAMDISKB
index c7a0d727224..53184d25203 100644
--- a/sys/arch/i386/conf/RAMDISKB
+++ b/sys/arch/i386/conf/RAMDISKB
@@ -1,4 +1,4 @@
-# $OpenBSD: RAMDISKB,v 1.70 2004/06/03 18:22:21 grange Exp $
+# $OpenBSD: RAMDISKB,v 1.71 2004/06/13 21:49:15 niklas Exp $
machine i386 # architecture, used by config; REQUIRED
@@ -42,7 +42,8 @@ config bsd root on rd0a swap on rd0b and wd0b and sd0b
mainbus0 at root
-bios0 at mainbus0
+cpu0 at mainbus? apid ?
+bios0 at mainbus0 apid ?
apm0 at bios0 flags 0x0000 # flags 0x0101 to force protocol version 1.1
#option APMDEBUG
pcibios0 at bios0 flags 0x0000
diff --git a/sys/arch/i386/conf/RAMDISKC b/sys/arch/i386/conf/RAMDISKC
index 76e9db57d6f..81764b62557 100644
--- a/sys/arch/i386/conf/RAMDISKC
+++ b/sys/arch/i386/conf/RAMDISKC
@@ -1,4 +1,4 @@
-# $OpenBSD: RAMDISKC,v 1.39 2004/06/03 18:22:21 grange Exp $
+# $OpenBSD: RAMDISKC,v 1.40 2004/06/13 21:49:15 niklas Exp $
machine i386 # architecture, used by config; REQUIRED
@@ -42,7 +42,8 @@ config bsd root on rd0a swap on rd0b and wd0b and sd0b
mainbus0 at root
-bios0 at mainbus0
+cpu0 at mainbus? apid ?
+bios0 at mainbus0 apid ?
apm0 at bios0 flags 0x0000 # flags 0x0101 to force protocol version 1.1
#option APMDEBUG
pcibios0 at bios0 flags 0x0000
diff --git a/sys/arch/i386/conf/RAMDISK_CD b/sys/arch/i386/conf/RAMDISK_CD
index 830c763eabc..1ea5cf46878 100644
--- a/sys/arch/i386/conf/RAMDISK_CD
+++ b/sys/arch/i386/conf/RAMDISK_CD
@@ -1,4 +1,4 @@
-# $OpenBSD: RAMDISK_CD,v 1.75 2004/06/06 04:50:57 pvalchev Exp $
+# $OpenBSD: RAMDISK_CD,v 1.76 2004/06/13 21:49:15 niklas Exp $
machine i386 # architecture, used by config; REQUIRED
@@ -43,7 +43,8 @@ config bsd root on rd0a swap on rd0b and wd0b and sd0b
mainbus0 at root
-bios0 at mainbus0
+cpu0 at mainbus? apid ?
+bios0 at mainbus0 apid ?
apm0 at bios0 flags 0x0000 # flags 0x0101 to force protocol version 1.1
#option APMDEBUG
pcibios0 at bios0 flags 0x0000
diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386
index 02b5d8ee2c5..a4cc18bd5e9 100644
--- a/sys/arch/i386/conf/files.i386
+++ b/sys/arch/i386/conf/files.i386
@@ -1,4 +1,4 @@
-# $OpenBSD: files.i386,v 1.122 2004/06/05 15:06:22 grange Exp $
+# $OpenBSD: files.i386,v 1.123 2004/06/13 21:49:15 niklas Exp $
#
# new style config file for i386 architecture
#
@@ -44,6 +44,10 @@ file arch/i386/i386/dkcsum.c bios
file lib/libz/adler32.c !ppp_deflate & !ipsec & !crypto
file dev/cons.c
file dev/cninit.c
+file arch/i386/i386/mptramp.s multiprocessor
+file arch/i386/i386/lock_machdep.c multiprocessor
+file arch/i386/i386/ipifuncs.c multiprocessor
+file arch/i386/i386/db_mp.c multiprocessor
file arch/i386/i386/wscons_machdep.c wsdisplay
major {vnd = 14}
@@ -78,7 +82,7 @@ include "../../../dev/ata/files.ata"
# System bus types
#
-define mainbus { }
+define mainbus { apid = -1 }
device mainbus: isabus, eisabus, pcibus, mainbus
attach mainbus at root
file arch/i386/i386/mainbus.c mainbus
@@ -272,6 +276,28 @@ attach apm at bios
file arch/i386/i386/apm.c apm needs-count
file arch/i386/i386/apmcall.S apm
+# Intel SMP specification 1.4
+define mpbios
+file arch/i386/i386/mpbios.c mpbios needs-flag
+
+# CPUS
+define cpu { apid = -1 }
+device cpu
+attach cpu at mainbus
+file arch/i386/i386/cpu.c cpu
+
+# Common APIC support routines
+file arch/i386/i386/apic.c ioapic | lapic
+
+# Local APIC (required for multiprocessor)
+define lapic
+file arch/i386/i386/lapic.c lapic needs-flag
+
+# I/O APICs
+device ioapic: mpbios, lapic
+attach ioapic at mainbus
+file arch/i386/i386/ioapic.c ioapic needs-flag
+
device pcibios
attach pcibios at bios
file arch/i386/pci/pcibios.c pcibios needs-count
diff --git a/sys/arch/i386/eisa/eisa_machdep.c b/sys/arch/i386/eisa/eisa_machdep.c
index 178950e9e86..e086091fd75 100644
--- a/sys/arch/i386/eisa/eisa_machdep.c
+++ b/sys/arch/i386/eisa/eisa_machdep.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: eisa_machdep.c,v 1.7 2004/06/09 20:13:10 deraadt Exp $ */
-/* $NetBSD: eisa_machdep.c,v 1.6 1997/06/06 23:12:52 thorpej Exp $ */
+/* $OpenBSD: eisa_machdep.c,v 1.8 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: eisa_machdep.c,v 1.10.22.2 2000/06/25 19:36:58 sommerfeld Exp $ */
/*-
* Copyright (c) 1997 The NetBSD Foundation, Inc.
@@ -81,8 +81,8 @@
#define _I386_BUS_DMA_PRIVATE
#include <machine/bus.h>
+#include <machine/i8259.h>
-#include <i386/isa/icu.h>
#include <dev/isa/isavar.h>
#include <dev/eisa/eisavar.h>
@@ -133,6 +133,9 @@ eisa_intr_map(ec, irq, ihp)
u_int irq;
eisa_intr_handle_t *ihp;
{
+#if NIOAPIC > 0
+ struct mp_intr_map *mip;
+#endif
if (irq >= ICU_LEN) {
printf("eisa_intr_map: bad IRQ %d\n", irq);
@@ -144,6 +147,26 @@ eisa_intr_map(ec, irq, ihp)
irq = 9;
}
+#if NIOAPIC > 0
+ if (mp_busses != NULL) {
+ /*
+ * Assumes 1:1 mapping between PCI bus numbers and
+ * the numbers given by the MP bios.
+ * XXX Is this a valid assumption?
+ */
+
+ for (mip = mp_busses[bus].mb_intrs; mip != NULL;
+ mip = mip->next) {
+ if (mip->bus_pin == irq) {
+ *ihp = mip->ioapic_ih | irq;
+ return (0);
+ }
+ }
+ if (mip == NULL)
+ printf("eisa_intr_map: no MP mapping found\n");
+ }
+#endif
+
*ihp = irq;
return (0);
}
@@ -155,9 +178,17 @@ eisa_intr_string(ec, ih)
{
static char irqstr[8]; /* 4 + 2 + NUL + sanity */
- if (ih == 0 || ih >= ICU_LEN || ih == 2)
+ if (ih == 0 || (ih & 0xff) >= ICU_LEN || ih == 2)
panic("eisa_intr_string: bogus handle 0x%x", ih);
+#if NIOAPIC > 0
+ if (ih & APIC_INT_VIA_APIC) {
+ sprintf(irqstr, "apic %d int %d (irq %d)",
+ APIC_IRQ_APIC(ih), APIC_IRQ_PIN(ih), ih & 0xff);
+ return (irqstr);
+ }
+#endif
+
snprintf(irqstr, sizeof irqstr, "irq %d", ih);
return (irqstr);
@@ -171,7 +202,14 @@ eisa_intr_establish(ec, ih, type, level, func, arg, what)
void *arg;
char *what;
{
-
+#if NIOAPIC > 0
+ if (ih != -1) {
+ if (ih != -1 && (ih & APIC_INT_VIA_APIC)) {
+ return (apic_intr_establish(ih, type, level, func, arg,
+ what));
+ }
+ }
+#endif
if (ih == 0 || ih >= ICU_LEN || ih == 2)
panic("eisa_intr_establish: bogus handle 0x%x", ih);
@@ -183,6 +221,5 @@ eisa_intr_disestablish(ec, cookie)
eisa_chipset_tag_t ec;
void *cookie;
{
-
return (isa_intr_disestablish(NULL, cookie));
}
diff --git a/sys/arch/i386/eisa/eisa_machdep.h b/sys/arch/i386/eisa/eisa_machdep.h
index a313d805fae..fccdd343245 100644
--- a/sys/arch/i386/eisa/eisa_machdep.h
+++ b/sys/arch/i386/eisa/eisa_machdep.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: eisa_machdep.h,v 1.5 2002/03/14 03:15:53 millert Exp $ */
+/* $OpenBSD: eisa_machdep.h,v 1.6 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: eisa_machdep.h,v 1.4 1997/06/06 23:12:52 thorpej Exp $ */
/*
@@ -45,6 +45,9 @@
extern struct i386_bus_dma_tag eisa_bus_dma_tag;
+#define ELCR0 0x4d0 /* eisa irq 0-7 */
+#define ELCR1 0x4d1 /* eisa irq 8-15 */
+
/*
* Types provided to machine-independent EISA code.
*/
diff --git a/sys/arch/i386/i386/apic.c b/sys/arch/i386/i386/apic.c
new file mode 100644
index 00000000000..6ed294770f3
--- /dev/null
+++ b/sys/arch/i386/i386/apic.c
@@ -0,0 +1,79 @@
+/* $OpenBSD: apic.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: apic.c,v 1.1.2.2 2000/02/21 18:51:00 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+#include <machine/apicvar.h>
+
+
+const char redirlofmt[] = "\177\20"
+ "f\0\10vector\0"
+ "f\10\3delmode\0"
+ "b\13logical\0"
+ "b\14pending\0"
+ "b\15actlo\0"
+ "b\16irrpending\0"
+ "b\17level\0"
+ "b\20masked\0"
+ "f\22\1dest\0" "=\1self" "=\2all" "=\3all-others";
+
+const char redirhifmt[] = "\177\20"
+ "f\30\10target\0";
+
+void
+apic_format_redir(where1, where2, idx, redirhi, redirlo)
+ char *where1;
+ char *where2;
+ int idx;
+ u_int32_t redirhi;
+ u_int32_t redirlo;
+{
+ printf("%s: %s%d 0x%x", where1, where2, idx, redirlo);
+
+ if ((redirlo & LAPIC_DEST_MASK) == 0)
+ printf(" 0x%x", redirhi);
+
+ printf("\n");
+}
+
diff --git a/sys/arch/i386/i386/apicvec.s b/sys/arch/i386/i386/apicvec.s
new file mode 100644
index 00000000000..c2fadf429be
--- /dev/null
+++ b/sys/arch/i386/i386/apicvec.s
@@ -0,0 +1,250 @@
+/* $OpenBSD: apicvec.s,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: apicvec.s,v 1.1.2.2 2000/02/21 21:54:01 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/i82093reg.h>
+#include <machine/i82489reg.h>
+
+#ifdef __ELF__
+#define XINTR(vec) Xintr/**/vec
+#else
+#define XINTR(vec) _Xintr/**/vec
+#endif
+
+#ifdef MULTIPROCESSOR
+ .globl XINTR(ipi)
+XINTR(ipi):
+ pushl $0
+ pushl $T_ASTFLT
+ INTRENTRY
+ MAKE_FRAME
+ pushl CPL
+ movl _C_LABEL(lapic_ppr),%eax
+ movl %eax,CPL
+ ioapic_asm_ack()
+ sti /* safe to take interrupts.. */
+ call _C_LABEL(i386_ipi_handler)
+ jmp _C_LABEL(Xdoreti)
+#endif
+
+ /*
+ * Interrupt from the local APIC timer.
+ */
+ .globl XINTR(ltimer)
+XINTR(ltimer):
+ pushl $0
+ pushl $T_ASTFLT
+ INTRENTRY
+ MAKE_FRAME
+ pushl CPL
+ movl _C_LABEL(lapic_ppr),%eax
+ movl %eax,CPL
+ ioapic_asm_ack()
+ sti
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintlock)
+#endif
+ movl %esp,%eax
+ pushl %eax
+ call _C_LABEL(lapic_clockintr)
+ addl $4,%esp
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintunlock)
+#endif
+ jmp _C_LABEL(Xdoreti)
+
+ .globl XINTR(softclock), XINTR(softnet), XINTR(softtty)
+XINTR(softclock):
+ pushl $0
+ pushl $T_ASTFLT
+ INTRENTRY
+ MAKE_FRAME
+ pushl CPL
+ movl $IPL_SOFTCLOCK,CPL
+ andl $~(1<<SIR_CLOCK),_C_LABEL(ipending)
+ ioapic_asm_ack()
+ sti
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintlock)
+#endif
+ call _C_LABEL(softclock)
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintunlock)
+#endif
+ jmp _C_LABEL(Xdoreti)
+
+#define DONETISR(s, c) \
+ .globl _C_LABEL(c) ;\
+ testl $(1 << s),%edi ;\
+ jz 1f ;\
+ call _C_LABEL(c) ;\
+1:
+
+XINTR(softnet):
+ pushl $0
+ pushl $T_ASTFLT
+ INTRENTRY
+ MAKE_FRAME
+ pushl CPL
+ movl $IPL_SOFTNET,CPL
+ andl $~(1<<SIR_NET),_C_LABEL(ipending)
+ ioapic_asm_ack()
+ sti
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintlock)
+#endif
+ xorl %edi,%edi
+ xchgl _C_LABEL(netisr),%edi
+#include <net/netisr_dispatch.h>
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintunlock)
+#endif
+ jmp _C_LABEL(Xdoreti)
+#undef DONETISR
+
+XINTR(softtty):
+ pushl $0
+ pushl $T_ASTFLT
+ INTRENTRY
+ MAKE_FRAME
+ pushl CPL
+ movl $IPL_SOFTTTY,CPL
+ andl $~(1<<SIR_TTY),_C_LABEL(ipending)
+ ioapic_asm_ack()
+ sti
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintlock)
+#endif
+ call _C_LABEL(comsoft)
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintunlock)
+#endif
+ jmp _C_LABEL(Xdoreti)
+
+#if NIOAPIC > 0
+
+#define voidop(num)
+
+ /*
+ * I/O APIC interrupt.
+ * We sort out which one is which based on the value of
+ * the processor priority register.
+ *
+ * XXX no stray interrupt mangling stuff..
+ * XXX use cmove when appropriate.
+ */
+
+#define APICINTR(name, num, early_ack, late_ack, mask, unmask, level_mask) \
+_C_LABEL(Xintr_/**/name/**/num): \
+ pushl $0 ;\
+ pushl $T_ASTFLT ;\
+ INTRENTRY ;\
+ MAKE_FRAME ;\
+ pushl CPL ;\
+ movl _C_LABEL(lapic_ppr),%eax ;\
+ movl %eax,CPL ;\
+ mask(num) /* mask it in hardware */ ;\
+ early_ack(num) /* and allow other intrs */ ;\
+ incl MY_COUNT+V_INTR /* statistical info */ ;\
+ sti ;\
+ orl $num,%eax ;\
+ incl _C_LABEL(apic_intrcount)(,%eax,4) ;\
+ movl _C_LABEL(apic_intrhand)(,%eax,4),%ebx /* chain head */ ;\
+ testl %ebx,%ebx ;\
+ jz 8f /* oops, no handlers.. */ ;\
+7: \
+ LOCK_KERNEL ;\
+ movl IH_ARG(%ebx),%eax /* get handler arg */ ;\
+ testl %eax,%eax ;\
+ jnz 6f ;\
+ movl %esp,%eax /* 0 means frame pointer */ ;\
+6: \
+ pushl %eax ;\
+ call *IH_FUN(%ebx) /* call it */ ;\
+ addl $4,%esp /* toss the arg */ ;\
+ UNLOCK_KERNEL ;\
+ incl IH_COUNT(%ebx) /* count the intrs */ ;\
+ movl IH_NEXT(%ebx),%ebx /* next handler in chain */ ;\
+ testl %ebx,%ebx ;\
+ jnz 7b ;\
+8: \
+ unmask(num) /* unmask it in hardware */ ;\
+ late_ack(num) ;\
+ jmp _C_LABEL(Xdoreti)
+
+APICINTR(ioapic,0, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,1, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,2, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,3, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,4, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,5, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,6, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,7, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,8, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,9, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,10, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,11, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,12, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,13, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,14, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+APICINTR(ioapic,15, voidop, ioapic_asm_ack, voidop, voidop, voidop)
+
+ .globl _C_LABEL(Xintr_ioapic0),_C_LABEL(Xintr_ioapic1)
+ .globl _C_LABEL(Xintr_ioapic2),_C_LABEL(Xintr_ioapic3)
+ .globl _C_LABEL(Xintr_ioapic4),_C_LABEL(Xintr_ioapic5)
+ .globl _C_LABEL(Xintr_ioapic6),_C_LABEL(Xintr_ioapic7)
+ .globl _C_LABEL(Xintr_ioapic8),_C_LABEL(Xintr_ioapic9)
+ .globl _C_LABEL(Xintr_ioapic10),_C_LABEL(Xintr_ioapic11)
+ .globl _C_LABEL(Xintr_ioapic12),_C_LABEL(Xintr_ioapic13)
+ .globl _C_LABEL(Xintr_ioapic14),_C_LABEL(Xintr_ioapic15)
+ .globl _C_LABEL(apichandler)
+
+_C_LABEL(apichandler):
+ .long _C_LABEL(Xintr_ioapic0),_C_LABEL(Xintr_ioapic1)
+ .long _C_LABEL(Xintr_ioapic2),_C_LABEL(Xintr_ioapic3)
+ .long _C_LABEL(Xintr_ioapic4),_C_LABEL(Xintr_ioapic5)
+ .long _C_LABEL(Xintr_ioapic6),_C_LABEL(Xintr_ioapic7)
+ .long _C_LABEL(Xintr_ioapic8),_C_LABEL(Xintr_ioapic9)
+ .long _C_LABEL(Xintr_ioapic10),_C_LABEL(Xintr_ioapic11)
+ .long _C_LABEL(Xintr_ioapic12),_C_LABEL(Xintr_ioapic13)
+ .long _C_LABEL(Xintr_ioapic14),_C_LABEL(Xintr_ioapic15)
+
+#endif
+
diff --git a/sys/arch/i386/i386/apm.c b/sys/arch/i386/i386/apm.c
index 1ade5dfb400..f996be58a37 100644
--- a/sys/arch/i386/i386/apm.c
+++ b/sys/arch/i386/i386/apm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: apm.c,v 1.62 2004/05/27 08:19:59 tedu Exp $ */
+/* $OpenBSD: apm.c,v 1.63 2004/06/13 21:49:15 niklas Exp $ */
/*-
* Copyright (c) 1998-2001 Michael Shalayeff. All rights reserved.
@@ -777,7 +777,6 @@ apmattach(parent, self, aux)
struct device *parent, *self;
void *aux;
{
- extern union descriptor *dynamic_gdt;
struct bios_attach_args *ba = aux;
bios_apminfo_t *ap = ba->bios_apmp;
struct apm_softc *sc = (void *)self;
@@ -843,12 +842,12 @@ apmattach(parent, self, aux)
else
ch16 += ap->apm_code16_base - cbase;
- setsegment(&dynamic_gdt[GAPM32CODE_SEL].sd, (void *)ch32,
- ap->apm_code_len, SDT_MEMERA, SEL_KPL, 1, 0);
- setsegment(&dynamic_gdt[GAPM16CODE_SEL].sd, (void *)ch16,
- ap->apm_code16_len, SDT_MEMERA, SEL_KPL, 0, 0);
- setsegment(&dynamic_gdt[GAPMDATA_SEL].sd, (void *)dh,
- ap->apm_data_len, SDT_MEMRWA, SEL_KPL, 1, 0);
+ setgdt(GAPM32CODE_SEL, (void *)ch32, ap->apm_code_len,
+ SDT_MEMERA, SEL_KPL, 1, 0);
+ setgdt(GAPM16CODE_SEL, (void *)ch16, ap->apm_code16_len,
+ SDT_MEMERA, SEL_KPL, 0, 0);
+ setgdt(GAPMDATA_SEL, (void *)dh, ap->apm_data_len, SDT_MEMRWA,
+ SEL_KPL, 1, 0);
DPRINTF((": flags %x code 32:%x/%x[%x] 16:%x/%x[%x] "
"data %x/%x/%x ep %x (%x:%x)\n%s", apm_flags,
ap->apm_code32_base, ch32, ap->apm_code_len,
@@ -890,9 +889,9 @@ apmattach(parent, self, aux)
} else
kthread_create_deferred(apm_thread_create, sc);
} else {
- dynamic_gdt[GAPM32CODE_SEL] = dynamic_gdt[GNULL_SEL];
- dynamic_gdt[GAPM16CODE_SEL] = dynamic_gdt[GNULL_SEL];
- dynamic_gdt[GAPMDATA_SEL] = dynamic_gdt[GNULL_SEL];
+ setgdt(GAPM32CODE_SEL, NULL, 0, 0, 0, 0, 0);
+ setgdt(GAPM16CODE_SEL, NULL, 0, 0, 0, 0, 0);
+ setgdt(GAPMDATA_SEL, NULL, 0, 0, 0, 0, 0);
}
}
@@ -901,6 +900,15 @@ apm_thread_create(v)
void *v;
{
struct apm_softc *sc = v;
+
+#ifdef MULTIPROCESSOR
+ if (ncpus > 1) {
+ apm_disconnect(sc);
+ apm_dobusy = apm_doidle = 0;
+ return;
+ }
+#endif
+
if (kthread_create(apm_thread, sc, &sc->sc_thread,
"%s", sc->sc_dev.dv_xname)) {
apm_disconnect(sc);
diff --git a/sys/arch/i386/i386/autoconf.c b/sys/arch/i386/i386/autoconf.c
index f0a5b2f1de0..95a3059984b 100644
--- a/sys/arch/i386/i386/autoconf.c
+++ b/sys/arch/i386/i386/autoconf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: autoconf.c,v 1.52 2003/10/15 03:56:21 david Exp $ */
+/* $OpenBSD: autoconf.c,v 1.53 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: autoconf.c,v 1.20 1996/05/03 19:41:56 christos Exp $ */
/*-
@@ -61,6 +61,12 @@
#include <dev/cons.h>
+#include "ioapic.h"
+
+#if NIOAPIC > 0
+#include <machine/i82093var.h>
+#endif
+
int findblkmajor(struct device *dv);
char *findblkname(int);
@@ -109,6 +115,14 @@ cpu_configure()
printf("biomask %x netmask %x ttymask %x\n", (u_short)IMASK(IPL_BIO),
(u_short)IMASK(IPL_NET), (u_short)IMASK(IPL_TTY));
+#if NIOAPIC > 0
+ ioapic_enable();
+#endif
+
+#ifdef MULTIPROCESSOR
+ /* propagate TSS and LDT configuration to the idle pcb's. */
+ cpu_init_idle_pcbs();
+#endif
spl0();
/*
diff --git a/sys/arch/i386/i386/bios.c b/sys/arch/i386/i386/bios.c
index 74691de83d9..a9576ccd41f 100644
--- a/sys/arch/i386/i386/bios.c
+++ b/sys/arch/i386/i386/bios.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: bios.c,v 1.54 2004/01/29 01:36:13 tom Exp $ */
+/* $OpenBSD: bios.c,v 1.55 2004/06/13 21:49:15 niklas Exp $ */
/*
* Copyright (c) 1997-2001 Michael Shalayeff
@@ -85,9 +85,12 @@ bios_apminfo_t *apm;
bios_pciinfo_t *bios_pciinfo;
#endif
bios_diskinfo_t *bios_diskinfo;
-bios_memmap_t *bios_memmap;
+bios_memmap_t *bios_memmap;
u_int32_t bios_cksumlen;
struct bios32_entry bios32_entry;
+#ifdef MULTIPROCESSOR
+void *bios_smpinfo;
+#endif
bios_diskinfo_t *bios_getdiskinfo(dev_t);
@@ -324,6 +327,12 @@ bios_getopt()
cnset(cdp->consdev);
}
break;
+#ifdef MULTIPROCESSOR
+ case BOOTARG_SMPINFO:
+ bios_smpinfo = q->ba_arg;
+ printf(" smpinfo %p", bios_smpinfo);
+ break;
+#endif
default:
#ifdef BIOS_DEBUG
@@ -356,9 +365,6 @@ bios32_service(service, e, ei)
bios32_entry_t e;
bios32_entry_info_t ei;
{
- extern union descriptor *dynamic_gdt;
- extern int gdt_get_slot(void);
-
u_long pa, endpa;
vaddr_t va, sva;
u_int32_t base, count, off, ent;
@@ -388,8 +394,7 @@ bios32_service(service, e, ei)
return (0);
slot = gdt_get_slot();
- setsegment(&dynamic_gdt[slot].sd, (caddr_t)va, BIOS32_END,
- SDT_MEMERA, SEL_KPL, 1, 0);
+ setgdt(slot, (caddr_t)va, BIOS32_END, SDT_MEMERA, SEL_KPL, 1, 0);
for (pa = i386_trunc_page(BIOS32_START),
va += i386_trunc_page(BIOS32_START);
diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c
new file mode 100644
index 00000000000..c447da4e533
--- /dev/null
+++ b/sys/arch/i386/i386/cpu.c
@@ -0,0 +1,606 @@
+/* $OpenBSD: cpu.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: cpu.c,v 1.1.2.7 2000/06/26 02:04:05 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999 Stefan Grefen
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "lapic.h"
+#include "ioapic.h"
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/cpu.h>
+#include <machine/cpufunc.h>
+#include <machine/cpuvar.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/mpbiosvar.h>
+#include <machine/npx.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+#include <machine/segments.h>
+#include <machine/gdt.h>
+#include <machine/pio.h>
+
+#if NLAPIC > 0
+#include <machine/apicvar.h>
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+#endif
+
+#if NIOAPIC > 0
+#include <machine/i82093var.h>
+#endif
+
+#include <dev/ic/mc146818reg.h>
+#include <i386/isa/nvram.h>
+#include <dev/isa/isareg.h>
+
+int cpu_match(struct device *, void *, void *);
+void cpu_attach(struct device *, struct device *, void *);
+
+#ifdef MULTIPROCESSOR
+int mp_cpu_start(struct cpu_info *);
+void mp_cpu_start_cleanup(struct cpu_info *);
+struct cpu_functions mp_cpu_funcs =
+ { mp_cpu_start, NULL, mp_cpu_start_cleanup };
+#endif
+
+/*
+ * Statically-allocated CPU info for the primary CPU (or the only
+ * CPU, on uniprocessors). The CPU info list is initialized to
+ * point at it.
+ */
+struct cpu_info cpu_info_primary;
+struct cpu_info *cpu_info_list = &cpu_info_primary;
+
+void cpu_init_tss(struct i386tss *, void *, void *);
+void cpu_set_tss_gates(struct cpu_info *);
+
+#ifdef MULTIPROCESSOR
+/*
+ * Array of CPU info structures. Must be statically-allocated because
+ * curproc, etc. are used early.
+ */
+
+struct cpu_info *cpu_info[I386_MAXPROCS] = { &cpu_info_primary };
+
+void cpu_hatch(void *);
+void cpu_boot_secondary(struct cpu_info *);
+void cpu_copy_trampoline(void);
+
+/*
+ * Runs once per boot once multiprocessor goo has been detected and
+ * the local APIC has been mapped.
+ * Called from mpbios_scan();
+ */
+void
+cpu_init_first()
+{
+ int cpunum = cpu_number();
+
+ if (cpunum != 0) {
+ cpu_info[0] = NULL;
+ cpu_info[cpunum] = &cpu_info_primary;
+ }
+
+ cpu_copy_trampoline();
+}
+#endif
+
+struct cfattach cpu_ca = {
+ sizeof(struct cpu_info), cpu_match, cpu_attach
+};
+
+struct cfdriver cpu_cd = {
+ NULL, "cpu", DV_DULL /* XXX DV_CPU */
+};
+
+int
+cpu_match(parent, matchv, aux)
+ struct device *parent;
+ void *matchv;
+ void *aux;
+{
+ struct cfdata *match = (struct cfdata *)matchv;
+ struct cpu_attach_args *caa = (struct cpu_attach_args *)aux;
+
+ if (strcmp(caa->caa_name, match->cf_driver->cd_name) == 0)
+ return (1);
+ return (0);
+}
+
+void
+cpu_attach(parent, self, aux)
+ struct device *parent, *self;
+ void *aux;
+{
+ struct cpu_info *ci = (struct cpu_info *)self;
+ struct cpu_attach_args *caa = (struct cpu_attach_args *)aux;
+
+#ifdef MULTIPROCESSOR
+ int cpunum = caa->cpu_number;
+ vaddr_t kstack;
+ struct pcb *pcb;
+
+ if (caa->cpu_role != CPU_ROLE_AP) {
+ if (cpunum != cpu_number()) {
+ panic("%s: running cpu is at apic %d"
+ " instead of at expected %d\n",
+ self->dv_xname, cpu_number(), cpunum);
+ }
+
+ ci = &cpu_info_primary;
+ bcopy(self, &ci->ci_dev, sizeof *self);
+
+ /* special-case boot CPU */ /* XXX */
+ if (cpu_info[cpunum] == &cpu_info_primary) { /* XXX */
+ cpu_info[cpunum] = NULL; /* XXX */
+ } /* XXX */
+ }
+ if (cpu_info[cpunum] != NULL)
+ panic("cpu at apic id %d already attached?", cpunum);
+
+ cpu_info[cpunum] = ci;
+#endif
+
+ ci->ci_self = ci;
+ ci->ci_apicid = caa->cpu_number;
+#ifdef MULTIPROCESSOR
+ ci->ci_cpuid = ci->ci_apicid;
+#else
+ ci->ci_cpuid = 0; /* False for APs, so what, they're not used */
+#endif
+ ci->ci_signature = caa->cpu_signature;
+ ci->ci_feature_flags = caa->feature_flags;
+ ci->ci_func = caa->cpu_func;
+
+#ifdef MULTIPROCESSOR
+ /*
+ * Allocate UPAGES contiguous pages for the idle PCB and stack.
+ */
+
+ kstack = uvm_km_alloc(kernel_map, USPACE);
+ if (kstack == 0) {
+ if (cpunum == 0) { /* XXX */
+ panic("cpu_attach: unable to allocate idle stack for"
+ " primary");
+ }
+ printf("%s: unable to allocate idle stack\n",
+ ci->ci_dev.dv_xname);
+ return;
+ }
+ pcb = ci->ci_idle_pcb = (struct pcb *)kstack;
+ memset(pcb, 0, USPACE);
+
+ pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
+ pcb->pcb_tss.tss_esp0 = kstack + USPACE - 16 -
+ sizeof (struct trapframe);
+ pcb->pcb_tss.tss_esp = kstack + USPACE - 16 -
+ sizeof (struct trapframe);
+ pcb->pcb_pmap = pmap_kernel();
+ pcb->pcb_cr3 = vtophys(pcb->pcb_pmap->pm_pdir);
+ /* pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdir - KERNBASE; XXX ??? */
+#endif
+
+ /* further PCB init done later. */
+
+/* XXXSMP: must be shared with UP */
+#ifdef MULTIPROCESSOR
+ printf(": ");
+
+ switch (caa->cpu_role) {
+ case CPU_ROLE_SP:
+ printf("(uniprocessor)\n");
+ ci->ci_flags |= CPUF_PRESENT | CPUF_SP | CPUF_PRIMARY;
+ identifycpu(ci);
+ cpu_init(ci);
+ break;
+
+ case CPU_ROLE_BP:
+ printf("apid %d (", caa->cpu_number);
+ printf("boot processor");
+ ci->ci_flags |= CPUF_PRESENT | CPUF_BSP | CPUF_PRIMARY;
+ printf(")\n");
+ identifycpu(ci);
+ cpu_init(ci);
+
+#if NLAPIC > 0
+ /*
+ * Enable local apic
+ */
+ lapic_enable();
+ lapic_calibrate_timer(ci);
+#endif
+#if NIOAPIC > 0
+ ioapic_bsp_id = caa->cpu_number;
+#endif
+ break;
+
+ case CPU_ROLE_AP:
+ /*
+ * report on an AP
+ */
+ printf("apid %d (application processor)\n", caa->cpu_number);
+
+#ifdef MULTIPROCESSOR
+ gdt_alloc_cpu(ci);
+ ci->ci_flags |= CPUF_PRESENT | CPUF_AP;
+ identifycpu(ci);
+ ci->ci_next = cpu_info_list->ci_next;
+ cpu_info_list->ci_next = ci;
+ ncpus++;
+#else
+ printf("%s: not started\n", ci->ci_dev.dv_xname);
+#endif
+ break;
+
+ default:
+ panic("unknown processor type??\n");
+ }
+#else /* MULTIPROCESSOR */
+ printf("\n");
+#endif /* !MULTIPROCESSOR */
+
+#ifdef MULTIPROCESSOR
+ if (mp_verbose) {
+ printf("%s: kstack at 0x%lx for %d bytes\n",
+ ci->ci_dev.dv_xname, kstack, USPACE);
+ printf("%s: idle pcb at %p, idle sp at 0x%x\n",
+ ci->ci_dev.dv_xname, pcb, pcb->pcb_esp);
+ }
+#endif
+}
+
+/*
+ * Initialize the processor appropriately.
+ */
+
+void
+cpu_init(ci)
+ struct cpu_info *ci;
+{
+ /* configure the CPU if needed */
+ if (ci->cpu_setup != NULL)
+ (*ci->cpu_setup)(NULL, 0, 0);
+
+#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
+ /*
+ * On a 486 or above, enable ring 0 write protection.
+ */
+ if (ci->cpu_class >= CPUCLASS_486)
+ lcr0(rcr0() | CR0_WP);
+#endif
+ if (cpu_feature & CPUID_PGE)
+ lcr4(rcr4() | CR4_PGE); /* enable global TLB caching */
+
+ ci->ci_flags |= CPUF_RUNNING;
+}
+
+
+#ifdef MULTIPROCESSOR
+
+void
+cpu_boot_secondary_processors()
+{
+ struct cpu_info *ci;
+ u_long i;
+
+ for (i = 0; i < I386_MAXPROCS; i++) {
+ ci = cpu_info[i];
+ if (ci == NULL)
+ continue;
+ if (ci->ci_idle_pcb == NULL)
+ continue;
+ if ((ci->ci_flags & CPUF_PRESENT) == 0)
+ continue;
+ if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY))
+ continue;
+ cpu_boot_secondary(ci);
+ }
+}
+
+void
+cpu_init_idle_pcbs()
+{
+ struct cpu_info *ci;
+ u_long i;
+
+ for (i=0; i < I386_MAXPROCS; i++) {
+ ci = cpu_info[i];
+ if (ci == NULL)
+ continue;
+ if (ci->ci_idle_pcb == NULL)
+ continue;
+ if ((ci->ci_flags & CPUF_PRESENT) == 0)
+ continue;
+ i386_init_pcb_tss_ldt(ci);
+ }
+}
+
+void
+cpu_boot_secondary (ci)
+ struct cpu_info *ci;
+{
+ struct pcb *pcb;
+ int i;
+ struct pmap *kpm = pmap_kernel();
+ extern u_int32_t mp_pdirpa;
+
+ printf("%s: starting", ci->ci_dev.dv_xname);
+
+ /* XXX move elsewhere, not per CPU. */
+ mp_pdirpa = vtophys(kpm->pm_pdir);
+
+ pcb = ci->ci_idle_pcb;
+
+ if (mp_verbose)
+ printf(", init idle stack ptr is 0x%x", pcb->pcb_esp);
+
+ printf("\n");
+
+ CPU_STARTUP(ci);
+
+ /*
+ * wait for it to become ready
+ */
+ for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) {
+ delay(10);
+ }
+ if (!(ci->ci_flags & CPUF_RUNNING)) {
+ printf("cpu failed to become ready\n");
+ Debugger();
+ }
+
+ CPU_START_CLEANUP(ci);
+}
+
+/*
+ * The CPU ends up here when it's ready to run
+ * XXX should share some of this with init386 in machdep.c
+ * for now it jumps into an infinite loop.
+ */
+void
+cpu_hatch(void *v)
+{
+ struct cpu_info *ci = (struct cpu_info *)v;
+ int s;
+
+ cpu_init_idt();
+ lapic_enable();
+ lapic_initclocks();
+ lapic_set_lvt();
+ gdt_init_cpu(ci);
+ npxinit(ci);
+
+ lldt(GSEL(GLDT_SEL, SEL_KPL));
+
+ cpu_init(ci);
+
+ s = splhigh(); /* XXX prevent softints from running here.. */
+ lapic_tpr = 0;
+ enable_intr();
+ printf("%s: CPU %ld running\n", ci->ci_dev.dv_xname, ci->ci_cpuid);
+ splx(s);
+}
+
+void
+cpu_copy_trampoline()
+{
+ /*
+ * Copy boot code.
+ */
+ extern u_char cpu_spinup_trampoline[];
+ extern u_char cpu_spinup_trampoline_end[];
+
+ pmap_kenter_pa((vaddr_t)MP_TRAMPOLINE, /* virtual */
+ (paddr_t)MP_TRAMPOLINE, /* physical */
+ VM_PROT_ALL); /* protection */
+ bcopy(cpu_spinup_trampoline, (caddr_t)MP_TRAMPOLINE,
+ cpu_spinup_trampoline_end - cpu_spinup_trampoline);
+}
+
+#endif
+
+#ifdef notyet
+void
+cpu_init_tss(struct i386tss *tss, void *stack, void *func)
+{
+ memset(tss, 0, sizeof *tss);
+ tss->tss_esp0 = tss->tss_esp = (int)((char *)stack + USPACE - 16);
+ tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
+ tss->__tss_cs = GSEL(GCODE_SEL, SEL_KPL);
+ tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL);
+ tss->tss_gs = tss->__tss_es = tss->__tss_ds =
+ tss->__tss_ss = GSEL(GDATA_SEL, SEL_KPL);
+ tss->tss_cr3 = pmap_kernel()->pm_pdirpa;
+ tss->tss_esp = (int)((char *)stack + USPACE - 16);
+ tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+ tss->__tss_eflags = PSL_MBO | PSL_NT; /* XXX not needed? */
+ tss->__tss_eip = (int)func;
+}
+
+/* XXX */
+#define IDTVEC(name) __CONCAT(X, name)
+typedef void (vector)(void);
+extern vector IDTVEC(tss_trap08);
+#ifdef DDB
+extern vector Xintrddbipi;
+extern int ddb_vec;
+#endif
+
+void
+cpu_set_tss_gates(struct cpu_info *ci)
+{
+ struct segment_descriptor sd;
+
+ ci->ci_doubleflt_stack = (char *)uvm_km_alloc(kernel_map, USPACE);
+ cpu_init_tss(&ci->ci_doubleflt_tss, ci->ci_doubleflt_stack,
+ IDTVEC(tss_trap08));
+ setsegment(&sd, &ci->ci_doubleflt_tss, sizeof(struct i386tss) - 1,
+ SDT_SYS386TSS, SEL_KPL, 0, 0);
+ ci->ci_gdt[GTRAPTSS_SEL].sd = sd;
+ setgate(&idt[8], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
+ GSEL(GTRAPTSS_SEL, SEL_KPL));
+
+#if defined(DDB) && defined(MULTIPROCESSOR)
+ /*
+	 * Set up separate handler for the DDB IPI, so that it doesn't
+ * stomp on a possibly corrupted stack.
+ *
+ * XXX overwriting the gate set in db_machine_init.
+ * Should rearrange the code so that it's set only once.
+ */
+ ci->ci_ddbipi_stack = (char *)uvm_km_alloc(kernel_map, USPACE);
+ cpu_init_tss(&ci->ci_ddbipi_tss, ci->ci_ddbipi_stack,
+ Xintrddbipi);
+
+ setsegment(&sd, &ci->ci_ddbipi_tss, sizeof(struct i386tss) - 1,
+ SDT_SYS386TSS, SEL_KPL, 0, 0);
+ ci->ci_gdt[GIPITSS_SEL].sd = sd;
+
+ setgate(&idt[ddb_vec], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
+ GSEL(GIPITSS_SEL, SEL_KPL));
+#endif
+}
+#endif
+
+#ifdef MULTIPROCESSOR
+int
+mp_cpu_start(struct cpu_info *ci)
+{
+#if NLAPIC > 0
+ int error;
+#endif
+ unsigned short dwordptr[2];
+
+ /*
+ * "The BSP must initialize CMOS shutdown code to 0Ah ..."
+ */
+
+ outb(IO_RTC, NVRAM_RESET);
+ outb(IO_RTC+1, NVRAM_RESET_JUMP);
+
+ /*
+ * "and the warm reset vector (DWORD based at 40:67) to point
+ * to the AP startup code ..."
+ */
+
+ dwordptr[0] = 0;
+ dwordptr[1] = MP_TRAMPOLINE >> 4;
+
+ pmap_kenter_pa(0, 0, VM_PROT_READ|VM_PROT_WRITE);
+ memcpy((u_int8_t *)0x467, dwordptr, 4);
+ pmap_kremove(0, PAGE_SIZE);
+
+#if NLAPIC > 0
+ /*
+ * ... prior to executing the following sequence:"
+ */
+
+ if (ci->ci_flags & CPUF_AP) {
+ if ((error = i386_ipi_init(ci->ci_apicid)) != 0)
+ return (error);
+
+ delay(10000);
+
+ if (cpu_feature & CPUID_APIC) {
+ if ((error = i386_ipi(MP_TRAMPOLINE / PAGE_SIZE,
+ ci->ci_apicid, LAPIC_DLMODE_STARTUP)) != 0)
+ return (error);
+ delay(200);
+
+ if ((error = i386_ipi(MP_TRAMPOLINE / PAGE_SIZE,
+ ci->ci_apicid, LAPIC_DLMODE_STARTUP)) != 0)
+ return (error);
+ delay(200);
+ }
+ }
+#endif
+ return (0);
+}
+
+void
+mp_cpu_start_cleanup(struct cpu_info *ci)
+{
+ /*
+ * Ensure the NVRAM reset byte contains something vaguely sane.
+ */
+
+ outb(IO_RTC, NVRAM_RESET);
+ outb(IO_RTC+1, NVRAM_RESET_RST);
+}
+#endif
diff --git a/sys/arch/i386/i386/db_interface.c b/sys/arch/i386/i386/db_interface.c
index c9d55df065e..44e09f1c914 100644
--- a/sys/arch/i386/i386/db_interface.c
+++ b/sys/arch/i386/i386/db_interface.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: db_interface.c,v 1.12 2003/05/18 02:43:12 andreas Exp $ */
+/* $OpenBSD: db_interface.c,v 1.13 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: db_interface.c,v 1.22 1996/05/03 19:42:00 christos Exp $ */
/*
@@ -53,11 +53,24 @@
extern label_t *db_recover;
extern char *trap_type[];
extern int trap_types;
+extern boolean_t db_cmd_loop_done;
+
+#ifdef MULTIPROCESSOR
+extern boolean_t db_switch_cpu;
+extern long db_switch_to_cpu;
+
+#endif
int db_active = 0;
void kdbprinttrap(int, int);
void db_sysregs_cmd(db_expr_t, int, db_expr_t, char *);
+#ifdef MULTIPROCESSOR
+void db_cpuinfo_cmd(db_expr_t, int, db_expr_t, char *);
+void db_startproc_cmd(db_expr_t, int, db_expr_t, char *);
+void db_stopproc_cmd(db_expr_t, int, db_expr_t, char *);
+void db_ddbproc_cmd(db_expr_t, int, db_expr_t, char *);
+#endif /* MULTIPROCESSOR */
/*
* Print trap reason.
@@ -109,7 +122,7 @@ kdb_trap(type, code, regs)
* Kernel mode - esp and ss not saved
*/
ddb_regs.tf_esp = (int)&regs->tf_esp; /* kernel stack pointer */
- asm("movw %%ss,%w0" : "=r" (ddb_regs.tf_ss));
+ __asm__("movw %%ss,%w0" : "=r" (ddb_regs.tf_ss));
}
s = splhigh();
@@ -179,20 +192,141 @@ db_sysregs_cmd(addr, have_addr, count, modif)
db_printf("cr4: 0x%08x\n", cr);
}
+#ifdef MULTIPROCESSOR
+void db_cpuinfo_cmd(addr, have_addr, count, modif)
+ db_expr_t addr;
+ int have_addr;
+ db_expr_t count;
+ char *modif;
+{
+ int i;
+
+ for (i = 0; i < I386_MAXPROCS; i++) {
+ if (cpu_info[i] != NULL) {
+ db_printf("%c%4d: ", (i == cpu_number()) ? '*' : ' ',
+ i);
+ switch(cpu_info[i]->ci_ddb_paused) {
+ case CI_DDB_RUNNING:
+ db_printf("running\n");
+ break;
+ case CI_DDB_SHOULDSTOP:
+ db_printf("stopping\n");
+ break;
+ case CI_DDB_STOPPED:
+ db_printf("stopped\n");
+ break;
+ case CI_DDB_ENTERDDB:
+ db_printf("entering ddb\n");
+ break;
+ case CI_DDB_INDDB:
+ db_printf("ddb\n");
+ break;
+ default:
+ db_printf("? (%d)\n",
+ cpu_info[i]->ci_ddb_paused);
+ break;
+ }
+ }
+ }
+}
+
+void db_startproc_cmd(addr, have_addr, count, modif)
+ db_expr_t addr;
+ int have_addr;
+ db_expr_t count;
+ char *modif;
+{
+ int i;
+
+ if (have_addr) {
+ if (addr >= 0 && addr < I386_MAXPROCS
+ && cpu_info[addr] != NULL && addr != cpu_number())
+ db_startcpu(addr);
+ else
+ db_printf("Invalid cpu %d\n", (int)addr);
+ } else {
+ for (i = 0; i < I386_MAXPROCS; i++) {
+ if (cpu_info[i] != NULL && i != cpu_number()) {
+ db_startcpu(i);
+ }
+ }
+ }
+}
+
+void db_stopproc_cmd(addr, have_addr, count, modif)
+ db_expr_t addr;
+ int have_addr;
+ db_expr_t count;
+ char *modif;
+{
+ int i;
+
+ if (have_addr) {
+ if (addr >= 0 && addr < I386_MAXPROCS
+ && cpu_info[addr] != NULL && addr != cpu_number())
+ db_stopcpu(addr);
+ else
+ db_printf("Invalid cpu %d\n", (int)addr);
+ } else {
+ for (i = 0; i < I386_MAXPROCS; i++) {
+ if (cpu_info[i] != NULL && i != cpu_number()) {
+ db_stopcpu(i);
+ }
+ }
+ }
+}
+
+void db_ddbproc_cmd(addr, have_addr, count, modif)
+ db_expr_t addr;
+ int have_addr;
+ db_expr_t count;
+ char *modif;
+{
+ if (have_addr) {
+ if (addr >= 0 && addr < I386_MAXPROCS
+ && cpu_info[addr] != NULL && addr != cpu_number()) {
+ db_switch_to_cpu = addr;
+ db_switch_cpu = 1;
+ db_cmd_loop_done = 1;
+ } else {
+ db_printf("Invalid cpu %d\n", (int)addr);
+ }
+ } else {
+ db_printf("CPU not specified\n");
+ }
+}
+#endif /* MULTIPROCESSOR */
+
+/* Machine-dependent ddb command dispatch table; NULL entry terminates. */
 struct db_command db_machine_command_table[] = {
 	{ "sysregs",	db_sysregs_cmd,		0,	0 },
+#ifdef MULTIPROCESSOR
+	/* MP-only commands for inspecting and steering the other CPUs. */
+	{ "cpuinfo",	db_cpuinfo_cmd,		0,	0 },
+	{ "startcpu",	db_startproc_cmd,	0,	0 },
+	{ "stopcpu",	db_stopproc_cmd,	0,	0 },
+	{ "ddbcpu",	db_ddbproc_cmd,		0,	0 },
+#endif /* MULTIPROCESSOR */
 	{ (char *)0, }
 };
+/*
+ * Install the machine-dependent ddb commands and, on MP kernels,
+ * initialize per-cpu ddb state and the ddb MP lock.
+ */
 void
 db_machine_init()
 {
+#ifdef MULTIPROCESSOR
+	int i;
+#endif /* MULTIPROCESSOR */
 	db_machine_commands_install(db_machine_command_table);
+#ifdef MULTIPROCESSOR
+	/* All attached CPUs start out running (not held by ddb). */
+	for (i = 0; i < I386_MAXPROCS; i++) {
+		if (cpu_info[i] != NULL)
+			cpu_info[i]->ci_ddb_paused = CI_DDB_RUNNING;
+	}
+	SIMPLE_LOCK_INIT(&ddb_mp_slock);
+#endif /* MULTIPROCESSOR */
 }
+/* Enter the kernel debugger by raising a breakpoint trap (int $3). */
 void
 Debugger()
 {
-	asm("int $3");
+	__asm__("int $3");
 }
diff --git a/sys/arch/i386/i386/db_mp.c b/sys/arch/i386/i386/db_mp.c
new file mode 100644
index 00000000000..0a859f08360
--- /dev/null
+++ b/sys/arch/i386/i386/db_mp.c
@@ -0,0 +1,187 @@
+/* $OpenBSD: db_mp.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+
+/*
+ * Copyright (c) 2003 Andreas Gunnarsson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/simplelock.h>
+
+#include <machine/db_machdep.h>
+
+#include <ddb/db_output.h>
+
+#define DDB_STATE_NOT_RUNNING 0
+#define DDB_STATE_RUNNING 1
+
+struct SIMPLELOCK ddb_mp_slock;
+
+volatile int ddb_state = DDB_STATE_NOT_RUNNING; /* protected by ddb_mp_slock */
+volatile cpuid_t ddb_active_cpu; /* protected by ddb_mp_slock */
+
+/*
+ * db_enter_ddb() is called when ddb is entered, to stop the other
+ * CPUs.  If another cpu is already in ddb we'll wait until it's
+ * finished; ddb_state and ddb_active_cpu are both protected by
+ * ddb_mp_slock and arbitrate ownership of the ddb session.
+ */
+void
+db_enter_ddb()
+{
+	int s, i;
+
+	s = splhigh();
+	SIMPLE_LOCK(&ddb_mp_slock);
+
+	/* Another CPU owns ddb; wait for it to release the session. */
+	while (ddb_state == DDB_STATE_RUNNING
+	    && ddb_active_cpu != cpu_number()) {
+		db_printf("CPU %d waiting to enter ddb\n", cpu_number());
+		SIMPLE_UNLOCK(&ddb_mp_slock);
+		splx(s);
+
+		/* Busy wait without locking, we'll confirm with lock later */
+		while (ddb_state == DDB_STATE_RUNNING
+		    && ddb_active_cpu != cpu_number())
+			;	/* Do nothing */
+
+		s = splhigh();
+		SIMPLE_LOCK(&ddb_mp_slock);
+	}
+
+	/* Claim the ddb session for this CPU. */
+	ddb_state = DDB_STATE_RUNNING;
+	ddb_active_cpu = cpu_number();
+
+	/* Ask every other attached CPU that isn't already stopped to stop. */
+	for (i = 0; i < I386_MAXPROCS; i++) {
+		if (cpu_info[i] != NULL) {
+			if (i == cpu_number())
+				cpu_info[i]->ci_ddb_paused = CI_DDB_INDDB;
+			else if (cpu_info[i]->ci_ddb_paused
+			    != CI_DDB_STOPPED) {
+				cpu_info[i]->ci_ddb_paused = CI_DDB_SHOULDSTOP;
+				db_printf("Sending IPI to cpu %d\n", i);
+				i386_send_ipi(cpu_info[i], I386_IPI_DDB);
+			}
+		}
+	}
+	db_printf("CPU %d entering ddb\n", cpu_number());
+	SIMPLE_UNLOCK(&ddb_mp_slock);
+	splx(s);
+}
+
+/*
+ * Release the ddb session: mark every attached CPU runnable again
+ * (their IPI handlers spin on ci_ddb_paused) and clear ddb_state.
+ */
+void
+db_leave_ddb()
+{
+	int s, i;
+
+	s = splhigh();
+	SIMPLE_LOCK(&ddb_mp_slock);
+	db_printf("CPU %d leaving ddb\n", cpu_number());
+	for (i = 0; i < I386_MAXPROCS; i++) {
+		if (cpu_info[i] != NULL) {
+			cpu_info[i]->ci_ddb_paused = CI_DDB_RUNNING;
+		}
+	}
+	ddb_state = DDB_STATE_NOT_RUNNING;
+	SIMPLE_UNLOCK(&ddb_mp_slock);
+	splx(s);
+}
+
+/*
+ * Let a single stopped CPU run again; its IPI handler exits its
+ * spin loop when it observes ci_ddb_paused == CI_DDB_RUNNING.
+ * No-op for the current CPU or an unattached slot.
+ */
+void
+db_startcpu(int cpu)
+{
+	int s;
+
+	if (cpu != cpu_number() && cpu_info[cpu] != NULL) {
+		s = splhigh();
+		SIMPLE_LOCK(&ddb_mp_slock);
+		cpu_info[cpu]->ci_ddb_paused = CI_DDB_RUNNING;
+		SIMPLE_UNLOCK(&ddb_mp_slock);
+		splx(s);
+	}
+}
+
+/*
+ * Ask a single CPU to stop: flag it CI_DDB_SHOULDSTOP under the lock,
+ * then send the ddb IPI (after dropping the lock, since the target's
+ * IPI handler takes the same lock).  No-op for the current CPU or an
+ * unattached slot.
+ */
+void
+db_stopcpu(int cpu)
+{
+	int s;
+
+	if (cpu != cpu_number() && cpu_info[cpu] != NULL) {
+		s = splhigh();
+		SIMPLE_LOCK(&ddb_mp_slock);
+		cpu_info[cpu]->ci_ddb_paused = CI_DDB_SHOULDSTOP;
+		db_printf("Sending IPI to cpu %d\n", cpu);
+		SIMPLE_UNLOCK(&ddb_mp_slock);
+		splx(s);
+		i386_send_ipi(cpu_info[cpu], I386_IPI_DDB);
+	}
+}
+
+/*
+ * Ask another CPU to take over the ddb session: mark it
+ * CI_DDB_ENTERDDB and IPI it; its i386_ipi_db() handler will then
+ * grab ddb and call Debugger().  Note: unlike db_startcpu/db_stopcpu
+ * this does not validate the cpu argument — callers must.
+ */
+void
+db_movetocpu(int cpu)
+{
+	int s;
+
+	s = splhigh();
+	SIMPLE_LOCK(&ddb_mp_slock);
+	cpu_info[cpu]->ci_ddb_paused = CI_DDB_ENTERDDB;
+	db_printf("Sending IPI to cpu %d\n", cpu);
+	SIMPLE_UNLOCK(&ddb_mp_slock);
+	splx(s);
+	/* XXX If other CPU was running and IPI is lost, we lose. */
+	i386_send_ipi(cpu_info[cpu], I386_IPI_DDB);
+}
+
+/*
+ * Handler for the I386_IPI_DDB interprocessor interrupt.
+ *
+ * While this CPU is asked to stop it spins here (lock dropped) until
+ * another CPU sets ci_ddb_paused back to a runnable state.  If it is
+ * asked to take over ddb (CI_DDB_ENTERDDB, set by db_movetocpu()) it
+ * claims the session and enters the debugger via Debugger().
+ * NOTE(review): the inner spin re-reads ci_ddb_paused written by
+ * another CPU without holding the lock — relies on the field being
+ * re-fetched from memory; confirm volatile semantics of ci_ddb_paused.
+ */
+void
+i386_ipi_db(struct cpu_info *ci)
+{
+	int s;
+
+	s = splhigh();
+	SIMPLE_LOCK(&ddb_mp_slock);
+	db_printf("CPU %d received ddb IPI\n", cpu_number());
+	while (ci->ci_ddb_paused == CI_DDB_SHOULDSTOP
+	    || ci->ci_ddb_paused == CI_DDB_STOPPED) {
+		if (ci->ci_ddb_paused == CI_DDB_SHOULDSTOP)
+			ci->ci_ddb_paused = CI_DDB_STOPPED;
+		SIMPLE_UNLOCK(&ddb_mp_slock);
+		/* Spin unlocked until some CPU releases us. */
+		while (ci->ci_ddb_paused == CI_DDB_STOPPED)
+			;	/* Do nothing */
+		SIMPLE_LOCK(&ddb_mp_slock);
+	}
+	if (ci->ci_ddb_paused == CI_DDB_ENTERDDB) {
+		/* Take over the ddb session on this CPU. */
+		ddb_state = DDB_STATE_RUNNING;
+		ddb_active_cpu = cpu_number();
+		ci->ci_ddb_paused = CI_DDB_INDDB;
+		db_printf("CPU %d grabbing ddb\n", cpu_number());
+		SIMPLE_UNLOCK(&ddb_mp_slock);
+		Debugger();
+		SIMPLE_LOCK(&ddb_mp_slock);
+		ci->ci_ddb_paused = CI_DDB_RUNNING;
+	}
+	db_printf("CPU %d leaving ddb IPI handler\n", cpu_number());
+	SIMPLE_UNLOCK(&ddb_mp_slock);
+	splx(s);
+}
diff --git a/sys/arch/i386/i386/gdt.c b/sys/arch/i386/i386/gdt.c
index 3bced92013f..5bb2775f9be 100644
--- a/sys/arch/i386/i386/gdt.c
+++ b/sys/arch/i386/i386/gdt.c
@@ -1,8 +1,8 @@
-/* $OpenBSD: gdt.c,v 1.20 2003/11/08 05:38:33 nordin Exp $ */
-/* $NetBSD: gdt.c,v 1.8 1996/05/03 19:42:06 christos Exp $ */
+/* $OpenBSD: gdt.c,v 1.21 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: gdt.c,v 1.28 2002/12/14 09:38:50 junyoung Exp $ */
/*-
- * Copyright (c) 1996 The NetBSD Foundation, Inc.
+ * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -27,8 +27,8 @@
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
@@ -37,27 +37,49 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+/*
+ * The GDT handling has two phases. During the early lifetime of the
+ * kernel there is a static gdt which will be stored in bootstrap_gdt.
+ * Later, when the virtual memory is initialized, this will be
+ * replaced with a dynamically resizable GDT (although, we will only
+ * ever be growing it, there is almost no gain at all to compact it,
+ * and it has proven to be a complicated thing to do, considering
+ * parallel access, so it's just not worth the effort.
+ *
+ * The static GDT area will hold the initial requirement of NGDT descriptors.
+ * The dynamic GDT will have a statically sized virtual memory area of size
+ * GDTMAXPAGES, the physical area backing this will be allocated as needed
+ * starting with the size needed for holding a copy of the bootstrap gdt.
+ *
+ * Every CPU in a system has its own copy of the GDT. The only real difference
+ * between the two are currently that there is a cpu-specific segment holding
+ * the struct cpu_info of the processor, for simplicity at getting cpu_info
+ * fields from assembly. The boot processor will actually refer to the global
+ * copy of the GDT as pointed to by the gdt variable.
+ */
+
+#include <sys/cdefs.h>
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
+#include <sys/lock.h>
#include <sys/user.h>
-#include <uvm/uvm_extern.h>
+#include <uvm/uvm.h>
#include <machine/gdt.h>
-#define MINGDTSIZ 512
-#define MAXGDTSIZ 8192
+union descriptor bootstrap_gdt[NGDT];
+union descriptor *gdt = bootstrap_gdt;
-union descriptor *dynamic_gdt = gdt;
-int gdt_size = NGDT; /* total number of GDT entries */
-int gdt_count = NGDT; /* number of GDT entries in use */
-int gdt_next = NGDT; /* next available slot for sweeping */
-int gdt_free = GNULL_SEL; /* next free slot; terminated with GNULL_SEL */
+int gdt_size; /* total number of GDT entries */
+int gdt_count; /* number of GDT entries in use */
+int gdt_next; /* next available slot for sweeping */
+int gdt_free; /* next free slot; terminated with GNULL_SEL */
-int gdt_flags;
-#define GDT_LOCKED 0x1
-#define GDT_WANTED 0x2
+struct simplelock gdt_simplelock;
+struct lock gdt_lock_store;
static __inline void gdt_lock(void);
static __inline void gdt_unlock(void);
@@ -72,23 +94,31 @@ void gdt_put_slot(int);
static __inline void
gdt_lock()
{
-
- while ((gdt_flags & GDT_LOCKED) != 0) {
- gdt_flags |= GDT_WANTED;
- tsleep(&gdt_flags, PZERO, "gdtlck", 0);
- }
- gdt_flags |= GDT_LOCKED;
+ if (curproc != NULL)
+ lockmgr(&gdt_lock_store, LK_EXCLUSIVE, &gdt_simplelock,
+ curproc);
}
static __inline void
gdt_unlock()
{
+ if (curproc != NULL)
+ lockmgr(&gdt_lock_store, LK_RELEASE, &gdt_simplelock, curproc);
+}
- gdt_flags &= ~GDT_LOCKED;
- if ((gdt_flags & GDT_WANTED) != 0) {
- gdt_flags &= ~GDT_WANTED;
- wakeup(&gdt_flags);
- }
+/* XXX needs spinlocking if we ever mean to go finegrained. */
+/*
+ * Install descriptor `sel' (base/limit/type/dpl/def32/gran) in the
+ * master GDT, then propagate the new descriptor to every other CPU's
+ * shadow GDT so all processors see a consistent view.
+ */
+void
+setgdt(int sel, void *base, size_t limit, int type, int dpl, int def32,
+    int gran)
+{
+	struct segment_descriptor *sd = &gdt[sel].sd;
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+
+	setsegment(sd, base, limit, type, dpl, def32, gran);
+	for (CPU_INFO_FOREACH(cii, ci))
+		if (ci->ci_gdt != NULL && ci->ci_gdt != gdt)
+			ci->ci_gdt[sel].sd = *sd;
 }
/*
@@ -98,18 +128,69 @@ void
gdt_init()
{
size_t max_len, min_len;
- struct region_descriptor region;
+ struct vm_page *pg;
+ vaddr_t va;
+ struct cpu_info *ci = &cpu_info_primary;
+
+ simple_lock_init(&gdt_simplelock);
+ lockinit(&gdt_lock_store, PZERO, "gdtlck", 0, 0);
max_len = MAXGDTSIZ * sizeof(union descriptor);
min_len = MINGDTSIZ * sizeof(union descriptor);
+
gdt_size = MINGDTSIZ;
+ gdt_count = NGDT;
+ gdt_next = NGDT;
+ gdt_free = GNULL_SEL;
+
+ gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len);
+ for (va = (vaddr_t)gdt; va < (vaddr_t)gdt + min_len; va += PAGE_SIZE) {
+ pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+ if (pg == NULL)
+ panic("gdt_init: no pages");
+ pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
+ VM_PROT_READ | VM_PROT_WRITE);
+ }
+ bcopy(bootstrap_gdt, gdt, NGDT * sizeof(union descriptor));
+ ci->ci_gdt = gdt;
+ setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1,
+ SDT_MEMRWA, SEL_KPL, 1, 1);
- dynamic_gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len);
- uvm_map_pageable(kernel_map, (vaddr_t)dynamic_gdt,
- (vaddr_t)dynamic_gdt + min_len, FALSE, FALSE);
- bcopy(gdt, dynamic_gdt, NGDT * sizeof(union descriptor));
+ gdt_init_cpu(ci);
+}
- setregion(&region, dynamic_gdt, max_len - 1);
+#ifdef MULTIPROCESSOR
+/*
+ * Allocate shadow GDT for a slave cpu.
+ */
+void
+gdt_alloc_cpu(struct cpu_info *ci)
+{
+	int max_len = MAXGDTSIZ * sizeof(union descriptor);
+	int min_len = MINGDTSIZ * sizeof(union descriptor);
+
+	/*
+	 * Reserve VA for the maximum GDT size but only back the minimum
+	 * with pages; gdt_grow() fills in more as slots are consumed.
+	 */
+	ci->ci_gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len);
+	uvm_map_pageable(kernel_map, (vaddr_t)ci->ci_gdt,
+	    (vaddr_t)ci->ci_gdt + min_len, FALSE, FALSE);
+	bzero(ci->ci_gdt, min_len);
+	/* Copy the in-use slots, then point GCPU_SEL at this cpu_info. */
+	bcopy(gdt, ci->ci_gdt, gdt_count * sizeof(union descriptor));
+	setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1,
+	    SDT_MEMRWA, SEL_KPL, 1, 1);
+}
+#endif /* MULTIPROCESSOR */
+
+
+/*
+ * Load appropriate gdt descriptor; we better be running on *ci
+ * (for the most part, this is how a cpu knows who it is).
+ */
+void
+gdt_init_cpu(struct cpu_info *ci)
+{
+	struct region_descriptor region;
+
+	/* Point GDTR at this cpu's GDT, sized for the full (max) area. */
+	setregion(&region, ci->ci_gdt,
+	    MAXGDTSIZ * sizeof(union descriptor) - 1);
 	lgdt(&region);
 }
@@ -120,13 +201,29 @@ void
gdt_grow()
{
size_t old_len, new_len;
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+ struct vm_page *pg;
+ vaddr_t va;
old_len = gdt_size * sizeof(union descriptor);
gdt_size <<= 1;
new_len = old_len << 1;
- uvm_map_pageable(kernel_map, (vaddr_t)dynamic_gdt + old_len,
- (vaddr_t)dynamic_gdt + new_len, FALSE, FALSE);
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ for (va = (vaddr_t)(ci->ci_gdt) + old_len;
+ va < (vaddr_t)(ci->ci_gdt) + new_len;
+ va += PAGE_SIZE) {
+ while (
+ (pg =
+ uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) ==
+ NULL) {
+ uvm_wait("gdt_grow");
+ }
+ pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
+ VM_PROT_READ | VM_PROT_WRITE);
+ }
+ }
}
/*
@@ -146,15 +243,13 @@ gdt_get_slot()
if (gdt_free != GNULL_SEL) {
slot = gdt_free;
- gdt_free = dynamic_gdt[slot].gd.gd_selector;
+ gdt_free = gdt[slot].gd.gd_selector;
} else {
if (gdt_next != gdt_count)
panic("gdt_get_slot: gdt_next != gdt_count");
if (gdt_next >= gdt_size) {
if (gdt_size >= MAXGDTSIZ)
panic("gdt_get_slot: out of GDT descriptors");
- if (dynamic_gdt == gdt)
- panic("gdt_get_slot called before gdt_init");
gdt_grow();
}
slot = gdt_next++;
@@ -169,65 +264,56 @@ gdt_get_slot()
* Deallocate a GDT slot, putting it on the free list.
*/
void
-gdt_put_slot(slot)
- int slot;
+gdt_put_slot(int slot)
{
gdt_lock();
gdt_count--;
- dynamic_gdt[slot].gd.gd_type = SDT_SYSNULL;
- dynamic_gdt[slot].gd.gd_selector = gdt_free;
+ gdt[slot].gd.gd_type = SDT_SYSNULL;
+ gdt[slot].gd.gd_selector = gdt_free;
gdt_free = slot;
gdt_unlock();
}
-void
-tss_alloc(pcb)
- struct pcb *pcb;
+int
+tss_alloc(struct pcb *pcb)
{
int slot;
slot = gdt_get_slot();
- setsegment(&dynamic_gdt[slot].sd, &pcb->pcb_tss, sizeof(struct pcb) - 1,
+ setgdt(slot, &pcb->pcb_tss, sizeof(struct pcb) - 1,
SDT_SYS386TSS, SEL_KPL, 0, 0);
- pcb->pcb_tss_sel = GSEL(slot, SEL_KPL);
+ return GSEL(slot, SEL_KPL);
}
void
-tss_free(pcb)
- struct pcb *pcb;
+tss_free(int sel)
{
- gdt_put_slot(IDXSEL(pcb->pcb_tss_sel));
+ gdt_put_slot(IDXSEL(sel));
}
+/*
+ * Caller must have pmap locked for both of these functions.
+ */
void
-ldt_alloc(pmap, ldt, len)
- struct pmap *pmap;
- union descriptor *ldt;
- size_t len;
+ldt_alloc(struct pmap *pmap, union descriptor *ldt, size_t len)
{
int slot;
slot = gdt_get_slot();
- setsegment(&dynamic_gdt[slot].sd, ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0,
- 0);
- simple_lock(&pmap->pm_lock);
+ setgdt(slot, ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0, 0);
pmap->pm_ldt_sel = GSEL(slot, SEL_KPL);
- simple_unlock(&pmap->pm_lock);
}
void
-ldt_free(pmap)
- struct pmap *pmap;
+ldt_free(struct pmap *pmap)
{
int slot;
- simple_lock(&pmap->pm_lock);
slot = IDXSEL(pmap->pm_ldt_sel);
- simple_unlock(&pmap->pm_lock);
gdt_put_slot(slot);
}
diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf
index e74f22cae5f..cc5149eb722 100644
--- a/sys/arch/i386/i386/genassym.cf
+++ b/sys/arch/i386/i386/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.13 2003/06/02 23:27:47 millert Exp $
+# $OpenBSD: genassym.cf,v 1.14 2004/06/13 21:49:15 niklas Exp $
#
# Copyright (c) 1982, 1990 The Regents of the University of California.
# All rights reserved.
@@ -57,12 +57,17 @@ if COMPAT_FREEBSD > 0
include <machine/freebsd_machdep.h>
endif
+if MULTIPROCESSOR
+include <machine/cpu.h>
+endif
+
include "isa.h"
if NISA > 0
include <i386/isa/isa_machdep.h>
endif
export SRUN
+export SONPROC
# values for page tables
export PDSLOT_KERN
@@ -84,6 +89,7 @@ member p_stat
member p_wchan
member p_vmspace
member p_flag
+member p_cpu
export P_SYSTEM
@@ -101,8 +107,8 @@ member pcb_fs
member pcb_gs
member pcb_cr0
member pcb_ldt_sel
-member pcb_tss_sel
member pcb_onfault
+member pcb_fpcpu
# frame definitions
struct trapframe
@@ -169,3 +175,26 @@ member ih_arg
member ih_count
member ih_next
endif
+
+define P_MD_TSS_SEL offsetof(struct proc, p_md.md_tss_sel)
+
+define CPU_INFO_SELF offsetof(struct cpu_info, ci_self)
+define CPU_INFO_CURPROC offsetof(struct cpu_info, ci_curproc)
+define CPU_INFO_CURPCB offsetof(struct cpu_info, ci_curpcb)
+define CPU_INFO_NAME offsetof(struct cpu_info, ci_dev.dv_xname)
+define CPU_INFO_IDLE_PCB offsetof(struct cpu_info, ci_idle_pcb)
+define CPU_INFO_IDLE_TSS_SEL offsetof(struct cpu_info, ci_idle_tss_sel)
+define CPU_INFO_LEVEL offsetof(struct cpu_info, ci_level)
+define CPU_INFO_VENDOR offsetof(struct cpu_info, ci_vendor[0])
+define CPU_INFO_SIGNATURE offsetof(struct cpu_info, ci_signature)
+define CPU_INFO_RESCHED offsetof(struct cpu_info, ci_want_resched)
+define CPU_INFO_ASTPENDING offsetof(struct cpu_info, ci_astpending)
+define CPU_INFO_GDT offsetof(struct cpu_info, ci_gdt)
+define CPU_INFO_IPENDING offsetof(struct cpu_info, ci_ipending)
+define CPU_INFO_IMASK offsetof(struct cpu_info, ci_imask)
+define CPU_INFO_IUNMASK offsetof(struct cpu_info, ci_iunmask)
+define CPU_INFO_ILEVEL offsetof(struct cpu_info, ci_ilevel)
+define CPU_INFO_IDEPTH offsetof(struct cpu_info, ci_idepth)
+define CPU_INFO_ISOURCES offsetof(struct cpu_info, ci_isources)
+
+define SIZEOF_CPU_INFO sizeof(struct cpu_info)
diff --git a/sys/arch/i386/i386/ioapic.c b/sys/arch/i386/i386/ioapic.c
new file mode 100644
index 00000000000..e5b329372f3
--- /dev/null
+++ b/sys/arch/i386/i386/ioapic.c
@@ -0,0 +1,736 @@
+/* $OpenBSD: ioapic.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: ioapic.c,v 1.7 2003/07/14 22:32:40 lukem Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/*
+ * Copyright (c) 1999 Stefan Grefen
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+
+#include <machine/bus.h>
+#include <machine/psl.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/i82093reg.h>
+#include <machine/i82093var.h>
+
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+
+#include <machine/pmap.h>
+
+#include <machine/mpbiosvar.h>
+
+#include "isa.h"
+
+/*
+ * XXX locking
+ */
+
+int ioapic_match(struct device *, void *, void *);
+void ioapic_attach(struct device *, struct device *, void *);
+
+/* XXX */
+extern int bus_mem_add_mapping(bus_addr_t, bus_size_t, int,
+ bus_space_handle_t *);
+
+void apic_set_redir(struct ioapic_softc *, int);
+void apic_vectorset(struct ioapic_softc *, int, int, int);
+
+int apic_verbose = 0;
+
+int ioapic_bsp_id = 0;
+int ioapic_cold = 1;
+
+struct ioapic_softc *ioapics; /* head of linked list */
+int nioapics = 0; /* number attached */
+
+/*
+ * Register read/write routines.
+ */
+/*
+ * Indirect register read: write the register index into the I/O APIC's
+ * select register (sc_reg), then read the value back through the data
+ * window (sc_data).  The select+read pair is not protected against
+ * concurrent access (see "XXX lock apic").
+ */
+static __inline u_int32_t
+ioapic_read(struct ioapic_softc *sc, int regid)
+{
+	u_int32_t val;
+
+	/*
+	 * XXX lock apic
+	 */
+	*(sc->sc_reg) = regid;
+	val = *sc->sc_data;
+
+	return (val);
+
+}
+
+/*
+ * Indirect register write: select register `regid' via sc_reg, then
+ * store `val' through the data window.  Same locking caveat as
+ * ioapic_read().
+ */
+static __inline void
+ioapic_write(struct ioapic_softc *sc, int regid, int val)
+{
+	/*
+	 * XXX lock apic
+	 */
+	*(sc->sc_reg) = regid;
+	*(sc->sc_data) = val;
+}
+
+/*
+ * Look up an attached I/O APIC by its APIC id; returns NULL if not
+ * found.  MPS_ALL_APICS is special-cased for single-ioapic systems.
+ */
+struct ioapic_softc *
+ioapic_find(int apicid)
+{
+	struct ioapic_softc *sc;
+
+	if (apicid == MPS_ALL_APICS) {	/* XXX mpbios-specific */
+		/*
+		 * XXX kludge for all-ioapics interrupt support
+		 * on single ioapic systems
+		 */
+		if (nioapics <= 1)
+			return (ioapics);
+		panic("unsupported: all-ioapics interrupt with >1 ioapic");
+	}
+
+	/* Walk the singly linked list of attached ioapics. */
+	for (sc = ioapics; sc != NULL; sc = sc->sc_next)
+		if (sc->sc_apicid == apicid)
+			return (sc);
+
+	return (NULL);
+}
+
+/* Prepend a newly attached ioapic to the global list and count it. */
+static __inline void
+ioapic_add(struct ioapic_softc *sc)
+{
+	sc->sc_next = ioapics;
+	ioapics = sc;
+	nioapics++;
+}
+
+/*
+ * Debug helper: read both halves of pin's redirection entry and print
+ * a decoded form, tagged with `why' (e.g. "boot", "int").
+ */
+void
+ioapic_print_redir(struct ioapic_softc *sc, char *why, int pin)
+{
+	u_int32_t redirlo = ioapic_read(sc, IOAPIC_REDLO(pin));
+	u_int32_t redirhi = ioapic_read(sc, IOAPIC_REDHI(pin));
+
+	apic_format_redir(sc->sc_dev.dv_xname, why, pin, redirhi, redirlo);
+}
+
+struct cfattach ioapic_ca = {
+ sizeof(struct ioapic_softc), ioapic_match, ioapic_attach
+};
+
+struct cfdriver ioapic_cd = {
+ NULL, "ioapic", DV_DULL /* XXX DV_CPU ? */
+};
+
+/*
+ * Autoconf match: accept when the attach args name matches this
+ * driver's name ("ioapic").
+ */
+int
+ioapic_match(struct device *parent, void *matchv, void *aux)
+{
+	struct cfdata *match = (struct cfdata *)matchv;
+	struct apic_attach_args * aaa = (struct apic_attach_args *)aux;
+
+	if (strcmp(aaa->aaa_name, match->cf_driver->cd_name) == 0)
+		return (1);
+	return (0);
+}
+
+
+/*
+ * can't use bus_space_xxx as we don't have a bus handle ...
+ */
+/*
+ * Autoconf attach: map the I/O APIC register window, read its id and
+ * version, size and initialize the per-pin state array, and re-program
+ * the hardware APIC id if it does not match what the MP tables say.
+ */
+void
+ioapic_attach(struct device *parent, struct device *self, void *aux)
+{
+	struct ioapic_softc *sc = (struct ioapic_softc *)self;
+	struct apic_attach_args *aaa = (struct apic_attach_args *)aux;
+	int apic_id;
+	bus_space_handle_t bh;
+	u_int32_t ver_sz;
+	int i;
+
+	sc->sc_flags = aaa->flags;
+	sc->sc_apicid = aaa->apic_id;
+
+	printf(" apid %d", aaa->apic_id);
+
+	/* Refuse to attach a second ioapic with the same id. */
+	if (ioapic_find(aaa->apic_id) != NULL) {
+		printf(": duplicate apic id (ignored)\n");
+		return;
+	}
+
+	ioapic_add(sc);
+
+	printf(": pa 0x%lx", aaa->apic_address);
+
+	/* Map one page of registers; select/data windows live in it. */
+	if (bus_mem_add_mapping(aaa->apic_address, PAGE_SIZE, 0, &bh) != 0) {
+		printf(", map failed\n");
+		return;
+	}
+	sc->sc_reg = (volatile u_int32_t *)(bh + IOAPIC_REG);
+	sc->sc_data = (volatile u_int32_t *)(bh + IOAPIC_DATA);
+
+	apic_id = (ioapic_read(sc,IOAPIC_ID) & IOAPIC_ID_MASK) >>
+	    IOAPIC_ID_SHIFT;
+	ver_sz = ioapic_read(sc, IOAPIC_VER);
+
+	/* VER register encodes both version and max redirection entry. */
+	sc->sc_apic_vers = (ver_sz & IOAPIC_VER_MASK) >> IOAPIC_VER_SHIFT;
+	sc->sc_apic_sz = (ver_sz & IOAPIC_MAX_MASK) >> IOAPIC_MAX_SHIFT;
+	sc->sc_apic_sz++;
+
+	if (mp_verbose) {
+		printf(", %s mode",
+		    aaa->flags & IOAPIC_PICMODE ? "PIC" : "virtual wire");
+	}
+
+	printf(", version %x, %d pins\n", sc->sc_apic_vers, sc->sc_apic_sz);
+
+	sc->sc_pins = malloc(sizeof(struct ioapic_pin) * sc->sc_apic_sz,
+	    M_DEVBUF, M_WAITOK);
+
+	/* Start every pin disabled: no handler, no vector, ipl unset. */
+	for (i=0; i<sc->sc_apic_sz; i++) {
+		sc->sc_pins[i].ip_handler = NULL;
+		sc->sc_pins[i].ip_next = NULL;
+		sc->sc_pins[i].ip_map = NULL;
+		sc->sc_pins[i].ip_vector = 0;
+		sc->sc_pins[i].ip_type = 0;
+		sc->sc_pins[i].ip_minlevel = 0xff; /* XXX magic*/
+		sc->sc_pins[i].ip_maxlevel = 0; /* XXX magic */
+	}
+
+	/*
+	 * In case the APIC is not initialized to the correct ID
+	 * do it now.
+	 * Maybe we should record the original ID for interrupt
+	 * mapping later ...
+	 */
+	if (apic_id != sc->sc_apicid) {
+		printf("%s: misconfigured as apic %d", sc->sc_dev.dv_xname,
+		    apic_id);
+
+		ioapic_write(sc, IOAPIC_ID,
+		    (ioapic_read(sc, IOAPIC_ID) & ~IOAPIC_ID_MASK)
+		    | (sc->sc_apicid << IOAPIC_ID_SHIFT));
+
+		/* Read back to verify the remap took effect. */
+		apic_id = (ioapic_read(sc,IOAPIC_ID) & IOAPIC_ID_MASK) >>
+		    IOAPIC_ID_SHIFT;
+
+		if (apic_id != sc->sc_apicid)
+			printf(", can't remap to apid %d\n", sc->sc_apicid);
+		else
+			printf(", remapped to apic %d\n", sc->sc_apicid);
+	}
+#if 0
+	/* output of this was boring. */
+	if (mp_verbose)
+		for (i=0; i<sc->sc_apic_sz; i++)
+			ioapic_print_redir(sc, "boot", i);
+#endif
+}
+
+/*
+ * Interrupt mapping.
+ *
+ * Multiple handlers may exist for each pin, so there's an
+ * intrhand chain for each pin.
+ *
+ * Ideally, each pin maps to a single vector at the priority of the
+ * highest level interrupt for that pin.
+ *
+ * XXX in the event that there are more than 16 interrupt sources at a
+ * single level, some doubling-up may be needed. This is not yet
+ * implemented.
+ *
+ * XXX we are wasting some space here because we only use a limited
+ * range of the vectors here. (0x30..0xef)
+ */
+
+struct intrhand *apic_intrhand[256];
+int apic_intrcount[256];
+
+
+/* XXX should check vs. softc max int number */
+#define LEGAL_IRQ(x) ((x) >= 0 && (x) < APIC_ICU_LEN && (x) != 2)
+
+/*
+ * Program the redirection table entry for `pin' from the pin's
+ * current state: vector, delivery mode, destination (BSP only for
+ * now), trigger mode and polarity.  A pin with no handler, or an
+ * entry with a delivery mode other than fixed/lowest-priority, is
+ * left masked.
+ */
+void
+apic_set_redir(struct ioapic_softc *sc, int pin)
+{
+	u_int32_t redlo;
+	u_int32_t redhi = 0;
+	int delmode;
+
+	struct ioapic_pin *pp;
+	struct mp_intr_map *map;
+
+	pp = &sc->sc_pins[pin];
+	map = pp->ip_map;
+	if (map == NULL) {
+		/* No MP-table mapping: start from a masked entry. */
+		redlo = IOAPIC_REDLO_MASK;
+	} else {
+		redlo = map->redir;
+	}
+	delmode = (redlo & IOAPIC_REDLO_DEL_MASK) >> IOAPIC_REDLO_DEL_SHIFT;
+
+	/* XXX magic numbers */
+	if ((delmode != 0) && (delmode != 1))
+		;
+	else if (pp->ip_handler == NULL) {
+		redlo |= IOAPIC_REDLO_MASK;
+	} else {
+		redlo |= (pp->ip_vector & 0xff);
+		redlo |= (IOAPIC_REDLO_DEL_FIXED<<IOAPIC_REDLO_DEL_SHIFT);
+		redlo &= ~IOAPIC_REDLO_DSTMOD;
+
+		/*
+		 * Destination: BSP CPU
+		 *
+		 * XXX will want to distribute interrupts across cpu's
+		 * eventually.  most likely, we'll want to vector each
+		 * interrupt to a specific CPU and load-balance across
+		 * cpu's.  but there's no point in doing that until after
+		 * most interrupts run without the kernel lock.
+		 */
+		redhi |= (ioapic_bsp_id << IOAPIC_REDHI_DEST_SHIFT);
+
+		/* XXX derive this bit from BIOS info */
+		if (pp->ip_type == IST_LEVEL)
+			redlo |= IOAPIC_REDLO_LEVEL;
+		else
+			redlo &= ~IOAPIC_REDLO_LEVEL;
+		if (map != NULL && ((map->flags & 3) == MPS_INTPO_DEF)) {
+			/* Default polarity: active-low for level pins. */
+			if (pp->ip_type == IST_LEVEL)
+				redlo |= IOAPIC_REDLO_ACTLO;
+			else
+				redlo &= ~IOAPIC_REDLO_ACTLO;
+		}
+	}
+	/* Do atomic write */
+	/* Mask first so the two-register update can't fire half-written. */
+	ioapic_write(sc, IOAPIC_REDLO(pin), IOAPIC_REDLO_MASK);
+	ioapic_write(sc, IOAPIC_REDHI(pin), redhi);
+	ioapic_write(sc, IOAPIC_REDLO(pin), redlo);
+	if (mp_verbose)
+		ioapic_print_redir(sc, "int", pin);
+}
+
+/*
+ * XXX To be really correct an NISA > 0 condition should check for these.
+ * However, the i386 port pretty much assumes isa is there anyway.
+ * For example, pci_intr_establish calls isa_intr_establish unconditionally.
+ */
+extern int fakeintr(void *); /* XXX headerify */
+extern char *isa_intr_typename(int); /* XXX headerify */
+
+/*
+ * apic_vectorset: allocate a vector for the given pin, based on
+ * the levels of the interrupts on that pin.
+ *
+ * XXX if the level of the pin changes while the pin is
+ * masked, need to do something special to prevent pending
+ * interrupts from being lost.
+ * (the answer may be to hang the interrupt chain off of both vectors
+ * until any interrupts from the old source have been handled. the trouble
+ * is that we don't have a global view of what interrupts are pending.
+ *
+ * Deferring for now since MP systems are more likely servers rather
+ * than laptops or desktops, and thus will have relatively static
+ * interrupt configuration.
+ */
+
+/*
+ * (Re)assign an IDT vector to `pin' based on the min/max interrupt
+ * levels of its handler chain, then reprogram the redirection entry.
+ * maxlevel == 0 means the pin has no handlers and loses its vector.
+ */
+void
+apic_vectorset(struct ioapic_softc *sc, int pin, int minlevel, int maxlevel)
+{
+	struct ioapic_pin *pp = &sc->sc_pins[pin];
+	int ovector = 0;
+	int nvector = 0;
+
+	/* Remember the old vector so it can be freed after the switch. */
+	ovector = pp->ip_vector;
+
+	if (maxlevel == 0) {
+		/* no vector needed. */
+		pp->ip_minlevel = 0xff; /* XXX magic */
+		pp->ip_maxlevel = 0; /* XXX magic */
+		pp->ip_vector = 0;
+	} else if (maxlevel != pp->ip_maxlevel) {
+		if (minlevel != maxlevel) {
+			printf("%s: WARNING: sharing interrupt "
+			    "between different IPLs (currently broken)\n",
+			    sc->sc_dev.dv_xname);
+			printf("%s: pin %d, ipls %x..%x\n",
+			    sc->sc_dev.dv_xname,
+			    pin, minlevel, maxlevel);
+		}
+
+		/*
+		 * Allocate interrupt vector at the *lowest* priority level
+		 * of any of the handlers invoked by this pin.
+		 *
+		 * The interrupt handler will raise ipl higher than this
+		 * as appropriate.
+		 */
+		nvector = idt_vec_alloc(maxlevel, maxlevel+15);
+
+		if (nvector == 0) {
+			/*
+			 * XXX XXX we should be able to deal here..
+			 * need to double-up an existing vector
+			 * and install a slightly different handler.
+			 */
+			panic("%s: can't alloc vector for pin %d at level %x",
+			    sc->sc_dev.dv_xname, pin, maxlevel);
+		}
+		/*
+		 * XXX want special handler for the maxlevel != minlevel
+		 * case here!
+		 */
+		idt_vec_set(nvector, apichandler[nvector & 0xf]);
+		pp->ip_vector = nvector;
+		pp->ip_minlevel = minlevel;
+		pp->ip_maxlevel = maxlevel;
+	}
+	/* Point the (possibly unchanged) vector at the handler chain. */
+	apic_intrhand[pp->ip_vector] = pp->ip_handler;
+
+	if (ovector) {
+		/*
+		 * XXX should defer this until we're sure the old vector
+		 * doesn't have a pending interrupt on any processor.
+		 * do this by setting a counter equal to the number of CPU's,
+		 * and firing off a low-priority broadcast IPI to all cpu's.
+		 * each cpu then decrements the counter; when it
+		 * goes to zero, free the vector..
+		 * i.e., defer until all processors have run with a CPL
+		 * less than the level of the interrupt..
+		 *
+		 * this is only an issue for dynamic interrupt configuration
+		 * (e.g., cardbus or pcmcia).
+		 */
+		apic_intrhand[ovector] = NULL;
+		idt_vec_free(ovector);
+		printf("freed vector %x\n", ovector);
+	}
+
+	apic_set_redir(sc, pin);
+}
+
+/*
+ * Throw the switch and enable interrupts..
+ */
+
+/*
+ * Switch the system from cold (8259/PIC mode) to ioapic-driven
+ * interrupt delivery: compute the IPL range of every pin's handler
+ * chain and assign vectors via apic_vectorset().
+ */
+void
+ioapic_enable(void)
+{
+ int p, maxlevel, minlevel;
+ struct ioapic_softc *sc;
+ struct intrhand *q;
+ extern void intr_calculatemasks(void); /* XXX */
+
+ intr_calculatemasks(); /* for softints, AST's */
+
+ /* From here on, apic_intr_establish() programs vectors eagerly. */
+ ioapic_cold = 0;
+
+ if (ioapics == NULL)
+ return;
+
+#if 1 /* XXX Will probably get removed */
+ lapic_set_softvectors();
+ lapic_set_lvt();
+#endif
+
+ if (ioapics->sc_flags & IOAPIC_PICMODE) {
+ printf("%s: writing to IMCR to disable pics\n",
+ ioapics->sc_dev.dv_xname);
+ outb(IMCR_ADDR, IMCR_REGISTER);
+ outb(IMCR_DATA, IMCR_APIC);
+ }
+
+#if 0 /* XXX Will be removed when we have intrsource. */
+ isa_nodefaultirq();
+#endif
+
+ for (sc = ioapics; sc != NULL; sc = sc->sc_next) {
+ if (mp_verbose)
+ printf("%s: enabling\n", sc->sc_dev.dv_xname);
+
+ for (p=0; p<sc->sc_apic_sz; p++) {
+ maxlevel = 0; /* magic */
+ minlevel = 0xff; /* magic */
+
+ /* Find the IPL range of this pin's handler chain. */
+ for (q = sc->sc_pins[p].ip_handler; q != NULL;
+ q = q->ih_next) {
+ if (q->ih_level > maxlevel)
+ maxlevel = q->ih_level;
+ if (q->ih_level < minlevel)
+ minlevel = q->ih_level;
+ }
+ apic_vectorset(sc, p, minlevel, maxlevel);
+ }
+ }
+}
+
+/*
+ * Interrupt handler management with the apic is radically different from the
+ * good old 8259.
+ *
+ * The APIC adds an additional level of indirection between interrupt
+ * signals and interrupt vectors in the IDT.
+ * It also encodes a priority into the high-order 4 bits of the IDT vector
+ * number.
+ *
+ *
+ * interrupt establishment:
+ * -> locate interrupt pin.
+ * -> locate or allocate vector for pin.
+ * -> locate or allocate handler chain for vector.
+ * -> chain interrupt into handler chain.
+ * #ifdef notyet
+ * -> if level of handler chain increases, reallocate vector, move chain.
+ * #endif
+ */
+
+/*
+ * Establish an interrupt handler on an ioapic pin: locate the pin from
+ * the encoded irq, chain the new handler in (preserving establishment
+ * order), and — unless still cold — reassign the pin's vector for the
+ * chain's new IPL range.  Returns an opaque handle for disestablish.
+ */
+void *
+apic_intr_establish(int irq, int type, int level, int (*ih_fun)(void *),
+ void *ih_arg, char *what)
+{
+ unsigned int ioapic = APIC_IRQ_APIC(irq);
+ unsigned int intr = APIC_IRQ_PIN(irq);
+ struct ioapic_softc *sc = ioapic_find(ioapic);
+ struct ioapic_pin *pin;
+ struct intrhand **p, *q, *ih;
+ static struct intrhand fakehand = {fakeintr};
+ extern int cold;
+ int minlevel, maxlevel;
+
+ if (sc == NULL)
+ panic("apic_intr_establish: unknown ioapic %d", ioapic);
+
+ if ((irq & APIC_INT_VIA_APIC) == 0)
+ panic("apic_intr_establish of non-apic interrupt 0x%x", irq);
+
+ /*
+ * NOTE(review): pin's address is computed before the bounds check
+ * below; harmless as written (no dereference yet) but the check
+ * should ideally come first.
+ */
+ pin = &sc->sc_pins[intr];
+ if (intr >= sc->sc_apic_sz || type == IST_NONE)
+ panic("apic_intr_establish: bogus intr or type");
+
+ /* no point in sleeping unless someone can free memory. */
+ ih = malloc(sizeof *ih, M_DEVBUF, cold ? M_NOWAIT : M_WAITOK);
+ if (ih == NULL)
+ panic("apic_intr_establish: can't malloc handler info");
+
+
+ /* Sharing a pin requires an identical trigger type. */
+ switch (pin->ip_type) {
+ case IST_NONE:
+ pin->ip_type = type;
+ break;
+ case IST_EDGE:
+ case IST_LEVEL:
+ if (type == pin->ip_type)
+ break;
+ /* FALLTHROUGH: mismatched edge/level is treated like pulse. */
+ case IST_PULSE:
+ if (type != IST_NONE)
+ /* XXX should not panic here! */
+ panic("apic_intr_establish: "
+ "intr %d can't share %s with %s",
+ intr,
+ isa_intr_typename(sc->sc_pins[intr].ip_type),
+ isa_intr_typename(type));
+ break;
+ }
+
+ /*
+ * Figure out where to put the handler.
+ * This is O(N^2) to establish N interrupts, but we want to
+ * preserve the order, and N is generally small.
+ */
+ maxlevel = level;
+ minlevel = level;
+ for (p = &pin->ip_handler; (q = *p) != NULL; p = &q->ih_next) {
+ if (q->ih_level > maxlevel)
+ maxlevel = q->ih_level;
+ if (q->ih_level < minlevel)
+ minlevel = q->ih_level;
+ }
+
+ /*
+ * Actually install a fake handler momentarily, since we might be doing
+ * this with interrupts enabled and don't want the real routine called
+ * until masking is set up.
+ */
+ fakehand.ih_level = level;
+ *p = &fakehand;
+
+ /*
+ * Fix up the vector for this pin.
+ * (if cold, defer this until most interrupts have been established,
+ * to avoid too much thrashing of the idt..)
+ */
+
+ if (!ioapic_cold)
+ apic_vectorset(sc, intr, minlevel, maxlevel);
+
+#if 0
+ apic_calculatemasks();
+#endif
+
+ /*
+ * Poke the real handler in now.
+ */
+ ih->ih_fun = ih_fun;
+ ih->ih_arg = ih_arg;
+ ih->ih_count = 0;
+ ih->ih_next = NULL;
+ ih->ih_level = level;
+ ih->ih_irq = irq;
+ ih->ih_what = what;
+ *p = ih;
+
+ return (ih);
+}
+
+/*
+ * apic disestablish:
+ * locate handler chain.
+ * dechain intrhand from handler chain
+ * if chain empty {
+ * reprogram apic for "safe" vector.
+ * free vector (point at stray handler).
+ * }
+ * #ifdef notyet
+ * else {
+ * recompute level for current chain.
+ * if changed, reallocate vector, move chain.
+ * }
+ * #endif
+ */
+
+/*
+ * Remove a previously established handler from its pin's chain and
+ * recompute the chain's IPL range; if the chain is now empty,
+ * apic_vectorset() is called with maxlevel == 0 and frees the vector.
+ */
+void
+apic_intr_disestablish(void *arg)
+{
+ struct intrhand *ih = arg;
+ int irq = ih->ih_irq;
+ unsigned int ioapic = APIC_IRQ_APIC(irq);
+ unsigned int intr = APIC_IRQ_PIN(irq);
+ struct ioapic_softc *sc = ioapic_find(ioapic);
+ /*
+ * NOTE(review): sc is dereferenced in this initializer *before* the
+ * NULL check below — if ioapic_find() returns NULL this faults
+ * before the intended panic can fire.
+ */
+ struct ioapic_pin *pin = &sc->sc_pins[intr];
+ struct intrhand **p, *q;
+ int minlevel, maxlevel;
+
+ if (sc == NULL)
+ panic("apic_intr_disestablish: unknown ioapic %d", ioapic);
+
+ if (intr >= sc->sc_apic_sz)
+ panic("apic_intr_disestablish: bogus irq");
+
+ /*
+ * Remove the handler from the chain.
+ * This is O(n^2), too.
+ */
+ maxlevel = 0;
+ minlevel = 0xff;
+ /* First loop: accumulate levels of handlers *before* ih. */
+ for (p = &pin->ip_handler; (q = *p) != NULL && q != ih;
+ p = &q->ih_next) {
+ if (q->ih_level > maxlevel)
+ maxlevel = q->ih_level;
+ if (q->ih_level < minlevel)
+ minlevel = q->ih_level;
+ }
+
+ if (q)
+ *p = q->ih_next;
+ else
+ panic("intr_disestablish: handler not registered");
+ /* Second loop: accumulate levels of handlers after ih. */
+ for (; q != NULL; q = q->ih_next) {
+ if (q->ih_level > maxlevel)
+ maxlevel = q->ih_level;
+ if (q->ih_level < minlevel)
+ minlevel = q->ih_level;
+ }
+
+ if (!ioapic_cold)
+ apic_vectorset(sc, intr, minlevel, maxlevel);
+
+ free(ih, M_DEVBUF);
+}
+
+#ifdef DDB
+void ioapic_dump(void);
+
+/*
+ * DDB helper: print the redirection entry of every configured pin on
+ * every ioapic in the system.
+ */
+void
+ioapic_dump(void)
+{
+ struct ioapic_softc *sc;
+ struct ioapic_pin *ip;
+ int p;
+
+ for (sc = ioapics; sc != NULL; sc = sc->sc_next) {
+ for (p = 0; p < sc->sc_apic_sz; p++) {
+ ip = &sc->sc_pins[p];
+ if (ip->ip_type != IST_NONE)
+ ioapic_print_redir(sc, "dump", p);
+ }
+ }
+}
+#endif
diff --git a/sys/arch/i386/i386/ipifuncs.c b/sys/arch/i386/i386/ipifuncs.c
new file mode 100644
index 00000000000..10256ca631a
--- /dev/null
+++ b/sys/arch/i386/i386/ipifuncs.c
@@ -0,0 +1,175 @@
+/* $OpenBSD: ipifuncs.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: ipifuncs.c,v 1.1.2.3 2000/06/26 02:04:06 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
+
+/*
+ * Interprocessor interrupt handlers.
+ */
+
+#include "npx.h"
+
+#include <sys/param.h>
+#include <sys/device.h>
+#include <sys/systm.h>
+
+#include <machine/cpufunc.h>
+#include <machine/cpuvar.h>
+#include <machine/intr.h>
+#include <machine/atomic.h>
+#include <machine/i82093var.h>
+#include <machine/db_machdep.h>
+
+#include <uvm/uvm_extern.h>
+
+void i386_ipi_halt(struct cpu_info *);
+
+#if NNPX > 0
+void i386_ipi_synch_fpu(struct cpu_info *);
+void i386_ipi_flush_fpu(struct cpu_info *);
+#else
+#define i386_ipi_synch_fpu 0
+#define i386_ipi_flush_fpu 0
+#endif
+
+/*
+ * Dispatch table for IPIs, indexed by bit number of the ci_ipis mask
+ * (presumably matching the I386_IPI_* bit layout — TODO confirm
+ * against machine/intr.h).
+ *
+ * NOTE(review): several slots are 0; i386_ipi_handler() calls
+ * (*ipifunc[bit])(ci) unconditionally, so setting one of those bits
+ * would dereference a null function pointer.
+ */
+void (*ipifunc[I386_NIPI])(struct cpu_info *) =
+{
+ i386_ipi_halt,
+#if 0 && (defined(I586_CPU) || defined(I686_CPU))
+ cc_microset,
+#else
+ 0,
+#endif
+ i386_ipi_flush_fpu,
+ i386_ipi_synch_fpu,
+ pmap_do_tlb_shootdown,
+#if 0
+ i386_reload_mtrr,
+ gdt_reload_cpu,
+#else
+ 0,
+ 0,
+#endif
+ i386_ipi_db,
+};
+
+/*
+ * IPI target: disable interrupts on this cpu and spin in hlt forever
+ * (used for system shutdown).  Never returns.
+ */
+void
+i386_ipi_halt(struct cpu_info *ci)
+{
+ disable_intr();
+
+ printf("%s: shutting down\n", ci->ci_dev.dv_xname);
+ for(;;) {
+ asm volatile("hlt");
+ }
+}
+
+#if NNPX > 0
+/* IPI target: save this cpu's FPU state, discarding ownership. */
+void
+i386_ipi_flush_fpu(struct cpu_info *ci)
+{
+ npxsave_cpu(ci, 0);
+}
+
+/* IPI target: save this cpu's FPU state, keeping it resident. */
+void
+i386_ipi_synch_fpu(struct cpu_info *ci)
+{
+ npxsave_cpu(ci, 1);
+}
+#endif
+
+/* Handler for the spurious-interrupt vector; just logs. */
+void
+i386_spurious(void)
+{
+ printf("spurious intr\n");
+}
+
+/*
+ * Post the given IPI bits to ci and kick it with the fixed IPI vector.
+ * Returns 0 on success, ENOENT if the target cpu isn't running yet, or
+ * the error from i386_ipi().
+ *
+ * NOTE(review): the bits are set in ci_ipis *before* the CPUF_RUNNING
+ * check, so they stay pending for a not-yet-running cpu.
+ */
+int
+i386_send_ipi(struct cpu_info *ci, int ipimask)
+{
+ int ret;
+
+ i386_atomic_setbits_l(&ci->ci_ipis, ipimask);
+
+ /* Don't send IPI to cpu which isn't (yet) running. */
+ if (!(ci->ci_flags & CPUF_RUNNING))
+ return ENOENT;
+
+ ret = i386_ipi(LAPIC_IPI_VECTOR, ci->ci_cpuid, LAPIC_DLMODE_FIXED);
+ if (ret != 0) {
+ printf("ipi of %x from %s to %s failed\n",
+ ipimask, curcpu()->ci_dev.dv_xname, ci->ci_dev.dv_xname);
+ }
+
+ return ret;
+}
+
+/* Send an IPI with the given vector to this cpu itself. */
+void
+i386_self_ipi(int vector)
+{
+ i82489_writereg(LAPIC_ICRLO,
+ vector | LAPIC_DLMODE_FIXED | LAPIC_LVL_ASSERT | LAPIC_DEST_SELF);
+}
+
+
+/* Broadcast to all cpus — not implemented yet; always panics. */
+void
+i386_broadcast_ipi(int ipimask)
+{
+ panic("broadcast_ipi not implemented");
+}
+
+/*
+ * Called from the IPI vector: atomically swap out the pending-IPI mask
+ * and run the ipifunc[] entry for each bit that was set.
+ */
+void
+i386_ipi_handler(void)
+{
+ struct cpu_info *ci = curcpu();
+ u_int32_t pending;
+ int bit;
+
+ pending = i386_atomic_testset_ul(&ci->ci_ipis, 0);
+
+ for (bit = 0; bit < I386_NIPI && pending; bit++) {
+ if (pending & (1<<bit)) {
+ pending &= ~(1<<bit);
+ (*ipifunc[bit])(ci);
+ }
+ }
+}
diff --git a/sys/arch/i386/i386/lapic.c b/sys/arch/i386/i386/lapic.c
new file mode 100644
index 00000000000..b4e4c36b5fa
--- /dev/null
+++ b/sys/arch/i386/i386/lapic.c
@@ -0,0 +1,474 @@
+/* $OpenBSD: lapic.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/cpu.h>
+#include <machine/cpufunc.h>
+#include <machine/cpuvar.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/mpbiosvar.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+#include <machine/segments.h>
+
+#include <machine/apicvar.h>
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+
+#include <i386/isa/timerreg.h> /* XXX for TIMER_FREQ */
+
+void lapic_delay(int);
+void lapic_microtime(struct timeval *);
+static __inline u_int32_t lapic_gettick(void);
+void lapic_clockintr(void *);
+void lapic_initclocks(void);
+void lapic_map(paddr_t);
+
+/*
+ * Map the local apic's register page at the fixed virtual address
+ * local_apic, uncached, with interrupts disabled throughout.
+ */
+void
+lapic_map(lapic_base)
+ paddr_t lapic_base;
+{
+ int s;
+ pt_entry_t *pte;
+ vaddr_t va = (vaddr_t)&local_apic;
+
+ disable_intr();
+ s = lapic_tpr;
+
+ /*
+ * Map local apic. If we have a local apic, it's safe to assume
+ * we're on a 486 or better and can use invlpg and non-cacheable PTE's
+ *
+ * Whap the PTE "by hand" rather than calling pmap_kenter_pa because
+ * the latter will attempt to invoke TLB shootdown code just as we
+ * might have changed the value of cpu_number()..
+ */
+
+ pte = kvtopte(va);
+ *pte = lapic_base | PG_RW | PG_V | PG_N;
+ invlpg(va);
+
+#ifdef MULTIPROCESSOR
+ cpu_init_first(); /* catch up to changed cpu_number() */
+#endif
+
+ lapic_tpr = s;
+ enable_intr();
+}
+
+/*
+ * enable local apic
+ */
+void
+lapic_enable()
+{
+ i82489_writereg(LAPIC_SVR, LAPIC_SVR_ENABLE | LAPIC_SPURIOUS_VECTOR);
+}
+
+extern struct mp_intr_map *lapic_ints[]; /* XXX header file? */
+
+/* Install the soft-interrupt IDT vectors used by the lapic code. */
+void
+lapic_set_softvectors()
+{
+ idt_vec_set(LAPIC_SOFTCLOCK_VECTOR, Xintrsoftclock);
+ idt_vec_set(LAPIC_SOFTNET_VECTOR, Xintrsoftnet);
+ idt_vec_set(LAPIC_SOFTTTY_VECTOR, Xintrsofttty);
+}
+
+/*
+ * Program the LINT0/LINT1 local-vector-table entries from the MP-table
+ * data in lapic_ints[]; with mp_verbose, dump the LVT before and after.
+ */
+void
+lapic_set_lvt()
+{
+#ifdef MULTIPROCESSOR
+ struct cpu_info *ci = curcpu();
+
+ if (mp_verbose) {
+ apic_format_redir(ci->ci_dev.dv_xname, "prelint", 0, 0,
+ i82489_readreg(LAPIC_LVINT0));
+ apic_format_redir(ci->ci_dev.dv_xname, "prelint", 1, 0,
+ i82489_readreg(LAPIC_LVINT1));
+ }
+#endif
+ if (lapic_ints[0])
+ i82489_writereg(LAPIC_LVINT0, lapic_ints[0]->redir);
+ if (lapic_ints[1])
+ i82489_writereg(LAPIC_LVINT1, lapic_ints[1]->redir);
+
+#ifdef MULTIPROCESSOR
+ if (mp_verbose) {
+ apic_format_redir(ci->ci_dev.dv_xname, "timer", 0, 0,
+ i82489_readreg(LAPIC_LVTT));
+ apic_format_redir(ci->ci_dev.dv_xname, "pcint", 0, 0,
+ i82489_readreg(LAPIC_PCINT));
+ apic_format_redir(ci->ci_dev.dv_xname, "lint", 0, 0,
+ i82489_readreg(LAPIC_LVINT0));
+ apic_format_redir(ci->ci_dev.dv_xname, "lint", 1, 0,
+ i82489_readreg(LAPIC_LVINT1));
+ apic_format_redir(ci->ci_dev.dv_xname, "err", 0, 0,
+ i82489_readreg(LAPIC_LVERR));
+ }
+#endif
+}
+
+/*
+ * Initialize fixed idt vectors for use by local apic.
+ */
+void
+lapic_boot_init(lapic_base)
+ paddr_t lapic_base;
+{
+ lapic_map(lapic_base);
+
+#ifdef MULTIPROCESSOR
+ idt_vec_set(LAPIC_IPI_VECTOR, Xintripi);
+#endif
+ idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
+ idt_vec_set(LAPIC_TIMER_VECTOR, Xintrltimer);
+}
+
+/* Read the lapic timer's current-count register. */
+static __inline u_int32_t
+lapic_gettick()
+{
+ return (i82489_readreg(LAPIC_CCR_TIMER));
+}
+
+#include <sys/kernel.h> /* for hz */
+
+int lapic_timer = 0;
+u_int32_t lapic_tval;
+
+/*
+ * this gets us up to a 4GHz busclock....
+ */
+u_int32_t lapic_per_second;
+u_int32_t lapic_frac_usec_per_cycle;
+u_int64_t lapic_frac_cycle_per_usec;
+u_int32_t lapic_delaytab[26];
+
+/* Lapic timer interrupt handler: just feed the frame to hardclock(). */
+void
+lapic_clockintr(arg)
+ void *arg;
+{
+ struct clockframe *frame = arg;
+
+ hardclock(frame);
+}
+
+void
+lapic_initclocks()
+{
+ /*
+ * Start local apic countdown timer running, in repeated mode.
+ *
+ * Mask the clock interrupt and set mode,
+ * then set divisor,
+ * then unmask and set the vector.
+ */
+ i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_LVTT_M);
+ i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
+ i82489_writereg(LAPIC_ICR_TIMER, lapic_tval);
+ i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_TIMER_VECTOR);
+}
+
+extern int gettick(void); /* XXX put in header file */
+extern void (*initclock_func)(void); /* XXX put in header file */
+
+/*
+ * Calibrate the local apic count-down timer (which is running at
+ * bus-clock speed) vs. the i8254 counter/timer (which is running at
+ * a fixed rate).
+ *
+ * The Intel MP spec says: "An MP operating system may use the IRQ8
+ * real-time clock as a reference to determine the actual APIC timer clock
+ * speed."
+ *
+ * We're actually using the IRQ0 timer. Hmm.
+ */
+void
+lapic_calibrate_timer(ci)
+ struct cpu_info *ci;
+{
+ unsigned int starttick, tick1, tick2, endtick;
+ unsigned int startapic, apic1, apic2, endapic;
+ u_int64_t dtick, dapic, tmp;
+ int i;
+ char tbuf[9];
+
+ if (mp_verbose)
+ printf("%s: calibrating local timer\n", ci->ci_dev.dv_xname);
+
+ /*
+ * Configure timer to one-shot, interrupt masked,
+ * large positive number.
+ */
+ i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_M);
+ i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
+ i82489_writereg(LAPIC_ICR_TIMER, 0x80000000);
+
+ starttick = gettick();
+ startapic = lapic_gettick();
+
+ DELAY(2); /* using "old" delay here.. */
+
+ /*
+ * Sample both timers for ~hz i8254 periods; the two inner loops
+ * presumably synchronize against the i8254 count wrapping past
+ * starttick each period — TODO confirm against gettick()'s
+ * count-down semantics.
+ */
+ for (i=0; i<hz; i++) {
+ do {
+ tick1 = gettick();
+ apic1 = lapic_gettick();
+ } while (tick1 < starttick);
+
+ do {
+ tick2 = gettick();
+ apic2 = lapic_gettick();
+ } while (tick2 > starttick);
+ }
+
+ endtick = gettick();
+ endapic = lapic_gettick();
+
+ dtick = hz * TIMER_DIV(hz) + (starttick-endtick);
+ dapic = startapic-endapic;
+
+ /*
+ * there are TIMER_FREQ ticks per second.
+ * in dtick ticks, there are dapic bus clocks.
+ */
+ tmp = (TIMER_FREQ * dapic) / dtick;
+
+ lapic_per_second = tmp;
+
+#if 0
+ humanize_number(tbuf, sizeof(tbuf), tmp, "Hz", 1000);
+#else /* XXX: from NetBSD sources... sigh. */
+ {
+ /* prefixes are: (none), Kilo, Mega, Giga, Tera, Peta, Exa */
+ static const char prefixes[] = " KMGTPE";
+
+ /* NOTE(review): this inner i shadows the outer loop counter. */
+ int i;
+ u_int64_t max;
+ size_t suffixlen;
+
+ /*
+ * NOTE(review): tbuf is an array, so this NULL check is
+ * always false (dead code inherited from humanize_number).
+ */
+ if (tbuf == NULL)
+ goto out;
+ if (sizeof(tbuf) > 0)
+ tbuf[0] = '\0';
+ suffixlen = sizeof "Hz" - 1;
+ /* check if enough room for `x y' + suffix + `\0' */
+ if (sizeof(tbuf) < 4 + suffixlen)
+ goto out;
+
+ max = 1;
+ for (i = 0; i < sizeof(tbuf) - suffixlen - 3; i++)
+ max *= 10;
+ for (i = 0; tmp >= max && i < sizeof(prefixes); i++)
+ tmp /= 1000;
+
+ snprintf(tbuf, sizeof(tbuf), "%qu%s%c%s",
+ (unsigned long long)tmp, i == 0 ? "" : " ", prefixes[i],
+ "Hz");
+ out:
+ ;
+ }
+#endif
+
+ printf("%s: apic clock running at %s\n", ci->ci_dev.dv_xname, tbuf);
+
+ if (lapic_per_second != 0) {
+ /*
+ * reprogram the apic timer to run in periodic mode.
+ * XXX need to program timer on other cpu's, too.
+ */
+ lapic_tval = (lapic_per_second * 2) / hz;
+ lapic_tval = (lapic_tval / 2) + (lapic_tval & 0x1);
+
+ i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM | LAPIC_LVTT_M |
+ LAPIC_TIMER_VECTOR);
+ i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
+ i82489_writereg(LAPIC_ICR_TIMER, lapic_tval);
+
+ /*
+ * Compute fixed-point ratios between cycles and
+ * microseconds to avoid having to do any division
+ * in lapic_delay and lapic_microtime.
+ */
+
+ tmp = (1000000 * (u_int64_t)1 << 32) / lapic_per_second;
+ lapic_frac_usec_per_cycle = tmp;
+
+ tmp = (lapic_per_second * (u_int64_t)1 << 32) / 1000000;
+
+ lapic_frac_cycle_per_usec = tmp;
+
+ /*
+ * Compute delay in cycles for likely short delays in usec.
+ */
+ for (i = 0; i < 26; i++)
+ lapic_delaytab[i] = (lapic_frac_cycle_per_usec * i) >>
+ 32;
+
+ /*
+ * Now that the timer's calibrated, use the apic timer routines
+ * for all our timing needs..
+ */
+ delay_func = lapic_delay;
+ microtime_func = lapic_microtime;
+ initclock_func = lapic_initclocks;
+ }
+}
+
+/*
+ * delay for N usec.
+ */
+
+void lapic_delay(usec)
+ int usec;
+{
+ int32_t tick, otick;
+ int64_t deltat; /* XXX may want to be 64bit */
+
+ otick = lapic_gettick();
+
+ if (usec <= 0)
+ return;
+ /* Small delays come from the precomputed table; else fixed-point. */
+ if (usec <= 25)
+ deltat = lapic_delaytab[usec];
+ else
+ deltat = (lapic_frac_cycle_per_usec * usec) >> 32;
+
+ /*
+ * Spin, consuming deltat timer cycles; tick > otick indicates the
+ * count-down timer reloaded (wrapped) since the last sample.
+ */
+ while (deltat > 0) {
+ tick = lapic_gettick();
+ if (tick > otick)
+ deltat -= lapic_tval - (tick - otick);
+ else
+ deltat -= otick - tick;
+ otick = tick;
+ }
+}
+
+/* NOTE(review): LAPIC_TICK_THRESH is not referenced in this code. */
+#define LAPIC_TICK_THRESH 200
+
+/*
+ * XXX need to make work correctly on other than cpu 0.
+ */
+
+/*
+ * Interpolate microseconds since the last hardclock from the lapic
+ * timer's remaining count, on top of the global time variable.
+ */
+void lapic_microtime(tv)
+ struct timeval *tv;
+{
+ struct timeval now;
+ u_int32_t tick;
+ u_int32_t usec;
+ u_int32_t tmp;
+
+ /* Snapshot timer and time atomically w.r.t. interrupts. */
+ disable_intr();
+ tick = lapic_gettick();
+ now = time;
+ enable_intr();
+
+ /* Elapsed cycles this period, converted via fixed-point ratio. */
+ tmp = lapic_tval - tick;
+ usec = ((u_int64_t)tmp * lapic_frac_usec_per_cycle) >> 32;
+
+ now.tv_usec += usec;
+ while (now.tv_usec >= 1000000) {
+ now.tv_sec += 1;
+ now.tv_usec -= 1000000;
+ }
+
+ *tv = now;
+}
+
+/*
+ * XXX the following belong mostly or partly elsewhere..
+ */
+
+/*
+ * Send the INIT assert/deassert IPI sequence to `target', busy-waiting
+ * for the ICR delivery-status bit between steps.  Returns 0 on
+ * success, EBUSY if the last delivery never completed.
+ */
+int
+i386_ipi_init(target)
+ int target;
+{
+ unsigned j;
+
+ /* Physical destination: program the target apic id into ICRHI. */
+ if ((target & LAPIC_DEST_MASK) == 0) {
+ i82489_writereg(LAPIC_ICRHI, target << LAPIC_ID_SHIFT);
+ }
+
+ i82489_writereg(LAPIC_ICRLO, (target & LAPIC_DEST_MASK) |
+ LAPIC_DLMODE_INIT | LAPIC_LVL_ASSERT );
+
+ for (j = 100000; j > 0; j--)
+ if ((i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) == 0)
+ break;
+
+ delay(10000);
+
+ i82489_writereg(LAPIC_ICRLO, (target & LAPIC_DEST_MASK) |
+ LAPIC_DLMODE_INIT | LAPIC_LVL_TRIG | LAPIC_LVL_DEASSERT);
+
+ for (j = 100000; j > 0; j--)
+ if ((i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) == 0)
+ break;
+
+ return (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY)?EBUSY:0;
+}
+
+/*
+ * Send a single IPI with vector `vec' and delivery mode `dl' to
+ * `target', waiting (bounded) for delivery to complete.  Returns 0 on
+ * success or EBUSY if the ICR stayed busy.
+ */
+int
+i386_ipi(vec,target,dl)
+ int vec,target,dl;
+{
+ unsigned j;
+
+ /* Physical destination: program the target apic id into ICRHI. */
+ if ((target & LAPIC_DEST_MASK) == 0)
+ i82489_writereg(LAPIC_ICRHI, target << LAPIC_ID_SHIFT);
+
+ i82489_writereg(LAPIC_ICRLO,
+ (target & LAPIC_DEST_MASK) | vec | dl | LAPIC_LVL_ASSERT);
+
+ for (j = 100000;
+ j > 0 && (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY);
+ j--)
+ ;
+
+ return (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) ? EBUSY : 0;
+}
diff --git a/sys/arch/i386/i386/lock_machdep.c b/sys/arch/i386/i386/lock_machdep.c
new file mode 100644
index 00000000000..0672d9eced4
--- /dev/null
+++ b/sys/arch/i386/i386/lock_machdep.c
@@ -0,0 +1,112 @@
+/* $OpenBSD: lock_machdep.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: lock_machdep.c,v 1.1.2.3 2000/05/03 14:40:30 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 1998, 1999 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
+ * NASA Ames Research Center.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
+
+/*
+ * Machine-dependent spin lock operations.
+ */
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/systm.h>
+
+#include <machine/atomic.h>
+#include <machine/lock.h>
+
+#include <ddb/db_output.h>
+
+#ifdef LOCKDEBUG
+
+/* Initialize a spinlock to the unlocked state. */
+void
+__cpu_simple_lock_init(lockp)
+ __cpu_simple_lock_t *lockp;
+{
+ *lockp = __SIMPLELOCK_UNLOCKED;
+}
+
+#if defined (DEBUG) && defined(DDB)
+int spin_limit = 10000000;
+#endif
+
+/*
+ * Acquire a spinlock by atomic test-and-set, spinning until the old
+ * value reads unlocked.  With DEBUG+DDB, drop into the debugger after
+ * spin_limit iterations to catch deadlocks.
+ */
+void
+__cpu_simple_lock(lockp)
+ __cpu_simple_lock_t *lockp;
+{
+#if defined (DEBUG) && defined(DDB)
+ int spincount = 0;
+#endif
+
+ while (i386_atomic_testset_i(lockp, __SIMPLELOCK_LOCKED)
+ == __SIMPLELOCK_LOCKED) {
+#if defined(DEBUG) && defined(DDB)
+ spincount++;
+ if (spincount == spin_limit) {
+ extern int db_active;
+ db_printf("spundry\n");
+ if (db_active) {
+ db_printf("but already in debugger\n");
+ } else {
+ Debugger();
+ }
+ }
+#endif
+ }
+}
+
+/*
+ * Try to acquire the lock once; returns 1 if the test-and-set found it
+ * unlocked (we now own it), 0 otherwise.
+ */
+int
+__cpu_simple_lock_try(lockp)
+ __cpu_simple_lock_t *lockp;
+{
+
+ if (i386_atomic_testset_i(lockp, __SIMPLELOCK_LOCKED)
+ == __SIMPLELOCK_UNLOCKED)
+ return (1);
+ return (0);
+}
+
+/* Release a spinlock by storing the unlocked value. */
+void
+__cpu_simple_unlock(lockp)
+ __cpu_simple_lock_t *lockp;
+{
+ *lockp = __SIMPLELOCK_UNLOCKED;
+}
+
+#endif
diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s
index 883e7f837c4..c738cee27c1 100644
--- a/sys/arch/i386/i386/locore.s
+++ b/sys/arch/i386/i386/locore.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.s,v 1.77 2004/02/01 19:05:23 deraadt Exp $ */
+/* $OpenBSD: locore.s,v 1.78 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */
/*-
@@ -39,6 +39,8 @@
#include "npx.h"
#include "assym.h"
#include "apm.h"
+#include "lapic.h"
+#include "ioapic.h"
#include "pctr.h"
#include "ksyms.h"
@@ -63,6 +65,10 @@
#include <dev/isa/isareg.h>
+#if NLAPIC > 0
+#include <machine/i82489reg.h>
+#endif
+
/*
* override user-land alignment before including asm.h
*/
@@ -83,6 +89,57 @@
#define NOP pushl %eax ; inb $0x84,%al ; inb $0x84,%al ; popl %eax
#endif
+#define CPL _C_LABEL(lapic_tpr)
+
+#if defined(MULTIPROCESSOR)
+#include <machine/i82489reg.h>
+
+#define GET_CPUINFO(reg) \
+ movl _C_LABEL(lapic_id),reg ; \
+ shrl $LAPIC_ID_SHIFT,reg ; \
+ movl _C_LABEL(cpu_info)(,reg,4),reg
+#else
+#define GET_CPUINFO(reg) \
+ leal _C_LABEL(cpu_info_primary),reg
+#endif
+
+#define GET_CURPROC(reg, treg) \
+ GET_CPUINFO(treg) ; \
+ movl CPU_INFO_CURPROC(treg),reg
+
+#define PUSH_CURPROC(treg) \
+ GET_CPUINFO(treg) ; \
+ pushl CPU_INFO_CURPROC(treg)
+
+#define CLEAR_CURPROC(treg) \
+ GET_CPUINFO(treg) ; \
+ movl $0,CPU_INFO_CURPROC(treg)
+
+#define SET_CURPROC(proc,cpu) \
+ GET_CPUINFO(cpu) ; \
+ movl proc,CPU_INFO_CURPROC(cpu) ; \
+ movl cpu,P_CPU(proc)
+
+#define GET_CURPCB(reg) \
+ GET_CPUINFO(reg) ; \
+ movl CPU_INFO_CURPCB(reg),reg
+
+#define SET_CURPCB(reg,treg) \
+ GET_CPUINFO(treg) ; \
+ movl reg,CPU_INFO_CURPCB(treg)
+
+#define CLEAR_RESCHED(treg) \
+ GET_CPUINFO(treg) ; \
+ xorl %eax,%eax ; \
+ movl %eax,CPU_INFO_RESCHED(treg)
+
+#define CHECK_ASTPENDING(treg) \
+ GET_CPUINFO(treg) ; \
+ cmpl $0,CPU_INFO_ASTPENDING(treg)
+
+#define CLEAR_ASTPENDING(cireg) \
+ movl $0,CPU_INFO_ASTPENDING(cireg)
+
/*
* These are used on interrupt or trap entry or exit.
*/
@@ -143,11 +200,39 @@
.globl _C_LABEL(cpu_feature), _C_LABEL(cpu_ecxfeature)
.globl _C_LABEL(cpu_cache_eax), _C_LABEL(cpu_cache_ebx)
.globl _C_LABEL(cpu_cache_ecx), _C_LABEL(cpu_cache_edx)
- .globl _C_LABEL(cold), _C_LABEL(esym)
+ .globl _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem)
+ .globl _C_LABEL(esym)
.globl _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase)
- .globl _C_LABEL(proc0paddr), _C_LABEL(curpcb), _C_LABEL(PTDpaddr)
- .globl _C_LABEL(dynamic_gdt)
+ .globl _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr)
+ .globl _C_LABEL(gdt)
.globl _C_LABEL(bootapiver), _C_LABEL(bootargc), _C_LABEL(bootargv)
+#ifndef MULTIPROCESSOR
+ .globl _C_LABEL(curpcb)
+#endif
+ .globl _C_LABEL(lapic_tpr)
+
+#if NLAPIC > 0
+#ifdef __ELF__
+ .align NBPG
+#else
+ .align 12
+#endif
+ .globl _C_LABEL(local_apic), _C_LABEL(lapic_id)
+_C_LABEL(local_apic):
+ .space LAPIC_ID
+_C_LABEL(lapic_id):
+ .long 0x00000000
+ .space LAPIC_TPRI-(LAPIC_ID+4)
+_C_LABEL(lapic_tpr):
+ .space LAPIC_PPRI-LAPIC_TPRI
+_C_LABEL(lapic_ppr):
+ .space LAPIC_ISR-LAPIC_PPRI
+_C_LABEL(lapic_isr):
+ .space NBPG-LAPIC_ISR
+#else
+_C_LABEL(lapic_tpr):
+ .long 0
+#endif
_C_LABEL(cpu): .long 0 # are we 386, 386sx, 486, 586 or 686
_C_LABEL(cpu_id): .long 0 # saved from 'cpuid' instruction
@@ -162,6 +247,8 @@ _C_LABEL(cpu_vendor): .space 16 # vendor string returned by 'cpuid' instruction
_C_LABEL(cpu_brandstr): .space 48 # brand string returned by 'cpuid'
_C_LABEL(cold): .long 1 # cold till we are not
_C_LABEL(esym): .long 0 # ptr to end of syms
+_C_LABEL(cnvmem): .long 0 # conventional memory size
+_C_LABEL(extmem): .long 0 # extended memory size
_C_LABEL(boothowto): .long 0 # boot flags
_C_LABEL(atdevbase): .long 0 # location of start of iomem in virtual
_C_LABEL(bootapiver): .long 0 # /boot API version
@@ -625,6 +712,10 @@ begin:
call _C_LABEL(main)
NENTRY(proc_trampoline)
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(proc_trampoline_mp)
+#endif
+ movl $IPL_NONE,CPL
pushl %ebx
call *%esi
addl $4,%esp
@@ -759,20 +850,32 @@ ENTRY(fillw)
popl %edi
ret
+
+/* Frame pointer reserve on stack. */
+#ifdef DDB
+#define FPADD 4
+#else
+#define FPADD 0
+#endif
+
/*
* kcopy(caddr_t from, caddr_t to, size_t len);
* Copy len bytes, abort on fault.
*/
ENTRY(kcopy)
+#ifdef DDB
+ pushl %ebp
+ movl %esp,%ebp
+#endif
pushl %esi
pushl %edi
- movl _C_LABEL(curpcb),%eax # load curpcb into eax and set on-fault
+ GET_CURPCB(%eax) # load curpcb into eax and set on-fault
pushl PCB_ONFAULT(%eax)
movl $_C_LABEL(copy_fault), PCB_ONFAULT(%eax)
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- movl 24(%esp),%ecx
+ movl 16+FPADD(%esp),%esi
+ movl 20+FPADD(%esp),%edi
+ movl 24+FPADD(%esp),%ecx
movl %edi,%eax
subl %esi,%eax
cmpl %ecx,%eax # overlapping?
@@ -781,16 +884,19 @@ ENTRY(kcopy)
shrl $2,%ecx # copy by 32-bit words
rep
movsl
- movl 24(%esp),%ecx
+ movl 24+FPADD(%esp),%ecx
andl $3,%ecx # any bytes left?
rep
movsb
- movl _C_LABEL(curpcb),%edx
+ GET_CURPCB(%edx) # XXX save curpcb?
popl PCB_ONFAULT(%edx)
popl %edi
popl %esi
xorl %eax,%eax
+#ifdef DDB
+ leave
+#endif
ret
ALIGN_TEXT
@@ -802,7 +908,7 @@ ENTRY(kcopy)
decl %esi
rep
movsb
- movl 24(%esp),%ecx # copy remainder by 32-bit words
+ movl 24+FPADD(%esp),%ecx # copy remainder by 32-bit words
shrl $2,%ecx
subl $3,%esi
subl $3,%edi
@@ -810,11 +916,14 @@ ENTRY(kcopy)
movsl
cld
- movl _C_LABEL(curpcb),%edx
+ GET_CURPCB(%edx)
popl PCB_ONFAULT(%edx)
popl %edi
popl %esi
xorl %eax,%eax
+#ifdef DDB
+ leave
+#endif
ret
/*
@@ -885,13 +994,17 @@ ENTRY(memcpy)
* Copy len bytes into the user's address space.
*/
ENTRY(copyout)
+#ifdef DDB
+ pushl %ebp
+ movl %esp,%ebp
+#endif
pushl %esi
pushl %edi
pushl $0
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- movl 24(%esp),%eax
+ movl 16+FPADD(%esp),%esi
+ movl 20+FPADD(%esp),%edi
+ movl 24+FPADD(%esp),%eax
/*
* We check that the end of the destination buffer is not past the end
@@ -930,7 +1043,7 @@ ENTRY(copyout)
/* Compute PTE offset for start address. */
shrl $PGSHIFT,%edi
- movl _C_LABEL(curpcb), %edx
+ GET_CURPCB(%edx)
movl $2f, PCB_ONFAULT(%edx)
1: /* Check PTE for each page. */
@@ -941,8 +1054,8 @@ ENTRY(copyout)
decl %ecx
jns 1b
- movl 20(%esp),%edi
- movl 24(%esp),%eax
+ movl 20+FPADD(%esp),%edi
+ movl 24+FPADD(%esp),%eax
jmp 3f
2: /* Simulate a trap. */
@@ -958,7 +1071,7 @@ ENTRY(copyout)
jmp _C_LABEL(copy_fault)
#endif /* I386_CPU */
-3: movl _C_LABEL(curpcb),%edx
+3: GET_CURPCB(%edx)
movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx)
/* bcopy(%esi, %edi, %eax); */
@@ -976,6 +1089,9 @@ ENTRY(copyout)
popl %edi
popl %esi
xorl %eax,%eax
+#ifdef DDB
+ leave
+#endif
ret
/*
@@ -983,15 +1099,19 @@ ENTRY(copyout)
* Copy len bytes from the user's address space.
*/
ENTRY(copyin)
+#ifdef DDB
+ pushl %ebp
+ movl %esp,%ebp
+#endif
pushl %esi
pushl %edi
- movl _C_LABEL(curpcb),%eax
+ GET_CURPCB(%eax)
pushl $0
movl $_C_LABEL(copy_fault),PCB_ONFAULT(%eax)
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- movl 24(%esp),%eax
+ movl 16+FPADD(%esp),%esi
+ movl 20+FPADD(%esp),%edi
+ movl 24+FPADD(%esp),%eax
/*
* We check that the end of the destination buffer is not past the end
@@ -1015,19 +1135,25 @@ ENTRY(copyin)
rep
movsb
- movl _C_LABEL(curpcb),%edx
+ GET_CURPCB(%edx)
popl PCB_ONFAULT(%edx)
popl %edi
popl %esi
xorl %eax,%eax
+#ifdef DDB
+ leave
+#endif
ret
ENTRY(copy_fault)
- movl _C_LABEL(curpcb),%edx
+ GET_CURPCB(%edx)
popl PCB_ONFAULT(%edx)
popl %edi
popl %esi
movl $EFAULT,%eax
+#ifdef DDB
+ leave
+#endif
ret
/*
@@ -1038,12 +1164,16 @@ ENTRY(copy_fault)
* return 0 or EFAULT.
*/
ENTRY(copyoutstr)
+#ifdef DDB
+ pushl %ebp
+ movl %esp,%ebp
+#endif
pushl %esi
pushl %edi
- movl 12(%esp),%esi # esi = from
- movl 16(%esp),%edi # edi = to
- movl 20(%esp),%edx # edx = maxlen
+ movl 12+FPADD(%esp),%esi # esi = from
+ movl 16+FPADD(%esp),%edi # edi = to
+ movl 20+FPADD(%esp),%edx # edx = maxlen
#if defined(I386_CPU)
#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
@@ -1057,8 +1187,8 @@ ENTRY(copyoutstr)
movl $NBPG,%ecx
subl %eax,%ecx # ecx = NBPG - (src % NBPG)
- movl _C_LABEL(curpcb), %eax
- movl $6f, PCB_ONFAULT(%eax)
+ GET_CURPCB(%ecx)
+ movl $6f, PCB_ONFAULT(%ecx)
1: /*
* Once per page, check that we are still within the bounds of user
@@ -1112,7 +1242,7 @@ ENTRY(copyoutstr)
#endif /* I386_CPU */
#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
-5: movl _C_LABEL(curpcb), %eax
+5: GET_CURPCB(%eax)
movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax)
/*
* Get min(%edx, VM_MAXUSER_ADDRESS-%edi).
@@ -1122,7 +1252,7 @@ ENTRY(copyoutstr)
cmpl %edx,%eax
jae 1f
movl %eax,%edx
- movl %eax,20(%esp)
+ movl %eax,20+FPADD(%esp)
1: incl %edx
cld
@@ -1154,14 +1284,18 @@ ENTRY(copyoutstr)
* return 0 or EFAULT.
*/
ENTRY(copyinstr)
+#ifdef DDB
+ pushl %ebp
+ movl %esp,%ebp
+#endif
pushl %esi
pushl %edi
- movl _C_LABEL(curpcb),%ecx
+ GET_CURPCB(%ecx)
movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx)
- movl 12(%esp),%esi # %esi = from
- movl 16(%esp),%edi # %edi = to
- movl 20(%esp),%edx # %edx = maxlen
+ movl 12+FPADD(%esp),%esi # %esi = from
+ movl 16+FPADD(%esp),%edi # %edi = to
+ movl 20+FPADD(%esp),%edx # %edx = maxlen
/*
* Get min(%edx, VM_MAXUSER_ADDRESS-%esi).
@@ -1171,7 +1305,7 @@ ENTRY(copyinstr)
cmpl %edx,%eax
jae 1f
movl %eax,%edx
- movl %eax,20(%esp)
+ movl %eax,20+FPADD(%esp)
1: incl %edx
cld
@@ -1199,17 +1333,20 @@ ENTRY(copystr_fault)
copystr_return:
/* Set *lencopied and return %eax. */
- movl _C_LABEL(curpcb),%ecx
+ GET_CURPCB(%ecx)
movl $0,PCB_ONFAULT(%ecx)
- movl 20(%esp),%ecx
+ movl 20+FPADD(%esp),%ecx
subl %edx,%ecx
- movl 24(%esp),%edx
+ movl 24+FPADD(%esp),%edx
testl %edx,%edx
jz 8f
movl %ecx,(%edx)
8: popl %edi
popl %esi
+#ifdef DDB
+ leave
+#endif
ret
/*
@@ -1219,12 +1356,16 @@ copystr_return:
* string is too long, return ENAMETOOLONG; else return 0.
*/
ENTRY(copystr)
+#ifdef DDB
+ pushl %ebp
+ movl %esp,%ebp
+#endif
pushl %esi
pushl %edi
- movl 12(%esp),%esi # esi = from
- movl 16(%esp),%edi # edi = to
- movl 20(%esp),%edx # edx = maxlen
+ movl 12+FPADD(%esp),%esi # esi = from
+ movl 16+FPADD(%esp),%edi # edi = to
+ movl 20+FPADD(%esp),%edx # edx = maxlen
incl %edx
cld
@@ -1244,15 +1385,18 @@ ENTRY(copystr)
movl $ENAMETOOLONG,%eax
6: /* Set *lencopied and return %eax. */
- movl 20(%esp),%ecx
+ movl 20+FPADD(%esp),%ecx
subl %edx,%ecx
- movl 24(%esp),%edx
+ movl 24+FPADD(%esp),%edx
testl %edx,%edx
jz 7f
movl %ecx,(%edx)
7: popl %edi
popl %esi
+#ifdef DDB
+ leave
+#endif
ret
/*****************************************************************************/
@@ -1277,6 +1421,8 @@ NENTRY(lgdt)
movw %ax,%ds
movw %ax,%es
movw %ax,%ss
+ movl $GSEL(GCPU_SEL, SEL_KPL),%eax
+ movw %ax,%fs
/* Reload code selector by doing intersegment return. */
popl %eax
pushl $GSEL(GCODE_SEL, SEL_KPL)
@@ -1389,11 +1535,85 @@ NENTRY(remrunqueue)
* something to come ready.
*/
ENTRY(idle)
+ /* Skip context saving if we have none. */
+ testl %esi,%esi
+ jz 1f
+
+ /*
+ * idling: save old context.
+ *
+ * Registers:
+ * %eax, %ebx, %ecx - scratch
+ * %esi - old proc, then old pcb
+ * %edi - idle pcb
+ * %edx - idle TSS selector
+ */
+
+ pushl %esi
+ call _C_LABEL(pmap_deactivate) # pmap_deactivate(oldproc)
+ addl $4,%esp
+
+ movl P_ADDR(%esi),%esi
+
+ /* Save stack pointers. */
+ movl %esp,PCB_ESP(%esi)
+ movl %ebp,PCB_EBP(%esi)
+
+ /* Find idle PCB for this CPU */
+#ifndef MULTIPROCESSOR
+ movl $_C_LABEL(proc0),%ebx
+ movl P_ADDR(%ebx),%edi
+ movl P_MD_TSS_SEL(%ebx),%edx
+#else
+ GET_CPUINFO(%ebx)
+ movl CPU_INFO_IDLE_PCB(%ebx),%edi
+ movl CPU_INFO_IDLE_TSS_SEL(%ebx),%edx
+#endif
+
+ /* Restore the idle context (avoid interrupts) */
cli
- movl _C_LABEL(whichqs),%ecx
- testl %ecx,%ecx
- jnz sw1
+
+ /* Restore stack pointers. */
+ movl PCB_ESP(%edi),%esp
+ movl PCB_EBP(%edi),%ebp
+
+
+ /* Switch address space. */
+ movl PCB_CR3(%edi),%ecx
+ movl %ecx,%cr3
+
+ /* Switch TSS. Reset "task busy" flag before loading. */
+#ifdef MULTIPROCESSOR
+ movl CPU_INFO_GDT(%ebx),%eax
+#else
+ movl _C_LABEL(gdt),%eax
+#endif
+ andl $~0x0200,4-SEL_KPL(%eax,%edx,1)
+ ltr %dx
+
+ /* We're always in the kernel, so we don't need the LDT. */
+
+ /* Restore cr0 (including FPU state). */
+ movl PCB_CR0(%edi),%ecx
+ movl %ecx,%cr0
+
+ /* Record new pcb. */
+ SET_CURPCB(%edi,%ecx)
+
+ xorl %esi,%esi
sti
+
+1:
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+ call _C_LABEL(sched_unlock_idle)
+#endif
+
+ movl $IPL_NONE,CPL # spl0()
+ call _C_LABEL(Xspllower) # process pending interrupts
+
+ENTRY(idle_loop)
+ cmpl $0,_C_LABEL(whichqs)
+ jnz _C_LABEL(idle_exit)
#if NAPM > 0
call _C_LABEL(apm_cpu_idle)
cmpl $0,_C_LABEL(apm_dobusy)
@@ -1407,8 +1627,24 @@ ENTRY(idle)
#else
hlt
#endif
- jmp _C_LABEL(idle)
+ jmp _C_LABEL(idle_loop)
+ENTRY(idle_exit)
+ movl $IPL_HIGH,CPL # splhigh
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+ call _C_LABEL(sched_lock_idle)
+#endif
+#if 0
+ GET_CPUINFO(%ebx)
+ leal CPU_INFO_NAME(%ebx),%ebx
+ pushl %ebx
+ pushl $1f
+ call _C_LABEL(printf)
+ addl $8,%esp
+#endif
+ jmp switch_search
+1: .asciz "%s: unidle\n"
+
#ifdef DIAGNOSTIC
NENTRY(switch_error)
pushl $1f
@@ -1427,9 +1663,9 @@ ENTRY(cpu_switch)
pushl %ebx
pushl %esi
pushl %edi
- pushl _C_LABEL(cpl)
+ pushl CPL
- movl _C_LABEL(curproc),%esi
+ GET_CURPROC(%esi,%ecx)
/*
* Clear curproc so that we don't accumulate system time while idle.
@@ -1438,10 +1674,7 @@ ENTRY(cpu_switch)
* below and changes the priority. (See corresponding comment in
* userret()).
*/
- movl $0,_C_LABEL(curproc)
-
- movl $IPL_NONE,_C_LABEL(cpl) # spl0()
- call _C_LABEL(Xspllower) # process pending interrupts
+ CLEAR_CURPROC(%ecx)
switch_search:
/*
@@ -1457,14 +1690,10 @@ switch_search:
*/
/* Wait for new process. */
- cli # splhigh doesn't do a cli
movl _C_LABEL(whichqs),%ecx
-
-sw1: bsfl %ecx,%ebx # find a full q
+ bsfl %ecx,%ebx # find a full q
jz _C_LABEL(idle) # if none, idle
-
leal _C_LABEL(qs)(,%ebx,8),%eax # select q
-
movl P_FORW(%eax),%edi # unlink from front of process q
#ifdef DIAGNOSTIC
cmpl %edi,%eax # linked to self (i.e. nothing queued)?
@@ -1481,8 +1710,7 @@ sw1: bsfl %ecx,%ebx # find a full q
movl %ecx,_C_LABEL(whichqs) # update q status
3: /* We just did it. */
- xorl %eax,%eax
- movl %eax,_C_LABEL(want_resched)
+ CLEAR_RESCHED(%ecx)
#ifdef DIAGNOSTIC
cmpl %eax,P_WCHAN(%edi) # Waiting for something?
@@ -1495,10 +1723,8 @@ sw1: bsfl %ecx,%ebx # find a full q
movl %eax,P_BACK(%edi)
/* Record new process. */
- movl %edi,_C_LABEL(curproc)
-
- /* It's okay to take interrupts here. */
- sti
+ movb $SONPROC,P_STAT(%edi) # p->p_stat = SONPROC
+ SET_CURPROC(%edi,%ecx)
/* Skip context switch if same process. */
cmpl %edi,%esi
@@ -1517,6 +1743,10 @@ sw1: bsfl %ecx,%ebx # find a full q
* %edi - new process
*/
+ pushl %esi
+ call _C_LABEL(pmap_deactivate)
+ addl $4,%esp
+
movl P_ADDR(%esi),%esi
/* Save segment registers. */
@@ -1557,13 +1787,24 @@ switch_exited:
jnz switch_restored
#endif
+ /*
+ * Activate the address space. We're curproc, so %cr3 will
+ * be reloaded, but we're not yet curpcb, so the LDT won't
+ * be reloaded, although the PCB copy of the selector will
+ * be refreshed from the pmap.
+ */
+ pushl %edi
+ call _C_LABEL(pmap_activate)
+ addl $4,%esp
+
/* Load TSS info. */
- movl _C_LABEL(dynamic_gdt),%eax
- movl PCB_TSS_SEL(%esi),%edx
-
- /* Switch address space. */
- movl PCB_CR3(%esi),%ecx
- movl %ecx,%cr3
+#ifdef MULTIPROCESSOR
+ GET_CPUINFO(%ebx)
+ movl CPU_INFO_GDT(%ebx),%eax
+#else
+ movl _C_LABEL(gdt),%eax
+#endif
+ movl P_MD_TSS_SEL(%edi),%edx
/* Switch TSS. */
andl $~0x0200,4-SEL_KPL(%eax,%edx,1)
@@ -1590,51 +1831,87 @@ switch_exited:
switch_restored:
/* Restore cr0 (including FPU state). */
movl PCB_CR0(%esi),%ecx
+#ifdef MULTIPROCESSOR
+ /*
+ * If our floating point registers are on a different CPU,
+ * clear CR0_TS so we'll trap rather than reuse bogus state.
+ */
+ GET_CPUINFO(%ebx)
+ cmpl PCB_FPCPU(%esi),%ebx
+ jz 1f
+ orl $CR0_TS,%ecx
+1:
+#endif
movl %ecx,%cr0
/* Record new pcb. */
- movl %esi,_C_LABEL(curpcb)
+ SET_CURPCB(%esi, %ecx)
/* Interrupts are okay again. */
sti
switch_return:
+#if 0
+ pushl %edi
+ GET_CPUINFO(%ebx)
+ leal CPU_INFO_NAME(%ebx),%ebx
+ pushl %ebx
+ pushl $1f
+ call _C_LABEL(printf)
+ addl $0xc,%esp
+#endif
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+ call _C_LABEL(sched_unlock_idle)
+#endif
/*
* Restore old cpl from stack. Note that this is always an increase,
* due to the spl0() on entry.
*/
- popl _C_LABEL(cpl)
+ popl CPL
movl %edi,%eax # return (p);
popl %edi
popl %esi
popl %ebx
ret
-
+1: .asciz "%s: scheduled %x\n"
/*
* switch_exit(struct proc *p);
- * Switch to proc0's saved context and deallocate the address space and kernel
- * stack for p. Then jump into cpu_switch(), as if we were in proc0 all along.
+ * Switch to the appropriate idle context (proc0's if uniprocessor; the cpu's if
+ * multiprocessor) and deallocate the address space and kernel stack for p.
+ * Then jump into cpu_switch(), as if we were in the idle proc all along.
*/
+#ifndef MULTIPROCESSOR
.globl _C_LABEL(proc0)
+#endif
ENTRY(switch_exit)
movl 4(%esp),%edi # old process
+#ifndef MULTIPROCESSOR
movl $_C_LABEL(proc0),%ebx
+ movl P_ADDR(%ebx),%esi
+ movl P_MD_TSS_SEL(%ebx),%edx
+#else
+ GET_CPUINFO(%ebx)
+ movl CPU_INFO_IDLE_PCB(%ebx),%esi
+ movl CPU_INFO_IDLE_TSS_SEL(%ebx),%edx
+#endif
/* In case we fault... */
- movl $0,_C_LABEL(curproc)
+ CLEAR_CURPROC(%ecx)
- /* Restore proc0's context. */
+ /* Restore the idle context. */
cli
- movl P_ADDR(%ebx),%esi
/* Restore stack pointers. */
movl PCB_ESP(%esi),%esp
movl PCB_EBP(%esi),%ebp
/* Load TSS info. */
- movl _C_LABEL(dynamic_gdt),%eax
- movl PCB_TSS_SEL(%esi),%edx
+#ifdef MULTIPROCESSOR
+ movl CPU_INFO_GDT(%ebx),%eax
+#else
+ movl _C_LABEL(gdt),%eax
+#endif
/* Switch address space. */
movl PCB_CR3(%esi),%ecx
@@ -1656,7 +1933,7 @@ ENTRY(switch_exit)
movl %ecx,%cr0
/* Record new pcb. */
- movl %esi,_C_LABEL(curpcb)
+ SET_CURPCB(%esi, %ecx)
/* Interrupts are okay again. */
sti
@@ -1669,8 +1946,8 @@ ENTRY(switch_exit)
addl $4,%esp
/* Jump into cpu_switch() with the right state. */
- movl %ebx,%esi
- movl $0,_C_LABEL(curproc)
+ xorl %esi,%esi
+ CLEAR_CURPROC(%ecx)
jmp switch_search
/*
@@ -1746,7 +2023,12 @@ IDTVEC(dna)
pushl $0 # dummy error code
pushl $T_DNA
INTRENTRY
- pushl _C_LABEL(curproc)
+#ifdef MULTIPROCESSOR
+ GET_CPUINFO(%eax)
+ pushl %eax
+#else
+ pushl $_C_LABEL(cpu_info_primary)
+#endif
call *_C_LABEL(npxdna_func)
addl $4,%esp
testl %eax,%eax
@@ -1784,6 +2066,16 @@ IDTVEC(page)
TRAP(T_PAGEFLT)
IDTVEC(rsvd)
ZTRAP(T_RESERVED)
+IDTVEC(intrspurious)
+ /*
+ * The Pentium Pro local APIC may erroneously call this vector for a
+ * default IR7. Just ignore it.
+ *
+ * (The local APIC does this when CPL is raised while it's on the
+ * way to delivering an interrupt.. presumably enough has been set
+ * up that it's inconvenient to abort delivery completely..)
+ */
+ iret
IDTVEC(fpu)
#if NNPX > 0
/*
@@ -1794,7 +2086,7 @@ IDTVEC(fpu)
pushl $0 # dummy error code
pushl $T_ASTFLT
INTRENTRY
- pushl _C_LABEL(cpl) # if_ppl in intrframe
+ pushl CPL # if_ppl in intrframe
pushl %esp # push address of intrframe
incl _C_LABEL(uvmexp)+V_TRAP
call _C_LABEL(npxintr)
@@ -1826,12 +2118,12 @@ NENTRY(alltraps)
INTRENTRY
calltrap:
#ifdef DIAGNOSTIC
- movl _C_LABEL(cpl),%ebx
+ movl CPL,%ebx
#endif /* DIAGNOSTIC */
call _C_LABEL(trap)
2: /* Check for ASTs on exit to user mode. */
cli
- cmpb $0,_C_LABEL(astpending)
+ CHECK_ASTPENDING(%ecx)
je 1f
testb $SEL_RPL,TF_CS(%esp)
#ifdef VM86
@@ -1839,15 +2131,15 @@ calltrap:
testl $PSL_VM,TF_EFLAGS(%esp)
#endif
jz 1f
-5: movb $0,_C_LABEL(astpending)
+5: CLEAR_ASTPENDING(%ecx)
sti
movl $T_ASTFLT,TF_TRAPNO(%esp)
call _C_LABEL(trap)
jmp 2b
#ifndef DIAGNOSTIC
1: INTRFASTEXIT
-#else /* DIAGNOSTIC */
-1: cmpl _C_LABEL(cpl),%ebx
+#else
+1: cmpl CPL,%ebx
jne 3f
INTRFASTEXIT
3: sti
@@ -1857,7 +2149,7 @@ calltrap:
#if defined(DDB) && 0
int $3
#endif /* DDB */
- movl %ebx,_C_LABEL(cpl)
+ movl %ebx,CPL
jmp 2b
4: .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT\n"
#endif /* DIAGNOSTIC */
@@ -1888,17 +2180,17 @@ syscall1:
call _C_LABEL(syscall)
2: /* Check for ASTs on exit to user mode. */
cli
- cmpb $0,_C_LABEL(astpending)
+ CHECK_ASTPENDING(%ecx)
je 1f
/* Always returning to user mode here. */
- movb $0,_C_LABEL(astpending)
+ CLEAR_ASTPENDING(%ecx)
sti
/* Pushed T_ASTFLT into tf_trapno on entry. */
call _C_LABEL(trap)
jmp 2b
1: INTRFASTEXIT
-#include <i386/isa/vector.s>
+#include <i386/i386/vector.s>
#include <i386/isa/icu.s>
/*
@@ -2039,3 +2331,7 @@ ENTRY(i686_pagezero)
popl %edi
ret
#endif
+
+#if NLAPIC > 0
+#include <i386/i386/apicvec.s>
+#endif
diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c
index 07ac282f32d..2bd03934ac6 100644
--- a/sys/arch/i386/i386/machdep.c
+++ b/sys/arch/i386/i386/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.296 2004/06/09 16:01:48 tedu Exp $ */
+/* $OpenBSD: machdep.c,v 1.297 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */
/*-
@@ -115,6 +115,7 @@
#include <machine/cpu.h>
#include <machine/cpufunc.h>
+#include <machine/cpuvar.h>
#include <machine/gdt.h>
#include <machine/pio.h>
#include <machine/bus.h>
@@ -248,6 +249,9 @@ int p4_model;
int p3_step;
int setperf_prio = 0; /* for concurrent handlers */
+void (*delay_func)(int) = i8254_delay;
+void (*microtime_func)(struct timeval *) = i8254_microtime;
+void (*initclock_func)(void) = i8254_initclocks;
void (*update_cpuspeed)(void) = NULL;
/*
@@ -272,7 +276,7 @@ caddr_t allocsys(caddr_t);
void setup_buffers(vaddr_t *);
void dumpsys(void);
int cpu_dump(void);
-void identifycpu(void);
+void old_identifycpu(void);
void init386(paddr_t);
void consinit(void);
void (*cpuresetfn)(void);
@@ -397,7 +401,8 @@ cpu_startup()
printf("%s", version);
startrtclock();
- identifycpu();
+ /* XXX Merge with identifycpu */
+ old_identifycpu();
printf("real mem = %u (%uK)\n", ctob(physmem), ctob(physmem)/1024);
/*
@@ -458,10 +463,11 @@ cpu_startup()
void
i386_proc0_tss_ldt_init()
{
- struct pcb *pcb;
int x;
+ struct pcb *pcb;
curpcb = pcb = &proc0.p_addr->u_pcb;
+
pcb->pcb_tss.tss_ioopt =
((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16;
for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
@@ -472,14 +478,33 @@ i386_proc0_tss_ldt_init()
pcb->pcb_cr0 = rcr0();
pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
pcb->pcb_tss.tss_esp0 = (int)proc0.p_addr + USPACE - 16;
- tss_alloc(pcb);
+ proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_esp0 - 1;
+ proc0.p_md.md_tss_sel = tss_alloc(pcb);
- ltr(pcb->pcb_tss_sel);
+ ltr(proc0.p_md.md_tss_sel);
lldt(pcb->pcb_ldt_sel);
-
- proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_esp0 - 1;
}
+#ifdef MULTIPROCESSOR
+void
+i386_init_pcb_tss_ldt(struct cpu_info *ci)
+{
+ int x;
+ struct pcb *pcb = ci->ci_idle_pcb;
+
+ pcb->pcb_tss.tss_ioopt =
+ ((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16;
+ for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
+ pcb->pcb_iomap[x] = 0xffffffff;
+ pcb->pcb_iomap_pad = 0xff;
+
+ pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
+ pcb->pcb_cr0 = rcr0();
+ ci->ci_idle_tss_sel = tss_alloc(pcb);
+}
+#endif /* MULTIPROCESSOR */
+
+
/*
* Allocate space for system data structures. We are given
* a starting virtual address and we return a final virtual
@@ -1102,13 +1127,12 @@ winchip_cpu_setup(cpu_device, model, step)
{
#if defined(I586_CPU)
- switch (model) {
+ switch ((curcpu()->ci_signature >> 4) & 15) { /* model */
case 4: /* WinChip C6 */
- cpu_feature &= ~CPUID_TSC;
+ curcpu()->ci_feature_flags &= ~CPUID_TSC;
/* Disable RDTSC instruction from user-level. */
lcr4(rcr4() | CR4_TSD);
-
- printf("%s: TSC disabled\n", cpu_device);
+ printf("%s: TSC disabled\n", curcpu()->ci_dev.dv_xname);
break;
}
#endif
@@ -1212,7 +1236,7 @@ cyrix6x86_cpu_setup(cpu_device, model, step)
#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
extern int clock_broken_latch;
- switch (model) {
+ switch ((curcpu()->ci_signature >> 4) & 15) { /* model */
case -1: /* M1 w/o cpuid */
case 2: /* M1 */
/* set up various cyrix registers */
@@ -1228,11 +1252,13 @@ cyrix6x86_cpu_setup(cpu_device, model, step)
/* disable access to ccr4/ccr5 */
cyrix_write_reg(0xC3, cyrix_read_reg(0xC3) & ~0x10);
- printf("%s: xchg bug workaround performed\n", cpu_device);
+ printf("%s: xchg bug workaround performed\n",
+ curcpu()->ci_dev.dv_xname);
break; /* fallthrough? */
- case 4:
+ case 4: /* GXm */
+ /* Unset the TSC bit until calibrate_delay() gets fixed. */
clock_broken_latch = 1;
- cpu_feature &= ~CPUID_TSC;
+ curcpu()->ci_feature_flags &= ~CPUID_TSC;
printf("%s: TSC disabled\n", cpu_device);
break;
}
@@ -1278,8 +1304,11 @@ intel586_cpu_setup(cpu_device, model, step)
int model, step;
{
#if defined(I586_CPU)
- fix_f00f();
- printf("%s: F00F bug workaround installed\n", cpu_device);
+ if (!cpu_f00f_bug) {
+ fix_f00f();
+ printf("%s: F00F bug workaround installed\n",
+ curcpu()->ci_dev.dv_xname);
+ }
#endif
}
@@ -1365,6 +1394,9 @@ intel686_common_cpu_setup(const char *cpu_device, int model, int step)
void
intel686_cpu_setup(const char *cpu_device, int model, int step)
{
+ struct cpu_info *ci = curcpu();
+ /* XXX SMP int model = (ci->ci_signature >> 4) & 15; */
+ /* XXX SMP int step = ci->ci_signature & 15; */
u_quad_t msr119;
intel686_common_cpu_setup(cpu_device, model, step);
@@ -1374,19 +1406,20 @@ intel686_cpu_setup(const char *cpu_device, int model, int step)
* From Intel Application Note #485.
*/
if ((model == 1) && (step < 3))
- cpu_feature &= ~CPUID_SEP;
+ ci->ci_feature_flags &= ~CPUID_SEP;
/*
* Disable the Pentium3 serial number.
*/
- if ((model == 7) && (cpu_feature & CPUID_SER)) {
+ if ((model == 7) && (ci->ci_feature_flags & CPUID_SER)) {
msr119 = rdmsr(MSR_BBL_CR_CTL);
msr119 |= 0x0000000000200000LL;
wrmsr(MSR_BBL_CR_CTL, msr119);
- printf("%s: disabling processor serial number\n", cpu_device);
- cpu_feature &= ~CPUID_SER;
- cpuid_level = 2;
+ printf("%s: disabling processor serial number\n",
+ ci->ci_dev.dv_xname);
+ ci->ci_feature_flags &= ~CPUID_SER;
+ ci->ci_level = 2;
}
#if !defined(SMALL_KERNEL) && defined(I686_CPU)
@@ -1474,6 +1507,249 @@ cyrix3_cpu_name(model, step)
return name;
}
+/* XXXSMP: must be shared with UP */
+#ifdef MULTIPROCESSOR
+/*
+ * Print identification for the given CPU.
+ * XXX XXX
+ * This is not as clean as one might like, because it references
+ *
+ * the "cpuid_level" and "cpu_vendor" globals.
+ * cpuid_level isn't so bad, since both CPU's will hopefully
+ * be of the same level.
+ *
+ * The Intel multiprocessor spec doesn't give us the cpu_vendor
+ * information; however, the chance of multi-vendor SMP actually
+ * ever *working* is sufficiently low that it's probably safe to assume
+ * all processors are of the same vendor.
+ */
+
+void
+identifycpu(struct cpu_info *ci)
+{
+ extern char cpu_vendor[];
+#ifdef CPUDEBUG
+ extern int cpu_cache_eax, cpu_cache_ebx, cpu_cache_ecx, cpu_cache_edx;
+#else
+ extern int cpu_cache_edx;
+#endif
+ const char *name, *modifier, *vendorname, *token;
+ int class = CPUCLASS_386, vendor, i, max;
+ int family, model, step, modif, cachesize;
+ const struct cpu_cpuid_nameclass *cpup = NULL;
+
+ char *cpu_device = ci->ci_dev.dv_xname;
+ /* XXX SMP XXX void (*cpu_setup)(const char *, int, int); */
+
+ if (cpuid_level == -1) {
+#ifdef DIAGNOSTIC
+ if (cpu < 0 || cpu >=
+ (sizeof i386_nocpuid_cpus/sizeof(struct cpu_nocpuid_nameclass)))
+ panic("unknown cpu type %d", cpu);
+#endif
+ name = i386_nocpuid_cpus[cpu].cpu_name;
+ vendor = i386_nocpuid_cpus[cpu].cpu_vendor;
+ vendorname = i386_nocpuid_cpus[cpu].cpu_vendorname;
+ model = -1;
+ step = -1;
+ class = i386_nocpuid_cpus[cpu].cpu_class;
+ ci->cpu_setup = i386_nocpuid_cpus[cpu].cpu_setup;
+ modifier = "";
+ token = "";
+ } else {
+ max = sizeof (i386_cpuid_cpus) / sizeof (i386_cpuid_cpus[0]);
+ modif = (ci->ci_signature >> 12) & 3;
+ family = (ci->ci_signature >> 8) & 15;
+ if (family < CPU_MINFAMILY)
+ panic("identifycpu: strange family value");
+ model = (ci->ci_signature >> 4) & 15;
+ step = ci->ci_signature & 15;
+#ifdef CPUDEBUG
+ printf("%s: family %x model %x step %x\n", cpu_device, family,
+ model, step);
+ printf("%s: cpuid level %d cache eax %x ebx %x ecx %x edx %x\n",
+ cpu_device, cpuid_level, cpu_cache_eax, cpu_cache_ebx,
+ cpu_cache_ecx, cpu_cache_edx);
+#endif
+
+ for (i = 0; i < max; i++) {
+ if (!strncmp(cpu_vendor,
+ i386_cpuid_cpus[i].cpu_id, 12)) {
+ cpup = &i386_cpuid_cpus[i];
+ break;
+ }
+ }
+
+ if (cpup == NULL) {
+ vendor = CPUVENDOR_UNKNOWN;
+ if (cpu_vendor[0] != '\0')
+ vendorname = &cpu_vendor[0];
+ else
+ vendorname = "Unknown";
+ if (family > CPU_MAXFAMILY)
+ family = CPU_MAXFAMILY;
+ class = family - 3;
+ modifier = "";
+ name = "";
+ token = "";
+ ci->cpu_setup = NULL;
+ } else {
+ token = cpup->cpu_id;
+ vendor = cpup->cpu_vendor;
+ vendorname = cpup->cpu_vendorname;
+ modifier = modifiers[modif];
+ if (family > CPU_MAXFAMILY) {
+ family = CPU_MAXFAMILY;
+ model = CPU_DEFMODEL;
+ } else if (model > CPU_MAXMODEL)
+ model = CPU_DEFMODEL;
+ i = family - CPU_MINFAMILY;
+
+ /* Special hack for the PentiumII/III series. */
+ if (vendor == CPUVENDOR_INTEL && family == 6 &&
+ (model == 5 || model == 7)) {
+ name = intel686_cpu_name(model);
+ } else
+ name = cpup->cpu_family[i].cpu_models[model];
+ if (name == NULL)
+ name = cpup->cpu_family[i].cpu_models[CPU_DEFMODEL];
+ class = cpup->cpu_family[i].cpu_class;
+ ci->cpu_setup = cpup->cpu_family[i].cpu_setup;
+ }
+ }
+
+ /* Find the amount of on-chip L2 cache. Add support for AMD K6-3...*/
+ cachesize = -1;
+ if (vendor == CPUVENDOR_INTEL && cpuid_level >= 2 && family < 0xf) {
+ int intel_cachetable[] = { 0, 128, 256, 512, 1024, 2048 };
+ if ((cpu_cache_edx & 0xFF) >= 0x40 &&
+ (cpu_cache_edx & 0xFF) <= 0x45)
+ cachesize = intel_cachetable[(cpu_cache_edx & 0xFF) - 0x40];
+ }
+
+ if ((ci->ci_flags & CPUF_BSP) == 0) {
+ if (cachesize > -1) {
+ snprintf(cpu_model, sizeof(cpu_model),
+ "%s %s%s (%s%s%s%s-class, %dKB L2 cache)",
+ vendorname, modifier, name,
+ ((*token) ? "\"" : ""), ((*token) ? token : ""),
+ ((*token) ? "\" " : ""), classnames[class], cachesize);
+ } else {
+ snprintf(cpu_model, sizeof(cpu_model),
+ "%s %s%s (%s%s%s%s-class)",
+ vendorname, modifier, name,
+ ((*token) ? "\"" : ""), ((*token) ? token : ""),
+ ((*token) ? "\" " : ""), classnames[class]);
+ }
+
+ printf("%s: %s", cpu_device, cpu_model);
+ }
+
+#if defined(I586_CPU) || defined(I686_CPU)
+ if (ci->ci_feature_flags && (ci->ci_feature_flags & CPUID_TSC)) {
+ /* Has TSC */
+ calibrate_cyclecounter();
+ if (pentium_mhz > 994) {
+ int ghz, fr;
+
+ ghz = (pentium_mhz + 9) / 1000;
+ fr = ((pentium_mhz + 9) / 10 ) % 100;
+ if ((ci->ci_flags & CPUF_BSP) == 0) {
+ if (fr)
+ printf(" %d.%02d GHz", ghz, fr);
+ else
+ printf(" %d GHz", ghz);
+ }
+ } else {
+ if ((ci->ci_flags & CPUF_BSP) == 0) {
+ printf(" %d MHz", pentium_mhz);
+ }
+ }
+ }
+#endif
+ if ((ci->ci_flags & CPUF_BSP) == 0) {
+ printf("\n");
+
+ if (ci->ci_feature_flags) {
+ int numbits = 0;
+
+ printf("%s: ", cpu_device);
+ max = sizeof(i386_cpuid_features)
+ / sizeof(i386_cpuid_features[0]);
+ for (i = 0; i < max; i++) {
+ if (ci->ci_feature_flags &
+ i386_cpuid_features[i].feature_bit) {
+ printf("%s%s", (numbits == 0 ? "" : ","),
+ i386_cpuid_features[i].feature_name);
+ numbits++;
+ }
+ }
+ printf("\n");
+ }
+ }
+
+ cpu_class = class;
+ ci->cpu_class = class;
+
+ /*
+ * Now that we have told the user what they have,
+ * let them know if that machine type isn't configured.
+ */
+ switch (cpu_class) {
+#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU)
+#error No CPU classes configured.
+#endif
+#ifndef I686_CPU
+ case CPUCLASS_686:
+ printf("NOTICE: this kernel does not support Pentium Pro CPU class\n");
+#ifdef I586_CPU
+ printf("NOTICE: lowering CPU class to i586\n");
+ cpu_class = CPUCLASS_586;
+ break;
+#endif
+#endif
+#ifndef I586_CPU
+ case CPUCLASS_586:
+ printf("NOTICE: this kernel does not support Pentium CPU class\n");
+#ifdef I486_CPU
+ printf("NOTICE: lowering CPU class to i486\n");
+ cpu_class = CPUCLASS_486;
+ break;
+#endif
+#endif
+#ifndef I486_CPU
+ case CPUCLASS_486:
+ printf("NOTICE: this kernel does not support i486 CPU class\n");
+#ifdef I386_CPU
+ printf("NOTICE: lowering CPU class to i386\n");
+ cpu_class = CPUCLASS_386;
+ break;
+#endif
+#endif
+#ifndef I386_CPU
+ case CPUCLASS_386:
+ printf("NOTICE: this kernel does not support i386 CPU class\n");
+ panic("no appropriate CPU class available");
+#endif
+ default:
+ break;
+ }
+
+ if (cpu == CPU_486DLC) {
+#ifndef CYRIX_CACHE_WORKS
+ printf("WARNING: CYRIX 486DLC CACHE UNCHANGED.\n");
+#else
+#ifndef CYRIX_CACHE_REALLY_WORKS
+ printf("WARNING: CYRIX 486DLC CACHE ENABLED IN HOLD-FLUSH MODE.\n");
+#else
+ printf("WARNING: CYRIX 486DLC CACHE ENABLED.\n");
+#endif
+#endif
+ }
+
+}
+#endif /* MULTIPROCESSOR */
+
char *
tm86_cpu_name(model)
int model;
@@ -1495,7 +1771,7 @@ tm86_cpu_name(model)
}
void
-identifycpu()
+old_identifycpu()
{
extern char cpu_vendor[];
extern char cpu_brandstr[];
@@ -2402,8 +2678,8 @@ setregs(p, pack, stack, retval)
#if NNPX > 0
/* If we were using the FPU, forget about it. */
- if (npxproc == p)
- npxdrop();
+ if (pcb->pcb_fpcpu != NULL)
+ npxsave_proc(p, 0);
#endif
#ifdef USER_LDT
@@ -2437,7 +2713,6 @@ setregs(p, pack, stack, retval)
* Initialize segments and descriptor tables
*/
-union descriptor gdt[NGDT];
union descriptor ldt[NLDT];
struct gate_descriptor idt_region[NIDT];
struct gate_descriptor *idt = idt_region;
@@ -2462,6 +2737,20 @@ setgate(gd, func, args, type, dpl, seg)
}
void
+unsetgate(gd)
+ struct gate_descriptor *gd;
+{
+ gd->gd_p = 0;
+ gd->gd_hioffset = 0;
+ gd->gd_looffset = 0;
+ gd->gd_selector = 0;
+ gd->gd_xx = 0;
+ gd->gd_stkcpy = 0;
+ gd->gd_type = 0;
+ gd->gd_dpl = 0;
+}
+
+void
setregion(rd, base, limit)
struct region_descriptor *rd;
void *base;
@@ -2521,8 +2810,8 @@ fix_f00f(void)
idt = p;
/* Fix up paging redirect */
- setgate(&idt[ 14], &IDTVEC(f00f_redirect), 0, SDT_SYS386TGT,
- SEL_KPL, GCODE_SEL);
+ setgate(&idt[ 14], &IDTVEC(f00f_redirect), 0, SDT_SYS386TGT, SEL_KPL,
+ GCODE_SEL);
/* Map first page RO */
pte = PTE_BASE + i386_btop(va);
@@ -2537,6 +2826,16 @@ fix_f00f(void)
}
#endif
+#ifdef MULTIPROCESSOR
+void
+cpu_init_idt()
+{
+ struct region_descriptor region;
+ setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
+ lidt(&region);
+}
+#endif /* MULTIPROCESSOR */
+
void
init386(paddr_t first_avail)
{
@@ -2565,18 +2864,20 @@ init386(paddr_t first_avail)
(caddr_t)iomem_ex_storage, sizeof(iomem_ex_storage),
EX_NOCOALESCE|EX_NOWAIT);
- /* make gdt gates and memory segments */
+ /* make bootstrap gdt gates and memory segments */
setsegment(&gdt[GCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1);
setsegment(&gdt[GICODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1);
setsegment(&gdt[GDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 1);
- setsegment(&gdt[GLDT_SEL].sd, ldt, sizeof(ldt) - 1, SDT_SYSLDT, SEL_KPL,
- 0, 0);
+ setsegment(&gdt[GLDT_SEL].sd, ldt, sizeof(ldt) - 1, SDT_SYSLDT,
+ SEL_KPL, 0, 0);
setsegment(&gdt[GUCODE1_SEL].sd, 0, i386_btop(VM_MAXUSER_ADDRESS) - 1,
SDT_MEMERA, SEL_UPL, 1, 1);
setsegment(&gdt[GUCODE_SEL].sd, 0, i386_btop(I386_MAX_EXE_ADDR) - 1,
SDT_MEMERA, SEL_UPL, 1, 1);
setsegment(&gdt[GUDATA_SEL].sd, 0, i386_btop(VM_MAXUSER_ADDRESS) - 1,
SDT_MEMRWA, SEL_UPL, 1, 1);
+ setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary,
+ sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 1, 1);
/* make ldt gates and memory segments */
setgate(&ldt[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1, SDT_SYS386CGT,
@@ -2606,11 +2907,13 @@ init386(paddr_t first_avail)
setgate(&idt[ 16], &IDTVEC(fpu), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL);
setgate(&idt[ 17], &IDTVEC(align), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL);
setgate(&idt[ 18], &IDTVEC(rsvd), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL);
- for (i = 19; i < NIDT; i++)
+ for (i = 19; i < NRSVIDT; i++)
setgate(&idt[i], &IDTVEC(rsvd), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL);
+ for (i = NRSVIDT; i < NIDT; i++)
+ unsetgate(&idt[i]);
setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386TGT, SEL_UPL, GCODE_SEL);
- setregion(&region, gdt, sizeof(gdt) - 1);
+ setregion(&region, gdt, NGDT * sizeof(union descriptor) - 1);
lgdt(&region);
setregion(&region, idt, sizeof(idt_region) - 1);
lidt(&region);
@@ -2661,6 +2964,14 @@ init386(paddr_t first_avail)
panic("no BIOS memory map supplied");
#endif
+#if defined(MULTIPROCESSOR)
+ /* install the page after boot args as PT page for first 4M */
+ pmap_enter(pmap_kernel(), (u_long)vtopte(0),
+ i386_round_page(bootargv + bootargc), VM_PROT_READ|VM_PROT_WRITE,
+ VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED);
+ memset(vtopte(0), 0, NBPG); /* make sure it is clean before using */
+#endif
+
/*
* account all the memory passed in the map from /boot
* calculate avail_end and count the physmem.
@@ -2677,8 +2988,8 @@ init386(paddr_t first_avail)
a = i386_round_page(im->addr);
e = i386_trunc_page(im->addr + im->size);
/* skip first four pages */
- if (a < 4 * NBPG)
- a = 4 * NBPG;
+ if (a < 5 * NBPG)
+ a = 5 * NBPG;
#ifdef DEBUG
printf(" %u-%u", a, e);
#endif
@@ -2937,6 +3248,48 @@ cpu_reset()
for (;;);
}
+void
+cpu_initclocks()
+{
+ (*initclock_func)();
+}
+
+void
+need_resched(struct cpu_info *ci)
+{
+ ci->ci_want_resched = 1;
+ ci->ci_astpending = 1;
+}
+
+#ifdef MULTIPROCESSOR
+/* Allocate an IDT vector slot within the given range.
+ * XXX needs locking to avoid MP allocation races.
+ */
+
+int
+idt_vec_alloc(int low, int high)
+{
+ int vec;
+
+ for (vec = low; vec <= high; vec++)
+ if (idt[vec].gd_p == 0)
+ return (vec);
+ return (0);
+}
+
+void
+idt_vec_set(int vec, void (*function)(void))
+{
+ setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
+}
+
+void
+idt_vec_free(int vec)
+{
+ unsetgate(&idt[vec]);
+}
+#endif /* MULTIPROCESSOR */
+
/*
* machine dependent system variables.
*/
@@ -2998,7 +3351,7 @@ cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
case CPU_CPUID:
return (sysctl_rdint(oldp, oldlenp, newp, cpu_id));
case CPU_CPUFEATURE:
- return (sysctl_rdint(oldp, oldlenp, newp, cpu_feature));
+ return (sysctl_rdint(oldp, oldlenp, newp, curcpu()->ci_feature_flags));
#if NAPM > 0
case CPU_APMWARN:
return (sysctl_int(oldp, oldlenp, newp, newlen, &cpu_apmwarn));
@@ -3204,6 +3557,9 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp)
vaddr_t va;
pt_entry_t *pte;
bus_size_t map_size;
+#ifdef MULTIPROCESSOR
+ u_int32_t cpumask = 0;
+#endif
pa = i386_trunc_page(bpa);
endpa = i386_round_page(bpa + size);
@@ -3236,9 +3592,17 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp)
*pte &= ~PG_N;
else
*pte |= PG_N;
+#ifdef MULTIPROCESSOR
+ pmap_tlb_shootdown(pmap_kernel(), va, *pte,
+ &cpumask);
+#else
pmap_update_pg(va);
+#endif
}
}
+#ifdef MULTIPROCESSOR
+ pmap_tlb_shootnow(cpumask);
+#endif
pmap_update(pmap_kernel());
return 0;
@@ -3945,9 +4309,52 @@ _bus_dmamem_alloc_range(t, size, alignment, boundary, segs, nsegs, rsegs,
void
splassert_check(int wantipl, const char *func)
{
- if (cpl < wantipl) {
- splassert_fail(wantipl, cpl, func);
- }
+ if (lapic_tpr < wantipl)
+ splassert_fail(wantipl, lapic_tpr, func);
+}
+#endif
+
+#ifdef MULTIPROCESSOR
+void
+i386_intlock(struct intrframe iframe)
+{
+ if (iframe.if_ppl < IPL_SCHED)
+#ifdef notdef
+ spinlockmgr(&kernel_lock, LK_EXCLUSIVE|LK_CANRECURSE, 0);
+#else
+ __mp_lock(&kernel_lock);
+#endif
+}
+
+void
+i386_intunlock(struct intrframe iframe)
+{
+ if (iframe.if_ppl < IPL_SCHED)
+#ifdef notdef
+ spinlockmgr(&kernel_lock, LK_RELEASE, 0);
+#else
+ __mp_unlock(&kernel_lock);
+#endif
+}
+
+void
+i386_softintlock(void)
+{
+#ifdef notdef
+ spinlockmgr(&kernel_lock, LK_EXCLUSIVE|LK_CANRECURSE, 0);
+#else
+ __mp_lock(&kernel_lock);
+#endif
+}
+
+void
+i386_softintunlock(void)
+{
+#ifdef notdef
+ spinlockmgr(&kernel_lock, LK_RELEASE, 0);
+#else
+ __mp_unlock(&kernel_lock);
+#endif
}
#endif
@@ -3957,11 +4364,15 @@ splassert_check(int wantipl, const char *func)
* We hand-code this to ensure that it's atomic.
*/
void
-softintr(mask)
- int mask;
+softintr(sir, vec)
+ int sir;
+ int vec;
{
- __asm __volatile("orl %1, %0" : "=m"(ipending) : "ir" (mask));
-
+ __asm __volatile("orl %1, %0" : "=m" (ipending) : "ir" (sir));
+#ifdef MULTIPROCESSOR
+ i82489_writereg(LAPIC_ICRLO,
+ vec | LAPIC_DLMODE_FIXED | LAPIC_LVL_ASSERT | LAPIC_DEST_SELF);
+#endif
}
/*
@@ -3971,10 +4382,10 @@ int
splraise(ncpl)
int ncpl;
{
- int ocpl = cpl;
+ int ocpl = lapic_tpr;
if (ncpl > ocpl)
- cpl = ncpl;
+ lapic_tpr = ncpl;
return (ocpl);
}
@@ -3986,7 +4397,7 @@ void
splx(ncpl)
int ncpl;
{
- cpl = ncpl;
+ lapic_tpr = ncpl;
if (ipending & IUNMASK(ncpl))
Xspllower();
}
@@ -3999,8 +4410,9 @@ int
spllower(ncpl)
int ncpl;
{
- int ocpl = cpl;
+ int ocpl = lapic_tpr;
splx(ncpl);
return (ocpl);
}
+
diff --git a/sys/arch/i386/i386/mainbus.c b/sys/arch/i386/i386/mainbus.c
index 47577e0a0b9..c242f14e5da 100644
--- a/sys/arch/i386/i386/mainbus.c
+++ b/sys/arch/i386/i386/mainbus.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: mainbus.c,v 1.15 2002/03/14 01:26:32 millert Exp $ */
+/* $OpenBSD: mainbus.c,v 1.16 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: mainbus.c,v 1.21 1997/06/06 23:14:20 thorpej Exp $ */
/*
@@ -49,11 +49,22 @@
#include "isa.h"
#include "apm.h"
#include "bios.h"
+#include "mpbios.h"
+
+#include <machine/cpuvar.h>
+#include <machine/i82093var.h>
+#include <machine/mpbiosvar.h>
#if NBIOS > 0
#include <machine/biosvar.h>
#endif
+#if 0
+#ifdef SMP /* XXX MULTIPROCESSOR */
+#include <machine/mp.h>
+#endif
+#endif
+
int mainbus_match(struct device *, void *, void *);
void mainbus_attach(struct device *, struct device *, void *);
@@ -75,6 +86,8 @@ union mainbus_attach_args {
#if NBIOS > 0
struct bios_attach_args mba_bios;
#endif
+ struct cpu_attach_args mba_caa;
+ struct apic_attach_args aaa_caa;
};
/*
@@ -104,6 +117,7 @@ mainbus_attach(parent, self, aux)
void *aux;
{
union mainbus_attach_args mba;
+ extern int cpu_id, cpu_feature;
printf("\n");
@@ -116,6 +130,40 @@ mainbus_attach(parent, self, aux)
}
#endif
+#if NMPBIOS > 0
+ if (mpbios_probe(self))
+ mpbios_scan(self);
+ else
+#endif
+ {
+ struct cpu_attach_args caa;
+
+ memset(&caa, 0, sizeof(caa));
+ caa.caa_name = "cpu";
+ caa.cpu_number = 0;
+ caa.cpu_role = CPU_ROLE_SP;
+ caa.cpu_func = 0;
+ caa.cpu_signature = cpu_id;
+ caa.feature_flags = cpu_feature;
+
+ config_found(self, &caa, mainbus_print);
+ }
+
+#if 0
+#ifdef SMP
+ if (bios_smpinfo != NULL) {
+ struct mp_float *mp = bios_smpinfo;
+
+ printf("%s: MP 1.%d configuration %d\n", self->dv_xname,
+ mp->revision, mp->feature1);
+ }
+#ifdef CPU_DEBUG
+ else
+ printf ("%s: No MP configuration found.", self->dv_xname);
+#endif
+#endif
+#endif
+
/*
* XXX Note also that the presence of a PCI bus should
* XXX _always_ be checked, and if present the bus should be
diff --git a/sys/arch/i386/i386/microtime.s b/sys/arch/i386/i386/microtime.s
index 7523e48b73a..ac1b1742cb4 100644
--- a/sys/arch/i386/i386/microtime.s
+++ b/sys/arch/i386/i386/microtime.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: microtime.s,v 1.19 2003/06/04 16:36:14 deraadt Exp $ */
+/* $OpenBSD: microtime.s,v 1.20 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: microtime.s,v 1.16 1995/04/17 12:06:47 cgd Exp $ */
/*-
@@ -42,7 +42,7 @@
* overridden (i.e. it is 100Hz).
*/
#ifndef HZ
-ENTRY(microtime)
+ENTRY(i8254_microtime)
#if defined(I586_CPU) || defined(I686_CPU)
movl _C_LABEL(pentium_mhz), %ecx
diff --git a/sys/arch/i386/i386/mpbios.c b/sys/arch/i386/i386/mpbios.c
new file mode 100644
index 00000000000..689b9ace3f5
--- /dev/null
+++ b/sys/arch/i386/i386/mpbios.c
@@ -0,0 +1,1105 @@
+/* $OpenBSD: mpbios.c,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+/* $NetBSD: mpbios.c,v 1.2 2002/10/01 12:56:57 fvdl Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999 Stefan Grefen
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Derived from FreeBSD's mp_machdep.c
+ */
+/*
+ * Copyright (c) 1996, by Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The Intel MP-stuff is just one way of x86 SMP systems
+ * so only Intel MP specific stuff is here.
+ */
+
+#include "mpbios.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/specialreg.h>
+#include <machine/cputypes.h>
+#include <machine/cpuvar.h>
+#include <machine/bus.h>
+#include <machine/mpbiosreg.h>
+#include <machine/mpbiosvar.h>
+
+#include <machine/i82093reg.h>
+#include <machine/i82093var.h>
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+#include <dev/isa/isareg.h>
+
+#include <dev/eisa/eisavar.h> /* for ELCR* def'ns */
+
+
+static struct mpbios_ioapic default_ioapic = {
+ 2, 0, 1, IOAPICENTRY_FLAG_EN, (caddr_t)IOAPIC_BASE_DEFAULT
+};
+
+/* descriptions of MP basetable entries */
+struct mpbios_baseentry {
+ u_int8_t type;
+ u_int8_t length;
+ u_int16_t count;
+ const char *name;
+};
+
+static const char *loc_where[] = {
+ "extended bios data area",
+ "last page of base memory",
+ "bios"
+};
+
+struct mp_map
+{
+ vaddr_t baseva;
+ int vsize;
+ paddr_t pa;
+ paddr_t pg;
+ int psize;
+};
+
+int mp_print(void *, const char *);
+int mp_match(struct device *, void *, void *);
+int mpbios_cpu_start(struct cpu_info *);
+const void *mpbios_search(struct device *, paddr_t, int,
+ struct mp_map *);
+static __inline int mpbios_cksum(const void *, int);
+
+void mp_cfg_special_intr(const struct mpbios_int *, u_int32_t *);
+void mp_cfg_pci_intr(const struct mpbios_int *, u_int32_t *);
+void mp_cfg_eisa_intr(const struct mpbios_int *, u_int32_t *);
+void mp_cfg_isa_intr(const struct mpbios_int *, u_int32_t *);
+void mp_print_special_intr (int);
+void mp_print_pci_intr (int);
+void mp_print_eisa_intr (int);
+void mp_print_isa_intr (int);
+
+void mpbios_cpu(const u_int8_t *, struct device *);
+void mpbios_bus(const u_int8_t *, struct device *);
+void mpbios_ioapic(const u_int8_t *, struct device *);
+void mpbios_int(const u_int8_t *, int, struct mp_intr_map *);
+
+const void *mpbios_map(paddr_t, int, struct mp_map *);
+static __inline void mpbios_unmap(struct mp_map *);
+
+/*
+ * globals to help us bounce our way through parsing the config table.
+ */
+
+static struct mp_map mp_cfg_table_map;
+static struct mp_map mp_fp_map;
+const struct mpbios_cth *mp_cth;
+const struct mpbios_fps *mp_fps;
+
+#ifdef MPVERBOSE
+int mp_verbose = 1;
+#else
+int mp_verbose = 0;
+#endif
+
+int
+mp_print(aux, pnp)
+ void *aux;
+ const char *pnp;
+{
+ struct cpu_attach_args * caa = (struct cpu_attach_args *) aux;
+ if (pnp)
+ printf("%s at %s:", caa->caa_name, pnp);
+ return (UNCONF);
+}
+
+int
+mp_match(parent, cfv, aux)
+ struct device *parent;
+ void *cfv;
+ void *aux;
+{
+ struct cfdata *cf = (struct cfdata *)cfv;
+ struct cpu_attach_args * caa = (struct cpu_attach_args *) aux;
+ if (strcmp(caa->caa_name, cf->cf_driver->cd_name))
+ return 0;
+
+ return ((*cf->cf_attach->ca_match)(parent, cf, aux));
+}
+
+/*
+ * Map a chunk of memory read-only and return an appropriately
+ * const'ed pointer.
+ */
+const void *
+mpbios_map(pa, len, handle)
+ paddr_t pa;
+ int len;
+ struct mp_map *handle;
+{
+ paddr_t pgpa = i386_trunc_page(pa);
+ paddr_t endpa = i386_round_page(pa + len);
+ vaddr_t va = uvm_km_valloc(kernel_map, endpa - pgpa);
+ vaddr_t retva = va + (pa & PGOFSET);
+
+ handle->pa = pa;
+ handle->pg = pgpa;
+ handle->psize = len;
+ handle->baseva = va;
+ handle->vsize = endpa-pgpa;
+
+ do {
+#if 1
+ pmap_kenter_pa(va, pgpa, VM_PROT_READ);
+#else
+ pmap_enter(pmap_kernel(), va, pgpa, VM_PROT_READ, TRUE,
+ VM_PROT_READ);
+#endif
+ va += NBPG;
+ pgpa += NBPG;
+ } while (pgpa < endpa);
+
+ return ((const void *)retva);
+}
+
+static __inline void
+mpbios_unmap(handle)
+ struct mp_map *handle;
+{
+#if 1
+ pmap_kremove(handle->baseva, handle->vsize);
+#else
+ pmap_extract(pmap_kernel(), handle->baseva, NULL);
+#endif
+ uvm_km_free(kernel_map, handle->baseva, handle->vsize);
+}
+
+/*
+ * Look for an Intel MP spec table, indicating SMP capable hardware.
+ */
+int
+mpbios_probe(self)
+ struct device *self;
+{
+ paddr_t ebda, memtop;
+
+ paddr_t cthpa;
+ int cthlen;
+ const u_int8_t *mpbios_page;
+ int scan_loc;
+
+ struct mp_map t;
+
+ /* see if EBDA exists */
+
+ mpbios_page = mpbios_map(0, NBPG, &t);
+
+ /* XXX Ugly magic constants below. */
+ ebda = *(const u_int16_t *)(&mpbios_page[0x40e]);
+ ebda <<= 4;
+
+ memtop = *(const u_int16_t *)(&mpbios_page[0x413]);
+ memtop <<= 10;
+
+ mpbios_page = NULL;
+ mpbios_unmap(&t);
+
+ scan_loc = 0;
+
+ if (ebda && ebda < IOM_BEGIN ) {
+ mp_fps = mpbios_search(self, ebda, 1024, &mp_fp_map);
+ if (mp_fps != NULL)
+ goto found;
+ }
+
+ scan_loc = 1;
+
+ if (memtop && memtop <= IOM_BEGIN ) {
+ mp_fps = mpbios_search(self, memtop - 1024, 1024, &mp_fp_map);
+ if (mp_fps != NULL)
+ goto found;
+ }
+
+ scan_loc = 2;
+
+ mp_fps = mpbios_search(self, BIOS_BASE, BIOS_COUNT, &mp_fp_map);
+ if (mp_fps != NULL)
+ goto found;
+
+ /* nothing found */
+ return (0);
+
+ found:
+ if (mp_verbose)
+ printf("%s: MP floating pointer found in %s at 0x%lx\n",
+ self->dv_xname, loc_where[scan_loc], mp_fp_map.pa);
+
+ if (mp_fps->pap == 0) {
+ if (mp_fps->mpfb1 == 0) {
+ printf("%s: MP fps invalid: "
+ "no default config and no configuration table\n",
+ self->dv_xname);
+
+ goto err;
+ }
+ printf("%s: MP default configuration %d\n",
+ self->dv_xname, mp_fps->mpfb1);
+ return (10);
+ }
+
+ cthpa = mp_fps->pap;
+
+ mp_cth = mpbios_map(cthpa, sizeof (*mp_cth), &mp_cfg_table_map);
+ cthlen = mp_cth->base_len;
+ mpbios_unmap(&mp_cfg_table_map);
+
+ mp_cth = mpbios_map(cthpa, cthlen, &mp_cfg_table_map);
+
+ if (mp_verbose)
+ printf("%s: MP config table at 0x%lx, %d bytes long\n",
+ self->dv_xname, cthpa, cthlen);
+
+ if (mp_cth->signature != MP_CT_SIG) {
+ printf("%s: MP signature mismatch (%x vs %x)\n",
+ self->dv_xname,
+ MP_CT_SIG, mp_cth->signature);
+ goto err;
+ }
+
+ if (mpbios_cksum(mp_cth, cthlen)) {
+ printf ("%s: MP Configuration Table checksum mismatch\n",
+ self->dv_xname);
+ goto err;
+ }
+ return (10);
+
+ err:
+ if (mp_fps) {
+ mp_fps = NULL;
+ mpbios_unmap(&mp_fp_map);
+ }
+ if (mp_cth) {
+ mp_cth = NULL;
+ mpbios_unmap(&mp_cfg_table_map);
+ }
+ return (0);
+}
+
+
+/*
+ * Simple byte checksum used on config tables.
+ */
+
+static __inline int
+mpbios_cksum (start, len)
+ const void *start;
+ int len;
+{
+ unsigned char res=0;
+ const char *p = start;
+ const char *end = p + len;
+
+ while (p < end)
+ res += *p++;
+
+ return res;
+}
+
+
+/*
+ * Look for the MP floating pointer signature in the given physical
+ * address range.
+ *
+ * We map the memory, scan through it, and unmap it.
+ * If we find it, remap the floating pointer structure and return it.
+ */
+
+const void *
+mpbios_search (self, start, count, map)
+ struct device *self;
+ paddr_t start;
+ int count;
+ struct mp_map *map;
+{
+ struct mp_map t;
+
+ int i, len;
+ const struct mpbios_fps *m;
+ int end = count - sizeof(*m);
+ const u_int8_t *base = mpbios_map(start, count, &t);
+
+ if (mp_verbose)
+ printf("%s: scanning 0x%lx to 0x%lx for MP signature\n",
+ self->dv_xname, start, start + count - sizeof(*m));
+
+ for (i = 0; i <= end; i += 4) {
+ m = (struct mpbios_fps *)&base[i];
+
+ if ((m->signature == MP_FP_SIG) &&
+ ((len = m->length << 4) != 0) &&
+ mpbios_cksum(m, (m->length << 4)) == 0) {
+ mpbios_unmap(&t);
+
+ return (mpbios_map(start + i, len, map));
+ }
+ }
+ mpbios_unmap(&t);
+
+ return (0);
+}
+
+/*
+ * MP configuration table parsing.
+ */
+
+static struct mpbios_baseentry mp_conf[] =
+{
+ {0, 20, 0, "cpu"},
+ {1, 8, 0, "bus"},
+ {2, 8, 0, "ioapic"},
+ {3, 8, 0, "ioint"},
+ {4, 8, 0, "lint"},
+};
+
+struct mp_bus *mp_busses;
+int mp_nbus;
+struct mp_intr_map *mp_intrs;
+
+struct mp_intr_map *lapic_ints[2]; /* XXX */
+int mp_isa_bus = -1; /* XXX */
+int mp_eisa_bus = -1; /* XXX */
+
+static struct mp_bus extint_bus = {
+ "ExtINT",
+ -1,
+ mp_print_special_intr,
+ mp_cfg_special_intr,
+ 0
+};
+static struct mp_bus smi_bus = {
+ "SMI",
+ -1,
+ mp_print_special_intr,
+ mp_cfg_special_intr,
+ 0
+};
+static struct mp_bus nmi_bus = {
+ "NMI",
+ -1,
+ mp_print_special_intr,
+ mp_cfg_special_intr,
+ 0
+};
+
+
+/*
+ * 1st pass on BIOS's Intel MP specification table.
+ *
+ * initializes:
+ * mp_ncpus = 1
+ *
+ * determines:
+ * cpu_apic_address (common to all CPUs)
+ * ioapic_address[N]
+ * mp_naps
+ * mp_nbusses
+ * mp_napics
+ * nintrs
+ */
+void
+mpbios_scan(self)
+ struct device *self;
+{
+ const u_int8_t *position, *end;
+ int count;
+ int type;
+ int intr_cnt, cur_intr;
+ paddr_t lapic_base;
+
+ printf("%s: Intel MP Specification ", self->dv_xname);
+
+ switch (mp_fps->spec_rev) {
+ case 1:
+ printf("(Version 1.1)");
+ break;
+ case 4:
+ printf("(Version 1.4)");
+ break;
+ default:
+ printf("(unrecognized rev %d)", mp_fps->spec_rev);
+ }
+
+ /*
+ * looks like we've got a MP system. start setting up
+ * infrastructure..
+ * XXX is this the right place??
+ */
+
+ lapic_base = LAPIC_BASE;
+ if (mp_cth != NULL)
+ lapic_base = (paddr_t)mp_cth->apic_address;
+
+ lapic_boot_init(lapic_base);
+
+ /* check for use of 'default' configuration */
+ if (mp_fps->mpfb1 != 0) {
+ struct mpbios_proc pe;
+
+ printf("\n%s: MP default configuration %d\n",
+ self->dv_xname, mp_fps->mpfb1);
+
+ /* use default addresses */
+ pe.apic_id = cpu_number();
+ pe.cpu_flags = PROCENTRY_FLAG_EN|PROCENTRY_FLAG_BP;
+ pe.cpu_signature = cpu_info_primary.ci_signature;
+ pe.feature_flags = cpu_info_primary.ci_feature_flags;
+
+ mpbios_cpu((u_int8_t *)&pe, self);
+
+ pe.apic_id = 1 - cpu_number();
+ pe.cpu_flags = PROCENTRY_FLAG_EN;
+
+ mpbios_cpu((u_int8_t *)&pe, self);
+
+ mpbios_ioapic((u_int8_t *)&default_ioapic, self);
+
+ /* XXX */
+ printf("%s: WARNING: interrupts not configured\n",
+ self->dv_xname);
+ panic("lazy bum");
+ return;
+ } else {
+ /*
+ * should not happen; mp_probe returns 0 in this case,
+ * but..
+ */
+ if (mp_cth == NULL)
+ panic("mpbios_scan: no config (can't happen?)");
+
+ printf(" (%8.8s %12.12s)\n",
+ mp_cth->oem_id, mp_cth->product_id);
+
+ /*
+ * Walk the table once, counting items
+ */
+ position = (const u_int8_t *)(mp_cth);
+ end = position + mp_cth->base_len;
+ position += sizeof(*mp_cth);
+
+ count = mp_cth->entry_count;
+ intr_cnt = 0;
+
+ while ((count--) && (position < end)) {
+ type = *position;
+ if (type >= MPS_MCT_NTYPES) {
+ printf("%s: unknown entry type %x"
+ " in MP config table\n",
+ self->dv_xname, type);
+ break;
+ }
+ mp_conf[type].count++;
+ if (type == MPS_MCT_BUS) {
+ const struct mpbios_bus *bp =
+ (const struct mpbios_bus *)position;
+ if (bp->bus_id >= mp_nbus)
+ mp_nbus = bp->bus_id + 1;
+ }
+ /*
+ * Count actual interrupt instances.
+ * dst_apic_id of MPS_ALL_APICS means "wired to all
+ * apics of this type".
+ */
+ if ((type == MPS_MCT_IOINT) ||
+ (type == MPS_MCT_LINT)) {
+ const struct mpbios_int *ie =
+ (const struct mpbios_int *)position;
+ if (ie->dst_apic_id != MPS_ALL_APICS)
+ intr_cnt++;
+ else if (type == MPS_MCT_IOINT)
+ intr_cnt +=
+ mp_conf[MPS_MCT_IOAPIC].count;
+ else
+ intr_cnt += mp_conf[MPS_MCT_CPU].count;
+ }
+ position += mp_conf[type].length;
+ }
+
+ mp_busses = malloc(sizeof(struct mp_bus) * mp_nbus,
+ M_DEVBUF, M_NOWAIT);
+ memset(mp_busses, 0, sizeof(struct mp_bus) * mp_nbus);
+ mp_intrs = malloc(sizeof(struct mp_intr_map) * intr_cnt,
+ M_DEVBUF, M_NOWAIT);
+
+ /* re-walk the table, recording info of interest */
+ position = (const u_int8_t *)mp_cth + sizeof(*mp_cth);
+ count = mp_cth->entry_count;
+ cur_intr = 0;
+
+ while ((count--) && (position < end)) {
+ switch (type = *(u_char *)position) {
+ case MPS_MCT_CPU:
+ mpbios_cpu(position, self);
+ break;
+ case MPS_MCT_BUS:
+ mpbios_bus(position, self);
+ break;
+ case MPS_MCT_IOAPIC:
+ mpbios_ioapic(position, self);
+ break;
+ case MPS_MCT_IOINT:
+ case MPS_MCT_LINT:
+ mpbios_int(position, type,
+ &mp_intrs[cur_intr]);
+ cur_intr++;
+ break;
+ default:
+ printf("%s: unknown entry type %x "
+ "in MP config table\n",
+ self->dv_xname, type);
+ /* NOTREACHED */
+ return;
+ }
+
+ (u_char*)position += mp_conf[type].length;
+ }
+ if (mp_verbose && mp_cth->ext_len)
+ printf("%s: MP WARNING: %d "
+ "bytes of extended entries not examined\n",
+ self->dv_xname, mp_cth->ext_len);
+ }
+ /* Clean up. */
+ mp_fps = NULL;
+ mpbios_unmap(&mp_fp_map);
+ if (mp_cth != NULL) {
+ mp_cth = NULL;
+ mpbios_unmap(&mp_cfg_table_map);
+ }
+}
+
+void
+mpbios_cpu(ent, self)
+ const u_int8_t *ent;
+ struct device *self;
+{
+ const struct mpbios_proc *entry = (const struct mpbios_proc *)ent;
+ struct cpu_attach_args caa;
+
+ /* XXX move this into the CPU attachment goo. */
+ /* check for usability */
+ if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
+ return;
+
+ /* check for BSP flag */
+ if (entry->cpu_flags & PROCENTRY_FLAG_BP)
+ caa.cpu_role = CPU_ROLE_BP;
+ else
+ caa.cpu_role = CPU_ROLE_AP;
+
+ caa.caa_name = "cpu";
+ caa.cpu_number = entry->apic_id;
+ caa.cpu_func = &mp_cpu_funcs;
+#if 1 /* XXX Will be removed when the real stuff is probed */
+ caa.cpu_signature = entry->cpu_signature;
+
+ /*
+ * XXX this is truncated to just contain the low-order 16 bits
+ * of the flags on at least some MP bioses
+ */
+ caa.feature_flags = entry->feature_flags;
+#endif
+
+ config_found_sm(self, &caa, mp_print, mp_match);
+}
+
+/*
+ * The following functions conspire to compute base ioapic redirection
+ * table entry for a given interrupt line.
+ *
+ * Fill in: trigger mode, polarity, and possibly delivery mode.
+ */
+void
+mp_cfg_special_intr(entry, redir)
+ const struct mpbios_int *entry;
+ u_int32_t *redir;
+{
+
+ /*
+ * All of these require edge triggered, zero vector,
+ * appropriate delivery mode.
+ * see page 13 of the 82093AA datasheet.
+ */
+ *redir &= ~IOAPIC_REDLO_DEL_MASK;
+ *redir &= ~IOAPIC_REDLO_VECTOR_MASK;
+ *redir &= ~IOAPIC_REDLO_LEVEL;
+
+ switch (entry->int_type) {
+ case MPS_INTTYPE_NMI:
+ *redir |= (IOAPIC_REDLO_DEL_NMI<<IOAPIC_REDLO_DEL_SHIFT);
+ break;
+
+ case MPS_INTTYPE_SMI:
+ *redir |= (IOAPIC_REDLO_DEL_SMI<<IOAPIC_REDLO_DEL_SHIFT);
+ break;
+ case MPS_INTTYPE_ExtINT:
+ /*
+ * We are using the ioapic in "native" mode.
+ * This indicates where the 8259 is wired to the ioapic
+ * and/or local apic..
+ */
+ *redir |= (IOAPIC_REDLO_DEL_EXTINT<<IOAPIC_REDLO_DEL_SHIFT);
+ *redir |= (IOAPIC_REDLO_MASK);
+ break;
+ default:
+ panic("unknown MPS interrupt type %d", entry->int_type);
+ }
+}
+
+/* XXX too much duplicated code here. */
+
+void
+mp_cfg_pci_intr(entry, redir)
+ const struct mpbios_int *entry;
+ u_int32_t *redir;
+{
+ int mpspo = entry->int_flags & 0x03; /* XXX magic */
+ int mpstrig = (entry->int_flags >> 2) & 0x03; /* XXX magic */
+
+ *redir &= ~IOAPIC_REDLO_DEL_MASK;
+ switch (mpspo) {
+ case MPS_INTPO_ACTHI:
+ *redir &= ~IOAPIC_REDLO_ACTLO;
+ break;
+ case MPS_INTPO_DEF:
+ case MPS_INTPO_ACTLO:
+ *redir |= IOAPIC_REDLO_ACTLO;
+ break;
+ default:
+ panic("unknown MPS interrupt polarity %d", mpspo);
+ }
+
+ if (entry->int_type != MPS_INTTYPE_INT) {
+ mp_cfg_special_intr(entry, redir);
+ return;
+ }
+ *redir |= (IOAPIC_REDLO_DEL_LOPRI<<IOAPIC_REDLO_DEL_SHIFT);
+
+ switch (mpstrig) {
+ case MPS_INTTR_DEF:
+ case MPS_INTTR_LEVEL:
+ *redir |= IOAPIC_REDLO_LEVEL;
+ break;
+ case MPS_INTTR_EDGE:
+ *redir &= ~IOAPIC_REDLO_LEVEL;
+ break;
+ default:
+ panic("unknown MPS interrupt trigger %d", mpstrig);
+ }
+}
+
+void
+mp_cfg_eisa_intr (entry, redir)
+ const struct mpbios_int *entry;
+ u_int32_t *redir;
+{
+ int mpspo = entry->int_flags & 0x03; /* XXX magic */
+ int mpstrig = (entry->int_flags >> 2) & 0x03; /* XXX magic */
+
+ *redir &= ~IOAPIC_REDLO_DEL_MASK;
+ switch (mpspo) {
+ case MPS_INTPO_DEF:
+ case MPS_INTPO_ACTHI:
+ *redir &= ~IOAPIC_REDLO_ACTLO;
+ break;
+ case MPS_INTPO_ACTLO:
+ *redir |= IOAPIC_REDLO_ACTLO;
+ break;
+ default:
+ panic("unknown MPS interrupt polarity %d", mpspo);
+ }
+
+ if (entry->int_type != MPS_INTTYPE_INT) {
+ mp_cfg_special_intr(entry, redir);
+ return;
+ }
+ *redir |= (IOAPIC_REDLO_DEL_LOPRI<<IOAPIC_REDLO_DEL_SHIFT);
+
+ switch (mpstrig) {
+ case MPS_INTTR_LEVEL:
+ *redir |= IOAPIC_REDLO_LEVEL;
+ break;
+ case MPS_INTTR_EDGE:
+ *redir &= ~IOAPIC_REDLO_LEVEL;
+ break;
+ case MPS_INTTR_DEF:
+ /*
+ * Set "default" setting based on ELCR value snagged
+ * earlier.
+ */
+ if (mp_busses[entry->src_bus_id].mb_data &
+ (1<<entry->src_bus_irq)) {
+ *redir |= IOAPIC_REDLO_LEVEL;
+ } else {
+ *redir &= ~IOAPIC_REDLO_LEVEL;
+ }
+ break;
+ default:
+ panic("unknown MPS interrupt trigger %d", mpstrig);
+ }
+}
+
+
+void
+mp_cfg_isa_intr(entry, redir)
+ const struct mpbios_int *entry;
+ u_int32_t *redir;
+{
+ int mpspo = entry->int_flags & 0x03; /* XXX magic */
+ int mpstrig = (entry->int_flags >> 2) & 0x03; /* XXX magic */
+
+ *redir &= ~IOAPIC_REDLO_DEL_MASK;
+ switch (mpspo) {
+ case MPS_INTPO_DEF:
+ case MPS_INTPO_ACTHI:
+ *redir &= ~IOAPIC_REDLO_ACTLO;
+ break;
+ case MPS_INTPO_ACTLO:
+ *redir |= IOAPIC_REDLO_ACTLO;
+ break;
+ default:
+ panic("unknown MPS interrupt polarity %d", mpspo);
+ }
+
+ if (entry->int_type != MPS_INTTYPE_INT) {
+ mp_cfg_special_intr(entry, redir);
+ return;
+ }
+ *redir |= (IOAPIC_REDLO_DEL_LOPRI << IOAPIC_REDLO_DEL_SHIFT);
+
+ switch (mpstrig) {
+ case MPS_INTTR_LEVEL:
+ *redir |= IOAPIC_REDLO_LEVEL;
+ break;
+ case MPS_INTTR_DEF:
+ case MPS_INTTR_EDGE:
+ *redir &= ~IOAPIC_REDLO_LEVEL;
+ break;
+ default:
+ panic("unknown MPS interrupt trigger %d", mpstrig);
+ }
+}
+
+
+void
+mp_print_special_intr(intr)
+ int intr;
+{
+}
+
+void
+mp_print_pci_intr(intr)
+ int intr;
+{
+ printf(" device %d INT_%c", (intr >> 2) & 0x1f, 'A' + (intr & 0x3));
+}
+
+void
+mp_print_isa_intr(intr)
+ int intr;
+{
+ printf(" irq %d", intr);
+}
+
+void
+mp_print_eisa_intr(intr)
+ int intr;
+{
+ printf(" EISA irq %d", intr);
+}
+
+
+
+#define TAB_UNIT 4
+#define TAB_ROUND(a) _TAB_ROUND(a, TAB_UNIT)
+
+#define _TAB_ROUND(a,u) (((a) + (u - 1)) & ~(u - 1))
+#define EXTEND_TAB(a,u) (!(_TAB_ROUND(a, u) == _TAB_ROUND((a + 1), u)))
+
+void
+mpbios_bus(ent, self)
+ const u_int8_t *ent;
+ struct device *self;
+{
+ const struct mpbios_bus *entry = (const struct mpbios_bus *)ent;
+ int bus_id = entry->bus_id;
+
+ printf("%s: bus %d is type %6.6s\n", self->dv_xname,
+ bus_id, entry->bus_type);
+
+#ifdef DIAGNOSTIC
+ /*
+ * This "should not happen" unless the table changes out
+ * from underneath us
+ */
+ if (bus_id >= mp_nbus) {
+ panic("%s: bus number %d out of range?? (type %6.6s)\n",
+ self->dv_xname, bus_id, entry->bus_type);
+ }
+#endif
+
+ mp_busses[bus_id].mb_intrs = NULL;
+
+ if (memcmp(entry->bus_type, "PCI ", 6) == 0) {
+ mp_busses[bus_id].mb_name = "pci";
+ mp_busses[bus_id].mb_idx = bus_id;
+ mp_busses[bus_id].mb_intr_print = mp_print_pci_intr;
+ mp_busses[bus_id].mb_intr_cfg = mp_cfg_pci_intr;
+ } else if (memcmp(entry->bus_type, "EISA ", 6) == 0) {
+ mp_busses[bus_id].mb_name = "eisa";
+ mp_busses[bus_id].mb_idx = bus_id;
+ mp_busses[bus_id].mb_intr_print = mp_print_eisa_intr;
+ mp_busses[bus_id].mb_intr_cfg = mp_cfg_eisa_intr;
+
+ mp_busses[bus_id].mb_data = inb(ELCR0) | (inb(ELCR1) << 8);
+
+ if (mp_eisa_bus != -1)
+ printf("%s: multiple eisa busses?\n",
+ self->dv_xname);
+ else
+ mp_eisa_bus = bus_id;
+ } else if (memcmp(entry->bus_type, "ISA ", 6) == 0) {
+ mp_busses[bus_id].mb_name = "isa";
+ mp_busses[bus_id].mb_idx = 0; /* XXX */
+ mp_busses[bus_id].mb_intr_print = mp_print_isa_intr;
+ mp_busses[bus_id].mb_intr_cfg = mp_cfg_isa_intr;
+ if (mp_isa_bus != -1)
+ printf("%s: multiple isa busses?\n",
+ self->dv_xname);
+ else
+ mp_isa_bus = bus_id;
+ } else {
+ printf("%s: unsupported bus type %6.6s\n", self->dv_xname,
+ entry->bus_type);
+ }
+}
+
+
+void
+mpbios_ioapic(ent, self)
+ const u_int8_t *ent;
+ struct device *self;
+{
+ const struct mpbios_ioapic *entry = (const struct mpbios_ioapic *)ent;
+ struct apic_attach_args aaa;
+
+ /* XXX let flags checking happen in ioapic driver.. */
+ if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
+ return;
+
+ aaa.aaa_name = "ioapic";
+ aaa.apic_id = entry->apic_id;
+ aaa.apic_version = entry->apic_version;
+ aaa.apic_address = (paddr_t)entry->apic_address;
+ aaa.flags = (mp_fps->mpfb2 & 0x80) ? IOAPIC_PICMODE : IOAPIC_VWIRE;
+
+ config_found_sm(self, &aaa, mp_print, mp_match);
+}
+
+static const char inttype_fmt[] = "\177\020"
+ "f\0\2type\0" "=\1NMI\0" "=\2SMI\0" "=\3ExtINT\0";
+
+static const char flagtype_fmt[] = "\177\020"
+ "f\0\2pol\0" "=\1Act Hi\0" "=\3Act Lo\0"
+ "f\2\2trig\0" "=\1Edge\0" "=\3Level\0";
+
+void
+mpbios_int(ent, enttype, mpi)
+ const u_int8_t *ent;
+ int enttype;
+ struct mp_intr_map *mpi;
+{
+ const struct mpbios_int *entry = (const struct mpbios_int *)ent;
+ struct ioapic_softc *sc = NULL;
+
+ struct mp_intr_map *altmpi;
+ struct mp_bus *mpb;
+
+ u_int32_t id = entry->dst_apic_id;
+ u_int32_t pin = entry->dst_apic_int;
+ u_int32_t bus = entry->src_bus_id;
+ u_int32_t dev = entry->src_bus_irq;
+ u_int32_t type = entry->int_type;
+ u_int32_t flags = entry->int_flags;
+
+ switch (type) {
+ case MPS_INTTYPE_INT:
+ mpb = &(mp_busses[bus]);
+ break;
+ case MPS_INTTYPE_ExtINT:
+ mpb = &extint_bus;
+ break;
+ case MPS_INTTYPE_SMI:
+ mpb = &smi_bus;
+ break;
+ case MPS_INTTYPE_NMI:
+ mpb = &nmi_bus;
+ break;
+ }
+ mpi->next = mpb->mb_intrs;
+ mpb->mb_intrs = mpi;
+ mpi->bus = mpb;
+ mpi->bus_pin = dev;
+
+ mpi->ioapic_ih = APIC_INT_VIA_APIC |
+ ((id << APIC_INT_APIC_SHIFT) | ((pin << APIC_INT_PIN_SHIFT)));
+
+ mpi->type = type;
+ mpi->flags = flags;
+ mpi->redir = 0;
+ if (mpb->mb_intr_cfg == NULL) {
+ printf("mpbios: can't find bus %d for apic %d pin %d\n",
+ bus, id, pin);
+ return;
+ }
+
+ (*mpb->mb_intr_cfg)(entry, &mpi->redir);
+
+ if (enttype == MPS_MCT_IOINT) {
+ sc = ioapic_find(id);
+ if (sc == NULL) {
+ printf("mpbios: can't find ioapic %d\n", id);
+ return;
+ }
+
+ mpi->ioapic = sc;
+ mpi->ioapic_pin = pin;
+
+ altmpi = sc->sc_pins[pin].ip_map;
+
+ if (altmpi != NULL) {
+ if ((altmpi->type != type) ||
+ (altmpi->flags != flags)) {
+ printf(
+ "%s: conflicting map entries for pin %d\n",
+ sc->sc_dev.dv_xname, pin);
+ }
+ } else {
+ sc->sc_pins[pin].ip_map = mpi;
+ }
+ } else {
+ if (id != MPS_ALL_APICS)
+ panic("can't deal with not-all-lapics interrupt yet!");
+ if (pin >= 2)
+ printf("pin %d of local apic doesn't exist!\n", pin);
+ else {
+ mpi->ioapic = NULL;
+ mpi->ioapic_pin = pin;
+ lapic_ints[pin] = mpi;
+ }
+ }
+ if (mp_verbose) {
+ printf("%s: int%d attached to %s",
+ sc ? sc->sc_dev.dv_xname : "local apic", pin,
+ mpb->mb_name);
+ if (mpb->mb_idx != -1)
+ printf("%d", mpb->mb_idx);
+
+ if (mpb != NULL)
+
+ (*(mpb->mb_intr_print))(dev);
+
+ printf(" (type 0x%x flags 0x%x)\n", type, flags);
+ }
+}
diff --git a/sys/arch/i386/i386/mptramp.s b/sys/arch/i386/i386/mptramp.s
new file mode 100644
index 00000000000..d35b6560c89
--- /dev/null
+++ b/sys/arch/i386/i386/mptramp.s
@@ -0,0 +1,265 @@
+/* $OpenBSD: mptramp.s,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999 Stefan Grefen
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * MP startup ...
+ * the stuff from cpu_spinup_trampoline to mp_startup
+ * is copied into the first 640 KB
+ *
+ * We startup the processors now when the kthreads become ready.
+ * The steps are:
+ * 1) Get the processors running kernel-code from a special
+ * page-table and stack page, do chip identification.
+ * 2) halt the processors waiting for them to be enabled
+ *	   by an idle thread
+ */
+
+#include "assym.h"
+#include <machine/param.h>
+#include <machine/asm.h>
+#include <machine/specialreg.h>
+#include <machine/segments.h>
+#include <machine/gdt.h>
+#include <machine/mpbiosvar.h>
+#include <machine/i82489reg.h>
+
+#define GDTE(a,b) .byte 0xff,0xff,0x0,0x0,0x0,a,b,0x0
+#define _RELOC(x) ((x) - KERNBASE)
+#define RELOC(x) _RELOC(_C_LABEL(x))
+
+#define _TRMP_LABEL(a) a = . - _C_LABEL(cpu_spinup_trampoline) + MP_TRAMPOLINE
+
+/*
+ * Debug code to stop aux. processors in various stages based on the
+ * value in cpu_trace.
+ *
+ * %edi points at cpu_trace; cpu_trace[0] is the "hold point";
+ * cpu_trace[1] is the point which the cpu has reached.
+ * cpu_trace[2] is the last value stored by HALTT.
+ */
+
+
+#ifdef MPDEBUG
+#define HALT(x) 1: movl (%edi),%ebx;cmpl $ x,%ebx ; jle 1b ; movl $x,4(%edi)
+#define HALTT(x,y) movl y,8(%edi); HALT(x)
+#else
+#define HALT(x) /**/
+#define HALTT(x,y) /**/
+#endif
+
+ .globl _C_LABEL(cpu),_C_LABEL(cpu_id),_C_LABEL(cpu_vendor)
+ .globl _C_LABEL(cpuid_level),_C_LABEL(cpu_feature)
+
+ .global _C_LABEL(cpu_spinup_trampoline)
+ .global _C_LABEL(cpu_spinup_trampoline_end)
+ .global _C_LABEL(cpu_hatch)
+ .global _C_LABEL(mp_pdirpa)
+ .global _C_LABEL(gdt), _C_LABEL(local_apic)
+
+ .text
+ .align 4,0x0
+ .code16
+_C_LABEL(cpu_spinup_trampoline):
+ cli
+ xorw %ax, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+ data32 addr32 lgdt (gdt_desc) # load flat descriptor table
+ movl %cr0, %eax # get cr0
+ orl $0x1, %eax # enable protected mode
+ movl %eax, %cr0 # doit
+ ljmp $0x8, $mp_startup
+
+_TRMP_LABEL(mp_startup)
+ .code32
+
+ movl $0x10, %eax # data segment
+ movw %ax, %ds
+ movw %ax, %ss
+ movw %ax, %es
+ movw %ax, %fs
+ movw %ax, %gs
+ movl $(MP_TRAMPOLINE+NBPG-16),%esp # bootstrap stack end,
+ # with scratch space..
+
+#ifdef MPDEBUG
+ leal RELOC(cpu_trace),%edi
+#endif
+
+ HALT(0x1)
+ /* First, reset the PSL. */
+ pushl $PSL_MBO
+ popfl
+
+ movl RELOC(mp_pdirpa),%ecx
+ HALTT(0x5,%ecx)
+
+ /* Load base of page directory and enable mapping. */
+ movl %ecx,%cr3 # load ptd addr into mmu
+ movl %cr0,%eax # get control word
+ # enable paging & NPX emulation
+ orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_EM|CR0_MP|CR0_WP),%eax
+ movl %eax,%cr0 # and let's page NOW!
+
+#ifdef MPDEBUG
+ leal _C_LABEL(cpu_trace),%edi
+#endif
+ HALT(0x6)
+
+# ok, we're now running with paging enabled and sharing page tables with cpu0.
+# figure out which processor we really are, what stack we should be on, etc.
+
+ movl _C_LABEL(local_apic)+LAPIC_ID,%ecx
+ shrl $LAPIC_ID_SHIFT,%ecx
+ leal 0(,%ecx,4),%ecx
+ movl _C_LABEL(cpu_info)(%ecx),%ecx
+
+ HALTT(0x7, %ecx)
+
+# %ecx points at our cpu_info structure..
+
+ movw $((MAXGDTSIZ*8) - 1), 6(%esp) # prepare segment descriptor
+ movl CPU_INFO_GDT(%ecx), %eax # for real gdt
+ movl %eax, 8(%esp)
+ HALTT(0x8, %eax)
+ lgdt 6(%esp)
+ HALT(0x9)
+ jmp 1f
+ nop
+1:
+ HALT(0xa)
+ movl $GSEL(GDATA_SEL, SEL_KPL),%eax #switch to new segment
+ HALTT(0x10, %eax)
+ movw %ax,%ds
+ HALT(0x11)
+ movw %ax,%es
+ HALT(0x12)
+ movw %ax,%ss
+ HALT(0x13)
+ pushl $GSEL(GCODE_SEL, SEL_KPL)
+ pushl $mp_cont
+ HALT(0x14)
+ lret
+ .align 4,0x0
+_TRMP_LABEL(gdt_table)
+ .word 0x0,0x0,0x0,0x0 # null GDTE
+ GDTE(0x9f,0xcf) # Kernel text
+ GDTE(0x93,0xcf) # Kernel data
+_TRMP_LABEL(gdt_desc)
+ .word 0x17 # limit 3 entries
+	.long gdt_table			# where the gdt is
+
+_C_LABEL(cpu_spinup_trampoline_end): #end of code copied to MP_TRAMPOLINE
+mp_cont:
+
+ movl CPU_INFO_IDLE_PCB(%ecx),%esi
+
+# %esi now points at our PCB.
+
+ HALTT(0x19, %esi)
+
+ movl PCB_ESP(%esi),%esp
+ movl PCB_EBP(%esi),%ebp
+
+ HALT(0x20)
+ /* Switch address space. */
+ movl PCB_CR3(%esi),%eax
+ HALTT(0x22, %eax)
+ movl %eax,%cr3
+ HALT(0x25)
+ /* Load segment registers. */
+ movl $GSEL(GCPU_SEL, SEL_KPL),%eax
+ HALTT(0x26,%eax)
+ movl %eax,%fs
+ xorl %eax,%eax
+ HALTT(0x27,%eax)
+ movl %eax,%gs
+ movl PCB_CR0(%esi),%eax
+ HALTT(0x28,%eax)
+ movl %eax,%cr0
+ HALTT(0x30,%ecx)
+ pushl %ecx
+ call _C_LABEL(cpu_hatch)
+ HALT(0x33)
+ xorl %esi,%esi
+ jmp _C_LABEL(idle_loop)
+
+ .data
+_C_LABEL(mp_pdirpa):
+ .long 0
+#ifdef MPDEBUG
+ .global _C_LABEL(cpu_trace)
+_C_LABEL(cpu_trace):
+ .long 0x40
+ .long 0xff
+ .long 0xff
+#endif
diff --git a/sys/arch/i386/i386/pctr.c b/sys/arch/i386/i386/pctr.c
index 9ed49ebe0bd..c1c63ceb2f5 100644
--- a/sys/arch/i386/i386/pctr.c
+++ b/sys/arch/i386/i386/pctr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pctr.c,v 1.18 2003/09/29 17:33:01 mickey Exp $ */
+/* $OpenBSD: pctr.c,v 1.19 2004/06/13 21:49:15 niklas Exp $ */
/*
* Pentium performance counter driver for OpenBSD.
@@ -9,6 +9,7 @@
* OpenBSD project by leaving this copyright notice intact.
*/
+#include <sys/param.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 8dc8115009e..dd34d9b7625 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.75 2004/02/01 12:26:45 grange Exp $ */
+/* $OpenBSD: pmap.c,v 1.76 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
/*
@@ -70,6 +70,7 @@
#include <uvm/uvm.h>
+#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/specialreg.h>
#include <machine/gdt.h>
@@ -128,8 +129,6 @@
* - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's.
* if we run out of pv_entry's we allocate a new pv_page and free
* its pv_entrys.
- * - pmap_remove_record: a list of virtual addresses whose mappings
- * have been changed. used for TLB flushing.
*/
/*
@@ -225,35 +224,18 @@
* this lock protects the list of active pmaps (headed by "pmaps").
* we lock it when adding or removing pmaps from this list.
*
- * - pmap_copy_page_lock
- * locks the tmp kernel PTE mappings we used to copy data
- *
- * - pmap_zero_page_lock
- * locks the tmp kernel PTE mapping we use to zero a page
- *
- * - pmap_tmpptp_lock
- * locks the tmp kernel PTE mapping we use to look at a PTP
- * in another process
- *
- * XXX: would be nice to have per-CPU VAs for the above 4
*/
/*
* locking data structures
*/
-#ifdef __OpenBSD__
-/* XXX */
-#define spinlockinit(lock, name, flags) /* nada */
-#define spinlockmgr(lock, flags, slock) /* nada */
-#endif
-
-struct lock pmap_main_lock;
struct simplelock pvalloc_lock;
struct simplelock pmaps_lock;
-struct simplelock pmap_copy_page_lock;
-struct simplelock pmap_zero_page_lock;
-struct simplelock pmap_tmpptp_lock;
+
+#if defined(MULTIPROCESSOR) && 0
+
+struct lock pmap_main_lock;
#define PMAP_MAP_TO_HEAD_LOCK() \
spinlockmgr(&pmap_main_lock, LK_SHARED, (void *) 0)
@@ -265,6 +247,59 @@ struct simplelock pmap_tmpptp_lock;
#define PMAP_HEAD_TO_MAP_UNLOCK() \
spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0)
+#else
+
+#define PMAP_MAP_TO_HEAD_LOCK() /* null */
+#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */
+
+#define PMAP_HEAD_TO_MAP_LOCK() /* null */
+#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */
+
+#endif
+
+/*
+ * TLB Shootdown:
+ *
+ * When a mapping is changed in a pmap, the TLB entry corresponding to
+ * the virtual address must be invalidated on all processors. In order
+ * to accomplish this on systems with multiple processors, messages are
+ * sent from the processor which performs the mapping change to all
+ * processors on which the pmap is active. For other processors, the
+ * ASN generation numbers for that processor is invalidated, so that
+ * the next time the pmap is activated on that processor, a new ASN
+ * will be allocated (which implicitly invalidates all TLB entries).
+ *
+ * Shootdown job queue entries are allocated using a simple special-
+ * purpose allocator for speed.
+ */
+struct pmap_tlb_shootdown_job {
+ TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
+ vaddr_t pj_va; /* virtual address */
+ pmap_t pj_pmap; /* the pmap which maps the address */
+ pt_entry_t pj_pte; /* the PTE bits */
+ struct pmap_tlb_shootdown_job *pj_nextfree;
+};
+
+struct pmap_tlb_shootdown_q {
+ TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;
+ int pq_pte; /* aggregate PTE bits */
+ int pq_count; /* number of pending requests */
+ struct SIMPLELOCK pq_slock; /* spin lock on queue */
+ int pq_flushg; /* pending flush global */
+ int pq_flushu; /* pending flush user */
+} pmap_tlb_shootdown_q[I386_MAXPROCS];
+
+#define PMAP_TLB_MAXJOBS 16
+
+void pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *);
+struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get(
+ struct pmap_tlb_shootdown_q *);
+void pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *,
+ struct pmap_tlb_shootdown_job *);
+
+struct SIMPLELOCK pmap_tlb_shootdown_job_lock;
+struct pmap_tlb_shootdown_job *pj_page, *pj_free;
+
/*
* global data structures
*/
@@ -348,6 +383,20 @@ static struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */
struct pool pmap_pmap_pool;
/*
+ * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a
+ * I386_MAXPROCS*NPTECL array of PTE's, to avoid cache line thrashing
+ * due to false sharing.
+ */
+
+#ifdef MULTIPROCESSOR
+#define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
+#define VASLEW(va,id) ((va)+(id)*NPTECL*NBPG)
+#else
+#define PTESLEW(pte, id) (pte)
+#define VASLEW(va,id) (va)
+#endif
+
+/*
* special VAs and the PTEs that map them
*/
@@ -373,41 +422,42 @@ extern vaddr_t pentium_idt_vaddr;
* local prototypes
*/
-static struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t);
-static struct vm_page *pmap_alloc_ptp(struct pmap *, int, boolean_t);
-static struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */
+struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t);
+struct vm_page *pmap_alloc_ptp(struct pmap *, int, boolean_t);
+struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */
#define ALLOCPV_NEED 0 /* need PV now */
#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */
#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */
-static struct pv_entry *pmap_alloc_pvpage(struct pmap *, int);
-static void pmap_enter_pv(struct pv_head *,
+struct pv_entry *pmap_alloc_pvpage(struct pmap *, int);
+void pmap_enter_pv(struct pv_head *,
struct pv_entry *, struct pmap *,
vaddr_t, struct vm_page *);
-static void pmap_free_pv(struct pmap *, struct pv_entry *);
-static void pmap_free_pvs(struct pmap *, struct pv_entry *);
-static void pmap_free_pv_doit(struct pv_entry *);
-static void pmap_free_pvpage(void);
-static struct vm_page *pmap_get_ptp(struct pmap *, int, boolean_t);
-static boolean_t pmap_is_curpmap(struct pmap *);
-static pt_entry_t *pmap_map_ptes(struct pmap *);
-static struct pv_entry *pmap_remove_pv(struct pv_head *, struct pmap *,
- vaddr_t);
-static boolean_t pmap_remove_pte(struct pmap *, struct vm_page *,
- pt_entry_t *, vaddr_t);
-static void pmap_remove_ptes(struct pmap *,
- struct pmap_remove_record *,
- struct vm_page *, vaddr_t,
- vaddr_t, vaddr_t);
-static struct vm_page *pmap_steal_ptp(struct uvm_object *,
+void pmap_free_pv(struct pmap *, struct pv_entry *);
+void pmap_free_pvs(struct pmap *, struct pv_entry *);
+void pmap_free_pv_doit(struct pv_entry *);
+void pmap_free_pvpage(void);
+struct vm_page *pmap_get_ptp(struct pmap *, int, boolean_t);
+boolean_t pmap_is_curpmap(struct pmap *);
+boolean_t pmap_is_active(struct pmap *, int);
+pt_entry_t *pmap_map_ptes(struct pmap *);
+struct pv_entry *pmap_remove_pv(struct pv_head *, struct pmap *,
vaddr_t);
-static vaddr_t pmap_tmpmap_pa(paddr_t);
-static pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *);
-static void pmap_tmpunmap_pa(void);
-static void pmap_tmpunmap_pvepte(struct pv_entry *);
-static boolean_t pmap_try_steal_pv(struct pv_head *,
+boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
+ vaddr_t, int32_t *);
+void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
+ vaddr_t, vaddr_t, int32_t *);
+struct vm_page *pmap_steal_ptp(struct uvm_object *, vaddr_t);
+vaddr_t pmap_tmpmap_pa(paddr_t);
+pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *);
+void pmap_tmpunmap_pa(void);
+void pmap_tmpunmap_pvepte(struct pv_entry *);
+void pmap_apte_flush(struct pmap *);
+boolean_t pmap_try_steal_pv(struct pv_head *,
struct pv_entry *,
struct pv_entry *);
-static void pmap_unmap_ptes(struct pmap *);
+void pmap_unmap_ptes(struct pmap *);
+void pmap_exec_account(struct pmap *, vaddr_t, pt_entry_t,
+ pt_entry_t);
void pmap_pinit(pmap_t);
void pmap_release(pmap_t);
@@ -423,7 +473,7 @@ void pmap_zero_phys(paddr_t);
* of course the kernel is always loaded
*/
-__inline static boolean_t
+boolean_t
pmap_is_curpmap(pmap)
struct pmap *pmap;
{
@@ -432,50 +482,72 @@ pmap_is_curpmap(pmap)
}
/*
+ * pmap_is_active: is this pmap loaded into the specified processor's %cr3?
+ */
+
+boolean_t
+pmap_is_active(pmap, cpu_id)
+ struct pmap *pmap;
+ int cpu_id;
+{
+
+ return (pmap == pmap_kernel() ||
+ (pmap->pm_cpus & (1U << cpu_id)) != 0);
+}
+
+/*
* pmap_tmpmap_pa: map a page in for tmp usage
- *
- * => returns with pmap_tmpptp_lock held
*/
-__inline static vaddr_t
+vaddr_t
pmap_tmpmap_pa(pa)
paddr_t pa;
{
- simple_lock(&pmap_tmpptp_lock);
+#ifdef MULTIPROCESSOR
+ int id = cpu_number();
+#endif
+ pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
+ caddr_t ptpva = VASLEW(ptpp, id);
#if defined(DIAGNOSTIC)
- if (*ptp_pte)
+ if (*ptpte)
panic("pmap_tmpmap_pa: ptp_pte in use?");
#endif
- *ptp_pte = PG_V | PG_RW | pa; /* always a new mapping */
- return((vaddr_t)ptpp);
+ *ptpte = PG_V | PG_RW | pa; /* always a new mapping */
+ return((vaddr_t)ptpva);
}
/*
* pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa)
- *
- * => we release pmap_tmpptp_lock
*/
-__inline static void
+void
pmap_tmpunmap_pa()
{
+#ifdef MULTIPROCESSOR
+ int id = cpu_number();
+#endif
+ pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
+ caddr_t ptpva = VASLEW(ptpp, id);
#if defined(DIAGNOSTIC)
- if (!pmap_valid_entry(*ptp_pte))
+ if (!pmap_valid_entry(*ptpte))
panic("pmap_tmpunmap_pa: our pte invalid?");
#endif
- *ptp_pte = 0; /* zap! */
- pmap_update_pg((vaddr_t)ptpp);
- simple_unlock(&pmap_tmpptp_lock);
+ *ptpte = 0; /* zap! */
+ pmap_update_pg((vaddr_t)ptpva);
+#ifdef MULTIPROCESSOR
+ /*
+ * No need for tlb shootdown here, since ptp_pte is per-CPU.
+ */
+#endif
}
/*
* pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry
*
* => do NOT use this on kernel mappings [why? because pv_ptp may be NULL]
- * => we may grab pmap_tmpptp_lock and return with it held
*/
-__inline static pt_entry_t *
+pt_entry_t *
pmap_tmpmap_pvepte(pve)
struct pv_entry *pve;
{
@@ -494,11 +566,9 @@ pmap_tmpmap_pvepte(pve)
/*
* pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte
- *
- * => we will release pmap_tmpptp_lock if we hold it
*/
-__inline static void
+void
pmap_tmpunmap_pvepte(pve)
struct pv_entry *pve;
{
@@ -509,6 +579,41 @@ pmap_tmpunmap_pvepte(pve)
pmap_tmpunmap_pa();
}
+void
+pmap_apte_flush(struct pmap *pmap)
+{
+#if defined(MULTIPROCESSOR)
+ struct pmap_tlb_shootdown_q *pq;
+ struct cpu_info *ci, *self = curcpu();
+ CPU_INFO_ITERATOR cii;
+ int s;
+#endif
+
+ tlbflush(); /* flush TLB on current processor */
+#if defined(MULTIPROCESSOR)
+ /*
+ * Flush the APTE mapping from all other CPUs that
+ * are using the pmap we are using (who's APTE space
+ * is the one we've just modified).
+ *
+ * XXXthorpej -- find a way to defer the IPI.
+ */
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ if (ci == self)
+ continue;
+ if (pmap_is_active(pmap, ci->ci_cpuid)) {
+ pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
+ s = splipi();
+ SIMPLE_LOCK(&pq->pq_slock);
+ pq->pq_flushu++;
+ SIMPLE_UNLOCK(&pq->pq_slock);
+ splx(s);
+ i386_send_ipi(ci, I386_IPI_TLB);
+ }
+ }
+#endif
+}
+
/*
* pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
*
@@ -516,7 +621,7 @@ pmap_tmpunmap_pvepte(pve)
* => must be undone with pmap_unmap_ptes before returning
*/
-__inline static pt_entry_t *
+pt_entry_t *
pmap_map_ptes(pmap)
struct pmap *pmap;
{
@@ -547,7 +652,7 @@ pmap_map_ptes(pmap)
if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) {
*APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V);
if (pmap_valid_entry(opde))
- tlbflush();
+ pmap_apte_flush(pmap);
}
return(APTE_BASE);
}
@@ -556,7 +661,7 @@ pmap_map_ptes(pmap)
* pmap_unmap_ptes: unlock the PTE mapping of "pmap"
*/
-__inline static void
+void
pmap_unmap_ptes(pmap)
struct pmap *pmap;
{
@@ -566,12 +671,16 @@ pmap_unmap_ptes(pmap)
if (pmap_is_curpmap(pmap)) {
simple_unlock(&pmap->pm_obj.vmobjlock);
} else {
+#if defined(MULTIPROCESSOR)
+ *APDP_PDE = 0;
+ pmap_apte_flush(curpcb->pcb_pmap);
+#endif
simple_unlock(&pmap->pm_obj.vmobjlock);
simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
}
}
-__inline static void
+void
pmap_exec_account(struct pmap *pm, vaddr_t va,
pt_entry_t opte, pt_entry_t npte)
{
@@ -579,9 +688,18 @@ pmap_exec_account(struct pmap *pm, vaddr_t va,
pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
return;
- if ((opte ^ npte) & PG_X)
+ if ((opte ^ npte) & PG_X) {
+#ifdef MULTIPROCESSOR
+ int32_t cpumask = 0;
+
+ pmap_tlb_shootdown(pm, va, opte, &cpumask);
+ pmap_tlb_shootnow(cpumask);
+#else
+ /* Don't bother deferring in the single CPU case. */
pmap_update_pg(va);
-
+#endif
+ }
+
/*
* Executability was removed on the last executable change.
* Reset the code segment to something conservative and
@@ -656,14 +774,22 @@ pmap_kenter_pa(va, pa, prot)
paddr_t pa;
vm_prot_t prot;
{
- pt_entry_t *pte, opte;
+ pt_entry_t *pte, opte, npte;
pte = vtopte(va);
- opte = *pte;
- *pte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) |
- PG_V | pmap_pg_g; /* zap! */
- if (pmap_valid_entry(opte))
+ npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V | pmap_pg_g;
+ opte = i386_atomic_testset_ul(pte, npte); /* zap! */
+ if (pmap_valid_entry(opte)) {
+#ifdef MULTIPROCESSOR
+ int32_t cpumask = 0;
+
+ pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
+ pmap_tlb_shootnow(cpumask);
+#else
+ /* Don't bother deferring in the single CPU case. */
pmap_update_pg(va);
+#endif
+ }
}
/*
@@ -673,8 +799,6 @@ pmap_kenter_pa(va, pa, prot)
* => caller must dispose of any vm_page mapped in the va range
* => note: not an inline function
* => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
- * => we assume kernel only unmaps valid addresses and thus don't bother
- * checking the valid bit before doing TLB flushing
*/
void
@@ -682,25 +806,31 @@ pmap_kremove(va, len)
vaddr_t va;
vsize_t len;
{
- pt_entry_t *pte;
+ pt_entry_t *pte, opte;
+#ifdef MULTIPROCESSOR
+ int32_t cpumask = 0;
+#endif
len >>= PAGE_SHIFT;
- for ( /* null */ ; len ; len--, va += NBPG) {
- pte = vtopte(va);
+ for ( /* null */ ; len ; len--, va += PAGE_SIZE) {
+ if (va < VM_MIN_KERNEL_ADDRESS)
+ pte = vtopte(va);
+ else
+ pte = kvtopte(va);
+ opte = i386_atomic_testset_ul(pte, 0); /* zap! */
#ifdef DIAGNOSTIC
- if (*pte & PG_PVLIST)
- panic("pmap_kremove: PG_PVLIST mapping for 0x%lx",
- va);
-#endif
- *pte = 0; /* zap! */
-#if defined(I386_CPU)
- if (cpu_class != CPUCLASS_386)
+ if (opte & PG_PVLIST)
+ panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", va);
#endif
+ if ((opte & (PG_V | PG_U)) == (PG_V | PG_U))
+#ifdef MULTIPROCESSOR
+ pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
+#else
pmap_update_pg(va);
+#endif
}
-#if defined(I386_CPU)
- if (cpu_class == CPUCLASS_386)
- tlbflush();
+#ifdef MULTIPROCESSOR
+ pmap_tlb_shootnow(cpumask);
#endif
}
@@ -729,6 +859,7 @@ pmap_bootstrap(kva_start)
struct pmap *kpm;
vaddr_t kva;
pt_entry_t *pte;
+ int i;
/*
* set the page size (default value is 4K which is ok)
@@ -831,6 +962,26 @@ pmap_bootstrap(kva_start)
pte = PTE_BASE + i386_btop(virtual_avail);
+#ifdef MULTIPROCESSOR
+ /*
+ * Waste some VA space to avoid false sharing of cache lines
+ * for page table pages: Give each possible CPU a cache line
+ * of PTE's (8) to play with, though we only need 4. We could
+ * recycle some of this waste by putting the idle stacks here
+ * as well; we could waste less space if we knew the largest
+ * CPU ID beforehand.
+ */
+ csrcp = (caddr_t) virtual_avail; csrc_pte = pte;
+
+ cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1;
+
+ zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2;
+
+ ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3;
+
+ virtual_avail += PAGE_SIZE * I386_MAXPROCS * NPTECL;
+ pte += I386_MAXPROCS * NPTECL;
+#else
csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */
virtual_avail += PAGE_SIZE; pte++; /* advance */
@@ -842,10 +993,11 @@ pmap_bootstrap(kva_start)
ptpp = (caddr_t) virtual_avail; ptp_pte = pte;
virtual_avail += PAGE_SIZE; pte++;
+#endif
/* XXX: vmmap used by mem.c... should be uvm_map_reserve */
vmmap = (char *)virtual_avail; /* don't need pte */
- virtual_avail += PAGE_SIZE; pte++;
+ virtual_avail += PAGE_SIZE;
#ifdef __NetBSD
msgbuf_vaddr = virtual_avail; /* don't need pte */
@@ -883,12 +1035,11 @@ pmap_bootstrap(kva_start)
* init the static-global locks and global lists.
*/
+#if defined(MULTIPROCESSOR) && 0
spinlockinit(&pmap_main_lock, "pmaplk", 0);
+#endif
simple_lock_init(&pvalloc_lock);
simple_lock_init(&pmaps_lock);
- simple_lock_init(&pmap_copy_page_lock);
- simple_lock_init(&pmap_zero_page_lock);
- simple_lock_init(&pmap_tmpptp_lock);
LIST_INIT(&pmaps);
TAILQ_INIT(&pv_freepages);
TAILQ_INIT(&pv_unusedpgs);
@@ -900,6 +1051,17 @@ pmap_bootstrap(kva_start)
pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
&pool_allocator_nointr);
+ /*
+ * Initialize the TLB shootdown queues.
+ */
+
+ SIMPLE_LOCK_INIT(&pmap_tlb_shootdown_job_lock);
+
+ for (i = 0; i < I386_MAXPROCS; i++) {
+ TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
+ SIMPLE_LOCK_INIT(&pmap_tlb_shootdown_q[i].pq_slock);
+ }
+
#ifdef __NetBSD__
/*
* we must call uvm_page_physload() after we are done playing with
@@ -1009,6 +1171,15 @@ pmap_init()
pv_nfpvents = 0;
(void) pmap_add_pvpage(pv_initpage, FALSE);
+ pj_page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE);
+ if (pj_page == NULL)
+ panic("pmap_init: pj_page");
+
+ for (i = 0; i < PAGE_SIZE / sizeof *pj_page - 1; i++)
+ pj_page[i].pj_nextfree = &pj_page[i + 1];
+ pj_page[i].pj_nextfree = NULL;
+ pj_free = &pj_page[0];
+
/*
* done: pmap module is up (and ready for business)
*/
@@ -1043,7 +1214,7 @@ pmap_init()
* "try" is for optional functions like pmap_copy().
*/
-__inline static struct pv_entry *
+struct pv_entry *
pmap_alloc_pv(pmap, mode)
struct pmap *pmap;
int mode;
@@ -1100,7 +1271,7 @@ pmap_alloc_pv(pmap, mode)
* => we assume that the caller holds pvalloc_lock
*/
-static struct pv_entry *
+struct pv_entry *
pmap_alloc_pvpage(pmap, mode)
struct pmap *pmap;
int mode;
@@ -1233,12 +1404,15 @@ steal_one:
* => return true if we did it!
*/
-static boolean_t
+boolean_t
pmap_try_steal_pv(pvh, cpv, prevpv)
struct pv_head *pvh;
struct pv_entry *cpv, *prevpv;
{
- pt_entry_t *ptep; /* pointer to a PTE */
+ pt_entry_t *ptep, opte;
+#ifdef MULTIPROCESSOR
+ int32_t cpumask = 0;
+#endif
/*
* we never steal kernel mappings or mappings from pmaps we can't lock
@@ -1257,9 +1431,15 @@ pmap_try_steal_pv(pvh, cpv, prevpv)
if (*ptep & PG_W) {
ptep = NULL; /* wired page, avoid stealing this one */
} else {
- *ptep = 0; /* zap! */
+ opte = i386_atomic_testset_ul(ptep, 0); /* zap! */
+#ifdef MULTIPROCESSOR
+ pmap_tlb_shootdown(cpv->pv_pmap, cpv->pv_va, opte, &cpumask);
+ pmap_tlb_shootnow(cpumask);
+#else
+ /* Don't bother deferring in the single CPU case. */
if (pmap_is_curpmap(cpv->pv_pmap))
pmap_update_pg(cpv->pv_va);
+#endif
pmap_tmpunmap_pvepte(cpv);
}
if (ptep == NULL) {
@@ -1296,7 +1476,7 @@ pmap_try_steal_pv(pvh, cpv, prevpv)
* => if need_entry is true, we allocate and return one pv_entry
*/
-static struct pv_entry *
+struct pv_entry *
pmap_add_pvpage(pvp, need_entry)
struct pv_page *pvp;
boolean_t need_entry;
@@ -1329,7 +1509,7 @@ pmap_add_pvpage(pvp, need_entry)
* => we must be holding pvalloc_lock
*/
-__inline static void
+void
pmap_free_pv_doit(pv)
struct pv_entry *pv;
{
@@ -1364,7 +1544,7 @@ pmap_free_pv_doit(pv)
* => we gain the pvalloc_lock
*/
-__inline static void
+void
pmap_free_pv(pmap, pv)
struct pmap *pmap;
struct pv_entry *pv;
@@ -1389,7 +1569,7 @@ pmap_free_pv(pmap, pv)
* => we gain the pvalloc_lock
*/
-__inline static void
+void
pmap_free_pvs(pmap, pvs)
struct pmap *pmap;
struct pv_entry *pvs;
@@ -1426,7 +1606,7 @@ pmap_free_pvs(pmap, pvs)
* holding kmem_object's lock.
*/
-static void
+void
pmap_free_pvpage()
{
int s;
@@ -1488,7 +1668,7 @@ pmap_free_pvpage()
* => caller should adjust ptp's wire_count before calling
*/
-__inline static void
+void
pmap_enter_pv(pvh, pve, pmap, va, ptp)
struct pv_head *pvh;
struct pv_entry *pve; /* preallocated pve for us to use */
@@ -1515,7 +1695,7 @@ pmap_enter_pv(pvh, pve, pmap, va, ptp)
* => we return the removed pve
*/
-__inline static struct pv_entry *
+struct pv_entry *
pmap_remove_pv(pvh, pmap, va)
struct pv_head *pvh;
struct pmap *pmap;
@@ -1554,7 +1734,7 @@ pmap_remove_pv(pvh, pmap, va)
* from another pmap (e.g. during optional functions like pmap_copy)
*/
-__inline static struct vm_page *
+struct vm_page *
pmap_alloc_ptp(pmap, pde_index, just_try)
struct pmap *pmap;
int pde_index;
@@ -1596,7 +1776,7 @@ pmap_alloc_ptp(pmap, pde_index, just_try)
* enough free pages around for us to allocate off the free page list]
*/
-static struct vm_page *
+struct vm_page *
pmap_steal_ptp(obj, offset)
struct uvm_object *obj;
vaddr_t offset;
@@ -1607,6 +1787,7 @@ pmap_steal_ptp(obj, offset)
pt_entry_t *ptes;
int idx, lcv;
boolean_t caller_locked, we_locked;
+ int32_t cpumask = 0;
simple_lock(&pmaps_lock);
if (pmaps_hand == NULL)
@@ -1644,10 +1825,9 @@ pmap_steal_ptp(obj, offset)
(PG_V|PG_W))
break;
if (lcv == PTES_PER_PTP)
- pmap_remove_ptes(pmaps_hand, NULL, ptp,
- (vaddr_t)ptes,
- ptp_i2v(idx),
- ptp_i2v(idx+1));
+ pmap_remove_ptes(pmaps_hand, ptp,
+ (vaddr_t)ptes, ptp_i2v(idx),
+ ptp_i2v(idx+1), &cpumask);
pmap_tmpunmap_pa();
if (lcv != PTES_PER_PTP)
@@ -1660,14 +1840,17 @@ pmap_steal_ptp(obj, offset)
pmaps_hand->pm_pdir[idx] = 0; /* zap! */
pmaps_hand->pm_stats.resident_count--;
+#ifdef MULTIPROCESSOR
+ pmap_apte_flush(pmaps_hand);
+#else
if (pmap_is_curpmap(pmaps_hand))
- tlbflush();
+ pmap_apte_flush(pmaps_hand);
else if (pmap_valid_entry(*APDP_PDE) &&
- (*APDP_PDE & PG_FRAME) ==
- pmaps_hand->pm_pdirpa) {
+ (*APDP_PDE & PG_FRAME) ==
+ pmaps_hand->pm_pdirpa)
pmap_update_pg(((vaddr_t)APTE_BASE) +
ptp->offset);
- }
+#endif
/* put it in our pmap! */
uvm_pagerealloc(ptp, obj, offset);
@@ -1687,6 +1870,7 @@ pmap_steal_ptp(obj, offset)
} while (ptp == NULL && pmaps_hand != firstpmap);
simple_unlock(&pmaps_lock);
+ pmap_tlb_shootnow(cpumask);
return(ptp);
}
@@ -1697,7 +1881,7 @@ pmap_steal_ptp(obj, offset)
* => pmap should be locked
*/
-static struct vm_page *
+struct vm_page *
pmap_get_ptp(pmap, pde_index, just_try)
struct pmap *pmap;
int pde_index;
@@ -1873,7 +2057,10 @@ pmap_release(pmap)
uvm_pagefree(pg);
}
- /* XXX: need to flush it out of other processor's APTE space? */
+ /*
+ * MULTIPROCESSOR -- no need to flush out of other processors'
+ * APTE space because we do that in pmap_unmap_ptes().
+ */
uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG);
#ifdef USER_LDT
@@ -1993,22 +2180,31 @@ pmap_activate(p)
pcb->pcb_pmap = pmap;
pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
pcb->pcb_cr3 = pmap->pm_pdirpa;
- if (p == curproc)
+ if (p == curproc) {
lcr3(pcb->pcb_cr3);
- if (pcb == curpcb)
lldt(pcb->pcb_ldt_sel);
+
+ /*
+ * mark the pmap in use by this processor.
+ */
+ i386_atomic_setbits_l(&pmap->pm_cpus, (1U << cpu_number()));
+ }
}
/*
* pmap_deactivate: deactivate a process' pmap
- *
- * => XXX: what should this do, if anything?
*/
void
pmap_deactivate(p)
struct proc *p;
{
+ struct pmap *pmap = p->p_vmspace->vm_map.pmap;
+
+ /*
+ * mark the pmap no longer in use by this processor.
+ */
+ i386_atomic_clearbits_l(&pmap->pm_cpus, (1U << cpu_number()));
}
/*
@@ -2065,19 +2261,7 @@ void (*pagezero)(void *, size_t) = bzero;
void
pmap_zero_page(struct vm_page *pg)
{
- paddr_t pa = VM_PAGE_TO_PHYS(pg);
-
- simple_lock(&pmap_zero_page_lock);
-#ifdef DIAGNOSTIC
- if (*zero_pte)
- panic("pmap_zero_page: lock botch");
-#endif
-
- *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */
- pagezero(zerop, PAGE_SIZE); /* zero */
- *zero_pte = 0; /* zap! */
- pmap_update_pg((vaddr_t)zerop); /* flush TLB */
- simple_unlock(&pmap_zero_page_lock);
+ pmap_zero_phys(VM_PAGE_TO_PHYS(pg));
}
/*
@@ -2087,17 +2271,21 @@ pmap_zero_page(struct vm_page *pg)
void
pmap_zero_phys(paddr_t pa)
{
- simple_lock(&pmap_zero_page_lock);
+#ifdef MULTIPROCESSOR
+ int id = cpu_number();
+#endif
+ pt_entry_t *zpte = PTESLEW(zero_pte, id);
+ caddr_t zerova = VASLEW(zerop, id);
+
#ifdef DIAGNOSTIC
- if (*zero_pte)
- panic("pmap_zero_page: lock botch");
+ if (*zpte)
+ panic("pmap_zero_phys: lock botch");
#endif
- *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */
- pagezero(zerop, PAGE_SIZE); /* zero */
- *zero_pte = 0; /* zap! */
- pmap_update_pg((vaddr_t)zerop); /* flush TLB */
- simple_unlock(&pmap_zero_page_lock);
+ *zpte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */
+ pmap_update_pg((vaddr_t)zerova); /* flush TLB */
+ pagezero(zerova, PAGE_SIZE); /* zero */
+ *zpte = 0; /* zap! */
}
/*
@@ -2108,18 +2296,22 @@ boolean_t
pmap_zero_page_uncached(pa)
paddr_t pa;
{
- simple_lock(&pmap_zero_page_lock);
+#ifdef MULTIPROCESSOR
+ int id = cpu_number();
+#endif
+ pt_entry_t *zpte = PTESLEW(zero_pte, id);
+ caddr_t zerova = VASLEW(zerop, id);
+
#ifdef DIAGNOSTIC
- if (*zero_pte)
+ if (*zpte)
panic("pmap_zero_page_uncached: lock botch");
#endif
- *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW | /* map in */
+ *zpte = (pa & PG_FRAME) | PG_V | PG_RW | /* map in */
((cpu_class != CPUCLASS_386) ? PG_N : 0);
- pagezero(zerop, PAGE_SIZE); /* zero */
- *zero_pte = 0; /* zap! */
- pmap_update_pg((vaddr_t)zerop); /* flush TLB */
- simple_unlock(&pmap_zero_page_lock);
+ pmap_update_pg((vaddr_t)zerova); /* flush TLB */
+ pagezero(zerova, PAGE_SIZE); /* zero */
+ *zpte = 0; /* zap! */
return (TRUE);
}
@@ -2133,19 +2325,28 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
{
paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
+#ifdef MULTIPROCESSOR
+ int id = cpu_number();
+#endif
+ pt_entry_t *spte = PTESLEW(csrc_pte,id);
+ pt_entry_t *dpte = PTESLEW(cdst_pte,id);
+ caddr_t csrcva = VASLEW(csrcp, id);
+ caddr_t cdstva = VASLEW(cdstp, id);
- simple_lock(&pmap_copy_page_lock);
#ifdef DIAGNOSTIC
- if (*csrc_pte || *cdst_pte)
+ if (*spte || *dpte)
panic("pmap_copy_page: lock botch");
#endif
- *csrc_pte = (srcpa & PG_FRAME) | PG_V | PG_RW;
- *cdst_pte = (dstpa & PG_FRAME) | PG_V | PG_RW;
- bcopy(csrcp, cdstp, PAGE_SIZE);
- *csrc_pte = *cdst_pte = 0; /* zap! */
- pmap_update_2pg((vaddr_t)csrcp, (vaddr_t)cdstp);
- simple_unlock(&pmap_copy_page_lock);
+ *spte = (srcpa & PG_FRAME) | PG_V | PG_RW;
+ *dpte = (dstpa & PG_FRAME) | PG_V | PG_RW;
+ pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
+ bcopy(csrcva, cdstva, PAGE_SIZE);
+ *spte = *dpte = 0; /* zap! */
+ pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
+#ifdef MULTIPROCESSOR
+ /* Using per-cpu VA; no shootdown required here. */
+#endif
}
/*
@@ -2163,13 +2364,13 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
* => PTP should be null if pmap == pmap_kernel()
*/
-static void
-pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva)
+void
+pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp)
struct pmap *pmap;
- struct pmap_remove_record *pmap_rr;
struct vm_page *ptp;
vaddr_t ptpva;
vaddr_t startva, endva;
+ int32_t *cpumaskp;
{
struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */
struct pv_entry *pve;
@@ -2191,29 +2392,23 @@ pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva)
if (!pmap_valid_entry(*pte))
continue; /* VA not mapped */
- opte = *pte; /* save the old PTE */
- *pte = 0; /* zap! */
+ /* atomically save the old PTE and zap! it */
+ opte = i386_atomic_testset_ul(pte, 0);
+
if (opte & PG_W)
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
- if (pmap_rr) { /* worried about tlb flushing? */
- if (opte & PG_G) {
- /* PG_G requires this */
- pmap_update_pg(startva);
- } else {
- if (pmap_rr->prr_npages < PMAP_RR_MAX) {
- pmap_rr->prr_vas[pmap_rr->prr_npages++]
- = startva;
- } else {
- if (pmap_rr->prr_npages == PMAP_RR_MAX)
- /* signal an overflow */
- pmap_rr->prr_npages++;
- }
- }
- }
- if (ptp)
+ if (opte & PG_U)
+ pmap_tlb_shootdown(pmap, startva, opte, cpumaskp);
+
+ if (ptp) {
ptp->wire_count--; /* dropping a PTE */
+ /* Make sure that the PDE is flushed */
+ if ((ptp->wire_count <= 1) && !(opte & PG_U))
+ pmap_tlb_shootdown(pmap, startva, opte,
+ cpumaskp);
+ }
/*
* if we are not on a pv_head list we are done.
@@ -2266,12 +2461,13 @@ pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva)
* => returns true if we removed a mapping
*/
-static boolean_t
-pmap_remove_pte(pmap, ptp, pte, va)
+boolean_t
+pmap_remove_pte(pmap, ptp, pte, va, cpumaskp)
struct pmap *pmap;
struct vm_page *ptp;
pt_entry_t *pte;
vaddr_t va;
+ int32_t *cpumaskp;
{
pt_entry_t opte;
int bank, off;
@@ -2289,11 +2485,16 @@ pmap_remove_pte(pmap, ptp, pte, va)
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
- if (ptp)
+ if (opte & PG_U)
+ pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
+
+ if (ptp) {
ptp->wire_count--; /* dropping a PTE */
+ /* Make sure that the PDE is flushed */
+ if ((ptp->wire_count <= 1) && !(opte & PG_U))
+ pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
- if (pmap_is_curpmap(pmap))
- pmap_update_pg(va); /* flush TLB */
+ }
/*
* if we are not on a pv_head list we are done.
@@ -2338,18 +2539,18 @@ pmap_remove(pmap, sva, eva)
struct pmap *pmap;
vaddr_t sva, eva;
{
- pt_entry_t *ptes;
+ pt_entry_t *ptes, opte;
boolean_t result;
paddr_t ptppa;
vaddr_t blkendva;
struct vm_page *ptp;
- struct pmap_remove_record pmap_rr, *prr;
+ int32_t cpumask = 0;
/*
* we lock in the pmap => pv_head direction
*/
- PMAP_MAP_TO_HEAD_LOCK();
+ PMAP_MAP_TO_HEAD_LOCK();
ptes = pmap_map_ptes(pmap); /* locks pmap */
/*
@@ -2385,7 +2586,7 @@ pmap_remove(pmap, sva, eva)
/* do it! */
result = pmap_remove_pte(pmap, ptp,
- &ptes[i386_btop(sva)], sva);
+ &ptes[i386_btop(sva)], sva, &cpumask);
/*
* if mapping removed and the PTP is no longer
@@ -2393,15 +2594,30 @@ pmap_remove(pmap, sva, eva)
*/
if (result && ptp && ptp->wire_count <= 1) {
- pmap->pm_pdir[pdei(sva)] = 0; /* zap! */
-#if defined(I386_CPU)
- /* already dumped whole TLB on i386 */
- if (cpu_class != CPUCLASS_386)
+ /* zap! */
+ opte = i386_atomic_testset_ul(
+ &pmap->pm_pdir[pdei(sva)], 0);
+#ifdef MULTIPROCESSOR
+ /*
+ * XXXthorpej Redundant shootdown can happen
+ * here if we're using APTE space.
+ */
+#endif
+ pmap_tlb_shootdown(curpcb->pcb_pmap,
+ ((vaddr_t)ptes) + ptp->offset, opte,
+ &cpumask);
+#ifdef MULTIPROCESSOR
+ /*
+ * Always shoot down the pmap's self-mapping
+ * of the PTP.
+ * XXXthorpej Redundant shootdown can happen
+ * here if pmap == curpcb->pcb_pmap (not APTE
+ * space).
+ */
+ pmap_tlb_shootdown(pmap,
+ ((vaddr_t)PTE_BASE) + ptp->offset, opte,
+ &cpumask);
#endif
- {
- pmap_update_pg(((vaddr_t) ptes) +
- ptp->offset);
- }
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp)
pmap->pm_ptphint =
@@ -2410,26 +2626,12 @@ pmap_remove(pmap, sva, eva)
uvm_pagefree(ptp);
}
}
-
+ pmap_tlb_shootnow(cpumask);
pmap_unmap_ptes(pmap); /* unlock pmap */
PMAP_MAP_TO_HEAD_UNLOCK();
return;
}
- /*
- * removing a range of pages: we unmap in PTP sized blocks (4MB)
- *
- * if we are the currently loaded pmap, we use prr to keep track
- * of the VAs we unload so that we can flush them out of the tlb.
- */
-
- if (pmap_is_curpmap(pmap)) {
- prr = &pmap_rr;
- prr->prr_npages = 0;
- } else {
- prr = NULL;
- }
-
for (/* null */ ; sva < eva ; sva = blkendva) {
/* determine range of block */
@@ -2479,17 +2681,31 @@ pmap_remove(pmap, sva, eva)
#endif
}
}
- pmap_remove_ptes(pmap, prr, ptp,
- (vaddr_t)&ptes[i386_btop(sva)], sva, blkendva);
+ pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[i386_btop(sva)],
+ sva, blkendva, &cpumask);
/* if PTP is no longer being used, free it! */
if (ptp && ptp->wire_count <= 1) {
- pmap->pm_pdir[pdei(sva)] = 0; /* zap! */
- pmap_update_pg( ((vaddr_t) ptes) + ptp->offset);
-#if defined(I386_CPU)
- /* cancel possible pending pmap update on i386 */
- if (cpu_class == CPUCLASS_386 && prr)
- prr->prr_npages = 0;
+ /* zap! */
+ opte = i386_atomic_testset_ul(
+ &pmap->pm_pdir[pdei(sva)], 0);
+#if defined(MULTIPROCESSOR)
+ /*
+ * XXXthorpej Redundant shootdown can happen here
+ * if we're using APTE space.
+ */
+#endif
+ pmap_tlb_shootdown(curpcb->pcb_pmap,
+ ((vaddr_t)ptes) + ptp->offset, opte, &cpumask);
+#if defined(MULTIPROCESSOR)
+ /*
+ * Always shoot down the pmap's self-mapping
+ * of the PTP.
+ * XXXthorpej Redundant shootdown can happen here
+ * if pmap == curpcb->pcb_pmap (not APTE space).
+ */
+ pmap_tlb_shootdown(pmap,
+ ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask);
#endif
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp) /* update hint? */
@@ -2500,27 +2716,7 @@ pmap_remove(pmap, sva, eva)
}
}
- /*
- * if we kept a removal record and removed some pages update the TLB
- */
-
- if (prr && prr->prr_npages) {
-#if defined(I386_CPU)
- if (cpu_class == CPUCLASS_386) {
- tlbflush();
- } else
-#endif
- { /* not I386 */
- if (prr->prr_npages > PMAP_RR_MAX) {
- tlbflush();
- } else {
- while (prr->prr_npages) {
- pmap_update_pg(
- prr->prr_vas[--prr->prr_npages]);
- }
- }
- } /* not I386 */
- }
+ pmap_tlb_shootnow(cpumask);
pmap_unmap_ptes(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
}
@@ -2540,9 +2736,7 @@ pmap_page_remove(pg)
struct pv_head *pvh;
struct pv_entry *pve;
pt_entry_t *ptes, opte;
-#if defined(I386_CPU)
- boolean_t needs_update = FALSE;
-#endif
+ int32_t cpumask = 0;
/* XXX: vm_page should either contain pv_head or have a pointer to it */
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
@@ -2590,14 +2784,10 @@ pmap_page_remove(pg)
pve->pv_pmap->pm_stats.wired_count--;
pve->pv_pmap->pm_stats.resident_count--;
- if (pmap_is_curpmap(pve->pv_pmap)) {
-#if defined(I386_CPU)
- if (cpu_class == CPUCLASS_386)
- needs_update = TRUE;
- else
-#endif
- pmap_update_pg(pve->pv_va);
- }
+ /* Shootdown only if referenced */
+ if (opte & PG_U)
+ pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte,
+ &cpumask);
/* sync R/M bits */
vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
@@ -2606,12 +2796,29 @@ pmap_page_remove(pg)
if (pve->pv_ptp) {
pve->pv_ptp->wire_count--;
if (pve->pv_ptp->wire_count <= 1) {
+ /*
+ * Do we have to shootdown the page just to
+ * get the pte out of the TLB ?
+ */
+ if(!(opte & PG_U))
+ pmap_tlb_shootdown(pve->pv_pmap,
+ pve->pv_va, opte, &cpumask);
+
/* zap! */
- pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] = 0;
- pmap_update_pg(((vaddr_t)ptes) +
- pve->pv_ptp->offset);
-#if defined(I386_CPU)
- needs_update = FALSE;
+ opte = i386_atomic_testset_ul(
+ &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)],
+ 0);
+ pmap_tlb_shootdown(curpcb->pcb_pmap,
+ ((vaddr_t)ptes) + pve->pv_ptp->offset,
+ opte, &cpumask);
+#if defined(MULTIPROCESSOR)
+ /*
+ * Always shoot down the other pmap's
+ * self-mapping of the PTP.
+ */
+ pmap_tlb_shootdown(pve->pv_pmap,
+ ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset,
+ opte, &cpumask);
#endif
pve->pv_pmap->pm_stats.resident_count--;
/* update hint? */
@@ -2628,10 +2835,7 @@ pmap_page_remove(pg)
pvh->pvh_list = NULL;
simple_unlock(&pvh->pvh_lock);
PMAP_HEAD_TO_MAP_UNLOCK();
-#if defined(I386_CPU)
- if (needs_update)
- tlbflush();
-#endif
+ pmap_tlb_shootnow(cpumask);
}
/*
@@ -2719,11 +2923,9 @@ pmap_change_attrs(pg, setbits, clearbits)
int bank, off;
struct pv_head *pvh;
struct pv_entry *pve;
- pt_entry_t *ptes, npte;
+ pt_entry_t *ptes, npte, opte;
char *myattrs;
-#if defined(I386_CPU)
- boolean_t needs_update = FALSE;
-#endif
+ int32_t cpumask = 0;
/* XXX: vm_page should either contain pv_head or have a pointer to it */
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
@@ -2753,27 +2955,18 @@ pmap_change_attrs(pg, setbits, clearbits)
result |= (npte & clearbits);
npte = (npte | setbits) & ~clearbits;
if (ptes[i386_btop(pve->pv_va)] != npte) {
- ptes[i386_btop(pve->pv_va)] = npte; /* zap! */
-
- if (pmap_is_curpmap(pve->pv_pmap)) {
-#if defined(I386_CPU)
- if (cpu_class == CPUCLASS_386)
- needs_update = TRUE;
- else
-#endif
- pmap_update_pg(pve->pv_va);
- }
+ opte = i386_atomic_testset_ul(
+ &ptes[i386_btop(pve->pv_va)], npte);
+ pmap_tlb_shootdown(pve->pv_pmap,
+ i386_btop(pve->pv_va), opte, &cpumask);
}
pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
}
simple_unlock(&pvh->pvh_lock);
PMAP_HEAD_TO_MAP_UNLOCK();
+ pmap_tlb_shootnow(cpumask);
-#if defined(I386_CPU)
- if (needs_update)
- tlbflush();
-#endif
return(result != 0);
}
@@ -2809,20 +3002,12 @@ pmap_write_protect(pmap, sva, eva, prot)
vm_prot_t prot;
{
pt_entry_t *ptes, *spte, *epte, npte;
- struct pmap_remove_record pmap_rr, *prr;
- vaddr_t blockend, va;
+ vaddr_t blockend;
u_int32_t md_prot;
+ int32_t cpumask = 0;
ptes = pmap_map_ptes(pmap); /* locks pmap */
- /* need to worry about TLB? [TLB stores protection bits] */
- if (pmap_is_curpmap(pmap)) {
- prr = &pmap_rr;
- prr->prr_npages = 0;
- } else {
- prr = NULL;
- }
-
/* should be ok, but just in case ... */
sva &= PG_FRAME;
eva &= PG_FRAME;
@@ -2869,53 +3054,13 @@ pmap_write_protect(pmap, sva, eva, prot)
if (npte != *spte) {
pmap_exec_account(pmap, sva, *spte, npte);
-
- *spte = npte; /* zap! */
-
- if (prr) { /* worried about tlb flushing? */
- va = i386_ptob(spte - ptes);
- if (npte & PG_G) {
- /* PG_G requires this */
- pmap_update_pg(va);
- } else {
- if (prr->prr_npages <
- PMAP_RR_MAX) {
- prr->prr_vas[
- prr->prr_npages++] =
- va;
- } else {
- if (prr->prr_npages ==
- PMAP_RR_MAX)
- /* signal an overflow */
- prr->prr_npages++;
- }
- }
- } /* if (prr) */
- } /* npte != *spte */
- } /* for loop */
- }
-
- /*
- * if we kept a removal record and removed some pages update the TLB
- */
-
- if (prr && prr->prr_npages) {
-#if defined(I386_CPU)
- if (cpu_class == CPUCLASS_386) {
- tlbflush();
- } else
-#endif
- { /* not I386 */
- if (prr->prr_npages > PMAP_RR_MAX) {
- tlbflush();
- } else {
- while (prr->prr_npages) {
- pmap_update_pg(prr->prr_vas[
- --prr->prr_npages]);
- }
+ i386_atomic_testset_ul(spte, npte); /* zap! */
+ pmap_tlb_shootdown(pmap, sva, *spte, &cpumask);
}
- } /* not I386 */
+ }
}
+
+ pmap_tlb_shootnow(cpumask);
pmap_unmap_ptes(pmap); /* unlocks pmap */
}
@@ -3179,8 +3324,18 @@ enter_now:
ptes[i386_btop(va)] = npte; /* zap! */
- if ((opte & ~(PG_M|PG_U)) != npte && pmap_is_curpmap(pmap))
- pmap_update_pg(va);
+ if ((opte & ~(PG_M|PG_U)) != npte) {
+#ifdef MULTIPROCESSOR
+ int32_t cpumask = 0;
+
+ pmap_tlb_shootdown(pmap, va, opte, &cpumask);
+ pmap_tlb_shootnow(cpumask);
+#else
+ /* Don't bother deferring in the single CPU case. */
+ if (pmap_is_curpmap(pmap))
+ pmap_update_pg(va);
+#endif
+ }
error = 0;
@@ -3330,3 +3485,295 @@ pmap_dump(pmap, sva, eva)
PMAP_MAP_TO_HEAD_UNLOCK();
}
#endif
+
+
+/******************** TLB shootdown code ********************/
+
+void
+pmap_tlb_shootnow(int32_t cpumask)
+{
+#ifdef MULTIPROCESSOR
+ struct cpu_info *ci, *self;
+ CPU_INFO_ITERATOR cii;
+ int s;
+#ifdef DIAGNOSTIC
+ int count = 0;
+#endif
+#endif
+
+ if (cpumask == 0)
+ return;
+
+#ifdef MULTIPROCESSOR
+ self = curcpu();
+ s = splipi();
+ self->ci_tlb_ipi_mask = cpumask;
+#endif
+
+ pmap_do_tlb_shootdown(0); /* do *our* work. */
+
+#ifdef MULTIPROCESSOR
+ splx(s);
+
+ /*
+ * Send the TLB IPI to other CPUs pending shootdowns.
+ */
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ if (ci == self)
+ continue;
+ if (cpumask & (1U << ci->ci_cpuid))
+ if (i386_send_ipi(ci, I386_IPI_TLB) != 0)
+ i386_atomic_clearbits_l(&self->ci_tlb_ipi_mask,
+ (1U << ci->ci_cpuid));
+ }
+
+ while (self->ci_tlb_ipi_mask != 0)
+#ifdef DIAGNOSTIC
+ if (count++ > 100000000)
+ panic("TLB IPI rendezvous failed (mask %x)",
+ self->ci_tlb_ipi_mask);
+#else
+ /* XXX insert pause instruction */
+ ;
+#endif
+#endif
+}
+
+/*
+ * pmap_tlb_shootdown:
+ *
+ * Cause the TLB entry for pmap/va to be shot down.
+ */
+void
+pmap_tlb_shootdown(pmap, va, pte, cpumaskp)
+ pmap_t pmap;
+ vaddr_t va;
+ pt_entry_t pte;
+ int32_t *cpumaskp;
+{
+ struct cpu_info *ci, *self;
+ struct pmap_tlb_shootdown_q *pq;
+ struct pmap_tlb_shootdown_job *pj;
+ CPU_INFO_ITERATOR cii;
+ int s;
+
+ if (pmap_initialized == FALSE) {
+ pmap_update_pg(va);
+ return;
+ }
+
+ self = curcpu();
+
+ s = splipi();
+#if 0
+ printf("dshootdown %lx\n", va);
+#endif
+
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ /* Note: we queue shootdown events for ourselves here! */
+ if (pmap_is_active(pmap, ci->ci_cpuid) == 0)
+ continue;
+ if (ci != self && !(ci->ci_flags & CPUF_RUNNING))
+ continue;
+ pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
+ SIMPLE_LOCK(&pq->pq_slock);
+
+ /*
+ * If there's a global flush already queued, or a
+ * non-global flush, and this pte doesn't have the G
+ * bit set, don't bother.
+ */
+ if (pq->pq_flushg > 0 ||
+ (pq->pq_flushu > 0 && (pte & pmap_pg_g) == 0)) {
+ SIMPLE_UNLOCK(&pq->pq_slock);
+ continue;
+ }
+
+#ifdef I386_CPU
+ /*
+ * i386 CPUs can't invalidate a single VA, only
+ * flush the entire TLB, so don't bother allocating
+ * jobs for them -- just queue a `flushu'.
+ *
+ * XXX note that this can be executed for non-i386
+ * when called early (before identifycpu() has set
+ * cpu_class)
+ */
+ if (cpu_class == CPUCLASS_386) {
+ pq->pq_flushu++;
+ *cpumaskp |= 1U << ci->ci_cpuid;
+ SIMPLE_UNLOCK(&pq->pq_slock);
+ continue;
+ }
+#endif
+
+ pj = pmap_tlb_shootdown_job_get(pq);
+ pq->pq_pte |= pte;
+ if (pj == NULL) {
+ /*
+ * Couldn't allocate a job entry.
+ * Kill it now for this cpu, unless the failure
+ * was due to too many pending flushes; otherwise,
+ * tell other cpus to kill everything..
+ */
+ if (ci == self && pq->pq_count < PMAP_TLB_MAXJOBS) {
+ pmap_update_pg(va);
+ SIMPLE_UNLOCK(&pq->pq_slock);
+ continue;
+ } else {
+ if (pq->pq_pte & pmap_pg_g)
+ pq->pq_flushg++;
+ else
+ pq->pq_flushu++;
+ /*
+ * Since we've nailed the whole thing,
+ * drain the job entries pending for that
+ * processor.
+ */
+ pmap_tlb_shootdown_q_drain(pq);
+ *cpumaskp |= 1U << ci->ci_cpuid;
+ }
+ } else {
+ pj->pj_pmap = pmap;
+ pj->pj_va = va;
+ pj->pj_pte = pte;
+ TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
+ *cpumaskp |= 1U << ci->ci_cpuid;
+ }
+ SIMPLE_UNLOCK(&pq->pq_slock);
+ }
+ splx(s);
+}
+
+/*
+ * pmap_do_tlb_shootdown:
+ *
+ * Process pending TLB shootdown operations for this processor.
+ */
+void
+pmap_do_tlb_shootdown(struct cpu_info *self)
+{
+ u_long cpu_id = cpu_number();
+ struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
+ struct pmap_tlb_shootdown_job *pj;
+ int s;
+#ifdef MULTIPROCESSOR
+ struct cpu_info *ci;
+ CPU_INFO_ITERATOR cii;
+#endif
+
+ s = splipi();
+
+ SIMPLE_LOCK(&pq->pq_slock);
+
+ if (pq->pq_flushg) {
+ tlbflushg();
+ pq->pq_flushg = 0;
+ pq->pq_flushu = 0;
+ pmap_tlb_shootdown_q_drain(pq);
+ } else {
+ /*
+ * TLB flushes for PTEs with PG_G set may be in the queue
+ * after a flushu, they need to be dealt with.
+ */
+ if (pq->pq_flushu) {
+ tlbflush();
+ }
+ while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
+ TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
+
+ if ((!pq->pq_flushu && pmap_is_curpmap(pj->pj_pmap)) ||
+ (pj->pj_pte & pmap_pg_g))
+ pmap_update_pg(pj->pj_va);
+
+ pmap_tlb_shootdown_job_put(pq, pj);
+ }
+
+ pq->pq_flushu = pq->pq_pte = 0;
+ }
+
+#ifdef MULTIPROCESSOR
+ for (CPU_INFO_FOREACH(cii, ci))
+ i386_atomic_clearbits_l(&ci->ci_tlb_ipi_mask,
+ (1U << cpu_id));
+#endif
+ SIMPLE_UNLOCK(&pq->pq_slock);
+
+ splx(s);
+}
+
+/*
+ * pmap_tlb_shootdown_q_drain:
+ *
+ * Drain a processor's TLB shootdown queue. We do not perform
+ * the shootdown operations. This is merely a convenience
+ * function.
+ *
+ * Note: We expect the queue to be locked.
+ */
+void
+pmap_tlb_shootdown_q_drain(pq)
+ struct pmap_tlb_shootdown_q *pq;
+{
+ struct pmap_tlb_shootdown_job *pj;
+
+ while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
+ TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
+ pmap_tlb_shootdown_job_put(pq, pj);
+ }
+ pq->pq_pte = 0;
+}
+
+/*
+ * pmap_tlb_shootdown_job_get:
+ *
+ * Get a TLB shootdown job queue entry. This places a limit on
+ * the number of outstanding jobs a processor may have.
+ *
+ * Note: We expect the queue to be locked.
+ */
+struct pmap_tlb_shootdown_job *
+pmap_tlb_shootdown_job_get(pq)
+ struct pmap_tlb_shootdown_q *pq;
+{
+ struct pmap_tlb_shootdown_job *pj;
+
+ if (pq->pq_count >= PMAP_TLB_MAXJOBS)
+ return (NULL);
+
+ SIMPLE_LOCK(&pmap_tlb_shootdown_job_lock);
+ if (pj_free == NULL) {
+ SIMPLE_UNLOCK(&pmap_tlb_shootdown_job_lock);
+ return NULL;
+ }
+ pj = pj_free;
+ pj_free = pj_free->pj_nextfree;
+ SIMPLE_UNLOCK(&pmap_tlb_shootdown_job_lock);
+
+ pq->pq_count++;
+ return (pj);
+}
+
+/*
+ * pmap_tlb_shootdown_job_put:
+ *
+ * Put a TLB shootdown job queue entry onto the free list.
+ *
+ * Note: We expect the queue to be locked.
+ */
+void
+pmap_tlb_shootdown_job_put(pq, pj)
+ struct pmap_tlb_shootdown_q *pq;
+ struct pmap_tlb_shootdown_job *pj;
+{
+#ifdef DIAGNOSTIC
+ if (pq->pq_count == 0)
+ panic("pmap_tlb_shootdown_job_put: queue length inconsistency");
+#endif
+ SIMPLE_LOCK(&pmap_tlb_shootdown_job_lock);
+ pj->pj_nextfree = pj_free;
+ pj_free = pj;
+ SIMPLE_UNLOCK(&pmap_tlb_shootdown_job_lock);
+
+ pq->pq_count--;
+}
diff --git a/sys/arch/i386/i386/process_machdep.c b/sys/arch/i386/i386/process_machdep.c
index 1667b8e223d..fbf8c250220 100644
--- a/sys/arch/i386/i386/process_machdep.c
+++ b/sys/arch/i386/i386/process_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: process_machdep.c,v 1.15 2004/02/05 01:06:33 deraadt Exp $ */
+/* $OpenBSD: process_machdep.c,v 1.16 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: process_machdep.c,v 1.22 1996/05/03 19:42:25 christos Exp $ */
/*
@@ -211,8 +211,7 @@ process_read_fpregs(p, regs)
union savefpu *frame = process_fpframe(p);
#if NNPX > 0
- if (npxproc == p)
- npxsave();
+ npxsave_proc(p, 1);
#endif
if (i386_use_fxsave) {
@@ -249,21 +248,22 @@ process_write_regs(p, regs)
} else
#endif
{
+#if 0
extern int gdt_size;
- extern union descriptor *dynamic_gdt;
#define verr_ldt(slot) (slot < pcb->pcb_ldt_len && \
(pcb->pcb_ldt[slot].sd.sd_type & SDT_MEMRO) != 0 && \
pcb->pcb_ldt[slot].sd.sd_dpl == SEL_UPL && \
pcb->pcb_ldt[slot].sd.sd_p == 1)
#define verr_gdt(slot) (slot < gdt_size && \
- (dynamic_gdt[slot].sd.sd_type & SDT_MEMRO) != 0 && \
- dynamic_gdt[slot].sd.sd_dpl == SEL_UPL && \
- dynamic_gdt[slot].sd.sd_p == 1)
+ (gdt[slot].sd.sd_type & SDT_MEMRO) != 0 && \
+ gdt[slot].sd.sd_dpl == SEL_UPL && \
+ gdt[slot].sd.sd_p == 1)
#define verr(sel) (ISLDT(sel) ? verr_ldt(IDXSEL(sel)) : \
verr_gdt(IDXSEL(sel)))
#define valid_sel(sel) (ISPL(sel) == SEL_UPL && verr(sel))
#define null_sel(sel) (!ISLDT(sel) && IDXSEL(sel) == 0)
+#endif
/*
* Check for security violations.
@@ -272,11 +272,14 @@ process_write_regs(p, regs)
!USERMODE(regs->r_cs, regs->r_eflags))
return (EINVAL);
+ /* XXX Is this safe to remove. */
+#if 0
if ((regs->r_gs != pcb->pcb_gs && \
!valid_sel(regs->r_gs) && !null_sel(regs->r_gs)) ||
(regs->r_fs != pcb->pcb_fs && \
!valid_sel(regs->r_fs) && !null_sel(regs->r_fs)))
return (EINVAL);
+#endif
pcb->pcb_gs = regs->r_gs & 0xffff;
pcb->pcb_fs = regs->r_fs & 0xffff;
@@ -308,8 +311,7 @@ process_write_fpregs(p, regs)
if (p->p_md.md_flags & MDP_USEDFPU) {
#if NNPX > 0
- if (npxproc == p)
- npxdrop();
+ npxsave_proc(p, 0);
#endif
} else
p->p_md.md_flags |= MDP_USEDFPU;
diff --git a/sys/arch/i386/i386/trap.c b/sys/arch/i386/i386/trap.c
index 5c25f2ba8da..223c6bf3544 100644
--- a/sys/arch/i386/i386/trap.c
+++ b/sys/arch/i386/i386/trap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: trap.c,v 1.62 2004/04/15 00:22:42 tedu Exp $ */
+/* $OpenBSD: trap.c,v 1.63 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $ */
/*-
@@ -132,7 +132,7 @@ userret(p, pc, oticks)
addupc_task(p, pc, (int)(p->p_sticks - oticks) * psratio);
}
- curpriority = p->p_priority;
+ p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority;
}
char *trap_type[] = {
@@ -186,6 +186,7 @@ trap(frame)
vm_prot_t vftype, ftype;
union sigval sv;
caddr_t onfault;
+ uint32_t cr2;
uvmexp.traps++;
@@ -200,7 +201,7 @@ trap(frame)
if (trapdebug) {
printf("trap %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs,
- frame.tf_eflags, rcr2(), cpl);
+ frame.tf_eflags, rcr2(), lapic_tpr);
printf("curproc %p\n", curproc);
}
#endif
@@ -261,7 +262,7 @@ trap(frame)
printf("unknown trap %d", frame.tf_trapno);
printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
printf("trap type %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
- type, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags, rcr2(), cpl);
+ type, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags, rcr2(), lapic_tpr);
panic("trap type %d, code=%x, pc=%x",
type, frame.tf_err, frame.tf_eip);
@@ -335,35 +336,47 @@ trap(frame)
case T_TSSFLT|T_USER:
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGBUS, vftype, BUS_OBJERR, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
case T_SEGNPFLT|T_USER:
case T_STKFLT|T_USER:
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
case T_ALIGNFLT|T_USER:
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGBUS, vftype, BUS_ADRALN, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
case T_PRIVINFLT|T_USER: /* privileged instruction fault */
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGILL, type &~ T_USER, ILL_PRVOPC, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
case T_FPOPFLT|T_USER: /* coprocessor operand fault */
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGILL, type &~ T_USER, ILL_COPROC, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
case T_ASTFLT|T_USER: /* Allow process switch */
uvmexp.softs++;
if (p->p_flag & P_OWEUPC) {
p->p_flag &= ~P_OWEUPC;
+ KERNEL_PROC_LOCK(p);
ADDUPROF(p);
+ KERNEL_PROC_UNLOCK(p);
}
goto out;
@@ -376,55 +389,84 @@ trap(frame)
return;
}
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, rv, type &~ T_USER, FPE_FLTINV, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
#else
printf("pid %d killed due to lack of floating point\n",
p->p_pid);
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGKILL, type &~ T_USER, FPE_FLTINV, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
#endif
}
case T_BOUND|T_USER:
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
case T_OFLOW|T_USER:
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
case T_DIVIDE|T_USER:
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTDIV, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
case T_ARITHTRAP|T_USER:
sv.sival_int = frame.tf_eip;
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGFPE, frame.tf_err, FPE_INTOVF, sv);
+ KERNEL_PROC_UNLOCK(p);
goto out;
case T_PAGEFLT: /* allow page faults in kernel mode */
if (p == 0 || p->p_addr == 0)
goto we_re_toast;
+#ifdef LOCKDEBUG
+ /* If we page-fault while in scheduler, we're doomed. */
+#ifdef notyet
+ if (simple_lock_held(&sched_lock))
+#else
+ if (__mp_lock_held(&sched_lock))
+#endif
+ goto we_re_toast;
+#endif
+
pcb = &p->p_addr->u_pcb;
#if 0
/* XXX - check only applies to 386's and 486's with WP off */
if (frame.tf_err & PGEX_P)
goto we_re_toast;
#endif
- /* FALLTHROUGH */
+ cr2 = rcr2();
+ KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
+ goto faultcommon;
+
case T_PAGEFLT|T_USER: { /* page fault */
vaddr_t va, fa;
- struct vmspace *vm = p->p_vmspace;
+ struct vmspace *vm;
struct vm_map *map;
int rv;
unsigned nss;
+ cr2 = rcr2();
+ KERNEL_PROC_LOCK(p);
+ faultcommon:
+ vm = p->p_vmspace;
if (vm == NULL)
goto we_re_toast;
- fa = (vaddr_t)rcr2();
+ fa = (vaddr_t)cr2;
va = trunc_page(fa);
/*
* It is only a kernel address space fault iff:
@@ -472,20 +514,26 @@ trap(frame)
if (rv == 0) {
if (nss > vm->vm_ssize)
vm->vm_ssize = nss;
- if (type == T_PAGEFLT)
+ if (type == T_PAGEFLT) {
+ KERNEL_UNLOCK();
return;
+ }
+ KERNEL_PROC_UNLOCK(p);
goto out;
}
if (type == T_PAGEFLT) {
- if (pcb->pcb_onfault != 0)
+ if (pcb->pcb_onfault != 0) {
+ KERNEL_UNLOCK();
goto copyfault;
+ }
printf("uvm_fault(%p, 0x%lx, 0, %d) -> %x\n",
map, va, ftype, rv);
goto we_re_toast;
}
sv.sival_int = fa;
trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
+ KERNEL_PROC_UNLOCK(p);
break;
}
@@ -500,14 +548,18 @@ trap(frame)
case T_BPTFLT|T_USER: /* bpt instruction fault */
sv.sival_int = rcr2();
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_BRKPT, sv);
+ KERNEL_PROC_UNLOCK(p);
break;
case T_TRCTRAP|T_USER: /* trace trap */
#if defined(GPL_MATH_EMULATE)
trace:
#endif
sv.sival_int = rcr2();
+ KERNEL_PROC_LOCK(p);
trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_TRACE, sv);
+ KERNEL_PROC_UNLOCK(p);
break;
#if NISA > 0
@@ -591,7 +643,7 @@ syscall(frame)
register_t code, args[8], rval[2];
u_quad_t sticks;
#ifdef DIAGNOSTIC
- int ocpl = cpl;
+ int ocpl = lapic_tpr;
#endif
uvmexp.syscalls++;
@@ -716,12 +768,14 @@ syscall(frame)
goto bad;
rval[0] = 0;
rval[1] = frame.tf_edx;
+ KERNEL_PROC_LOCK(p);
#if NSYSTRACE > 0
if (ISSET(p->p_flag, P_SYSTRACE))
orig_error = error = systrace_redirect(code, p, args, rval);
else
#endif
orig_error = error = (*callp->sy_call)(p, args, rval);
+ KERNEL_PROC_UNLOCK(p);
switch (error) {
case 0:
/*
@@ -758,15 +812,18 @@ syscall(frame)
#endif
userret(p, frame.tf_eip, sticks);
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ KERNEL_PROC_LOCK(p);
ktrsysret(p, code, orig_error, rval[0]);
+ KERNEL_PROC_UNLOCK(p);
+ }
#endif
#ifdef DIAGNOSTIC
- if (cpl != ocpl) {
+ if (lapic_tpr != ocpl) {
printf("WARNING: SPL (0x%x) NOT LOWERED ON "
"syscall(0x%x, 0x%x, 0x%x, 0x%x...) EXIT, PID %d\n",
- cpl, code, args[0], args[1], args[2], p->p_pid);
- cpl = ocpl;
+ lapic_tpr, code, args[0], args[1], args[2], p->p_pid);
+ lapic_tpr = ocpl;
}
#endif
}
@@ -781,9 +838,15 @@ child_return(arg)
tf->tf_eax = 0;
tf->tf_eflags &= ~PSL_C;
+#ifdef notyet
+ KERNEL_PROC_UNLOCK(p);
+#endif
+
userret(p, tf->tf_eip, 0);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET))
+ KERNEL_PROC_LOCK(p);
ktrsysret(p, SYS_fork, 0, 0);
+ KERNEL_PROC_UNLOCK(p);
#endif
}
diff --git a/sys/arch/i386/isa/vector.s b/sys/arch/i386/i386/vector.s
index 93a7bab51da..b7ddd3f0021 100644
--- a/sys/arch/i386/isa/vector.s
+++ b/sys/arch/i386/i386/vector.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: vector.s,v 1.16 2003/04/17 03:42:14 drahn Exp $ */
+/* $OpenBSD: vector.s,v 1.2 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: vector.s,v 1.32 1996/01/07 21:29:47 mycroft Exp $ */
/*
@@ -30,85 +30,12 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <i386/isa/icu.h>
+#include <machine/i8259.h>
#include <dev/isa/isareg.h>
-#define ICU_HARDWARE_MASK
-
#define MY_COUNT _C_LABEL(uvmexp)
/*
- * These macros are fairly self explanatory. If ICU_SPECIAL_MASK_MODE is
- * defined, we try to take advantage of the ICU's `special mask mode' by only
- * EOIing the interrupts on return. This avoids the requirement of masking and
- * unmasking. We can't do this without special mask mode, because the ICU
- * would also hold interrupts that it thinks are of lower priority.
- *
- * Many machines do not support special mask mode, so by default we don't try
- * to use it.
- */
-
-#define IRQ_BIT(irq_num) (1 << ((irq_num) % 8))
-#define IRQ_BYTE(irq_num) ((irq_num) / 8)
-
-#ifdef ICU_SPECIAL_MASK_MODE
-
-#define ACK1(irq_num)
-#define ACK2(irq_num) \
- movb $(0x60|IRQ_SLAVE),%al /* specific EOI for IRQ2 */ ;\
- outb %al,$IO_ICU1
-#define MASK(irq_num, icu)
-#define UNMASK(irq_num, icu) \
- movb $(0x60|(irq_num%8)),%al /* specific EOI */ ;\
- outb %al,$icu
-
-#else /* ICU_SPECIAL_MASK_MODE */
-
-#ifndef AUTO_EOI_1
-#define ACK1(irq_num) \
- movb $(0x60|(irq_num%8)),%al /* specific EOI */ ;\
- outb %al,$IO_ICU1
-#else
-#define ACK1(irq_num)
-#endif
-
-#ifndef AUTO_EOI_2
-#define ACK2(irq_num) \
- movb $(0x60|(irq_num%8)),%al /* specific EOI */ ;\
- outb %al,$IO_ICU2 /* do the second ICU first */ ;\
- movb $(0x60|IRQ_SLAVE),%al /* specific EOI for IRQ2 */ ;\
- outb %al,$IO_ICU1
-#else
-#define ACK2(irq_num)
-#endif
-
-#ifdef ICU_HARDWARE_MASK
-
-#define MASK(irq_num, icu) \
- movb _C_LABEL(imen) + IRQ_BYTE(irq_num),%al ;\
- orb $IRQ_BIT(irq_num),%al ;\
- movb %al,_C_LABEL(imen) + IRQ_BYTE(irq_num) ;\
- FASTER_NOP ;\
- outb %al,$(icu+1)
-#define UNMASK(irq_num, icu) \
- cli ;\
- movb _C_LABEL(imen) + IRQ_BYTE(irq_num),%al ;\
- andb $~IRQ_BIT(irq_num),%al ;\
- movb %al,_C_LABEL(imen) + IRQ_BYTE(irq_num) ;\
- FASTER_NOP ;\
- outb %al,$(icu+1) ;\
- sti
-
-#else /* ICU_HARDWARE_MASK */
-
-#define MASK(irq_num, icu)
-#define UNMASK(irq_num, icu)
-
-#endif /* ICU_HARDWARE_MASK */
-
-#endif /* ICU_SPECIAL_MASK_MODE */
-
-/*
* Macros for interrupt entry, call to handler, and exit.
*
* XXX
@@ -132,6 +59,16 @@
.globl _C_LABEL(isa_strayintr)
+#ifdef MULTIPROCESSOR
+#define LOCK_KERNEL call _C_LABEL(i386_intlock)
+#define UNLOCK_KERNEL call _C_LABEL(i386_intunlock)
+#else
+#define LOCK_KERNEL
+#define UNLOCK_KERNEL
+#endif
+
+#define voidop(num)
+
/*
* Normal vectors.
*
@@ -148,34 +85,35 @@
*
* On exit, we jump to Xdoreti(), to process soft interrupts and ASTs.
*/
-#define INTR(irq_num, icu, ack) \
-IDTVEC(recurse/**/irq_num) ;\
+#define INTRSTUB(name, num, early_ack, late_ack, mask, unmask, level_mask) \
+IDTVEC(recurse_/**/name/**/num) ;\
pushfl ;\
pushl %cs ;\
pushl %esi ;\
cli ;\
-_C_LABEL(Xintr)/**/irq_num/**/: ;\
+_C_LABEL(Xintr_/**/name/**/num): ;\
pushl $0 /* dummy error code */ ;\
pushl $T_ASTFLT /* trap # for doing ASTs */ ;\
INTRENTRY ;\
MAKE_FRAME ;\
- MASK(irq_num, icu) /* mask it in hardware */ ;\
- ack(irq_num) /* and allow other intrs */ ;\
+ mask(num) /* mask it in hardware */ ;\
+ early_ack(num) /* and allow other intrs */ ;\
incl MY_COUNT+V_INTR /* statistical info */ ;\
- movl _C_LABEL(iminlevel) + (irq_num) * 4, %eax ;\
- movzbl _C_LABEL(cpl),%ebx ;\
+ movl _C_LABEL(iminlevel) + (num) * 4, %eax ;\
+ movl CPL,%ebx ;\
cmpl %eax,%ebx ;\
- jae _C_LABEL(Xhold/**/irq_num)/* currently masked; hold it */;\
-_C_LABEL(Xresume)/**/irq_num/**/: ;\
- movzbl _C_LABEL(cpl),%eax /* cpl to restore on exit */ ;\
+ jae _C_LABEL(Xhold_/**/name/**/num)/* currently masked; hold it */;\
+Xresume_/**/name/**/num/**/: ;\
+ movl CPL,%eax /* cpl to restore on exit */ ;\
pushl %eax ;\
- movl _C_LABEL(imaxlevel) + (irq_num) * 4,%eax ;\
- movl %eax,_C_LABEL(cpl) /* block enough for this irq */ ;\
+ movl _C_LABEL(imaxlevel) + (num) * 4,%eax ;\
+ movl %eax,CPL /* block enough for this irq */ ;\
sti /* safe to take intrs now */ ;\
- movl _C_LABEL(intrhand) + (irq_num) * 4,%ebx /* head of chain */ ;\
+ movl _C_LABEL(intrhand) + (num) * 4,%ebx /* head of chain */ ;\
testl %ebx,%ebx ;\
- jz _C_LABEL(Xstray)/**/irq_num /* no handlears; we're stray */ ;\
+	jz _C_LABEL(Xstray_/**/name/**/num) /* no handlers; we're stray */ ;\
STRAY_INITIALIZE /* nobody claimed it yet */ ;\
+ LOCK_KERNEL ;\
7: movl IH_ARG(%ebx),%eax /* get handler arg */ ;\
testl %eax,%eax ;\
jnz 4f ;\
@@ -190,16 +128,18 @@ _C_LABEL(Xresume)/**/irq_num/**/: ;\
5: movl IH_NEXT(%ebx),%ebx /* next handler in chain */ ;\
testl %ebx,%ebx ;\
jnz 7b ;\
+ UNLOCK_KERNEL ;\
STRAY_TEST /* see if it's a stray */ ;\
-6: UNMASK(irq_num, icu) /* unmask it in hardware */ ;\
+6: unmask(num) /* unmask it in hardware */ ;\
+ late_ack(num) ;\
jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\
-IDTVEC(stray/**/irq_num) ;\
- pushl $irq_num ;\
+IDTVEC(stray_/**/name/**/num) ;\
+ pushl $num ;\
call _C_LABEL(isa_strayintr) ;\
addl $4,%esp ;\
jmp 6b ;\
-IDTVEC(hold/**/irq_num) ;\
- orb $IRQ_BIT(irq_num),_C_LABEL(ipending) + IRQ_BYTE(irq_num) ;\
+IDTVEC(hold_/**/name/**/num) ;\
+ orb $IRQ_BIT(num),_C_LABEL(ipending) + IRQ_BYTE(num) ;\
INTRFASTEXIT
#if defined(DEBUG) && defined(notdef)
@@ -209,7 +149,7 @@ IDTVEC(hold/**/irq_num) ;\
orl %eax,%esi
#define STRAY_TEST \
testl %esi,%esi ;\
- jz _C_LABEL(Xstray)/**/irq_num
+ jz _C_LABEL(Xstray_/**/name/**/num)
#else /* !DEBUG */
#define STRAY_INITIALIZE
#define STRAY_INTEGRATE
@@ -223,66 +163,92 @@ IDTVEC(hold/**/irq_num) ;\
#define MAKE_FRAME
#endif /* DDB */
-INTR(0, IO_ICU1, ACK1)
-INTR(1, IO_ICU1, ACK1)
-INTR(2, IO_ICU1, ACK1)
-INTR(3, IO_ICU1, ACK1)
-INTR(4, IO_ICU1, ACK1)
-INTR(5, IO_ICU1, ACK1)
-INTR(6, IO_ICU1, ACK1)
-INTR(7, IO_ICU1, ACK1)
-INTR(8, IO_ICU2, ACK2)
-INTR(9, IO_ICU2, ACK2)
-INTR(10, IO_ICU2, ACK2)
-INTR(11, IO_ICU2, ACK2)
-INTR(12, IO_ICU2, ACK2)
-INTR(13, IO_ICU2, ACK2)
-INTR(14, IO_ICU2, ACK2)
-INTR(15, IO_ICU2, ACK2)
+#define ICUADDR IO_ICU1
+
+INTRSTUB(legacy,0, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,1, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,2, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,3, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,4, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,5, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,6, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,7, i8259_asm_ack1, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+
+#undef ICUADDR
+#define ICUADDR IO_ICU2
+
+INTRSTUB(legacy,8, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,9, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,10, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,11, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,12, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,13, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,14, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
+INTRSTUB(legacy,15, i8259_asm_ack2, voidop, i8259_asm_mask, i8259_asm_unmask,
+ voidop)
/*
* These tables are used by the ISA configuration code.
*/
/* interrupt service routine entry points */
IDTVEC(intr)
- .long _C_LABEL(Xintr0), _C_LABEL(Xintr1), _C_LABEL(Xintr2)
- .long _C_LABEL(Xintr3), _C_LABEL(Xintr4), _C_LABEL(Xintr5)
- .long _C_LABEL(Xintr6), _C_LABEL(Xintr7), _C_LABEL(Xintr8)
- .long _C_LABEL(Xintr9), _C_LABEL(Xintr10), _C_LABEL(Xintr11)
- .long _C_LABEL(Xintr12), _C_LABEL(Xintr13)
- .long _C_LABEL(Xintr14), _C_LABEL(Xintr15)
+ .long _C_LABEL(Xintr_legacy0), _C_LABEL(Xintr_legacy1)
+ .long _C_LABEL(Xintr_legacy2), _C_LABEL(Xintr_legacy3)
+ .long _C_LABEL(Xintr_legacy4), _C_LABEL(Xintr_legacy5)
+ .long _C_LABEL(Xintr_legacy6), _C_LABEL(Xintr_legacy7)
+ .long _C_LABEL(Xintr_legacy8), _C_LABEL(Xintr_legacy9)
+ .long _C_LABEL(Xintr_legacy10), _C_LABEL(Xintr_legacy11)
+ .long _C_LABEL(Xintr_legacy12), _C_LABEL(Xintr_legacy13)
+ .long _C_LABEL(Xintr_legacy14), _C_LABEL(Xintr_legacy15)
/*
* These tables are used by Xdoreti() and Xspllower().
*/
/* resume points for suspended interrupts */
IDTVEC(resume)
- .long _C_LABEL(Xresume0), _C_LABEL(Xresume1)
- .long _C_LABEL(Xresume2), _C_LABEL(Xresume3)
- .long _C_LABEL(Xresume4), _C_LABEL(Xresume5)
- .long _C_LABEL(Xresume6), _C_LABEL(Xresume7)
- .long _C_LABEL(Xresume8), _C_LABEL(Xresume9)
- .long _C_LABEL(Xresume10), _C_LABEL(Xresume11)
- .long _C_LABEL(Xresume12), _C_LABEL(Xresume13)
- .long _C_LABEL(Xresume14), _C_LABEL(Xresume15)
+ .long _C_LABEL(Xresume_legacy0), _C_LABEL(Xresume_legacy1)
+ .long _C_LABEL(Xresume_legacy2), _C_LABEL(Xresume_legacy3)
+ .long _C_LABEL(Xresume_legacy4), _C_LABEL(Xresume_legacy5)
+ .long _C_LABEL(Xresume_legacy6), _C_LABEL(Xresume_legacy7)
+ .long _C_LABEL(Xresume_legacy8), _C_LABEL(Xresume_legacy9)
+ .long _C_LABEL(Xresume_legacy10), _C_LABEL(Xresume_legacy11)
+ .long _C_LABEL(Xresume_legacy12), _C_LABEL(Xresume_legacy13)
+ .long _C_LABEL(Xresume_legacy14), _C_LABEL(Xresume_legacy15)
/* for soft interrupts */
- .long 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .long 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.long _C_LABEL(Xsofttty), _C_LABEL(Xsoftnet)
.long _C_LABEL(Xsoftclock)
+ .long 0, 0
/* fake interrupts to resume from splx() */
IDTVEC(recurse)
- .long _C_LABEL(Xrecurse0), _C_LABEL(Xrecurse1)
- .long _C_LABEL(Xrecurse2), _C_LABEL(Xrecurse3)
- .long _C_LABEL(Xrecurse4), _C_LABEL(Xrecurse5)
- .long _C_LABEL(Xrecurse6), _C_LABEL(Xrecurse7)
- .long _C_LABEL(Xrecurse8), _C_LABEL(Xrecurse9)
- .long _C_LABEL(Xrecurse10), _C_LABEL(Xrecurse11)
- .long _C_LABEL(Xrecurse12), _C_LABEL(Xrecurse13)
- .long _C_LABEL(Xrecurse14), _C_LABEL(Xrecurse15)
+ .long _C_LABEL(Xrecurse_legacy0), _C_LABEL(Xrecurse_legacy1)
+ .long _C_LABEL(Xrecurse_legacy2), _C_LABEL(Xrecurse_legacy3)
+ .long _C_LABEL(Xrecurse_legacy4), _C_LABEL(Xrecurse_legacy5)
+ .long _C_LABEL(Xrecurse_legacy6), _C_LABEL(Xrecurse_legacy7)
+ .long _C_LABEL(Xrecurse_legacy8), _C_LABEL(Xrecurse_legacy9)
+ .long _C_LABEL(Xrecurse_legacy10), _C_LABEL(Xrecurse_legacy11)
+ .long _C_LABEL(Xrecurse_legacy12), _C_LABEL(Xrecurse_legacy13)
+ .long _C_LABEL(Xrecurse_legacy14), _C_LABEL(Xrecurse_legacy15)
/* for soft interrupts */
- .long 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .long 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.long _C_LABEL(Xsofttty), _C_LABEL(Xsoftnet)
.long _C_LABEL(Xsoftclock)
+ .long 0, 0
/* Some bogus data, to keep vmstat happy, for now. */
.globl _C_LABEL(intrnames), _C_LABEL(eintrnames)
diff --git a/sys/arch/i386/i386/vm_machdep.c b/sys/arch/i386/i386/vm_machdep.c
index 20cda037a0b..3f5eeb74d62 100644
--- a/sys/arch/i386/i386/vm_machdep.c
+++ b/sys/arch/i386/i386/vm_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vm_machdep.c,v 1.39 2003/06/02 23:27:47 millert Exp $ */
+/* $OpenBSD: vm_machdep.c,v 1.40 2004/06/13 21:49:15 niklas Exp $ */
/* $NetBSD: vm_machdep.c,v 1.61 1996/05/03 19:42:35 christos Exp $ */
/*-
@@ -62,9 +62,6 @@
#include <machine/specialreg.h>
#include "npx.h"
-#if NNPX > 0
-extern struct proc *npxproc;
-#endif
/*
* Finish a fork operation, with process p2 nearly set up.
@@ -88,37 +85,36 @@ cpu_fork(p1, p2, stack, stacksize, func, arg)
struct switchframe *sf;
#if NNPX > 0
- /*
- * If npxproc != p1, then the npx h/w state is irrelevant and the
- * state had better already be in the pcb. This is true for forks
- * but not for dumps.
- *
- * If npxproc == p1, then we have to save the npx h/w state to
- * p1's pcb so that we can copy it.
- */
- if (npxproc == p1)
- npxsave();
+ npxsave_proc(p1, 1);
#endif
p2->p_md.md_flags = p1->p_md.md_flags;
- /* Sync curpcb (which is presumably p1's PCB) and copy it to p2. */
- savectx(curpcb);
+ /* Copy pcb from proc p1 to p2. */
+ if (p1 == curproc) {
+ /* Sync the PCB before we copy it. */
+ savectx(curpcb);
+ }
+#ifdef DIAGNOSTIC
+ else if (p1 != &proc0)
+ panic("cpu_fork: curproc");
+#endif
*pcb = p1->p_addr->u_pcb;
+
/*
* Preset these so that gdt_compact() doesn't get confused if called
* during the allocations below.
+ *
+ * Note: pcb_ldt_sel is handled in the pmap_activate() call when
+ * we run the new process.
*/
- pcb->pcb_tss_sel = GSEL(GNULL_SEL, SEL_KPL);
- /*
- * Activate the addres space. Note this will refresh pcb_ldt_sel.
- */
- pmap_activate(p2);
+ p2->p_md.md_tss_sel = GSEL(GNULL_SEL, SEL_KPL);
/* Fix up the TSS. */
pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
pcb->pcb_tss.tss_esp0 = (int)p2->p_addr + USPACE - 16;
- tss_alloc(pcb);
+
+ p2->p_md.md_tss_sel = tss_alloc(pcb);
/*
* Copy the trapframe, and arrange for the child to return directly
@@ -150,8 +146,7 @@ cpu_swapout(p)
/*
* Make sure we save the FP state before the user area vanishes.
*/
- if (npxproc == p)
- npxsave();
+ npxsave_proc(p, 1);
#endif
}
@@ -169,8 +164,8 @@ cpu_exit(p)
{
#if NNPX > 0
/* If we were using the FPU, forget about it. */
- if (npxproc == p)
- npxproc = 0;
+ if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
+ npxsave_proc(p, 0);
#endif
uvmexp.swtch++;
@@ -181,10 +176,7 @@ void
cpu_wait(p)
struct proc *p;
{
- struct pcb *pcb;
-
- pcb = &p->p_addr->u_pcb;
- tss_free(pcb);
+ tss_free(p->p_md.md_tss_sel);
}
/*
@@ -251,6 +243,9 @@ pagemove(from, to, size)
{
pt_entry_t *fpte, *tpte;
pt_entry_t ofpte, otpte;
+#ifdef MULTIPROCESSOR
+ u_int32_t cpumask = 0;
+#endif
#ifdef DIAGNOSTIC
if ((size & PAGE_MASK) != 0)
@@ -263,24 +258,38 @@ pagemove(from, to, size)
otpte = *tpte;
*tpte++ = *fpte;
*fpte++ = 0;
-#if defined(I386_CPU)
+#if defined(I386_CPU) && !defined(MULTIPROCESSOR)
if (cpu_class != CPUCLASS_386)
#endif
{
if (otpte & PG_V)
- pmap_update_pg((vaddr_t) to);
+#ifdef MULTIPROCESSOR
+ pmap_tlb_shootdown(pmap_kernel(), (vaddr_t)to,
+ otpte, &cpumask);
+#else
+ pmap_update_pg((vaddr_t)to);
+#endif
if (ofpte & PG_V)
- pmap_update_pg((vaddr_t) from);
+#ifdef MULTIPROCESSOR
+ pmap_tlb_shootdown(pmap_kernel(),
+ (vaddr_t)from, ofpte, &cpumask);
+#else
+ pmap_update_pg((vaddr_t)from);
+#endif
}
from += PAGE_SIZE;
to += PAGE_SIZE;
size -= PAGE_SIZE;
}
+#ifdef MULTIPROCESSOR
+ pmap_tlb_shootnow(cpumask);
+#else
#if defined(I386_CPU)
if (cpu_class == CPUCLASS_386)
tlbflush();
#endif
+#endif
}
/*
diff --git a/sys/arch/i386/include/apicvar.h b/sys/arch/i386/include/apicvar.h
new file mode 100644
index 00000000000..aac452fca40
--- /dev/null
+++ b/sys/arch/i386/include/apicvar.h
@@ -0,0 +1,57 @@
+/* $OpenBSD: apicvar.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: apicvar.h,v 1.1.2.3 2000/02/27 20:25:00 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _I386_APICVAR_H_
+#define _I386_APICVAR_H_
+
+struct apic_attach_args {
+ const char *aaa_name;
+ int apic_id;
+ int apic_version;
+ int flags;
+#define IOAPIC_PICMODE 0x01
+#define IOAPIC_VWIRE 0x02
+ paddr_t apic_address;
+};
+
+void apic_format_redir(char *, char *, int, u_int32_t, u_int32_t);
+
+#endif /* !_I386_APICVAR_H_ */
diff --git a/sys/arch/i386/include/asm.h b/sys/arch/i386/include/asm.h
index 2126271a519..f2da5755a33 100644
--- a/sys/arch/i386/include/asm.h
+++ b/sys/arch/i386/include/asm.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: asm.h,v 1.7 2003/06/02 23:27:47 millert Exp $ */
+/* $OpenBSD: asm.h,v 1.8 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: asm.h,v 1.7 1994/10/27 04:15:56 cgd Exp $ */
/*-
@@ -61,6 +61,16 @@
#define _C_LABEL(name) name
#define _ASM_LABEL(x) x
+#define CVAROFF(x, y) _C_LABEL(x) + y
+
+#ifdef __STDC__
+# define __CONCAT(x,y) x ## y
+# define __STRING(x) #x
+#else
+# define __CONCAT(x,y) x/**/y
+# define __STRING(x) "x"
+#endif
+
/*
* WEAK ALIAS: create a weak alias
*/
diff --git a/sys/arch/i386/include/atomic.h b/sys/arch/i386/include/atomic.h
new file mode 100644
index 00000000000..e3be6b68b1b
--- /dev/null
+++ b/sys/arch/i386/include/atomic.h
@@ -0,0 +1,72 @@
+/* $OpenBSD: atomic.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: atomic.h,v 1.1.2.2 2000/02/21 18:54:07 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ATOMIC_H_
+#define _ATOMIC_H_
+
+#ifndef _LOCORE
+
+static __inline u_int32_t
+i386_atomic_testset_ul (volatile u_int32_t *ptr, unsigned long val) {
+ __asm__ volatile ("xchgl %0,(%2)" :"=r" (val):"0" (val),"r" (ptr));
+ return val;
+}
+
+static __inline int
+i386_atomic_testset_i (volatile int *ptr, unsigned long val) {
+ __asm__ volatile ("xchgl %0,(%2)" :"=r" (val):"0" (val),"r" (ptr));
+ return val;
+}
+
+static __inline void
+i386_atomic_setbits_l (volatile u_int32_t *ptr, unsigned long bits) {
+ __asm __volatile("lock ; orl %1,%0" : "=m" (*ptr) : "ir" (bits));
+}
+
+static __inline void
+i386_atomic_clearbits_l (volatile u_int32_t *ptr, unsigned long bits) {
+ bits = ~bits;
+ __asm __volatile("lock ; and %1,%0" : "=m" (*ptr) : "ir" (bits));
+}
+
+#endif
+#endif
+
diff --git a/sys/arch/i386/include/biosvar.h b/sys/arch/i386/include/biosvar.h
index 4fc00866e87..7e4d98933b4 100644
--- a/sys/arch/i386/include/biosvar.h
+++ b/sys/arch/i386/include/biosvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: biosvar.h,v 1.42 2003/11/27 00:31:55 espie Exp $ */
+/* $OpenBSD: biosvar.h,v 1.43 2004/06/13 21:49:16 niklas Exp $ */
/*
* Copyright (c) 1997-1999 Michael Shalayeff
@@ -192,6 +192,8 @@ typedef struct _bios_consdev {
int conspeed;
} bios_consdev_t;
+#define BOOTARG_SMPINFO 6 /* struct mp_float[] */
+
#if defined(_KERNEL) || defined (_STANDALONE)
#ifdef _LOCORE
@@ -244,6 +246,8 @@ int bios32_service(u_int32_t, bios32_entry_t, bios32_entry_info_t);
extern u_int bootapiver;
extern bios_memmap_t *bios_memmap;
+extern void *bios_smpinfo;
+extern bios_pciinfo_t *bios_pciinfo;
#endif /* _KERNEL */
#endif /* _LOCORE */
diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h
index cc690de9219..9d66a9c496c 100644
--- a/sys/arch/i386/include/cpu.h
+++ b/sys/arch/i386/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.60 2004/06/06 17:34:37 grange Exp $ */
+/* $OpenBSD: cpu.h,v 1.61 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: cpu.h,v 1.35 1996/05/05 19:29:26 christos Exp $ */
/*-
@@ -41,9 +41,19 @@
/*
* Definitions unique to i386 cpu support.
*/
-#include <machine/psl.h>
#include <machine/frame.h>
+#include <machine/psl.h>
#include <machine/segments.h>
+#include <machine/intrdefs.h>
+
+#ifdef MULTIPROCESSOR
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+
+/* XXX for now... */
+#define NLAPIC 1
+
+#endif
/*
* definitions of cpu-dependent requirements
@@ -60,16 +70,156 @@
*/
#define clockframe intrframe
-#define CLKF_USERMODE(frame) USERMODE((frame)->if_cs, (frame)->if_eflags)
-#define CLKF_PC(frame) ((frame)->if_eip)
-#define CLKF_INTR(frame) (IDXSEL((frame)->if_cs) == GICODE_SEL)
+#include <sys/device.h>
+#include <sys/lock.h> /* will also get LOCKDEBUG */
+#include <sys/proc.h>
+
+struct intrsource;
+
+/* XXX stuff to move to cpuvar.h later */
+struct cpu_info {
+ struct device ci_dev; /* our device */
+ struct cpu_info *ci_self; /* pointer to this structure */
+ struct schedstate_percpu ci_schedstate; /* scheduler state */
+ struct cpu_info *ci_next; /* next cpu */
+
+ /*
+ * Public members.
+ */
+ struct proc *ci_curproc; /* current owner of the processor */
+ struct simplelock ci_slock; /* lock on this data structure */
+ cpuid_t ci_cpuid; /* our CPU ID */
+ u_int ci_apicid; /* our APIC ID */
+#if defined(DIAGNOSTIC) || defined(LOCKDEBUG)
+ u_long ci_spin_locks; /* # of spin locks held */
+ u_long ci_simple_locks; /* # of simple locks held */
+#endif
+
+ /*
+ * Private members.
+ */
+ struct proc *ci_fpcurproc; /* current owner of the FPU */
+ int ci_fpsaving; /* save in progress */
+
+ volatile u_int32_t ci_tlb_ipi_mask;
+
+ struct pcb *ci_curpcb; /* VA of current HW PCB */
+ struct pcb *ci_idle_pcb; /* VA of current PCB */
+ int ci_idle_tss_sel; /* TSS selector of idle PCB */
+
+ struct intrsource *ci_isources[MAX_INTR_SOURCES];
+ u_int32_t ci_ipending;
+ int ci_ilevel;
+ int ci_idepth;
+ u_int32_t ci_imask[NIPL];
+ u_int32_t ci_iunmask[NIPL];
+
+ paddr_t ci_idle_pcb_paddr; /* PA of idle PCB */
+ u_long ci_flags; /* flags; see below */
+ u_int32_t ci_ipis; /* interprocessor interrupts pending */
+ int sc_apic_version; /* local APIC version */
+
+ u_int32_t ci_level;
+ u_int32_t ci_vendor[4];
+ u_int32_t ci_signature; /* X86 cpuid type */
+ u_int32_t ci_feature_flags; /* X86 CPUID feature bits */
+ u_int32_t cpu_class; /* CPU class */
+
+ struct cpu_functions *ci_func; /* start/stop functions */
+	void (*cpu_setup)(const char *, int, int); /* proc-dependent init */
+
+ int ci_want_resched;
+ int ci_astpending;
+
+ union descriptor *ci_gdt;
+
+ volatile int ci_ddb_paused; /* paused due to other proc in ddb */
+#define CI_DDB_RUNNING 0
+#define CI_DDB_SHOULDSTOP 1
+#define CI_DDB_STOPPED 2
+#define CI_DDB_ENTERDDB 3
+#define CI_DDB_INDDB 4
+};
+
+/*
+ * Processor flag notes: The "primary" CPU has certain MI-defined
+ * roles (mostly relating to hardclock handling); we distinguish
+ * between the processor which booted us, and the processor currently
+ * holding the "primary" role just to give us the flexibility later to
+ * change primaries should we be sufficiently twisted.
+ */
+
+#define CPUF_BSP 0x0001 /* CPU is the original BSP */
+#define CPUF_AP 0x0002 /* CPU is an AP */
+#define CPUF_SP 0x0004 /* CPU is only processor */
+#define CPUF_PRIMARY 0x0008 /* CPU is active primary processor */
+#define CPUF_APIC_CD 0x0010 /* CPU has apic configured */
+
+#define CPUF_PRESENT 0x1000 /* CPU is present */
+#define CPUF_RUNNING 0x2000 /* CPU is running */
/*
- * Preempt the current process if in interrupt from user mode,
+ * We statically allocate the CPU info for the primary CPU (or,
+ * the only CPU on uniprocessors), and the primary CPU is the
+ * first CPU on the CPU info list.
+ */
+extern struct cpu_info cpu_info_primary;
+extern struct cpu_info *cpu_info_list;
+
+#define CPU_INFO_ITERATOR int
+#define CPU_INFO_FOREACH(cii, ci) cii = 0, ci = cpu_info_list; \
+ ci != NULL; ci = ci->ci_next
+
+#ifdef MULTIPROCESSOR
+
+#define I386_MAXPROCS 32 /* because we use a bitmask */
+
+#define CPU_STARTUP(_ci) ((_ci)->ci_func->start(_ci))
+#define CPU_STOP(_ci) ((_ci)->ci_func->stop(_ci))
+#define CPU_START_CLEANUP(_ci) ((_ci)->ci_func->cleanup(_ci))
+
+#define cpu_number() (i82489_readreg(LAPIC_ID)>>LAPIC_ID_SHIFT)
+#define curcpu() (cpu_info[cpu_number()])
+
+#define CPU_IS_PRIMARY(ci) ((ci)->ci_flags & CPUF_PRIMARY)
+
+extern struct cpu_info *cpu_info[I386_MAXPROCS];
+extern u_long cpus_running;
+
+extern void cpu_boot_secondary_processors(void);
+extern void cpu_init_idle_pcbs(void);
+
+#else /* MULTIPROCESSOR */
+
+#define I386_MAXPROCS 1
+
+#define cpu_number() 0
+#define curcpu() (&cpu_info_primary)
+
+#define CPU_IS_PRIMARY(ci) 1
+
+/*
+ * definitions of cpu-dependent requirements
+ * referenced in generic code
+ */
+#define cpu_swapin(p) /* nothing */
+
+#endif
+
+#define curpcb curcpu()->ci_curpcb
+
+#define want_resched (curcpu()->ci_want_resched)
+#define astpending (curcpu()->ci_astpending)
+
+/*
+ * Preempt the current process if in interrupt from user mode,
* or after the current trap/syscall if in system mode.
*/
-int want_resched; /* resched() was called */
-#define need_resched() (want_resched = 1, setsoftast())
+extern void need_resched(struct cpu_info *);
+
+#define CLKF_USERMODE(frame) USERMODE((frame)->if_cs, (frame)->if_eflags)
+#define CLKF_PC(frame) ((frame)->if_eip)
+#define CLKF_INTR(frame) (IDXSEL((frame)->if_cs) == GICODE_SEL)
/*
* Give a profiling tick to the current process when the user profiling
@@ -87,8 +237,13 @@ int want_resched; /* resched() was called */
/*
* We need a machine-independent name for this.
*/
-#define DELAY(x) delay(x)
-void delay(int);
+extern void (*delay_func)(int);
+struct timeval;
+extern void (*microtime_func)(struct timeval *);
+
+#define DELAY(x) (*delay_func)(x)
+#define delay(x) (*delay_func)(x)
+#define microtime(tv) (*microtime_func)(tv)
#if defined(I586_CPU) || defined(I686_CPU)
/*
@@ -173,6 +328,7 @@ extern void (*update_cpuspeed)(void);
void dumpconf(void);
void cpu_reset(void);
void i386_proc0_tss_ldt_init(void);
+void i386_init_pcb_tss_ldt(struct cpu_info *);
void cpuid(u_int32_t, u_int32_t *);
/* locore.s */
@@ -189,6 +345,9 @@ void proc_trampoline(void);
void initrtclock(void);
void startrtclock(void);
void rtcdrain(void *);
+void i8254_delay(int);
+void i8254_microtime(struct timeval *);
+void i8254_initclocks(void);
/* est.c */
#if !defined(SMALL_KERNEL) && defined(I686_CPU)
@@ -213,8 +372,9 @@ int k6_powernow_setperf(int);
/* npx.c */
-void npxdrop(void);
-void npxsave(void);
+void npxdrop(struct proc *);
+void npxsave_proc(struct proc *, int);
+void npxsave_cpu(struct cpu_info *, int);
#if defined(GPL_MATH_EMULATE)
/* math_emulate.c */
@@ -230,6 +390,7 @@ int i386_set_ldt(struct proc *, void *, register_t *);
/* isa_machdep.c */
void isa_defaultirq(void);
+void isa_nodefaultirq(void);
int isa_nmi(void);
/* pmap.c */
@@ -291,4 +452,12 @@ void setconf(void);
{ "xcrypt", CTLTYPE_INT }, \
}
+/*
+ * This needs to be included late since it relies on definitions higher
+ * up in this file.
+ */
+#if defined(MULTIPROCESSOR) && defined(_KERNEL)
+#include <sys/mplock.h>
+#endif
+
#endif /* !_I386_CPU_H_ */
diff --git a/sys/arch/i386/include/cpufunc.h b/sys/arch/i386/include/cpufunc.h
index d90a120429b..832e4271142 100644
--- a/sys/arch/i386/include/cpufunc.h
+++ b/sys/arch/i386/include/cpufunc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpufunc.h,v 1.11 2003/10/28 13:22:44 avsm Exp $ */
+/* $OpenBSD: cpufunc.h,v 1.12 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: cpufunc.h,v 1.8 1994/10/27 04:15:59 cgd Exp $ */
/*
@@ -43,6 +43,8 @@
#include <sys/cdefs.h>
#include <sys/types.h>
+#include <machine/specialreg.h>
+
static __inline void invlpg(u_int);
static __inline void lidt(void *);
static __inline void lldt(u_short);
@@ -55,6 +57,7 @@ static __inline u_int rcr3(void);
static __inline void lcr4(u_int);
static __inline u_int rcr4(void);
static __inline void tlbflush(void);
+static __inline void tlbflushg(void);
static __inline void disable_intr(void);
static __inline void enable_intr(void);
static __inline u_int read_eflags(void);
@@ -146,6 +149,39 @@ tlbflush(void)
__asm __volatile("movl %0,%%cr3" : : "r" (val));
}
+static __inline void
+tlbflushg(void)
+{
+ /*
+ * Big hammer: flush all TLB entries, including ones from PTE's
+ * with the G bit set. This should only be necessary if TLB
+ * shootdown falls far behind.
+ *
+ * Intel Architecture Software Developer's Manual, Volume 3,
+ * System Programming, section 9.10, "Invalidating the
+ * Translation Lookaside Buffers (TLBS)":
+ * "The following operations invalidate all TLB entries, irrespective
+ * of the setting of the G flag:
+ * ...
+ * "(P6 family processors only): Writing to control register CR4 to
+ * modify the PSE, PGE, or PAE flag."
+ *
+ * (the alternatives not quoted above are not an option here.)
+ *
+ * If PGE is not in use, we reload CR3 for the benefit of
+ * pre-P6-family processors.
+ */
+
+#if defined(I686_CPU)
+ if (cpu_feature & CPUID_PGE) {
+ u_int cr4 = rcr4();
+ lcr4(cr4 & ~CR4_PGE);
+ lcr4(cr4);
+ } else
+#endif
+ tlbflush();
+}
+
#ifdef notyet
void setidt(int idx, /*XXX*/caddr_t func, int typ, int dpl);
#endif
diff --git a/sys/arch/i386/include/cpuvar.h b/sys/arch/i386/include/cpuvar.h
new file mode 100644
index 00000000000..5bf9f3342ae
--- /dev/null
+++ b/sys/arch/i386/include/cpuvar.h
@@ -0,0 +1,108 @@
+/* $OpenBSD: cpuvar.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: cpuvar.h,v 1.1.2.3 2000/02/21 18:54:07 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999 Stefan Grefen
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+struct cpu_functions {
+ int (*start)(struct cpu_info *);
+ int (*stop)(struct cpu_info *);
+ void (*cleanup)(struct cpu_info *);
+};
+
+extern struct cpu_functions mp_cpu_funcs;
+
+#define CPU_ROLE_SP 0
+#define CPU_ROLE_BP 1
+#define CPU_ROLE_AP 2
+
+struct cpu_attach_args {
+ const char *caa_name;
+ int cpu_number;
+ int cpu_role;
+ struct cpu_functions *cpu_func;
+ int cpu_signature;
+ int feature_flags;
+};
+
+#define MP_PICMODE 0x00000001 /* System booted in picmode */
+
+#ifdef _KERNEL
+
+int i386_ipi(int,int,int);
+void i386_self_ipi(int);
+int i386_ipi_init(int);
+
+void identifycpu(struct cpu_info *);
+void cpu_init(struct cpu_info *);
+void cpu_init_first(void);
+
+#endif
+
diff --git a/sys/arch/i386/include/db_machdep.h b/sys/arch/i386/include/db_machdep.h
index 69fb1d2cb30..c1f5b9b6dfe 100644
--- a/sys/arch/i386/include/db_machdep.h
+++ b/sys/arch/i386/include/db_machdep.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: db_machdep.h,v 1.9 2003/05/18 02:43:13 andreas Exp $ */
+/* $OpenBSD: db_machdep.h,v 1.10 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: db_machdep.h,v 1.9 1996/05/03 19:23:59 christos Exp $ */
/*
@@ -120,5 +120,13 @@ void db_task_name(/* task_t */);
int kdb_trap(int, int, db_regs_t *);
void db_machine_init(void);
+void db_enter_ddb(void);
+void db_leave_ddb(void);
+void db_startcpu(int cpu);
+void db_stopcpu(int cpu);
+void db_movetocpu(int cpu);
+void i386_ipi_db(struct cpu_info *);
+
+extern struct SIMPLELOCK ddb_mp_slock;
#endif /* _I386_DB_MACHDEP_H_ */
diff --git a/sys/arch/i386/include/gdt.h b/sys/arch/i386/include/gdt.h
index 1568b573a86..ba741e2c637 100644
--- a/sys/arch/i386/include/gdt.h
+++ b/sys/arch/i386/include/gdt.h
@@ -1,8 +1,8 @@
-/* $OpenBSD: gdt.h,v 1.9 2002/03/14 01:26:33 millert Exp $ */
-/* $NetBSD: gdt.h,v 1.3 1996/02/27 22:32:11 jtc Exp $ */
+/* $OpenBSD: gdt.h,v 1.10 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: gdt.h,v 1.7.10.6 2002/08/19 01:22:36 sommerfeld Exp $ */
/*-
- * Copyright (c) 1996 The NetBSD Foundation, Inc.
+ * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -37,10 +37,35 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#ifdef _KERNEL
+#ifndef _LOCORE
+
+struct cpu_info;
+struct pcb;
+struct pmap;
+union descriptor;
+
+void gdt_alloc_cpu(struct cpu_info *);
+int gdt_get_slot(void);
void gdt_init(void);
-void tss_alloc(struct pcb *);
-void tss_free(struct pcb *);
+void gdt_init_cpu(struct cpu_info *);
+void gdt_reload_cpu(/* XXX struct cpu_info * */ void);
void ldt_alloc(struct pmap *, union descriptor *, size_t);
void ldt_free(struct pmap *);
+int tss_alloc(struct pcb *);
+void tss_free(int);
+void setgdt(int, void *, size_t, int, int, int, int);
#endif
+
+/*
+ * The initial GDT size (as a descriptor count), and the maximum
+ * GDT size possible.
+ *
+ * These are actually not arbitrary. To start with, they have to be
+ * multiples of 512 and at least 512, in order to work with the
+ * allocation strategy set forth by gdt_init and gdt_grow. Then, the
+ * max cannot exceed 65536 since the selector field of a descriptor is
+ * just 16 bits, and used as free list link.
+ */
+
+#define MINGDTSIZ 512
+#define MAXGDTSIZ 8192
diff --git a/sys/arch/i386/include/i82093reg.h b/sys/arch/i386/include/i82093reg.h
new file mode 100644
index 00000000000..07ec03d8991
--- /dev/null
+++ b/sys/arch/i386/include/i82093reg.h
@@ -0,0 +1,124 @@
+/* $OpenBSD: i82093reg.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: i82093reg.h,v 1.1.2.2 2000/02/21 18:54:07 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Typically, the first apic lives here.
+ */
+#define IOAPIC_BASE_DEFAULT 0xfec00000
+
+/*
+ * Memory-space registers.
+ */
+
+/*
+ * The externally visible registers are all 32 bits wide;
+ * store the register number of interest in IOAPIC_REG, and store/fetch
+ * the real value in IOAPIC_DATA.
+ */
+
+
+
+#define IOAPIC_REG 0x0000
+#define IOAPIC_DATA 0x0010
+
+/*
+ * Internal I/O APIC registers.
+ */
+
+#define IOAPIC_ID 0x00
+
+#define IOAPIC_ID_SHIFT 24
+#define IOAPIC_ID_MASK 0x0f000000
+
+/* Version, and maximum interrupt pin number. */
+
+#define IOAPIC_VER 0x01
+
+#define IOAPIC_VER_SHIFT 0
+#define IOAPIC_VER_MASK 0x000000ff
+
+#define IOAPIC_MAX_SHIFT 16
+#define IOAPIC_MAX_MASK 0x00ff0000
+
+/*
+ * Arbitration ID. Same format as IOAPIC_ID register.
+ */
+#define IOAPIC_ARB 0x02
+
+/*
+ * Redirection table registers.
+ */
+
+#define IOAPIC_REDHI(pin) (0x11 + ((pin)<<1))
+#define IOAPIC_REDLO(pin) (0x10 + ((pin)<<1))
+
+#define IOAPIC_REDHI_DEST_SHIFT 24 /* destination. */
+#define IOAPIC_REDHI_DEST_MASK 0xff000000
+
+#define IOAPIC_REDLO_MASK 0x00010000 /* 0=enabled; 1=masked */
+
+#define IOAPIC_REDLO_LEVEL 0x00008000 /* 0=edge, 1=level */
+#define IOAPIC_REDLO_RIRR 0x00004000 /* remote IRR; read only */
+#define IOAPIC_REDLO_ACTLO 0x00002000 /* 0=act. hi; 1=act. lo */
+#define IOAPIC_REDLO_DELSTS 0x00001000 /* 0=idle; 1=send pending */
+#define IOAPIC_REDLO_DSTMOD 0x00000800 /* 0=physical; 1=logical */
+
+#define IOAPIC_REDLO_DEL_MASK 0x00000700 /* del. mode mask */
+#define IOAPIC_REDLO_DEL_SHIFT 8
+
+#define IOAPIC_REDLO_DEL_FIXED 0
+#define IOAPIC_REDLO_DEL_LOPRI 1
+#define IOAPIC_REDLO_DEL_SMI 2
+#define IOAPIC_REDLO_DEL_NMI 4
+#define IOAPIC_REDLO_DEL_INIT 5
+#define IOAPIC_REDLO_DEL_EXTINT 7
+
+#define IOAPIC_REDLO_VECTOR_MASK 0x000000ff /* delivery vector */
+
+#define IMCR_ADDR 0x22
+#define IMCR_DATA 0x23
+
+#define IMCR_REGISTER 0x70
+#define IMCR_PIC 0x00
+#define IMCR_APIC 0x01
+
+#define ioapic_asm_ack(num) \
+ movl $0,_C_LABEL(local_apic) + LAPIC_EOI
diff --git a/sys/arch/i386/include/i82093var.h b/sys/arch/i386/include/i82093var.h
new file mode 100644
index 00000000000..c3d7ce586f3
--- /dev/null
+++ b/sys/arch/i386/include/i82093var.h
@@ -0,0 +1,103 @@
+/* $OpenBSD: i82093var.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: i82093var.h,v 1.1 2003/02/26 21:26:10 fvdl Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _I386_I82093VAR_H_
+#define _I386_I82093VAR_H_
+
+#include <machine/apicvar.h>
+
+struct ioapic_pin
+{
+ struct intrhand *ip_handler;
+ struct ioapic_pin *ip_next; /* next pin on this vector */
+ struct mp_intr_map *ip_map;
+ int ip_vector; /* IDT vector */
+ int ip_type;
+ int ip_minlevel;
+ int ip_maxlevel;
+};
+
+struct ioapic_softc {
+ struct device sc_dev; /* generic device glue */
+ struct ioapic_softc *sc_next;
+ int sc_apicid;
+ int sc_apic_vers;
+	int		sc_apic_sz;		/* apic size */
+ int sc_flags;
+ paddr_t sc_pa; /* PA of ioapic */
+ volatile u_int32_t *sc_reg; /* KVA of ioapic addr */
+ volatile u_int32_t *sc_data; /* KVA of ioapic data */
+ struct ioapic_pin *sc_pins; /* sc_apic_sz entries */
+};
+
+/*
+ * MP: intr_handle_t is bitfielded.
+ * ih&0xff -> line number.
+ * ih&0x10000000 -> if 0, old-style isa irq; if 1, routed via ioapic.
+ * (ih&0xff0000)>>16 -> ioapic id.
+ * (ih&0x00ff00)>>8 -> ioapic line.
+ */
+
+#define APIC_INT_VIA_APIC 0x10000000
+#define APIC_INT_APIC_MASK 0x00ff0000
+#define APIC_INT_APIC_SHIFT 16
+#define APIC_INT_PIN_MASK 0x0000ff00
+#define APIC_INT_PIN_SHIFT 8
+
+#define APIC_IRQ_APIC(x) ((x & APIC_INT_APIC_MASK) >> APIC_INT_APIC_SHIFT)
+#define APIC_IRQ_PIN(x) ((x & APIC_INT_PIN_MASK) >> APIC_INT_PIN_SHIFT)
+
+void *apic_intr_establish(int, int, int, int (*)(void *), void *, char *);
+void apic_intr_disestablish(void *);
+
+void ioapic_print_redir(struct ioapic_softc *, char *, int);
+void ioapic_format_redir(char *, char *, int, u_int32_t, u_int32_t);
+struct ioapic_softc *ioapic_find(int);
+struct ioapic_softc *ioapic_find_bybase(int);
+
+void ioapic_enable(void);
+void lapic_vectorset(void); /* XXX */
+
+extern int ioapic_bsp_id;
+extern int nioapics;
+extern struct ioapic_softc *ioapics;
+
+#endif /* !_I386_I82093VAR_H_ */
diff --git a/sys/arch/i386/include/i82489reg.h b/sys/arch/i386/include/i82489reg.h
new file mode 100644
index 00000000000..9b850519370
--- /dev/null
+++ b/sys/arch/i386/include/i82489reg.h
@@ -0,0 +1,150 @@
+/* $OpenBSD: i82489reg.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: i82489reg.h,v 1.1.2.1 2000/02/20 16:30:27 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Frank van der Linden.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/*
+ * Registers and constants for the 82489DX and Pentium (and up) integrated
+ * "local" APIC.
+ */
+
+#define LAPIC_ID 0x020 /* ID. RW */
+# define LAPIC_ID_MASK 0x0f000000
+# define LAPIC_ID_SHIFT 24
+
+#define LAPIC_VERS 0x030 /* Version. R */
+# define LAPIC_VERSION_MASK 0x000000ff
+# define LAPIC_VERSION_LVT_MASK 0x00ff0000
+# define LAPIC_VERSION_LVT_SHIFT 16
+
+#define LAPIC_TPRI 0x080 /* Task Prio. RW */
+# define LAPIC_TPRI_MASK 0x000000ff
+# define LAPIC_TPRI_INT_MASK 0x000000f0
+# define LAPIC_TPRI_SUB_MASK 0x0000000f
+
+#define LAPIC_APRI 0x090 /* Arbitration prio R */
+# define LAPIC_APRI_MASK 0x000000ff
+
+#define LAPIC_PPRI 0x0a0 /* Processor prio. R */
+#define LAPIC_EOI 0x0b0 /* End Int. W */
+#define LAPIC_RRR 0x0c0 /* Remote read R */
+#define LAPIC_LDR 0x0d0 /* Logical dest. RW */
+#define LAPIC_DFR 0x0e0 /* Dest. format RW */
+
+#define LAPIC_SVR 0x0f0 /* Spurious intvec RW */
+# define LAPIC_SVR_VECTOR_MASK 0x000000ff
+# define LAPIC_SVR_VEC_FIX 0x0000000f
+# define LAPIC_SVR_VEC_PROG 0x000000f0
+# define LAPIC_SVR_ENABLE 0x00000100
+# define LAPIC_SVR_SWEN 0x00000100
+# define LAPIC_SVR_FOCUS 0x00000200
+# define LAPIC_SVR_FDIS 0x00000200
+
+#define LAPIC_ISR 0x100 /* Int. status. R */
+#define LAPIC_TMR 0x180
+#define LAPIC_IRR 0x200
+#define LAPIC_ESR 0x280 /* Err status. R */
+
+#define LAPIC_ICRLO 0x300 /* Int. cmd. RW */
+# define LAPIC_DLMODE_MASK 0x00000700
+# define LAPIC_DLMODE_FIXED 0x00000000
+# define LAPIC_DLMODE_LOW 0x00000100
+# define LAPIC_DLMODE_SMI 0x00000200
+# define LAPIC_DLMODE_RR 0x00000300
+# define LAPIC_DLMODE_NMI 0x00000400
+# define LAPIC_DLMODE_INIT 0x00000500
+# define LAPIC_DLMODE_STARTUP 0x00000600
+
+# define LAPIC_DSTMODE_LOG 0x00000800
+
+# define LAPIC_DLSTAT_BUSY 0x00001000
+
+# define LAPIC_LVL_ASSERT 0x00004000
+# define LAPIC_LVL_DEASSERT 0x00000000
+
+# define LAPIC_LVL_TRIG 0x00008000
+
+# define LAPIC_RRSTAT_MASK 0x00030000
+# define LAPIC_RRSTAT_INPROG 0x00010000
+# define LAPIC_RRSTAT_VALID 0x00020000
+
+# define LAPIC_DEST_MASK 0x000c0000
+# define LAPIC_DEST_SELF 0x00040000
+# define LAPIC_DEST_ALLINCL 0x00080000
+# define LAPIC_DEST_ALLEXCL 0x000c0000
+
+# define LAPIC_RESV2_MASK 0xfff00000
+
+
+#define LAPIC_ICRHI 0x310 /* Int. cmd. RW */
+# define LAPIC_ID_MASK 0x0f000000
+# define LAPIC_ID_SHIFT 24
+
+#define LAPIC_LVTT 0x320 /* Loc.vec.(timer) RW */
+# define LAPIC_LVTT_VEC_MASK 0x000000ff
+# define LAPIC_LVTT_DS 0x00001000
+# define LAPIC_LVTT_M 0x00010000
+# define LAPIC_LVTT_TM 0x00020000
+
+#define LAPIC_PCINT 0x340
+#define LAPIC_LVINT0 0x350 /* Loc.vec (LINT0) RW */
+# define LAPIC_LVT_PERIODIC 0x00020000
+# define LAPIC_LVT_MASKED 0x00010000
+# define LAPIC_LVT_LEVTRIG 0x00008000
+# define LAPIC_LVT_REMOTE_IRR 0x00004000
+# define LAPIC_INP_POL 0x00002000
+# define LAPIC_PEND_SEND 0x00001000
+
+#define LAPIC_LVINT1 0x360 /* Loc.vec (LINT1) RW */
+#define LAPIC_LVERR 0x370 /* Loc.vec (ERROR) RW */
+#define LAPIC_ICR_TIMER 0x380 /* Initial count RW */
+#define LAPIC_CCR_TIMER 0x390 /* Current count RO */
+
+#define LAPIC_DCR_TIMER 0x3e0 /* Divisor config register */
+# define LAPIC_DCRT_DIV1 0x0b
+# define LAPIC_DCRT_DIV2 0x00
+# define LAPIC_DCRT_DIV4 0x01
+# define LAPIC_DCRT_DIV8 0x02
+# define LAPIC_DCRT_DIV16 0x03
+# define LAPIC_DCRT_DIV32 0x08
+# define LAPIC_DCRT_DIV64 0x09
+# define LAPIC_DCRT_DIV128 0x0a
+
+#define LAPIC_BASE 0xfee00000
+
+#define LAPIC_IRQ_MASK(i) (1 << ((i) + 1))
diff --git a/sys/arch/i386/include/i82489var.h b/sys/arch/i386/include/i82489var.h
new file mode 100644
index 00000000000..3c4632872ce
--- /dev/null
+++ b/sys/arch/i386/include/i82489var.h
@@ -0,0 +1,113 @@
+/* $OpenBSD: i82489var.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: i82489var.h,v 1.1.2.2 2000/02/21 18:46:14 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Frank van der Linden.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _I386_I82489VAR_H_
+#define _I386_I82489VAR_H_
+
+static __inline__ u_int32_t i82489_readreg(int);
+static __inline__ void i82489_writereg(int, u_int32_t);
+
+#ifdef _KERNEL
+extern volatile u_int32_t local_apic[];
+#endif
+
+static __inline__ u_int32_t
+i82489_readreg(reg)
+ int reg;
+{
+ return *((volatile u_int32_t *)(((volatile u_int8_t *)local_apic)
+ + reg));
+}
+
+static __inline__ void
+i82489_writereg(reg, val)
+ int reg;
+ u_int32_t val;
+{
+ *((volatile u_int32_t *)(((volatile u_int8_t *)local_apic) + reg)) =
+ val;
+}
+
+/*
+ * "spurious interrupt vector"; vector used by interrupt which was
+ * aborted because the CPU masked it after it happened but before it
+ * was delivered. "Oh, sorry, I caught you at a bad time".
+ * Low-order 4 bits must be all ones.
+ */
+extern void i386_spurious(void);
+extern void Xintrspurious(void);
+#define LAPIC_SPURIOUS_VECTOR 0xef
+
+/*
+ * Vector used for inter-processor interrupts.
+ */
+extern void Xintripi(void);
+#define LAPIC_IPI_VECTOR IPL_IPI
+
+/*
+ * Vector used for local apic timer interrupts.
+ */
+
+extern void Xintrltimer(void);
+#define LAPIC_TIMER_VECTOR IPL_CLOCK
+
+/*
+ * Vectors to be used for self-soft-interrupts.
+ */
+
+#define LAPIC_SOFTCLOCK_VECTOR IPL_SOFTCLOCK
+#define LAPIC_SOFTNET_VECTOR IPL_SOFTNET
+#define LAPIC_SOFTTTY_VECTOR IPL_SOFTTTY
+
+extern void Xintrsoftclock(void);
+extern void Xintrsoftnet(void);
+extern void Xintrsofttty(void);
+
+extern void (*apichandler[])(void);
+
+struct cpu_info;
+
+extern void lapic_boot_init(paddr_t);
+extern void lapic_initclocks(void);
+extern void lapic_set_lvt(void);
+extern void lapic_set_softvectors(void);
+extern void lapic_enable(void);
+extern void lapic_calibrate_timer(struct cpu_info *);
+
+#endif
diff --git a/sys/arch/i386/include/i8259.h b/sys/arch/i386/include/i8259.h
new file mode 100644
index 00000000000..f1c4462e4d0
--- /dev/null
+++ b/sys/arch/i386/include/i8259.h
@@ -0,0 +1,154 @@
+/* $OpenBSD: i8259.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: i8259.h,v 1.3 2003/05/04 22:01:56 fvdl Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)icu.h 5.6 (Berkeley) 5/9/91
+ */
+
+#ifndef _I386_I8259_H_
+#define _I386_I8259_H_
+
+#include <dev/isa/isareg.h>
+
+#ifndef _LOCORE
+
+/*
+ * Interrupt "level" mechanism variables, masks, and macros
+ */
+extern unsigned imen; /* interrupt mask enable */
+extern unsigned i8259_setmask(unsigned);
+
+#define SET_ICUS() (outb(IO_ICU1 + 1, imen), outb(IO_ICU2 + 1, imen >> 8))
+
+extern void i8259_default_setup(void);
+extern void i8259_reinit(void);
+
+#endif /* !_LOCORE */
+
+/*
+ * Interrupt enable bits -- in order of priority
+ */
+#define IRQ_SLAVE 2
+
+/*
+ * Interrupt Control offset into Interrupt descriptor table (IDT)
+ */
+#define ICU_OFFSET 32 /* 0-31 are processor exceptions */
+#define ICU_LEN 16 /* 32-47 are ISA interrupts */
+
+
+#define ICU_HARDWARE_MASK
+
+/*
+ * These macros are fairly self explanatory. If ICU_SPECIAL_MASK_MODE is
+ * defined, we try to take advantage of the ICU's `special mask mode' by only
+ * EOIing the interrupts on return. This avoids the requirement of masking and
+ * unmasking. We can't do this without special mask mode, because the ICU
+ * would also hold interrupts that it thinks are of lower priority.
+ *
+ * Many machines do not support special mask mode, so by default we don't try
+ * to use it.
+ */
+
+#define IRQ_BIT(num) (1 << ((num) % 8))
+#define IRQ_BYTE(num) ((num) >> 3)
+
+#define i8259_late_ack(num)
+
+#ifdef ICU_SPECIAL_MASK_MODE
+
+#define i8259_asm_ack1(num)
+#define i8259_asm_ack2(num) \
+ movb $(0x60|IRQ_SLAVE),%al /* specific EOI for IRQ2 */ ;\
+ outb %al,$IO_ICU1
+#define i8259_asm_mask(num)
+#define i8259_asm_unmask(num) \
+ movb $(0x60|(num%8)),%al /* specific EOI */ ;\
+ outb %al,$ICUADDR
+
+#else /* ICU_SPECIAL_MASK_MODE */
+
+#ifndef AUTO_EOI_1
+#define i8259_asm_ack1(num) \
+ movb $(0x60|(num%8)),%al /* specific EOI */ ;\
+ outb %al,$IO_ICU1
+#else
+#define i8259_asm_ack1(num)
+#endif
+
+#ifndef AUTO_EOI_2
+#define i8259_asm_ack2(num) \
+ movb $(0x60|(num%8)),%al /* specific EOI */ ;\
+ outb %al,$IO_ICU2 /* do the second ICU first */ ;\
+ movb $(0x60|IRQ_SLAVE),%al /* specific EOI for IRQ2 */ ;\
+ outb %al,$IO_ICU1
+#else
+#define i8259_asm_ack2(num)
+#endif
+
+#ifndef DUMMY_NOPS
+#define PIC_MASKDELAY
+#endif
+
+#ifdef PIC_MASKDELAY
+#define MASKDELAY pushl %eax ; inb $0x84,%al ; popl %eax
+#else
+#define MASKDELAY
+#endif
+
+#ifdef ICU_HARDWARE_MASK
+
+#define i8259_asm_mask(num) \
+ movb CVAROFF(imen, IRQ_BYTE(num)),%al ;\
+ orb $IRQ_BIT(num),%al ;\
+ movb %al,CVAROFF(imen, IRQ_BYTE(num)) ;\
+ MASKDELAY ;\
+ outb %al,$(ICUADDR+1)
+#define i8259_asm_unmask(num) \
+ cli ;\
+ movb CVAROFF(imen, IRQ_BYTE(num)),%al ;\
+ andb $~IRQ_BIT(num),%al ;\
+ movb %al,CVAROFF(imen, IRQ_BYTE(num)) ;\
+ MASKDELAY ;\
+ outb %al,$(ICUADDR+1) ;\
+ sti
+
+#else /* ICU_HARDWARE_MASK */
+
+#define i8259_asm_mask(num)
+#define i8259_asm_unmask(num)
+
+#endif /* ICU_HARDWARE_MASK */
+#endif /* ICU_SPECIAL_MASK_MODE */
+
+#endif /* !_I386_I8259_H_ */
diff --git a/sys/arch/i386/include/intr.h b/sys/arch/i386/include/intr.h
index 00ba2cea665..9d7a7816cf0 100644
--- a/sys/arch/i386/include/intr.h
+++ b/sys/arch/i386/include/intr.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: intr.h,v 1.20 2004/05/23 00:06:01 tedu Exp $ */
+/* $OpenBSD: intr.h,v 1.21 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: intr.h,v 1.5 1996/05/13 06:11:28 mycroft Exp $ */
/*
@@ -33,85 +33,31 @@
#ifndef _I386_INTR_H_
#define _I386_INTR_H_
-/*
- * Intel APICs (advanced programmable interrupt controllers) have
- * bytesized priority registers where the upper nibble is the actual
- * interrupt priority level (a.k.a. IPL). Interrupt vectors are
- * closely tied to these levels as interrupts whose vectors' upper
- * nibble is lower than or equal to the current level are blocked.
- * Not all 256 possible vectors are available for interrupts in
- * APIC systems, only
- *
- * For systems where instead the older ICU (interrupt controlling
- * unit, a.k.a. PIC or 82C59) is used, the IPL is not directly useful,
- * since the interrupt blocking is handled via interrupt masks instead
- * of levels. However the IPL is easily used as an offset into arrays
- * of masks.
- */
-#define IPLSHIFT 4 /* The upper nibble of vectors is the IPL. */
-#define NIPL 16 /* Four bits of information gives as much. */
-#define IPL(level) ((level) >> IPLSHIFT) /* Extract the IPL. */
-/* XXX Maybe this IDTVECOFF definition should be elsewhere? */
-#define IDTVECOFF 0x20 /* The lower 32 IDT vectors are reserved. */
+#include <machine/intrdefs.h>
-/*
- * This macro is only defined for 0 <= x < 14, i.e. there are fourteen
- * distinct priority levels available for interrupts.
- */
-#define MAKEIPL(priority) (IDTVECOFF + ((priority) << IPLSHIFT))
+#ifndef _LOCORE
-/*
- * Interrupt priority levels.
- * XXX We are somewhat sloppy about what we mean by IPLs, sometimes
- * XXX we refer to the eight-bit value suitable for storing into APICs'
- * XXX priority registers, other times about the four-bit entity found
- * XXX in the former values' upper nibble, which can be used as offsets
- * XXX in various arrays of our implementation. We are hoping that
- * XXX the context will provide enough information to not make this
- * XXX sloppy naming a real problem.
- */
-#define IPL_NONE 0 /* nothing */
-#define IPL_SOFTCLOCK MAKEIPL(0) /* timeouts */
-#define IPL_SOFTNET MAKEIPL(1) /* protocol stacks */
-#define IPL_BIO MAKEIPL(2) /* block I/O */
-#define IPL_NET MAKEIPL(3) /* network */
-#define IPL_SOFTTTY MAKEIPL(4) /* delayed terminal handling */
-#define IPL_TTY MAKEIPL(5) /* terminal */
-#define IPL_VM MAKEIPL(6) /* memory allocation */
-#define IPL_IMP IPL_VM /* XXX - should not be here. */
-#define IPL_AUDIO MAKEIPL(7) /* audio */
-#define IPL_CLOCK MAKEIPL(8) /* clock */
-#define IPL_STATCLOCK MAKEIPL(9) /* statclock */
-#define IPL_HIGH MAKEIPL(9) /* everything */
-
-/* Interrupt sharing types. */
-#define IST_NONE 0 /* none */
-#define IST_PULSE 1 /* pulsed */
-#define IST_EDGE 2 /* edge-triggered */
-#define IST_LEVEL 3 /* level-triggered */
-
-/* Soft interrupt masks. */
-#define SIR_CLOCK 31
-#define SIR_NET 30
-#define SIR_TTY 29
+#ifdef MULTIPROCESSOR
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+#include <machine/cpu.h>
+#endif
-#ifndef _LOCORE
+extern volatile u_int32_t lapic_tpr; /* Current interrupt priority level. */
-volatile int cpl; /* Current interrupt priority level. */
-volatile int ipending; /* Interrupts pending. */
-volatile int astpending;/* Asynchronous software traps (softints) pending. */
-int imask[NIPL]; /* Bitmasks telling what interrupts are blocked. */
-int iunmask[NIPL]; /* Bitmasks telling what interrupts are accepted. */
+extern volatile u_int32_t ipending; /* Interrupts pending. */
+extern int imask[]; /* Bitmasks telling what interrupts are blocked. */
+extern int iunmask[]; /* Bitmasks telling what interrupts are accepted. */
#define IMASK(level) imask[IPL(level)]
#define IUNMASK(level) iunmask[IPL(level)]
extern void Xspllower(void);
-int splraise(int);
-int spllower(int);
-void splx(int);
-void softintr(int);
+extern int splraise(int);
+extern int spllower(int);
+extern void splx(int);
+extern void softintr(int, int);
/* SPL asserts */
#ifdef DIAGNOSTIC
@@ -140,6 +86,7 @@ void splassert_check(int, const char *);
#define splaudio() splraise(IPL_AUDIO)
#define splclock() splraise(IPL_CLOCK)
#define splstatclock() splhigh()
+#define splipi() splraise(IPL_IPI)
/*
* Software interrupt masks
@@ -158,12 +105,40 @@ void splassert_check(int, const char *);
#define splvm() splraise(IPL_VM)
#define splimp() splvm()
#define splhigh() splraise(IPL_HIGH)
+#define splsched() splraise(IPL_SCHED)
+#define spllock() splhigh()
#define spl0() spllower(IPL_NONE)
#define setsoftast() (astpending = 1)
-#define setsoftclock() softintr(1 << SIR_CLOCK)
-#define setsoftnet() softintr(1 << SIR_NET)
-#define setsofttty() softintr(1 << SIR_TTY)
+#define setsoftclock() softintr(1 << SIR_CLOCK, IPL_SOFTCLOCK)
+#define setsoftnet() softintr(1 << SIR_NET, IPL_SOFTNET)
+#define setsofttty() softintr(1 << SIR_TTY, IPL_SOFTTTY)
+
+#define I386_IPI_HALT 0x00000001
+#define I386_IPI_MICROSET 0x00000002
+#define I386_IPI_FLUSH_FPU 0x00000004
+#define I386_IPI_SYNCH_FPU 0x00000008
+#define I386_IPI_TLB 0x00000010
+#define I386_IPI_MTRR 0x00000020
+#define I386_IPI_GDT 0x00000040
+#define I386_IPI_DDB 0x00000080 /* synchronize while in ddb */
+
+#define I386_NIPI 8
+
+struct cpu_info;
+
+#ifdef MULTIPROCESSOR
+int i386_send_ipi(struct cpu_info *, int);
+void i386_broadcast_ipi(int);
+void i386_multicast_ipi(int, int);
+void i386_ipi_handler(void);
+void i386_intlock(struct intrframe);
+void i386_intunlock(struct intrframe);
+void i386_softintlock(void);
+void i386_softintunlock(void);
+
+extern void (*ipifunc[I386_NIPI])(struct cpu_info *);
+#endif
#endif /* !_LOCORE */
diff --git a/sys/arch/i386/include/intrdefs.h b/sys/arch/i386/include/intrdefs.h
new file mode 100644
index 00000000000..c2c998b76b3
--- /dev/null
+++ b/sys/arch/i386/include/intrdefs.h
@@ -0,0 +1,127 @@
+/* $OpenBSD: intrdefs.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: intrdefs.h,v 1.2 2003/05/04 22:01:56 fvdl Exp $ */
+
+#ifndef _i386_INTRDEFS_H
+#define _i386_INTRDEFS_H
+
+/*
+ * Intel APICs (advanced programmable interrupt controllers) have
+ * bytesized priority registers where the upper nibble is the actual
+ * interrupt priority level (a.k.a. IPL). Interrupt vectors are
+ * closely tied to these levels as interrupts whose vectors' upper
+ * nibble is lower than or equal to the current level are blocked.
+ * Not all 256 possible vectors are available for interrupts in
+ * APIC systems, only
+ *
+ * For systems where instead the older ICU (interrupt controlling
+ * unit, a.k.a. PIC or 82C59) is used, the IPL is not directly useful,
+ * since the interrupt blocking is handled via interrupt masks instead
+ * of levels. However the IPL is easily used as an offset into arrays
+ * of masks.
+ */
+#define IPLSHIFT 4 /* The upper nibble of vectors is the IPL. */
+#define NIPL 16 /* Four bits of information gives as much. */
+#define IPL(level) ((level) >> IPLSHIFT) /* Extract the IPL. */
+/* XXX Maybe this IDTVECOFF definition should be elsewhere? */
+#define IDTVECOFF 0x20 /* The lower 32 IDT vectors are reserved. */
+
+/*
+ * This macro is only defined for 0 <= x < 14, i.e. there are fourteen
+ * distinct priority levels available for interrupts.
+ */
+#define MAKEIPL(priority) (IDTVECOFF + ((priority) << IPLSHIFT))
+
+/*
+ * Interrupt priority levels.
+ *
+ * XXX We are somewhat sloppy about what we mean by IPLs, sometimes
+ * XXX we refer to the eight-bit value suitable for storing into APICs'
+ * XXX priority registers, other times about the four-bit entity found
+ * XXX in the former values' upper nibble, which can be used as offsets
+ * XXX in various arrays of our implementation. We are hoping that
+ * XXX the context will provide enough information to not make this
+ * XXX sloppy naming a real problem.
+ *
+ * There are tty, network and disk drivers that use free() at interrupt
+ * time, so imp > (tty | net | bio).
+ *
+ * Since run queues may be manipulated by both the statclock and tty,
+ * network, and disk drivers, clock > imp.
+ *
+ * IPL_HIGH must block everything that can manipulate a run queue.
+ *
+ * XXX Ultimately we may need serial drivers to run at the absolute highest
+ * XXX priority to avoid overruns, then we must make serial > high.
+ *
+ * The level numbers are picked to fit into APIC vector priorities.
+ */
+#define IPL_NONE 0 /* nothing */
+#define IPL_SOFTCLOCK MAKEIPL(0) /* timeouts */
+#define IPL_SOFTNET MAKEIPL(1) /* protocol stacks */
+#define IPL_BIO MAKEIPL(2) /* block I/O */
+#define IPL_NET MAKEIPL(3) /* network */
+#define IPL_SOFTTTY MAKEIPL(4) /* delayed terminal handling */
+#define IPL_TTY MAKEIPL(5) /* terminal */
+#define IPL_VM MAKEIPL(6) /* memory allocation */
+#define IPL_IMP IPL_VM /* XXX - should not be here. */
+#define IPL_AUDIO MAKEIPL(7) /* audio */
+#define IPL_CLOCK MAKEIPL(8) /* clock */
+#define IPL_SCHED IPL_CLOCK
+#define IPL_STATCLOCK MAKEIPL(9) /* statclock */
+#define IPL_HIGH MAKEIPL(9) /* everything */
+#define IPL_IPI MAKEIPL(10) /* interprocessor interrupt */
+
+/* Interrupt sharing types. */
+#define IST_NONE 0 /* none */
+#define IST_PULSE 1 /* pulsed */
+#define IST_EDGE 2 /* edge-triggered */
+#define IST_LEVEL 3 /* level-triggered */
+
+/*
+ * Local APIC masks. Must not conflict with SIR_* below, and must
+ * be >= NUM_LEGACY_IRQS. Note that LIR_IPI must be first.
+ */
+#define LIR_IPI 31
+#define LIR_TIMER 30
+
+/* Soft interrupt masks. */
+#define SIR_CLOCK 29
+#define SIR_NET 28
+#define SIR_TTY 27
+
+
+/*
+ * Maximum # of interrupt sources per CPU. 32 to fit in one word.
+ * ioapics can theoretically produce more, but it's not likely to
+ * happen. For multiple ioapics, things can be routed to different
+ * CPUs.
+ */
+#define MAX_INTR_SOURCES 32
+#define NUM_LEGACY_IRQS 16
+
+/*
+ * Low and high boundaries between which interrupt gates will
+ * be allocated in the IDT.
+ */
+#define IDT_INTR_LOW (0x20 + NUM_LEGACY_IRQS)
+#define IDT_INTR_HIGH 0xef
+
+#define I386_IPI_HALT 0x00000001
+#define I386_IPI_MICROSET 0x00000002
+#define I386_IPI_FLUSH_FPU 0x00000004
+#define I386_IPI_SYNCH_FPU 0x00000008
+#define I386_IPI_TLB 0x00000010
+#define I386_IPI_MTRR 0x00000020
+#define I386_IPI_GDT 0x00000040
+#define I386_IPI_DDB 0x00000080 /* synchronize while in ddb */
+
+#define I386_NIPI 8
+
+#define I386_IPI_NAMES { "halt IPI", "timeset IPI", "FPU flush IPI", \
+ "FPU synch IPI", "TLB shootdown IPI", \
+ "MTRR update IPI", "GDT update IPI", \
+ "DDB IPI" }
+
+#define IREENT_MAGIC 0x18041969
+
+#endif /* _I386_INTRDEFS_H */
diff --git a/sys/arch/i386/include/lock.h b/sys/arch/i386/include/lock.h
new file mode 100644
index 00000000000..a647999a532
--- /dev/null
+++ b/sys/arch/i386/include/lock.h
@@ -0,0 +1,119 @@
+/* $OpenBSD: lock.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: lock.h,v 1.1.2.2 2000/05/03 14:40:55 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Machine-dependent spin lock operations.
+ */
+
+#ifndef _I386_LOCK_H_
+#define _I386_LOCK_H_
+
+typedef __volatile int __cpu_simple_lock_t;
+
+#define __SIMPLELOCK_LOCKED 1
+#define __SIMPLELOCK_UNLOCKED 0
+
+/*
+ * compiler barrier: prevent reordering of instructions.
+ * XXX something similar will move to <sys/cdefs.h>
+ * or thereabouts.
+ * This prevents the compiler from reordering code around
+ * this "instruction", acting as a sequence point for code generation.
+ */
+
+#define __lockbarrier() __asm __volatile("": : :"memory")
+
+#ifdef LOCKDEBUG
+
+extern void __cpu_simple_lock_init(__cpu_simple_lock_t *);
+extern void __cpu_simple_lock(__cpu_simple_lock_t *);
+extern int __cpu_simple_lock_try(__cpu_simple_lock_t *);
+extern void __cpu_simple_unlock(__cpu_simple_lock_t *);
+
+#else
+
+#include <machine/atomic.h>
+
+static __inline void __cpu_simple_lock_init(__cpu_simple_lock_t *)
+ __attribute__((__unused__));
+static __inline void __cpu_simple_lock(__cpu_simple_lock_t *)
+ __attribute__((__unused__));
+static __inline int __cpu_simple_lock_try(__cpu_simple_lock_t *)
+ __attribute__((__unused__));
+static __inline void __cpu_simple_unlock(__cpu_simple_lock_t *)
+ __attribute__((__unused__));
+
+static __inline void
+__cpu_simple_lock_init(__cpu_simple_lock_t *lockp)
+{
+ *lockp = __SIMPLELOCK_UNLOCKED;
+ __lockbarrier();
+}
+
+static __inline void
+__cpu_simple_lock(__cpu_simple_lock_t *lockp)
+{
+ while (i386_atomic_testset_i(lockp, __SIMPLELOCK_LOCKED)
+ == __SIMPLELOCK_LOCKED) {
+ continue; /* spin */
+ }
+ __lockbarrier();
+}
+
+static __inline int
+__cpu_simple_lock_try(__cpu_simple_lock_t *lockp)
+{
+ int r = (i386_atomic_testset_i(lockp, __SIMPLELOCK_LOCKED)
+ == __SIMPLELOCK_UNLOCKED);
+
+ __lockbarrier();
+
+ return (r);
+}
+
+static __inline void
+__cpu_simple_unlock(__cpu_simple_lock_t *lockp)
+{
+ __lockbarrier();
+ *lockp = __SIMPLELOCK_UNLOCKED;
+}
+
+#endif /* !LOCKDEBUG */
+
+#endif /* _I386_LOCK_H_ */
diff --git a/sys/arch/i386/include/mp.h b/sys/arch/i386/include/mp.h
new file mode 100644
index 00000000000..d12e8a54aca
--- /dev/null
+++ b/sys/arch/i386/include/mp.h
@@ -0,0 +1,222 @@
+/* $OpenBSD: mp.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+
+/*-
+ * Copyright (c) 1996 SigmaSoft, Th. Lockert <tholo@sigmasoft.com>
+ * Copyright (c) 2000 Niklas Hallqvist.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by SigmaSoft, Th. Lockert.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MACHINE_MP_H
+#define _MACHINE_MP_H
+
+/*
+ * Configuration structures as defined in the Intel MP specification,
+ * version 1.4
+ */
+
+/*
+ * MP Floating Pointer structure; must be located on 16-byte boundary
+ */
+struct mp_float {
+ u_int8_t signature[4];
+#define MPF_SIGNATURE "_MP_"
+ u_int32_t pointer;
+ u_int8_t length;
+ u_int8_t revision;
+ u_int8_t checksum;
+ u_int8_t feature1;
+#define MP_CONF_EXTENDED 0
+#define MP_CONF_2_ISA 1
+#define MP_CONF_2_EISA_NO_8 2
+#define MP_CONF_2_EISA 3
+#define MP_CONF_2_MCA 4
+#define MP_CONF_2_ISA_PCI 5
+#define MP_CONF_2_EISA_PCI 6
+#define MP_CONF_2_MCA_PCI 7
+ u_int8_t feature2;
+#define MP_IMCR 0x80
+ u_int8_t feature3;
+ u_int8_t feature4;
+ u_int8_t feature5;
+};
+
+/*
+ * MP configuration table header
+ */
+struct mp_conf {
+ u_int8_t signature[4];
+#define MPC_SIGNATURE "PCMP"
+ u_int16_t length;
+ u_int8_t revision;
+ u_int8_t checksum;
+ u_int8_t oem[8];
+ u_int8_t product[12];
+ u_int32_t oem_pointer;
+ u_int16_t oem_length;
+ u_int16_t entry_count;
+ u_int32_t local_apic;
+ u_int16_t ext_length;
+ u_int8_t et_checksum;
+ u_int8_t reserved;
+};
+
+/*
+ * Processor entry
+ */
+struct mp_proc {
+ u_int8_t type;
+#define MP_PROCESSOR 0
+ u_int8_t local_apic;
+ u_int8_t apic_version;
+ u_int8_t flags;
+#define MP_ENABLE 0x01
+#define MP_BOOTCPU 0x02
+ u_int32_t cpu_signature;
+#define MP_STEPPING 0x0000000F
+#define MP_MODEL 0x000000F0
+#define MP_FAMILY 0x00000F00
+ u_int32_t feature_flags;
+#define MP_FP 0x00000001
+#define MP_MCE 0x00000080
+#define MP_CX8 0x00000100
+#define MP_APIC 0x00000200
+ u_int32_t reserved1;
+ u_int32_t reserved2;
+};
+
+/*
+ * Bus entry
+ */
+struct mp_bus {
+ u_int8_t type;
+#define MP_BUS 1
+ u_int8_t bus_id;
+ u_int8_t bustype[6] __attribute((packed));
+#define MP_BUS_CBUS "CBUS "
+#define MP_BUS_CBUSII "CBUSII"
+#define MP_BUS_EISA "EISA "
+#define MP_BUS_FUTURE "FUTURE"
+#define MP_BUS_INTERN "INTERN"
+#define MP_BUS_ISA "ISA "
+#define MP_BUS_MBI "MBI "
+#define MP_BUS_MBII "MBII "
+#define MP_BUS_MCA "MCA "
+#define MP_BUS_MPI "MPI "
+#define MP_BUS_MPSA "MPSA "
+#define MP_BUS_NUBUS "NUBUS "
+#define MP_BUS_PCI "PCI "
+#define MP_BUS_PCCARD "PCMCIA"
+#define MP_BUS_TC "TC "
+#define MP_BUS_VLB "VL "
+#define MP_BUS_VME "VME "
+#define MP_BUS_XPRESS "XPRESS"
+};
+
+/*
+ * I/O APIC entry
+ */
+struct mp_apic {
+ u_int8_t type;
+#define MP_IOAPIC 2
+ u_int8_t apic_id;
+ u_int8_t apic_version;
+ u_int8_t apic_flags;
+#define MP_APIC_ENABLE 0x80
+ u_int32_t apic_address;
+};
+
+/*
+ * I/O Interrupt Assignment entry
+ * Local Interrupt Assignment entry
+ */
+struct mp_irq {
+ u_int8_t type;
+#define MP_INTSRC 3
+#define MP_LOCINTSRC 4
+ u_int8_t irqtype;
+#define MP_INT_NORMAL 0
+#define MP_INT_NMI 1
+#define MP_INT_SMI 2
+#define MP_INT_EXT 3
+ u_int16_t irqflags;
+ u_int8_t bus_id;
+ u_int8_t source_irq;
+ u_int8_t destination_apic;
+#define MP_ALL_APIC 0xFF
+ u_int8_t apic_intr;
+};
+
+/*
+ * System Address Space Mapping entry
+ */
+struct mp_map {
+ u_int8_t type;
+#define MP_SYSMAP 128
+ u_int8_t length;
+ u_int8_t bus;
+ u_int8_t address_type;
+#define MP_ADDR_IO 0
+#define MP_ADDR_MEM 1
+#define MP_ADDR_PRE 2
+ u_int64_t address_base;
+ u_int64_t address_length;
+};
+
+/*
+ * Bus Hierarchy Descriptor entry
+ */
+struct mp_bushier {
+ u_int8_t type;
+#define MP_BUSHIER 129
+ u_int8_t length;
+ u_int8_t bus_id;
+ u_int8_t bus_info;
+#define MP_BUS_SUB 0x01
+ u_int8_t parent;
+ u_int8_t reserved1;
+ u_int16_t reserved2;
+};
+
+/*
+ * Compatibility Bus Address Space Modifier entry
+ */
+struct mp_buscompat {
+ u_int8_t type;
+#define MP_BUSCOMPAT 130
+ u_int8_t length;
+ u_int8_t bus_id;
+ u_int8_t modifier;
+#define MP_COMPAT_SUB 0x01
+ u_int32_t range;
+};
+
+#ifdef _KERNEL
+extern int napics;
+#endif /* _KERNEL */
+
+#endif /* _MACHINE_MP_H */
diff --git a/sys/arch/i386/include/mpbiosreg.h b/sys/arch/i386/include/mpbiosreg.h
new file mode 100644
index 00000000000..d42aed04cdc
--- /dev/null
+++ b/sys/arch/i386/include/mpbiosreg.h
@@ -0,0 +1,155 @@
+/* $OpenBSD: mpbiosreg.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: mpbiosreg.h,v 1.1.2.3 2000/02/29 13:17:51 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _I386_MPBIOSREG_H_
+#define _I386_MPBIOSREG_H_
+
+#define BIOS_BASE (0xf0000)
+#define BIOS_SIZE (0x10000)
+#define BIOS_COUNT (BIOS_SIZE)
+
+/*
+ * Multiprocessor config table entry types.
+ */
+
+#define MPS_MCT_CPU 0
+#define MPS_MCT_BUS 1
+#define MPS_MCT_IOAPIC 2
+#define MPS_MCT_IOINT 3
+#define MPS_MCT_LINT 4
+
+#define MPS_MCT_NTYPES 5
+
+/*
+ * Interrupt types
+ */
+
+#define MPS_INTTYPE_INT 0
+#define MPS_INTTYPE_NMI 1
+#define MPS_INTTYPE_SMI 2
+#define MPS_INTTYPE_ExtINT 3
+
+#define MPS_INTPO_DEF 0
+#define MPS_INTPO_ACTHI 1
+#define MPS_INTPO_ACTLO 3
+
+#define MPS_INTTR_DEF 0
+#define MPS_INTTR_EDGE 1
+#define MPS_INTTR_LEVEL 3
+
+
+/* MP Floating Pointer Structure */
+struct mpbios_fps {
+ u_int32_t signature;
+/* string defined by the Intel MP Spec as identifying the MP table */
+#define MP_FP_SIG 0x5f504d5f /* _MP_ */
+
+ u_int32_t pap;
+ u_int8_t length;
+ u_int8_t spec_rev;
+ u_int8_t checksum;
+ u_int8_t mpfb1; /* system configuration */
+ u_int8_t mpfb2; /* flags */
+#define MPFPS_FLAG_IMCR 0x80 /* IMCR present */
+ u_int8_t mpfb3; /* unused */
+ u_int8_t mpfb4; /* unused */
+ u_int8_t mpfb5; /* unused */
+};
+
+/* MP Configuration Table Header */
+struct mpbios_cth {
+ u_int32_t signature;
+#define MP_CT_SIG 0x504d4350 /* PCMP */
+
+ u_int16_t base_len;
+ u_int8_t spec_rev;
+ u_int8_t checksum;
+ u_int8_t oem_id[8];
+ u_int8_t product_id[12];
+ u_int32_t oem_table_pointer;
+ u_int16_t oem_table_size;
+ u_int16_t entry_count;
+ u_int32_t apic_address;
+ u_int16_t ext_len;
+ u_int8_t ext_cksum;
+ u_int8_t reserved;
+};
+
+struct mpbios_proc {
+ u_int8_t type;
+ u_int8_t apic_id;
+ u_int8_t apic_version;
+ u_int8_t cpu_flags;
+#define PROCENTRY_FLAG_EN 0x01
+#define PROCENTRY_FLAG_BP 0x02
+ u_long cpu_signature;
+ u_long feature_flags;
+ u_long reserved1;
+ u_long reserved2;
+};
+
+struct mpbios_bus {
+ u_int8_t type;
+ u_int8_t bus_id;
+ char bus_type[6];
+};
+
+struct mpbios_ioapic {
+ u_int8_t type;
+ u_int8_t apic_id;
+ u_int8_t apic_version;
+ u_int8_t apic_flags;
+#define IOAPICENTRY_FLAG_EN 0x01
+ void *apic_address;
+};
+
+struct mpbios_int {
+ u_int8_t type;
+ u_int8_t int_type;
+ u_int16_t int_flags;
+ u_int8_t src_bus_id;
+ u_int8_t src_bus_irq;
+ u_int8_t dst_apic_id;
+#define MPS_ALL_APICS 0xff
+ u_int8_t dst_apic_int;
+};
+
+#endif /* !_I386_MPBIOSREG_H_ */
diff --git a/sys/arch/i386/include/mpbiosvar.h b/sys/arch/i386/include/mpbiosvar.h
new file mode 100644
index 00000000000..b617041b4d9
--- /dev/null
+++ b/sys/arch/i386/include/mpbiosvar.h
@@ -0,0 +1,88 @@
+/* $OpenBSD: mpbiosvar.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */
+/* $NetBSD: mpbiosvar.h,v 1.1.2.3 2000/02/29 13:17:20 sommerfeld Exp $ */
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by RedBack Networks Inc.
+ *
+ * Author: Bill Sommerfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _I386_MPBIOSVAR_H_
+#define _I386_MPBIOSVAR_H_
+
+#define MP_TRAMPOLINE (2 * PAGE_SIZE)
+
+#if !defined(_LOCORE)
+
+#include <machine/mpbiosreg.h>
+
+struct mp_bus
+{
+ char *mb_name; /* XXX bus name */
+ int mb_idx; /* XXX bus index */
+ void (*mb_intr_print) (int);
+ void (*mb_intr_cfg)(const struct mpbios_int *, u_int32_t *);
+ struct mp_intr_map *mb_intrs;
+ u_int32_t mb_data; /* random bus-specific datum. */
+};
+
+struct mp_intr_map
+{
+ struct mp_intr_map *next;
+ struct mp_bus *bus;
+ int bus_pin;
+ struct ioapic_softc *ioapic;
+ int ioapic_pin;
+ int ioapic_ih; /* int handle, for apic_intr_est */
+ int type; /* from mp spec intr record */
+ int flags; /* from mp spec intr record */
+ u_int32_t redir;
+};
+
+#if defined(_KERNEL)
+extern int mp_verbose;
+extern struct mp_bus *mp_busses;
+extern struct mp_intr_map *mp_intrs;
+extern int mp_isa_bus;
+extern int mp_eisa_bus;
+
+void mpbios_scan(struct device *);
+int mpbios_probe(struct device *);
+#endif
+
+#endif
+
+#endif /* !_I386_MPBIOSVAR_H_ */
diff --git a/sys/arch/i386/include/npx.h b/sys/arch/i386/include/npx.h
index d6f906efb46..810c7817bee 100644
--- a/sys/arch/i386/include/npx.h
+++ b/sys/arch/i386/include/npx.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: npx.h,v 1.6 2004/02/01 19:05:23 deraadt Exp $ */
+/* $OpenBSD: npx.h,v 1.7 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: npx.h,v 1.11 1994/10/27 04:16:11 cgd Exp $ */
/*-
@@ -180,5 +180,8 @@ struct emcsts {
void process_xmm_to_s87(const struct savexmm *, struct save87 *);
void process_s87_to_xmm(const struct save87 *, struct savexmm *);
+struct cpu_info;
+
+void npxinit(struct cpu_info *);
#endif /* !_I386_NPX_H_ */
diff --git a/sys/arch/i386/include/param.h b/sys/arch/i386/include/param.h
index 0d1dfb68fd9..335d7737cbb 100644
--- a/sys/arch/i386/include/param.h
+++ b/sys/arch/i386/include/param.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: param.h,v 1.27 2004/04/19 22:55:49 deraadt Exp $ */
+/* $OpenBSD: param.h,v 1.28 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: param.h,v 1.29 1996/03/04 05:04:26 cgd Exp $ */
/*-
@@ -99,7 +99,7 @@
#define USPACE (UPAGES * NBPG) /* total size of u-area */
#ifndef MSGBUFSIZE
-#define MSGBUFSIZE 2*NBPG /* default message buffer size */
+#define MSGBUFSIZE 4*NBPG /* default message buffer size */
#endif
/*
diff --git a/sys/arch/i386/include/pcb.h b/sys/arch/i386/include/pcb.h
index 611ab5996a8..641f8874f17 100644
--- a/sys/arch/i386/include/pcb.h
+++ b/sys/arch/i386/include/pcb.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pcb.h,v 1.11 2004/02/01 19:05:23 deraadt Exp $ */
+/* $OpenBSD: pcb.h,v 1.12 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: pcb.h,v 1.21 1996/01/08 13:51:42 mycroft Exp $ */
/*-
@@ -75,7 +75,8 @@ struct pcb {
int vm86_eflags; /* virtual eflags for vm86 mode */
int vm86_flagmask; /* flag mask for vm86 mode */
void *vm86_userp; /* XXX performance hack */
- struct pmap *pcb_pmap; /* back pointer to our pmap */
+ struct pmap *pcb_pmap; /* back pointer to our pmap */
+ struct cpu_info *pcb_fpcpu; /* cpu holding our fpu state */
u_long pcb_iomap[NIOPORTS/32]; /* I/O bitmap */
u_char pcb_iomap_pad; /* required; must be 0xff, says intel */
};
@@ -88,8 +89,4 @@ struct md_coredump {
long md_pad[8];
};
-#ifdef _KERNEL
-struct pcb *curpcb; /* our current running pcb */
-#endif
-
#endif /* _I386_PCB_H_ */
diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h
index 74013d8824c..a7089d958fd 100644
--- a/sys/arch/i386/include/pmap.h
+++ b/sys/arch/i386/include/pmap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.h,v 1.34 2004/05/20 09:20:42 kettenis Exp $ */
+/* $OpenBSD: pmap.h,v 1.35 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */
/*
@@ -236,6 +236,12 @@
#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */
#define PG_X PG_AVAIL3 /* executable mapping */
+/*
+ * Number of PTE's per cache line. 4 byte pte, 32-byte cache line
+ * Used to avoid false sharing of cache lines.
+ */
+#define NPTECL 8
+
#ifdef _KERNEL
/*
* pmap data structures: see pmap.c for details of locking.
@@ -272,6 +278,7 @@ struct pmap {
union descriptor *pm_ldt; /* user-set LDT */
int pm_ldt_len; /* number of LDT entries */
int pm_ldt_sel; /* LDT selector */
+ uint32_t pm_cpus; /* mask of CPUs using map */
};
/* pm_flags */
@@ -330,19 +337,7 @@ struct pv_page {
};
/*
- * pmap_remove_record: a record of VAs that have been unmapped, used to
- * flush TLB. If we have more than PMAP_RR_MAX then we stop recording.
- */
-
-#define PMAP_RR_MAX 16 /* max of 16 pages (64K) */
-
-struct pmap_remove_record {
- int prr_npages;
- vaddr_t prr_vas[PMAP_RR_MAX];
-};
-
-/*
- * Global kernel variables
+ * global kernel variables
*/
extern pd_entry_t PTD[];
@@ -394,6 +389,10 @@ int pmap_exec_fixup(struct vm_map *, struct trapframe *,
vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
+void pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, int32_t *);
+void pmap_tlb_shootnow(int32_t);
+void pmap_do_tlb_shootdown(struct cpu_info *);
+
#define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */
/*
diff --git a/sys/arch/i386/include/proc.h b/sys/arch/i386/include/proc.h
index ff0bc4b1bb3..e5393cd22e4 100644
--- a/sys/arch/i386/include/proc.h
+++ b/sys/arch/i386/include/proc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: proc.h,v 1.3 2003/06/02 23:27:47 millert Exp $ */
+/* $OpenBSD: proc.h,v 1.4 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: proc.h,v 1.10 1995/08/06 05:33:23 mycroft Exp $ */
/*
@@ -38,6 +38,7 @@
struct mdproc {
struct trapframe *md_regs; /* registers on current frame */
int md_flags; /* machine-dependent flags */
+ int md_tss_sel; /* TSS selector */
};
/* md_flags */
diff --git a/sys/arch/i386/include/segments.h b/sys/arch/i386/include/segments.h
index ac01a2a2e26..3519f0a42ed 100644
--- a/sys/arch/i386/include/segments.h
+++ b/sys/arch/i386/include/segments.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: segments.h,v 1.12 2003/11/16 20:30:06 avsm Exp $ */
+/* $OpenBSD: segments.h,v 1.13 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: segments.h,v 1.23 1996/02/01 22:31:03 mycroft Exp $ */
/*-
@@ -124,7 +124,7 @@ struct region_descriptor {
#endif
#ifdef _KERNEL
-extern union descriptor gdt[], ldt[];
+extern union descriptor *gdt, ldt[];
extern struct gate_descriptor idt_region[];
extern struct gate_descriptor *idt;
@@ -132,6 +132,13 @@ void setgate(struct gate_descriptor *, void *, int, int, int, int);
void setregion(struct region_descriptor *, void *, size_t);
void setsegment(struct segment_descriptor *, void *, size_t, int, int,
int, int);
+void unsetgate(struct gate_descriptor *);
+void cpu_init_idt(void);
+
+int idt_vec_alloc(int, int);
+void idt_vec_set(int, void (*)(void));
+void idt_vec_free(int);
+
#endif /* _KERNEL */
#endif /* !_LOCORE */
@@ -220,7 +227,8 @@ void setsegment(struct segment_descriptor *, void *, size_t, int, int,
#define GAPM16CODE_SEL 8 /* 16 bit APM code descriptor */
#define GAPMDATA_SEL 9 /* APM data descriptor */
#define GICODE_SEL 10 /* Interrupt code descriptor (same as Kernel code) */
-#define NGDT 11
+#define GCPU_SEL 11 /* per-CPU segment */
+#define NGDT 12
/*
* Entries in the Local Descriptor Table (LDT)
diff --git a/sys/arch/i386/include/types.h b/sys/arch/i386/include/types.h
index 0da0bdc06e2..53f8a187136 100644
--- a/sys/arch/i386/include/types.h
+++ b/sys/arch/i386/include/types.h
@@ -1,5 +1,5 @@
/* $NetBSD: types.h,v 1.12 1995/12/24 01:08:03 mycroft Exp $ */
-/* $OpenBSD: types.h,v 1.13 2003/06/02 23:27:47 millert Exp $ */
+/* $OpenBSD: types.h,v 1.14 2004/06/13 21:49:16 niklas Exp $ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
@@ -72,5 +72,6 @@ typedef unsigned long long uint64_t;
typedef int32_t register_t;
#define __HAVE_NWSCONS
+#define __HAVE_CPUINFO
#endif /* _MACHTYPES_H_ */
diff --git a/sys/arch/i386/isa/clock.c b/sys/arch/i386/isa/clock.c
index 2d932ad788f..fa18701776d 100644
--- a/sys/arch/i386/isa/clock.c
+++ b/sys/arch/i386/isa/clock.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: clock.c,v 1.31 2004/02/27 21:07:49 grange Exp $ */
+/* $OpenBSD: clock.c,v 1.32 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: clock.c,v 1.39 1996/05/12 23:11:54 mycroft Exp $ */
/*-
@@ -331,7 +331,7 @@ gettick()
* wave' mode counts at 2:1).
*/
void
-delay(n)
+i8254_delay(n)
int n;
{
int limit, tick, otick;
@@ -458,7 +458,7 @@ calibrate_cyclecounter()
#endif
void
-cpu_initclocks()
+i8254_initclocks()
{
static struct timeout rtcdrain_timeout;
stathz = 128;
diff --git a/sys/arch/i386/isa/icu.h b/sys/arch/i386/isa/icu.h
deleted file mode 100644
index 9b68025526b..00000000000
--- a/sys/arch/i386/isa/icu.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* $OpenBSD: icu.h,v 1.8 2003/06/02 23:27:47 millert Exp $ */
-/* $NetBSD: icu.h,v 1.19 1996/02/01 22:31:21 mycroft Exp $ */
-
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)icu.h 5.6 (Berkeley) 5/9/91
- */
-
-/*
- * AT/386 Interrupt Control constants
- * W. Jolitz 8/89
- */
-
-#ifndef _I386_ISA_ICU_H_
-#define _I386_ISA_ICU_H_
-
-#ifndef _LOCORE
-
-/*
- * Interrupt "level" mechanism variables, masks, and macros
- */
-extern unsigned imen; /* interrupt mask enable */
-
-#define SET_ICUS() (outb(IO_ICU1 + 1, imen), outb(IO_ICU2 + 1, imen >> 8))
-
-#endif /* !_LOCORE */
-
-/*
- * Interrupt enable bits -- in order of priority
- */
-#define IRQ_SLAVE 2
-
-/*
- * Interrupt Control offset into Interrupt descriptor table (IDT)
- * XXX ICU_OFFSET is actually a property of our architecture not of the ICU
- * XXX and therefore ought to use the architecture manifest constant IDTVECOFF
- * XXX for its definition instead.
- */
-#define ICU_OFFSET 32 /* 0-31 are processor exceptions */
-#define ICU_LEN 16 /* 32-47 are ISA interrupts */
-
-#endif /* !_I386_ISA_ICU_H_ */
diff --git a/sys/arch/i386/isa/icu.s b/sys/arch/i386/isa/icu.s
index 8bd2af133ca..d3f749b4a05 100644
--- a/sys/arch/i386/isa/icu.s
+++ b/sys/arch/i386/isa/icu.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: icu.s,v 1.20 2003/11/06 21:09:34 mickey Exp $ */
+/* $OpenBSD: icu.s,v 1.21 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: icu.s,v 1.45 1996/01/07 03:59:34 mycroft Exp $ */
/*-
@@ -33,10 +33,11 @@
#include <net/netisr.h>
.data
- .globl _C_LABEL(imen), _C_LABEL(cpl), _C_LABEL(ipending)
- .globl _C_LABEL(astpending), _C_LABEL(netisr)
+ .globl _C_LABEL(imen),_C_LABEL(ipending),_C_LABEL(netisr)
_C_LABEL(imen):
.long 0xffff # interrupt mask enable (all off)
+_C_LABEL(ipending):
+ .long 0 # interrupts pending
_C_LABEL(netisr):
.long 0 # scheduling bits for network
@@ -48,13 +49,13 @@ _C_LABEL(netisr):
ALIGN_TEXT
_C_LABEL(splhigh):
movl $IPL_HIGH,%eax
- xchgl %eax,_C_LABEL(cpl)
+ xchgl %eax,CPL
ret
ALIGN_TEXT
_C_LABEL(splx):
movl 4(%esp),%eax
- movl %eax,_C_LABEL(cpl)
+ movl %eax,CPL
testl %eax,%eax
jnz _C_LABEL(Xspllower)
ret
@@ -72,12 +73,12 @@ IDTVEC(spllower)
pushl %ebx
pushl %esi
pushl %edi
- movl _C_LABEL(cpl),%ebx # save priority
+ movl CPL,%ebx # save priority
movl $1f,%esi # address to resume loop at
1: movl %ebx,%eax # get cpl
shrl $4,%eax # find its mask.
movl _C_LABEL(iunmask)(,%eax,4),%eax
- andl _C_LABEL(ipending),%eax
+ andl _C_LABEL(ipending),%eax # any non-masked bits left?
jz 2f
bsfl %eax,%eax
btrl %eax,_C_LABEL(ipending)
@@ -98,10 +99,10 @@ IDTVEC(spllower)
*/
IDTVEC(doreti)
popl %ebx # get previous priority
- movl %ebx,_C_LABEL(cpl)
+ movl %ebx,CPL
movl $1f,%esi # address to resume loop at
-1: movl %ebx,%eax # get cpl
- shrl $4,%eax # find its mask
+1: movl %ebx,%eax
+ shrl $4,%eax
movl _C_LABEL(iunmask)(,%eax,4),%eax
andl _C_LABEL(ipending),%eax
jz 2f
@@ -111,8 +112,8 @@ IDTVEC(doreti)
cli
jmp *_C_LABEL(Xresume)(,%eax,4)
2: /* Check for ASTs on exit to user mode. */
+ CHECK_ASTPENDING(%ecx)
cli
- cmpb $0,_C_LABEL(astpending)
je 3f
testb $SEL_RPL,TF_CS(%esp)
#ifdef VM86
@@ -120,10 +121,12 @@ IDTVEC(doreti)
testl $PSL_VM,TF_EFLAGS(%esp)
#endif
jz 3f
-4: movb $0,_C_LABEL(astpending)
+4: CLEAR_ASTPENDING(%ecx)
sti
+ movl $T_ASTFLT,TF_TRAPNO(%esp) /* XXX undo later. */
/* Pushed T_ASTFLT into tf_trapno on entry. */
call _C_LABEL(trap)
+ cli
jmp 2b
3: INTRFASTEXIT
@@ -137,9 +140,16 @@ IDTVEC(doreti)
IDTVEC(softtty)
#if NPCCOM > 0
movl $IPL_SOFTTTY,%eax
- movl %eax,_C_LABEL(cpl)
+ movl %eax,CPL
+ sti
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintlock)
+#endif
call _C_LABEL(comsoft)
- movl %ebx,_C_LABEL(cpl)
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintunlock)
+#endif
+ movl %ebx,CPL
#endif
jmp *%esi
@@ -152,18 +162,32 @@ IDTVEC(softtty)
IDTVEC(softnet)
movl $IPL_SOFTNET,%eax
- movl %eax,_C_LABEL(cpl)
+ movl %eax,CPL
+ sti
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintlock)
+#endif
xorl %edi,%edi
xchgl _C_LABEL(netisr),%edi
#include <net/netisr_dispatch.h>
- movl %ebx,_C_LABEL(cpl)
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintunlock)
+#endif
+ movl %ebx,CPL
jmp *%esi
#undef DONETISR
IDTVEC(softclock)
movl $IPL_SOFTCLOCK,%eax
- movl %eax,_C_LABEL(cpl)
+ movl %eax,CPL
+ sti
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintlock)
+#endif
call _C_LABEL(softclock)
- movl %ebx,_C_LABEL(cpl)
+#ifdef MULTIPROCESSOR
+ call _C_LABEL(i386_softintunlock)
+#endif
+ movl %ebx,CPL
jmp *%esi
diff --git a/sys/arch/i386/isa/isa_machdep.c b/sys/arch/i386/isa/isa_machdep.c
index 0a8b580c6f4..0df077cdb19 100644
--- a/sys/arch/i386/isa/isa_machdep.c
+++ b/sys/arch/i386/isa/isa_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: isa_machdep.c,v 1.48 2003/06/02 23:27:47 millert Exp $ */
+/* $OpenBSD: isa_machdep.c,v 1.49 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: isa_machdep.c,v 1.22 1997/06/12 23:57:32 thorpej Exp $ */
#define ISA_DMA_STATS
@@ -122,18 +122,25 @@
#include <uvm/uvm_extern.h>
+#include "ioapic.h"
+
+#if NIOAPIC > 0
+#include <machine/i82093var.h>
+#include <machine/mpbiosvar.h>
+#endif
+
#define _I386_BUS_DMA_PRIVATE
#include <machine/bus.h>
#include <machine/intr.h>
#include <machine/pio.h>
#include <machine/cpufunc.h>
+#include <machine/i8259.h>
#include <dev/isa/isareg.h>
#include <dev/isa/isavar.h>
#include <dev/isa/isadmavar.h>
#include <i386/isa/isa_machdep.h>
-#include <i386/isa/icu.h>
#include "isadma.h"
@@ -250,6 +257,16 @@ isa_defaultirq()
outb(IO_ICU2, 0x0a); /* Read IRR by default. */
}
+void
+isa_nodefaultirq()
+{
+ int i;
+
+ /* icu vectors */
+ for (i = 0; i < ICU_LEN; i++)
+ unsetgate(&idt[ICU_OFFSET + i]);
+}
+
/*
* Handle a NMI, possibly a machine check.
* return true to panic system, false to ignore.
@@ -286,6 +303,9 @@ int intrtype[ICU_LEN], intrmask[ICU_LEN], intrlevel[ICU_LEN];
int iminlevel[ICU_LEN], imaxlevel[ICU_LEN];
struct intrhand *intrhand[ICU_LEN];
+int imask[NIPL]; /* Bitmask telling what interrupts are blocked. */
+int iunmask[NIPL]; /* Bitmask telling what interrupts are accepted. */
+
/*
* Recalculate the interrupt masks from scratch.
* We could code special registry and deregistry versions of this function that
@@ -295,24 +315,27 @@ struct intrhand *intrhand[ICU_LEN];
void
intr_calculatemasks()
{
- int irq, level;
+ int irq, level, unusedirqs;
struct intrhand *q;
/* First, figure out which levels each IRQ uses. */
+ unusedirqs = 0xffff;
for (irq = 0; irq < ICU_LEN; irq++) {
- register int levels = 0;
+ int levels = 0;
for (q = intrhand[irq]; q; q = q->ih_next)
levels |= 1 << IPL(q->ih_level);
intrlevel[irq] = levels;
+ if (levels)
+ unusedirqs &= ~(1 << irq);
}
/* Then figure out which IRQs use each level. */
for (level = 0; level < NIPL; level++) {
- register int irqs = 0;
+ int irqs = 0;
for (irq = 0; irq < ICU_LEN; irq++)
if (intrlevel[irq] & (1 << level))
irqs |= 1 << irq;
- imask[level] = irqs;
+ imask[level] = irqs | unusedirqs;
}
/*
@@ -331,25 +354,40 @@ intr_calculatemasks()
/* And eventually calculate the complete masks. */
for (irq = 0; irq < ICU_LEN; irq++) {
- register int irqs = 1 << irq;
+ int irqs = 1 << irq;
int minlevel = IPL_NONE;
int maxlevel = IPL_NONE;
- for (q = intrhand[irq]; q; q = q->ih_next) {
- irqs |= IMASK(q->ih_level);
- if (minlevel == IPL_NONE || q->ih_level < minlevel)
- minlevel = q->ih_level;
- if (q->ih_level > maxlevel)
- maxlevel = q->ih_level;
+ if (intrhand[irq] == NULL) {
+ maxlevel = IPL_HIGH;
+ irqs = IMASK(IPL_HIGH);
+ } else {
+ for (q = intrhand[irq]; q; q = q->ih_next) {
+ irqs |= IMASK(q->ih_level);
+ if (minlevel == IPL_NONE ||
+ q->ih_level < minlevel)
+ minlevel = q->ih_level;
+ if (q->ih_level > maxlevel)
+ maxlevel = q->ih_level;
+ }
}
+ if (irqs != IMASK(maxlevel))
+ panic("irq %d level %x mask mismatch: %x vs %x", irq,
+ maxlevel, irqs, IMASK(maxlevel));
+
intrmask[irq] = irqs;
iminlevel[irq] = minlevel;
imaxlevel[irq] = maxlevel;
+
+#if 0
+ printf("irq %d: level %x, mask 0x%x (%x)\n", irq,
+ imaxlevel[irq], intrmask[irq], IMASK(imaxlevel[irq]));
+#endif
}
/* Lastly, determine which IRQs are actually in use. */
{
- register int irqs = 0;
+ int irqs = 0;
for (irq = 0; irq < ICU_LEN; irq++)
if (intrhand[irq])
irqs |= 1 << irq;
@@ -496,17 +534,48 @@ isa_intr_establish(ic, irq, type, level, ih_fun, ih_arg, ih_what)
struct intrhand **p, *q, *ih;
static struct intrhand fakehand = {fakeintr};
+#if NIOAPIC > 0
+ struct mp_intr_map *mip;
+
+ if (mp_busses != NULL) {
+ int mpspec_pin = irq;
+ int bus = mp_isa_bus;
+ int airq;
+
+ for (mip = mp_busses[bus].mb_intrs; mip != NULL;
+ mip = mip->next) {
+ if (mip->bus_pin == mpspec_pin) {
+ airq = mip->ioapic_ih | irq;
+ break;
+ }
+ }
+ if (mip == NULL && mp_eisa_bus != -1) {
+ for (mip = mp_busses[mp_eisa_bus].mb_intrs;
+ mip != NULL; mip=mip->next) {
+ if (mip->bus_pin == mpspec_pin) {
+ airq = mip->ioapic_ih | irq;
+ break;
+ }
+ }
+ }
+ if (mip == NULL)
+ printf("isa_intr_establish: no MP mapping found\n");
+ else
+ return (apic_intr_establish(airq, type, level, ih_fun,
+ ih_arg, ih_what));
+ }
+#endif
/* no point in sleeping unless someone can free memory. */
ih = malloc(sizeof *ih, M_DEVBUF, cold ? M_NOWAIT : M_WAITOK);
if (ih == NULL) {
printf("%s: isa_intr_establish: can't malloc handler info\n",
ih_what);
- return NULL;
+ return (NULL);
}
if (!LEGAL_IRQ(irq) || type == IST_NONE) {
- printf("%s: intr_establish: bogus irq or type\n", ih_what);
- return NULL;
+ printf("%s: isa_intr_establish: bogus irq or type\n", ih_what);
+ return (NULL);
}
switch (intrtype[irq]) {
case IST_NONE:
@@ -521,7 +590,7 @@ isa_intr_establish(ic, irq, type, level, ih_fun, ih_arg, ih_what)
/*printf("%s: intr_establish: can't share %s with %s, irq %d\n",
ih_what, isa_intr_typename(intrtype[irq]),
isa_intr_typename(type), irq);*/
- return NULL;
+ return (NULL);
}
break;
}
@@ -571,8 +640,15 @@ isa_intr_disestablish(ic, arg)
int irq = ih->ih_irq;
struct intrhand **p, *q;
+#if NIOAPIC > 0
+ if (irq & APIC_INT_VIA_APIC) {
+ apic_intr_disestablish(arg);
+ return;
+ }
+#endif
+
if (!LEGAL_IRQ(irq))
- panic("intr_disestablish: bogus irq");
+ panic("intr_disestablish: bogus irq %d", irq);
/*
* Remove the handler from the chain.
diff --git a/sys/arch/i386/isa/mms.c b/sys/arch/i386/isa/mms.c
index 1b84aecf965..137c18384a5 100644
--- a/sys/arch/i386/isa/mms.c
+++ b/sys/arch/i386/isa/mms.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: mms.c,v 1.16 2002/03/14 01:26:33 millert Exp $ */
+/* $OpenBSD: mms.c,v 1.17 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: mms.c,v 1.35 2000/01/08 02:57:25 takemura Exp $ */
/*-
diff --git a/sys/arch/i386/isa/npx.c b/sys/arch/i386/isa/npx.c
index b56a17f3875..4111ba0ab90 100644
--- a/sys/arch/i386/isa/npx.c
+++ b/sys/arch/i386/isa/npx.c
@@ -1,10 +1,10 @@
-/* $OpenBSD: npx.c,v 1.31 2004/02/01 19:05:21 deraadt Exp $ */
+/* $OpenBSD: npx.c,v 1.32 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: npx.c,v 1.57 1996/05/12 23:12:24 mycroft Exp $ */
#if 0
-#define iprintf(x) printf x
+#define IPRINTF(x) printf x
#else
-#define iprintf(x)
+#define IPRINTF(x)
#endif
/*-
@@ -54,21 +54,16 @@
#include <machine/cpu.h>
#include <machine/intr.h>
+#include <machine/npx.h>
#include <machine/pio.h>
#include <machine/cpufunc.h>
#include <machine/pcb.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
+#include <machine/i8259.h>
#include <dev/isa/isareg.h>
#include <dev/isa/isavar.h>
-#include <i386/isa/icu.h>
-
-#if 0
-#define IPRINTF(x) printf x
-#else
-#define IPRINTF(x)
-#endif
/*
* 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
@@ -104,7 +99,6 @@
int npxintr(void *);
static int npxprobe1(struct isa_attach_args *);
-static void npxsave1(void);
struct npx_softc {
struct device sc_dev;
@@ -129,10 +123,7 @@ enum npx_type {
NPX_BROKEN,
};
-struct proc *npxproc;
-
static enum npx_type npx_type;
-static int npx_nointr;
static volatile u_int npx_intrs_while_probing;
static volatile u_int npx_traps_while_probing;
@@ -160,15 +151,15 @@ fpu_save(union savefpu *addr)
}
static int
-npxdna_notset(struct proc *p)
+npxdna_notset(struct cpu_info *ci)
{
panic("npxdna vector not initialized");
}
-int (*npxdna_func)(struct proc *) = npxdna_notset;
-int npxdna_s87(struct proc *);
+int (*npxdna_func)(struct cpu_info *) = npxdna_notset;
+int npxdna_s87(struct cpu_info *);
#ifdef I686_CPU
-int npxdna_xmm(struct proc *);
+int npxdna_xmm(struct cpu_info *);
#endif /* I686_CPU */
void npxexit(void);
@@ -350,6 +341,19 @@ asm (".text\n\t"
"popl %eax\n\t"
"ret\n\t");
+void
+npxinit(struct cpu_info *ci)
+{
+ lcr0(rcr0() & ~(CR0_EM|CR0_TS));
+ fninit();
+ if (npx586bug1(4195835, 3145727) != 0) {
+ i386_fpu_fdivbug = 1;
+ printf("%s: WARNING: Pentium FDIV bug detected!\n",
+ ci->ci_dev.dv_xname);
+ }
+ lcr0(rcr0() | (CR0_TS));
+}
+
/*
* Attach routine - announce which it is, and wire into system
*/
@@ -379,13 +383,7 @@ npxattach(parent, self, aux)
return;
}
- lcr0(rcr0() & ~(CR0_EM|CR0_TS));
- fninit();
- if (npx586bug1(4195835, 3145727) != 0) {
- i386_fpu_fdivbug = 1;
- printf("WARNING: Pentium FDIV bug detected!\n");
- }
- lcr0(rcr0() | (CR0_TS));
+ npxinit(&cpu_info_primary);
i386_fpu_present = 1;
#ifdef I686_CPU
@@ -415,16 +413,17 @@ int
npxintr(arg)
void *arg;
{
- register struct proc *p = npxproc;
+ struct cpu_info *ci = curcpu();
+ struct proc *p = ci->ci_fpcurproc;
union savefpu *addr;
struct intrframe *frame = arg;
int code;
union sigval sv;
uvmexp.traps++;
- iprintf(("Intr"));
+ IPRINTF(("%s: fp intr\n", ci->ci_dev.dv_xname));
- if (p == 0 || npx_type == NPX_NONE) {
+ if (p == NULL || npx_type == NPX_NONE) {
/* XXX no %p in stand/printf.c. Cast to quiet gcc -Wall. */
printf("npxintr: p = %lx, curproc = %lx, npx_type = %d\n",
(u_long) p, (u_long) curproc, npx_type);
@@ -438,11 +437,21 @@ npxintr(arg)
* If we're saving, ignore the interrupt. The FPU will happily
* generate another one when we restore the state later.
*/
- if (npx_nointr != 0)
+ if (ci->ci_fpsaving)
return (1);
+
+#ifdef DIAGNOSTIC
/*
- * Find the address of npxproc's savefpu. This is not necessarily
- * the one in curpcb.
+ * At this point, fpcurproc should be curproc. If it wasn't, the TS
+ * bit should be set, and we should have gotten a DNA exception.
+ */
+ if (p != curproc)
+ panic("npxintr: wrong process");
+#endif
+
+ /*
+ * Find the address of fpcurproc's saved FPU state. (Given the
+ * invariant above, this is always the one in curpcb.)
*/
addr = &p->p_addr->u_pcb.pcb_savefpu;
/*
@@ -504,7 +513,7 @@ npxintr(arg)
if (addr->sv_87.sv_ex_sw & EN_SW_IE)
code = FPE_FLTINV;
#ifdef notyet
- else if (addr->sv_ex_sw & EN_SW_DE)
+ else if (addr->sv_87.sv_ex_sw & EN_SW_DE)
code = FPE_FLTDEN;
#endif
else if (addr->sv_87.sv_ex_sw & EN_SW_ZE)
@@ -541,115 +550,147 @@ npxintr(arg)
}
/*
- * Wrapper for fnsave instruction to handle h/w bugs. If there is an error
- * pending, then fnsave generates a bogus IRQ13 on some systems. Force any
- * IRQ13 to be handled immediately, and then ignore it.
- *
- * This routine is always called at spl0. If it might called with the NPX
- * interrupt masked, it would be necessary to forcibly unmask the NPX interrupt
- * so that it could succeed.
- */
-static __inline void
-npxsave1(void)
-{
- register struct pcb *pcb;
-
- npx_nointr = 1;
- pcb = &npxproc->p_addr->u_pcb;
- fpu_save(&pcb->pcb_savefpu);
- pcb->pcb_cr0 |= CR0_TS;
- fwait();
- npx_nointr = 0;
-}
-
-/*
* Implement device not available (DNA) exception
*
- * If the we were the last process to use the FPU, we can simply return.
+ * If we were the last process to use the FPU, we can simply return.
* Otherwise, we save the previous state, if necessary, and restore our last
* saved state.
*/
+
+/*
+ * XXX It is unclear if the code below is correct in the multiprocessor
+ * XXX case. Check the NetBSD sources once again to be sure.
+ */
#ifdef I686_CPU
int
-npxdna_xmm(struct proc *p)
+npxdna_xmm(struct cpu_info *ci)
{
+ struct proc *p;
+ int s;
-#ifdef DIAGNOSTIC
- if (cpl != 0 || npx_nointr != 0)
- panic("npxdna: masked");
+ if (ci->ci_fpsaving) {
+ printf("recursive npx trap; cr0=%x\n", rcr0());
+ return (0);
+ }
+
+ s = splipi(); /* lock out IPI's while we clean house.. */
+
+#ifdef MULTIPROCESSOR
+ p = ci->ci_curproc;
+#else
+ p = curproc;
#endif
- p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
- clts();
+ IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev.dv_xname, (u_long)p,
+ (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));
/*
+ * XXX should have a fast-path here when no save/restore is necessary
+ */
+ /*
* Initialize the FPU state to clear any exceptions. If someone else
* was using the FPU, save their state (which does an implicit
* initialization).
*/
- npx_nointr = 1;
- if (npxproc != 0 && npxproc != p) {
- IPRINTF(("Save"));
- npxsave1();
+ if (ci->ci_fpcurproc != NULL) {
+ IPRINTF(("%s: fp save %lx\n", ci->ci_dev.dv_xname,
+ (u_long)ci->ci_fpcurproc));
+ npxsave_cpu(ci, 1);
} else {
- IPRINTF(("Init"));
- fninit();
- fwait();
+ clts();
+ IPRINTF(("%s: fp init\n", ci->ci_dev.dv_xname));
+ fninit();
+ fwait();
+ stts();
}
- npx_nointr = 0;
- npxproc = p;
+ splx(s);
+
+ IPRINTF(("%s: done saving\n", ci->ci_dev.dv_xname));
+ KDASSERT(ci->ci_fpcurproc == NULL);
+#ifndef MULTIPROCESSOR
+ KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
+#else
+ if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
+ npxsave_proc(p, 1);
+#endif
+ p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
+ clts();
+ s = splipi();
+ ci->ci_fpcurproc = p;
+ p->p_addr->u_pcb.pcb_fpcpu = ci;
+ splx(s);
if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm.sv_env.en_cw);
p->p_md.md_flags |= MDP_USEDFPU;
- } else
+ } else {
fxrstor(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm);
+ }
return (1);
}
#endif /* I686_CPU */
int
-npxdna_s87(struct proc *p)
+npxdna_s87(struct cpu_info *ci)
{
- static u_short control = __INITIAL_NPXCW__;
+ struct proc *p;
+ int s;
+
+ KDASSERT(i386_use_fxsave == 0);
- if (npx_type == NPX_NONE) {
- iprintf(("Emul"));
+ if (ci->ci_fpsaving) {
+ printf("recursive npx trap; cr0=%x\n", rcr0());
return (0);
}
-#ifdef DIAGNOSTIC
- if (cpl != IPL_NONE || npx_nointr != 0)
- panic("npxdna: masked");
+ s = splipi(); /* lock out IPI's while we clean house.. */
+#ifdef MULTIPROCESSOR
+ p = ci->ci_curproc;
+#else
+ p = curproc;
#endif
+ IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev.dv_xname, (u_long)p,
+ (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));
+
+ /*
+ * If someone else was using our FPU, save their state (which does an
+ * implicit initialization); otherwise, initialize the FPU state to
+ * clear any exceptions.
+ */
+ if (ci->ci_fpcurproc != NULL) {
+ IPRINTF(("%s: fp save %lx\n", ci->ci_dev.dv_xname,
+ (u_long)ci->ci_fpcurproc));
+ npxsave_cpu(ci, 1);
+ } else {
+ clts();
+ IPRINTF(("%s: fp init\n", ci->ci_dev.dv_xname));
+ fninit();
+ fwait();
+ stts();
+ }
+ splx(s);
+
+ IPRINTF(("%s: done saving\n", ci->ci_dev.dv_xname));
+ KDASSERT(ci->ci_fpcurproc == NULL);
+#ifndef MULTIPROCESSOR
+ KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
+#else
+ if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
+ npxsave_proc(p, 1);
+#endif
p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
clts();
+ s = splipi();
+ ci->ci_fpcurproc = p;
+ p->p_addr->u_pcb.pcb_fpcpu = ci;
+ splx(s);
if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
+ fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_87.sv_env.en_cw);
p->p_md.md_flags |= MDP_USEDFPU;
- iprintf(("Init"));
- if (npxproc != 0 && npxproc != p)
- npxsave1();
- else {
- npx_nointr = 1;
- fninit();
- fwait();
- npx_nointr = 0;
- }
- npxproc = p;
- fldcw(&control);
} else {
- if (npxproc != 0) {
-#ifdef DIAGNOSTIC
- if (npxproc == p)
- panic("npxdna: same process");
-#endif
- iprintf(("Save"));
- npxsave1();
- }
- npxproc = p;
/*
* The following frstor may cause an IRQ13 when the state being
* restored has a pending error. The error will appear to have
@@ -670,37 +711,121 @@ npxdna_s87(struct proc *p)
}
/*
- * Drop the current FPU state on the floor.
+ * The FNSAVE instruction clears the FPU state. Rather than reloading the FPU
+ * immediately, we clear fpcurproc and turn on CR0_TS to force a DNA and a
+ * reload of the FPU state the next time we try to use it. This routine
+ * is only called when forking, core dumping, or debugging, or swapping,
+ * so the lazy reload at worst forces us to trap once per fork(), and at best
+ * saves us a reload once per fork().
*/
void
-npxdrop()
+npxsave_cpu(struct cpu_info *ci, int save)
{
+ struct proc *p;
+ int s;
+
+ KDASSERT(ci == curcpu());
+
+ p = ci->ci_fpcurproc;
+ if (p == NULL)
+ return;
+
+ IPRINTF(("%s: fp cpu %s %lx\n", ci->ci_dev.dv_xname,
+ save ? "save" : "flush", (u_long)p));
+
+ if (save) {
+#ifdef DIAGNOSTIC
+ if (ci->ci_fpsaving != 0)
+ panic("npxsave_cpu: recursive save!");
+#endif
+ /*
+ * Set ci->ci_fpsaving, so that any pending exception will be
+ * thrown away. (It will be caught again if/when the FPU
+ * state is restored.)
+ *
+ * XXX on i386 and earlier, this routine should always be
+ * called at spl0; if it might called with the NPX interrupt
+ * masked, it would be necessary to forcibly unmask the NPX
+ * interrupt so that it could succeed.
+ * XXX this is irrelevant on 486 and above (systems
+ * which report FP failures via traps rather than irq13).
+ * XXX punting for now..
+ */
+ clts();
+ ci->ci_fpsaving = 1;
+ fpu_save(&p->p_addr->u_pcb.pcb_savefpu);
+ ci->ci_fpsaving = 0;
+ /* It is unclear if this is needed. */
+ fwait();
+ }
+ /*
+ * We set the TS bit in the saved CR0 for this process, so that it
+ * will get a DNA exception on any FPU instruction and force a reload.
+ */
stts();
- npxproc->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
- npxproc = 0;
+ p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
+
+ s = splipi();
+ p->p_addr->u_pcb.pcb_fpcpu = NULL;
+ ci->ci_fpcurproc = NULL;
+ splx(s);
}
/*
- * Save npxproc's FPU state.
- *
- * The FNSAVE instruction clears the FPU state. Rather than reloading the FPU
- * immediately, we clear npxproc and turn on CR0_TS to force a DNA and a reload
- * of the FPU state the next time we try to use it. This routine is only
- * called when forking or core dump, so this algorithm at worst forces us to
- * trap once per fork(), and at best saves us a reload once per fork().
+ * Save p's FPU state, which may be on this processor or another processor.
*/
-void
-npxsave()
+ void
+npxsave_proc(struct proc *p, int save)
{
+ struct cpu_info *ci = curcpu();
+ struct cpu_info *oci;
+
+ KDASSERT(p->p_addr != NULL);
+ KDASSERT(p->p_flag & P_INMEM);
+ oci = p->p_addr->u_pcb.pcb_fpcpu;
+ if (oci == NULL)
+ return;
+
+ IPRINTF(("%s: fp proc %s %lx\n", ci->ci_dev.dv_xname,
+ save ? "save" : "flush", (u_long)p));
+
+#if defined(MULTIPROCESSOR)
+ if (oci == ci) {
+ int s = splipi();
+ npxsave_cpu(ci, save);
+ splx(s);
+ } else {
#ifdef DIAGNOSTIC
- if (cpl != IPL_NONE || npx_nointr != 0)
- panic("npxsave: masked");
+ int spincount;
+#endif
+
+ IPRINTF(("%s: fp ipi to %s %s %lx\n", ci->ci_dev.dv_xname,
+ oci->ci_dev.dv_xname, save ? "save" : "flush", (u_long)p));
+
+ i386_send_ipi(oci,
+ save ? I386_IPI_SYNCH_FPU : I386_IPI_FLUSH_FPU);
+
+#ifdef DIAGNOSTIC
+ spincount = 0;
+#endif
+ while (p->p_addr->u_pcb.pcb_fpcpu != NULL)
+#ifdef DIAGNOSTIC
+ {
+ spincount++;
+ if (spincount > 100000000) {
+ panic("fp_save ipi didn't");
+ }
+ }
+#else
+ __splbarrier(); /* XXX replace by generic barrier */
+ ;
+#endif
+ }
+#else
+ KASSERT(ci->ci_fpcurproc == p);
+ npxsave_cpu(ci, save);
#endif
- iprintf(("Fork"));
- clts();
- npxsave1();
- stts();
- npxproc = 0;
}
+
diff --git a/sys/arch/i386/pci/pci_intr_fixup.c b/sys/arch/i386/pci/pci_intr_fixup.c
index 30b47f83807..ea0cfd009f1 100644
--- a/sys/arch/i386/pci/pci_intr_fixup.c
+++ b/sys/arch/i386/pci/pci_intr_fixup.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pci_intr_fixup.c,v 1.32 2004/02/24 19:30:00 markus Exp $ */
+/* $OpenBSD: pci_intr_fixup.c,v 1.33 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: pci_intr_fixup.c,v 1.10 2000/08/10 21:18:27 soda Exp $ */
/*
@@ -100,12 +100,12 @@
#include <machine/bus.h>
#include <machine/intr.h>
+#include <machine/i8259.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>
-#include <i386/isa/icu.h>
#include <i386/pci/pcibiosvar.h>
struct pciintr_link_map {
diff --git a/sys/arch/i386/pci/pci_machdep.c b/sys/arch/i386/pci/pci_machdep.c
index 1d05bf4a202..4ccb6f0e296 100644
--- a/sys/arch/i386/pci/pci_machdep.c
+++ b/sys/arch/i386/pci/pci_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pci_machdep.c,v 1.24 2003/05/04 08:01:08 deraadt Exp $ */
+/* $OpenBSD: pci_machdep.c,v 1.25 2004/06/13 21:49:16 niklas Exp $ */
/* $NetBSD: pci_machdep.c,v 1.28 1997/06/06 23:29:17 thorpej Exp $ */
/*-
@@ -92,6 +92,7 @@
#define _I386_BUS_DMA_PRIVATE
#include <machine/bus.h>
#include <machine/pio.h>
+#include <machine/i8259.h>
#include "bios.h"
#if NBIOS > 0
@@ -99,11 +100,17 @@
extern bios_pciinfo_t *bios_pciinfo;
#endif
-#include <i386/isa/icu.h>
#include <dev/isa/isavar.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
+#include "ioapic.h"
+
+#if NIOAPIC > 0
+#include <machine/i82093var.h>
+#include <machine/mpbiosvar.h>
+#endif
+
#include "pcibios.h"
#if NPCIBIOS > 0
#include <i386/pci/pcibiosvar.h>
@@ -408,6 +415,11 @@ pci_intr_map(pa, ihp)
struct pci_attach_args *pa;
pci_intr_handle_t *ihp;
{
+#if NIOAPIC > 0
+ struct mp_intr_map *mip;
+ int bus, dev, func;
+#endif
+
#if NPCIBIOS > 0
pci_chipset_tag_t pc = pa->pa_pc;
pcitag_t intrtag = pa->pa_intrtag;
@@ -459,6 +471,49 @@ pci_intr_map(pa, ihp)
line = 9;
}
}
+#if NIOAPIC > 0
+ pci_decompose_tag (pc, intrtag, &bus, &dev, &func);
+
+ if (mp_busses != NULL) {
+ /*
+ * Assumes 1:1 mapping between PCI bus numbers and
+ * the numbers given by the MP bios.
+ * XXX Is this a valid assumption?
+ */
+ int mpspec_pin = (dev<<2)|(pin-1);
+
+ for (mip = mp_busses[bus].mb_intrs; mip != NULL; mip=mip->next) {
+ if (mip->bus_pin == mpspec_pin) {
+ ihp->line = mip->ioapic_ih | line;
+ return 0;
+ }
+ }
+ if (mip == NULL && mp_isa_bus != -1) {
+ for (mip = mp_busses[mp_isa_bus].mb_intrs; mip != NULL;
+ mip=mip->next) {
+ if (mip->bus_pin == line) {
+ ihp->line = mip->ioapic_ih | line;
+ return 0;
+ }
+ }
+ }
+ if (mip == NULL && mp_eisa_bus != -1) {
+ for (mip = mp_busses[mp_eisa_bus].mb_intrs;
+ mip != NULL; mip=mip->next) {
+ if (mip->bus_pin == line) {
+ ihp->line = mip->ioapic_ih | line;
+ return 0;
+ }
+ }
+ }
+ if (mip == NULL) {
+ printf("pci_intr_map: "
+ "bus %d dev %d func %d pin %d; line %d\n",
+ bus, dev, func, pin, line);
+ printf("pci_intr_map: no MP mapping found\n");
+ }
+ }
+#endif
return 0;
@@ -472,14 +527,22 @@ pci_intr_string(pc, ih)
pci_chipset_tag_t pc;
pci_intr_handle_t ih;
{
- static char irqstr[8]; /* 4 + 2 + NULL + sanity */
+ static char irqstr[64];
- if (ih.line == 0 || ih.line >= ICU_LEN || ih.line == 2)
+ if (ih.line == 0 || (ih.line & 0xff) >= ICU_LEN || ih.line == 2)
panic("pci_intr_string: bogus handle 0x%x", ih.line);
+#if NIOAPIC > 0
+ if (ih.line & APIC_INT_VIA_APIC) {
+ snprintf(irqstr, sizeof irqstr, "apic %d int %d (irq %d)",
+ APIC_IRQ_APIC(ih.line), APIC_IRQ_PIN(ih.line),
+ ih.line & 0xff);
+ return (irqstr);
+ }
+#endif
+
snprintf(irqstr, sizeof irqstr, "irq %d", ih.line);
return (irqstr);
-
}
void *
@@ -492,16 +555,21 @@ pci_intr_establish(pc, ih, level, func, arg, what)
{
void *ret;
+#if NIOAPIC > 0
+ if (ih.line != -1 && ih.line & APIC_INT_VIA_APIC)
+ return (apic_intr_establish(ih.line, IST_LEVEL, level, func,
+ arg, what));
+#endif
if (ih.line == 0 || ih.line >= ICU_LEN || ih.line == 2)
panic("pci_intr_establish: bogus handle 0x%x", ih.line);
- ret = isa_intr_establish(NULL, ih.line,
- IST_LEVEL, level, func, arg, what);
+ ret = isa_intr_establish(NULL, ih.line, IST_LEVEL, level, func, arg,
+ what);
#if NPCIBIOS > 0
if (ret)
pci_intr_route_link(pc, &ih);
#endif
- return ret;
+ return (ret);
}
void
@@ -510,5 +578,5 @@ pci_intr_disestablish(pc, cookie)
void *cookie;
{
/* XXX oh, unroute the pci int link? */
- return isa_intr_disestablish(NULL, cookie);
+ return (isa_intr_disestablish(NULL, cookie));
}
diff --git a/sys/arch/m88k/include/cpu.h b/sys/arch/m88k/include/cpu.h
index 213c4a7d672..992d11f0cb5 100644
--- a/sys/arch/m88k/include/cpu.h
+++ b/sys/arch/m88k/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.1 2004/04/26 12:34:05 miod Exp $ */
+/* $OpenBSD: cpu.h,v 1.2 2004/06/13 21:49:17 niklas Exp $ */
/*
* Copyright (c) 1996 Nivas Madhur
* Copyright (c) 1992, 1993
@@ -103,7 +103,7 @@ extern int want_ast;
* or after the current trap/syscall if in system mode.
*/
extern int want_resched; /* resched() was called */
-#define need_resched() (want_resched = 1, want_ast = 1)
+#define need_resched(ci) (want_resched = 1, want_ast = 1)
/*
* Give a profiling tick to the current process when the user profiling
diff --git a/sys/arch/m88k/m88k/genassym.cf b/sys/arch/m88k/m88k/genassym.cf
index 9c7329013c3..1f533586cd7 100644
--- a/sys/arch/m88k/m88k/genassym.cf
+++ b/sys/arch/m88k/m88k/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.1 2004/04/29 14:33:27 miod Exp $
+# $OpenBSD: genassym.cf,v 1.2 2004/06/13 21:49:17 niklas Exp $
#
# Copyright (c) 1982, 1990 The Regents of the University of California.
# All rights reserved.
@@ -28,7 +28,7 @@
# SUCH DAMAGE.
#
# @(#)genassym.c 7.8 (Berkeley) 5/7/91
-# $Id: genassym.cf,v 1.1 2004/04/29 14:33:27 miod Exp $
+# $Id: genassym.cf,v 1.2 2004/06/13 21:49:17 niklas Exp $
#
include <sys/param.h>
@@ -53,6 +53,7 @@ member p_stat
member p_wchan
export SRUN
+export SONPROC
# general constants
export UPAGES
diff --git a/sys/arch/m88k/m88k/process.S b/sys/arch/m88k/m88k/process.S
index 9c460e544da..aef28aca221 100644
--- a/sys/arch/m88k/m88k/process.S
+++ b/sys/arch/m88k/m88k/process.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: process.S,v 1.1 2004/04/29 14:33:27 miod Exp $ */
+/* $OpenBSD: process.S,v 1.2 2004/06/13 21:49:17 niklas Exp $ */
/*
* Copyright (c) 1996 Nivas Madhur
* All rights reserved.
@@ -215,6 +215,8 @@ ASLOCAL(Ldoneloop)
or.u r11, r0, hi16(_C_LABEL(curproc))
st r9, r11,lo16(_C_LABEL(curproc)) /* curproc = p */
+ or r2, r0, SONPROC
+ st.b r2, r9, P_STAT
/* r9 is curproc */
st r0, r9, P_BACK /* p->p_back = 0 */
diff --git a/sys/arch/mac68k/include/cpu.h b/sys/arch/mac68k/include/cpu.h
index ed14fadfadf..5fdded9e731 100644
--- a/sys/arch/mac68k/include/cpu.h
+++ b/sys/arch/mac68k/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.29 2004/05/20 09:20:42 kettenis Exp $ */
+/* $OpenBSD: cpu.h,v 1.30 2004/06/13 21:49:17 niklas Exp $ */
/* $NetBSD: cpu.h,v 1.45 1997/02/10 22:13:40 scottr Exp $ */
/*
@@ -106,7 +106,7 @@ struct clockframe {
* or after the current trap/syscall if in system mode.
*/
extern int want_resched; /* resched() was called */
-#define need_resched() { want_resched++; aston(); }
+#define need_resched(ci) { want_resched++; aston(); }
/*
* Give a profiling tick to the current process from the softclock
diff --git a/sys/arch/macppc/macppc/genassym.cf b/sys/arch/macppc/macppc/genassym.cf
index 45d05ddaa5e..a8bce6e5b4f 100644
--- a/sys/arch/macppc/macppc/genassym.cf
+++ b/sys/arch/macppc/macppc/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.9 2003/10/16 05:03:22 deraadt Exp $
+# $OpenBSD: genassym.cf,v 1.10 2004/06/13 21:49:18 niklas Exp $
#
# Copyright (c) 1982, 1990 The Regents of the University of California.
# All rights reserved.
@@ -71,3 +71,6 @@ struct proc
member p_forw
member p_back
member p_addr
+member p_stat
+
+export SONPROC
diff --git a/sys/arch/macppc/macppc/locore.S b/sys/arch/macppc/macppc/locore.S
index fe21293bff7..4536ffd14d6 100644
--- a/sys/arch/macppc/macppc/locore.S
+++ b/sys/arch/macppc/macppc/locore.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.S,v 1.15 2004/01/03 00:57:06 pvalchev Exp $ */
+/* $OpenBSD: locore.S,v 1.16 2004/06/13 21:49:18 niklas Exp $ */
/* $NetBSD: locore.S,v 1.2 1996/10/16 19:33:09 ws Exp $ */
/*
@@ -247,6 +247,9 @@ _C_LABEL(sw1):
lis %r4,_C_LABEL(curproc)@ha
stw %r31,_C_LABEL(curproc)@l(%r4) /* record new process */
+ li %r3,SONPROC
+ stb %r3,P_STAT(%r31)
+
mfmsr %r3
ori %r3,%r3,PSL_EE /* Now we can interrupt again */
mtmsr %r3
diff --git a/sys/arch/mvme68k/include/cpu.h b/sys/arch/mvme68k/include/cpu.h
index 091cd1aec50..5f3aeb08da3 100644
--- a/sys/arch/mvme68k/include/cpu.h
+++ b/sys/arch/mvme68k/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.17 2004/05/20 09:20:42 kettenis Exp $ */
+/* $OpenBSD: cpu.h,v 1.18 2004/06/13 21:49:18 niklas Exp $ */
/*
* Copyright (c) 1995 Theo de Raadt
@@ -127,7 +127,7 @@ struct clockframe {
* or after the current trap/syscall if in system mode.
*/
extern int want_resched;
-#define need_resched() { want_resched = 1; aston(); }
+#define need_resched(ci) { want_resched = 1; aston(); }
/*
* Give a profiling tick to the current process when the user profiling
diff --git a/sys/arch/powerpc/include/cpu.h b/sys/arch/powerpc/include/cpu.h
index f80c925b2a6..9fcae7e00b8 100644
--- a/sys/arch/powerpc/include/cpu.h
+++ b/sys/arch/powerpc/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.18 2004/02/14 15:09:22 grange Exp $ */
+/* $OpenBSD: cpu.h,v 1.19 2004/06/13 21:49:19 niklas Exp $ */
/* $NetBSD: cpu.h,v 1.1 1996/09/30 16:34:21 ws Exp $ */
/*
@@ -51,7 +51,7 @@ void delay(unsigned);
extern volatile int want_resched;
extern volatile int astpending;
-#define need_resched() (want_resched = 1, astpending = 1)
+#define need_resched(ci) (want_resched = 1, astpending = 1)
#define need_proftick(p) ((p)->p_flag |= P_OWEUPC, astpending = 1)
#define signotify(p) (astpending = 1)
diff --git a/sys/arch/sparc/include/cpu.h b/sys/arch/sparc/include/cpu.h
index a958a69c803..a1ca691938a 100644
--- a/sys/arch/sparc/include/cpu.h
+++ b/sys/arch/sparc/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.20 2003/06/02 23:27:54 millert Exp $ */
+/* $OpenBSD: cpu.h,v 1.21 2004/06/13 21:49:19 niklas Exp $ */
/* $NetBSD: cpu.h,v 1.24 1997/03/15 22:25:15 pk Exp $ */
/*
@@ -129,7 +129,7 @@ extern void raise(int, int);
* or after the current trap/syscall if in system mode.
*/
extern int want_resched; /* resched() was called */
-#define need_resched() (want_resched = 1, want_ast = 1)
+#define need_resched(ci) (want_resched = 1, want_ast = 1)
extern int want_ast;
/*
diff --git a/sys/arch/sparc/include/psl.h b/sys/arch/sparc/include/psl.h
index 6bb6785e66b..af175bba370 100644
--- a/sys/arch/sparc/include/psl.h
+++ b/sys/arch/sparc/include/psl.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: psl.h,v 1.18 2003/06/02 23:27:54 millert Exp $ */
+/* $OpenBSD: psl.h,v 1.19 2004/06/13 21:49:20 niklas Exp $ */
/* $NetBSD: psl.h,v 1.12 1997/03/10 21:49:11 pk Exp $ */
/*
@@ -95,6 +95,7 @@
*/
#define IPL_AUHARD 13 /* hard audio interrupts */
#define IPL_STATCLOCK 14 /* statclock() */
+#define IPL_HIGH 15 /* splhigh() */
#if defined(_KERNEL) && !defined(_LOCORE)
diff --git a/sys/arch/sparc/sparc/genassym.cf b/sys/arch/sparc/sparc/genassym.cf
index 3281b9e502d..71660d9efde 100644
--- a/sys/arch/sparc/sparc/genassym.cf
+++ b/sys/arch/sparc/sparc/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.14 2003/06/02 23:27:55 millert Exp $
+# $OpenBSD: genassym.cf,v 1.15 2004/06/13 21:49:20 niklas Exp $
# $NetBSD: genassym.cf,v 1.2 1997/06/28 19:59:04 pk Exp $
#
@@ -84,6 +84,7 @@ member p_wchan
member p_vmspace
export SRUN
+export SONPROC
# VM structure fields
define VM_PMAP offsetof(struct vmspace, vm_map.pmap)
diff --git a/sys/arch/sparc/sparc/locore.s b/sys/arch/sparc/sparc/locore.s
index 5dbc64a1ab3..026c076cde2 100644
--- a/sys/arch/sparc/sparc/locore.s
+++ b/sys/arch/sparc/sparc/locore.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.s,v 1.58 2004/06/08 18:06:53 art Exp $ */
+/* $OpenBSD: locore.s,v 1.59 2004/06/13 21:49:20 niklas Exp $ */
/* $NetBSD: locore.s,v 1.73 1997/09/13 20:36:48 pk Exp $ */
/*
@@ -4638,6 +4638,8 @@ Lsw_scan:
* Committed to running process p.
* It may be the same as the one we were running before.
*/
+ mov SONPROC, %o0 ! p->p_stat = SONPROC
+ stb %o0, [%g3 + P_STAT]
sethi %hi(_C_LABEL(want_resched)), %o0
st %g0, [%o0 + %lo(_C_LABEL(want_resched))] ! want_resched = 0;
ld [%g3 + P_ADDR], %g5 ! newpcb = p->p_addr;
diff --git a/sys/arch/sparc64/include/cpu.h b/sys/arch/sparc64/include/cpu.h
index 9d73cd48b84..278f7a3031b 100644
--- a/sys/arch/sparc64/include/cpu.h
+++ b/sys/arch/sparc64/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.24 2003/07/10 15:26:54 jason Exp $ */
+/* $OpenBSD: cpu.h,v 1.25 2004/06/13 21:49:20 niklas Exp $ */
/* $NetBSD: cpu.h,v 1.28 2001/06/14 22:56:58 thorpej Exp $ */
/*
@@ -75,33 +75,7 @@
#include <machine/reg.h>
#include <machine/intr.h>
-/*#include <sys/sched.h> */
-
-/*
- * CPU states.
- * XXX Not really scheduler state, but no other good place to put
- * it right now, and it really is per-CPU.
- */
-#define CP_USER 0
-#define CP_NICE 1
-#define CP_SYS 2
-#define CP_INTR 3
-#define CP_IDLE 4
-#define CPUSTATES 5
-
-/*
- * Per-CPU scheduler state.
- */
-struct schedstate_percpu {
- struct timeval spc_runtime; /* time curproc started running */
- __volatile int spc_flags; /* flags; see below */
- u_int spc_schedticks; /* ticks for schedclock() */
- u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
- u_char spc_curpriority; /* usrpri of curproc */
- int spc_rrticks; /* ticks until roundrobin() */
- int spc_pscnt; /* prof/stat counter */
- int spc_psdiv; /* prof/stat divisor */
-};
+#include <sys/proc.h>
/*
* The cpu_info structure is part of a 64KB structure mapped both the kernel
@@ -203,7 +177,7 @@ extern int want_ast;
* or after the current trap/syscall if in system mode.
*/
extern int want_resched; /* resched() was called */
-#define need_resched() (want_resched = 1, want_ast = 1)
+#define need_resched(ci) (want_resched = 1, want_ast = 1)
/*
* Give a profiling tick to the current process when the user profiling
diff --git a/sys/arch/sparc64/sparc64/genassym.cf b/sys/arch/sparc64/sparc64/genassym.cf
index f2c55d313b1..cde690d07d1 100644
--- a/sys/arch/sparc64/sparc64/genassym.cf
+++ b/sys/arch/sparc64/sparc64/genassym.cf
@@ -132,6 +132,7 @@ member p_pid
member P_FPSTATE p_md.md_fpstate
export SRUN
+export SONPROC
# user structure fields
define USIZ sizeof(struct user)
diff --git a/sys/arch/sparc64/sparc64/locore.s b/sys/arch/sparc64/sparc64/locore.s
index 2705d2b30cc..b6fdf4746aa 100644
--- a/sys/arch/sparc64/sparc64/locore.s
+++ b/sys/arch/sparc64/sparc64/locore.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.s,v 1.44 2004/04/23 04:18:17 marc Exp $ */
+/* $OpenBSD: locore.s,v 1.45 2004/06/13 21:49:21 niklas Exp $ */
/* $NetBSD: locore.s,v 1.137 2001/08/13 06:10:10 jdolecek Exp $ */
/*
@@ -5879,10 +5879,8 @@ Lsw_scan:
* p->p_cpu = curcpu();
*/
#endif /* defined(MULTIPROCESSOR) */
-#ifdef notyet
mov SONPROC, %o0 ! p->p_stat = SONPROC
stb %o0, [%l3 + P_STAT]
-#endif /* notyet */
sethi %hi(_C_LABEL(want_resched)), %o0
st %g0, [%o0 + %lo(_C_LABEL(want_resched))] ! want_resched = 0;
ldx [%l3 + P_ADDR], %l1 ! newpcb = p->p_addr;
diff --git a/sys/arch/vax/include/cpu.h b/sys/arch/vax/include/cpu.h
index 03bd14a720d..a0df493ff3e 100644
--- a/sys/arch/vax/include/cpu.h
+++ b/sys/arch/vax/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.15 2003/11/10 21:05:06 miod Exp $ */
+/* $OpenBSD: cpu.h,v 1.16 2004/06/13 21:49:21 niklas Exp $ */
/* $NetBSD: cpu.h,v 1.41 1999/10/21 20:01:36 ragge Exp $ */
/*
@@ -88,7 +88,7 @@ extern int bootdev;
* or after the current trap/syscall if in system mode.
*/
-#define need_resched(){ \
+#define need_resched(ci){ \
want_resched++; \
mtpr(AST_OK,PR_ASTLVL); \
}
diff --git a/sys/arch/vax/vax/genassym.cf b/sys/arch/vax/vax/genassym.cf
index 7e2b63bd89b..209fb80ecd1 100644
--- a/sys/arch/vax/vax/genassym.cf
+++ b/sys/arch/vax/vax/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.4 2002/01/23 23:24:40 miod Exp $
+# $OpenBSD: genassym.cf,v 1.5 2004/06/13 21:49:22 niklas Exp $
# $NetBSD: genassym.cf,v 1.10 1999/11/19 22:09:55 ragge Exp $
#
# Copyright (c) 1997 Ludd, University of Lule}, Sweden.
@@ -48,6 +48,9 @@ struct proc
member p_priority
member p_addr
member p_vmspace
+member p_stat
+
+export SONPROC
struct pcb
member P0BR
diff --git a/sys/arch/vax/vax/subr.s b/sys/arch/vax/vax/subr.s
index 89d6e993326..0efb63df499 100644
--- a/sys/arch/vax/vax/subr.s
+++ b/sys/arch/vax/vax/subr.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: subr.s,v 1.19 2003/11/07 10:16:45 jmc Exp $ */
+/* $OpenBSD: subr.s,v 1.20 2004/06/13 21:49:22 niklas Exp $ */
/* $NetBSD: subr.s,v 1.32 1999/03/25 00:41:48 mrg Exp $ */
/*
@@ -306,6 +306,7 @@ noque: .asciz "swtch"
bbsc r3,_whichqs,2f # no, clear bit in whichqs
2: clrl 4(r2) # clear proc backpointer
clrl _want_resched # we are now changing process
+ movb $SONPROC,P_STAT(r2) # p->p_stat = SONPROC
movl r2,_curproc # set new process running
cmpl r0,r2 # Same process?
bneq 1f # No, continue
diff --git a/sys/compat/linux/linux_sched.c b/sys/compat/linux/linux_sched.c
index 4d47196b37f..c5b1b008db9 100644
--- a/sys/compat/linux/linux_sched.c
+++ b/sys/compat/linux/linux_sched.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: linux_sched.c,v 1.3 2001/11/06 18:41:10 art Exp $ */
+/* $OpenBSD: linux_sched.c,v 1.4 2004/06/13 21:49:23 niklas Exp $ */
/* $NetBSD: linux_sched.c,v 1.6 2000/05/28 05:49:05 thorpej Exp $ */
/*-
@@ -272,7 +272,7 @@ linux_sys_sched_yield(cp, v, retval)
void *v;
register_t *retval;
{
- need_resched();
+ need_resched(curcpu());
return (0);
}
diff --git a/sys/ddb/db_command.c b/sys/ddb/db_command.c
index f62fff34850..12477e0b084 100644
--- a/sys/ddb/db_command.c
+++ b/sys/ddb/db_command.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: db_command.c,v 1.35 2004/04/25 03:21:50 itojun Exp $ */
+/* $OpenBSD: db_command.c,v 1.36 2004/06/13 21:49:23 niklas Exp $ */
/* $NetBSD: db_command.c,v 1.20 1996/03/30 22:30:05 christos Exp $ */
/*
@@ -60,6 +60,11 @@
boolean_t db_cmd_loop_done;
label_t *db_recover;
+#ifdef MULTIPROCESSOR
+boolean_t db_switch_cpu;
+long db_switch_to_cpu;
+#endif
+
/*
* if 'ed' style: 'dot' is set at start of last item printed,
* and '+' points to next line.
@@ -504,6 +509,11 @@ db_command_loop()
label_t *savejmp;
extern int db_output_line;
+#ifdef MULTIPROCESSOR
+ db_switch_cpu = 0;
+ db_enter_ddb();
+#endif /* MULTIPROCESSOR */
+
/*
* Initialize 'prev' and 'next' to dot.
*/
@@ -517,17 +527,35 @@ db_command_loop()
(void) setjmp(&db_jmpbuf);
while (!db_cmd_loop_done) {
+
if (db_print_position() != 0)
db_printf("\n");
db_output_line = 0;
+#ifdef MULTIPROCESSOR
+ db_printf("ddb{%ld}> ", (long)cpu_number());
+#else
db_printf("ddb> ");
+#endif
(void) db_read_line();
db_command(&db_last_command, db_command_table);
}
db_recover = savejmp;
+
+#ifdef MULTIPROCESSOR
+ if (db_switch_cpu) {
+ db_printf("Moving ddb to cpu %d\n", db_switch_to_cpu);
+ curcpu()->ci_ddb_paused = CI_DDB_STOPPED;
+ db_movetocpu(db_switch_to_cpu);
+ while (curcpu()->ci_ddb_paused == CI_DDB_SHOULDSTOP
+ || curcpu()->ci_ddb_paused == CI_DDB_STOPPED)
+ ; /* Do nothing */
+ } else {
+ db_leave_ddb();
+ }
+#endif /* MULTIPROCESSOR */
}
void
diff --git a/sys/dev/isa/aria.c b/sys/dev/isa/aria.c
index 2da86c3bb26..a73d9805f4b 100644
--- a/sys/dev/isa/aria.c
+++ b/sys/dev/isa/aria.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: aria.c,v 1.10 2004/01/09 21:32:23 brad Exp $ */
+/* $OpenBSD: aria.c,v 1.11 2004/06/13 21:49:24 niklas Exp $ */
/*
* Copyright (c) 1995, 1996 Roland C. Dowdeswell. All rights reserved.
@@ -76,7 +76,6 @@
#include <dev/mulaw.h>
#include <dev/isa/isavar.h>
#include <dev/isa/isadmavar.h>
-#include <i386/isa/icu.h>
#include <dev/isa/ariareg.h>
diff --git a/sys/dev/isa/gus.c b/sys/dev/isa/gus.c
index 649e7c11d7d..ff9e0ffef32 100644
--- a/sys/dev/isa/gus.c
+++ b/sys/dev/isa/gus.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: gus.c,v 1.26 2003/06/08 00:41:47 miod Exp $ */
+/* $OpenBSD: gus.c,v 1.27 2004/06/13 21:49:24 niklas Exp $ */
/* $NetBSD: gus.c,v 1.51 1998/01/25 23:48:06 mycroft Exp $ */
/*-
@@ -119,7 +119,6 @@
#include <dev/isa/isavar.h>
#include <dev/isa/isadmavar.h>
-#include <i386/isa/icu.h>
#include <dev/ic/ics2101reg.h>
#include <dev/ic/cs4231reg.h>
diff --git a/sys/dev/isa/gus_isa.c b/sys/dev/isa/gus_isa.c
index 5144a526691..5140bda769b 100644
--- a/sys/dev/isa/gus_isa.c
+++ b/sys/dev/isa/gus_isa.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: gus_isa.c,v 1.3 2002/03/14 01:26:56 millert Exp $ */
+/* $OpenBSD: gus_isa.c,v 1.4 2004/06/13 21:49:24 niklas Exp $ */
/* $NetBSD: gus.c,v 1.51 1998/01/25 23:48:06 mycroft Exp $ */
/*-
@@ -119,7 +119,6 @@
#include <dev/isa/isavar.h>
#include <dev/isa/isadmavar.h>
-#include <i386/isa/icu.h>
#include <dev/ic/ics2101reg.h>
#include <dev/ic/cs4231reg.h>
diff --git a/sys/dev/isa/gus_isapnp.c b/sys/dev/isa/gus_isapnp.c
index 0700ce4745f..37c5415a643 100644
--- a/sys/dev/isa/gus_isapnp.c
+++ b/sys/dev/isa/gus_isapnp.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: gus_isapnp.c,v 1.3 2002/03/14 01:26:56 millert Exp $ */
+/* $OpenBSD: gus_isapnp.c,v 1.4 2004/06/13 21:49:24 niklas Exp $ */
/* $NetBSD: gus.c,v 1.51 1998/01/25 23:48:06 mycroft Exp $ */
/*-
@@ -119,7 +119,6 @@
#include <dev/isa/isavar.h>
#include <dev/isa/isadmavar.h>
-#include <i386/isa/icu.h>
#include <dev/ic/ics2101reg.h>
#include <dev/ic/cs4231reg.h>
diff --git a/sys/dev/isa/if_hp.c b/sys/dev/isa/if_hp.c
index 2d31185b352..5e05b2296a8 100644
--- a/sys/dev/isa/if_hp.c
+++ b/sys/dev/isa/if_hp.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_hp.c,v 1.11 2004/05/12 06:35:11 tedu Exp $ */
+/* $OpenBSD: if_hp.c,v 1.12 2004/06/13 21:49:24 niklas Exp $ */
/* $NetBSD: if_hp.c,v 1.21 1995/12/24 02:31:31 mycroft Exp $ */
/* XXX THIS DRIVER IS BROKEN. IT WILL NOT EVEN COMPILE. */
@@ -85,7 +85,6 @@
#include <i386/isa/isa_device.h> /* XXX BROKEN */
#include <dev/isa/if_nereg.h>
-#include <i386/isa/icu.h> /* XXX BROKEN */
int hpprobe(), hpattach(), hpintr();
int hpstart(), hpinit(), ether_output(), hpioctl();
diff --git a/sys/dev/isa/opti.c b/sys/dev/isa/opti.c
index ce2f3b0b00f..e55d4151361 100644
--- a/sys/dev/isa/opti.c
+++ b/sys/dev/isa/opti.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: opti.c,v 1.7 2003/06/02 19:24:22 mickey Exp $ */
+/* $OpenBSD: opti.c,v 1.8 2004/06/13 21:49:24 niklas Exp $ */
/*
* Copyright (c) 1996 Michael Shalayeff
@@ -41,7 +41,6 @@
#include <machine/pio.h>
-#include <i386/isa/icu.h>
#include <dev/isa/isavar.h>
#include <dev/isa/opti.h>
diff --git a/sys/dev/pci/cy82c693.c b/sys/dev/pci/cy82c693.c
index e50bd84b545..6cf96d3f740 100644
--- a/sys/dev/pci/cy82c693.c
+++ b/sys/dev/pci/cy82c693.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cy82c693.c,v 1.4 2001/02/26 16:21:46 art Exp $ */
+/* $OpenBSD: cy82c693.c,v 1.5 2004/06/13 21:49:25 niklas Exp $ */
/* $NetBSD: cy82c693.c,v 1.1 2000/06/06 03:07:39 thorpej Exp $ */
/*-
@@ -60,7 +60,7 @@
static struct cy82c693_handle cyhc_handle;
static int cyhc_initialized;
-struct simplelock cyhc_slock = SLOCK_INITIALIZER;
+struct simplelock cyhc_slock;
#define CYHC_LOCK(s) \
do { \
@@ -81,6 +81,8 @@ cy82c693_init(bus_space_tag_t iot)
int s;
int error;
+ simple_lock_init(&cyhc_slock);
+
CYHC_LOCK(s);
if (cyhc_initialized) {
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 55dad5457b4..a8d052d0a05 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: init_main.c,v 1.115 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: init_main.c,v 1.116 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */
/*
@@ -125,7 +125,6 @@ struct timeval boottime;
#ifndef __HAVE_CPUINFO
struct timeval runtime;
#endif
-
int ncpus = 1;
#if !defined(NO_PROPOLICE)
@@ -188,6 +187,7 @@ main(framep)
int s, i;
register_t rval[2];
extern struct pdevinit pdevinit[];
+ extern struct SIMPLELOCK kprintf_slock;
extern void scheduler_start(void);
extern void disk_init(void);
extern void endtsleep(void *);
@@ -213,8 +213,13 @@ main(framep)
*/
config_init(); /* init autoconfiguration data structures */
consinit();
+
+ SIMPLE_LOCK_INIT(&kprintf_slock);
+
printf("%s\n", copyright);
+ KERNEL_LOCK_INIT();
+
uvm_init();
disk_init(); /* must come before autoconfiguration */
tty_init(); /* initialise tty's */
@@ -270,7 +275,7 @@ main(framep)
session0.s_leader = p;
p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
- p->p_stat = SRUN;
+ p->p_stat = SONPROC;
p->p_nice = NZERO;
p->p_emul = &emul_native;
bcopy("swapper", p->p_comm, sizeof ("swapper"));
@@ -344,6 +349,9 @@ main(framep)
/* Start real time and statistics clocks. */
initclocks();
+ /* Lock the kernel on behalf of proc0. */
+ KERNEL_PROC_LOCK(p);
+
#ifdef SYSVSHM
/* Initialize System V style shared memory. */
shminit();
@@ -413,8 +421,6 @@ main(framep)
VOP_UNLOCK(rootvnode, 0, p);
p->p_fd->fd_rdir = NULL;
- uvm_swap_init();
-
/*
* Now can look at time, having had a chance to verify the time
* from the file system. Reset p->p_rtime as it may have been
@@ -424,10 +430,12 @@ main(framep)
p->p_stats->p_start = mono_time = boottime = time;
p->p_cpu->ci_schedstate.spc_runtime = time;
#else
- p->p_stats->p_start = runtime = mono_time = boottime = time;
+ p->p_stats->p_start = runtime = mono_time = boottime = time;
#endif
p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
+ uvm_swap_init();
+
/* Create process 1 (init(8)). */
if (fork1(p, SIGCHLD, FORK_FORK, NULL, 0, start_init, NULL, rval))
panic("fork init");
@@ -465,6 +473,12 @@ main(framep)
srandom((u_long)(rtv.tv_sec ^ rtv.tv_usec));
randompid = 1;
+
+#if defined(MULTIPROCESSOR)
+ /* Boot the secondary processors. */
+ cpu_boot_secondary_processors();
+#endif
+
/* The scheduler is an infinite loop. */
uvm_scheduler();
/* NOTREACHED */
@@ -623,8 +637,10 @@ start_init(arg)
* Now try to exec the program. If can't for any reason
* other than it doesn't exist, complain.
*/
- if ((error = sys_execve(p, &args, retval)) == 0)
+ if ((error = sys_execve(p, &args, retval)) == 0) {
+ KERNEL_PROC_UNLOCK(p);
return;
+ }
if (error != ENOENT)
printf("exec %s: error %d\n", path, error);
}
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 8a34e63d16c..5a3df5ecec4 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_clock.c,v 1.43 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: kern_clock.c,v 1.44 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $ */
/*-
@@ -192,9 +192,16 @@ hardclock(struct clockframe *frame)
if (stathz == 0)
statclock(frame);
-#ifdef __HAVE_CPUINFO
+#if defined(__HAVE_CPUINFO)
if (--ci->ci_schedstate.spc_rrticks <= 0)
roundrobin(ci);
+
+ /*
+ * If we are not the primary CPU, we're not allowed to do
+ * any more work.
+ */
+ if (CPU_IS_PRIMARY(ci) == 0)
+ return;
#endif
/*
@@ -420,9 +427,10 @@ statclock(struct clockframe *frame)
if (psdiv == 1) {
setstatclockrate(stathz);
} else {
- setstatclockrate(profhz);
+ setstatclockrate(profhz);
}
}
+
/* XXX Kludgey */
#define pscnt spc->spc_pscnt
#define cp_time spc->spc_cp_time
@@ -483,7 +491,7 @@ statclock(struct clockframe *frame)
pscnt = psdiv;
#ifdef __HAVE_CPUINFO
-#undef pscnt
+#undef psdiv
#undef cp_time
#endif
@@ -495,7 +503,8 @@ statclock(struct clockframe *frame)
*/
if (schedhz == 0) {
#ifdef __HAVE_CPUINFO
- if ((++curcpu()->ci_schedstate.spc_schedticks & 3) == 0)
+ if ((++curcpu()->ci_schedstate.spc_schedticks & 3) ==
+ 0)
schedclock(p);
#else
if ((++schedclk & 3) == 0)
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 68d77771271..1587724bc08 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_exit.c,v 1.50 2004/05/27 20:48:46 tedu Exp $ */
+/* $OpenBSD: kern_exit.c,v 1.51 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_exit.c,v 1.39 1996/04/22 01:38:25 christos Exp $ */
/*
@@ -285,6 +285,9 @@ exit1(p, rv)
limfree(p->p_limit);
p->p_limit = NULL;
+ /* This process no longer needs to hold the kernel lock. */
+ KERNEL_PROC_UNLOCK(p);
+
/*
* If emulation has process exit hook, call it now.
*/
@@ -319,12 +322,15 @@ void
exit2(p)
struct proc *p;
{
+ int s;
- simple_lock(&deadproc_slock);
+ SIMPLE_LOCK(&deadproc_slock);
LIST_INSERT_HEAD(&deadproc, p, p_hash);
- simple_unlock(&deadproc_slock);
+ SIMPLE_UNLOCK(&deadproc_slock);
wakeup(&deadproc);
+
+ SCHED_LOCK(s);
}
/*
@@ -337,19 +343,22 @@ reaper(void)
{
struct proc *p;
+ KERNEL_PROC_UNLOCK(curproc);
+
for (;;) {
- simple_lock(&deadproc_slock);
+ SIMPLE_LOCK(&deadproc_slock);
p = LIST_FIRST(&deadproc);
if (p == NULL) {
/* No work for us; go to sleep until someone exits. */
- simple_unlock(&deadproc_slock);
+ SIMPLE_UNLOCK(&deadproc_slock);
(void) tsleep(&deadproc, PVM, "reaper", 0);
continue;
}
/* Remove us from the deadproc list. */
LIST_REMOVE(p, p_hash);
- simple_unlock(&deadproc_slock);
+ SIMPLE_UNLOCK(&deadproc_slock);
+ KERNEL_PROC_LOCK(curproc);
/*
* Give machine-dependent code a chance to free any
@@ -377,6 +386,9 @@ reaper(void)
/* Noone will wait for us. Just zap the process now */
proc_zap(p);
}
+ /* XXXNJW where should this be with respect to
+ * the wakeup() above? */
+ KERNEL_PROC_UNLOCK(curproc);
}
}
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index a103c391634..e33ea08e005 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_fork.c,v 1.68 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: kern_fork.c,v 1.69 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_fork.c,v 1.29 1996/02/09 18:59:34 christos Exp $ */
/*
@@ -204,7 +204,7 @@ fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize,
timeout_set(&p2->p_sleep_to, endtsleep, p2);
timeout_set(&p2->p_realit_to, realitexpire, p2);
-#ifdef __HAVE_CPUINFO
+#if defined(__HAVE_CPUINFO)
p2->p_cpu = NULL;
#endif
@@ -339,12 +339,12 @@ fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize,
/*
* Make child runnable, set start time, and add to run queue.
*/
- s = splstatclock();
+ SCHED_LOCK(s);
p2->p_stats->p_start = time;
p2->p_acflag = AFORK;
p2->p_stat = SRUN;
setrunqueue(p2);
- splx(s);
+ SCHED_UNLOCK(s);
/*
* Now can be swapped.
@@ -399,3 +399,20 @@ pidtaken(pid_t pid)
return (1);
return (0);
}
+
+#if defined(MULTIPROCESSOR)
+/*
+ * XXX This is a slight hack to get newly-formed processes to
+ * XXX acquire the kernel lock as soon as they run.
+ */
+void
+proc_trampoline_mp(void)
+{
+ struct proc *p;
+
+ p = curproc;
+
+ SCHED_ASSERT_UNLOCKED();
+ KERNEL_PROC_LOCK(p);
+}
+#endif
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
index b811644d403..efb904c589a 100644
--- a/sys/kern/kern_ktrace.c
+++ b/sys/kern/kern_ktrace.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_ktrace.c,v 1.32 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: kern_ktrace.c,v 1.33 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $ */
/*
@@ -37,6 +37,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
+#include <sys/sched.h>
#include <sys/file.h>
#include <sys/namei.h>
#include <sys/vnode.h>
diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index d75d09acc94..8ea70f3a097 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_lock.c,v 1.15 2003/06/02 23:28:05 millert Exp $ */
+/* $OpenBSD: kern_lock.c,v 1.16 2004/06/13 21:49:26 niklas Exp $ */
/*
* Copyright (c) 1995
@@ -39,9 +39,20 @@
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/systm.h>
+#include <sys/sched.h>
#include <machine/cpu.h>
+#ifndef spllock
+#define spllock() splhigh()
+#endif
+
+#ifdef MULTIPROCESSOR
+#define CPU_NUMBER() cpu_number()
+#else
+#define CPU_NUMBER() 0
+#endif
+
void record_stacktrace(int *, int);
void playback_stacktrace(int *, int);
@@ -50,67 +61,243 @@ void playback_stacktrace(int *, int);
* Locks provide shared/exclusive sychronization.
*/
-#if 0
-#ifdef DEBUG
-#define COUNT(p, x) if (p) (p)->p_locks += (x)
+/*
+ * Locking primitives implementation.
+ * Locks provide shared/exclusive synchronization.
+ */
+
+#if defined(LOCKDEBUG) || defined(DIAGNOSTIC) /* { */
+#if defined(MULTIPROCESSOR) /* { */
+#define COUNT_CPU(cpu_id, x) \
+ curcpu()->ci_spin_locks += (x)
#else
-#define COUNT(p, x)
+u_long spin_locks;
+#define COUNT_CPU(cpu_id, x) spin_locks += (x)
+#endif /* MULTIPROCESSOR */ /* } */
+
+#define COUNT(lkp, p, cpu_id, x) \
+do { \
+ if ((lkp)->lk_flags & LK_SPIN) \
+ COUNT_CPU((cpu_id), (x)); \
+ else \
+ (p)->p_locks += (x); \
+} while (/*CONSTCOND*/0)
+#else
+#define COUNT(lkp, p, cpu_id, x)
+#define COUNT_CPU(cpu_id, x)
+#endif /* LOCKDEBUG || DIAGNOSTIC */ /* } */
+
+#ifndef SPINLOCK_SPIN_HOOK /* from <machine/lock.h> */
+#define SPINLOCK_SPIN_HOOK /* nothing */
#endif
+
+#define INTERLOCK_ACQUIRE(lkp, flags, s) \
+do { \
+ if ((flags) & LK_SPIN) \
+ s = spllock(); \
+ simple_lock(&(lkp)->lk_interlock); \
+} while (/*CONSTCOND*/ 0)
+
+#define INTERLOCK_RELEASE(lkp, flags, s) \
+do { \
+ simple_unlock(&(lkp)->lk_interlock); \
+ if ((flags) & LK_SPIN) \
+ splx(s); \
+} while (/*CONSTCOND*/ 0)
+
+#ifdef DDB /* { */
+#ifdef MULTIPROCESSOR
+int simple_lock_debugger = 1; /* more serious on MP */
+#else
+int simple_lock_debugger = 0;
#endif
+#define SLOCK_DEBUGGER() if (simple_lock_debugger) Debugger()
+#define SLOCK_TRACE() \
+ db_stack_trace_print((db_expr_t)__builtin_frame_address(0), \
+ TRUE, 65535, "", lock_printf);
+#else
+#define SLOCK_DEBUGGER() /* nothing */
+#define SLOCK_TRACE() /* nothing */
+#endif /* } */
-#define COUNT(p, x)
+#if defined(LOCKDEBUG)
+#if defined(DDB)
+#define SPINLOCK_SPINCHECK_DEBUGGER Debugger()
+#else
+#define SPINLOCK_SPINCHECK_DEBUGGER /* nothing */
+#endif
+
+#define SPINLOCK_SPINCHECK_DECL \
+ /* 32-bits of count -- wrap constitutes a "spinout" */ \
+ uint32_t __spinc = 0
-#if NCPUS > 1
+#define SPINLOCK_SPINCHECK \
+do { \
+ if (++__spinc == 0) { \
+ lock_printf("LK_SPIN spinout, excl %d, share %d\n", \
+ lkp->lk_exclusivecount, lkp->lk_sharecount); \
+ if (lkp->lk_exclusivecount) \
+ lock_printf("held by CPU %lu\n", \
+ (u_long) lkp->lk_cpu); \
+ if (lkp->lk_lock_file) \
+ lock_printf("last locked at %s:%d\n", \
+ lkp->lk_lock_file, lkp->lk_lock_line); \
+ if (lkp->lk_unlock_file) \
+ lock_printf("last unlocked at %s:%d\n", \
+ lkp->lk_unlock_file, lkp->lk_unlock_line); \
+ SLOCK_TRACE(); \
+ SPINLOCK_SPINCHECK_DEBUGGER; \
+ } \
+} while (/*CONSTCOND*/ 0)
+#else
+#define SPINLOCK_SPINCHECK_DECL /* nothing */
+#define SPINLOCK_SPINCHECK /* nothing */
+#endif /* LOCKDEBUG && DDB */
/*
- * For multiprocessor system, try spin lock first.
- *
- * This should be inline expanded below, but we cannot have #if
- * inside a multiline define.
+ * Acquire a resource.
*/
-int lock_wait_time = 100;
-#define PAUSE(lkp, wanted) \
- if (lock_wait_time > 0) { \
- int i; \
+#define ACQUIRE(lkp, error, extflags, drain, wanted) \
+ if ((extflags) & LK_SPIN) { \
+ int interlocked; \
+ SPINLOCK_SPINCHECK_DECL; \
\
- simple_unlock(&lkp->lk_interlock); \
- for (i = lock_wait_time; i > 0; i--) \
- if (!(wanted)) \
- break; \
- simple_lock(&lkp->lk_interlock); \
+ if ((drain) == 0) \
+ (lkp)->lk_waitcount++; \
+ for (interlocked = 1;;) { \
+ SPINLOCK_SPINCHECK; \
+ if (wanted) { \
+ if (interlocked) { \
+ INTERLOCK_RELEASE((lkp), \
+ LK_SPIN, s); \
+ interlocked = 0; \
+ } \
+ SPINLOCK_SPIN_HOOK; \
+ } else if (interlocked) { \
+ break; \
+ } else { \
+ INTERLOCK_ACQUIRE((lkp), LK_SPIN, s); \
+ interlocked = 1; \
+ } \
} \
- if (!(wanted)) \
- break;
+ if ((drain) == 0) \
+ (lkp)->lk_waitcount--; \
+ KASSERT((wanted) == 0); \
+ error = 0; /* sanity */ \
+ } else { \
+ for (error = 0; wanted; ) { \
+ if ((drain)) \
+ (lkp)->lk_flags |= LK_WAITDRAIN; \
+ else \
+ (lkp)->lk_waitcount++; \
+ /* XXX Cast away volatile. */ \
+ error = ltsleep((drain) ? \
+ (void *)&(lkp)->lk_flags : \
+ (void *)(lkp), (lkp)->lk_prio, \
+ (lkp)->lk_wmesg, (lkp)->lk_timo, \
+ &(lkp)->lk_interlock); \
+ if ((drain) == 0) \
+ (lkp)->lk_waitcount--; \
+ if (error) \
+ break; \
+ if ((extflags) & LK_SLEEPFAIL) { \
+ error = ENOLCK; \
+ break; \
+ } \
+ } \
+ }
-#else /* NCPUS == 1 */
+#define SETHOLDER(lkp, pid, cpu_id) \
+do { \
+ if ((lkp)->lk_flags & LK_SPIN) \
+ (lkp)->lk_cpu = cpu_id; \
+ else \
+ (lkp)->lk_lockholder = pid; \
+} while (/*CONSTCOND*/0)
-/*
- * It is an error to spin on a uniprocessor as nothing will ever cause
- * the simple lock to clear while we are executing.
- */
-#define PAUSE(lkp, wanted)
+#define WEHOLDIT(lkp, pid, cpu_id) \
+ (((lkp)->lk_flags & LK_SPIN) != 0 ? \
+ ((lkp)->lk_cpu == (cpu_id)) : \
+ ((lkp)->lk_lockholder == (pid)))
+
+#define WAKEUP_WAITER(lkp) \
+do { \
+ if (((lkp)->lk_flags & LK_SPIN) == 0 && (lkp)->lk_waitcount) { \
+ /* XXX Cast away volatile. */ \
+ wakeup((void *)(lkp)); \
+ } \
+} while (/*CONSTCOND*/0)
+
+#if defined(LOCKDEBUG) /* { */
+#if defined(MULTIPROCESSOR) /* { */
+struct simplelock spinlock_list_slock = SIMPLELOCK_INITIALIZER;
-#endif /* NCPUS == 1 */
+#define SPINLOCK_LIST_LOCK() \
+ __cpu_simple_lock(&spinlock_list_slock.lock_data)
+
+#define SPINLOCK_LIST_UNLOCK() \
+ __cpu_simple_unlock(&spinlock_list_slock.lock_data)
+#else
+#define SPINLOCK_LIST_LOCK() /* nothing */
+#define SPINLOCK_LIST_UNLOCK() /* nothing */
+#endif /* MULTIPROCESSOR */ /* } */
+
+TAILQ_HEAD(, lock) spinlock_list =
+ TAILQ_HEAD_INITIALIZER(spinlock_list);
+
+#define HAVEIT(lkp) \
+do { \
+ if ((lkp)->lk_flags & LK_SPIN) { \
+ int s = spllock(); \
+ SPINLOCK_LIST_LOCK(); \
+ /* XXX Cast away volatile. */ \
+ TAILQ_INSERT_TAIL(&spinlock_list, (struct lock *)(lkp), \
+ lk_list); \
+ SPINLOCK_LIST_UNLOCK(); \
+ splx(s); \
+ } \
+} while (/*CONSTCOND*/0)
+
+#define DONTHAVEIT(lkp) \
+do { \
+ if ((lkp)->lk_flags & LK_SPIN) { \
+ int s = spllock(); \
+ SPINLOCK_LIST_LOCK(); \
+ /* XXX Cast away volatile. */ \
+ TAILQ_REMOVE(&spinlock_list, (struct lock *)(lkp), \
+ lk_list); \
+ SPINLOCK_LIST_UNLOCK(); \
+ splx(s); \
+ } \
+} while (/*CONSTCOND*/0)
+#else
+#define HAVEIT(lkp) /* nothing */
+
+#define DONTHAVEIT(lkp) /* nothing */
+#endif /* LOCKDEBUG */ /* } */
+
+#if defined(LOCKDEBUG)
/*
- * Acquire a resource.
+ * Lock debug printing routine; can be configured to print to console
+ * or log to syslog.
*/
-#define ACQUIRE(lkp, error, extflags, wanted) \
- PAUSE(lkp, wanted); \
- for (error = 0; wanted; ) { \
- (lkp)->lk_waitcount++; \
- simple_unlock(&(lkp)->lk_interlock); \
- error = tsleep((void *)lkp, (lkp)->lk_prio, \
- (lkp)->lk_wmesg, (lkp)->lk_timo); \
- simple_lock(&(lkp)->lk_interlock); \
- (lkp)->lk_waitcount--; \
- if (error) \
- break; \
- if ((extflags) & LK_SLEEPFAIL) { \
- error = ENOLCK; \
- break; \
- } \
+void
+lock_printf(const char *fmt, ...)
+{
+ char b[150];
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (lock_debug_syslog)
+ vlog(LOG_DEBUG, fmt, ap);
+ else {
+ vsnprintf(b, sizeof(b), fmt, ap);
+ printf_nolog("%s", b);
}
+ va_end(ap);
+}
+#endif /* LOCKDEBUG */
/*
* Initialize a lock; required before use.
@@ -127,10 +314,18 @@ lockinit(lkp, prio, wmesg, timo, flags)
bzero(lkp, sizeof(struct lock));
simple_lock_init(&lkp->lk_interlock);
lkp->lk_flags = flags & LK_EXTFLG_MASK;
- lkp->lk_prio = prio;
- lkp->lk_timo = timo;
- lkp->lk_wmesg = wmesg;
- lkp->lk_lockholder = LK_NOPROC;
+ if (flags & LK_SPIN)
+ lkp->lk_cpu = LK_NOCPU;
+ else {
+ lkp->lk_lockholder = LK_NOPROC;
+ lkp->lk_prio = prio;
+ lkp->lk_timo = timo;
+ }
+ lkp->lk_wmesg = wmesg; /* just a name for spin locks */
+#if defined(LOCKDEBUG)
+ lkp->lk_lock_file = NULL;
+ lkp->lk_unlock_file = NULL;
+#endif
}
/*
@@ -140,14 +335,14 @@ int
lockstatus(lkp)
struct lock *lkp;
{
- int lock_type = 0;
+ int s = 0, lock_type = 0;
- simple_lock(&lkp->lk_interlock);
+ INTERLOCK_ACQUIRE(lkp, lkp->lk_flags, s);
if (lkp->lk_exclusivecount != 0)
lock_type = LK_EXCLUSIVE;
else if (lkp->lk_sharecount != 0)
lock_type = LK_SHARED;
- simple_unlock(&lkp->lk_interlock);
+ INTERLOCK_RELEASE(lkp, lkp->lk_flags, s);
return (lock_type);
}
@@ -168,17 +363,33 @@ lockmgr(lkp, flags, interlkp, p)
int error;
pid_t pid;
int extflags;
+ cpuid_t cpu_id;
+ int s = 0;
error = 0;
- if (p)
- pid = p->p_pid;
- else
- pid = LK_KERNPROC;
- simple_lock(&lkp->lk_interlock);
+
+ INTERLOCK_ACQUIRE(lkp, lkp->lk_flags, s);
if (flags & LK_INTERLOCK)
simple_unlock(interlkp);
extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
-#ifdef DIAGNOSTIC
+
+#ifdef DIAGNOSTIC /* { */
+ /*
+ * Don't allow spins on sleep locks and don't allow sleeps
+ * on spin locks.
+ */
+ if ((flags ^ lkp->lk_flags) & LK_SPIN)
+ panic("lockmgr: sleep/spin mismatch");
+#endif /* } */
+
+ if (extflags & LK_SPIN) {
+ pid = LK_KERNPROC;
+ } else {
+ /* Process context required. */
+ pid = p->p_pid;
+ }
+ cpu_id = CPU_NUMBER();
+
/*
* Once a lock has drained, the LK_DRAINING flag is set and an
* exclusive lock is returned. The only valid operation thereafter
@@ -191,12 +402,14 @@ lockmgr(lkp, flags, interlkp, p)
* the lock by specifying LK_REENABLE.
*/
if (lkp->lk_flags & (LK_DRAINING|LK_DRAINED)) {
+#ifdef DIAGNOSTIC
if (lkp->lk_flags & LK_DRAINED)
panic("lockmgr: using decommissioned lock");
if ((flags & LK_TYPE_MASK) != LK_RELEASE ||
- lkp->lk_lockholder != pid)
+ WEHOLDIT(lkp, pid, cpu_id) == 0)
panic("lockmgr: non-release on draining lock: %d",
flags & LK_TYPE_MASK);
+#endif /* DIAGNOSTIC */
lkp->lk_flags &= ~LK_DRAINING;
if ((flags & LK_REENABLE) == 0)
lkp->lk_flags |= LK_DRAINED;
@@ -208,12 +421,11 @@ lockmgr(lkp, flags, interlkp, p)
if ((lkp->lk_flags & (LK_CANRECURSE|LK_RECURSEFAIL)) ==
(LK_CANRECURSE|LK_RECURSEFAIL))
panic("lockmgr: make up your mind");
-#endif /* DIAGNOSTIC */
switch (flags & LK_TYPE_MASK) {
case LK_SHARED:
- if (lkp->lk_lockholder != pid) {
+ if (WEHOLDIT(lkp, pid, cpu_id) == 0) {
/*
* If just polling, check to see if we will block.
*/
@@ -225,12 +437,12 @@ lockmgr(lkp, flags, interlkp, p)
/*
* Wait for exclusive locks and upgrades to clear.
*/
- ACQUIRE(lkp, error, extflags, lkp->lk_flags &
+ ACQUIRE(lkp, error, extflags, 0, lkp->lk_flags &
(LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE));
if (error)
break;
lkp->lk_sharecount++;
- COUNT(p, 1);
+ COUNT(lkp, p, cpu_id, 1);
break;
}
/*
@@ -238,18 +450,24 @@ lockmgr(lkp, flags, interlkp, p)
* An alternative would be to fail with EDEADLK.
*/
lkp->lk_sharecount++;
- COUNT(p, 1);
+ COUNT(lkp, p, cpu_id, 1);
/* fall into downgrade */
case LK_DOWNGRADE:
- if (lkp->lk_lockholder != pid || lkp->lk_exclusivecount == 0)
+ if (WEHOLDIT(lkp, pid, cpu_id) == 0 ||
+ lkp->lk_exclusivecount == 0)
panic("lockmgr: not holding exclusive lock");
lkp->lk_sharecount += lkp->lk_exclusivecount;
lkp->lk_exclusivecount = 0;
+ lkp->lk_recurselevel = 0;
lkp->lk_flags &= ~LK_HAVE_EXCL;
- lkp->lk_lockholder = LK_NOPROC;
- if (lkp->lk_waitcount)
- wakeup((void *)lkp);
+ SETHOLDER(lkp, LK_NOPROC, LK_NOCPU);
+#if defined(LOCKDEBUG)
+ lkp->lk_unlock_file = file;
+ lkp->lk_unlock_line = line;
+#endif
+ DONTHAVEIT(lkp);
+ WAKEUP_WAITER(lkp);
break;
case LK_EXCLUPGRADE:
@@ -260,7 +478,7 @@ lockmgr(lkp, flags, interlkp, p)
*/
if (lkp->lk_flags & LK_WANT_UPGRADE) {
lkp->lk_sharecount--;
- COUNT(p, -1);
+ COUNT(lkp, p, cpu_id, -1);
error = EBUSY;
break;
}
@@ -275,10 +493,10 @@ lockmgr(lkp, flags, interlkp, p)
* after the upgrade). If we return an error, the file
* will always be unlocked.
*/
- if (lkp->lk_lockholder == pid || lkp->lk_sharecount <= 0)
+ if (WEHOLDIT(lkp, pid, cpu_id) || lkp->lk_sharecount <= 0)
panic("lockmgr: upgrade exclusive lock");
lkp->lk_sharecount--;
- COUNT(p, -1);
+ COUNT(lkp, p, cpu_id, -1);
/*
* If we are just polling, check to see if we will block.
*/
@@ -295,16 +513,23 @@ lockmgr(lkp, flags, interlkp, p)
* drop to zero, then take exclusive lock.
*/
lkp->lk_flags |= LK_WANT_UPGRADE;
- ACQUIRE(lkp, error, extflags, lkp->lk_sharecount);
+ ACQUIRE(lkp, error, extflags, 0, lkp->lk_sharecount);
lkp->lk_flags &= ~LK_WANT_UPGRADE;
if (error)
break;
lkp->lk_flags |= LK_HAVE_EXCL;
- lkp->lk_lockholder = pid;
+ SETHOLDER(lkp, pid, cpu_id);
+#if defined(LOCKDEBUG)
+ lkp->lk_lock_file = file;
+ lkp->lk_lock_line = line;
+#endif
+ HAVEIT(lkp);
if (lkp->lk_exclusivecount != 0)
panic("lockmgr: non-zero exclusive count");
lkp->lk_exclusivecount = 1;
- COUNT(p, 1);
+ if (extflags & LK_SETRECURSE)
+ lkp->lk_recurselevel = 1;
+ COUNT(lkp, p, cpu_id, 1);
break;
}
/*
@@ -312,24 +537,28 @@ lockmgr(lkp, flags, interlkp, p)
* lock, awaken upgrade requestor if we are the last shared
* lock, then request an exclusive lock.
*/
- if (lkp->lk_sharecount == 0 && lkp->lk_waitcount)
- wakeup((void *)lkp);
+ if (lkp->lk_sharecount == 0)
+ WAKEUP_WAITER(lkp);
/* fall into exclusive request */
case LK_EXCLUSIVE:
- if (lkp->lk_lockholder == pid && pid != LK_KERNPROC) {
+ if (WEHOLDIT(lkp, pid, cpu_id)) {
/*
- * Recursive lock.
+ * Recursive lock.
*/
- if ((extflags & LK_CANRECURSE) == 0) {
+ if ((extflags & LK_CANRECURSE) == 0 &&
+ lkp->lk_recurselevel == 0) {
if (extflags & LK_RECURSEFAIL) {
error = EDEADLK;
break;
- }
- panic("lockmgr: locking against myself");
+ } else
+ panic("lockmgr: locking against myself");
}
lkp->lk_exclusivecount++;
- COUNT(p, 1);
+ if (extflags & LK_SETRECURSE &&
+ lkp->lk_recurselevel == 0)
+ lkp->lk_recurselevel = lkp->lk_exclusivecount;
+ COUNT(lkp, p, cpu_id, 1);
break;
}
/*
@@ -344,7 +573,7 @@ lockmgr(lkp, flags, interlkp, p)
/*
* Try to acquire the want_exclusive flag.
*/
- ACQUIRE(lkp, error, extflags, lkp->lk_flags &
+ ACQUIRE(lkp, error, extflags, 0, lkp->lk_flags &
(LK_HAVE_EXCL | LK_WANT_EXCL));
if (error)
break;
@@ -352,38 +581,62 @@ lockmgr(lkp, flags, interlkp, p)
/*
* Wait for shared locks and upgrades to finish.
*/
- ACQUIRE(lkp, error, extflags, lkp->lk_sharecount != 0 ||
+ ACQUIRE(lkp, error, extflags, 0, lkp->lk_sharecount != 0 ||
(lkp->lk_flags & LK_WANT_UPGRADE));
lkp->lk_flags &= ~LK_WANT_EXCL;
if (error)
break;
lkp->lk_flags |= LK_HAVE_EXCL;
- lkp->lk_lockholder = pid;
+ SETHOLDER(lkp, pid, cpu_id);
+#if defined(LOCKDEBUG)
+ lkp->lk_lock_file = file;
+ lkp->lk_lock_line = line;
+#endif
+ HAVEIT(lkp);
if (lkp->lk_exclusivecount != 0)
panic("lockmgr: non-zero exclusive count");
lkp->lk_exclusivecount = 1;
- COUNT(p, 1);
+ if (extflags & LK_SETRECURSE)
+ lkp->lk_recurselevel = 1;
+ COUNT(lkp, p, cpu_id, 1);
break;
case LK_RELEASE:
if (lkp->lk_exclusivecount != 0) {
- if (pid != lkp->lk_lockholder)
- panic("lockmgr: pid %d, not %s %d unlocking",
- pid, "exclusive lock holder",
- lkp->lk_lockholder);
+ if (WEHOLDIT(lkp, pid, cpu_id) == 0) {
+ if (lkp->lk_flags & LK_SPIN) {
+ panic("lockmgr: processor %lu, not "
+ "exclusive lock holder %lu "
+ "unlocking", cpu_id, lkp->lk_cpu);
+ } else {
+ panic("lockmgr: pid %d, not "
+ "exclusive lock holder %d "
+ "unlocking", pid,
+ lkp->lk_lockholder);
+ }
+ }
+ if (lkp->lk_exclusivecount == lkp->lk_recurselevel)
+ lkp->lk_recurselevel = 0;
lkp->lk_exclusivecount--;
- COUNT(p, -1);
+ COUNT(lkp, p, cpu_id, -1);
if (lkp->lk_exclusivecount == 0) {
lkp->lk_flags &= ~LK_HAVE_EXCL;
- lkp->lk_lockholder = LK_NOPROC;
+ SETHOLDER(lkp, LK_NOPROC, LK_NOCPU);
+#if defined(LOCKDEBUG)
+ lkp->lk_unlock_file = file;
+ lkp->lk_unlock_line = line;
+#endif
+ DONTHAVEIT(lkp);
}
} else if (lkp->lk_sharecount != 0) {
lkp->lk_sharecount--;
- COUNT(p, -1);
- } else
- panic("lockmgr: LK_RELEASE of unlocked lock");
- if (lkp->lk_waitcount)
- wakeup((void *)lkp);
+ COUNT(lkp, p, cpu_id, -1);
+ }
+#ifdef DIAGNOSTIC
+ else
+ panic("lockmgr: release of unlocked lock!");
+#endif
+ WAKEUP_WAITER(lkp);
break;
case LK_DRAIN:
@@ -393,7 +646,7 @@ lockmgr(lkp, flags, interlkp, p)
* check for holding a shared lock, but at least we can
* check for an exclusive one.
*/
- if (lkp->lk_lockholder == pid)
+ if (WEHOLDIT(lkp, pid, cpu_id))
panic("lockmgr: draining against myself");
/*
* If we are just polling, check to see if we will sleep.
@@ -404,66 +657,228 @@ lockmgr(lkp, flags, interlkp, p)
error = EBUSY;
break;
}
- PAUSE(lkp, ((lkp->lk_flags &
+ ACQUIRE(lkp, error, extflags, 1,
+ ((lkp->lk_flags &
(LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
- lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0));
- for (error = 0; ((lkp->lk_flags &
- (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
- lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0); ) {
- lkp->lk_flags |= LK_WAITDRAIN;
- simple_unlock(&lkp->lk_interlock);
- if ((error = tsleep((void *)&lkp->lk_flags, lkp->lk_prio,
- lkp->lk_wmesg, lkp->lk_timo)) != 0)
- return (error);
- if ((extflags) & LK_SLEEPFAIL)
- return (ENOLCK);
- simple_lock(&lkp->lk_interlock);
- }
+ lkp->lk_sharecount != 0 ||
+ lkp->lk_waitcount != 0));
+ if (error)
+ break;
lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
- lkp->lk_lockholder = pid;
+ SETHOLDER(lkp, pid, cpu_id);
+#if defined(LOCKDEBUG)
+ lkp->lk_lock_file = file;
+ lkp->lk_lock_line = line;
+#endif
+ HAVEIT(lkp);
lkp->lk_exclusivecount = 1;
- COUNT(p, 1);
+ /* XXX unlikely that we'd want this */
+ if (extflags & LK_SETRECURSE)
+ lkp->lk_recurselevel = 1;
+ COUNT(lkp, p, cpu_id, 1);
break;
default:
- simple_unlock(&lkp->lk_interlock);
+ INTERLOCK_RELEASE(lkp, lkp->lk_flags, s);
panic("lockmgr: unknown locktype request %d",
flags & LK_TYPE_MASK);
/* NOTREACHED */
}
- if ((lkp->lk_flags & LK_WAITDRAIN) && ((lkp->lk_flags &
- (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) == 0 &&
- lkp->lk_sharecount == 0 && lkp->lk_waitcount == 0)) {
+ if ((lkp->lk_flags & (LK_WAITDRAIN | LK_SPIN)) == LK_WAITDRAIN &&
+ ((lkp->lk_flags &
+ (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) == 0 &&
+ lkp->lk_sharecount == 0 && lkp->lk_waitcount == 0)) {
lkp->lk_flags &= ~LK_WAITDRAIN;
wakeup((void *)&lkp->lk_flags);
}
- simple_unlock(&lkp->lk_interlock);
+ INTERLOCK_RELEASE(lkp, lkp->lk_flags, s);
return (error);
}
/*
+ * For a recursive spinlock held one or more times by the current CPU,
+ * release all N locks, and return N.
+ * Intended for use in mi_switch() shortly before context switching.
+ */
+
+#ifdef notyet
+int
+#if defined(LOCKDEBUG)
+_spinlock_release_all(__volatile struct lock *lkp, const char *file, int line)
+#else
+spinlock_release_all(__volatile struct lock *lkp)
+#endif
+{
+ int s, count;
+ cpuid_t cpu_id;
+
+ KASSERT(lkp->lk_flags & LK_SPIN);
+
+ INTERLOCK_ACQUIRE(lkp, LK_SPIN, s);
+
+ cpu_id = CPU_NUMBER();
+ count = lkp->lk_exclusivecount;
+
+ if (count != 0) {
+#ifdef DIAGNOSTIC
+ if (WEHOLDIT(lkp, 0, cpu_id) == 0) {
+ panic("spinlock_release_all: processor %lu, not "
+ "exclusive lock holder %lu "
+ "unlocking", (long)cpu_id, lkp->lk_cpu);
+ }
+#endif
+ lkp->lk_recurselevel = 0;
+ lkp->lk_exclusivecount = 0;
+ COUNT_CPU(cpu_id, -count);
+ lkp->lk_flags &= ~LK_HAVE_EXCL;
+ SETHOLDER(lkp, LK_NOPROC, LK_NOCPU);
+#if defined(LOCKDEBUG)
+ lkp->lk_unlock_file = file;
+ lkp->lk_unlock_line = line;
+#endif
+ DONTHAVEIT(lkp);
+ }
+#ifdef DIAGNOSTIC
+ else if (lkp->lk_sharecount != 0)
+ panic("spinlock_release_all: release of shared lock!");
+ else
+ panic("spinlock_release_all: release of unlocked lock!");
+#endif
+ INTERLOCK_RELEASE(lkp, LK_SPIN, s);
+
+ return (count);
+}
+#endif
+
+/*
+ * For a recursive spinlock held one or more times by the current CPU,
+ * release all N locks, and return N.
+ * Intended for use in mi_switch() right after resuming execution.
+ */
+
+#ifdef notyet
+void
+#if defined(LOCKDEBUG)
+_spinlock_acquire_count(__volatile struct lock *lkp, int count,
+ const char *file, int line)
+#else
+spinlock_acquire_count(__volatile struct lock *lkp, int count)
+#endif
+{
+ int s, error;
+ cpuid_t cpu_id;
+
+ KASSERT(lkp->lk_flags & LK_SPIN);
+
+ INTERLOCK_ACQUIRE(lkp, LK_SPIN, s);
+
+ cpu_id = CPU_NUMBER();
+
+#ifdef DIAGNOSTIC
+ if (WEHOLDIT(lkp, LK_NOPROC, cpu_id))
+ panic("spinlock_acquire_count: processor %lu already holds lock", (long)cpu_id);
+#endif
+ /*
+ * Try to acquire the want_exclusive flag.
+ */
+ ACQUIRE(lkp, error, LK_SPIN, 0, lkp->lk_flags &
+ (LK_HAVE_EXCL | LK_WANT_EXCL));
+ lkp->lk_flags |= LK_WANT_EXCL;
+ /*
+ * Wait for shared locks and upgrades to finish.
+ */
+ ACQUIRE(lkp, error, LK_SPIN, 0, lkp->lk_sharecount != 0 ||
+ (lkp->lk_flags & LK_WANT_UPGRADE));
+ lkp->lk_flags &= ~LK_WANT_EXCL;
+ lkp->lk_flags |= LK_HAVE_EXCL;
+ SETHOLDER(lkp, LK_NOPROC, cpu_id);
+#if defined(LOCKDEBUG)
+ lkp->lk_lock_file = file;
+ lkp->lk_lock_line = line;
+#endif
+ HAVEIT(lkp);
+ if (lkp->lk_exclusivecount != 0)
+ panic("lockmgr: non-zero exclusive count");
+ lkp->lk_exclusivecount = count;
+ lkp->lk_recurselevel = 1;
+ COUNT_CPU(cpu_id, count);
+
+ INTERLOCK_RELEASE(lkp, lkp->lk_flags, s);
+}
+#endif
+
+/*
* Print out information about state of a lock. Used by VOP_PRINT
* routines to display ststus about contained locks.
*/
void
lockmgr_printinfo(lkp)
- struct lock *lkp;
+ __volatile struct lock *lkp;
{
if (lkp->lk_sharecount)
printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg,
lkp->lk_sharecount);
- else if (lkp->lk_flags & LK_HAVE_EXCL)
- printf(" lock type %s: EXCL (count %d) by pid %d",
- lkp->lk_wmesg, lkp->lk_exclusivecount, lkp->lk_lockholder);
- if (lkp->lk_waitcount > 0)
+ else if (lkp->lk_flags & LK_HAVE_EXCL) {
+ printf(" lock type %s: EXCL (count %d) by ",
+ lkp->lk_wmesg, lkp->lk_exclusivecount);
+ if (lkp->lk_flags & LK_SPIN)
+ printf("processor %lu", lkp->lk_cpu);
+ else
+ printf("pid %d", lkp->lk_lockholder);
+ } else
+ printf(" not locked");
+ if ((lkp->lk_flags & LK_SPIN) == 0 && lkp->lk_waitcount > 0)
printf(" with %d pending", lkp->lk_waitcount);
}
#if defined(LOCKDEBUG)
+TAILQ_HEAD(, simplelock) simplelock_list =
+ TAILQ_HEAD_INITIALIZER(simplelock_list);
+
+#if defined(MULTIPROCESSOR) /* { */
+struct simplelock simplelock_list_slock = SIMPLELOCK_INITIALIZER;
+
+#define SLOCK_LIST_LOCK() \
+ __cpu_simple_lock(&simplelock_list_slock.lock_data)
+
+#define SLOCK_LIST_UNLOCK() \
+ __cpu_simple_unlock(&simplelock_list_slock.lock_data)
+
+#define SLOCK_COUNT(x) \
+ curcpu()->ci_simple_locks += (x)
+#else
+u_long simple_locks;
+
+#define SLOCK_LIST_LOCK() /* nothing */
+
+#define SLOCK_LIST_UNLOCK() /* nothing */
-int lockdebug_print = 0;
-int lockdebug_debugger = 0;
+#define SLOCK_COUNT(x) simple_locks += (x)
+#endif /* MULTIPROCESSOR */ /* } */
+
+#ifdef MULTIPROCESSOR
+#define SLOCK_MP() lock_printf("on cpu %ld\n", \
+ (u_long) cpu_number())
+#else
+#define SLOCK_MP() /* nothing */
+#endif
+
+#define SLOCK_WHERE(str, alp, id, l) \
+do { \
+ lock_printf("\n"); \
+ lock_printf(str); \
+ lock_printf("lock: %p, currently at: %s:%d\n", (alp), (id), (l)); \
+ SLOCK_MP(); \
+ if ((alp)->lock_file != NULL) \
+ lock_printf("last locked: %s:%d\n", (alp)->lock_file, \
+ (alp)->lock_line); \
+ if ((alp)->unlock_file != NULL) \
+ lock_printf("last unlocked: %s:%d\n", (alp)->unlock_file, \
+ (alp)->unlock_line); \
+ SLOCK_TRACE() \
+ SLOCK_DEBUGGER(); \
+} while (/*CONSTCOND*/0)
/*
* Simple lock functions so that the debugger can see from whence
@@ -474,7 +889,16 @@ simple_lock_init(lkp)
struct simplelock *lkp;
{
- lkp->lock_data = SLOCK_UNLOCKED;
+#if defined(MULTIPROCESSOR) /* { */
+ __cpu_simple_lock_init(&alp->lock_data);
+#else
+ alp->lock_data = __SIMPLELOCK_UNLOCKED;
+#endif /* } */
+ alp->lock_file = NULL;
+ alp->lock_line = 0;
+ alp->unlock_file = NULL;
+ alp->unlock_line = 0;
+ alp->lock_holder = LK_NOCPU;
}
void
@@ -483,16 +907,80 @@ _simple_lock(lkp, id, l)
const char *id;
int l;
{
+ cpuid_t cpu_id = CPU_NUMBER();
+ int s;
+
+ s = spllock();
+
+ /*
+ * MULTIPROCESSOR case: This is `safe' since if it's not us, we
+ * don't take any action, and just fall into the normal spin case.
+ */
+ if (alp->lock_data == __SIMPLELOCK_LOCKED) {
+#if defined(MULTIPROCESSOR) /* { */
+ if (alp->lock_holder == cpu_id) {
+ SLOCK_WHERE("simple_lock: locking against myself\n",
+ alp, id, l);
+ goto out;
+ }
+#else
+ SLOCK_WHERE("simple_lock: lock held\n", alp, id, l);
+ goto out;
+#endif /* MULTIPROCESSOR */ /* } */
+ }
+
+#if defined(MULTIPROCESSOR) /* { */
+ /* Acquire the lock before modifying any fields. */
+ splx(s);
+ __cpu_simple_lock(&alp->lock_data);
+ s = spllock();
+#else
+ alp->lock_data = __SIMPLELOCK_LOCKED;
+#endif /* } */
- if (lkp->lock_data == SLOCK_LOCKED) {
- if (lockdebug_print)
- printf("%s:%d simple_lock: lock held...\n", id, l);
- if (lockdebug_debugger)
- Debugger();
+ if (alp->lock_holder != LK_NOCPU) {
+ SLOCK_WHERE("simple_lock: uninitialized lock\n",
+ alp, id, l);
}
- lkp->lock_data = SLOCK_LOCKED;
+ alp->lock_file = id;
+ alp->lock_line = l;
+ alp->lock_holder = cpu_id;
+
+ SLOCK_LIST_LOCK();
+ /* XXX Cast away volatile */
+ TAILQ_INSERT_TAIL(&simplelock_list, (struct simplelock *)alp, list);
+ SLOCK_LIST_UNLOCK();
+
+ SLOCK_COUNT(1);
+
+ out:
+ splx(s);
}
+int
+_simple_lock_held(__volatile struct simplelock *alp)
+{
+ cpuid_t cpu_id = CPU_NUMBER();
+ int s, locked = 0;
+
+ s = spllock();
+
+#if defined(MULTIPROCESSOR)
+ if (__cpu_simple_lock_try(&alp->lock_data) == 0)
+ locked = (alp->lock_holder == cpu_id);
+ else
+ __cpu_simple_unlock(&alp->lock_data);
+#else
+ if (alp->lock_data == __SIMPLELOCK_LOCKED) {
+ locked = 1;
+ KASSERT(alp->lock_holder == cpu_id);
+ }
+#endif
+
+ splx(s);
+
+ return (locked);
+}
int
_simple_lock_try(lkp, id, l)
@@ -500,14 +988,50 @@ _simple_lock_try(lkp, id, l)
const char *id;
int l;
{
+ cpuid_t cpu_id = CPU_NUMBER();
+ int s, rv = 0;
+
+ s = spllock();
- if (lkp->lock_data == SLOCK_LOCKED) {
- if (lockdebug_print)
- printf("%s:%d simple_lock: lock held...\n", id, l);
- if (lockdebug_debugger)
- Debugger();
+ /*
+ * MULTIPROCESSOR case: This is `safe' since if it's not us, we
+ * don't take any action.
+ */
+#if defined(MULTIPROCESSOR) /* { */
+ if ((rv = __cpu_simple_lock_try(&alp->lock_data)) == 0) {
+ if (alp->lock_holder == cpu_id)
+ SLOCK_WHERE("simple_lock_try: locking against myself\n",
+ alp, id, l);
+ goto out;
}
- return lkp->lock_data = SLOCK_LOCKED;
+#else
+ if (alp->lock_data == __SIMPLELOCK_LOCKED) {
+ SLOCK_WHERE("simple_lock_try: lock held\n", alp, id, l);
+ goto out;
+ }
+ alp->lock_data = __SIMPLELOCK_LOCKED;
+#endif /* MULTIPROCESSOR */ /* } */
+
+ /*
+ * At this point, we have acquired the lock.
+ */
+
+ rv = 1;
+
+ alp->lock_file = id;
+ alp->lock_line = l;
+ alp->lock_holder = cpu_id;
+
+ SLOCK_LIST_LOCK();
+ /* XXX Cast away volatile. */
+ TAILQ_INSERT_TAIL(&simplelock_list, (struct simplelock *)alp, list);
+ SLOCK_LIST_UNLOCK();
+
+ SLOCK_COUNT(1);
+
+ out:
+ splx(s);
+ return (rv);
}
void
@@ -516,30 +1040,239 @@ _simple_unlock(lkp, id, l)
const char *id;
int l;
{
+ int s;
- if (lkp->lock_data == SLOCK_UNLOCKED) {
- if (lockdebug_print)
- printf("%s:%d simple_unlock: lock not held...\n",
- id, l);
- if (lockdebug_debugger)
- Debugger();
+ s = spllock();
+
+ /*
+ * MULTIPROCESSOR case: This is `safe' because we think we hold
+ * the lock, and if we don't, we don't take any action.
+ */
+ if (alp->lock_data == __SIMPLELOCK_UNLOCKED) {
+ SLOCK_WHERE("simple_unlock: lock not held\n",
+ alp, id, l);
+ goto out;
}
- lkp->lock_data = SLOCK_UNLOCKED;
+
+ SLOCK_LIST_LOCK();
+ TAILQ_REMOVE(&simplelock_list, alp, list);
+ SLOCK_LIST_UNLOCK();
+
+ SLOCK_COUNT(-1);
+
+ alp->list.tqe_next = NULL; /* sanity */
+ alp->list.tqe_prev = NULL; /* sanity */
+
+ alp->unlock_file = id;
+ alp->unlock_line = l;
+
+#if defined(MULTIPROCESSOR) /* { */
+ alp->lock_holder = LK_NOCPU;
+ /* Now that we've modified all fields, release the lock. */
+ __cpu_simple_unlock(&alp->lock_data);
+#else
+ alp->lock_data = __SIMPLELOCK_UNLOCKED;
+ KASSERT(alp->lock_holder == CPU_NUMBER());
+ alp->lock_holder = LK_NOCPU;
+#endif /* } */
+
+ out:
+ splx(s);
}
void
-_simple_lock_assert(lkp, state, id, l)
- __volatile struct simplelock *lkp;
- int state;
- const char *id;
- int l;
+simple_lock_dump(void)
+{
+ struct simplelock *alp;
+ int s;
+
+ s = spllock();
+ SLOCK_LIST_LOCK();
+ lock_printf("all simple locks:\n");
+ TAILQ_FOREACH(alp, &simplelock_list, list) {
+ lock_printf("%p CPU %lu %s:%d\n", alp, alp->lock_holder,
+ alp->lock_file, alp->lock_line);
+ }
+ SLOCK_LIST_UNLOCK();
+ splx(s);
+}
+
+void
+simple_lock_freecheck(void *start, void *end)
+{
+ struct simplelock *alp;
+ int s;
+
+ s = spllock();
+ SLOCK_LIST_LOCK();
+ TAILQ_FOREACH(alp, &simplelock_list, list) {
+ if ((void *)alp >= start && (void *)alp < end) {
+ lock_printf("freeing simple_lock %p CPU %lu %s:%d\n",
+ alp, alp->lock_holder, alp->lock_file,
+ alp->lock_line);
+ SLOCK_DEBUGGER();
+ }
+ }
+ SLOCK_LIST_UNLOCK();
+ splx(s);
+ }
+
+/*
+ * We must be holding exactly one lock: the sched_lock.
+ */
+
+#ifdef notyet
+void
+simple_lock_switchcheck(void)
+{
+
+ simple_lock_only_held(&sched_lock, "switching");
+}
+#endif
+
+void
+simple_lock_only_held(volatile struct simplelock *lp, const char *where)
{
- if (lkp->lock_data != state) {
- if (lockdebug_print)
- printf("%s:%d simple_lock_assert: wrong state: %d",
- id, l, lkp->lock_data);
- if (lockdebug_debugger)
- Debugger();
+ struct simplelock *alp;
+ cpuid_t cpu_id = CPU_NUMBER();
+ int s;
+
+ if (lp) {
+ LOCK_ASSERT(simple_lock_held(lp));
+ }
+ s = spllock();
+ SLOCK_LIST_LOCK();
+ TAILQ_FOREACH(alp, &simplelock_list, list) {
+ if (alp == lp)
+ continue;
+ if (alp->lock_holder == cpu_id)
+ break;
+ }
+ SLOCK_LIST_UNLOCK();
+ splx(s);
+
+ if (alp != NULL) {
+ lock_printf("\n%s with held simple_lock %p "
+ "CPU %lu %s:%d\n",
+ where, alp, alp->lock_holder, alp->lock_file,
+ alp->lock_line);
+ SLOCK_TRACE();
+ SLOCK_DEBUGGER();
}
}
#endif /* LOCKDEBUG */
+
+#if defined(MULTIPROCESSOR)
+/*
+ * Functions for manipulating the kernel_lock. We put them here
+ * so that they show up in profiles.
+ */
+
+/*
+ * XXX Instead of using struct lock for the kernel lock and thus requiring us
+ * XXX to implement simplelocks, causing all sorts of fine-grained locks all
+ * XXX over our tree getting activated consuming both time and potentially
+ * XXX introducing locking protocol bugs.
+ */
+#ifdef notyet
+
+struct lock kernel_lock;
+
+void
+_kernel_lock_init(void)
+{
+ spinlockinit(&kernel_lock, "klock", 0);
+}
+
+/*
+ * Acquire/release the kernel lock. Intended for use in the scheduler
+ * and the lower half of the kernel.
+ */
+void
+_kernel_lock(int flag)
+{
+ SCHED_ASSERT_UNLOCKED();
+ spinlockmgr(&kernel_lock, flag, 0);
+}
+
+void
+_kernel_unlock(void)
+{
+ spinlockmgr(&kernel_lock, LK_RELEASE, 0);
+}
+
+/*
+ * Acquire/release the kernel_lock on behalf of a process. Intended for
+ * use in the top half of the kernel.
+ */
+void
+_kernel_proc_lock(struct proc *p)
+{
+ SCHED_ASSERT_UNLOCKED();
+ spinlockmgr(&kernel_lock, LK_EXCLUSIVE, 0);
+ p->p_flag |= P_BIGLOCK;
+}
+
+void
+_kernel_proc_unlock(struct proc *p)
+{
+ p->p_flag &= ~P_BIGLOCK;
+ spinlockmgr(&kernel_lock, LK_RELEASE, 0);
+}
+
+#else
+
+struct __mp_lock kernel_lock;
+
+void
+_kernel_lock_init(void)
+{
+ __mp_lock_init(&kernel_lock);
+}
+
+/*
+ * Acquire/release the kernel lock. Intended for use in the scheduler
+ * and the lower half of the kernel.
+ */
+
+/* XXX The flag should go, all callers want equal behaviour. */
+void
+_kernel_lock(int flag)
+{
+ SCHED_ASSERT_UNLOCKED();
+ __mp_lock(&kernel_lock);
+}
+
+void
+_kernel_unlock(void)
+{
+ __mp_unlock(&kernel_lock);
+}
+
+/*
+ * Acquire/release the kernel_lock on behalf of a process. Intended for
+ * use in the top half of the kernel.
+ */
+void
+_kernel_proc_lock(struct proc *p)
+{
+ SCHED_ASSERT_UNLOCKED();
+ __mp_lock(&kernel_lock);
+ p->p_flag |= P_BIGLOCK;
+}
+
+void
+_kernel_proc_unlock(struct proc *p)
+{
+ p->p_flag &= ~P_BIGLOCK;
+ __mp_unlock(&kernel_lock);
+}
+
+#endif
+
+#ifdef MP_LOCKDEBUG
+/* CPU-dependent timing, needs this to be settable from ddb. */
+int __mp_lock_spinout = 200000000;
+#endif
+
+#endif /* MULTIPROCESSOR */
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 84519c2b60e..aa7ec306c56 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_proc.c,v 1.18 2004/01/29 17:19:42 millert Exp $ */
+/* $OpenBSD: kern_proc.c,v 1.19 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_proc.c,v 1.14 1996/02/09 18:59:41 christos Exp $ */
/*
@@ -85,7 +85,7 @@ struct pool pcred_pool;
* proclist. Processes on this proclist are also on zombproc;
* we use the p_hash member to linkup to deadproc.
*/
-struct simplelock deadproc_slock;
+struct SIMPLELOCK deadproc_slock;
struct proclist deadproc; /* dead, but not yet undead */
static void orphanpg(struct pgrp *);
@@ -104,7 +104,7 @@ procinit()
LIST_INIT(&zombproc);
LIST_INIT(&deadproc);
- simple_lock_init(&deadproc_slock);
+ SIMPLE_LOCK_INIT(&deadproc_slock);
pidhashtbl = hashinit(maxproc / 4, M_PROC, M_WAITOK, &pidhash);
pgrphashtbl = hashinit(maxproc / 4, M_PROC, M_WAITOK, &pgrphash);
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 1e868518989..6d7af1fd136 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_resource.c,v 1.26 2003/12/11 23:02:30 millert Exp $ */
+/* $OpenBSD: kern_resource.c,v 1.27 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_resource.c,v 1.38 1996/10/23 07:19:38 matthias Exp $ */
/*-
@@ -44,6 +44,7 @@
#include <sys/resourcevar.h>
#include <sys/pool.h>
#include <sys/proc.h>
+#include <sys/sched.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
@@ -184,6 +185,7 @@ donice(curp, chgp, n)
register int n;
{
register struct pcred *pcred = curp->p_cred;
+ int s;
if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
@@ -197,7 +199,9 @@ donice(curp, chgp, n)
if (n < chgp->p_nice && suser(curp, 0))
return (EACCES);
chgp->p_nice = n;
+ SCHED_LOCK(s);
(void)resetpriority(chgp);
+ SCHED_UNLOCK(s);
return (0);
}
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 494f6878d80..0913d2b2a1f 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_sig.c,v 1.70 2004/04/06 17:24:11 mickey Exp $ */
+/* $OpenBSD: kern_sig.c,v 1.71 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_sig.c,v 1.54 1996/04/22 01:38:32 christos Exp $ */
/*
@@ -62,6 +62,7 @@
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/ptrace.h>
+#include <sys/sched.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
@@ -805,19 +806,30 @@ trapsignal(p, signum, code, type, sigval)
* regardless of the signal action (eg, blocked or ignored).
*
* Other ignored signals are discarded immediately.
+ *
+ * XXXSMP: Invoked as psignal() or sched_psignal().
*/
void
-psignal(p, signum)
+psignal1(p, signum, dolock)
register struct proc *p;
register int signum;
+ int dolock; /* XXXSMP: works, but icky */
{
register int s, prop;
register sig_t action;
int mask;
+#ifdef DIAGNOSTIC
if ((u_int)signum >= NSIG || signum == 0)
panic("psignal signal number");
+ /* XXXSMP: works, but icky */
+ if (dolock)
+ SCHED_ASSERT_UNLOCKED();
+ else
+ SCHED_ASSERT_LOCKED();
+#endif
+
/* Ignore signal if we are exiting */
if (p->p_flag & P_WEXIT)
return;
@@ -879,7 +891,10 @@ psignal(p, signum)
*/
if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP))
return;
- s = splhigh();
+ /* XXXSMP: works, but icky */
+ if (dolock)
+ SCHED_LOCK(s);
+
switch (p->p_stat) {
case SSLEEP:
@@ -921,7 +936,11 @@ psignal(p, signum)
p->p_siglist &= ~mask;
p->p_xstat = signum;
if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0)
- psignal(p->p_pptr, SIGCHLD);
+ /*
+ * XXXSMP: recursive call; don't lock
+ * the second time around.
+ */
+ sched_psignal(p->p_pptr, SIGCHLD);
proc_stop(p);
goto out;
}
@@ -1009,7 +1028,9 @@ runfast:
run:
setrunnable(p);
out:
- splx(s);
+ /* XXXSMP: works, but icky */
+ if (dolock)
+ SCHED_UNLOCK(s);
}
/*
@@ -1054,7 +1075,7 @@ issignal(struct proc *p)
*/
p->p_xstat = signum;
- s = splstatclock(); /* protect mi_switch */
+ SCHED_LOCK(s); /* protect mi_switch */
if (p->p_flag & P_FSTRACE) {
#ifdef PROCFS
/* procfs debugging */
@@ -1070,6 +1091,7 @@ issignal(struct proc *p)
proc_stop(p);
mi_switch();
}
+ SCHED_ASSERT_UNLOCKED();
splx(s);
/*
@@ -1130,8 +1152,9 @@ issignal(struct proc *p)
if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0)
psignal(p->p_pptr, SIGCHLD);
proc_stop(p);
- s = splstatclock();
+ SCHED_LOCK(s);
mi_switch();
+ SCHED_ASSERT_UNLOCKED();
splx(s);
break;
} else if (prop & SA_IGNORE) {
@@ -1179,6 +1202,9 @@ void
proc_stop(p)
struct proc *p;
{
+#ifdef MULTIPROCESSOR
+ SCHED_ASSERT_LOCKED();
+#endif
p->p_stat = SSTOP;
p->p_flag &= ~P_WAITED;
@@ -1205,6 +1231,9 @@ postsig(signum)
if (signum == 0)
panic("postsig");
#endif
+
+ KERNEL_PROC_LOCK(p);
+
mask = sigmask(signum);
p->p_siglist &= ~mask;
action = ps->ps_sigact[signum];
@@ -1254,7 +1283,11 @@ postsig(signum)
* mask from before the sigpause is what we want
* restored after the signal processing is completed.
*/
+#ifdef MULTIPROCESSOR
+ s = splsched();
+#else
s = splhigh();
+#endif
if (ps->ps_flags & SAS_OLDMASK) {
returnmask = ps->ps_oldmask;
ps->ps_flags &= ~SAS_OLDMASK;
@@ -1279,6 +1312,8 @@ postsig(signum)
(*p->p_emul->e_sendsig)(action, signum, returnmask, code,
type, sigval);
}
+
+ KERNEL_PROC_UNLOCK(p);
}
/*
@@ -1308,7 +1343,6 @@ sigexit(p, signum)
register struct proc *p;
int signum;
{
-
/* Mark process as going away */
p->p_flag |= P_WEXIT;
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
index 52432e05522..32e659af713 100644
--- a/sys/kern/kern_subr.c
+++ b/sys/kern/kern_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_subr.c,v 1.27 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: kern_subr.c,v 1.28 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_subr.c,v 1.15 1996/04/09 17:21:56 ragge Exp $ */
/*
@@ -40,6 +40,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
+#include <sys/sched.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/kernel.h>
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index baba311f668..ab7ca8f7ed3 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_synch.c,v 1.55 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: kern_synch.c,v 1.56 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
/*-
@@ -59,15 +59,19 @@ u_char curpriority; /* usrpri of curproc */
#endif
int lbolt; /* once a second sleep address */
#ifdef __HAVE_CPUINFO
-int rrticks_init; /* # of harclock ticks per roundrobin */
+int rrticks_init; /* # of hardclock ticks per roundrobin() */
#endif
int whichqs; /* Bit mask summary of non-empty Q's. */
struct prochd qs[NQS];
+struct SIMPLELOCK sched_lock;
+
void scheduler_start(void);
-#ifndef __HAVE_CPUINFO
+#ifdef __HAVE_CPUINFO
+void roundrobin(struct cpu_info *);
+#else
void roundrobin(void *);
#endif
void schedcpu(void *);
@@ -85,11 +89,13 @@ scheduler_start()
/*
* We avoid polluting the global namespace by keeping the scheduler
* timeouts static in this function.
- * We setup the timeouts here and kick roundrobin and schedcpu once to
+ * We setup the timeouts here and kick schedcpu and roundrobin once to
* make them do their job.
*/
- timeout_set(&roundrobin_to, roundrobin, &roundrobin_to);
+#ifndef __HAVE_CPUINFO
+ timeout_set(&roundrobin_to, schedcpu, &roundrobin_to);
+#endif
timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
#ifdef __HAVE_CPUINFO
@@ -103,6 +109,7 @@ scheduler_start()
/*
* Force switch among equal priority processes every 100ms.
*/
+/* ARGSUSED */
#ifdef __HAVE_CPUINFO
void
roundrobin(struct cpu_info *ci)
@@ -122,7 +129,7 @@ roundrobin(struct cpu_info *ci)
*/
spc->spc_schedflags |= SPCF_SHOULDYIELD;
} else {
- spc->spc_schedflags |= SPCF_SEENRR;
+ spc->spc_schedflags |= SPCF_SEENRR;
}
splx(s);
}
@@ -130,7 +137,6 @@ roundrobin(struct cpu_info *ci)
need_resched(curcpu());
}
#else
-/* ARGSUSED */
void
roundrobin(void *arg)
{
@@ -152,7 +158,8 @@ roundrobin(void *arg)
}
splx(s);
}
- need_resched();
+
+ need_resched(0);
timeout_add(to, hz / 10);
}
#endif
@@ -298,6 +305,8 @@ schedcpu(arg)
p->p_cpticks = 0;
newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
p->p_estcpu = newcpu;
+ splx(s);
+ SCHED_LOCK(s);
resetpriority(p);
if (p->p_priority >= PUSER) {
if ((p != curproc) &&
@@ -310,7 +319,7 @@ schedcpu(arg)
} else
p->p_priority = p->p_usrpri;
}
- splx(s);
+ SCHED_UNLOCK(s);
}
uvm_meter();
wakeup((caddr_t)&lbolt);
@@ -329,6 +338,8 @@ updatepri(p)
register unsigned int newcpu = p->p_estcpu;
register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
+ SCHED_ASSERT_LOCKED();
+
if (p->p_slptime > 5 * loadfac)
p->p_estcpu = 0;
else {
@@ -392,11 +403,6 @@ ltsleep(ident, priority, wmesg, timo, interlock)
int catch = priority & PCATCH;
int relock = (priority & PNORELOCK) == 0;
-#ifdef KTRACE
- if (KTRPOINT(p, KTR_CSW))
- ktrcsw(p, 1, 0);
-#endif
- s = splhigh();
if (cold || panicstr) {
/*
* After a panic, or during autoconfiguration,
@@ -404,16 +410,26 @@ ltsleep(ident, priority, wmesg, timo, interlock)
* don't run any other procs or panic below,
* in case this is the idle process and already asleep.
*/
+ s = splhigh();
splx(safepri);
splx(s);
if (interlock != NULL && relock == 0)
simple_unlock(interlock);
return (0);
}
+
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_CSW))
+ ktrcsw(p, 1, 0);
+#endif
+
+ SCHED_LOCK(s);
+
#ifdef DIAGNOSTIC
- if (ident == NULL || p->p_stat != SRUN || p->p_back)
+ if (ident == NULL || p->p_stat != SONPROC || p->p_back != NULL)
panic("tsleep");
#endif
+
p->p_wchan = ident;
p->p_wmesg = wmesg;
p->p_slptime = 0;
@@ -452,29 +468,39 @@ ltsleep(ident, priority, wmesg, timo, interlock)
if ((sig = CURSIG(p)) != 0) {
if (p->p_wchan)
unsleep(p);
- p->p_stat = SRUN;
+ p->p_stat = SONPROC;
+ SCHED_UNLOCK(s);
goto resume;
}
if (p->p_wchan == 0) {
catch = 0;
+ SCHED_UNLOCK(s);
goto resume;
}
} else
sig = 0;
p->p_stat = SSLEEP;
p->p_stats->p_ru.ru_nvcsw++;
+ SCHED_ASSERT_LOCKED();
mi_switch();
#ifdef DDB
/* handy breakpoint location after process "wakes" */
__asm(".globl bpendtsleep\nbpendtsleep:");
#endif
+
+ SCHED_ASSERT_UNLOCKED();
+ /*
+ * Note! this splx belongs to the SCHED_LOCK(s) above, mi_switch
+ * releases the scheduler lock, but does not lower the spl.
+ */
+ splx(s);
+
resume:
#ifdef __HAVE_CPUINFO
p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
#else
curpriority = p->p_usrpri;
#endif
- splx(s);
p->p_flag &= ~P_SINTR;
if (p->p_flag & P_TIMEOUT) {
p->p_flag &= ~P_TIMEOUT;
@@ -504,6 +530,7 @@ resume:
if (KTRPOINT(p, KTR_CSW))
ktrcsw(p, 0, 0);
#endif
+
if (interlock != NULL && relock)
simple_lock(interlock);
return (0);
@@ -523,7 +550,7 @@ endtsleep(arg)
int s;
p = (struct proc *)arg;
- s = splhigh();
+ SCHED_LOCK(s);
if (p->p_wchan) {
if (p->p_stat == SSLEEP)
setrunnable(p);
@@ -531,75 +558,7 @@ endtsleep(arg)
unsleep(p);
p->p_flag |= P_TIMEOUT;
}
- splx(s);
-}
-
-/*
- * Short-term, non-interruptable sleep.
- */
-void
-sleep(ident, priority)
- void *ident;
- int priority;
-{
- register struct proc *p = curproc;
- register struct slpque *qp;
- register int s;
-
-#ifdef DIAGNOSTIC
- if (priority > PZERO) {
- printf("sleep called with priority %d > PZERO, wchan: %p\n",
- priority, ident);
- panic("old sleep");
- }
-#endif
- s = splhigh();
- if (cold || panicstr) {
- /*
- * After a panic, or during autoconfiguration,
- * just give interrupts a chance, then just return;
- * don't run any other procs or panic below,
- * in case this is the idle process and already asleep.
- */
- splx(safepri);
- splx(s);
- return;
- }
-#ifdef DIAGNOSTIC
- if (ident == NULL || p->p_stat != SRUN || p->p_back)
- panic("sleep");
-#endif
- p->p_wchan = ident;
- p->p_wmesg = NULL;
- p->p_slptime = 0;
- p->p_priority = priority;
- qp = &slpque[LOOKUP(ident)];
- if (qp->sq_head == 0)
- qp->sq_head = p;
- else
- *qp->sq_tailp = p;
- *(qp->sq_tailp = &p->p_forw) = 0;
- p->p_stat = SSLEEP;
- p->p_stats->p_ru.ru_nvcsw++;
-#ifdef KTRACE
- if (KTRPOINT(p, KTR_CSW))
- ktrcsw(p, 1, 0);
-#endif
- mi_switch();
-#ifdef DDB
- /* handy breakpoint location after process "wakes" */
- __asm(".globl bpendsleep\nbpendsleep:");
-#endif
-#ifdef KTRACE
- if (KTRPOINT(p, KTR_CSW))
- ktrcsw(p, 0, 0);
-#endif
-#ifdef __HAVE_CPUINFO
- p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
-#else
- curpriority = p->p_usrpri;
-#endif
- splx(s);
+ SCHED_UNLOCK(s);
}
/*
@@ -611,9 +570,15 @@ unsleep(p)
{
register struct slpque *qp;
register struct proc **hp;
+#if 0
int s;
- s = splhigh();
+ /*
+ * XXX we cannot do recursive SCHED_LOCKing yet. All callers lock
+ * anyhow.
+ */
+ SCHED_LOCK(s);
+#endif
if (p->p_wchan) {
hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
while (*hp != p)
@@ -623,9 +588,25 @@ unsleep(p)
qp->sq_tailp = hp;
p->p_wchan = 0;
}
- splx(s);
+#if 0
+ SCHED_UNLOCK(s);
+#endif
+}
+
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+void
+sched_unlock_idle(void)
+{
+ SIMPLE_UNLOCK(&sched_lock);
}
+void
+sched_lock_idle(void)
+{
+ SIMPLE_LOCK(&sched_lock);
+}
+#endif /* MULTIPROCESSOR || LOCKDEBUG */
+
/*
* Make all processes sleeping on the specified identifier runnable.
*/
@@ -638,7 +619,7 @@ wakeup_n(ident, n)
struct proc *p, **q;
int s;
- s = splhigh();
+ SCHED_LOCK(s);
qp = &slpque[LOOKUP(ident)];
restart:
for (q = &qp->sq_head; (p = *q) != NULL; ) {
@@ -662,15 +643,19 @@ restart:
/*
* Since curpriority is a user priority,
* p->p_priority is always better than
- * curpriority.
+ * curpriority on the last CPU on
+ * which it ran.
+ *
+ * XXXSMP See affinity comment in
+ * resched_proc().
*/
-
if ((p->p_flag & P_INMEM) != 0) {
setrunqueue(p);
#ifdef __HAVE_CPUINFO
+ KASSERT(p->p_cpu != NULL);
need_resched(p->p_cpu);
#else
- need_resched();
+ need_resched(0);
#endif
} else {
wakeup((caddr_t)&proc0);
@@ -685,7 +670,7 @@ restart:
} else
q = &p->p_forw;
}
- splx(s);
+ SCHED_UNLOCK(s);
}
void
@@ -705,11 +690,12 @@ yield()
struct proc *p = curproc;
int s;
- s = splstatclock();
+ SCHED_LOCK(s);
p->p_priority = p->p_usrpri;
setrunqueue(p);
p->p_stats->p_ru.ru_nvcsw++;
mi_switch();
+ SCHED_ASSERT_UNLOCKED();
splx(s);
}
@@ -732,11 +718,13 @@ preempt(newp)
if (newp != NULL)
panic("preempt: cpu_preempt not yet implemented");
- s = splstatclock();
+ SCHED_LOCK(s);
p->p_priority = p->p_usrpri;
+ p->p_stat = SRUN;
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
+ SCHED_ASSERT_UNLOCKED();
splx(s);
}
@@ -750,11 +738,28 @@ mi_switch()
struct proc *p = curproc; /* XXX */
struct rlimit *rlim;
struct timeval tv;
+#if defined(MULTIPROCESSOR)
+ int hold_count;
+#endif
#ifdef __HAVE_CPUINFO
struct schedstate_percpu *spc = &p->p_cpu->ci_schedstate;
#endif
- splassert(IPL_STATCLOCK);
+ SCHED_ASSERT_LOCKED();
+
+#if defined(MULTIPROCESSOR)
+ /*
+ * Release the kernel_lock, as we are about to yield the CPU.
+ * The scheduler lock is still held until cpu_switch()
+ * selects a new process and removes it from the run queue.
+ */
+ if (p->p_flag & P_BIGLOCK)
+#ifdef notyet
+ hold_count = spinlock_release_all(&kernel_lock);
+#else
+ hold_count = __mp_release_all(&kernel_lock);
+#endif
+#endif
/*
* Compute the amount of time during which the current
@@ -765,19 +770,19 @@ mi_switch()
if (timercmp(&tv, &spc->spc_runtime, <)) {
#if 0
printf("time is not monotonic! "
- "tv=%ld.%06ld, runtime=%ld.%06ld\n",
+ "tv=%lu.%06lu, runtime=%lu.%06lu\n",
tv.tv_sec, tv.tv_usec, spc->spc_runtime.tv_sec,
spc->spc_runtime.tv_usec);
#endif
} else {
- timersub(&tv, &spc->runtime, &tv);
+ timersub(&tv, &spc->spc_runtime, &tv);
timeradd(&p->p_rtime, &tv, &p->p_rtime);
}
#else
if (timercmp(&tv, &runtime, <)) {
#if 0
printf("time is not monotonic! "
- "tv=%ld.%06ld, runtime=%ld.%06ld\n",
+ "tv=%lu.%06lu, runtime=%lu.%06lu\n",
tv.tv_sec, tv.tv_usec, runtime.tv_sec, runtime.tv_usec);
#endif
} else {
@@ -817,12 +822,38 @@ mi_switch()
uvmexp.swtch++;
cpu_switch(p);
+ /*
+ * Make sure that MD code released the scheduler lock before
+ * resuming us.
+ */
+ SCHED_ASSERT_UNLOCKED();
+
+ /*
+ * We're running again; record our new start time. We might
+ * be running on a new CPU now, so don't use the cache'd
+ * schedstate_percpu pointer.
+ */
#ifdef __HAVE_CPUINFO
- /* p->p_cpu might have changed in cpu_switch() */
+ KDASSERT(p->p_cpu != NULL);
+ KDASSERT(p->p_cpu == curcpu());
microtime(&p->p_cpu->ci_schedstate.spc_runtime);
#else
microtime(&runtime);
#endif
+
+#if defined(MULTIPROCESSOR)
+ /*
+ * Reacquire the kernel_lock now. We do this after we've
+ * released the scheduler lock to avoid deadlock, and before
+ * we reacquire the interlock.
+ */
+ if (p->p_flag & P_BIGLOCK)
+#ifdef notyet
+ spinlock_acquire_count(&kernel_lock, hold_count);
+#else
+ __mp_acquire_count(&kernel_lock, hold_count);
+#endif
+#endif
}
/*
@@ -836,6 +867,7 @@ rqinit()
for (i = 0; i < NQS; i++)
qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
+ SIMPLE_LOCK_INIT(&sched_lock);
}
static __inline void
@@ -845,13 +877,35 @@ resched_proc(struct proc *p, u_char pri)
struct cpu_info *ci;
#endif
+ /*
+ * XXXSMP
+ * Since p->p_cpu persists across a context switch,
+ * this gives us *very weak* processor affinity, in
+ * that we notify the CPU on which the process last
+ * ran that it should try to switch.
+ *
+ * This does not guarantee that the process will run on
+ * that processor next, because another processor might
+ * grab it the next time it performs a context switch.
+ *
+ * This also does not handle the case where its last
+ * CPU is running a higher-priority process, but every
+ * other CPU is running a lower-priority process. There
+ * are ways to handle this situation, but they're not
+ * currently very pretty, and we also need to weigh the
+ * cost of moving a process from one CPU to another.
+ *
+ * XXXSMP
+ * There is also the issue of locking the other CPU's
+ * sched state, which we currently do not do.
+ */
#ifdef __HAVE_CPUINFO
ci = (p->p_cpu != NULL) ? p->p_cpu : curcpu();
if (pri < ci->ci_schedstate.spc_curpriority)
need_resched(ci);
#else
if (pri < curpriority)
- need_resched();
+ need_resched(0);
#endif
}
@@ -864,12 +918,12 @@ void
setrunnable(p)
register struct proc *p;
{
- register int s;
+ SCHED_ASSERT_LOCKED();
- s = splhigh();
switch (p->p_stat) {
case 0:
case SRUN:
+ case SONPROC:
case SZOMB:
case SDEAD:
default:
@@ -890,7 +944,6 @@ setrunnable(p)
p->p_stat = SRUN;
if (p->p_flag & P_INMEM)
setrunqueue(p);
- splx(s);
if (p->p_slptime > 1)
updatepri(p);
p->p_slptime = 0;
@@ -911,6 +964,8 @@ resetpriority(p)
{
register unsigned int newpriority;
+ SCHED_ASSERT_LOCKED();
+
newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
newpriority = min(newpriority, MAXPRI);
p->p_usrpri = newpriority;
@@ -936,8 +991,12 @@ void
schedclock(p)
struct proc *p;
{
+ int s;
+
p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
+ SCHED_LOCK(s);
resetpriority(p);
+ SCHED_UNLOCK(s);
if (p->p_priority >= PUSER)
p->p_priority = p->p_usrpri;
}
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index a517ba2951c..603a354b76b 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_sysctl.c,v 1.111 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: kern_sysctl.c,v 1.112 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */
/*-
@@ -429,6 +429,20 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
return (sysctl_malloc(name + 1, namelen - 1, oldp, oldlenp,
newp, newlen, p));
case KERN_CPTIME:
+#ifdef MULTIPROCESSOR
+ {
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+ int i;
+
+ bzero(cp_time, sizeof(cp_time));
+
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ for (i = 0; i < CPUSTATES; i++)
+ cp_time[i] += ci->ci_schedstate.spc_cp_time[i];
+ }
+ }
+#endif
return (sysctl_rdstruct(oldp, oldlenp, newp, &cp_time,
sizeof(cp_time)));
case KERN_NCHSTATS:
@@ -1317,6 +1331,11 @@ fill_kproc2(struct proc *p, struct kinfo_proc2 *ki)
&p->p_stats->p_cru.ru_stime, &ut);
ki->p_uctime_sec = ut.tv_sec;
ki->p_uctime_usec = ut.tv_usec;
+ ki->p_cpuid = KI_NOCPU;
+#ifdef MULTIPROCESSOR
+ if (p->p_cpu != NULL)
+ ki->p_cpuid = p->p_cpu->ci_cpuid;
+#endif
PRELE(p);
}
}
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index f40928b2824..1ed5f182d36 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_time.c,v 1.40 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: kern_time.c,v 1.41 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: kern_time.c,v 1.20 1996/02/18 11:57:06 fvdl Exp $ */
/*
@@ -99,7 +99,7 @@ settime(struct timeval *tv)
timersub(tv, &time, &delta);
time = *tv;
timeradd(&boottime, &delta, &boottime);
-#ifdef __HAVE_CURCPU
+#ifdef __HAVE_CPUINFO
/*
* XXXSMP
* This is wrong. We should traverse a list of all
diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c
index e9114b34990..db01a8a868c 100644
--- a/sys/kern/subr_pool.c
+++ b/sys/kern/subr_pool.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: subr_pool.c,v 1.41 2004/06/02 22:17:22 tedu Exp $ */
+/* $OpenBSD: subr_pool.c,v 1.42 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */
/*-
@@ -54,7 +54,6 @@
/*
* XXX - for now.
*/
-#define SIMPLELOCK_INITIALIZER { SLOCK_UNLOCKED }
#ifdef LOCKDEBUG
#define simple_lock_freecheck(a, s) do { /* nothing */ } while (0)
#define simple_lock_only_held(lkp, str) do { /* nothing */ } while (0)
@@ -86,7 +85,7 @@ int pool_inactive_time = 10;
static struct pool *drainpp;
/* This spin lock protects both pool_head and drainpp. */
-struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;
+struct simplelock pool_head_slock;
struct pool_item_header {
/* Page headers */
@@ -529,6 +528,8 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
0, "pcgpool", NULL);
}
+ simple_lock_init(&pool_head_slock);
+
/* Insert this into the list of all pools. */
simple_lock(&pool_head_slock);
TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
@@ -2062,9 +2063,9 @@ pool_allocator_drain(struct pool_allocator *pa, struct pool *org, int need)
TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
if (pp == org)
continue;
- simple_unlock(&pa->pa_list);
- freed = pool_reclaim(pp)
- simple_lock(&pa->pa_list);
+ simple_unlock(&pa->pa_slock);
+ freed = pool_reclaim(pp);
+ simple_lock(&pa->pa_slock);
} while ((pp = TAILQ_FIRST(&pa->pa_list)) != start && (freed < need));
if (!freed) {
diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c
index 906ce028770..a843c2f1dc2 100644
--- a/sys/kern/subr_prf.c
+++ b/sys/kern/subr_prf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: subr_prf.c,v 1.58 2004/01/03 14:08:53 espie Exp $ */
+/* $OpenBSD: subr_prf.c,v 1.59 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: subr_prf.c,v 1.45 1997/10/24 18:14:25 chuck Exp $ */
/*-
@@ -47,6 +47,7 @@
#include <sys/ioctl.h>
#include <sys/vnode.h>
#include <sys/file.h>
+#include <sys/simplelock.h>
#include <sys/tty.h>
#include <sys/tprintf.h>
#include <sys/syslog.h>
@@ -97,6 +98,50 @@ extern int uvm_doswapencrypt;
int kprintf(const char *, int, void *, char *, va_list);
void kputchar(int, int, struct tty *);
+#ifdef MULTIPROCESSOR
+
+#ifdef notdef
+
+struct simplelock kprintf_slock;
+
+#define KPRINTF_MUTEX_ENTER(s) \
+do { \
+ (s) = splhigh(); \
+ simple_lock(&kprintf_slock); \
+} while (/*CONSTCOND*/0)
+
+#define KPRINTF_MUTEX_EXIT(s) \
+do { \
+ simple_unlock(&kprintf_slock); \
+ splx((s)); \
+} while (/*CONSTCOND*/0)
+
+#else
+
+struct __mp_lock kprintf_slock;
+
+#define KPRINTF_MUTEX_ENTER(s) \
+do { \
+ (s) = splhigh(); \
+ __mp_lock(&kprintf_slock); \
+} while (/*CONSTCOND*/0)
+
+#define KPRINTF_MUTEX_EXIT(s) \
+do { \
+ __mp_unlock(&kprintf_slock); \
+ splx((s)); \
+} while (/*CONSTCOND*/0)
+
+#endif
+
+#else
+
+struct simplelock kprintf_slock;
+#define KPRINTF_MUTEX_ENTER(s) (s) = splhigh()
+#define KPRINTF_MUTEX_EXIT(s) splx((s))
+
+#endif /* MULTIPROCESSOR */
+
/*
* globals
*/
@@ -506,6 +551,9 @@ printf(const char *fmt, ...)
{
va_list ap;
int savintr, retval;
+ int s;
+
+ KPRINTF_MUTEX_ENTER(s);
savintr = consintr; /* disable interrupts */
consintr = 0;
@@ -515,6 +563,9 @@ printf(const char *fmt, ...)
if (!panicstr)
logwakeup();
consintr = savintr; /* reenable interrupts */
+
+ KPRINTF_MUTEX_EXIT(s);
+
return(retval);
}
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index a6fdd09c95e..c512b9a40dd 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sys_generic.c,v 1.47 2003/12/10 23:10:08 millert Exp $ */
+/* $OpenBSD: sys_generic.c,v 1.48 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */
/*
@@ -55,6 +55,7 @@
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
+#include <sys/sched.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
@@ -852,7 +853,7 @@ selwakeup(sip)
p = pfind(sip->si_selpid);
sip->si_selpid = 0;
if (p != NULL) {
- s = splhigh();
+ SCHED_LOCK(s);
if (p->p_wchan == (caddr_t)&selwait) {
if (p->p_stat == SSLEEP)
setrunnable(p);
@@ -860,7 +861,7 @@ selwakeup(sip)
unsleep(p);
} else if (p->p_flag & P_SELECT)
p->p_flag &= ~P_SELECT;
- splx(s);
+ SCHED_UNLOCK(s);
}
}
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 37664becb7d..21180cda141 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sys_process.c,v 1.27 2004/02/08 00:04:21 deraadt Exp $ */
+/* $OpenBSD: sys_process.c,v 1.28 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: sys_process.c,v 1.55 1996/05/15 06:17:47 tls Exp $ */
/*-
@@ -57,6 +57,7 @@
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <sys/user.h>
+#include <sys/sched.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
@@ -91,6 +92,7 @@ sys_ptrace(p, v, retval)
#endif
int error, write;
int temp;
+ int s;
/* "A foolish consistency..." XXX */
if (SCARG(uap, req) == PT_TRACE_ME)
@@ -353,7 +355,9 @@ sys_ptrace(p, v, retval)
/* Finally, deliver the requested signal (or none). */
if (t->p_stat == SSTOP) {
t->p_xstat = SCARG(uap, data);
+ SCHED_LOCK(s);
setrunnable(t);
+ SCHED_UNLOCK(s);
} else {
if (SCARG(uap, data) != 0)
psignal(t, SCARG(uap, data));
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
index 0aca0394f7a..6dec67d20fe 100644
--- a/sys/kern/tty.c
+++ b/sys/kern/tty.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tty.c,v 1.64 2004/03/19 19:03:07 deraadt Exp $ */
+/* $OpenBSD: tty.c,v 1.65 2004/06/13 21:49:26 niklas Exp $ */
/* $NetBSD: tty.c,v 1.68.4.2 1996/06/06 16:04:52 thorpej Exp $ */
/*-
@@ -2098,7 +2098,8 @@ ttyinfo(tp)
pick = p;
ttyprintf(tp, " cmd: %s %d [%s] ", pick->p_comm, pick->p_pid,
- pick->p_stat == SRUN ? "running" :
+ pick->p_stat == SONPROC ? "running" :
+ pick->p_stat == SRUN ? "runnable" :
pick->p_wmesg ? pick->p_wmesg : "iowait");
calcru(pick, &utime, &stime, NULL);
diff --git a/sys/net/netisr_dispatch.h b/sys/net/netisr_dispatch.h
index 822577ad843..96493b324db 100644
--- a/sys/net/netisr_dispatch.h
+++ b/sys/net/netisr_dispatch.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: netisr_dispatch.h,v 1.3 2001/06/09 06:16:38 angelos Exp $ */
+/* $OpenBSD: netisr_dispatch.h,v 1.4 2004/06/13 21:49:27 niklas Exp $ */
/* $NetBSD: netisr_dispatch.h,v 1.2 2000/07/02 04:40:47 cgd Exp $ */
/*
@@ -16,20 +16,23 @@
* }
*/
-#ifndef _NET_NETISR_DISPATCH_H_
-#define _NET_NETISR_DISPATCH_H_
-
#ifndef _NET_NETISR_H_
#error <net/netisr.h> must be included before <net/netisr_dispatch.h>
#endif
+#ifndef _NET_NETISR_DISPATCH_H_
+#define _NET_NETISR_DISPATCH_H_
+#include "ether.h"
+#include "ppp.h"
+#include "bridge.h"
+#endif
+
/*
* When adding functions to this list, be sure to add headers to provide
* their prototypes in <net/netisr.h> (if necessary).
*/
#ifdef INET
-#include "ether.h"
#if NETHER > 0
DONETISR(NETISR_ARP,arpintr);
#endif
@@ -59,12 +62,9 @@
#ifdef NATM
DONETISR(NETISR_NATM,natmintr);
#endif
-#include "ppp.h"
#if NPPP > 0
DONETISR(NETISR_PPP,pppintr);
#endif
-#include "bridge.h"
#if NBRIDGE > 0
DONETISR(NETISR_BRIDGE,bridgeintr);
#endif
-#endif /* _NET_NETISR_DISPATCH_H_ */
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
index b3c92ca019b..c2bc3965541 100644
--- a/sys/sys/kernel.h
+++ b/sys/sys/kernel.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: kernel.h,v 1.9 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: kernel.h,v 1.10 2004/06/13 21:49:28 niklas Exp $ */
/* $NetBSD: kernel.h,v 1.11 1995/03/03 01:24:16 cgd Exp $ */
/*-
@@ -49,7 +49,7 @@ extern int domainnamelen;
/* 1.2 */
extern volatile struct timeval mono_time;
extern struct timeval boottime;
-#ifndef __HAVE_CURCPU
+#ifndef __HAVE_CPUINFO
extern struct timeval runtime;
#endif
extern volatile struct timeval time;
diff --git a/sys/sys/lock.h b/sys/sys/lock.h
index e5d1899d35a..5da1c1bfd73 100644
--- a/sys/sys/lock.h
+++ b/sys/sys/lock.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: lock.h,v 1.11 2003/06/02 23:28:21 millert Exp $ */
+/* $OpenBSD: lock.h,v 1.12 2004/06/13 21:49:28 niklas Exp $ */
/*
* Copyright (c) 1995
@@ -56,11 +56,51 @@ struct lock {
int lk_sharecount; /* # of accepted shared locks */
int lk_waitcount; /* # of processes sleeping for lock */
short lk_exclusivecount; /* # of recursive exclusive locks */
- short lk_prio; /* priority at which to sleep */
+ short lk_recurselevel; /* lvl above which recursion ok */
+
+ /*
+ * This is the sleep message for sleep locks, and a simple name
+ * for spin locks.
+ */
char *lk_wmesg; /* resource sleeping (for tsleep) */
- int lk_timo; /* maximum sleep time (for tsleep) */
- pid_t lk_lockholder; /* pid of exclusive lock holder */
+
+ union {
+ struct {
+ /* pid of exclusive lock holder */
+ pid_t lk_sleep_lockholder;
+
+ /* priority at which to sleep */
+ int lk_sleep_prio;
+
+ /* maximum sleep time (for tsleep) */
+ int lk_sleep_timo;
+ } lk_un_sleep;
+ struct {
+ /* CPU ID of exclusive lock holder */
+ cpuid_t lk_spin_cpu;
+#if defined(LOCKDEBUG)
+ TAILQ_ENTRY(lock) lk_spin_list;
+#endif
+ } lk_un_spin;
+ } lk_un;
+
+#define lk_lockholder lk_un.lk_un_sleep.lk_sleep_lockholder
+#define lk_prio lk_un.lk_un_sleep.lk_sleep_prio
+#define lk_timo lk_un.lk_un_sleep.lk_sleep_timo
+
+#define lk_cpu lk_un.lk_un_spin.lk_spin_cpu
+#if defined(LOCKDEBUG)
+#define lk_list lk_un.lk_un_spin.lk_spin_list
+#endif
+
+#if defined(LOCKDEBUG)
+ const char *lk_lock_file;
+ const char *lk_unlock_file;
+ int lk_lock_line;
+ int lk_unlock_line;
+#endif
};
+
/*
* Lock request types:
* LK_SHARED - get one of many possible shared locks. If a process
@@ -109,12 +149,14 @@ struct lock {
* or passed in as arguments to the lock manager. The LK_REENABLE flag may be
* set only at the release of a lock obtained by drain.
*/
-#define LK_EXTFLG_MASK 0x00000770 /* mask of external flags */
+#define LK_EXTFLG_MASK 0x00700070 /* mask of external flags */
#define LK_NOWAIT 0x00000010 /* do not sleep to await lock */
#define LK_SLEEPFAIL 0x00000020 /* sleep, then return failure */
#define LK_CANRECURSE 0x00000040 /* allow recursive exclusive lock */
#define LK_REENABLE 0x00000080 /* lock is be reenabled after drain */
-#define LK_RECURSEFAIL 0x00000100 /* fail if recursive exclusive lock */
+#define LK_SETRECURSE 0x00100000 /* other locks while we have it OK */
+#define LK_RECURSEFAIL 0x00200000 /* fail if recursive exclusive lock */
+#define LK_SPIN 0x00400000 /* lock spins instead of sleeps */
/*
* Internal lock flags.
*
@@ -131,9 +173,9 @@ struct lock {
*
* Non-persistent external flags.
*/
-#define LK_INTERLOCK 0x00100000 /* unlock passed simple lock after
+#define LK_INTERLOCK 0x00010000 /* unlock passed simple lock after
getting lk_interlock */
-#define LK_RETRY 0x00200000 /* vn_lock: retry until locked */
+#define LK_RETRY 0x00020000 /* vn_lock: retry until locked */
/*
* Lock return status.
@@ -157,6 +199,7 @@ struct lock {
*/
#define LK_KERNPROC ((pid_t) -2)
#define LK_NOPROC ((pid_t) -1)
+#define LK_NOCPU ((cpuid_t) -1)
struct proc;
@@ -164,14 +207,69 @@ void lockinit(struct lock *, int prio, char *wmesg, int timo,
int flags);
int lockmgr(__volatile struct lock *, u_int flags,
struct simplelock *, struct proc *p);
-void lockmgr_printinfo(struct lock *);
+void lockmgr_printinfo(__volatile struct lock *);
int lockstatus(struct lock *);
+#if (0 && defined(MULTIPROCESSOR)) || defined(LOCKDEBUG)
+#define spinlockinit(lkp, name, flags) \
+ lockinit((lkp), 0, (name), 0, (flags) | LK_SPIN)
+#define spinlockmgr(lkp, flags, intrlk) \
+ lockmgr((lkp), (flags) | LK_SPIN, (intrlk), curproc)
+#else
+#define spinlockinit(lkp, name, flags) (void)(lkp)
+#define spinlockmgr(lkp, flags, intrlk) (0)
+#endif
+
+#if defined(LOCKDEBUG)
+int _spinlock_release_all(__volatile struct lock *, const char *, int);
+void _spinlock_acquire_count(__volatile struct lock *, int, const char *,
+ int);
+
+#define spinlock_release_all(l) _spinlock_release_all((l), __FILE__, __LINE__)
+#define spinlock_acquire_count(l, c) _spinlock_acquire_count((l), (c), \
+ __FILE__, __LINE__)
+#else
+int spinlock_release_all(__volatile struct lock *);
+void spinlock_acquire_count(__volatile struct lock *, int);
+#endif
+
#ifdef LOCKDEBUG
#define LOCK_ASSERT(x) KASSERT(x)
#else
#define LOCK_ASSERT(x) /* nothing */
#endif
-#endif /* !_LOCK_H_ */
+#if defined(MULTIPROCESSOR)
+/*
+ * XXX Instead of using struct lock for the kernel lock and thus requiring us
+ * XXX to implement simplelocks, causing all sorts of fine-grained locks all
+ * XXX over our tree getting activated consuming both time and potentially
+ * XXX introducing locking protocol bugs.
+ */
+#ifdef notyet
+
+extern struct lock kernel_lock;
+
+/*
+ * XXX Simplelock macros used at "trusted" places.
+ */
+#define SIMPLELOCK simplelock
+#define SIMPLE_LOCK_INIT simple_lock_init
+#define SIMPLE_LOCK simple_lock
+#define SIMPLE_UNLOCK simple_unlock
+
+#endif
+#else
+
+/*
+ * XXX Simplelock macros used at "trusted" places.
+ */
+#define SIMPLELOCK simplelock
+#define SIMPLE_LOCK_INIT simple_lock_init
+#define SIMPLE_LOCK simple_lock
+#define SIMPLE_UNLOCK simple_unlock
+
+#endif
+
+#endif /* !_LOCK_H_ */
diff --git a/sys/sys/mplock.h b/sys/sys/mplock.h
new file mode 100644
index 00000000000..73f3c0b1e40
--- /dev/null
+++ b/sys/sys/mplock.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2004 Niklas Hallqvist. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MPLOCK_H_
+#define _MPLOCK_H_
+
+/*
+ * Really simple spinlock implementation with recursive capabilities.
+ * Correctness is paramount; no fanciness allowed.
+ */
+
+struct __mp_lock {
+ __cpu_simple_lock_t mpl_lock;
+ cpuid_t mpl_cpu;
+ int mpl_count;
+};
+
+static __inline void __mp_lock_init(struct __mp_lock *);
+static __inline void __mp_lock(struct __mp_lock *);
+static __inline void __mp_unlock(struct __mp_lock *);
+static __inline int __mp_release_all(struct __mp_lock *);
+static __inline void __mp_acquire_count(struct __mp_lock *, int);
+static __inline int __mp_lock_held(struct __mp_lock *);
+
+/*
+ * XXX Simplelock macros used at "trusted" places.
+ */
+#define SIMPLELOCK __mp_lock
+#define SIMPLE_LOCK_INIT __mp_lock_init
+#define SIMPLE_LOCK __mp_lock
+#define SIMPLE_UNLOCK __mp_unlock
+
+static __inline void
+__mp_lock_init(struct __mp_lock *lock)
+{
+ __cpu_simple_lock_init(&lock->mpl_lock);
+ lock->mpl_cpu = LK_NOCPU;
+ lock->mpl_count = 0;
+}
+
+#if defined(MP_LOCKDEBUG)
+#ifndef DDB
+#error "MP_LOCKDEBUG requires DDB"
+#endif
+
+extern void Debugger(void);
+extern int db_printf(const char *, ...)
+ __attribute__((__format__(__kprintf__,1,2)));
+
+/* CPU-dependent timing, needs this to be settable from ddb. */
+extern int __mp_lock_spinout;
+#endif
+
+static __inline void
+__mp_lock(struct __mp_lock *lock)
+{
+ int s = spllock();
+
+ if (lock->mpl_cpu != cpu_number()) {
+#ifndef MP_LOCKDEBUG
+ __cpu_simple_lock(&lock->mpl_lock);
+#else
+ {
+ int got_it;
+ do {
+ int ticks = __mp_lock_spinout;
+
+ do {
+ got_it = __cpu_simple_lock_try(
+ &lock->mpl_lock);
+ } while (!got_it && ticks-- > 0);
+ if (!got_it) {
+				db_printf(
+				    "__mp_lock(%p): lock spun out\n",
+				    lock);
+ Debugger();
+ }
+ } while (!got_it);
+ }
+#endif
+ lock->mpl_cpu = cpu_number();
+ }
+ lock->mpl_count++;
+ splx(s);
+}
+
+static __inline void
+__mp_unlock(struct __mp_lock *lock)
+{
+ int s = spllock();
+
+#ifdef MP_LOCKDEBUG
+ if (lock->mpl_count == 0 || lock->mpl_cpu == LK_NOCPU) {
+		db_printf("__mp_unlock(%p): releasing not locked lock\n",
+		    lock);
+ Debugger();
+ }
+#endif
+
+ if (--lock->mpl_count == 0) {
+ lock->mpl_cpu = LK_NOCPU;
+ __cpu_simple_unlock(&lock->mpl_lock);
+ }
+ splx(s);
+}
+
+static __inline int
+__mp_release_all(struct __mp_lock *lock) {
+ int s = spllock();
+ int rv = lock->mpl_count;
+
+#ifdef MP_LOCKDEBUG
+ if (lock->mpl_count == 0 || lock->mpl_cpu == LK_NOCPU) {
+		db_printf(
+		    "__mp_release_all(%p): releasing not locked lock\n",
+		    lock);
+ Debugger();
+ }
+#endif
+
+ lock->mpl_cpu = LK_NOCPU;
+ lock->mpl_count = 0;
+ __cpu_simple_unlock(&lock->mpl_lock);
+ splx(s);
+ return (rv);
+}
+
+static __inline void
+__mp_acquire_count(struct __mp_lock *lock, int count) {
+ int s = spllock();
+
+ __cpu_simple_lock(&lock->mpl_lock);
+ lock->mpl_cpu = cpu_number();
+ lock->mpl_count = count;
+ splx(s);
+}
+
+static __inline int
+__mp_lock_held(struct __mp_lock *lock) {
+ return lock->mpl_count;
+}
+
+extern struct __mp_lock kernel_lock;
+
+/* XXX Should really be in proc.h but then __mp_lock is not defined. */
+extern struct SIMPLELOCK deadproc_slock;
+
+#endif /* !_MPLOCK_H_ */
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index c06b14eda43..9f64ed24839 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: proc.h,v 1.70 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: proc.h,v 1.71 2004/06/13 21:49:28 niklas Exp $ */
/* $NetBSD: proc.h,v 1.44 1996/04/22 01:23:21 christos Exp $ */
/*-
@@ -46,7 +46,6 @@
#include <sys/timeout.h> /* For struct timeout. */
#include <sys/event.h> /* For struct klist */
-#ifdef __HAVE_CPUINFO
/*
* CPU states.
* XXX Not really scheduler state, but no other good place to put
@@ -60,24 +59,25 @@
#define CPUSTATES 5
/*
- * Per-CPU scheduler state. XXX - this should be in sys/sched.h
+ * Per-CPU scheduler state.
*/
struct schedstate_percpu {
struct timeval spc_runtime; /* time curproc started running */
__volatile int spc_schedflags; /* flags; see below */
u_int spc_schedticks; /* ticks for schedclock() */
u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
- u_char spc_curpriority; /* usrpri of curproc */
+ u_char spc_curpriority; /* usrpri of curproc */
int spc_rrticks; /* ticks until roundrobin() */
int spc_pscnt; /* prof/stat counter */
- int spc_psdiv; /* prof/stat divisor */
+ int spc_psdiv; /* prof/stat divisor */
};
/* spc_flags */
-#define SPCF_SEENRR 0x0001 /* process has seen roundrobin() */
-#define SPCF_SHOULDYIELD 0x0002 /* process should yield the CPU */
-#define SPCF_SWITCHCLEAR (SPCF_SEENRR|SPCF_SHOULDYIELD)
+#define SPCF_SEENRR 0x0001 /* process has seen roundrobin() */
+#define SPCF_SHOULDYIELD 0x0002 /* process should yield the CPU */
+#define SPCF_SWITCHCLEAR (SPCF_SEENRR|SPCF_SHOULDYIELD)
+#ifdef __HAVE_CPUINFO
/*
* These are the fields we require in struct cpu_info that we get from
* curcpu():
@@ -208,7 +208,7 @@ struct proc {
u_int p_swtime; /* Time swapped in or out. */
u_int p_slptime; /* Time since last blocked. */
#ifdef __HAVE_CPUINFO
- struct cpu_info * __volatile p_cpu;
+ struct cpu_info * __volatile p_cpu; /* CPU we're running on. */
#else
int p_schedflags; /* PSCHED_* flags */
#endif
@@ -264,6 +264,7 @@ struct proc {
u_short p_xstat; /* Exit status for wait; also stop signal. */
u_short p_acflag; /* Accounting flags. */
struct rusage *p_ru; /* Exit information. XXX */
+ int p_locks; /* DEBUG: lockmgr count of held locks */
};
#define p_session p_pgrp->pg_session
@@ -276,6 +277,7 @@ struct proc {
#define SSTOP 4 /* Process debugging or suspension. */
#define SZOMB 5 /* Awaiting collection by parent. */
#define SDEAD 6 /* Process is almost a zombie. */
+#define SONPROC 7 /* Process is currently on a CPU. */
#define P_ZOMBIE(p) ((p)->p_stat == SZOMB || (p)->p_stat == SDEAD)
@@ -311,12 +313,13 @@ struct proc {
#define P_SYSTRACE 0x400000 /* Process system call tracing active*/
#define P_CONTINUED 0x800000 /* Proc has continued from a stopped state. */
#define P_SWAPIN 0x1000000 /* Swapping in right now */
+#define P_BIGLOCK 0x2000000 /* Process needs kernel "big lock" to run */
#define P_BITS \
("\20\01ADVLOCK\02CTTY\03INMEM\04NOCLDSTOP\05PPWAIT\06PROFIL\07SELECT" \
"\010SINTR\011SUGID\012SYSTEM\013TIMEOUT\014TRACED\015WAITED\016WEXIT" \
"\017EXEC\020PWEUPC\021FSTRACE\022SSTEP\023SUGIDEXEC\024NOCLDWAIT" \
- "\025NOZOMBIE\026INEXEC\027SYSTRACE\030CONTINUED")
+ "\025NOZOMBIE\026INEXEC\027SYSTRACE\030CONTINUED\031SWAPIN\032BIGLOCK")
/* Macro to compute the exit signal to be delivered. */
#define P_EXITSIG(p) \
@@ -404,7 +407,6 @@ extern struct proclist allproc; /* List of all processes. */
extern struct proclist zombproc; /* List of zombie processes. */
extern struct proclist deadproc; /* List of dead processes. */
-extern struct simplelock deadproc_slock;
extern struct proc *initproc; /* Process slots for init, pager. */
extern struct proc *syncerproc; /* filesystem syncer daemon */
@@ -448,7 +450,6 @@ void setrunnable(struct proc *);
#if !defined(setrunqueue)
void setrunqueue(struct proc *);
#endif
-void sleep(void *chan, int pri);
void uvm_swapin(struct proc *); /* XXX: uvm_extern.h? */
int ltsleep(void *chan, int pri, const char *wmesg, int timo,
volatile struct simplelock *);
@@ -476,5 +477,9 @@ void child_return(void *);
int proc_cansugid(struct proc *);
void proc_zap(struct proc *);
+
+#if defined(MULTIPROCESSOR)
+void proc_trampoline_mp(void); /* XXX */
+#endif
#endif /* _KERNEL */
#endif /* !_SYS_PROC_H_ */
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
index 08b1fb49904..50625e84149 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: sched.h,v 1.5 2004/06/09 20:18:28 art Exp $ */
+/* $OpenBSD: sched.h,v 1.6 2004/06/13 21:49:28 niklas Exp $ */
/* $NetBSD: sched.h,v 1.2 1999/02/28 18:14:58 ross Exp $ */
/*-
@@ -86,10 +86,11 @@
#define NICE_WEIGHT 2 /* priorities per nice level */
#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - PPQ)
-extern int schedhz; /* ideally: 16 */
+extern int schedhz; /* ideally: 16 */
+extern int rrticks_init; /* ticks per roundrobin() */
#ifdef _SYS_PROC_H_
-void schedclock(struct proc *p);
+void schedclock(struct proc *);
#ifdef __HAVE_CPUINFO
void roundrobin(struct cpu_info *);
#endif
@@ -118,5 +119,75 @@ scheduler_wait_hook(parent, child)
parent->p_estcpu = ESTCPULIM(parent->p_estcpu + child->p_estcpu);
}
#endif /* _SYS_PROC_H_ */
+
+#ifndef splsched
+#define splsched() splhigh()
+#endif
+#ifndef IPL_SCHED
+#define IPL_SCHED IPL_HIGH
+#endif
+
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#include <sys/lock.h>
+
+/*
+ * XXX We use __mp_lock for the kernel lock rather than struct lock; using
+ * XXX struct lock would require us to implement simplelocks, and that would
+ * XXX activate all sorts of fine-grained locks all over our tree, consuming
+ * XXX time and potentially introducing locking protocol bugs.
+ */
+#ifdef notyet
+
+extern struct simplelock sched_lock;
+
+#define SCHED_ASSERT_LOCKED() LOCK_ASSERT(simple_lock_held(&sched_lock))
+#define SCHED_ASSERT_UNLOCKED() LOCK_ASSERT(simple_lock_held(&sched_lock) == 0)
+
+#define SCHED_LOCK(s) \
+do { \
+ s = splsched(); \
+ simple_lock(&sched_lock); \
+} while (/* CONSTCOND */ 0)
+
+#define SCHED_UNLOCK(s) \
+do { \
+ simple_unlock(&sched_lock); \
+ splx(s); \
+} while (/* CONSTCOND */ 0)
+
+#else
+
+extern struct __mp_lock sched_lock;
+
+#define SCHED_ASSERT_LOCKED() LOCK_ASSERT(__mp_lock_held(&sched_lock))
+#define SCHED_ASSERT_UNLOCKED() LOCK_ASSERT(__mp_lock_held(&sched_lock) == 0)
+
+#define SCHED_LOCK(s) \
+do { \
+ s = splsched(); \
+ __mp_lock(&sched_lock); \
+} while (/* CONSTCOND */ 0)
+
+#define SCHED_UNLOCK(s) \
+do { \
+ __mp_unlock(&sched_lock); \
+ splx(s); \
+} while (/* CONSTCOND */ 0)
+
+#endif
+
+void sched_lock_idle(void);
+void sched_unlock_idle(void);
+
+#else /* ! MULTIPROCESSOR || LOCKDEBUG */
+
+#define SCHED_ASSERT_LOCKED()		splassert(IPL_SCHED)
+#define SCHED_ASSERT_UNLOCKED() /* nothing */
+
+#define SCHED_LOCK(s) s = splsched()
+#define SCHED_UNLOCK(s) splx(s)
+
+#endif /* MULTIPROCESSOR || LOCKDEBUG */
+
#endif /* _KERNEL */
#endif /* _SYS_SCHED_H_ */
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index cac591c71d5..9db5681a21d 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: signalvar.h,v 1.11 2004/04/06 17:24:12 mickey Exp $ */
+/* $OpenBSD: signalvar.h,v 1.12 2004/06/13 21:49:28 niklas Exp $ */
/* $NetBSD: signalvar.h,v 1.17 1996/04/22 01:23:31 christos Exp $ */
/*
@@ -159,7 +159,9 @@ void csignal(pid_t pgid, int signum, uid_t uid, uid_t euid);
int issignal(struct proc *p);
void pgsignal(struct pgrp *pgrp, int sig, int checkctty);
void postsig(int sig);
-void psignal(struct proc *p, int sig);
+void psignal1(struct proc *p, int sig, int dolock);
+#define psignal(p, sig) psignal1((p), (sig), 1)
+#define sched_psignal(p, sig) psignal1((p), (sig), 0)
void siginit(struct proc *p);
void trapsignal(struct proc *p, int sig, u_long code, int type,
union sigval val);
diff --git a/sys/sys/simplelock.h b/sys/sys/simplelock.h
index cfc6fdd8ed0..fbd39ef4d4e 100644
--- a/sys/sys/simplelock.h
+++ b/sys/sys/simplelock.h
@@ -1,7 +1,12 @@
-/* $OpenBSD: simplelock.h,v 1.10 2002/03/14 01:27:14 millert Exp $ */
+/* $OpenBSD: simplelock.h,v 1.11 2004/06/13 21:49:28 niklas Exp $ */
#ifndef _SIMPLELOCK_H_
#define _SIMPLELOCK_H_
+
+#ifdef MULTIPROCESSOR
+#include <machine/lock.h>
+#endif
+
/*
* A simple spin lock.
*
@@ -12,28 +17,26 @@
* of these locks while a process is sleeping.
*/
struct simplelock {
+#ifdef MULTIPROCESSOR
+ __cpu_simple_lock_t lock_data;
+#else
int lock_data;
+#endif
};
#ifdef _KERNEL
-#ifndef NCPUS
-#define NCPUS 1
-#endif
-
-#define SLOCK_LOCKED 1
-#define SLOCK_UNLOCKED 0
-
-#define SLOCK_INITIALIZER { SLOCK_UNLOCKED }
-
/*
* We can't debug locks when we use them in real life.
*/
-#if (NCPUS != 1) && defined(LOCKDEBUG)
+#if defined(MULTIPROCESSOR) && defined(LOCKDEBUG)
#undef LOCKDEBUG
#endif
-#if NCPUS == 1
+#if !defined(MULTIPROCESSOR) || 1
+
+#define SLOCK_LOCKED 1
+#define SLOCK_UNLOCKED 0
#ifndef LOCKDEBUG
@@ -42,11 +45,7 @@ struct simplelock {
#define simple_unlock(lkp)
#define simple_lock_assert(lkp)
-static __inline void simple_lock_init(struct simplelock *);
-
-static __inline void
-simple_lock_init(lkp)
- struct simplelock *lkp;
+static __inline void simple_lock_init(struct simplelock *lkp)
{
lkp->lock_data = SLOCK_UNLOCKED;
@@ -67,7 +66,7 @@ void simple_lock_init(struct simplelock *);
#endif /* !defined(LOCKDEBUG) */
-#else /* NCPUS > 1 */
+#else /* MULTIPROCESSOR */
/*
* The simple-lock routines are the primitives out of which the lock
@@ -78,31 +77,26 @@ void simple_lock_init(struct simplelock *);
* only be used for exclusive locks.
*/
-static __inline void
-simple_lock(lkp)
- __volatile struct simplelock *lkp;
+static __inline void simple_lock_init(struct simplelock *lkp)
{
-
- while (test_and_set(&lkp->lock_data))
- continue;
+ __cpu_simple_lock_init(&lkp->lock_data);
}
-static __inline int
-simple_lock_try(lkp)
- __volatile struct simplelock *lkp;
+static __inline void simple_lock(__volatile struct simplelock *lkp)
{
-
- return (!test_and_set(&lkp->lock_data))
+ __cpu_simple_lock(&lkp->lock_data);
}
-static __inline void
-simple_unlock(lkp)
- __volatile struct simplelock *lkp;
+static __inline int simple_lock_try(__volatile struct simplelock *lkp)
{
+ return (__cpu_simple_lock_try(&lkp->lock_data));
+}
- lkp->lock_data = 0;
+static __inline void simple_unlock(__volatile struct simplelock *lkp)
+{
+ __cpu_simple_unlock(&lkp->lock_data);
}
-#endif /* NCPUS > 1 */
+#endif /* MULTIPROCESSOR */
#endif /* _KERNEL */
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 40f921050c3..b0bf65341a1 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: systm.h,v 1.61 2004/06/08 18:09:31 marc Exp $ */
+/* $OpenBSD: systm.h,v 1.62 2004/06/13 21:49:28 niklas Exp $ */
/* $NetBSD: systm.h,v 1.50 1996/06/09 04:55:09 briggs Exp $ */
/*-
@@ -85,7 +85,11 @@ extern int nchrdev; /* number of entries in cdevsw */
extern int selwait; /* select timeout address */
+#ifdef MULTIPROCESSOR
+#define curpriority (curcpu()->ci_schedstate.spc_curpriority)
+#else
extern u_char curpriority; /* priority of current process */
+#endif
extern int maxmem; /* max memory per process */
extern int physmem; /* physical memory */
@@ -294,4 +298,27 @@ int read_symtab_from_file(struct proc *,struct vnode *,const char *);
void user_config(void);
#endif
+#if defined(MULTIPROCESSOR)
+void _kernel_lock_init(void);
+void _kernel_lock(int);
+void _kernel_unlock(void);
+void _kernel_proc_lock(struct proc *);
+void _kernel_proc_unlock(struct proc *);
+
+#define KERNEL_LOCK_INIT() _kernel_lock_init()
+#define KERNEL_LOCK(flag) _kernel_lock((flag))
+#define KERNEL_UNLOCK() _kernel_unlock()
+#define KERNEL_PROC_LOCK(p) _kernel_proc_lock((p))
+#define KERNEL_PROC_UNLOCK(p) _kernel_proc_unlock((p))
+
+#else /* ! MULTIPROCESSOR */
+
+#define KERNEL_LOCK_INIT() /* nothing */
+#define KERNEL_LOCK(flag) /* nothing */
+#define KERNEL_UNLOCK() /* nothing */
+#define KERNEL_PROC_LOCK(p) /* nothing */
+#define KERNEL_PROC_UNLOCK(p) /* nothing */
+
+#endif /* MULTIPROCESSOR */
+
#endif /* __SYSTM_H__ */
diff --git a/sys/uvm/uvm_meter.c b/sys/uvm/uvm_meter.c
index 5e2a1ed6ef9..15bc3cd8651 100644
--- a/sys/uvm/uvm_meter.c
+++ b/sys/uvm/uvm_meter.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_meter.c,v 1.18 2004/05/17 13:08:42 pedro Exp $ */
+/* $OpenBSD: uvm_meter.c,v 1.19 2004/06/13 21:49:29 niklas Exp $ */
/* $NetBSD: uvm_meter.c,v 1.21 2001/07/14 06:36:03 matt Exp $ */
/*
@@ -110,6 +110,7 @@ uvm_loadav(avg)
/* fall through */
case SRUN:
case SIDL:
+ case SONPROC:
nrun++;
}
}
@@ -263,6 +264,7 @@ uvm_total(totalp)
case SRUN:
case SIDL:
+ case SONPROC:
if (p->p_flag & P_INMEM)
totalp->t_rq++;
else