From d6ec0bc1862a4fed11c7f4ac537413b2c7e89de4 Mon Sep 17 00:00:00 2001
From: Niklas Hallqvist
Date: Wed, 25 May 2005 23:17:48 +0000
Subject: This patch is mostly art's work and was done *a year* ago. Art wants
 to thank everyone for the prompt review and ok of this work ;-) Yeah, that
 includes me too, or maybe especially me. I am sorry.

Change the sched_lock to a mutex. This fixes, among other things, the
infamous "telnet localhost &" problem. The real bug in that case was that
the sched_lock, which is by design a non-recursive lock, was recursively
acquired, and not enough releases made us hold the lock in the idle loop,
blocking scheduling on the other processors. Some of the other processors
would hold the biglock though, which made it impossible for cpu 0 to enter
the kernel... A nice deadlock. Let me just say debugging this for days
just to realize that it was all fixed in an old diff no one ever ok'd was
somewhat of an anti-climax.

This diff also changes splsched to be correct for all our architectures.
(Two small illustrative sketches, one of the lock-recursion deadlock and
one of the new mi_switch() calling convention, are appended after the
diff.)
---
 sys/kern/kern_fork.c     |  4 +--
 sys/kern/kern_lock.c     | 70 +----------------------------------------
 sys/kern/kern_resource.c |  4 +--
 sys/kern/kern_sig.c      | 63 +++++++++++++------------------------
 sys/kern/kern_synch.c    | 75 ++++++++++++++++++++++----------------------
 sys/kern/kern_time.c     |  6 +++-
 sys/kern/sched_bsd.c     | 81 +++++++++++++++++++++++-------------------------
 sys/kern/vfs_sync.c      |  7 +++--
 8 files changed, 111 insertions(+), 199 deletions(-)
(limited to 'sys/kern')

diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index ed404db4a0c..05ba79f3399 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_fork.c,v 1.74 2005/02/24 04:44:25 tedu Exp $ */
+/* $OpenBSD: kern_fork.c,v 1.75 2005/05/25 23:17:47 niklas Exp $ */
 /* $NetBSD: kern_fork.c,v 1.29 1996/02/09 18:59:34 christos Exp $ */
 
 /*
@@ -341,9 +341,9 @@ fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize,
 	/*
 	 * Make child runnable, set start time, and add to run queue.
 	 */
-	SCHED_LOCK(s);
 	getmicrotime(&p2->p_stats->p_start);
 	p2->p_acflag = AFORK;
+	SCHED_LOCK(s);
 	p2->p_stat = SRUN;
 	setrunqueue(p2);
 	SCHED_UNLOCK(s);
diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index 4a56f9e6388..d36c6d14ec6 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_lock.c,v 1.17 2004/11/01 06:39:39 marius Exp $ */
+/* $OpenBSD: kern_lock.c,v 1.18 2005/05/25 23:17:47 niklas Exp $ */
 
 /*
  * Copyright (c) 1995
@@ -1121,18 +1121,6 @@ simple_lock_freecheck(void *start, void *end)
 	splx(s);
 }
 
-/*
- * We must be holding exactly one lock: the sched_lock.
- */
-
-#ifdef notyet
-void
-simple_lock_switchcheck(void)
-{
-
-	simple_lock_only_held(&sched_lock, "switching");
-}
-#endif
 
 void
 simple_lock_only_held(volatile struct simplelock *lp, const char *where)
@@ -1172,60 +1160,6 @@ simple_lock_only_held(volatile struct simplelock *lp, const char *where)
  * so that they show up in profiles.
  */
 
-/*
- * XXX Instead of using struct lock for the kernel lock and thus requiring us
- * XXX to implement simplelocks, causing all sorts of fine-grained locks all
- * XXX over our tree getting activated consuming both time and potentially
- * XXX introducing locking protocol bugs.
- */
-#ifdef notyet
-
-struct lock kernel_lock;
-
-void
-_kernel_lock_init(void)
-{
-	spinlockinit(&kernel_lock, "klock", 0);
-}
-
-/*
- * Acquire/release the kernel lock. Intended for use in the scheduler
- * and the lower half of the kernel.
- */ -void -_kernel_lock(int flag) -{ - SCHED_ASSERT_UNLOCKED(); - spinlockmgr(&kernel_lock, flag, 0); -} - -void -_kernel_unlock(void) -{ - spinlockmgr(&kernel_lock, LK_RELEASE, 0); -} - -/* - * Acquire/release the kernel_lock on behalf of a process. Intended for - * use in the top half of the kernel. - */ -void -_kernel_proc_lock(struct proc *p) -{ - SCHED_ASSERT_UNLOCKED(); - spinlockmgr(&kernel_lock, LK_EXCLUSIVE, 0); - p->p_flag |= P_BIGLOCK; -} - -void -_kernel_proc_unlock(struct proc *p) -{ - p->p_flag &= ~P_BIGLOCK; - spinlockmgr(&kernel_lock, LK_RELEASE, 0); -} - -#else - struct __mp_lock kernel_lock; void @@ -1272,8 +1206,6 @@ _kernel_proc_unlock(struct proc *p) __mp_unlock(&kernel_lock); } -#endif - #ifdef MP_LOCKDEBUG /* CPU-dependent timing, needs this to be settable from ddb. */ int __mp_lock_spinout = 200000000; diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 3d5759bad59..d770a3858af 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_resource.c,v 1.28 2004/12/26 21:22:13 miod Exp $ */ +/* $OpenBSD: kern_resource.c,v 1.29 2005/05/25 23:17:47 niklas Exp $ */ /* $NetBSD: kern_resource.c,v 1.38 1996/10/23 07:19:38 matthias Exp $ */ /*- @@ -199,7 +199,7 @@ donice(curp, chgp, n) return (EACCES); chgp->p_nice = n; SCHED_LOCK(s); - (void)resetpriority(chgp); + resetpriority(chgp); SCHED_UNLOCK(s); return (0); } diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 7a0142d92dc..d228b218db6 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sig.c,v 1.73 2004/12/26 21:22:13 miod Exp $ */ +/* $OpenBSD: kern_sig.c,v 1.74 2005/05/25 23:17:47 niklas Exp $ */ /* $NetBSD: kern_sig.c,v 1.54 1996/04/22 01:38:32 christos Exp $ */ /* @@ -805,29 +805,19 @@ trapsignal(p, signum, code, type, sigval) * regardless of the signal action (eg, blocked or ignored). * * Other ignored signals are discarded immediately. - * - * XXXSMP: Invoked as psignal() or sched_psignal(). */ void -psignal1(p, signum, dolock) - register struct proc *p; - register int signum; - int dolock; /* XXXSMP: works, but icky */ +psignal(struct proc *p, int signum) { - register int s, prop; - register sig_t action; + int s, prop; + sig_t action; int mask; #ifdef DIAGNOSTIC if ((u_int)signum >= NSIG || signum == 0) panic("psignal signal number"); - - /* XXXSMP: works, but icky */ - if (dolock) - SCHED_ASSERT_UNLOCKED(); - else - SCHED_ASSERT_LOCKED(); #endif + SCHED_ASSERT_UNLOCKED(); /* Ignore signal if we are exiting */ if (p->p_flag & P_WEXIT) @@ -890,10 +880,8 @@ psignal1(p, signum, dolock) */ if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) return; - /* XXXSMP: works, but icky */ - if (dolock) - SCHED_LOCK(s); + SCHED_LOCK(s); switch (p->p_stat) { case SSLEEP: @@ -934,12 +922,11 @@ psignal1(p, signum, dolock) goto out; p->p_siglist &= ~mask; p->p_xstat = signum; - if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0) - /* - * XXXSMP: recursive call; don't lock - * the second time around. - */ - sched_psignal(p->p_pptr, SIGCHLD); + if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0) { + SCHED_UNLOCK(s); + psignal(p->p_pptr, SIGCHLD); + SCHED_LOCK(s); + } proc_stop(p); goto out; } @@ -976,7 +963,7 @@ psignal1(p, signum, dolock) * Otherwise, process goes back to sleep state. 
*/ p->p_flag |= P_CONTINUED; - wakeup(p->p_pptr); + sched_wakeup(p->p_pptr); if (action == SIG_DFL) p->p_siglist &= ~mask; if (action == SIG_CATCH) @@ -1027,9 +1014,7 @@ runfast: run: setrunnable(p); out: - /* XXXSMP: works, but icky */ - if (dolock) - SCHED_UNLOCK(s); + SCHED_UNLOCK(s); } /* @@ -1074,24 +1059,23 @@ issignal(struct proc *p) */ p->p_xstat = signum; - SCHED_LOCK(s); /* protect mi_switch */ if (p->p_flag & P_FSTRACE) { #ifdef PROCFS + SCHED_LOCK(s); /* procfs debugging */ p->p_stat = SSTOP; - wakeup(p); - mi_switch(); + sched_wakeup(p); + mi_switch(s); #else panic("procfs debugging"); #endif } else { /* ptrace debugging */ psignal(p->p_pptr, SIGCHLD); + SCHED_LOCK(s); proc_stop(p); - mi_switch(); + mi_switch(s); } - SCHED_ASSERT_UNLOCKED(); - splx(s); /* * If we are no longer being traced, or the parent @@ -1152,9 +1136,7 @@ issignal(struct proc *p) psignal(p->p_pptr, SIGCHLD); SCHED_LOCK(s); proc_stop(p); - mi_switch(); - SCHED_ASSERT_UNLOCKED(); - splx(s); + mi_switch(s); break; } else if (prop & SA_IGNORE) { /* @@ -1198,16 +1180,13 @@ keep: * on the run queue. */ void -proc_stop(p) - struct proc *p; +proc_stop(struct proc *p) { -#ifdef MULTIPROCESSOR SCHED_ASSERT_LOCKED(); -#endif p->p_stat = SSTOP; p->p_flag &= ~P_WAITED; - wakeup(p->p_pptr); + sched_wakeup(p->p_pptr); } /* diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index fa0ff867b3c..9d887083455 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_synch.c,v 1.61 2004/07/29 06:25:45 tedu Exp $ */ +/* $OpenBSD: kern_synch.c,v 1.62 2005/05/25 23:17:47 niklas Exp $ */ /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */ /*- @@ -145,8 +145,12 @@ ltsleep(ident, priority, wmesg, timo, interlock) else *qp->sq_tailp = p; *(qp->sq_tailp = &p->p_forw) = 0; + + p->p_stat = SSLEEP; + if (timo) timeout_add(&p->p_sleep_to, timo); + /* * We can now release the interlock; the scheduler_slock * is held, so a thread can't get in to do wakeup() before @@ -170,13 +174,16 @@ ltsleep(ident, priority, wmesg, timo, interlock) */ if (catch) { p->p_flag |= P_SINTR; + SCHED_UNLOCK(s); /* XXX - must unlock for CURSIG */ if ((sig = CURSIG(p)) != 0) { + SCHED_LOCK(s); if (p->p_wchan) unsleep(p); p->p_stat = SONPROC; SCHED_UNLOCK(s); goto resume; } + SCHED_LOCK(s); if (p->p_wchan == 0) { catch = 0; SCHED_UNLOCK(s); @@ -184,22 +191,14 @@ ltsleep(ident, priority, wmesg, timo, interlock) } } else sig = 0; - p->p_stat = SSLEEP; p->p_stats->p_ru.ru_nvcsw++; SCHED_ASSERT_LOCKED(); - mi_switch(); + mi_switch(s); #ifdef DDB /* handy breakpoint location after process "wakes" */ __asm(".globl bpendtsleep\nbpendtsleep:"); #endif - SCHED_ASSERT_UNLOCKED(); - /* - * Note! this splx belongs to the SCHED_LOCK(s) above, mi_switch - * releases the scheduler lock, but does not lower the spl. - */ - splx(s); - resume: #ifdef __HAVE_CPUINFO p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri; @@ -270,20 +269,13 @@ endtsleep(arg) * Remove a process from its wait queue */ void -unsleep(p) - register struct proc *p; +unsleep(struct proc *p) { - register struct slpque *qp; - register struct proc **hp; -#if 0 - int s; + struct slpque *qp; + struct proc **hp; + + SCHED_ASSERT_LOCKED(); - /* - * XXX we cannot do recursive SCHED_LOCKing yet. All callers lock - * anyhow. 
- */ - SCHED_LOCK(s); -#endif if (p->p_wchan) { hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head; while (*hp != p) @@ -293,24 +285,39 @@ unsleep(p) qp->sq_tailp = hp; p->p_wchan = 0; } -#if 0 +} + +void +wakeup(void *ident) +{ + int s; + + SCHED_LOCK(s); + sched_wakeup(ident); + SCHED_UNLOCK(s); +} + +void +wakeup_n(void *ident, int n) +{ + int s; + + SCHED_LOCK(s); + sched_wakeup_n(ident, n); SCHED_UNLOCK(s); -#endif } /* * Make all processes sleeping on the specified identifier runnable. */ void -wakeup_n(ident, n) - void *ident; - int n; +sched_wakeup_n(void *ident, int n) { struct slpque *qp; struct proc *p, **q; - int s; - SCHED_LOCK(s); + SCHED_ASSERT_LOCKED(); + qp = &slpque[LOOKUP(ident)]; restart: for (q = &qp->sq_head; (p = *q) != NULL; ) { @@ -349,7 +356,7 @@ restart: need_resched(0); #endif } else { - wakeup((caddr_t)&proc0); + sched_wakeup((caddr_t)&proc0); } /* END INLINE EXPANSION */ @@ -361,12 +368,4 @@ restart: } else q = &p->p_forw; } - SCHED_UNLOCK(s); -} - -void -wakeup(chan) - void *chan; -{ - wakeup_n(chan, -1); } diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index 567341a2f85..d5e29b55362 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_time.c,v 1.45 2004/07/28 17:15:12 tholo Exp $ */ +/* $OpenBSD: kern_time.c,v 1.46 2005/05/25 23:17:47 niklas Exp $ */ /* $NetBSD: kern_time.c,v 1.20 1996/02/18 11:57:06 fvdl Exp $ */ /* @@ -560,8 +560,12 @@ sys_setitimer(p, v, retval) } p->p_realtimer = aitv; } else { + int s; + itimerround(&aitv.it_interval); + s = splclock(); p->p_stats->p_timer[SCARG(uap, which)] = aitv; + splx(s); } return (0); diff --git a/sys/kern/sched_bsd.c b/sys/kern/sched_bsd.c index 523e240a4b1..ff39ea3d1ad 100644 --- a/sys/kern/sched_bsd.c +++ b/sys/kern/sched_bsd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sched_bsd.c,v 1.1 2004/07/29 06:25:45 tedu Exp $ */ +/* $OpenBSD: sched_bsd.c,v 1.2 2005/05/25 23:17:47 niklas Exp $ */ /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */ /*- @@ -65,7 +65,9 @@ int rrticks_init; /* # of hardclock ticks per roundrobin() */ int whichqs; /* Bit mask summary of non-empty Q's. */ struct prochd qs[NQS]; -struct SIMPLELOCK sched_lock; +#ifdef MULTIPROCESSOR +struct mutex sched_mutex = MUTEX_INITIALIZER(IPL_SCHED); +#endif void scheduler_start(void); @@ -260,7 +262,7 @@ schedcpu(arg) struct timeout *to = (struct timeout *)arg; fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); struct proc *p; - int s; + int s, t; unsigned int newcpu; int phz; @@ -274,6 +276,7 @@ schedcpu(arg) KASSERT(phz); for (p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) { + SCHED_LOCK(s); /* * Increment time in/out of memory and sleep time * (if sleeping). We ignore overflow; with 16-bit int's @@ -287,9 +290,11 @@ schedcpu(arg) * If the process has slept the entire second, * stop recalculating its priority until it wakes up. */ - if (p->p_slptime > 1) + if (p->p_slptime > 1) { + SCHED_UNLOCK(s); continue; - s = splstatclock(); /* prevent state changes */ + } + t = splstatclock(); /* * p_pctcpu is only for ps. 
*/ @@ -305,8 +310,7 @@ schedcpu(arg) p->p_cpticks = 0; newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu); p->p_estcpu = newcpu; - splx(s); - SCHED_LOCK(s); + splx(t); resetpriority(p); if (p->p_priority >= PUSER) { if ((p != curproc) && @@ -355,13 +359,13 @@ updatepri(p) void sched_unlock_idle(void) { - SIMPLE_UNLOCK(&sched_lock); + mtx_leave(&sched_mutex); } void sched_lock_idle(void) { - SIMPLE_LOCK(&sched_lock); + mtx_enter(&sched_mutex); } #endif /* MULTIPROCESSOR || LOCKDEBUG */ @@ -379,9 +383,7 @@ yield() p->p_priority = p->p_usrpri; setrunqueue(p); p->p_stats->p_ru.ru_nvcsw++; - mi_switch(); - SCHED_ASSERT_UNLOCKED(); - splx(s); + mi_switch(s); } /* @@ -408,19 +410,17 @@ preempt(newp) p->p_stat = SRUN; setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; - mi_switch(); - SCHED_ASSERT_UNLOCKED(); - splx(s); + mi_switch(s); } /* - * Must be called at splstatclock() or higher. + * Must be called at splsched() or higher. */ void -mi_switch() +mi_switch(int s) { - struct proc *p = curproc; /* XXX */ + struct proc *p = curproc; struct rlimit *rlim; struct timeval tv; #if defined(MULTIPROCESSOR) @@ -432,20 +432,6 @@ mi_switch() SCHED_ASSERT_LOCKED(); -#if defined(MULTIPROCESSOR) - /* - * Release the kernel_lock, as we are about to yield the CPU. - * The scheduler lock is still held until cpu_switch() - * selects a new process and removes it from the run queue. - */ - if (p->p_flag & P_BIGLOCK) -#ifdef notyet - hold_count = spinlock_release_all(&kernel_lock); -#else - hold_count = __mp_release_all(&kernel_lock); -#endif -#endif - /* * Compute the amount of time during which the current * process was running, and add that to its total so far. @@ -483,6 +469,7 @@ mi_switch() */ rlim = &p->p_rlimit[RLIMIT_CPU]; if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_cur) { + SCHED_UNLOCK(s); if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_max) { psignal(p, SIGKILL); } else { @@ -490,8 +477,23 @@ mi_switch() if (rlim->rlim_cur < rlim->rlim_max) rlim->rlim_cur += 5; } + SCHED_LOCK(s); } +#if defined(MULTIPROCESSOR) + /* + * Release the kernel_lock, as we are about to yield the CPU. + * The scheduler lock is still held until cpu_switch() + * selects a new process and removes it from the run queue. + */ + if (p->p_flag & P_BIGLOCK) +#ifdef notyet + hold_count = spinlock_release_all(&kernel_lock); +#else + hold_count = __mp_release_all(&kernel_lock); +#endif +#endif + /* * Process is about to yield the CPU; clear the appropriate * scheduling flags. @@ -534,12 +536,9 @@ mi_switch() * we reacquire the interlock. */ if (p->p_flag & P_BIGLOCK) -#ifdef notyet - spinlock_acquire_count(&kernel_lock, hold_count); -#else __mp_acquire_count(&kernel_lock, hold_count); #endif -#endif + splx(s); } /* @@ -553,7 +552,6 @@ rqinit() for (i = 0; i < NQS; i++) qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; - SIMPLE_LOCK_INIT(&sched_lock); } static __inline void @@ -601,8 +599,7 @@ resched_proc(struct proc *p, u_char pri) * and awakening the swapper if it isn't in memory. 
*/ void -setrunnable(p) - register struct proc *p; +setrunnable(struct proc *p) { SCHED_ASSERT_LOCKED(); @@ -634,7 +631,7 @@ setrunnable(p) updatepri(p); p->p_slptime = 0; if ((p->p_flag & P_INMEM) == 0) - wakeup((caddr_t)&proc0); + sched_wakeup((caddr_t)&proc0); else resched_proc(p, p->p_priority); } @@ -679,10 +676,10 @@ schedclock(p) { int s; - p->p_estcpu = ESTCPULIM(p->p_estcpu + 1); SCHED_LOCK(s); + p->p_estcpu = ESTCPULIM(p->p_estcpu + 1); resetpriority(p); - SCHED_UNLOCK(s); if (p->p_priority >= PUSER) p->p_priority = p->p_usrpri; + SCHED_UNLOCK(s); } diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c index e9113ca4ea1..549962182f6 100644 --- a/sys/kern/vfs_sync.c +++ b/sys/kern/vfs_sync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_sync.c,v 1.29 2004/10/29 11:51:49 pedro Exp $ */ +/* $OpenBSD: vfs_sync.c,v 1.30 2005/05/25 23:17:47 niklas Exp $ */ /* * Portions of this code are: @@ -50,6 +50,7 @@ #include #include +#include #ifdef FFS_SOFTUPDATES int softdep_process_worklist(struct mount *); @@ -244,10 +245,10 @@ speedup_syncer() { int s; - s = splhigh(); + SCHED_LOCK(s); if (syncerproc && syncerproc->p_wchan == &lbolt) setrunnable(syncerproc); - splx(s); + SCHED_UNLOCK(s); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; -- cgit v1.2.3
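
Appendix: a minimal sketch of the lock-recursion deadlock. The program
below is plain userland C11, not OpenBSD code; every name in it (struct
mtx_sketch, mtx_enter_sketch(), CPU_SELF, ...) is made up for this
illustration. A bare test-and-set simplelock knows nothing about its
holder, so a second acquisition by the same CPU spins forever, which is
exactly the silent "stuck in the idle loop" hang described in the log
message. A mutex that tracks its owner can instead panic on the spot:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define CPU_SELF	0	/* pretend we are cpu0 */
#define CPU_NONE	(-1)	/* lock is free */

struct mtx_sketch {
	atomic_int mtx_owner;	/* cpu id of the holder, or CPU_NONE */
};

static struct mtx_sketch sched_mtx_sketch = { CPU_NONE };

static void
mtx_enter_sketch(struct mtx_sketch *m, int self)
{
	int expected = CPU_NONE;

	/*
	 * This owner check is what a bare test-and-set simplelock lacks:
	 * without it, a recursive acquisition just spins here forever.
	 */
	if (atomic_load(&m->mtx_owner) == self) {
		fprintf(stderr, "panic: cpu%d locking against itself\n", self);
		abort();
	}
	while (!atomic_compare_exchange_weak(&m->mtx_owner, &expected, self))
		expected = CPU_NONE;	/* lost a race, retry */
}

static void
mtx_leave_sketch(struct mtx_sketch *m)
{
	atomic_store(&m->mtx_owner, CPU_NONE);
}

int
main(void)
{
	mtx_enter_sketch(&sched_mtx_sketch, CPU_SELF);	/* fine */
	mtx_enter_sketch(&sched_mtx_sketch, CPU_SELF);	/* caught, not hung */
	mtx_leave_sketch(&sched_mtx_sketch);		/* not reached */
	return 0;
}

The commit's real fix for the psignal() recursion is visible in the
kern_sig.c hunk above: instead of a lock-aware sched_psignal(), the
sched_lock is explicitly dropped around the recursive call
(SCHED_UNLOCK(s); psignal(p->p_pptr, SIGCHLD); SCHED_LOCK(s);), so the
non-recursive lock is never taken twice.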
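
Appendix: why mi_switch() now takes the spl cookie. Another sketch in the
same hypothetical userland style (splsched_sketch(), the IPL_SCHED value,
etc. are invented here): SCHED_LOCK(s) records the previous interrupt
priority level in s, and since mi_switch() is the one that leaves the
scheduler critical section, the cookie must travel into it so the final
splx() happens exactly once, at the end. This matches the diff's change
from "mi_switch(); SCHED_ASSERT_UNLOCKED(); splx(s);" in every caller to
a single "mi_switch(s);":

#include <assert.h>
#include <stdio.h>

#define IPL_NONE	0
#define IPL_SCHED	10	/* invented value, for the sketch only */

static int ipl = IPL_NONE;	/* current interrupt priority level */

static int
splsched_sketch(void)
{
	int old = ipl;

	if (ipl < IPL_SCHED)
		ipl = IPL_SCHED;	/* mask scheduling interrupts */
	return old;
}

static void
splx_sketch(int old)
{
	ipl = old;			/* restore the saved level */
}

/*
 * Mirrors the patched convention: the caller's spl cookie rides into
 * mi_switch(), which performs the one and only splx() after switching.
 */
static void
mi_switch_sketch(int s)
{
	/* ... choose next process, cpu_switch() back here later ... */
	splx_sketch(s);
}

int
main(void)
{
	int s;

	s = splsched_sketch();		/* SCHED_LOCK(s) half: raise ipl */
	assert(ipl == IPL_SCHED);
	mi_switch_sketch(s);		/* new style: no splx() in the caller */
	assert(ipl == IPL_NONE);
	printf("ipl correctly restored: %d\n", ipl);
	return 0;
}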