src - OpenBSD base system

diff options


context:
space:
mode:

author	Ted Unangst <tedu@cvs.openbsd.org>	2004-07-29 06:25:46 +0000
committer	Ted Unangst <tedu@cvs.openbsd.org>	2004-07-29 06:25:46 +0000
commit	3d5db7fb8218bfddd4dba9c31df44108c1e76bd1 (patch)
tree	a0b6af39d0c7c7c7acbb19afc2a9d231190bad8a /sys/kern/kern_synch.c
parent	411e952b31a297e8e1b889cc358521c17d628c7b (diff)

put the scheduler in its own file. reduces clutter, and logically separates

"put this process to sleep" and "find a process to run" operations. no functional change. ok art@

Diffstat (limited to 'sys/kern/kern_synch.c')

-rw-r--r--

sys/kern/kern_synch.c

634

1 files changed, 1 insertions, 633 deletions

diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 785136cd27b..fa0ff867b3c 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: kern_synch.c,v 1.60 2004/07/25 20:50:51 tedu Exp $ */

+/* $OpenBSD: kern_synch.c,v 1.61 2004/07/29 06:25:45 tedu Exp $ */

/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */

/*-

@@ -52,641 +52,9 @@

#include <sys/ktrace.h>

#endif

-#include <machine/cpu.h>

-#ifndef __HAVE_CPUINFO

-u_char curpriority; /* usrpri of curproc */

-#endif

-int lbolt; /* once a second sleep address */

-#ifdef __HAVE_CPUINFO

-int rrticks_init; /* # of hardclock ticks per roundrobin() */

-#endif

-int whichqs; /* Bit mask summary of non-empty Q's. */

-struct prochd qs[NQS];

-struct SIMPLELOCK sched_lock;

-void scheduler_start(void);

-#ifdef __HAVE_CPUINFO

-void roundrobin(struct cpu_info *);

-#else

-void roundrobin(void *);

-#endif

-void schedcpu(void *);

void updatepri(struct proc *);

void endtsleep(void *);

-void

-scheduler_start()

-#ifndef __HAVE_CPUINFO

- static struct timeout roundrobin_to;

-#endif

- static struct timeout schedcpu_to;

- /*

- * We avoid polluting the global namespace by keeping the scheduler

- * timeouts static in this function.

- * We setup the timeouts here and kick schedcpu and roundrobin once to

- * make them do their job.

- */

-#ifndef __HAVE_CPUINFO

- timeout_set(&roundrobin_to, roundrobin, &roundrobin_to);

-#endif

- timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);

-#ifdef __HAVE_CPUINFO

- rrticks_init = hz / 10;

-#else

- roundrobin(&roundrobin_to);

-#endif

- schedcpu(&schedcpu_to);

-/*

- * Force switch among equal priority processes every 100ms.

- */

-/* ARGSUSED */

-#ifdef __HAVE_CPUINFO

-void

-roundrobin(struct cpu_info *ci)

- struct schedstate_percpu *spc = &ci->ci_schedstate;

- int s;

- spc->spc_rrticks = rrticks_init;

- if (curproc != NULL) {

- s = splstatclock();

- if (spc->spc_schedflags & SPCF_SEENRR) {

- /*

- * The process has already been through a roundrobin

- * without switching and may be hogging the CPU.

- * Indicate that the process should yield.

- */

- spc->spc_schedflags |= SPCF_SHOULDYIELD;

- } else {

- spc->spc_schedflags |= SPCF_SEENRR;

- }

- splx(s);

- }

- need_resched(curcpu());

-#else

-void

-roundrobin(void *arg)

- struct timeout *to = (struct timeout *)arg;

- struct proc *p = curproc;

- int s;

- if (p != NULL) {

- s = splstatclock();

- if (p->p_schedflags & PSCHED_SEENRR) {

- /*

- * The process has already been through a roundrobin

- * without switching and may be hogging the CPU.

- * Indicate that the process should yield.

- */

- p->p_schedflags |= PSCHED_SHOULDYIELD;

- } else {

- p->p_schedflags |= PSCHED_SEENRR;

- }

- splx(s);

- }

- need_resched(0);

- timeout_add(to, hz / 10);

-#endif

-/*

- * Constants for digital decay and forget:

- * 90% of (p_estcpu) usage in 5 * loadav time

- * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)

- * Note that, as ps(1) mentions, this can let percentages

- * total over 100% (I've seen 137.9% for 3 processes).

- *

- * Note that hardclock updates p_estcpu and p_cpticks independently.

- *

- * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.

- * That is, the system wants to compute a value of decay such

- * that the following for loop:

- * for (i = 0; i < (5 * loadavg); i++)

- * p_estcpu *= decay;

- * will compute

- * p_estcpu *= 0.1;

- * for all values of loadavg:

- *

- * Mathematically this loop can be expressed by saying:

- * decay ** (5 * loadavg) ~= .1

- *

- * The system computes decay as:

- * decay = (2 * loadavg) / (2 * loadavg + 1)

- *

- * We wish to prove that the system's computation of decay

- * will always fulfill the equation:

- * decay ** (5 * loadavg) ~= .1

- *

- * If we compute b as:

- * b = 2 * loadavg

- * then

- * decay = b / (b + 1)

- *

- * We now need to prove two things:

- * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)

- * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)

- *

- * Facts:

- * For x close to zero, exp(x) =~ 1 + x, since

- * exp(x) = 0! + x**1/1! + x**2/2! + ... .

- * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.

- * For x close to zero, ln(1+x) =~ x, since

- * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1

- * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).

- * ln(.1) =~ -2.30

- *

- * Proof of (1):

- * Solve (factor)**(power) =~ .1 given power (5*loadav):

- * solving for factor,

- * ln(factor) =~ (-2.30/5*loadav), or

- * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =

- * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED

- *

- * Proof of (2):

- * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):

- * solving for power,

- * power*ln(b/(b+1)) =~ -2.30, or

- * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED

- *

- * Actual power values for the implemented algorithm are as follows:

- * loadav: 1 2 3 4

- * power: 5.68 10.32 14.94 19.55

- */

-/* calculations for digital decay to forget 90% of usage in 5*loadav sec */

-#define loadfactor(loadav) (2 * (loadav))

-#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))

-/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */

-fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */

-/*

- * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the

- * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below

- * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).

- *

- * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:

- * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).

- *

- * If you dont want to bother with the faster/more-accurate formula, you

- * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate

- * (more general) method of calculating the %age of CPU used by a process.

- */

-#define CCPU_SHIFT 11

-/*

- * Recompute process priorities, every hz ticks.

- */

-/* ARGSUSED */

-void

-schedcpu(arg)

- void *arg;

- struct timeout *to = (struct timeout *)arg;

- fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

- struct proc *p;

- int s;

- unsigned int newcpu;

- int phz;

- /*

- * If we have a statistics clock, use that to calculate CPU

- * time, otherwise revert to using the profiling clock (which,

- * in turn, defaults to hz if there is no separate profiling

- * clock available)

- */

- phz = stathz ? stathz : profhz;

- KASSERT(phz);

- for (p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) {

- /*

- * Increment time in/out of memory and sleep time

- * (if sleeping). We ignore overflow; with 16-bit int's

- * (remember them?) overflow takes 45 days.

- */

- p->p_swtime++;

- if (p->p_stat == SSLEEP || p->p_stat == SSTOP)

- p->p_slptime++;

- p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;

- /*

- * If the process has slept the entire second,

- * stop recalculating its priority until it wakes up.

- */

- if (p->p_slptime > 1)

- continue;

- s = splstatclock(); /* prevent state changes */

- /*

- * p_pctcpu is only for ps.

- */

-#if (FSHIFT >= CCPU_SHIFT)

- p->p_pctcpu += (phz == 100)?

- ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):

- 100 * (((fixpt_t) p->p_cpticks)

- << (FSHIFT - CCPU_SHIFT)) / phz;

-#else

- p->p_pctcpu += ((FSCALE - ccpu) *

- (p->p_cpticks * FSCALE / phz)) >> FSHIFT;

-#endif

- p->p_cpticks = 0;

- newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);

- p->p_estcpu = newcpu;

- splx(s);

- SCHED_LOCK(s);

- resetpriority(p);

- if (p->p_priority >= PUSER) {

- if ((p != curproc) &&

- p->p_stat == SRUN &&

- (p->p_flag & P_INMEM) &&

- (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {

- remrunqueue(p);

- p->p_priority = p->p_usrpri;

- setrunqueue(p);

- } else

- p->p_priority = p->p_usrpri;

- }

- SCHED_UNLOCK(s);

- }

- uvm_meter();

- wakeup((caddr_t)&lbolt);

- timeout_add(to, hz);

-/*

- * Recalculate the priority of a process after it has slept for a while.

- * For all load averages >= 1 and max p_estcpu of 255, sleeping for at

- * least six times the loadfactor will decay p_estcpu to zero.

- */

-void

-updatepri(p)

- register struct proc *p;

- register unsigned int newcpu = p->p_estcpu;

- register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

- SCHED_ASSERT_LOCKED();

- if (p->p_slptime > 5 * loadfac)

- p->p_estcpu = 0;

- else {

- p->p_slptime--; /* the first time was done in schedcpu */

- while (newcpu && --p->p_slptime)

- newcpu = (int) decay_cpu(loadfac, newcpu);

- p->p_estcpu = newcpu;

- }

- resetpriority(p);

-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)

-void

-sched_unlock_idle(void)

- SIMPLE_UNLOCK(&sched_lock);

-void

-sched_lock_idle(void)

- SIMPLE_LOCK(&sched_lock);

-#endif /* MULTIPROCESSOR || LOCKDEBUG */

-/*

- * General yield call. Puts the current process back on its run queue and

- * performs a voluntary context switch.

- */

-void

-yield()

- struct proc *p = curproc;

- int s;

- SCHED_LOCK(s);

- p->p_priority = p->p_usrpri;

- setrunqueue(p);

- p->p_stats->p_ru.ru_nvcsw++;

- mi_switch();

- SCHED_ASSERT_UNLOCKED();

- splx(s);

-/*

- * General preemption call. Puts the current process back on its run queue

- * and performs an involuntary context switch. If a process is supplied,

- * we switch to that process. Otherwise, we use the normal process selection

- * criteria.

- */

-void

-preempt(newp)

- struct proc *newp;

- struct proc *p = curproc;

- int s;

- /*

- * XXX Switching to a specific process is not supported yet.

- */

- if (newp != NULL)

- panic("preempt: cpu_preempt not yet implemented");

- SCHED_LOCK(s);

- p->p_priority = p->p_usrpri;

- p->p_stat = SRUN;

- setrunqueue(p);

- p->p_stats->p_ru.ru_nivcsw++;

- mi_switch();

- SCHED_ASSERT_UNLOCKED();

- splx(s);

-/*

- * Must be called at splstatclock() or higher.

- */

-void

-mi_switch()

- struct proc *p = curproc; /* XXX */

- struct rlimit *rlim;

- struct timeval tv;

-#if defined(MULTIPROCESSOR)

- int hold_count;

-#endif

-#ifdef __HAVE_CPUINFO

- struct schedstate_percpu *spc = &p->p_cpu->ci_schedstate;

-#endif

- SCHED_ASSERT_LOCKED();

-#if defined(MULTIPROCESSOR)

- /*

- * Release the kernel_lock, as we are about to yield the CPU.

- * The scheduler lock is still held until cpu_switch()

- * selects a new process and removes it from the run queue.

- */

- if (p->p_flag & P_BIGLOCK)

-#ifdef notyet

- hold_count = spinlock_release_all(&kernel_lock);

-#else

- hold_count = __mp_release_all(&kernel_lock);

-#endif

- /*

- * Compute the amount of time during which the current

- * process was running, and add that to its total so far.

- * XXX - use microuptime here to avoid strangeness.

- */

- microuptime(&tv);

-#ifdef __HAVE_CPUINFO

- if (timercmp(&tv, &spc->spc_runtime, <)) {

-#if 0

- printf("uptime is not monotonic! "

- "tv=%lu.%06lu, runtime=%lu.%06lu\n",

- tv.tv_sec, tv.tv_usec, spc->spc_runtime.tv_sec,

- spc->spc_runtime.tv_usec);

-#endif

- } else {

- timersub(&tv, &spc->spc_runtime, &tv);

- timeradd(&p->p_rtime, &tv, &p->p_rtime);

- }

-#else

- if (timercmp(&tv, &runtime, <)) {

-#if 0

- printf("uptime is not monotonic! "

- "tv=%lu.%06lu, runtime=%lu.%06lu\n",

- tv.tv_sec, tv.tv_usec, runtime.tv_sec, runtime.tv_usec);

-#endif

- } else {

- timersub(&tv, &runtime, &tv);

- timeradd(&p->p_rtime, &tv, &p->p_rtime);

- }

-#endif

- /*

- * Check if the process exceeds its cpu resource allocation.

- * If over max, kill it.

- */

- rlim = &p->p_rlimit[RLIMIT_CPU];

- if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_cur) {

- if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_max) {

- psignal(p, SIGKILL);

- } else {

- psignal(p, SIGXCPU);

- if (rlim->rlim_cur < rlim->rlim_max)

- rlim->rlim_cur += 5;

- }

- /*

- * Process is about to yield the CPU; clear the appropriate

- * scheduling flags.

- */

-#ifdef __HAVE_CPUINFO

- spc->spc_schedflags &= ~SPCF_SWITCHCLEAR;

-#else

- p->p_schedflags &= ~PSCHED_SWITCHCLEAR;

-#endif

- /*

- * Pick a new current process and record its start time.

- */

- uvmexp.swtch++;

- cpu_switch(p);

- /*

- * Make sure that MD code released the scheduler lock before

- * resuming us.

- */

- SCHED_ASSERT_UNLOCKED();

- /*

- * We're running again; record our new start time. We might

- * be running on a new CPU now, so don't use the cache'd

- * schedstate_percpu pointer.

- */

-#ifdef __HAVE_CPUINFO

- KDASSERT(p->p_cpu != NULL);

- KDASSERT(p->p_cpu == curcpu());

- microuptime(&p->p_cpu->ci_schedstate.spc_runtime);

-#else

- microuptime(&runtime);

-#endif

-#if defined(MULTIPROCESSOR)

- /*

- * Reacquire the kernel_lock now. We do this after we've

- * released the scheduler lock to avoid deadlock, and before

- * we reacquire the interlock.

- */

- if (p->p_flag & P_BIGLOCK)

-#ifdef notyet

- spinlock_acquire_count(&kernel_lock, hold_count);

-#else

- __mp_acquire_count(&kernel_lock, hold_count);

-#endif

-/*

- * Initialize the (doubly-linked) run queues

- * to be empty.

- */

-void

-rqinit()

- register int i;

- for (i = 0; i < NQS; i++)

- qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];

- SIMPLE_LOCK_INIT(&sched_lock);

-static __inline void

-resched_proc(struct proc *p, u_char pri)

-#ifdef __HAVE_CPUINFO

- struct cpu_info *ci;

-#endif

- /*

- * XXXSMP

- * Since p->p_cpu persists across a context switch,

- * this gives us *very weak* processor affinity, in

- * that we notify the CPU on which the process last

- * ran that it should try to switch.

- *

- * This does not guarantee that the process will run on

- * that processor next, because another processor might

- * grab it the next time it performs a context switch.

- *

- * This also does not handle the case where its last

- * CPU is running a higher-priority process, but every

- * other CPU is running a lower-priority process. There

- * are ways to handle this situation, but they're not

- * currently very pretty, and we also need to weigh the

- * cost of moving a process from one CPU to another.

- *

- * XXXSMP

- * There is also the issue of locking the other CPU's

- * sched state, which we currently do not do.

- */

-#ifdef __HAVE_CPUINFO

- ci = (p->p_cpu != NULL) ? p->p_cpu : curcpu();

- if (pri < ci->ci_schedstate.spc_curpriority)

- need_resched(ci);

-#else

- if (pri < curpriority)

- need_resched(0);

-#endif

-/*

- * Change process state to be runnable,

- * placing it on the run queue if it is in memory,

- * and awakening the swapper if it isn't in memory.

- */

-void

-setrunnable(p)

- register struct proc *p;

- SCHED_ASSERT_LOCKED();

- switch (p->p_stat) {

- case 0:

- case SRUN:

- case SONPROC:

- case SZOMB:

- case SDEAD:

- default:

- panic("setrunnable");

- case SSTOP:

- /*

- * If we're being traced (possibly because someone attached us

- * while we were stopped), check for a signal from the debugger.

- */

- if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0)

- p->p_siglist |= sigmask(p->p_xstat);

- case SSLEEP:

- unsleep(p); /* e.g. when sending signals */

- break;

- case SIDL:

- break;

- }

- p->p_stat = SRUN;

- if (p->p_flag & P_INMEM)

- setrunqueue(p);

- if (p->p_slptime > 1)

- updatepri(p);

- p->p_slptime = 0;

- if ((p->p_flag & P_INMEM) == 0)

- wakeup((caddr_t)&proc0);

- else

- resched_proc(p, p->p_priority);

-/*

- * Compute the priority of a process when running in user mode.

- * Arrange to reschedule if the resulting priority is better

- * than that of the current process.

- */

-void

-resetpriority(p)

- register struct proc *p;

- register unsigned int newpriority;

- SCHED_ASSERT_LOCKED();

- newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);

- newpriority = min(newpriority, MAXPRI);

- p->p_usrpri = newpriority;

- resched_proc(p, p->p_usrpri);

-/*

- * We adjust the priority of the current process. The priority of a process

- * gets worse as it accumulates CPU time. The cpu usage estimator (p_estcpu)

- * is increased here. The formula for computing priorities (in kern_synch.c)

- * will compute a different value each time p_estcpu increases. This can

- * cause a switch, but unless the priority crosses a PPQ boundary the actual

- * queue will not change. The cpu usage estimator ramps up quite quickly

- * when the process is running (linearly), and decays away exponentially, at

- * a rate which is proportionally slower when the system is busy. The basic

- * principle is that the system will 90% forget that the process used a lot

- * of CPU time in 5 * loadav seconds. This causes the system to favor

- * processes which haven't run much recently, and to round-robin among other

- * processes.

- */

-void

-schedclock(p)

- struct proc *p;

- int s;

- p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);

- SCHED_LOCK(s);

- resetpriority(p);

- SCHED_UNLOCK(s);

- if (p->p_priority >= PUSER)

- p->p_priority = p->p_usrpri;

* We're only looking at 7 bits of the address; everything is