src - OpenBSD base system

diff options


context:
space:
mode:

author	Artur Grabowski <art@cvs.openbsd.org>	2009-03-23 13:25:12 +0000
committer	Artur Grabowski <art@cvs.openbsd.org>	2009-03-23 13:25:12 +0000
commit	9fa08e1449bad2c67ac7a2930a541f74c9d50f4c (patch)
tree	cf76ab3f3c7cdc2f6ee262705e3466854af7d064
parent	599d360b82cb69d59934037fc27b8a60f70f152a (diff)

Processor affinity for processes.

- Split up run queues so that every cpu has one.
- Make setrunqueue choose the cpu where we want to make this process runnable (this should be refined and less brutal in the future).
- When choosing the cpu where we want to run, make some kind of educated guess where it will be best to run (very naive right now).

Other:
- Set operations for sets of cpus.
- load average calculations per cpu.
- sched_is_idle() -> curcpu_is_idle()

tested, debugged and prodded by many@

Diffstat

-rw-r--r--

sys/arch/amd64/amd64/pmap.c

-rw-r--r--

sys/arch/arm/arm/pmap.c

-rw-r--r--

sys/kern/kern_fork.c

-rw-r--r--

sys/kern/kern_sched.c

379

-rw-r--r--

sys/kern/kern_synch.c

-rw-r--r--

sys/kern/sched_bsd.c

-rw-r--r--

sys/sys/proc.h

-rw-r--r--

sys/sys/sched.h

-rw-r--r--

sys/uvm/uvm_meter.c

-rw-r--r--

sys/uvm/uvm_page.c

10 files changed, 416 insertions, 60 deletions

diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index 6acd8a1fc3c..6d92e73acb0 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: pmap.c,v 1.37 2009/02/16 20:26:58 kurt Exp $ */

+/* $OpenBSD: pmap.c,v 1.38 2009/03/23 13:25:11 art Exp $ */

/* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */

@@ -1322,7 +1322,7 @@ pmap_pageidlezero(struct vm_page *pg)

* with uncached mappings.

for (i = 0, ptr = (long *) va; i < PAGE_SIZE / sizeof(long); i++) {

- if (!sched_is_idle()) {

+ if (!curcpu_is_idle()) {

* A process has become ready. Abort now,

diff --git a/sys/arch/arm/arm/pmap.c b/sys/arch/arm/arm/pmap.c
index 3a8ecd90059..b47279ea4ec 100644
--- a/sys/arch/arm/arm/pmap.c
+++ b/sys/arch/arm/arm/pmap.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: pmap.c,v 1.19 2008/10/28 20:16:58 drahn Exp $ */

+/* $OpenBSD: pmap.c,v 1.20 2009/03/23 13:25:11 art Exp $ */

/* $NetBSD: pmap.c,v 1.147 2004/01/18 13:03:50 scw Exp $ */

@@ -3327,7 +3327,7 @@ pmap_pageidlezero(struct vm_page *pg)

for (i = 0, ptr = (int *)cdstp;

i < (PAGE_SIZE / sizeof(int)); i++) {

- if (!sched_is_idle()) {

+ if (!curcpu_is_idle()) {

* A process has become ready. Abort now,

* so we don't keep it waiting while we

diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 76d868fe0a4..10dd9a5f8eb 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: kern_fork.c,v 1.101 2008/11/11 02:13:14 tedu Exp $ */

+/* $OpenBSD: kern_fork.c,v 1.102 2009/03/23 13:25:11 art Exp $ */

/* $NetBSD: kern_fork.c,v 1.29 1996/02/09 18:59:34 christos Exp $ */

@@ -260,8 +260,6 @@ fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize,

timeout_set(&p2->p_sleep_to, endtsleep, p2);

timeout_set(&p2->p_realit_to, realitexpire, p2);

- p2->p_cpu = p1->p_cpu;

* Duplicate sub-structures as needed.

* Increase reference counts on shared objects.

diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
index 79bc376314a..1b921159985 100644
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c

@@ -1,6 +1,6 @@

-/* $OpenBSD: kern_sched.c,v 1.8 2008/11/06 19:49:13 deraadt Exp $ */

+/* $OpenBSD: kern_sched.c,v 1.9 2009/03/23 13:25:11 art Exp $ */

* Permission to use, copy, modify, and distribute this software for any

* purpose with or without fee is hereby granted, provided that the above

@@ -24,12 +24,27 @@

#include <sys/resourcevar.h>

#include <sys/signalvar.h>

#include <sys/mutex.h>

+#include <machine/atomic.h>

#include <uvm/uvm_extern.h>

+#include <sys/malloc.h>

void sched_kthreads_create(void *);

void sched_idle(void *);

+int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);

+struct proc *sched_steal_proc(struct cpu_info *);

+/*

+ * To help choosing which cpu should run which process we keep track

+ * of cpus which are currently idle and which cpus have processes

+ * queued.

+ */

+struct cpuset sched_idle_cpus;

+struct cpuset sched_queued_cpus;

* A few notes about cpu_switchto that is implemented in MD code.

@@ -55,12 +70,22 @@ void

sched_init_cpu(struct cpu_info *ci)

{

struct schedstate_percpu *spc = &ci->ci_schedstate;

+ int i;

+ for (i = 0; i < SCHED_NQS; i++)

+ TAILQ_INIT(&spc->spc_qs[i]);

spc->spc_idleproc = NULL;

kthread_create_deferred(sched_kthreads_create, ci);

LIST_INIT(&spc->spc_deadproc);

+ /*

+ * Slight hack here until the cpuset code handles cpu_info

+ * structures.

+ */

+ cpuset_init_cpu(ci);

}

void

@@ -79,27 +104,31 @@ sched_kthreads_create(void *v)

void

sched_idle(void *v)

{

+ struct schedstate_percpu *spc;

struct proc *p = curproc;

struct cpu_info *ci = v;

int s;

KERNEL_PROC_UNLOCK(p);

+ spc = &ci->ci_schedstate;

* First time we enter here, we're not supposed to idle,

* just go away for a while.

SCHED_LOCK(s);

+ cpuset_add(&sched_idle_cpus, ci);

p->p_stat = SSLEEP;

mi_switch();

+ cpuset_del(&sched_idle_cpus, ci);

SCHED_UNLOCK(s);

- while (1) {

- KASSERT(ci == curcpu());

- KASSERT(curproc == ci->ci_schedstate.spc_idleproc);

+ KASSERT(ci == curcpu());

+ KASSERT(curproc == spc->spc_idleproc);

- while (!sched_is_idle()) {

- struct schedstate_percpu *spc = &ci->ci_schedstate;

+ while (1) {

+ while (!curcpu_is_idle()) {

struct proc *dead;

SCHED_LOCK(s);

@@ -115,10 +144,12 @@ sched_idle(void *v)

splassert(IPL_NONE);

+ cpuset_add(&sched_idle_cpus, ci);

cpu_idle_enter();

- while (sched_is_idle())

+ while (spc->spc_whichqs == 0)

cpu_idle_cycle();

cpu_idle_leave();

+ cpuset_del(&sched_idle_cpus, ci);

}

@@ -160,22 +191,10 @@ sched_exit(struct proc *p)

* Run queue management.

- *

- * The run queue management is just like before, except that it's with

- * a bit more modern queue handling.

-TAILQ_HEAD(prochead, proc) sched_qs[NQS];

-volatile int sched_whichqs;

void

sched_init_runqueues(void)

{

- int i;

- for (i = 0; i < NQS; i++)

- TAILQ_INIT(&sched_qs[i]);

#ifdef MULTIPROCESSOR

__mp_lock_init(&sched_lock);

#endif

@@ -184,37 +203,56 @@ sched_init_runqueues(void)

void

setrunqueue(struct proc *p)

{

+ struct schedstate_percpu *spc;

int queue = p->p_priority >> 2;

SCHED_ASSERT_LOCKED();

+ sched_choosecpu(p);

+ spc = &p->p_cpu->ci_schedstate;

+ spc->spc_nrun++;

+ TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);

+ spc->spc_whichqs |= (1 << queue);

+ cpuset_add(&sched_queued_cpus, p->p_cpu);

- TAILQ_INSERT_TAIL(&sched_qs[queue], p, p_runq);

- sched_whichqs |= (1 << queue);

+ if (p->p_cpu != curcpu())

+ cpu_unidle(p->p_cpu);

}

void

remrunqueue(struct proc *p)

{

+ struct schedstate_percpu *spc;

int queue = p->p_priority >> 2;

SCHED_ASSERT_LOCKED();

- TAILQ_REMOVE(&sched_qs[queue], p, p_runq);

- if (TAILQ_EMPTY(&sched_qs[queue]))

- sched_whichqs &= ~(1 << queue);

+ spc = &p->p_cpu->ci_schedstate;

+ spc->spc_nrun--;

+ TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);

+ if (TAILQ_EMPTY(&spc->spc_qs[queue])) {

+ spc->spc_whichqs &= ~(1 << queue);

+ if (spc->spc_whichqs == 0)

+ cpuset_del(&sched_queued_cpus, p->p_cpu);

+ }

}

struct proc *

sched_chooseproc(void)

{

+ struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

struct proc *p;

int queue;

SCHED_ASSERT_LOCKED();

again:

- if (sched_is_idle()) {

- p = curcpu()->ci_schedstate.spc_idleproc;

+ if (spc->spc_whichqs) {

+ queue = ffs(spc->spc_whichqs) - 1;

+ p = TAILQ_FIRST(&spc->spc_qs[queue]);

+ remrunqueue(p);

+ } else if ((p = sched_steal_proc(curcpu())) == NULL) {

+ p = spc->spc_idleproc;

if (p == NULL) {

int s;

@@ -232,13 +270,284 @@ again:

}

KASSERT(p);

p->p_stat = SRUN;

- } else {

- queue = ffs(sched_whichqs) - 1;

- p = TAILQ_FIRST(&sched_qs[queue]);

- TAILQ_REMOVE(&sched_qs[queue], p, p_runq);

- if (TAILQ_EMPTY(&sched_qs[queue]))

- sched_whichqs &= ~(1 << queue);

- }

+ }

return (p);

}

+uint64_t sched_nmigrations;

+uint64_t sched_noidle;

+uint64_t sched_stolen;

+uint64_t sched_choose;

+uint64_t sched_wasidle;

+uint64_t sched_nomigrations;

+void

+sched_choosecpu(struct proc *p)

+ struct cpu_info *choice = NULL;

+ int last_cost = INT_MAX;

+ struct cpu_info *ci;

+ struct cpuset set;

+ sched_choose++;

+ /*

+ * The simplest case. Our cpu of choice was idle. This happens

+ * when we were sleeping and something woke us up.

+ *

+ * We also need to check sched_queued_cpus to make sure that

+ * we're not thundering herding one cpu that hasn't managed to

+ * get out of the idle loop yet.

+ */

+ if (p->p_cpu && cpuset_isset(&sched_idle_cpus, p->p_cpu) &&

+ !cpuset_isset(&sched_queued_cpus, p->p_cpu)) {

+ sched_wasidle++;

+ return;

+ }

+#if 0

+ /* Most likely, this is broken. don't do it. */

+ /*

+ * Second case. (shouldn't be necessary in the future)

+ * If our cpu is not idle, but has nothing else queued (which

+ * means that we are curproc and roundrobin asks us to reschedule).

+ */

+ if (p->p_cpu && p->p_cpu->ci_schedstate.spc_nrun == 0)

+ return;

+#endif

+ /*

+ * Look at all cpus that are currently idle. Pick the cheapest of

+ * those.

+ */

+ cpuset_copy(&set, &sched_idle_cpus);

+ while ((ci = cpuset_first(&set)) != NULL) {

+ int cost = sched_proc_to_cpu_cost(ci, p);

+ if (choice == NULL || cost < last_cost) {

+ choice = ci;

+ last_cost = cost;

+ }

+ cpuset_del(&set, ci);

+ }

+ /*

+ * All cpus are busy. Pick one.

+ */

+ if (choice == NULL) {

+ CPU_INFO_ITERATOR cii;

+ sched_noidle++;

+ /*

+ * Not curproc, pick the cpu with the lowest cost to switch to.

+ */

+ CPU_INFO_FOREACH(cii, ci) {

+ int cost = sched_proc_to_cpu_cost(ci, p);

+ if (choice == NULL || cost < last_cost) {

+ choice = ci;

+ last_cost = cost;

+ }

+ KASSERT(choice);

+ if (p->p_cpu && p->p_cpu != choice)

+ sched_nmigrations++;

+ else if (p->p_cpu != NULL)

+ sched_nomigrations++;

+ p->p_cpu = choice;

+/*

+ * Attempt to steal a proc from some cpu.

+ */

+struct proc *

+sched_steal_proc(struct cpu_info *self)

+ struct schedstate_percpu *spc;

+ struct proc *best = NULL;

+ int bestcost = INT_MAX;

+ struct cpu_info *ci;

+ struct cpuset set;

+ cpuset_copy(&set, &sched_queued_cpus);

+ while ((ci = cpuset_first(&set)) != NULL) {

+ struct proc *p;

+ int cost;

+ cpuset_del(&set, ci);

+ spc = &ci->ci_schedstate;

+ p = TAILQ_FIRST(&spc->spc_qs[ffs(spc->spc_whichqs) - 1]);

+ KASSERT(p);

+ cost = sched_proc_to_cpu_cost(self, p);

+ if (best == NULL || cost < bestcost) {

+ best = p;

+ bestcost = cost;

+ }

+ if (best == NULL)

+ return (NULL);

+ spc = &best->p_cpu->ci_schedstate;

+ remrunqueue(best);

+ best->p_cpu = self;

+ sched_stolen++;

+ return (best);

+/*

+ * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know).

+ */

+static int

+log2(unsigned int i)

+ int ret = 0;

+ while (i >>= 1)

+ ret++;

+ return (ret);

+/*

+ * Calculate the cost of moving the proc to this cpu.

+ *

+ * What we want is some guesstimate of how much "performance" it will

+ * cost us to move the proc here. Not just for caches and TLBs and NUMA

+ * memory, but also for the proc itself. A highly loaded cpu might not

+ * be the best candidate for this proc since it won't get run.

+ *

+ * Just total guesstimates for now.

+ */

+int sched_cost_load = 1;

+int sched_cost_priority = 1;

+int sched_cost_runnable = 3;

+int sched_cost_resident = 1;

+int

+sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)

+ struct schedstate_percpu *spc;

+ int l2resident = 0;

+ int cost;

+ spc = &ci->ci_schedstate;

+ cost = 0;

+ /*

+ * First, account for the priority of the proc we want to move.

+ * More willing to move, the lower the priority of the destination

+ * and the higher the priority of the proc.

+ */

+ if (!cpuset_isset(&sched_idle_cpus, ci)) {

+ cost += (p->p_priority - spc->spc_curpriority) *

+ sched_cost_priority;

+ cost += sched_cost_runnable;

+ }

+ if (cpuset_isset(&sched_queued_cpus, ci)) {

+ cost += spc->spc_nrun * sched_cost_runnable;

+ }

+ /*

+ * Higher load on the destination means we don't want to go there.

+ */

+ cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);

+ /*

+ * If the proc is on this cpu already, lower the cost by how much

+ * it has been running and an estimate of its footprint.

+ */

+ if (p->p_cpu == ci && p->p_slptime == 0) {

+ l2resident =

+ log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));

+ cost -= l2resident * sched_cost_resident;

+ }

+ return (cost);

+/*

+ * Functions to manipulate cpu sets.

+ */

+struct cpu_info *cpuset_infos[MAXCPUS];

+static struct cpuset cpuset_all;

+void

+cpuset_init_cpu(struct cpu_info *ci)

+ cpuset_add(&cpuset_all, ci);

+ cpuset_infos[CPU_INFO_UNIT(ci)] = ci;

+void

+cpuset_clear(struct cpuset *cs)

+ memset(cs, 0, sizeof(*cs));

+/*

+ * XXX - implement it on SP architectures too

+ */

+#ifndef CPU_INFO_UNIT

+#define CPU_INFO_UNIT 0

+#endif

+void

+cpuset_add(struct cpuset *cs, struct cpu_info *ci)

+ unsigned int num = CPU_INFO_UNIT(ci);

+ atomic_setbits_int(&cs->cs_set[num/32], (1 << (num % 32)));

+void

+cpuset_del(struct cpuset *cs, struct cpu_info *ci)

+ unsigned int num = CPU_INFO_UNIT(ci);

+ atomic_clearbits_int(&cs->cs_set[num/32], (1 << (num % 32)));

+int

+cpuset_isset(struct cpuset *cs, struct cpu_info *ci)

+ unsigned int num = CPU_INFO_UNIT(ci);

+ return (cs->cs_set[num/32] & (1 << (num % 32)));

+void

+cpuset_add_all(struct cpuset *cs)

+ cpuset_copy(cs, &cpuset_all);

+void

+cpuset_copy(struct cpuset *to, struct cpuset *from)

+ memcpy(to, from, sizeof(*to));

+struct cpu_info *

+cpuset_first(struct cpuset *cs)

+ int i;

+ for (i = 0; i < CPUSET_ASIZE(ncpus); i++)

+ if (cs->cs_set[i])

+ return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

+ return (NULL);

diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 650b35e722f..449f93c7b5a 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: kern_synch.c,v 1.87 2008/09/10 12:30:40 blambert Exp $ */

+/* $OpenBSD: kern_synch.c,v 1.88 2009/03/23 13:25:11 art Exp $ */

/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */

@@ -381,7 +381,6 @@ wakeup_n(void *ident, int n)

* resched_proc().

setrunqueue(p);

- KASSERT(p->p_cpu != NULL);

need_resched(p->p_cpu);

/* END INLINE EXPANSION */

diff --git a/sys/kern/sched_bsd.c b/sys/kern/sched_bsd.c
index e5db94d2aaa..53c0902dc49 100644
--- a/sys/kern/sched_bsd.c
+++ b/sys/kern/sched_bsd.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: sched_bsd.c,v 1.19 2008/11/06 22:11:36 art Exp $ */

+/* $OpenBSD: sched_bsd.c,v 1.20 2009/03/23 13:25:11 art Exp $ */

/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */

/*-

@@ -97,7 +97,7 @@ roundrobin(struct cpu_info *ci)

spc->spc_rrticks = rrticks_init;

- if (curproc != NULL) {

+ if (ci->ci_curproc != NULL) {

s = splstatclock();

if (spc->spc_schedflags & SPCF_SEENRR) {

@@ -112,7 +112,8 @@ roundrobin(struct cpu_info *ci)

splx(s);

}

- need_resched(curcpu());

+ if (spc->spc_nrun)

+ need_resched(ci);

}

@@ -257,7 +258,8 @@ schedcpu(void *arg)

resetpriority(p);

if (p->p_priority >= PUSER) {

if (p->p_stat == SRUN &&

- (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {

+ (p->p_priority / SCHED_PPQ) !=

+ (p->p_usrpri / SCHED_PPQ)) {

remrunqueue(p);

p->p_priority = p->p_usrpri;

setrunqueue(p);

diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index aeec114cec4..8ecbabd2947 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h

@@ -1,4 +1,4 @@

-/* $OpenBSD: proc.h,v 1.112 2009/03/05 19:52:24 kettenis Exp $ */

+/* $OpenBSD: proc.h,v 1.113 2009/03/23 13:25:11 art Exp $ */

/* $NetBSD: proc.h,v 1.44 1996/04/22 01:23:21 christos Exp $ */

/*-

@@ -468,6 +468,28 @@ void proc_trampoline_mp(void); /* XXX */

int proc_isunder(struct proc *, struct proc *);

+/*

+ * functions to handle sets of cpus.

+ *

+ * For now we keep the cpus in ints so that we can use the generic

+ * atomic ops.

+ */

+#define CPUSET_ASIZE(x) (((x) - 1)/32 + 1)

+#define CPUSET_SSIZE CPUSET_ASIZE(MAXCPUS)

+struct cpuset {

+ int cs_set[CPUSET_SSIZE];

+};

+void cpuset_init_cpu(struct cpu_info *);

+void cpuset_clear(struct cpuset *);

+void cpuset_add(struct cpuset *, struct cpu_info *);

+void cpuset_del(struct cpuset *, struct cpu_info *);

+int cpuset_isset(struct cpuset *, struct cpu_info *);

+void cpuset_add_all(struct cpuset *);

+void cpuset_copy(struct cpuset *to, struct cpuset *from);

+struct cpu_info *cpuset_first(struct cpuset *);

#endif /* _KERNEL */

#endif /* !_SYS_PROC_H_ */

diff --git a/sys/sys/sched.h b/sys/sys/sched.h
index 1e2f9bc5847..151adc9af37 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h

@@ -1,4 +1,4 @@

-/* $OpenBSD: sched.h,v 1.19 2008/06/26 05:42:20 ray Exp $ */

+/* $OpenBSD: sched.h,v 1.20 2009/03/23 13:25:11 art Exp $ */

/* $NetBSD: sched.h,v 1.2 1999/02/28 18:14:58 ross Exp $ */

/*-

@@ -87,6 +87,8 @@

#define CP_IDLE 4

#define CPUSTATES 5

+#define SCHED_NQS 32 /* 32 run queues. */

* Per-CPU scheduler state.

* XXX - expose to userland for now.

@@ -101,6 +103,13 @@ struct schedstate_percpu {

int spc_pscnt; /* prof/stat counter */

int spc_psdiv; /* prof/stat divisor */

struct proc *spc_idleproc; /* idle proc for this cpu */

+ u_int spc_nrun; /* procs on the run queues */

+ fixpt_t spc_ldavg; /* shortest load avg. for this cpu */

+ TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];

+ volatile uint32_t spc_whichqs;

#ifdef notyet

struct proc *spc_reaper; /* dead proc reaper */

#endif

@@ -114,11 +123,9 @@ struct schedstate_percpu {

#define SPCF_SHOULDYIELD 0x0002 /* process should yield the CPU */

#define SPCF_SWITCHCLEAR (SPCF_SEENRR|SPCF_SHOULDYIELD)

-#define NQS 32 /* 32 run queues. */

-#define PPQ (128 / NQS) /* priorities per queue */

+#define SCHED_PPQ (128 / SCHED_NQS) /* priorities per queue */

#define NICE_WEIGHT 2 /* priorities per nice level */

-#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - PPQ)

+#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)

extern int schedhz; /* ideally: 16 */

extern int rrticks_init; /* ticks per roundrobin() */

@@ -133,12 +140,12 @@ void sched_exit(struct proc *);

void mi_switch(void);

void cpu_switchto(struct proc *, struct proc *);

struct proc *sched_chooseproc(void);

+void sched_choosecpu(struct proc *);

void cpu_idle_enter(void);

void cpu_idle_cycle(void);

void cpu_idle_leave(void);

-extern volatile int sched_whichqs;

-#define sched_is_idle() (sched_whichqs == 0)

+#define curcpu_is_idle() (curcpu()->ci_schedstate.spc_whichqs == 0)

void sched_init_runqueues(void);

void setrunqueue(struct proc *);

diff --git a/sys/uvm/uvm_meter.c b/sys/uvm/uvm_meter.c
index 3c10fff7847..7a77b187e60 100644
--- a/sys/uvm/uvm_meter.c
+++ b/sys/uvm/uvm_meter.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: uvm_meter.c,v 1.25 2009/03/20 15:19:04 oga Exp $ */

+/* $OpenBSD: uvm_meter.c,v 1.26 2009/03/23 13:25:11 art Exp $ */

/* $NetBSD: uvm_meter.c,v 1.21 2001/07/14 06:36:03 matt Exp $ */

@@ -97,10 +97,15 @@ uvm_meter(void)

static void

uvm_loadav(struct loadavg *avg)

{

+ CPU_INFO_ITERATOR cii;

+ struct cpu_info *ci;

int i, nrun;

struct proc *p;

+ int nrun_cpu[MAXCPUS];

nrun = 0;

+ memset(nrun_cpu, 0, sizeof(nrun_cpu));

LIST_FOREACH(p, &allproc, p_list) {

switch (p->p_stat) {

case SSLEEP:

@@ -113,11 +118,25 @@ uvm_loadav(struct loadavg *avg)

continue;

case SIDL:

nrun++;

+ if (p->p_cpu)

+ nrun_cpu[CPU_INFO_UNIT(p->p_cpu)]++;

}

- for (i = 0; i < 3; i++)

+ for (i = 0; i < 3; i++) {

avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +

nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;

+ }

+ CPU_INFO_FOREACH(cii, ci) {

+ struct schedstate_percpu *spc = &ci->ci_schedstate;

+ if (nrun_cpu[CPU_INFO_UNIT(ci)] == 0)

+ continue;

+ spc->spc_ldavg = (cexp[0] * spc->spc_ldavg +

+ nrun_cpu[CPU_INFO_UNIT(ci)] * FSCALE *

+ (FSCALE - cexp[0])) >> FSHIFT;

+ }

}

diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index 8a90c987693..3c16e1a5893 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: uvm_page.c,v 1.67 2008/07/02 15:21:33 art Exp $ */

+/* $OpenBSD: uvm_page.c,v 1.68 2009/03/23 13:25:11 art Exp $ */

/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */

@@ -1400,5 +1400,5 @@ uvm_pageidlezero()

uvmexp.free++;

uvmexp.zeropages++;

uvm_unlock_fpageq();

- } while (sched_is_idle());

+ } while (curcpu_is_idle());

}