author     Artur Grabowski <art@cvs.openbsd.org>  2007-05-25 15:55:28 +0000
committer  Artur Grabowski <art@cvs.openbsd.org>  2007-05-25 15:55:28 +0000
commit     821e249ec2ec9f4060c4aa9c985e76d5cb6ed872 (patch)
tree       b66242e6b3443f23aa88627eea4a17b44a59b9ac /sys
parent     a5e71db4200419a89667776613c8d20037b99e18 (diff)
Replace the overdesigned and overcomplicated tlb shootdown code with
very simple and dumb fast tlb IPI handlers that have on the order of the
same number of instructions as the old code had function calls.

All TLB shootdowns are reorganized so that we always shoot the page
without looking at PG_U. When we're shooting a range (primarily in
pmap_remove), we shoot the range when it contains 32 or fewer pages;
otherwise we just nuke the whole TLB (this might need tweaking if
someone is interested in micro-optimization).

The IPIs are not handled through the normal interrupt vectoring code;
they are not blockable, and they only shoot one page, a range of pages,
or the whole tlb.

This gives a 15% reduction in system time on my dual-core laptop during
a kernel compile and an 18% reduction in real time on a quad machine
doing a bulk ports build.

Tested by many, in snaps for a week, no slowdowns reported (although
not everyone is seeing such huge wins).
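Every call site in this diff converges on the same pattern: shoot (one
page, a range, or the whole tlb), then wait. A minimal sketch of that
pattern, assuming the pmap_tlb_* API added in the pmap.h diff below; the
wrapper name flush_range() is hypothetical, while the 32-page threshold
and the kernel-pmap exception are taken from pmap_do_remove() in the
pmap.c diff (the whole-tlb path is avoided for the kernel pmap, plausibly
because the invltlb handler just reloads %cr3, which leaves PG_G global
kernel entries in place):

/*
 * Hypothetical wrapper illustrating the new shootdown pattern;
 * pmap_tlb_shootrange/shoottlb/shootwait are the real API from
 * this commit, flush_range() exists only for this sketch.
 */
static void
flush_range(struct pmap *pm, vaddr_t sva, vaddr_t eva)
{
	/* More than 32 pages: nuke the whole TLB, but never for
	 * the kernel pmap (see the XXX - tune comment below). */
	if (eva - sva > 32 * PAGE_SIZE && pm != pmap_kernel())
		pmap_tlb_shoottlb();
	else
		pmap_tlb_shootrange(pm, sva, eva);

	/* Spin until all remote handlers have acknowledged. */
	pmap_tlb_shootwait();
}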
Diffstat (limited to 'sys')
-rw-r--r--  sys/arch/i386/i386/apicvec.s        69
-rw-r--r--  sys/arch/i386/i386/ipifuncs.c       13
-rw-r--r--  sys/arch/i386/i386/lapic.c           6
-rw-r--r--  sys/arch/i386/i386/lock_machdep.c   15
-rw-r--r--  sys/arch/i386/i386/machdep.c        19
-rw-r--r--  sys/arch/i386/i386/pmap.c          714
-rw-r--r--  sys/arch/i386/i386/vm_machdep.c     42
-rw-r--r--  sys/arch/i386/include/atomic.h      16
-rw-r--r--  sys/arch/i386/include/i82489var.h   10
-rw-r--r--  sys/arch/i386/include/intr.h         3
-rw-r--r--  sys/arch/i386/include/pmap.h        13
11 files changed, 385 insertions, 535 deletions
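
The heart of the change is the handshake between the initiating cpu and
the fast IPI handlers: tlb_shoot_wait doubles as both lock and
acknowledgement counter. A condensed sketch of the single-target case,
assuming the names from the pmap.c, atomic.h and apicvec.s diffs below;
shoot_one_page() itself is hypothetical, and the real
pmap_tlb_shootpage() first walks all cpus to build a mask and a count:

volatile int tlb_shoot_wait;		/* "lock": cpus left to ack */
volatile vaddr_t tlb_shoot_addr1;	/* page the handlers invalidate */

void
shoot_one_page(struct cpu_info *ci, struct pmap *pm, vaddr_t va)
{
	int s = splvm();

	/* Acquire: CAS the counter from 0 to the number of targets. */
	while (i486_atomic_cas_int(&tlb_shoot_wait, 0, 1) != 0) {
		while (tlb_shoot_wait != 0)
			SPINLOCK_SPIN_HOOK;
	}
	tlb_shoot_addr1 = va;
	if (i386_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
		panic("shoot_one_page: ipi failed");
	splx(s);

	/* Shoot locally in parallel with the remote cpu. */
	if (pmap_is_curpmap(pm))
		pmap_update_pg(va);

	/*
	 * Release happens remotely: Xintripi_invlpg does invlpg and
	 * then "lock decl tlb_shoot_wait"; pmap_tlb_shootwait() is
	 * this spin loop.
	 */
	while (tlb_shoot_wait != 0)
		SPINLOCK_SPIN_HOOK;
}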
diff --git a/sys/arch/i386/i386/apicvec.s b/sys/arch/i386/i386/apicvec.s
index d6422e9ebf3..da710c4dbe5 100644
--- a/sys/arch/i386/i386/apicvec.s
+++ b/sys/arch/i386/i386/apicvec.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: apicvec.s,v 1.9 2007/04/12 20:22:58 art Exp $ */
+/* $OpenBSD: apicvec.s,v 1.10 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: apicvec.s,v 1.1.2.2 2000/02/21 21:54:01 sommerfeld Exp $ */
/*-
@@ -86,6 +86,73 @@ XINTR(ipi_ast):
popl %ds
popl %eax
iret
+
+ .globl XINTR(ipi_invltlb)
+ .p2align 4,0x90
+XINTR(ipi_invltlb):
+ pushl %eax
+ pushl %ds
+ movl $GSEL(GDATA_SEL, SEL_KPL), %eax
+ movl %eax, %ds
+
+ ioapic_asm_ack()
+
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ lock
+ decl tlb_shoot_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+ .globl XINTR(ipi_invlpg)
+ .p2align 4,0x90
+XINTR(ipi_invlpg):
+ pushl %eax
+ pushl %ds
+ movl $GSEL(GDATA_SEL, SEL_KPL), %eax
+ movl %eax, %ds
+
+ ioapic_asm_ack()
+
+ movl tlb_shoot_addr1, %eax
+ invlpg (%eax)
+
+ lock
+ decl tlb_shoot_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+ .globl XINTR(ipi_invlrange)
+ .p2align 4,0x90
+XINTR(ipi_invlrange):
+ pushl %eax
+ pushl %edx
+ pushl %ds
+ movl $GSEL(GDATA_SEL, SEL_KPL), %eax
+ movl %eax, %ds
+
+ ioapic_asm_ack()
+
+ movl tlb_shoot_addr1, %eax
+ movl tlb_shoot_addr2, %edx
+1: invlpg (%eax)
+ addl $PAGE_SIZE, %eax
+ cmpl %edx, %eax
+ jb 1b
+
+ lock
+ decl tlb_shoot_wait
+
+ popl %ds
+ popl %edx
+ popl %eax
+ iret
+
#endif
/*
diff --git a/sys/arch/i386/i386/ipifuncs.c b/sys/arch/i386/i386/ipifuncs.c
index e679fcb6a64..711c6cf278d 100644
--- a/sys/arch/i386/i386/ipifuncs.c
+++ b/sys/arch/i386/i386/ipifuncs.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ipifuncs.c,v 1.7 2007/04/21 21:06:14 gwk Exp $ */
+/* $OpenBSD: ipifuncs.c,v 1.8 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: ipifuncs.c,v 1.1.2.3 2000/06/26 02:04:06 sommerfeld Exp $ */
/*-
@@ -77,7 +77,7 @@ void (*ipifunc[I386_NIPI])(struct cpu_info *) =
i386_ipi_microset,
i386_ipi_flush_fpu,
i386_ipi_synch_fpu,
- pmap_do_tlb_shootdown,
+ NULL,
#if 0
i386_reload_mtrr,
gdt_reload_cpu,
@@ -144,6 +144,15 @@ i386_send_ipi(struct cpu_info *ci, int ipimask)
return ret;
}
+int
+i386_fast_ipi(struct cpu_info *ci, int ipi)
+{
+ if (!(ci->ci_flags & CPUF_RUNNING))
+ return (ENOENT);
+
+ return (i386_ipi(ipi, ci->ci_cpuid, LAPIC_DLMODE_FIXED));
+}
+
void
i386_self_ipi(int vector)
{
diff --git a/sys/arch/i386/i386/lapic.c b/sys/arch/i386/i386/lapic.c
index 2d8d07b0872..42378c42f57 100644
--- a/sys/arch/i386/i386/lapic.c
+++ b/sys/arch/i386/i386/lapic.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lapic.c,v 1.15 2007/04/12 20:22:58 art Exp $ */
+/* $OpenBSD: lapic.c,v 1.16 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $ */
/*-
@@ -179,7 +179,6 @@ lapic_set_lvt()
void
lapic_boot_init(paddr_t lapic_base)
{
- extern void Xintripi_ast(void);
static int clk_irq = 0;
static int ipi_irq = 0;
@@ -188,6 +187,9 @@ lapic_boot_init(paddr_t lapic_base)
#ifdef MULTIPROCESSOR
idt_vec_set(LAPIC_IPI_VECTOR, Xintripi);
idt_vec_set(LAPIC_IPI_AST, Xintripi_ast);
+ idt_vec_set(LAPIC_IPI_INVLTLB, Xintripi_invltlb);
+ idt_vec_set(LAPIC_IPI_INVLPG, Xintripi_invlpg);
+ idt_vec_set(LAPIC_IPI_INVLRANGE, Xintripi_invlrange);
#endif
idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
idt_vec_set(LAPIC_TIMER_VECTOR, Xintrltimer);
diff --git a/sys/arch/i386/i386/lock_machdep.c b/sys/arch/i386/i386/lock_machdep.c
index 3c00a13309e..d18663ca4aa 100644
--- a/sys/arch/i386/i386/lock_machdep.c
+++ b/sys/arch/i386/i386/lock_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lock_machdep.c,v 1.4 2007/05/04 12:58:41 art Exp $ */
+/* $OpenBSD: lock_machdep.c,v 1.5 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: lock_machdep.c,v 1.1.2.3 2000/05/03 14:40:30 sommerfeld Exp $ */
/*-
@@ -142,19 +142,8 @@ rw_cas_386(volatile unsigned long *p, unsigned long o, unsigned long n)
return (0);
}
-#ifdef MULTIPROCESSOR
-#define MPLOCK "lock "
-#else
-#define MPLOCK
-#endif
-
int
rw_cas_486(volatile unsigned long *p, unsigned long o, unsigned long n)
{
- int res;
-
- __asm volatile(MPLOCK " cmpxchgl %2, %1" : "=a" (res), "=m" (*p)
- : "r" (n), "a" (o), "m" (*p) : "memory");
-
- return (res != o);
+ return (i486_atomic_cas_int((u_int *)p, o, n) != o);
}
diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c
index 22306883185..8202008a456 100644
--- a/sys/arch/i386/i386/machdep.c
+++ b/sys/arch/i386/i386/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.389 2007/05/23 20:33:46 pvalchev Exp $ */
+/* $OpenBSD: machdep.c,v 1.390 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */
/*-
@@ -2214,7 +2214,7 @@ aston(struct proc *p)
#ifdef MULTIPROCESSOR
if (i386_atomic_testset_i(&p->p_md.md_astpending, 1) == 0 &&
p->p_cpu != curcpu())
- i386_ipi(LAPIC_IPI_AST, p->p_cpu->ci_cpuid, LAPIC_DLMODE_FIXED);
+ i386_fast_ipi(p->p_cpu, LAPIC_IPI_AST);
#else
p->p_md.md_astpending = 1;
#endif
@@ -3585,9 +3585,6 @@ bus_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int cacheable,
vaddr_t va;
pt_entry_t *pte;
bus_size_t map_size;
-#ifdef MULTIPROCESSOR
- u_int32_t cpumask = 0;
-#endif
pa = trunc_page(bpa);
endpa = round_page(bpa + size);
@@ -3620,17 +3617,11 @@ bus_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int cacheable,
*pte &= ~PG_N;
else
*pte |= PG_N;
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootdown(pmap_kernel(), va, *pte,
- &cpumask);
-#else
- pmap_update_pg(va);
-#endif
+ pmap_tlb_shootpage(pmap_kernel(), va);
}
}
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootnow(cpumask);
-#endif
+
+ pmap_tlb_shootwait();
pmap_update(pmap_kernel());
return 0;
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 2a221f4ab80..5fe984296be 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.111 2007/05/20 14:14:09 miod Exp $ */
+/* $OpenBSD: pmap.c,v 1.112 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
/*
@@ -213,49 +213,6 @@ struct simplelock pmaps_lock;
#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */
/*
- * TLB Shootdown:
- *
- * When a mapping is changed in a pmap, the TLB entry corresponding to
- * the virtual address must be invalidated on all processors. In order
- * to accomplish this on systems with multiple processors, messages are
- * sent from the processor which performs the mapping change to all
- * processors on which the pmap is active. For other processors, the
- * ASN generation numbers for that processor is invalidated, so that
- * the next time the pmap is activated on that processor, a new ASN
- * will be allocated (which implicitly invalidates all TLB entries).
- *
- * Shootdown job queue entries are allocated using a simple special-
- * purpose allocator for speed.
- */
-struct pmap_tlb_shootdown_job {
- TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
- vaddr_t pj_va; /* virtual address */
- pmap_t pj_pmap; /* the pmap which maps the address */
- pt_entry_t pj_pte; /* the PTE bits */
- struct pmap_tlb_shootdown_job *pj_nextfree;
-};
-
-struct pmap_tlb_shootdown_q {
- TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;
- int pq_pte; /* aggregate PTE bits */
- int pq_count; /* number of pending requests */
- struct mutex pq_mutex; /* mutex on queue */
- int pq_flushg; /* pending flush global */
- int pq_flushu; /* pending flush user */
-} pmap_tlb_shootdown_q[I386_MAXPROCS];
-
-#define PMAP_TLB_MAXJOBS 16
-
-void pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *);
-struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get(
- struct pmap_tlb_shootdown_q *);
-void pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *,
- struct pmap_tlb_shootdown_job *);
-
-struct mutex pmap_tlb_shootdown_job_mutex;
-struct pmap_tlb_shootdown_job *pj_page, *pj_free;
-
-/*
* global data structures
*/
@@ -387,9 +344,9 @@ pt_entry_t *pmap_map_ptes(struct pmap *);
struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t);
void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
- vaddr_t, int32_t *, int);
+ vaddr_t, int);
void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
- vaddr_t, vaddr_t, int32_t *, int);
+ vaddr_t, vaddr_t, int);
#define PMAP_REMOVE_ALL 0
#define PMAP_REMOVE_SKIPWIRED 1
@@ -547,33 +504,8 @@ pmap_tmpunmap_pvepte(struct pv_entry *pve)
void
pmap_apte_flush(struct pmap *pmap)
{
-#if defined(MULTIPROCESSOR)
- struct pmap_tlb_shootdown_q *pq;
- struct cpu_info *ci, *self = curcpu();
- CPU_INFO_ITERATOR cii;
-#endif
-
- tlbflush(); /* flush TLB on current processor */
-#if defined(MULTIPROCESSOR)
- /*
- * Flush the APTE mapping from all other CPUs that
- * are using the pmap we are using (who's APTE space
- * is the one we've just modified).
- *
- * XXXthorpej -- find a way to defer the IPI.
- */
- CPU_INFO_FOREACH(cii, ci) {
- if (ci == self)
- continue;
- if (pmap_is_active(pmap, ci->ci_cpuid)) {
- pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
- mtx_enter(&pq->pq_mutex);
- pq->pq_flushu++;
- mtx_leave(&pq->pq_mutex);
- i386_send_ipi(ci, I386_IPI_TLB);
- }
- }
-#endif
+ pmap_tlb_shoottlb();
+ pmap_tlb_shootwait();
}
/*
@@ -651,17 +583,8 @@ pmap_exec_account(struct pmap *pm, vaddr_t va,
pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
return;
- if ((opte ^ npte) & PG_X) {
-#ifdef MULTIPROCESSOR
- int32_t cpumask = 0;
-
- pmap_tlb_shootdown(pm, va, opte, &cpumask);
- pmap_tlb_shootnow(cpumask);
-#else
- /* Don't bother deferring in the single CPU case. */
- pmap_update_pg(va);
-#endif
- }
+ if ((opte ^ npte) & PG_X)
+ pmap_tlb_shootpage(pm, va);
/*
* Executability was removed on the last executable change.
@@ -776,18 +699,13 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
pt_entry_t *pte, opte, npte;
pte = vtopte(va);
- npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V | pmap_pg_g;
+ npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V |
+ pmap_pg_g | PG_U | PG_M;
opte = i386_atomic_testset_ul(pte, npte); /* zap! */
if (pmap_valid_entry(opte)) {
-#ifdef MULTIPROCESSOR
- int32_t cpumask = 0;
-
- pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
- pmap_tlb_shootnow(cpumask);
-#else
- /* Don't bother deferring in the single CPU case. */
- pmap_update_pg(va);
-#endif
+ /* NB. - this should not happen. */
+ pmap_tlb_shootpage(pmap_kernel(), va);
+ pmap_tlb_shootwait();
}
}
@@ -801,34 +719,23 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
*/
void
-pmap_kremove(vaddr_t va, vsize_t len)
+pmap_kremove(vaddr_t sva, vsize_t len)
{
pt_entry_t *pte, opte;
-#ifdef MULTIPROCESSOR
- int32_t cpumask = 0;
-#endif
+ vaddr_t va, eva;
- len >>= PAGE_SHIFT;
- for ( /* null */ ; len ; len--, va += PAGE_SIZE) {
- if (va < VM_MIN_KERNEL_ADDRESS)
- pte = vtopte(va);
- else
- pte = kvtopte(va);
- opte = i386_atomic_testset_ul(pte, 0); /* zap! */
+ eva = sva + len;
+
+ for (va = sva; va != eva; va += PAGE_SIZE) {
+ pte = kvtopte(va);
+ opte = i386_atomic_testset_ul(pte, 0);
#ifdef DIAGNOSTIC
if (opte & PG_PVLIST)
panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", va);
#endif
- if ((opte & (PG_V | PG_U)) == (PG_V | PG_U))
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
-#else
- pmap_update_pg(va);
-#endif
}
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootnow(cpumask);
-#endif
+ pmap_tlb_shootrange(pmap_kernel(), sva, eva);
+ pmap_tlb_shootwait();
}
/*
@@ -856,7 +763,6 @@ pmap_bootstrap(vaddr_t kva_start)
struct pmap *kpm;
vaddr_t kva;
pt_entry_t *pte;
- int i;
/*
* set the page size (default value is 4K which is ok)
@@ -1024,17 +930,6 @@ pmap_bootstrap(vaddr_t kva_start)
&pool_allocator_nointr);
/*
- * Initialize the TLB shootdown queues.
- */
-
- mtx_init(&pmap_tlb_shootdown_job_mutex, IPL_NONE);
-
- for (i = 0; i < I386_MAXPROCS; i++) {
- TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
- mtx_init(&pmap_tlb_shootdown_q[i].pq_mutex, IPL_IPI);
- }
-
- /*
* ensure the TLB is sync'd with reality by flushing it...
*/
@@ -1050,8 +945,6 @@ pmap_bootstrap(vaddr_t kva_start)
void
pmap_init(void)
{
- int i;
-
/*
* now we need to free enough pv_entry structures to allow us to get
* the kmem_map allocated and inited (done after this function is
@@ -1067,15 +960,6 @@ pmap_init(void)
pv_nfpvents = 0;
(void) pmap_add_pvpage(pv_initpage, FALSE);
- pj_page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE);
- if (pj_page == NULL)
- panic("pmap_init: pj_page");
-
- for (i = 0; i < PAGE_SIZE / sizeof *pj_page - 1; i++)
- pj_page[i].pj_nextfree = &pj_page[i + 1];
- pj_page[i].pj_nextfree = NULL;
- pj_free = &pj_page[0];
-
/*
* done: pmap module is up (and ready for business)
*/
@@ -1482,8 +1366,8 @@ pmap_alloc_ptp(struct pmap *pmap, int pde_index, boolean_t just_try)
/* got one! */
atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
ptp->wire_count = 1; /* no mappings yet */
- pmap->pm_pdir[pde_index] =
- (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V);
+ pmap->pm_pdir[pde_index] = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_u |
+ PG_RW | PG_V | PG_M | PG_U);
pmap->pm_stats.resident_count++; /* count PTP as resident */
pmap->pm_ptphint = ptp;
return(ptp);
@@ -1955,8 +1839,8 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
#ifdef MULTIPROCESSOR
int id = cpu_number();
#endif
- pt_entry_t *spte = PTESLEW(csrc_pte,id);
- pt_entry_t *dpte = PTESLEW(cdst_pte,id);
+ pt_entry_t *spte = PTESLEW(csrc_pte, id);
+ pt_entry_t *dpte = PTESLEW(cdst_pte, id);
caddr_t csrcva = VASLEW(csrcp, id);
caddr_t cdstva = VASLEW(cdstp, id);
@@ -1971,9 +1855,6 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
bcopy(csrcva, cdstva, PAGE_SIZE);
*spte = *dpte = 0; /* zap! */
pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
-#ifdef MULTIPROCESSOR
- /* Using per-cpu VA; no shootdown required here. */
-#endif
}
/*
@@ -1993,7 +1874,7 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
void
pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
- vaddr_t startva, vaddr_t endva, int32_t *cpumaskp, int flags)
+ vaddr_t startva, vaddr_t endva, int flags)
{
struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */
struct pv_entry *pve;
@@ -2025,16 +1906,8 @@ pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
- if (opte & PG_U)
- pmap_tlb_shootdown(pmap, startva, opte, cpumaskp);
-
- if (ptp) {
+ if (ptp)
ptp->wire_count--; /* dropping a PTE */
- /* Make sure that the PDE is flushed */
- if ((ptp->wire_count <= 1) && !(opte & PG_U))
- pmap_tlb_shootdown(pmap, startva, opte,
- cpumaskp);
- }
/*
* Unnecessary work if not PG_VLIST.
@@ -2087,7 +1960,7 @@ pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
boolean_t
pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
- vaddr_t va, int32_t *cpumaskp, int flags)
+ vaddr_t va, int flags)
{
struct pv_entry *pve;
struct vm_page *pg;
@@ -2108,16 +1981,8 @@ pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
- if (opte & PG_U)
- pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
-
- if (ptp) {
+ if (ptp)
ptp->wire_count--; /* dropping a PTE */
- /* Make sure that the PDE is flushed */
- if ((ptp->wire_count <= 1) && !(opte & PG_U))
- pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
-
- }
pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
@@ -2167,8 +2032,9 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
paddr_t ptppa;
vaddr_t blkendva;
struct vm_page *ptp;
- int32_t cpumask = 0;
TAILQ_HEAD(, vm_page) empty_ptps;
+ int shootall;
+ vaddr_t va;
TAILQ_INIT(&empty_ptps);
@@ -2207,8 +2073,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
}
/* do it! */
- result = pmap_remove_pte(pmap, ptp,
- &ptes[atop(sva)], sva, &cpumask, flags);
+ result = pmap_remove_pte(pmap, ptp, &ptes[atop(sva)],
+ sva, flags);
/*
* if mapping removed and the PTP is no longer
@@ -2216,7 +2082,6 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
*/
if (result && ptp && ptp->wire_count <= 1) {
- /* zap! */
opte = i386_atomic_testset_ul(
&pmap->pm_pdir[pdei(sva)], 0);
#ifdef MULTIPROCESSOR
@@ -2225,9 +2090,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
* here if we're using APTE space.
*/
#endif
- pmap_tlb_shootdown(curpcb->pcb_pmap,
- ((vaddr_t)ptes) + ptp->offset, opte,
- &cpumask);
+ pmap_tlb_shootpage(curpcb->pcb_pmap,
+ ((vaddr_t)ptes) + ptp->offset);
#ifdef MULTIPROCESSOR
/*
* Always shoot down the pmap's self-mapping
@@ -2236,9 +2100,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
* here if pmap == curpcb->pcb_pmap (not APTE
* space).
*/
- pmap_tlb_shootdown(pmap,
- ((vaddr_t)PTE_BASE) + ptp->offset, opte,
- &cpumask);
+ pmap_tlb_shootpage(pmap,
+ ((vaddr_t)PTE_BASE) + ptp->offset);
#endif
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp)
@@ -2249,8 +2112,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
uvm_pagerealloc(ptp, NULL, 0);
TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
}
+ /*
+ * Shoot the tlb after any updates to the PDE.
+ */
+ pmap_tlb_shootpage(pmap, sva);
}
- pmap_tlb_shootnow(cpumask);
+ pmap_tlb_shootwait();
pmap_unmap_ptes(pmap); /* unlock pmap */
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
@@ -2260,10 +2127,19 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
return;
}
- for (/* null */ ; sva < eva ; sva = blkendva) {
+ /*
+ * Decide if we want to shoot the whole tlb or just the range.
+ * Right now, we simply shoot everything when we remove more
+ * than 32 pages, but never in the kernel pmap. XXX - tune.
+ */
+ if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
+ shootall = 1;
+ else
+ shootall = 0;
+ for (va = sva ; va < eva ; va = blkendva) {
/* determine range of block */
- blkendva = i386_round_pdr(sva+1);
+ blkendva = i386_round_pdr(va + 1);
if (blkendva > eva)
blkendva = eva;
@@ -2281,16 +2157,16 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
* be VM_MAX_ADDRESS.
*/
- if (pdei(sva) == PDSLOT_PTE)
+ if (pdei(va) == PDSLOT_PTE)
/* XXXCDC: ugly hack to avoid freeing PDP here */
continue;
- if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
+ if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
/* valid block? */
continue;
/* PA of the PTP */
- ptppa = (pmap->pm_pdir[pdei(sva)] & PG_FRAME);
+ ptppa = (pmap->pm_pdir[pdei(va)] & PG_FRAME);
/* get PTP if non-kernel mapping */
if (pmap == pmap_kernel()) {
@@ -2309,22 +2185,21 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
#endif
}
}
- pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(sva)],
- sva, blkendva, &cpumask, flags);
+ pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(va)],
+ va, blkendva, flags);
/* if PTP is no longer being used, free it! */
if (ptp && ptp->wire_count <= 1) {
- /* zap! */
opte = i386_atomic_testset_ul(
- &pmap->pm_pdir[pdei(sva)], 0);
+ &pmap->pm_pdir[pdei(va)], 0);
#if defined(MULTIPROCESSOR)
/*
* XXXthorpej Redundant shootdown can happen here
* if we're using APTE space.
*/
#endif
- pmap_tlb_shootdown(curpcb->pcb_pmap,
- ((vaddr_t)ptes) + ptp->offset, opte, &cpumask);
+ pmap_tlb_shootpage(curpcb->pcb_pmap,
+ ((vaddr_t)ptes) + ptp->offset);
#if defined(MULTIPROCESSOR)
/*
* Always shoot down the pmap's self-mapping
@@ -2332,8 +2207,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
* XXXthorpej Redundant shootdown can happen here
* if pmap == curpcb->pcb_pmap (not APTE space).
*/
- pmap_tlb_shootdown(pmap,
- ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask);
+ pmap_tlb_shootpage(pmap,
+ ((vaddr_t)PTE_BASE) + ptp->offset);
#endif
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp) /* update hint? */
@@ -2345,8 +2220,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
}
}
+ if (!shootall)
+ pmap_tlb_shootrange(pmap, sva, eva);
+ else
+ pmap_tlb_shoottlb();
- pmap_tlb_shootnow(cpumask);
+ pmap_tlb_shootwait();
pmap_unmap_ptes(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
@@ -2366,7 +2245,6 @@ pmap_page_remove(struct vm_page *pg)
{
struct pv_entry *pve;
pt_entry_t *ptes, opte;
- int32_t cpumask = 0;
TAILQ_HEAD(, vm_page) empty_ptps;
struct vm_page *ptp;
@@ -2397,18 +2275,12 @@ pmap_page_remove(struct vm_page *pg)
}
#endif
- opte = ptes[atop(pve->pv_va)];
- ptes[atop(pve->pv_va)] = 0; /* zap! */
+ opte = i386_atomic_testset_ul(&ptes[atop(pve->pv_va)], 0);
if (opte & PG_W)
pve->pv_pmap->pm_stats.wired_count--;
pve->pv_pmap->pm_stats.resident_count--;
- /* Shootdown only if referenced */
- if (opte & PG_U)
- pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte,
- &cpumask);
-
/* sync R/M bits */
pmap_sync_flags_pte(pg, opte);
@@ -2416,29 +2288,18 @@ pmap_page_remove(struct vm_page *pg)
if (pve->pv_ptp) {
pve->pv_ptp->wire_count--;
if (pve->pv_ptp->wire_count <= 1) {
- /*
- * Do we have to shootdown the page just to
- * get the pte out of the TLB ?
- */
- if(!(opte & PG_U))
- pmap_tlb_shootdown(pve->pv_pmap,
- pve->pv_va, opte, &cpumask);
-
- /* zap! */
opte = i386_atomic_testset_ul(
&pve->pv_pmap->pm_pdir[pdei(pve->pv_va)],
0);
- pmap_tlb_shootdown(curpcb->pcb_pmap,
- ((vaddr_t)ptes) + pve->pv_ptp->offset,
- opte, &cpumask);
+ pmap_tlb_shootpage(curpcb->pcb_pmap,
+ ((vaddr_t)ptes) + pve->pv_ptp->offset);
#if defined(MULTIPROCESSOR)
/*
* Always shoot down the other pmap's
* self-mapping of the PTP.
*/
- pmap_tlb_shootdown(pve->pv_pmap,
- ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset,
- opte, &cpumask);
+ pmap_tlb_shootpage(pve->pv_pmap,
+ ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset);
#endif
pve->pv_pmap->pm_stats.resident_count--;
/* update hint? */
@@ -2452,12 +2313,16 @@ pmap_page_remove(struct vm_page *pg)
listq);
}
}
+
+ pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
+
pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
}
pmap_free_pvs(NULL, pg->mdpage.pv_list);
pg->mdpage.pv_list = NULL;
PMAP_HEAD_TO_MAP_UNLOCK();
- pmap_tlb_shootnow(cpumask);
+ pmap_tlb_shootwait();
+
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
@@ -2517,7 +2382,6 @@ pmap_clear_attrs(struct vm_page *pg, int clearbits)
{
struct pv_entry *pve;
pt_entry_t *ptes, npte, opte;
- int32_t cpumask = 0;
u_long clearflags;
int result;
@@ -2543,14 +2407,13 @@ pmap_clear_attrs(struct vm_page *pg, int clearbits)
npte &= ~clearbits;
opte = i386_atomic_testset_ul(
&ptes[atop(pve->pv_va)], npte);
- pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va,
- opte, &cpumask);
+ pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
}
pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
}
PMAP_HEAD_TO_MAP_UNLOCK();
- pmap_tlb_shootnow(cpumask);
+ pmap_tlb_shootwait();
return (result != 0);
}
@@ -2587,7 +2450,8 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
pt_entry_t *ptes, *spte, *epte, npte;
vaddr_t blockend;
u_int32_t md_prot;
- int32_t cpumask = 0;
+ vaddr_t va;
+ int shootall = 0;
ptes = pmap_map_ptes(pmap); /* locks pmap */
@@ -2595,9 +2459,11 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
sva &= PG_FRAME;
eva &= PG_FRAME;
- for (/* null */ ; sva < eva ; sva = blockend) {
+ if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
+ shootall = 1;
- blockend = (sva & PD_MASK) + NBPD;
+ for (va = sva; va < eva; va = blockend) {
+ blockend = (va & PD_MASK) + NBPD;
if (blockend > eva)
blockend = eva;
@@ -2611,24 +2477,24 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
*/
/* XXXCDC: ugly hack to avoid freeing PDP here */
- if (pdei(sva) == PDSLOT_PTE)
+ if (pdei(va) == PDSLOT_PTE)
continue;
/* empty block? */
- if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
+ if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
continue;
md_prot = protection_codes[prot];
- if (sva < VM_MAXUSER_ADDRESS)
+ if (va < VM_MAXUSER_ADDRESS)
md_prot |= PG_u;
- else if (sva < VM_MAX_ADDRESS)
+ else if (va < VM_MAX_ADDRESS)
/* XXX: write-prot our PTES? never! */
md_prot |= (PG_u | PG_RW);
- spte = &ptes[atop(sva)];
+ spte = &ptes[atop(va)];
epte = &ptes[atop(blockend)];
- for (/*null */; spte < epte ; spte++, sva += PAGE_SIZE) {
+ for (/*null */; spte < epte ; spte++, va += PAGE_SIZE) {
if (!pmap_valid_entry(*spte)) /* no mapping? */
continue;
@@ -2636,14 +2502,17 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
npte = (*spte & ~PG_PROT) | md_prot;
if (npte != *spte) {
- pmap_exec_account(pmap, sva, *spte, npte);
- i386_atomic_testset_ul(spte, npte); /* zap! */
- pmap_tlb_shootdown(pmap, sva, *spte, &cpumask);
+ pmap_exec_account(pmap, va, *spte, npte);
+ i386_atomic_testset_ul(spte, npte);
}
}
}
+ if (shootall)
+ pmap_tlb_shoottlb();
+ else
+ pmap_tlb_shootrange(pmap, sva, eva);
- pmap_tlb_shootnow(cpumask);
+ pmap_tlb_shootwait();
pmap_unmap_ptes(pmap); /* unlocks pmap */
}
@@ -2880,8 +2749,6 @@ enter_now:
npte = pa | protection_codes[prot] | PG_V;
pmap_exec_account(pmap, va, opte, npte);
- if (pg != NULL)
- npte |= PG_PVLIST;
if (wired)
npte |= PG_W;
if (va < VM_MAXUSER_ADDRESS)
@@ -2890,20 +2757,20 @@ enter_now:
npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
if (pmap == pmap_kernel())
npte |= pmap_pg_g;
+ if (flags & VM_PROT_READ)
+ npte |= PG_U;
+ if (flags & VM_PROT_WRITE)
+ npte |= PG_M;
+ if (pg) {
+ npte |= PG_PVLIST;
+ pmap_sync_flags_pte(pg, npte);
+ }
- ptes[atop(va)] = npte; /* zap! */
-
- if ((opte & ~(PG_M|PG_U)) != npte) {
-#ifdef MULTIPROCESSOR
- int32_t cpumask = 0;
+ opte = i386_atomic_testset_ul(&ptes[atop(va)], npte);
- pmap_tlb_shootdown(pmap, va, opte, &cpumask);
- pmap_tlb_shootnow(cpumask);
-#else
- /* Don't bother deferring in the single CPU case. */
- if (pmap_is_curpmap(pmap))
- pmap_update_pg(va);
-#endif
+ if (opte & PG_V) {
+ pmap_tlb_shootpage(pmap, va);
+ pmap_tlb_shootwait();
}
error = 0;
@@ -3046,284 +2913,201 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
}
#endif
+#ifdef MULTIPROCESSOR
+/*
+ * Locking for tlb shootdown.
+ *
+ * We lock by setting tlb_shoot_wait to the number of cpus that will
+ * receive our tlb shootdown. After sending the IPIs, we don't need to
+ * worry about locking order or interrupts spinning for the lock because
+ * the call that grabs the "lock" isn't the one that releases it. And
+ * there is nothing that can block the IPI that releases the lock.
+ *
+ * The functions are organized so that we first count the number of
+ * cpus we need to send the IPI to, then we grab the counter, then
+ * we send the IPIs, then we finally do our own shootdown.
+ *
+ * Our shootdown is last to make it parallel with the other cpus
+ * to shorten the spin time.
+ *
+ * Notice that we depend on failures to send IPIs only being able to
+ * happen during boot. If they happen later, the above assumption
+ * doesn't hold since we can end up in situations where no one will
+ * release the lock if we get an interrupt in a bad moment.
+ */
+
+volatile int tlb_shoot_wait;
-/******************** TLB shootdown code ********************/
+volatile vaddr_t tlb_shoot_addr1;
+volatile vaddr_t tlb_shoot_addr2;
void
-pmap_tlb_shootnow(int32_t cpumask)
+pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
{
-#ifdef MULTIPROCESSOR
- struct cpu_info *ci, *self;
+ struct cpu_info *ci, *self = curcpu();
CPU_INFO_ITERATOR cii;
- int s;
-#ifdef DIAGNOSTIC
- int count = 0;
-#endif
-#endif
-
- if (cpumask == 0)
- return;
-
-#ifdef MULTIPROCESSOR
- self = curcpu();
- s = splipi();
- self->ci_tlb_ipi_mask = cpumask;
-#endif
+ int wait = 0;
+ int mask = 0;
- pmap_do_tlb_shootdown(0); /* do *our* work. */
-
-#ifdef MULTIPROCESSOR
- splx(s);
-
- if (cold)
+ if (cpu_class == CPUCLASS_386) {
+ tlbflush();
return;
+ }
- /*
- * Send the TLB IPI to other CPUs pending shootdowns.
- */
CPU_INFO_FOREACH(cii, ci) {
- if (ci == self)
+ if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
+ !(ci->ci_flags & CPUF_RUNNING))
continue;
- if (cpumask & (1U << ci->ci_cpuid))
- if (i386_send_ipi(ci, I386_IPI_TLB) != 0)
- i386_atomic_clearbits_l(&self->ci_tlb_ipi_mask,
- (1U << ci->ci_cpuid));
+ mask |= 1 << ci->ci_cpuid;
+ wait++;
}
- while (self->ci_tlb_ipi_mask != 0) {
- SPINLOCK_SPIN_HOOK;
-#ifdef DIAGNOSTIC
- if (count++ > 100000000)
- panic("%s: TLB IPI rendezvous failed (mask 0x%x)",
- self->ci_dev.dv_xname, self->ci_tlb_ipi_mask);
-#endif
+ if (wait > 0) {
+ int s = splvm();
+
+ while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+ while (tlb_shoot_wait != 0)
+ SPINLOCK_SPIN_HOOK;
+ }
+ tlb_shoot_addr1 = va;
+ CPU_INFO_FOREACH(cii, ci) {
+ if ((mask & 1 << ci->ci_cpuid) == 0)
+ continue;
+ if (i386_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
+ panic("pmap_tlb_shootpage: ipi failed");
+ }
+ splx(s);
}
-#endif
+
+ if (pmap_is_curpmap(pm))
+ pmap_update_pg(va);
}
-/*
- * pmap_tlb_shootdown:
- *
- * Cause the TLB entry for pmap/va to be shot down.
- */
void
-pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, int32_t *cpumaskp)
+pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
{
- struct cpu_info *ci, *self;
- struct pmap_tlb_shootdown_q *pq;
- struct pmap_tlb_shootdown_job *pj;
+ struct cpu_info *ci, *self = curcpu();
CPU_INFO_ITERATOR cii;
- int s;
+ int wait = 0;
+ int mask = 0;
+ vaddr_t va;
- if (pmap_initialized == FALSE) {
- pmap_update_pg(va);
+ if (cpu_class == CPUCLASS_386) {
+ tlbflush();
return;
}
- self = curcpu();
-
- s = splipi();
-#if 0
- printf("dshootdown %lx\n", va);
-#endif
-
CPU_INFO_FOREACH(cii, ci) {
- /* Note: we queue shootdown events for ourselves here! */
- if (pmap_is_active(pmap, ci->ci_cpuid) == 0)
+ if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
+ !(ci->ci_flags & CPUF_RUNNING))
continue;
- if (ci != self && !(ci->ci_flags & CPUF_RUNNING))
- continue;
- pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
- mtx_enter(&pq->pq_mutex);
+ mask |= 1 << ci->ci_cpuid;
+ wait++;
+ }
- /*
- * If there's a global flush already queued, or a
- * non-global flush, and this pte doesn't have the G
- * bit set, don't bother.
- */
- if (pq->pq_flushg > 0 ||
- (pq->pq_flushu > 0 && (pte & pmap_pg_g) == 0)) {
- mtx_leave(&pq->pq_mutex);
- continue;
- }
+ if (wait > 0) {
+ int s = splvm();
-#ifdef I386_CPU
- /*
- * i386 CPUs can't invalidate a single VA, only
- * flush the entire TLB, so don't bother allocating
- * jobs for them -- just queue a `flushu'.
- *
- * XXX note that this can be executed for non-i386
- * when called early (before identifycpu() has set
- * cpu_class)
- */
- if (cpu_class == CPUCLASS_386) {
- pq->pq_flushu++;
- *cpumaskp |= 1U << ci->ci_cpuid;
- mtx_leave(&pq->pq_mutex);
- continue;
+ while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+ while (tlb_shoot_wait != 0)
+ SPINLOCK_SPIN_HOOK;
}
-#endif
-
- pj = pmap_tlb_shootdown_job_get(pq);
- pq->pq_pte |= pte;
- if (pj == NULL) {
- /*
- * Couldn't allocate a job entry.
- * Kill it now for this cpu, unless the failure
- * was due to too many pending flushes; otherwise,
- * tell other cpus to kill everything..
- */
- if (ci == self && pq->pq_count < PMAP_TLB_MAXJOBS) {
- pmap_update_pg(va);
- mtx_leave(&pq->pq_mutex);
+ tlb_shoot_addr1 = sva;
+ tlb_shoot_addr2 = eva;
+ CPU_INFO_FOREACH(cii, ci) {
+ if ((mask & 1 << ci->ci_cpuid) == 0)
continue;
- } else {
- if (pq->pq_pte & pmap_pg_g)
- pq->pq_flushg++;
- else
- pq->pq_flushu++;
- /*
- * Since we've nailed the whole thing,
- * drain the job entries pending for that
- * processor.
- */
- pmap_tlb_shootdown_q_drain(pq);
- *cpumaskp |= 1U << ci->ci_cpuid;
- }
- } else {
- pj->pj_pmap = pmap;
- pj->pj_va = va;
- pj->pj_pte = pte;
- TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
- *cpumaskp |= 1U << ci->ci_cpuid;
+ if (i386_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
+ panic("pmap_tlb_shootrange: ipi failed");
}
- mtx_leave(&pq->pq_mutex);
+ splx(s);
}
- splx(s);
+
+ if (pmap_is_curpmap(pm))
+ for (va = sva; va < eva; va += PAGE_SIZE)
+ pmap_update_pg(va);
}
-/*
- * pmap_do_tlb_shootdown:
- *
- * Process pending TLB shootdown operations for this processor.
- */
void
-pmap_do_tlb_shootdown(struct cpu_info *self)
+pmap_tlb_shoottlb(void)
{
- u_long cpu_id = cpu_number();
- struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
- struct pmap_tlb_shootdown_job *pj;
-#ifdef MULTIPROCESSOR
- struct cpu_info *ci;
+ struct cpu_info *ci, *self = curcpu();
CPU_INFO_ITERATOR cii;
-#endif
+ int wait = 0;
+ int mask = 0;
- mtx_enter(&pq->pq_mutex);
+ if (cpu_class == CPUCLASS_386) {
+ tlbflush();
+ return;
+ }
- if (pq->pq_flushg) {
- tlbflushg();
- pq->pq_flushg = 0;
- pq->pq_flushu = 0;
- pmap_tlb_shootdown_q_drain(pq);
- } else {
- /*
- * TLB flushes for PTEs with PG_G set may be in the queue
- * after a flushu, they need to be dealt with.
- */
- if (pq->pq_flushu) {
- tlbflush();
- }
- while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
- TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
+ CPU_INFO_FOREACH(cii, ci) {
+ if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
+ continue;
+ mask |= 1 << ci->ci_cpuid;
+ wait++;
+ }
- if ((!pq->pq_flushu && pmap_is_curpmap(pj->pj_pmap)) ||
- (pj->pj_pte & pmap_pg_g))
- pmap_update_pg(pj->pj_va);
+ if (wait) {
+ int s = splvm();
- pmap_tlb_shootdown_job_put(pq, pj);
+ while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+ while (tlb_shoot_wait != 0)
+ SPINLOCK_SPIN_HOOK;
}
- pq->pq_flushu = pq->pq_pte = 0;
+ CPU_INFO_FOREACH(cii, ci) {
+ if ((mask & 1 << ci->ci_cpuid) == 0)
+ continue;
+ if (i386_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
+ panic("pmap_tlb_shoottlb: ipi failed");
+ }
+ splx(s);
}
-#ifdef MULTIPROCESSOR
- CPU_INFO_FOREACH(cii, ci)
- i386_atomic_clearbits_l(&ci->ci_tlb_ipi_mask,
- (1U << cpu_id));
-#endif
- mtx_leave(&pq->pq_mutex);
+ tlbflush();
}
-/*
- * pmap_tlb_shootdown_q_drain:
- *
- * Drain a processor's TLB shootdown queue. We do not perform
- * the shootdown operations. This is merely a convenience
- * function.
- *
- * Note: We expect the queue to be locked.
- */
void
-pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *pq)
+pmap_tlb_shootwait(void)
{
- struct pmap_tlb_shootdown_job *pj;
+ while (tlb_shoot_wait != 0)
+ SPINLOCK_SPIN_HOOK;
+}
- while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
- TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
- pmap_tlb_shootdown_job_put(pq, pj);
+#else
+
+void
+pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
+{
+ if (cpu_class == CPUCLASS_386) {
+ tlbflush();
+ return;
}
- pq->pq_pte = 0;
+
+ if (pmap_is_curpmap(pm))
+ pmap_update_pg(va);
+
}
-/*
- * pmap_tlb_shootdown_job_get:
- *
- * Get a TLB shootdown job queue entry. This places a limit on
- * the number of outstanding jobs a processor may have.
- *
- * Note: We expect the queue to be locked.
- */
-struct pmap_tlb_shootdown_job *
-pmap_tlb_shootdown_job_get(struct pmap_tlb_shootdown_q *pq)
+void
+pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
{
- struct pmap_tlb_shootdown_job *pj;
+ vaddr_t va;
- if (pq->pq_count >= PMAP_TLB_MAXJOBS)
- return (NULL);
-
- mtx_enter(&pmap_tlb_shootdown_job_mutex);
- if (pj_free == NULL) {
- mtx_leave(&pmap_tlb_shootdown_job_mutex);
- return NULL;
+ if (cpu_class == CPUCLASS_386) {
+ tlbflush();
+ return;
}
- pj = pj_free;
- pj_free = pj_free->pj_nextfree;
- mtx_leave(&pmap_tlb_shootdown_job_mutex);
- pq->pq_count++;
- return (pj);
+ for (va = sva; va < eva; va += PAGE_SIZE)
+ pmap_update_pg(va);
+
}
-/*
- * pmap_tlb_shootdown_job_put:
- *
- * Put a TLB shootdown job queue entry onto the free list.
- *
- * Note: We expect the queue to be locked.
- */
void
-pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *pq,
- struct pmap_tlb_shootdown_job *pj)
+pmap_tlb_shoottlb(void)
{
-#ifdef DIAGNOSTIC
- if (pq->pq_count == 0)
- panic("pmap_tlb_shootdown_job_put: queue length inconsistency");
-#endif
- mtx_enter(&pmap_tlb_shootdown_job_mutex);
- pj->pj_nextfree = pj_free;
- pj_free = pj;
- mtx_leave(&pmap_tlb_shootdown_job_mutex);
-
- pq->pq_count--;
+ tlbflush();
}
+#endif /* MULTIPROCESSOR */
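
On a non-MULTIPROCESSOR kernel the same call sites compile down to
purely local invalidation. A sketch of what
"pmap_tlb_shootpage(pm, va); pmap_tlb_shootwait();" effectively executes
there, given the uniprocessor definitions just above (the function name
is hypothetical; pmap_tlb_shootwait() itself is defined away in the
pmap.h diff below):

static void
up_shootpage_expanded(struct pmap *pm, vaddr_t va)
{
	if (cpu_class == CPUCLASS_386)
		tlbflush();		/* the 80386 can only flush everything */
	else if (pmap_is_curpmap(pm))
		pmap_update_pg(va);	/* invlpg on the local cpu */
	/* pmap_tlb_shootwait() expands to nothing */
}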
diff --git a/sys/arch/i386/i386/vm_machdep.c b/sys/arch/i386/i386/vm_machdep.c
index b051d39e554..10fb4b1aef2 100644
--- a/sys/arch/i386/i386/vm_machdep.c
+++ b/sys/arch/i386/i386/vm_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vm_machdep.c,v 1.50 2007/03/19 15:17:21 art Exp $ */
+/* $OpenBSD: vm_machdep.c,v 1.51 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: vm_machdep.c,v 1.61 1996/05/03 19:42:35 christos Exp $ */
/*-
@@ -220,14 +220,18 @@ pagemove(caddr_t from, caddr_t to, size_t size)
{
pt_entry_t *fpte, *tpte;
pt_entry_t ofpte, otpte;
-#ifdef MULTIPROCESSOR
- u_int32_t cpumask = 0;
-#endif
+ vaddr_t fsva, tsva, feva, teva;
#ifdef DIAGNOSTIC
if ((size & PAGE_MASK) != 0)
panic("pagemove");
#endif
+
+ fsva = (vaddr_t)from;
+ tsva = (vaddr_t)to;
+ feva = fsva + size;
+ teva = tsva + size;
+
fpte = kvtopte((vaddr_t)from);
tpte = kvtopte((vaddr_t)to);
while (size > 0) {
@@ -235,38 +239,14 @@ pagemove(caddr_t from, caddr_t to, size_t size)
otpte = *tpte;
*tpte++ = *fpte;
*fpte++ = 0;
-#if defined(I386_CPU) && !defined(MULTIPROCESSOR)
- if (cpu_class != CPUCLASS_386)
-#endif
- {
- if (otpte & PG_V)
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootdown(pmap_kernel(), (vaddr_t)to,
- otpte, &cpumask);
-#else
- pmap_update_pg((vaddr_t)to);
-#endif
- if (ofpte & PG_V)
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootdown(pmap_kernel(),
- (vaddr_t)from, ofpte, &cpumask);
-#else
- pmap_update_pg((vaddr_t)from);
-#endif
- }
from += PAGE_SIZE;
to += PAGE_SIZE;
size -= PAGE_SIZE;
}
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootnow(cpumask);
-#else
-#if defined(I386_CPU)
- if (cpu_class == CPUCLASS_386)
- tlbflush();
-#endif
-#endif
+ pmap_tlb_shootrange(pmap_kernel(), fsva, feva);
+ pmap_tlb_shootrange(pmap_kernel(), tsva, teva);
+ pmap_tlb_shootwait();
}
/*
diff --git a/sys/arch/i386/include/atomic.h b/sys/arch/i386/include/atomic.h
index 44a7be7f52f..35ea910c8fa 100644
--- a/sys/arch/i386/include/atomic.h
+++ b/sys/arch/i386/include/atomic.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: atomic.h,v 1.5 2007/02/19 17:18:42 deraadt Exp $ */
+/* $OpenBSD: atomic.h,v 1.6 2007/05/25 15:55:27 art Exp $ */
/* $NetBSD: atomic.h,v 1.1.2.2 2000/02/21 18:54:07 sommerfeld Exp $ */
/*-
@@ -92,6 +92,20 @@ i386_atomic_clearbits_l(volatile u_int32_t *ptr, unsigned long bits)
__asm __volatile(LOCK " andl %1,%0" : "=m" (*ptr) : "ir" (bits));
}
+/*
+ * cas = compare and set
+ */
+static __inline int
+i486_atomic_cas_int(volatile u_int *ptr, u_int expect, u_int set)
+{
+ int res;
+
+ __asm volatile(LOCK " cmpxchgl %2, %1" : "=a" (res), "=m" (*ptr)
+ : "r" (set), "a" (expect), "m" (*ptr) : "memory");
+
+ return (res);
+}
+
#define atomic_setbits_int i386_atomic_setbits_l
#define atomic_clearbits_int i386_atomic_clearbits_l
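
Since cmpxchgl leaves the previous value of *ptr in %eax, the helper
returns the old value, and the swap happened iff that return value
equals expect. A brief usage sketch (the flag variable is hypothetical;
this is the spin-acquire idiom the shootdown code in pmap.c uses on
tlb_shoot_wait):

volatile u_int flag;

/* Spin until we atomically move flag from 0 to 1. */
while (i486_atomic_cas_int(&flag, 0, 1) != 0)
	SPINLOCK_SPIN_HOOK;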
diff --git a/sys/arch/i386/include/i82489var.h b/sys/arch/i386/include/i82489var.h
index 653641bf713..0fe445e41fe 100644
--- a/sys/arch/i386/include/i82489var.h
+++ b/sys/arch/i386/include/i82489var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: i82489var.h,v 1.4 2007/04/12 20:22:58 art Exp $ */
+/* $OpenBSD: i82489var.h,v 1.5 2007/05/25 15:55:27 art Exp $ */
/* $NetBSD: i82489var.h,v 1.1.2.2 2000/02/21 18:46:14 sommerfeld Exp $ */
/*-
@@ -109,6 +109,14 @@ extern void Xintrltimer(void);
*/
#define LAPIC_IPI_OFFSET 0xf0
#define LAPIC_IPI_AST (LAPIC_IPI_OFFSET + 0)
+#define LAPIC_IPI_INVLTLB (LAPIC_IPI_OFFSET + 1)
+#define LAPIC_IPI_INVLPG (LAPIC_IPI_OFFSET + 2)
+#define LAPIC_IPI_INVLRANGE (LAPIC_IPI_OFFSET + 3)
+
+extern void Xintripi_ast(void);
+extern void Xintripi_invltlb(void);
+extern void Xintripi_invlpg(void);
+extern void Xintripi_invlrange(void);
extern void Xintrsoftclock(void);
extern void Xintrsoftnet(void);
diff --git a/sys/arch/i386/include/intr.h b/sys/arch/i386/include/intr.h
index eeeb74a605e..0d69c57277a 100644
--- a/sys/arch/i386/include/intr.h
+++ b/sys/arch/i386/include/intr.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: intr.h,v 1.31 2007/05/16 19:37:06 thib Exp $ */
+/* $OpenBSD: intr.h,v 1.32 2007/05/25 15:55:27 art Exp $ */
/* $NetBSD: intr.h,v 1.5 1996/05/13 06:11:28 mycroft Exp $ */
/*
@@ -137,6 +137,7 @@ struct cpu_info;
#ifdef MULTIPROCESSOR
int i386_send_ipi(struct cpu_info *, int);
+int i386_fast_ipi(struct cpu_info *, int);
void i386_broadcast_ipi(int);
void i386_multicast_ipi(int, int);
void i386_ipi_handler(void);
diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h
index 6520a9bbebe..9f0ed360a1a 100644
--- a/sys/arch/i386/include/pmap.h
+++ b/sys/arch/i386/include/pmap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.h,v 1.45 2007/04/26 11:31:52 art Exp $ */
+/* $OpenBSD: pmap.h,v 1.46 2007/05/25 15:55:27 art Exp $ */
/* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */
/*
@@ -383,9 +383,14 @@ int pmap_exec_fixup(struct vm_map *, struct trapframe *,
vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
-void pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, int32_t *);
-void pmap_tlb_shootnow(int32_t);
-void pmap_do_tlb_shootdown(struct cpu_info *);
+void pmap_tlb_shootpage(struct pmap *, vaddr_t);
+void pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t);
+void pmap_tlb_shoottlb(void);
+#ifdef MULTIPROCESSOR
+void pmap_tlb_shootwait(void);
+#else
+#define pmap_tlb_shootwait()
+#endif
#define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */