summaryrefslogtreecommitdiff
path: root/sys/arch
diff options
context:
space:
mode:
Diffstat (limited to 'sys/arch')
-rw-r--r-- sys/arch/i386/i386/apicvec.s 69
-rw-r--r-- sys/arch/i386/i386/ipifuncs.c 13
-rw-r--r-- sys/arch/i386/i386/lapic.c 6
-rw-r--r-- sys/arch/i386/i386/lock_machdep.c 15
-rw-r--r-- sys/arch/i386/i386/machdep.c 19
-rw-r--r-- sys/arch/i386/i386/pmap.c 714
-rw-r--r-- sys/arch/i386/i386/vm_machdep.c 42
-rw-r--r-- sys/arch/i386/include/atomic.h 16
-rw-r--r-- sys/arch/i386/include/i82489var.h 10
-rw-r--r-- sys/arch/i386/include/intr.h 3
-rw-r--r-- sys/arch/i386/include/pmap.h 13
11 files changed, 385 insertions, 535 deletions
diff --git a/sys/arch/i386/i386/apicvec.s b/sys/arch/i386/i386/apicvec.s
index d6422e9ebf3..da710c4dbe5 100644
--- a/sys/arch/i386/i386/apicvec.s
+++ b/sys/arch/i386/i386/apicvec.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: apicvec.s,v 1.9 2007/04/12 20:22:58 art Exp $ */
+/* $OpenBSD: apicvec.s,v 1.10 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: apicvec.s,v 1.1.2.2 2000/02/21 21:54:01 sommerfeld Exp $ */
/*-
@@ -86,6 +86,73 @@ XINTR(ipi_ast):
popl %ds
popl %eax
iret
+
+ .globl XINTR(ipi_invltlb)
+ .p2align 4,0x90
+XINTR(ipi_invltlb):
+ pushl %eax
+ pushl %ds
+ movl $GSEL(GDATA_SEL, SEL_KPL), %eax
+ movl %eax, %ds
+
+ ioapic_asm_ack()
+
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ lock
+ decl tlb_shoot_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+ .globl XINTR(ipi_invlpg)
+ .p2align 4,0x90
+XINTR(ipi_invlpg):
+ pushl %eax
+ pushl %ds
+ movl $GSEL(GDATA_SEL, SEL_KPL), %eax
+ movl %eax, %ds
+
+ ioapic_asm_ack()
+
+ movl tlb_shoot_addr1, %eax
+ invlpg (%eax)
+
+ lock
+ decl tlb_shoot_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+ .globl XINTR(ipi_invlrange)
+ .p2align 4,0x90
+XINTR(ipi_invlrange):
+ pushl %eax
+ pushl %edx
+ pushl %ds
+ movl $GSEL(GDATA_SEL, SEL_KPL), %eax
+ movl %eax, %ds
+
+ ioapic_asm_ack()
+
+ movl tlb_shoot_addr1, %eax
+ movl tlb_shoot_addr2, %edx
+1: invlpg (%eax)
+ addl $PAGE_SIZE, %eax
+ cmpl %edx, %eax
+ jb 1b
+
+ lock
+ decl tlb_shoot_wait
+
+ popl %ds
+ popl %edx
+ popl %eax
+ iret
+
#endif
/*
diff --git a/sys/arch/i386/i386/ipifuncs.c b/sys/arch/i386/i386/ipifuncs.c
index e679fcb6a64..711c6cf278d 100644
--- a/sys/arch/i386/i386/ipifuncs.c
+++ b/sys/arch/i386/i386/ipifuncs.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ipifuncs.c,v 1.7 2007/04/21 21:06:14 gwk Exp $ */
+/* $OpenBSD: ipifuncs.c,v 1.8 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: ipifuncs.c,v 1.1.2.3 2000/06/26 02:04:06 sommerfeld Exp $ */
/*-
@@ -77,7 +77,7 @@ void (*ipifunc[I386_NIPI])(struct cpu_info *) =
i386_ipi_microset,
i386_ipi_flush_fpu,
i386_ipi_synch_fpu,
- pmap_do_tlb_shootdown,
+ NULL,
#if 0
i386_reload_mtrr,
gdt_reload_cpu,
@@ -144,6 +144,15 @@ i386_send_ipi(struct cpu_info *ci, int ipimask)
return ret;
}
+int
+i386_fast_ipi(struct cpu_info *ci, int ipi)
+{
+ if (!(ci->ci_flags & CPUF_RUNNING))
+ return (ENOENT);
+
+ return (i386_ipi(ipi, ci->ci_cpuid, LAPIC_DLMODE_FIXED));
+}
+
void
i386_self_ipi(int vector)
{
diff --git a/sys/arch/i386/i386/lapic.c b/sys/arch/i386/i386/lapic.c
index 2d8d07b0872..42378c42f57 100644
--- a/sys/arch/i386/i386/lapic.c
+++ b/sys/arch/i386/i386/lapic.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lapic.c,v 1.15 2007/04/12 20:22:58 art Exp $ */
+/* $OpenBSD: lapic.c,v 1.16 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $ */
/*-
@@ -179,7 +179,6 @@ lapic_set_lvt()
void
lapic_boot_init(paddr_t lapic_base)
{
- extern void Xintripi_ast(void);
static int clk_irq = 0;
static int ipi_irq = 0;
@@ -188,6 +187,9 @@ lapic_boot_init(paddr_t lapic_base)
#ifdef MULTIPROCESSOR
idt_vec_set(LAPIC_IPI_VECTOR, Xintripi);
idt_vec_set(LAPIC_IPI_AST, Xintripi_ast);
+ idt_vec_set(LAPIC_IPI_INVLTLB, Xintripi_invltlb);
+ idt_vec_set(LAPIC_IPI_INVLPG, Xintripi_invlpg);
+ idt_vec_set(LAPIC_IPI_INVLRANGE, Xintripi_invlrange);
#endif
idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
idt_vec_set(LAPIC_TIMER_VECTOR, Xintrltimer);
diff --git a/sys/arch/i386/i386/lock_machdep.c b/sys/arch/i386/i386/lock_machdep.c
index 3c00a13309e..d18663ca4aa 100644
--- a/sys/arch/i386/i386/lock_machdep.c
+++ b/sys/arch/i386/i386/lock_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lock_machdep.c,v 1.4 2007/05/04 12:58:41 art Exp $ */
+/* $OpenBSD: lock_machdep.c,v 1.5 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: lock_machdep.c,v 1.1.2.3 2000/05/03 14:40:30 sommerfeld Exp $ */
/*-
@@ -142,19 +142,8 @@ rw_cas_386(volatile unsigned long *p, unsigned long o, unsigned long n)
return (0);
}
-#ifdef MULTIPROCESSOR
-#define MPLOCK "lock "
-#else
-#define MPLOCK
-#endif
-
int
rw_cas_486(volatile unsigned long *p, unsigned long o, unsigned long n)
{
- int res;
-
- __asm volatile(MPLOCK " cmpxchgl %2, %1" : "=a" (res), "=m" (*p)
- : "r" (n), "a" (o), "m" (*p) : "memory");
-
- return (res != o);
+ return (i486_atomic_cas_int((u_int *)p, o, n) != o);
}
diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c
index 22306883185..8202008a456 100644
--- a/sys/arch/i386/i386/machdep.c
+++ b/sys/arch/i386/i386/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.389 2007/05/23 20:33:46 pvalchev Exp $ */
+/* $OpenBSD: machdep.c,v 1.390 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */
/*-
@@ -2214,7 +2214,7 @@ aston(struct proc *p)
#ifdef MULTIPROCESSOR
if (i386_atomic_testset_i(&p->p_md.md_astpending, 1) == 0 &&
p->p_cpu != curcpu())
- i386_ipi(LAPIC_IPI_AST, p->p_cpu->ci_cpuid, LAPIC_DLMODE_FIXED);
+ i386_fast_ipi(p->p_cpu, LAPIC_IPI_AST);
#else
p->p_md.md_astpending = 1;
#endif
@@ -3585,9 +3585,6 @@ bus_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int cacheable,
vaddr_t va;
pt_entry_t *pte;
bus_size_t map_size;
-#ifdef MULTIPROCESSOR
- u_int32_t cpumask = 0;
-#endif
pa = trunc_page(bpa);
endpa = round_page(bpa + size);
@@ -3620,17 +3617,11 @@ bus_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int cacheable,
*pte &= ~PG_N;
else
*pte |= PG_N;
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootdown(pmap_kernel(), va, *pte,
- &cpumask);
-#else
- pmap_update_pg(va);
-#endif
+ pmap_tlb_shootpage(pmap_kernel(), va);
}
}
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootnow(cpumask);
-#endif
+
+ pmap_tlb_shootwait();
pmap_update(pmap_kernel());
return 0;
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 2a221f4ab80..5fe984296be 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.111 2007/05/20 14:14:09 miod Exp $ */
+/* $OpenBSD: pmap.c,v 1.112 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
/*
@@ -213,49 +213,6 @@ struct simplelock pmaps_lock;
#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */
/*
- * TLB Shootdown:
- *
- * When a mapping is changed in a pmap, the TLB entry corresponding to
- * the virtual address must be invalidated on all processors. In order
- * to accomplish this on systems with multiple processors, messages are
- * sent from the processor which performs the mapping change to all
- * processors on which the pmap is active. For other processors, the
- * ASN generation numbers for that processor is invalidated, so that
- * the next time the pmap is activated on that processor, a new ASN
- * will be allocated (which implicitly invalidates all TLB entries).
- *
- * Shootdown job queue entries are allocated using a simple special-
- * purpose allocator for speed.
- */
-struct pmap_tlb_shootdown_job {
- TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
- vaddr_t pj_va; /* virtual address */
- pmap_t pj_pmap; /* the pmap which maps the address */
- pt_entry_t pj_pte; /* the PTE bits */
- struct pmap_tlb_shootdown_job *pj_nextfree;
-};
-
-struct pmap_tlb_shootdown_q {
- TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;
- int pq_pte; /* aggregate PTE bits */
- int pq_count; /* number of pending requests */
- struct mutex pq_mutex; /* mutex on queue */
- int pq_flushg; /* pending flush global */
- int pq_flushu; /* pending flush user */
-} pmap_tlb_shootdown_q[I386_MAXPROCS];
-
-#define PMAP_TLB_MAXJOBS 16
-
-void pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *);
-struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get(
- struct pmap_tlb_shootdown_q *);
-void pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *,
- struct pmap_tlb_shootdown_job *);
-
-struct mutex pmap_tlb_shootdown_job_mutex;
-struct pmap_tlb_shootdown_job *pj_page, *pj_free;
-
-/*
* global data structures
*/
@@ -387,9 +344,9 @@ pt_entry_t *pmap_map_ptes(struct pmap *);
struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t);
void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
- vaddr_t, int32_t *, int);
+ vaddr_t, int);
void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
- vaddr_t, vaddr_t, int32_t *, int);
+ vaddr_t, vaddr_t, int);
#define PMAP_REMOVE_ALL 0
#define PMAP_REMOVE_SKIPWIRED 1
@@ -547,33 +504,8 @@ pmap_tmpunmap_pvepte(struct pv_entry *pve)
void
pmap_apte_flush(struct pmap *pmap)
{
-#if defined(MULTIPROCESSOR)
- struct pmap_tlb_shootdown_q *pq;
- struct cpu_info *ci, *self = curcpu();
- CPU_INFO_ITERATOR cii;
-#endif
-
- tlbflush(); /* flush TLB on current processor */
-#if defined(MULTIPROCESSOR)
- /*
- * Flush the APTE mapping from all other CPUs that
- * are using the pmap we are using (who's APTE space
- * is the one we've just modified).
- *
- * XXXthorpej -- find a way to defer the IPI.
- */
- CPU_INFO_FOREACH(cii, ci) {
- if (ci == self)
- continue;
- if (pmap_is_active(pmap, ci->ci_cpuid)) {
- pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
- mtx_enter(&pq->pq_mutex);
- pq->pq_flushu++;
- mtx_leave(&pq->pq_mutex);
- i386_send_ipi(ci, I386_IPI_TLB);
- }
- }
-#endif
+ pmap_tlb_shoottlb();
+ pmap_tlb_shootwait();
}
/*
@@ -651,17 +583,8 @@ pmap_exec_account(struct pmap *pm, vaddr_t va,
pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
return;
- if ((opte ^ npte) & PG_X) {
-#ifdef MULTIPROCESSOR
- int32_t cpumask = 0;
-
- pmap_tlb_shootdown(pm, va, opte, &cpumask);
- pmap_tlb_shootnow(cpumask);
-#else
- /* Don't bother deferring in the single CPU case. */
- pmap_update_pg(va);
-#endif
- }
+ if ((opte ^ npte) & PG_X)
+ pmap_tlb_shootpage(pm, va);
/*
* Executability was removed on the last executable change.
@@ -776,18 +699,13 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
pt_entry_t *pte, opte, npte;
pte = vtopte(va);
- npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V | pmap_pg_g;
+ npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V |
+ pmap_pg_g | PG_U | PG_M;
opte = i386_atomic_testset_ul(pte, npte); /* zap! */
if (pmap_valid_entry(opte)) {
-#ifdef MULTIPROCESSOR
- int32_t cpumask = 0;
-
- pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
- pmap_tlb_shootnow(cpumask);
-#else
- /* Don't bother deferring in the single CPU case. */
- pmap_update_pg(va);
-#endif
+ /* NB. - this should not happen. */
+ pmap_tlb_shootpage(pmap_kernel(), va);
+ pmap_tlb_shootwait();
}
}
@@ -801,34 +719,23 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
*/
void
-pmap_kremove(vaddr_t va, vsize_t len)
+pmap_kremove(vaddr_t sva, vsize_t len)
{
pt_entry_t *pte, opte;
-#ifdef MULTIPROCESSOR
- int32_t cpumask = 0;
-#endif
+ vaddr_t va, eva;
- len >>= PAGE_SHIFT;
- for ( /* null */ ; len ; len--, va += PAGE_SIZE) {
- if (va < VM_MIN_KERNEL_ADDRESS)
- pte = vtopte(va);
- else
- pte = kvtopte(va);
- opte = i386_atomic_testset_ul(pte, 0); /* zap! */
+ eva = sva + len;
+
+ for (va = sva; va != eva; va += PAGE_SIZE) {
+ pte = kvtopte(va);
+ opte = i386_atomic_testset_ul(pte, 0);
#ifdef DIAGNOSTIC
if (opte & PG_PVLIST)
panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", va);
#endif
- if ((opte & (PG_V | PG_U)) == (PG_V | PG_U))
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
-#else
- pmap_update_pg(va);
-#endif
}
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootnow(cpumask);
-#endif
+ pmap_tlb_shootrange(pmap_kernel(), sva, eva);
+ pmap_tlb_shootwait();
}
/*
@@ -856,7 +763,6 @@ pmap_bootstrap(vaddr_t kva_start)
struct pmap *kpm;
vaddr_t kva;
pt_entry_t *pte;
- int i;
/*
* set the page size (default value is 4K which is ok)
@@ -1024,17 +930,6 @@ pmap_bootstrap(vaddr_t kva_start)
&pool_allocator_nointr);
/*
- * Initialize the TLB shootdown queues.
- */
-
- mtx_init(&pmap_tlb_shootdown_job_mutex, IPL_NONE);
-
- for (i = 0; i < I386_MAXPROCS; i++) {
- TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
- mtx_init(&pmap_tlb_shootdown_q[i].pq_mutex, IPL_IPI);
- }
-
- /*
* ensure the TLB is sync'd with reality by flushing it...
*/
@@ -1050,8 +945,6 @@ pmap_bootstrap(vaddr_t kva_start)
void
pmap_init(void)
{
- int i;
-
/*
* now we need to free enough pv_entry structures to allow us to get
* the kmem_map allocated and inited (done after this function is
@@ -1067,15 +960,6 @@ pmap_init(void)
pv_nfpvents = 0;
(void) pmap_add_pvpage(pv_initpage, FALSE);
- pj_page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE);
- if (pj_page == NULL)
- panic("pmap_init: pj_page");
-
- for (i = 0; i < PAGE_SIZE / sizeof *pj_page - 1; i++)
- pj_page[i].pj_nextfree = &pj_page[i + 1];
- pj_page[i].pj_nextfree = NULL;
- pj_free = &pj_page[0];
-
/*
* done: pmap module is up (and ready for business)
*/
@@ -1482,8 +1366,8 @@ pmap_alloc_ptp(struct pmap *pmap, int pde_index, boolean_t just_try)
/* got one! */
atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
ptp->wire_count = 1; /* no mappings yet */
- pmap->pm_pdir[pde_index] =
- (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V);
+ pmap->pm_pdir[pde_index] = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_u |
+ PG_RW | PG_V | PG_M | PG_U);
pmap->pm_stats.resident_count++; /* count PTP as resident */
pmap->pm_ptphint = ptp;
return(ptp);
@@ -1955,8 +1839,8 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
#ifdef MULTIPROCESSOR
int id = cpu_number();
#endif
- pt_entry_t *spte = PTESLEW(csrc_pte,id);
- pt_entry_t *dpte = PTESLEW(cdst_pte,id);
+ pt_entry_t *spte = PTESLEW(csrc_pte, id);
+ pt_entry_t *dpte = PTESLEW(cdst_pte, id);
caddr_t csrcva = VASLEW(csrcp, id);
caddr_t cdstva = VASLEW(cdstp, id);
@@ -1971,9 +1855,6 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
bcopy(csrcva, cdstva, PAGE_SIZE);
*spte = *dpte = 0; /* zap! */
pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
-#ifdef MULTIPROCESSOR
- /* Using per-cpu VA; no shootdown required here. */
-#endif
}
/*
@@ -1993,7 +1874,7 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
void
pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
- vaddr_t startva, vaddr_t endva, int32_t *cpumaskp, int flags)
+ vaddr_t startva, vaddr_t endva, int flags)
{
struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */
struct pv_entry *pve;
@@ -2025,16 +1906,8 @@ pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
- if (opte & PG_U)
- pmap_tlb_shootdown(pmap, startva, opte, cpumaskp);
-
- if (ptp) {
+ if (ptp)
ptp->wire_count--; /* dropping a PTE */
- /* Make sure that the PDE is flushed */
- if ((ptp->wire_count <= 1) && !(opte & PG_U))
- pmap_tlb_shootdown(pmap, startva, opte,
- cpumaskp);
- }
/*
* Unnecessary work if not PG_VLIST.
@@ -2087,7 +1960,7 @@ pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
boolean_t
pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
- vaddr_t va, int32_t *cpumaskp, int flags)
+ vaddr_t va, int flags)
{
struct pv_entry *pve;
struct vm_page *pg;
@@ -2108,16 +1981,8 @@ pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
- if (opte & PG_U)
- pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
-
- if (ptp) {
+ if (ptp)
ptp->wire_count--; /* dropping a PTE */
- /* Make sure that the PDE is flushed */
- if ((ptp->wire_count <= 1) && !(opte & PG_U))
- pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
-
- }
pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
@@ -2167,8 +2032,9 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
paddr_t ptppa;
vaddr_t blkendva;
struct vm_page *ptp;
- int32_t cpumask = 0;
TAILQ_HEAD(, vm_page) empty_ptps;
+ int shootall;
+ vaddr_t va;
TAILQ_INIT(&empty_ptps);
@@ -2207,8 +2073,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
}
/* do it! */
- result = pmap_remove_pte(pmap, ptp,
- &ptes[atop(sva)], sva, &cpumask, flags);
+ result = pmap_remove_pte(pmap, ptp, &ptes[atop(sva)],
+ sva, flags);
/*
* if mapping removed and the PTP is no longer
@@ -2216,7 +2082,6 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
*/
if (result && ptp && ptp->wire_count <= 1) {
- /* zap! */
opte = i386_atomic_testset_ul(
&pmap->pm_pdir[pdei(sva)], 0);
#ifdef MULTIPROCESSOR
@@ -2225,9 +2090,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
* here if we're using APTE space.
*/
#endif
- pmap_tlb_shootdown(curpcb->pcb_pmap,
- ((vaddr_t)ptes) + ptp->offset, opte,
- &cpumask);
+ pmap_tlb_shootpage(curpcb->pcb_pmap,
+ ((vaddr_t)ptes) + ptp->offset);
#ifdef MULTIPROCESSOR
/*
* Always shoot down the pmap's self-mapping
@@ -2236,9 +2100,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
* here if pmap == curpcb->pcb_pmap (not APTE
* space).
*/
- pmap_tlb_shootdown(pmap,
- ((vaddr_t)PTE_BASE) + ptp->offset, opte,
- &cpumask);
+ pmap_tlb_shootpage(pmap,
+ ((vaddr_t)PTE_BASE) + ptp->offset);
#endif
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp)
@@ -2249,8 +2112,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
uvm_pagerealloc(ptp, NULL, 0);
TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
}
+ /*
+ * Shoot the tlb after any updates to the PDE.
+ */
+ pmap_tlb_shootpage(pmap, sva);
}
- pmap_tlb_shootnow(cpumask);
+ pmap_tlb_shootwait();
pmap_unmap_ptes(pmap); /* unlock pmap */
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
@@ -2260,10 +2127,19 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
return;
}
- for (/* null */ ; sva < eva ; sva = blkendva) {
+ /*
+ * Decide if we want to shoot the whole tlb or just the range.
+ * Right now, we simply shoot everything when we remove more
+ * than 32 pages, but never in the kernel pmap. XXX - tune.
+ */
+ if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
+ shootall = 1;
+ else
+ shootall = 0;
+ for (va = sva ; va < eva ; va = blkendva) {
/* determine range of block */
- blkendva = i386_round_pdr(sva+1);
+ blkendva = i386_round_pdr(va + 1);
if (blkendva > eva)
blkendva = eva;
@@ -2281,16 +2157,16 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
* be VM_MAX_ADDRESS.
*/
- if (pdei(sva) == PDSLOT_PTE)
+ if (pdei(va) == PDSLOT_PTE)
/* XXXCDC: ugly hack to avoid freeing PDP here */
continue;
- if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
+ if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
/* valid block? */
continue;
/* PA of the PTP */
- ptppa = (pmap->pm_pdir[pdei(sva)] & PG_FRAME);
+ ptppa = (pmap->pm_pdir[pdei(va)] & PG_FRAME);
/* get PTP if non-kernel mapping */
if (pmap == pmap_kernel()) {
@@ -2309,22 +2185,21 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
#endif
}
}
- pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(sva)],
- sva, blkendva, &cpumask, flags);
+ pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(va)],
+ va, blkendva, flags);
/* if PTP is no longer being used, free it! */
if (ptp && ptp->wire_count <= 1) {
- /* zap! */
opte = i386_atomic_testset_ul(
- &pmap->pm_pdir[pdei(sva)], 0);
+ &pmap->pm_pdir[pdei(va)], 0);
#if defined(MULTIPROCESSOR)
/*
* XXXthorpej Redundant shootdown can happen here
* if we're using APTE space.
*/
#endif
- pmap_tlb_shootdown(curpcb->pcb_pmap,
- ((vaddr_t)ptes) + ptp->offset, opte, &cpumask);
+ pmap_tlb_shootpage(curpcb->pcb_pmap,
+ ((vaddr_t)ptes) + ptp->offset);
#if defined(MULTIPROCESSOR)
/*
* Always shoot down the pmap's self-mapping
@@ -2332,8 +2207,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
* XXXthorpej Redundant shootdown can happen here
* if pmap == curpcb->pcb_pmap (not APTE space).
*/
- pmap_tlb_shootdown(pmap,
- ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask);
+ pmap_tlb_shootpage(pmap,
+ ((vaddr_t)PTE_BASE) + ptp->offset);
#endif
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp) /* update hint? */
@@ -2345,8 +2220,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
}
}
+ if (!shootall)
+ pmap_tlb_shootrange(pmap, sva, eva);
+ else
+ pmap_tlb_shoottlb();
- pmap_tlb_shootnow(cpumask);
+ pmap_tlb_shootwait();
pmap_unmap_ptes(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
@@ -2366,7 +2245,6 @@ pmap_page_remove(struct vm_page *pg)
{
struct pv_entry *pve;
pt_entry_t *ptes, opte;
- int32_t cpumask = 0;
TAILQ_HEAD(, vm_page) empty_ptps;
struct vm_page *ptp;
@@ -2397,18 +2275,12 @@ pmap_page_remove(struct vm_page *pg)
}
#endif
- opte = ptes[atop(pve->pv_va)];
- ptes[atop(pve->pv_va)] = 0; /* zap! */
+ opte = i386_atomic_testset_ul(&ptes[atop(pve->pv_va)], 0);
if (opte & PG_W)
pve->pv_pmap->pm_stats.wired_count--;
pve->pv_pmap->pm_stats.resident_count--;
- /* Shootdown only if referenced */
- if (opte & PG_U)
- pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte,
- &cpumask);
-
/* sync R/M bits */
pmap_sync_flags_pte(pg, opte);
@@ -2416,29 +2288,18 @@ pmap_page_remove(struct vm_page *pg)
if (pve->pv_ptp) {
pve->pv_ptp->wire_count--;
if (pve->pv_ptp->wire_count <= 1) {
- /*
- * Do we have to shootdown the page just to
- * get the pte out of the TLB ?
- */
- if(!(opte & PG_U))
- pmap_tlb_shootdown(pve->pv_pmap,
- pve->pv_va, opte, &cpumask);
-
- /* zap! */
opte = i386_atomic_testset_ul(
&pve->pv_pmap->pm_pdir[pdei(pve->pv_va)],
0);
- pmap_tlb_shootdown(curpcb->pcb_pmap,
- ((vaddr_t)ptes) + pve->pv_ptp->offset,
- opte, &cpumask);
+ pmap_tlb_shootpage(curpcb->pcb_pmap,
+ ((vaddr_t)ptes) + pve->pv_ptp->offset);
#if defined(MULTIPROCESSOR)
/*
* Always shoot down the other pmap's
* self-mapping of the PTP.
*/
- pmap_tlb_shootdown(pve->pv_pmap,
- ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset,
- opte, &cpumask);
+ pmap_tlb_shootpage(pve->pv_pmap,
+ ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset);
#endif
pve->pv_pmap->pm_stats.resident_count--;
/* update hint? */
@@ -2452,12 +2313,16 @@ pmap_page_remove(struct vm_page *pg)
listq);
}
}
+
+ pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
+
pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
}
pmap_free_pvs(NULL, pg->mdpage.pv_list);
pg->mdpage.pv_list = NULL;
PMAP_HEAD_TO_MAP_UNLOCK();
- pmap_tlb_shootnow(cpumask);
+ pmap_tlb_shootwait();
+
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
@@ -2517,7 +2382,6 @@ pmap_clear_attrs(struct vm_page *pg, int clearbits)
{
struct pv_entry *pve;
pt_entry_t *ptes, npte, opte;
- int32_t cpumask = 0;
u_long clearflags;
int result;
@@ -2543,14 +2407,13 @@ pmap_clear_attrs(struct vm_page *pg, int clearbits)
npte &= ~clearbits;
opte = i386_atomic_testset_ul(
&ptes[atop(pve->pv_va)], npte);
- pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va,
- opte, &cpumask);
+ pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
}
pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
}
PMAP_HEAD_TO_MAP_UNLOCK();
- pmap_tlb_shootnow(cpumask);
+ pmap_tlb_shootwait();
return (result != 0);
}
@@ -2587,7 +2450,8 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
pt_entry_t *ptes, *spte, *epte, npte;
vaddr_t blockend;
u_int32_t md_prot;
- int32_t cpumask = 0;
+ vaddr_t va;
+ int shootall = 0;
ptes = pmap_map_ptes(pmap); /* locks pmap */
@@ -2595,9 +2459,11 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
sva &= PG_FRAME;
eva &= PG_FRAME;
- for (/* null */ ; sva < eva ; sva = blockend) {
+ if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
+ shootall = 1;
- blockend = (sva & PD_MASK) + NBPD;
+ for (va = sva; va < eva; va = blockend) {
+ blockend = (va & PD_MASK) + NBPD;
if (blockend > eva)
blockend = eva;
@@ -2611,24 +2477,24 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
*/
/* XXXCDC: ugly hack to avoid freeing PDP here */
- if (pdei(sva) == PDSLOT_PTE)
+ if (pdei(va) == PDSLOT_PTE)
continue;
/* empty block? */
- if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
+ if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
continue;
md_prot = protection_codes[prot];
- if (sva < VM_MAXUSER_ADDRESS)
+ if (va < VM_MAXUSER_ADDRESS)
md_prot |= PG_u;
- else if (sva < VM_MAX_ADDRESS)
+ else if (va < VM_MAX_ADDRESS)
/* XXX: write-prot our PTES? never! */
md_prot |= (PG_u | PG_RW);
- spte = &ptes[atop(sva)];
+ spte = &ptes[atop(va)];
epte = &ptes[atop(blockend)];
- for (/*null */; spte < epte ; spte++, sva += PAGE_SIZE) {
+ for (/*null */; spte < epte ; spte++, va += PAGE_SIZE) {
if (!pmap_valid_entry(*spte)) /* no mapping? */
continue;
@@ -2636,14 +2502,17 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
npte = (*spte & ~PG_PROT) | md_prot;
if (npte != *spte) {
- pmap_exec_account(pmap, sva, *spte, npte);
- i386_atomic_testset_ul(spte, npte); /* zap! */
- pmap_tlb_shootdown(pmap, sva, *spte, &cpumask);
+ pmap_exec_account(pmap, va, *spte, npte);
+ i386_atomic_testset_ul(spte, npte);
}
}
}
+ if (shootall)
+ pmap_tlb_shoottlb();
+ else
+ pmap_tlb_shootrange(pmap, sva, eva);
- pmap_tlb_shootnow(cpumask);
+ pmap_tlb_shootwait();
pmap_unmap_ptes(pmap); /* unlocks pmap */
}
@@ -2880,8 +2749,6 @@ enter_now:
npte = pa | protection_codes[prot] | PG_V;
pmap_exec_account(pmap, va, opte, npte);
- if (pg != NULL)
- npte |= PG_PVLIST;
if (wired)
npte |= PG_W;
if (va < VM_MAXUSER_ADDRESS)
@@ -2890,20 +2757,20 @@ enter_now:
npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
if (pmap == pmap_kernel())
npte |= pmap_pg_g;
+ if (flags & VM_PROT_READ)
+ npte |= PG_U;
+ if (flags & VM_PROT_WRITE)
+ npte |= PG_M;
+ if (pg) {
+ npte |= PG_PVLIST;
+ pmap_sync_flags_pte(pg, npte);
+ }
- ptes[atop(va)] = npte; /* zap! */
-
- if ((opte & ~(PG_M|PG_U)) != npte) {
-#ifdef MULTIPROCESSOR
- int32_t cpumask = 0;
+ opte = i386_atomic_testset_ul(&ptes[atop(va)], npte);
- pmap_tlb_shootdown(pmap, va, opte, &cpumask);
- pmap_tlb_shootnow(cpumask);
-#else
- /* Don't bother deferring in the single CPU case. */
- if (pmap_is_curpmap(pmap))
- pmap_update_pg(va);
-#endif
+ if (opte & PG_V) {
+ pmap_tlb_shootpage(pmap, va);
+ pmap_tlb_shootwait();
}
error = 0;
@@ -3046,284 +2913,201 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
}
#endif
+#ifdef MULTIPROCESSOR
+/*
+ * Locking for tlb shootdown.
+ *
+ * We lock by setting tlb_shoot_wait to the number of cpus that will
+ * receive our tlb shootdown. After sending the IPIs, we don't need to
+ * worry about locking order or interrupts spinning for the lock because
+ * the call that grabs the "lock" isn't the one that releases it. And
+ * there is nothing that can block the IPI that releases the lock.
+ *
+ * The functions are organized so that we first count the number of
+ * cpus we need to send the IPI to, then we grab the counter, then
+ * we send the IPIs, then we finally do our own shootdown.
+ *
+ * Our shootdown is last to make it parallel with the other cpus
+ * to shorten the spin time.
+ *
+ * Notice that we depend on failures to send IPIs only being able to
+ * happen during boot. If they happen later, the above assumption
+ * doesn't hold since we can end up in situations where no one will
+ * release the lock if we get an interrupt in a bad moment.
+ */
+
+volatile int tlb_shoot_wait;
-/******************** TLB shootdown code ********************/
+volatile vaddr_t tlb_shoot_addr1;
+volatile vaddr_t tlb_shoot_addr2;
void
-pmap_tlb_shootnow(int32_t cpumask)
+pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
{
-#ifdef MULTIPROCESSOR
- struct cpu_info *ci, *self;
+ struct cpu_info *ci, *self = curcpu();
CPU_INFO_ITERATOR cii;
- int s;
-#ifdef DIAGNOSTIC
- int count = 0;
-#endif
-#endif
-
- if (cpumask == 0)
- return;
-
-#ifdef MULTIPROCESSOR
- self = curcpu();
- s = splipi();
- self->ci_tlb_ipi_mask = cpumask;
-#endif
+ int wait = 0;
+ int mask = 0;
- pmap_do_tlb_shootdown(0); /* do *our* work. */
-
-#ifdef MULTIPROCESSOR
- splx(s);
-
- if (cold)
+ if (cpu_class == CPUCLASS_386) {
+ tlbflush();
return;
+ }
- /*
- * Send the TLB IPI to other CPUs pending shootdowns.
- */
CPU_INFO_FOREACH(cii, ci) {
- if (ci == self)
+ if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
+ !(ci->ci_flags & CPUF_RUNNING))
continue;
- if (cpumask & (1U << ci->ci_cpuid))
- if (i386_send_ipi(ci, I386_IPI_TLB) != 0)
- i386_atomic_clearbits_l(&self->ci_tlb_ipi_mask,
- (1U << ci->ci_cpuid));
+ mask |= 1 << ci->ci_cpuid;
+ wait++;
}
- while (self->ci_tlb_ipi_mask != 0) {
- SPINLOCK_SPIN_HOOK;
-#ifdef DIAGNOSTIC
- if (count++ > 100000000)
- panic("%s: TLB IPI rendezvous failed (mask 0x%x)",
- self->ci_dev.dv_xname, self->ci_tlb_ipi_mask);
-#endif
+ if (wait > 0) {
+ int s = splvm();
+
+ while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+ while (tlb_shoot_wait != 0)
+ SPINLOCK_SPIN_HOOK;
+ }
+ tlb_shoot_addr1 = va;
+ CPU_INFO_FOREACH(cii, ci) {
+ if ((mask & 1 << ci->ci_cpuid) == 0)
+ continue;
+ if (i386_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
+ panic("pmap_tlb_shootpage: ipi failed");
+ }
+ splx(s);
}
-#endif
+
+ if (pmap_is_curpmap(pm))
+ pmap_update_pg(va);
}
-/*
- * pmap_tlb_shootdown:
- *
- * Cause the TLB entry for pmap/va to be shot down.
- */
void
-pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, int32_t *cpumaskp)
+pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
{
- struct cpu_info *ci, *self;
- struct pmap_tlb_shootdown_q *pq;
- struct pmap_tlb_shootdown_job *pj;
+ struct cpu_info *ci, *self = curcpu();
CPU_INFO_ITERATOR cii;
- int s;
+ int wait = 0;
+ int mask = 0;
+ vaddr_t va;
- if (pmap_initialized == FALSE) {
- pmap_update_pg(va);
+ if (cpu_class == CPUCLASS_386) {
+ tlbflush();
return;
}
- self = curcpu();
-
- s = splipi();
-#if 0
- printf("dshootdown %lx\n", va);
-#endif
-
CPU_INFO_FOREACH(cii, ci) {
- /* Note: we queue shootdown events for ourselves here! */
- if (pmap_is_active(pmap, ci->ci_cpuid) == 0)
+ if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
+ !(ci->ci_flags & CPUF_RUNNING))
continue;
- if (ci != self && !(ci->ci_flags & CPUF_RUNNING))
- continue;
- pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
- mtx_enter(&pq->pq_mutex);
+ mask |= 1 << ci->ci_cpuid;
+ wait++;
+ }
- /*
- * If there's a global flush already queued, or a
- * non-global flush, and this pte doesn't have the G
- * bit set, don't bother.
- */
- if (pq->pq_flushg > 0 ||
- (pq->pq_flushu > 0 && (pte & pmap_pg_g) == 0)) {
- mtx_leave(&pq->pq_mutex);
- continue;
- }
+ if (wait > 0) {
+ int s = splvm();
-#ifdef I386_CPU
- /*
- * i386 CPUs can't invalidate a single VA, only
- * flush the entire TLB, so don't bother allocating
- * jobs for them -- just queue a `flushu'.
- *
- * XXX note that this can be executed for non-i386
- * when called early (before identifycpu() has set
- * cpu_class)
- */
- if (cpu_class == CPUCLASS_386) {
- pq->pq_flushu++;
- *cpumaskp |= 1U << ci->ci_cpuid;
- mtx_leave(&pq->pq_mutex);
- continue;
+ while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+ while (tlb_shoot_wait != 0)
+ SPINLOCK_SPIN_HOOK;
}
-#endif
-
- pj = pmap_tlb_shootdown_job_get(pq);
- pq->pq_pte |= pte;
- if (pj == NULL) {
- /*
- * Couldn't allocate a job entry.
- * Kill it now for this cpu, unless the failure
- * was due to too many pending flushes; otherwise,
- * tell other cpus to kill everything..
- */
- if (ci == self && pq->pq_count < PMAP_TLB_MAXJOBS) {
- pmap_update_pg(va);
- mtx_leave(&pq->pq_mutex);
+ tlb_shoot_addr1 = sva;
+ tlb_shoot_addr2 = eva;
+ CPU_INFO_FOREACH(cii, ci) {
+ if ((mask & 1 << ci->ci_cpuid) == 0)
continue;
- } else {
- if (pq->pq_pte & pmap_pg_g)
- pq->pq_flushg++;
- else
- pq->pq_flushu++;
- /*
- * Since we've nailed the whole thing,
- * drain the job entries pending for that
- * processor.
- */
- pmap_tlb_shootdown_q_drain(pq);
- *cpumaskp |= 1U << ci->ci_cpuid;
- }
- } else {
- pj->pj_pmap = pmap;
- pj->pj_va = va;
- pj->pj_pte = pte;
- TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
- *cpumaskp |= 1U << ci->ci_cpuid;
+ if (i386_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
+ panic("pmap_tlb_shootrange: ipi failed");
}
- mtx_leave(&pq->pq_mutex);
+ splx(s);
}
- splx(s);
+
+ if (pmap_is_curpmap(pm))
+ for (va = sva; va < eva; va += PAGE_SIZE)
+ pmap_update_pg(va);
}
-/*
- * pmap_do_tlb_shootdown:
- *
- * Process pending TLB shootdown operations for this processor.
- */
void
-pmap_do_tlb_shootdown(struct cpu_info *self)
+pmap_tlb_shoottlb(void)
{
- u_long cpu_id = cpu_number();
- struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
- struct pmap_tlb_shootdown_job *pj;
-#ifdef MULTIPROCESSOR
- struct cpu_info *ci;
+ struct cpu_info *ci, *self = curcpu();
CPU_INFO_ITERATOR cii;
-#endif
+ int wait = 0;
+ int mask = 0;
- mtx_enter(&pq->pq_mutex);
+ if (cpu_class == CPUCLASS_386) {
+ tlbflush();
+ return;
+ }
- if (pq->pq_flushg) {
- tlbflushg();
- pq->pq_flushg = 0;
- pq->pq_flushu = 0;
- pmap_tlb_shootdown_q_drain(pq);
- } else {
- /*
- * TLB flushes for PTEs with PG_G set may be in the queue
- * after a flushu, they need to be dealt with.
- */
- if (pq->pq_flushu) {
- tlbflush();
- }
- while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
- TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
+ CPU_INFO_FOREACH(cii, ci) {
+ if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
+ continue;
+ mask |= 1 << ci->ci_cpuid;
+ wait++;
+ }
- if ((!pq->pq_flushu && pmap_is_curpmap(pj->pj_pmap)) ||
- (pj->pj_pte & pmap_pg_g))
- pmap_update_pg(pj->pj_va);
+ if (wait) {
+ int s = splvm();
- pmap_tlb_shootdown_job_put(pq, pj);
+ while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+ while (tlb_shoot_wait != 0)
+ SPINLOCK_SPIN_HOOK;
}
- pq->pq_flushu = pq->pq_pte = 0;
+ CPU_INFO_FOREACH(cii, ci) {
+ if ((mask & 1 << ci->ci_cpuid) == 0)
+ continue;
+ if (i386_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
+ panic("pmap_tlb_shoottlb: ipi failed");
+ }
+ splx(s);
}
-#ifdef MULTIPROCESSOR
- CPU_INFO_FOREACH(cii, ci)
- i386_atomic_clearbits_l(&ci->ci_tlb_ipi_mask,
- (1U << cpu_id));
-#endif
- mtx_leave(&pq->pq_mutex);
+ tlbflush();
}
-/*
- * pmap_tlb_shootdown_q_drain:
- *
- * Drain a processor's TLB shootdown queue. We do not perform
- * the shootdown operations. This is merely a convenience
- * function.
- *
- * Note: We expect the queue to be locked.
- */
void
-pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *pq)
+pmap_tlb_shootwait(void)
{
- struct pmap_tlb_shootdown_job *pj;
+ while (tlb_shoot_wait != 0)
+ SPINLOCK_SPIN_HOOK;
+}
- while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
- TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
- pmap_tlb_shootdown_job_put(pq, pj);
+#else
+
+void
+pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
+{
+ if (cpu_class == CPUCLASS_386) {
+ tlbflush();
+ return;
}
- pq->pq_pte = 0;
+
+ if (pmap_is_curpmap(pm))
+ pmap_update_pg(va);
+
}
-/*
- * pmap_tlb_shootdown_job_get:
- *
- * Get a TLB shootdown job queue entry. This places a limit on
- * the number of outstanding jobs a processor may have.
- *
- * Note: We expect the queue to be locked.
- */
-struct pmap_tlb_shootdown_job *
-pmap_tlb_shootdown_job_get(struct pmap_tlb_shootdown_q *pq)
+void
+pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
{
- struct pmap_tlb_shootdown_job *pj;
+ vaddr_t va;
- if (pq->pq_count >= PMAP_TLB_MAXJOBS)
- return (NULL);
-
- mtx_enter(&pmap_tlb_shootdown_job_mutex);
- if (pj_free == NULL) {
- mtx_leave(&pmap_tlb_shootdown_job_mutex);
- return NULL;
+ if (cpu_class == CPUCLASS_386) {
+ tlbflush();
+ return;
}
- pj = pj_free;
- pj_free = pj_free->pj_nextfree;
- mtx_leave(&pmap_tlb_shootdown_job_mutex);
- pq->pq_count++;
- return (pj);
+ for (va = sva; va < eva; va += PAGE_SIZE)
+ pmap_update_pg(va);
+
}
-/*
- * pmap_tlb_shootdown_job_put:
- *
- * Put a TLB shootdown job queue entry onto the free list.
- *
- * Note: We expect the queue to be locked.
- */
void
-pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *pq,
- struct pmap_tlb_shootdown_job *pj)
+pmap_tlb_shoottlb(void)
{
-#ifdef DIAGNOSTIC
- if (pq->pq_count == 0)
- panic("pmap_tlb_shootdown_job_put: queue length inconsistency");
-#endif
- mtx_enter(&pmap_tlb_shootdown_job_mutex);
- pj->pj_nextfree = pj_free;
- pj_free = pj;
- mtx_leave(&pmap_tlb_shootdown_job_mutex);
-
- pq->pq_count--;
+ tlbflush();
}
+#endif /* MULTIPROCESSOR */
diff --git a/sys/arch/i386/i386/vm_machdep.c b/sys/arch/i386/i386/vm_machdep.c
index b051d39e554..10fb4b1aef2 100644
--- a/sys/arch/i386/i386/vm_machdep.c
+++ b/sys/arch/i386/i386/vm_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vm_machdep.c,v 1.50 2007/03/19 15:17:21 art Exp $ */
+/* $OpenBSD: vm_machdep.c,v 1.51 2007/05/25 15:55:26 art Exp $ */
/* $NetBSD: vm_machdep.c,v 1.61 1996/05/03 19:42:35 christos Exp $ */
/*-
@@ -220,14 +220,18 @@ pagemove(caddr_t from, caddr_t to, size_t size)
{
pt_entry_t *fpte, *tpte;
pt_entry_t ofpte, otpte;
-#ifdef MULTIPROCESSOR
- u_int32_t cpumask = 0;
-#endif
+ vaddr_t fsva, tsva, feva, teva;
#ifdef DIAGNOSTIC
if ((size & PAGE_MASK) != 0)
panic("pagemove");
#endif
+
+ fsva = (vaddr_t)from;
+ tsva = (vaddr_t)to;
+ feva = fsva + size;
+ teva = tsva + size;
+
fpte = kvtopte((vaddr_t)from);
tpte = kvtopte((vaddr_t)to);
while (size > 0) {
@@ -235,38 +239,14 @@ pagemove(caddr_t from, caddr_t to, size_t size)
otpte = *tpte;
*tpte++ = *fpte;
*fpte++ = 0;
-#if defined(I386_CPU) && !defined(MULTIPROCESSOR)
- if (cpu_class != CPUCLASS_386)
-#endif
- {
- if (otpte & PG_V)
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootdown(pmap_kernel(), (vaddr_t)to,
- otpte, &cpumask);
-#else
- pmap_update_pg((vaddr_t)to);
-#endif
- if (ofpte & PG_V)
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootdown(pmap_kernel(),
- (vaddr_t)from, ofpte, &cpumask);
-#else
- pmap_update_pg((vaddr_t)from);
-#endif
- }
from += PAGE_SIZE;
to += PAGE_SIZE;
size -= PAGE_SIZE;
}
-#ifdef MULTIPROCESSOR
- pmap_tlb_shootnow(cpumask);
-#else
-#if defined(I386_CPU)
- if (cpu_class == CPUCLASS_386)
- tlbflush();
-#endif
-#endif
+ pmap_tlb_shootrange(pmap_kernel(), fsva, feva);
+ pmap_tlb_shootrange(pmap_kernel(), tsva, teva);
+ pmap_tlb_shootwait();
}
/*
diff --git a/sys/arch/i386/include/atomic.h b/sys/arch/i386/include/atomic.h
index 44a7be7f52f..35ea910c8fa 100644
--- a/sys/arch/i386/include/atomic.h
+++ b/sys/arch/i386/include/atomic.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: atomic.h,v 1.5 2007/02/19 17:18:42 deraadt Exp $ */
+/* $OpenBSD: atomic.h,v 1.6 2007/05/25 15:55:27 art Exp $ */
/* $NetBSD: atomic.h,v 1.1.2.2 2000/02/21 18:54:07 sommerfeld Exp $ */
/*-
@@ -92,6 +92,20 @@ i386_atomic_clearbits_l(volatile u_int32_t *ptr, unsigned long bits)
__asm __volatile(LOCK " andl %1,%0" : "=m" (*ptr) : "ir" (bits));
}
+/*
+ * cas = compare and set
+ */
+static __inline int
+i486_atomic_cas_int(volatile u_int *ptr, u_int expect, u_int set)
+{
+ int res;
+
+ __asm volatile(LOCK " cmpxchgl %2, %1" : "=a" (res), "=m" (*ptr)
+ : "r" (set), "a" (expect), "m" (*ptr) : "memory");
+
+ return (res);
+}
+
#define atomic_setbits_int i386_atomic_setbits_l
#define atomic_clearbits_int i386_atomic_clearbits_l
diff --git a/sys/arch/i386/include/i82489var.h b/sys/arch/i386/include/i82489var.h
index 653641bf713..0fe445e41fe 100644
--- a/sys/arch/i386/include/i82489var.h
+++ b/sys/arch/i386/include/i82489var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: i82489var.h,v 1.4 2007/04/12 20:22:58 art Exp $ */
+/* $OpenBSD: i82489var.h,v 1.5 2007/05/25 15:55:27 art Exp $ */
/* $NetBSD: i82489var.h,v 1.1.2.2 2000/02/21 18:46:14 sommerfeld Exp $ */
/*-
@@ -109,6 +109,14 @@ extern void Xintrltimer(void);
*/
#define LAPIC_IPI_OFFSET 0xf0
#define LAPIC_IPI_AST (LAPIC_IPI_OFFSET + 0)
+#define LAPIC_IPI_INVLTLB (LAPIC_IPI_OFFSET + 1)
+#define LAPIC_IPI_INVLPG (LAPIC_IPI_OFFSET + 2)
+#define LAPIC_IPI_INVLRANGE (LAPIC_IPI_OFFSET + 3)
+
+extern void Xintripi_ast(void);
+extern void Xintripi_invltlb(void);
+extern void Xintripi_invlpg(void);
+extern void Xintripi_invlrange(void);
extern void Xintrsoftclock(void);
extern void Xintrsoftnet(void);
diff --git a/sys/arch/i386/include/intr.h b/sys/arch/i386/include/intr.h
index eeeb74a605e..0d69c57277a 100644
--- a/sys/arch/i386/include/intr.h
+++ b/sys/arch/i386/include/intr.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: intr.h,v 1.31 2007/05/16 19:37:06 thib Exp $ */
+/* $OpenBSD: intr.h,v 1.32 2007/05/25 15:55:27 art Exp $ */
/* $NetBSD: intr.h,v 1.5 1996/05/13 06:11:28 mycroft Exp $ */
/*
@@ -137,6 +137,7 @@ struct cpu_info;
#ifdef MULTIPROCESSOR
int i386_send_ipi(struct cpu_info *, int);
+int i386_fast_ipi(struct cpu_info *, int);
void i386_broadcast_ipi(int);
void i386_multicast_ipi(int, int);
void i386_ipi_handler(void);
diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h
index 6520a9bbebe..9f0ed360a1a 100644
--- a/sys/arch/i386/include/pmap.h
+++ b/sys/arch/i386/include/pmap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.h,v 1.45 2007/04/26 11:31:52 art Exp $ */
+/* $OpenBSD: pmap.h,v 1.46 2007/05/25 15:55:27 art Exp $ */
/* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */
/*
@@ -383,9 +383,14 @@ int pmap_exec_fixup(struct vm_map *, struct trapframe *,
vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
-void pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, int32_t *);
-void pmap_tlb_shootnow(int32_t);
-void pmap_do_tlb_shootdown(struct cpu_info *);
+void pmap_tlb_shootpage(struct pmap *, vaddr_t);
+void pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t);
+void pmap_tlb_shoottlb(void);
+#ifdef MULTIPROCESSOR
+void pmap_tlb_shootwait(void);
+#else
+#define pmap_tlb_shootwait()
+#endif
#define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */