 -rw-r--r--  sys/arch/i386/i386/apicvec.s      |  69
 -rw-r--r--  sys/arch/i386/i386/ipifuncs.c     |  13
 -rw-r--r--  sys/arch/i386/i386/lapic.c        |   6
 -rw-r--r--  sys/arch/i386/i386/lock_machdep.c |  15
 -rw-r--r--  sys/arch/i386/i386/machdep.c      |  19
 -rw-r--r--  sys/arch/i386/i386/pmap.c         | 714
 -rw-r--r--  sys/arch/i386/i386/vm_machdep.c   |  42
 -rw-r--r--  sys/arch/i386/include/atomic.h    |  16
 -rw-r--r--  sys/arch/i386/include/i82489var.h |  10
 -rw-r--r--  sys/arch/i386/include/intr.h      |   3
 -rw-r--r--  sys/arch/i386/include/pmap.h      |  13
 11 files changed, 385 insertions(+), 535 deletions(-)
diff --git a/sys/arch/i386/i386/apicvec.s b/sys/arch/i386/i386/apicvec.s
index d6422e9ebf3..da710c4dbe5 100644
--- a/sys/arch/i386/i386/apicvec.s
+++ b/sys/arch/i386/i386/apicvec.s
@@ -1,4 +1,4 @@
-/*	$OpenBSD: apicvec.s,v 1.9 2007/04/12 20:22:58 art Exp $	*/
+/*	$OpenBSD: apicvec.s,v 1.10 2007/05/25 15:55:26 art Exp $	*/
 /*	$NetBSD: apicvec.s,v 1.1.2.2 2000/02/21 21:54:01 sommerfeld Exp $	*/
 
 /*-
@@ -86,6 +86,73 @@ XINTR(ipi_ast):
 	popl	%ds
 	popl	%eax
 	iret
+
+	.globl	XINTR(ipi_invltlb)
+	.p2align 4,0x90
+XINTR(ipi_invltlb):
+	pushl	%eax
+	pushl	%ds
+	movl	$GSEL(GDATA_SEL, SEL_KPL), %eax
+	movl	%eax, %ds
+
+	ioapic_asm_ack()
+
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	lock
+	decl	tlb_shoot_wait
+
+	popl	%ds
+	popl	%eax
+	iret
+
+	.globl	XINTR(ipi_invlpg)
+	.p2align 4,0x90
+XINTR(ipi_invlpg):
+	pushl	%eax
+	pushl	%ds
+	movl	$GSEL(GDATA_SEL, SEL_KPL), %eax
+	movl	%eax, %ds
+
+	ioapic_asm_ack()
+
+	movl	tlb_shoot_addr1, %eax
+	invlpg	(%eax)
+
+	lock
+	decl	tlb_shoot_wait
+
+	popl	%ds
+	popl	%eax
+	iret
+
+	.globl	XINTR(ipi_invlrange)
+	.p2align 4,0x90
+XINTR(ipi_invlrange):
+	pushl	%eax
+	pushl	%edx
+	pushl	%ds
+	movl	$GSEL(GDATA_SEL, SEL_KPL), %eax
+	movl	%eax, %ds
+
+	ioapic_asm_ack()
+
+	movl	tlb_shoot_addr1, %eax
+	movl	tlb_shoot_addr2, %edx
+1:	invlpg	(%eax)
+	addl	$PAGE_SIZE, %eax
+	cmpl	%edx, %eax
+	jb	1b
+
+	lock
+	decl	tlb_shoot_wait
+
+	popl	%ds
+	popl	%edx
+	popl	%eax
+	iret
+
 #endif
 
 /*
diff --git a/sys/arch/i386/i386/ipifuncs.c b/sys/arch/i386/i386/ipifuncs.c
index e679fcb6a64..711c6cf278d 100644
--- a/sys/arch/i386/i386/ipifuncs.c
+++ b/sys/arch/i386/i386/ipifuncs.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ipifuncs.c,v 1.7 2007/04/21 21:06:14 gwk Exp $	*/
+/*	$OpenBSD: ipifuncs.c,v 1.8 2007/05/25 15:55:26 art Exp $	*/
 /*	$NetBSD: ipifuncs.c,v 1.1.2.3 2000/06/26 02:04:06 sommerfeld Exp $	*/
 
 /*-
@@ -77,7 +77,7 @@ void (*ipifunc[I386_NIPI])(struct cpu_info *) =
 	i386_ipi_microset,
 	i386_ipi_flush_fpu,
 	i386_ipi_synch_fpu,
-	pmap_do_tlb_shootdown,
+	NULL,
 #if 0
 	i386_reload_mtrr,
 	gdt_reload_cpu,
@@ -144,6 +144,15 @@ i386_send_ipi(struct cpu_info *ci, int ipimask)
 	return ret;
 }
 
+int
+i386_fast_ipi(struct cpu_info *ci, int ipi)
+{
+	if (!(ci->ci_flags & CPUF_RUNNING))
+		return (ENOENT);
+
+	return (i386_ipi(ipi, ci->ci_cpuid, LAPIC_DLMODE_FIXED));
+}
+
 void
 i386_self_ipi(int vector)
 {
diff --git a/sys/arch/i386/i386/lapic.c b/sys/arch/i386/i386/lapic.c
index 2d8d07b0872..42378c42f57 100644
--- a/sys/arch/i386/i386/lapic.c
+++ b/sys/arch/i386/i386/lapic.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: lapic.c,v 1.15 2007/04/12 20:22:58 art Exp $	*/
+/*	$OpenBSD: lapic.c,v 1.16 2007/05/25 15:55:26 art Exp $	*/
 /*	$NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $	*/
 
 /*-
@@ -179,7 +179,6 @@ lapic_set_lvt()
 void
 lapic_boot_init(paddr_t lapic_base)
 {
-	extern void Xintripi_ast(void);
 	static int clk_irq = 0;
 	static int ipi_irq = 0;
 
@@ -188,6 +187,9 @@ lapic_boot_init(paddr_t lapic_base)
 #ifdef MULTIPROCESSOR
 	idt_vec_set(LAPIC_IPI_VECTOR, Xintripi);
 	idt_vec_set(LAPIC_IPI_AST, Xintripi_ast);
+	idt_vec_set(LAPIC_IPI_INVLTLB, Xintripi_invltlb);
+	idt_vec_set(LAPIC_IPI_INVLPG, Xintripi_invlpg);
+	idt_vec_set(LAPIC_IPI_INVLRANGE, Xintripi_invlrange);
 #endif
 	idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
 	idt_vec_set(LAPIC_TIMER_VECTOR, Xintrltimer);
diff --git a/sys/arch/i386/i386/lock_machdep.c b/sys/arch/i386/i386/lock_machdep.c
index 3c00a13309e..d18663ca4aa 100644
--- a/sys/arch/i386/i386/lock_machdep.c
+++ b/sys/arch/i386/i386/lock_machdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: lock_machdep.c,v 1.4 2007/05/04 12:58:41 art Exp $	*/
+/*	$OpenBSD: lock_machdep.c,v 1.5 2007/05/25 15:55:26 art Exp $	*/
 /*	$NetBSD: lock_machdep.c,v 1.1.2.3 2000/05/03 14:40:30 sommerfeld Exp $	*/
 
 /*-
@@ -142,19 +142,8 @@ rw_cas_386(volatile unsigned long *p, unsigned long o, unsigned long n)
 	return (0);
 }
 
-#ifdef MULTIPROCESSOR
-#define MPLOCK "lock "
-#else
-#define MPLOCK
-#endif
-
 int
 rw_cas_486(volatile unsigned long *p, unsigned long o, unsigned long n)
 {
-	int res;
-
-	__asm volatile(MPLOCK " cmpxchgl %2, %1" : "=a" (res), "=m" (*p)
-	    : "r" (n), "a" (o), "m" (*p) : "memory");
-
-	return (res != o);
+	return (i486_atomic_cas_int((u_int *)p, o, n) != o);
 }
diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c
index 22306883185..8202008a456 100644
--- a/sys/arch/i386/i386/machdep.c
+++ b/sys/arch/i386/i386/machdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: machdep.c,v 1.389 2007/05/23 20:33:46 pvalchev Exp $	*/
+/*	$OpenBSD: machdep.c,v 1.390 2007/05/25 15:55:26 art Exp $	*/
 /*	$NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $	*/
 
 /*-
@@ -2214,7 +2214,7 @@ aston(struct proc *p)
 #ifdef MULTIPROCESSOR
 	if (i386_atomic_testset_i(&p->p_md.md_astpending, 1) == 0 &&
 	    p->p_cpu != curcpu())
-		i386_ipi(LAPIC_IPI_AST, p->p_cpu->ci_cpuid, LAPIC_DLMODE_FIXED);
+		i386_fast_ipi(p->p_cpu, LAPIC_IPI_AST);
 #else
 	p->p_md.md_astpending = 1;
 #endif
@@ -3585,9 +3585,6 @@ bus_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int cacheable,
 	vaddr_t va;
 	pt_entry_t *pte;
 	bus_size_t map_size;
-#ifdef MULTIPROCESSOR
-	u_int32_t cpumask = 0;
-#endif
 
 	pa = trunc_page(bpa);
 	endpa = round_page(bpa + size);
@@ -3620,17 +3617,11 @@ bus_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int cacheable,
 				*pte &= ~PG_N;
 			else
 				*pte |= PG_N;
-#ifdef MULTIPROCESSOR
-			pmap_tlb_shootdown(pmap_kernel(), va, *pte,
-			    &cpumask);
-#else
-			pmap_update_pg(va);
-#endif
+			pmap_tlb_shootpage(pmap_kernel(), va);
 		}
 	}
-#ifdef MULTIPROCESSOR
-	pmap_tlb_shootnow(cpumask);
-#endif
+
+	pmap_tlb_shootwait();
 
 	pmap_update(pmap_kernel());
 	return 0;
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 2a221f4ab80..5fe984296be 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pmap.c,v 1.111 2007/05/20 14:14:09 miod Exp $	*/
+/*	$OpenBSD: pmap.c,v 1.112 2007/05/25 15:55:26 art Exp $	*/
 /*	$NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $	*/
 
 /*
@@ -213,49 +213,6 @@ struct simplelock pmaps_lock;
 #define PMAP_HEAD_TO_MAP_UNLOCK()	/* null */
 
 /*
- * TLB Shootdown:
- *
- * When a mapping is changed in a pmap, the TLB entry corresponding to
- * the virtual address must be invalidated on all processors.  In order
- * to accomplish this on systems with multiple processors, messages are
- * sent from the processor which performs the mapping change to all
- * processors on which the pmap is active.  For other processors, the
- * ASN generation numbers for that processor is invalidated, so that
- * the next time the pmap is activated on that processor, a new ASN
- * will be allocated (which implicitly invalidates all TLB entries).
- *
- * Shootdown job queue entries are allocated using a simple special-
- * purpose allocator for speed.
- */
-struct pmap_tlb_shootdown_job {
-	TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
-	vaddr_t pj_va;			/* virtual address */
-	pmap_t pj_pmap;			/* the pmap which maps the address */
-	pt_entry_t pj_pte;		/* the PTE bits */
-	struct pmap_tlb_shootdown_job *pj_nextfree;
-};
-
-struct pmap_tlb_shootdown_q {
-	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;
-	int pq_pte;			/* aggregate PTE bits */
-	int pq_count;			/* number of pending requests */
-	struct mutex pq_mutex;		/* mutex on queue */
-	int pq_flushg;			/* pending flush global */
-	int pq_flushu;			/* pending flush user */
-} pmap_tlb_shootdown_q[I386_MAXPROCS];
-
-#define PMAP_TLB_MAXJOBS	16
-
-void	pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *);
-struct	pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get(
-	    struct pmap_tlb_shootdown_q *);
-void	pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *,
-	    struct pmap_tlb_shootdown_job *);
-
-struct mutex pmap_tlb_shootdown_job_mutex;
-struct pmap_tlb_shootdown_job *pj_page, *pj_free;
-
-/*
  * global data structures
  */
 
@@ -387,9 +344,9 @@ pt_entry_t *pmap_map_ptes(struct pmap *);
 struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t);
 void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
 boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
-    vaddr_t, int32_t *, int);
+    vaddr_t, int);
 void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
-    vaddr_t, vaddr_t, int32_t *, int);
+    vaddr_t, vaddr_t, int);
 
 #define PMAP_REMOVE_ALL		0
 #define PMAP_REMOVE_SKIPWIRED	1
@@ -547,33 +504,8 @@ pmap_tmpunmap_pvepte(struct pv_entry *pve)
 void
 pmap_apte_flush(struct pmap *pmap)
 {
-#if defined(MULTIPROCESSOR)
-	struct pmap_tlb_shootdown_q *pq;
-	struct cpu_info *ci, *self = curcpu();
-	CPU_INFO_ITERATOR cii;
-#endif
-
-	tlbflush();		/* flush TLB on current processor */
-#if defined(MULTIPROCESSOR)
-	/*
-	 * Flush the APTE mapping from all other CPUs that
-	 * are using the pmap we are using (who's APTE space
-	 * is the one we've just modified).
-	 *
-	 * XXXthorpej -- find a way to defer the IPI.
-	 */
-	CPU_INFO_FOREACH(cii, ci) {
-		if (ci == self)
-			continue;
-		if (pmap_is_active(pmap, ci->ci_cpuid)) {
-			pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
-			mtx_enter(&pq->pq_mutex);
-			pq->pq_flushu++;
-			mtx_leave(&pq->pq_mutex);
-			i386_send_ipi(ci, I386_IPI_TLB);
-		}
-	}
-#endif
+	pmap_tlb_shoottlb();
+	pmap_tlb_shootwait();
 }
 
 /*
@@ -651,17 +583,8 @@ pmap_exec_account(struct pmap *pm, vaddr_t va,
 	    pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
 		return;
 
-	if ((opte ^ npte) & PG_X) {
-#ifdef MULTIPROCESSOR
-		int32_t cpumask = 0;
-
-		pmap_tlb_shootdown(pm, va, opte, &cpumask);
-		pmap_tlb_shootnow(cpumask);
-#else
-		/* Don't bother deferring in the single CPU case. */
-		pmap_update_pg(va);
-#endif
-	}
+	if ((opte ^ npte) & PG_X)
+		pmap_tlb_shootpage(pm, va);
 
 	/*
 	 * Executability was removed on the last executable change.
@@ -776,18 +699,13 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
 	pt_entry_t *pte, opte, npte;
 
 	pte = vtopte(va);
-	npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V | pmap_pg_g;
+	npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V |
+	    pmap_pg_g | PG_U | PG_M;
 	opte = i386_atomic_testset_ul(pte, npte); /* zap! */
 	if (pmap_valid_entry(opte)) {
-#ifdef MULTIPROCESSOR
-		int32_t cpumask = 0;
-
-		pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
-		pmap_tlb_shootnow(cpumask);
-#else
-		/* Don't bother deferring in the single CPU case. */
-		pmap_update_pg(va);
-#endif
+		/* NB. - this should not happen. */
+		pmap_tlb_shootpage(pmap_kernel(), va);
+		pmap_tlb_shootwait();
 	}
 }
 
@@ -801,34 +719,23 @@
  */
 void
-pmap_kremove(vaddr_t va, vsize_t len)
+pmap_kremove(vaddr_t sva, vsize_t len)
 {
 	pt_entry_t *pte, opte;
-#ifdef MULTIPROCESSOR
-	int32_t cpumask = 0;
-#endif
+	vaddr_t va, eva;
 
-	len >>= PAGE_SHIFT;
-	for ( /* null */ ; len ; len--, va += PAGE_SIZE) {
-		if (va < VM_MIN_KERNEL_ADDRESS)
-			pte = vtopte(va);
-		else
-			pte = kvtopte(va);
-		opte = i386_atomic_testset_ul(pte, 0); /* zap! */
+	eva = sva + len;
+
+	for (va = sva; va != eva; va += PAGE_SIZE) {
+		pte = kvtopte(va);
+		opte = i386_atomic_testset_ul(pte, 0);
 #ifdef DIAGNOSTIC
 		if (opte & PG_PVLIST)
 			panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", va);
 #endif
-		if ((opte & (PG_V | PG_U)) == (PG_V | PG_U))
-#ifdef MULTIPROCESSOR
-			pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
-#else
-			pmap_update_pg(va);
-#endif
 	}
-#ifdef MULTIPROCESSOR
-	pmap_tlb_shootnow(cpumask);
-#endif
+	pmap_tlb_shootrange(pmap_kernel(), sva, eva);
+	pmap_tlb_shootwait();
 }
 
 /*
@@ -856,7 +763,6 @@ pmap_bootstrap(vaddr_t kva_start)
 	struct pmap *kpm;
 	vaddr_t kva;
 	pt_entry_t *pte;
-	int i;
 
 	/*
 	 * set the page size (default value is 4K which is ok)
@@ -1024,17 +930,6 @@
 	    &pool_allocator_nointr);
 
 	/*
-	 * Initialize the TLB shootdown queues.
-	 */
-
-	mtx_init(&pmap_tlb_shootdown_job_mutex, IPL_NONE);
-
-	for (i = 0; i < I386_MAXPROCS; i++) {
-		TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
-		mtx_init(&pmap_tlb_shootdown_q[i].pq_mutex, IPL_IPI);
-	}
-
-	/*
 	 * ensure the TLB is sync'd with reality by flushing it...
 	 */
 
@@ -1050,8 +945,6 @@
 void
 pmap_init(void)
 {
-	int i;
-
 	/*
 	 * now we need to free enough pv_entry structures to allow us to get
 	 * the kmem_map allocated and inited (done after this function is
@@ -1067,15 +960,6 @@
 	pv_nfpvents = 0;
 	(void) pmap_add_pvpage(pv_initpage, FALSE);
 
-	pj_page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE);
-	if (pj_page == NULL)
-		panic("pmap_init: pj_page");
-
-	for (i = 0; i < PAGE_SIZE / sizeof *pj_page - 1; i++)
-		pj_page[i].pj_nextfree = &pj_page[i + 1];
-	pj_page[i].pj_nextfree = NULL;
-	pj_free = &pj_page[0];
-
 	/*
 	 * done: pmap module is up (and ready for business)
 	 */
@@ -1482,8 +1366,8 @@ pmap_alloc_ptp(struct pmap *pmap, int pde_index, boolean_t just_try)
 	/* got one! */
 	atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
 	ptp->wire_count = 1;	/* no mappings yet */
-	pmap->pm_pdir[pde_index] =
-	    (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V);
+	pmap->pm_pdir[pde_index] = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_u |
+	    PG_RW | PG_V | PG_M | PG_U);
 	pmap->pm_stats.resident_count++;	/* count PTP as resident */
 	pmap->pm_ptphint = ptp;
 	return(ptp);
@@ -1955,8 +1839,8 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
 #ifdef MULTIPROCESSOR
 	int id = cpu_number();
 #endif
-	pt_entry_t *spte = PTESLEW(csrc_pte,id);
-	pt_entry_t *dpte = PTESLEW(cdst_pte,id);
+	pt_entry_t *spte = PTESLEW(csrc_pte, id);
+	pt_entry_t *dpte = PTESLEW(cdst_pte, id);
 	caddr_t csrcva = VASLEW(csrcp, id);
 	caddr_t cdstva = VASLEW(cdstp, id);
 
@@ -1971,9 +1855,6 @@
 	bcopy(csrcva, cdstva, PAGE_SIZE);
 	*spte = *dpte = 0;		/* zap! */
 	pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
-#ifdef MULTIPROCESSOR
-	/* Using per-cpu VA; no shootdown required here. */
-#endif
 }
 
 /*
@@ -1993,7 +1874,7 @@
 void
 pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
-    vaddr_t startva, vaddr_t endva, int32_t *cpumaskp, int flags)
+    vaddr_t startva, vaddr_t endva, int flags)
 {
 	struct pv_entry *pv_tofree = NULL;	/* list of pv_entrys to free */
 	struct pv_entry *pve;
@@ -2025,16 +1906,8 @@ pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
 		pmap->pm_stats.wired_count--;
 		pmap->pm_stats.resident_count--;
 
-		if (opte & PG_U)
-			pmap_tlb_shootdown(pmap, startva, opte, cpumaskp);
-
-		if (ptp) {
+		if (ptp)
 			ptp->wire_count--;	/* dropping a PTE */
-			/* Make sure that the PDE is flushed */
-			if ((ptp->wire_count <= 1) && !(opte & PG_U))
-				pmap_tlb_shootdown(pmap, startva, opte,
-				    cpumaskp);
-		}
 
 		/*
 		 * Unnecessary work if not PG_VLIST.
@@ -2087,7 +1960,7 @@
 boolean_t
 pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
-    vaddr_t va, int32_t *cpumaskp, int flags)
+    vaddr_t va, int flags)
 {
 	struct pv_entry *pve;
 	struct vm_page *pg;
@@ -2108,16 +1981,8 @@ pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
 	pmap->pm_stats.wired_count--;
 	pmap->pm_stats.resident_count--;
 
-	if (opte & PG_U)
-		pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
-
-	if (ptp) {
+	if (ptp)
 		ptp->wire_count--;	/* dropping a PTE */
-		/* Make sure that the PDE is flushed */
-		if ((ptp->wire_count <= 1) && !(opte & PG_U))
-			pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
-
-	}
 
 	pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
 
@@ -2167,8 +2032,9 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 	paddr_t ptppa;
 	vaddr_t blkendva;
 	struct vm_page *ptp;
-	int32_t cpumask = 0;
 	TAILQ_HEAD(, vm_page) empty_ptps;
+	int shootall;
+	vaddr_t va;
 
 	TAILQ_INIT(&empty_ptps);
 
@@ -2207,8 +2073,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 		}
 
 		/* do it! */
-		result = pmap_remove_pte(pmap, ptp,
-		    &ptes[atop(sva)], sva, &cpumask, flags);
+		result = pmap_remove_pte(pmap, ptp, &ptes[atop(sva)],
+		    sva, flags);
 
 		/*
 		 * if mapping removed and the PTP is no longer
@@ -2216,7 +2082,6 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 		 */
 
 		if (result && ptp && ptp->wire_count <= 1) {
-			/* zap! */
 			opte = i386_atomic_testset_ul(
 			    &pmap->pm_pdir[pdei(sva)], 0);
 #ifdef MULTIPROCESSOR
@@ -2225,9 +2090,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 			 * here if we're using APTE space.
 			 */
 #endif
-			pmap_tlb_shootdown(curpcb->pcb_pmap,
-			    ((vaddr_t)ptes) + ptp->offset, opte,
-			    &cpumask);
+			pmap_tlb_shootpage(curpcb->pcb_pmap,
+			    ((vaddr_t)ptes) + ptp->offset);
 #ifdef MULTIPROCESSOR
 			/*
 			 * Always shoot down the pmap's self-mapping
@@ -2236,9 +2100,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 			 * here if pmap == curpcb->pcb_pmap (not APTE
 			 * space).
 			 */
-			pmap_tlb_shootdown(pmap,
-			    ((vaddr_t)PTE_BASE) + ptp->offset, opte,
-			    &cpumask);
+			pmap_tlb_shootpage(pmap,
+			    ((vaddr_t)PTE_BASE) + ptp->offset);
 #endif
 			pmap->pm_stats.resident_count--;
 			if (pmap->pm_ptphint == ptp)
@@ -2249,8 +2112,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 			uvm_pagerealloc(ptp, NULL, 0);
 			TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
 		}
+		/*
+		 * Shoot the tlb after any updates to the PDE.
+		 */
+		pmap_tlb_shootpage(pmap, sva);
 	}
-	pmap_tlb_shootnow(cpumask);
+	pmap_tlb_shootwait();
 	pmap_unmap_ptes(pmap);		/* unlock pmap */
 	PMAP_MAP_TO_HEAD_UNLOCK();
 	while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
@@ -2260,10 +2127,19 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 		return;
 	}
 
-	for (/* null */ ; sva < eva ; sva = blkendva) {
+	/*
+	 * Decide if we want to shoot the whole tlb or just the range.
+	 * Right now, we simply shoot everything when we remove more
+	 * than 32 pages, but never in the kernel pmap. XXX - tune.
+	 */
+	if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
+		shootall = 1;
+	else
+		shootall = 0;
+
+	for (va = sva ; va < eva ; va = blkendva) {
 		/* determine range of block */
-		blkendva = i386_round_pdr(sva+1);
+		blkendva = i386_round_pdr(va + 1);
 		if (blkendva > eva)
 			blkendva = eva;
 
@@ -2281,16 +2157,16 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 		 * be VM_MAX_ADDRESS.
 		 */
 
-		if (pdei(sva) == PDSLOT_PTE)
+		if (pdei(va) == PDSLOT_PTE)
 			/* XXXCDC: ugly hack to avoid freeing PDP here */
 			continue;
 
-		if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
+		if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
 			/* valid block? */
 			continue;
 
 		/* PA of the PTP */
-		ptppa = (pmap->pm_pdir[pdei(sva)] & PG_FRAME);
+		ptppa = (pmap->pm_pdir[pdei(va)] & PG_FRAME);
 
 		/* get PTP if non-kernel mapping */
 		if (pmap == pmap_kernel()) {
@@ -2309,22 +2185,21 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 #endif
 			}
 		}
-		pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(sva)],
-		    sva, blkendva, &cpumask, flags);
+		pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(va)],
+		    va, blkendva, flags);
 
 		/* if PTP is no longer being used, free it! */
 		if (ptp && ptp->wire_count <= 1) {
-			/* zap! */
 			opte = i386_atomic_testset_ul(
-			    &pmap->pm_pdir[pdei(sva)], 0);
+			    &pmap->pm_pdir[pdei(va)], 0);
 #if defined(MULTIPROCESSOR)
 			/*
 			 * XXXthorpej Redundant shootdown can happen here
 			 * if we're using APTE space.
 			 */
 #endif
-			pmap_tlb_shootdown(curpcb->pcb_pmap,
-			    ((vaddr_t)ptes) + ptp->offset, opte, &cpumask);
+			pmap_tlb_shootpage(curpcb->pcb_pmap,
+			    ((vaddr_t)ptes) + ptp->offset);
 #if defined(MULTIPROCESSOR)
 			/*
 			 * Always shoot down the pmap's self-mapping
@@ -2332,8 +2207,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 			 * XXXthorpej Redundant shootdown can happen here
 			 * if pmap == curpcb->pcb_pmap (not APTE space).
 			 */
-			pmap_tlb_shootdown(pmap,
-			    ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask);
+			pmap_tlb_shootpage(pmap,
+			    ((vaddr_t)PTE_BASE) + ptp->offset);
 #endif
 			pmap->pm_stats.resident_count--;
 			if (pmap->pm_ptphint == ptp)	/* update hint? */
@@ -2345,8 +2220,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 			TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
 		}
 	}
+	if (!shootall)
+		pmap_tlb_shootrange(pmap, sva, eva);
+	else
+		pmap_tlb_shoottlb();
 
-	pmap_tlb_shootnow(cpumask);
+	pmap_tlb_shootwait();
 	pmap_unmap_ptes(pmap);
 	PMAP_MAP_TO_HEAD_UNLOCK();
 	while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
@@ -2366,7 +2245,6 @@
 pmap_page_remove(struct vm_page *pg)
 {
 	struct pv_entry *pve;
 	pt_entry_t *ptes, opte;
-	int32_t cpumask = 0;
 	TAILQ_HEAD(, vm_page) empty_ptps;
 	struct vm_page *ptp;
 
@@ -2397,18 +2275,12 @@ pmap_page_remove(struct vm_page *pg)
 		}
 #endif
 
-		opte = ptes[atop(pve->pv_va)];
-		ptes[atop(pve->pv_va)] = 0;		/* zap! */
+		opte = i386_atomic_testset_ul(&ptes[atop(pve->pv_va)], 0);
 
 		if (opte & PG_W)
 			pve->pv_pmap->pm_stats.wired_count--;
 		pve->pv_pmap->pm_stats.resident_count--;
 
-		/* Shootdown only if referenced */
-		if (opte & PG_U)
-			pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte,
-			    &cpumask);
-
 		/* sync R/M bits */
 		pmap_sync_flags_pte(pg, opte);
 
@@ -2416,29 +2288,18 @@ pmap_page_remove(struct vm_page *pg)
 		if (pve->pv_ptp) {
 			pve->pv_ptp->wire_count--;
 			if (pve->pv_ptp->wire_count <= 1) {
-				/*
-				 * Do we have to shootdown the page just to
-				 * get the pte out of the TLB ?
-				 */
-				if(!(opte & PG_U))
-					pmap_tlb_shootdown(pve->pv_pmap,
-					    pve->pv_va, opte, &cpumask);
-
-				/* zap! */
 				opte = i386_atomic_testset_ul(
 				    &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)],
 				    0);
-				pmap_tlb_shootdown(curpcb->pcb_pmap,
-				    ((vaddr_t)ptes) + pve->pv_ptp->offset,
-				    opte, &cpumask);
+				pmap_tlb_shootpage(curpcb->pcb_pmap,
+				    ((vaddr_t)ptes) + pve->pv_ptp->offset);
 #if defined(MULTIPROCESSOR)
 				/*
 				 * Always shoot down the other pmap's
 				 * self-mapping of the PTP.
 				 */
-				pmap_tlb_shootdown(pve->pv_pmap,
-				    ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset,
-				    opte, &cpumask);
+				pmap_tlb_shootpage(pve->pv_pmap,
+				    ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset);
 #endif
 				pve->pv_pmap->pm_stats.resident_count--;
 				/* update hint? */
@@ -2452,12 +2313,16 @@ pmap_page_remove(struct vm_page *pg)
 				    listq);
 			}
 		}
+
+		pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
+
 		pmap_unmap_ptes(pve->pv_pmap);	/* unlocks pmap */
 	}
 	pmap_free_pvs(NULL, pg->mdpage.pv_list);
 	pg->mdpage.pv_list = NULL;
 	PMAP_HEAD_TO_MAP_UNLOCK();
-	pmap_tlb_shootnow(cpumask);
+	pmap_tlb_shootwait();
+
 	while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
 		TAILQ_REMOVE(&empty_ptps, ptp, listq);
 		uvm_pagefree(ptp);
@@ -2517,7 +2382,6 @@
 pmap_clear_attrs(struct vm_page *pg, int clearbits)
 {
 	struct pv_entry *pve;
 	pt_entry_t *ptes, npte, opte;
-	int32_t cpumask = 0;
 	u_long clearflags;
 	int result;
 
@@ -2543,14 +2407,13 @@ pmap_clear_attrs(struct vm_page *pg, int clearbits)
 			npte &= ~clearbits;
 			opte = i386_atomic_testset_ul(
 			    &ptes[atop(pve->pv_va)], npte);
-			pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va,
-			    opte, &cpumask);
+			pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
 		}
 		pmap_unmap_ptes(pve->pv_pmap);	/* unlocks pmap */
 	}
 
 	PMAP_HEAD_TO_MAP_UNLOCK();
-	pmap_tlb_shootnow(cpumask);
+	pmap_tlb_shootwait();
 
 	return (result != 0);
 }
@@ -2587,7 +2450,8 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
 	pt_entry_t *ptes, *spte, *epte, npte;
 	vaddr_t blockend;
 	u_int32_t md_prot;
-	int32_t cpumask = 0;
+	vaddr_t va;
+	int shootall = 0;
 
 	ptes = pmap_map_ptes(pmap);		/* locks pmap */
 
@@ -2595,9 +2459,11 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
 	sva &= PG_FRAME;
 	eva &= PG_FRAME;
 
-	for (/* null */ ; sva < eva ; sva = blockend) {
+	if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
+		shootall = 1;
 
-		blockend = (sva & PD_MASK) + NBPD;
+	for (va = sva; va < eva; va = blockend) {
+		blockend = (va & PD_MASK) + NBPD;
 		if (blockend > eva)
 			blockend = eva;
 
@@ -2611,24 +2477,24 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
 		 */
 
 		/* XXXCDC: ugly hack to avoid freeing PDP here */
-		if (pdei(sva) == PDSLOT_PTE)
+		if (pdei(va) == PDSLOT_PTE)
 			continue;
 
 		/* empty block? */
-		if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
+		if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
 			continue;
 
 		md_prot = protection_codes[prot];
-		if (sva < VM_MAXUSER_ADDRESS)
+		if (va < VM_MAXUSER_ADDRESS)
 			md_prot |= PG_u;
-		else if (sva < VM_MAX_ADDRESS)
+		else if (va < VM_MAX_ADDRESS)
 			/* XXX: write-prot our PTES? never! */
 			md_prot |= (PG_u | PG_RW);
 
-		spte = &ptes[atop(sva)];
+		spte = &ptes[atop(va)];
 		epte = &ptes[atop(blockend)];
 
-		for (/*null */; spte < epte ; spte++, sva += PAGE_SIZE) {
+		for (/*null */; spte < epte ; spte++, va += PAGE_SIZE) {
 
 			if (!pmap_valid_entry(*spte))	/* no mapping? */
 				continue;
@@ -2636,14 +2502,17 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
 			npte = (*spte & ~PG_PROT) | md_prot;
 
 			if (npte != *spte) {
-				pmap_exec_account(pmap, sva, *spte, npte);
-				i386_atomic_testset_ul(spte, npte); /* zap! */
-				pmap_tlb_shootdown(pmap, sva, *spte, &cpumask);
+				pmap_exec_account(pmap, va, *spte, npte);
+				i386_atomic_testset_ul(spte, npte);
 			}
 		}
 	}
+	if (shootall)
+		pmap_tlb_shoottlb();
+	else
+		pmap_tlb_shootrange(pmap, sva, eva);
 
-	pmap_tlb_shootnow(cpumask);
+	pmap_tlb_shootwait();
 	pmap_unmap_ptes(pmap);		/* unlocks pmap */
 }
 
@@ -2880,8 +2749,6 @@ enter_now:
 	npte = pa | protection_codes[prot] | PG_V;
 	pmap_exec_account(pmap, va, opte, npte);
-	if (pg != NULL)
-		npte |= PG_PVLIST;
 	if (wired)
 		npte |= PG_W;
 	if (va < VM_MAXUSER_ADDRESS)
@@ -2890,20 +2757,20 @@ enter_now:
 		npte |= (PG_u | PG_RW);	/* XXXCDC: no longer needed? */
 	if (pmap == pmap_kernel())
 		npte |= pmap_pg_g;
+	if (flags & VM_PROT_READ)
+		npte |= PG_U;
+	if (flags & VM_PROT_WRITE)
+		npte |= PG_M;
+	if (pg) {
+		npte |= PG_PVLIST;
+		pmap_sync_flags_pte(pg, npte);
+	}
 
-	ptes[atop(va)] = npte;		/* zap! */
-
-	if ((opte & ~(PG_M|PG_U)) != npte) {
-#ifdef MULTIPROCESSOR
-		int32_t cpumask = 0;
+	opte = i386_atomic_testset_ul(&ptes[atop(va)], npte);
 
-		pmap_tlb_shootdown(pmap, va, opte, &cpumask);
-		pmap_tlb_shootnow(cpumask);
-#else
-		/* Don't bother deferring in the single CPU case. */
-		if (pmap_is_curpmap(pmap))
-			pmap_update_pg(va);
-#endif
+	if (opte & PG_V) {
+		pmap_tlb_shootpage(pmap, va);
+		pmap_tlb_shootwait();
 	}
 
 	error = 0;
@@ -3046,284 +2913,201 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
 }
 #endif
 
+#ifdef MULTIPROCESSOR
+/*
+ * Locking for tlb shootdown.
+ *
+ * We lock by setting tlb_shoot_wait to the number of cpus that will
+ * receive our tlb shootdown. After sending the IPIs, we don't need to
+ * worry about locking order or interrupts spinning for the lock because
+ * the call that grabs the "lock" isn't the one that releases it. And
+ * there is nothing that can block the IPI that releases the lock.
+ *
+ * The functions are organized so that we first count the number of
+ * cpus we need to send the IPI to, then we grab the counter, then
+ * we send the IPIs, then we finally do our own shootdown.
+ *
+ * Our shootdown is last to make it parallell with the other cpus
+ * to shorten the spin time.
+ *
+ * Notice that we depend on failures to send IPIs only being able to
+ * happen during boot. If they happen later, the above assumption
+ * doesn't hold since we can end up in situations where noone will
+ * release the lock if we get an interrupt in a bad moment.
+ */
+
+volatile int tlb_shoot_wait;
 
-/******************** TLB shootdown code ********************/
+volatile vaddr_t tlb_shoot_addr1;
+volatile vaddr_t tlb_shoot_addr2;
 
 void
-pmap_tlb_shootnow(int32_t cpumask)
+pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
 {
-#ifdef MULTIPROCESSOR
-	struct cpu_info *ci, *self;
+	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
-	int s;
-#ifdef DIAGNOSTIC
-	int count = 0;
-#endif
-#endif
-
-	if (cpumask == 0)
-		return;
-
-#ifdef MULTIPROCESSOR
-	self = curcpu();
-	s = splipi();
-	self->ci_tlb_ipi_mask = cpumask;
-#endif
+	int wait = 0;
+	int mask = 0;
 
-	pmap_do_tlb_shootdown(0);	/* do *our* work. */
-
-#ifdef MULTIPROCESSOR
-	splx(s);
-
-	if (cold)
+	if (cpu_class == CPUCLASS_386) {
+		tlbflush();
 		return;
+	}
 
-	/*
-	 * Send the TLB IPI to other CPUs pending shootdowns.
-	 */
 	CPU_INFO_FOREACH(cii, ci) {
-		if (ci == self)
+		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
+		    !(ci->ci_flags & CPUF_RUNNING))
 			continue;
-		if (cpumask & (1U << ci->ci_cpuid))
-			if (i386_send_ipi(ci, I386_IPI_TLB) != 0)
-				i386_atomic_clearbits_l(&self->ci_tlb_ipi_mask,
-				    (1U << ci->ci_cpuid));
+		mask |= 1 << ci->ci_cpuid;
+		wait++;
 	}
 
-	while (self->ci_tlb_ipi_mask != 0) {
-		SPINLOCK_SPIN_HOOK;
-#ifdef DIAGNOSTIC
-		if (count++ > 100000000)
-			panic("%s: TLB IPI rendezvous failed (mask 0x%x)",
-			    self->ci_dev.dv_xname, self->ci_tlb_ipi_mask);
-#endif
+	if (wait > 0) {
+		int s = splvm();
+
+		while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+			while (tlb_shoot_wait != 0)
+				SPINLOCK_SPIN_HOOK;
+		}
+		tlb_shoot_addr1 = va;
+		CPU_INFO_FOREACH(cii, ci) {
+			if ((mask & 1 << ci->ci_cpuid) == 0)
+				continue;
+			if (i386_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
+				panic("pmap_tlb_shootpage: ipi failed");
+		}
+		splx(s);
 	}
-#endif
+
+	if (pmap_is_curpmap(pm))
+		pmap_update_pg(va);
 }
 
-/*
- * pmap_tlb_shootdown:
- *
- *	Cause the TLB entry for pmap/va to be shot down.
- */
 void
-pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, int32_t *cpumaskp)
+pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
 {
-	struct cpu_info *ci, *self;
-	struct pmap_tlb_shootdown_q *pq;
-	struct pmap_tlb_shootdown_job *pj;
+	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
-	int s;
+	int wait = 0;
+	int mask = 0;
+	vaddr_t va;
 
-	if (pmap_initialized == FALSE) {
-		pmap_update_pg(va);
+	if (cpu_class == CPUCLASS_386) {
+		tlbflush();
 		return;
 	}
 
-	self = curcpu();
-
-	s = splipi();
-#if 0
-	printf("dshootdown %lx\n", va);
-#endif
-
 	CPU_INFO_FOREACH(cii, ci) {
-		/* Note: we queue shootdown events for ourselves here! */
-		if (pmap_is_active(pmap, ci->ci_cpuid) == 0)
+		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
+		    !(ci->ci_flags & CPUF_RUNNING))
 			continue;
-		if (ci != self && !(ci->ci_flags & CPUF_RUNNING))
-			continue;
-		pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
-		mtx_enter(&pq->pq_mutex);
+		mask |= 1 << ci->ci_cpuid;
+		wait++;
+	}
+
+	if (wait > 0) {
+		int s = splvm();
 
-		/*
-		 * If there's a global flush already queued, or a
-		 * non-global flush, and this pte doesn't have the G
-		 * bit set, don't bother.
-		 */
-		if (pq->pq_flushg > 0 ||
-		    (pq->pq_flushu > 0 && (pte & pmap_pg_g) == 0)) {
-			mtx_leave(&pq->pq_mutex);
-			continue;
-		}
-
-#ifdef I386_CPU
-		/*
-		 * i386 CPUs can't invalidate a single VA, only
-		 * flush the entire TLB, so don't bother allocating
-		 * jobs for them -- just queue a `flushu'.
-		 *
-		 * XXX note that this can be executed for non-i386
-		 * when called early (before identifycpu() has set
-		 * cpu_class)
-		 */
-		if (cpu_class == CPUCLASS_386) {
-			pq->pq_flushu++;
-			*cpumaskp |= 1U << ci->ci_cpuid;
-			mtx_leave(&pq->pq_mutex);
-			continue;
+		while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+			while (tlb_shoot_wait != 0)
+				SPINLOCK_SPIN_HOOK;
 		}
-#endif
-
-		pj = pmap_tlb_shootdown_job_get(pq);
-		pq->pq_pte |= pte;
-		if (pj == NULL) {
-			/*
-			 * Couldn't allocate a job entry.
-			 * Kill it now for this cpu, unless the failure
-			 * was due to too many pending flushes; otherwise,
-			 * tell other cpus to kill everything..
-			 */
-			if (ci == self && pq->pq_count < PMAP_TLB_MAXJOBS) {
-				pmap_update_pg(va);
-				mtx_leave(&pq->pq_mutex);
+		tlb_shoot_addr1 = sva;
+		tlb_shoot_addr2 = eva;
+		CPU_INFO_FOREACH(cii, ci) {
+			if ((mask & 1 << ci->ci_cpuid) == 0)
 				continue;
-			} else {
-				if (pq->pq_pte & pmap_pg_g)
-					pq->pq_flushg++;
-				else
-					pq->pq_flushu++;
-				/*
-				 * Since we've nailed the whole thing,
-				 * drain the job entries pending for that
-				 * processor.
-				 */
-				pmap_tlb_shootdown_q_drain(pq);
-				*cpumaskp |= 1U << ci->ci_cpuid;
-			}
-		} else {
-			pj->pj_pmap = pmap;
-			pj->pj_va = va;
-			pj->pj_pte = pte;
-			TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
-			*cpumaskp |= 1U << ci->ci_cpuid;
+			if (i386_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
+				panic("pmap_tlb_shootrange: ipi failed");
 		}
-		mtx_leave(&pq->pq_mutex);
+		splx(s);
 	}
-	splx(s);
+
+	if (pmap_is_curpmap(pm))
+		for (va = sva; va < eva; va += PAGE_SIZE)
+			pmap_update_pg(va);
 }
 
-/*
- * pmap_do_tlb_shootdown:
- *
- *	Process pending TLB shootdown operations for this processor.
- */
 void
-pmap_do_tlb_shootdown(struct cpu_info *self)
+pmap_tlb_shoottlb(void)
 {
-	u_long cpu_id = cpu_number();
-	struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
-	struct pmap_tlb_shootdown_job *pj;
-#ifdef MULTIPROCESSOR
-	struct cpu_info *ci;
+	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
-#endif
+	int wait = 0;
+	int mask = 0;
 
-	mtx_enter(&pq->pq_mutex);
+	if (cpu_class == CPUCLASS_386) {
+		tlbflush();
+		return;
+	}
 
-	if (pq->pq_flushg) {
-		tlbflushg();
-		pq->pq_flushg = 0;
-		pq->pq_flushu = 0;
-		pmap_tlb_shootdown_q_drain(pq);
-	} else {
-		/*
-		 * TLB flushes for PTEs with PG_G set may be in the queue
-		 * after a flushu, they need to be dealt with.
-		 */
-		if (pq->pq_flushu) {
-			tlbflush();
-		}
-		while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
-			TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
+	CPU_INFO_FOREACH(cii, ci) {
+		if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
+			continue;
+		mask |= 1 << ci->ci_cpuid;
+		wait++;
+	}
 
-			if ((!pq->pq_flushu && pmap_is_curpmap(pj->pj_pmap)) ||
-			    (pj->pj_pte & pmap_pg_g))
-				pmap_update_pg(pj->pj_va);
+	if (wait) {
+		int s = splvm();
 
-			pmap_tlb_shootdown_job_put(pq, pj);
+		while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+			while (tlb_shoot_wait != 0)
+				SPINLOCK_SPIN_HOOK;
 		}
 
-		pq->pq_flushu = pq->pq_pte = 0;
+		CPU_INFO_FOREACH(cii, ci) {
+			if ((mask & 1 << ci->ci_cpuid) == 0)
+				continue;
+			if (i386_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
+				panic("pmap_tlb_shoottlb: ipi failed");
+		}
+		splx(s);
 	}
 
-#ifdef MULTIPROCESSOR
-	CPU_INFO_FOREACH(cii, ci)
-		i386_atomic_clearbits_l(&ci->ci_tlb_ipi_mask,
-		    (1U << cpu_id));
-#endif
-	mtx_leave(&pq->pq_mutex);
+	tlbflush();
 }
 
-/*
- * pmap_tlb_shootdown_q_drain:
- *
- *	Drain a processor's TLB shootdown queue.  We do not perform
- *	the shootdown operations.  This is merely a convenience
- *	function.
- *
- *	Note: We expect the queue to be locked.
- */
 void
-pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *pq)
+pmap_tlb_shootwait(void)
 {
-	struct pmap_tlb_shootdown_job *pj;
+	while (tlb_shoot_wait != 0)
+		SPINLOCK_SPIN_HOOK;
+}
 
-	while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
-		TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
-		pmap_tlb_shootdown_job_put(pq, pj);
+#else
+
+void
+pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
+{
+	if (cpu_class == CPUCLASS_386) {
+		tlbflush();
+		return;
 	}
-	pq->pq_pte = 0;
+
+	if (pmap_is_curpmap(pm))
+		pmap_update_pg(va);
+
 }
 
-/*
- * pmap_tlb_shootdown_job_get:
- *
- *	Get a TLB shootdown job queue entry.  This places a limit on
- *	the number of outstanding jobs a processor may have.
- *
- *	Note: We expect the queue to be locked.
- */
-struct pmap_tlb_shootdown_job *
-pmap_tlb_shootdown_job_get(struct pmap_tlb_shootdown_q *pq)
+void
+pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
 {
-	struct pmap_tlb_shootdown_job *pj;
+	vaddr_t va;
 
-	if (pq->pq_count >= PMAP_TLB_MAXJOBS)
-		return (NULL);
-
-	mtx_enter(&pmap_tlb_shootdown_job_mutex);
-	if (pj_free == NULL) {
-		mtx_leave(&pmap_tlb_shootdown_job_mutex);
-		return NULL;
+	if (cpu_class == CPUCLASS_386) {
+		tlbflush();
+		return;
 	}
-	pj = pj_free;
-	pj_free = pj_free->pj_nextfree;
-	mtx_leave(&pmap_tlb_shootdown_job_mutex);
-
-	pq->pq_count++;
-	return (pj);
+
+	for (va = sva; va < eva; va += PAGE_SIZE)
+		pmap_update_pg(va);
+
 }
 
-/*
- * pmap_tlb_shootdown_job_put:
- *
- *	Put a TLB shootdown job queue entry onto the free list.
- *
- *	Note: We expect the queue to be locked.
- */
 void
-pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *pq,
-    struct pmap_tlb_shootdown_job *pj)
+pmap_tlb_shoottlb(void)
 {
-#ifdef DIAGNOSTIC
-	if (pq->pq_count == 0)
-		panic("pmap_tlb_shootdown_job_put: queue length inconsistency");
-#endif
-	mtx_enter(&pmap_tlb_shootdown_job_mutex);
-	pj->pj_nextfree = pj_free;
-	pj_free = pj;
-	mtx_leave(&pmap_tlb_shootdown_job_mutex);
-
-	pq->pq_count--;
+	tlbflush();
 }
+#endif /* MULTIPROCESSOR */
diff --git a/sys/arch/i386/i386/vm_machdep.c b/sys/arch/i386/i386/vm_machdep.c
index b051d39e554..10fb4b1aef2 100644
--- a/sys/arch/i386/i386/vm_machdep.c
+++ b/sys/arch/i386/i386/vm_machdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vm_machdep.c,v 1.50 2007/03/19 15:17:21 art Exp $	*/
+/*	$OpenBSD: vm_machdep.c,v 1.51 2007/05/25 15:55:26 art Exp $	*/
 /*	$NetBSD: vm_machdep.c,v 1.61 1996/05/03 19:42:35 christos Exp $	*/
 
 /*-
@@ -220,14 +220,18 @@ pagemove(caddr_t from, caddr_t to, size_t size)
 {
 	pt_entry_t *fpte, *tpte;
 	pt_entry_t ofpte, otpte;
-#ifdef MULTIPROCESSOR
-	u_int32_t cpumask = 0;
-#endif
+	vaddr_t fsva, tsva, feva, teva;
 
 #ifdef DIAGNOSTIC
 	if ((size & PAGE_MASK) != 0)
 		panic("pagemove");
 #endif
+
+	fsva = (vaddr_t)from;
+	tsva = (vaddr_t)to;
+	feva = fsva + size;
+	teva = tsva + size;
+
 	fpte = kvtopte((vaddr_t)from);
 	tpte = kvtopte((vaddr_t)to);
 	while (size > 0) {
@@ -235,38 +239,14 @@ pagemove(caddr_t from, caddr_t to, size_t size)
 		otpte = *tpte;
 		*tpte++ = *fpte;
 		*fpte++ = 0;
-#if defined(I386_CPU) && !defined(MULTIPROCESSOR)
-		if (cpu_class != CPUCLASS_386)
-#endif
-		{
-			if (otpte & PG_V)
-#ifdef MULTIPROCESSOR
-				pmap_tlb_shootdown(pmap_kernel(), (vaddr_t)to,
-				    otpte, &cpumask);
-#else
-				pmap_update_pg((vaddr_t)to);
-#endif
-			if (ofpte & PG_V)
-#ifdef MULTIPROCESSOR
-				pmap_tlb_shootdown(pmap_kernel(),
-				    (vaddr_t)from, ofpte, &cpumask);
-#else
-				pmap_update_pg((vaddr_t)from);
-#endif
-		}
 		from += PAGE_SIZE;
 		to += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
-#ifdef MULTIPROCESSOR
-	pmap_tlb_shootnow(cpumask);
-#else
-#if defined(I386_CPU)
-	if (cpu_class == CPUCLASS_386)
-		tlbflush();
-#endif
-#endif
+	pmap_tlb_shootrange(pmap_kernel(), fsva, feva);
+	pmap_tlb_shootrange(pmap_kernel(), tsva, teva);
+	pmap_tlb_shootwait();
 }
 
 /*
diff --git a/sys/arch/i386/include/atomic.h b/sys/arch/i386/include/atomic.h
index 44a7be7f52f..35ea910c8fa 100644
--- a/sys/arch/i386/include/atomic.h
+++ b/sys/arch/i386/include/atomic.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: atomic.h,v 1.5 2007/02/19 17:18:42 deraadt Exp $	*/
+/*	$OpenBSD: atomic.h,v 1.6 2007/05/25 15:55:27 art Exp $	*/
 /*	$NetBSD: atomic.h,v 1.1.2.2 2000/02/21 18:54:07 sommerfeld Exp $	*/
 
 /*-
@@ -92,6 +92,20 @@ i386_atomic_clearbits_l(volatile u_int32_t *ptr, unsigned long bits)
 	__asm __volatile(LOCK " andl %1,%0" :  "=m" (*ptr) : "ir" (bits));
 }
 
+/*
+ * cas = compare and set
+ */
+static __inline int
+i486_atomic_cas_int(volatile u_int *ptr, u_int expect, u_int set)
+{
+	int res;
+
+	__asm volatile(LOCK " cmpxchgl %2, %1" : "=a" (res), "=m" (*ptr)
+	    : "r" (set), "a" (expect), "m" (*ptr) : "memory");
+
+	return (res);
+}
+
 #define atomic_setbits_int i386_atomic_setbits_l
 #define atomic_clearbits_int i386_atomic_clearbits_l
diff --git a/sys/arch/i386/include/i82489var.h b/sys/arch/i386/include/i82489var.h
index 653641bf713..0fe445e41fe 100644
--- a/sys/arch/i386/include/i82489var.h
+++ b/sys/arch/i386/include/i82489var.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: i82489var.h,v 1.4 2007/04/12 20:22:58 art Exp $	*/
+/*	$OpenBSD: i82489var.h,v 1.5 2007/05/25 15:55:27 art Exp $	*/
 /*	$NetBSD: i82489var.h,v 1.1.2.2 2000/02/21 18:46:14 sommerfeld Exp $	*/
 
 /*-
@@ -109,6 +109,14 @@ extern void Xintrltimer(void);
  */
 #define LAPIC_IPI_OFFSET	0xf0
 #define LAPIC_IPI_AST		(LAPIC_IPI_OFFSET + 0)
+#define LAPIC_IPI_INVLTLB	(LAPIC_IPI_OFFSET + 1)
+#define LAPIC_IPI_INVLPG	(LAPIC_IPI_OFFSET + 2)
+#define LAPIC_IPI_INVLRANGE	(LAPIC_IPI_OFFSET + 3)
+
+extern void Xintripi_ast(void);
+extern void Xintripi_invltlb(void);
+extern void Xintripi_invlpg(void);
+extern void Xintripi_invlrange(void);
 
 extern void Xintrsoftclock(void);
 extern void Xintrsoftnet(void);
diff --git a/sys/arch/i386/include/intr.h b/sys/arch/i386/include/intr.h
index eeeb74a605e..0d69c57277a 100644
--- a/sys/arch/i386/include/intr.h
+++ b/sys/arch/i386/include/intr.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: intr.h,v 1.31 2007/05/16 19:37:06 thib Exp $	*/
+/*	$OpenBSD: intr.h,v 1.32 2007/05/25 15:55:27 art Exp $	*/
 /*	$NetBSD: intr.h,v 1.5 1996/05/13 06:11:28 mycroft Exp $	*/
 
 /*
@@ -137,6 +137,7 @@ struct cpu_info;
 
 #ifdef MULTIPROCESSOR
 int i386_send_ipi(struct cpu_info *, int);
+int i386_fast_ipi(struct cpu_info *, int);
 void i386_broadcast_ipi(int);
 void i386_multicast_ipi(int, int);
 void i386_ipi_handler(void);
diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h
index 6520a9bbebe..9f0ed360a1a 100644
--- a/sys/arch/i386/include/pmap.h
+++ b/sys/arch/i386/include/pmap.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pmap.h,v 1.45 2007/04/26 11:31:52 art Exp $	*/
+/*	$OpenBSD: pmap.h,v 1.46 2007/05/25 15:55:27 art Exp $	*/
 /*	$NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $	*/
 
 /*
@@ -383,9 +383,14 @@ int		pmap_exec_fixup(struct vm_map *, struct trapframe *,
 
 vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
 
-void	pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, int32_t *);
-void	pmap_tlb_shootnow(int32_t);
-void	pmap_do_tlb_shootdown(struct cpu_info *);
+void	pmap_tlb_shootpage(struct pmap *, vaddr_t);
+void	pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t);
+void	pmap_tlb_shoottlb(void);
+#ifdef MULTIPROCESSOR
+void	pmap_tlb_shootwait(void);
+#else
+#define	pmap_tlb_shootwait()
+#endif
 
 #define PMAP_GROWKERNEL		/* turn on pmap_growkernel interface */