| author | Stefan Fritsch <sf@cvs.openbsd.org> | 2014-10-06 20:34:59 +0000 |
|---|---|---|
| committer | Stefan Fritsch <sf@cvs.openbsd.org> | 2014-10-06 20:34:59 +0000 |
| commit | d41036be6ee3c813374da5b0b78ba5df2ff92c97 (patch) | |
| tree | be03285a14d85a20c60dd26ddc2a8d2f3e582ea4 /sys | |
| parent | 663dd3a733d1e1eb57165f93e9999a75de283023 (diff) | |
Make amd64 pmap more efficient on multi-processor systems
With the current implementation, accessing an inactive pmap maps its
PTEs into the APTE range. The problem is that the APTE range is mapped
on all CPUs, so every change to it must be followed by a remote TLB
flush on all CPUs. This is very inefficient: each of the N CPUs can
force flushes on all N CPUs, so the total cost grows quadratically
with the number of CPUs.
Therefore, the code is changed to remove the APTE mechanism completely
and instead switch the pmap locally, by loading its page directory
base into CR3. A remote TLB flush is then only done if the pmap is
actually in use on the remote CPU. In the common case, this replaces
one TLB flush on all CPUs with two local TLB flushes (one per CR3
load), as sketched below.
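In outline, the new pmap_map_ptes()/pmap_unmap_ptes() pair works as
follows. This is a simplified sketch of the code in the diff below;
the function names here are illustrative, and locking and the pm_cpus
memory-ordering details are omitted:

```c
/*
 * Sketch: access a foreign pmap by temporarily switching this CPU's
 * CR3 to it, instead of mapping it into a shared APTE range.
 * Simplified from pmap_map_ptes()/pmap_unmap_ptes() in the diff below.
 */
void
map_ptes_sketch(struct pmap *pmap, paddr_t *save_cr3)
{
	paddr_t cr3 = rcr3();

	if (pmap == pmap_kernel() || pmap->pm_pdirpa == cr3) {
		*save_cr3 = 0;		/* already reachable, nothing to do */
	} else {
		*save_cr3 = cr3;
		/* mark the pmap in use here so remote shootdowns reach us */
		x86_atomic_setbits_u64(&pmap->pm_cpus, 1ULL << cpu_number());
		lcr3(pmap->pm_pdirpa);	/* local TLB flush #1 */
	}
}

void
unmap_ptes_sketch(struct pmap *pmap, paddr_t save_cr3)
{
	if (save_cr3 != 0) {
		x86_atomic_clearbits_u64(&pmap->pm_cpus, 1ULL << cpu_number());
		lcr3(save_cr3);		/* local TLB flush #2 */
	}
}
```

A saved CR3 of zero doubles as the "no switch happened" marker;
callers use it to compute the shootself flag passed to the shootdown
functions.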
An additional optimization is done in cases where only a single PTE of
an inactive pmap is accessed: the requested PTE is found by walking the
page tables manually via the direct physical mapping. This avoids the
pmap switch entirely and makes some more TLB flushes unnecessary; see
the sketch after this paragraph.
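The walk looks roughly like the sketch below (based on
pmap_find_pte_direct() in the diff; it returns 0 if a valid level-1
PTE was reached, otherwise the level at which the walk stopped on a
large page or an invalid entry):

```c
/*
 * Sketch: locate the PTE of an arbitrary pmap by walking its page
 * tables through the direct map: no CR3 switch, no TLB flush.
 */
int
find_pte_direct_sketch(struct pmap *pm, vaddr_t va, pt_entry_t **pd, int *offs)
{
	u_long shift = L4_SHIFT, mask = L4_MASK;
	paddr_t pdpa = pm->pm_pdirpa;	/* physical address of the PML4 */
	pd_entry_t pde;
	int lev;

	for (lev = PTP_LEVELS; lev > 0; lev--) {
		/* the direct map makes any physical page addressable */
		*pd = (pd_entry_t *)PMAP_DIRECT_MAP(pdpa);
		*offs = (VA_SIGN_POS(va) & mask) >> shift;
		pde = (*pd)[*offs];

		/* stop on an invalid entry or a large page */
		if ((pde & (PG_PS|PG_V)) != PG_V)
			return (lev - 1);

		pdpa = pde & PG_FRAME;
		shift -= 9;		/* 512 == 2^9 entries per level */
		mask >>= 9;
	}
	return (0);
}
```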
Furthermore, some code is reordered so that the TLB shootdown IPIs are
sent first, further local processing then takes place, and only
afterwards does the CPU wait for the remote TLB shootdowns to finish.
The IPI round-trip latency is thus overlapped with useful local work
(see the pattern below).
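Concretely, the call sites now follow a send/work/wait pattern like
the one below (adapted from pmap_write_protect() in the diff; the
wrapper function is illustrative only):

```c
/* Sketch: overlap remote TLB shootdowns with remaining local work. */
void
shootdown_pattern_sketch(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
    int shootself, paddr_t scr3)
{
	pmap_tlb_shootrange(pmap, sva, eva, shootself); /* 1: send the IPIs */
	pmap_unmap_ptes(pmap, scr3);  /* 2: local work while IPIs are in flight */
	pmap_tlb_shootwait();         /* 3: only now wait for remote CPUs */
}
```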
This diff is based on a patch for i386 by Artur Grabowski <art blahonga org>
from 2008. Some additional bits were taken from a different patch by
Artur from 2005.
Tested by many. OK mlarkin@
Diffstat (limited to 'sys')

| file | lines changed |
|---|---|
| sys/arch/amd64/amd64/pmap.c | 285 |
| sys/arch/amd64/include/pmap.h | 53 |

2 files changed, 171 insertions, 167 deletions
```diff
diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index cee98c65d2b..0de5a87c21f 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pmap.c,v 1.73 2014/09/16 18:57:51 sf Exp $	*/
+/*	$OpenBSD: pmap.c,v 1.74 2014/10/06 20:34:58 sf Exp $	*/
 /*	$NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $	*/
 
 /*
@@ -203,7 +203,6 @@ long nkptp[] = NKPTP_INITIALIZER;
 long nkptpmax[] = NKPTPMAX_INITIALIZER;
 long nbpd[] = NBPD_INITIALIZER;
 pd_entry_t *normal_pdes[] = PDES_INITIALIZER;
-pd_entry_t *alternate_pdes[] = APDES_INITIALIZER;
 
 /* int nkpde = NKPTP; */
 
@@ -285,11 +284,12 @@ void pmap_enter_pv(struct vm_page *, struct pv_entry *, struct pmap *,
     vaddr_t, struct vm_page *);
 struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, pd_entry_t **);
 struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
+int pmap_find_pte_direct(struct pmap *pm, vaddr_t va, pt_entry_t **pd, int *offs);
 void pmap_free_ptp(struct pmap *, struct vm_page *, vaddr_t,
     pt_entry_t *, pd_entry_t **, struct pg_to_free *);
 void pmap_freepage(struct pmap *, struct vm_page *, int, struct pg_to_free *);
 static boolean_t pmap_is_active(struct pmap *, int);
-void pmap_map_ptes(struct pmap *, pt_entry_t **, pd_entry_t ***);
+void pmap_map_ptes(struct pmap *, pt_entry_t **, pd_entry_t ***, paddr_t *);
 struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t);
 void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
 boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
@@ -299,14 +299,23 @@ void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
 #define PMAP_REMOVE_ALL		0	/* remove all mappings */
 #define PMAP_REMOVE_SKIPWIRED	1	/* skip wired mappings */
 
-void pmap_unmap_ptes(struct pmap *);
+void pmap_unmap_ptes(struct pmap *, paddr_t);
 boolean_t pmap_get_physpage(vaddr_t, int, paddr_t *);
 boolean_t pmap_pdes_valid(vaddr_t, pd_entry_t **, pd_entry_t *);
 void pmap_alloc_level(pd_entry_t **, vaddr_t, int, long *);
-void pmap_apte_flush(struct pmap *pmap);
 
 void pmap_sync_flags_pte(struct vm_page *, u_long);
 
+void pmap_tlb_shootpage(struct pmap *, vaddr_t, int);
+void pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t, int);
+void pmap_tlb_shoottlb(struct pmap *, int);
+#ifdef MULTIPROCESSOR
+void pmap_tlb_shootwait(void);
+#else
+#define pmap_tlb_shootwait()
+#endif
+
+
 /*
  * p m a p   i n l i n e   h e l p e r   f u n c t i o n s
  */
@@ -349,57 +358,76 @@ pmap_sync_flags_pte(struct vm_page *pg, u_long pte)
 	}
 }
 
-void
-pmap_apte_flush(struct pmap *pmap)
-{
-	pmap_tlb_shoottlb();
-	pmap_tlb_shootwait();
-}
-
 /*
  * pmap_map_ptes: map a pmap's PTEs into KVM
- *
- * => we lock enough pmaps to keep things locked in
- * => must be undone with pmap_unmap_ptes before returning
  */
 
 void
-pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp)
+pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp, paddr_t *save_cr3)
 {
-	pd_entry_t opde, npde;
+	paddr_t cr3 = rcr3();
 
-	/* if curpmap then we are always mapped */
-	if (pmap_is_curpmap(pmap)) {
-		*ptepp = PTE_BASE;
-		*pdeppp = normal_pdes;
-		return;
-	}
+	/* the kernel's pmap is always accessible */
+	if (pmap == pmap_kernel() || pmap->pm_pdirpa == cr3) {
+		*save_cr3 = 0;
+	} else {
+		*save_cr3 = cr3;
 
-	/* need to load a new alternate pt space into curpmap? */
-	opde = *APDP_PDE;
-	if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) {
-		npde = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V);
-		*APDP_PDE = npde;
-		if (pmap_valid_entry(opde))
-			pmap_apte_flush(curpcb->pcb_pmap);
+		/*
+		 * Not sure if we need this, but better be safe.
+		 * We don't have the current pmap in order to unset its
+		 * active bit, but this just means that we may receive
+		 * an unneccessary cross-CPU TLB flush now and then.
+		 */
+		x86_atomic_setbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
+
+		lcr3(pmap->pm_pdirpa);
 	}
-	*ptepp = APTE_BASE;
-	*pdeppp = alternate_pdes;
+
+	*ptepp = PTE_BASE;
+	*pdeppp = normal_pdes;
+	return;
 }
 
 void
-pmap_unmap_ptes(struct pmap *pmap)
+pmap_unmap_ptes(struct pmap *pmap, paddr_t save_cr3)
 {
-	if (pmap_is_curpmap(pmap))
-		return;
+	if (save_cr3 != 0) {
+		x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
+		lcr3(save_cr3);
+	}
+}
 
-#if defined(MULTIPROCESSOR)
-	*APDP_PDE = 0;
-	pmap_apte_flush(curpcb->pcb_pmap);
-#endif
-	COUNT(apdp_pde_unmap);
+int
+pmap_find_pte_direct(struct pmap *pm, vaddr_t va, pt_entry_t **pd, int *offs)
+{
+	u_long mask, shift;
+	pd_entry_t pde;
+	paddr_t pdpa;
+	int lev;
+
+	pdpa = pm->pm_pdirpa;
+	shift = L4_SHIFT;
+	mask = L4_MASK;
+	for (lev = PTP_LEVELS; lev > 0; lev--) {
+		*pd = (pd_entry_t *)PMAP_DIRECT_MAP(pdpa);
+		*offs = (VA_SIGN_POS(va) & mask) >> shift;
+		pde = (*pd)[*offs];
+
+		/* Large pages are different, break early if we run into one. */
+		if ((pde & (PG_PS|PG_V)) != PG_V)
+			return (lev - 1);
+
+		pdpa = ((*pd)[*offs] & PG_FRAME);
+		/* 4096/8 == 512 == 2^9 entries per level */
+		shift -= 9;
+		mask >>= 9;
+	}
+
+	return (0);
 }
+
 /*
  * p m a p   k e n t e r   f u n c t i o n s
  *
@@ -442,7 +470,7 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
 		if (pa & PMAP_NOCACHE && (opte & PG_N) == 0)
 			wbinvd();
 		/* This shouldn't happen */
-		pmap_tlb_shootpage(pmap_kernel(), va);
+		pmap_tlb_shootpage(pmap_kernel(), va, 1);
 		pmap_tlb_shootwait();
 	}
 }
@@ -476,7 +504,7 @@ pmap_kremove(vaddr_t sva, vsize_t len)
 		KASSERT((opte & PG_PVLIST) == 0);
 	}
 
-	pmap_tlb_shootrange(pmap_kernel(), sva, eva);
+	pmap_tlb_shootrange(pmap_kernel(), sva, eva, 1);
 	pmap_tlb_shootwait();
 }
 
@@ -822,11 +850,13 @@ pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
 		invaladdr = level == 1 ? (vaddr_t)ptes :
 		    (vaddr_t)pdes[level - 2];
 		pmap_tlb_shootpage(curpcb->pcb_pmap,
-		    invaladdr + index * PAGE_SIZE);
+		    invaladdr + index * PAGE_SIZE,
+		    pmap_is_curpmap(curpcb->pcb_pmap));
 #if defined(MULTIPROCESSOR)
 		invaladdr = level == 1 ? (vaddr_t)PTE_BASE :
 		    (vaddr_t)normal_pdes[level - 2];
-		pmap_tlb_shootpage(pmap, invaladdr + index * PAGE_SIZE);
+		pmap_tlb_shootpage(pmap, invaladdr + index * PAGE_SIZE,
+		    pmap_is_curpmap(curpcb->pcb_pmap));
 #endif
 		if (level < PTP_LEVELS - 1) {
 			ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
@@ -1054,11 +1084,7 @@ pmap_destroy(struct pmap *pmap)
 		}
 	}
 
-	/*
-	 * MULTIPROCESSOR -- no need to flush out of other processors'
-	 * APTE space because we do that in pmap_unmap_ptes().
-	 */
-	/* XXX: need to flush it out of other processor's APTE space? */
+	/* XXX: need to flush it out of other processor's space? */
 
 	pool_put(&pmap_pdp_pool, pmap->pm_pdir);
 	pool_put(&pmap_pmap_pool, pmap);
@@ -1147,8 +1173,8 @@ pmap_pdes_valid(vaddr_t va, pd_entry_t **pdes, pd_entry_t *lastpde)
 boolean_t
 pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
 {
-	pt_entry_t *ptes, pte;
-	pd_entry_t pde, **pdes;
+	pt_entry_t *ptes;
+	int level, offs;
 
 	if (pmap == pmap_kernel() && va >= PMAP_DIRECT_BASE &&
 	    va < PMAP_DIRECT_END) {
@@ -1156,24 +1182,16 @@ pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
 		return (TRUE);
 	}
 
-	pmap_map_ptes(pmap, &ptes, &pdes);
-	if (pmap_pdes_valid(va, pdes, &pde) == FALSE) {
-		return FALSE;
-	}
+	level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
 
-	if (pde & PG_PS) {
+	if (__predict_true(level == 0 && pmap_valid_entry(ptes[offs]))) {
 		if (pap != NULL)
-			*pap = (pde & PG_LGFRAME) | (va & 0x1fffff);
-		pmap_unmap_ptes(pmap);
+			*pap = (ptes[offs] & PG_FRAME) | (va & PAGE_MASK);
 		return (TRUE);
 	}
-
-	pte = ptes[pl1_i(va)];
-	pmap_unmap_ptes(pmap);
-
-	if (__predict_true(pmap_valid_entry(pte))) {
+	if (level == 1 && (ptes[offs] & (PG_PS|PG_V)) == (PG_PS|PG_V)) {
 		if (pap != NULL)
-			*pap = (pte & PG_FRAME) | (va & 0xfff);
+			*pap = (ptes[offs] & PG_LGFRAME) | (va & PAGE_MASK_L2);
 		return (TRUE);
 	}
 
@@ -1436,13 +1454,16 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 	vaddr_t blkendva;
 	struct vm_page *ptp;
 	vaddr_t va;
-	int shootall = 0;
+	int shootall = 0, shootself;
 	struct pg_to_free empty_ptps;
+	paddr_t scr3;
 
 	TAILQ_INIT(&empty_ptps);
 
 	PMAP_MAP_TO_HEAD_LOCK();
-	pmap_map_ptes(pmap, &ptes, &pdes);
+	pmap_map_ptes(pmap, &ptes, &pdes, &scr3);
+	shootself = (scr3 == 0);
+
 	/*
 	 * removing one page?  take shortcut function.
@@ -1480,11 +1501,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 			if (result && ptp && ptp->wire_count <= 1)
 				pmap_free_ptp(pmap, ptp, sva, ptes, pdes,
 				    &empty_ptps);
-			pmap_tlb_shootpage(pmap, sva);
+			pmap_tlb_shootpage(pmap, sva, shootself);
+			pmap_unmap_ptes(pmap, scr3);
+			pmap_tlb_shootwait();
+		} else {
+			pmap_unmap_ptes(pmap, scr3);
 		}
-
-		pmap_tlb_shootwait();
-		pmap_unmap_ptes(pmap);
 		PMAP_MAP_TO_HEAD_UNLOCK();
 
 		while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
@@ -1549,13 +1571,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
 	}
 
 	if (shootall)
-		pmap_tlb_shoottlb();
+		pmap_tlb_shoottlb(pmap, shootself);
 	else
-		pmap_tlb_shootrange(pmap, sva, eva);
+		pmap_tlb_shootrange(pmap, sva, eva, shootself);
 
+	pmap_unmap_ptes(pmap, scr3);
 	pmap_tlb_shootwait();
-
-	pmap_unmap_ptes(pmap);
 
 	PMAP_MAP_TO_HEAD_UNLOCK();
 
 	while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
@@ -1581,6 +1602,8 @@ pmap_page_remove(struct vm_page *pg)
 #endif
 	struct pg_to_free empty_ptps;
 	struct vm_page *ptp;
+	paddr_t scr3;
+	int shootself;
 
 	TAILQ_INIT(&empty_ptps);
 
@@ -1589,7 +1612,9 @@ pmap_page_remove(struct vm_page *pg)
 	while ((pve = pg->mdpage.pv_list) != NULL) {
 		pg->mdpage.pv_list = pve->pv_next;
 
-		pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);
+		/* XXX use direct map? */
+		pmap_map_ptes(pve->pv_pmap, &ptes, &pdes, &scr3);
+		shootself = (scr3 == 0);
 
 #ifdef DIAGNOSTIC
 		if (pve->pv_ptp && pmap_pdes_valid(pve->pv_va, pdes, &pde) &&
@@ -1612,7 +1637,7 @@ pmap_page_remove(struct vm_page *pg)
 			pve->pv_pmap->pm_stats.wired_count--;
 		pve->pv_pmap->pm_stats.resident_count--;
 
-		pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
+		pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va, shootself);
 
 		pmap_sync_flags_pte(pg, opte);
 
@@ -1624,7 +1649,7 @@ pmap_page_remove(struct vm_page *pg)
 				    pve->pv_va, ptes, pdes, &empty_ptps);
 			}
 		}
-		pmap_unmap_ptes(pve->pv_pmap);
+		pmap_unmap_ptes(pve->pv_pmap, scr3);
 		pool_put(&pmap_pv_pool, pve);
 	}
 
@@ -1652,8 +1677,8 @@ boolean_t
 pmap_test_attrs(struct vm_page *pg, unsigned int testbits)
 {
 	struct pv_entry *pve;
-	pt_entry_t *ptes, pte;
-	pd_entry_t **pdes;
+	pt_entry_t *ptes;
+	int level, offs;
 	u_long mybits, testflags;
 
 	testflags = pmap_pte2flags(testbits);
@@ -1665,10 +1690,9 @@ pmap_test_attrs(struct vm_page *pg, unsigned int testbits)
 	mybits = 0;
 	for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0;
 	    pve = pve->pv_next) {
-		pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);
-		pte = ptes[pl1_i(pve->pv_va)];
-		pmap_unmap_ptes(pve->pv_pmap);
-		mybits |= (pte & testbits);
+		level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes,
+		    &offs);
+		mybits |= (ptes[offs] & testbits);
 	}
 
 	PMAP_HEAD_TO_MAP_UNLOCK();
@@ -1691,9 +1715,8 @@ pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits)
 {
 	struct pv_entry *pve;
 	pt_entry_t *ptes, opte;
-	pd_entry_t **pdes;
 	u_long clearflags;
-	int result;
+	int result, level, offs;
 
 	clearflags = pmap_pte2flags(clearbits);
 
@@ -1704,20 +1727,15 @@ pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits)
 		atomic_clearbits_int(&pg->pg_flags, clearflags);
 
 	for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) {
-		pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);
-#ifdef DIAGNOSTIC
-		if (!pmap_pdes_valid(pve->pv_va, pdes, NULL))
-			panic("%s: mapping without PTP detected", __func__);
-#endif
-
-		opte = ptes[pl1_i(pve->pv_va)];
+		level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes,
+		    &offs);
+		opte = ptes[offs];
 		if (opte & clearbits) {
 			result = 1;
-			pmap_pte_clearbits(&ptes[pl1_i(pve->pv_va)],
-			    (opte & clearbits));
-			pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
+			pmap_pte_clearbits(&ptes[offs], (opte & clearbits));
+			pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va,
+			    pmap_is_curpmap(pve->pv_pmap));
 		}
-		pmap_unmap_ptes(pve->pv_pmap);
 	}
 
 	PMAP_HEAD_TO_MAP_UNLOCK();
@@ -1758,10 +1776,12 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
 	pt_entry_t nx, *ptes, *spte, *epte;
 	pd_entry_t **pdes;
 	vaddr_t blockend;
-	int shootall = 0;
+	int shootall = 0, shootself;
 	vaddr_t va;
+	paddr_t scr3;
 
-	pmap_map_ptes(pmap, &ptes, &pdes);
+	pmap_map_ptes(pmap, &ptes, &pdes, &scr3);
+	shootself = (scr3 == 0);
 
 	/* should be ok, but just in case ... */
 	sva &= PG_FRAME;
@@ -1813,13 +1833,13 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
 	}
 
 	if (shootall)
-		pmap_tlb_shoottlb();
+		pmap_tlb_shoottlb(pmap, shootself);
 	else
-		pmap_tlb_shootrange(pmap, sva, eva);
+		pmap_tlb_shootrange(pmap, sva, eva, shootself);
 
-	pmap_tlb_shootwait();
+	pmap_unmap_ptes(pmap, scr3);
 
-	pmap_unmap_ptes(pmap);
+	pmap_tlb_shootwait();
 }
 
 /*
@@ -1836,18 +1856,18 @@ void
 pmap_unwire(struct pmap *pmap, vaddr_t va)
 {
 	pt_entry_t *ptes;
-	pd_entry_t **pdes;
+	int level, offs;
 
-	pmap_map_ptes(pmap, &ptes, &pdes);
+	level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
 
-	if (pmap_pdes_valid(va, pdes, NULL)) {
+	if (level == 0) {
 #ifdef DIAGNOSTIC
-		if (!pmap_valid_entry(ptes[pl1_i(va)]))
+		if (!pmap_valid_entry(ptes[offs]))
 			panic("%s: invalid (unmapped) va 0x%lx", __func__, va);
 #endif
-		if (__predict_true((ptes[pl1_i(va)] & PG_W) != 0)) {
-			pmap_pte_clearbits(&ptes[pl1_i(va)], PG_W);
+		if (__predict_true((ptes[offs] & PG_W) != 0)) {
+			pmap_pte_clearbits(&ptes[offs], PG_W);
 			pmap->pm_stats.wired_count--;
 		}
 #ifdef DIAGNOSTIC
@@ -1856,7 +1876,6 @@ pmap_unwire(struct pmap *pmap, vaddr_t va)
 			    "didn't change!\n", __func__, pmap, va);
 		}
 #endif
-		pmap_unmap_ptes(pmap);
 	}
 #ifdef DIAGNOSTIC
 	else {
@@ -1913,14 +1932,15 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
 	boolean_t wired = (flags & PMAP_WIRED) != 0;
 	boolean_t nocache = (pa & PMAP_NOCACHE) != 0;
 	boolean_t wc = (pa & PMAP_WC) != 0;
-	int error;
+	int error, shootself;
+	paddr_t scr3;
 
 	KASSERT(!(wc && nocache));
 	pa &= PMAP_PA_MASK;
 
 #ifdef DIAGNOSTIC
-	if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
-		panic("%s: trying to map over PDP/APDP!", __func__);
+	if (va == (vaddr_t) PDP_BASE)
+		panic("%s: trying to map over PDP!", __func__);
 
 	/* sanity check: kernel PTPs should already have been pre-allocated */
 	if (va >= VM_MIN_KERNEL_ADDRESS &&
@@ -1936,7 +1956,8 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
 	 * map in ptes and get a pointer to our PTP (unless we are the kernel)
 	 */
 
-	pmap_map_ptes(pmap, &ptes, &pdes);
+	pmap_map_ptes(pmap, &ptes, &pdes, &scr3);
+	shootself = (scr3 == 0);
 	if (pmap == pmap_kernel()) {
 		ptp = NULL;
 	} else {
@@ -2108,14 +2129,14 @@ enter_now:
 	if (pmap_valid_entry(opte)) {
 		if (nocache && (opte & PG_N) == 0)
 			wbinvd();
-		pmap_tlb_shootpage(pmap, va);
+		pmap_tlb_shootpage(pmap, va, shootself);
 		pmap_tlb_shootwait();
 	}
 
 	error = 0;
 
 out:
-	pmap_unmap_ptes(pmap);
+	pmap_unmap_ptes(pmap, scr3);
 	PMAP_MAP_TO_HEAD_UNLOCK();
 
 	return error;
@@ -2336,6 +2357,7 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
 	pt_entry_t *ptes, *pte;
 	pd_entry_t **pdes;
 	vaddr_t blkendva;
+	paddr_t scr3;
 
 	/*
 	 * if end is out of range truncate.
@@ -2347,7 +2369,7 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
 
 	PMAP_MAP_TO_HEAD_LOCK();
-	pmap_map_ptes(pmap, &ptes, &pdes);
+	pmap_map_ptes(pmap, &ptes, &pdes, &scr3);
 
 	/*
 	 * dumping a range of pages: we dump in PTP sized blocks (4MB)
@@ -2372,7 +2394,7 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
 			    sva, *pte, *pte & PG_FRAME);
 		}
 	}
-	pmap_unmap_ptes(pmap);
+	pmap_unmap_ptes(pmap, scr3);
 	PMAP_MAP_TO_HEAD_UNLOCK();
 }
 #endif
@@ -2413,7 +2435,7 @@ volatile vaddr_t tlb_shoot_addr1;
 volatile vaddr_t tlb_shoot_addr2;
 
 void
-pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
+pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
 {
 	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
@@ -2445,12 +2467,12 @@ pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
 		splx(s);
 	}
 
-	if (pmap_is_curpmap(pm))
+	if (shootself)
 		pmap_update_pg(va);
 }
 
 void
-pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
+pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva, int shootself)
 {
 	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
@@ -2484,13 +2506,13 @@ pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
 		splx(s);
 	}
 
-	if (pmap_is_curpmap(pm))
+	if (shootself)
 		for (va = sva; va < eva; va += PAGE_SIZE)
 			pmap_update_pg(va);
 }
 
 void
-pmap_tlb_shoottlb(void)
+pmap_tlb_shoottlb(struct pmap *pm, int shootself)
 {
 	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
@@ -2498,7 +2520,7 @@ pmap_tlb_shoottlb(void)
 	u_int64_t mask = 0;
 
 	CPU_INFO_FOREACH(cii, ci) {
-		if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
+		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) || !(ci->ci_flags & CPUF_RUNNING))
 			continue;
 		mask |= (1ULL << ci->ci_cpuid);
 		wait++;
@@ -2521,7 +2543,8 @@ pmap_tlb_shoottlb(void)
 		splx(s);
 	}
 
-	tlbflush();
+	if (shootself)
+		tlbflush();
 }
 
 void
@@ -2534,26 +2557,30 @@ pmap_tlb_shootwait(void)
 
 #else
 
 void
-pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
+pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
 {
-	if (pmap_is_curpmap(pm))
+	if (shootself)
 		pmap_update_pg(va);
 }
 
 void
-pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
+pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva, int shootself)
 {
 	vaddr_t va;
 
+	if (!shootself)
+		return;
+
 	for (va = sva; va < eva; va += PAGE_SIZE)
 		pmap_update_pg(va);
 }
 
 void
-pmap_tlb_shoottlb(void)
+pmap_tlb_shoottlb(struct pmap *pm, int shootself)
 {
-	tlbflush();
+	if (shootself)
+		tlbflush();
 }
 #endif /* MULTIPROCESSOR */
diff --git a/sys/arch/amd64/include/pmap.h b/sys/arch/amd64/include/pmap.h
index 08587d8f621..0aef748ad37 100644
--- a/sys/arch/amd64/include/pmap.h
+++ b/sys/arch/amd64/include/pmap.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pmap.h,v 1.46 2014/09/16 18:57:51 sf Exp $	*/
+/*	$OpenBSD: pmap.h,v 1.47 2014/10/06 20:34:58 sf Exp $	*/
 /*	$NetBSD: pmap.h,v 1.1 2003/04/26 18:39:46 fvdl Exp $	*/
 
 /*
@@ -79,35 +79,32 @@
 
 /*
  * The x86_64 pmap module closely resembles the i386 one. It uses
- * the same recursive entry scheme, and the same alternate area
- * trick for accessing non-current pmaps. See the i386 pmap.h
- * for a description. The first obvious difference is that 2 extra
- * levels of page table need to be dealt with. The level 1 page
- * table pages are at:
+ * the same recursive entry scheme. See the i386 pmap.h for a
+ * description. The alternate area trick for accessing non-current
+ * pmaps has been removed, though, because it performs badly on SMP
+ * systems.
+ * The most obvious difference to i386 is that 2 extra levels of page
+ * table need to be dealt with. The level 1 page table pages are at:
 *
 * l1: 0x00007f8000000000 - 0x00007fffffffffff     (39 bits, needs PML4 entry)
 *
- * The alternate space is at:
- *
- * l1: 0xffffff0000000000 - 0xffffff7fffffffff     (39 bits, needs PML4 entry)
- *
 * The other levels are kept as physical pages in 3 UVM objects and are
 * temporarily mapped for virtual access when needed.
 *
 * The other obvious difference from i386 is that it has a direct map of all
 * physical memory in the VA range:
 *
- *	0xfffffe8000000000 - 0xfffffeffffffffff
+ *	0xffffff0000000000 - 0xffffff7fffffffff
+ *
+ * The direct map is used in some cases to access PTEs of non-current pmaps.
 *
 * Note that address space is signed, so the layout for 48 bits is:
 *
 *  +---------------------------------+ 0xffffffffffffffff
 *  |         Kernel Image            |
 *  +---------------------------------+ 0xffffff8000000000
- *  |    alt.L1 table (PTE pages)     |
- *  +---------------------------------+ 0xffffff0000000000
 *  |         Direct Map              |
- *  +---------------------------------+ 0xfffffe8000000000
+ *  +---------------------------------+ 0xffffff0000000000
 *  ~                                 ~
 *  |                                 |
 *  |         Kernel Space            |
@@ -155,44 +152,34 @@
 #define L4_SLOT_PTE		255
 #define L4_SLOT_KERN		256
 #define L4_SLOT_KERNBASE	511
-#define L4_SLOT_APTE		510
-#define L4_SLOT_DIRECT		509
+#define L4_SLOT_DIRECT		510
 
 #define PDIR_SLOT_KERN		L4_SLOT_KERN
 #define PDIR_SLOT_PTE		L4_SLOT_PTE
-#define PDIR_SLOT_APTE		L4_SLOT_APTE
 #define PDIR_SLOT_DIRECT	L4_SLOT_DIRECT
 
 /*
  * the following defines give the virtual addresses of various MMU
  * data structures:
- * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings
- * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD
- * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP
+ * PTE_BASE: the base VA of the linear PTE mappings
+ * PTD_BASE: the base VA of the recursive mapping of the PTD
+ * PDP_PDE: the VA of the PDE that points back to the PDP
 *
 */
 
 #define PTE_BASE	((pt_entry_t *) (L4_SLOT_PTE * NBPD_L4))
-#define APTE_BASE	((pt_entry_t *) (VA_SIGN_NEG((L4_SLOT_APTE * NBPD_L4))))
 #define PMAP_DIRECT_BASE	(VA_SIGN_NEG((L4_SLOT_DIRECT * NBPD_L4)))
 #define PMAP_DIRECT_END		(VA_SIGN_NEG(((L4_SLOT_DIRECT + 1) * NBPD_L4)))
 
 #define L1_BASE		PTE_BASE
-#define AL1_BASE	APTE_BASE
 
 #define L2_BASE ((pd_entry_t *)((char *)L1_BASE + L4_SLOT_PTE * NBPD_L3))
 #define L3_BASE ((pd_entry_t *)((char *)L2_BASE + L4_SLOT_PTE * NBPD_L2))
 #define L4_BASE ((pd_entry_t *)((char *)L3_BASE + L4_SLOT_PTE * NBPD_L1))
 
-#define AL2_BASE ((pd_entry_t *)((char *)AL1_BASE + L4_SLOT_PTE * NBPD_L3))
-#define AL3_BASE ((pd_entry_t *)((char *)AL2_BASE + L4_SLOT_PTE * NBPD_L2))
-#define AL4_BASE ((pd_entry_t *)((char *)AL3_BASE + L4_SLOT_PTE * NBPD_L1))
-
 #define PDP_PDE		(L4_BASE + PDIR_SLOT_PTE)
-#define APDP_PDE	(L4_BASE + PDIR_SLOT_APTE)
 
 #define PDP_BASE	L4_BASE
-#define APDP_BASE	AL4_BASE
 
 #define NKL4_MAX_ENTRIES	(unsigned long)1
 #define NKL3_MAX_ENTRIES	(unsigned long)(NKL4_MAX_ENTRIES * 512)
@@ -247,7 +234,6 @@
 	NKL3_MAX_ENTRIES, NKL4_MAX_ENTRIES }
 #define NBPD_INITIALIZER { NBPD_L1, NBPD_L2, NBPD_L3, NBPD_L4 }
 #define PDES_INITIALIZER { L2_BASE, L3_BASE, L4_BASE }
-#define APDES_INITIALIZER { AL2_BASE, AL3_BASE, AL4_BASE }
 
 /*
  * PTP macros:
@@ -410,15 +396,6 @@ void		pmap_write_protect(struct pmap *, vaddr_t,
 
 vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
 
-void	pmap_tlb_shootpage(struct pmap *, vaddr_t);
-void	pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t);
-void	pmap_tlb_shoottlb(void);
-#ifdef MULTIPROCESSOR
-void	pmap_tlb_shootwait(void);
-#else
-#define	pmap_tlb_shootwait()
-#endif
-
 paddr_t	pmap_prealloc_lowmem_ptps(paddr_t);
 
 void	pagezero(vaddr_t);
```