From 4064dac86bddafdca1855f47b8451be98700aee6 Mon Sep 17 00:00:00 2001 From: Mark Kettenis Date: Tue, 10 Mar 2015 20:12:40 +0000 Subject: Make the amd64 pmap (more) mpsafe by protecting both the pmap itself and the pv lists with a mutex. Rearange some code to avoid sleeping/spinning with one of these locks held, and also take care that we don't allocate/free any memory in that case. This should make pmap_enter(9), pmap_remove(9) and pmap_page_protect(9) safe to use without holding the kernel lock. Other architectures will follow. ok mlarkin@, deraadt@ --- sys/arch/amd64/amd64/pmap.c | 72 ++++++++++++++++++++++++++++--------------- sys/arch/amd64/include/pmap.h | 11 +++---- 2 files changed, 52 insertions(+), 31 deletions(-) (limited to 'sys/arch/amd64') diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c index 07429d42990..59f61290d10 100644 --- a/sys/arch/amd64/amd64/pmap.c +++ b/sys/arch/amd64/amd64/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.88 2015/02/07 01:46:27 kettenis Exp $ */ +/* $OpenBSD: pmap.c,v 1.89 2015/03/10 20:12:39 kettenis Exp $ */ /* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */ /* @@ -103,6 +103,7 @@ #include #include +#include #include #include #include @@ -113,7 +114,6 @@ #include -#include #include #include #include @@ -385,6 +385,9 @@ pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp, paddr lcr3(pmap->pm_pdirpa); } + if (pmap != pmap_kernel()) + mtx_enter(&pmap->pm_mtx); + *ptepp = PTE_BASE; *pdeppp = normal_pdes; return; @@ -393,6 +396,9 @@ pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp, paddr void pmap_unmap_ptes(struct pmap *pmap, paddr_t save_cr3) { + if (pmap != pmap_kernel()) + mtx_leave(&pmap->pm_mtx); + if (save_cr3 != 0) { x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number())); lcr3(save_cr3); @@ -680,6 +686,7 @@ pmap_bootstrap(paddr_t first_avail, paddr_t max_pa) pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl", &pool_allocator_nointr); pool_sethiwat(&pmap_pv_pool, 32 * 1024); + pool_setipl(&pmap_pv_pool, IPL_VM); /* * initialize the PDE pool. @@ -764,8 +771,10 @@ pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, struct pmap *pmap, pve->pv_pmap = pmap; pve->pv_va = va; pve->pv_ptp = ptp; /* NULL for kernel pmap */ + mtx_enter(&pg->mdpage.pv_mtx); pve->pv_next = pg->mdpage.pv_list; /* add to ... */ pg->mdpage.pv_list = pve; /* ... list */ + mtx_leave(&pg->mdpage.pv_mtx); } /* @@ -780,6 +789,7 @@ pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va) { struct pv_entry *pve, **prevptr; + mtx_enter(&pg->mdpage.pv_mtx); prevptr = &pg->mdpage.pv_list; while ((pve = *prevptr) != NULL) { if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */ @@ -788,6 +798,7 @@ pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va) } prevptr = &pve->pv_next; /* previous pointer */ } + mtx_leave(&pg->mdpage.pv_mtx); return(pve); /* return removed pve */ } @@ -1007,6 +1018,8 @@ pmap_create(void) pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); + mtx_init(&pmap->pm_mtx, IPL_VM); + /* init uvm_object */ for (i = 0; i < PTP_LEVELS - 1; i++) { uvm_objinit(&pmap->pm_obj[i], NULL, 1); @@ -1049,7 +1062,7 @@ pmap_destroy(struct pmap *pmap) * drop reference count */ - refs = --pmap->pm_obj[0].uo_refs; + refs = atomic_dec_int_nv(&pmap->pm_obj[0].uo_refs); if (refs > 0) { return; } @@ -1095,7 +1108,7 @@ pmap_destroy(struct pmap *pmap) void pmap_reference(struct pmap *pmap) { - pmap->pm_obj[0].uo_refs++; + atomic_inc_int(&pmap->pm_obj[0].uo_refs); } /* @@ -1422,7 +1435,6 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) pmap_map_ptes(pmap, &ptes, &pdes, &scr3); shootself = (scr3 == 0); - /* * removing one page? take shortcut function. */ @@ -1563,8 +1575,11 @@ pmap_page_remove(struct vm_page *pg) TAILQ_INIT(&empty_ptps); + mtx_enter(&pg->mdpage.pv_mtx); while ((pve = pg->mdpage.pv_list) != NULL) { pg->mdpage.pv_list = pve->pv_next; + pmap_reference(pve->pv_pmap); + mtx_leave(&pg->mdpage.pv_mtx); /* XXX use direct map? */ pmap_map_ptes(pve->pv_pmap, &ptes, &pdes, &scr3); @@ -1604,8 +1619,11 @@ pmap_page_remove(struct vm_page *pg) } } pmap_unmap_ptes(pve->pv_pmap, scr3); + pmap_destroy(pve->pv_pmap); pool_put(&pmap_pv_pool, pve); + mtx_enter(&pg->mdpage.pv_mtx); } + mtx_leave(&pg->mdpage.pv_mtx); pmap_tlb_shootwait(); @@ -1640,12 +1658,14 @@ pmap_test_attrs(struct vm_page *pg, unsigned int testbits) return (TRUE); mybits = 0; + mtx_enter(&pg->mdpage.pv_mtx); for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0; pve = pve->pv_next) { level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes, &offs); mybits |= (ptes[offs] & testbits); } + mtx_leave(&pg->mdpage.pv_mtx); if (mybits == 0) return (FALSE); @@ -1675,6 +1695,7 @@ pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits) if (result) atomic_clearbits_int(&pg->pg_flags, clearflags); + mtx_enter(&pg->mdpage.pv_mtx); for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) { level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes, &offs); @@ -1686,6 +1707,7 @@ pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits) pmap_is_curpmap(pve->pv_pmap)); } } + mtx_leave(&pg->mdpage.pv_mtx); pmap_tlb_shootwait(); @@ -1785,7 +1807,6 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) pmap_tlb_shootrange(pmap, sva, eva, shootself); pmap_unmap_ptes(pmap, scr3); - pmap_tlb_shootwait(); } @@ -1874,7 +1895,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) pt_entry_t *ptes, opte, npte; pd_entry_t **pdes; struct vm_page *ptp, *pg = NULL; - struct pv_entry *pve = NULL; + struct pv_entry *pve, *opve = NULL; int ptpdelta, wireddelta, resdelta; boolean_t wired = (flags & PMAP_WIRED) != 0; boolean_t nocache = (pa & PMAP_NOCACHE) != 0; @@ -1896,6 +1917,15 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) #endif + pve = pool_get(&pmap_pv_pool, PR_NOWAIT); + if (pve == NULL) { + if (flags & PMAP_CANFAIL) { + error = ENOMEM; + goto out; + } + panic("%s: no pv entries available", __func__); + } + /* * map in ptes and get a pointer to our PTP (unless we are the kernel) */ @@ -1908,6 +1938,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) ptp = pmap_get_ptp(pmap, va, pdes); if (ptp == NULL) { if (flags & PMAP_CANFAIL) { + pmap_unmap_ptes(pmap, scr3); error = ENOMEM; goto out; } @@ -1983,11 +2014,10 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) __func__, pa, atop(pa)); #endif pmap_sync_flags_pte(pg, opte); - pve = pmap_remove_pv(pg, pmap, va); + opve = pmap_remove_pv(pg, pmap, va); pg = NULL; /* This is not the page we are looking for */ } } else { /* opte not valid */ - pve = NULL; resdelta = 1; if (wired) wireddelta = 1; @@ -2010,21 +2040,8 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) pg = PHYS_TO_VM_PAGE(pa); if (pg != NULL) { - if (pve == NULL) { - pve = pool_get(&pmap_pv_pool, PR_NOWAIT); - if (pve == NULL) { - if (flags & PMAP_CANFAIL) { - error = ENOMEM; - goto out; - } - panic("%s: no pv entries available", __func__); - } - } pmap_enter_pv(pg, pve, pmap, va, ptp); - } else { - /* new mapping is not PG_PVLIST. free pve if we've got one */ - if (pve) - pool_put(&pmap_pv_pool, pve); + pve = NULL; } enter_now: @@ -2074,13 +2091,18 @@ enter_now: if (nocache && (opte & PG_N) == 0) wbinvd(); pmap_tlb_shootpage(pmap, va, shootself); - pmap_tlb_shootwait(); } + pmap_unmap_ptes(pmap, scr3); + pmap_tlb_shootwait(); + error = 0; out: - pmap_unmap_ptes(pmap, scr3); + if (pve) + pool_put(&pmap_pv_pool, pve); + if (opve) + pool_put(&pmap_pv_pool, opve); return error; } diff --git a/sys/arch/amd64/include/pmap.h b/sys/arch/amd64/include/pmap.h index e1567b57dcb..b311d0baa2b 100644 --- a/sys/arch/amd64/include/pmap.h +++ b/sys/arch/amd64/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.54 2015/02/19 03:19:11 mlarkin Exp $ */ +/* $OpenBSD: pmap.h,v 1.55 2015/03/10 20:12:39 kettenis Exp $ */ /* $NetBSD: pmap.h,v 1.1 2003/04/26 18:39:46 fvdl Exp $ */ /* @@ -74,6 +74,7 @@ #include #include #endif /* _KERNEL */ +#include #include #include #endif @@ -284,15 +285,11 @@ LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */ * * note that the pm_obj contains the reference count, * page list, and number of PTPs within the pmap. - * - * pm_lock is the same as the spinlock for vm object 0. Changes to - * the other objects may only be made if that lock has been taken - * (the other object locks are only used when uvm_pagealloc is called) */ struct pmap { + struct mutex pm_mtx; struct uvm_object pm_obj[PTP_LEVELS-1]; /* objects for lvl >= 1) */ -#define pm_lock pm_obj[0].vmobjlock #define pm_obj_l1 pm_obj[0] #define pm_obj_l2 pm_obj[1] #define pm_obj_l3 pm_obj[2] @@ -519,10 +516,12 @@ kvtopte(vaddr_t va) #ifndef _LOCORE struct pv_entry; struct vm_page_md { + struct mutex pv_mtx; struct pv_entry *pv_list; }; #define VM_MDPAGE_INIT(pg) do { \ + mtx_init(&(pg)->mdpage.pv_mtx, IPL_VM); \ (pg)->mdpage.pv_list = NULL; \ } while (0) #endif /* !_LOCORE */ -- cgit v1.2.3