summaryrefslogtreecommitdiff
path: root/sys/arch/amd64
diff options
context:
space:
mode:
authorMark Kettenis <kettenis@cvs.openbsd.org>2015-03-10 20:12:40 +0000
committerMark Kettenis <kettenis@cvs.openbsd.org>2015-03-10 20:12:40 +0000
commit4064dac86bddafdca1855f47b8451be98700aee6 (patch)
tree1358e1ccacaee4ce5677b7eaec9f9d7406d8101f /sys/arch/amd64
parentd691bfc745219e63466772b0f4a84b389ffaa6a0 (diff)
Make the amd64 pmap (more) mpsafe by protecting both the pmap itself and the
pv lists with a mutex. Rearange some code to avoid sleeping/spinning with one of these locks held, and also take care that we don't allocate/free any memory in that case. This should make pmap_enter(9), pmap_remove(9) and pmap_page_protect(9) safe to use without holding the kernel lock. Other architectures will follow. ok mlarkin@, deraadt@
Diffstat (limited to 'sys/arch/amd64')
-rw-r--r--sys/arch/amd64/amd64/pmap.c72
-rw-r--r--sys/arch/amd64/include/pmap.h11
2 files changed, 52 insertions, 31 deletions
diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index 07429d42990..59f61290d10 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.88 2015/02/07 01:46:27 kettenis Exp $ */
+/* $OpenBSD: pmap.c,v 1.89 2015/03/10 20:12:39 kettenis Exp $ */
/* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */
/*
@@ -103,6 +103,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/atomic.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
@@ -113,7 +114,6 @@
#include <uvm/uvm.h>
-#include <machine/atomic.h>
#include <machine/lock.h>
#include <machine/cpu.h>
#include <machine/specialreg.h>
@@ -385,6 +385,9 @@ pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp, paddr
lcr3(pmap->pm_pdirpa);
}
+ if (pmap != pmap_kernel())
+ mtx_enter(&pmap->pm_mtx);
+
*ptepp = PTE_BASE;
*pdeppp = normal_pdes;
return;
@@ -393,6 +396,9 @@ pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp, paddr
void
pmap_unmap_ptes(struct pmap *pmap, paddr_t save_cr3)
{
+ if (pmap != pmap_kernel())
+ mtx_leave(&pmap->pm_mtx);
+
if (save_cr3 != 0) {
x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
lcr3(save_cr3);
@@ -680,6 +686,7 @@ pmap_bootstrap(paddr_t first_avail, paddr_t max_pa)
pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl",
&pool_allocator_nointr);
pool_sethiwat(&pmap_pv_pool, 32 * 1024);
+ pool_setipl(&pmap_pv_pool, IPL_VM);
/*
* initialize the PDE pool.
@@ -764,8 +771,10 @@ pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, struct pmap *pmap,
pve->pv_pmap = pmap;
pve->pv_va = va;
pve->pv_ptp = ptp; /* NULL for kernel pmap */
+ mtx_enter(&pg->mdpage.pv_mtx);
pve->pv_next = pg->mdpage.pv_list; /* add to ... */
pg->mdpage.pv_list = pve; /* ... list */
+ mtx_leave(&pg->mdpage.pv_mtx);
}
/*
@@ -780,6 +789,7 @@ pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va)
{
struct pv_entry *pve, **prevptr;
+ mtx_enter(&pg->mdpage.pv_mtx);
prevptr = &pg->mdpage.pv_list;
while ((pve = *prevptr) != NULL) {
if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */
@@ -788,6 +798,7 @@ pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va)
}
prevptr = &pve->pv_next; /* previous pointer */
}
+ mtx_leave(&pg->mdpage.pv_mtx);
return(pve); /* return removed pve */
}
@@ -1007,6 +1018,8 @@ pmap_create(void)
pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
+ mtx_init(&pmap->pm_mtx, IPL_VM);
+
/* init uvm_object */
for (i = 0; i < PTP_LEVELS - 1; i++) {
uvm_objinit(&pmap->pm_obj[i], NULL, 1);
@@ -1049,7 +1062,7 @@ pmap_destroy(struct pmap *pmap)
* drop reference count
*/
- refs = --pmap->pm_obj[0].uo_refs;
+ refs = atomic_dec_int_nv(&pmap->pm_obj[0].uo_refs);
if (refs > 0) {
return;
}
@@ -1095,7 +1108,7 @@ pmap_destroy(struct pmap *pmap)
void
pmap_reference(struct pmap *pmap)
{
- pmap->pm_obj[0].uo_refs++;
+ atomic_inc_int(&pmap->pm_obj[0].uo_refs);
}
/*
@@ -1422,7 +1435,6 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
pmap_map_ptes(pmap, &ptes, &pdes, &scr3);
shootself = (scr3 == 0);
-
/*
* removing one page? take shortcut function.
*/
@@ -1563,8 +1575,11 @@ pmap_page_remove(struct vm_page *pg)
TAILQ_INIT(&empty_ptps);
+ mtx_enter(&pg->mdpage.pv_mtx);
while ((pve = pg->mdpage.pv_list) != NULL) {
pg->mdpage.pv_list = pve->pv_next;
+ pmap_reference(pve->pv_pmap);
+ mtx_leave(&pg->mdpage.pv_mtx);
/* XXX use direct map? */
pmap_map_ptes(pve->pv_pmap, &ptes, &pdes, &scr3);
@@ -1604,8 +1619,11 @@ pmap_page_remove(struct vm_page *pg)
}
}
pmap_unmap_ptes(pve->pv_pmap, scr3);
+ pmap_destroy(pve->pv_pmap);
pool_put(&pmap_pv_pool, pve);
+ mtx_enter(&pg->mdpage.pv_mtx);
}
+ mtx_leave(&pg->mdpage.pv_mtx);
pmap_tlb_shootwait();
@@ -1640,12 +1658,14 @@ pmap_test_attrs(struct vm_page *pg, unsigned int testbits)
return (TRUE);
mybits = 0;
+ mtx_enter(&pg->mdpage.pv_mtx);
for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0;
pve = pve->pv_next) {
level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes,
&offs);
mybits |= (ptes[offs] & testbits);
}
+ mtx_leave(&pg->mdpage.pv_mtx);
if (mybits == 0)
return (FALSE);
@@ -1675,6 +1695,7 @@ pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits)
if (result)
atomic_clearbits_int(&pg->pg_flags, clearflags);
+ mtx_enter(&pg->mdpage.pv_mtx);
for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) {
level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes,
&offs);
@@ -1686,6 +1707,7 @@ pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits)
pmap_is_curpmap(pve->pv_pmap));
}
}
+ mtx_leave(&pg->mdpage.pv_mtx);
pmap_tlb_shootwait();
@@ -1785,7 +1807,6 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
pmap_tlb_shootrange(pmap, sva, eva, shootself);
pmap_unmap_ptes(pmap, scr3);
-
pmap_tlb_shootwait();
}
@@ -1874,7 +1895,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
pt_entry_t *ptes, opte, npte;
pd_entry_t **pdes;
struct vm_page *ptp, *pg = NULL;
- struct pv_entry *pve = NULL;
+ struct pv_entry *pve, *opve = NULL;
int ptpdelta, wireddelta, resdelta;
boolean_t wired = (flags & PMAP_WIRED) != 0;
boolean_t nocache = (pa & PMAP_NOCACHE) != 0;
@@ -1896,6 +1917,15 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
#endif
+ pve = pool_get(&pmap_pv_pool, PR_NOWAIT);
+ if (pve == NULL) {
+ if (flags & PMAP_CANFAIL) {
+ error = ENOMEM;
+ goto out;
+ }
+ panic("%s: no pv entries available", __func__);
+ }
+
/*
* map in ptes and get a pointer to our PTP (unless we are the kernel)
*/
@@ -1908,6 +1938,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
ptp = pmap_get_ptp(pmap, va, pdes);
if (ptp == NULL) {
if (flags & PMAP_CANFAIL) {
+ pmap_unmap_ptes(pmap, scr3);
error = ENOMEM;
goto out;
}
@@ -1983,11 +2014,10 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
__func__, pa, atop(pa));
#endif
pmap_sync_flags_pte(pg, opte);
- pve = pmap_remove_pv(pg, pmap, va);
+ opve = pmap_remove_pv(pg, pmap, va);
pg = NULL; /* This is not the page we are looking for */
}
} else { /* opte not valid */
- pve = NULL;
resdelta = 1;
if (wired)
wireddelta = 1;
@@ -2010,21 +2040,8 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
pg = PHYS_TO_VM_PAGE(pa);
if (pg != NULL) {
- if (pve == NULL) {
- pve = pool_get(&pmap_pv_pool, PR_NOWAIT);
- if (pve == NULL) {
- if (flags & PMAP_CANFAIL) {
- error = ENOMEM;
- goto out;
- }
- panic("%s: no pv entries available", __func__);
- }
- }
pmap_enter_pv(pg, pve, pmap, va, ptp);
- } else {
- /* new mapping is not PG_PVLIST. free pve if we've got one */
- if (pve)
- pool_put(&pmap_pv_pool, pve);
+ pve = NULL;
}
enter_now:
@@ -2074,13 +2091,18 @@ enter_now:
if (nocache && (opte & PG_N) == 0)
wbinvd();
pmap_tlb_shootpage(pmap, va, shootself);
- pmap_tlb_shootwait();
}
+ pmap_unmap_ptes(pmap, scr3);
+ pmap_tlb_shootwait();
+
error = 0;
out:
- pmap_unmap_ptes(pmap, scr3);
+ if (pve)
+ pool_put(&pmap_pv_pool, pve);
+ if (opve)
+ pool_put(&pmap_pv_pool, opve);
return error;
}
diff --git a/sys/arch/amd64/include/pmap.h b/sys/arch/amd64/include/pmap.h
index e1567b57dcb..b311d0baa2b 100644
--- a/sys/arch/amd64/include/pmap.h
+++ b/sys/arch/amd64/include/pmap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.h,v 1.54 2015/02/19 03:19:11 mlarkin Exp $ */
+/* $OpenBSD: pmap.h,v 1.55 2015/03/10 20:12:39 kettenis Exp $ */
/* $NetBSD: pmap.h,v 1.1 2003/04/26 18:39:46 fvdl Exp $ */
/*
@@ -74,6 +74,7 @@
#include <machine/cpufunc.h>
#include <machine/segments.h>
#endif /* _KERNEL */
+#include <sys/mutex.h>
#include <uvm/uvm_object.h>
#include <machine/pte.h>
#endif
@@ -284,15 +285,11 @@ LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */
*
* note that the pm_obj contains the reference count,
* page list, and number of PTPs within the pmap.
- *
- * pm_lock is the same as the spinlock for vm object 0. Changes to
- * the other objects may only be made if that lock has been taken
- * (the other object locks are only used when uvm_pagealloc is called)
*/
struct pmap {
+ struct mutex pm_mtx;
struct uvm_object pm_obj[PTP_LEVELS-1]; /* objects for lvl >= 1) */
-#define pm_lock pm_obj[0].vmobjlock
#define pm_obj_l1 pm_obj[0]
#define pm_obj_l2 pm_obj[1]
#define pm_obj_l3 pm_obj[2]
@@ -519,10 +516,12 @@ kvtopte(vaddr_t va)
#ifndef _LOCORE
struct pv_entry;
struct vm_page_md {
+ struct mutex pv_mtx;
struct pv_entry *pv_list;
};
#define VM_MDPAGE_INIT(pg) do { \
+ mtx_init(&(pg)->mdpage.pv_mtx, IPL_VM); \
(pg)->mdpage.pv_list = NULL; \
} while (0)
#endif /* !_LOCORE */