summaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorMark Kettenis <kettenis@cvs.openbsd.org>2015-10-01 16:03:49 +0000
committerMark Kettenis <kettenis@cvs.openbsd.org>2015-10-01 16:03:49 +0000
commit4bcd0a58316fc859b2e03a05faa1db731f8541d6 (patch)
tree0d60963c78fffd223957f9f8ec4c98c74db9026c /sys
parentface7fc54071ecc5a37898fac0b0798c24bbf6b9 (diff)
Make the alpha pmap (more) mpsafe by protecting both the pmap itself and the
pv lists with a mutex. This should make pmap_enter(9), pmap_remove(9) and pmap_page_protect(9) safe to use without holding the kernel lock. This largely reverts rev. 1.75, but now of course the pmap locks are defined to actually call mtx_enter(9) and mtx_leave(9). ok visa@
Diffstat (limited to 'sys')
-rw-r--r--sys/arch/alpha/alpha/pmap.c176
-rw-r--r--sys/arch/alpha/include/pmap.h5
2 files changed, 131 insertions, 50 deletions
diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c
index 7752e29b006..4f8895a314f 100644
--- a/sys/arch/alpha/alpha/pmap.c
+++ b/sys/arch/alpha/alpha/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.79 2015/09/08 21:28:35 kettenis Exp $ */
+/* $OpenBSD: pmap.c,v 1.80 2015/10/01 16:03:48 kettenis Exp $ */
/* $NetBSD: pmap.c,v 1.154 2000/12/07 22:18:55 thorpej Exp $ */
/*-
@@ -141,6 +141,7 @@
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/mutex.h>
+#include <sys/atomic.h>
#ifdef SYSVSHM
#include <sys/shm.h>
#endif
@@ -313,19 +314,11 @@ struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
/*
* Locking:
*
- * This pmap module uses two types of locks: `normal' (sleep)
- * locks and `simple' (spin) locks. They are used as follows:
+ * * pm_mtx (per-pmap) - This lock protects all of the members
+ * of the pmap structure itself.
*
- * SIMPLE LOCKS
- * ------------
- *
- * * pm_slock (per-pmap) - This lock protects all of the members
- * of the pmap structure itself. This lock will be asserted
- * in pmap_activate() and pmap_deactivate() from a critical
- * section of cpu_switch(), and must never sleep. Note that
- * in the case of the kernel pmap, interrupts which cause
- * memory allocation *must* be blocked while this lock is
- * asserted.
+ * * pvh_mtx (per-page) - This locks protects the list of mappings
+ * of a (managed) physical page.
*
* * pmap_all_pmaps_mtx - This lock protects the global list of
* all pmaps. Note that a pm_slock must never be held while this
@@ -338,13 +331,7 @@ struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
* pmap_growkernel() acquires the locks in the following order:
*
* pmap_growkernel_mtx -> pmap_all_pmaps_mtx ->
- * pmap->pm_slock
- *
- * But pmap_lev1map_create() is called with pmap->pm_slock held,
- * and also needs to acquire the pmap_growkernel_mtx. So,
- * we require that the caller of pmap_lev1map_create() (currently,
- * the only caller is pmap_enter()) acquire pmap_growkernel_mtx
- * before acquiring pmap->pm_slock.
+ * pmap->pm_mtx
*
* Address space number management (global ASN counters and per-pmap
* ASN state) are not locked; they use arrays of values indexed
@@ -357,6 +344,9 @@ struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
struct mutex pmap_all_pmaps_mtx;
struct mutex pmap_growkernel_mtx;
+#define PMAP_LOCK(pmap) mtx_enter(&pmap->pm_mtx)
+#define PMAP_UNLOCK(pmap) mtx_leave(&pmap->pm_mtx)
+
#if defined(MULTIPROCESSOR)
/*
* TLB Shootdown:
@@ -1083,8 +1073,6 @@ pmap_init(void)
* pmap_create: [ INTERFACE ]
*
* Create and return a physical map.
- *
- * Note: no locking is necessary in this function.
*/
pmap_t
pmap_create(void)
@@ -1105,6 +1093,7 @@ pmap_create(void)
/* XXX Locking? */
pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen;
}
+ mtx_init(&pmap->pm_mtx, IPL_VM);
for (;;) {
mtx_enter(&pmap_growkernel_mtx);
@@ -1138,7 +1127,7 @@ pmap_destroy(pmap_t pmap)
printf("pmap_destroy(%p)\n", pmap);
#endif
- refs = --pmap->pm_count;
+ refs = atomic_dec_int_nv(&pmap->pm_count);
if (refs > 0)
return;
@@ -1170,7 +1159,7 @@ pmap_reference(pmap_t pmap)
printf("pmap_reference(%p)\n", pmap);
#endif
- pmap->pm_count++;
+ atomic_inc_int(&pmap->pm_count);
}
/*
@@ -1190,9 +1179,7 @@ pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
#endif
- KERNEL_LOCK();
pmap_do_remove(pmap, sva, eva, TRUE);
- KERNEL_UNLOCK();
}
/*
@@ -1226,6 +1213,8 @@ pmap_do_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva, boolean_t dowired)
* interrupt context; pmap_kremove() is used for that.
*/
if (pmap == pmap_kernel()) {
+ PMAP_LOCK(pmap);
+
KASSERT(dowired == TRUE);
while (sva < eva) {
@@ -1244,6 +1233,8 @@ pmap_do_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva, boolean_t dowired)
sva += PAGE_SIZE;
}
+ PMAP_UNLOCK(pmap);
+
if (needisync)
PMAP_SYNC_ISTREAM_KERNEL();
return;
@@ -1255,12 +1246,14 @@ pmap_do_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva, boolean_t dowired)
"address range", sva, eva);
#endif
+ PMAP_LOCK(pmap);
+
/*
* If we're already referencing the kernel_lev1map, there
* is no work for us to do.
*/
if (pmap->pm_lev1map == kernel_lev1map)
- return;
+ goto out;
saved_l1pte = l1pte = pmap_l1pte(pmap, sva);
@@ -1340,6 +1333,9 @@ pmap_do_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva, boolean_t dowired)
if (needisync)
PMAP_SYNC_ISTREAM_USER(pmap);
+
+ out:
+ PMAP_UNLOCK(pmap);
}
/*
@@ -1352,7 +1348,7 @@ void
pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
{
pmap_t pmap;
- pv_entry_t pv, nextpv;
+ pv_entry_t pv;
boolean_t needkisync = FALSE;
cpuid_t cpu_id = cpu_number();
PMAP_TLB_SHOOTDOWN_CPUSET_DECL
@@ -1371,6 +1367,7 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
/* copy_on_write */
case PROT_READ | PROT_EXEC:
case PROT_READ:
+ mtx_enter(&pg->mdpage.pvh_mtx);
for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
if (*pv->pv_pte & (PG_KWE | PG_UWE)) {
*pv->pv_pte &= ~(PG_KWE | PG_UWE);
@@ -1381,6 +1378,7 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
pmap_pte_asm(pv->pv_pte));
}
}
+ mtx_leave(&pg->mdpage.pvh_mtx);
PMAP_TLB_SHOOTNOW();
return;
@@ -1389,9 +1387,32 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
break;
}
- for (pv = pg->mdpage.pvh_list; pv != NULL; pv = nextpv) {
- nextpv = pv->pv_next;
+ mtx_enter(&pg->mdpage.pvh_mtx);
+ while ((pv = pg->mdpage.pvh_list) != NULL) {
+ pmap_reference(pv->pv_pmap);
pmap = pv->pv_pmap;
+ mtx_leave(&pg->mdpage.pvh_mtx);
+
+ PMAP_LOCK(pmap);
+
+ /*
+ * We dropped the pvlist lock before grabbing the pmap
+ * lock to avoid lock ordering problems. This means
+ * we have to check the pvlist again since somebody
+ * else might have modified it. All we care about is
+ * that the pvlist entry matches the pmap we just
+ * locked. If it doesn't, unlock the pmap and try
+ * again.
+ */
+ mtx_enter(&pg->mdpage.pvh_mtx);
+ if ((pv = pg->mdpage.pvh_list) == NULL ||
+ pv->pv_pmap != pmap) {
+ mtx_leave(&pg->mdpage.pvh_mtx);
+ PMAP_UNLOCK(pmap);
+ pmap_destroy(pmap);
+ mtx_enter(&pg->mdpage.pvh_mtx);
+ continue;
+ }
#ifdef DEBUG
if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 ||
@@ -1405,7 +1426,12 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
else
PMAP_SYNC_ISTREAM_USER(pmap);
}
+ mtx_leave(&pg->mdpage.pvh_mtx);
+ PMAP_UNLOCK(pmap);
+ pmap_destroy(pmap);
+ mtx_enter(&pg->mdpage.pvh_mtx);
}
+ mtx_leave(&pg->mdpage.pvh_mtx);
if (needkisync)
PMAP_SYNC_ISTREAM_KERNEL();
@@ -1438,6 +1464,8 @@ pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
return;
}
+ PMAP_LOCK(pmap);
+
bits = pte_prot(pmap, prot);
isactive = PMAP_ISACTIVE(pmap, cpu_id);
@@ -1475,6 +1503,8 @@ pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
if (prot & PROT_EXEC)
PMAP_SYNC_ISTREAM(pmap);
+
+ PMAP_UNLOCK(pmap);
}
/*
@@ -1531,6 +1561,8 @@ pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
}
}
+ PMAP_LOCK(pmap);
+
if (pmap == pmap_kernel()) {
#ifdef DIAGNOSTIC
/*
@@ -1773,6 +1805,8 @@ pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
PMAP_SYNC_ISTREAM(pmap);
out:
+ PMAP_UNLOCK(pmap);
+
return error;
}
@@ -1917,6 +1951,8 @@ pmap_unwire(pmap_t pmap, vaddr_t va)
printf("pmap_unwire(%p, %lx)\n", pmap, va);
#endif
+ PMAP_LOCK(pmap);
+
pte = pmap_l3pte(pmap, va, NULL);
#ifdef DIAGNOSTIC
if (pte == NULL || pmap_pte_v(pte) == 0)
@@ -1938,6 +1974,8 @@ pmap_unwire(pmap_t pmap, vaddr_t va)
"didn't change!\n", pmap, va);
}
#endif
+
+ PMAP_UNLOCK(pmap);
}
/*
@@ -1976,6 +2014,8 @@ pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
goto out_nolock;
}
+ PMAP_LOCK(pmap);
+
l1pte = pmap_l1pte(pmap, va);
if (pmap_pte_v(l1pte) == 0)
goto out;
@@ -1992,6 +2032,7 @@ pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
*pap = pa;
rv = TRUE;
out:
+ PMAP_UNLOCK(pmap);
out_nolock:
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW) {
@@ -2045,7 +2086,7 @@ pmap_collect(pmap_t pmap)
/*
* This process is about to be swapped out; free all of
* the PT pages by removing the physical mappings for its
- * entire address space. Note: pmap_remove() performs
+ * entire address space. Note: pmap_do_remove() performs
* all necessary locking.
*/
pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS, FALSE);
@@ -2204,11 +2245,14 @@ pmap_clear_modify(struct vm_page *pg)
if (pmapdebug & PDB_FOLLOW)
printf("pmap_clear_modify(%p)\n", pg);
#endif
+
+ mtx_enter(&pg->mdpage.pvh_mtx);
if (pg->mdpage.pvh_attrs & PGA_MODIFIED) {
rv = TRUE;
pmap_changebit(pg, PG_FOW, ~0, cpu_id);
pg->mdpage.pvh_attrs &= ~PGA_MODIFIED;
}
+ mtx_leave(&pg->mdpage.pvh_mtx);
return (rv);
}
@@ -2229,11 +2273,13 @@ pmap_clear_reference(struct vm_page *pg)
printf("pmap_clear_reference(%p)\n", pg);
#endif
+ mtx_enter(&pg->mdpage.pvh_mtx);
if (pg->mdpage.pvh_attrs & PGA_REFERENCED) {
rv = TRUE;
pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id);
pg->mdpage.pvh_attrs &= ~PGA_REFERENCED;
}
+ mtx_leave(&pg->mdpage.pvh_mtx);
return (rv);
}
@@ -2365,6 +2411,16 @@ pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
pa = pmap_pte_pa(pte);
onpv = (pmap_pte_pv(pte) != 0);
+ if (onpv) {
+ /*
+ * Remove it from the PV table such that nobody will
+ * attempt to modify the PTE behind our back.
+ */
+ pg = PHYS_TO_VM_PAGE(pa);
+ KASSERT(pg != NULL);
+ pmap_pv_remove(pmap, pg, va, dolock);
+ }
+
hadasm = (pmap_pte_asm(pte) != 0);
isactive = PMAP_ISACTIVE(pmap, cpu_id);
@@ -2416,19 +2472,6 @@ pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
pmap_l3pt_delref(pmap, va, pte, cpu_id);
}
- /*
- * If the mapping wasn't entered on the PV list, we're all done.
- */
- if (onpv == FALSE)
- return (needisync);
-
- /*
- * Remove it from the PV table.
- */
- pg = PHYS_TO_VM_PAGE(pa);
- KASSERT(pg != NULL);
- pmap_pv_remove(pmap, pg, va, dolock);
-
return (needisync);
}
@@ -2438,9 +2481,9 @@ pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
* Set or clear the specified PTE bits for all mappings on the
* specified page.
*
- * Note: we assume that the pv_head is already locked, and that
- * the caller has acquired a PV->pmap mutex so that we can lock
- * the pmaps as we encounter them.
+ * Note: we assume that the pvlist is already locked. There is no
+ * need to lock the pmap itself as amapping cannot be removed while
+ * we are holding the pvlist lock.
*/
void
pmap_changebit(struct vm_page *pg, u_long set, u_long mask, cpuid_t cpu_id)
@@ -2457,6 +2500,8 @@ pmap_changebit(struct vm_page *pg, u_long set, u_long mask, cpuid_t cpu_id)
VM_PAGE_TO_PHYS(pg), set, mask);
#endif
+ MUTEX_ASSERT_LOCKED(&pg->mdpage.pvh_mtx);
+
/*
* Loop over all current mappings setting/clearing as appropriate.
*/
@@ -2493,6 +2538,7 @@ pmap_emulate_reference(struct proc *p, vaddr_t v, int user, int type)
pt_entry_t faultoff, *pte;
struct vm_page *pg;
paddr_t pa;
+ boolean_t didlock = FALSE;
boolean_t exec = FALSE;
cpuid_t cpu_id = cpu_number();
@@ -2520,13 +2566,22 @@ pmap_emulate_reference(struct proc *p, vaddr_t v, int user, int type)
panic("pmap_emulate_reference: bad p_vmspace");
#endif
pmap = p->p_vmspace->vm_map.pmap;
+ PMAP_LOCK(pmap);
+ didlock = TRUE;
pte = pmap_l3pte(pmap, v, NULL);
/*
* We'll unlock below where we're done with the PTE.
*/
}
+ if (pte == NULL || !pmap_pte_v(pte)) {
+ if (didlock)
+ PMAP_UNLOCK(pmap);
+ return (0);
+ }
exec = pmap_pte_exec(pte);
if (!exec && type == ALPHA_MMCSR_FOE) {
+ if (didlock)
+ PMAP_UNLOCK(pmap);
return (1);
}
#ifdef DEBUG
@@ -2536,8 +2591,6 @@ pmap_emulate_reference(struct proc *p, vaddr_t v, int user, int type)
}
#endif
#ifdef DEBUG /* These checks are more expensive */
- if (!pmap_pte_v(pte))
- panic("pmap_emulate_reference: invalid pte");
#ifndef MULTIPROCESSOR
/*
* Quoting the Alpha ARM 14.3.1.4/5/6:
@@ -2585,6 +2638,13 @@ pmap_emulate_reference(struct proc *p, vaddr_t v, int user, int type)
#endif
pa = pmap_pte_pa(pte);
+ /*
+ * We're now done with the PTE. If it was a user pmap, unlock
+ * it now.
+ */
+ if (didlock)
+ PMAP_UNLOCK(pmap);
+
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("\tpa = 0x%lx\n", pa);
@@ -2609,6 +2669,7 @@ pmap_emulate_reference(struct proc *p, vaddr_t v, int user, int type)
* (2) if it was a write fault, mark page as modified.
*/
+ mtx_enter(&pg->mdpage.pvh_mtx);
if (type == ALPHA_MMCSR_FOW) {
pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
faultoff = PG_FOR | PG_FOW;
@@ -2620,6 +2681,7 @@ pmap_emulate_reference(struct proc *p, vaddr_t v, int user, int type)
}
}
pmap_changebit(pg, 0, ~faultoff, cpu_id);
+ mtx_leave(&pg->mdpage.pvh_mtx);
return (0);
}
@@ -2639,9 +2701,11 @@ pmap_pv_dump(paddr_t pa)
pg = PHYS_TO_VM_PAGE(pa);
printf("pa 0x%lx (attrs = 0x%x):\n", pa, pg->mdpage.pvh_attrs);
+ mtx_enter(&pg->mdpage.pvh_mtx);
for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next)
printf(" pmap %p, va 0x%lx\n",
pv->pv_pmap, pv->pv_va);
+ mtx_leave(&pg->mdpage.pvh_mtx);
printf("\n");
}
#endif
@@ -2700,6 +2764,9 @@ pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
newpv->pv_pmap = pmap;
newpv->pv_pte = pte;
+ if (dolock)
+ mtx_enter(&pg->mdpage.pvh_mtx);
+
#ifdef DEBUG
{
pv_entry_t pv;
@@ -2721,6 +2788,9 @@ pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
newpv->pv_next = pg->mdpage.pvh_list;
pg->mdpage.pvh_list = newpv;
+ if (dolock)
+ mtx_leave(&pg->mdpage.pvh_mtx);
+
return (0);
}
@@ -2734,6 +2804,9 @@ pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, boolean_t dolock)
{
pv_entry_t pv, *pvp;
+ if (dolock)
+ mtx_enter(&pg->mdpage.pvh_mtx);
+
/*
* Find the entry to remove.
*/
@@ -2749,6 +2822,9 @@ pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, boolean_t dolock)
*pvp = pv->pv_next;
+ if (dolock)
+ mtx_leave(&pg->mdpage.pvh_mtx);
+
pmap_pv_free(pv);
}
@@ -2946,8 +3022,10 @@ pmap_growkernel(vaddr_t maxkvaddr)
if (pm == pmap_kernel())
continue;
+ PMAP_LOCK(pm);
KDASSERT(pm->pm_lev1map != kernel_lev1map);
pm->pm_lev1map[l1idx] = pte;
+ PMAP_UNLOCK(pm);
}
mtx_leave(&pmap_all_pmaps_mtx);
}
diff --git a/sys/arch/alpha/include/pmap.h b/sys/arch/alpha/include/pmap.h
index 861ed9e58e1..5d1f7df6997 100644
--- a/sys/arch/alpha/include/pmap.h
+++ b/sys/arch/alpha/include/pmap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.h,v 1.36 2015/02/15 21:34:33 miod Exp $ */
+/* $OpenBSD: pmap.h,v 1.37 2015/10/01 16:03:48 kettenis Exp $ */
/* $NetBSD: pmap.h,v 1.37 2000/11/19 03:16:35 thorpej Exp $ */
/*-
@@ -98,6 +98,7 @@ struct pmap {
TAILQ_ENTRY(pmap) pm_list; /* list of all pmaps */
pt_entry_t *pm_lev1map; /* level 1 map */
int pm_count; /* pmap reference count */
+ struct mutex pm_mtx; /* lock on pmap */
struct pmap_statistics pm_stats; /* pmap statistics */
unsigned long pm_cpus; /* mask of CPUs using pmap */
unsigned long pm_needisync; /* mask of CPUs needing isync */
@@ -300,12 +301,14 @@ do { \
* pmap-specific data stored in the vm_page structure.
*/
struct vm_page_md {
+ struct mutex pvh_mtx;
struct pv_entry *pvh_list; /* pv entry list */
int pvh_attrs; /* page attributes */
};
#define VM_MDPAGE_INIT(pg) \
do { \
+ mtx_init(&(pg)->mdpage.pvh_mtx, IPL_VM); \
(pg)->mdpage.pvh_list = NULL; \
(pg)->mdpage.pvh_attrs = 0; \
} while (0)