-rw-r--r-- | sys/uvm/uvm_amap.c | 169
-rw-r--r-- | sys/uvm/uvm_amap.h | 6
-rw-r--r-- | sys/uvm/uvm_anon.c | 120
-rw-r--r-- | sys/uvm/uvm_anon.h | 13
-rw-r--r-- | sys/uvm/uvm_fault.c | 62
-rw-r--r-- | sys/uvm/uvm_map.c | 9
-rw-r--r-- | sys/uvm/uvm_page.c | 5
-rw-r--r-- | sys/uvm/uvm_pager.c | 5
8 files changed, 291 insertions, 98 deletions
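
The heart of this change: each amap now carries a reference-counted rwlock, am_lock, and every anon attached to that amap shares the same lock object (anon->an_lock == amap->am_lock), so amap_lock() serializes access to the amap and all of its anons at once. In the kernel the shared lock is managed with rw_obj_alloc(), rw_obj_hold() and rw_obj_free(); the pthread-based sketch below only models that idea in user space, and every lock_obj_* name in it is invented for illustration.

/*
 * Minimal user-space model of the reference-counted lock object that this
 * commit attaches to every amap and shares with its anons.  Illustration
 * only: the kernel uses rw_obj_alloc()/rw_obj_hold()/rw_obj_free() on a
 * struct rwlock; every lock_obj_* name below is invented.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct lock_obj {
	pthread_rwlock_t lo_rwlock;	/* the lock itself */
	pthread_mutex_t	 lo_refmtx;	/* protects lo_refs */
	unsigned int	 lo_refs;	/* how many owners share this lock */
};

static struct lock_obj *
lock_obj_alloc(void)
{
	struct lock_obj *lo;

	if ((lo = malloc(sizeof(*lo))) == NULL)
		return NULL;
	pthread_rwlock_init(&lo->lo_rwlock, NULL);
	pthread_mutex_init(&lo->lo_refmtx, NULL);
	lo->lo_refs = 1;
	return lo;
}

static void
lock_obj_hold(struct lock_obj *lo)
{
	pthread_mutex_lock(&lo->lo_refmtx);
	lo->lo_refs++;
	pthread_mutex_unlock(&lo->lo_refmtx);
}

static void
lock_obj_rele(struct lock_obj *lo)
{
	unsigned int refs;

	pthread_mutex_lock(&lo->lo_refmtx);
	refs = --lo->lo_refs;
	pthread_mutex_unlock(&lo->lo_refmtx);
	if (refs == 0) {
		pthread_rwlock_destroy(&lo->lo_rwlock);
		pthread_mutex_destroy(&lo->lo_refmtx);
		free(lo);
	}
}

/* Toy stand-ins for struct vm_amap and struct vm_anon. */
struct amap { struct lock_obj *am_lock; };
struct anon { struct lock_obj *an_lock; };

int
main(void)
{
	struct amap am;
	struct anon an;

	if ((am.am_lock = lock_obj_alloc()) == NULL)
		return 1;

	/* An anon attached to the amap shares the amap's lock object. */
	an.an_lock = am.am_lock;
	lock_obj_hold(an.an_lock);

	/* Locking the amap therefore also serializes access to the anon. */
	pthread_rwlock_wrlock(&am.am_lock->lo_rwlock);
	assert(an.an_lock == am.am_lock);
	pthread_rwlock_unlock(&am.am_lock->lo_rwlock);

	lock_obj_rele(an.an_lock);	/* anon drops its share */
	lock_obj_rele(am.am_lock);	/* last reference frees the lock */
	printf("shared lock object freed\n");
	return 0;
}
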
diff --git a/sys/uvm/uvm_amap.c b/sys/uvm/uvm_amap.c index e4c38a5d4e3..7eb20e6a95d 100644 --- a/sys/uvm/uvm_amap.c +++ b/sys/uvm/uvm_amap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_amap.c,v 1.86 2020/11/13 11:11:48 mpi Exp $ */ +/* $OpenBSD: uvm_amap.c,v 1.87 2021/01/19 13:21:36 mpi Exp $ */ /* $NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $ */ /* @@ -55,6 +55,9 @@ struct pool uvm_small_amap_pool[UVM_AMAP_CHUNK]; struct pool uvm_amap_chunk_pool; LIST_HEAD(, vm_amap) amap_list; +struct rwlock amap_list_lock = RWLOCK_INITIALIZER("amaplstlk"); +#define amap_lock_list() rw_enter_write(&amap_list_lock) +#define amap_unlock_list() rw_exit_write(&amap_list_lock) static char amap_small_pool_names[UVM_AMAP_CHUNK][9]; @@ -89,13 +92,17 @@ void amap_wiperange(struct vm_amap *, int, int); static inline void amap_list_insert(struct vm_amap *amap) { + amap_lock_list(); LIST_INSERT_HEAD(&amap_list, amap, am_list); + amap_unlock_list(); } static inline void amap_list_remove(struct vm_amap *amap) -{ +{ + amap_lock_list(); LIST_REMOVE(amap, am_list); + amap_unlock_list(); } /* @@ -249,7 +256,7 @@ amap_init(void) /* Initialize the vm_amap pool. */ pool_init(&uvm_amap_pool, sizeof(struct vm_amap), - 0, IPL_NONE, PR_WAITOK, "amappl", NULL); + 0, IPL_MPFLOOR, PR_WAITOK, "amappl", NULL); pool_sethiwat(&uvm_amap_pool, 4096); /* initialize small amap pools */ @@ -258,13 +265,13 @@ amap_init(void) sizeof(amap_small_pool_names[0]), "amappl%d", i + 1); size = offsetof(struct vm_amap, am_small.ac_anon) + (i + 1) * sizeof(struct vm_anon *); - pool_init(&uvm_small_amap_pool[i], size, 0, - IPL_NONE, 0, amap_small_pool_names[i], NULL); + pool_init(&uvm_small_amap_pool[i], size, 0, IPL_MPFLOOR, + PR_WAITOK, amap_small_pool_names[i], NULL); } pool_init(&uvm_amap_chunk_pool, sizeof(struct vm_amap_chunk) + UVM_AMAP_CHUNK * sizeof(struct vm_anon *), - 0, IPL_NONE, 0, "amapchunkpl", NULL); + 0, IPL_MPFLOOR, PR_WAITOK, "amapchunkpl", NULL); pool_sethiwat(&uvm_amap_chunk_pool, 4096); } @@ -332,6 +339,7 @@ amap_alloc1(int slots, int waitf, int lazyalloc) if (amap == NULL) return(NULL); + amap->am_lock = NULL; amap->am_ref = 1; amap->am_flags = 0; #ifdef UVM_AMAP_PPREF @@ -389,6 +397,12 @@ fail1: return (NULL); } +static void +amap_lock_alloc(struct vm_amap *amap) +{ + rw_obj_alloc(&amap->am_lock, "amaplk"); +} + /* * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM * @@ -406,8 +420,10 @@ amap_alloc(vaddr_t sz, int waitf, int lazyalloc) return (NULL); amap = amap_alloc1(slots, waitf, lazyalloc); - if (amap) + if (amap != NULL) { + amap_lock_alloc(amap); amap_list_insert(amap); + } return(amap); } @@ -426,6 +442,11 @@ amap_free(struct vm_amap *amap) KASSERT(amap->am_ref == 0 && amap->am_nused == 0); KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0); + if (amap->am_lock != NULL) { + KASSERT(amap->am_lock == NULL || !rw_write_held(amap->am_lock)); + rw_obj_free(amap->am_lock); + } + #ifdef UVM_AMAP_PPREF if (amap->am_ppref && amap->am_ppref != PPREF_NONE) free(amap->am_ppref, M_UVMAMAP, amap->am_nslot * sizeof(int)); @@ -447,6 +468,7 @@ amap_free(struct vm_amap *amap) * * => called from amap_unref when the final reference to an amap is * discarded (i.e. when reference count == 1) + * => amap must be locked. */ void @@ -457,15 +479,16 @@ amap_wipeout(struct vm_amap *amap) struct vm_amap_chunk *chunk; struct pglist pgl; + KASSERT(rw_write_held(amap->am_lock)); KASSERT(amap->am_ref == 0); if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) { /* amap_swap_off will call us again. 
*/ + amap_unlock(amap); return; } TAILQ_INIT(&pgl); - amap_list_remove(amap); AMAP_CHUNK_FOREACH(chunk, amap) { @@ -478,6 +501,7 @@ amap_wipeout(struct vm_amap *amap) if (anon == NULL || anon->an_ref == 0) panic("amap_wipeout: corrupt amap"); + KASSERT(anon->an_lock == amap->am_lock); refs = --anon->an_ref; if (refs == 0) { @@ -495,7 +519,8 @@ amap_wipeout(struct vm_amap *amap) /* now we free the map */ amap->am_ref = 0; /* ... was one */ amap->am_nused = 0; - amap_free(amap); /* will free amap */ + amap_unlock(amap); + amap_free(amap); } /* @@ -503,6 +528,8 @@ amap_wipeout(struct vm_amap *amap) * by copying the amap if necessary. * * => an entry with a null amap pointer will get a new (blank) one. + * => the map that the map entry blocks to must be locked by caller. + * => the amap (if any) currently attached to the entry must be unlocked. * => if canchunk is true, then we may clip the entry into a chunk * => "startva" and "endva" are used only if canchunk is true. they are * used to limit chunking (e.g. if you have a large space that you @@ -519,6 +546,9 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf, vaddr_t chunksize; int i, j, k, n, srcslot; struct vm_amap_chunk *chunk = NULL, *srcchunk = NULL; + struct vm_anon *anon; + + KASSERT(map != kernel_map); /* we use sleeping locks */ /* is there a map to copy? if not, create one from scratch. */ if (entry->aref.ar_amap == NULL) { @@ -574,6 +604,8 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf, return; srcamap = entry->aref.ar_amap; + amap_lock(srcamap); + /* * need to double check reference count now. the reference count * could have changed while we were in malloc. if the reference count @@ -582,6 +614,7 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf, */ if (srcamap->am_ref == 1) { /* take it over? */ entry->etype &= ~UVM_ET_NEEDSCOPY; + amap_unlock(srcamap); amap->am_ref--; /* drop final reference to map */ amap_free(amap); /* dispose of new (unused) amap */ return; @@ -606,18 +639,21 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf, chunk = amap_chunk_get(amap, lcv, 1, PR_NOWAIT); if (chunk == NULL) { + amap_unlock(srcamap); amap->am_ref = 0; amap_wipeout(amap); return; } for (k = 0; k < n; i++, j++, k++) { - chunk->ac_anon[i] = srcchunk->ac_anon[j]; - if (chunk->ac_anon[i] == NULL) + chunk->ac_anon[i] = anon = srcchunk->ac_anon[j]; + if (anon == NULL) continue; + KASSERT(anon->an_lock == srcamap->am_lock); + KASSERT(anon->an_ref > 0); chunk->ac_usedmap |= (1 << i); - chunk->ac_anon[i]->an_ref++; + anon->an_ref++; amap->am_nused++; } } @@ -629,6 +665,8 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf, * the count to zero. [and no need to worry about freeing it] */ srcamap->am_ref--; + KASSERT(srcamap->am_ref > 0); + if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) srcamap->am_flags &= ~AMAP_SHARED; /* clear shared flag */ #ifdef UVM_AMAP_PPREF @@ -638,6 +676,20 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf, } #endif + /* + * If we referenced any anons, then share the source amap's lock. + * Otherwise, we have nothing in common, so allocate a new one. + */ + KASSERT(amap->am_lock == NULL); + if (amap->am_nused != 0) { + amap->am_lock = srcamap->am_lock; + rw_obj_hold(amap->am_lock); + } + amap_unlock(srcamap); + + if (amap->am_lock == NULL) + amap_lock_alloc(amap); + /* install new amap. 
*/ entry->aref.ar_pageoff = 0; entry->aref.ar_amap = amap; @@ -655,6 +707,7 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf, * so we resolve the COW here. * * => assume parent's entry was wired, thus all pages are resident. + * => the parent and child vm_map must both be locked. * => caller passes child's map/entry in to us * => XXXCDC: out of memory should cause fork to fail, but there is * currently no easy way to do this (needs fix) @@ -675,6 +728,7 @@ amap_cow_now(struct vm_map *map, struct vm_map_entry *entry) * am_anon[] array on us. */ ReStart: + amap_lock(amap); AMAP_CHUNK_FOREACH(chunk, amap) { int i, map = chunk->ac_usedmap; @@ -683,6 +737,7 @@ ReStart: map ^= 1 << slot; anon = chunk->ac_anon[slot]; pg = anon->an_page; + KASSERT(anon->an_lock == amap->am_lock); /* page must be resident since parent is wired */ KASSERT(pg != NULL); @@ -700,24 +755,27 @@ ReStart: */ if (pg->pg_flags & PG_BUSY) { atomic_setbits_int(&pg->pg_flags, PG_WANTED); - tsleep_nsec(pg, PVM, "cownow", INFSLP); + rwsleep_nsec(pg, amap->am_lock, PVM | PNORELOCK, + "cownow", INFSLP); goto ReStart; } /* ok, time to do a copy-on-write to a new anon */ nanon = uvm_analloc(); - if (nanon) { + if (nanon != NULL) { + /* the new anon will share the amap's lock */ + nanon->an_lock = amap->am_lock; npg = uvm_pagealloc(NULL, 0, nanon, 0); } else npg = NULL; /* XXX: quiet gcc warning */ if (nanon == NULL || npg == NULL) { /* out of memory */ - /* - * XXXCDC: we should cause fork to fail, but - * we can't ... - */ - if (nanon) { + amap_unlock(amap); + if (nanon != NULL) { + nanon->an_lock = NULL; + nanon->an_ref--; + KASSERT(nanon->an_ref == 0); uvm_anfree(nanon); } uvm_wait("cownowpage"); @@ -730,6 +788,7 @@ ReStart: */ uvm_pagecopy(pg, npg); /* old -> new */ anon->an_ref--; /* can't drop to zero */ + KASSERT(anon->an_ref > 0); chunk->ac_anon[slot] = nanon; /* replace */ /* @@ -744,6 +803,7 @@ ReStart: uvm_unlock_pageq(); } } + amap_unlock(amap); } /* @@ -757,10 +817,13 @@ amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset) struct vm_amap *amap = origref->ar_amap; int leftslots; + KASSERT(splitref->ar_amap == amap); AMAP_B2SLOT(leftslots, offset); if (leftslots == 0) panic("amap_splitref: split at zero offset"); + amap_lock(amap); + /* now: we have a valid am_mapped array. 
*/ if (amap->am_nslot - origref->ar_pageoff - leftslots <= 0) panic("amap_splitref: map size check failed"); @@ -775,6 +838,7 @@ amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset) amap->am_ref++; splitref->ar_amap = amap; splitref->ar_pageoff = origref->ar_pageoff + leftslots; + amap_unlock(amap); } #ifdef UVM_AMAP_PPREF @@ -786,6 +850,7 @@ void amap_pp_establish(struct vm_amap *amap) { + KASSERT(rw_write_held(amap->am_lock)); amap->am_ppref = mallocarray(amap->am_nslot, sizeof(int), M_UVMAMAP, M_NOWAIT|M_ZERO); @@ -811,6 +876,8 @@ amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval) int stopslot, *ppref, lcv, prevlcv; int ref, len, prevref, prevlen; + KASSERT(rw_write_held(amap->am_lock)); + stopslot = curslot + slotlen; ppref = amap->am_ppref; prevlcv = 0; @@ -893,6 +960,7 @@ amap_wiperange_chunk(struct vm_amap *amap, struct vm_amap_chunk *chunk, map ^= 1 << curslot; chunk->ac_usedmap ^= 1 << curslot; anon = chunk->ac_anon[curslot]; + KASSERT(anon->an_lock == amap->am_lock); /* remove it from the amap */ chunk->ac_anon[curslot] = NULL; @@ -902,10 +970,6 @@ amap_wiperange_chunk(struct vm_amap *amap, struct vm_amap_chunk *chunk, /* drop anon reference count */ refs = --anon->an_ref; if (refs == 0) { - /* - * we just eliminated the last reference to an - * anon. free it. - */ uvm_anfree(anon); } } @@ -921,6 +985,8 @@ amap_wiperange(struct vm_amap *amap, int slotoff, int slots) int bucket, startbucket, endbucket; struct vm_amap_chunk *chunk, *nchunk; + KASSERT(rw_write_held(amap->am_lock)); + startbucket = UVM_AMAP_BUCKET(amap, slotoff); endbucket = UVM_AMAP_BUCKET(amap, slotoff + slots - 1); @@ -980,12 +1046,24 @@ amap_swap_off(int startslot, int endslot) { struct vm_amap *am; struct vm_amap *am_next; + struct vm_amap marker; boolean_t rv = FALSE; + amap_lock_list(); for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) { int i, map; struct vm_amap_chunk *chunk; + amap_lock(am); + if (am->am_nused == 0) { + amap_unlock(am); + am_next = LIST_NEXT(am, am_list); + continue; + } + + LIST_INSERT_AFTER(am, &marker, am_list); + amap_unlock_list(); + again: AMAP_CHUNK_FOREACH(chunk, am) { map = chunk->ac_usedmap; @@ -1005,20 +1083,28 @@ again: am->am_flags |= AMAP_SWAPOFF; - rv = uvm_anon_pagein(anon); + rv = uvm_anon_pagein(am, anon); + amap_lock(am); am->am_flags &= ~AMAP_SWAPOFF; - if (rv || amap_refs(am) == 0) + if (amap_refs(am) == 0) { + amap_wipeout(am); + am = NULL; + goto nextamap; + } + if (rv) goto nextamap; goto again; } } - nextamap: - am_next = LIST_NEXT(am, am_list); - if (amap_refs(am) == 0) - amap_wipeout(am); + if (am != NULL) + amap_unlock(am); + amap_lock_list(); + am_next = LIST_NEXT(&marker, am_list); + LIST_REMOVE(&marker, am_list); } + amap_unlock_list(); return rv; } @@ -1147,9 +1233,11 @@ amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon, void amap_unadd(struct vm_aref *aref, vaddr_t offset) { - int slot; struct vm_amap *amap = aref->ar_amap; struct vm_amap_chunk *chunk; + int slot; + + KASSERT(rw_write_held(amap->am_lock)); AMAP_B2SLOT(slot, offset); slot += aref->ar_pageoff; @@ -1176,6 +1264,12 @@ amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len, int refv, boolean_t all) { #ifdef UVM_AMAP_PPREF + KASSERT(rw_write_held(amap->am_lock)); + + /* + * We must establish the ppref array before changing am_ref + * so that the ppref values match the current amap refcount. 
+ */ if (amap->am_ppref == NULL && !all && len != amap->am_nslot) { amap_pp_establish(amap); } @@ -1192,32 +1286,37 @@ amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len, } } #endif + amap_unlock(amap); } /* - * amap_ref: gain a reference to an amap + * amap_ref: gain a reference to an amap. * - * => "offset" and "len" are in units of pages - * => called at fork time to gain the child's reference + * => amap must not be locked (we will lock). + * => "offset" and "len" are in units of pages. + * => Called at fork time to gain the child's reference. */ void amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags) { - + amap_lock(amap); if (flags & AMAP_SHARED) amap->am_flags |= AMAP_SHARED; amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0); } /* - * amap_unref: remove a reference to an amap + * amap_unref: remove a reference to an amap. * * => All pmap-level references to this amap must be already removed. * => Called from uvm_unmap_detach(); entry is already removed from the map. + * => We will lock amap, so it must be unlocked. */ void amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all) { + amap_lock(amap); + KASSERT(amap->am_ref > 0); if (amap->am_ref == 1) { diff --git a/sys/uvm/uvm_amap.h b/sys/uvm/uvm_amap.h index c0de03d2312..fc0c4df0b51 100644 --- a/sys/uvm/uvm_amap.h +++ b/sys/uvm/uvm_amap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_amap.h,v 1.32 2020/11/13 11:11:49 mpi Exp $ */ +/* $OpenBSD: uvm_amap.h,v 1.33 2021/01/19 13:21:36 mpi Exp $ */ /* $NetBSD: uvm_amap.h,v 1.14 2001/02/18 21:19:08 chs Exp $ */ /* @@ -133,6 +133,7 @@ struct vm_amap_chunk { }; struct vm_amap { + struct rwlock *am_lock; /* lock for all vm_amap flags */ int am_ref; /* reference count */ int am_flags; /* flags */ int am_nslot; /* # of slots currently in map */ @@ -261,6 +262,9 @@ struct vm_amap { #define amap_flags(AMAP) ((AMAP)->am_flags) #define amap_refs(AMAP) ((AMAP)->am_ref) +#define amap_lock(AMAP) rw_enter_write((AMAP)->am_lock) +#define amap_unlock(AMAP) rw_exit_write((AMAP)->am_lock) + #endif /* _KERNEL */ #endif /* _UVM_UVM_AMAP_H_ */ diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c index 11e0892a553..b64321410da 100644 --- a/sys/uvm/uvm_anon.c +++ b/sys/uvm/uvm_anon.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_anon.c,v 1.50 2020/11/24 13:49:09 mpi Exp $ */ +/* $OpenBSD: uvm_anon.c,v 1.51 2021/01/19 13:21:36 mpi Exp $ */ /* $NetBSD: uvm_anon.c,v 1.10 2000/11/25 06:27:59 chs Exp $ */ /* @@ -48,7 +48,7 @@ struct pool uvm_anon_pool; void uvm_anon_init(void) { - pool_init(&uvm_anon_pool, sizeof(struct vm_anon), 0, IPL_NONE, + pool_init(&uvm_anon_pool, sizeof(struct vm_anon), 0, IPL_MPFLOOR, PR_WAITOK, "anonpl", NULL); pool_sethiwat(&uvm_anon_pool, uvmexp.free / 16); } @@ -63,6 +63,7 @@ uvm_analloc(void) anon = pool_get(&uvm_anon_pool, PR_NOWAIT); if (anon) { + anon->an_lock = NULL; anon->an_ref = 1; anon->an_page = NULL; anon->an_swslot = 0; @@ -71,25 +72,26 @@ uvm_analloc(void) } /* - * uvm_anfree: free a single anon structure + * uvm_anfree_list: free a single anon structure * - * => caller must remove anon from its amap before calling (if it was in - * an amap). + * => anon must be removed from the amap (if anon was in an amap). + * => amap must be locked, if anon was owned by amap. * => we may lock the pageq's. 
*/ void uvm_anfree_list(struct vm_anon *anon, struct pglist *pgl) { - struct vm_page *pg; + struct vm_page *pg = anon->an_page; - /* get page */ - pg = anon->an_page; + KASSERT(anon->an_lock == NULL || rw_write_held(anon->an_lock)); + KASSERT(anon->an_ref == 0); /* - * if we have a resident page, we must dispose of it before freeing - * the anon. + * Dispose of the page, if it is resident. */ - if (pg) { + if (pg != NULL) { + KASSERT(anon->an_lock != NULL); + /* * if page is busy then we just mark it as released (who ever * has it busy must check for this when they wake up). if the @@ -98,6 +100,7 @@ uvm_anfree_list(struct vm_anon *anon, struct pglist *pgl) if ((pg->pg_flags & PG_BUSY) != 0) { /* tell them to dump it when done */ atomic_setbits_int(&pg->pg_flags, PG_RELEASED); + rw_obj_hold(anon->an_lock); return; } pmap_page_protect(pg, PROT_NONE); @@ -115,12 +118,14 @@ uvm_anfree_list(struct vm_anon *anon, struct pglist *pgl) uvm_pagefree(pg); /* bye bye */ uvm_unlock_pageq(); /* free the daemon */ } + } else { + if (anon->an_swslot != 0) { + /* this page is no longer only in swap. */ + KASSERT(uvmexp.swpgonly > 0); + uvmexp.swpgonly--; + } } - if (pg == NULL && anon->an_swslot != 0) { - /* this page is no longer only in swap. */ - KASSERT(uvmexp.swpgonly > 0); - uvmexp.swpgonly--; - } + anon->an_lock = NULL; /* free any swap resources. */ uvm_anon_dropswap(anon); @@ -135,12 +140,6 @@ uvm_anfree_list(struct vm_anon *anon, struct pglist *pgl) pool_put(&uvm_anon_pool, anon); } -void -uvm_anfree(struct vm_anon *anon) -{ - uvm_anfree_list(anon, NULL); -} - /* * uvm_anwait: wait for memory to become available to allocate an anon. */ @@ -155,35 +154,25 @@ uvm_anwait(void) } /* - * uvm_anon_dropswap: release any swap resources from this anon. - */ -void -uvm_anon_dropswap(struct vm_anon *anon) -{ - - if (anon->an_swslot == 0) - return; - - uvm_swap_free(anon->an_swslot, 1); - anon->an_swslot = 0; -} - -/* * fetch an anon's page. * * => returns TRUE if pagein was aborted due to lack of memory. */ boolean_t -uvm_anon_pagein(struct vm_anon *anon) +uvm_anon_pagein(struct vm_amap *amap, struct vm_anon *anon) { struct vm_page *pg; int rv; - rv = uvmfault_anonget(NULL, NULL, anon); + KASSERT(rw_write_held(anon->an_lock)); + KASSERT(anon->an_lock == amap->am_lock); + + rv = uvmfault_anonget(NULL, amap, anon); switch (rv) { case VM_PAGER_OK: + KASSERT(rw_write_held(anon->an_lock)); break; case VM_PAGER_ERROR: case VM_PAGER_REFAULT: @@ -206,7 +195,9 @@ uvm_anon_pagein(struct vm_anon *anon) * mark it as dirty, clear its swslot and un-busy it. */ pg = anon->an_page; - uvm_swap_free(anon->an_swslot, 1); + if (anon->an_swslot > 0) { + uvm_swap_free(anon->an_swslot, 1); + } anon->an_swslot = 0; atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); @@ -216,6 +207,57 @@ uvm_anon_pagein(struct vm_anon *anon) uvm_lock_pageq(); uvm_pagedeactivate(pg); uvm_unlock_pageq(); + rw_exit(anon->an_lock); return FALSE; } + +/* + * uvm_anon_dropswap: release any swap resources from this anon. + * + * => anon must be locked or have a reference count of 0. + */ +void +uvm_anon_dropswap(struct vm_anon *anon) +{ + KASSERT(anon->an_ref == 0 || rw_lock_held(anon->an_lock)); + + if (anon->an_swslot == 0) + return; + + uvm_swap_free(anon->an_swslot, 1); + anon->an_swslot = 0; +} + + +/* + * uvm_anon_release: release an anon and its page. + * + * => anon should not have any references. + * => anon must be locked. 
+ */ + +void +uvm_anon_release(struct vm_anon *anon) +{ + struct vm_page *pg = anon->an_page; + struct rwlock *lock; + + KASSERT(rw_write_held(anon->an_lock)); + KASSERT(pg != NULL); + KASSERT((pg->pg_flags & PG_RELEASED) != 0); + KASSERT((pg->pg_flags & PG_BUSY) != 0); + KASSERT(pg->uobject == NULL); + KASSERT(pg->uanon == anon); + KASSERT(anon->an_ref == 0); + + uvm_lock_pageq(); + uvm_pagefree(pg); + uvm_unlock_pageq(); + KASSERT(anon->an_page == NULL); + lock = anon->an_lock; + uvm_anfree(anon); + rw_exit(lock); + /* Note: extra reference is held for PG_RELEASED case. */ + rw_obj_free(lock); +} diff --git a/sys/uvm/uvm_anon.h b/sys/uvm/uvm_anon.h index 50e7c17f72e..7db12a8223a 100644 --- a/sys/uvm/uvm_anon.h +++ b/sys/uvm/uvm_anon.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_anon.h,v 1.21 2020/01/04 16:17:29 beck Exp $ */ +/* $OpenBSD: uvm_anon.h,v 1.22 2021/01/19 13:21:36 mpi Exp $ */ /* $NetBSD: uvm_anon.h,v 1.13 2000/12/27 09:17:04 chs Exp $ */ /* @@ -38,6 +38,8 @@ */ struct vm_anon { + struct rwlock *an_lock; + struct vm_page *an_page; /* if in RAM */ int an_ref; /* reference count */ @@ -78,12 +80,15 @@ struct vm_aref { #ifdef _KERNEL struct vm_anon *uvm_analloc(void); -void uvm_anfree(struct vm_anon *); -void uvm_anfree_list(struct vm_anon *, struct pglist *); +void uvm_anfree_list(struct vm_anon *, struct pglist *); +void uvm_anon_release(struct vm_anon *); void uvm_anwait(void); void uvm_anon_init(void); void uvm_anon_dropswap(struct vm_anon *); -boolean_t uvm_anon_pagein(struct vm_anon *); +boolean_t uvm_anon_pagein(struct vm_amap *, struct vm_anon *); + +#define uvm_anfree(an) uvm_anfree_list((an), NULL) + #endif /* _KERNEL */ #endif /* _UVM_UVM_ANON_H_ */ diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index c2b546ffb18..407f5d75cb6 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_fault.c,v 1.112 2021/01/16 18:32:47 mpi Exp $ */ +/* $OpenBSD: uvm_fault.c,v 1.113 2021/01/19 13:21:36 mpi Exp $ */ /* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */ /* @@ -136,8 +136,7 @@ * by multiple map entries, and figuring out what should wait could be * complex as well...). * - * given that we are not currently multiprocessor or multithreaded we might - * as well choose alternative 2 now. maybe alternative 3 would be useful + * we use alternative 2 currently. maybe alternative 3 would be useful * in the future. XXX keep in mind for future consideration//rechecking. 
*/ @@ -181,6 +180,7 @@ uvmfault_anonflush(struct vm_anon **anons, int n) for (lcv = 0 ; lcv < n ; lcv++) { if (anons[lcv] == NULL) continue; + KASSERT(rw_lock_held(anons[lcv]->an_lock)); pg = anons[lcv]->an_page; if (pg && (pg->pg_flags & PG_BUSY) == 0) { uvm_lock_pageq(); @@ -271,6 +271,9 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, struct vm_page *pg; int result; + KASSERT(rw_lock_held(anon->an_lock)); + KASSERT(anon->an_lock == amap->am_lock); + result = 0; /* XXX shut up gcc */ counters_inc(uvmexp_counters, flt_anget); /* bump rusage counters */ @@ -302,8 +305,14 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, * the last unlock must be an atomic unlock+wait on * the owner of page */ - uvmfault_unlockall(ufi, amap, NULL); - tsleep_nsec(pg, PVM, "anonget2", INFSLP); + if (pg->uobject) { + uvmfault_unlockall(ufi, amap, NULL); + tsleep_nsec(pg, PVM, "anonget1", INFSLP); + } else { + uvmfault_unlockall(ufi, NULL, NULL); + rwsleep_nsec(pg, anon->an_lock, PVM | PNORELOCK, + "anonget2", INFSLP); + } /* ready to relock and try again */ } else { /* no page, we must try and bring it in. */ @@ -340,6 +349,9 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, /* now relock and try again */ locked = uvmfault_relock(ufi); + if (locked || we_own) { + rw_enter(anon->an_lock, RW_WRITE); + } /* * if we own the page (i.e. we set PG_BUSY), then we need @@ -367,9 +379,10 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, */ if (pg->pg_flags & PG_RELEASED) { pmap_page_protect(pg, PROT_NONE); - uvm_anfree(anon); /* frees page for us */ + KASSERT(anon->an_ref == 0); if (locked) uvmfault_unlockall(ufi, amap, NULL); + uvm_anon_release(anon); /* frees page for us */ counters_inc(uvmexp_counters, flt_pgrele); return (VM_PAGER_REFAULT); /* refault! */ } @@ -400,6 +413,7 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, if (locked) uvmfault_unlockall(ufi, amap, NULL); + rw_exit(anon->an_lock); return (VM_PAGER_ERROR); } @@ -414,8 +428,12 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, } /* we were not able to relock. restart fault. */ - if (!locked) + if (!locked) { + if (we_own) { + rw_exit(anon->an_lock); + } return (VM_PAGER_REFAULT); + } /* verify no one touched the amap and moved the anon on us. */ if (ufi != NULL && @@ -605,6 +623,7 @@ uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, /* if we've got an amap, extract current anons. */ if (amap) { + amap_lock(amap); amap_lookups(&ufi->entry->aref, flt->startva - ufi->entry->start, *ranons, flt->npages); } else { @@ -625,8 +644,10 @@ uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, voff_t uoff; uoff = (flt->startva - ufi->entry->start) + ufi->entry->offset; + KERNEL_LOCK(); (void) uobj->pgops->pgo_flush(uobj, uoff, uoff + ((vsize_t)nback << PAGE_SHIFT), PGO_DEACTIVATE); + KERNEL_UNLOCK(); } /* now forget about the backpages */ @@ -656,6 +677,9 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, struct vm_page *pg = NULL; int error, ret; + KASSERT(rw_write_held(amap->am_lock)); + KASSERT(anon->an_lock == amap->am_lock); + /* * no matter if we have case 1A or case 1B we are going to need to * have the anon's memory resident. ensure that now. 
@@ -687,6 +711,9 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, #endif } + KASSERT(rw_write_held(amap->am_lock)); + KASSERT(anon->an_lock == amap->am_lock); + /* * if we are case 1B then we will need to allocate a new blank * anon to transfer the data into. note that we have a lock @@ -705,6 +732,7 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, oanon = anon; /* oanon = old */ anon = uvm_analloc(); if (anon) { + anon->an_lock = amap->am_lock; pg = uvm_pagealloc(NULL, 0, anon, 0); } @@ -714,6 +742,8 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, if (anon == NULL) counters_inc(uvmexp_counters, flt_noanon); else { + anon->an_lock = NULL; + anon->an_ref--; uvm_anfree(anon); counters_inc(uvmexp_counters, flt_noram); } @@ -806,7 +836,6 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, return 0; } - /* * uvm_fault_upper_lookup: look up existing h/w mapping and amap. * @@ -858,6 +887,7 @@ uvm_fault_upper_lookup(struct uvm_faultinfo *ufi, continue; } anon = anons[lcv]; + KASSERT(anon->an_lock == amap->am_lock); if (anon->an_page && (anon->an_page->pg_flags & (PG_RELEASED|PG_BUSY)) == 0) { uvm_lock_pageq(); @@ -1136,6 +1166,8 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, /* re-verify the state of the world. */ locked = uvmfault_relock(ufi); + if (locked && amap != NULL) + amap_lock(amap); /* * Re-verify that amap slot is still free. if there is @@ -1213,6 +1245,7 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, * a zero'd, dirty page, so have * uvm_pagealloc() do that for us. */ + anon->an_lock = amap->am_lock; pg = uvm_pagealloc(NULL, 0, anon, (uobjpage == PGO_DONTCARE) ? UVM_PGA_ZERO : 0); } @@ -1239,6 +1272,8 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, if (anon == NULL) counters_inc(uvmexp_counters, flt_noanon); else { + anon->an_lock = NULL; + anon->an_ref--; uvm_anfree(anon); counters_inc(uvmexp_counters, flt_noram); } @@ -1266,7 +1301,7 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, */ if ((amap_flags(amap) & AMAP_SHARED) != 0) { pmap_page_protect(uobjpage, PROT_NONE); - } + } /* dispose of uobjpage. drop handle to uobj as well. */ if (uobjpage->pg_flags & PG_WANTED) @@ -1306,6 +1341,12 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, * all resources are present. we can now map it in and free our * resources. */ + if (amap == NULL) + KASSERT(anon == NULL); + else { + KASSERT(rw_write_held(amap->am_lock)); + KASSERT(anon == NULL || anon->an_lock == amap->am_lock); + } if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr, VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot, flt->access_type | PMAP_CANFAIL | (flt->wired ? 
PMAP_WIRED : 0)) != 0) { @@ -1491,7 +1532,8 @@ void uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap, struct uvm_object *uobj) { - + if (amap != NULL) + amap_unlock(amap); uvmfault_unlockmaps(ufi, FALSE); } diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index c5c40ef7637..931504a7417 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_map.c,v 1.269 2020/10/19 08:19:46 mpi Exp $ */ +/* $OpenBSD: uvm_map.c,v 1.270 2021/01/19 13:21:36 mpi Exp $ */ /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ /* @@ -1104,10 +1104,8 @@ uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, if (flags & UVM_FLAG_CONCEAL) entry->etype |= UVM_ET_CONCEAL; if (flags & UVM_FLAG_OVERLAY) { - KERNEL_LOCK(); entry->aref.ar_pageoff = 0; entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); - KERNEL_UNLOCK(); } /* Update map and process statistics. */ @@ -2833,9 +2831,7 @@ uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, orig->end = next->start = split; if (next->aref.ar_amap) { - KERNEL_LOCK(); amap_splitref(&orig->aref, &next->aref, adj); - KERNEL_UNLOCK(); } if (UVM_ET_ISSUBMAP(orig)) { uvm_map_reference(next->object.sub_map); @@ -4682,12 +4678,14 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) cp_start = MAX(entry->start, start); cp_end = MIN(entry->end, end); + amap_lock(amap); for (; cp_start != cp_end; cp_start += PAGE_SIZE) { anon = amap_lookup(&entry->aref, cp_start - entry->start); if (anon == NULL) continue; + KASSERT(anon->an_lock == amap->am_lock); pg = anon->an_page; if (pg == NULL) { continue; @@ -4743,6 +4741,7 @@ deactivate_it: panic("uvm_map_clean: weird flags"); } } + amap_unlock(amap); flush_object: cp_start = MAX(entry->start, start); diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c index 222cd5c1910..10e8fd6b32c 100644 --- a/sys/uvm/uvm_page.c +++ b/sys/uvm/uvm_page.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_page.c,v 1.154 2020/12/02 16:32:00 mpi Exp $ */ +/* $OpenBSD: uvm_page.c,v 1.155 2021/01/19 13:21:36 mpi Exp $ */ /* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */ /* @@ -1050,7 +1050,8 @@ uvm_page_unbusy(struct vm_page **pgs, int npgs) } else { atomic_clearbits_int(&pg->pg_flags, PG_BUSY); UVM_PAGE_OWN(pg, NULL); - uvm_anfree(pg->uanon); + rw_enter(pg->uanon->an_lock, RW_WRITE); + uvm_anon_release(pg->uanon); } } else { atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY); diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c index 910d7ec1b54..f808c6e536a 100644 --- a/sys/uvm/uvm_pager.c +++ b/sys/uvm/uvm_pager.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_pager.c,v 1.73 2020/10/21 09:08:14 mpi Exp $ */ +/* $OpenBSD: uvm_pager.c,v 1.74 2021/01/19 13:21:36 mpi Exp $ */ /* $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $ */ /* @@ -649,7 +649,8 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, UVM_PAGE_OWN(ppsp[lcv], NULL); /* kills anon and frees pg */ - uvm_anfree(ppsp[lcv]->uanon); + rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE); + uvm_anon_release(ppsp[lcv]->uanon); continue; } else { |
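
Two of the hunks above trade tsleep_nsec() on a busy page for rwsleep_nsec(pg, amap->am_lock, PVM | PNORELOCK, ...), so the amap lock is dropped atomically with going to sleep (and, because of PNORELOCK, not retaken on wakeup; the callers re-lock via ReStart or uvmfault_relock()). The nearest user-space primitive is pthread_cond_wait(), which also unlocks and sleeps atomically but does re-acquire the lock before returning; the sketch below is an analogy only and all of its names are invented.

/*
 * User-space analogue of the rwsleep_nsec(pg, amap->am_lock, ...) calls
 * above: release the lock protecting the shared state atomically with
 * going to sleep on a busy page.  Illustration only; every name below is
 * invented, and unlike PNORELOCK, pthread_cond_wait() re-acquires the
 * lock before returning.
 */
#include <pthread.h>
#include <stdbool.h>

struct page {
	pthread_mutex_t	pg_lock;	/* stands in for amap->am_lock */
	pthread_cond_t	pg_wait;	/* stands in for the sleep channel */
	bool		pg_busy;	/* PG_BUSY analogue */
};

void
page_init(struct page *pg)
{
	pthread_mutex_init(&pg->pg_lock, NULL);
	pthread_cond_init(&pg->pg_wait, NULL);
	pg->pg_busy = true;
}

/* Wait for the page to become idle, dropping the lock while asleep. */
void
page_wait_until_idle(struct page *pg)
{
	pthread_mutex_lock(&pg->pg_lock);
	while (pg->pg_busy) {
		/* Unlocks pg_lock and sleeps atomically; relocks on wakeup. */
		pthread_cond_wait(&pg->pg_wait, &pg->pg_lock);
	}
	pthread_mutex_unlock(&pg->pg_lock);
}

/* The owner of the busy page clears the flag and wakes any waiters. */
void
page_release(struct page *pg)
{
	pthread_mutex_lock(&pg->pg_lock);
	pg->pg_busy = false;
	pthread_cond_broadcast(&pg->pg_wait);
	pthread_mutex_unlock(&pg->pg_lock);
}
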
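
The reworked amap_swap_off() above can no longer keep the global amap_list lock held across uvm_anon_pagein(), so it parks a marker entry after the amap it is processing, drops the list lock, and later resumes the walk from the marker. The same traversal pattern can be written in user space with the <sys/queue.h> LIST macros; in the sketch below only the LIST_* names come from that header, everything else is invented.

/*
 * Sketch of the marker-based walk that amap_swap_off() uses above: park a
 * marker after the current element, drop the list lock while working on
 * that element, then resume from the marker.  Only the LIST_* macros come
 * from <sys/queue.h>; every other name is invented, and a complete
 * implementation would also skip markers owned by other walkers.
 */
#include <sys/queue.h>
#include <pthread.h>
#include <stdio.h>

struct item {
	LIST_ENTRY(item) i_list;
	int		 i_value;
};

static LIST_HEAD(, item) item_list = LIST_HEAD_INITIALIZER(item_list);
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void
walk_with_marker(void (*work)(struct item *))
{
	struct item marker;
	struct item *it, *next;

	pthread_mutex_lock(&list_lock);
	for (it = LIST_FIRST(&item_list); it != NULL; it = next) {
		/* Park the marker so the list may change while we work. */
		LIST_INSERT_AFTER(it, &marker, i_list);
		pthread_mutex_unlock(&list_lock);

		work(it);		/* may sleep; the list is unlocked */

		pthread_mutex_lock(&list_lock);
		next = LIST_NEXT(&marker, i_list);
		LIST_REMOVE(&marker, i_list);
	}
	pthread_mutex_unlock(&list_lock);
}

static void
print_item(struct item *it)
{
	printf("item %d\n", it->i_value);
}

int
main(void)
{
	struct item a = { .i_value = 1 }, b = { .i_value = 2 }, c = { .i_value = 3 };

	LIST_INSERT_HEAD(&item_list, &c, i_list);
	LIST_INSERT_HEAD(&item_list, &b, i_list);
	LIST_INSERT_HEAD(&item_list, &a, i_list);
	walk_with_marker(print_item);
	return 0;
}
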