From df471f546425532f5eb23a525aa177e7cebfc201 Mon Sep 17 00:00:00 2001 From: Ariane van der Steldt Date: Tue, 16 Jun 2009 16:42:42 +0000 Subject: Backout pmemrange (which to most people is more well known as physmem allocator). "i can't see any obvious problems" oga --- sys/arch/amd64/amd64/pmap.c | 10 +- sys/arch/amd64/include/vmparam.h | 9 +- sys/arch/i386/i386/pmap.c | 11 +- sys/arch/i386/i386/pmapae.c | 15 +- sys/arch/i386/include/vmparam.h | 9 +- sys/conf/files | 3 +- sys/nnpfs/nnpfs_vnodeops-bsd.c | 2 +- sys/uvm/uvm.h | 31 +- sys/uvm/uvm_extern.h | 9 +- sys/uvm/uvm_map.c | 15 +- sys/uvm/uvm_page.c | 173 ++++-- sys/uvm/uvm_page.h | 17 +- sys/uvm/uvm_pglist.c | 328 +++++++++- sys/uvm/uvm_pmemrange.c | 1243 -------------------------------------- sys/uvm/uvm_pmemrange.h | 83 --- sys/uvm/uvm_vnode.c | 4 +- 16 files changed, 464 insertions(+), 1498 deletions(-) delete mode 100644 sys/uvm/uvm_pmemrange.c delete mode 100644 sys/uvm/uvm_pmemrange.h diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c index 165f3b365dd..b6ff88bef80 100644 --- a/sys/arch/amd64/amd64/pmap.c +++ b/sys/arch/amd64/amd64/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.48 2009/06/16 00:11:29 oga Exp $ */ +/* $OpenBSD: pmap.c,v 1.49 2009/06/16 16:42:40 ariane Exp $ */ /* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */ /* @@ -835,7 +835,7 @@ pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level, pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq); ptp->wire_count = 0; uvm_pagerealloc(ptp, NULL, 0); - TAILQ_INSERT_TAIL(pagelist, ptp, fq.queues.listq); + TAILQ_INSERT_TAIL(pagelist, ptp, listq); } void @@ -1537,7 +1537,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) PMAP_MAP_TO_HEAD_UNLOCK(); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { - TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq); + TAILQ_REMOVE(&empty_ptps, ptp, listq); uvm_pagefree(ptp); } @@ -1609,7 +1609,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) PMAP_MAP_TO_HEAD_UNLOCK(); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { - TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq); + TAILQ_REMOVE(&empty_ptps, ptp, listq); uvm_pagefree(ptp); } } @@ -1682,7 +1682,7 @@ pmap_page_remove(struct vm_page *pg) pmap_tlb_shootwait(); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { - TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq); + TAILQ_REMOVE(&empty_ptps, ptp, listq); uvm_pagefree(ptp); } } diff --git a/sys/arch/amd64/include/vmparam.h b/sys/arch/amd64/include/vmparam.h index d3c5c9dd102..16914446d34 100644 --- a/sys/arch/amd64/include/vmparam.h +++ b/sys/arch/amd64/include/vmparam.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmparam.h,v 1.11 2009/06/01 17:42:33 ariane Exp $ */ +/* $OpenBSD: vmparam.h,v 1.12 2009/06/16 16:42:40 ariane Exp $ */ /* $NetBSD: vmparam.h,v 1.1 2003/04/26 18:39:49 fvdl Exp $ */ /*- @@ -112,13 +112,6 @@ #define VM_FREELIST_LOW 1 #define VM_FREELIST_HIGH 2 -/* reserve ISA-DMA and 32-bit DMA memory */ -#define UVM_IO_RANGES \ - { \ - { 0, 0x00ffffffUL }, \ - { 0, 0xffffffffUL }, \ - } - #define __HAVE_VM_PAGE_MD struct pv_entry; struct vm_page_md { diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index 240970dab3b..3619ae37e96 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.141 2009/06/16 00:11:29 oga Exp $ */ +/* $OpenBSD: pmap.c,v 1.142 2009/06/16 16:42:41 ariane Exp $ */ /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* @@ -2009,7 +2009,7 @@ pmap_do_remove(struct 
pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) /* If PTP is no longer being used, free it. */ if (ptp && ptp->wire_count <= 1) { pmap_drop_ptp(pmap, va, ptp, ptes); - TAILQ_INSERT_TAIL(&empty_ptps, ptp, fq.queues.listq); + TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq); } if (!shootall) @@ -2023,7 +2023,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) pmap_unmap_ptes(pmap); PMAP_MAP_TO_HEAD_UNLOCK(); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { - TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq); + TAILQ_REMOVE(&empty_ptps, ptp, listq); uvm_pagefree(ptp); } } @@ -2080,8 +2080,7 @@ pmap_page_remove(struct vm_page *pg) if (pve->pv_ptp && --pve->pv_ptp->wire_count <= 1) { pmap_drop_ptp(pve->pv_pmap, pve->pv_va, pve->pv_ptp, ptes); - TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp, - fq.queues.listq); + TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp, listq); } pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va); @@ -2094,7 +2093,7 @@ pmap_page_remove(struct vm_page *pg) pmap_tlb_shootwait(); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { - TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq); + TAILQ_REMOVE(&empty_ptps, ptp, listq); uvm_pagefree(ptp); } } diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c index 46fd0b40943..ed75721ad3c 100644 --- a/sys/arch/i386/i386/pmapae.c +++ b/sys/arch/i386/i386/pmapae.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmapae.c,v 1.18 2009/06/16 00:11:29 oga Exp $ */ +/* $OpenBSD: pmapae.c,v 1.19 2009/06/16 16:42:41 ariane Exp $ */ /* * Copyright (c) 2006 Michael Shalayeff @@ -1453,15 +1453,14 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) ptp->wire_count = 0; /* Postpone free to after shootdown. */ uvm_pagerealloc(ptp, NULL, 0); - TAILQ_INSERT_TAIL(&empty_ptps, ptp, - fq.queues.listq); + TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq); } } pmap_tlb_shootnow(cpumask); pmap_unmap_ptes_pae(pmap); /* unlock pmap */ PMAP_MAP_TO_HEAD_UNLOCK(); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { - TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq); + TAILQ_REMOVE(&empty_ptps, ptp, listq); uvm_pagefree(ptp); } return; @@ -1547,7 +1546,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) ptp->wire_count = 0; /* Postpone free to after shootdown. */ uvm_pagerealloc(ptp, NULL, 0); - TAILQ_INSERT_TAIL(&empty_ptps, ptp, fq.queues.listq); + TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq); } } @@ -1555,7 +1554,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) pmap_unmap_ptes_pae(pmap); PMAP_MAP_TO_HEAD_UNLOCK(); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { - TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq); + TAILQ_REMOVE(&empty_ptps, ptp, listq); uvm_pagefree(ptp); } } @@ -1666,7 +1665,7 @@ pmap_page_remove_pae(struct vm_page *pg) /* Postpone free to after shootdown. 
*/ uvm_pagerealloc(pve->pv_ptp, NULL, 0); TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp, - fq.queues.listq); + listq); } } pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */ @@ -1677,7 +1676,7 @@ pmap_page_remove_pae(struct vm_page *pg) PMAP_HEAD_TO_MAP_UNLOCK(); pmap_tlb_shootnow(cpumask); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { - TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq); + TAILQ_REMOVE(&empty_ptps, ptp, listq); uvm_pagefree(ptp); } } diff --git a/sys/arch/i386/include/vmparam.h b/sys/arch/i386/include/vmparam.h index 41e95b3f418..293879609de 100644 --- a/sys/arch/i386/include/vmparam.h +++ b/sys/arch/i386/include/vmparam.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmparam.h,v 1.42 2009/06/01 17:42:33 ariane Exp $ */ +/* $OpenBSD: vmparam.h,v 1.43 2009/06/16 16:42:41 ariane Exp $ */ /* $NetBSD: vmparam.h,v 1.15 1994/10/27 04:16:34 cgd Exp $ */ /*- @@ -118,13 +118,6 @@ #define VM_FREELIST_DEFAULT 0 #define VM_FREELIST_FIRST16 1 -/* reserve ISA-DMA and 32-bit DMA memory */ -#define UVM_IO_RANGES \ - { \ - { 0, 0x00ffffffUL }, \ - { 0, 0xffffffffUL }, \ - } - #define __HAVE_VM_PAGE_MD struct pv_entry; struct vm_page_md { diff --git a/sys/conf/files b/sys/conf/files index 4e84684fa51..84d1f578eb8 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.465 2009/06/16 00:11:29 oga Exp $ +# $OpenBSD: files,v 1.466 2009/06/16 16:42:41 ariane Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -973,7 +973,6 @@ file uvm/uvm_page.c file uvm/uvm_pager.c file uvm/uvm_pdaemon.c file uvm/uvm_pglist.c -file uvm/uvm_pmemrange.c file uvm/uvm_stat.c file uvm/uvm_swap.c file uvm/uvm_swap_encrypt.c uvm_swap_encrypt diff --git a/sys/nnpfs/nnpfs_vnodeops-bsd.c b/sys/nnpfs/nnpfs_vnodeops-bsd.c index 7e740d3f8f5..e293154a270 100644 --- a/sys/nnpfs/nnpfs_vnodeops-bsd.c +++ b/sys/nnpfs/nnpfs_vnodeops-bsd.c @@ -1119,7 +1119,7 @@ nnpfs_putpages (struct vop_putpages_args *ap) while (pg && !dirty) { dirty = pmap_is_modified(pg) || (pg->flags & PG_CLEAN) == 0; - pg = TAILQ_NEXT(pg, fq.queues.listq); + pg = TAILQ_NEXT(pg, listq); } if (dirty) diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h index d48c5f8026c..57ae90fc894 100644 --- a/sys/uvm/uvm.h +++ b/sys/uvm/uvm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm.h,v 1.35 2009/06/16 00:11:29 oga Exp $ */ +/* $OpenBSD: uvm.h,v 1.36 2009/06/16 16:42:41 ariane Exp $ */ /* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */ /* @@ -57,7 +57,6 @@ #include #include #include -#include #include #ifdef UVM_SWAP_ENCRYPT #include @@ -68,32 +67,6 @@ */ #include -/* - * UVM_IO_RANGES: paddr_t pairs, describing the lowest and highest address - * that should be reserved. These ranges (which may overlap) will have their - * use counter increased, causing them to be avoided if an allocation can be - * satisfied from another range of memory. - * - * IO ranges need not overlap with physmem ranges: the uvm code splits ranges - * on demand to satisfy requests. - * - * UVM_IO_RANGES specified here actually translates into a call to - * uvm_pmr_use_inc() at uvm initialization time. uvm_pmr_use_inc() can also - * be called after uvm_init() has completed. - * - * Note: the upper bound is specified in the same way as to uvm_pglistalloc. - * Ex: a memory range of 16 bit is specified as: { 0, 0xffff }. - */ -#ifndef UVM_IO_RANGES -#define UVM_IO_RANGES {} -#endif - -/* UVM IO ranges are described in an array of uvm_io_ranges. 
*/ -struct uvm_io_ranges { - paddr_t low; - paddr_t high; -}; - /* * uvm structure (vm global state: collected in one structure for ease * of reference...) @@ -103,7 +76,7 @@ struct uvm { /* vm_page related parameters */ /* vm_page queues */ - struct uvm_pmr_control pmr_control; /* pmemrange control data */ + struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */ struct pglist page_active; /* allocated pages, in use */ struct pglist page_inactive_swp;/* pages inactive (reclaim or free) */ struct pglist page_inactive_obj;/* pages inactive (reclaim or free) */ diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index 48255c1d8ad..de7dbb06b3b 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_extern.h,v 1.79 2009/06/14 03:04:08 deraadt Exp $ */ +/* $OpenBSD: uvm_extern.h,v 1.80 2009/06/16 16:42:41 ariane Exp $ */ /* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */ /* @@ -221,12 +221,11 @@ typedef int vm_prot_t; #define UVM_PGA_ZERO 0x0002 /* returned page must be zeroed */ /* - * flags for uvm_pglistalloc() and uvm_pmr_getpages() + * flags for uvm_pglistalloc() */ #define UVM_PLA_WAITOK 0x0001 /* may sleep */ #define UVM_PLA_NOWAIT 0x0002 /* can't sleep (need one of the two) */ #define UVM_PLA_ZERO 0x0004 /* zero all pages before returning */ -#define UVM_PLA_TRY_CONTIG 0x0008 /* try to allocate a contig range */ /* * lockflags that control the locking behavior of various functions. @@ -590,10 +589,6 @@ int uvm_pglistalloc(psize_t, paddr_t, struct pglist *, int, int); void uvm_pglistfree(struct pglist *); -/* uvm_pmemrange.c */ - -void uvm_pmr_use_inc(paddr_t, paddr_t); - /* uvm_swap.c */ void uvm_swap_init(void); diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index b804d36bd6c..b82f161aa9f 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_map.c,v 1.116 2009/06/16 00:11:29 oga Exp $ */ +/* $OpenBSD: uvm_map.c,v 1.117 2009/06/16 16:42:41 ariane Exp $ */ /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ /* @@ -3822,7 +3822,7 @@ uvm_object_printit(uobj, full, pr) (*pr)(" PAGES :\n "); for (pg = TAILQ_FIRST(&uobj->memq); pg != NULL; - pg = TAILQ_NEXT(pg, fq.queues.listq), cnt++) { + pg = TAILQ_NEXT(pg, listq), cnt++) { (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); if ((cnt % 3) == 2) { (*pr)("\n "); @@ -3883,8 +3883,7 @@ uvm_page_printit(pg, full, pr) uobj = pg->uobject; if (uobj) { (*pr)(" checking object list\n"); - TAILQ_FOREACH(tpg, &uobj->memq, - fq.queues.listq) { + TAILQ_FOREACH(tpg, &uobj->memq, listq) { if (tpg == pg) { break; } @@ -3899,11 +3898,9 @@ uvm_page_printit(pg, full, pr) /* cross-verify page queue */ if (pg->pg_flags & PQ_FREE) { - if (uvm_pmr_isfree(pg)) - printf(" page found in uvm_pmemrange\n"); - else - printf(" >>> page not found in uvm_pmemrange <<<\n"); - pgl = NULL; + int fl = uvm_page_lookup_freelist(pg); + pgl = &uvm.page_free[fl].pgfl_queues[((pg)->pg_flags & PG_ZERO) ? + PGFL_ZEROS : PGFL_UNKNOWN]; } else if (pg->pg_flags & PQ_INACTIVE) { pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 
&uvm.page_inactive_swp : &uvm.page_inactive_obj; diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c index 2cf45c11375..afd841ff331 100644 --- a/sys/uvm/uvm_page.c +++ b/sys/uvm/uvm_page.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_page.c,v 1.89 2009/06/16 00:11:29 oga Exp $ */ +/* $OpenBSD: uvm_page.c,v 1.90 2009/06/16 16:42:41 ariane Exp $ */ /* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */ /* @@ -159,11 +159,10 @@ uvm_pageinsert(struct vm_page *pg) KASSERT((pg->pg_flags & PG_TABLED) == 0); mtx_enter(&uvm.hashlock); buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)]; - TAILQ_INSERT_TAIL(buck, pg, fq.queues.hashq); /* put in hash */ + TAILQ_INSERT_TAIL(buck, pg, hashq); /* put in hash */ mtx_leave(&uvm.hashlock); - TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, - fq.queues.listq); /* put in object */ + TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */ atomic_setbits_int(&pg->pg_flags, PG_TABLED); pg->uobject->uo_npages++; } @@ -184,7 +183,7 @@ uvm_pageremove(struct vm_page *pg) KASSERT(pg->pg_flags & PG_TABLED); mtx_enter(&uvm.hashlock); buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)]; - TAILQ_REMOVE(buck, pg, fq.queues.hashq); + TAILQ_REMOVE(buck, pg, hashq); mtx_leave(&uvm.hashlock); #ifdef UBC @@ -194,7 +193,7 @@ uvm_pageremove(struct vm_page *pg) #endif /* object should be locked */ - TAILQ_REMOVE(&pg->uobject->memq, pg, fq.queues.listq); + TAILQ_REMOVE(&pg->uobject->memq, pg, listq); atomic_clearbits_int(&pg->pg_flags, PG_TABLED|PQ_AOBJ); pg->uobject->uo_npages--; @@ -227,12 +226,15 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) * init the page queues and page queue locks */ + for (lcv = 0; lcv < VM_NFREELIST; lcv++) { + for (i = 0; i < PGFL_NQUEUES; i++) + TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]); + } TAILQ_INIT(&uvm.page_active); TAILQ_INIT(&uvm.page_inactive_swp); TAILQ_INIT(&uvm.page_inactive_obj); simple_lock_init(&uvm.pageqlock); mtx_init(&uvm.fpageqlock, IPL_VM); - uvm_pmr_init(); /* * init the => hash table. for now @@ -317,13 +319,10 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) if (atop(paddr) >= vm_physmem[lcv].avail_start && atop(paddr) <= vm_physmem[lcv].avail_end) { uvmexp.npages++; + /* add page to free pool */ + uvm_pagefree(&vm_physmem[lcv].pgs[i]); } } - - /* add pages to free pool */ - uvm_pmr_freepages(&vm_physmem[lcv].pgs[ - vm_physmem[lcv].avail_start - vm_physmem[lcv].start], - vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start); } /* @@ -812,10 +811,10 @@ uvm_page_rehash(void) /* ... and rehash */ for (lcv = 0 ; lcv < oldcount ; lcv++) { while ((pg = TAILQ_FIRST(&oldbuckets[lcv])) != NULL) { - TAILQ_REMOVE(&oldbuckets[lcv], pg, fq.queues.hashq); + TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq); TAILQ_INSERT_TAIL( &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)], - pg, fq.queues.hashq); + pg, hashq); } } mtx_leave(&uvm.hashlock); @@ -893,15 +892,18 @@ struct vm_page * uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, int flags, int strat, int free_list) { - struct pglist pgl; - int pmr_flags; + int lcv, try1, try2, zeroit = 0; struct vm_page *pg; + struct pglist *freeq; + struct pgfreelist *pgfl; boolean_t use_reserve; UVMHIST_FUNC("uvm_pagealloc_strat"); UVMHIST_CALLED(pghist); KASSERT(obj == NULL || anon == NULL); KASSERT(off == trunc_page(off)); + uvm_lock_fpageq(); + /* * check to see if we need to generate some free pages waking * the pagedaemon. 
@@ -928,20 +930,95 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, (curproc == syncerproc)))) goto fail; - pmr_flags = UVM_PLA_NOWAIT; - if (flags & UVM_PGA_ZERO) - pmr_flags |= UVM_PLA_ZERO; - TAILQ_INIT(&pgl); - if (uvm_pmr_getpages(1, 0, 0, 1, 0, 1, pmr_flags, &pgl) != 0) +#if PGFL_NQUEUES != 2 +#error uvm_pagealloc_strat needs to be updated +#endif + + /* + * If we want a zero'd page, try the ZEROS queue first, otherwise + * we try the UNKNOWN queue first. + */ + if (flags & UVM_PGA_ZERO) { + try1 = PGFL_ZEROS; + try2 = PGFL_UNKNOWN; + } else { + try1 = PGFL_UNKNOWN; + try2 = PGFL_ZEROS; + } + + UVMHIST_LOG(pghist, "obj=%p off=%lx anon=%p flags=%lx", + obj, (u_long)off, anon, flags); + UVMHIST_LOG(pghist, "strat=%ld free_list=%ld", strat, free_list, 0, 0); + again: + switch (strat) { + case UVM_PGA_STRAT_NORMAL: + /* Check all freelists in descending priority order. */ + for (lcv = 0; lcv < VM_NFREELIST; lcv++) { + pgfl = &uvm.page_free[lcv]; + if ((pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try1]))) != NULL || + (pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try2]))) != NULL) + goto gotit; + } + + /* No pages free! */ + goto fail; + + case UVM_PGA_STRAT_ONLY: + case UVM_PGA_STRAT_FALLBACK: + /* Attempt to allocate from the specified free list. */ + KASSERT(free_list >= 0 && free_list < VM_NFREELIST); + pgfl = &uvm.page_free[free_list]; + if ((pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try1]))) != NULL || + (pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try2]))) != NULL) + goto gotit; + + /* Fall back, if possible. */ + if (strat == UVM_PGA_STRAT_FALLBACK) { + strat = UVM_PGA_STRAT_NORMAL; + goto again; + } + + /* No pages free! */ goto fail; - pg = TAILQ_FIRST(&pgl); - KASSERT(pg != NULL); - KASSERT(TAILQ_NEXT(pg, pageq) == NULL); + + default: + panic("uvm_pagealloc_strat: bad strat %d", strat); + /* NOTREACHED */ + } + + gotit: + TAILQ_REMOVE(freeq, pg, pageq); + uvmexp.free--; + + /* update zero'd page count */ + if (pg->pg_flags & PG_ZERO) + uvmexp.zeropages--; + + /* + * update allocation statistics and remember if we have to + * zero the page + */ + if (flags & UVM_PGA_ZERO) { + if (pg->pg_flags & PG_ZERO) { + uvmexp.pga_zerohit++; + zeroit = 0; + } else { + uvmexp.pga_zeromiss++; + zeroit = 1; + } + } + + uvm_unlock_fpageq(); /* unlock free page queue */ pg->offset = off; pg->uobject = obj; pg->uanon = anon; pg->pg_flags = PG_BUSY|PG_CLEAN|PG_FAKE; + pg->pg_version++; if (anon) { anon->an_page = pg; atomic_setbits_int(&pg->pg_flags, PQ_ANON); @@ -957,11 +1034,22 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, #endif UVM_PAGE_OWN(pg, "new alloc"); + if (flags & UVM_PGA_ZERO) { + /* + * A zero'd page is not clean. If we got a page not already + * zero'd, then we have to zero it ourselves. + */ + atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); + if (zeroit) + pmap_zero_page(pg); + } + UVMHIST_LOG(pghist, "allocated pg %p/%lx", pg, (u_long)VM_PAGE_TO_PHYS(pg), 0, 0); return(pg); fail: + uvm_unlock_fpageq(); UVMHIST_LOG(pghist, "failed!", 0, 0, 0, 0); return (NULL); } @@ -1012,7 +1100,6 @@ uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff) void uvm_pagefree(struct vm_page *pg) { - struct pglist pgl; int saved_loan_count = pg->loan_count; UVMHIST_FUNC("uvm_pagefree"); UVMHIST_CALLED(pghist); @@ -1108,35 +1195,27 @@ uvm_pagefree(struct vm_page *pg) } /* - * Clean page state bits. 
- */ - atomic_clearbits_int(&pg->pg_flags, - PG_ZERO|PG_FAKE|PG_BUSY|PG_RELEASED|PG_CLEAN|PG_CLEANCHK); - /* - * Pmap flag cleaning. - * XXX: Shouldn't pmap do this? + * and put on free queue */ - atomic_clearbits_int(&pg->pg_flags, - PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3); - -#if defined(DIAGNOSTIC) - if (pg->pg_flags != 0) { - panic("uvm_pagefree: expected page %p pg_flags to be 0\n" - "uvm_pagefree: instead of pg->pg_flags = %x\n", - VM_PAGE_TO_PHYS(pg), pg->pg_flags); - } -#endif + + atomic_clearbits_int(&pg->pg_flags, PG_ZERO); + + uvm_lock_fpageq(); + TAILQ_INSERT_TAIL(&uvm.page_free[ + uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq); + atomic_clearbits_int(&pg->pg_flags, PQ_MASK); + atomic_setbits_int(&pg->pg_flags, PQ_FREE); #ifdef DEBUG pg->uobject = (void *)0xdeadbeef; pg->offset = 0xdeadbeef; pg->uanon = (void *)0xdeadbeef; #endif - TAILQ_INIT(&pgl); - TAILQ_INSERT_HEAD(&pgl, pg, pageq); - uvm_pmr_freepageq(&pgl); + uvmexp.free++; if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) uvm.page_idle_zero = vm_page_zero_enable; + + uvm_unlock_fpageq(); } /* @@ -1229,7 +1308,6 @@ uvm_page_own(struct vm_page *pg, char *tag) void uvm_pageidlezero(void) { -#if 0 /* Disabled for now. */ struct vm_page *pg; struct pgfreelist *pgfl; int free_list; @@ -1296,7 +1374,6 @@ uvm_pageidlezero(void) uvmexp.zeropages++; uvm_unlock_fpageq(); } while (curcpu_is_idle()); -#endif /* 0 */ } /* @@ -1399,7 +1476,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off) mtx_enter(&uvm.hashlock); buck = &uvm.page_hash[uvm_pagehash(obj,off)]; - TAILQ_FOREACH(pg, buck, fq.queues.hashq) { + TAILQ_FOREACH(pg, buck, hashq) { if (pg->uobject == obj && pg->offset == off) { break; } diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h index 5896286c871..4da52e8e323 100644 --- a/sys/uvm/uvm_page.h +++ b/sys/uvm/uvm_page.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_page.h,v 1.37 2009/06/16 00:11:29 oga Exp $ */ +/* $OpenBSD: uvm_page.h,v 1.38 2009/06/16 16:42:41 ariane Exp $ */ /* $NetBSD: uvm_page.h,v 1.19 2000/12/28 08:24:55 chs Exp $ */ /* @@ -106,22 +106,11 @@ #include #include -union vm_page_fq { - struct { - TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/ - TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/ - } queues; - - struct { - RB_ENTRY(vm_page) tree; /* Free chunks, addr/size */ - psize_t pages; - } free; -}; - struct vm_page { - union vm_page_fq fq; /* free and queue management */ TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO * queue or free list (P) */ + TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/ + TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/ struct vm_anon *uanon; /* anon (O,P) */ struct uvm_object *uobject; /* object (O,P) */ diff --git a/sys/uvm/uvm_pglist.c b/sys/uvm/uvm_pglist.c index ff0f8d91f68..5abe87d9fb5 100644 --- a/sys/uvm/uvm_pglist.c +++ b/sys/uvm/uvm_pglist.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_pglist.c,v 1.30 2009/06/01 17:42:33 ariane Exp $ */ +/* $OpenBSD: uvm_pglist.c,v 1.31 2009/06/16 16:42:41 ariane Exp $ */ /* $NetBSD: uvm_pglist.c,v 1.13 2001/02/18 21:19:08 chs Exp $ */ /*- @@ -56,6 +56,112 @@ u_long uvm_pglistalloc_npages; #define STAT_DECR(v) #endif +int uvm_pglistalloc_simple(psize_t, paddr_t, paddr_t, struct pglist *); + +/* + * Simple page allocation: pages do not need to be contiguous. We just + * attempt to find enough free pages in the given range. 
+ */ +int +uvm_pglistalloc_simple(psize_t size, paddr_t low, paddr_t high, + struct pglist *rlist) +{ + psize_t todo; + int psi; + struct vm_page *pg; + struct vm_physseg *seg; + paddr_t slow, shigh; + int pgflidx, error, free_list; + UVMHIST_FUNC("uvm_pglistalloc_simple"); UVMHIST_CALLED(pghist); +#ifdef DEBUG + vm_page_t tp; +#endif + + /* Default to "lose". */ + error = ENOMEM; + + todo = atop(size); + + /* + * Block all memory allocation and lock the free list. + */ + uvm_lock_fpageq(); + + /* Are there even any free pages? */ + if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) + goto out; + + for (psi = 0, seg = vm_physmem; psi < vm_nphysseg; psi++, seg++) { + /* + * Skip this segment if incompatible with the address range. + */ + if (seg->avail_end <= atop(low)) + continue; + if (seg->avail_start >= atop(high)) + continue; + + slow = MAX(atop(low), seg->avail_start); + shigh = MIN(atop(high), seg->avail_end); + + /* we want to be able to allocate at least a page... */ + if (slow == shigh) + continue; + + for (pg = &seg->pgs[slow - seg->start]; slow != shigh; + slow++, pg++) { + if (VM_PAGE_IS_FREE(pg) == 0) + continue; + + free_list = uvm_page_lookup_freelist(pg); + pgflidx = (pg->pg_flags & PG_ZERO) ? + PGFL_ZEROS : PGFL_UNKNOWN; +#ifdef DEBUG + for (tp = TAILQ_FIRST(&uvm.page_free[free_list].pgfl_queues[pgflidx]); + tp != NULL; tp = TAILQ_NEXT(tp, pageq)) { + if (tp == pg) + break; + } + if (tp == NULL) + panic("uvm_pglistalloc_simple: page not on freelist"); +#endif + TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx], + pg, pageq); + uvmexp.free--; + if (pg->pg_flags & PG_ZERO) + uvmexp.zeropages--; + pg->uobject = NULL; + pg->uanon = NULL; + pg->pg_version++; + TAILQ_INSERT_TAIL(rlist, pg, pageq); + STAT_INCR(uvm_pglistalloc_npages); + if (--todo == 0) { + error = 0; + goto out; + } + } + + } + +out: + /* + * check to see if we need to generate some free pages waking + * the pagedaemon. + */ + + if (!error && (uvmexp.free + uvmexp.paging < uvmexp.freemin || + (uvmexp.free + uvmexp.paging < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg))) { + wakeup(&uvm.pagedaemon_proc); + } + + uvm_unlock_fpageq(); + + if (error) + uvm_pglistfree(rlist); + + return (error); +} + /* * uvm_pglistalloc: allocate a list of pages * @@ -73,45 +179,202 @@ u_long uvm_pglistalloc_npages; * alignment memory must be aligned to this power-of-two boundary. * boundary no segment in the allocation may cross this * power-of-two boundary (relative to zero). - * => flags: - * UVM_PLA_NOWAIT fail if allocation fails - * UVM_PLA_WAITOK wait for memory to become avail if allocation fails - * UVM_PLA_ZERO return zeroed memory - * UVM_PLA_TRY_CONTIG device prefers p-lineair mem */ int uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment, paddr_t boundary, struct pglist *rlist, int nsegs, int flags) { + int psi; + struct vm_page *pgs; + struct vm_physseg *seg; + paddr_t slow, shigh; + paddr_t try, idxpa, lastidxpa; + int tryidx, idx, pgflidx, endidx, error, free_list; + vm_page_t m; + u_long pagemask; +#ifdef DEBUG + vm_page_t tp; +#endif UVMHIST_FUNC("uvm_pglistalloc"); UVMHIST_CALLED(pghist); KASSERT((alignment & (alignment - 1)) == 0); KASSERT((boundary & (boundary - 1)) == 0); + /* + * This argument is always ignored for now, but ensure drivers always + * show intention. + */ KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT)); + + /* + * Our allocations are always page granularity, so our alignment + * must be, too. 
+ */ + if (alignment < PAGE_SIZE) + alignment = PAGE_SIZE; if (size == 0) return (EINVAL); + size = round_page(size); + low = roundup(low, alignment); + /* - * Convert byte addresses to page numbers. + * If we are allowed to allocate as many segments as pages, + * no need to be smart. */ - if (alignment < PAGE_SIZE) - alignment = PAGE_SIZE; - low = atop(roundup(low, alignment)); - /* Allows for overflow: 0xffff + 1 = 0x0000 */ - if ((high & PAGE_MASK) == PAGE_MASK) - high = atop(high) + 1; - else - high = atop(high); - size = atop(round_page(size)); - alignment = atop(alignment); - if (boundary < PAGE_SIZE && boundary != 0) - boundary = PAGE_SIZE; - boundary = atop(boundary); - - return uvm_pmr_getpages(size, low, high, alignment, boundary, nsegs, - flags, rlist); + if ((nsegs >= size / PAGE_SIZE) && (alignment == PAGE_SIZE) && + (boundary == 0)) { + error = uvm_pglistalloc_simple(size, low, high, rlist); + goto done; + } + + if (boundary != 0 && boundary < size) + return (EINVAL); + + pagemask = ~(boundary - 1); + + /* Default to "lose". */ + error = ENOMEM; + + /* + * Block all memory allocation and lock the free list. + */ + uvm_lock_fpageq(); + + /* Are there even any free pages? */ + if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) + goto out; + + for (psi = 0, seg = vm_physmem; psi < vm_nphysseg; psi++, seg++) { + /* + * Skip this segment if incompatible with the address range. + */ + if (seg->avail_end <= atop(low)) + continue; + if (seg->avail_start >= atop(high)) + continue; + + slow = MAX(low, ptoa(seg->avail_start)); + shigh = MIN(high, ptoa(seg->avail_end)); + + try = roundup(slow, alignment); + for (;; try += alignment) { + if (try + size > shigh) { + /* + * We've run past the allowable range, or + * the segment. Try another. + */ + break; + } + + tryidx = idx = atop(try) - seg->start; + endidx = idx + atop(size); + pgs = vm_physmem[psi].pgs; + + /* + * Found a suitable starting page. See if the + * range is free. + */ + + for (; idx < endidx; idx++) { + if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) { + break; + } + idxpa = VM_PAGE_TO_PHYS(&pgs[idx]); + if (idx == tryidx) + continue; + + /* + * Check that the region is contiguous + * (it really should...) and does not + * cross an alignment boundary. + */ + lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]); + if ((lastidxpa + PAGE_SIZE) != idxpa) + break; + + if (boundary != 0 && + ((lastidxpa ^ idxpa) & pagemask) != 0) + break; + } + + if (idx == endidx) { + goto found; + } + } + } + + /* + * We could not allocate a contiguous range. This is where + * we should try harder if nsegs > 1... + */ + goto out; + +#if PGFL_NQUEUES != 2 +#error uvm_pglistalloc needs to be updated +#endif + +found: + /* + * we have a chunk of memory that conforms to the requested constraints. + */ + idx = tryidx; + while (idx < endidx) { + m = &pgs[idx]; + free_list = uvm_page_lookup_freelist(m); + pgflidx = (m->pg_flags & PG_ZERO) ? 
PGFL_ZEROS : PGFL_UNKNOWN; +#ifdef DEBUG + for (tp = TAILQ_FIRST(&uvm.page_free[ + free_list].pgfl_queues[pgflidx]); + tp != NULL; + tp = TAILQ_NEXT(tp, pageq)) { + if (tp == m) + break; + } + if (tp == NULL) + panic("uvm_pglistalloc: page not on freelist"); +#endif + TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx], + m, pageq); + uvmexp.free--; + if (m->pg_flags & PG_ZERO) + uvmexp.zeropages--; + m->uobject = NULL; + m->uanon = NULL; + m->pg_version++; + TAILQ_INSERT_TAIL(rlist, m, pageq); + idx++; + STAT_INCR(uvm_pglistalloc_npages); + } + error = 0; + +out: + /* + * check to see if we need to generate some free pages waking + * the pagedaemon. + */ + + if (uvmexp.free + uvmexp.paging < uvmexp.freemin || + (uvmexp.free + uvmexp.paging < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg)) { + wakeup(&uvm.pagedaemon_proc); + } + + uvm_unlock_fpageq(); + +done: + /* No locking needed here, pages are not on any queue. */ + if (error == 0) { + TAILQ_FOREACH(m, rlist, pageq) { + if (flags & UVM_PLA_ZERO && + (m->pg_flags & PG_ZERO) == 0) + uvm_pagezero(m); + m->pg_flags = PG_CLEAN; + } + } + + return (error); } /* @@ -126,8 +389,14 @@ uvm_pglistfree(struct pglist *list) struct vm_page *m; UVMHIST_FUNC("uvm_pglistfree"); UVMHIST_CALLED(pghist); - TAILQ_FOREACH(m, list, pageq) { + /* + * Block all memory allocation and lock the free list. + */ + uvm_lock_fpageq(); + + while ((m = TAILQ_FIRST(list)) != NULL) { KASSERT((m->pg_flags & (PQ_ACTIVE|PQ_INACTIVE)) == 0); + TAILQ_REMOVE(list, m, pageq); #ifdef DEBUG if (m->uobject == (void *)0xdeadbeef && m->uanon == (void *)0xdeadbeef) { @@ -139,6 +408,15 @@ uvm_pglistfree(struct pglist *list) m->uanon = (void *)0xdeadbeef; #endif atomic_clearbits_int(&m->pg_flags, PQ_MASK); + atomic_setbits_int(&m->pg_flags, PQ_FREE); + TAILQ_INSERT_TAIL(&uvm.page_free[ + uvm_page_lookup_freelist(m)].pgfl_queues[PGFL_UNKNOWN], + m, pageq); + uvmexp.free++; + if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) + uvm.page_idle_zero = vm_page_zero_enable; + STAT_DECR(uvm_pglistalloc_npages); } - uvm_pmr_freepageq(list); + + uvm_unlock_fpageq(); } diff --git a/sys/uvm/uvm_pmemrange.c b/sys/uvm/uvm_pmemrange.c deleted file mode 100644 index d25b780168b..00000000000 --- a/sys/uvm/uvm_pmemrange.c +++ /dev/null @@ -1,1243 +0,0 @@ -/* $OpenBSD: uvm_pmemrange.c,v 1.8 2009/06/14 03:04:08 deraadt Exp $ */ - -/* - * Copyright (c) 2009 Ariane van der Steldt - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include - -/* - * 2 trees: addr tree and size tree. - * - * addr tree is vm_page[0].fq.free.tree - * size tree is vm_page[1].fq.free.tree - * - * The size tree is not used for memory ranges of 1 page, instead, - * single queue is vm_page[0].pageq - * - * uvm_page_init guarantees that every vm_physseg contains an array of - * struct vm_page. 
Also, uvm_page_physload allocates an array of struct - * vm_page. This code depends on that array. - */ - -/* Tree comparators. */ -int uvm_pmemrange_addr_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *); -int uvm_pmemrange_use_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *); -int uvm_pmr_addr_cmp(struct vm_page *, struct vm_page *); -int uvm_pmr_size_cmp(struct vm_page *, struct vm_page *); - -/* Memory types. The page flags are used to derive what the current memory - * type of a page is. */ -static __inline int -uvm_pmr_pg_to_memtype(struct vm_page *pg) -{ - if (pg->pg_flags & PG_ZERO) - return UVM_PMR_MEMTYPE_ZERO; - /* Default: dirty memory. */ - return UVM_PMR_MEMTYPE_DIRTY; -} - -/* Trees. */ -RB_PROTOTYPE(uvm_pmr_addr, vm_page, fq.free.tree, uvm_pmr_addr_cmp); -RB_PROTOTYPE(uvm_pmr_size, vm_page, fq.free.tree, uvm_pmr_size_cmp); -RB_PROTOTYPE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr, - uvm_pmemrange_addr_cmp); -RB_GENERATE(uvm_pmr_addr, vm_page, fq.free.tree, uvm_pmr_addr_cmp); -RB_GENERATE(uvm_pmr_size, vm_page, fq.free.tree, uvm_pmr_size_cmp); -RB_GENERATE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr, - uvm_pmemrange_addr_cmp); - -/* Validation. */ -#ifdef DEBUG -void uvm_pmr_assertvalid(struct uvm_pmemrange *pmr); -#else -#define uvm_pmr_assertvalid(pmr) do {} while (0) -#endif - - -int uvm_pmr_get1page(psize_t, int, struct pglist *, - paddr_t, paddr_t); - -struct uvm_pmemrange *uvm_pmr_allocpmr(void); -struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int); -struct vm_page *uvm_pmr_nextsz(struct uvm_pmemrange *, - struct vm_page *, int); -void uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, - struct vm_page *pg, struct vm_page **pg_prev, - struct vm_page **pg_next); -struct vm_page *uvm_pmr_insert(struct uvm_pmemrange *, - struct vm_page *, int); -void uvm_pmr_remove(struct uvm_pmemrange *, - struct vm_page *); -psize_t uvm_pmr_remove_1strange(struct pglist *, paddr_t, - struct vm_page **); -void uvm_pmr_split(paddr_t); -struct uvm_pmemrange *uvm_pmemrange_find(paddr_t); -struct uvm_pmemrange *uvm_pmemrange_use_insert(struct uvm_pmemrange_use *, - struct uvm_pmemrange *); -struct vm_page *uvm_pmr_extract_range(struct uvm_pmemrange *, - struct vm_page *, paddr_t, paddr_t, - struct pglist *); - -/* - * Computes num/denom and rounds it up to the next power-of-2. - */ -static __inline psize_t -pow2divide(psize_t num, psize_t denom) -{ - int rshift = 0; - - while (num > (denom << rshift)) - rshift++; - return (paddr_t)1 << rshift; -} - -/* - * Predicate: lhs is a subrange or rhs. - */ -#define PMR_IS_SUBRANGE_OF(lhs_low, lhs_high, rhs_low, rhs_high) \ - ((lhs_low) >= (rhs_low) && (lhs_high <= rhs_high)) - -/* - * Align to power-of-2 alignment. - */ -#define PMR_ALIGN(pgno, align) \ - (((pgno) + ((align) - 1)) & ~((align) - 1)) - - -/* - * Comparator: sort by address ascending. - */ -int -uvm_pmemrange_addr_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs) -{ - return lhs->low < rhs->low ? -1 : lhs->low > rhs->low; -} - -/* - * Comparator: sort by use ascending. - * - * The higher the use value of a range, the more devices need memory in - * this range. Therefor allocate from the range with the lowest use first. - */ -int -uvm_pmemrange_use_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs) -{ - int result; - - result = lhs->use < rhs->use ? 
-1 : lhs->use > rhs->use; - if (result == 0) - result = uvm_pmemrange_addr_cmp(lhs, rhs); - return result; -} - -int -uvm_pmr_addr_cmp(struct vm_page *lhs, struct vm_page *rhs) -{ - paddr_t lhs_addr, rhs_addr; - - lhs_addr = VM_PAGE_TO_PHYS(lhs); - rhs_addr = VM_PAGE_TO_PHYS(rhs); - - return (lhs_addr < rhs_addr ? -1 : lhs_addr > rhs_addr); -} - -int -uvm_pmr_size_cmp(struct vm_page *lhs, struct vm_page *rhs) -{ - psize_t lhs_size, rhs_size; - int cmp; - - /* Using second tree, so we receive pg[1] instead of pg[0]. */ - lhs_size = (lhs - 1)->fq.free.pages; - rhs_size = (rhs - 1)->fq.free.pages; - - cmp = (lhs_size < rhs_size ? -1 : lhs_size > rhs_size); - if (cmp == 0) - cmp = uvm_pmr_addr_cmp(lhs - 1, rhs - 1); - return cmp; -} - -/* - * Find the first range of free pages that is at least sz pages long. - */ -struct vm_page * -uvm_pmr_nfindsz(struct uvm_pmemrange *pmr, psize_t sz, int mti) -{ - struct vm_page *node, *best; - - KASSERT(sz >= 1); - - if (sz == 1 && !TAILQ_EMPTY(&pmr->single[mti])) - return TAILQ_FIRST(&pmr->single[mti]); - - node = RB_ROOT(&pmr->size[mti]); - best = NULL; - while (node != NULL) { - if ((node - 1)->fq.free.pages >= sz) { - best = (node - 1); - node = RB_LEFT(node, fq.free.tree); - } else - node = RB_RIGHT(node, fq.free.tree); - } - return best; -} - -/* - * Finds the next range. The next range has a size >= pg->fq.free.pages. - * Returns NULL if no more ranges are available. - */ -struct vm_page * -uvm_pmr_nextsz(struct uvm_pmemrange *pmr, struct vm_page *pg, int mt) -{ - struct vm_page *npg; - - KASSERT(pmr != NULL && pg != NULL); - if (pg->fq.free.pages == 1) { - if (TAILQ_NEXT(pg, pageq) != NULL) - return TAILQ_NEXT(pg, pageq); - else - npg = RB_MIN(uvm_pmr_size, &pmr->size[mt]); - } else - npg = RB_NEXT(uvm_pmr_size, &pmr->size[mt], pg + 1); - - return npg == NULL ? NULL : npg - 1; -} - -/* - * Finds the previous and next ranges relative to the (uninserted) pg range. - * - * *pg_prev == NULL if no previous range is available, that can join with - * pg. - * *pg_next == NULL if no previous range is available, that can join with - * pg. - */ -void -uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, struct vm_page *pg, - struct vm_page **pg_prev, struct vm_page **pg_next) -{ - KASSERT(pg_prev != NULL && pg_next != NULL); - - *pg_next = RB_NFIND(uvm_pmr_addr, &pmr->addr, pg); - if (*pg_next == NULL) - *pg_prev = RB_MAX(uvm_pmr_addr, &pmr->addr); - else - *pg_prev = RB_PREV(uvm_pmr_addr, &pmr->addr, *pg_next); - - /* Reset if not contig. */ - if (*pg_prev != NULL && - (atop(VM_PAGE_TO_PHYS(*pg_prev)) + (*pg_prev)->fq.free.pages - != atop(VM_PAGE_TO_PHYS(pg)) || - uvm_pmr_pg_to_memtype(*pg_prev) != uvm_pmr_pg_to_memtype(pg))) - *pg_prev = NULL; - if (*pg_next != NULL && - (atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages - != atop(VM_PAGE_TO_PHYS(*pg_next)) || - uvm_pmr_pg_to_memtype(*pg_next) != uvm_pmr_pg_to_memtype(pg))) - *pg_next = NULL; - return; -} - -/* - * Remove a range from the address tree. - * Address tree maintains pmr counters. - */ -static __inline void -uvm_pmr_remove_addr(struct uvm_pmemrange *pmr, struct vm_page *pg) -{ - KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg); - KASSERT(pg->pg_flags & PQ_FREE); - RB_REMOVE(uvm_pmr_addr, &pmr->addr, pg); - - pmr->nsegs--; -} -/* - * Remove a range from the size tree. 
- */ -static __inline void -uvm_pmr_remove_size(struct uvm_pmemrange *pmr, struct vm_page *pg) -{ - int memtype; -#ifdef DEBUG - struct vm_page *i; -#endif - - KASSERT(pg->pg_flags & PQ_FREE); - memtype = uvm_pmr_pg_to_memtype(pg); - - if (pg->fq.free.pages == 1) { -#ifdef DEBUG - TAILQ_FOREACH(i, &pmr->single[memtype], pageq) { - if (i == pg) - break; - } - KDASSERT(i == pg); -#endif - TAILQ_REMOVE(&pmr->single[memtype], pg, pageq); - } else { - KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[memtype], - pg + 1) == pg + 1); - RB_REMOVE(uvm_pmr_size, &pmr->size[memtype], pg + 1); - } -} -/* Remove from both trees. */ -void -uvm_pmr_remove(struct uvm_pmemrange *pmr, struct vm_page *pg) -{ - uvm_pmr_assertvalid(pmr); - uvm_pmr_remove_size(pmr, pg); - uvm_pmr_remove_addr(pmr, pg); - uvm_pmr_assertvalid(pmr); -} - -/* - * Insert the range described in pg. - * Returns the range thus created (which may be joined with the previous and - * next ranges). - * If no_join, the caller guarantees that the range cannot possibly join - * with adjecent ranges. - */ -static __inline struct vm_page * -uvm_pmr_insert_addr(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join) -{ - struct vm_page *prev, *next; - -#ifdef DEBUG - struct vm_page *i; - int mt; - - for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) { - TAILQ_FOREACH(i, &pmr->single[mt], pageq) - KDASSERT(i != pg); - if (pg->fq.free.pages > 1) { - KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[mt], - pg + 1) == NULL); - } - KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == NULL); - } -#endif - - KASSERT(pg->pg_flags & PQ_FREE); - KASSERT(pg->fq.free.pages >= 1); - - if (!no_join) { - uvm_pmr_pnaddr(pmr, pg, &prev, &next); - if (next != NULL) { - uvm_pmr_remove_size(pmr, next); - uvm_pmr_remove_addr(pmr, next); - pg->fq.free.pages += next->fq.free.pages; - next->fq.free.pages = 0; - } - if (prev != NULL) { - uvm_pmr_remove_size(pmr, prev); - prev->fq.free.pages += pg->fq.free.pages; - pg->fq.free.pages = 0; - return prev; - } - } -#ifdef DEBUG - else { - uvm_pmr_pnaddr(pmr, pg, &prev, &next); - KDASSERT(prev == NULL && next == NULL); - } -#endif /* DEBUG */ - - RB_INSERT(uvm_pmr_addr, &pmr->addr, pg); - - pmr->nsegs++; - - return pg; -} -/* - * Insert the range described in pg. - * Returns the range thus created (which may be joined with the previous and - * next ranges). - * Page must already be in the address tree. - */ -static __inline void -uvm_pmr_insert_size(struct uvm_pmemrange *pmr, struct vm_page *pg) -{ - int memtype; -#ifdef DEBUG - struct vm_page *i; - int mti; -#endif - - memtype = uvm_pmr_pg_to_memtype(pg); -#ifdef DEBUG - for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) { - TAILQ_FOREACH(i, &pmr->single[mti], pageq) - KDASSERT(i != pg); - if (pg->fq.free.pages > 1) { - KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[mti], - pg + 1) == NULL); - } - KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg); - } - for (i = pg; i < pg + pg->fq.free.pages; i++) - KASSERT(uvm_pmr_pg_to_memtype(i) == memtype); -#endif - - KASSERT(pg->pg_flags & PQ_FREE); - KASSERT(pg->fq.free.pages >= 1); - - if (pg->fq.free.pages == 1) - TAILQ_INSERT_TAIL(&pmr->single[memtype], pg, pageq); - else - RB_INSERT(uvm_pmr_size, &pmr->size[memtype], pg + 1); -} -/* Insert in both trees. 
*/ -struct vm_page * -uvm_pmr_insert(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join) -{ - uvm_pmr_assertvalid(pmr); - pg = uvm_pmr_insert_addr(pmr, pg, no_join); - uvm_pmr_insert_size(pmr, pg); - uvm_pmr_assertvalid(pmr); - return pg; -} - -/* - * Remove the first segment of contiguous pages from pgl. - * A segment ends if it crosses boundary (unless boundary = 0) or - * if it would enter a different uvm_pmemrange. - * - * Work: the page range that the caller is currently working with. - * May be null. - */ -psize_t -uvm_pmr_remove_1strange(struct pglist *pgl, paddr_t boundary, - struct vm_page **work) -{ - struct vm_page *pg, *pre_last, *last, *inserted; - psize_t count; - struct uvm_pmemrange *pmr; - paddr_t first_boundary; - - KASSERT(!TAILQ_EMPTY(pgl)); - - pg = TAILQ_FIRST(pgl); - pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg))); - KDASSERT(pmr != NULL); - if (boundary != 0) { - first_boundary = - PMR_ALIGN(atop(VM_PAGE_TO_PHYS(pg)) + 1, boundary); - } else - first_boundary = 0; - - /* Remove all pages in the first segment. */ - pre_last = pg; - last = TAILQ_NEXT(pre_last, pageq); - TAILQ_REMOVE(pgl, pre_last, pageq); - count = 1; - /* - * While loop checks the following: - * - last != NULL we have not reached the end of pgs - * - boundary == 0 || last < first_boundary - * we do not cross a boundary - * - atop(pre_last) + 1 == atop(last) - * still in the same segment - * - low <= last - * - high > last still testing the same memory range - * - * At the end of the loop, last points at the next segment - * and each page [pg, pre_last] (inclusive range) has been removed - * and count is the number of pages that have been removed. - */ - while (last != NULL && - (boundary == 0 || atop(VM_PAGE_TO_PHYS(last)) < first_boundary) && - atop(VM_PAGE_TO_PHYS(pre_last)) + 1 == - atop(VM_PAGE_TO_PHYS(last)) && - pmr->low <= atop(VM_PAGE_TO_PHYS(last)) && - pmr->high > atop(VM_PAGE_TO_PHYS(last))) { - count++; - pre_last = last; - last = TAILQ_NEXT(last, pageq); - TAILQ_REMOVE(pgl, pre_last, pageq); - } - KDASSERT(TAILQ_FIRST(pgl) == last); - KDASSERT(pg + (count - 1) == pre_last); - - pg->fq.free.pages = count; - inserted = uvm_pmr_insert(pmr, pg, 0); - - if (work != NULL && *work != NULL && - atop(VM_PAGE_TO_PHYS(inserted)) <= atop(VM_PAGE_TO_PHYS(*work)) && - atop(VM_PAGE_TO_PHYS(inserted)) + inserted->fq.free.pages > - atop(VM_PAGE_TO_PHYS(*work))) - *work = inserted; - return count; -} - -/* - * Extract a number of pages from a segment of free pages. - * Called by uvm_pmr_getpages. - * - * Returns the segment that was created from pages left over at the tail - * of the remove set of pages, or NULL if no pages were left at the tail. - */ -struct vm_page * -uvm_pmr_extract_range(struct uvm_pmemrange *pmr, struct vm_page *pg, - paddr_t start, paddr_t end, struct pglist *result) -{ - struct vm_page *after, *pg_i; - psize_t before_sz, after_sz; -#ifdef DEBUG - psize_t i; -#endif - - KASSERT(end > start); - KASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg))); - KASSERT(pmr->high >= atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages); - KASSERT(atop(VM_PAGE_TO_PHYS(pg)) <= start); - KASSERT(atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages >= end); - - before_sz = start - atop(VM_PAGE_TO_PHYS(pg)); - after_sz = atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages - end; - KDASSERT(before_sz + after_sz + (end - start) == pg->fq.free.pages); - uvm_pmr_assertvalid(pmr); - - uvm_pmr_remove_size(pmr, pg); - if (before_sz == 0) - uvm_pmr_remove_addr(pmr, pg); - - /* Add selected pages to result. 
*/ - for (pg_i = pg + before_sz; atop(VM_PAGE_TO_PHYS(pg_i)) < end; - pg_i++) { - pg_i->fq.free.pages = 0; - TAILQ_INSERT_TAIL(result, pg_i, pageq); - KDASSERT(pg_i->pg_flags & PQ_FREE); - } - - /* Before handling. */ - if (before_sz > 0) { - pg->fq.free.pages = before_sz; - uvm_pmr_insert_size(pmr, pg); - } - - /* After handling. */ - after = NULL; - if (after_sz > 0) { - after = pg + before_sz + (end - start); -#ifdef DEBUG - for (i = 0; i < after_sz; i++) { - KASSERT(!uvm_pmr_isfree(after + i)); - } -#endif - KDASSERT(atop(VM_PAGE_TO_PHYS(after)) == end); - after->fq.free.pages = after_sz; - after = uvm_pmr_insert_addr(pmr, after, 1); - uvm_pmr_insert_size(pmr, after); - } - - uvm_pmr_assertvalid(pmr); - return after; -} - -/* - * Acquire a number of pages. - * - * count: the number of pages returned - * start: lowest page number - * end: highest page number +1 - * (start = end = 0: no limitation) - * align: power-of-2 alignment constraint (align = 1: no alignment) - * boundary: power-of-2 boundary (boundary = 0: no boundary) - * maxseg: maximum number of segments to return - * flags: UVM_PLA_* flags - * result: returned pages storage (uses pageq) - */ -int -uvm_pmr_getpages(psize_t count, paddr_t start, paddr_t end, paddr_t align, - paddr_t boundary, int maxseg, int flags, struct pglist *result) -{ - struct uvm_pmemrange *pmr; /* Iterate memory ranges. */ - struct vm_page *found, *f_next; /* Iterate chunks. */ - psize_t fcount; /* Current found pages. */ - int fnsegs; /* Current segment counter. */ - int try, start_try; - psize_t search[2]; - paddr_t fstart, fend; /* Pages to be taken from found. */ - int memtype; /* Requested memtype. */ - int desperate; /* True if allocation failed. */ - - /* Validate arguments. */ - KASSERT(count > 0); - KASSERT((start == 0 && end == 0) || (start < end)); - KASSERT(align >= 1 && powerof2(align)); - KASSERT(maxseg > 0); - KASSERT(boundary == 0 || powerof2(boundary)); - KDASSERT(boundary == 0 || maxseg * boundary >= count); - KASSERT(TAILQ_EMPTY(result)); - - /* Configure search. If start_try == 0, search[0] should be faster - * (because it will have to throw away less segments). - * search[1] is the worst case: start searching at the smallest - * possible range instead of starting at the range most likely to - * fulfill the allocation. */ - start_try = 0; - search[0] = (flags & UVM_PLA_TRY_CONTIG ? count : - pow2divide(count, maxseg)); - search[1] = 1; - if (maxseg == 1) { - start_try = 1; - search[1] = count; - } else if (search[1] >= search[0]) - start_try = 1; - -ReTry: /* Return point after sleeping. */ - fcount = 0; - fnsegs = 0; - - /* Memory type: if zeroed memory is requested, traverse the zero set. - * Otherwise, traverse the dirty set. */ - if (flags & UVM_PLA_ZERO) - memtype = UVM_PMR_MEMTYPE_ZERO; - else - memtype = UVM_PMR_MEMTYPE_DIRTY; - desperate = 0; - - uvm_lock_fpageq(); - -ReTryDesperate: - /* - * If we just want any page(s), go for the really fast option. - */ - if (count <= maxseg && align == 1 && boundary == 0 && - (flags & UVM_PLA_TRY_CONTIG) == 0) { - if (!desperate) { - KASSERT(fcount == 0); - fcount += uvm_pmr_get1page(count, memtype, result, - start, end); - } else { - for (memtype = 0; memtype < UVM_PMR_MEMTYPE_MAX && - fcount < count; memtype++) { - fcount += uvm_pmr_get1page(count - fcount, - memtype, result, start, end); - } - } - - if (fcount == count) - goto Out; - else - goto Fail; - } - - TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) { - /* Empty range. 
*/ - if (pmr->nsegs == 0) - continue; - - /* Outside requested range. */ - if (!(start == 0 && end == 0) && - !PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, start, end)) - continue; - - try = start_try; -ReScan: /* Return point at try++. */ - - for (found = uvm_pmr_nfindsz(pmr, search[try], memtype); - found != NULL; - found = f_next) { - f_next = uvm_pmr_nextsz(pmr, found, memtype); - - fstart = atop(VM_PAGE_TO_PHYS(found)); -DrainFound: - /* Throw away the first segment if fnsegs == maxseg */ - if (fnsegs == maxseg) { - fnsegs--; - fcount -= - uvm_pmr_remove_1strange(result, boundary, - &found); - } - - fstart = PMR_ALIGN(fstart, align); - fend = atop(VM_PAGE_TO_PHYS(found)) + - found->fq.free.pages; - if (fstart >= fend) - continue; - if (boundary != 0) { - fend = - MIN(fend, PMR_ALIGN(fstart + 1, boundary)); - } - if (fend - fstart > count - fcount) - fend = fstart + (count - fcount); - - fcount += fend - fstart; - fnsegs++; - found = uvm_pmr_extract_range(pmr, found, - fstart, fend, result); - - if (fcount == count) - goto Out; - - /* If there's still space left in found, try to - * fully drain it prior to continueing. */ - if (found != NULL) { - fstart = fend; - goto DrainFound; - } - } - - if (++try < nitems(search)) - goto ReScan; - } - - /* - * Not enough memory of the requested type available. Fall back to - * less good memory that we'll clean up better later. - * - * This algorithm is not very smart though, it just starts scanning - * a different typed range, but the nicer ranges of the previous - * iteration may fall out. - */ - if (!desperate) { - desperate = 1; - memtype = 0; - goto ReTryDesperate; - } else if (++memtype < UVM_PMR_MEMTYPE_MAX) - goto ReTryDesperate; - -Fail: - /* - * Allocation failed. - */ - - /* XXX: claim from memory reserve here */ - - while (!TAILQ_EMPTY(result)) - uvm_pmr_remove_1strange(result, 0, NULL); - uvm_unlock_fpageq(); - - if (flags & UVM_PLA_WAITOK) { - uvm_wait("uvm_pmr_getpages"); - goto ReTry; - } else - wakeup(&uvm.pagedaemon_proc); - - return ENOMEM; - -Out: - - /* - * Allocation succesful. - */ - - uvmexp.free -= fcount; - - uvm_unlock_fpageq(); - - /* Update statistics and zero pages if UVM_PLA_ZERO. */ - TAILQ_FOREACH(found, result, pageq) { - if (found->pg_flags & PG_ZERO) { - uvmexp.zeropages--; - } - if (flags & UVM_PLA_ZERO) { - if (found->pg_flags & PG_ZERO) - uvmexp.pga_zerohit++; - else { - uvmexp.pga_zeromiss++; - uvm_pagezero(found); - } - } - atomic_clearbits_int(&found->pg_flags, PG_ZERO | PQ_FREE); - - found->uobject = NULL; - found->uanon = NULL; - found->pg_version++; - } - - return 0; -} - -/* - * Free a number of contig pages (invoked by uvm_page_init). - */ -void -uvm_pmr_freepages(struct vm_page *pg, psize_t count) -{ - struct uvm_pmemrange *pmr; - psize_t i, pmr_count; - - uvm_lock_fpageq(); - - for (i = 0; i < count; i++) { - atomic_clearbits_int(&pg[i].pg_flags, pg[i].pg_flags); - atomic_setbits_int(&pg[i].pg_flags, PQ_FREE); - } - - while (count > 0) { - pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg))); - KASSERT(pmr != NULL); - - pmr_count = MIN(count, pmr->high - atop(VM_PAGE_TO_PHYS(pg))); - pg->fq.free.pages = pmr_count; - uvm_pmr_insert(pmr, pg, 0); - - uvmexp.free += pmr_count; - count -= pmr_count; - pg += pmr_count; - } - wakeup(&uvmexp.free); - - uvm_unlock_fpageq(); -} - -/* - * Free all pages in the queue. 
- */ -void -uvm_pmr_freepageq(struct pglist *pgl) -{ - struct vm_page *pg; - - TAILQ_FOREACH(pg, pgl, pageq) { - atomic_clearbits_int(&pg->pg_flags, pg->pg_flags); - atomic_setbits_int(&pg->pg_flags, PQ_FREE); - } - - uvm_lock_fpageq(); - while (!TAILQ_EMPTY(pgl)) - uvmexp.free += uvm_pmr_remove_1strange(pgl, 0, NULL); - wakeup(&uvmexp.free); - uvm_unlock_fpageq(); - - return; -} - -/* - * Store a pmemrange in the list. - * - * The list is sorted by use. - */ -struct uvm_pmemrange * -uvm_pmemrange_use_insert(struct uvm_pmemrange_use *useq, - struct uvm_pmemrange *pmr) -{ - struct uvm_pmemrange *iter; - int cmp = 1; - - TAILQ_FOREACH(iter, useq, pmr_use) { - cmp = uvm_pmemrange_use_cmp(pmr, iter); - if (cmp == 0) - return iter; - if (cmp == -1) - break; - } - if (cmp == 0) - return iter; - - if (iter == NULL) - TAILQ_INSERT_TAIL(useq, pmr, pmr_use); - else - TAILQ_INSERT_BEFORE(iter, pmr, pmr_use); - return NULL; -} - -#ifdef DEBUG -/* - * Validation of the whole pmemrange. - * Called with fpageq locked. - */ -void -uvm_pmr_assertvalid(struct uvm_pmemrange *pmr) -{ - struct vm_page *prev, *next, *i, *xref; - int lcv, mti; - - /* Validate address tree. */ - RB_FOREACH(i, uvm_pmr_addr, &pmr->addr) { - /* Validate the range. */ - KASSERT(i->fq.free.pages > 0); - KASSERT(atop(VM_PAGE_TO_PHYS(i)) >= pmr->low); - KASSERT(atop(VM_PAGE_TO_PHYS(i)) + i->fq.free.pages - <= pmr->high); - - /* Validate each page in this range. */ - for (lcv = 0; lcv < i->fq.free.pages; lcv++) { - KASSERT(lcv == 0 || i[lcv].fq.free.pages == 0); - /* Flag check: - * - PG_ZERO: page is zeroed. - * - PQ_FREE: page is free. - * Any other flag is a mistake. */ - if (i[lcv].pg_flags != - (i[lcv].pg_flags & (PG_ZERO | PQ_FREE))) { - panic("i[%lu].pg_flags = %x, should be %x\n", - lcv, i[lcv].pg_flags, PG_ZERO | PQ_FREE); - } - /* Free pages are: - * - not wired - * - not loaned - * - have no vm_anon - * - have no uvm_object */ - KASSERT(i[lcv].wire_count == 0); - KASSERT(i[lcv].loan_count == 0); - KASSERT(i[lcv].uanon == NULL); - KASSERT(i[lcv].uobject == NULL); - /* Pages in a single range always have the same - * memtype. */ - KASSERT(uvm_pmr_pg_to_memtype(&i[0]) == - uvm_pmr_pg_to_memtype(&i[lcv])); - } - - /* Check that it shouldn't be joined with its predecessor. */ - prev = RB_PREV(uvm_pmr_addr, &pmr->addr, i); - if (prev != NULL) { - KASSERT(uvm_pmr_pg_to_memtype(&i[0]) != - uvm_pmr_pg_to_memtype(&i[lcv]) || - atop(VM_PAGE_TO_PHYS(i)) > - atop(VM_PAGE_TO_PHYS(prev)) + prev->fq.free.pages); - } - - /* Assert i is in the size tree as well. */ - if (i->fq.free.pages == 1) { - TAILQ_FOREACH(xref, - &pmr->single[uvm_pmr_pg_to_memtype(i)], pageq) { - if (xref == i) - break; - } - KASSERT(xref == i); - } else { - KASSERT(RB_FIND(uvm_pmr_size, - &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) == - i + 1); - } - } - - /* Validate size tree. */ - for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) { - for (i = uvm_pmr_nfindsz(pmr, 1, mti); i != NULL; i = next) { - next = uvm_pmr_nextsz(pmr, i, mti); - if (next != NULL) { - KASSERT(i->fq.free.pages <= - next->fq.free.pages); - } - - /* Assert i is in the addr tree as well. */ - KASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, i) == i); - - /* Assert i is of the correct memory type. */ - KASSERT(uvm_pmr_pg_to_memtype(i) == mti); - } - } - - /* Validate nsegs statistic. */ - lcv = 0; - RB_FOREACH(i, uvm_pmr_addr, &pmr->addr) - lcv++; - KASSERT(pmr->nsegs == lcv); -} -#endif /* DEBUG */ - -/* - * Split pmr at split point pageno. - * Called with fpageq unlocked. 
- * - * Split is only applied if a pmemrange spans pageno. - */ -void -uvm_pmr_split(paddr_t pageno) -{ - struct uvm_pmemrange *pmr, *drain; - struct vm_page *rebuild, *prev, *next; - psize_t prev_sz; - - uvm_lock_fpageq(); - pmr = uvm_pmemrange_find(pageno); - if (pmr == NULL || !(pmr->low < pageno)) { - /* No split required. */ - uvm_unlock_fpageq(); - return; - } - - KASSERT(pmr->low < pageno); - KASSERT(pmr->high > pageno); - - drain = uvm_pmr_allocpmr(); - drain->low = pageno; - drain->high = pmr->high; - drain->use = pmr->use; - - uvm_pmr_assertvalid(pmr); - uvm_pmr_assertvalid(drain); - KASSERT(drain->nsegs == 0); - - RB_FOREACH(rebuild, uvm_pmr_addr, &pmr->addr) { - if (atop(VM_PAGE_TO_PHYS(rebuild)) >= pageno) - break; - } - if (rebuild == NULL) - prev = RB_MAX(uvm_pmr_addr, &pmr->addr); - else - prev = RB_PREV(uvm_pmr_addr, &pmr->addr, rebuild); - KASSERT(prev == NULL || atop(VM_PAGE_TO_PHYS(prev)) < pageno); - - /* - * Handle free chunk that spans the split point. - */ - if (prev != NULL && - atop(VM_PAGE_TO_PHYS(prev)) + prev->fq.free.pages > pageno) { - psize_t before, after; - - KASSERT(atop(VM_PAGE_TO_PHYS(prev)) < pageno); - - uvm_pmr_remove(pmr, prev); - prev_sz = prev->fq.free.pages; - before = pageno - atop(VM_PAGE_TO_PHYS(prev)); - after = atop(VM_PAGE_TO_PHYS(prev)) + prev_sz - pageno; - - KASSERT(before > 0); - KASSERT(after > 0); - - prev->fq.free.pages = before; - uvm_pmr_insert(pmr, prev, 1); - (prev + before)->fq.free.pages = after; - uvm_pmr_insert(drain, prev + before, 1); - } - - /* - * Move free chunks that no longer fall in the range. - */ - for (; rebuild != NULL; rebuild = next) { - next = RB_NEXT(uvm_pmr_addr, &pmr->addr, rebuild); - - uvm_pmr_remove(pmr, rebuild); - uvm_pmr_insert(drain, rebuild, 1); - } - - pmr->high = pageno; - uvm_pmr_assertvalid(pmr); - uvm_pmr_assertvalid(drain); - - RB_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, drain); - uvm_pmemrange_use_insert(&uvm.pmr_control.use, drain); - uvm_unlock_fpageq(); -} - -/* - * Increase the usage counter for the given range of memory. - * - * The more usage counters a given range of memory has, the more will be - * attempted not to allocate from it. - * - * Addresses here are in paddr_t, not page-numbers. - * The lowest and highest allowed address are specified. - */ -void -uvm_pmr_use_inc(paddr_t low, paddr_t high) -{ - struct uvm_pmemrange *pmr; - - /* - * If high+1 == 0, then you are increasing use of the whole address - * space, which won't make any difference. Skip in that case. - */ - high++; - if (high == 0) - return; - - /* - * pmr uses page numbers, translate low and high. - */ - low = atop(round_page(low)); - high = atop(trunc_page(high)); - uvm_pmr_split(low); - uvm_pmr_split(high); - - uvm_lock_fpageq(); - - /* Increase use count on segments in range. */ - RB_FOREACH(pmr, uvm_pmemrange_addr, &uvm.pmr_control.addr) { - if (PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, low, high)) { - TAILQ_REMOVE(&uvm.pmr_control.use, pmr, pmr_use); - pmr->use++; - uvm_pmemrange_use_insert(&uvm.pmr_control.use, pmr); - } - uvm_pmr_assertvalid(pmr); - } - - uvm_unlock_fpageq(); -} - -/* - * Allocate a pmemrange. - * - * If called from uvm_page_init, the uvm_pageboot_alloc is used. - * If called after uvm_init, malloc is used. - * (And if called in between, you're dead.) 
- */ -struct uvm_pmemrange * -uvm_pmr_allocpmr() -{ - struct uvm_pmemrange *nw; - int i; - - if (!uvm.page_init_done) { - nw = (struct uvm_pmemrange *) - uvm_pageboot_alloc(sizeof(struct uvm_pmemrange)); - bzero(nw, sizeof(struct uvm_pmemrange)); - } else { - nw = malloc(sizeof(struct uvm_pmemrange), - M_VMMAP, M_NOWAIT | M_ZERO); - } - RB_INIT(&nw->addr); - for (i = 0; i < UVM_PMR_MEMTYPE_MAX; i++) { - RB_INIT(&nw->size[i]); - TAILQ_INIT(&nw->single[i]); - } - return nw; -} - -static const struct uvm_io_ranges uvm_io_ranges[] = UVM_IO_RANGES; - -/* - * Initialization of pmr. - * Called by uvm_page_init. - * - * Sets up pmemranges that maps the vm_physmem data. - */ -void -uvm_pmr_init(void) -{ - struct uvm_pmemrange *new_pmr; - int i; - - TAILQ_INIT(&uvm.pmr_control.use); - RB_INIT(&uvm.pmr_control.addr); - - for (i = 0 ; i < vm_nphysseg ; i++) { - new_pmr = uvm_pmr_allocpmr(); - - new_pmr->low = vm_physmem[i].start; - new_pmr->high = vm_physmem[i].end; - - RB_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, new_pmr); - uvm_pmemrange_use_insert(&uvm.pmr_control.use, new_pmr); - } - - for (i = 0; i < nitems(uvm_io_ranges); i++) - uvm_pmr_use_inc(uvm_io_ranges[i].low, uvm_io_ranges[i].high); -} - -/* - * Find the pmemrange that contains the given page number. - * - * (Manually traverses the binary tree, because that is cheaper on stack - * usage.) - */ -struct uvm_pmemrange * -uvm_pmemrange_find(paddr_t pageno) -{ - struct uvm_pmemrange *pmr; - - pmr = RB_ROOT(&uvm.pmr_control.addr); - while (pmr != NULL) { - if (pmr->low > pageno) - pmr = RB_LEFT(pmr, pmr_addr); - else if (pmr->high <= pageno) - pmr = RB_RIGHT(pmr, pmr_addr); - else - break; - } - - return pmr; -} - -#if defined(DDB) || defined(DEBUG) -/* - * Return true if the given page is in any of the free lists. - * Used by uvm_page_printit. - * This function is safe, even if the page is not on the freeq. - * Note: does not apply locking, only called from ddb. - */ -int -uvm_pmr_isfree(struct vm_page *pg) -{ - struct vm_page *r; - struct uvm_pmemrange *pmr; - - pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg))); - if (pmr == NULL) - return 0; - r = RB_NFIND(uvm_pmr_addr, &pmr->addr, pg); - if (r == NULL) - r = RB_MAX(uvm_pmr_addr, &pmr->addr); - else - r = RB_PREV(uvm_pmr_addr, &pmr->addr, r); - if (r == NULL) - return 0; /* Empty tree. */ - - KDASSERT(atop(VM_PAGE_TO_PHYS(r)) <= atop(VM_PAGE_TO_PHYS(pg))); - return atop(VM_PAGE_TO_PHYS(r)) + r->fq.free.pages > - atop(VM_PAGE_TO_PHYS(pg)); -} -#endif /* DEBUG */ - -/* - * Allocate any page, the fastest way. No constraints. - */ -int -uvm_pmr_get1page(psize_t count, int memtype, struct pglist *result, - paddr_t start, paddr_t end) -{ - struct uvm_pmemrange *pmr; - struct vm_page *found; - psize_t fcount; - - fcount = 0; - pmr = TAILQ_FIRST(&uvm.pmr_control.use); - while (pmr != NULL && fcount != count) { - /* Outside requested range. 
*/ - if (!(start == 0 && end == 0) && - !PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, start, end)) { - pmr = TAILQ_NEXT(pmr, pmr_use); - continue; - } - - found = TAILQ_FIRST(&pmr->single[memtype]); - if (found == NULL) { - found = RB_ROOT(&pmr->size[memtype]); - /* Size tree gives pg[1] instead of pg[0] */ - if (found != NULL) - found--; - } - if (found == NULL) { - pmr = TAILQ_NEXT(pmr, pmr_use); - continue; - } - - uvm_pmr_assertvalid(pmr); - uvm_pmr_remove_size(pmr, found); - while (found->fq.free.pages > 0 && fcount < count) { - found->fq.free.pages--; - fcount++; - TAILQ_INSERT_HEAD(result, - &found[found->fq.free.pages], pageq); - } - if (found->fq.free.pages > 0) { - uvm_pmr_insert_size(pmr, found); - KASSERT(fcount == count); - uvm_pmr_assertvalid(pmr); - return fcount; - } else - uvm_pmr_remove_addr(pmr, found); - uvm_pmr_assertvalid(pmr); - } - - /* Ran out of ranges before enough pages were gathered. */ - return fcount; -} diff --git a/sys/uvm/uvm_pmemrange.h b/sys/uvm/uvm_pmemrange.h deleted file mode 100644 index 493961f1f9c..00000000000 --- a/sys/uvm/uvm_pmemrange.h +++ /dev/null @@ -1,83 +0,0 @@ -/* $OpenBSD: uvm_pmemrange.h,v 1.3 2009/06/14 02:20:23 deraadt Exp $ */ - -/* - * Copyright (c) 2009 Ariane van der Steldt - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * uvm_pmemrange.h: describe and manage free physical memory. - */ - -#ifndef _UVM_UVM_PMEMRANGE_H_ -#define _UVM_UVM_PMEMRANGE_H_ - -#include -#include - -RB_HEAD(uvm_pmr_addr, vm_page); -RB_HEAD(uvm_pmr_size, vm_page); - -/* - * Page types available: - * - DIRTY: this page may contain random data. - * - ZERO: this page has been zeroed. - */ -#define UVM_PMR_MEMTYPE_DIRTY 1 -#define UVM_PMR_MEMTYPE_ZERO 1 -#define UVM_PMR_MEMTYPE_MAX 2 - -/* - * An address range of memory. - */ -struct uvm_pmemrange { - struct uvm_pmr_addr addr; /* Free page chunks, sorted by addr. */ - struct uvm_pmr_size size[UVM_PMR_MEMTYPE_MAX]; - /* Free page chunks, sorted by size. */ - TAILQ_HEAD(, vm_page) single[UVM_PMR_MEMTYPE_MAX]; - /* single page regions (uses pageq) */ - - paddr_t low; /* Start of address range (pgno). */ - paddr_t high; /* End +1 (pgno). */ - int use; /* Use counter. */ - int nsegs; /* Current range count. */ - - TAILQ_ENTRY(uvm_pmemrange) pmr_use; - /* pmr, sorted by use */ - RB_ENTRY(uvm_pmemrange) pmr_addr; - /* pmr, sorted by address */ -}; - -RB_HEAD(uvm_pmemrange_addr, uvm_pmemrange); -TAILQ_HEAD(uvm_pmemrange_use, uvm_pmemrange); - -/* - * pmr control structure. Contained in uvm.pmr_control. 
- */ -struct uvm_pmr_control { - struct uvm_pmemrange_addr addr; - struct uvm_pmemrange_use use; -}; - -void uvm_pmr_freepages(struct vm_page *, psize_t); -void uvm_pmr_freepageq(struct pglist *pgl); -int uvm_pmr_getpages(psize_t, paddr_t, paddr_t, paddr_t, paddr_t, - int, int, struct pglist *); -void uvm_pmr_init(void); - -#ifdef DDB -int uvm_pmr_isfree(struct vm_page *pg); -#endif - -#endif /* _UVM_UVM_PMEMRANGE_H_ */ diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c index e85e2c24e38..998f0fa0a62 100644 --- a/sys/uvm/uvm_vnode.c +++ b/sys/uvm/uvm_vnode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_vnode.c,v 1.63 2009/06/16 00:11:29 oga Exp $ */ +/* $OpenBSD: uvm_vnode.c,v 1.64 2009/06/16 16:42:41 ariane Exp $ */ /* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */ /* @@ -561,7 +561,7 @@ uvm_vnp_terminate(struct vnode *vp) while (uvn->u_obj.uo_npages) { #ifdef DEBUG struct vm_page *pp; - TAILQ_FOREACH(pp, &uvn->u_obj.memq, fq.queues.listq) { + TAILQ_FOREACH(pp, &uvn->u_obj.memq, listq) { if ((pp->pg_flags & PG_BUSY) == 0) panic("uvm_vnp_terminate: detected unbusy pg"); } -- cgit v1.2.3
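
For reference, the lookup routine removed above (uvm_pmemrange_find) walks the
address-sorted red-black tree by hand, which the deleted comment notes is
cheaper on stack usage than a generic tree walk. Below is a minimal userland
sketch of the same descent, not the kernel code: it assumes a plain binary
tree node with explicit left/right pointers instead of the RB_* macros, and
the names pmr_node and pmr_find are made up for illustration.

#include <stddef.h>
#include <stdio.h>

typedef unsigned long paddr_t;	/* page number, as in the kernel's atop() units */

/*
 * Simplified stand-in for struct uvm_pmemrange: a node in a binary
 * search tree keyed on the half-open page-number interval [low, high).
 */
struct pmr_node {
	paddr_t low;			/* first page number in the range */
	paddr_t high;			/* one past the last page number */
	struct pmr_node *left;		/* subtree of ranges below low */
	struct pmr_node *right;		/* subtree of ranges at or above high */
};

/*
 * Iterative descent, mirroring the removed uvm_pmemrange_find():
 * go left while the node starts above pageno, right while it ends
 * at or below pageno, and stop when pageno falls inside [low, high).
 */
static struct pmr_node *
pmr_find(struct pmr_node *root, paddr_t pageno)
{
	struct pmr_node *pmr = root;

	while (pmr != NULL) {
		if (pmr->low > pageno)
			pmr = pmr->left;
		else if (pmr->high <= pageno)
			pmr = pmr->right;
		else
			break;		/* pageno is covered by this range */
	}
	return pmr;
}

int
main(void)
{
	struct pmr_node below = { 0x0, 0x1000, NULL, NULL };
	struct pmr_node above = { 0x100000, 0x200000, NULL, NULL };
	struct pmr_node root = { 0x1000, 0x100000, &below, &above };
	struct pmr_node *hit = pmr_find(&root, 0x123456);

	if (hit != NULL)
		printf("page %#lx lies in [%#lx, %#lx)\n",
		    (unsigned long)0x123456, hit->low, hit->high);
	return 0;
}

An iterative walk like this keeps stack use constant no matter how deep the
tree grows, which is exactly the trade-off the deleted comment calls out.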
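
The allocator being backed out also keeps its ranges on a list ordered by a
per-range use counter, so constrained memory (the ISA and 32-bit DMA windows
reserved through uvm_pmr_use_inc) is only consumed once every less-contended
range has been scanned. The sketch below shows that ordered insertion in
simplified form, assuming the TAILQ macros from <sys/queue.h>; struct range,
range_use_cmp and range_use_insert are stand-ins rather than the real
uvm_pmemrange types, and the duplicate detection performed by the removed
uvm_pmemrange_use_insert is omitted.

#include <sys/queue.h>
#include <stdio.h>

/* Simplified stand-in for struct uvm_pmemrange. */
struct range {
	unsigned long low, high;	/* page-number bounds of the range */
	int use;			/* how many reservations cover it */
	TAILQ_ENTRY(range) use_link;	/* list ordered by use, then low */
};
TAILQ_HEAD(use_list, range);

/* Order ranges by ascending use counter, ties broken by address. */
static int
range_use_cmp(const struct range *a, const struct range *b)
{
	if (a->use != b->use)
		return (a->use < b->use) ? -1 : 1;
	if (a->low != b->low)
		return (a->low < b->low) ? -1 : 1;
	return 0;
}

/* Insert r before the first entry that should sort after it. */
static void
range_use_insert(struct use_list *head, struct range *r)
{
	struct range *iter;

	TAILQ_FOREACH(iter, head, use_link) {
		if (range_use_cmp(r, iter) < 0) {
			TAILQ_INSERT_BEFORE(iter, r, use_link);
			return;
		}
	}
	TAILQ_INSERT_TAIL(head, r, use_link);
}

int
main(void)
{
	struct use_list head = TAILQ_HEAD_INITIALIZER(head);
	struct range isa   = { .low = 0x0,      .high = 0x1000,   .use = 2 };
	struct range dma32 = { .low = 0x1000,   .high = 0x100000, .use = 1 };
	struct range rest  = { .low = 0x100000, .high = 0x800000, .use = 0 };
	struct range *r;

	range_use_insert(&head, &isa);
	range_use_insert(&head, &dma32);
	range_use_insert(&head, &rest);

	/* Allocation would scan in this order: least-used ranges first. */
	TAILQ_FOREACH(r, &head, use_link)
		printf("[%#lx, %#lx) use=%d\n", r->low, r->high, r->use);
	return 0;
}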
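
Finally, the Out: path of the removed uvm_pmr_getpages reconciles the PG_ZERO
hint on each allocated page with the caller's UVM_PLA_ZERO request: an
already-zeroed page counts as a zero hit, anything else is zeroed on demand
and counted as a miss. A rough sketch of that bookkeeping follows, with
made-up page and stats structures standing in for struct vm_page and uvmexp,
and a locally defined PG_ZERO value chosen only for illustration.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE	4096
#define PG_ZERO		0x1	/* page content is known to be all zeroes */

struct page {
	int flags;
	unsigned char data[PAGE_SIZE];
};

struct stats {
	long zeropages;	/* pages currently sitting zeroed on the free list */
	long zerohit;	/* zeroed request satisfied by a pre-zeroed page */
	long zeromiss;	/* zeroed request that had to zero on demand */
};

/*
 * Mirror of the accounting at the Out: label of the removed
 * uvm_pmr_getpages(): consume the PG_ZERO hint, zero on demand when the
 * caller asked for zeroed memory, and clear the hint once handed out.
 */
static void
finish_alloc(struct page *pg, bool want_zero, struct stats *st)
{
	if (pg->flags & PG_ZERO)
		st->zeropages--;
	if (want_zero) {
		if (pg->flags & PG_ZERO)
			st->zerohit++;
		else {
			st->zeromiss++;
			memset(pg->data, 0, sizeof(pg->data));
		}
	}
	pg->flags &= ~PG_ZERO;	/* hint is no longer valid after allocation */
}

int
main(void)
{
	struct stats st = { 1, 0, 0 };
	struct page a = { PG_ZERO, { 0 } };
	struct page b = { 0, { 0xff } };

	finish_alloc(&a, true, &st);
	finish_alloc(&b, true, &st);
	printf("zeropages=%ld hits=%ld misses=%ld\n",
	    st.zeropages, st.zerohit, st.zeromiss);
	return 0;
}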