author    | David Gwynne <dlg@cvs.openbsd.org> | 2014-09-22 01:04:59 +0000
committer | David Gwynne <dlg@cvs.openbsd.org> | 2014-09-22 01:04:59 +0000
commit    | 87c5f2a33d29fc41a10484cba8216180e43dc854 (patch)
tree      | f196ee463d5bde4b63479b51fea0dab31c1cd0df /sys/kern
parent    | 82c46a37cdb6b750e8386edfdf8056f7a845e7dc (diff)
rework the pool code to make the locking more obvious (to me at
least). after this i am confident that pools are mpsafe, ie, can
be called without the kernel biglock being held.
the page allocation and setup code has been split into four parts:
pool_p_alloc is called without any locks held to ask the pool_allocator
backend to get a page and page header and set up the item list.
pool_p_insert is called with the pool lock held to insert the newly
minted page on the pool's internal free page list and update its
internal accounting.
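roughly, the grow path composes those two steps like this (a condensed
sketch of the new pool_do_get in the diff below; error handling
trimmed):

	/* curpage is empty, so grow the pool by one page */
	mtx_leave(&pp->pr_mtx);		/* no locks held for the backend */
	ph = pool_p_alloc(pp, flags);	/* page + header + item list */
	mtx_enter(&pp->pr_mtx);
	if (ph != NULL)
		pool_p_insert(pp, ph);	/* lists and accounting, locked */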
once the pool has finished with a page it calls the following:
pool_p_remove is called with the pool lock held to take the now
unnecessary page off the free page list and uncount it.
pool_p_free is called without the pool lock and does a bunch of
checks to verify that the items aren't corrupted and have all been
returned to the page before giving it back to the pool_allocator
to be freed.
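the shrink path in the new pool_put mirrors that (again a condensed
sketch of the diff below):

	struct pool_item_header *freeph = NULL;

	mtx_enter(&pp->pr_mtx);
	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (freeph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pool_p_remove(pp, freeph);	/* unlink and uncount, locked */
	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);	/* poison checks, backend free */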
instead of pool_do_get doing all the work for pool_get, it is now
only responsible for doing a single item allocation. if for any
reason it can't get an item, it just returns NULL. pool_get is now
responsible for checking if the allocation is allowed (according
to high watermarks etc), and for potentially sleeping to wait for
resources if required.
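condensed from the new pool_get in the diff below, that split looks
like this:

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;		/* over the hard limit */
	} else if ((v = pool_do_get(pp, flags)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);
	/* v == NULL and PR_WAITOK: queue a pool_request and sleep on it */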
sleeping for resources is now built on top of pool_requests, which
are modelled on how the scsi midlayer schedules access to scsibus
resources.
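for illustration only, an asynchronous consumer of that interface
could look like the sketch below. the frob_* names and softc are made
up; only pool_request_init and pool_request come from this diff:

	/* hypothetical handler, called when the pool can supply an item */
	void
	frob_item_ready(void *cookie, void *item)
	{
		struct frob_softc *sc = cookie;

		sc->sc_buf = item;	/* take ownership of the item */
		frob_start(sc);		/* resume the stalled work */
	}

	/* queue a request instead of sleeping in pool_get */
	pool_request_init(&sc->sc_req, frob_item_ready, sc);
	pool_request(frob_pool, &sc->sc_req);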
the pool code now calls pool_allocator backends inside its own
calls to KERNEL_LOCK and KERNEL_UNLOCK, so users of pools don't
have to hold the biglock to call pool_get or pool_put.
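concretely, the backend calls are bracketed like this (as in
pool_allocator_alloc in the diff below):

	KERNEL_LOCK();
	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
	KERNEL_UNLOCK();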
tested by krw@ (who found a SMALL_KERNEL issue, thank you)
no one objected
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/subr_pool.c | 778
1 file changed, 334 insertions, 444 deletions
diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c
index b2ac8709427..931ab65b89b 100644
--- a/sys/kern/subr_pool.c
+++ b/sys/kern/subr_pool.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: subr_pool.c,v 1.157 2014/09/17 03:16:00 dlg Exp $	*/
+/*	$OpenBSD: subr_pool.c,v 1.158 2014/09/22 01:04:58 dlg Exp $	*/
 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
 
 /*-
@@ -103,17 +103,18 @@ int pool_debug = 0;
 
 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
 
-int	 pool_catchup(struct pool *);
-void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
+struct pool_item_header *
+	 pool_p_alloc(struct pool *, int);
+void	 pool_p_insert(struct pool *, struct pool_item_header *);
+void	 pool_p_remove(struct pool *, struct pool_item_header *);
+void	 pool_p_free(struct pool *, struct pool_item_header *);
+
 void	 pool_update_curpage(struct pool *);
-void	 pool_swizzle_curpage(struct pool *);
 void	*pool_do_get(struct pool *, int);
-void	 pool_do_put(struct pool *, void *);
-void	 pr_rmpage(struct pool *, struct pool_item_header *,
-	    struct pool_pagelist *);
 int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
 int	 pool_chk(struct pool *);
-struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t , int);
+void	 pool_get_done(void *, void *);
+void	 pool_runqueue(struct pool *, int);
 
 void	*pool_allocator_alloc(struct pool *, int, int *);
 void	 pool_allocator_free(struct pool *, void *);
@@ -205,48 +206,6 @@ pr_find_pagehead(struct pool *pp, void *v)
 }
 
 /*
- * Remove a page from the pool.
- */
-void
-pr_rmpage(struct pool *pp, struct pool_item_header *ph,
-    struct pool_pagelist *pq)
-{
-
-	/*
-	 * If the page was idle, decrement the idle page count.
-	 */
-	if (ph->ph_nmissing == 0) {
-#ifdef DIAGNOSTIC
-		if (pp->pr_nidle == 0)
-			panic("%s: nidle inconsistent", __func__);
-		if (pp->pr_nitems < pp->pr_itemsperpage)
-			panic("%s: nitems inconsistent", __func__);
-#endif
-		pp->pr_nidle--;
-	}
-
-	pp->pr_nitems -= pp->pr_itemsperpage;
-
-	/*
-	 * Unlink a page from the pool and release it (or queue it for release).
-	 */
-	LIST_REMOVE(ph, ph_pagelist);
-	if (!POOL_INPGHDR(pp))
-		RB_REMOVE(phtree, &pp->pr_phtree, ph);
-	pp->pr_npages--;
-	pp->pr_npagefree++;
-	pool_update_curpage(pp);
-
-	if (pq) {
-		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
-	} else {
-		pool_allocator_free(pp, ph->ph_page);
-		if (!POOL_INPGHDR(pp))
-			pool_put(&phpool, ph);
-	}
-}
-
-/*
  * Initialize the given pool resource structure.
  *
  * We export this routine to allow other kernel parts to declare
@@ -351,6 +310,8 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
 	pp->pr_ipl = -1;
 	mtx_init(&pp->pr_mtx, IPL_NONE);
+	mtx_init(&pp->pr_requests_mtx, IPL_NONE);
+	TAILQ_INIT(&pp->pr_requests);
 
 	if (phpool.pr_size == 0) {
 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
@@ -387,6 +348,7 @@ pool_setipl(struct pool *pp, int ipl)
 {
 	pp->pr_ipl = ipl;
 	mtx_init(&pp->pr_mtx, ipl);
+	mtx_init(&pp->pr_requests_mtx, ipl);
 }
 
 /*
@@ -398,6 +360,11 @@ pool_destroy(struct pool *pp)
 	struct pool_item_header *ph;
 	struct pool *prev, *iter;
 
+#ifdef DIAGNOSTIC
+	if (pp->pr_nout != 0)
+		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
+#endif
+
 	/* Remove from global pool list */
 	rw_enter_write(&pool_lock);
 	pool_count--;
@@ -409,212 +376,209 @@ pool_destroy(struct pool *pp)
 			if (iter == pp) {
 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev, pr_poollist);
-				goto removed;
+				break;
 			}
 			prev = iter;
 		}
-#ifdef DIAGNOSTIC
-		panic("%s: pool not on list", __func__);
-#endif
 	}
-removed:
 	rw_exit_write(&pool_lock);
 
-#ifdef DIAGNOSTIC
-	if (pp->pr_nout != 0)
-		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
-#endif
-
 	/* Remove all pages */
-	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
-		pr_rmpage(pp, ph, NULL);
+	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) {
+		pool_p_remove(pp, ph);
+		pool_p_free(pp, ph);
+	}
 
 	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
 	KASSERT(LIST_EMPTY(&pp->pr_partpages));
-
 }
 
-struct pool_item_header *
-pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
+void
+pool_request_init(struct pool_request *pr,
+    void (*handler)(void *, void *), void *cookie)
 {
-	struct pool_item_header *ph;
+	pr->pr_handler = handler;
+	pr->pr_cookie = cookie;
+	pr->pr_item = NULL;
+}
 
-	if (POOL_INPGHDR(pp))
-		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
-	else
-		ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) |
-		    PR_NOWAIT);
-#ifdef DIAGNOSTIC
-	if (pool_debug && ph != NULL)
-		ph->ph_magic = poison_value(ph);
-#endif
-	return (ph);
+void
+pool_request(struct pool *pp, struct pool_request *pr)
+{
+	mtx_enter(&pp->pr_requests_mtx);
+	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
+	pool_runqueue(pp, PR_NOWAIT);
+	mtx_leave(&pp->pr_requests_mtx);
 }
 
+struct pool_get_memory {
+	struct mutex mtx;
+	void * volatile v;
+};
+
 /*
- * Grab an item from the pool; must be called at appropriate spl level
+ * Grab an item from the pool.
  */
 void *
 pool_get(struct pool *pp, int flags)
 {
-	void *v;
+	void *v = NULL;
 
 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
 
-	if ((flags & PR_WAITOK) != 0) {
-#ifdef DIAGNOSTIC
-		assertwaitok();
-		if (pool_debug == 2)
-			yield();
-#endif
-		if (!cold && pool_debug) {
-			KERNEL_UNLOCK();
-			KERNEL_LOCK();
-		}
-	}
-
 	mtx_enter(&pp->pr_mtx);
-#ifdef POOL_DEBUG
-	if (pp->pr_roflags & PR_DEBUGCHK) {
-		if (pool_chk(pp))
-			panic("%s: before", __func__);
-	}
-#endif
-	v = pool_do_get(pp, flags);
-#ifdef POOL_DEBUG
-	if (pp->pr_roflags & PR_DEBUGCHK) {
-		if (pool_chk(pp))
-			panic("%s: after", __func__);
+	if (pp->pr_nout >= pp->pr_hardlimit) {
+		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
+			goto fail;
+	} else if ((v = pool_do_get(pp, flags)) == NULL) {
+		if (ISSET(flags, PR_NOWAIT))
+			goto fail;
 	}
-#endif
-	if (v != NULL)
-		pp->pr_nget++;
 	mtx_leave(&pp->pr_mtx);
 
-	if (v == NULL)
-		return (v);
-
-	if (flags & PR_ZERO)
+	if (v == NULL) {
+		struct pool_get_memory mem =
+		    { MUTEX_INITIALIZER(pp->pr_ipl), NULL };
+		struct pool_request pr;
+
+		pool_request_init(&pr, pool_get_done, &mem);
+		pool_request(pp, &pr);
+
+		mtx_enter(&mem.mtx);
+		while (mem.v == NULL)
+			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
+		mtx_leave(&mem.mtx);
+
+		v = mem.v;
+	}
+
+	if (ISSET(flags, PR_ZERO))
 		memset(v, 0, pp->pr_size);
 
 	return (v);
+
+fail:
+	pp->pr_nfail++;
+	mtx_leave(&pp->pr_mtx);
+	return (NULL);
 }
 
-void *
-pool_do_get(struct pool *pp, int flags)
+void
+pool_get_done(void *xmem, void *v)
 {
-	struct pool_item *pi;
-	struct pool_item_header *ph;
-	void *v;
-	int slowdown = 0;
+	struct pool_get_memory *mem = xmem;
 
-startover:
-	/*
-	 * Check to see if we've reached the hard limit.  If we have,
-	 * and we can wait, then wait until an item has been returned to
-	 * the pool.
-	 */
-#ifdef DIAGNOSTIC
-	if (pp->pr_nout > pp->pr_hardlimit)
-		panic("%s: %s: crossed hard limit", __func__, pp->pr_wchan);
-#endif
-	if (pp->pr_nout == pp->pr_hardlimit) {
-		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
-			/*
-			 * XXX: A warning isn't logged in this case.  Should
-			 * it be?
-			 */
-			pp->pr_flags |= PR_WANTED;
-			pool_sleep(pp);
-			goto startover;
-		}
+	mtx_enter(&mem->mtx);
+	mem->v = v;
+	mtx_leave(&mem->mtx);
 
-		/*
-		 * Log a message that the hard limit has been hit.
-		 */
-		if (pp->pr_hardlimit_warning != NULL &&
-		    ratecheck(&pp->pr_hardlimit_warning_last,
-		    &pp->pr_hardlimit_ratecap))
-			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
+	wakeup_one(mem);
+}
 
-		pp->pr_nfail++;
-		return (NULL);
-	}
+void
+pool_runqueue(struct pool *pp, int flags)
+{
+	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
+	struct pool_request *pr;
 
-	pool_swizzle_curpage(pp);
-	/*
-	 * The convention we use is that if `curpage' is not NULL, then
-	 * it points at a non-empty bucket.
-	 */
-	if ((ph = pp->pr_curpage) == NULL) {
-#ifdef DIAGNOSTIC
-		if (pp->pr_nitems != 0) {
-			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
-			    pp->pr_wchan, pp->pr_nitems);
-			panic("%s: nitems inconsistent", __func__);
+	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
+	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);
+
+	if (pp->pr_requesting++)
+		return;
+
+	do {
+		pp->pr_requesting = 1;
+
+		/* no TAILQ_JOIN? :( */
+		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
+			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
+			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
 		}
-#endif
+		if (TAILQ_EMPTY(&prl))
+			continue;
 
-		/*
-		 * Call the back-end page allocator for more memory.
-		 */
-		v = pool_allocator_alloc(pp, flags, &slowdown);
-		if (v != NULL)
-			ph = pool_alloc_item_header(pp, v, flags);
+		mtx_leave(&pp->pr_requests_mtx);
 
-		if (v == NULL || ph == NULL) {
-			if (v != NULL)
-				pool_allocator_free(pp, v);
+		mtx_enter(&pp->pr_mtx);
+		pr = TAILQ_FIRST(&prl);
+		while (pr != NULL) {
+			if (pp->pr_nout >= pp->pr_hardlimit)
+				break;
 
-			if ((flags & PR_WAITOK) == 0) {
-				pp->pr_nfail++;
-				return (NULL);
-			}
+			pr->pr_item = pool_do_get(pp, flags);
+			if (pr->pr_item == NULL)
+				break;
+
+			pr = TAILQ_NEXT(pr, pr_entry);
+		}
+		mtx_leave(&pp->pr_mtx);
 
-			/*
-			 * Wait for items to be returned to this pool.
-			 *
-			 * XXX: maybe we should wake up once a second and
-			 * try again?
-			 */
-			pp->pr_flags |= PR_WANTED;
-			pool_sleep(pp);
-			goto startover;
+		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
+		    pr->pr_item != NULL) {
+			TAILQ_REMOVE(&prl, pr, pr_entry);
+			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
 		}
 
-		/* We have more memory; add it to the pool */
-		pool_prime_page(pp, v, ph);
-		pp->pr_npagealloc++;
+		mtx_enter(&pp->pr_requests_mtx);
+	} while (--pp->pr_requesting);
+
+	/* no TAILQ_JOIN :( */
+	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
+		TAILQ_REMOVE(&prl, pr, pr_entry);
+		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
+	}
+}
+
+void *
+pool_do_get(struct pool *pp, int flags)
+{
+	struct pool_item *pi;
+	struct pool_item_header *ph;
 
-		if (slowdown && (flags & PR_WAITOK)) {
-			mtx_leave(&pp->pr_mtx);
-			yield();
-			mtx_enter(&pp->pr_mtx);
+	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
+
+	/*
+	 * Account for this item now to avoid races if we need to give up
+	 * pr_mtx to allocate a page.
+	 */
+	pp->pr_nout++;
+
+	if (pp->pr_curpage == NULL) {
+		mtx_leave(&pp->pr_mtx);
+		ph = pool_p_alloc(pp, flags);
+		mtx_enter(&pp->pr_mtx);
+
+		if (ph == NULL) {
+			pp->pr_nout--;
+			return (NULL);
		}
 
-		/* Start the allocation process over. */
-		goto startover;
+		pool_p_insert(pp, ph);
 	}
-	if ((v = pi = XSIMPLEQ_FIRST(&ph->ph_itemlist)) == NULL)
+
+	ph = pp->pr_curpage;
+	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
+	if (pi == NULL)
 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
+
+	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);
 
-#ifdef DIAGNOSTIC
-	if (pp->pr_nitems == 0) {
-		printf("%s: %s: items on itemlist, nitems %u\n", __func__,
-		    pp->pr_wchan, pp->pr_nitems);
-		panic("%s: nitems inconsistent", __func__);
+#ifndef SMALL_KERNEL
+	if (pi->pi_magic != poison_value(pi)) {
+		panic("%s: %s free list modified: "
+		    "page %p; item addr %p; offset 0x%x=0x%x", __func__,
+		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
 	}
 #endif
 
 #ifdef DIAGNOSTIC
-	if (pi->pi_magic != poison_value(pi))
-		panic("%s(%s): free list modified: "
-		    "page %p; item addr %p; offset 0x%x=0x%x", __func__,
-		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
 	if (pool_debug && ph->ph_magic) {
 		size_t pidx;
 		uint32_t pval;
 
 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
 		    &pidx, &pval)) {
 			int *ip = (int *)(pi + 1);
 
-			panic("%s(%s): free list modified: "
+			panic("%s: %s free list modified: "
 			    "page %p; item addr %p; offset 0x%zx=0x%x",
 			    __func__, pp->pr_wchan, ph->ph_page, pi,
 			    pidx * sizeof(int), ip[pidx]);
@@ -622,34 +586,18 @@ startover:
 		}
 	}
 #endif /* DIAGNOSTIC */
 
-	/*
-	 * Remove from item list.
-	 */
-	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);
-	pp->pr_nitems--;
-	pp->pr_nout++;
-
-	if (ph->ph_nmissing == 0) {
-#ifdef DIAGNOSTIC
-		if (pp->pr_nidle == 0)
-			panic("%s: nidle inconsistent", __func__);
-#endif
-		pp->pr_nidle--;
-
+	if (ph->ph_nmissing++ == 0) {
 		/*
 		 * This page was previously empty.  Move it to the list of
 		 * partially-full pages.  This page is already curpage.
 		 */
 		LIST_REMOVE(ph, ph_pagelist);
 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
+
+		pp->pr_nidle--;
 	}
-	ph->ph_nmissing++;
-	if (XSIMPLEQ_EMPTY(&ph->ph_itemlist)) {
-#ifdef DIAGNOSTIC
-		if (ph->ph_nmissing != pp->pr_itemsperpage) {
-			panic("%s: %s: nmissing inconsistent", __func__,
-			    pp->pr_wchan);
-		}
-#endif
+
+	if (ph->ph_nmissing == pp->pr_itemsperpage) {
 		/*
 		 * This page is now full.  Move it to the full list
 		 * and select a new current page.
@@ -659,69 +607,33 @@ startover:
 		pool_update_curpage(pp);
 	}
 
-	/*
-	 * If we have a low water mark and we are now below that low
-	 * water mark, add more items to the pool.
-	 */
-	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
-		/*
-		 * XXX: Should we log a warning?  Should we set up a timeout
-		 * to try again in a second or so?  The latter could break
-		 * a caller's assumptions about interrupt protection, etc.
-		 */
-	}
-
-	return (v);
-}
+	pp->pr_nget++;
 
-/*
- * Return resource to the pool; must be called at appropriate spl level
- */
-void
-pool_put(struct pool *pp, void *v)
-{
-	mtx_enter(&pp->pr_mtx);
-#ifdef POOL_DEBUG
-	if (pp->pr_roflags & PR_DEBUGCHK) {
-		if (pool_chk(pp))
-			panic("%s: before", __func__);
-	}
-#endif
-	pool_do_put(pp, v);
-#ifdef POOL_DEBUG
-	if (pp->pr_roflags & PR_DEBUGCHK) {
-		if (pool_chk(pp))
-			panic("%s: after", __func__);
-	}
-#endif
-	pp->pr_nput++;
-	mtx_leave(&pp->pr_mtx);
+	return (pi);
 }
 
 /*
- * Internal version of pool_put().
+ * Return resource to the pool.
  */
 void
-pool_do_put(struct pool *pp, void *v)
+pool_put(struct pool *pp, void *v)
 {
 	struct pool_item *pi = v;
-	struct pool_item_header *ph;
+	struct pool_item_header *ph, *freeph = NULL;
 
+#ifdef DIAGNOSTIC
 	if (v == NULL)
 		panic("%s: NULL item", __func__);
-
-#ifdef DIAGNOSTIC
-	if (pp->pr_ipl != -1)
-		splassert(pp->pr_ipl);
-
-	if (pp->pr_nout == 0)
-		panic("%s: %s: putting with none out", __func__, pp->pr_wchan);
 #endif
 
+	mtx_enter(&pp->pr_mtx);
+
 	ph = pr_find_pagehead(pp, v);
 
-	/*
-	 * Return to item list.
-	 */
+#ifndef SMALL_KERNEL
+	pi->pi_magic = poison_value(pi);
+#endif
+
 #ifdef DIAGNOSTIC
 	if (pool_debug) {
 		struct pool_item *qi;
@@ -731,56 +643,49 @@ pool_do_put(struct pool *pp, void *v)
 			    pp->pr_wchan, pi);
 		}
 	}
-	}
-	pi->pi_magic = poison_value(pi);
-	if (ph->ph_magic) {
-		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
+
+	if (ph->ph_magic)
+		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
 	}
 #endif /* DIAGNOSTIC */
 
 	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
-	ph->ph_nmissing--;
-	pp->pr_nitems++;
-	pp->pr_nout--;
-
-	/* Cancel "pool empty" condition if it exists */
-	if (pp->pr_curpage == NULL)
-		pp->pr_curpage = ph;
 
-	if (pp->pr_flags & PR_WANTED) {
-		pp->pr_flags &= ~PR_WANTED;
-		wakeup(pp);
+	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
+		/*
+		 * The page was previously completely full, move it to the
+		 * partially-full list.
+		 */
+		LIST_REMOVE(ph, ph_pagelist);
+		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
 	}
 
-	/*
-	 * If this page is now empty, do one of two things:
-	 *
-	 * (1) If we have more pages than the page high water mark,
-	 *     free the page back to the system.
-	 *
-	 * (2) Otherwise, move the page to the empty page list.
-	 *
-	 * Either way, select a new current page (so we use a partially-full
-	 * page if one is available).
-	 */
 	if (ph->ph_nmissing == 0) {
+		/*
+		 * The page is now empty, so move it to the empty page list.
+		 */
 		pp->pr_nidle++;
-		if (pp->pr_nidle > pp->pr_maxpages) {
-			pr_rmpage(pp, ph, NULL);
-		} else {
-			LIST_REMOVE(ph, ph_pagelist);
-			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
-			pool_update_curpage(pp);
-		}
-	}
-
-	/*
-	 * If the page was previously completely full, move it to the
-	 * partially-full list.
-	 */
-	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
+
 		LIST_REMOVE(ph, ph_pagelist);
-		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
+		LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
+		pool_update_curpage(pp);
 	}
+
+	pp->pr_nout--;
+	pp->pr_nput++;
+
+	/* is it time to free a page? */
+	if (pp->pr_nidle > pp->pr_maxpages &&
+	    (freeph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
+		pool_p_remove(pp, freeph);
+
+	mtx_leave(&pp->pr_mtx);
+
+	if (freeph != NULL)
+		pool_p_free(pp, freeph);
+
+	mtx_enter(&pp->pr_requests_mtx);
+	pool_runqueue(pp, PR_NOWAIT);
+	mtx_leave(&pp->pr_requests_mtx);
 }
 
 /*
@@ -789,139 +694,154 @@ pool_do_put(struct pool *pp, void *v)
 int
 pool_prime(struct pool *pp, int n)
 {
+	struct pool_pagelist pl = LIST_HEAD_INITIALIZER(pl);
 	struct pool_item_header *ph;
-	caddr_t cp;
 	int newpages;
-	int slowdown;
-
-	mtx_enter(&pp->pr_mtx);
 
 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
 
 	while (newpages-- > 0) {
-		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
-		if (cp != NULL)
-			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
-		if (cp == NULL || ph == NULL) {
-			if (cp != NULL)
-				pool_allocator_free(pp, cp);
+		ph = pool_p_alloc(pp, PR_NOWAIT);
+		if (ph == NULL)
 			break;
-		}
 
-		pool_prime_page(pp, cp, ph);
-		pp->pr_npagealloc++;
-		pp->pr_minpages++;
+		LIST_INSERT_HEAD(&pl, ph, ph_pagelist);
 	}
 
-	if (pp->pr_minpages >= pp->pr_maxpages)
-		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */
-
+	mtx_enter(&pp->pr_mtx);
+	while ((ph = LIST_FIRST(&pl)) != NULL) {
+		LIST_REMOVE(ph, ph_pagelist);
+		pool_p_insert(pp, ph);
+	}
 	mtx_leave(&pp->pr_mtx);
+
 	return (0);
 }
 
-/*
- * Add a page worth of items to the pool.
- *
- * Note, we must be called with the pool descriptor LOCKED.
- */
-void
-pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
+struct pool_item_header *
+pool_p_alloc(struct pool *pp, int flags)
 {
+	struct pool_item_header *ph;
 	struct pool_item *pi;
-	caddr_t cp = storage;
-	unsigned int align = pp->pr_align;
-	int n;
+	caddr_t addr;
+	int n, slowdown = 0;
 
-	/*
-	 * Insert page header.
-	 */
-	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
-	XSIMPLEQ_INIT(&ph->ph_itemlist);
-	ph->ph_page = storage;
-	ph->ph_nmissing = 0;
-	if (!POOL_INPGHDR(pp))
-		RB_INSERT(phtree, &pp->pr_phtree, ph);
+	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
+	KASSERT(pp->pr_size >= sizeof(*pi));
 
-	pp->pr_nidle++;
+	addr = pool_allocator_alloc(pp, flags, &slowdown);
+	if (addr == NULL)
+		return (NULL);
 
-	/*
-	 * Color this page.
-	 */
-	cp = (caddr_t)(cp + pp->pr_curcolor);
-	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
-		pp->pr_curcolor = 0;
+	if (slowdown && ISSET(flags, PR_WAITOK))
+		yield();
 
-	ph->ph_colored = cp;
+	if (POOL_INPGHDR(pp))
+		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
+	else {
+		ph = pool_get(&phpool, flags);
+		if (ph == NULL) {
+			pool_allocator_free(pp, addr);
+			return (NULL);
+		}
+	}
 
-	/*
-	 * Insert remaining chunks on the bucket list.
-	 */
-	n = pp->pr_itemsperpage;
-	pp->pr_nitems += n;
+	XSIMPLEQ_INIT(&ph->ph_itemlist);
+	ph->ph_page = addr;
+	ph->ph_nmissing = 0;
+	if (pool_debug) {
+		do {
+			arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
+		} while (ph->ph_magic == 0);
+	} else
+		ph->ph_magic = 0;
 
+	n = pp->pr_itemsperpage;
 	while (n--) {
-		pi = (struct pool_item *)cp;
-
-		/* Insert on page list */
+		pi = (struct pool_item *)addr;
+#ifndef SMALL_KERNEL
+		pi->pi_magic = poison_value(pi);
+#endif
 		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
 
 #ifdef DIAGNOSTIC
-		pi->pi_magic = poison_value(pi);
-		if (ph->ph_magic) {
+		if (pool_debug && ph->ph_magic)
 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
-		}
 #endif /* DIAGNOSTIC */
-		cp = (caddr_t)(cp + pp->pr_size);
+
+		addr += pp->pr_size;
 	}
 
-	/*
-	 * If the pool was depleted, point at the new page.
-	 */
+	return (ph);
+}
+
+void
+pool_p_free(struct pool *pp, struct pool_item_header *ph)
+{
+#ifndef SMALL_KERNEL
+	struct pool_item *pi;
+#endif
+
+	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
+	KASSERT(ph->ph_nmissing == 0);
+
+#ifndef SMALL_KERNEL
+	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
+		if (pi->pi_magic != poison_value(pi)) {
+			panic("%s: %s free list modified: "
+			    "page %p; item addr %p; offset 0x%x=0x%x", __func__,
+			    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
+		}
+	}
+#endif
+
+	pool_allocator_free(pp, ph->ph_page);
+
+	if (!POOL_INPGHDR(pp))
+		pool_put(&phpool, ph);
+}
+
+void
+pool_p_insert(struct pool *pp, struct pool_item_header *ph)
+{
+	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
+
+	/* If the pool was depleted, point at the new page */
 	if (pp->pr_curpage == NULL)
 		pp->pr_curpage = ph;
 
+	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
+	if (!POOL_INPGHDR(pp))
+		RB_INSERT(phtree, &pp->pr_phtree, ph);
+
+	pp->pr_nitems += pp->pr_itemsperpage;
+	pp->pr_nidle++;
+
+	pp->pr_npagealloc++;
 	if (++pp->pr_npages > pp->pr_hiwat)
 		pp->pr_hiwat = pp->pr_npages;
 }
 
-/*
- * Used by pool_get() when nitems drops below the low water mark.  This
- * is used to catch up pr_nitems with the low water mark.
- *
- * Note we never wait for memory here, we let the caller decide what to do.
- */
-int
-pool_catchup(struct pool *pp)
+void
+pool_p_remove(struct pool *pp, struct pool_item_header *ph)
 {
-	struct pool_item_header *ph;
-	caddr_t cp;
-	int error = 0;
-	int slowdown;
+	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
 
-	while (POOL_NEEDS_CATCHUP(pp)) {
-		/*
-		 * Call the page back-end allocator for more memory.
-		 */
-		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
-		if (cp != NULL)
-			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
-		if (cp == NULL || ph == NULL) {
-			if (cp != NULL)
-				pool_allocator_free(pp, cp);
-			error = ENOMEM;
-			break;
-		}
-		pool_prime_page(pp, cp, ph);
-		pp->pr_npagealloc++;
-	}
+	pp->pr_npagefree++;
+	pp->pr_npages--;
+	pp->pr_nidle--;
+	pp->pr_nitems -= pp->pr_itemsperpage;
 
-	return (error);
+	if (!POOL_INPGHDR(pp))
+		RB_REMOVE(phtree, &pp->pr_phtree, ph);
+	LIST_REMOVE(ph, ph_pagelist);
+
+	pool_update_curpage(pp);
 }
 
 void
 pool_update_curpage(struct pool *pp)
 {
-
 	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
 	if (pp->pr_curpage == NULL) {
 		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
@@ -929,51 +849,26 @@ pool_update_curpage(struct pool *pp)
 }
 
 void
-pool_swizzle_curpage(struct pool *pp)
-{
-	struct pool_item_header *ph, *next;
-
-	if ((ph = pp->pr_curpage) == NULL)
-		return;
-	if (arc4random_uniform(16) != 0)
-		return;
-	next = LIST_FIRST(&pp->pr_partpages);
-	if (next == ph)
-		next = LIST_NEXT(next, ph_pagelist);
-	if (next == NULL) {
-		next = LIST_FIRST(&pp->pr_emptypages);
-		if (next == ph)
-			next = LIST_NEXT(next, ph_pagelist);
-	}
-	if (next != NULL)
-		pp->pr_curpage = next;
-}
-
-void
 pool_setlowat(struct pool *pp, int n)
 {
+	int prime;
+
+	mtx_enter(&pp->pr_mtx);
 	pp->pr_minitems = n;
 	pp->pr_minpages = (n == 0)
 		? 0
 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
 
-	mtx_enter(&pp->pr_mtx);
-	/* Make sure we're caught up with the newly-set low water mark. */
-	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
-		/*
-		 * XXX: Should we log a warning?  Should we set up a timeout
-		 * to try again in a second or so?  The latter could break
-		 * a caller's assumptions about interrupt protection, etc.
-		 */
-	}
+	prime = pp->pr_nitems - n;
 	mtx_leave(&pp->pr_mtx);
+
+	if (prime > 0)
+		pool_prime(pp, prime);
 }
 
 void
 pool_sethiwat(struct pool *pp, int n)
 {
-
 	pp->pr_maxpages = (n == 0)
 		? 0
 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
@@ -1014,9 +909,7 @@ int
 pool_reclaim(struct pool *pp)
 {
 	struct pool_item_header *ph, *phnext;
-	struct pool_pagelist pq;
-
-	LIST_INIT(&pq);
+	struct pool_pagelist pl = LIST_HEAD_INITIALIZER(pl);
 
 	mtx_enter(&pp->pr_mtx);
 	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
@@ -1026,8 +919,6 @@ pool_reclaim(struct pool *pp)
 		if (pp->pr_npages <= pp->pr_minpages)
 			break;
 
-		KASSERT(ph->ph_nmissing == 0);
-
 		/*
 		 * If freeing this page would put us below
 		 * the low water mark, stop now.
@@ -1036,18 +927,17 @@ pool_reclaim(struct pool *pp)
 		    pp->pr_minitems)
 			break;
 
-		pr_rmpage(pp, ph, &pq);
+		pool_p_remove(pp, ph);
+		LIST_INSERT_HEAD(&pl, ph, ph_pagelist);
 	}
 	mtx_leave(&pp->pr_mtx);
 
-	if (LIST_EMPTY(&pq))
+	if (LIST_EMPTY(&pl))
 		return (0);
-	while ((ph = LIST_FIRST(&pq)) != NULL) {
+
+	while ((ph = LIST_FIRST(&pl)) != NULL) {
 		LIST_REMOVE(ph, ph_pagelist);
-		pool_allocator_free(pp, ph->ph_page);
-		if (POOL_INPGHDR(pp))
-			continue;
-		pool_put(&phpool, ph);
+		pool_p_free(pp, ph);
 	}
 
 	return (1);
@@ -1249,16 +1139,16 @@ pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
 	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
 	     pi != NULL;
 	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
-
-#ifdef DIAGNOSTIC
 		if (pi->pi_magic != poison_value(pi)) {
 			printf("%s: ", label);
-			printf("pool(%s): free list modified: "
+			printf("pool(%p:%s): free list modified: "
 			    "page %p; item ordinal %d; addr %p "
 			    "(p %p); offset 0x%x=0x%x\n",
-			    pp->pr_wchan, ph->ph_page, n, pi, page,
+			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
 			    0, pi->pi_magic);
 		}
+
+#ifdef DIAGNOSTIC
 		if (pool_debug && ph->ph_magic) {
 			size_t pidx;
 			uint32_t pval;
@@ -1273,6 +1163,7 @@ pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
 			}
 		}
 #endif /* DIAGNOSTIC */
+
 		page = (caddr_t)((u_long)pi & pp->pr_pgmask);
 		if (page == ph->ph_page)
 			continue;
@@ -1442,14 +1333,11 @@ done:
 void *
 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
 {
-	int waitok = ISSET(flags, PR_WAITOK);
 	void *v;
 
-	if (waitok)
-		mtx_leave(&pp->pr_mtx);
-	v = pp->pr_alloc->pa_alloc(pp, flags, slowdown);
-	if (waitok)
-		mtx_enter(&pp->pr_mtx);
+	KERNEL_LOCK();
+	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
+	KERNEL_UNLOCK();
 
 #ifdef DIAGNOSTIC
 	if (v != NULL && POOL_INPGHDR(pp)) {
@@ -1469,7 +1357,9 @@ pool_allocator_free(struct pool *pp, void *v)
 {
 	struct pool_allocator *pa = pp->pr_alloc;
 
+	KERNEL_LOCK();
 	(*pa->pa_free)(pp, v);
+	KERNEL_UNLOCK();
 }
 
 void *