author		David Gwynne <dlg@cvs.openbsd.org>	2014-09-04 00:36:01 +0000
committer	David Gwynne <dlg@cvs.openbsd.org>	2014-09-04 00:36:01 +0000
commit		d90385fc711f16e5cf06fa25f96402a95f795664 (patch)
tree		562b88d81f9bd215da17b2daf9b6775d910a0a88
parent		c03210bf59264ddf4da3873d44124eabdb50504b (diff)
rework how pools with large pages (>PAGE_SIZE) are implemented.
this moves the size of the pool page (not arch page) out of the pool allocator into struct pool. this lets us create only two pool allocators for the automatically determined large page allocations instead of 256 of them.

while here, support using slack space in large pages for the pool_item_header by requiring km_alloc to provide pool page aligned memory.

lastly, instead of doing incorrect math to figure out how many arch pages to use for large pool pages, just use powers of two.

ok mikeb@
-rw-r--r--	sys/kern/subr_pool.c	224
-rw-r--r--	sys/sys/pool.h		6
2 files changed, 105 insertions, 125 deletions
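
The heart of the change is the sizing logic in pool_init(): when no allocator is given, the pool page is rounded up to the next power of two that fits one item, and one of just two shared large allocators is chosen. The hunks below spread that logic across the file, so here is a minimal userland sketch of the same arithmetic; PAGE_SIZE, the flag values and PH_SIZE are stand-ins for the kernel definitions, chosen only for illustration.

/*
 * Minimal userland sketch of the sizing arithmetic the new pool_init()
 * performs when no allocator is passed.  PAGE_SIZE, the flag values and
 * PH_SIZE are stand-ins for the real kernel definitions.
 */
#include <stdio.h>

#define PAGE_SIZE	4096	/* assumed arch page size */
#define PR_WAITOK	0x0001	/* stand-in flag value */
#define PR_PHINPAGE	0x0002	/* stand-in flag value */
#define PH_SIZE		48	/* assumed sizeof(struct pool_item_header) */

int
main(void)
{
	unsigned int size = 9000;	/* example item size, already aligned */
	unsigned int pgsize = PAGE_SIZE, items, off, flags = PR_WAITOK;

	/* round the pool page up to the next power of two that fits an item */
	while (size > pgsize)
		pgsize <<= 1;

	/*
	 * pgsize > PAGE_SIZE means pool_init() would pick one of the two
	 * shared large allocators (pool_allocator_large_ni here, because
	 * PR_WAITOK is set); otherwise it uses pool_allocator_nointr.
	 */
	items = pgsize / size;

	/* put the header in the page if the slack can hold it... */
	if (pgsize - (size * items) > PH_SIZE) {
		flags |= PR_PHINPAGE;
		off = pgsize - PH_SIZE;
	/* ...or, for small items, shrink the item count to make room for it */
	} else if (PH_SIZE * 2 >= size) {
		flags |= PR_PHINPAGE;
		off = pgsize - PH_SIZE;
		items = off / size;
	} else
		off = pgsize;

	printf("pgsize %u, %u item(s) per page, header %s, offset %u\n",
	    pgsize, items,
	    (flags & PR_PHINPAGE) ? "in page" : "off page", off);
	return (0);
}
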
diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c
index 1f9e7366b43..2ab398290ab 100644
--- a/sys/kern/subr_pool.c
+++ b/sys/kern/subr_pool.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: subr_pool.c,v 1.148 2014/08/27 00:22:26 dlg Exp $ */
+/* $OpenBSD: subr_pool.c,v 1.149 2014/09/04 00:36:00 dlg Exp $ */
/* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */
/*-
@@ -84,7 +84,6 @@ struct pool_item_header {
int ph_nmissing; /* # of chunks in use */
caddr_t ph_page; /* this page's address */
caddr_t ph_colored; /* page's colored address */
- int ph_pagesize;
int ph_magic;
};
@@ -119,21 +118,35 @@ void *pool_allocator_alloc(struct pool *, int, int *);
void pool_allocator_free(struct pool *, void *);
/*
- * XXX - quick hack. For pools with large items we want to use a special
- * allocator. For now, instead of having the allocator figure out
- * the allocation size from the pool (which can be done trivially
- * with round_page(pr_itemsperpage * pr_size)) which would require
- * lots of changes everywhere, we just create allocators for each
- * size. We limit those to 128 pages.
+ * The default pool allocator.
*/
-#define POOL_LARGE_MAXPAGES 128
-struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
-struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
+void *pool_page_alloc(struct pool *, int, int *);
+void pool_page_free(struct pool *, void *);
+
+/*
+ * safe for interrupts, name preserved for compat this is the default
+ * allocator
+ */
+struct pool_allocator pool_allocator_nointr = {
+ pool_page_alloc,
+ pool_page_free
+};
+
void *pool_large_alloc(struct pool *, int, int *);
void pool_large_free(struct pool *, void *);
+
+struct pool_allocator pool_allocator_large = {
+ pool_large_alloc,
+ pool_large_free
+};
+
void *pool_large_alloc_ni(struct pool *, int, int *);
void pool_large_free_ni(struct pool *, void *);
+struct pool_allocator pool_allocator_large_ni = {
+ pool_large_alloc_ni,
+ pool_large_free_ni
+};
#ifdef DDB
void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
@@ -173,7 +186,7 @@ pr_find_pagehead(struct pool *pp, void *v)
if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
caddr_t page;
- page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);
+ page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
return ((struct pool_item_header *)(page + pp->pr_phoffset));
}
@@ -186,7 +199,7 @@ pr_find_pagehead(struct pool *pp, void *v)
}
KASSERT(ph->ph_page <= (caddr_t)v);
- if (ph->ph_page + ph->ph_pagesize <= (caddr_t)v) {
+ if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) {
panic("pr_find_pagehead: %s: incorrect page",
pp->pr_wchan);
}
@@ -246,7 +259,8 @@ void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
const char *wchan, struct pool_allocator *palloc)
{
- int off, slack;
+ int off = 0, slack;
+ unsigned int pgsize = PAGE_SIZE, items;
#ifdef DIAGNOSTIC
struct pool *iter;
KASSERT(ioff == 0);
@@ -256,48 +270,6 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
if ((flags & PR_DEBUG) && align != 0)
flags &= ~PR_DEBUG;
#endif
- /*
- * Check arguments and construct default values.
- */
- if (palloc == NULL) {
- if (size > PAGE_SIZE) {
- int psize;
-
- /*
- * XXX - should take align into account as well.
- */
- if (size == round_page(size))
- psize = size / PAGE_SIZE;
- else
- psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
- 1024);
- if (psize > POOL_LARGE_MAXPAGES)
- psize = POOL_LARGE_MAXPAGES;
- if (flags & PR_WAITOK)
- palloc = &pool_allocator_large_ni[psize-1];
- else
- palloc = &pool_allocator_large[psize-1];
- if (palloc->pa_pagesz == 0) {
- palloc->pa_pagesz = psize * PAGE_SIZE;
- if (flags & PR_WAITOK) {
- palloc->pa_alloc = pool_large_alloc_ni;
- palloc->pa_free = pool_large_free_ni;
- } else {
- palloc->pa_alloc = pool_large_alloc;
- palloc->pa_free = pool_large_free;
- }
- }
- } else {
- palloc = &pool_allocator_nointr;
- }
- }
- if (palloc->pa_pagesz == 0) {
- palloc->pa_pagesz = PAGE_SIZE;
- }
- if (palloc->pa_pagemask == 0) {
- palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
- palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
- }
if (align == 0)
align = ALIGN(1);
@@ -306,15 +278,43 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
size = sizeof(struct pool_item);
size = roundup(size, align);
-#ifdef DIAGNOSTIC
- if (size > palloc->pa_pagesz)
- panic("pool_init: pool item size (%lu) too large",
- (u_long)size);
-#endif
+
+ if (palloc == NULL) {
+ while (size > pgsize)
+ pgsize <<= 1;
+
+ if (pgsize > PAGE_SIZE) {
+ palloc = ISSET(flags, PR_WAITOK) ?
+ &pool_allocator_large_ni : &pool_allocator_large;
+ } else
+ palloc = &pool_allocator_nointr;
+ } else
+ pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;
+
+ items = pgsize / size;
+
+ /*
+ * Decide whether to put the page header off page to avoid
+ * wasting too large a part of the page. Off-page page headers
+ * go into an RB tree, so we can match a returned item with
+ * its header based on the page address.
+ */
+ if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
+ flags |= PR_PHINPAGE;
+ off = pgsize - sizeof(struct pool_item_header);
+ } else if (sizeof(struct pool_item_header) * 2 >= size) {
+ flags |= PR_PHINPAGE;
+ off = pgsize - sizeof(struct pool_item_header);
+ items = off / size;
+ } else
+ off = pgsize;
+
+ KASSERT(items > 0);
/*
* Initialize the pool structure.
*/
+ memset(pp, 0, sizeof(*pp));
LIST_INIT(&pp->pr_emptypages);
LIST_INIT(&pp->pr_fullpages);
LIST_INIT(&pp->pr_partpages);
@@ -326,6 +326,10 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
pp->pr_roflags = flags;
pp->pr_flags = 0;
pp->pr_size = size;
+ pp->pr_pgsize = pgsize;
+ pp->pr_pgmask = ~0UL ^ (pgsize - 1);
+ pp->pr_phoffset = off;
+ pp->pr_itemsperpage = items;
pp->pr_align = align;
pp->pr_wchan = wchan;
pp->pr_alloc = palloc;
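
The per-pool pr_pgsize/pr_pgmask fields set in this hunk replace the old per-allocator pa_pagesz/pa_pagemask. For PR_PHINPAGE pools, pr_find_pagehead() and pool_chk_page() recover the page, and with it the header at pr_phoffset, by masking an item pointer with pr_pgmask; this is only correct because the large allocators further down now ask km_alloc for pool-page-aligned memory. A hedged userland illustration with made-up 64-bit addresses and an assumed 48-byte header:

/*
 * Illustration of the per-pool page mask: mask an item pointer to get
 * the start of its (naturally aligned) pool page, then add pr_phoffset
 * to reach the in-page header.  The addresses, the 16k page size and
 * the 48-byte header are made up; 64-bit longs are assumed.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	unsigned long pgsize = 16384;			/* pp->pr_pgsize */
	unsigned long pgmask = ~0UL ^ (pgsize - 1);	/* pp->pr_pgmask */
	unsigned long phoffset = pgsize - 48;		/* pp->pr_phoffset */
	uintptr_t item = 0xffff800000a04000UL + 9000;	/* item in a pool page */

	uintptr_t page = item & pgmask;		/* start of the pool page */
	uintptr_t ph = page + phoffset;		/* its pool_item_header */

	printf("item %#lx -> page %#lx, header at %#lx\n",
	    (unsigned long)item, (unsigned long)page, (unsigned long)ph);
	return (0);
}
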
@@ -337,28 +341,7 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
pp->pr_hardlimit_ratecap.tv_usec = 0;
pp->pr_hardlimit_warning_last.tv_sec = 0;
pp->pr_hardlimit_warning_last.tv_usec = 0;
-
- /*
- * Decide whether to put the page header off page to avoid
- * wasting too large a part of the page. Off-page page headers
- * go into an RB tree, so we can match a returned item with
- * its header based on the page address.
- * We use 1/16 of the page size as the threshold (XXX: tune)
- */
- if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
- /* Use the end of the page for the page header */
- pp->pr_roflags |= PR_PHINPAGE;
- pp->pr_phoffset = off = palloc->pa_pagesz -
- ALIGN(sizeof(struct pool_item_header));
- } else {
- /* The page header will be taken from our page header pool */
- pp->pr_phoffset = 0;
- off = palloc->pa_pagesz;
- RB_INIT(&pp->pr_phtree);
- }
-
- pp->pr_itemsperpage = off / pp->pr_size;
- KASSERT(pp->pr_itemsperpage != 0);
+ RB_INIT(&pp->pr_phtree);
/*
* Use the slack between the chunks and the page header
@@ -882,7 +865,6 @@ pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
XSIMPLEQ_INIT(&ph->ph_itemlist);
ph->ph_page = storage;
- ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
ph->ph_nmissing = 0;
if ((pp->pr_roflags & PR_PHINPAGE) == 0)
RB_INSERT(phtree, &pp->pr_phtree, ph);
@@ -1282,7 +1264,7 @@ pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
int n;
const char *label = pp->pr_wchan;
- page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
+ page = (caddr_t)((u_long)ph & pp->pr_pgmask);
if (page != ph->ph_page &&
(pp->pr_roflags & PR_PHINPAGE) != 0) {
printf("%s: ", label);
@@ -1319,8 +1301,7 @@ pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
}
}
#endif /* DIAGNOSTIC */
- page =
- (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
+ page = (caddr_t)((u_long)pi & pp->pr_pgmask);
if (page == ph->ph_page)
continue;
@@ -1454,7 +1435,7 @@ sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
mtx_enter(&pp->pr_mtx);
pi.pr_size = pp->pr_size;
- pi.pr_pgsize = pp->pr_alloc->pa_pagesz;
+ pi.pr_pgsize = pp->pr_pgsize;
pi.pr_itemsperpage = pp->pr_itemsperpage;
pi.pr_npages = pp->pr_npages;
pi.pr_minpages = pp->pr_minpages;
@@ -1483,33 +1464,6 @@ done:
/*
* Pool backend allocators.
- *
- * Each pool has a backend allocator that handles allocation, deallocation
- */
-void *pool_page_alloc(struct pool *, int, int *);
-void pool_page_free(struct pool *, void *);
-
-/*
- * safe for interrupts, name preserved for compat this is the default
- * allocator
- */
-struct pool_allocator pool_allocator_nointr = {
- pool_page_alloc, pool_page_free, 0,
-};
-
-/*
- * XXX - we have at least three different resources for the same allocation
- * and each resource can be depleted. First we have the ready elements in
- * the pool. Then we have the resource (typically a vm_map) for this
- * allocator, then we have physical memory. Waiting for any of these can
- * be unnecessary when any other is freed, but the kernel doesn't support
- * sleeping on multiple addresses, so we have to fake. The caller sleeps on
- * the pool (so that we can be awakened when an item is returned to the pool),
- * but we set PA_WANT on the allocator. When a page is returned to
- * the allocator and PA_WANT is set pool_allocator_free will wakeup all
- * sleeping pools belonging to this allocator. (XXX - thundering herd).
- * We also wake up the allocator in case someone without a pool (malloc)
- * is sleeping waiting for this allocator.
*/
void *
@@ -1524,6 +1478,16 @@ pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
if (waitok)
mtx_enter(&pp->pr_mtx);
+#ifdef DIAGNOSTIC
+ if (v != NULL && ISSET(pp->pr_roflags, PR_PHINPAGE)) {
+ vaddr_t addr = (vaddr_t)v;
+ if ((addr & pp->pr_pgmask) != addr) {
+ panic("%s: %s page address %p isnt aligned to %u",
+ __func__, pp->pr_wchan, v, pp->pr_pgsize);
+ }
+ }
+#endif
+
return (v);
}
@@ -1543,28 +1507,31 @@ pool_page_alloc(struct pool *pp, int flags, int *slowdown)
kd.kd_waitok = (flags & PR_WAITOK);
kd.kd_slowdown = slowdown;
- return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd));
+ return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}
void
pool_page_free(struct pool *pp, void *v)
{
- km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange);
+ km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}
void *
pool_large_alloc(struct pool *pp, int flags, int *slowdown)
{
+ struct kmem_va_mode kv = kv_intrsafe;
struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
void *v;
int s;
+ if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+ kv.kv_align = pp->pr_pgsize;
+
kd.kd_waitok = (flags & PR_WAITOK);
kd.kd_slowdown = slowdown;
s = splvm();
- v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange,
- &kd);
+ v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
splx(s);
return (v);
@@ -1573,26 +1540,39 @@ pool_large_alloc(struct pool *pp, int flags, int *slowdown)
void
pool_large_free(struct pool *pp, void *v)
{
+ struct kmem_va_mode kv = kv_intrsafe;
int s;
+ if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+ kv.kv_align = pp->pr_pgsize;
+
s = splvm();
- km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange);
+ km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
splx(s);
}
void *
pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
+ struct kmem_va_mode kv = kv_any;
struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
+ if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+ kv.kv_align = pp->pr_pgsize;
+
kd.kd_waitok = (flags & PR_WAITOK);
kd.kd_slowdown = slowdown;
- return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd));
+ return (km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd));
}
void
pool_large_free_ni(struct pool *pp, void *v)
{
- km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange);
+ struct kmem_va_mode kv = kv_any;
+
+ if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+ kv.kv_align = pp->pr_pgsize;
+
+ km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
}
diff --git a/sys/sys/pool.h b/sys/sys/pool.h
index 81430c01547..234d35c8a77 100644
--- a/sys/sys/pool.h
+++ b/sys/sys/pool.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pool.h,v 1.48 2014/08/27 00:22:26 dlg Exp $ */
+/* $OpenBSD: pool.h,v 1.49 2014/09/04 00:36:00 dlg Exp $ */
/* $NetBSD: pool.h,v 1.27 2001/06/06 22:00:17 rafal Exp $ */
/*-
@@ -78,8 +78,6 @@ struct pool_allocator {
void *(*pa_alloc)(struct pool *, int, int *);
void (*pa_free)(struct pool *, void *);
int pa_pagesz;
- int pa_pagemask;
- int pa_pageshift;
};
LIST_HEAD(pool_pagelist, pool_item_header);
@@ -109,6 +107,8 @@ struct pool {
unsigned int pr_hardlimit; /* hard limit to number of allocated
items */
unsigned int pr_serial; /* unique serial number of the pool */
+ unsigned int pr_pgsize; /* Size of a "page" */
+ vaddr_t pr_pgmask; /* Mask with an item to get a page */
struct pool_allocator *
pr_alloc; /* backend allocator */
const char * pr_wchan; /* tsleep(9) identifier */
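
From a pool consumer's point of view nothing changes, but it may help to see what the reworked code does for items larger than PAGE_SIZE. The sketch below is hypothetical kernel code (struct big_thing, bigpl and the functions are invented names): with a NULL allocator, pool_init() now rounds the pool page up to a power of two (16k on a 4k-page arch for this item size) and attaches one of the two shared large allocators instead of carving a dedicated entry out of a 128-slot array.

#include <sys/param.h>
#include <sys/pool.h>

/* hypothetical structure larger than PAGE_SIZE on most architectures */
struct big_thing {
	char	bt_buf[9000];
};

struct pool bigpl;

void
big_thing_init(void)
{
	/*
	 * NULL allocator: pool_init() picks a 16k pool page and, because
	 * PR_WAITOK is set, pool_allocator_large_ni.
	 */
	pool_init(&bigpl, sizeof(struct big_thing), 0, 0, PR_WAITOK,
	    "bigthing", NULL);
}

struct big_thing *
big_thing_get(void)
{
	return (pool_get(&bigpl, PR_WAITOK));
}

void
big_thing_put(struct big_thing *bt)
{
	pool_put(&bigpl, bt);
}
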