author     Martin Pieuchot <mpi@cvs.openbsd.org>    2024-04-17 13:12:59 +0000
committer  Martin Pieuchot <mpi@cvs.openbsd.org>    2024-04-17 13:12:59 +0000
commit     a9a952c6b5ebb70f96297507aadd9b9e42dfc6a8 (patch)
tree       e026aa6507fb0e61f8fcf75cad35115a42e0baa7
parent     a5e57701c2d21a274c10aae196a7fc44927e079e (diff)
Add per-CPU caches to the pmemrange allocator.
The caches are used primarily to reduce contention on uvm_lock_fpageq() during concurrent page faults. For the moment only uvm_pagealloc() tries to get a page from the current CPU's cache; as a result, on some architectures the caches are also used by the pmap layer.

Each cache is composed of two magazines. The design is borrowed from Jeff Bonwick's vmem paper and the implementation is similar to dlg@'s pool_cache. However, there is no depot layer and the magazines are refilled directly by the pmemrange allocator.

Tested by robert@, claudio@ and Laurence Tratt.

ok kettenis@
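To make the magazine scheme concrete, here is a minimal standalone sketch of the two-magazine get/put logic. It is an illustration only, not the committed kernel code: malloc()/free() stand in for uvm_pmr_getpages()/uvm_pmr_freepages(), a single struct stands in for the per-CPU ci_uvm field, and the uvmexp statistics, atomics and UVM_PLA_* flags handled by the real functions are left out.

/*
 * Illustration only -- NOT the committed code.  A userland model of the
 * two-magazine per-CPU page cache described in the commit message.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CACHEMAGSZ 8                    /* pages per magazine, as in the diff */

struct page { int id; };                /* stand-in for struct vm_page */

struct cache_item {                     /* one magazine */
    struct page *pages[CACHEMAGSZ];
    int          npages;
};

struct pmr_cache {                      /* per-CPU cache: two magazines */
    struct cache_item magz[2];
    int               actv;             /* index of the active magazine */
};

/* Refill an empty magazine from the backend (models uvm_pmr_cache_alloc()). */
static int
cache_alloc(struct cache_item *ci)
{
    int i;

    for (i = 0; i < CACHEMAGSZ; i++) {
        ci->pages[i] = malloc(sizeof(struct page));
        if (ci->pages[i] == NULL) {
            while (i-- > 0)             /* undo partial refill */
                free(ci->pages[i]);
            return -1;
        }
    }
    ci->npages = CACHEMAGSZ;
    return 0;
}

/* Pop a page: use the active magazine, swap to the other one when empty. */
static struct page *
cache_get(struct pmr_cache *c)
{
    struct cache_item *ci = &c->magz[c->actv];

    if (ci->npages == 0) {
        int prev = (c->actv == 0) ? 1 : 0;

        ci = &c->magz[prev];
        if (ci->npages == 0 && cache_alloc(ci) != 0)
            return malloc(sizeof(struct page)); /* fall back to the backend */
        c->actv = prev;                         /* swap magazines */
    }
    return ci->pages[--ci->npages];
}

/* Push a page back; when both magazines are full, flush the inactive one. */
static void
cache_put(struct pmr_cache *c, struct page *pg)
{
    struct cache_item *ci = &c->magz[c->actv];

    if (ci->npages >= CACHEMAGSZ) {
        int prev = (c->actv == 0) ? 1 : 0;

        ci = &c->magz[prev];
        while (ci->npages > 0)                  /* give pages back */
            free(ci->pages[--ci->npages]);
        c->actv = prev;                         /* swap magazines */
    }
    ci->pages[ci->npages++] = pg;
}

int
main(void)
{
    struct pmr_cache c;
    struct page *pg;
    int i;

    memset(&c, 0, sizeof(c));
    for (i = 0; i < 20; i++) {                  /* alternate allocs and frees */
        pg = cache_get(&c);
        cache_put(&c, pg);
    }
    printf("active magazine holds %d page(s)\n", c.magz[c.actv].npages);
    return 0;
}

The committed uvm_pmr_cache_get()/uvm_pmr_cache_put() in the uvm_pmemrange.c hunks below additionally maintain the uvmexp.percpucaches, pcphit and pcpmiss counters, honour UVM_PLA_ZERO, and fall back to uvm_pmr_getone() when the pmemrange allocator cannot refill a magazine.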
-rw-r--r--  sys/arch/amd64/include/cpu.h  |   5
-rw-r--r--  sys/arch/arm64/include/cpu.h  |   5
-rw-r--r--  sys/arch/i386/include/cpu.h   |   5
-rw-r--r--  sys/uvm/uvm_page.c            |  20
-rw-r--r--  sys/uvm/uvm_pdaemon.c         |   4
-rw-r--r--  sys/uvm/uvm_percpu.h          |  48
-rw-r--r--  sys/uvm/uvm_pmemrange.c       | 155
-rw-r--r--  sys/uvm/uvm_pmemrange.h       |   6
-rw-r--r--  sys/uvm/uvmexp.h              |   8
-rw-r--r--  usr.bin/systat/uvm.c          |  13
-rw-r--r--  usr.bin/vmstat/vmstat.c       |   7
11 files changed, 244 insertions, 32 deletions
diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index 7bd9a54fa0f..a6377384ea5 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.165 2024/04/14 09:59:04 kettenis Exp $ */
+/* $OpenBSD: cpu.h,v 1.166 2024/04/17 13:12:58 mpi Exp $ */
/* $NetBSD: cpu.h,v 1.1 2003/04/26 18:39:39 fvdl Exp $ */
/*-
@@ -53,6 +53,7 @@
#include <sys/sched.h>
#include <sys/sensors.h>
#include <sys/srp.h>
+#include <uvm/uvm_percpu.h>
#ifdef _KERNEL
@@ -210,6 +211,8 @@ struct cpu_info {
#ifdef MULTIPROCESSOR
struct srp_hazard ci_srp_hazards[SRP_HAZARD_NUM];
+#define __HAVE_UVM_PERCPU
+ struct uvm_pmr_cache ci_uvm; /* [o] page cache */
#endif
struct ksensordev ci_sensordev;
diff --git a/sys/arch/arm64/include/cpu.h b/sys/arch/arm64/include/cpu.h
index d0521a33ab3..ee820bb65b0 100644
--- a/sys/arch/arm64/include/cpu.h
+++ b/sys/arch/arm64/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.43 2024/02/25 19:15:50 cheloha Exp $ */
+/* $OpenBSD: cpu.h,v 1.44 2024/04/17 13:12:58 mpi Exp $ */
/*
* Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
*
@@ -108,6 +108,7 @@ void arm32_vector_init(vaddr_t, int);
#include <sys/device.h>
#include <sys/sched.h>
#include <sys/srp.h>
+#include <uvm/uvm_percpu.h>
struct cpu_info {
struct device *ci_dev; /* Device corresponding to this CPU */
@@ -161,6 +162,8 @@ struct cpu_info {
#ifdef MULTIPROCESSOR
struct srp_hazard ci_srp_hazards[SRP_HAZARD_NUM];
+#define __HAVE_UVM_PERCPU
+ struct uvm_pmr_cache ci_uvm;
volatile int ci_flags;
volatile int ci_ddb_paused;
diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h
index ef7db2af795..9ad7163dccc 100644
--- a/sys/arch/i386/include/cpu.h
+++ b/sys/arch/i386/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.185 2024/02/25 19:15:50 cheloha Exp $ */
+/* $OpenBSD: cpu.h,v 1.186 2024/04/17 13:12:58 mpi Exp $ */
/* $NetBSD: cpu.h,v 1.35 1996/05/05 19:29:26 christos Exp $ */
/*-
@@ -69,6 +69,7 @@
#include <sys/sched.h>
#include <sys/sensors.h>
#include <sys/srp.h>
+#include <uvm/uvm_percpu.h>
struct intrsource;
@@ -99,6 +100,8 @@ struct cpu_info {
#if defined(MULTIPROCESSOR)
struct srp_hazard ci_srp_hazards[SRP_HAZARD_NUM];
+#define __HAVE_UVM_PERCPU
+ struct uvm_pmr_cache ci_uvm;
#endif
/*
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index b9e69522d46..5958c77a0ae 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.c,v 1.174 2024/02/13 10:16:28 miod Exp $ */
+/* $OpenBSD: uvm_page.c,v 1.175 2024/04/17 13:12:58 mpi Exp $ */
/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */
/*
@@ -877,13 +877,11 @@ uvm_pagerealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
* => only one of obj or anon can be non-null
* => caller must activate/deactivate page if it is not wired.
*/
-
struct vm_page *
uvm_pagealloc(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
int flags)
{
- struct vm_page *pg;
- struct pglist pgl;
+ struct vm_page *pg = NULL;
int pmr_flags;
KASSERT(obj == NULL || anon == NULL);
@@ -906,13 +904,10 @@ uvm_pagealloc(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
if (flags & UVM_PGA_ZERO)
pmr_flags |= UVM_PLA_ZERO;
- TAILQ_INIT(&pgl);
- if (uvm_pmr_getpages(1, 0, 0, 1, 0, 1, pmr_flags, &pgl) != 0)
- goto fail;
-
- pg = TAILQ_FIRST(&pgl);
- KASSERT(pg != NULL && TAILQ_NEXT(pg, pageq) == NULL);
+ pg = uvm_pmr_cache_get(pmr_flags);
+ if (pg == NULL)
+ return NULL;
uvm_pagealloc_pg(pg, obj, off, anon);
KASSERT((pg->pg_flags & PG_DEV) == 0);
if (flags & UVM_PGA_ZERO)
@@ -921,9 +916,6 @@ uvm_pagealloc(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
atomic_setbits_int(&pg->pg_flags, PG_CLEAN);
return pg;
-
-fail:
- return NULL;
}
/*
@@ -1025,7 +1017,7 @@ void
uvm_pagefree(struct vm_page *pg)
{
uvm_pageclean(pg);
- uvm_pmr_freepages(pg, 1);
+ uvm_pmr_cache_put(pg);
}
/*
diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c
index 58a47c37fb0..a8c4c32a3cd 100644
--- a/sys/uvm/uvm_pdaemon.c
+++ b/sys/uvm/uvm_pdaemon.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pdaemon.c,v 1.111 2024/04/10 15:26:18 mpi Exp $ */
+/* $OpenBSD: uvm_pdaemon.c,v 1.112 2024/04/17 13:12:58 mpi Exp $ */
/* $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $ */
/*
@@ -262,6 +262,8 @@ uvm_pageout(void *arg)
#if NDRM > 0
drmbackoff(size * 2);
#endif
+ uvm_pmr_cache_drain();
+
/*
* scan if needed
*/
diff --git a/sys/uvm/uvm_percpu.h b/sys/uvm/uvm_percpu.h
new file mode 100644
index 00000000000..bbf6897e40b
--- /dev/null
+++ b/sys/uvm/uvm_percpu.h
@@ -0,0 +1,48 @@
+/* $OpenBSD: uvm_percpu.h,v 1.1 2024/04/17 13:12:58 mpi Exp $ */
+
+/*
+ * Copyright (c) 2024 Martin Pieuchot <mpi@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _UVM_UVM_PCPU_H_
+#define _UVM_UVM_PCPU_H_
+
+struct vm_page;
+
+/*
+ * The number of pages per magazine should be large enough to get rid of the
+ * contention in the pmemrange allocator during concurrent page faults and
+ * small enough to limit fragmentation.
+ */
+#define UVM_PMR_CACHEMAGSZ 8
+
+/*
+ * Magazine
+ */
+struct uvm_pmr_cache_item {
+ struct vm_page *upci_pages[UVM_PMR_CACHEMAGSZ];
+ int upci_npages; /* # of pages in magazine */
+};
+
+/*
+ * Per-CPU cache of physical pages.
+ */
+struct uvm_pmr_cache {
+ struct uvm_pmr_cache_item upc_magz[2]; /* magazines */
+ int upc_actv; /* index of active magazine */
+
+};
+
+#endif /* _UVM_UVM_PCPU_H_ */
diff --git a/sys/uvm/uvm_pmemrange.c b/sys/uvm/uvm_pmemrange.c
index 128772d90ed..8cd16969217 100644
--- a/sys/uvm/uvm_pmemrange.c
+++ b/sys/uvm/uvm_pmemrange.c
@@ -1,6 +1,7 @@
-/* $OpenBSD: uvm_pmemrange.c,v 1.63 2023/04/10 04:21:20 jsg Exp $ */
+/* $OpenBSD: uvm_pmemrange.c,v 1.64 2024/04/17 13:12:58 mpi Exp $ */
/*
+ * Copyright (c) 2024 Martin Pieuchot <mpi@openbsd.org>
* Copyright (c) 2009, 2010 Ariane van der Steldt <ariane@stack.nl>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -1262,6 +1263,28 @@ out:
}
/*
+ * Acquire a single page.
+ *
+ * flags: UVM_PLA_* flags
+ * result: returned page.
+ */
+struct vm_page *
+uvm_pmr_getone(int flags)
+{
+ struct vm_page *pg;
+ struct pglist pgl;
+
+ TAILQ_INIT(&pgl);
+ if (uvm_pmr_getpages(1, 0, 0, 1, 0, 1, flags, &pgl) != 0)
+ return NULL;
+
+ pg = TAILQ_FIRST(&pgl);
+ KASSERT(pg != NULL && TAILQ_NEXT(pg, pageq) == NULL);
+
+ return pg;
+}
+
+/*
* Free a number of contig pages (invoked by uvm_page_init).
*/
void
@@ -2190,3 +2213,133 @@ uvm_pagezero_thread(void *arg)
yield();
}
}
+
+#if defined(MULTIPROCESSOR) && defined(__HAVE_UVM_PERCPU)
+int
+uvm_pmr_cache_alloc(struct uvm_pmr_cache_item *upci)
+{
+ struct vm_page *pg;
+ struct pglist pgl;
+ int flags = UVM_PLA_NOWAIT|UVM_PLA_NOWAKE;
+ int npages = UVM_PMR_CACHEMAGSZ;
+
+ KASSERT(upci->upci_npages == 0);
+
+ TAILQ_INIT(&pgl);
+ if (uvm_pmr_getpages(npages, 0, 0, 1, 0, npages, flags, &pgl))
+ return -1;
+
+ while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
+ TAILQ_REMOVE(&pgl, pg, pageq);
+ upci->upci_pages[upci->upci_npages] = pg;
+ upci->upci_npages++;
+ }
+ atomic_add_int(&uvmexp.percpucaches, npages);
+
+ return 0;
+}
+
+struct vm_page *
+uvm_pmr_cache_get(int flags)
+{
+ struct uvm_pmr_cache *upc = &curcpu()->ci_uvm;
+ struct uvm_pmr_cache_item *upci;
+ struct vm_page *pg;
+
+ upci = &upc->upc_magz[upc->upc_actv];
+ if (upci->upci_npages == 0) {
+ unsigned int prev;
+
+ prev = (upc->upc_actv == 0) ? 1 : 0;
+ upci = &upc->upc_magz[prev];
+ if (upci->upci_npages == 0) {
+ atomic_inc_int(&uvmexp.pcpmiss);
+ if (uvm_pmr_cache_alloc(upci))
+ return uvm_pmr_getone(flags);
+ }
+ /* Swap magazines */
+ upc->upc_actv = prev;
+ } else {
+ atomic_inc_int(&uvmexp.pcphit);
+ }
+
+ atomic_dec_int(&uvmexp.percpucaches);
+ upci->upci_npages--;
+ pg = upci->upci_pages[upci->upci_npages];
+
+ if (flags & UVM_PLA_ZERO)
+ uvm_pagezero(pg);
+
+ return pg;
+}
+
+void
+uvm_pmr_cache_free(struct uvm_pmr_cache_item *upci)
+{
+ struct pglist pgl;
+ unsigned int i;
+
+ TAILQ_INIT(&pgl);
+ for (i = 0; i < upci->upci_npages; i++)
+ TAILQ_INSERT_TAIL(&pgl, upci->upci_pages[i], pageq);
+
+ uvm_pmr_freepageq(&pgl);
+
+ atomic_sub_int(&uvmexp.percpucaches, upci->upci_npages);
+ upci->upci_npages = 0;
+ memset(upci->upci_pages, 0, sizeof(upci->upci_pages));
+}
+
+void
+uvm_pmr_cache_put(struct vm_page *pg)
+{
+ struct uvm_pmr_cache *upc = &curcpu()->ci_uvm;
+ struct uvm_pmr_cache_item *upci;
+
+ upci = &upc->upc_magz[upc->upc_actv];
+ if (upci->upci_npages >= UVM_PMR_CACHEMAGSZ) {
+ unsigned int prev;
+
+ prev = (upc->upc_actv == 0) ? 1 : 0;
+ upci = &upc->upc_magz[prev];
+ if (upci->upci_npages > 0)
+ uvm_pmr_cache_free(upci);
+
+ /* Swap magazines */
+ upc->upc_actv = prev;
+ KASSERT(upci->upci_npages == 0);
+ }
+
+ upci->upci_pages[upci->upci_npages] = pg;
+ upci->upci_npages++;
+ atomic_inc_int(&uvmexp.percpucaches);
+}
+
+void
+uvm_pmr_cache_drain(void)
+{
+ struct uvm_pmr_cache *upc = &curcpu()->ci_uvm;
+
+ uvm_pmr_cache_free(&upc->upc_magz[0]);
+ uvm_pmr_cache_free(&upc->upc_magz[1]);
+}
+
+#else /* !(MULTIPROCESSOR && __HAVE_UVM_PERCPU) */
+
+struct vm_page *
+uvm_pmr_cache_get(int flags)
+{
+ return uvm_pmr_getone(flags);
+}
+
+void
+uvm_pmr_cache_put(struct vm_page *pg)
+{
+ uvm_pmr_freepages(pg, 1);
+}
+
+void
+uvm_pmr_cache_drain(void)
+{
+}
+#endif
diff --git a/sys/uvm/uvm_pmemrange.h b/sys/uvm/uvm_pmemrange.h
index 923807c2196..3369085a2df 100644
--- a/sys/uvm/uvm_pmemrange.h
+++ b/sys/uvm/uvm_pmemrange.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pmemrange.h,v 1.14 2016/09/16 02:47:09 dlg Exp $ */
+/* $OpenBSD: uvm_pmemrange.h,v 1.15 2024/04/17 13:12:58 mpi Exp $ */
/*
* Copyright (c) 2009 Ariane van der Steldt <ariane@stack.nl>
@@ -147,5 +147,9 @@ void uvm_pmr_remove(struct uvm_pmemrange *,
struct vm_page *uvm_pmr_extract_range(struct uvm_pmemrange *,
struct vm_page *, paddr_t, paddr_t,
struct pglist *);
+struct vm_page *uvm_pmr_cache_get(int);
+void uvm_pmr_cache_put(struct vm_page *);
+void uvm_pmr_cache_drain(void);
+
#endif /* _UVM_UVM_PMEMRANGE_H_ */
diff --git a/sys/uvm/uvmexp.h b/sys/uvm/uvmexp.h
index 5b8b182396a..dc4994fa7d2 100644
--- a/sys/uvm/uvmexp.h
+++ b/sys/uvm/uvmexp.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvmexp.h,v 1.12 2024/03/24 10:29:35 mpi Exp $ */
+/* $OpenBSD: uvmexp.h,v 1.13 2024/04/17 13:12:58 mpi Exp $ */
#ifndef _UVM_UVMEXP_
#define _UVM_UVMEXP_
@@ -66,7 +66,7 @@ struct uvmexp {
int zeropages; /* [F] number of zero'd pages */
int reserve_pagedaemon; /* [I] # of pages reserved for pagedaemon */
int reserve_kernel; /* [I] # of pages reserved for kernel */
- int unused01; /* formerly anonpages */
+ int percpucaches; /* [a] # of pages in per-CPU caches */
int vnodepages; /* XXX # of pages used by vnode page cache */
int vtextpages; /* XXX # of pages used by vtext vnodes */
@@ -101,8 +101,8 @@ struct uvmexp {
int syscalls; /* system calls */
int pageins; /* [p] pagein operation count */
/* pageouts are in pdpageouts below */
- int unused07; /* formerly obsolete_swapins */
- int unused08; /* formerly obsolete_swapouts */
+ int pcphit; /* [a] # of pagealloc from per-CPU cache */
+ int pcpmiss; /* [a] # of times a per-CPU cache was empty */
int pgswapin; /* pages swapped in */
int pgswapout; /* pages swapped out */
int forks; /* forks */
diff --git a/usr.bin/systat/uvm.c b/usr.bin/systat/uvm.c
index 957b4dbf336..4bb759add34 100644
--- a/usr.bin/systat/uvm.c
+++ b/usr.bin/systat/uvm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm.c,v 1.6 2022/11/27 23:18:54 kn Exp $ */
+/* $OpenBSD: uvm.c,v 1.7 2024/04/17 13:12:58 mpi Exp $ */
/*
* Copyright (c) 2008 Can Erkin Acar <canacar@openbsd.org>
* Copyright (c) 2018 Kenneth R Westerback <krw@openbsd.org>
@@ -80,11 +80,10 @@ struct uvmline uvmline[] = {
{ &uvmexp.zeropages, &last_uvmexp.zeropages, "zeropages",
&uvmexp.pageins, &last_uvmexp.pageins, "pageins",
&uvmexp.fltrelckok, &last_uvmexp.fltrelckok, "fltrelckok" },
- { &uvmexp.reserve_pagedaemon, &last_uvmexp.reserve_pagedaemon,
- "reserve_pagedaemon",
+ { &uvmexp.percpucaches, &last_uvmexp.percpucaches, "percpucaches",
&uvmexp.pgswapin, &last_uvmexp.pgswapin, "pgswapin",
&uvmexp.fltanget, &last_uvmexp.fltanget, "fltanget" },
- { &uvmexp.reserve_kernel, &last_uvmexp.reserve_kernel, "reserve_kernel",
+ { NULL, NULL, NULL,
&uvmexp.pgswapout, &last_uvmexp.pgswapout, "pgswapout",
&uvmexp.fltanretry, &last_uvmexp.fltanretry, "fltanretry" },
{ NULL, NULL, NULL,
@@ -143,13 +142,13 @@ struct uvmline uvmline[] = {
NULL, NULL, NULL },
{ &uvmexp.pagesize, &last_uvmexp.pagesize, "pagesize",
&uvmexp.pdpending, &last_uvmexp.pdpending, "pdpending",
- NULL, NULL, NULL },
+ NULL, NULL, "Per-CPU Counters" },
{ &uvmexp.pagemask, &last_uvmexp.pagemask, "pagemask",
&uvmexp.pddeact, &last_uvmexp.pddeact, "pddeact",
- NULL, NULL, NULL },
+ &uvmexp.pcphit, &last_uvmexp.pcphit, "pcphit" },
{ &uvmexp.pageshift, &last_uvmexp.pageshift, "pageshift",
NULL, NULL, NULL,
- NULL, NULL, NULL }
+ &uvmexp.pcpmiss, &last_uvmexp.pcpmiss, "pcpmiss" }
};
field_def fields_uvm[] = {
diff --git a/usr.bin/vmstat/vmstat.c b/usr.bin/vmstat/vmstat.c
index 870bc9f5d43..a737d36adb5 100644
--- a/usr.bin/vmstat/vmstat.c
+++ b/usr.bin/vmstat/vmstat.c
@@ -1,5 +1,5 @@
/* $NetBSD: vmstat.c,v 1.29.4.1 1996/06/05 00:21:05 cgd Exp $ */
-/* $OpenBSD: vmstat.c,v 1.155 2022/12/04 23:50:50 cheloha Exp $ */
+/* $OpenBSD: vmstat.c,v 1.156 2024/04/17 13:12:58 mpi Exp $ */
/*
* Copyright (c) 1980, 1986, 1991, 1993
@@ -513,7 +513,12 @@ dosum(void)
uvmexp.reserve_pagedaemon);
(void)printf("%11u pages reserved for kernel\n",
uvmexp.reserve_kernel);
+ (void)printf("%11u pages in per-cpu caches\n",
+ uvmexp.percpucaches);
+ /* per-cpu cache */
+ (void)printf("%11u per-cpu cache hits\n", uvmexp.pcphit);
+ (void)printf("%11u per-cpu cache misses\n", uvmexp.pcpmiss);
/* swap */
(void)printf("%11u swap pages\n", uvmexp.swpages);
(void)printf("%11u swap pages in use\n", uvmexp.swpginuse);