summaryrefslogtreecommitdiff
path: root/sys/arch/powerpc64
diff options
context:
space:
mode:
authorgkoehler <gkoehler@cvs.openbsd.org>2020-12-30 06:06:32 +0000
committergkoehler <gkoehler@cvs.openbsd.org>2020-12-30 06:06:32 +0000
commit39f39fdbfb82696a9b8a04f41c139f91c64b1b50 (patch)
tree4e7523d1b680cadb3d64a16be369d0e41ec957bd /sys/arch/powerpc64
parenta640deed8a9d460a51db6c891d3c4f153fc30ea9 (diff)
Enter power-saving mode on POWER9 (ISA v3)
When opal(4) attaches, look in the device tree for a psscr value. In cpu_idle_cycle(), use this psscr value and the stop instruction to wait for the next interrupt. In mp kernels, cpu_unidle() now sends an interrupt. In "sysctl hw.sensors", the power and temperature sensors from opalsens(4) may show lower values. The cpu may exit stop at the system reset vector after losing user registers. If so, restore some registers. For now, ignore deeper stop states that would lose hypervisor registers. Our mp kernel uses only the first hardware thread of each core. Take the extra threads from the firmware and stop them forever; this may switch the core from SMT4 to single-thread mode and increase performance. partly by kettenis@, ok kettenis@
Diffstat (limited to 'sys/arch/powerpc64')
-rw-r--r--sys/arch/powerpc64/dev/opal.c101
-rw-r--r--sys/arch/powerpc64/include/cpu.h9
-rw-r--r--sys/arch/powerpc64/include/cpufunc.h3
-rw-r--r--sys/arch/powerpc64/include/trap.h1
-rw-r--r--sys/arch/powerpc64/powerpc64/cpu.c30
-rw-r--r--sys/arch/powerpc64/powerpc64/genassym.cf3
-rw-r--r--sys/arch/powerpc64/powerpc64/locore.S101
-rw-r--r--sys/arch/powerpc64/powerpc64/machdep.c8
-rw-r--r--sys/arch/powerpc64/powerpc64/trap_subr.S26
9 files changed, 264 insertions, 18 deletions
diff --git a/sys/arch/powerpc64/dev/opal.c b/sys/arch/powerpc64/dev/opal.c
index a3ec08afa92..096412b778f 100644
--- a/sys/arch/powerpc64/dev/opal.c
+++ b/sys/arch/powerpc64/dev/opal.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: opal.c,v 1.10 2020/09/23 03:03:11 gkoehler Exp $ */
+/* $OpenBSD: opal.c,v 1.11 2020/12/30 06:06:30 gkoehler Exp $ */
/*
* Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
*
@@ -76,6 +76,8 @@ void opal_attach_deferred(struct device *);
void opal_attach_node(struct opal_softc *, int);
int opal_gettime(struct todr_chip_handle *, struct timeval *);
int opal_settime(struct todr_chip_handle *, struct timeval *);
+void opal_configure_idle_states(struct opal_softc *, int);
+void opal_found_stop_state(struct opal_softc *, uint64_t);
extern int perflevel;
@@ -146,7 +148,11 @@ opal_attach(struct device *parent, struct device *self, void *aux)
sc->sc_todr.todr_settime = opal_settime;
todr_attach(&sc->sc_todr);
- opalpm_init(sc, OF_getnodebyname(faa->fa_node, "power-mgt"));
+ node = OF_getnodebyname(faa->fa_node, "power-mgt");
+ if (node) {
+ opal_configure_idle_states(sc, node);
+ opalpm_init(sc, node);
+ }
node = OF_getnodebyname(faa->fa_node, "consoles");
if (node) {
@@ -322,19 +328,100 @@ opal_settime(struct todr_chip_handle *ch, struct timeval *tv)
return 0;
}
+/* Bits of "ibm,cpu-idle-state-flags" that this kernel can tolerate. */
+#define OPAL_PM_LOSE_USER_CONTEXT 0x00001000
+#define OPAL_PM_STOP_INST_FAST 0x00100000
+
+/*
+ * Choose an idle state from the "power-mgt" device tree node.
+ *
+ * Reads the per-state flags and psscr values, then walks the states
+ * from last to first and hands the first one whose flags contain
+ * nothing besides the tolerated bits to opal_found_stop_state().
+ * Does nothing if the flags property is absent or malformed, or if
+ * the psscr property does not have exactly one 64-bit entry per flag.
+ */
+void
+opal_configure_idle_states(struct opal_softc *sc, int node)
+{
+	const uint32_t tolerated =
+	    OPAL_PM_LOSE_USER_CONTEXT | OPAL_PM_STOP_INST_FAST;
+	uint32_t *flagv;
+	uint64_t *psscrv;
+	int flagsz, idx, nstates, psscrsz;
+
+	flagsz = OF_getproplen(node, "ibm,cpu-idle-state-flags");
+	if (flagsz <= 0 || flagsz % sizeof(*flagv) != 0)
+		return;
+	nstates = flagsz / sizeof(*flagv);
+	psscrsz = nstates * sizeof(*psscrv);
+
+	flagv = malloc(flagsz, M_DEVBUF, M_WAITOK);
+	psscrv = malloc(psscrsz, M_DEVBUF, M_WAITOK);
+	OF_getpropintarray(node, "ibm,cpu-idle-state-flags", flagv, flagsz);
+
+	/* Power ISA v3 uses the psscr with the stop instruction. */
+	if (OF_getpropint64array(node, "ibm,cpu-idle-state-psscr",
+	    psscrv, psscrsz) != psscrsz)
+		goto done;
+
+	/*
+	 * Scan from the last state down to the first and take the
+	 * first one that doesn't lose too much context.
+	 */
+	for (idx = nstates; idx-- > 0; ) {
+		if (flagv[idx] & ~tolerated)
+			continue;
+		opal_found_stop_state(sc, psscrv[idx]);
+		break;
+	}
+
+done:
+	free(flagv, M_DEVBUF, flagsz);
+	free(psscrv, M_DEVBUF, psscrsz);
+}
+
+void cpu_idle_stop(void);
+#ifdef MULTIPROCESSOR
+void cpu_hatch_and_stop(void);
+#endif
+
+/*
+ * Adopt `state' as the psscr value for idling.
+ *
+ * Records the value in cpu_idle_state_psscr, switches the idle hook
+ * cpu_idle_cycle_fcn to cpu_idle_stop and reports the value on the
+ * console.  In MULTIPROCESSOR kernels it also starts every hardware
+ * thread except the first of each "cpu" node at cpu_hatch_and_stop.
+ */
+void
+opal_found_stop_state(struct opal_softc *sc, uint64_t state)
+{
+#ifdef MULTIPROCESSOR
+	uint32_t pirs[8];
+	int i, len, node;
+	char buf[32];
+#endif
+
+	cpu_idle_state_psscr = state;
+	cpu_idle_cycle_fcn = &cpu_idle_stop;
+	printf("%s: idle psscr %llx\n", sc->sc_dev.dv_xname,
+	    (unsigned long long)state);
+
+#ifdef MULTIPROCESSOR
+	/*
+	 * Idle the other hardware threads.  We use only one thread of
+	 * each cpu core.  The other threads are idle in OPAL.  If we
+	 * move them to a deeper idle state, then the core might
+	 * switch to single-thread mode and increase performance.
+	 */
+	node = OF_parent(curcpu()->ci_node);
+	for (node = OF_child(node); node != 0; node = OF_peer(node)) {
+		if (OF_getprop(node, "device_type", buf, sizeof(buf)) <= 0 ||
+		    strcmp(buf, "cpu") != 0)
+			continue;
+		len = OF_getpropintarray(node, "ibm,ppc-interrupt-server#s",
+		    pirs, sizeof(pirs));
+		if (len <= 0 || len % 4 != 0)
+			continue;
+		/*
+		 * The length reported for the property can be larger
+		 * than what fit in pirs[]; never index past the
+		 * entries that were actually copied.
+		 */
+		if (len > (int)sizeof(pirs))
+			len = sizeof(pirs);
+		/* Skip i = 0, the first hardware thread. */
+		for (i = 1; i < len / 4; i++)
+			opal_start_cpu(pirs[i],
+			    (vaddr_t)cpu_hatch_and_stop);
+	}
+#endif
+}
+
void
opalpm_init(struct opal_softc *sc, int node)
{
int i, len;
- if (!node) {
- printf("%s: no power-mgt\n", sc->sc_dev.dv_xname);
- return;
- }
len = OF_getproplen(node, "ibm,pstate-ids");
if (len <= 0 || len % sizeof(int) != 0 ||
len != OF_getproplen(node, "ibm,pstate-frequencies-mhz")) {
- printf("%s: can't parse power-mgt\n", sc->sc_dev.dv_xname);
+ printf("%s: can't parse pstates\n", sc->sc_dev.dv_xname);
return;
}
sc->sc_pstate = malloc(len, M_DEVBUF, M_WAITOK);
diff --git a/sys/arch/powerpc64/include/cpu.h b/sys/arch/powerpc64/include/cpu.h
index 7a95344f554..0a1fc75a3f2 100644
--- a/sys/arch/powerpc64/include/cpu.h
+++ b/sys/arch/powerpc64/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.28 2020/09/23 03:03:12 gkoehler Exp $ */
+/* $OpenBSD: cpu.h,v 1.29 2020/12/30 06:06:30 gkoehler Exp $ */
/*
* Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
@@ -69,6 +69,7 @@ struct cpu_info {
#define CPUSAVE_LEN 9
register_t ci_tempsave[CPUSAVE_LEN];
+ register_t ci_idle_sp_save;
uint64_t ci_lasttb;
uint64_t ci_nexttimerevent;
@@ -135,6 +136,7 @@ curcpu(void)
for (cii = 0, ci = curcpu(); ci != NULL; ci = NULL)
#define cpu_kick(ci)
+#define cpu_unidle(ci)
#else
@@ -147,6 +149,7 @@ curcpu(void)
for (cii = 0, ci = &cpu_info[0]; cii < ncpus; cii++, ci++)
void cpu_kick(struct cpu_info *);
+void cpu_unidle(struct cpu_info *);
void cpu_boot_secondary_processors(void);
void cpu_startclock(void);
@@ -166,7 +169,6 @@ void mp_setperf(int);
void signotify(struct proc *);
-#define cpu_unidle(ci)
#define CPU_BUSY_CYCLE() do {} while (0)
#define curpcb curcpu()->ci_curpcb
@@ -180,6 +182,9 @@ extern uint32_t cpu_features2;
void cpu_init_features(void);
void cpu_init(void);
+extern uint64_t cpu_idle_state_psscr;
+extern void (*cpu_idle_cycle_fcn)(void);
+
static inline unsigned int
cpu_rnd_messybits(void)
{
diff --git a/sys/arch/powerpc64/include/cpufunc.h b/sys/arch/powerpc64/include/cpufunc.h
index a3a6e485c29..d2ba98b630f 100644
--- a/sys/arch/powerpc64/include/cpufunc.h
+++ b/sys/arch/powerpc64/include/cpufunc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpufunc.h,v 1.9 2020/12/22 11:55:44 kettenis Exp $ */
+/* $OpenBSD: cpufunc.h,v 1.10 2020/12/30 06:06:30 gkoehler Exp $ */
/*
* Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
@@ -165,6 +165,7 @@ mtlpcr(uint64_t value)
__asm volatile ("mtspr 318, %0" :: "r"(value));
}
+#define LPCR_PECE 0x000040000001f000UL
#define LPCR_LPES 0x0000000000000008UL
#define LPCR_HVICE 0x0000000000000002UL
diff --git a/sys/arch/powerpc64/include/trap.h b/sys/arch/powerpc64/include/trap.h
index e79a967b078..8078cff5931 100644
--- a/sys/arch/powerpc64/include/trap.h
+++ b/sys/arch/powerpc64/include/trap.h
@@ -152,5 +152,6 @@
#define TRAP_ENTRY 0x1f8
#define TRAP_HVENTRY 0x1f0
#define TRAP_SLBENTRY 0x1e8
+#define TRAP_RSTENTRY 0x1e0
#endif /* _MACHINE_TRAP_H_ */
diff --git a/sys/arch/powerpc64/powerpc64/cpu.c b/sys/arch/powerpc64/powerpc64/cpu.c
index e36c67e66b0..d86fac2d777 100644
--- a/sys/arch/powerpc64/powerpc64/cpu.c
+++ b/sys/arch/powerpc64/powerpc64/cpu.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.c,v 1.21 2020/12/22 11:55:44 kettenis Exp $ */
+/* $OpenBSD: cpu.c,v 1.22 2020/12/30 06:06:30 gkoehler Exp $ */
/*
* Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
@@ -236,7 +236,7 @@ cpu_init(void)
uint64_t lpcr = LPCR_LPES;
if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
- lpcr |= LPCR_HVICE;
+ lpcr |= LPCR_PECE | LPCR_HVICE;
mtlpcr(lpcr);
isync();
@@ -259,6 +259,25 @@ cpu_darn(void *arg)
timeout_add_msec(&cpu_darn_to, 10);
}
+/* psscr value loaded by the stop-based idle routines. */
+uint64_t cpu_idle_state_psscr;
+void cpu_idle_spin(void);
+/* Idle routine in use; cpu_idle_spin until something replaces it. */
+void (*cpu_idle_cycle_fcn)(void) = &cpu_idle_spin;
+
+/*
+ * Run one idle cycle on the current cpu.
+ *
+ * The idle routine is entered only if the cpu is still idle once
+ * interrupts have been disabled; interrupts are re-enabled before
+ * returning in either case.
+ */
+void
+cpu_idle_cycle(void)
+{
+	intr_disable();
+
+	/*
+	 * NOTE(review): the check is made with interrupts off,
+	 * presumably so that a wakeup cannot arrive between the check
+	 * and the idle routine -- confirm.
+	 */
+	if (cpu_is_idle(curcpu()))
+		cpu_idle_cycle_fcn();
+
+	intr_enable();
+}
+
#ifdef MULTIPROCESSOR
volatile int mp_perflevel;
@@ -378,6 +397,13 @@ cpu_kick(struct cpu_info *ci)
intr_send_ipi(ci, IPI_NOP);
}
+/*
+ * Interrupt another cpu with a no-op IPI.  Nothing is sent when `ci'
+ * is the cpu we are running on.
+ */
+void
+cpu_unidle(struct cpu_info *ci)
+{
+	if (ci == curcpu())
+		return;
+
+	intr_send_ipi(ci, IPI_NOP);
+}
+
/*
* Run ul_setperf(level) on every core.
*/
diff --git a/sys/arch/powerpc64/powerpc64/genassym.cf b/sys/arch/powerpc64/powerpc64/genassym.cf
index 96988f4c900..135fe585c7d 100644
--- a/sys/arch/powerpc64/powerpc64/genassym.cf
+++ b/sys/arch/powerpc64/powerpc64/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.13 2020/09/05 19:21:10 kettenis Exp $
+# $OpenBSD: genassym.cf,v 1.14 2020/12/30 06:06:30 gkoehler Exp $
#
# Copyright (c) 1982, 1990 The Regents of the University of California.
# All rights reserved.
@@ -41,6 +41,7 @@ struct cpu_info
member ci_curpcb
member ci_curproc
member ci_tempsave
+member ci_idle_sp_save
member ci_slbsave
member ci_slbstack
member ci_kernel_slb
diff --git a/sys/arch/powerpc64/powerpc64/locore.S b/sys/arch/powerpc64/powerpc64/locore.S
index 56e73e806f0..5d83bb28000 100644
--- a/sys/arch/powerpc64/powerpc64/locore.S
+++ b/sys/arch/powerpc64/powerpc64/locore.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.S,v 1.41 2020/10/22 23:35:43 mortimer Exp $ */
+/* $OpenBSD: locore.S,v 1.42 2020/12/30 06:06:30 gkoehler Exp $ */
/*
* Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
@@ -86,16 +86,111 @@ tmpstack_end:
.text
+/*
+ * For Power ISA v3, stop a hardware thread forever.
+ *
+ * This is the entry address that opal_found_stop_state() passes to
+ * opal_start_cpu() for the extra hardware threads.  It uses no stack,
+ * and never returns: it loops on the stop instruction.
+ */
+ .globl cpu_hatch_and_stop
+cpu_hatch_and_stop:
+ /* Derive the TOC pointer in %r2 from our own address. */
+ bl 1f
+1: mflr %r2
+ addis %r2, %r2, (.TOC. - 1b)@ha
+ addi %r2, %r2, (.TOC. - 1b)@l
+ /* Clear PECE bits to disable exiting from idle. */
+ li %r3, 0
+ mtspr 318, %r3 /* lpcr = 0 */
+ /* Set psscr to request idle state. */
+ addis %r3, %r2, cpu_idle_state_psscr@toc@ha
+ ld %r3, cpu_idle_state_psscr@toc@l(%r3)
+ mtspr 823, %r3
+ /* If the thread ever falls out of stop, just stop again. */
+1: stop
+ b 1b /* Lighter states might not use PECE. */
+
#endif
.globl cpu_idle_enter
cpu_idle_enter:
blr
- .globl cpu_idle_cycle
-cpu_idle_cycle:
+ .globl cpu_idle_spin
+cpu_idle_spin:
blr
+/*
+ * Idle for Power ISA v3.
+ *
+ * Loads cpu_idle_state_psscr into the psscr and executes stop.  The
+ * code relies on %r2 already holding the TOC pointer.  Before
+ * stopping, it stores lr, cr and r14-r31 relative to the stack
+ * pointer and records the stack pointer in ci_idle_sp_save, so that
+ * cpu_idle_restore_context can reload them if the thread resumes at
+ * the system reset vector.  If execution simply continues after stop,
+ * it returns to the caller with blr.
+ */
+ .globl cpu_idle_stop
+cpu_idle_stop:
+ /* Set psscr to request idle state. */
+ addis %r3, %r2, cpu_idle_state_psscr@toc@ha
+ ld %r3, cpu_idle_state_psscr@toc@l(%r3)
+ mtspr 823, %r3
+ /*
+ * POWER9 23.5.9.2 State Loss and Restoration: We may lose
+ * "any nonhypervisor thread context (such as, GPRs, VSRs,
+ * FPRs)" and "the following SPRs: CR, FPSCR, VSCR, XER, DSCR,
+ * AMR, IAMR, UAMOR, AMOR, DAWR, DAWRX."
+ */
+ mflr %r3
+ mfcr %r4
+ /*
+ * lr and cr go above the stack pointer (presumably the ABI's
+ * LR and CR save slots -- confirm); r14-r31 go below it.
+ */
+ std %r3, 16(%r1)
+ stw %r4, 8(%r1)
+ std %r31, -8(%r1)
+ std %r30, -16(%r1)
+ std %r29, -24(%r1)
+ std %r28, -32(%r1)
+ std %r27, -40(%r1)
+ std %r26, -48(%r1)
+ std %r25, -56(%r1)
+ std %r24, -64(%r1)
+ std %r23, -72(%r1)
+ std %r22, -80(%r1)
+ std %r21, -88(%r1)
+ std %r20, -96(%r1)
+ std %r19, -104(%r1)
+ std %r18, -112(%r1)
+ std %r17, -120(%r1)
+ std %r16, -128(%r1)
+ std %r15, -136(%r1)
+ std %r14, -144(%r1)
+ /* Red zone ends at -288(%r1). */
+ /* sprg0 serves as the base of this cpu's struct cpu_info. */
+ mfsprg0 %r3
+ std %r1, CI_IDLE_SP_SAVE(%r3)
+ stop
+ /* If we continue here, then we lost no context. */
+ blr
+
+/*
+ * Come here from the system reset vector (rsttrapcode).
+ *
+ * Undoes the saves made by cpu_idle_stop: recomputes the TOC pointer,
+ * reloads the stack pointer from ci_idle_sp_save, then reloads
+ * r14-r31, cr and the saved lr from the same stack offsets that
+ * cpu_idle_stop wrote.  The saved lr is placed in srr0, so rfid
+ * resumes at the address cpu_idle_stop would have returned to.
+ */
+ .globl cpu_idle_restore_context
+cpu_idle_restore_context:
+ bl 1f
+1: mflr %r2
+ addis %r2, %r2, (.TOC. - 1b)@ha
+ addi %r2, %r2, (.TOC. - 1b)@l /* TOC pointer */
+ mfsprg0 %r3
+ ld %r1, CI_IDLE_SP_SAVE(%r3) /* stack pointer */
+ /* Data relocation is switched on before the stack loads below. */
+ mfmsr %r4
+ ori %r4, %r4, PSL_DR@l /* data relocation on */
+ mtmsr %r4
+ ld %r14, -144(%r1)
+ ld %r15, -136(%r1)
+ ld %r16, -128(%r1)
+ ld %r17, -120(%r1)
+ ld %r18, -112(%r1)
+ ld %r19, -104(%r1)
+ ld %r20, -96(%r1)
+ ld %r21, -88(%r1)
+ ld %r22, -80(%r1)
+ ld %r23, -72(%r1)
+ ld %r24, -64(%r1)
+ ld %r25, -56(%r1)
+ ld %r26, -48(%r1)
+ ld %r27, -40(%r1)
+ ld %r28, -32(%r1)
+ ld %r29, -24(%r1)
+ ld %r30, -16(%r1)
+ ld %r31, -8(%r1)
+ lwz %r4, 8(%r1) /* cr */
+ ld %r5, 16(%r1) /* lr */
+ mtcr %r4
+ mtsrr0 %r5
+ rfid /* return from system reset interrupt */
+
.globl cpu_idle_leave
cpu_idle_leave:
blr
diff --git a/sys/arch/powerpc64/powerpc64/machdep.c b/sys/arch/powerpc64/powerpc64/machdep.c
index 2aa77e28269..0ffbd3aee5c 100644
--- a/sys/arch/powerpc64/powerpc64/machdep.c
+++ b/sys/arch/powerpc64/powerpc64/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.65 2020/11/08 20:37:24 mpi Exp $ */
+/* $OpenBSD: machdep.c,v 1.66 2020/12/30 06:06:30 gkoehler Exp $ */
/*
* Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
@@ -78,10 +78,12 @@ int opal_have_console_flush;
extern char trapcode[], trapcodeend[];
extern char hvtrapcode[], hvtrapcodeend[];
+extern char rsttrapcode[], rsttrapcodeend[];
extern char slbtrapcode[], slbtrapcodeend[];
extern char generictrap[];
extern char generichvtrap[];
extern char kern_slbtrap[];
+extern char cpu_idle_restore_context[];
extern char initstack[];
@@ -248,12 +250,16 @@ init_powernv(void *fdt, void *tocbase)
memcpy((void *)EXC_HFAC, hvtrapcode, hvtrapcodeend - hvtrapcode);
memcpy((void *)EXC_HVI, hvtrapcode, hvtrapcodeend - hvtrapcode);
+ /* System reset trap needs special handling. */
+ memcpy((void *)EXC_RST, rsttrapcode, rsttrapcodeend - rsttrapcode);
+
/* SLB trap needs special handling as well. */
memcpy((void *)EXC_DSE, slbtrapcode, slbtrapcodeend - slbtrapcode);
*((void **)TRAP_ENTRY) = generictrap;
*((void **)TRAP_HVENTRY) = generichvtrap;
*((void **)TRAP_SLBENTRY) = kern_slbtrap;
+ *((void **)TRAP_RSTENTRY) = cpu_idle_restore_context;
/* Make the stubs visible to the CPU. */
__syncicache(EXC_RSVD, EXC_LAST - EXC_RSVD);
diff --git a/sys/arch/powerpc64/powerpc64/trap_subr.S b/sys/arch/powerpc64/powerpc64/trap_subr.S
index 23e918a86d2..4a1c67b563e 100644
--- a/sys/arch/powerpc64/powerpc64/trap_subr.S
+++ b/sys/arch/powerpc64/powerpc64/trap_subr.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: trap_subr.S,v 1.18 2020/09/25 17:31:27 kettenis Exp $ */
+/* $OpenBSD: trap_subr.S,v 1.19 2020/12/30 06:06:30 gkoehler Exp $ */
/* $NetBSD: trap_subr.S,v 1.20 2002/04/22 23:20:08 kleink Exp $ */
/*-
@@ -305,6 +305,30 @@ hvtrapcode:
blrl
hvtrapcodeend:
+/*
+ * System reset might be an exit from power-saving mode.
+ *
+ * init_powernv() copies this stub (rsttrapcode..rsttrapcodeend) to
+ * EXC_RST.  It saves %r1 in sprg1 and cr in sprg2, then tests srr1:
+ * if bits 46:47 are nonzero it jumps through the pointer stored at
+ * TRAP_RSTENTRY, otherwise it restores cr and enters the handler
+ * stored at TRAP_ENTRY.
+ */
+ .globl rsttrapcode, rsttrapcodeend
+rsttrapcode:
+ mtsprg1 %r1
+ mfcr %r1
+ mtsprg2 %r1 /* save cr */
+ mfsrr1 %r1
+ andis. %r1, %r1, 0x3 /* test srr1 bits 46:47 */
+ beq 1f
+ /* This is an exit from power-saving mode. */
+ ld %r1, TRAP_RSTENTRY(0) /* cpu_idle_restore_context */
+ mtctr %r1
+ bctr
+1: /* This is something else. */
+ mfsprg2 %r1
+ mtcr %r1 /* restore cr */
+ /* From here sprg2 holds the interrupted lr instead of cr. */
+ mflr %r1
+ mtsprg2 %r1
+ ld %r1, TRAP_ENTRY(0) /* generictrap */
+ mtlr %r1
+ /* NOTE(review): 0xe0 looks like an argument for generictrap; confirm. */
+ li %r1, 0xe0
+ blrl
+rsttrapcodeend:
+
/*
* For SLB misses: do special things for the kernel
*