diff options
-rw-r--r-- | lib/libc/gen/sysctl.3 | 30 | ||||
-rw-r--r-- | sbin/sysctl/sysctl.8 | 6 | ||||
-rw-r--r-- | sbin/sysctl/sysctl.c | 33 | ||||
-rw-r--r-- | sys/altq/altq_subr.c | 10 | ||||
-rw-r--r-- | sys/conf/files | 3 | ||||
-rw-r--r-- | sys/dev/pci/amdpm.c | 74 | ||||
-rw-r--r-- | sys/dev/pci/amdpmreg.h | 5 | ||||
-rw-r--r-- | sys/dev/pci/files.pci | 7 | ||||
-rw-r--r-- | sys/dev/pci/piixpm.c | 171 | ||||
-rw-r--r-- | sys/kern/init_main.c | 10 | ||||
-rw-r--r-- | sys/kern/kern_clock.c | 31 | ||||
-rw-r--r-- | sys/kern/kern_sysctl.c | 13 | ||||
-rw-r--r-- | sys/kern/kern_tc.c | 603 | ||||
-rw-r--r-- | sys/kern/kern_time.c | 71 | ||||
-rw-r--r-- | sys/sys/kernel.h | 6 | ||||
-rw-r--r-- | sys/sys/sysctl.h | 21 | ||||
-rw-r--r-- | sys/sys/timetc.h | 76 |
17 files changed, 1141 insertions, 29 deletions
diff --git a/lib/libc/gen/sysctl.3 b/lib/libc/gen/sysctl.3 index 229455bab6d..e132cb41635 100644 --- a/lib/libc/gen/sysctl.3 +++ b/lib/libc/gen/sysctl.3 @@ -1,4 +1,4 @@ -.\" $OpenBSD: sysctl.3,v 1.139 2004/06/03 17:09:12 espie Exp $ +.\" $OpenBSD: sysctl.3,v 1.140 2004/07/28 17:15:12 tholo Exp $ .\" .\" Copyright (c) 1993 .\" The Regents of the University of California. All rights reserved. @@ -385,6 +385,7 @@ information. .It Dv KERN_SYSVMSG No " integer no" .It Dv KERN_SYSVSEM No " integer no" .It Dv KERN_SYSVSHM No " integer no" +.It Dv KERN_TIMECOUNTER No " node not applicable" .It Dv KERN_TTY No " node not applicable" .It Dv KERN_TTYCOUNT No " integer no" .It Dv KERN_USERASYMCRYPTO No " integer yes" @@ -836,6 +837,33 @@ system, otherwise 0. .It Dv KERN_SYSVSHM Returns 1 if System V style share memory functionality is available on this system, otherwise 0. +.It Dv KERN_TIMECOUNTER +Return statistics information about the kernel time counter. +The third level names information is detailed below. +The changeable column shows whether a process with appropriate +privileges may change the value. +.Bl -column "KERN_TIMECOUNTER_TIMESTEPWARNINGS" "integer" -offset indent +.It Sy Third level name Type Changeable +.It Dv KERN_TIMECOUNTER_CHOICE No " string no" +.It Dv KERN_TIMECOUNTER_HARDWARE No " string yes" +.It Dv KERN_TIMECOUNTER_TICK No " integer no" +.It Dv KERN_TIMECOUNTER_TIMESTEPWARNINGS No " integer yes" +.El +.Pp +The variables are as follows: +.Bl -tag -width "123456" +.It Dv KERN_TIMECOUNTER_CHOICE +Get the list of kernel time counter sources and their claimed +quality (higher is better). +.It Dv KERN_TIMECOUNTER_HARDWARE +Get or set the kernel time counter source by name. +.It Dv KERN_TIMECOUNTER_TICK +Get the number of times we have reset the kernel time counter +information. +.It Dv KERN_TIMECOUNTER_TIMESTEPWARNINGS +Get or set a flag to log a message when the kernel time is +stepped. 
+.El .It Dv KERN_TTY Return statistics information about tty input/output. The third level names information is detailed below. diff --git a/sbin/sysctl/sysctl.8 b/sbin/sysctl/sysctl.8 index 8f210f4bc31..212be088bb3 100644 --- a/sbin/sysctl/sysctl.8 +++ b/sbin/sysctl/sysctl.8 @@ -1,4 +1,4 @@ -.\" $OpenBSD: sysctl.8,v 1.122 2004/05/07 21:58:14 millert Exp $ +.\" $OpenBSD: sysctl.8,v 1.123 2004/07/28 17:15:12 tholo Exp $ .\" $NetBSD: sysctl.8,v 1.4 1995/09/30 07:12:49 thorpej Exp $ .\" .\" Copyright (c) 1993 @@ -188,6 +188,10 @@ privilege can change the value. .It kern.emul.nemuls integer no .It kern.emul.other integer yes .It kern.maxclusters integer yes +.It kern.timecounter.tick integer no +.It kern.timecounter.timestepwarnings integer yes +.It kern.timecounter.hardware string yes +.It kern.timecounter.choice string no .It vm.vmmeter struct no .It vm.loadavg struct no .It vm.psstrings struct no diff --git a/sbin/sysctl/sysctl.c b/sbin/sysctl/sysctl.c index 9536913ec71..dd176c9a41d 100644 --- a/sbin/sysctl/sysctl.c +++ b/sbin/sysctl/sysctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sysctl.c,v 1.113 2004/04/15 00:23:17 tedu Exp $ */ +/* $OpenBSD: sysctl.c,v 1.114 2004/07/28 17:15:12 tholo Exp $ */ /* $NetBSD: sysctl.c,v 1.9 1995/09/30 07:12:50 thorpej Exp $ */ /* @@ -40,7 +40,7 @@ static const char copyright[] = #if 0 static const char sccsid[] = "@(#)sysctl.c 8.5 (Berkeley) 5/9/95"; #else -static const char rcsid[] = "$OpenBSD: sysctl.c,v 1.113 2004/04/15 00:23:17 tedu Exp $"; +static const char rcsid[] = "$OpenBSD: sysctl.c,v 1.114 2004/07/28 17:15:12 tholo Exp $"; #endif #endif /* not lint */ @@ -131,6 +131,7 @@ struct ctlname ttyname[] = CTL_KERN_TTY_NAMES; struct ctlname semname[] = CTL_KERN_SEMINFO_NAMES; struct ctlname shmname[] = CTL_KERN_SHMINFO_NAMES; struct ctlname watchdogname[] = CTL_KERN_WATCHDOG_NAMES; +struct ctlname tcname[] = CTL_KERN_TIMECOUNTER_NAMES; struct ctlname *vfsname; #ifdef CTL_MACHDEP_NAMES struct ctlname machdepname[] = CTL_MACHDEP_NAMES; @@ 
-207,6 +208,7 @@ int sysctl_malloc(char *, char **, int *, int, int *); int sysctl_seminfo(char *, char **, int *, int, int *); int sysctl_shminfo(char *, char **, int *, int, int *); int sysctl_watchdog(char *, char **, int *, int, int *); +int sysctl_tc(char *, char **, int *, int, int *); int sysctl_sensors(char *, char **, int *, int, int *); int sysctl_emul(char *, char *, int); #ifdef CPU_CHIPSET @@ -434,6 +436,12 @@ parse(char *string, int flags) if (len < 0) return; break; + case KERN_TIMECOUNTER: + len = sysctl_tc(string, &bufp, mib, flags, + &type); + if (len < 0) + return; + break; case KERN_EMUL: sysctl_emul(string, newval, flags); return; @@ -1417,6 +1425,7 @@ struct list ttylist = { ttyname, KERN_TTY_MAXID }; struct list semlist = { semname, KERN_SEMINFO_MAXID }; struct list shmlist = { shmname, KERN_SHMINFO_MAXID }; struct list watchdoglist = { watchdogname, KERN_WATCHDOG_MAXID }; +struct list tclist = { tcname, KERN_TIMECOUNTER_MAXID }; /* * handle vfs namei cache statistics @@ -1975,6 +1984,26 @@ sysctl_watchdog(char *string, char **bufpp, int mib[], int flags, } /* + * Handle timecounter support + */ +int +sysctl_tc(char *string, char **bufpp, int mib[], int flags, + int *typep) +{ + int indx; + + if (*bufpp == NULL) { + listall(string, &tclist); + return (-1); + } + if ((indx = findname(string, "third", bufpp, &tclist)) == -1) + return (-1); + mib[2] = indx; + *typep = tclist.list[indx].ctl_type; + return (3); +} + +/* * Handle hardware monitoring sensors support */ int diff --git a/sys/altq/altq_subr.c b/sys/altq/altq_subr.c index 8e339d6f762..89fc5071989 100644 --- a/sys/altq/altq_subr.c +++ b/sys/altq/altq_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: altq_subr.c,v 1.18 2004/06/24 19:35:22 tholo Exp $ */ +/* $OpenBSD: altq_subr.c,v 1.19 2004/07/28 17:15:12 tholo Exp $ */ /* $KAME: altq_subr.c,v 1.11 2002/01/11 08:11:49 kjc Exp $ */ /* @@ -746,6 +746,14 @@ init_machclk(void) #if defined(__NetBSD__) && defined(MULTIPROCESSOR) machclk_usepcc = 0; #endif 
+#if defined(__OpenBSD__) && defined(__HAVE_TIMECOUNTER) + /* + * If we have timecounters, microtime is good enough and we can + * avoid problems on machines with variable cycle counter + * frequencies. + */ + machclk_usepcc = 0; +#endif #ifdef __i386__ /* check if TSC is available */ if (machclk_usepcc == 1 && (cpu_feature & CPUID_TSC) == 0) diff --git a/sys/conf/files b/sys/conf/files index 2bec01e0d5e..fc53260f3b7 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.311 2004/07/21 17:30:55 marius Exp $ +# $OpenBSD: files,v 1.312 2004/07/28 17:15:12 tholo Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -571,6 +571,7 @@ file kern/kern_sig.c file kern/kern_subr.c file kern/kern_sysctl.c file kern/kern_synch.c +file kern/kern_tc.c file kern/kern_time.c file kern/kern_timeout.c file kern/kern_watchdog.c !small_kernel diff --git a/sys/dev/pci/amdpm.c b/sys/dev/pci/amdpm.c index 8f9d51e54aa..dec9a1d8b75 100644 --- a/sys/dev/pci/amdpm.c +++ b/sys/dev/pci/amdpm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: amdpm.c,v 1.3 2002/11/04 17:12:34 fgsch Exp $ */ +/* $OpenBSD: amdpm.c,v 1.4 2004/07/28 17:15:12 tholo Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. 
@@ -41,6 +41,9 @@ #include <sys/kernel.h> #include <sys/device.h> #include <sys/timeout.h> +#ifdef __HAVE_TIMECOUNTER +#include <sys/timetc.h> +#endif #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> @@ -49,6 +52,23 @@ #include <dev/rndvar.h> #include <dev/pci/amdpmreg.h> +#ifdef __HAVE_TIMECOUNTER +unsigned amdpm_get_timecount(struct timecounter *tc); + +#ifndef AMDPM_FREQUENCY +#define AMDPM_FREQUENCY 3579545 +#endif + +static struct timecounter amdpm_timecounter = { + amdpm_get_timecount, /* get_timecount */ + 0, /* no poll_pps */ + 0xffffff, /* counter_mask */ + AMDPM_FREQUENCY, /* frequency */ + "AMDPM", /* name */ + 1000 /* quality */ +}; +#endif + struct amdpm_softc { struct device sc_dev; @@ -90,7 +110,8 @@ amdpm_match(struct device *parent, void *match, void *aux) struct pci_attach_args *pa = aux; if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_AMD && - PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_AMD_PBC768_PMC) + (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_AMD_PBC768_PMC || + PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_AMD_766_PMC)) return (1); return (0); } @@ -101,15 +122,15 @@ amdpm_attach(struct device *parent, struct device *self, void *aux) struct amdpm_softc *sc = (struct amdpm_softc *) self; struct pci_attach_args *pa = aux; struct timeval tv1, tv2; - pcireg_t reg; + pcireg_t cfg_reg, reg; int i; sc->sc_pc = pa->pa_pc; sc->sc_tag = pa->pa_tag; sc->sc_iot = pa->pa_iot; - reg = pci_conf_read(pa->pa_pc, pa->pa_tag, AMDPM_CONFREG); - if ((reg & AMDPM_PMIOEN) == 0) { + cfg_reg = pci_conf_read(pa->pa_pc, pa->pa_tag, AMDPM_CONFREG); + if ((cfg_reg & AMDPM_PMIOEN) == 0) { printf(": PMxx space isn't enabled\n"); return; } @@ -120,8 +141,22 @@ amdpm_attach(struct device *parent, struct device *self, void *aux) return; } - reg = pci_conf_read(pa->pa_pc, pa->pa_tag, AMDPM_CONFREG); - if (reg & AMDPM_RNGEN) { +#ifdef __HAVE_TIMECOUNTER + if ((cfg_reg & AMDPM_TMRRST) == 0 && + (cfg_reg & AMDPM_STOPTMR) == 0 && + PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_AMD_PBC768_PMC) { + printf(": 
%d-bit timer at %dHz", + (cfg_reg & AMDPM_TMR32) ? 32 : 24, + amdpm_timecounter.tc_frequency); + + amdpm_timecounter.tc_priv = sc; + if (cfg_reg & AMDPM_TMR32) + amdpm_timecounter.tc_counter_mask = 0xffffffffu; + tc_init(&amdpm_timecounter); + } +#endif + + if (cfg_reg & AMDPM_RNGEN) { /* Check to see if we can read data from the RNG. */ (void) bus_space_read_4(sc->sc_iot, sc->sc_ioh, AMDPM_RNGDATA); @@ -155,6 +190,8 @@ amdpm_attach(struct device *parent, struct device *self, void *aux) timeout_set(&sc->sc_rnd_ch, amdpm_rnd_callout, sc); amdpm_rnd_callout(sc); } + + printf("\n"); } void @@ -179,3 +216,26 @@ amdpm_rnd_callout(void *v) AMDPM_RNDCNT_INCR(&sc->sc_rnd_miss); timeout_add(&sc->sc_rnd_ch, 1); } + +#ifdef __HAVE_TIMECOUNTER +unsigned +amdpm_get_timecount(struct timecounter *tc) +{ + struct amdpm_softc *sc = tc->tc_priv; + unsigned u2; +#if 0 + unsigned u1, u3; +#endif + + u2 = bus_space_read_4(sc->sc_iot, sc->sc_ioh, AMDPM_TMR); +#if 0 + u3 = bus_space_read_4(sc->sc_iot, sc->sc_ioh, AMDPM_TMR); + do { + u1 = u2; + u2 = u3; + u3 = bus_space_read_4(sc->sc_iot, sc->sc_ioh, AMDPM_TMR); + } while (u1 > u2 || u2 > u3); +#endif + return (u2); +} +#endif diff --git a/sys/dev/pci/amdpmreg.h b/sys/dev/pci/amdpmreg.h index f44a2db31c4..d16658af4fd 100644 --- a/sys/dev/pci/amdpmreg.h +++ b/sys/dev/pci/amdpmreg.h @@ -40,9 +40,12 @@ /* 0x40: General Configuration 1 Register */ #define AMDPM_RNGEN 0x00000080 /* random number generator enable */ +#define AMDPM_STOPTMR 0x00000040 /* stop free-running timer */ /* 0x41: General Configuration 2 Register */ #define AMDPM_PMIOEN 0x00008000 /* system management IO space enable */ +#define AMDPM_TMRRST 0x00004000 /* reset free-running timer */ +#define AMDPM_TMR32 0x00000800 /* extended (32 bit) timer enable */ /* 0x42: SCI Interrupt Configuration Register */ /* 0x43: Previous Power State Register */ @@ -53,6 +56,8 @@ #define AMDPM_PMSIZE 256 /* PMxx space size */ /* Registers in PMxx space */ +#define AMDPM_TMR 0x08 /* 24/32 bit 
timer register */ + #define AMDPM_RNGDATA 0xf0 /* 32 bit random data register */ #define AMDPM_RNGSTAT 0xf4 /* RNG status register */ #define AMDPM_RNGDONE 0x00000001 /* Random number generation complete */ diff --git a/sys/dev/pci/files.pci b/sys/dev/pci/files.pci index 3b1d18c7bba..21c5c6d6d08 100644 --- a/sys/dev/pci/files.pci +++ b/sys/dev/pci/files.pci @@ -1,4 +1,4 @@ -# $OpenBSD: files.pci,v 1.156 2004/06/26 06:43:14 alex Exp $ +# $OpenBSD: files.pci,v 1.157 2004/07/28 17:15:12 tholo Exp $ # $NetBSD: files.pci,v 1.20 1996/09/24 17:47:15 christos Exp $ # # Config file and device description for machine-independent PCI code. @@ -505,3 +505,8 @@ file dev/pci/if_san_common.c san file dev/pci/if_san_obsd.c san file dev/pci/if_san_te1.c san file dev/pci/if_san_xilinx.c san + +# PIIX4 power management controller +device piixpm {} +attach piixpm at pci +file dev/pci/piixpm.c piixpm diff --git a/sys/dev/pci/piixpm.c b/sys/dev/pci/piixpm.c new file mode 100644 index 00000000000..7d9b10bf3fc --- /dev/null +++ b/sys/dev/pci/piixpm.c @@ -0,0 +1,171 @@ +/*- + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you + * can do whatever you want with this stuff. If we meet some day, and you think + * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp + * ---------------------------------------------------------------------------- + * + * $OpenBSD: piixpm.c,v 1.1 2004/07/28 17:15:12 tholo Exp $ + * $FreeBSD: /repoman/r/ncvs/src/sys/i386/i386/mp_clock.c,v 1.19 2004/05/30 20:34:57 phk Exp $ + */ + +/*- + * Just when we thought life were beautiful, reality pops its grim face over + * the edge again: + * + * ] 20. ACPI Timer Errata + * ] + * ] Problem: The power management timer may return improper result when + * ] read. 
Although the timer value settles properly after incrementing, + * ] while incrementing there is a 3nS window every 69.8nS where the + * ] timer value is indeterminate (a 4.2% chance that the data will be + * ] incorrect when read). As a result, the ACPI free running count up + * ] timer specification is violated due to erroneous reads. Implication: + * ] System hangs due to the "inaccuracy" of the timer when used by + * ] software for time critical events and delays. + * ] + * ] Workaround: Read the register twice and compare. + * ] Status: This will not be fixed in the PIIX4 or PIIX4E. + * + * The counter is in other words not latched to the PCI bus clock when + * read. Notice the workaround isn't: We need to read until we have + * three monotonic samples and then use the middle one, otherwise we are + * not protected against the fact that the bits can be wrong in two + * directions. If we only cared about monosity two reads would be enough. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/device.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#ifdef __HAVE_TIMECOUNTER +#include <sys/timetc.h> +#endif + +#include <machine/bus.h> + +#include <dev/pci/pcivar.h> +#include <dev/pci/pcireg.h> +#include <dev/pci/pcidevs.h> + +#define PIIX_PMPTR 0x40 /* PIIX PM address ptr */ + +#define PIIX_PMBASE(x) ((x) & 0xffc0) /* PIIX PM base address */ +#define PIIX_PMSIZE 56 /* PIIX PM space size */ + +struct piixpm_softc { + struct device sc_dev; + bus_space_tag_t sc_iot; + bus_space_handle_t sc_ioh; +}; + +int piixpm_probe(struct device *, void *, void *); +void piixpm_attach(struct device *, struct device *, void *); + +#ifdef __HAVE_TIMECOUNTER +unsigned piix_get_timecount(struct timecounter *tc); + +static u_int piix_freq = 14318182/4; + +static struct timecounter piix_timecounter = { + piix_get_timecount, /* get_timecount */ + 0, /* no poll_pps */ + 0xffffff, /* counter_mask */ + 0, /* frequency */ + "PIIX", /* name */ + 1000 /* quality */ +}; 
+#endif + +struct cfattach piixpm_ca = { + sizeof(struct piixpm_softc), piixpm_probe, piixpm_attach +}; + +struct cfdriver piixpm_cd = { + NULL, "piixpm", DV_DULL +}; + +#if 0 +int +sysctl_machdep_piix_freq(SYSCTL_HANDLER_ARGS) +{ + int error; + u_int freq; + + if (piix_timecounter.tc_frequency == 0) + return (EOPNOTSUPP); + freq = piix_freq; + error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); + if (error == 0 && req->newptr != NULL) { + piix_freq = freq; + piix_timecounter.tc_frequency = piix_freq; + } + return (error); +} + +SYSCTL_PROC(_machdep, OID_AUTO, piix_freq, CTLTYPE_INT | CTLFLAG_RW, + 0, sizeof(u_int), sysctl_machdep_piix_freq, "I", ""); +#endif + +#ifdef __HAVE_TIMECOUNTER +unsigned +piix_get_timecount(struct timecounter *tc) +{ + struct piixpm_softc *sc = (struct piixpm_softc *) tc->tc_priv; + unsigned u1, u2, u3; + + u2 = bus_space_read_4(sc->sc_iot, sc->sc_ioh, 8); + u3 = bus_space_read_4(sc->sc_iot, sc->sc_ioh, 8); + do { + u1 = u2; + u2 = u3; + u3 = bus_space_read_4(sc->sc_iot, sc->sc_ioh, 8); + } while (u1 > u2 || u2 > u3); + return (u2); +} +#endif + +/* + * XXX - this has to be redone if we ever do real ACPI + */ +int +piixpm_probe(struct device *parent, void *match, void *aux) +{ + struct pci_attach_args *pa = (struct pci_attach_args *) aux; + pcireg_t reg; + + if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL || + PCI_PRODUCT(pa->pa_id) != PCI_PRODUCT_INTEL_82371AB_PMC) + return (0); + + reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG); + if ((reg & PCI_COMMAND_IO_ENABLE) == 0) + return (0); + return (1); +} + +void +piixpm_attach(struct device *parent, struct device *self, void *aux) +{ + struct piixpm_softc *sc = (struct piixpm_softc *) self; + struct pci_attach_args *pa = aux; + pci_chipset_tag_t pc = pa->pa_pc; + pcireg_t reg; + + reg = pci_conf_read(pc, pa->pa_tag, PIIX_PMPTR); + if (bus_space_map(pa->pa_iot, PIIX_PMBASE(reg), PIIX_PMSIZE, + 0, &sc->sc_ioh)) { + printf(": can't map i/o space\n"); + return; + } + + 
sc->sc_iot = pa->pa_iot; + printf("\n"); +#ifdef __HAVE_TIMECOUNTER + piix_timecounter.tc_frequency = piix_freq; + piix_timecounter.tc_priv = sc; + tc_init(&piix_timecounter); +#endif +} diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 39f991b9ead..196065727d4 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: init_main.c,v 1.118 2004/07/20 20:19:52 art Exp $ */ +/* $OpenBSD: init_main.c,v 1.119 2004/07/28 17:15:12 tholo Exp $ */ /* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */ /* @@ -423,11 +423,15 @@ main(framep) * from the file system. Reset p->p_rtime as it may have been * munched in mi_switch() after the time got set. */ +#ifdef __HAVE_TIMECOUNTER + microtime(&boottime); +#else + boottime = mono_time = time; +#endif + p->p_stats->p_start = boottime; #ifdef __HAVE_CPUINFO - p->p_stats->p_start = mono_time = boottime = time; microuptime(&p->p_cpu->ci_schedstate.spc_runtime); #else - p->p_stats->p_start = runtime = mono_time = boottime = time; microuptime(&runtime); #endif p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0; diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 15c958bbf6b..9d79ae260fc 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_clock.c,v 1.46 2004/06/24 19:35:24 tholo Exp $ */ +/* $OpenBSD: kern_clock.c,v 1.47 2004/07/28 17:15:12 tholo Exp $ */ /* $NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $ */ /*- @@ -49,6 +49,9 @@ #include <uvm/uvm_extern.h> #include <sys/sysctl.h> #include <sys/sched.h> +#ifdef __HAVE_TIMECOUNTER +#include <sys/timetc.h> +#endif #include <machine/cpu.h> @@ -101,17 +104,20 @@ int profprocs; int ticks; static int psdiv, pscnt; /* prof => stat divider */ int psratio; /* ratio: prof / stat */ -int tickfix, tickfixinterval; /* used if tick not really integral */ -static int tickfixcnt; /* accumulated fractional error */ long cp_time[CPUSTATES]; +#ifndef __HAVE_TIMECOUNTER +int tickfix, 
tickfixinterval; /* used if tick not really integral */ +static int tickfixcnt; /* accumulated fractional error */ + volatile time_t time_second; volatile time_t time_uptime; volatile struct timeval time __attribute__((__aligned__(__alignof__(quad_t)))); volatile struct timeval mono_time; +#endif #ifdef __HAVE_GENERIC_SOFT_INTERRUPTS void *softclock_si; @@ -135,6 +141,9 @@ void initclocks() { int i; +#ifdef __HAVE_TIMECOUNTER + extern void inittimecounter(void); +#endif #ifdef __HAVE_GENERIC_SOFT_INTERRUPTS softclock_si = softintr_establish(IPL_SOFTCLOCK, generic_softclock, NULL); @@ -156,6 +165,9 @@ initclocks() if (profhz == 0) profhz = i; psratio = profhz / i; +#ifdef __HAVE_TIMECOUNTER + inittimecounter(); +#endif } /* @@ -165,12 +177,14 @@ void hardclock(struct clockframe *frame) { struct proc *p; +#ifndef __HAVE_TIMECOUNTER int delta; extern int tickdelta; extern long timedelta; #ifdef __HAVE_CPUINFO struct cpu_info *ci = curcpu(); #endif +#endif p = curproc; if (p) { @@ -189,6 +203,7 @@ hardclock(struct clockframe *frame) psignal(p, SIGPROF); } +#ifndef __HAVE_TIMECOUNTER /* * If no separate statistics clock is available, run it from here. */ @@ -216,6 +231,7 @@ hardclock(struct clockframe *frame) * ``tickdelta'' may also be added in. */ ticks++; + delta = tick; if (tickfix) { @@ -231,14 +247,13 @@ hardclock(struct clockframe *frame) timedelta -= tickdelta; } -#ifdef notyet - microset(); -#endif - BUMPTIME(&time, delta); BUMPTIME(&mono_time, delta); time_second = time.tv_sec; time_uptime = mono_time.tv_sec; +#else + tc_ticktock(); +#endif #ifdef CPU_CLOCKUPDATE CPU_CLOCKUPDATE(); @@ -539,6 +554,7 @@ sysctl_clockrate(where, sizep) return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo))); } +#ifndef __HAVE_TIMECOUNTER /* * Placeholders until everyone uses the timecounters code. * Won't improve anything except maybe removing a bunch of bugs in fixed code. 
@@ -609,3 +625,4 @@ getmicrouptime(struct timeval *tvp) *tvp = mono_time; splx(s); } +#endif /* __HAVE_TIMECOUNTERS */ diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 4b83109ba96..9599fe397ad 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sysctl.c,v 1.117 2004/06/28 01:34:46 aaron Exp $ */ +/* $OpenBSD: kern_sysctl.c,v 1.118 2004/07/28 17:15:12 tholo Exp $ */ /* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */ /*- @@ -62,6 +62,9 @@ #include <sys/exec.h> #include <sys/mbuf.h> #include <sys/sensors.h> +#ifdef __HAVE_TIMECOUNTER +#include <sys/timetc.h> +#endif #ifdef __HAVE_EVCOUNT #include <sys/evcount.h> #endif @@ -285,6 +288,9 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) #ifdef __HAVE_EVCOUNT case KERN_EVCOUNT: #endif +#ifdef __HAVE_TIMECOUNTER + case KERN_TIMECOUNTER: +#endif break; default: return (ENOTDIR); /* overloaded */ @@ -528,6 +534,11 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) return (evcount_sysctl(name + 1, namelen - 1, oldp, oldlenp, newp, newlen)); #endif +#ifdef __HAVE_TIMECOUNTER + case KERN_TIMECOUNTER: + return (sysctl_tc(name + 1, namelen - 1, oldp, oldlenp, + newp, newlen)); +#endif default: return (EOPNOTSUPP); } diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c new file mode 100644 index 00000000000..1c840169853 --- /dev/null +++ b/sys/kern/kern_tc.c @@ -0,0 +1,603 @@ +/*- + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you + * can do whatever you want with this stuff. If we meet some day, and you think + * this stuff is worth it, you can buy me a beer in return. 
Poul-Henning Kamp + * ---------------------------------------------------------------------------- + * + * $OpenBSD: kern_tc.c,v 1.1 2004/07/28 17:15:12 tholo Exp $ + * $FreeBSD: src/sys/kern/kern_tc.c,v 1.148 2003/03/18 08:45:23 phk Exp $ + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <sys/timetc.h> +#include <sys/malloc.h> + +#ifdef __HAVE_TIMECOUNTER +/* + * A large step happens on boot. This constant detects such steps. + * It is relatively small so that ntp_update_second gets called enough + * in the typical 'missed a couple of seconds' case, but doesn't loop + * forever when the time step is large. + */ +#define LARGE_STEP 200 + +void ntp_update_second(int64_t *, time_t *); +int sysctl_tc_hardware(void *, size_t *, void *, size_t); +int sysctl_tc_choice(void *, size_t *, void *, size_t); + +/* + * Implement a dummy timecounter which we can use until we get a real one + * in the air. This allows the console and other early stuff to use + * time services. + */ + +static u_int +dummy_get_timecount(struct timecounter *tc) +{ + static u_int now; + + return (++now); +} + +static struct timecounter dummy_timecounter = { + dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 +}; + +struct timehands { + /* These fields must be initialized by the driver. */ + struct timecounter *th_counter; + int64_t th_adjustment; + u_int64_t th_scale; + u_int th_offset_count; + struct bintime th_offset; + struct timeval th_microtime; + struct timespec th_nanotime; + /* Fields not to be copied in tc_windup start with th_generation. 
*/ + volatile u_int th_generation; + struct timehands *th_next; +}; + +extern struct timehands th0; +static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0}; +static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9}; +static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8}; +static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7}; +static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6}; +static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5}; +static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4}; +static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3}; +static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2}; +static struct timehands th0 = { + &dummy_timecounter, + 0, + (uint64_t)-1 / 1000000, + 0, + {1, 0}, + {0, 0}, + {0, 0}, + 1, + &th1 +}; + +static struct timehands *volatile timehands = &th0; +struct timecounter *timecounter = &dummy_timecounter; +static struct timecounter *timecounters = &dummy_timecounter; + +time_t time_second = 1; +time_t time_uptime = 0; + +extern struct timeval adjtimedelta; +static struct bintime boottimebin; +static int timestepwarnings; + +static void tc_windup(void); + +/* + * Return the difference between the timehands' counter value now and what + * was when we copied it to the timehands' offset_count. + */ +static __inline u_int +tc_delta(struct timehands *th) +{ + struct timecounter *tc; + + tc = th->th_counter; + return ((tc->tc_get_timecount(tc) - th->th_offset_count) & + tc->tc_counter_mask); +} + +/* + * Functions for reading the time. We have to loop until we are sure that + * the timehands that we operated on was not updated under our feet. See + * the comment in <sys/time.h> for a description of these 12 functions. 
+ */ + +void +binuptime(struct bintime *bt) +{ + struct timehands *th; + u_int gen; + + do { + th = timehands; + gen = th->th_generation; + *bt = th->th_offset; + bintime_addx(bt, th->th_scale * tc_delta(th)); + } while (gen == 0 || gen != th->th_generation); +} + +void +nanouptime(struct timespec *tsp) +{ + struct bintime bt; + + binuptime(&bt); + bintime2timespec(&bt, tsp); +} + +void +microuptime(struct timeval *tvp) +{ + struct bintime bt; + + binuptime(&bt); + bintime2timeval(&bt, tvp); +} + +void +bintime(struct bintime *bt) +{ + + binuptime(bt); + bintime_add(bt, &boottimebin); +} + +void +nanotime(struct timespec *tsp) +{ + struct bintime bt; + + bintime(&bt); + bintime2timespec(&bt, tsp); +} + +void +microtime(struct timeval *tvp) +{ + struct bintime bt; + + bintime(&bt); + bintime2timeval(&bt, tvp); +} + +void +getbinuptime(struct bintime *bt) +{ + struct timehands *th; + u_int gen; + + do { + th = timehands; + gen = th->th_generation; + *bt = th->th_offset; + } while (gen == 0 || gen != th->th_generation); +} + +void +getnanouptime(struct timespec *tsp) +{ + struct timehands *th; + u_int gen; + + do { + th = timehands; + gen = th->th_generation; + bintime2timespec(&th->th_offset, tsp); + } while (gen == 0 || gen != th->th_generation); +} + +void +getmicrouptime(struct timeval *tvp) +{ + struct timehands *th; + u_int gen; + + do { + th = timehands; + gen = th->th_generation; + bintime2timeval(&th->th_offset, tvp); + } while (gen == 0 || gen != th->th_generation); +} + +void +getbintime(struct bintime *bt) +{ + struct timehands *th; + u_int gen; + + do { + th = timehands; + gen = th->th_generation; + *bt = th->th_offset; + } while (gen == 0 || gen != th->th_generation); + bintime_add(bt, &boottimebin); +} + +void +getnanotime(struct timespec *tsp) +{ + struct timehands *th; + u_int gen; + + do { + th = timehands; + gen = th->th_generation; + *tsp = th->th_nanotime; + } while (gen == 0 || gen != th->th_generation); +} + +void +getmicrotime(struct timeval 
*tvp) +{ + struct timehands *th; + u_int gen; + + do { + th = timehands; + gen = th->th_generation; + *tvp = th->th_microtime; + } while (gen == 0 || gen != th->th_generation); +} + +/* + * Initialize a new timecounter and possibly use it. + */ +void +tc_init(struct timecounter *tc) +{ + unsigned u; + + u = tc->tc_frequency / tc->tc_counter_mask; + /* XXX: We need some margin here, 10% is a guess */ + u *= 11; + u /= 10; + if (tc->tc_quality >= 0) { + printf("Timecounter \"%s\" frequency %lu Hz", + tc->tc_name, (unsigned long)tc->tc_frequency); + if (u > hz) { + tc->tc_quality = -2000; + printf(" -- Insufficient hz, needs at least %u\n", u); + } + else + printf(" quality %d\n", tc->tc_quality); + } + + tc->tc_next = timecounters; + timecounters = tc; + /* + * Never automatically use a timecounter with negative quality. + * Even though we run on the dummy counter, switching here may be + * worse since this timecounter may not be monotonous. + */ + if (tc->tc_quality < 0) + return; + if (tc->tc_quality < timecounter->tc_quality) + return; + if (tc->tc_quality == timecounter->tc_quality && + tc->tc_frequency < timecounter->tc_frequency) + return; + (void)tc->tc_get_timecount(tc); + (void)tc->tc_get_timecount(tc); + timecounter = tc; +} + +/* Report the frequency of the current timecounter. */ +u_int64_t +tc_getfrequency(void) +{ + + return (timehands->th_counter->tc_frequency); +} + +/* + * Step our concept of UTC. This is done by modifying our estimate of + * when we booted. + * XXX: not locked. + */ +void +tc_setclock(struct timespec *ts) +{ + struct timespec ts2; + struct bintime bt, bt2; + + binuptime(&bt2); + timespec2bintime(ts, &bt); + bintime_sub(&bt, &bt2); + bintime_add(&bt2, &boottimebin); + boottimebin = bt; + bintime2timeval(&bt, &boottime); + + /* XXX fiddle all the little crinkly bits around the fiords... 
*/ + tc_windup(); + if (timestepwarnings) { + log(LOG_INFO, "Time stepped from %ld.%09ld to %ld.%09ld\n", + (long)ts2.tv_sec, ts2.tv_nsec, + (long)ts->tv_sec, ts->tv_nsec); + } +} + +/* + * Initialize the next struct timehands in the ring and make + * it the active timehands. Along the way we might switch to a different + * timecounter and/or do seconds processing in NTP. Slightly magic. + */ +static void +tc_windup(void) +{ + struct bintime bt; + struct timehands *th, *tho; + u_int64_t scale; + u_int delta, ncount, ogen; + int i; +#ifdef leapsecs + time_t t; +#endif + + /* + * Make the next timehands a copy of the current one, but do not + * overwrite the generation or next pointer. While we update + * the contents, the generation must be zero. + */ + tho = timehands; + th = tho->th_next; + ogen = th->th_generation; + th->th_generation = 0; + bcopy(tho, th, offsetof(struct timehands, th_generation)); + + /* + * Capture a timecounter delta on the current timecounter and if + * changing timecounters, a counter value from the new timecounter. + * Update the offset fields accordingly. + */ + delta = tc_delta(th); + if (th->th_counter != timecounter) + ncount = timecounter->tc_get_timecount(timecounter); + else + ncount = 0; + th->th_offset_count += delta; + th->th_offset_count &= th->th_counter->tc_counter_mask; + bintime_addx(&th->th_offset, th->th_scale * delta); + +#ifdef notyet + /* + * Hardware latching timecounters may not generate interrupts on + * PPS events, so instead we poll them. There is a finite risk that + * the hardware might capture a count which is later than the one we + * got above, and therefore possibly in the next NTP second which might + * have a different rate than the current NTP second. It doesn't + * matter in practice. + */ + if (tho->th_counter->tc_poll_pps) + tho->th_counter->tc_poll_pps(tho->th_counter); +#endif + + /* + * Deal with NTP second processing. 
The for loop normally + * iterates at most once, but in extreme situations it might + * keep NTP sane if timeouts are not run for several seconds. + * At boot, the time step can be large when the TOD hardware + * has been read, so on really large steps, we call + * ntp_update_second only twice. We need to call it twice in + * case we missed a leap second. + */ + bt = th->th_offset; + bintime_add(&bt, &boottimebin); + i = bt.sec - tho->th_microtime.tv_sec; + if (i > LARGE_STEP) + i = 2; + for (; i > 0; i--) + ntp_update_second(&th->th_adjustment, &bt.sec); + + /* Update the UTC timestamps used by the get*() functions. */ + /* XXX shouldn't do this here. Should force non-`get' versions. */ + bintime2timeval(&bt, &th->th_microtime); + bintime2timespec(&bt, &th->th_nanotime); + + /* Now is a good time to change timecounters. */ + if (th->th_counter != timecounter) { + th->th_counter = timecounter; + th->th_offset_count = ncount; + } + + /*- + * Recalculate the scaling factor. We want the number of 1/2^64 + * fractions of a second per period of the hardware counter, taking + * into account the th_adjustment factor which the NTP PLL/adjtime(2) + * processing provides us with. + * + * The th_adjustment is nanoseconds per second with 32 bit binary + * fraction and we want 64 bit binary fraction of second: + * + * x = a * 2^32 / 10^9 = a * 4.294967296 + * + * The range of th_adjustment is +/- 5000PPM so inside a 64bit int + * we can only multiply by about 850 without overflowing, but that + * leaves suitably precise fractions for multiply before divide. + * + * Divide before multiply with a fraction of 2199/512 results in a + * systematic undercompensation of 10PPM of th_adjustment. On a + * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. + * + * We happily sacrifice the lowest of the 64 bits of our result + * to the goddess of code clarity. 
+ * + */ + scale = (u_int64_t)1 << 63; + scale += (th->th_adjustment / 1024) * 2199; + scale /= th->th_counter->tc_frequency; + th->th_scale = scale * 2; + + /* + * Now that the struct timehands is again consistent, set the new + * generation number, making sure to not make it zero. + */ + if (++ogen == 0) + ogen = 1; + th->th_generation = ogen; + + /* Go live with the new struct timehands. */ + time_second = th->th_microtime.tv_sec; + time_uptime = th->th_offset.sec; + timehands = th; +} + +/* Report or change the active timecounter hardware. */ +int +sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen) +{ + char newname[32]; + struct timecounter *newtc, *tc; + int error; + + tc = timecounter; + strlcpy(newname, tc->tc_name, sizeof(newname)); + + error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname)); + if (error != 0 || strcmp(newname, tc->tc_name) == 0) + return (error); + for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { + if (strcmp(newname, newtc->tc_name) != 0) + continue; + + /* Warm up new timecounter. */ + (void)newtc->tc_get_timecount(newtc); + (void)newtc->tc_get_timecount(newtc); + + timecounter = newtc; + return (0); + } + return (EINVAL); +} + +/* Report or change the active timecounter hardware. 
*/ +int +sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen) +{ + char buf[32], *spc, *choices; + struct timecounter *tc; + int error, maxlen, n; + + spc = ""; + error = 0; + maxlen = 0; + for (tc = timecounters; tc != NULL; tc = tc->tc_next) + maxlen += sizeof(buf); + choices = malloc(maxlen, M_TEMP, M_WAITOK); + *choices = '\0'; + for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) { + n = snprintf(buf, sizeof(buf), "%s%s(%d)", + spc, tc->tc_name, tc->tc_quality); + spc = " "; + strlcat(choices, buf, maxlen); + } + if (!error) + error = sysctl_rdstring(oldp, oldlenp, newp, choices); + free(choices, M_TEMP); + return (error); +} + +/* + * Timecounters need to be updated every so often to prevent the hardware + * counter from overflowing. Updating also recalculates the cached values + * used by the get*() family of functions, so their precision depends on + * the update frequency. + */ +static int tc_tick; + +void +tc_ticktock(void) +{ + static int count; + + if (++count < tc_tick) + return; + count = 0; + tc_windup(); +} + +void +inittimecounter(void) +{ + u_int p; + + /* + * Set the initial timeout to + * max(1, <approx. number of hardclock ticks in a millisecond>). + * People should probably not use the sysctl to set the timeout + * to smaller than its inital value, since that value is the + * smallest reasonable one. If they want better timestamps they + * should use the non-"get"* functions. + */ + if (hz > 1000) + tc_tick = (hz + 500) / 1000; + else + tc_tick = 1; + p = (tc_tick * 1000000) / hz; + printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); + + /* warm up new timecounter (again) and get rolling. */ + (void)timecounter->tc_get_timecount(timecounter); + (void)timecounter->tc_get_timecount(timecounter); +} + +/* + * Return timecounter-related information. 
+ */ +int +sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + if (namelen != 1) + return (ENOTDIR); + + switch (name[0]) { + case KERN_TIMECOUNTER_TICK: + return (sysctl_rdint(oldp, oldlenp, newp, tc_tick)); + case KERN_TIMECOUNTER_TIMESTEPWARNINGS: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &timestepwarnings)); + case KERN_TIMECOUNTER_HARDWARE: + return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen)); + case KERN_TIMECOUNTER_CHOICE: + return (sysctl_tc_choice(oldp, oldlenp, newp, newlen)); + default: + return (EOPNOTSUPP); + } + /* NOTREACHED */ +} + +void +ntp_update_second(int64_t *adjust, time_t *sec) +{ + struct timeval adj; + + /* Slew time according to any adjtime(2) adjustments. */ + timerclear(&adj); + if (adjtimedelta.tv_sec > 0) + adj.tv_usec = 5000; + else if (adjtimedelta.tv_sec == 0) + adj.tv_usec = MIN(500, adjtimedelta.tv_usec); + else if (adjtimedelta.tv_sec < -1) + adj.tv_usec = -5000; + else if (adjtimedelta.tv_sec == -1) + adj.tv_usec = MAX(-500, adjtimedelta.tv_usec - 1000000); + timersub(&adjtimedelta, &adj, &adjtimedelta); + *adjust = ((int64_t)adj.tv_usec * 1000) << 32; +} +#endif /* __HAVE_TIMECOUNTER */ diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index a19bf4c56a0..567341a2f85 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_time.c,v 1.44 2004/06/26 05:52:20 nordin Exp $ */ +/* $OpenBSD: kern_time.c,v 1.45 2004/07/28 17:15:12 tholo Exp $ */ /* $NetBSD: kern_time.c,v 1.20 1996/02/18 11:57:06 fvdl Exp $ */ /* @@ -39,6 +39,9 @@ #include <sys/proc.h> #include <sys/vnode.h> #include <sys/signalvar.h> +#ifdef __HAVE_TIMECOUNTER +#include <sys/timetc.h> +#endif #include <sys/mount.h> #include <sys/syscallargs.h> @@ -58,6 +61,49 @@ void itimerround(struct timeval *); */ /* This function is used by clock_settime and settimeofday */ +#ifdef __HAVE_TIMECOUNTER +int +settime(struct timespec *ts) +{ + struct timespec now; + + + /* + * Don't
allow the time to be set forward so far it will wrap + * and become negative, thus allowing an attacker to bypass + * the next check below. The cutoff is 1 year before rollover + * occurs, so even if the attacker uses adjtime(2) to move + * the time past the cutoff, it will take a very long time + * to get to the wrap point. + * + * XXX: we check against INT_MAX since on 64-bit + * platforms, sizeof(int) != sizeof(long) and + * time_t is 32 bits even when atv.tv_sec is 64 bits. + */ + if (ts->tv_sec > INT_MAX - 365*24*60*60) { + printf("denied attempt to set clock forward to %ld\n", + ts->tv_sec); + return (EPERM); + } + /* + * If the system is secure, we do not allow the time to be + * set to an earlier value (it may be slowed using adjtime, + * but not set back). This feature prevent interlopers from + * setting arbitrary time stamps on files. + */ + nanotime(&now); + if (securelevel > 1 && timespeccmp(ts, &now, <)) { + printf("denied attempt to set clock back %ld seconds\n", + now.tv_sec - ts->tv_sec); + return (EPERM); + } + + tc_setclock(ts); + resettodr(); + + return (0); +} +#else int settime(struct timespec *ts) { @@ -108,6 +154,7 @@ settime(struct timespec *ts) return (0); } +#endif /* ARGSUSED */ int @@ -319,9 +366,13 @@ sys_settimeofday(p, v, retval) return (0); } +#ifdef __HAVE_TIMECOUNTER +struct timeval adjtimedelta; /* unapplied time correction */ +#else int tickdelta; /* current clock skew, us. per tick */ long timedelta; /* unapplied time correction, us. */ long bigadj = 1000000; /* use 10x skew above bigadj us. 
*/ +#endif /* ARGSUSED */ int @@ -334,6 +385,23 @@ sys_adjtime(p, v, retval) syscallarg(const struct timeval *) delta; syscallarg(struct timeval *) olddelta; } */ *uap = v; +#ifdef __HAVE_TIMECOUNTER + int error; + + if ((error = suser(p, 0))) + return (error); + + if (SCARG(uap, olddelta)) + if ((error = copyout((void *)&adjtimedelta, + (void *)SCARG(uap, olddelta), sizeof(struct timeval)))) + return (error); + + if ((error = copyin((void *)SCARG(uap, delta), (void *)&adjtimedelta, + sizeof(struct timeval)))) + return (error); + + return (0); +#else struct timeval atv; register long ndelta, ntickdelta, odelta; int s, error; @@ -380,6 +448,7 @@ sys_adjtime(p, v, retval) return (error); } return (0); +#endif } /* diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h index c2bc3965541..d14da10dbab 100644 --- a/sys/sys/kernel.h +++ b/sys/sys/kernel.h @@ -1,4 +1,4 @@ -/* $OpenBSD: kernel.h,v 1.10 2004/06/13 21:49:28 niklas Exp $ */ +/* $OpenBSD: kernel.h,v 1.11 2004/07/28 17:15:12 tholo Exp $ */ /* $NetBSD: kernel.h,v 1.11 1995/03/03 01:24:16 cgd Exp $ */ /*- @@ -47,12 +47,14 @@ extern char domainname[MAXHOSTNAMELEN]; extern int domainnamelen; /* 1.2 */ -extern volatile struct timeval mono_time; extern struct timeval boottime; #ifndef __HAVE_CPUINFO extern struct timeval runtime; #endif +#ifndef __HAVE_TIMECOUNTER +extern volatile struct timeval mono_time; extern volatile struct timeval time; +#endif extern struct timezone tz; /* XXX */ extern int tick; /* usec per tick (1000000 / hz) */ diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h index 9afc78a8655..8a85d138d8b 100644 --- a/sys/sys/sysctl.h +++ b/sys/sys/sysctl.h @@ -1,4 +1,4 @@ -/* $OpenBSD: sysctl.h,v 1.79 2004/06/28 01:34:46 aaron Exp $ */ +/* $OpenBSD: sysctl.h,v 1.80 2004/07/28 17:15:12 tholo Exp $ */ /* $NetBSD: sysctl.h,v 1.16 1996/04/09 20:55:36 cgd Exp $ */ /* @@ -181,6 +181,7 @@ struct ctlname { #define KERN_PROC2 66 /* struct: process entries */ #define KERN_MAXCLUSTERS 67 /* number of mclusters */ #define 
KERN_EVCOUNT 68 /* node: event counters */ +#define KERN_TIMECOUNTER 69 /* node: timecounter */ #define KERN_MAXID 69 /* number of valid kern ids */ #define CTL_KERN_NAMES { \ @@ -253,6 +254,7 @@ struct ctlname { { "proc2", CTLTYPE_STRUCT }, \ { "maxclusters", CTLTYPE_INT }, \ { "evcount", CTLTYPE_NODE }, \ + { "timecounter", CTLTYPE_NODE }, \ } /* @@ -485,6 +487,23 @@ struct kinfo_proc2 { } /* + * KERN_TIMECOUNTER + */ +#define KERN_TIMECOUNTER_TICK 1 /* int: number of revolutions */ +#define KERN_TIMECOUNTER_TIMESTEPWARNINGS 2 /* int: log a warning when time changes */ +#define KERN_TIMECOUNTER_HARDWARE 3 /* string: tick hardware used */ +#define KERN_TIMECOUNTER_CHOICE 4 /* string: tick hardware used */ +#define KERN_TIMECOUNTER_MAXID 5 + +#define CTL_KERN_TIMECOUNTER_NAMES { \ + { 0, 0 }, \ + { "tick", CTLTYPE_INT }, \ + { "timestepwarnings", CTLTYPE_INT }, \ + { "hardware", CTLTYPE_STRING }, \ + { "choice", CTLTYPE_STRING }, \ +} + +/* * CTL_FS identifiers */ #define FS_POSIX 1 /* POSIX flags */ diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h new file mode 100644 index 00000000000..bbc532aec3e --- /dev/null +++ b/sys/sys/timetc.h @@ -0,0 +1,76 @@ +/*- + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you + * can do whatever you want with this stuff. If we meet some day, and you think + * this stuff is worth it, you can buy me a beer in return. 
Poul-Henning Kamp + * ---------------------------------------------------------------------------- + * + * $OpenBSD: timetc.h,v 1.1 2004/07/28 17:15:12 tholo Exp $ + * $FreeBSD: src/sys/sys/timetc.h,v 1.57 2003/04/10 23:07:24 des Exp $ + */ + +#ifndef _SYS_TIMETC_H_ +#define _SYS_TIMETC_H_ + +#ifndef _KERNEL +#error "no user-serviceable parts inside" +#endif + +/*- + * `struct timecounter' is the interface between the hardware which implements + * a timecounter and the MI code which uses this to keep track of time. + * + * A timecounter is a binary counter which has two properties: + * * it runs at a fixed, known frequency. + * * it has sufficient bits to not roll over in less than approximately + * max(2 msec, 2/HZ seconds). (The value 2 here is really 1 + delta, + * for some indeterminate value of delta.) + */ + +struct timecounter; +typedef u_int timecounter_get_t(struct timecounter *); +typedef void timecounter_pps_t(struct timecounter *); + +struct timecounter { + timecounter_get_t *tc_get_timecount; + /* + * This function reads the counter. It is not required to + * mask any unimplemented bits out, as long as they are + * constant. + */ + timecounter_pps_t *tc_poll_pps; + /* + * This function is optional. It will be called whenever the + * timecounter is rewound, and is intended to check for PPS + * events. Normal hardware does not need it but timecounters + * which latch PPS in hardware (like sys/pci/xrpu.c) do. + */ + u_int tc_counter_mask; + /* This mask should mask off any unimplemented bits. */ + u_int64_t tc_frequency; + /* Frequency of the counter in Hz. */ + char *tc_name; + /* Name of the timecounter. */ + int tc_quality; + /* + * Used to determine if this timecounter is better than + * another timecounter higher means better. Negative + * means "only use at explicit request". + */ + void *tc_priv; + /* Pointer to the timecounter's private parts. */ + struct timecounter *tc_next; + /* Pointer to the next timecounter. 
*/ +}; + +extern struct timecounter *timecounter; + +u_int64_t tc_getfrequency(void); +void tc_init(struct timecounter *tc); +void tc_setclock(struct timespec *ts); +void tc_ticktock(void); +void inittimecounter(void); +int sysctl_tc(int *, u_int, void *, size_t *, void *, size_t); + +#endif /* !_SYS_TIMETC_H_ */ |