author    | cheloha <cheloha@cvs.openbsd.org> | 2019-12-02 21:47:55 +0000
committer | cheloha <cheloha@cvs.openbsd.org> | 2019-12-02 21:47:55 +0000
commit    | 189a114a83964a518e0d0df047aba0304f22db8f
tree      | 80de7ad9f61ab17dced85b1578bc8e3363c1fb8b /sys/kern
parent    | 1767e902e2182b45699c4e54774f9a6861e2b0b3
Revert "timeout(9): switch to tickless backend"
It appears to have caused major performance regressions all over the
network stack.
Reported by bluhm@
ok deraadt@
Diffstat (limited to 'sys/kern')

-rw-r--r-- | sys/kern/kern_tc.c      |   9
-rw-r--r-- | sys/kern/kern_timeout.c | 345
-rw-r--r-- | sys/kern/uipc_socket.c  |   7

3 files changed, 142 insertions, 219 deletions
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index a911acc8f59..a9b853b0a14 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_tc.c,v 1.52 2019/12/02 02:24:29 cheloha Exp $ */
+/*	$OpenBSD: kern_tc.c,v 1.53 2019/12/02 21:47:54 cheloha Exp $ */
 
 /*
  * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
@@ -477,7 +477,6 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
 	struct bintime bt;
 	struct timecounter *active_tc;
 	struct timehands *th, *tho;
-	int64_t counter_adjustment;
 	u_int64_t scale;
 	u_int delta, ncount, ogen;
 	int i;
@@ -595,8 +594,7 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
 	 *
 	 */
 	scale = (u_int64_t)1 << 63;
-	counter_adjustment = th->th_counter->tc_freq_adj;
-	scale += ((th->th_adjustment + counter_adjustment) / 1024) * 2199;
+	scale += (th->th_adjustment / 1024) * 2199;
 	scale /= th->th_counter->tc_frequency;
 	th->th_scale = scale * 2;
 
@@ -753,7 +751,7 @@ sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
 }
 
 /*
- * Skew the timehands according to any adjtime(2) adjustment.
+ * Skew the timehands according to any adjfreq(2)/adjtime(2) adjustments.
  */
 void
 ntp_update_second(struct timehands *th)
@@ -768,6 +766,7 @@ ntp_update_second(struct timehands *th)
 	adj = MAX(-5000, th->th_adjtimedelta);
 	th->th_adjtimedelta -= adj;
 	th->th_adjustment = (adj * 1000) << 32;
+	th->th_adjustment += th->th_counter->tc_freq_adj;
 }
 
 void
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
index 8feced497b9..cae318c6191 100644
--- a/sys/kern/kern_timeout.c
+++ b/sys/kern/kern_timeout.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_timeout.c,v 1.64 2019/11/29 12:50:48 cheloha Exp $ */
+/*	$OpenBSD: kern_timeout.c,v 1.65 2019/12/02 21:47:54 cheloha Exp $ */
 /*
  * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
  * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
@@ -55,54 +55,64 @@ struct timeoutstat tostat;		/* [t] statistics and totals */
 
 /*
  * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
- * of the system uptime clock when the timeout should be called. There are
+ * of the global variable "ticks" when the timeout should be called. There are
  * four levels with 256 buckets each.
  */
-#define WHEELCOUNT 4
+#define BUCKETS 1024
 #define WHEELSIZE 256
 #define WHEELMASK 255
 #define WHEELBITS 8
-#define BUCKETS (WHEELCOUNT * WHEELSIZE)
 
 struct circq timeout_wheel[BUCKETS];	/* [t] Queues of timeouts */
 struct circq timeout_todo;		/* [t] Due or needs scheduling */
 struct circq timeout_proc;		/* [t] Due + needs process context */
 
-time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
-struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */
-struct timespec timeout_lastscan;	/* [t] Uptime at last wheel scan */
-struct timespec timeout_late;		/* [t] Late if due prior to this */
+#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)
+
+#define BUCKET(rel, abs)						\
+    (timeout_wheel[							\
+	((rel) <= (1 << (2*WHEELBITS)))					\
+	    ? ((rel) <= (1 << WHEELBITS))				\
+		? MASKWHEEL(0, (abs))					\
+		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
+	    : ((rel) <= (1 << (3*WHEELBITS)))				\
+		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
+		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])
+
+#define MOVEBUCKET(wheel, time)						\
+    CIRCQ_APPEND(&timeout_todo,						\
+	&timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
 
 /*
  * Circular queue definitions.
  */
-#define CIRCQ_INIT(elem) do {			\
-	(elem)->next = (elem);			\
-	(elem)->prev = (elem);			\
+#define CIRCQ_INIT(elem) do {		\
+	(elem)->next = (elem);		\
+	(elem)->prev = (elem);		\
 } while (0)
 
-#define CIRCQ_INSERT_TAIL(list, elem) do {	\
-	(elem)->prev = (list)->prev;		\
-	(elem)->next = (list);			\
-	(list)->prev->next = (elem);		\
-	(list)->prev = (elem);			\
-	tostat.tos_pending++;			\
+#define CIRCQ_INSERT(elem, list) do {		\
+	(elem)->prev = (list)->prev;		\
+	(elem)->next = (list);			\
+	(list)->prev->next = (elem);		\
+	(list)->prev = (elem);			\
+	tostat.tos_pending++;			\
 } while (0)
 
-#define CIRCQ_CONCAT(fst, snd) do {		\
-	if (!CIRCQ_EMPTY(snd)) {		\
-		(fst)->prev->next = (snd)->next;\
-		(snd)->next->prev = (fst)->prev;\
-		(snd)->prev->next = (fst);	\
-		(fst)->prev = (snd)->prev;	\
-		CIRCQ_INIT(snd);		\
-	}					\
+#define CIRCQ_APPEND(fst, snd) do {		\
+	if (!CIRCQ_EMPTY(snd)) {		\
+		(fst)->prev->next = (snd)->next;\
+		(snd)->next->prev = (fst)->prev;\
+		(snd)->prev->next = (fst);	\
+		(fst)->prev = (snd)->prev;	\
+		CIRCQ_INIT(snd);		\
+	}					\
 } while (0)
 
-#define CIRCQ_REMOVE(elem) do {			\
-	(elem)->next->prev = (elem)->prev;	\
-	(elem)->prev->next = (elem)->next;	\
+#define CIRCQ_REMOVE(elem) do {		\
+	(elem)->next->prev = (elem)->prev;	\
+	(elem)->prev->next = (elem)->next;	\
 	_Q_INVALIDATE((elem)->prev);		\
 	_Q_INVALIDATE((elem)->next);		\
 	tostat.tos_pending--;			\
@@ -112,11 +122,6 @@ struct timespec timeout_late;		/* [t] Late if due prior to this */
 
 #define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))
 
-#define CIRCQ_FOREACH(elem, head)		\
-	for ((elem) = CIRCQ_FIRST(head);	\
-	    (elem) != (head);			\
-	    (elem) = CIRCQ_FIRST(elem))
-
 #ifdef WITNESS
 struct lock_object timeout_sleeplock_obj = {
 	.lo_name = "timeout",
@@ -141,10 +146,6 @@ struct lock_type timeout_spinlock_type = {
 void softclock(void *);
 void softclock_create_thread(void *);
 void softclock_thread(void *);
-int timeout_at_ts_locked(struct timeout *, clockid_t, const struct timespec *);
-unsigned int timeout_bucket(const struct timespec *, const struct timespec *);
-int timeout_clock_is_valid(clockid_t);
-unsigned int timeout_maskwheel(unsigned int, const struct timespec *);
 void timeout_proc_barrier(void *);
 
 /*
@@ -177,23 +178,29 @@ timeout_sync_leave(int needsproc)
 	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
 }
 
+/*
+ * Some of the "math" in here is a bit tricky.
+ *
+ * We have to beware of wrapping ints.
+ * We use the fact that any element added to the queue must be added with a
+ * positive time. That means that any element `to' on the queue cannot be
+ * scheduled to timeout further in time than INT_MAX, but to->to_time can
+ * be positive or negative so comparing it with anything is dangerous.
+ * The only way we can use the to->to_time value in any predictable way
+ * is when we calculate how far in the future `to' will timeout -
+ * "to->to_time - ticks". The result will always be positive for future
+ * timeouts and 0 or negative for due timeouts.
+ */
+
 void
 timeout_startup(void)
 {
-	unsigned int b, level;
+	int b;
 
 	CIRCQ_INIT(&timeout_todo);
 	CIRCQ_INIT(&timeout_proc);
 	for (b = 0; b < nitems(timeout_wheel); b++)
 		CIRCQ_INIT(&timeout_wheel[b]);
-
-	for (level = 0; level < nitems(timeout_level_width); level++)
-		timeout_level_width[level] = 2 << (level * WHEELBITS);
-
-	tick_ts.tv_sec = 0;
-	tick_ts.tv_nsec = tick_nsec;
-	timespecclear(&timeout_lastscan);
-	timespecclear(&timeout_late);
 }
 
 void
@@ -215,7 +222,6 @@ timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
 	new->to_func = fn;
 	new->to_arg = arg;
 	new->to_flags = TIMEOUT_INITIALIZED;
-	timespecclear(&new->to_time);
 }
 
 void
@@ -228,15 +234,36 @@ timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
 int
 timeout_add(struct timeout *new, int to_ticks)
 {
-	struct timespec ts, when;
-	int ret;
+	int old_time;
+	int ret = 1;
 
+	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
 	KASSERT(to_ticks >= 0);
 
-	NSEC_TO_TIMESPEC((uint64_t)to_ticks * tick_nsec, &ts);
-
 	mtx_enter(&timeout_mutex);
-	timespecadd(&timeout_lastscan, &ts, &when);
-	ret = timeout_at_ts_locked(new, CLOCK_BOOTTIME, &when);
+
+	/* Initialize the time here, it won't change. */
+	old_time = new->to_time;
+	new->to_time = to_ticks + ticks;
+	CLR(new->to_flags, TIMEOUT_TRIGGERED);
+
+	/*
+	 * If this timeout already is scheduled and now is moved
+	 * earlier, reschedule it now. Otherwise leave it in place
+	 * and let it be rescheduled later.
+	 */
+	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
+		if (new->to_time - ticks < old_time - ticks) {
+			CIRCQ_REMOVE(&new->to_list);
+			CIRCQ_INSERT(&new->to_list, &timeout_todo);
+		}
+		tostat.tos_readded++;
+		ret = 0;
+	} else {
+		SET(new->to_flags, TIMEOUT_ONQUEUE);
+		CIRCQ_INSERT(&new->to_list, &timeout_todo);
+	}
+	tostat.tos_added++;
 	mtx_leave(&timeout_mutex);
 
 	return ret;
@@ -334,49 +361,6 @@ timeout_add_nsec(struct timeout *to, int nsecs)
 }
 
 int
-timeout_at_ts(struct timeout *to, clockid_t clock, const struct timespec *ts)
-{
-	int ret;
-
-	mtx_enter(&timeout_mutex);
-	ret = timeout_at_ts_locked(to, clock, ts);
-	mtx_leave(&timeout_mutex);
-
-	return ret;
-}
-
-int
-timeout_at_ts_locked(struct timeout *to, clockid_t clock,
-    const struct timespec *when)
-{
-	struct timespec old_time;
-	int ret = 1;
-
-	MUTEX_ASSERT_LOCKED(&timeout_mutex);
-	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED));
-	KASSERT(timeout_clock_is_valid(clock));
-
-	old_time = to->to_time;
-	to->to_time = *when;
-	CLR(to->to_flags, TIMEOUT_TRIGGERED);
-
-	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
-		if (timespeccmp(&to->to_time, &old_time, <)) {
-			CIRCQ_REMOVE(&to->to_list);
-			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
-		}
-		tostat.tos_readded++;
-		ret = 0;
-	} else {
-		SET(to->to_flags, TIMEOUT_ONQUEUE);
-		CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
-	}
-	tostat.tos_added++;
-
-	return ret;
-}
-
-int
 timeout_del(struct timeout *to)
 {
 	int ret = 0;
@@ -428,7 +412,7 @@ timeout_barrier(struct timeout *to)
 
 	mtx_enter(&timeout_mutex);
 	SET(barrier.to_flags, TIMEOUT_ONQUEUE);
-	CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
+	CIRCQ_INSERT(&barrier.to_list, &timeout_proc);
 	mtx_leave(&timeout_mutex);
 
 	wakeup_one(&timeout_proc);
@@ -445,46 +429,6 @@ timeout_proc_barrier(void *arg)
 	cond_signal(c);
 }
 
-unsigned int
-timeout_maskwheel(unsigned int level, const struct timespec *abstime)
-{
-	uint32_t hi, lo;
-
-	hi = abstime->tv_sec << 7;
-	lo = abstime->tv_nsec / 7812500;
-
-	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
-}
-
-unsigned int
-timeout_bucket(const struct timespec *now, const struct timespec *later)
-{
-	struct timespec diff;
-	unsigned int level;
-
-	KASSERT(timespeccmp(now, later, <));
-
-	timespecsub(later, now, &diff);
-	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
-		if (diff.tv_sec < timeout_level_width[level])
-			break;
-	}
-	return level * WHEELSIZE + timeout_maskwheel(level, later);
-}
-
-int
-timeout_clock_is_valid(clockid_t clock)
-{
-	switch (clock) {
-	case CLOCK_BOOTTIME:
-	case CLOCK_MONOTONIC:
-		return 1;
-	default:
-		break;
-	}
-	return 0;
-}
-
 /*
  * This is called from hardclock() on the primary CPU at the start of
  * every tick.
@@ -492,47 +436,19 @@ timeout_clock_is_valid(clockid_t clock)
 void
 timeout_hardclock_update(void)
 {
-	struct timespec elapsed, now;
-	unsigned int b, done, first, last, level, need_softclock, offset;
-
-	nanouptime(&now);
+	int need_softclock;
 
 	mtx_enter(&timeout_mutex);
 
-	/*
-	 * Dump the buckets that expired while we were away.
-	 *
-	 * If the elapsed time has exceeded a level's width then we need
-	 * to dump every bucket in the level. We have necessarily completed
-	 * a lap of that level so we need to process buckets in the next.
-	 *
-	 * Otherwise we just need to compare indices. If the index of the
-	 * first expired bucket is greater than or equal to that of the last
-	 * then we have completed a lap of the level and need to process
-	 * buckets in the next.
-	 */
-	timespecsub(&now, &timeout_lastscan, &elapsed);
-	for (level = 0; level < nitems(timeout_level_width); level++) {
-		first = timeout_maskwheel(level, &timeout_lastscan);
-		if (elapsed.tv_sec >= timeout_level_width[level]) {
-			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
-			done = 0;
-		} else {
-			last = timeout_maskwheel(level, &now);
-			done = first <= last;
-		}
-		offset = level * WHEELSIZE;
-		for (b = first;; b = (b + 1) % WHEELSIZE) {
-			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel[offset + b]);
-			if (b == last)
-				break;
+	MOVEBUCKET(0, ticks);
+	if (MASKWHEEL(0, ticks) == 0) {
+		MOVEBUCKET(1, ticks);
+		if (MASKWHEEL(1, ticks) == 0) {
+			MOVEBUCKET(2, ticks);
+			if (MASKWHEEL(2, ticks) == 0)
+				MOVEBUCKET(3, ticks);
 		}
-		if (done)
-			break;
 	}
-
-	timespecsub(&now, &tick_ts, &timeout_late);
-	timeout_lastscan = now;
 	need_softclock = !CIRCQ_EMPTY(&timeout_todo);
 
 	mtx_leave(&timeout_mutex);
@@ -573,8 +489,10 @@ timeout_run(struct timeout *to)
 void
 softclock(void *arg)
 {
+	int delta;
+	struct circq *bucket;
 	struct timeout *to;
-	unsigned int b, needsproc = 0;
+	int needsproc = 0;
 
 	mtx_enter(&timeout_mutex);
 	while (!CIRCQ_EMPTY(&timeout_todo)) {
@@ -585,16 +503,17 @@ softclock(void *arg)
 		 * If due run it or defer execution to the thread,
 		 * otherwise insert it into the right bucket.
 		 */
-		if (timespeccmp(&timeout_lastscan, &to->to_time, <)) {
-			b = timeout_bucket(&timeout_lastscan, &to->to_time);
-			CIRCQ_INSERT_TAIL(&timeout_wheel[b], &to->to_list);
+		delta = to->to_time - ticks;
+		if (delta > 0) {
+			bucket = &BUCKET(delta, to->to_time);
+			CIRCQ_INSERT(&to->to_list, bucket);
 			tostat.tos_rescheduled++;
 			continue;
 		}
-		if (timespeccmp(&to->to_time, &timeout_late, <))
+		if (delta < 0)
 			tostat.tos_late++;
 		if (ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX)) {
-			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+			CIRCQ_INSERT(&to->to_list, &timeout_proc);
 			needsproc = 1;
 			continue;
 		}
@@ -649,6 +568,38 @@ softclock_thread(void *arg)
 	}
 }
 
+#ifndef SMALL_KERNEL
+void
+timeout_adjust_ticks(int adj)
+{
+	struct timeout *to;
+	struct circq *p;
+	int new_ticks, b;
+
+	/* adjusting the monotonic clock backwards would be a Bad Thing */
+	if (adj <= 0)
+		return;
+
+	mtx_enter(&timeout_mutex);
+	new_ticks = ticks + adj;
+	for (b = 0; b < nitems(timeout_wheel); b++) {
+		p = CIRCQ_FIRST(&timeout_wheel[b]);
+		while (p != &timeout_wheel[b]) {
+			to = timeout_from_circq(p);
+			p = CIRCQ_FIRST(p);
+
+			/* when moving a timeout forward need to reinsert it */
+			if (to->to_time - ticks < adj)
+				to->to_time = new_ticks;
+			CIRCQ_REMOVE(&to->to_list);
+			CIRCQ_INSERT(&to->to_list, &timeout_todo);
+		}
+	}
+	ticks = new_ticks;
+	mtx_leave(&timeout_mutex);
+}
+#endif
+
 int
 timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
@@ -663,34 +614,10 @@ timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 
 #ifdef DDB
 void db_show_callout_bucket(struct circq *);
-char *db_ts_str(const struct timespec *);
-
-char *
-db_ts_str(const struct timespec *ts)
-{
-	static char buf[64];
-	struct timespec tmp = *ts;
-	int neg = 0;
-
-	if (tmp.tv_sec < 0) {
-		tmp.tv_sec = -tmp.tv_sec;
-		if (tmp.tv_nsec > 0) {
-			tmp.tv_sec--;
-			tmp.tv_nsec = 1000000000 - tmp.tv_nsec;
-		}
-		neg = 1;
-	}
-
-	snprintf(buf, sizeof(buf), "%s%lld.%09ld",
-	    neg ? "-" : "", tmp.tv_sec, tmp.tv_nsec);
-
-	return buf;
-}
 
 void
 db_show_callout_bucket(struct circq *bucket)
 {
-	struct timespec left;
 	char buf[8];
 	struct timeout *to;
 	struct circq *p;
@@ -698,7 +625,7 @@ db_show_callout_bucket(struct circq *bucket)
 	char *name, *where;
 	int width = sizeof(long) * 2;
 
-	CIRCQ_FOREACH(p, bucket) {
+	for (p = CIRCQ_FIRST(bucket); p != bucket; p = CIRCQ_FIRST(p)) {
 		to = timeout_from_circq(p);
 		db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
 		name = name ? name : "?";
@@ -712,9 +639,8 @@ db_show_callout_bucket(struct circq *bucket)
 			    (bucket - timeout_wheel) / WHEELSIZE);
 			where = buf;
 		}
-		timespecsub(&to->to_time, &timeout_lastscan, &left);
-		db_printf("%18s %7s 0x%0*lx %s\n",
-		    db_ts_str(&left), where, width, (ulong)to->to_arg, name);
+		db_printf("%9d %7s 0x%0*lx %s\n",
+		    to->to_time - ticks, where, width, (ulong)to->to_arg, name);
 	}
 }
 
@@ -722,11 +648,10 @@ void
 db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
 {
 	int width = sizeof(long) * 2 + 2;
-	unsigned int b;
+	int b;
 
-	db_printf("%18s seconds up at last scan\n",
-	    db_ts_str(&timeout_lastscan));
-	db_printf("%18s %7s %*s func\n", "remaining", "wheel", width, "arg");
+	db_printf("ticks now: %d\n", ticks);
+	db_printf("%9s %7s %*s func\n", "ticks", "wheel", width, "arg");
 
 	db_show_callout_bucket(&timeout_todo);
 	db_show_callout_bucket(&timeout_proc);
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index d21cf328216..abf528e575c 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uipc_socket.c,v 1.235 2019/11/26 15:27:08 cheloha Exp $ */
+/*	$OpenBSD: uipc_socket.c,v 1.236 2019/12/02 21:47:54 cheloha Exp $ */
 /*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */
 
 /*
@@ -2141,10 +2141,9 @@ so_print(void *v,
 		    (unsigned long long)so->so_sp->ssp_max);
 		(*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
 		    so->so_sp->ssp_idletv.tv_usec);
-		(*pr)("\tssp_idleto: %spending (@%lld.%09ld)\n",
+		(*pr)("\tssp_idleto: %spending (@%i)\n",
 		    timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
-		    so->so_sp->ssp_idleto.to_time.tv_sec,
-		    so->so_sp->ssp_idleto.to_time.tv_nsec);
+		    so->so_sp->ssp_idleto.to_time);
 	}
 
 	(*pr)("so_rcv:\n");