/* $OpenBSD: event.c,v 1.21 2009/11/12 05:44:29 deraadt Exp $ */ /* * Copyright (c) 2000-2004 Niels Provos * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #ifdef WIN32 #define WIN32_LEAN_AND_MEAN #include #undef WIN32_LEAN_AND_MEAN #include "misc.h" #endif #include #include #ifdef HAVE_SYS_TIME_H #include #else #include #endif #include #include #include #ifndef WIN32 #include #endif #include #include #include #include #include #include "event.h" #include "event-internal.h" #include "log.h" #ifdef HAVE_EVENT_PORTS extern const struct eventop evportops; #endif #ifdef HAVE_SELECT extern const struct eventop selectops; #endif #ifdef HAVE_POLL extern const struct eventop pollops; #endif #ifdef HAVE_RTSIG extern const struct eventop rtsigops; #endif #ifdef HAVE_EPOLL extern const struct eventop epollops; #endif #ifdef HAVE_WORKING_KQUEUE extern const struct eventop kqops; #endif #ifdef HAVE_DEVPOLL extern const struct eventop devpollops; #endif #ifdef WIN32 extern const struct eventop win32ops; #endif /* In order of preference */ const struct eventop *eventops[] = { #ifdef HAVE_EVENT_PORTS &evportops, #endif #ifdef HAVE_WORKING_KQUEUE &kqops, #endif #ifdef HAVE_EPOLL &epollops, #endif #ifdef HAVE_DEVPOLL &devpollops, #endif #ifdef HAVE_RTSIG &rtsigops, #endif #ifdef HAVE_POLL &pollops, #endif #ifdef HAVE_SELECT &selectops, #endif #ifdef WIN32 &win32ops, #endif NULL }; /* Global state */ struct event_base *current_base = NULL; extern struct event_base *evsignal_base; static int use_monotonic; /* Handle signals - This is a deprecated interface */ int (*event_sigcb)(void); /* Signal callback when gotsig is set */ volatile sig_atomic_t event_gotsig; /* Set in signal handler */ /* Prototypes */ static void event_queue_insert(struct event_base *, struct event *, int); static void event_queue_remove(struct event_base *, struct event *, int); static int event_haveevents(struct event_base *); static void event_process_active(struct event_base *); static int timeout_next(struct event_base *, struct timeval **); static void timeout_process(struct event_base *); static void timeout_correct(struct event_base *, struct timeval *); static int compare(struct event *a, struct event *b) { if (timercmp(&a->ev_timeout, &b->ev_timeout, <)) return (-1); else if (timercmp(&a->ev_timeout, &b->ev_timeout, >)) return (1); if (a < b) return (-1); else if (a > b) return (1); return (0); } static void detect_monotonic(void) { #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC) struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) use_monotonic = 1; #endif } static int gettime(struct timeval *tp) { #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC) struct timespec ts; if (use_monotonic) { if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1) return (-1); tp->tv_sec = ts.tv_sec; tp->tv_usec = ts.tv_nsec / 1000; return (0); } #endif return (gettimeofday(tp, NULL)); } RB_PROTOTYPE(event_tree, event, ev_timeout_node, compare); RB_GENERATE(event_tree, event, ev_timeout_node, compare); struct event_base * event_init(void) { int i; struct event_base *base; if ((base = calloc(1, sizeof(struct event_base))) == NULL) event_err(1, "%s: calloc"); event_sigcb = NULL; event_gotsig = 0; detect_monotonic(); gettime(&base->event_tv); RB_INIT(&base->timetree); TAILQ_INIT(&base->eventqueue); TAILQ_INIT(&base->sig.signalqueue); base->sig.ev_signal_pair[0] = -1; base->sig.ev_signal_pair[1] = -1; base->evbase = NULL; for (i = 0; eventops[i] && !base->evbase; i++) { base->evsel = eventops[i]; base->evbase = base->evsel->init(base); } if (base->evbase == NULL) event_errx(1, "%s: no event mechanism available", __func__); if (getenv("EVENT_SHOW_METHOD")) event_msgx("libevent using: %s\n", base->evsel->name); /* allocate a single active event queue */ event_base_priority_init(base, 1); current_base = base; return (base); } void event_base_free(struct event_base *base) { int i; if (base == NULL && current_base) base = current_base; if (base == current_base) current_base = NULL; assert(base); if (base->evsel->dealloc != NULL) base->evsel->dealloc(base, base->evbase); for (i=0; i < base->nactivequeues; ++i) assert(TAILQ_EMPTY(base->activequeues[i])); assert(RB_EMPTY(&base->timetree)); for (i = 0; i < base->nactivequeues; ++i) free(base->activequeues[i]); free(base->activequeues); assert(TAILQ_EMPTY(&base->eventqueue)); free(base); } int event_priority_init(int npriorities) { return event_base_priority_init(current_base, npriorities); } int event_base_priority_init(struct event_base *base, int npriorities) { int i; if (base->event_count_active) return (-1); if (base->nactivequeues && npriorities != base->nactivequeues) { for (i = 0; i < base->nactivequeues; ++i) { free(base->activequeues[i]); } free(base->activequeues); } /* Allocate our priority queues */ base->nactivequeues = npriorities; base->activequeues = (struct event_list **)calloc(base->nactivequeues, npriorities * sizeof(struct event_list *)); if (base->activequeues == NULL) event_err(1, "%s: calloc", __func__); for (i = 0; i < base->nactivequeues; ++i) { base->activequeues[i] = malloc(sizeof(struct event_list)); if (base->activequeues[i] == NULL) event_err(1, "%s: malloc", __func__); TAILQ_INIT(base->activequeues[i]); } return (0); } int event_haveevents(struct event_base *base) { return (base->event_count > 0); } /* * Active events are stored in priority queues. Lower priorities are always * process before higher priorities. Low priority events can starve high * priority ones. */ static void event_process_active(struct event_base *base) { struct event *ev; struct event_list *activeq = NULL; int i; short ncalls; if (!base->event_count_active) return; for (i = 0; i < base->nactivequeues; ++i) { if (TAILQ_FIRST(base->activequeues[i]) != NULL) { activeq = base->activequeues[i]; break; } } assert(activeq != NULL); for (ev = TAILQ_FIRST(activeq); ev; ev = TAILQ_FIRST(activeq)) { event_queue_remove(base, ev, EVLIST_ACTIVE); /* Allows deletes to work */ ncalls = ev->ev_ncalls; ev->ev_pncalls = &ncalls; while (ncalls) { ncalls--; ev->ev_ncalls = ncalls; (*ev->ev_callback)((int)ev->ev_fd, ev->ev_res, ev->ev_arg); if (event_gotsig) return; } } } /* * Wait continously for events. We exit only if no events are left. */ int event_dispatch(void) { return (event_loop(0)); } int event_base_dispatch(struct event_base *event_base) { return (event_base_loop(event_base, 0)); } static void event_loopexit_cb(int fd, short what, void *arg) { struct event_base *base = arg; base->event_gotterm = 1; } /* not thread safe */ int event_loopexit(struct timeval *tv) { return (event_once(-1, EV_TIMEOUT, event_loopexit_cb, current_base, tv)); } int event_base_loopexit(struct event_base *event_base, struct timeval *tv) { return (event_base_once(event_base, -1, EV_TIMEOUT, event_loopexit_cb, event_base, tv)); } /* not thread safe */ int event_loop(int flags) { return event_base_loop(current_base, flags); } int event_base_loop(struct event_base *base, int flags) { const struct eventop *evsel = base->evsel; void *evbase = base->evbase; struct timeval tv; struct timeval *tv_p; int res, done; #ifndef WIN32 if(!TAILQ_EMPTY(&base->sig.signalqueue)) evsignal_base = base; #endif done = 0; while (!done) { /* Calculate the initial events that we are waiting for */ if (evsel->recalc(base, evbase, 0) == -1) return (-1); /* Terminate the loop if we have been asked to */ if (base->event_gotterm) { base->event_gotterm = 0; break; } /* You cannot use this interface for multi-threaded apps */ while (event_gotsig) { event_gotsig = 0; if (event_sigcb) { res = (*event_sigcb)(); if (res == -1) { errno = EINTR; return (-1); } } } timeout_correct(base, &tv); tv_p = &tv; if (!base->event_count_active && !(flags & EVLOOP_NONBLOCK)) { timeout_next(base, &tv_p); } else { /* * if we have active events, we just poll new events * without waiting. */ timerclear(&tv); } /* If we have no events, we just exit */ if (!event_haveevents(base)) { event_debug(("%s: no events registered.", __func__)); return (1); } res = evsel->dispatch(base, evbase, tv_p); if (res == -1) return (-1); timeout_process(base); if (base->event_count_active) { event_process_active(base); if (!base->event_count_active && (flags & EVLOOP_ONCE)) done = 1; } else if (flags & EVLOOP_NONBLOCK) done = 1; } event_debug(("%s: asked to terminate loop.", __func__)); return (0); } /* Sets up an event for processing once */ struct event_once { struct event ev; void (*cb)(int, short, void *); void *arg; }; /* One-time callback, it deletes itself */ static void event_once_cb(int fd, short events, void *arg) { struct event_once *eonce = arg; (*eonce->cb)(fd, events, eonce->arg); free(eonce); } /* not threadsafe, event scheduled once. */ int event_once(int fd, short events, void (*callback)(int, short, void *), void *arg, struct timeval *tv) { return event_base_once(current_base, fd, events, callback, arg, tv); } /* Schedules an event once */ int event_base_once(struct event_base *base, int fd, short events, void (*callback)(int, short, void *), void *arg, struct timeval *tv) { struct event_once *eonce; struct timeval etv; int res; /* We cannot support signals that just fire once */ if (events & EV_SIGNAL) return (-1); if ((eonce = calloc(1, sizeof(struct event_once))) == NULL) return (-1); eonce->cb = callback; eonce->arg = arg; if (events == EV_TIMEOUT) { if (tv == NULL) { timerclear(&etv); tv = &etv; } evtimer_set(&eonce->ev, event_once_cb, eonce); } else if (events & (EV_READ|EV_WRITE)) { events &= EV_READ|EV_WRITE; event_set(&eonce->ev, fd, events, event_once_cb, eonce); } else { /* Bad event combination */ free(eonce); return (-1); } res = event_base_set(base, &eonce->ev); if (res == 0) res = event_add(&eonce->ev, tv); if (res != 0) { free(eonce); return (res); } return (0); } void event_set(struct event *ev, int fd, short events, void (*callback)(int, short, void *), void *arg) { /* Take the current base - caller needs to set the real base later */ ev->ev_base = current_base; ev->ev_callback = callback; ev->ev_arg = arg; ev->ev_fd = fd; ev->ev_events = events; ev->ev_res = 0; ev->ev_flags = EVLIST_INIT; ev->ev_ncalls = 0; ev->ev_pncalls = NULL; /* by default, we put new events into the middle priority */ if(current_base) ev->ev_pri = current_base->nactivequeues/2; } int event_base_set(struct event_base *base, struct event *ev) { /* Only innocent events may be assigned to a different base */ if (ev->ev_flags != EVLIST_INIT) return (-1); ev->ev_base = base; ev->ev_pri = base->nactivequeues/2; return (0); } /* * Set's the priority of an event - if an event is already scheduled * changing the priority is going to fail. */ int event_priority_set(struct event *ev, int pri) { if (ev->ev_flags & EVLIST_ACTIVE) return (-1); if (pri < 0 || pri >= ev->ev_base->nactivequeues) return (-1); ev->ev_pri = pri; return (0); } /* * Checks if a specific event is pending or scheduled. */ int event_pending(struct event *ev, short event, struct timeval *tv) { struct timeval now, res; int flags = 0; if (ev->ev_flags & EVLIST_INSERTED) flags |= (ev->ev_events & (EV_READ|EV_WRITE)); if (ev->ev_flags & EVLIST_ACTIVE) flags |= ev->ev_res; if (ev->ev_flags & EVLIST_TIMEOUT) flags |= EV_TIMEOUT; if (ev->ev_flags & EVLIST_SIGNAL) flags |= EV_SIGNAL; event &= (EV_TIMEOUT|EV_READ|EV_WRITE|EV_SIGNAL); /* See if there is a timeout that we should report */ if (tv != NULL && (flags & event & EV_TIMEOUT)) { gettime(&now); timersub(&ev->ev_timeout, &now, &res); /* correctly remap to real time */ gettimeofday(&now, NULL); timeradd(&now, &res, tv); } return (flags & event); } int event_add(struct event *ev, struct timeval *tv) { struct event_base *base = ev->ev_base; const struct eventop *evsel = base->evsel; void *evbase = base->evbase; event_debug(( "event_add: event: %p, %s%s%scall %p", ev, ev->ev_events & EV_READ ? "EV_READ " : " ", ev->ev_events & EV_WRITE ? "EV_WRITE " : " ", tv ? "EV_TIMEOUT " : " ", ev->ev_callback)); assert(!(ev->ev_flags & ~EVLIST_ALL)); if (tv != NULL) { struct timeval now; if (ev->ev_flags & EVLIST_TIMEOUT) event_queue_remove(base, ev, EVLIST_TIMEOUT); /* Check if it is active due to a timeout. Rescheduling * this timeout before the callback can be executed * removes it from the active list. */ if ((ev->ev_flags & EVLIST_ACTIVE) && (ev->ev_res & EV_TIMEOUT)) { /* See if we are just active executing this * event in a loop */ if (ev->ev_ncalls && ev->ev_pncalls) { /* Abort loop */ *ev->ev_pncalls = 0; } event_queue_remove(base, ev, EVLIST_ACTIVE); } gettime(&now); timeradd(&now, tv, &ev->ev_timeout); event_debug(( "event_add: timeout in %d seconds, call %p", tv->tv_sec, ev->ev_callback)); event_queue_insert(base, ev, EVLIST_TIMEOUT); } if ((ev->ev_events & (EV_READ|EV_WRITE)) && !(ev->ev_flags & (EVLIST_INSERTED|EVLIST_ACTIVE))) { event_queue_insert(base, ev, EVLIST_INSERTED); return (evsel->add(evbase, ev)); } else if ((ev->ev_events & EV_SIGNAL) && !(ev->ev_flags & EVLIST_SIGNAL)) { event_queue_insert(base, ev, EVLIST_SIGNAL); return (evsel->add(evbase, ev)); } return (0); } int event_del(struct event *ev) { struct event_base *base; const struct eventop *evsel; void *evbase; event_debug(("event_del: %p, callback %p", ev, ev->ev_callback)); /* An event without a base has not been added */ if (ev->ev_base == NULL) return (-1); base = ev->ev_base; evsel = base->evsel; evbase = base->evbase; assert(!(ev->ev_flags & ~EVLIST_ALL)); /* See if we are just active executing this event in a loop */ if (ev->ev_ncalls && ev->ev_pncalls) { /* Abort loop */ *ev->ev_pncalls = 0; } if (ev->ev_flags & EVLIST_TIMEOUT) event_queue_remove(base, ev, EVLIST_TIMEOUT); if (ev->ev_flags & EVLIST_ACTIVE) event_queue_remove(base, ev, EVLIST_ACTIVE); if (ev->ev_flags & EVLIST_INSERTED) { event_queue_remove(base, ev, EVLIST_INSERTED); return (evsel->del(evbase, ev)); } else if (ev->ev_flags & EVLIST_SIGNAL) { event_queue_remove(base, ev, EVLIST_SIGNAL); return (evsel->del(evbase, ev)); } return (0); } void event_active(struct event *ev, int res, short ncalls) { /* We get different kinds of events, add them together */ if (ev->ev_flags & EVLIST_ACTIVE) { ev->ev_res |= res; return; } ev->ev_res = res; ev->ev_ncalls = ncalls; ev->ev_pncalls = NULL; event_queue_insert(ev->ev_base, ev, EVLIST_ACTIVE); } static int timeout_next(struct event_base *base, struct timeval **tv_p) { struct timeval now; struct event *ev; struct timeval *tv = *tv_p; if ((ev = RB_MIN(event_tree, &base->timetree)) == NULL) { /* if no time-based events are active wait for I/O */ *tv_p = NULL; return (0); } if (gettime(&now) == -1) return (-1); if (timercmp(&ev->ev_timeout, &now, <=)) { timerclear(tv); return (0); } timersub(&ev->ev_timeout, &now, tv); assert(tv->tv_sec >= 0); assert(tv->tv_usec >= 0); event_debug(("timeout_next: in %d seconds", tv->tv_sec)); return (0); } /* * Determines if the time is running backwards by comparing the current * time against the last time we checked. Not needed when using clock * monotonic. */ static void timeout_correct(struct event_base *base, struct timeval *tv) { struct event *ev; struct timeval off; if (use_monotonic) return; /* Check if time is running backwards */ gettime(tv); if (timercmp(tv, &base->event_tv, >=)) { base->event_tv = *tv; return; } event_debug(("%s: time is running backwards, corrected", __func__)); timersub(&base->event_tv, tv, &off); /* * We can modify the key element of the node without destroying * the key, beause we apply it to all in the right order. */ RB_FOREACH(ev, event_tree, &base->timetree) timersub(&ev->ev_timeout, &off, &ev->ev_timeout); } void timeout_process(struct event_base *base) { struct timeval now; struct event *ev, *next; gettime(&now); for (ev = RB_MIN(event_tree, &base->timetree); ev; ev = next) { if (timercmp(&ev->ev_timeout, &now, >)) break; next = RB_NEXT(event_tree, &base->timetree, ev); event_queue_remove(base, ev, EVLIST_TIMEOUT); /* delete this event from the I/O queues */ event_del(ev); event_debug(("timeout_process: call %p", ev->ev_callback)); event_active(ev, EV_TIMEOUT, 1); } } void event_queue_remove(struct event_base *base, struct event *ev, int queue) { int docount = 1; if (!(ev->ev_flags & queue)) event_errx(1, "%s: %p(fd %d) not on queue %x", __func__, ev, ev->ev_fd, queue); if (ev->ev_flags & EVLIST_INTERNAL) docount = 0; if (docount) base->event_count--; ev->ev_flags &= ~queue; switch (queue) { case EVLIST_ACTIVE: if (docount) base->event_count_active--; TAILQ_REMOVE(base->activequeues[ev->ev_pri], ev, ev_active_next); break; case EVLIST_SIGNAL: TAILQ_REMOVE(&base->sig.signalqueue, ev, ev_signal_next); break; case EVLIST_TIMEOUT: RB_REMOVE(event_tree, &base->timetree, ev); break; case EVLIST_INSERTED: TAILQ_REMOVE(&base->eventqueue, ev, ev_next); break; default: event_errx(1, "%s: unknown queue %x", __func__, queue); } } void event_queue_insert(struct event_base *base, struct event *ev, int queue) { int docount = 1; if (ev->ev_flags & queue) { /* Double insertion is possible for active events */ if (queue & EVLIST_ACTIVE) return; event_errx(1, "%s: %p(fd %d) already on queue %x", __func__, ev, ev->ev_fd, queue); } if (ev->ev_flags & EVLIST_INTERNAL) docount = 0; if (docount) base->event_count++; ev->ev_flags |= queue; switch (queue) { case EVLIST_ACTIVE: if (docount) base->event_count_active++; TAILQ_INSERT_TAIL(base->activequeues[ev->ev_pri], ev,ev_active_next); break; case EVLIST_SIGNAL: TAILQ_INSERT_TAIL(&base->sig.signalqueue, ev, ev_signal_next); break; case EVLIST_TIMEOUT: { struct event *tmp = RB_INSERT(event_tree, &base->timetree, ev); assert(tmp == NULL); break; } case EVLIST_INSERTED: TAILQ_INSERT_TAIL(&base->eventqueue, ev, ev_next); break; default: event_errx(1, "%s: unknown queue %x", __func__, queue); } } /* Functions for debugging */ const char * event_get_version(void) { return (LIBEVENT_VERSION); } /* * No thread-safe interface needed - the information should be the same * for all threads. */ const char * event_get_method(void) { return (current_base->evsel->name); }