diff options
author | Todd C. Miller <millert@cvs.openbsd.org> | 2003-12-31 03:27:24 +0000 |
---|---|---|
committer | Todd C. Miller <millert@cvs.openbsd.org> | 2003-12-31 03:27:24 +0000 |
commit | 23f499490dbfd44837b6e8444733b39e8577d76f (patch) | |
tree | 36e51f78cf130e80aa8a9bfc92190cd065ce4098 /lib/libc | |
parent | 8d7e31dc1a81c4e63b4f5022186f2a529198c3ac (diff) |
Implement svc_getreq_poll(3) and friends and use poll(2) instead of select(2)
in the libc rpc code. The main difference between this and the previous
version is the use of a simple free list that simplifies the logic when
adding a socket to svc_pollfd. I've also added code to pack svc_pollfd
when the free list gets too big. The general idea is to keep
svc_pollfd as tightly packed as possible to make poll(2) efficient.
Tested by many people and OK deraadt@
Diffstat (limited to 'lib/libc')
-rw-r--r-- | lib/libc/rpc/Makefile.inc | 6 | ||||
-rw-r--r-- | lib/libc/rpc/clnt_tcp.c | 47 | ||||
-rw-r--r-- | lib/libc/rpc/clnt_udp.c | 49 | ||||
-rw-r--r-- | lib/libc/rpc/pmap_rmt.c | 40 | ||||
-rw-r--r-- | lib/libc/rpc/rpc.3 | 122 | ||||
-rw-r--r-- | lib/libc/rpc/rpc_commondata.c | 4 | ||||
-rw-r--r-- | lib/libc/rpc/svc.c | 360 | ||||
-rw-r--r-- | lib/libc/rpc/svc_run.c | 49 | ||||
-rw-r--r-- | lib/libc/rpc/svc_tcp.c | 56 |
9 files changed, 463 insertions, 270 deletions
diff --git a/lib/libc/rpc/Makefile.inc b/lib/libc/rpc/Makefile.inc index afcec0471c5..9cdcc5c7845 100644 --- a/lib/libc/rpc/Makefile.inc +++ b/lib/libc/rpc/Makefile.inc @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile.inc,v 1.11 2000/12/21 21:11:31 deraadt Exp $ +# $OpenBSD: Makefile.inc,v 1.12 2003/12/31 03:27:23 millert Exp $ # librpc sources .PATH: ${LIBCSRCDIR}/arch/${MACHINE}/rpc ${LIBCSRCDIR}/rpc @@ -55,8 +55,12 @@ MLINKS+= bindresvport.3 bindresvport_sa.3 \ rpc.3 svc_getargs.3 \ rpc.3 svc_getcaller.3 \ rpc.3 svc_getreq.3 \ + rpc.3 svc_getreq_common.3 \ + rpc.3 svc_getreq_poll.3 \ rpc.3 svc_getreqset.3 \ rpc.3 svc_getreqset2.3 \ + rpc.3 svc_max_pollfd.3 \ + rpc.3 svc_pollfd.3 \ rpc.3 svc_register.3 \ rpc.3 svc_run.3 \ rpc.3 svc_sendreply.3 \ diff --git a/lib/libc/rpc/clnt_tcp.c b/lib/libc/rpc/clnt_tcp.c index bb2f5607d3a..c2110224df0 100644 --- a/lib/libc/rpc/clnt_tcp.c +++ b/lib/libc/rpc/clnt_tcp.c @@ -28,7 +28,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char *rcsid = "$OpenBSD: clnt_tcp.c,v 1.18 2001/09/15 13:51:00 deraadt Exp $"; +static char *rcsid = "$OpenBSD: clnt_tcp.c,v 1.19 2003/12/31 03:27:23 millert Exp $"; #endif /* LIBC_SCCS and not lint */ /* @@ -402,58 +402,49 @@ readtcp(ct, buf, len) caddr_t buf; int len; { - fd_set *fds, readfds; - struct timeval start, after, duration, delta, tmp; - int r, save_errno; + struct pollfd pfd[1]; + struct timeval start, after, duration, tmp; + int delta, r, save_errno; if (len == 0) return (0); - if (ct->ct_sock+1 > FD_SETSIZE) { - int bytes = howmany(ct->ct_sock+1, NFDBITS) * sizeof(fd_mask); - fds = (fd_set *)malloc(bytes); - if (fds == NULL) - return (-1); - memset(fds, 0, bytes); - } else { - fds = &readfds; - FD_ZERO(fds); - } - + pfd[0].fd = ct->ct_sock; + pfd[0].events = POLLIN; + delta = ct->ct_wait.tv_sec * 1000 + ct->ct_wait.tv_usec / 1000; gettimeofday(&start, NULL); - delta = ct->ct_wait; - while (TRUE) { - /* XXX we know the other bits are still clear */ - FD_SET(ct->ct_sock, fds); - r = 
select(ct->ct_sock+1, fds, NULL, NULL, &delta); + for (;;) { + r = poll(pfd, 1, delta); save_errno = errno; gettimeofday(&after, NULL); timersub(&start, &after, &duration); timersub(&ct->ct_wait, &duration, &tmp); - delta = tmp; - if (delta.tv_sec < 0 || !timerisset(&delta)) + delta = tmp.tv_sec * 1000 + tmp.tv_usec / 1000; + if (delta <= 0) r = 0; switch (r) { case 0: ct->ct_error.re_status = RPC_TIMEDOUT; - if (fds != &readfds) - free(fds); return (-1); + case 1: + if (pfd[0].revents & POLLNVAL) + errno = EBADF; + else if (pfd[0].revents & POLLERR) + errno = EIO; + else + break; + /* FALLTHROUGH */ case -1: if (errno == EINTR) continue; ct->ct_error.re_status = RPC_CANTRECV; ct->ct_error.re_errno = save_errno; - if (fds != &readfds) - free(fds); return (-1); } break; } - if (fds != &readfds) - free(fds); switch (len = read(ct->ct_sock, buf, len)) { case 0: diff --git a/lib/libc/rpc/clnt_udp.c b/lib/libc/rpc/clnt_udp.c index 782078d5aa8..40fcc669961 100644 --- a/lib/libc/rpc/clnt_udp.c +++ b/lib/libc/rpc/clnt_udp.c @@ -28,7 +28,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char *rcsid = "$OpenBSD: clnt_udp.c,v 1.19 2002/09/06 18:35:12 deraadt Exp $"; +static char *rcsid = "$OpenBSD: clnt_udp.c,v 1.20 2003/12/31 03:27:23 millert Exp $"; #endif /* LIBC_SCCS and not lint */ /* @@ -221,7 +221,7 @@ clntudp_call(cl, proc, xargs, argsp, xresults, resultsp, utimeout) int outlen; int inlen; socklen_t fromlen; - fd_set *fds, readfds; + struct pollfd pfd[1]; struct sockaddr_in from; struct rpc_msg reply_msg; XDR reply_xdrs; @@ -235,17 +235,8 @@ clntudp_call(cl, proc, xargs, argsp, xresults, resultsp, utimeout) else timeout = cu->cu_total; /* use default timeout */ - if (cu->cu_sock+1 > FD_SETSIZE) { - int bytes = howmany(cu->cu_sock+1, NFDBITS) * sizeof(fd_mask); - fds = (fd_set *)malloc(bytes); - if (fds == NULL) - return (cu->cu_error.re_status = RPC_CANTSEND); - memset(fds, 0, bytes); - } else { - fds = &readfds; - FD_ZERO(fds); - } - + pfd[0].fd = cu->cu_sock; 
+ pfd[0].events = POLLIN; timerclear(&time_waited); call_again: xdrs = &(cu->cu_outxdrs); @@ -258,8 +249,6 @@ call_again: if (!XDR_PUTLONG(xdrs, (long *)&proc) || !AUTH_MARSHALL(cl->cl_auth, xdrs) || !(*xargs)(xdrs, argsp)) { - if (fds != &readfds) - free(fds); return (cu->cu_error.re_status = RPC_CANTENCODEARGS); } outlen = (int)XDR_GETPOS(xdrs); @@ -268,19 +257,14 @@ send_again: if (sendto(cu->cu_sock, cu->cu_outbuf, outlen, 0, (struct sockaddr *)&(cu->cu_raddr), cu->cu_rlen) != outlen) { cu->cu_error.re_errno = errno; - if (fds != &readfds) - free(fds); return (cu->cu_error.re_status = RPC_CANTSEND); } /* * Hack to provide rpc-based message passing */ - if (!timerisset(&timeout)) { - if (fds != &readfds) - free(fds); + if (!timerisset(&timeout)) return (cu->cu_error.re_status = RPC_TIMEDOUT); - } /* * sub-optimal code appears here because we have @@ -293,17 +277,22 @@ send_again: gettimeofday(&start, NULL); for (;;) { - /* XXX we know the other bits are still clear */ - FD_SET(cu->cu_sock, fds); - switch (select(cu->cu_sock+1, fds, NULL, NULL, &cu->cu_wait)) { + switch (poll(pfd, 1, + cu->cu_wait.tv_sec * 1000 + cu->cu_wait.tv_usec / 1000)) { case 0: timeradd(&time_waited, &cu->cu_wait, &tmp1); time_waited = tmp1; if (timercmp(&time_waited, &timeout, <)) goto send_again; - if (fds != &readfds) - free(fds); return (cu->cu_error.re_status = RPC_TIMEDOUT); + case 1: + if (pfd[0].revents & POLLNVAL) + errno = EBADF; + else if (pfd[0].revents & POLLERR) + errno = EIO; + else + break; + /* FALLTHROUGH */ case -1: if (errno == EINTR) { gettimeofday(&after, NULL); @@ -312,13 +301,9 @@ send_again: time_waited = tmp2; if (timercmp(&time_waited, &timeout, <)) continue; - if (fds != &readfds) - free(fds); return (cu->cu_error.re_status = RPC_TIMEDOUT); } cu->cu_error.re_errno = errno; - if (fds != &readfds) - free(fds); return (cu->cu_error.re_status = RPC_CANTRECV); } @@ -332,8 +317,6 @@ send_again: if (errno == EWOULDBLOCK) continue; cu->cu_error.re_errno = errno; - if 
(fds != &readfds) - free(fds); return (cu->cu_error.re_status = RPC_CANTRECV); } if (inlen < sizeof(u_int32_t)) @@ -393,8 +376,6 @@ send_again: cu->cu_error.re_status = RPC_CANTDECODERES; } - if (fds != &readfds) - free(fds); return (cu->cu_error.re_status); } diff --git a/lib/libc/rpc/pmap_rmt.c b/lib/libc/rpc/pmap_rmt.c index 2e36d1263ea..7c519843e24 100644 --- a/lib/libc/rpc/pmap_rmt.c +++ b/lib/libc/rpc/pmap_rmt.c @@ -28,7 +28,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char *rcsid = "$OpenBSD: pmap_rmt.c,v 1.20 2002/09/06 18:35:12 deraadt Exp $"; +static char *rcsid = "$OpenBSD: pmap_rmt.c,v 1.21 2003/12/31 03:27:23 millert Exp $"; #endif /* LIBC_SCCS and not lint */ /* @@ -233,8 +233,9 @@ clnt_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult) socklen_t fromlen; int sock = -1; int on = 1; - fd_set *fds = NULL, readfds; + struct pollfd pfd[1]; int i; + int timo; bool_t done = FALSE; u_long xid; u_long port; @@ -243,7 +244,6 @@ clnt_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult) struct rmtcallargs a; struct rmtcallres r; struct rpc_msg msg; - struct timeval t; char outbuf[MAX_BROADCAST_SIZE], inbuf[UDPMSGSIZE]; /* @@ -263,18 +263,8 @@ clnt_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult) } #endif /* def SO_BROADCAST */ - if (sock+1 > FD_SETSIZE) { - int bytes = howmany(sock+1, NFDBITS) * sizeof(fd_mask); - fds = (fd_set *)malloc(bytes); - if (fds == NULL) { - stat = RPC_CANTSEND; - goto done_broad; - } - memset(fds, 0, bytes); - } else { - fds = &readfds; - FD_ZERO(fds); - } + pfd[0].fd = sock; + pfd[0].events = POLLIN; nets = newgetbroadcastnets(&addrs, sock); memset(&baddr, 0, sizeof (baddr)); @@ -282,9 +272,7 @@ clnt_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult) baddr.sin_family = AF_INET; baddr.sin_port = htons(PMAPPORT); baddr.sin_addr.s_addr = htonl(INADDR_ANY); - (void)gettimeofday(&t, (struct timezone *)0); msg.rm_xid = xid = arc4random(); 
- t.tv_usec = 0; msg.rm_direction = CALL; msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; msg.rm_call.cb_prog = PMAPPROG; @@ -318,7 +306,7 @@ clnt_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult) * the intended function of sending them slowly over half a * minute or so */ - for (t.tv_sec = 4; t.tv_sec <= 14; t.tv_sec += 2) { + for (timo = 4000; timo <= 14000; timo += 2000) { for (i = 0; i < nets; i++) { baddr.sin_addr = addrs[i]; if (sendto(sock, outbuf, outlen, 0, @@ -338,16 +326,22 @@ clnt_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult) msg.acpted_rply.ar_results.where = (caddr_t)&r; msg.acpted_rply.ar_results.proc = xdr_rmtcallres; - /* XXX we know the other bits are still clear */ - FD_SET(sock, fds); - switch (select(sock+1, fds, NULL, NULL, &t)) { + switch (poll(pfd, 1, timo)) { case 0: /* timed out */ stat = RPC_TIMEDOUT; continue; + case 1: + if (pfd[0].revents & POLLNVAL) + errno = EBADF; + else if (pfd[0].revents & POLLERR) + errno = EIO; + else + break; + /* FALLTHROUGH */ case -1: /* some kind of error */ if (errno == EINTR) goto recv_again; - perror("Broadcast select problem"); + perror("Broadcast poll problem"); stat = RPC_CANTRECV; goto done_broad; } @@ -393,8 +387,6 @@ clnt_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult) done_broad: if (addrs) free(addrs); - if (fds != &readfds) - free(fds); if (sock >= 0) (void)close(sock); AUTH_DESTROY(unix_auth); diff --git a/lib/libc/rpc/rpc.3 b/lib/libc/rpc/rpc.3 index fc08d3285fa..1b8483c8cbb 100644 --- a/lib/libc/rpc/rpc.3 +++ b/lib/libc/rpc/rpc.3 @@ -1,4 +1,4 @@ -.\" $OpenBSD: rpc.3,v 1.33 2003/06/29 13:42:40 jmc Exp $ +.\" $OpenBSD: rpc.3,v 1.34 2003/12/31 03:27:23 millert Exp $ .\" .\" Copyright (c) 1998 Theo de Raadt .\" All rights reserved. 
@@ -114,9 +114,13 @@ .Nm svc_getargs , .Nm svc_getcaller , .Nm svc_getreq , +.Nm svc_getreq_common , +.Nm svc_getreq_poll , .Nm svc_getreqset , .Nm svc_getreqset2 , .Nm svc_register , +.Nm svc_max_pollfd , +.Nm svc_pollfd , .Nm svc_sendreply , .Nm svc_unregister , .Nm svcerr_auth , @@ -198,6 +202,10 @@ .Fa rpc_createerr ; .Ft int .Fn svc_destroy "SVCXPRT *xprt" +.Ft struct pollfd * +.Fa svc_pollfd ; +.Ft int +.Fa svc_max_pollfd ; .Ft fd_set .Fa svc_fdset ; .Ft fd_set @@ -213,9 +221,13 @@ .Ft struct sockaddr_in * .Fn svc_getcaller "SVCXPRT *xprt" .Ft int +.Fn svc_getreq_common "int fd" +.Ft int +.Fn svc_getreq_poll "struct pollfd *pfds" "const int pollretval" +.Ft int .Fn svc_getreqset "fd_set *rdfds" .Ft int -.Fn svc_getreqset2 "fd_set *rdfds, int width" +.Fn svc_getreqset2 "fd_set *rdfds" "int width" .Ft int .Fn svc_getreq "int rdfds" .Ft int @@ -426,7 +438,6 @@ and the supported values of .Fa req and their argument types and what they do are: -.Pp .Bd -literal -offset indent .Tn CLSET_TIMEOUT struct timeval set total timeout .Tn CLGET_TIMEOUT struct timeval get total timeout @@ -438,7 +449,6 @@ if you set the timeout using the timeout parameter passed to .Fn clnt_call will be ignored in all future calls. -.Pp .Bd -literal -offset indent .Tn CLGET_SERVER_ADDR struct sockaddr_in get server's address .Ed @@ -446,7 +456,6 @@ will be ignored in all future calls. The following operations are valid for .Tn UDP only: -.Pp .Bd -literal -offset indent .Tn CLSET_RETRY_TIMEOUT struct timeval set the retry timeout .Tn CLGET_RETRY_TIMEOUT struct timeval get the retry timeout @@ -842,6 +851,35 @@ Use of .Fa xprt is undefined after calling this routine. .Pp +.Fa svc_pollfd +is a global variable reflecting the +.Tn RPC +service side's +read file descriptor array. +This variable is only of interest if service implementors do not call +.Fn svc_run , +but rather do their own asynchronous event processing. 
+This variable is read-only, and it may change after calls +to svc_getreq_poll() or any creation routines. +Do not pass it directly to +.Xr poll 2 ! +Instead, make a copy and pass that instead. +.Pp +.Fa svc_max_pollfd +is a global variable containing the maximum length of the +.Fa svc_pollfd +array. +.Fa svc_max_pollfd +is not a hard limit; it will grow automatically as needed. +This variable is read-only, and it may change after calls +to svc_getreq_poll() or any creation routines. +The purpose of +.Fa svc_max_pollfd +is to allow a service implementor to make a copy of +.Fa svc_pollfd +that may in turn be passed to +.Xr poll 2 . +.Pp .Fa __svc_fdset and .Fa __svc_fdsetsize @@ -866,6 +904,11 @@ to svc_getreqset() or any creation routines. Do not pass its address to .Xr select 2 ! Instead, pass the address of a copy. +These variables are considered obsolete; new programs should use +.Fa svc_pollfd +and +.Fa svc_max_pollfd +instead. .Pp .Fa svc_fdset is similar to @@ -873,16 +916,15 @@ is similar to but limited to .Fa FD_SETSIZE descriptors. -This is the preferred interface for portability though. This is only of interest if service implementors do not call .Fn svc_run , but rather do their own asynchronous event processing. This variable is read-only, and it may change after calls to svc_getreqset() or any creation routines. -Do not pass its address to +Do not pass it directly to .Xr select 2 ! -Instead, pass the address of a copy. +Instead, make a copy and pass that instead. .Pp Additionally, note that if the process has descriptor limits which are extended beyond @@ -890,13 +932,17 @@ which are extended beyond this variable will only be usable for the first .Fa FD_SETSIZE descriptors. +This variable is considered obsolete; new programs should use +.Fa svc_pollfd +which does not have this limit. .Pp .Fa svc_fds is similar to .Fa svc_fedset , but limited to 32 descriptors. This interface is obsoleted by -.Fa svc_fdset . 
+.Fa svc_fdset +and is included for source compatibility only. .Pp .Fn svc_freeargs is a macro that frees any data allocated by the @@ -933,6 +979,40 @@ of a procedure associated with the service transport handle, .Fa xprt . .Pp +.Fn svc_getreq_common +is called to handle a request on the given socket. +It is used internally by +.Fn svc_getreq_poll , +.Fn svc_getreqset , +.Fn svc_getreqset2 , +and +.Fn svc_getreq . +.Pp +.Fn svc_getreq_poll +is a routine which is only of interest if a service implementor +does not call +.Fn svc_run , +but instead implements custom asynchronous event processing. +It is called when the +.Xr poll 2 +system call has determined that an +.Tn RPC +request has arrived on some +.Tn RPC +.Fa socket(s) ; +.Fa pollretval +is the value returned by +.Xr poll 2 +and +.Fa pfds +is the array of +.Fa pollfd +structures passed to +.Xr poll 2 . +The routine returns when all sockets described by +.Fa pollfd +have been serviced. +.Pp .Fn svc_getreqset is a routine which is only of interest if a service implementor does not call @@ -972,14 +1052,14 @@ value of have been serviced. This interface is non-portable, but provided for applications which need to deal with large fd_set sizes. -.Tn RPC -internals use it. .Pp .Fn svc_getreq is similar to .Fa svc_getreqset , but limited to 32 descriptors. This interface is obsoleted by +.Fa svc_getreq_poll +and .Fa svc_getreqset . .Pp .Fn svc_register @@ -1025,10 +1105,10 @@ It waits for .Tn RPC requests to arrive, and calls the appropriate service procedure using -.Fn svc_getreq +.Fn svc_getreq_poll when one arrives. This procedure is usually waiting for a -.Xr select 2 +.Xr poll 2 system call to return. .Pp .Fn svc_sendreply @@ -1272,8 +1352,14 @@ service transport handles are created, they should register themselves with the .Tn RPC service package. -This routine modifies the global variable -.Fa svc_fds . 
+This routine modifies the global variables +.Fa svc_pollfd , +.Fa svc_fdset , +.Fa __svc_fdset +and may modify +.Fa svc_max_pollfd +and +.Fa __svc_fdsetsize . Service implementors usually do not need this routine. .Pp .Fn xprt_unregister @@ -1285,11 +1371,15 @@ it should unregister itself with the .Tn RPC service package. This routine modifies the global variable -.Fa svc_fds . +.Fa svc_pollfd , +.Fa svc_fdset , +and +.Fa __svc_fdset . Service implementors usually do not need this routine. .Sh SEE ALSO .\"Xr rpc_secure 3 , .Xr rpcgen 1 , +.Xr poll 2 , .Xr select 2 , .Xr getrpcent 3 , .Xr getrpcport 3 , diff --git a/lib/libc/rpc/rpc_commondata.c b/lib/libc/rpc/rpc_commondata.c index 46f2ccff110..87d23b23cd3 100644 --- a/lib/libc/rpc/rpc_commondata.c +++ b/lib/libc/rpc/rpc_commondata.c @@ -28,7 +28,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char *rcsid = "$OpenBSD: rpc_commondata.c,v 1.3 1996/08/19 08:31:47 tholo Exp $"; +static char *rcsid = "$OpenBSD: rpc_commondata.c,v 1.4 2003/12/31 03:27:23 millert Exp $"; #endif /* LIBC_SCCS and not lint */ #include <rpc/rpc.h> @@ -39,4 +39,6 @@ static char *rcsid = "$OpenBSD: rpc_commondata.c,v 1.3 1996/08/19 08:31:47 tholo struct opaque_auth _null_auth; fd_set svc_fdset; int svc_maxfd = -1; +int svc_max_pollfd; +struct pollfd *svc_pollfd; struct rpc_createerr rpc_createerr; diff --git a/lib/libc/rpc/svc.c b/lib/libc/rpc/svc.c index 05f5ee848e6..efe828394e8 100644 --- a/lib/libc/rpc/svc.c +++ b/lib/libc/rpc/svc.c @@ -28,7 +28,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char *rcsid = "$OpenBSD: svc.c,v 1.15 2002/02/16 21:27:24 millert Exp $"; +static char *rcsid = "$OpenBSD: svc.c,v 1.16 2003/12/31 03:27:23 millert Exp $"; #endif /* LIBC_SCCS and not lint */ /* @@ -70,9 +70,15 @@ static struct svc_callout { } *svc_head; static struct svc_callout *svc_find(u_long, u_long, struct svc_callout **); +static int svc_fd_insert(int); +static int svc_fd_remove(int); -int __svc_fdsetsize; -fd_set *__svc_fdset; 
+int __svc_fdsetsize = FD_SETSIZE; +fd_set *__svc_fdset = &svc_fdset; +static int svc_pollfd_size; /* number of slots in svc_pollfd */ +static int svc_used_pollfd; /* number of used slots in svc_pollfd */ +static int *svc_pollfd_freelist; /* svc_pollfd free list */ +static int svc_max_free; /* number of used slots in free list */ /* *************** SVCXPRT related stuff **************** */ @@ -96,33 +102,12 @@ __xprt_register(xprt) { int sock = xprt->xp_sock; - if (sock+1 > __svc_fdsetsize) { - int bytes = howmany(sock+1, NFDBITS) * sizeof(fd_mask); - fd_set *fds; - - fds = (fd_set *)malloc(bytes); - if (fds == NULL) - return (0); - memset(fds, 0, bytes); - if (__svc_fdset) { - memcpy(fds, __svc_fdset, howmany(__svc_fdsetsize, - NFDBITS) * sizeof(fd_mask)); - free(__svc_fdset); - } - __svc_fdset = fds; - __svc_fdsetsize = sock+1; - } - - if (sock < FD_SETSIZE) - FD_SET(sock, &svc_fdset); - FD_SET(sock, __svc_fdset); - - if (xports == NULL || sock+1 > xportssize) { + if (xports == NULL || sock + 1 > xportssize) { SVCXPRT **xp; int size = FD_SETSIZE; - if (sock+1 > size) - size = sock+1; + while (sock + 1 > size) + size += FD_SETSIZE; xp = (SVCXPRT **)mem_alloc(size * sizeof(SVCXPRT *)); if (xp == NULL) return (0); @@ -134,12 +119,159 @@ __xprt_register(xprt) xportssize = size; xports = xp; } + + if (!svc_fd_insert(sock)) + return (0); xports[sock] = xprt; + + return (1); +} + +/* + * Insert a socket into svc_pollfd, svc_fdset and __svc_fdset. + * If we are out of space, we allocate ~128 more slots than we + * need now for future expansion. + * We try to keep svc_pollfd well packed (no holes) as possible + * so that poll(2) is efficient. 
+ */ +static int +svc_fd_insert(int sock) +{ + int slot; + + /* + * Find a slot for sock in svc_pollfd; four possible cases: + * 1) need to allocate more space for svc_pollfd + * 2) there is an entry on the free list + * 3) the free list is empty (svc_used_pollfd is the next slot) + */ + if (svc_pollfd == NULL || svc_used_pollfd == svc_pollfd_size) { + struct pollfd *pfd; + int new_size, *new_freelist; + + new_size = svc_pollfd ? svc_pollfd_size + 128 : FD_SETSIZE; + pfd = realloc(svc_pollfd, sizeof(*svc_pollfd) * new_size); + if (pfd == NULL) + return (0); /* no changes */ + new_freelist = realloc(svc_pollfd_freelist, new_size / 2); + if (new_freelist == NULL) { + free(pfd); + return (0); /* no changes */ + } + svc_pollfd = pfd; + svc_pollfd_size = new_size; + svc_pollfd_freelist = new_freelist; + for (slot = svc_used_pollfd; slot < svc_pollfd_size; slot++) { + svc_pollfd[slot].fd = -1; + svc_pollfd[slot].events = svc_pollfd[slot].revents = 0; + } + slot = svc_used_pollfd; + } else if (svc_max_free != 0) { + /* there is an entry on the free list, use it */ + slot = svc_pollfd_freelist[--svc_max_free]; + } else { + /* nothing on the free list but we have room to grow */ + slot = svc_used_pollfd; + } + if (sock + 1 > __svc_fdsetsize) { + fd_set *fds; + size_t bytes; + + bytes = howmany(sock + 128, NFDBITS) * sizeof(fd_mask); + /* realloc() would be nicer but it gets tricky... 
*/ + if ((fds = (fd_set *)mem_alloc(bytes)) != NULL) { + memset(fds, 0, bytes); + memcpy(fds, __svc_fdset, + howmany(__svc_fdsetsize, NFDBITS) * sizeof(fd_mask)); + if (__svc_fdset != &svc_fdset) + free(__svc_fdset); + __svc_fdset = fds; + __svc_fdsetsize = bytes / sizeof(fd_mask); + } + } + + svc_pollfd[slot].fd = sock; + svc_pollfd[slot].events = POLLIN; + svc_used_pollfd++; + if (svc_max_pollfd < slot + 1) + svc_max_pollfd = slot + 1; + if (sock < FD_SETSIZE) + FD_SET(sock, &svc_fdset); + else if (sock < __svc_fdsetsize) + FD_SET(sock, __svc_fdset); svc_maxfd = max(svc_maxfd, sock); + return (1); } /* + * Remove a socket from svc_pollfd, svc_fdset and __svc_fdset. + * Freed slots are placed on the free list. If the free list fills + * up, we compact svc_pollfd (free list size == svc_pollfd_size /2). + */ +static int +svc_fd_remove(int sock) +{ + int slot; + + if (svc_pollfd == NULL) + return (0); + + for (slot = 0; slot < svc_max_pollfd; slot++) { + if (svc_pollfd[slot].fd == sock) { + svc_pollfd[slot].fd = -1; + svc_pollfd[slot].events = svc_pollfd[slot].revents = 0; + svc_used_pollfd--; + if (sock < FD_SETSIZE) + FD_CLR(sock, &svc_fdset); + else if (sock < __svc_fdsetsize) + FD_CLR(sock, __svc_fdset); + if (sock == svc_maxfd) { + for (svc_maxfd--; svc_maxfd >= 0; svc_maxfd--) + if (xports[svc_maxfd]) + break; + } + if (svc_max_free == svc_pollfd_size / 2) { + int i, j; + + /* + * Out of space in the free list; this means + * that svc_pollfd is half full. Pack things + * such that svc_max_pollfd == svc_used_pollfd + * and svc_pollfd_freelist is empty. 
+ */ + for (i = svc_used_pollfd, j = 0; + i < svc_max_pollfd && j < svc_max_free; i++) { + if (svc_pollfd[i].fd == -1) + continue; + /* be sure to use a low-numbered slot */ + while (svc_pollfd_freelist[j] >= + svc_used_pollfd) + j++; + svc_pollfd[svc_pollfd_freelist[j++]] = + svc_pollfd[i]; + svc_pollfd[i].fd = -1; + svc_pollfd[i].events = + svc_pollfd[i].revents = 0; + } + svc_max_pollfd = svc_used_pollfd; + svc_max_free = 0; + /* could realloc if svc_pollfd_size is big */ + } else { + /* trim svc_max_pollfd from the end */ + while (svc_max_pollfd > 0 && + svc_pollfd[svc_max_pollfd - 1].fd == -1) + svc_max_pollfd--; + } + svc_pollfd_freelist[svc_max_free++] = slot; + + return (1); + } + } + return (0); /* not found, shouldn't happen */ +} + +/* * De-activate a transport handle. */ void @@ -149,19 +281,8 @@ xprt_unregister(xprt) int sock = xprt->xp_sock; if (xports[sock] == xprt) { - xports[sock] = (SVCXPRT *)0; - if (sock < FD_SETSIZE) - FD_CLR(sock, &svc_fdset); - FD_CLR(sock, __svc_fdset); - if (sock == svc_maxfd) { - for (svc_maxfd--; svc_maxfd>=0; svc_maxfd--) - if (xports[svc_maxfd]) - break; - } - /* - * XXX could use svc_maxfd as a hint to - * decrease the size of __svc_fdset - */ + xports[sock] = NULL; + svc_fd_remove(sock); } } @@ -409,15 +530,12 @@ void svc_getreq(rdfds) int rdfds; { - fd_set readfds; + int bit; - FD_ZERO(&readfds); - readfds.fds_bits[0] = rdfds; - svc_getreqset(&readfds); + for (; (bit = ffs(rdfds)); rdfds ^= (1 << (bit - 1))) + svc_getreq_common(bit - 1); } -void svc_getreqset2(fd_set *, int); - void svc_getreqset(readfds) fd_set *readfds; @@ -430,6 +548,39 @@ svc_getreqset2(readfds, width) fd_set *readfds; int width; { + fd_mask mask, *maskp; + int bit, sock; + + maskp = readfds->fds_bits; + for (sock = 0; sock < width; sock += NFDBITS) { + for (mask = *maskp++; (bit = ffs(mask)); + mask ^= (1 << (bit - 1))) + svc_getreq_common(sock + bit - 1); + } +} + +void +svc_getreq_poll(pfd, nready) + struct pollfd *pfd; + const int nready; +{ 
+ int i, n; + + for (n = nready, i = 0; n > 0; i++) { + if (pfd[i].fd == -1) + continue; + if (pfd[i].revents != 0) + n--; + if ((pfd[i].revents & (POLLIN | POLLHUP)) == 0) + continue; + svc_getreq_common(pfd[i].fd); + } +} + +void +svc_getreq_common(fd) + int fd; +{ enum xprt_stat stat; struct rpc_msg msg; int prog_found; @@ -437,74 +588,65 @@ svc_getreqset2(readfds, width) u_long high_vers; struct svc_req r; SVCXPRT *xprt; - int bit; - fd_mask mask, *maskp; - int sock; char cred_area[2*MAX_AUTH_BYTES + RQCRED_SIZE]; + msg.rm_call.cb_cred.oa_base = cred_area; msg.rm_call.cb_verf.oa_base = &(cred_area[MAX_AUTH_BYTES]); r.rq_clntcred = &(cred_area[2*MAX_AUTH_BYTES]); - maskp = readfds->fds_bits; - for (sock = 0; sock < width; sock += NFDBITS) { - for (mask = *maskp++; (bit = ffs(mask)); mask ^= (1 << (bit - 1))) { - /* sock has input waiting */ - xprt = xports[sock + bit - 1]; - if (xprt == NULL) - /* But do we control sock? */ - continue; - /* now receive msgs from xprtprt (support batch calls) */ - do { - if (SVC_RECV(xprt, &msg)) { - - /* now find the exported program and call it */ - register struct svc_callout *s; - enum auth_stat why; - - r.rq_xprt = xprt; - r.rq_prog = msg.rm_call.cb_prog; - r.rq_vers = msg.rm_call.cb_vers; - r.rq_proc = msg.rm_call.cb_proc; - r.rq_cred = msg.rm_call.cb_cred; - /* first authenticate the message */ - if ((why= _authenticate(&r, &msg)) != AUTH_OK) { - svcerr_auth(xprt, why); - goto call_done; - } - /* now match message with a registered service*/ - prog_found = FALSE; - low_vers = (u_long) -1; - high_vers = 0; - for (s = svc_head; s != NULL_SVC; s = s->sc_next) { - if (s->sc_prog == r.rq_prog) { - if (s->sc_vers == r.rq_vers) { - (*s->sc_dispatch)(&r, xprt); - goto call_done; - } /* found correct version */ - prog_found = TRUE; - if (s->sc_vers < low_vers) - low_vers = s->sc_vers; - if (s->sc_vers > high_vers) - high_vers = s->sc_vers; - } /* found correct program */ - } - /* - * if we got here, the program or version - * is 
not served ... - */ - if (prog_found) - svcerr_progvers(xprt, - low_vers, high_vers); - else - svcerr_noprog(xprt); - /* Fall through to ... */ + /* sock has input waiting */ + xprt = xports[fd]; + if (xprt == NULL) + /* But do we control the fd? */ + return; + /* now receive msgs from xprtprt (support batch calls) */ + do { + if (SVC_RECV(xprt, &msg)) { + /* find the exported program and call it */ + struct svc_callout *s; + enum auth_stat why; + + r.rq_xprt = xprt; + r.rq_prog = msg.rm_call.cb_prog; + r.rq_vers = msg.rm_call.cb_vers; + r.rq_proc = msg.rm_call.cb_proc; + r.rq_cred = msg.rm_call.cb_cred; + /* first authenticate the message */ + if ((why= _authenticate(&r, &msg)) != AUTH_OK) { + svcerr_auth(xprt, why); + goto call_done; } - call_done: - if ((stat = SVC_STAT(xprt)) == XPRT_DIED){ - SVC_DESTROY(xprt); - break; + /* now match message with a registered service*/ + prog_found = FALSE; + low_vers = (u_long) -1; + high_vers = 0; + for (s = svc_head; s != NULL_SVC; s = s->sc_next) { + if (s->sc_prog == r.rq_prog) { + if (s->sc_vers == r.rq_vers) { + (*s->sc_dispatch)(&r, xprt); + goto call_done; + } /* found correct version */ + prog_found = TRUE; + if (s->sc_vers < low_vers) + low_vers = s->sc_vers; + if (s->sc_vers > high_vers) + high_vers = s->sc_vers; + } /* found correct program */ } - } while (stat == XPRT_MOREREQS); - } - } + /* + * if we got here, the program or version + * is not served ... + */ + if (prog_found) + svcerr_progvers(xprt, low_vers, high_vers); + else + svcerr_noprog(xprt); + /* Fall through to ... 
*/ + } + call_done: + if ((stat = SVC_STAT(xprt)) == XPRT_DIED){ + SVC_DESTROY(xprt); + break; + } + } while (stat == XPRT_MOREREQS); } diff --git a/lib/libc/rpc/svc_run.c b/lib/libc/rpc/svc_run.c index c75a9cc7e4f..bbe8374d7c5 100644 --- a/lib/libc/rpc/svc_run.c +++ b/lib/libc/rpc/svc_run.c @@ -28,7 +28,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char *rcsid = "$OpenBSD: svc_run.c,v 1.13 2002/08/03 22:04:28 millert Exp $"; +static char *rcsid = "$OpenBSD: svc_run.c,v 1.14 2003/12/31 03:27:23 millert Exp $"; #endif /* LIBC_SCCS and not lint */ /* @@ -42,45 +42,36 @@ static char *rcsid = "$OpenBSD: svc_run.c,v 1.13 2002/08/03 22:04:28 millert Exp #include <stdlib.h> #include <string.h> -extern int __svc_fdsetsize; -extern fd_set *__svc_fdset; - -void svc_getreqset2(fd_set *, int); - void svc_run() { - fd_set *fds; + struct pollfd *pfd = NULL; + int nready, saved_max_pollfd = 0; for (;;) { - if (__svc_fdset) { - int bytes = howmany(__svc_fdsetsize, NFDBITS) * - sizeof(fd_mask); - if ((fds = (fd_set *)malloc(bytes)) == NULL) { - perror("svc_run"); - return; + if (svc_max_pollfd > saved_max_pollfd) { + free(pfd); + pfd = malloc(sizeof(*pfd) * svc_max_pollfd); + if (pfd == NULL) { + perror("svc_run"); /* XXX */ + return; /* XXX */ } - memcpy(fds, __svc_fdset, bytes); - } else - fds = NULL; - switch (select(svc_maxfd+1, fds, 0, 0, (struct timeval *)0)) { + saved_max_pollfd = svc_max_pollfd; + } + memcpy(pfd, svc_pollfd, sizeof(*pfd) * svc_max_pollfd); + + nready = poll(pfd, svc_max_pollfd, INFTIM); + switch (nready) { case -1: - if (errno == EINTR) { - if (fds) - free(fds); + if (errno == EINTR) continue; - } - perror("svc_run: - select failed"); - if (fds) - free(fds); - return; + perror("svc_run: - poll failed"); /* XXX */ + free(pfd); + return; /* XXX */ case 0: - if (fds) - free(fds); continue; default: - svc_getreqset2(fds, svc_maxfd+1); - free(fds); + svc_getreq_poll(pfd, nready); } } } diff --git a/lib/libc/rpc/svc_tcp.c b/lib/libc/rpc/svc_tcp.c index 
9b6aca17a8d..97938d66a1b 100644 --- a/lib/libc/rpc/svc_tcp.c +++ b/lib/libc/rpc/svc_tcp.c @@ -28,7 +28,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char *rcsid = "$OpenBSD: svc_tcp.c,v 1.22 2002/09/06 18:35:12 deraadt Exp $"; +static char *rcsid = "$OpenBSD: svc_tcp.c,v 1.23 2003/12/31 03:27:23 millert Exp $"; #endif /* LIBC_SCCS and not lint */ /* @@ -349,29 +349,29 @@ readtcp(xprt, buf, len) int len; { int sock = xprt->xp_sock; - struct timeval start, delta; + int delta, nready; + struct timeval start; struct timeval tmp1, tmp2; - fd_set *fds = NULL; + struct pollfd *pfd = NULL; int prevbytes = 0, bytes; - extern int __svc_fdsetsize; - extern fd_set *__svc_fdset; - delta = wait_per_try; + pfd = (struct pollfd *)malloc(sizeof(*pfd) * (svc_max_pollfd + 1)); + if (pfd == NULL) + goto fatal_err; + pfd[0].fd = sock; + pfd[0].events = POLLIN; + pfd[0].revents = 0; + memcpy(&pfd[1], svc_pollfd, (sizeof(*pfd) * svc_max_pollfd)); + + /* + * All read operations timeout after 35 seconds. + * A timeout is fatal for the connection. 
+ */ + delta = wait_per_try.tv_sec * 1000; gettimeofday(&start, NULL); do { - bytes = howmany(__svc_fdsetsize, NFDBITS) * sizeof(fd_mask); - if (bytes != prevbytes) { - if (fds) - free(fds); - fds = (fd_set *)malloc(bytes); - prevbytes = bytes; - } - if (fds == NULL) - goto fatal_err; - memcpy(fds, __svc_fdset, bytes); - - FD_SET(sock, fds); - switch (select(svc_maxfd+1, fds, NULL, NULL, &delta)) { + nready = poll(pfd, svc_max_pollfd + 1, delta); + switch (nready) { case -1: if (errno != EINTR) goto fatal_err; @@ -380,32 +380,32 @@ readtcp(xprt, buf, len) timersub(&wait_per_try, &tmp2, &tmp1); if (tmp1.tv_sec < 0 || !timerisset(&tmp1)) goto fatal_err; - delta = tmp1; + delta = tmp1.tv_sec * 1000 + tmp1.tv_usec / 1000; continue; case 0: goto fatal_err; default: - if (!FD_ISSET(sock, fds)) { - svc_getreqset2(fds, svc_maxfd+1); + if (pfd[0].revents == 0) { + svc_getreq_poll(&pfd[1], nready); gettimeofday(&tmp1, NULL); timersub(&tmp1, &start, &tmp2); timersub(&wait_per_try, &tmp2, &tmp1); if (tmp1.tv_sec < 0 || !timerisset(&tmp1)) goto fatal_err; - delta = tmp1; + delta = tmp1.tv_sec * 1000 + tmp1.tv_usec / 1000; continue; } } - } while (!FD_ISSET(sock, fds)); + } while (pfd[0].revents == 0); if ((len = read(sock, buf, len)) > 0) { - if (fds) - free(fds); + if (pfd) + free(pfd); return (len); } fatal_err: ((struct tcp_conn *)(xprt->xp_p1))->strm_stat = XPRT_DIED; - if (fds) - free(fds); + if (pfd) + free(pfd); return (-1); } |