| author | Artur Grabowski <art@cvs.openbsd.org> | 2001-05-26 04:16:09 +0000 |
| --- | --- | --- |
| committer | Artur Grabowski <art@cvs.openbsd.org> | 2001-05-26 04:16:09 +0000 |
| commit | f1c7b7477f262308ca412e5901f4ad910eabdb9d (patch) | |
| tree | ac06678b7d69528b988812bf87950ad304d71880 /sys/kern | |
| parent | a6cbfa05b2ebd5a8034ab76b0cda65f3c4e1792f (diff) | |
Sync in some improvements from FreeBSD + my own improvements.
- use pool for allocating pipe structures.
- use microtime instead of splhigh and time.
- improve locking.
- better handling of nonblocking.
- various efficiency fixes.
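
The first bullet is the most mechanical change in the diff below: pipe structures now come from a pool(9) cache instead of malloc(9)/free(9). The following sketch condenses that pattern out of the diff for readability. The pool_create() arguments, the "pipepl" pool name, and the pool_get()/pool_put() calls are taken verbatim from the commit; the pipe_alloc()/pipe_free() wrappers are hypothetical names used only for this illustration.

```c
/*
 * Illustrative sketch only: the lazily-created pool(9) cache this commit
 * adopts for pipe structures, condensed from the diff below.  The wrapper
 * function names are invented; the pool calls mirror sys_opipe()/pipeclose().
 */
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/pipe.h>

struct pool *pipe_pool;		/* created on first use, as in sys_opipe() */

struct pipe *
pipe_alloc(void)
{
	if (pipe_pool == NULL)
		pipe_pool = pool_create(sizeof(struct pipe), 0, 0, 0, "pipepl",
		    0, pool_page_alloc_nointr, pool_page_free_nointr, M_PIPE);

	/* PR_WAITOK may sleep until an item is available. */
	return (pool_get(pipe_pool, PR_WAITOK));
}

void
pipe_free(struct pipe *cpipe)
{
	pool_put(pipe_pool, cpipe);	/* replaces free(cpipe, M_PIPE) */
}
```

Creating the pool on first use keeps the change local to sys_pipe.c: the same pipe_pool pointer later feeds pool_put() in pipeclose(), replacing the old free(cpipe, M_PIPE).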
Diffstat (limited to 'sys/kern')
| -rw-r--r-- | sys/kern/sys_pipe.c | 256 |

1 file changed, 155 insertions, 101 deletions
```diff
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 6817df31121..64639a36b4f 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: sys_pipe.c,v 1.30 2001/05/14 13:43:53 art Exp $	*/
+/*	$OpenBSD: sys_pipe.c,v 1.31 2001/05/26 04:16:08 art Exp $	*/
 
 /*
  * Copyright (c) 1996 John S. Dyson
@@ -34,6 +34,7 @@
 #include <sys/stat.h>
 #include <sys/filedesc.h>
 #include <sys/malloc.h>
+#include <sys/pool.h>
 #include <sys/ioctl.h>
 #include <sys/stat.h>
 #include <sys/select.h>
@@ -56,6 +57,10 @@
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 
+#if defined(UVM)
+#include <uvm/uvm_extern.h>
+#endif
+
 #include <sys/pipe.h>
 
 /*
@@ -82,7 +87,6 @@ struct filterops pipe_rfiltops =
 
 struct filterops pipe_wfiltops =
 	{ 1, NULL, filt_pipedetach, filt_pipewrite };
-
 /*
  * Default pipe buffer size(s), this can be kind-of large now because pipe
  * space is pageable.  The pipe code will try to maintain locality of
@@ -96,12 +100,13 @@
  */
 #define LIMITBIGPIPES 32
 int nbigpipe;
-
 static int amountpipekva;
 
+struct pool *pipe_pool;
+
 void	pipeclose __P((struct pipe *));
 void	pipeinit __P((struct pipe *));
-static __inline int pipelock __P((struct pipe *, int));
+static __inline int pipelock __P((struct pipe *));
 static __inline void pipeunlock __P((struct pipe *));
 static __inline void pipeselwakeup __P((struct pipe *));
 void	pipespace __P((struct pipe *));
@@ -122,9 +127,14 @@ sys_opipe(p, v, retval)
 	struct pipe *rpipe, *wpipe;
 	int fd, error;
 
-	rpipe = malloc(sizeof(*rpipe), M_PIPE, M_WAITOK);
+	if (pipe_pool == NULL)
+		pipe_pool = pool_create(sizeof(struct pipe), 0, 0, 0, "pipepl",
+		    0, pool_page_alloc_nointr, pool_page_free_nointr,
+		    M_PIPE);
+
+	rpipe = pool_get(pipe_pool, PR_WAITOK);
 	pipeinit(rpipe);
-	wpipe = malloc(sizeof(*wpipe), M_PIPE, M_WAITOK);
+	wpipe = pool_get(pipe_pool, PR_WAITOK);
 	pipeinit(wpipe);
 
 	error = falloc(p, &rf, &fd);
@@ -166,7 +176,6 @@ pipespace(cpipe)
 	struct pipe *cpipe;
 {
 #if defined(UVM)
-	/* XXX - this is wrong, use an aobj instead */
 	cpipe->pipe_buffer.buffer = (caddr_t) uvm_km_valloc(kernel_map,
 						cpipe->pipe_buffer.size);
 	if (cpipe->pipe_buffer.buffer == NULL)
@@ -202,7 +211,6 @@ void
 pipeinit(cpipe)
 	struct pipe *cpipe;
 {
-	int s;
 
 	cpipe->pipe_buffer.in = 0;
 	cpipe->pipe_buffer.out = 0;
@@ -216,11 +224,9 @@ pipeinit(cpipe)
 	cpipe->pipe_state = 0;
 	cpipe->pipe_peer = NULL;
 	cpipe->pipe_busy = 0;
-	s = splhigh();
-	cpipe->pipe_ctime = time;
-	cpipe->pipe_atime = time;
-	cpipe->pipe_mtime = time;
-	splx(s);
+	microtime(&cpipe->pipe_ctime);
+	cpipe->pipe_atime = cpipe->pipe_ctime;
+	cpipe->pipe_mtime = cpipe->pipe_ctime;
 	bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel);
 	cpipe->pipe_pgid = NO_PID;
 }
@@ -230,16 +236,13 @@ pipeinit(cpipe)
  * lock a pipe for I/O, blocking other access
  */
 static __inline int
-pipelock(cpipe, catch)
+pipelock(cpipe)
 	struct pipe *cpipe;
-	int catch;
 {
 	int error;
 	while (cpipe->pipe_state & PIPE_LOCK) {
 		cpipe->pipe_state |= PIPE_LWANT;
-		error = tsleep(cpipe, catch ? PRIBIO|PCATCH : PRIBIO,
-		    "pipelk", 0);
-		if (error)
+		if ((error = tsleep(cpipe, PRIBIO|PCATCH, "pipelk", 0)))
 			return error;
 	}
 	cpipe->pipe_state |= PIPE_LOCK;
@@ -282,11 +285,16 @@ pipe_read(fp, poff, uio, cred)
 	struct ucred *cred;
 {
 	struct pipe *rpipe = (struct pipe *) fp->f_data;
-	int error = 0;
+	int error;
 	int nread = 0;
 	int size;
 
+	error = pipelock(rpipe);
+	if (error)
+		goto unlocked_error;
+
 	++rpipe->pipe_busy;
+
 	while (uio->uio_resid) {
 		/*
 		 * normal pipe buffer receive
@@ -297,11 +305,8 @@ pipe_read(fp, poff, uio, cred)
 			size = rpipe->pipe_buffer.cnt;
 			if (size > uio->uio_resid)
 				size = uio->uio_resid;
-			if ((error = pipelock(rpipe,1)) == 0) {
-				error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
+			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
 					size, uio);
-				pipeunlock(rpipe);
-			}
 			if (error) {
 				break;
 			}
@@ -310,6 +315,16 @@ pipe_read(fp, poff, uio, cred)
 				rpipe->pipe_buffer.out = 0;
 
 			rpipe->pipe_buffer.cnt -= size;
+
+			/*
+			 * If there is no more to read in the pipe, reset
+			 * its pointers to the beginning.  This improves
+			 * cache hit stats.
+			 */
+			if (rpipe->pipe_buffer.cnt == 0) {
+				rpipe->pipe_buffer.in = 0;
+				rpipe->pipe_buffer.out = 0;
+			}
 			nread += size;
 		} else {
 			/*
@@ -319,6 +334,7 @@ pipe_read(fp, poff, uio, cred)
 				/* XXX error = ? */
 				break;
 			}
+
 			/*
 			 * If the "write-side" has been blocked, wake it up now.
 			 */
@@ -326,68 +342,51 @@ pipe_read(fp, poff, uio, cred)
 				rpipe->pipe_state &= ~PIPE_WANTW;
 				wakeup(rpipe);
 			}
-			if (nread > 0)
-				break;
 
-			if (fp->f_flag & FNONBLOCK) {
-				error = EAGAIN;
+			/*
+			 * Break if some data was read.
+			 */
+			if (nread > 0)
 				break;
-			}
 
 			/*
-			 * If there is no more to read in the pipe, reset
-			 * its pointers to the beginning.  This improves
-			 * cache hit stats.
+			 * Unlock the pipe buffer for our remaining processing.
+			 * We will either break out with an error or we will
+			 * sleep and relock to loop.
 			 */
-
-			if ((error = pipelock(rpipe,1)) == 0) {
-				if (rpipe->pipe_buffer.cnt == 0) {
-					rpipe->pipe_buffer.in = 0;
-					rpipe->pipe_buffer.out = 0;
-				}
-				pipeunlock(rpipe);
-			} else {
-				break;
-			}
+			pipeunlock(rpipe);
 
-			if (rpipe->pipe_state & PIPE_WANTW) {
-				rpipe->pipe_state &= ~PIPE_WANTW;
-				wakeup(rpipe);
+			/*
+			 * Handle non-blocking mode operation or
+			 * wait for more data.
+			 */
+			if (fp->f_flag & FNONBLOCK)
+				error = EAGAIN;
+			else {
+				rpipe->pipe_state |= PIPE_WANTR;
+				if ((error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) == 0)
+					error = pipelock(rpipe);
 			}
-
-			rpipe->pipe_state |= PIPE_WANTR;
-			error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0);
 			if (error)
-				break;
+				goto unlocked_error;
 		}
 	}
+	pipeunlock(rpipe);
 
-	if (error == 0) {
-		int s = splhigh();
-		rpipe->pipe_atime = time;
-		splx(s);
-	}
-
+	if (error == 0)
+		microtime(&rpipe->pipe_atime);
+unlocked_error:
 	--rpipe->pipe_busy;
+
+	/*
+	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
+	 */
 	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
 		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
 		wakeup(rpipe);
 	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
 		/*
-		 * If there is no more to read in the pipe, reset
-		 * its pointers to the beginning.  This improves
-		 * cache hit stats.
-		 */
-		if (rpipe->pipe_buffer.cnt == 0) {
-			if ((error == 0) && (error = pipelock(rpipe,1)) == 0) {
-				rpipe->pipe_buffer.in = 0;
-				rpipe->pipe_buffer.out = 0;
-				pipeunlock(rpipe);
-			}
-		}
-
-		/*
-		 * If the "write-side" has been blocked, wake it up now.
+		 * Handle write blocking hysteresis.
 		 */
 		if (rpipe->pipe_state & PIPE_WANTW) {
 			rpipe->pipe_state &= ~PIPE_WANTW;
@@ -455,7 +454,7 @@ pipe_write(fp, poff, uio, cred)
 
 	if (wpipe->pipe_buffer.buffer == NULL) {
-		if ((error = pipelock(wpipe,1)) == 0) {
+		if ((error = pipelock(wpipe)) == 0) {
 			pipespace(wpipe);
 			pipeunlock(wpipe);
 		} else {
@@ -465,45 +464,102 @@ pipe_write(fp, poff, uio, cred)
 
 	++wpipe->pipe_busy;
 	orig_resid = uio->uio_resid;
+
+retrywrite:
 	while (uio->uio_resid) {
 		int space;
+
+		if (wpipe->pipe_state & PIPE_EOF) {
+			error = EPIPE;
+			break;
+		}
+
 		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
 
 		/* Writes of size <= PIPE_BUF must be atomic. */
-		/* XXX perhaps they need to be contiguous to be atomic? */
 		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
 			space = 0;
 
 		if (space > 0 && (wpipe->pipe_buffer.cnt < wpipe->pipe_buffer.size)) {
-			/*
-			 * This set the maximum transfer as a segment of
-			 * the buffer.
-			 */
-			int size = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in;
-			/*
-			 * space is the size left in the buffer
-			 */
-			if (size > space)
-				size = space;
-			/*
-			 * now limit it to the size of the uio transfer
-			 */
-			if (size > uio->uio_resid)
-				size = uio->uio_resid;
-			if ((error = pipelock(wpipe,1)) == 0) {
+			if ((error = pipelock(wpipe)) == 0) {
+				int size;	/* Transfer size */
+				int segsize;	/* first segment to transfer */
+
+				/*
+				 * If a process blocked in uiomove, our
+				 * value for space might be bad.
+				 *
+				 * XXX will we be ok if the reader has gone
+				 * away here?
+				 */
+				if (space > wpipe->pipe_buffer.size -
+				    wpipe->pipe_buffer.cnt) {
+					pipeunlock(wpipe);
+					goto retrywrite;
+				}
+
+				/*
+				 * Transfer size is minimum of uio transfer
+				 * and free space in pipe buffer.
+				 */
+				if (space > uio->uio_resid)
+					size = uio->uio_resid;
+				else
+					size = space;
+				/*
+				 * First segment to transfer is minimum of
+				 * transfer size and contiguous space in
+				 * pipe buffer.  If first segment to transfer
+				 * is less than the transfer size, we've got
+				 * a wraparound in the buffer.
+				 */
+				segsize = wpipe->pipe_buffer.size -
+				    wpipe->pipe_buffer.in;
+				if (segsize > size)
+					segsize = size;
+
+				/* Transfer first segment */
+
 				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
-						size, uio);
+						segsize, uio);
+
+				if (error == 0 && segsize < size) {
+					/*
+					 * Transfer remaining part now, to
+					 * support atomic writes.  Wraparound
+					 * happened.
+					 */
+#ifdef DIAGNOSTIC
+					if (wpipe->pipe_buffer.in + segsize !=
+					    wpipe->pipe_buffer.size)
+						panic("Expected pipe buffer wraparound disappeared");
+#endif
+
+					error = uiomove(&wpipe->pipe_buffer.buffer[0],
+							size - segsize, uio);
+				}
+				if (error == 0) {
+					wpipe->pipe_buffer.in += size;
+					if (wpipe->pipe_buffer.in >=
+					    wpipe->pipe_buffer.size) {
+#ifdef DIAGNOSTIC
+						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
+							panic("Expected wraparound bad");
+#endif
+						wpipe->pipe_buffer.in = size - segsize;
+					}
+
+					wpipe->pipe_buffer.cnt += size;
+#ifdef DIAGNOSTIC
+					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
+						panic("Pipe buffer overflow");
+#endif
+				}
 				pipeunlock(wpipe);
 			}
 			if (error)
 				break;
-
-			wpipe->pipe_buffer.in += size;
-			if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size)
-				wpipe->pipe_buffer.in = 0;
-
-			wpipe->pipe_buffer.cnt += size;
 		} else {
 			/*
 			 * If the "read-side" has been blocked, wake it up now.
@@ -567,11 +623,8 @@ pipe_write(fp, poff, uio, cred)
 	    (error == EPIPE))
 		error = 0;
 
-	if (error == 0) {
-		int s = splhigh();
-		wpipe->pipe_mtime = time;
-		splx(s);
-	}
+	if (error == 0)
+		microtime(&wpipe->pipe_mtime);
 	/*
 	 * We have something to offer,
 	 * wake up select.
@@ -684,9 +737,10 @@ pipe_stat(fp, ub, p)
 	TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec);
 	TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec);
 	TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec);
+	ub->st_uid = fp->f_cred->cr_uid;
+	ub->st_gid = fp->f_cred->cr_gid;
 	/*
-	 * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev,
-	 * st_flags, st_gen.
+	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
 	 * XXX (st_dev, st_ino) should be unique.
 	 */
 	return 0;
@@ -755,7 +809,7 @@ pipeclose(cpipe)
 				cpipe->pipe_buffer.size);
 #endif
 		}
-		free(cpipe, M_PIPE);
+		pool_put(pipe_pool, cpipe);
 	}
 }
```
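
The bulk of the pipe_write() hunk above is the two-segment copy that keeps writes of up to PIPE_BUF bytes atomic when they wrap around the end of the ring buffer. Below is a small, self-contained userland sketch of that arithmetic, with memcpy() standing in for uiomove(). The ringbuf struct, the ring_write() helper, and the main() harness are invented for illustration; the in/out/cnt fields and the size/segsize wrap rule mirror the diff.

```c
/* Userland sketch of the wraparound copy used by the new pipe_write(). */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define BUFSIZE 16

struct ringbuf {
	char	buffer[BUFSIZE];
	size_t	in;	/* next byte to write, like pipe_buffer.in */
	size_t	out;	/* next byte to read (unused in this sketch) */
	size_t	cnt;	/* bytes currently stored */
};

/* Copy up to 'resid' bytes into the ring, splitting across the wrap point. */
static size_t
ring_write(struct ringbuf *rb, const char *src, size_t resid)
{
	size_t space, size, segsize;

	space = BUFSIZE - rb->cnt;
	size = (space > resid) ? resid : space;

	/* First segment: from 'in' to the physical end of the buffer. */
	segsize = BUFSIZE - rb->in;
	if (segsize > size)
		segsize = size;
	memcpy(&rb->buffer[rb->in], src, segsize);

	/* Second segment exists only when the transfer wrapped around. */
	if (segsize < size)
		memcpy(&rb->buffer[0], src + segsize, size - segsize);

	rb->in += size;
	if (rb->in >= BUFSIZE)
		rb->in = size - segsize;	/* same wrap rule as the diff */
	rb->cnt += size;
	assert(rb->cnt <= BUFSIZE);
	return size;
}

int
main(void)
{
	struct ringbuf rb = { .in = 12, .out = 12, .cnt = 0 };
	size_t n;

	/* 8 bytes into a 16-byte ring at offset 12: 4 fit at the end, 4 wrap. */
	n = ring_write(&rb, "ABCDEFGH", 8);
	printf("wrote %zu, in=%zu, cnt=%zu\n", n, rb.in, rb.cnt);
	return 0;
}
```

Writing 8 bytes into a 16-byte ring starting at in = 12 places 4 bytes at the end of the buffer and wraps the remaining 4 to the front, leaving in = 4 and cnt = 8, which is exactly the size - segsize adjustment the DIAGNOSTIC checks in the diff guard.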