diff options
author | Miod Vallat <miod@cvs.openbsd.org> | 2003-10-03 16:38:04 +0000 |
---|---|---|
committer | Miod Vallat <miod@cvs.openbsd.org> | 2003-10-03 16:38:04 +0000 |
commit | e43c66ff4e929d0beb6716fb882523dc136b8c0c (patch) | |
tree | 4394ad7ba1c7886c97e1b4b62491b57f6d5361dd /sys/kern | |
parent | 58eb6ce5f7527ee83794ee261045e7a684d27912 (diff) |
Bring several fixes from FreeBSD to our current pipe implementation:
- when allocating or growing a pipe buffer is not possible, do not panic
but report the error and handle it correctly. (1.73)
- "The pipe_write() code was locking the pipe without busying it first
in certain cases, and a close() by another process could potentially rip
the pipe out from under the (blocked) locking operation." (from Al Viro,
1.81)
- "Remove test in pipe_write() which causes write(2) to return EAGAIN
on a non-blocking pipe in cases where select(2) returns the file
descriptor as ready for write. This in turns causes libc_r, for
one, to busy wait in such cases.
Note: it is a quick performance fix, a more complex fix might be
required in case this turns out to have unexpected side effects."
(1.141)
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/sys_pipe.c | 188 |
1 files changed, 106 insertions, 82 deletions
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 3e36d581fd4..31643d56fc3 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_pipe.c,v 1.44 2003/09/23 16:51:12 millert Exp $ */ +/* $OpenBSD: sys_pipe.c,v 1.45 2003/10/03 16:38:01 miod Exp $ */ /* * Copyright (c) 1996 John S. Dyson @@ -30,18 +30,11 @@ #include <sys/systm.h> #include <sys/proc.h> #include <sys/file.h> -#include <sys/protosw.h> -#include <sys/stat.h> #include <sys/filedesc.h> -#include <sys/malloc.h> #include <sys/pool.h> #include <sys/ioctl.h> #include <sys/stat.h> -#include <sys/select.h> #include <sys/signalvar.h> -#include <sys/errno.h> -#include <sys/queue.h> -#include <sys/kernel.h> #include <sys/mount.h> #include <sys/syscallargs.h> #include <sys/event.h> @@ -95,11 +88,12 @@ static int amountpipekva; struct pool pipe_pool; void pipeclose(struct pipe *); -void pipeinit(struct pipe *); +void pipe_free_kmem(struct pipe *); +int pipe_create(struct pipe *); static __inline int pipelock(struct pipe *); static __inline void pipeunlock(struct pipe *); static __inline void pipeselwakeup(struct pipe *); -void pipespace(struct pipe *); +int pipespace(struct pipe *, u_int); /* * The pipe system call for the DTYPE_PIPE type of pipes @@ -118,26 +112,30 @@ sys_opipe(p, v, retval) int fd, error; rpipe = pool_get(&pipe_pool, PR_WAITOK); - pipeinit(rpipe); + error = pipe_create(rpipe); + if (error != 0) + goto free1; wpipe = pool_get(&pipe_pool, PR_WAITOK); - pipeinit(wpipe); + error = pipe_create(wpipe); + if (error != 0) + goto free2; error = falloc(p, &rf, &fd); - if (error) + if (error != 0) goto free2; rf->f_flag = FREAD | FWRITE; rf->f_type = DTYPE_PIPE; + rf->f_data = rpipe; rf->f_ops = &pipeops; - rf->f_data = (caddr_t)rpipe; retval[0] = fd; error = falloc(p, &wf, &fd); - if (error) + if (error != 0) goto free3; wf->f_flag = FREAD | FWRITE; wf->f_type = DTYPE_PIPE; + wf->f_data = wpipe; wf->f_ops = &pipeops; - wf->f_data = (caddr_t)wpipe; retval[1] = fd; rpipe->pipe_peer = wpipe; @@ -152,6 +150,7 @@ free3: rpipe = NULL; free2: (void)pipeclose(wpipe); +free1: if (rpipe != NULL) (void)pipeclose(rpipe); return (error); @@ -159,44 +158,65 @@ free2: /* * Allocate kva for pipe circular buffer, the space is pageable + * This routine will 'realloc' the size of a pipe safely, if it fails + * it will retain the old buffer. + * If it fails it will return ENOMEM. */ -void -pipespace(cpipe) +int +pipespace(cpipe, size) struct pipe *cpipe; + u_int size; { - cpipe->pipe_buffer.buffer = (caddr_t) uvm_km_valloc(kernel_map, - cpipe->pipe_buffer.size); - if (cpipe->pipe_buffer.buffer == NULL) - panic("pipespace: out of kvm"); + caddr_t buffer; + + buffer = (caddr_t)uvm_km_valloc(kernel_map, size); + if (buffer == NULL) { + return (ENOMEM); + } + + /* free old resources if we are resizing */ + pipe_free_kmem(cpipe); + cpipe->pipe_buffer.buffer = buffer; + cpipe->pipe_buffer.size = size; + cpipe->pipe_buffer.in = 0; + cpipe->pipe_buffer.out = 0; + cpipe->pipe_buffer.cnt = 0; amountpipekva += cpipe->pipe_buffer.size; + + return (0); } /* * initialize and allocate VM and memory for pipe */ -void -pipeinit(cpipe) +int +pipe_create(cpipe) struct pipe *cpipe; { + int error; - cpipe->pipe_buffer.in = 0; - cpipe->pipe_buffer.out = 0; - cpipe->pipe_buffer.cnt = 0; - cpipe->pipe_buffer.size = PIPE_SIZE; - - /* Buffer kva gets dynamically allocated */ + /* so pipe_free_kmem() doesn't follow junk pointer */ cpipe->pipe_buffer.buffer = NULL; - /* cpipe->pipe_buffer.object = invalid */ - + /* + * protect so pipeclose() doesn't follow a junk pointer + * if pipespace() fails. + */ + bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); cpipe->pipe_state = 0; cpipe->pipe_peer = NULL; cpipe->pipe_busy = 0; + + error = pipespace(cpipe, PIPE_SIZE); + if (error != 0) + return (error); + microtime(&cpipe->pipe_ctime); cpipe->pipe_atime = cpipe->pipe_ctime; cpipe->pipe_mtime = cpipe->pipe_ctime; - bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); cpipe->pipe_pgid = NO_PID; + + return (0); } @@ -283,7 +303,6 @@ pipe_read(fp, poff, uio, cred) rpipe->pipe_buffer.out = 0; rpipe->pipe_buffer.cnt -= size; - /* * If there is no more to read in the pipe, reset * its pointers to the beginning. This improves @@ -297,11 +316,10 @@ pipe_read(fp, poff, uio, cred) } else { /* * detect EOF condition + * read returns 0 on EOF, no need to set error */ - if (rpipe->pipe_state & PIPE_EOF) { - /* XXX error = ? */ + if (rpipe->pipe_state & PIPE_EOF) break; - } /* * If the "write-side" has been blocked, wake it up now. @@ -328,9 +346,9 @@ pipe_read(fp, poff, uio, cred) * Handle non-blocking mode operation or * wait for more data. */ - if (fp->f_flag & FNONBLOCK) + if (fp->f_flag & FNONBLOCK) { error = EAGAIN; - else { + } else { rpipe->pipe_state |= PIPE_WANTR; if ((error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) == 0) error = pipelock(rpipe); @@ -365,7 +383,7 @@ unlocked_error: if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) pipeselwakeup(rpipe); - return error; + return (error); } int @@ -387,8 +405,9 @@ pipe_write(fp, poff, uio, cred) * detect loss of pipe read side, issue SIGPIPE if lost. */ if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { - return EPIPE; + return (EPIPE); } + ++wpipe->pipe_busy; /* * If it is advantageous to resize the pipe buffer, do @@ -399,38 +418,33 @@ pipe_write(fp, poff, uio, cred) (wpipe->pipe_buffer.size <= PIPE_SIZE) && (wpipe->pipe_buffer.cnt == 0)) { - if (wpipe->pipe_buffer.buffer) { - amountpipekva -= wpipe->pipe_buffer.size; - uvm_km_free(kernel_map, - (vaddr_t)wpipe->pipe_buffer.buffer, - wpipe->pipe_buffer.size); + if ((error = pipelock(wpipe)) == 0) { + if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) + nbigpipe++; + pipeunlock(wpipe); } - - wpipe->pipe_buffer.in = 0; - wpipe->pipe_buffer.out = 0; - wpipe->pipe_buffer.cnt = 0; - wpipe->pipe_buffer.size = BIG_PIPE_SIZE; - wpipe->pipe_buffer.buffer = NULL; - ++nbigpipe; } - - if (wpipe->pipe_buffer.buffer == NULL) { - if ((error = pipelock(wpipe)) == 0) { - pipespace(wpipe); - pipeunlock(wpipe); - } else { - return error; + /* + * If an early error occured unbusy and return, waking up any pending + * readers. + */ + if (error) { + --wpipe->pipe_busy; + if ((wpipe->pipe_busy == 0) && + (wpipe->pipe_state & PIPE_WANT)) { + wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); + wakeup(wpipe); } + return (error); } - ++wpipe->pipe_busy; orig_resid = uio->uio_resid; -retrywrite: while (uio->uio_resid) { int space; +retrywrite: if (wpipe->pipe_state & PIPE_EOF) { error = EPIPE; break; @@ -442,8 +456,7 @@ retrywrite: if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) space = 0; - if (space > 0 && - (wpipe->pipe_buffer.cnt < wpipe->pipe_buffer.size)) { + if (space > 0) { if ((error = pipelock(wpipe)) == 0) { int size; /* Transfer size */ int segsize; /* first segment to transfer */ @@ -562,9 +575,9 @@ retrywrite: } --wpipe->pipe_busy; - if ((wpipe->pipe_busy == 0) && - (wpipe->pipe_state & PIPE_WANT)) { - wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR); + + if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { + wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); wakeup(wpipe); } else if (wpipe->pipe_buffer.cnt > 0) { /* @@ -582,8 +595,9 @@ retrywrite: */ if ((wpipe->pipe_buffer.cnt == 0) && (uio->uio_resid == 0) && - (error == EPIPE)) + (error == EPIPE)) { error = 0; + } if (error == 0) microtime(&wpipe->pipe_mtime); @@ -593,7 +607,7 @@ retrywrite: if (wpipe->pipe_buffer.cnt) pipeselwakeup(wpipe); - return error; + return (error); } /* @@ -685,7 +699,7 @@ pipe_stat(fp, ub, p) { struct pipe *pipe = (struct pipe *)fp->f_data; - bzero((caddr_t)ub, sizeof (*ub)); + bzero(ub, sizeof(*ub)); ub->st_mode = S_IFIFO; ub->st_blksize = pipe->pipe_buffer.size; ub->st_size = pipe->pipe_buffer.cnt; @@ -699,7 +713,7 @@ pipe_stat(fp, ub, p) * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. * XXX (st_dev, st_ino) should be unique. */ - return 0; + return (0); } /* ARGSUSED */ @@ -710,9 +724,24 @@ pipe_close(fp, p) { struct pipe *cpipe = (struct pipe *)fp->f_data; - pipeclose(cpipe); + fp->f_ops = NULL; fp->f_data = NULL; - return 0; + pipeclose(cpipe); + return (0); +} + +void +pipe_free_kmem(cpipe) + struct pipe *cpipe; +{ + if (cpipe->pipe_buffer.buffer != NULL) { + if (cpipe->pipe_buffer.size > PIPE_SIZE) + --nbigpipe; + amountpipekva -= cpipe->pipe_buffer.size; + uvm_km_free(kernel_map, (vaddr_t)cpipe->pipe_buffer.buffer, + cpipe->pipe_buffer.size); + cpipe->pipe_buffer.buffer = NULL; + } } /* @@ -733,7 +762,7 @@ pipeclose(cpipe) */ while (cpipe->pipe_busy) { wakeup(cpipe); - cpipe->pipe_state |= PIPE_WANT|PIPE_EOF; + cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; tsleep(cpipe, PRIBIO, "pipecl", 0); } @@ -745,20 +774,14 @@ pipeclose(cpipe) ppipe->pipe_state |= PIPE_EOF; wakeup(ppipe); + KNOTE(&ppipe->pipe_sel.si_note, 0); ppipe->pipe_peer = NULL; } /* * free resources */ - if (cpipe->pipe_buffer.buffer) { - if (cpipe->pipe_buffer.size > PIPE_SIZE) - --nbigpipe; - amountpipekva -= cpipe->pipe_buffer.size; - uvm_km_free(kernel_map, - (vaddr_t)cpipe->pipe_buffer.buffer, - cpipe->pipe_buffer.size); - } + pipe_free_kmem(cpipe); pool_put(&pipe_pool, cpipe); } } @@ -776,6 +799,7 @@ pipe_kqfilter(struct file *fp, struct knote *kn) break; case EVFILT_WRITE: if (wpipe == NULL) + /* other end of pipe has been closed */ return (1); kn->kn_fop = &pipe_wfiltops; SLIST_INSERT_HEAD(&wpipe->pipe_sel.si_note, kn, kn_selnext); |