summaryrefslogtreecommitdiff
path: root/sys/kern
diff options
context:
space:
mode:
authorArtur Grabowski <art@cvs.openbsd.org>2001-05-26 04:16:09 +0000
committerArtur Grabowski <art@cvs.openbsd.org>2001-05-26 04:16:09 +0000
commitf1c7b7477f262308ca412e5901f4ad910eabdb9d (patch)
treeac06678b7d69528b988812bf87950ad304d71880 /sys/kern
parenta6cbfa05b2ebd5a8034ab76b0cda65f3c4e1792f (diff)
Sync in some improvements from FreeBSD + my own improvements.
- use pool for allocating pipe structures. - use microtime instead of splhigh and time. - improve locking. - better handling of nonblocking. - various efficiency fixes.
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/sys_pipe.c256
1 file changed, 155 insertions, 101 deletions
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 6817df31121..64639a36b4f 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sys_pipe.c,v 1.30 2001/05/14 13:43:53 art Exp $ */
+/* $OpenBSD: sys_pipe.c,v 1.31 2001/05/26 04:16:08 art Exp $ */
/*
* Copyright (c) 1996 John S. Dyson
@@ -34,6 +34,7 @@
#include <sys/stat.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
+#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/select.h>
@@ -56,6 +57,10 @@
#include <vm/vm_map.h>
#include <vm/vm_page.h>
+#if defined(UVM)
+#include <uvm/uvm_extern.h>
+#endif
+
#include <sys/pipe.h>
/*
@@ -82,7 +87,6 @@ struct filterops pipe_rfiltops =
struct filterops pipe_wfiltops =
{ 1, NULL, filt_pipedetach, filt_pipewrite };
-
/*
* Default pipe buffer size(s), this can be kind-of large now because pipe
* space is pageable. The pipe code will try to maintain locality of
@@ -96,12 +100,13 @@ struct filterops pipe_wfiltops =
*/
#define LIMITBIGPIPES 32
int nbigpipe;
-
static int amountpipekva;
+struct pool *pipe_pool;
+
void pipeclose __P((struct pipe *));
void pipeinit __P((struct pipe *));
-static __inline int pipelock __P((struct pipe *, int));
+static __inline int pipelock __P((struct pipe *));
static __inline void pipeunlock __P((struct pipe *));
static __inline void pipeselwakeup __P((struct pipe *));
void pipespace __P((struct pipe *));
@@ -122,9 +127,14 @@ sys_opipe(p, v, retval)
struct pipe *rpipe, *wpipe;
int fd, error;
- rpipe = malloc(sizeof(*rpipe), M_PIPE, M_WAITOK);
+ if (pipe_pool == NULL)
+ pipe_pool = pool_create(sizeof(struct pipe), 0, 0, 0, "pipepl",
+ 0, pool_page_alloc_nointr, pool_page_free_nointr,
+ M_PIPE);
+
+ rpipe = pool_get(pipe_pool, PR_WAITOK);
pipeinit(rpipe);
- wpipe = malloc(sizeof(*wpipe), M_PIPE, M_WAITOK);
+ wpipe = pool_get(pipe_pool, PR_WAITOK);
pipeinit(wpipe);
error = falloc(p, &rf, &fd);
@@ -166,7 +176,6 @@ pipespace(cpipe)
struct pipe *cpipe;
{
#if defined(UVM)
- /* XXX - this is wrong, use an aobj instead */
cpipe->pipe_buffer.buffer = (caddr_t) uvm_km_valloc(kernel_map,
cpipe->pipe_buffer.size);
if (cpipe->pipe_buffer.buffer == NULL)
@@ -202,7 +211,6 @@ void
pipeinit(cpipe)
struct pipe *cpipe;
{
- int s;
cpipe->pipe_buffer.in = 0;
cpipe->pipe_buffer.out = 0;
@@ -216,11 +224,9 @@ pipeinit(cpipe)
cpipe->pipe_state = 0;
cpipe->pipe_peer = NULL;
cpipe->pipe_busy = 0;
- s = splhigh();
- cpipe->pipe_ctime = time;
- cpipe->pipe_atime = time;
- cpipe->pipe_mtime = time;
- splx(s);
+ microtime(&cpipe->pipe_ctime);
+ cpipe->pipe_atime = cpipe->pipe_ctime;
+ cpipe->pipe_mtime = cpipe->pipe_ctime;
bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel);
cpipe->pipe_pgid = NO_PID;
}
@@ -230,16 +236,13 @@ pipeinit(cpipe)
* lock a pipe for I/O, blocking other access
*/
static __inline int
-pipelock(cpipe, catch)
+pipelock(cpipe)
struct pipe *cpipe;
- int catch;
{
int error;
while (cpipe->pipe_state & PIPE_LOCK) {
cpipe->pipe_state |= PIPE_LWANT;
- error = tsleep(cpipe, catch ? PRIBIO|PCATCH : PRIBIO,
- "pipelk", 0);
- if (error)
+ if ((error = tsleep(cpipe, PRIBIO|PCATCH, "pipelk", 0)))
return error;
}
cpipe->pipe_state |= PIPE_LOCK;
@@ -282,11 +285,16 @@ pipe_read(fp, poff, uio, cred)
struct ucred *cred;
{
struct pipe *rpipe = (struct pipe *) fp->f_data;
- int error = 0;
+ int error;
int nread = 0;
int size;
+ error = pipelock(rpipe);
+ if (error)
+ goto unlocked_error;
+
++rpipe->pipe_busy;
+
while (uio->uio_resid) {
/*
* normal pipe buffer receive
@@ -297,11 +305,8 @@ pipe_read(fp, poff, uio, cred)
size = rpipe->pipe_buffer.cnt;
if (size > uio->uio_resid)
size = uio->uio_resid;
- if ((error = pipelock(rpipe,1)) == 0) {
- error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
+ error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
size, uio);
- pipeunlock(rpipe);
- }
if (error) {
break;
}
@@ -310,6 +315,16 @@ pipe_read(fp, poff, uio, cred)
rpipe->pipe_buffer.out = 0;
rpipe->pipe_buffer.cnt -= size;
+
+ /*
+ * If there is no more to read in the pipe, reset
+ * its pointers to the beginning. This improves
+ * cache hit stats.
+ */
+ if (rpipe->pipe_buffer.cnt == 0) {
+ rpipe->pipe_buffer.in = 0;
+ rpipe->pipe_buffer.out = 0;
+ }
nread += size;
} else {
/*
@@ -319,6 +334,7 @@ pipe_read(fp, poff, uio, cred)
/* XXX error = ? */
break;
}
+
/*
* If the "write-side" has been blocked, wake it up now.
*/
@@ -326,68 +342,51 @@ pipe_read(fp, poff, uio, cred)
rpipe->pipe_state &= ~PIPE_WANTW;
wakeup(rpipe);
}
- if (nread > 0)
- break;
- if (fp->f_flag & FNONBLOCK) {
- error = EAGAIN;
+ /*
+ * Break if some data was read.
+ */
+ if (nread > 0)
break;
- }
/*
- * If there is no more to read in the pipe, reset
- * its pointers to the beginning. This improves
- * cache hit stats.
+ * Unlock the pipe buffer for our remaining processing.
+ * We will either break out with an error or we will
+ * sleep and relock to loop.
*/
-
- if ((error = pipelock(rpipe,1)) == 0) {
- if (rpipe->pipe_buffer.cnt == 0) {
- rpipe->pipe_buffer.in = 0;
- rpipe->pipe_buffer.out = 0;
- }
- pipeunlock(rpipe);
- } else {
- break;
- }
+ pipeunlock(rpipe);
- if (rpipe->pipe_state & PIPE_WANTW) {
- rpipe->pipe_state &= ~PIPE_WANTW;
- wakeup(rpipe);
+ /*
+ * Handle non-blocking mode operation or
+ * wait for more data.
+ */
+ if (fp->f_flag & FNONBLOCK)
+ error = EAGAIN;
+ else {
+ rpipe->pipe_state |= PIPE_WANTR;
+ if ((error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) == 0)
+ error = pipelock(rpipe);
}
-
- rpipe->pipe_state |= PIPE_WANTR;
- error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0);
if (error)
- break;
+ goto unlocked_error;
}
}
+ pipeunlock(rpipe);
- if (error == 0) {
- int s = splhigh();
- rpipe->pipe_atime = time;
- splx(s);
- }
-
+ if (error == 0)
+ microtime(&rpipe->pipe_atime);
+unlocked_error:
--rpipe->pipe_busy;
+
+ /*
+ * PIPE_WANT processing only makes sense if pipe_busy is 0.
+ */
if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
wakeup(rpipe);
} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
/*
- * If there is no more to read in the pipe, reset
- * its pointers to the beginning. This improves
- * cache hit stats.
- */
- if (rpipe->pipe_buffer.cnt == 0) {
- if ((error == 0) && (error = pipelock(rpipe,1)) == 0) {
- rpipe->pipe_buffer.in = 0;
- rpipe->pipe_buffer.out = 0;
- pipeunlock(rpipe);
- }
- }
-
- /*
- * If the "write-side" has been blocked, wake it up now.
+ * Handle write blocking hysteresis.
*/
if (rpipe->pipe_state & PIPE_WANTW) {
rpipe->pipe_state &= ~PIPE_WANTW;
@@ -455,7 +454,7 @@ pipe_write(fp, poff, uio, cred)
if (wpipe->pipe_buffer.buffer == NULL) {
- if ((error = pipelock(wpipe,1)) == 0) {
+ if ((error = pipelock(wpipe)) == 0) {
pipespace(wpipe);
pipeunlock(wpipe);
} else {
@@ -465,45 +464,102 @@ pipe_write(fp, poff, uio, cred)
++wpipe->pipe_busy;
orig_resid = uio->uio_resid;
+
+retrywrite:
while (uio->uio_resid) {
int space;
+
+ if (wpipe->pipe_state & PIPE_EOF) {
+ error = EPIPE;
+ break;
+ }
+
space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
/* Writes of size <= PIPE_BUF must be atomic. */
- /* XXX perhaps they need to be contiguous to be atomic? */
if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
space = 0;
if (space > 0 &&
(wpipe->pipe_buffer.cnt < wpipe->pipe_buffer.size)) {
- /*
- * This set the maximum transfer as a segment of
- * the buffer.
- */
- int size = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in;
- /*
- * space is the size left in the buffer
- */
- if (size > space)
- size = space;
- /*
- * now limit it to the size of the uio transfer
- */
- if (size > uio->uio_resid)
- size = uio->uio_resid;
- if ((error = pipelock(wpipe,1)) == 0) {
+ if ((error = pipelock(wpipe)) == 0) {
+ int size; /* Transfer size */
+ int segsize; /* first segment to transfer */
+
+ /*
+ * If a process blocked in uiomove, our
+ * value for space might be bad.
+ *
+ * XXX will we be ok if the reader has gone
+ * away here?
+ */
+ if (space > wpipe->pipe_buffer.size -
+ wpipe->pipe_buffer.cnt) {
+ pipeunlock(wpipe);
+ goto retrywrite;
+ }
+
+ /*
+ * Transfer size is minimum of uio transfer
+ * and free space in pipe buffer.
+ */
+ if (space > uio->uio_resid)
+ size = uio->uio_resid;
+ else
+ size = space;
+ /*
+ * First segment to transfer is minimum of
+ * transfer size and contiguous space in
+ * pipe buffer. If first segment to transfer
+ * is less than the transfer size, we've got
+ * a wraparound in the buffer.
+ */
+ segsize = wpipe->pipe_buffer.size -
+ wpipe->pipe_buffer.in;
+ if (segsize > size)
+ segsize = size;
+
+ /* Transfer first segment */
+
error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
- size, uio);
+ segsize, uio);
+
+ if (error == 0 && segsize < size) {
+ /*
+ * Transfer remaining part now, to
+ * support atomic writes. Wraparound
+ * happened.
+ */
+#ifdef DIAGNOSTIC
+ if (wpipe->pipe_buffer.in + segsize !=
+ wpipe->pipe_buffer.size)
+ panic("Expected pipe buffer wraparound disappeared");
+#endif
+
+ error = uiomove(&wpipe->pipe_buffer.buffer[0],
+ size - segsize, uio);
+ }
+ if (error == 0) {
+ wpipe->pipe_buffer.in += size;
+ if (wpipe->pipe_buffer.in >=
+ wpipe->pipe_buffer.size) {
+#ifdef DIAGNOSTIC
+ if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
+ panic("Expected wraparound bad");
+#endif
+ wpipe->pipe_buffer.in = size - segsize;
+ }
+
+ wpipe->pipe_buffer.cnt += size;
+#ifdef DIAGNOSTIC
+ if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
+ panic("Pipe buffer overflow");
+#endif
+ }
pipeunlock(wpipe);
}
if (error)
break;
-
- wpipe->pipe_buffer.in += size;
- if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size)
- wpipe->pipe_buffer.in = 0;
-
- wpipe->pipe_buffer.cnt += size;
} else {
/*
* If the "read-side" has been blocked, wake it up now.
@@ -567,11 +623,8 @@ pipe_write(fp, poff, uio, cred)
(error == EPIPE))
error = 0;
- if (error == 0) {
- int s = splhigh();
- wpipe->pipe_mtime = time;
- splx(s);
- }
+ if (error == 0)
+ microtime(&wpipe->pipe_mtime);
/*
* We have something to offer,
* wake up select.
@@ -684,9 +737,10 @@ pipe_stat(fp, ub, p)
TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec);
TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec);
TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec);
+ ub->st_uid = fp->f_cred->cr_uid;
+ ub->st_gid = fp->f_cred->cr_gid;
/*
- * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev,
- * st_flags, st_gen.
+ * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
* XXX (st_dev, st_ino) should be unique.
*/
return 0;
@@ -755,7 +809,7 @@ pipeclose(cpipe)
cpipe->pipe_buffer.size);
#endif
}
- free(cpipe, M_PIPE);
+ pool_put(pipe_pool, cpipe);
}
}