summaryrefslogtreecommitdiff
path: root/sys/kern
diff options
context:
space:
mode:
authorNiels Provos <provos@cvs.openbsd.org>2002-08-08 17:07:33 +0000
committerNiels Provos <provos@cvs.openbsd.org>2002-08-08 17:07:33 +0000
commit645bfcfc7a44cb3eef794f55a0f348cdbb48b20c (patch)
tree65348cbbc69a068876bfe2a31a3287f71b482d02 /sys/kern
parent3ab868adf3b73fcb169a2f03c12e12d299f6f672 (diff)
socket buf speedup from thorpej@netbsd, okay art@ ericj@:
Make insertion of data into socket buffers O(C): * Keep pointers to the first and last mbufs of the last record in the socket buffer. * Use the sb_lastrecord pointer in the sbappend*() family of functions to avoid traversing the packet chain to find the last record. * Add a new sbappend_stream() function for stream protocols which guarantee that there will never be more than one record in the socket buffer. This function uses the sb_mbtail pointer to perform the data insertion. Make TCP use sbappend_stream(). On a profiling run, this makes sbappend of a TCP transmission using a 1M socket buffer go from 50% of the time to .02% of the time. Thanks to Bill Sommerfeld and YAMAMOTO Takashi for their debugging assistance!
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/uipc_socket.c74
-rw-r--r--sys/kern/uipc_socket2.c230
-rw-r--r--sys/kern/vnode_if.c2
3 files changed, 240 insertions, 66 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 8db7ad86888..dc42737aae0 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_socket.c,v 1.43 2002/06/11 05:07:43 art Exp $ */
+/* $OpenBSD: uipc_socket.c,v 1.44 2002/08/08 17:07:32 provos Exp $ */
/* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */
/*
@@ -666,6 +666,8 @@ restart:
error = EWOULDBLOCK;
goto release;
}
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
sbunlock(&so->so_rcv);
error = sbwait(&so->so_rcv);
splx(s);
@@ -674,10 +676,18 @@ restart:
goto restart;
}
dontblock:
+ /*
+ * On entry here, m points to the first record of the socket buffer.
+ * While we process the initial mbufs containing address and control
+ * info, we save a copy of m->m_nextpkt into nextrecord.
+ */
#ifdef notyet /* XXXX */
if (uio->uio_procp)
uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
+ KASSERT(m == so->so_rcv.sb_mb);
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
nextrecord = m->m_nextpkt;
if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
@@ -728,9 +738,26 @@ dontblock:
controlp = &(*controlp)->m_next;
}
}
+
+ /*
+ * If m is non-NULL, we have some data to read. From now on,
+ * make sure to keep sb_lastrecord consistent when working on
+ * the last packet on the chain (nextrecord == NULL) and we
+ * change m->m_nextpkt.
+ */
if (m) {
- if ((flags & MSG_PEEK) == 0)
+ if ((flags & MSG_PEEK) == 0) {
m->m_nextpkt = nextrecord;
+ /*
+ * If nextrecord == NULL (this is a single chain),
+ * then sb_lastrecord may not be valid here if m
+ * was changed earlier.
+ */
+ if (nextrecord == NULL) {
+ KASSERT(so->so_rcv.sb_mb == m);
+ so->so_rcv.sb_lastrecord = m;
+ }
+ }
type = m->m_type;
if (type == MT_OOBDATA)
flags |= MSG_OOB;
@@ -738,7 +765,16 @@ dontblock:
flags |= MSG_BCAST;
if (m->m_flags & M_MCAST)
flags |= MSG_MCAST;
+ } else {
+ if ((flags & MSG_PEEK) == 0) {
+ KASSERT(so->so_rcv.sb_mb == m);
+ so->so_rcv.sb_mb = nextrecord;
+ SB_EMPTY_FIXUP(&so->so_rcv);
+ }
}
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
+
moff = 0;
offset = 0;
while (m && uio->uio_resid > 0 && error == 0) {
@@ -766,6 +802,8 @@ dontblock:
* block interrupts again.
*/
if (mp == 0 && uio_error == 0) {
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
splx(s);
uio_error =
@@ -794,8 +832,21 @@ dontblock:
MFREE(m, so->so_rcv.sb_mb);
m = so->so_rcv.sb_mb;
}
- if (m)
+ /*
+ * If m != NULL, we also know that
+ * so->so_rcv.sb_mb != NULL.
+ */
+ KASSERT(so->so_rcv.sb_mb == m);
+ if (m) {
m->m_nextpkt = nextrecord;
+ if (nextrecord == NULL)
+ so->so_rcv.sb_lastrecord = m;
+ } else {
+ so->so_rcv.sb_mb = nextrecord;
+ SB_EMPTY_FIXUP(&so->so_rcv);
+ }
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
}
} else {
if (flags & MSG_PEEK)
@@ -834,6 +885,8 @@ dontblock:
!sosendallatonce(so) && !nextrecord) {
if (so->so_error || so->so_state & SS_CANTRCVMORE)
break;
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
error = sbwait(&so->so_rcv);
if (error) {
sbunlock(&so->so_rcv);
@@ -851,8 +904,21 @@ dontblock:
(void) sbdroprecord(&so->so_rcv);
}
if ((flags & MSG_PEEK) == 0) {
- if (m == 0)
+ if (m == 0) {
+ /*
+ * First part is an inline SB_EMPTY_FIXUP(). Second
+ * part makes sure sb_lastrecord is up-to-date if
+ * there is still data in the socket buffer.
+ */
so->so_rcv.sb_mb = nextrecord;
+ if (so->so_rcv.sb_mb == NULL) {
+ so->so_rcv.sb_mbtail = NULL;
+ so->so_rcv.sb_lastrecord = NULL;
+ } else if (nextrecord->m_nextpkt == NULL)
+ so->so_rcv.sb_lastrecord = nextrecord;
+ }
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
(struct mbuf *)(long)flags, NULL);
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
index 1d88395486f..02543bc3144 100644
--- a/sys/kern/uipc_socket2.c
+++ b/sys/kern/uipc_socket2.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_socket2.c,v 1.27 2002/06/11 05:07:43 art Exp $ */
+/* $OpenBSD: uipc_socket2.c,v 1.28 2002/08/08 17:07:32 provos Exp $ */
/* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */
/*
@@ -435,6 +435,61 @@ sbrelease(sb)
* or sbdroprecord() when the data is acknowledged by the peer.
*/
+#ifdef SOCKBUF_DEBUG
+void
+sblastrecordchk(struct sockbuf *sb, const char *where)
+{
+ struct mbuf *m = sb->sb_mb;
+
+ while (m && m->m_nextpkt)
+ m = m->m_nextpkt;
+
+ if (m != sb->sb_lastrecord) {
+ printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
+ sb->sb_mb, sb->sb_lastrecord, m);
+ printf("packet chain:\n");
+ for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
+ printf("\t%p\n", m);
+ panic("sblastrecordchk from %s\n", where);
+ }
+}
+
+void
+sblastmbufchk(struct sockbuf *sb, const char *where)
+{
+ struct mbuf *m = sb->sb_mb;
+ struct mbuf *n;
+
+ while (m && m->m_nextpkt)
+ m = m->m_nextpkt;
+
+ while (m && m->m_next)
+ m = m->m_next;
+
+ if (m != sb->sb_mbtail) {
+ printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
+ sb->sb_mb, sb->sb_mbtail, m);
+ printf("packet tree:\n");
+ for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
+ printf("\t");
+ for (n = m; n != NULL; n = n->m_next)
+ printf("%p ", n);
+ printf("\n");
+ }
+ panic("sblastmbufchk from %s", where);
+ }
+}
+#endif /* SOCKBUF_DEBUG */
+
+#define SBLINKRECORD(sb, m0) \
+do { \
+ if ((sb)->sb_lastrecord != NULL) \
+ (sb)->sb_lastrecord->m_nextpkt = (m0); \
+ else \
+ (sb)->sb_mb = (m0); \
+ (sb)->sb_lastrecord = (m0); \
+} while (/*CONSTCOND*/0)
+
/*
* Append mbuf chain m to the last record in the
* socket buffer sb. The additional space associated
@@ -450,26 +505,58 @@ sbappend(sb, m)
if (m == 0)
return;
- if ((n = sb->sb_mb) != NULL) {
- while (n->m_nextpkt)
- n = n->m_nextpkt;
+
+ SBLASTRECORDCHK(sb, "sbappend 1");
+
+ if ((n = sb->sb_lastrecord) != NULL) {
+ /*
+ * XXX Would like to simply use sb_mbtail here, but
+ * XXX I need to verify that I won't miss an EOR that
+ * XXX way.
+ */
do {
if (n->m_flags & M_EOR) {
sbappendrecord(sb, m); /* XXXXXX!!!! */
return;
}
} while (n->m_next && (n = n->m_next));
+ } else {
+ /*
+ * If this is the first record in the socket buffer, it's
+ * also the last record.
+ */
+ sb->sb_lastrecord = m;
}
sbcompress(sb, m, n);
+ SBLASTRECORDCHK(sb, "sbappend 2");
+}
+
+/*
+ * This version of sbappend() should only be used when the caller
+ * absolutely knows that there will never be more than one record
+ * in the socket buffer, that is, a stream protocol (such as TCP).
+ */
+void
+sbappendstream(struct sockbuf *sb, struct mbuf *m)
+{
+
+ KDASSERT(m->m_nextpkt == NULL);
+ KASSERT(sb->sb_mb == sb->sb_lastrecord);
+
+ SBLASTMBUFCHK(sb, __func__);
+
+ sbcompress(sb, m, sb->sb_mbtail);
+
+ sb->sb_lastrecord = sb->sb_mb;
+ SBLASTRECORDCHK(sb, __func__);
}
#ifdef SOCKBUF_DEBUG
void
-sbcheck(sb)
- register struct sockbuf *sb;
+sbcheck(struct sockbuf *sb)
{
- register struct mbuf *m;
- register int len = 0, mbcnt = 0;
+ struct mbuf *m;
+ u_long len = 0, mbcnt = 0;
for (m = sb->sb_mb; m; m = m->m_next) {
len += m->m_len;
@@ -480,7 +567,7 @@ sbcheck(sb)
panic("sbcheck nextpkt");
}
if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
- printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
+ printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
mbcnt, sb->sb_mbcnt);
panic("sbcheck");
}
@@ -492,26 +579,20 @@ sbcheck(sb)
* begins a new record.
*/
void
-sbappendrecord(sb, m0)
- register struct sockbuf *sb;
- register struct mbuf *m0;
+sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
- register struct mbuf *m;
+ struct mbuf *m;
if (m0 == 0)
return;
- if ((m = sb->sb_mb) != NULL)
- while (m->m_nextpkt)
- m = m->m_nextpkt;
+
/*
* Put the first mbuf on the queue.
* Note this permits zero length records.
*/
sballoc(sb, m0);
- if (m)
- m->m_nextpkt = m0;
- else
- sb->sb_mb = m0;
+ SBLASTRECORDCHK(sb, "sbappendrecord 1");
+ SBLINKRECORD(sb, m0);
m = m0->m_next;
m0->m_next = 0;
if (m && (m0->m_flags & M_EOR)) {
@@ -519,6 +600,7 @@ sbappendrecord(sb, m0)
m->m_flags |= M_EOR;
}
sbcompress(sb, m, m0);
+ SBLASTRECORDCHK(sb, "sbappendrecord 2");
}
/*
@@ -527,15 +609,15 @@ sbappendrecord(sb, m0)
* but after any other OOB data.
*/
void
-sbinsertoob(sb, m0)
- register struct sockbuf *sb;
- register struct mbuf *m0;
+sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
- register struct mbuf *m;
- register struct mbuf **mp;
+ struct mbuf *m, **mp;
if (m0 == 0)
return;
+
+ SBLASTRECORDCHK(sb, "sbinsertoob 1");
+
for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
again:
switch (m->m_type) {
@@ -555,6 +637,10 @@ sbinsertoob(sb, m0)
*/
sballoc(sb, m0);
m0->m_nextpkt = *mp;
+ if (*mp == NULL) {
+ /* m0 is actually the new tail */
+ sb->sb_lastrecord = m0;
+ }
*mp = m0;
m = m0->m_next;
m0->m_next = 0;
@@ -563,6 +649,7 @@ sbinsertoob(sb, m0)
m->m_flags |= M_EOR;
}
sbcompress(sb, m, m0);
+ SBLASTRECORDCHK(sb, "sbinsertoob 2");
}
/*
@@ -572,12 +659,10 @@ sbinsertoob(sb, m0)
* Returns 0 if no space in sockbuf or insufficient mbufs.
*/
int
-sbappendaddr(sb, asa, m0, control)
- register struct sockbuf *sb;
- struct sockaddr *asa;
- struct mbuf *m0, *control;
+sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
+ struct mbuf *control)
{
- register struct mbuf *m, *n;
+ struct mbuf *m, *n, *nlast;
int space = asa->sa_len;
if (m0 && (m0->m_flags & M_PKTHDR) == 0)
@@ -603,23 +688,27 @@ sbappendaddr(sb, asa, m0, control)
else
control = m0;
m->m_next = control;
- for (n = m; n; n = n->m_next)
+
+ SBLASTRECORDCHK(sb, "sbappendaddr 1");
+
+ for (n = m; n->m_next != NULL; n = n->m_next)
sballoc(sb, n);
- if ((n = sb->sb_mb) != NULL) {
- while (n->m_nextpkt)
- n = n->m_nextpkt;
- n->m_nextpkt = m;
- } else
- sb->sb_mb = m;
+ sballoc(sb, n);
+ nlast = n;
+ SBLINKRECORD(sb, m);
+
+ sb->sb_mbtail = nlast;
+ SBLASTMBUFCHK(sb, "sbappendaddr");
+
+ SBLASTRECORDCHK(sb, "sbappendaddr 2");
+
return (1);
}
int
-sbappendcontrol(sb, m0, control)
- struct sockbuf *sb;
- struct mbuf *m0, *control;
+sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
- register struct mbuf *m, *n;
+ struct mbuf *m, *mlast, *n;
int space = 0;
if (control == 0)
@@ -635,14 +724,20 @@ sbappendcontrol(sb, m0, control)
if (space > sbspace(sb))
return (0);
n->m_next = m0; /* concatenate data to control */
- for (m = control; m; m = m->m_next)
+
+ SBLASTRECORDCHK(sb, "sbappendcontrol 1");
+
+ for (m = control; m->m_next != NULL; m = m->m_next)
sballoc(sb, m);
- if ((n = sb->sb_mb) != NULL) {
- while (n->m_nextpkt)
- n = n->m_nextpkt;
- n->m_nextpkt = control;
- } else
- sb->sb_mb = control;
+ sballoc(sb, m);
+ mlast = m;
+ SBLINKRECORD(sb, control);
+
+ sb->sb_mbtail = mlast;
+ SBLASTMBUFCHK(sb, "sbappendcontrol");
+
+ SBLASTRECORDCHK(sb, "sbappendcontrol 2");
+
return (1);
}
@@ -682,6 +777,7 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
n->m_next = m;
else
sb->sb_mb = m;
+ sb->sb_mbtail = m;
sballoc(sb, m);
n = m;
m->m_flags &= ~M_EOR;
@@ -694,6 +790,7 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
else
printf("semi-panic: sbcompress\n");
}
+ SBLASTMBUFCHK(sb, __func__);
}
/*
@@ -701,27 +798,27 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
* Check that all resources are reclaimed.
*/
void
-sbflush(sb)
- register struct sockbuf *sb;
+sbflush(struct sockbuf *sb)
{
- if (sb->sb_flags & SB_LOCK)
- panic("sbflush");
+ KASSERT((sb->sb_flags & SB_LOCK) == 0);
+
while (sb->sb_mbcnt)
sbdrop(sb, (int)sb->sb_cc);
- if (sb->sb_cc || sb->sb_mb)
- panic("sbflush 2");
+
+ KASSERT(sb->sb_cc == 0);
+ KASSERT(sb->sb_mb == NULL);
+ KASSERT(sb->sb_mbtail == NULL);
+ KASSERT(sb->sb_lastrecord == NULL);
}
/*
* Drop data from (the front of) a sockbuf.
*/
void
-sbdrop(sb, len)
- register struct sockbuf *sb;
- register int len;
+sbdrop(struct sockbuf *sb, int len)
{
- register struct mbuf *m, *mn;
+ struct mbuf *m, *mn;
struct mbuf *next;
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
@@ -754,6 +851,17 @@ sbdrop(sb, len)
m->m_nextpkt = next;
} else
sb->sb_mb = next;
+ /*
+ * First part is an inline SB_EMPTY_FIXUP(). Second part
+ * makes sure sb_lastrecord is up-to-date if we dropped
+ * part of the last record.
+ */
+ m = sb->sb_mb;
+ if (m == NULL) {
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ } else if (m->m_nextpkt == NULL)
+ sb->sb_lastrecord = m;
}
/*
@@ -761,10 +869,9 @@ sbdrop(sb, len)
* and move the next record to the front.
*/
void
-sbdroprecord(sb)
- register struct sockbuf *sb;
+sbdroprecord(struct sockbuf *sb)
{
- register struct mbuf *m, *mn;
+ struct mbuf *m, *mn;
m = sb->sb_mb;
if (m) {
@@ -774,6 +881,7 @@ sbdroprecord(sb)
MFREE(m, mn);
} while ((m = mn) != NULL);
}
+ SB_EMPTY_FIXUP(sb);
}
/*
diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c
index 1c762730f66..f21f2334f5d 100644
--- a/sys/kern/vnode_if.c
+++ b/sys/kern/vnode_if.c
@@ -5,7 +5,7 @@
* Created from the file:
* OpenBSD: vnode_if.src,v 1.19 2002/02/22 20:37:45 drahn Exp
* by the script:
- * OpenBSD: vnode_if.sh,v 1.8 2001/02/26 17:34:18 art Exp
+ * OpenBSD: vnode_if.sh,v 1.10 2002/03/14 23:47:05 millert Exp
*/
/*