diff options
author | Niels Provos <provos@cvs.openbsd.org> | 2002-08-08 17:07:33 +0000 |
---|---|---|
committer | Niels Provos <provos@cvs.openbsd.org> | 2002-08-08 17:07:33 +0000 |
commit | 645bfcfc7a44cb3eef794f55a0f348cdbb48b20c (patch) | |
tree | 65348cbbc69a068876bfe2a31a3287f71b482d02 /sys/kern | |
parent | 3ab868adf3b73fcb169a2f03c12e12d299f6f672 (diff) |
socket buf speedup from thorpej@netbsd, okay art@ ericj@:
Make insertion of data into socket buffers O(1), i.e. constant time:
* Keep pointers to the first and last mbufs of the last record in the
socket buffer.
* Use the sb_lastrecord pointer in the sbappend*() family of functions
to avoid traversing the packet chain to find the last record.
* Add a new sbappendstream() function for stream protocols which
guarantee that there will never be more than one record in the
socket buffer. This function uses the sb_mbtail pointer to perform
the data insertion. Make TCP use sbappendstream(). In a profiling
run of a TCP transmission using a 1M socket buffer, this reduces
sbappend from 50% of the time to 0.02% of the time. Thanks
to Bill Sommerfeld and YAMAMOTO Takashi for their debugging
assistance!
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/uipc_socket.c | 74 | ||||
-rw-r--r-- | sys/kern/uipc_socket2.c | 230 | ||||
-rw-r--r-- | sys/kern/vnode_if.c | 2 |
3 files changed, 240 insertions, 66 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 8db7ad86888..dc42737aae0 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.43 2002/06/11 05:07:43 art Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.44 2002/08/08 17:07:32 provos Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -666,6 +666,8 @@ restart: error = EWOULDBLOCK; goto release; } + SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); splx(s); @@ -674,10 +676,18 @@ restart: goto restart; } dontblock: + /* + * On entry here, m points to the first record of the socket buffer. + * While we process the initial mbufs containing address and control + * info, we save a copy of m->m_nextpkt into nextrecord. + */ #ifdef notyet /* XXXX */ if (uio->uio_procp) uio->uio_procp->p_stats->p_ru.ru_msgrcv++; #endif + KASSERT(m == so->so_rcv.sb_mb); + SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { #ifdef DIAGNOSTIC @@ -728,9 +738,26 @@ dontblock: controlp = &(*controlp)->m_next; } } + + /* + * If m is non-NULL, we have some data to read. From now on, + * make sure to keep sb_lastrecord consistent when working on + * the last packet on the chain (nextrecord == NULL) and we + * change m->m_nextpkt. + */ if (m) { - if ((flags & MSG_PEEK) == 0) + if ((flags & MSG_PEEK) == 0) { m->m_nextpkt = nextrecord; + /* + * If nextrecord == NULL (this is a single chain), + * then sb_lastrecord may not be valid here if m + * was changed earlier. 
+ */ + if (nextrecord == NULL) { + KASSERT(so->so_rcv.sb_mb == m); + so->so_rcv.sb_lastrecord = m; + } + } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; @@ -738,7 +765,16 @@ dontblock: flags |= MSG_BCAST; if (m->m_flags & M_MCAST) flags |= MSG_MCAST; + } else { + if ((flags & MSG_PEEK) == 0) { + KASSERT(so->so_rcv.sb_mb == m); + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 2"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 2"); + moff = 0; offset = 0; while (m && uio->uio_resid > 0 && error == 0) { @@ -766,6 +802,8 @@ dontblock: * block interrupts again. */ if (mp == 0 && uio_error == 0) { + SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); resid = uio->uio_resid; splx(s); uio_error = @@ -794,8 +832,21 @@ dontblock: MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } - if (m) + /* + * If m != NULL, we also know that + * so->so_rcv.sb_mb != NULL. + */ + KASSERT(so->so_rcv.sb_mb == m); + if (m) { m->m_nextpkt = nextrecord; + if (nextrecord == NULL) + so->so_rcv.sb_lastrecord = m; + } else { + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 3"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 3"); } } else { if (flags & MSG_PEEK) @@ -834,6 +885,8 @@ dontblock: !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; + SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); error = sbwait(&so->so_rcv); if (error) { sbunlock(&so->so_rcv); @@ -851,8 +904,21 @@ dontblock: (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { - if (m == 0) + if (m == 0) { + /* + * First part is an inline SB_EMPTY_FIXUP(). Second + * part makes sure sb_lastrecord is up-to-date if + * there is still data in the socket buffer. 
+ */ so->so_rcv.sb_mb = nextrecord; + if (so->so_rcv.sb_mb == NULL) { + so->so_rcv.sb_mbtail = NULL; + so->so_rcv.sb_lastrecord = NULL; + } else if (nextrecord->m_nextpkt == NULL) + so->so_rcv.sb_lastrecord = nextrecord; + } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreq)(so, PRU_RCVD, NULL, (struct mbuf *)(long)flags, NULL); diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 1d88395486f..02543bc3144 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket2.c,v 1.27 2002/06/11 05:07:43 art Exp $ */ +/* $OpenBSD: uipc_socket2.c,v 1.28 2002/08/08 17:07:32 provos Exp $ */ /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ /* @@ -435,6 +435,61 @@ sbrelease(sb) * or sbdroprecord() when the data is acknowledged by the peer. */ +#ifdef SOCKBUF_DEBUG +void +sblastrecordchk(struct sockbuf *sb, const char *where) +{ + struct mbuf *m = sb->sb_mb; + + while (m && m->m_nextpkt) + m = m->m_nextpkt; + + if (m != sb->sb_lastrecord) { + printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n", + sb->sb_mb, sb->sb_lastrecord, m); + printf("packet chain:\n"); + for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) + printf("\t%p\n", m); + panic("sblastrecordchk from %s\n", where); + } +} + +void +sblastmbufchk(struct sockbuf *sb, const char *where) +{ + struct mbuf *m = sb->sb_mb; + struct mbuf *n; + + while (m && m->m_nextpkt) + m = m->m_nextpkt; + + while (m && m->m_next) + m = m->m_next; + + if (m != sb->sb_mbtail) { + printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n", + sb->sb_mb, sb->sb_mbtail, m); + printf("packet tree:\n"); + for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { + printf("\t"); + for (n = m; n != NULL; n = n->m_next) + printf("%p ", n); + printf("\n"); + } + panic("sblastmbufchk from %s", where); + } +} +#endif /* SOCKBUF_DEBUG */ + +#define SBLINKRECORD(sb, m0) 
\ +do { \ + if ((sb)->sb_lastrecord != NULL) \ + (sb)->sb_lastrecord->m_nextpkt = (m0); \ + else \ + (sb)->sb_mb = (m0); \ + (sb)->sb_lastrecord = (m0); \ +} while (/*CONSTCOND*/0) + /* * Append mbuf chain m to the last record in the * socket buffer sb. The additional space associated @@ -450,26 +505,58 @@ sbappend(sb, m) if (m == 0) return; - if ((n = sb->sb_mb) != NULL) { - while (n->m_nextpkt) - n = n->m_nextpkt; + + SBLASTRECORDCHK(sb, "sbappend 1"); + + if ((n = sb->sb_lastrecord) != NULL) { + /* + * XXX Would like to simply use sb_mbtail here, but + * XXX I need to verify that I won't miss an EOR that + * XXX way. + */ do { if (n->m_flags & M_EOR) { sbappendrecord(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); + } else { + /* + * If this is the first record in the socket buffer, it's + * also the last record. + */ + sb->sb_lastrecord = m; } sbcompress(sb, m, n); + SBLASTRECORDCHK(sb, "sbappend 2"); +} + +/* + * This version of sbappend() should only be used when the caller + * absolutely knows that there will never be more than one record + * in the socket buffer, that is, a stream protocol (such as TCP). 
+ */ +void +sbappendstream(struct sockbuf *sb, struct mbuf *m) +{ + + KDASSERT(m->m_nextpkt == NULL); + KASSERT(sb->sb_mb == sb->sb_lastrecord); + + SBLASTMBUFCHK(sb, __func__); + + sbcompress(sb, m, sb->sb_mbtail); + + sb->sb_lastrecord = sb->sb_mb; + SBLASTRECORDCHK(sb, __func__); } #ifdef SOCKBUF_DEBUG void -sbcheck(sb) - register struct sockbuf *sb; +sbcheck(struct sockbuf *sb) { - register struct mbuf *m; - register int len = 0, mbcnt = 0; + struct mbuf *m; + u_long len = 0, mbcnt = 0; for (m = sb->sb_mb; m; m = m->m_next) { len += m->m_len; @@ -480,7 +567,7 @@ sbcheck(sb) panic("sbcheck nextpkt"); } if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { - printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc, + printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc, mbcnt, sb->sb_mbcnt); panic("sbcheck"); } @@ -492,26 +579,20 @@ sbcheck(sb) * begins a new record. */ void -sbappendrecord(sb, m0) - register struct sockbuf *sb; - register struct mbuf *m0; +sbappendrecord(struct sockbuf *sb, struct mbuf *m0) { - register struct mbuf *m; + struct mbuf *m; if (m0 == 0) return; - if ((m = sb->sb_mb) != NULL) - while (m->m_nextpkt) - m = m->m_nextpkt; + /* * Put the first mbuf on the queue. * Note this permits zero length records. */ sballoc(sb, m0); - if (m) - m->m_nextpkt = m0; - else - sb->sb_mb = m0; + SBLASTRECORDCHK(sb, "sbappendrecord 1"); + SBLINKRECORD(sb, m0); m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { @@ -519,6 +600,7 @@ sbappendrecord(sb, m0) m->m_flags |= M_EOR; } sbcompress(sb, m, m0); + SBLASTRECORDCHK(sb, "sbappendrecord 2"); } /* @@ -527,15 +609,15 @@ sbappendrecord(sb, m0) * but after any other OOB data. 
*/ void -sbinsertoob(sb, m0) - register struct sockbuf *sb; - register struct mbuf *m0; +sbinsertoob(struct sockbuf *sb, struct mbuf *m0) { - register struct mbuf *m; - register struct mbuf **mp; + struct mbuf *m, **mp; if (m0 == 0) return; + + SBLASTRECORDCHK(sb, "sbinsertoob 1"); + for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) { again: switch (m->m_type) { @@ -555,6 +637,10 @@ sbinsertoob(sb, m0) */ sballoc(sb, m0); m0->m_nextpkt = *mp; + if (*mp == NULL) { + /* m0 is actually the new tail */ + sb->sb_lastrecord = m0; + } *mp = m0; m = m0->m_next; m0->m_next = 0; @@ -563,6 +649,7 @@ sbinsertoob(sb, m0) m->m_flags |= M_EOR; } sbcompress(sb, m, m0); + SBLASTRECORDCHK(sb, "sbinsertoob 2"); } /* @@ -572,12 +659,10 @@ sbinsertoob(sb, m0) * Returns 0 if no space in sockbuf or insufficient mbufs. */ int -sbappendaddr(sb, asa, m0, control) - register struct sockbuf *sb; - struct sockaddr *asa; - struct mbuf *m0, *control; +sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, + struct mbuf *control) { - register struct mbuf *m, *n; + struct mbuf *m, *n, *nlast; int space = asa->sa_len; if (m0 && (m0->m_flags & M_PKTHDR) == 0) @@ -603,23 +688,27 @@ sbappendaddr(sb, asa, m0, control) else control = m0; m->m_next = control; - for (n = m; n; n = n->m_next) + + SBLASTRECORDCHK(sb, "sbappendaddr 1"); + + for (n = m; n->m_next != NULL; n = n->m_next) sballoc(sb, n); - if ((n = sb->sb_mb) != NULL) { - while (n->m_nextpkt) - n = n->m_nextpkt; - n->m_nextpkt = m; - } else - sb->sb_mb = m; + sballoc(sb, n); + nlast = n; + SBLINKRECORD(sb, m); + + sb->sb_mbtail = nlast; + SBLASTMBUFCHK(sb, "sbappendaddr"); + + SBLASTRECORDCHK(sb, "sbappendaddr 2"); + return (1); } int -sbappendcontrol(sb, m0, control) - struct sockbuf *sb; - struct mbuf *m0, *control; +sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { - register struct mbuf *m, *n; + struct mbuf *m, *mlast, *n; int space = 0; if (control == 0) @@ -635,14 +724,20 @@ 
sbappendcontrol(sb, m0, control) if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ - for (m = control; m; m = m->m_next) + + SBLASTRECORDCHK(sb, "sbappendcontrol 1"); + + for (m = control; m->m_next != NULL; m = m->m_next) sballoc(sb, m); - if ((n = sb->sb_mb) != NULL) { - while (n->m_nextpkt) - n = n->m_nextpkt; - n->m_nextpkt = control; - } else - sb->sb_mb = control; + sballoc(sb, m); + mlast = m; + SBLINKRECORD(sb, control); + + sb->sb_mbtail = mlast; + SBLASTMBUFCHK(sb, "sbappendcontrol"); + + SBLASTRECORDCHK(sb, "sbappendcontrol 2"); + return (1); } @@ -682,6 +777,7 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) n->m_next = m; else sb->sb_mb = m; + sb->sb_mbtail = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; @@ -694,6 +790,7 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) else printf("semi-panic: sbcompress\n"); } + SBLASTMBUFCHK(sb, __func__); } /* @@ -701,27 +798,27 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) * Check that all resources are reclaimed. */ void -sbflush(sb) - register struct sockbuf *sb; +sbflush(struct sockbuf *sb) { - if (sb->sb_flags & SB_LOCK) - panic("sbflush"); + KASSERT((sb->sb_flags & SB_LOCK) == 0); + while (sb->sb_mbcnt) sbdrop(sb, (int)sb->sb_cc); - if (sb->sb_cc || sb->sb_mb) - panic("sbflush 2"); + + KASSERT(sb->sb_cc == 0); + KASSERT(sb->sb_mb == NULL); + KASSERT(sb->sb_mbtail == NULL); + KASSERT(sb->sb_lastrecord == NULL); } /* * Drop data from (the front of) a sockbuf. */ void -sbdrop(sb, len) - register struct sockbuf *sb; - register int len; +sbdrop(struct sockbuf *sb, int len) { - register struct mbuf *m, *mn; + struct mbuf *m, *mn; struct mbuf *next; next = (m = sb->sb_mb) ? m->m_nextpkt : 0; @@ -754,6 +851,17 @@ sbdrop(sb, len) m->m_nextpkt = next; } else sb->sb_mb = next; + /* + * First part is an inline SB_EMPTY_FIXUP(). Second part + * makes sure sb_lastrecord is up-to-date if we dropped + * part of the last record. 
+ */ + m = sb->sb_mb; + if (m == NULL) { + sb->sb_mbtail = NULL; + sb->sb_lastrecord = NULL; + } else if (m->m_nextpkt == NULL) + sb->sb_lastrecord = m; } /* @@ -761,10 +869,9 @@ sbdrop(sb, len) * and move the next record to the front. */ void -sbdroprecord(sb) - register struct sockbuf *sb; +sbdroprecord(struct sockbuf *sb) { - register struct mbuf *m, *mn; + struct mbuf *m, *mn; m = sb->sb_mb; if (m) { @@ -774,6 +881,7 @@ sbdroprecord(sb) MFREE(m, mn); } while ((m = mn) != NULL); } + SB_EMPTY_FIXUP(sb); } /* diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c index 1c762730f66..f21f2334f5d 100644 --- a/sys/kern/vnode_if.c +++ b/sys/kern/vnode_if.c @@ -5,7 +5,7 @@ * Created from the file: * OpenBSD: vnode_if.src,v 1.19 2002/02/22 20:37:45 drahn Exp * by the script: - * OpenBSD: vnode_if.sh,v 1.8 2001/02/26 17:34:18 art Exp + * OpenBSD: vnode_if.sh,v 1.10 2002/03/14 23:47:05 millert Exp */ /* |