summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/kern/uipc_socket.c8
-rw-r--r--sys/kern/uipc_socket2.c39
-rw-r--r--sys/netinet/tcp_input.c42
-rw-r--r--sys/netinet/tcp_output.c9
-rw-r--r--sys/netinet/tcp_usrreq.c74
-rw-r--r--sys/netinet/tcp_var.h17
-rw-r--r--sys/sys/socketvar.h5
7 files changed, 160 insertions, 34 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 5c2e7448bb0..53648922f37 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_socket.c,v 1.83 2010/07/03 04:44:51 guenther Exp $ */
+/* $OpenBSD: uipc_socket.c,v 1.84 2010/09/24 02:59:45 claudio Exp $ */
/* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */
/*
@@ -1036,19 +1036,21 @@ sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
switch (optname) {
case SO_SNDBUF:
- if (sbcheckreserve(cnt, so->so_snd.sb_hiwat) ||
+ if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
sbreserve(&so->so_snd, cnt)) {
error = ENOBUFS;
goto bad;
}
+ so->so_snd.sb_wat = cnt;
break;
case SO_RCVBUF:
- if (sbcheckreserve(cnt, so->so_rcv.sb_hiwat) ||
+ if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
sbreserve(&so->so_rcv, cnt)) {
error = ENOBUFS;
goto bad;
}
+ so->so_rcv.sb_wat = cnt;
break;
case SO_SNDLOWAT:
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
index 45c335f8236..c1071fd3e19 100644
--- a/sys/kern/uipc_socket2.c
+++ b/sys/kern/uipc_socket2.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_socket2.c,v 1.50 2009/11/09 17:53:39 nicm Exp $ */
+/* $OpenBSD: uipc_socket2.c,v 1.51 2010/09/24 02:59:45 claudio Exp $ */
/* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */
/*
@@ -53,6 +53,7 @@
u_long sb_max = SB_MAX; /* patchable */
extern struct pool mclpools[];
+extern struct pool mbpool;
/*
* Procedures to manipulate state flags of socket
@@ -147,8 +148,6 @@ sonewconn(struct socket *head, int connstatus)
{
struct socket *so;
int soqueue = connstatus ? 1 : 0;
- extern u_long unpst_sendspace, unpst_recvspace;
- u_long snd_sb_hiwat, rcv_sb_hiwat;
splsoftassert(IPL_SOFTNET);
@@ -175,17 +174,11 @@ sonewconn(struct socket *head, int connstatus)
so->so_sigeuid = head->so_sigeuid;
/*
- * If we are tight on mbuf clusters, create the new socket
- * with the minimum. Sorry, you lose.
+ * Inherit watermarks but those may get clamped in low mem situations.
*/
- snd_sb_hiwat = head->so_snd.sb_hiwat;
- if (sbcheckreserve(snd_sb_hiwat, unpst_sendspace))
- snd_sb_hiwat = unpst_sendspace; /* and udp? */
- rcv_sb_hiwat = head->so_rcv.sb_hiwat;
- if (sbcheckreserve(rcv_sb_hiwat, unpst_recvspace))
- rcv_sb_hiwat = unpst_recvspace; /* and udp? */
-
- (void) soreserve(so, snd_sb_hiwat, rcv_sb_hiwat);
+ so->so_snd.sb_wat = head->so_snd.sb_wat;
+ so->so_rcv.sb_wat = head->so_rcv.sb_wat;
+
soqinsque(head, so, soqueue);
if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL, NULL,
curproc)) {
@@ -392,18 +385,30 @@ sbreserve(struct sockbuf *sb, u_long cc)
}
/*
- * If over 50% of mbuf clusters in use, do not accept any
- * greater than normal request.
+ * In low memory situation, do not accept any greater than normal request.
*/
int
sbcheckreserve(u_long cnt, u_long defcnt)
{
- if (cnt > defcnt &&
- mclpools[0].pr_nout> mclpools[0].pr_hardlimit / 2)
+ if (cnt > defcnt && sbchecklowmem()) /* refuse only requests above defcnt, and only while sbchecklowmem() reports low memory */
return (ENOBUFS);
return (0);
}
+/*
+ * Report whether the mclpools[0] or mbpool pool is running low.
+ *
+ * The answer is latched in a static flag to get hysteresis: the flag is
+ * set once either pool has more than 80% of its hard limit outstanding,
+ * and is cleared only when either pool is below 60% while neither is
+ * above 80%.  In between, the previous answer is returned.
+ *
+ * Returns 1 while in the low-memory state, 0 otherwise.
+ */
+int
+sbchecklowmem(void)
+{
+	static int sblowmem;
+
+	/* The "set" test is done first because it overrides the "clear". */
+	if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 80 / 100 ||
+	    mbpool.pr_nout > mbpool.pr_hardlimit * 80 / 100) {
+		sblowmem = 1;
+	} else if (mclpools[0].pr_nout < mclpools[0].pr_hardlimit * 60 / 100 ||
+	    mbpool.pr_nout < mbpool.pr_hardlimit * 60 / 100) {
+		sblowmem = 0;
+	}
+
+	return (sblowmem);
+}
+
/*
* Free mbufs held by a socket, and reserved mbuf space.
*/
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index d923a3c6713..664687ee821 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_input.c,v 1.235 2010/07/20 15:36:03 matthew Exp $ */
+/* $OpenBSD: tcp_input.c,v 1.236 2010/09/24 02:59:45 claudio Exp $ */
/* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */
/*
@@ -1091,6 +1091,7 @@ after_listen:
else if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0)
TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
+ tcp_update_sndspace(tp);
if (sb_notify(&so->so_snd))
sowwakeup(so);
if (so->so_snd.sb_cc)
@@ -1122,6 +1123,16 @@ after_listen:
if (so->so_state & SS_CANTRCVMORE)
m_freem(m);
else {
+ if (opti.ts_present && opti.ts_ecr) {
+ if (tp->rfbuf_ts < opti.ts_ecr &&
+ opti.ts_ecr - tp->rfbuf_ts < hz) {
+ tcp_update_rcvspace(tp);
+ /* Start over with next RTT. */
+ tp->rfbuf_cnt = 0;
+ tp->rfbuf_ts = 0;
+ } else
+ tp->rfbuf_cnt += tlen;
+ }
m_adj(m, iphlen + off);
sbappendstream(&so->so_rcv, m);
}
@@ -1152,6 +1163,10 @@ after_listen:
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
}
+ /* Reset receive buffer auto scaling when not in bulk receive mode. */
+ tp->rfbuf_cnt = 0;
+ tp->rfbuf_ts = 0;
+
switch (tp->t_state) {
/*
@@ -1861,6 +1876,8 @@ trimthenstep6:
tp->snd_wnd -= acked;
ourfinisacked = 0;
}
+
+ tcp_update_sndspace(tp);
if (sb_notify(&so->so_snd))
sowwakeup(so);
@@ -4063,9 +4080,28 @@ syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
sc->sc_requested_s_scale = tb.requested_s_scale;
sc->sc_request_r_scale = 0;
+ /*
+ * Pick the smallest possible scaling factor that
+ * will still allow us to scale up to sb_max.
+ *
+ * We do this because there are broken firewalls that
+ * will corrupt the window scale option, leading to
+ * the other endpoint believing that our advertised
+ * window is unscaled. At scale factors larger than
+ * 5 the unscaled window will drop below 1500 bytes,
+ * leading to serious problems when traversing these
+ * broken firewalls.
+ *
+ * With the default sbmax of 256K, a scale factor
+ * of 3 will be chosen by this algorithm. Those who
+ * choose a larger sbmax should watch out
+ * for the compatibility problems mentioned above.
+ *
+ * RFC1323: The Window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
+ */
while (sc->sc_request_r_scale < TCP_MAX_WINSHIFT &&
- TCP_MAXWIN << sc->sc_request_r_scale <
- so->so_rcv.sb_hiwat)
+ (TCP_MAXWIN << sc->sc_request_r_scale) < sb_max)
sc->sc_request_r_scale++;
} else {
sc->sc_requested_s_scale = 15;
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index d58421cf3a5..08d93835a4a 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_output.c,v 1.91 2010/09/08 08:34:42 claudio Exp $ */
+/* $OpenBSD: tcp_output.c,v 1.92 2010/09/24 02:59:45 claudio Exp $ */
/* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */
/*
@@ -593,6 +593,11 @@ send:
*lp++ = htonl(tcp_now + tp->ts_modulate);
*lp = htonl(tp->ts_recent);
optlen += TCPOLEN_TSTAMP_APPA;
+
+ /* Set receive buffer autosizing timestamp. */
+ if (tp->rfbuf_ts == 0)
+ tp->rfbuf_ts = tcp_now;
+
}
#ifdef TCP_SIGNATURE
@@ -1030,6 +1035,8 @@ send:
if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
tp->snd_max = tp->snd_nxt + len;
+ tcp_update_sndspace(tp);
+
/*
* Trace.
*/
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 6c09f5b896d..c3676bcceaa 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_usrreq.c,v 1.102 2010/06/07 13:08:43 claudio Exp $ */
+/* $OpenBSD: tcp_usrreq.c,v 1.103 2010/09/24 02:59:45 claudio Exp $ */
/* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */
/*
@@ -119,6 +119,7 @@ u_int tcp_sendspace = TCP_SENDSPACE;
#define TCP_RECVSPACE 1024*16
#endif
u_int tcp_recvspace = TCP_RECVSPACE;
+u_int tcp_autorcvbuf_inc = 16 * 1024;
int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS;
@@ -313,7 +314,7 @@ tcp_usrreq(so, req, m, nam, control, p)
so->so_state |= SS_CONNECTOUT;
/* Compute window scaling to request. */
- tcp_rscale(tp, so->so_rcv.sb_hiwat);
+ tcp_rscale(tp, sb_max);
soisconnecting(so);
tcpstat.tcps_connattempt++;
@@ -652,10 +653,19 @@ tcp_attach(so)
int error;
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
- error = soreserve(so, tcp_sendspace, tcp_recvspace);
+ /* if low on memory only allow smaller than default buffers */
+ if (so->so_snd.sb_wat == 0 ||
+ sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace))
+ so->so_snd.sb_wat = tcp_sendspace;
+ if (so->so_rcv.sb_wat == 0 ||
+ sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace))
+ so->so_rcv.sb_wat = tcp_recvspace;
+
+ error = soreserve(so, so->so_snd.sb_wat, so->so_rcv.sb_wat);
if (error)
return (error);
}
+
error = in_pcballoc(so, &tcbtable);
if (error)
return (error);
@@ -950,3 +960,61 @@ tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
}
/* NOTREACHED */
}
+
+/*
+ * Scale the send buffer so that inflight data is not accounted against
+ * the limit.  The buffer will scale with the congestion window; if the
+ * receiver stops acking data the window will shrink and therefore
+ * the buffer size will shrink as well.
+ * In low memory situations the target is clamped to tcp_sendspace and is
+ * never raised above the current reservation.
+ *
+ * tp: connection whose socket send buffer (so_snd) is resized.
+ */
+void
+tcp_update_sndspace(struct tcpcb *tp)
+{
+	struct socket *so = tp->t_inpcb->inp_socket;
+	/* u_long matches sb_hiwat/sb_wat/sb_max and the sbreserve() argument */
+	u_long nmax = so->so_snd.sb_hiwat;
+
+	if (sbchecklowmem()) {
+		/* low on memory try to get rid of some, but never grow */
+		if (tcp_sendspace < nmax)
+			nmax = tcp_sendspace;
+	} else if (so->so_snd.sb_wat != tcp_sendspace) {
+		/* user requested buffer size, auto-scaling disabled */
+		nmax = so->so_snd.sb_wat;
+	} else {
+		/* automatic buffer scaling: default plus unacked data */
+		nmax = MIN(sb_max, so->so_snd.sb_wat + tp->snd_max -
+		    tp->snd_una);
+	}
+
+	/* round to MSS boundary */
+	nmax = roundup(nmax, tp->t_maxseg);
+
+	/* best effort: the result of sbreserve() is deliberately ignored */
+	if (nmax != so->so_snd.sb_hiwat)
+		(void)sbreserve(&so->so_snd, nmax);
+}
+
+/*
+ * Resize the receive buffer of a connection.
+ *
+ * In low memory situations the target is clamped to tcp_recvspace and is
+ * never raised above the current reservation.  If the socket's sb_wat
+ * differs from tcp_recvspace the size was requested by the user and is
+ * used as is.  Otherwise the buffer grows by tcp_autorcvbuf_inc, capped
+ * at sb_max, once rfbuf_cnt (the autoscaling byte count) exceeds 7/8 of
+ * the current reservation.
+ *
+ * tp: connection whose socket receive buffer (so_rcv) is resized.
+ */
+void
+tcp_update_rcvspace(struct tcpcb *tp)
+{
+	struct socket *so = tp->t_inpcb->inp_socket;
+	/* u_long matches sb_hiwat/sb_wat/sb_max and the sbreserve() argument */
+	u_long nmax = so->so_rcv.sb_hiwat;
+
+	if (sbchecklowmem()) {
+		/* low on memory try to get rid of some, but never grow */
+		if (tcp_recvspace < nmax)
+			nmax = tcp_recvspace;
+	} else if (so->so_rcv.sb_wat != tcp_recvspace) {
+		/* user requested buffer size, auto-scaling disabled */
+		nmax = so->so_rcv.sb_wat;
+	} else if (tp->rfbuf_cnt > so->so_rcv.sb_hiwat / 8 * 7) {
+		/* automatic buffer scaling */
+		nmax = MIN(sb_max, so->so_rcv.sb_hiwat + tcp_autorcvbuf_inc);
+	}
+
+	/* nothing to change: leave the current reservation untouched */
+	if (nmax == so->so_rcv.sb_hiwat)
+		return;
+
+	/* round to MSS boundary */
+	nmax = roundup(nmax, tp->t_maxseg);
+	/* best effort: the result of sbreserve() is deliberately ignored */
+	(void)sbreserve(&so->so_rcv, nmax);
+}
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 1558747c0fd..dcca8ad53a9 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_var.h,v 1.95 2010/07/09 16:58:06 reyk Exp $ */
+/* $OpenBSD: tcp_var.h,v 1.96 2010/09/24 02:59:46 claudio Exp $ */
/* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */
/*
@@ -151,6 +151,11 @@ struct tcpcb {
* for slow start exponential to
* linear switch
*/
+
+/* auto-sizing variables */
+ u_int rfbuf_cnt; /* recv buffer autoscaling byte count */
+ u_int32_t rfbuf_ts; /* recv buffer autoscaling time stamp */
+
u_short t_maxopd; /* mss plus options */
u_short t_peermss; /* peer's maximum segment size */
@@ -476,8 +481,8 @@ struct tcpstat {
{ "keepintvl", CTLTYPE_INT }, \
{ "slowhz", CTLTYPE_INT }, \
{ "baddynamic", CTLTYPE_STRUCT }, \
- { "recvspace", CTLTYPE_INT }, \
- { "sendspace", CTLTYPE_INT }, \
+ { NULL, 0 }, \
+ { NULL, 0 }, \
{ "ident", CTLTYPE_STRUCT }, \
{ "sack", CTLTYPE_INT }, \
{ "mssdflt", CTLTYPE_INT }, \
@@ -501,8 +506,8 @@ struct tcpstat {
&tcp_keepintvl, \
NULL, \
NULL, \
- &tcp_recvspace, \
- &tcp_sendspace, \
+ NULL, \
+ NULL, \
NULL, \
NULL, \
&tcp_mssdflt, \
@@ -590,6 +595,8 @@ void tcp_rscale(struct tcpcb *, u_long);
void tcp_respond(struct tcpcb *, caddr_t, struct tcphdr *, tcp_seq,
tcp_seq, int, u_int);
void tcp_setpersist(struct tcpcb *);
+void tcp_update_sndspace(struct tcpcb *);
+void tcp_update_rcvspace(struct tcpcb *);
void tcp_slowtimo(void);
struct mbuf *
tcp_template(struct tcpcb *);
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 14af61bfadb..81c6e17267b 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: socketvar.h,v 1.46 2009/08/10 16:49:38 thib Exp $ */
+/* $OpenBSD: socketvar.h,v 1.47 2010/09/24 02:59:46 claudio Exp $ */
/* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */
/*-
@@ -82,6 +82,7 @@ struct socket {
u_long sb_cc; /* actual chars in buffer */
u_long sb_datacc; /* data only chars in buffer */
u_long sb_hiwat; /* max actual char count */
+ u_long sb_wat; /* default watermark */
u_long sb_mbcnt; /* chars of mbufs used */
u_long sb_mbmax; /* max chars of mbufs to use */
long sb_lowat; /* low water mark */
@@ -225,7 +226,6 @@ struct socket {
#ifdef _KERNEL
extern u_long sb_max;
-struct socket *sonewconn(struct socket *head, int connstatus);
extern struct pool socket_pool;
@@ -268,6 +268,7 @@ void sbflush(struct sockbuf *sb);
void sbinsertoob(struct sockbuf *sb, struct mbuf *m0);
void sbrelease(struct sockbuf *sb);
int sbcheckreserve(u_long cnt, u_long defcnt);
+int sbchecklowmem(void);
int sbreserve(struct sockbuf *sb, u_long cc);
int sbwait(struct sockbuf *sb);
int sb_lock(struct sockbuf *sb);