summaryrefslogtreecommitdiff
path: root/sys/netiso/tp_subr.c
diff options
context:
space:
mode:
authorTheo de Raadt <deraadt@cvs.openbsd.org>1995-10-18 08:53:40 +0000
committerTheo de Raadt <deraadt@cvs.openbsd.org>1995-10-18 08:53:40 +0000
commitd6583bb2a13f329cf0332ef2570eb8bb8fc0e39c (patch)
treeece253b876159b39c620e62b6c9b1174642e070e /sys/netiso/tp_subr.c
initial import of NetBSD tree
Diffstat (limited to 'sys/netiso/tp_subr.c')
-rw-r--r--sys/netiso/tp_subr.c944
1 files changed, 944 insertions, 0 deletions
diff --git a/sys/netiso/tp_subr.c b/sys/netiso/tp_subr.c
new file mode 100644
index 00000000000..c16c842c3f2
--- /dev/null
+++ b/sys/netiso/tp_subr.c
@@ -0,0 +1,944 @@
+/* $NetBSD: tp_subr.c,v 1.6 1995/08/04 01:13:29 mycroft Exp $ */
+
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tp_subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+ Copyright IBM Corporation 1987
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * The main work of data transfer is done here.
+ * These routines are called from tp.trans.
+ * They include the routines that check the validity of acks and Xacks,
+ * (tp_goodack() and tp_goodXack() )
+ * take packets from socket buffers and send them (tp_send()),
+ * drop the data from the socket buffers (tp_sbdrop()),
+ * and put incoming packet data into socket buffers (tp_stash()).
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <netiso/tp_ip.h>
+#include <netiso/iso.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_meas.h>
+#include <netiso/tp_seq.h>
+
+/* Old-style declarations of routines called further down in this file. */
+int tp_emit();
+int tp_sbdrop();
+void tp_send();
+
+/* Clock tick counter; the storage lives outside this file. */
+extern int ticks;
+
+/*
+ * Duplicate-ack count at which tp_goodack() retransmits from tp_snduna
+ * without waiting for the retransmission timer.
+ */
+int tprexmtthresh = 3;
+
+/*
+ * CALLED FROM:
+ *	tp.trans, when an XAK arrives
+ * FUNCTION and ARGUMENTS:
+ *	Checks whether the sequence number (seq) carried by the XAK is the
+ *	oldest unacknowledged expedited sequence number (tpcb->tp_Xuna).
+ *	If it is, tp_Xuna is advanced to tp_Xsndnxt and one record is
+ *	dropped from the expedited send buffer (tpcb->tp_Xsnd).
+ * RETURN VALUE:
+ *	1 if the XAK acknowledged the outstanding XPD tpdu, 0 otherwise.
+ */
+int
+tp_goodXack(tpcb, seq)
+	struct tp_pcb *tpcb;
+	SeqNum seq;
+{
+
+	IFTRACE(D_XPD)
+		tptraceTPCB(TPPTgotXack,
+			seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
+			tpcb->tp_snduna);
+	ENDTRACE
+
+	/* An XAK for anything but the oldest unacked XPD causes no action. */
+	if (tpcb->tp_Xuna != seq)
+		return 0;
+
+	tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
+
+	/*
+	 * Only one expedited packet can sit in the Xsnd socket buffer at
+	 * any time, so dropping a single record empties it.  Allowing more
+	 * than one would require tracking how many characters went with
+	 * each XPD tpdu.
+	 */
+	IFDEBUG(D_XPD)
+		dump_mbuf(tpcb->tp_Xsnd.sb_mb,
+			"tp_goodXack Xsnd before sbdrop");
+	ENDDEBUG
+
+	IFTRACE(D_XPD)
+		tptraceTPCB(TPPTmisc,
+			"goodXack: dropping cc ",
+			(int)(tpcb->tp_Xsnd.sb_cc),
+			0,0,0);
+	ENDTRACE
+
+	sbdroprecord(&tpcb->tp_Xsnd);
+	return 1;
+}
+
+/*
+ * CALLED FROM:
+ *	tp_goodack()
+ * FUNCTION and ARGUMENTS:
+ *	Takes one round trip sample, measured as the number of clock ticks
+ *	elapsed since tpcb->tp_rttemit, and updates in (tpcb):
+ *	  tp_rtt	smoothed average round trip time
+ *	  tp_rtv	round trip time variance - actually deviation,
+ *			not variance
+ *	It then clears tp_rttemit and tp_rxtshift and recomputes
+ *	tp_dt_ticks and tp_rxtcur from the new estimate.
+ * RETURN VALUE:
+ *	void
+ */
+
+void
+tp_rtt_rtv(tpcb)
+register struct tp_pcb *tpcb;
+{
+	int old = tpcb->tp_rtt;
+	int elapsed = ticks - tpcb->tp_rttemit;
+	/*
+	 * delta is only computed once an estimate exists; start it at 0 so
+	 * the debug printf below never reads an indeterminate value on the
+	 * first sample.
+	 */
+	int delta = 0;
+
+	if (tpcb->tp_rtt != 0) {
+		/*
+		 * rtt is the smoothed round trip time in machine clock ticks (hz).
+		 * It is stored as a fixed point number, unscaled (unlike the tcp
+		 * srtt).  The rationale here is that it is only significant to the
+		 * nearest unit of slowtimo, which is at least 8 machine clock ticks
+		 * so there is no need to scale.  The smoothing is done according
+		 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
+		 */
+		delta = elapsed - tpcb->tp_rtt;
+		if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
+			tpcb->tp_rtt = 1;
+		/*
+		 * rtv is a smoothed accumulated mean difference, unscaled
+		 * for reasons expressed above.
+		 * It is smoothed with an alpha of .75, and the round trip timer
+		 * will be set to rtt + 4*rtv, also as TCP does.
+		 */
+		if (delta < 0)
+			delta = -delta;
+		if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
+			tpcb->tp_rtv = 1;
+	} else {
+		/*
+		 * No rtt measurement yet - use the unsmoothed rtt.
+		 * Set the variance to half the rtt (so our first
+		 * retransmit happens at 3*rtt)
+		 */
+		tpcb->tp_rtt = elapsed;
+		tpcb->tp_rtv = elapsed >> 1;
+	}
+	/* The sample has been consumed: stop timing and cancel any backoff. */
+	tpcb->tp_rttemit = 0;
+	tpcb->tp_rxtshift = 0;
+	/*
+	 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
+	 * Because of the way we do the smoothing, srtt and rttvar
+	 * will each average +1/2 tick of bias.  When we compute
+	 * the retransmit timer, we want 1/2 tick of rounding and
+	 * 1 extra tick because of +-1/2 tick uncertainty in the
+	 * firing of the timer.  The bias will give us exactly the
+	 * 1.5 tick we need.  But, because the bias is
+	 * statistical, we have to test that we don't drop below
+	 * the minimum feasible timer (which is 2 ticks)."
+	 */
+	TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
+		tpcb->tp_peer_acktime, 128 /* XXX */);
+	IFDEBUG(D_RTT)
+		printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
+			"tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old);
+	ENDDEBUG
+	tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
+}
+
+/*
+ * CALLED FROM:
+ *	tp.trans when an AK arrives
+ * FUNCTION and ARGUMENTS:
+ *	Given (cdt), the credit from the AK tpdu,
+ *	(seq), the sequence number from the AK tpdu, and
+ *	(subseq), the subsequence number from the AK tpdu,
+ *	tp_goodack() determines if the AK acknowledges something in the send
+ *	window, and if so, drops the appropriate packets from the retransmission
+ *	list, computes the round trip time, and updates the retransmission timer
+ *	based on the new smoothed round trip time.
+ *	An AK that repeats tp_snduna with unchanged credit and a newer
+ *	subsequence is counted as a duplicate ack; tprexmtthresh of them in
+ *	a row trigger an early retransmission from tp_snduna.
+ *	tp_send() is called on every exit path.
+ * RETURN VALUE:
+ *	Returns nonzero if
+ *	EITHER it actually acked something heretofore unacknowledged
+ *	OR no news but the credit should be processed.
+ *	If something heretofore unacked was acked with this sequence number,
+ *	the appropriate tpdus are dropped from the retransmission control list,
+ *	by calling tp_sbdrop().
+ *	No need to see the tpdu itself.
+ */
+int
+tp_goodack(tpcb, cdt, seq, subseq)
+	register struct tp_pcb *tpcb;
+	u_int cdt;
+	register SeqNum seq;
+	u_int subseq;
+{
+	/*
+	 * Snapshot the previous peer credit up front.  The early exits
+	 * (discarded and duplicate acks) jump straight to "done", whose
+	 * debug printf reports this value, so it must be defined on every
+	 * path.  tp_fcredit is not modified before the point where the
+	 * snapshot is first consumed.
+	 */
+	int old_fcredit = tpcb->tp_fcredit;
+	int bang = 0;	/* bang --> ack for something heretofore unacked */
+	u_int bytes_acked;
+
+	IFDEBUG(D_ACKRECV)
+		printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
+			tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
+	ENDDEBUG
+	IFTRACE(D_ACKRECV)
+		tptraceTPCB(TPPTgotack,
+			seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
+	ENDTRACE
+
+	IFPERF(tpcb)
+		tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
+	ENDPERF
+
+	if (seq == tpcb->tp_snduna) {
+		/* Nothing newly acked: stale, or merely a credit/subseq update? */
+		if (subseq < tpcb->tp_r_subseq ||
+			(subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
+		discard_the_ack:
+			IFDEBUG(D_ACKRECV)
+				printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
+					tpcb, subseq, tpcb->tp_r_subseq);
+			ENDDEBUG
+			goto done;
+		}
+		if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
+			tpcb->tp_r_subseq = subseq;
+			if (tpcb->tp_timer[TM_data_retrans] == 0)
+				tpcb->tp_dupacks = 0;
+			else if (++tpcb->tp_dupacks == tprexmtthresh) {
+				/* partner went out of his way to signal with different
+				   subsequences that he has the same lack of an expected
+				   packet.  This may be an early indication of a loss */
+
+				SeqNum onxt = tpcb->tp_sndnxt;
+				struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
+				u_int win = min(tpcb->tp_fcredit,
+						tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
+				IFDEBUG(D_ACKRECV)
+					printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n",
+						"goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
+				ENDDEBUG
+				if (win < 2)
+					win = 2;
+				tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
+				tpcb->tp_timer[TM_data_retrans] = 0;
+				tpcb->tp_rttemit = 0;
+				/* resend from the first unacked tpdu, one tpdu's worth */
+				tpcb->tp_sndnxt = tpcb->tp_snduna;
+				tpcb->tp_sndnxt_m = 0;
+				tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
+				tp_send(tpcb);
+				tpcb->tp_cong_win = tpcb->tp_ssthresh +
+					tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
+				/* resume where we were if that is further along */
+				if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
+					tpcb->tp_sndnxt = onxt;
+					tpcb->tp_sndnxt_m = onxt_m;
+				}
+
+			} else if (tpcb->tp_dupacks > tprexmtthresh) {
+				tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
+			}
+			goto done;
+		}
+	} else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
+		goto discard_the_ack;
+	/*
+	 * If the congestion window was inflated to account
+	 * for the other side's cached packets, retract it.
+	 */
+	if (tpcb->tp_dupacks > tprexmtthresh &&
+		tpcb->tp_cong_win > tpcb->tp_ssthresh)
+			tpcb->tp_cong_win = tpcb->tp_ssthresh;
+	tpcb->tp_r_subseq = subseq;
+	tpcb->tp_fcredit = cdt;
+	if (cdt > tpcb->tp_maxfcredit)
+		tpcb->tp_maxfcredit = cdt;
+	tpcb->tp_dupacks = 0;
+
+	if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
+
+		tpsbcheck(tpcb, 0);
+		bytes_acked = tp_sbdrop(tpcb, seq);
+		tpsbcheck(tpcb, 1);
+		/*
+		 * If transmit timer is running and timed sequence
+		 * number was acked, update smoothed round trip time.
+		 * Since we now have an rtt measurement, cancel the
+		 * timer backoff (cf., Phil Karn's retransmit alg.).
+		 * Recompute the initial retransmit timer.
+		 */
+		if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
+			tp_rtt_rtv(tpcb);
+		/*
+		 * If all outstanding data is acked, stop retransmit timer.
+		 * If there is more data to be acked, restart retransmit
+		 * timer, using current (possibly backed-off) value.
+		 * OSI combines the keepalive and persistence functions.
+		 * So, there is no persistence timer per se, to restart.
+		 */
+		if (tpcb->tp_class != TP_CLASS_0)
+			tpcb->tp_timer[TM_data_retrans] =
+				(seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
+		/*
+		 * When new data is acked, open the congestion window.
+		 * If the window gives us less than ssthresh packets
+		 * in flight, open exponentially (maxseg per packet).
+		 * Otherwise open linearly: maxseg per window
+		 * (maxseg^2 / cwnd per packet), plus a constant
+		 * fraction of a packet (maxseg/8) to help larger windows
+		 * open quickly enough.
+		 */
+		{
+			u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
+
+			incr = min(incr, bytes_acked);
+			if (cw > tpcb->tp_ssthresh)
+				incr = incr * incr / cw + incr / 8;
+			tpcb->tp_cong_win =
+				min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
+		}
+		tpcb->tp_snduna = seq;
+		if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
+			tpcb->tp_sndnxt = seq;
+			tpcb->tp_sndnxt_m = 0;
+		}
+		bang++;
+	}
+
+	if( cdt != 0 && old_fcredit == 0 ) {
+		tpcb->tp_sendfcc = 1;
+	}
+	if (cdt == 0) {
+		if (old_fcredit != 0)
+			IncStat(ts_zfcdt);
+		/* The following might mean that the window shrunk */
+		if (tpcb->tp_timer[TM_data_retrans]) {
+			tpcb->tp_timer[TM_data_retrans] = 0;
+			tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
+			if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
+				tpcb->tp_sndnxt = tpcb->tp_snduna;
+				tpcb->tp_sndnxt_m = 0;
+			}
+		}
+	}
+	tpcb->tp_fcredit = cdt;
+	bang |= (old_fcredit < cdt);
+
+done:
+	IFDEBUG(D_ACKRECV)
+		printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
+			bang, cdt, old_fcredit, tpcb->tp_cong_win);
+	ENDDEBUG
+	/* if (bang) XXXXX Very bad to remove this test, but somethings broken */
+		tp_send(tpcb);
+	return (bang);
+}
+
+/*
+ * CALLED FROM:
+ *	tp_goodack()
+ * FUNCTION and ARGUMENTS:
+ *	drops everything up TO but not INCLUDING seq # (seq)
+ *	from the retransmission queue (the socket's send buffer), one
+ *	record per sequence number counted from tpcb->tp_snduna, and then
+ *	wakes up writers if the send buffer asks for notification.
+ * RETURN VALUE:
+ *	the number of bytes removed from the send buffer.
+ */
+int
+tp_sbdrop(tpcb, seq)
+	register struct tp_pcb *tpcb;
+	SeqNum seq;
+{
+	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+	register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
+	int oldcc = sb->sb_cc, oldi = i;
+
+	/* More than half the sequence space is not a believable ack distance. */
+	if (i >= tpcb->tp_seqhalf)
+		printf("tp_spdropping too much -- should panic");
+	while (i-- > 0)
+		sbdroprecord(sb);
+	IFDEBUG(D_ACKRECV)
+		printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
+			oldi, oldcc - sb->sb_cc, tpcb, seq);
+	ENDDEBUG
+	if (sb_notify(sb))
+		sowwakeup(tpcb->tp_sock);
+	return (oldcc - sb->sb_cc);
+}
+
+/*
+ * CALLED FROM:
+ * tp.trans on user send request, arrival of AK and arrival of XAK
+ * FUNCTION and ARGUMENTS:
+ * Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
+ * Emits until a) runs out of data, or b) runs into an XPD mark, or
+ * c) it hits seq number (highseq) limited by cong or credit.
+ *
+ * If you want XPD to buffer > 1 du per socket buffer, you can
+ * modify this to issue XPD tpdus also, but then it'll have
+ * to take some argument(s) to distinguish between the type of DU to
+ * hand tp_emit.
+ *
+ * When something is sent for the first time, its time-of-send
+ * is stashed (in system clock ticks rather than pf_slowtimo ticks).
+ * When the ack arrives, the smoothed round-trip time is figured
+ * using this value.
+ *
+ * Any error returned by tp_emit() is stored in the socket's so_error
+ * and stops the loop; nothing is returned to the caller.
+ */
+void
+tp_send(tpcb)
+ register struct tp_pcb *tpcb;
+{
+ register int len;
+ register struct mbuf *m;
+ struct mbuf *mb = 0;
+ struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+ unsigned int eotsdu = 0;
+ SeqNum highseq, checkseq;
+ int idle, idleticks, off, cong_win;
+#ifdef TP_PERF_MEAS
+ int send_start_time = ticks;
+ SeqNum oldnxt = tpcb->tp_sndnxt;
+#endif /* TP_PERF_MEAS */
+
+ /* idle: everything sent so far has been acknowledged */
+ idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
+ if (idle) {
+ idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
+ if (idleticks > tpcb->tp_dt_ticks)
+ /*
+ * We have been idle for "a while" and no acks are
+ * expected to clock out any data we send --
+ * slow start to get ack "clock" running again.
+ */
+ tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
+ }
+
+ /*
+ * cong_win is a local byte budget for this call; highseq is the
+ * first sequence number beyond the peer's credit.  While expedited
+ * data is queued, highseq is capped at tp_sndnew.
+ */
+ cong_win = tpcb->tp_cong_win;
+ highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
+ if (tpcb->tp_Xsnd.sb_mb)
+ highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
+
+ IFDEBUG(D_DATA)
+ printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
+ tpcb, tpcb->tp_sndnxt, cong_win, highseq);
+ ENDDEBUG
+ IFTRACE(D_DATA)
+ tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
+ tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
+ tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
+ tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
+ ENDTRACE
+ IFTRACE(D_DATA)
+ tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
+ tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
+ ENDTRACE
+
+ /*
+ * Locate the packet for tp_sndnxt: use the cached pointer if there
+ * is one, otherwise step forward from the head of the send buffer.
+ * NOTE(review): the loop below advances by m_next, while the rest of
+ * this function moves from packet to packet with m_nextpkt -- confirm
+ * which link is intended when off > 0.
+ */
+ if (tpcb->tp_sndnxt_m)
+ m = tpcb->tp_sndnxt_m;
+ else {
+ off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
+ for (m = sb->sb_mb; m && off > 0; m = m->m_next)
+ off--;
+ }
+ /* NOTE(review): no goto in this function targets the label below. */
+send:
+ /*
+ * Avoid silly window syndrome here . . . figure out how!
+ */
+ checkseq = tpcb->tp_sndnum;
+ if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
+ checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
+
+ while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
+
+ eotsdu = (m->m_flags & M_EOR) != 0;
+ len = m->m_pkthdr.len;
+ /* hold back a small packet at checkseq that does not end a TSDU */
+ if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
+ len < (tpcb->tp_l_tpdusize / 2))
+ break; /* Nagle . . . . . */
+ cong_win -= len;
+ /* make a copy - mb goes into the retransmission list
+ * while m gets emitted. m_copy won't copy a zero-length mbuf.
+ */
+ mb = m;
+ m = m_copy(mb, 0, M_COPYALL);
+ if (m == MNULL)
+ break;
+ IFTRACE(D_STASH)
+ tptraceTPCB( TPPTmisc,
+ "tp_send mcopy nxt high eotsdu len",
+ tpcb->tp_sndnxt, highseq, eotsdu, len);
+ ENDTRACE
+
+ IFDEBUG(D_DATA)
+ printf("tp_sending tpcb 0x%x nxt 0x%x\n",
+ tpcb, tpcb->tp_sndnxt);
+ ENDDEBUG
+ /* when headers are precomputed, may need to fill
+ in checksum here */
+ /* the assignment in the condition is deliberate: record, then test */
+ if (tpcb->tp_sock->so_error =
+ tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
+ /* error */
+ break;
+ }
+ /* advance to the next packet and remember it for the next call */
+ m = mb->m_nextpkt;
+ tpcb->tp_sndnxt_m = m;
+ if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
+ SEQ_INC(tpcb, tpcb->tp_sndnew);
+ /*
+ * Time this transmission if not a retransmission and
+ * not currently timing anything.
+ */
+ if (tpcb->tp_rttemit == 0) {
+ tpcb->tp_rttemit = ticks;
+ tpcb->tp_rttseq = tpcb->tp_sndnxt;
+ }
+ tpcb->tp_sndnxt = tpcb->tp_sndnew;
+ } else
+ SEQ_INC(tpcb, tpcb->tp_sndnxt);
+ /*
+ * Set retransmit timer if not currently set.
+ * Initial value for retransmit timer is smoothed
+ * round-trip time + 2 * round-trip time variance.
+ * Initialize shift counter which is used for backoff
+ * of retransmit time.
+ */
+ if (tpcb->tp_timer[TM_data_retrans] == 0 &&
+ tpcb->tp_class != TP_CLASS_0) {
+ tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
+ tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
+ tpcb->tp_rxtshift = 0;
+ }
+ }
+ if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
+ tpcb->tp_oktonagle = 0;
+#ifdef TP_PERF_MEAS
+ /*
+ * NOTE(review): this section looks unbuildable as written when
+ * TP_PERF_MEAS is defined: "t - elapsed" below operates on the
+ * pointer t rather than *t, "newseq" is not declared in this
+ * function, and "now" is filled in but &elapsed is what gets passed
+ * to tpmeas().  Confirm the intended expressions before enabling.
+ */
+ IFPERF(tpcb)
+ {
+ register int npkts;
+ int elapsed = ticks - send_start_time, *t;
+ struct timeval now;
+
+ npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
+
+ if (npkts > 0)
+ tpcb->tp_Nwindow++;
+
+ if (npkts > TP_PM_MAX)
+ npkts = TP_PM_MAX;
+
+ t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
+ *t += (t - elapsed) >> TP_RTT_ALPHA;
+
+ if (mb == 0) {
+ IncPStat(tpcb, tps_win_lim_by_data[npkts] );
+ } else {
+ IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
+ /* not true with congestion-window being used */
+ }
+ now.tv_sec = elapsed / hz;
+ now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
+ tpmeas( tpcb->tp_lref,
+ TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
+ }
+ ENDPERF
+#endif /* TP_PERF_MEAS */
+
+
+ IFTRACE(D_DATA)
+ tptraceTPCB( TPPTmisc,
+ "tp_send at end: new nxt eotsdu error",
+ tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
+
+ ENDTRACE
+}
+
+/*
+ * Counters maintained by tp_packetize():
+ * TPNagleok is bumped each time a packet is left open for later data,
+ * TPNagled each time new data is merged into such a packet.
+ */
+int TPNagleok;
+int TPNagled;
+
+/*
+ * tp_packetize - queue outgoing user data on the send socket buffer,
+ * pre-split into records no larger than one tpdu's worth of data.
+ *
+ * ARGUMENTS:
+ *	tpcb	the connection
+ *	m	packet-header mbuf chain holding the data to queue
+ *	eotsdu	nonzero if this data ends the TSDU
+ *
+ * If the previous call left a packet open (tp_oktonagle) and the new
+ * data fits in it, the data is merged into that packet; otherwise the
+ * chain is split into maxsize pieces, each appended as its own record.
+ * With eotsdu set, M_EOR is set on the packet at hand on exit and
+ * Nagle merging is disabled; otherwise that packet is left open for
+ * the next call.
+ *
+ * RETURN VALUE:
+ *	always 0.  Panics if the send buffer is in an unexpected state or
+ *	m_split() fails.
+ */
+int
+tp_packetize(tpcb, m, eotsdu)
+register struct tp_pcb *tpcb;
+register struct mbuf *m;
+int eotsdu;
+{
+	register struct mbuf *n;
+	register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+	/* data bytes per tpdu: tpdu size less header and optional checksum */
+	int maxsize = tpcb->tp_l_tpdusize
+			- tp_headersize(DT_TPDU_type, tpcb)
+			- (tpcb->tp_use_checksum?4:0) ;
+	int totlen = m->m_pkthdr.len;
+	struct mbuf *m_split();
+	/*
+	 * Pre-packetize the data in the sockbuf
+	 * according to negotiated mtu.  Do it here
+	 * where we can safely wait for mbufs.
+	 *
+	 * This presumes knowledge of sockbuf conventions.
+	 * TODO: allocate space for header and fill it in (once!).
+	 */
+	IFDEBUG(D_DATA)
+		printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
+			maxsize, totlen, eotsdu, tpcb->tp_sndnum);
+	ENDDEBUG
+	if (tpcb->tp_oktonagle) {
+		/* find the last packet in the send buffer; it must still be open */
+		if ((n = sb->sb_mb) == 0)
+			panic("tp_packetize");
+		while (n->m_act)
+			n = n->m_act;
+		if (n->m_flags & M_EOR)
+			panic("tp_packetize 2");
+		/* undo the SEQ_DEC done when that packet was left open */
+		SEQ_INC(tpcb, tpcb->tp_sndnum);
+		if (totlen + n->m_pkthdr.len < maxsize) {
+			/* There is an unsent packet with space, combine data */
+			struct mbuf *old_n = n;
+			tpsbcheck(tpcb,3);
+			n->m_pkthdr.len += totlen;
+			while (n->m_next)
+				n = n->m_next;
+			sbcompress(sb, m, n);
+			tpsbcheck(tpcb,4);
+			n = old_n;
+			TPNagled++;
+			goto out;
+		}
+	}
+	/* carve the chain into maxsize pieces, one send-buffer record each */
+	while (m) {
+		n = m;
+		if (totlen > maxsize) {
+			if ((m = m_split(n, maxsize, M_WAIT)) == 0)
+				panic("tp_packetize");
+		} else
+			m = 0;
+		totlen -= maxsize;
+		tpsbcheck(tpcb, 5);
+		sbappendrecord(sb, n);
+		tpsbcheck(tpcb, 6);
+		SEQ_INC(tpcb, tpcb->tp_sndnum);
+	}
+out:
+	if (eotsdu) {
+		n->m_flags |= M_EOR;  /* XXX belongs at end */
+		tpcb->tp_oktonagle = 0;
+	} else {
+		/* leave the last packet open; its number is re-assigned next call */
+		SEQ_DEC(tpcb, tpcb->tp_sndnum);
+		tpcb->tp_oktonagle = 1;
+		TPNagleok++;
+	}
+	IFDEBUG(D_DATA)
+		printf("SEND out: oktonagle %d sndnum 0x%x\n",
+			tpcb->tp_oktonagle, tpcb->tp_sndnum);
+	ENDDEBUG
+	return 0;
+}
+
+
+/*
+ * NAME: tp_stash()
+ * CALLED FROM:
+ * tp.trans on arrival of a DT tpdu
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ * Takes the DT tpdu described by event (e) and either appends its data
+ * to the receive socket buffer (in-sequence arrival), parks it on the
+ * reassembly queue (out-of-sequence arrival inside the window), or
+ * frees it (duplicate, or outside the window).
+ * Returns 1 if
+ * a) something new arrived and it's got eotsdu_reached bit on,
+ * b) this arrival caused other out-of-sequence things to be
+ * accepted, or
+ * c) this arrival is the highest seq # for which we last gave credit
+ * (sender just sent a whole window)
+ * In other words, returns 1 if tp should send an ack immediately, 0 if
+ * the ack can wait a while.
+ *
+ * Note: this implementation no longer renegs on credit, (except
+ * when debugging option D_RENEG is on, for the purpose of testing
+ * ack subsequencing), so we don't need to check for incoming tpdus
+ * being in a reneged portion of the window.
+ */
+
+tp_stash(tpcb, e)
+ register struct tp_pcb *tpcb;
+ register struct tp_event *e;
+{
+ register int ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
+ /* 0--> delay acks until full window */
+ /* 1--> ack each tpdu */
+ /*
+ * E is shorthand for the DT-tpdu part of the event.
+ * NOTE(review): it is not #undef'd in the visible code, so it stays
+ * defined for the rest of the file.
+ */
+#ifndef lint
+#define E e->ATTR(DT_TPDU)
+#else /* lint */
+#define E e->ev_union.EV_DT_TPDU
+#endif /* lint */
+
+ /* end of TSDU: mark the record boundary on the data's first mbuf */
+ if ( E.e_eot ) {
+ register struct mbuf *n = E.e_data;
+ n->m_flags |= M_EOR;
+ n->m_act = 0;
+ }
+ IFDEBUG(D_STASH)
+ dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
+ "stash: so_rcv before appending");
+ dump_mbuf(E.e_data,
+ "stash: e_data before appending");
+ ENDDEBUG
+
+ IFPERF(tpcb)
+ PStat(tpcb, Nb_from_ll) += E.e_datalen;
+ tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
+ E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
+ ENDPERF
+
+ /* in-sequence arrival: deliver it, then drain what it unblocks */
+ if (E.e_seq == tpcb->tp_rcvnxt) {
+
+ IFDEBUG(D_STASH)
+ printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
+ E.e_seq, E.e_datalen, E.e_eot);
+ ENDDEBUG
+
+ IFTRACE(D_STASH)
+ tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
+ E.e_seq, E.e_datalen, E.e_eot, 0);
+ ENDTRACE
+
+ SET_DELACK(tpcb);
+
+ sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
+
+ SEQ_INC( tpcb, tpcb->tp_rcvnxt );
+ /*
+ * move chains from the reassembly queue to the socket buffer
+ */
+ if (tpcb->tp_rsycnt) {
+ register struct mbuf **mp;
+ struct mbuf **mplim;
+
+ /* the queue is a ring indexed by seq % tp_maxlcredit */
+ mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
+ mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
+
+ /* stop at the first empty slot, i.e. the next missing tpdu */
+ while (tpcb->tp_rsycnt && *mp) {
+ sbappend(&tpcb->tp_sock->so_rcv, *mp);
+ tpcb->tp_rsycnt--;
+ *mp = 0;
+ SEQ_INC(tpcb, tpcb->tp_rcvnxt);
+ ack_reason |= ACK_REORDER;
+ if (++mp == mplim)
+ mp = tpcb->tp_rsyq;
+ }
+ }
+ IFDEBUG(D_STASH)
+ dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
+ "stash: so_rcv after appending");
+ ENDDEBUG
+
+ } else {
+ /* out-of-sequence arrival */
+ register struct mbuf **mp;
+ SeqNum uwe;
+
+ IFTRACE(D_STASH)
+ tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
+ E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
+ ENDTRACE
+
+ /* allocate the reassembly queue on first use; this may fail */
+ if (tpcb->tp_rsyq == 0)
+ tp_rsyset(tpcb);
+ uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
+ if (tpcb->tp_rsyq == 0 ||
+ !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
+ /* no queue, or outside the receive window: drop silently */
+ ack_reason = ACK_DONT;
+ m_freem(E.e_data);
+ } else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
+ IFDEBUG(D_STASH)
+ printf("tp_stash - drop & ack\n");
+ ENDDEBUG
+
+ /* retransmission - drop it and force an ack */
+ IncStat(ts_dt_dup);
+ IFPERF(tpcb)
+ IncPStat(tpcb, tps_n_ack_cuz_dup);
+ ENDPERF
+
+ m_freem(E.e_data);
+ ack_reason |= ACK_DUP;
+ } else {
+ /* slot is free: park the data until the gap is filled */
+ *mp = E.e_data;
+ tpcb->tp_rsycnt++;
+ ack_reason = ACK_DONT;
+ }
+ }
+ /* there were some comments of historical interest here. */
+ {
+ /* NOTE(review): presumably recomputes the local credit -- macro not visible here */
+ LOCAL_CREDIT(tpcb);
+
+ if ( E.e_seq == tpcb->tp_sent_uwe )
+ ack_reason |= ACK_STRAT_FULLWIN;
+
+ IFTRACE(D_STASH)
+ tptraceTPCB(TPPTmisc,
+ "end of stash, eot, ack_reason, sent_uwe ",
+ E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
+ ENDTRACE
+
+ if ( ack_reason == ACK_DONT ) {
+ IncStat( ts_ackreason[ACK_DONT] );
+ return 0;
+ } else {
+ IFPERF(tpcb)
+ if(ack_reason & ACK_STRAT_EACH) {
+ IncPStat(tpcb, tps_n_ack_cuz_strat);
+ } else if(ack_reason & ACK_STRAT_FULLWIN) {
+ IncPStat(tpcb, tps_n_ack_cuz_fullwin);
+ } else if(ack_reason & ACK_REORDER) {
+ IncPStat(tpcb, tps_n_ack_cuz_reorder);
+ }
+ tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
+ SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
+ ENDPERF
+ {
+ register int i;
+
+ /* keep track of all reasons that apply */
+ for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
+ if( ack_reason & (1<<i) )
+ IncStat( ts_ackreason[i] );
+ }
+ }
+ return 1;
+ }
+ }
+}
+
+/*
+ * tp_rsyflush - drop all the packets on the reassembly queue.
+ * Do this when closing the socket, or when somebody has changed
+ * the space available in the receive socket (XXX).
+ *
+ * Frees every mbuf chain still parked in the tp_maxlcredit slots of
+ * tpcb->tp_rsyq, then frees the slot array itself and clears tp_rsyq.
+ * If the slot walk does not account for every packet counted in
+ * tp_rsycnt, the pcb address is printed and the count is forced to 0.
+ */
+tp_rsyflush(tpcb)
+register struct tp_pcb *tpcb;
+{
+	register struct mbuf **mp;
+
+	if (tpcb->tp_rsycnt) {
+		/*
+		 * Walk the slots from last to first.  mp must be assigned
+		 * here; comparing it ("==") left it uninitialized.
+		 */
+		for (mp = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
+			 --mp >= tpcb->tp_rsyq; )
+			if (*mp) {
+				tpcb->tp_rsycnt--;
+				m_freem(*mp);
+			}
+		if (tpcb->tp_rsycnt) {
+			printf("tp_rsyflush %x\n", tpcb);
+			tpcb->tp_rsycnt = 0;
+		}
+	}
+	free((caddr_t)tpcb->tp_rsyq, M_PCB);
+	tpcb->tp_rsyq = 0;
+}
+
+/*
+ * tp_rsyset - (re)size the reassembly queue for a connection.
+ *
+ * The number of slots is the receive buffer high-water mark expressed
+ * in tpdus (rounded up by one), capped at 0xffff for extended format
+ * and 0xf otherwise.  If a queue already exists with that many slots
+ * nothing is done.  Otherwise any existing queue is flushed and a new
+ * zeroed one is allocated without sleeping; if that allocation fails
+ * tpcb->tp_rsyq is left 0, which callers must check.
+ */
+tp_rsyset(tpcb)
+register struct tp_pcb *tpcb;
+{
+	register struct socket *so = tpcb->tp_sock;
+	int maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
+	int old_credit = tpcb->tp_maxlcredit;
+	caddr_t rsyq;
+
+	maxcredit = min(maxcredit,
+		(so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
+
+	if (old_credit == maxcredit && tpcb->tp_rsyq != 0)
+		return;
+	/*
+	 * Flush while tp_maxlcredit still holds the size of the existing
+	 * queue: tp_rsyflush() uses it as the bound of its slot walk, so
+	 * storing the new size first would make it walk the old array
+	 * with the wrong length.
+	 */
+	if (tpcb->tp_rsyq)
+		tp_rsyflush(tpcb);
+	tpcb->tp_maxlcredit = maxcredit;
+	maxcredit *= sizeof(struct mbuf *);
+	if ((rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT)) != 0)
+		bzero(rsyq, maxcredit);
+	tpcb->tp_rsyq = (struct mbuf **)rsyq;
+}
+
+/*
+ * tpsbcheck - consistency check of the send socket buffer.
+ *
+ * Walks every packet on so_snd and panics if a packet lacks a packet
+ * header, if the mbuf lengths of a packet do not add up to its
+ * m_pkthdr.len, or if the byte and mbuf-storage totals disagree with
+ * sb_cc and sb_mbcnt.  (i) identifies the call site in the diagnostic
+ * output.
+ */
+tpsbcheck(tpcb, i)
+struct tp_pcb *tpcb;
+{
+	register struct mbuf *pkt, *seg;
+	register int bytes = 0, storage = 0, expect;
+	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+
+	pkt = sb->sb_mb;
+	while (pkt) {
+		if (!(pkt->m_flags & M_PKTHDR))
+			panic("tpsbcheck nohdr");
+		/* running total expected once this packet has been counted */
+		expect = bytes + pkt->m_pkthdr.len;
+		seg = pkt;
+		while (seg) {
+			bytes += seg->m_len;
+			storage += MSIZE;
+			if (seg->m_flags & M_EXT)
+				storage += seg->m_ext.ext_size;
+			seg = seg->m_next;
+		}
+		if (bytes != expect) {
+			printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
+				i, bytes, expect, pkt);
+			panic("tpsbcheck short");
+		}
+		pkt = pkt->m_nextpkt;
+	}
+	if (bytes != sb->sb_cc || storage != sb->sb_mbcnt) {
+		printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, bytes, sb->sb_cc,
+			storage, sb->sb_mbcnt);
+		panic("tpsbcheck");
+	}
+}