src - OpenBSD base system

diff options


context:
space:
mode:

author	Ryan Thomas McBride <mcbride@cvs.openbsd.org>	2004-11-24 01:25:43 +0000
committer	Ryan Thomas McBride <mcbride@cvs.openbsd.org>	2004-11-24 01:25:43 +0000
commit	73490af51c9ba0da59a2c0dcda06654e172f1217 (patch)
tree	2aa5359efebdb81f572218977cfa951b6a2d7fe3 /sys/netinet
parent	484c640e38c5253c264ae8b7ed31d0a046f6b785 (diff)

Multicast routing cleanup from Pavlin Radoslavov

- sync ip_mroute.c with NetBSD
- import some FreeBSD changes to MFC entry handling
- set im->im_vif correctly when sending IGMPMSG_WRONGVIF
- increment mrtstat.mrts_upcalls correctly
- return error from get_sg_cnt() if there is no matching forwarding entry

ok henning@ brad@ naddy@

Diffstat (limited to 'sys/netinet')

-rw-r--r--

sys/netinet/in.h

-rw-r--r--

sys/netinet/ip_mroute.c

867

-rw-r--r--

sys/netinet/ip_mroute.h

-rw-r--r--

sys/netinet/raw_ip.c

4 files changed, 464 insertions, 458 deletions

diff --git a/sys/netinet/in.h b/sys/netinet/in.h
index 34704080c3a..254b939205f 100644
--- a/sys/netinet/in.h
+++ b/sys/netinet/in.h

@@ -1,4 +1,4 @@

-/* $OpenBSD: in.h,v 1.62 2004/06/06 16:49:09 cedric Exp $ */

+/* $OpenBSD: in.h,v 1.63 2004/11/24 01:25:42 mcbride Exp $ */

/* $NetBSD: in.h,v 1.20 1996/02/13 23:41:47 christos Exp $ */

@@ -609,6 +609,8 @@ in_cksum_addword(u_int16_t a, u_int16_t b)

return (sum);

}

+extern struct in_addr zeroin_addr;

int in_broadcast(struct in_addr, struct ifnet *);

int in_canforward(struct in_addr);

int in_cksum(struct mbuf *, int);

diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
index 3dbd4fd170b..54c695e3312 100644
--- a/sys/netinet/ip_mroute.c
+++ b/sys/netinet/ip_mroute.c

@@ -1,5 +1,5 @@

-/* $OpenBSD: ip_mroute.c,v 1.37 2004/08/24 20:31:16 brad Exp $ */

-/* $NetBSD: ip_mroute.c,v 1.27 1996/05/07 02:40:50 thorpej Exp $ */

+/* $OpenBSD: ip_mroute.c,v 1.38 2004/11/24 01:25:42 mcbride Exp $ */

+/* $NetBSD: ip_mroute.c,v 1.85 2004/04/26 01:31:57 matt Exp $ */

@@ -81,11 +81,11 @@

#include <sys/stdarg.h>

#define IP_MULTICASTOPTS 0

-#define M_PULLUP(m, len) \

- do { \

+#define M_PULLUP(m, len) \

+ do { \

if ((m) && ((m)->m_flags & M_EXT || (m)->m_len < (len))) \

- (m) = m_pullup((m), (len)); \

- } while (0)

+ (m) = m_pullup((m), (len)); \

+ } while (/*CONSTCOND*/ 0)

* Globals. All but ip_mrouter and ip_mrtproto could be static,

@@ -97,9 +97,9 @@ int ip_mrtproto = IGMP_DVMRP; /* for netstat only */

#define NO_RTE_FOUND 0x1

#define RTE_FOUND 0x2

-#define MFCHASH(a, g) \

- ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \

- ((g) >> 20) ^ ((g) >> 10) ^ (g)) & mfchash)

+#define MFCHASH(a, g) \

+ ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \

+ ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & mfchash)

LIST_HEAD(mfchashhdr, mfc) *mfchashtbl;

u_long mfchash;

@@ -120,15 +120,13 @@ extern int rsvp_on;

#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */

#define UPCALL_EXPIRE 6 /* number of timeouts */

-struct timeout upcalls_timeout;

-struct timeout tbf_timeout;

+struct timeout expire_upcalls_ch;

* Define the token bucket filter structures

- * qtable -> each interface has an associated queue of pkts

-struct pkt_queue qtable[MAXVIFS][MAXQSIZE];

+#define TBF_REPROCESS (hz / 100) /* 100x / second */

static int get_sg_cnt(struct sioc_sg_req *);

static int get_vif_cnt(struct sioc_vif_req *);

@@ -138,7 +136,8 @@ static int set_assert(struct mbuf *);

static int get_assert(struct mbuf *);

static int add_vif(struct mbuf *);

static int del_vif(struct mbuf *);

-static void update_mfc(struct mfcctl *, struct mfc *);

+static void update_mfc_params(struct mfc *, struct mfcctl *);

+static void init_mfc_params(struct mfc *, struct mfcctl *);

static void expire_mfc(struct mfc *);

static int add_mfc(struct mbuf *);

#ifdef UPCALL_TIMING

@@ -157,9 +156,8 @@ static void phyint_send(struct ip *, struct vif *, struct mbuf *);

static void encap_send(struct ip *, struct vif *, struct mbuf *);

static void tbf_control(struct vif *, struct mbuf *, struct ip *,

u_int32_t);

-static void tbf_queue(struct vif *, struct mbuf *, struct ip *);

+static void tbf_queue(struct vif *, struct mbuf *);

static void tbf_process_q(struct vif *);

-static void tbf_dequeue(struct vif *, int);

static void tbf_reprocess_q(void *);

static int tbf_dq_sel(struct vif *, struct ip *);

static void tbf_send_packet(struct vif *, struct mbuf *);

@@ -206,7 +204,7 @@ static int have_encap_tunnel = 0;

* one-back cache used by ipip_mroute_input to locate a tunnel's vif

* given a datagram's src ip address.

-static u_int32_t last_encap_src;

+static struct in_addr last_encap_src;

static struct vif *last_encap_vif;

@@ -221,47 +219,46 @@ static int pim_assert;

* Find a route for a given origin IP address and Multicast group address

* Type of service parameter to be added in the future!!!

+ * Statistics are updated by the caller if needed

+ * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses)

+static struct mfc *

+mfc_find(struct in_addr *o, struct in_addr *g)

+ struct mfc *rt;

-#define MFCFIND(o, g, rt) do { \

- struct mfc *_rt; \

- (rt) = NULL; \

- ++mrtstat.mrts_mfc_lookups; \

- for (_rt = mfchashtbl[MFCHASH(o, g)].lh_first; \

- _rt; _rt = _rt->mfc_hash.le_next) { \

- if (_rt->mfc_origin.s_addr == (o) && \

- _rt->mfc_mcastgrp.s_addr == (g) && \

- _rt->mfc_stall == NULL) { \

- (rt) = _rt; \

- break; \

- } \

- if ((rt) == NULL) \

- ++mrtstat.mrts_mfc_misses; \

-} while (0)

+ LIST_FOREACH(rt, &mfchashtbl[MFCHASH(*o, *g)], mfc_hash) {

+ if (in_hosteq(rt->mfc_origin, *o) &&

+ in_hosteq(rt->mfc_mcastgrp, *g) &&

+ (rt->mfc_stall == NULL))

+ break;

+ }

+ return (rt);

* Macros to compute elapsed time efficiently

* Borrowed from Van Jacobson's scheduling code

-#define TV_DELTA(a, b, delta) do { \

- int xxs; \

- delta = (a).tv_usec - (b).tv_usec; \

- xxs = (a).tv_sec - (b).tv_sec; \

- switch (xxs) { \

- case 2: \

- delta += 1000000; \

- /* fall through */ \

- case 1: \

- delta += 1000000; \

- /* fall through */ \

- case 0: \

- break; \

- default: \

- delta += (1000000 * xxs); \

- break; \

- } \

-} while (0)

+#define TV_DELTA(a, b, delta) do { \

+ int xxs; \

+ delta = (a).tv_usec - (b).tv_usec; \

+ xxs = (a).tv_sec - (b).tv_sec; \

+ switch (xxs) { \

+ case 2: \

+ delta += 1000000; \

+ /* fall through */ \

+ case 1: \

+ delta += 1000000; \

+ /* fall through */ \

+ case 0: \

+ break; \

+ default: \

+ delta += (1000000 * xxs); \

+ break; \

+ } \

+} while (/*CONSTCOND*/ 0)

#ifdef UPCALL_TIMING

u_int32_t upcall_data[51];

@@ -271,17 +268,14 @@ u_int32_t upcall_data[51];

* Handle MRT setsockopt commands to modify the multicast routing tables.

int

-ip_mrouter_set(cmd, so, m)

- int cmd;

- struct socket *so;

- struct mbuf **m;

+ip_mrouter_set(struct socket *so, int optname, struct mbuf **m)

{

int error;

- if (cmd != MRT_INIT && so != ip_mrouter)

- error = EACCES;

+ if (optname != MRT_INIT && so != ip_mrouter)

+ error = ENOPROTOOPT;

else

- switch (cmd) {

+ switch (optname) {

case MRT_INIT:

error = ip_mrouter_init(so, *m);

break;

@@ -304,7 +298,7 @@ ip_mrouter_set(cmd, so, m)

error = set_assert(*m);

break;

default:

- error = EOPNOTSUPP;

+ error = ENOPROTOOPT;

break;

}

@@ -317,33 +311,29 @@ ip_mrouter_set(cmd, so, m)

* Handle MRT getsockopt commands

int

-ip_mrouter_get(cmd, so, m)

- int cmd;

- struct socket *so;

- struct mbuf **m;

+ip_mrouter_get(struct socket *so, int optname, struct mbuf **m)

{

- struct mbuf *mb;

int error;

if (so != ip_mrouter)

- error = EACCES;

+ error = ENOPROTOOPT;

else {

- *m = mb = m_get(M_WAIT, MT_SOOPTS);

+ *m = m_get(M_WAIT, MT_SOOPTS);

- switch (cmd) {

+ switch (optname) {

case MRT_VERSION:

- error = get_version(mb);

+ error = get_version(*m);

break;

case MRT_ASSERT:

- error = get_assert(mb);

+ error = get_assert(*m);

break;

default:

- error = EOPNOTSUPP;

+ error = ENOPROTOOPT;

break;

}

if (error)

- m_free(mb);

+ m_free(*m);

}

return (error);

@@ -353,10 +343,7 @@ ip_mrouter_get(cmd, so, m)

* Handle ioctl commands to obtain information from the cache

int

-mrt_ioctl(so, cmd, data)

- struct socket *so;

- u_long cmd;

- caddr_t data;

+mrt_ioctl(struct socket *so, u_long cmd, caddr_t data)

{

int error;

@@ -382,21 +369,22 @@ mrt_ioctl(so, cmd, data)

* returns the packet, byte, rpf-failure count for the source group provided

static int

-get_sg_cnt(req)

- struct sioc_sg_req *req;

+get_sg_cnt(struct sioc_sg_req *req)

{

- struct mfc *rt;

int s;

+ struct mfc *rt;

s = splsoftnet();

- MFCFIND(req->src.s_addr, req->grp.s_addr, rt);

- splx(s);

- if (rt != NULL) {

- req->pktcnt = rt->mfc_pkt_cnt;

- req->bytecnt = rt->mfc_byte_cnt;

- req->wrong_if = rt->mfc_wrong_if;

- } else

+ rt = mfc_find(&req->src, &req->grp);

+ if (rt == NULL) {

+ splx(s);

req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;

+ return (EADDRNOTAVAIL);

+ }

+ req->pktcnt = rt->mfc_pkt_cnt;

+ req->bytecnt = rt->mfc_byte_cnt;

+ req->wrong_if = rt->mfc_wrong_if;

+ splx(s);

return (0);

}

@@ -405,8 +393,7 @@ get_sg_cnt(req)

* returns the input and output packet and byte counts on the vif provided

static int

-get_vif_cnt(req)

- struct sioc_vif_req *req;

+get_vif_cnt(struct sioc_vif_req *req)

{

vifi_t vifi = req->vifi;

@@ -425,9 +412,7 @@ get_vif_cnt(req)

* Enable multicast routing

static int

-ip_mrouter_init(so, m)

- struct socket *so;

- struct mbuf *m;

+ip_mrouter_init(struct socket *so, struct mbuf *m)

{

int *v;

@@ -440,7 +425,7 @@ ip_mrouter_init(so, m)

so->so_proto->pr_protocol != IPPROTO_IGMP)

return (EOPNOTSUPP);

- if (m == 0 || m->m_len < sizeof(int))

+ if (m == NULL || m->m_len < sizeof(int))

return (EINVAL);

v = mtod(m, int *);

@@ -457,8 +442,8 @@ ip_mrouter_init(so, m)

pim_assert = 0;

- timeout_set(&upcalls_timeout, expire_upcalls, NULL);

- timeout_add(&upcalls_timeout, EXPIRE_TIMEOUT);

+ timeout_set(&expire_upcalls_ch, expire_upcalls, NULL);

+ timeout_add(&expire_upcalls_ch, EXPIRE_TIMEOUT);

if (mrtdebug)

log(LOG_DEBUG, "ip_mrouter_init\n");

@@ -482,15 +467,14 @@ ip_mrouter_done()

/* Clear out all the vifs currently in use. */

for (vifi = 0; vifi < numvifs; vifi++) {

vifp = &viftable[vifi];

- if (vifp->v_lcl_addr.s_addr != 0)

+ if (!in_nullhost(vifp->v_lcl_addr))

reset_vif(vifp);

}

- bzero((caddr_t)qtable, sizeof(qtable));

numvifs = 0;

pim_assert = 0;

- timeout_del(&upcalls_timeout);

+ timeout_del(&expire_upcalls_ch);

* Free all multicast forwarding cache entries.

@@ -498,15 +482,16 @@ ip_mrouter_done()

for (i = 0; i < MFCTBLSIZ; i++) {

struct mfc *rt, *nrt;

- for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) {

- nrt = rt->mfc_hash.le_next;

+ for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {

+ nrt = LIST_NEXT(rt, mfc_hash);

expire_mfc(rt);

}

+ bzero((caddr_t)nexpire, sizeof(nexpire));

free(mfchashtbl, M_MRTABLE);

- mfchashtbl = 0;

+ mfchashtbl = NULL;

/* Reset de-encapsulation cache. */

have_encap_tunnel = 0;

@@ -521,9 +506,34 @@ ip_mrouter_done()

return (0);

}

+void

+ip_mrouter_detach(struct ifnet *ifp)

+ int vifi, i;

+ struct vif *vifp;

+ struct mfc *rt;

+ struct rtdetq *rte;

+ /* XXX not sure about side effect to userland routing daemon */

+ for (vifi = 0; vifi < numvifs; vifi++) {

+ vifp = &viftable[vifi];

+ if (vifp->v_ifp == ifp)

+ reset_vif(vifp);

+ }

+ for (i = 0; i < MFCTBLSIZ; i++) {

+ if (nexpire[i] == 0)

+ continue;

+ LIST_FOREACH(rt, &mfchashtbl[i], mfc_hash) {

+ for (rte = rt->mfc_stall; rte; rte = rte->next) {

+ if (rte->ifp == ifp)

+ rte->ifp = NULL;

+ }

static int

-get_version(m)

- struct mbuf *m;

+get_version(struct mbuf *m)

{

int *v = mtod(m, int *);

@@ -536,12 +546,11 @@ get_version(m)

* Set PIM assert processing global

static int

-set_assert(m)

- struct mbuf *m;

+set_assert(struct mbuf *m)

{

int *i;

- if (m == 0 || m->m_len < sizeof(int))

+ if (m == NULL || m->m_len < sizeof(int))

return (EINVAL);

i = mtod(m, int *);

@@ -553,8 +562,7 @@ set_assert(m)

* Get PIM assert processing global

static int

-get_assert(m)

- struct mbuf *m;

+get_assert(struct mbuf *m)

{

int *i = mtod(m, int *);

@@ -569,8 +577,7 @@ static struct sockaddr_in sin = { sizeof(sin), AF_INET };

* Add a vif to the vif table

static int

-add_vif(m)

- struct mbuf *m;

+add_vif(struct mbuf *m)

{

struct vifctl *vifcp;

struct vif *vifp;

@@ -579,22 +586,25 @@ add_vif(m)

struct ifreq ifr;

int error, s;

- if (m == 0 || m->m_len < sizeof(struct vifctl))

+ if (m == NULL || m->m_len < sizeof(struct vifctl))

return (EINVAL);

vifcp = mtod(m, struct vifctl *);

if (vifcp->vifc_vifi >= MAXVIFS)

return (EINVAL);

+ if (in_nullhost(vifcp->vifc_lcl_addr))

+ return (EADDRNOTAVAIL);

vifp = &viftable[vifcp->vifc_vifi];

- if (vifp->v_lcl_addr.s_addr != 0)

+ if (!in_nullhost(vifp->v_lcl_addr))

return (EADDRINUSE);

/* Find the interface with an address in AF_INET family. */

sin.sin_addr = vifcp->vifc_lcl_addr;

ifa = ifa_ifwithaddr(sintosa(&sin));

- if (ifa == 0)

+ if (ifa == NULL)

return (EADDRNOTAVAIL);

+ ifp = ifa->ifa_ifp;

if (vifcp->vifc_flags & VIFF_TUNNEL) {

if (vifcp->vifc_flags & VIFF_SRCRT) {

@@ -627,33 +637,42 @@ add_vif(m)

/* Enable promiscuous reception of all IP multicasts. */

satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);

satosin(&ifr.ifr_addr)->sin_family = AF_INET;

- satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;

+ satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;

error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);

if (error)

return (error);

}

s = splsoftnet();

/* Define parameters for the tbf structure. */

- vifp->v_tbf.q_len = 0;

- vifp->v_tbf.n_tok = 0;

- vifp->v_tbf.last_pkt_t = 0;

+ vifp->tbf_q = NULL;

+ vifp->tbf_t = &vifp->tbf_q;

+ microtime(&vifp->tbf_last_pkt_t);

+ vifp->tbf_n_tok = 0;

+ vifp->tbf_q_len = 0;

+ vifp->tbf_max_q_len = MAXQSIZE;

vifp->v_flags = vifcp->vifc_flags;

vifp->v_threshold = vifcp->vifc_threshold;

+ /* scaling up here allows division by 1024 in critical code */

+ vifp->v_rate_limit = vifcp->vifc_rate_limit * 1024 / 1000;

vifp->v_lcl_addr = vifcp->vifc_lcl_addr;

vifp->v_rmt_addr = vifcp->vifc_rmt_addr;

vifp->v_ifp = ifp;

- vifp->v_rate_limit = vifcp->vifc_rate_limit;

-#ifdef RSVP_ISI

- vifp->v_rsvp_on = 0;

- vifp->v_rsvpd = NULL;

-#endif /* RSVP_ISI */

/* Initialize per vif pkt counters. */

vifp->v_pkt_in = 0;

vifp->v_pkt_out = 0;

vifp->v_bytes_in = 0;

vifp->v_bytes_out = 0;

+ timeout_del(&vifp->v_repq_ch);

+#ifdef RSVP_ISI

+ vifp->v_rsvp_on = 0;

+ vifp->v_rsvpd = NULL;

+#endif /* RSVP_ISI */

splx(s);

/* Adjust numvifs up if the vifi is higher than numvifs. */

@@ -673,22 +692,32 @@ add_vif(m)

}

void

-reset_vif(vifp)

- struct vif *vifp;

+reset_vif(struct vif *vifp)

{

+ struct mbuf *m, *n;

struct ifnet *ifp;

struct ifreq ifr;

+ timeout_set(&vifp->v_repq_ch, tbf_reprocess_q, vifp);

+ /*

+ * Free packets queued at the interface

+ */

+ for (m = vifp->tbf_q; m != NULL; m = n) {

+ n = m->m_nextpkt;

+ m_freem(m);

+ }

if (vifp->v_flags & VIFF_TUNNEL) {

free(vifp->v_ifp, M_MRTABLE);

if (vifp == last_encap_vif) {

- last_encap_vif = 0;

- last_encap_src = 0;

+ last_encap_vif = NULL;

+ last_encap_src = zeroin_addr;

}

} else {

satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);

satosin(&ifr.ifr_addr)->sin_family = AF_INET;

- satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;

+ satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;

ifp = vifp->v_ifp;

(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);

}

@@ -699,15 +728,14 @@ reset_vif(vifp)

* Delete a vif from the vif table

static int

-del_vif(m)

- struct mbuf *m;

+del_vif(struct mbuf *m)

{

vifi_t *vifip;

struct vif *vifp;

vifi_t vifi;

int s;

- if (m == 0 || m->m_len < sizeof(vifi_t))

+ if (m == NULL || m->m_len < sizeof(vifi_t))

return (EINVAL);

vifip = mtod(m, vifi_t *);

@@ -715,18 +743,16 @@ del_vif(m)

return (EINVAL);

vifp = &viftable[*vifip];

- if (vifp->v_lcl_addr.s_addr == 0)

+ if (in_nullhost(vifp->v_lcl_addr))

return (EADDRNOTAVAIL);

s = splsoftnet();

reset_vif(vifp);

- bzero((caddr_t)qtable[*vifip], sizeof(qtable[*vifip]));

/* Adjust numvifs down */

for (vifi = numvifs; vifi > 0; vifi--)

- if (viftable[vifi-1].v_lcl_addr.s_addr != 0)

+ if (!in_nullhost(viftable[vifi - 1].v_lcl_addr))

break;

numvifs = vifi;

@@ -739,8 +765,7 @@ del_vif(m)

}

void

-vif_delete(ifp)

- struct ifnet *ifp;

+vif_delete(struct ifnet *ifp)

{

int i;

struct vif *vifp;

@@ -754,7 +779,7 @@ vif_delete(ifp)

}

for (i = numvifs; i > 0; i--)

- if (viftable[i - 1].v_lcl_addr.s_addr != 0)

+ if (!in_nullhost(viftable[i - 1].v_lcl_addr))

break;

numvifs = i;

@@ -770,23 +795,40 @@ vif_delete(ifp)

}

+/*

+ * update an mfc entry without resetting counters and S,G addresses.

+ */

static void

-update_mfc(mfccp, rt)

- struct mfcctl *mfccp;

- struct mfc *rt;

+update_mfc_params(struct mfc *rt, struct mfcctl *mfccp)

{

- vifi_t vifi;

+ int i;

rt->mfc_parent = mfccp->mfcc_parent;

- for (vifi = 0; vifi < numvifs; vifi++)

- rt->mfc_ttls[vifi] = mfccp->mfcc_ttls[vifi];

- rt->mfc_expire = 0;

- rt->mfc_stall = 0;

+ for (i = 0; i < numvifs; i++) {

+ rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];

+ }

}

+/*

+ * fully initialize an mfc entry from the parameter.

+ */

static void

-expire_mfc(rt)

- struct mfc *rt;

+init_mfc_params(struct mfc *rt, struct mfcctl *mfccp)

+ rt->mfc_origin = mfccp->mfcc_origin;

+ rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;

+ update_mfc_params(rt, mfccp);

+ /* initialize pkt counters per src-grp */

+ rt->mfc_pkt_cnt = 0;

+ rt->mfc_byte_cnt = 0;

+ rt->mfc_wrong_if = 0;

+ timerclear(&rt->mfc_last_assert);

+static void

+expire_mfc(struct mfc *rt)

{

struct rtdetq *rte, *nrte;

@@ -804,8 +846,7 @@ expire_mfc(rt)

* Add an mfc entry

static int

-add_mfc(m)

- struct mbuf *m;

+add_mfc(struct mbuf *m)

{

struct mfcctl *mfccp;

struct mfc *rt;

@@ -814,13 +855,13 @@ add_mfc(m)

u_short nstl;

int s;

- if (m == 0 || m->m_len < sizeof(struct mfcctl))

+ if (m == NULL || m->m_len < sizeof(struct mfcctl))

return (EINVAL);

mfccp = mtod(m, struct mfcctl *);

s = splsoftnet();

- MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);

+ rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp);

/* If an entry already exists, just update the fields */

if (rt) {

@@ -830,10 +871,7 @@ add_mfc(m)

ntohl(mfccp->mfcc_mcastgrp.s_addr),

mfccp->mfcc_parent);

- if (rt->mfc_expire)

- nexpire[hash]--;

- update_mfc(mfccp, rt);

+ update_mfc_params(rt, mfccp);

splx(s);

return (0);

@@ -843,10 +881,10 @@ add_mfc(m)

* Find the entry for which the upcall was made and update

nstl = 0;

- hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);

- for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) {

- if (rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr &&

- rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr &&

+ hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);

+ LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {

+ if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&

+ in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&

rt->mfc_stall != NULL) {

if (nstl++)

log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p\n",

@@ -861,11 +899,15 @@ add_mfc(m)

ntohl(mfccp->mfcc_mcastgrp.s_addr),

mfccp->mfcc_parent, rt->mfc_stall);

- if (rt->mfc_expire)

- nexpire[hash]--;

+ rte = rt->mfc_stall;

+ init_mfc_params(rt, mfccp);

+ rt->mfc_stall = NULL;

+ rt->mfc_expire = 0; /* Don't clean this guy up */

+ nexpire[hash]--;

/* free packets Qed at the end of this entry */

- for (rte = rt->mfc_stall; rte != NULL; rte = nrte) {

+ for (; rte != NULL; rte = nrte) {

nrte = rte->next;

if (rte->ifp) {

#ifdef RSVP_ISI

@@ -880,11 +922,12 @@ add_mfc(m)

#endif /* UPCALL_TIMING */

free(rte, M_MRTABLE);

}

- update_mfc(mfccp, rt);

}

+ /*

+ * It is possible that an entry is being inserted without an upcall

+ */

if (nstl == 0) {

* No mfc; make a new one

@@ -895,23 +938,31 @@ add_mfc(m)

ntohl(mfccp->mfcc_mcastgrp.s_addr),

mfccp->mfcc_parent);

- rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);

- if (rt == NULL) {

- splx(s);

- return (ENOBUFS);

+ LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {

+ if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&

+ in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) {

+ init_mfc_params(rt, mfccp);

+ if (rt->mfc_expire)

+ nexpire[hash]--;

+ rt->mfc_expire = 0;

+ break; /* XXX */

+ }

}

+ if (rt == NULL) { /* no upcall, so make a new entry */

+ rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE,

+ M_NOWAIT);

+ if (rt == NULL) {

+ splx(s);

+ return (ENOBUFS);

+ }

- rt->mfc_origin = mfccp->mfcc_origin;

- rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;

- /* initialize pkt counters per src-grp */

- rt->mfc_pkt_cnt = 0;

- rt->mfc_byte_cnt = 0;

- rt->mfc_wrong_if = 0;

- timerclear(&rt->mfc_last_assert);

- update_mfc(mfccp, rt);

- /* insert new entry at head of hash chain */

- LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);

+ init_mfc_params(rt, mfccp);

+ rt->mfc_expire = 0;

+ rt->mfc_stall = NULL;

+ /* insert new entry at head of hash chain */

+ LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);

+ }

}

splx(s);

@@ -922,8 +973,8 @@ add_mfc(m)

* collect delay statistics on the upcalls

-static void collate(t)

- struct timeval *t;

+static void

+collate(struct timeval *t)

{

u_int32_t d;

struct timeval tp;

@@ -947,14 +998,13 @@ static void collate(t)

* Delete an mfc entry

static int

-del_mfc(m)

- struct mbuf *m;

+del_mfc(struct mbuf *m)

{

struct mfcctl *mfccp;

struct mfc *rt;

int s;

- if (m == 0 || m->m_len < sizeof(struct mfcctl))

+ if (m == NULL || m->m_len < sizeof(struct mfcctl))

return (EINVAL);

mfccp = mtod(m, struct mfcctl *);

@@ -966,7 +1016,7 @@ del_mfc(m)

s = splsoftnet();

- MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);

+ rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp);

if (rt == NULL) {

splx(s);

return (EADDRNOTAVAIL);

@@ -980,14 +1030,11 @@ del_mfc(m)

}

static int

-socket_send(s, mm, src)

- struct socket *s;

- struct mbuf *mm;

- struct sockaddr_in *src;

+socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)

{

- if (s) {

+ if (s != NULL) {

if (sbappendaddr(&s->so_rcv, sintosa(src), mm,

- (struct mbuf *)0) != 0) {

+ (struct mbuf *)NULL) != 0) {

sorwakeup(s);

return (0);

}

@@ -1012,33 +1059,24 @@ socket_send(s, mm, src)

int

#ifdef RSVP_ISI

-ip_mforward(m, ifp, imo)

+ip_mforward(struct mbuf *m, struct ifnet *ifp, struct ip_moptions *imo)

#else

-ip_mforward(m, ifp)

-#endif /* RSVP_ISI */

- struct mbuf *m;

- struct ifnet *ifp;

-#ifdef RSVP_ISI

- struct ip_moptions *imo;

+ip_mforward(struct mbuf *m, struct ifnet *ifp)

#endif /* RSVP_ISI */

{

struct ip *ip = mtod(m, struct ip *);

struct mfc *rt;

- u_char *ipoptions;

static int srctun = 0;

struct mbuf *mm;

int s;

-#ifdef RSVP_ISI

- struct vif *vifp;

vifi_t vifi;

-#endif /* RSVP_ISI */

if (mrtdebug & DEBUG_FORWARD)

log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p\n",

ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);

if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||

- (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR) {

+ ((u_char *)(ip + 1))[1] != IPOPT_LSRR) {

* Packet arrived via a physical interface or

* an encapuslated tunnel.

@@ -1061,13 +1099,13 @@ ip_mforward(m, ifp)

if (ip->ip_ttl < 255)

ip->ip_ttl++; /* compensate for -1 in *_send routines */

if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {

- vifp = viftable + vifi;

+ struct vif *vifp = viftable + vifi;

printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n",

ntohl(ip->ip_src), ntohl(ip->ip_dst), vifi,

(vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",

vifp->v_ifp->if_xname);

}

- return (ip_mdq(m, ifp, rt, vifi));

+ return (ip_mdq(m, ifp, (struct mfc *)NULL, vifi));

}

if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {

printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",

@@ -1086,7 +1124,8 @@ ip_mforward(m, ifp)

* Determine forwarding vifs from the forwarding cache table

s = splsoftnet();

- MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);

+ ++mrtstat.mrts_mfc_lookups;

+ rt = mfc_find(&ip->ip_src, &ip->ip_dst);

/* Entry exists, so forward if necessary */

if (rt != NULL) {

@@ -1099,19 +1138,21 @@ ip_mforward(m, ifp)

} else {

* If we don't have a route for packet's origin,

- * Make a copy of the packet &

- * send message to routing daemon

+ * Make a copy of the packet & send message to routing daemon

struct mbuf *mb0;

struct rtdetq *rte;

u_int32_t hash;

+ int hlen = ip->ip_hl << 2;

#ifdef UPCALL_TIMING

struct timeval tp;

microtime(&tp);

#endif /* UPCALL_TIMING */

+ ++mrtstat.mrts_mfc_misses;

mrtstat.mrts_no_route++;

if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))

log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",

@@ -1120,7 +1161,8 @@ ip_mforward(m, ifp)

* Allocate mbufs early so that we don't do extra work if we are

- * just going to fail anyway.

+ * just going to fail anyway. Make sure to pullup the header so

+ * that other people can't step on it.

rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT);

if (rte == NULL) {

@@ -1128,67 +1170,72 @@ ip_mforward(m, ifp)

return (ENOBUFS);

}

mb0 = m_copy(m, 0, M_COPYALL);

+ M_PULLUP(mb0, hlen);

if (mb0 == NULL) {

free(rte, M_MRTABLE);

splx(s);

return (ENOBUFS);

}

- /* is there an upcall waiting for this packet? */

- hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);

- for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) {

- if (ip->ip_src.s_addr == rt->mfc_origin.s_addr &&

- ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr &&

+ /* is there an upcall waiting for this flow? */

+ hash = MFCHASH(ip->ip_src, ip->ip_dst);

+ LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {

+ if (in_hosteq(ip->ip_src, rt->mfc_origin) &&

+ in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&

rt->mfc_stall != NULL)

break;

}

if (rt == NULL) {

- int hlen = ip->ip_hl << 2;

int i;

struct igmpmsg *im;

+ /*

+ * Locate the vifi for the incoming interface for

+ * this packet.

+ * If none found, drop packet.

+ */

+ for (vifi = 0; vifi < numvifs &&

+ viftable[vifi].v_ifp != ifp; vifi++)

+ ;

+ if (vifi >= numvifs) /* vif not found, drop packet */

+ goto non_fatal;

/* no upcall, so make a new entry */

rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);

- if (rt == NULL) {

- free(rte, M_MRTABLE);

- m_free(mb0);

- splx(s);

- return (ENOBUFS);

- }

+ if (rt == NULL)

+ goto fail;

* Make a copy of the header to send to the user level

* process

mm = m_copy(m, 0, hlen);

M_PULLUP(mm, hlen);

- if (mm == NULL) {

- free(rte, M_MRTABLE);

- m_free(mb0);

- free(rt, M_MRTABLE);

- splx(s);

- return (ENOBUFS);

- }

+ if (mm == NULL)

+ goto fail1;

* Send message to routing daemon to install

* a route into the kernel table

- sin.sin_addr = ip->ip_src;

im = mtod(mm, struct igmpmsg *);

im->im_msgtype = IGMPMSG_NOCACHE;

im->im_mbz = 0;

+ im->im_vif = vifi;

mrtstat.mrts_upcalls++;

+ sin.sin_addr = ip->ip_src;

if (socket_send(ip_mrouter, mm, &sin) < 0) {

log(LOG_WARNING,

"ip_mforward: ip_mrouter socket queue full\n");

++mrtstat.mrts_upq_sockfull;

- free(rte, M_MRTABLE);

- m_free(mb0);

+ fail1:

free(rt, M_MRTABLE);

+ fail:

+ free(rte, M_MRTABLE);

+ m_freem(mb0);

splx(s);

return (ENOBUFS);

}

@@ -1214,11 +1261,17 @@ ip_mforward(m, ifp)

struct rtdetq **p;

int npkts = 0;

+ /*

+ * XXX ouch! we need to append to the list, but we

+ * only have a pointer to the front, so we have to

+ * scan the entire list every time.

+ */

for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)

if (++npkts > MAX_UPQ) {

mrtstat.mrts_upq_ovflw++;

+ non_fatal:

free(rte, M_MRTABLE);

- m_free(mb0);

+ m_freem(mb0);

splx(s);

return (0);

}

@@ -1243,8 +1296,7 @@ ip_mforward(m, ifp)

/*ARGSUSED*/

static void

-expire_upcalls(v)

- void *v;

+expire_upcalls(void *v)

{

int i;

int s;

@@ -1257,8 +1309,8 @@ expire_upcalls(v)

if (nexpire[i] == 0)

continue;

- for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) {

- nrt = rt->mfc_hash.le_next;

+ for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {

+ nrt = LIST_NEXT(rt, mfc_hash);

if (rt->mfc_expire == 0 || --rt->mfc_expire > 0)

continue;

@@ -1276,7 +1328,7 @@ expire_upcalls(v)

}

splx(s);

- timeout_add(&upcalls_timeout, EXPIRE_TIMEOUT);

+ timeout_add(&expire_upcalls_ch, EXPIRE_TIMEOUT);

}

@@ -1284,15 +1336,9 @@ expire_upcalls(v)

static int

#ifdef RSVP_ISI

-ip_mdq(m, ifp, rt, xmt_vif)

+ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)

#else

-ip_mdq(m, ifp, rt)

-#endif /* RSVP_ISI */

- struct mbuf *m;

- struct ifnet *ifp;

- struct mfc *rt;

-#ifdef RSVP_ISI

- vifi_t xmt_vif;

+ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt)

#endif /* RSVP_ISI */

{

struct ip *ip = mtod(m, struct ip *);

@@ -1305,12 +1351,12 @@ ip_mdq(m, ifp, rt)

* input, they shouldn't get counted on output, so statistics keeping is

* separate.

-#define MC_SEND(ip, vifp, m) do { \

- if ((vifp)->v_flags & VIFF_TUNNEL) \

- encap_send((ip), (vifp), (m)); \

- else \

- phyint_send((ip), (vifp), (m)); \

-} while (0)

+#define MC_SEND(ip, vifp, m) do { \

+ if ((vifp)->v_flags & VIFF_TUNNEL) \

+ encap_send((ip), (vifp), (m)); \

+ else \

+ phyint_send((ip), (vifp), (m)); \

+} while (/*CONSTCOND*/ 0)

#ifdef RSVP_ISI

@@ -1344,22 +1390,31 @@ ip_mdq(m, ifp, rt)

if (pim_assert && rt->mfc_ttls[vifi] &&

(ifp->if_flags & IFF_BROADCAST) &&

!(viftable[vifi].v_flags & VIFF_TUNNEL)) {

- struct mbuf *mm;

- struct igmpmsg *im;

- int hlen = ip->ip_hl << 2;

struct timeval now;

u_int32_t delta;

+ /* Get vifi for the incoming packet */

+ for (vifi = 0;

+ vifi < numvifs && viftable[vifi].v_ifp != ifp;

+ vifi++)

+ ;

+ if (vifi >= numvifs) {

+ /* The iif is not found: ignore the packet. */

+ return (0);

+ }

microtime(&now);

TV_DELTA(rt->mfc_last_assert, now, delta);

if (delta > ASSERT_MSG_TIME) {

- mm = m_copy(m, 0, hlen);

+ struct igmpmsg *im;

+ int hlen = ip->ip_hl << 2;

+ struct mbuf *mm = m_copy(m, 0, hlen);

M_PULLUP(mm, hlen);

- if (mm == NULL) {

+ if (mm == NULL)

return (ENOBUFS);

- }

rt->mfc_last_assert = now;

@@ -1368,16 +1423,22 @@ ip_mdq(m, ifp, rt)

im->im_mbz = 0;

im->im_vif = vifi;

- sin.sin_addr = im->im_src;

+ mrtstat.mrts_upcalls++;

- socket_send(ip_mrouter, m, &sin);

+ sin.sin_addr = im->im_src;

+ if (socket_send(ip_mrouter, mm, &sin) < 0) {

+ log(LOG_WARNING,

+ "ip_mforward: ip_mrouter socket queue full\n");

+ ++mrtstat.mrts_upq_sockfull;

+ return (ENOBUFS);

+ }

}

return (0);

}

/* If I sourced this packet, it counts as output, else it was input. */

- if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {

+ if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) {

viftable[vifi].v_pkt_out++;

viftable[vifi].v_bytes_out += plen;

} else {

@@ -1406,12 +1467,10 @@ ip_mdq(m, ifp, rt)

#ifdef RSVP_ISI

- * check if a vif number is legal/ok. This is used by ip_output, to export

- * numvifs there,

+ * check if a vif number is legal/ok. This is used by ip_output.

int

-legal_vif_num(vif)

- int vif;

+legal_vif_num(int vif)

{

if (vif >= 0 && vif < numvifs)

return (1);

@@ -1421,10 +1480,7 @@ legal_vif_num(vif)

#endif /* RSVP_ISI */

static void

-phyint_send(ip, vifp, m)

- struct ip *ip;

- struct vif *vifp;

- struct mbuf *m;

+phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)

{

struct mbuf *mb_copy;

int hlen = ip->ip_hl << 2;

@@ -1447,15 +1503,18 @@ phyint_send(ip, vifp, m)

}

static void

-encap_send(ip, vifp, m)

- struct ip *ip;

- struct vif *vifp;

- struct mbuf *m;

+encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m)

{

struct mbuf *mb_copy;

struct ip *ip_copy;

int i, len = ntohs(ip->ip_len) + sizeof(multicast_encap_iphdr);

+ /* Take care of delayed checksums */

+ if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {

+ in_delayed_cksum(m);

+ m->m_pkthdr.csum &= ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT);

+ }

* copy the old packet & pullup it's IP header into the

* new mbuf so we can modify it. Try to fill the new

@@ -1495,13 +1554,9 @@ encap_send(ip, vifp, m)

ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));

--ip->ip_ttl;

ip->ip_sum = 0;

-#if defined(LBL) && !defined(ultrix) && !defined(i386)

- ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);

-#else

mb_copy->m_data += sizeof(multicast_encap_iphdr);

ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);

mb_copy->m_data -= sizeof(multicast_encap_iphdr);

-#endif

if (vifp->v_rate_limit <= 0)

tbf_send_packet(vifp, mb_copy);

@@ -1540,14 +1595,14 @@ ipip_mroute_input(struct mbuf *m, ...)

* uniquely identifies the tunnel (i.e., that this site has

* at most one tunnel with the remote site).

- if (ip->ip_src.s_addr != last_encap_src) {

+ if (!in_hosteq(ip->ip_src, last_encap_src)) {

struct vif *vife;

vifp = viftable;

vife = vifp + numvifs;

for (; vifp < vife; vifp++)

if (vifp->v_flags & VIFF_TUNNEL &&

- vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr)

+ in_hosteq(vifp->v_rmt_addr, ip->ip_src))

break;

if (vifp == vife) {

mrtstat.mrts_cant_tunnel++; /*XXX*/

@@ -1559,7 +1614,7 @@ ipip_mroute_input(struct mbuf *m, ...)

return;

}

last_encap_vif = vifp;

- last_encap_src = ip->ip_src.s_addr;

+ last_encap_src = ip->ip_src;

} else

vifp = last_encap_vif;

@@ -1568,7 +1623,7 @@ ipip_mroute_input(struct mbuf *m, ...)

m->m_pkthdr.len -= hlen;

m->m_pkthdr.rcvif = vifp->v_ifp;

ifq = &ipintrq;

- s = splimp();

+ s = splnet();

if (IF_QFULL(ifq)) {

IF_DROP(ifq);

m_freem(m);

@@ -1589,42 +1644,40 @@ ipip_mroute_input(struct mbuf *m, ...)

* Token bucket filter module

static void

-tbf_control(vifp, m, ip, p_len)

- struct vif *vifp;

- struct mbuf *m;

- struct ip *ip;

- u_int32_t p_len;

+tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, u_int32_t len)

{

+ if (len > MAX_BKT_SIZE) {

+ /* drop if packet is too large */

+ mrtstat.mrts_pkt2large++;

+ m_freem(m);

+ return;

+ }

tbf_update_tokens(vifp);

* If there are enough tokens, and the queue is empty, send this packet

* out immediately. Otherwise, try to insert it on this vif's queue.

- if (vifp->v_tbf.q_len == 0) {

- if (p_len <= vifp->v_tbf.n_tok) {

- vifp->v_tbf.n_tok -= p_len;

+ if (vifp->tbf_q_len == 0) {

+ if (len <= vifp->tbf_n_tok) {

+ vifp->tbf_n_tok -= len;

tbf_send_packet(vifp, m);

- } else if (p_len > MAX_BKT_SIZE) {

- /* drop if packet is too large */

- mrtstat.mrts_pkt2large++;

- m_freem(m);

} else {

/* queue packet and timeout till later */

- tbf_queue(vifp, m, ip);

- timeout_set(&tbf_timeout, tbf_reprocess_q, vifp);

- timeout_add(&tbf_timeout, 1);

+ tbf_queue(vifp, m);

+ timeout_add(&vifp->v_repq_ch, TBF_REPROCESS);

}

} else {

- if (vifp->v_tbf.q_len >= MAXQSIZE &&

+ if (vifp->tbf_q_len >= vifp->tbf_max_q_len &&

!tbf_dq_sel(vifp, ip)) {

- /* queue length too much, and couldn't make room */

+ /* queue full, and couldn't make room */

mrtstat.mrts_q_overflow++;

m_freem(m);

} else {

/* queue length low enough, or made room */

- tbf_queue(vifp, m, ip);

+ tbf_queue(vifp, m);

tbf_process_q(vifp);

}

@@ -1634,22 +1687,15 @@ tbf_control(vifp, m, ip, p_len)

* adds a packet to the queue at the interface

static void

-tbf_queue(vifp, m, ip)

- struct vif *vifp;

- struct mbuf *m;

- struct ip *ip;

+tbf_queue(struct vif *vifp, struct mbuf *m)

{

- u_int32_t ql;

- int index = (vifp - viftable);

int s = splsoftnet();

- ql = vifp->v_tbf.q_len;

+ /* insert at tail */

+ *vifp->tbf_t = m;

+ vifp->tbf_t = &m->m_nextpkt;

+ vifp->tbf_q_len++;

- qtable[index][ql].pkt_m = m;

- qtable[index][ql].pkt_len = ntohs((mtod(m, struct ip *))->ip_len);

- qtable[index][ql].pkt_ip = ip;

- vifp->v_tbf.q_len++;

splx(s);

}

@@ -1658,64 +1704,40 @@ tbf_queue(vifp, m, ip)

* processes the queue at the interface

static void

-tbf_process_q(vifp)

- struct vif *vifp;

+tbf_process_q(struct vif *vifp)

{

- struct pkt_queue pkt_1;

- int index = (vifp - viftable);

+ struct mbuf *m;

+ int len;

int s = splsoftnet();

- /* loop through the queue at the interface and send as many packets

- * as possible

+ /*

+ * Loop through the queue at the interface and send as many packets

+ * as possible.

- while (vifp->v_tbf.q_len > 0) {

- /* locate the first packet */

- pkt_1 = qtable[index][0];

+ for (m = vifp->tbf_q; m != NULL; m = vifp->tbf_q) {

+ len = ntohs(mtod(m, struct ip *)->ip_len);

/* determine if the packet can be sent */

- if (pkt_1.pkt_len <= vifp->v_tbf.n_tok) {

+ if (len <= vifp->tbf_n_tok) {

/* if so,

- * reduce no of tokens, dequeue the queue,

+ * reduce no of tokens, dequeue the packet,

* send the packet.

- vifp->v_tbf.n_tok -= pkt_1.pkt_len;

+ if ((vifp->tbf_q = m->m_nextpkt) == NULL)

+ vifp->tbf_t = &vifp->tbf_q;

+ --vifp->tbf_q_len;

- tbf_dequeue(vifp, 0);

- tbf_send_packet(vifp, pkt_1.pkt_m);

+ m->m_nextpkt = NULL;

+ vifp->tbf_n_tok -= len;

+ tbf_send_packet(vifp, m);

} else

break;

}

splx(s);

}

-/*

- * removes the jth packet from the queue at the interface

- */

static void

-tbf_dequeue(vifp, j)

- struct vif *vifp;

- int j;

- u_int32_t index = vifp - viftable;

- int i;

- for (i = j + 1; i <= vifp->v_tbf.q_len - 1; i++) {

- qtable[index][i-1] = qtable[index][i];

- }

- qtable[index][i-1].pkt_m = NULL;

- qtable[index][i-1].pkt_len = 0;

- qtable[index][i-1].pkt_ip = NULL;

- vifp->v_tbf.q_len--;

- if (tbfdebug > 1)

- log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d\n",

- vifp - viftable, i - 1);

-static void

-tbf_reprocess_q(arg)

- void *arg;

+tbf_reprocess_q(void *arg)

{

struct vif *vifp = arg;

@@ -1725,31 +1747,33 @@ tbf_reprocess_q(arg)

tbf_update_tokens(vifp);

tbf_process_q(vifp);

- if (vifp->v_tbf.q_len)

- timeout_add(&tbf_timeout, 1);

+ if (vifp->tbf_q_len != 0)

+ timeout_add(&vifp->v_repq_ch, TBF_REPROCESS);

}

/* function that will selectively discard a member of the queue

- * based on the precedence value and the priority obtained through

- * a lookup table - not yet implemented accurately!

+ * based on the precedence value and the priority

static int

-tbf_dq_sel(vifp, ip)

- struct vif *vifp;

- struct ip *ip;

+tbf_dq_sel(struct vif *vifp, struct ip *ip)

{

- int i;

- int s = splsoftnet();

u_int p;

+ struct mbuf **mp, *m;

+ int s = splsoftnet();

p = priority(vifp, ip);

- for (i = vifp->v_tbf.q_len - 1; i >= 0; i--) {

- if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {

- m_freem(qtable[vifp-viftable][i].pkt_m);

- tbf_dequeue(vifp, i);

- splx(s);

+ for (mp = &vifp->tbf_q, m = *mp;

+ m != NULL;

+ mp = &m->m_nextpkt, m = *mp) {

+ if (p > priority(vifp, mtod(m, struct ip *))) {

+ if ((*mp = m->m_nextpkt) == NULL)

+ vifp->tbf_t = mp;

+ --vifp->tbf_q_len;

+ m_freem(m);

mrtstat.mrts_drop_sel++;

+ splx(s);

return (1);

}

@@ -1758,33 +1782,34 @@ tbf_dq_sel(vifp, ip)

}

static void

-tbf_send_packet(vifp, m)

- struct vif *vifp;

- struct mbuf *m;

+tbf_send_packet(struct vif *vifp, struct mbuf *m)

{

int error;

int s = splsoftnet();

if (vifp->v_flags & VIFF_TUNNEL) {

/* If tunnel options */

- ip_output(m, (struct mbuf *)0, &vifp->v_route,

- IP_FORWARDING, (void *)NULL, (void *)NULL);

+ ip_output(m, (struct mbuf *)NULL, &vifp->v_route,

+ IP_FORWARDING, (struct ip_moptions *)NULL,

+ (struct socket *)NULL);

} else {

/* if physical interface option, extract the options and then send */

- struct ip *ip = mtod(m, struct ip *);

struct ip_moptions imo;

imo.imo_multicast_ifp = vifp->v_ifp;

- imo.imo_multicast_ttl = ip->ip_ttl - 1;

+ imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;

imo.imo_multicast_loop = 1;

#ifdef RSVP_ISI

imo.imo_multicast_vif = -1;

#endif

- error = ip_output(m, (struct mbuf *)0, (struct route *)0,

- IP_FORWARDING|IP_MULTICASTOPTS, &imo, (void *)NULL);

+ error = ip_output(m, (struct mbuf *)NULL, (struct route *)NULL,

+ IP_FORWARDING|IP_MULTICASTOPTS, &imo,

+ (struct socket *)NULL);

if (mrtdebug & DEBUG_XMIT)

- log(LOG_DEBUG, "phyint_send on vif %d err %d\n",

- vifp - viftable, error);

+ log(LOG_DEBUG, "phyint_send on vif %ld err %d\n",

+ (long)(vifp - viftable), error);

}

splx(s);

}

@@ -1794,34 +1819,38 @@ tbf_send_packet(vifp, m)

* in milliseconds & update the no. of tokens in the bucket

static void

-tbf_update_tokens(vifp)

- struct vif *vifp;

+tbf_update_tokens(struct vif *vifp)

{

struct timeval tp;

- u_int32_t t;

- u_int32_t elapsed;

+ u_int32_t tm;

int s = splsoftnet();

microtime(&tp);

- t = tp.tv_sec * 1000 + tp.tv_usec / 1000;

+ TV_DELTA(tp, vifp->tbf_last_pkt_t, tm);

- elapsed = (t - vifp->v_tbf.last_pkt_t) * vifp->v_rate_limit / 8;

- vifp->v_tbf.n_tok += elapsed;

- vifp->v_tbf.last_pkt_t = t;

+ /*

+ * This formula is actually

+ * "time in seconds" * "bytes/second".

+ *

+ * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)

+ *

+ * The (1000/1024) was introduced in add_vif to optimize

+ * this divide into a shift.

+ */

+ vifp->tbf_n_tok += tm * vifp->v_rate_limit / 8192;

+ vifp->tbf_last_pkt_t = tp;

- if (vifp->v_tbf.n_tok > MAX_BKT_SIZE)

- vifp->v_tbf.n_tok = MAX_BKT_SIZE;

+ if (vifp->tbf_n_tok > MAX_BKT_SIZE)

+ vifp->tbf_n_tok = MAX_BKT_SIZE;

splx(s);

}

static int

-priority(vifp, ip)

- struct vif *vifp;

- struct ip *ip;

+priority(struct vif *vifp, struct ip *ip)

{

- int prio;

+ int prio = 50; /* the lowest priority -- default case */

/* temporary hack; may add general packet classifier some day */

@@ -1845,16 +1874,12 @@ priority(vifp, ip)

case 0xc000:

prio = 55;

break;

- default:

- prio = 50;

- break;

}

if (tbfdebug > 1)

log(LOG_DEBUG, "port %x prio %d\n",

ntohs(udp->uh_dport), prio);

- } else

- prio = 50;

+ }

return (prio);

}

@@ -1864,12 +1889,9 @@ priority(vifp, ip)

#ifdef RSVP_ISI

int

-ip_rsvp_vif_init(so, m)

- struct socket *so;

- struct mbuf *m;

+ip_rsvp_vif_init(struct socket *so, struct mbuf *m)

{

- int i;

- int s;

+ int vifi, s;

if (rsvpdebug)

printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",

@@ -1883,31 +1905,32 @@ ip_rsvp_vif_init(so, m)

if (m == NULL || m->m_len != sizeof(int)) {

return (EINVAL);

}

- i = *(mtod(m, int *));

+ vifi = *(mtod(m, int *));

if (rsvpdebug)

- printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on);

+ printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",

+ vifi, rsvp_on);

s = splsoftnet();

/* Check vif. */

- if (!legal_vif_num(i)) {

+ if (!legal_vif_num(vifi)) {

splx(s);

return (EADDRNOTAVAIL);

}

/* Check if socket is available. */

- if (viftable[i].v_rsvpd != NULL) {

+ if (viftable[vifi].v_rsvpd != NULL) {

splx(s);

return (EADDRINUSE);

}

- viftable[i].v_rsvpd = so;

+ viftable[vifi].v_rsvpd = so;

/* This may seem silly, but we need to be sure we don't over-increment

* the RSVP counter, in case something slips up.

- if (!viftable[i].v_rsvp_on) {

- viftable[i].v_rsvp_on = 1;

+ if (!viftable[vifi].v_rsvp_on) {

+ viftable[vifi].v_rsvp_on = 1;

rsvp_on++;

}

@@ -1916,12 +1939,9 @@ ip_rsvp_vif_init(so, m)

}

int

-ip_rsvp_vif_done(so, m)

- struct socket *so;

- struct mbuf *m;

+ip_rsvp_vif_done(struct socket *so, struct mbuf *m)

{

- int i;

- int s;

+ int vifi, s;

if (rsvpdebug)

printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",

@@ -1935,27 +1955,27 @@ ip_rsvp_vif_done(so, m)

if (m == NULL || m->m_len != sizeof(int)) {

return (EINVAL);

}

- i = *(mtod(m, int *));

+ vifi = *(mtod(m, int *));

s = splsoftnet();

/* Check vif. */

- if (!legal_vif_num(i)) {

+ if (!legal_vif_num(vifi)) {

splx(s);

return (EADDRNOTAVAIL);

}

if (rsvpdebug)

printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",

- viftable[i].v_rsvpd, so);

+ viftable[vifi].v_rsvpd, so);

- viftable[i].v_rsvpd = NULL;

+ viftable[vifi].v_rsvpd = NULL;

* This may seem silly, but we need to be sure we don't over-decrement

* the RSVP counter, in case something slips up.

- if (viftable[i].v_rsvp_on) {

- viftable[i].v_rsvp_on = 0;

+ if (viftable[vifi].v_rsvp_on) {

+ viftable[vifi].v_rsvp_on = 0;

rsvp_on--;

}

@@ -1964,11 +1984,9 @@ ip_rsvp_vif_done(so, m)

}

void

-ip_rsvp_force_done(so)

- struct socket *so;

+ip_rsvp_force_done(struct socket *so)

{

- int vifi;

- int s;

+ int vifi, s;

/* Don't bother if it is not the right type of socket. */

if (so->so_type != SOCK_RAW ||

@@ -2001,14 +2019,11 @@ ip_rsvp_force_done(so)

}

void

-rsvp_input(m, ifp)

- struct mbuf *m;

- struct ifnet *ifp;

+rsvp_input(struct mbuf *m, struct ifnet *ifp)

{

- int vifi;

+ int vifi, s;

struct ip *ip = mtod(m, struct ip *);

static struct sockaddr_in rsvp_src = { sizeof(sin), AF_INET };

- int s;

if (rsvpdebug)

printf("rsvp_input: rsvp_on %d\n", rsvp_on);

@@ -2031,7 +2046,7 @@ rsvp_input(m, ifp)

if (rsvpdebug)

printf("rsvp_input: "

"Sending packet up old-style socket\n");

- rip_input(m, 0);

+ rip_input(m, 0); /*XXX*/

return;

}

diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h
index 72ce308085c..747960e8bf6 100644
--- a/sys/netinet/ip_mroute.h
+++ b/sys/netinet/ip_mroute.h

@@ -1,5 +1,5 @@

-/* $OpenBSD: ip_mroute.h,v 1.10 2004/08/24 20:31:16 brad Exp $ */

-/* $NetBSD: ip_mroute.h,v 1.10 1996/02/13 23:42:55 christos Exp $ */

+/* $OpenBSD: ip_mroute.h,v 1.11 2004/11/24 01:25:42 mcbride Exp $ */

+/* $NetBSD: ip_mroute.h,v 1.23 2004/04/21 17:49:46 itojun Exp $ */

#ifndef _NETINET_IP_MROUTE_H_

#define _NETINET_IP_MROUTE_H_

@@ -16,6 +16,7 @@

#include <sys/queue.h>

+#include <sys/timeout.h>

* Multicast Routing set/getsockopt commands.

@@ -27,7 +28,7 @@

#define MRT_ADD_MFC 104 /* insert forwarding cache entry */

#define MRT_DEL_MFC 105 /* delete forwarding cache entry */

#define MRT_VERSION 106 /* get kernel version number */

-#define MRT_ASSERT 107 /* enable PIM assert processing */

+#define MRT_ASSERT 107 /* enable assert processing */

@@ -63,7 +64,6 @@ struct vifctl {

* Argument structure for MRT_ADD_MFC and MRT_DEL_MFC.

- * (mfcc_tos to be added at a future point)

struct mfcctl {

struct in_addr mfcc_origin; /* ip origin of mcasts */

@@ -118,22 +118,18 @@ struct mrtstat {

#ifdef _KERNEL

- * Token bucket filter at each vif

- */

-struct tbf {

- u_int32_t last_pkt_t; /* arr. time of last pkt */

- u_int32_t n_tok; /* no of tokens in bucket */

- u_int32_t q_len; /* length of queue at this vif */

-};

-/*

* The kernel's virtual-interface structure.

struct vif {

+ struct mbuf *tbf_q, **tbf_t; /* packet queue */

+ struct timeval tbf_last_pkt_t; /* arr. time of last pkt */

+ u_int32_t tbf_n_tok; /* no of tokens in bucket */

+ u_int32_t tbf_q_len; /* length of queue at this vif */

+ u_int32_t tbf_max_q_len; /* max. queue length */

u_int8_t v_flags; /* VIFF_ flags defined above */

u_int8_t v_threshold; /* min ttl required to forward on vif */

u_int32_t v_rate_limit; /* max rate */

- struct tbf v_tbf; /* token bucket structure at intf. */

struct in_addr v_lcl_addr; /* local interface address */

struct in_addr v_rmt_addr; /* remote address (tunnels only) */

struct ifnet *v_ifp; /* pointer to interface */

@@ -142,6 +138,7 @@ struct vif {

u_long v_bytes_in; /* # bytes in on interface */

u_long v_bytes_out; /* # bytes out on interface */

struct route v_route; /* cached route if this is a tunnel */

+ struct timeout v_repq_ch; /* for tbf_reprocess_q() */

#ifdef RSVP_ISI

int v_rsvp_on; /* # RSVP listening on this vif */

struct socket *v_rsvpd; /* # RSVPD daemon */

@@ -175,8 +172,8 @@ struct igmpmsg {

u_int32_t unused1;

u_int32_t unused2;

u_int8_t im_msgtype; /* what type of message */

-#define IGMPMSG_NOCACHE 1

-#define IGMPMSG_WRONGVIF 2

+#define IGMPMSG_NOCACHE 1 /* no MFC in the kernel */

+#define IGMPMSG_WRONGVIF 2 /* packet came from wrong interface */

u_int8_t im_mbz; /* must be zero */

u_int8_t im_vif; /* vif rec'd on */

u_int8_t unused3;

@@ -204,19 +201,11 @@ struct rtdetq {

#define MAX_BKT_SIZE 10000 /* 10K bytes size */

#define MAXQSIZE 10 /* max. no of pkts in token queue */

-/*

- * Queue structure at each vif

- */

-struct pkt_queue {

- u_int32_t pkt_len; /* length of packet in queue */

- struct mbuf *pkt_m; /* pointer to packet mbuf */

- struct ip *pkt_ip; /* pointer to ip header */

-};

-int ip_mrouter_set(int, struct socket *, struct mbuf **);

-int ip_mrouter_get(int, struct socket *, struct mbuf **);

+int ip_mrouter_set(struct socket *, int, struct mbuf **);

+int ip_mrouter_get(struct socket *, int, struct mbuf **);

int mrt_ioctl(struct socket *, u_long, caddr_t);

int ip_mrouter_done(void);

+void ip_mrouter_detach(struct ifnet *);

void reset_vif(struct vif *);

void vif_delete(struct ifnet *);

#ifdef RSVP_ISI

@@ -225,7 +214,7 @@ int legal_vif_num(int);

int ip_rsvp_vif_init(struct socket *, struct mbuf *);

int ip_rsvp_vif_done(struct socket *, struct mbuf *);

void ip_rsvp_force_done(struct socket *);

-void rsvp_input(struct mbuf *, int, int);

+void rsvp_input(struct mbuf *, int, int);

#else

int ip_mforward(struct mbuf *, struct ifnet *);

#endif /* RSVP_ISI */

diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index df0cc77c67d..483c37ef208 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: raw_ip.c,v 1.32 2003/12/21 14:57:19 markus Exp $ */

+/* $OpenBSD: raw_ip.c,v 1.33 2004/11/24 01:25:42 mcbride Exp $ */

/* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */

@@ -297,10 +297,10 @@ rip_ctloutput(op, so, level, optname, m)

#ifdef MROUTING

switch (op) {

case PRCO_SETOPT:

- error = ip_mrouter_set(optname, so, m);

+ error = ip_mrouter_set(so, optname, m);

break;

case PRCO_GETOPT:

- error = ip_mrouter_get(optname, so, m);

+ error = ip_mrouter_get(so, optname, m);

break;

default:

error = EINVAL;