author    Kenjiro Cho <kjc@cvs.openbsd.org>  2001-06-27 05:28:37 +0000
committer Kenjiro Cho <kjc@cvs.openbsd.org>  2001-06-27 05:28:37 +0000
commit    e13e0cc15f99577acddb2317a916f349405ee961 (patch)
tree      6b866f8e79c8e6b96a779aa877dd9b6559059ba3 /sys/altq
parent    c00082e97a3137a461a24364470c06ac571b833e (diff)
import ALTQ, alternate queueing support, from KAME.
ALTQ makes it possible to switch among various queueing disciplines on output network interfaces.
Diffstat (limited to 'sys/altq')
-rw-r--r--  sys/altq/altq.h                  199
-rw-r--r--  sys/altq/altq_afmap.c            412
-rw-r--r--  sys/altq/altq_afmap.h            104
-rw-r--r--  sys/altq/altq_blue.c             691
-rw-r--r--  sys/altq/altq_blue.h             119
-rw-r--r--  sys/altq/altq_cbq.c              973
-rw-r--r--  sys/altq/altq_cbq.h              220
-rw-r--r--  sys/altq/altq_cdnr.c            1375
-rw-r--r--  sys/altq/altq_cdnr.h             334
-rw-r--r--  sys/altq/altq_classq.h           207
-rw-r--r--  sys/altq/altq_conf.c             424
-rw-r--r--  sys/altq/altq_conf.h             108
-rw-r--r--  sys/altq/altq_fifoq.c            415
-rw-r--r--  sys/altq/altq_fifoq.h             80
-rw-r--r--  sys/altq/altq_flowvalve.h         93
-rw-r--r--  sys/altq/altq_hfsc.c            1811
-rw-r--r--  sys/altq/altq_hfsc.h             279
-rw-r--r--  sys/altq/altq_localq.c            69
-rw-r--r--  sys/altq/altq_priq.c             866
-rw-r--r--  sys/altq/altq_priq.h             161
-rw-r--r--  sys/altq/altq_red.c             1475
-rw-r--r--  sys/altq/altq_red.h              190
-rw-r--r--  sys/altq/altq_rio.c              829
-rw-r--r--  sys/altq/altq_rio.h              140
-rw-r--r--  sys/altq/altq_rmclass.c         1874
-rw-r--r--  sys/altq/altq_rmclass.h          267
-rw-r--r--  sys/altq/altq_rmclass_debug.h    113
-rw-r--r--  sys/altq/altq_subr.c            1552
-rw-r--r--  sys/altq/altq_var.h              238
-rw-r--r--  sys/altq/altq_wfq.c              752
-rw-r--r--  sys/altq/altq_wfq.h              125
-rw-r--r--  sys/altq/altqconf.h               16
-rw-r--r--  sys/altq/if_altq.h               166
33 files changed, 16677 insertions, 0 deletions
diff --git a/sys/altq/altq.h b/sys/altq/altq.h
new file mode 100644
index 00000000000..8f1295a7935
--- /dev/null
+++ b/sys/altq/altq.h
@@ -0,0 +1,199 @@
+/* $OpenBSD: altq.h,v 1.1 2001/06/27 05:28:34 kjc Exp $ */
+/* $KAME: altq.h,v 1.6 2000/12/14 08:12:45 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1998-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_ALTQ_H_
+#define _ALTQ_ALTQ_H_
+
+#include <sys/param.h>
+#include <sys/ioccom.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
+/* altq discipline type */
+#define ALTQT_NONE 0 /* reserved */
+#define ALTQT_CBQ 1 /* cbq */
+#define ALTQT_WFQ 2 /* wfq */
+#define ALTQT_AFMAP 3 /* afmap */
+#define ALTQT_FIFOQ 4 /* fifoq */
+#define ALTQT_RED 5 /* red */
+#define ALTQT_RIO 6 /* rio */
+#define ALTQT_LOCALQ 7 /* local use */
+#define ALTQT_HFSC 8 /* hfsc */
+#define ALTQT_CDNR 9 /* traffic conditioner */
+#define ALTQT_BLUE 10 /* blue */
+#define ALTQT_PRIQ 11 /* priority queue */
+#define ALTQT_MAX 12 /* should be max discipline type + 1 */
+
+struct altqreq {
+ char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ u_long arg; /* request-specific argument */
+};
+
+/* simple token bucket meter profile */
+struct tb_profile {
+ u_int rate; /* rate in bit-per-sec */
+ u_int depth; /* depth in bytes */
+};
+
+struct tbrreq {
+ char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct tb_profile tb_prof; /* token bucket profile */
+};
+
+/*
+ * common network flow info structure
+ */
+struct flowinfo {
+ u_char fi_len; /* total length */
+ u_char fi_family; /* address family */
+ u_int8_t fi_data[46]; /* actually longer; address family
+ specific flow info. */
+};
+
+/*
+ * flow info structure for internet protocol family.
+ * (currently this is the only protocol family supported)
+ */
+struct flowinfo_in {
+ u_char fi_len; /* sizeof(struct flowinfo_in) */
+ u_char fi_family; /* AF_INET */
+ u_int8_t fi_proto; /* IPPROTO_XXX */
+ u_int8_t fi_tos; /* type-of-service */
+ struct in_addr fi_dst; /* dest address */
+ struct in_addr fi_src; /* src address */
+ u_int16_t fi_dport; /* dest port */
+ u_int16_t fi_sport; /* src port */
+ u_int32_t fi_gpi; /* generalized port id for ipsec */
+ u_int8_t _pad[28]; /* make the size equal to
+ flowinfo_in6 */
+};
+
+#ifdef SIN6_LEN
+struct flowinfo_in6 {
+ u_char fi6_len; /* sizeof(struct flowinfo_in6) */
+ u_char fi6_family; /* AF_INET6 */
+ u_int8_t fi6_proto; /* IPPROTO_XXX */
+ u_int8_t fi6_tclass; /* traffic class */
+ u_int32_t fi6_flowlabel; /* ipv6 flowlabel */
+ u_int16_t fi6_dport; /* dest port */
+ u_int16_t fi6_sport; /* src port */
+ u_int32_t fi6_gpi; /* generalized port id */
+ struct in6_addr fi6_dst; /* dest address */
+ struct in6_addr fi6_src; /* src address */
+};
+#endif /* SIN6_LEN */
+
+/*
+ * flow filters for AF_INET and AF_INET6
+ */
+struct flow_filter {
+ int ff_ruleno;
+ struct flowinfo_in ff_flow;
+ struct {
+ struct in_addr mask_dst;
+ struct in_addr mask_src;
+ u_int8_t mask_tos;
+ u_int8_t _pad[3];
+ } ff_mask;
+ u_int8_t _pad2[24]; /* make the size equal to flow_filter6 */
+};
+
+#ifdef SIN6_LEN
+struct flow_filter6 {
+ int ff_ruleno;
+ struct flowinfo_in6 ff_flow6;
+ struct {
+ struct in6_addr mask6_dst;
+ struct in6_addr mask6_src;
+ u_int8_t mask6_tclass;
+ u_int8_t _pad[3];
+ } ff_mask6;
+};
+#endif /* SIN6_LEN */
+
+/*
+ * generic packet counter
+ */
+struct pktcntr {
+ u_int64_t packets;
+ u_int64_t bytes;
+};
+
+#define PKTCNTR_ADD(cntr, len) \
+ do { (cntr)->packets++; (cntr)->bytes += (len); } while (0)
+
+/*
+ * altq related ioctls
+ */
+#define ALTQGTYPE _IOWR('q', 0, struct altqreq) /* get queue type */
+#if 0
+/*
+ * these ioctls are currently discipline-specific but could be shared
+ * in the future.
+ */
+#define ALTQATTACH _IOW('q', 1, struct altqreq) /* attach discipline */
+#define ALTQDETACH _IOW('q', 2, struct altqreq) /* detach discipline */
+#define ALTQENABLE _IOW('q', 3, struct altqreq) /* enable discipline */
+#define ALTQDISABLE _IOW('q', 4, struct altqreq) /* disable discipline*/
+#define ALTQCLEAR _IOW('q', 5, struct altqreq) /* (re)initialize */
+#define ALTQCONFIG _IOWR('q', 6, struct altqreq) /* set config params */
+#define ALTQADDCLASS _IOWR('q', 7, struct altqreq) /* add a class */
+#define ALTQMODCLASS _IOWR('q', 8, struct altqreq) /* modify a class */
+#define ALTQDELCLASS _IOWR('q', 9, struct altqreq) /* delete a class */
+#define ALTQADDFILTER _IOWR('q', 10, struct altqreq) /* add a filter */
+#define ALTQDELFILTER _IOWR('q', 11, struct altqreq) /* delete a filter */
+#define ALTQGETSTATS _IOWR('q', 12, struct altqreq) /* get statistics */
+#define ALTQGETCNTR _IOWR('q', 13, struct altqreq) /* get a pkt counter */
+#endif /* 0 */
+#define ALTQTBRSET _IOW('q', 14, struct tbrreq) /* set tb regulator */
+#define ALTQTBRGET _IOWR('q', 15, struct tbrreq) /* get tb regulator */
+
+/* queue macros only in FreeBSD */
+#ifndef LIST_EMPTY
+#define LIST_EMPTY(head) ((head)->lh_first == NULL)
+#endif
+#ifndef LIST_FOREACH
+#define LIST_FOREACH(var, head, field) \
+ for((var) = (head)->lh_first; (var); (var) = (var)->field.le_next)
+#endif
+
+#ifdef KERNEL
+#ifndef _KERNEL
+#define _KERNEL
+#endif
+#endif
+
+#ifdef _KERNEL
+#include <altq/altq_var.h>
+#endif
+
+#endif /* _ALTQ_ALTQ_H_ */
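
As a usage note for altq.h: the token-bucket regulator interface reduces to
filling in struct tbrreq and issuing ALTQTBRSET. A minimal userland sketch
follows; the /dev/altq/altq device path is an assumption here, not something
this commit establishes.

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <altq/altq.h>
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	/* install a token-bucket regulator on ifname; 0 on success */
	int
	set_tbr(const char *ifname, u_int rate_bps, u_int depth_bytes)
	{
		struct tbrreq req;
		int fd;

		memset(&req, 0, sizeof(req));
		strncpy(req.ifname, ifname, IFNAMSIZ - 1);
		req.tb_prof.rate = rate_bps;	 /* rate in bits per second */
		req.tb_prof.depth = depth_bytes; /* bucket depth in bytes */

		if ((fd = open("/dev/altq/altq", O_RDWR)) < 0) /* assumed path */
			return (-1);
		if (ioctl(fd, ALTQTBRSET, &req) < 0) {
			close(fd);
			return (-1);
		}
		close(fd);
		return (0);
	}
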
diff --git a/sys/altq/altq_afmap.c b/sys/altq/altq_afmap.c
new file mode 100644
index 00000000000..0e3a5e1746c
--- /dev/null
+++ b/sys/altq/altq_afmap.c
@@ -0,0 +1,412 @@
+/* $OpenBSD: altq_afmap.c,v 1.1 2001/06/27 05:28:34 kjc Exp $ */
+/* $KAME: altq_afmap.c,v 1.7 2000/12/14 08:12:45 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * experimental:
+ * mapping an ip flow to atm vpi/vci.
+ * this module is not related to queueing at all, but uses the altq
+ * flowinfo mechanism. it's just put in the altq framework since
+ * it is easy to add devices to altq.
+ */
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_AFMAP
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/uio.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <netinet/in.h>
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_afmap.h>
+
+LIST_HEAD(, afm_head) afhead_chain;
+
+static struct afm *afm_match4 __P((struct afm_head *, struct flowinfo_in *));
+#ifdef INET6
+static struct afm *afm_match6 __P((struct afm_head *, struct flowinfo_in6 *));
+#endif
+
+/*
+ * locking rules: afm_match() can be called from a network-level
+ * interrupt, so the other routines that manipulate these lists must
+ * be called at splnet().
+ */
+int
+afm_alloc(ifp)
+ struct ifnet *ifp;
+{
+ struct afm_head *head;
+
+ MALLOC(head, struct afm_head *, sizeof(struct afm_head),
+ M_DEVBUF, M_WAITOK);
+ if (head == NULL)
+ panic("afm_alloc: malloc failed!");
+ bzero(head, sizeof(struct afm_head));
+
+ /* initialize per interface afmap list */
+ LIST_INIT(&head->afh_head);
+
+ head->afh_ifp = ifp;
+
+ /* add this afm_head to the chain */
+ LIST_INSERT_HEAD(&afhead_chain, head, afh_chain);
+
+ return (0);
+}
+
+int
+afm_dealloc(ifp)
+ struct ifnet *ifp;
+{
+ struct afm_head *head;
+
+ for (head = afhead_chain.lh_first; head != NULL;
+ head = head->afh_chain.le_next)
+ if (head->afh_ifp == ifp)
+ break;
+ if (head == NULL)
+ return (-1);
+
+ afm_removeall(ifp);
+
+ LIST_REMOVE(head, afh_chain);
+
+ FREE(head, M_DEVBUF);
+ return 0;
+}
+
+struct afm *
+afm_top(ifp)
+ struct ifnet *ifp;
+{
+ struct afm_head *head;
+
+ for (head = afhead_chain.lh_first; head != NULL;
+ head = head->afh_chain.le_next)
+ if (head->afh_ifp == ifp)
+ break;
+ if (head == NULL)
+ return NULL;
+
+ return (head->afh_head.lh_first);
+}
+
+int afm_add(ifp, flowmap)
+ struct ifnet *ifp;
+ struct atm_flowmap *flowmap;
+{
+ struct afm_head *head;
+ struct afm *afm;
+
+ for (head = afhead_chain.lh_first; head != NULL;
+ head = head->afh_chain.le_next)
+ if (head->afh_ifp == ifp)
+ break;
+ if (head == NULL)
+ return (-1);
+
+ if (flowmap->af_flowinfo.fi_family == AF_INET) {
+ if (flowmap->af_flowinfo.fi_len != sizeof(struct flowinfo_in))
+ return (EINVAL);
+#ifdef INET6
+ } else if (flowmap->af_flowinfo.fi_family == AF_INET6) {
+ if (flowmap->af_flowinfo.fi_len != sizeof(struct flowinfo_in6))
+ return (EINVAL);
+#endif
+ } else
+ return (EINVAL);
+
+ MALLOC(afm, struct afm *, sizeof(struct afm),
+ M_DEVBUF, M_WAITOK);
+ if (afm == NULL)
+ return (ENOMEM);
+ bzero(afm, sizeof(struct afm));
+
+ afm->afm_vci = flowmap->af_vci;
+ afm->afm_vpi = flowmap->af_vpi;
+ bcopy(&flowmap->af_flowinfo, &afm->afm_flowinfo,
+ flowmap->af_flowinfo.fi_len);
+
+ LIST_INSERT_HEAD(&head->afh_head, afm, afm_list);
+ return 0;
+}
+
+int
+afm_remove(afm)
+ struct afm *afm;
+{
+ LIST_REMOVE(afm, afm_list);
+ FREE(afm, M_DEVBUF);
+ return (0);
+}
+
+int
+afm_removeall(ifp)
+ struct ifnet *ifp;
+{
+ struct afm_head *head;
+ struct afm *afm;
+
+ for (head = afhead_chain.lh_first; head != NULL;
+ head = head->afh_chain.le_next)
+ if (head->afh_ifp == ifp)
+ break;
+ if (head == NULL)
+ return (-1);
+
+ while ((afm = head->afh_head.lh_first) != NULL)
+ afm_remove(afm);
+ return (0);
+}
+
+struct afm *
+afm_lookup(ifp, vpi, vci)
+ struct ifnet *ifp;
+ int vpi, vci;
+{
+ struct afm_head *head;
+ struct afm *afm;
+
+ for (head = afhead_chain.lh_first; head != NULL;
+ head = head->afh_chain.le_next)
+ if (head->afh_ifp == ifp)
+ break;
+ if (head == NULL)
+ return NULL;
+
+ for (afm = head->afh_head.lh_first; afm != NULL;
+ afm = afm->afm_list.le_next)
+ if (afm->afm_vpi == vpi && afm->afm_vci == vci)
+ break;
+ return afm;
+}
+
+static struct afm *
+afm_match4(head, fp)
+ struct afm_head *head;
+ struct flowinfo_in *fp;
+{
+ struct afm *afm;
+
+ for (afm = head->afh_head.lh_first; afm != NULL;
+ afm = afm->afm_list.le_next) {
+ if (afm->afm_flowinfo4.fi_dst.s_addr != 0 &&
+ afm->afm_flowinfo4.fi_dst.s_addr != fp->fi_dst.s_addr)
+ continue;
+ if (afm->afm_flowinfo4.fi_dport != 0 &&
+ afm->afm_flowinfo4.fi_dport != fp->fi_dport)
+ continue;
+ if (afm->afm_flowinfo4.fi_src.s_addr != 0 &&
+ afm->afm_flowinfo4.fi_src.s_addr != fp->fi_src.s_addr)
+ continue;
+ if (afm->afm_flowinfo4.fi_sport != 0 &&
+ afm->afm_flowinfo4.fi_sport != fp->fi_sport)
+ continue;
+ if (afm->afm_flowinfo4.fi_proto != 0 &&
+ afm->afm_flowinfo4.fi_proto != fp->fi_proto)
+ continue;
+ /* match found! */
+ return (afm);
+ }
+ return NULL;
+}
+
+#ifdef INET6
+static struct afm *
+afm_match6(head, fp)
+ struct afm_head *head;
+ struct flowinfo_in6 *fp;
+{
+ struct afm *afm;
+
+ for (afm = head->afh_head.lh_first; afm != NULL;
+ afm = afm->afm_list.le_next) {
+ if (afm->afm_flowinfo6.fi6_flowlabel != 0 &&
+ afm->afm_flowinfo6.fi6_flowlabel != fp->fi6_flowlabel)
+ continue;
+#ifdef notyet
+ if (!IN6_IS_ADDR_UNSPECIFIED(&afm->afm_flowinfo6.fi6_dst) &&
+ !IN6_ARE_ADDR_EQUAL(&afm->afm_flowinfo6.fi6_dst,
+ &fp->fi6_dst))
+ continue;
+ if (afm->afm_flowinfo6.fi6_dport != 0 &&
+ afm->afm_flowinfo6.fi6_dport != fp->fi6_dport)
+ continue;
+#endif
+ if (!IN6_IS_ADDR_UNSPECIFIED(&afm->afm_flowinfo6.fi6_src) &&
+ !IN6_ARE_ADDR_EQUAL(&afm->afm_flowinfo6.fi6_src,
+ &fp->fi6_src))
+ continue;
+#ifdef notyet
+ if (afm->afm_flowinfo6.fi6_sport != 0 &&
+ afm->afm_flowinfo6.fi6_sport != fp->fi6_sport)
+ continue;
+#endif
+ if (afm->afm_flowinfo6.fi6_proto != 0 &&
+ afm->afm_flowinfo6.fi6_proto != fp->fi6_proto)
+ continue;
+ /* match found! */
+ return (afm);
+ }
+ return NULL;
+}
+#endif
+
+/* should be called in splimp() */
+struct afm *
+afm_match(ifp, flow)
+ struct ifnet *ifp;
+ struct flowinfo *flow;
+{
+ struct afm_head *head;
+
+ for (head = afhead_chain.lh_first; head != NULL;
+ head = head->afh_chain.le_next)
+ if (head->afh_ifp == ifp)
+ break;
+ if (head == NULL)
+ return NULL;
+
+ switch (flow->fi_family) {
+ case AF_INET:
+ return (afm_match4(head, (struct flowinfo_in *)flow));
+
+#ifdef INET6
+ case AF_INET6:
+ return (afm_match6(head, (struct flowinfo_in6 *)flow));
+#endif
+
+ default:
+ return NULL;
+ }
+}
+
+/*
+ * afm device interface
+ */
+altqdev_decl(afm);
+
+int
+afmopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ return 0;
+}
+
+int
+afmclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ int err, error = 0;
+ struct atm_flowmap fmap;
+ struct afm_head *head;
+
+ for (head = afhead_chain.lh_first; head != NULL;
+ head = head->afh_chain.le_next) {
+
+ /* call interface to clean up maps */
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+ sprintf(fmap.af_ifname, "%s", head->afh_ifp->if_xname);
+#else
+ sprintf(fmap.af_ifname, "%s%d",
+ head->afh_ifp->if_name, head->afh_ifp->if_unit);
+#endif
+ err = afmioctl(dev, AFM_CLEANFMAP, (caddr_t)&fmap, flag, p);
+ if (err && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+afmioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ int error = 0;
+ struct atm_flowmap *flowmap;
+ struct ifnet *ifp;
+
+ /* check cmd for superuser only */
+ switch (cmd) {
+ case AFM_GETFMAP:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ error = suser(p);
+#else
+ error = suser(p->p_ucred, &p->p_acflag);
+#endif
+ if (error)
+ return (error);
+ break;
+ }
+
+ /* lookup interface */
+ flowmap = (struct atm_flowmap *)addr;
+ flowmap->af_ifname[IFNAMSIZ-1] = '\0';
+ ifp = ifunit(flowmap->af_ifname);
+ if (ifp == NULL || ifp->if_ioctl == NULL ||
+ (ifp->if_flags & IFF_RUNNING) == 0)
+ error = ENXIO;
+ else
+ error = ifp->if_ioctl(ifp, cmd, addr);
+
+ return error;
+}
+
+#endif /* ALTQ_AFMAP */
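
The locking comment in altq_afmap.c states a calling convention rather than
enforcing one. A hedged sketch of what a conforming kernel caller could look
like; the wrapper itself is hypothetical, and splnet()/splx() are the classic
BSD interrupt-priority primitives.

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <net/if.h>
	#include <altq/altq_afmap.h>

	static int
	afm_add_locked(struct ifnet *ifp, struct atm_flowmap *flowmap)
	{
		int s, error;

		s = splnet();			/* block network-level interrupts */
		error = afm_add(ifp, flowmap);	/* cannot race afm_match() now */
		splx(s);			/* restore previous priority */
		return (error);
	}
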
diff --git a/sys/altq/altq_afmap.h b/sys/altq/altq_afmap.h
new file mode 100644
index 00000000000..dd48bed2bb2
--- /dev/null
+++ b/sys/altq/altq_afmap.h
@@ -0,0 +1,104 @@
+/* $OpenBSD: altq_afmap.h,v 1.1 2001/06/27 05:28:34 kjc Exp $ */
+/* $KAME: altq_afmap.h,v 1.5 2000/12/14 08:12:45 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_AFMAP_H_
+#define _ALTQ_ALTQ_AFMAP_H_
+
+#include <sys/queue.h>
+#include <altq/altq.h>
+
+struct atm_flowmap {
+ char af_ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ u_int8_t af_vpi;
+ u_int16_t af_vci;
+ u_int32_t af_pcr; /* peak cell rate */
+ union {
+ struct flowinfo afu_fi;
+ struct flowinfo_in afu_fi4;
+#ifdef SIN6_LEN
+ struct flowinfo_in6 afu_fi6;
+#endif
+ } af_fiu;
+#define af_flowinfo af_fiu.afu_fi
+#define af_flowinfo4 af_fiu.afu_fi4
+#define af_flowinfo6 af_fiu.afu_fi6
+
+ /* statistics */
+ u_int32_t afs_packets; /* total packet count */
+ u_int32_t afs_bytes; /* total byte count */
+};
+
+/* set or get flowmap */
+#define AFM_ADDFMAP _IOWR('F', 30, struct atm_flowmap)
+#define AFM_DELFMAP _IOWR('F', 31, struct atm_flowmap)
+#define AFM_CLEANFMAP _IOWR('F', 32, struct atm_flowmap)
+#define AFM_GETFMAP _IOWR('F', 33, struct atm_flowmap)
+
+#ifdef _KERNEL
+
+/* per flow information */
+struct afm {
+ LIST_ENTRY(afm) afm_list;
+ u_int16_t afm_vci;
+ u_int8_t afm_vpi;
+ union {
+ struct flowinfo afmu_fi;
+ struct flowinfo_in afmu_fi4;
+#ifdef SIN6_LEN
+ struct flowinfo_in6 afmu_fi6;
+#endif
+ } afm_fiu;
+#define afm_flowinfo afm_fiu.afmu_fi
+#define afm_flowinfo4 afm_fiu.afmu_fi4
+#define afm_flowinfo6 afm_fiu.afmu_fi6
+
+ /* statistics */
+ u_int32_t afms_packets; /* total packet count */
+ u_int32_t afms_bytes; /* total byte count */
+};
+
+/* per interface */
+struct afm_head {
+ LIST_ENTRY(afm_head) afh_chain;
+ LIST_HEAD(, afm) afh_head;
+ struct ifnet *afh_ifp;
+};
+
+struct afm *afm_top __P((struct ifnet *));
+int afm_alloc __P((struct ifnet *));
+int afm_dealloc __P((struct ifnet *));
+int afm_add __P((struct ifnet *, struct atm_flowmap *));
+int afm_remove __P((struct afm *));
+int afm_removeall __P((struct ifnet *));
+struct afm *afm_lookup __P((struct ifnet *, int, int));
+struct afm *afm_match __P((struct ifnet *, struct flowinfo *));
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_AFMAP_H_ */
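
Putting the header together: adding a flow map means filling in struct
atm_flowmap and issuing AFM_ADDFMAP against the afm device. A sketch mapping
TCP traffic for one destination onto a VPI/VCI pair; zeroed flowinfo fields
act as wildcards, as afm_match4() above shows, and the /dev/altq/afm device
path is an assumption.

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>
	#include <altq/altq_afmap.h>
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	/* map TCP traffic to 10.0.0.2 onto VPI 0 / VCI 100 on "en0" */
	int
	map_flow(void)
	{
		struct atm_flowmap fmap;
		int fd, error;

		memset(&fmap, 0, sizeof(fmap));
		strncpy(fmap.af_ifname, "en0", IFNAMSIZ - 1);
		fmap.af_vpi = 0;
		fmap.af_vci = 100;
		fmap.af_flowinfo4.fi_len = sizeof(struct flowinfo_in);
		fmap.af_flowinfo4.fi_family = AF_INET;
		fmap.af_flowinfo4.fi_proto = IPPROTO_TCP;
		fmap.af_flowinfo4.fi_dst.s_addr = inet_addr("10.0.0.2");
		/* src, ports, tos left zero: wildcards */

		if ((fd = open("/dev/altq/afm", O_RDWR)) < 0)
			return (-1);
		error = ioctl(fd, AFM_ADDFMAP, &fmap);
		close(fd);
		return (error);
	}
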
diff --git a/sys/altq/altq_blue.c b/sys/altq/altq_blue.c
new file mode 100644
index 00000000000..291fe0883b4
--- /dev/null
+++ b/sys/altq/altq_blue.c
@@ -0,0 +1,691 @@
+/* $OpenBSD: altq_blue.c,v 1.1 2001/06/27 05:28:34 kjc Exp $ */
+/* $KAME: altq_blue.c,v 1.7 2000/12/14 08:12:45 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_BLUE /* blue is enabled by ALTQ_BLUE option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_blue.h>
+
+/*
+ * Blue was proposed and implemented by Wu-chang Feng <wuchang@eecs.umich.edu>.
+ * More information on Blue is available from
+ * http://www.eecs.umich.edu/~wuchang/blue/
+ */
+
+/* fixed-point arithmetic uses a 12-bit fraction */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+#define BLUE_LIMIT 200 /* default max queue length */
+#define BLUE_STATS /* collect statistics */
+
+/* blue_list keeps all blue_state_t's allocated. */
+static blue_queue_t *blue_list = NULL;
+
+/* internal function prototypes */
+static int blue_enqueue __P((struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *));
+static struct mbuf *blue_dequeue __P((struct ifaltq *, int));
+static int drop_early __P((blue_t *));
+static int mark_ecn __P((struct mbuf *, struct altq_pktattr *, int));
+static int blue_detach __P((blue_queue_t *));
+static int blue_request __P((struct ifaltq *, int, void *));
+
+/*
+ * blue device interface
+ */
+altqdev_decl(blue);
+
+int
+blueopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+blueclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ blue_queue_t *rqp;
+ int err, error = 0;
+
+ while ((rqp = blue_list) != NULL) {
+ /* destroy all */
+ err = blue_detach(rqp);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+blueioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ blue_queue_t *rqp;
+ struct blue_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case BLUE_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case BLUE_ENABLE:
+ ifacep = (struct blue_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case BLUE_DISABLE:
+ ifacep = (struct blue_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case BLUE_IF_ATTACH:
+ ifp = ifunit(((struct blue_interface *)addr)->blue_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize blue_state_t */
+ MALLOC(rqp, blue_queue_t *, sizeof(blue_queue_t), M_DEVBUF, M_WAITOK);
+ bzero(rqp, sizeof(blue_queue_t));
+
+ MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ MALLOC(rqp->rq_blue, blue_t *, sizeof(blue_t), M_DEVBUF, M_WAITOK);
+ bzero(rqp->rq_blue, sizeof(blue_t));
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = BLUE_LIMIT;
+
+ /* default packet time: 1000 bytes * 8 bits / 10Mbps = 800 usec */
+ blue_init(rqp->rq_blue, 0, 800, 1000, 50000);
+
+ /*
+ * set BLUE to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_BLUE, rqp,
+ blue_enqueue, blue_dequeue, blue_request,
+ NULL, NULL);
+ if (error) {
+ FREE(rqp->rq_blue, M_DEVBUF);
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the blue list */
+ rqp->rq_next = blue_list;
+ blue_list = rqp;
+ break;
+
+ case BLUE_IF_DETACH:
+ ifacep = (struct blue_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = blue_detach(rqp);
+ break;
+
+ case BLUE_GETSTATS:
+ do {
+ struct blue_stats *q_stats;
+ blue_t *rp;
+
+ q_stats = (struct blue_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.blue_ifname,
+ ALTQT_BLUE)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ q_stats->q_len = qlen(rqp->rq_q);
+ q_stats->q_limit = qlimit(rqp->rq_q);
+
+ rp = rqp->rq_blue;
+ q_stats->q_pmark = rp->blue_pmark;
+ q_stats->xmit_packets = rp->blue_stats.xmit_packets;
+ q_stats->xmit_bytes = rp->blue_stats.xmit_bytes;
+ q_stats->drop_packets = rp->blue_stats.drop_packets;
+ q_stats->drop_bytes = rp->blue_stats.drop_bytes;
+ q_stats->drop_forced = rp->blue_stats.drop_forced;
+ q_stats->drop_unforced = rp->blue_stats.drop_unforced;
+ q_stats->marked_packets = rp->blue_stats.marked_packets;
+
+ } while (0);
+ break;
+
+ case BLUE_CONFIG:
+ do {
+ struct blue_conf *fc;
+ int limit;
+
+ fc = (struct blue_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.blue_ifname,
+ ALTQT_BLUE)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ limit = fc->blue_limit;
+ qlimit(rqp->rq_q) = limit;
+ fc->blue_limit = limit; /* write back the new value */
+ if (fc->blue_pkttime > 0)
+ rqp->rq_blue->blue_pkttime = fc->blue_pkttime;
+ if (fc->blue_max_pmark > 0)
+ rqp->rq_blue->blue_max_pmark = fc->blue_max_pmark;
+ if (fc->blue_hold_time > 0)
+ rqp->rq_blue->blue_hold_time = fc->blue_hold_time;
+ rqp->rq_blue->blue_flags = fc->blue_flags;
+
+ blue_init(rqp->rq_blue, rqp->rq_blue->blue_flags,
+ rqp->rq_blue->blue_pkttime,
+ rqp->rq_blue->blue_max_pmark,
+ rqp->rq_blue->blue_hold_time);
+ } while (0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int blue_detach(rqp)
+ blue_queue_t *rqp;
+{
+ blue_queue_t *tmp;
+ int error = 0;
+
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ altq_disable(rqp->rq_ifq);
+
+ if ((error = altq_detach(rqp->rq_ifq)))
+ return (error);
+
+ if (blue_list == rqp)
+ blue_list = rqp->rq_next;
+ else {
+ for (tmp = blue_list; tmp != NULL; tmp = tmp->rq_next)
+ if (tmp->rq_next == rqp) {
+ tmp->rq_next = rqp->rq_next;
+ break;
+ }
+ if (tmp == NULL)
+ printf("blue_detach: no state found in blue_list!\n");
+ }
+
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp->rq_blue, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * blue support routines
+ */
+
+int
+blue_init(rp, flags, pkttime, blue_max_pmark, blue_hold_time)
+ blue_t *rp;
+ int flags;
+ int pkttime;
+ int blue_max_pmark;
+ int blue_hold_time;
+{
+ int npkts_per_sec;
+
+ rp->blue_idle = 1;
+ rp->blue_flags = flags;
+ rp->blue_pkttime = pkttime;
+ rp->blue_max_pmark = blue_max_pmark;
+ rp->blue_hold_time = blue_hold_time;
+ if (pkttime == 0)
+ rp->blue_pkttime = 1;
+
+ /* when the link is very slow, adjust blue parameters */
+ npkts_per_sec = 1000000 / rp->blue_pkttime;
+ if (npkts_per_sec < 50) {
+ }
+ else if (npkts_per_sec < 300) {
+ }
+
+ microtime(&rp->blue_last);
+ return (0);
+}
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+static int
+blue_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
+ int error = 0;
+
+ if (blue_addq(rqp->rq_blue, rqp->rq_q, m, pktattr) == 0)
+ ifq->ifq_len++;
+ else
+ error = ENOBUFS;
+ return error;
+}
+
+#define DTYPE_NODROP 0 /* no drop */
+#define DTYPE_FORCED 1 /* a "forced" drop */
+#define DTYPE_EARLY 2 /* an "unforced" (early) drop */
+
+int
+blue_addq(rp, q, m, pktattr)
+ blue_t *rp;
+ class_queue_t *q;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ int droptype;
+
+ /*
+ * if we were idle, this is an enqueue onto an empty queue
+ * and we should decrement marking probability
+ *
+ */
+ if (rp->blue_idle) {
+ struct timeval now;
+ int t;
+ rp->blue_idle = 0;
+ microtime(&now);
+ t = (now.tv_sec - rp->blue_last.tv_sec);
+ if ( t > 1) {
+ rp->blue_pmark = 1;
+ microtime(&rp->blue_last);
+ } else {
+ t = t * 1000000 + (now.tv_usec - rp->blue_last.tv_usec);
+ if (t > rp->blue_hold_time) {
+ rp->blue_pmark--;
+ if (rp->blue_pmark < 0) rp->blue_pmark = 0;
+ microtime(&rp->blue_last);
+ }
+ }
+ }
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (drop_early(rp) && qlen(q) > 1) {
+ /* mark or drop by blue */
+ if ((rp->blue_flags & BLUEF_ECN) &&
+ mark_ecn(m, pktattr, rp->blue_flags)) {
+ /* successfully marked. do not drop. */
+#ifdef BLUE_STATS
+ rp->blue_stats.marked_packets++;
+#endif
+ } else {
+ /* unforced drop by blue */
+ droptype = DTYPE_EARLY;
+ }
+ }
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+ /* if successful or forced drop, enqueue this packet. */
+ if (droptype != DTYPE_EARLY)
+ _addq(q, m);
+
+ if (droptype != DTYPE_NODROP) {
+ if (droptype == DTYPE_EARLY) {
+ /* drop the incoming packet */
+#ifdef BLUE_STATS
+ rp->blue_stats.drop_unforced++;
+#endif
+ } else {
+ struct timeval now;
+ int t;
+ /* forced drop, select a victim packet in the queue. */
+ m = _getq_random(q);
+ microtime(&now);
+ t = (now.tv_sec - rp->blue_last.tv_sec);
+ t = t * 1000000 + (now.tv_usec - rp->blue_last.tv_usec);
+ if (t > rp->blue_hold_time) {
+ rp->blue_pmark += rp->blue_max_pmark >> 3;
+ if (rp->blue_pmark > rp->blue_max_pmark)
+ rp->blue_pmark = rp->blue_max_pmark;
+ microtime(&rp->blue_last);
+ }
+#ifdef BLUE_STATS
+ rp->blue_stats.drop_forced++;
+#endif
+ }
+#ifdef BLUE_STATS
+ rp->blue_stats.drop_packets++;
+ rp->blue_stats.drop_bytes += m->m_pkthdr.len;
+#endif
+ m_freem(m);
+ return (-1);
+ }
+ /* successfully queued */
+ return (0);
+}
+
+/*
+ * early-drop probability is kept in blue_pmark
+ *
+ */
+static int
+drop_early(rp)
+ blue_t *rp;
+{
+ if ((random() % rp->blue_max_pmark) < rp->blue_pmark) {
+ /* drop or mark */
+ return (1);
+ }
+ /* no drop/mark */
+ return (0);
+}
+
+/*
+ * try to mark CE bit to the packet.
+ * returns 1 if successfully marked, 0 otherwise.
+ */
+static int
+mark_ecn(m, pktattr, flags)
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+ int flags;
+{
+ struct mbuf *m0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return (0);
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+ return (0);
+ }
+
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ if (flags & BLUEF_ECN4) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+
+ if (ip->ip_v != 4)
+ return (0); /* version mismatch! */
+ if (ip->ip_tos & IPTOS_ECT) {
+ /* ECN-capable, mark ECN bit. */
+ if ((ip->ip_tos & IPTOS_CE) == 0) {
+#if (IPTOS_CE == 0x01)
+ u_short sum;
+
+ ip->ip_tos |= IPTOS_CE;
+ /*
+ * optimized version when IPTOS_CE
+ * is 0x01.
+ * HC' = HC -1 when HC > 0
+ * = 0xfffe when HC = 0
+ */
+ sum = ntohs(ip->ip_sum);
+ if (sum == 0)
+ sum = 0xfffe;
+ else
+ sum -= 1;
+ ip->ip_sum = htons(sum);
+#else /* IPTOS_CE != 0x01 */
+ long sum;
+
+ ip->ip_tos |= IPTOS_CE;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += 0xffff + IPTOS_CE;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+
+ ip->ip_sum = htons(~sum & 0xffff);
+#endif /* IPTOS_CE != 0x01 */
+ }
+ return (1);
+ }
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (flags & BLUEF_ECN6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return (0); /* version mismatch! */
+ if (flowlabel & (IPTOS_ECT << 20)) {
+ /* ECN-capable, mark ECN bit. */
+ flowlabel |= (IPTOS_CE << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ return (1);
+ }
+ }
+ break;
+#endif /* INET6 */
+ }
+
+ /* not marked */
+ return (0);
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+
+static struct mbuf *
+blue_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
+ struct mbuf *m = NULL;
+
+ if (op == ALTDQ_POLL)
+ return (qhead(rqp->rq_q));
+
+ m = blue_getq(rqp->rq_blue, rqp->rq_q);
+ if (m != NULL)
+ ifq->ifq_len--;
+ return m;
+}
+
+struct mbuf *blue_getq(rp, q)
+ blue_t *rp;
+ class_queue_t *q;
+{
+ struct mbuf *m;
+
+ if ((m = _getq(q)) == NULL) {
+ if (rp->blue_idle == 0) {
+ rp->blue_idle = 1;
+ microtime(&rp->blue_last);
+ }
+ return NULL;
+ }
+
+ rp->blue_idle = 0;
+#ifdef BLUE_STATS
+ rp->blue_stats.xmit_packets++;
+ rp->blue_stats.xmit_bytes += m->m_pkthdr.len;
+#endif
+ return (m);
+}
+
+static int
+blue_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ _flushq(rqp->rq_q);
+ if (ALTQ_IS_ENABLED(ifq))
+ ifq->ifq_len = 0;
+ break;
+ }
+ return (0);
+}
+
+
+#ifdef KLD_MODULE
+
+static struct altqsw blue_sw =
+ {"blue", blueopen, blueclose, blueioctl};
+
+ALTQ_MODULE(altq_blue, ALTQT_BLUE, &blue_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ_BLUE */
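
The essence of the BLUE algorithm above is two updates to blue_pmark plus one
Bernoulli test: a forced drop (queue at its hard limit) raises the marking
probability by max_pmark/8, and an idle queue lowers it. A self-contained
sketch of just that logic, with the blue_hold_time rate limiting and the
idle-reset in blue_addq() omitted for brevity:

	#include <stdlib.h>

	#define MAX_PMARK	1000	/* precision of the marking probability */

	static int pmark;		/* marking probability * MAX_PMARK */

	/* forced drop (queue hit its hard limit): be more aggressive */
	static void
	blue_on_forced_drop(void)
	{
		pmark += MAX_PMARK >> 3;
		if (pmark > MAX_PMARK)
			pmark = MAX_PMARK;
	}

	/* queue went idle: back off one step */
	static void
	blue_on_idle(void)
	{
		if (--pmark < 0)
			pmark = 0;
	}

	/* mirror of drop_early(): mark/drop with probability pmark/MAX_PMARK */
	static int
	blue_would_drop(void)
	{
		return ((random() % MAX_PMARK) < pmark);
	}
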
diff --git a/sys/altq/altq_blue.h b/sys/altq/altq_blue.h
new file mode 100644
index 00000000000..1a6ebeb4c82
--- /dev/null
+++ b/sys/altq/altq_blue.h
@@ -0,0 +1,119 @@
+/* $OpenBSD: altq_blue.h,v 1.1 2001/06/27 05:28:34 kjc Exp $ */
+/* $KAME: altq_blue.h,v 1.5 2000/12/14 08:12:45 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_BLUE_H_
+#define _ALTQ_ALTQ_BLUE_H_
+
+#include <altq/altq_classq.h>
+
+struct blue_interface {
+ char blue_ifname[IFNAMSIZ];
+};
+
+struct blue_stats {
+ struct blue_interface iface;
+ int q_len;
+ int q_limit;
+ int q_pmark;
+ u_quad_t xmit_packets;
+ u_quad_t xmit_bytes;
+ u_quad_t drop_packets;
+ u_quad_t drop_bytes;
+ u_quad_t drop_forced;
+ u_quad_t drop_unforced;
+ u_quad_t marked_packets;
+};
+
+struct blue_conf {
+ struct blue_interface iface;
+ int blue_limit;
+ int blue_max_pmark;
+ int blue_hold_time;
+ int blue_pkttime; /* average packet time in usec */
+ int blue_flags; /* see below */
+};
+
+/* blue flags */
+#define BLUEF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define BLUEF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define BLUEF_ECN (BLUEF_ECN4 | BLUEF_ECN6)
+
+/*
+ * IOCTLs for BLUE
+ */
+#define BLUE_IF_ATTACH _IOW('Q', 1, struct blue_interface)
+#define BLUE_IF_DETACH _IOW('Q', 2, struct blue_interface)
+#define BLUE_ENABLE _IOW('Q', 3, struct blue_interface)
+#define BLUE_DISABLE _IOW('Q', 4, struct blue_interface)
+#define BLUE_CONFIG _IOWR('Q', 6, struct blue_conf)
+#define BLUE_GETSTATS _IOWR('Q', 12, struct blue_stats)
+
+#ifdef _KERNEL
+
+typedef struct blue {
+ int blue_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int blue_flags; /* blue flags */
+
+ /* blue parameters */
+ int blue_pmark; /* marking probability scaled by blue_max_pmark */
+ int blue_max_pmark; /* sets precision of marking probability */
+ int blue_hold_time; /* hold time in usec */
+
+ int blue_idle; /* queue was empty */
+ struct timeval blue_last; /* timestamp when the queue becomes idle */
+
+ struct {
+ u_quad_t xmit_packets;
+ u_quad_t xmit_bytes;
+ u_quad_t drop_packets;
+ u_quad_t drop_bytes;
+ u_quad_t drop_forced;
+ u_quad_t drop_unforced;
+ u_quad_t marked_packets;
+ } blue_stats;
+} blue_t;
+
+typedef struct blue_queue {
+ struct blue_queue *rq_next; /* next blue_state in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q;
+
+ blue_t *rq_blue;
+} blue_queue_t;
+
+extern int blue_init __P((blue_t *, int, int, int, int));
+extern int blue_addq __P((blue_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *));
+extern struct mbuf *blue_getq __P((blue_t *, class_queue_t *));
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_BLUE_H_ */
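
A typical configuration sequence against these ioctls is attach, configure,
enable. A userland sketch, assuming the BLUE control device lives at
/dev/altq/blue (an assumption, as above); the parameter values mirror the
kernel defaults seen in altq_blue.c except for the larger queue limit.

	#include <sys/ioctl.h>
	#include <altq/altq_blue.h>
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	/* attach BLUE to "en0", configure it with ECN marking, enable it */
	int
	blue_setup(void)
	{
		struct blue_interface iface;
		struct blue_conf conf;
		int fd;

		if ((fd = open("/dev/altq/blue", O_RDWR)) < 0)
			return (-1);

		memset(&iface, 0, sizeof(iface));
		strncpy(iface.blue_ifname, "en0", IFNAMSIZ - 1);
		if (ioctl(fd, BLUE_IF_ATTACH, &iface) < 0)
			goto bad;

		memset(&conf, 0, sizeof(conf));
		conf.iface = iface;
		conf.blue_limit = 500;		/* hard queue limit in packets */
		conf.blue_max_pmark = 1000;	/* probability precision */
		conf.blue_hold_time = 50000;	/* usec between pmark updates */
		conf.blue_pkttime = 800;	/* average packet time in usec */
		conf.blue_flags = BLUEF_ECN;	/* mark instead of drop if possible */
		if (ioctl(fd, BLUE_CONFIG, &conf) < 0)
			goto bad;

		if (ioctl(fd, BLUE_ENABLE, &iface) < 0)
			goto bad;
		close(fd);
		return (0);
	 bad:
		close(fd);
		return (-1);
	}
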
diff --git a/sys/altq/altq_cbq.c b/sys/altq/altq_cbq.c
new file mode 100644
index 00000000000..561ecd55be4
--- /dev/null
+++ b/sys/altq/altq_cbq.c
@@ -0,0 +1,973 @@
+/* $OpenBSD: altq_cbq.c,v 1.1 2001/06/27 05:28:34 kjc Exp $ */
+/* $KAME: altq_cbq.c,v 1.9 2000/12/14 08:12:45 thorpej Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+/* #pragma ident "@(#)cbq.c 1.39 98/05/13 SMI" */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/uio.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <netinet/in.h>
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_cbq.h>
+
+/*
+ * Local Data structures.
+ */
+static cbq_state_t *cbq_list = NULL;
+
+/*
+ * Forward Declarations.
+ */
+
+static int cbq_add_class __P((struct cbq_add_class *));
+static int cbq_delete_class __P((struct cbq_delete_class *));
+static int cbq_modify_class __P((struct cbq_modify_class *));
+static int cbq_class_create __P((cbq_state_t *, struct cbq_add_class *,
+ struct rm_class *, struct rm_class *));
+static int cbq_class_destroy __P((cbq_state_t *, struct rm_class *));
+static struct rm_class *clh_to_clp __P((cbq_state_t *, u_long));
+static int cbq_add_filter __P((struct cbq_add_filter *));
+static int cbq_delete_filter __P((struct cbq_delete_filter *));
+
+static int cbq_clear_hierarchy __P((struct cbq_interface *));
+static int cbq_clear_interface __P((cbq_state_t *));
+static int cbq_request __P((struct ifaltq *, int, void *));
+static int cbq_set_enable __P((struct cbq_interface *, int));
+static int cbq_ifattach __P((struct cbq_interface *));
+static int cbq_ifdetach __P((struct cbq_interface *));
+static int cbq_enqueue __P((struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *));
+static struct mbuf *cbq_dequeue __P((struct ifaltq *, int));
+static void cbqrestart __P((struct ifaltq *));
+static void get_class_stats __P((class_stats_t *, struct rm_class *));
+static int cbq_getstats __P((struct cbq_getstats *));
+static void cbq_purge(cbq_state_t *);
+
+static int
+cbq_add_class(acp)
+ struct cbq_add_class *acp;
+{
+ char *ifacename;
+ struct rm_class *borrow, *parent;
+ cbq_state_t *cbqp;
+
+ ifacename = acp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* check parameters */
+ if (acp->cbq_class.priority >= RM_MAXPRIO ||
+ acp->cbq_class.maxq > CBQ_MAXQSIZE)
+ return (EINVAL);
+
+ /* Get pointers to parent and borrow classes. */
+ parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle);
+ borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle);
+
+ /*
+ * A class must borrow from its parent or it cannot
+ * borrow at all. Hence, borrow can be NULL.
+ */
+ if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) {
+ printf("cbq_add_class: no parent class!\n");
+ return (EINVAL);
+ }
+
+ if ((borrow != parent) && (borrow != NULL)) {
+ printf("cbq_add_class: borrow class != parent\n");
+ return (EINVAL);
+ }
+
+ return cbq_class_create(cbqp, acp, parent, borrow);
+}
+
+static int
+cbq_delete_class(dcp)
+ struct cbq_delete_class *dcp;
+{
+ char *ifacename;
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+
+ ifacename = dcp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(cbqp, dcp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ /* if we are a parent class, then return an error. */
+ if (is_a_parent_class(cl))
+ return (EINVAL);
+
+ /* if a filter has a reference to this class delete the filter */
+ acc_discard_filters(&cbqp->cbq_classifier, cl, 0);
+
+ return cbq_class_destroy(cbqp, cl);
+}
+
+static int
+cbq_modify_class(acp)
+ struct cbq_modify_class *acp;
+{
+ char *ifacename;
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+
+ ifacename = acp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* Get pointer to this class */
+ if ((cl = clh_to_clp(cbqp, acp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte,
+ acp->cbq_class.maxq, acp->cbq_class.maxidle,
+ acp->cbq_class.minidle, acp->cbq_class.offtime,
+ acp->cbq_class.pktsize) < 0)
+ return (EINVAL);
+ return (0);
+}
+
+/*
+ * struct rm_class *
+ * cbq_class_create(cbq_mod_state_t *cbqp, struct cbq_add_class *acp,
+ * u_long handle, struct rm_class *parent,
+ * struct rm_class *borrow)
+ *
+ * This function creates a new traffic class in the CBQ class hierarchy
+ * with the given parameters. The created class is either the root, the
+ * default, or a new dynamic class. If CBQ is not initialized, the root
+ * class will be created.
+ */
+static int
+cbq_class_create(cbqp, acp, parent, borrow)
+ cbq_state_t *cbqp;
+ struct cbq_add_class *acp;
+ struct rm_class *parent, *borrow;
+{
+ struct rm_class *cl;
+ cbq_class_spec_t *spec = &acp->cbq_class;
+ u_long chandle;
+ int i;
+
+ /*
+ * allocate class handle
+ */
+ switch (spec->flags & CBQCLF_CLASSMASK) {
+ case CBQCLF_ROOTCLASS:
+ if (parent != NULL)
+ return (EINVAL);
+ if (cbqp->ifnp.root_)
+ return (EINVAL);
+ chandle = ROOT_CLASS_HANDLE;
+ break;
+ case CBQCLF_DEFCLASS:
+ if (cbqp->ifnp.default_)
+ return (EINVAL);
+ chandle = DEFAULT_CLASS_HANDLE;
+ break;
+ case CBQCLF_CTLCLASS:
+ if (cbqp->ifnp.ctl_)
+ return (EINVAL);
+ chandle = CTL_CLASS_HANDLE;
+ break;
+ case 0:
+ /* find a free class slot */
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == NULL)
+ break;
+ if (i == CBQ_MAX_CLASSES)
+ return (ENOSPC);
+ chandle = (u_long)i;
+ break;
+ default:
+ /* more than two flags bits set */
+ return (EINVAL);
+ }
+
+ /*
+ * create a class. if this is a root class, initialize the
+ * interface.
+ */
+ if (chandle == ROOT_CLASS_HANDLE) {
+ rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte,
+ cbqrestart, spec->maxq, RM_MAXQUEUED,
+ spec->maxidle, spec->minidle, spec->offtime,
+ spec->flags);
+ cl = cbqp->ifnp.root_;
+ } else {
+ cl = rmc_newclass(spec->priority,
+ &cbqp->ifnp, spec->nano_sec_per_byte,
+ rmc_delay_action, spec->maxq, parent, borrow,
+ spec->maxidle, spec->minidle, spec->offtime,
+ spec->pktsize, spec->flags);
+ }
+ if (cl == NULL)
+ return (ENOMEM);
+
+ /* return handle to user space. */
+ acp->cbq_class_handle = chandle;
+
+ cl->stats_.handle = chandle;
+ cl->stats_.depth = cl->depth_;
+
+ /* save the allocated class */
+ switch (chandle) {
+ case NULL_CLASS_HANDLE:
+ case ROOT_CLASS_HANDLE:
+ break;
+ case DEFAULT_CLASS_HANDLE:
+ cbqp->ifnp.default_ = cl;
+ break;
+ case CTL_CLASS_HANDLE:
+ cbqp->ifnp.ctl_ = cl;
+ break;
+ default:
+ cbqp->cbq_class_tbl[chandle] = cl;
+ break;
+ }
+ return (0);
+}
+
+/*
+ * int
+ * cbq_class_destroy(cbq_mod_state_t *, struct rm_class *) - This
+ * function destroys a given traffic class. Before destroying
+ * the class, all traffic for that class is released.
+ */
+static int
+cbq_class_destroy(cbqp, cl)
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+{
+ u_long chandle;
+
+ chandle = cl->stats_.handle;
+
+ /* delete the class */
+ rmc_delete_class(&cbqp->ifnp, cl);
+
+ /*
+ * free the class handle
+ */
+ switch (chandle) {
+ case ROOT_CLASS_HANDLE:
+ cbqp->ifnp.root_ = NULL;
+ break;
+ case DEFAULT_CLASS_HANDLE:
+ cbqp->ifnp.default_ = NULL;
+ break;
+ case CTL_CLASS_HANDLE:
+ cbqp->ifnp.ctl_ = NULL;
+ break;
+ case NULL_CLASS_HANDLE:
+ break;
+ default:
+ if (chandle >= CBQ_MAX_CLASSES)
+ break;
+ cbqp->cbq_class_tbl[chandle] = NULL;
+ }
+
+ return (0);
+}
+
+/* convert class handle to class pointer */
+static struct rm_class *
+clh_to_clp(cbqp, chandle)
+ cbq_state_t *cbqp;
+ u_long chandle;
+{
+ switch (chandle) {
+ case NULL_CLASS_HANDLE:
+ return (NULL);
+ case ROOT_CLASS_HANDLE:
+ return (cbqp->ifnp.root_);
+ case DEFAULT_CLASS_HANDLE:
+ return (cbqp->ifnp.default_);
+ case CTL_CLASS_HANDLE:
+ return (cbqp->ifnp.ctl_);
+ }
+
+ if (chandle >= CBQ_MAX_CLASSES)
+ return (NULL);
+
+ return (cbqp->cbq_class_tbl[chandle]);
+}
+
+static int
+cbq_add_filter(afp)
+ struct cbq_add_filter *afp;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+
+ ifacename = afp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* Get the pointer to class. */
+ if ((cl = clh_to_clp(cbqp, afp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter,
+ cl, &afp->cbq_filter_handle);
+}
+
+static int
+cbq_delete_filter(dfp)
+ struct cbq_delete_filter *dfp;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = dfp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&cbqp->cbq_classifier,
+ dfp->cbq_filter_handle);
+}
+
+/*
+ * cbq_clear_hierarchy deletes all classes and their filters on the
+ * given interface.
+ */
+static int
+cbq_clear_hierarchy(ifacep)
+ struct cbq_interface *ifacep;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ return cbq_clear_interface(cbqp);
+}
+
+static int
+cbq_clear_interface(cbqp)
+ cbq_state_t *cbqp;
+{
+ int again, i;
+ struct rm_class *cl;
+
+ /* free the filters for this interface */
+ acc_discard_filters(&cbqp->cbq_classifier, NULL, 1);
+
+ /* clear out the classes now */
+ do {
+ again = 0;
+ for (i = 0; i < CBQ_MAX_CLASSES; i++) {
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL) {
+ if (is_a_parent_class(cl))
+ again++;
+ else {
+ cbq_class_destroy(cbqp, cl);
+ cbqp->cbq_class_tbl[i] = NULL;
+ }
+ }
+ }
+ if (cbqp->ifnp.ctl_ != NULL &&
+ !is_a_parent_class(cbqp->ifnp.ctl_)) {
+ cbq_class_destroy(cbqp, cbqp->ifnp.ctl_);
+ cbqp->ifnp.ctl_ = NULL;
+ }
+ if (cbqp->ifnp.default_ != NULL &&
+ !is_a_parent_class(cbqp->ifnp.default_)) {
+ cbq_class_destroy(cbqp, cbqp->ifnp.default_);
+ cbqp->ifnp.default_ = NULL;
+ }
+ if (cbqp->ifnp.root_ != NULL &&
+ !is_a_parent_class(cbqp->ifnp.root_)) {
+ cbq_class_destroy(cbqp, cbqp->ifnp.root_);
+ cbqp->ifnp.root_ = NULL;
+ }
+ } while (again);
+
+ return (0);
+}
+
+static int
+cbq_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ cbq_purge(cbqp);
+ break;
+ }
+ return (0);
+}
+
+/*
+ * static int
+ * cbq_set_enable(struct cbq_interface *ep, int enable) - this function
+ * processes the ioctl request to enable or disable class based queueing.
+ * It searches the list of interfaces for the specified interface and
+ * then enables or disables CBQ on that interface.
+ *
+ * Returns: 0, for no error.
+ * EBADF, for specified interface not found.
+ */
+
+static int
+cbq_set_enable(ep, enable)
+ struct cbq_interface *ep;
+ int enable;
+{
+ int error = 0;
+ cbq_state_t *cbqp;
+ char *ifacename;
+
+ ifacename = ep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ switch (enable) {
+ case ENABLE:
+ if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL ||
+ cbqp->ifnp.ctl_ == NULL) {
+ if (cbqp->ifnp.root_ == NULL)
+ printf("No Root Class for %s\n", ifacename);
+ if (cbqp->ifnp.default_ == NULL)
+ printf("No Default Class for %s\n", ifacename);
+ if (cbqp->ifnp.ctl_ == NULL)
+ printf("No Control Class for %s\n", ifacename);
+ error = EINVAL;
+ } else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) {
+ cbqp->cbq_qlen = 0;
+ }
+ break;
+
+ case DISABLE:
+ error = altq_disable(cbqp->ifnp.ifq_);
+ break;
+ }
+ return (error);
+}
+
+/* copy the stats info in rm_class to class_states_t */
+static void
+get_class_stats(statsp, cl)
+ class_stats_t *statsp;
+ struct rm_class *cl;
+{
+ statsp->xmit_cnt = cl->stats_.xmit_cnt;
+ statsp->drop_cnt = cl->stats_.drop_cnt;
+ statsp->over = cl->stats_.over;
+ statsp->borrows = cl->stats_.borrows;
+ statsp->overactions = cl->stats_.overactions;
+ statsp->delays = cl->stats_.delays;
+
+ statsp->depth = cl->depth_;
+ statsp->priority = cl->pri_;
+ statsp->maxidle = cl->maxidle_;
+ statsp->minidle = cl->minidle_;
+ statsp->offtime = cl->offtime_;
+ statsp->qmax = qlimit(cl->q_);
+ statsp->ns_per_byte = cl->ns_per_byte_;
+ statsp->wrr_allot = cl->w_allotment_;
+ statsp->qcnt = qlen(cl->q_);
+ statsp->avgidle = cl->avgidle_;
+
+ statsp->qtype = qtype(cl->q_);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ red_getstats(cl->red_, &statsp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ rio_getstats((rio_t *)cl->red_, &statsp->red[0]);
+#endif
+}
+
+static int
+cbq_getstats(gsp)
+ struct cbq_getstats *gsp;
+{
+ char *ifacename;
+ int chandle, n, nclasses;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ class_stats_t stats, *usp;
+ int error = 0;
+
+ ifacename = gsp->iface.cbq_ifacename;
+ nclasses = gsp->nclasses;
+ usp = gsp->stats;
+
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+ if (nclasses <= 0)
+ return (EINVAL);
+
+ for (n = 0, chandle = 0; n < nclasses && chandle < CBQ_MAX_CLASSES;
+ n++) {
+ switch(n) {
+ case 0:
+ cl = cbqp->ifnp.root_;
+ stats.handle = ROOT_CLASS_HANDLE;
+ break;
+ case 1:
+ cl = cbqp->ifnp.default_;
+ stats.handle = DEFAULT_CLASS_HANDLE;
+ break;
+ case 2:
+ cl = cbqp->ifnp.ctl_;
+ stats.handle = CTL_CLASS_HANDLE;
+ break;
+ default:
+ while ((cl = cbqp->cbq_class_tbl[chandle]) == NULL)
+ if (++chandle >= CBQ_MAX_CLASSES)
+ goto out;
+ stats.handle = chandle++;
+ break;
+ }
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+
+ out:
+ gsp->nclasses = n;
+ return (error);
+}
+
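+/*
+ * Illustrative user-space sketch (not part of the original import):
+ * retrieving per-class statistics with the CBQ_GETSTATS ioctl handled
+ * above.  The device path is an assumption; the structures are the
+ * ones defined in altq_cbq.h.
+ */
+#if 0
+	struct cbq_getstats gs;
+	class_stats_t stats[CBQ_MAX_CLASSES];
+	int fd, i;
+
+	fd = open("/dev/altq/cbq", O_RDONLY);	/* assumed device path */
+	strlcpy(gs.iface.cbq_ifacename, "fxp0", IFNAMSIZ);
+	gs.nclasses = CBQ_MAX_CLASSES;	/* in: array size, out: # filled */
+	gs.stats = stats;
+	if (ioctl(fd, CBQ_GETSTATS, &gs) == 0)
+		for (i = 0; i < gs.nclasses; i++)
+			printf("class 0x%x: %d packets queued\n",
+			    stats[i].handle, stats[i].qcnt);
+#endif
+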
+static int
+cbq_ifattach(ifacep)
+ struct cbq_interface *ifacep;
+{
+ int error = 0;
+ char *ifacename;
+ cbq_state_t *new_cbqp;
+ struct ifnet *ifp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((ifp = ifunit(ifacename)) == NULL)
+ return (ENXIO);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENXIO);
+
+ /* allocate and initialize cbq_state_t */
+ MALLOC(new_cbqp, cbq_state_t *, sizeof(cbq_state_t), M_DEVBUF, M_WAITOK);
+ if (new_cbqp == NULL)
+ return (ENOMEM);
+ bzero(new_cbqp, sizeof(cbq_state_t));
+ CALLOUT_INIT(&new_cbqp->cbq_callout);
+ MALLOC(new_cbqp->cbq_class_tbl, struct rm_class **,
+ sizeof(struct rm_class *) * CBQ_MAX_CLASSES, M_DEVBUF, M_WAITOK);
+ if (new_cbqp->cbq_class_tbl == NULL) {
+ FREE(new_cbqp, M_DEVBUF);
+ return (ENOMEM);
+ }
+ bzero(new_cbqp->cbq_class_tbl, sizeof(struct rm_class *) * CBQ_MAX_CLASSES);
+ new_cbqp->cbq_qlen = 0;
+ new_cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */
+
+ /*
+ * set CBQ to this ifnet structure.
+ */
+ error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp,
+ cbq_enqueue, cbq_dequeue, cbq_request,
+ &new_cbqp->cbq_classifier, acc_classify);
+ if (error) {
+ FREE(new_cbqp->cbq_class_tbl, M_DEVBUF);
+ FREE(new_cbqp, M_DEVBUF);
+ return (error);
+ }
+
+ /* prepend to the list of cbq_state_t's. */
+ new_cbqp->cbq_next = cbq_list;
+ cbq_list = new_cbqp;
+
+ return (0);
+}
+
+static int
+cbq_ifdetach(ifacep)
+ struct cbq_interface *ifacep;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ (void)cbq_set_enable(ifacep, DISABLE);
+
+ cbq_clear_interface(cbqp);
+
+ if (cbqp->ifnp.ctl_)
+ cbq_class_destroy(cbqp, cbqp->ifnp.ctl_);
+ if (cbqp->ifnp.default_)
+ cbq_class_destroy(cbqp, cbqp->ifnp.default_);
+ if (cbqp->ifnp.root_)
+ cbq_class_destroy(cbqp, cbqp->ifnp.root_);
+
+ /* remove CBQ from the ifnet structure. */
+ (void)altq_detach(cbqp->ifnp.ifq_);
+
+ /* remove from the list of cbq_state_t's. */
+ if (cbq_list == cbqp)
+ cbq_list = cbqp->cbq_next;
+ else {
+ cbq_state_t *cp;
+
+		for (cp = cbq_list; cp != NULL; cp = cp->cbq_next)
+ if (cp->cbq_next == cbqp) {
+ cp->cbq_next = cbqp->cbq_next;
+ break;
+ }
+ ASSERT(cp != NULL);
+ }
+
+ /* deallocate cbq_state_t */
+ FREE(cbqp->cbq_class_tbl, M_DEVBUF);
+ FREE(cbqp, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * int
+ * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr)
+ * - Queue data packets.
+ *
+ * cbq_enqueue is set to ifp->if_altqenqueue and called by an upper
+ * layer (e.g. ether_output). cbq_enqueue queues the given packet
+ * to the cbq, then invokes the driver's start routine.
+ *
+ * Assumptions: called in splimp
+ * Returns: 0 if the queueing is successful.
+ *		ENOBUFS if a packet drop occurred as a result of
+ * the queueing.
+ */
+
+static int
+cbq_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+ struct rm_class *cl;
+ int len;
+
+ /* grab class set by classifier */
+ if (pktattr == NULL || (cl = pktattr->pattr_class) == NULL)
+ cl = cbqp->ifnp.default_;
+ cl->pktattr_ = pktattr; /* save proto hdr used by ECN */
+
+ len = m_pktlen(m);
+ if (rmc_queue_packet(cl, m) != 0) {
+ /* drop occurred. some mbuf was freed in rmc_queue_packet. */
+ PKTCNTR_ADD(&cl->stats_.drop_cnt, len);
+ return (ENOBUFS);
+ }
+
+ /* successfully queued. */
+ ++cbqp->cbq_qlen;
+ IFQ_INC_LEN(ifq);
+ return (0);
+}
+
+static struct mbuf *
+cbq_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+ struct mbuf *m;
+
+ m = rmc_dequeue_next(&cbqp->ifnp, op);
+
+ if (m && op == ALTDQ_REMOVE) {
+ --cbqp->cbq_qlen; /* decrement # of packets in cbq */
+ IFQ_DEC_LEN(ifq);
+
+ /* Update the class. */
+ rmc_update_class_util(&cbqp->ifnp);
+ }
+ return (m);
+}
+
+/*
+ * void
+ * cbqrestart(struct ifaltq *) - Restart sending of data.
+ * called from rmc_restart in splimp via timeout after waking up
+ * a suspended class.
+ * Returns: NONE
+ */
+
+static void
+cbqrestart(ifq)
+ struct ifaltq *ifq;
+{
+ cbq_state_t *cbqp;
+ struct ifnet *ifp;
+
+ if (!ALTQ_IS_ENABLED(ifq))
+ /* cbq must have been detached */
+ return;
+ if ((cbqp = (cbq_state_t *)ifq->altq_disc) == NULL)
+ /* should not happen */
+ return;
+
+ ifp = ifq->altq_ifp;
+ if (ifp->if_start &&
+ cbqp->cbq_qlen > 0 && (ifp->if_flags & IFF_OACTIVE) == 0)
+ (*ifp->if_start)(ifp);
+}
+
+static void
+cbq_purge(cbqp)
+ cbq_state_t *cbqp;
+{
+ struct rm_class *cl;
+ int i;
+
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL)
+ rmc_dropall(cl);
+ if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_))
+ cbqp->ifnp.ifq_->ifq_len = 0;
+}
+
+/*
+ * cbq device interface
+ */
+
+altqdev_decl(cbq);
+
+int
+cbqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ return (0);
+}
+
+int
+cbqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ struct ifnet *ifp;
+ struct cbq_interface iface;
+ int err, error = 0;
+
+ while (cbq_list) {
+ ifp = cbq_list->ifnp.ifq_->altq_ifp;
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+ sprintf(iface.cbq_ifacename, "%s", ifp->if_xname);
+#else
+ sprintf(iface.cbq_ifacename,
+ "%s%d", ifp->if_name, ifp->if_unit);
+#endif
+ err = cbq_ifdetach(&iface);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return (error);
+}
+
+int
+cbqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ int error = 0;
+
+	/* check whether cmd requires superuser privilege */
+ switch (cmd) {
+ case CBQ_GETSTATS:
+		/* currently the only command that an ordinary user can call */
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ error = suser(p);
+#else
+ error = suser(p->p_ucred, &p->p_acflag);
+#endif
+ if (error)
+ return (error);
+ break;
+ }
+
+ switch (cmd) {
+
+ case CBQ_ENABLE:
+ error = cbq_set_enable((struct cbq_interface *)addr, ENABLE);
+ break;
+
+ case CBQ_DISABLE:
+ error = cbq_set_enable((struct cbq_interface *)addr, DISABLE);
+ break;
+
+ case CBQ_ADD_FILTER:
+ error = cbq_add_filter((struct cbq_add_filter *)addr);
+ break;
+
+ case CBQ_DEL_FILTER:
+ error = cbq_delete_filter((struct cbq_delete_filter *)addr);
+ break;
+
+ case CBQ_ADD_CLASS:
+ error = cbq_add_class((struct cbq_add_class *)addr);
+ break;
+
+ case CBQ_DEL_CLASS:
+ error = cbq_delete_class((struct cbq_delete_class *)addr);
+ break;
+
+ case CBQ_MODIFY_CLASS:
+ error = cbq_modify_class((struct cbq_modify_class *)addr);
+ break;
+
+ case CBQ_CLEAR_HIERARCHY:
+ error = cbq_clear_hierarchy((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_IF_ATTACH:
+ error = cbq_ifattach((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_IF_DETACH:
+ error = cbq_ifdetach((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_GETSTATS:
+ error = cbq_getstats((struct cbq_getstats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+#if 0
+/* for debug */
+static void cbq_class_dump(int);
+
+static void
+cbq_class_dump(i)
+ int i;
+{
+ struct rm_class *cl;
+ rm_class_stats_t *s;
+ struct _class_queue_ *q;
+
+ if (cbq_list == NULL) {
+ printf("cbq_class_dump: no cbq_state found\n");
+ return;
+ }
+ cl = cbq_list->cbq_class_tbl[i];
+
+ printf("class %d cl=%p\n", i, cl);
+ if (cl != NULL) {
+ s = &cl->stats_;
+ q = cl->q_;
+
+ printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n",
+ cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_);
+ printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n",
+ cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_,
+ cl->maxidle_);
+ printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n",
+ cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_);
+ printf("handle=%d, depth=%d, packets=%d, bytes=%d\n",
+ s->handle, s->depth,
+ (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes);
+ printf("over=%d\n, borrows=%d, drops=%d, overactions=%d, delays=%d\n",
+ s->over, s->borrows, (int)s->drop_cnt.packets,
+ s->overactions, s->delays);
+ printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n",
+ q->tail_, q->head_, q->qlen_, q->qlim_,
+ q->qthresh_, q->qtype_);
+ }
+}
+#endif /* 0 */
+
+#ifdef KLD_MODULE
+
+static struct altqsw cbq_sw =
+ {"cbq", cbqopen, cbqclose, cbqioctl};
+
+ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ_CBQ */
diff --git a/sys/altq/altq_cbq.h b/sys/altq/altq_cbq.h
new file mode 100644
index 00000000000..a58223b6a34
--- /dev/null
+++ b/sys/altq/altq_cbq.h
@@ -0,0 +1,220 @@
+/* $OpenBSD: altq_cbq.h,v 1.1 2001/06/27 05:28:34 kjc Exp $ */
+/* $KAME: altq_cbq.h,v 1.5 2000/12/02 13:44:40 kjc Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#ifndef _ALTQ_ALTQ_CBQ_H_
+#define _ALTQ_ALTQ_CBQ_H_
+
+#include <sys/ioccom.h>
+#include <altq/altq.h>
+#include <altq/altq_rmclass.h>
+#include <altq/altq_red.h>
+#include <altq/altq_rio.h>
+
+/* #pragma ident "@(#)cbq.h 1.18 98/05/13 SMI" */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Define well-known class handles.
+ */
+#define NULL_CLASS_HANDLE 0xffffffff
+#define ROOT_CLASS_HANDLE 0xfffffffe
+#define DEFAULT_CLASS_HANDLE 0xfffffffd
+#define CTL_CLASS_HANDLE 0xfffffffc
+
+/*
+ * Define structures associated with IOCTLS for cbq.
+ */
+
+/*
+ * Define the CBQ interface structure. This must be included in all
+ * IOCTL's such that the CBQ driver may find the appropriate CBQ module
+ * associated with the network interface to be affected.
+ */
+struct cbq_interface {
+ char cbq_ifacename[IFNAMSIZ];
+};
+
+typedef struct cbq_class_spec {
+ u_int priority;
+ u_int nano_sec_per_byte;
+ u_int maxq;
+ u_int maxidle;
+ int minidle;
+ u_int offtime;
+ u_long parent_class_handle;
+ u_long borrow_class_handle;
+
+ u_int pktsize;
+ int flags;
+} cbq_class_spec_t;
+
+/* class flags should be the same as the class flags in rm_class.h */
+#define CBQCLF_RED 0x0001 /* use RED */
+#define CBQCLF_ECN 0x0002 /* use RED/ECN */
+#define CBQCLF_RIO 0x0004 /* use RIO */
+#define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
+#define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+
+/* class flags only for root class */
+#define CBQCLF_WRR 0x0100 /* weighted-round robin */
+#define CBQCLF_EFFICIENT 0x0200 /* work-conserving */
+
+/* class flags for special classes */
+#define CBQCLF_ROOTCLASS 0x1000 /* root class */
+#define CBQCLF_DEFCLASS 0x2000 /* default class */
+#define CBQCLF_CTLCLASS 0x4000 /* control class */
+#define CBQCLF_CLASSMASK 0xf000 /* class mask */
+
+#define CBQ_MAXQSIZE 200
+
+struct cbq_add_class {
+ struct cbq_interface cbq_iface;
+
+ cbq_class_spec_t cbq_class;
+ u_long cbq_class_handle;
+};
+
+struct cbq_delete_class {
+ struct cbq_interface cbq_iface;
+ u_long cbq_class_handle;
+};
+
+struct cbq_modify_class {
+ struct cbq_interface cbq_iface;
+
+ cbq_class_spec_t cbq_class;
+ u_long cbq_class_handle;
+};
+
+struct cbq_add_filter {
+ struct cbq_interface cbq_iface;
+ u_long cbq_class_handle;
+ struct flow_filter cbq_filter;
+
+ u_long cbq_filter_handle;
+};
+
+struct cbq_delete_filter {
+ struct cbq_interface cbq_iface;
+ u_long cbq_filter_handle;
+};
+
+typedef struct _cbq_class_stats_ {
+ u_int handle;
+ u_int depth;
+
+ struct pktcntr xmit_cnt; /* packets sent in this class */
+ struct pktcntr drop_cnt; /* dropped packets */
+ u_int over; /* # times went over limit */
+ u_int borrows; /* # times tried to borrow */
+ u_int overactions; /* # times invoked overlimit action */
+ u_int delays; /* # times invoked delay actions */
+
+ /* other static class parameters useful for debugging */
+ int priority;
+ int maxidle;
+ int minidle;
+ int offtime;
+ int qmax;
+ int ns_per_byte;
+ int wrr_allot;
+
+ int qcnt; /* # packets in queue */
+ int avgidle;
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3];
+} class_stats_t;
+
+/* the number of classes is returned in the nclasses field */
+struct cbq_getstats {
+ struct cbq_interface iface;
+ int nclasses;
+ class_stats_t *stats;
+};
+
+/*
+ * Define IOCTLs for CBQ.
+ */
+#define CBQ_IF_ATTACH _IOW('Q', 1, struct cbq_interface)
+#define CBQ_IF_DETACH _IOW('Q', 2, struct cbq_interface)
+#define CBQ_ENABLE _IOW('Q', 3, struct cbq_interface)
+#define CBQ_DISABLE _IOW('Q', 4, struct cbq_interface)
+#define CBQ_CLEAR_HIERARCHY _IOW('Q', 5, struct cbq_interface)
+#define CBQ_ADD_CLASS _IOWR('Q', 7, struct cbq_add_class)
+#define CBQ_DEL_CLASS _IOW('Q', 8, struct cbq_delete_class)
+#define CBQ_MODIFY_CLASS _IOWR('Q', 9, struct cbq_modify_class)
+#define CBQ_ADD_FILTER _IOWR('Q', 10, struct cbq_add_filter)
+#define CBQ_DEL_FILTER _IOW('Q', 11, struct cbq_delete_filter)
+#define CBQ_GETSTATS _IOWR('Q', 12, struct cbq_getstats)
+
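+/*
+ * Illustrative user-space sketch (not part of the original import):
+ * attaching and enabling CBQ on an interface with the ioctls above.
+ * The device path is an assumption; note that cbq_set_enable() in
+ * altq_cbq.c refuses to enable until root, default and control
+ * classes exist.
+ */
+#if 0
+	struct cbq_interface iface;
+	int fd;
+
+	fd = open("/dev/altq/cbq", O_RDWR);	/* assumed device path */
+	strlcpy(iface.cbq_ifacename, "fxp0", IFNAMSIZ);
+	if (ioctl(fd, CBQ_IF_ATTACH, &iface) < 0)  /* allocate cbq state */
+		err(1, "CBQ_IF_ATTACH");
+	/* ... create root/default/ctl classes and add filters here ... */
+	if (ioctl(fd, CBQ_ENABLE, &iface) < 0)	   /* start queueing */
+		err(1, "CBQ_ENABLE");
+#endif
+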
+#ifdef _KERNEL
+/*
+ * Define macros only good for kernel drivers and modules.
+ */
+
+#define DISABLE 0x00
+#define ENABLE 0x01
+
+#define CBQ_WATCHDOG (HZ / 20)
+#define CBQ_TIMEOUT 10
+#define CBQ_LS_TIMEOUT (20 * hz / 1000)
+
+#define CBQ_MAX_CLASSES 256
+#define CBQ_MAX_FILTERS 256
+
+/*
+ * Define State structures.
+ */
+typedef struct cbqstate {
+ struct cbqstate *cbq_next;
+ int cbq_qlen; /* # of packets in cbq */
+ struct rm_class **cbq_class_tbl;
+
+ struct rm_ifdat ifnp;
+ struct callout cbq_callout; /* for timeouts */
+
+ struct acc_classifier cbq_classifier;
+} cbq_state_t;
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_ALTQ_ALTQ_CBQ_H_ */
diff --git a/sys/altq/altq_cdnr.c b/sys/altq/altq_cdnr.c
new file mode 100644
index 00000000000..7b6518161ab
--- /dev/null
+++ b/sys/altq/altq_cdnr.c
@@ -0,0 +1,1375 @@
+/* $OpenBSD: altq_cdnr.c,v 1.1 2001/06/27 05:28:34 kjc Exp $ */
+/* $KAME: altq_cdnr.c,v 1.8 2000/12/14 08:12:45 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1999-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_cdnr.h>
+
+/*
+ * diffserv traffic conditioning module
+ */
+
+int altq_cdnr_enabled = 0;
+
+/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
+#ifdef ALTQ_CDNR
+
+/* tcb_list keeps track of all the top-level conditioners allocated. */
+static LIST_HEAD(, top_cdnr) tcb_list;
+
+int cdnropen __P((dev_t, int, int, struct proc *));
+int cdnrclose __P((dev_t, int, int, struct proc *));
+int cdnrioctl __P((dev_t, ioctlcmd_t, caddr_t, int, struct proc *));
+
+static int altq_cdnr_input __P((struct mbuf *, int));
+static struct top_cdnr *tcb_lookup __P((char *ifname));
+static struct cdnr_block *cdnr_handle2cb __P((u_long));
+static u_long cdnr_cb2handle __P((struct cdnr_block *));
+static void *cdnr_cballoc __P((struct top_cdnr *, int,
+ struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *)));
+static void cdnr_cbdestroy __P((void *));
+static int tca_verify_action __P((struct tc_action *));
+static void tca_import_action __P((struct tc_action *, struct tc_action *));
+static void tca_invalidate_action __P((struct tc_action *));
+
+static int generic_element_destroy __P((struct cdnr_block *));
+static struct top_cdnr *top_create __P((struct ifaltq *));
+static int top_destroy __P((struct top_cdnr *));
+static struct cdnr_block *element_create __P((struct top_cdnr *,
+ struct tc_action *));
+static int element_destroy __P((struct cdnr_block *));
+static void tb_import_profile __P((struct tbe *, struct tb_profile *));
+static struct tbmeter *tbm_create __P((struct top_cdnr *, struct tb_profile *,
+ struct tc_action *, struct tc_action *));
+static int tbm_destroy __P((struct tbmeter *));
+static struct tc_action *tbm_input __P((struct cdnr_block *,
+ struct cdnr_pktinfo *));
+static struct trtcm *trtcm_create __P((struct top_cdnr *,
+ struct tb_profile *, struct tb_profile *,
+ struct tc_action *, struct tc_action *, struct tc_action *,
+ int));
+static int trtcm_destroy __P((struct trtcm *));
+static struct tc_action *trtcm_input __P((struct cdnr_block *,
+ struct cdnr_pktinfo *));
+static struct tswtcm *tswtcm_create __P((struct top_cdnr *,
+ u_int32_t, u_int32_t, u_int32_t,
+ struct tc_action *, struct tc_action *, struct tc_action *));
+static int tswtcm_destroy __P((struct tswtcm *));
+static struct tc_action *tswtcm_input __P((struct cdnr_block *,
+ struct cdnr_pktinfo *));
+
+static int cdnrcmd_if_attach __P((char *));
+static int cdnrcmd_if_detach __P((char *));
+static int cdnrcmd_add_element __P((struct cdnr_add_element *));
+static int cdnrcmd_delete_element __P((struct cdnr_delete_element *));
+static int cdnrcmd_add_filter __P((struct cdnr_add_filter *));
+static int cdnrcmd_delete_filter __P((struct cdnr_delete_filter *));
+static int cdnrcmd_add_tbm __P((struct cdnr_add_tbmeter *));
+static int cdnrcmd_modify_tbm __P((struct cdnr_modify_tbmeter *));
+static int cdnrcmd_tbm_stats __P((struct cdnr_tbmeter_stats *));
+static int cdnrcmd_add_trtcm __P((struct cdnr_add_trtcm *));
+static int cdnrcmd_modify_trtcm __P((struct cdnr_modify_trtcm *));
+static int cdnrcmd_tcm_stats __P((struct cdnr_tcm_stats *));
+static int cdnrcmd_add_tswtcm __P((struct cdnr_add_tswtcm *));
+static int cdnrcmd_modify_tswtcm __P((struct cdnr_modify_tswtcm *));
+static int cdnrcmd_get_stats __P((struct cdnr_get_stats *));
+
+/*
+ * top level input function called from ip_input.
+ * should be called before converting header fields to host-byte-order.
+ */
+int
+altq_cdnr_input(m, af)
+ struct mbuf *m;
+ int af; /* address family */
+{
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct top_cdnr *top;
+ struct tc_action *tca;
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo pktinfo;
+
+ ifp = m->m_pkthdr.rcvif;
+ if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
+ /* traffic conditioner is not enabled on this interface */
+ return (1);
+
+ top = ifp->if_snd.altq_cdnr;
+
+ ip = mtod(m, struct ip *);
+#ifdef INET6
+ if (af == AF_INET6) {
+ u_int32_t flowlabel;
+
+ flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
+ pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
+ } else
+#endif
+ pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
+ pktinfo.pkt_len = m_pktlen(m);
+
+ tca = NULL;
+
+ cb = acc_classify(&top->tc_classifier, m, af);
+ if (cb != NULL)
+ tca = &cb->cb_action;
+
+ if (tca == NULL)
+ tca = &top->tc_block.cb_action;
+
+ while (1) {
+ PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
+
+ switch (tca->tca_code) {
+ case TCACODE_PASS:
+ return (1);
+ case TCACODE_DROP:
+ m_freem(m);
+ return (0);
+ case TCACODE_RETURN:
+ return (0);
+ case TCACODE_MARK:
+#ifdef INET6
+ if (af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ flowlabel = (tca->tca_dscp << 20) |
+ (flowlabel & ~(DSCP_MASK << 20));
+ ip6->ip6_flow = htonl(flowlabel);
+ } else
+#endif
+ ip->ip_tos = tca->tca_dscp |
+ (ip->ip_tos & DSCP_CUMASK);
+ return (1);
+ case TCACODE_NEXT:
+ cb = tca->tca_next;
+ tca = (*cb->cb_input)(cb, &pktinfo);
+ break;
+ case TCACODE_NONE:
+ default:
+ return (1);
+ }
+ }
+}
+
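+/*
+ * Illustrative example (not part of the original import) of an action
+ * chain as walked by the loop above.  A filter maps a flow to a token
+ * bucket meter whose in_action marks AF11 and whose out_action drops:
+ *
+ *	acc_classify -> tbmeter (TCACODE_NEXT)
+ *		in-profile  -> TCACODE_MARK (AF11), packet passes
+ *		out-profile -> TCACODE_DROP, packet is freed
+ *
+ * TCACODE_NEXT actions are followed until a terminal code (PASS,
+ * DROP, MARK or RETURN) is reached.
+ */
+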
+static struct top_cdnr *
+tcb_lookup(ifname)
+ char *ifname;
+{
+ struct top_cdnr *top;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(ifname)) != NULL)
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (top->tc_ifq->altq_ifp == ifp)
+ return (top);
+ return (NULL);
+}
+
+static struct cdnr_block *
+cdnr_handle2cb(handle)
+ u_long handle;
+{
+ struct cdnr_block *cb;
+
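+	/*
+	 * a handle is the kernel address of the cdnr block itself;
+	 * validate it by checking the alignment and the handle value
+	 * stored in the block.
+	 */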
+ cb = (struct cdnr_block *)handle;
+ if (handle != ALIGN(cb))
+ return (NULL);
+
+ if (cb == NULL || cb->cb_handle != handle)
+ return (NULL);
+ return (cb);
+}
+
+static u_long
+cdnr_cb2handle(cb)
+ struct cdnr_block *cb;
+{
+ return (cb->cb_handle);
+}
+
+static void *
+cdnr_cballoc(top, type, input_func)
+ struct top_cdnr *top;
+ int type;
+ struct tc_action *(*input_func)(struct cdnr_block *,
+ struct cdnr_pktinfo *);
+{
+ struct cdnr_block *cb;
+ int size;
+
+ switch (type) {
+ case TCETYPE_TOP:
+ size = sizeof(struct top_cdnr);
+ break;
+ case TCETYPE_ELEMENT:
+ size = sizeof(struct cdnr_block);
+ break;
+ case TCETYPE_TBMETER:
+ size = sizeof(struct tbmeter);
+ break;
+ case TCETYPE_TRTCM:
+ size = sizeof(struct trtcm);
+ break;
+ case TCETYPE_TSWTCM:
+ size = sizeof(struct tswtcm);
+ break;
+ default:
+ return (NULL);
+ }
+
+ MALLOC(cb, struct cdnr_block *, size, M_DEVBUF, M_WAITOK);
+ if (cb == NULL)
+ return (NULL);
+ bzero(cb, size);
+
+ cb->cb_len = size;
+ cb->cb_type = type;
+ cb->cb_ref = 0;
+ cb->cb_handle = (u_long)cb;
+ if (top == NULL)
+ cb->cb_top = (struct top_cdnr *)cb;
+ else
+ cb->cb_top = top;
+
+ if (input_func != NULL) {
+ /*
+		 * if this cdnr has an input function, make its
+		 * tc_action point back to this block.
+ */
+ cb->cb_action.tca_code = TCACODE_NEXT;
+ cb->cb_action.tca_next = cb;
+ cb->cb_input = input_func;
+ } else
+ cb->cb_action.tca_code = TCACODE_NONE;
+
+	/* if this isn't the top, register the element with the top-level cdnr */
+ if (top != NULL)
+ LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
+
+ return ((void *)cb);
+}
+
+static void
+cdnr_cbdestroy(cblock)
+ void *cblock;
+{
+ struct cdnr_block *cb = cblock;
+
+ /* delete filters belonging to this cdnr */
+ acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
+
+ /* remove from the top level cdnr */
+ if (cb->cb_top != cblock)
+ LIST_REMOVE(cb, cb_next);
+
+ FREE(cb, M_DEVBUF);
+}
+
+/*
+ * conditioner common destroy routine
+ */
+static int
+generic_element_destroy(cb)
+ struct cdnr_block *cb;
+{
+ int error = 0;
+
+ switch (cb->cb_type) {
+ case TCETYPE_TOP:
+ error = top_destroy((struct top_cdnr *)cb);
+ break;
+ case TCETYPE_ELEMENT:
+ error = element_destroy(cb);
+ break;
+ case TCETYPE_TBMETER:
+ error = tbm_destroy((struct tbmeter *)cb);
+ break;
+ case TCETYPE_TRTCM:
+ error = trtcm_destroy((struct trtcm *)cb);
+ break;
+ case TCETYPE_TSWTCM:
+ error = tswtcm_destroy((struct tswtcm *)cb);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+static int
+tca_verify_action(utca)
+ struct tc_action *utca;
+{
+ switch (utca->tca_code) {
+ case TCACODE_PASS:
+ case TCACODE_DROP:
+ case TCACODE_MARK:
+ /* these are ok */
+ break;
+
+ case TCACODE_HANDLE:
+ /* verify handle value */
+ if (cdnr_handle2cb(utca->tca_handle) == NULL)
+ return (-1);
+ break;
+
+ case TCACODE_NONE:
+ case TCACODE_RETURN:
+ case TCACODE_NEXT:
+ default:
+ /* should not be passed from a user */
+ return (-1);
+ }
+ return (0);
+}
+
+static void
+tca_import_action(ktca, utca)
+ struct tc_action *ktca, *utca;
+{
+ struct cdnr_block *cb;
+
+ *ktca = *utca;
+ if (ktca->tca_code == TCACODE_HANDLE) {
+ cb = cdnr_handle2cb(ktca->tca_handle);
+ if (cb == NULL) {
+ ktca->tca_code = TCACODE_NONE;
+ return;
+ }
+ ktca->tca_code = TCACODE_NEXT;
+ ktca->tca_next = cb;
+ cb->cb_ref++;
+ } else if (ktca->tca_code == TCACODE_MARK) {
+ ktca->tca_dscp &= DSCP_MASK;
+ }
+ return;
+}
+
+static void
+tca_invalidate_action(tca)
+ struct tc_action *tca;
+{
+ struct cdnr_block *cb;
+
+ if (tca->tca_code == TCACODE_NEXT) {
+ cb = tca->tca_next;
+ if (cb == NULL)
+ return;
+ cb->cb_ref--;
+ }
+ tca->tca_code = TCACODE_NONE;
+}
+
+/*
+ * top level traffic conditioner
+ */
+static struct top_cdnr *
+top_create(ifq)
+ struct ifaltq *ifq;
+{
+ struct top_cdnr *top;
+
+ if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
+ return (NULL);
+
+ top->tc_ifq = ifq;
+ /* set default action for the top level conditioner */
+ top->tc_block.cb_action.tca_code = TCACODE_PASS;
+
+ LIST_INSERT_HEAD(&tcb_list, top, tc_next);
+
+ ifq->altq_cdnr = top;
+
+ return (top);
+}
+
+static int
+top_destroy(top)
+ struct top_cdnr *top;
+{
+ struct cdnr_block *cb;
+
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ ALTQ_CLEAR_CNDTNING(top->tc_ifq);
+ top->tc_ifq->altq_cdnr = NULL;
+
+ /*
+ * destroy all the conditioner elements belonging to this interface
+ */
+ while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
+ while (cb != NULL && cb->cb_ref > 0)
+ cb = LIST_NEXT(cb, cb_next);
+ if (cb != NULL)
+ generic_element_destroy(cb);
+ }
+
+ LIST_REMOVE(top, tc_next);
+
+ cdnr_cbdestroy(top);
+
+ /* if there is no active conditioner, remove the input hook */
+ if (altq_input != NULL) {
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ break;
+ if (top == NULL)
+ altq_input = NULL;
+ }
+
+ return (0);
+}
+
+/*
+ * simple tc elements without an input function (e.g., droppers and markers).
+ */
+static struct cdnr_block *
+element_create(top, action)
+ struct top_cdnr *top;
+ struct tc_action *action;
+{
+ struct cdnr_block *cb;
+
+ if (tca_verify_action(action) < 0)
+ return (NULL);
+
+ if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL)
+ return (NULL);
+
+ tca_import_action(&cb->cb_action, action);
+
+ return (cb);
+}
+
+static int
+element_destroy(cb)
+ struct cdnr_block *cb;
+{
+ if (cb->cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&cb->cb_action);
+
+ cdnr_cbdestroy(cb);
+ return (0);
+}
+
+/*
+ * internal representation of token bucket parameters
+ *	rate:	bytes per machclk tick, scaled by 2^32:
+ *		(((bits_per_sec) / 8) << 32) / machclk_freq
+ *	depth:	bytes scaled by 2^32:  byte << 32
+ */
+#define TB_SHIFT 32
+#define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT)
+#define TB_UNSCALE(x) ((x) >> TB_SHIFT)
+
+static void
+tb_import_profile(tb, profile)
+ struct tbe *tb;
+ struct tb_profile *profile;
+{
+ tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
+ tb->depth = TB_SCALE(profile->depth);
+ if (tb->rate > 0)
+ tb->filluptime = tb->depth / tb->rate;
+ else
+ tb->filluptime = 0xffffffffffffffffLL;
+ tb->token = tb->depth;
+ tb->last = read_machclk();
+}
+
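+/*
+ * Worked example (illustrative, not part of the original import):
+ * with machclk_freq = 10^9 (1GHz), a profile of rate = 1Mbps and
+ * depth = 8192 bytes gives
+ *	tb->rate	= ((1000000 / 8) << 32) / 10^9 ~= 536871
+ *			  (scaled bytes per clock tick)
+ *	tb->depth	= 8192 << 32
+ *	tb->filluptime	= depth / rate ~= 65536000 ticks ~= 65.5 msec,
+ * which matches the time to accumulate 8192 bytes at 125000 bytes/sec.
+ */
+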
+/*
+ * simple token bucket meter
+ */
+static struct tbmeter *
+tbm_create(top, profile, in_action, out_action)
+ struct top_cdnr *top;
+ struct tb_profile *profile;
+ struct tc_action *in_action, *out_action;
+{
+ struct tbmeter *tbm = NULL;
+
+ if (tca_verify_action(in_action) < 0
+ || tca_verify_action(out_action) < 0)
+ return (NULL);
+
+ if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER,
+ tbm_input)) == NULL)
+ return (NULL);
+
+ tb_import_profile(&tbm->tb, profile);
+
+ tca_import_action(&tbm->in_action, in_action);
+ tca_import_action(&tbm->out_action, out_action);
+
+ return (tbm);
+}
+
+static int
+tbm_destroy(tbm)
+ struct tbmeter *tbm;
+{
+ if (tbm->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tbm->in_action);
+ tca_invalidate_action(&tbm->out_action);
+
+ cdnr_cbdestroy(tbm);
+ return (0);
+}
+
+static struct tc_action *
+tbm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct tbmeter *tbm = (struct tbmeter *)cb;
+ u_int64_t len;
+ u_int64_t interval, now;
+
+ len = TB_SCALE(pktinfo->pkt_len);
+
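+	/*
+	 * lazy token refill: tokens are added only when the bucket
+	 * cannot cover this packet, based on the time elapsed since
+	 * the last refill and capped at the bucket depth.
+	 */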
+ if (tbm->tb.token < len) {
+ now = read_machclk();
+ interval = now - tbm->tb.last;
+ if (interval >= tbm->tb.filluptime)
+ tbm->tb.token = tbm->tb.depth;
+ else {
+ tbm->tb.token += interval * tbm->tb.rate;
+ if (tbm->tb.token > tbm->tb.depth)
+ tbm->tb.token = tbm->tb.depth;
+ }
+ tbm->tb.last = now;
+ }
+
+ if (tbm->tb.token < len) {
+ PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
+ return (&tbm->out_action);
+ }
+
+ tbm->tb.token -= len;
+ PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
+ return (&tbm->in_action);
+}
+
+/*
+ * two rate three color marker
+ * as described in draft-heinanen-diffserv-trtcm-01.txt
+ */
+static struct trtcm *
+trtcm_create(top, cmtd_profile, peak_profile,
+ green_action, yellow_action, red_action, coloraware)
+ struct top_cdnr *top;
+ struct tb_profile *cmtd_profile, *peak_profile;
+ struct tc_action *green_action, *yellow_action, *red_action;
+ int coloraware;
+{
+ struct trtcm *tcm = NULL;
+
+ if (tca_verify_action(green_action) < 0
+ || tca_verify_action(yellow_action) < 0
+ || tca_verify_action(red_action) < 0)
+ return (NULL);
+
+ if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM,
+ trtcm_input)) == NULL)
+ return (NULL);
+
+ tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
+ tb_import_profile(&tcm->peak_tb, peak_profile);
+
+ tca_import_action(&tcm->green_action, green_action);
+ tca_import_action(&tcm->yellow_action, yellow_action);
+ tca_import_action(&tcm->red_action, red_action);
+
+ /* set dscps to use */
+ if (tcm->green_action.tca_code == TCACODE_MARK)
+ tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->green_dscp = DSCP_AF11;
+ if (tcm->yellow_action.tca_code == TCACODE_MARK)
+ tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->yellow_dscp = DSCP_AF12;
+ if (tcm->red_action.tca_code == TCACODE_MARK)
+ tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->red_dscp = DSCP_AF13;
+
+ tcm->coloraware = coloraware;
+
+ return (tcm);
+}
+
+static int
+trtcm_destroy(tcm)
+ struct trtcm *tcm;
+{
+ if (tcm->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tcm->green_action);
+ tca_invalidate_action(&tcm->yellow_action);
+ tca_invalidate_action(&tcm->red_action);
+
+ cdnr_cbdestroy(tcm);
+ return (0);
+}
+
+static struct tc_action *
+trtcm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct trtcm *tcm = (struct trtcm *)cb;
+ u_int64_t len;
+ u_int64_t interval, now;
+ u_int8_t color;
+
+ len = TB_SCALE(pktinfo->pkt_len);
+ if (tcm->coloraware) {
+ color = pktinfo->pkt_dscp;
+ if (color != tcm->yellow_dscp && color != tcm->red_dscp)
+ color = tcm->green_dscp;
+ } else {
+ /* if color-blind, precolor it as green */
+ color = tcm->green_dscp;
+ }
+
+ now = read_machclk();
+ if (tcm->cmtd_tb.token < len) {
+ interval = now - tcm->cmtd_tb.last;
+ if (interval >= tcm->cmtd_tb.filluptime)
+ tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
+ else {
+ tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
+ if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
+ tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
+ }
+ tcm->cmtd_tb.last = now;
+ }
+ if (tcm->peak_tb.token < len) {
+ interval = now - tcm->peak_tb.last;
+ if (interval >= tcm->peak_tb.filluptime)
+ tcm->peak_tb.token = tcm->peak_tb.depth;
+ else {
+ tcm->peak_tb.token += interval * tcm->peak_tb.rate;
+ if (tcm->peak_tb.token > tcm->peak_tb.depth)
+ tcm->peak_tb.token = tcm->peak_tb.depth;
+ }
+ tcm->peak_tb.last = now;
+ }
+
+ if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
+ pktinfo->pkt_dscp = tcm->red_dscp;
+ PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
+ return (&tcm->red_action);
+ }
+
+ if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
+ pktinfo->pkt_dscp = tcm->yellow_dscp;
+ tcm->peak_tb.token -= len;
+ PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
+ return (&tcm->yellow_action);
+ }
+
+ pktinfo->pkt_dscp = tcm->green_dscp;
+ tcm->cmtd_tb.token -= len;
+ tcm->peak_tb.token -= len;
+ PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
+ return (&tcm->green_action);
+}
+
+/*
+ * time sliding window three color marker
+ * as described in draft-fang-diffserv-tc-tswtcm-00.txt
+ */
+static struct tswtcm *
+tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
+ green_action, yellow_action, red_action)
+ struct top_cdnr *top;
+ u_int32_t cmtd_rate, peak_rate, avg_interval;
+ struct tc_action *green_action, *yellow_action, *red_action;
+{
+ struct tswtcm *tsw;
+
+ if (tca_verify_action(green_action) < 0
+ || tca_verify_action(yellow_action) < 0
+ || tca_verify_action(red_action) < 0)
+ return (NULL);
+
+ if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM,
+ tswtcm_input)) == NULL)
+ return (NULL);
+
+ tca_import_action(&tsw->green_action, green_action);
+ tca_import_action(&tsw->yellow_action, yellow_action);
+ tca_import_action(&tsw->red_action, red_action);
+
+ /* set dscps to use */
+ if (tsw->green_action.tca_code == TCACODE_MARK)
+ tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->green_dscp = DSCP_AF11;
+ if (tsw->yellow_action.tca_code == TCACODE_MARK)
+ tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->yellow_dscp = DSCP_AF12;
+ if (tsw->red_action.tca_code == TCACODE_MARK)
+ tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->red_dscp = DSCP_AF13;
+
+ /* convert rates from bits/sec to bytes/sec */
+ tsw->cmtd_rate = cmtd_rate / 8;
+ tsw->peak_rate = peak_rate / 8;
+ tsw->avg_rate = 0;
+
+ /* timewin is converted from msec to machine clock unit */
+ tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
+
+ return (tsw);
+}
+
+static int
+tswtcm_destroy(tsw)
+ struct tswtcm *tsw;
+{
+ if (tsw->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tsw->green_action);
+ tca_invalidate_action(&tsw->yellow_action);
+ tca_invalidate_action(&tsw->red_action);
+
+ cdnr_cbdestroy(tsw);
+ return (0);
+}
+
+static struct tc_action *
+tswtcm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct tswtcm *tsw = (struct tswtcm *)cb;
+ int len;
+ u_int32_t avg_rate;
+ u_int64_t interval, now, tmp;
+
+ /*
+ * rate estimator
+ */
+ len = pktinfo->pkt_len;
+ now = read_machclk();
+
+ interval = now - tsw->t_front;
+ /*
+ * calculate average rate:
+ * avg = (avg * timewin + pkt_len)/(timewin + interval)
+ * pkt_len needs to be multiplied by machclk_freq in order to
+ * get (bytes/sec).
+ * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
+ * less than 32 bits, the following 64-bit operation has enough
+ * precision.
+ */
+ tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
+ + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
+ tsw->avg_rate = avg_rate = (u_int32_t)tmp;
+ tsw->t_front = now;
+
+ /*
+ * marker
+ */
+ if (avg_rate > tsw->cmtd_rate) {
+ u_int32_t randval = random() % avg_rate;
+
+ if (avg_rate > tsw->peak_rate) {
+ if (randval < avg_rate - tsw->peak_rate) {
+ /* mark red */
+ pktinfo->pkt_dscp = tsw->red_dscp;
+ PKTCNTR_ADD(&tsw->red_cnt, len);
+ return (&tsw->red_action);
+ } else if (randval < avg_rate - tsw->cmtd_rate)
+ goto mark_yellow;
+ } else {
+ /* peak_rate >= avg_rate > cmtd_rate */
+ if (randval < avg_rate - tsw->cmtd_rate) {
+ mark_yellow:
+ pktinfo->pkt_dscp = tsw->yellow_dscp;
+ PKTCNTR_ADD(&tsw->yellow_cnt, len);
+ return (&tsw->yellow_action);
+ }
+ }
+ }
+
+ /* mark green */
+ pktinfo->pkt_dscp = tsw->green_dscp;
+ PKTCNTR_ADD(&tsw->green_cnt, len);
+ return (&tsw->green_action);
+}
+
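+/*
+ * Worked example of the rate estimator above (illustrative, not part
+ * of the original import): with machclk_freq = 10^9, timewin = 10^6
+ * ticks, a previous avg_rate of 1000 bytes/sec and a 1500-byte packet
+ * arriving after interval = 10^6 ticks,
+ *	avg = (1000 * 10^6 + 1500 * 10^9) / (10^6 + 10^6)
+ *	    = 750500 bytes/sec.
+ * since interval == timewin here, the old average and the
+ * instantaneous rate (1500 bytes/msec = 1.5 * 10^6 bytes/sec) are
+ * weighted equally.
+ */
+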
+/*
+ * ioctl requests
+ */
+static int
+cdnrcmd_if_attach(ifname)
+ char *ifname;
+{
+ struct ifnet *ifp;
+ struct top_cdnr *top;
+
+ if ((ifp = ifunit(ifname)) == NULL)
+ return (EBADF);
+
+ if (ifp->if_snd.altq_cdnr != NULL)
+ return (EBUSY);
+
+ if ((top = top_create(&ifp->if_snd)) == NULL)
+ return (ENOMEM);
+ return (0);
+}
+
+static int
+cdnrcmd_if_detach(ifname)
+ char *ifname;
+{
+ struct top_cdnr *top;
+
+ if ((top = tcb_lookup(ifname)) == NULL)
+ return (EBADF);
+
+ return top_destroy(top);
+}
+
+static int
+cdnrcmd_add_element(ap)
+ struct cdnr_add_element *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ cb = element_create(top, &ap->action);
+ if (cb == NULL)
+ return (EINVAL);
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(cb);
+ return (0);
+}
+
+static int
+cdnrcmd_delete_element(ap)
+ struct cdnr_delete_element *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (cb->cb_type != TCETYPE_ELEMENT)
+ return generic_element_destroy(cb);
+
+ return element_destroy(cb);
+}
+
+static int
+cdnrcmd_add_filter(ap)
+ struct cdnr_add_filter *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&top->tc_classifier, &ap->filter,
+ cb, &ap->filter_handle);
+}
+
+static int
+cdnrcmd_delete_filter(ap)
+ struct cdnr_delete_filter *ap;
+{
+ struct top_cdnr *top;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
+}
+
+static int
+cdnrcmd_add_tbm(ap)
+ struct cdnr_add_tbmeter *ap;
+{
+ struct top_cdnr *top;
+ struct tbmeter *tbm;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
+ if (tbm == NULL)
+ return (EINVAL);
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_tbm(ap)
+ struct cdnr_modify_tbmeter *ap;
+{
+ struct tbmeter *tbm;
+
+ if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ tb_import_profile(&tbm->tb, &ap->profile);
+
+ return (0);
+}
+
+static int
+cdnrcmd_tbm_stats(ap)
+ struct cdnr_tbmeter_stats *ap;
+{
+ struct tbmeter *tbm;
+
+ if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ ap->in_cnt = tbm->in_cnt;
+ ap->out_cnt = tbm->out_cnt;
+
+ return (0);
+}
+
+static int
+cdnrcmd_add_trtcm(ap)
+ struct cdnr_add_trtcm *ap;
+{
+ struct top_cdnr *top;
+ struct trtcm *tcm;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
+ &ap->green_action, &ap->yellow_action,
+ &ap->red_action, ap->coloraware);
+ if (tcm == NULL)
+ return (EINVAL);
+
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_trtcm(ap)
+ struct cdnr_modify_trtcm *ap;
+{
+ struct trtcm *tcm;
+
+ if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
+ tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
+
+ return (0);
+}
+
+static int
+cdnrcmd_tcm_stats(ap)
+ struct cdnr_tcm_stats *ap;
+{
+ struct cdnr_block *cb;
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (cb->cb_type == TCETYPE_TRTCM) {
+ struct trtcm *tcm = (struct trtcm *)cb;
+
+ ap->green_cnt = tcm->green_cnt;
+ ap->yellow_cnt = tcm->yellow_cnt;
+ ap->red_cnt = tcm->red_cnt;
+ } else if (cb->cb_type == TCETYPE_TSWTCM) {
+ struct tswtcm *tsw = (struct tswtcm *)cb;
+
+ ap->green_cnt = tsw->green_cnt;
+ ap->yellow_cnt = tsw->yellow_cnt;
+ ap->red_cnt = tsw->red_cnt;
+ } else
+ return (EINVAL);
+
+ return (0);
+}
+
+static int
+cdnrcmd_add_tswtcm(ap)
+ struct cdnr_add_tswtcm *ap;
+{
+ struct top_cdnr *top;
+ struct tswtcm *tsw;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if (ap->cmtd_rate > ap->peak_rate)
+ return (EINVAL);
+
+ tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
+ ap->avg_interval, &ap->green_action,
+ &ap->yellow_action, &ap->red_action);
+ if (tsw == NULL)
+ return (EINVAL);
+
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_tswtcm(ap)
+ struct cdnr_modify_tswtcm *ap;
+{
+ struct tswtcm *tsw;
+
+ if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (ap->cmtd_rate > ap->peak_rate)
+ return (EINVAL);
+
+ /* convert rates from bits/sec to bytes/sec */
+ tsw->cmtd_rate = ap->cmtd_rate / 8;
+ tsw->peak_rate = ap->peak_rate / 8;
+ tsw->avg_rate = 0;
+
+ /* timewin is converted from msec to machine clock unit */
+ tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
+
+ return (0);
+}
+
+static int
+cdnrcmd_get_stats(ap)
+ struct cdnr_get_stats *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+ struct tbmeter *tbm;
+ struct trtcm *tcm;
+ struct tswtcm *tsw;
+ struct tce_stats tce, *usp;
+ int error, n, nskip, nelements;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ /* copy action stats */
+ bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
+
+ /* stats for each element */
+ nelements = ap->nelements;
+ usp = ap->tce_stats;
+ if (nelements <= 0 || usp == NULL)
+ return (0);
+
+ nskip = ap->nskip;
+ n = 0;
+ LIST_FOREACH(cb, &top->tc_elements, cb_next) {
+ if (nskip > 0) {
+ nskip--;
+ continue;
+ }
+
+ bzero(&tce, sizeof(tce));
+ tce.tce_handle = cb->cb_handle;
+ tce.tce_type = cb->cb_type;
+ switch (cb->cb_type) {
+ case TCETYPE_TBMETER:
+ tbm = (struct tbmeter *)cb;
+ tce.tce_cnts[0] = tbm->in_cnt;
+ tce.tce_cnts[1] = tbm->out_cnt;
+ break;
+ case TCETYPE_TRTCM:
+ tcm = (struct trtcm *)cb;
+ tce.tce_cnts[0] = tcm->green_cnt;
+ tce.tce_cnts[1] = tcm->yellow_cnt;
+ tce.tce_cnts[2] = tcm->red_cnt;
+ break;
+ case TCETYPE_TSWTCM:
+ tsw = (struct tswtcm *)cb;
+ tce.tce_cnts[0] = tsw->green_cnt;
+ tce.tce_cnts[1] = tsw->yellow_cnt;
+ tce.tce_cnts[2] = tsw->red_cnt;
+ break;
+ default:
+ continue;
+ }
+
+ if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
+ sizeof(tce))) != 0)
+ return (error);
+
+ if (++n == nelements)
+ break;
+ }
+ ap->nelements = n;
+
+ return (0);
+}
+
+/*
+ * conditioner device interface
+ */
+int
+cdnropen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ if (machclk_freq == 0)
+ init_machclk();
+
+ if (machclk_freq == 0) {
+ printf("cdnr: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+cdnrclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ struct top_cdnr *top;
+ int err, error = 0;
+
+ while ((top = LIST_FIRST(&tcb_list)) != NULL) {
+ /* destroy all */
+ err = top_destroy(top);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+ altq_input = NULL;
+
+ return (error);
+}
+
+int
+cdnrioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ struct top_cdnr *top;
+ struct cdnr_interface *ifacep;
+ int s, error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case CDNR_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+ s = splimp();
+ switch (cmd) {
+
+ case CDNR_IF_ATTACH:
+ ifacep = (struct cdnr_interface *)addr;
+ error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
+ break;
+
+ case CDNR_IF_DETACH:
+ ifacep = (struct cdnr_interface *)addr;
+ error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
+ break;
+
+ case CDNR_ENABLE:
+ case CDNR_DISABLE:
+ ifacep = (struct cdnr_interface *)addr;
+ if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+
+ case CDNR_ENABLE:
+ ALTQ_SET_CNDTNING(top->tc_ifq);
+ if (altq_input == NULL)
+ altq_input = altq_cdnr_input;
+ break;
+
+ case CDNR_DISABLE:
+ ALTQ_CLEAR_CNDTNING(top->tc_ifq);
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ break;
+ if (top == NULL)
+ altq_input = NULL;
+ break;
+ }
+ break;
+
+ case CDNR_ADD_ELEM:
+ error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
+ break;
+
+ case CDNR_DEL_ELEM:
+ error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
+ break;
+
+ case CDNR_ADD_TBM:
+ error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
+ break;
+
+ case CDNR_MOD_TBM:
+ error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
+ break;
+
+ case CDNR_TBM_STATS:
+ error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
+ break;
+
+ case CDNR_ADD_TCM:
+ error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
+ break;
+
+ case CDNR_MOD_TCM:
+ error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
+ break;
+
+ case CDNR_TCM_STATS:
+ error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
+ break;
+
+ case CDNR_ADD_FILTER:
+ error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
+ break;
+
+ case CDNR_DEL_FILTER:
+ error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
+ break;
+
+ case CDNR_GETSTATS:
+ error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
+ break;
+
+ case CDNR_ADD_TSW:
+ error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
+ break;
+
+ case CDNR_MOD_TSW:
+ error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ splx(s);
+
+ return error;
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw cdnr_sw =
+ {"cdnr", cdnropen, cdnrclose, cdnrioctl};
+
+ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ_CDNR */
diff --git a/sys/altq/altq_cdnr.h b/sys/altq/altq_cdnr.h
new file mode 100644
index 00000000000..d1a1b526fd5
--- /dev/null
+++ b/sys/altq/altq_cdnr.h
@@ -0,0 +1,334 @@
+/* $OpenBSD: altq_cdnr.h,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_cdnr.h,v 1.6 2000/12/14 08:12:45 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1999-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_CDNR_H_
+#define _ALTQ_ALTQ_CDNR_H_
+
+#include <altq/altq.h>
+
+/*
+ * traffic conditioner element types
+ */
+#define TCETYPE_NONE 0
+#define TCETYPE_TOP 1 /* top level conditioner */
+#define TCETYPE_ELEMENT 2 /* a simple tc element */
+#define TCETYPE_TBMETER 3 /* token bucket meter */
+#define TCETYPE_TRTCM 4 /* (two-rate) three color marker */
+#define	TCETYPE_TSWTCM	5	/* time sliding window 3-color marker */
+
+/*
+ * traffic conditioner action
+ */
+struct cdnr_block;
+
+struct tc_action {
+ int tca_code; /* e.g., TCACODE_PASS */
+ /* tca_code dependent variable */
+ union {
+ u_long un_value; /* template */
+ u_int8_t un_dscp; /* diffserv code point */
+ u_long un_handle; /* tc action handle */
+ struct cdnr_block *un_next; /* next tc element block */
+ } tca_un;
+};
+#define tca_value tca_un.un_value
+#define tca_dscp tca_un.un_dscp
+#define tca_handle tca_un.un_handle
+#define tca_next tca_un.un_next
+
+#define TCACODE_NONE 0 /* action is not set */
+#define TCACODE_PASS 1 /* pass this packet */
+#define TCACODE_DROP 2 /* discard this packet */
+#define TCACODE_RETURN 3 /* do not process this packet */
+#define TCACODE_MARK 4 /* mark dscp */
+#define TCACODE_HANDLE 5 /* take action specified by handle */
+#define TCACODE_NEXT 6 /* take action in the next tc element */
+#define TCACODE_MAX 6
+
+#define CDNR_NULL_HANDLE 0
+
+struct cdnr_interface {
+ char cdnr_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+};
+
+/* simple element operations */
+struct cdnr_add_element {
+ struct cdnr_interface iface;
+ struct tc_action action;
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_delete_element {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+};
+
+/* token-bucket meter operations */
+struct cdnr_add_tbmeter {
+ struct cdnr_interface iface;
+ struct tb_profile profile;
+ struct tc_action in_action;
+ struct tc_action out_action;
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_tbmeter {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct tb_profile profile;
+};
+
+struct cdnr_tbmeter_stats {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct pktcntr in_cnt;
+ struct pktcntr out_cnt;
+};
+
+/* two-rate three-color marker operations */
+struct cdnr_add_trtcm {
+ struct cdnr_interface iface;
+ struct tb_profile cmtd_profile; /* profile for committed tb */
+ struct tb_profile peak_profile; /* profile for peak tb */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+ int coloraware; /* color-aware/color-blind */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_trtcm {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct tb_profile cmtd_profile; /* profile for committed tb */
+ struct tb_profile peak_profile; /* profile for peak tb */
+ int coloraware; /* color-aware/color-blind */
+};
+
+struct cdnr_tcm_stats {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+/* time sliding window three-color marker operations */
+struct cdnr_add_tswtcm {
+ struct cdnr_interface iface;
+ u_int32_t cmtd_rate; /* committed rate (bits/sec) */
+ u_int32_t peak_rate; /* peak rate (bits/sec) */
+ u_int32_t avg_interval; /* averaging interval (msec) */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_tswtcm {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ u_int32_t cmtd_rate; /* committed rate (bits/sec) */
+ u_int32_t peak_rate; /* peak rate (bits/sec) */
+ u_int32_t avg_interval; /* averaging interval (msec) */
+};
+
+struct cdnr_add_filter {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct flow_filter filter;
+
+ u_long filter_handle; /* return value */
+};
+
+struct cdnr_delete_filter {
+ struct cdnr_interface iface;
+ u_long filter_handle;
+};
+
+struct tce_stats {
+ u_long tce_handle; /* tc element handle */
+ int tce_type; /* e.g., TCETYPE_ELEMENT */
+ struct pktcntr tce_cnts[3]; /* tcm returns 3 counters */
+};
+
+struct cdnr_get_stats {
+ struct cdnr_interface iface;
+ struct pktcntr cnts[TCACODE_MAX+1];
+
+ /* element stats */
+ int nskip; /* skip # of elements */
+ int nelements; /* # of element stats (WR) */
+ struct tce_stats *tce_stats; /* pointer to stats array */
+};
+
+#define CDNR_IF_ATTACH _IOW('Q', 1, struct cdnr_interface)
+#define CDNR_IF_DETACH _IOW('Q', 2, struct cdnr_interface)
+#define CDNR_ENABLE _IOW('Q', 3, struct cdnr_interface)
+#define CDNR_DISABLE _IOW('Q', 4, struct cdnr_interface)
+#define CDNR_ADD_FILTER _IOWR('Q', 10, struct cdnr_add_filter)
+#define CDNR_DEL_FILTER _IOW('Q', 11, struct cdnr_delete_filter)
+#define CDNR_GETSTATS _IOWR('Q', 12, struct cdnr_get_stats)
+#define CDNR_ADD_ELEM _IOWR('Q', 30, struct cdnr_add_element)
+#define CDNR_DEL_ELEM _IOW('Q', 31, struct cdnr_delete_element)
+#define CDNR_ADD_TBM _IOWR('Q', 32, struct cdnr_add_tbmeter)
+#define CDNR_MOD_TBM _IOW('Q', 33, struct cdnr_modify_tbmeter)
+#define CDNR_TBM_STATS _IOWR('Q', 34, struct cdnr_tbmeter_stats)
+#define CDNR_ADD_TCM _IOWR('Q', 35, struct cdnr_add_trtcm)
+#define CDNR_MOD_TCM _IOWR('Q', 36, struct cdnr_modify_trtcm)
+#define CDNR_TCM_STATS _IOWR('Q', 37, struct cdnr_tcm_stats)
+#define CDNR_ADD_TSW _IOWR('Q', 38, struct cdnr_add_tswtcm)
+#define CDNR_MOD_TSW _IOWR('Q', 39, struct cdnr_modify_tswtcm)
+
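+/*
+ * Illustrative user-space sketch (not part of the original import):
+ * attaching a conditioner and installing a token-bucket meter that
+ * passes in-profile traffic and drops the excess.  The device path is
+ * an assumption; the structures and ioctls are the ones defined above.
+ */
+#if 0
+	struct cdnr_interface iface;
+	struct cdnr_add_tbmeter tbm;
+	int fd;
+
+	fd = open("/dev/altq/cdnr", O_RDWR);	/* assumed device path */
+	strlcpy(iface.cdnr_ifname, "fxp0", IFNAMSIZ);
+	if (ioctl(fd, CDNR_IF_ATTACH, &iface) < 0)
+		err(1, "CDNR_IF_ATTACH");
+
+	memset(&tbm, 0, sizeof(tbm));
+	tbm.iface = iface;
+	tbm.profile.rate = 1000000;		/* 1Mbps committed rate */
+	tbm.profile.depth = 8192;		/* 8KB bucket depth */
+	tbm.in_action.tca_code = TCACODE_PASS;
+	tbm.out_action.tca_code = TCACODE_DROP;
+	if (ioctl(fd, CDNR_ADD_TBM, &tbm) < 0)
+		err(1, "CDNR_ADD_TBM");
+	/* tbm.cdnr_handle can now be passed to CDNR_ADD_FILTER */
+	if (ioctl(fd, CDNR_ENABLE, &iface) < 0)
+		err(1, "CDNR_ENABLE");
+#endif
+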
+#ifndef DSCP_EF
+/* diffserv code points */
+#define DSCP_MASK 0xfc
+#define DSCP_CUMASK 0x03
+#define DSCP_EF 0xb8
+#define DSCP_AF11 0x28
+#define DSCP_AF12 0x30
+#define DSCP_AF13 0x38
+#define DSCP_AF21 0x48
+#define DSCP_AF22 0x50
+#define DSCP_AF23 0x58
+#define DSCP_AF31 0x68
+#define DSCP_AF32 0x70
+#define DSCP_AF33 0x78
+#define DSCP_AF41 0x88
+#define DSCP_AF42 0x90
+#define DSCP_AF43 0x98
+#define AF_CLASSMASK 0xe0
+#define AF_DROPPRECMASK 0x18
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * packet information passed to the input function of tc elements
+ */
+struct cdnr_pktinfo {
+ int pkt_len; /* packet length */
+ u_int8_t pkt_dscp; /* diffserv code point */
+};
+
+/*
+ * traffic conditioner control block common to all types of tc elements
+ */
+struct cdnr_block {
+ LIST_ENTRY(cdnr_block) cb_next;
+ int cb_len; /* size of this tc element */
+ int cb_type; /* cdnr block type */
+ int cb_ref; /* reference count of this element */
+ u_long cb_handle; /* handle of this tc element */
+ struct top_cdnr *cb_top; /* back pointer to top */
+ struct tc_action cb_action; /* top level action for this tcb */
+ struct tc_action *(*cb_input)(struct cdnr_block *,
+ struct cdnr_pktinfo *);
+};
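
[Editor's note: every conditioner element funnels packets through its cb_input hook and answers with a tc_action. As a shape reference only, a hypothetical pass-through element (not part of this import) would look like:

	static struct tc_action *
	null_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo)
	{
		/* a meter would consult pktinfo->pkt_len here and a marker
		 * would rewrite pktinfo->pkt_dscp; this one just hands back
		 * the element's configured top-level action */
		return (&cb->cb_action);
	}
]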
+
+/*
+ * top level traffic conditioner structure for an interface
+ */
+struct top_cdnr {
+ struct cdnr_block tc_block;
+
+ LIST_ENTRY(top_cdnr) tc_next;
+ struct ifaltq *tc_ifq;
+
+ LIST_HEAD(, cdnr_block) tc_elements;
+ struct acc_classifier tc_classifier;
+
+ struct pktcntr tc_cnts[TCACODE_MAX+1];
+};
+
+/* token bucket element */
+struct tbe {
+ u_int64_t rate;
+ u_int64_t depth;
+
+ u_int64_t token;
+ u_int64_t filluptime;
+ u_int64_t last;
+};
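
[Editor's note: these fields drive the usual token-bucket test: tokens accrue at rate since the last arrival, are capped at depth, and a packet is in profile when enough tokens remain. A sketch of that logic with units left abstract (the in-kernel meter works in scaled machine-clock units, so this is illustrative, not the KAME arithmetic):

	static int
	tb_in_profile(struct tbe *tb, u_int64_t now, u_int64_t pktsize)
	{
		tb->token += (now - tb->last) * tb->rate;	/* accrue */
		if (tb->token > tb->depth)
			tb->token = tb->depth;		/* cap at depth */
		tb->last = now;
		if (tb->token < pktsize)
			return (0);			/* OUT of profile */
		tb->token -= pktsize;			/* consume */
		return (1);				/* IN profile */
	}
]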
+
+/* token bucket meter structure */
+struct tbmeter {
+ struct cdnr_block cdnrblk; /* conditioner block */
+ struct tbe tb; /* token bucket */
+ struct tc_action in_action; /* actions for IN/OUT */
+ struct tc_action out_action; /* actions for IN/OUT */
+ struct pktcntr in_cnt; /* statistics for IN/OUT */
+ struct pktcntr out_cnt; /* statistics for IN/OUT */
+};
+
+/* two-rate three-color marker structure */
+struct trtcm {
+ struct cdnr_block cdnrblk; /* conditioner block */
+ struct tbe cmtd_tb; /* committed tb profile */
+ struct tbe peak_tb; /* peak tb profile */
+ struct tc_action green_action;
+ struct tc_action yellow_action;
+ struct tc_action red_action;
+ int coloraware;
+ u_int8_t green_dscp;
+ u_int8_t yellow_dscp;
+ u_int8_t red_dscp;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+/* time sliding window three-color marker structure */
+struct tswtcm {
+ struct cdnr_block cdnrblk; /* conditioner block */
+
+ u_int32_t avg_rate; /* average rate (bytes/sec) */
+ u_int64_t t_front; /* timestamp of last update */
+
+ u_int64_t timewin; /* average interval */
+ u_int32_t cmtd_rate; /* committed target rate */
+ u_int32_t peak_rate; /* peak target rate */
+ struct tc_action green_action;
+ struct tc_action yellow_action;
+ struct tc_action red_action;
+ u_int8_t green_dscp;
+ u_int8_t yellow_dscp;
+ u_int8_t red_dscp;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
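
[Editor's note: avg_rate, t_front and timewin support a time-sliding-window estimator in the style of RFC 2859: each arrival decays the running average over the window and folds in the new bytes. Roughly, assuming avg_rate in bytes/sec, timewin and t_front in machine-clock ticks, and machclk_freq (ticks/sec) from the ALTQ clock support:

	static u_int32_t
	tsw_rate(struct tswtcm *tsw, u_int64_t now, u_int32_t len)
	{
		u_int64_t interval = now - tsw->t_front;

		/* new avg = (bytes still in the window + new bytes) / new span */
		tsw->avg_rate = (tsw->avg_rate * tsw->timewin +
		    (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
		tsw->t_front = now;
		return (tsw->avg_rate);
	}
]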
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_CDNR_H_ */
diff --git a/sys/altq/altq_classq.h b/sys/altq/altq_classq.h
new file mode 100644
index 00000000000..2de696af502
--- /dev/null
+++ b/sys/altq/altq_classq.h
@@ -0,0 +1,207 @@
+/* $OpenBSD: altq_classq.h,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_classq.h,v 1.4 2001/02/09 07:20:40 kjc Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * class queue definitions extracted from rm_class.h.
+ */
+#ifndef _ALTQ_ALTQ_CLASSQ_H_
+#define _ALTQ_ALTQ_CLASSQ_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Packet Queue types: DROPHEAD, RED, RIO, or DROPTAIL.
+ */
+#define Q_DROPHEAD 0x00
+#define Q_RED 0x01
+#define Q_RIO 0x02
+#define Q_DROPTAIL 0x03
+
+#ifdef _KERNEL
+
+/*
+ * Packet Queue structures and macros to manipulate them.
+ */
+struct _class_queue_ {
+ struct mbuf *tail_; /* Tail of packet queue */
+ int qlen_; /* Queue length (in number of packets) */
+	int	qlim_;		/* Queue limit (in number of packets) */
+ int qtype_; /* Queue type */
+};
+
+typedef struct _class_queue_ class_queue_t;
+
+#define qtype(q) (q)->qtype_ /* Get queue type */
+#define qlimit(q) (q)->qlim_ /* Max packets to be queued */
+#define qlen(q) (q)->qlen_ /* Current queue length. */
+#define qtail(q) (q)->tail_ /* Tail of the queue */
+#define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL)
+
+#define	qempty(q)	((q)->qlen_ == 0)	/* Is the queue empty? */
+#define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */
+#define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */
+#define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO)
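
[Editor's note: the queue itself is a circular list of mbufs threaded through m_nextpkt with only the tail stored, which is why qhead() chases tail_->m_nextpkt. Typical discipline-side use of the primitives that follow, on a queue whose qtype/qlimit were set at attach time:

	static void
	example_enqueue(class_queue_t *q, struct mbuf *m)
	{
		if (qlen(q) >= qlimit(q))
			m_freem(m);	/* tail-drop when the queue is full */
		else
			_addq(q, m);	/* append at the tail */
	}
	/* later, _getq(q) removes from the head, i.e. plain FIFO order */
]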
+
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
+extern void _addq(class_queue_t *, struct mbuf *);
+extern struct mbuf *_getq(class_queue_t *);
+extern struct mbuf *_getq_tail(class_queue_t *);
+extern struct mbuf *_getq_random(class_queue_t *);
+extern void _removeq(class_queue_t *, struct mbuf *);
+extern void _flushq(class_queue_t *);
+
+#else /* __GNUC__ && !ALTQ_DEBUG */
+/*
+ * inlined versions
+ */
+static __inline void
+_addq(class_queue_t *q, struct mbuf *m)
+{
+ struct mbuf *m0;
+
+ if ((m0 = qtail(q)) != NULL)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ m0 = m;
+ m0->m_nextpkt = m;
+ qtail(q) = m;
+ qlen(q)++;
+}
+
+static __inline struct mbuf *
+_getq(class_queue_t *q)
+{
+ struct mbuf *m, *m0;
+
+ if ((m = qtail(q)) == NULL)
+ return (NULL);
+ if ((m0 = m->m_nextpkt) != m)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ qtail(q) = NULL;
+ qlen(q)--;
+ m0->m_nextpkt = NULL;
+ return (m0);
+}
+
+/* drop a packet at the tail of the queue */
+static __inline struct mbuf *
+_getq_tail(class_queue_t *q)
+{
+ struct mbuf *m, *m0, *prev;
+
+ if ((m = m0 = qtail(q)) == NULL)
+ return NULL;
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else
+ qtail(q) = prev;
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+/* randomly select a packet in the queue */
+static __inline struct mbuf *
+_getq_random(class_queue_t *q)
+{
+ struct mbuf *m;
+ int i, n;
+
+ if ((m = qtail(q)) == NULL)
+ return NULL;
+ if (m->m_nextpkt == m)
+ qtail(q) = NULL;
+ else {
+ struct mbuf *prev = NULL;
+
+ n = random() % qlen(q) + 1;
+ for (i = 0; i < n; i++) {
+ prev = m;
+ m = m->m_nextpkt;
+ }
+ prev->m_nextpkt = m->m_nextpkt;
+ if (m == qtail(q))
+ qtail(q) = prev;
+ }
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+static __inline void
+_removeq(class_queue_t *q, struct mbuf *m)
+{
+ struct mbuf *m0, *prev;
+
+ m0 = qtail(q);
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else if (qtail(q) == m)
+ qtail(q) = prev;
+ qlen(q)--;
+}
+
+static __inline void
+_flushq(class_queue_t *q)
+{
+ struct mbuf *m;
+
+ while ((m = _getq(q)) != NULL)
+ m_freem(m);
+}
+
+#endif /* __GNUC__ && !ALTQ_DEBUG */
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_CLASSQ_H_ */
diff --git a/sys/altq/altq_conf.c b/sys/altq/altq_conf.c
new file mode 100644
index 00000000000..4baac8d2634
--- /dev/null
+++ b/sys/altq/altq_conf.c
@@ -0,0 +1,424 @@
+/* $OpenBSD: altq_conf.c,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_conf.c,v 1.11 2001/06/21 11:00:36 kjc Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+/*
+ * altq device interface.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#if defined(__FreeBSD__) && (__FreeBSD_version < 400000) && defined(DEVFS)
+#include <sys/devfsext.h>
+#endif /*DEVFS*/
+#include <net/if.h>
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+
+#ifdef ALTQ_CBQ
+altqdev_decl(cbq);
+#endif
+#ifdef ALTQ_WFQ
+altqdev_decl(wfq);
+#endif
+#ifdef ALTQ_AFMAP
+altqdev_decl(afm);
+#endif
+#ifdef ALTQ_FIFOQ
+altqdev_decl(fifoq);
+#endif
+#ifdef ALTQ_RED
+altqdev_decl(red);
+#endif
+#ifdef ALTQ_RIO
+altqdev_decl(rio);
+#endif
+#ifdef ALTQ_LOCALQ
+altqdev_decl(localq);
+#endif
+#ifdef ALTQ_HFSC
+altqdev_decl(hfsc);
+#endif
+#ifdef ALTQ_CDNR
+altqdev_decl(cdnr);
+#endif
+#ifdef ALTQ_BLUE
+altqdev_decl(blue);
+#endif
+#ifdef ALTQ_PRIQ
+altqdev_decl(priq);
+#endif
+
+/*
+ * altq minor device (discipline) table
+ */
+static struct altqsw altqsw[] = { /* minor */
+ {"noq", noopen, noclose, noioctl}, /* 0 (reserved) */
+#ifdef ALTQ_CBQ
+ {"cbq", cbqopen, cbqclose, cbqioctl}, /* 1 */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 1 */
+#endif
+#ifdef ALTQ_WFQ
+ {"wfq", wfqopen, wfqclose, wfqioctl}, /* 2 */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 2 */
+#endif
+#ifdef ALTQ_AFMAP
+ {"afm", afmopen, afmclose, afmioctl}, /* 3 */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 3 */
+#endif
+#ifdef ALTQ_FIFOQ
+ {"fifoq", fifoqopen, fifoqclose, fifoqioctl}, /* 4 */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 4 */
+#endif
+#ifdef ALTQ_RED
+ {"red", redopen, redclose, redioctl}, /* 5 */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 5 */
+#endif
+#ifdef ALTQ_RIO
+ {"rio", rioopen, rioclose, rioioctl}, /* 6 */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 6 */
+#endif
+#ifdef ALTQ_LOCALQ
+ {"localq",localqopen, localqclose, localqioctl}, /* 7 (local use) */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 7 (local use) */
+#endif
+#ifdef ALTQ_HFSC
+ {"hfsc",hfscopen, hfscclose, hfscioctl}, /* 8 */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 8 */
+#endif
+#ifdef ALTQ_CDNR
+ {"cdnr",cdnropen, cdnrclose, cdnrioctl}, /* 9 */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 9 */
+#endif
+#ifdef ALTQ_BLUE
+ {"blue",blueopen, blueclose, blueioctl}, /* 10 */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 10 */
+#endif
+#ifdef ALTQ_PRIQ
+ {"priq",priqopen, priqclose, priqioctl}, /* 11 */
+#else
+ {"noq", noopen, noclose, noioctl}, /* 11 */
+#endif
+};
+
+/*
+ * altq major device support
+ */
+int naltqsw = sizeof (altqsw) / sizeof (altqsw[0]);
+
+#ifdef __FreeBSD__
+static d_open_t altqopen;
+static d_close_t altqclose;
+static d_ioctl_t altqioctl;
+static void altq_drvinit __P((void *));
+#else
+void altqattach __P((int));
+#endif
+
+#if defined(__FreeBSD__)
+#define CDEV_MAJOR 96 /* FreeBSD official number */
+
+#if (__FreeBSD_version < 400000)
+static struct cdevsw altq_cdevsw =
+ { altqopen, altqclose, noread, nowrite,
+ altqioctl, nostop, nullreset, nodevtotty,
+ seltrue, nommap, NULL, "altq", NULL, -1 };
+#else
+static struct cdevsw altq_cdevsw =
+ { altqopen, altqclose, noread, nowrite,
+ altqioctl, seltrue, nommap, nostrategy,
+ "altq", CDEV_MAJOR, nodump, nopsize, 0, -1 };
+#endif
+#endif /* FreeBSD */
+
+#ifdef __FreeBSD__
+static
+#endif
+int
+altqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ int unit = minor(dev);
+
+ if (unit == 0)
+ return (0);
+ if (unit < naltqsw)
+ return (*altqsw[unit].d_open)(dev, flag, fmt, p);
+
+ return ENXIO;
+}
+
+#ifdef __FreeBSD__
+static
+#endif
+int
+altqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ int unit = minor(dev);
+
+ if (unit == 0)
+ return (0);
+ if (unit < naltqsw)
+ return (*altqsw[unit].d_close)(dev, flag, fmt, p);
+
+ return ENXIO;
+}
+
+#ifdef __FreeBSD__
+static
+#endif
+int
+altqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ int unit = minor(dev);
+
+ if (unit == 0) {
+ struct ifnet *ifp;
+ struct altqreq *typereq;
+ struct tbrreq *tbrreq;
+ int error;
+
+ switch (cmd) {
+ case ALTQGTYPE:
+ case ALTQTBRGET:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+ case ALTQGTYPE:
+ typereq = (struct altqreq *)addr;
+ if ((ifp = ifunit(typereq->ifname)) == NULL)
+ return (EINVAL);
+ typereq->arg = (u_long)ifp->if_snd.altq_type;
+ return (0);
+ case ALTQTBRSET:
+ tbrreq = (struct tbrreq *)addr;
+ if ((ifp = ifunit(tbrreq->ifname)) == NULL)
+ return (EINVAL);
+ return tbr_set(&ifp->if_snd, &tbrreq->tb_prof);
+ case ALTQTBRGET:
+ tbrreq = (struct tbrreq *)addr;
+ if ((ifp = ifunit(tbrreq->ifname)) == NULL)
+ return (EINVAL);
+ return tbr_get(&ifp->if_snd, &tbrreq->tb_prof);
+ default:
+ return (EINVAL);
+ }
+ }
+ if (unit < naltqsw)
+ return (*altqsw[unit].d_ioctl)(dev, cmd, addr, flag, p);
+
+ return ENXIO;
+}
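
[Editor's note: minor 0 thus serves as the generic control device. A hedged userland sketch of installing a token-bucket regulator through it; the /dev/altq/altq path is our assumption, and struct tbrreq with its ifname/tb_prof members comes from altq.h:

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <altq/altq.h>

	int
	set_tbr(const char *ifname, u_int rate_bps, u_int depth_bytes)
	{
		struct tbrreq req;
		int fd, error;

		if ((fd = open("/dev/altq/altq", O_RDWR)) < 0)	/* path assumed */
			return (-1);
		memset(&req, 0, sizeof(req));
		strlcpy(req.ifname, ifname, sizeof(req.ifname));
		req.tb_prof.rate = rate_bps;		/* bits per second */
		req.tb_prof.depth = depth_bytes;	/* bucket depth in bytes */
		error = ioctl(fd, ALTQTBRSET, &req);
		close(fd);
		return (error);
	}
]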
+
+#ifdef __FreeBSD__
+
+static int altq_devsw_installed = 0;
+
+#if (__FreeBSD_version < 400000)
+#ifdef DEVFS
+static void *altq_devfs_token[sizeof (altqsw) / sizeof (altqsw[0])];
+#endif
+
+static void
+altq_drvinit(unused)
+ void *unused;
+{
+ dev_t dev;
+#ifdef DEVFS
+ int i;
+#endif
+
+ if (!altq_devsw_installed) {
+ dev = makedev(CDEV_MAJOR,0);
+ cdevsw_add(&dev,&altq_cdevsw,NULL);
+ altq_devsw_installed = 1;
+#ifdef DEVFS
+ for (i=0; i<naltqsw; i++)
+ altq_devfs_token[i] =
+ devfs_add_devswf(&altq_cdevsw, i, DV_CHR,
+ 0, 0, 0644, altqsw[i].d_name);
+#endif
+ printf("altq: major number is %d\n", CDEV_MAJOR);
+ }
+}
+
+#else /* FreeBSD 4.x */
+
+static void
+altq_drvinit(unused)
+ void *unused;
+{
+ int unit;
+
+ cdevsw_add(&altq_cdevsw);
+ altq_devsw_installed = 1;
+ printf("altq: major number is %d\n", CDEV_MAJOR);
+
+ /* create minor devices */
+ for (unit = 0; unit < naltqsw; unit++)
+ make_dev(&altq_cdevsw, unit, 0, 0, 0644,
+ altqsw[unit].d_name);
+}
+
+#endif /* FreeBSD 4.x */
+
+SYSINIT(altqdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,altq_drvinit,NULL)
+
+#endif /* FreeBSD */
+
+#ifdef ALTQ_KLD
+/*
+ * KLD support
+ */
+static int altq_module_register __P((struct altq_module_data *));
+static int altq_module_deregister __P((struct altq_module_data *));
+
+static struct altq_module_data *altq_modules[ALTQT_MAX];
+static struct altqsw noqdisc = {"noq", noopen, noclose, noioctl};
+
+void altq_module_incref(type)
+ int type;
+{
+ if (type < 0 || type >= ALTQT_MAX || altq_modules[type] == NULL)
+ return;
+
+ altq_modules[type]->ref++;
+}
+
+void altq_module_declref(type)
+ int type;
+{
+ if (type < 0 || type >= ALTQT_MAX || altq_modules[type] == NULL)
+ return;
+
+ altq_modules[type]->ref--;
+}
+
+static int
+altq_module_register(mdata)
+ struct altq_module_data *mdata;
+{
+ int type = mdata->type;
+
+ if (type < 0 || type >= ALTQT_MAX)
+ return (EINVAL);
+ if (altqsw[type].d_open != noopen)
+ return (EBUSY);
+ altqsw[type] = *mdata->altqsw; /* set discipline functions */
+ altq_modules[type] = mdata; /* save module data pointer */
+ return (0);
+}
+
+static int
+altq_module_deregister(mdata)
+ struct altq_module_data *mdata;
+{
+ int type = mdata->type;
+
+ if (type < 0 || type >= ALTQT_MAX)
+ return (EINVAL);
+ if (mdata != altq_modules[type])
+ return (EINVAL);
+ if (altq_modules[type]->ref > 0)
+ return (EBUSY);
+ altqsw[type] = noqdisc;
+ altq_modules[type] = NULL;
+ return (0);
+}
+
+int
+altq_module_handler(mod, cmd, arg)
+ module_t mod;
+ int cmd;
+ void * arg;
+{
+ struct altq_module_data *data = (struct altq_module_data *)arg;
+ int error = 0;
+
+ switch (cmd) {
+ case MOD_LOAD:
+ error = altq_module_register(data);
+ break;
+
+ case MOD_UNLOAD:
+ error = altq_module_deregister(data);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return(error);
+}
+
+#endif /* ALTQ_KLD */
diff --git a/sys/altq/altq_conf.h b/sys/altq/altq_conf.h
new file mode 100644
index 00000000000..de935e58daa
--- /dev/null
+++ b/sys/altq/altq_conf.h
@@ -0,0 +1,108 @@
+/* $OpenBSD: altq_conf.h,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_conf.h,v 1.6 2001/01/29 19:59:09 itojun Exp $ */
+
+/*
+ * Copyright (C) 1998-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_ALTQ_CONF_H_
+#define _ALTQ_ALTQ_CONF_H_
+
+#ifdef _KERNEL
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+
+#if (__FreeBSD_version > 300000)
+#define ALTQ_KLD
+#endif
+
+#ifdef ALTQ_KLD
+#include <sys/module.h>
+#endif
+
+#ifndef dev_decl
+#ifdef __STDC__
+#define dev_decl(n,t) d_ ## t ## _t n ## t
+#else
+#define dev_decl(n,t) d_/**/t/**/_t n/**/t
+#endif
+#endif
+
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+typedef int d_open_t __P((dev_t, int, int, struct proc *));
+typedef int d_close_t __P((dev_t, int, int, struct proc *));
+typedef int d_ioctl_t __P((dev_t, u_long, caddr_t, int, struct proc *));
+
+#define noopen (dev_type_open((*))) enodev
+#define noclose (dev_type_close((*))) enodev
+#define noioctl (dev_type_ioctl((*))) enodev
+#endif /* __NetBSD__ || __OpenBSD__ */
+
+#if defined(__OpenBSD__)
+int altqopen __P((dev_t, int, int, struct proc *));
+int altqclose __P((dev_t, int, int, struct proc *));
+int altqioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
+#endif
+
+/*
+ * altq queueing discipline switch structure
+ */
+struct altqsw {
+ char *d_name;
+ d_open_t *d_open;
+ d_close_t *d_close;
+ d_ioctl_t *d_ioctl;
+};
+
+#define altqdev_decl(n) \
+ dev_decl(n,open); dev_decl(n,close); dev_decl(n,ioctl)
+
+#ifdef ALTQ_KLD
+
+struct altq_module_data {
+ int type; /* discipline type */
+ int ref; /* reference count */
+ struct altqsw *altqsw; /* discipline functions */
+};
+
+#define ALTQ_MODULE(name, type, devsw) \
+static struct altq_module_data name##_moddata = { type, 0, devsw }; \
+ \
+moduledata_t name##_mod = { \
+ #name, \
+ altq_module_handler, \
+ &name##_moddata \
+}; \
+DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+96)
+
+void altq_module_incref __P((int));
+void altq_module_declref __P((int));
+int altq_module_handler __P((module_t, int, void *));
+
+#endif /* ALTQ_KLD */
+
+#endif /* _KERNEL */
+#endif /* _ALTQ_ALTQ_CONF_H_ */
diff --git a/sys/altq/altq_fifoq.c b/sys/altq/altq_fifoq.c
new file mode 100644
index 00000000000..16231ce6517
--- /dev/null
+++ b/sys/altq/altq_fifoq.c
@@ -0,0 +1,415 @@
+/* $OpenBSD: altq_fifoq.c,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_fifoq.c,v 1.7 2000/12/14 08:12:45 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_FIFOQ /* fifoq is enabled by ALTQ_FIFOQ option in opt_altq.h */
+
+/*
+ * FIFOQ is a sample ALTQ implementation. There is little need to
+ * use FIFOQ as an alternative queueing scheme in practice, but this
+ * code is provided as a template for those who want to write their
+ * own queueing schemes.
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <netinet/in.h>
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_fifoq.h>
+
+#define FIFOQ_STATS /* collect statistics */
+
+/* fifoq_list keeps all fifoq_state_t's allocated. */
+static fifoq_state_t *fifoq_list = NULL;
+
+/* internal function prototypes */
+static int fifoq_enqueue __P((struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *));
+static struct mbuf *fifoq_dequeue __P((struct ifaltq *, int));
+static int fifoq_detach __P((fifoq_state_t *));
+static int fifoq_request __P((struct ifaltq *, int, void *));
+static void fifoq_purge __P((fifoq_state_t *));
+
+/*
+ * fifoq device interface
+ */
+altqdev_decl(fifoq);
+
+int
+fifoqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+/*
+ * there are 2 ways to act on close.
+ * detach-all-on-close:
+ * use for the daemon style approach. if the daemon dies, all the
+ *	resources will be released.
+ * no-action-on-close:
+ * use for the command style approach. (e.g. fifoq on/off)
+ *
+ * note: close is not called on every close(2) but only when the last
+ * reference is removed (i.e., once, even with multiple simultaneous
+ * references.)
+ */
+int
+fifoqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ fifoq_state_t *q;
+ int err, error = 0;
+
+ while ((q = fifoq_list) != NULL) {
+ /* destroy all */
+ err = fifoq_detach(q);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+fifoqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ fifoq_state_t *q;
+ struct fifoq_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case FIFOQ_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+ case FIFOQ_ENABLE:
+ ifacep = (struct fifoq_interface *)addr;
+ if ((q = altq_lookup(ifacep->fifoq_ifname, ALTQT_FIFOQ))
+ == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(q->q_ifq);
+ break;
+
+ case FIFOQ_DISABLE:
+ ifacep = (struct fifoq_interface *)addr;
+ if ((q = altq_lookup(ifacep->fifoq_ifname, ALTQT_FIFOQ))
+ == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(q->q_ifq);
+ break;
+
+ case FIFOQ_IF_ATTACH:
+ ifp = ifunit(((struct fifoq_interface *)addr)->fifoq_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize fifoq_state_t */
+ MALLOC(q, fifoq_state_t *, sizeof(fifoq_state_t),
+ M_DEVBUF, M_WAITOK);
+ if (q == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(q, sizeof(fifoq_state_t));
+
+ q->q_ifq = &ifp->if_snd;
+ q->q_head = q->q_tail = NULL;
+ q->q_len = 0;
+ q->q_limit = FIFOQ_LIMIT;
+
+ /*
+ * set FIFOQ to this ifnet structure.
+ */
+ error = altq_attach(q->q_ifq, ALTQT_FIFOQ, q,
+ fifoq_enqueue, fifoq_dequeue, fifoq_request,
+ NULL, NULL);
+ if (error) {
+ FREE(q, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the fifoq list */
+ q->q_next = fifoq_list;
+ fifoq_list = q;
+ break;
+
+ case FIFOQ_IF_DETACH:
+ ifacep = (struct fifoq_interface *)addr;
+ if ((q = altq_lookup(ifacep->fifoq_ifname, ALTQT_FIFOQ))
+ == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = fifoq_detach(q);
+ break;
+
+ case FIFOQ_GETSTATS:
+ do {
+ struct fifoq_getstats *q_stats;
+
+ q_stats = (struct fifoq_getstats *)addr;
+ if ((q = altq_lookup(q_stats->iface.fifoq_ifname,
+ ALTQT_FIFOQ)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ q_stats->q_len = q->q_len;
+ q_stats->q_limit = q->q_limit;
+ q_stats->xmit_cnt = q->q_stats.xmit_cnt;
+ q_stats->drop_cnt = q->q_stats.drop_cnt;
+ q_stats->period = q->q_stats.period;
+ } while (0);
+ break;
+
+ case FIFOQ_CONFIG:
+ do {
+ struct fifoq_conf *fc;
+ int limit;
+
+ fc = (struct fifoq_conf *)addr;
+ if ((q = altq_lookup(fc->iface.fifoq_ifname,
+ ALTQT_FIFOQ)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ limit = fc->fifoq_limit;
+ if (limit < 0)
+ limit = 0;
+ q->q_limit = limit;
+ fc->fifoq_limit = limit;
+ } while (0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+/*
+ * fifoq support routines
+ */
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+static int
+fifoq_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ fifoq_state_t *q = (fifoq_state_t *)ifq->altq_disc;
+
+	/* if the queue is full, drop the incoming packet (drop-tail) */
+ if (q->q_len >= q->q_limit) {
+#ifdef FIFOQ_STATS
+ PKTCNTR_ADD(&q->q_stats.drop_cnt, m_pktlen(m));
+#endif
+ m_freem(m);
+ return (ENOBUFS);
+ }
+
+	/* enqueue the packet at the tail of the queue */
+ m->m_nextpkt = NULL;
+ if (q->q_tail == NULL)
+ q->q_head = m;
+ else
+ q->q_tail->m_nextpkt = m;
+ q->q_tail = m;
+ q->q_len++;
+ ifq->ifq_len++;
+ return 0;
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+/*
+ * ALTDQ_POLL is provided for drivers which need to know the next packet
+ * to send in advance.
+ * when ALTDQ_POLL is specified, the next packet to be dequeued is
+ * returned without dequeueing the packet.
+ * when ALTDQ_REMOVE is called *immediately after* an ALTDQ_POLL
+ * operation, the same packet must be returned.
+ */
+static struct mbuf *
+fifoq_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ fifoq_state_t *q = (fifoq_state_t *)ifq->altq_disc;
+ struct mbuf *m = NULL;
+
+ if (op == ALTDQ_POLL)
+ return (q->q_head);
+
+ if ((m = q->q_head) == NULL)
+ return (NULL);
+
+ if ((q->q_head = m->m_nextpkt) == NULL)
+ q->q_tail = NULL;
+ m->m_nextpkt = NULL;
+ q->q_len--;
+ ifq->ifq_len--;
+#ifdef FIFOQ_STATS
+ PKTCNTR_ADD(&q->q_stats.xmit_cnt, m_pktlen(m));
+ if (q->q_len == 0)
+ q->q_stats.period++;
+#endif
+ return (m);
+}
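
[Editor's note: seen from a driver, the poll/remove contract reads like this fragment, where altq_dequeue is the hook installed by altq_attach and txspace is a hypothetical driver-side limit:

	m = (*ifq->altq_dequeue)(ifq, ALTDQ_POLL);	/* peek, no removal */
	if (m != NULL && m_pktlen(m) <= txspace) {
		m = (*ifq->altq_dequeue)(ifq, ALTDQ_REMOVE);
		/* same mbuf the poll returned, per the contract above */
	}
]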
+
+static int
+fifoq_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ fifoq_state_t *q = (fifoq_state_t *)ifq->altq_disc;
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ fifoq_purge(q);
+ break;
+ }
+ return (0);
+}
+
+
+static int fifoq_detach(q)
+ fifoq_state_t *q;
+{
+ fifoq_state_t *tmp;
+ int error = 0;
+
+ if (ALTQ_IS_ENABLED(q->q_ifq))
+ altq_disable(q->q_ifq);
+
+ fifoq_purge(q);
+
+ if ((error = altq_detach(q->q_ifq)))
+ return (error);
+
+ if (fifoq_list == q)
+ fifoq_list = q->q_next;
+ else {
+ for (tmp = fifoq_list; tmp != NULL; tmp = tmp->q_next)
+ if (tmp->q_next == q) {
+ tmp->q_next = q->q_next;
+ break;
+ }
+ if (tmp == NULL)
+ printf("fifoq_detach: no state in fifoq_list!\n");
+ }
+
+ FREE(q, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * fifoq_purge
+ * should be called in splimp or after disabling the fifoq.
+ */
+static void fifoq_purge(q)
+ fifoq_state_t *q;
+{
+ struct mbuf *m;
+
+ while ((m = q->q_head) != NULL) {
+ q->q_head = m->m_nextpkt;
+ m_freem(m);
+ }
+ q->q_tail = NULL;
+ q->q_len = 0;
+ if (ALTQ_IS_ENABLED(q->q_ifq))
+ q->q_ifq->ifq_len = 0;
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw fifoq_sw =
+ {"fifoq", fifoqopen, fifoqclose, fifoqioctl};
+
+ALTQ_MODULE(altq_fifoq, ALTQT_FIFOQ, &fifoq_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ_FIFOQ */
diff --git a/sys/altq/altq_fifoq.h b/sys/altq/altq_fifoq.h
new file mode 100644
index 00000000000..837cf67ced9
--- /dev/null
+++ b/sys/altq/altq_fifoq.h
@@ -0,0 +1,80 @@
+/* $OpenBSD: altq_fifoq.h,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_fifoq.h,v 1.6 2000/12/14 08:12:45 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_FIFOQ_H_
+#define _ALTQ_ALTQ_FIFOQ_H_
+
+typedef struct fifoq_state {
+ struct fifoq_state *q_next; /* next fifoq_state in the list */
+ struct ifaltq *q_ifq; /* backpointer to ifaltq */
+
+ struct mbuf *q_head; /* head of queue */
+ struct mbuf *q_tail; /* tail of queue */
+ int q_len; /* queue length */
+ int q_limit; /* max queue length */
+
+ /* statistics */
+ struct {
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+ } q_stats;
+} fifoq_state_t;
+
+struct fifoq_interface {
+ char fifoq_ifname[IFNAMSIZ];
+};
+
+struct fifoq_getstats {
+ struct fifoq_interface iface;
+ int q_len;
+ int q_limit;
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+};
+
+struct fifoq_conf {
+ struct fifoq_interface iface;
+ int fifoq_limit;
+};
+
+#define	FIFOQ_LIMIT	50	/* default max queue length */
+
+/*
+ * IOCTLs for FIFOQ
+ */
+#define FIFOQ_IF_ATTACH _IOW('Q', 1, struct fifoq_interface)
+#define FIFOQ_IF_DETACH _IOW('Q', 2, struct fifoq_interface)
+#define FIFOQ_ENABLE _IOW('Q', 3, struct fifoq_interface)
+#define FIFOQ_DISABLE _IOW('Q', 4, struct fifoq_interface)
+#define FIFOQ_CONFIG _IOWR('Q', 6, struct fifoq_conf)
+#define FIFOQ_GETSTATS _IOWR('Q', 12, struct fifoq_getstats)
+
+#endif /* _ALTQ_ALTQ_FIFOQ_H_ */
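
[Editor's note: tying the ioctls together, a userland sketch that attaches and enables FIFOQ on an interface. The /dev/altq/fifoq path is our assumption; note that fifoqclose() in altq_fifoq.c above detaches everything, so the descriptor is deliberately kept open:

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <fcntl.h>
	#include <string.h>
	#include <altq/altq.h>
	#include <altq/altq_fifoq.h>

	int
	fifoq_on(const char *ifname)
	{
		struct fifoq_interface iface;
		int fd;

		if ((fd = open("/dev/altq/fifoq", O_RDWR)) < 0)
			return (-1);
		memset(&iface, 0, sizeof(iface));
		strlcpy(iface.fifoq_ifname, ifname, sizeof(iface.fifoq_ifname));
		if (ioctl(fd, FIFOQ_IF_ATTACH, &iface) < 0 ||
		    ioctl(fd, FIFOQ_ENABLE, &iface) < 0)
			return (-1);
		return (fd);	/* keep open: the last close detaches all */
	}
]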
diff --git a/sys/altq/altq_flowvalve.h b/sys/altq/altq_flowvalve.h
new file mode 100644
index 00000000000..39d5c4b383e
--- /dev/null
+++ b/sys/altq/altq_flowvalve.h
@@ -0,0 +1,93 @@
+/* $OpenBSD: altq_flowvalve.h,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_flowvalve.h,v 1.4 2000/12/14 08:12:46 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1998-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_FLOWVALVE_H_
+#define _ALTQ_ALTQ_FLOWVALVE_H_
+
+#ifdef _KERNEL
+
+/* fv_flow structure to define a unique address pair */
+struct fv_flow {
+ int flow_af; /* address family */
+ union {
+ struct {
+ struct in_addr ip_src;
+ struct in_addr ip_dst;
+ } _ip;
+#ifdef INET6
+ struct {
+ struct in6_addr ip6_src;
+ struct in6_addr ip6_dst;
+ } _ip6;
+#endif
+ } flow_un;
+};
+
+#define flow_ip flow_un._ip
+#define flow_ip6 flow_un._ip6
+
+/* flowvalve entry */
+struct fve {
+ TAILQ_ENTRY(fve) fve_lru; /* for LRU list */
+
+ enum fv_state { Green, Red } fve_state;
+
+ int fve_p; /* scaled average drop rate */
+ int fve_f; /* scaled average fraction */
+ int fve_count; /* counter to update f */
+ u_int fve_ifseq; /* ifseq at the last update of f */
+ struct timeval fve_lastdrop; /* timestamp of the last drop */
+
+ struct fv_flow fve_flow; /* unique address pair */
+};
+
+/* flowvalve structure */
+struct flowvalve {
+ u_int fv_ifseq; /* packet sequence number */
+ int fv_flows; /* number of valid flows in the flowlist */
+ int fv_pthresh; /* drop rate threshold */
+
+ TAILQ_HEAD(fv_flowhead, fve) fv_flowlist; /* LRU list */
+
+ struct fve *fv_fves; /* pointer to the allocated fves */
+
+ int *fv_p2ftab; /* drop rate to fraction table */
+
+ struct {
+ u_int pass; /* # of packets that have the fve
+ but aren't predropped */
+ u_int predrop; /* # of packets predropped */
+ u_int alloc; /* # of fves assigned */
+ u_int escape; /* # of fves escaped */
+ } fv_stats;
+};
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_FLOWVALVE_H_ */
diff --git a/sys/altq/altq_hfsc.c b/sys/altq/altq_hfsc.c
new file mode 100644
index 00000000000..280e8e58eef
--- /dev/null
+++ b/sys/altq/altq_hfsc.c
@@ -0,0 +1,1811 @@
+/* $OpenBSD: altq_hfsc.c,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_hfsc.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof, and that
+ * both notices appear in supporting documentation, and that credit
+ * is given to Carnegie Mellon University in all publications reporting
+ * on direct or indirect use of this code or its derivatives.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+/*
+ * H-FSC is described in Proceedings of SIGCOMM'97,
+ * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
+ * Real-Time and Priority Service"
+ * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_hfsc.h>
+
+/*
+ * function prototypes
+ */
+static struct hfsc_if *hfsc_attach __P((struct ifaltq *, u_int));
+static int hfsc_detach __P((struct hfsc_if *));
+static int hfsc_clear_interface __P((struct hfsc_if *));
+static int hfsc_request __P((struct ifaltq *, int, void *));
+static void hfsc_purge __P((struct hfsc_if *));
+static struct hfsc_class *hfsc_class_create __P((struct hfsc_if *,
+ struct service_curve *, struct hfsc_class *, int, int));
+static int hfsc_class_destroy __P((struct hfsc_class *));
+static int hfsc_class_modify __P((struct hfsc_class *,
+ struct service_curve *, struct service_curve *));
+static struct hfsc_class *hfsc_nextclass __P((struct hfsc_class *));
+
+static int hfsc_enqueue __P((struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *));
+static struct mbuf *hfsc_dequeue __P((struct ifaltq *, int));
+
+static int hfsc_addq __P((struct hfsc_class *, struct mbuf *));
+static struct mbuf *hfsc_getq __P((struct hfsc_class *));
+static struct mbuf *hfsc_pollq __P((struct hfsc_class *));
+static void hfsc_purgeq __P((struct hfsc_class *));
+
+static void set_active __P((struct hfsc_class *, int));
+static void set_passive __P((struct hfsc_class *));
+
+static void init_ed __P((struct hfsc_class *, int));
+static void update_ed __P((struct hfsc_class *, int));
+static void update_d __P((struct hfsc_class *, int));
+static void init_v __P((struct hfsc_class *, int));
+static void update_v __P((struct hfsc_class *, int));
+static ellist_t *ellist_alloc __P((void));
+static void ellist_destroy __P((ellist_t *));
+static void ellist_insert __P((struct hfsc_class *));
+static void ellist_remove __P((struct hfsc_class *));
+static void ellist_update __P((struct hfsc_class *));
+struct hfsc_class *ellist_get_mindl __P((ellist_t *));
+static actlist_t *actlist_alloc __P((void));
+static void actlist_destroy __P((actlist_t *));
+static void actlist_insert __P((struct hfsc_class *));
+static void actlist_remove __P((struct hfsc_class *));
+static void actlist_update __P((struct hfsc_class *));
+
+static __inline u_int64_t seg_x2y __P((u_int64_t, u_int64_t));
+static __inline u_int64_t seg_y2x __P((u_int64_t, u_int64_t));
+static __inline u_int64_t m2sm __P((u_int));
+static __inline u_int64_t m2ism __P((u_int));
+static __inline u_int64_t d2dx __P((u_int));
+static u_int sm2m __P((u_int64_t));
+static u_int dx2d __P((u_int64_t));
+
+static void sc2isc __P((struct service_curve *, struct internal_sc *));
+static void rtsc_init __P((struct runtime_sc *, struct internal_sc *,
+ u_int64_t, u_int64_t));
+static u_int64_t rtsc_y2x __P((struct runtime_sc *, u_int64_t));
+static u_int64_t rtsc_x2y __P((struct runtime_sc *, u_int64_t));
+static void rtsc_min __P((struct runtime_sc *, struct internal_sc *,
+ u_int64_t, u_int64_t));
+
+int hfscopen __P((dev_t, int, int, struct proc *));
+int hfscclose __P((dev_t, int, int, struct proc *));
+int hfscioctl __P((dev_t, ioctlcmd_t, caddr_t, int, struct proc *));
+static int hfsccmd_if_attach __P((struct hfsc_attach *));
+static int hfsccmd_if_detach __P((struct hfsc_interface *));
+static int hfsccmd_add_class __P((struct hfsc_add_class *));
+static int hfsccmd_delete_class __P((struct hfsc_delete_class *));
+static int hfsccmd_modify_class __P((struct hfsc_modify_class *));
+static int hfsccmd_add_filter __P((struct hfsc_add_filter *));
+static int hfsccmd_delete_filter __P((struct hfsc_delete_filter *));
+static int hfsccmd_class_stats __P((struct hfsc_class_stats *));
+static void get_class_stats __P((struct class_stats *, struct hfsc_class *));
+static struct hfsc_class *clh_to_clp __P((struct hfsc_if *, u_long));
+static u_long clp_to_clh __P((struct hfsc_class *));
+
+/*
+ * macros
+ */
+#define is_a_parent_class(cl) ((cl)->cl_children != NULL)
+
+/* hif_list keeps all hfsc_if's allocated. */
+static struct hfsc_if *hif_list = NULL;
+
+static struct hfsc_if *
+hfsc_attach(ifq, bandwidth)
+ struct ifaltq *ifq;
+ u_int bandwidth;
+{
+ struct hfsc_if *hif;
+ struct service_curve root_sc;
+
+ MALLOC(hif, struct hfsc_if *, sizeof(struct hfsc_if),
+ M_DEVBUF, M_WAITOK);
+ if (hif == NULL)
+ return (NULL);
+ bzero(hif, sizeof(struct hfsc_if));
+
+ hif->hif_eligible = ellist_alloc();
+ if (hif->hif_eligible == NULL) {
+ FREE(hif, M_DEVBUF);
+ return NULL;
+ }
+
+ hif->hif_ifq = ifq;
+
+ /*
+ * create root class
+ */
+ root_sc.m1 = bandwidth;
+ root_sc.d = 0;
+ root_sc.m2 = bandwidth;
+ if ((hif->hif_rootclass =
+ hfsc_class_create(hif, &root_sc, NULL, 0, 0)) == NULL) {
+ FREE(hif, M_DEVBUF);
+ return (NULL);
+ }
+
+ /* add this state to the hfsc list */
+ hif->hif_next = hif_list;
+ hif_list = hif;
+
+ return (hif);
+}
+
+static int
+hfsc_detach(hif)
+ struct hfsc_if *hif;
+{
+ (void)hfsc_clear_interface(hif);
+ (void)hfsc_class_destroy(hif->hif_rootclass);
+
+ /* remove this interface from the hif list */
+ if (hif_list == hif)
+ hif_list = hif->hif_next;
+ else {
+ struct hfsc_if *h;
+
+ for (h = hif_list; h != NULL; h = h->hif_next)
+ if (h->hif_next == hif) {
+ h->hif_next = hif->hif_next;
+ break;
+ }
+ ASSERT(h != NULL);
+ }
+
+ ellist_destroy(hif->hif_eligible);
+
+ FREE(hif, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes except the root class.
+ */
+static int
+hfsc_clear_interface(hif)
+ struct hfsc_if *hif;
+{
+ struct hfsc_class *cl;
+
+ /* free the filters for this interface */
+ acc_discard_filters(&hif->hif_classifier, NULL, 1);
+
+ /* clear out the classes */
+ while ((cl = hif->hif_rootclass->cl_children) != NULL) {
+ /*
+ * remove the first leaf class found in the hierarchy
+ * then start over
+ */
+ for (; cl != NULL; cl = hfsc_nextclass(cl)) {
+ if (!is_a_parent_class(cl)) {
+ (void)hfsc_class_destroy(cl);
+ break;
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+hfsc_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ hfsc_purge(hif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+hfsc_purge(hif)
+ struct hfsc_if *hif;
+{
+ struct hfsc_class *cl;
+
+ for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ hif->hif_ifq->ifq_len = 0;
+}
+
+struct hfsc_class *
+hfsc_class_create(hif, sc, parent, qlimit, flags)
+ struct hfsc_if *hif;
+ struct service_curve *sc;
+ struct hfsc_class *parent;
+ int qlimit, flags;
+{
+ struct hfsc_class *cl, *p;
+ int s;
+
+#ifndef ALTQ_RED
+ if (flags & HFCF_RED) {
+ printf("hfsc_class_create: RED not configured for HFSC!\n");
+ return (NULL);
+ }
+#endif
+
+ MALLOC(cl, struct hfsc_class *, sizeof(struct hfsc_class),
+ M_DEVBUF, M_WAITOK);
+ if (cl == NULL)
+ return (NULL);
+ bzero(cl, sizeof(struct hfsc_class));
+
+ MALLOC(cl->cl_q, class_queue_t *, sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (cl->cl_q == NULL)
+ goto err_ret;
+ bzero(cl->cl_q, sizeof(class_queue_t));
+
+ cl->cl_actc = actlist_alloc();
+ if (cl->cl_actc == NULL)
+ goto err_ret;
+
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ qlimit(cl->cl_q) = qlimit;
+ qtype(cl->cl_q) = Q_DROPTAIL;
+ qlen(cl->cl_q) = 0;
+ cl->cl_flags = flags;
+#ifdef ALTQ_RED
+ if (flags & (HFCF_RED|HFCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & HFCF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & HFCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (sc->m2 == 0)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (sc->m2 / 8);
+ if (flags & HFCF_RED) {
+ cl->cl_red = red_alloc(0, 0, 0, 0,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RED;
+ }
+#ifdef ALTQ_RIO
+ else {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RIO;
+ }
+#endif
+ }
+#endif /* ALTQ_RED */
+
+ if (sc != NULL && (sc->m1 != 0 || sc->m2 != 0)) {
+ MALLOC(cl->cl_rsc, struct internal_sc *,
+ sizeof(struct internal_sc), M_DEVBUF, M_WAITOK);
+ if (cl->cl_rsc == NULL)
+ goto err_ret;
+ bzero(cl->cl_rsc, sizeof(struct internal_sc));
+ sc2isc(sc, cl->cl_rsc);
+ rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
+ rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
+
+ MALLOC(cl->cl_fsc, struct internal_sc *,
+ sizeof(struct internal_sc), M_DEVBUF, M_WAITOK);
+ if (cl->cl_fsc == NULL)
+ goto err_ret;
+ bzero(cl->cl_fsc, sizeof(struct internal_sc));
+ sc2isc(sc, cl->cl_fsc);
+ rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
+ }
+
+ cl->cl_id = hif->hif_classid++;
+ cl->cl_handle = (u_long)cl; /* XXX: just a pointer to this class */
+ cl->cl_hif = hif;
+ cl->cl_parent = parent;
+
+ s = splimp();
+ hif->hif_classes++;
+ if (flags & HFCF_DEFAULTCLASS)
+ hif->hif_defaultclass = cl;
+
+ /* add this class to the children list of the parent */
+ if (parent == NULL) {
+ /* this is root class */
+ }
+ else if ((p = parent->cl_children) == NULL)
+ parent->cl_children = cl;
+ else {
+ while (p->cl_siblings != NULL)
+ p = p->cl_siblings;
+ p->cl_siblings = cl;
+ }
+ splx(s);
+
+ return (cl);
+
+ err_ret:
+ if (cl->cl_actc != NULL)
+ actlist_destroy(cl->cl_actc);
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ if (cl->cl_fsc != NULL)
+ FREE(cl->cl_fsc, M_DEVBUF);
+ if (cl->cl_rsc != NULL)
+ FREE(cl->cl_rsc, M_DEVBUF);
+ if (cl->cl_q != NULL)
+ FREE(cl->cl_q, M_DEVBUF);
+ FREE(cl, M_DEVBUF);
+ return (NULL);
+}
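
[Editor's note: a two-piece service curve is slope m1 for the first d milliseconds of a backlog period, then slope m2 (units per altq_hfsc.h: m1/m2 in bits/sec, d in msec). For example, a child class allowed to burst at 2 Mbps for 50 msec before settling to 1 Mbps, with illustrative values of our own:

	struct service_curve sc;
	struct hfsc_class *cl;

	sc.m1 = 2000000;	/* 2 Mbps during the initial burst */
	sc.d = 50;		/* the first segment lasts 50 msec */
	sc.m2 = 1000000;	/* 1 Mbps steady-state rate */
	cl = hfsc_class_create(hif, &sc, hif->hif_rootclass, 0, 0);
]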
+
+static int
+hfsc_class_destroy(cl)
+ struct hfsc_class *cl;
+{
+ int s;
+
+ if (is_a_parent_class(cl))
+ return (EBUSY);
+
+ s = splimp();
+
+ /* delete filters referencing to this class */
+ acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0);
+
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+
+ if (cl->cl_parent == NULL) {
+ /* this is root class */
+ } else {
+ struct hfsc_class *p = cl->cl_parent->cl_children;
+
+ if (p == cl)
+ cl->cl_parent->cl_children = cl->cl_siblings;
+ else do {
+ if (p->cl_siblings == cl) {
+ p->cl_siblings = cl->cl_siblings;
+ break;
+ }
+ } while ((p = p->cl_siblings) != NULL);
+ ASSERT(p != NULL);
+ }
+ cl->cl_hif->hif_classes--;
+ splx(s);
+
+ actlist_destroy(cl->cl_actc);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ if (cl->cl_fsc != NULL)
+ FREE(cl->cl_fsc, M_DEVBUF);
+ if (cl->cl_rsc != NULL)
+ FREE(cl->cl_rsc, M_DEVBUF);
+ FREE(cl->cl_q, M_DEVBUF);
+ FREE(cl, M_DEVBUF);
+
+ return (0);
+}
+
+static int
+hfsc_class_modify(cl, rsc, fsc)
+ struct hfsc_class *cl;
+ struct service_curve *rsc, *fsc;
+{
+ struct internal_sc *tmp;
+ int s;
+
+ s = splimp();
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+
+ if (rsc != NULL) {
+ if (rsc->m1 == 0 && rsc->m2 == 0) {
+ if (cl->cl_rsc != NULL) {
+ FREE(cl->cl_rsc, M_DEVBUF);
+ cl->cl_rsc = NULL;
+ }
+ } else {
+ if (cl->cl_rsc == NULL) {
+ MALLOC(tmp, struct internal_sc *,
+ sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (tmp == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ cl->cl_rsc = tmp;
+ }
+ bzero(cl->cl_rsc, sizeof(struct internal_sc));
+ sc2isc(rsc, cl->cl_rsc);
+ rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
+ rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
+ }
+ }
+
+ if (fsc != NULL) {
+ if (fsc->m1 == 0 && fsc->m2 == 0) {
+ if (cl->cl_fsc != NULL) {
+ FREE(cl->cl_fsc, M_DEVBUF);
+ cl->cl_fsc = NULL;
+ }
+ } else {
+ if (cl->cl_fsc == NULL) {
+ MALLOC(tmp, struct internal_sc *,
+ sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (tmp == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ cl->cl_fsc = tmp;
+ }
+ bzero(cl->cl_fsc, sizeof(struct internal_sc));
+ sc2isc(fsc, cl->cl_fsc);
+ rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
+ }
+ }
+ splx(s);
+
+ return (0);
+}
+
+/*
+ * hfsc_nextclass returns the next class in the tree.
+ * usage:
+ * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+ * do_something;
+ */
+static struct hfsc_class *
+hfsc_nextclass(cl)
+ struct hfsc_class *cl;
+{
+ if (cl->cl_children != NULL)
+ cl = cl->cl_children;
+ else if (cl->cl_siblings != NULL)
+ cl = cl->cl_siblings;
+ else {
+ while ((cl = cl->cl_parent) != NULL)
+ if (cl->cl_siblings) {
+ cl = cl->cl_siblings;
+ break;
+ }
+ }
+
+ return (cl);
+}
+
+/*
+ * hfsc_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+hfsc_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+ struct hfsc_class *cl;
+ int len;
+
+ /* grab class set by classifier */
+ if (pktattr == NULL || (cl = pktattr->pattr_class) == NULL)
+ cl = hif->hif_defaultclass;
+ cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */
+
+ len = m_pktlen(m);
+ if (hfsc_addq(cl, m) != 0) {
+ /* drop occurred. mbuf was freed in hfsc_addq. */
+ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+ cl->cl_hif->hif_packets++;
+
+ /* successfully queued. */
+ if (qlen(cl->cl_q) == 1)
+ set_active(cl, m_pktlen(m));
+
+#ifdef HFSC_PKTLOG
+ /* put the logging_hook here */
+#endif
+ return (0);
+}
+
+/*
+ * hfsc_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+hfsc_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+ struct hfsc_class *cl;
+ struct mbuf *m;
+ int len, next_len;
+ int realtime = 0;
+
+ if (hif->hif_packets == 0)
+ /* no packet in the tree */
+ return (NULL);
+
+ if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) {
+ u_int64_t cur_time;
+
+ cl = hif->hif_pollcache;
+ hif->hif_pollcache = NULL;
+ /* check if the class was scheduled by real-time criteria */
+ if (cl->cl_rsc != NULL) {
+ cur_time = read_machclk();
+ realtime = (cl->cl_e <= cur_time);
+ }
+ } else {
+ /*
+ * if there are eligible classes, use real-time criteria.
+ * find the class with the minimum deadline among
+ * the eligible classes.
+ */
+ if ((cl = ellist_get_mindl(hif->hif_eligible)) != NULL) {
+ realtime = 1;
+ } else {
+ /*
+ * use link-sharing criteria
+ * get the class with the minimum vt in the hierarchy
+ */
+ cl = hif->hif_rootclass;
+ while (is_a_parent_class(cl)) {
+ cl = actlist_first(cl->cl_actc);
+ if (cl == NULL)
+ return (NULL);
+ }
+ }
+
+ if (op == ALTDQ_POLL) {
+ hif->hif_pollcache = cl;
+ m = hfsc_pollq(cl);
+ return (m);
+ }
+ }
+
+ m = hfsc_getq(cl);
+ len = m_pktlen(m);
+ cl->cl_hif->hif_packets--;
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len);
+
+ update_v(cl, len);
+ if (realtime)
+ cl->cl_cumul += len;
+
+ if (!qempty(cl->cl_q)) {
+ if (cl->cl_rsc != NULL) {
+ /* update ed */
+ next_len = m_pktlen(qhead(cl->cl_q));
+
+ if (realtime)
+ update_ed(cl, next_len);
+ else
+ update_d(cl, next_len);
+ }
+ } else {
+ /* the class becomes passive */
+ set_passive(cl);
+ }
+
+#ifdef HFSC_PKTLOG
+ /* put the logging_hook here */
+#endif
+
+ return (m);
+}
+
+static int
+hfsc_addq(cl, m)
+ struct hfsc_class *cl;
+ struct mbuf *m;
+{
+
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_addq((rio_t *)cl->cl_red, cl->cl_q,
+ m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
+#endif
+ if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & HFCF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(cl->cl_q, m);
+
+ return (0);
+}
+
+static struct mbuf *
+hfsc_getq(cl)
+ struct hfsc_class *cl;
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_getq(cl->cl_red, cl->cl_q);
+#endif
+ return _getq(cl->cl_q);
+}
+
+static struct mbuf *
+hfsc_pollq(cl)
+ struct hfsc_class *cl;
+{
+ return qhead(cl->cl_q);
+}
+
+static void
+hfsc_purgeq(cl)
+ struct hfsc_class *cl;
+{
+ struct mbuf *m;
+
+ if (qempty(cl->cl_q))
+ return;
+
+ while ((m = _getq(cl->cl_q)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m));
+ m_freem(m);
+ }
+ ASSERT(qlen(cl->cl_q) == 0);
+
+ set_passive(cl);
+}
+
+static void
+set_active(cl, len)
+ struct hfsc_class *cl;
+ int len;
+{
+ if (cl->cl_rsc != NULL)
+ init_ed(cl, len);
+ if (cl->cl_fsc != NULL)
+ init_v(cl, len);
+
+ cl->cl_stats.period++;
+}
+
+static void
+set_passive(cl)
+ struct hfsc_class *cl;
+{
+ if (cl->cl_rsc != NULL)
+ ellist_remove(cl);
+
+ if (cl->cl_fsc != NULL) {
+ while (cl->cl_parent != NULL) {
+ if (--cl->cl_nactive == 0) {
+ /* remove this class from the vt list */
+ actlist_remove(cl);
+ } else
+ /* still has active children */
+ break;
+
+ /* go up to the parent class */
+ cl = cl->cl_parent;
+ }
+ }
+}
+
+static void
+init_ed(cl, next_len)
+ struct hfsc_class *cl;
+ int next_len;
+{
+ u_int64_t cur_time;
+
+ cur_time = read_machclk();
+
+ /* update the deadline curve */
+ rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);
+
+ /*
+ * update the eligible curve.
+ * for concave, it is equal to the deadline curve.
+ * for convex, it is a linear curve with slope m2.
+ */
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+
+ /* compute e and d */
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ ellist_insert(cl);
+}
+
+static void
+update_ed(cl, next_len)
+ struct hfsc_class *cl;
+ int next_len;
+{
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ ellist_update(cl);
+}
+
+static void
+update_d(cl, next_len)
+ struct hfsc_class *cl;
+ int next_len;
+{
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+}
+
+static void
+init_v(cl, len)
+ struct hfsc_class *cl;
+ int len;
+{
+ struct hfsc_class *min_cl, *max_cl;
+
+ while (cl->cl_parent != NULL) {
+
+ if (cl->cl_nactive++ > 0)
+ /* already active */
+ break;
+
+ min_cl = actlist_first(cl->cl_parent->cl_actc);
+ if (min_cl != NULL) {
+ u_int64_t vt;
+
+ /*
+ * set vt to the average of the min and max classes.
+ * if the parent's period didn't change,
+ * don't decrease vt of the class.
+ */
+ max_cl = actlist_last(cl->cl_parent->cl_actc);
+ vt = (min_cl->cl_vt + max_cl->cl_vt) / 2;
+ if (cl->cl_parent->cl_vtperiod == cl->cl_parentperiod)
+ vt = max(cl->cl_vt, vt);
+ cl->cl_vt = vt;
+ } else {
+ /* no packet is backlogged. set vt to 0 */
+ cl->cl_vt = 0;
+ }
+
+ /* update the virtual curve */
+ rtsc_min(&cl->cl_virtual, cl->cl_fsc,
+ cl->cl_vt, cl->cl_total);
+
+ cl->cl_vtperiod++; /* increment vt period */
+ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
+ if (cl->cl_parent->cl_nactive == 0)
+ cl->cl_parentperiod++;
+
+ actlist_insert(cl);
+
+ /* go up to the parent class */
+ cl = cl->cl_parent;
+ }
+}
+
+static void
+update_v(cl, len)
+ struct hfsc_class *cl;
+ int len;
+{
+ while (cl->cl_parent != NULL) {
+
+ cl->cl_total += len;
+
+ if (cl->cl_fsc != NULL) {
+ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total);
+
+ /* update the vt list */
+ actlist_update(cl);
+ }
+
+ /* go up to the parent class */
+ cl = cl->cl_parent;
+ }
+}
+
+/*
+ * TAILQ based ellist and actlist implementation
+ * (ion wanted to make a calendar queue based implementation)
+ */
+/*
+ * eligible list holds backlogged classes being sorted by their eligible times.
+ * there is one eligible list per interface.
+ */
+
+static ellist_t *
+ellist_alloc()
+{
+ ellist_t *head;
+
+ MALLOC(head, ellist_t *, sizeof(ellist_t), M_DEVBUF, M_WAITOK);
+ TAILQ_INIT(head);
+ return (head);
+}
+
+static void
+ellist_destroy(head)
+ ellist_t *head;
+{
+ FREE(head, M_DEVBUF);
+}
+
+static void
+ellist_insert(cl)
+ struct hfsc_class *cl;
+{
+ struct hfsc_if *hif = cl->cl_hif;
+ struct hfsc_class *p;
+
+ /* check the last entry first */
+ if ((p = TAILQ_LAST(hif->hif_eligible, _eligible)) == NULL ||
+ p->cl_e <= cl->cl_e) {
+ TAILQ_INSERT_TAIL(hif->hif_eligible, cl, cl_ellist);
+ return;
+ }
+
+ TAILQ_FOREACH(p, hif->hif_eligible, cl_ellist) {
+ if (cl->cl_e < p->cl_e) {
+ TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static void
+ellist_remove(cl)
+ struct hfsc_class *cl;
+{
+ struct hfsc_if *hif = cl->cl_hif;
+
+ TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
+}
+
+static void
+ellist_update(cl)
+ struct hfsc_class *cl;
+{
+ struct hfsc_if *hif = cl->cl_hif;
+ struct hfsc_class *p, *last;
+
+ /*
+ * the eligible time of a class increases monotonically.
+ * if the next entry has a larger eligible time, nothing to do.
+ */
+ p = TAILQ_NEXT(cl, cl_ellist);
+ if (p == NULL || cl->cl_e <= p->cl_e)
+ return;
+
+ /* check the last entry */
+ last = TAILQ_LAST(hif->hif_eligible, _eligible);
+ ASSERT(last != NULL);
+ if (last->cl_e <= cl->cl_e) {
+ TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_TAIL(hif->hif_eligible, cl, cl_ellist);
+ return;
+ }
+
+ /*
+ * the new position must be between the next entry
+ * and the last entry
+ */
+ while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
+ if (cl->cl_e < p->cl_e) {
+ TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+/* find the class with the minimum deadline among the eligible classes */
+struct hfsc_class *
+ellist_get_mindl(head)
+ ellist_t *head;
+{
+ struct hfsc_class *p, *cl = NULL;
+ u_int64_t cur_time;
+
+ cur_time = read_machclk();
+
+ TAILQ_FOREACH(p, head, cl_ellist) {
+ if (p->cl_e > cur_time)
+ break;
+ if (cl == NULL || p->cl_d < cl->cl_d)
+ cl = p;
+ }
+ return (cl);
+}
+
+/*
+ * active children list holds backlogged child classes being sorted
+ * by their virtual time.
+ * each intermediate class has one active children list.
+ */
+static actlist_t *
+actlist_alloc()
+{
+ actlist_t *head;
+
+ MALLOC(head, actlist_t *, sizeof(actlist_t), M_DEVBUF, M_WAITOK);
+ TAILQ_INIT(head);
+ return (head);
+}
+
+static void
+actlist_destroy(head)
+ actlist_t *head;
+{
+ FREE(head, M_DEVBUF);
+}
+
+static void
+actlist_insert(cl)
+ struct hfsc_class *cl;
+{
+ struct hfsc_class *p;
+
+ /* check the last entry first */
+ if ((p = TAILQ_LAST(cl->cl_parent->cl_actc, _active)) == NULL
+ || p->cl_vt <= cl->cl_vt) {
+ TAILQ_INSERT_TAIL(cl->cl_parent->cl_actc, cl, cl_actlist);
+ return;
+ }
+
+ TAILQ_FOREACH(p, cl->cl_parent->cl_actc, cl_actlist) {
+ if (cl->cl_vt < p->cl_vt) {
+ TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static void
+actlist_remove(cl)
+ struct hfsc_class *cl;
+{
+ TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
+}
+
+static void
+actlist_update(cl)
+ struct hfsc_class *cl;
+{
+ struct hfsc_class *p, *last;
+
+ /*
+ * the virtual time of a class increases monotonically during its
+ * backlogged period.
+ * if the next entry has a larger virtual time, nothing to do.
+ */
+ p = TAILQ_NEXT(cl, cl_actlist);
+ if (p == NULL || cl->cl_vt <= p->cl_vt)
+ return;
+
+ /* check the last entry */
+ last = TAILQ_LAST(cl->cl_parent->cl_actc, _active);
+ ASSERT(last != NULL);
+ if (last->cl_vt <= cl->cl_vt) {
+ TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_TAIL(cl->cl_parent->cl_actc, cl, cl_actlist);
+ return;
+ }
+
+ /*
+ * the new position must be between the next entry
+ * and the last entry
+ */
+ while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
+ if (cl->cl_vt < p->cl_vt) {
+ TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+/*
+ * service curve support functions
+ *
+ * external service curve parameters
+ * m: bits/sec
+ * d: msec
+ * internal service curve parameters
+ * sm: (bytes/tsc_interval) << SM_SHIFT
+ * ism: (tsc_count/byte) << ISM_SHIFT
+ * dx: tsc_count
+ *
+ * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
+ * we should be able to handle 100K-1Gbps linkspeed with 200MHz-1GHz CPU
+ * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
+ * digits in decimal using the following table.
+ *
+ * bits/sec	100Kbps	    1Mbps     10Mbps     100Mbps    1Gbps
+ * ----------+-------------------------------------------------------
+ * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6
+ * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6
+ * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6
+ *
+ * nsec/byte 80000 8000 800 80 8
+ * ism(500MHz) 40000 4000 400 40 4
+ * ism(200MHz) 16000 1600 160 16 1.6
+ */
+#define SM_SHIFT 24
+#define ISM_SHIFT 10
+
+#define SC_LARGEVAL (1LL << 32)
+#define SC_INFINITY 0xffffffffffffffffLL
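+
+/*
+ * worked example (illustrative): assuming machclk_freq is 500MHz and
+ * m is 10Mbps, m2sm() below computes
+ *	sm = ((u_int64_t)10000000 << 24) / 8 / 500000000 = 41943
+ * which, unscaled, is 41943 / 2^24 = 0.0025 bytes per clock tick,
+ * matching 1.25Mbytes/sec at 500M ticks/sec.
+ */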
+
+static __inline u_int64_t
+seg_x2y(x, sm)
+ u_int64_t x;
+ u_int64_t sm;
+{
+ u_int64_t y;
+
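+	/*
+	 * when x is large, shift x down before multiplying so that
+	 * the 64-bit product x * sm cannot overflow; the low-order
+	 * bits of x are traded for the wider range.
+	 */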
+ if (x < SC_LARGEVAL)
+ y = x * sm >> SM_SHIFT;
+ else
+ y = (x >> SM_SHIFT) * sm;
+ return (y);
+}
+
+static __inline u_int64_t
+seg_y2x(y, ism)
+ u_int64_t y;
+ u_int64_t ism;
+{
+ u_int64_t x;
+
+ if (y == 0)
+ x = 0;
+ else if (ism == SC_INFINITY)
+ x = SC_INFINITY;
+ else if (y < SC_LARGEVAL)
+ x = y * ism >> ISM_SHIFT;
+ else
+ x = (y >> ISM_SHIFT) * ism;
+ return (x);
+}
+
+static __inline u_int64_t
+m2sm(m)
+ u_int m;
+{
+ u_int64_t sm;
+
+ sm = ((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq;
+ return (sm);
+}
+
+static __inline u_int64_t
+m2ism(m)
+ u_int m;
+{
+ u_int64_t ism;
+
+ if (m == 0)
+ ism = SC_INFINITY;
+ else
+ ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m;
+ return (ism);
+}
+
+static __inline u_int64_t
+d2dx(d)
+ u_int d;
+{
+ u_int64_t dx;
+
+ dx = ((u_int64_t)d * machclk_freq) / 1000;
+ return (dx);
+}
+
+static u_int
+sm2m(sm)
+ u_int64_t sm;
+{
+ u_int64_t m;
+
+ m = (sm * 8 * machclk_freq) >> SM_SHIFT;
+ return ((u_int)m);
+}
+
+static u_int
+dx2d(dx)
+ u_int64_t dx;
+{
+ u_int64_t d;
+
+ d = dx * 1000 / machclk_freq;
+ return ((u_int)d);
+}
+
+static void
+sc2isc(sc, isc)
+ struct service_curve *sc;
+ struct internal_sc *isc;
+{
+ isc->sm1 = m2sm(sc->m1);
+ isc->ism1 = m2ism(sc->m1);
+ isc->dx = d2dx(sc->d);
+ isc->dy = seg_x2y(isc->dx, isc->sm1);
+ isc->sm2 = m2sm(sc->m2);
+ isc->ism2 = m2ism(sc->m2);
+}
+
+/*
+ * initialize the runtime service curve with the given internal
+ * service curve starting at (x, y).
+ */
+static void
+rtsc_init(rtsc, isc, x, y)
+ struct runtime_sc *rtsc;
+ struct internal_sc *isc;
+ u_int64_t x, y;
+{
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->sm1 = isc->sm1;
+ rtsc->ism1 = isc->ism1;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ rtsc->sm2 = isc->sm2;
+ rtsc->ism2 = isc->ism2;
+}
+
+/*
+ * calculate the y-projection of the runtime service curve by the
+ * given x-projection value
+ */
+static u_int64_t
+rtsc_y2x(rtsc, y)
+ struct runtime_sc *rtsc;
+ u_int64_t y;
+{
+ u_int64_t x;
+
+ if (y < rtsc->y)
+ x = rtsc->x;
+ else if (y <= rtsc->y + rtsc->dy) {
+ /* x belongs to the 1st segment */
+ if (rtsc->dy == 0)
+ x = rtsc->x + rtsc->dx;
+ else
+ x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
+ } else {
+ /* x belongs to the 2nd segment */
+ x = rtsc->x + rtsc->dx
+ + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
+ }
+ return (x);
+}
+
+static u_int64_t
+rtsc_x2y(rtsc, x)
+ struct runtime_sc *rtsc;
+ u_int64_t x;
+{
+ u_int64_t y;
+
+ if (x <= rtsc->x)
+ y = rtsc->y;
+ else if (x <= rtsc->x + rtsc->dx)
+ /* y belongs to the 1st segment */
+ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
+ else
+ /* y belongs to the 2nd segment */
+ y = rtsc->y + rtsc->dy
+ + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
+ return (y);
+}
+
+/*
+ * update the runtime service curve by taking the minimum of the current
+ * runtime service curve and the service curve starting at (x, y).
+ */
+static void
+rtsc_min(rtsc, isc, x, y)
+ struct runtime_sc *rtsc;
+ struct internal_sc *isc;
+ u_int64_t x, y;
+{
+ u_int64_t y1, y2, dx, dy;
+
+ if (isc->sm1 <= isc->sm2) {
+ /* service curve is convex */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 < y)
+ /* the current rtsc is smaller */
+ return;
+ rtsc->x = x;
+ rtsc->y = y;
+ return;
+ }
+
+ /*
+ * service curve is concave
+ * compute the two y values of the current rtsc
+ * y1: at x
+ * y2: at (x + dx)
+ */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 <= y) {
+ /* rtsc is below isc, no change to rtsc */
+ return;
+ }
+
+ y2 = rtsc_x2y(rtsc, x + isc->dx);
+ if (y2 >= y + isc->dy) {
+ /* rtsc is above isc, replace rtsc by isc */
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ return;
+ }
+
+	/*
+	 * the two curves intersect.
+	 * compute the offsets (dx, dy) using the reverse
+	 * function of seg_x2y():
+	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
+	 * i.e., dx * sm1 == dx * sm2 + (y1 - y), so
+	 *	dx = (y1 - y) / (sm1 - sm2)
+	 * scaled by SM_SHIFT as in the statement below.
+	 */
+ dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
+ /*
+ * check if (x, y1) belongs to the 1st segment of rtsc.
+ * if so, add the offset.
+ */
+ if (rtsc->x + rtsc->dx > x)
+ dx += rtsc->x + rtsc->dx - x;
+ dy = seg_x2y(dx, isc->sm1);
+
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = dx;
+ rtsc->dy = dy;
+ return;
+}
+
+/*
+ * hfsc device interface
+ */
+int
+hfscopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ if (machclk_freq == 0)
+ init_machclk();
+
+ if (machclk_freq == 0) {
+ printf("hfsc: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+hfscclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ struct hfsc_if *hif;
+ int err, error = 0;
+
+ while ((hif = hif_list) != NULL) {
+ /* destroy all */
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ altq_disable(hif->hif_ifq);
+
+ err = altq_detach(hif->hif_ifq);
+ if (err == 0)
+ err = hfsc_detach(hif);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+hfscioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ struct hfsc_if *hif;
+ struct hfsc_interface *ifacep;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case HFSC_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case HFSC_IF_ATTACH:
+ error = hfsccmd_if_attach((struct hfsc_attach *)addr);
+ break;
+
+ case HFSC_IF_DETACH:
+ error = hfsccmd_if_detach((struct hfsc_interface *)addr);
+ break;
+
+ case HFSC_ENABLE:
+ case HFSC_DISABLE:
+ case HFSC_CLEAR_HIERARCHY:
+ ifacep = (struct hfsc_interface *)addr;
+ if ((hif = altq_lookup(ifacep->hfsc_ifname,
+ ALTQT_HFSC)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+
+ case HFSC_ENABLE:
+ if (hif->hif_defaultclass == NULL) {
+#if 1
+ printf("hfsc: no default class\n");
+#endif
+ error = EINVAL;
+ break;
+ }
+ error = altq_enable(hif->hif_ifq);
+ break;
+
+ case HFSC_DISABLE:
+ error = altq_disable(hif->hif_ifq);
+ break;
+
+ case HFSC_CLEAR_HIERARCHY:
+ hfsc_clear_interface(hif);
+ break;
+ }
+ break;
+
+ case HFSC_ADD_CLASS:
+ error = hfsccmd_add_class((struct hfsc_add_class *)addr);
+ break;
+
+ case HFSC_DEL_CLASS:
+ error = hfsccmd_delete_class((struct hfsc_delete_class *)addr);
+ break;
+
+ case HFSC_MOD_CLASS:
+ error = hfsccmd_modify_class((struct hfsc_modify_class *)addr);
+ break;
+
+ case HFSC_ADD_FILTER:
+ error = hfsccmd_add_filter((struct hfsc_add_filter *)addr);
+ break;
+
+ case HFSC_DEL_FILTER:
+ error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr);
+ break;
+
+ case HFSC_GETSTATS:
+ error = hfsccmd_class_stats((struct hfsc_class_stats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+hfsccmd_if_attach(ap)
+ struct hfsc_attach *ap;
+{
+ struct hfsc_if *hif;
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(ap->iface.hfsc_ifname)) == NULL)
+ return (ENXIO);
+
+ if ((hif = hfsc_attach(&ifp->if_snd, ap->bandwidth)) == NULL)
+ return (ENOMEM);
+
+ /*
+	 * attach HFSC to this ifnet structure.
+ */
+ if ((error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif,
+ hfsc_enqueue, hfsc_dequeue, hfsc_request,
+ &hif->hif_classifier, acc_classify)) != 0)
+ (void)hfsc_detach(hif);
+
+ return (error);
+}
+
+static int
+hfsccmd_if_detach(ap)
+ struct hfsc_interface *ap;
+{
+ struct hfsc_if *hif;
+ int error;
+
+ if ((hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ altq_disable(hif->hif_ifq);
+
+ if ((error = altq_detach(hif->hif_ifq)))
+ return (error);
+
+ return hfsc_detach(hif);
+}
+
+static int
+hfsccmd_add_class(ap)
+ struct hfsc_add_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl, *parent;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((parent = clh_to_clp(hif, ap->parent_handle)) == NULL) {
+ if (ap->parent_handle == HFSC_ROOTCLASS_HANDLE)
+ parent = hif->hif_rootclass;
+ else
+ return (EINVAL);
+ }
+
+ if ((cl = hfsc_class_create(hif, &ap->service_curve, parent,
+ ap->qlimit, ap->flags)) == NULL)
+ return (ENOMEM);
+
+ /* return a class handle to the user */
+ ap->class_handle = clp_to_clh(cl);
+ return (0);
+}
+
+static int
+hfsccmd_delete_class(ap)
+ struct hfsc_delete_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return hfsc_class_destroy(cl);
+}
+
+static int
+hfsccmd_modify_class(ap)
+ struct hfsc_modify_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct service_curve *rsc = NULL;
+ struct service_curve *fsc = NULL;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ if (ap->sctype & HFSC_REALTIMESC)
+ rsc = &ap->service_curve;
+ if (ap->sctype & HFSC_LINKSHARINGSC)
+ fsc = &ap->service_curve;
+
+ return hfsc_class_modify(cl, rsc, fsc);
+}
+
+static int
+hfsccmd_add_filter(ap)
+ struct hfsc_add_filter *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ if (is_a_parent_class(cl)) {
+#if 1
+ printf("hfsccmd_add_filter: not a leaf class!\n");
+#endif
+ return (EINVAL);
+ }
+
+ return acc_add_filter(&hif->hif_classifier, &ap->filter,
+ cl, &ap->filter_handle);
+}
+
+static int
+hfsccmd_delete_filter(ap)
+ struct hfsc_delete_filter *ap;
+{
+ struct hfsc_if *hif;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&hif->hif_classifier,
+ ap->filter_handle);
+}
+
+static int
+hfsccmd_class_stats(ap)
+ struct hfsc_class_stats *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct class_stats stats, *usp;
+ int n, nclasses, error;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ ap->cur_time = read_machclk();
+ ap->hif_classes = hif->hif_classes;
+ ap->hif_packets = hif->hif_packets;
+
+ /* skip the first N classes in the tree */
+ nclasses = ap->nskip;
+ for (cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses;
+ cl = hfsc_nextclass(cl), n++)
+ ;
+ if (n != nclasses)
+ return (EINVAL);
+
+ /* then, read the next N classes in the tree */
+ nclasses = ap->nclasses;
+ usp = ap->stats;
+ for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) {
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+
+ ap->nclasses = n;
+
+ return (0);
+}
+
+static void
+get_class_stats(sp, cl)
+ struct class_stats *sp;
+ struct hfsc_class *cl;
+{
+ sp->class_id = cl->cl_id;
+ sp->class_handle = clp_to_clh(cl);
+
+ if (cl->cl_rsc != NULL) {
+ sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
+ sp->rsc.d = dx2d(cl->cl_rsc->dx);
+ sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
+ } else {
+ sp->rsc.m1 = 0;
+ sp->rsc.d = 0;
+ sp->rsc.m2 = 0;
+ }
+ if (cl->cl_fsc != NULL) {
+ sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
+ sp->fsc.d = dx2d(cl->cl_fsc->dx);
+ sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
+ } else {
+ sp->fsc.m1 = 0;
+ sp->fsc.d = 0;
+ sp->fsc.m2 = 0;
+ }
+
+ sp->total = cl->cl_total;
+ sp->cumul = cl->cl_cumul;
+
+ sp->d = cl->cl_d;
+ sp->e = cl->cl_e;
+ sp->vt = cl->cl_vt;
+
+ sp->qlength = qlen(cl->cl_q);
+ sp->xmit_cnt = cl->cl_stats.xmit_cnt;
+ sp->drop_cnt = cl->cl_stats.drop_cnt;
+ sp->period = cl->cl_stats.period;
+
+ sp->qtype = qtype(cl->cl_q);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct hfsc_class *
+clh_to_clp(hif, chandle)
+ struct hfsc_if *hif;
+ u_long chandle;
+{
+ struct hfsc_class *cl;
+
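+	/*
+	 * a class handle is simply the kernel pointer to the class
+	 * itself; the alignment and back-pointer checks below reject
+	 * handles that are stale or were never issued by this hif.
+	 */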
+ cl = (struct hfsc_class *)chandle;
+ if (chandle != ALIGN(cl)) {
+#if 1
+		printf("clh_to_clp: unaligned pointer %p\n", cl);
+#endif
+ return (NULL);
+ }
+
+ if (cl == NULL || cl->cl_handle != chandle || cl->cl_hif != hif)
+ return (NULL);
+
+ return (cl);
+}
+
+/* convert a class pointer to the corresponding class handle */
+static u_long
+clp_to_clh(cl)
+ struct hfsc_class *cl;
+{
+ if (cl->cl_parent == NULL)
+ return (HFSC_ROOTCLASS_HANDLE); /* XXX */
+ return (cl->cl_handle);
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw hfsc_sw =
+ {"hfsc", hfscopen, hfscclose, hfscioctl};
+
+ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ_HFSC */
diff --git a/sys/altq/altq_hfsc.h b/sys/altq/altq_hfsc.h
new file mode 100644
index 00000000000..edcac261a5e
--- /dev/null
+++ b/sys/altq/altq_hfsc.h
@@ -0,0 +1,279 @@
+/* $OpenBSD: altq_hfsc.h,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_hfsc.h,v 1.6 2000/12/14 08:12:46 thorpej Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof, and that
+ * both notices appear in supporting documentation, and that credit
+ * is given to Carnegie Mellon University in all publications reporting
+ * on direct or indirect use of this code or its derivatives.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+#ifndef _ALTQ_ALTQ_HFSC_H_
+#define _ALTQ_ALTQ_HFSC_H_
+
+#include <altq/altq.h>
+#include <altq/altq_classq.h>
+#include <altq/altq_red.h>
+#include <altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct hfsc_interface {
+ char hfsc_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+};
+
+struct hfsc_attach {
+ struct hfsc_interface iface;
+ u_int bandwidth; /* link bandwidth in bits/sec */
+};
+
+struct service_curve {
+ u_int m1; /* slope of the first segment in bits/sec */
+ u_int d; /* the x-projection of the first segment in msec */
+ u_int m2; /* slope of the second segment in bits/sec */
+};
+
+struct hfsc_add_class {
+ struct hfsc_interface iface;
+ u_long parent_handle;
+ struct service_curve service_curve;
+ int qlimit;
+ int flags;
+
+ u_long class_handle; /* return value */
+};
+
+/* special class handles */
+#define HFSC_ROOTCLASS_HANDLE 0
+#define HFSC_NULLCLASS_HANDLE 0
+
+/* hfsc class flags */
+#define HFCF_RED 0x0001 /* use RED */
+#define HFCF_ECN 0x0002 /* use RED/ECN */
+#define HFCF_RIO 0x0004 /* use RIO */
+#define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define HFCF_DEFAULTCLASS 0x1000 /* default class */
+
+struct hfsc_delete_class {
+ struct hfsc_interface iface;
+ u_long class_handle;
+};
+
+/* service curve types */
+#define HFSC_REALTIMESC 1
+#define HFSC_LINKSHARINGSC 2
+#define HFSC_DEFAULTSC (HFSC_REALTIMESC|HFSC_LINKSHARINGSC)
+
+struct hfsc_modify_class {
+ struct hfsc_interface iface;
+ u_long class_handle;
+ struct service_curve service_curve;
+ int sctype;
+};
+
+struct hfsc_add_filter {
+ struct hfsc_interface iface;
+ u_long class_handle;
+ struct flow_filter filter;
+
+ u_long filter_handle; /* return value */
+};
+
+struct hfsc_delete_filter {
+ struct hfsc_interface iface;
+ u_long filter_handle;
+};
+
+struct class_stats {
+ u_int class_id;
+ u_long class_handle;
+ struct service_curve rsc;
+ struct service_curve fsc;
+
+ u_int64_t total; /* total work in bytes */
+ u_int64_t cumul; /* cumulative work in bytes
+ done by real-time criteria */
+ u_int64_t d; /* deadline */
+ u_int64_t e; /* eligible time */
+ u_int64_t vt; /* virtual time */
+
+ u_int qlength;
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3];
+};
+
+struct hfsc_class_stats {
+ struct hfsc_interface iface;
+ int nskip; /* skip # of classes */
+ int nclasses; /* # of class stats (WR) */
+ u_int64_t cur_time; /* current time */
+ u_int hif_classes; /* # of classes in the tree */
+ u_int hif_packets; /* # of packets in the tree */
+ struct class_stats *stats; /* pointer to stats array */
+};
+
+#define HFSC_IF_ATTACH _IOW('Q', 1, struct hfsc_attach)
+#define HFSC_IF_DETACH _IOW('Q', 2, struct hfsc_interface)
+#define HFSC_ENABLE _IOW('Q', 3, struct hfsc_interface)
+#define HFSC_DISABLE _IOW('Q', 4, struct hfsc_interface)
+#define HFSC_CLEAR_HIERARCHY _IOW('Q', 5, struct hfsc_interface)
+#define HFSC_ADD_CLASS _IOWR('Q', 7, struct hfsc_add_class)
+#define HFSC_DEL_CLASS _IOW('Q', 8, struct hfsc_delete_class)
+#define HFSC_MOD_CLASS _IOW('Q', 9, struct hfsc_modify_class)
+#define HFSC_ADD_FILTER _IOWR('Q', 10, struct hfsc_add_filter)
+#define HFSC_DEL_FILTER _IOW('Q', 11, struct hfsc_delete_filter)
+#define HFSC_GETSTATS _IOWR('Q', 12, struct hfsc_class_stats)
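+
+/*
+ * sketch of the userland ioctl usage (illustrative; the device path
+ * and error handling here are assumptions, not mandated by this
+ * header):
+ *
+ *	struct hfsc_attach attach;
+ *	int fd;
+ *
+ *	bzero(&attach, sizeof(attach));
+ *	strlcpy(attach.iface.hfsc_ifname, "fxp0", IFNAMSIZ);
+ *	attach.bandwidth = 10000000;		for a 10Mbps link
+ *	if ((fd = open("/dev/altq/hfsc", O_RDWR)) < 0 ||
+ *	    ioctl(fd, HFSC_IF_ATTACH, &attach) < 0)
+ *		err(1, "hfsc attach");
+ */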
+
+#ifdef _KERNEL
+/*
+ * kernel internal service curve representation
+ * coordinates are given by 64 bit unsigned integers.
+ * x-axis: unit is clock count. for the intel x86 architecture,
+ * the raw Pentium TSC (Timestamp Counter) value is used.
+ * virtual time is also calculated in this time scale.
+ * y-axis: unit is byte.
+ *
+ * the service curve parameters are converted to the internal
+ * representation.
+ * the slope values are scaled to avoid overflow.
+ * the inverse slope values as well as the y-projection of the 1st
+ * segment are kept in order to avoid 64-bit divide operations
+ * that are expensive on 32-bit architectures.
+ *
+ * note: the Intel Pentium TSC does not wrap around for thousands of years.
+ * x-axis doesn't wrap around for 1089 years with 1GHz clock.
+ * y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth.
+ */
+
+/* kernel internal representation of a service curve */
+struct internal_sc {
+ u_int64_t sm1; /* scaled slope of the 1st segment */
+ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
+ u_int64_t dx; /* the x-projection of the 1st segment */
+ u_int64_t dy; /* the y-projection of the 1st segment */
+ u_int64_t sm2; /* scaled slope of the 2nd segment */
+ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+/* runtime service curve */
+struct runtime_sc {
+ u_int64_t x; /* current starting position on x-axis */
+	u_int64_t	y;	/* current starting position on y-axis */
+ u_int64_t sm1; /* scaled slope of the 1st segment */
+ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
+ u_int64_t dx; /* the x-projection of the 1st segment */
+ u_int64_t dy; /* the y-projection of the 1st segment */
+ u_int64_t sm2; /* scaled slope of the 2nd segment */
+ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+/* for TAILQ based ellist and actlist implementation */
+struct hfsc_class;
+typedef TAILQ_HEAD(_eligible, hfsc_class) ellist_t;
+typedef TAILQ_ENTRY(hfsc_class) elentry_t;
+typedef TAILQ_HEAD(_active, hfsc_class) actlist_t;
+typedef TAILQ_ENTRY(hfsc_class) actentry_t;
+#define ellist_first(s) TAILQ_FIRST(s)
+#define actlist_first(s) TAILQ_FIRST(s)
+#define actlist_last(s) TAILQ_LAST(s, _active)
+
+struct hfsc_class {
+ u_int cl_id; /* class id (just for debug) */
+ u_long cl_handle; /* class handle */
+ struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */
+ int cl_flags; /* misc flags */
+
+ struct hfsc_class *cl_parent; /* parent class */
+ struct hfsc_class *cl_siblings; /* sibling classes */
+ struct hfsc_class *cl_children; /* child classes */
+
+ class_queue_t *cl_q; /* class queue structure */
+ struct red *cl_red; /* RED state */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ u_int64_t cl_total; /* total work in bytes */
+ u_int64_t cl_cumul; /* cumulative work in bytes
+ done by real-time criteria */
+ u_int64_t cl_d; /* deadline */
+ u_int64_t cl_e; /* eligible time */
+ u_int64_t cl_vt; /* virtual time */
+
+ struct internal_sc *cl_rsc; /* internal real-time service curve */
+ struct internal_sc *cl_fsc; /* internal fair service curve */
+ struct runtime_sc cl_deadline; /* deadline curve */
+ struct runtime_sc cl_eligible; /* eligible curve */
+ struct runtime_sc cl_virtual; /* virtual curve */
+
+ u_int cl_vtperiod; /* vt period sequence no */
+ u_int cl_parentperiod; /* parent's vt period seqno */
+ int cl_nactive; /* number of active children */
+ actlist_t *cl_actc; /* active children list */
+
+ actentry_t cl_actlist; /* active children list entry */
+ elentry_t cl_ellist; /* eligible list entry */
+
+ struct {
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+ } cl_stats;
+};
+
+/*
+ * hfsc interface state
+ */
+struct hfsc_if {
+ struct hfsc_if *hif_next; /* interface state list */
+ struct ifaltq *hif_ifq; /* backpointer to ifaltq */
+ struct hfsc_class *hif_rootclass; /* root class */
+ struct hfsc_class *hif_defaultclass; /* default class */
+ struct hfsc_class *hif_pollcache; /* cache for poll operation */
+
+ u_int hif_classes; /* # of classes in the tree */
+ u_int hif_packets; /* # of packets in the tree */
+ u_int hif_classid; /* class id sequence number */
+
+ ellist_t *hif_eligible; /* eligible list */
+
+ struct acc_classifier hif_classifier;
+};
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_HFSC_H_ */
diff --git a/sys/altq/altq_localq.c b/sys/altq/altq_localq.c
new file mode 100644
index 00000000000..b8d70956dea
--- /dev/null
+++ b/sys/altq/altq_localq.c
@@ -0,0 +1,69 @@
+/* $OpenBSD: altq_localq.c,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_localq.c,v 1.3 2000/10/18 09:15:23 kjc Exp $ */
+
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_LOCALQ /* localq is enabled by ALTQ_LOCALQ option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+
+/*
+ * localq device interface
+ */
+altqdev_decl(localq);
+
+int
+localqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+localqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ int error = 0;
+
+ return error;
+}
+
+int
+localqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ int error = 0;
+
+ return error;
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw localq_sw =
+ {"localq", localqopen, localqclose, localqioctl};
+
+ALTQ_MODULE(altq_localq, ALTQT_LOCALQ, &localq_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ_LOCALQ */
diff --git a/sys/altq/altq_priq.c b/sys/altq/altq_priq.c
new file mode 100644
index 00000000000..e3c50441fb2
--- /dev/null
+++ b/sys/altq/altq_priq.c
@@ -0,0 +1,866 @@
+/* $OpenBSD: altq_priq.c,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_priq.c,v 1.1 2000/10/18 09:15:23 kjc Exp $ */
+/*
+ * Copyright (C) 2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * priority queue
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_priq.h>
+
+/*
+ * function prototypes
+ */
+static struct priq_if *priq_attach __P((struct ifaltq *, u_int));
+static int priq_detach __P((struct priq_if *));
+static int priq_clear_interface __P((struct priq_if *));
+static int priq_request __P((struct ifaltq *, int, void *));
+static void priq_purge __P((struct priq_if *));
+static struct priq_class *priq_class_create __P((struct priq_if *,
+ int, int, int));
+static int priq_class_destroy __P((struct priq_class *));
+static int priq_enqueue __P((struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *));
+static struct mbuf *priq_dequeue __P((struct ifaltq *, int));
+
+static int priq_addq __P((struct priq_class *, struct mbuf *));
+static struct mbuf *priq_getq __P((struct priq_class *));
+static struct mbuf *priq_pollq __P((struct priq_class *));
+static void priq_purgeq __P((struct priq_class *));
+
+int priqopen __P((dev_t, int, int, struct proc *));
+int priqclose __P((dev_t, int, int, struct proc *));
+int priqioctl __P((dev_t, ioctlcmd_t, caddr_t, int, struct proc *));
+static int priqcmd_if_attach __P((struct priq_interface *));
+static int priqcmd_if_detach __P((struct priq_interface *));
+static int priqcmd_add_class __P((struct priq_add_class *));
+static int priqcmd_delete_class __P((struct priq_delete_class *));
+static int priqcmd_modify_class __P((struct priq_modify_class *));
+static int priqcmd_add_filter __P((struct priq_add_filter *));
+static int priqcmd_delete_filter __P((struct priq_delete_filter *));
+static int priqcmd_class_stats __P((struct priq_class_stats *));
+static void get_class_stats __P((struct class_stats *, struct priq_class *));
+static struct priq_class *clh_to_clp __P((struct priq_if *, u_long));
+static u_long clp_to_clh __P((struct priq_class *));
+
+/* pif_list keeps all priq_if's allocated. */
+static struct priq_if *pif_list = NULL;
+
+static struct priq_if *
+priq_attach(ifq, bandwidth)
+ struct ifaltq *ifq;
+ u_int bandwidth;
+{
+ struct priq_if *pif;
+
+ MALLOC(pif, struct priq_if *, sizeof(struct priq_if),
+ M_DEVBUF, M_WAITOK);
+ if (pif == NULL)
+ return (NULL);
+ bzero(pif, sizeof(struct priq_if));
+ pif->pif_bandwidth = bandwidth;
+ pif->pif_maxpri = -1;
+ pif->pif_ifq = ifq;
+
+ /* add this state to the priq list */
+ pif->pif_next = pif_list;
+ pif_list = pif;
+
+ return (pif);
+}
+
+static int
+priq_detach(pif)
+ struct priq_if *pif;
+{
+ (void)priq_clear_interface(pif);
+
+ /* remove this interface from the pif list */
+ if (pif_list == pif)
+ pif_list = pif->pif_next;
+ else {
+ struct priq_if *p;
+
+ for (p = pif_list; p != NULL; p = p->pif_next)
+ if (p->pif_next == pif) {
+ p->pif_next = pif->pif_next;
+ break;
+ }
+ ASSERT(p != NULL);
+ }
+
+ FREE(pif, M_DEVBUF);
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+priq_clear_interface(pif)
+ struct priq_if *pif;
+{
+ struct priq_class *cl;
+ int pri;
+
+ /* free the filters for this interface */
+ acc_discard_filters(&pif->pif_classifier, NULL, 1);
+
+ /* clear out the classes */
+ for (pri = 0; pri <= pif->pif_maxpri; pri++)
+ if ((cl = pif->pif_classes[pri]) != NULL)
+ priq_class_destroy(cl);
+
+ return (0);
+}
+
+static int
+priq_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ priq_purge(pif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+priq_purge(pif)
+ struct priq_if *pif;
+{
+ struct priq_class *cl;
+ int pri;
+
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q))
+ priq_purgeq(cl);
+ }
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ pif->pif_ifq->ifq_len = 0;
+}
+
+static struct priq_class *
+priq_class_create(pif, pri, qlimit, flags)
+ struct priq_if *pif;
+ int pri, qlimit, flags;
+{
+ struct priq_class *cl;
+ int s;
+
+#ifndef ALTQ_RED
+ if (flags & PRCF_RED) {
+ printf("priq_class_create: RED not configured for PRIQ!\n");
+ return (NULL);
+ }
+#endif
+
+ if ((cl = pif->pif_classes[pri]) != NULL) {
+ /* modify the class instead of creating a new one */
+ s = splimp();
+ if (!qempty(cl->cl_q))
+ priq_purgeq(cl);
+ splx(s);
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ } else {
+ MALLOC(cl, struct priq_class *, sizeof(struct priq_class),
+ M_DEVBUF, M_WAITOK);
+ if (cl == NULL)
+ return (NULL);
+ bzero(cl, sizeof(struct priq_class));
+
+ MALLOC(cl->cl_q, class_queue_t *, sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (cl->cl_q == NULL)
+ goto err_ret;
+ bzero(cl->cl_q, sizeof(class_queue_t));
+ }
+
+ pif->pif_classes[pri] = cl;
+ if (flags & PRCF_DEFAULTCLASS)
+ pif->pif_default = cl;
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ qlimit(cl->cl_q) = qlimit;
+ qtype(cl->cl_q) = Q_DROPTAIL;
+ qlen(cl->cl_q) = 0;
+ cl->cl_flags = flags;
+ cl->cl_pri = pri;
+ if (pri > pif->pif_maxpri)
+ pif->pif_maxpri = pri;
+ cl->cl_pif = pif;
+ cl->cl_handle = (u_long)cl; /* XXX: just a pointer to this class */
+
+#ifdef ALTQ_RED
+ if (flags & (PRCF_RED|PRCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & PRCF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & PRCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (pif->pif_bandwidth == 0)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
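+		/*
+		 * red_pkttime approximates the transmission time of an
+		 * MTU-sized packet in nanoseconds: mtu bytes divided
+		 * by the link bandwidth in bytes per second.
+		 */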
+#ifdef ALTQ_RIO
+ if (flags & PRCF_RIO) {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RIO;
+ } else
+#endif
+ if (flags & PRCF_RED) {
+ cl->cl_red = red_alloc(0, 0, 0, 0,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RED;
+ }
+ }
+#endif /* ALTQ_RED */
+
+ return (cl);
+
+ err_ret:
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ if (cl->cl_q != NULL)
+ FREE(cl->cl_q, M_DEVBUF);
+ FREE(cl, M_DEVBUF);
+ return (NULL);
+}
+
+static int
+priq_class_destroy(cl)
+ struct priq_class *cl;
+{
+ struct priq_if *pif;
+ int s, pri;
+
+ s = splimp();
+
+ /* delete filters referencing to this class */
+ acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0);
+
+ if (!qempty(cl->cl_q))
+ priq_purgeq(cl);
+
+ pif = cl->cl_pif;
+ pif->pif_classes[cl->cl_pri] = NULL;
+ if (pif->pif_maxpri == cl->cl_pri) {
+ for (pri = cl->cl_pri; pri >= 0; pri--)
+ if (pif->pif_classes[pri] != NULL) {
+ pif->pif_maxpri = pri;
+ break;
+ }
+ if (pri < 0)
+ pif->pif_maxpri = -1;
+ }
+ splx(s);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ FREE(cl->cl_q, M_DEVBUF);
+ FREE(cl, M_DEVBUF);
+ return (0);
+}
+
+/*
+ * priq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+priq_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+ struct priq_class *cl;
+ int len;
+
+ /* grab class set by classifier */
+ if (pktattr == NULL || (cl = pktattr->pattr_class) == NULL)
+ cl = pif->pif_default;
+ cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */
+
+ len = m_pktlen(m);
+ if (priq_addq(cl, m) != 0) {
+ /* drop occurred. mbuf was freed in priq_addq. */
+ PKTCNTR_ADD(&cl->cl_dropcnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+
+ /* successfully queued. */
+ return (0);
+}
+
+/*
+ * priq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+priq_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+ struct priq_class *cl;
+ struct mbuf *m;
+ int pri;
+
+ if (IFQ_IS_EMPTY(ifq))
+ /* no packet in the queue */
+ return (NULL);
+
+ for (pri = pif->pif_maxpri; pri >= 0; pri--) {
+ if ((cl = pif->pif_classes[pri]) != NULL &&
+ !qempty(cl->cl_q)) {
+ if (op == ALTDQ_POLL)
+ return (priq_pollq(cl));
+
+ m = priq_getq(cl);
+ if (m != NULL) {
+ IFQ_DEC_LEN(ifq);
+ if (qempty(cl->cl_q))
+ cl->cl_period++;
+ PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m));
+ }
+ return (m);
+ }
+ }
+ return (NULL);
+}
+
+static int
+priq_addq(cl, m)
+ struct priq_class *cl;
+ struct mbuf *m;
+{
+
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m,
+ cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
+#endif
+ if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & PRCF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(cl->cl_q, m);
+
+ return (0);
+}
+
+static struct mbuf *
+priq_getq(cl)
+ struct priq_class *cl;
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_getq(cl->cl_red, cl->cl_q);
+#endif
+ return _getq(cl->cl_q);
+}
+
+static struct mbuf *
+priq_pollq(cl)
+ struct priq_class *cl;
+{
+ return qhead(cl->cl_q);
+}
+
+static void
+priq_purgeq(cl)
+ struct priq_class *cl;
+{
+ struct mbuf *m;
+
+ if (qempty(cl->cl_q))
+ return;
+
+ while ((m = _getq(cl->cl_q)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
+ m_freem(m);
+ }
+ ASSERT(qlen(cl->cl_q) == 0);
+}
+
+/*
+ * priq device interface
+ */
+int
+priqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+priqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ struct priq_if *pif;
+ int err, error = 0;
+
+ while ((pif = pif_list) != NULL) {
+ /* destroy all */
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ altq_disable(pif->pif_ifq);
+
+ err = altq_detach(pif->pif_ifq);
+ if (err == 0)
+ err = priq_detach(pif);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+priqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ struct priq_if *pif;
+ struct priq_interface *ifacep;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case PRIQ_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case PRIQ_IF_ATTACH:
+ error = priqcmd_if_attach((struct priq_interface *)addr);
+ break;
+
+ case PRIQ_IF_DETACH:
+ error = priqcmd_if_detach((struct priq_interface *)addr);
+ break;
+
+ case PRIQ_ENABLE:
+ case PRIQ_DISABLE:
+ case PRIQ_CLEAR:
+ ifacep = (struct priq_interface *)addr;
+ if ((pif = altq_lookup(ifacep->ifname,
+ ALTQT_PRIQ)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+ case PRIQ_ENABLE:
+ if (pif->pif_default == NULL) {
+#if 1
+ printf("priq: no default class\n");
+#endif
+ error = EINVAL;
+ break;
+ }
+ error = altq_enable(pif->pif_ifq);
+ break;
+
+ case PRIQ_DISABLE:
+ error = altq_disable(pif->pif_ifq);
+ break;
+
+ case PRIQ_CLEAR:
+ priq_clear_interface(pif);
+ break;
+ }
+ break;
+
+ case PRIQ_ADD_CLASS:
+ error = priqcmd_add_class((struct priq_add_class *)addr);
+ break;
+
+ case PRIQ_DEL_CLASS:
+ error = priqcmd_delete_class((struct priq_delete_class *)addr);
+ break;
+
+ case PRIQ_MOD_CLASS:
+ error = priqcmd_modify_class((struct priq_modify_class *)addr);
+ break;
+
+ case PRIQ_ADD_FILTER:
+ error = priqcmd_add_filter((struct priq_add_filter *)addr);
+ break;
+
+ case PRIQ_DEL_FILTER:
+ error = priqcmd_delete_filter((struct priq_delete_filter *)addr);
+ break;
+
+ case PRIQ_GETSTATS:
+ error = priqcmd_class_stats((struct priq_class_stats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+priqcmd_if_attach(ap)
+ struct priq_interface *ap;
+{
+ struct priq_if *pif;
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(ap->ifname)) == NULL)
+ return (ENXIO);
+
+ if ((pif = priq_attach(&ifp->if_snd, ap->arg)) == NULL)
+ return (ENOMEM);
+
+ /*
+	 * attach PRIQ to this ifnet structure.
+ */
+ if ((error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif,
+ priq_enqueue, priq_dequeue, priq_request,
+ &pif->pif_classifier, acc_classify)) != 0)
+ (void)priq_detach(pif);
+
+ return (error);
+}
+
+static int
+priqcmd_if_detach(ap)
+ struct priq_interface *ap;
+{
+ struct priq_if *pif;
+ int error;
+
+ if ((pif = altq_lookup(ap->ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ altq_disable(pif->pif_ifq);
+
+ if ((error = altq_detach(pif->pif_ifq)))
+ return (error);
+
+ return priq_detach(pif);
+}
+
+static int
+priqcmd_add_class(ap)
+ struct priq_add_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
+ return (EINVAL);
+
+ if ((cl = priq_class_create(pif, ap->pri,
+ ap->qlimit, ap->flags)) == NULL)
+ return (ENOMEM);
+
+ /* return a class handle to the user */
+ ap->class_handle = clp_to_clh(cl);
+ return (0);
+}
+
+static int
+priqcmd_delete_class(ap)
+ struct priq_delete_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return priq_class_destroy(cl);
+}
+
+static int
+priqcmd_modify_class(ap)
+ struct priq_modify_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ /*
+ * if priority is changed, move the class to the new priority
+ */
+ if (pif->pif_classes[ap->pri] != cl) {
+ if (pif->pif_classes[ap->pri] != NULL)
+ return (EEXIST);
+ pif->pif_classes[cl->cl_pri] = NULL;
+ pif->pif_classes[ap->pri] = cl;
+ cl->cl_pri = ap->pri;
+ }
+
+ /* call priq_class_create to change class parameters */
+ if ((cl = priq_class_create(pif, ap->pri,
+ ap->qlimit, ap->flags)) == NULL)
+ return (ENOMEM);
+ return 0;
+}
+
+static int
+priqcmd_add_filter(ap)
+ struct priq_add_filter *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&pif->pif_classifier, &ap->filter,
+ cl, &ap->filter_handle);
+}
+
+static int
+priqcmd_delete_filter(ap)
+ struct priq_delete_filter *ap;
+{
+ struct priq_if *pif;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&pif->pif_classifier,
+ ap->filter_handle);
+}
+
+static int
+priqcmd_class_stats(ap)
+ struct priq_class_stats *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+ struct class_stats stats, *usp;
+ int pri, error;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ ap->maxpri = pif->pif_maxpri;
+
+	/* read the stats of all classes up to pif_maxpri */
+ usp = ap->stats;
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ cl = pif->pif_classes[pri];
+ if (cl != NULL)
+ get_class_stats(&stats, cl);
+ else
+ bzero(&stats, sizeof(stats));
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+ return (0);
+}
+
+static void
+get_class_stats(sp, cl)
+ struct class_stats *sp;
+ struct priq_class *cl;
+{
+ sp->class_handle = clp_to_clh(cl);
+
+ sp->qlength = qlen(cl->cl_q);
+ sp->period = cl->cl_period;
+ sp->xmitcnt = cl->cl_xmitcnt;
+ sp->dropcnt = cl->cl_dropcnt;
+
+ sp->qtype = qtype(cl->cl_q);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct priq_class *
+clh_to_clp(pif, chandle)
+ struct priq_if *pif;
+ u_long chandle;
+{
+ struct priq_class *cl;
+
+ cl = (struct priq_class *)chandle;
+ if (chandle != ALIGN(cl)) {
+#if 1
+		printf("clh_to_clp: unaligned pointer %p\n", cl);
+#endif
+ return (NULL);
+ }
+
+ if (cl == NULL || cl->cl_handle != chandle || cl->cl_pif != pif)
+ return (NULL);
+ return (cl);
+}
+
+/* convert a class pointer to the corresponding class handle */
+static u_long
+clp_to_clh(cl)
+ struct priq_class *cl;
+{
+ return (cl->cl_handle);
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw priq_sw =
+ {"priq", priqopen, priqclose, priqioctl};
+
+ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ_PRIQ */
diff --git a/sys/altq/altq_priq.h b/sys/altq/altq_priq.h
new file mode 100644
index 00000000000..12395f945fa
--- /dev/null
+++ b/sys/altq/altq_priq.h
@@ -0,0 +1,161 @@
+/* $OpenBSD: altq_priq.h,v 1.1 2001/06/27 05:28:35 kjc Exp $ */
+/* $KAME: altq_priq.h,v 1.1 2000/10/18 09:15:23 kjc Exp $ */
+/*
+ * Copyright (C) 2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_PRIQ_H_
+#define _ALTQ_ALTQ_PRIQ_H_
+
+#include <altq/altq.h>
+#include <altq/altq_classq.h>
+#include <altq/altq_red.h>
+#include <altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PRIQ_MAXPRI 16 /* upper limit of the number of priorities */
+
+struct priq_interface {
+ char ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+ u_long arg; /* request-specific argument */
+};
+
+struct priq_add_class {
+ struct priq_interface iface;
+ int pri; /* priority (0 is the lowest) */
+ int qlimit; /* queue size limit */
+ int flags; /* misc flags (see below) */
+
+ u_long class_handle; /* return value */
+};
+
+/* priq class flags */
+#define PRCF_RED 0x0001 /* use RED */
+#define PRCF_ECN 0x0002 /* use RED/ECN */
+#define PRCF_RIO 0x0004 /* use RIO */
+#define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define PRCF_DEFAULTCLASS 0x1000 /* default class */
+
+/* special class handles */
+#define PRIQ_NULLCLASS_HANDLE 0
+
+struct priq_delete_class {
+ struct priq_interface iface;
+ u_long class_handle;
+};
+
+struct priq_modify_class {
+ struct priq_interface iface;
+ u_long class_handle;
+ int pri;
+ int qlimit;
+ int flags;
+};
+
+struct priq_add_filter {
+ struct priq_interface iface;
+ u_long class_handle;
+ struct flow_filter filter;
+
+ u_long filter_handle; /* return value */
+};
+
+struct priq_delete_filter {
+ struct priq_interface iface;
+ u_long filter_handle;
+};
+
+struct class_stats {
+ u_long class_handle;
+
+ u_int qlength;
+ u_int period;
+ struct pktcntr xmitcnt; /* transmitted packet counter */
+ struct pktcntr dropcnt; /* dropped packet counter */
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3]; /* rio has 3 red stats */
+};
+
+struct priq_class_stats {
+ struct priq_interface iface;
+ int maxpri; /* in/out */
+
+ struct class_stats *stats; /* pointer to stats array */
+};
+
+#define PRIQ_IF_ATTACH _IOW('Q', 1, struct priq_interface)
+#define PRIQ_IF_DETACH _IOW('Q', 2, struct priq_interface)
+#define PRIQ_ENABLE _IOW('Q', 3, struct priq_interface)
+#define PRIQ_DISABLE _IOW('Q', 4, struct priq_interface)
+#define PRIQ_CLEAR _IOW('Q', 5, struct priq_interface)
+#define PRIQ_ADD_CLASS _IOWR('Q', 7, struct priq_add_class)
+#define PRIQ_DEL_CLASS _IOW('Q', 8, struct priq_delete_class)
+#define PRIQ_MOD_CLASS _IOW('Q', 9, struct priq_modify_class)
+#define PRIQ_ADD_FILTER _IOWR('Q', 10, struct priq_add_filter)
+#define PRIQ_DEL_FILTER _IOW('Q', 11, struct priq_delete_filter)
+#define PRIQ_GETSTATS _IOWR('Q', 12, struct priq_class_stats)
+
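+/*
+ * a minimal userland usage sketch (assumes the priq control device
+ * is /dev/altq/priq; error handling omitted):
+ *
+ *	int fd;
+ *	struct priq_interface iface;
+ *	struct priq_add_class ac;
+ *
+ *	fd = open("/dev/altq/priq", O_RDWR);
+ *	bzero(&iface, sizeof(iface));
+ *	strncpy(iface.ifname, "fxp0", IFNAMSIZ);
+ *	ioctl(fd, PRIQ_IF_ATTACH, &iface);
+ *
+ *	bzero(&ac, sizeof(ac));
+ *	ac.iface = iface;
+ *	ac.pri = 0;
+ *	ac.qlimit = 50;
+ *	ac.flags = PRCF_DEFAULTCLASS;
+ *	ioctl(fd, PRIQ_ADD_CLASS, &ac);
+ *
+ *	ioctl(fd, PRIQ_ENABLE, &iface);
+ */
+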
+#ifdef _KERNEL
+
+struct priq_class {
+ u_long cl_handle; /* class handle */
+ class_queue_t *cl_q; /* class queue structure */
+ struct red *cl_red; /* RED state */
+ int cl_pri; /* priority */
+ int cl_flags; /* class flags */
+ struct priq_if *cl_pif; /* back pointer to pif */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ /* statistics */
+ u_int cl_period; /* backlog period */
+ struct pktcntr cl_xmitcnt; /* transmitted packet counter */
+ struct pktcntr cl_dropcnt; /* dropped packet counter */
+};
+
+/*
+ * priq interface state
+ */
+struct priq_if {
+ struct priq_if *pif_next; /* interface state list */
+ struct ifaltq *pif_ifq; /* backpointer to ifaltq */
+ u_int pif_bandwidth; /* link bandwidth in bps */
+ int pif_maxpri; /* max priority in use */
+ struct priq_class *pif_default; /* default class */
+ struct priq_class *pif_classes[PRIQ_MAXPRI]; /* classes */
+ struct acc_classifier pif_classifier; /* classifier */
+};
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_PRIQ_H_ */
diff --git a/sys/altq/altq_red.c b/sys/altq/altq_red.c
new file mode 100644
index 00000000000..00e3ec8a5f5
--- /dev/null
+++ b/sys/altq/altq_red.c
@@ -0,0 +1,1475 @@
+/* $OpenBSD: altq_red.c,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_red.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#ifdef ALTQ_FLOWVALVE
+#include <sys/queue.h>
+#include <sys/time.h>
+#endif
+
+#include <net/if.h>
+#include <net/if_types.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_red.h>
+#ifdef ALTQ_FLOWVALVE
+#include <altq/altq_flowvalve.h>
+#endif
+
+/*
+ * ALTQ/RED (Random Early Detection) implementation using 32-bit
+ * fixed-point calculation.
+ *
+ * written by kjc using the ns code as a reference.
+ * you can learn more about red and ns from Sally's home page at
+ * http://www-nrg.ee.lbl.gov/floyd/
+ *
+ * most of the red parameter values are fixed in this implementation
+ * to prevent fixed-point overflow/underflow.
+ * if you change the parameters, watch out for overflow/underflow!
+ *
+ * the parameters used are the values recommended by Sally.
+ * the corresponding ns config looks like:
+ * q_weight=0.00195
+ * minthresh=5 maxthresh=15 queue-size=60
+ * linterm=30
+ * dropmech=drop-tail
+ * bytes=false (can't be handled by 32-bit fixed-point)
+ * doubleq=false dqthresh=false
+ * wait=true
+ */
+/*
+ * alternative red parameters for a slow link.
+ *
+ * assume the queue length jumps from zero to L and stays at L; it takes
+ * about N packets for q_avg to reach 63% of L, where N is roughly
+ * 1/q_weight (the point where (1 - q_weight)^N decays to 1/e).
+ * when q_weight is 0.002, N is about 500 packets.
+ * for a slow link like dial-up, 500 packets take more than 1 minute!
+ * when q_weight is 0.008, N is about 127 packets.
+ * when q_weight is 0.016, N is about 63 packets.
+ * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
+ * are allowed for 0.016.
+ * see Sally's paper for more details.
+ */
+/* normal red parameters */
+#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
+ /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
+ /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
+ /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define INV_P_MAX 10 /* inverse of max drop probability */
+#define TH_MIN 5 /* min threshold */
+#define TH_MAX 15 /* max threshold */
+
+#define RED_LIMIT 60 /* default max queue length */
+#define RED_STATS /* collect statistics */
+
+/*
+ * our default policy for forced-drop is drop-tail.
+ * (in altq-1.1.2 or earlier, the default was random-drop.
+ * but it makes more sense to punish the cause of the surge.)
+ * to switch to the random-drop policy, define "RED_RANDOM_DROP".
+ */
+
+#ifdef ALTQ_FLOWVALVE
+/*
+ * flow-valve is an extension to protect red from unresponsive flows
+ * and to promote end-to-end congestion control.
+ * flow-valve observes the average drop rates of the flows that have
+ * experienced packet drops in the recent past.
+ * when the average drop rate exceeds the threshold, the flow is
+ * blocked by the flow-valve. the trapped flow should back off
+ * exponentially to escape from the flow-valve.
+ */
+#ifdef RED_RANDOM_DROP
+#error "random-drop can't be used with flow-valve!"
+#endif
+#endif /* ALTQ_FLOWVALVE */
+
+/* red_list keeps all red_queue_t's allocated. */
+static red_queue_t *red_list = NULL;
+
+/* default red parameter values */
+static int default_th_min = TH_MIN;
+static int default_th_max = TH_MAX;
+static int default_inv_pmax = INV_P_MAX;
+
+/* internal function prototypes */
+static int red_enqueue __P((struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *));
+static struct mbuf *red_dequeue __P((struct ifaltq *, int));
+static int red_request __P((struct ifaltq *, int, void *));
+static void red_purgeq __P((red_queue_t *));
+static int red_detach __P((red_queue_t *));
+#ifdef ALTQ_FLOWVALVE
+static __inline struct fve *flowlist_lookup __P((struct flowvalve *,
+ struct altq_pktattr *, struct timeval *));
+static __inline struct fve *flowlist_reclaim __P((struct flowvalve *,
+ struct altq_pktattr *));
+static __inline void flowlist_move_to_head __P((struct flowvalve *,
+ struct fve *));
+static __inline int fv_p2f __P((struct flowvalve *, int));
+static struct flowvalve *fv_alloc __P((struct red *));
+static void fv_destroy __P((struct flowvalve *));
+static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
+ struct fve **));
+static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
+ struct fve *));
+#endif
+
+/*
+ * red device interface
+ */
+altqdev_decl(red);
+
+int
+redopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+redclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ red_queue_t *rqp;
+ int err, error = 0;
+
+ while ((rqp = red_list) != NULL) {
+ /* destroy all */
+ err = red_detach(rqp);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+redioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ red_queue_t *rqp;
+ struct red_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case RED_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+ switch (cmd) {
+
+ case RED_ENABLE:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case RED_DISABLE:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case RED_IF_ATTACH:
+ ifp = ifunit(((struct red_interface *)addr)->red_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize red_queue_t */
+ MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
+ if (rqp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp, sizeof(red_queue_t));
+
+ MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (rqp->rq_q == NULL) {
+ FREE(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
+ if (rqp->rq_red == NULL) {
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = RED_LIMIT;
+ qtype(rqp->rq_q) = Q_RED;
+
+ /*
+ * attach RED to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
+ red_enqueue, red_dequeue, red_request,
+ NULL, NULL);
+ if (error) {
+ red_destroy(rqp->rq_red);
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the red list */
+ rqp->rq_next = red_list;
+ red_list = rqp;
+ break;
+
+ case RED_IF_DETACH:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = red_detach(rqp);
+ break;
+
+ case RED_GETSTATS:
+ do {
+ struct red_stats *q_stats;
+ red_t *rp;
+
+ q_stats = (struct red_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.red_ifname,
+ ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ q_stats->q_len = qlen(rqp->rq_q);
+ q_stats->q_limit = qlimit(rqp->rq_q);
+
+ rp = rqp->rq_red;
+ q_stats->q_avg = rp->red_avg >> rp->red_wshift;
+ q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
+ q_stats->drop_cnt = rp->red_stats.drop_cnt;
+ q_stats->drop_forced = rp->red_stats.drop_forced;
+ q_stats->drop_unforced = rp->red_stats.drop_unforced;
+ q_stats->marked_packets = rp->red_stats.marked_packets;
+
+ q_stats->weight = rp->red_weight;
+ q_stats->inv_pmax = rp->red_inv_pmax;
+ q_stats->th_min = rp->red_thmin;
+ q_stats->th_max = rp->red_thmax;
+
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL) {
+ struct flowvalve *fv = rp->red_flowvalve;
+ q_stats->fv_flows = fv->fv_flows;
+ q_stats->fv_pass = fv->fv_stats.pass;
+ q_stats->fv_predrop = fv->fv_stats.predrop;
+ q_stats->fv_alloc = fv->fv_stats.alloc;
+ q_stats->fv_escape = fv->fv_stats.escape;
+ } else {
+#endif /* ALTQ_FLOWVALVE */
+ q_stats->fv_flows = 0;
+ q_stats->fv_pass = 0;
+ q_stats->fv_predrop = 0;
+ q_stats->fv_alloc = 0;
+ q_stats->fv_escape = 0;
+#ifdef ALTQ_FLOWVALVE
+ }
+#endif /* ALTQ_FLOWVALVE */
+ } while (0);
+ break;
+
+ case RED_CONFIG:
+ do {
+ struct red_conf *fc;
+ red_t *new;
+ int s, limit;
+
+ fc = (struct red_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.red_ifname,
+ ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ new = red_alloc(fc->red_weight,
+ fc->red_inv_pmax,
+ fc->red_thmin,
+ fc->red_thmax,
+ fc->red_flags,
+ fc->red_pkttime);
+ if (new == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+ s = splimp();
+ red_purgeq(rqp);
+ limit = fc->red_limit;
+ if (limit < fc->red_thmax)
+ limit = fc->red_thmax;
+ qlimit(rqp->rq_q) = limit;
+ fc->red_limit = limit; /* write back the new value */
+
+ red_destroy(rqp->rq_red);
+ rqp->rq_red = new;
+
+ splx(s);
+
+ /* write back new values */
+ fc->red_limit = limit;
+ fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
+ fc->red_thmin = rqp->rq_red->red_thmin;
+ fc->red_thmax = rqp->rq_red->red_thmax;
+
+ } while (0);
+ break;
+
+ case RED_SETDEFAULTS:
+ do {
+ struct redparams *rp;
+
+ rp = (struct redparams *)addr;
+
+ default_th_min = rp->th_min;
+ default_th_max = rp->th_max;
+ default_inv_pmax = rp->inv_pmax;
+ } while (0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+red_detach(rqp)
+ red_queue_t *rqp;
+{
+ red_queue_t *tmp;
+ int error = 0;
+
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ altq_disable(rqp->rq_ifq);
+
+ if ((error = altq_detach(rqp->rq_ifq)))
+ return (error);
+
+ if (red_list == rqp)
+ red_list = rqp->rq_next;
+ else {
+ for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
+ if (tmp->rq_next == rqp) {
+ tmp->rq_next = rqp->rq_next;
+ break;
+ }
+ if (tmp == NULL)
+ printf("red_detach: no state found in red_list!\n");
+ }
+
+ red_destroy(rqp->rq_red);
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * red support routines
+ */
+
+red_t *
+red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
+ int weight, inv_pmax, th_min, th_max;
+ int flags, pkttime;
+{
+ red_t *rp;
+ int w, i;
+ int npkts_per_sec;
+
+ MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
+ if (rp == NULL)
+ return (NULL);
+ bzero(rp, sizeof(red_t));
+
+ rp->red_avg = 0;
+ rp->red_idle = 1;
+
+ if (weight == 0)
+ rp->red_weight = W_WEIGHT;
+ else
+ rp->red_weight = weight;
+ if (inv_pmax == 0)
+ rp->red_inv_pmax = default_inv_pmax;
+ else
+ rp->red_inv_pmax = inv_pmax;
+ if (th_min == 0)
+ rp->red_thmin = default_th_min;
+ else
+ rp->red_thmin = th_min;
+ if (th_max == 0)
+ rp->red_thmax = default_th_max;
+ else
+ rp->red_thmax = th_max;
+
+ rp->red_flags = flags;
+
+ if (pkttime == 0)
+ /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
+ rp->red_pkttime = 800;
+ else
+ rp->red_pkttime = pkttime;
+
+ if (weight == 0) {
+ /* when the link is very slow, adjust red parameters */
+ npkts_per_sec = 1000000 / rp->red_pkttime;
+ if (npkts_per_sec < 50) {
+ /* up to about 400Kbps */
+ rp->red_weight = W_WEIGHT_2;
+ } else if (npkts_per_sec < 300) {
+ /* up to about 2.4Mbps */
+ rp->red_weight = W_WEIGHT_1;
+ }
+ }
+
+ /* calculate wshift. weight must be a power of 2 */
+ w = rp->red_weight;
+ for (i = 0; w > 1; i++)
+ w = w >> 1;
+ rp->red_wshift = i;
+ w = 1 << rp->red_wshift;
+ if (w != rp->red_weight) {
+ printf("invalid weight value %d for red! use %d\n",
+ rp->red_weight, w);
+ rp->red_weight = w;
+ }
+
+ /*
+ * thmin_s and thmax_s are scaled versions of th_min and th_max
+ * to be compared with avg.
+ */
+ rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
+ rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
+
+ /*
+ * precompute probability denominator
+ * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+ */
+ rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
+ * rp->red_inv_pmax) << FP_SHIFT;
+
+ /* allocate weight table */
+ rp->red_wtab = wtab_alloc(rp->red_weight);
+
+ microtime(&rp->red_last);
+#ifdef ALTQ_FLOWVALVE
+ if (flags & REDF_FLOWVALVE)
+ rp->red_flowvalve = fv_alloc(rp);
+ /* if fv_alloc fails, flowvalve is just disabled */
+#endif
+ return (rp);
+}
+
+void
+red_destroy(rp)
+ red_t *rp;
+{
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL)
+ fv_destroy(rp->red_flowvalve);
+#endif
+ wtab_destroy(rp->red_wtab);
+ FREE(rp, M_DEVBUF);
+}
+
+void
+red_getstats(rp, sp)
+ red_t *rp;
+ struct redstats *sp;
+{
+ sp->q_avg = rp->red_avg >> rp->red_wshift;
+ sp->xmit_cnt = rp->red_stats.xmit_cnt;
+ sp->drop_cnt = rp->red_stats.drop_cnt;
+ sp->drop_forced = rp->red_stats.drop_forced;
+ sp->drop_unforced = rp->red_stats.drop_unforced;
+ sp->marked_packets = rp->red_stats.marked_packets;
+}
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+static int
+red_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+
+ if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
+ return ENOBUFS;
+ ifq->ifq_len++;
+ return 0;
+}
+
+int
+red_addq(rp, q, m, pktattr)
+ red_t *rp;
+ class_queue_t *q;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ int avg, droptype;
+ int n;
+#ifdef ALTQ_FLOWVALVE
+ struct fve *fve = NULL;
+
+ if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
+ if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
+ m_freem(m);
+ return (-1);
+ }
+#endif
+
+ avg = rp->red_avg;
+
+ /*
+ * if we were idle, we pretend that n packets arrived during
+ * the idle period.
+ */
+ if (rp->red_idle) {
+ struct timeval now;
+ int t;
+
+ rp->red_idle = 0;
+ microtime(&now);
+ t = (now.tv_sec - rp->red_last.tv_sec);
+ if (t > 60) {
+ /*
+ * we have been idle for more than 1 minute, so set avg to zero.
+ * this prevents t from overflowing.
+ */
+ avg = 0;
+ } else {
+ t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
+ n = t / rp->red_pkttime - 1;
+
+ /* the following line does (avg = (1 - Wq)^n * avg) */
+ if (n > 0)
+ avg = (avg >> FP_SHIFT) *
+ pow_w(rp->red_wtab, n);
+ }
+ }
+
+ /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
+ avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
+ rp->red_avg = avg; /* save the new value */
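+ /*
+ * the update above is the fixed-point form of
+ * avg' = (1 - 1/weight) * avg + qlen/weight:
+ * since avg is scaled by weight << FP_SHIFT, subtracting
+ * avg >> wshift removes a 1/weight fraction of the old
+ * average, and qlen << FP_SHIFT is qlen/weight in this scale.
+ */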
+
+ /*
+ * red_count keeps a tally of arriving traffic that has not
+ * been dropped.
+ */
+ rp->red_count++;
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (avg >= rp->red_thmin_s && qlen(q) > 1) {
+ if (avg >= rp->red_thmax_s) {
+ /* avg >= th_max: forced drop */
+ droptype = DTYPE_FORCED;
+ } else if (rp->red_old == 0) {
+ /* first exceeds th_min */
+ rp->red_count = 1;
+ rp->red_old = 1;
+ } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
+ rp->red_probd, rp->red_count)) {
+ /* mark or drop by red */
+ if ((rp->red_flags & REDF_ECN) &&
+ mark_ecn(m, pktattr, rp->red_flags)) {
+ /* successfully marked. do not drop. */
+ rp->red_count = 0;
+#ifdef RED_STATS
+ rp->red_stats.marked_packets++;
+#endif
+ } else {
+ /* unforced drop by red */
+ droptype = DTYPE_EARLY;
+ }
+ }
+ } else {
+ /* avg < th_min */
+ rp->red_old = 0;
+ }
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+#ifdef RED_RANDOM_DROP
+ /* if successful or forced drop, enqueue this packet. */
+ if (droptype != DTYPE_EARLY)
+ _addq(q, m);
+#else
+ /* if successful, enqueue this packet. */
+ if (droptype == DTYPE_NODROP)
+ _addq(q, m);
+#endif
+ if (droptype != DTYPE_NODROP) {
+ if (droptype == DTYPE_EARLY) {
+ /* drop the incoming packet */
+#ifdef RED_STATS
+ rp->red_stats.drop_unforced++;
+#endif
+ } else {
+ /* forced drop, select a victim packet in the queue. */
+#ifdef RED_RANDOM_DROP
+ m = _getq_random(q);
+#endif
+#ifdef RED_STATS
+ rp->red_stats.drop_forced++;
+#endif
+ }
+#ifdef RED_STATS
+ PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
+#endif
+ rp->red_count = 0;
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL)
+ fv_dropbyred(rp->red_flowvalve, pktattr, fve);
+#endif
+ m_freem(m);
+ return (-1);
+ }
+ /* successfully queued */
+#ifdef RED_STATS
+ PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
+#endif
+ return (0);
+}
+
+/*
+ * early-drop probability is calculated as follows:
+ * prob = p_max * (avg - th_min) / (th_max - th_min)
+ * prob_a = prob / (2 - count*prob)
+ * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
+ * here prob_a increases as the count of successive undropped packets grows.
+ * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
+ * becomes 1 when (count >= (2 / prob))).
+ */
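+/*
+ * worked example with the default parameters (th_min = 5, th_max = 15,
+ * inv_p_max = 30): when avg sits halfway between the thresholds,
+ * prob = (1/30) * (5/10) = 1/60, so prob_a starts at 1/120 and climbs
+ * to 1 once count reaches 2/prob = 120 undropped packets.
+ */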
+int
+drop_early(fp_len, fp_probd, count)
+ int fp_len; /* (avg - TH_MIN) in fixed-point */
+ int fp_probd; /* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */
+ int count; /* how many successive undropped packets */
+{
+ int d; /* denominator of drop-probability */
+
+ d = fp_probd - count * fp_len;
+ if (d <= 0)
+ /* count exceeds the hard limit: drop or mark */
+ return (1);
+
+ /*
+ * now the range of d is [1..600] in fixed-point. (when
+ * th_max-th_min=10 and p_max=1/30)
+ * drop probability = (avg - TH_MIN) / d
+ */
+
+ if ((random() % d) < fp_len) {
+ /* drop or mark */
+ return (1);
+ }
+ /* no drop/mark */
+ return (0);
+}
+
+/*
+ * try to set the CE bit in the packet.
+ * returns 1 if successfully marked, 0 otherwise.
+ */
+int
+mark_ecn(m, pktattr, flags)
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+ int flags;
+{
+ struct mbuf *m0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return (0);
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+ return (0);
+ }
+
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ if (flags & REDF_ECN4) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+
+ if (ip->ip_v != 4)
+ return (0); /* version mismatch! */
+ if (ip->ip_tos & IPTOS_ECT) {
+ /* ECN-capable, mark ECN bit. */
+ if ((ip->ip_tos & IPTOS_CE) == 0) {
+#if (IPTOS_CE == 0x01)
+ u_short sum;
+
+ ip->ip_tos |= IPTOS_CE;
+ /*
+ * optimized version when IPTOS_CE
+ * is 0x01.
+ * HC' = HC -1 when HC > 0
+ * = 0xfffe when HC = 0
+ */
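+ /*
+ * derivation (RFC 1624 with m' = m + 1,
+ * since IPTOS_CE is 0x01):
+ * ~HC' = ~HC + ~m + m' = ~HC + 1 in
+ * one's complement, so HC' = HC - 1,
+ * and 0 - 1 wraps to 0xfffe.
+ */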
+ sum = ntohs(ip->ip_sum);
+ if (sum == 0)
+ sum = 0xfffe;
+ else
+ sum -= 1;
+ ip->ip_sum = htons(sum);
+#else /* IPTOS_CE != 0x01 */
+ long sum;
+
+ ip->ip_tos |= IPTOS_CE;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += 0xffff + IPTOS_CE;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+
+ ip->ip_sum = htons(~sum & 0xffff);
+#endif /* IPTOS_CE != 0x01 */
+ }
+ return (1);
+ }
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (flags & REDF_ECN6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return (0); /* version mismatch! */
+ if (flowlabel & (IPTOS_ECT << 20)) {
+ /* ECN-capable, mark ECN bit. */
+ flowlabel |= (IPTOS_CE << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ return (1);
+ }
+ }
+ break;
+#endif /* INET6 */
+ }
+
+ /* not marked */
+ return (0);
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+
+static struct mbuf *
+red_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+ struct mbuf *m;
+
+ if (op == ALTDQ_POLL)
+ return qhead(rqp->rq_q);
+
+ /* op == ALTDQ_REMOVE */
+ m = red_getq(rqp->rq_red, rqp->rq_q);
+ if (m != NULL)
+ ifq->ifq_len--;
+ return (m);
+}
+
+struct mbuf *
+red_getq(rp, q)
+ red_t *rp;
+ class_queue_t *q;
+{
+ struct mbuf *m;
+
+ if ((m = _getq(q)) == NULL) {
+ if (rp->red_idle == 0) {
+ rp->red_idle = 1;
+ microtime(&rp->red_last);
+ }
+ return NULL;
+ }
+
+ rp->red_idle = 0;
+ return (m);
+}
+
+static int
+red_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ red_purgeq(rqp);
+ break;
+ }
+ return (0);
+}
+
+static void
+red_purgeq(rqp)
+ red_queue_t *rqp;
+{
+ _flushq(rqp->rq_q);
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ rqp->rq_ifq->ifq_len = 0;
+}
+
+
+/*
+ * helper routine to calibrate avg during idle.
+ * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point,
+ * where Wq = 1/weight and the code assumes Wq is close to zero.
+ *
+ * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
+ */
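+/*
+ * pow_w() below exploits the binary decomposition of n:
+ * (1 - Wq)^n is the product of w_tab[i] over every bit i set in n,
+ * e.g., n = 5 = 4 + 1 gives (1 - Wq)^5 = w_tab[2] * w_tab[0].
+ */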
+static struct wtab *wtab_list = NULL; /* pointer to wtab list */
+
+struct wtab *
+wtab_alloc(weight)
+ int weight;
+{
+ struct wtab *w;
+ int i;
+
+ for (w = wtab_list; w != NULL; w = w->w_next)
+ if (w->w_weight == weight) {
+ w->w_refcount++;
+ return (w);
+ }
+
+ MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
+ if (w == NULL)
+ panic("wtab_alloc: malloc failed!");
+ bzero(w, sizeof(struct wtab));
+ w->w_weight = weight;
+ w->w_refcount = 1;
+ w->w_next = wtab_list;
+ wtab_list = w;
+
+ /* initialize the weight table */
+ w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
+ for (i = 1; i < 32; i++) {
+ w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
+ if (w->w_tab[i] == 0 && w->w_param_max == 0)
+ w->w_param_max = 1 << i;
+ }
+
+ return (w);
+}
+
+int
+wtab_destroy(w)
+ struct wtab *w;
+{
+ struct wtab *prev;
+
+ if (--w->w_refcount > 0)
+ return (0);
+
+ if (wtab_list == w)
+ wtab_list = w->w_next;
+ else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
+ if (prev->w_next == w) {
+ prev->w_next = w->w_next;
+ break;
+ }
+
+ FREE(w, M_DEVBUF);
+ return (0);
+}
+
+int32_t
+pow_w(w, n)
+ struct wtab *w;
+ int n;
+{
+ int i, bit;
+ int32_t val;
+
+ if (n >= w->w_param_max)
+ return (0);
+
+ val = 1 << FP_SHIFT;
+ if (n <= 0)
+ return (val);
+
+ bit = 1;
+ i = 0;
+ while (n) {
+ if (n & bit) {
+ val = (val * w->w_tab[i]) >> FP_SHIFT;
+ n &= ~bit;
+ }
+ i++;
+ bit <<= 1;
+ }
+ return (val);
+}
+
+#ifdef ALTQ_FLOWVALVE
+
+#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
+#define FV_PSCALE(x) ((x) << FV_PSHIFT)
+#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
+#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
+#define FV_FSCALE(x) ((x) << FV_FSHIFT)
+#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
+
+#define FV_TIMER (3 * hz) /* timer value for garbage collector */
+#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
+
+#define FV_N 10 /* update fve_f every FV_N packets */
+
+#define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
+#define FV_TTHRESH 3 /* time threshold to delete fve */
+#define FV_ALPHA 5 /* extra packet count */
+
+#define FV_STATS
+
+#if (__FreeBSD_version > 300000)
+#define FV_TIMESTAMP(tp) getmicrotime(tp)
+#else
+#define FV_TIMESTAMP(tp) { (*(tp)) = time; }
+#endif
+
+/*
+ * Brtt table: 127 entry table to convert drop rate (p) to
+ * the corresponding bandwidth fraction (f)
+ * the following equation is implemented to use scaled values,
+ * fve_p and fve_f, in the fixed point format.
+ *
+ * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
+ * f = Brtt(p) / (max_th + alpha)
+ */
+#define BRTT_SIZE 128
+#define BRTT_SHIFT 12
+#define BRTT_MASK 0x0007f000
+#define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
+
+const int brtt_tab[BRTT_SIZE] = {
+ 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
+ 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
+ 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
+ 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
+ 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
+ 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
+ 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
+ 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
+ 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
+ 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
+ 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
+ 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
+ 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
+ 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
+ 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
+ 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
+};
+
+static __inline struct fve *
+flowlist_lookup(fv, pktattr, now)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct timeval *now;
+{
+ struct fve *fve;
+ int flows;
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ struct timeval tthresh;
+
+ if (pktattr == NULL)
+ return (NULL);
+
+ tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
+ flows = 0;
+ /*
+ * search the flow list
+ */
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ ip = (struct ip *)pktattr->pattr_hdr;
+ TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
+ if (fve->fve_lastdrop.tv_sec == 0)
+ break;
+ if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
+ fve->fve_lastdrop.tv_sec = 0;
+ break;
+ }
+ if (fve->fve_flow.flow_af == AF_INET &&
+ fve->fve_flow.flow_ip.ip_src.s_addr ==
+ ip->ip_src.s_addr &&
+ fve->fve_flow.flow_ip.ip_dst.s_addr ==
+ ip->ip_dst.s_addr)
+ return (fve);
+ flows++;
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
+ if (fve->fve_lastdrop.tv_sec == 0)
+ break;
+ if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
+ fve->fve_lastdrop.tv_sec = 0;
+ break;
+ }
+ if (fve->fve_flow.flow_af == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
+ &ip6->ip6_src) &&
+ IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
+ &ip6->ip6_dst))
+ return (fve);
+ flows++;
+ }
+ break;
+#endif /* INET6 */
+
+ default:
+ /* unknown protocol. no drop. */
+ return (NULL);
+ }
+ fv->fv_flows = flows; /* save the number of active fve's */
+ return (NULL);
+}
+
+static __inline struct fve *
+flowlist_reclaim(fv, pktattr)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+{
+ struct fve *fve;
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+
+ /*
+ * get an entry from the tail of the LRU list.
+ */
+ fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
+
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ ip = (struct ip *)pktattr->pattr_hdr;
+ fve->fve_flow.flow_af = AF_INET;
+ fve->fve_flow.flow_ip.ip_src = ip->ip_src;
+ fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ fve->fve_flow.flow_af = AF_INET6;
+ fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
+ fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
+ break;
+#endif
+ }
+
+ fve->fve_state = Green;
+ fve->fve_p = 0.0;
+ fve->fve_f = 0.0;
+ fve->fve_ifseq = fv->fv_ifseq - 1;
+ fve->fve_count = 0;
+
+ fv->fv_flows++;
+#ifdef FV_STATS
+ fv->fv_stats.alloc++;
+#endif
+ return (fve);
+}
+
+static __inline void
+flowlist_move_to_head(fv, fve)
+ struct flowvalve *fv;
+ struct fve *fve;
+{
+ if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
+ TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
+ TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
+ }
+}
+
+/*
+ * allocate flowvalve structure
+ */
+static struct flowvalve *
+fv_alloc(rp)
+ struct red *rp;
+{
+ struct flowvalve *fv;
+ struct fve *fve;
+ int i, num;
+
+ num = FV_FLOWLISTSIZE;
+ MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
+ M_DEVBUF, M_WAITOK);
+ if (fv == NULL)
+ return (NULL);
+ bzero(fv, sizeof(struct flowvalve));
+
+ MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
+ M_DEVBUF, M_WAITOK);
+ if (fv->fv_fves == NULL) {
+ FREE(fv, M_DEVBUF);
+ return (NULL);
+ }
+ bzero(fv->fv_fves, sizeof(struct fve) * num);
+
+ fv->fv_flows = 0;
+ TAILQ_INIT(&fv->fv_flowlist);
+ for (i = 0; i < num; i++) {
+ fve = &fv->fv_fves[i];
+ fve->fve_lastdrop.tv_sec = 0;
+ TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
+ }
+
+ /* initialize drop rate threshold in scaled fixed-point */
+ fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
+
+ /* initialize drop rate to fraction table */
+ MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
+ M_DEVBUF, M_WAITOK);
+ if (fv->fv_p2ftab == NULL) {
+ FREE(fv->fv_fves, M_DEVBUF);
+ FREE(fv, M_DEVBUF);
+ return (NULL);
+ }
+ /*
+ * create the p2f table.
+ * (shift is used to keep the precision)
+ */
+ for (i = 1; i < BRTT_SIZE; i++) {
+ int f;
+
+ f = brtt_tab[i] << 8;
+ fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
+ }
+
+ return (fv);
+}
+
+static void
+fv_destroy(fv)
+ struct flowvalve *fv;
+{
+ FREE(fv->fv_p2ftab, M_DEVBUF);
+ FREE(fv->fv_fves, M_DEVBUF);
+ FREE(fv, M_DEVBUF);
+}
+
+static __inline int
+fv_p2f(fv, p)
+ struct flowvalve *fv;
+ int p;
+{
+ int val, f;
+
+ if (p >= BRTT_PMAX)
+ f = fv->fv_p2ftab[BRTT_SIZE-1];
+ else if ((val = (p & BRTT_MASK)))
+ f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
+ else
+ f = fv->fv_p2ftab[1];
+ return (f);
+}
+
+/*
+ * check if an arriving packet should be pre-dropped.
+ * called from red_addq() when a packet arrives.
+ * returns 1 when the packet should be pre-dropped.
+ * should be called in splimp.
+ */
+static int
+fv_checkflow(fv, pktattr, fcache)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct fve **fcache;
+{
+ struct fve *fve;
+ struct timeval now;
+
+ fv->fv_ifseq++;
+ FV_TIMESTAMP(&now);
+
+ if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
+ /* no matching entry in the flowlist */
+ return (0);
+
+ *fcache = fve;
+
+ /* update fraction f for every FV_N packets */
+ if (++fve->fve_count == FV_N) {
+ /*
+ * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
+ */
+ fve->fve_f =
+ (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
+ + fve->fve_f - FV_FUNSCALE(fve->fve_f);
+ fve->fve_ifseq = fv->fv_ifseq;
+ fve->fve_count = 0;
+ }
+
+ /*
+ * overpumping test
+ */
+ if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
+ int fthresh;
+
+ /* calculate a threshold */
+ fthresh = fv_p2f(fv, fve->fve_p);
+ if (fve->fve_f > fthresh)
+ fve->fve_state = Red;
+ }
+
+ if (fve->fve_state == Red) {
+ /*
+ * backoff test
+ */
+ if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
+ /* no drop for at least FV_BACKOFFTHRESH sec */
+ fve->fve_p = 0;
+ fve->fve_state = Green;
+#ifdef FV_STATS
+ fv->fv_stats.escape++;
+#endif
+ } else {
+ /* block this flow */
+ flowlist_move_to_head(fv, fve);
+ fve->fve_lastdrop = now;
+#ifdef FV_STATS
+ fv->fv_stats.predrop++;
+#endif
+ return (1);
+ }
+ }
+
+ /*
+ * p = (1 - Wp) * p
+ */
+ fve->fve_p -= FV_PUNSCALE(fve->fve_p);
+ if (fve->fve_p < 0)
+ fve->fve_p = 0;
+#ifdef FV_STATS
+ fv->fv_stats.pass++;
+#endif
+ return (0);
+}
+
+/*
+ * called from red_addq when a packet is dropped by red.
+ * should be called in splimp.
+ */
+static void
+fv_dropbyred(fv, pktattr, fcache)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct fve *fcache;
+{
+ struct fve *fve;
+ struct timeval now;
+
+ if (pktattr == NULL)
+ return;
+ FV_TIMESTAMP(&now);
+
+ if (fcache != NULL)
+ /* the fve of this packet is already cached */
+ fve = fcache;
+ else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
+ fve = flowlist_reclaim(fv, pktattr);
+
+ flowlist_move_to_head(fv, fve);
+
+ /*
+ * update p: the following line cancels the update
+ * in fv_checkflow() and calculates
+ * p = Wp + (1 - Wp) * p
+ */
+ fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
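+ /*
+ * (1 << FP_SHIFT) is Wp in the scaled representation:
+ * fve_p carries FV_PSHIFT + FP_SHIFT fraction bits, so
+ * Wp = 2^-FV_PSHIFT scales to exactly 1 << FP_SHIFT.
+ */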
+
+ fve->fve_lastdrop = now;
+}
+
+#endif /* ALTQ_FLOWVALVE */
+
+#ifdef KLD_MODULE
+
+static struct altqsw red_sw =
+ {"red", redopen, redclose, redioctl};
+
+ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ_RED */
diff --git a/sys/altq/altq_red.h b/sys/altq/altq_red.h
new file mode 100644
index 00000000000..ba671c42ee8
--- /dev/null
+++ b/sys/altq/altq_red.h
@@ -0,0 +1,190 @@
+/* $OpenBSD: altq_red.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_red.h,v 1.5 2000/12/14 08:12:46 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_RED_H_
+#define _ALTQ_ALTQ_RED_H_
+
+#include <altq/altq_classq.h>
+
+struct red_interface {
+ char red_ifname[IFNAMSIZ];
+};
+
+struct red_stats {
+ struct red_interface iface;
+ int q_len;
+ int q_avg;
+
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+
+ /* static red parameters */
+ int q_limit;
+ int weight;
+ int inv_pmax;
+ int th_min;
+ int th_max;
+
+ /* flowvalve related stuff */
+ u_int fv_flows;
+ u_int fv_pass;
+ u_int fv_predrop;
+ u_int fv_alloc;
+ u_int fv_escape;
+};
+
+struct red_conf {
+ struct red_interface iface;
+ int red_weight; /* weight for EWMA */
+ int red_inv_pmax; /* inverse of max drop probability */
+ int red_thmin; /* red min threshold */
+ int red_thmax; /* red max threshold */
+ int red_limit; /* max queue length */
+ int red_pkttime; /* average packet time in usec */
+ int red_flags; /* see below */
+};
+
+/* red flags */
+#define REDF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define REDF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define REDF_ECN (REDF_ECN4 | REDF_ECN6)
+#define REDF_FLOWVALVE 0x04 /* use flowvalve (aka penalty-box) */
+
+/*
+ * simpler versions of red parameters and statistics used by other
+ * disciplines (e.g., CBQ)
+ */
+struct redparams {
+ int th_min; /* red min threshold */
+ int th_max; /* red max threshold */
+ int inv_pmax; /* inverse of max drop probability */
+};
+
+struct redstats {
+ int q_avg;
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+};
+
+
+/*
+ * IOCTLs for RED
+ */
+#define RED_IF_ATTACH _IOW('Q', 1, struct red_interface)
+#define RED_IF_DETACH _IOW('Q', 2, struct red_interface)
+#define RED_ENABLE _IOW('Q', 3, struct red_interface)
+#define RED_DISABLE _IOW('Q', 4, struct red_interface)
+#define RED_CONFIG _IOWR('Q', 6, struct red_conf)
+#define RED_GETSTATS _IOWR('Q', 12, struct red_stats)
+#define RED_SETDEFAULTS _IOW('Q', 30, struct redparams)
+
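+/*
+ * a minimal configuration sketch (assumes the red control device
+ * is /dev/altq/red; zero-valued fields select the built-in defaults
+ * and error handling is omitted):
+ *
+ *	int fd;
+ *	struct red_interface iface;
+ *	struct red_conf conf;
+ *
+ *	fd = open("/dev/altq/red", O_RDWR);
+ *	bzero(&iface, sizeof(iface));
+ *	strncpy(iface.red_ifname, "fxp0", IFNAMSIZ);
+ *	ioctl(fd, RED_IF_ATTACH, &iface);
+ *
+ *	bzero(&conf, sizeof(conf));
+ *	conf.iface = iface;
+ *	conf.red_thmin = 5;
+ *	conf.red_thmax = 15;
+ *	conf.red_limit = 60;
+ *	conf.red_flags = REDF_ECN;
+ *	ioctl(fd, RED_CONFIG, &conf);
+ *
+ *	ioctl(fd, RED_ENABLE, &iface);
+ */
+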
+#ifdef _KERNEL
+
+struct flowvalve;
+
+/* weight table structure for idle time calibration */
+struct wtab {
+ struct wtab *w_next;
+ int w_weight;
+ int w_param_max;
+ int w_refcount;
+ int32_t w_tab[32];
+};
+
+typedef struct red {
+ int red_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int red_flags; /* red flags */
+
+ /* red parameters */
+ int red_weight; /* weight for EWMA */
+ int red_inv_pmax; /* inverse of max drop probability */
+ int red_thmin; /* red min threshold */
+ int red_thmax; /* red max threshold */
+
+ /* variables for internal use */
+ int red_wshift; /* log(red_weight) */
+ int red_thmin_s; /* th_min scaled by avgshift */
+ int red_thmax_s; /* th_max scaled by avgshift */
+ int red_probd; /* drop probability denominator */
+
+ int red_avg; /* queue length average scaled by avgshift */
+ int red_count; /* packet count since the last dropped/marked
+ packet */
+ int red_idle; /* queue was empty */
+ int red_old; /* avg is above th_min */
+ struct wtab *red_wtab; /* weight table */
+ struct timeval red_last; /* timestamp when the queue becomes idle */
+
+ struct flowvalve *red_flowvalve; /* flowvalve state */
+
+ struct {
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+ } red_stats;
+} red_t;
+
+typedef struct red_queue {
+ struct red_queue *rq_next; /* next red_state in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q;
+
+ red_t *rq_red;
+} red_queue_t;
+
+/* red drop types */
+#define DTYPE_NODROP 0 /* no drop */
+#define DTYPE_FORCED 1 /* a "forced" drop */
+#define DTYPE_EARLY 2 /* an "unforced" (early) drop */
+
+extern red_t *red_alloc __P((int, int, int, int, int, int));
+extern void red_destroy __P((red_t *));
+extern void red_getstats __P((red_t *, struct redstats *));
+extern int red_addq __P((red_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *));
+extern struct mbuf *red_getq __P((red_t *, class_queue_t *));
+extern int drop_early __P((int, int, int));
+extern int mark_ecn __P((struct mbuf *, struct altq_pktattr *, int));
+extern struct wtab *wtab_alloc __P((int));
+extern int wtab_destroy __P((struct wtab *));
+extern int32_t pow_w __P((struct wtab *, int));
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_RED_H_ */
diff --git a/sys/altq/altq_rio.c b/sys/altq/altq_rio.c
new file mode 100644
index 00000000000..37fe1833f09
--- /dev/null
+++ b/sys/altq/altq_rio.c
@@ -0,0 +1,829 @@
+/* $OpenBSD: altq_rio.c,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_rio.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1998-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_cdnr.h>
+#include <altq/altq_red.h>
+#include <altq/altq_rio.h>
+
+/*
+ * RIO: RED with IN/OUT bit
+ * described in
+ * "Explicit Allocation of Best Effort Packet Delivery Service"
+ * David D. Clark and Wenjia Fang, MIT Lab for Computer Science
+ * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
+ *
+ * this implementation is extended to support more than 2 drop precedence
+ * values as described in RFC2597 (Assured Forwarding PHB Group).
+ *
+ */
+/*
+ * AF DS (differentiated service) codepoints.
+ * (classes can be mapped to CBQ or H-FSC classes.)
+ *
+ * 0 1 2 3 4 5 6 7
+ * +---+---+---+---+---+---+---+---+
+ * | CLASS |DropPre| 0 | CU |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * class 1: 001
+ * class 2: 010
+ * class 3: 011
+ * class 4: 100
+ *
+ * low drop prec: 01
+ * medium drop prec: 10
+ * high drop prec: 11
+ */
+
+/* normal red parameters */
+#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
+ /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
+ /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
+ /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define INV_P_MAX 10 /* inverse of max drop probability */
+#define TH_MIN 5 /* min threshold */
+#define TH_MAX 15 /* max threshold */
+
+#define RIO_LIMIT 60 /* default max queue length */
+#define RIO_STATS /* collect statistics */
+
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a)->tv_usec - (b)->tv_usec; \
+ if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \
+ if (xxs < 0) { \
+ printf("rm_class: bogus time values"); \
+ delta = 60000000; \
+ } else if (xxs > 4) { \
+ if (xxs > 60) \
+ delta = 60000000; \
+ else \
+ delta += xxs * 1000000; \
+ } else while (xxs > 0) { \
+ delta += 1000000; \
+ xxs--; \
+ } \
+ } \
+}
+
+/* rio_list keeps all rio_queue_t's allocated. */
+static rio_queue_t *rio_list = NULL;
+/* default rio parameter values */
+static struct redparams default_rio_params[RIO_NDROPPREC] = {
+ /* th_min, th_max, inv_pmax */
+ { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
+ { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
+ { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */
+};
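+/*
+ * with TH_MIN = 5 and TH_MAX = 15, the defaults above stagger the
+ * (th_min, th_max) pairs to (35, 45), (20, 30) and (5, 15), so a
+ * higher drop precedence starts dropping at a shorter average queue.
+ */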
+
+/* internal function prototypes */
+static int rio_enqueue __P((struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *));
+static struct mbuf *rio_dequeue __P((struct ifaltq *, int));
+static int rio_request __P((struct ifaltq *, int, void *));
+static int rio_detach __P((rio_queue_t *));
+static int dscp2index __P((u_int8_t));
+
+/*
+ * rio device interface
+ */
+altqdev_decl(rio);
+
+int
+rioopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+rioclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ rio_queue_t *rqp;
+ int err, error = 0;
+
+ while ((rqp = rio_list) != NULL) {
+ /* destroy all */
+ err = rio_detach(rqp);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+rioioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ rio_queue_t *rqp;
+ struct rio_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case RIO_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case RIO_ENABLE:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case RIO_DISABLE:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case RIO_IF_ATTACH:
+ ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize rio_queue_t */
+ MALLOC(rqp, rio_queue_t *, sizeof(rio_queue_t), M_DEVBUF, M_WAITOK);
+ if (rqp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp, sizeof(rio_queue_t));
+
+ MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (rqp->rq_q == NULL) {
+ FREE(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
+ if (rqp->rq_rio == NULL) {
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = RIO_LIMIT;
+ qtype(rqp->rq_q) = Q_RIO;
+
+ /*
+ * attach RIO to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
+ rio_enqueue, rio_dequeue, rio_request,
+ NULL, NULL);
+ if (error) {
+ rio_destroy(rqp->rq_rio);
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the rio list */
+ rqp->rq_next = rio_list;
+ rio_list = rqp;
+ break;
+
+ case RIO_IF_DETACH:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = rio_detach(rqp);
+ break;
+
+ case RIO_GETSTATS:
+ do {
+ struct rio_stats *q_stats;
+ rio_t *rp;
+ int i;
+
+ q_stats = (struct rio_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
+ ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ rp = rqp->rq_rio;
+
+ q_stats->q_limit = qlimit(rqp->rq_q);
+ q_stats->weight = rp->rio_weight;
+ q_stats->flags = rp->rio_flags;
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ q_stats->q_len[i] = rp->rio_precstate[i].qlen;
+ bcopy(&rp->q_stats[i], &q_stats->q_stats[i],
+ sizeof(struct redstats));
+ q_stats->q_stats[i].q_avg =
+ rp->rio_precstate[i].avg >> rp->rio_wshift;
+
+ q_stats->q_params[i].inv_pmax
+ = rp->rio_precstate[i].inv_pmax;
+ q_stats->q_params[i].th_min
+ = rp->rio_precstate[i].th_min;
+ q_stats->q_params[i].th_max
+ = rp->rio_precstate[i].th_max;
+ }
+ } while (0);
+ break;
+
+ case RIO_CONFIG:
+ do {
+ struct rio_conf *fc;
+ rio_t *new;
+ int s, limit, i;
+
+ fc = (struct rio_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.rio_ifname,
+ ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ new = rio_alloc(fc->rio_weight, &fc->q_params[0],
+ fc->rio_flags, fc->rio_pkttime);
+ if (new == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+ s = splimp();
+ _flushq(rqp->rq_q);
+ limit = fc->rio_limit;
+ if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
+ limit = fc->q_params[RIO_NDROPPREC-1].th_max;
+ qlimit(rqp->rq_q) = limit;
+
+ rio_destroy(rqp->rq_rio);
+ rqp->rq_rio = new;
+
+ splx(s);
+
+ /* write back new values */
+ fc->rio_limit = limit;
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ fc->q_params[i].inv_pmax =
+ rqp->rq_rio->rio_precstate[i].inv_pmax;
+ fc->q_params[i].th_min =
+ rqp->rq_rio->rio_precstate[i].th_min;
+ fc->q_params[i].th_max =
+ rqp->rq_rio->rio_precstate[i].th_max;
+ }
+ } while (0);
+ break;
+
+ case RIO_SETDEFAULTS:
+ do {
+ struct redparams *rp;
+ int i;
+
+ rp = (struct redparams *)addr;
+ for (i = 0; i < RIO_NDROPPREC; i++)
+ default_rio_params[i] = rp[i];
+ } while (0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+static int
+rio_detach(rqp)
+ rio_queue_t *rqp;
+{
+ rio_queue_t *tmp;
+ int error = 0;
+
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ altq_disable(rqp->rq_ifq);
+
+ if ((error = altq_detach(rqp->rq_ifq)))
+ return (error);
+
+ if (rio_list == rqp)
+ rio_list = rqp->rq_next;
+ else {
+ for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
+ if (tmp->rq_next == rqp) {
+ tmp->rq_next = rqp->rq_next;
+ break;
+ }
+ if (tmp == NULL)
+ printf("rio_detach: no state found in rio_list!\n");
+ }
+
+ rio_destroy(rqp->rq_rio);
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * rio support routines
+ */
+static int
+rio_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ _flushq(rqp->rq_q);
+ if (ALTQ_IS_ENABLED(ifq))
+ ifq->ifq_len = 0;
+ break;
+ }
+ return (0);
+}
+
+
+rio_t *
+rio_alloc(weight, params, flags, pkttime)
+ int weight;
+ struct redparams *params;
+ int flags, pkttime;
+{
+ rio_t *rp;
+ int w, i;
+ int npkts_per_sec;
+
+ MALLOC(rp, rio_t *, sizeof(rio_t), M_DEVBUF, M_WAITOK);
+ if (rp == NULL)
+ return (NULL);
+ bzero(rp, sizeof(rio_t));
+
+ rp->rio_flags = flags;
+ if (pkttime == 0)
+ /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
+ rp->rio_pkttime = 800;
+ else
+ rp->rio_pkttime = pkttime;
+
+ if (weight != 0)
+ rp->rio_weight = weight;
+ else {
+		/* use default */
+ rp->rio_weight = W_WEIGHT;
+
+ /* when the link is very slow, adjust red parameters */
+ npkts_per_sec = 1000000 / rp->rio_pkttime;
+ if (npkts_per_sec < 50) {
+ /* up to about 400Kbps */
+ rp->rio_weight = W_WEIGHT_2;
+ } else if (npkts_per_sec < 300) {
+ /* up to about 2.4Mbps */
+ rp->rio_weight = W_WEIGHT_1;
+ }
+ }
+
+ /* calculate wshift. weight must be power of 2 */
+ w = rp->rio_weight;
+ for (i = 0; w > 1; i++)
+ w = w >> 1;
+ rp->rio_wshift = i;
+ w = 1 << rp->rio_wshift;
+ if (w != rp->rio_weight) {
+ printf("invalid weight value %d for red! use %d\n",
+ rp->rio_weight, w);
+ rp->rio_weight = w;
+ }
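+	/*
+	 * e.g., weight 512 yields wshift 9; a non-power-of-2 value such
+	 * as 500 is floored to 256 (wshift 8) with the warning above.
+	 */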
+
+ /* allocate weight table */
+ rp->rio_wtab = wtab_alloc(rp->rio_weight);
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ struct dropprec_state *prec = &rp->rio_precstate[i];
+
+ prec->avg = 0;
+ prec->idle = 1;
+
+ if (params == NULL || params[i].inv_pmax == 0)
+ prec->inv_pmax = default_rio_params[i].inv_pmax;
+ else
+ prec->inv_pmax = params[i].inv_pmax;
+ if (params == NULL || params[i].th_min == 0)
+ prec->th_min = default_rio_params[i].th_min;
+ else
+ prec->th_min = params[i].th_min;
+ if (params == NULL || params[i].th_max == 0)
+ prec->th_max = default_rio_params[i].th_max;
+ else
+ prec->th_max = params[i].th_max;
+
+ /*
+ * th_min_s and th_max_s are scaled versions of th_min
+ * and th_max to be compared with avg.
+ */
+ prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
+ prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
+
+ /*
+ * precompute probability denominator
+ * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+ */
+ prec->probd = (2 * (prec->th_max - prec->th_min)
+ * prec->inv_pmax) << FP_SHIFT;
+
+ microtime(&prec->last);
+ }
+
+ return (rp);
+}
+
+void
+rio_destroy(rp)
+ rio_t *rp;
+{
+ wtab_destroy(rp->rio_wtab);
+ FREE(rp, M_DEVBUF);
+}
+
+void
+rio_getstats(rp, sp)
+ rio_t *rp;
+ struct redstats *sp;
+{
+ int i;
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ bcopy(&rp->q_stats[i], sp, sizeof(struct redstats));
+ sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
+ sp++;
+ }
+}
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+static int
+rio_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+ int error = 0;
+
+ if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0)
+ ifq->ifq_len++;
+ else
+ error = ENOBUFS;
+ return error;
+}
+
+#if (RIO_NDROPPREC == 3)
+/*
+ * internally, a drop precedence value is converted to an index
+ * starting from 0.
+ */
+static int
+dscp2index(u_int8_t dscp)
+{
+ int dpindex = dscp & AF_DROPPRECMASK;
+
+ if (dpindex == 0)
+ return (0);
+ return ((dpindex >> 3) - 1);
+}
+#endif
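+/*
+ * e.g., with the usual AF_DROPPRECMASK of 0x18, the ds fields of
+ * AF11 (0x28), AF12 (0x30) and AF13 (0x38) map to indices 0, 1 and 2;
+ * codepoints without drop precedence bits (e.g. best-effort, 0x00)
+ * fall into index 0.
+ */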
+
+#if 1
+/*
+ * kludge: when a packet is dequeued, we need to know its drop precedence
+ * in order to keep track of the queue length of each drop precedence.
+ * use m_pkthdr.rcvif to pass this info.
+ */
+#define RIOM_SET_PRECINDEX(m, idx) \
+ do { (m)->m_pkthdr.rcvif = (struct ifnet *)((long)(idx)); } while (0)
+#define RIOM_GET_PRECINDEX(m) \
+ ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
+ (m)->m_pkthdr.rcvif = NULL; idx; })
+#endif
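+/*
+ * note: RIOM_GET_PRECINDEX relies on a gcc statement-expression
+ * (the ({ ... }) construct), so this kludge is compiler-dependent.
+ */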
+
+int
+rio_addq(rp, q, m, pktattr)
+ rio_t *rp;
+ class_queue_t *q;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ int avg, droptype;
+ u_int8_t dsfield, odsfield;
+ int dpindex, i, n, t;
+ struct timeval now;
+ struct dropprec_state *prec;
+
+ dsfield = odsfield = read_dsfield(m, pktattr);
+ dpindex = dscp2index(dsfield);
+
+ /*
+ * update avg of the precedence states whose drop precedence
+ * is larger than or equal to the drop precedence of the packet
+ */
+ now.tv_sec = 0;
+ for (i = dpindex; i < RIO_NDROPPREC; i++) {
+ prec = &rp->rio_precstate[i];
+ avg = prec->avg;
+ if (prec->idle) {
+ prec->idle = 0;
+ if (now.tv_sec == 0)
+ microtime(&now);
+ t = (now.tv_sec - prec->last.tv_sec);
+ if (t > 60)
+ avg = 0;
+ else {
+ t = t * 1000000 +
+ (now.tv_usec - prec->last.tv_usec);
+ n = t / rp->rio_pkttime;
+ /* calculate (avg = (1 - Wq)^n * avg) */
+ if (n > 0)
+ avg = (avg >> FP_SHIFT) *
+ pow_w(rp->rio_wtab, n);
+ }
+ }
+
+ /* run estimator. (avg is scaled by WEIGHT in fixed-point) */
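+		/*
+		 * the stored avg is the true average scaled up by
+		 * 2^(rio_wshift + FP_SHIFT); the line below is the
+		 * fixed-point form of
+		 *	avg = (1 - 1/2^wshift) * avg + qlen / 2^wshift
+		 */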
+ avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
+ prec->avg = avg; /* save the new value */
+ /*
+ * count keeps a tally of arriving traffic that has not
+ * been dropped.
+ */
+ prec->count++;
+ }
+
+ prec = &rp->rio_precstate[dpindex];
+ avg = prec->avg;
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (avg >= prec->th_min_s && prec->qlen > 1) {
+ if (avg >= prec->th_max_s) {
+ /* avg >= th_max: forced drop */
+ droptype = DTYPE_FORCED;
+ } else if (prec->old == 0) {
+ /* first exceeds th_min */
+ prec->count = 1;
+ prec->old = 1;
+ } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
+ prec->probd, prec->count)) {
+ /* unforced drop by red */
+ droptype = DTYPE_EARLY;
+ }
+ } else {
+ /* avg < th_min */
+ prec->old = 0;
+ }
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+ if (droptype != DTYPE_NODROP) {
+ /* always drop incoming packet (as opposed to randomdrop) */
+ for (i = dpindex; i < RIO_NDROPPREC; i++)
+ rp->rio_precstate[i].count = 0;
+#ifdef RIO_STATS
+ if (droptype == DTYPE_EARLY)
+ rp->q_stats[dpindex].drop_unforced++;
+ else
+ rp->q_stats[dpindex].drop_forced++;
+ PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
+#endif
+ m_freem(m);
+ return (-1);
+ }
+
+ for (i = dpindex; i < RIO_NDROPPREC; i++)
+ rp->rio_precstate[i].qlen++;
+
+ /* save drop precedence index in mbuf hdr */
+ RIOM_SET_PRECINDEX(m, dpindex);
+
+ if (rp->rio_flags & RIOF_CLEARDSCP)
+ dsfield &= ~DSCP_MASK;
+
+ if (dsfield != odsfield)
+ write_dsfield(m, pktattr, dsfield);
+
+ _addq(q, m);
+
+#ifdef RIO_STATS
+ PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
+#endif
+ return (0);
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+
+static struct mbuf *
+rio_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+ struct mbuf *m = NULL;
+
+ if (op == ALTDQ_POLL)
+ return qhead(rqp->rq_q);
+
+ m = rio_getq(rqp->rq_rio, rqp->rq_q);
+ if (m != NULL)
+ ifq->ifq_len--;
+ return m;
+}
+
+struct mbuf *
+rio_getq(rp, q)
+ rio_t *rp;
+ class_queue_t *q;
+{
+ struct mbuf *m;
+ int dpindex, i;
+
+ if ((m = _getq(q)) == NULL)
+ return NULL;
+
+ dpindex = RIOM_GET_PRECINDEX(m);
+ for (i = dpindex; i < RIO_NDROPPREC; i++) {
+ if (--rp->rio_precstate[i].qlen == 0) {
+ if (rp->rio_precstate[i].idle == 0) {
+ rp->rio_precstate[i].idle = 1;
+ microtime(&rp->rio_precstate[i].last);
+ }
+ }
+ }
+ return (m);
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw rio_sw =
+ {"rio", rioopen, rioclose, rioioctl};
+
+ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ_RIO */
diff --git a/sys/altq/altq_rio.h b/sys/altq/altq_rio.h
new file mode 100644
index 00000000000..02d6f24fa08
--- /dev/null
+++ b/sys/altq/altq_rio.h
@@ -0,0 +1,140 @@
+/* $OpenBSD: altq_rio.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_rio.h,v 1.5 2000/12/14 08:12:46 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1998-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_RIO_H_
+#define _ALTQ_ALTQ_RIO_H_
+
+#include <altq/altq_classq.h>
+
+/*
+ * RIO: RED with IN/OUT bit
+ * (extended to support more than 2 drop precedence values)
+ */
+#define RIO_NDROPPREC 3 /* number of drop precedence values */
+
+struct rio_interface {
+ char rio_ifname[IFNAMSIZ];
+};
+
+struct rio_stats {
+ struct rio_interface iface;
+ int q_len[RIO_NDROPPREC];
+ struct redstats q_stats[RIO_NDROPPREC];
+
+ /* static red parameters */
+ int q_limit;
+ int weight;
+ int flags;
+ struct redparams q_params[RIO_NDROPPREC];
+};
+
+struct rio_conf {
+ struct rio_interface iface;
+ struct redparams q_params[RIO_NDROPPREC];
+ int rio_weight; /* weight for EWMA */
+ int rio_limit; /* max queue length */
+ int rio_pkttime; /* average packet time in usec */
+ int rio_flags; /* see below */
+};
+
+/* rio flags */
+#define RIOF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define RIOF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define RIOF_ECN (RIOF_ECN4 | RIOF_ECN6)
+#define RIOF_CLEARDSCP 0x200 /* clear diffserv codepoint */
+
+/*
+ * IOCTLs for RIO
+ */
+#define RIO_IF_ATTACH _IOW('Q', 1, struct rio_interface)
+#define RIO_IF_DETACH _IOW('Q', 2, struct rio_interface)
+#define RIO_ENABLE _IOW('Q', 3, struct rio_interface)
+#define RIO_DISABLE _IOW('Q', 4, struct rio_interface)
+#define RIO_CONFIG _IOWR('Q', 6, struct rio_conf)
+#define RIO_GETSTATS _IOWR('Q', 12, struct rio_stats)
+#define RIO_SETDEFAULTS _IOW('Q', 30, struct redparams[RIO_NDROPPREC])
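+/*
+ * a minimal userland sketch of driving these ioctls (normally altqd's
+ * job; the device path, interface name and parameter values below are
+ * illustrative only):
+ *
+ *	struct rio_interface iface;
+ *	struct rio_conf conf;
+ *	int fd = open("/dev/altq/rio", O_RDWR);
+ *
+ *	bzero(&iface, sizeof(iface));
+ *	strlcpy(iface.rio_ifname, "fxp0", IFNAMSIZ);
+ *	ioctl(fd, RIO_IF_ATTACH, &iface);
+ *
+ *	bzero(&conf, sizeof(conf));
+ *	conf.iface = iface;
+ *	conf.rio_weight = 512;		(0 selects the built-in default)
+ *	ioctl(fd, RIO_CONFIG, &conf);
+ *	ioctl(fd, RIO_ENABLE, &iface);
+ */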
+
+#ifdef _KERNEL
+
+typedef struct rio {
+ /* per drop precedence structure */
+ struct dropprec_state {
+ /* red parameters */
+ int inv_pmax; /* inverse of max drop probability */
+ int th_min; /* red min threshold */
+ int th_max; /* red max threshold */
+
+ /* variables for internal use */
+ int th_min_s; /* th_min scaled by avgshift */
+ int th_max_s; /* th_max scaled by avgshift */
+ int probd; /* drop probability denominator */
+
+ int qlen; /* queue length */
+ int avg; /* (scaled) queue length average */
+ int count; /* packet count since the last dropped/marked
+ packet */
+ int idle; /* queue was empty */
+ int old; /* avg is above th_min */
+ struct timeval last; /* timestamp when queue becomes idle */
+ } rio_precstate[RIO_NDROPPREC];
+
+ int rio_wshift; /* log(red_weight) */
+ int rio_weight; /* weight for EWMA */
+ struct wtab *rio_wtab; /* weight table */
+
+ int rio_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int rio_flags; /* rio flags */
+
+ u_int8_t rio_codepoint; /* codepoint value to tag packets */
+ u_int8_t rio_codepointmask; /* codepoint mask bits */
+
+ struct redstats q_stats[RIO_NDROPPREC]; /* statistics */
+} rio_t;
+
+typedef struct rio_queue {
+ struct rio_queue *rq_next; /* next red_state in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q;
+
+ rio_t *rq_rio;
+} rio_queue_t;
+
+extern rio_t *rio_alloc __P((int, struct redparams *, int, int));
+extern void rio_destroy __P((rio_t *));
+extern void rio_getstats __P((rio_t *, struct redstats *));
+extern int rio_addq __P((rio_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *));
+extern struct mbuf *rio_getq __P((rio_t *, class_queue_t *));
+extern int rio_set_meter __P((rio_t *, int, int, int));
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_RIO_H_ */
diff --git a/sys/altq/altq_rmclass.c b/sys/altq/altq_rmclass.c
new file mode 100644
index 00000000000..ba5385eba6e
--- /dev/null
+++ b/sys/altq/altq_rmclass.c
@@ -0,0 +1,1874 @@
+/* $OpenBSD: altq_rmclass.c,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_rmclass.c,v 1.10 2001/02/09 07:20:40 kjc Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * LBL code modified by speer@eng.sun.com, May 1997.
+ * For questions and/or comments, please send mail to cbq@ee.lbl.gov
+ */
+
+#ident "@(#)rm_class.c 1.48 97/12/05 SMI"
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+
+#include <altq/altq.h>
+#include <altq/altq_rmclass.h>
+#include <altq/altq_rmclass_debug.h>
+#include <altq/altq_red.h>
+#include <altq/altq_rio.h>
+
+/*
+ * Local Macros
+ */
+
+#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }
+
+/*
+ * Local routines.
+ */
+
+static int rmc_satisfied __P((struct rm_class *, struct timeval *));
+static void rmc_wrr_set_weights __P((struct rm_ifdat *));
+static void rmc_depth_compute __P((struct rm_class *));
+static void rmc_depth_recompute __P((rm_class_t *));
+
+static mbuf_t *_rmc_wrr_dequeue_next __P((struct rm_ifdat *, int));
+static mbuf_t *_rmc_prr_dequeue_next __P((struct rm_ifdat *, int));
+
+static int _rmc_addq __P((rm_class_t *, mbuf_t *));
+static void _rmc_dropq __P((rm_class_t *));
+static mbuf_t *_rmc_getq __P((rm_class_t *));
+static mbuf_t *_rmc_pollq __P((rm_class_t *));
+
+static int rmc_under_limit __P((struct rm_class *, struct timeval *));
+static void rmc_tl_satisfied __P((struct rm_ifdat *, struct timeval *));
+static void rmc_drop_action __P((struct rm_class *));
+static void rmc_restart __P((struct rm_class *));
+static void rmc_root_overlimit __P((struct rm_class *, struct rm_class *));
+
+#define BORROW_OFFTIME
+/*
+ * BORROW_OFFTIME (experimental):
+ *	borrow the offtime of the class being borrowed from.
+ * the reason is that when its own offtime is set, the class is unable
+ * to borrow much, especially when cutoff is taking effect.
+ * but when the borrowed class is overloaded (avgidle is close to minidle),
+ * use the borrowing class's offtime to avoid overload.
+ */
+#define ADJUST_CUTOFF
+/*
+ * ADJUST_CUTOFF (experimental):
+ * if no underlimit class is found due to cutoff, increase cutoff and
+ * retry the scheduling loop.
+ * also, don't invoke delay_actions while cutoff is taking effect,
+ * since a sleeping class won't have a chance to be scheduled in the
+ * next loop.
+ *
+ * now heuristics for setting the top-level variable (cutoff_) becomes:
+ * 1. if a packet arrives for a not-overlimit class, set cutoff
+ * to the depth of the class.
+ * 2. if cutoff is i, and a packet arrives for an overlimit class
+ * with an underlimit ancestor at a lower level than i (say j),
+ * then set cutoff to j.
+ * 3. at scheduling a packet, if there is no underlimit class
+ * due to the current cutoff level, increase cutoff by 1 and
+ * then try to schedule again.
+ */
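+/*
+ * e.g., with leaves at depth 0 and the root at depth 1, cutoff_ = 0
+ * forbids borrowing altogether, cutoff_ = 1 lets a leaf borrow from
+ * the root, and reset_cutoff() (cutoff_ = RM_MAXDEPTH) lifts the
+ * restriction.
+ */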
+
+/*
+ * rm_class_t *
+ * rmc_newclass(...) - Create a new resource management class at priority
+ * 'pri' on the interface given by 'ifd'.
+ *
+ * nsecPerByte is the data rate of the interface in nanoseconds/byte.
+ * E.g., 800 for a 10Mb/s ethernet. If the class gets less
+ * than 100% of the bandwidth, this number should be the
+ * 'effective' rate for the class. Let f be the
+ * bandwidth fraction allocated to this class, and let
+ * nsPerByte be the data rate of the output link in
+ * nanoseconds/byte. Then nsecPerByte is set to
+ * nsPerByte / f. E.g., 1600 (= 800 / .5)
+ * for a class that gets 50% of an ethernet's bandwidth.
+ *
+ * action the routine to call when the class is over limit.
+ *
+ * maxq max allowable queue size for class (in packets).
+ *
+ * parent parent class pointer.
+ *
+ * borrow class to borrow from (should be either 'parent' or null).
+ *
+ * maxidle max value allowed for class 'idle' time estimate (this
+ * parameter determines how large an initial burst of packets
+ *		can be before overlimit action is invoked).
+ *
+ * offtime how long 'delay' action will delay when class goes over
+ * limit (this parameter determines the steady-state burst
+ * size when a class is running over its limit).
+ *
+ * Maxidle and offtime have to be computed from the following: If the
+ * average packet size is s, the bandwidth fraction allocated to this
+ * class is f, we want to allow b packet bursts, and the gain of the
+ * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
+ *
+ * ptime = s * nsPerByte * (1 - f) / f
+ * maxidle = ptime * (1 - g^b) / g^b
+ * minidle = -ptime * (1 / (f - 1))
+ *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
+ *
+ * Operationally, it's convenient to specify maxidle & offtime in units
+ * independent of the link bandwidth so the maxidle & offtime passed to
+ * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
+ * (The constant factor is a scale factor needed to make the parameters
+ * integers. This scaling also means that the 'unscaled' values of
+ * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
+ * not nanoseconds.) Also note that the 'idle' filter computation keeps
+ * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
+ * maxidle also must be scaled upward by this value. Thus, the passed
+ * values for maxidle and offtime can be computed as follows:
+ *
+ * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
+ * offtime = offtime * 8 / (1000 * nsecPerByte)
+ *
+ * When USE_HRTIME is employed, then maxidle and offtime become:
+ *	maxidle = maxidle * (8.0 / nsecPerByte);
+ * offtime = offtime * (8.0 / nsecPerByte);
+ */
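+/*
+ * numeric sketch of the scaling below: with nsecPerByte = 1600 (a
+ * class given 50% of a 10Mb/s link), a passed maxidle of 85 becomes
+ * maxidle_ = 85 * 1600 / 8 = 17000, which the idle filter interprets
+ * as 17000 >> RM_FILTER_GAIN usec of real idle time.
+ */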
+
+struct rm_class *
+rmc_newclass(pri, ifd, nsecPerByte, action, maxq, parent, borrow,
+ maxidle, minidle, offtime, pktsize, flags)
+ int pri;
+ struct rm_ifdat *ifd;
+ u_int nsecPerByte;
+ void (*action)(rm_class_t *, rm_class_t *);
+ int maxq;
+ struct rm_class *parent;
+ struct rm_class *borrow;
+ u_int maxidle;
+ int minidle;
+ u_int offtime;
+ int pktsize;
+ int flags;
+{
+ struct rm_class *cl;
+ struct rm_class *peer;
+ int s;
+
+ if (pri >= RM_MAXPRIO)
+ return (NULL);
+#ifndef ALTQ_RED
+ if (flags & RMCF_RED) {
+ printf("rmc_newclass: RED not configured for CBQ!\n");
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_RIO
+ if (flags & RMCF_RIO) {
+ printf("rmc_newclass: RIO not configured for CBQ!\n");
+ return (NULL);
+ }
+#endif
+
+ MALLOC(cl, struct rm_class *, sizeof(struct rm_class),
+ M_DEVBUF, M_WAITOK);
+ if (cl == NULL)
+ return (NULL);
+ bzero(cl, sizeof(struct rm_class));
+ CALLOUT_INIT(&cl->callout_);
+ MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (cl->q_ == NULL) {
+ FREE(cl, M_DEVBUF);
+ return (NULL);
+ }
+ bzero(cl->q_, sizeof(class_queue_t));
+
+ /*
+ * Class initialization.
+ */
+ cl->children_ = NULL;
+ cl->parent_ = parent;
+ cl->borrow_ = borrow;
+ cl->leaf_ = 1;
+ cl->ifdat_ = ifd;
+ cl->pri_ = pri;
+ cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+ cl->depth_ = 0;
+ cl->qthresh_ = 0;
+ cl->ns_per_byte_ = nsecPerByte;
+
+ qlimit(cl->q_) = maxq;
+ qtype(cl->q_) = Q_DROPHEAD;
+ qlen(cl->q_) = 0;
+ cl->flags_ = flags;
+
+#if 1 /* minidle is also scaled in ALTQ */
+ cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
+ if (cl->minidle_ > 0)
+ cl->minidle_ = 0;
+#else
+ cl->minidle_ = minidle;
+#endif
+ cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+ if (cl->maxidle_ == 0)
+ cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+ cl->avgidle_ = cl->maxidle_;
+ cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+ if (cl->offtime_ == 0)
+ cl->offtime_ = 1;
+#else
+ cl->avgidle_ = 0;
+ cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+ cl->overlimit = action;
+
+#ifdef ALTQ_RED
+ if (flags & (RMCF_RED|RMCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & RMCF_ECN)
+ red_flags |= REDF_ECN;
+ if (flags & RMCF_FLOWVALVE)
+ red_flags |= REDF_FLOWVALVE;
+#ifdef ALTQ_RIO
+ if (flags & RMCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ red_pkttime = nsecPerByte * pktsize / 1000;
+
+ if (flags & RMCF_RED) {
+ cl->red_ = red_alloc(0, 0, 0, 0,
+ red_flags, red_pkttime);
+ if (cl->red_ != NULL)
+ qtype(cl->q_) = Q_RED;
+ }
+#ifdef ALTQ_RIO
+ else {
+ cl->red_ = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->red_ != NULL)
+ qtype(cl->q_) = Q_RIO;
+ }
+#endif
+ }
+#endif /* ALTQ_RED */
+
+ /*
+ * put the class into the class tree
+ */
+ s = splimp();
+ if ((peer = ifd->active_[pri]) != NULL) {
+ /* find the last class at this pri */
+ cl->peer_ = peer;
+ while (peer->peer_ != ifd->active_[pri])
+ peer = peer->peer_;
+ peer->peer_ = cl;
+ } else {
+ ifd->active_[pri] = cl;
+ cl->peer_ = cl;
+ }
+
+ if (cl->parent_) {
+ cl->next_ = parent->children_;
+ parent->children_ = cl;
+ parent->leaf_ = 0;
+ }
+
+ /*
+	 * Compute the depth of this class and its ancestors in the class
+ * hierarchy.
+ */
+ rmc_depth_compute(cl);
+
+ /*
+	 * If CBQ's WRR is enabled, then initialize the class WRR state.
+ */
+ if (ifd->wrr_) {
+ ifd->num_[pri]++;
+ ifd->alloc_[pri] += cl->allotment_;
+ rmc_wrr_set_weights(ifd);
+ }
+ splx(s);
+ return (cl);
+}
+
+int
+rmc_modclass(cl, nsecPerByte, maxq, maxidle, minidle, offtime, pktsize)
+ struct rm_class *cl;
+ u_int nsecPerByte;
+ int maxq;
+ u_int maxidle;
+ int minidle;
+ u_int offtime;
+ int pktsize;
+{
+ struct rm_ifdat *ifd;
+ u_int old_allotment;
+ int s;
+
+ ifd = cl->ifdat_;
+ old_allotment = cl->allotment_;
+
+ s = splimp();
+ cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+ cl->qthresh_ = 0;
+ cl->ns_per_byte_ = nsecPerByte;
+
+ qlimit(cl->q_) = maxq;
+
+#if 1 /* minidle is also scaled in ALTQ */
+ cl->minidle_ = (minidle * nsecPerByte) / 8;
+ if (cl->minidle_ > 0)
+ cl->minidle_ = 0;
+#else
+ cl->minidle_ = minidle;
+#endif
+ cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+ if (cl->maxidle_ == 0)
+ cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+ cl->avgidle_ = cl->maxidle_;
+ cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+ if (cl->offtime_ == 0)
+ cl->offtime_ = 1;
+#else
+ cl->avgidle_ = 0;
+ cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+
+ /*
+	 * If CBQ's WRR is enabled, then initialize the class WRR state.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
+ rmc_wrr_set_weights(ifd);
+ }
+ splx(s);
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
+ * the appropriate round robin weights for the CBQ weighted round robin
+ * algorithm.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_wrr_set_weights(ifd)
+ struct rm_ifdat *ifd;
+{
+ int i;
+ struct rm_class *cl, *clh;
+
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ /*
+ * This is inverted from that of the simulator to
+ * maintain precision.
+ */
+ if (ifd->num_[i] == 0)
+ ifd->M_[i] = 0;
+ else
+ ifd->M_[i] = ifd->alloc_[i] /
+ (ifd->num_[i] * ifd->maxpkt_);
+ /*
+		 * Compute the weighted allotment for each class.
+ * This takes the expensive div instruction out
+ * of the main loop for the wrr scheduling path.
+ * These only get recomputed when a class comes or
+ * goes.
+ */
+ if (ifd->active_[i] != NULL) {
+ clh = cl = ifd->active_[i];
+ do {
+ /* safe-guard for slow link or alloc_ == 0 */
+ if (ifd->M_[i] == 0)
+ cl->w_allotment_ = 0;
+ else
+ cl->w_allotment_ = cl->allotment_ /
+ ifd->M_[i];
+ cl = cl->peer_;
+ } while ((cl != NULL) && (cl != clh));
+ }
+ }
+}
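+/*
+ * worked example: two classes at one priority with allotments of
+ * 625000 and 1250000 bytes/sec on an mtu-1500 interface give
+ * alloc_ = 1875000 and M_ = 1875000 / (2 * 1500) = 625, hence
+ * w_allotment_ values of 1000 and 2000 bytes per wrr round.
+ */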
+
+int
+rmc_get_weight(ifd, pri)
+ struct rm_ifdat *ifd;
+ int pri;
+{
+ if ((pri >= 0) && (pri < RM_MAXPRIO))
+ return (ifd->M_[pri]);
+ else
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_depth_compute(struct rm_class *cl) - This function computes the
+ * appropriate depth of class 'cl' and its ancestors.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_compute(cl)
+ struct rm_class *cl;
+{
+ rm_class_t *t = cl, *p;
+
+ /*
+ * Recompute the depth for the branch of the tree.
+ */
+ while (t != NULL) {
+ p = t->parent_;
+ if (p && (t->depth_ >= p->depth_)) {
+ p->depth_ = t->depth_ + 1;
+ t = p;
+ } else
+ t = NULL;
+ }
+}
+
+/*
+ * static void
+ * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
+ * the depth of the tree after a class has been deleted.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_recompute(rm_class_t *cl)
+{
+#if 1 /* ALTQ */
+ rm_class_t *p, *t;
+
+ p = cl;
+ while (p != NULL) {
+ if ((t = p->children_) == NULL) {
+ p->depth_ = 0;
+ } else {
+ int cdepth = 0;
+
+ while (t != NULL) {
+ if (t->depth_ > cdepth)
+ cdepth = t->depth_;
+ t = t->next_;
+ }
+
+ if (p->depth_ == cdepth + 1)
+ /* no change to this parent */
+ return;
+
+ p->depth_ = cdepth + 1;
+ }
+
+ p = p->parent_;
+ }
+#else
+ rm_class_t *t;
+
+ if (cl->depth_ >= 1) {
+ if (cl->children_ == NULL) {
+ cl->depth_ = 0;
+ } else if ((t = cl->children_) != NULL) {
+ while (t != NULL) {
+ if (t->children_ != NULL)
+ rmc_depth_recompute(t);
+ t = t->next_;
+ }
+ } else
+ rmc_depth_compute(cl);
+ }
+#endif
+}
+
+/*
+ * void
+ * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
+ *	function deletes a class from the link-sharing structure and frees
+ * all resources associated with the class.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delete_class(ifd, cl)
+ struct rm_ifdat *ifd;
+ struct rm_class *cl;
+{
+ struct rm_class *p, *head, *previous;
+ int s;
+
+ ASSERT(cl->children_ == NULL);
+
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+
+ s = splimp();
+ /*
+ * Free packets in the packet queue.
+ * XXX - this may not be a desired behavior. Packets should be
+ * re-queued.
+ */
+ rmc_dropall(cl);
+
+ /*
+	 * If the class has a parent, then remove the class from the
+	 * parent's children chain.
+ */
+ if (cl->parent_ != NULL) {
+ head = cl->parent_->children_;
+ p = previous = head;
+ if (head->next_ == NULL) {
+ ASSERT(head == cl);
+ cl->parent_->children_ = NULL;
+ cl->parent_->leaf_ = 1;
+ } else while (p != NULL) {
+ if (p == cl) {
+ if (cl == head)
+ cl->parent_->children_ = cl->next_;
+ else
+ previous->next_ = cl->next_;
+ cl->next_ = NULL;
+ p = NULL;
+ } else {
+ previous = p;
+ p = p->next_;
+ }
+ }
+ }
+
+ /*
+ * Delete class from class priority peer list.
+ */
+ if ((p = ifd->active_[cl->pri_]) != NULL) {
+ /*
+ * If there is more than one member of this priority
+ * level, then look for class(cl) in the priority level.
+ */
+ if (p != p->peer_) {
+ while (p->peer_ != cl)
+ p = p->peer_;
+ p->peer_ = cl->peer_;
+
+ if (ifd->active_[cl->pri_] == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ } else {
+ ASSERT(p == cl);
+ ifd->active_[cl->pri_] = NULL;
+ }
+ }
+
+ /*
+ * Recompute the WRR weights.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] -= cl->allotment_;
+ ifd->num_[cl->pri_]--;
+ rmc_wrr_set_weights(ifd);
+ }
+
+ /*
+ * Re-compute the depth of the tree.
+ */
+#if 1 /* ALTQ */
+ rmc_depth_recompute(cl->parent_);
+#else
+ rmc_depth_recompute(ifd->root_);
+#endif
+
+ splx(s);
+
+ /*
+ * Free the class structure.
+ */
+ if (cl->red_ != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ rio_destroy((rio_t *)cl->red_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ red_destroy(cl->red_);
+#endif
+ }
+ FREE(cl->q_, M_DEVBUF);
+ FREE(cl, M_DEVBUF);
+}
+
+
+/*
+ * void
+ * rmc_init(...) - Initialize the resource management data structures
+ * associated with the output portion of interface 'ifp'. 'ifd' is
+ * where the structures will be built (for backwards compatibility, the
+ * structures aren't kept in the ifnet struct). 'nsecPerByte'
+ * gives the link speed (inverse of bandwidth) in nanoseconds/byte.
+ * 'restart' is the driver-specific routine that the generic 'delay
+ * until under limit' action will call to restart output. `maxq'
+ * is the queue size of the 'link' & 'default' classes. 'maxqueued'
+ * is the maximum number of packets that the resource management
+ * code will allow to be queued 'downstream' (this is typically 1).
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_init(ifq, ifd, nsecPerByte, restart, maxq, maxqueued, maxidle,
+ minidle, offtime, flags)
+ struct ifaltq *ifq;
+ struct rm_ifdat *ifd;
+ u_int nsecPerByte;
+ void (*restart)(struct ifaltq *);
+ int maxq, maxqueued;
+ u_int maxidle;
+ int minidle;
+ u_int offtime;
+ int flags;
+{
+ int i, mtu;
+
+ /*
+	 * Initialize the CBQ tracing/debug facility.
+ */
+ CBQTRACEINIT();
+
+ bzero((char *)ifd, sizeof (*ifd));
+ mtu = ifq->altq_ifp->if_mtu;
+ ifd->ifq_ = ifq;
+ ifd->restart = restart;
+ ifd->maxqueued_ = maxqueued;
+ ifd->ns_per_byte_ = nsecPerByte;
+ ifd->maxpkt_ = mtu;
+ ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
+ ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
+#if 1
+ ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
+ if (mtu * nsecPerByte > 10 * 1000000)
+ ifd->maxiftime_ /= 4;
+#endif
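+	/*
+	 * e.g., the maxiftime_ computation above: mtu 1500 at 10Mb/s
+	 * (nsecPerByte = 800) gives a 1200 usec packet time and
+	 * maxiftime_ = 19200 usec; very slow links (packet time above
+	 * 10 msec) get the allowance cut to 4 packet times.
+	 */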
+
+ reset_cutoff(ifd);
+ CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
+
+ /*
+ * Initialize the CBQ's WRR state.
+ */
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ ifd->alloc_[i] = 0;
+ ifd->M_[i] = 0;
+ ifd->num_[i] = 0;
+ ifd->na_[i] = 0;
+ ifd->active_[i] = NULL;
+ }
+
+ /*
+ * Initialize current packet state.
+ */
+ ifd->qi_ = 0;
+ ifd->qo_ = 0;
+ for (i = 0; i < RM_MAXQUEUED; i++) {
+ ifd->class_[i] = NULL;
+ ifd->curlen_[i] = 0;
+ ifd->borrowed_[i] = NULL;
+ }
+
+ /*
+ * Create the root class of the link-sharing structure.
+ */
+ if ((ifd->root_ = rmc_newclass(0, ifd,
+ nsecPerByte,
+ rmc_root_overlimit, maxq, 0, 0,
+ maxidle, minidle, offtime,
+ 0, 0)) == NULL) {
+ printf("rmc_init: root class not allocated\n");
+ return ;
+ }
+ ifd->root_->depth_ = 0;
+}
+
+/*
+ * void
+ * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
+ * mbuf 'm' to queue for resource class 'cl'. This routine is called
+ * by a driver's if_output routine. This routine must be called with
+ * output packet completion interrupts locked out (to avoid racing with
+ * rmc_dequeue_next).
+ *
+ * Returns: 0 on successful queueing
+ * -1 when packet drop occurs
+ */
+int
+rmc_queue_packet(cl, m)
+ struct rm_class *cl;
+ mbuf_t *m;
+{
+ struct timeval now;
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int cpri = cl->pri_;
+ int is_empty = qempty(cl->q_);
+
+ RM_GETTIME(now);
+ if (ifd->cutoff_ > 0) {
+ if (TV_LT(&cl->undertime_, &now)) {
+ if (ifd->cutoff_ > cl->depth_)
+ ifd->cutoff_ = cl->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
+ }
+#if 1 /* ALTQ */
+ else {
+ /*
+ * the class is overlimit. if the class has
+ * underlimit ancestors, set cutoff to the lowest
+ * depth among them.
+ */
+ struct rm_class *borrow = cl->borrow_;
+
+ while (borrow != NULL &&
+ borrow->depth_ < ifd->cutoff_) {
+ if (TV_LT(&borrow->undertime_, &now)) {
+ ifd->cutoff_ = borrow->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
+ break;
+ }
+ borrow = borrow->borrow_;
+ }
+ }
+#else /* !ALTQ */
+ else if ((ifd->cutoff_ > 1) && cl->borrow_) {
+ if (TV_LT(&cl->borrow_->undertime_, &now)) {
+ ifd->cutoff_ = cl->borrow_->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob',
+ cl->borrow_->depth_);
+ }
+ }
+#endif /* !ALTQ */
+ }
+
+ if (_rmc_addq(cl, m) < 0)
+ /* failed */
+ return (-1);
+
+ if (is_empty) {
+ CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
+ ifd->na_[cpri]++;
+ }
+
+ if (qlen(cl->q_) > qlimit(cl->q_)) {
+ /* note: qlimit can be set to 0 or 1 */
+ rmc_drop_action(cl);
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * void
+ * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
+ *	classes to see if they are satisfied.
+ */
+
+static void
+rmc_tl_satisfied(ifd, now)
+ struct rm_ifdat *ifd;
+ struct timeval *now;
+{
+ int i;
+ rm_class_t *p, *bp;
+
+ for (i = RM_MAXPRIO - 1; i >= 0; i--) {
+ if ((bp = ifd->active_[i]) != NULL) {
+ p = bp;
+ do {
+ if (!rmc_satisfied(p, now)) {
+ ifd->cutoff_ = p->depth_;
+ return;
+ }
+ p = p->peer_;
+ } while (p != bp);
+ }
+ }
+
+ reset_cutoff(ifd);
+}
+
+/*
+ * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
+ */
+
+static int
+rmc_satisfied(cl, now)
+ struct rm_class *cl;
+ struct timeval *now;
+{
+ rm_class_t *p;
+
+ if (cl == NULL)
+ return (1);
+ if (TV_LT(now, &cl->undertime_))
+ return (1);
+ if (cl->depth_ == 0) {
+ if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
+ return (0);
+ else
+ return (1);
+ }
+ if (cl->children_ != NULL) {
+ p = cl->children_;
+ while (p != NULL) {
+ if (!rmc_satisfied(p, now))
+ return (0);
+ p = p->next_;
+ }
+ }
+
+ return (1);
+}
+
+/*
+ * Return 1 if class 'cl' is under limit or can borrow from a parent,
+ * 0 if overlimit. As a side-effect, this routine will invoke the
+ * class overlimit action if the class is overlimit.
+ */
+
+static int
+rmc_under_limit(cl, now)
+ struct rm_class *cl;
+ struct timeval *now;
+{
+ rm_class_t *p = cl;
+ rm_class_t *top;
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ifd->borrowed_[ifd->qi_] = NULL;
+ /*
+ * If cl is the root class, then always return that it is
+ * underlimit. Otherwise, check to see if the class is underlimit.
+ */
+ if (cl->parent_ == NULL)
+ return (1);
+
+ if (cl->sleeping_) {
+ if (TV_LT(now, &cl->undertime_))
+ return (0);
+
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+ return (1);
+ }
+
+ top = NULL;
+ while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
+ if (((cl = cl->borrow_) == NULL) ||
+ (cl->depth_ > ifd->cutoff_)) {
+#ifdef ADJUST_CUTOFF
+ if (cl != NULL)
+ /* cutoff is taking effect, just
+ return false without calling
+ the delay action. */
+ return (0);
+#endif
+#ifdef BORROW_OFFTIME
+ /*
+ * check if the class can borrow offtime too.
+ * borrow offtime from the top of the borrow
+ * chain if the top class is not overloaded.
+ */
+ if (cl != NULL) {
+ /* cutoff is taking effect, use this class as top. */
+ top = cl;
+ CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
+ }
+ if (top != NULL && top->avgidle_ == top->minidle_)
+ top = NULL;
+ p->overtime_ = *now;
+ (p->overlimit)(p, top);
+#else
+ p->overtime_ = *now;
+ (p->overlimit)(p, NULL);
+#endif
+ return (0);
+ }
+ top = cl;
+ }
+
+ if (cl != p)
+ ifd->borrowed_[ifd->qi_] = cl;
+ return (1);
+}
+
+/*
+ * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
+ * Packet-by-packet round robin.
+ *
+ * The heart of the weighted round-robin scheduler, which decides which
+ * class next gets to send a packet.  Highest priority first, then
+ * weighted round-robin within priorities.
+ *
+ * Each able-to-send class gets to send until its byte allocation is
+ * exhausted. Thus, the active pointer is only changed after a class has
+ * exhausted its allocation.
+ *
+ * If the scheduler finds no class that is underlimit or able to borrow,
+ * then the first class found that had a nonzero queue and is allowed to
+ * borrow gets to send.
+ */
+
+static mbuf_t *
+_rmc_wrr_dequeue_next(ifd, op)
+ struct rm_ifdat *ifd;
+ int op;
+{
+ struct rm_class *cl = NULL, *first = NULL;
+ u_int deficit;
+ int cpri;
+ mbuf_t *m;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ if (ifd->efficient_) {
+ /* check if this class is overlimit */
+ if (cl->undertime_.tv_sec != 0 &&
+ rmc_under_limit(cl, &now) == 0)
+ first = cl;
+ }
+ ifd->pollcache_ = NULL;
+ goto _wrr_out;
+ }
+ else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ deficit = 0;
+ /*
+ * Loop through twice for a priority level, if some class
+ * was unable to send a packet the first round because
+ * of the weighted round-robin mechanism.
+ * During the second loop at this level, deficit==2.
+ * (This second loop is not needed if for every class,
+		 * "M[cl->pri_]" times "cl->allotment" is greater than
+ * the byte size for the largest packet in the class.)
+ */
+ _wrr_loop:
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
+ cl->bytes_alloc_ += cl->w_allotment_;
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now)) {
+ if (cl->bytes_alloc_ > 0 || deficit > 1)
+ goto _wrr_out;
+
+ /* underlimit but no alloc */
+ deficit = 1;
+#if 1
+ ifd->borrowed_[ifd->qi_] = NULL;
+#endif
+ }
+ else if (first == NULL && cl->borrow_ != NULL)
+ first = cl; /* borrowing candidate */
+ }
+
+ cl->bytes_alloc_ = 0;
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+
+ if (deficit == 1) {
+ /* first loop found an underlimit class with deficit */
+ /* Loop on same priority level, with new deficit. */
+ deficit = 2;
+ goto _wrr_loop;
+ }
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect,
+ * increase cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
+
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+	 * Dequeue the packet and do the bookkeeping...
+ */
+ _wrr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_wrr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ /*
+ * Update class statistics and link data.
+ */
+ if (cl->bytes_alloc_ > 0)
+ cl->bytes_alloc_ -= m_pktlen(m);
+
+ if ((cl->bytes_alloc_ <= 0) || first == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ else
+ ifd->active_[cl->pri_] = cl;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+ /* mode == ALTDQ_PPOLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * Dequeue & return next packet from the highest priority class that
+ * has a packet to send & has enough allocation to send it. This
+ * routine is called by a driver whenever it needs a new packet to
+ * output.
+ */
+static mbuf_t *
+_rmc_prr_dequeue_next(ifd, op)
+ struct rm_ifdat *ifd;
+ int op;
+{
+ mbuf_t *m;
+ int cpri;
+ struct rm_class *cl, *first = NULL;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ ifd->pollcache_ = NULL;
+ goto _prr_out;
+ } else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now))
+ goto _prr_out;
+ if (first == NULL && cl->borrow_ != NULL)
+ first = cl;
+ }
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect, increase
+ * cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+	 * Dequeue the packet and do the bookkeeping...
+ */
+ _prr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_prr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ ifd->active_[cpri] = cl->peer_;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+ /* mode == ALTDQ_POLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * mbuf_t *
+ * rmc_dequeue_next(struct rm_ifdat *ifd, int mode) - this function
+ * is invoked by the packet driver to get the next packet to be
+ * dequeued and output on the link. If WRR is enabled, then the
+ *	WRR dequeue next routine will determine the next packet to be sent.
+ * Otherwise, packet-by-packet round robin is invoked.
+ *
+ * Returns: NULL, if a packet is not available or if all
+ * classes are overlimit.
+ *
+ * Otherwise, Pointer to the next packet.
+ */
+
+mbuf_t *
+rmc_dequeue_next(ifd, mode)
+ struct rm_ifdat *ifd;
+ int mode;
+{
+ if (ifd->queued_ >= ifd->maxqueued_)
+ return (NULL);
+ else if (ifd->wrr_)
+ return (_rmc_wrr_dequeue_next(ifd, mode));
+ else
+ return (_rmc_prr_dequeue_next(ifd, mode));
+}
+
+/*
+ * Update the utilization estimate for the packet that just completed.
+ * The packet's class & the parent(s) of that class all get their
+ * estimators updated. This routine is called by the driver's output-
+ * packet-completion interrupt service routine.
+ */
+
+/*
+ * a macro to approximate "divide by 1000"; the three shifts sum to a
+ * multiplier of about 0.0009995, accurate when the value has enough
+ * effective digits.
+ * (on pentium, mul takes 9 cycles but div takes 46!)
+ */
+#define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
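+/*
+ * e.g., NSEC_TO_USEC(1000000) = 976 + 15 + 7 = 998, within 0.2% of
+ * the exact 1000.
+ */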
+void
+rmc_update_class_util(ifd)
+ struct rm_ifdat *ifd;
+{
+ int idle, avgidle, pktlen;
+ int pkt_time, tidle;
+ rm_class_t *cl, *borrowed;
+ rm_class_t *borrows;
+ struct timeval *nowp;
+
+ /*
+ * Get the most recent completed class.
+ */
+ if ((cl = ifd->class_[ifd->qo_]) == NULL)
+ return;
+
+ pktlen = ifd->curlen_[ifd->qo_];
+ borrowed = ifd->borrowed_[ifd->qo_];
+ borrows = borrowed;
+
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+
+ /*
+	 * Run estimator on class and its ancestors.
+ */
+ /*
+ * rm_update_class_util is designed to be called when the
+ * transfer is completed from a xmit complete interrupt,
+ * but most drivers don't implement an upcall for that.
+ * so, just use estimated completion time.
+ * as a result, ifd->qi_ and ifd->qo_ are always synced.
+ */
+ nowp = &ifd->now_[ifd->qo_];
+ /* get pkt_time (for link) in usec */
+#if 1 /* use approximation */
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
+#endif
+#if 1 /* ALTQ4PPP */
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ int iftime;
+
+ /*
+ * make sure the estimated completion time does not go
+ * too far. it can happen when the link layer supports
+ * data compression or the interface speed is set to
+ * a much lower value.
+ */
+ TV_DELTA(&ifd->ifnow_, nowp, iftime);
+ if (iftime+pkt_time < ifd->maxiftime_) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
+ }
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#else
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#endif
+
+ while (cl != NULL) {
+ TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
+ if (idle >= 2000000)
+ /*
+ * this class is idle enough, reset avgidle.
+ * (TV_DELTA returns 2000000 us when delta is large.)
+ */
+ cl->avgidle_ = cl->maxidle_;
+
+ /* get pkt_time (for class) in usec */
+#if 1 /* use approximation */
+ pkt_time = pktlen * cl->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = pktlen * cl->ns_per_byte_ / 1000;
+#endif
+ idle -= pkt_time;
+
+ avgidle = cl->avgidle_;
+ avgidle += idle - (avgidle >> RM_FILTER_GAIN);
+ cl->avgidle_ = avgidle;
+
+ /* Are we overlimit ? */
+ if (avgidle <= 0) {
+ CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
+#if 1 /* ALTQ */
+ /*
+ * need some lower bound for avgidle, otherwise
+ * a borrowing class gets unbounded penalty.
+ */
+ if (avgidle < cl->minidle_)
+ avgidle = cl->avgidle_ = cl->minidle_;
+#endif
+ /* set next idle to make avgidle 0 */
+ tidle = pkt_time +
+ (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
+ TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
+ ++cl->stats_.over;
+ } else {
+ cl->avgidle_ =
+ (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
+ cl->undertime_.tv_sec = 0;
+ if (cl->sleeping_) {
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ }
+ }
+
+ if (borrows != NULL) {
+ if (borrows != cl)
+ ++cl->stats_.borrows;
+ else
+ borrows = NULL;
+ }
+ cl->last_ = ifd->ifnow_;
+ cl->last_pkttime_ = pkt_time;
+
+#if 1
+ if (cl->parent_ == NULL) {
+ /* take stats of root class */
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+ }
+#endif
+
+ cl = cl->parent_;
+ }
+
+ /*
+ * Check to see if cutoff needs to set to a new level.
+ */
+ cl = ifd->class_[ifd->qo_];
+ if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
+#if 1 /* ALTQ */
+ if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
+ rmc_tl_satisfied(ifd, nowp);
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#else /* !ALTQ */
+ if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
+ reset_cutoff(ifd);
+#ifdef notdef
+ rmc_tl_satisfied(ifd, &now);
+#endif
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#endif /* !ALTQ */
+ }
+
+ /*
+ * Release class slot
+ */
+ ifd->borrowed_[ifd->qo_] = NULL;
+ ifd->class_[ifd->qo_] = NULL;
+ ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
+ ifd->queued_--;
+}
+
+/*
+ * void
+ * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
+ * over-limit action routines. These get invoked by rmc_under_limit()
+ *	if a class with packets to send is over its bandwidth limit & can't
+ * borrow from a parent class.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_drop_action(cl)
+ struct rm_class *cl;
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ASSERT(qlen(cl->q_) > 0);
+ _rmc_dropq(cl);
+ if (qempty(cl->q_))
+ ifd->na_[cl->pri_]--;
+}
+
+void rmc_dropall(cl)
+ struct rm_class *cl;
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ if (!qempty(cl->q_)) {
+ _flushq(cl->q_);
+
+ ifd->na_[cl->pri_]--;
+ }
+}
+
+#if (__FreeBSD_version > 300000)
+/* hzto() is removed from FreeBSD-3.0 */
+static int hzto __P((struct timeval *));
+
+static int
+hzto(tv)
+ struct timeval *tv;
+{
+ struct timeval t2;
+
+ getmicrotime(&t2);
+ t2.tv_sec = tv->tv_sec - t2.tv_sec;
+ t2.tv_usec = tv->tv_usec - t2.tv_usec;
+ return (tvtohz(&t2));
+}
+#endif /* __FreeBSD_version > 300000 */
+
+/*
+ * void
+ * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
+ * delay action routine. It is invoked via rmc_under_limit when the
+ *	packet is discovered to be overlimit.
+ *
+ * If the delay action is result of borrow class being overlimit, then
+ * delay for the offtime of the borrowing class that is overlimit.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delay_action(cl, borrow)
+ struct rm_class *cl, *borrow;
+{
+ int delay, t, extradelay;
+
+ cl->stats_.overactions++;
+ TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
+#ifndef BORROW_OFFTIME
+ delay += cl->offtime_;
+#endif
+
+ if (!cl->sleeping_) {
+ CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
+#ifdef BORROW_OFFTIME
+ if (borrow != NULL)
+ extradelay = borrow->offtime_;
+ else
+#endif
+ extradelay = cl->offtime_;
+
+#ifdef ALTQ
+ /*
+ * XXX recalculate suspend time:
+ * current undertime is (tidle + pkt_time) calculated
+ * from the last transmission.
+ * tidle: time required to bring avgidle back to 0
+ * pkt_time: target waiting time for this class
+ * we need to replace pkt_time by offtime
+ */
+ extradelay -= cl->last_pkttime_;
+#endif
+ if (extradelay > 0) {
+ TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
+ delay += extradelay;
+ }
+
+ cl->sleeping_ = 1;
+ cl->stats_.delays++;
+
+ /*
+ * Since packets are phased randomly with respect to the
+ * clock, 1 tick (the next clock tick) can be an arbitrarily
+ * short time so we have to wait for at least two ticks.
+ * NOTE: If there's no other traffic, we need the timer as
+ * a 'backstop' to restart this class.
+ */
+ if (delay > tick * 2) {
+#ifdef __FreeBSD__
+ /* FreeBSD rounds up the tick */
+ t = hzto(&cl->undertime_);
+#else
+ /* other BSDs round down the tick */
+ t = hzto(&cl->undertime_) + 1;
+#endif
+ } else
+ t = 2;
+ CALLOUT_RESET(&cl->callout_, t,
+ (timeout_t *)rmc_restart, (caddr_t)cl);
+ }
+}
+
+/*
+ * void
+ * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
+ *	called by the system timer code & is responsible for checking if the
+ * class is still sleeping (it might have been restarted as a side
+ * effect of the queue scan on a packet arrival) and, if so, restarting
+ * output for the class. Inspecting the class state & restarting output
+ * require locking the class structure. In general the driver is
+ * responsible for locking but this is the only routine that is not
+ * called directly or indirectly from the interface driver so it has
+ *	called directly or indirectly from the interface driver so it has to
+ * by raising IPL to splimp so that's what's implemented here. On a
+ * different system this would probably need to be changed.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_restart(cl)
+ struct rm_class *cl;
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int s;
+
+ s = splimp();
+ if (cl->sleeping_) {
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+
+ if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
+ CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
+ (ifd->restart)(ifd->ifq_);
+ }
+ }
+ splx(s);
+}
+
+/*
+ * void
+ * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
+ * handling routine for the root class of the link sharing structure.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_root_overlimit(cl, borrow)
+ struct rm_class *cl, *borrow;
+{
+ panic("rmc_root_overlimit");
+}
+
+/*
+ * Packet Queue handling routines. Eventually, this is to localize the
+ * effects on the code of whether queues are red queues or droptail
+ * queues.
+ */
+
+static int
+_rmc_addq(cl, m)
+ rm_class_t *cl;
+ mbuf_t *m;
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
+#endif /* ALTQ_RED */
+
+ if (cl->flags_ & RMCF_CLEARDSCP)
+ write_dsfield(m, cl->pktattr_, 0);
+
+ _addq(cl->q_, m);
+ return (0);
+}
+
+/* note: _rmc_dropq is not called for red */
+static void
+_rmc_dropq(cl)
+ rm_class_t *cl;
+{
+ mbuf_t *m;
+
+ if ((m = _getq(cl->q_)) != NULL)
+ m_freem(m);
+}
+
+static mbuf_t *
+_rmc_getq(cl)
+ rm_class_t *cl;
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_getq((rio_t *)cl->red_, cl->q_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_getq(cl->red_, cl->q_);
+#endif
+ return _getq(cl->q_);
+}
+
+static mbuf_t *
+_rmc_pollq(cl)
+ rm_class_t *cl;
+{
+ return qhead(cl->q_);
+}
+
+#ifdef CBQ_TRACE
+
+struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
+struct cbqtrace *cbqtrace_ptr = NULL;
+int cbqtrace_count;
+
+/*
+ * DDB hook to trace cbq events:
+ * the last 1024 events are held in a circular buffer.
+ * use "call cbqtrace_dump(N)" to display 20 events from Nth event.
+ */
+void cbqtrace_dump(int);
+static char *rmc_funcname(void *);
+
+static struct rmc_funcs {
+ void *func;
+ char *name;
+} rmc_funcs[] =
+{
+ rmc_init, "rmc_init",
+ rmc_queue_packet, "rmc_queue_packet",
+ rmc_under_limit, "rmc_under_limit",
+ rmc_update_class_util, "rmc_update_class_util",
+ rmc_delay_action, "rmc_delay_action",
+ rmc_restart, "rmc_restart",
+ _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",
+ NULL, NULL
+};
+
+static char *rmc_funcname(func)
+ void *func;
+{
+ struct rmc_funcs *fp;
+
+ for (fp = rmc_funcs; fp->func != NULL; fp++)
+ if (fp->func == func)
+ return (fp->name);
+ return ("unknown");
+}
+
+void cbqtrace_dump(counter)
+ int counter;
+{
+ int i, *p;
+ char *cp;
+
+ counter = counter % NCBQTRACE;
+ p = (int *)&cbqtrace_buffer[counter];
+
+ for (i=0; i<20; i++) {
+ printf("[0x%x] ", *p++);
+ printf("%s: ", rmc_funcname((void *)*p++));
+ cp = (char *)p++;
+ printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
+ printf("%d\n",*p++);
+
+ if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
+ p = (int *)cbqtrace_buffer;
+ }
+}
+#endif /* CBQ_TRACE */
+
+#endif /* ALTQ_CBQ */
+
+#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
+void
+_addq(q, m)
+ class_queue_t *q;
+ mbuf_t *m;
+{
+ mbuf_t *m0;
+
+ if ((m0 = qtail(q)) != NULL)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ m0 = m;
+ m0->m_nextpkt = m;
+ qtail(q) = m;
+ qlen(q)++;
+}
+
+mbuf_t *
+_getq(q)
+ class_queue_t *q;
+{
+ mbuf_t *m, *m0;
+
+ if ((m = qtail(q)) == NULL)
+ return (NULL);
+ if ((m0 = m->m_nextpkt) != m)
+ m->m_nextpkt = m0->m_nextpkt;
+ else {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ }
+ qlen(q)--;
+ m0->m_nextpkt = NULL;
+ return (m0);
+}
+
+/* drop a packet at the tail of the queue */
+mbuf_t *
+_getq_tail(q)
+ class_queue_t *q;
+{
+ mbuf_t *m, *m0, *prev;
+
+ if ((m = m0 = qtail(q)) == NULL)
+ return NULL;
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m) {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ } else
+ qtail(q) = prev;
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+/* randomly select a packet in the queue */
+mbuf_t *
+_getq_random(q)
+ class_queue_t *q;
+{
+ struct mbuf *m;
+ int i, n;
+
+ if ((m = qtail(q)) == NULL)
+ return NULL;
+ if (m->m_nextpkt == m) {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ } else {
+ struct mbuf *prev = NULL;
+
+ n = random() % qlen(q) + 1;
+ for (i = 0; i < n; i++) {
+ prev = m;
+ m = m->m_nextpkt;
+ }
+ prev->m_nextpkt = m->m_nextpkt;
+ if (m == qtail(q))
+ qtail(q) = prev;
+ }
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+void
+_removeq(q, m)
+ class_queue_t *q;
+ mbuf_t *m;
+{
+ mbuf_t *m0, *prev;
+
+ m0 = qtail(q);
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else if (qtail(q) == m)
+ qtail(q) = prev;
+ qlen(q)--;
+}
+
+void
+_flushq(q)
+ class_queue_t *q;
+{
+ mbuf_t *m;
+
+ while ((m = _getq(q)) != NULL)
+ m_freem(m);
+ ASSERT(qlen(q) == 0);
+}
+
+#endif /* !__GNUC__ || ALTQ_DEBUG */
+#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */
diff --git a/sys/altq/altq_rmclass.h b/sys/altq/altq_rmclass.h
new file mode 100644
index 00000000000..11c2bef4b71
--- /dev/null
+++ b/sys/altq/altq_rmclass.h
@@ -0,0 +1,267 @@
+/* $OpenBSD: altq_rmclass.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_rmclass.h,v 1.6 2000/12/09 09:22:44 kjc Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_RMCLASS_H_
+#define _ALTQ_ALTQ_RMCLASS_H_
+
+#include <altq/altq_classq.h>
+
+/* #pragma ident "@(#)rm_class.h 1.20 97/10/23 SMI" */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+typedef struct mbuf mbuf_t;
+typedef struct rm_ifdat rm_ifdat_t;
+typedef struct rm_class rm_class_t;
+
+struct red;
+
+/*
+ * Macros for dealing with time values. We assume all times are
+ * 'timevals'. `microtime' is used to get the best available clock
+ * resolution. If `microtime' *doesn't* return a value that's about
+ * ten times smaller than the average packet time on the fastest
+ * link that will use these routines, a slightly different clock
+ * scheme than this one should be used.
+ * (Bias due to truncation error in this scheme will overestimate utilization
+ * and discriminate against high bandwidth classes. To remove this bias an
+ * integrator needs to be added. The simplest integrator uses a history of
+ * 10 * avg.packet.time / min.tick.time packet completion entries. This is
+ * straightforward to add but we don't want to pay the extra memory
+ * traffic to maintain it if it's not necessary (occasionally a vendor
+ * accidentally builds a workstation with a decent clock - e.g., Sun & HP).)
+ */
+
+#define RM_GETTIME(now) microtime(&now)
+
+#define TV_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \
+ (((a)->tv_usec < (b)->tv_usec) && ((a)->tv_sec <= (b)->tv_sec)))
+
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a)->tv_usec - (b)->tv_usec; \
+ if ((xxs = (a)->tv_sec - (b)->tv_sec)) { \
+ switch (xxs) { \
+ default: \
+ if (xxs < 0) \
+ printf("rm_class: bogus time values\n"); \
+ delta = 0; \
+ /* fall through */ \
+ case 2: \
+ delta += 1000000; \
+ /* fall through */ \
+ case 1: \
+ delta += 1000000; \
+ break; \
+ } \
+ } \
+}
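+
+/*
+ * e.g., for a = {5sec, 100usec} and b = {3sec, 900000usec}, TV_DELTA
+ * starts with delta = 100 - 900000 = -899900 and adds 1000000 twice
+ * for the two-second difference, giving 1100100 usec.  differences of
+ * three seconds or more fall into the default case and are treated as
+ * two seconds.
+ */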
+
+#define TV_ADD_DELTA(a, delta, res) { \
+ register int xxus = (a)->tv_usec + (delta); \
+ \
+ (res)->tv_sec = (a)->tv_sec; \
+ while (xxus >= 1000000) { \
+ ++((res)->tv_sec); \
+ xxus -= 1000000; \
+ } \
+ (res)->tv_usec = xxus; \
+}
+
+#define RM_TIMEOUT 2 /* 1 Clock tick. */
+
+#if 1
+#define RM_MAXQUEUED 1 /* this isn't used in ALTQ/CBQ */
+#else
+#define RM_MAXQUEUED 16 /* Max number of packets downstream of CBQ */
+#endif
+#define RM_MAXPRIO 8 /* Max priority */
+#define RM_MAXQUEUE 64 /* Max queue length */
+#define RM_FILTER_GAIN 5 /* log2 of gain, e.g., 5 => 31/32 */
+#define RM_POWER (1 << RM_FILTER_GAIN)
+#define RM_MAXDEPTH 32
+#define RM_NS_PER_SEC (1000000000)
+
+typedef struct _rm_class_stats_ {
+ u_int handle;
+ u_int depth;
+
+ struct pktcntr xmit_cnt; /* packets sent in this class */
+ struct pktcntr drop_cnt; /* dropped packets */
+ u_int over; /* # times went over limit */
+ u_int borrows; /* # times tried to borrow */
+ u_int overactions; /* # times invoked overlimit action */
+ u_int delays; /* # times invoked delay actions */
+} rm_class_stats_t;
+
+/*
+ * CBQ Class state structure
+ */
+struct rm_class {
+ class_queue_t *q_; /* Queue of packets */
+ rm_ifdat_t *ifdat_;
+ int pri_; /* Class priority. */
+ int depth_; /* Class depth */
+ u_int ns_per_byte_; /* NanoSeconds per byte. */
+ u_int maxrate_; /* Bytes per second for this class. */
+ u_int allotment_; /* Fraction of link bandwidth. */
+ u_int w_allotment_; /* Weighted allotment for WRR */
+ int bytes_alloc_; /* Allocation for round of WRR */
+
+ int avgidle_;
+ int maxidle_;
+ int minidle_;
+ int offtime_;
+ int sleeping_; /* != 0 if delaying */
+ int qthresh_; /* Queue threshold for formal link sharing */
+	int	leaf_;		/* Note whether leaf class or not. */
+
+ rm_class_t *children_; /* Children of this class */
+ rm_class_t *next_; /* Next pointer, used if child */
+
+ rm_class_t *peer_; /* Peer class */
+ rm_class_t *borrow_; /* Borrow class */
+ rm_class_t *parent_; /* Parent class */
+
+ void (*overlimit)(struct rm_class *, struct rm_class *);
+ void (*drop)(struct rm_class *); /* Class drop action. */
+
+ struct red *red_; /* RED state pointer */
+ struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */
+ int flags_;
+
+ int last_pkttime_; /* saved pkt_time */
+ struct timeval undertime_; /* time can next send */
+ struct timeval last_; /* time last packet sent */
+ struct timeval overtime_;
+ struct callout callout_; /* for timeout() calls */
+
+ rm_class_stats_t stats_; /* Class Statistics */
+};
+
+/*
+ * CBQ Interface state
+ */
+struct rm_ifdat {
+ int queued_; /* # pkts queued downstream */
+	int		efficient_;	/* Link Efficiency bit */
+ int wrr_; /* Enable Weighted Round-Robin */
+ u_long ns_per_byte_; /* Link byte speed. */
+ int maxqueued_; /* Max packets to queue */
+ int maxpkt_; /* Max packet size. */
+ int qi_; /* In/out pointers for downstream */
+ int qo_; /* packets */
+
+ /*
+ * Active class state and WRR state.
+ */
+ rm_class_t *active_[RM_MAXPRIO]; /* Active cl's in each pri */
+ int na_[RM_MAXPRIO]; /* # of active cl's in a pri */
+ int num_[RM_MAXPRIO]; /* # of cl's per pri */
+ int alloc_[RM_MAXPRIO]; /* Byte Allocation */
+ u_long M_[RM_MAXPRIO]; /* WRR weights. */
+
+ /*
+ * Network Interface/Solaris Queue state pointer.
+ */
+ struct ifaltq *ifq_;
+ rm_class_t *default_; /* Default Pkt class, BE */
+ rm_class_t *root_; /* Root Link class. */
+ rm_class_t *ctl_; /* Control Traffic class. */
+ void (*restart)(struct ifaltq *); /* Restart routine. */
+
+ /*
+ * Current packet downstream packet state and dynamic state.
+ */
+ rm_class_t *borrowed_[RM_MAXQUEUED]; /* Class borrowed last */
+ rm_class_t *class_[RM_MAXQUEUED]; /* class sending */
+ int curlen_[RM_MAXQUEUED]; /* Current pktlen */
+ struct timeval now_[RM_MAXQUEUED]; /* Current packet time. */
+	int		is_overlimit_[RM_MAXQUEUED]; /* Current packet overlimit state. */
+
+ int cutoff_; /* Cut-off depth for borrowing */
+
+ struct timeval ifnow_; /* expected xmit completion time */
+#if 1 /* ALTQ4PPP */
+ int maxiftime_; /* max delay inside interface */
+#endif
+ rm_class_t *pollcache_; /* cached rm_class by poll operation */
+};
+
+/* flags for rmc_init and rmc_newclass */
+/* class flags */
+#define RMCF_RED 0x0001
+#define RMCF_ECN 0x0002
+#define RMCF_RIO 0x0004
+#define RMCF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
+#define RMCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+
+/* flags for rmc_init */
+#define RMCF_WRR 0x0100
+#define RMCF_EFFICIENT 0x0200
+
+#define is_a_parent_class(cl) ((cl)->children_ != NULL)
+
+extern rm_class_t *rmc_newclass __P((int, struct rm_ifdat *, u_int,
+ void (*)(struct rm_class *,
+ struct rm_class *),
+ int, struct rm_class *, struct rm_class *,
+ u_int, int, u_int, int, int));
+extern void rmc_delete_class __P((struct rm_ifdat *, struct rm_class *));
+extern int rmc_modclass __P((struct rm_class *, u_int, int,
+ u_int, int, u_int, int));
+extern void rmc_init __P((struct ifaltq *, struct rm_ifdat *, u_int,
+ void (*)(struct ifaltq *),
+ int, int, u_int, int, u_int, int));
+extern int rmc_queue_packet __P((struct rm_class *, mbuf_t *));
+extern mbuf_t *rmc_dequeue_next __P((struct rm_ifdat *, int));
+extern void rmc_update_class_util __P((struct rm_ifdat *));
+extern void rmc_delay_action __P((struct rm_class *, struct rm_class *));
+extern void rmc_dropall __P((struct rm_class *));
+extern int rmc_get_weight __P((struct rm_ifdat *, int));
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_RMCLASS_H_ */
diff --git a/sys/altq/altq_rmclass_debug.h b/sys/altq/altq_rmclass_debug.h
new file mode 100644
index 00000000000..1ff4f595252
--- /dev/null
+++ b/sys/altq/altq_rmclass_debug.h
@@ -0,0 +1,113 @@
+/* $OpenBSD: altq_rmclass_debug.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_rmclass_debug.h,v 1.2 2000/02/22 14:00:35 itojun Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#ifndef _ALTQ_ALTQ_RMCLASS_DEBUG_H_
+#define _ALTQ_ALTQ_RMCLASS_DEBUG_H_
+
+/* #pragma ident "@(#)rm_class_debug.h 1.7 98/05/04 SMI" */
+
+/*
+ * Cbq debugging macros
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CBQ_TRACE
+#ifndef NCBQTRACE
+#define NCBQTRACE (16 * 1024)
+#endif
+
+/*
+ * To view the trace output, using adb, type:
+ * adb -k /dev/ksyms /dev/mem <cr>, then type
+ * cbqtrace_count/D to get the count, then type
+ * cbqtrace_buffer,0tcount/Dp4C" "Xn
+ * This will dump the trace buffer from 0 to count.
+ */
+/*
+ * in ALTQ, use "call cbqtrace_dump(N)" from DDB to display 20 events
+ * starting from the Nth event in the circular buffer.
+ */
+
+struct cbqtrace {
+ int count;
+ int function; /* address of function */
+ int trace_action; /* descriptive 4 characters */
+ int object; /* object operated on */
+};
+
+extern struct cbqtrace cbqtrace_buffer[];
+extern struct cbqtrace *cbqtrace_ptr;
+extern int cbqtrace_count;
+
+#define CBQTRACEINIT() { \
+ if (cbqtrace_ptr == NULL) \
+ cbqtrace_ptr = cbqtrace_buffer; \
+ else { \
+ cbqtrace_ptr = cbqtrace_buffer; \
+ bzero((void *)cbqtrace_ptr, sizeof(cbqtrace_buffer)); \
+ cbqtrace_count = 0; \
+ } \
+}
+
+#define LOCK_TRACE() splimp()
+#define UNLOCK_TRACE(x) splx(x)
+
+#define CBQTRACE(func, act, obj) { \
+ int __s = LOCK_TRACE(); \
+ int *_p = &cbqtrace_ptr->count; \
+ *_p++ = ++cbqtrace_count; \
+ *_p++ = (int)(func); \
+ *_p++ = (int)(act); \
+ *_p++ = (int)(obj); \
+ if ((struct cbqtrace *)(void *)_p >= &cbqtrace_buffer[NCBQTRACE])\
+ cbqtrace_ptr = cbqtrace_buffer; \
+ else \
+ cbqtrace_ptr = (struct cbqtrace *)(void *)_p; \
+ UNLOCK_TRACE(__s); \
+ }
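+/*
+ * note that CBQTRACE fills in one struct cbqtrace record through an
+ * int pointer, so the four stores above must match the four int
+ * fields of the structure in order.
+ */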
+#else
+
+/* If no tracing, define no-ops */
+#define CBQTRACEINIT()
+#define CBQTRACE(a, b, c)
+
+#endif /* !CBQ_TRACE */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_RMCLASS_DEBUG_H_ */
diff --git a/sys/altq/altq_subr.c b/sys/altq/altq_subr.c
new file mode 100644
index 00000000000..dcd64a1f2e5
--- /dev/null
+++ b/sys/altq/altq_subr.c
@@ -0,0 +1,1552 @@
+/* $OpenBSD: altq_subr.c,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_subr.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef ALTQ
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+
+#ifdef __FreeBSD__
+#include "opt_cpu.h" /* for FreeBSD-2.2.8 to get i586_ctr_freq */
+#include <machine/clock.h>
+#endif
+
+/*
+ * internal function prototypes
+ */
+static void tbr_timeout __P((void *));
+static int extract_ports4 __P((struct mbuf *, struct ip *,
+ struct flowinfo_in *));
+#ifdef INET6
+static int extract_ports6 __P((struct mbuf *, struct ip6_hdr *,
+ struct flowinfo_in6 *));
+#endif
+static int apply_filter4 __P((u_int32_t, struct flow_filter *,
+ struct flowinfo_in *));
+static int apply_ppfilter4 __P((u_int32_t, struct flow_filter *,
+ struct flowinfo_in *));
+#ifdef INET6
+static int apply_filter6 __P((u_int32_t, struct flow_filter6 *,
+ struct flowinfo_in6 *));
+#endif
+static int apply_tosfilter4 __P((u_int32_t, struct flow_filter *,
+ struct flowinfo_in *));
+static u_long get_filt_handle __P((struct acc_classifier *, int));
+static struct acc_filter *filth_to_filtp __P((struct acc_classifier *,
+ u_long));
+static u_int32_t filt2fibmask __P((struct flow_filter *));
+
+static void ip4f_cache __P((struct ip *, struct flowinfo_in *));
+static int ip4f_lookup __P((struct ip *, struct flowinfo_in *));
+static int ip4f_init __P((void));
+static struct ip4_frag *ip4f_alloc __P((void));
+static void ip4f_free __P((struct ip4_frag *));
+
+int (*altq_input) __P((struct mbuf *, int)) = NULL;
+static int tbr_timer = 0; /* token bucket regulator timer */
+static struct callout tbr_callout = CALLOUT_INITIALIZER;
+
+/*
+ * alternate queueing support routines
+ */
+
+/* look up the queue state by the interface name and the queuing type. */
+void *
+altq_lookup(name, type)
+ char *name;
+ int type;
+{
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(name)) != NULL) {
+ if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
+ return (ifp->if_snd.altq_disc);
+ }
+
+ return NULL;
+}
+
+int
+altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
+ struct ifaltq *ifq;
+ int type;
+ void *discipline;
+ int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+ struct mbuf *(*dequeue)(struct ifaltq *, int);
+ int (*request)(struct ifaltq *, int, void *);
+ void *clfier;
+ void *(*classify)(void *, struct mbuf *, int);
+{
+ if (!ALTQ_IS_READY(ifq))
+ return ENXIO;
+ if (ALTQ_IS_ENABLED(ifq))
+ return EBUSY;
+ if (ALTQ_IS_ATTACHED(ifq))
+ return EEXIST;
+ ifq->altq_type = type;
+ ifq->altq_disc = discipline;
+ ifq->altq_enqueue = enqueue;
+ ifq->altq_dequeue = dequeue;
+ ifq->altq_request = request;
+ ifq->altq_clfier = clfier;
+ ifq->altq_classify = classify;
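+	/* clear the flags, keeping only the unchangeable ones */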
+ ifq->altq_flags &= ALTQF_CANTCHANGE;
+#ifdef ALTQ_KLD
+ altq_module_incref(type);
+#endif
+ return 0;
+}
+
+int
+altq_detach(ifq)
+ struct ifaltq *ifq;
+{
+ if (!ALTQ_IS_READY(ifq))
+ return ENXIO;
+ if (ALTQ_IS_ENABLED(ifq))
+ return EBUSY;
+ if (!ALTQ_IS_ATTACHED(ifq))
+ return (0);
+
+#ifdef ALTQ_KLD
+ altq_module_declref(ifq->altq_type);
+#endif
+ ifq->altq_type = ALTQT_NONE;
+ ifq->altq_disc = NULL;
+ ifq->altq_enqueue = NULL;
+ ifq->altq_dequeue = NULL;
+ ifq->altq_request = NULL;
+ ifq->altq_clfier = NULL;
+ ifq->altq_classify = NULL;
+ ifq->altq_flags &= ALTQF_CANTCHANGE;
+ return 0;
+}
+
+int
+altq_enable(ifq)
+ struct ifaltq *ifq;
+{
+ int s;
+
+ if (!ALTQ_IS_READY(ifq))
+ return ENXIO;
+ if (ALTQ_IS_ENABLED(ifq))
+ return 0;
+
+ s = splimp();
+ IFQ_PURGE(ifq);
+ ASSERT(ifq->ifq_len == 0);
+ ifq->altq_flags |= ALTQF_ENABLED;
+ if (ifq->altq_clfier != NULL)
+ ifq->altq_flags |= ALTQF_CLASSIFY;
+ splx(s);
+
+ return 0;
+}
+
+int
+altq_disable(ifq)
+ struct ifaltq *ifq;
+{
+ int s;
+
+ if (!ALTQ_IS_ENABLED(ifq))
+ return 0;
+
+ s = splimp();
+ IFQ_PURGE(ifq);
+ ASSERT(ifq->ifq_len == 0);
+ ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
+ splx(s);
+ return 0;
+}
+
+void
+altq_assert(file, line, failedexpr)
+ const char *file, *failedexpr;
+ int line;
+{
+ (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
+ failedexpr, file, line);
+ panic("altq assertion");
+ /* NOTREACHED */
+}
+
+/*
+ * internal representation of token bucket parameters
+ * rate: byte_per_unittime << 32
+ * (((bits_per_sec) / 8) << 32) / machclk_freq
+ * depth: byte << 32
+ *
+ */
+#define TBR_SHIFT 32
+#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
+#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
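+
+/*
+ * e.g., with a hypothetical machclk_freq of 600MHz, a 1Mbps profile
+ * yields tbr_rate = ((1000000/8) << 32) / 600000000 ~= 894784, i.e.,
+ * roughly 0.0002 bytes of credit per machine clock tick kept in
+ * 32-bit fixed point.
+ */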
+
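+/*
+ * tbr_dequeue: dequeue a packet from the queue, subject to the token
+ * bucket regulator.  a remove immediately after a poll bypasses the
+ * token check, since the preceding poll already passed it for this
+ * packet; on ALTDQ_REMOVE the packet length is charged against
+ * tbr_token.
+ */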
+struct mbuf *
+tbr_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ struct tb_regulator *tbr;
+ struct mbuf *m;
+ int64_t interval;
+ u_int64_t now;
+
+ tbr = ifq->altq_tbr;
+ if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
+ /* if this is a remove after poll, bypass tbr check */
+ } else {
+ /* update token only when it is negative */
+ if (tbr->tbr_token <= 0) {
+ now = read_machclk();
+ interval = now - tbr->tbr_last;
+ if (interval >= tbr->tbr_filluptime)
+ tbr->tbr_token = tbr->tbr_depth;
+ else {
+ tbr->tbr_token += interval * tbr->tbr_rate;
+ if (tbr->tbr_token > tbr->tbr_depth)
+ tbr->tbr_token = tbr->tbr_depth;
+ }
+ tbr->tbr_last = now;
+ }
+ /* if token is still negative, don't allow dequeue */
+ if (tbr->tbr_token <= 0)
+ return (NULL);
+ }
+
+ if (ALTQ_IS_ENABLED(ifq))
+ m = (*ifq->altq_dequeue)(ifq, op);
+ else {
+ if (op == ALTDQ_POLL)
+ IF_POLL(ifq, m);
+ else
+ IF_DEQUEUE(ifq, m);
+ }
+
+ if (m != NULL && op == ALTDQ_REMOVE)
+ tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
+ tbr->tbr_lastop = op;
+ return (m);
+}
+
+/*
+ * set a token bucket regulator.
+ * if the specified rate is zero, the token bucket regulator is deleted.
+ */
+int
+tbr_set(ifq, profile)
+ struct ifaltq *ifq;
+ struct tb_profile *profile;
+{
+ struct tb_regulator *tbr, *otbr;
+
+ if (machclk_freq == 0)
+ init_machclk();
+ if (machclk_freq == 0) {
+ printf("tbr_set: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ if (profile->rate == 0) {
+ /* delete this tbr */
+ if ((tbr = ifq->altq_tbr) == NULL)
+ return (ENOENT);
+ ifq->altq_tbr = NULL;
+ FREE(tbr, M_DEVBUF);
+ return (0);
+ }
+
+ MALLOC(tbr, struct tb_regulator *, sizeof(struct tb_regulator),
+ M_DEVBUF, M_WAITOK);
+ if (tbr == NULL)
+ return (ENOMEM);
+ bzero(tbr, sizeof(struct tb_regulator));
+
+ tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
+ tbr->tbr_depth = TBR_SCALE(profile->depth);
+ if (tbr->tbr_rate > 0)
+ tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
+ else
+ tbr->tbr_filluptime = 0xffffffffffffffffLL;
+ tbr->tbr_token = tbr->tbr_depth;
+ tbr->tbr_last = read_machclk();
+ tbr->tbr_lastop = ALTDQ_REMOVE;
+
+ otbr = ifq->altq_tbr;
+ ifq->altq_tbr = tbr; /* set the new tbr */
+
+ if (otbr != NULL)
+ FREE(otbr, M_DEVBUF);
+ else {
+ if (tbr_timer == 0) {
+ CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
+ tbr_timer = 1;
+ }
+ }
+ return (0);
+}
+
+/*
+ * tbr_timeout goes through the interface list, and kicks the drivers
+ * if necessary.
+ */
+static void
+tbr_timeout(arg)
+ void *arg;
+{
+ struct ifnet *ifp;
+ int active, s;
+
+ active = 0;
+ s = splimp();
+#ifdef __FreeBSD__
+#if (__FreeBSD_version < 300000)
+ for (ifp = ifnet; ifp; ifp = ifp->if_next)
+#else
+ for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_link.tqe_next)
+#endif
+#else /* !FreeBSD */
+ for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_list.tqe_next)
+#endif
+ {
+ if (!TBR_IS_ENABLED(&ifp->if_snd))
+ continue;
+ active++;
+ if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL)
+ (*ifp->if_start)(ifp);
+ }
+ splx(s);
+ if (active > 0)
+ CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
+ else
+ tbr_timer = 0; /* don't need tbr_timer anymore */
+#if defined(__alpha__) && !defined(ALTQ_NOPCC)
+ {
+ /*
+ * XXX read out the machine dependent clock once a second
+ * to detect counter wrap-around.
+ */
+ static u_int cnt;
+
+ if (++cnt >= hz) {
+ (void)read_machclk();
+ cnt = 0;
+ }
+ }
+#endif /* __alpha__ && !ALTQ_NOPCC */
+}
+
+/*
+ * get token bucket regulator profile
+ */
+int
+tbr_get(ifq, profile)
+ struct ifaltq *ifq;
+ struct tb_profile *profile;
+{
+ struct tb_regulator *tbr;
+
+ if ((tbr = ifq->altq_tbr) == NULL) {
+ profile->rate = 0;
+ profile->depth = 0;
+ } else {
+ profile->rate =
+ (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
+ profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
+ }
+ return (0);
+}
+
+
+#ifndef IPPROTO_ESP
+#define IPPROTO_ESP 50 /* encapsulating security payload */
+#endif
+#ifndef IPPROTO_AH
+#define IPPROTO_AH 51 /* authentication header */
+#endif
+
+/*
+ * extract flow information from a given packet.
+ * filt_bmask indicates which flowinfo fields are required.
+ * we assume the ip header is in one mbuf, and addresses and ports are
+ * in network byte order.
+ */
+int
+altq_extractflow(m, af, flow, filt_bmask)
+ struct mbuf *m;
+ int af;
+ struct flowinfo *flow;
+ u_int32_t filt_bmask;
+{
+
+ switch (af) {
+ case PF_INET: {
+ struct flowinfo_in *fin;
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+
+ if (ip->ip_v != 4)
+ break;
+
+ fin = (struct flowinfo_in *)flow;
+ fin->fi_len = sizeof(struct flowinfo_in);
+ fin->fi_family = AF_INET;
+
+ fin->fi_proto = ip->ip_p;
+ fin->fi_tos = ip->ip_tos;
+
+ fin->fi_src.s_addr = ip->ip_src.s_addr;
+ fin->fi_dst.s_addr = ip->ip_dst.s_addr;
+
+ if (filt_bmask & FIMB4_PORTS)
+ /* if port info is required, extract port numbers */
+ extract_ports4(m, ip, fin);
+ else {
+ fin->fi_sport = 0;
+ fin->fi_dport = 0;
+ fin->fi_gpi = 0;
+ }
+ return (1);
+ }
+
+#ifdef INET6
+ case PF_INET6: {
+ struct flowinfo_in6 *fin6;
+ struct ip6_hdr *ip6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ /* should we check the ip version? */
+
+ fin6 = (struct flowinfo_in6 *)flow;
+ fin6->fi6_len = sizeof(struct flowinfo_in6);
+ fin6->fi6_family = AF_INET6;
+
+ fin6->fi6_proto = ip6->ip6_nxt;
+ fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+
+ fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
+ fin6->fi6_src = ip6->ip6_src;
+ fin6->fi6_dst = ip6->ip6_dst;
+
+ if ((filt_bmask & FIMB6_PORTS) ||
+ ((filt_bmask & FIMB6_PROTO)
+ && ip6->ip6_nxt > IPPROTO_IPV6))
+ /*
+ * if port info is required, or proto is required
+ * but there are option headers, extract port
+ * and protocol numbers.
+ */
+ extract_ports6(m, ip6, fin6);
+ else {
+ fin6->fi6_sport = 0;
+ fin6->fi6_dport = 0;
+ fin6->fi6_gpi = 0;
+ }
+ return (1);
+ }
+#endif /* INET6 */
+
+ default:
+ break;
+ }
+
+ /* failed */
+ flow->fi_len = sizeof(struct flowinfo);
+ flow->fi_family = AF_UNSPEC;
+ return (0);
+}
+
+/*
+ * helper routine to extract port numbers
+ */
+/* structure for ipsec and ipv6 option header template */
+struct _opt6 {
+ u_int8_t opt6_nxt; /* next header */
+ u_int8_t opt6_hlen; /* header extension length */
+ u_int16_t _pad;
+ u_int32_t ah_spi; /* security parameter index
+ for authentication header */
+};
+
+/*
+ * extract port numbers from an ipv4 packet.
+ */
+static int
+extract_ports4(m, ip, fin)
+ struct mbuf *m;
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct mbuf *m0;
+ u_short ip_off;
+ u_int8_t proto;
+ int off;
+
+ fin->fi_sport = 0;
+ fin->fi_dport = 0;
+ fin->fi_gpi = 0;
+
+ ip_off = ntohs(ip->ip_off);
+ /* if it is a fragment, try cached fragment info */
+ if (ip_off & IP_OFFMASK) {
+ ip4f_lookup(ip, fin);
+ return (1);
+ }
+
+ /* locate the mbuf containing the protocol header */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)ip >= m0->m_data) &&
+ ((caddr_t)ip < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("extract_ports4: can't locate header! ip=%p\n", ip);
+#endif
+ return (0);
+ }
+ off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
+ proto = ip->ip_p;
+
+#ifdef ALTQ_IPSEC
+ again:
+#endif
+ while (off >= m0->m_len) {
+ off -= m0->m_len;
+ m0 = m0->m_next;
+ }
+ ASSERT(m0->m_len >= off + 4);
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP: {
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
+ fin->fi_sport = udp->uh_sport;
+ fin->fi_dport = udp->uh_dport;
+ fin->fi_proto = proto;
+ }
+ break;
+
+#ifdef ALTQ_IPSEC
+ case IPPROTO_ESP:
+ if (fin->fi_gpi == 0){
+ u_int32_t *gpi;
+
+ gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
+ fin->fi_gpi = *gpi;
+ }
+ fin->fi_proto = proto;
+ break;
+
+ case IPPROTO_AH: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ proto = opt6->opt6_nxt;
+ off += 8 + (opt6->opt6_hlen * 4);
+ if (fin->fi_gpi == 0)
+ fin->fi_gpi = opt6->ah_spi;
+ }
+ /* goto the next header */
+ goto again;
+#endif /* ALTQ_IPSEC */
+
+ default:
+ fin->fi_proto = proto;
+ return (0);
+ }
+
+ /* if this is a first fragment, cache it. */
+ if (ip_off & IP_MF)
+ ip4f_cache(ip, fin);
+
+ return (1);
+}
+
+#ifdef INET6
+static int
+extract_ports6(m, ip6, fin6)
+ struct mbuf *m;
+ struct ip6_hdr *ip6;
+ struct flowinfo_in6 *fin6;
+{
+ struct mbuf *m0;
+ int off;
+ u_int8_t proto;
+
+ fin6->fi6_gpi = 0;
+ fin6->fi6_sport = 0;
+ fin6->fi6_dport = 0;
+
+ /* locate the mbuf containing the protocol header */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)ip6 >= m0->m_data) &&
+ ((caddr_t)ip6 < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
+#endif
+ return (0);
+ }
+ off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
+
+ proto = ip6->ip6_nxt;
+ do {
+ while (off >= m0->m_len) {
+ off -= m0->m_len;
+ m0 = m0->m_next;
+ }
+ ASSERT(m0->m_len >= off + 4);
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP: {
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
+ fin6->fi6_sport = udp->uh_sport;
+ fin6->fi6_dport = udp->uh_dport;
+ fin6->fi6_proto = proto;
+ }
+ return (1);
+
+ case IPPROTO_ESP:
+ if (fin6->fi6_gpi == 0) {
+ u_int32_t *gpi;
+
+ gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
+ fin6->fi6_gpi = *gpi;
+ }
+ fin6->fi6_proto = proto;
+ return (1);
+
+ case IPPROTO_AH: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ if (fin6->fi6_gpi == 0)
+ fin6->fi6_gpi = opt6->ah_spi;
+ proto = opt6->opt6_nxt;
+ off += 8 + (opt6->opt6_hlen * 4);
+ /* goto the next header */
+ break;
+ }
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ proto = opt6->opt6_nxt;
+ off += (opt6->opt6_hlen + 1) * 8;
+ /* goto the next header */
+ break;
+ }
+
+ case IPPROTO_FRAGMENT:
+ /* ipv6 fragmentations are not supported yet */
+ default:
+ fin6->fi6_proto = proto;
+ return (0);
+ }
+ } while (1);
+ /*NOTREACHED*/
+}
+#endif /* INET6 */
+
+/*
+ * altq common classifier
+ */
+int
+acc_add_filter(classifier, filter, class, phandle)
+ struct acc_classifier *classifier;
+ struct flow_filter *filter;
+ void *class;
+ u_long *phandle;
+{
+ struct acc_filter *afp, *prev, *tmp;
+ int i, s;
+
+#ifdef INET6
+ if (filter->ff_flow.fi_family != AF_INET &&
+ filter->ff_flow.fi_family != AF_INET6)
+ return (EINVAL);
+#else
+ if (filter->ff_flow.fi_family != AF_INET)
+ return (EINVAL);
+#endif
+
+ MALLOC(afp, struct acc_filter *, sizeof(struct acc_filter),
+ M_DEVBUF, M_WAITOK);
+ if (afp == NULL)
+ return (ENOMEM);
+ bzero(afp, sizeof(struct acc_filter));
+
+ afp->f_filter = *filter;
+ afp->f_class = class;
+
+ i = ACC_WILDCARD_INDEX;
+ if (filter->ff_flow.fi_family == AF_INET) {
+ struct flow_filter *filter4 = &afp->f_filter;
+
+ /*
+ * if address is 0, it's a wildcard. if address mask
+ * isn't set, use full mask.
+ */
+ if (filter4->ff_flow.fi_dst.s_addr == 0)
+ filter4->ff_mask.mask_dst.s_addr = 0;
+ else if (filter4->ff_mask.mask_dst.s_addr == 0)
+ filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
+ if (filter4->ff_flow.fi_src.s_addr == 0)
+ filter4->ff_mask.mask_src.s_addr = 0;
+ else if (filter4->ff_mask.mask_src.s_addr == 0)
+ filter4->ff_mask.mask_src.s_addr = 0xffffffff;
+
+ /* clear extra bits in addresses */
+ filter4->ff_flow.fi_dst.s_addr &=
+ filter4->ff_mask.mask_dst.s_addr;
+ filter4->ff_flow.fi_src.s_addr &=
+ filter4->ff_mask.mask_src.s_addr;
+
+ /*
+ * if dst address is a wildcard, use hash-entry
+ * ACC_WILDCARD_INDEX.
+ */
+ if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
+ i = ACC_WILDCARD_INDEX;
+ else
+ i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
+ }
+#ifdef INET6
+ else if (filter->ff_flow.fi_family == AF_INET6) {
+ struct flow_filter6 *filter6 =
+ (struct flow_filter6 *)&afp->f_filter;
+#ifndef IN6MASK0 /* taken from kame ipv6 */
+#define IN6MASK0 {{{ 0, 0, 0, 0 }}}
+#define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
+ const struct in6_addr in6mask0 = IN6MASK0;
+ const struct in6_addr in6mask128 = IN6MASK128;
+#endif
+
+ if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
+ filter6->ff_mask6.mask6_dst = in6mask0;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
+ filter6->ff_mask6.mask6_dst = in6mask128;
+ if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
+ filter6->ff_mask6.mask6_src = in6mask0;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
+ filter6->ff_mask6.mask6_src = in6mask128;
+
+ /* clear extra bits in addresses */
+ for (i = 0; i < 16; i++)
+ filter6->ff_flow6.fi6_dst.s6_addr[i] &=
+ filter6->ff_mask6.mask6_dst.s6_addr[i];
+ for (i = 0; i < 16; i++)
+ filter6->ff_flow6.fi6_src.s6_addr[i] &=
+ filter6->ff_mask6.mask6_src.s6_addr[i];
+
+ if (filter6->ff_flow6.fi6_flowlabel == 0)
+ i = ACC_WILDCARD_INDEX;
+ else
+ i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
+ }
+#endif /* INET6 */
+
+ afp->f_handle = get_filt_handle(classifier, i);
+
+ /* update filter bitmask */
+ afp->f_fbmask = filt2fibmask(filter);
+ classifier->acc_fbmask |= afp->f_fbmask;
+
+ /*
+ * add this filter to the filter list.
+ * filters are ordered from the highest rule number.
+ */
+ s = splimp();
+ prev = NULL;
+ LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
+ if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
+ prev = tmp;
+ else
+ break;
+ }
+ if (prev == NULL)
+ LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
+ else
+ LIST_INSERT_AFTER(prev, afp, f_chain);
+ splx(s);
+
+ *phandle = afp->f_handle;
+ return (0);
+}
+
+int
+acc_delete_filter(classifier, handle)
+ struct acc_classifier *classifier;
+ u_long handle;
+{
+ struct acc_filter *afp;
+ int s;
+
+ if ((afp = filth_to_filtp(classifier, handle)) == NULL)
+ return (EINVAL);
+
+ s = splimp();
+ LIST_REMOVE(afp, f_chain);
+ splx(s);
+
+ FREE(afp, M_DEVBUF);
+
+ /* todo: update filt_bmask */
+
+ return (0);
+}
+
+/*
+ * delete filters referring to the specified class.
+ * if the all flag is non-zero, delete all the filters.
+ */
+int
+acc_discard_filters(classifier, class, all)
+ struct acc_classifier *classifier;
+ void *class;
+ int all;
+{
+ struct acc_filter *afp;
+ int i, s;
+
+ s = splimp();
+ for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
+ do {
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (all || afp->f_class == class) {
+ LIST_REMOVE(afp, f_chain);
+ FREE(afp, M_DEVBUF);
+ /* start again from the head */
+ break;
+ }
+ } while (afp != NULL);
+ }
+ splx(s);
+
+ if (all)
+ classifier->acc_fbmask = 0;
+
+ return (0);
+}
+
+void *
+acc_classify(clfier, m, af)
+ void *clfier;
+ struct mbuf *m;
+ int af;
+{
+ struct acc_classifier *classifier;
+ struct flowinfo flow;
+ struct acc_filter *afp;
+ int i;
+
+ classifier = (struct acc_classifier *)clfier;
+ altq_extractflow(m, af, &flow, classifier->acc_fbmask);
+
+ if (flow.fi_family == AF_INET) {
+ struct flowinfo_in *fp = (struct flowinfo_in *)&flow;
+
+ if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
+ /* only tos is used */
+ LIST_FOREACH(afp,
+ &classifier->acc_filters[ACC_WILDCARD_INDEX],
+ f_chain)
+ if (apply_tosfilter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+ } else if ((classifier->acc_fbmask &
+ (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
+ == 0) {
+ /* only proto and ports are used */
+ LIST_FOREACH(afp,
+ &classifier->acc_filters[ACC_WILDCARD_INDEX],
+ f_chain)
+ if (apply_ppfilter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+ } else {
+ /* get the filter hash entry from its dest address */
+ i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
+ do {
+ /*
+ * go through this loop twice. first for dst
+ * hash, second for wildcards.
+ */
+ LIST_FOREACH(afp, &classifier->acc_filters[i],
+ f_chain)
+ if (apply_filter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+
+ /*
+ * check again for filters with a dst addr
+ * wildcard.
+ * (daddr == 0 || dmask != 0xffffffff).
+ */
+ if (i != ACC_WILDCARD_INDEX)
+ i = ACC_WILDCARD_INDEX;
+ else
+ break;
+ } while (1);
+ }
+ }
+#ifdef INET6
+ else if (flow.fi_family == AF_INET6) {
+ struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;
+
+ /* get the filter hash entry from its flow ID */
+ if (fp6->fi6_flowlabel != 0)
+ i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
+ else
+			/* flowlabel can be zero */
+ i = ACC_WILDCARD_INDEX;
+
+ /* go through this loop twice. first for flow hash, second
+ for wildcards. */
+ do {
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (apply_filter6(afp->f_fbmask,
+ (struct flow_filter6 *)&afp->f_filter,
+ fp6))
+ /* filter matched */
+ return (afp->f_class);
+
+ /*
+ * check again for filters with a wildcard.
+ */
+ if (i != ACC_WILDCARD_INDEX)
+ i = ACC_WILDCARD_INDEX;
+ else
+ break;
+ } while (1);
+ }
+#endif /* INET6 */
+
+ /* no filter matched */
+ return (NULL);
+}
+
+static int
+apply_filter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
+ return (0);
+ if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
+ return (0);
+ if ((fbmask & FIMB4_DADDR) &&
+ filt->ff_flow.fi_dst.s_addr !=
+ (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
+ return (0);
+ if ((fbmask & FIMB4_SADDR) &&
+ filt->ff_flow.fi_src.s_addr !=
+ (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
+ return (0);
+ if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
+ return (0);
+ if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
+ (pkt->fi_tos & filt->ff_mask.mask_tos))
+ return (0);
+ if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
+ return (0);
+ /* match */
+ return (1);
+}
+
+/*
+ * filter matching function optimized for a common case that checks
+ * only protocol and port numbers
+ */
+static int
+apply_ppfilter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
+ return (0);
+ if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
+ return (0);
+ if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
+ return (0);
+ /* match */
+ return (1);
+}
+
+/*
+ * filter matching function only for tos field.
+ */
+static int
+apply_tosfilter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
+ (pkt->fi_tos & filt->ff_mask.mask_tos))
+ return (0);
+ /* match */
+ return (1);
+}
+
+#ifdef INET6
+static int
+apply_filter6(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter6 *filt;
+ struct flowinfo_in6 *pkt;
+{
+ int i;
+
+ if (filt->ff_flow6.fi6_family != AF_INET6)
+ return (0);
+ if ((fbmask & FIMB6_FLABEL) &&
+ filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
+ return (0);
+ if ((fbmask & FIMB6_PROTO) &&
+ filt->ff_flow6.fi6_proto != pkt->fi6_proto)
+ return (0);
+ if ((fbmask & FIMB6_SPORT) &&
+ filt->ff_flow6.fi6_sport != pkt->fi6_sport)
+ return (0);
+ if ((fbmask & FIMB6_DPORT) &&
+ filt->ff_flow6.fi6_dport != pkt->fi6_dport)
+ return (0);
+ if (fbmask & FIMB6_SADDR) {
+ for (i = 0; i < 4; i++)
+ if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
+ (pkt->fi6_src.s6_addr32[i] &
+ filt->ff_mask6.mask6_src.s6_addr32[i]))
+ return (0);
+ }
+ if (fbmask & FIMB6_DADDR) {
+ for (i = 0; i < 4; i++)
+ if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
+ (pkt->fi6_dst.s6_addr32[i] &
+ filt->ff_mask6.mask6_dst.s6_addr32[i]))
+ return (0);
+ }
+ if ((fbmask & FIMB6_TCLASS) &&
+ filt->ff_flow6.fi6_tclass !=
+ (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
+ return (0);
+ if ((fbmask & FIMB6_GPI) &&
+ filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
+ return (0);
+ /* match */
+ return (1);
+}
+#endif /* INET6 */
+
+/*
+ * filter handle:
+ * bit 20-28: index to the filter hash table
+ * bit 0-19: unique id in the hash bucket.
+ */
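+/*
+ * e.g., a filter in the wildcard bucket (index 256 with the default
+ * table size) with per-bucket id 3 gets handle (256 << 20) | 3 =
+ * 0x10000003, and ACC_GET_HINDEX() recovers the bucket index by
+ * shifting right 20 bits.
+ */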
+static u_long
+get_filt_handle(classifier, i)
+ struct acc_classifier *classifier;
+ int i;
+{
+ static u_long handle_number = 1;
+ u_long handle;
+ struct acc_filter *afp;
+
+ while (1) {
+ handle = handle_number++ & 0x000fffff;
+
+ if (LIST_EMPTY(&classifier->acc_filters[i]))
+ break;
+
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if ((afp->f_handle & 0x000fffff) == handle)
+ break;
+ if (afp == NULL)
+ break;
+ /* this handle is already used, try again */
+ }
+
+ return ((i << 20) | handle);
+}
+
+/* convert filter handle to filter pointer */
+static struct acc_filter *
+filth_to_filtp(classifier, handle)
+ struct acc_classifier *classifier;
+ u_long handle;
+{
+ struct acc_filter *afp;
+ int i;
+
+ i = ACC_GET_HINDEX(handle);
+
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (afp->f_handle == handle)
+ return (afp);
+
+ return (NULL);
+}
+
+/* create flowinfo bitmask */
+static u_int32_t
+filt2fibmask(filt)
+ struct flow_filter *filt;
+{
+ u_int32_t mask = 0;
+#ifdef INET6
+ struct flow_filter6 *filt6;
+#endif
+
+ switch (filt->ff_flow.fi_family) {
+ case AF_INET:
+ if (filt->ff_flow.fi_proto != 0)
+ mask |= FIMB4_PROTO;
+ if (filt->ff_flow.fi_tos != 0)
+ mask |= FIMB4_TOS;
+ if (filt->ff_flow.fi_dst.s_addr != 0)
+ mask |= FIMB4_DADDR;
+ if (filt->ff_flow.fi_src.s_addr != 0)
+ mask |= FIMB4_SADDR;
+ if (filt->ff_flow.fi_sport != 0)
+ mask |= FIMB4_SPORT;
+ if (filt->ff_flow.fi_dport != 0)
+ mask |= FIMB4_DPORT;
+ if (filt->ff_flow.fi_gpi != 0)
+ mask |= FIMB4_GPI;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ filt6 = (struct flow_filter6 *)filt;
+
+ if (filt6->ff_flow6.fi6_proto != 0)
+ mask |= FIMB6_PROTO;
+ if (filt6->ff_flow6.fi6_tclass != 0)
+ mask |= FIMB6_TCLASS;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
+ mask |= FIMB6_DADDR;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
+ mask |= FIMB6_SADDR;
+ if (filt6->ff_flow6.fi6_sport != 0)
+ mask |= FIMB6_SPORT;
+ if (filt6->ff_flow6.fi6_dport != 0)
+ mask |= FIMB6_DPORT;
+ if (filt6->ff_flow6.fi6_gpi != 0)
+ mask |= FIMB6_GPI;
+ if (filt6->ff_flow6.fi6_flowlabel != 0)
+ mask |= FIMB6_FLABEL;
+ break;
+#endif /* INET6 */
+ }
+ return (mask);
+}
+
+
+/*
+ * helper functions to handle IPv4 fragments.
+ * currently only in-sequence fragments are handled.
+ * - fragment info is cached in an LRU list.
+ * - when a first fragment is found, cache its flow info.
+ * - when a non-first fragment is found, lookup the cache.
+ */
+
+struct ip4_frag {
+ TAILQ_ENTRY(ip4_frag) ip4f_chain;
+ char ip4f_valid;
+ u_short ip4f_id;
+ struct flowinfo_in ip4f_info;
+};
+
+static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
+
+#define IP4F_TABSIZE 16 /* IPv4 fragment cache size */
+
+
+static void
+ip4f_cache(ip, fin)
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct ip4_frag *fp;
+
+ if (TAILQ_EMPTY(&ip4f_list)) {
+ /* first time call, allocate fragment cache entries. */
+ if (ip4f_init() < 0)
+ /* allocation failed! */
+ return;
+ }
+
+ fp = ip4f_alloc();
+ fp->ip4f_id = ip->ip_id;
+
+ /* save port numbers */
+ fp->ip4f_info.fi_sport = fin->fi_sport;
+ fp->ip4f_info.fi_dport = fin->fi_dport;
+ fp->ip4f_info.fi_gpi = fin->fi_gpi;
+}
+
+static int
+ip4f_lookup(ip, fin)
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct ip4_frag *fp;
+
+ for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
+ fp = TAILQ_NEXT(fp, ip4f_chain))
+ if (ip->ip_id == fp->ip4f_id &&
+ ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
+ ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
+ ip->ip_p == fp->ip4f_info.fi_proto) {
+
+ /* found the matching entry */
+ fin->fi_sport = fp->ip4f_info.fi_sport;
+ fin->fi_dport = fp->ip4f_info.fi_dport;
+ fin->fi_gpi = fp->ip4f_info.fi_gpi;
+
+ if ((ntohs(ip->ip_off) & IP_MF) == 0)
+ /* this is the last fragment,
+ release the entry. */
+ ip4f_free(fp);
+
+ return (1);
+ }
+
+ /* no matching entry found */
+ return (0);
+}
+
+static int
+ip4f_init(void)
+{
+ struct ip4_frag *fp;
+ int i;
+
+ TAILQ_INIT(&ip4f_list);
+ for (i=0; i<IP4F_TABSIZE; i++) {
+ MALLOC(fp, struct ip4_frag *, sizeof(struct ip4_frag),
+ M_DEVBUF, M_NOWAIT);
+ if (fp == NULL) {
+ printf("ip4f_init: can't alloc %dth entry!\n", i);
+ if (i == 0)
+ return (-1);
+ return (0);
+ }
+ fp->ip4f_valid = 0;
+ TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
+ }
+ return (0);
+}
+
+static struct ip4_frag *
+ip4f_alloc(void)
+{
+ struct ip4_frag *fp;
+
+ /* reclaim an entry at the tail, put it at the head */
+ fp = TAILQ_LAST(&ip4f_list, ip4f_list);
+ TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
+ fp->ip4f_valid = 1;
+ TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
+ return (fp);
+}
+
+static void
+ip4f_free(fp)
+ struct ip4_frag *fp;
+{
+ TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
+ fp->ip4f_valid = 0;
+ TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
+}
+
+/*
+ * read and write diffserv field in IPv4 or IPv6 header
+ */
+u_int8_t
+read_dsfield(m, pktattr)
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ struct mbuf *m0;
+ u_int8_t ds_field = 0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return ((u_int8_t)0);
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+#ifdef ALTQ_DEBUG
+ printf("read_dsfield: can't locate header!\n");
+#endif
+ return ((u_int8_t)0);
+ }
+
+ if (pktattr->pattr_af == AF_INET) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+
+ if (ip->ip_v != 4)
+ return ((u_int8_t)0); /* version mismatch! */
+ ds_field = ip->ip_tos;
+ }
+#ifdef INET6
+ else if (pktattr->pattr_af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return ((u_int8_t)0); /* version mismatch! */
+ ds_field = (flowlabel >> 20) & 0xff;
+ }
+#endif
+ return (ds_field);
+}
+
+void
+write_dsfield(m, pktattr, dsfield)
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+ u_int8_t dsfield;
+{
+ struct mbuf *m0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return;
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+#ifdef ALTQ_DEBUG
+ printf("write_dsfield: can't locate header!\n");
+#endif
+ return;
+ }
+
+ if (pktattr->pattr_af == AF_INET) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+ u_int8_t old;
+ int32_t sum;
+
+ if (ip->ip_v != 4)
+ return; /* version mismatch! */
+ old = ip->ip_tos;
+ dsfield |= old & 3; /* leave CU bits */
+ if (old == dsfield)
+ return;
+ ip->ip_tos = dsfield;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
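+		 * here m is the 16-bit header word holding the old tos in
+		 * its low-order byte; the unchanged high-order byte cancels
+		 * in ~m + m', leaving 0xff00 + (~old & 0xff) + dsfield.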
+ */
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += 0xff00 + (~old & 0xff) + dsfield;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+
+ ip->ip_sum = htons(~sum & 0xffff);
+ }
+#ifdef INET6
+ else if (pktattr->pattr_af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return; /* version mismatch! */
+ flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ }
+#endif
+ return;
+}
+
+
+/*
+ * high resolution clock support taking advantage of a machine dependent
+ * high resolution time counter (e.g., timestamp counter of intel pentium).
+ * we assume
+ * - 64-bit-long monotonically-increasing counter
+ * - frequency range is 100M-4GHz (CPU speed)
+ */
+u_int32_t machclk_freq = 0;
+u_int32_t machclk_per_tick = 0;
+
+#if (defined(__i386__) || defined(__alpha__)) && !defined(ALTQ_NOPCC)
+#ifdef __FreeBSD__
+/* freebsd makes clock frequency accessible */
+#ifdef __alpha__
+extern u_int32_t cycles_per_sec; /* alpha cpu clock frequency */
+#endif
+void
+init_machclk(void)
+{
+#if defined(__i386__)
+#if (__FreeBSD_version > 300000)
+ machclk_freq = tsc_freq;
+#else
+ machclk_freq = i586_ctr_freq;
+#endif
+#elif defined(__alpha__)
+ machclk_freq = cycles_per_sec;
+#endif /* __alpha__ */
+ machclk_per_tick = machclk_freq / hz;
+}
+#else /* !__FreeBSD__ */
+/*
+ * measure Pentium TSC or Alpha PCC clock frequency
+ */
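+/*
+ * the frequency is estimated by reading the counter before and after
+ * a one second tsleep() and dividing the cycle delta by the elapsed
+ * microtime() interval.
+ */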
+void
+init_machclk(void)
+{
+ static int wait;
+ struct timeval tv_start, tv_end;
+ u_int64_t start, end, diff;
+ int timo;
+
+ microtime(&tv_start);
+ start = read_machclk();
+ timo = hz; /* 1 sec */
+ (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
+ microtime(&tv_end);
+ end = read_machclk();
+ diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
+ + tv_end.tv_usec - tv_start.tv_usec;
+ if (diff != 0)
+ machclk_freq = (u_int)((end - start) * 1000000 / diff);
+ machclk_per_tick = machclk_freq / hz;
+
+ printf("altq: CPU clock: %uHz\n", machclk_freq);
+}
+#endif /* !__FreeBSD__ */
+#ifdef __alpha__
+/*
+ * make a 64bit counter value out of the 32bit alpha processor cycle counter.
+ * read_machclk must be called within a half of its wrap-around cycle
+ * (about 5 sec for 400MHz cpu) to properly detect a counter wrap-around.
+ * tbr_timeout calls read_machclk once a second.
+ */
+u_int64_t
+read_machclk(void)
+{
+ static u_int32_t last_pcc, upper;
+ u_int32_t pcc;
+
+ pcc = (u_int32_t)alpha_rpcc();
+ if (pcc <= last_pcc)
+ upper++;
+ last_pcc = pcc;
+ return (((u_int64_t)upper << 32) + pcc);
+}
+#endif /* __alpha__ */
+#else /* !i386 && !alpha */
+/* use microtime() for now */
+void
+init_machclk(void)
+{
+ machclk_freq = 1000000 << MACHCLK_SHIFT;
+ machclk_per_tick = machclk_freq / hz;
+ printf("altq: emulate %uHz cpu clock\n", machclk_freq);
+}
+#endif /* !i386 && !alpha */
+
+#endif /* ALTQ */
diff --git a/sys/altq/altq_var.h b/sys/altq/altq_var.h
new file mode 100644
index 00000000000..213352a72b1
--- /dev/null
+++ b/sys/altq/altq_var.h
@@ -0,0 +1,238 @@
+/* $OpenBSD: altq_var.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_var.h,v 1.8 2001/02/09 09:44:41 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_ALTQ_VAR_H_
+#define _ALTQ_ALTQ_VAR_H_
+
+#ifdef _KERNEL
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+/*
+ * filter structure for altq common classifier
+ */
+struct acc_filter {
+ LIST_ENTRY(acc_filter) f_chain;
+ void *f_class; /* pointer to the class */
+ u_long f_handle; /* filter id */
+ u_int32_t f_fbmask; /* filter bitmask */
+ struct flow_filter f_filter; /* filter value */
+};
+
+/*
+ * XXX ACC_FILTER_TABLESIZE can't be larger than 2048 unless we fix
+ * the handle assignment.
+ */
+#define ACC_FILTER_TABLESIZE (256+1)
+#define ACC_FILTER_MASK (ACC_FILTER_TABLESIZE - 2)
+#define ACC_WILDCARD_INDEX (ACC_FILTER_TABLESIZE - 1)
+#ifdef __GNUC__
+#define ACC_GET_HASH_INDEX(addr) \
+ ({int x = (addr) + ((addr) >> 16); (x + (x >> 8)) & ACC_FILTER_MASK;})
+#else
+#define ACC_GET_HASH_INDEX(addr) \
+ (((addr) + ((addr) >> 8) + ((addr) >> 16) + ((addr) >> 24)) \
+ & ACC_FILTER_MASK)
+#endif
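+/*
+ * both variants fold all four bytes of the address into the low 8
+ * bits before masking, so addresses differing only in their upper
+ * bytes still spread across the hash buckets.
+ */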
+#define ACC_GET_HINDEX(handle) ((handle) >> 20)
+
+struct acc_classifier {
+ u_int32_t acc_fbmask;
+ LIST_HEAD(filt, acc_filter) acc_filters[ACC_FILTER_TABLESIZE];
+};
+
+/*
+ * flowinfo mask bits used by classifier
+ */
+/* for ipv4 */
+#define FIMB4_PROTO 0x0001
+#define FIMB4_TOS 0x0002
+#define FIMB4_DADDR 0x0004
+#define FIMB4_SADDR 0x0008
+#define FIMB4_DPORT 0x0010
+#define FIMB4_SPORT 0x0020
+#define FIMB4_GPI 0x0040
+#define FIMB4_ALL 0x007f
+/* for ipv6 */
+#define FIMB6_PROTO 0x0100
+#define FIMB6_TCLASS 0x0200
+#define FIMB6_DADDR 0x0400
+#define FIMB6_SADDR 0x0800
+#define FIMB6_DPORT 0x1000
+#define FIMB6_SPORT 0x2000
+#define FIMB6_GPI 0x4000
+#define FIMB6_FLABEL 0x8000
+#define FIMB6_ALL 0xff00
+
+#define FIMB_ALL (FIMB4_ALL|FIMB6_ALL)
+
+#define FIMB4_PORTS (FIMB4_DPORT|FIMB4_SPORT|FIMB4_GPI)
+#define FIMB6_PORTS (FIMB6_DPORT|FIMB6_SPORT|FIMB6_GPI)
+
+/*
+ * machine dependent clock
+ * a 64bit high resolution time counter.
+ */
+extern u_int32_t machclk_freq;
+extern u_int32_t machclk_per_tick;
+extern void init_machclk(void);
+
+#if defined(__i386__) && !defined(ALTQ_NOPCC)
+/* for pentium tsc */
+#include <machine/cpufunc.h>
+
+#define read_machclk() rdtsc()
+#ifdef __OpenBSD__
+static __inline u_int64_t
+rdtsc(void)
+{
+ u_int64_t rv;
+ __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
+ return (rv);
+}
+#endif /* __OpenBSD__ */
+
+#elif defined(__alpha__) && !defined(ALTQ_NOPCC)
+/* for alpha rpcc */
+extern u_int64_t read_machclk(void);
+
+#else /* !i386 && !alpha */
+/* emulate 256MHz using microtime() */
+#define MACHCLK_SHIFT 8
+static __inline u_int64_t
+read_machclk(void)
+{
+ struct timeval tv;
+ microtime(&tv);
+ return (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
+ + tv.tv_usec) << MACHCLK_SHIFT);
+}
+#endif /* !i386 && !alpha */
+
+/*
+ * debug support
+ */
+#ifdef ALTQ_DEBUG
+#ifdef __STDC__
+#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, #e))
+#else /* PCC */
+#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, "e"))
+#endif
+#else
+#define ASSERT(e) ((void)0)
+#endif
+
+/*
+ * misc stuff for compatibility
+ */
+/* ioctl cmd type */
+#if defined(__FreeBSD__) && (__FreeBSD__ < 3)
+typedef int ioctlcmd_t;
+#else
+typedef u_long ioctlcmd_t;
+#endif
+
+/*
+ * queue macros:
+ *	the interface of the TAILQ_LAST macro changed after the
+ *	introduction of softupdates.  redefine it here to make it work
+ *	with pre-2.2.7 systems.
+ */
+#undef TAILQ_LAST
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+#ifndef TAILQ_EMPTY
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+#endif
+#ifndef TAILQ_FOREACH
+#define TAILQ_FOREACH(var, head, field) \
+ for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field))
+#endif
+
+/* macro for timeout/untimeout */
+#if (__FreeBSD_version > 300000) || defined(__NetBSD__)
+/* use callout */
+#include <sys/callout.h>
+
+#define CALLOUT_INIT(c) callout_init((c))
+#define CALLOUT_RESET(c,t,f,a) callout_reset((c),(t),(f),(a))
+#define CALLOUT_STOP(c) callout_stop((c))
+#ifndef CALLOUT_INITIALIZER
+#define CALLOUT_INITIALIZER { { { NULL } }, 0, NULL, NULL, 0 }
+#endif
+#elif defined(__OpenBSD__)
+#include <sys/timeout.h>
+/* callout structure as a wrapper of struct timeout */
+struct callout {
+ struct timeout c_to;
+};
+#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (0)
+#define CALLOUT_RESET(c,t,f,a) do { if (!timeout_initialized(&(c)->c_to)) \
+ timeout_set(&(c)->c_to, (f), (a)); \
+ timeout_add(&(c)->c_to, (t)); } while (0)
+#define CALLOUT_STOP(c) timeout_del(&(c)->c_to)
+#define CALLOUT_INITIALIZER { { { NULL }, NULL, NULL, 0, 0 } }
+#else
+/* use old-style timeout/untimeout */
+/* dummy callout structure */
+struct callout {
+ void *c_arg; /* function argument */
+	void	(*c_func) __P((void *)); /* function to call */
+};
+#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (0)
+#define CALLOUT_RESET(c,t,f,a) do { (c)->c_arg = (a); \
+ (c)->c_func = (f); \
+ timeout((f),(a),(t)); } while (0)
+#define CALLOUT_STOP(c) untimeout((c)->c_func,(c)->c_arg)
+#define CALLOUT_INITIALIZER { NULL, NULL }
+#endif
+#if !defined(__FreeBSD__)
+typedef void (timeout_t)(void *);
+#endif
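+
+/*
+ * usage sketch (illustrative): arming a periodic timer through the
+ * portability macros above.  foo_callout, foo_tick() and "sc" are
+ * hypothetical names, not part of this header:
+ *
+ *	static struct callout foo_callout;
+ *	static void foo_tick(void *);
+ *
+ *	CALLOUT_INIT(&foo_callout);
+ *	CALLOUT_RESET(&foo_callout, hz / 10, foo_tick, sc);
+ *	(and on shutdown)
+ *	CALLOUT_STOP(&foo_callout);
+ */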
+
+#define m_pktlen(m) ((m)->m_pkthdr.len)
+
+struct ifnet; struct mbuf; struct flowinfo;
+
+void *altq_lookup __P((char *, int));
+int altq_extractflow __P((struct mbuf *, int, struct flowinfo *, u_int32_t));
+int acc_add_filter __P((struct acc_classifier *, struct flow_filter *,
+ void *, u_long *));
+int acc_delete_filter __P((struct acc_classifier *, u_long));
+int acc_discard_filters __P((struct acc_classifier *, void *, int));
+void *acc_classify __P((void *, struct mbuf *, int));
+u_int8_t read_dsfield __P((struct mbuf *, struct altq_pktattr *));
+void write_dsfield __P((struct mbuf *, struct altq_pktattr *, u_int8_t));
+void altq_assert __P((const char *, int, const char *));
+int tbr_set __P((struct ifaltq *, struct tb_profile *));
+int tbr_get __P((struct ifaltq *, struct tb_profile *));
+
+#endif /* _KERNEL */
+#endif /* _ALTQ_ALTQ_VAR_H_ */
diff --git a/sys/altq/altq_wfq.c b/sys/altq/altq_wfq.c
new file mode 100644
index 00000000000..e1545d5b694
--- /dev/null
+++ b/sys/altq/altq_wfq.c
@@ -0,0 +1,752 @@
+/* $OpenBSD: altq_wfq.c,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_wfq.c,v 1.7 2000/12/14 08:12:46 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * March 27, 1997. Written by Hiroshi Kyusojin of Keio University
+ * (kyu@mt.cs.keio.ac.jp).
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_WFQ
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/uio.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <netinet/in.h>
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_wfq.h>
+
+/*
+#define WFQ_DEBUG
+*/
+
+static int wfq_setenable(struct wfq_interface *, int);
+static int wfq_ifattach(struct wfq_interface *);
+static int wfq_ifdetach(struct wfq_interface *);
+static int wfq_ifenqueue(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+static u_long wfq_hash(struct flowinfo *, int);
+static __inline u_long wfq_hashbydstaddr(struct flowinfo *, int);
+static __inline u_long wfq_hashbysrcport(struct flowinfo *, int);
+static wfq *wfq_maxqueue(wfq_state_t *);
+static struct mbuf *wfq_ifdequeue(struct ifaltq *, int);
+static int wfq_getqid(struct wfq_getqid *);
+static int wfq_setweight(struct wfq_setweight *);
+static int wfq_getstats(struct wfq_getstats *);
+static int wfq_config(struct wfq_conf *);
+static int wfq_request __P((struct ifaltq *, int, void *));
+static int wfq_flush(struct ifaltq *);
+static void *wfq_classify(void *, struct mbuf *, int);
+
+/* global variable: head of the wfq state list */
+static wfq_state_t *wfq_list = NULL;
+
+static int
+wfq_setenable(ifacep, flag)
+ struct wfq_interface *ifacep;
+ int flag;
+{
+ wfq_state_t *wfqp;
+ int error = 0;
+
+ if ((wfqp = altq_lookup(ifacep->wfq_ifacename, ALTQT_WFQ)) == NULL)
+ return (EBADF);
+
+ switch(flag){
+ case ENABLE:
+ error = altq_enable(wfqp->ifq);
+ break;
+ case DISABLE:
+ error = altq_disable(wfqp->ifq);
+ break;
+ }
+ return error;
+}
+
+
+static int
+wfq_ifattach(ifacep)
+ struct wfq_interface *ifacep;
+{
+ int error = 0, i;
+ struct ifnet *ifp;
+ wfq_state_t *new_wfqp;
+ wfq *queue;
+
+ if ((ifp = ifunit(ifacep->wfq_ifacename)) == NULL) {
+#ifdef WFQ_DEBUG
+ printf("wfq_ifattach()...no ifp found\n");
+#endif
+ return (ENXIO);
+ }
+
+ if (!ALTQ_IS_READY(&ifp->if_snd)) {
+#ifdef WFQ_DEBUG
+ printf("wfq_ifattach()...altq is not ready\n");
+#endif
+ return (ENXIO);
+ }
+
+ /* allocate and initialize wfq_state_t */
+ MALLOC(new_wfqp, wfq_state_t *, sizeof(wfq_state_t),
+ M_DEVBUF, M_WAITOK);
+ if (new_wfqp == NULL)
+ return (ENOMEM);
+ bzero(new_wfqp, sizeof(wfq_state_t));
+ MALLOC(queue, wfq *, sizeof(wfq) * DEFAULT_QSIZE,
+ M_DEVBUF, M_WAITOK);
+ if (queue == NULL) {
+ FREE(new_wfqp, M_DEVBUF);
+ return (ENOMEM);
+ }
+ bzero(queue, sizeof(wfq) * DEFAULT_QSIZE);
+
+ /* keep the ifq */
+ new_wfqp->ifq = &ifp->if_snd;
+ new_wfqp->nums = DEFAULT_QSIZE;
+ new_wfqp->hwm = HWM;
+ new_wfqp->bytes = 0;
+ new_wfqp->rrp = NULL;
+ new_wfqp->queue = queue;
+ new_wfqp->hash_func = wfq_hashbydstaddr;
+ new_wfqp->fbmask = FIMB4_DADDR;
+
+ for (i = 0; i < new_wfqp->nums; i++, queue++) {
+ queue->next = queue->prev = NULL;
+ queue->head = queue->tail = NULL;
+ queue->bytes = queue->quota = 0;
+ queue->weight = 100;
+ }
+
+	/*
+	 * attach WFQ to this ifnet structure.
+	 */
+ if ((error = altq_attach(&ifp->if_snd, ALTQT_WFQ, new_wfqp,
+ wfq_ifenqueue, wfq_ifdequeue, wfq_request,
+ new_wfqp, wfq_classify)) != 0) {
+ FREE(queue, M_DEVBUF);
+ FREE(new_wfqp, M_DEVBUF);
+ return (error);
+ }
+
+ new_wfqp->next = wfq_list;
+ wfq_list = new_wfqp;
+
+ return (error);
+}
+
+
+static int
+wfq_ifdetach(ifacep)
+ struct wfq_interface *ifacep;
+{
+ int error = 0;
+ wfq_state_t *wfqp;
+
+ if ((wfqp = altq_lookup(ifacep->wfq_ifacename, ALTQT_WFQ)) == NULL)
+ return (EBADF);
+
+ /* free queued mbuf */
+ wfq_flush(wfqp->ifq);
+
+ /* remove WFQ from the ifnet structure. */
+ (void)altq_disable(wfqp->ifq);
+ (void)altq_detach(wfqp->ifq);
+
+ /* remove from the wfqstate list */
+ if (wfq_list == wfqp)
+ wfq_list = wfqp->next;
+ else {
+ wfq_state_t *wp = wfq_list;
+ do {
+ if (wp->next == wfqp) {
+ wp->next = wfqp->next;
+ break;
+ }
+ } while ((wp = wp->next) != NULL);
+ }
+
+ /* deallocate wfq_state_t */
+ FREE(wfqp->queue, M_DEVBUF);
+ FREE(wfqp, M_DEVBUF);
+ return (error);
+}
+
+static int
+wfq_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ wfq_state_t *wfqp = (wfq_state_t *)ifq->altq_disc;
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ wfq_flush(wfqp->ifq);
+ break;
+ }
+ return (0);
+}
+
+
+static int
+wfq_flush(ifq)
+ struct ifaltq *ifq;
+{
+ struct mbuf *mp;
+
+ while ((mp = wfq_ifdequeue(ifq, ALTDQ_REMOVE)) != NULL)
+ m_freem(mp);
+ if (ALTQ_IS_ENABLED(ifq))
+ ifq->ifq_len = 0;
+ return 0;
+}
+
+static void *
+wfq_classify(clfier, m, af)
+ void *clfier;
+ struct mbuf *m;
+ int af;
+{
+ wfq_state_t *wfqp = (wfq_state_t *)clfier;
+ struct flowinfo flow;
+
+ altq_extractflow(m, af, &flow, wfqp->fbmask);
+ return (&wfqp->queue[(*wfqp->hash_func)(&flow, wfqp->nums)]);
+}
+
+static int
+wfq_ifenqueue(ifq, mp, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *mp;
+ struct altq_pktattr *pktattr;
+{
+ wfq_state_t *wfqp;
+ wfq *queue;
+ int byte, error = 0;
+
+ wfqp = (wfq_state_t *)ifq->altq_disc;
+ mp->m_nextpkt = NULL;
+
+ /* grab a queue selected by classifier */
+ if (pktattr == NULL || (queue = pktattr->pattr_class) == NULL)
+ queue = &wfqp->queue[0];
+
+ if (queue->tail == NULL)
+ queue->head = mp;
+ else
+ queue->tail->m_nextpkt = mp;
+ queue->tail = mp;
+ byte = mp->m_pkthdr.len;
+ queue->bytes += byte;
+ wfqp->bytes += byte;
+ ifq->ifq_len++;
+
+ if (queue->next == NULL) {
+		/* this queue becomes active; add it to the active list */
+ if (wfqp->rrp == NULL){
+ /* no queue in the active list */
+ queue->next = queue->prev = queue;
+ wfqp->rrp = queue;
+ WFQ_ADDQUOTA(queue);
+ } else {
+ /* insert the queue at the tail of the active list */
+ queue->prev = wfqp->rrp->prev;
+ wfqp->rrp->prev->next = queue;
+ wfqp->rrp->prev = queue;
+ queue->next = wfqp->rrp;
+ queue->quota = 0;
+ }
+ }
+
+ /* check overflow. if the total size exceeds the high water mark,
+ drop packets from the longest queue. */
+ while (wfqp->bytes > wfqp->hwm) {
+ wfq *drop_queue = wfq_maxqueue(wfqp);
+
+ /* drop the packet at the head. */
+ mp = drop_queue->head;
+ if ((drop_queue->head = mp->m_nextpkt) == NULL)
+ drop_queue->tail = NULL;
+ mp->m_nextpkt = NULL;
+ byte = mp->m_pkthdr.len;
+ drop_queue->bytes -= byte;
+ PKTCNTR_ADD(&drop_queue->drop_cnt, byte);
+ wfqp->bytes -= byte;
+ m_freem(mp);
+ ifq->ifq_len--;
+ if(drop_queue == queue)
+ /* the queue for this flow is selected to drop */
+ error = ENOBUFS;
+ }
+ return error;
+}
+
+
+static u_long wfq_hash(flow, n)
+ struct flowinfo *flow;
+ int n;
+{
+ u_long val = 0;
+
+ if (flow != NULL) {
+ if (flow->fi_family == AF_INET) {
+ struct flowinfo_in *fp = (struct flowinfo_in *)flow;
+ u_long val2;
+
+ val = fp->fi_dst.s_addr ^ fp->fi_src.s_addr;
+ val = val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24);
+ val2 = fp->fi_dport ^ fp->fi_sport ^ fp->fi_proto;
+ val2 = val2 ^ (val2 >> 8);
+ val = val ^ val2;
+ }
+#ifdef INET6
+ else if (flow->fi_family == AF_INET6) {
+ struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)flow;
+
+ val = ntohl(fp6->fi6_flowlabel);
+ }
+#endif
+ }
+
+ return (val % n);
+}
+
+
+static __inline u_long wfq_hashbydstaddr(flow, n)
+ struct flowinfo *flow;
+ int n;
+{
+ u_long val = 0;
+
+ if (flow != NULL) {
+ if (flow->fi_family == AF_INET) {
+ struct flowinfo_in *fp = (struct flowinfo_in *)flow;
+
+ val = fp->fi_dst.s_addr;
+ val = val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24);
+ }
+#ifdef INET6
+ else if (flow->fi_family == AF_INET6) {
+ struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)flow;
+
+ val = ntohl(fp6->fi6_flowlabel);
+ }
+#endif
+ }
+
+ return (val % n);
+}
+
+static __inline u_long wfq_hashbysrcport(flow, n)
+ struct flowinfo *flow;
+ int n;
+{
+ u_long val = 0;
+
+ if (flow != NULL) {
+ if (flow->fi_family == AF_INET) {
+ struct flowinfo_in *fp = (struct flowinfo_in *)flow;
+
+ val = fp->fi_sport;
+ }
+#ifdef INET6
+ else if (flow->fi_family == AF_INET6) {
+ struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)flow;
+
+ val = fp6->fi6_sport;
+ }
+#endif
+ }
+ val = val ^ (val >> 8);
+
+ return (val % n);
+}
+
+static wfq *wfq_maxqueue(wfqp)
+ wfq_state_t *wfqp;
+{
+ int byte, max_byte = 0;
+ wfq *queue, *max_queue = NULL;
+
+ if((queue = wfqp->rrp) == NULL)
+ /* never happens */
+ return NULL;
+ do{
+ if ((byte = queue->bytes * 100 / queue->weight) > max_byte) {
+ max_queue = queue;
+ max_byte = byte;
+ }
+ } while ((queue = queue->next) != wfqp->rrp);
+
+ return max_queue;
+}
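+
+/*
+ * worked example (illustrative): with two active queues,
+ *	q1: bytes = 3000, weight = 100  ->  3000 * 100 / 100 = 3000
+ *	q2: bytes = 2000, weight =  50  ->  2000 * 100 /  50 = 4000
+ * q2 has the larger weight-normalized backlog, so the overflow path
+ * in wfq_ifenqueue() drops from the head of q2 first.
+ */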
+
+
+static struct mbuf *
+wfq_ifdequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ wfq_state_t *wfqp;
+ wfq *queue;
+ struct mbuf *mp;
+ int byte;
+
+ wfqp = (wfq_state_t *)ifq->altq_disc;
+
+ if ((wfqp->bytes == 0) || ((queue = wfqp->rrp) == NULL))
+ /* no packet in the queues */
+ return NULL;
+
+ while (1) {
+ if (queue->quota > 0) {
+ if (queue->bytes <= 0) {
+				/* this queue no longer has packets;
+				   remove it from the active list. */
+ if (queue->next == queue){
+ /* no other active queue
+ -- this case never happens in
+ this algorithm. */
+ queue->next = queue->prev = NULL;
+ wfqp->rrp = NULL;
+ return NULL;
+ } else {
+ queue->prev->next = queue->next;
+ queue->next->prev = queue->prev;
+					/* the round-robin pointer points
+					   to this queue; advance it */
+ wfqp->rrp = queue->next;
+ queue->next = queue->prev = NULL;
+ queue = wfqp->rrp;
+ WFQ_ADDQUOTA(queue);
+ continue;
+ }
+ }
+
+ /* dequeue a packet from this queue */
+ mp = queue->head;
+ if (op == ALTDQ_REMOVE) {
+ if((queue->head = mp->m_nextpkt) == NULL)
+ queue->tail = NULL;
+ byte = mp->m_pkthdr.len;
+ mp->m_nextpkt = NULL;
+ queue->quota -= byte;
+ queue->bytes -= byte;
+ PKTCNTR_ADD(&queue->xmit_cnt, byte);
+ wfqp->bytes -= byte;
+ if (ALTQ_IS_ENABLED(ifq))
+ ifq->ifq_len--;
+ }
+ return mp;
+
+			/* if this dequeue empties the queue, it will be
+			   removed from the active list on the next round */
+ }
+
+ /* advance the round-robin pointer */
+ queue = wfqp->rrp = queue->next;
+ WFQ_ADDQUOTA(queue);
+ }
+}
+
+static int
+wfq_getqid(gqidp)
+ struct wfq_getqid *gqidp;
+{
+ wfq_state_t *wfqp;
+
+ if ((wfqp = altq_lookup(gqidp->iface.wfq_ifacename, ALTQT_WFQ))
+ == NULL)
+ return (EBADF);
+
+ gqidp->qid = (*wfqp->hash_func)(&gqidp->flow, wfqp->nums);
+ return 0;
+}
+
+static int
+wfq_setweight(swp)
+ struct wfq_setweight *swp;
+{
+ wfq_state_t *wfqp;
+ wfq *queue;
+ int old;
+
+	if (swp->weight <= 0) {
+		printf("wfq: weight must be a positive integer\n");
+		return (EINVAL);
+	}
+
+ if ((wfqp = altq_lookup(swp->iface.wfq_ifacename, ALTQT_WFQ)) == NULL)
+ return (EBADF);
+
+ queue = &wfqp->queue[swp->qid];
+ old = queue->weight;
+ queue->weight = swp->weight;
+ swp->weight = old;
+ return 0;
+}
+
+
+static int
+wfq_getstats(gsp)
+ struct wfq_getstats *gsp;
+{
+ wfq_state_t *wfqp;
+ wfq *queue;
+ queue_stats *stats;
+
+ if ((wfqp = altq_lookup(gsp->iface.wfq_ifacename, ALTQT_WFQ)) == NULL)
+ return (EBADF);
+
+ if (gsp->qid >= wfqp->nums)
+ return (EINVAL);
+
+ queue = &wfqp->queue[gsp->qid];
+ stats = &gsp->stats;
+
+ stats->bytes = queue->bytes;
+ stats->weight = queue->weight;
+ stats->xmit_cnt = queue->xmit_cnt;
+ stats->drop_cnt = queue->drop_cnt;
+
+ return 0;
+}
+
+
+static int
+wfq_config(cf)
+ struct wfq_conf *cf;
+{
+ wfq_state_t *wfqp;
+ wfq *queue;
+ int i, error = 0;
+
+ if ((wfqp = altq_lookup(cf->iface.wfq_ifacename, ALTQT_WFQ)) == NULL)
+ return (EBADF);
+
+ if(cf->nqueues <= 0 || MAX_QSIZE < cf->nqueues)
+ cf->nqueues = DEFAULT_QSIZE;
+
+ if (cf->nqueues != wfqp->nums) {
+ /* free queued mbuf */
+ wfq_flush(wfqp->ifq);
+ FREE(wfqp->queue, M_DEVBUF);
+
+ MALLOC(queue, wfq *, sizeof(wfq) * cf->nqueues,
+ M_DEVBUF, M_WAITOK);
+ if (queue == NULL)
+ return (ENOMEM);
+ bzero(queue, sizeof(wfq) * cf->nqueues);
+
+ wfqp->nums = cf->nqueues;
+ wfqp->bytes = 0;
+ wfqp->rrp = NULL;
+ wfqp->queue = queue;
+ for (i = 0; i < wfqp->nums; i++, queue++) {
+ queue->next = queue->prev = NULL;
+ queue->head = queue->tail = NULL;
+ queue->bytes = queue->quota = 0;
+ queue->weight = 100;
+ }
+ }
+
+ if (cf->qlimit != 0)
+ wfqp->hwm = cf->qlimit;
+
+ switch (cf->hash_policy) {
+ case WFQ_HASH_DSTADDR:
+ wfqp->hash_func = wfq_hashbydstaddr;
+ wfqp->fbmask = FIMB4_DADDR;
+#ifdef INET6
+ wfqp->fbmask |= FIMB6_FLABEL; /* use flowlabel for ipv6 */
+#endif
+ break;
+ case WFQ_HASH_SRCPORT:
+ wfqp->hash_func = wfq_hashbysrcport;
+ wfqp->fbmask = FIMB4_SPORT;
+#ifdef INET6
+ wfqp->fbmask |= FIMB6_SPORT;
+#endif
+ break;
+ case WFQ_HASH_FULL:
+ wfqp->hash_func = wfq_hash;
+ wfqp->fbmask = FIMB4_ALL;
+#ifdef INET6
+ wfqp->fbmask |= FIMB6_FLABEL; /* use flowlabel for ipv6 */
+#endif
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+/*
+ * wfq device interface
+ */
+
+altqdev_decl(wfq);
+
+int
+wfqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ return 0;
+}
+
+int
+wfqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+ struct proc *p;
+{
+ struct ifnet *ifp;
+ struct wfq_interface iface;
+ wfq_state_t *wfqp;
+ int s;
+
+ s = splimp();
+ while ((wfqp = wfq_list) != NULL) {
+ ifp = wfqp->ifq->altq_ifp;
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+ sprintf(iface.wfq_ifacename, "%s", ifp->if_xname);
+#else
+ sprintf(iface.wfq_ifacename, "%s%d",
+ ifp->if_name, ifp->if_unit);
+#endif
+ wfq_ifdetach(&iface);
+ }
+ splx(s);
+ return 0;
+}
+
+int
+wfqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+ struct proc *p;
+{
+ int error = 0;
+ int s;
+
+ /* check cmd for superuser only */
+ switch (cmd) {
+ case WFQ_GET_QID:
+ case WFQ_GET_STATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+ s = splimp();
+ switch (cmd) {
+
+ case WFQ_ENABLE:
+ error = wfq_setenable((struct wfq_interface *)addr, ENABLE);
+ break;
+
+ case WFQ_DISABLE:
+ error = wfq_setenable((struct wfq_interface *)addr, DISABLE);
+ break;
+
+ case WFQ_IF_ATTACH:
+ error = wfq_ifattach((struct wfq_interface *)addr);
+ break;
+
+ case WFQ_IF_DETACH:
+ error = wfq_ifdetach((struct wfq_interface *)addr);
+ break;
+
+ case WFQ_GET_QID:
+ error = wfq_getqid((struct wfq_getqid *)addr);
+ break;
+
+ case WFQ_SET_WEIGHT:
+ error = wfq_setweight((struct wfq_setweight *)addr);
+ break;
+
+ case WFQ_GET_STATS:
+ error = wfq_getstats((struct wfq_getstats *)addr);
+ break;
+
+ case WFQ_CONFIG:
+ error = wfq_config((struct wfq_conf *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ splx(s);
+ return error;
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw wfq_sw =
+ {"wfq", wfqopen, wfqclose, wfqioctl};
+
+ALTQ_MODULE(altq_wfq, ALTQT_WFQ, &wfq_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ_WFQ */
diff --git a/sys/altq/altq_wfq.h b/sys/altq/altq_wfq.h
new file mode 100644
index 00000000000..1fe215f0f02
--- /dev/null
+++ b/sys/altq/altq_wfq.h
@@ -0,0 +1,125 @@
+/* $OpenBSD: altq_wfq.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: altq_wfq.h,v 1.5 2000/12/14 08:12:46 thorpej Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * March 27, 1997. Written by Hiroshi Kyusojin of Keio University
+ * (kyu@mt.cs.keio.ac.jp).
+ */
+
+#ifndef _ALTQ_ALTQ_WFQ_H_
+#define _ALTQ_ALTQ_WFQ_H_
+
+#include <altq/altq.h>
+
+#define DEFAULT_QSIZE 256
+#define MAX_QSIZE 2048
+
+struct wfq_interface{
+ char wfq_ifacename[IFNAMSIZ];
+};
+
+struct wfq_getqid{
+ struct wfq_interface iface;
+ struct flowinfo flow;
+ u_long qid;
+};
+
+struct wfq_setweight {
+ struct wfq_interface iface;
+ int qid;
+ int weight;
+};
+
+typedef struct each_queue_stats {
+ int bytes; /* bytes in this queue */
+ int weight; /* weight in percent */
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+} queue_stats;
+
+struct wfq_getstats {
+ struct wfq_interface iface;
+ int qid;
+ queue_stats stats;
+};
+
+struct wfq_conf {
+ struct wfq_interface iface;
+ int hash_policy; /* hash policy */
+ int nqueues; /* number of queues */
+ int qlimit; /* queue size in bytes */
+};
+
+#define WFQ_HASH_DSTADDR 0 /* hash by dst address */
+#define WFQ_HASH_SRCPORT 1 /* hash by src port */
+#define WFQ_HASH_FULL 2 /* hash by all fields */
+
+#define WFQ_IF_ATTACH _IOW('Q', 1, struct wfq_interface)
+#define WFQ_IF_DETACH _IOW('Q', 2, struct wfq_interface)
+#define WFQ_ENABLE _IOW('Q', 3, struct wfq_interface)
+#define WFQ_DISABLE _IOW('Q', 4, struct wfq_interface)
+#define WFQ_CONFIG _IOWR('Q', 6, struct wfq_conf)
+#define WFQ_GET_STATS _IOWR('Q', 12, struct wfq_getstats)
+#define WFQ_GET_QID _IOWR('Q', 30, struct wfq_getqid)
+#define WFQ_SET_WEIGHT _IOWR('Q', 31, struct wfq_setweight)
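+
+/*
+ * usage sketch (illustrative, from a hypothetical userland tool; the
+ * device path and interface name below are assumptions):
+ *
+ *	struct wfq_conf conf;
+ *	int fd = open("/dev/altq/wfq", O_RDWR);
+ *
+ *	bzero(&conf, sizeof(conf));
+ *	strncpy(conf.iface.wfq_ifacename, "fxp0", IFNAMSIZ);
+ *	conf.hash_policy = WFQ_HASH_DSTADDR;
+ *	conf.nqueues = 256;
+ *	conf.qlimit = 64 * 1024;
+ *	if (ioctl(fd, WFQ_CONFIG, &conf) < 0)
+ *		err(1, "WFQ_CONFIG");
+ */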
+
+#ifdef _KERNEL
+
+#define HWM (64 * 1024)
+#define WFQ_QUOTA 512 /* quota bytes to send at a time */
+#define WFQ_ADDQUOTA(q) ((q)->quota += WFQ_QUOTA * (q)->weight / 100)
+#define ENABLE 0
+#define DISABLE 1
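+
+/*
+ * worked example (illustrative): WFQ_ADDQUOTA grants a queue
+ * WFQ_QUOTA bytes per round, scaled by its weight:
+ *	weight 100  ->  quota += 512 bytes per round
+ *	weight  50  ->  quota += 256 bytes per round
+ * over many rounds, each backlogged queue therefore gets bandwidth in
+ * proportion to its weight.
+ */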
+
+typedef struct weighted_fair_queue{
+ struct weighted_fair_queue *next, *prev;
+ struct mbuf *head, *tail;
+ int bytes; /* bytes in this queue */
+ int quota; /* bytes sent in this round */
+ int weight; /* weight in percent */
+
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+} wfq;
+
+
+typedef struct wfqstate {
+ struct wfqstate *next; /* for wfqstate list */
+ struct ifaltq *ifq;
+ int nums; /* number of queues */
+ int hwm; /* high water mark */
+ int bytes; /* total bytes in all the queues */
+ wfq *rrp; /* round robin pointer */
+ wfq *queue; /* pointer to queue list */
+ u_long (*hash_func)(struct flowinfo *, int);
+ u_int32_t fbmask; /* filter bitmask */
+} wfq_state_t;
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_WFQ_H_ */
diff --git a/sys/altq/altqconf.h b/sys/altq/altqconf.h
new file mode 100644
index 00000000000..5666ffbe797
--- /dev/null
+++ b/sys/altq/altqconf.h
@@ -0,0 +1,16 @@
+/* $OpenBSD: altqconf.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $NetBSD: altqconf.h,v 1.2 2001/05/30 11:57:16 mrg Exp $ */
+
+#ifdef ALTQ
+#define NALTQ 1
+#else
+#define NALTQ 0
+#endif
+
+cdev_decl(altq);
+
+#define cdev_altq_init(c,n) { \
+ dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
+ (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
+ (dev_type_stop((*))) enodev, 0, (dev_type_select((*))) enodev, \
+ (dev_type_mmap((*))) enodev }
diff --git a/sys/altq/if_altq.h b/sys/altq/if_altq.h
new file mode 100644
index 00000000000..2b9c7d2ef02
--- /dev/null
+++ b/sys/altq/if_altq.h
@@ -0,0 +1,166 @@
+/* $OpenBSD: if_altq.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $KAME: if_altq.h,v 1.6 2001/01/29 19:59:09 itojun Exp $ */
+
+/*
+ * Copyright (C) 1997-2000
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_IF_ALTQ_H_
+#define _ALTQ_IF_ALTQ_H_
+
+#ifdef KERNEL
+#ifndef _KERNEL
+#define _KERNEL
+#endif
+#endif
+
+struct altq_pktattr; struct tb_regulator; struct top_cdnr;
+
+/*
+ * Structure defining a queue for a network interface.
+ */
+struct ifaltq {
+ /* fields compatible with struct ifqueue */
+ struct mbuf *ifq_head;
+ struct mbuf *ifq_tail;
+ int ifq_len;
+ int ifq_maxlen;
+ int ifq_drops;
+
+ /* alternate queueing related fields */
+ int altq_type; /* discipline type */
+ int altq_flags; /* flags (e.g. ready, in-use) */
+ void *altq_disc; /* for discipline-specific use */
+ struct ifnet *altq_ifp; /* back pointer to interface */
+
+ int (*altq_enqueue) __P((struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *));
+ struct mbuf *(*altq_dequeue) __P((struct ifaltq *, int));
+ int (*altq_request) __P((struct ifaltq *, int, void *));
+
+ /* classifier fields */
+ void *altq_clfier; /* classifier-specific use */
+ void *(*altq_classify) __P((void *, struct mbuf *, int));
+
+ /* token bucket regulator */
+ struct tb_regulator *altq_tbr;
+
+ /* input traffic conditioner (doesn't belong to the output queue...) */
+ struct top_cdnr *altq_cdnr;
+};
+
+
+#ifdef _KERNEL
+
+/*
+ * packet attributes used by queueing disciplines.
+ * pattr_class is a discipline-dependent scheduling class that is
+ * set by a classifier.
+ * pattr_hdr and pattr_af may be used by a discipline to access
+ * the header within an mbuf (e.g. ECN needs to update the CE bit).
+ * note that pattr_hdr could be stale after m_pullup, though link
+ * layer output routines usually don't use m_pullup. link-level
+ * compression also invalidates these fields. thus, pattr_hdr needs
+ * to be verified when a discipline touches the header.
+ */
+struct altq_pktattr {
+ void *pattr_class; /* sched class set by classifier */
+ int pattr_af; /* address family */
+ caddr_t pattr_hdr; /* saved header position in mbuf */
+};
+
+/*
+ * a token-bucket regulator limits the rate at which a network driver
+ * can dequeue packets from the output queue.
+ * modern cards are able to buffer a large number of packets and to
+ * dequeue too many packets at a time.  this bursty dequeue behavior
+ * makes it impossible for queueing disciplines to schedule packets.
+ * a token-bucket is used to control the burst size in a
+ * device-independent manner.
+ */
+struct tb_regulator {
+ int64_t tbr_rate; /* (scaled) token bucket rate */
+ int64_t tbr_depth; /* (scaled) token bucket depth */
+
+ int64_t tbr_token; /* (scaled) current token */
+ int64_t tbr_filluptime; /* (scaled) time to fill up bucket */
+ u_int64_t tbr_last; /* last time token was updated */
+
+ int tbr_lastop; /* last dequeue operation type
+ needed for poll-and-dequeue */
+};
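+
+/*
+ * sketch of the regulator's operation (conceptual only; the actual
+ * code is tbr_dequeue() in altq_subr.c):
+ *
+ *	now = read_machclk();
+ *	if (now - tbr_last >= tbr_filluptime)
+ *		tbr_token = tbr_depth;		(bucket is full)
+ *	else
+ *		tbr_token = min(tbr_depth,
+ *		    tbr_token + (now - tbr_last) * tbr_rate);
+ *	if (tbr_token <= 0)
+ *		return NULL;			(over the rate limit)
+ *	dequeue a packet, charge its (scaled) length to tbr_token,
+ *	and set tbr_last = now;
+ */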
+
+/* if_altqflags */
+#define ALTQF_READY 0x01 /* driver supports alternate queueing */
+#define ALTQF_ENABLED 0x02 /* altq is in use */
+#define ALTQF_CLASSIFY 0x04 /* classify packets */
+#define ALTQF_CNDTNING 0x08 /* altq traffic conditioning is enabled */
+#define ALTQF_DRIVER1 0x40 /* driver specific */
+
+/* if_altqflags set internally only: */
+#define ALTQF_CANTCHANGE (ALTQF_READY)
+
+/* altq_dequeue 2nd arg */
+#define ALTDQ_REMOVE 1 /* dequeue mbuf from the queue */
+#define ALTDQ_POLL 2 /* don't dequeue mbuf from the queue */
+
+/* altq request types (currently only purge is defined) */
+#define ALTRQ_PURGE 1 /* purge all packets */
+
+#define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY)
+#define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED)
+#define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY)
+#define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING)
+
+#define ALTQ_SET_CNDTNING(ifq) ((ifq)->altq_flags |= ALTQF_CNDTNING)
+#define ALTQ_CLEAR_CNDTNING(ifq) ((ifq)->altq_flags &= ~ALTQF_CNDTNING)
+#define ALTQ_IS_ATTACHED(ifq) ((ifq)->altq_disc != NULL)
+
+#define ALTQ_ENQUEUE(ifq, m, pa, err) \
+ (err) = (*(ifq)->altq_enqueue)((ifq),(m),(pa))
+#define ALTQ_DEQUEUE(ifq, m) \
+ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_REMOVE)
+#define ALTQ_POLL(ifq, m) \
+ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_POLL)
+#define ALTQ_PURGE(ifq) \
+ (void)(*(ifq)->altq_request)((ifq), ALTRQ_PURGE, (void *)0)
+#define ALTQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
+#define TBR_IS_ENABLED(ifq) ((ifq)->altq_tbr != NULL)
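+
+/*
+ * usage sketch (illustrative): a driver start routine drains the
+ * output queue through these macros:
+ *
+ *	struct mbuf *m;
+ *
+ *	while (!ALTQ_IS_EMPTY(&ifp->if_snd)) {
+ *		ALTQ_DEQUEUE(&ifp->if_snd, m);
+ *		if (m == NULL)
+ *			break;
+ *		(start transmission of m)
+ *	}
+ */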
+
+extern int altq_attach __P((struct ifaltq *, int, void *,
+ int (*)(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *),
+ struct mbuf *(*)(struct ifaltq *, int),
+ int (*)(struct ifaltq *, int, void *),
+ void *,
+ void *(*)(void *, struct mbuf *, int)));
+extern int altq_detach __P((struct ifaltq *));
+extern int altq_enable __P((struct ifaltq *));
+extern int altq_disable __P((struct ifaltq *));
+extern struct mbuf *tbr_dequeue __P((struct ifaltq *, int));
+extern int (*altq_input) __P((struct mbuf *, int));
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_IF_ALTQ_H_ */