summary refs log tree commit diff
path: root/sys/dev
diff options
context:
space:
mode:
author Jan Klemkow <jan@cvs.openbsd.org> 2024-06-07 08:44:26 +0000
committer Jan Klemkow <jan@cvs.openbsd.org> 2024-06-07 08:44:26 +0000
commit d886561dce1c2823ea68eaa7f1346b820f0b8292 (patch)
tree 701465b0dd744231c3582bcef7eafdbccfaf3efc /sys/dev
parent b381c070dfc1570cb99c6bbe25ca52aa985e0fa5 (diff)
Use TCP Large Receive Offload in vmx(4).
tested by Hrvoje Popovski and bluhm@ ok bluhm@
Diffstat (limited to 'sys/dev')
-rw-r--r-- sys/dev/pci/if_vmx.c 175
-rw-r--r-- sys/dev/pci/if_vmxreg.h 5
2 files changed, 142 insertions, 38 deletions
diff --git a/sys/dev/pci/if_vmx.c b/sys/dev/pci/if_vmx.c
index 0c5883a1bca..e70d9b50f8a 100644
--- a/sys/dev/pci/if_vmx.c
+++ b/sys/dev/pci/if_vmx.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_vmx.c,v 1.86 2024/05/21 19:49:06 jan Exp $ */
+/* $OpenBSD: if_vmx.c,v 1.87 2024/06/07 08:44:25 jan Exp $ */
/*
* Copyright (c) 2013 Tsubai Masanari
@@ -114,6 +114,8 @@ struct vmxnet3_comp_ring {
};
u_int next;
u_int32_t gen;
+ struct mbuf *sendmp;
+ struct mbuf *lastmp;
};
struct vmxnet3_txqueue {
@@ -160,6 +162,7 @@ struct vmxnet3_softc {
struct vmxnet3_queue *sc_q;
struct intrmap *sc_intrmap;
+ u_int sc_vrrs;
struct vmxnet3_driver_shared *sc_ds;
u_int8_t *sc_mcast;
struct vmxnet3_upt1_rss_conf *sc_rss;
@@ -170,7 +173,7 @@ struct vmxnet3_softc {
#endif
};
-#define JUMBO_LEN (1024 * 9)
+#define JUMBO_LEN ((16 * 1024) - 1)
#define DMAADDR(map) ((map)->dm_segs[0].ds_addr)
#define READ_BAR0(sc, reg) bus_space_read_4((sc)->sc_iot0, (sc)->sc_ioh0, reg)
@@ -273,15 +276,21 @@ vmxnet3_attach(struct device *parent, struct device *self, void *aux)
return;
}
+ /* Vmxnet3 Revision Report and Selection */
ver = READ_BAR1(sc, VMXNET3_BAR1_VRRS);
- if ((ver & 0x1) == 0) {
+ if (ISSET(ver, 0x2)) {
+ sc->sc_vrrs = 2;
+ } else if (ISSET(ver, 0x1)) {
+ sc->sc_vrrs = 1;
+ } else {
printf(": unsupported hardware version 0x%x\n", ver);
return;
}
- WRITE_BAR1(sc, VMXNET3_BAR1_VRRS, 1);
+ WRITE_BAR1(sc, VMXNET3_BAR1_VRRS, sc->sc_vrrs);
+ /* UPT Version Report and Selection */
ver = READ_BAR1(sc, VMXNET3_BAR1_UVRS);
- if ((ver & 0x1) == 0) {
+ if (!ISSET(ver, 0x1)) {
printf(": incompatible UPT version 0x%x\n", ver);
return;
}
@@ -410,6 +419,11 @@ vmxnet3_attach(struct device *parent, struct device *self, void *aux)
ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
+ if (sc->sc_vrrs == 2) {
+ ifp->if_xflags |= IFXF_LRO;
+ ifp->if_capabilities |= IFCAP_LRO;
+ }
+
#if NVLAN > 0
if (sc->sc_ds->upt_features & UPT1_F_VLAN)
ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
@@ -704,6 +718,10 @@ vmxnet3_rxfill(struct vmxnet3_rxring *ring)
uint32_t rgen;
uint32_t type = htole32(VMXNET3_BTYPE_HEAD << VMXNET3_RX_BTYPE_S);
+ /* Second ring just contains packet bodies. */
+ if (ring->rid == 1)
+ type = htole32(VMXNET3_BTYPE_BODY << VMXNET3_RX_BTYPE_S);
+
MUTEX_ASSERT_LOCKED(&ring->mtx);
slots = if_rxr_get(&ring->rxr, NRXDESC);
@@ -781,17 +799,17 @@ vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
VMX_DMA_LEN(&ring->dmamem));
bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_PREWRITE);
- }
- /* XXX only fill ring 0 */
- ring = &rq->cmd_ring[0];
- mtx_enter(&ring->mtx);
- vmxnet3_rxfill(ring);
- mtx_leave(&ring->mtx);
+ mtx_enter(&ring->mtx);
+ vmxnet3_rxfill(ring);
+ mtx_leave(&ring->mtx);
+ }
comp_ring = &rq->comp_ring;
comp_ring->next = 0;
comp_ring->gen = VMX_RXC_GEN;
+ comp_ring->sendmp = NULL;
+ comp_ring->lastmp = NULL;
memset(VMX_DMA_KVA(&comp_ring->dmamem), 0,
VMX_DMA_LEN(&comp_ring->dmamem));
@@ -1074,9 +1092,9 @@ vmxnet3_rxintr(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
bus_dmamap_t map;
- unsigned int idx, len;
+ unsigned int idx;
unsigned int next, rgen;
- unsigned int done = 0;
+ unsigned int rid, done[2] = {0, 0};
next = comp_ring->next;
rgen = comp_ring->gen;
@@ -1096,11 +1114,14 @@ vmxnet3_rxintr(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
idx = letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_IDX_S) &
VMXNET3_RXC_IDX_M);
+
if (letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_QID_S) &
VMXNET3_RXC_QID_M) < sc->sc_nqueues)
- ring = &rq->cmd_ring[0];
+ rid = 0;
else
- ring = &rq->cmd_ring[1];
+ rid = 1;
+
+ ring = &rq->cmd_ring[rid];
m = ring->m[idx];
KASSERT(m != NULL);
@@ -1111,31 +1132,62 @@ vmxnet3_rxintr(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(sc->sc_dmat, map);
- done++;
+ done[rid]++;
+
+ /*
+ * A receive descriptor of type 4 which is flagged as start of
+ * packet, contains the number of TCP segments of an LRO packet.
+ */
+ if (letoh32((rxcd->rxc_word3 & VMXNET3_RXC_TYPE_M) >>
+ VMXNET3_RXC_TYPE_S) == 4 &&
+ ISSET(rxcd->rxc_word0, VMXNET3_RXC_SOP)) {
+ m->m_pkthdr.ph_mss = letoh32(rxcd->rxc_word1 &
+ VMXNET3_RXC_SEG_CNT_M);
+ }
+
+ m->m_len = letoh32((rxcd->rxc_word2 >> VMXNET3_RXC_LEN_S) &
+ VMXNET3_RXC_LEN_M);
+
+ if (comp_ring->sendmp == NULL) {
+ comp_ring->sendmp = comp_ring->lastmp = m;
+ comp_ring->sendmp->m_pkthdr.len = 0;
+ } else {
+ CLR(m->m_flags, M_PKTHDR);
+ comp_ring->lastmp->m_next = m;
+ comp_ring->lastmp = m;
+ }
+ comp_ring->sendmp->m_pkthdr.len += m->m_len;
+
+ if (!ISSET(rxcd->rxc_word0, VMXNET3_RXC_EOP))
+ continue;
+
+ /*
+ * End of Packet
+ */
if (letoh32(rxcd->rxc_word2 & VMXNET3_RXC_ERROR)) {
ifp->if_ierrors++;
- m_freem(m);
+ m_freem(comp_ring->sendmp);
+ comp_ring->sendmp = comp_ring->lastmp = NULL;
continue;
}
- len = letoh32((rxcd->rxc_word2 >> VMXNET3_RXC_LEN_S) &
- VMXNET3_RXC_LEN_M);
- if (len < VMXNET3_MIN_MTU) {
- m_freem(m);
+ if (comp_ring->sendmp->m_pkthdr.len < VMXNET3_MIN_MTU) {
+ m_freem(comp_ring->sendmp);
+ comp_ring->sendmp = comp_ring->lastmp = NULL;
continue;
}
- m->m_pkthdr.len = m->m_len = len;
-
- vmxnet3_rx_offload(rxcd, m);
if (((letoh32(rxcd->rxc_word0) >> VMXNET3_RXC_RSSTYPE_S) &
VMXNET3_RXC_RSSTYPE_M) != VMXNET3_RXC_RSSTYPE_NONE) {
- m->m_pkthdr.ph_flowid = letoh32(rxcd->rxc_word1);
- SET(m->m_pkthdr.csum_flags, M_FLOWID);
+ comp_ring->sendmp->m_pkthdr.ph_flowid =
+ letoh32(rxcd->rxc_word1);
+ SET(comp_ring->sendmp->m_pkthdr.csum_flags, M_FLOWID);
}
- ml_enqueue(&ml, m);
+ vmxnet3_rx_offload(rxcd, comp_ring->sendmp);
+ ml_enqueue(&ml, comp_ring->sendmp);
+ comp_ring->sendmp = comp_ring->lastmp = NULL;
}
bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
@@ -1144,19 +1196,20 @@ vmxnet3_rxintr(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
comp_ring->next = next;
comp_ring->gen = rgen;
- if (done == 0)
- return;
+ for (int i = 0; i < 2; i++) {
+ if (done[i] == 0)
+ continue;
- ring = &rq->cmd_ring[0];
+ ring = &rq->cmd_ring[i];
- if (ifiq_input(rq->ifiq, &ml))
- if_rxr_livelocked(&ring->rxr);
+ if (ifiq_input(rq->ifiq, &ml))
+ if_rxr_livelocked(&ring->rxr);
- /* XXX Should we (try to) allocate buffers for ring 2 too? */
- mtx_enter(&ring->mtx);
- if_rxr_put(&ring->rxr, done);
- vmxnet3_rxfill(ring);
- mtx_leave(&ring->mtx);
+ mtx_enter(&ring->mtx);
+ if_rxr_put(&ring->rxr, done[i]);
+ vmxnet3_rxfill(ring);
+ mtx_leave(&ring->mtx);
+ }
}
void
@@ -1211,6 +1264,8 @@ vmxnet3_iff(struct vmxnet3_softc *sc)
void
vmxnet3_rx_offload(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
{
+ uint32_t pkts;
+
/*
* VLAN Offload
*/
@@ -1243,6 +1298,45 @@ vmxnet3_rx_offload(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
else if (ISSET(rxcd->rxc_word3, VMXNET3_RXC_UDP))
SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_IN_OK);
}
+
+ /*
+ * TCP Large Receive Offload
+ */
+
+ pkts = m->m_pkthdr.ph_mss;
+ m->m_pkthdr.ph_mss = 0;
+
+ if (pkts > 1) {
+ struct ether_extracted ext;
+ uint32_t paylen;
+
+ ether_extract_headers(m, &ext);
+
+ paylen = ext.iplen;
+ if (ext.ip4 || ext.ip6)
+ paylen -= ext.iphlen;
+
+ if (ext.tcp) {
+ paylen -= ext.tcphlen;
+ tcpstat_inc(tcps_inhwlro);
+ tcpstat_add(tcps_inpktlro, pkts);
+ } else {
+ tcpstat_inc(tcps_inbadlro);
+ }
+
+ /*
+ * If we are going to forward this packet, we have to mark it
+ * as TSO, set a correct mss, and recalculate the TCP checksum.
+ */
+ if (ext.tcp && paylen >= pkts) {
+ SET(m->m_pkthdr.csum_flags, M_TCP_TSO);
+ m->m_pkthdr.ph_mss = paylen / pkts;
+ }
+ if (ext.tcp &&
+ ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_IN_OK)) {
+ SET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT);
+ }
+ }
}
void
@@ -1309,6 +1403,13 @@ vmxnet3_init(struct vmxnet3_softc *sc)
return EIO;
}
+ /* TCP Large Receive Offload */
+ if (ISSET(ifp->if_xflags, IFXF_LRO))
+ SET(sc->sc_ds->upt_features, UPT1_F_LRO);
+ else
+ CLR(sc->sc_ds->upt_features, UPT1_F_LRO);
+ WRITE_CMD(sc, VMXNET3_CMD_SET_FEATURE);
+
/* Program promiscuous mode and multicast filters. */
vmxnet3_iff(sc);
diff --git a/sys/dev/pci/if_vmxreg.h b/sys/dev/pci/if_vmxreg.h
index 44f5e4315e3..a697856f6ee 100644
--- a/sys/dev/pci/if_vmxreg.h
+++ b/sys/dev/pci/if_vmxreg.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_vmxreg.h,v 1.9 2020/07/07 01:36:49 dlg Exp $ */
+/* $OpenBSD: if_vmxreg.h,v 1.10 2024/06/07 08:44:25 jan Exp $ */
/*
* Copyright (c) 2013 Tsubai Masanari
@@ -76,6 +76,7 @@ enum UPT1_RxStats {
#define VMXNET3_CMD_RESET 0xcafe0002 /* reset device */
#define VMXNET3_CMD_SET_RXMODE 0xcafe0003 /* set interface flags */
#define VMXNET3_CMD_SET_FILTER 0xcafe0004 /* set address filter */
+#define VMXNET3_CMD_SET_FEATURE 0xcafe0009 /* set features */
#define VMXNET3_CMD_GET_STATUS 0xf00d0000 /* get queue errors */
#define VMXNET3_CMD_GET_STATS 0xf00d0001
#define VMXNET3_CMD_GET_LINK 0xf00d0002 /* get link status */
@@ -189,6 +190,7 @@ struct vmxnet3_rxcompdesc {
u_int32_t rxc_word1;
#define VMXNET3_RXC_RSSHASH_M 0xffffffff /* RSS hash value */
#define VMXNET3_RXC_RSSHASH_S 0
+#define VMXNET3_RXC_SEG_CNT_M 0x000000ff /* No. of seg. in LRO pkt */
u_int32_t rxc_word2;
#define VMXNET3_RXC_LEN_M 0x00003fff
@@ -210,6 +212,7 @@ struct vmxnet3_rxcompdesc {
#define VMXNET3_RXC_FRAGMENT 0x00400000 /* IP fragment */
#define VMXNET3_RXC_FCS 0x00800000 /* frame CRC correct */
#define VMXNET3_RXC_TYPE_M 0x7f000000
+#define VMXNET3_RXC_TYPE_S 24
#define VMXNET3_RXC_GEN_M 0x00000001U
#define VMXNET3_RXC_GEN_S 31
} __packed;