/* $OpenBSD: uipc_mbuf.c,v 1.31 2001/05/20 08:31:46 angelos Exp $ */ /* $NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $ */ /* * Copyright (c) 1982, 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 */ /* %%% portions-copyright-nrl-95 Portions of this software are Copyright 1995-1998 by Randall Atkinson, Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights Reserved. All rights under this copyright have been assigned to the US Naval Research Laboratory (NRL). The NRL Copyright Notice and License Agreement Version 1.1 (January 17, 1995) applies to these portions of the software. You should have received a copy of the license with this software. If you didn't get a copy, you may request one from . */ #include #include #include #include #include #define MBTYPES #include #include #include #include #include #include #include #include #if defined(UVM) #include #endif struct pool mbpool; /* mbuf pool */ struct pool mclpool; /* mbuf cluster pool */ extern vm_map_t mb_map; struct mbuf *mbutl; int needqueuedrain; void *mclpool_alloc __P((unsigned long, int, int)); void mclpool_release __P((void *, unsigned long, int)); struct mbuf *m_copym0 __P((struct mbuf *, int, int, int, int)); const char *mclpool_warnmsg = "WARNING: mclpool limit reached; increase NMBCLUSTERS"; /* * Initialize the mbuf allcator. */ void mbinit() { pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", 0, NULL, NULL, 0); pool_init(&mclpool, MCLBYTES, 0, 0, 0, "mclpl", 0, mclpool_alloc, mclpool_release, 0); /* * Set the hard limit on the mclpool to the number of * mbuf clusters the kernel is to support. Log the limit * reached message max once a minute. */ pool_sethardlimit(&mclpool, nmbclusters, mclpool_warnmsg, 60); /* * Set a low water mark for both mbufs and clusters. This should * help ensure that they can be allocated in a memory starvation * situation. This is important for e.g. diskless systems which * must allocate mbufs in order for the pagedaemon to clean pages. 
*/ pool_setlowat(&mbpool, mblowat); pool_setlowat(&mclpool, mcllowat); } void * mclpool_alloc(sz, flags, mtype) unsigned long sz; int flags; int mtype; { #if defined(UVM) boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE; return ((void *)uvm_km_alloc_poolpage1(mb_map, uvmexp.mb_object, waitok)); #else return pool_page_alloc_nointr(sz, flags, mtype); #endif } void mclpool_release(v, sz, mtype) void *v; unsigned long sz; int mtype; { #if defined(UVM) uvm_km_free_poolpage1(mb_map, (vaddr_t)v); #else pool_page_free_nointr(v, sz, mtype); #endif } /* * When MGET failes, ask protocols to free space when short of memory, * then re-attempt to allocate an mbuf. */ struct mbuf * m_retry(i, t) int i, t; { register struct mbuf *m; if (i & M_DONTWAIT) { needqueuedrain = 1; setsoftnet(); return (NULL); } m_reclaim(); #define m_retry(i, t) NULL MGET(m, i, t); #undef m_retry if (m != NULL) mbstat.m_wait++; else mbstat.m_drops++; return (m); } /* * As above; retry an MGETHDR. */ struct mbuf * m_retryhdr(i, t) int i, t; { register struct mbuf *m; if (i & M_DONTWAIT) { needqueuedrain = 1; setsoftnet(); return (NULL); } m_reclaim(); #define m_retryhdr(i, t) NULL MGETHDR(m, i, t); #undef m_retryhdr if (m != NULL) mbstat.m_wait++; else mbstat.m_drops++; return (m); } void m_reclaim() { register struct domain *dp; register struct protosw *pr; int s = splimp(); needqueuedrain = 0; for (dp = domains; dp; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain) (*pr->pr_drain)(); splx(s); mbstat.m_drain++; } /* * Space allocation routines. * These are also available as macros * for critical paths. 
*/ struct mbuf * m_get(nowait, type) int nowait, type; { register struct mbuf *m; MGET(m, nowait, type); return (m); } struct mbuf * m_gethdr(nowait, type) int nowait, type; { register struct mbuf *m; MGETHDR(m, nowait, type); return (m); } struct mbuf * m_getclr(nowait, type) int nowait, type; { register struct mbuf *m; MGET(m, nowait, type); if (m == NULL) return (NULL); memset(mtod(m, caddr_t), 0, MLEN); return (m); } struct mbuf * m_free(m) struct mbuf *m; { register struct mbuf *n; MFREE(m, n); return (n); } void m_freem(m) register struct mbuf *m; { register struct mbuf *n; if (m == NULL) return; do { MFREE(m, n); } while ((m = n) != NULL); } /* * Mbuffer utility routines. */ /* * Lesser-used path for M_PREPEND: * allocate new mbuf to prepend to chain, * copy junk along. */ struct mbuf * m_prepend(m, len, how) register struct mbuf *m; int len, how; { struct mbuf *mn; MGET(mn, how, m->m_type); if (mn == NULL) { m_freem(m); return (NULL); } if (m->m_flags & M_PKTHDR) { M_COPY_PKTHDR(mn, m); m->m_flags &= ~M_PKTHDR; TAILQ_INIT(&m->m_pkthdr.tags); } mn->m_next = m; m = mn; if (len < MHLEN) MH_ALIGN(m, len); m->m_len = len; return (m); } /* * Make a copy of an mbuf chain starting "off0" bytes from the beginning, * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. */ int MCFail; struct mbuf * m_copym(m, off0, len, wait) struct mbuf *m; int off0, wait; int len; { return m_copym0(m, off0, len, wait, 0); /* shallow copy on M_EXT */ } /* * m_copym2() is like m_copym(), except it COPIES cluster mbufs, instead * of merely bumping the reference count. 
*/ struct mbuf * m_copym2(m, off0, len, wait) struct mbuf *m; int off0, wait; int len; { return m_copym0(m, off0, len, wait, 1); /* deep copy */ } struct mbuf * m_copym0(m, off0, len, wait, deep) struct mbuf *m; int off0, wait; int len; int deep; /* deep copy */ { struct mbuf *n, **np; int off = off0; struct mbuf *top; int copyhdr = 0; if (off < 0 || len < 0) panic("m_copym0: off %d, len %d", off, len); if (off == 0 && m->m_flags & M_PKTHDR) copyhdr = 1; while (off > 0) { if (m == 0) panic("m_copym0: null mbuf"); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } np = ⊤ top = 0; while (len > 0) { if (m == 0) { if (len != M_COPYALL) panic("m_copym0: m == 0 and not COPYALL"); break; } MGET(n, wait, m->m_type); *np = n; if (n == 0) goto nospace; if (copyhdr) { M_DUP_PKTHDR(n, m); if (len == M_COPYALL) n->m_pkthdr.len -= off0; else n->m_pkthdr.len = len; copyhdr = 0; } n->m_len = min(len, m->m_len - off); if (m->m_flags & M_EXT) { if (!deep) { n->m_data = m->m_data + off; n->m_ext = m->m_ext; MCLADDREFERENCE(m, n); } else { /* * we are unsure about the way m was allocated. * copy into multiple MCLBYTES cluster mbufs. */ MCLGET(n, wait); n->m_len = 0; n->m_len = M_TRAILINGSPACE(n); n->m_len = min(n->m_len, len); n->m_len = min(n->m_len, m->m_len - off); memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, (unsigned)n->m_len); } } else memcpy(mtod(n, caddr_t), mtod(m, caddr_t)+off, (unsigned)n->m_len); if (len != M_COPYALL) len -= n->m_len; off += n->m_len; #ifdef DIAGNOSTIC if (off > m->m_len) panic("m_copym0 overrun"); #endif if (off == m->m_len) { m = m->m_next; off = 0; } np = &n->m_next; } if (top == 0) MCFail++; return (top); nospace: m_freem(top); MCFail++; return (0); } /* * Copy data from an mbuf chain starting "off" bytes from the beginning, * continuing for "len" bytes, into the indicated buffer. 
*/ void m_copydata(m, off, len, cp) register struct mbuf *m; register int off; register int len; caddr_t cp; { register unsigned count; if (off < 0) panic("m_copydata: off %d < 0", off); if (len < 0) panic("m_copydata: len %d < 0", len); while (off > 0) { if (m == NULL) panic("m_copydata: null mbuf in skip"); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } while (len > 0) { if (m == NULL) panic("m_copydata: null mbuf"); count = min(m->m_len - off, len); bcopy(mtod(m, caddr_t) + off, cp, count); len -= count; cp += count; off = 0; m = m->m_next; } } /* * Concatenate mbuf chain n to m. * Both chains must be of the same type (e.g. MT_DATA). * Any m_pkthdr is not updated. */ void m_cat(m, n) register struct mbuf *m, *n; { while (m->m_next) m = m->m_next; while (n) { if (m->m_flags & M_EXT || m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { /* just join the two chains */ m->m_next = n; return; } /* splat the data from one into the other */ bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (u_int)n->m_len); m->m_len += n->m_len; n = m_free(n); } } void m_adj(mp, req_len) struct mbuf *mp; int req_len; { register int len = req_len; register struct mbuf *m; register int count; if ((m = mp) == NULL) return; if (len >= 0) { /* * Trim from head. */ while (m != NULL && len > 0) { if (m->m_len <= len) { len -= m->m_len; m->m_len = 0; m = m->m_next; } else { m->m_len -= len; m->m_data += len; len = 0; } } m = mp; if (mp->m_flags & M_PKTHDR) m->m_pkthdr.len -= (req_len - len); } else { /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. 
*/ len = -len; count = 0; for (;;) { count += m->m_len; if (m->m_next == NULL) break; m = m->m_next; } if (m->m_len >= len) { m->m_len -= len; if (mp->m_flags & M_PKTHDR) mp->m_pkthdr.len -= len; return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ m = mp; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len = count; for (; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; break; } count -= m->m_len; } while ((m = m->m_next) != NULL) m->m_len = 0; } } /* * Rearange an mbuf chain so that len bytes are contiguous * and in the data area of an mbuf (so that mtod and dtom * will work for a structure of size len). Returns the resulting * mbuf chain on success, frees it and returns null on failure. * If there is room, it will add up to max_protohdr-len extra bytes to the * contiguous region in an attempt to avoid being called next time. */ int MPFail; struct mbuf * m_pullup(n, len) register struct mbuf *n; int len; { register struct mbuf *m; register int count; int space; /* * If first mbuf has no cluster, and has room for len bytes * without shifting current data, pullup into it, * otherwise allocate a new mbuf to prepend to the chain. 
*/ if ((n->m_flags & M_EXT) == 0 && n->m_data + len < &n->m_dat[MLEN] && n->m_next) { if (n->m_len >= len) return (n); m = n; n = n->m_next; len -= m->m_len; } else { if (len > MHLEN) goto bad; MGET(m, M_DONTWAIT, n->m_type); if (m == NULL) goto bad; m->m_len = 0; if (n->m_flags & M_PKTHDR) { M_COPY_PKTHDR(m, n); n->m_flags &= ~M_PKTHDR; TAILQ_INIT(&n->m_pkthdr.tags); } } space = &m->m_dat[MLEN] - (m->m_data + m->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (unsigned)count); len -= count; m->m_len += count; n->m_len -= count; space -= count; if (n->m_len) n->m_data += count; else n = m_free(n); } while (len > 0 && n); if (len > 0) { (void)m_free(m); goto bad; } m->m_next = n; return (m); bad: m_freem(n); MPFail++; return (NULL); } /* * m_pullup2() works like m_pullup, save that len can be <= MCLBYTES. * m_pullup2() only works on values of len such that MHLEN < len <= MCLBYTES, * it calls m_pullup() for values <= MHLEN. It also only coagulates the * reqested number of bytes. (For those of us who expect unwieldly option * headers. * * KEBE SAYS: Remember that dtom() calls with data in clusters does not work! */ struct mbuf * m_pullup2(n, len) register struct mbuf *n; int len; { register struct mbuf *m; register int count; int space; if (len <= MHLEN) return m_pullup(n, len); if ((n->m_flags & M_EXT) != 0 && n->m_data + len < &n->m_data[MCLBYTES] && n->m_next) { if (n->m_len >= len) return (n); m = n; n = n->m_next; len -= m->m_len; } else { if (len > MCLBYTES) goto bad; MGET(m, M_DONTWAIT, n->m_type); if (m == NULL) goto bad; MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) goto bad; m->m_len = 0; if (n->m_flags & M_PKTHDR) { /* M_COPY_PKTHDR(m, n);*//* Too many adverse side effects. */ m->m_pkthdr = n->m_pkthdr; m->m_flags = (n->m_flags & M_COPYFLAGS) | M_EXT; n->m_flags &= ~M_PKTHDR; TAILQ_INIT(&n->m_pkthdr.tags); /* n->m_data is cool. 
*/ } } do { count = min(len, n->m_len); bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (unsigned)count); len -= count; m->m_len += count; n->m_len -= count; space -= count; if (n->m_len) n->m_data += count; else n = m_free(n); } while (len > 0 && n); if (len > 0) { (void)m_free(m); goto bad; } m->m_next = n; return (m); bad: m_freem(n); MPFail++; return (NULL); } /* * Return a pointer to mbuf/offset of location in mbuf chain. */ struct mbuf * m_getptr(m, loc, off) struct mbuf *m; int loc; int *off; { while (loc >= 0) { /* Normal end of search */ if (m->m_len > loc) { *off = loc; return (m); } else { loc -= m->m_len; if (m->m_next == NULL) { if (loc == 0) { /* Point at the end of valid data */ *off = m->m_len; return (m); } else return (NULL); } else m = m->m_next; } } return (NULL); } /* * Inject a new mbuf chain of length siz in mbuf chain m0 at * position len0. Returns a pointer to the first injected mbuf, or * NULL on failure (m0 is left undisturbed). Note that if there is * enough space for an object of size siz in the appropriate position, * no memory will be allocated. Also, there will be no data movement in * the first len0 bytes (pointers to that will remain valid). * * XXX It is assumed that siz is less than the size of an mbuf at the moment. 
*/
struct mbuf *
m_inject(m0, len0, siz, wait)
	register struct mbuf *m0;
	int len0, siz, wait;
{
	struct mbuf *cur, *hole, *tail = NULL, *last;
	unsigned pos = len0, remain;

	if ((siz >= MHLEN) || (len0 <= 0))
		return (NULL);

	/* Locate the mbuf holding position len0; pos becomes local to it. */
	cur = m0;
	while (cur && pos > cur->m_len) {
		pos -= cur->m_len;
		cur = cur->m_next;
	}
	if (cur == NULL)
		return (NULL);

	remain = cur->m_len - pos;
	if (remain != 0) {
		/* Bytes following the injection point go into a copy. */
		tail = m_copym2(cur, pos, remain, wait);
		if (tail == NULL)
			return (NULL);
	} else if ((cur->m_next) && (M_LEADINGSPACE(cur->m_next) >= siz)) {
		/* Room in front of the next mbuf: grow it downwards. */
		cur->m_next->m_len += siz;
		cur->m_next->m_data -= siz;
		if (m0->m_flags & M_PKTHDR)
			m0->m_pkthdr.len += siz;
		return cur->m_next;
	}

	MGET(hole, wait, MT_DATA);
	if (hole == NULL) {
		if (tail)
			m_freem(tail);
		return (NULL);
	}

	hole->m_len = siz;
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += siz;
	cur->m_len -= remain;		/* Trim */

	/* Link: cur -> hole [-> tail ...] -> former successor of cur. */
	last = hole;
	if (tail) {
		while (last->m_next != NULL)
			last = last->m_next;
		last->m_next = tail;
	}
	while (last->m_next != NULL)
		last = last->m_next;
	last->m_next = cur->m_next;
	cur->m_next = hole;

	return hole;
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
*/ struct mbuf * m_split(m0, len0, wait) register struct mbuf *m0; int len0, wait; { register struct mbuf *m, *n; unsigned len = len0, remain, olen; for (m = m0; m && len > m->m_len; m = m->m_next) len -= m->m_len; if (m == NULL) return (NULL); remain = m->m_len - len; if (m0->m_flags & M_PKTHDR) { MGETHDR(n, wait, m0->m_type); if (n == NULL) return (NULL); M_DUP_PKTHDR(n, m0); n->m_pkthdr.len -= len0; olen = m0->m_pkthdr.len; m0->m_pkthdr.len = len0; if (m->m_flags & M_EXT) goto extpacket; if (remain > MHLEN) { /* m can't be the lead packet */ MH_ALIGN(n, 0); n->m_next = m_split(m, len, wait); if (n->m_next == NULL) { (void) m_free(n); m0->m_pkthdr.len = olen; return (NULL); } else return (n); } else MH_ALIGN(n, remain); } else if (remain == 0) { n = m->m_next; m->m_next = NULL; return (n); } else { MGET(n, wait, m->m_type); if (n == NULL) return (NULL); M_ALIGN(n, remain); } extpacket: if (m->m_flags & M_EXT) { n->m_flags |= M_EXT; MCLADDREFERENCE(m, n); n->m_data = m->m_data + len; } else { bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); } n->m_len = remain; m->m_len = len; n->m_next = m->m_next; m->m_next = NULL; return (n); } /* * Routine to copy from device local memory into mbufs. */ struct mbuf * m_devget(buf, totlen, off0, ifp, copy) char *buf; int totlen, off0; struct ifnet *ifp; void (*copy) __P((const void *, void *, size_t)); { register struct mbuf *m; struct mbuf *top = NULL, **mp = ⊤ register int off = off0, len; register char *cp; char *epkt; cp = buf; epkt = cp + totlen; if (off) { /* * If 'off' is non-zero, packet is trailer-encapsulated, * so we have to skip the type and length fields. 
*/ cp += off + 2 * sizeof(u_int16_t); totlen -= 2 * sizeof(u_int16_t); } MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return (NULL); m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = totlen; m->m_len = MHLEN; while (totlen > 0) { if (top != NULL) { MGET(m, M_DONTWAIT, MT_DATA); if (m == NULL) { m_freem(top); return (NULL); } m->m_len = MLEN; } len = min(totlen, epkt - cp); if (len >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if (m->m_flags & M_EXT) m->m_len = len = min(len, MCLBYTES); else len = m->m_len; } else { /* * Place initial small packet/header at end of mbuf. */ if (len < m->m_len) { if (top == NULL && len + max_linkhdr <= m->m_len) m->m_data += max_linkhdr; m->m_len = len; } else len = m->m_len; } if (copy) copy(cp, mtod(m, caddr_t), (size_t)len); else bcopy(cp, mtod(m, caddr_t), (size_t)len); cp += len; *mp = m; mp = &m->m_next; totlen -= len; if (cp == epkt) cp = buf; } return (top); } void m_zero(m) struct mbuf *m; { while (m) { if (m->m_flags & M_PKTHDR) memset((void *)m + sizeof(struct m_hdr) + sizeof(struct pkthdr), 0, MHLEN); else memset((void *)m + sizeof(struct m_hdr), 0, MLEN); if ((m->m_flags & M_EXT) && (m->m_ext.ext_free == NULL) && !MCLISREFERENCED(m)) memset(m->m_ext.ext_buf, 0, m->m_ext.ext_size); m = m->m_next; } } /* * Apply function f to the data in an mbuf chain starting "off" bytes from the * beginning, continuing for "len" bytes. 
*/
int
m_apply(m, off, len, f, fstate)
	struct mbuf *m;
	int off;
	int len;
	/* fstate, data, len */
	int (*f)(caddr_t, caddr_t, unsigned int);
	caddr_t fstate;
{
	unsigned int chunk;
	int error;

	if (len < 0)
		panic("m_apply: len %d < 0", len);
	if (off < 0)
		panic("m_apply: off %d < 0", off);

	/* Step over whole mbufs until off lands inside the current one. */
	for (; off > 0; m = m->m_next) {
		if (m == NULL)
			panic("m_apply: null mbuf in skip");
		if (off < m->m_len)
			break;
		off -= m->m_len;
	}

	/*
	 * Hand f one contiguous piece per mbuf; the first non-zero
	 * return value stops the walk and is passed back to the caller.
	 */
	for (; len > 0; len -= chunk, off = 0, m = m->m_next) {
		if (m == NULL)
			panic("m_apply: null mbuf");
		chunk = min(m->m_len - off, len);
		error = f(fstate, mtod(m, caddr_t) + off, chunk);
		if (error)
			return (error);
	}

	return (0);
}

#ifdef SMALL_KERNEL
/*
 * Out-of-line versions of the allocation macros.  Adding code to a
 * small kernel may look backwards, but each function here stands in
 * for a macro expansion.
 */
struct mbuf *
_sk_mget(int how, int type)
{
	struct mbuf *mb;

	_MGET(mb, how, type);
	return (mb);
}

struct mbuf *
_sk_mgethdr(int how, int type)
{
	struct mbuf *mb;

	_MGETHDR(mb, how, type);
	return (mb);
}

void
_sk_mclget(struct mbuf *m, int how)
{
	_MCLGET(m, how);
}
#endif /* SMALL_KERNEL */