summary refs log tree commit diff
diff options
context:
space:
mode:
author Brad Smith <brad@cvs.openbsd.org> 2005-05-02 02:39:46 +0000
committer Brad Smith <brad@cvs.openbsd.org> 2005-05-02 02:39:46 +0000
commit 64cb74f62cc1c60bccfa61b3d8384f2f134b918e (patch)
tree eb4ed1942848d70bfca088e06dee3f7e54ea5dc4
parent 0802526e7cb653700e5ea22777cac297b8233c33 (diff)
New in{,4}_cksum that is between 1.5 and 5 times faster than the
old version depending on CPU type. From NetBSD ok drahn@
-rw-r--r-- sys/arch/powerpc/conf/files.powerpc 3
-rw-r--r-- sys/arch/powerpc/powerpc/in_cksum.c 293
2 files changed, 246 insertions, 50 deletions
diff --git a/sys/arch/powerpc/conf/files.powerpc b/sys/arch/powerpc/conf/files.powerpc
index 69a11f1b356..e3ef55bb758 100644
--- a/sys/arch/powerpc/conf/files.powerpc
+++ b/sys/arch/powerpc/conf/files.powerpc
@@ -1,4 +1,4 @@
-# $OpenBSD: files.powerpc,v 1.39 2005/05/01 21:36:57 brad Exp $
+# $OpenBSD: files.powerpc,v 1.40 2005/05/02 02:39:45 brad Exp $
#
file arch/powerpc/powerpc/setjmp.S ddb
@@ -7,7 +7,6 @@ file arch/powerpc/powerpc/bcopy.c
file arch/powerpc/powerpc/copystr.c
file arch/powerpc/powerpc/fpu.c
file arch/powerpc/powerpc/in_cksum.c inet
-file netinet/in4_cksum.c inet
file arch/powerpc/powerpc/pmap.c
file arch/powerpc/powerpc/process_machdep.c
file arch/powerpc/powerpc/sys_machdep.c
diff --git a/sys/arch/powerpc/powerpc/in_cksum.c b/sys/arch/powerpc/powerpc/in_cksum.c
index f6839e16da9..55fc17b4e05 100644
--- a/sys/arch/powerpc/powerpc/in_cksum.c
+++ b/sys/arch/powerpc/powerpc/in_cksum.c
@@ -1,11 +1,12 @@
-/* $OpenBSD: in_cksum.c,v 1.6 2003/10/15 02:43:09 drahn Exp $ */
-/* $NetBSD: in_cksum.c,v 1.1 1996/09/30 16:34:47 ws Exp $ */
+/* $OpenBSD: in_cksum.c,v 1.7 2005/05/02 02:39:45 brad Exp $ */
+/* $NetBSD: in_cksum.c,v 1.7 2003/07/15 02:54:48 lukem Exp $ */
/*
- * Copyright (C) 1995, 1996 Wolfgang Solfrank.
- * Copyright (C) 1995, 1996 TooLs GmbH.
+ * Copyright 2001 Wasabi Systems, Inc.
* All rights reserved.
*
+ * Written by Simon Burge and Eduardo Horvath for Wasabi Systems, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -16,70 +17,266 @@
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
- * This product includes software developed by TooLs GmbH.
- * 4. The name of TooLs GmbH may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
*
- * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
*/
+
+#if 0
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: in_cksum.c,v 1.7 2003/07/15 02:54:48 lukem Exp $");
+#endif
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
+#include <sys/socketvar.h>
#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
/*
- * First cut for in_cksum.
- * This code is in C and should be optimized for PPC later.
+ * Checksum routine for Internet Protocol family headers.
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ *
+ * PowerPC version.
*/
-#define REDUCE (sum = (sum & 0xffff) + (sum >> 16))
-#define ROL (sum = sum << 8)
-#define ADDB (ROL, sum += *w, byte_swapped ^= 1)
-#define ADDS (sum += *(u_short *)w)
-#define SHIFT(n) (w += (n), mlen -= (n))
-#define ADDCARRY do { while (sum > 0xffff) REDUCE; } while (0)
-int
-in_cksum(struct mbuf *m, int len)
+#define REDUCE1 sum = (sum & 0xffff) + (sum >> 16)
+/* Two REDUCE1s is faster than REDUCE1; if (sum > 65535) sum -= 65536; */
+#define REDUCE { REDUCE1; REDUCE1; }
+
+static __inline__ int
+in_cksum_internal(struct mbuf *m, int off, int len, u_int sum)
{
- u_char *w;
- u_int sum = 0;
- int mlen;
+ uint8_t *w;
+ int mlen = 0;
int byte_swapped = 0;
-
- for (; m && len; m = m->m_next) {
+ int n;
+
+ union {
+ uint8_t c[2];
+ uint16_t s;
+ } s_util;
+
+ for (;m && len; m = m->m_next) {
if (m->m_len == 0)
continue;
- w = mtod(m, u_char *);
- mlen = m->m_len;
+ w = mtod(m, uint8_t *) + off;
+
+ /*
+ * 'off' can only be non-zero on the first pass of this
+ * loop when mlen != -1, so we don't need to worry about
+ * 'off' in the if clause below.
+ */
+ if (mlen == -1) {
+ /*
+ * The first byte of this mbuf is the continuation
+ * of a word spanning between this mbuf and the
+ * last mbuf.
+ *
+ * s_util.c[0] is already saved when scanning previous
+ * mbuf.
+ */
+ s_util.c[1] = *w++;
+ sum += s_util.s;
+ mlen = m->m_len - 1;
+ len--;
+ } else {
+ mlen = m->m_len - off;
+ off = 0;
+ }
if (len < mlen)
mlen = len;
len -= mlen;
- if ((long)w & 1) {
- REDUCE;
- ADDB;
- SHIFT(1);
+
+ /*
+ * Force to a word boundary.
+ */
+ if ((3 & (long) w) && (mlen > 0)) {
+ if ((1 & (long) w)) {
+ REDUCE;
+ sum <<= 8;
+ s_util.c[0] = *w++;
+ mlen--;
+ byte_swapped = 1;
+ }
+ if ((2 & (long) w) && (mlen > 1)) {
+ /*
+ * Since the `sum' may contain full 32 bit
+ * value, we can't simply add any value.
+ */
+ __asm __volatile(
+ "lhz 7,0(%1);" /* load current data
+ half word */
+ "addc %0,%0,7;" /* add to sum */
+ "addze %0,%0;" /* add carry bit */
+ : "+r"(sum)
+ : "b"(w)
+ : "7"); /* clobber r7 */
+ w += 2;
+ mlen -= 2;
+ }
+ }
+
+ if (mlen >= 64) {
+ n = mlen >> 6;
+ __asm __volatile(
+ "addic 0,0,0;" /* clear carry */
+ "mtctr %1;" /* load loop count */
+ "1:"
+ "lwz 7,4(%2);" /* load current data
+ word */
+ "lwz 8,8(%2);"
+ "lwz 9,12(%2);"
+ "lwz 10,16(%2);"
+ "adde %0,%0,7;" /* add to sum */
+ "adde %0,%0,8;"
+ "adde %0,%0,9;"
+ "adde %0,%0,10;"
+ "lwz 7,20(%2);"
+ "lwz 8,24(%2);"
+ "lwz 9,28(%2);"
+ "lwz 10,32(%2);"
+ "adde %0,%0,7;"
+ "adde %0,%0,8;"
+ "adde %0,%0,9;"
+ "adde %0,%0,10;"
+ "lwz 7,36(%2);"
+ "lwz 8,40(%2);"
+ "lwz 9,44(%2);"
+ "lwz 10,48(%2);"
+ "adde %0,%0,7;"
+ "adde %0,%0,8;"
+ "adde %0,%0,9;"
+ "adde %0,%0,10;"
+ "lwz 7,52(%2);"
+ "lwz 8,56(%2);"
+ "lwz 9,60(%2);"
+ "lwzu 10,64(%2);"
+ "adde %0,%0,7;"
+ "adde %0,%0,8;"
+ "adde %0,%0,9;"
+ "adde %0,%0,10;"
+ "bdnz 1b;" /* loop */
+ "addze %0,%0;" /* add carry bit */
+ : "+r"(sum)
+ : "r"(n), "b"(w - 4)
+ : "7", "8", "9", "10"); /* clobber r7, r8, r9,
+ r10 */
+ w += n * 64;
+ mlen -= n * 64;
}
- while (mlen >= 2) {
- ADDS;
- SHIFT(2);
+
+ if (mlen >= 8) {
+ n = mlen >> 3;
+ __asm __volatile(
+ "addic 0,0,0;" /* clear carry */
+ "mtctr %1;" /* load loop count */
+ "1:"
+ "lwz 7,4(%2);" /* load current data
+ word */
+ "lwzu 8,8(%2);"
+ "adde %0,%0,7;" /* add to sum */
+ "adde %0,%0,8;"
+ "bdnz 1b;" /* loop */
+ "addze %0,%0;" /* add carry bit */
+ : "+r"(sum)
+ : "r"(n), "b"(w - 4)
+ : "7", "8"); /* clobber r7, r8 */
+ w += n * 8;
+ mlen -= n * 8;
}
+
+ if (mlen == 0 && byte_swapped == 0)
+ continue;
REDUCE;
- if (mlen == 1)
- ADDB;
+
+ while ((mlen -= 2) >= 0) {
+ sum += *(uint16_t *)w;
+ w += 2;
+ }
+
+ if (byte_swapped) {
+ REDUCE;
+ sum <<= 8;
+ byte_swapped = 0;
+ if (mlen == -1) {
+ s_util.c[1] = *w;
+ sum += s_util.s;
+ mlen = 0;
+ } else
+ mlen = -1;
+ } else if (mlen == -1)
+ s_util.c[0] = *w;
}
- if (byte_swapped) {
- REDUCE;
- ROL;
+ if (len)
+ printf("cksum: out of data\n");
+ if (mlen == -1) {
+ /* The last mbuf has odd # of bytes. Follow the
+ standard (the odd byte may be shifted left by 8 bits
+ or not as determined by endian-ness of the machine) */
+ s_util.c[1] = 0;
+ sum += s_util.s;
+ }
+ REDUCE;
+ return (~sum & 0xffff);
+}
+
+int
+in_cksum(struct mbuf *m, int len)
+{
+
+ return (in_cksum_internal(m, 0, len, 0));
+}
+
+int
+in4_cksum(struct mbuf *m, uint8_t nxt, int off, int len)
+{
+ uint16_t *w;
+ u_int sum = 0;
+ union {
+ struct ipovly ipov;
+ u_int16_t w[10];
+ } u;
+
+ if (nxt != 0) {
+ /* pseudo header */
+ memset(&u.ipov, 0, sizeof(u.ipov));
+ u.ipov.ih_len = htons(len);
+ u.ipov.ih_pr = nxt;
+ u.ipov.ih_src = mtod(m, struct ip *)->ip_src;
+ u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
+ w = u.w;
+ /* assumes sizeof(ipov) == 20 */
+ sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
+ sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
}
- ADDCARRY;
- return sum ^ 0xffff;
+
+ /* skip unnecessary part */
+ while (m && off > 0) {
+ if (m->m_len > off)
+ break;
+ off -= m->m_len;
+ m = m->m_next;
+ }
+
+ return (in_cksum_internal(m, off, len, sum));
}