summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThorsten Lockert <tholo@cvs.openbsd.org>1996-06-22 21:52:54 +0000
committerThorsten Lockert <tholo@cvs.openbsd.org>1996-06-22 21:52:54 +0000
commit24168a5e98557174d294e165aaac628d739c9505 (patch)
treedab3af5af757da3aa65d0c504db72d5a16034a91
parent49fdfe626f875ded43a575ebf180413fed9b3659 (diff)
Assembly version of in_cksum by Dave Richards. >20% faster in the usual
cases, up to 62% faster in other cases.
-rw-r--r--sys/arch/i386/conf/files.i3864
-rw-r--r--sys/arch/i386/i386/genassym.c9
-rw-r--r--sys/arch/i386/i386/in_cksum.c176
-rw-r--r--sys/arch/i386/i386/in_cksum.s231
4 files changed, 242 insertions, 178 deletions
diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386
index a2261b18849..4841180f478 100644
--- a/sys/arch/i386/conf/files.i386
+++ b/sys/arch/i386/conf/files.i386
@@ -1,4 +1,4 @@
-# $OpenBSD: files.i386,v 1.19 1996/06/16 10:33:20 deraadt Exp $
+# $OpenBSD: files.i386,v 1.20 1996/06/22 21:52:51 tholo Exp $
# $NetBSD: files.i386,v 1.73 1996/05/07 00:58:36 thorpej Exp $
#
# new style config file for i386 architecture
@@ -17,7 +17,7 @@ file arch/i386/i386/db_trace.c ddb
file arch/i386/i386/db_magic.s ddb
file arch/i386/i386/disksubr.c disk
file arch/i386/i386/gdt.c
-file arch/i386/i386/in_cksum.c inet
+file arch/i386/i386/in_cksum.s inet
file arch/i386/i386/ipx_cksum.c ipx
file arch/i386/i386/machdep.c
file arch/i386/i386/math_emulate.c math_emulate
diff --git a/sys/arch/i386/i386/genassym.c b/sys/arch/i386/i386/genassym.c
index 9e2b234fdfd..50cb71fd8d4 100644
--- a/sys/arch/i386/i386/genassym.c
+++ b/sys/arch/i386/i386/genassym.c
@@ -44,6 +44,9 @@
#include <sys/resourcevar.h>
#include <sys/device.h>
#include <sys/user.h>
+#ifdef INET
+#include <sys/mbuf.h>
+#endif
#include <vm/vm.h>
@@ -130,6 +133,12 @@ main()
off("SC_GS", struct sigcontext, sc_gs);
off("SC_EFLAGS", struct sigcontext, sc_eflags);
+#ifdef INET
+ off("M_NEXT", struct mbuf, m_next);
+ off("M_DATA", struct mbuf, m_data);
+ off("M_LEN", struct mbuf, m_len);
+#endif
+
#ifdef COMPAT_SVR4
off("SVR4_SIGF_HANDLER", struct svr4_sigframe, sf_handler);
off("SVR4_SIGF_UC", struct svr4_sigframe, sf_uc);
diff --git a/sys/arch/i386/i386/in_cksum.c b/sys/arch/i386/i386/in_cksum.c
deleted file mode 100644
index 91bfee78410..00000000000
--- a/sys/arch/i386/i386/in_cksum.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/* $NetBSD: in_cksum.c,v 1.9 1996/05/03 19:42:09 christos Exp $ */
-
-/*-
- * Copyright (c) 1994, 1995 Charles M. Hannum. All rights reserved.
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from tahoe: in_cksum.c 1.2 86/01/05
- * @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/mbuf.h>
-#include <netinet/in.h>
-
-/*
- * Checksum routine for Internet Protocol family headers.
- *
- * This routine is very heavily used in the network
- * code and should be modified for each CPU to be as fast as possible.
- *
- * This implementation is 386 version.
- */
-
-#define REDUCE {sum = (sum & 0xffff) + (sum >> 16);}
-#define ADDCARRY {if (sum > 0xffff) sum -= 0xffff;}
-#define SWAP {sum <<= 8;}
-#define ADVANCE(x) {w += x; mlen -= x;}
-
-/*
- * Thanks to gcc we don't have to guess
- * which registers contain sum & w.
- */
-#define Asm __asm __volatile
-#define ADD(n) Asm("addl " #n "(%2),%0" : "=r" (sum) : "0" (sum), "r" (w))
-#define ADC(n) Asm("adcl " #n "(%2),%0" : "=r" (sum) : "0" (sum), "r" (w))
-#define MOP Asm("adcl $0,%0" : "=r" (sum) : "0" (sum))
-#define UNSWAP Asm("roll $8,%0" : "=r" (sum) : "0" (sum))
-#define ADDBYTE {sum += *w; SWAP; byte_swapped ^= 1;}
-#define ADDWORD {sum += *(u_short *)w;}
-
-int
-in_cksum(m, len)
- register struct mbuf *m;
- register int len;
-{
- register u_char *w;
- register unsigned sum = 0;
- register int mlen = 0;
- int byte_swapped = 0;
-
- for (; m && len; m = m->m_next) {
- mlen = m->m_len;
- if (mlen == 0)
- continue;
- w = mtod(m, u_char *);
- if (len < mlen)
- mlen = len;
- len -= mlen;
- if (mlen < 16)
- goto short_mbuf;
- /*
- * Force to long boundary so we do longword aligned
- * memory operations
- */
- if ((3 & (long)w) != 0) {
- REDUCE;
- if ((1 & (long)w) != 0) {
- ADDBYTE;
- ADVANCE(1);
- }
- if ((2 & (long)w) != 0) {
- ADDWORD;
- ADVANCE(2);
- }
- }
- /*
- * Align 4 bytes past a 16-byte cache line boundary.
- */
- if ((4 & (long)w) == 0) {
- ADD(0);
- MOP;
- ADVANCE(4);
- }
- if ((8 & (long)w) != 0) {
- ADD(0); ADC(4);
- MOP;
- ADVANCE(8);
- }
- /*
- * Do as much of the checksum as possible 32 bits at at time.
- * In fact, this loop is unrolled to make overhead from
- * branches &c small.
- */
- while ((mlen -= 32) >= 0) {
- /*
- * Add with carry 16 words and fold in the last carry
- * by adding a 0 with carry.
- *
- * We aligned the pointer above so that the out-of-
- * order operations will cause the next cache line to
- * be preloaded while we finish with the current one.
- */
- ADD(12); ADC(0); ADC(4); ADC(8);
- ADC(28); ADC(16); ADC(20); ADC(24);
- MOP;
- w += 32;
- }
- mlen += 32;
- if (mlen >= 16) {
- ADD(12); ADC(0); ADC(4); ADC(8);
- MOP;
- ADVANCE(16);
- }
- short_mbuf:
- if (mlen >= 8) {
- ADD(0); ADC(4);
- MOP;
- ADVANCE(8);
- }
- if (mlen >= 4) {
- ADD(0);
- MOP;
- ADVANCE(4);
- }
- if (mlen > 0) {
- REDUCE;
- if (mlen >= 2) {
- ADDWORD;
- ADVANCE(2);
- }
- if (mlen >= 1) {
- ADDBYTE;
- }
- }
- }
-
- if (len)
- printf("cksum: out of data\n");
- if (byte_swapped) {
- UNSWAP;
- }
- REDUCE;
- ADDCARRY;
- return (sum ^ 0xffff);
-}
-
diff --git a/sys/arch/i386/i386/in_cksum.s b/sys/arch/i386/i386/in_cksum.s
new file mode 100644
index 00000000000..1f31b50291f
--- /dev/null
+++ b/sys/arch/i386/i386/in_cksum.s
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 1996 Dave Richards <richards@zso.dec.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Dave Richards.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <machine/asm.h>
+#include "assym.h"
+
+ .text
+ENTRY(in_cksum) # int in_cksum(struct mbuf *mp, int len): 16-bit ones complement checksum of len bytes of an mbuf chain
+ pushl %ebp # save %ebp
+ pushl %ebx # save %ebx
+ pushl %esi # save %esi
+ pushl %edi # save %edi
+ # after the four pushes: return address at 16(%esp), mp at 20(%esp), len at 24(%esp)
+ movl 20(%esp), %ebp # %ebp := mp (current mbuf)
+ movl 24(%esp), %edi # %edi := len (bytes still to checksum)
+ xorl %edx, %edx # %edx := 0 (running 32-bit sum)
+ xorl %ecx, %ecx # %ecx := 0 (%cl = final rotate count, %ch = leftover byte count)
+
+in_cksum1: orl %edi, %edi # if no bytes remain (%edi == 0)
+ je in_cksum47 # goto in_cksum47 (fold and return)
+
+ orl %ebp, %ebp # if chain ended early (%ebp == NULL)
+ je in_cksum49 # panic()
+
+ movl M_DATA(%ebp), %esi # %esi := %ebp->m_data
+ movl M_LEN(%ebp), %ebx # %ebx := %ebp->m_len
+ movl M_NEXT(%ebp), %ebp # %ebp := %ebp->m_next
+
+ cmpl %edi, %ebx # %ebx := min(%ebx, %edi)
+ jb in_cksum3 # (unsigned compare: keep %ebx if below)
+ movl %edi, %ebx # otherwise clamp to remaining len
+
+in_cksum3: subl %ebx, %edi # %edi := %edi - %ebx
+
+ cmpl $4, %ebx # if (%ebx < 4)
+ jb in_cksum42a # goto in_cksum42a (0-3 byte tail only)
+
+ movl $3, %eax # %eax := %esi & 3
+ andl %esi, %eax #
+ jmp *table1(,%eax,4) # switch (%eax)
+
+in_cksum4: # case 1: consume 3 bytes to reach alignment
+ roll $8, %edx # byte swap
+ xorb $8, %cl # toggle final rotate count (0 <-> 8)
+ addb 0(%esi), %dh # checksum byte
+ leal -3(%ebx), %ebx # %ebx := %ebx - 3
+ adcw 1(%esi), %dx # checksum word (plus carry from the byte)
+ leal 3(%esi), %esi # %esi := %esi + 3
+ jmp in_cksum7 # break
+
+in_cksum5: # case 2: consume 2 bytes to reach alignment
+ addw 0(%esi), %dx # checksum word
+ leal 2(%esi), %esi # %esi := %esi + 2
+ leal -2(%ebx), %ebx # %ebx := %ebx - 2
+ jmp in_cksum7 # break
+
+in_cksum6: # case 3: consume 1 byte to reach alignment
+ roll $8, %edx # byte swap
+ xorb $8, %cl # toggle final rotate count (0 <-> 8)
+ addb 0(%esi), %dh # checksum byte
+ leal 1(%esi), %esi # %esi := %esi + 1
+ leal -1(%ebx), %ebx # %ebx := %ebx - 1
+
+in_cksum7: adcl $0, %edx # fold pending carry into the sum
+
+in_cksum8: movb $3, %ch # %ch := %bl & 3 (bytes left after the longwords)
+ andb %bl, %ch #
+ shrl $2, %ebx # %ebx := %ebx / 4 (longword count)
+ je in_cksum42 # if (%ebx == 0)
+ # goto in_cksum42
+
+in_cksum9: movl $31, %eax # %eax := %ebx & 31
+ andl %ebx, %eax # (andl also clears the carry flag)
+ leal (%esi,%eax,4), %esi # %esi := %esi + %eax * 4
+ jmp *table2(,%eax,4) # switch (%eax): enter the adcl chain
+
+in_cksum10: leal 128(%esi), %esi # case 0: full pass, advance 128 bytes
+ movl $32, %eax # full pass consumes 32 longwords
+ adcl -128(%esi), %edx # checksum 128 bytes
+in_cksum11: adcl -124(%esi), %edx # checksum 124 bytes
+in_cksum12: adcl -120(%esi), %edx # checksum 120 bytes
+in_cksum13: adcl -116(%esi), %edx # checksum 116 bytes
+in_cksum14: adcl -112(%esi), %edx # checksum 112 bytes
+in_cksum15: adcl -108(%esi), %edx # checksum 108 bytes
+in_cksum16: adcl -104(%esi), %edx # checksum 104 bytes
+in_cksum17: adcl -100(%esi), %edx # checksum 100 bytes
+in_cksum18: adcl -96(%esi), %edx # checksum 96 bytes
+in_cksum19: adcl -92(%esi), %edx # checksum 92 bytes
+in_cksum20: adcl -88(%esi), %edx # checksum 88 bytes
+in_cksum21: adcl -84(%esi), %edx # checksum 84 bytes
+in_cksum22: adcl -80(%esi), %edx # checksum 80 bytes
+in_cksum23: adcl -76(%esi), %edx # checksum 76 bytes
+in_cksum24: adcl -72(%esi), %edx # checksum 72 bytes
+in_cksum25: adcl -68(%esi), %edx # checksum 68 bytes
+in_cksum26: adcl -64(%esi), %edx # checksum 64 bytes
+in_cksum27: adcl -60(%esi), %edx # checksum 60 bytes
+in_cksum28: adcl -56(%esi), %edx # checksum 56 bytes
+in_cksum29: adcl -52(%esi), %edx # checksum 52 bytes
+in_cksum30: adcl -48(%esi), %edx # checksum 48 bytes
+in_cksum31: adcl -44(%esi), %edx # checksum 44 bytes
+in_cksum32: adcl -40(%esi), %edx # checksum 40 bytes
+in_cksum33: adcl -36(%esi), %edx # checksum 36 bytes
+in_cksum34: adcl -32(%esi), %edx # checksum 32 bytes
+in_cksum35: adcl -28(%esi), %edx # checksum 28 bytes
+in_cksum36: adcl -24(%esi), %edx # checksum 24 bytes
+in_cksum37: adcl -20(%esi), %edx # checksum 20 bytes
+in_cksum38: adcl -16(%esi), %edx # checksum 16 bytes
+in_cksum39: adcl -12(%esi), %edx # checksum 12 bytes
+in_cksum40: adcl -8(%esi), %edx # checksum 8 bytes
+in_cksum41: adcl -4(%esi), %edx # checksum 4 bytes
+ adcl $0, %edx # fold the last carry into the sum
+
+ subl %eax, %ebx # %ebx := %ebx - %eax (longwords just summed)
+ jne in_cksum9 # if (%ebx != 0)
+ # goto in_cksum9
+
+in_cksum42: movb %ch, %bl # %ebx := leftover byte count (0-3)
+in_cksum42a: jmp *table3(,%ebx,4) # switch (%ebx)
+
+in_cksum43: # case 1:
+ roll $8, %edx # byte swap
+ xorb $8, %cl # toggle final rotate count (0 <-> 8)
+ addb 0(%esi), %dh # checksum byte
+ jmp in_cksum46 # break
+
+in_cksum44: # case 2:
+ addw 0(%esi), %dx # checksum word
+ jmp in_cksum46 # break
+
+in_cksum45: # case 3:
+ xorb $8, %cl # toggle final rotate count (0 <-> 8)
+ addw 0(%esi), %dx # checksum word
+ adcw $0, %dx # fold carry before rotating
+ roll $8, %edx # byte swap
+ addb 2(%esi), %dh # checksum byte
+
+in_cksum46: adcl $0, %edx # fold pending carry into the sum
+ jmp in_cksum1 # next mbuf
+
+in_cksum47: rorl %cl, %edx # undo byte swap if one is outstanding (%cl is 0 or 8)
+ movzwl %dx, %eax # add upper and lower words
+ shrl $16, %edx #
+ addw %dx, %ax #
+ adcw $0, %ax # fold the final carry
+ notw %ax # compute ones complement (result in low 16 bits of %eax)
+
+in_cksum48: popl %edi # restore %edi
+ popl %esi # restore %esi
+ popl %ebx # restore %ebx
+ popl %ebp # restore %ebp
+ ret # return %eax
+
+in_cksum49: pushl $panic # push ADDRESS of panic string ($ = immediate; without it the string bytes are pushed)
+ call _panic # panic()
+ leal 4(%esp), %esp # pop the argument
+ jmp in_cksum48 # restore registers and return if panic comes back
+
+ .data
+
+ .align 2 # NOTE(review): presumably 2^2 = 4-byte alignment with the a.out assembler - confirm
+
+table1: .long in_cksum8 # indexed by (%esi & 3): 0 = already 4-byte aligned
+ .long in_cksum4 # checksum 3 bytes
+ .long in_cksum5 # checksum 2 bytes
+ .long in_cksum6 # checksum 1 byte
+
+table2: .long in_cksum10 # indexed by (longword count & 31): 0 = full pass, checksum 128 bytes
+ .long in_cksum41 # checksum 4 bytes
+ .long in_cksum40 # checksum 8 bytes
+ .long in_cksum39 # checksum 12 bytes
+ .long in_cksum38 # checksum 16 bytes
+ .long in_cksum37 # checksum 20 bytes
+ .long in_cksum36 # checksum 24 bytes
+ .long in_cksum35 # checksum 28 bytes
+ .long in_cksum34 # checksum 32 bytes
+ .long in_cksum33 # checksum 36 bytes
+ .long in_cksum32 # checksum 40 bytes
+ .long in_cksum31 # checksum 44 bytes
+ .long in_cksum30 # checksum 48 bytes
+ .long in_cksum29 # checksum 52 bytes
+ .long in_cksum28 # checksum 56 bytes
+ .long in_cksum27 # checksum 60 bytes
+ .long in_cksum26 # checksum 64 bytes
+ .long in_cksum25 # checksum 68 bytes
+ .long in_cksum24 # checksum 72 bytes
+ .long in_cksum23 # checksum 76 bytes
+ .long in_cksum22 # checksum 80 bytes
+ .long in_cksum21 # checksum 84 bytes
+ .long in_cksum20 # checksum 88 bytes
+ .long in_cksum19 # checksum 92 bytes
+ .long in_cksum18 # checksum 96 bytes
+ .long in_cksum17 # checksum 100 bytes
+ .long in_cksum16 # checksum 104 bytes
+ .long in_cksum15 # checksum 108 bytes
+ .long in_cksum14 # checksum 112 bytes
+ .long in_cksum13 # checksum 116 bytes
+ .long in_cksum12 # checksum 120 bytes
+ .long in_cksum11 # checksum 124 bytes
+
+table3: .long in_cksum1 # indexed by leftover byte count (0-3): 0 = nothing left, next mbuf
+ .long in_cksum43 # checksum 1 byte
+ .long in_cksum44 # checksum 2 bytes
+ .long in_cksum45 # checksum 3 bytes
+
+panic: .asciz "in_cksum: mp == NULL" # NUL-terminated message for the in_cksum49 panic path