diff options
author | Brad Smith <brad@cvs.openbsd.org> | 2005-05-21 19:13:56 +0000 |
---|---|---|
committer | Brad Smith <brad@cvs.openbsd.org> | 2005-05-21 19:13:56 +0000 |
commit | 0196610a51a1adbf3128c47f06d926fa8e0b217a (patch) | |
tree | 149a5f4779b848ee70160438b7b3101d0f6c99c0 /sys/arch/i386 | |
parent | daf5e16c9c0498069e4e5e5e4b9d51be1d3c6aed (diff) |
add i386 optimized in4_cksum
From NetBSD
ok deraadt@
Diffstat (limited to 'sys/arch/i386')
-rw-r--r-- | sys/arch/i386/conf/files.i386 | 3 | ||||
-rw-r--r-- | sys/arch/i386/i386/genassym.cf | 10 | ||||
-rw-r--r-- | sys/arch/i386/i386/in_cksum.s | 566 |
3 files changed, 361 insertions, 218 deletions
diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386 index b946f3f3bfe..c92ad29ca6c 100644 --- a/sys/arch/i386/conf/files.i386 +++ b/sys/arch/i386/conf/files.i386 @@ -1,4 +1,4 @@ -# $OpenBSD: files.i386,v 1.133 2005/05/01 21:36:56 brad Exp $ +# $OpenBSD: files.i386,v 1.134 2005/05/21 19:13:55 brad Exp $ # # new style config file for i386 architecture # @@ -21,7 +21,6 @@ file arch/i386/i386/disksubr.c disk file arch/i386/i386/est.c !small_kernel & i686_cpu file arch/i386/i386/gdt.c file arch/i386/i386/in_cksum.s inet -file netinet/in4_cksum.c inet file arch/i386/i386/ipx_cksum.c ipx file arch/i386/i386/machdep.c file arch/i386/i386/via.c i686_cpu diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf index 13147e84556..95c33aa1d58 100644 --- a/sys/arch/i386/i386/genassym.cf +++ b/sys/arch/i386/i386/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.19 2004/12/24 21:22:00 pvalchev Exp $ +# $OpenBSD: genassym.cf,v 1.20 2005/05/21 19:13:55 brad Exp $ # # Copyright (c) 1982, 1990 The Regents of the University of California. # All rights reserved. @@ -37,6 +37,11 @@ include <sys/device.h> include <sys/user.h> ifdef INET include <sys/mbuf.h> +include <sys/socketvar.h> +include <netinet/in.h> +include <netinet/in_systm.h> +include <netinet/ip.h> +include <netinet/ip_var.h> endif include <uvm/uvm_extern.h> @@ -176,6 +181,9 @@ member mtx_wantipl member mtx_oldipl member mtx_owner +define IP_SRC offsetof(struct ip, ip_src) +define IP_DST offsetof(struct ip, ip_dst) + define P_MD_TSS_SEL offsetof(struct proc, p_md.md_tss_sel) define CPU_INFO_SELF offsetof(struct cpu_info, ci_self) diff --git a/sys/arch/i386/i386/in_cksum.s b/sys/arch/i386/i386/in_cksum.s index 9446b3e0726..c1e55e8ea33 100644 --- a/sys/arch/i386/i386/in_cksum.s +++ b/sys/arch/i386/i386/in_cksum.s @@ -1,9 +1,13 @@ -/* $OpenBSD: in_cksum.s,v 1.6 2003/04/17 03:42:14 drahn Exp $ */ +/* $OpenBSD: in_cksum.s,v 1.7 2005/05/21 19:13:55 brad Exp $ */ +/* $NetBSD: in_cksum.S,v 1.2 2003/08/07 16:27:54 agc Exp $ */ -/* - * Copyright (c) 1996 Dave Richards <richards@zso.dec.com> +/*- + * Copyright (c) 1998, 2001 The NetBSD Foundation, Inc. * All rights reserved. * + * This code is derived from software contributed to The NetBSD Foundation + * by Charles M. Hannum. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -14,221 +18,353 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by Dave Richards. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ + #include <machine/asm.h> #include "assym.h" - .text +/* LINTSTUB: include <sys/types.h> */ +/* LINTSTUB: include <machine/param.h> */ +/* LINTSTUB: include <sys/mbuf.h> */ +/* LINTSTUB: include <netinet/in.h> */ + +/* + * Checksum routine for Internet Protocol family headers. + * + * in_cksum(m, len) + * + * Registers used: + * %eax = sum + * %ebx = m->m_data + * %cl = rotation count to unswap + * %edx = m->m_len + * %ebp = m + * %esi = len + */ + +#define SWAP \ + roll $8, %eax ; \ + xorb $8, %cl + +#define UNSWAP \ + roll %cl, %eax + +#define MOP \ + adcl $0, %eax + +#define ADVANCE(n) \ + leal n(%ebx), %ebx ; \ + leal -n(%edx), %edx ; \ + +#define ADDBYTE \ + SWAP ; \ + addb (%ebx), %ah + +#define ADDWORD \ + addw (%ebx), %ax + +#define ADD(n) \ + addl n(%ebx), %eax + +#define ADC(n) \ + adcl n(%ebx), %eax + +#define REDUCE \ + movzwl %ax, %edx ; \ + shrl $16, %eax ; \ + addw %dx, %ax ; \ + adcw $0, %ax + + +/* LINTSTUB: Func: int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len) */ +ENTRY(in4_cksum) + pushl %ebp + pushl %ebx + pushl %esi + + movl 16(%esp), %ebp + movzbl 20(%esp), %eax /* sum = nxt */ + movl 24(%esp), %edx /* %edx = off */ + movl 28(%esp), %esi /* %esi = len */ + testl %eax, %eax + jz .Lmbuf_loop_0 /* skip if nxt == 0 */ + movl M_DATA(%ebp), %ebx + addl %esi, %eax /* sum += len */ + shll $8, %eax /* sum = htons(sum) */ + + ADD(IP_SRC) /* sum += ip->ip_src */ + ADC(IP_DST) /* sum += ip->ip_dst */ + MOP +.Lmbuf_loop_0: + testl %ebp, %ebp + jz .Lout_of_mbufs + + movl M_DATA(%ebp), %ebx /* %ebx = m_data */ + movl M_LEN(%ebp), %ecx /* %ecx = m_len */ + movl M_NEXT(%ebp), %ebp + + subl %ecx, %edx /* %edx = off - m_len */ + jnb .Lmbuf_loop_0 + + addl %edx, %ebx /* %ebx = m_data + off - m_len */ + negl %edx /* %edx = m_len - off */ + addl %ecx, %ebx /* %ebx = m_data + off */ + xorb %cl, %cl + + /* + * The len == 0 case is handled really inefficiently, by going through + * the whole short_mbuf path once to get back to mbuf_loop_1 -- but + * this case never happens in practice, so it's sufficient that it + * doesn't explode. + */ + jmp .Lin4_entry + + +/* LINTSTUB: Func: int in_cksum(struct mbuf *m, int len) */ ENTRY(in_cksum) - pushl %ebp # save %ebp - pushl %ebx # save %ebx - pushl %esi # save %esi - pushl %edi # save %edi - - movl 20(%esp), %ebp # %ebp := mp - movl 24(%esp), %edi # %edi := len - xorl %edx, %edx # %edx := 0 - xorl %ecx, %ecx # %ecx := 0 - -in_cksum1: orl %edi, %edi # if (%edi == 0) - je in_cksum47 # goto in_cksum47 - - orl %ebp, %ebp # if (%ebp == NULL) - je in_cksum49 # panic() - - movl M_DATA(%ebp), %esi # %esi := %ebp->m_data - movl M_LEN(%ebp), %ebx # %ebx := %ebp->m_len - movl M_NEXT(%ebp), %ebp # %ebp := %ebp->m_next - - cmpl %edi, %ebx # %ebx := min(%ebx, %edi) - jb in_cksum3 # - movl %edi, %ebx # - -in_cksum3: subl %ebx, %edi # %edi := %edi - %ebx - - cmpl $4, %ebx # if (%ebx < 4) - jb in_cksum42a # goto in_cksum42a - - movl $3, %eax # %eax := %esi & 3 - andl %esi, %eax # - jmp *table1(,%eax,4) # switch (%eax) - -in_cksum4: # case 1: - roll $8, %edx # byte swap - xorb $8, %cl # re-align checksum - addb 0(%esi), %dh # checksum byte - leal -3(%ebx), %ebx # %ebx := %ebx - 3 - adcw 1(%esi), %dx # checksum word - leal 3(%esi), %esi # %esi := %esi + 3 - jmp in_cksum7 # break - -in_cksum5: # case 2: - addw 0(%esi), %dx # checksum word - leal 2(%esi), %esi # %esi := %esi + 2 - leal -2(%ebx), %ebx # %ebx := %ebx - 2 - jmp in_cksum7 # break - -in_cksum6: # case 3: - roll $8, %edx # byte swap - xorb $8, %cl # re-align checksum - addb 0(%esi), %dh # checksum byte - leal 1(%esi), %esi # %esi := %esi + 1 - leal -1(%ebx), %ebx # %ebx := %ebx - 1 - -in_cksum7: adcl $0, %edx # complete checksum - -in_cksum8: movb $3, %ch # %ch := %bl & 3 - andb %bl, %ch # - shrl $2, %ebx # %ebx := %ebx / 4 - je in_cksum42 # ig (%ebx == 0) - # goto in_cksum42 - -in_cksum9: movl $31, %eax # %eax := %ebx & 31 - andl %ebx, %eax # - leal (%esi,%eax,4), %esi # %esi := %esi + %eax * 4 - jmp *table2(,%eax,4) # switch (%eax) - -in_cksum10: leal 128(%esi), %esi # Ugh! - movl $32, %eax # Ugh! - adcl -128(%esi), %edx # checksum 128 bytes -in_cksum11: adcl -124(%esi), %edx # checksum 124 bytes -in_cksum12: adcl -120(%esi), %edx # checksum 120 bytes -in_cksum13: adcl -116(%esi), %edx # checksum 116 bytes -in_cksum14: adcl -112(%esi), %edx # checksum 112 bytes -in_cksum15: adcl -108(%esi), %edx # checksum 108 bytes -in_cksum16: adcl -104(%esi), %edx # checksum 104 bytes -in_cksum17: adcl -100(%esi), %edx # checksum 100 bytes -in_cksum18: adcl -96(%esi), %edx # checksum 96 bytes -in_cksum19: adcl -92(%esi), %edx # checksum 92 bytes -in_cksum20: adcl -88(%esi), %edx # checksum 88 bytes -in_cksum21: adcl -84(%esi), %edx # checksum 84 bytes -in_cksum22: adcl -80(%esi), %edx # checksum 80 bytes -in_cksum23: adcl -76(%esi), %edx # checksum 76 bytes -in_cksum24: adcl -72(%esi), %edx # checksum 72 bytes -in_cksum25: adcl -68(%esi), %edx # checksum 68 bytes -in_cksum26: adcl -64(%esi), %edx # checksum 64 bytes -in_cksum27: adcl -60(%esi), %edx # checksum 60 bytes -in_cksum28: adcl -56(%esi), %edx # checksum 56 bytes -in_cksum29: adcl -52(%esi), %edx # checksum 52 bytes -in_cksum30: adcl -48(%esi), %edx # checksum 48 bytes -in_cksum31: adcl -44(%esi), %edx # checksum 44 bytes -in_cksum32: adcl -40(%esi), %edx # checksum 40 bytes -in_cksum33: adcl -36(%esi), %edx # checksum 36 bytes -in_cksum34: adcl -32(%esi), %edx # checksum 32 bytes -in_cksum35: adcl -28(%esi), %edx # checksum 28 bytes -in_cksum36: adcl -24(%esi), %edx # checksum 24 bytes -in_cksum37: adcl -20(%esi), %edx # checksum 20 bytes -in_cksum38: adcl -16(%esi), %edx # checksum 16 bytes -in_cksum39: adcl -12(%esi), %edx # checksum 12 bytes -in_cksum40: adcl -8(%esi), %edx # checksum 8 bytes -in_cksum41: adcl -4(%esi), %edx # checksum 4 bytes - adcl $0, %edx # complete checksum - - subl %eax, %ebx # %ebx := %ebx - %eax - jne in_cksum9 # if (%ebx != 0) - # goto in_cksum9 - -in_cksum42: movb %ch, %bl # %ebx := byte count -in_cksum42a: jmp *table3(,%ebx,4) # switch (%ebx) - -in_cksum43: # case 1: - roll $8, %edx # byte swap - xorb $8, %cl # re-align checksum - addb 0(%esi), %dh # checksum byte - jmp in_cksum46 # break - -in_cksum44: # case 2: - addw 0(%esi), %dx # checksum word - jmp in_cksum46 # break - -in_cksum45: # case 3: - xorb $8, %cl # re-align checksum - addw 0(%esi), %dx # checksum word - adcw $0, %dx # complete checksum - roll $8, %edx # byte swap - addb 2(%esi), %dh # checksum byte - -in_cksum46: adcl $0, %edx # complete checksum - jmp in_cksum1 # next mbuf - -in_cksum47: rorl %cl, %edx # re-align checksum - movzwl %dx, %eax # add uppwe and lowe words - shrl $16, %edx # - addw %dx, %ax # - adcw $0, %ax # complete checksum - notw %ax # compute ones complement - -in_cksum48: popl %edi # restore %edi - popl %esi # restore %esi - popl %ebx # restore %ebx - popl %ebp # restore %ebp - ret # return %eax - -in_cksum49: pushl %edi # len - bytes checksummed - pushl $warning # push warning string - call _C_LABEL(printf) # printf() - leal 8(%esp), %esp # - jmp in_cksum48 # - - .data - - .align 4 - -table1: .long in_cksum8 # 4-byte aligned - .long in_cksum4 # checksum 3 bytes - .long in_cksum5 # checksum 2 bytes - .long in_cksum6 # checksum 1 byte - -table2: .long in_cksum10 # checksum 128 bytes - .long in_cksum41 # checksum 4 bytes - .long in_cksum40 # checksum 8 bytes - .long in_cksum39 # checksum 12 bytes - .long in_cksum38 # checksum 16 bytes - .long in_cksum37 # checksum 20 bytes - .long in_cksum36 # checksum 24 bytes - .long in_cksum35 # checksum 28 bytes - .long in_cksum34 # checksum 32 bytes - .long in_cksum33 # checksum 36 bytes - .long in_cksum32 # checksum 40 bytes - .long in_cksum31 # checksum 44 bytes - .long in_cksum30 # checksum 48 bytes - .long in_cksum29 # checksum 52 bytes - .long in_cksum28 # checksum 56 bytes - .long in_cksum27 # checksum 60 bytes - .long in_cksum26 # checksum 64 bytes - .long in_cksum25 # checksum 68 bytes - .long in_cksum24 # checksum 72 bytes - .long in_cksum23 # checksum 76 bytes - .long in_cksum22 # checksum 80 bytes - .long in_cksum21 # checksum 84 bytes - .long in_cksum20 # checksum 88 bytes - .long in_cksum19 # checksum 92 bytes - .long in_cksum18 # checksum 96 bytes - .long in_cksum17 # checksum 100 bytes - .long in_cksum16 # checksum 104 bytes - .long in_cksum15 # checksum 108 bytes - .long in_cksum14 # checksum 112 bytes - .long in_cksum13 # checksum 116 bytes - .long in_cksum12 # checksum 120 bytes - .long in_cksum11 # checksum 124 bytes - -table3: .long in_cksum1 # next mbuf - .long in_cksum43 # checksum 1 byte - .long in_cksum44 # checksum 2 bytes - .long in_cksum45 # checksum 3 bytes - -warning: .asciz "in_cksum: out of data by %u\n" + pushl %ebp + pushl %ebx + pushl %esi + + movl 16(%esp), %ebp + movl 20(%esp), %esi + xorl %eax, %eax + xorb %cl, %cl + +.Lmbuf_loop_1: + testl %esi, %esi + jz .Ldone + +.Lmbuf_loop_2: + testl %ebp, %ebp + jz .Lout_of_mbufs + + movl M_DATA(%ebp), %ebx + movl M_LEN(%ebp), %edx + movl M_NEXT(%ebp), %ebp + +.Lin4_entry: + cmpl %esi, %edx + jbe 1f + movl %esi, %edx + +1: + subl %edx, %esi + + cmpl $32, %edx + jb .Lshort_mbuf + + testb $3, %bl + jz .Ldword_aligned + + testb $1, %bl + jz .Lbyte_aligned + + ADDBYTE + ADVANCE(1) + MOP + + testb $2, %bl + jz .Lword_aligned + +.Lbyte_aligned: + ADDWORD + ADVANCE(2) + MOP + +.Lword_aligned: +.Ldword_aligned: + testb $4, %bl + jnz .Lqword_aligned + + ADD(0) + ADVANCE(4) + MOP + +.Lqword_aligned: + testb $8, %bl + jz .Loword_aligned + + ADD(0) + ADC(4) + ADVANCE(8) + MOP + +.Loword_aligned: + subl $128, %edx + jb .Lfinished_128 + +.Lloop_128: + ADD(12) + ADC(0) + ADC(4) + ADC(8) + ADC(28) + ADC(16) + ADC(20) + ADC(24) + ADC(44) + ADC(32) + ADC(36) + ADC(40) + ADC(60) + ADC(48) + ADC(52) + ADC(56) + ADC(76) + ADC(64) + ADC(68) + ADC(72) + ADC(92) + ADC(80) + ADC(84) + ADC(88) + ADC(108) + ADC(96) + ADC(100) + ADC(104) + ADC(124) + ADC(112) + ADC(116) + ADC(120) + leal 128(%ebx), %ebx + MOP + + subl $128, %edx + jnb .Lloop_128 + +.Lfinished_128: + subl $32-128, %edx + jb .Lfinished_32 + +.Lloop_32: + ADD(12) + ADC(0) + ADC(4) + ADC(8) + ADC(28) + ADC(16) + ADC(20) + ADC(24) + leal 32(%ebx), %ebx + MOP + + subl $32, %edx + jnb .Lloop_32 + +.Lfinished_32: +.Lshort_mbuf: + testb $16, %dl + jz .Lfinished_16 + + ADD(12) + ADC(0) + ADC(4) + ADC(8) + leal 16(%ebx), %ebx + MOP + +.Lfinished_16: + testb $8, %dl + jz .Lfinished_8 + + ADD(0) + ADC(4) + leal 8(%ebx), %ebx + MOP + +.Lfinished_8: + testb $4, %dl + jz .Lfinished_4 + + ADD(0) + leal 4(%ebx), %ebx + MOP + +.Lfinished_4: + testb $3, %dl + jz .Lmbuf_loop_1 + + testb $2, %dl + jz .Lfinished_2 + + ADDWORD + leal 2(%ebx), %ebx + MOP + + testb $1, %dl + jz .Lfinished_1 + +.Lfinished_2: + ADDBYTE + MOP + +.Lfinished_1: +.Lmbuf_done: + testl %esi, %esi + jnz .Lmbuf_loop_2 + +.Ldone: + UNSWAP + REDUCE + notw %ax + +.Lreturn: + popl %esi + popl %ebx + popl %ebp + ret + +.Lout_of_mbufs: + pushl $1f + call _C_LABEL(printf) + leal 4(%esp), %esp + jmp .Lreturn +1: + .asciz "cksum: out of data\n" |