patch-1.3.93 linux/include/asm-sparc/checksum.h
- Lines: 301
- Date: Sun Apr 21 12:30:33 1996
- Orig file: v1.3.92/linux/include/asm-sparc/checksum.h
- Orig date: Sat Nov 25 04:31:24 1995
diff -u --recursive --new-file v1.3.92/linux/include/asm-sparc/checksum.h linux/include/asm-sparc/checksum.h
@@ -1,4 +1,4 @@
-/* $Id: checksum.h,v 1.10 1995/11/25 02:31:23 davem Exp $ */
+/* $Id: checksum.h,v 1.13 1996/04/18 03:30:19 davem Exp $ */
#ifndef __SPARC_CHECKSUM_H
#define __SPARC_CHECKSUM_H
@@ -6,23 +6,67 @@
*
* Copyright(C) 1995 Linus Torvalds
* Copyright(C) 1995 Miguel de Icaza
+ * Copyright(C) 1996 David S. Miller
+ *
+ * derived from:
+ * Alpha checksum c-code
+ * ix86 inline assembly
*/
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+
+extern inline unsigned short csum_tcpudp_magic(unsigned long saddr,
+ unsigned long daddr,
+ unsigned short len,
+ unsigned short proto,
+ unsigned int sum)
+{
+ __asm__ __volatile__("
+ addcc %0, %1, %0
+ addxcc %0, %4, %0
+ addxcc %0, %5, %0
+ addx %0, %%g0, %0
+
+	! We need the carry from the addition of the two
+	! significant 16-bit halves, so we zap out the low bits
+ ! in one half, zap out the high bits in another,
+ ! shift them both up to the top 16-bits of a word
+ ! and do the carry producing addition, finally
+ ! shift the result back down to the low 16-bits.
+
+ ! Actually, we can further optimize away two shifts
+ ! because we know the low bits of the original
+ ! value will be added to zero-only bits so cannot
+ ! affect the addition result nor the final carry
+ ! bit.
+
+ sll %0, 16, %1
+ addcc %0, %1, %0 ! add and set carry, neat eh?
+ srl %0, 16, %0 ! shift back down the result
+ addx %0, %%g0, %0 ! get remaining carry bit
+ xnor %%g0, %0, %0 ! negate, sparc is cool
+ "
+ : "=&r" (sum), "=&r" (saddr)
+ : "0" (daddr), "1" (saddr), "r" (len+proto), "r" (sum));
+ return ((unsigned short) sum);
+}
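For readers who do not speak SPARC assembly, the routine above reduces to the following hedged C sketch (the _c-suffixed name is illustrative; a 64-bit accumulator stands in for the addcc/addxcc carry chain):

static unsigned short csum_tcpudp_magic_c(unsigned long saddr,
					  unsigned long daddr,
					  unsigned short len,
					  unsigned short proto,
					  unsigned int sum)
{
	unsigned long long s = sum;	/* wide accumulator replaces the carry chain */

	s += saddr;
	s += daddr;
	s += len + proto;		/* matches the "r" (len+proto) operand */
	s = (s & 0xffffffffULL) + (s >> 32);	/* fold carries back into 32 bits */
	s = (s & 0xffff) + (s >> 16);		/* 32 -> 16 bits ... */
	s = (s & 0xffff) + (s >> 16);		/* ... plus the final carry */
	return ~s;				/* complement, as the xnor does */
}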
-/* 32 bits version of the checksum routines written for the Alpha by Linus */
extern inline unsigned short from32to16(unsigned long x)
{
- /* add up 16-bit and 17-bit words for 17+c bits */
- x = (x & 0xffff) + (x >> 16);
- /* add up 16-bit and 2-bit for 16+c bit */
- x = (x & 0xffff) + (x >> 16);
- /* add up carry.. */
- x = (x & 0xffff) + (x >> 16);
+ __asm__ __volatile__("
+ addcc %0, %1, %0
+ srl %0, 16, %0
+ addx %%g0, %0, %0
+ "
+ : "=r" (x)
+ : "r" (x << 16), "0" (x));
return x;
}
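The new from32to16 is the trick described in the comment block above: adding (x << 16) to x lands high(x) + low(x) in the top halfword and leaves the inter-halfword carry in the condition codes. A hedged C rendering (illustrative name; unsigned int is used as a guaranteed 32-bit type, where the original relies on sparc's 32-bit unsigned long):

static unsigned short from32to16_c(unsigned int x)
{
	unsigned int t = x + (x << 16);	/* high and low halves meet in the top half */
	unsigned int carry = (t < x);	/* unsigned wraparound == the SPARC carry bit */

	return (t >> 16) + carry;	/* shift the sum down, add the carry back in */
}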
-extern inline unsigned long
-do_csum(unsigned char * buff, int len)
+extern inline unsigned long do_csum(unsigned char * buff, int len)
{
int odd, count;
unsigned long result = 0;
@@ -31,7 +75,7 @@
goto out;
odd = 1 & (unsigned long) buff;
if (odd) {
- result = *buff << 8;
+ result = *buff;
len--;
buff++;
}
@@ -50,7 +94,6 @@
unsigned long w = *(unsigned long *) buff;
count--;
buff += 4;
- len -= 4;
result += carry;
result += w;
carry = (w > result);
@@ -64,7 +107,7 @@
}
}
if (len & 1)
- result += (*buff) << 8;
+ result += (*buff << 8);
result = from32to16(result);
if (odd)
result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
@@ -72,9 +115,43 @@
return result;
}
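The change from "result = *buff << 8" to "result = *buff" pairs with the final byte swap: a leading odd byte is now accumulated in the low half, and the end-of-loop swap restores network byte order. A hedged worked example (values chosen purely for illustration):

static unsigned short odd_start_demo(void)
{
	/* bytes { 0x12, 0x34, 0x56 } starting at an odd address */
	unsigned int result = 0x12;	/* leading odd byte goes in the low half */

	result += 0x3456;		/* aligned big-endian halfword load */
	result = (result & 0xffff) + (result >> 16);	/* from32to16 -> 0x3468 */
	/* odd start: swap back into network byte order */
	return ((result >> 8) & 0xff) | ((result & 0xff) << 8);
	/* 0x6834 == 0x1234 + 0x5600, what an aligned walk would produce */
}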
-extern inline unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl)
+/* ihl is always 5 or greater, and almost always exactly 5; iph is
+ * always word aligned, but quite often fails to be dword aligned.
+ */
+extern inline unsigned short ip_fast_csum(const unsigned char *iph, unsigned int ihl)
{
- return ~do_csum(iph,ihl*4);
+ unsigned int sum;
+
+ __asm__ __volatile__("
+ ld [%1], %0
+ sub %2, 4, %2
+ ld [%1 + 0x4], %%g1
+ ld [%1 + 0x8], %%g2
+ addcc %%g1, %0, %0
+ addxcc %%g2, %0, %0
+ ld [%1 + 0xc], %%g1
+ ld [%1 + 0x10], %%g2
+ addxcc %%g1, %0, %0
+ addxcc %0, %%g0, %0
+1:
+ addcc %%g2, %0, %0
+ add %1, 0x4, %1
+ addxcc %0, %%g0, %0
+ subcc %2, 0x1, %2
+ bne,a 1b
+ ld [%1 + 0x10], %%g2
+
+ sll %0, 16, %2
+ addcc %0, %2, %2
+ srl %2, 16, %0
+ addx %0, %%g0, %2
+ xnor %%g0, %2, %0
+2:
+ "
+ : "=&r" (sum), "=&r" (iph), "=&r" (ihl)
+ : "1" (iph), "2" (ihl)
+ : "g1", "g2");
+ return sum;
}
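The assembly unrolls the first five header words (the common ihl == 5 case) and loops over any IP options. Functionally it amounts to this hedged C sketch (illustrative _c name, wide accumulator in place of the carry chain):

static unsigned short ip_fast_csum_c(const unsigned char *iph, unsigned int ihl)
{
	const unsigned int *p = (const unsigned int *) iph;	/* word aligned */
	unsigned long long sum = 0;
	unsigned int i;

	for (i = 0; i < ihl; i++)	/* ihl counts 32-bit words, >= 5 */
		sum += p[i];
	sum = (sum & 0xffffffffULL) + (sum >> 32);	/* fold carries to 32 bits */
	sum = (sum & 0xffff) + (sum >> 16);		/* then down to 16 bits */
	sum = (sum & 0xffff) + (sum >> 16);
	return ~sum;
}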
/*
@@ -91,13 +168,101 @@
*/
extern inline unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum)
{
- unsigned long result = do_csum(buff, len);
+ __asm__ __volatile__("
+ mov 0, %%g5 ! g5 = result
+ cmp %1, 0
+ bgu,a 1f
+ andcc %0, 1, %%g7 ! g7 = odd
+
+ b,a 9f
+
+1:
+ be,a 1f
+ srl %1, 1, %%g6 ! g6 = count = (len >> 1)
+
+ sub %1, 1, %1 ! if(odd) { result = *buff;
+ ldub [%0], %%g5 ! len--;
+ add %0, 1, %0 ! buff++ }
+
+ srl %1, 1, %%g6
+1:
+ cmp %%g6, 0 ! if (count) {
+ be,a 8f
+ andcc %1, 1, %%g0
+
+ andcc %0, 2, %%g0 ! if (2 & buff) {
+ be,a 1f
+ srl %%g6, 1, %%g6
+
+ sub %1, 2, %1 ! result += *(unsigned short *) buff;
+ lduh [%0], %%g1 ! count--;
+ sub %%g6, 1, %%g6 ! len -= 2;
+ add %%g1, %%g5, %%g5! buff += 2;
+ add %0, 2, %0 ! }
+
+ srl %%g6, 1, %%g6
+1:
+ cmp %%g6, 0 ! if (count) {
+ be,a 2f
+ andcc %1, 2, %%g0
+
+ ld [%0], %%g1 ! csum aligned 32bit words
+1:
+ add %0, 4, %0
+ addcc %%g1, %%g5, %%g5
+ addx %%g5, %%g0, %%g5
+ subcc %%g6, 1, %%g6
+ bne,a 1b
+ ld [%0], %%g1
+
+ sethi %%hi(0xffff), %%g3
+ srl %%g5, 16, %%g2
+ or %%g3, %%lo(0xffff), %%g3
+ and %%g5, %%g3, %%g5
+ add %%g2, %%g5, %%g5! }
+
+ andcc %1, 2, %%g0
+2:
+ be,a 8f ! if (len & 2) {
+ andcc %1, 1, %%g0
+
+ lduh [%0], %%g1 ! result += *(unsigned short *) buff;
+ add %%g5, %%g1, %%g5! buff += 2;
+ add %0, 2, %0 ! }
+
+
+ andcc %1, 1, %%g0
+8:
+ be,a 1f ! if (len & 1) {
+ sll %%g5, 16, %%g1
+
+ ldub [%0], %%g1
+ sll %%g1, 8, %%g1 ! result += (*buff << 8);
+ add %%g5, %%g1, %%g5! }
+
+ sll %%g5, 16, %%g1
+1:
+ addcc %%g1, %%g5, %%g5! result = from32to16(result);
+ srl %%g5, 16, %%g1
+ addx %%g0, %%g1, %%g5
+
+ orcc %%g7, %%g0, %%g0! if(odd) {
+ be 9f
+ srl %%g5, 8, %%g1
+
+ and %%g5, 0xff, %%g2! result = ((result >> 8) & 0xff) |
+ and %%g1, 0xff, %%g1! ((result & 0xff) << 8);
+ sll %%g2, 8, %%g2
+ or %%g2, %%g1, %%g5! }
+9:
+ addcc %2, %%g5, %2 ! add result and sum with carry
+ addx %%g0, %2, %2
+ " :
+ "=&r" (buff), "=&r" (len), "=&r" (sum) :
+ "0" (buff), "1" (len), "2" (sum) :
+ "g1", "g2", "g3", "g5", "g6", "g7");
- /* add in old sum, and carry.. */
- result += sum;
- /* 32+c bits -> 32 bits */
- result = (result & 0xffff) + (result >> 16);
- return result;
+ return sum;
}
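The last two instructions (addcc/addx on %2) merge the freshly computed block sum into the caller's running sum with an end-around carry; in C this is (illustrative helper, not part of the header):

static unsigned int csum_add_demo(unsigned int sum, unsigned int result)
{
	sum += result;			/* addcc: 32-bit add, sets the carry */
	return sum + (sum < result);	/* addx: fold the carry back in */
}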
/*
@@ -136,27 +301,17 @@
/*
* Fold a partial checksum without adding pseudo headers
*/
-
-static inline unsigned short csum_fold(unsigned int sum)
+extern inline unsigned int csum_fold(unsigned int sum)
{
- sum = (sum & 0xffff) + (sum >> 16);
- sum = (sum & 0xffff) + (sum >> 16);
- return ~sum;
-}
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-extern inline unsigned short int csum_tcpudp_magic(unsigned long saddr,
- unsigned long daddr,
- unsigned short len,
- unsigned short proto,
- unsigned int sum)
-{
- return ~from32to16 (((saddr >> 16) + (saddr & 0xffff) + (daddr >> 16)
- + (daddr & 0xffff) + (sum >> 16) +
- (sum & 0xffff) + proto + len));
+ __asm__ __volatile__("
+ addcc %0, %1, %0
+ srl %0, 16, %0
+ addx %%g0, %0, %0
+ xnor %%g0, %0, %0
+ "
+ : "=r" (sum)
+ : "r" (sum << 16), "0" (sum));
+ return sum;
}
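csum_fold applies the same shift-and-carry fold shown earlier for from32to16, then complements the result. Taken together, the primitives compose in the usual way; a hedged usage sketch (the function and buffer are hypothetical, IPPROTO_UDP is 17):

/* Illustrative caller, not part of this patch: UDP checksum over a
 * pseudo-header payload already laid out at buff. */
static unsigned short udp_check_demo(unsigned char *buff, int len,
				     unsigned long saddr, unsigned long daddr)
{
	unsigned int sum = csum_partial(buff, len, 0);

	return csum_tcpudp_magic(saddr, daddr, len, 17 /* IPPROTO_UDP */, sum);
}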
#endif /* !(__SPARC_CHECKSUM_H) */