patch-2.1.44 linux/arch/mips/lib/memcpy.S
Next file: linux/arch/mips/lib/memset.c
Previous file: linux/arch/mips/lib/io.c
Back to the patch index
Back to the overall index
- Lines: 222
- Date:
Thu Jun 26 12:33:37 1997
- Orig file:
v2.1.43/linux/arch/mips/lib/memcpy.S
- Orig date:
Wed Dec 31 16:00:00 1969
diff -u --recursive --new-file v2.1.43/linux/arch/mips/lib/memcpy.S linux/arch/mips/lib/memcpy.S
@@ -0,0 +1,221 @@
+/* memcpy.S: Mips optimized memcpy based upon SparcLinux code.
+ *
+ * Copyright(C) 1995 Linus Torvalds
+ * Copyright(C) 1996 David S. Miller
+ * Copyright(C) 1996 Eddie C. Dost
+ *
+ * derived from:
+ * e-mail between David and Eddie.
+ */
+
+#include <asm/asm.h>
+#include <asm/regdef.h>
+
+/*
+ * MOVE_BIGCHUNK(src, dst, offset, t0..t5)
+ *
+ * Copy the 32 bytes at (offset)(src) .. (offset + 0x1c)(src) to the same
+ * offsets from dst as eight word loads and stores.  src and dst are not
+ * modified.  t0..t5 are scratch registers supplied by the caller; t0 and
+ * t1 are used twice (first and last pair of words).
+ *
+ * The words are moved in ascending address order and every store is
+ * issued only after the word that follows it has been loaded.  Since the
+ * copy loops that use this macro also walk upwards, a destination that
+ * overlaps the source from below (dst < src, word distance) never has a
+ * source word overwritten before it has been read.
+ *
+ * No load is followed directly by an instruction that uses the loaded
+ * register; this file is assembled with ".set noreorder", so the
+ * assembler will not separate such a pair for us.
+ */
+#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5) \
+ lw t0, (offset + 0x00)(src); \
+ lw t1, (offset + 0x04)(src); \
+ sw t0, (offset + 0x00)(dst); \
+ lw t2, (offset + 0x08)(src); \
+ sw t1, (offset + 0x04)(dst); \
+ lw t3, (offset + 0x0c)(src); \
+ sw t2, (offset + 0x08)(dst); \
+ lw t4, (offset + 0x10)(src); \
+ sw t3, (offset + 0x0c)(dst); \
+ lw t5, (offset + 0x14)(src); \
+ sw t4, (offset + 0x10)(dst); \
+ lw t0, (offset + 0x18)(src); \
+ sw t5, (offset + 0x14)(dst); \
+ lw t1, (offset + 0x1c)(src); \
+ sw t0, (offset + 0x18)(dst); \
+ sw t1, (offset + 0x1c)(dst);
+
+ /* Alignment cases are:
+ * 1) (src&0x3)=0x0 (dst&0x3)=0x0 can optimize
+ * 2) (src&0x3)=0x1 (dst&0x3)=0x1 can optimize
+ * 3) (src&0x3)=0x2 (dst&0x3)=0x2 can optimize
+ * 4) (src&0x3)=0x3 (dst&0x3)=0x3 can optimize
+ * 5) anything else cannot optimize
+ */
+
+ /* I hate MIPS register names... AIEEE, it's a SPARC! */
+#define o0 a0 /* dst argument; advanced as data is stored */
+#define o1 a1 /* src argument; advanced as data is loaded */
+#define o2 a2 /* len argument; bytes still to be copied */
+#define o3 a3 /* temporary used by bcopy to swap o0 and o1 */
+#define o4 t0 /* scratch: alignment test, data being moved */
+#define o5 t1 /* scratch: data being moved */
+#define o6 sp /* not referenced by the code in this file */
+#define o7 ra /* return address, target of the final "jr o7" */
+#define g0 zero /* compared against in every beq/bne below */
+#define g1 t2 /* byte count for the byte loop; 0x40/0x20 size-bit tests */
+#define g2 t3 /* scratch: data being moved */
+#define g3 t4 /* scratch: data being moved */
+#define g4 t5 /* scratch: passed to MOVE_BIGCHUNK */
+#define g5 t6 /* scratch: passed to MOVE_BIGCHUNK */
+#define g6 t7 /* copy of the original dst, moved to v0 on return */
+#define g7 t8 /* alignment/size test results and loop counters */
+
+ .text
+ .set noreorder /* branch and load delay slots are scheduled by hand */
+ .set noat
+
+ .globl bcopy
+ .globl amemmove
+ .globl memmove
+ .globl memcpy
+ .align 2
+
+/*
+ * bcopy(src, dst, len)
+ * memcpy(dst, src, len), memmove(dst, src, len), amemmove(dst, src, len)
+ *
+ * All four entry points share one body; bcopy only swaps its first two
+ * arguments and falls through.  The original dst is returned in v0.
+ *
+ * Overlapping buffers:
+ *  - src <= dst < src + len: copied one byte at a time from the top
+ *    down (copy_backward); dst == src returns without copying.
+ *  - dst less than 32 bytes below src: copied by the ascending byte
+ *    loop (end_bytes), because the word stages below load up to 32
+ *    bytes of source in one group before all of them are stored.
+ *  - otherwise the ascending word stages are used; they are safe once
+ *    dst trails src by 32 bytes or more.
+ *
+ * Registers (see the aliases above): o0 = dst, o1 = src, o2 = bytes
+ * left, g6 = saved dst, g1/g7 = counts and test results, o3-o5 and
+ * g2-g5 = scratch.  Nothing is saved on the stack.
+ *
+ * The instruction after every branch or jump is its delay slot; it is
+ * indented by one extra space and executes whether or not the branch
+ * is taken.
+ */
+bcopy:
+ move o3, o0 /* bcopy takes (src, dst, len): swap o0 and o1 */
+ move o0, o1
+ move o1, o3
+
+amemmove:
+memmove:
+memcpy: /* o0=dst o1=src o2=len */
+ move g6, o0 /* keep dst for the return value */
+ subu o4, o0, o1 /* o4 = dst - src, unsigned */
+ sltu o5, o4, o2 /* o5 = 1 iff src <= dst < src + len */
+ bne o5, g0, copy_backward
+  subu o4, o1, o0 /* o4 = src - dst, unsigned */
+
+ sltiu o5, o4, 0x20 /* o5 = 1 iff dst is 0..31 bytes below src */
+ bne o5, g0, cannot_optimize
+  move g1, o2 /* g1 = len, the byte loop's count */
+
+ xor o4, o0, o1
+ andi o4, o4, 0x3 /* zero iff src and dst agree in their low 2 bits */
+ beq o4, g0, can_align
+  sltiu g7, o2, 0x8 /* g7 = (len < 8) */
+
+ b cannot_optimize /* alignments differ: byte loop for everything */
+  move g1, o2
+
+can_align:
+ bne g7, g0, cannot_optimize /* len < 8: byte loop for everything */
+  move g1, o2
+
+ andi g7, o1, 0x1 /* len >= 8 from here on */
+
+hword_align:
+ beq g7, g0, word_align /* src already even */
+  andi g7, o1, 0x2
+
+ lbu o4, 0x00(o1) /* move one byte to make src even */
+ subu o2, o2, 0x1
+ sb o4, 0x00(o0)
+ addu o1, o1, 0x1
+ addu o0, o0, 0x1
+ andi g7, o1, 0x2
+
+word_align:
+ beq g7, g0, dword_align /* src already a multiple of 4 */
+  sltiu g7, o2, 56
+
+ lhu o4, 0x00(o1) /* move one halfword to make src a multiple of 4 */
+ subu o2, o2, 0x2
+ sh o4, 0x00(o0)
+ sltiu g7, o2, 56 /* g7 = (bytes left < 56) */
+ addu o0, o0, 0x2
+ addu o1, o1, 0x2
+
+dword_align:
+ bne g7, g0, do_end_words /* under 56 bytes: word loop only */
+  move g7, o2 /* g7 = bytes left; do_end_words shifts it to words */
+
+ andi g7, o1, 0x4
+ beq g7, zero, qword_align /* src already a multiple of 8 */
+  andi g7, o1, 0x8
+
+ lw o4, 0x00(o1) /* move one word to make src a multiple of 8 */
+ subu o2, o2, 0x4
+ sw o4, 0x00(o0)
+ addu o1, o1, 0x4
+ addu o0, o0, 0x4
+ andi g7, o1, 0x8
+
+qword_align:
+ beq g7, g0, oword_align /* src already a multiple of 16 */
+  andi g7, o1, 0x10
+
+ lw o4, 0x00(o1) /* move two words to make src a multiple of 16 */
+ lw o5, 0x04(o1)
+ subu o2, o2, 0x8
+ sw o4, 0x00(o0)
+ addu o1, o1, 0x8
+ sw o5, 0x04(o0)
+ andi g7, o1, 0x10
+ addu o0, o0, 0x8
+
+oword_align:
+ beq g7, g0, begin_movement /* src already a multiple of 32 */
+  srl g7, o2, 0x7 /* g7 = number of 128-byte blocks left */
+
+ lw g2, 0x08(o1) /* move four words to make src a multiple of 32 */
+ lw g3, 0x0c(o1)
+ lw o4, 0x00(o1)
+ lw o5, 0x04(o1)
+ sw g2, 0x08(o0)
+ subu o2, o2, 0x10
+ sw g3, 0x0c(o0)
+ addu o1, o1, 0x10
+ sw o4, 0x00(o0)
+ srl g7, o2, 0x7 /* recompute the 128-byte block count */
+ addu o0, o0, 0x10
+ sw o5, -0x0c(o0) /* o0 was just advanced: this is old o0 + 4 */
+
+ /*
+  * From here to end_words o2 is no longer decremented; each stage
+  * reads its own bit field of o2: bits 7 and up, 0x40, 0x20, 0x1c
+  * and finally 0x3.
+  */
+begin_movement:
+ beq g7, g0, 0f /* no whole 128-byte block */
+  andi g1, o2, 0x40
+
+move_128bytes:
+ MOVE_BIGCHUNK(o1, o0, 0x00, o4, o5, g2, g3, g4, g5)
+ MOVE_BIGCHUNK(o1, o0, 0x20, o4, o5, g2, g3, g4, g5)
+ MOVE_BIGCHUNK(o1, o0, 0x40, o4, o5, g2, g3, g4, g5)
+ MOVE_BIGCHUNK(o1, o0, 0x60, o4, o5, g2, g3, g4, g5)
+ subu g7, g7, 0x01
+ addu o1, o1, 0x80
+ bne g7, g0, move_128bytes
+  addu o0, o0, 0x80
+
+0:
+ beq g1, g0, 1f /* 0x40 bit of the count is clear */
+  andi g1, o2, 0x20
+
+move_64bytes:
+ MOVE_BIGCHUNK(o1, o0, 0x00, o4, o5, g2, g3, g4, g5)
+ MOVE_BIGCHUNK(o1, o0, 0x20, o4, o5, g2, g3, g4, g5)
+ addu o1, o1, 0x40
+ addu o0, o0, 0x40
+
+1:
+ beq g1, g0, do_end_words /* 0x20 bit of the count is clear */
+  andi g7, o2, 0x1c /* g7 = bytes left that form whole words */
+
+move_32bytes:
+ MOVE_BIGCHUNK(o1, o0, 0x00, o4, o5, g2, g3, g4, g5)
+ andi g7, o2, 0x1c
+ addu o1, o1, 0x20
+ addu o0, o0, 0x20
+
+do_end_words:
+ beq g7, g0, maybe_end_cruft /* no whole word left */
+  srl g7, g7, 0x2 /* bytes -> words */
+
+end_words:
+ lw o4, 0x00(o1)
+ subu g7, g7, 0x1
+ sw o4, 0x00(o0)
+ addu o1, o1, 0x4
+ bne g7, g0, end_words
+  addu o0, o0, 0x4
+
+maybe_end_cruft:
+ andi g1, o2, 0x3 /* 0..3 trailing bytes */
+
+cannot_optimize: /* g1 = number of bytes to copy one at a time */
+ beq g1, g0, out
+  move o2, g1
+
+end_bytes:
+ lbu o4, 0x00(o1)
+ subu o2, o2, 0x1
+ sb o4, 0x00(o0)
+ addu o1, o1, 0x1
+ bne o2, g0, end_bytes
+  addu o0, o0, 0x1
+
+out:
+ jr o7
+  move v0, g6 /* return the original dst */
+
+ /*
+  * src <= dst < src + len, so len is not zero here.  An ascending copy
+  * would overwrite source bytes before reading them, so copy from the
+  * last byte down to the first.
+  */
+copy_backward:
+ beq o0, o1, out /* dst == src: nothing to move */
+  addu o1, o1, o2 /* o1 = one past the last source byte */
+ addu o0, o0, o2 /* o0 = one past the last destination byte */
+
+backward_bytes:
+ lbu o4, -0x01(o1)
+ subu o2, o2, 0x1
+ subu o1, o1, 0x1
+ sb o4, -0x01(o0) /* o0 is only stepped down in the delay slot below */
+ bne o2, g0, backward_bytes
+  subu o0, o0, 0x1
+
+ jr o7
+  move v0, g6 /* return the original dst */
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov