diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 960af5192..7831d3bb7 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,10 @@ +2007-04-27 Kazu Hirata + + * libc/machine/m68k/Makefile.am (lib_a_SOURCES): Add memcpy.S + and memset.S. + * libc/machine/m68k/Makefile.in: Regenerate. + * libc/machine/m68k/memcpy.S, libc/machine/m68k/memset.S: New. + 2007-04-26 Patrick Mansfield * libc/include/ieeefp.h: use prefixed __ieeefp_ macros that can be diff --git a/newlib/libc/machine/m68k/Makefile.am b/newlib/libc/machine/m68k/Makefile.am index d2a19f1fa..11d78eb3c 100644 --- a/newlib/libc/machine/m68k/Makefile.am +++ b/newlib/libc/machine/m68k/Makefile.am @@ -8,7 +8,7 @@ AM_CCASFLAGS = $(INCLUDES) noinst_LIBRARIES = lib.a -lib_a_SOURCES = setjmp.S strcpy.c strlen.c +lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S lib_a_CCASFLAGS=$(AM_CCASFLAGS) lib_a_CFLAGS=$(AM_CFLAGS) diff --git a/newlib/libc/machine/m68k/Makefile.in b/newlib/libc/machine/m68k/Makefile.in index b812487f2..0615e51d4 100644 --- a/newlib/libc/machine/m68k/Makefile.in +++ b/newlib/libc/machine/m68k/Makefile.in @@ -56,7 +56,8 @@ ARFLAGS = cru lib_a_AR = $(AR) $(ARFLAGS) lib_a_LIBADD = am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-strcpy.$(OBJEXT) \ - lib_a-strlen.$(OBJEXT) + lib_a-strlen.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \ + lib_a-memset.$(OBJEXT) lib_a_OBJECTS = $(am_lib_a_OBJECTS) DEFAULT_INCLUDES = -I. -I$(srcdir) depcomp = @@ -181,7 +182,7 @@ AUTOMAKE_OPTIONS = cygnus INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) AM_CCASFLAGS = $(INCLUDES) noinst_LIBRARIES = lib.a -lib_a_SOURCES = setjmp.S strcpy.c strlen.c +lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S lib_a_CCASFLAGS = $(AM_CCASFLAGS) lib_a_CFLAGS = $(AM_CFLAGS) ACLOCAL_AMFLAGS = -I ../../.. 
@@ -249,6 +250,18 @@ lib_a-setjmp.o: setjmp.S lib_a-setjmp.obj: setjmp.S $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-setjmp.obj `if test -f 'setjmp.S'; then $(CYGPATH_W) 'setjmp.S'; else $(CYGPATH_W) '$(srcdir)/setjmp.S'; fi` +lib_a-memcpy.o: memcpy.S + $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.o `test -f 'memcpy.S' || echo '$(srcdir)/'`memcpy.S + +lib_a-memcpy.obj: memcpy.S + $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.obj `if test -f 'memcpy.S'; then $(CYGPATH_W) 'memcpy.S'; else $(CYGPATH_W) '$(srcdir)/memcpy.S'; fi` + +lib_a-memset.o: memset.S + $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.o `test -f 'memset.S' || echo '$(srcdir)/'`memset.S + +lib_a-memset.obj: memset.S + $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.obj `if test -f 'memset.S'; then $(CYGPATH_W) 'memset.S'; else $(CYGPATH_W) '$(srcdir)/memset.S'; fi` + .c.o: $(COMPILE) -c $< diff --git a/newlib/libc/machine/m68k/memcpy.S b/newlib/libc/machine/m68k/memcpy.S new file mode 100644 index 000000000..300397102 --- /dev/null +++ b/newlib/libc/machine/m68k/memcpy.S @@ -0,0 +1,93 @@ +/* a-memcpy.s -- memcpy, optimised for m68k asm + * + * Copyright (c) 2007 mocom software GmbH & Co KG) + * + * The authors hereby grant permission to use, copy, modify, distribute, + * and license this software and its documentation for any purpose, provided + * that existing copyright notices are retained in all copies and that this + * notice is included verbatim in any distributions. No written agreement, + * license, or royalty fee is required for any of the authorized uses. + * Modifications to this software may be copyrighted by their authors + * and need not follow the licensing terms described here, provided that + * the new terms are clearly indicated on the first page of each file where + * they apply. 
+ */
+
+	.text
+	.align	4
+
+	.globl	memcpy
+	.type	memcpy, @function
+
+/*   memcpy, optimised
+ *
+ *   void *memcpy (void *dest, const void *src, size_t len)
+ *
+ *   in:   4(%sp) = dest, 8(%sp) = src, 12(%sp) = len
+ *   out:  %d0 = dest (reloaded from the stack just before returning)
+ *   uses: %d0, %d1, %a0, %a1
+ *
+ *   strategy:
+ *   - no argument testing (the original memcpy from the GNU lib does
+ *     no checking either)
+ *   - copies of fewer than 8 bytes go straight to the byte loop
+ *   - make sure the destination pointer (the write pointer) is long word
+ *     aligned.  This is the best you can do, because writing to unaligned
+ *     addresses can be the most costly thing you could do.
+ *   - once the destination is aligned, the bulk is moved by a loop that
+ *     is unrolled to four long words (16 bytes) per iteration; the loop
+ *     is entered part-way through to dispose of an 8-byte remainder.
+ *   - the last 0-3 bytes are moved one at a time
+ *
+ *   NOTE(review): only dest is aligned.  The word and long word reads
+ *   through %a1 may still hit a misaligned address; confirm that every
+ *   CPU this file is built for tolerates that.
+ */
+
+memcpy:
+	move.l	4(%sp),%a0	| dest ptr
+	move.l	8(%sp),%a1	| src ptr
+	move.l	12(%sp),%d1	| len
+	cmp.l	#8,%d1		| if fewer than 8 bytes to transfer,
+	blo	.Lresidue	| do not optimise
+
+	/* align dest */
+	move.l	%a0,%d0		| copy of dest
+	neg.l	%d0
+	and.l	#3,%d0		| d0 = bytes (0-3) up to the next long word
+	beq	2f		| is aligned?
+	sub.l	%d0,%d1		| these bytes come off the length
+	lsr.l	#1,%d0		| word align needed?
+	bcc	1f
+	move.b	(%a1)+,(%a0)+
+1:
+	lsr.l	#1,%d0		| long align needed?
+	bcc	2f
+	move.w	(%a1)+,(%a0)+
+2:
+
+	/* long word transfers */
+	move.l	%d1,%d0
+	and.l	#3,%d1		| byte residue
+	lsr.l	#3,%d0
+	bcc	1f		| carry set for 4-byte residue
+	move.l	(%a1)+,(%a0)+
+1:
+	lsr.l	#1,%d0		| number of 16-byte transfers
+	bcc	.Lcopy		| carry set for 8-byte residue
+	bra	.Lcopy8
+
+1:
+	move.l	(%a1)+,(%a0)+
+	move.l	(%a1)+,(%a0)+
+.Lcopy8:
+	move.l	(%a1)+,(%a0)+
+	move.l	(%a1)+,(%a0)+
+.Lcopy:
+#if !defined (__mcoldfire__)
+	dbra	%d0,1b		| counts in the low word of d0 only ...
+	sub.l	#0x10000,%d0	| ... so step the high word by hand; without
+				| this, copies of 1 MiB and more lose
+				| 65536 blocks per high-word step
+#else
+	subq.l	#1,%d0
+#endif
+	bpl	1b		| more 16-byte blocks to go
+	bra	.Lresidue
+
+1:
+	move.b	(%a1)+,(%a0)+	| move residue bytes
+
+.Lresidue:
+#if !defined (__mcoldfire__)
+	dbra	%d1,1b		| loop until done (d1 is below 8 here)
+#else
+	subq.l	#1,%d1
+	bpl	1b
+#endif
+	move.l	4(%sp),%d0	| return value
+	rts
diff --git a/newlib/libc/machine/m68k/memset.S b/newlib/libc/machine/m68k/memset.S
new file mode 100644
index 000000000..10e3c3e2c
--- /dev/null
+++ b/newlib/libc/machine/m68k/memset.S
@@ -0,0 +1,97 @@
+/* a-memset.s -- memset, optimised for fido asm
+ *
+ * Copyright (c) 2007 mocom software GmbH & Co KG)
+ *
+ * The
authors hereby grant permission to use, copy, modify, distribute,
+ * and license this software and its documentation for any purpose, provided
+ * that existing copyright notices are retained in all copies and that this
+ * notice is included verbatim in any distributions. No written agreement,
+ * license, or royalty fee is required for any of the authorized uses.
+ * Modifications to this software may be copyrighted by their authors
+ * and need not follow the licensing terms described here, provided that
+ * the new terms are clearly indicated on the first page of each file where
+ * they apply.
+ */
+
+	.text
+	.align	4
+
+	.globl	memset
+	.type	memset, @function
+
+|	memset, optimised
+|
+|	void *memset (void *dest, int value, size_t len)
+|
+|	in:   4(%sp) = dest, 8(%sp) = value, 12(%sp) = len
+|	out:  %d0 = dest (reloaded from the stack just before returning)
+|	uses: %d0, %d1, %a0; %d2 is pushed and popped around the
+|	      long word path
+|
+|	strategy:
+|	- no argument testing (the original memcpy from the GNU lib does
+|	  no checking either)
+|	- fills of fewer than 16 bytes are done byte by byte
+|	- make sure the destination pointer (the write pointer) is long word
+|	  aligned. This is the best you can do, because writing to unaligned
+|	  addresses can be the most costly thing one could do.
+|	- we fill long word wise if possible
+|
+|	VG, 2006
+|
+|	bugfixes:
+|	- distribution of byte value improved - in cases someone gives
+|	  non-byte value
+|	- residue byte transfer was not working
+|
+|	VG, April 2007
+|
+memset:
+	move.l	4(%sp),%a0	| dest ptr
+	move.l	8(%sp),%d0	| value
+	move.l	12(%sp),%d1	| len
+	cmp.l	#16,%d1
+	blo	.Lbset		| below, byte fills
+	|
+	move.l	%d2,-(%sp)	| need a register
+	move.b	%d0,%d2		| distribute low byte to all four bytes of d0
+	lsl.l	#8,%d0
+	move.b	%d2,%d0		| low word of d0 = value:value
+	move.w	%d0,%d2
+	swap	%d0		| rotate 16
+	move.w	%d2,%d0		| both words of d0 = value:value
+	|
+	move.l	%a0,%d2		| copy of dest
+	neg.l	%d2		| 1 2 3 ==> 3 2 1
+	and.l	#3,%d2		| d2 = bytes (0-3) up to the next long word
+	beq	2f		| is aligned
+	|
+	sub.l	%d2,%d1		| fix length
+	lsr.l	#1,%d2		| word align needed?
+	bcc	1f
+	move.b	%d0,(%a0)+	| fill byte
+1:
+	lsr.l	#1,%d2		| long align needed?
+	bcc	2f
+	move.w	%d0,(%a0)+	| fill word
+2:
+	move.l	%d1,%d2		| number of long transfers (at least 3)
+	lsr.l	#2,%d2
+	subq.l	#1,%d2		| minus the one done on falling into the loop
+
+1:
+	move.l	%d0,(%a0)+	| fill long words
+.Llset:
+#if !defined (__mcoldfire__)
+	dbra	%d2,1b		| counts in the low word of d2 only ...
+	sub.l	#0x10000,%d2	| ... so step the high word by hand; without
+				| this, fills of 256 KiB and more stop short
+#else
+	subq.l	#1,%d2
+#endif
+	bpl	1b		| more long words to go
+	and.l	#3,%d1		| residue byte transfers, fixed
+	move.l	(%sp)+,%d2	| restore d2
+	bra	.Lbset
+
+1:
+	move.b	%d0,(%a0)+	| fill residue bytes
+.Lbset:
+#if !defined (__mcoldfire__)
+	dbra	%d1,1b		| loop until done (d1 is below 16 here)
+#else
+	subq.l	#1,%d1
+	bpl	1b
+#endif
+	move.l	4(%sp),%d0	| return value
+	rts