2007-04-27 Kazu Hirata <kazu@codesourcery.com>
* libc/machine/m68k/Makefile.am (lib_a_SOURCES): Add memcpy.S and memset.S. * libc/machine/m68k/Makefile.in: Regenerate. * libc/machine/m68k/memcpy.S, libc/machine/m68k/memset.S: New.
This commit is contained in:
parent
7369eb7af3
commit
9e5957de57
|
@ -1,3 +1,10 @@
|
|||
2007-04-27 Kazu Hirata <kazu@codesourcery.com>
|
||||
|
||||
* libc/machine/m68k/Makefile.am (lib_a_SOURCES): Add memcpy.S
|
||||
and memset.S.
|
||||
* libc/machine/m68k/Makefile.in: Regenerate.
|
||||
* libc/machine/m68k/memcpy.S, libc/machine/m68k/memset.S: New.
|
||||
|
||||
2007-04-26 Patrick Mansfield <patmans@us.ibm.com>
|
||||
|
||||
* libc/include/ieeefp.h: use prefixed __ieeefp_ macros that can be
|
||||
|
|
|
@ -8,7 +8,7 @@ AM_CCASFLAGS = $(INCLUDES)
|
|||
|
||||
noinst_LIBRARIES = lib.a
|
||||
|
||||
lib_a_SOURCES = setjmp.S strcpy.c strlen.c
|
||||
lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S
|
||||
lib_a_CCASFLAGS=$(AM_CCASFLAGS)
|
||||
lib_a_CFLAGS=$(AM_CFLAGS)
|
||||
|
||||
|
|
|
@ -56,7 +56,8 @@ ARFLAGS = cru
|
|||
lib_a_AR = $(AR) $(ARFLAGS)
|
||||
lib_a_LIBADD =
|
||||
am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-strcpy.$(OBJEXT) \
|
||||
lib_a-strlen.$(OBJEXT)
|
||||
lib_a-strlen.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \
|
||||
lib_a-memset.$(OBJEXT)
|
||||
lib_a_OBJECTS = $(am_lib_a_OBJECTS)
|
||||
DEFAULT_INCLUDES = -I. -I$(srcdir)
|
||||
depcomp =
|
||||
|
@ -181,7 +182,7 @@ AUTOMAKE_OPTIONS = cygnus
|
|||
INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
|
||||
AM_CCASFLAGS = $(INCLUDES)
|
||||
noinst_LIBRARIES = lib.a
|
||||
lib_a_SOURCES = setjmp.S strcpy.c strlen.c
|
||||
lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S
|
||||
lib_a_CCASFLAGS = $(AM_CCASFLAGS)
|
||||
lib_a_CFLAGS = $(AM_CFLAGS)
|
||||
ACLOCAL_AMFLAGS = -I ../../..
|
||||
|
@ -249,6 +250,18 @@ lib_a-setjmp.o: setjmp.S
|
|||
lib_a-setjmp.obj: setjmp.S
|
||||
$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-setjmp.obj `if test -f 'setjmp.S'; then $(CYGPATH_W) 'setjmp.S'; else $(CYGPATH_W) '$(srcdir)/setjmp.S'; fi`
|
||||
|
||||
lib_a-memcpy.o: memcpy.S
|
||||
$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.o `test -f 'memcpy.S' || echo '$(srcdir)/'`memcpy.S
|
||||
|
||||
lib_a-memcpy.obj: memcpy.S
|
||||
$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.obj `if test -f 'memcpy.S'; then $(CYGPATH_W) 'memcpy.S'; else $(CYGPATH_W) '$(srcdir)/memcpy.S'; fi`
|
||||
|
||||
lib_a-memset.o: memset.S
|
||||
$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.o `test -f 'memset.S' || echo '$(srcdir)/'`memset.S
|
||||
|
||||
lib_a-memset.obj: memset.S
|
||||
$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.obj `if test -f 'memset.S'; then $(CYGPATH_W) 'memset.S'; else $(CYGPATH_W) '$(srcdir)/memset.S'; fi`
|
||||
|
||||
.c.o:
|
||||
$(COMPILE) -c $<
|
||||
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
/* a-memcpy.s -- memcpy, optimised for m68k asm
|
||||
*
|
||||
* Copyright (c) 2007 mocom software GmbH & Co KG)
|
||||
*
|
||||
* The authors hereby grant permission to use, copy, modify, distribute,
|
||||
* and license this software and its documentation for any purpose, provided
|
||||
* that existing copyright notices are retained in all copies and that this
|
||||
* notice is included verbatim in any distributions. No written agreement,
|
||||
* license, or royalty fee is required for any of the authorized uses.
|
||||
* Modifications to this software may be copyrighted by their authors
|
||||
* and need not follow the licensing terms described here, provided that
|
||||
* the new terms are clearly indicated on the first page of each file where
|
||||
* they apply.
|
||||
*/
|
||||
|
||||
.text
|
||||
.align 4
|
||||
|
||||
.globl memcpy
|
||||
.type memcpy, @function
|
||||
|
||||
/* memcpy, optimised
|
||||
*
|
||||
* strategy:
|
||||
* - no argument testing (the original memcpy from the GNU lib does
|
||||
* no checking either)
|
||||
* - make sure the destination pointer (the write pointer) is long word
|
||||
* aligned. This is the best you can do, because writing to unaligned
|
||||
* addresses can be the most costly thing you could do.
|
||||
* - Once you have figured that out, we do a little loop unrolling
|
||||
* to further improve speed.
|
||||
*/
|
||||
|
||||
/* void *memcpy (void *dest, const void *src, size_t len)
 *
 * Arguments are read from the stack: 4(%sp) = dest, 8(%sp) = src,
 * 12(%sp) = len.  The original dest pointer is returned in %d0.
 * Only %d0, %d1, %a0 and %a1 are modified.
 *
 * Register roles:
 *   %a0  write pointer
 *   %a1  read pointer
 *   %d1  bytes remaining, later the 0..3 byte residue
 *   %d0  alignment scratch, then the number of 16-byte blocks
 *
 * NOTE(review): only dest is aligned; src alignment is never checked.
 * On cores that trap on misaligned word or long reads (presumably the
 * plain 68000) a src whose alignment differs from dest would fault.
 * Confirm against the set of supported targets.
 */
memcpy:
	move.l	4(%sp),%a0		| dest ptr
	move.l	8(%sp),%a1		| src ptr
	move.l	12(%sp),%d1		| len
	cmp.l	#8,%d1			| if fewer than 8 bytes to transfer,
	blo	.Lresidue		| do not optimise

	/* Align dest to a long word boundary.  */
	move.l	%a0,%d0			| copy of dest
	neg.l	%d0
	and.l	#3,%d0			| d0 = bytes up to the next boundary
	beq	.Lcpy_aligned		| already aligned
	sub.l	%d0,%d1			| alignment bytes come off the length
	lsr.l	#1,%d0			| carry = bit 0: single byte needed
	bcc	.Lcpy_align2
	move.b	(%a1)+,(%a0)+
.Lcpy_align2:
	lsr.l	#1,%d0			| carry = bit 1: single word needed
	bcc	.Lcpy_aligned
	move.w	(%a1)+,(%a0)+
.Lcpy_aligned:

	/* Long word transfers.  */
	move.l	%d1,%d0
	and.l	#3,%d1			| byte residue (0..3)
	lsr.l	#3,%d0			| carry set for 4-byte residue
	bcc	.Lcpy_no4
	move.l	(%a1)+,(%a0)+
.Lcpy_no4:
	lsr.l	#1,%d0			| d0 = number of 16-byte transfers
	bcc	.Lcopy			| carry set for 8-byte residue
	bra	.Lcopy8			| enter the unrolled loop half way

.Lcopy16:
	move.l	(%a1)+,(%a0)+
	move.l	(%a1)+,(%a0)+
.Lcopy8:
	move.l	(%a1)+,(%a0)+
	move.l	(%a1)+,(%a0)+
.Lcopy:
#if !defined (__mcoldfire__)
	dbra	%d0,.Lcopy16		| dbra counts the low word of d0 only
	sub.l	#0x10000,%d0		| so borrow from the high word here;
					| d0 goes negative once it is used up
#else
	subq.l	#1,%d0
#endif
	bpl	.Lcopy16		| blocks left to copy
	bra	.Lresidue

.Lcpy_byte:
	move.b	(%a1)+,(%a0)+		| move residue bytes
.Lresidue:
#if !defined (__mcoldfire__)
	dbra	%d1,.Lcpy_byte		| d1 is below 8 here, 16 bits suffice
#else
	subq.l	#1,%d1
	bpl	.Lcpy_byte
#endif
	move.l	4(%sp),%d0		| return value: original dest
	rts
|
|
@ -0,0 +1,97 @@
|
|||
/* a-memset.s -- memset, optimised for fido asm
|
||||
*
|
||||
* Copyright (c) 2007 mocom software GmbH & Co KG)
|
||||
*
|
||||
* The authors hereby grant permission to use, copy, modify, distribute,
|
||||
* and license this software and its documentation for any purpose, provided
|
||||
* that existing copyright notices are retained in all copies and that this
|
||||
* notice is included verbatim in any distributions. No written agreement,
|
||||
* license, or royalty fee is required for any of the authorized uses.
|
||||
* Modifications to this software may be copyrighted by their authors
|
||||
* and need not follow the licensing terms described here, provided that
|
||||
* the new terms are clearly indicated on the first page of each file where
|
||||
* they apply.
|
||||
*/
|
||||
|
||||
.text
|
||||
.align 4
|
||||
|
||||
.globl memset
|
||||
.type memset, @function
|
||||
|
||||
| memset, optimised
|
||||
|
|
||||
| strategy:
|
||||
| - no argument testing (the original memcpy from the GNU lib does
|
||||
| no checking either)
|
||||
| - make sure the destination pointer (the write pointer) is long word
|
||||
| aligned. This is the best you can do, because writing to unaligned
|
||||
| addresses can be the most costly thing one could do.
|
||||
| - we fill long word wise if possible
|
||||
|
|
||||
| VG, 2006
|
||||
|
|
||||
| bugfixes:
|
||||
| - distribution of byte value improved - in cases someone gives
|
||||
| non-byte value
|
||||
| - residue byte transfer was not working
|
||||
|
|
||||
| VG, April 2007
|
||||
|
|
||||
| void *memset (void *dest, int value, size_t len)
|
| Arguments are read from the stack: 4(%sp) = dest, 8(%sp) = value,
| 12(%sp) = len.  The original dest pointer is returned in %d0.
| %d2 is used as scratch on the long word path and is saved and
| restored around it; %d0, %d1 and %a0 are modified.
|
| Register roles:
|   %a0  write pointer
|   %d0  fill value, low byte replicated into all four bytes
|   %d1  bytes remaining, later the 0..3 byte residue
|   %d2  scratch, then the long word loop counter
memset:
	move.l	4(%sp),%a0		| dest ptr
	move.l	8(%sp),%d0		| value
	move.l	12(%sp),%d1		| len
	cmp.l	#16,%d1
	blo	.Lbset			| below, byte fills

	move.l	%d2,-(%sp)		| need a register
	move.b	%d0,%d2			| distribute low byte to all bytes in word
	lsl.l	#8,%d0
	move.b	%d2,%d0			| low word of d0 = byte, byte
	move.w	%d0,%d2
	swap	%d0			| rotate 16
	move.w	%d2,%d0			| d0 = byte in all four positions

	move.l	%a0,%d2			| copy of dest
	neg.l	%d2			| 1 2 3 ==> 3 2 1
	and.l	#3,%d2			| d2 = bytes up to the next boundary
	beq	.Lset_aligned		| is aligned

	sub.l	%d2,%d1			| fix length
	lsr.l	#1,%d2			| carry = bit 0: single byte needed
	bcc	.Lset_align2
	move.b	%d0,(%a0)+		| fill byte
.Lset_align2:
	lsr.l	#1,%d2			| carry = bit 1: single word needed
	bcc	.Lset_aligned
	move.w	%d0,(%a0)+		| fill word
.Lset_aligned:
	move.l	%d1,%d2			| number of long transfers (at least 3)
	lsr.l	#2,%d2
	subq.l	#1,%d2			| loop counter = long words - 1

.Lset_long:
	move.l	%d0,(%a0)+		| fill long words
.Llset:
#if !defined (__mcoldfire__)
	dbra	%d2,.Lset_long		| dbra counts the low word of d2 only
	sub.l	#0x10000,%d2		| so borrow from the high word here;
					| d2 goes negative once it is used up
#else
	subq.l	#1,%d2
#endif
	bpl	.Lset_long		| long words left to fill
	and.l	#3,%d1			| residue byte transfers
	move.l	(%sp)+,%d2		| restore d2
	bra	.Lbset

.Lset_byte:
	move.b	%d0,(%a0)+		| fill residue bytes
.Lbset:
#if !defined (__mcoldfire__)
	dbra	%d1,.Lset_byte		| d1 is below 16 here, 16 bits suffice
#else
	subq.l	#1,%d1
	bpl	.Lset_byte
#endif
	move.l	4(%sp),%d0		| return value: original dest
	rts
|
Loading…
Reference in New Issue