2007-04-27 Kazu Hirata <kazu@codesourcery.com>
* libc/machine/m68k/Makefile.am (lib_a_SOURCES): Add memcpy.S and memset.S. * libc/machine/m68k/Makefile.in: Regenerate. * libc/machine/m68k/memcpy.S, libc/machine/m68k/memset.S: New.
This commit is contained in:
		
							parent
							
								
									7369eb7af3
								
							
						
					
					
						commit
						9e5957de57
					
				|  | @ -1,3 +1,10 @@ | |||
| 2007-04-27  Kazu Hirata  <kazu@codesourcery.com> | ||||
| 
 | ||||
| 	* libc/machine/m68k/Makefile.am (lib_a_SOURCES): Add memcpy.S | ||||
| 	and memset.S. | ||||
| 	* libc/machine/m68k/Makefile.in: Regenerate. | ||||
| 	* libc/machine/m68k/memcpy.S, libc/machine/m68k/memset.S: New. | ||||
| 
 | ||||
| 2007-04-26  Patrick Mansfield <patmans@us.ibm.com> | ||||
| 
 | ||||
| 	* libc/include/ieeefp.h: use prefixed __ieeefp_ macros that can be | ||||
|  |  | |||
|  | @ -8,7 +8,7 @@ AM_CCASFLAGS = $(INCLUDES) | |||
| 
 | ||||
| noinst_LIBRARIES = lib.a | ||||
| 
 | ||||
| lib_a_SOURCES = setjmp.S strcpy.c strlen.c | ||||
| lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S | ||||
| lib_a_CCASFLAGS=$(AM_CCASFLAGS) | ||||
| lib_a_CFLAGS=$(AM_CFLAGS) | ||||
| 
 | ||||
|  |  | |||
|  | @ -56,7 +56,8 @@ ARFLAGS = cru | |||
| lib_a_AR = $(AR) $(ARFLAGS) | ||||
| lib_a_LIBADD = | ||||
| am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-strcpy.$(OBJEXT) \
 | ||||
| 	lib_a-strlen.$(OBJEXT) | ||||
| 	lib_a-strlen.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \
 | ||||
| 	lib_a-memset.$(OBJEXT) | ||||
| lib_a_OBJECTS = $(am_lib_a_OBJECTS) | ||||
| DEFAULT_INCLUDES = -I. -I$(srcdir) | ||||
| depcomp = | ||||
|  | @ -181,7 +182,7 @@ AUTOMAKE_OPTIONS = cygnus | |||
| INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) | ||||
| AM_CCASFLAGS = $(INCLUDES) | ||||
| noinst_LIBRARIES = lib.a | ||||
| lib_a_SOURCES = setjmp.S strcpy.c strlen.c | ||||
| lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S | ||||
| lib_a_CCASFLAGS = $(AM_CCASFLAGS) | ||||
| lib_a_CFLAGS = $(AM_CFLAGS) | ||||
| ACLOCAL_AMFLAGS = -I ../../.. | ||||
|  | @ -249,6 +250,18 @@ lib_a-setjmp.o: setjmp.S | |||
| lib_a-setjmp.obj: setjmp.S | ||||
| 	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-setjmp.obj `if test -f 'setjmp.S'; then $(CYGPATH_W) 'setjmp.S'; else $(CYGPATH_W) '$(srcdir)/setjmp.S'; fi` | ||||
| 
 | ||||
| lib_a-memcpy.o: memcpy.S | ||||
| 	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.o `test -f 'memcpy.S' || echo '$(srcdir)/'`memcpy.S | ||||
| 
 | ||||
| lib_a-memcpy.obj: memcpy.S | ||||
| 	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.obj `if test -f 'memcpy.S'; then $(CYGPATH_W) 'memcpy.S'; else $(CYGPATH_W) '$(srcdir)/memcpy.S'; fi` | ||||
| 
 | ||||
| lib_a-memset.o: memset.S | ||||
| 	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.o `test -f 'memset.S' || echo '$(srcdir)/'`memset.S | ||||
| 
 | ||||
| lib_a-memset.obj: memset.S | ||||
| 	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.obj `if test -f 'memset.S'; then $(CYGPATH_W) 'memset.S'; else $(CYGPATH_W) '$(srcdir)/memset.S'; fi` | ||||
| 
 | ||||
| .c.o: | ||||
| 	$(COMPILE) -c $< | ||||
| 
 | ||||
|  |  | |||
|  | @ -0,0 +1,93 @@ | |||
| /* a-memcpy.s -- memcpy, optimised for m68k asm | ||||
|  * | ||||
|  * Copyright (c) 2007 mocom software GmbH & Co KG) | ||||
|  * | ||||
|  * The authors hereby grant permission to use, copy, modify, distribute, | ||||
|  * and license this software and its documentation for any purpose, provided | ||||
|  * that existing copyright notices are retained in all copies and that this | ||||
|  * notice is included verbatim in any distributions. No written agreement, | ||||
|  * license, or royalty fee is required for any of the authorized uses. | ||||
|  * Modifications to this software may be copyrighted by their authors | ||||
|  * and need not follow the licensing terms described here, provided that | ||||
|  * the new terms are clearly indicated on the first page of each file where | ||||
|  * they apply. | ||||
|  */ | ||||
| 
 | ||||
| 	.text | ||||
| 	.align	4
 | ||||
| 
 | ||||
| 	.globl	memcpy
 | ||||
| 	.type	memcpy, @function
 | ||||
| 
 | ||||
| /*   memcpy, optimised | ||||
|  * | ||||
|  *   strategy: | ||||
|  *       - no argument testing (the original memcpy from the GNU lib does | ||||
|  *         no checking either) | ||||
|  *       - make sure the destination pointer (the write pointer) is long word | ||||
|  *         aligned. This is the best you can do, because writing to unaligned | ||||
|  *         addresses can be the most costfull thing you could do. | ||||
|  *       - Once you have figured that out, we do a little loop unrolling | ||||
|  *         to further improve speed. | ||||
|  */ | ||||
| 
 | ||||
| memcpy: | ||||
| 	move.l	4(%sp),%a0	| dest ptr | ||||
| 	move.l	8(%sp),%a1	| src ptr | ||||
| 	move.l	12(%sp),%d1	| len | ||||
| 	cmp.l	#8,%d1		| if fewer than 8 bytes to transfer, | ||||
| 	blo	.Lresidue	| do not optimise | ||||
| 
 | ||||
| 	/* align dest */ | ||||
| 	move.l	%a0,%d0		| copy of dest | ||||
| 	neg.l	%d0 | ||||
| 	and.l	#3,%d0		| look for the lower two only | ||||
| 	beq	2f		| is aligned? | ||||
| 	sub.l	%d0,%d1 | ||||
| 	lsr.l	#1,%d0		| word align needed? | ||||
| 	bcc	1f | ||||
| 	move.b	(%a1)+,(%a0)+ | ||||
| 1: | ||||
| 	lsr.l	#1,%d0		| long align needed? | ||||
| 	bcc	2f | ||||
| 	move.w	(%a1)+,(%a0)+ | ||||
| 2: | ||||
| 
 | ||||
| 	/* long word transfers */ | ||||
| 	move.l	%d1,%d0 | ||||
| 	and.l	#3,%d1		| byte residue | ||||
| 	lsr.l	#3,%d0 | ||||
| 	bcc	1f		| carry set for 4-byte residue | ||||
| 	move.l	(%a1)+,(%a0)+ | ||||
| 1: | ||||
| 	lsr.l	#1,%d0		| number of 16-byte transfers | ||||
| 	bcc	.Lcopy 		| carry set for 8-byte residue | ||||
| 	bra	.Lcopy8 | ||||
| 
 | ||||
| 1: | ||||
| 	move.l	(%a1)+,(%a0)+ | ||||
| 	move.l	(%a1)+,(%a0)+ | ||||
| .Lcopy8: | ||||
| 	move.l	(%a1)+,(%a0)+ | ||||
| 	move.l	(%a1)+,(%a0)+ | ||||
| .Lcopy: | ||||
| #if !defined (__mcoldfire__) | ||||
| 	dbra	%d0,1b | ||||
| #else | ||||
| 	subq.l	#1,%d0 | ||||
| 	bpl	1b | ||||
| #endif | ||||
| 	bra	.Lresidue | ||||
| 
 | ||||
| 1: | ||||
| 	move.b	(%a1)+,(%a0)+	| move residue bytes | ||||
| 
 | ||||
| .Lresidue: | ||||
| #if !defined (__mcoldfire__) | ||||
| 	dbra	%d1,1b		| loop until done | ||||
| #else | ||||
| 	subq.l	#1,%d1 | ||||
| 	bpl	1b | ||||
| #endif | ||||
| 	move.l	4(%sp),%d0	| return value | ||||
| 	rts | ||||
|  | @ -0,0 +1,97 @@ | |||
| /* a-memset.s -- memset, optimised for fido asm | ||||
|  * | ||||
|  * Copyright (c) 2007 mocom software GmbH & Co KG) | ||||
|  * | ||||
|  * The authors hereby grant permission to use, copy, modify, distribute, | ||||
|  * and license this software and its documentation for any purpose, provided | ||||
|  * that existing copyright notices are retained in all copies and that this | ||||
|  * notice is included verbatim in any distributions. No written agreement, | ||||
|  * license, or royalty fee is required for any of the authorized uses. | ||||
|  * Modifications to this software may be copyrighted by their authors | ||||
|  * and need not follow the licensing terms described here, provided that | ||||
|  * the new terms are clearly indicated on the first page of each file where | ||||
|  * they apply. | ||||
|  */ | ||||
| 
 | ||||
| 	.text | ||||
| 	.align	4
 | ||||
| 
 | ||||
| 	.globl	memset
 | ||||
| 	.type	memset, @function
 | ||||
| 
 | ||||
| |   memset, optimised | ||||
| | | ||||
| |   strategy: | ||||
| |       - no argument testing (the original memcpy from the GNU lib does | ||||
| |         no checking either) | ||||
| |       - make sure the destination pointer (the write pointer) is long word | ||||
| |         aligned. This is the best you can do, because writing to unaligned | ||||
| |         addresses can be the most costfull thing one could do. | ||||
| |       - we fill long word wise if possible | ||||
| | | ||||
| |   VG, 2006 | ||||
| | | ||||
| |	bugfixes: | ||||
| |		- distribution of byte value improved - in cases someone gives | ||||
| |         non-byte value | ||||
| |		- residue byte transfer was not working | ||||
| | | ||||
| |	VG, April 2007 | ||||
| | | ||||
| memset: | ||||
| 	move.l	4(%sp),%a0	| dest ptr | ||||
| 	move.l	8(%sp),%d0	| value | ||||
| 	move.l	12(%sp),%d1	| len | ||||
| 	cmp.l	#16,%d1 | ||||
| 	blo	.Lbset		| below, byte fills | ||||
| 	| | ||||
| 	move.l	%d2,-(%sp)	| need a register | ||||
| 	move.b	%d0,%d2		| distribute low byte to all byte in word | ||||
| 	lsl.l	#8,%d0 | ||||
| 	move.b	%d2,%d0 | ||||
| 	move.w	%d0,%d2 | ||||
| 	swap	%d0		| rotate 16 | ||||
| 	move.w	%d2,%d0 | ||||
| 	| | ||||
| 	move.l	%a0,%d2		| copy of src | ||||
| 	neg.l	%d2		| 1 2 3 ==> 3 2 1 | ||||
| 	and.l	#3,%d2 | ||||
| 	beq	2f		| is aligned | ||||
| 	| | ||||
| 	sub.l	%d2,%d1		| fix length | ||||
| 	lsr.l	#1,%d2		| word align needed? | ||||
| 	bcc	1f | ||||
| 	move.b	%d0,(%a0)+	| fill byte | ||||
| 1: | ||||
| 	lsr.l	#1,%d2		| long align needed? | ||||
| 	bcc	2f | ||||
| 	move.w	%d0,(%a0)+	| fill word | ||||
| 2: | ||||
| 	move.l	%d1,%d2		| number of long transfers (at least 3) | ||||
| 	lsr.l	#2,%d2 | ||||
| 	subq.l	#1,%d2 | ||||
| 
 | ||||
| 1: | ||||
| 	move.l	%d0,(%a0)+	| fill long words | ||||
| .Llset: | ||||
| #if !defined (__mcoldfire__) | ||||
| 	dbra	%d2,1b		| loop until done | ||||
| #else | ||||
| 	subq.l	#1,%d2 | ||||
| 	bpl	1b | ||||
| #endif | ||||
| 	and.l	#3,%d1		| residue byte transfers, fixed | ||||
| 	move.l	(%sp)+,%d2	| restore d2 | ||||
| 	bra	.Lbset | ||||
| 
 | ||||
| 1: | ||||
| 	move.b	%d0,(%a0)+	| fill residue bytes | ||||
| .Lbset: | ||||
| #if !defined (__mcoldfire__) | ||||
| 	dbra	%d1,1b		| loop until done | ||||
| #else | ||||
| 	subq.l	#1,%d1 | ||||
| 	bpl	1b | ||||
| #endif | ||||
| 	move.l	4(%sp),%d0	| return value | ||||
| 	rts | ||||
		Loading…
	
		Reference in New Issue