113 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			113 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| /* a-memcpy.s -- memcpy, optimised for m68k asm
 | |
|  *
 | |
|  * Copyright (c) 2007 mocom software GmbH & Co KG)
 | |
|  *
 | |
|  * The authors hereby grant permission to use, copy, modify, distribute,
 | |
|  * and license this software and its documentation for any purpose, provided
 | |
|  * that existing copyright notices are retained in all copies and that this
 | |
|  * notice is included verbatim in any distributions. No written agreement,
 | |
|  * license, or royalty fee is required for any of the authorized uses.
 | |
|  * Modifications to this software may be copyrighted by their authors
 | |
|  * and need not follow the licensing terms described here, provided that
 | |
|  * the new terms are clearly indicated on the first page of each file where
 | |
|  * they apply.
 | |
|  */
 | |
| 
 | |
| #include "m68kasm.h"
 | |
| 
 | |
/* MISALIGNED_OK is 1 when memcpy may issue word and long-word moves
 * through a pointer that is not word/long-word aligned, and 0 when it
 * must not.
 *
 * The 68000 and 68010 are deliberately not listed: both raise an address
 * error on a word or long-word access at an odd address, so they have to
 * take the conservative (!MISALIGNED_OK) path.
 */
#if defined (__mcoldfire__) || defined (__mc68020__) \
    || defined (__mc68030__) || defined (__mc68040__) \
    || defined (__mc68060__)
# define MISALIGNED_OK 1
#else
# define MISALIGNED_OK 0
#endif
 | |
| 	
 | |
| 	.text
 | |
| 	.align	4
 | |
| 
 | |
| 	.globl	SYM(memcpy)
 | |
| 	.type	SYM(memcpy), @function
 | |
| 
 | |
/* void *memcpy (void *dest, const void *src, size_t len)
 *
 * Interface, as used by the code below:
 *   In:    4(sp) = dest, 8(sp) = src, 12(sp) = len
 *   Out:   d0 = dest (reloaded from the stack just before returning)
 *   Uses:  d0, d1, a0, a1 and the condition codes; nothing is saved.
 *
 * Strategy:
 *   - No argument checking.
 *   - Fewer than 8 bytes: go straight to the byte loop.
 *   - MISALIGNED_OK: copy 1 byte and/or 1 word so that the destination
 *     (the write pointer) is long-word aligned; the source may remain
 *     misaligned.
 *   - !MISALIGNED_OK: long-word moves are used only when both dest and
 *     src are already 4-byte aligned; otherwise the whole copy is done
 *     by the byte loop.
 *   - The bulk is moved 16 bytes per iteration.  A 4-byte piece is
 *     copied up front and an 8-byte piece is handled by entering the
 *     unrolled loop half-way.
 *   - The last 0..3 bytes are copied by the byte loop.
 *
 * Loop counters: on non-ColdFire targets the loops use dbra, which only
 * decrements the low 16 bits of the counter.  Each dbra is therefore
 * followed by "sub.l #0x10000 / bpl" to borrow from the upper word.  The
 * byte loop needs this as well, because on !MISALIGNED_OK targets it
 * can be entered with the full length in d1.  Counters are tested with
 * bpl, so a byte count with bit 31 set is not supported.
 */

SYM(memcpy):
	move.l	4(sp),a0	| a0 = dest (write pointer)
	move.l	8(sp),a1	| a1 = src (read pointer)
	move.l	12(sp),d1	| d1 = len
	cmp.l	#8,d1		| fewer than 8 bytes (unsigned compare)?
	blo	.Lresidue	| yes: byte loop only

#if !MISALIGNED_OK
	/* Go to .Lresidue (byte copy of all d1 bytes) if either dest or
	   src is not 4-byte aligned.  */
	move.l	a0,d0
	and.l	#3,d0
	bne	.Lresidue
	move.l	a1,d0
	and.l	#3,d0
	bne	.Lresidue
#else /* MISALIGNED_OK */
	/* Align dest to a long-word boundary.  */
	move.l	a0,d0
	neg.l	d0
	and.l	#3,d0		| d0 = (-dest) & 3 = bytes up to the boundary
	beq	.Laligned	| already aligned
	sub.l	d0,d1		| len >= 8 here, so this cannot underflow
	lsr.l	#1,d0		| carry = bit 0: single byte needed?
	bcc	.Lalign_word
	move.b	(a1)+,(a0)+
.Lalign_word:
	lsr.l	#1,d0		| carry = bit 1: single word needed?
	bcc	.Laligned
	move.w	(a1)+,(a0)+
.Laligned:
#endif /* !MISALIGNED_OK */

	/* Long-word transfers.  */
	move.l	d1,d0
	and.l	#3,d1		| d1 = trailing byte count (0..3)
	lsr.l	#3,d0		| carry = bit 2 of the length
	bcc	.Lno_long	| carry set means a 4-byte piece
	move.l	(a1)+,(a0)+
.Lno_long:
	lsr.l	#1,d0		| d0 = number of 16-byte chunks
	bcc	.Lcopy 		| carry set means an 8-byte piece
	bra	.Lcopy8		| copy it by entering the loop half-way

.Lcopy16:
	move.l	(a1)+,(a0)+
	move.l	(a1)+,(a0)+
.Lcopy8:
	move.l	(a1)+,(a0)+
	move.l	(a1)+,(a0)+
.Lcopy:
#if !defined (__mcoldfire__)
	dbra	d0,.Lcopy16	| counts the low word of d0 only
	sub.l	#0x10000,d0	| borrow from the upper word
#else
	subq.l	#1,d0
#endif
	bpl	.Lcopy16
	bra	.Lresidue

.Lbyte:
	move.b	(a1)+,(a0)+	| move one residue byte

.Lresidue:
#if !defined (__mcoldfire__)
	dbra	d1,.Lbyte	| counts the low word of d1 only
	sub.l	#0x10000,d1	| borrow from the upper word: d1 may hold
				| the full length when the pointers were
				| unaligned on a !MISALIGNED_OK target
#else
	subq.l	#1,d1
#endif
	bpl	.Lbyte		| loop until done
	move.l	4(sp),d0	| return value = original dest
	rts
 |