101 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			101 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| /* a-memset.s -- memset, optimised for fido asm
 | |
|  *
 | |
|  * Copyright (c) 2007 mocom software GmbH & Co KG)
 | |
|  *
 | |
|  * The authors hereby grant permission to use, copy, modify, distribute,
 | |
|  * and license this software and its documentation for any purpose, provided
 | |
|  * that existing copyright notices are retained in all copies and that this
 | |
|  * notice is included verbatim in any distributions. No written agreement,
 | |
|  * license, or royalty fee is required for any of the authorized uses.
 | |
|  * Modifications to this software may be copyrighted by their authors
 | |
|  * and need not follow the licensing terms described here, provided that
 | |
|  * the new terms are clearly indicated on the first page of each file where
 | |
|  * they apply.
 | |
|  */
 | |
| 
 | |
| #include "m68kasm.h"
 | |
| 
 | |
| Place the routine in the code section, aligned, and export it as a
| function symbol.
	.text
	.align	4

	.global	SYM(memset)
	.type	SYM(memset), @function
 | |
| 
 | |
| |   memset, optimised
 | |
| |
 | |
| |   strategy:
 | |
| |       - no argument testing (the original memcpy from the GNU lib does
 | |
| |         no checking either)
 | |
| |       - make sure the destination pointer (the write pointer) is long word
 | |
| |         aligned. This is the best you can do, because writing to unaligned
 | |
| |         addresses can be among the most costly things one can do.
 | |
| |       - we fill long word wise if possible
 | |
| |
 | |
| |   VG, 2006
 | |
| |
 | |
| |	bugfixes:
 | |
| |		- distribution of byte value improved - in cases someone gives
 | |
| |         non-byte value
 | |
| |		- residue byte transfer was not working
 | |
| |
 | |
| |	VG, April 2007
 | |
| |
 | |
| memset (dest, value, len)
|
| Arguments are read from the stack at 4(sp), 8(sp) and 12(sp); the
| original dest pointer is returned in d0.
|
| Register roles:
|   a0  write pointer, post-incremented by every store
|   d0  fill pattern (low byte of value; replicated to all four bytes
|       on the long word path)
|   d1  number of bytes still to be written
|   d2  scratch; pushed and popped on the long word path only
|
| Lengths below 16 go straight to the byte loop. Longer fills first
| bring the write pointer to a long word boundary, then store long
| words, then finish the last 0..3 bytes in the byte loop.
SYM(memset):
	move.l	12(sp),d1		| d1 = len
	move.l	8(sp),d0		| d0 = value
	move.l	4(sp),a0		| a0 = dest
	cmp.l	#16,d1
	blo	.Lbyte_count		| short fill: byte stores only

	move.l	d2,-(sp)		| d2 is scratch from here on, preserve it

	| Replicate the low byte of d0 into all four bytes of d0, so that
	| a value wider than one byte still yields a clean pattern.
	move.b	d0,d2			| d2.b = fill byte
	lsl.l	#8,d0
	move.b	d2,d0			| d0.w = byte:byte
	move.w	d0,d2			| hold that word while d0 is swapped
	swap	d0
	move.w	d2,d0			| d0.l = byte:byte:byte:byte

	| d2 = (-dest) & 3 = bytes needed to reach a long word boundary
	| (dest & 3 of 1, 2, 3 gives 3, 2, 1).
	move.l	a0,d2
	neg.l	d2
	and.l	#3,d2
	beq	.Laligned		| already long word aligned

	sub.l	d2,d1			| alignment bytes come off the length
	lsr.l	#1,d2			| bit 0 set: one byte store needed
	bcc	.Lalign_word
	move.b	d0,(a0)+
.Lalign_word:
	lsr.l	#1,d2			| bit 1 set: one word store needed
	bcc	.Laligned
	move.w	d0,(a0)+

.Laligned:
	| d2 = (remaining bytes / 4) - 1, the long word loop counter.
	| len was at least 16 and at most 3 bytes went to alignment, so
	| at least 3 long words are stored.
	move.l	d1,d2
	lsr.l	#2,d2
	subq.l	#1,d2

.Llong_store:
	move.l	d0,(a0)+
.Llong_count:
#if defined (__mcoldfire__)
	subq.l	#1,d2			| full 32 bit down count
#else
	dbra	d2,.Llong_store		| dbra counts the low 16 bits of d2 only
	sub.l	#0x10000,d2		| so step the high 16 bits by hand
#endif
	bpl	.Llong_store		| counter still non-negative: keep storing

	and.l	#3,d1			| 0..3 residue bytes are left
	move.l	(sp)+,d2		| restore the saved d2
	bra	.Lbyte_count

.Lbyte_store:
	move.b	d0,(a0)+
.Lbyte_count:
	| Entered with d1 = bytes to store (always below 16 here); the
	| count is tested before the first store, so zero stores nothing.
#if defined (__mcoldfire__)
	subq.l	#1,d1
	bpl	.Lbyte_store
#else
	dbra	d1,.Lbyte_store
#endif
	move.l	4(sp),d0		| return the dest pointer as passed in
	rts
 |