! SH5 code Copyright 2002 SuperH Ltd.

#include "asm.h"

/* strcmp: compare two NUL-terminated strings.

   Two implementations are selected by the preprocessor: the SHmedia
   one starts here and runs up to the matching #else, the SHcompact one
   follows that #else.

   SHmedia variant: the string pointers are read from r2 and r3, the
   result is written to r2, and every exit returns through tr2, which
   is loaded from r18 below.  ENTRY is a macro that is not defined in
   this file (presumably it comes from asm.h).  */
ENTRY(strcmp)
#if __SHMEDIA__
	/* Compare the first eight bytes one at a time.  The loads alternate
	   between the register pairs r4/r5 and r6/r7, so each pair is
	   loaded before the branches that test the previous pair.  A zero
	   byte in the first string, or a mismatch, exits through quickret0
	   (pair r4/r5) or quickret1 (pair r6/r7).  */
	ld.ub	r2,0,r4
	pt/l	quickret0,tr0
	ld.ub	r3,0,r5
	ptabs	r18,tr2		// tr2 = return target, used by every blink tr2 exit
	beqi/u	r4,0,tr0
	ld.ub	r2,1,r6
	bne/u	r4,r5,tr0
	pt/l	quickret1,tr1
	ld.ub	r3,1,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,2,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,2,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,3,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,3,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,4,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,4,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,5,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,5,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,6,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,6,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,7,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,7,r7
	beqi/u	r6,0,tr1
	sub	r3,r2,r3	// r3 = r3 - r2: distance from the first string to the second
	bne/u	r6,r7,tr1

	/* The first eight bytes matched and held no zero byte.  Prepare to
	   compare eight bytes per step: round r2 down to an 8-byte
	   boundary and work out where the second string lies relative to
	   it.  On entry r3 holds (second pointer - first pointer), as
	   computed by the sub just before the last byte-wise branch.
	   SHLO is a macro that is not defined in this file (presumably an
	   endian-dependent shift from asm.h -- TODO confirm).  */
	andi	r2,-8,r2	// r2 &= ~7
	add	r3,r2,r3	// r3 = second-string address that corresponds to the rounded r2
	ldlo.q	r3,8,r23	// partial (possibly misaligned) load of the second string at r3 + 8
	pt	r23_zero,tr0
	shlli	r3,3,r22	// r22 = r3 * 8, used below as a shift count
	sub	r63,r22,r20	// r20 = -r22 (r63 reads as zero)
	movi	0x101,r6
	mperm.w	r6,r63,r6	// NOTE(review): presumably copies 0x0101 into every 16-bit lane of r6
	SHLO	r6,r22,r7
	msubs.ub r7,r23,r8	// r8 != 0 is taken below to mean "zero byte in r23"
	pt	loop,tr1
	bnei/u	r8,0,tr0 // r23_zero
	pt	found_zero,tr0
	addi	r3,15,r3
	andi	r3,-8,r3
	sub	r3,r2,r3	// r3 = index register for the ldx.q loads in the loops
	/* r7 != r6 means the shift above changed the constant: take the
	   general loop.  Otherwise fall through to the aligned loop.  */
	bne/l	r7,r6,tr1 // loop
	/* The strings are aligned to each other.  */
	/* It is possible to have a loop with six cycles / iteration
	   by re-ordering the exit conditions, but then it needs extra
	   time and/or code to sort out the r4 != r5 case.  */
	pt	al_loop,tr1
	pt	al_found_zero,tr0
	/* Each pass loads eight bytes of the first string into r4 and eight
	   bytes of the second string into r5, then advances r2 by eight.
	   Exits: al_found_zero when r4 contains a zero byte, cmp_quad when
	   r4 and r5 differ.  */
al_loop:
	ld.q	r2,8,r4		// first string: quad at r2 + 8
	ldx.q	r2,r3,r5	// second string: quad at r2 + r3
	addi	r2,8,r2
	mcmpeq.b r63,r4,r8	// byte-wise compare of r4 against zero (r63)
	pt	cmp_quad,tr3
	bnei/u	r8,0,tr0  // al_found_zero
	beq/l	r4,r5,tr1 // al_loop
	blink	tr3,r63   // cmp_quad

	.balign 8
/* Early exits for the byte-wise prologue.  Each one returns the
   difference of the two bytes most recently loaded into its register
   pair, through the return target held in tr2.  */
quickret0:
	sub	r4,r5,r2	// r2 = r4 - r5
	blink	tr2,r63
quickret1:
	sub	r6,r7,r2	// r2 = r6 - r7
	blink	tr2,r63

/* General quad loop, entered from the setup code when the two strings
   are not aligned to each other.  Each pass loads eight bytes of the
   first string into r4 and a fresh quad of the second string into r19,
   then builds r5 from the part carried over in r23 and a shifted part
   of r19; the other shifted part of r19 becomes the new r23.
   Expects tr0 = found_zero, tr1 = loop, and r6, r20, r22, r23 as left
   by the setup code.  SHHI and SHLO are macros that are not defined in
   this file (presumably endian-dependent shifts from asm.h -- TODO
   confirm).  When r4 != r5 control falls through into cmp_quad.  */
loop:
	ld.q	r2,8,r4
	ldx.q	r2,r3,r19
	addi	r2,8,r2
	msubs.ub r6,r4,r8	// zero-byte marker for r4
	mcmpeq.b r63,r19,r9	// zero-byte marker for r19
	SHHI	r19,r20,r21
	or	r21,r23,r5
	SHLO	r19,r22,r23
	bne/u	r8,r9,tr0 // found_zero
	beq/l	r4,r5,tr1 // loop
/* Order the eight-byte groups in r4 and r5 and return.  r6 is set when
   r4 > r5 (unsigned) and r7 when r5 > r4, so r2 = r6 - r7 ends up as
   1, 0 or -1.  On little-endian targets both values are byte-reversed
   first, so that the byte at the lowest address is the most significant
   one in the comparison.  */
cmp_quad:
#ifdef __LITTLE_ENDIAN__
	byterev r4,r4
	byterev r5,r5
#endif
	cmpgtu	r4,r5,r6
	cmpgtu	r5,r4,r7
	sub	r6,r7,r2
	blink tr2,r63		// return through tr2
/* Reached from loop when r8 != r9, i.e. the zero-byte markers for r4
   and for the freshly loaded r19 are not both clear.  Goes to zero_now
   when r8 differs from the shifted r9 marker, to cmp_quad when r4 and
   r5 differ, and otherwise falls through into r23_zero with a new r8.
   SHHI and SHLO are macros that are not defined in this file
   (presumably from asm.h -- TODO confirm).  */
found_zero:
	pt	zero_now,tr0
	pt	cmp_quad,tr1
	SHHI	r9,r20,r7
	bne/u	r8,r7,tr0 // zero_now
	bne/u	r4,r5,tr1 // cmp_quad
	SHLO	r9,r22,r8
/* Also the target of the setup code when the first partial load of the
   second string (r23) already shows a zero byte.  Loads the next quad
   of the first string, uses r23 as the second operand, and falls
   through into zero_now.  */
r23_zero:
	ld.q	r2,8,r4
	add	r23,r63,r5	// r5 = r23 (r63 reads as zero)
zero_now:
al_found_zero:
/* We know that one of the values has at least one zero, and r8 holds
   a 0x01 or 0xff mask for every zero found in one of the operands.
   If both operands have the first zero in the same place, this mask
   allows us to truncate the comparison to the valid bytes in the
   strings.  If the first zero is in different places, it doesn't
   matter if some invalid bytes are included, since the comparison
   of the zero with the non-zero will determine the outcome.  */
#ifdef __LITTLE_ENDIAN__
	/* Turn r8 into a mask of the bits below the lowest set bit of
	   (r8 << 8), then clear everything else in r4 and r5.  */
	shlli	r8,8,r8
	addi	r8,-1,r9
	andc	r9,r8,r8	// r8 = r9 & ~r8
	and	r8,r4,r4
	and	r8,r5,r5
#else
	/* Derive a shift count that is a multiple of eight (andi 56) from
	   the position of the first mask bit, negate it, and shift both
	   operands right by it.  NOTE(review): confirm the exact count
	   against the SHmedia definition of nsb.  */
	shlri r8,1,r8
	nsb	r8,r8
	addi	r8,8,r8
	andi	r8,56,r8
	sub	r63,r8,r8	// r8 = -r8
	shlrd	r4,r8,r4
	shlrd	r5,r8,r5
#endif
	/* Same ordering step as in cmp_quad: r2 = (r4 > r5) - (r5 > r4),
	   unsigned, after byte-reversing on little-endian targets.  */
#ifdef __LITTLE_ENDIAN__
	byterev r4,r4
	byterev r5,r5
#endif
	cmpgtu	r4,r5,r6
	cmpgtu	r5,r4,r7
	sub	r6,r7,r2
	blink tr2,r63

#else /* ! __SHMEDIA__, i.e. SH 1..4 / SHcompact */

/* Register roles for the SHcompact code that follows:
     STR1, STR2  the two string pointers
     RESULT      register that receives the return value
     TMP         scratch register for the word loaded from STR2
   r0 and r1 are used directly as working registers.  */
#ifdef __SH5__
#define STR1 r2
#define STR2 r3
#define RESULT r2
#define TMP r4
#else
! Entry: r4: string1
!        r5: string2
! Exit:  r0: result
!        r1-r2,r4-r5: clobbered
#define STR1 r4
#define STR2 r5
#define RESULT r0
#define TMP r2
#endif /* __SH5__ */

	/* Use the word-at-a-time loop only when both pointers are 4-byte
	   aligned; otherwise go straight to the byte loop.  */
	mov     STR1,r0
	or      STR2,r0
	tst	#3,r0			/* T = 1 when the low two bits of (STR1 | STR2) are clear */
	bf	L_setup_char_loop	/* T = 0: at least one pointer is not 4-byte aligned */
	mov	#0,r0			/* r0 = 0, the operand cmp/str tests against in Longword_loop */
#ifdef DELAYED_BRANCHES
	/* Word loop, delayed-branch form.  The next word of STR1 is loaded
	   in the delay slot of the loop branch, so r1 is already valid at
	   the top of the loop; the first word is loaded here.  r0 is 0.  */
	mov.l	@STR1+,r1
	.align  2
Longword_loop:
	mov.l	@STR2+,TMP
	cmp/str	r0,r1			/* T = 1 if any byte of r1 equals the matching byte of r0 (zero) */
	bt	Longword_loop_end
	cmp/eq	r1,TMP
	bt.s	Longword_loop
	mov.l	@STR1+,r1		/* delay slot: executed whether or not the branch is taken */
	add #-4, STR1			/* words differed: undo the post-increment done in the delay slot */
Longword_loop_end:
	add #-4, STR1			/* step both pointers back one word so the byte loop */
	add #-4, STR2			/* re-examines the word that ended the word loop */
	/* Byte loop, delayed-branch form: r0 holds the current byte of
	   STR1 at the top of the loop, r1 receives the byte of STR2.  */
L_setup_char_loop:
	mov.b	@STR1+,r0
	.align  2
L_char_loop:
	mov.b	@STR2+,r1
	tst	r0,r0			/* T = 1 when the STR1 byte is zero */
	bt	L_return
	cmp/eq	r0,r1
	bt.s L_char_loop
	mov.b	@STR1+,r0		/* delay slot: fetches the next STR1 byte */
	add	#-2,STR1		/* bytes differed: point back at the byte that was compared */
	mov.b	@STR1,r0		/* and reload it, since the delay slot overwrote r0 */
#else /* ! DELAYED_BRANCHES */
	/* Same algorithm without delay slots.  The registers are named
	   through STR1/STR2/TMP, like the delayed-branch variant, so that
	   this path also follows the __SH5__ register assignment instead
	   of assuming r4/r5/r2.  */
	.align  2
	/* Word loop: stop when the STR1 word contains a zero byte or the
	   two words differ.  r0 is 0 on entry.  */
Longword_loop:
	mov.l	@STR1+,r1
	mov.l	@STR2+,TMP
	cmp/str	r0,r1			/* T = 1 if any byte of r1 equals the matching byte of r0 (zero) */
	bt	Longword_loop_end
	cmp/eq	r1,TMP
	bt	Longword_loop
Longword_loop_end:
	add #-4, STR1			/* step both pointers back one word so the byte loop */
	add #-4, STR2			/* re-examines the word that ended the word loop */
	.align  2
	/* Byte loop: leaves the last pair of bytes in r0 and r1.  */
L_setup_char_loop:
L_char_loop:
	mov.b	@STR1+,r0
	mov.b	@STR2+,r1
	tst	r0,r0			/* T = 1 when the STR1 byte is zero */
	bt	L_return
	cmp/eq	r0,r1
	bt L_char_loop
#endif
/* Common exit of the SHcompact code: r0 and r1 hold the last byte read
   from the first and second string.  Both are zero-extended to unsigned
   byte values and their difference is returned in RESULT.  */
L_return:
	extu.b	r0,RESULT
	extu.b	r1,r1
	rts
	sub	r1,RESULT		/* delay slot of rts: RESULT = RESULT - r1 */
#endif /* ! __SHMEDIA__ */