! strcmp.S -- SuperH string compare (SHmedia SH-5 and SHcompact SH1..4 variants).
! SH5 code Copyright 2002 SuperH Ltd.
!
! strcmp -- compare two NUL-terminated strings.
!
!   int strcmp (const char *s1, const char *s2);
!
! Returns a negative, zero, or positive value according to whether s1
! compares below, equal to, or above s2 (unsigned byte comparison).
!
! Two implementations are assembled from this file:
!   __SHMEDIA__ : SH-5 SHmedia code.  Arguments arrive in r2/r3, the
!                 result is returned in r2, and the return address in
!                 r18 (returned through via blink tr2).
!   otherwise   : SHcompact / SH1..4 code; see the Entry/Exit comment
!                 in that section below.

#include "asm.h"

ENTRY(strcmp)

#if __SHMEDIA__
	! Compare the first eight bytes one at a time, with the loads for
	! even and odd offsets interleaved into two register pairs
	! (r4/r5 and r6/r7) so a difference or NUL in either pair can
	! branch out early.  tr0 -> quickret0 (r4/r5 pair),
	! tr1 -> quickret1 (r6/r7 pair), tr2 -> caller's return address.
	ld.ub	r2,0,r4
	pt/l	quickret0,tr0
	ld.ub	r3,0,r5
	ptabs	r18,tr2		! tr2 = return address
	beqi/u	r4,0,tr0	! NUL in s1 -> return byte difference
	ld.ub	r2,1,r6
	bne/u	r4,r5,tr0	! bytes differ -> return byte difference
	pt/l	quickret1,tr1
	ld.ub	r3,1,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,2,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,2,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,3,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,3,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,4,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,4,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,5,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,5,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,6,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,6,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,7,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,7,r7
	beqi/u	r6,0,tr1
	sub	r3,r2,r3	! r3 = s2 - s1 (pointer difference)
	bne/u	r6,r7,tr1

	! The first eight bytes matched and contained no NUL: switch to
	! quadword comparison.  r2 is rounded down to an 8-byte boundary
	! and r3 rebased so that r2+r3 addresses s2's data.
	andi	r2,-8,r2
	add	r3,r2,r3
	ldlo.q	r3,8,r23	! low (in-quad) part of s2's next quadword
	pt	r23_zero,tr0
	shlli	r3,3,r22	! r22 = s2's misalignment in bits (low bits used as shift count)
	sub	r63,r22,r20	! r20 = complementary shift count (r63 reads as zero)
	movi	0x101,r6
	mperm.w	r6,r63,r6	! presumably replicates 0x0101 into every word, r6 = 0x0101..01 -- TODO confirm mperm.w semantics
	SHLO	r6,r22,r7	! 0x01 byte mask covering only the bytes ldlo.q fetched
	msubs.ub r7,r23,r8	! saturating byte subtract: r8 nonzero only where a fetched s2 byte is NUL
	pt	loop,tr1
	bnei/u	r8,0,tr0 // r23_zero	! NUL already in the partial s2 quadword
	pt	found_zero,tr0
	addi	r3,15,r3
	andi	r3,-8,r3
	sub	r3,r2,r3	! r3 = offset from r2 to s2's next aligned quadword
	bne/l	r7,r6,tr1 // loop	! partial mask != full mask: s2 is misaligned -> merging loop
	/* The strings are aligned to each other.  */
	/* It is possible to have a loop with six cycles / iteration
	   by re-ordering the exit conditions, but then it needs extra
	   time and/or code to sort out the r4 != r5 case.  */
	pt	al_loop,tr1
	pt	al_found_zero,tr0
al_loop:
	! Mutually aligned case: one quadword load per string per pass.
	ld.q	r2,8,r4		! next 8 bytes of s1
	ldx.q	r2,r3,r5	! corresponding 8 bytes of s2
	addi	r2,8,r2
	mcmpeq.b r63,r4,r8	! per-byte compare with zero: r8 flags NUL bytes in r4
	pt	cmp_quad,tr3
	bnei/u	r8,0,tr0  // al_found_zero
	beq/l	r4,r5,tr1 // al_loop
	blink	tr3,r63   // cmp_quad	! quads differ, no NUL seen yet

	.balign 8
quickret0:
	! Early exit from the byte-by-byte prologue, r4/r5 pair.
	sub	r4,r5,r2	! result = difference of the deciding bytes
	blink	tr2,r63		! return
quickret1:
	sub	r6,r7,r2
	blink	tr2,r63

! Misaligned loop: each s2 quadword for comparison is assembled from two
! aligned loads.  r23 carries the bytes of the previous raw s2 load that
! belong to the current comparison; r20/r22 are the complementary
! bit-shift counts derived from the misalignment.
loop:
	ld.q	r2,8,r4		! next 8 aligned bytes of s1
	ldx.q	r2,r3,r19	! next raw aligned quadword of s2
	addi	r2,8,r2
	msubs.ub r6,r4,r8	! r8 nonzero iff r4 contains a NUL byte
	mcmpeq.b r63,r19,r9	! r9 flags NUL bytes in the raw s2 quadword
	SHHI	r19,r20,r21	! part of the new s2 load for this iteration
	or	r21,r23,r5	! r5 = s2 bytes aligned with r4
	SHLO	r19,r22,r23	! save the remainder for the next iteration
	bne/u	r8,r9,tr0 // found_zero	! masks use different flag encodings, so they only match when both are zero -- TODO confirm
	beq/l	r4,r5,tr1 // loop
cmp_quad:
	! Quadwords differ.  Compare them as unsigned 64-bit values in
	! memory (string) order; result in r2 is -1 or 1.
#ifdef __LITTLE_ENDIAN__
	byterev r4,r4
	byterev r5,r5
#endif
	cmpgtu	r4,r5,r6
	cmpgtu	r5,r4,r7
	sub	r6,r7,r2
	blink tr2,r63
found_zero:
	! A NUL was seen somewhere.  Decide whether it lies within the
	! bytes compared this iteration, or only in the part of the raw
	! s2 load that belongs to the NEXT iteration (now held in r23).
	pt	zero_now,tr0
	pt	cmp_quad,tr1
	SHHI	r9,r20,r7	! s2 NUL flags shifted into compared-byte positions
	bne/u	r8,r7,tr0 // zero_now	! NUL within the compared quadwords
	bne/u	r4,r5,tr1 // cmp_quad	! no NUL here, but the quads differ
	SHLO	r9,r22,r8	! NUL flags for the carried-over s2 bytes
r23_zero:
	! The NUL is in the carried-over part of s2 (r23): fetch the
	! matching s1 bytes and fall through to the truncated compare.
	ld.q	r2,8,r4
	add	r23,r63,r5
zero_now:
al_found_zero:
/* We know that one of the values has at least one zero, and r8 holds
   an 0x01 or 0xff mask for every zero found in one of the operands.
   If both operands have the first zero in the same place, this mask
   allows us to truncate the comparison to the valid bytes in the
   strings.  If the first zero is in different places, it doesn't
   matter if some invalid bytes are included, since the comparison
   of the zero with the non-zero will determine the outcome.  */
#ifdef __LITTLE_ENDIAN__
	shlli	r8,8,r8		! move flags up one byte so the NUL itself stays included
	addi	r8,-1,r9
	andc	r9,r8,r8	! r8 = mask of bytes up to the first NUL flag
	and	r8,r4,r4	! truncate both operands to the valid bytes
	and	r8,r5,r5
#else
	shlri r8,1,r8
	nsb	r8,r8		! presumably number-of-sign-bits: locates the first flag -- TODO confirm
	addi	r8,8,r8
	andi	r8,56,r8	! byte-granular bit count
	sub	r63,r8,r8	! negate for a left shift via the dynamic shift below
	shlrd	r4,r8,r4	! discard bytes beyond the first NUL
	shlrd	r5,r8,r5
#endif
#ifdef __LITTLE_ENDIAN__
	byterev r4,r4		! compare in memory (string) order
	byterev r5,r5
#endif
	cmpgtu	r4,r5,r6
	cmpgtu	r5,r4,r7
	sub	r6,r7,r2	! -1, 0 or 1
	blink tr2,r63

#else /* ! __SHMEDIA__, i.e. SH 1..4 / SHcompact */

#ifdef __SH5__
/* SHcompact on SH-5: arguments in r2/r3, result in r2.  */
#define STR1 r2
#define STR2 r3
#define RESULT r2
#define TMP r4
#else
! Entry: r4: string1
!        r5: string2
! Exit:  r0: result
!        r1-r2,r4-r5: clobbered
#define STR1 r4
#define STR2 r5
#define RESULT r0
#define TMP r2
#endif /* __SH5__ */

	! If either pointer is not longword-aligned, go straight to the
	! byte loop; otherwise compare a longword at a time until a
	! difference or a NUL stops the loop, then back up and let the
	! byte loop deliver the result.
	mov     STR1,r0
	or      STR2,r0
	tst	#3,r0		! T set iff both pointers are 4-byte aligned
	bf	L_setup_char_loop
	mov	#0,r0		! r0 = 0 for the cmp/str NUL test below
#ifdef DELAYED_BRANCHES
	mov.l	@STR1+,r1	! prime the pipeline: first s1 word
	.align  2
Longword_loop:
	mov.l	@STR2+,TMP
	cmp/str	r0,r1		! with r0==0: T set iff some byte of r1 is NUL
	bt	Longword_loop_end
	cmp/eq	r1,TMP
	bt.s	Longword_loop
	mov.l	@STR1+,r1	! delay slot: prefetch next s1 word
	add #-4, STR1		! words differed: undo the delay-slot prefetch
Longword_loop_end:
	add #-4, STR1		! rewind to the word that stopped the loop
	add #-4, STR2
L_setup_char_loop:
	mov.b	@STR1+,r0	! prime the pipeline: first s1 byte
	.align  2
L_char_loop:
	mov.b	@STR2+,r1
	tst	r0,r0		! end of s1?
	bt	L_return
	cmp/eq	r0,r1
	bt.s L_char_loop
	mov.b	@STR1+,r0	! delay slot: prefetch next s1 byte
	add	#-2,STR1	! mismatch: re-read the byte that differed
	mov.b	@STR1,r0
#else /* ! DELAYED_BRANCHES */
	.align  2
Longword_loop:
	mov.l	@r4+,r1
	mov.l	@r5+,r2
	cmp/str	r0,r1		! with r0==0: T set iff some byte of r1 is NUL
	bt	Longword_loop_end
	cmp/eq	r1,r2
	bt	Longword_loop
Longword_loop_end:
	add #-4, r4		! rewind to the word that stopped the loop
	add #-4, r5
	.align  2
L_setup_char_loop:
L_char_loop:
	mov.b	@r4+,r0
	mov.b	@r5+,r1
	tst	r0,r0		! end of s1?
	bt	L_return
	cmp/eq	r0,r1
	bt L_char_loop
#endif
L_return:
	extu.b	r0,RESULT	! result = (unsigned char) s1 byte
	extu.b	r1,r1		!        - (unsigned char) s2 byte
	rts
	sub	r1,RESULT	! delay slot
#endif /* ! __SHMEDIA__ */