From 2ce884182c6752cf9bcfd72399d7e9e4c5048da0 Mon Sep 17 00:00:00 2001
From: Nick Clifton <nickc@redhat.com>
Date: Fri, 25 May 2001 21:17:06 +0000
Subject: [PATCH] Fix bug when both pointers have matching, non-word
 alignment, and the length is <= 4 but more than enough to move them over a
 word boundary. Add comments explaining what each instruction does.

---
 newlib/ChangeLog                    |   7 ++
 newlib/libc/machine/xscale/memcmp.c | 173 ++++++++++++++--------------
 2 files changed, 95 insertions(+), 85 deletions(-)

diff --git a/newlib/ChangeLog b/newlib/ChangeLog
index 79f59fb42..5c23dece0 100644
--- a/newlib/ChangeLog
+++ b/newlib/ChangeLog
@@ -1,3 +1,10 @@
+2001-05-25  Nick Clifton  <nickc@cambridge.redhat.com>
+
+	* libc/machine/xscale/memcmp.c: Fix bug when both pointers have
+	matching, non-word alignment, and the length is <= 4 but more than
+	enough to move them over a word boundary.
+	Add comments explaining what each instruction does.
+
 Mon May  7 20:39:25 2001  Christopher Faylor <cgf@cygnus.com>
 
 	* libc/include/sys/stat.h: Revert March 3, Cygwin change.
diff --git a/newlib/libc/machine/xscale/memcmp.c b/newlib/libc/machine/xscale/memcmp.c
index 15ca9b1d1..1b85af45d 100644
--- a/newlib/libc/machine/xscale/memcmp.c
+++ b/newlib/libc/machine/xscale/memcmp.c
@@ -8,99 +8,102 @@
 #include "xscale.h"
 
 int
-memcmp (const void *s1, const void *s2, size_t len)
+memcmp (const void * s1, const void * s2, size_t len)
 {
   int result;
   asm (
 #ifndef __OPTIMIZE_SIZE__ 
 "		
-	cmp	%2, #0x3
-	bls	6f
-	and	r2, %0, #0x3
-	and	r3, %1, #0x3
-	cmp	r2, r3
-	bne	6f
-	mov	lr, %0
-	mov	r4, %1
-	cmp	r2, #0x0
-	beq	3f
-	b	1f
-0:
-	ldrb	r2, [lr], #1	@ zero_extendqisi2
-"	PRELOADSTR("lr") "
-	ldrb	r3, [r4], #1	@ zero_extendqisi2
-"	PRELOADSTR("r4") "
-	cmp	r2, r3
-	bne	5f
-	tst	lr, #0x3
-	beq	3f
-1:
-	sub	%2, %2, #1
-	cmn	%2, #0x1
-	bne	0b
-	b	4f
+	cmp	%2, #0x3	@ Is the length <= 3 ?
+	bls	6f		@ yes = goto SLOW CHECK
+	and	r2, %0, #0x3	@ get alignment of first pointer
+	and	r3, %1, #0x3	@ get alignment of second pointer
+	cmp	r2, r3		@ Do the two pointers share the same alignment ?
+	bne	6f		@ no = goto SLOW CHECK
+	mov	lr, %0		@ copy first pointer into LR
+	mov	r4, %1		@ copy second pointer into R4
+	cmp	r2, #0x0	@ Are we comparing word aligned pointers ?
+	beq	3f		@ yes = goto START WORD CHECK LOOP
+	b	1f		@ jump to LOOP TEST
+0:			       @ LOOP START
+	ldrb	r2, [lr], #1	@ load byte from LR, post inc.
+"	PRELOADSTR("lr") "	@ preload 
+	ldrb	r3, [r4], #1	@ load byte from R4, post inc.
+"	PRELOADSTR("r4") "	@ preload
+	cmp	r2, r3		@ are the two bytes the same ?
+	bne	5f		@ no = goto EXIT
+	tst	lr, #0x3	@ has the LR become word aligned ?
+	bne     1f		@ no = skip the next test
+	cmp     %2, #4		@ is the count >= 4 ?
+	bhs     3f		@ yes = goto START WORD CHECK LOOP
+1:			       @ LOOP TEST
+	sub	%2, %2, #1	@ decrement count by one
+	cmn	%2, #0x1	@ has the count reached -1 ?
+	bne	0b		@ no = loop back to LOOP START
+	b	4f		@ goto PASS END
 
-0:
-	cmp	%2, #0x7
-	bls	3f
-	ldmia	lr,{r2, r3}
-	ldmia	r4,{r5, r6}
-	sub	%2, %2, #0x4
-	cmp	r2, r5
-	bne	1f
-	sub	%2, %2, #0x4
-	add	lr, lr, #0x4
-	add	r4, r4, #0x4
-	cmp	r3, r6
-	beq	0b
-1:
-	add	%2, %2, #0x4
-	sub	%0, lr, #0x4
-	sub	%1, r4, #0x4
-	b	6f
-3:
-	cmp	%2, #0x3
-	bls	1f
-	ldr	r2, [lr], #4
-	ldr	r3, [r4], #4
-	sub	%2, %2, #4
-	cmp	r2, r3
-	bne	1f
-0:
-	cmp	%2, #0x3
-	bls	1f
-	ldr	r2, [lr], #4
-"	PRELOADSTR("lr") "
-	ldr	r3, [r4], #4
-"	PRELOADSTR("r4") "
-	sub	%2, %2, #4
-	cmp	r2, r3
-	beq	0b
-1:
-	sub	%0, lr, #0x4
-	sub	%1, r4, #0x4
-	add	%2, %2, #4
-"
+0:			       @ DOUBLE WORD CHECK LOOP (NOTE: no visible branch enters here, appears unreachable)
+	cmp	%2, #0x7	@ is the count <= 7 ?
+	bls	3f		@ yes = goto START WORD CHECK LOOP
+	ldmia	lr,{r2, r3}	@ get two words from LR, no writeback
+	ldmia	r4,{r5, r6}	@ get two words from R4, no writeback
+	sub	%2, %2, #0x4	@ decrement count by 4
+	cmp	r2, r5		@ are the first words the same ?
+	bne	1f		@ no = goto the fix-up code at 1:
+	sub	%2, %2, #0x4	@ decrement the count by 4
+	add	lr, lr, #0x4	@ add 4 to first pointer
+	add	r4, r4, #0x4	@ add 4 to second pointer
+	cmp	r3, r6		@ are the second words the same ?
+	beq	0b		@ yes = loop back to the top of this loop
+1:			       @ words differ: adjust count and pointers for SLOW CHECK
+	add	%2, %2, #0x4	@ Add four to count
+	sub	%0, lr, #0x4	@ decrement first pointer by 4
+	sub	%1, r4, #0x4	@ decrement second pointer by 4
+	b	6f		@ goto SLOW CHECK
+
+3:			       @ START WORD CHECK LOOP
+	cmp	%2, #0x3	@ is the count <= 3 ?
+	bls	1f		@ yes = goto CHECK BYTES BY HAND
+	ldr	r2, [lr], #4	@ get word from LR, post inc
+	ldr	r3, [r4], #4	@ get word from R4, post inc
+	sub	%2, %2, #4	@ decrement count by 4
+	cmp	r2, r3		@ are the two words the same ?
+	bne	1f		@ no = goto CHECK BYTES BY HAND
+0:			       @ WORD CHECK LOOP
+	cmp	%2, #0x3	@ is the count <= 3 ?
+	bls	1f		@ yes = goto CHECK BYTES BY HAND
+	ldr	r2, [lr], #4	@ load word from LR, post inc
+"	PRELOADSTR("lr") "	@ preload
+	ldr	r3, [r4], #4	@ load word from R4, post inc
+"	PRELOADSTR("r4") "	@ preload
+	sub	%2, %2, #4	@ decrement count by 4
+	cmp	r2, r3		@ are the two words the same ?
+	beq	0b		@ yes = goto WORD CHECK LOOP
+1:			       @ CHECK BYTES BY HAND
+	sub	%0, lr, #0x4	@ move LR back a word and put into first pointer
+	sub	%1, r4, #0x4	@ move R4 back a word and put into second pointer
+	add	%2, %2, #4	@ increment the count by 4
+				@ fall through into SLOW CHECK"
 #endif /* !__OPTIMIZE_SIZE__ */
 "
-6:				
-	sub	%2, %2, #1
-	cmn	%2, #0x1
-	beq	4f
-0:
-	ldrb	r2, [%0], #1	@ zero_extendqisi2
-"	PRELOADSTR("%0") "
-	ldrb	r3, [%1], #1	@ zero_extendqisi2
-"	PRELOADSTR("%1") "
-	cmp	r2, r3
-	bne	5f
-	sub	%2, %2, #1
-	cmn	%2, #0x1
-	bne	0b
-4:		
-	mov	r3, r2
-5:		
-	rsb	%0, r3, r2"
+6:			       @ SLOW CHECK
+	sub	%2, %2, #1	@ Decrement the count by one
+	cmn	%2, #0x1	@ Has the count reached -1 ?
+	beq	4f		@ Yes - we are finished, goto PASS END
+0:			       @ LOOP1
+	ldrb	r2, [%0], #1	@ get byte from first pointer
+"	PRELOADSTR("%0") "	@ preload first pointer
+	ldrb	r3, [%1], #1	@ get byte from second pointer
+"	PRELOADSTR("%1") "	@ preload second pointer
+	cmp	r2, r3		@ compare the two loaded bytes
+	bne	5f		@ if they are not equal goto EXIT
+	sub	%2, %2, #1	@ decremented count by 1
+	cmn	%2, #0x1	@ has the count reached -1 ?
+	bne	0b		@ no = then go back to LOOP1
+4:			       @ PASS END
+	mov	r3, r2		@ Default return value is 0
+5:			       @ EXIT
+	rsb	%0, r3, r2	@ return difference between last two bytes loaded"
        : "=r" (result), "=&r" (s2), "=&r" (len)
        : "0" (s1), "1" (s2), "2" (len)
        : "r2", "r3", "r4", "r5", "r6", "cc", "lr");