From 59be22909b8ed45624a24fe9952d69a1280bd858 Mon Sep 17 00:00:00 2001
From: Jeff Johnston
Date: Sat, 20 Apr 2002 00:29:51 +0000
Subject: [PATCH] 2002-04-19 Bill Siegmund

	* libc/machine/xscale/memchr.c: Don't use multi-line strings.
	* libc/machine/xscale/memcmp.c: Ditto.
	* libc/machine/xscale/memcpy.c: Ditto.
	* libc/machine/xscale/memmove.c: Ditto.
	* libc/machine/xscale/memset.c: Ditto.
	* libc/machine/xscale/strchr.c: Ditto.
	* libc/machine/xscale/strcmp.c: Ditto.
	* libc/machine/xscale/strcpy.c: Ditto.
	* libc/machine/xscale/strlen.c: Ditto.
---
 newlib/ChangeLog                     |  12 ++
 newlib/libc/machine/xscale/memchr.c  |  82 ++++++-------
 newlib/libc/machine/xscale/memcmp.c  | 176 +++++++++++++---------------
 newlib/libc/machine/xscale/memcpy.c  | 130 ++++++++++----------
 newlib/libc/machine/xscale/memmove.c | 130 ++++++++++----------
 newlib/libc/machine/xscale/memset.c  | 122 +++++++++----------
 newlib/libc/machine/xscale/strchr.c  |  38 +++---
 newlib/libc/machine/xscale/strcmp.c  |  94 +++++++-------
 newlib/libc/machine/xscale/strcpy.c  |  38 +++---
 newlib/libc/machine/xscale/strlen.c  |  94 +++++++-------
 10 files changed, 464 insertions(+), 452 deletions(-)

diff --git a/newlib/ChangeLog b/newlib/ChangeLog
index eccba1b74..28aa4de3f 100644
--- a/newlib/ChangeLog
+++ b/newlib/ChangeLog
@@ -1,3 +1,15 @@
+2002-04-19 Bill Siegmund
+
+	* libc/machine/xscale/memchr.c: Don't use multi-line strings.
+	* libc/machine/xscale/memcmp.c: Ditto.
+	* libc/machine/xscale/memcpy.c: Ditto.
+	* libc/machine/xscale/memmove.c: Ditto.
+	* libc/machine/xscale/memset.c: Ditto.
+	* libc/machine/xscale/strchr.c: Ditto.
+	* libc/machine/xscale/strcmp.c: Ditto.
+	* libc/machine/xscale/strcpy.c: Ditto.
+	* libc/machine/xscale/strlen.c: Ditto.
+
 2002-04-19 Alexandre Oliva
 
 	* libc/include/sys/config.h: Remove include of .
diff --git a/newlib/libc/machine/xscale/memchr.c b/newlib/libc/machine/xscale/memchr.c index fc24bf66e..9bf18268b 100644 --- a/newlib/libc/machine/xscale/memchr.c +++ b/newlib/libc/machine/xscale/memchr.c @@ -42,47 +42,47 @@ memchr (const void *start, int c, size_t len) R6 = 0x80808080 */ asm ( - "mov r6, #0x80 - add r6, r6, #0x8000 - add r6, r6, r6, lsl #16 - mvn r7, r6, lsl #1 - -0: - cmp %1, #0x7 - bls 1f - - ldmia %0!, { r3, r9 } -" PRELOADSTR ("%0") " - sub %1, %1, #8 - eor r3, r3, %2 - eor r9, r9, %2 - add r2, r3, r7 - add r8, r9, r7 - bic r2, r2, r3 - bic r8, r8, r9 - and r1, r2, r6 - and r9, r8, r6 - orrs r1, r1, r9 - beq 0b - - add %1, %1, #8 - sub %0, %0, #8 -1: - cmp %1, #0x3 - bls 2f - - ldr r3, [%0], #4 -" PRELOADSTR ("%0") " - sub %1, %1, #4 - eor r3, r3, %2 - add r2, r3, r7 - bic r2, r2, r3 - ands r1, r2, r6 - beq 1b - - sub %0, %0, #4 - add %1, %1, #4 -2: + "mov r6, #0x80\n\ + add r6, r6, #0x8000\n\ + add r6, r6, r6, lsl #16\n\ + mvn r7, r6, lsl #1\n\ +\n\ +0:\n\ + cmp %1, #0x7\n\ + bls 1f\n\ +\n\ + ldmia %0!, { r3, r9 }\n\ +" PRELOADSTR ("%0") "\n\ + sub %1, %1, #8\n\ + eor r3, r3, %2\n\ + eor r9, r9, %2\n\ + add r2, r3, r7\n\ + add r8, r9, r7\n\ + bic r2, r2, r3\n\ + bic r8, r8, r9\n\ + and r1, r2, r6\n\ + and r9, r8, r6\n\ + orrs r1, r1, r9\n\ + beq 0b\n\ +\n\ + add %1, %1, #8\n\ + sub %0, %0, #8\n\ +1:\n\ + cmp %1, #0x3\n\ + bls 2f\n\ +\n\ + ldr r3, [%0], #4\n\ +" PRELOADSTR ("%0") "\n\ + sub %1, %1, #4\n\ + eor r3, r3, %2\n\ + add r2, r3, r7\n\ + bic r2, r2, r3\n\ + ands r1, r2, r6\n\ + beq 1b\n\ +\n\ + sub %0, %0, #4\n\ + add %1, %1, #4\n\ +2:\n\ " : "=&r" (str), "=&r" (len) : "r" (c2), "0" (str), "1" (len) diff --git a/newlib/libc/machine/xscale/memcmp.c b/newlib/libc/machine/xscale/memcmp.c index 1b85af45d..c26bcd0b1 100644 --- a/newlib/libc/machine/xscale/memcmp.c +++ b/newlib/libc/machine/xscale/memcmp.c @@ -13,96 +13,96 @@ memcmp (const void * s1, const void * s2, size_t len) int result; asm ( #ifndef __OPTIMIZE_SIZE__ -" - cmp %2, #0x3 @ 
Is the length a multiple of four ? - bls 6f @ no = goto SLOW CHECK - and r2, %0, #0x3 @ get alignment of first pointer - and r3, %1, #0x3 @ get alignment of second pointer - cmp r2, r3 @ Do the two pointers share the same alignment ? - bne 6f @ no = goto SLOW CHECK - mov lr, %0 @ copy first pointer into LR - mov r4, %1 @ copy second pointer into R4 - cmp r2, #0x0 @ Are we comparing word aligned pointers ? - beq 3f @ yes = goto START WORD CHECK LOOP - b 1f @ jump to LOOP TEST -0: @ LOOP START - ldrb r2, [lr], #1 @ load byte from LR, post inc. -" PRELOADSTR("lr") " @ preload - ldrb r3, [r4], #1 @ load byte from R4, post inc. -" PRELOADSTR("r4") " @ preload - cmp r2, r3 @ are the two bytes the same ? - bne 5f @ no = goto EXIT - tst lr, #0x3 @ has the LR become word aligned ? - bne 1f @ no = skip the next test - cmp %2, #4 @ is the count >= 4 ? - bhs 3f @ yes = goto START WORD CHECK LOOP -1: @ LOOP TEST - sub %2, %2, #1 @ decrement count by one - cmn %2, #0x1 @ has the count reached -1 ? - bne 0b @ no = loop back to LOOP START - b 4f @ goto PASS END - -0: @ ?? - cmp %2, #0x7 @ Is the count a multiple of 8 ? - bls 3f @ no = goto ??? - ldmia lr,{r2, r3} @ get two words from first pointer, post inc - ldmia r4,{r5, r6} @ get two words from second pointer, post inc - sub %2, %2, #0x4 @ decrement count by 4 - cmp r2, r5 @ has the count reached ???? - bne 1f @ no = goto - sub %2, %2, #0x4 @ decrement the count by 4 - add lr, lr, #0x4 @ add 4 to first pointer - add r4, r4, #0x4 @ add 4 to second pointer - cmp r3, r6 @ ??? - beq 0b @ goto ??? -1: @ ?? - add %2, %2, #0x4 @ Add four to count - sub %0, lr, #0x4 @ decrement first pointer by 4 - sub %1, r4, #0x4 @ decrement second pointer by 4 - b 6f @ goto SLOW CHECK - -3: @ START WORD CHECK LOOP - cmp %2, #0x3 @ is the count <= 3 ? 
- bls 1f @ yes = goto CHECK BYTES BY HAND - ldr r2, [lr], #4 @ get word from LR, post inc - ldr r3, [r4], #4 @ get word from R4, post inc - sub %2, %2, #4 @ decrement count by 4 - cmp r2, r3 @ are the two words the same ? - bne 1f @ no = goto CHECK WORD CONTENTS -0: @ WORD CHECK LOOP - cmp %2, #0x3 @ is the count <= 3 ? - bls 1f @ yes = goto CHECK BYTES BY HAND - ldr r2, [lr], #4 @ load word from LR, post inc -" PRELOADSTR("lr") " @ preload - ldr r3, [r4], #4 @ load word from R4, post inc -" PRELOADSTR("r4") " @ preload - sub %2, %2, #4 @ decrement count by 4 - cmp r2, r3 @ are the two words the same ? - beq 0b @ yes = goto WORD CHECK LOOP -1: @ CHECK BYTES BY HAND - sub %0, lr, #0x4 @ move LR back a word and put into first pointer - sub %1, r4, #0x4 @ move R4 back a word and put into second pointer - add %2, %2, #4 @ increment the count by 4 +"\n\ + cmp %2, #0x3 @ Is the length a multiple of four ?\n\ + bls 6f @ no = goto SLOW CHECK\n\ + and r2, %0, #0x3 @ get alignment of first pointer\n\ + and r3, %1, #0x3 @ get alignment of second pointer\n\ + cmp r2, r3 @ Do the two pointers share the same alignment ?\n\ + bne 6f @ no = goto SLOW CHECK\n\ + mov lr, %0 @ copy first pointer into LR\n\ + mov r4, %1 @ copy second pointer into R4\n\ + cmp r2, #0x0 @ Are we comparing word aligned pointers ?\n\ + beq 3f @ yes = goto START WORD CHECK LOOP\n\ + b 1f @ jump to LOOP TEST\n\ +0: @ LOOP START\n\ + ldrb r2, [lr], #1 @ load byte from LR, post inc.\n\ +" PRELOADSTR("lr") " @ preload\n\ + ldrb r3, [r4], #1 @ load byte from R4, post inc.\n\ +" PRELOADSTR("r4") " @ preload\n\ + cmp r2, r3 @ are the two bytes the same ?\n\ + bne 5f @ no = goto EXIT\n\ + tst lr, #0x3 @ has the LR become word aligned ?\n\ + bne 1f @ no = skip the next test\n\ + cmp %2, #4 @ is the count >= 4 ?\n\ + bhs 3f @ yes = goto START WORD CHECK LOOP\n\ +1: @ LOOP TEST\n\ + sub %2, %2, #1 @ decrement count by one\n\ + cmn %2, #0x1 @ has the count reached -1 ?\n\ + bne 0b @ no = loop back to LOOP START\n\ + b 
4f @ goto PASS END\n\ +\n\ +0: @ ??\n\ + cmp %2, #0x7 @ Is the count a multiple of 8 ?\n\ + bls 3f @ no = goto ???\n\ + ldmia lr,{r2, r3} @ get two words from first pointer, post inc\n\ + ldmia r4,{r5, r6} @ get two words from second pointer, post inc\n\ + sub %2, %2, #0x4 @ decrement count by 4\n\ + cmp r2, r5 @ has the count reached ????\n\ + bne 1f @ no = goto\n\ + sub %2, %2, #0x4 @ decrement the count by 4\n\ + add lr, lr, #0x4 @ add 4 to first pointer\n\ + add r4, r4, #0x4 @ add 4 to second pointer\n\ + cmp r3, r6 @ ???\n\ + beq 0b @ goto ???\n\ +1: @ ??\n\ + add %2, %2, #0x4 @ Add four to count\n\ + sub %0, lr, #0x4 @ decrement first pointer by 4\n\ + sub %1, r4, #0x4 @ decrement second pointer by 4\n\ + b 6f @ goto SLOW CHECK\n\ +\n\ +3: @ START WORD CHECK LOOP\n\ + cmp %2, #0x3 @ is the count <= 3 ?\n\ + bls 1f @ yes = goto CHECK BYTES BY HAND\n\ + ldr r2, [lr], #4 @ get word from LR, post inc\n\ + ldr r3, [r4], #4 @ get word from R4, post inc\n\ + sub %2, %2, #4 @ decrement count by 4\n\ + cmp r2, r3 @ are the two words the same ?\n\ + bne 1f @ no = goto CHECK WORD CONTENTS\n\ +0: @ WORD CHECK LOOP\n\ + cmp %2, #0x3 @ is the count <= 3 ?\n\ + bls 1f @ yes = goto CHECK BYTES BY HAND\n\ + ldr r2, [lr], #4 @ load word from LR, post inc\n\ +" PRELOADSTR("lr") " @ preload\n\ + ldr r3, [r4], #4 @ load word from R4, post inc\n\ +" PRELOADSTR("r4") " @ preload\n\ + sub %2, %2, #4 @ decrement count by 4\n\ + cmp r2, r3 @ are the two words the same ?\n\ + beq 0b @ yes = goto WORD CHECK LOOP\n\ +1: @ CHECK BYTES BY HAND\n\ + sub %0, lr, #0x4 @ move LR back a word and put into first pointer\n\ + sub %1, r4, #0x4 @ move R4 back a word and put into second pointer\n\ + add %2, %2, #4 @ increment the count by 4\n\ @ fall through into SLOW CHECK" #endif /* !__OPTIMIZE_SIZE__ */ -" -6: @ SLOW CHECK - sub %2, %2, #1 @ Decrement the count by one - cmn %2, #0x1 @ Has the count reached -1 ? 
- beq 4f @ Yes - we are finished, goto PASS END -0: @ LOOP1 - ldrb r2, [%0], #1 @ get byte from first pointer -" PRELOADSTR("%0") " @ preload first pointer - ldrb r3, [%1], #1 @ get byte from second pointer -" PRELOADSTR("%1") " @ preload second pointer - cmp r2, r3 @ compare the two loaded bytes - bne 5f @ if they are not equal goto EXIT - sub %2, %2, #1 @ decremented count by 1 - cmn %2, #0x1 @ has the count reached -1 ? - bne 0b @ no = then go back to LOOP1 -4: @ PASS END - mov r3, r2 @ Default return value is 0 -5: @ EXIT +"\n\ +6: @ SLOW CHECK\n\ + sub %2, %2, #1 @ Decrement the count by one\n\ + cmn %2, #0x1 @ Has the count reached -1 ?\n\ + beq 4f @ Yes - we are finished, goto PASS END\n\ +0: @ LOOP1\n\ + ldrb r2, [%0], #1 @ get byte from first pointer\n\ +" PRELOADSTR("%0") " @ preload first pointer\n\ + ldrb r3, [%1], #1 @ get byte from second pointer\n\ +" PRELOADSTR("%1") " @ preload second pointer\n\ + cmp r2, r3 @ compare the two loaded bytes\n\ + bne 5f @ if they are not equal goto EXIT\n\ + sub %2, %2, #1 @ decremented count by 1\n\ + cmn %2, #0x1 @ has the count reached -1 ?\n\ + bne 0b @ no = then go back to LOOP1\n\ +4: @ PASS END\n\ + mov r3, r2 @ Default return value is 0\n\ +5: @ EXIT\n\ rsb %0, r3, r2 @ return difference between last two bytes loaded" : "=r" (result), "=&r" (s2), "=&r" (len) : "0" (s1), "1" (s2), "2" (len) diff --git a/newlib/libc/machine/xscale/memcpy.c b/newlib/libc/machine/xscale/memcpy.c index 2799fe2bf..1a3297778 100644 --- a/newlib/libc/machine/xscale/memcpy.c +++ b/newlib/libc/machine/xscale/memcpy.c @@ -13,85 +13,85 @@ memcpy (void *dst0, const void *src0, size_t len) int dummy; asm volatile ( #ifndef __OPTIMIZE_SIZE__ - "cmp %2, #0x3 - bls 3f - and lr, %1, #0x3 - and r3, %0, #0x3 - cmp lr, r3 - bne 3f - cmp lr, #0x0 - beq 2f - b 1f -0: - ldrb r3, [%1], #1 + "cmp %2, #0x3\n\ + bls 3f\n\ + and lr, %1, #0x3\n\ + and r3, %0, #0x3\n\ + cmp lr, r3\n\ + bne 3f\n\ + cmp lr, #0x0\n\ + beq 2f\n\ + b 1f\n\ +0:\n\ + ldrb r3, 
[%1], #1\n\ " PRELOADSTR ("%1") -" - tst %1, #0x3 - strb r3, [%0], #1 - beq 3f -1: - sub %2, %2, #1 - cmn %2, #1 - bne 0b -2: - cmp %2, #0xf - bls 1f -0: - ldmia %1!, { r3, r4, r5, lr } +"\n\ + tst %1, #0x3\n\ + strb r3, [%0], #1\n\ + beq 3f\n\ +1:\n\ + sub %2, %2, #1\n\ + cmn %2, #1\n\ + bne 0b\n\ +2:\n\ + cmp %2, #0xf\n\ + bls 1f\n\ +0:\n\ + ldmia %1!, { r3, r4, r5, lr }\n\ " PRELOADSTR ("%1") -" - - sub %2, %2, #16 - cmp %2, #0xf - stmia %0!, { r3, r4, r5, lr } - bhi 0b -1: - cmp %2, #0x7 - bls 1f -0: - ldmia %1!, { r3, r4 } +"\n\ +\n\ + sub %2, %2, #16\n\ + cmp %2, #0xf\n\ + stmia %0!, { r3, r4, r5, lr }\n\ + bhi 0b\n\ +1:\n\ + cmp %2, #0x7\n\ + bls 1f\n\ +0:\n\ + ldmia %1!, { r3, r4 }\n\ " PRELOADSTR ("%1") -" - - sub %2, %2, #8 - cmp %2, #0x7 - stmia %0!, { r3, r4 } - bhi 0b -1: - cmp %2, #0x3 - bls 3f -0: - sub %2, %2, #4 - ldr r3, [%1], #4 +"\n\ +\n\ + sub %2, %2, #8\n\ + cmp %2, #0x7\n\ + stmia %0!, { r3, r4 }\n\ + bhi 0b\n\ +1:\n\ + cmp %2, #0x3\n\ + bls 3f\n\ +0:\n\ + sub %2, %2, #4\n\ + ldr r3, [%1], #4\n\ " PRELOADSTR ("%1") -" - - cmp %2, #0x3 - str r3, [%0], #4 - bhi 0b +"\n\ +\n\ + cmp %2, #0x3\n\ + str r3, [%0], #4\n\ + bhi 0b\n\ " #endif /* !__OPTIMIZE_SIZE__ */ -" -3: +"\n\ +3:\n\ " PRELOADSTR ("%1") -" - sub %2, %2, #1 - cmn %2, #1 - beq 1f -0: - sub %2, %2, #1 - ldrb r3, [%1], #1 +"\n\ + sub %2, %2, #1\n\ + cmn %2, #1\n\ + beq 1f\n\ +0:\n\ + sub %2, %2, #1\n\ + ldrb r3, [%1], #1\n\ " PRELOADSTR ("%1") -" - cmn %2, #1 - strb r3, [%0], #1 - bne 0b +"\n\ + cmn %2, #1\n\ + strb r3, [%0], #1\n\ + bne 0b\n\ 1:" : "=&r" (dummy), "=&r" (src0), "=&r" (len) : "0" (dst0), "1" (src0), "2" (len) diff --git a/newlib/libc/machine/xscale/memmove.c b/newlib/libc/machine/xscale/memmove.c index 47fbfccdd..04d1a14d2 100644 --- a/newlib/libc/machine/xscale/memmove.c +++ b/newlib/libc/machine/xscale/memmove.c @@ -13,85 +13,85 @@ do_memcpy (void *dst0, const void *src0, size_t len) int dummy; asm volatile ( #ifndef __OPTIMIZE_SIZE__ - "cmp %2, #0x3 - bls 3f - and lr, 
%1, #0x3 - and r3, %0, #0x3 - cmp lr, r3 - bne 3f - cmp lr, #0x0 - beq 2f - b 1f -0: - ldrb r3, [%1], #1 + "cmp %2, #0x3\n\ + bls 3f\n\ + and lr, %1, #0x3\n\ + and r3, %0, #0x3\n\ + cmp lr, r3\n\ + bne 3f\n\ + cmp lr, #0x0\n\ + beq 2f\n\ + b 1f\n\ +0:\n\ + ldrb r3, [%1], #1\n\ " PRELOADSTR ("%1") -" - tst %1, #0x3 - strb r3, [%0], #1 - beq 3f -1: - sub %2, %2, #1 - cmn %2, #1 - bne 0b -2: - cmp %2, #0xf - bls 1f -0: - ldmia %1!, { r3, r4, r5, lr } +"\n\ + tst %1, #0x3\n\ + strb r3, [%0], #1\n\ + beq 3f\n\ +1:\n\ + sub %2, %2, #1\n\ + cmn %2, #1\n\ + bne 0b\n\ +2:\n\ + cmp %2, #0xf\n\ + bls 1f\n\ +0:\n\ + ldmia %1!, { r3, r4, r5, lr }\n\ " PRELOADSTR ("%1") -" - - sub %2, %2, #16 - cmp %2, #0xf - stmia %0!, { r3, r4, r5, lr } - bhi 0b -1: - cmp %2, #0x7 - bls 1f -0: - ldmia %1!, { r3, r4 } +"\n\ +\n\ + sub %2, %2, #16\n\ + cmp %2, #0xf\n\ + stmia %0!, { r3, r4, r5, lr }\n\ + bhi 0b\n\ +1:\n\ + cmp %2, #0x7\n\ + bls 1f\n\ +0:\n\ + ldmia %1!, { r3, r4 }\n\ " PRELOADSTR ("%1") -" - - sub %2, %2, #8 - cmp %2, #0x7 - stmia %0!, { r3, r4 } - bhi 0b -1: - cmp %2, #0x3 - bls 3f -0: - sub %2, %2, #4 - ldr r3, [%1], #4 +"\n\ +\n\ + sub %2, %2, #8\n\ + cmp %2, #0x7\n\ + stmia %0!, { r3, r4 }\n\ + bhi 0b\n\ +1:\n\ + cmp %2, #0x3\n\ + bls 3f\n\ +0:\n\ + sub %2, %2, #4\n\ + ldr r3, [%1], #4\n\ " PRELOADSTR ("%1") -" - - cmp %2, #0x3 - str r3, [%0], #4 - bhi 0b +"\n\ +\n\ + cmp %2, #0x3\n\ + str r3, [%0], #4\n\ + bhi 0b\n\ " #endif /* !__OPTIMIZE_SIZE__ */ -" -3: +"\n\ +3:\n\ " PRELOADSTR ("%1") -" - sub %2, %2, #1 - cmn %2, #1 - beq 1f -0: - sub %2, %2, #1 - ldrb r3, [%1], #1 +"\n\ + sub %2, %2, #1\n\ + cmn %2, #1\n\ + beq 1f\n\ +0:\n\ + sub %2, %2, #1\n\ + ldrb r3, [%1], #1\n\ " PRELOADSTR ("%1") -" - cmn %2, #1 - strb r3, [%0], #1 - bne 0b +"\n\ + cmn %2, #1\n\ + strb r3, [%0], #1\n\ + bne 0b\n\ 1:" : "=&r" (dummy), "=&r" (src0), "=&r" (len) : "0" (dst0), "1" (src0), "2" (len) diff --git a/newlib/libc/machine/xscale/memset.c b/newlib/libc/machine/xscale/memset.c index 
ad1fc74bc..4ff8d01e0 100644 --- a/newlib/libc/machine/xscale/memset.c +++ b/newlib/libc/machine/xscale/memset.c @@ -14,69 +14,69 @@ memset (void *dst, int c, size_t len) asm volatile ("tst %0, #0x3" #ifndef __OPTIMIZE_SIZE__ -" - beq 1f - b 2f -0: - strb %1, [%0], #1 - tst %0, #0x3 - beq 1f -2: - movs r3, %2 - sub %2, %2, #1 - bne 0b -# At this point we know that %2 == len == -1 (since the SUB has already taken -# place). If we fall through to the 1: label (as the code used to do), the -# CMP will detect this negative value and branch to the 2: label. This will -# test %2 again, but this time against 0. The test will fail and the loop -# at 2: will go on for (almost) ever. Hence the explicit branch to the end -# of the hand written assembly code. - b 4f -1: - cmp %2, #0x3 - bls 2f - and %1, %1, #0xff - orr lr, %1, %1, asl #8 - cmp %2, #0xf - orr lr, lr, lr, asl #16 - bls 1f - mov r3, lr - mov r4, lr - mov r5, lr -0: - sub %2, %2, #16 - stmia %0!, { r3, r4, r5, lr } - cmp %2, #0xf - bhi 0b -1: - cmp %2, #0x7 - bls 1f - mov r3, lr -0: - sub %2, %2, #8 - stmia %0!, { r3, lr } - cmp %2, #0x7 - bhi 0b -1: - cmp %2, #0x3 - bls 2f -0: - sub %2, %2, #4 - str lr, [%0], #4 - cmp %2, #0x3 - bhi 0b +"\n\ + beq 1f\n\ + b 2f\n\ +0:\n\ + strb %1, [%0], #1\n\ + tst %0, #0x3\n\ + beq 1f\n\ +2:\n\ + movs r3, %2\n\ + sub %2, %2, #1\n\ + bne 0b\n\ +# At this point we know that %2 == len == -1 (since the SUB has already taken\n\ +# place). If we fall through to the 1: label (as the code used to do), the\n\ +# CMP will detect this negative value and branch to the 2: label. This will\n\ +# test %2 again, but this time against 0. The test will fail and the loop\n\ +# at 2: will go on for (almost) ever. 
Hence the explicit branch to the end\n\ +# of the hand written assembly code.\n\ + b 4f\n\ +1:\n\ + cmp %2, #0x3\n\ + bls 2f\n\ + and %1, %1, #0xff\n\ + orr lr, %1, %1, asl #8\n\ + cmp %2, #0xf\n\ + orr lr, lr, lr, asl #16\n\ + bls 1f\n\ + mov r3, lr\n\ + mov r4, lr\n\ + mov r5, lr\n\ +0:\n\ + sub %2, %2, #16\n\ + stmia %0!, { r3, r4, r5, lr }\n\ + cmp %2, #0xf\n\ + bhi 0b\n\ +1:\n\ + cmp %2, #0x7\n\ + bls 1f\n\ + mov r3, lr\n\ +0:\n\ + sub %2, %2, #8\n\ + stmia %0!, { r3, lr }\n\ + cmp %2, #0x7\n\ + bhi 0b\n\ +1:\n\ + cmp %2, #0x3\n\ + bls 2f\n\ +0:\n\ + sub %2, %2, #4\n\ + str lr, [%0], #4\n\ + cmp %2, #0x3\n\ + bhi 0b\n\ " #endif /* !__OPTIMIZE_SIZE__ */ -" -2: - movs r3, %2 - sub %2, %2, #1 - beq 4f -0: - movs r3, %2 - sub %2, %2, #1 - strb %1, [%0], #1 - bne 0b +"\n\ +2:\n\ + movs r3, %2\n\ + sub %2, %2, #1\n\ + beq 4f\n\ +0:\n\ + movs r3, %2\n\ + sub %2, %2, #1\n\ + strb %1, [%0], #1\n\ + bne 0b\n\ 4:" : "=&r" (dummy), "=&r" (c), "=&r" (len) diff --git a/newlib/libc/machine/xscale/strchr.c b/newlib/libc/machine/xscale/strchr.c index 027077cfc..3b736c53c 100644 --- a/newlib/libc/machine/xscale/strchr.c +++ b/newlib/libc/machine/xscale/strchr.c @@ -38,25 +38,25 @@ strchr (const char *s, int c) R6 = 0xfefefeff [ == ~(0x80808080 << 1) ] R5 = 0x80808080 */ - asm (PRELOADSTR ("%0") " - mov r5, #0x80 - add r5, r5, #0x8000 - add r5, r5, r5, lsl #16 - mvn r6, r5, lsl #1 - - sub %0, %0, #4 -0: - ldr r1, [%0, #4]! 
-" PRELOADSTR ("%0") " - add r3, r1, r6 - bic r3, r3, r1 - ands r2, r3, r5 - bne 1f - eor r2, r1, %1 - add r3, r2, r6 - bic r3, r3, r2 - ands r1, r3, r5 - beq 0b + asm (PRELOADSTR ("%0") "\n\ + mov r5, #0x80\n\ + add r5, r5, #0x8000\n\ + add r5, r5, r5, lsl #16\n\ + mvn r6, r5, lsl #1\n\ +\n\ + sub %0, %0, #4\n\ +0:\n\ + ldr r1, [%0, #4]!\n\ +" PRELOADSTR ("%0") "\n\ + add r3, r1, r6\n\ + bic r3, r3, r1\n\ + ands r2, r3, r5\n\ + bne 1f\n\ + eor r2, r1, %1\n\ + add r3, r2, r6\n\ + bic r3, r3, r2\n\ + ands r1, r3, r5\n\ + beq 0b\n\ 1:" : "=&r" (s) : "r" (c2), "0" (s) diff --git a/newlib/libc/machine/xscale/strcmp.c b/newlib/libc/machine/xscale/strcmp.c index d9ec99b5d..6c94d126f 100644 --- a/newlib/libc/machine/xscale/strcmp.c +++ b/newlib/libc/machine/xscale/strcmp.c @@ -32,58 +32,58 @@ strcmp (const char *s1, const char *s2) ip = 0x80808080 */ asm ( - "ldr r2, [%1, #0] - ldr r3, [%2, #0] - cmp r2, r3 - bne 2f - - mov ip, #0x80 - add ip, ip, #0x8000 - add ip, ip, ip, lsl #16 - mvn lr, ip, lsl #1 - -0: - ldr r2, [%1, #0] - add r3, r2, lr - bic r3, r3, r2 - tst r3, ip - beq 1f - mov %0, #0x0 - b 3f -1: - ldr r2, [%1, #4]! - ldr r3, [%2, #4]! -" PRELOADSTR("%1") " -" PRELOADSTR("%2") " - cmp r2, r3 + "ldr r2, [%1, #0]\n\ + ldr r3, [%2, #0]\n\ + cmp r2, r3\n\ + bne 2f\n\ +\n\ + mov ip, #0x80\n\ + add ip, ip, #0x8000\n\ + add ip, ip, ip, lsl #16\n\ + mvn lr, ip, lsl #1\n\ +\n\ +0:\n\ + ldr r2, [%1, #0]\n\ + add r3, r2, lr\n\ + bic r3, r3, r2\n\ + tst r3, ip\n\ + beq 1f\n\ + mov %0, #0x0\n\ + b 3f\n\ +1:\n\ + ldr r2, [%1, #4]!\n\ + ldr r3, [%2, #4]!\n\ +" PRELOADSTR("%1") "\n\ +" PRELOADSTR("%2") "\n\ + cmp r2, r3\n\ beq 0b" /* The following part could be done in a C loop as well, but it needs to be assembler to save some cycles in the case where the optimized loop above finds the strings to be equal. */ -" -2: - ldrb r2, [%1, #0] -" PRELOADSTR("%1") " -" PRELOADSTR("%2") " - cmp r2, #0x0 - beq 1f - ldrb r3, [%2, #0] - cmp r2, r3 - bne 1f -0: - ldrb r3, [%1, #1]! 
- add %2, %2, #1 - ands ip, r3, #0xff - beq 1f - ldrb r3, [%2] - cmp ip, r3 - beq 0b -1: - ldrb lr, [%1, #0] - ldrb ip, [%2, #0] - rsb %0, ip, lr -3: +"\n\ +2:\n\ + ldrb r2, [%1, #0]\n\ +" PRELOADSTR("%1") "\n\ +" PRELOADSTR("%2") "\n\ + cmp r2, #0x0\n\ + beq 1f\n\ + ldrb r3, [%2, #0]\n\ + cmp r2, r3\n\ + bne 1f\n\ +0:\n\ + ldrb r3, [%1, #1]!\n\ + add %2, %2, #1\n\ + ands ip, r3, #0xff\n\ + beq 1f\n\ + ldrb r3, [%2]\n\ + cmp ip, r3\n\ + beq 0b\n\ +1:\n\ + ldrb lr, [%1, #0]\n\ + ldrb ip, [%2, #0]\n\ + rsb %0, ip, lr\n\ +3:\n\ " : "=r" (result), "=&r" (s1), "=&r" (s2) diff --git a/newlib/libc/machine/xscale/strcpy.c b/newlib/libc/machine/xscale/strcpy.c index 707902671..46db2ba6a 100644 --- a/newlib/libc/machine/xscale/strcpy.c +++ b/newlib/libc/machine/xscale/strcpy.c @@ -28,25 +28,25 @@ strcpy (char *dest, const char *src) R4 = 0xfefefeff [ == ~(0x80808080 << 1) ] R5 = 0x80808080 */ - asm ("mov r5, #0x80 - ldr r1, [%1, #0] - add r5, r5, #0x8000 - add r5, r5, r5, lsl #16 - mvn r4, r5, lsl #1 - - add r3, r1, r5 - bic r3, r3, r1 - ands r2, r3, r4 - bne 1f -0: - ldr r3, [%1, #0] - ldr r1, [%1, #4]! 
-" PRELOADSTR("%1") " - str r3, [%0], #4 - add r2, r1, r4 - bic r2, r2, r1 - ands r3, r2, r5 - beq 0b + asm ("mov r5, #0x80\n\ + ldr r1, [%1, #0]\n\ + add r5, r5, #0x8000\n\ + add r5, r5, r5, lsl #16\n\ + mvn r4, r5, lsl #1\n\ +\n\ + add r3, r1, r5\n\ + bic r3, r3, r1\n\ + ands r2, r3, r4\n\ + bne 1f\n\ +0:\n\ + ldr r3, [%1, #0]\n\ + ldr r1, [%1, #4]!\n\ +" PRELOADSTR("%1") "\n\ + str r3, [%0], #4\n\ + add r2, r1, r4\n\ + bic r2, r2, r1\n\ + ands r3, r2, r5\n\ + beq 0b\n\ 1:" : "=&r" (dest), "=&r" (src) : "0" (dest), "1" (src) diff --git a/newlib/libc/machine/xscale/strlen.c b/newlib/libc/machine/xscale/strlen.c index e113ade34..a8bc0851d 100644 --- a/newlib/libc/machine/xscale/strlen.c +++ b/newlib/libc/machine/xscale/strlen.c @@ -28,68 +28,68 @@ strlen (const char *str) R4 = 0xfefefeff [ == ~(0x80808080 << 1) ] R5 = 0x80808080 */ - asm ("mov r5, #0x80 - add r5, r5, #0x8000 - add r5, r5, r5, lsl #16 - mvn r4, r5, lsl #1 + asm ("mov r5, #0x80\n\ + add r5, r5, #0x8000\n\ + add r5, r5, r5, lsl #16\n\ + mvn r4, r5, lsl #1\n\ " #if defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5E__ || defined __ARM_ARCH_5TE__ -" tst %0, #0x7 - ldreqd r6, [%0] - beq 1f - ldr r2, [%0] - add r3, r2, r4 - bic r3, r3, r2 - ands r2, r3, r5 - bne 2f - sub %0, %0, #4 - -0: - ldrd r6, [%0, #8]! +" tst %0, #0x7\n\ + ldreqd r6, [%0]\n\ + beq 1f\n\ + ldr r2, [%0]\n\ + add r3, r2, r4\n\ + bic r3, r3, r2\n\ + ands r2, r3, r5\n\ + bne 2f\n\ + sub %0, %0, #4\n\ +\n\ +0:\n\ + ldrd r6, [%0, #8]!\n\ " PRELOADSTR ("%0") -" -1: - add r3, r6, r4 - add r2, r7, r4 - bic r3, r3, r6 - bic r2, r2, r7 - and r3, r3, r5 - and r2, r2, r5 - orrs r3, r2, r3 - beq 0b +"\n\ +1:\n\ + add r3, r6, r4\n\ + add r2, r7, r4\n\ + bic r3, r3, r6\n\ + bic r2, r2, r7\n\ + and r3, r3, r5\n\ + and r2, r2, r5\n\ + orrs r3, r2, r3\n\ + beq 0b\n\ " #else -" sub %0, %0, #4 - -0: - ldr r6, [%0, #4]! 
+" sub %0, %0, #4\n\ +\n\ +0:\n\ + ldr r6, [%0, #4]!\n\ " PRELOADSTR ("%0") -" - add r3, r6, r4 - bic r3, r3, r6 - ands r3, r3, r5 - beq 0b +"\n\ + add r3, r6, r4\n\ + bic r3, r3, r6\n\ + ands r3, r3, r5\n\ + beq 0b\n\ " #endif /* __ARM_ARCH_5[T][E]__ */ -" -2: - ldrb r3, [%0] - cmp r3, #0x0 - beq 1f - -0: - ldrb r3, [%0, #1]! +"\n\ +2:\n\ + ldrb r3, [%0]\n\ + cmp r3, #0x0\n\ + beq 1f\n\ +\n\ +0:\n\ + ldrb r3, [%0, #1]!\n\ " PRELOADSTR ("%0") -" - cmp r3, #0x0 - bne 0b -1: +"\n\ + cmp r3, #0x0\n\ + bne 0b\n\ +1:\n\ " : "=r" (str) : "0" (str) : "r2", "r3", "r4", "r5", "r6", "r7");