From 4962a9453ac3a9c23cba3c7e952242a6831f0cb3 Mon Sep 17 00:00:00 2001
From: Eric Blake
Date: Wed, 21 May 2008 21:46:04 +0000
Subject: [PATCH] Optimize strchr for x86.

* libc/machine/i386/strchr.S (strchr): Pre-align data so unaligned
searches aren't penalized.  Special-case searching for 0.
---
 newlib/ChangeLog                  |   8 ++-
 newlib/libc/machine/i386/strchr.S | 113 ++++++++++++++++++++++++++----
 2 files changed, 107 insertions(+), 14 deletions(-)

diff --git a/newlib/ChangeLog b/newlib/ChangeLog
index 7cdfd0356..35a80e266 100644
--- a/newlib/ChangeLog
+++ b/newlib/ChangeLog
@@ -1,3 +1,9 @@
+2008-05-21  Eric Blake
+
+	Optimize strchr for x86.
+	* libc/machine/i386/strchr.S (strchr): Pre-align data so unaligned
+	searches aren't penalized.  Special-case searching for 0.
+
 2008-05-20  Nick Clifton
 
 	* libc/sys/sysnecv850/crt0.S (___dso_handle): Define (weak).
@@ -5,7 +11,7 @@
 2008-05-20  DJ Delorie
 
 	* libc/sys/sysnecv850/isatty.c (_isatty): Renamed from isatty.
-	
+
 2008-05-14  Jeff Johnston
 
 	* libc/include/sys/reent.h: Change _REENT_INIT... macros to
diff --git a/newlib/libc/machine/i386/strchr.S b/newlib/libc/machine/i386/strchr.S
index fe425d2d8..1d98b8149 100644
--- a/newlib/libc/machine/i386/strchr.S
+++ b/newlib/libc/machine/i386/strchr.S
@@ -1,6 +1,6 @@
 /*
  * ====================================================
- * Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
+ * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
  *
  * Permission to use, copy, modify, and distribute this
  * software is freely granted, provided that this notice
@@ -9,7 +9,7 @@
  */
 
 #include "i386mach.h"
-	
+
 	.global SYM (strchr)
 	SOTYPE_FUNCTION(strchr)
 
@@ -21,14 +21,45 @@ SYM (strchr):
 	pushl ebx
 	xorl ebx,ebx
 	movl 8(ebp),edi
-	movb 12(ebp),bl
+	addb 12(ebp),bl
 
-#ifndef __OPTIMIZE_SIZE__
-/* check if string is aligned, if not do check one byte at a time */
+#ifndef __OPTIMIZE_SIZE__
+/* Special case strchr(p,0).  */
+	je L25
+
+/* Do byte-wise checks until string is aligned.  */
 	test $3,edi
-	jne L9
+	je L5
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L14
+	cmpb bl,cl
+	je L19
+	incl edi
+
+	test $3,edi
+	je L5
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L14
+	cmpb bl,cl
+	je L19
+	incl edi
+
+	test $3,edi
+	je L5
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L14
+	cmpb bl,cl
+	je L19
+	incl edi
 
 /* create 4 byte mask which is just the desired byte repeated 4 times */
+L5:
 	movl ebx,ecx
 	sall $8,ebx
 	subl $4,edi
@@ -49,15 +80,14 @@ L10:
 	testl $-2139062144,edx
 	jne L9
 
-	movl ebx,eax
-	xorl ecx,eax
-	leal -16843009(eax),edx
-	notl eax
-	andl eax,edx
+	xorl ebx,ecx
+	leal -16843009(ecx),edx
+	notl ecx
+	andl ecx,edx
 	testl $-2139062144,edx
 	je L10
 #endif /* not __OPTIMIZE_SIZE__ */
-	
+
 /* loop while (*s && *s++ != c) */
 L9:
 	leal -1(edi),eax
@@ -69,7 +99,7 @@ L15:
 	je L14
 	cmpb bl,dl
 	jne L15
-	
+
 L14:
 /* if (*s == c) return address otherwise return NULL */
 	cmpb bl,(eax)
@@ -83,3 +113,60 @@ L19:
 	leave
 	ret
 
+#ifndef __OPTIMIZE_SIZE__
+/* Special case strchr(p,0).  */
+#if 0
+	/* Hideous performance on modern machines.  */
+L25:
+	cld
+	movl $-1,ecx
+	xor eax,eax
+	repnz
+	scasb
+	leal -1(edi),eax
+	jmp L19
+#endif
+L25:
+/* Do byte-wise checks until string is aligned.  */
+	test $3,edi
+	je L26
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L19
+	incl edi
+
+	test $3,edi
+	je L26
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L19
+	incl edi
+
+	test $3,edi
+	je L26
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L19
+	incl edi
+
+L26:
+	subl $4,edi
+
+/* loop performing 4 byte mask checking for desired 0 byte */
+	.p2align 4,,7
+L27:
+	addl $4,edi
+	movl (edi),ecx
+	leal -16843009(ecx),edx
+	movl ecx,eax
+	notl eax
+	andl eax,edx
+	testl $-2139062144,edx
+	je L27
+
+	jmp L9
+
+#endif /* !__OPTIMIZE_SIZE__ */
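
Note for readers of the magic numbers above: -16843009 is -0x01010101 and
-2139062144 is 0x80808080 when read as signed 32-bit values, so each
leal/notl/andl/testl group is the classic "word contains a zero byte" test,
(w - 0x01010101) & ~w & 0x80808080, which is nonzero exactly when some byte
of w is zero.  The C sketch below shows the whole algorithm in that form.
It is only an illustration under the same assumptions (32-bit words,
aligned loads); the function names are invented and this is not the newlib
source:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Nonzero if any byte of W is zero; mirrors the assembly's
   leal -16843009(ecx) / notl / andl / testl $-2139062144 sequence.  */
static int
has_zero_byte (uint32_t w)
{
  return ((w - 0x01010101U) & ~w & 0x80808080U) != 0;
}

/* Illustrative sketch of the strchr algorithm above, not newlib code.  */
char *
strchr_sketch (const char *s, int c)
{
  unsigned char ch = (unsigned char) c;
  uint32_t mask, w;

  /* Byte-wise checks until S is 4-byte aligned (the unrolled
     "test $3,edi" blocks).  Once aligned, whole-word loads can never
     cross a page boundary past the terminator, so they cannot fault.  */
  while ((uintptr_t) s & 3)
    {
      if (*s == (char) ch)
	return (char *) s;
      if (*s == '\0')
	return NULL;
      s++;
    }

  /* Broadcast CH into all four bytes (the sall/orl sequence at L5).  */
  mask = ch | ((uint32_t) ch << 8);
  mask |= mask << 16;

  /* Word-at-a-time loop (L10): stop when the word contains a zero byte
     or a byte equal to CH; W ^ MASK has a zero byte at each match.  */
  for (;;)
    {
      memcpy (&w, s, sizeof w);	/* aligned 4-byte load */
      if (has_zero_byte (w) || has_zero_byte (w ^ mask))
	break;
      s += 4;
    }

  /* Byte-wise tail (the L9/L15 loop) to pin down which byte hit.  */
  for (;; s++)
    {
      if (*s == (char) ch)
	return (char *) s;
      if (*s == '\0')
	return NULL;
    }
}

When c is 0, mask is 0 and the two per-word tests collapse into one, which
is exactly what the new L25/L26/L27 path exploits: the alignment prologue
drops its cmpb and the inner loop needs only a single zero-byte test per
word.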