2008-05-26 Eric Blake <ebb9@byu.net>

Optimize the generic and x86 memchr.
        * libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]:
        Pre-align pointer so unaligned searches aren't penalized.
        * libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word
        operations are faster than repnz byte searches.
This commit is contained in:
Jeff Johnston 2008-05-26 23:31:08 +00:00
parent a6bd72a278
commit 70bff2d503
3 changed files with 124 additions and 53 deletions

View File

@ -1,3 +1,11 @@
2008-05-26 Eric Blake <ebb9@byu.net>
Optimize the generic and x86 memchr.
* libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]:
Pre-align pointer so unaligned searches aren't penalized.
* libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word
operations are faster than repnz byte searches.
2008-05-26 Eric Blake <ebb9@byu.net>
Optimize the generic and x86 memset.

View File

@ -1,6 +1,6 @@
/* /*
* ==================================================== * ====================================================
* Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved. * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
* *
* Permission to use, copy, modify, and distribute this * Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice * software is freely granted, provided that this notice
@ -16,14 +16,16 @@
SYM (memchr): SYM (memchr):
pushl ebp pushl ebp
movl esp,ebp movl esp,ebp
pushl edi pushl edi
movl 12(ebp),eax movzbl 12(ebp),eax
movl 16(ebp),ecx movl 16(ebp),ecx
movl 8(ebp),edi movl 8(ebp),edi
xorl edx,edx xorl edx,edx
testl ecx,ecx testl ecx,ecx
jz L1 jz L20
#ifdef __OPTIMIZE_SIZE__
cld cld
repnz repnz
@ -31,9 +33,79 @@ SYM (memchr):
setnz dl setnz dl
decl edi decl edi
#else /* !__OPTIMIZE_SIZE__ */
/* Do byte-wise checks until string is aligned. */
testl $3,edi
je L5
cmpb (edi),al
je L15
incl edi
decl ecx
je L20
testl $3,edi
je L5
cmpb (edi),al
je L15
incl edi
decl ecx
je L20
testl $3,edi
je L5
cmpb (edi),al
je L15
incl edi
decl ecx
je L20
/* Create a mask, then check a word at a time. */
L5:
movb al,ah
movl eax,edx
sall $16,edx
orl edx,eax
pushl ebx
.p2align 4,,7
L8:
subl $4,ecx
jc L9
movl (edi),edx
addl $4,edi
xorl eax,edx
leal -16843009(edx),ebx
notl edx
andl edx,ebx
testl $-2139062144,ebx
je L8
subl $4,edi
L9:
popl ebx
xorl edx,edx
addl $4,ecx
je L20
/* Final byte-wise checks. */
.p2align 4,,7
L10:
cmpb (edi),al
je L15
incl edi
decl ecx
jne L10
xorl edi,edi
#endif /* !__OPTIMIZE_SIZE__ */
L15:
decl edx decl edx
andl edi,edx andl edi,edx
L1: L20:
movl edx,eax movl edx,eax
leal -4(ebp),esp leal -4(ebp),esp

View File

@ -20,7 +20,7 @@ DESCRIPTION
This function searches memory starting at <<*<[src]>>> for the This function searches memory starting at <<*<[src]>>> for the
character <[c]>. The search only ends with the first character <[c]>. The search only ends with the first
occurrence of <[c]>, or after <[length]> characters; in occurrence of <[c]>, or after <[length]> characters; in
particular, <<NULL>> does not terminate the search. particular, <<NUL>> does not terminate the search.
RETURNS RETURNS
If the character <[c]> is found within <[length]> characters If the character <[c]> is found within <[length]> characters
@ -64,6 +64,9 @@ QUICKREF
#error long int is not a 32bit or 64bit byte #error long int is not a 32bit or 64bit byte
#endif #endif
/* DETECTCHAR returns nonzero if (long)X contains the byte used
to fill (long)MASK. */
#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
_PTR _PTR
_DEFUN (memchr, (src_void, c, length), _DEFUN (memchr, (src_void, c, length),
@ -71,56 +74,43 @@ _DEFUN (memchr, (src_void, c, length),
int c _AND int c _AND
size_t length) size_t length)
{ {
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
_CONST unsigned char *src = (_CONST unsigned char *) src_void; _CONST unsigned char *src = (_CONST unsigned char *) src_void;
unsigned char d = c;
c &= 0xff; #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
unsigned long *asrc;
unsigned long mask;
int i;
while (length--) while (UNALIGNED (src))
{ {
if (*src == c) if (!length--)
return (char *) src; return NULL;
if (*src == d)
return (void *) src;
src++; src++;
} }
return NULL;
#else
_CONST unsigned char *src = (_CONST unsigned char *) src_void;
unsigned long *asrc;
unsigned long buffer;
unsigned long mask;
int i, j;
c &= 0xff; if (!TOO_SMALL (length))
/* If the size is small, or src is unaligned, then
use the bytewise loop. We can hope this is rare. */
if (!TOO_SMALL (length) && !UNALIGNED (src))
{ {
/* The fast code reads the ASCII one word at a time and only /* If we get this far, we know that length is large and src is
word-aligned. */
/* The fast code reads the source one word at a time and only
performs the bytewise search on word-sized segments if they performs the bytewise search on word-sized segments if they
contain the search character, which is detected by XORing contain the search character, which is detected by XORing
the word-sized segment with a word-sized block of the search the word-sized segment with a word-sized block of the search
character and then detecting for the presence of NULL in the character and then detecting for the presence of NUL in the
result. */ result. */
asrc = (unsigned long*) src; asrc = (unsigned long *) src;
mask = 0; mask = d << 8 | d;
for (i = 0; i < LBLOCKSIZE; i++) mask = mask << 16 | mask;
mask = (mask << 8) + c; for (i = 32; i < LBLOCKSIZE * 8; i <<= 1)
mask = (mask << i) | mask;
while (length >= LBLOCKSIZE) while (length >= LBLOCKSIZE)
{ {
buffer = *asrc; if (DETECTCHAR (*asrc, mask))
buffer ^= mask; break;
if (DETECTNULL (buffer))
{
src = (unsigned char*) asrc;
for ( j = 0; j < LBLOCKSIZE; j++ )
{
if (*src == c)
return (char*) src;
src++;
}
}
length -= LBLOCKSIZE; length -= LBLOCKSIZE;
asrc++; asrc++;
} }
@ -128,16 +118,17 @@ _DEFUN (memchr, (src_void, c, length),
/* If there are fewer than LBLOCKSIZE characters left, /* If there are fewer than LBLOCKSIZE characters left,
then we resort to the bytewise loop. */ then we resort to the bytewise loop. */
src = (unsigned char*) asrc; src = (unsigned char *) asrc;
} }
#endif /* not PREFER_SIZE_OVER_SPEED */
while (length--) while (length--)
{ {
if (*src == c) if (*src == d)
return (char*) src; return (void *) src;
src++; src++;
} }
return NULL; return NULL;
#endif /* not PREFER_SIZE_OVER_SPEED */
} }