2008-05-26 Eric Blake <ebb9@byu.net>
Optimize the generic and x86 memchr. * libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]: Pre-align pointer so unaligned searches aren't penalized. * libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word operations are faster than repnz byte searches.
This commit is contained in:
parent
a6bd72a278
commit
70bff2d503
|
@ -1,3 +1,11 @@
|
||||||
|
2008-05-26 Eric Blake <ebb9@byu.net>
|
||||||
|
|
||||||
|
Optimize the generic and x86 memchr.
|
||||||
|
* libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]:
|
||||||
|
Pre-align pointer so unaligned searches aren't penalized.
|
||||||
|
* libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word
|
||||||
|
operations are faster than repnz byte searches.
|
||||||
|
|
||||||
2008-05-26 Eric Blake <ebb9@byu.net>
|
2008-05-26 Eric Blake <ebb9@byu.net>
|
||||||
|
|
||||||
Optimize the generic and x86 memset.
|
Optimize the generic and x86 memset.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* ====================================================
|
* ====================================================
|
||||||
* Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
|
* Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission to use, copy, modify, and distribute this
|
* Permission to use, copy, modify, and distribute this
|
||||||
* software is freely granted, provided that this notice
|
* software is freely granted, provided that this notice
|
||||||
|
@ -9,21 +9,23 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "i386mach.h"
|
#include "i386mach.h"
|
||||||
|
|
||||||
.global SYM (memchr)
|
.global SYM (memchr)
|
||||||
SOTYPE_FUNCTION(memchr)
|
SOTYPE_FUNCTION(memchr)
|
||||||
|
|
||||||
SYM (memchr):
|
SYM (memchr):
|
||||||
pushl ebp
|
pushl ebp
|
||||||
movl esp,ebp
|
movl esp,ebp
|
||||||
pushl edi
|
pushl edi
|
||||||
movl 12(ebp),eax
|
movzbl 12(ebp),eax
|
||||||
movl 16(ebp),ecx
|
movl 16(ebp),ecx
|
||||||
movl 8(ebp),edi
|
movl 8(ebp),edi
|
||||||
|
|
||||||
xorl edx,edx
|
xorl edx,edx
|
||||||
testl ecx,ecx
|
testl ecx,ecx
|
||||||
jz L1
|
jz L20
|
||||||
|
|
||||||
|
#ifdef __OPTIMIZE_SIZE__
|
||||||
|
|
||||||
cld
|
cld
|
||||||
repnz
|
repnz
|
||||||
|
@ -31,9 +33,79 @@ SYM (memchr):
|
||||||
|
|
||||||
setnz dl
|
setnz dl
|
||||||
decl edi
|
decl edi
|
||||||
|
|
||||||
|
#else /* !__OPTIMIZE_SIZE__ */
|
||||||
|
/* Do byte-wise checks until string is aligned. */
|
||||||
|
testl $3,edi
|
||||||
|
je L5
|
||||||
|
cmpb (edi),al
|
||||||
|
je L15
|
||||||
|
incl edi
|
||||||
|
decl ecx
|
||||||
|
je L20
|
||||||
|
|
||||||
|
testl $3,edi
|
||||||
|
je L5
|
||||||
|
cmpb (edi),al
|
||||||
|
je L15
|
||||||
|
incl edi
|
||||||
|
decl ecx
|
||||||
|
je L20
|
||||||
|
|
||||||
|
testl $3,edi
|
||||||
|
je L5
|
||||||
|
cmpb (edi),al
|
||||||
|
je L15
|
||||||
|
incl edi
|
||||||
|
decl ecx
|
||||||
|
je L20
|
||||||
|
|
||||||
|
/* Create a mask, then check a word at a time. */
|
||||||
|
L5:
|
||||||
|
movb al,ah
|
||||||
|
movl eax,edx
|
||||||
|
sall $16,edx
|
||||||
|
orl edx,eax
|
||||||
|
pushl ebx
|
||||||
|
|
||||||
|
.p2align 4,,7
|
||||||
|
L8:
|
||||||
|
subl $4,ecx
|
||||||
|
jc L9
|
||||||
|
movl (edi),edx
|
||||||
|
addl $4,edi
|
||||||
|
xorl eax,edx
|
||||||
|
leal -16843009(edx),ebx
|
||||||
|
notl edx
|
||||||
|
andl edx,ebx
|
||||||
|
testl $-2139062144,ebx
|
||||||
|
je L8
|
||||||
|
|
||||||
|
subl $4,edi
|
||||||
|
|
||||||
|
L9:
|
||||||
|
popl ebx
|
||||||
|
xorl edx,edx
|
||||||
|
addl $4,ecx
|
||||||
|
je L20
|
||||||
|
|
||||||
|
/* Final byte-wise checks. */
|
||||||
|
.p2align 4,,7
|
||||||
|
L10:
|
||||||
|
cmpb (edi),al
|
||||||
|
je L15
|
||||||
|
incl edi
|
||||||
|
decl ecx
|
||||||
|
jne L10
|
||||||
|
|
||||||
|
xorl edi,edi
|
||||||
|
|
||||||
|
#endif /* !__OPTIMIZE_SIZE__ */
|
||||||
|
|
||||||
|
L15:
|
||||||
decl edx
|
decl edx
|
||||||
andl edi,edx
|
andl edi,edx
|
||||||
L1:
|
L20:
|
||||||
movl edx,eax
|
movl edx,eax
|
||||||
|
|
||||||
leal -4(ebp),esp
|
leal -4(ebp),esp
|
||||||
|
|
|
@ -20,7 +20,7 @@ DESCRIPTION
|
||||||
This function searches memory starting at <<*<[src]>>> for the
|
This function searches memory starting at <<*<[src]>>> for the
|
||||||
character <[c]>. The search only ends with the first
|
character <[c]>. The search only ends with the first
|
||||||
occurrence of <[c]>, or after <[length]> characters; in
|
occurrence of <[c]>, or after <[length]> characters; in
|
||||||
particular, <<NULL>> does not terminate the search.
|
particular, <<NUL>> does not terminate the search.
|
||||||
|
|
||||||
RETURNS
|
RETURNS
|
||||||
If the character <[c]> is found within <[length]> characters
|
If the character <[c]> is found within <[length]> characters
|
||||||
|
@ -64,6 +64,9 @@ QUICKREF
|
||||||
#error long int is not a 32bit or 64bit byte
|
#error long int is not a 32bit or 64bit byte
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* DETECTCHAR returns nonzero if (long)X contains the byte used
|
||||||
|
to fill (long)MASK. */
|
||||||
|
/* Both arguments are parenthesized so that an operand containing an
   operator of lower precedence than ^ (for example a | b) is XORed as a
   whole value rather than being regrouped by the expansion. */
#define DETECTCHAR(X,MASK) (DETECTNULL((X) ^ (MASK)))
|
||||||
|
|
||||||
_PTR
|
_PTR
|
||||||
_DEFUN (memchr, (src_void, c, length),
|
_DEFUN (memchr, (src_void, c, length),
|
||||||
|
@ -71,73 +74,61 @@ _DEFUN (memchr, (src_void, c, length),
|
||||||
int c _AND
|
int c _AND
|
||||||
size_t length)
|
size_t length)
|
||||||
{
|
{
|
||||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
|
||||||
_CONST unsigned char *src = (_CONST unsigned char *) src_void;
|
_CONST unsigned char *src = (_CONST unsigned char *) src_void;
|
||||||
|
unsigned char d = c;
|
||||||
|
|
||||||
c &= 0xff;
|
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
|
||||||
|
unsigned long *asrc;
|
||||||
while (length--)
|
unsigned long mask;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
while (UNALIGNED (src))
|
||||||
{
|
{
|
||||||
if (*src == c)
|
if (!length--)
|
||||||
return (char *) src;
|
return NULL;
|
||||||
|
if (*src == d)
|
||||||
|
return (void *) src;
|
||||||
src++;
|
src++;
|
||||||
}
|
}
|
||||||
return NULL;
|
|
||||||
#else
|
|
||||||
_CONST unsigned char *src = (_CONST unsigned char *) src_void;
|
|
||||||
unsigned long *asrc;
|
|
||||||
unsigned long buffer;
|
|
||||||
unsigned long mask;
|
|
||||||
int i, j;
|
|
||||||
|
|
||||||
c &= 0xff;
|
if (!TOO_SMALL (length))
|
||||||
|
|
||||||
/* If the size is small, or src is unaligned, then
|
|
||||||
use the bytewise loop. We can hope this is rare. */
|
|
||||||
if (!TOO_SMALL (length) && !UNALIGNED (src))
|
|
||||||
{
|
{
|
||||||
/* The fast code reads the ASCII one word at a time and only
|
/* If we get this far, we know that length is large and src is
|
||||||
|
word-aligned. */
|
||||||
|
/* The fast code reads the source one word at a time and only
|
||||||
performs the bytewise search on word-sized segments if they
|
performs the bytewise search on word-sized segments if they
|
||||||
contain the search character, which is detected by XORing
|
contain the search character, which is detected by XORing
|
||||||
the word-sized segment with a word-sized block of the search
|
the word-sized segment with a word-sized block of the search
|
||||||
character and then detecting for the presence of NULL in the
|
character and then detecting for the presence of NUL in the
|
||||||
result. */
|
result. */
|
||||||
asrc = (unsigned long*) src;
|
asrc = (unsigned long *) src;
|
||||||
mask = 0;
|
mask = d << 8 | d;
|
||||||
for (i = 0; i < LBLOCKSIZE; i++)
|
mask = mask << 16 | mask;
|
||||||
mask = (mask << 8) + c;
|
for (i = 32; i < LBLOCKSIZE * 8; i <<= 1)
|
||||||
|
mask = (mask << i) | mask;
|
||||||
|
|
||||||
while (length >= LBLOCKSIZE)
|
while (length >= LBLOCKSIZE)
|
||||||
{
|
{
|
||||||
buffer = *asrc;
|
if (DETECTCHAR (*asrc, mask))
|
||||||
buffer ^= mask;
|
break;
|
||||||
if (DETECTNULL (buffer))
|
|
||||||
{
|
|
||||||
src = (unsigned char*) asrc;
|
|
||||||
for ( j = 0; j < LBLOCKSIZE; j++ )
|
|
||||||
{
|
|
||||||
if (*src == c)
|
|
||||||
return (char*) src;
|
|
||||||
src++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
length -= LBLOCKSIZE;
|
length -= LBLOCKSIZE;
|
||||||
asrc++;
|
asrc++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If there are fewer than LBLOCKSIZE characters left,
|
/* If there are fewer than LBLOCKSIZE characters left,
|
||||||
then we resort to the bytewise loop. */
|
then we resort to the bytewise loop. */
|
||||||
|
|
||||||
src = (unsigned char*) asrc;
|
src = (unsigned char *) asrc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||||
|
|
||||||
while (length--)
|
while (length--)
|
||||||
{
|
{
|
||||||
if (*src == c)
|
if (*src == d)
|
||||||
return (char*) src;
|
return (void *) src;
|
||||||
src++;
|
src++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue