2006-11-22 Luca Barbato <lu_zero@gentoo.org>
* libc/machine/spu/memcpy.c: Use spu_splats, explicit cast. * libc/machine/spu/memmove.c: Use spu_splats, explicit cast. * libc/machine/spu/memset.c: Use spu_splats, remove apple-cast. * libc/machine/spu/strchr.c: Use spu_splats, remove apple-cast. * libc/machine/spu/strncat.c: Explicit cast. * libc/machine/spu/strncmp.c: Use spu_splats. * libc/machine/spu/strncpy.c: Explicit cast. * libc/machine/spu/strrchr.c: Use spu_splats. * libc/machine/spu/strspn.c: Use spu_splats.
This commit is contained in:
parent
d22713e25d
commit
9bc5b6181b
|
@ -1,3 +1,15 @@
|
||||||
|
2006-11-22 Luca Barbato <lu_zero@gentoo.org>
|
||||||
|
|
||||||
|
* libc/machine/spu/memcpy.c: Use spu_splats, explicit cast.
|
||||||
|
* libc/machine/spu/memmove.c: Use spu_splats, explicit cast.
|
||||||
|
* libc/machine/spu/memset.c: Use spu_splats, remove apple-cast.
|
||||||
|
* libc/machine/spu/strchr.c: Use spu_splats, remove apple-cast.
|
||||||
|
* libc/machine/spu/strncat.c: Explicit cast.
|
||||||
|
* libc/machine/spu/strncmp.c: Use spu_splats.
|
||||||
|
* libc/machine/spu/strncpy.c: Explicit cast.
|
||||||
|
* libc/machine/spu/strrchr.c: Use spu_splats.
|
||||||
|
* libc/machine/spu/strspn.c: Use spu_splats.
|
||||||
|
|
||||||
2006-11-15 Till Straumann <strauman@slac.stanford.edu>
|
2006-11-15 Till Straumann <strauman@slac.stanford.edu>
|
||||||
|
|
||||||
* libc/time/tzset_r.c (_tzset_r): Initialize local variable.
|
* libc/time/tzset_r.c (_tzset_r): Initialize local variable.
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
* The memory areas may not overlap. The memcpy subroutine
|
* The memory areas may not overlap. The memcpy subroutine
|
||||||
* returns a pointer to dest.
|
* returns a pointer to dest.
|
||||||
*
|
*
|
||||||
* Faster implementation of this function can be implemented
|
* Faster implementation of this function can be implemented
|
||||||
* either with prior knowledge of the alignment or special
|
* either with prior knowledge of the alignment or special
|
||||||
* casing specific optimal alignments.
|
* casing specific optimal alignments.
|
||||||
*/
|
*/
|
||||||
|
@ -88,10 +88,10 @@ void * memcpy(void * __restrict__ dest, const void * __restrict__ src, size_t n)
|
||||||
* mask2 = mask for trailing unchanged bytes
|
* mask2 = mask for trailing unchanged bytes
|
||||||
* mask3 = mask indicating that more than one qword is being changed.
|
* mask3 = mask indicating that more than one qword is being changed.
|
||||||
*/
|
*/
|
||||||
mask = VEC_SPLAT_U8(-1);
|
mask = spu_splats((unsigned char)-1);
|
||||||
mask1 = spu_rlmaskqwbyte(mask, -doffset1);
|
mask1 = spu_rlmaskqwbyte(mask, -doffset1);
|
||||||
mask2 = spu_slqwbyte(mask, 16-doffset2);
|
mask2 = spu_slqwbyte(mask, 16-doffset2);
|
||||||
mask3 = (vec_uchar16)spu_cmpgt(spu_splats(doffset1 + n), 15);
|
mask3 = (vec_uchar16)spu_cmpgt(spu_splats((unsigned int)(doffset1 + n)), 15);
|
||||||
|
|
||||||
*vDst++ = spu_sel(ddata, sdata, spu_and(mask1, spu_or(mask2, mask3)));
|
*vDst++ = spu_sel(ddata, sdata, spu_and(mask1, spu_or(mask2, mask3)));
|
||||||
|
|
||||||
|
@ -108,7 +108,7 @@ void * memcpy(void * __restrict__ dest, const void * __restrict__ src, size_t n)
|
||||||
|
|
||||||
/* Handle any trailing partial (destination) quadwords
|
/* Handle any trailing partial (destination) quadwords
|
||||||
*/
|
*/
|
||||||
mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats(n), 16), mask2);
|
mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats((unsigned int)n), 16), mask2);
|
||||||
*vDst = spu_sel(*vDst, spu_shuffle(sdata2, *vSrc, shuffle), mask);
|
*vDst = spu_sel(*vDst, spu_shuffle(sdata2, *vSrc, shuffle), mask);
|
||||||
|
|
||||||
return (dest);
|
return (dest);
|
||||||
|
|
|
@ -48,7 +48,7 @@ void * memmove(void * __restrict__ dest, const void * __restrict__ src, size_t n
|
||||||
unsigned int soffset1, soffset2, doffset1, doffset2;
|
unsigned int soffset1, soffset2, doffset1, doffset2;
|
||||||
vec_uchar16 *vSrc, *vDst;
|
vec_uchar16 *vSrc, *vDst;
|
||||||
vec_uchar16 sdata1, sdata2, sdata, ddata, shuffle;
|
vec_uchar16 sdata1, sdata2, sdata, ddata, shuffle;
|
||||||
vec_uchar16 mask, mask1, mask2, mask3;
|
vec_uchar16 mask, mask1, mask2, mask3, one = spu_splats((unsigned char)-1);
|
||||||
|
|
||||||
soffset1 = (unsigned int)(src) & 15;
|
soffset1 = (unsigned int)(src) & 15;
|
||||||
doffset1 = (unsigned int)(dest) & 15;
|
doffset1 = (unsigned int)(dest) & 15;
|
||||||
|
@ -62,10 +62,10 @@ void * memmove(void * __restrict__ dest, const void * __restrict__ src, size_t n
|
||||||
* mask2 = mask for trailing unchanged bytes
|
* mask2 = mask for trailing unchanged bytes
|
||||||
* mask3 = mask indicating that more than one qword is being changed.
|
* mask3 = mask indicating that more than one qword is being changed.
|
||||||
*/
|
*/
|
||||||
mask = VEC_SPLAT_U8(-1);
|
mask = one;
|
||||||
mask1 = spu_rlmaskqwbyte(mask, -doffset1);
|
mask1 = spu_rlmaskqwbyte(mask, -doffset1);
|
||||||
mask2 = spu_slqwbyte(mask, 16-doffset2);
|
mask2 = spu_slqwbyte(mask, 16-doffset2);
|
||||||
mask3 = (vec_uchar16)spu_cmpgt(spu_splats(doffset1 + n), 15);
|
mask3 = (vec_uchar16)spu_cmpgt(spu_splats((unsigned int)(doffset1 + n)), 15);
|
||||||
|
|
||||||
vDst = (vec_uchar16 *)(dest);
|
vDst = (vec_uchar16 *)(dest);
|
||||||
|
|
||||||
|
@ -120,10 +120,10 @@ void * memmove(void * __restrict__ dest, const void * __restrict__ src, size_t n
|
||||||
* mask2 = mask for trailing unchanged bytes
|
* mask2 = mask for trailing unchanged bytes
|
||||||
* mask3 = mask indicating that more than one qword is being changed.
|
* mask3 = mask indicating that more than one qword is being changed.
|
||||||
*/
|
*/
|
||||||
mask = VEC_SPLAT_U8(-1);
|
mask = one;
|
||||||
mask1 = spu_rlmaskqwbyte(mask, -doffset1);
|
mask1 = spu_rlmaskqwbyte(mask, -doffset1);
|
||||||
mask2 = spu_slqwbyte(mask, 16-doffset2);
|
mask2 = spu_slqwbyte(mask, 16-doffset2);
|
||||||
mask3 = (vec_uchar16)spu_cmpgt(spu_splats(doffset1 + n), 15);
|
mask3 = (vec_uchar16)spu_cmpgt(spu_splats((unsigned int)(doffset1 + n)), 15);
|
||||||
|
|
||||||
*vDst++ = spu_sel(ddata, sdata, spu_and(mask1, spu_or(mask2, mask3)));
|
*vDst++ = spu_sel(ddata, sdata, spu_and(mask1, spu_or(mask2, mask3)));
|
||||||
|
|
||||||
|
@ -140,7 +140,7 @@ void * memmove(void * __restrict__ dest, const void * __restrict__ src, size_t n
|
||||||
|
|
||||||
/* Handle any trailing partial (destination) quadwords
|
/* Handle any trailing partial (destination) quadwords
|
||||||
*/
|
*/
|
||||||
mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats(n), 16), mask2);
|
mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats((unsigned int)n), 16), mask2);
|
||||||
*vDst = spu_sel(*vDst, spu_shuffle(sdata2, *vSrc, shuffle), mask);
|
*vDst = spu_sel(*vDst, spu_shuffle(sdata2, *vSrc, shuffle), mask);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -188,7 +188,7 @@ void * memmove(void * __restrict__ dest, const void * __restrict__ src, size_t n
|
||||||
* mask2 = mask for trailing unchanged bytes
|
* mask2 = mask for trailing unchanged bytes
|
||||||
* mask3 = mask indicating that more than one qword is being changed.
|
* mask3 = mask indicating that more than one qword is being changed.
|
||||||
*/
|
*/
|
||||||
mask = VEC_SPLAT_U8(-1);
|
mask = one;
|
||||||
mask1 = spu_rlmaskqwbyte(mask, -doffset1);
|
mask1 = spu_rlmaskqwbyte(mask, -doffset1);
|
||||||
mask2 = spu_slqwbyte(mask, 15-doffset2);
|
mask2 = spu_slqwbyte(mask, 15-doffset2);
|
||||||
mask3 = (vec_uchar16)spu_cmpgt(spu_splats((int)(doffset2 - n)), -2);
|
mask3 = (vec_uchar16)spu_cmpgt(spu_splats((int)(doffset2 - n)), -2);
|
||||||
|
|
|
@ -40,7 +40,7 @@ void * memset(void *s, int c, size_t n)
|
||||||
{
|
{
|
||||||
int skip, cnt, i;
|
int skip, cnt, i;
|
||||||
vec_uchar16 *vs;
|
vec_uchar16 *vs;
|
||||||
vec_uchar16 vc, mask;
|
vec_uchar16 vc, mask, one = spu_splats((unsigned int)-1);
|
||||||
|
|
||||||
vs = (vec_uchar16 *)(s);
|
vs = (vec_uchar16 *)(s);
|
||||||
vc = spu_splats((unsigned char)c);
|
vc = spu_splats((unsigned char)c);
|
||||||
|
@ -52,10 +52,10 @@ void * memset(void *s, int c, size_t n)
|
||||||
*/
|
*/
|
||||||
skip = (int)(s) & 15;
|
skip = (int)(s) & 15;
|
||||||
if (skip) {
|
if (skip) {
|
||||||
mask = spu_rlmaskqwbyte((vec_uchar16)(-1), 0-skip);
|
mask = spu_rlmaskqwbyte(one, -skip);
|
||||||
cnt -= 16 - skip;
|
cnt -= 16 - skip;
|
||||||
if (cnt < 0) {
|
if (cnt < 0) {
|
||||||
mask = spu_and(mask, spu_slqwbyte((vec_uchar16)(-1), (unsigned int)(-cnt)));
|
mask = spu_and(mask, spu_slqwbyte(one, (unsigned int)(-cnt)));
|
||||||
}
|
}
|
||||||
*vs = spu_sel(*vs, vc, mask);
|
*vs = spu_sel(*vs, vc, mask);
|
||||||
vs++;
|
vs++;
|
||||||
|
@ -82,7 +82,7 @@ void * memset(void *s, int c, size_t n)
|
||||||
/* Handle any trailing partial quadwords
|
/* Handle any trailing partial quadwords
|
||||||
*/
|
*/
|
||||||
if (cnt > 0) {
|
if (cnt > 0) {
|
||||||
mask = spu_slqwbyte((vec_uchar16)(-1), (unsigned int)(16-cnt));
|
mask = spu_slqwbyte(one, (unsigned int)(16-cnt));
|
||||||
*vs = spu_sel(*vs, vc, mask);
|
*vs = spu_sel(*vs, vc, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,14 +44,14 @@ char *strchr(const char *s, int c)
|
||||||
vec_uint4 cmp_c, cmp_0;
|
vec_uint4 cmp_c, cmp_0;
|
||||||
vec_uint4 result;
|
vec_uint4 result;
|
||||||
vec_uint4 mask;
|
vec_uint4 mask;
|
||||||
|
vec_uint4 one = spu_splats((unsigned int)-1);
|
||||||
/* Scan memory array a quadword at a time. Skip leading
|
/* Scan memory array a quadword at a time. Skip leading
|
||||||
* mis-aligned bytes.
|
* mis-aligned bytes.
|
||||||
*/
|
*/
|
||||||
ptr = (vec_uchar16 *)s;
|
ptr = (vec_uchar16 *)s;
|
||||||
|
|
||||||
skip = (unsigned int)(ptr) & 15;
|
skip = (unsigned int)(ptr) & 15;
|
||||||
mask = spu_rlmask((vec_uint4)(0xFFFF), -skip);
|
mask = spu_rlmask(one, -skip);
|
||||||
|
|
||||||
vc = spu_splats((unsigned char)(c));
|
vc = spu_splats((unsigned char)(c));
|
||||||
|
|
||||||
|
|
|
@ -42,8 +42,7 @@
|
||||||
|
|
||||||
char * strncat(char * __restrict__ dest, const char * __restrict__ src, size_t n)
|
char * strncat(char * __restrict__ dest, const char * __restrict__ src, size_t n)
|
||||||
{
|
{
|
||||||
size_t len;
|
unsigned int cmp, skip, mask, len;
|
||||||
unsigned int cmp, skip, mask;
|
|
||||||
vec_uchar16 *ptr, data;
|
vec_uchar16 *ptr, data;
|
||||||
vec_uint4 cnt, gt, N;
|
vec_uint4 cnt, gt, N;
|
||||||
char *dst;
|
char *dst;
|
||||||
|
@ -55,7 +54,7 @@ char * strncat(char * __restrict__ dest, const char * __restrict__ src, size_t n
|
||||||
/* Copy the src image until either the src string terminates
|
/* Copy the src image until either the src string terminates
|
||||||
* or n characters are copied.
|
* or n characters are copied.
|
||||||
*/
|
*/
|
||||||
N = spu_promote(n, 0);
|
N = spu_promote((unsigned int)n, 0);
|
||||||
|
|
||||||
/* Determine the string length, not including termination character,
|
/* Determine the string length, not including termination character,
|
||||||
* clamped to n characters.
|
* clamped to n characters.
|
||||||
|
|
|
@ -50,7 +50,7 @@ int strncmp(const char *s1, const char *s2, size_t n)
|
||||||
vec_uchar16 data1A, data1B, data1, data2A, data2B, data2;
|
vec_uchar16 data1A, data1B, data1, data2A, data2B, data2;
|
||||||
vec_uchar16 *ptr1, *ptr2;
|
vec_uchar16 *ptr1, *ptr2;
|
||||||
|
|
||||||
data1 = data2 = VEC_SPLAT_U8(0);
|
data1 = data2 = spu_splats(0);
|
||||||
|
|
||||||
ptr1 = (vec_uchar16 *)s1;
|
ptr1 = (vec_uchar16 *)s1;
|
||||||
ptr2 = (vec_uchar16 *)s2;
|
ptr2 = (vec_uchar16 *)s2;
|
||||||
|
|
|
@ -40,12 +40,12 @@
|
||||||
*/
|
*/
|
||||||
char * strncpy(char * __restrict__ dest, const char * __restrict__ src, size_t n)
|
char * strncpy(char * __restrict__ dest, const char * __restrict__ src, size_t n)
|
||||||
{
|
{
|
||||||
size_t len;
|
unsigned int len;
|
||||||
unsigned int cmp, skip, mask;
|
unsigned int cmp, skip, mask;
|
||||||
vec_uchar16 *ptr, data;
|
vec_uchar16 *ptr, data;
|
||||||
vec_uint4 cnt, gt, N;
|
vec_uint4 cnt, gt, N;
|
||||||
|
|
||||||
N = spu_promote(n, 0);
|
N = spu_promote((unsigned int)n, 0);
|
||||||
|
|
||||||
/* Determine the string length, including termination character,
|
/* Determine the string length, including termination character,
|
||||||
* clamped to n characters.
|
* clamped to n characters.
|
||||||
|
@ -74,7 +74,7 @@ char * strncpy(char * __restrict__ dest, const char * __restrict__ src, size_t n
|
||||||
|
|
||||||
/* len = MIN(len, n)
|
/* len = MIN(len, n)
|
||||||
*/
|
*/
|
||||||
len = spu_extract(spu_sel(spu_promote(len, 0), N, gt), 0);
|
len = spu_extract(spu_sel(spu_promote((unsigned int)len, 0), N, gt), 0);
|
||||||
|
|
||||||
/* Perform a memcpy of the resulting length
|
/* Perform a memcpy of the resulting length
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -45,14 +45,14 @@ char * strrchr(const char *s, int c)
|
||||||
vec_uint4 cmp_c, cmp_0, cmp;
|
vec_uint4 cmp_c, cmp_0, cmp;
|
||||||
vec_uint4 res_ptr, res_cmp;
|
vec_uint4 res_ptr, res_cmp;
|
||||||
vec_uint4 mask, result;
|
vec_uint4 mask, result;
|
||||||
|
vec_uint4 one = spu_splats((unsigned int)-1);
|
||||||
/* Scan memory array a quadword at a time. Skip leading
|
/* Scan memory array a quadword at a time. Skip leading
|
||||||
* mis-aligned bytes.
|
* mis-aligned bytes.
|
||||||
*/
|
*/
|
||||||
ptr = (vec_uchar16 *)s;
|
ptr = (vec_uchar16 *)s;
|
||||||
|
|
||||||
nskip = -((unsigned int)(ptr) & 15);
|
nskip = -((unsigned int)(ptr) & 15);
|
||||||
mask = spu_rlmask((vec_uint4)(0xFFFF), nskip);
|
mask = spu_rlmask(one, nskip);
|
||||||
|
|
||||||
vc = spu_splats((unsigned char)(c));
|
vc = spu_splats((unsigned char)(c));
|
||||||
|
|
||||||
|
@ -62,8 +62,8 @@ char * strrchr(const char *s, int c)
|
||||||
cmp_c = spu_and(spu_gather(spu_cmpeq(data, vc)), mask);
|
cmp_c = spu_and(spu_gather(spu_cmpeq(data, vc)), mask);
|
||||||
cmp_0 = spu_and(spu_gather(spu_cmpeq(data, 0)), mask);
|
cmp_0 = spu_and(spu_gather(spu_cmpeq(data, 0)), mask);
|
||||||
|
|
||||||
res_ptr = VEC_SPLAT_U32(0);
|
res_ptr = spu_splats(0);
|
||||||
res_cmp = VEC_SPLAT_U32(0);
|
res_cmp = spu_splats(0);
|
||||||
|
|
||||||
while (spu_extract(cmp_0, 0) == 0) {
|
while (spu_extract(cmp_0, 0) == 0) {
|
||||||
cmp = spu_cmpeq(cmp_c, 0);
|
cmp = spu_cmpeq(cmp_c, 0);
|
||||||
|
@ -84,7 +84,7 @@ char * strrchr(const char *s, int c)
|
||||||
*
|
*
|
||||||
* First mask off compare results following the first termination character.
|
* First mask off compare results following the first termination character.
|
||||||
*/
|
*/
|
||||||
mask = spu_sl(VEC_SPLAT_U32(-1), 31 - spu_extract(spu_cntlz(cmp_0), 0));
|
mask = spu_sl(one, 31 - spu_extract(spu_cntlz(cmp_0), 0));
|
||||||
cmp_c = spu_and(cmp_c, mask);
|
cmp_c = spu_and(cmp_c, mask);
|
||||||
|
|
||||||
/* Conditionally update res_ptr and res_cmp if a match was found in the last
|
/* Conditionally update res_ptr and res_cmp if a match was found in the last
|
||||||
|
|
|
@ -63,7 +63,7 @@ size_t strspn(const char *s1, const char *s2)
|
||||||
do {
|
do {
|
||||||
data1 = spu_shuffle(dataA, dataB, shuffle);
|
data1 = spu_shuffle(dataA, dataB, shuffle);
|
||||||
|
|
||||||
match = VEC_SPLAT_U8(0);
|
match = spu_splats(0);
|
||||||
|
|
||||||
ptr2 = (vec_uchar16 *)s2;
|
ptr2 = (vec_uchar16 *)s2;
|
||||||
data2 = *ptr2;
|
data2 = *ptr2;
|
||||||
|
|
Loading…
Reference in New Issue