From 149cabea8220c7baf1185ccfaea03922bbfd390a Mon Sep 17 00:00:00 2001
From: Corinna Vinschen
Date: Wed, 1 Mar 2023 10:44:52 +0100
Subject: [PATCH] Cygwin: mbsnrtowci: like mbsnrtowcs, just for wint_t

Deviation from standard: If the input is broken, the output will be
broken. I. e., we just copy the current byte over into the wint_t
destination and try to pick up on the next byte. This is in line
with the way fnmatch works.

Signed-off-by: Corinna Vinschen
---
Reviewer revision: error returns from mbrtowi are now actually detected
(bytes is a size_t, so the former "else" branch was unreachable), and the
counting mode (dst == NULL) skips broken bytes as well.  Diffstat and hunk
line counts are updated; the index blob hashes are not regenerated.

 winsup/cygwin/local_includes/wchar.h |  8 +++
 winsup/cygwin/strfuncs.cc            | 76 ++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)

diff --git a/winsup/cygwin/local_includes/wchar.h b/winsup/cygwin/local_includes/wchar.h
index d1b638591..ecf489cd5 100644
--- a/winsup/cygwin/local_includes/wchar.h
+++ b/winsup/cygwin/local_includes/wchar.h
@@ -52,6 +52,14 @@ size_t wirtomb (char *, wint_t, mbstate_t *);
    a UTF-32 value. Defined in strfuncs.cc */
 extern size_t mbrtowi (wint_t *, const char *, size_t, mbstate_t *);
 
+/* replacement function for mbsnrtowcs, returning a wint_t representing
+   a UTF-32 value. Defined in strfuncs.cc.
+   Deviation from standard: If the input is broken, the output will be
+   broken. I. e., we just copy the current byte over into the wint_t
+   destination and try to pick up on the next byte. This is in line
+   with the way fnmatch works. */
+extern size_t mbsnrtowci(wint_t *, const char **, size_t, size_t, mbstate_t *);
+
 /* convert wint_t string to char string, but *only* if the string consists
    entirely of ASCII chars */
 static inline void
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index 80e3eb0ad..9324e1553 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -180,6 +180,82 @@ mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
   return len;
 }
 
+/* Convert the multibyte string *SRC into a string of wint_t values, in
+   the manner of mbsnrtowcs.
+
+   DST  receives the converted values.  If DST is NULL, nothing is stored,
+        LEN is ignored and the caller's *SRC is left untouched; only the
+        number of values is counted.
+   SRC  points to the input pointer.  It is advanced past the consumed
+        input, or set to NULL when mbrtowi returns 0 (presumably on the
+        terminating NUL, as with mbrtowc).
+   NMS  is the nominal input size in bytes.  NOTE(review): it is only
+        decremented, never checked, so it does not bound the conversion.
+   LEN  is the maximum number of wint_t values to convert.
+   PS   is the conversion state.  It is dereferenced on the error path,
+        so it must not be NULL.
+
+   Returns the number of values converted; a conversion for which mbrtowi
+   returns 0 is not counted.
+
+   Deviation from standard: If the input is broken, the output will be
+   broken.  I.e., we just copy the current byte over into the wint_t
+   destination and try to pick up on the next byte.  This is in line
+   with the way fnmatch works.  */
+extern "C" size_t
+mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
+{
+  wint_t *ptr = dst;
+  const char *tmp_src;
+  size_t max;
+  size_t count = 0;
+  size_t bytes;
+
+  if (dst == NULL)
+    {
+      /* Ignore original len value and do not alter src pointer if the
+         dst pointer is NULL. */
+      len = (size_t) -1;
+      tmp_src = *src;
+      src = &tmp_src;
+    }
+  max = len;
+  while (len > 0)
+    {
+      bytes = mbrtowi (ptr, *src, MB_CUR_MAX, ps);
+      if (bytes == 0)
+        {
+          *src = NULL;
+          return count;
+        }
+      /* bytes is unsigned, so the failure values must be compared for
+         explicitly; a plain "bytes > 0" test is true for them as well.
+         This assumes mbrtowi reports errors the way mbrtowc does, as
+         (size_t) -1 or (size_t) -2.  */
+      if (bytes == (size_t) -1 || bytes == (size_t) -2)
+        {
+          /* Broken input, see the deviation described above: copy the
+             current byte verbatim and resume at the next byte.  */
+          ps->__count = 0;
+          if (dst)
+            *ptr++ = (const wint_t) **src;
+          /* Advance in counting mode (dst == NULL) as well, otherwise
+             the loop would never get past the offending byte.  */
+          ++*src;
+          --nms;
+        }
+      else
+        {
+          *src += bytes;
+          nms -= bytes;
+          ptr = (dst == NULL) ? NULL : ptr + 1;
+        }
+      ++count;
+      --len;
+    }
+  return max;
+}
+
 /* The SJIS, JIS and eucJP conversion in newlib does not use UTF as
    wchar_t character representation. That's unfortunate for us since
    we require UTF for the OS. What we do here is to have our own