* libc/locale/locale.c (loadlocale): Fix width of CJK ambiguous
characters to 1 for singlebyte charsets and 2 for non-Unicode multibyte charsets. Change documentation accordingly.
This commit is contained in:
parent
9a984ac7cb
commit
617dc68bfe
|
@ -1,3 +1,9 @@
|
||||||
|
2010-11-18 Andy Koppe <andy.koppe@gmail.com>
|
||||||
|
|
||||||
|
* libc/locale/locale.c (loadlocale): Fix width of CJK ambiguous
|
||||||
|
characters to 1 for singlebyte charsets and 2 for non-Unicode
|
||||||
|
multibyte charsets. Change documentation accordingly.
|
||||||
|
|
||||||
2010-11-17 Bernd Schmidt <bernds@codesourcery.com>
|
2010-11-17 Bernd Schmidt <bernds@codesourcery.com>
|
||||||
|
|
||||||
* configure.host (newlib_cflags): For tic6x, add -DCLOCK_PROVIDED.
|
* configure.host (newlib_cflags): For tic6x, add -DCLOCK_PROVIDED.
|
||||||
|
|
|
@ -90,16 +90,15 @@ Cygwin additionally supports locales from the file
|
||||||
(<<"">> is also accepted; if given, the settings are read from the
|
(<<"">> is also accepted; if given, the settings are read from the
|
||||||
corresponding LC_* environment variables and $LANG according to POSIX rules.)
|
corresponding LC_* environment variables and $LANG according to POSIX rules.)
|
||||||
|
|
||||||
This implementation also supports a single modifier, <<"cjknarrow">>.
|
This implementation also supports the modifier <<"cjknarrow">>, which
|
||||||
Any other modifier is ignored. <<"cjknarrow">>, in conjunction with one
|
affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
|
||||||
of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies
|
from the "CJK Ambiguous Width" category of characters described at
|
||||||
how the functions <<wcwidth>> and <<wcswidth>> handle characters from
|
http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
|
||||||
the "CJK Ambiguous Width" character class described in
|
of 1 for singlebyte charsets and a width of 2 for multibyte charsets
|
||||||
http://www.unicode.org/unicode/reports/tr11/. Usually these characters
|
other than UTF-8. For UTF-8, their width depends on the language specifier:
|
||||||
have a width of 1, unless you specify one of the aforementioned
|
it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
|
||||||
languages, in which case these characters have a width of 2. By
|
and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
|
||||||
specifying the <<"cjknarrow">> modifier, these characters will have a
|
independent of charset and language.
|
||||||
width of one in the languages <<"ja">>, <<"ko">>, and <<"zh">> as well.
|
|
||||||
|
|
||||||
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
|
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
|
||||||
pointer to the string representing the current locale. The acceptable
|
pointer to the string representing the current locale. The acceptable
|
||||||
|
@ -845,16 +844,18 @@ restart:
|
||||||
__wctomb = l_wctomb;
|
__wctomb = l_wctomb;
|
||||||
__mbtowc = l_mbtowc;
|
__mbtowc = l_mbtowc;
|
||||||
__set_ctype (charset);
|
__set_ctype (charset);
|
||||||
/* Check for the language part of the locale specifier. In case
|
/* Determine the width for the "CJK Ambiguous Width" category of
|
||||||
of "ja", "ko", or "zh", assume the use of CJK fonts, unless the
|
characters. This is used in wcwidth(). Assume single width for
|
||||||
"@cjknarrow" modifier has been specified.
|
single-byte charsets, and double width for multi-byte charsets
|
||||||
The result is stored in lc_ctype_cjk_lang and tested in wcwidth()
|
other than UTF-8. For UTF-8, use double width for the East Asian
|
||||||
to figure out the width to return (1 or 2) for the "CJK Ambiguous
|
languages ("ja", "ko", "zh"), and single width for everything else.
|
||||||
Width" category of characters. */
|
Single width can also be forced with the "@cjknarrow" modifier. */
|
||||||
lc_ctype_cjk_lang = !cjknarrow
|
lc_ctype_cjk_lang = !cjknarrow
|
||||||
&& ((strncmp (locale, "ja", 2) == 0
|
&& mbc_max > 1
|
||||||
|
&& (charset[0] != 'U'
|
||||||
|
|| strncmp (locale, "ja", 2) == 0
|
||||||
|| strncmp (locale, "ko", 2) == 0
|
|| strncmp (locale, "ko", 2) == 0
|
||||||
|| strncmp (locale, "zh", 2) == 0));
|
|| strncmp (locale, "zh", 2) == 0);
|
||||||
#ifdef __HAVE_LOCALE_INFO__
|
#ifdef __HAVE_LOCALE_INFO__
|
||||||
ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max);
|
ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max);
|
||||||
#endif /* __HAVE_LOCALE_INFO__ */
|
#endif /* __HAVE_LOCALE_INFO__ */
|
||||||
|
|
Loading…
Reference in New Issue