Locale modifier @cjkwide to adjust ambiguous-width in non-CJK locales
Locale modifier @cjkwide makes Unicode "ambiguous width" characters wide, so that they can be forced to a width of 2 even in non-CJK locales. This gives e.g. users of "Powerline symbols" the opportunity to adjust their width to the desired behaviour (and the behaviour apparently expected by some tools) without having to set a CJK locale and without losing consistency between the terminal character width and the locale-dependent width reported by wcwidth/wcswidth.
This commit is contained in:
parent
df14d97fff
commit
f92f048528
|
@ -74,15 +74,16 @@ Cygwin additionally supports locales from the file
|
||||||
(<<"">> is also accepted; if given, the settings are read from the
|
(<<"">> is also accepted; if given, the settings are read from the
|
||||||
corresponding LC_* environment variables and $LANG according to POSIX rules.)
|
corresponding LC_* environment variables and $LANG according to POSIX rules.)
|
||||||
|
|
||||||
This implementation also supports the modifier <<"cjknarrow">>, which
|
This implementation also supports the modifiers <<"cjknarrow">> and
|
||||||
affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
|
<<"cjkwide">>, which affect how the functions <<wcwidth>> and <<wcswidth>>
|
||||||
from the "CJK Ambiguous Width" category of characters described at
|
handle characters from the "CJK Ambiguous Width" category of characters
|
||||||
http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
|
described at http://www.unicode.org/reports/tr11/#Ambiguous.
|
||||||
of 1 for singlebyte charsets and a width of 2 for multibyte charsets
|
These characters have a width of 1 for singlebyte charsets and a width of 2
|
||||||
other than UTF-8. For UTF-8, their width depends on the language specifier:
|
for multibyte charsets other than UTF-8.
|
||||||
|
For UTF-8, their width depends on the language specifier:
|
||||||
it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
|
it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
|
||||||
and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
|
and 1 for everything else. Specifying <<"cjknarrow">> or <<"cjkwide">>
|
||||||
independent of charset and language.
|
forces a width of 1 or 2, respectively, independent of charset and language.
|
||||||
|
|
||||||
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
|
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
|
||||||
pointer to the string representing the current locale. The acceptable
|
pointer to the string representing the current locale. The acceptable
|
||||||
|
@ -480,6 +481,7 @@ __loadlocale (struct __locale_t *loc, int category, const char *new_locale)
|
||||||
wctomb_p l_wctomb;
|
wctomb_p l_wctomb;
|
||||||
mbtowc_p l_mbtowc;
|
mbtowc_p l_mbtowc;
|
||||||
int cjknarrow = 0;
|
int cjknarrow = 0;
|
||||||
|
int cjkwide = 0;
|
||||||
|
|
||||||
/* Avoid doing everything twice if nothing has changed.
|
/* Avoid doing everything twice if nothing has changed.
|
||||||
|
|
||||||
|
@ -593,11 +595,13 @@ restart:
|
||||||
if (c && c[0] == '@')
|
if (c && c[0] == '@')
|
||||||
{
|
{
|
||||||
/* Modifier */
|
/* Modifier */
|
||||||
/* Only one modifier is recognized right now. "cjknarrow" is used
|
/* Modifiers "cjknarrow" or "cjkwide" are recognized to modify the
|
||||||
to modify the behaviour of wcwidth() for East Asian languages.
|
behaviour of wcwidth() and wcswidth() for East Asian languages.
|
||||||
For details see the comment at the end of this function. */
|
For details see the comment at the end of this function. */
|
||||||
if (!strcmp (c + 1, "cjknarrow"))
|
if (!strcmp (c + 1, "cjknarrow"))
|
||||||
cjknarrow = 1;
|
cjknarrow = 1;
|
||||||
|
else if (!strcmp (c + 1, "cjkwide"))
|
||||||
|
cjkwide = 1;
|
||||||
}
|
}
|
||||||
/* We only support this subset of charsets. */
|
/* We only support this subset of charsets. */
|
||||||
switch (charset[0])
|
switch (charset[0])
|
||||||
|
@ -894,12 +898,15 @@ restart:
|
||||||
single-byte charsets, and double width for multi-byte charsets
|
single-byte charsets, and double width for multi-byte charsets
|
||||||
other than UTF-8. For UTF-8, use double width for the East Asian
|
other than UTF-8. For UTF-8, use double width for the East Asian
|
||||||
languages ("ja", "ko", "zh"), and single width for everything else.
|
languages ("ja", "ko", "zh"), and single width for everything else.
|
||||||
Single width can also be forced with the "@cjknarrow" modifier. */
|
Single width can also be forced with the "@cjknarrow" modifier.
|
||||||
loc->cjk_lang = !cjknarrow && mbc_max > 1
|
Double width can also be forced with the "@cjkwide" modifier.
|
||||||
&& (charset[0] != 'U'
|
*/
|
||||||
|| strncmp (locale, "ja", 2) == 0
|
loc->cjk_lang = cjkwide ||
|
||||||
|| strncmp (locale, "ko", 2) == 0
|
(!cjknarrow && mbc_max > 1
|
||||||
|| strncmp (locale, "zh", 2) == 0);
|
&& (charset[0] != 'U'
|
||||||
|
|| strncmp (locale, "ja", 2) == 0
|
||||||
|
|| strncmp (locale, "ko", 2) == 0
|
||||||
|
|| strncmp (locale, "zh", 2) == 0));
|
||||||
#ifdef __HAVE_LOCALE_INFO__
|
#ifdef __HAVE_LOCALE_INFO__
|
||||||
ret = __ctype_load_locale (loc, locale, (void *) l_wctomb, charset,
|
ret = __ctype_load_locale (loc, locale, (void *) l_wctomb, charset,
|
||||||
mbc_max);
|
mbc_max);
|
||||||
|
|
Loading…
Reference in New Issue