Cygwin: convert Windows locale handling from LCID to RFC 5646 strings

Since Windows Vista, Windows locale handling has been moving from
numeric locale identifiers (LCID) to RFC 5646 locale strings.  In the
meantime Windows has introduced new locales which don't even have an
LCID attached.  Those were unusable in Cygwin because fetching locale
information for these locales requires calling the new locale functions,
which take a locale string.

Convert Cygwin to drop LCIDs and use Windows RFC 5646 locale strings instead.

The last place using LCIDs is the __set_charset_from_locale function.
Checking numerically is easier and usually faster than checking strings.
However, this function is clearly a TODO.
This commit is contained in:
Corinna Vinschen 2023-02-24 16:37:44 +01:00
parent 89eb4bce15
commit e95a7a7955
5 changed files with 244 additions and 215 deletions

View File

@ -46,7 +46,7 @@ __BEGIN_DECLS
#ifdef __CYGWIN__ #ifdef __CYGWIN__
struct lc_collate_T struct lc_collate_T
{ {
__uint32_t lcid; wchar_t win_locale[ENCODING_LEN + 1];
int (*mbtowc) (struct _reent *, wchar_t *, const char *, size_t, int (*mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
mbstate_t *); mbstate_t *);
char codeset[ENCODING_LEN + 1]; char codeset[ENCODING_LEN + 1];

View File

@ -929,7 +929,7 @@ match(Char *name, Char *pat, Char *patend)
if (M_COLL_P(pat[1])) if (M_COLL_P(pat[1]))
len2 = M_COLL_CNT(*++pat); len2 = M_COLL_CNT(*++pat);
#ifdef __CYGWIN__ #ifdef __CYGWIN__
if ((!__get_current_collate_locale ()->lcid) ? if ((!__get_current_collate_locale ()->win_locale[0]) ?
#else #else
if (__collate_load_error ? if (__collate_load_error ?
#endif #endif

View File

@ -330,7 +330,7 @@ rangematch(const char *pattern, wint_t test, int flags, char **newp,
c2 = towlower(c2); c2 = towlower(c2);
#ifdef __CYGWIN__ #ifdef __CYGWIN__
if ((!__get_current_collate_locale ()->lcid) ? if ((!__get_current_collate_locale ()->win_locale[0]) ?
#else #else
if (table->__collate_load_error ? if (table->__collate_load_error ?
#endif #endif

View File

@ -25,128 +25,129 @@ details. */
#define _LC(x) &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr #define _LC(x) &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr
#define getlocaleinfo(category,type) \ #define getlocaleinfo(category,type) \
__getlocaleinfo(lcid,(type),_LC(category)) __getlocaleinfo(win_locale,(type),_LC(category))
#define getlocaleint(type) \
__getlocaleint(win_locale,(type))
#define setlocaleinfo(category,val) \ #define setlocaleinfo(category,val) \
__setlocaleinfo(_LC(category),(val)) __setlocaleinfo(_LC(category),(val))
#define eval_datetimefmt(type,flags) \ #define eval_datetimefmt(type,flags) \
__eval_datetimefmt(lcid,(type),(flags),&lc_time_ptr,\ __eval_datetimefmt(win_locale,(type),(flags),&lc_time_ptr,\
lc_time_end-lc_time_ptr) lc_time_end-lc_time_ptr)
#define charfromwchar(category,in) \ #define charfromwchar(category,in) \
__charfromwchar (_##category##_locale->in,_LC(category),f_wctomb) __charfromwchar (_##category##_locale->in,_LC(category),f_wctomb)
#define has_modifier(x) ((x)[0] && !strcmp (modifier, (x))) #define has_modifier(x) ((x)[0] && !strcmp (modifier, (x)))
static char last_locale[ENCODING_LEN + 1]; /* Fetch Windows RFC 5646 locale from POSIX locale specifier.
static LCID last_lcid;
/* Fetch LCID from POSIX locale specifier.
Return values: Return values:
-1: Invalid locale -1: Invalid locale
0: C or POSIX 0: C or POSIX
>0: LCID 1: valid locale
*/ */
static LCID static int
__get_lcid_from_locale (const char *name) __get_rfc5646_from_locale (const char *name, wchar_t *win_locale)
{ {
char locale[ENCODING_LEN + 1]; wchar_t wlocale[ENCODING_LEN + 1] = { 0 };
char *c; wchar_t locale[ENCODING_LEN + 1];
LCID lcid; wchar_t *c;
/* Speed up reusing the same locale as before, for instance in LC_ALL case. */ win_locale[0] = L'\0';
if (!strcmp (name, last_locale)) mbstowcs (locale, name, ENCODING_LEN + 1);
{ /* Remember modifier for later use. */
debug_printf ("LCID=%04y", last_lcid); const char *modifier = strchr (name, '@') ? : "";
return last_lcid;
}
stpcpy (last_locale, name);
stpcpy (locale, name);
/* Store modifier for later use. */
const char *modifier = strchr (last_locale, '@') ? : "";
/* Drop charset and modifier */ /* Drop charset and modifier */
c = strchr (locale, '.'); c = wcschr (locale, L'.');
if (!c) if (!c)
c = strchr (locale, '@'); c = wcschr (locale, L'@');
if (c) if (c)
*c = '\0'; *c = L'\0';
/* "POSIX" already converted to "C" in loadlocale. */ /* "POSIX" already converted to "C" in loadlocale. */
if (!strcmp (locale, "C")) if (!wcscmp (locale, L"C"))
return last_lcid = 0; return 0;
c = strchr (locale, '_'); c = wcschr (locale, '_');
if (!c) if (!c)
return last_lcid = (LCID) -1;
wchar_t wlocale[ENCODING_LEN + 1];
/* Convert to RFC 4646 syntax. */
*c = '-';
mbstowcs (wlocale, locale, ENCODING_LEN + 1);
lcid = LocaleNameToLCID (wlocale, 0);
/* Bug on Windows 10: LocaleNameToLCID returns LOCALE_CUSTOM_UNSPECIFIED
for unknown locales. */
if (lcid == 0 || lcid == LOCALE_CUSTOM_UNSPECIFIED)
{ {
/* Unfortunately there are a couple of locales for which no form /* try if the locale can be resolved from the language tag
without a Script part per RFC 4646 exists. fix up Linux-only locale first */
Linux also supports no_NO which is equivalent to nb_NO. */ if (!wcscmp (locale, L"ber"))
wcscpy (locale, L"tzm");
if (ResolveLocaleName (locale, wlocale, ENCODING_LEN + 1) <= 0)
return -1;
wcpcpy (win_locale, wlocale);
return 1;
}
/* Convert to RFC 5646 syntax. */
*c = '-';
/* Override a few locales with a different default script as used
on Linux. Linux also supports no_NO which is equivalent to nb_NO,
but Windows can resolve that nicely. Also, "tzm" and "zgh" are
subsumed under "ber" on Linux. */
struct { struct {
const char *loc; const wchar_t *loc;
const wchar_t *wloc; const wchar_t *wloc;
} sc_only_locale[] = { } override_locale[] = {
{ "az-AZ" , L"az-Latn-AZ" }, { L"ber-DZ" , L"tzm-Latn-DZ" },
{ "bs-BA" , L"bs-Latn-BA" }, { L"ber-MA" , L"zgh-Tfng-MA" },
{ "chr-US", L"chr-Cher-US"}, { L"mn-CN" , L"mn-Mong-CN" },
{ "ff-SN" , L"ff-Latn-SN" }, { L"mn-MN" , L"mn-Mong-MN" },
{ "ha-NG" , L"ha-Latn-NG" }, { L"pa-PK" , L"pa-Arab-PK" },
{ "iu-CA" , L"iu-Latn-CA" }, { L"sd-IN" , L"sd-Deva-IN" },
{ "ks-IN" , L"ks-Arab-IN" }, { L"sr-BA" , L"sr-Cyrl-BA" },
{ "ku-IQ" , L"ku-Arab-IQ" }, { L"sr-ME" , L"sr-Cyrl-ME" },
{ "mn-CN" , L"mn-Mong-CN" }, { L"sr-RS" , L"sr-Cyrl-RS" },
{ "mn-MN" , L"mn-Mong-MN" }, { L"sr-XK" , L"sr-Cyrl-XK" },
{ "no-NO" , L"nb-NO" }, { L"tzm-MA", L"tzm-Tfng-MA" },
{ "pa-PK" , L"pa-Arab-PK" },
{ "quc-GT", L"quc-Latn-GT" },
{ "sd-PK" , L"sd-Arab-PK" },
{ "sd-IN" , L"sd-Deva-IN" },
{ "sr-BA" , L"sr-Cyrl-BA" },
{ "sr-ME" , L"sr-Cyrl-ME" },
{ "sr-RS" , L"sr-Cyrl-RS" },
{ "tg-TJ" , L"tg-Cyrl-TJ" },
{ "tzm-DZ", L"tzm-Latn-DZ" },
{ "tzm-MA", L"tzm-Tfng-MA" },
{ "uz-UZ" , L"uz-Latn-UZ" },
{ NULL , NULL } { NULL , NULL }
}; };
for (int i = 0; sc_only_locale[i].loc
&& sc_only_locale[i].loc[0] <= locale[0]; ++i) for (int i = 0; override_locale[i].loc
if (!strcmp (locale, sc_only_locale[i].loc)) && override_locale[i].loc[0] <= locale[0]; ++i)
{ {
lcid = LocaleNameToLCID (sc_only_locale[i].wloc, 0); if (!wcscmp (locale, override_locale[i].loc))
if (!strncmp (locale, "sr-", 3))
{ {
/* "@latin" modifier for the sr_XY locales changes wcscpy (wlocale, override_locale[i].wloc);
collation behaviour so lcid should accommodate that
by being set to the Latin sublang. */
if (lcid != 0 && lcid != LOCALE_CUSTOM_UNSPECIFIED
&& has_modifier ("@latin"))
lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
}
else if (!strncmp (locale, "uz-", 3))
{
/* Equivalent for "@cyrillic" modifier in uz_UZ locale */
if (lcid != 0 && lcid != LOCALE_CUSTOM_UNSPECIFIED
&& has_modifier ("@cyrillic"))
lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) + 1);
}
break; break;
} }
} }
if (lcid && lcid != LOCALE_CUSTOM_UNSPECIFIED) if (!wlocale[0]
last_lcid = lcid; && ResolveLocaleName (locale, wlocale, ENCODING_LEN + 1) <= 1)
return -1;
/* Check for modifiers changing the script */
const wchar_t *iso15924_script[] = { L"Latn-", L"Cyrl-", L"Deva-", L"Adlm-" };
int idx = -1;
if (modifier[0])
{
if (!strcmp (++modifier, "latin"))
idx = 0;
else if (!strcmp (modifier, "cyrillic"))
idx = 1;
else if (!strcmp (modifier, "devanagari"))
idx = 2;
else if (!strcmp (modifier, "adlam"))
idx = 3;
}
if (idx >= 0)
{
wchar_t *iso3166 = wcschr (wlocale, L'-') + 1;
wchar_t *wlp;
/* Copy iso639 language part including dash */
wlp = wcpncpy (win_locale, wlocale, iso3166 - wlocale);
/* Concat new iso15924 script */
wlp = wcpcpy (wlp, iso15924_script[idx]);
/* Concat iso3166 territory. Skip script, if already in the locale */
wchar_t *skip_script = wcschr (iso3166, L'-');
if (skip_script)
iso3166 = skip_script + 1;
wcpcpy (wlp, iso3166);
}
else else
last_lcid = (LCID) -1; wcpcpy (win_locale, wlocale);
debug_printf ("LCID=%04y", last_lcid); return 1;
return last_lcid;
} }
/* Never returns -1. Just skips invalid chars instead. Only if return_invalid /* Never returns -1. Just skips invalid chars instead. Only if return_invalid
@ -257,7 +258,7 @@ rebase_locale_buf (const void *ptrv, const void *ptrvend, const char *newbase,
} }
static wchar_t * static wchar_t *
__getlocaleinfo (LCID lcid, LCTYPE type, char **ptr, size_t size) __getlocaleinfo (wchar_t *loc, LCTYPE type, char **ptr, size_t size)
{ {
size_t num; size_t num;
wchar_t *ret; wchar_t *ret;
@ -265,7 +266,7 @@ __getlocaleinfo (LCID lcid, LCTYPE type, char **ptr, size_t size)
if ((uintptr_t) *ptr % 1) if ((uintptr_t) *ptr % 1)
++*ptr; ++*ptr;
ret = (wchar_t *) *ptr; ret = (wchar_t *) *ptr;
num = GetLocaleInfoW (lcid, type, ret, size / sizeof (wchar_t)); num = GetLocaleInfoEx (loc, type, ret, size / sizeof (wchar_t));
*ptr = (char *) (ret + num); *ptr = (char *) (ret + num);
return ret; return ret;
} }
@ -296,10 +297,10 @@ __charfromwchar (const wchar_t *in, char **ptr, size_t size, wctomb_p f_wctomb)
} }
static UINT static UINT
getlocaleint (LCID lcid, LCTYPE type) __getlocaleint (wchar_t *loc, LCTYPE type)
{ {
UINT val; UINT val;
return GetLocaleInfoW (lcid, type | LOCALE_RETURN_NUMBER, (PWCHAR) &val, return GetLocaleInfoEx (loc, type | LOCALE_RETURN_NUMBER, (PWCHAR) &val,
sizeof val) ? val : 0; sizeof val) ? val : 0;
} }
@ -310,7 +311,7 @@ enum dt_flags {
}; };
static wchar_t * static wchar_t *
__eval_datetimefmt (LCID lcid, LCTYPE type, dt_flags flags, char **ptr, __eval_datetimefmt (wchar_t *loc, LCTYPE type, dt_flags flags, char **ptr,
size_t size) size_t size)
{ {
wchar_t buf[80]; wchar_t buf[80];
@ -327,7 +328,7 @@ __eval_datetimefmt (LCID lcid, LCTYPE type, dt_flags flags, char **ptr,
++*ptr; ++*ptr;
wchar_t *ret = (wchar_t *) *ptr; wchar_t *ret = (wchar_t *) *ptr;
wchar_t *p = (wchar_t *) *ptr; wchar_t *p = (wchar_t *) *ptr;
GetLocaleInfoW (lcid, type, buf, 80); GetLocaleInfoEx (loc, type, buf, 80);
for (wchar_t *fmt = buf; *fmt; ++fmt) for (wchar_t *fmt = buf; *fmt; ++fmt)
switch (fc = *fmt) switch (fc = *fmt)
{ {
@ -390,20 +391,21 @@ __eval_datetimefmt (LCID lcid, LCTYPE type, dt_flags flags, char **ptr,
/* Convert Windows grouping format into POSIX grouping format. */ /* Convert Windows grouping format into POSIX grouping format. */
static char * static char *
conv_grouping (LCID lcid, LCTYPE type, char **lc_ptr) conv_grouping (wchar_t *loc, LCTYPE type, char **lc_ptr)
{ {
char buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */ wchar_t buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */
bool repeat = false; bool repeat = false;
char *ptr = *lc_ptr; char *ptr = *lc_ptr;
char *ret = ptr; char *ret = ptr;
GetLocaleInfoA (lcid, type, buf, 10); GetLocaleInfoEx (loc, type, buf, 10);
/* Convert Windows grouping format into POSIX grouping format. */ /* Convert Windows grouping format into POSIX grouping format. Note that
for (char *c = buf; *c; ++c) only ASCII chars are used in the grouping format. */
for (wchar_t *c = buf; *c; ++c)
{ {
if (*c < '0' || *c > '9') if (*c < L'0' || *c > L'9')
continue; continue;
char val = *c - '0'; char val = *c - L'0';
if (!val) if (!val)
{ {
repeat = true; repeat = true;
@ -429,10 +431,11 @@ __set_lc_time_from_win (const char *name,
char **lc_time_buf, wctomb_p f_wctomb, char **lc_time_buf, wctomb_p f_wctomb,
const char *charset) const char *charset)
{ {
LCID lcid = __get_lcid_from_locale (name); wchar_t win_locale[ENCODING_LEN + 1];
if (lcid == (LCID) -1) int ret = __get_rfc5646_from_locale (name, win_locale);
return lcid; if (ret < 0)
if (!lcid && !strcmp (charset, "ASCII")) return ret;
if (!ret && !strcmp (charset, "ASCII"))
return 0; return 0;
# define MAX_TIME_BUFFER_SIZE 4096 # define MAX_TIME_BUFFER_SIZE 4096
@ -445,17 +448,17 @@ __set_lc_time_from_win (const char *name,
char *lc_time_ptr = new_lc_time_buf; char *lc_time_ptr = new_lc_time_buf;
/* C.foo is just a copy of "C" with fixed charset. */ /* C.foo is just a copy of "C" with fixed charset. */
if (!lcid) if (!ret)
memcpy (_time_locale, _C_time_locale, sizeof (struct lc_time_T)); memcpy (_time_locale, _C_time_locale, sizeof (struct lc_time_T));
/* codeset */ /* codeset */
_time_locale->codeset = lc_time_ptr; _time_locale->codeset = lc_time_ptr;
lc_time_ptr = stpcpy (lc_time_ptr, charset) + 1; lc_time_ptr = stpcpy (lc_time_ptr, charset) + 1;
if (lcid) if (ret)
{ {
char locale[ENCODING_LEN + 1]; char locale[ENCODING_LEN + 1];
strcpy (locale, name); strcpy (locale, name);
/* Removes the charset from the locale and attach the modifer to the /* Removes the charset from the locale and attach the modifier to the
language_TERRITORY part. */ language_TERRITORY part. */
char *c = strchr (locale, '.'); char *c = strchr (locale, '.');
if (c) if (c)
@ -476,16 +479,21 @@ __set_lc_time_from_win (const char *name,
sizeof *lc_era, locale_cmp); sizeof *lc_era, locale_cmp);
/* mon */ /* mon */
/* Windows has a bug in Japanese and Korean locales. In these /* Windows has a bug in "ja-JP" and "ko-KR" (but not in "ko-KP").
locales, strings returned for LOCALE_SABBREVMONTHNAME* are missing In these locales, strings returned for LOCALE_SABBREVMONTHNAME*
the suffix representing a month. Unfortunately this is not are missing the suffix representing a month.
documented in English. A Japanese article describing the problem
is http://msdn.microsoft.com/ja-jp/library/cc422084.aspx A Japanese article describing the problem was
https://msdn.microsoft.com/ja-jp/library/cc422084.aspx, which is
only available via
https://web.archive.org/web/20110922195821/https://msdn.microsoft.com/ja-jp/library/cc422084.aspx
these days. Testing indicates that this problem is still present
in Windows 11.
The workaround is to use LOCALE_SMONTHNAME* in these locales, The workaround is to use LOCALE_SMONTHNAME* in these locales,
even for the abbreviated month name. */ even for the abbreviated month name. */
const LCTYPE mon_base = const LCTYPE mon_base = !wcscmp (win_locale, L"ja-JP")
lcid == MAKELANGID (LANG_JAPANESE, SUBLANG_JAPANESE_JAPAN) || !wcscmp (win_locale, L"ko-KR")
|| lcid == MAKELANGID (LANG_KOREAN, SUBLANG_KOREAN)
? LOCALE_SMONTHNAME1 : LOCALE_SABBREVMONTHNAME1; ? LOCALE_SMONTHNAME1 : LOCALE_SABBREVMONTHNAME1;
for (int i = 0; i < 12; ++i) for (int i = 0; i < 12; ++i)
{ {
@ -495,7 +503,8 @@ __set_lc_time_from_win (const char *name,
/* month and alt_month */ /* month and alt_month */
for (int i = 0; i < 12; ++i) for (int i = 0; i < 12; ++i)
{ {
_time_locale->wmonth[i] = getlocaleinfo (time, LOCALE_SMONTHNAME1 + i); _time_locale->wmonth[i] = getlocaleinfo (time,
LOCALE_SMONTHNAME1 + i);
_time_locale->month[i] = _time_locale->alt_month[i] _time_locale->month[i] = _time_locale->alt_month[i]
= charfromwchar (time, wmonth[i]); = charfromwchar (time, wmonth[i]);
} }
@ -570,7 +579,7 @@ __set_lc_time_from_win (const char *name,
/* md */ /* md */
{ {
wchar_t buf[80]; wchar_t buf[80];
GetLocaleInfoW (lcid, LOCALE_IDATE, buf, 80); GetLocaleInfoEx (win_locale, LOCALE_IDATE, buf, 80);
_time_locale->md_order = (const char *) lc_time_ptr; _time_locale->md_order = (const char *) lc_time_ptr;
lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1; lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1;
} }
@ -690,10 +699,11 @@ __set_lc_ctype_from_win (const char *name,
char **lc_ctype_buf, wctomb_p f_wctomb, char **lc_ctype_buf, wctomb_p f_wctomb,
const char *charset, int mb_cur_max) const char *charset, int mb_cur_max)
{ {
LCID lcid = __get_lcid_from_locale (name); wchar_t win_locale[ENCODING_LEN + 1];
if (lcid == (LCID) -1) int ret = __get_rfc5646_from_locale (name, win_locale);
return lcid; if (ret < 0)
if (!lcid && !strcmp (charset, "ASCII")) return ret;
if (!ret && !strcmp (charset, "ASCII"))
return 0; return 0;
# define MAX_CTYPE_BUFFER_SIZE 256 # define MAX_CTYPE_BUFFER_SIZE 256
@ -704,7 +714,7 @@ __set_lc_ctype_from_win (const char *name,
return -1; return -1;
char *lc_ctype_ptr = new_lc_ctype_buf; char *lc_ctype_ptr = new_lc_ctype_buf;
/* C.foo is just a copy of "C" with fixed charset. */ /* C.foo is just a copy of "C" with fixed charset. */
if (!lcid) if (!ret)
memcpy (_ctype_locale, _C_ctype_locale, sizeof (struct lc_ctype_T)); memcpy (_ctype_locale, _C_ctype_locale, sizeof (struct lc_ctype_T));
/* codeset */ /* codeset */
_ctype_locale->codeset = lc_ctype_ptr; _ctype_locale->codeset = lc_ctype_ptr;
@ -713,11 +723,11 @@ __set_lc_ctype_from_win (const char *name,
_ctype_locale->mb_cur_max = lc_ctype_ptr; _ctype_locale->mb_cur_max = lc_ctype_ptr;
*lc_ctype_ptr++ = mb_cur_max; *lc_ctype_ptr++ = mb_cur_max;
*lc_ctype_ptr++ = '\0'; *lc_ctype_ptr++ = '\0';
if (lcid) if (ret)
{ {
/* outdigits and woutdigits */ /* outdigits and woutdigits */
wchar_t digits[11]; wchar_t digits[11];
GetLocaleInfoW (lcid, LOCALE_SNATIVEDIGITS, digits, 11); GetLocaleInfoEx (win_locale, LOCALE_SNATIVEDIGITS, digits, 11);
for (int i = 0; i <= 9; ++i) for (int i = 0; i <= 9; ++i)
{ {
mbstate_t state; mbstate_t state;
@ -762,10 +772,11 @@ __set_lc_numeric_from_win (const char *name,
char **lc_numeric_buf, wctomb_p f_wctomb, char **lc_numeric_buf, wctomb_p f_wctomb,
const char *charset) const char *charset)
{ {
LCID lcid = __get_lcid_from_locale (name); wchar_t win_locale[ENCODING_LEN + 1];
if (lcid == (LCID) -1) int ret = __get_rfc5646_from_locale (name, win_locale);
return lcid; if (ret < 0)
if (!lcid && !strcmp (charset, "ASCII")) return ret;
if (!ret && !strcmp (charset, "ASCII"))
return 0; return 0;
# define MAX_NUMERIC_BUFFER_SIZE 256 # define MAX_NUMERIC_BUFFER_SIZE 256
@ -777,20 +788,20 @@ __set_lc_numeric_from_win (const char *name,
return -1; return -1;
char *lc_numeric_ptr = new_lc_numeric_buf; char *lc_numeric_ptr = new_lc_numeric_buf;
/* C.foo is just a copy of "C" with fixed charset. */ /* C.foo is just a copy of "C" with fixed charset. */
if (!lcid) if (!ret)
memcpy (_numeric_locale, _C_numeric_locale, sizeof (struct lc_numeric_T)); memcpy (_numeric_locale, _C_numeric_locale, sizeof (struct lc_numeric_T));
else else
{ {
/* decimal_point and thousands_sep */ /* decimal_point and thousands_sep */
if (lcid == 0x0429) /* fa_IR. Windows decimal_point is slash, /* fa_IR. Windows decimal_point is slash, correct is dot */
correct is dot */ if (!wcscmp (win_locale, L"fa-IR"))
{ {
_numeric_locale->wdecimal_point = setlocaleinfo (numeric, L'.'); _numeric_locale->wdecimal_point = setlocaleinfo (numeric, L'.');
_numeric_locale->wthousands_sep = setlocaleinfo (numeric, L','); _numeric_locale->wthousands_sep = setlocaleinfo (numeric, L',');
} }
else if (lcid == 0x0463) /* ps_AF. Windows decimal_point is dot, /* ps_AF. Windows decimal_point is dot, thousands_sep is comma,
thousands_sep is comma, correct are correct are arabic separators. */
arabic separators. */ else if (!wcscmp (win_locale, L"ps-AF"))
{ {
_numeric_locale->wdecimal_point = setlocaleinfo (numeric, 0x066b); _numeric_locale->wdecimal_point = setlocaleinfo (numeric, 0x066b);
_numeric_locale->wthousands_sep = setlocaleinfo (numeric, 0x066c); _numeric_locale->wthousands_sep = setlocaleinfo (numeric, 0x066c);
@ -805,7 +816,7 @@ __set_lc_numeric_from_win (const char *name,
_numeric_locale->decimal_point = charfromwchar (numeric, wdecimal_point); _numeric_locale->decimal_point = charfromwchar (numeric, wdecimal_point);
_numeric_locale->thousands_sep = charfromwchar (numeric, wthousands_sep); _numeric_locale->thousands_sep = charfromwchar (numeric, wthousands_sep);
/* grouping */ /* grouping */
_numeric_locale->grouping = conv_grouping (lcid, LOCALE_SGROUPING, _numeric_locale->grouping = conv_grouping (win_locale, LOCALE_SGROUPING,
&lc_numeric_ptr); &lc_numeric_ptr);
} }
/* codeset */ /* codeset */
@ -837,10 +848,11 @@ __set_lc_monetary_from_win (const char *name,
char **lc_monetary_buf, wctomb_p f_wctomb, char **lc_monetary_buf, wctomb_p f_wctomb,
const char *charset) const char *charset)
{ {
LCID lcid = __get_lcid_from_locale (name); wchar_t win_locale[ENCODING_LEN + 1];
if (lcid == (LCID) -1) int ret = __get_rfc5646_from_locale (name, win_locale);
return lcid; if (ret < 0)
if (!lcid && !strcmp (charset, "ASCII")) return ret;
if (!ret && !strcmp (charset, "ASCII"))
return 0; return 0;
# define MAX_MONETARY_BUFFER_SIZE 512 # define MAX_MONETARY_BUFFER_SIZE 512
@ -852,7 +864,7 @@ __set_lc_monetary_from_win (const char *name,
return -1; return -1;
char *lc_monetary_ptr = new_lc_monetary_buf; char *lc_monetary_ptr = new_lc_monetary_buf;
/* C.foo is just a copy of "C" with fixed charset. */ /* C.foo is just a copy of "C" with fixed charset. */
if (!lcid) if (!ret)
memcpy (_monetary_locale, _C_monetary_locale, sizeof (struct lc_monetary_T)); memcpy (_monetary_locale, _C_monetary_locale, sizeof (struct lc_monetary_T));
else else
{ {
@ -881,11 +893,11 @@ __set_lc_monetary_from_win (const char *name,
_monetary_locale->currency_symbol = charfromwchar (monetary, _monetary_locale->currency_symbol = charfromwchar (monetary,
wcurrency_symbol); wcurrency_symbol);
/* mon_decimal_point and mon_thousands_sep */ /* mon_decimal_point and mon_thousands_sep */
if (lcid == 0x0429 || lcid == 0x0463) /* fa_IR or ps_AF. Windows /* fa_IR or ps_AF. Windows mon_decimal_point is slash and comma,
mon_decimal_point is slash mon_thousands_sep is comma and dot, correct
and comma, mon_thousands_sep
is comma and dot, correct
are arabic separators. */ are arabic separators. */
if (!wcscmp (win_locale, L"fa-IR")
|| !wcscmp (win_locale, L"ps-AF"))
{ {
_monetary_locale->wmon_decimal_point = setlocaleinfo (monetary, _monetary_locale->wmon_decimal_point = setlocaleinfo (monetary,
0x066b); 0x066b);
@ -904,7 +916,8 @@ __set_lc_monetary_from_win (const char *name,
_monetary_locale->mon_thousands_sep = charfromwchar (monetary, _monetary_locale->mon_thousands_sep = charfromwchar (monetary,
wmon_thousands_sep); wmon_thousands_sep);
/* mon_grouping */ /* mon_grouping */
_monetary_locale->mon_grouping = conv_grouping (lcid, LOCALE_SMONGROUPING, _monetary_locale->mon_grouping = conv_grouping (win_locale,
LOCALE_SMONGROUPING,
&lc_monetary_ptr); &lc_monetary_ptr);
/* positive_sign */ /* positive_sign */
_monetary_locale->wpositive_sign = getlocaleinfo (monetary, _monetary_locale->wpositive_sign = getlocaleinfo (monetary,
@ -915,33 +928,33 @@ __set_lc_monetary_from_win (const char *name,
LOCALE_SNEGATIVESIGN); LOCALE_SNEGATIVESIGN);
_monetary_locale->negative_sign = charfromwchar (monetary, wnegative_sign); _monetary_locale->negative_sign = charfromwchar (monetary, wnegative_sign);
/* int_frac_digits */ /* int_frac_digits */
*lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IINTLCURRDIGITS); *lc_monetary_ptr = (char) getlocaleint (LOCALE_IINTLCURRDIGITS);
_monetary_locale->int_frac_digits = lc_monetary_ptr++; _monetary_locale->int_frac_digits = lc_monetary_ptr++;
/* frac_digits */ /* frac_digits */
*lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_ICURRDIGITS); *lc_monetary_ptr = (char) getlocaleint (LOCALE_ICURRDIGITS);
_monetary_locale->frac_digits = lc_monetary_ptr++; _monetary_locale->frac_digits = lc_monetary_ptr++;
/* p_cs_precedes and int_p_cs_precedes */ /* p_cs_precedes and int_p_cs_precedes */
*lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSYMPRECEDES); *lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSYMPRECEDES);
_monetary_locale->p_cs_precedes _monetary_locale->p_cs_precedes
= _monetary_locale->int_p_cs_precedes = lc_monetary_ptr++; = _monetary_locale->int_p_cs_precedes = lc_monetary_ptr++;
/* p_sep_by_space and int_p_sep_by_space */ /* p_sep_by_space and int_p_sep_by_space */
*lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSEPBYSPACE); *lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSEPBYSPACE);
_monetary_locale->p_sep_by_space _monetary_locale->p_sep_by_space
= _monetary_locale->int_p_sep_by_space = lc_monetary_ptr++; = _monetary_locale->int_p_sep_by_space = lc_monetary_ptr++;
/* n_cs_precedes and int_n_cs_precedes */ /* n_cs_precedes and int_n_cs_precedes */
*lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSYMPRECEDES); *lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSYMPRECEDES);
_monetary_locale->n_cs_precedes _monetary_locale->n_cs_precedes
= _monetary_locale->int_n_cs_precedes = lc_monetary_ptr++; = _monetary_locale->int_n_cs_precedes = lc_monetary_ptr++;
/* n_sep_by_space and int_n_sep_by_space */ /* n_sep_by_space and int_n_sep_by_space */
*lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSEPBYSPACE); *lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSEPBYSPACE);
_monetary_locale->n_sep_by_space _monetary_locale->n_sep_by_space
= _monetary_locale->int_n_sep_by_space = lc_monetary_ptr++; = _monetary_locale->int_n_sep_by_space = lc_monetary_ptr++;
/* p_sign_posn and int_p_sign_posn */ /* p_sign_posn and int_p_sign_posn */
*lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSIGNPOSN); *lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSIGNPOSN);
_monetary_locale->p_sign_posn _monetary_locale->p_sign_posn
= _monetary_locale->int_p_sign_posn = lc_monetary_ptr++; = _monetary_locale->int_p_sign_posn = lc_monetary_ptr++;
/* n_sign_posn and int_n_sign_posn */ /* n_sign_posn and int_n_sign_posn */
*lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSIGNPOSN); *lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSIGNPOSN);
_monetary_locale->n_sign_posn _monetary_locale->n_sign_posn
= _monetary_locale->int_n_sign_posn = lc_monetary_ptr++; = _monetary_locale->int_n_sign_posn = lc_monetary_ptr++;
} }
@ -970,10 +983,11 @@ __set_lc_messages_from_win (const char *name,
char **lc_messages_buf, char **lc_messages_buf,
wctomb_p f_wctomb, const char *charset) wctomb_p f_wctomb, const char *charset)
{ {
LCID lcid = __get_lcid_from_locale (name); wchar_t win_locale[ENCODING_LEN + 1];
if (lcid == (LCID) -1) int ret = __get_rfc5646_from_locale (name, win_locale);
return lcid; if (ret < 0)
if (!lcid && !strcmp (charset, "ASCII")) return ret;
if (!ret && !strcmp (charset, "ASCII"))
return 0; return 0;
char locale[ENCODING_LEN + 1]; char locale[ENCODING_LEN + 1];
@ -981,7 +995,7 @@ __set_lc_messages_from_win (const char *name,
lc_msg_t *msg = NULL; lc_msg_t *msg = NULL;
/* C.foo is just a copy of "C" with fixed charset. */ /* C.foo is just a copy of "C" with fixed charset. */
if (!lcid) if (!ret)
memcpy (_messages_locale, _C_messages_locale, sizeof (struct lc_messages_T)); memcpy (_messages_locale, _C_messages_locale, sizeof (struct lc_messages_T));
else else
{ {
@ -1012,7 +1026,7 @@ __set_lc_messages_from_win (const char *name,
target charset are simply ignored, as on Linux. */ target charset are simply ignored, as on Linux. */
size_t len = 0; size_t len = 0;
len += (strlen (charset) + 1); len += (strlen (charset) + 1);
if (lcid) if (ret)
{ {
len += lc_wcstombs (f_wctomb, NULL, msg->yesexpr, 0) + 1; len += lc_wcstombs (f_wctomb, NULL, msg->yesexpr, 0) + 1;
len += lc_wcstombs (f_wctomb, NULL, msg->noexpr, 0) + 1; len += lc_wcstombs (f_wctomb, NULL, msg->noexpr, 0) + 1;
@ -1036,7 +1050,7 @@ __set_lc_messages_from_win (const char *name,
/* codeset */ /* codeset */
_messages_locale->codeset = c; _messages_locale->codeset = c;
c = stpcpy (c, charset) + 1; c = stpcpy (c, charset) + 1;
if (lcid) if (ret)
{ {
_messages_locale->yesexpr = (const char *) c; _messages_locale->yesexpr = (const char *) c;
len = lc_wcstombs (f_wctomb, c, msg->yesexpr, lc_messages_end - c); len = lc_wcstombs (f_wctomb, c, msg->yesexpr, lc_messages_end - c);
@ -1065,7 +1079,7 @@ __set_lc_messages_from_win (const char *name,
const struct lc_collate_T _C_collate_locale = const struct lc_collate_T _C_collate_locale =
{ {
0, L"",
__ascii_mbtowc, __ascii_mbtowc,
"ASCII" "ASCII"
}; };
@ -1080,10 +1094,11 @@ __collate_load_locale (struct __locale_t *locale, const char *name,
char *bufp = NULL; char *bufp = NULL;
struct lc_collate_T *cop = NULL; struct lc_collate_T *cop = NULL;
LCID lcid = __get_lcid_from_locale (name); wchar_t win_locale[ENCODING_LEN + 1];
if (lcid == (LCID) -1) int ret = __get_rfc5646_from_locale (name, win_locale);
return -1; if (ret < 0)
if (lcid) return ret;
if (ret)
{ {
bufp = (char *) malloc (1); /* dummy */ bufp = (char *) malloc (1); /* dummy */
if (!bufp) if (!bufp)
@ -1094,12 +1109,12 @@ __collate_load_locale (struct __locale_t *locale, const char *name,
free (bufp); free (bufp);
return -1; return -1;
} }
cop->lcid = lcid; wcscpy (cop->win_locale, win_locale);
cop->mbtowc = (mbtowc_p) f_mbtowc; cop->mbtowc = (mbtowc_p) f_mbtowc;
stpcpy (cop->codeset, charset); stpcpy (cop->codeset, charset);
} }
struct __lc_cats tmp = locale->lc_cat[LC_COLLATE]; struct __lc_cats tmp = locale->lc_cat[LC_COLLATE];
locale->lc_cat[LC_COLLATE].ptr = lcid == 0 ? &_C_collate_locale : cop; locale->lc_cat[LC_COLLATE].ptr = !win_locale[0] ? &_C_collate_locale : cop;
locale->lc_cat[LC_COLLATE].buf = bufp; locale->lc_cat[LC_COLLATE].buf = bufp;
/* If buf is not NULL, both pointers have been alloc'ed */ /* If buf is not NULL, both pointers have been alloc'ed */
if (tmp.buf) if (tmp.buf)
@ -1119,11 +1134,11 @@ wcscoll_l (const wchar_t *__restrict ws1, const wchar_t *__restrict ws2,
struct __locale_t *locale) struct __locale_t *locale)
{ {
int ret; int ret;
LCID collate_lcid = __get_collate_locale (locale)->lcid; const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
if (!collate_lcid) if (!collate_locale[0])
return wcscmp (ws1, ws2); return wcscmp (ws1, ws2);
ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1); ret = CompareStringEx (collate_locale, 0, ws1, -1, ws2, -1, NULL, NULL, 0);
if (!ret) if (!ret)
set_errno (EINVAL); set_errno (EINVAL);
return ret - CSTR_EQUAL; return ret - CSTR_EQUAL;
@ -1143,12 +1158,10 @@ strcoll_l (const char *__restrict s1, const char *__restrict s2,
wchar_t *ws1, *ws2; wchar_t *ws1, *ws2;
tmp_pathbuf tp; tmp_pathbuf tp;
int ret; int ret;
LCID collate_lcid = __get_collate_locale (locale)->lcid; const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
if (!collate_lcid) if (!collate_locale[0])
return strcmp (s1, s2); return strcmp (s1, s2);
/* The ANSI version of CompareString uses the default charset of the lcid,
so we must use the Unicode version. */
mbtowc_p collate_mbtowc = __get_collate_locale (locale)->mbtowc; mbtowc_p collate_mbtowc = __get_collate_locale (locale)->mbtowc;
n1 = lc_mbstowcs (collate_mbtowc, NULL, s1, 0) + 1; n1 = lc_mbstowcs (collate_mbtowc, NULL, s1, 0) + 1;
ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t)) ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t))
@ -1158,7 +1171,7 @@ strcoll_l (const char *__restrict s1, const char *__restrict s2,
ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t)) ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
: tp.w_get ()); : tp.w_get ());
lc_mbstowcs (collate_mbtowc, ws2, s2, n2); lc_mbstowcs (collate_mbtowc, ws2, s2, n2);
ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1); ret = CompareStringEx (collate_locale, 0, ws1, -1, ws2, -1, NULL, NULL, 0);
if (n1 > NT_MAX_PATH) if (n1 > NT_MAX_PATH)
free (ws1); free (ws1);
if (n2 > NT_MAX_PATH) if (n2 > NT_MAX_PATH)
@ -1350,15 +1363,15 @@ wcsxfrm_l (wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t wsn,
struct __locale_t *locale) struct __locale_t *locale)
{ {
size_t ret; size_t ret;
LCID collate_lcid = __get_collate_locale (locale)->lcid; const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
if (!collate_lcid) if (!collate_locale[0])
return wcslcpy (ws1, ws2, wsn); return wcslcpy (ws1, ws2, wsn);
/* Don't use LCMAP_SORTKEY in conjunction with LCMAP_BYTEREV. The cchDest /* Don't use LCMAP_SORTKEY in conjunction with LCMAP_BYTEREV. The cchDest
parameter is used as byte count with LCMAP_SORTKEY but as char count with parameter is used as byte count with LCMAP_SORTKEY but as char count with
LCMAP_BYTEREV. */ LCMAP_BYTEREV. */
ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, ws1, ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1, ws1,
wsn * sizeof (wchar_t)); wsn * sizeof (wchar_t), NULL, NULL, 0);
if (ret) if (ret)
{ {
ret /= sizeof (wchar_t); ret /= sizeof (wchar_t);
@ -1380,7 +1393,8 @@ wcsxfrm_l (wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t wsn,
set_errno (EINVAL); set_errno (EINVAL);
else else
{ {
ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, NULL, 0); ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
NULL, 0, NULL, NULL, 0);
if (ret) if (ret)
wsn = ret / sizeof (wchar_t); wsn = ret / sizeof (wchar_t);
} }
@ -1401,12 +1415,10 @@ strxfrm_l (char *__restrict s1, const char *__restrict s2, size_t sn,
size_t n2; size_t n2;
wchar_t *ws2; wchar_t *ws2;
tmp_pathbuf tp; tmp_pathbuf tp;
LCID collate_lcid = __get_collate_locale (locale)->lcid; const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
if (!collate_lcid) if (!collate_locale[0])
return strlcpy (s1, s2, sn); return strlcpy (s1, s2, sn);
/* The ANSI version of LCMapString uses the default charset of the lcid,
so we must use the Unicode version. */
mbtowc_p collate_mbtowc = __get_collate_locale (locale)->mbtowc; mbtowc_p collate_mbtowc = __get_collate_locale (locale)->mbtowc;
n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1; n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1;
ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t)) ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
@ -1415,8 +1427,8 @@ strxfrm_l (char *__restrict s1, const char *__restrict s2, size_t sn,
{ {
lc_mbstowcs (collate_mbtowc, ws2, s2, n2); lc_mbstowcs (collate_mbtowc, ws2, s2, n2);
/* The sort key is a NUL-terminated byte string. */ /* The sort key is a NUL-terminated byte string. */
ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
(PWCHAR) s1, sn); (PWCHAR) s1, sn, NULL, NULL, 0);
} }
if (ret == 0) if (ret == 0)
{ {
@ -1424,7 +1436,8 @@ strxfrm_l (char *__restrict s1, const char *__restrict s2, size_t sn,
if (!ws2 || GetLastError () != ERROR_INSUFFICIENT_BUFFER) if (!ws2 || GetLastError () != ERROR_INSUFFICIENT_BUFFER)
set_errno (EINVAL); set_errno (EINVAL);
else else
ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, NULL, 0); ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
NULL, 0, NULL, NULL, 0);
} }
if (ws2 && n2 > NT_MAX_PATH) if (ws2 && n2 > NT_MAX_PATH)
free (ws2); free (ws2);
@ -1442,20 +1455,30 @@ strxfrm (char *__restrict s1, const char *__restrict s2, size_t sn)
/* Fetch default ANSI codepage from locale info and generate a setlocale /* Fetch default ANSI codepage from locale info and generate a setlocale
compatible character set code. Called from newlib's setlocale(), if the compatible character set code. Called from newlib's setlocale(), if the
charset isn't given explicitly in the POSIX compatible locale specifier. */ charset isn't given explicitly in the POSIX compatible locale specifier. */
/* FIXME: Check all locales against their Linux counterpart again and
make sure the codeset conversion is correct.
FIXME: Perhaps, convert to locale names only.
FIXME: Perhaps, maintain a sorted list of Linux locales and their
default codesets. */
extern "C" void extern "C" void
__set_charset_from_locale (const char *locale, char *charset) __set_charset_from_locale (const char *locale, char *charset)
{ {
UINT cp; UINT cp;
LCID lcid = __get_lcid_from_locale (locale); wchar_t win_locale[ENCODING_LEN + 1];
int ret = __get_rfc5646_from_locale (locale, win_locale);
wchar_t wbuf[9]; wchar_t wbuf[9];
/* "C" locale, or invalid locale? */ /* "C" locale, or invalid locale? */
if (lcid == 0 || lcid == (LCID) -1) if (ret <= 0)
cp = 20127; cp = 20127;
else if (!GetLocaleInfoW (lcid, else if (!GetLocaleInfoEx (win_locale,
LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
(PWCHAR) &cp, sizeof cp)) (PWCHAR) &cp, sizeof cp))
cp = 0; cp = 0;
/* For simplicity, we still convert to LCID here. */
LCID lcid = LocaleNameToLCID (win_locale, 0);
/* Translate codepage and lcid to a charset closely aligned with the default /* Translate codepage and lcid to a charset closely aligned with the default
charsets defined in Glibc. */ charsets defined in Glibc. */
const char *cs; const char *cs;
@ -1481,11 +1504,12 @@ __set_charset_from_locale (const char *locale, char *charset)
cs = "BIG5"; cs = "BIG5";
break; break;
case 1250: case 1250:
if (lcid == 0x181a /* sr_BA (Serbian Language/Bosnia if (lcid == 0x181a /* sr_BA (Serbian/Bosnia
and Herzegovina) */ and Herzegovina) */
|| lcid == 0x241a /* sr_RS (Serbian Language/Serbia) */ || lcid == 0x241a /* sr_RS (Serbian/Serbia) */
|| lcid == 0x2c1a /* sr_ME (Serbian Language/Montenegro)*/ || lcid == 0x2c1a /* sr_ME (Serbian/Montenegro)*/
|| lcid == 0x0442) /* tk_TM (Turkmen/Turkmenistan) */ || lcid == 0x0442 /* tk_TM (Turkmen/Turkmenistan) */
|| !wcscmp (win_locale, L"sr-Latn-XK")) /* (Serbian/Kosovo) */
cs = "UTF-8"; cs = "UTF-8";
else if (lcid == 0x041c) /* sq_AL (Albanian/Albania) */ else if (lcid == 0x041c) /* sq_AL (Albanian/Albania) */
cs = "ISO-8859-1"; cs = "ISO-8859-1";
@ -1498,17 +1522,21 @@ __set_charset_from_locale (const char *locale, char *charset)
|| lcid == 0x281a /* sr_RS (Serbian Language/Serbia) */ || lcid == 0x281a /* sr_RS (Serbian Language/Serbia) */
|| lcid == 0x301a /* sr_ME (Serbian Language/Montenegro)*/ || lcid == 0x301a /* sr_ME (Serbian Language/Montenegro)*/
|| lcid == 0x0440 /* ky_KG (Kyrgyz/Kyrgyzstan) */ || lcid == 0x0440 /* ky_KG (Kyrgyz/Kyrgyzstan) */
|| lcid == 0x082c /* az_AZ@cyrillic (Azerbaijani/Azerbaijan) */
|| lcid == 0x0843 /* uz_UZ (Uzbek/Uzbekistan) */ || lcid == 0x0843 /* uz_UZ (Uzbek/Uzbekistan) */
/* tt_RU (Tatar/Russia), /* tt_RU (Tatar/Russia),
IQTElif alphabet */ IQTElif alphabet */
|| (lcid == 0x0444 && has_modifier ("@iqtelif")) || (lcid == 0x0444 && has_modifier ("@iqtelif"))
|| lcid == 0x0450) /* mn_MN (Mongolian/Mongolia) */ || lcid == 0x0450 /* mn_MN (Mongolian/Mongolia) */
|| !wcscmp (win_locale, L"sr-Cyrl-XK")) /* (Serbian/Kosovo) */
cs = "UTF-8"; cs = "UTF-8";
else if (lcid == 0x0423) /* be_BY (Belarusian/Belarus) */ else if (lcid == 0x0423) /* be_BY (Belarusian/Belarus) */
cs = has_modifier ("@latin") ? "UTF-8" : "CP1251"; cs = has_modifier ("@latin") ? "UTF-8" : "CP1251";
else if (lcid == 0x0402) /* bg_BG (Bulgarian/Bulgaria) */ else if (lcid == 0x0402 /* bg_BG (Bulgarian/Bulgaria) */
|| lcid == 0x0423) /* be_BY (Belarusian/Belarus) */
cs = "CP1251"; cs = "CP1251";
else if (lcid == 0x0422) /* uk_UA (Ukrainian/Ukraine) */ else if (lcid == 0x0422 /* uk_UA (Ukrainian/Ukraine) */
|| !wcscmp (win_locale, L"ru-UA")) /* (Russian/Ukraine) */
cs = "KOI8-U"; cs = "KOI8-U";
else if (lcid == 0x0428) /* tg_TJ (Tajik/Tajikistan) */ else if (lcid == 0x0428) /* tg_TJ (Tajik/Tajikistan) */
cs = "KOI8-T"; cs = "KOI8-T";
@ -1532,7 +1560,8 @@ __set_charset_from_locale (const char *locale, char *charset)
|| lcid == 0x0832 /* tn_BW (Tswana/Botswana) */ || lcid == 0x0832 /* tn_BW (Tswana/Botswana) */
|| lcid == 0x0432 /* tn_ZA (Tswana/South Africa) */ || lcid == 0x0432 /* tn_ZA (Tswana/South Africa) */
|| lcid == 0x0488 /* wo_SN (Wolof/Senegal) */ || lcid == 0x0488 /* wo_SN (Wolof/Senegal) */
|| lcid == 0x046a) /* yo_NG (Yoruba/Nigeria) */ || lcid == 0x046a /* yo_NG (Yoruba/Nigeria) */
|| lcid == 0x085f) /* ber_DZ (Tamazight/Algeria) */
cs = "UTF-8"; cs = "UTF-8";
else if (lcid == 0x042e) /* hsb_DE (Upper Sorbian/Germany) */ else if (lcid == 0x042e) /* hsb_DE (Upper Sorbian/Germany) */
cs = "ISO-8859-2"; cs = "ISO-8859-2";

View File

@ -827,7 +827,7 @@ p_b_term(struct parse *p, cset *cs)
CHadd(p, cs, start); CHadd(p, cs, start);
else { else {
#ifdef __CYGWIN__ #ifdef __CYGWIN__
if (!__get_current_collate_locale ()->lcid) { if (!__get_current_collate_locale ()->win_locale[0]) {
#else #else
if (__collate_load_error) { if (__collate_load_error) {
#endif #endif