Cygwin: locale(1): drop using LCID, use Windows locale names

LCIDs have been deprecated since Windows Vista.  Worse, lots of new locales
have been added in the meantime which have no LCID attached.  They
are only available by locale name.

As a first step, rearrange the locale(1) tool to use Windows locale
names, rather than LCIDs, so we can now enumerate *all* locales
available in more recent Windows versions.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2023-02-23 00:22:56 +01:00
parent 17ac400c11
commit ac405ab9bc
1 changed files with 198 additions and 138 deletions

View File

@ -30,6 +30,7 @@
#include <getopt.h> #include <getopt.h>
#include <string.h> #include <string.h>
#include <wchar.h> #include <wchar.h>
#include <wctype.h>
#include <locale.h> #include <locale.h>
#include <langinfo.h> #include <langinfo.h>
#include <limits.h> #include <limits.h>
@ -107,17 +108,42 @@ struct option longopts[] = {
const char *opts = "acfhikmnsuUvV"; const char *opts = "acfhikmnsuUvV";
int int
getlocale (LCID lcid, char *name) getlocale (PWCHAR loc_name, wchar_t *iso639, wchar_t *iso3166,
wchar_t *iso15924 = NULL)
{ {
char iso639[10]; wchar_t *cp;
char iso3166[10];
iso3166[0] = '\0'; /* Skip language-only locales, e. g. "en" */
if (!GetLocaleInfo (lcid, LOCALE_SISO639LANGNAME, iso639, 10)) if (!(cp = wcschr (loc_name, L'-')))
return 0; return 0;
GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso3166, 10); ++cp;
sprintf (name, "%s%s%s", iso639, lcid > 0x3ff ? "_" : "", /* Script inside? Scripts are Upper/Lower, e. g. "Latn" */
lcid > 0x3ff ? iso3166 : ""); if (iswupper (cp[0]) && iswlower (cp[1]))
{
wchar_t *cp2;
/* Skip language-Script locales, missing country */
if (!(cp2 = wcschr (cp + 2, L'-')))
return 0;
/* Otherwise, store in iso15924 */
if (iso15924)
wcpcpy (wcpncpy (iso15924, cp, cp2 - cp), L";");
}
cp = wcsrchr (loc_name, L'-');
if (cp)
{
/* Skip numeric iso3166 country name. */
if (iswdigit (cp[1]))
return 0;
/* Special case postfix after iso3166 country name: ca-ES-valencia.
Use the postfix thingy as script so it will become a @modifier */
if (iswlower (cp[1]))
wcpcpy (iso15924, cp + 1);
}
if (!GetLocaleInfoEx (loc_name, LOCALE_SISO639LANGNAME, iso639, 10))
return 0;
GetLocaleInfoEx (loc_name, LOCALE_SISO3166CTRYNAME, iso3166, 10);
return 1; return 1;
} }
@ -132,14 +158,6 @@ loc_t *locale;
size_t loc_max; size_t loc_max;
size_t loc_num; size_t loc_num;
void
print_codeset (const char *codeset)
{
for (; *codeset; ++codeset)
if (*codeset != '-')
putc (tolower ((int)(unsigned char) *codeset), stdout);
}
void void
print_locale_with_codeset (int verbose, loc_t *locale, bool utf8, print_locale_with_codeset (int verbose, loc_t *locale, bool utf8,
const char *modifier) const char *modifier)
@ -187,7 +205,7 @@ print_locale (int verbose, loc_t *locale)
{ {
if (!modifier) if (!modifier)
print_locale_with_codeset (verbose, locale, true, NULL); print_locale_with_codeset (verbose, locale, true, NULL);
else if (!strcmp (modifier, "@cjknarrow")) else if (strcmp (modifier, "@euro"))
{ {
*modifier++ = '\0'; *modifier++ = '\0';
print_locale_with_codeset (verbose, locale, true, modifier); print_locale_with_codeset (verbose, locale, true, modifier);
@ -266,110 +284,147 @@ add_locale_alias_locales ()
c = strchr (replace, '.'); c = strchr (replace, '.');
if (c) if (c)
*c = '\0'; *c = '\0';
/* Ignore "ja_JP" and "ko_KR" locales from here, they are in the Windows
DB anyway. */
if (!strcmp (alias, "ja_JP") || !strcmp (alias, "ko_KR"))
continue;
search.name = replace; search.name = replace;
loc = (loc_t *) bsearch (&search, locale, orig_loc_num, sizeof (loc_t), loc = (loc_t *) bsearch (&search, locale, orig_loc_num, sizeof (loc_t),
compare_locales); compare_locales);
add_locale (alias, loc ? loc->language : L"", loc ? loc->territory : L"", add_locale (alias, loc ? loc->language : L"", loc ? loc->territory : L"",
true); true);
} }
fclose (fp); fclose (fp);
} }
void BOOL
print_all_locales (int verbose) print_all_locales_proc (LPWSTR loc_name, DWORD info, LPARAM param)
{ {
LCID lcid = 0; wchar_t iso639[32] = { 0 };
char name[32]; wchar_t iso3166[32] = { 0 };
wchar_t iso15924[32] = { 0 };
DWORD cp; DWORD cp;
unsigned lang, sublang; #if 0
add_locale ("C", L"C", L"POSIX");
add_locale ("POSIX", L"C", L"POSIX", true);
for (lang = 1; lang <= 0xff; ++lang)
{
struct { struct {
wchar_t language[256]; wchar_t language[256];
wchar_t country[256]; wchar_t country[256];
char loc[32]; char loc[32];
} loc_list[32]; } loc_list[32];
int lcnt = 0; int lcnt = 0;
#endif
for (sublang = 1; sublang <= 0x3f; ++sublang) if (getlocale (loc_name, iso639, iso3166, iso15924))
{
lcid = (sublang << 10) | lang;
if (getlocale (lcid, name))
{ {
char *c, posix_loc[32];
wchar_t language[256]; wchar_t language[256];
wchar_t country[256]; wchar_t country[256];
int i; wchar_t currency[9];
char *c, loc[32];
wchar_t wbuf[9];
/* Go figure. Even the English name of a language or c = posix_loc + snprintf (posix_loc, sizeof posix_loc, "%ls_%ls",
locale might contain native characters. */ iso639, iso3166);
GetLocaleInfoW (lcid, LOCALE_SENGLANGUAGE, language, 256); /* Inuktitut: equivalent @latin due to lack of info on Linux */
GetLocaleInfoW (lcid, LOCALE_SENGCOUNTRY, country, 256); if (!wcscmp (iso639, L"iu"))
/* Avoid dups */
for (i = 0; i < lcnt; ++ i)
if (!wcscmp (loc_list[i].language, language)
&& !wcscmp (loc_list[i].country, country))
break;
if (i < lcnt)
continue;
if (lcnt < 32)
{ {
wcscpy (loc_list[lcnt].language, language); if (wcscmp (iso15924, L"Latn;"))
wcscpy (loc_list[lcnt].country, country); return TRUE;
} }
c = stpcpy (loc, name); /* Javanese: only use @latin locale. */
/* Now check certain conditions to figure out if that else if (!wcscmp (iso639, L"jv"))
locale requires a modifier. */ {
if (lang == LANG_SERBIAN && !strncmp (loc, "sr_", 3) if (wcscmp (iso15924, L"Latn;"))
&& wcsstr (language, L"(Latin)")) return TRUE;
}
/* Mongolian: only use @mongolian locale. */
else if (!wcscmp (iso639, L"mn"))
{
if (wcscmp (iso15924, L"Mong;"))
return TRUE;
}
/* Serbian: Windows default is Latin, Linux default is Cyrillic.
We want the Linux default and attach @latin otherwise */
else if (!wcscmp (iso639, L"sr") && !wcscmp (iso15924, L"Latn;"))
stpcpy (c, "@latin"); stpcpy (c, "@latin");
else if (lang == LANG_UZBEK /* Tamazight: no modifier, iso639 is "ber" on Linux.
&& sublang == SUBLANG_UZBEK_CYRILLIC) "zgh-Tfng-MA" is equivalent to "ber_MA". */
else if (!wcscmp (iso639, L"zgh"))
snprintf (posix_loc, sizeof posix_loc, "ber_%.27ls", iso3166);
/* Tamazight: "tzm-Latn-DZ" is equivalent to "ber_DZ",
skip everything else. */
else if (!wcscmp (iso639, L"tzm"))
{
if (!wcscmp (iso3166, L"DZ") && !wcscmp (iso15924, L"Latn;"))
snprintf (posix_loc, sizeof posix_loc, "ber_%.27ls", iso3166);
else
return TRUE;
}
/* In all other cases, we check if the script from the Windows
locale is the default locale in that language. If not, we
add it as modifier if possible, or skip it */
else if (iso15924[0])
{
wchar_t scriptless_win_locale[32];
wchar_t default_iso15924[32];
wcpcpy (wcpcpy (wcpcpy (scriptless_win_locale, iso639), L"_"),
iso3166);
if ((GetLocaleInfoEx (scriptless_win_locale, LOCALE_SSCRIPTS,
default_iso15924, 32)
|| GetLocaleInfoEx (iso639, LOCALE_SSCRIPTS,
default_iso15924, 32))
&& !wcsstr (default_iso15924, iso15924))
{
if (!wcscmp (iso15924, L"Latn;"))
stpcpy (c, "@latin");
else if (!wcscmp (iso15924, L"Cyrl;"))
stpcpy (c, "@cyrillic"); stpcpy (c, "@cyrillic");
/* Avoid more dups */ else if (!wcscmp (iso15924, L"Deva;"))
for (i = 0; i < lcnt; ++ i) stpcpy (c, "@devanagari");
if (!strcmp (loc_list[i].loc, loc)) else if (!wcscmp (iso15924, L"Adlm;"))
{ stpcpy (c, "@adlam");
lcnt++; else
break; return TRUE;
} }
if (i < lcnt) }
continue;
if (lcnt < 32)
strcpy (loc_list[lcnt++].loc, loc);
/* Print */ /* Print */
add_locale (loc, language, country); GetLocaleInfoEx (loc_name, LOCALE_SENGLISHLANGUAGENAME, language, 256);
/* Check for locales which sport a modifier for GetLocaleInfoEx (loc_name, LOCALE_SENGLISHCOUNTRYNAME, country, 256);
add_locale (posix_loc, language, country);
/* Check for locales sporting an additional modifier for
changing the codeset and other stuff. */ changing the codeset and other stuff. */
if (lang == LANG_BELARUSIAN if (!wcscmp (iso639, L"be") && !wcscmp (iso3166, L"BY"))
&& sublang == SUBLANG_BELARUSIAN_BELARUS)
stpcpy (c, "@latin"); stpcpy (c, "@latin");
else if (lang == LANG_TATAR if (!wcscmp (iso639, L"tt") && !wcscmp (iso3166, L"RU"))
&& sublang == SUBLANG_TATAR_RUSSIA)
stpcpy (c, "@iqtelif"); stpcpy (c, "@iqtelif");
else if (GetLocaleInfoW (lcid, else if (GetLocaleInfoEx (loc_name,
LOCALE_IDEFAULTANSICODEPAGE LOCALE_IDEFAULTANSICODEPAGE
| LOCALE_RETURN_NUMBER, | LOCALE_RETURN_NUMBER,
(PWCHAR) &cp, sizeof cp) (PWCHAR) &cp, sizeof cp)
&& cp == 1252 /* Latin1*/ && cp == 1252 /* Latin1*/
&& GetLocaleInfoW (lcid, LOCALE_SINTLSYMBOL, wbuf, 9) && GetLocaleInfoEx (loc_name, LOCALE_SINTLSYMBOL, currency, 9)
&& !wcsncmp (wbuf, L"EUR", 3)) && !wcsncmp (currency, L"EUR", 3))
stpcpy (c, "@euro"); stpcpy (c, "@euro");
else if (lang == LANG_JAPANESE else if (!wcscmp (iso639, L"ja")
|| lang == LANG_KOREAN || !wcscmp (iso639, L"ko")
|| lang == LANG_CHINESE) || !wcscmp (iso639, L"zh"))
stpcpy (c, "@cjknarrow"); stpcpy (c, "@cjknarrow");
else else
continue; return TRUE;
add_locale (loc, language, country); add_locale (posix_loc, language, country);
}
} }
return TRUE;
} }
void
print_all_locales (int verbose)
{
add_locale ("C", L"C", L"POSIX");
add_locale ("POSIX", L"C", L"POSIX", true);
EnumSystemLocalesEx (print_all_locales_proc,
LOCALE_WINDOWS | LOCALE_SUPPLEMENTAL,
0, NULL);
/* First sort allows add_locale_alias_locales to bsearch in locales. */ /* First sort allows add_locale_alias_locales to bsearch in locales. */
qsort (locale, loc_num, sizeof (loc_t), compare_locales); qsort (locale, loc_num, sizeof (loc_t), compare_locales);
add_locale_alias_locales (); add_locale_alias_locales ();
@ -739,14 +794,13 @@ int
main (int argc, char **argv) main (int argc, char **argv)
{ {
int opt; int opt;
LCID lcid = 0; wchar_t loc_name[256] = { 0 };
int all = 0; int all = 0;
int cat = 0; int cat = 0;
int key = 0; int key = 0;
int maps = 0; int maps = 0;
int verbose = 0; int verbose = 0;
const char *utf = ""; const char *utf = "";
char name[32];
setlocale (LC_ALL, ""); setlocale (LC_ALL, "");
while ((opt = getopt_long (argc, argv, opts, longopts, NULL)) != -1) while ((opt = getopt_long (argc, argv, opts, longopts, NULL)) != -1)
@ -765,19 +819,22 @@ main (int argc, char **argv)
maps = 1; maps = 1;
break; break;
case 'i': case 'i':
lcid = (UINT_PTR) GetKeyboardLayout (0) & 0xffff; GetLocaleInfoW ((UINT_PTR) GetKeyboardLayout (0) & 0xffff, LOCALE_SNAME,
loc_name, 256);
break; break;
case 's': case 's':
lcid = GetSystemDefaultUILanguage (); GetLocaleInfoW (GetSystemDefaultUILanguage (), LOCALE_SNAME,
loc_name, 256);
break; break;
case 'u': case 'u':
lcid = GetUserDefaultUILanguage (); GetLocaleInfoW (GetUserDefaultUILanguage (), LOCALE_SNAME,
loc_name, 256);
break; break;
case 'f': case 'f':
lcid = GetUserDefaultLCID (); GetUserDefaultLocaleName (loc_name, 256);
break; break;
case 'n': case 'n':
lcid = GetSystemDefaultLCID (); GetSystemDefaultLocaleName (loc_name, 256);
break; break;
case 'U': case 'U':
utf = ".UTF-8"; utf = ".UTF-8";
@ -799,10 +856,13 @@ main (int argc, char **argv)
print_all_locales (verbose); print_all_locales (verbose);
else if (maps) else if (maps)
print_charmaps (); print_charmaps ();
else if (lcid) else if (loc_name[0])
{ {
if (getlocale (lcid, name)) wchar_t iso639[10];
printf ("%s%s\n", name, utf); wchar_t iso3166[10];
if (getlocale (loc_name, iso639, iso3166, NULL))
printf ("%ls_%ls%s", iso639, iso3166, utf);
} }
else if (optind < argc) else if (optind < argc)
while (optind < argc) while (optind < argc)