diff --git a/newlib/ChangeLog b/newlib/ChangeLog index c2064598c..6284d01cb 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,18 @@ +2010-02-18 Corinna Vinschen + + * libc/ctype/iswalpha.c (iswalpha): Update to Unicode 5.2. + * libc/ctype/iswprint.c (iswprint): Ditto. + * libc/ctype/iswpunct.c (iswpunct): Drop standalone implementation. + Define in terms of other wctype functions instead. + * libc/ctype/towlower.c (towlower): Update to Unicode 5.2. Add comment + to explain how to fetch the data from the Unicode database. + * libc/ctype/towupper.c (towupper): Ditto. + * libc/ctype/utf8alpha.h: Ditto. + * libc/ctype/utf8print.h: Ditto. + * libc/ctype/utf8punct.h: Remove. + * libc/ctype/iswcntrl.c (iswcntrl): Add comment to explain how to + fetch the data from the Unicode database. + 2010-02-18 Corinna Vinschen * libc/locale/timelocal.h (struct lc_time_T): Add missing ERA-related diff --git a/newlib/libc/ctype/iswalpha.c b/newlib/libc/ctype/iswalpha.c index ab67857fb..16d424086 100644 --- a/newlib/libc/ctype/iswalpha.c +++ b/newlib/libc/ctype/iswalpha.c @@ -78,12 +78,17 @@ _DEFUN(iswalpha,(c), wint_t c) c = _jp2uc (c); + /* Based on and tested against Unicode 5.2 + See utf8alpha.h for a description how to fetch the data. 
*/ x = (c >> 8); /* for some large sections, all characters are alphabetic so handle them here */ if ((x >= 0x34 && x <= 0x4c) || (x >= 0x4e && x <= 0x9e) || (x >= 0xac && x <= 0xd6) || - (x >= 0x200 && x <= 0x2a5)) + (x >= 0x120 && x <= 0x122) || + (x >= 0x130 && x <= 0x133) || + (x >= 0x200 && x <= 0x2a5) || + (x >= 0x2a7 && x <= 0x2b6)) return 1; switch (x) @@ -93,11 +98,14 @@ _DEFUN(iswalpha,(c), wint_t c) size = sizeof(u0); break; case 0x01: + case 0x11: case 0x15: + case 0x1e: case 0xa0: case 0xa1: case 0xa2: case 0xa3: + case 0xa5: case 0xf9: case 0xfc: case 0x2f8: @@ -127,6 +135,10 @@ _DEFUN(iswalpha,(c), wint_t c) table = u7; size = sizeof(u7); break; + case 0x08: + table = u8; + size = sizeof(u8); + break; case 0x09: table = u9; size = sizeof(u9); @@ -159,10 +171,6 @@ _DEFUN(iswalpha,(c), wint_t c) table = u10; size = sizeof(u10); break; - case 0x11: - table = u11; - size = sizeof(u11); - break; case 0x12: table = u12; size = sizeof(u12); @@ -187,9 +195,25 @@ _DEFUN(iswalpha,(c), wint_t c) table = u18; size = sizeof(u18); break; - case 0x1e: - table = u1e; - size = sizeof(u1e); + case 0x19: + table = u19; + size = sizeof(u19); + break; + case 0x1a: + table = u1a; + size = sizeof(u1a); + break; + case 0x1b: + table = u1b; + size = sizeof(u1b); + break; + case 0x1c: + table = u1c; + size = sizeof(u1c); + break; + case 0x1d: + table = u1d; + size = sizeof(u1d); break; case 0x1f: table = u1f; @@ -207,6 +231,18 @@ _DEFUN(iswalpha,(c), wint_t c) table = u24; size = sizeof(u24); break; + case 0x2c: + table = u2c; + size = sizeof(u2c); + break; + case 0x2d: + table = u2d; + size = sizeof(u2d); + break; + case 0x2e: + table = u2e; + size = sizeof(u2e); + break; case 0x30: table = u30; size = sizeof(u30); @@ -227,6 +263,30 @@ _DEFUN(iswalpha,(c), wint_t c) table = ua4; size = sizeof(ua4); break; + case 0xa6: + table = ua6; + size = sizeof(ua6); + break; + case 0xa7: + table = ua7; + size = sizeof(ua7); + break; + case 0xa8: + table = ua8; + size = sizeof(ua8); + 
break; + case 0xa9: + table = ua9; + size = sizeof(ua9); + break; + case 0xaa: + table = uaa; + size = sizeof(uaa); + break; + case 0xab: + table = uab; + size = sizeof(uab); + break; case 0xd7: table = ud7; size = sizeof(ud7); @@ -251,6 +311,18 @@ _DEFUN(iswalpha,(c), wint_t c) table = uff; size = sizeof(uff); break; + case 0x100: + table = u100; + size = sizeof(u100); + break; + case 0x101: + table = u101; + size = sizeof(u101); + break; + case 0x102: + table = u102; + size = sizeof(u102); + break; case 0x103: table = u103; size = sizeof(u103); @@ -259,6 +331,42 @@ _DEFUN(iswalpha,(c), wint_t c) table = u104; size = sizeof(u104); break; + case 0x108: + table = u108; + size = sizeof(u108); + break; + case 0x109: + table = u109; + size = sizeof(u109); + break; + case 0x10a: + table = u10a; + size = sizeof(u10a); + break; + case 0x10b: + table = u10b; + size = sizeof(u10b); + break; + case 0x10c: + table = u10c; + size = sizeof(u10c); + break; + case 0x110: + table = u110; + size = sizeof(u110); + break; + case 0x123: + table = u123; + size = sizeof(u123); + break; + case 0x124: + table = u124; + size = sizeof(u124); + break; + case 0x134: + table = u134; + size = sizeof(u134); + break; case 0x1d4: table = u1d4; size = sizeof(u1d4); @@ -275,10 +383,18 @@ _DEFUN(iswalpha,(c), wint_t c) table = u1d7; size = sizeof(u1d7); break; + case 0x1f1: + table = u1f1; + size = sizeof(u1f1); + break; case 0x2a6: table = u2a6; size = sizeof(u2a6); break; + case 0x2b7: + table = u2b7; + size = sizeof(u2b7); + break; case 0x2fa: table = u2fa; size = sizeof(u2fa); diff --git a/newlib/libc/ctype/iswcntrl.c b/newlib/libc/ctype/iswcntrl.c index 94439784f..05b3ea99a 100644 --- a/newlib/libc/ctype/iswcntrl.c +++ b/newlib/libc/ctype/iswcntrl.c @@ -67,6 +67,9 @@ _DEFUN(iswcntrl,(c), wint_t c) { #ifdef _MB_CAPABLE c = _jp2uc (c); + + /* Based on Unicode 5.2. All characters from general category "Cc", "Zl", + and "Zp". 
*/ return ((c >= 0x0000 && c <= 0x001f) || (c >= 0x007f && c <= 0x009f) || c == 0x2028 || c == 0x2029); diff --git a/newlib/libc/ctype/iswprint.c b/newlib/libc/ctype/iswprint.c index 055855e78..0ab955975 100644 --- a/newlib/libc/ctype/iswprint.c +++ b/newlib/libc/ctype/iswprint.c @@ -78,13 +78,19 @@ _DEFUN(iswprint,(c), wint_t c) c = _jp2uc (c); + /* Based on and tested against Unicode 5.2 + See utf8print.h for a description how to fetch the data. */ x = (c >> 8); /* for some large sections, all characters are printuation so handle them here */ - if ((x >= 0x34 && x <= 0x4c) || + if ((x >= 0x33 && x <= 0x4c) || (x >= 0x4e && x <= 0x9e) || + (x >= 0xa0 && x <= 0xa3) || (x >= 0xac && x <= 0xd6) || (x >= 0xe0 && x <= 0xf9) || + (x >= 0x120 && x <= 0x122) || + (x >= 0x130 && x <= 0x133) || (x >= 0x200 && x <= 0x2a5) || + (x >= 0x2a7 && x <= 0x2b6) || (x >= 0xf00 && x <= 0xffe) || (x >= 0x1000 && x <= 0x10fe)) return 1; @@ -92,16 +98,18 @@ _DEFUN(iswprint,(c), wint_t c) switch (x) { case 0x01: + case 0x02: + case 0x04: + case 0x11: + case 0x14: case 0x15: + case 0x1e: case 0x22: case 0x25: case 0x28: case 0x29: case 0x2a: - case 0xa0: - case 0xa1: - case 0xa2: - case 0xa3: + case 0xa5: case 0xfc: case 0x2f8: case 0x2f9: @@ -110,18 +118,10 @@ _DEFUN(iswprint,(c), wint_t c) table = u0; size = sizeof(u0); break; - case 0x02: - table = u2; - size = sizeof(u2); - break; case 0x03: table = u3; size = sizeof(u3); break; - case 0x04: - table = u4; - size = sizeof(u4); - break; case 0x05: table = u5; size = sizeof(u5); @@ -134,6 +134,10 @@ _DEFUN(iswprint,(c), wint_t c) table = u7; size = sizeof(u7); break; + case 0x08: + table = u8; + size = sizeof(u8); + break; case 0x09: table = u9; size = sizeof(u9); @@ -166,10 +170,6 @@ _DEFUN(iswprint,(c), wint_t c) table = u10; size = sizeof(u10); break; - case 0x11: - table = u11; - size = sizeof(u11); - break; case 0x12: table = u12; size = sizeof(u12); @@ -178,10 +178,6 @@ _DEFUN(iswprint,(c), wint_t c) table = u13; size = sizeof(u13); 
break; - case 0x14: - table = u14; - size = sizeof(u14); - break; case 0x16: table = u16; size = sizeof(u16); @@ -194,9 +190,25 @@ _DEFUN(iswprint,(c), wint_t c) table = u18; size = sizeof(u18); break; - case 0x1e: - table = u1e; - size = sizeof(u1e); + case 0x19: + table = u19; + size = sizeof(u19); + break; + case 0x1a: + table = u1a; + size = sizeof(u1a); + break; + case 0x1b: + table = u1b; + size = sizeof(u1b); + break; + case 0x1c: + table = u1c; + size = sizeof(u1c); + break; + case 0x1d: + table = u1d; + size = sizeof(u1d); break; case 0x1f: table = u1f; @@ -226,6 +238,18 @@ _DEFUN(iswprint,(c), wint_t c) table = u27; size = sizeof(u27); break; + case 0x2b: + table = u2b; + size = sizeof(u2b); + break; + case 0x2c: + table = u2c; + size = sizeof(u2c); + break; + case 0x2d: + table = u2d; + size = sizeof(u2d); + break; case 0x2e: table = u2e; size = sizeof(u2e); @@ -246,10 +270,6 @@ _DEFUN(iswprint,(c), wint_t c) table = u32; size = sizeof(u32); break; - case 0x33: - table = u33; - size = sizeof(u33); - break; case 0x4d: table = u4d; size = sizeof(u4d); @@ -262,6 +282,30 @@ _DEFUN(iswprint,(c), wint_t c) table = ua4; size = sizeof(ua4); break; + case 0xa6: + table = ua6; + size = sizeof(ua6); + break; + case 0xa7: + table = ua7; + size = sizeof(ua7); + break; + case 0xa8: + table = ua8; + size = sizeof(ua8); + break; + case 0xa9: + table = ua9; + size = sizeof(ua9); + break; + case 0xaa: + table = uaa; + size = sizeof(uaa); + break; + case 0xab: + table = uab; + size = sizeof(uab); + break; case 0xd7: table = ud7; size = sizeof(ud7); @@ -286,6 +330,18 @@ _DEFUN(iswprint,(c), wint_t c) table = uff; size = sizeof(uff); break; + case 0x100: + table = u100; + size = sizeof(u100); + break; + case 0x101: + table = u101; + size = sizeof(u101); + break; + case 0x102: + table = u102; + size = sizeof(u102); + break; case 0x103: table = u103; size = sizeof(u103); @@ -294,6 +350,46 @@ _DEFUN(iswprint,(c), wint_t c) table = u104; size = sizeof(u104); break; + case 0x108: 
+ table = u108; + size = sizeof(u108); + break; + case 0x109: + table = u109; + size = sizeof(u109); + break; + case 0x10a: + table = u10a; + size = sizeof(u10a); + break; + case 0x10b: + table = u10b; + size = sizeof(u10b); + break; + case 0x10c: + table = u10c; + size = sizeof(u10c); + break; + case 0x10e: + table = u10e; + size = sizeof(u10e); + break; + case 0x110: + table = u110; + size = sizeof(u110); + break; + case 0x123: + table = u123; + size = sizeof(u123); + break; + case 0x124: + table = u124; + size = sizeof(u124); + break; + case 0x134: + table = u134; + size = sizeof(u134); + break; case 0x1d0: table = u1d0; size = sizeof(u1d0); @@ -302,6 +398,14 @@ _DEFUN(iswprint,(c), wint_t c) table = u1d1; size = sizeof(u1d1); break; + case 0x1d2: + table = u1d2; + size = sizeof(u1d2); + break; + case 0x1d3: + table = u1d3; + size = sizeof(u1d3); + break; case 0x1d4: table = u1d4; size = sizeof(u1d4); @@ -318,10 +422,26 @@ _DEFUN(iswprint,(c), wint_t c) table = u1d7; size = sizeof(u1d7); break; + case 0x1f0: + table = u1f0; + size = sizeof(u1f0); + break; + case 0x1f1: + table = u1f1; + size = sizeof(u1f1); + break; + case 0x1f2: + table = u1f2; + size = sizeof(u1f2); + break; case 0x2a6: table = u2a6; size = sizeof(u2a6); break; + case 0x2b7: + table = u2b7; + size = sizeof(u2b7); + break; case 0x2fa: table = u2fa; size = sizeof(u2fa); @@ -330,6 +450,10 @@ _DEFUN(iswprint,(c), wint_t c) table = ue00; size = sizeof(ue00); break; + case 0xe01: + table = ue01; + size = sizeof(ue01); + break; case 0xfff: table = ufff; size = sizeof(ufff); diff --git a/newlib/libc/ctype/iswpunct.c b/newlib/libc/ctype/iswpunct.c index 4e9dd88d2..e65771a70 100644 --- a/newlib/libc/ctype/iswpunct.c +++ b/newlib/libc/ctype/iswpunct.c @@ -62,246 +62,9 @@ No supporting OS subroutines are required. 
#include #include "local.h" -#ifdef _MB_CAPABLE -#include "utf8punct.h" -#endif /* _MB_CAPABLE */ - int _DEFUN(iswpunct,(c), wint_t c) { -#ifdef _MB_CAPABLE - unsigned const char *table; - unsigned char *ptr; - unsigned char ctmp; - int size; - wint_t x; - - c = _jp2uc (c); - - x = (c >> 8); - /* for some large sections, all characters are punctuation so handle them here */ - if ((x >= 0xe0 && x <= 0xf8) || - (x >= 0xf00 && x <= 0xffe) || - (x >= 0x1000 && x <= 0x10fe)) - return 1; - - switch (x) - { - case 0x22: - case 0x25: - case 0x28: - case 0x29: - case 0x2a: - return 1; - case 0x00: - table = u0; - size = sizeof(u0); - break; - case 0x02: - table = u2; - size = sizeof(u2); - break; - case 0x03: - table = u3; - size = sizeof(u3); - break; - case 0x04: - table = u4; - size = sizeof(u4); - break; - case 0x05: - table = u5; - size = sizeof(u5); - break; - case 0x06: - table = u6; - size = sizeof(u6); - break; - case 0x07: - table = u7; - size = sizeof(u7); - break; - case 0x09: - table = u9; - size = sizeof(u9); - break; - case 0x0a: - table = ua; - size = sizeof(ua); - break; - case 0x0b: - table = ub; - size = sizeof(ub); - break; - case 0x0c: - table = uc; - size = sizeof(uc); - break; - case 0x0d: - table = ud; - size = sizeof(ud); - break; - case 0x0e: - table = ue; - size = sizeof(ue); - break; - case 0x0f: - table = uf; - size = sizeof(uf); - break; - case 0x10: - table = u10; - size = sizeof(u10); - break; - case 0x13: - table = u13; - size = sizeof(u13); - break; - case 0x16: - table = u16; - size = sizeof(u16); - break; - case 0x17: - table = u17; - size = sizeof(u17); - break; - case 0x18: - table = u18; - size = sizeof(u18); - break; - case 0x1f: - table = u1f; - size = sizeof(u1f); - break; - case 0x20: - table = u20; - size = sizeof(u20); - break; - case 0x21: - table = u21; - size = sizeof(u21); - break; - case 0x23: - table = u23; - size = sizeof(u23); - break; - case 0x24: - table = u24; - size = sizeof(u24); - break; - case 0x26: - table = u26; 
- size = sizeof(u26); - break; - case 0x27: - table = u27; - size = sizeof(u27); - break; - case 0x2e: - table = u2e; - size = sizeof(u2e); - break; - case 0x2f: - table = u2f; - size = sizeof(u2f); - break; - case 0x30: - table = u30; - size = sizeof(u30); - break; - case 0x31: - table = u31; - size = sizeof(u31); - break; - case 0x32: - table = u32; - size = sizeof(u32); - break; - case 0x33: - table = u33; - size = sizeof(u33); - break; - case 0xa4: - table = ua4; - size = sizeof(ua4); - break; - case 0xfb: - table = ufb; - size = sizeof(ufb); - break; - case 0xfd: - table = ufd; - size = sizeof(ufd); - break; - case 0xfe: - table = ufe; - size = sizeof(ufe); - break; - case 0xff: - table = uff; - size = sizeof(uff); - break; - case 0x103: - table = u103; - size = sizeof(u103); - break; - case 0x1d0: - table = u1d0; - size = sizeof(u1d0); - break; - case 0x1d1: - table = u1d1; - size = sizeof(u1d1); - break; - case 0x1d6: - table = u1d6; - size = sizeof(u1d6); - break; - case 0x1d7: - table = u1d7; - size = sizeof(u1d7); - break; - case 0xe00: - table = ue00; - size = sizeof(ue00); - break; - case 0xfff: - table = ufff; - size = sizeof(ufff); - break; - case 0x10ff: - table = u10ff; - size = sizeof(u10ff); - break; - default: - return 0; - } - /* we have narrowed down to a section of 256 characters to check */ - /* now check if c matches the punctuation wide-chars within that section */ - ptr = (unsigned char *)table; - ctmp = (unsigned char)c; - while (ptr < table + size) - { - if (ctmp == *ptr) - return 1; - if (ctmp < *ptr) - return 0; - /* otherwise c > *ptr */ - /* look for 0x0 as next element which indicates a range */ - ++ptr; - if (*ptr == 0x0) - { - /* we have a range..see if c falls within range */ - ++ptr; - if (ctmp <= *ptr) - return 1; - ++ptr; - } - } - /* not in table */ - return 0; -#else - return (c < (wint_t)0x100 ? 
ispunct (c) : 0); -#endif /* _MB_CAPABLE */ + return (!iswalnum (c) && iswgraph (c)); } diff --git a/newlib/libc/ctype/towlower.c b/newlib/libc/ctype/towlower.c index f1ce1f5e0..f4d70b69a 100644 --- a/newlib/libc/ctype/towlower.c +++ b/newlib/libc/ctype/towlower.c @@ -71,15 +71,19 @@ _DEFUN(towlower,(c), wint_t c) { #ifdef _MB_CAPABLE c = _jp2uc (c); + /* Based on and tested against Unicode 5.2 */ + + /* Expression used to filter out the characters for the below code: + + awk -F\; '{ if ( $14 != "" ) print $1; }' UnicodeData.txt + */ if (c < 0x100) { if ((c >= 0x0041 && c <= 0x005a) || - (c >= 0x00c0 && c <= 0x00de)) + (c >= 0x00c0 && c <= 0x00d6) || + (c >= 0x00d8 && c <= 0x00de)) return (c + 0x20); - if (c == 0x00b5) - return 0x03bc; - return c; } else if (c < 0x300) @@ -96,8 +100,11 @@ _DEFUN(towlower,(c), wint_t c) return c; } + if (c == 0x0130) + return 0x0069; + if ((c >= 0x0139 && c <= 0x0147) || - (c >= 0x01cd && c <= 0x91db)) + (c >= 0x01cd && c <= 0x01db)) { if (c & 0x01) return (c + 1); @@ -146,9 +153,6 @@ _DEFUN(towlower,(c), wint_t c) case 0x01f4: k = c + 1; break; - case 0x017f: - k = 0x0073; - break; case 0x0181: k = 0x0253; break; @@ -227,17 +231,56 @@ _DEFUN(towlower,(c), wint_t c) if (k != 0) return k; } - - if (c == 0x0220) - return 0x019e; + else if (c == 0x0220) + return 0x019e; + else if (c >= 0x023a && c <= 0x024e) + { + wint_t k; + switch (c) + { + case 0x023a: + k = 0x2c65; + break; + case 0x023b: + case 0x0241: + case 0x0246: + case 0x0248: + case 0x024a: + case 0x024c: + case 0x024e: + k = c + 1; + break; + case 0x023d: + k = 0x019a; + break; + case 0x023e: + k = 0x2c66; + break; + case 0x0243: + k = 0x0180; + break; + case 0x0244: + k = 0x0289; + break; + case 0x0245: + k = 0x028c; + break; + default: + k = 0; + } + if (k != 0) + return k; + } } else if (c < 0x0400) { + if (c == 0x0370 || c == 0x0372 || c == 0x0376) + return (c + 1); if (c >= 0x0391 && c <= 0x03ab && c != 0x03a2) return (c + 0x20); if (c >= 0x03d8 && c <= 0x03ee && !(c & 
0x01)) return (c + 1); - if (c >= 0x0386 && c <= 0x03f5) + if (c >= 0x0386 && c <= 0x03ff) { wint_t k; switch (c) @@ -261,37 +304,31 @@ _DEFUN(towlower,(c), wint_t c) k = 0x03cd; break; case 0x038f: - k = 0x038f; + k = 0x03ce; break; - case 0x03c2: - k = 0x03c3; - break; - case 0x03d0: - k = 0x03b2; - break; - case 0x03d1: - k = 0x03b8; - break; - case 0x03d5: - k = 0x03c6; - break; - case 0x03d6: - k = 0x03c0; - break; - case 0x03f0: - k = 0x03ba; - break; - case 0x03f1: - k = 0x03c1; - break; - case 0x03f2: - k = 0x03c3; + case 0x03cf: + k = 0x03d7; break; case 0x03f4: k = 0x03b8; break; - case 0x03f5: - k = 0x03b5; + case 0x03f7: + k = 0x03f8; + break; + case 0x03f9: + k = 0x03f2; + break; + case 0x03fa: + k = 0x03fb; + break; + case 0x03fd: + k = 0x037b; + break; + case 0x03fe: + k = 0x037c; + break; + case 0x03ff: + k = 0x037d; break; default: k = 0; @@ -299,9 +336,6 @@ _DEFUN(towlower,(c), wint_t c) if (k != 0) return k; } - - if (c == 0x0345) - return 0x03b9; } else if (c < 0x500) { @@ -313,14 +347,16 @@ _DEFUN(towlower,(c), wint_t c) if ((c >= 0x0460 && c <= 0x0480) || (c >= 0x048a && c <= 0x04be) || - (c >= 0x04d0 && c <= 0x04f4) || - (c == 0x04f8)) + (c >= 0x04d0 && c <= 0x04fe)) { if (!(c & 0x01)) return (c + 1); return c; } + if (c == 0x04c0) + return 0x04cf; + if (c >= 0x04c1 && c <= 0x04cd) { if (c & 0x01) @@ -331,6 +367,7 @@ _DEFUN(towlower,(c), wint_t c) else if (c < 0x1f00) { if ((c >= 0x0500 && c <= 0x050e) || + (c >= 0x0510 && c <= 0x0524) || (c >= 0x1e00 && c <= 0x1e94) || (c >= 0x1ea0 && c <= 0x1ef8)) { @@ -342,8 +379,14 @@ _DEFUN(towlower,(c), wint_t c) if (c >= 0x0531 && c <= 0x0556) return (c + 0x30); - if (c == 0x1e9b) - return 0x1e61; + if (c >= 0x10a0 && c <= 0x10c5) + return (c + 0x1c60); + + if (c == 0x1e9e) + return 0x00df; + + if (c >= 0x1efa && c <= 0x1efe && !(c & 0x01)) + return (c + 1); } else if (c < 0x2000) { @@ -385,9 +428,6 @@ _DEFUN(towlower,(c), wint_t c) case 0x1fbc: k = 0x1fb3; break; - case 0x1fbe: - k = 0x03b9; - break; 
case 0x1fc8: case 0x1fc9: case 0x1fca: @@ -408,6 +448,10 @@ _DEFUN(towlower,(c), wint_t c) case 0x1fec: k = 0x1fe5; break; + case 0x1ff8: + case 0x1ff9: + k = c - 0x80; + break; case 0x1ffa: case 0x1ffb: k = c - 0x7e; @@ -422,26 +466,100 @@ _DEFUN(towlower,(c), wint_t c) return k; } } - else + else if (c < 0x2c00) { if (c >= 0x2160 && c <= 0x216f) return (c + 0x10); - + if (c >= 0x24b6 && c <= 0x24cf) return (c + 0x1a); + switch (c) + { + case 0x2126: + return 0x03c9; + case 0x212a: + return 0x006b; + case 0x212b: + return 0x00e5; + case 0x2132: + return 0x214e; + case 0x2183: + return 0x2184; + } + } + else if (c < 0x2d00) + { + if (c >= 0x2c00 && c <= 0x2c2e) + return (c + 0x30); + + if (c >= 0x2c80 && c <= 0x2ce2 && !(c & 0x01)) + return (c + 1); + + switch (c) + { + case 0x2c60: + return 0x2c61; + case 0x2c62: + return 0x026b; + case 0x2c63: + return 0x1d7d; + case 0x2c64: + return 0x027d; + case 0x2c67: + case 0x2c69: + case 0x2c6b: + case 0x2c72: + case 0x2c75: + case 0x2ceb: + case 0x2ced: + return c + 1; + case 0x2c6d: + return 0x0251; + case 0x2c6e: + return 0x0271; + case 0x2c6f: + return 0x0250; + case 0x2c70: + return 0x0252; + case 0x2c7e: + return 0x023f; + case 0x2c7f: + return 0x0240; + } + } + else if (c >= 0xa600 && c < 0xa800) + { + if ((c >= 0xa640 && c <= 0xa65e) || + (c >= 0xa662 && c <= 0xa66c) || + (c >= 0xa680 && c <= 0xa696) || + (c >= 0xa722 && c <= 0xa72e) || + (c >= 0xa732 && c <= 0xa76e) || + (c >= 0xa77f && c <= 0xa786)) + { + if (!(c & 1)) + return (c + 1); + return c; + } + + switch (c) + { + case 0xa779: + case 0xa77b: + case 0xa77e: + case 0xa78b: + return (c + 1); + case 0xa77d: + return 0x1d79; + } + } + else + { if (c >= 0xff21 && c <= 0xff3a) return (c + 0x20); - if (c >= 0x10400 && c <= 0x10425) + if (c >= 0x10400 && c <= 0x10427) return (c + 0x28); - - if (c == 0x2126) - return 0x03c9; - if (c == 0x212a) - return 0x006b; - if (c == 0x212b) - return 0x00e5; } return c; #else diff --git a/newlib/libc/ctype/towupper.c 
b/newlib/libc/ctype/towupper.c index 945266fea..8b1755b53 100644 --- a/newlib/libc/ctype/towupper.c +++ b/newlib/libc/ctype/towupper.c @@ -71,12 +71,18 @@ _DEFUN(towupper,(c), wint_t c) { #ifdef _MB_CAPABLE c = _jp2uc (c); + /* Based on and tested against Unicode 5.2 */ + + /* Expression used to filter out the characters for the below code: + + awk -F\; '{ if ( $13 != "" ) print $1; }' UnicodeData.txt + */ if (c < 0x100) { if (c == 0x00b5) return 0x039c; - if ((c >= 0x00e0 && c <= 0x00fe) || + if ((c >= 0x00e0 && c <= 0x00fe && c != 0x00f7) || (c >= 0x0061 && c <= 0x007a)) return (c - 0x20); @@ -92,7 +98,8 @@ _DEFUN(towupper,(c), wint_t c) (c >= 0x014b && c <= 0x0177) || (c >= 0x01df && c <= 0x01ef) || (c >= 0x01f9 && c <= 0x021f) || - (c >= 0x0223 && c <= 0x0233)) + (c >= 0x0223 && c <= 0x0233) || + (c >= 0x0247 && c <= 0x024f)) { if (c & 0x01) return (c - 1); @@ -100,7 +107,8 @@ _DEFUN(towupper,(c), wint_t c) } if ((c >= 0x013a && c <= 0x0148) || - (c >= 0x01ce && c <= 0x1dc)) + (c >= 0x01ce && c <= 0x01dc) || + c == 0x023c || c == 0x0242) { if (!(c & 0x01)) return (c - 1); @@ -121,6 +129,9 @@ _DEFUN(towupper,(c), wint_t c) case 0x017f: k = 0x0053; break; + case 0x0180: + k = 0x0243; + break; case 0x0183: k = 0x0182; break; @@ -142,6 +153,9 @@ _DEFUN(towupper,(c), wint_t c) case 0x0199: k = 0x0198; break; + case 0x019a: + k = 0x023d; + break; case 0x019e: k = 0x0220; break; @@ -176,6 +190,21 @@ _DEFUN(towupper,(c), wint_t c) case 0x01f3: k = 0x01f1; break; + case 0x023f: + k = 0x2c7e; + break; + case 0x0240: + k = 0x2c7f; + break; + case 0x0250: + k = 0x2c6f; + break; + case 0x0251: + k = 0x2c6d; + break; + case 0x0252: + k = 0x2c70; + break; case 0x0253: k = 0x0181; break; @@ -206,15 +235,24 @@ _DEFUN(towupper,(c), wint_t c) case 0x0269: k = 0x0196; break; + case 0x026b: + k = 0x2c62; + break; case 0x026f: k = 0x019c; break; + case 0x0271: + k = 0x2c6e; + break; case 0x0272: k = 0x019d; break; case 0x0275: k = 0x019f; break; + case 0x027d: + k = 0x2c64; + break; 
case 0x0280: k = 0x01a6; break; @@ -224,12 +262,18 @@ _DEFUN(towupper,(c), wint_t c) case 0x0288: k = 0x01ae; break; + case 0x0289: + k = 0x0244; + break; case 0x028a: k = 0x01b1; break; case 0x028b: k = 0x01b2; break; + case 0x028c: + k = 0x0245; + break; case 0x0292: k = 0x01b7; break; @@ -242,86 +286,91 @@ _DEFUN(towupper,(c), wint_t c) } else if (c < 0x0400) { - if (c == 0x03ac) - return 0x0386; - - if ((c & 0xfff0) == 0x03a0 && c >= 0x03ad) - return (c - 0x15); - + wint_t k; + + if (c >= 0x03ad && c <= 0x03af) + return (c - 0x25); + if (c >= 0x03b1 && c <= 0x03cb && c != 0x03c2) return (c - 0x20); - if (c == 0x03c2) - return 0x03a3; - - if (c >= 0x03cc && c <= 0x03f5) + if (c >= 0x03d9 && c <= 0x03ef && (c & 1)) + return (c - 1); + + switch (c) { - wint_t k; - switch (c) - { - case 0x03cc: - k = 0x038c; - break; - case 0x03cd: - case 0x03ce: - k = c - 0x3f; - break; - case 0x03d0: - k = 0x0392; - break; - case 0x03d1: - k = 0x0398; - break; - case 0x03d5: - k = 0x03a6; - break; - case 0x03d6: - k = 0x03a0; - break; - case 0x03d9: - case 0x03db: - case 0x03dd: - case 0x03df: - case 0x03e1: - case 0x03e3: - case 0x03e5: - case 0x03e7: - case 0x03e9: - case 0x03eb: - case 0x03ed: - case 0x03ef: - k = c - 1; - break; - case 0x03f0: - k = 0x039a; - break; - case 0x03f1: - k = 0x03a1; - break; - case 0x03f2: - k = 0x03a3; - break; - case 0x03f5: - k = 0x0395; - break; - default: - k = 0; - } - if (k != 0) - return k; + case 0x0345: + k = 0x0399; + break; + case 0x0371: + case 0x0373: + case 0x0377: + case 0x03f8: + case 0x03fb: + k = c - 1; + break; + case 0x037b: + case 0x037c: + case 0x037d: + k = c + 0x82; + break; + case 0x03ac: + k = 0x0386; + break; + case 0x03c2: + k = 0x03a3; + break; + case 0x03cc: + k = 0x038c; + break; + case 0x03cd: + case 0x03ce: + k = c - 0x3f; + break; + case 0x03d0: + k = 0x0392; + break; + case 0x03d1: + k = 0x0398; + break; + case 0x03d5: + k = 0x03a6; + break; + case 0x03d6: + k = 0x03a0; + break; + case 0x03d7: + k = 0x03cf; + 
break; + case 0x03f0: + k = 0x039a; + break; + case 0x03f1: + k = 0x03a1; + break; + case 0x03f2: + k = 0x03f9; + break; + case 0x03f5: + k = 0x0395; + break; + default: + k = 0; } + if (k != 0) + return k; } else if (c < 0x500) { - if (c >= 0x0450 && c <= 0x045f) - return (c - 0x50); - if (c >= 0x0430 && c <= 0x044f) return (c - 0x20); + if (c >= 0x0450 && c <= 0x045f) + return (c - 0x50); + if ((c >= 0x0461 && c <= 0x0481) || (c >= 0x048b && c <= 0x04bf) || - (c >= 0x04d1 && c <= 0x04f5)) + (c >= 0x04d1 && c <= 0x04ff)) { if (c & 0x01) return (c - 1); @@ -335,23 +384,36 @@ _DEFUN(towupper,(c), wint_t c) return c; } - if (c == 0x04f9) - return 0x04f8; + if (c == 0x04cf) + return 0x04c0; + + if (c >= 0x04f7 && c <= 0x04f9) + return (c - 1); + } + else if (c < 0x0600) + { + if (c >= 0x0501 && c <= 0x0525 && (c & 1)) + return c - 1; + + if (c >= 0x0561 && c <= 0x0586) + return (c - 0x30); } else if (c < 0x1f00) { - if ((c >= 0x0501 && c <= 0x050f) || - (c >= 0x1e01 && c <= 0x1e95) || - (c >= 0x1ea1 && c <= 0x1ef9)) + if (c == 0x1d79) + return 0xa77d; + + if (c == 0x1d7d) + return 0x2c63; + + if ((c >= 0x1e01 && c <= 0x1e95) || + (c >= 0x1ea1 && c <= 0x1eff)) { if (c & 0x01) return (c - 1); return c; } - if (c >= 0x0561 && c <= 0x0586) - return (c - 0x30); - if (c == 0x1e9b) return 0x1e60; } @@ -407,6 +469,9 @@ _DEFUN(towupper,(c), wint_t c) case 0x1f75: k = 0x1fcb; break; + case 0x1fc3: + k = 0x1fcc; + break; case 0x1fd0: k = 0x1fd8; break; @@ -456,18 +521,65 @@ _DEFUN(towupper,(c), wint_t c) return k; } } - else + else if (c < 0x3000) { + if (c == 0x214e) + return 0x2132; + + if (c == 0x2184) + return 0x2183; + if (c >= 0x2170 && c <= 0x217f) return (c - 0x10); if (c >= 0x24d0 && c <= 0x24e9) return (c - 0x1a); + if (c >= 0x2c30 && c <= 0x2c5e) + return (c - 0x30); + + if ((c >= 0x2c68 && c <= 0x2c6c && !(c & 1)) || + (c >= 0x2c81 && c <= 0x2ce3 && (c & 1)) || + c == 0x2c73 || c == 0x2c76 || + c == 0x2cec || c == 0x2cee) + return (c - 1); + + if (c >= 0x2c81 && c <= 
0x2ce3 && (c & 1)) + return (c - 1); + + if (c >= 0x2d00 && c <= 0x2d25) + return (c - 0x1c60); + + switch (c) + { + case 0x2c61: + return 0x2c60; + case 0x2c65: + return 0x023a; + case 0x2c66: + return 0x023e; + } + } + else if (c >= 0xa000 && c < 0xb000) + { + if (((c >= 0xa641 && c <= 0xa65f) || + (c >= 0xa663 && c <= 0xa66d) || + (c >= 0xa681 && c <= 0xa697) || + (c >= 0xa723 && c <= 0xa72f) || + (c >= 0xa733 && c <= 0xa76f) || + (c >= 0xa77f && c <= 0xa787)) && + (c & 1)) + return (c - 1); + + if (c == 0xa77a || c == 0xa77c || c == 0xa78c) + return (c - 1); + } + else + { if (c >= 0xff41 && c <= 0xff5a) return (c - 0x20); - if (c >= 0x10428 && c <= 0x1044d) + if (c >= 0x10428 && c <= 0x1044f) return (c - 0x28); } return c; diff --git a/newlib/libc/ctype/utf8alpha.h b/newlib/libc/ctype/utf8alpha.h index c7ee160ae..d9306b720 100644 --- a/newlib/libc/ctype/utf8alpha.h +++ b/newlib/libc/ctype/utf8alpha.h @@ -27,7 +27,32 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* Generated using unicode.txt 3.2 */ +/* Generated using UnicodeData.txt 5.2 */ + +/* Expression used to filter out the characters for the below tables: + + awk -F\; \ + '{ \ + VAL = strtonum (sprintf("0x%s", $1)); \ + # All of general category "L", except for two Thai characters which \ + # are actually punctuation characters. Old Unicode weirdness. \ + # The character "COMBINING GREEK YPOGEGRAMMENI", as well as all Thai \ + # characters which are in "Mn" category. Old Unicode weirdness. \ + # All numerical digit or letter characters, except the ASCII variants. \ + # This is necessary due to the unfortunate ISO C definition for the \ + # iswdigit class, otherwise these characters are missing in iswalnum. \ + # All "Other Symbols" which are named as "LETTER" characters. \ + # \ + # Before running this test, make sure to expand all Unicode blocks \ + # which are just marked by their first and last character! 
\ + # \ + if ( (match($3, "^L") && VAL != 0x0e2f && VAL != 0x0e46) \ + || (match($3, "^Mn") && (VAL == 0x0345 || match($2, "\\<CHARACTER\\>"))) \ + || (match($3, "^N[dl]") && VAL >= 0x100) \ + || (match($3, "^So") && match($2, "\\<LETTER\\>"))) \ + print $1; \ + }' UnicodeData.txt +*/ static const unsigned char u0[] = { 0x41, 0x0, 0x5a, 0x61, 0x0, 0x7a, 0xaa, 0xb5, @@ -35,68 +60,71 @@ static const unsigned char u0[] = { 0x0, 0xff }; /* u1 all alphabetic */ static const unsigned char u2[] = { - 0x00, 0x0, 0x20, 0x22, 0x0, 0x33, 0x50, 0x0, 0xad, - 0xb0, 0x0, 0xb8, 0xbb, 0x0, 0xc1, 0xd0, 0x0, - 0xd1, 0xe0, 0x0, 0xe4, 0xee }; + 0x00, 0x0, 0xc1, 0xc6, 0x0, 0xd1, + 0xe0, 0x0, 0xe4, 0xec, 0xee }; static const unsigned char u3[] = { - 0x45, 0x7a, 0x86, - 0x88, 0x0, 0x8a, 0x8c, 0x8e, 0x0, 0xa1, 0xa3, - 0x0, 0xce, 0xd0, 0x0, 0xf5 }; + 0x45, 0x70, 0x0, 0x74, 0x76, 0x77, + 0x7a, 0x0, 0x7d, 0x86, 0x88, 0x0, 0x8a, 0x8c, + 0x8e, 0x0, 0xa1, 0xa3, 0x0, 0xf5, + 0xf7, 0x0, 0xff }; static const unsigned char u4[] = { - 0x00, 0x0, 0x81, - 0x8a, 0x0, 0xce, 0xd0, 0x0, 0xf5, 0xf8, 0x0, - 0xf9 }; + 0x00, 0x0, 0x81, 0x8a, 0x0, 0xff }; static const unsigned char u5[] = { - 0x00, 0x0, 0x0f, 0x31, 0x0, 0x56, 0x59, - 0x61, 0x0, 0x87, 0xd0, 0x0, 0xea, 0xf0, 0x0, - 0xf2 }; + 0x00, 0x0, 0x25, 0x31, 0x0, 0x56, 0x59, + 0x61, 0x0, 0x87, 0xd0, 0x0, 0xea, + 0xf0, 0x0, 0xf2 }; static const unsigned char u6[] = { - 0x21, 0x0, 0x3a, - 0x40, 0x0, 0x4a, 0x60, - 0x0, 0x69, 0x6e, 0x0, 0x6f, 0x71, 0x0, 0xd3, - 0xd5, 0xe5, 0x0, 0xe6, 0xf0, 0x0, 0xfc }; + 0x21, 0x0, 0x4a, 0x60, 0x0, 0x69, + 0x6e, 0x0, 0x6f, 0x71, 0x0, 0xd3, + 0xd5, 0xe5, 0x0, 0xe6, 0xee, 0x0, 0xfc, 0xff }; static const unsigned char u7[] = { - 0x10, - 0x12, 0x0, 0x2c, 0x80, 0x0, 0xa5, 0xb1 }; + 0x10, 0x12, 0x0, 0x2f, 0x4d, 0x0, 0xa5, 0xb1, + 0xc0, 0x0, 0xea, 0xf4, 0xf5, 0xfa }; +static const unsigned char u8[] = { + 0x00, 0x0, 0x15, 0x1a, 0x24, 0x28 }; static const unsigned char u9[] = { - 0x05, - 0x0, 0x39, 0x3d, 0x50, 0x58, 0x0, 0x61, 0x66, - 0x0, 0x6f, 0x85, 0x0, 
0x8c, 0x8f, 0x0, 0x90, - 0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0, 0xb2, 0xb6, - 0x0, 0xb9, 0xdc, 0x0, 0xdd, 0xdf, 0x0, 0xe1, - 0xe6, 0x0, 0xf1 }; + 0x04, 0x0, 0x39, 0x3d, 0x50, 0x58, 0x0, 0x61, + 0x66, 0x0, 0x6f, 0x71, 0x72, 0x79, 0x0, 0x7f, + 0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90, + 0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0, 0xb2, + 0xb6, 0x0, 0xb9, 0xbd, 0xce, 0xdc, 0x0, 0xdd, + 0xdf, 0x0, 0xe1, 0xe6, 0x0, 0xf1 }; static const unsigned char ua[] = { - 0x05, 0x0, 0x0a, 0x0f, 0x0, - 0x10, 0x13, 0x0, 0x28, 0x2a, 0x0, 0x30, 0x32, - 0x0, 0x33, 0x35, 0x0, 0x36, 0x38, 0x0, 0x39, - 0x59, 0x0, 0x5c, 0x5e, 0x66, 0x0, 0x6f, 0x72, - 0x0, 0x74, 0x85, 0x0, 0x8b, 0x8d, 0x8f, 0x0, - 0x91, 0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0, 0xb2, - 0x0, 0xb3, 0xb5, 0x0, 0xb9, 0xbd, 0xd0, 0xe0, - 0xe6, 0x0, 0xef }; + 0x05, 0x0, 0x0a, 0x0f, 0x0, 0x10, + 0x13, 0x0, 0x28, 0x2a, 0x0, 0x30, + 0x32, 0x0, 0x33, 0x35, 0x0, 0x36, + 0x38, 0x0, 0x39, 0x59, 0x0, 0x5c, + 0x5e, 0x66, 0x0, 0x6f, 0x72, 0x0, 0x74, + 0x85, 0x0, 0x8d, 0x8f, 0x0, 0x91, + 0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0, + 0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9, + 0xbd, 0xd0, 0xe0, 0xe1, 0xe6, 0x0, 0xef }; static const unsigned char ub[] = { - 0x05, 0x0, 0x0c, 0x0f, 0x0, - 0x10, 0x13, 0x0, 0x28, 0x2a, 0x0, 0x30, 0x32, - 0x0, 0x33, 0x36, 0x0, 0x39, 0x3d, 0x5c, 0x0, - 0x5d, 0x5f, 0x0, 0x61, 0x66, 0x0, 0x6f, 0x83, - 0x85, 0x0, 0x8a, 0x8e, 0x0, 0x90, 0x92, 0x0, - 0x95, 0x99, 0x0, 0x9a, 0x9c, 0x9e, 0x0, 0x9f, - 0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa, 0xae, 0x0, - 0xb5, 0xb7, 0x0, 0xb9, 0xe7, 0x0, 0xef }; + 0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10, + 0x13, 0x0, 0x28, 0x2a, 0x0, 0x30, + 0x32, 0x0, 0x33, 0x35, 0x0, 0x39, 0x3d, + 0x5c, 0x0, 0x5d, 0x5f, 0x0, 0x61, + 0x66, 0x0, 0x6f, 0x71, 0x83, 0x85, 0x0, 0x8a, + 0x8e, 0x0, 0x90, 0x92, 0x0, 0x95, + 0x99, 0x0, 0x9a, 0x9c, 0x9e, 0x0, 0x9f, + 0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa, + 0xae, 0x0, 0xb9, 0xd0, 0xe6, 0x0, 0xef }; static const unsigned char uc[] = { - 0x05, - 0x0, 0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28, - 0x2a, 0x0, 0x33, 0x35, 0x0, 0x39, 0x60, 0x0, 
- 0x61, 0x66, 0x0, 0x6f, 0x85, 0x0, 0x8c, 0x8e, - 0x0, 0x90, 0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3, - 0xb5, 0x0, 0xb9, 0xde, 0xe0, 0x0, 0xe1, 0xe6, - 0x0, 0xef }; + 0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10, + 0x12, 0x0, 0x28, 0x2a, 0x0, 0x33, + 0x35, 0x0, 0x39, 0x3d, 0x58, 0x59, + 0x60, 0x0, 0x61, 0x66, 0x0, 0x6f, + 0x85, 0x0, 0x8c, 0x8e, 0x0, 0x90, + 0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3, + 0xb5, 0x0, 0xb9, 0xbd, 0xde, 0xe0, 0x0, 0xe1, + 0xe6, 0x0, 0xef }; static const unsigned char ud[] = { 0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10, - 0x12, 0x0, 0x28, 0x2a, 0x0, 0x39, 0x60, 0x0, - 0x61, 0x66, 0x0, 0x6f, 0x85, 0x0, 0x96, 0x9a, - 0x0, 0xb1, 0xb3, 0x0, 0xbb, 0xbd, 0xc0, 0x0, - 0xc6 }; + 0x12, 0x0, 0x28, 0x2a, 0x0, 0x39, 0x3d, + 0x60, 0x0, 0x61, 0x66, 0x0, 0x6f, + 0x7a, 0x0, 0x7f, 0x85, 0x0, 0x96, 0x9a, + 0x0, 0xb1, 0xb3, 0x0, 0xbb, 0xbd, + 0xc0, 0x0, 0xc6 }; static const unsigned char ue[] = { 0x01, 0x0, 0x2e, 0x30, 0x0, 0x3a, 0x40, 0x0, 0x45, 0x47, 0x0, 0x4e, 0x50, 0x0, 0x59, @@ -107,44 +135,58 @@ static const unsigned char ue[] = { 0xc4, 0xc6, 0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd }; static const unsigned char uf[] = { 0x00, 0x20, 0x0, 0x29, 0x40, 0x0, 0x47, 0x49, - 0x0, 0x6a, 0x88, 0x0, 0x8b }; + 0x0, 0x6c, 0x88, 0x0, 0x8b }; static const unsigned char u10[] = { - 0x00, 0x0, 0x21, - 0x23, 0x0, 0x27, 0x29, 0x0, 0x2a, 0x40, 0x0, - 0x49, 0x50, 0x0, 0x55, 0xa0, 0x0, 0xc5, 0xd0, - 0x0, 0xf8 }; -static const unsigned char u11[] = { - 0x00, 0x0, 0x59, 0x5f, 0x0, 0xa2, - 0xa8, 0x0, 0xf9 }; + 0x00, 0x0, 0x2a, 0x3f, 0x0, 0x49, + 0x50, 0x0, 0x55, 0x5a, 0x0, 0x5d, + 0x61, 0x65, 0x66, 0x6e, 0x0, 0x70, + 0x75, 0x0, 0x81, 0x8e, 0x90, 0x0, 0x99, + 0xa0, 0x0, 0xc5, 0xd0, 0x0, 0xfa, 0xfc }; +/* u11 all alphabetic */ static const unsigned char u12[] = { - 0x00, 0x0, 0x06, 0x08, 0x0, - 0x46, 0x48, 0x4a, 0x0, 0x4d, 0x50, 0x0, 0x56, - 0x58, 0x5a, 0x0, 0x5d, 0x60, 0x0, 0x86, 0x88, - 0x8a, 0x0, 0x8d, 0x90, 0x0, 0xae, 0xb0, 0xb2, - 0x0, 0xb5, 0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, - 0xc5, 0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6, 
0xd8, - 0x0, 0xee, 0xf0, 0x0, 0xff }; + 0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d, + 0x50, 0x0, 0x56, 0x58, 0x5a, 0x0, 0x5d, + 0x60, 0x0, 0x88, 0x8a, 0x0, 0x8d, + 0x90, 0x0, 0xb0, 0xb2, 0x0, 0xb5, + 0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, 0xc5, + 0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff }; static const unsigned char u13[] = { - 0x00, 0x0, 0x0e, 0x10, 0x12, 0x0, - 0x15, 0x18, 0x0, 0x1e, 0x20, 0x0, 0x46, 0x48, - 0x0, 0x5a, 0x69, 0x0, 0x71, 0xa0, 0x0, 0xf4 }; + 0x00, 0x0, 0x10, 0x12, 0x0, 0x15, + 0x18, 0x0, 0x5a, 0x80, 0x0, 0x8f, + 0xa0, 0x0, 0xf4 }; static const unsigned char u14[] = { 0x01, 0x0, 0xff }; /* u15 all alphabetic */ static const unsigned char u16[] = { - 0x00, 0x0, 0x6c, 0x6f, 0x0, 0x76, - 0x81, 0x0, 0x9a, 0xa0, 0x0, 0xea, 0xee, 0x0, 0xf0 }; + 0x00, 0x0, 0x6c, 0x6f, 0x0, 0x7f, + 0x81, 0x0, 0x9a, 0xa0, 0x0, 0xea, + 0xee, 0x0, 0xf0 }; static const unsigned char u17[] = { - 0x00, - 0x0, 0x0c, 0x0e, 0x0, 0x11, 0x20, 0x0, 0x31, - 0x40, 0x0, 0x51, 0x60, 0x0, 0x6c, 0x6e, 0x0, - 0x70, 0x80, 0x0, 0xb3, 0xd7, 0xdc, 0xe0, 0x0, - 0xe9 }; + 0x00, 0x0, 0x0c, 0x0e, 0x0, 0x11, + 0x20, 0x0, 0x31, 0x40, 0x0, 0x51, + 0x60, 0x0, 0x6c, 0x6e, 0x0, 0x70, + 0x80, 0x0, 0xb3, 0xd7, 0xdc, 0xe0, 0x0, 0xe9 }; static const unsigned char u18[] = { - 0x10, 0x0, 0x19, 0x20, 0x0, 0x77, 0x80, - 0x0, 0xa8 }; -static const unsigned char u1e[] = { - 0x00, 0x0, 0x9b, 0xa0, 0x0, 0xf9 }; + 0x10, 0x0, 0x19, 0x20, 0x0, 0x77, + 0x80, 0x0, 0xa8, 0xaa, 0xb0, 0x0, 0xf5 }; +static const unsigned char u19[] = { + 0x00, 0x0, 0x1c, 0x46, 0x0, 0x6d, + 0x70, 0x0, 0x74, 0x80, 0x0, 0xab, + 0xc1, 0x0, 0xc7, 0xd0, 0x0, 0xda }; +static const unsigned char u1a[] = { + 0x00, 0x0, 0x16, 0x20, 0x0, 0x54, + 0x80, 0x0, 0x89, 0x90, 0x0, 0x99, 0xa7 }; +static const unsigned char u1b[] = { + 0x05, 0x0, 0x33, 0x45, 0x0, 0x4b, + 0x50, 0x0, 0x59, 0x83, 0x0, 0xa0, + 0xae, 0x0, 0xb9 }; +static const unsigned char u1c[] = { + 0x00, 0x0, 0x23, 0x40, 0x0, 0x49, + 0x4d, 0x0, 0x7d, 0xe9, 0x0, 0xec, + 0xee, 0x0, 0xf1 }; +static const unsigned char u1d[] 
= { + 0x00, 0x0, 0xbf }; +/* u1e all alphabetic */ static const unsigned char u1f[] = { 0x00, 0x0, 0x15, 0x18, 0x0, 0x1d, 0x20, 0x0, 0x45, 0x48, 0x0, 0x4d, 0x50, 0x0, 0x57, 0x59, @@ -154,37 +196,77 @@ static const unsigned char u1f[] = { 0xe0, 0x0, 0xec, 0xf2, 0x0, 0xf4, 0xf6, 0x0, 0xfc }; static const unsigned char u20[] = { - 0x71, 0x7f }; + 0x71, 0x7f, 0x90, 0x0, 0x94 }; static const unsigned char u21[] = { - 0x02, 0x07, 0x0a, 0x0, 0x13, - 0x15, 0x19, 0x0, 0x1d, 0x24, 0x26, 0x28, 0x0, - 0x2d, 0x2f, 0x0, 0x31, 0x33, 0x0, 0x39, 0x3d, - 0x0, 0x3f, 0x45, 0x0, 0x49, 0x60, 0x0, 0x83 }; + 0x02, 0x07, 0x0a, 0x0, 0x13, 0x15, + 0x19, 0x0, 0x1d, 0x24, 0x26, 0x28, 0x0, 0x2d, + 0x2f, 0x0, 0x39, 0x3c, 0x0, 0x3f, + 0x45, 0x0, 0x49, 0x4e, 0x60, 0x0, 0x88 }; static const unsigned char u24[] = { 0x9c, 0x0, 0xe9 }; +static const unsigned char u2c[] = { + 0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e, + 0x60, 0x0, 0xe4, 0xeb, 0x0, 0xee }; +static const unsigned char u2d[] = { + 0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f, + 0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6, + 0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6, + 0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6, + 0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6, + 0xd8, 0x0, 0xde }; +static const unsigned char u2e[] = { + 0x2f }; static const unsigned char u30[] = { 0x05, 0x0, 0x07, 0x21, 0x0, 0x29, 0x31, 0x0, 0x35, 0x38, 0x0, 0x3c, 0x41, 0x0, 0x96, 0x9d, 0x0, 0x9f, 0xa1, 0x0, 0xfa, 0xfc, 0x0, 0xff }; static const unsigned char u31[] = { - 0x05, 0x0, 0x2c, 0x31, 0x0, + 0x05, 0x0, 0x2d, 0x31, 0x0, 0x8e, 0xa0, 0x0, 0xb7, 0xf0, 0x0, 0xff }; /* u34 to u4c all alphabetic */ static const unsigned char u4d[] = { 0x00, 0x0, 0xb5 }; /* u4e to u9e all alphabetic */ static const unsigned char u9f[] = { - 0x00, 0x0, 0xa5 }; + 0x00, 0x0, 0xcb }; /* ua0 to ua3 all alphabetic */ static const unsigned char ua4[] = { - 0x00, 0x0, 0x8c }; + 0x00, 0x0, 0x8c, 0xd0, 0x0, 0xfd }; +/* ua5 all alphabetic */ +static const unsigned char ua6[] = { + 0x00, 0x0, 0x0c, 0x10, 0x0, 0x2b, + 0x40, 0x0, 0x5f, 0x62, 0x0, 0x6e, + 
0x7f, 0x0, 0x97, 0xa0, 0x0, 0xef }; +static const unsigned char ua7[] = { + 0x17, 0x0, 0x1f, 0x22, 0x0, 0x88, + 0x8b, 0x8c, + 0xfb, 0x0, 0xff }; +static const unsigned char ua8[] = { + 0x00, 0x01, 0x03, 0x0, 0x05, 0x07, 0x0, 0x0a, + 0x0c, 0x0, 0x22, 0x40, 0x0, 0x73, + 0x82, 0x0, 0xb3, 0xd0, 0x0, 0xd9, + 0xf2, 0x0, 0xf7, 0xfb }; +static const unsigned char ua9[] = { + 0x00, 0x0, 0x25, 0x30, 0x0, 0x46, + 0x60, 0x0, 0x7c, 0x84, 0x0, 0xb2, + 0xcf, 0x0, 0xd9 }; +static const unsigned char uaa[] = { + 0x00, 0x0, 0x28, 0x40, 0x0, 0x42, + 0x44, 0x0, 0x4b, 0x50, 0x0, 0x59, + 0x60, 0x0, 0x76, 0x7a, 0x80, 0x0, 0xaf, + 0xb1, 0xb5, 0xb6, 0xb9, 0x0, 0xbd, + 0xc0, 0xc2, 0xdb, 0x0, 0xdd }; +static const unsigned char uab[] = { + 0xc0, 0x0, 0xe2, 0xf0, 0x0, 0xf9 }; /* uac to ud6 all alphabetic */ static const unsigned char ud7[] = { - 0xa3 }; + 0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6, + 0xcb, 0x0, 0xfb }; /* uf9 all alphabetic */ static const unsigned char ufa[] = { - 0x00, 0x0, 0x2d, 0x30, 0x0, 0x6a }; + 0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d, + 0x70, 0x0, 0xd9 }; static const unsigned char ufb[] = { 0x00, 0x0, 0x06, 0x13, 0x0, 0x17, 0x1d, 0x1f, 0x0, 0x28, 0x2a, 0x0, 0x36, 0x38, 0x0, @@ -202,34 +284,72 @@ static const unsigned char uff[] = { 0x21, 0x0, 0x3a, 0x41, 0x0, 0x5a, 0x66, 0x0, 0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0, 0xcf, 0xd2, 0x0, 0xd7, 0xda, 0x0, 0xdc }; +static const unsigned char u100[] = { + 0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26, + 0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d, + 0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa }; +static const unsigned char u101[] = { + 0x40, 0x0, 0x74 }; +static const unsigned char u102[] = { + 0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 }; static const unsigned char u103[] = { - 0x00, 0x0, 0x1e, - 0x30, 0x0, 0x4a }; + 0x00, 0x0, 0x1e, 0x30, 0x0, 0x4a, + 0x80, 0x0, 0x9d, 0xa0, 0x0, 0xc3, + 0xc8, 0x0, 0xcf, 0xd1, 0x0, 0xd5 }; static const unsigned char u104[] = { - 0x00, 0x0, 0x25, 0x28, 0x0, - 0x4d }; + 0x00, 0x0, 0x9d, 0xa0, 0x0, 0xa9 }; +static const unsigned char u108[] = { + 
0x00, 0x0, 0x05, 0x08, 0x0a, 0x0, 0x35, + 0x37, 0x38, 0x3c, 0x3f, 0x0, 0x55 }; +static const unsigned char u109[] = { + 0x00, 0x0, 0x15, 0x20, 0x0, 0x39 }; +static const unsigned char u10a[] = { + 0x00, 0x10, 0x0, 0x13, 0x15, 0x0, 0x17, + 0x19, 0x0, 0x33, 0x60, 0x0, 0x7c }; +static const unsigned char u10b[] = { + 0x00, 0x0, 0x35, 0x40, 0x0, 0x55, + 0x60, 0x0, 0x72 }; +static const unsigned char u10c[] = { + 0x00, 0x0, 0x48 }; +static const unsigned char u110[] = { + 0x83, 0x0, 0xaf }; +/* u120 to u122 all alphabetic */ +static const unsigned char u123[] = { + 0x00, 0x0, 0x6e }; +static const unsigned char u124[] = { + 0x00, 0x0, 0x62 }; +/* u130 to u133 all alphabetic */ +static const unsigned char u134[] = { + 0x00, 0x0, 0x2e }; static const unsigned char u1d4[] = { - 0x00, 0x0, 0x54, 0x56, 0x0, 0x9c, 0x9e, - 0x0, 0x9f, 0xa2, 0xa5, 0x0, 0xa6, 0xa9, 0x0, - 0xac, 0xae, 0x0, 0xb9, 0xbb, 0xbd, 0x0, 0xc0, - 0xc2, 0x0, 0xc3, 0xc5, 0x0, 0xff }; + 0x00, 0x0, 0x54, 0x56, 0x0, 0x9c, + 0x9e, 0x0, 0x9f, 0xa2, 0xa5, 0x0, 0xa6, + 0xa9, 0x0, 0xac, 0xae, 0x0, 0xb9, 0xbb, + 0xbd, 0x0, 0xc3, 0xc5, 0x0, 0xff }; static const unsigned char u1d5[] = { 0x00, 0x0, 0x05, 0x07, 0x0, 0x0a, 0x0d, 0x0, 0x14, 0x16, 0x0, 0x1c, 0x1e, 0x0, 0x39, 0x3b, 0x0, 0x3e, 0x40, 0x0, 0x44, 0x46, 0x4a, 0x0, 0x50, 0x52, 0x0, 0xff }; static const unsigned char u1d6[] = { - 0x00, 0x0, 0xa3, 0xa8, - 0x0, 0xc0, 0xc2, 0x0, 0xda, 0xdc, 0x0, 0xfa, + 0x00, 0x0, 0xa5, 0xa8, 0x0, 0xc0, + 0xc2, 0x0, 0xda, 0xdc, 0x0, 0xfa, 0xfc, 0x0, 0xff }; static const unsigned char u1d7[] = { - 0x00, 0x0, 0x14, 0x16, 0x0, - 0x34, 0x36, 0x0, 0x4e, 0x50, 0x0, 0x6e, - 0x70, 0x0, 0x88, 0x8a, 0x0, 0xa8, 0xaa, 0x0, 0xc2, - 0xc4, 0x0, 0xc9, 0xce, 0x0, 0xff }; + 0x00, 0x0, 0x14, 0x16, 0x0, 0x34, + 0x36, 0x0, 0x4e, 0x50, 0x0, 0x6e, + 0x70, 0x0, 0x88, 0x8a, 0x0, 0xa8, + 0xaa, 0x0, 0xc2, 0xc4, 0x0, 0xcb, + 0xce, 0x0, 0xff }; +static const unsigned char u1f1[] = { + 0x10, 0x0, 0x2c, 0x31, 0x3d, 0x3f, 0x42, 0x46, + 0x57, 0x5f, 0x79, 0x7b, 0x7c, 
0x7f, 0x8a }; /* u200 to u2a5 all alphabetic */ static const unsigned char u2a6[] = { 0x00, 0x0, 0xd6 }; +/* u2a7 to u2b6 all alphabetic */ +static const unsigned char u2b7[] = { + 0x00, 0x0, 0x34 }; /* u2f8 to u2f9 all alphabetic */ static const unsigned char u2fa[] = { 0x00, 0x0, 0x1d }; diff --git a/newlib/libc/ctype/utf8print.h b/newlib/libc/ctype/utf8print.h index c895a3cb8..abeb81cb5 100644 --- a/newlib/libc/ctype/utf8print.h +++ b/newlib/libc/ctype/utf8print.h @@ -27,85 +27,96 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* Generated using unicode.txt 3.2 */ +/* Generated using UnicodeData.txt 5.2 */ +/* Expression used to filter out the characters for the below tables: + + awk -F\; \ + '{ \ + VAL = strtonum (sprintf("0x%s", $1)); \ + # All valid characters except from categories Cc (C0 or C1 control code), \ + # Cs (Surrogates), Zl (Line separator), and Zp (Paragraph separator).\ + # \ + # Before running this test, make sure to expand all Unicode blocks \ + # which are just marked by their first and last character! 
\ + # \ + if (!match($3, "^C[cs]") && !match($3, "^Z[lp]")) \ + print $1; \ + }' UnicodeData.txt +*/ static const unsigned char u0[] = { 0x20, 0x0, 0x7e, 0xa0, 0x0, 0xff }; /* u1 is all-print */ -static const unsigned char u2[] = { - 0x00, 0x0, 0x20, 0x22, 0x0, - 0x33, 0x50, 0x0, 0xad, 0xb0, 0x0, 0xee }; +/* u2 is all-print */ static const unsigned char u3[] = { - 0x00, 0x0, 0x4f, 0x60, 0x0, 0x6f, 0x74, 0x0, 0x75, - 0x7a, 0x7e, 0x84, 0x0, 0x8a, 0x8c, 0x8e, 0x0, - 0xa1, 0xa3, 0x0, 0xce, 0xd0, 0x0, 0xf6 }; -static const unsigned char u4[] = { - 0x00, 0x0, 0x86, 0x88, 0x0, 0xce, 0xd0, 0x0, 0xf5, - 0xf8, 0x0, 0xf9 }; + 0x00, 0x0, 0x77, 0x7a, 0x0, 0x7e, + 0x84, 0x0, 0x8a, 0x8c, 0x8e, 0x0, + 0xa1, 0xa3, 0x0, 0xff }; +/* u4 is all-print */ static const unsigned char u5[] = { - 0x00, 0x0, 0x0f, 0x31, 0x0, + 0x00, 0x0, 0x25, 0x31, 0x0, 0x56, 0x59, 0x0, 0x5f, 0x61, 0x0, 0x87, 0x89, - 0x0, 0x8a, 0x91, 0x0, 0xa1, 0xa3, 0x0, 0xb9, - 0xbb, 0x0, 0xc4, 0xd0, 0x0, 0xea, 0xf0, 0x0, - 0xf4 }; + 0x0, 0x8a, 0x91, 0x0, 0xc7, 0xd0, 0x0, 0xea, + 0xf0, 0x0, 0xf4 }; static const unsigned char u6[] = { - 0x0c, 0x1b, 0x1f, 0x21, 0x0, 0x3a, 0x40, - 0x0, 0x55, 0x60, 0x0, 0xed, 0xf0, 0x0, 0xfe }; + 0x00, 0x0, 0x03, 0x06, 0x0, 0x1b, 0x1e, 0x1f, + 0x21, 0x0, 0x5e, 0x60, 0x0, 0xff }; static const unsigned char u7[] = { - 0x00, 0x0, 0x0d, 0x0f, 0x0, 0x2c, 0x30, 0x0, - 0x4a, 0x80, 0x0, 0xb1 }; + 0x00, 0x0, 0x0d, 0x0f, 0x0, 0x4a, 0x4d, 0x0, 0xb1, + 0xc0, 0x0, 0xfa }; +static const unsigned char u8[] = { + 0x00, 0x0, 0x2d, 0x30, 0x0, 0x3e, }; static const unsigned char u9[] = { - 0x01, 0x0, 0x03, 0x05, - 0x0, 0x39, 0x3c, 0x0, 0x4d, 0x50, 0x0, 0x54, - 0x58, 0x0, 0x70, 0x81, 0x0, 0x83, 0x85, 0x0, - 0x8c, 0x8f, 0x0, 0x90, 0x93, 0x0, 0xa8, 0xaa, - 0x0, 0xb0, 0xb2, 0xb6, 0x0, 0xb9, 0xbc, 0xbe, - 0x0, 0xc4, 0xc7, 0x0, 0xc8, 0xcb, 0x0, 0xcd, + 0x00, 0x0, 0x39, 0x3c, 0x0, 0x4e, 0x50, 0x0, 0x55, + 0x58, 0x0, 0x72, 0x79, 0x0, 0x7f, 0x81, 0x0, 0x83, + 0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90, 0x93, 0x0, 0xa8, + 0xaa, 
0x0, 0xb0, 0xb2, 0xb6, 0x0, 0xb9, 0xbc, + 0x0, 0xc4, 0xc7, 0xc8, 0xcb, 0x0, 0xce, 0xd7, 0xdc, 0x0, 0xdd, 0xdf, 0x0, 0xe3, 0xe6, - 0x0, 0xfa }; + 0x0, 0xfb }; static const unsigned char ua[] = { - 0x02, 0x05, 0x0, 0x0a, 0x0f, 0x0, + 0x01, 0x0, 0x03, 0x05, 0x0, 0x0a, 0x0f, 0x0, 0x10, 0x13, 0x0, 0x28, 0x2a, 0x0, 0x30, 0x32, 0x0, 0x33, 0x35, 0x0, 0x36, 0x38, 0x0, 0x39, 0x3c, 0x3e, 0x0, 0x42, 0x47, 0x0, 0x48, 0x4b, - 0x0, 0x4d, 0x59, 0x0, 0x5c, 0x5e, 0x66, 0x0, - 0x74, 0x81, 0x0, 0x83, 0x85, 0x0, 0x8b, 0x8d, + 0x0, 0x4d, 0x51, 0x59, 0x0, 0x5c, 0x5e, 0x66, 0x0, + 0x75, 0x81, 0x0, 0x83, 0x85, 0x0, 0x8d, 0x8f, 0x0, 0x91, 0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0, 0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9, 0xbc, 0x0, 0xc5, 0xc7, 0x0, 0xc9, 0xcb, 0x0, 0xcd, - 0xd0, 0xe0, 0xe6, 0x0, 0xef }; + 0xd0, 0xe0, 0x0, 0xe3, 0xe6, 0x0, 0xef, 0xf1 }; static const unsigned char ub[] = { 0x01, 0x0, 0x03, 0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10, 0x13, 0x0, - 0x28, 0x2a, 0x0, 0x30, 0x32, 0x0, 0x33, 0x36, - 0x0, 0x39, 0x3c, 0x0, 0x43, 0x47, 0x0, 0x48, + 0x28, 0x2a, 0x0, 0x30, 0x32, 0x0, 0x33, 0x35, + 0x0, 0x39, 0x3c, 0x0, 0x44, 0x47, 0x0, 0x48, 0x4b, 0x0, 0x4d, 0x56, 0x0, 0x57, 0x5c, 0x0, - 0x5d, 0x5f, 0x0, 0x61, 0x66, 0x0, 0x70, 0x82, + 0x5d, 0x5f, 0x0, 0x63, 0x66, 0x0, 0x71, 0x82, 0x0, 0x83, 0x85, 0x0, 0x8a, 0x8e, 0x0, 0x90, 0x92, 0x0, 0x95, 0x99, 0x0, 0x9a, 0x9c, 0x9e, 0x0, 0x9f, 0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa, - 0xae, 0x0, 0xb5, 0xb7, 0x0, 0xb9, 0xbe, 0x0, - 0xc2, 0xc6, 0x0, 0xc8, 0xca, 0x0, 0xcd, 0xd7, - 0xe7, 0x0, 0xf2 }; + 0xae, 0x0, 0xb9, 0xbe, 0x0, + 0xc2, 0xc6, 0x0, 0xc8, 0xca, 0x0, 0xcd, 0xd0, + 0xd7, 0xe6, 0xe7, 0x0, 0xfa }; static const unsigned char uc[] = { 0x01, 0x0, 0x03, 0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28, 0x2a, - 0x0, 0x33, 0x35, 0x0, 0x39, 0x3e, 0x0, 0x44, + 0x0, 0x33, 0x35, 0x0, 0x39, 0x3d, 0x0, 0x44, 0x46, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x55, 0x0, - 0x56, 0x60, 0x0, 0x61, 0x66, 0x0, 0x6f, 0x82, - 0x0, 0x83, 0x85, 0x0, 0x8c, 0x8e, 0x0, 0x90, - 0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3, 
0xb5, 0x0, - 0xb9, 0xbe, 0x0, 0xc4, 0xc6, 0x0, 0xc8, 0xca, - 0x0, 0xcd, 0xd5, 0x0, 0xd6, 0xde, 0xe0, 0x0, - 0xe1, 0xe6, 0x0, 0xef }; + 0x56, 0x58, 0x59, 0x60, 0x0, 0x63, 0x66, 0x0, 0x6f, + 0x78, 0x0, 0x7f, 0x82, 0x83, 0x85, 0x0, 0x8c, + 0x8e, 0x0, 0x90, 0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3, + 0xb5, 0x0, 0xb9, 0xbc, 0x0, 0xc4, 0xc6, 0x0, 0xc8, + 0xca, 0x0, 0xcd, 0xd5, 0x0, 0xd6, 0xde, 0xe0, 0x0, + 0xe3, 0xe6, 0x0, 0xef, 0xf1, 0xf2 }; static const unsigned char ud[] = { 0x02, 0x0, 0x03, 0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28, - 0x2a, 0x0, 0x39, 0x3e, 0x0, 0x43, 0x46, 0x0, - 0x48, 0x4a, 0x0, 0x4d, 0x57, 0x60, 0x0, 0x61, - 0x66, 0x0, 0x6f, 0x82, 0x0, 0x83, 0x85, 0x0, - 0x96, 0x9a, 0x0, 0xb1, 0xb3, 0x0, 0xbb, 0xbd, - 0xc0, 0x0, 0xc6, 0xca, 0xcf, 0x0, 0xd4, 0xd6, + 0x2a, 0x0, 0x39, 0x3d, 0x0, 0x44, 0x46, 0x0, + 0x48, 0x4a, 0x0, 0x4d, 0x57, 0x60, 0x0, 0x63, + 0x66, 0x0, 0x75, 0x79, 0x0, 0x7f, 0x82, 0x0, 0x83, + 0x85, 0x0, 0x96, 0x9a, 0x0, 0xb1, 0xb3, 0x0, 0xbb, + 0xbd, 0xc0, 0x0, 0xc6, 0xca, 0xcf, 0x0, 0xd4, 0xd6, 0xd8, 0x0, 0xdf, 0xf2, 0x0, 0xf4 }; static const unsigned char ue[] = { 0x01, 0x0, @@ -116,46 +127,55 @@ static const unsigned char ue[] = { 0xbd, 0xc0, 0x0, 0xc4, 0xc6, 0xc8, 0x0, 0xcd, 0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd }; static const unsigned char uf[] = { - 0x00, 0x0, - 0x47, 0x49, 0x0, 0x6a, 0x71, 0x0, 0x8b, 0x90, - 0x0, 0x97, 0x99, 0x0, 0xbc, 0xbe, 0x0, 0xcc, - 0xcf }; + 0x00, 0x0, 0x47, 0x49, 0x0, 0x6c, + 0x71, 0x0, 0x8b, 0x90, 0x0, 0x97, + 0x99, 0x0, 0xbc, 0xbe, 0x0, 0xcc, + 0xce, 0x0, 0xd8 }; static const unsigned char u10[] = { - 0x00, 0x0, 0x21, 0x23, 0x0, 0x27, 0x29, - 0x0, 0x2a, 0x2c, 0x0, 0x32, 0x36, 0x0, 0x39, - 0x40, 0x0, 0x59, 0xa0, 0x0, 0xc5, 0xd0, 0x0, - 0xf8, 0xfb }; -static const unsigned char u11[] = { - 0x00, 0x0, 0x59, 0x5f, 0x0, 0xa2, - 0xa8, 0x0, 0xf9 }; + 0x00, 0x0, 0xc5, 0xd0, 0x0, 0xfc }; +/* u11 is all-print */ static const unsigned char u12[] = { - 0x00, 0x0, 0x06, 0x08, 0x0, - 0x46, 0x48, 0x4a, 0x0, 0x4d, 0x50, 0x0, 0x56, - 
0x58, 0x5a, 0x0, 0x5d, 0x60, 0x0, 0x86, 0x88, - 0x8a, 0x0, 0x8d, 0x90, 0x0, 0xae, 0xb0, 0xb2, + 0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x50, 0x0, 0x56, + 0x58, 0x5a, 0x0, 0x5d, 0x60, 0x0, 0x88, + 0x8a, 0x0, 0x8d, 0x90, 0x0, 0xb0, 0xb2, 0x0, 0xb5, 0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, - 0xc5, 0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6, 0xd8, - 0x0, 0xee, 0xf0, 0x0, 0xff }; + 0xc5, 0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff }; static const unsigned char u13[] = { - 0x00, 0x0, 0x0e, 0x10, 0x12, 0x0, - 0x15, 0x18, 0x0, 0x1e, 0x20, 0x0, 0x46, 0x48, - 0x0, 0x5a, 0x61, 0x0, 0x7c, 0xa0, 0x0, 0xf4 }; -static const unsigned char u14[] = { - 0x01, 0x0, 0xff }; + 0x00, 0x0, 0x10, 0x12, 0x0, 0x15, + 0x18, 0x0, 0x5a, 0x5f, 0x0, 0x7c, + 0x80, 0x0, 0x99, 0xa0, 0x0, 0xf4 }; +/* u14 is all-print */ /* u15 is all-print */ static const unsigned char u16[] = { - 0x00, 0x0, 0x76, 0x80, 0x0, 0x9c, 0xa0, 0x0, - 0xf0 }; + 0x00, 0x0, 0x9c, 0xa0, 0x0, 0xf0 }; static const unsigned char u17[] = { 0x00, 0x0, 0x0c, 0x0e, 0x0, 0x14, 0x20, 0x0, 0x36, 0x40, 0x0, 0x53, 0x60, 0x0, 0x6c, 0x6e, 0x0, 0x70, 0x72, 0x0, 0x73, 0x80, 0x0, - 0xdc, 0xe0, 0x0, 0xe9 }; + 0xdd, 0xe0, 0x0, 0xe9, 0xf0, 0x0, 0xf9 }; static const unsigned char u18[] = { 0x00, 0x0, 0x0e, 0x10, - 0x0, 0x19, 0x20, 0x0, 0x77, 0x80, 0x0, 0xa9 }; -static const unsigned char u1e[] = { - 0x00, 0x0, 0x9b, 0xa0, 0x0, 0xf9 }; + 0x0, 0x19, 0x20, 0x0, 0x77, 0x80, 0x0, 0xaa, + 0xb0, 0x0, 0xf5 }; +static const unsigned char u19[] = { + 0x00, 0x0, 0x1c, 0x20, 0x0, 0x2b, + 0x30, 0x0, 0x3b, 0x40, 0x44, 0x0, 0x6d, + 0x70, 0x0, 0x74, 0x80, 0x0, 0xab, + 0xb0, 0x0, 0xc9, 0xd0, 0x0, 0xda, + 0xde, 0x0, 0xff }; +static const unsigned char u1a[] = { + 0x00, 0x0, 0x1b, 0x1e, 0x0, 0x5e, + 0x60, 0x0, 0x7c, 0x7f, 0x0, 0x89, + 0x90, 0x0, 0x99, 0xa0, 0x0, 0xad }; +static const unsigned char u1b[] = { + 0x00, 0x0, 0x4b, 0x50, 0x0, 0x7c, + 0x80, 0x0, 0xaa, 0xae, 0x0, 0xb9 }; +static const unsigned char u1c[] = { + 0x00, 0x0, 0x37, 0x3b, 0x0, 0x49, + 0x4d, 0x0, 0x7f, 0xd0, 0x0, 0xf2 }; +static 
const unsigned char u1d[] = { + 0x00, 0x0, 0xe6, 0xfd, 0x0, 0xff }; +/* u1e is all-print */ static const unsigned char u1f[] = { 0x00, 0x0, 0x15, 0x18, 0x0, 0x1d, 0x20, 0x0, 0x45, 0x48, @@ -164,119 +184,206 @@ static const unsigned char u1f[] = { 0xc4, 0xc6, 0x0, 0xd3, 0xd6, 0x0, 0xdb, 0xdd, 0x0, 0xef, 0xf2, 0x0, 0xf4, 0xf6, 0x0, 0xfe }; static const unsigned char u20[] = { - 0x00, 0x0, 0x27, 0x2a, 0x0, 0x52, 0x57, 0x5f, - 0x0, 0x63, 0x6a, 0x0, 0x71, 0x74, 0x0, 0x8e, - 0xa0, 0x0, 0xb1, 0xd0, 0x0, 0xea }; + 0x00, 0x0, 0x27, 0x2a, 0x0, 0x64, + 0x6a, 0x0, 0x71, 0x74, 0x0, 0x8e, + 0x90, 0x0, 0x94, 0xa0, 0x0, 0xb8, + 0xd0, 0x0, 0xf0 }; static const unsigned char u21[] = { - 0x00, 0x0, - 0x3a, 0x3d, 0x0, 0x4b, 0x53, 0x0, 0x83, 0x90, - 0x0, 0xff }; + 0x00, 0x0, 0x89, 0x90, 0x0, 0xff }; /* u22 is all-print */ static const unsigned char u23[] = { - 0x00, 0x0, 0xce }; + 0x00, 0x0, 0xe8 }; static const unsigned char u24[] = { 0x00, 0x0, 0x26, 0x40, 0x0, 0x4a, - 0x60, 0x0, 0xfe }; + 0x60, 0x0, 0xff }; /* u25 is all-print */ static const unsigned char u26[] = { - 0x00, 0x0, 0x13, 0x16, 0x0, - 0x17, 0x19, 0x0, 0x7d, 0x80, 0x0, 0x89 }; + 0x00, 0x0, 0xcd, 0xcf, 0x0, 0xe1, + 0xe3, 0xe8, 0x0, 0xff }; static const unsigned char u27[] = { - 0x01, - 0x0, 0x04, 0x06, 0x0, 0x09, 0x0c, 0x0, 0x27, - 0x29, 0x0, 0x4b, 0x4d, 0x4f, 0x0, 0x52, 0x56, - 0x58, 0x0, 0x5e, 0x61, 0x0, 0x94, 0x98, 0x0, - 0xaf, 0xb1, 0x0, 0xbe, 0xd0, 0x0, 0xeb, 0xf0, - 0x0, 0xff }; + 0x01, 0x0, 0x04, 0x06, 0x0, 0x09, + 0x0c, 0x0, 0x27, 0x29, 0x0, 0x4b, 0x4d, + 0x4f, 0x0, 0x52, 0x56, 0x0, 0x5e, + 0x61, 0x0, 0x94, 0x98, 0x0, 0xaf, + 0xb1, 0x0, 0xbe, 0xc0, 0x0, 0xca, 0xcc, + 0xd0, 0x0, 0xff }; /* u28 to u2a are all-print */ +static const unsigned char u2b[] = { + 0x00, 0x0, 0x4c, 0x50, 0x0, 0x59 }; +static const unsigned char u2c[] = { + 0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e, + 0x60, 0x0, 0xf1, 0xf9, 0x0, 0xff }; +static const unsigned char u2d[] = { + 0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f, + 0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6, 
+ 0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6, + 0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6, + 0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6, + 0xd8, 0x0, 0xde, 0xe0, 0x0, 0xff }; static const unsigned char u2e[] = { - 0x80, 0x0, 0x99, 0x9b, 0x0, 0xf3 }; + 0x00, 0x0, 0x31, 0x80, 0x0, 0x99, + 0x9b, 0x0, 0xf3 }; static const unsigned char u2f[] = { 0x00, 0x0, 0xd5, 0xf0, 0x0, 0xfb }; static const unsigned char u30[] = { 0x00, 0x0, 0x3f, 0x41, 0x0, 0x96, 0x99, 0x0, 0xff }; static const unsigned char u31[] = { - 0x05, - 0x0, 0x2c, 0x31, 0x0, 0x8e, 0x90, 0x0, 0xb7, + 0x05, 0x0, 0x2d, 0x31, 0x0, 0x8e, + 0x90, 0x0, 0xb7, 0xc0, 0x0, 0xe3, 0xf0, 0x0, 0xff }; static const unsigned char u32[] = { - 0x00, 0x0, 0x1c, 0x20, 0x0, 0x43, 0x51, 0x0, - 0x7b, 0x7f, 0x0, 0xcb, 0xd0, 0x0, 0xfe }; -static const unsigned char u33[] = { - 0x00, - 0x0, 0x76, 0x7b, 0x0, 0xdd, 0xe0, 0x0, 0xfe }; -/* u34 to u4c is all-print */ + 0x00, 0x0, 0x1e, 0x20, 0x0, 0xfe }; +/* u33 to u4c is all-print */ static const unsigned char u4d[] = { - 0x00, 0x0, 0xb5 }; + 0x00, 0x0, 0xb5, 0xc0, 0x0, 0xff }; /* u4e to u9e is all-print */ static const unsigned char u9f[] = { - 0x00, 0x0, 0xa5 }; + 0x00, 0x0, 0xcb }; /* ua0 to ua3 is all-print */ static const unsigned char ua4[] = { - 0x00, 0x0, - 0x8c, 0x90, 0x0, 0xc6 }; + 0x00, 0x0, 0x8c, 0x90, 0x0, 0xc6, + 0xd0, 0x0, 0xff }; +/* ua5 is all-print */ +static const unsigned char ua6[] = { + 0x00, 0x0, 0x2b, 0x40, 0x0, 0x5f, + 0x62, 0x0, 0x73, 0x7c, 0x0, 0x97, + 0xa0, 0x0, 0xf7 }; +static const unsigned char ua7[] = { + 0x00, 0x0, 0x8c, 0xfb, 0x0, 0xff }; +static const unsigned char ua8[] = { + 0x00, 0x0, 0x2b, 0x30, 0x0, 0x39, + 0x40, 0x0, 0x77, 0x80, 0x0, 0xc4, + 0xce, 0x0, 0xd9, 0xe0, 0x0, 0xfb }; +static const unsigned char ua9[] = { + 0x00, 0x0, 0x53, 0x5f, 0x0, 0x7c, + 0x80, 0x0, 0xcd, 0xcf, 0x0, 0xd9, + 0xde, 0xdf }; +static const unsigned char uaa[] = { + 0x00, 0x0, 0x36, 0x40, 0x0, 0x4d, + 0x50, 0x0, 0x59, 0x5c, 0x0, 0x7b, + 0x80, 0x0, 0xc2, 0xdb, 0x0, 0xdf }; +static const unsigned char 
uab[] = { + 0xc0, 0x0, 0xed, 0xf0, 0x0, 0xf9 }; /* uac to ud6 is all-print */ static const unsigned char ud7[] = { - 0x00, 0x0, 0xa3 }; + 0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6, + 0xcb, 0x0, 0xfb }; +/* ud8 to udf are UTF-16 surrogates, non-printable */ /* ue0 to uf9 is all-print */ static const unsigned char ufa[] = { - 0x00, 0x0, 0x2d, 0x30, 0x0, 0x6a }; + 0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d, + 0x70, 0x0, 0xd9 }; static const unsigned char ufb[] = { - 0x00, 0x0, 0x06, - 0x13, 0x0, 0x17, 0x1d, 0x0, 0x36, 0x38, 0x0, - 0x3c, 0x3e, 0x40, 0x0, 0x41, 0x43, 0x0, 0x44, + 0x00, 0x0, 0x06, 0x13, 0x0, 0x17, + 0x1d, 0x0, 0x36, 0x38, 0x0, 0x3c, + 0x3e, 0x40, 0x41, 0x43, 0x44, 0x46, 0x0, 0xb1, 0xd3, 0x0, 0xff }; /* ufc is all-print */ static const unsigned char ufd[] = { - 0x00, 0x0, 0x3f, 0x50, 0x0, - 0x8f, 0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfc }; + 0x00, 0x0, 0x3f, 0x50, 0x0, 0x8f, + 0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfd }; static const unsigned char ufe[] = { - 0x00, - 0x0, 0x0f, 0x20, 0x0, 0x23, 0x30, 0x0, 0x46, - 0x49, 0x0, 0x52, 0x54, 0x0, 0x66, 0x68, 0x0, - 0x6b, 0x70, 0x0, 0x74, 0x76, 0x0, 0xfc, 0xff }; + 0x00, 0x0, 0x19, 0x20, 0x0, 0x26, + 0x30, 0x0, 0x52, 0x54, 0x0, 0x66, + 0x68, 0x0, 0x6b, 0x70, 0x0, 0x74, + 0x76, 0x0, 0xfc, 0xff }; static const unsigned char uff[] = { 0x01, 0x0, 0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0, 0xcf, 0xd2, 0x0, 0xd7, 0xda, 0x0, 0xdc, 0xe0, 0x0, 0xe6, 0xe8, 0x0, 0xee, 0xf9, 0x0, 0xfd }; +static const unsigned char u100[] = { + 0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26, + 0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d, + 0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa }; +static const unsigned char u101[] = { + 0x00, 0x0, 0x02, 0x07, 0x0, 0x33, + 0x37, 0x0, 0x8a, 0x90, 0x0, 0x9b, + 0xd0, 0x0, 0xfd }; +static const unsigned char u102[] = { + 0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 }; static const unsigned char u103[] = { - 0x00, 0x0, 0x1e, 0x20, 0x0, 0x23, 0x30, 0x0, - 0x4a }; + 0x00, 0x0, 0x1e, 0x20, 0x0, 0x23, + 0x30, 0x0, 0x4a, 0x80, 0x0, 0x9d, + 0x9f, 0x0, 0xc3, 0xc8, 0x0, 0xd5 }; static const 
unsigned char u104[] = { - 0x00, 0x0, 0x25, 0x28, 0x0, 0x4d }; + 0x00, 0x0, 0x9d, 0xa0, 0x0, 0xa9 }; +static const unsigned char u108[] = { + 0x00, 0x0, 0x05, 0x08, 0x0a, 0x0, 0x35, + 0x37, 0x38, 0x3c, 0x3f, 0x0, 0x55, + 0x57, 0x0, 0x5f }; +static const unsigned char u109[] = { + 0x00, 0x0, 0x1b, 0x1f, 0x0, 0x39, 0x3f }; +static const unsigned char u10a[] = { + 0x00, 0x0, 0x03, 0x05, 0x06, 0x0c, 0x0, 0x13, + 0x15, 0x0, 0x17, 0x19, 0x0, 0x33, + 0x38, 0x0, 0x3a, 0x3f, 0x0, 0x47, + 0x50, 0x0, 0x58, 0x60, 0x0, 0x7f }; +static const unsigned char u10b[] = { + 0x00, 0x0, 0x35, 0x39, 0x0, 0x55, + 0x58, 0x0, 0x72, 0x78, 0x0, 0x7f }; +static const unsigned char u10c[] = { + 0x00, 0x0, 0x48 }; +static const unsigned char u10e[] = { + 0x60, 0x0, 0x7e }; +static const unsigned char u110[] = { + 0x80, 0x0, 0xc1 }; +/* u120 to u122 is all-print */ +static const unsigned char u123[] = { + 0x00, 0x0, 0x6e }; +static const unsigned char u124[] = { + 0x00, 0x0, 0x62, 0x70, 0x0, 0x73 }; +/* u130 to u133 is all-print */ +static const unsigned char u134[] = { + 0x00, 0x0, 0x2e }; static const unsigned char u1d0[] = { - 0x00, - 0x0, 0xf5 }; + 0x00, 0x0, 0xf5 }; static const unsigned char u1d1[] = { - 0x00, 0x0, 0x26, 0x2a, 0x0, 0xdd }; + 0x00, 0x0, 0x26, 0x29, 0x0, 0xdd }; +static const unsigned char u1d2[] = { + 0x00, 0x0, 0x45 }; +static const unsigned char u1d3[] = { + 0x00, 0x0, 0x56, 0x60, 0x0, 0x71 }; static const unsigned char u1d4[] = { 0x00, 0x0, 0x54, 0x56, 0x0, 0x9c, 0x9e, 0x0, 0x9f, 0xa2, 0xa5, 0x0, 0xa6, 0xa9, 0x0, 0xac, - 0xae, 0x0, 0xb9, 0xbb, 0xbd, 0x0, 0xc0, 0xc2, - 0x0, 0xc3, 0xc5, 0x0, 0xff }; + 0xae, 0x0, 0xb9, 0xbb, 0xbd, 0x0, 0xc3, + 0xc5, 0x0, 0xff }; static const unsigned char u1d5[] = { 0x00, 0x0, 0x05, 0x07, 0x0, 0x0a, 0x0d, 0x0, 0x14, 0x16, 0x0, 0x1c, 0x1e, 0x0, 0x39, 0x3b, 0x0, 0x3e, 0x40, 0x0, 0x44, 0x46, 0x4a, 0x0, 0x50, 0x52, 0x0, 0xff }; static const unsigned char u1d6[] = { - 0x00, 0x0, 0xa3, 0xa8, 0x0, 0xff }; + 0x00, 0x0, 0xa5, 0xa8, 0x0, 0xff }; 
static const unsigned char u1d7[] = { - 0x00, 0x0, 0xc9, 0xce, 0x0, 0xff }; + 0x00, 0x0, 0xcb, 0xce, 0x0, 0xff }; +static const unsigned char u1f0[] = { + 0x00, 0x0, 0x2b, 0x30, 0x0, 0x93 }; +static const unsigned char u1f1[] = { + 0x00, 0x0, 0x0a, 0x10, 0x0, 0x2e, + 0x31, 0x3d, 0x3f, 0x42, 0x46, 0x4a, 0x0, 0x4e, + 0x57, 0x5f, 0x79, 0x7b, 0x7c, 0x7f, 0x8a, 0x0, + 0x8c, 0x8d, 0x90 }; +static const unsigned char u1f2[] = { + 0x00, 0x10, 0x0, 0x31, 0x40, 0x0, 0x48 }; /* u200 to u2a5 is all-print */ static const unsigned char u2a6[] = { 0x00, 0x0, 0xd6 }; +/* u2a7 to u2b6 is all-print */ +static const unsigned char u2b7[] = { + 0x00, 0x0, 0x34 }; /* u2f8 to u2f9 is all-print */ static const unsigned char u2fa[] = { 0x00, 0x0, 0x1d }; static const unsigned char ue00[] = { 0x01, 0x20, 0x0, 0x7f }; +static const unsigned char ue01[] = { + 0x00, 0x0, 0xef }; /* uf00 to uffe is all-print */ static const unsigned char ufff[] = { - 0x00, 0x0, - 0xfd }; + 0x00, 0x0, 0xfd }; /* u1000 to u10fe is all-print */ static const unsigned char u10ff[] = { 0x00, 0x0, 0xfd }; diff --git a/newlib/libc/ctype/utf8punct.h b/newlib/libc/ctype/utf8punct.h deleted file mode 100644 index 55c31f9fe..000000000 --- a/newlib/libc/ctype/utf8punct.h +++ /dev/null @@ -1,201 +0,0 @@ -/* Copyright (c) 2002 Red Hat Incorporated. - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - The name of Red Hat Incorporated may not be used to endorse - or promote products derived from this software without specific - prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Generated using unicode.txt 3.2 */ - -static const unsigned char u0[] = { - 0x21, 0x0, 0x2f, 0x3a, 0x0, 0x40, 0x5b, 0x0, - 0x60, 0x7b, 0x0, 0x7e, 0xa0, 0x0, 0xa9, 0xab, - 0x0, 0xb4, 0xb6, 0x0, 0xb9, 0xbb, 0x0, 0xbf, - 0xd7, 0xf7 }; -static const unsigned char u2[] = { - 0xb9, 0x0, 0xba, 0xc2, 0x0, 0xcf, - 0xd2, 0x0, 0xdf, 0xe5, 0x0, 0xed }; -static const unsigned char u3[] = { - 0x00, 0x0, - 0x44, 0x46, 0x0, 0x4f, 0x60, 0x0, 0x6f, 0x74, - 0x0, 0x75, 0x7e, 0x84, 0x0, 0x85, 0x87, 0xf6 }; -static const unsigned char u4[] = { - 0x82, 0x0, 0x86, 0x88, 0x0, 0x89 }; -static const unsigned char u5[] = { - 0x5a, 0x0, - 0x5f, 0x89, 0x0, 0x8a, 0x91, 0x0, 0xa1, 0xa3, - 0x0, 0xb9, 0xbb, 0x0, 0xc4, 0xf3, 0x0, 0xf4 }; -static const unsigned char u6[] = { - 0x0c, 0x1b, 0x1f, 0x4b, 0x0, 0x55, 0x6a, 0x0, - 0x6d, 0x70, 0xd4, 0xd6, 0x0, 0xe4, 0xe7, 0x0, - 0xed, 0xfd, 0x0, 0xfe }; -static const unsigned char u7[] = { - 0x00, 0x0, 0x0d, 0x0f, - 0x11, 0x30, 0x0, 0x4a, 0xa6, 0x0, 0xb0 }; -static const unsigned char u9[] = { - 0x01, - 0x0, 0x03, 0x3c, 0x3e, 0x0, 0x4d, 0x51, 0x0, - 0x54, 0x62, 0x0, 0x65, 0x70, 0x81, 0x0, 0x83, - 0xbc, 0xbe, 0x0, 0xc4, 0xc7, 0x0, 0xc8, 0xcb, - 0x0, 0xcd, 0xd7, 0xe2, 0x0, 0xe3, 0xf2, 0x0, - 0xfa }; 
-static const unsigned char ua[] = { - 0x02, 0x3c, 0x3e, 0x0, 0x42, 0x47, 0x0, - 0x48, 0x4b, 0x0, 0x4d, 0x70, 0x0, 0x71, 0x81, - 0x0, 0x83, 0xbc, 0xbe, 0x0, 0xc5, 0xc7, 0x0, - 0xc9, 0xcb, 0x0, 0xcd }; -static const unsigned char ub[] = { - 0x01, 0x0, 0x03, 0x3c, - 0x3e, 0x0, 0x43, 0x47, 0x0, 0x48, 0x4b, 0x0, - 0x4d, 0x56, 0x0, 0x57, 0x70, 0x82, 0xbe, 0x0, - 0xc2, 0xc6, 0x0, 0xc8, 0xca, 0x0, 0xcd, 0xd7, - 0xf0, 0x0, 0xf2 }; -static const unsigned char uc[] = { - 0x01, 0x0, 0x03, 0x3e, 0x0, - 0x44, 0x46, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x55, - 0x0, 0x56, 0x82, 0x0, 0x83, 0xbe, 0x0, 0xc4, - 0xc6, 0x0, 0xc8, 0xca, 0x0, 0xcd, 0xd5, 0x0, - 0xd6 }; -static const unsigned char ud[] = { - 0x02, 0x0, 0x03, - 0x3e, 0x0, 0x43, 0x46, - 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x57, 0x82, 0x0, - 0x83, 0xca, 0xcf, 0x0, 0xd4, 0xd6, 0xd8, 0x0, - 0xdf, 0xf2, 0x0, 0xf4 }; -static const unsigned char ue[] = { - 0x2f, 0x3f, 0x46, 0x4f, - 0x5a, 0x0, 0x5b, 0xb1, 0xb4, 0x0, 0xb9, 0xbb, - 0x0, 0xbc, 0xc8, 0x0, 0xcd }; -static const unsigned char uf[] = { - 0x01, 0x0, 0x1f, - 0x2a, 0x0, 0x3f, 0x71, 0x0, 0x87, 0x90, 0x0, - 0x97, 0x99, 0x0, 0xbc, 0xbe, 0x0, 0xcc, 0xcf }; -static const unsigned char u10[] = { - 0x2c, 0x0, 0x32, 0x36, 0x0, 0x39, 0x4a, 0x0, - 0x4f, 0x56, 0x0, 0x59, 0xfb }; -static const unsigned char u13[] = { - 0x61, 0x0, 0x68, - 0x72, 0x0, 0x7c }; -static const unsigned char u16[] = { - 0x6d, 0x0, 0x6e, 0x9b, 0x0, - 0x9c, 0xeb, 0x0, 0xed }; -static const unsigned char u17[] = { - 0x12, 0x0, 0x14, 0x32, - 0x0, 0x36, 0x52, 0x0, 0x53, 0x72, 0x0, 0x73, - 0xb4, 0x0, 0xd6, 0xd8, 0x0, 0xdb }; -static const unsigned char u18[] = { - 0x00, 0x0, - 0x0e, 0xa9 }; -static const unsigned char u1f[] = { - 0xbd, 0xbf, 0x0, 0xc1, - 0xcd, 0x0, 0xcf, 0xdd, 0x0, 0xdf, 0xed, 0x0, 0xef, 0xfd, - 0x0, 0xfe }; -static const unsigned char u20[] = { - 0x07, 0x0c, 0x0, 0x27, 0x2a, 0x0, - 0x52, 0x57, 0x60, 0x0, 0x63, 0x6a, 0x0, 0x70, - 0x74, 0x0, 0x7e, 0x80, 0x0, 0x8e, 0xa0, 0x0, - 0xb1, 0xd0, 0x0, 0xea }; -static const 
unsigned char u21[] = { - 0x00, 0x0, 0x01, 0x03, - 0x0, 0x06, 0x08, 0x0, 0x09, 0x14, 0x16, 0x0, - 0x18, 0x1e, 0x0, 0x23, 0x25, 0x27, 0x2e, 0x32, - 0x3a, 0x40, 0x0, 0x44, 0x4a, 0x0, 0x4b, 0x53, - 0x0, 0x5f, 0x90, 0x0, 0xff }; -/* u22 is all-punctuation */ -static const unsigned char u23[] = { - 0x00, 0x0, 0xce }; -static const unsigned char u24[] = { - 0x00, 0x0, 0x26, - 0x40, 0x0, 0x4a, 0x60, 0x0, 0x9b, 0xea, 0x0, - 0xfe }; -/* u25 is all-punctuation */ -static const unsigned char u26[] = { - 0x00, 0x0, 0x13, - 0x16, 0x0, 0x17, 0x19, - 0x0, 0x7d, 0x80, 0x0, 0x89 }; -static const unsigned char u27[] = { - 0x01, 0x0, 0x04, - 0x06, 0x0, 0x09, 0x0c, 0x0, 0x27, 0x29, 0x0, - 0x4b, 0x4d, 0x4f, 0x0, 0x52, 0x56, 0x58, 0x0, - 0x5e, 0x61, 0x0, 0x94, 0x98, 0x0, 0xaf, 0xb1, - 0x0, 0xbe, 0xd0, 0x0, 0xeb, 0xf0, 0x0, 0xff }; -/* u28 to u2a is all-punctuation */ -static const unsigned char u2e[] = { - 0x80, 0x0, 0x99, - 0x9b, 0x0, 0xf3 }; -static const unsigned char u2f[] = { - 0x00, 0x0, - 0xd5, 0xf0, 0x0, 0xfb }; -static const unsigned char u30[] = { - 0x01, 0x0, 0x04, 0x08, - 0x0, 0x20, 0x2a, 0x0, 0x30, 0x36, 0x0, 0x37, - 0x3d, 0x0, 0x3f, 0x99, 0x0, 0x9c, 0xa0, 0xfb }; -static const unsigned char u31[] = { - 0x90, 0x0, 0x9f }; -static const unsigned char u32[] = { - 0x00, 0x0, 0x1c, 0x20, 0x0, - 0x43, 0x51, 0x0, 0x7b, 0x7f, 0x0, 0xcb, 0xd0, - 0x0, 0xfe }; -static const unsigned char u33[] = { - 0x00, 0x0, 0x76, 0x7b, 0x0, 0xdd, - 0xe0, 0x0, 0xfe }; -static const unsigned char ua4[] = { - 0x90, 0x0, 0xc6 }; -/* ue0 to uf8 are all-punctuation */ -static const unsigned char ufb[] = { - 0x1e, 0x29 }; -static const unsigned char ufd[] = { - 0x3e, 0x0, 0x3f, 0xfc }; -static const unsigned char ufe[] = { - 0x00, - 0x0, 0x0f, 0x20, 0x0, 0x23, 0x30, 0x0, 0x46, - 0x49, 0x0, 0x52, 0x54, 0x0, 0x66, 0x68, 0x0, - 0x6b, 0xff }; -static const unsigned char uff[] = { - 0x01, 0x0, 0x0f, 0x1a, 0x0, 0x20, - 0x3b, 0x0, 0x40, 0x5b, 0x0, 0x65, 0xe0, 0x0, - 0xe6, 0xe8, 0x0, 0xee, 0xf9, 0x0, 0xfd }; 
-static const unsigned char u103[] = { - 0x20, - 0x0, 0x23 }; -static const unsigned char u1d0[] = { - 0x00, 0x0, 0xf5 }; -static const unsigned char u1d1[] = { - 0x00, 0x0, 0x26, - 0x2a, 0x0, 0xdd }; -static const unsigned char u1d6[] = { - 0xc1, 0xdb, 0xfb }; -static const unsigned char u1d7[] = { - 0x15, 0x35, - 0x4f, 0x6f, 0x89, 0xa9, 0xc3 }; -static const unsigned char ue00[] = { - 0x01, 0x20, 0x0, - 0x7f }; -/* uf00 to uffe are all punctuation */ -static const unsigned char ufff[] = { - 0x00, 0x0, 0xfd }; -/* u1000 to u10fe are all punctuation */ -static const unsigned char u10ff[] = { - 0x00, 0x0, 0xfd };