Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | merge with trunk |
---|---|
Timelines: | family | ancestors | descendants | both | wtf-8-experiment |
Files: | files | file ages | folders |
SHA1: |
34c5b6ec22447702cb9e7eb2232fcfcb |
User & Date: | chw 2020-05-20 12:32:15.129 |
Context
2020-05-21
| ||
06:01 | merge with trunk check-in: 1663fd9a31 user: chw tags: wtf-8-experiment | |
2020-05-20
| ||
12:32 | merge with trunk check-in: 34c5b6ec22 user: chw tags: wtf-8-experiment | |
11:02 | backport most unicode fixes from wtf-8-experiment branch check-in: 088e611d9f user: chw tags: trunk | |
08:28 | more string trim tests check-in: 4912c0913b user: chw tags: wtf-8-experiment | |
Changes
Changes to jni/tcl/generic/tclScan.c.
︙ | ︙ | |||
114 115 116 117 118 119 120 | */ static const char * BuildCharSet( CharSet *cset, const char *format) /* Points to first char of set. */ { | | | 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | */ static const char * BuildCharSet( CharSet *cset, const char *format) /* Points to first char of set. */ { int ch, start; int offset, nranges; const char *end; memset(cset, 0, sizeof(CharSet)); offset = UtfToUniChar(format, &ch); if (ch == '^') { |
︙ | ︙ | |||
295 296 297 298 299 300 301 | Tcl_Interp *interp, /* Current interpreter. */ const char *format, /* The format string. */ int numVars, /* The number of variables passed to the scan * command. */ int *totalSubs) /* The number of variables that will be * required. */ { | | | 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 | Tcl_Interp *interp, /* Current interpreter. */ const char *format, /* The format string. */ int numVars, /* The number of variables passed to the scan * command. */ int *totalSubs) /* The number of variables that will be * required. */ { int gotXpg, gotSequential, value, i, flags, ch; char *end; int objIndex, xpgSize, nspace = numVars; int *nassign = TclStackAlloc(interp, nspace * sizeof(int)); char buf[TCL_UTF_MAX+1]; Tcl_Obj *errorMsg; /* Place to build an error messages. Note that * these are messy operations because we do * not want to use the formatting engine; |
︙ | ︙ | |||
621 622 623 624 625 626 627 | int numVars, nconversions, totalVars = -1; int objIndex, offset, i, result, code; long value; const char *string, *end, *baseString; char op = 0; int width, underflow = 0; Tcl_WideInt wideValue; | | | 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 | int numVars, nconversions, totalVars = -1; int objIndex, offset, i, result, code; long value; const char *string, *end, *baseString; char op = 0; int width, underflow = 0; Tcl_WideInt wideValue; int ch, sch; Tcl_Obj **objs = NULL, *objPtr = NULL; int flags; if (objc < 3) { Tcl_WrongNumArgs(interp, 1, objv, "string format ?varName ...?"); return TCL_ERROR; |
︙ | ︙ |
Changes to jni/tcl/generic/tclUtf.c.
︙ | ︙ | |||
1349 1350 1351 1352 1353 1354 1355 | int Tcl_UtfNcmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ unsigned long numChars) /* Number of UTF chars to compare. */ { | | | 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 | int Tcl_UtfNcmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ unsigned long numChars) /* Number of UTF chars to compare. */ { Tcl_UniChar ch1, ch2; int uch1, uch2; #if TCL_UTF_MAX == 3 int num1 = numChars, num2 = numChars; #endif /* * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the |
︙ | ︙ | |||
1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 | } } #endif if (uch1 != uch2) { return (uch1 - uch2); } } return 0; } /* *---------------------------------------------------------------------- * | > | 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 | } } #endif if (uch1 != uch2) { return (uch1 - uch2); } } return 0; } /* *---------------------------------------------------------------------- * |
︙ | ︙ | |||
1432 1433 1434 1435 1436 1437 1438 | int Tcl_UtfNcasecmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ unsigned long numChars) /* Number of UTF chars to compare. */ { | | > > > > > > | 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 | int Tcl_UtfNcasecmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ unsigned long numChars) /* Number of UTF chars to compare. */ { Tcl_UniChar ch1, ch2; int uch1, uch2; #if TCL_UTF_MAX == 3 int num1 = numChars, num2 = numChars; #endif /* * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the * pair of bytes 0xC0,0x80) is larger than byte representation of \u0001 * (the byte 0x01.) */ while ( #if TCL_UTF_MAX == 3 (num1-- > 0) && (num2-- > 0) #else numChars-- > 0 #endif ) { |
︙ | ︙ | |||
1512 1513 1514 1515 1516 1517 1518 | */ int TclUtfCasecmp( const char *cs, /* UTF string to compare to ct. */ const char *ct) /* UTF string cs is compared to. */ { | | > | 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 | */ int TclUtfCasecmp( const char *cs, /* UTF string to compare to ct. */ const char *ct) /* UTF string cs is compared to. */ { Tcl_UniChar ch1, ch2; int uch1, uch2; while (*cs && *ct) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); uch1 = ch1; uch2 = ch2; |
︙ | ︙ | |||
1556 1557 1558 1559 1560 1561 1562 | if (uch1 != uch2) { return uch1 - uch2; } } } return UCHAR(*cs) - UCHAR(*ct); } | < | 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 | if (uch1 != uch2) { return uch1 - uch2; } } } return UCHAR(*cs) - UCHAR(*ct); } /* *---------------------------------------------------------------------- * * Tcl_UniCharToUpper -- * * Compute the uppercase equivalent of the given Unicode character. |
︙ | ︙ |
Changes to jni/tcl/generic/tclUtil.c.
︙ | ︙ | |||
1715 1716 1717 1718 1719 1720 1721 | Tcl_Backslash( const char *src, /* Points to the backslash character of a * backslash sequence. */ int *readPtr) /* Fill in with number of characters read from * src, unless NULL. */ { char buf[TCL_UTF_MAX*2]; | | | 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 | Tcl_Backslash( const char *src, /* Points to the backslash character of a * backslash sequence. */ int *readPtr) /* Fill in with number of characters read from * src, unless NULL. */ { char buf[TCL_UTF_MAX*2]; int ch; buf[0] = '\0'; Tcl_UtfBackslash(src, readPtr, buf); UtfToUniChar(buf, &ch); return (char) ch; } |
︙ | ︙ | |||
1744 1745 1746 1747 1748 1749 1750 | *---------------------------------------------------------------------- */ int TclTrimRight( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ | | | | 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 | *---------------------------------------------------------------------- */ int TclTrimRight( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ /* Calls to UtfToUniChar() in this routine * rely on (bytes[numBytes] == '\0'). */ const char *trim, /* String of trim characters... */ int numTrim) /* ...and its length in bytes */ /* Calls to UtfToUniChar() in this routine * rely on (trim[numTrim] == '\0'). */ { const char *pp, *p = bytes + numBytes, *q; Tcl_UniChar ch1 = 0; int i; Tcl_DString ds; |
︙ | ︙ | |||
1883 1884 1885 1886 1887 1888 1889 | *---------------------------------------------------------------------- */ int TclTrimLeft( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ | | | | 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 | *---------------------------------------------------------------------- */ int TclTrimLeft( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ /* Calls to UtfToUniChar() in this routine * rely on (bytes[numBytes] == '\0'). */ const char *trim, /* String of trim characters... */ int numTrim) /* ...and its length in bytes */ /* Calls to UtfToUniChar() in this routine * rely on (trim[numTrim] == '\0'). */ { const char *p = bytes, *q; int i; Tcl_DString ds; /* Empty strings -> nothing to do */ |
︙ | ︙ | |||
2018 2019 2020 2021 2022 2023 2024 | /* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes */ trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim); numBytes -= trimLeft; /* If we did not trim the whole string, it starts with a character * that we will not trim. Skip over it. */ if (numBytes > 0) { | | | 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 | /* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes */ trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim); numBytes -= trimLeft; /* If we did not trim the whole string, it starts with a character * that we will not trim. Skip over it. */ if (numBytes > 0) { int len, uch; const char *first = bytes + trimLeft; len = UtfToUniChar(first, &uch); bytes += len; numBytes -= (bytes - first); if (numBytes > 0) { |
︙ | ︙ | |||
2322 2323 2324 2325 2326 2327 2328 | int Tcl_StringCaseMatch( const char *str, /* String. */ const char *pattern, /* Pattern, which may contain special * characters. */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { | | | 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 | int Tcl_StringCaseMatch( const char *str, /* String. */ const char *pattern, /* Pattern, which may contain special * characters. */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { int p, charLen, ch1, ch2; while (1) { p = *pattern; /* * See if we're at the end of both the pattern and the string. If so, * we succeeded. If we're at the end of the pattern but not at the end |
︙ | ︙ |