AndroWish: Check-in [f7d4bad347]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

Comment:	fix match for beyond BMP chars
Timelines:	family \| ancestors \| descendants \| both \| wtf-8-experiment
Files:	files \| file ages \| folders
SHA1:	f7d4bad3473453aa82e54297afdf3cbc5422b4a9
User & Date:	chw 2020-05-16 19:11:09.793

Context

2020-05-18
06:27		fix sort/compare for beyond BMP chars (unfinished, WIP) check-in: bd15431fd8 user: chw tags: wtf-8-experiment
2020-05-16
19:11		fix match for beyond BMP chars check-in: f7d4bad347 user: chw tags: wtf-8-experiment
05:31		merge with trunk check-in: 9028740ed7 user: chw tags: wtf-8-experiment

Changes

Changes to jni/tcl/generic/tclScan.c.

Changes to jni/tcl/generic/tclUtf.c.

Changes to jni/tcl/generic/tclUtil.c.

Changes to jni/tcl/tests/split.test.

Changes to jni/tcl/tests/stringComp.test.

︙			︙
39 40 41 42 43 44 45 46 47 48 49 50 51 52	} ranges; } CharSet; / * Declarations for functions used only in this file. / static int UtfToUniChar(const char string, int chPtr); static const char BuildCharSet(CharSet cset, const char format); static int CharInSet(CharSet cset, int ch); static void ReleaseCharSet(CharSet cset); static int ValidateFormat(Tcl_Interp interp, const char format, int numVars, int *totalVars);	>	39 40 41 42 43 44 45 46 47 48 49 50 51 52 53	} ranges; } CharSet; / * Declarations for functions used only in this file. / #undef UtfToUniChar static int UtfToUniChar(const char string, int chPtr); static const char BuildCharSet(CharSet cset, const char format); static int CharInSet(CharSet cset, int ch); static void ReleaseCharSet(CharSet cset); static int ValidateFormat(Tcl_Interp interp, const char format, int numVars, int *totalVars);
︙			︙
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80	* * Side effects: * None. * ---------------------------------------------------------------------- / ~~#undef UtfToUniChar~~ static int UtfToUniChar( const char src, int chPtr) { Tcl_UniChar ch; int uch, len;	<	67 68 69 70 71 72 73 74 75 76 77 78 79 80	* * Side effects: * None. * ---------------------------------------------------------------------- / static int UtfToUniChar( const char src, int chPtr) { Tcl_UniChar ch; int uch, len;
︙			︙

︙			︙
2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117	Tcl_UniCharCaseMatch( const Tcl_UniChar uniStr, / Unicode String. / const Tcl_UniChar uniPattern, /* Pattern, which may contain special * characters. / int nocase) / 0 for case sensitive, 1 for insensitive / { Tcl_UniChar ch1 = 0, p; while (1) { p = uniPattern; /* * See if we're at the end of both the pattern and the string. If so,	> > > > > > > > > > >	2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128	Tcl_UniCharCaseMatch( const Tcl_UniChar uniStr, / Unicode String. / const Tcl_UniChar uniPattern, /* Pattern, which may contain special * characters. / int nocase) / 0 for case sensitive, 1 for insensitive / { #if TCL_UTF_MAX == 3 int strLen = 0, ptnLen = 0; while (uniStr[strLen] != 0) { strLen++; } while (uniPattern[ptnLen] != 0) { ptnLen++; } return TclUniCharMatch(uniStr, strLen, uniPattern, ptnLen, nocase); #else Tcl_UniChar ch1 = 0, p; while (1) { p = uniPattern; /* * See if we're at the end of both the pattern and the string. If so,
︙			︙
2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274	} } else if (uniStr != uniPattern) { return 0; } uniStr++; uniPattern++; } } /* ---------------------------------------------------------------------- * TclUniCharMatch -- *	>	2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286	} } else if (uniStr != uniPattern) { return 0; } uniStr++; uniPattern++; } #endif } /* ---------------------------------------------------------------------- * TclUniCharMatch -- *
︙			︙
2294 2295 2296 2297 2298 2299 2300 ~~2301~~ 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323	int strLen, /* Length of String / const Tcl_UniChar pattern, /* Pattern, which may contain special * characters. / int ptnLen, / Length of Pattern / int nocase) / 0 for case sensitive, 1 for insensitive / { const Tcl_UniChar stringEnd, patternEnd; ~~~~Tcl_UniChar~~ p;~~ stringEnd = string + strLen; patternEnd = pattern + ptnLen; while (1) { / * See if we're at the end of both the pattern and the string. If so, * we succeeded. If we're at the end of the pattern but not at the end * of the string, we failed. / if (pattern == patternEnd) { return (string == stringEnd); } p = pattern; if ((string == stringEnd) && (p != '')) { return 0; } / * Check for a "" as the next pattern character. It matches any substring. We handle this by skipping all the characters up to the	\| > > > > > > > > > > > >	2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347	int strLen, /* Length of String / const Tcl_UniChar pattern, /* Pattern, which may contain special * characters. / int ptnLen, / Length of Pattern / int nocase) / 0 for case sensitive, 1 for insensitive / { const Tcl_UniChar stringEnd, patternEnd; int p; #if TCL_UTF_MAX == 3 int q; #endif stringEnd = string + strLen; patternEnd = pattern + ptnLen; while (1) { / * See if we're at the end of both the pattern and the string. If so, * we succeeded. If we're at the end of the pattern but not at the end * of the string, we failed. / if (pattern == patternEnd) { return (string == stringEnd); } p = pattern; #if TCL_UTF_MAX == 3 if ((p & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { p = (((p&0x3FF)<<10) \| (pattern[1]&0x3FF)) + 0x10000; ++pattern; } } #endif if ((string == stringEnd) && (p != '')) { return 0; } / * Check for a "" as the next pattern character. It matches any substring. We handle this by skipping all the characters up to the
︙			︙
2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 ~~2392~~ 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 ~~2448~~ 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467	while ((++pattern) == '') { /* empty body / } if (pattern == patternEnd) { return 1; } p = pattern; if (nocase) { p = Tcl_UniCharToLower(p); } while (1) { /* * Optimization for matching - cruise through the string * quickly if the next char in the pattern isn't a special * character. / if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while ((string < stringEnd) && (p != string) && (p != Tcl_UniCharToLower(string))) { string++; } } else { while ((string < stringEnd) && (p != string)) { string++; } } } if (TclUniCharMatch(string, stringEnd - string, pattern, patternEnd - pattern, nocase)) { return 1; } if (string == stringEnd) { return 0; } string++; } } /* * Check for a "?" as the next pattern character. It matches any * single character. / if (p == '?') { pattern++; string++; continue; } / * Check for a "[" as the next pattern character. It is followed by a * list of characters that are acceptable, or by a range (two * characters separated by "-"). / if (p == '[') { ~~~~Tcl_UniChar~~ ch1, startChar, endChar;~~ pattern++; ch1 = (nocase ? Tcl_UniCharToLower(string) : string); string++; while (1) { if ((pattern == ']') \|\| (pattern == patternEnd)) { return 0; } startChar = (nocase ? Tcl_UniCharToLower(pattern) : pattern); pattern++; if (pattern == '-') { pattern++; if (pattern == patternEnd) { return 0; } endChar = (nocase ? Tcl_UniCharToLower(pattern) : pattern); pattern++; if (((startChar <= ch1) && (ch1 <= endChar)) \|\| ((endChar <= ch1) && (ch1 <= startChar))) { / * Matches ranges of form [a-z] or [z-a]. / break; } } else if (startChar == ch1) { break; } } while (pattern != ']') { if (pattern == patternEnd) { pattern--; break; } pattern++; } pattern++; continue; } /* * If the next pattern character is '\', just strip off the '\' so we * do exact matching on the character that follows. / if (p == '\\') { if (++pattern == patternEnd) { return 0; } } / * There's no special character. Just make sure that the next bytes of * each string match. / if (nocase) { if (Tcl_UniCharToLower(string) != Tcl_UniCharToLower(pattern)) { return 0; } } else if (string != pattern) { return 0; } string++; pattern++; } } / * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */	> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > \| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > \| > > > > > > > > > > > > > > > > > >	2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616	while ((++pattern) == '') { /* empty body / } if (pattern == patternEnd) { return 1; } p = pattern; #if TCL_UTF_MAX == 3 if ((p & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { p = (((p&0x3FF)<<10) \| (pattern[1]&0x3FF)) + 0x10000; ++pattern; } } #endif if (nocase) { p = Tcl_UniCharToLower(p); } while (1) { /* * Optimization for matching - cruise through the string * quickly if the next char in the pattern isn't a special * character. / if ((p != '[') && (p != '?') && (p != '\\')) { #if TCL_UTF_MAX == 3 while (string < stringEnd) { q = string; if ((q & 0xFC00) == 0xD800) { if ((string + 1 < stringEnd) && ((string[1] & 0xFC00) == 0xDC00)) { q = (((q&0x3FF)<<10) \| (string[1]&0x3FF)) + 0x10000; } } if ((p == q) \|\| (nocase && (p == Tcl_UniCharToLower(q)))) { break; } if (q > 0xFFFF) { string++; } string++; } #else if (nocase) { while ((string < stringEnd) && (p != string) && (p != Tcl_UniCharToLower(string))) { string++; } } else { while ((string < stringEnd) && (p != string)) { string++; } } #endif } if (TclUniCharMatch(string, stringEnd - string, pattern, patternEnd - pattern, nocase)) { return 1; } if (string == stringEnd) { return 0; } #if TCL_UTF_MAX == 3 if ((string[0] & 0xFC00) == 0xD800) { if ((string + 1 < stringEnd) && ((string[1] & 0xFC00) == 0xDC00)) { string++; } } #endif string++; } } / * Check for a "?" as the next pattern character. It matches any * single character. / if (p == '?') { pattern++; #if TCL_UTF_MAX == 3 if ((string[0] & 0xFC00) == 0xD800) { if ((string + 1 < stringEnd) && ((string[1] & 0xFC00) == 0xDC00)) { string++; } } #endif string++; continue; } / * Check for a "[" as the next pattern character. It is followed by a * list of characters that are acceptable, or by a range (two * characters separated by "-"). / if (p == '[') { int ch1, startChar, endChar; pattern++; #if TCL_UTF_MAX == 3 ch1 = string; if ((ch1 & 0xFC00) == 0xD800) { if ((string + 1 < stringEnd) && ((string[1] & 0xFC00) == 0xDC00)) { ch1 = (((ch1&0x3FF)<<10) \| (string[1]&0x3FF)) + 0x10000; string++; } } if (nocase) { ch1 = Tcl_UniCharToLower(ch1); } #else ch1 = (nocase ? Tcl_UniCharToLower(string) : string); #endif string++; while (1) { if ((pattern == ']') \|\| (pattern == patternEnd)) { return 0; } #if TCL_UTF_MAX == 3 startChar = pattern; if ((startChar & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { startChar = (((startChar&0x3FF)<<10) \| (pattern[1]&0x3FF)) + 0x10000; pattern++; } } if (nocase) { startChar = Tcl_UniCharToLower(startChar); } #else startChar = (nocase ? Tcl_UniCharToLower(pattern) : pattern); #endif pattern++; if (pattern == '-') { pattern++; if (pattern == patternEnd) { return 0; } #if TCL_UTF_MAX == 3 endChar = pattern; if ((endChar & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { endChar = (((endChar&0x3FF)<<10) \| (pattern[1]&0x3FF)) + 0x10000; pattern++; } } if (nocase) { endChar = Tcl_UniCharToLower(endChar); } #else endChar = (nocase ? Tcl_UniCharToLower(pattern) : pattern); #endif pattern++; if (((startChar <= ch1) && (ch1 <= endChar)) \|\| ((endChar <= ch1) && (ch1 <= startChar))) { /* * Matches ranges of form [a-z] or [z-a]. / break; } } else if (startChar == ch1) { break; } } while (pattern != ']') { if (pattern == patternEnd) { pattern--; break; } pattern++; } #if TCL_UTF_MAX == 3 if ((pattern[0] & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { pattern++; } } #endif pattern++; continue; } /* * If the next pattern character is '\', just strip off the '\' so we * do exact matching on the character that follows. / if (p == '\\') { if (++pattern == patternEnd) { return 0; } } / * There's no special character. Just make sure that the next bytes of * each string match. / #if TCL_UTF_MAX == 3 p = pattern; if ((p & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { p = (((p&0x3FF)<<10) \| (pattern[1]&0x3FF)) + 0x10000; pattern++; } } q = string; if ((q & 0xFC00) == 0xD800) { if ((string + 1 < stringEnd) && ((string[1] & 0xFC00) == 0xDC00)) { q = (((q&0x3FF)<<10) \| (string[1]&0x3FF)) + 0x10000; string++; } } if (nocase) { if (Tcl_UniCharToLower(q) != Tcl_UniCharToLower(p)) { return 0; } } else if (q != p) { return 0; } #else if (nocase) { if (Tcl_UniCharToLower(string) != Tcl_UniCharToLower(pattern)) { return 0; } } else if (string != pattern) { return 0; } #endif string++; pattern++; } } / * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */

︙			︙
100 101 102 103 104 105 106 107 108 109 110 111 112 113	static Tcl_ThreadDataKey precisionKey; /* * Prototypes for functions defined later in this file. / static void ClearHash(Tcl_HashTable tablePtr); static void FreeProcessGlobalValue(ClientData clientData); static void FreeThreadHash(ClientData clientData); static int GetEndOffsetFromObj(Tcl_Obj objPtr, int endValue, int indexPtr); static Tcl_HashTable * GetThreadHash(Tcl_ThreadDataKey keyPtr); static int SetEndOffsetFromAny(Tcl_Interp interp,	> >	100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115	static Tcl_ThreadDataKey precisionKey; /* * Prototypes for functions defined later in this file. / #undef UtfToUniChar static int UtfToUniChar(const char string, int chPtr); static void ClearHash(Tcl_HashTable tablePtr); static void FreeProcessGlobalValue(ClientData clientData); static void FreeThreadHash(ClientData clientData); static int GetEndOffsetFromObj(Tcl_Obj objPtr, int endValue, int indexPtr); static Tcl_HashTable * GetThreadHash(Tcl_ThreadDataKey keyPtr); static int SetEndOffsetFromAny(Tcl_Interp interp,
︙			︙
359 360 361 362 363 364 365 366 367 368 369 370 371 372	* balance for the list as a whole, while the current implementation achieves * this by establishing brace balance for every element. * * Finally, a reminder that the rules for parsing and formatting lists are * closely tied together with the rules for parsing and evaluating scripts, * and will need to evolve in sync. / / ---------------------------------------------------------------------- * TclMaxListLength -- * * Given 'bytes' pointing to 'numBytes' bytes, scan through them and	> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > >	361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417	* balance for the list as a whole, while the current implementation achieves * this by establishing brace balance for every element. * * Finally, a reminder that the rules for parsing and formatting lists are * closely tied together with the rules for parsing and evaluating scripts, * and will need to evolve in sync. / / ---------------------------------------------------------------------- * UtfToUniChar -- * * Wrapper to Tcl_UtfToUniChar() capable of dealing with * surrogate pairs when compiled with TCL_UTF_MAX == 3. * * Results: * chPtr is filled with the full unicode character, and the return value is the number of bytes from the UTF-8 string that * were consumed. * * Side effects: * None. * ---------------------------------------------------------------------- / static int UtfToUniChar( const char src, int chPtr) { Tcl_UniChar ch; int uch, len; len = TclUtfToUniChar(src, &ch); uch = ch; #if TCL_UTF_MAX == 3 if ((ch & 0xFC00) == 0xD800) { int len2 = TclUtfToUniChar(src + len, &ch); if (len2 && ((ch & 0xFC00) == 0xDC00)) { uch = ((uch & 0x3FF) << 10) + 0x10000 + (ch & 0x3FF); len += len2; } } #endif chPtr = uch; return len; } / ---------------------------------------------------------------------- * TclMaxListLength -- * * Given 'bytes' pointing to 'numBytes' bytes, scan through them and
︙			︙
1670 1671 1672 1673 1674 1675 1676 ~~1677~~ 1678 1679 1680 ~~1681~~ 1682 1683 1684 1685 1686 1687 1688	Tcl_Backslash( const char src, / Points to the backslash character of a * backslash sequence. / int readPtr) /* Fill in with number of characters read from * src, unless NULL. / { char buf[TCL_UTF_MAX2]; ~~~~Tcl_UniChar~~ ch = 0;~~ buf[0] = '\0'; Tcl_UtfBackslash(src, readPtr, buf); ~~~~Tcl~~UtfToUniChar(buf, &ch);~~ return (char) ch; } /* ---------------------------------------------------------------------- * TclTrimRight --	\| \|	1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733	Tcl_Backslash( const char src, / Points to the backslash character of a * backslash sequence. / int readPtr) /* Fill in with number of characters read from * src, unless NULL. / { char buf[TCL_UTF_MAX2]; int ch = 0; buf[0] = '\0'; Tcl_UtfBackslash(src, readPtr, buf); UtfToUniChar(buf, &ch); return (char) ch; } /* ---------------------------------------------------------------------- * TclTrimRight --
︙			︙
1752 1753 1754 1755 1756 1757 1758 ~~1759 1760~~ 1761 1762 ~~1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774~~ 1775 1776 1777 1778 1779 1780 1781	* Make trim string into unicode array. / slowPath: Tcl_DStringInit(&ds); q = trim; do { ~~~~Tcl_UniChar ch2 = 0;~~ int uch, qInc = ~~Tcl~~UtfToUniChar(q, &~~ch2~~);~~ q += qInc; ~~uch = ch2;~~ ~~#if TCL_UTF_MAX == 3~~ ~~if (((ch2 & 0xFC00) == 0xD800) &&~~ ~~Tcl_UtfCharComplete(q, numTrim - (q - trim))) {~~ ~~ch2 = 0;~~ ~~qInc = TclUtfToUniChar(q, &ch2);~~ ~~if ((ch2 & 0xFC00) == 0xDC00) {~~ ~~q += qInc;~~ ~~uch = (((uch&0x3FF)<<10) \| (ch2&0x3FF)) + 0x10000;~~ } } ~~#endif~~ Tcl_DStringAppend(&ds, (char ) &uch, sizeof(int)); } while (q < trim + numTrim); numTrim = Tcl_DStringLength(&ds) / sizeof(int); /* * Outer loop: iterate over string to be trimmed. */	< \| < < < < < < < < < < < <	1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813	* Make trim string into unicode array. / slowPath: Tcl_DStringInit(&ds); q = trim; do { int uch = 0, qInc = UtfToUniChar(q, &uch); q += qInc; Tcl_DStringAppend(&ds, (char ) &uch, sizeof(int)); } while (q < trim + numTrim); numTrim = Tcl_DStringLength(&ds) / sizeof(int); /* * Outer loop: iterate over string to be trimmed. */
︙			︙
1859 1860 1861 1862 1863 1864 1865 ~~1866~~ 1867 1868 1869 1870 1871 1872 1873	* rely on (bytes[numBytes] == '\0'). / const char trim, /* String of trim characters... / int numTrim) / ...and its length in bytes / / Calls to TclUtfToUniChar() in this routine * rely on (trim[numTrim] == '\0'). / { const char p = bytes, q; ~~Tcl_UniChar ch1 = 0;~~ int i; Tcl_DString ds; / Empty strings -> nothing to do */ if ((numBytes == 0) \|\| (numTrim == 0)) { return 0; }	<	1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904	* rely on (bytes[numBytes] == '\0'). / const char trim, /* String of trim characters... / int numTrim) / ...and its length in bytes / / Calls to TclUtfToUniChar() in this routine * rely on (trim[numTrim] == '\0'). / { const char p = bytes, q; int i; Tcl_DString ds; / Empty strings -> nothing to do */ if ((numBytes == 0) \|\| (numTrim == 0)) { return 0; }
︙			︙
1905 1906 1907 1908 1909 1910 1911 ~~1912 1913~~ 1914 1915 ~~1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927~~ 1928 1929 1930 1931 1932 1933 1934 1935 1936 ~~1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952~~ 1953 1954 1955 1956 1957 1958 1959	* Make trim string into unicode array. / slowPath: Tcl_DStringInit(&ds); q = trim; do { ~~~~Tcl_UniChar ch2 = 0;~~ int uch, qInc = ~~Tcl~~UtfToUniChar(q, &~~ch2~~);~~ q += qInc; ~~uch = ch2;~~ ~~#if TCL_UTF_MAX == 3~~ ~~if (((ch2 & 0xFC00) == 0xD800) &&~~ ~~Tcl_UtfCharComplete(q, numTrim - (q - trim))) {~~ ~~ch2 = 0;~~ ~~qInc = TclUtfToUniChar(q, &ch2);~~ ~~if ((ch2 & 0xFC00) == 0xDC00) {~~ ~~q += qInc;~~ ~~uch = (((uch&0x3FF)<<10) \| (ch2&0x3FF)) + 0x10000;~~ } } ~~#endif~~ Tcl_DStringAppend(&ds, (char ) &uch, sizeof(int)); } while (q < trim + numTrim); numTrim = Tcl_DStringLength(&ds) / sizeof(int); /* * Outer loop: iterate over string to be trimmed. / do { int uch, pInc = ~~Tcl~~UtfToUniChar(p, &~~ch1~~); ~~uch = ch1;~~ ~~#if TCL_UTF_MAX == 3~~ ~~if (((ch1 & 0xFC00) == 0xD800) &&~~ ~~Tcl_UtfCharComplete(p, numBytes - (p - bytes))) {~~ ~~int ppInc;~~ ~~ch1 = 0;~~ ~~ppInc = TclUtfToUniChar(p + pInc, &ch1);~~ ~~if ((ch1 & 0xFC00) == 0xDC00) {~~ ~~pInc += ppInc;~~ ~~uch = (((uch&0x3FF)<<10) \| (ch1&0x3FF)) + 0x10000;~~ } } ~~#endif~~ / * Inner loop: scan trim string for match to current character. / for (i = 0; i < numTrim; i++) { if (uch == ((int )Tcl_DStringValue(&ds))[i]) {	< \| < < < < < < < < < < < < \| < < < < < < < < < < < < < < <	1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962	* Make trim string into unicode array. / slowPath: Tcl_DStringInit(&ds); q = trim; do { int uch = 0, qInc = UtfToUniChar(q, &uch); q += qInc; Tcl_DStringAppend(&ds, (char ) &uch, sizeof(int)); } while (q < trim + numTrim); numTrim = Tcl_DStringLength(&ds) / sizeof(int); /* * Outer loop: iterate over string to be trimmed. / do { int uch = 0, pInc = UtfToUniChar(p, &uch); / * Inner loop: scan trim string for match to current character. / for (i = 0; i < numTrim; i++) { if (uch == ((int )Tcl_DStringValue(&ds))[i]) {
︙			︙
2015 2016 2017 2018 2019 2020 2021 ~~2022 2023~~ 2024 2025 ~~2026 2027 2028 2029 2030 2031 2032 2033 2034 2035~~ 2036 2037 2038 2039 2040 2041 2042	/* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes / trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim); numBytes -= trimLeft; / If we did not trim the whole string, it starts with a character * that we will not trim. Skip over it. / if (numBytes > 0) { ~~~~Tcl_UniChar ch = 0;~~ int len;~~ const char first = bytes + trimLeft; len = ~~Tcl~~UtfToUniChar(first, &ch); ~~#if TCL_UTF_MAX == 3~~ ~~if ((ch & 0xFC00) == 0xD800) {~~ ~~int len2 = TclUtfToUniChar(first + len, &ch);~~ ~~if ((ch & 0xFC00) == 0xDC00) {~~ ~~len += len2;~~ } } ~~#endif~~ bytes += len; numBytes -= (bytes - first); if (numBytes > 0) { /* When bytes is NUL-terminated, returns * 0 <= trimRight <= numBytes */ trimRight = TclTrimRight(bytes, numBytes, trim, numTrim);	< \| \| < < < < < < < < <	2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035	/* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes / trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim); numBytes -= trimLeft; / If we did not trim the whole string, it starts with a character * that we will not trim. Skip over it. / if (numBytes > 0) { int len, uch = 0; const char first = bytes + trimLeft; len = UtfToUniChar(first, &uch); bytes += len; numBytes -= (bytes - first); if (numBytes > 0) { /* When bytes is NUL-terminated, returns * 0 <= trimRight <= numBytes */ trimRight = TclTrimRight(bytes, numBytes, trim, numTrim);
︙			︙
2329 2330 2331 2332 2333 2334 2335 ~~2336 2337~~ 2338 2339 2340 2341 2342 2343 2344	int Tcl_StringCaseMatch( const char str, / String. / const char pattern, /* Pattern, which may contain special * characters. / int nocase) / 0 for case sensitive, 1 for insensitive / { ~~int p, charLen; ~~Tcl_UniChar ch1 = 0, ch2 = 0;~~~~ while (1) { p = pattern; /* * See if we're at the end of both the pattern and the string. If so, * we succeeded. If we're at the end of the pattern but not at the end	\| <	2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336	int Tcl_StringCaseMatch( const char str, / String. / const char pattern, /* Pattern, which may contain special * characters. / int nocase) / 0 for case sensitive, 1 for insensitive / { int p, charLen, ch1 = 0, ch2 = 0; while (1) { p = pattern; /* * See if we're at the end of both the pattern and the string. If so, * we succeeded. If we're at the end of the pattern but not at the end
︙			︙
2371 2372 2373 2374 2375 2376 2377 ~~2378~~ 2379 2380 ~~2381~~ 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 ~~2397~~ 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 ~~2411~~ 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 ~~2425~~ 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 ~~2436~~ 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 ~~2447~~ 2448 2449 2450 ~~2451~~ 2452 2453 2454 ~~2455~~ 2456 2457 2458 2459 2460 2461 2462 2463 2464 ~~2465~~ 2466 2467 2468 ~~2469~~ 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 ~~2480~~ 2481 2482 2483 ~~2484~~ 2485 2486 2487 2488 2489 2490 2491	} /* * This is a special case optimization for single-byte utf. / if (UCHAR(pattern) < 0x80) { ~~ch2 = (~~Tcl_UniChar~~)~~ (nocase ? tolower(UCHAR(pattern)) : UCHAR(pattern)); } else { ~~~~Tcl_~~UtfToUniChar(pattern, &ch2);~~ if (nocase) { ch2 = Tcl_UniCharToLower(ch2); } } while (1) { /* * Optimization for matching - cruise through the string * quickly if the next char in the pattern isn't a special * character / if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while (str) { ~~charLen = ~~Tcl~~UtfToUniChar(str, &ch1);~~ if (ch2==ch1 \|\| ch2==Tcl_UniCharToLower(ch1)) { break; } str += charLen; } } else { /* * There's no point in trying to make this code * shorter, as the number of bytes you want to compare * each time is non-constant. / while (str) { ~~charLen = ~~Tcl~~UtfToUniChar(str, &ch1);~~ if (ch2 == ch1) { break; } str += charLen; } } } if (Tcl_StringCaseMatch(str, pattern, nocase)) { return 1; } if (str == '\0') { return 0; } ~~str += ~~Tcl~~UtfToUniChar(str, &ch1);~~ } } / * Check for a "?" as the next pattern character. It matches any * single character. / if (p == '?') { pattern++; ~~str += ~~Tcl~~UtfToUniChar(str, &ch1);~~ continue; } / * Check for a "[" as the next pattern character. It is followed by a * list of characters that are acceptable, or by a range (two * characters separated by "-"). / if (p == '[') { ~~~~Tcl_UniChar~~ startChar = 0, endChar = 0;~~ pattern++; if (UCHAR(str) < 0x80) { ~~ch1 = (~~Tcl_UniChar~~)~~ (nocase ? tolower(UCHAR(str)) : UCHAR(str)); str++; } else { ~~str += ~~Tcl_~~UtfToUniChar(str, &ch1);~~ if (nocase) { ch1 = Tcl_UniCharToLower(ch1); } } while (1) { if ((pattern == ']') \|\| (pattern == '\0')) { return 0; } if (UCHAR(pattern) < 0x80) { ~~startChar = (~~Tcl_UniChar~~) (nocase~~ ? tolower(UCHAR(pattern)) : UCHAR(pattern)); pattern++; } else { ~~pattern += ~~Tcl_~~UtfToUniChar(pattern, &startChar);~~ if (nocase) { startChar = Tcl_UniCharToLower(startChar); } } if (pattern == '-') { pattern++; if (pattern == '\0') { return 0; } if (UCHAR(pattern) < 0x80) { ~~endChar = (~~Tcl_UniChar~~) (nocase~~ ? tolower(UCHAR(pattern)) : UCHAR(pattern)); pattern++; } else { ~~pattern += ~~Tcl_~~UtfToUniChar(pattern, &endChar);~~ if (nocase) { endChar = Tcl_UniCharToLower(endChar); } } if (((startChar <= ch1) && (ch1 <= endChar)) \|\| ((endChar <= ch1) && (ch1 <= startChar))) { /*	\| \| \| \| \| \| \| \| \| \| \| \| \|	2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483	} /* * This is a special case optimization for single-byte utf. / if (UCHAR(pattern) < 0x80) { ch2 = (int) (nocase ? tolower(UCHAR(pattern)) : UCHAR(pattern)); } else { UtfToUniChar(pattern, &ch2); if (nocase) { ch2 = Tcl_UniCharToLower(ch2); } } while (1) { /* * Optimization for matching - cruise through the string * quickly if the next char in the pattern isn't a special * character / if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while (str) { charLen = UtfToUniChar(str, &ch1); if (ch2==ch1 \|\| ch2==Tcl_UniCharToLower(ch1)) { break; } str += charLen; } } else { /* * There's no point in trying to make this code * shorter, as the number of bytes you want to compare * each time is non-constant. / while (str) { charLen = UtfToUniChar(str, &ch1); if (ch2 == ch1) { break; } str += charLen; } } } if (Tcl_StringCaseMatch(str, pattern, nocase)) { return 1; } if (str == '\0') { return 0; } str += UtfToUniChar(str, &ch1); } } / * Check for a "?" as the next pattern character. It matches any * single character. / if (p == '?') { pattern++; str += UtfToUniChar(str, &ch1); continue; } / * Check for a "[" as the next pattern character. It is followed by a * list of characters that are acceptable, or by a range (two * characters separated by "-"). / if (p == '[') { int startChar = 0, endChar = 0; pattern++; if (UCHAR(str) < 0x80) { ch1 = (int) (nocase ? tolower(UCHAR(str)) : UCHAR(str)); str++; } else { str += UtfToUniChar(str, &ch1); if (nocase) { ch1 = Tcl_UniCharToLower(ch1); } } while (1) { if ((pattern == ']') \|\| (pattern == '\0')) { return 0; } if (UCHAR(pattern) < 0x80) { startChar = (int) (nocase ? tolower(UCHAR(pattern)) : UCHAR(pattern)); pattern++; } else { pattern += UtfToUniChar(pattern, &startChar); if (nocase) { startChar = Tcl_UniCharToLower(startChar); } } if (pattern == '-') { pattern++; if (pattern == '\0') { return 0; } if (UCHAR(pattern) < 0x80) { endChar = (int) (nocase ? tolower(UCHAR(pattern)) : UCHAR(pattern)); pattern++; } else { pattern += UtfToUniChar(pattern, &endChar); if (nocase) { endChar = Tcl_UniCharToLower(endChar); } } if (((startChar <= ch1) && (ch1 <= endChar)) \|\| ((endChar <= ch1) && (ch1 <= startChar))) { /*
︙			︙
2525 2526 2527 2528 2529 2530 2531 ~~2532 2533~~ 2534 2535 2536 2537 2538 2539 2540	} /* * There's no special character. Just make sure that the next bytes of * each string match. */ ~~str += ~~Tcl~~UtfToUniChar(str, &ch1); pattern += ~~Tcl~~UtfToUniChar(pattern, &ch2);~~ if (nocase) { if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) { return 0; } } else if (ch1 != ch2) { return 0; }	\| \|	2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532	} /* * There's no special character. Just make sure that the next bytes of * each string match. */ str += UtfToUniChar(str, &ch1); pattern += UtfToUniChar(pattern, &ch2); if (nocase) { if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) { return 0; } } else if (ch1 != ch2) { return 0; }
︙			︙