Check-in [34c5b6ec22]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:merge with trunk
Timelines: family | ancestors | descendants | both | wtf-8-experiment
Files: files | file ages | folders
SHA1: 34c5b6ec22447702cb9e7eb2232fcfcbc903e517
User & Date: chw 2020-05-20 12:32:15.129
Context
2020-05-21
06:01
merge with trunk check-in: 1663fd9a31 user: chw tags: wtf-8-experiment
2020-05-20
12:32
merge with trunk check-in: 34c5b6ec22 user: chw tags: wtf-8-experiment
11:02
backport most unicode fixes from wtf-8-experiment branch check-in: 088e611d9f user: chw tags: trunk
08:28
more string trim tests check-in: 4912c0913b user: chw tags: wtf-8-experiment
Changes
Unified Diff Ignore Whitespace Patch
Changes to jni/tcl/generic/tclScan.c.
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
 */

static const char *
BuildCharSet(
    CharSet *cset,
    const char *format)		/* Points to first char of set. */
{
    int ch = 0, start;
    int offset, nranges;
    const char *end;

    memset(cset, 0, sizeof(CharSet));

    offset = UtfToUniChar(format, &ch);
    if (ch == '^') {







|







114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
 */

static const char *
BuildCharSet(
    CharSet *cset,
    const char *format)		/* Points to first char of set. */
{
    int ch, start;
    int offset, nranges;
    const char *end;

    memset(cset, 0, sizeof(CharSet));

    offset = UtfToUniChar(format, &ch);
    if (ch == '^') {
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
    Tcl_Interp *interp,		/* Current interpreter. */
    const char *format,		/* The format string. */
    int numVars,		/* The number of variables passed to the scan
				 * command. */
    int *totalSubs)		/* The number of variables that will be
				 * required. */
{
    int gotXpg, gotSequential, value, i, flags, ch = 0;
    char *end;
    int objIndex, xpgSize, nspace = numVars;
    int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
    char buf[TCL_UTF_MAX+1];
    Tcl_Obj *errorMsg;		/* Place to build an error messages. Note that
				 * these are messy operations because we do
				 * not want to use the formatting engine;







|







295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
    Tcl_Interp *interp,		/* Current interpreter. */
    const char *format,		/* The format string. */
    int numVars,		/* The number of variables passed to the scan
				 * command. */
    int *totalSubs)		/* The number of variables that will be
				 * required. */
{
    int gotXpg, gotSequential, value, i, flags, ch;
    char *end;
    int objIndex, xpgSize, nspace = numVars;
    int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
    char buf[TCL_UTF_MAX+1];
    Tcl_Obj *errorMsg;		/* Place to build an error messages. Note that
				 * these are messy operations because we do
				 * not want to use the formatting engine;
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
    int numVars, nconversions, totalVars = -1;
    int objIndex, offset, i, result, code;
    long value;
    const char *string, *end, *baseString;
    char op = 0;
    int width, underflow = 0;
    Tcl_WideInt wideValue;
    int ch = 0, sch = 0;
    Tcl_Obj **objs = NULL, *objPtr = NULL;
    int flags;

    if (objc < 3) {
	Tcl_WrongNumArgs(interp, 1, objv,
		"string format ?varName ...?");
	return TCL_ERROR;







|







621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
    int numVars, nconversions, totalVars = -1;
    int objIndex, offset, i, result, code;
    long value;
    const char *string, *end, *baseString;
    char op = 0;
    int width, underflow = 0;
    Tcl_WideInt wideValue;
    int ch, sch;
    Tcl_Obj **objs = NULL, *objPtr = NULL;
    int flags;

    if (objc < 3) {
	Tcl_WrongNumArgs(interp, 1, objv,
		"string format ?varName ...?");
	return TCL_ERROR;
Changes to jni/tcl/generic/tclUtf.c.
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363

int
Tcl_UtfNcmp(
    const char *cs,		/* UTF string to compare to ct. */
    const char *ct,		/* UTF string cs is compared to. */
    unsigned long numChars)	/* Number of UTF chars to compare. */
{
    Tcl_UniChar ch1 = 0, ch2 = 0;
    int uch1, uch2;
#if TCL_UTF_MAX == 3
    int num1 = numChars, num2 = numChars;
#endif

    /*
     * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the







|







1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363

int
Tcl_UtfNcmp(
    const char *cs,		/* UTF string to compare to ct. */
    const char *ct,		/* UTF string cs is compared to. */
    unsigned long numChars)	/* Number of UTF chars to compare. */
{
    Tcl_UniChar ch1, ch2;
    int uch1, uch2;
#if TCL_UTF_MAX == 3
    int num1 = numChars, num2 = numChars;
#endif

    /*
     * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the
1404
1405
1406
1407
1408
1409
1410

1411
1412
1413
1414
1415
1416
1417
	    }
	}
#endif

	if (uch1 != uch2) {
	    return (uch1 - uch2);
	}

    }
    return 0;
}

/*
 *----------------------------------------------------------------------
 *







>







1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
	    }
	}
#endif

	if (uch1 != uch2) {
	    return (uch1 - uch2);
	}

    }
    return 0;
}

/*
 *----------------------------------------------------------------------
 *
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444






1445
1446
1447
1448
1449
1450
1451

int
Tcl_UtfNcasecmp(
    const char *cs,		/* UTF string to compare to ct. */
    const char *ct,		/* UTF string cs is compared to. */
    unsigned long numChars)	/* Number of UTF chars to compare. */
{
    Tcl_UniChar ch1 = 0, ch2 = 0;
    int uch1, uch2;
#if TCL_UTF_MAX == 3
    int num1 = numChars, num2 = numChars;
#endif







    while (
#if TCL_UTF_MAX == 3
	(num1-- > 0) && (num2-- > 0)
#else
	numChars-- > 0
#endif
    ) {







|





>
>
>
>
>
>







1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458

int
Tcl_UtfNcasecmp(
    const char *cs,		/* UTF string to compare to ct. */
    const char *ct,		/* UTF string cs is compared to. */
    unsigned long numChars)	/* Number of UTF chars to compare. */
{
    Tcl_UniChar ch1, ch2;
    int uch1, uch2;
#if TCL_UTF_MAX == 3
    int num1 = numChars, num2 = numChars;
#endif

    /*
     * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the
     * pair of bytes 0xC0,0x80) is larger than byte representation of \u0001
     * (the byte 0x01.)
     */

    while (
#if TCL_UTF_MAX == 3
	(num1-- > 0) && (num2-- > 0)
#else
	numChars-- > 0
#endif
    ) {
1512
1513
1514
1515
1516
1517
1518
1519
1520

1521
1522
1523
1524
1525
1526
1527
 */

int
TclUtfCasecmp(
    const char *cs,		/* UTF string to compare to ct. */
    const char *ct)		/* UTF string cs is compared to. */
{
    Tcl_UniChar ch1 = 0, ch2 = 0;
    int uch1, uch2;


    while (*cs && *ct) {
	cs += TclUtfToUniChar(cs, &ch1);
	ct += TclUtfToUniChar(ct, &ch2);

	uch1 = ch1;
	uch2 = ch2;







|

>







1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
 */

int
TclUtfCasecmp(
    const char *cs,		/* UTF string to compare to ct. */
    const char *ct)		/* UTF string cs is compared to. */
{
    Tcl_UniChar ch1, ch2;
    int uch1, uch2;


    while (*cs && *ct) {
	cs += TclUtfToUniChar(cs, &ch1);
	ct += TclUtfToUniChar(ct, &ch2);

	uch1 = ch1;
	uch2 = ch2;
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
	    if (uch1 != uch2) {
		return uch1 - uch2;
	    }
	}
    }
    return UCHAR(*cs) - UCHAR(*ct);
}


/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToUpper --
 *
 *	Compute the uppercase equivalent of the given Unicode character.







<







1564
1565
1566
1567
1568
1569
1570

1571
1572
1573
1574
1575
1576
1577
	    if (uch1 != uch2) {
		return uch1 - uch2;
	    }
	}
    }
    return UCHAR(*cs) - UCHAR(*ct);
}


/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToUpper --
 *
 *	Compute the uppercase equivalent of the given Unicode character.
Changes to jni/tcl/generic/tclUtil.c.
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
Tcl_Backslash(
    const char *src,		/* Points to the backslash character of a
				 * backslash sequence. */
    int *readPtr)		/* Fill in with number of characters read from
				 * src, unless NULL. */
{
    char buf[TCL_UTF_MAX*2];
    int ch = 0;

    buf[0] = '\0';
    Tcl_UtfBackslash(src, readPtr, buf);
    UtfToUniChar(buf, &ch);
    return (char) ch;
}








|







1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
Tcl_Backslash(
    const char *src,		/* Points to the backslash character of a
				 * backslash sequence. */
    int *readPtr)		/* Fill in with number of characters read from
				 * src, unless NULL. */
{
    char buf[TCL_UTF_MAX*2];
    int ch;

    buf[0] = '\0';
    Tcl_UtfBackslash(src, readPtr, buf);
    UtfToUniChar(buf, &ch);
    return (char) ch;
}

1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
 *----------------------------------------------------------------------
 */

int
TclTrimRight(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
			/* Calls to TclUtfToUniChar() in this routine
			 * rely on (bytes[numBytes] == '\0'). */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
			/* Calls to TclUtfToUniChar() in this routine
			 * rely on (trim[numTrim] == '\0'). */
{
    const char *pp, *p = bytes + numBytes, *q;
    Tcl_UniChar ch1 = 0;
    int i;
    Tcl_DString ds;








|



|







1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
 *----------------------------------------------------------------------
 */

int
TclTrimRight(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
			/* Calls to UtfToUniChar() in this routine
			 * rely on (bytes[numBytes] == '\0'). */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
			/* Calls to UtfToUniChar() in this routine
			 * rely on (trim[numTrim] == '\0'). */
{
    const char *pp, *p = bytes + numBytes, *q;
    Tcl_UniChar ch1 = 0;
    int i;
    Tcl_DString ds;

1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
 *----------------------------------------------------------------------
 */

int
TclTrimLeft(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
			/* Calls to TclUtfToUniChar() in this routine
			 * rely on (bytes[numBytes] == '\0'). */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
			/* Calls to TclUtfToUniChar() in this routine
			 * rely on (trim[numTrim] == '\0'). */
{
    const char *p = bytes, *q;
    int i;
    Tcl_DString ds;

    /* Empty strings -> nothing to do */







|



|







1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
 *----------------------------------------------------------------------
 */

int
TclTrimLeft(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
			/* Calls to UtfToUniChar() in this routine
			 * rely on (bytes[numBytes] == '\0'). */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
			/* Calls to UtfToUniChar() in this routine
			 * rely on (trim[numTrim] == '\0'). */
{
    const char *p = bytes, *q;
    int i;
    Tcl_DString ds;

    /* Empty strings -> nothing to do */
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
	/* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes */
	trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim);
	numBytes -= trimLeft;

	/* If we did not trim the whole string, it starts with a character
	 * that we will not trim. Skip over it. */
	if (numBytes > 0) {
	    int len, uch = 0;
	    const char *first = bytes + trimLeft;

	    len = UtfToUniChar(first, &uch);
	    bytes += len;
	    numBytes -= (bytes - first);

	    if (numBytes > 0) {







|







2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
	/* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes */
	trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim);
	numBytes -= trimLeft;

	/* If we did not trim the whole string, it starts with a character
	 * that we will not trim. Skip over it. */
	if (numBytes > 0) {
	    int len, uch;
	    const char *first = bytes + trimLeft;

	    len = UtfToUniChar(first, &uch);
	    bytes += len;
	    numBytes -= (bytes - first);

	    if (numBytes > 0) {
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
int
Tcl_StringCaseMatch(
    const char *str,		/* String. */
    const char *pattern,	/* Pattern, which may contain special
				 * characters. */
    int nocase)			/* 0 for case sensitive, 1 for insensitive */
{
    int p, charLen, ch1 = 0, ch2 = 0;

    while (1) {
	p = *pattern;

	/*
	 * See if we're at the end of both the pattern and the string. If so,
	 * we succeeded. If we're at the end of the pattern but not at the end







|







2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
int
Tcl_StringCaseMatch(
    const char *str,		/* String. */
    const char *pattern,	/* Pattern, which may contain special
				 * characters. */
    int nocase)			/* 0 for case sensitive, 1 for insensitive */
{
    int p, charLen, ch1, ch2;

    while (1) {
	p = *pattern;

	/*
	 * See if we're at the end of both the pattern and the string. If so,
	 * we succeeded. If we're at the end of the pattern but not at the end