AndroWish: Check-in [31b847f06b]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

Comment:	fix compare for beyond BMP chars
Timelines:	family | ancestors | descendants | both | wtf-8-experiment
Files:	files | file ages | folders
SHA1:	31b847f06b076a000e24ba3e955e39159b382f42
User & Date:	chw 2020-05-18 09:03:47.623

Context

2020-05-18
16:44		fix string map for beyond BMP chars check-in: 92fafa5ea8 user: chw tags: wtf-8-experiment
09:03		fix compare for beyond BMP chars check-in: 31b847f06b user: chw tags: wtf-8-experiment
06:27		fix sort/compare for beyond BMP chars (unfinished, WIP) check-in: bd15431fd8 user: chw tags: wtf-8-experiment

Changes

Changes to jni/tcl/generic/tclCmdMZ.c.

Changes to jni/tcl/generic/tclInt.h.

Changes to jni/tcl/tests/stringComp.test.

︙			︙
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49	static Tcl_NRPostProc TryPostBody; static Tcl_NRPostProc TryPostFinal; static Tcl_NRPostProc TryPostHandler; static int UniCharIsAscii(int character); static int UniCharIsHexDigit(int character); #if TCL_UTF_MAX == 3 ~~static int NumCodePointsUtf(const char src, int length);~~ static int NumCodePointsUnicode(const Tcl_UniChar src, ~~int length);~~ static int UniCharNcmp(const Tcl_UniChar ucs, ~~const Tcl_UniChar uct, unsi~~gned long~~ numCp);~~ static int UtfNcasecmp(const char cs, const char ct, ~~unsi~~gned long~~ numCp);~~ static int UtfNcmp(const char cs, const char ct, ~~unsi~~gned long~~ numCp);~~ #endif /* * Default set of characters to trim in [string trim] and friends. This is a * UTF-8 literal string containing all Unicode space characters [TIP #413] */	> > \| > \| \| > > \| \|	27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54	static Tcl_NRPostProc TryPostBody; static Tcl_NRPostProc TryPostFinal; static Tcl_NRPostProc TryPostHandler; static int UniCharIsAscii(int character); static int UniCharIsHexDigit(int character); #if TCL_UTF_MAX == 3 static int MemCmp(const void s1, const void s2, size_t n, int flags); static int NumCodePointsUtf(const char src, int length, int flagPtr); static int NumCodePointsUnicode(const Tcl_UniChar src, int length, int flagPtr); static int UniCharNcmp(const Tcl_UniChar ucs, const Tcl_UniChar uct, size_t numCp, int flags); static int UniCharNcasecmp(const Tcl_UniChar ucs, const Tcl_UniChar uct, size_t numCp, int flags); static int UtfNcasecmp(const char cs, const char ct, size_t numCp, int flags); static int UtfNcmp(const char cs, const char ct, size_t numCp, int flags); #endif /* * Default set of characters to trim in [string trim] and friends. This is a * UTF-8 literal string containing all Unicode space characters [TIP #413] */
︙			︙
75 76 77 78 79 80 81 82 83 84 ~~85 86 87~~ 88 89 90 91 92 93 94 95 96 97 98 99 100 ~~101~~ 102 103 104 ~~105~~ 106 107 108 109 110 111 112 113 114	"\xef\xbb\xbf" /* zero width no-break space (U+feff) / ; #if TCL_UTF_MAX == 3 / --------------------------------------------------------------------------- * NumCodePointsUtf -- * * Like Tcl_NumUtfChars() but returns the number of code points. * Problem: single high surrogates (0xD800..0xDBFF) at the very * end of the string are not counted. If they were, the functions * UtfNcmp() and UtfNcasecmp() would read beyond the buffer. * * Results: * As above. * * Side effects: * None. * --------------------------------------------------------------------------- / static int NumCodePointsUtf( const char src, / The UTF-8 string to measure. / ~~int length) / The length of the string in bytes. /~~ { Tcl_UniChar ch = 0; int i = 0; const char endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { src += TclUtfToUniChar(src, &ch); if ((ch & 0xFC00) == 0xD800) { if ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { int len = TclUtfToUniChar(src, &ch); if ((ch & 0xFC00) == 0xDC00) {	> > > > > > > > > > > > > > > > > > < < < \| > < >	80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135	"\xef\xbb\xbf" /* zero width no-break space (U+feff) / ; #if TCL_UTF_MAX == 3 / --------------------------------------------------------------------------- * MemCmp -- * * Private wrapper for memcmp(). See C library documentation. 
* --------------------------------------------------------------------------- / static int MemCmp(const void s1, const void s2, size_t n, int flags) { return memcmp(s1, s2, n); } #endif #if TCL_UTF_MAX == 3 /* --------------------------------------------------------------------------- * NumCodePointsUtf -- * * Like Tcl_NumUtfChars() but returns the number of code points. * * Results: * As above. * * Side effects: * None. * --------------------------------------------------------------------------- / static int NumCodePointsUtf( const char src, / The UTF-8 string to measure. / int length, / The length of the string in bytes. / int flagPtr) /* Location to receive end flag. / { Tcl_UniChar ch = 0; int i = 0; const char endPtr = src + length - TCL_UTF_MAX; *flagPtr = 0; while (src < endPtr) { src += TclUtfToUniChar(src, &ch); if ((ch & 0xFC00) == 0xD800) { if ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { int len = TclUtfToUniChar(src, &ch); if ((ch & 0xFC00) == 0xDC00) {
︙			︙
133 134 135 136 137 138 139 ~~140~~ 141 142 143 144 145 146 147	} } i++; } if (src < endPtr) { i += endPtr - src; } else if (i && ((ch & 0xFC00) == 0xD800)) { ~~--i;~~ } return i; } #endif #if TCL_UTF_MAX == 3 /*	< >	154 155 156 157 158 159 160 161 162 163 164 165 166 167 168	} } i++; } if (src < endPtr) { i += endPtr - src; } else if (i && ((ch & 0xFC00) == 0xD800)) { flagPtr = 1; } return i; } #endif #if TCL_UTF_MAX == 3 /
︙			︙
161 162 163 164 165 166 167 ~~168~~ 169 170 171 172 173 174 175 176 177 178 179 180 181 ~~182~~ 183 ~~184 185 186~~ 187 188 189 ~~190~~ 191 ~~192 193 194~~ 195 196 197 198 199 200 201	---------------------------------------------------------------------- / static int UtfNcmp( const char cs, / UTF string to compare to ct. / const char ct, /* UTF string cs is compared to. / ~~unsi~~gned long~~ numCp) / Number of code points to compare. */~~ { Tcl_UniChar ch1 = 0, ch2 = 0; int uch1, uch2; while (numCp-- > 0) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); uch1 = ch1; uch2 = ch2; if ((ch1 & 0xFC00) == 0xD800) { ~~int len = TclUtfToUniChar(cs, &ch1);~~ ~~if ((ch1 & 0xFC00) == 0xDC00) { uch1 = (((uch1&0x3FF)<<10) \| (ch1&0x3FF)) + 0x10000; cs += len;~~ } } if ((ch2 & 0xFC00) == 0xD800) { ~~int len = TclUtfToUniChar(ct, &ch2);~~ ~~if ((ch2 & 0xFC00) == 0xDC00) { uch2 = (((uch2&0x3FF)<<10) \| (ch2&0x3FF)) + 0x10000; ct += len;~~ } } if (uch1 != uch2) { return (uch1 - uch2); } }	\| > > > > \| \| \| \| > > > > \| \| \| \| >	182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231	---------------------------------------------------------------------- / static int UtfNcmp( const char cs, / UTF string to compare to ct. / const char ct, /* UTF string cs is compared to. / size_t numCp, / Number of code points to compare. / int flags) / Flags describing string ends. / { Tcl_UniChar ch1 = 0, ch2 = 0; int uch1, uch2; while (numCp-- > 0) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); uch1 = ch1; uch2 = ch2; if ((ch1 & 0xFC00) == 0xD800) { if ((flags & 1) && (numCp == 0)) { / String ends with high surrogate. 
/ } else { int len = TclUtfToUniChar(cs, &ch1); if ((ch1 & 0xFC00) == 0xDC00) { uch1 = (((uch1&0x3FF)<<10) \| (ch1&0x3FF)) + 0x10000; cs += len; } } } if ((ch2 & 0xFC00) == 0xD800) { if ((flags & 2) && (numCp == 0)) { / String ends with high surrogate. */ } else { int len = TclUtfToUniChar(ct, &ch2); if ((ch2 & 0xFC00) == 0xDC00) { uch2 = (((uch2&0x3FF)<<10) \| (ch2&0x3FF)) + 0x10000; ct += len; } } } if (uch1 != uch2) { return (uch1 - uch2); } }
︙			︙
221 222 223 224 225 226 227 ~~228~~ 229 230 231 232 233 234 235 236 237 238 239 240 241 ~~242~~ 243 ~~244 245 246~~ 247 248 249 ~~250~~ 251 ~~252 253 254~~ 255 256 257 258 259 260 261	---------------------------------------------------------------------- / static int UtfNcasecmp( const char cs, / UTF string to compare to ct. / const char ct, /* UTF string cs is compared to. / ~~unsi~~gned long~~ numCp) / Number of code points to compare. */~~ { Tcl_UniChar ch1 = 0, ch2 = 0; int uch1, uch2; while (numCp-- > 0) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); uch1 = ch1; uch2 = ch2; if ((ch1 & 0xFC00) == 0xD800) { ~~int len = TclUtfToUniChar(cs, &ch1);~~ ~~if ((ch1 & 0xFC00) == 0xDC00) { uch1 = (((uch1&0x3FF)<<10) \| (ch1&0x3FF)) + 0x10000; cs += len;~~ } } if ((ch2 & 0xFC00) == 0xD800) { ~~int len = TclUtfToUniChar(ct, &ch2);~~ ~~if ((ch2 & 0xFC00) == 0xDC00) { uch2 = (((uch2&0x3FF)<<10) \| (ch2&0x3FF)) + 0x10000; ct += len;~~ } } if (uch1 != uch2) { uch1 = TclUCS4ToLower(uch1); uch2 = TclUCS4ToLower(uch2); if (uch1 != uch2) {	\| > > > > \| \| \| \| > > > > \| \| \| \| >	251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300	---------------------------------------------------------------------- / static int UtfNcasecmp( const char cs, / UTF string to compare to ct. / const char ct, /* UTF string cs is compared to. / size_t numCp, / Number of code points to compare. / int flags) / Flags describing string ends. / { Tcl_UniChar ch1 = 0, ch2 = 0; int uch1, uch2; while (numCp-- > 0) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); uch1 = ch1; uch2 = ch2; if ((ch1 & 0xFC00) == 0xD800) { if ((flags & 1) && (numCp == 0)) { / String ends with high surrogate. 
/ } else { int len = TclUtfToUniChar(cs, &ch1); if ((ch1 & 0xFC00) == 0xDC00) { uch1 = (((uch1&0x3FF)<<10) \| (ch1&0x3FF)) + 0x10000; cs += len; } } } if ((ch2 & 0xFC00) == 0xD800) { if ((flags & 2) && (numCp == 0)) { / String ends with high surrogate. */ } else { int len = TclUtfToUniChar(ct, &ch2); if ((ch2 & 0xFC00) == 0xDC00) { uch2 = (((uch2&0x3FF)<<10) \| (ch2&0x3FF)) + 0x10000; ct += len; } } } if (uch1 != uch2) { uch1 = TclUCS4ToLower(uch1); uch2 = TclUCS4ToLower(uch2); if (uch1 != uch2) {
︙			︙
270 271 272 273 274 275 276 ~~277 278 279 280~~ 281 282 283 284 285 286 287 288 289 290 291 292 293 ~~294~~ 295 296 297 298 299 300 ~~301~~ 302 303 304 305 306 307 308	#if TCL_UTF_MAX == 3 /* --------------------------------------------------------------------------- * NumCodePointsUnicode -- * * Returns the number of code points of a Tcl_UniChar array. * Problem: single high surrogates (0xD800..0xDBFF) at the * very end of the array are not counted. If they were, the * functions UniCharNcmp() and UniCharNcasecmp() would read * beyond the buffer. * * Results: * As above. * * Side effects: * None. * --------------------------------------------------------------------------- / static int NumCodePointsUnicode( const Tcl_UniChar src, / The array to measure. / ~~int length) / The length of the array in elements. */~~ { int i, n = 0; for (i = 0; i < length; i++, n++) { if ((src[i] & 0xFC00) == 0xD800) { if (i + 1 >= length) { ~~n--;~~ } if ((i + 1 < length) && ((src[i+1] & 0xFC00) == 0xDC00)) { i++; } } } return n;	< < < < \| > > < >	309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345	#if TCL_UTF_MAX == 3 /* --------------------------------------------------------------------------- * NumCodePointsUnicode -- * * Returns the number of code points of a Tcl_UniChar array. * * Results: * As above. * * Side effects: * None. * --------------------------------------------------------------------------- / static int NumCodePointsUnicode( const Tcl_UniChar src, / The array to measure. / int length, / The length of the array in elements. / int flagPtr) /* Location to receive end flag. / { int i, n = 0; flagPtr = 0; for (i = 0; i < length; i++, n++) { if ((src[i] & 0xFC00) == 0xD800) { if (i + 1 >= length) { *flagPtr = 1; } if ((i + 1 < length) && ((src[i+1] & 0xFC00) == 0xDC00)) { i++; } } } return n;
︙			︙
327 328 329 330 331 332 333 ~~334~~ 335 336 337 338 339 340 341 ~~342~~ 343 344 345 346 347 ~~348~~ 349 350 351 352 353 354 355	---------------------------------------------------------------------- / static int UniCharNcmp( const Tcl_UniChar ucs, / Unicode string to compare to uct. / const Tcl_UniChar uct, /* Unicode string ucs is compared to. / ~~unsi~~gned long~~ numCp) / Number of code points to compare. /~~ { int lcs, lct; for ( ; numCp != 0; numCp--, ucs++, uct++) { lcs = ucs; lct = *uct; if ((lcs & 0xFC00) == 0xD800) { ~~if ((ucs[1] & 0xFC00) == 0xDC00) {~~ lcs = (((lcs&0x3FF)<<10) \| (ucs[1]&0x3FF)) + 0x10000; ucs++; } } if ((lct & 0xFC00) == 0xD800) { ~~if ((uct[1] & 0xFC00) == 0xDC00) {~~ lct = (((lct&0x3FF)<<10) \| (uct[1]&0x3FF)) + 0x10000; uct++; } } if (lcs != lct) { return (lcs - lct); }	\| > > > \| > > \|	364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397	---------------------------------------------------------------------- / static int UniCharNcmp( const Tcl_UniChar ucs, / Unicode string to compare to uct. / const Tcl_UniChar uct, /* Unicode string ucs is compared to. / size_t numCp, / Number of code points to compare. / int flags) / Flags describing string ends. / { int lcs, lct; for ( ; numCp != 0; numCp--, ucs++, uct++) { lcs = ucs; lct = uct; if ((lcs & 0xFC00) == 0xD800) { if ((flags & 1) && (numCp == 1)) { / String ends with high surrogate. / } else if ((ucs[1] & 0xFC00) == 0xDC00) { lcs = (((lcs&0x3FF)<<10) \| (ucs[1]&0x3FF)) + 0x10000; ucs++; } } if ((lct & 0xFC00) == 0xD800) { if ((flags & 2) && (numCp == 1)) { / String ends with high surrogate. */ } else if ((uct[1] & 0xFC00) == 0xDC00) { lct = (((lct&0x3FF)<<10) \| (uct[1]&0x3FF)) + 0x10000; uct++; } } if (lcs != lct) { return (lcs - lct); }
︙			︙
376 377 378 379 380 381 382 ~~383~~ 384 385 386 387 388 389 390 ~~391~~ 392 393 394 395 396 ~~397~~ 398 399 400 401 402 403 404	---------------------------------------------------------------------- / static int UniCharNcasecmp( const Tcl_UniChar ucs, / Unicode string to compare to uct. / const Tcl_UniChar uct, /* Unicode string ucs is compared to. / ~~unsi~~gned long~~ numCp) / Number of code points to compare. /~~ { int lcs, lct; for ( ; numCp != 0; numCp--, ucs++, uct++) { lcs = ucs; lct = *uct; if ((lcs & 0xFC00) == 0xD800) { ~~if ((ucs[1] & 0xFC00) == 0xDC00) {~~ lcs = (((lcs&0x3FF)<<10) \| (ucs[1]&0x3FF)) + 0x10000; ucs++; } } if ((lct & 0xFC00) == 0xD800) { ~~if ((uct[1] & 0xFC00) == 0xDC00) {~~ lct = (((lct&0x3FF)<<10) \| (uct[1]&0x3FF)) + 0x10000; uct++; } } if (lcs != lct) { lcs = TclUCS4ToLower(lcs); lct = TclUCS4ToLower(lct);	\| > > > \| > > \|	418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451	---------------------------------------------------------------------- / static int UniCharNcasecmp( const Tcl_UniChar ucs, / Unicode string to compare to uct. / const Tcl_UniChar uct, /* Unicode string ucs is compared to. / size_t numCp, / Number of code points to compare. / int flags) / Flags describing string ends. / { int lcs, lct; for ( ; numCp != 0; numCp--, ucs++, uct++) { lcs = ucs; lct = uct; if ((lcs & 0xFC00) == 0xD800) { if ((flags & 1) && (numCp == 1)) { / String ends with high surrogate. / } else if ((ucs[1] & 0xFC00) == 0xDC00) { lcs = (((lcs&0x3FF)<<10) \| (ucs[1]&0x3FF)) + 0x10000; ucs++; } } if ((lct & 0xFC00) == 0xD800) { if ((flags & 2) && (numCp == 1)) { / String ends with high surrogate. */ } else if ((uct[1] & 0xFC00) == 0xDC00) { lct = (((lct&0x3FF)<<10) \| (uct[1]&0x3FF)) + 0x10000; uct++; } } if (lcs != lct) { lcs = TclUCS4ToLower(lcs); lct = TclUCS4ToLower(lct);
︙			︙
3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 ~~3166 3167~~ 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 ~~3197~~ ~~3198~~ 3199 3200 3201 3202 3203 3204 3205	int checkEq, /* comparison is only for equality / int nocase, / comparison is not case sensitive / int reqlength) / requested length; -1 to compare whole * strings / { const char s1, s2; int empty, length, match, s1len, s2len; memCmpFn_t memCmpFn; if ((reqlength == 0) \|\| (value1Ptr == value2Ptr)) { / * Always match at 0 chars or if it is the same obj. / return 0; } if (!nocase && TclIsPureByteArray(value1Ptr) && TclIsPureByteArray(value2Ptr)) { / * Use binary versions of comparisons since that won't cause undue * type conversions and it is much faster. Only do this if we're * case-sensitive (which is all that really makes sense with byte * arrays anyway, and we have no memcasecmp() for some reason... :^) / s1 = (char ) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char ) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); memCmpFn = memcmp; } else if ((value1Ptr->typePtr == &tclStringType) && (value2Ptr->typePtr == &tclStringType)) { / * Do a unicode-specific comparison if both of the args are of String * type. If the char length == byte length, we can do a memcmp. In * benchmark testing this proved the most efficient check between the * unicode and string comparison operations. 
/ if (nocase) { s1 = (char ) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); s2 = (char ) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); #if TCL_UTF_MAX == 3 ~~s1len = NumCodePointsUnicode((Tcl_UniChar ) s1, s1len); s2len = NumCodePointsUnicode((Tcl_UniChar ) s2, s2len);~~ memCmpFn = (memCmpFn_t) UniCharNcasecmp; #else memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp; #endif } else { s1len = Tcl_GetCharLength(value1Ptr); s2len = Tcl_GetCharLength(value2Ptr); if ((s1len == value1Ptr->length) && (value1Ptr->bytes != NULL) && (s2len == value2Ptr->length) && (value2Ptr->bytes != NULL)) { s1 = value1Ptr->bytes; s2 = value2Ptr->bytes; memCmpFn = memcmp; } else { s1 = (char ) Tcl_GetUnicode(value1Ptr); s2 = (char ) Tcl_GetUnicode(value2Ptr); if ( #ifdef WORDS_BIGENDIAN 1 #else checkEq #endif / WORDS_BIGENDIAN / ) { memCmpFn = memcmp; s1len = sizeof(Tcl_UniChar); s2len = sizeof(Tcl_UniChar); } else { #if TCL_UTF_MAX == 3 ~~s1len = NumCodePointsUnicode((Tcl_UniChar ) s1, ~~s1len);~~~~ ~~s2len = NumCodePointsUnicode((Tcl_UniChar *) s2, ~~s2len);~~~~ memCmpFn = (memCmpFn_t) UniCharNcmp; #else memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; #endif } } }	> > > > > > > > > > \| \| > > > > > > > > \| > \| >	3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272	int checkEq, /* comparison is only for equality / int nocase, / comparison is not case sensitive / int reqlength) / requested length; -1 to compare whole * strings / { const char s1, s2; int empty, length, match, s1len, s2len; #if TCL_UTF_MAX == 3 int s1flag = 0, s2flag = 0; typedef int (memCmpFn_t)(const 
void , const void , size_t, int); #else typedef int (memCmpFn_t)(const void , const void , size_t); #endif memCmpFn_t memCmpFn; if ((reqlength == 0) \|\| (value1Ptr == value2Ptr)) { / * Always match at 0 chars or if it is the same obj. / return 0; } if (!nocase && TclIsPureByteArray(value1Ptr) && TclIsPureByteArray(value2Ptr)) { / * Use binary versions of comparisons since that won't cause undue * type conversions and it is much faster. Only do this if we're * case-sensitive (which is all that really makes sense with byte * arrays anyway, and we have no memcasecmp() for some reason... :^) / s1 = (char ) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char ) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); #if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif } else if ((value1Ptr->typePtr == &tclStringType) && (value2Ptr->typePtr == &tclStringType)) { / * Do a unicode-specific comparison if both of the args are of String * type. If the char length == byte length, we can do a memcmp. In * benchmark testing this proved the most efficient check between the * unicode and string comparison operations. 
/ if (nocase) { s1 = (char ) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); s2 = (char ) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); #if TCL_UTF_MAX == 3 s1len = NumCodePointsUnicode((Tcl_UniChar ) s1, s1len, &s1flag); s2len = NumCodePointsUnicode((Tcl_UniChar ) s2, s2len, &s2flag); memCmpFn = (memCmpFn_t) UniCharNcasecmp; #else memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp; #endif } else { s1len = Tcl_GetCharLength(value1Ptr); s2len = Tcl_GetCharLength(value2Ptr); if ((s1len == value1Ptr->length) && (value1Ptr->bytes != NULL) && (s2len == value2Ptr->length) && (value2Ptr->bytes != NULL)) { s1 = value1Ptr->bytes; s2 = value2Ptr->bytes; #if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif } else { s1 = (char ) Tcl_GetUnicode(value1Ptr); s2 = (char ) Tcl_GetUnicode(value2Ptr); if ( #ifdef WORDS_BIGENDIAN 1 #else checkEq #endif / WORDS_BIGENDIAN / ) { #if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif s1len = sizeof(Tcl_UniChar); s2len = sizeof(Tcl_UniChar); } else { #if TCL_UTF_MAX == 3 s1len = NumCodePointsUnicode((Tcl_UniChar ) s1, s1len, &s1flag); s2len = NumCodePointsUnicode((Tcl_UniChar *) s2, s2len, &s2flag); memCmpFn = (memCmpFn_t) UniCharNcmp; #else memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; #endif } } }
︙			︙
3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 ~~3257 3258~~ 3259 3260 3261 3262 3263 3264 3265	if (!nocase && checkEq) { /* * When we have equal-length we can check only for (in)equality. * We can use memcmp() in all (n)eq cases because we don't need to * worry about lexical LE/BE variance. / memCmpFn = memcmp; } else { / * As a catch-all we will work with UTF-8. We cannot use memcmp() * as that is unsafe with any string containing NUL (\xC0\x80 in * Tcl's utf rep). We can use the more efficient TclpUtfNcmp2 if * we are case-sensitive and no specific length was requested. */ #if TCL_UTF_MAX == 3 ~~s1len = NumCodePointsUtf(s1, s1len); s2len = NumCodePointsUtf(s2, s2len);~~ memCmpFn = (memCmpFn_t) (nocase ? UtfNcasecmp : UtfNcmp); #else if ((reqlength < 0) && !nocase) { memCmpFn = (memCmpFn_t) TclpUtfNcmp2; } else { s1len = Tcl_NumUtfChars(s1, s1len);	> > > > \| \|	3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336	if (!nocase && checkEq) { /* * When we have equal-length we can check only for (in)equality. * We can use memcmp() in all (n)eq cases because we don't need to * worry about lexical LE/BE variance. / #if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif } else { / * As a catch-all we will work with UTF-8. We cannot use memcmp() * as that is unsafe with any string containing NUL (\xC0\x80 in * Tcl's utf rep). We can use the more efficient TclpUtfNcmp2 if * we are case-sensitive and no specific length was requested. */ #if TCL_UTF_MAX == 3 s1len = NumCodePointsUtf(s1, s1len, &s1flag); s2len = NumCodePointsUtf(s2, s2len, &s2flag); memCmpFn = (memCmpFn_t) (nocase ? UtfNcasecmp : UtfNcmp); #else if ((reqlength < 0) && !nocase) { memCmpFn = (memCmpFn_t) TclpUtfNcmp2; } else { s1len = Tcl_NumUtfChars(s1, s1len);
︙			︙
3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300	if (checkEq && (s1len != s2len)) { match = 1; /* This will be reversed below. / } else { / * The comparison function should compare up to the minimum byte * length only. */ match = memCmpFn(s1, s2, (size_t) length); } if ((match == 0) && (reqlength > length)) { match = s1len - s2len; } return (match > 0) ? 1 : (match < 0) ? -1 : 0; }	> > > >	3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375	if (checkEq && (s1len != s2len)) { match = 1; /* This will be reversed below. / } else { / * The comparison function should compare up to the minimum byte * length only. */ #if TCL_UTF_MAX == 3 match = memCmpFn(s1, s2, (size_t) length, s1flag \| (s2flag << 1)); #else match = memCmpFn(s1, s2, (size_t) length); #endif } if ((match == 0) && (reqlength > length)) { match = s1len - s2len; } return (match > 0) ? 1 : (match < 0) ? -1 : 0; }
︙			︙