Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | fix compare for beyond BMP chars |
---|---|
Timelines: | family | ancestors | descendants | both | wtf-8-experiment |
Files: | files | file ages | folders |
SHA1: | 31b847f06b076a000e24ba3e955e3915 |
User & Date: | chw 2020-05-18 09:03:47.623 |
Context
2020-05-18
| ||
16:44 | fix string map for beyond BMP chars check-in: 92fafa5ea8 user: chw tags: wtf-8-experiment | |
09:03 | fix compare for beyond BMP chars check-in: 31b847f06b user: chw tags: wtf-8-experiment | |
06:27 | fix sort/compare for beyond BMP chars (unfinished, WIP) check-in: bd15431fd8 user: chw tags: wtf-8-experiment | |
Changes
Changes to jni/tcl/generic/tclCmdMZ.c.
︙ | ︙ | |||
27 28 29 30 31 32 33 | static Tcl_NRPostProc TryPostBody; static Tcl_NRPostProc TryPostFinal; static Tcl_NRPostProc TryPostHandler; static int UniCharIsAscii(int character); static int UniCharIsHexDigit(int character); #if TCL_UTF_MAX == 3 | > > | > | | > > | | | 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | static Tcl_NRPostProc TryPostBody; static Tcl_NRPostProc TryPostFinal; static Tcl_NRPostProc TryPostHandler; static int UniCharIsAscii(int character); static int UniCharIsHexDigit(int character); #if TCL_UTF_MAX == 3 static int MemCmp(const void *s1, const void *s2, size_t n, int flags); static int NumCodePointsUtf(const char *src, int length, int *flagPtr); static int NumCodePointsUnicode(const Tcl_UniChar *src, int length, int *flagPtr); static int UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, size_t numCp, int flags); static int UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, size_t numCp, int flags); static int UtfNcasecmp(const char *cs, const char *ct, size_t numCp, int flags); static int UtfNcmp(const char *cs, const char *ct, size_t numCp, int flags); #endif /* * Default set of characters to trim in [string trim] and friends. This is a * UTF-8 literal string containing all Unicode space characters [TIP #413] */ |
︙ | ︙ | |||
75 76 77 78 79 80 81 82 83 84 | "\xef\xbb\xbf" /* zero width no-break space (U+feff) */ ; #if TCL_UTF_MAX == 3 /* *--------------------------------------------------------------------------- * * NumCodePointsUtf -- * * Like Tcl_NumUtfChars() but returns the number of code points. | > > > > > > > > > > > > > > > > > > < < < | > < > | 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | "\xef\xbb\xbf" /* zero width no-break space (U+feff) */ ; #if TCL_UTF_MAX == 3 /* *--------------------------------------------------------------------------- * * MemCmp -- * * Private wrapper for memcmp(). See C library documentation. * *--------------------------------------------------------------------------- */ static int MemCmp(const void *s1, const void *s2, size_t n, int flags) { return memcmp(s1, s2, n); } #endif #if TCL_UTF_MAX == 3 /* *--------------------------------------------------------------------------- * * NumCodePointsUtf -- * * Like Tcl_NumUtfChars() but returns the number of code points. * * Results: * As above. * * Side effects: * None. * *--------------------------------------------------------------------------- */ static int NumCodePointsUtf( const char *src, /* The UTF-8 string to measure. */ int length, /* The length of the string in bytes. */ int *flagPtr) /* Location to receive end flag. */ { Tcl_UniChar ch = 0; int i = 0; const char *endPtr = src + length - TCL_UTF_MAX; *flagPtr = 0; while (src < endPtr) { src += TclUtfToUniChar(src, &ch); if ((ch & 0xFC00) == 0xD800) { if ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { int len = TclUtfToUniChar(src, &ch); if ((ch & 0xFC00) == 0xDC00) { |
︙ | ︙ | |||
133 134 135 136 137 138 139 | } } i++; } if (src < endPtr) { i += endPtr - src; } else if (i && ((ch & 0xFC00) == 0xD800)) { | < > | 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | } } i++; } if (src < endPtr) { i += endPtr - src; } else if (i && ((ch & 0xFC00) == 0xD800)) { *flagPtr = 1; } return i; } #endif #if TCL_UTF_MAX == 3 /* |
︙ | ︙ | |||
161 162 163 164 165 166 167 | *---------------------------------------------------------------------- */ static int UtfNcmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ | | > > > > | | | | > > > > | | | | > | 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 | *---------------------------------------------------------------------- */ static int UtfNcmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ size_t numCp, /* Number of code points to compare. */ int flags) /* Flags describing string ends. */ { Tcl_UniChar ch1 = 0, ch2 = 0; int uch1, uch2; while (numCp-- > 0) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); uch1 = ch1; uch2 = ch2; if ((ch1 & 0xFC00) == 0xD800) { if ((flags & 1) && (numCp == 0)) { /* String ends with high surrogate. */ } else { int len = TclUtfToUniChar(cs, &ch1); if ((ch1 & 0xFC00) == 0xDC00) { uch1 = (((uch1&0x3FF)<<10) | (ch1&0x3FF)) + 0x10000; cs += len; } } } if ((ch2 & 0xFC00) == 0xD800) { if ((flags & 2) && (numCp == 0)) { /* String ends with high surrogate. */ } else { int len = TclUtfToUniChar(ct, &ch2); if ((ch2 & 0xFC00) == 0xDC00) { uch2 = (((uch2&0x3FF)<<10) | (ch2&0x3FF)) + 0x10000; ct += len; } } } if (uch1 != uch2) { return (uch1 - uch2); } } |
︙ | ︙ | |||
221 222 223 224 225 226 227 | *---------------------------------------------------------------------- */ static int UtfNcasecmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ | | > > > > | | | | > > > > | | | | > | 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 | *---------------------------------------------------------------------- */ static int UtfNcasecmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ size_t numCp, /* Number of code points to compare. */ int flags) /* Flags describing string ends. */ { Tcl_UniChar ch1 = 0, ch2 = 0; int uch1, uch2; while (numCp-- > 0) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); uch1 = ch1; uch2 = ch2; if ((ch1 & 0xFC00) == 0xD800) { if ((flags & 1) && (numCp == 0)) { /* String ends with high surrogate. */ } else { int len = TclUtfToUniChar(cs, &ch1); if ((ch1 & 0xFC00) == 0xDC00) { uch1 = (((uch1&0x3FF)<<10) | (ch1&0x3FF)) + 0x10000; cs += len; } } } if ((ch2 & 0xFC00) == 0xD800) { if ((flags & 2) && (numCp == 0)) { /* String ends with high surrogate. */ } else { int len = TclUtfToUniChar(ct, &ch2); if ((ch2 & 0xFC00) == 0xDC00) { uch2 = (((uch2&0x3FF)<<10) | (ch2&0x3FF)) + 0x10000; ct += len; } } } if (uch1 != uch2) { uch1 = TclUCS4ToLower(uch1); uch2 = TclUCS4ToLower(uch2); if (uch1 != uch2) { |
︙ | ︙ | |||
270 271 272 273 274 275 276 | #if TCL_UTF_MAX == 3 /* *--------------------------------------------------------------------------- * * NumCodePointsUnicode -- * * Returns the number of code points of a Tcl_UniChar array. | < < < < | > > < > | 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 | #if TCL_UTF_MAX == 3 /* *--------------------------------------------------------------------------- * * NumCodePointsUnicode -- * * Returns the number of code points of a Tcl_UniChar array. * * Results: * As above. * * Side effects: * None. * *--------------------------------------------------------------------------- */ static int NumCodePointsUnicode( const Tcl_UniChar *src, /* The array to measure. */ int length, /* The length of the array in elements. */ int *flagPtr) /* Location to receive end flag. */ { int i, n = 0; *flagPtr = 0; for (i = 0; i < length; i++, n++) { if ((src[i] & 0xFC00) == 0xD800) { if (i + 1 >= length) { *flagPtr = 1; } if ((i + 1 < length) && ((src[i+1] & 0xFC00) == 0xDC00)) { i++; } } } return n; |
︙ | ︙ | |||
327 328 329 330 331 332 333 | *---------------------------------------------------------------------- */ static int UniCharNcmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ | | > > > | > > | | 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 | *---------------------------------------------------------------------- */ static int UniCharNcmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ size_t numCp, /* Number of code points to compare. */ int flags) /* Flags describing string ends. */ { int lcs, lct; for ( ; numCp != 0; numCp--, ucs++, uct++) { lcs = *ucs; lct = *uct; if ((lcs & 0xFC00) == 0xD800) { if ((flags & 1) && (numCp == 1)) { /* String ends with high surrogate. */ } else if ((ucs[1] & 0xFC00) == 0xDC00) { lcs = (((lcs&0x3FF)<<10) | (ucs[1]&0x3FF)) + 0x10000; ucs++; } } if ((lct & 0xFC00) == 0xD800) { if ((flags & 2) && (numCp == 1)) { /* String ends with high surrogate. */ } else if ((uct[1] & 0xFC00) == 0xDC00) { lct = (((lct&0x3FF)<<10) | (uct[1]&0x3FF)) + 0x10000; uct++; } } if (lcs != lct) { return (lcs - lct); } |
︙ | ︙ | |||
376 377 378 379 380 381 382 | *---------------------------------------------------------------------- */ static int UniCharNcasecmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ | | > > > | > > | | 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 | *---------------------------------------------------------------------- */ static int UniCharNcasecmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ size_t numCp, /* Number of code points to compare. */ int flags) /* Flags describing string ends. */ { int lcs, lct; for ( ; numCp != 0; numCp--, ucs++, uct++) { lcs = *ucs; lct = *uct; if ((lcs & 0xFC00) == 0xD800) { if ((flags & 1) && (numCp == 1)) { /* String ends with high surrogate. */ } else if ((ucs[1] & 0xFC00) == 0xDC00) { lcs = (((lcs&0x3FF)<<10) | (ucs[1]&0x3FF)) + 0x10000; ucs++; } } if ((lct & 0xFC00) == 0xD800) { if ((flags & 2) && (numCp == 1)) { /* String ends with high surrogate. */ } else if ((uct[1] & 0xFC00) == 0xDC00) { lct = (((lct&0x3FF)<<10) | (uct[1]&0x3FF)) + 0x10000; uct++; } } if (lcs != lct) { lcs = TclUCS4ToLower(lcs); lct = TclUCS4ToLower(lct); |
︙ | ︙ | |||
3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 | int checkEq, /* comparison is only for equality */ int nocase, /* comparison is not case sensitive */ int reqlength) /* requested length; -1 to compare whole * strings */ { const char *s1, *s2; int empty, length, match, s1len, s2len; memCmpFn_t memCmpFn; if ((reqlength == 0) || (value1Ptr == value2Ptr)) { /* * Always match at 0 chars or if it is the same obj. */ return 0; } if (!nocase && TclIsPureByteArray(value1Ptr) && TclIsPureByteArray(value2Ptr)) { /* * Use binary versions of comparisons since that won't cause undue * type conversions and it is much faster. Only do this if we're * case-sensitive (which is all that really makes sense with byte * arrays anyway, and we have no memcasecmp() for some reason... :^) */ s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); memCmpFn = memcmp; } else if ((value1Ptr->typePtr == &tclStringType) && (value2Ptr->typePtr == &tclStringType)) { /* * Do a unicode-specific comparison if both of the args are of String * type. If the char length == byte length, we can do a memcmp. In * benchmark testing this proved the most efficient check between the * unicode and string comparison operations. 
*/ if (nocase) { s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); #if TCL_UTF_MAX == 3 | > > > > > > > > > > | | > > > > > > > > | > | > | 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 | int checkEq, /* comparison is only for equality */ int nocase, /* comparison is not case sensitive */ int reqlength) /* requested length; -1 to compare whole * strings */ { const char *s1, *s2; int empty, length, match, s1len, s2len; #if TCL_UTF_MAX == 3 int s1flag = 0, s2flag = 0; typedef int (*memCmpFn_t)(const void *, const void *, size_t, int); #else typedef int (*memCmpFn_t)(const void *, const void *, size_t); #endif memCmpFn_t memCmpFn; if ((reqlength == 0) || (value1Ptr == value2Ptr)) { /* * Always match at 0 chars or if it is the same obj. */ return 0; } if (!nocase && TclIsPureByteArray(value1Ptr) && TclIsPureByteArray(value2Ptr)) { /* * Use binary versions of comparisons since that won't cause undue * type conversions and it is much faster. Only do this if we're * case-sensitive (which is all that really makes sense with byte * arrays anyway, and we have no memcasecmp() for some reason... :^) */ s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); #if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif } else if ((value1Ptr->typePtr == &tclStringType) && (value2Ptr->typePtr == &tclStringType)) { /* * Do a unicode-specific comparison if both of the args are of String * type. 
If the char length == byte length, we can do a memcmp. In * benchmark testing this proved the most efficient check between the * unicode and string comparison operations. */ if (nocase) { s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); #if TCL_UTF_MAX == 3 s1len = NumCodePointsUnicode((Tcl_UniChar *) s1, s1len, &s1flag); s2len = NumCodePointsUnicode((Tcl_UniChar *) s2, s2len, &s2flag); memCmpFn = (memCmpFn_t) UniCharNcasecmp; #else memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp; #endif } else { s1len = Tcl_GetCharLength(value1Ptr); s2len = Tcl_GetCharLength(value2Ptr); if ((s1len == value1Ptr->length) && (value1Ptr->bytes != NULL) && (s2len == value2Ptr->length) && (value2Ptr->bytes != NULL)) { s1 = value1Ptr->bytes; s2 = value2Ptr->bytes; #if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif } else { s1 = (char *) Tcl_GetUnicode(value1Ptr); s2 = (char *) Tcl_GetUnicode(value2Ptr); if ( #ifdef WORDS_BIGENDIAN 1 #else checkEq #endif /* WORDS_BIGENDIAN */ ) { #if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif s1len *= sizeof(Tcl_UniChar); s2len *= sizeof(Tcl_UniChar); } else { #if TCL_UTF_MAX == 3 s1len = NumCodePointsUnicode((Tcl_UniChar *) s1, s1len, &s1flag); s2len = NumCodePointsUnicode((Tcl_UniChar *) s2, s2len, &s2flag); memCmpFn = (memCmpFn_t) UniCharNcmp; #else memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; #endif } } } |
︙ | ︙ | |||
3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 | if (!nocase && checkEq) { /* * When we have equal-length we can check only for (in)equality. * We can use memcmp() in all (n)eq cases because we don't need to * worry about lexical LE/BE variance. */ memCmpFn = memcmp; } else { /* * As a catch-all we will work with UTF-8. We cannot use memcmp() * as that is unsafe with any string containing NUL (\xC0\x80 in * Tcl's utf rep). We can use the more efficient TclpUtfNcmp2 if * we are case-sensitive and no specific length was requested. */ #if TCL_UTF_MAX == 3 | > > > > | | | 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 | if (!nocase && checkEq) { /* * When we have equal-length we can check only for (in)equality. * We can use memcmp() in all (n)eq cases because we don't need to * worry about lexical LE/BE variance. */ #if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif } else { /* * As a catch-all we will work with UTF-8. We cannot use memcmp() * as that is unsafe with any string containing NUL (\xC0\x80 in * Tcl's utf rep). We can use the more efficient TclpUtfNcmp2 if * we are case-sensitive and no specific length was requested. */ #if TCL_UTF_MAX == 3 s1len = NumCodePointsUtf(s1, s1len, &s1flag); s2len = NumCodePointsUtf(s2, s2len, &s2flag); memCmpFn = (memCmpFn_t) (nocase ? UtfNcasecmp : UtfNcmp); #else if ((reqlength < 0) && !nocase) { memCmpFn = (memCmpFn_t) TclpUtfNcmp2; } else { s1len = Tcl_NumUtfChars(s1, s1len); |
︙ | ︙ | |||
3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 | if (checkEq && (s1len != s2len)) { match = 1; /* This will be reversed below. */ } else { /* * The comparison function should compare up to the minimum byte * length only. */ match = memCmpFn(s1, s2, (size_t) length); } if ((match == 0) && (reqlength > length)) { match = s1len - s2len; } return (match > 0) ? 1 : (match < 0) ? -1 : 0; } | > > > > | 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 | if (checkEq && (s1len != s2len)) { match = 1; /* This will be reversed below. */ } else { /* * The comparison function should compare up to the minimum byte * length only. */ #if TCL_UTF_MAX == 3 match = memCmpFn(s1, s2, (size_t) length, s1flag | (s2flag << 1)); #else match = memCmpFn(s1, s2, (size_t) length); #endif } if ((match == 0) && (reqlength > length)) { match = s1len - s2len; } return (match > 0) ? 1 : (match < 0) ? -1 : 0; } |
︙ | ︙ |
Changes to jni/tcl/generic/tclInt.h.
︙ | ︙ | |||
3158 3159 3160 3161 3162 3163 3164 | MODULE_SCOPE void TclSignalExitThread(Tcl_ThreadId id, int result); MODULE_SCOPE void TclSpellFix(Tcl_Interp *interp, Tcl_Obj *const *objv, int objc, int subIdx, Tcl_Obj *bad, Tcl_Obj *fix); MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, int numBytes); | < | 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 | MODULE_SCOPE void TclSignalExitThread(Tcl_ThreadId id, int result); MODULE_SCOPE void TclSpellFix(Tcl_Interp *interp, Tcl_Obj *const *objv, int objc, int subIdx, Tcl_Obj *bad, Tcl_Obj *fix); MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, int numBytes); MODULE_SCOPE int TclStringCmp (Tcl_Obj *value1Ptr, Tcl_Obj *value2Ptr, int checkEq, int nocase, int reqlength); MODULE_SCOPE int TclStringCmpOpts (Tcl_Interp *interp, int objc, Tcl_Obj *const objv[], int *nocase, int *reqlength); MODULE_SCOPE int TclStringMatch(const char *str, int strLen, const char *pattern, int ptnLen, int flags); MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj, |
︙ | ︙ |
Changes to jni/tcl/tests/stringComp.test.
︙ | ︙ | |||
182 183 184 185 186 187 188 | {binary neq} { string compare [binary format a100a 0 1] [binary format a100a 0 0] } 1 {} {binary neq inequal length} { string compare [binary format a20a 0 1] [binary format a100a 0 0] } 1 {} {unicode corner cases} { | < | < < | | > > > > | 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 | {binary neq} { string compare [binary format a100a 0 1] [binary format a100a 0 0] } 1 {} {binary neq inequal length} { string compare [binary format a20a 0 1] [binary format a100a 0 0] } 1 {} {unicode corner cases} { string compare \uD7FF \uD800] } -1 {} {unicode corner cases} { string compare \uD800\uD7FF \uD800\uD800] } -1 {} {unicode corner cases} { string compare \uD800\uD800 \uD800\uD7FF] } 1 {} {unicode corner cases} { string compare \uDBFF \uDC00 } -1 {} {unicode corner cases} { string compare \uD83D \uDE00 } -1 {} {unicode corner cases} { |
︙ | ︙ |