Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Experimental branch to use TCL_UTF_MAX=3 with 16 bit Tcl_UniChar for WTF-8 coded strings internally, i.e. surrogate pairs for code points > 0xFFFF, and TCL_UTF_MAX=4 with 32 bit Tcl_UniChar without surrogate pairs. This check-in touches the tcl core plus tdbcodbc. |
---|---|
Timelines: | family | ancestors | descendants | both | wtf-8-experiment |
Files: | files | file ages | folders |
SHA1: |
110db5bc0c75e91409a0282aa1c24205 |
User & Date: | chw 2017-05-12 20:30:16.428 |
Original Comment: | Experimental branch to use TCL_UTF_MAX=3 with 16 bit Tcl_UniChar for WTF-8 coded strings internally, i.e. surrogate pairs for code points > 0xFFFF, and TCL_UTF_MAX=4 with 32 bit Tcl_UniChar without surrogate pairs. This check-in touches the tcl core plus tdbcodbc. This is the |
Context
2017-05-12
| ||
20:41 | changes to make tk deal with surrogate pairs in the TCL_UTF_MAX=3 case, i.e. to treat a surrogate pair correctly regarding font rendering and cursor index (text, entry, canvas text, ttk::entry), only partially tested on Linux with X11 and SDL2. check-in: d99860b567 user: chw tags: wtf-8-experiment | |
20:30 | Experimental branch to use TCL_UTF_MAX=3 with 16 bit Tcl_UniChar for WTF-8 coded strings internally, i.e. surrogate pairs for code points > 0xFFFF, and TCL_UTF_MAX=4 with 32 bit Tcl_UniChar without surrogate pairs. This check-in touches the tcl core plus tdbcodbc. check-in: 110db5bc0c user: chw tags: wtf-8-experiment | |
20:19 | Create new branch named "wtf-8-experiment" check-in: 725f3dd511 user: chw tags: wtf-8-experiment | |
Changes
Changes to jni/tcl/generic/regc_locale.c.
︙ | ︙ | |||
199 200 201 202 203 204 205 | {0xde80, 0xdebe}, {0xdec0, 0xdefe}, {0xdf00, 0xdf3e}, {0xdf40, 0xdf7e}, {0xdf80, 0xdfbe}, {0xdfc0, 0xdffe}, {0xf900, 0xfa6d}, {0xfa70, 0xfad9}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28}, {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc} | | | 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 | {0xde80, 0xdebe}, {0xdec0, 0xdefe}, {0xdf00, 0xdf3e}, {0xdf40, 0xdf7e}, {0xdf80, 0xdfbe}, {0xdfc0, 0xdffe}, {0xf900, 0xfa6d}, {0xfa70, 0xfad9}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28}, {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc} #if TCL_UTF_MAX > 3 ,{0x10000, 0x1000b}, {0x1000d, 0x10026}, {0x10028, 0x1003a}, {0x1003f, 0x1004d}, {0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10280, 0x1029c}, {0x102a0, 0x102d0}, {0x10300, 0x1031f}, {0x10330, 0x10340}, {0x10342, 0x10349}, {0x10350, 0x10375}, {0x10380, 0x1039d}, {0x103a0, 0x103c3}, {0x103c8, 0x103cf}, {0x10400, 0x1049d}, {0x104b0, 0x104d3}, {0x104d8, 0x104fb}, {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10800, 0x10805}, {0x1080a, 0x10835}, {0x1083f, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089e}, |
︙ | ︙ | |||
260 261 262 263 264 265 266 | 0xebd, 0xec6, 0xf00, 0x103f, 0x1061, 0x1065, 0x1066, 0x108e, 0x10c7, 0x10cd, 0x1258, 0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1bae, 0x1baf, 0x1cf5, 0x1cf6, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2183, 0x2184, 0x2cf2, 0x2cf3, 0x2d27, 0x2d2d, 0x2d6f, 0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa8fb, 0xa8fd, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44 | | | | | 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 | 0xebd, 0xec6, 0xf00, 0x103f, 0x1061, 0x1065, 0x1066, 0x108e, 0x10c7, 0x10cd, 0x1258, 0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1bae, 0x1baf, 0x1cf5, 0x1cf6, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x214e, 0x2183, 0x2184, 0x2cf2, 0x2cf3, 0x2d27, 0x2d2d, 0x2d6f, 0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa8fb, 0xa8fd, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44 #if TCL_UTF_MAX > 3 ,0x1003c, 0x1003d, 0x10808, 0x10837, 0x10838, 0x1083c, 0x108f4, 0x108f5, 0x109be, 0x109bf, 0x10a00, 0x11176, 0x111da, 0x111dc, 0x11288, 0x1130f, 0x11310, 0x11332, 0x11333, 0x1133d, 0x11350, 0x114c4, 0x114c5, 0x114c7, 0x11644, 0x118ff, 0x11c40, 0x16f50, 0x16fe0, 0x1b000, 0x1b001, 0x1d49e, 0x1d49f, 0x1d4a2, 0x1d4a5, 0x1d4a6, 0x1d4bb, 0x1d546, 0x1ee21, 0x1ee22, 0x1ee24, 0x1ee27, 0x1ee39, 0x1ee3b, 0x1ee42, 0x1ee47, 0x1ee49, 0x1ee4b, 0x1ee51, 0x1ee52, 0x1ee54, 0x1ee57, 0x1ee59, 0x1ee5b, 0x1ee5d, 0x1ee5f, 0x1ee61, 0x1ee62, 0x1ee64, 0x1ee7e #endif }; #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) /* * Unicode: control characters. */ static const crange controlRangeTable[] = { {0x0, 0x1f}, {0x7f, 0x9f}, {0x600, 0x605}, {0x200b, 0x200f}, {0x202a, 0x202e}, {0x2060, 0x2064}, {0x2066, 0x206f}, {0xe000, 0xf8ff}, {0xfff9, 0xfffb} #if TCL_UTF_MAX > 3 ,{0x1bca0, 0x1bca3}, {0x1d173, 0x1d17a}, {0xe0020, 0xe007f}, {0xf0000, 0xffffd}, {0x100000, 0x10fffd} #endif }; #define NUM_CONTROL_RANGE (sizeof(controlRangeTable)/sizeof(crange)) static const chr controlCharTable[] = { 0xad, 0x61c, 0x6dd, 0x70f, 0x8e2, 0x180e, 0xfeff #if TCL_UTF_MAX > 3 ,0x110bd, 0xe0001 #endif }; #define NUM_CONTROL_CHAR (sizeof(controlCharTable)/sizeof(chr)) /* |
︙ | ︙ | |||
313 314 315 316 317 318 319 | {0xd66, 0xd6f}, {0xde6, 0xdef}, {0xe50, 0xe59}, {0xed0, 0xed9}, {0xf20, 0xf29}, {0x1040, 0x1049}, {0x1090, 0x1099}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0x1946, 0x194f}, {0x19d0, 0x19d9}, {0x1a80, 0x1a89}, {0x1a90, 0x1a99}, {0x1b50, 0x1b59}, {0x1bb0, 0x1bb9}, {0x1c40, 0x1c49}, {0x1c50, 0x1c59}, {0xa620, 0xa629}, {0xa8d0, 0xa8d9}, {0xa900, 0xa909}, {0xa9d0, 0xa9d9}, {0xa9f0, 0xa9f9}, {0xaa50, 0xaa59}, {0xabf0, 0xabf9}, {0xff10, 0xff19} | | | 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 | {0xd66, 0xd6f}, {0xde6, 0xdef}, {0xe50, 0xe59}, {0xed0, 0xed9}, {0xf20, 0xf29}, {0x1040, 0x1049}, {0x1090, 0x1099}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0x1946, 0x194f}, {0x19d0, 0x19d9}, {0x1a80, 0x1a89}, {0x1a90, 0x1a99}, {0x1b50, 0x1b59}, {0x1bb0, 0x1bb9}, {0x1c40, 0x1c49}, {0x1c50, 0x1c59}, {0xa620, 0xa629}, {0xa8d0, 0xa8d9}, {0xa900, 0xa909}, {0xa9d0, 0xa9d9}, {0xa9f0, 0xa9f9}, {0xaa50, 0xaa59}, {0xabf0, 0xabf9}, {0xff10, 0xff19} #if TCL_UTF_MAX > 3 ,{0x104a0, 0x104a9}, {0x11066, 0x1106f}, {0x110f0, 0x110f9}, {0x11136, 0x1113f}, {0x111d0, 0x111d9}, {0x112f0, 0x112f9}, {0x11450, 0x11459}, {0x114d0, 0x114d9}, {0x11650, 0x11659}, {0x116c0, 0x116c9}, {0x11730, 0x11739}, {0x118e0, 0x118e9}, {0x11c50, 0x11c59}, {0x16a60, 0x16a69}, {0x16b50, 0x16b59}, {0x1d7ce, 0x1d7ff}, {0x1e950, 0x1e959} #endif }; |
︙ | ︙ | |||
347 348 349 350 351 352 353 | {0x2308, 0x230b}, {0x2768, 0x2775}, {0x27e6, 0x27ef}, {0x2983, 0x2998}, {0x29d8, 0x29db}, {0x2cf9, 0x2cfc}, {0x2e00, 0x2e2e}, {0x2e30, 0x2e44}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f}, {0xa60d, 0xa60f}, {0xa6f2, 0xa6f7}, {0xa874, 0xa877}, {0xa8f8, 0xa8fa}, {0xa9c1, 0xa9cd}, {0xaa5c, 0xaa5f}, {0xfe10, 0xfe19}, {0xfe30, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03}, {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff5f, 0xff65} | | | 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 | {0x2308, 0x230b}, {0x2768, 0x2775}, {0x27e6, 0x27ef}, {0x2983, 0x2998}, {0x29d8, 0x29db}, {0x2cf9, 0x2cfc}, {0x2e00, 0x2e2e}, {0x2e30, 0x2e44}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f}, {0xa60d, 0xa60f}, {0xa6f2, 0xa6f7}, {0xa874, 0xa877}, {0xa8f8, 0xa8fa}, {0xa9c1, 0xa9cd}, {0xaa5c, 0xaa5f}, {0xfe10, 0xfe19}, {0xfe30, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03}, {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff5f, 0xff65} #if TCL_UTF_MAX > 3 ,{0x10100, 0x10102}, {0x10a50, 0x10a58}, {0x10af0, 0x10af6}, {0x10b39, 0x10b3f}, {0x10b99, 0x10b9c}, {0x11047, 0x1104d}, {0x110be, 0x110c1}, {0x11140, 0x11143}, {0x111c5, 0x111c9}, {0x111dd, 0x111df}, {0x11238, 0x1123d}, {0x1144b, 0x1144f}, {0x115c1, 0x115d7}, {0x11641, 0x11643}, {0x11660, 0x1166c}, {0x1173c, 0x1173e}, {0x11c41, 0x11c45}, {0x12470, 0x12474}, {0x16b37, 0x16b3b}, {0x1da87, 0x1da8b} #endif }; |
︙ | ︙ | |||
371 372 373 374 375 376 377 | 0x10fb, 0x1400, 0x166d, 0x166e, 0x169b, 0x169c, 0x1735, 0x1736, 0x1944, 0x1945, 0x1a1e, 0x1a1f, 0x1c7e, 0x1c7f, 0x1cd3, 0x207d, 0x207e, 0x208d, 0x208e, 0x2329, 0x232a, 0x27c5, 0x27c6, 0x29fc, 0x29fd, 0x2cfe, 0x2cff, 0x2d70, 0x3030, 0x303d, 0x30a0, 0x30fb, 0xa4fe, 0xa4ff, 0xa673, 0xa67e, 0xa8ce, 0xa8cf, 0xa8fc, 0xa92e, 0xa92f, 0xa95f, 0xa9de, 0xa9df, 0xaade, 0xaadf, 0xaaf0, 0xaaf1, 0xabeb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68, 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d | | | 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 | 0x10fb, 0x1400, 0x166d, 0x166e, 0x169b, 0x169c, 0x1735, 0x1736, 0x1944, 0x1945, 0x1a1e, 0x1a1f, 0x1c7e, 0x1c7f, 0x1cd3, 0x207d, 0x207e, 0x208d, 0x208e, 0x2329, 0x232a, 0x27c5, 0x27c6, 0x29fc, 0x29fd, 0x2cfe, 0x2cff, 0x2d70, 0x3030, 0x303d, 0x30a0, 0x30fb, 0xa4fe, 0xa4ff, 0xa673, 0xa67e, 0xa8ce, 0xa8cf, 0xa8fc, 0xa92e, 0xa92f, 0xa95f, 0xa9de, 0xa9df, 0xaade, 0xaadf, 0xaaf0, 0xaaf1, 0xabeb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68, 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d #if TCL_UTF_MAX > 3 ,0x1039f, 0x103d0, 0x1056f, 0x10857, 0x1091f, 0x1093f, 0x10a7f, 0x110bb, 0x110bc, 0x11174, 0x11175, 0x111cd, 0x111db, 0x112a9, 0x1145b, 0x1145d, 0x114c6, 0x11c70, 0x11c71, 0x16a6e, 0x16a6f, 0x16af5, 0x16b44, 0x1bc9f, 0x1e95e, 0x1e95f #endif }; #define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr)) |
︙ | ︙ | |||
415 416 417 418 419 420 421 | {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67}, {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7}, {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7}, {0x1ff2, 0x1ff4}, {0x2146, 0x2149}, {0x2c30, 0x2c5e}, {0x2c76, 0x2c7b}, {0x2d00, 0x2d25}, {0xa72f, 0xa731}, {0xa771, 0xa778}, {0xa793, 0xa795}, {0xab30, 0xab5a}, {0xab60, 0xab65}, {0xab70, 0xabbf}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a} | | | 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 | {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67}, {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7}, {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7}, {0x1ff2, 0x1ff4}, {0x2146, 0x2149}, {0x2c30, 0x2c5e}, {0x2c76, 0x2c7b}, {0x2d00, 0x2d25}, {0xa72f, 0xa731}, {0xa771, 0xa778}, {0xa793, 0xa795}, {0xab30, 0xab5a}, {0xab60, 0xab65}, {0xab70, 0xabbf}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a} #if TCL_UTF_MAX > 3 ,{0x10428, 0x1044f}, {0x104d8, 0x104fb}, {0x10cc0, 0x10cf2}, {0x118c0, 0x118df}, {0x1d41a, 0x1d433}, {0x1d44e, 0x1d454}, {0x1d456, 0x1d467}, {0x1d482, 0x1d49b}, {0x1d4b6, 0x1d4b9}, {0x1d4bd, 0x1d4c3}, {0x1d4c5, 0x1d4cf}, {0x1d4ea, 0x1d503}, {0x1d51e, 0x1d537}, {0x1d552, 0x1d56b}, {0x1d586, 0x1d59f}, {0x1d5ba, 0x1d5d3}, {0x1d5ee, 0x1d607}, {0x1d622, 0x1d63b}, {0x1d656, 0x1d66f}, {0x1d68a, 0x1d6a5}, {0x1d6c2, 0x1d6da}, {0x1d6dc, 0x1d6e1}, {0x1d6fc, 0x1d714}, {0x1d716, 0x1d71b}, {0x1d736, 0x1d74e}, {0x1d750, 0x1d755}, {0x1d770, 0x1d788}, {0x1d78a, 0x1d78f}, |
︙ | ︙ | |||
494 495 496 497 498 499 500 | 0xa729, 0xa72b, 0xa72d, 0xa733, 0xa735, 0xa737, 0xa739, 0xa73b, 0xa73d, 0xa73f, 0xa741, 0xa743, 0xa745, 0xa747, 0xa749, 0xa74b, 0xa74d, 0xa74f, 0xa751, 0xa753, 0xa755, 0xa757, 0xa759, 0xa75b, 0xa75d, 0xa75f, 0xa761, 0xa763, 0xa765, 0xa767, 0xa769, 0xa76b, 0xa76d, 0xa76f, 0xa77a, 0xa77c, 0xa77f, 0xa781, 0xa783, 0xa785, 0xa787, 0xa78c, 0xa78e, 0xa791, 0xa797, 0xa799, 0xa79b, 0xa79d, 0xa79f, 0xa7a1, 0xa7a3, 0xa7a5, 0xa7a7, 0xa7a9, 0xa7b5, 0xa7b7, 0xa7fa | | | 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 | 0xa729, 0xa72b, 0xa72d, 0xa733, 0xa735, 0xa737, 0xa739, 0xa73b, 0xa73d, 0xa73f, 0xa741, 0xa743, 0xa745, 0xa747, 0xa749, 0xa74b, 0xa74d, 0xa74f, 0xa751, 0xa753, 0xa755, 0xa757, 0xa759, 0xa75b, 0xa75d, 0xa75f, 0xa761, 0xa763, 0xa765, 0xa767, 0xa769, 0xa76b, 0xa76d, 0xa76f, 0xa77a, 0xa77c, 0xa77f, 0xa781, 0xa783, 0xa785, 0xa787, 0xa78c, 0xa78e, 0xa791, 0xa797, 0xa799, 0xa79b, 0xa79d, 0xa79f, 0xa7a1, 0xa7a3, 0xa7a5, 0xa7a7, 0xa7a9, 0xa7b5, 0xa7b7, 0xa7fa #if TCL_UTF_MAX > 3 ,0x1d4bb, 0x1d7cb #endif }; #define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr)) /* |
︙ | ︙ | |||
516 517 518 519 520 521 522 | {0x3d2, 0x3d4}, {0x3fd, 0x42f}, {0x531, 0x556}, {0x10a0, 0x10c5}, {0x13a0, 0x13f5}, {0x1f08, 0x1f0f}, {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d}, {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb}, {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x2130, 0x2133}, {0x2c00, 0x2c2e}, {0x2c62, 0x2c64}, {0x2c6d, 0x2c70}, {0x2c7e, 0x2c80}, {0xa7aa, 0xa7ae}, {0xa7b0, 0xa7b4}, {0xff21, 0xff3a} | | | 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 | {0x3d2, 0x3d4}, {0x3fd, 0x42f}, {0x531, 0x556}, {0x10a0, 0x10c5}, {0x13a0, 0x13f5}, {0x1f08, 0x1f0f}, {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d}, {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb}, {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x2130, 0x2133}, {0x2c00, 0x2c2e}, {0x2c62, 0x2c64}, {0x2c6d, 0x2c70}, {0x2c7e, 0x2c80}, {0xa7aa, 0xa7ae}, {0xa7b0, 0xa7b4}, {0xff21, 0xff3a} #if TCL_UTF_MAX > 3 ,{0x10400, 0x10427}, {0x104b0, 0x104d3}, {0x10c80, 0x10cb2}, {0x118a0, 0x118bf}, {0x1d400, 0x1d419}, {0x1d434, 0x1d44d}, {0x1d468, 0x1d481}, {0x1d4a9, 0x1d4ac}, {0x1d4ae, 0x1d4b5}, {0x1d4d0, 0x1d4e9}, {0x1d507, 0x1d50a}, {0x1d50d, 0x1d514}, {0x1d516, 0x1d51c}, {0x1d53b, 0x1d53e}, {0x1d540, 0x1d544}, {0x1d54a, 0x1d550}, {0x1d56c, 0x1d585}, {0x1d5a0, 0x1d5b9}, {0x1d5d4, 0x1d5ed}, {0x1d608, 0x1d621}, {0x1d63c, 0x1d655}, {0x1d670, 0x1d689}, {0x1d6a8, 0x1d6c0}, {0x1d6e2, 0x1d6fa}, {0x1d71c, 0x1d734}, {0x1d756, 0x1d76e}, {0x1d790, 0x1d7a8}, {0x1e900, 0x1e921} |
︙ | ︙ | |||
593 594 595 596 597 598 599 | 0xa698, 0xa69a, 0xa722, 0xa724, 0xa726, 0xa728, 0xa72a, 0xa72c, 0xa72e, 0xa732, 0xa734, 0xa736, 0xa738, 0xa73a, 0xa73c, 0xa73e, 0xa740, 0xa742, 0xa744, 0xa746, 0xa748, 0xa74a, 0xa74c, 0xa74e, 0xa750, 0xa752, 0xa754, 0xa756, 0xa758, 0xa75a, 0xa75c, 0xa75e, 0xa760, 0xa762, 0xa764, 0xa766, 0xa768, 0xa76a, 0xa76c, 0xa76e, 0xa779, 0xa77b, 0xa77d, 0xa77e, 0xa780, 0xa782, 0xa784, 0xa786, 0xa78b, 0xa78d, 0xa790, 0xa792, 0xa796, 0xa798, 0xa79a, 0xa79c, 0xa79e, 0xa7a0, 0xa7a2, 0xa7a4, 0xa7a6, 0xa7a8, 0xa7b6 | | | 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 | 0xa698, 0xa69a, 0xa722, 0xa724, 0xa726, 0xa728, 0xa72a, 0xa72c, 0xa72e, 0xa732, 0xa734, 0xa736, 0xa738, 0xa73a, 0xa73c, 0xa73e, 0xa740, 0xa742, 0xa744, 0xa746, 0xa748, 0xa74a, 0xa74c, 0xa74e, 0xa750, 0xa752, 0xa754, 0xa756, 0xa758, 0xa75a, 0xa75c, 0xa75e, 0xa760, 0xa762, 0xa764, 0xa766, 0xa768, 0xa76a, 0xa76c, 0xa76e, 0xa779, 0xa77b, 0xa77d, 0xa77e, 0xa780, 0xa782, 0xa784, 0xa786, 0xa78b, 0xa78d, 0xa790, 0xa792, 0xa796, 0xa798, 0xa79a, 0xa79c, 0xa79e, 0xa7a0, 0xa7a2, 0xa7a4, 0xa7a6, 0xa7a8, 0xa7b6 #if TCL_UTF_MAX > 3 ,0x1d49c, 0x1d49e, 0x1d49f, 0x1d4a2, 0x1d4a5, 0x1d4a6, 0x1d504, 0x1d505, 0x1d538, 0x1d539, 0x1d546, 0x1d7ca #endif }; #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) |
︙ | ︙ | |||
686 687 688 689 690 691 692 | {0xdf00, 0xdf3e}, {0xdf40, 0xdf7e}, {0xdf80, 0xdfbe}, {0xdfc0, 0xdffe}, {0xf900, 0xfa6d}, {0xfa70, 0xfad9}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbc1}, {0xfbd3, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfd}, {0xfe00, 0xfe19}, {0xfe20, 0xfe52}, {0xfe54, 0xfe66}, {0xfe68, 0xfe6b}, {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff01, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee} | | | 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 | {0xdf00, 0xdf3e}, {0xdf40, 0xdf7e}, {0xdf80, 0xdfbe}, {0xdfc0, 0xdffe}, {0xf900, 0xfa6d}, {0xfa70, 0xfad9}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbc1}, {0xfbd3, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfd}, {0xfe00, 0xfe19}, {0xfe20, 0xfe52}, {0xfe54, 0xfe66}, {0xfe68, 0xfe6b}, {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff01, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee} #if TCL_UTF_MAX > 3 ,{0x10000, 0x1000b}, {0x1000d, 0x10026}, {0x10028, 0x1003a}, {0x1003f, 0x1004d}, {0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10100, 0x10102}, {0x10107, 0x10133}, {0x10137, 0x1018e}, {0x10190, 0x1019b}, {0x101d0, 0x101fd}, {0x10280, 0x1029c}, {0x102a0, 0x102d0}, {0x102e0, 0x102fb}, {0x10300, 0x10323}, {0x10330, 0x1034a}, {0x10350, 0x1037a}, {0x10380, 0x1039d}, {0x1039f, 0x103c3}, {0x103c8, 0x103d5}, {0x10400, 0x1049d}, {0x104a0, 0x104a9}, {0x104b0, 0x104d3}, {0x104d8, 0x104fb}, {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10600, 0x10736}, {0x10740, 0x10755}, |
︙ | ︙ | |||
759 760 761 762 763 764 765 | 0xb57, 0xb5c, 0xb5d, 0xb82, 0xb83, 0xb99, 0xb9a, 0xb9c, 0xb9e, 0xb9f, 0xba3, 0xba4, 0xbd0, 0xbd7, 0xc55, 0xc56, 0xcd5, 0xcd6, 0xcde, 0xcf1, 0xcf2, 0xd82, 0xd83, 0xdbd, 0xdca, 0xdd6, 0xe81, 0xe82, 0xe84, 0xe87, 0xe88, 0xe8a, 0xe8d, 0xea5, 0xea7, 0xeaa, 0xeab, 0xec6, 0x10c7, 0x10cd, 0x1258, 0x12c0, 0x1772, 0x1773, 0x1940, 0x1cf8, 0x1cf9, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x2071, 0x2d27, 0x2d2d, 0x2d6f, 0x2d70, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfffc, 0xfffd | | | 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 | 0xb57, 0xb5c, 0xb5d, 0xb82, 0xb83, 0xb99, 0xb9a, 0xb9c, 0xb9e, 0xb9f, 0xba3, 0xba4, 0xbd0, 0xbd7, 0xc55, 0xc56, 0xcd5, 0xcd6, 0xcde, 0xcf1, 0xcf2, 0xd82, 0xd83, 0xdbd, 0xdca, 0xdd6, 0xe81, 0xe82, 0xe84, 0xe87, 0xe88, 0xe8a, 0xe8d, 0xea5, 0xea7, 0xeaa, 0xeab, 0xec6, 0x10c7, 0x10cd, 0x1258, 0x12c0, 0x1772, 0x1773, 0x1940, 0x1cf8, 0x1cf9, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x2071, 0x2d27, 0x2d2d, 0x2d6f, 0x2d70, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfffc, 0xfffd #if TCL_UTF_MAX > 3 ,0x1003c, 0x1003d, 0x101a0, 0x1056f, 0x10808, 0x10837, 0x10838, 0x1083c, 0x108f4, 0x108f5, 0x1093f, 0x10a05, 0x10a06, 0x11288, 0x1130f, 0x11310, 0x11332, 0x11333, 0x11347, 0x11348, 0x11350, 0x11357, 0x1145b, 0x1145d, 0x118ff, 0x16a6e, 0x16a6f, 0x16fe0, 0x1b000, 0x1b001, 0x1d49e, 0x1d49f, 0x1d4a2, 0x1d4a5, 0x1d4a6, 0x1d4bb, 0x1d546, 0x1e023, 0x1e024, 0x1e95e, 0x1e95f, 0x1ee21, 0x1ee22, 0x1ee24, 0x1ee27, 0x1ee39, 0x1ee3b, 0x1ee42, 0x1ee47, 0x1ee49, 0x1ee4b, 0x1ee51, 0x1ee52, 0x1ee54, 0x1ee57, 0x1ee59, 0x1ee5b, 0x1ee5d, 0x1ee5f, 0x1ee61, 0x1ee62, 0x1ee64, 0x1ee7e, |
︙ | ︙ |
Changes to jni/tcl/generic/regcustom.h.
︙ | ︙ | |||
85 86 87 88 89 90 91 | typedef Tcl_UniChar chr; /* The type itself. */ typedef int pchr; /* What it promotes to. */ typedef unsigned uchr; /* Unsigned type that will hold a chr. */ typedef int celt; /* Type to hold chr, or NOCELT */ #define NOCELT (-1) /* Celt value which is not valid chr */ #define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */ #define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */ | | | 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | typedef Tcl_UniChar chr; /* The type itself. */ typedef int pchr; /* What it promotes to. */ typedef unsigned uchr; /* Unsigned type that will hold a chr. */ typedef int celt; /* Type to hold chr, or NOCELT */ #define NOCELT (-1) /* Celt value which is not valid chr */ #define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */ #define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */ #if TCL_UTF_MAX > 3 #define CHRBITS 32 /* Bits in a chr; must not use sizeof */ #define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */ #define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ #else #define CHRBITS 16 /* Bits in a chr; must not use sizeof */ #define CHR_MIN 0x0000 /* Smallest and largest chr; the value */ #define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ |
︙ | ︙ |
Changes to jni/tcl/generic/tcl.h.
︙ | ︙ | |||
2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 | * is the default and recommended mode. UCS-4 is experimental and not * recommended. It works for the core, but most extensions expect UCS-2. */ #ifndef TCL_UTF_MAX #define TCL_UTF_MAX 3 #endif /* * This represents a Unicode character. Any changes to this should also be * reflected in regcustom.h. */ | > > > | | 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 | * is the default and recommended mode. UCS-4 is experimental and not * recommended. It works for the core, but most extensions expect UCS-2. */ #ifndef TCL_UTF_MAX #define TCL_UTF_MAX 3 #endif #if (TCL_UTF_MAX != 3) && (TCL_UTF_MAX != 4) #error TCL_UTF_MAX must be 3 or 4 #endif /* * This represents a Unicode character. Any changes to this should also be * reflected in regcustom.h. */ #if TCL_UTF_MAX > 3 /* * unsigned int isn't 100% accurate as it should be a strict 4-byte value * (perhaps wchar_t). 64-bit systems may have troubles. The size of this * value must be reflected correctly in regcustom.h and * in tclEncoding.c. * XXX: Tcl is currently UCS-2 and planning UTF-16 for the Unicode * XXX: string rep that Tcl_UniChar represents. Changing the size |
︙ | ︙ |
Changes to jni/tcl/generic/tclCompCmdsSZ.c.
︙ | ︙ | |||
1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 | TclAdvanceLines(&bline, tokenPtr->start, tokenPtr->start + tokenPtr->size); count++; continue; case TCL_TOKEN_BS: length = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, buf); literal = TclRegisterNewLiteral(envPtr, buf, length); TclEmitPush(literal, envPtr); count++; continue; case TCL_TOKEN_VARIABLE: /* * Check for simple variable access; see if we can only generate | > > > > > > > > > > > > > > > > > | 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 | TclAdvanceLines(&bline, tokenPtr->start, tokenPtr->start + tokenPtr->size); count++; continue; case TCL_TOKEN_BS: length = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, buf); #if TCL_UTF_MAX > 3 if (TokenAfter(tokenPtr) < endTokenPtr) { /* * Try to collapse a surrogate pair into a single code point * by inspecting the following token. This makes the * notations "\ud83d\ude02" and "\U0001F602" equivalent. */ int newLength = TclCollapseSurrogatePair(TokenAfter(tokenPtr), NULL,buf); if (newLength > 0) { length = newLength; tokenPtr = TokenAfter(tokenPtr); } } #endif literal = TclRegisterNewLiteral(envPtr, buf, length); TclEmitPush(literal, envPtr); count++; continue; case TCL_TOKEN_VARIABLE: /* * Check for simple variable access; see if we can only generate |
︙ | ︙ |
Changes to jni/tcl/generic/tclCompile.c.
︙ | ︙ | |||
2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 | TclAdvanceLines(&envPtr->line, tokenPtr->start, tokenPtr->start + tokenPtr->size); break; case TCL_TOKEN_BS: length = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, buffer); Tcl_DStringAppend(&textBuffer, buffer, length); /* * If the backslash sequence we found is in a literal, and * represented a continuation line, we compute and store its * location (as char offset to the beginning of the _result_ * script). We may have to extend the table of locations. | > > > > > > > > > > > > > > > > > > | 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 | TclAdvanceLines(&envPtr->line, tokenPtr->start, tokenPtr->start + tokenPtr->size); break; case TCL_TOKEN_BS: length = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, buffer); #if TCL_UTF_MAX > 3 if (count > 0) { /* * Try to collapse a surrogate pair into a single code point * by inspecting the following token. This makes the * notations "\ud83d\ude02" and "\U0001F602" equivalent. */ int newLength = TclCollapseSurrogatePair(tokenPtr + 1, NULL, buffer); if (newLength > 0) { Tcl_DStringAppend(&textBuffer, buffer, newLength); tokenPtr++; count--; break; } } #endif Tcl_DStringAppend(&textBuffer, buffer, length); /* * If the backslash sequence we found is in a literal, and * represented a continuation line, we compute and store its * location (as char offset to the beginning of the _result_ * script). We may have to extend the table of locations. |
︙ | ︙ |
Changes to jni/tcl/generic/tclDisassemble.c.
︙ | ︙ | |||
889 890 891 892 893 894 895 | i += 2; continue; case '\v': Tcl_AppendToObj(appendObj, "\\v", -1); i += 2; continue; default: | | < < < < < < < < < < < | 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 | i += 2; continue; case '\v': Tcl_AppendToObj(appendObj, "\\v", -1); i += 2; continue; default: #if TCL_UTF_MAX > 3 if (ch > 0xffff) { Tcl_AppendPrintfToObj(appendObj, "\\U%08x", ch); i += 10; } else #endif if (ch < 0x20 || ch >= 0x7f) { Tcl_AppendPrintfToObj(appendObj, "\\u%04x", ch); i += 6; } else { Tcl_AppendPrintfToObj(appendObj, "%c", ch); |
︙ | ︙ |
Changes to jni/tcl/generic/tclEncoding.c.
︙ | ︙ | |||
230 231 232 233 234 235 236 | int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int TableToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static size_t unilen(const char *src); | | > > | | 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 | int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int TableToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static size_t unilen(const char *src); #if TCL_UTF_MAX > 3 static size_t unilen4(const char *src); #endif static int UnicodeToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int UtfToUnicodeProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); #if TCL_UTF_MAX > 3 static int UtfToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr, int pureNullMode); #endif static int UtfIntToUtfExtProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int UtfExtToUtfIntProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int Iso88591FromUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int Iso88591ToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); #if TCL_UTF_MAX > 3 static int Utf16ToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int UtfToUtf16Proc(ClientData clientData, const char *src, int srcLen, int flags, |
︙ | ︙ | |||
589 590 591 592 593 594 595 | type.toUtfProc = UtfExtToUtfIntProc; type.fromUtfProc = UtfIntToUtfExtProc; type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; Tcl_CreateEncoding(&type); | | | | | 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 | type.toUtfProc = UtfExtToUtfIntProc; type.fromUtfProc = UtfIntToUtfExtProc; type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; Tcl_CreateEncoding(&type); #if TCL_UTF_MAX > 3 type.encodingName = "utf-32"; #else type.encodingName = "unicode"; #endif type.toUtfProc = UnicodeToUtfProc; type.fromUtfProc = UtfToUnicodeProc; type.freeProc = NULL; #if TCL_UTF_MAX > 3 type.nullSize = 4; #else type.nullSize = 2; #endif type.clientData = NULL; Tcl_CreateEncoding(&type); #if TCL_UTF_MAX > 3 type.encodingName = "unicode"; type.toUtfProc = Utf16ToUtfProc; type.fromUtfProc = UtfToUtf16Proc; type.freeProc = NULL; type.nullSize = 2; type.clientData = NULL; Tcl_CreateEncoding(&type); |
︙ | ︙ | |||
1100 1101 1102 1103 1104 1105 1106 | encodingPtr->toUtfProc = typePtr->toUtfProc; encodingPtr->fromUtfProc = typePtr->fromUtfProc; encodingPtr->freeProc = typePtr->freeProc; encodingPtr->nullSize = typePtr->nullSize; encodingPtr->clientData = typePtr->clientData; if (typePtr->nullSize == 1) { encodingPtr->lengthProc = (LengthProc *) strlen; | | | 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 | encodingPtr->toUtfProc = typePtr->toUtfProc; encodingPtr->fromUtfProc = typePtr->fromUtfProc; encodingPtr->freeProc = typePtr->freeProc; encodingPtr->nullSize = typePtr->nullSize; encodingPtr->clientData = typePtr->clientData; if (typePtr->nullSize == 1) { encodingPtr->lengthProc = (LengthProc *) strlen; #if TCL_UTF_MAX > 3 } else if (typePtr->nullSize == 4) { encodingPtr->lengthProc = (LengthProc *) unilen4; #endif } else { encodingPtr->lengthProc = (LengthProc *) unilen; } encodingPtr->refCount = 1; |
︙ | ︙ | |||
1364 1365 1366 1367 1368 1369 1370 | while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); if (result != TCL_CONVERT_NOSPACE) { | | > > | 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 | while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); if (result != TCL_CONVERT_NOSPACE) { if (encodingPtr->nullSize == 4) { Tcl_DStringSetLength(dstPtr, soFar + 3); } else if (encodingPtr->nullSize == 2) { Tcl_DStringSetLength(dstPtr, soFar + 1); } Tcl_DStringSetLength(dstPtr, soFar); return Tcl_DStringValue(dstPtr); } flags &= ~TCL_ENCODING_START; |
︙ | ︙ | |||
1463 1464 1465 1466 1467 1468 1469 | dstCharsPtr = &dstChars; } dstLen -= encodingPtr->nullSize; result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); | | > > > > | 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 | dstCharsPtr = &dstChars; } dstLen -= encodingPtr->nullSize; result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); if (encodingPtr->nullSize == 4) { dst[*dstWrotePtr + 1] = '\0'; dst[*dstWrotePtr + 2] = '\0'; dst[*dstWrotePtr + 3] = '\0'; } else if (encodingPtr->nullSize == 2) { dst[*dstWrotePtr + 1] = '\0'; } dst[*dstWrotePtr] = '\0'; return result; } |
︙ | ︙ | |||
2200 2201 2202 2203 2204 2205 2206 | memcpy(dst, src, (size_t) srcLen); return result; } /* *------------------------------------------------------------------------- * | | | | | 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 | memcpy(dst, src, (size_t) srcLen); return result; } /* *------------------------------------------------------------------------- * * UtfIntToUtfExtProc -- * * Convert from (U|W)TF-8 to UTF-8. While converting null-bytes * from Tcl's internal representation (0xc0, 0x80) to the official * representation (0x00). See UtfToUtfProc for details. * * Results: * Returns TCL_OK if conversion was successful. * * Side effects: * None. |
︙ | ︙ | |||
2242 2243 2244 2245 2246 2247 2248 2249 2250 | int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr, 1); | > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 | int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { #if TCL_UTF_MAX > 3 /* UTF-8 to UTF-8 */ return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr, 1); #else /* WTF-8 to UTF-8 */ const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; Tcl_UniChar ch; result = TCL_OK; srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if ((flags & TCL_ENCODING_END) == 0) { srcClose -= 3; } if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } dstStart = dst; dstEnd = dst + dstLen - 4; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } if (UCHAR(*src) == 0xc0 && (src + 1 < srcEnd) && UCHAR(*(src+1)) == 0x80) { *dst++ = 0; src += 2; } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { ch = (unsigned char) *src; src += 1; dst += Tcl_UniCharToUtf(ch, dst); } else { int ch2, len; src += Tcl_UtfToUniChar(src, &ch); ch2 = ch; if ((ch & 0xFC00) == 0xD800) { if (Tcl_UtfCharComplete(src, srcEnd - src)) { len = Tcl_UtfToUniChar(src, &ch); if ((ch & 0xFC00) == 0xDC00) { src += len; ch2 = (((ch2&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; } } } dst += TclUniCharToUtfExt(ch2, dst); } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; #endif } /* *------------------------------------------------------------------------- * * UtfExtToUtfIntProc -- * * Convert from UTF-8 to (U|W)TF-8 while converting null-bytes from the * official representation (0x00) to Tcl's internal representation (0xc0, * 0x80). See UtfToUtfProc for details. * * Results: * Returns TCL_OK if conversion was successful. * * Side effects: |
︙ | ︙ | |||
2291 2292 2293 2294 2295 2296 2297 2298 2299 | int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr, 0); | > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 | int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { #if TCL_UTF_MAX > 3 /* UTF-8 to UTF-8 */ return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr, 0); #else /* UTF-8 to WTF-8 */ const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int ch, result, numChars, charLimit = INT_MAX; result = TCL_OK; srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if ((flags & TCL_ENCODING_END) == 0) { srcClose -= 4; } if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if ((src > srcClose) && (!TclUtfCharCompleteExt(src, srcEnd - src))) { result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } if (UCHAR(*src) < 0x80 && UCHAR(*src) != 0) { *dst++ = *src++; } else if (!TclUtfCharCompleteExt(src, srcEnd - src)) { ch = (unsigned char) *src; src += 1; dst += Tcl_UniCharToUtf(ch, dst); } else { int len = TclUtfToUniCharExt(src, &ch); if (ch > 0xFFFF && ch <= 0x10FFFF) { int dstLen; ch -= 0x10000; dstLen = Tcl_UniCharToUtf((ch >> 10) | 0xD800, dst); ch = (ch & 0x3FF) | 0xDC00; if (dst + dstLen > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } dst += dstLen; numChars++; } src += len; dst += Tcl_UniCharToUtf(ch, dst); } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; #endif } #if TCL_UTF_MAX > 3 /* *------------------------------------------------------------------------- * * UtfToUtfProc -- * * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation * is not a no-op, because it will turn a stream of improperly formed |
︙ | ︙ | |||
2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 | */ ch = (unsigned char) *src; src += 1; dst += Tcl_UniCharToUtf(ch, dst); } else { src += Tcl_UtfToUniChar(src, &ch); dst += Tcl_UniCharToUtf(ch, dst); } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } /* *------------------------------------------------------------------------- * * UnicodeToUtfProc -- * * Convert from Unicode to UTF-8. | > > > > > > > > > > > > > > > > | 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 | */ ch = (unsigned char) *src; src += 1; dst += Tcl_UniCharToUtf(ch, dst); } else { src += Tcl_UtfToUniChar(src, &ch); #if TCL_UTF_MAX > 3 /* * Collapse surrogate pairs in both directions. */ if ((ch & 0xFFFFC00) == 0xD800 && Tcl_UtfCharComplete(src, srcEnd - src)) { int ch2, len = Tcl_UtfToUniChar(src, &ch2); if ((ch2 & 0xFFFFFC00) == 0xDC00) { src += len; ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000; } } #endif dst += Tcl_UniCharToUtf(ch, dst); } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } #endif /* *------------------------------------------------------------------------- * * UnicodeToUtfProc -- * * Convert from Unicode to UTF-8. |
︙ | ︙ | |||
2585 2586 2587 2588 2589 2590 2591 | /* * Need to handle this in a way that won't cause misalignment by * casting dst to a Tcl_UniChar. [Bug 1122671] */ #ifdef WORDS_BIGENDIAN | | | | 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 | /* * Need to handle this in a way that won't cause misalignment by * casting dst to a Tcl_UniChar. [Bug 1122671] */ #ifdef WORDS_BIGENDIAN #if TCL_UTF_MAX > 3 *dst++ = (ch >> 24); *dst++ = ((ch >> 16) & 0xFF); *dst++ = ((ch >> 8) & 0xFF); *dst++ = (ch & 0xFF); #else *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); #endif #else #if TCL_UTF_MAX > 3 *dst++ = (ch & 0xFF); *dst++ = ((ch >> 8) & 0xFF); *dst++ = ((ch >> 16) & 0xFF); *dst++ = (ch >> 24); #else *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); |
︙ | ︙ | |||
3037 3038 3039 3040 3041 3042 3043 | *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } | | | 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 | *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } #if TCL_UTF_MAX > 3 /* *------------------------------------------------------------------------- * * Utf16ToUtfProc -- * * Convert from UTF-16 to UTF-8. * |
︙ | ︙ | |||
3131 3132 3133 3134 3135 3136 3137 | *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } #endif | | | 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 | *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } #endif #if TCL_UTF_MAX > 3 /* *------------------------------------------------------------------------- * * UtfToUtf16Proc -- * * Convert from UTF-8 to UTF-16. * |
︙ | ︙ | |||
3829 3830 3831 3832 3833 3834 3835 | p = (unsigned short *) src; while (*p != 0x0000) { p++; } return (char *) p - src; } | | | 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 | p = (unsigned short *) src; while (*p != 0x0000) { p++; } return (char *) p - src; } #if TCL_UTF_MAX > 3 static size_t unilen4( const char *src) { unsigned int *p; p = (unsigned int *) src; |
︙ | ︙ |
Changes to jni/tcl/generic/tclInt.h.
︙ | ︙ | |||
3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 | MODULE_SCOPE int TclZlibInit(Tcl_Interp *interp); MODULE_SCOPE void * TclpThreadCreateKey(void); MODULE_SCOPE void TclpThreadDeleteKey(void *keyPtr); MODULE_SCOPE void TclpThreadSetMasterTSD(void *tsdKeyPtr, void *ptr); MODULE_SCOPE void * TclpThreadGetMasterTSD(void *tsdKeyPtr); MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp, const char *msg, int length); /* *---------------------------------------------------------------- * Command procedures in the generic core: *---------------------------------------------------------------- */ | > > > > > > > > > | 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 | MODULE_SCOPE int TclZlibInit(Tcl_Interp *interp); MODULE_SCOPE void * TclpThreadCreateKey(void); MODULE_SCOPE void TclpThreadDeleteKey(void *keyPtr); MODULE_SCOPE void TclpThreadSetMasterTSD(void *tsdKeyPtr, void *ptr); MODULE_SCOPE void * TclpThreadGetMasterTSD(void *tsdKeyPtr); MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp, const char *msg, int length); #if TCL_UTF_MAX > 3 MODULE_SCOPE int TclCollapseSurrogatePair(Tcl_Token *tokenPtr, int *numReadPtr, char *buffer); #else MODULE_SCOPE int TclUniCharToUtfExt(int ch, char *buf); MODULE_SCOPE int TclUtfToUniCharExt(const char *src, int *chPtr); MODULE_SCOPE int TclUtfCharCompleteExt(const char *src, int len); #endif /* *---------------------------------------------------------------- * Command procedures in the generic core: *---------------------------------------------------------------- */ |
︙ | ︙ |
Changes to jni/tcl/generic/tclMain.c.
︙ | ︙ | |||
65 66 67 68 69 70 71 | /* * Further on, in UNICODE mode we just use Tcl_NewUnicodeObj, otherwise * NewNativeObj is needed (which provides proper conversion from native * encoding to UTF-8). */ | | | | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | /* * Further on, in UNICODE mode we just use Tcl_NewUnicodeObj, otherwise * NewNativeObj is needed (which provides proper conversion from native * encoding to UTF-8). */ #if defined(UNICODE) && (TCL_UTF_MAX <= 3) # define NewNativeObj Tcl_NewUnicodeObj #else /* !UNICODE || (TCL_UTF_MAX > 3) */ static inline Tcl_Obj * NewNativeObj( TCHAR *string, int length) { Tcl_DString ds; |
︙ | ︙ |
Changes to jni/tcl/generic/tclParse.c.
︙ | ︙ | |||
783 784 785 786 787 788 789 | { int result = 0; register const char *p = src; while (numBytes--) { unsigned char digit = UCHAR(*p); | | | 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 | { int result = 0; register const char *p = src; while (numBytes--) { unsigned char digit = UCHAR(*p); if (!isxdigit(digit)) { break; } p++; result <<= 4; if (digit >= 'a') { |
︙ | ︙ | |||
925 926 927 928 929 930 931 932 933 934 935 936 937 938 | case 'U': count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); if (count == 2) { /* * No hexadigits -> This is just "U". */ result = 'U'; } break; case '\n': count--; do { p++; count++; | > > > > > > > > > > > > > | 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 | case 'U': count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); if (count == 2) { /* * No hexadigits -> This is just "U". */ result = 'U'; } else { /* * Check Unicode range. */ #if TCL_UTF_MAX > 3 if ((result < 0) || (result > 0x10FFFF)) { result = 0xFFFD; } #else if ((result < 0) || (result > 0xFFFF)) { result = 0xFFFD; } #endif } break; case '\n': count--; do { p++; count++; |
︙ | ︙ | |||
2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 | appendByteLength = tokenPtr->size; break; case TCL_TOKEN_BS: appendByteLength = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, utfCharBytes); append = utfCharBytes; /* * If the backslash sequence we found is in a literal, and * represented a continuation line, we compute and store its * location (as char offset to the beginning of the _result_ * script). We may have to extend the table of locations. * * Note that the continuation line information is relevant even if | > > > > > > > > > > > > > > > > > > | 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 | appendByteLength = tokenPtr->size; break; case TCL_TOKEN_BS: appendByteLength = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, utfCharBytes); append = utfCharBytes; #if TCL_UTF_MAX > 3 if (count > 0) { /* * Try to collapse a surrogate pair into a single code point * by inspecting the following token. This makes the * notations "\ud83d\ude02" and "\U0001F602" equivalent. */ int newLength = TclCollapseSurrogatePair(tokenPtr + 1, NULL, utfCharBytes); if (newLength > 0) { appendByteLength = newLength; tokenPtr++; count--; break; } } #endif /* * If the backslash sequence we found is in a literal, and * represented a continuation line, we compute and store its * location (as char offset to the beginning of the _result_ * script). We may have to extend the table of locations. * * Note that the continuation line information is relevant even if |
︙ | ︙ | |||
2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 | * check. */ { int length; const char *script = Tcl_GetStringFromObj(objPtr, &length); return CommandComplete(script, length); } /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 | * check. */ { int length; const char *script = Tcl_GetStringFromObj(objPtr, &length); return CommandComplete(script, length); } #if TCL_UTF_MAX > 3 /* *---------------------------------------------------------------------- * * TclCollapseSurrogatePair -- * * If possible, a following TCL_TOKEN_BS giving the second in * a Unicode surrogate pair is collapsed to a single code point. * * Results: * 0, if surrogate pair not combined to single code point, * otherwise the length of the UTF representation stored to * buffer. * * Side effects: * The buffer variable is overwritten with a new UTF sequence * if the combination was successful. * If numReadPtr is not NULL, it receives the number of bytes * consumed by the conversion of the backslash sequence. * *---------------------------------------------------------------------- */ int TclCollapseSurrogatePair( Tcl_Token *tokenPtr, /* Pointer to token to be checked. */ int *numReadPtr, /* Pointer to number of consumed input chars. */ char *buffer) /* Buffer holding UTF data of previous token. */ { int count, numRead; Tcl_UniChar ch, ch2; char buffer2[TCL_UTF_MAX]; Tcl_UtfToUniChar(buffer, &ch); if ((ch <= 0xFFFF) && ((ch & 0xFC00) == 0xD800)) { if (tokenPtr->type == TCL_TOKEN_BS) { count = TclParseBackslash(tokenPtr->start, tokenPtr->size, &numRead, buffer2); if (count <= 0) { return 0; } Tcl_UtfToUniChar(buffer2, &ch2); if ((ch2 <= 0xFFFF) && ((ch2 & 0xFC00) == 0xDC00)) { ch = ((ch & 0x3FF) << 10) + (ch2 & 0x3FF); ch += 0x10000; if (numReadPtr != NULL) { *numReadPtr = numRead; } return Tcl_UniCharToUtf(ch, buffer); } } } return 0; } #endif /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */ |
Changes to jni/tcl/generic/tclUtf.c.
︙ | ︙ | |||
72 73 74 75 76 77 78 79 80 81 82 83 84 85 | 4,4,4,4,4,4,4,4, #else 1,1,1,1,1,1,1,1, #endif 1,1,1,1,1,1,1,1 }; /* * Functions used only in this module. */ static int UtfCount(int ch); /* | > > > > > > > > > > > > > > > > > | 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | 4,4,4,4,4,4,4,4, #else 1,1,1,1,1,1,1,1, #endif 1,1,1,1,1,1,1,1 }; #if TCL_UTF_MAX <= 3 static const unsigned char totalBytesExt[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 1,1,1,1,1,1,1,1 }; #endif /* * Functions used only in this module. */ static int UtfCount(int ch); /* |
︙ | ︙ | |||
150 151 152 153 154 155 156 | if (ch >= 0) { if (ch <= 0x7FF) { buf[1] = (char) ((ch | 0x80) & 0xBF); buf[0] = (char) ((ch >> 6) | 0xC0); return 2; } if (ch <= 0xFFFF) { | > > > | < | < | | > > | > | > | | > | | | | | | > | > > > > > > > > > > > | | > > > > > | | > > < > > | 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | if (ch >= 0) { if (ch <= 0x7FF) { buf[1] = (char) ((ch | 0x80) & 0xBF); buf[0] = (char) ((ch >> 6) | 0xC0); return 2; } if (ch <= 0xFFFF) { goto three; } #if TCL_UTF_MAX > 3 if (ch <= 0x10FFFF) { buf[3] = (char) ((ch | 0x80) & 0xBF); buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); buf[0] = (char) ((ch >> 18) | 0xF0); return 4; } #endif } ch = 0xFFFD; three: buf[2] = (char) ((ch | 0x80) & 0xBF); buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); buf[0] = (char) ((ch >> 12) | 0xE0); return 3; } #if TCL_UTF_MAX <= 3 int TclUniCharToUtfExt( int ch, /* The Tcl_UniChar to be stored in the * buffer. */ char *buf) /* Buffer in which the UTF-8 representation of * the Tcl_UniChar is stored. Buffer must be * large enough to hold the UTF-8 character * (at most 4 bytes). */ { if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) { buf[0] = (char) ch; return 1; } if (ch >= 0) { if (ch <= 0x7FF) { buf[1] = (char) ((ch | 0x80) & 0xBF); buf[0] = (char) ((ch >> 6) | 0xC0); return 2; } if (ch <= 0xFFFF) { goto three; } if (ch <= 0x10FFFF) { buf[3] = (char) ((ch | 0x80) & 0xBF); buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); buf[0] = (char) ((ch >> 18) | 0xF0); return 4; } } ch = 0xFFFD; three: buf[2] = (char) ((ch | 0x80) & 0xBF); buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); buf[0] = (char) ((ch >> 12) | 0xE0); return 3; } #endif /* *--------------------------------------------------------------------------- * * Tcl_UniCharToUtfDString -- * * Convert the given Unicode string to UTF-8. |
︙ | ︙ | |||
330 331 332 333 334 335 336 | *chPtr = (Tcl_UniChar) (((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); return 4; } /* | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 | *chPtr = (Tcl_UniChar) (((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); return 4; } /* * A four-byte-character lead-byte not followed by three trail-bytes * represents itself. */ } #endif *chPtr = (Tcl_UniChar) byte; return 1; } #if TCL_UTF_MAX <= 3 int TclUtfToUniCharExt( const char *src, /* The UTF-8 string. */ int *chPtr) /* Filled with the Unicode represented by * the UTF-8 string. */ { int byte; byte = *((unsigned char *) src); if (byte < 0xC0) { *chPtr = byte; return 1; } else if (byte < 0xE0) { if ((src[1] & 0xC0) == 0x80) { *chPtr = ((byte & 0x1F) << 6) | (src[1] & 0x3F); return 2; } } else if (byte < 0xF0) { if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) { *chPtr = ((byte & 0x0F) << 12) | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F); return 3; } } else if (byte < 0xF8) { if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { *chPtr = ((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F); return 4; } } *chPtr = byte; return 1; } #endif /* *--------------------------------------------------------------------------- * * Tcl_UtfToUniCharDString -- * * Convert the UTF-8 string to Unicode. |
︙ | ︙ | |||
429 430 431 432 433 434 435 436 437 438 439 440 441 442 | int length) /* Length of above string in bytes. */ { int ch; ch = *((unsigned char *) src); return length >= totalBytes[ch]; } /* *--------------------------------------------------------------------------- * * Tcl_NumUtfChars -- * * Returns the number of characters (not bytes) in the UTF-8 string, not | > > > > > > > > > > > > > > > > | 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 | int length) /* Length of above string in bytes. */ { int ch; ch = *((unsigned char *) src); return length >= totalBytes[ch]; } #if TCL_UTF_MAX <= 3 int TclUtfCharCompleteExt( const char *src, /* String to check if first few bytes contain * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { int ch; ch = *((unsigned char *) src); return length >= totalBytesExt[ch]; } #endif /* *--------------------------------------------------------------------------- * * Tcl_NumUtfChars -- * * Returns the number of characters (not bytes) in the UTF-8 string, not |
︙ | ︙ |
Changes to jni/tcl/generic/tclUtil.c.
︙ | ︙ | |||
798 799 800 801 802 803 804 | while (count > 0) { char c = *src; if (c == '\\') { int numRead; int backslashCount = TclParseBackslash(src, count, &numRead, dst); | > > > > > > | > > > > > > > > > > > > | | | > > > > > | 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 | while (count > 0) { char c = *src; if (c == '\\') { int numRead; int backslashCount = TclParseBackslash(src, count, &numRead, dst); src += numRead; count -= numRead; #if TCL_UTF_MAX > 3 if (*src == '\\') { Tcl_Token token; int length; /* * Try to collapse a surrogate pair into a single code point * by inspecting the following token. This makes the * notations "\ud83d\ude02" and "\U0001F602" equivalent. */ token.type = TCL_TOKEN_BS; token.start = src; token.size = count; token.numComponents = 0; length = TclCollapseSurrogatePair(&token, &numRead, dst); if (length > 0) { backslashCount = length; src += numRead; count -= numRead; } } #endif dst += backslashCount; newCount += backslashCount; } else { *dst = c; dst++; newCount++; src++; count--; } |
︙ | ︙ |
Changes to jni/tcl/pkgs/tdbcodbc1.0.4/generic/tdbcodbc.c.
︙ | ︙ | |||
778 779 780 781 782 783 784 | ) { int i; char buf[TCL_UTF_MAX]; for (i = 0; i < len; ++i) { int bytes, ch; ch = (int) ws[i]; | | | 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 | ) { int i; char buf[TCL_UTF_MAX]; for (i = 0; i < len; ++i) { int bytes, ch; ch = (int) ws[i]; #if TCL_UTF_MAX > 3 /* Assumes that SQLWCHAR is 16 bit */ if ((ch >= 0xd800) && (ch <= 0xdbff)) { if (i + 1 < len) { int ch2 = (int) ws[i+1]; if ((ch2 >= 0xdc00) && (ch2 <= 0xdfff)) { ch = ((ch & 0x3ff) << 10) + 0x10000 + (ch2 & 0x3ff); |
︙ | ︙ | |||
828 829 830 831 832 833 834 | /* UTF-8 representation of the input string */ char* end = bytes + len; /* End of UTF-8 representation */ SQLWCHAR* retval; /* Buffer to hold the converted string */ SQLWCHAR* wcPtr; Tcl_UniChar ch = 0; len = (len + 1) * sizeof(SQLWCHAR); | | | | 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 | /* UTF-8 representation of the input string */ char* end = bytes + len; /* End of UTF-8 representation */ SQLWCHAR* retval; /* Buffer to hold the converted string */ SQLWCHAR* wcPtr; Tcl_UniChar ch = 0; len = (len + 1) * sizeof(SQLWCHAR); #if TCL_UTF_MAX > 3 /* Assumes that SQLWCHAR is 16 bit, doubled space for surrogates */ len *= 2; #endif retval = wcPtr = (SQLWCHAR*) ckalloc(len); while (bytes < end) { bytes += Tcl_UtfToUniChar(bytes, &ch); #if TCL_UTF_MAX > 3 if (ch > 0x10ffff) { ch = 0xfffd; } if (ch > 0xffff) { *wcPtr++ = (((ch - 0x10000) >> 10) & 0x3ff) | 0xd800; ch = ((ch - 0x10000) & 0x3ff) | 0xdc00; } |
︙ | ︙ |
Changes to jni/tcl/win/tclWinInit.c.
︙ | ︙ | |||
483 484 485 486 487 488 489 | const WCHAR *wSrc, char *dst) { char *start; start = dst; while (*wSrc != '\0') { | | | 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 | const WCHAR *wSrc, char *dst) { char *start; start = dst; while (*wSrc != '\0') { #if TCL_UTF_MAX > 3 Tcl_UniChar ch = *wSrc; if ((ch & 0xF800) == 0xD800) { if (ch & 0x0400) { /* Low surrogate */ dst[3] = (char) ((ch | 0x80) & 0xBF); dst[2] |= (char) (((ch >> 6) | 0x80) & 0x8F); |
︙ | ︙ |