Index: jni/rl_json/generic/parser.c ================================================================== --- jni/rl_json/generic/parser.c +++ jni/rl_json/generic/parser.c @@ -147,23 +147,18 @@ // test, but 0x00 is transformed to 0xC0 0x80 by Tcl (MUTF-8 rather than UTF-8) return CHAR_ADVANCE_UNESCAPED_NULL; } if (first < 0xe0 /* 0b11100000 */) { eat = 1; -#if TCL_UTF_MAX == 3 - } else { - eat = 2; -#else } else if (first < 0xf0 /* 0b11110000 */) { eat = 2; } else if (first < 0xf8 /* 0b11111000 */) { eat = 3; } else if (first < 0xfc /* 0b11111100 */) { eat = 4; } else { eat = 5; -#endif } *p += eat; *char_adj += eat; } @@ -318,11 +313,11 @@ append_mapped: Tcl_AppendToObj(out, &mapped, 1); // Weird, but arranged this way the compiler optimizes it to a jump table break; case 'u': { - Tcl_UniChar acc = 0; + unsigned int acc = 0; char utfbuf[6]; int i=4, digit; if (unlikely(e-p-2 < i)) { // -2 is for the "u" and the close quote err_at = e-1; @@ -352,14 +347,13 @@ } acc <<= 4; acc += digit; } -#if TCL_UTF_MAX > 4 if ((acc & 0xFFFFFC00) == 0xD800 && p[1] == '\\' && p[2] == 'u') { const unsigned char *pp = p + 2; - Tcl_UniChar acc2=0; + unsigned int acc2=0; i=4; if (unlikely(e-pp-2 < i)) { // -2 is for the "u" and the close quote err_at = e-1; if (err_at < e && *err_at != '"') @@ -394,10 +388,18 @@ if ((acc2 & 0xFFFFFC00) == 0xDC00) { acc = ((acc & 0x3FF) << 10) + 0x10000 + (acc2 & 0x3FF); p = pp; } } +#if TCL_UTF_MAX == 3 + if (acc > 0xFFFF) { + acc -= 0x10000; + Tcl_UniChar uch = ((acc >> 10) & 0x3FF) | 0xD800; + acc = (acc & 0x3FF) | 0xDC00; + const int len0 = Tcl_UniCharToUtf(uch, utfbuf); + Tcl_AppendToObj(out, utfbuf, len0); + } #endif //const unsigned char* utfend = output_utf8(acc, utfbuf); const int len = Tcl_UniCharToUtf(acc, utfbuf); Tcl_AppendToObj(out, utfbuf, len); } Index: jni/sdl2tk/generic/tkCanvText.c ================================================================== --- jni/sdl2tk/generic/tkCanvText.c +++ jni/sdl2tk/generic/tkCanvText.c @@ -1022,11 
+1022,29 @@ byteIndex = Tcl_UtfAtIndex(text, index) - text; byteCount = strlen(string); if (byteCount == 0) { return; } +#if TCL_UTF_MAX < 4 + /* + * Don't insert within surrogate pairs. + */ + if (byteIndex) { + Tcl_UniChar ch; + const char *prevPtr; + Tcl_UtfToUniChar(string + byteIndex, &ch); + if ((ch & 0xfc00) == 0xdc00) { + prevPtr = Tcl_UtfPrev(string + byteIndex, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xfc00) == 0xd800) { + byteIndex = prevPtr - string; + index -= 1; + } + } + } +#endif newStr = (char *) ckalloc(textPtr->numBytes + byteCount + 1); memcpy(newStr, text, byteIndex); strcpy(newStr + byteIndex, string); strcpy(newStr + byteIndex + byteCount, text + byteIndex); @@ -1101,12 +1119,58 @@ return; } charsRemoved = last + 1 - first; byteIndex = Tcl_UtfAtIndex(text, first) - text; +#if TCL_UTF_MAX < 4 + /* + * Delete complete surrogate pairs. + */ + if (byteIndex >= 0) { + Tcl_UniChar ch; + const char *prevPtr, *nextPtr; + + Tcl_UtfToUniChar(text + byteIndex, &ch); + if ((ch & 0xfc00) == 0xdc00) { + prevPtr = Tcl_UtfPrev(text + byteIndex, text); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xfc00) == 0xd800) { + byteIndex = prevPtr - text; + first--; + } + } else if (charsRemoved == 1 && (ch & 0xfc00) == 0xd800) { + nextPtr = Tcl_UtfNext(text + byteIndex); + Tcl_UtfToUniChar(nextPtr, &ch); + if ((ch & 0xfc00) == 0xdc00) { + last++; + } + } + if (last >= textPtr->numChars) { + last = textPtr->numChars - 1; + } + charsRemoved = last + 1 - first; + } +#endif byteCount = Tcl_UtfAtIndex(text + byteIndex, charsRemoved) - (text + byteIndex); +#if TCL_UTF_MAX < 4 + if (byteCount) { + int len; + Tcl_UniChar ch; + const char *prevPtr; + + len = Tcl_UtfToUniChar(text + byteIndex + byteCount, &ch); + if ((ch & 0xfc00) == 0xdc00) { + prevPtr = Tcl_UtfPrev(text + byteIndex + byteCount, text); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xfc00) == 0xd800) { + byteCount += len; + charsRemoved++; + } + } + } +#endif newStr = (char *) 
ckalloc(textPtr->numBytes + 1 - byteCount); memcpy(newStr, text, byteIndex); strcpy(newStr + byteIndex, text + byteIndex + byteCount); @@ -1342,10 +1406,14 @@ TextItem *textPtr = (TextItem *) itemPtr; int length; int c; Tk_CanvasTextInfo *textInfoPtr = textPtr->textInfoPtr; const char *string = Tcl_GetStringFromObj(obj, &length); +#if TCL_UTF_MAX < 4 + int roundUp = 0; + int oldInsertPos = textPtr->insertPos; +#endif c = string[0]; if ((c == 'e') && (strncmp(string, "end", length) == 0)) { *indexPtr = textPtr->numChars; @@ -1396,10 +1464,13 @@ if (*indexPtr < 0) { *indexPtr = 0; } else if (*indexPtr > textPtr->numChars) { *indexPtr = textPtr->numChars; } +#if TCL_UTF_MAX < 4 + roundUp = (*indexPtr == (oldInsertPos + 1)); +#endif } else { /* * Some of the paths here leave messages in the interp's result, so we * have to clear it out before storing our own message. */ @@ -1407,10 +1478,41 @@ badIndex: Tcl_SetObjResult(interp, Tcl_ObjPrintf("bad index \"%s\"", string)); Tcl_SetErrorCode(interp, "TK", "CANVAS", "ITEM_INDEX", "TEXT", NULL); return TCL_ERROR; } +#if TCL_UTF_MAX < 4 + /* + * Enforce index on start or end of surrogate pair. 
+ */ + if (*indexPtr) { + int byteIndex; + Tcl_UniChar ch; + const char *prevPtr, *nextPtr; + + string = textPtr->text; + byteIndex = Tcl_UtfAtIndex(string, *indexPtr) - string; + Tcl_UtfToUniChar(string + byteIndex, &ch); + if ((ch & 0xfc00) == 0xdc00) { + if (roundUp) { + *indexPtr += 1; + } else { + prevPtr = Tcl_UtfPrev(string + byteIndex, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xfc00) == 0xd800) { + *indexPtr -= 1; + } + } + } else if (roundUp && (ch & 0xfc00) == 0xd800) { + nextPtr = Tcl_UtfNext(string + byteIndex); + Tcl_UtfToUniChar(nextPtr, &ch); + if ((ch & 0xfc00) == 0xdc00) { + *indexPtr += 2; + } + } + } +#endif return TCL_OK; } /* *-------------------------------------------------------------- Index: jni/sdl2tk/generic/tkEntry.c ================================================================== --- jni/sdl2tk/generic/tkEntry.c +++ jni/sdl2tk/generic/tkEntry.c @@ -1984,23 +1984,23 @@ * If we're displaying a special character instead of the value of the * entry, recompute the displayString. */ if (entryPtr->showChar != NULL) { - Tcl_UniChar ch; - char buf[TCL_UTF_MAX]; + int ch; + char buf[6]; int size; /* * Normalize the special character so we can safely duplicate it in * the display string. If we didn't do this, then two malformed * characters might end up looking like one valid UTF character in the * resulting string. 
*/ - Tcl_UtfToUniChar(entryPtr->showChar, &ch); - size = Tcl_UniCharToUtf(ch, buf); + TkUtfToUniChar(entryPtr->showChar, &ch); + size = TkUniCharToUtf(ch, buf); entryPtr->numDisplayBytes = entryPtr->numChars * size; p = (char *) ckalloc(entryPtr->numDisplayBytes + 1); entryPtr->displayString = p; @@ -2161,17 +2161,40 @@ ptrdiff_t byteIndex; size_t byteCount, newByteCount; int oldChars, charsAdded; const char *string; char *newStr; +#if TCL_UTF_MAX < 4 + Tcl_UniChar ch; +#endif string = entryPtr->string; byteIndex = Tcl_UtfAtIndex(string, index) - string; byteCount = strlen(value); if (byteCount == 0) { return TCL_OK; } + +#if TCL_UTF_MAX < 4 + /* + * Don't insert within surrogate pairs. + */ + if (byteIndex) { + Tcl_UniChar ch; + const char *prevPtr; + + Tcl_UtfToUniChar(string + byteIndex, &ch); + if ((ch & 0xFC00) == 0xDC00) { + prevPtr = Tcl_UtfPrev(string + byteIndex, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xFC00) == 0xD800) { + byteIndex = prevPtr - string; + index -= 1; + } + } + } +#endif newByteCount = entryPtr->numBytes + byteCount + 1; newStr = (char *) ckalloc(newByteCount); memcpy(newStr, string, byteIndex); strcpy(newStr + byteIndex, value); @@ -2182,10 +2205,32 @@ EntryValidateChange(entryPtr, value, newStr, index, VALIDATE_INSERT) != TCL_OK) { ckfree(newStr); return TCL_OK; } + +#if TCL_UTF_MAX < 4 + /* + * Account for high surrogate at end of inserted string. + */ + if (byteCount > 1) { + const char *lastPtr = Tcl_UtfPrev(newStr+byteIndex+byteCount, newStr); + + Tcl_UtfToUniChar(lastPtr, &ch); + if ((ch & 0xFC00) == 0xD800) { + /* + * A high surrogate. If followed by low surrogate, + * adjust index after it. 
+ */ + lastPtr = Tcl_UtfNext(lastPtr); + Tcl_UtfToUniChar(lastPtr, &ch); + if ((ch & 0xFC00) == 0xDC00) { + index++; + } + } + } +#endif ckfree((char *)string); entryPtr->string = newStr; /* @@ -2268,11 +2313,60 @@ return TCL_OK; } string = entryPtr->string; byteIndex = Tcl_UtfAtIndex(string, index) - string; + +#if TCL_UTF_MAX < 4 + /* + * Delete complete surrogate pairs. + */ + if (byteIndex >= 0) { + Tcl_UniChar ch; + const char *prevPtr, *nextPtr; + + Tcl_UtfToUniChar(string + byteIndex, &ch); + if ((ch & 0xFC00) == 0xDC00) { + prevPtr = Tcl_UtfPrev(string + byteIndex, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xFC00) == 0xD800) { + byteIndex = prevPtr - string; + count++; + index--; + } + } else if ((count == 1) && ((ch & 0xFC00) == 0xD800)) { + nextPtr = Tcl_UtfNext(string + byteIndex); + Tcl_UtfToUniChar(nextPtr, &ch); + if ((ch & 0xFC00) == 0xDC00) { + count++; + } + } + if ((index + count) > entryPtr->numChars) { + count = entryPtr->numChars - index; + } + } +#endif + byteCount = Tcl_UtfAtIndex(string + byteIndex, count) - (string+byteIndex); + +#if TCL_UTF_MAX < 4 + if (byteCount) { + int len; + Tcl_UniChar ch; + const char *prevPtr; + + len = Tcl_UtfToUniChar(string + byteIndex + byteCount, &ch); + if ((ch & 0xFC00) == 0xDC00) { + prevPtr = Tcl_UtfPrev(string + byteIndex + byteCount, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xFC00) == 0xD800) { + byteCount += len; + count++; + } + } + } +#endif newByteCount = entryPtr->numBytes + 1 - byteCount; newStr = (char *) ckalloc(newByteCount); memcpy(newStr, string, (size_t) byteIndex); strcpy(newStr + byteIndex, string + byteIndex + byteCount); @@ -2675,10 +2769,14 @@ Tcl_Obj *indexObj, /* Specifies character in entryPtr. 
*/ int *indexPtr) /* Where to store converted character index */ { const char *string = Tcl_GetString(indexObj); size_t length = indexObj->length; + int roundUp = 0; +#if TCL_UTF_MAX < 4 + int oldInsertPos = entryPtr->insertPos; +#endif switch (string[0]) { case 'a': if (strncmp(string, "anchor", length) != 0) { goto badIndex; @@ -2718,19 +2816,18 @@ } else { goto badIndex; } break; case '@': { - int x, roundUp, maxWidth; + int x, maxWidth; if (Tcl_GetInt(NULL, string + 1, &x) != TCL_OK) { goto badIndex; } if (x < entryPtr->inset) { x = entryPtr->inset; } - roundUp = 0; maxWidth = Tk_Width(entryPtr->tkwin) - entryPtr->inset - entryPtr->xWidth - 1; if (x > maxWidth) { x = maxWidth; roundUp = 1; @@ -2757,11 +2854,42 @@ if (*indexPtr < 0){ *indexPtr = 0; } else if (*indexPtr > entryPtr->numChars) { *indexPtr = entryPtr->numChars; } +#if TCL_UTF_MAX < 4 + roundUp = (indexPtr == &entryPtr->insertPos) && + (*indexPtr == (oldInsertPos + 1)); +#endif + } + +#if TCL_UTF_MAX < 4 + /* + * Enforce index on start or end of surrogate pair. + */ + if (*indexPtr) { + int byteIndex; + Tcl_UniChar ch; + const char *prevPtr; + + string = entryPtr->string; + byteIndex = Tcl_UtfAtIndex(string, *indexPtr) - string; + Tcl_UtfToUniChar(string + byteIndex, &ch); + if ((ch & 0xFC00) == 0xDC00) { + if (roundUp) { + *indexPtr += 1; + } else { + prevPtr = Tcl_UtfPrev(string + byteIndex, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xFC00) == 0xD800) { + *indexPtr -= 1; + } + } + } } +#endif + return TCL_OK; badIndex: Tcl_SetObjResult(interp, Tcl_ObjPrintf("bad %s index \"%s\"", (entryPtr->type == TK_ENTRY) ? "entry" : "spinbox", string)); @@ -2871,10 +2999,31 @@ if (entryPtr->selectAnchor > entryPtr->numChars) { entryPtr->selectAnchor = entryPtr->numChars; } if (entryPtr->selectAnchor <= index) { +#if TCL_UTF_MAX < 4 + /* + * Correct ending point for surrogate pair. 
+ */ + + Tcl_UniChar ch; + const char *string; + + string = Tcl_UtfAtIndex(entryPtr->string, index); + string += Tcl_UtfToUniChar(string, &ch); + if (((ch & 0xFC00) == 0xDC00) && (index + 1 < entryPtr->numChars)) { + index += 1; + } else if (((ch & 0xFC00) == 0xD800) && + (index + 1 < entryPtr->numChars) && + (index == entryPtr->insertPos)) { + Tcl_UtfToUniChar(string, &ch); + if ((ch & 0xFC00) == 0xDC00) { + index += 2; + } + } +#endif newFirst = entryPtr->selectAnchor; newLast = index; } else { newFirst = index; newLast = entryPtr->selectAnchor; @@ -3548,11 +3697,11 @@ { int spaceNeeded, cvtFlags; /* Used to substitute string as proper Tcl * list element. */ int number, length; const char *string; - Tcl_UniChar ch; + int ch; char numStorage[2*TCL_INTEGER_SPACE]; while (1) { if (*before == '\0') { break; @@ -3581,11 +3730,11 @@ * There's a percent sequence here. Process it. */ before++; /* skip over % */ if (*before != '\0') { - before += Tcl_UtfToUniChar(before, &ch); + before += TkUtfToUniChar(before, &ch); } else { ch = '%'; } if (type == VALIDATE_BUTTON) { /* @@ -3601,11 +3750,11 @@ break; case 'W': /* widget name */ string = Tk_PathName(entryPtr->tkwin); break; default: - length = Tcl_UniCharToUtf(ch, numStorage); + length = TkUniCharToUtf(ch, numStorage); numStorage[length] = '\0'; string = numStorage; break; } } else { @@ -3661,11 +3810,11 @@ break; case 'W': /* widget name */ string = Tk_PathName(entryPtr->tkwin); break; default: - length = Tcl_UniCharToUtf(ch, numStorage); + length = TkUniCharToUtf(ch, numStorage); numStorage[length] = '\0'; string = numStorage; break; } } @@ -4161,11 +4310,11 @@ if (GetEntryIndex(interp, entryPtr, objv[3], &index) != TCL_OK) { goto error; } if (GetEntryIndex(interp, entryPtr, - objv[4],& index2) != TCL_OK) { + objv[4], &index2) != TCL_OK) { goto error; } if (index >= index2) { entryPtr->selectFirst = -1; entryPtr->selectLast = -1; Index: jni/sdl2tk/generic/tkFont.c 
================================================================== --- jni/sdl2tk/generic/tkFont.c +++ jni/sdl2tk/generic/tkFont.c @@ -562,11 +562,11 @@ case FONT_ACTUAL: { int skip, result, n; const char *s; Tk_Font tkfont; Tcl_Obj *optPtr, *charPtr, *resultPtr; - Tcl_UniChar uniChar = 0; + int uniChar = 0; const TkFontAttributes *faPtr; TkFontAttributes fa; /* * Params 0 and 1 are 'font actual'. Param 2 is the font name. 3-4 may @@ -627,21 +627,23 @@ /* * The 'charPtr' arg must be a single Unicode. */ if (charPtr != NULL) { - if (Tcl_GetCharLength(charPtr) != 1) { + const char *string = Tcl_GetString(charPtr); + int len = TkUtfToUniChar(string, &uniChar); + + if (len != charPtr->length) { resultPtr = Tcl_NewStringObj( "expected a single character but got \"", -1); - Tcl_AppendLimitedToObj(resultPtr, Tcl_GetString(charPtr), + Tcl_AppendLimitedToObj(resultPtr, string, -1, 40, "..."); Tcl_AppendToObj(resultPtr, "\"", -1); Tcl_SetObjResult(interp, resultPtr); Tcl_SetErrorCode(interp, "TK", "VALUE", "FONT_SAMPLE", NULL); return TCL_ERROR; } - uniChar = Tcl_GetUniChar(charPtr, 0); } /* * Find the font. */ @@ -1805,11 +1807,11 @@ } else if (strcasecmp(family, "ZapfChancery") == 0) { family = "ZapfChancery"; } else if (strcasecmp(family, "ZapfDingbats") == 0) { family = "ZapfDingbats"; } else { - Tcl_UniChar ch; + int ch; /* * Inline, capitalize the first letter of each word, lowercase the * rest of the letters in each word, and then take out the spaces * between the words. 
This may make the DString shorter, which is safe @@ -1823,18 +1825,18 @@ for (; *src != '\0'; ) { while (isspace(UCHAR(*src))) { /* INTL: ISO space */ src++; upper = 1; } - src += Tcl_UtfToUniChar(src, &ch); + src += TkUtfToUniChar(src, &ch); if (upper) { ch = Tcl_UniCharToUpper(ch); upper = 0; } else { ch = Tcl_UniCharToLower(ch); } - dest += Tcl_UniCharToUtf(ch, dest); + dest += TkUniCharToUtf(ch, dest); } *dest = '\0'; Tcl_DStringSetLength(dsPtr, dest - Tcl_DStringValue(dsPtr)); family = Tcl_DStringValue(dsPtr) + len; } @@ -2864,11 +2866,11 @@ Tk_MeasureChars(tkfont, chunkPtr->start, end - chunkPtr->start, -1, 0, &x); x += chunkPtr->x; } if (widthPtr != NULL) { - Tk_MeasureChars(tkfont, end, Tcl_UtfNext(end) - end, + Tk_MeasureChars(tkfont, end, TkUtfNext(end) - end, -1, 0, &w); } goto check; } index -= chunkPtr->numChars; @@ -3387,11 +3389,11 @@ int baseline = chunkPtr->y; Tcl_Obj *psObj = Tcl_NewObj(); int i, j, len; const char *p, *glyphname; char uindex[5], c, *ps; - Tcl_UniChar ch; + int ch; Tcl_AppendToObj(psObj, "[(", -1); for (i = 0; i < layoutPtr->numChunks; i++, chunkPtr++) { if (baseline != chunkPtr->y) { Tcl_AppendToObj(psObj, ")]\n[(", -1); @@ -3410,11 +3412,11 @@ * from the standard set defined by Adobe. The rest get punted. * Eventually this should be revised to handle more sophsticiated * international postscript fonts. */ - p += Tcl_UtfToUniChar(p, &ch); + p += TkUtfToUniChar(p, &ch); if ((ch == '(') || (ch == ')') || (ch == '\\') || (ch < 0x20)) { /* * Tricky point: the "03" is necessary in the sprintf below, * so that a full three digits of octal are always generated. 
* Without the "03", a number following this sequence could be Index: jni/sdl2tk/generic/tkInt.h ================================================================== --- jni/sdl2tk/generic/tkInt.h +++ jni/sdl2tk/generic/tkInt.h @@ -1323,10 +1323,23 @@ #endif #ifdef TK_USE_POLL MODULE_SCOPE TkDisplay * TkGetDisplayListExt(struct pollfd **pollTablePtr); #endif + +#if TCL_UTF_MAX > 3 +# define TkUtfToUniChar(src, chPtr) \ + Tcl_UtfToUniChar((src), (Tcl_UniChar *)(chPtr)) +# define TkUniCharToUtf Tcl_UniCharToUtf +# define TkUtfPrev Tcl_UtfPrev +# define TkUtfNext Tcl_UtfNext +#else + MODULE_SCOPE int TkUtfToUniChar(const char *, int *); + MODULE_SCOPE int TkUniCharToUtf(int, char *); + MODULE_SCOPE const char *TkUtfPrev(const char *, const char *); + MODULE_SCOPE const char *TkUtfNext(const char *); +#endif /* * Unsupported commands. */ Index: jni/sdl2tk/generic/tkSelect.c ================================================================== --- jni/sdl2tk/generic/tkSelect.c +++ jni/sdl2tk/generic/tkSelect.c @@ -1292,11 +1292,28 @@ * selection. */ Tcl_Interp *interp, /* Interpreter used for error reporting (not * used). */ const char *portion) /* New information to be appended. */ { +#if TCL_UTF_MAX < 4 + /* + * Ensure WTF-8 without 4-byte UTF-8 sequences. 
+ */ + Tcl_Encoding encoding; + Tcl_DString tmp; + + encoding = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_ExternalToUtfDString(encoding, portion, -1, &tmp); + Tcl_DStringAppend((Tcl_DString *) clientData, Tcl_DStringValue(&tmp), + Tcl_DStringLength(&tmp)); + Tcl_DStringFree(&tmp); + if (encoding) { + Tcl_FreeEncoding(encoding); + } +#else Tcl_DStringAppend((Tcl_DString *) clientData, portion, -1); +#endif return TCL_OK; } /* *---------------------------------------------------------------------- @@ -1398,11 +1415,11 @@ if (cmdInfoPtr->interp != NULL) { if (length <= maxBytes) { cmdInfoPtr->charOffset += Tcl_NumUtfChars(string, -1); cmdInfoPtr->buffer[0] = '\0'; } else { - Tcl_UniChar ch = 0; + Tcl_UniChar ch; p = string; string += count; numChars = 0; while (p < string) { Index: jni/sdl2tk/generic/tkText.c ================================================================== --- jni/sdl2tk/generic/tkText.c +++ jni/sdl2tk/generic/tkText.c @@ -4449,11 +4449,11 @@ { int objc, i, count; Tcl_Obj **objv; TkTextTabArray *tabArrayPtr; TkTextTab *tabPtr; - Tcl_UniChar ch; + int ch; double prevStop, lastStop; /* * Map these strings to TkTextTabAlign values. */ static const char *const tabOptionStrings[] = { @@ -4557,11 +4557,11 @@ /* * There may be a more efficient way of getting this. 
*/ - Tcl_UtfToUniChar(Tcl_GetString(objv[i+1]), &ch); + TkUtfToUniChar(Tcl_GetString(objv[i+1]), &ch); if (!Tcl_UniCharIsAlpha(ch)) { continue; } i += 1; @@ -5872,11 +5872,11 @@ int maxExtraLines = 0; const char *startOfLine = Tcl_GetString(theLine); CLANG_ASSERT(pattern); do { - Tcl_UniChar ch; + int ch; const char *p; int lastFullLine = lastOffset; if (firstNewLine == -1) { if (searchSpecPtr->strictLimits @@ -6106,11 +6106,11 @@ if (alreadySearchOffset < 0) { break; } } else { firstOffset = p - startOfLine + - Tcl_UtfToUniChar(startOfLine+matchOffset,&ch); + TkUtfToUniChar(startOfLine+matchOffset,&ch); } } } while (searchSpecPtr->all); } else { int maxExtraLines = 0; Index: jni/sdl2tk/generic/tkTextDisp.c ================================================================== --- jni/sdl2tk/generic/tkTextDisp.c +++ jni/sdl2tk/generic/tkTextDisp.c @@ -7707,12 +7707,12 @@ chunkPtr->x, maxX, TK_ISOLATE_END, &nextX); #endif /* TK_LAYOUT_WITH_BASE_CHUNKS */ if (bytesThatFit < maxBytes) { if ((bytesThatFit == 0) && noCharsYet) { - Tcl_UniChar ch; - int chLen = Tcl_UtfToUniChar(p, &ch); + int ch; + int chLen = TkUtfToUniChar(p, &ch); #ifdef TK_LAYOUT_WITH_BASE_CHUNKS bytesThatFit = CharChunkMeasureChars(chunkPtr, line, lineOffset+chLen, lineOffset, -1, chunkPtr->x, -1, 0, &nextX); Index: jni/sdl2tk/generic/tkTextIndex.c ================================================================== --- jni/sdl2tk/generic/tkTextIndex.c +++ jni/sdl2tk/generic/tkTextIndex.c @@ -385,11 +385,11 @@ TkTextIndex *indexPtr) /* Structure to fill in. */ { TkTextSegment *segPtr; int index; const char *p, *start; - Tcl_UniChar ch = 0; + Tcl_UniChar ch; indexPtr->tree = tree; if (lineIndex < 0) { lineIndex = 0; byteIndex = 0; @@ -478,11 +478,11 @@ TkTextIndex *indexPtr) /* Structure to fill in. 
*/ { TkTextSegment *segPtr; char *p, *start, *end; int index, offset; - Tcl_UniChar ch = 0; + Tcl_UniChar ch; indexPtr->tree = tree; if (lineIndex < 0) { lineIndex = 0; charIndex = 0; @@ -1550,11 +1550,11 @@ TkTextLine *linePtr; TkTextSegment *segPtr; TkTextElideInfo *infoPtr = NULL; int byteOffset; char *start, *end, *p; - Tcl_UniChar ch = 0; + Tcl_UniChar ch; int elide = 0; int checkElided = (type & COUNT_DISPLAY); if (charCount < 0) { TkTextIndexBackChars(textPtr, srcPtr, -charCount, dstPtr, type); @@ -1650,10 +1650,23 @@ if (!elide) { if (segPtr->typePtr == &tkTextCharType) { start = segPtr->body.chars + byteOffset; end = segPtr->body.chars + segPtr->size; for (p = start; p < end; p += Tcl_UtfToUniChar(p, &ch)) { +#if TCL_UTF_MAX < 4 + if (((ch & 0xFC00) == 0xD800) && (p < end)) { + char *pp = p; + + pp += Tcl_UtfToUniChar(pp, &ch); + if ((ch & 0xFC00) == 0xDC00) { + p = pp; + if (charCount > 0) { + charCount--; + } + } + } +#endif if (charCount == 0) { dstPtr->byteIndex += (p - start); goto forwardCharDone; } charCount--; @@ -2186,10 +2199,28 @@ if (!elide) { if (segPtr->typePtr == &tkTextCharType) { start = segPtr->body.chars; end = segPtr->body.chars + segSize; for (p = end; ; p = Tcl_UtfPrev(p, start)) { +#if TCL_UTF_MAX < 4 + Tcl_UniChar ch; + + Tcl_UtfToUniChar(p, &ch); + if (((ch & 0xFC00) == 0xDC00) && (p > start)) { + const char *pp = Tcl_UtfPrev(p, start); + + if (pp != NULL) { + Tcl_UtfToUniChar(pp, &ch); + if ((ch & 0xFC00) == 0xD800) { + p = pp; + if (charCount > 0) { + charCount--; + } + } + } + } +#endif if (charCount == 0) { dstPtr->byteIndex -= (end - p); goto backwardCharDone; } if (p == start) { @@ -2373,13 +2404,13 @@ segPtr = TkTextIndexToSeg(indexPtr, &offset); while (1) { int chSize = 1; if (segPtr->typePtr == &tkTextCharType) { - Tcl_UniChar ch = 0; + int ch; - chSize = Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch); + chSize = TkUtfToUniChar(segPtr->body.chars + offset, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } firstChar = 0; 
} @@ -2418,19 +2449,19 @@ segPtr = TkTextIndexToSeg(indexPtr, &offset); while (1) { int chSize = 1; if (segPtr->typePtr == &tkTextCharType) { - Tcl_UniChar ch = 0; + int ch; - Tcl_UtfToUniChar(segPtr->body.chars + offset, &ch); + TkUtfToUniChar(segPtr->body.chars + offset, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } if (offset > 0) { chSize = (segPtr->body.chars + offset - - Tcl_UtfPrev(segPtr->body.chars + offset, + - TkUtfPrev(segPtr->body.chars + offset, segPtr->body.chars)); } firstChar = 0; } if (offset == 0) { Index: jni/sdl2tk/generic/tkUtil.c ================================================================== --- jni/sdl2tk/generic/tkUtil.c +++ jni/sdl2tk/generic/tkUtil.c @@ -1191,10 +1191,166 @@ Tcl_IncrRefCount(detail); } Tk_QueueWindowEvent(&event.general, TCL_QUEUE_TAIL); } + +#if TCL_UTF_MAX < 4 +/* + *--------------------------------------------------------------------------- + * + * TkUtfToUniChar -- + * + * Almost the same as Tcl_UtfToUniChar but using int instead of Tcl_UniChar. + * This function is capable of collapsing a upper/lower surrogate pair to a + * single unicode character. So, up to 6 bytes might be consumed. + * + * Results: + * *chPtr is filled with the Tcl_UniChar, and the return value is the + * number of bytes from the UTF-8 string that were consumed. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +int +TkUtfToUniChar( + const char *src, /* The UTF-8 string. */ + int *chPtr) /* Filled with the Tcl_UniChar represented by + * the UTF-8 string. 
*/ +{ + Tcl_UniChar uniChar = 0; + int len = Tcl_UtfToUniChar(src, &uniChar); + + if ((uniChar & 0xfc00) == 0xd800) { + int high = uniChar; + int len2 = Tcl_UtfToUniChar(src+len, &uniChar); + + if ((uniChar & 0xfc00) == 0xdc00) { + *chPtr = (((high & 0x3ff) << 10) | (uniChar & 0x3ff)) + 0x10000; + len += len2; + } else { + *chPtr = high; + } + } else { + *chPtr = uniChar; + } + return len; +} + +/* + *--------------------------------------------------------------------------- + * + * TkUniCharToUtf -- + * + * Almost the same as Tcl_UniCharToUtf but producing surrogates if + * TCL_UTF_MAX==3. So, up to 6 bytes might be produced. + * + * Results: + * *buf is filled with the UTF-8 string, and the return value is the + * number of bytes produced. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +int +TkUniCharToUtf( + int ch, + char *buf) +{ + int size; + + if ((ch > 0xffff) && (ch <= 0x10ffff)) { + ch -= 0x10000; + size = Tcl_UniCharToUtf(((ch >> 10) | 0xd800), buf); + size += Tcl_UniCharToUtf(((ch & 0x3ff) | 0xdc00), buf+size); + } else { + size = Tcl_UniCharToUtf(ch, buf); + } + return size; +} + +/* + *--------------------------------------------------------------------------- + * + * TkUtfPrev -- + * + * Almost the same as Tcl_UtfPrev but check for surrogates if + * TCL_UTF_MAX==3. So, the pointer might move up to 6 bytes. + * + * Results: + * Return a pointer to the previous unicode character. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +const char * +TkUtfPrev( + const char *start, + const char *source) +{ + const char *p = Tcl_UtfPrev(start, source); + + if ((p == source-3) && ((p[0]&0xFF) == 0xED) + && ((p[1]&0xF0) == 0xB0) && ((p[2]&0xC0) == 0x80)) { + /* We are pointing to a low surrogate. If the previous + * codepoint is a high surrogate, we want that in stead. 
*/ + const char *q = p - 3; + + if ((q >= start) && ((q[0]&0xFF) == 0xED) + && ((q[1]&0xF0) == 0xA0) && ((q[2]&0xC0) == 0x80)) { + p = q; + } + } + return p; +} + +/* + *--------------------------------------------------------------------------- + * + * TkUtfNext -- + * + * Almost the same as Tcl_UtfNext but check for surrogates if + * TCL_UTF_MAX==3. So, the pointer might move up to 6 bytes. + * + * Results: + * Return a pointer to the next unicode character. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +const char * +TkUtfNext( + const char *source) +{ + const char *p = Tcl_UtfNext(source); + + if ((p == source+3) && ((source[0]&0xFF) == 0xED) + && ((source[1]&0xF0) == 0xA0) && ((source[2]&0xC0) == 0x80)) { + /* We were pointing to a high surrogate. If the next + * codepoint is a low surrogate, we want to advance one more. */ + if (((p[0]&0xFF) == 0xED) && ((p[1]&0xF0) == 0xB0) + && ((p[2]&0xC0) == 0x80)) { + + p += 3; + } + } + return p; +} + +#endif /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 Index: jni/sdl2tk/generic/ttk/ttkEntry.c ================================================================== --- jni/sdl2tk/generic/ttk/ttkEntry.c +++ jni/sdl2tk/generic/ttk/ttkEntry.c @@ -291,15 +291,15 @@ */ static char *EntryDisplayString(const char *showChar, int numChars) { char *displayString, *p; int size; - Tcl_UniChar ch; - char buf[TCL_UTF_MAX]; + int ch; + char buf[6]; - Tcl_UtfToUniChar(showChar, &ch); - size = Tcl_UniCharToUtf(ch, buf); + TkUtfToUniChar(showChar, &ch); + size = TkUniCharToUtf(ch, buf); p = displayString = ckalloc(numChars * size + 1); while (numChars--) { memcpy(p, buf, size); p += size; @@ -428,11 +428,11 @@ { int spaceNeeded, cvtFlags; int number, length; const char *string; int stringLength; - Tcl_UniChar ch; + int ch; char numStorage[2*TCL_INTEGER_SPACE]; while (*templ) { /* Find everything up to the next % character and append it * to the 
result string. @@ -452,11 +452,11 @@ /* There's a percent sequence here. Process it. */ ++templ; /* skip over % */ if (*templ != '\0') { - templ += Tcl_UtfToUniChar(templ, &ch); + templ += TkUtfToUniChar(templ, &ch); } else { ch = '%'; } stringLength = -1; @@ -502,11 +502,11 @@ break; case 'W': /* widget name */ string = Tk_PathName(entryPtr->core.tkwin); break; default: - length = Tcl_UniCharToUtf(ch, numStorage); + length = TkUniCharToUtf(ch, numStorage); numStorage[length] = '\0'; string = numStorage; break; } @@ -842,22 +842,67 @@ size_t byteCount = strlen(value); int charsAdded = Tcl_NumUtfChars(value, byteCount); size_t newByteCount = entryPtr->entry.numBytes + byteCount + 1; char *newBytes; int code; +#if TCL_UTF_MAX < 4 + Tcl_UniChar ch; +#endif if (byteCount == 0) { return TCL_OK; } + +#if TCL_UTF_MAX < 4 + /* + * Don't insert within surrogate pairs. + */ + if (byteIndex) { + const char *prevPtr; + + Tcl_UtfToUniChar(string + byteIndex, &ch); + if ((ch & 0xFC00) == 0xDC00) { + prevPtr = Tcl_UtfPrev(string + byteIndex, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xFC00) == 0xD800) { + byteIndex = prevPtr - string; + index -= 1; + } + } + } +#endif newBytes = (char *) ckalloc(newByteCount); memcpy(newBytes, string, byteIndex); strcpy(newBytes + byteIndex, value); strcpy(newBytes + byteIndex + byteCount, string + byteIndex); code = EntryValidateChange( entryPtr, newBytes, index, charsAdded, VALIDATE_INSERT); + +#if TCL_UTF_MAX < 4 + /* + * Account for high surrogate at end of inserted string. + */ + if (byteCount > 1) { + const char *lastPtr = Tcl_UtfPrev(newBytes+byteIndex+byteCount, newBytes); + + Tcl_UtfToUniChar(lastPtr, &ch); + if ((ch & 0xFC00) == 0xD800) { + /* + * A high surrogate. If followed by low surrogate, + * adjust index after it. 
+ */ + lastPtr = Tcl_UtfNext(lastPtr); + Tcl_UtfToUniChar(lastPtr, &ch); + if ((ch & 0xFC00) == 0xDC00) { + index++; + charsAdded++; + } + } + } +#endif if (code == TCL_OK) { AdjustIndices(entryPtr, index, charsAdded); code = EntrySetValue(entryPtr, newBytes); } else if (code == TCL_BREAK) { @@ -879,10 +924,15 @@ { char *string = entryPtr->entry.string; size_t byteIndex, byteCount, newByteCount; char *newBytes; int code; +#if TCL_UTF_MAX < 4 + Tcl_UniChar ch; + int len; + const char *prevPtr, *nextPtr; +#endif if (index < 0) { index = 0; } if (count + index > entryPtr->entry.numChars) { @@ -891,11 +941,51 @@ if (count <= 0) { return TCL_OK; } byteIndex = Tcl_UtfAtIndex(string, index) - string; + +#if TCL_UTF_MAX < 4 + /* + * Delete complete surrogate pairs. + */ + Tcl_UtfToUniChar(string + byteIndex, &ch); + if ((ch & 0xFC00) == 0xDC00) { + prevPtr = Tcl_UtfPrev(string + byteIndex, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xFC00) == 0xD800) { + byteIndex = prevPtr - string; + count++; + index--; + } + } else if ((count == 1) && ((ch & 0xFC00) == 0xD800)) { + nextPtr = Tcl_UtfNext(string + byteIndex); + Tcl_UtfToUniChar(nextPtr, &ch); + if ((ch & 0xFC00) == 0xDC00) { + count++; + } + } + if (count > entryPtr->entry.numChars - index) { + count = entryPtr->entry.numChars - index; + } +#endif + byteCount = Tcl_UtfAtIndex(string+byteIndex, count) - (string+byteIndex); + +#if TCL_UTF_MAX < 4 + if (byteCount) { + len = Tcl_UtfToUniChar(string + byteIndex + byteCount, &ch); + if ((ch & 0xFC00) == 0xDC00) { + prevPtr = Tcl_UtfPrev(string + byteIndex + byteCount, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xFC00) == 0xD800) { + byteCount += len; + count++; + } + } + } +#endif newByteCount = entryPtr->entry.numBytes + 1 - byteCount; newBytes = (char *) ckalloc(newByteCount); memcpy(newBytes, string, byteIndex); strcpy(newBytes + byteIndex, string + byteIndex + byteCount); @@ -1391,10 +1481,14 @@ int *indexPtr) /* Return value */ { # define 
EntryWidth(e) (Tk_Width(entryPtr->core.tkwin)) /* Not Right */ const char *string = Tcl_GetString(indexObj); size_t length = indexObj->length; + int roundUp = 0; +#if TCL_UTF_MAX < 4 + int oldPos = entryPtr->entry.insertPos; +#endif if (strncmp(string, "end", length) == 0) { *indexPtr = entryPtr->entry.numChars; } else if (strncmp(string, "insert", length) == 0) { *indexPtr = entryPtr->entry.insertPos; @@ -1416,11 +1510,10 @@ *indexPtr = entryPtr->entry.selectLast; } else { goto badIndex; } } else if (string[0] == '@') { - int roundUp = 0; int maxWidth = EntryWidth(entryPtr); int x; if (Tcl_GetInt(interp, string + 1, &x) != TCL_OK) { goto badIndex; @@ -1454,11 +1547,42 @@ if (*indexPtr < 0) { *indexPtr = 0; } else if (*indexPtr > entryPtr->entry.numChars) { *indexPtr = entryPtr->entry.numChars; } +#if TCL_UTF_MAX < 4 + roundUp = (indexPtr == &entryPtr->entry.insertPos) && + (*indexPtr == (oldPos + 1)); +#endif + } + +#if TCL_UTF_MAX < 4 + /* + * Enforce index on start or end of surrogate pair. 
+ */ + if (*indexPtr) { + int byteIndex; + Tcl_UniChar ch; + const char *prevPtr; + + string = entryPtr->entry.string; + byteIndex = Tcl_UtfAtIndex(string, *indexPtr) - string; + Tcl_UtfToUniChar(string + byteIndex, &ch); + if ((ch & 0xFC00) == 0xDC00) { + if (roundUp) { + *indexPtr += 1; + } else { + prevPtr = Tcl_UtfPrev(string + byteIndex, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xFC00) == 0xD800) { + *indexPtr -= 1; + } + } + } } +#endif + return TCL_OK; badIndex: Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad entry index \"%s\"", string)); Index: jni/sdl2tk/library/entry.tcl ================================================================== --- jni/sdl2tk/library/entry.tcl +++ jni/sdl2tk/library/entry.tcl @@ -83,11 +83,10 @@ tk::EntryMouseSelect %W %x } bind Entry { set tk::Priv(selectMode) word tk::EntryMouseSelect %W %x - catch {%W icursor sel.last} } bind Entry { set tk::Priv(selectMode) line tk::EntryMouseSelect %W %x catch {%W icursor sel.last} @@ -438,27 +437,38 @@ } word { if {$cur < $anchor} { set before [tcl_wordBreakBefore [$w get] $cur] set after [tcl_wordBreakAfter [$w get] $anchor-1] + set icur before } elseif {$cur > $anchor} { set before [tcl_wordBreakBefore [$w get] $anchor] set after [tcl_wordBreakAfter [$w get] $cur-1] + set icur after } else { if {[$w index @$Priv(pressX)] < $anchor} { incr anchor -1 } set before [tcl_wordBreakBefore [$w get] $anchor] set after [tcl_wordBreakAfter [$w get] $anchor] + set icur after } if {$before < 0} { set before 0 } if {$after < 0} { set after end } $w selection range $before $after + if {$icur eq "after"} { + $w icursor end + $w icursor $after + } else { + $w icursor 0 + $w icursor $before + } + set Priv(mouseMoved) 0 } line { $w selection range 0 end } } Index: jni/sdl2tk/library/spinbox.tcl ================================================================== --- jni/sdl2tk/library/spinbox.tcl +++ jni/sdl2tk/library/spinbox.tcl @@ -499,21 +499,32 @@ } word { if {$cur < [$w index anchor]} { set 
before [tcl_wordBreakBefore [$w get] $cur] set after [tcl_wordBreakAfter [$w get] $anchor-1] + set icur before } else { set before [tcl_wordBreakBefore [$w get] $anchor] set after [tcl_wordBreakAfter [$w get] $cur-1] + set icur after } if {$before < 0} { set before 0 } if {$after < 0} { set after end } $w selection range $before $after + if {$icur eq "after"} { + $w icursor end + $w icursor $after + } else { + $w icursor 0 + $w icursor $before + } + set Priv(mouseMoved) 0 + set cursor ignore } line { $w selection range 0 end } } Index: jni/sdl2tk/library/ttk/entry.tcl ================================================================== --- jni/sdl2tk/library/ttk/entry.tcl +++ jni/sdl2tk/library/ttk/entry.tcl @@ -312,10 +312,18 @@ } ## RelIndex -- Compute character/word/line-relative index. # proc ttk::entry::RelIndex {w where {index insert}} { + if {$index eq "insert" && $where eq "nextchar"} { + # Needed for surrogate pairs: + set icur [$w index insert] + $w icursor [expr {$icur + 1}] + set ret [expr {[$w index insert]}] + $w icursor $icur + return $ret + } switch -- $where { prevchar { expr {[$w index $index] - 1} } nextchar { expr {[$w index $index] + 1} } prevword { PrevWord $w $index } nextword { NextWord $w $index } Index: jni/sdl2tk/macosx/tkMacOSXBitmap.c ================================================================== --- jni/sdl2tk/macosx/tkMacOSXBitmap.c +++ jni/sdl2tk/macosx/tkMacOSXBitmap.c @@ -250,51 +250,66 @@ Tcl_HashEntry *hPtr; Pixmap pixmap = None; NSString *string; NSImage *image = nil; NSSize size = { .width = builtInIconSize, .height = builtInIconSize }; + Tcl_Encoding utf8; + Tcl_DString ds; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_DStringInit(&ds); if (iconBitmapTable.buckets && (hPtr = Tcl_FindHashEntry(&iconBitmapTable, name))) { OSType type; IconBitmap *iconBitmap = (IconBitmap *) Tcl_GetHashValue(hPtr); name = NULL; size = NSMakeSize(iconBitmap->width, iconBitmap->height); switch (iconBitmap->kind) { case ICON_FILE: - string = 
[[NSString stringWithUTF8String:iconBitmap->value] + name = Tcl_UtfToExternalDString(utf8, iconBitmap->value, -1, &ds); + string = [[NSString stringWithUTF8String:name] stringByExpandingTildeInPath]; image = [[NSWorkspace sharedWorkspace] iconForFile:string]; + name = NULL; break; case ICON_FILETYPE: - string = [NSString stringWithUTF8String:iconBitmap->value]; + name = Tcl_UtfToExternalDString(utf8, iconBitmap->value, -1, &ds); + string = [NSString stringWithUTF8String:name]; image = [[NSWorkspace sharedWorkspace] iconForFileType:string]; + name = NULL; break; case ICON_OSTYPE: - if (OSTypeFromString(iconBitmap->value, &type) == TCL_OK) { + name = Tcl_UtfToExternalDString(utf8, iconBitmap->value, -1, &ds); + if (OSTypeFromString(name, &type) == TCL_OK) { string = NSFileTypeForHFSTypeCode(type); image = [[NSWorkspace sharedWorkspace] iconForFileType:string]; } + name = NULL; break; case ICON_SYSTEMTYPE: - name = iconBitmap->value; + name = Tcl_UtfToExternalDString(utf8, iconBitmap->value, -1, &ds); break; case ICON_NAMEDIMAGE: - string = [NSString stringWithUTF8String:iconBitmap->value]; + name = Tcl_UtfToExternalDString(utf8, iconBitmap->value, -1, &ds); + string = [NSString stringWithUTF8String:name]; image = [NSImage imageNamed:string]; + name = NULL; break; case ICON_IMAGEFILE: - string = [[NSString stringWithUTF8String:iconBitmap->value] + name = Tcl_UtfToExternalDString(utf8, iconBitmap->value, -1, &ds); + string = [[NSString stringWithUTF8String:name] stringByExpandingTildeInPath]; image = [[[NSImage alloc] initWithContentsOfFile:string] autorelease]; + name = NULL; break; } if (image) { [image setSize:size]; } } else { + name = Tcl_UtfToExternalDString(utf8, name, -1, &ds); string = [NSString stringWithUTF8String:name]; image = [NSImage imageNamed:string]; if (!image) { NSURL *url = [NSURL fileURLWithPath:string]; if (url) { @@ -317,10 +332,12 @@ pixmap = PixmapFromImage(display, iconImage, NSSizeToCGSize(size)); } } *width = (int) size.width; *height = 
(int) size.height; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); return pixmap; } /* *---------------------------------------------------------------------- Index: jni/sdl2tk/macosx/tkMacOSXCursor.c ================================================================== --- jni/sdl2tk/macosx/tkMacOSXCursor.c +++ jni/sdl2tk/macosx/tkMacOSXCursor.c @@ -229,11 +229,15 @@ NSString *path = nil; NSImage *image = nil; NSPoint hotSpot = NSZeroPoint; int haveHotSpot = 0, result = TCL_ERROR; NSCursor *macCursor = nil; + Tcl_Encoding utf8; + Tcl_DString ds; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + name = Tcl_UtfToExternalDString(utf8, name, -1, &ds); if (name[0] == '@') { /* * System cursor of type @filename */ @@ -352,10 +356,12 @@ hotSpot.y = -hotSpot.y; macCursor = [[NSCursor alloc] initWithImage:image hotSpot:hotSpot]; [image release]; } macCursorPtr->macCursor = macCursor; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); } /* *---------------------------------------------------------------------- * Index: jni/sdl2tk/macosx/tkMacOSXDialog.c ================================================================== --- jni/sdl2tk/macosx/tkMacOSXDialog.c +++ jni/sdl2tk/macosx/tkMacOSXDialog.c @@ -219,20 +219,32 @@ { FilePanelCallbackInfo *callbackInfo = (FilePanelCallbackInfo *) contextInfo; if (returnCode == modalOK) { Tcl_Obj *resultObj; + Tcl_Encoding utf8; + Tcl_DString ds; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); if (callbackInfo->multiple) { resultObj = Tcl_NewListObj(0, NULL); for (NSURL *url in [(NSOpenPanel*)panel URLs]) { + Tcl_ExternalToUtfDString(utf8, [[url path] UTF8String], -1, + &ds); Tcl_ListObjAppendElement(callbackInfo->interp, resultObj, - Tcl_NewStringObj([[url path] UTF8String], -1)); + Tcl_NewStringObj(Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); } } else { - resultObj = Tcl_NewStringObj([[[panel URL]path] UTF8String], -1); + Tcl_ExternalToUtfDString(utf8, + [[[panel URL]path] UTF8String], -1, &ds); + resultObj = 
Tcl_NewStringObj(Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); } + Tcl_FreeEncoding(utf8); if (callbackInfo->cmdObj) { Tcl_Obj **objv, **tmpv; int objc, result = Tcl_ListObjGetElements(callbackInfo->interp, callbackInfo->cmdObj, &objc, &objv); @@ -456,14 +468,21 @@ [colorPanel setContinuous:NO]; [colorPanel setBecomesKeyOnlyIfNeeded:NO]; [colorPanel setShowsAlpha: NO]; [colorPanel _setUseModalAppearance:YES]; if (title) { - NSString *s = [[NSString alloc] initWithUTF8String:title]; + NSString *s; + Tcl_Encoding utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_DString ds; + + title = Tcl_ExternalToUtfDString(utf8, title, -1, &ds); + s = [[NSString alloc] initWithUTF8String:title]; [colorPanel setTitle:s]; [s release]; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); } if (initialColor) { [colorPanel setColor:initialColor]; } returnCode = [NSApp runModalForWindow:colorPanel]; @@ -500,17 +519,17 @@ parseFileFilters( Tcl_Interp *interp, Tcl_Obj *fileTypesPtr, Tcl_Obj *typeVariablePtr) { + FileFilterList fl; if (!fileTypesPtr) { filterInfo.doFileTypes = false; return TCL_OK; } - FileFilterList fl; TkInitFileFilters(&fl); if (TkGetFileFilters(interp, &fl, fileTypesPtr, 0) != TCL_OK) { TkFreeFileFilters(&fl); return TCL_ERROR; @@ -526,39 +545,54 @@ filterInfo.allowedExtensions = [NSMutableArray array]; filterInfo.allowedExtensionsAllowAll = NO; if (filterInfo.doFileTypes) { + Tcl_Encoding utf8; + Tcl_DString ds; + const char *str; + + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_DStringInit(&ds); for (FileFilter *filterPtr = fl.filters; filterPtr; filterPtr = filterPtr->next) { - NSString *name = [[NSString alloc] initWithUTF8String: filterPtr->name]; + NSString *name; + NSMutableArray *clauseextensions; + NSMutableArray *displayextensions; + bool allowsAll = NO; + str = Tcl_UtfToExternalDString(utf8, filterPtr->name, -1, &ds); + name = [[NSString alloc] initWithUTF8String:str]; [filterInfo.fileTypeNames addObject:name]; [name release]; - 
NSMutableArray *clauseextensions = [NSMutableArray array]; - NSMutableArray *displayextensions = [NSMutableArray array]; - bool allowsAll = NO; + Tcl_DStringFree(&ds); + clauseextensions = [NSMutableArray array]; + displayextensions = [NSMutableArray array]; for (FileFilterClause *clausePtr = filterPtr->clauses; clausePtr; clausePtr = clausePtr->next) { for (GlobPattern *globPtr = clausePtr->patterns; globPtr; globPtr = globPtr->next) { - const char *str = globPtr->pattern; + str = globPtr->pattern; while (*str && (*str == '*' || *str == '.')) { str++; } if (*str) { - NSString *extension = [[NSString alloc] initWithUTF8String:str]; + NSString *extension; + + str = Tcl_UtfToExternalDString(utf8, str, -1, &ds); + extension = [[NSString alloc] initWithUTF8String:str]; if (![filterInfo.allowedExtensions containsObject:extension]) { [filterInfo.allowedExtensions addObject:extension]; } [clauseextensions addObject:extension]; [displayextensions addObject:[@"." stringByAppendingString:extension]]; [extension release]; + Tcl_DStringFree(&ds); } else { /* * It is the all pattern (*, .* or *.*) */ @@ -596,24 +630,27 @@ /* * Check that the typevariable exists. 
*/ if (selectedFileTypeObj != NULL) { - const char *selectedFileType = - Tcl_GetString(selectedFileTypeObj); - NSString *selectedFileTypeStr = - [[NSString alloc] initWithUTF8String:selectedFileType]; - NSUInteger index = - [filterInfo.fileTypeNames indexOfObject:selectedFileTypeStr]; + NSString *selectedFileTypeStr; + NSUInteger index; + + str = Tcl_UtfToExternalDString(utf8, + Tcl_GetString(selectedFileTypeObj), -1, &ds); + selectedFileTypeStr = [[NSString alloc] initWithUTF8String:str]; + index = [filterInfo.fileTypeNames indexOfObject:selectedFileTypeStr]; if (index != NSNotFound) { filterInfo.fileTypeIndex = index; filterInfo.preselectFilter = true; } } } + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); } TkFreeFileFilters(&fl); return TCL_OK; } @@ -669,11 +706,15 @@ NSString *message = nil, *title = nil; NSWindow *parent; openpanel = [NSOpenPanel openPanel]; NSInteger modalReturnCode = modalError; BOOL parentIsKey = NO; + Tcl_Encoding utf8; + Tcl_DString ds; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_DStringInit(&ds); for (i = 1; i < objc; i += 2) { if (Tcl_GetIndexFromObjStruct(interp, objv[i], openOptionStrings, sizeof(char *), "option", TCL_EXACT, &index) != TCL_OK) { goto end; } @@ -690,24 +731,30 @@ fileTypesPtr = objv[i + 1]; break; case OPEN_INITDIR: str = Tcl_GetStringFromObj(objv[i + 1], &len); if (len) { + str = Tcl_UtfToExternalDString(utf8, str, -1, &ds); directory = [[[NSString alloc] initWithUTF8String:str] autorelease]; + Tcl_DStringFree(&ds); } break; case OPEN_INITFILE: str = Tcl_GetStringFromObj(objv[i + 1], &len); if (len) { + str = Tcl_UtfToExternalDString(utf8, str, -1, &ds); filename = [[[NSString alloc] initWithUTF8String:str] autorelease]; + Tcl_DStringFree(&ds); } break; case OPEN_MESSAGE: - message = [[NSString alloc] initWithUTF8String: - Tcl_GetString(objv[i + 1])]; + str = Tcl_UtfToExternalDString(utf8, + Tcl_GetString(objv[i + 1]), -1, &ds); + message = [[NSString alloc] initWithUTF8String:str]; + Tcl_DStringFree(&ds); 
break; case OPEN_MULTIPLE: if (Tcl_GetBooleanFromObj(interp, objv[i + 1], &multiple) != TCL_OK) { goto end; @@ -720,12 +767,14 @@ goto end; } haveParentOption = 1; break; case OPEN_TITLE: - title = [[NSString alloc] initWithUTF8String: - Tcl_GetString(objv[i + 1])]; + str = Tcl_UtfToExternalDString(utf8, + Tcl_GetString(objv[i + 1]), -1, &ds); + title = [[NSString alloc] initWithUTF8String:str]; + Tcl_DStringFree(&ds); break; case OPEN_TYPEVARIABLE: typeVariablePtr = objv[i + 1]; break; case OPEN_COMMAND: @@ -901,15 +950,19 @@ } else { selectedFilter = @""; } } } + Tcl_DStringFree(&ds); + Tcl_ExternalToUtfDString(utf8, [selectedFilter UTF8String], -1, &ds); Tcl_ObjSetVar2(interp, typeVariablePtr, NULL, - Tcl_NewStringObj([selectedFilter UTF8String], -1), - TCL_GLOBAL_ONLY); + Tcl_NewStringObj(Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds)), TCL_GLOBAL_ONLY); } end: + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); return result; } /* *---------------------------------------------------------------------- @@ -945,11 +998,15 @@ NSString *message = nil, *title = nil; NSWindow *parent; savepanel = [NSSavePanel savePanel]; NSInteger modalReturnCode = modalError; BOOL parentIsKey = NO; + Tcl_Encoding utf8; + Tcl_DString ds; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_DStringInit(&ds); for (i = 1; i < objc; i += 2) { if (Tcl_GetIndexFromObjStruct(interp, objv[i], saveOptionStrings, sizeof(char *), "option", TCL_EXACT, &index) != TCL_OK) { goto end; } @@ -958,72 +1015,82 @@ "value for \"%s\" missing", Tcl_GetString(objv[i]))); Tcl_SetErrorCode(interp, "TK", "FILEDIALOG", "VALUE", NULL); goto end; } switch (index) { - case SAVE_DEFAULT: - str = Tcl_GetStringFromObj(objv[i + 1], &len); - while (*str && (*str == '*' || *str == '.')) { - str++; - } - if (*str) { - defaultType = [[[NSString alloc] initWithUTF8String:str] - autorelease]; - } - break; - case SAVE_FILETYPES: - fileTypesPtr = objv[i + 1]; - break; - case SAVE_INITDIR: - str = Tcl_GetStringFromObj(objv[i + 1], 
&len); - if (len) { - directory = [[[NSString alloc] initWithUTF8String:str] - autorelease]; - } - break; - case SAVE_INITFILE: - str = Tcl_GetStringFromObj(objv[i + 1], &len); - if (len) { - filename = [[[NSString alloc] initWithUTF8String:str] - autorelease]; - [savepanel setNameFieldStringValue:filename]; - } - break; - case SAVE_MESSAGE: - message = [[NSString alloc] initWithUTF8String: - Tcl_GetString(objv[i + 1])]; - break; - case SAVE_PARENT: - str = Tcl_GetStringFromObj(objv[i + 1], &len); - tkwin = Tk_NameToWindow(interp, str, tkwin); - if (!tkwin) { - goto end; - } - haveParentOption = 1; - break; - case SAVE_TITLE: - title = [[NSString alloc] initWithUTF8String: - Tcl_GetString(objv[i + 1])]; - break; - case SAVE_TYPEVARIABLE: - typeVariablePtr = objv[i + 1]; - break; - case SAVE_COMMAND: - cmdObj = objv[i+1]; - break; - case SAVE_CONFIRMOW: - if (Tcl_GetBooleanFromObj(interp, objv[i + 1], - &confirmOverwrite) != TCL_OK) { - goto end; - } - break; - case SAVE_NATIVEONLY: - if (Tcl_GetBooleanFromObj(interp, objv[i + 1], - &dummy) != TCL_OK) { - goto end; - } - break; + case SAVE_DEFAULT: + str = Tcl_GetStringFromObj(objv[i + 1], &len); + while (*str && (*str == '*' || *str == '.')) { + str++; + } + if (*str) { + str = Tcl_UtfToExternalDString(utf8, str, -1, &ds); + defaultType = [[[NSString alloc] initWithUTF8String:str] + autorelease]; + Tcl_DStringFree(&ds); + } + break; + case SAVE_FILETYPES: + fileTypesPtr = objv[i + 1]; + break; + case SAVE_INITDIR: + str = Tcl_GetStringFromObj(objv[i + 1], &len); + if (len) { + str = Tcl_UtfToExternalDString(utf8, str, -1, &ds); + directory = [[[NSString alloc] initWithUTF8String:str] + autorelease]; + Tcl_DStringFree(&ds); + } + break; + case SAVE_INITFILE: + str = Tcl_GetStringFromObj(objv[i + 1], &len); + if (len) { + str = Tcl_UtfToExternalDString(utf8, str, -1, &ds); + filename = [[[NSString alloc] initWithUTF8String:str] + autorelease]; + [savepanel setNameFieldStringValue:filename]; + Tcl_DStringFree(&ds); + 
} + break; + case SAVE_MESSAGE: + str = Tcl_UtfToExternalDString(utf8, + Tcl_GetString(objv[i + 1]), -1, &ds); + message = [[NSString alloc] initWithUTF8String:str]; + Tcl_DStringFree(&ds); + break; + case SAVE_PARENT: + str = Tcl_GetStringFromObj(objv[i + 1], &len); + tkwin = Tk_NameToWindow(interp, str, tkwin); + if (!tkwin) { + goto end; + } + haveParentOption = 1; + break; + case SAVE_TITLE: + str = Tcl_UtfToExternalDString(utf8, + Tcl_GetString(objv[i + 1]), -1, &ds); + title = [[NSString alloc] initWithUTF8String:str]; + Tcl_DStringFree(&ds); + break; + case SAVE_TYPEVARIABLE: + typeVariablePtr = objv[i + 1]; + break; + case SAVE_COMMAND: + cmdObj = objv[i+1]; + break; + case SAVE_CONFIRMOW: + if (Tcl_GetBooleanFromObj(interp, objv[i + 1], + &confirmOverwrite) != TCL_OK) { + goto end; + } + break; + case SAVE_NATIVEONLY: + if (Tcl_GetBooleanFromObj(interp, objv[i + 1], + &dummy) != TCL_OK) { + goto end; + } + break; } } if (title) { [savepanel setTitle:title]; @@ -1141,19 +1208,22 @@ && filterInfo.doFileTypes) { /* * The -typevariable must be set to the selected file type, if the * dialog was not cancelled. 
*/ + NSString *selectedFilter = [filterInfo.fileTypeNames objectAtIndex:filterInfo.fileTypeIndex]; - NSString *selectedFilter = - [filterInfo.fileTypeNames objectAtIndex:filterInfo.fileTypeIndex]; + Tcl_DStringFree(&ds); + Tcl_ExternalToUtfDString(utf8, [selectedFilter UTF8String], -1, &ds); Tcl_ObjSetVar2(interp, typeVariablePtr, NULL, - Tcl_NewStringObj([selectedFilter UTF8String], -1), - TCL_GLOBAL_ONLY); + Tcl_NewStringObj(Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds)), TCL_GLOBAL_ONLY); } end: + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); return result; } /* *---------------------------------------------------------------------- @@ -1189,11 +1259,15 @@ NSString *message, *title; NSWindow *parent; NSOpenPanel *panel = [NSOpenPanel openPanel]; NSInteger modalReturnCode = modalError; BOOL parentIsKey = NO; + Tcl_Encoding utf8; + Tcl_DString ds; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_DStringInit(&ds); for (i = 1; i < objc; i += 2) { if (Tcl_GetIndexFromObjStruct(interp, objv[i], chooseOptionStrings, sizeof(char *), "option", TCL_EXACT, &index) != TCL_OK) { goto end; } @@ -1205,19 +1279,23 @@ } switch (index) { case CHOOSE_INITDIR: str = Tcl_GetStringFromObj(objv[i + 1], &len); if (len) { + str = Tcl_UtfToExternalDString(utf8, str, -1, &ds); directory = [[[NSString alloc] initWithUTF8String:str] autorelease]; + Tcl_DStringFree(&ds); } break; case CHOOSE_MESSAGE: - message = [[NSString alloc] initWithUTF8String: - Tcl_GetString(objv[i + 1])]; + str = Tcl_UtfToExternalDString(utf8, + Tcl_GetString(objv[i + 1]), -1, &ds); + message = [[NSString alloc] initWithUTF8String:str]; [panel setMessage:message]; [message release]; + Tcl_DStringFree(&ds); break; case CHOOSE_MUSTEXIST: if (Tcl_GetBooleanFromObj(interp, objv[i + 1], &mustexist) != TCL_OK) { goto end; @@ -1230,14 +1308,16 @@ goto end; } haveParentOption = 1; break; case CHOOSE_TITLE: - title = [[NSString alloc] initWithUTF8String: - Tcl_GetString(objv[i + 1])]; + str = Tcl_UtfToExternalDString(utf8, 
+ Tcl_GetString(objv[i + 1]), -1, &ds); + title = [[NSString alloc] initWithUTF8String:str]; [panel setTitle:title]; [title release]; + Tcl_DStringFree(&ds); break; case CHOOSE_COMMAND: cmdObj = objv[i+1]; break; case CHOOSE_NATIVEONLY: @@ -1283,10 +1363,12 @@ result = (modalReturnCode != modalError) ? TCL_OK : TCL_ERROR; if (parentIsKey) { [parent makeKeyWindow]; } end: + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); return result; } /* *---------------------------------------------------------------------- @@ -1430,13 +1512,17 @@ NSWindow *parent; NSArray *buttons; NSAlert *alert = [NSAlert new]; NSInteger modalReturnCode = 1; BOOL parentIsKey = NO; + Tcl_Encoding utf8; + Tcl_DString ds; iconIndex = ICON_INFO; typeIndex = TYPE_OK; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_DStringInit(&ds); for (i = 1; i < objc; i += 2) { if (Tcl_GetIndexFromObjStruct(interp, objv[i], alertOptionStrings, sizeof(char *), "option", TCL_EXACT, &index) != TCL_OK) { goto end; } @@ -1455,14 +1541,16 @@ indexDefaultOption = i; break; case ALERT_DETAIL: - message = [[NSString alloc] initWithUTF8String: - Tcl_GetString(objv[i + 1])]; + str = Tcl_ExternalToUtfDString(utf8, + Tcl_GetString(objv[i + 1]), -1, &ds); + message = [[NSString alloc] initWithUTF8String:str]; [alert setInformativeText:message]; [message release]; + Tcl_DStringFree(&ds); break; case ALERT_ICON: if (Tcl_GetIndexFromObjStruct(interp, objv[i + 1], alertIconStrings, sizeof(char *), "-icon value", TCL_EXACT, &iconIndex) != TCL_OK) { @@ -1469,14 +1557,16 @@ goto end; } break; case ALERT_MESSAGE: - message = [[NSString alloc] initWithUTF8String: - Tcl_GetString(objv[i + 1])]; + str = Tcl_ExternalToUtfDString(utf8, + Tcl_GetString(objv[i + 1]), -1, &ds); + message = [[NSString alloc] initWithUTF8String:str]; [alert setMessageText:message]; [message release]; + Tcl_DStringFree(&ds); break; case ALERT_PARENT: str = Tcl_GetString(objv[i + 1]); tkwin = Tk_NameToWindow(interp, str, tkwin); @@ -1485,14 +1575,16 @@ } 
haveParentOption = 1; break; case ALERT_TITLE: - title = [[NSString alloc] initWithUTF8String: - Tcl_GetString(objv[i + 1])]; + str = Tcl_ExternalToUtfDString(utf8, + Tcl_GetString(objv[i + 1]), -1, &ds); + title = [[NSString alloc] initWithUTF8String:str]; [[alert window] setTitle:title]; [title release]; + Tcl_DStringFree(&ds); break; case ALERT_TYPE: if (Tcl_GetIndexFromObjStruct(interp, objv[i + 1], alertTypeStrings, sizeof(char *), "-type value", TCL_EXACT, &typeIndex) != TCL_OK) { Index: jni/sdl2tk/macosx/tkMacOSXFont.c ================================================================== --- jni/sdl2tk/macosx/tkMacOSXFont.c +++ jni/sdl2tk/macosx/tkMacOSXFont.c @@ -119,11 +119,11 @@ *--------------------------------------------------------------------------- */ #if TCL_UTF_MAX == 3 -/* No special code for BMP needed. */ +/* No special code for WTF-8 needed. */ #define NumUTF16Chars Tcl_NumUtfChars #else static int @@ -136,43 +136,37 @@ int i = 0; if (length < 0) { while (*src != '\0') { src += Tcl_UtfToUniChar(src, &ch); -#if TCL_UTF_MAX > 4 if (ch > 0xFFFF) { /* A surrogate pair in UTF16Char representation. */ i++; } -#endif i++; } if (i < 0) { i = INT_MAX; } } else { - const char *endPtr = src + length - 4; + const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { src += Tcl_UtfToUniChar(src, &ch); -#if TCL_UTF_MAX > 4 + if (ch > 0xFFFF) { + /* A surrogate pair in UTF16Char representation. */ + i++; + } + i++; + } + endPtr += TCL_UTF_MAX; + while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { + src += Tcl_UtfToUniChar(src, &ch); if (ch > 0xFFFF) { /* A surrogate pair in UTF16Char representation. */ i++; } -#endif - i++; - } - endPtr += 4; - while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - src += Tcl_UtfToUniChar(src, &ch); -#if TCL_UTF_MAX > 4 - if (ch > 0xFFFF) { - /* A surrogate pair in UTF16Char representation. 
*/ - i++; - } -#endif i++; } if (src < endPtr) { i += endPtr - src; } @@ -219,11 +213,11 @@ if (Tcl_UtfCharComplete(src, end - src)) { src += Tcl_UtfToUniChar(src, &ch); } else { ch = *src++ & 0x00FF; } -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 if (ch > 0xFFFF) { /* A surrogate pair in UTF16Char representation. */ --index; } #endif @@ -257,11 +251,11 @@ int numBytes, /* Maximum number of bytes to consider from * source string in all. */ Tcl_DString *dsPtr, /* Tcl_DString receiving the result. */ int *lengthPtr) /* Number of UTF16Chars in result buffer. */ { - Tcl_UniChar ch = 0; + Tcl_UniChar ch; UTF16Char utf16; const char *end; Tcl_DStringInit(dsPtr); if (numBytes > 0) { @@ -268,41 +262,45 @@ Tcl_DStringSetLength(dsPtr, numBytes * sizeof(utf16)); Tcl_DStringSetLength(dsPtr, 0); } end = src + numBytes; while (src < end) { - int len; - if (Tcl_UtfCharComplete(src, end - src)) { - len = Tcl_UtfToUniChar(src, &ch); + src += Tcl_UtfToUniChar(src, &ch); } else { - len = 1; - ch = *src & 0x00FF; + ch = *src++ & 0x00FF; } utf16 = ch; -#if TCL_UTF_MAX < 4 - if (ch >= 0xD800 && ch <= 0xDFFF) { - utf16 = 0xFFFD; - } -#elif TCL_UTF_MAX == 4 - if (ch >= 0xD800 && ch <= 0xDFFF) { - if ((*src & 0xF8) != 0xF0) { - utf16 = 0xFFFD; - } - } -#else +#if TCL_UTF_MAX > 3 if (ch >= 0xD800 && ch <= 0xDFFF) { utf16 = 0xFFFD; } else if (ch > 0xFFFF) { utf16 = (((ch - 0x10000) >> 10) & 0x3FF) | 0xD800; Tcl_DStringAppend(dsPtr, (char *) &utf16, sizeof(utf16)); utf16 = ((ch - 0x10000) & 0x3FF) | 0xDC00; } +#else + if (ch >= 0xD800 && ch <= 0xDBFF) { + if (!Tcl_UtfCharComplete(src, end - src)) { + utf16 = 0xFFFD; + } else { + int len = Tcl_UtfToUniChar(src, &ch); + + if (ch >= 0xDC00 && ch <= 0xDFFF) { + Tcl_DStringAppend(dsPtr, (char *) &utf16, sizeof(utf16)); + utf16 = ch; + src += len; + } else { + utf16 = 0xFFFD; + } + } + } else if (ch >= 0xDC00 && ch <= 0xDFFF) { + utf16 = 0xFFFD; + } #endif Tcl_DStringAppend(dsPtr, (char *) &utf16, sizeof(utf16)); - src += len; } *lengthPtr = 
Tcl_DStringLength(dsPtr) / sizeof(utf16); return (UTF16Char *) Tcl_DStringValue(dsPtr); } @@ -327,12 +325,18 @@ NSFont *nsFont, TkFontAttributes *faPtr) { NSFontTraitMask traits = [[NSFontManager sharedFontManager] traitsOfFont:nsFont]; + Tcl_Encoding utf8; + Tcl_DString ds; - faPtr->family = Tk_GetUid([[nsFont familyName] UTF8String]); + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_ExternalToUtfDString(utf8, [[nsFont familyName] UTF8String], -1, &ds); + faPtr->family = Tk_GetUid(Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); faPtr->size = [nsFont pointSize]; faPtr->weight = (traits & NSBoldFontMask ? TK_FW_BOLD : TK_FW_NORMAL); faPtr->slant = (traits & NSItalicFontMask ? TK_FS_ITALIC : TK_FS_ROMAN); } @@ -367,11 +371,18 @@ NSFont *nsFont, *dflt = nil; #define defaultFont (dflt ? dflt : (dflt = [NSFont systemFontOfSize:0])) NSString *family; if (familyName) { + Tcl_Encoding utf8; + Tcl_DString ds; + + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + familyName = Tcl_UtfToExternalDString(utf8, familyName, -1, &ds); family = [[[NSString alloc] initWithUTF8String:familyName] autorelease]; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); } else { family = [defaultFont familyName]; } if (size == 0.0) { size = [defaultFont pointSize]; @@ -824,15 +835,22 @@ Tcl_Interp *interp, /* Interp to hold result. */ Tk_Window tkwin) /* For display to query. 
*/ { Tcl_Obj *resultPtr = Tcl_NewListObj(0, NULL); NSArray *list = [[NSFontManager sharedFontManager] availableFontFamilies]; + Tcl_Encoding utf8; + Tcl_DString ds; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); for (NSString *family in list) { + Tcl_ExternalToUtfDString(utf8, [family UTF8String], -1, &ds); Tcl_ListObjAppendElement(NULL, resultPtr, - Tcl_NewStringObj([family UTF8String], -1)); + Tcl_NewStringObj(Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); } + Tcl_FreeEncoding(utf8); Tcl_SetObjResult(interp, resultPtr); } /* *------------------------------------------------------------------------- @@ -861,19 +879,26 @@ Tcl_Obj *resultPtr = Tcl_NewListObj(0, NULL); if (fontPtr->nsFont) { NSArray *list = [[fontPtr->nsFont fontDescriptor] objectForKey:NSFontCascadeListAttribute]; + Tcl_Encoding utf8; + Tcl_DString ds; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); for (NSFontDescriptor *subFontDesc in list) { NSString *family = [subFontDesc objectForKey:NSFontFamilyAttribute]; if (family) { + Tcl_ExternalToUtfDString(utf8, [family UTF8String], -1, &ds); Tcl_ListObjAppendElement(NULL, resultPtr, - Tcl_NewStringObj([family UTF8String], -1)); + Tcl_NewStringObj(Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); } } + Tcl_FreeEncoding(utf8); } Tcl_SetObjResult(interp, resultPtr); } /* @@ -1483,12 +1508,18 @@ traitsOfFont:nsFont]; id underline = [nsAttributes objectForKey: NSUnderlineStyleAttributeName]; id strikethrough = [nsAttributes objectForKey: NSStrikethroughStyleAttributeName]; + Tcl_Encoding utf8; + Tcl_DString ds; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + familyName = Tcl_ExternalToUtfDString(utf8, familyName, -1, &ds); objv[i++] = Tcl_NewStringObj(familyName, -1); + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); objv[i++] = Tcl_NewWideIntObj((Tcl_WideInt) floor([nsFont pointSize] + 0.5)); #define S(s) Tcl_NewStringObj(STRINGIFY(s),(int)(sizeof(STRINGIFY(s))-1)) objv[i++] = (traits & NSBoldFontMask) ? 
S(bold) : S(normal); objv[i++] = (traits & NSItalicFontMask) ? S(italic) : S(roman); if ([underline respondsToSelector:@selector(intValue)] && Index: jni/sdl2tk/macosx/tkMacOSXInit.c ================================================================== --- jni/sdl2tk/macosx/tkMacOSXInit.c +++ jni/sdl2tk/macosx/tkMacOSXInit.c @@ -221,31 +221,40 @@ - (NSString *) tkFrameworkImagePath: (NSString *) image { NSString *path = nil; NSAutoreleasePool *pool = [NSAutoreleasePool new]; + Tcl_Encoding utf8; + Tcl_DString ds; + char *str; + + utf8 = Tcl_GetEncoding(NULL, "utf-8"); if (tkLibPath[0] != '\0') { - path = [[NSBundle bundleWithPath:[[NSString stringWithUTF8String: - tkLibPath] stringByAppendingString:@"/../.."]] + str = Tcl_UtfToExternalDString(utf8, tkLibPath, -1, &ds); + path = [[NSBundle bundleWithPath:[[NSString stringWithUTF8String:str] + stringByAppendingString:@"/../.."]] pathForImageResource:image]; + Tcl_DStringFree(&ds); } if (!path) { const char *tk_library = Tcl_GetVar2(_eventInterp, "tk_library", NULL, TCL_GLOBAL_ONLY); if (tk_library) { NSFileManager *fm = [NSFileManager defaultManager]; - path = [[NSString stringWithUTF8String:tk_library] + str = Tcl_UtfToExternalDString(utf8, tk_library, -1, &ds); + path = [[NSString stringWithUTF8String:str] stringByAppendingFormat:@"/%@", image]; if (![fm isReadableFileAtPath:path]) { - path = [[NSString stringWithUTF8String:tk_library] + path = [[NSString stringWithUTF8String:str] stringByAppendingFormat:@"/../macosx/%@", image]; if (![fm isReadableFileAtPath:path]) { path = nil; } } + Tcl_DStringFree(&ds); } } #ifdef TK_MAC_DEBUG if (!path && getenv("TK_SRCROOT")) { path = [[NSString stringWithUTF8String:getenv("TK_SRCROOT")] @@ -253,10 +262,11 @@ if (![[NSFileManager defaultManager] isReadableFileAtPath:path]) { path = nil; } } #endif + Tcl_FreeEncoding(utf8); [path retain]; [pool drain]; return path; } @end @@ -792,25 +802,39 @@ TkMacOSXGetStringObjFromCFString( CFStringRef str) { Tcl_Obj *obj = NULL; const char *c 
= CFStringGetCStringPtr(str, kCFStringEncodingUTF8); + Tcl_Encoding utf8; + Tcl_DString ds, buffer; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_DStringInit(&ds); /* ds is freed on every path below, so it must always be initialized */ if (c) { - obj = Tcl_NewStringObj(c, -1); + c = Tcl_ExternalToUtfDString(utf8, c, -1, &ds); + obj = Tcl_NewStringObj(Tcl_DStringValue(&ds), Tcl_DStringLength(&ds)); } else { CFRange all = CFRangeMake(0, CFStringGetLength(str)); CFIndex len; if (CFStringGetBytes(str, all, kCFStringEncodingUTF8, 0, false, NULL, 0, &len) > 0 && len < INT_MAX) { - obj = Tcl_NewObj(); - Tcl_SetObjLength(obj, len); + Tcl_DStringInit(&buffer); + Tcl_DStringSetLength(&buffer, len); CFStringGetBytes(str, all, kCFStringEncodingUTF8, 0, false, - (UInt8*) obj->bytes, len, NULL); + (UInt8*) Tcl_DStringValue(&buffer), len, NULL); + c = Tcl_ExternalToUtfDString(utf8, Tcl_DStringValue(&buffer), + Tcl_DStringLength(&buffer), &ds); + Tcl_DStringFree(&buffer); + if (Tcl_DStringLength(&ds) < INT_MAX) { + obj = Tcl_NewStringObj(Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds)); + } } } + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); return obj; } /* * Local Variables: Index: jni/sdl2tk/macosx/tkMacOSXKeyEvent.c ================================================================== --- jni/sdl2tk/macosx/tkMacOSXKeyEvent.c +++ jni/sdl2tk/macosx/tkMacOSXKeyEvent.c @@ -331,33 +331,22 @@ } /* * NSString represents a non-BMP character as a string of length 2 where * the first character is the high surrogate and the second character is - * the low surrogate. We could record this in the XEvent by setting the - * keycode to the unicode code point and setting the trans_chars to the - * 4-byte UTF-8 string. However, that will not help as long as TCL_UTF_MAX - * is set to 3. Until that changes, we just replace non-BMP characters by - * the "replacement character" U+FFFD. + * the low surrogate.
*/ for (i = 0; i < len; i++) { UniChar nextChar = [str characterAtIndex: i]; if (CFStringIsSurrogateHighCharacter(nextChar)) { -#if TCL_UTF_MAX > 3 UniChar lowChar = [str characterAtIndex: ++i]; xEvent.xkey.keycode = CFStringGetLongCharacterForSurrogatePair( nextChar, lowChar); - xEvent.xkey.nbytes = Tcl_UniCharToUtf(xEvent.xkey.keycode, - &xEvent.xkey.trans_chars); + xEvent.xkey.nbytes = TkUniCharToUtf(xEvent.xkey.keycode, + &xEvent.xkey.trans_chars); xEvent.xkey.trans_chars[xEvent.xkey.nbytes] = '\0'; -#else - i++; - xEvent.xkey.keycode = 0xfffd; - strcpy(xEvent.xkey.trans_chars, "\xef\xbf\xbd"); - xEvent.xkey.nbytes = strlen(xEvent.xkey.trans_chars); -#endif } else { xEvent.xkey.keycode = (int) nextChar; [[str substringWithRange: NSMakeRange(i,1)] getCString: xEvent.xkey.trans_chars maxLength: XMaxTransChars-1 encoding: NSUTF8StringEncoding]; Index: jni/sdl2tk/macosx/tkMacOSXMenu.c ================================================================== --- jni/sdl2tk/macosx/tkMacOSXMenu.c +++ jni/sdl2tk/macosx/tkMacOSXMenu.c @@ -149,13 +149,20 @@ return self; } - (id) initWithTkMenu: (TkMenu *) tkMenu { - NSString *title = [[NSString alloc] initWithUTF8String: - Tk_PathName(tkMenu->tkwin)]; + NSString *title; + Tcl_Encoding utf8; + Tcl_DString ds; + char *name; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + name = Tcl_UtfToExternalDString(utf8, Tk_PathName(tkMenu->tkwin), -1, &ds); + title = [[NSString alloc] initWithUTF8String:name]; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); self = [self initWithTitle:title]; [title release]; if (self) { _tkMenu = tkMenu; } @@ -628,17 +635,25 @@ } } [menuItem setImage:image]; if ((!image || mePtr->compound != COMPOUND_NONE) && mePtr->labelPtr && mePtr->labelLength) { - title = [[[NSString alloc] initWithBytes:Tcl_GetString(mePtr->labelPtr) - length:mePtr->labelLength encoding:NSUTF8StringEncoding] + Tcl_Encoding utf8; + Tcl_DString ds; + + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_UtfToExternalDString(utf8, 
Tcl_GetString(mePtr->labelPtr), + mePtr->labelLength, &ds); + title = [[[NSString alloc] initWithBytes:Tcl_DStringValue(&ds) + length:Tcl_DStringLength(&ds) encoding:NSUTF8StringEncoding] autorelease]; if ([title hasSuffix:@"..."]) { title = [NSString stringWithFormat:@"%@%C", [title substringToIndex:[title length] - 3], 0x2026]; } + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); } [menuItem setTitle:title]; if (strcmp(Tcl_GetString(fontPtr), "menu") || gc->foreground != defaultFg || gc->background != defaultBg) { attributes = TkMacOSXNSFontAttributesForFont(Tk_GetFontFromObj( @@ -1180,12 +1195,21 @@ } } if (ch) { return [[[NSString alloc] initWithCharacters:&ch length:1] autorelease]; } else { - return [[[[NSString alloc] initWithUTF8String:accel] autorelease] - lowercaseString]; + NSString *result; + Tcl_Encoding utf8; + Tcl_DString ds; + + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + accel = Tcl_UtfToExternalDString(utf8, accel, -1, &ds); + result = [[[[NSString alloc] initWithUTF8String:accel] autorelease] + lowercaseString]; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); + return result; } } /* *-------------------------------------------------------------- Index: jni/sdl2tk/macosx/tkMacOSXSysTray.c ================================================================== --- jni/sdl2tk/macosx/tkMacOSXSysTray.c +++ jni/sdl2tk/macosx/tkMacOSXSysTray.c @@ -559,10 +559,12 @@ int width, height; Tk_Window tkwin = Tk_MainWindow(interp); TkWindow *winPtr = (TkWindow *)tkwin; Display *d = winPtr->display; NSImage *icon; + Tcl_Encoding utf8; + Tcl_DString ds; tk_image = Tk_GetImage(interp, tkwin, Tcl_GetString(objv[2]), NULL, NULL); if (tk_image == NULL) { return TCL_ERROR; } @@ -577,11 +579,15 @@ /* * Set the text for the tooltip. 
*/ - NSString *tooltip = [NSString stringWithUTF8String: Tcl_GetString(objv[3])]; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_UtfToExternalDString(utf8, Tcl_GetString(objv[3]), -1, &ds); + NSString *tooltip = [NSString stringWithUTF8String: Tcl_DStringValue(&ds)]; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); if (tooltip == nil) { Tcl_AppendResult(interp, " unable to set tooltip for systray icon", NULL); return TCL_ERROR; } @@ -640,11 +646,18 @@ /* * Modify the text for the tooltip. */ case TRAY_TEXT: { - NSString *tooltip = [NSString stringWithUTF8String:Tcl_GetString(objv[3])]; + Tcl_Encoding utf8; + Tcl_DString ds; + + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_UtfToExternalDString(utf8, Tcl_GetString(objv[3]), -1, &ds); + NSString *tooltip = [NSString stringWithUTF8String: Tcl_DStringValue(&ds)]; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); if (tooltip == nil) { Tcl_AppendResult(interp, "unable to set tooltip for systray icon", NULL); return TCL_ERROR; } @@ -710,10 +723,13 @@ ClientData clientData, Tcl_Interp * interp, int objc, Tcl_Obj *const *objv) { + Tcl_Encoding utf8; + Tcl_DString ds; + if (objc < 3) { Tcl_WrongNumArgs(interp, 1, objv, "title message"); return TCL_ERROR; } @@ -722,12 +738,18 @@ "Notifications not supported on macOS versions lower than 10.10", NULL); return TCL_OK; } - NSString *title = [NSString stringWithUTF8String: Tcl_GetString(objv[1])]; - NSString *message = [NSString stringWithUTF8String: Tcl_GetString(objv[2])]; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_UtfToExternalDString(utf8, Tcl_GetString(objv[1]), -1, &ds); + NSString *title = [NSString stringWithUTF8String: Tcl_DStringValue(&ds)]; + Tcl_DStringFree(&ds); + Tcl_UtfToExternalDString(utf8, Tcl_GetString(objv[2]), -1, &ds); + NSString *message = [NSString stringWithUTF8String: Tcl_DStringValue(&ds)]; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); /* * Update the authorization status in case the user enabled or disabled * notifications after the app started 
up. */ Index: jni/sdl2tk/macosx/tkMacOSXWm.c ================================================================== --- jni/sdl2tk/macosx/tkMacOSXWm.c +++ jni/sdl2tk/macosx/tkMacOSXWm.c @@ -1421,11 +1421,18 @@ case WMATT_TITLEPATH: { const char *path = (const char *) Tcl_FSGetNativePath(value); NSString *filename = @""; if (path && *path) { + Tcl_Encoding utf8; + Tcl_DString ds; + + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + path = Tcl_UtfToExternalDString(utf8, path, -1, &ds); filename = [NSString stringWithUTF8String:path]; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); } [macWindow setRepresentedFilename:filename]; break; } case WMATT_TOPMOST: @@ -1508,14 +1515,23 @@ result = Tcl_NewBooleanObj([macWindow isDocumentEdited]); break; case WMATT_NOTIFY: result = Tcl_NewBooleanObj(tkMacOSXWmAttrNotifyVal); break; - case WMATT_TITLEPATH: - result = Tcl_NewStringObj([[macWindow representedFilename] UTF8String], - -1); + case WMATT_TITLEPATH: { + Tcl_Encoding utf8; + Tcl_DString ds; + + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_ExternalToUtfDString(utf8, + [[macWindow representedFilename] UTF8String], -1, &ds); + result = Tcl_NewStringObj(Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); break; + } case WMATT_TOPMOST: result = Tcl_NewBooleanObj(wmPtr->flags & WM_TOPMOST); break; case WMATT_TRANSPARENT: result = Tcl_NewBooleanObj(wmPtr->flags & WM_TRANSPARENT); @@ -5442,17 +5458,25 @@ void TkSetWMName( TkWindow *winPtr, Tk_Uid titleUid) { + Tcl_Encoding utf8; + Tcl_DString ds; + char *tStr; + if (Tk_IsEmbedded(winPtr)) { return; } - NSString *title = [[NSString alloc] initWithUTF8String:titleUid]; + utf8 = Tcl_GetEncoding(NULL, "utf-8"); + tStr = Tcl_UtfToExternalDString(utf8, (const char *) titleUid, -1, &ds); + NSString *title = [[NSString alloc] initWithUTF8String:tStr]; [TkMacOSXDrawableWindow(winPtr->window) setTitle:title]; [title release]; + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); } /* 
*---------------------------------------------------------------------- * Index: jni/sdl2tk/sdl/SdlTkInt.c ================================================================== --- jni/sdl2tk/sdl/SdlTkInt.c +++ jni/sdl2tk/sdl/SdlTkInt.c @@ -830,79 +830,38 @@ static int ProcessTextInput(XEvent *event, int no_rel, int sdl_mod, const char *text, int len) { - int i, n, n2, ulen = Tcl_NumUtfChars(text, len); + int ret = 0, i, n, ulen; char buf[TCL_UTF_MAX]; +#if TCL_UTF_MAX < 4 + Tcl_DString ubuf; + Tcl_Encoding encoding = Tcl_GetEncoding(NULL, "utf-8"); + /* + * This should make UTF-8 into WTF-8. + */ + Tcl_DStringInit(&ubuf); + Tcl_ExternalToUtfDString(encoding, text, len, &ubuf); + if (encoding) { + Tcl_FreeEncoding(encoding); + } + text = Tcl_DStringValue(&ubuf); + len = Tcl_DStringLength(&ubuf); /* count chars over the converted text, which may be longer than the input */ +#endif + ulen = Tcl_NumUtfChars(text, len); if (ulen <= 0) { - SdlTkX.keyuc = 0; - return 0; + goto done; } if (sdl_mod & KMOD_RALT) { event->xkey.state &= ~Mod4Mask; } for (i = 0; i < ulen; i++) { Tcl_UniChar ch; n = Tcl_UtfToUniChar(text, &ch); - n2 = 0; - - /* Deal with surrogate pairs */ -#if TCL_UTF_MAX > 4 - if ((ch >= 0xd800) && (ch <= 0xdbff)) { - Tcl_UniChar ch2; - - if (i + 1 < ulen) { - n2 = Tcl_UtfToUniChar(text + n, &ch2); - if ((ch2 >= 0xdc00) && (ch2 <= 0xdfff)) { - ch = ((ch & 0x3ff) << 10) | (ch2 & 0x3ff); - ch += 0x10000; - ++i; - } else { - ch = 0xfffd; - n2 = 0; - } - } else { - SdlTkX.keyuc = ch; - return -1; - } - } else if ((ch >= 0xdc00) && (ch <= 0xdfff)) { - if (SdlTkX.keyuc) { - ch = ((SdlTkX.keyuc & 0x3ff) << 10) | (ch & 0x3ff); - ch += 0x10000; - } else { - ch = 0xfffd; - } - SdlTkX.keyuc = 0; - } else if ((ch == 0xfffe) || (ch == 0xffff)) { - ch = 0xfffd; - SdlTkX.keyuc = 0; - } else { - SdlTkX.keyuc = 0; - } -#else - if ((ch >= 0xd800) && (ch <= 0xdbff)) { - Tcl_UniChar ch2; - - if (i + 1 < ulen) { - n2 = Tcl_UtfToUniChar(text + n, &ch2); - if ((ch2 >= 0xdc00) && (ch2 <= 0xdfff)) { - ++i; - } else { - n2 = 0; - } - } - ch = 0xfffd; - } else if ((ch >= 0xdc00) && (ch <= 0xdfff))
{ - ch = 0xfffd; - } else if ((ch == 0xfffe) || (ch == 0xffff)) { - ch = 0xfffd; - } - SdlTkX.keyuc = 0; -#endif event->xkey.nbytes = Tcl_UniCharToUtf(ch, buf); event->xkey.time = SdlTkX.time_count; if (event->xkey.nbytes > sizeof (event->xkey.trans_chars)) { event->xkey.nbytes = sizeof (event->xkey.trans_chars); } @@ -910,11 +869,11 @@ if (len == 1) { event->xkey.keycode = FixKeyCode(event->xkey.trans_chars[0]); } else { event->xkey.keycode = -1; } - text += n + n2; + text += n; /* Queue the KeyPress */ EVLOG(" KEYPRESS: CODE=0x%02X UC=0x%X", event->xkey.keycode, ch); event->type = KeyPress; if (!no_rel || (i < ulen - 1)) { @@ -925,11 +884,16 @@ EVLOG(" KEYRELEASE: CODE=0x%02X", event->xkey.keycode); SdlTkQueueEvent(event); } } } - return 1; + ret = 1; /* events were queued: keep the previous success return value */ +done: +#if TCL_UTF_MAX < 4 + Tcl_DStringFree(&ubuf); +#endif + return ret; } /* *---------------------------------------------------------------------- * Index: jni/sdl2tk/sdl/SdlTkInt.h ================================================================== --- jni/sdl2tk/sdl/SdlTkInt.h +++ jni/sdl2tk/sdl/SdlTkInt.h @@ -205,11 +205,10 @@ _Window *mouse_window; _Window *keyboard_window; int mouse_x, mouse_y; int raw_mouse_x, raw_mouse_y; int sdlfocus; - int keyuc; int cursor_change; #if !defined(ANDROID) || defined(__TERMUX__) Tcl_HashTable sdlcursors; #endif Index: jni/sdl2tk/sdl/SdlTkUtils.c ================================================================== --- jni/sdl2tk/sdl/SdlTkUtils.c +++ jni/sdl2tk/sdl/SdlTkUtils.c @@ -655,17 +655,31 @@ dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; for (numChars = 0; wSrc < wSrcEnd; numChars++) { + int uch; Tcl_UniChar ch; if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - ch = *wSrc++; + uch = *wSrc++; + ch = uch; +#if TCL_UTF_MAX < 4 + if (uch > 0xFFFF && uch <= 0x10FFFF) { + uch -= 0x10000; + ch = (uch & 0x3FF) | 0xDC00; + dst += Tcl_UniCharToUtf((Tcl_UniChar)((uch >> 10) | 0xD800), dst); + if (dst >= dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } + numChars++; + }
} +#endif dst += Tcl_UniCharToUtf(ch, dst); } *srcReadPtr = (char *) wSrc - (char *) wSrcStart; *dstWrotePtr = dst - dstStart; @@ -732,11 +746,12 @@ wDstStart = (unsigned int *) dst; wDstEnd = (unsigned int *) (dst + dstLen - sizeof (unsigned int)); result = TCL_OK; for (numChars = 0; src < srcEnd; numChars++) { - Tcl_UniChar ucs2; + int ch, len; + Tcl_UniChar ucs, ucs2; if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { /* * If there is more string to follow, this will ensure that the * last UTF-8 character in the source buffer hasn't been cut off. @@ -747,19 +762,29 @@ } if (wDst > wDstEnd) { result = TCL_CONVERT_NOSPACE; break; } - src += Tcl_UtfToUniChar(src, &ucs2); + src += Tcl_UtfToUniChar(src, &ucs); + ch = ucs; + if ((ucs & 0xFFFFFC00) == 0xD800) { + if (Tcl_UtfCharComplete(src, srcEnd - src)) { + len = Tcl_UtfToUniChar(src, &ucs2); + if ((ucs2 & 0xFFFFFC00) == 0xDC00) { + src += len; + ch = (((ucs&0x3FF)<<10) | (ucs2&0x3FF)) + 0x10000; + } + } + } #ifdef USE_SYMBOLA_CTRL - if (((int) ucs2 >= 0x00) && ((int) ucs2 < 0x20)) { - ucs2 += 0x2400; - } else if (ucs2 == 0x7F) { - ucs2 = 0x2421; + if ((ch >= 0x00) && (ch < 0x20)) { + ch += 0x2400; + } else if (ch == 0x7F) { + ch = 0x2421; } #endif - *wDst++ = ucs2; + *wDst++ = ch; } *srcReadPtr = src - srcStart; *dstWrotePtr = (char *) wDst - (char *) wDstStart; *dstCharsPtr = numChars; return result; Index: jni/sdl2tk/sdl/tkSDLFont.c ================================================================== --- jni/sdl2tk/sdl/tkSDLFont.c +++ jni/sdl2tk/sdl/tkSDLFont.c @@ -31,17 +31,12 @@ * * Under Unix, there are three attributes that uniquely identify a "font * family": the foundry, face name, and charset. 
*/ -#if TCL_UTF_MAX > 3 #define FONTMAP_SHIFT 12 #define FONTMAP_PAGES (1 << (21 - FONTMAP_SHIFT)) -#else -#define FONTMAP_SHIFT 10 -#define FONTMAP_PAGES (1 << (sizeof(Tcl_UniChar)*8 - FONTMAP_SHIFT)) -#endif #define FONTMAP_BITSPERPAGE (1 << FONTMAP_SHIFT) typedef struct FontFamily { struct FontFamily *nextPtr; /* Next in list of all known font families. */ int refCount; /* How many SubFonts are referring to this @@ -90,15 +85,11 @@ /* * The following structure represents Unix's implementation of a font object. */ #define SUBFONT_SPACE 3 -#if TCL_UTF_MAX > 3 #define BASE_CHARS 4096 -#else -#define BASE_CHARS 2048 -#endif typedef struct UnixFont { TkFont font; /* Stuff used by generic font package. Must be * first in structure. */ SubFont staticSubFonts[SUBFONT_SPACE]; @@ -367,12 +358,11 @@ * output buffer. */ { const char *srcStart, *srcEnd; char *dstStart, *dstEnd; unsigned int *wDst; - Tcl_UniChar ch = 0; - int result; + int ch, result; static char const hexChars[] = "0123456789abcdef"; static char const mapChars[] = { 0, 0, 0, 0, 0, 0, 0, 'a', 'b', 't', 'n', 'v', 'f', 'r' }; @@ -381,33 +371,28 @@ srcStart = src; srcEnd = src + srcLen; dstStart = dst; -#if TCL_UTF_MAX > 3 dstEnd = dst + dstLen - 10 * sizeof(unsigned int); -#else - dstEnd = dst + dstLen - 6 * sizeof(unsigned int); -#endif wDst = (unsigned int *) dst; for ( ; src < srcEnd; ) { if ((char *) wDst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - src += Tcl_UtfToUniChar(src, &ch); + src += TkUtfToUniChar(src, &ch); wDst[0] = '\\'; if ((ch < sizeof(mapChars)) && (mapChars[ch] != 0)) { wDst[1] = mapChars[ch]; wDst += 2; } else if (ch < 256) { wDst[1] = 'x'; wDst[2] = hexChars[(ch >> 4) & 0xf]; wDst[3] = hexChars[ch & 0xf]; wDst += 4; -#if TCL_UTF_MAX > 3 } else if (ch < 0x10000) { wDst[1] = 'u'; wDst[2] = hexChars[(ch >> 12) & 0xf]; wDst[3] = hexChars[(ch >> 8) & 0xf]; wDst[4] = hexChars[(ch >> 4) & 0xf]; @@ -432,19 +417,10 @@ wDst[6] = hexChars[(ch >> 12) & 0xf]; wDst[7] = hexChars[(ch >> 8) & 
0xf]; wDst[8] = hexChars[(ch >> 4) & 0xf]; wDst[9] = hexChars[ch & 0xf]; wDst += 10; -#else - } else { - wDst[1] = 'u'; - wDst[2] = hexChars[(ch >> 12) & 0xf]; - wDst[3] = hexChars[(ch >> 8) & 0xf]; - wDst[4] = hexChars[(ch >> 4) & 0xf]; - wDst[5] = hexChars[ch & 0xf]; - wDst += 6; -#endif } } *srcReadPtr = src - srcStart; *dstWrotePtr = (char *) wDst - dstStart; *dstCharsPtr = *dstWrotePtr / sizeof(unsigned int); @@ -882,11 +858,11 @@ if (numBytes == 0) { curX = 0; curByte = 0; } else if (maxLength < 0) { const char *p, *end, *next; - Tcl_UniChar ch = 0; + int ch; SubFont *thisSubFontPtr; FontFamily *familyPtr; Tcl_DString runString; /* @@ -898,11 +874,11 @@ */ curX = 0; end = source + numBytes; for (p = source; p < end; ) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); thisSubFontPtr = FindSubFontForChar(fontPtr, ch, &lastSubFontPtr); if (thisSubFontPtr != lastSubFontPtr) { familyPtr = lastSubFontPtr->familyPtr; Tcl_UtfToExternalDString(familyPtr->encoding, source, p - source, &runString); @@ -938,22 +914,21 @@ } Tcl_DStringFree(&runString); curByte = numBytes; } else { const char *p, *end, *next, *term; - int newX, termX, sawNonSpace, dstWrote; - Tcl_UniChar ch = 0; + int newX, termX, sawNonSpace, dstWrote, ch; FontFamily *familyPtr; char buf[64]; /* * How many chars will fit in the space allotted? This first version * may be inefficient because it measures every character * individually. */ - next = source + Tcl_UtfToUniChar(source, &ch); + next = source + TkUtfToUniChar(source, &ch); newX = curX = termX = 0; term = source; end = source + numBytes; @@ -995,11 +970,11 @@ term = end; termX = curX; break; } - next += Tcl_UtfToUniChar(next, &ch); + next += TkUtfToUniChar(next, &ch); if ((ch < 256) && isspace(ch)) { if (sawNonSpace) { term = p; termX = curX; sawNonSpace = 0; @@ -1020,17 +995,17 @@ * desired span. The width returned will include the width of that * extra character. 
*/ curX = newX; - p += Tcl_UtfToUniChar(p, &ch); + p += TkUtfToUniChar(p, &ch); } if ((flags & TK_AT_LEAST_ONE) && (term == source) && (p < end)) { term = p; termX = curX; if (term == source) { - term += Tcl_UtfToUniChar(term, &ch); + term += TkUtfToUniChar(term, &ch); termX = newX; } } else if ((p >= end) || !(flags & TK_WHOLE_WORDS)) { term = p; termX = curX; @@ -1138,12 +1113,11 @@ { UnixFont *fontPtr = (UnixFont *) tkfont; SubFont *thisSubFontPtr, *lastSubFontPtr; Tcl_DString runString; const char *p, *end, *next; - int xStart, window_width; - Tcl_UniChar ch = 0; + int xStart, window_width, ch; FontFamily *familyPtr; int rx, ry, fontSize = 0; unsigned width, height, border_width, depth; Drawable root; @@ -1162,11 +1136,11 @@ } end = source + numBytes; for (p = source; p <= end; ) { if (p < end) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); thisSubFontPtr = FindSubFontForChar(fontPtr, ch, &lastSubFontPtr); } else { next = p + 1; thisSubFontPtr = lastSubFontPtr; } @@ -2138,11 +2112,11 @@ end = (row + 1) << FONTMAP_SHIFT; for (i = row << FONTMAP_SHIFT; i < end; i++) { if ((i >= 0xD800) && (i <= 0xDFFF)) { continue; } - if (Tcl_UtfToExternal(NULL, encoding, src, Tcl_UniCharToUtf(i, src), + if (Tcl_UtfToExternal(NULL, encoding, src, TkUniCharToUtf(i, src), TCL_ENCODING_STOPONERROR, NULL, buf, sizeof(buf), NULL, NULL, NULL) != TCL_OK) { continue; } if (SdlTkFontHasChar(subFontPtr->fontStructPtr, buf)) { @@ -2897,12 +2871,11 @@ } else { UnixFont *fontPtr = (UnixFont *) tkfont; SubFont *thisSubFontPtr, *lastSubFontPtr; Tcl_DString runString; const char *p, *end, *next; - int xStart, yStart, fontSize = 0; - Tcl_UniChar ch = 0; + int xStart, yStart, fontSize = 0, ch; FontFamily *familyPtr; double sinA = sin(angle*PI/180), cosA = cos(angle*PI/180), dy; XPoint points[5]; lastSubFontPtr = &fontPtr->subFontArray[0]; @@ -2910,11 +2883,11 @@ yStart = y; end = source + numBytes; for (p = source; p <= end; ) { if (p < end) { - next = p + 
Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); thisSubFontPtr = FindSubFontForChar(fontPtr, ch, &lastSubFontPtr); } else { next = p + 1; thisSubFontPtr = lastSubFontPtr; } Index: jni/sdl2tk/sdl/tkSDLMenu.c ================================================================== --- jni/sdl2tk/sdl/tkSDLMenu.c +++ jni/sdl2tk/sdl/tkSDLMenu.c @@ -857,16 +857,15 @@ int len; len = Tcl_GetCharLength(mePtr->labelPtr); if (mePtr->underline < len) { int activeBorderWidth, leftEdge; - Tcl_UniChar ch = 0; const char *label, *start, *end; label = Tcl_GetString(mePtr->labelPtr); start = Tcl_UtfAtIndex(label, mePtr->underline); - end = start + Tcl_UtfToUniChar(start, &ch); + end = TkUtfNext(start); Tk_GetPixelsFromObj(NULL, menuPtr->tkwin, menuPtr->activeBorderWidthPtr, &activeBorderWidth); leftEdge = x + mePtr->indicatorSpace + activeBorderWidth; if (menuPtr->menuType == MENUBAR) { Index: jni/sdl2tk/unix/tkUnixFont.c ================================================================== --- jni/sdl2tk/unix/tkUnixFont.c +++ jni/sdl2tk/unix/tkUnixFont.c @@ -33,17 +33,12 @@ * * Under Unix, there are three attributes that uniquely identify a "font * family": the foundry, face name, and charset. */ -#if TCL_UTF_MAX > 3 #define FONTMAP_SHIFT 12 #define FONTMAP_PAGES (1 << (21 - FONTMAP_SHIFT)) -#else -#define FONTMAP_SHIFT 10 -#define FONTMAP_PAGES (1 << (sizeof(Tcl_UniChar)*8 - FONTMAP_SHIFT)) -#endif #define FONTMAP_BITSPERPAGE (1 << FONTMAP_SHIFT) typedef struct FontFamily { struct FontFamily *nextPtr; /* Next in list of all known font families. */ int refCount; /* How many SubFonts are referring to this @@ -408,12 +403,11 @@ * correspond to the bytes stored in the * output buffer. 
*/ { const char *srcStart, *srcEnd; char *dstStart, *dstEnd; - Tcl_UniChar ch = 0; - int result; + int ch, result; static const char hexChars[] = "0123456789abcdef"; static const char mapChars[] = { 0, 0, 0, 0, 0, 0, 0, 'a', 'b', 't', 'n', 'v', 'f', 'r' }; @@ -429,11 +423,11 @@ for ( ; src < srcEnd; ) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - src += Tcl_UtfToUniChar(src, &ch); + src += TkUtfToUniChar(src, &ch); dst[0] = '\\'; if ((ch < sizeof(mapChars)) && (mapChars[ch] != 0)) { dst[1] = mapChars[ch]; dst += 2; } else if (ch < 256) { @@ -592,11 +586,11 @@ * correspond to the bytes stored in the * output buffer. */ { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; - Tcl_UniChar ch = 0; + Tcl_UniChar ch; srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if (!(flags & TCL_ENCODING_END)) { @@ -1019,11 +1013,11 @@ int *lengthPtr) /* Filled with x-location just after the * terminating character. */ { UnixFont *fontPtr; SubFont *lastSubFontPtr; - int curX, curByte; + int curX, curByte, ch; /* * Unix does not use kerning or fractional character widths when * displaying text on the screen. 
So that means we can safely measure * individual characters or spans of characters and add up the widths w/o @@ -1037,11 +1031,10 @@ if (numBytes == 0) { curX = 0; curByte = 0; } else if (maxLength < 0) { const char *p, *end, *next; - Tcl_UniChar ch = 0; SubFont *thisSubFontPtr; FontFamily *familyPtr; Tcl_DString runString; /* @@ -1053,11 +1046,11 @@ */ curX = 0; end = source + numBytes; for (p = source; p < end; ) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); thisSubFontPtr = FindSubFontForChar(fontPtr, ch, &lastSubFontPtr); if (thisSubFontPtr != lastSubFontPtr) { familyPtr = lastSubFontPtr->familyPtr; Tcl_UtfToExternalDString(familyPtr->encoding, source, p - source, &runString); @@ -1091,21 +1084,20 @@ Tcl_DStringFree(&runString); curByte = numBytes; } else { const char *p, *end, *next, *term; int newX, termX, sawNonSpace, dstWrote; - Tcl_UniChar ch = 0; FontFamily *familyPtr; XChar2b buf[8]; /* * How many chars will fit in the space allotted? This first version * may be inefficient because it measures every character * individually. */ - next = source + Tcl_UtfToUniChar(source, &ch); + next = source + TkUtfToUniChar(source, &ch); newX = curX = termX = 0; term = source; end = source + numBytes; @@ -1136,11 +1128,11 @@ term = end; termX = curX; break; } - next += Tcl_UtfToUniChar(next, &ch); + next += TkUtfToUniChar(next, &ch); if ((ch < 256) && isspace(ch)) { if (sawNonSpace) { term = p; termX = curX; sawNonSpace = 0; @@ -1161,17 +1153,17 @@ * desired span. The width returned will include the width of that * extra character. 
*/ curX = newX; - p += Tcl_UtfToUniChar(p, &ch); + p += TkUtfToUniChar(p, &ch); } if ((flags & TK_AT_LEAST_ONE) && (term == source) && (p < end)) { term = p; termX = curX; if (term == source) { - term += Tcl_UtfToUniChar(term, &ch); + term += TkUtfToUniChar(term, &ch); termX = newX; } } else if ((p >= end) || !(flags & TK_WHOLE_WORDS)) { term = p; termX = curX; @@ -1278,12 +1270,11 @@ { UnixFont *fontPtr = (UnixFont *) tkfont; SubFont *thisSubFontPtr, *lastSubFontPtr; Tcl_DString runString; const char *p, *end, *next; - int xStart, needWidth, window_width, do_width; - Tcl_UniChar ch = 0; + int xStart, needWidth, window_width, do_width, ch; FontFamily *familyPtr; #ifdef TK_DRAW_CHAR_XWINDOW_CHECK int rx, ry; unsigned width, height, border_width, depth; Drawable root; @@ -1314,11 +1305,11 @@ end = source + numBytes; needWidth = fontPtr->font.fa.underline + fontPtr->font.fa.overstrike; for (p = source; p <= end; ) { if (p < end) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); thisSubFontPtr = FindSubFontForChar(fontPtr, ch, &lastSubFontPtr); } else { next = p + 1; thisSubFontPtr = lastSubFontPtr; } @@ -2250,11 +2241,11 @@ SubFont *subFontPtr, /* Contains font mapping cache to be * updated. */ int row) /* Index of the page to be loaded into the * cache. 
*/ { - char buf[16], src[TCL_UTF_MAX]; + char buf[16], src[TCL_UTF_MAX*2]; int minHi, maxHi, minLo, maxLo, scale, checkLo; int i, end, bitOffset, isTwoByteFont, n; Tcl_Encoding encoding; XFontStruct *fontStructPtr; XCharStruct *widths; @@ -2288,11 +2279,11 @@ end = (row + 1) << FONTMAP_SHIFT; for (i = row << FONTMAP_SHIFT; i < end; i++) { int hi, lo; - if (Tcl_UtfToExternal(NULL, encoding, src, Tcl_UniCharToUtf(i, src), + if (Tcl_UtfToExternal(NULL, encoding, src, TkUniCharToUtf(i, src), TCL_ENCODING_STOPONERROR, NULL, buf, sizeof(buf), NULL, NULL, NULL) != TCL_OK) { continue; } if (isTwoByteFont) { @@ -2454,11 +2445,11 @@ Tk_Uid hateFoundry; const char *charset, *hateCharset; unsigned bestScore[2]; char **nameList; char **nameListOrig; - char src[TCL_UTF_MAX]; + char src[8]; FontAttributes want, got; Display *display; SubFont subFont; XFontStruct *fontStructPtr; Tcl_DString dsEncodings; @@ -2484,11 +2475,11 @@ if (numNames == 0) { return NULL; } nameListOrig = nameList; - srcLen = Tcl_UniCharToUtf(ch, src); + srcLen = TkUniCharToUtf(ch, src); want.fa = fontPtr->font.fa; want.xa = fontPtr->xa; want.fa.family = Tk_GetUid(faceName); Index: jni/sdl2tk/unix/tkUnixKey.c ================================================================== --- jni/sdl2tk/unix/tkUnixKey.c +++ jni/sdl2tk/unix/tkUnixKey.c @@ -142,10 +142,47 @@ && (winPtr->dispPtr->inputContext != NULL) && (eventPtr->type == KeyPress)) { Status status; #if X_HAVE_UTF8_STRING +#if TCL_UTF_MAX < 4 + /* + * We want WTF-8 here. + */ + Tcl_DStringInit(&buf); + Tcl_DStringSetLength(&buf, TCL_DSTRING_STATIC_SIZE-1); + len = Xutf8LookupString(winPtr->dispPtr->inputContext, + &eventPtr->xkey, + Tcl_DStringValue(&buf), Tcl_DStringLength(&buf), + &kePtr->keysym, &status); + + if (status == XBufferOverflow) { + /* + * Expand buffer and try again. 
+ */ + + Tcl_DStringSetLength(&buf, len); + len = Xutf8LookupString(winPtr->dispPtr->inputContext, + &eventPtr->xkey, + Tcl_DStringValue(&buf), Tcl_DStringLength(&buf), + &kePtr->keysym, &status); + } + Tcl_DStringSetLength(&buf, len); + if ((status != XLookupChars) && (status != XLookupBoth)) { + len = 0; + } else { + Tcl_Encoding encoding = Tcl_GetEncoding(NULL, "utf-8"); + + Tcl_ExternalToUtfDString(encoding, Tcl_DStringValue(&buf), + len, dsPtr); + len = Tcl_DStringLength(dsPtr); + if (encoding) { + Tcl_FreeEncoding(encoding); + } + } + Tcl_DStringFree(&buf); +#else Tcl_DStringSetLength(dsPtr, TCL_DSTRING_STATIC_SIZE-1); len = Xutf8LookupString(winPtr->dispPtr->inputContext, &eventPtr->xkey, Tcl_DStringValue(dsPtr), Tcl_DStringLength(dsPtr), &kePtr->keysym, &status); @@ -163,10 +200,11 @@ } if ((status != XLookupChars) && (status != XLookupBoth)) { len = 0; } Tcl_DStringSetLength(dsPtr, len); +#endif #else /* !X_HAVE_UTF8_STRING */ /* * Overallocate the dstring to the maximum stack amount. 
*/ Index: jni/sdl2tk/unix/tkUnixMenu.c ================================================================== --- jni/sdl2tk/unix/tkUnixMenu.c +++ jni/sdl2tk/unix/tkUnixMenu.c @@ -857,16 +857,15 @@ int len; len = Tcl_GetCharLength(mePtr->labelPtr); if (mePtr->underline < len) { int activeBorderWidth, leftEdge; - Tcl_UniChar ch = 0; const char *label, *start, *end; label = Tcl_GetString(mePtr->labelPtr); start = Tcl_UtfAtIndex(label, mePtr->underline); - end = start + Tcl_UtfToUniChar(start, &ch); + end = TkUtfNext(start); Tk_GetPixelsFromObj(NULL, menuPtr->tkwin, menuPtr->activeBorderWidthPtr, &activeBorderWidth); leftEdge = x + mePtr->indicatorSpace + activeBorderWidth; if (menuPtr->menuType == MENUBAR) { Index: jni/sdl2tk/unix/tkUnixRFont.c ================================================================== --- jni/sdl2tk/unix/tkUnixRFont.c +++ jni/sdl2tk/unix/tkUnixRFont.c @@ -161,20 +161,20 @@ } static XftFont * GetFont( UnixFtFont *fontPtr, - FcChar32 ucs4, + int ucs4, double angle) { int i; if (ucs4) { for (i = 0; i < fontPtr->nfaces; i++) { FcCharSet *charset = fontPtr->faces[i].charset; - if (charset && FcCharSetHasChar(charset, ucs4)) { + if (charset && FcCharSetHasChar(charset, (FcChar32)ucs4)) { break; } } if (i == fontPtr->nfaces) { i = 0; @@ -758,13 +758,11 @@ int c, /* Character of interest */ TkFontAttributes *faPtr) /* Output: Font attributes */ { UnixFtFont *fontPtr = (UnixFtFont *) tkfont; /* Structure describing the logical font */ - FcChar32 ucs4 = (FcChar32) c; - /* UCS-4 character to map */ - XftFont *ftFont = GetFont(fontPtr, ucs4, 0.0); + XftFont *ftFont = GetFont(fontPtr, c, 0.0); /* Actual font used to render the character */ GetTkFontAttributes(ftFont, faPtr); faPtr->underline = fontPtr->font.fa.underline; faPtr->overstrike = fontPtr->font.fa.overstrike; @@ -809,31 +807,17 @@ -1, -1, -1, InitFontErrorProc, &errorFlag); curX = 0; curByte = 0; sawNonSpace = 0; while (numBytes > 0) { - Tcl_UniChar unichar; - - clen = Tcl_UtfToUniChar(source, 
&unichar); - c = (FcChar32) unichar; -#if TCL_UTF_MAX == 4 - if (!clen && (*source & 0xf8) == 0xf0) { - clen += Tcl_UtfToUniChar(source, &unichar); - c = (((c & 0x3ff) << 10) | (unichar & 0x3ff)) + 0x10000; - } -#endif - - if (clen <= 0) { - /* - * This can't happen (but see #1185640) - */ - - Tk_DeleteErrorHandler(handler); - *lengthPtr = curX; - return curByte; - } - + int unichar; + + clen = TkUtfToUniChar(source, &unichar); + if ((unichar >= 0xD800) && (unichar <= 0xDFFF)) { + unichar = 0xFFFD; + } + c = (FcChar32) unichar; source += clen; numBytes -= clen; if (c < 256 && isspace(c)) { /* I18N: ??? */ if (sawNonSpace) { termByte = curByte; @@ -1045,19 +1029,15 @@ XftDrawSetClip(fontPtr->ftDraw, tsdPtr->clipRegion); } nspec = 0; while (numBytes > 0) { XftFont *ftFont; - FcChar32 c; - - clen = FcUtf8ToUcs4((FcChar8 *) source, &c, numBytes); - if (clen <= 0) { - /* - * This should not happen, but it can. - */ - - goto doUnderlineStrikeout; + int c; + + clen = TkUtfToUniChar(source, &c); + if ((c >= 0xD800) && (c <= 0xDFFF)) { + c = 0xFFFD; } source += clen; numBytes -= clen; ftFont = GetFont(fontPtr, c, 0.0); @@ -1094,11 +1074,10 @@ LOCK; XftDrawGlyphFontSpec(fontPtr->ftDraw, xftcolor, specs, nspec); UNLOCK; } - doUnderlineStrikeout: if (tsdPtr->clipRegion != NULL) { XftDrawSetClip(fontPtr->ftDraw, NULL); } if (fontPtr->font.fa.underline != 0) { XFillRectangle(display, drawable, gc, xStart, @@ -1190,19 +1169,15 @@ currentFtFont = NULL; originX = originY = 0; while (numBytes > 0) { XftFont *ftFont; - FcChar32 c; - - clen = FcUtf8ToUcs4((FcChar8 *) source, &c, numBytes); - if (clen <= 0) { - /* - * This should not happen, but it can. 
- */ - - goto doUnderlineStrikeout; + int c; + + clen = TkUtfToUniChar(source, &c); + if ((c >= 0xD800) && (c <= 0xDFFF)) { + c = 0xFFFD; } source += clen; numBytes -= clen; ftFont = GetFont(fontPtr, c, angle); @@ -1288,19 +1263,15 @@ XftDrawSetClip(fontPtr->ftDraw, tsdPtr->clipRegion); } nspec = 0; while (numBytes > 0) { XftFont *ftFont, *ft0Font; - FcChar32 c; - - clen = FcUtf8ToUcs4((FcChar8 *) source, &c, numBytes); - if (clen <= 0) { - /* - * This should not happen, but it can. - */ - - goto doUnderlineStrikeout; + int c; + + clen = TkUtfToUniChar(source, &c); + if ((c >= 0xD800) && (c <= 0xDFFF)) { + c = 0xFFFD; } source += clen; numBytes -= clen; ftFont = GetFont(fontPtr, c, angle); @@ -1339,11 +1310,10 @@ XftDrawGlyphFontSpec(fontPtr->ftDraw, xftcolor, specs, nspec); UNLOCK; } #endif /* XFT_HAS_FIXED_ROTATED_PLACEMENT */ - doUnderlineStrikeout: if (tsdPtr->clipRegion != NULL) { XftDrawSetClip(fontPtr->ftDraw, NULL); } if (fontPtr->font.fa.underline || fontPtr->font.fa.overstrike) { XPoint points[5]; Index: jni/sdl2tk/unix/tkUnixSelect.c ================================================================== --- jni/sdl2tk/unix/tkUnixSelect.c +++ jni/sdl2tk/unix/tkUnixSelect.c @@ -635,10 +635,13 @@ * guarantee that propInfo is NULL-terminated, so we might have to * copy the string. 
*/ char *propData = propInfo; +#if TCL_UTF_MAX < 4 + Tcl_Encoding encoding; +#endif if (format != 8) { Tcl_SetObjResult(retrPtr->interp, Tcl_ObjPrintf( "bad format for string selection: wanted \"8\", got \"%d\"", format)); @@ -651,12 +654,27 @@ if (propInfo[numItems] != '\0') { propData = (char *) ckalloc(numItems + 1); strcpy(propData, propInfo); propData[numItems] = '\0'; } + + interp = retrPtr->interp; + Tcl_Preserve(interp); +#if TCL_UTF_MAX < 4 + encoding = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_ExternalToUtfDString(encoding, propInfo, (int)numItems, &ds); + if (encoding) { + Tcl_FreeEncoding(encoding); + } + retrPtr->result = retrPtr->proc(retrPtr->clientData, + retrPtr->interp, Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); +#else retrPtr->result = retrPtr->proc(retrPtr->clientData, retrPtr->interp, propData); +#endif + Tcl_Release(interp); if (propData != propInfo) { ckfree(propData); } } else if (type == dispPtr->incrAtom) { @@ -969,18 +987,37 @@ incr.converts[i].offset = 0; XChangeProperty(reply.xsel.display, reply.xsel.requestor, property, type, format, PropModeReplace, (unsigned char *) propPtr, numItems); } else if (type == winPtr->dispPtr->utf8Atom) { +#if TCL_UTF_MAX > 3 /* * This matches selection requests of type UTF8_STRING, which * allows us to pass our utf-8 information untouched. */ XChangeProperty(reply.xsel.display, reply.xsel.requestor, property, type, 8, PropModeReplace, (unsigned char *) buffer, numItems); +#else + /* + * Must ensure not to export WTF-8 here. 
+ */ + Tcl_DString ds; + Tcl_Encoding encoding; + + encoding = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_UtfToExternalDString(encoding, (char *) buffer, -1, &ds); + XChangeProperty(reply.xsel.display, reply.xsel.requestor, + property, type, 8, PropModeReplace, + (unsigned char *) Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds)); + if (encoding) { + Tcl_FreeEncoding(encoding); + } + Tcl_DStringFree(&ds); +#endif } else if ((type == XA_STRING) || (type == winPtr->dispPtr->compoundTextAtom)) { Tcl_DString ds; Tcl_Encoding encoding; Index: jni/sdl2tk/unix/tkUnixWm.c ================================================================== --- jni/sdl2tk/unix/tkUnixWm.c +++ jni/sdl2tk/unix/tkUnixWm.c @@ -5322,10 +5322,11 @@ { WmInfo *wmPtr = winPtr->wmInfoPtr; Atom XA_UTF8_STRING = Tk_InternAtom((Tk_Window) winPtr, "UTF8_STRING"); const char *string; Tcl_DString ds; + Tcl_Encoding encoding; /* * Set window title: */ @@ -5333,12 +5334,15 @@ Tcl_UtfToExternalDString(NULL, string, -1, &ds); XStoreName(winPtr->display, wmPtr->wrapperPtr->window, Tcl_DStringValue(&ds)); Tcl_DStringFree(&ds); + encoding = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_UtfToExternalDString(encoding, string, -1, &ds); SetWindowProperty(wmPtr->wrapperPtr, "_NET_WM_NAME", XA_UTF8_STRING, 8, - string, strlen(string)); + Tcl_DStringValue(&ds), Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); /* * Set icon name: */ @@ -5346,12 +5350,19 @@ Tcl_UtfToExternalDString(NULL, wmPtr->iconName, -1, &ds); XSetIconName(winPtr->display, wmPtr->wrapperPtr->window, Tcl_DStringValue(&ds)); Tcl_DStringFree(&ds); + Tcl_UtfToExternalDString(encoding, wmPtr->iconName, -1, &ds); SetWindowProperty(wmPtr->wrapperPtr, "_NET_WM_ICON_NAME", - XA_UTF8_STRING, 8, wmPtr->iconName, strlen(wmPtr->iconName)); + XA_UTF8_STRING, 8, + Tcl_DStringValue(&ds), Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); + } + + if (encoding) { + Tcl_FreeEncoding(encoding); } } /* *-------------------------------------------------------------- Index: 
jni/sdl2tk/win/tkWinFont.c ================================================================== --- jni/sdl2tk/win/tkWinFont.c +++ jni/sdl2tk/win/tkWinFont.c @@ -24,17 +24,12 @@ * a given Unicode character. * * Under Windows, a "font family" is uniquely identified by its face name. */ -#if TCL_UTF_MAX > 3 #define FONTMAP_SHIFT 12 #define FONTMAP_PAGES (1 << (21 - FONTMAP_SHIFT)) -#else -#define FONTMAP_SHIFT 10 -#define FONTMAP_PAGES (1 << (sizeof(Tcl_UniChar)*8 - FONTMAP_SHIFT)) -#endif #define FONTMAP_BITSPERPAGE (1 << FONTMAP_SHIFT) typedef struct FontFamily { struct FontFamily *nextPtr; /* Next in list of all known font families. */ size_t refCount; /* How many SubFonts are referring to this @@ -821,11 +816,11 @@ { HDC hdc; HFONT oldFont; WinFont *fontPtr; int curX, moretomeasure; - Tcl_UniChar ch; + int ch; SIZE size; FontFamily *familyPtr; Tcl_DString runString; SubFont *thisSubFontPtr, *lastSubFontPtr; const char *p, *end, *next = NULL, *start; @@ -852,11 +847,11 @@ moretomeasure = 0; curX = 0; start = source; end = start + numBytes; for (p = start; p < end; ) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); thisSubFontPtr = FindSubFontForChar(fontPtr, ch, &lastSubFontPtr); if (thisSubFontPtr != lastSubFontPtr) { familyPtr = lastSubFontPtr->familyPtr; Tcl_UtfToExternalDString(familyPtr->encoding, start, p - start, &runString); @@ -914,11 +909,11 @@ int lastSize = 0; familyPtr = lastSubFontPtr->familyPtr; Tcl_DStringInit(&runString); for (p = start; p < end; ) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); Tcl_UtfToExternal(NULL, familyPtr->encoding, p, (int) (next - p), 0, NULL, buf, sizeof(buf), NULL, &dstWrote, NULL); Tcl_DStringAppend(&runString,buf,dstWrote); size.cx = 0; @@ -963,17 +958,17 @@ * Scan the string for the last word break and than repeat the whole * procedure without the maxLength limit or any flags. 
*/ const char *lastWordBreak = NULL; - Tcl_UniChar ch2; + int ch2; end = p; p = source; ch = ' '; while (p < end) { - next = p + Tcl_UtfToUniChar(p, &ch2); + next = p + TkUtfToUniChar(p, &ch2); if ((ch != ' ') && (ch2 == ' ')) { lastWordBreak = p; } p = next; ch = ch2; @@ -1470,11 +1465,11 @@ int numBytes, /* Length of string in bytes. */ double x, double y, /* Coordinates at which to place origin of * string when drawing. */ double angle) { - Tcl_UniChar ch; + int ch; SIZE size; HFONT oldFont; FontFamily *familyPtr; Tcl_DString runString; const char *p, *end, *next; @@ -1486,11 +1481,11 @@ oldFont = SelectFont(hdc, fontPtr, lastSubFontPtr, angle); GetTextMetricsW(hdc, &tm); end = source + numBytes; for (p = source; p < end; ) { - next = p + Tcl_UtfToUniChar(p, &ch); + next = p + TkUtfToUniChar(p, &ch); thisSubFontPtr = FindSubFontForChar(fontPtr, ch, &lastSubFontPtr); /* * The drawing API has a limit of 32767 pixels in one go. * To avoid spending time on a rare case we do not measure each char, @@ -2225,11 +2220,11 @@ int row) /* Index of the page to be loaded into the * cache. 
*/ { FontFamily *familyPtr; Tcl_Encoding encoding; - char src[TCL_UTF_MAX], buf[16]; + char src[4], buf[16]; USHORT *startCount, *endCount; int i, j, bitOffset, end, segCount; subFontPtr->fontMap[row] = (char *) ckalloc(FONTMAP_BITSPERPAGE / 8); memset(subFontPtr->fontMap[row], 0, FONTMAP_BITSPERPAGE / 8); @@ -2278,11 +2273,11 @@ */ end = (row + 1) << FONTMAP_SHIFT; for (i = row << FONTMAP_SHIFT; i < end; i++) { if (Tcl_UtfToExternal(NULL, encoding, src, - Tcl_UniCharToUtf(i, src), TCL_ENCODING_STOPONERROR, NULL, + TkUniCharToUtf(i, src), TCL_ENCODING_STOPONERROR, NULL, buf, sizeof(buf), NULL, NULL, NULL) != TCL_OK) { continue; } bitOffset = i & (FONTMAP_BITSPERPAGE - 1); subFontPtr->fontMap[row][bitOffset >> 3] |= 1 << (bitOffset & 7); Index: jni/sdl2tk/win/tkWinMenu.c ================================================================== --- jni/sdl2tk/win/tkWinMenu.c +++ jni/sdl2tk/win/tkWinMenu.c @@ -509,11 +509,10 @@ : Tcl_GetString(mePtr->labelPtr); const char *accel = ((menuPtr->menuType == MENUBAR) || (mePtr->accelPtr == NULL)) ? "" : Tcl_GetString(mePtr->accelPtr); const char *p, *next; Tcl_DString itemString; - Tcl_UniChar ch = 0; /* * We have to construct the string with an ampersand preceeding the * underline character, and a tab seperating the text and the accel * text. We have to be careful with ampersands in the string. 
@@ -526,21 +525,20 @@ Tcl_DStringAppend(&itemString, "&", 1); } if (*p == '&') { Tcl_DStringAppend(&itemString, "&", 1); } - next = p + Tcl_UtfToUniChar(p, &ch); + next = TkUtfNext(p); Tcl_DStringAppend(&itemString, p, (int) (next - p)); } - ch = 0; if (mePtr->accelLength > 0) { Tcl_DStringAppend(&itemString, "\t", 1); for (p = accel, i = 0; *p != '\0'; i++, p = next) { if (*p == '&') { Tcl_DStringAppend(&itemString, "&", 1); } - next = p + Tcl_UtfToUniChar(p, &ch); + next = TkUtfNext(p); Tcl_DStringAppend(&itemString, p, (int) (next - p)); } } itemText = (char *)ckalloc(Tcl_DStringLength(&itemString) + 1); @@ -2102,15 +2100,14 @@ int len; len = Tcl_GetCharLength(mePtr->labelPtr); if (mePtr->underline < len) { const char *label, *start, *end; - Tcl_UniChar ch = 0; label = Tcl_GetString(mePtr->labelPtr); start = Tcl_UtfAtIndex(label, mePtr->underline); - end = start + Tcl_UtfToUniChar(start, &ch); + end = TkUtfNext(start); Tk_UnderlineChars(menuPtr->display, d, gc, tkfont, label, x + mePtr->indicatorSpace, y + (height + fmPtr->ascent - fmPtr->descent) / 2, (int) (start - label), (int) (end - label)); } Index: jni/sdl2tk/win/tkWinTest.c ================================================================== --- jni/sdl2tk/win/tkWinTest.c +++ jni/sdl2tk/win/tkWinTest.c @@ -178,11 +178,13 @@ Tcl_Encoding encoding; char *msgPtr; encoding = Tcl_GetEncoding(NULL, "unicode"); Tcl_ExternalToUtfDString(encoding, (char *) wMsgPtr, -1, &ds); - Tcl_FreeEncoding(encoding); + if (encoding) { + Tcl_FreeEncoding(encoding); + } LocalFree(wMsgPtr); msgPtr = Tcl_DStringValue(&ds); length = Tcl_DStringLength(&ds); Index: jni/tcl/generic/regcustom.h ================================================================== --- jni/tcl/generic/regcustom.h +++ jni/tcl/generic/regcustom.h @@ -87,11 +87,11 @@ typedef unsigned uchr; /* Unsigned type that will hold a chr. 
*/ typedef int celt; /* Type to hold chr, or NOCELT */ #define NOCELT (-1) /* Celt value which is not valid chr */ #define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */ #define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 #define CHRBITS 32 /* Bits in a chr; must not use sizeof */ #define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */ #define CHR_MAX 0xFFFFFFFF /* CHR_MAX-CHR_MIN+1 should fit in uchr */ #else #define CHRBITS 16 /* Bits in a chr; must not use sizeof */ Index: jni/tcl/generic/tcl.h ================================================================== --- jni/tcl/generic/tcl.h +++ jni/tcl/generic/tcl.h @@ -2189,17 +2189,20 @@ */ #ifndef TCL_UTF_MAX #define TCL_UTF_MAX 3 #endif +#if (TCL_UTF_MAX != 3) && (TCL_UTF_MAX != 4) +#error TCL_UTF_MAX must be 3 or 4 +#endif /* * This represents a Unicode character. Any changes to this should also be * reflected in regcustom.h. */ -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 /* * unsigned int isn't 100% accurate as it should be a strict 4-byte value * (perhaps wchar_t). 64-bit systems may have troubles. The size of this * value must be reflected correctly in regcustom.h and * in tclEncoding.c. 
Index: jni/tcl/generic/tclBinary.c ================================================================== --- jni/tcl/generic/tclBinary.c +++ jni/tcl/generic/tclBinary.c @@ -449,10 +449,24 @@ byteArrayPtr = (ByteArray *) ckalloc(BYTEARRAY_SIZE(length)); byteArrayPtr->bytes = (unsigned char *) (byteArrayPtr + 1); for (dst = byteArrayPtr->bytes; src < srcEnd; ) { src += TclUtfToUniChar(src, &ch); +#if TCL_UTF_MAX == 3 + if ((ch & 0xFC00) == 0xD800) { + if (Tcl_UtfCharComplete(src, srcEnd - src)) { + int uch = ch; + int len2 = TclUtfToUniChar(src, &ch); + + if ((ch & 0xFC00) == 0xDC00) { + src += len2; + uch = (((uch&0x3ff)<<10) | (ch&0x3ff)) + 0x10000; + } + ch = UCHAR(uch); + } + } +#endif *dst++ = UCHAR(ch); } byteArrayPtr->used = dst - byteArrayPtr->bytes; byteArrayPtr->allocated = length; @@ -1213,11 +1227,11 @@ goto error; badField: { Tcl_UniChar ch = 0; - char buf[TCL_UTF_MAX + 1] = ""; + char buf[TCL_UTF_MAX + 1]; TclUtfToUniChar(errorString, &ch); buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad field specifier \"%s\"", buf)); @@ -1592,11 +1606,11 @@ goto error; badField: { Tcl_UniChar ch = 0; - char buf[TCL_UTF_MAX + 1] = ""; + char buf[TCL_UTF_MAX + 1]; TclUtfToUniChar(errorString, &ch); buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad field specifier \"%s\"", buf)); Index: jni/tcl/generic/tclCmdIL.c ================================================================== --- jni/tcl/generic/tclCmdIL.c +++ jni/tcl/generic/tclCmdIL.c @@ -4417,32 +4417,34 @@ * bail out immediately. 
*/ if ((*left != '\0') && (*right != '\0')) { Tcl_UniChar lch = 0, rch = 0; - int len; - - len = TclUtfToUniChar(left, &lch); - uniLeft = lch; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(left, &lch); - uniLeft = (((uniLeft&0x3FF)<<10) | (lch&0x3FF)) + 0x10000; - } -#endif - left += len; - - len = TclUtfToUniChar(right, &rch); - uniRight = rch; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(right, &rch); - uniRight = (((uniRight&0x3FF)<<10) | (rch&0x3FF)) + 0x10000; - } -#endif - right += len; - + + left += TclUtfToUniChar(left, &lch); + right += TclUtfToUniChar(right, &rch); + + uniLeft = lch; + uniRight = rch; +#if TCL_UTF_MAX == 3 + if (*left && ((lch & 0xFC00) == 0xD800)) { + int len2 = TclUtfToUniChar(left, &lch); + + if ((lch & 0xFC00) == 0xDC00) { + uniLeft = (((uniLeft&0x3FF)<<10) | (lch&0x3FF)) + 0x10000; + left += len2; + } + } + if (*right && ((rch & 0xFC00) == 0xD800)) { + int len2 = TclUtfToUniChar(right, &rch); + + if ((rch & 0xFC00) == 0xDC00) { + uniRight = (((uniRight&0x3FF)<<10) | (rch&0x3FF)) + 0x10000; + right += len2; + } + } +#endif /* * Convert both chars to lower for the comparison, because * dictionary sorts are case-insensitive. Covert to lower, not * upper, so chars between Z and a will sort before A (where most * other interesting punctuations occur). 
Index: jni/tcl/generic/tclCmdMZ.c ================================================================== --- jni/tcl/generic/tclCmdMZ.c +++ jni/tcl/generic/tclCmdMZ.c @@ -28,11 +28,11 @@ static Tcl_NRPostProc TryPostFinal; static Tcl_NRPostProc TryPostHandler; static int UniCharIsAscii(int character); static int UniCharIsHexDigit(int character); -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 static int MemCmp(const void *s1, const void *s2, size_t n, int flags); static int NumCodePointsUtf(const char *src, int length, int *flagPtr); static int NumCodePointsUnicode(const Tcl_UniChar *src, @@ -78,11 +78,11 @@ "\xE2\x81\xA0" /* word joiner (U+2060) */ "\xE3\x80\x80" /* ideographic space (U+3000) */ "\xEF\xBB\xBF" /* zero width no-break space (U+feff) */ ; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 /* *--------------------------------------------------------------------------- * * MemCmp -- * @@ -96,11 +96,11 @@ { return memcmp(s1, s2, n); } #endif -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 /* *--------------------------------------------------------------------------- * * NumCodePointsUtf -- * @@ -120,42 +120,53 @@ const char *src, /* The UTF-8 string to measure. */ int length, /* The length of the string in bytes. */ int *flagPtr) /* Location to receive end flag. 
*/ { Tcl_UniChar ch = 0; - int len, i = 0; + int i = 0; const char *endPtr = src + length - TCL_UTF_MAX; + *flagPtr = 0; while (src < endPtr) { - len = TclUtfToUniChar(src, &ch); - if (len) { - ch = 0; - } else { - len = TclUtfToUniChar(src, &ch); + src += TclUtfToUniChar(src, &ch); + if ((ch & 0xFC00) == 0xD800) { + if ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { + int len = TclUtfToUniChar(src, &ch); + + if ((ch & 0xFC00) == 0xDC00) { + --i; + src += len; + } + } } - src += len; i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - len = TclUtfToUniChar(src, &ch); - if (len) { - ch = 0; - } else { - len = TclUtfToUniChar(src, &ch); + src += TclUtfToUniChar(src, &ch); + if ((ch & 0xFC00) == 0xD800) { + if ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { + int len = TclUtfToUniChar(src, &ch); + + if ((ch & 0xFC00) == 0xDC00) { + --i; + src += len; + } + } } - src += len; i++; } if (src < endPtr) { i += endPtr - src; + } else if (i && ((ch & 0xFC00) == 0xD800)) { + *flagPtr = 1; } return i; } #endif -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 /* *---------------------------------------------------------------------- * * UtfNcmp -- * @@ -176,15 +187,55 @@ const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ size_t numCp, /* Number of code points to compare. */ int flags) /* Flags describing string ends. */ { - return Tcl_UtfNcmp(cs, ct, numCp); + Tcl_UniChar ch1 = 0, ch2 = 0; + int uch1, uch2; + + while (numCp-- > 0) { + + cs += TclUtfToUniChar(cs, &ch1); + ct += TclUtfToUniChar(ct, &ch2); + + uch1 = ch1; + uch2 = ch2; + + if ((ch1 & 0xFC00) == 0xD800) { + if ((flags & 1) && (numCp == 0)) { + /* String ends with high surrogate. 
*/ + } else { + int len = TclUtfToUniChar(cs, &ch1); + + if ((ch1 & 0xFC00) == 0xDC00) { + uch1 = (((uch1&0x3FF)<<10) | (ch1&0x3FF)) + 0x10000; + cs += len; + } + } + } + if ((ch2 & 0xFC00) == 0xD800) { + if ((flags & 2) && (numCp == 0)) { + /* String ends with high surrogate. */ + } else { + int len = TclUtfToUniChar(ct, &ch2); + + if ((ch2 & 0xFC00) == 0xDC00) { + uch2 = (((uch2&0x3FF)<<10) | (ch2&0x3FF)) + 0x10000; + ct += len; + } + } + } + + if (uch1 != uch2) { + return (uch1 - uch2); + } + } + return 0; } #endif -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 /* *---------------------------------------------------------------------- * * UtfNcasecmp -- * @@ -205,15 +256,59 @@ const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ size_t numCp, /* Number of code points to compare. */ int flags) /* Flags describing string ends. */ { - return Tcl_UtfNcasecmp(cs, ct, numCp); + Tcl_UniChar ch1 = 0, ch2 = 0; + int uch1, uch2; + + while (numCp-- > 0) { + + cs += TclUtfToUniChar(cs, &ch1); + ct += TclUtfToUniChar(ct, &ch2); + + uch1 = ch1; + uch2 = ch2; + + if ((ch1 & 0xFC00) == 0xD800) { + if ((flags & 1) && (numCp == 0)) { + /* String ends with high surrogate. */ + } else { + int len = TclUtfToUniChar(cs, &ch1); + + if ((ch1 & 0xFC00) == 0xDC00) { + uch1 = (((uch1&0x3FF)<<10) | (ch1&0x3FF)) + 0x10000; + cs += len; + } + } + } + if ((ch2 & 0xFC00) == 0xD800) { + if ((flags & 2) && (numCp == 0)) { + /* String ends with high surrogate. 
*/ + } else { + int len = TclUtfToUniChar(ct, &ch2); + + if ((ch2 & 0xFC00) == 0xDC00) { + uch2 = (((uch2&0x3FF)<<10) | (ch2&0x3FF)) + 0x10000; + ct += len; + } + } + } + + if (uch1 != uch2) { + uch1 = TclUCS4ToLower(uch1); + uch2 = TclUCS4ToLower(uch2); + if (uch1 != uch2) { + return (uch1 - uch2); + } + } + } + return 0; } #endif -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 /* *--------------------------------------------------------------------------- * * NumCodePointsUnicode -- * @@ -249,11 +344,11 @@ } return n; } #endif -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 /* *---------------------------------------------------------------------- * * UniCharNcmp -- * @@ -303,11 +398,11 @@ } return 0; } #endif -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 /* *---------------------------------------------------------------------- * * UniCharNcasecmp -- * @@ -895,11 +990,11 @@ */ int slen, nocase; int (*strCmpFn)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long); Tcl_UniChar *p; -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 Tcl_UniChar wsrclc; #endif numMatches = 0; nocase = (cflags & TCL_REG_NOCASE); @@ -926,16 +1021,16 @@ numMatches++; } wlen = 0; } } else { -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 wsrclc = Tcl_UniCharToLower(*wsrc); #endif for (p = wfirstChar = wstring; wstring < wend; wstring++) { if ( -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 (*wstring == *wsrc || (nocase && Tcl_UniCharToLower(*wstring)==wsrclc)) && (slen==1 || (strCmpFn(wstring, wsrc, (unsigned long) slen) == 0)) #else @@ -1394,15 +1489,20 @@ for ( ; stringPtr < end; stringPtr += len) { int fullchar; len = TclUtfToUniChar(stringPtr, &ch); fullchar = ch; +#if TCL_UTF_MAX == 3 + if ((stringPtr + len < end) && (fullchar <= 0xFFFF) + && ((fullchar & 0xFC00) == 0xD800)) { + int len2 = TclUtfToUniChar(stringPtr + len, &ch); -#if TCL_UTF_MAX == 4 - if (!len) { - len += TclUtfToUniChar(stringPtr, &ch); - fullchar = (((fullchar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; + if ((ch & 0xFC00) == 0xDC00) { + 
fullchar = ((fullchar & 0x3FF) << 10) | (ch & 0x3FF); + fullchar += 0x10000; + len += len2; + } } #endif /* * Assume Tcl_UniChar is an integral type... @@ -1453,14 +1553,20 @@ Tcl_DStringInit(&ds); p = splitChars; while (p < splitChars + splitCharLen) { len = TclUtfToUniChar(p, &ch); uch = ch; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(p, &ch); - uch = (((uch&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; +#if TCL_UTF_MAX == 3 + if ((ch & 0xFC00) == 0xD800) { + int len2; + + ch = 0; + len2 = TclUtfToUniChar(p + len, &ch); + if ((ch & 0xFC00) == 0xDC00) { + len += len2; + uch = (((uch&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + } } #endif Tcl_DStringAppend(&ds, (char *) &uch, sizeof(int)); p += len; } @@ -1477,14 +1583,20 @@ */ for (element = stringPtr; stringPtr < end; stringPtr += len) { len = TclUtfToUniChar(stringPtr, &ch); uch = ch; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(stringPtr, &ch); - uch = (((uch&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; +#if TCL_UTF_MAX == 3 + if ((ch & 0xFC00) == 0xD800) { + int len2; + + ch = 0; + len2 = TclUtfToUniChar(stringPtr + len, &ch); + if ((ch & 0xFC00) == 0xDC00) { + len += len2; + uch = (((uch&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + } } #endif for (i = 0; i < splitCharLen; i++) { if (uch == ((int *)Tcl_DStringValue(&ds))[i]) { TclNewStringObj(objPtr, element, stringPtr - element); @@ -1766,19 +1878,13 @@ if (TclIsPureByteArray(objv[1])) { unsigned char uch = UCHAR(ch); Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1)); } else { - char buf[TCL_UTF_MAX] = ""; + char buf[TCL_UTF_MAX]; length = Tcl_UniCharToUtf(ch, buf); -#if TCL_UTF_MAX == 4 - if (!length) { - /* Special case for handling high surrogates. 
*/ - length = Tcl_UniCharToUtf(-1, buf); - } -#endif Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length)); } } return TCL_OK; } @@ -2142,14 +2248,19 @@ for (; string1 < end; string1 += length2, failat++) { int fullchar; length2 = TclUtfToUniChar(string1, &ch); fullchar = ch; -#if TCL_UTF_MAX == 4 - if (!length2) { - length2 = TclUtfToUniChar(string1, &ch); - fullchar = (((fullchar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; +#if TCL_UTF_MAX == 3 + if ((string1 + length2 < end) && ((ch & 0xFC00) == 0xD800)) { + int length3 = TclUtfToUniChar(string1 + length2, &ch); + + if ((ch & 0xFC00) == 0xDC00) { + fullchar = ((fullchar & 0x3FF) << 10) | (ch & 0x3FF); + fullchar += 0x10000; + length2 += length3; + } } #endif if (!chcomp(fullchar)) { result = 0; break; @@ -2339,11 +2450,11 @@ * larger strings. */ int mapLen; Tcl_UniChar *mapString; -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 Tcl_UniChar u2lc; #endif ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); p = ustring1; @@ -2353,16 +2464,16 @@ */ ustring1 = end; } else { mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 u2lc = (nocase ? 
Tcl_UniCharToLower(*ustring2) : 0); #endif for (; ustring1 < end; ustring1++) { if ( -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 ((*ustring1 == *ustring2) || (nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) && (length2==1 || strCmpFn(ustring1, ustring2, (unsigned long) length2) == 0) #else @@ -2381,11 +2492,11 @@ } } } } else { Tcl_UniChar **mapStrings; -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 Tcl_UniChar *u2lc = NULL; #endif int *mapLens; /* @@ -2397,20 +2508,20 @@ mapStrings = (Tcl_UniChar **) TclStackAlloc(interp, mapElemc*2*sizeof(Tcl_UniChar *)); mapLens = (int *) TclStackAlloc(interp, mapElemc * 2 * sizeof(int)); -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 if (nocase) { u2lc = (Tcl_UniChar *) TclStackAlloc(interp, mapElemc * sizeof(Tcl_UniChar)); } #endif for (index = 0; index < mapElemc; index++) { mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], mapLens+index); -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 if (nocase && ((index % 2) == 0)) { u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); } #endif } @@ -2421,11 +2532,11 @@ */ ustring2 = mapStrings[index]; length2 = mapLens[index]; if ( -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 (length2 > 0) && ((*ustring1 == *ustring2) || (nocase && (Tcl_UniCharToLower(*ustring1) == u2lc[index/2]))) && /* Restrict max compare length. 
*/ (end-ustring1 >= length2) && ((length2 == 1) || !strCmpFn(ustring2, ustring1, length2)) @@ -2459,11 +2570,11 @@ mapStrings[index+1], mapLens[index+1]); break; } } } -#if TCL_UTF_MAX != 4 +#if TCL_UTF_MAX > 3 if (nocase) { TclStackFree(interp, u2lc); } #endif TclStackFree(interp, mapLens); @@ -2869,24 +2980,44 @@ if (index >= numChars) { index = numChars - 1; } cur = 0; if (index > 0) { +#if TCL_UTF_MAX == 3 + int len; +#endif p = Tcl_UtfAtIndex(string, index); -#if TCL_UTF_MAX == 4 - length = TclUtfToUniChar(p, &ch); +#if TCL_UTF_MAX == 3 + len = TclUtfToUniChar(p, &ch); uch = ch; - if (!length) { - TclUtfToUniChar(p, &ch); - uch = (((uch&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + if ((p + len < string + length) && (ch & 0xFC00) == 0xD800) { + TclUtfToUniChar(p + len, &ch); + if ((ch & 0xFC00) == 0xDC00) { + uch = (((uch&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + } + } else if ((ch & 0xFC00) == 0xDC00) { + const char *pp; + int ppInc = 0; + + pp = TclUtfPrev(p, string); + do { + pp += ppInc; + ppInc = TclUtfToUniChar(pp, &ch); + } while (pp + ppInc < p); + if ((ch & 0xFC00) == 0xD800) { + p = pp; + uch = (((ch&0x3FF)<<10) | (uch&0x3FF)) + 0x10000; + index--; + } } #else TclUtfToUniChar(p, &ch); uch = ch; #endif + for (cur = index; cur >= 0; cur--) { int delta = 0; const char *next; if (!Tcl_UniCharIsWordChar(uch)) { @@ -2895,27 +3026,35 @@ next = TclUtfPrev(p, string); do { next += delta; delta = TclUtfToUniChar(next, &ch); - uch = ch; -#if TCL_UTF_MAX == 4 - if (!delta) { - delta = TclUtfToUniChar(next, &ch); - uch = (((uch&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + } while (next + delta < p); + p = next; + uch = ch; + +#if TCL_UTF_MAX == 3 + if ((p > string) && (ch & 0xFC00) == 0xDC00) { + const char *pp; + int ppInc = 0; + + pp = TclUtfPrev(p, string); + do { + pp += ppInc; + ppInc = TclUtfToUniChar(pp, &ch); + } while (pp + ppInc < p); + if ((ch & 0xFC00) == 0xD800) { + p = pp; + uch = (((ch&0x3FF)<<10) | (uch&0x3FF)) + 0x10000; if (!Tcl_UniCharIsWordChar(uch)) { - goto 
endForLoop; + break; } cur--; } + } #endif - } while (next + delta < p); - p = next; } -#if TCL_UTF_MAX == 4 -endForLoop: -#endif if (cur != index) { cur += 1; } } Tcl_SetObjResult(interp, Tcl_NewIntObj(cur)); @@ -2967,26 +3106,26 @@ } if (index < numChars) { p = Tcl_UtfAtIndex(string, index); end = string+length; for (cur = index; p < end; cur++) { -#if TCL_UTF_MAX == 4 - length = TclUtfToUniChar(p, &ch); - uch = ch; - if (!length) { - length = TclUtfToUniChar(p, &ch); - uch = (((uch&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; - if (Tcl_UniCharIsWordChar(uch)) { - cur++; - continue; - } - break; - } - p += length; -#else p += TclUtfToUniChar(p, &ch); uch = ch; +#if TCL_UTF_MAX == 3 + if ((p < end) && ((ch & 0xFC00) == 0xD800)) { + int len = TclUtfToUniChar(p, &ch); + + if ((ch & 0xFC00) == 0xDC00) { + uch = (((uch&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + p += len; + if (Tcl_UniCharIsWordChar(uch)) { + cur++; + continue; + } + break; + } + } #endif if (!Tcl_UniCharIsWordChar(uch)) { break; } } @@ -3146,11 +3285,11 @@ int reqlength) /* requested length; -1 to compare whole * strings */ { const char *s1, *s2; int empty, length, match, s1len, s2len; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 int s1flag = 0, s2flag = 0; typedef int (*memCmpFn_t)(const void *, const void *, size_t, int); #else typedef int (*memCmpFn_t)(const void *, const void *, size_t); #endif @@ -3172,11 +3311,11 @@ * arrays anyway, and we have no memcasecmp() for some reason... 
:^) */ s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif } else if ((value1Ptr->typePtr == &tclStringType) @@ -3189,16 +3328,16 @@ */ if (nocase) { s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 s1len = NumCodePointsUnicode((Tcl_UniChar *) s1, s1len, &s1flag); s2len = NumCodePointsUnicode((Tcl_UniChar *) s2, s2len, &s2flag); memCmpFn = (memCmpFn_t) UniCharNcasecmp; #else - memCmpFn = (memCmpFn_t) Tcl_UniCharNcasecmp; + memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp; #endif } else { s1len = Tcl_GetCharLength(value1Ptr); s2len = Tcl_GetCharLength(value2Ptr); if ((s1len == value1Ptr->length) @@ -3205,11 +3344,11 @@ && (value1Ptr->bytes != NULL) && (s2len == value2Ptr->length) && (value2Ptr->bytes != NULL)) { s1 = value1Ptr->bytes; s2 = value2Ptr->bytes; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif } else { @@ -3220,19 +3359,19 @@ 1 #else checkEq #endif /* WORDS_BIGENDIAN */ ) { -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif s1len *= sizeof(Tcl_UniChar); s2len *= sizeof(Tcl_UniChar); } else { -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 s1len = NumCodePointsUnicode((Tcl_UniChar *) s1, s1len, &s1flag); s2len = NumCodePointsUnicode((Tcl_UniChar *) s2, s2len, &s2flag); memCmpFn = (memCmpFn_t) UniCharNcmp; @@ -3282,11 +3421,11 @@ /* * When we have equal-length we can check only for (in)equality. * We can use memcmp() in all (n)eq cases because we don't need to * worry about lexical LE/BE variance. */ -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 memCmpFn = MemCmp; #else memCmpFn = memcmp; #endif } else { @@ -3294,14 +3433,15 @@ * As a catch-all we will work with UTF-8. 
We cannot use memcmp() * as that is unsafe with any string containing NUL (\xC0\x80 in * Tcl's utf rep). We can use the more efficient TclpUtfNcmp2 if * we are case-sensitive and no specific length was requested. */ -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 s1len = NumCodePointsUtf(s1, s1len, &s1flag); s2len = NumCodePointsUtf(s2, s2len, &s2flag); - memCmpFn = (memCmpFn_t) (nocase ? UtfNcasecmp : UtfNcmp); + memCmpFn = (memCmpFn_t) + (nocase ? UtfNcasecmp : UtfNcmp); #else if ((reqlength < 0) && !nocase) { memCmpFn = (memCmpFn_t) TclpUtfNcmp2; } else { s1len = Tcl_NumUtfChars(s1, s1len); @@ -3330,11 +3470,11 @@ } else { /* * The comparison function should compare up to the minimum byte * length only. */ -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 match = memCmpFn(s1, s2, (size_t) length, s1flag | (s2flag << 1)); #else match = memCmpFn(s1, s2, (size_t) length); #endif } Index: jni/tcl/generic/tclCompCmdsSZ.c ================================================================== --- jni/tcl/generic/tclCompCmdsSZ.c +++ jni/tcl/generic/tclCompCmdsSZ.c @@ -1491,11 +1491,11 @@ } for (endTokenPtr = tokenPtr + parse.numTokens; tokenPtr < endTokenPtr; tokenPtr = TokenAfter(tokenPtr)) { int length, literal, catchRange, breakJump; - char buf[TCL_UTF_MAX] = ""; + char buf[TCL_UTF_MAX*2]; JumpFixup startFixup, okFixup, returnFixup, breakFixup; JumpFixup continueFixup, otherFixup, endFixup; switch (tokenPtr->type) { case TCL_TOKEN_TEXT: Index: jni/tcl/generic/tclCompile.c ================================================================== --- jni/tcl/generic/tclCompile.c +++ jni/tcl/generic/tclCompile.c @@ -1721,11 +1721,11 @@ } break; case TCL_TOKEN_BS: if (tempPtr != NULL) { - char utfBuf[TCL_UTF_MAX] = ""; + char utfBuf[TCL_UTF_MAX*2]; int length = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, utfBuf); #if TCL_UTF_MAX > 3 if ((length > 1) && (numComponents > 0)) { @@ -2388,11 +2388,11 @@ * Must be at least 1. */ CompileEnv *envPtr) /* Holds the resulting instructions. 
*/ { Tcl_DString textBuffer; /* Holds concatenated chars from adjacent * TCL_TOKEN_TEXT, TCL_TOKEN_BS tokens. */ - char buffer[TCL_UTF_MAX] = ""; + char buffer[TCL_UTF_MAX*2]; int i, numObjsToConcat, length, adjust; unsigned char *entryCodeNext = envPtr->codeNext; #define NUM_STATIC_POS 20 int isLiteral, maxNumCL, numCL; int *clPosition = NULL; Index: jni/tcl/generic/tclDisassemble.c ================================================================== --- jni/tcl/generic/tclDisassemble.c +++ jni/tcl/generic/tclDisassemble.c @@ -892,25 +892,14 @@ case '\v': Tcl_AppendToObj(appendObj, "\\v", -1); i += 2; continue; default: -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 if (ch > 0xFFFF) { Tcl_AppendPrintfToObj(appendObj, "\\U%08x", ch); i += 10; - } else -#elif TCL_UTF_MAX > 3 - /* If len == 0, this means we have a char > 0xFFFF, resulting in - * TclUtfToUniChar producing a surrogate pair. We want to output - * this pair as a single Unicode character. - */ - if (len == 0) { - int upper = ((ch & 0x3FF) + 1) << 10; - len = TclUtfToUniChar(p, &ch); - Tcl_AppendPrintfToObj(appendObj, "\\U%08x", upper + (ch & 0x3FF)); - i += 10; } else #endif if (ch < 0x20 || ch >= 0x7F) { Tcl_AppendPrintfToObj(appendObj, "\\u%04x", ch); i += 6; Index: jni/tcl/generic/tclEncoding.c ================================================================== --- jni/tcl/generic/tclEncoding.c +++ jni/tcl/generic/tclEncoding.c @@ -222,11 +222,11 @@ static int TableToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static size_t unilen(const char *src); -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 static size_t unilen4(const char *src); #endif static int UnicodeToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, @@ -235,15 +235,17 @@ static int UtfToUnicodeProc(ClientData clientData, const char *src, int srcLen, int 
flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); +#if TCL_UTF_MAX > 3 static int UtfToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr, int pureNullMode); +#endif static int UtfIntToUtfExtProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); @@ -270,11 +272,11 @@ static int UtfToMcesu8Proc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 static int Utf16ToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); @@ -590,27 +592,27 @@ type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; Tcl_CreateEncoding(&type); -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 type.encodingName = "utf-32"; #else type.encodingName = "unicode"; #endif type.toUtfProc = UnicodeToUtfProc; type.fromUtfProc = UtfToUnicodeProc; type.freeProc = NULL; -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 type.nullSize = 4; #else type.nullSize = 2; #endif type.clientData = NULL; Tcl_CreateEncoding(&type); -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 type.encodingName = "unicode"; type.toUtfProc = Utf16ToUtfProc; type.fromUtfProc = UtfToUtf16Proc; type.freeProc = NULL; type.nullSize = 2; @@ -1107,11 +1109,11 @@ encodingPtr->freeProc = typePtr->freeProc; encodingPtr->nullSize = typePtr->nullSize; encodingPtr->clientData = typePtr->clientData; if (typePtr->nullSize == 1) { encodingPtr->lengthProc = (LengthProc *) strlen; -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 } else if (typePtr->nullSize == 4) { encodingPtr->lengthProc = 
(LengthProc *) unilen4; #endif } else { encodingPtr->lengthProc = (LengthProc *) unilen; @@ -2277,12 +2279,12 @@ /* *------------------------------------------------------------------------- * * UtfIntToUtfExtProc -- * - * Convert from UTF-8 to UTF-8. While converting null-bytes from the - * Tcl's internal representation (0xC0, 0x80) to the official + * Convert from (U|W)TF-8 to UTF-8. While converting null-bytes + * from Tcl's internal representation (0xC0, 0x80) to the official * representation (0x00). See UtfToUtfProc for details. * * Results: * Returns TCL_OK if conversion was successful. * @@ -2317,20 +2319,85 @@ * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { +#if TCL_UTF_MAX > 3 + /* UTF-8 to UTF-8 */ + return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr, 1); +#else + /* WTF-8 to UTF-8 */ + + const char *srcStart, *srcEnd, *srcClose; + const char *dstStart, *dstEnd; + int result, numChars, charLimit = INT_MAX; + Tcl_UniChar ch; + + result = TCL_OK; + + srcStart = src; + srcEnd = src + srcLen; + srcClose = srcEnd; + if ((flags & TCL_ENCODING_END) == 0) { + srcClose -= 3; + } + if (flags & TCL_ENCODING_CHAR_LIMIT) { + charLimit = *dstCharsPtr; + } + + dstStart = dst; + dstEnd = dst + dstLen - 4; + + for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { + if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { + result = TCL_CONVERT_MULTIBYTE; + break; + } + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } + if (UCHAR(*src) == 0xc0 && (src + 1 < srcEnd) && + UCHAR(*(src+1)) == 0x80) { + *dst++ = 0; + src += 2; + } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { + ch = (unsigned char) *src; + src += 1; + dst += Tcl_UniCharToUtf(ch, dst); + } else { + int ch2, len; + + src += Tcl_UtfToUniChar(src, &ch); + ch2 = ch; + if ((ch & 0xFC00) == 0xD800) { + if 
(Tcl_UtfCharComplete(src, srcEnd - src)) { + len = Tcl_UtfToUniChar(src, &ch); + if ((ch & 0xFC00) == 0xDC00) { + src += len; + ch2 = (((ch2&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + } + } + } + dst += TclUniCharToUtfExt(ch2, dst); + } + } + *srcReadPtr = src - srcStart; + *dstWrotePtr = dst - dstStart; + *dstCharsPtr = numChars; + return result; +#endif } /* *------------------------------------------------------------------------- * * UtfExtToUtfIntProc -- * - * Convert from UTF-8 to UTF-8 while converting null-bytes from the + * Convert from UTF-8 to (U|W)TF-8 while converting null-bytes from the * official representation (0x00) to Tcl's internal representation (0xC0, * 0x80). See UtfToUtfProc for details. * * Results: * Returns TCL_OK if conversion was successful. @@ -2366,14 +2433,81 @@ * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { +#if TCL_UTF_MAX > 3 + /* UTF-8 to UTF-8 */ + return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr, 0); +#else + /* UTF-8 to WTF-8 */ + + const char *srcStart, *srcEnd, *srcClose; + const char *dstStart, *dstEnd; + int ch, result, numChars, charLimit = INT_MAX; + + result = TCL_OK; + + srcStart = src; + srcEnd = src + srcLen; + srcClose = srcEnd; + if ((flags & TCL_ENCODING_END) == 0) { + srcClose -= 4; + } + if (flags & TCL_ENCODING_CHAR_LIMIT) { + charLimit = *dstCharsPtr; + } + + dstStart = dst; + dstEnd = dst + dstLen - TCL_UTF_MAX; + + for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { + if ((src > srcClose) && (!TclUtfCharCompleteExt(src, srcEnd - src))) { + result = TCL_CONVERT_MULTIBYTE; + break; + } + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } + if (UCHAR(*src) < 0x80 && UCHAR(*src) != 0) { + *dst++ = *src++; + } else if (!TclUtfCharCompleteExt(src, srcEnd - src)) { + ch = (unsigned char) *src; + src += 1; + dst += 
Tcl_UniCharToUtf(ch, dst); + } else { + int len = TclUtfToUniCharExt(src, &ch); + + if (ch > 0xFFFF && ch <= 0x10FFFF) { + int dstLen; + + ch -= 0x10000; + dstLen = Tcl_UniCharToUtf((ch >> 10) | 0xD800, dst); + ch = (ch & 0x3FF) | 0xDC00; + if (dst + dstLen > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } + dst += dstLen; + numChars++; + } + src += len; + dst += Tcl_UniCharToUtf(ch, dst); + } + } + + *srcReadPtr = src - srcStart; + *dstWrotePtr = dst - dstStart; + *dstCharsPtr = numChars; + return result; +#endif } +#if TCL_UTF_MAX > 3 /* *------------------------------------------------------------------------- * * UtfToUtfProc -- * @@ -2421,15 +2555,12 @@ * versa. */ { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + Tcl_UniChar ch = 0; - if (flags & TCL_ENCODING_START) { - *statePtr = 0; - } result = TCL_OK; srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; @@ -2477,31 +2608,42 @@ * Always check before using TclUtfToUniChar. Not doing can so * cause it run beyond the end of the buffer! If we happen such an * incomplete char its bytes are made to represent themselves. */ - *chPtr = (unsigned char) *src; + ch = (unsigned char) *src; src += 1; - dst += Tcl_UniCharToUtf(*chPtr, dst); + dst += Tcl_UniCharToUtf(ch, dst); } else { - int len = TclUtfToUniChar(src, chPtr); - src += len; - dst += Tcl_UniCharToUtf(*chPtr, dst); -#if TCL_UTF_MAX == 4 - if (!len) { - src += TclUtfToUniChar(src, chPtr); - dst += Tcl_UniCharToUtf(*chPtr, dst); + src += TclUtfToUniChar(src, &ch); +#if TCL_UTF_MAX > 3 + /* + * Collapse surrogate pairs in both directions. 
+ */ + + if ((ch & 0xFFFFC00) == 0xD800 && + Tcl_UtfCharComplete(src, srcEnd - src)) { + Tcl_UniChar ch2; + int len; + + len = Tcl_UtfToUniChar(src, &ch2); + if ((ch2 & 0xFFFFFC00) == 0xDC00) { + src += len; + ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000; + } } #endif + dst += Tcl_UniCharToUtf(ch, dst); } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } +#endif /* *------------------------------------------------------------------------- * * UnicodeToUtfProc -- @@ -2545,15 +2687,12 @@ * output buffer. */ { const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + Tcl_UniChar ch = 0; - if (flags & TCL_ENCODING_START) { - *statePtr = 0; - } if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } result = TCL_OK; if ((srcLen % sizeof(Tcl_UniChar)) != 0) { @@ -2577,15 +2716,15 @@ /* * Special case for 1-byte utf chars for speed. Make sure we work with * Tcl_UniChar-size data. */ - *chPtr = *(Tcl_UniChar *)src; - if (*chPtr && *chPtr < 0x80) { - *dst++ = (*chPtr & 0xFF); + ch = *(Tcl_UniChar *)src; + if (ch && ch < 0x80) { + *dst++ = (ch & 0xFF); } else { - dst += Tcl_UniCharToUtf(*chPtr, dst); + dst += Tcl_UniCharToUtf(ch, dst); } src += sizeof(Tcl_UniChar); } *srcReadPtr = src - srcStart; @@ -2638,15 +2777,12 @@ * correspond to the bytes stored in the * output buffer. 
*/ { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; - Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + Tcl_UniChar ch = 0; - if (flags & TCL_ENCODING_START) { - *statePtr = 0; - } srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; @@ -2668,36 +2804,36 @@ } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - src += TclUtfToUniChar(src, chPtr); + src += TclUtfToUniChar(src, &ch); /* * Need to handle this in a way that won't cause misalignment by * casting dst to a Tcl_UniChar. [Bug 1122671] */ #ifdef WORDS_BIGENDIAN -#if TCL_UTF_MAX > 4 - *dst++ = (*chPtr >> 24); - *dst++ = ((*chPtr >> 16) & 0xFF); - *dst++ = ((*chPtr >> 8) & 0xFF); - *dst++ = (*chPtr & 0xFF); -#else - *dst++ = (*chPtr >> 8); - *dst++ = (*chPtr & 0xFF); -#endif -#else -#if TCL_UTF_MAX > 4 - *dst++ = (*chPtr & 0xFF); - *dst++ = ((*chPtr >> 8) & 0xFF); - *dst++ = ((*chPtr >> 16) & 0xFF); - *dst++ = (*chPtr >> 24); -#else - *dst++ = (*chPtr & 0xFF); - *dst++ = (*chPtr >> 8); +#if TCL_UTF_MAX > 3 + *dst++ = (ch >> 24); + *dst++ = ((ch >> 16) & 0xFF); + *dst++ = ((ch >> 8) & 0xFF); + *dst++ = (ch & 0xFF); +#else + *dst++ = (ch >> 8); + *dst++ = (ch & 0xFF); +#endif +#else +#if TCL_UTF_MAX > 3 + *dst++ = (ch & 0xFF); + *dst++ = ((ch >> 8) & 0xFF); + *dst++ = ((ch >> 16) & 0xFF); + *dst++ = (ch >> 24); +#else + *dst++ = (ch & 0xFF); + *dst++ = (ch >> 8); #endif #endif } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; @@ -2904,21 +3040,17 @@ result = TCL_CONVERT_MULTIBYTE; break; } len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 /* * This prevents a crash condition. More evaluation is required for * full support of int Tcl_UniChar. [Bug 1004065] */ if (ch & 0xFFFF0000) { - word = 0; - } else -#elif TCL_UTF_MAX == 4 - if (!len) { word = 0; } else #endif word = fromUnicode[(ch >> 8)][ch & 0xFF]; @@ -3114,22 +3246,15 @@ /* * Check for illegal characters. 
*/ - if (ch > 0xFF -#if TCL_UTF_MAX == 4 - || !len -#endif - ) { + if (ch > 0xFF) { if (flags & TCL_ENCODING_STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } -#if TCL_UTF_MAX == 4 - if (!len) len = 4; -#endif /* * Plunge on, using '?' as a fallback character. */ @@ -3148,11 +3273,11 @@ *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 /* *------------------------------------------------------------------------- * * Utf16ToUtfProc -- * @@ -3242,11 +3367,11 @@ *dstCharsPtr = numChars; return result; } #endif -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 /* *------------------------------------------------------------------------- * * UtfToUtf16Proc -- * @@ -3455,11 +3580,11 @@ dst += Tcl_UniCharToUtf(*chPtr, dst); } else { int len = TclUtfToUniChar(src, chPtr); src += len; -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 if (*chPtr > 0xFFFF) { int ch; ch = (((*chPtr - 0x10000) >> 10) & 0x3FF) | 0xD800; dst += Tcl_UniCharToUtf(ch, dst); @@ -3466,23 +3591,10 @@ ch = ((*chPtr - 0x10000) & 0x3FF) | 0xDC00; dst += Tcl_UniCharToUtf(ch, dst); continue; } #endif -#if TCL_UTF_MAX == 4 - if (!len) { - dst += Tcl_UniCharToUtf(*chPtr, dst); - src += TclUtfToUniChar(src, chPtr); - } -#endif - if ((*chPtr & 0xF800) == 0xD800) { - /* - * Don't let stray surrogates pass. 
- */ - - *chPtr = 0xFFFD; - } dst += Tcl_UniCharToUtf(*chPtr, dst); } } *srcReadPtr = src - srcStart; @@ -3590,11 +3702,11 @@ dst += Tcl_UniCharToUtf(*chPtr, dst); } else { int len = TclUtfToUniChar(src, chPtr); src += len; -#if TCL_UTF_MAX >= 4 +#if TCL_UTF_MAX > 3 if ((*chPtr & 0xFC00) == 0xD800) { int len2 = 0; Tcl_UniChar ch2 = 0; if (Tcl_UtfCharComplete(src, srcEnd - src)) { @@ -4203,11 +4315,11 @@ p++; } return (char *) p - src; } -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 static size_t unilen4( const char *src) { unsigned int *p; Index: jni/tcl/generic/tclExecute.c ================================================================== --- jni/tcl/generic/tclExecute.c +++ jni/tcl/generic/tclExecute.c @@ -5587,26 +5587,20 @@ Tcl_GetByteArrayFromObj(valuePtr, NULL)+index, 1); } else if (valuePtr->bytes && length == valuePtr->length) { objResultPtr = Tcl_NewStringObj((const char *) valuePtr->bytes+index, 1); } else { - char buf[TCL_UTF_MAX] = ""; + char buf[TCL_UTF_MAX]; Tcl_UniChar ch = Tcl_GetUniChar(valuePtr, index); /* * This could be: Tcl_NewUnicodeObj((const Tcl_UniChar *)&ch, 1) * but creating the object as a string seems to be faster in * practical use. */ length = Tcl_UniCharToUtf(ch, buf); -#if TCL_UTF_MAX == 4 - /* Special case for handling high surrogates. 
*/ - if (!length) { - length = Tcl_UniCharToUtf(-1, buf); - } -#endif objResultPtr = Tcl_NewStringObj(buf, length); } TRACE_APPEND(("\"%s\"\n", O2S(objResultPtr))); NEXT_INST_F(1, 2, 1); @@ -5949,11 +5943,11 @@ if (length > 0) { end = ustring1 + length; for (p=ustring1 ; p 3 MODULE_SCOPE int TclCollapseSurrogatePair(Tcl_Token *tokenPtr, int *numReadPtr, char *buffer); -#endif -#if TCL_UTF_MAX == 4 +#define TclUCS4ToUpper(ch) Tcl_UniCharToUpper((ch)) +#define TclUCS4ToLower(ch) Tcl_UniCharToLower((ch)) +#define TclUCS4ToTitle(ch) Tcl_UniCharToTitle((ch)) +#else +MODULE_SCOPE int TclUniCharToUtfExt(int ch, char *buf); +MODULE_SCOPE int TclUtfToUniCharExt(const char *src, int *chPtr); +MODULE_SCOPE int TclUtfCharCompleteExt(const char *src, int len); MODULE_SCOPE int TclUCS4ToUpper(int ch); MODULE_SCOPE int TclUCS4ToLower(int ch); MODULE_SCOPE int TclUCS4ToTitle(int ch); -#else -#define TclUCS4ToUpper(ch) Tcl_UniCharToUpper((ch)) -#define TclUCS4ToLower(ch) Tcl_UniCharToLower((ch)) -#define TclUCS4ToTitle(ch) Tcl_UniCharToTitle((ch)) #endif /* * Many parsing tasks need a common definition of whitespace. * Use this routine and macro to achieve that and place Index: jni/tcl/generic/tclMain.c ================================================================== --- jni/tcl/generic/tclMain.c +++ jni/tcl/generic/tclMain.c @@ -67,13 +67,13 @@ * Further on, in UNICODE mode we just use Tcl_NewUnicodeObj, otherwise * NewNativeObj is needed (which provides proper conversion from native * encoding to UTF-8). 
*/ -#if defined(UNICODE) && (TCL_UTF_MAX <= 4) +#if defined(UNICODE) && (TCL_UTF_MAX == 3) # define NewNativeObj Tcl_NewUnicodeObj -#else /* !UNICODE || (TCL_UTF_MAX > 4) */ +#else /* !UNICODE || (TCL_UTF_MAX > 3) */ static inline Tcl_Obj * NewNativeObj( TCHAR *string, int length) { Index: jni/tcl/generic/tclParse.c ================================================================== --- jni/tcl/generic/tclParse.c +++ jni/tcl/generic/tclParse.c @@ -817,11 +817,11 @@ * * Results: * Records at readPtr the number of bytes making up the backslash * sequence. Records at dst the UTF-8 encoded equivalent of that * backslash sequence. Returns the number of bytes written to dst, at - * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results + * most TCL_UTF_MAX*2. Either readPtr or dst may be NULL, if the results * are not needed, but the return value is the same either way. * * Side effects: * None. * @@ -835,18 +835,18 @@ int numBytes, /* Max number of bytes to scan. */ int *readPtr, /* NULL, or points to storage where the number * of bytes scanned should be written. */ char *dst) /* NULL, or points to buffer where the UTF-8 * encoding of the backslash sequence is to be - * written. At most TCL_UTF_MAX bytes will be + * written. At most TCL_UTF_MAX*2 bytes will be * written there. */ { const char *p = src+1; Tcl_UniChar unichar = 0; int result; int count; - char buf[TCL_UTF_MAX] = ""; + char buf[TCL_UTF_MAX]; if (numBytes == 0) { if (readPtr != NULL) { *readPtr = 0; } @@ -931,19 +931,13 @@ result = 'U'; } else { /* * Check Unicode range. */ -#if TCL_UTF_MAX > 3 if ((result < 0) || (result > 0x10FFFF)) { result = 0xFFFD; } -#else - if ((result < 0) || (result > 0xFFFF)) { - result = 0xFFFD; - } -#endif } break; case '\n': count--; do { @@ -1002,17 +996,19 @@ done: if (readPtr != NULL) { *readPtr = count; } - count = Tcl_UniCharToUtf(result, dst); -#if TCL_UTF_MAX == 4 - if (!count) { - /* Special case for handling high surrogates. 
*/ - count = Tcl_UniCharToUtf(-1, dst); + count = 0; +#if TCL_UTF_MAX == 3 + if ((result > 0xFFFF) && (result <= 0x10FFFF)) { + result -= 0x10000; + count += Tcl_UniCharToUtf((result >> 10) | 0xD800, dst); + result = (result & 0x3FF) | 0xDC00; } #endif + count += Tcl_UniCharToUtf(result, dst + count); return count; } /* *---------------------------------------------------------------------- @@ -2224,11 +2220,11 @@ result = NULL; for (; count>0 && code==TCL_OK ; count--, tokenPtr++) { Tcl_Obj *appendObj = NULL; const char *append = NULL; int appendByteLength = 0; - char utfCharBytes[TCL_UTF_MAX] = ""; + char utfCharBytes[TCL_UTF_MAX*2]; switch (tokenPtr->type) { case TCL_TOKEN_TEXT: append = tokenPtr->start; appendByteLength = tokenPtr->size; @@ -2589,21 +2585,21 @@ int *numReadPtr, /* Pointer to number of consumed input chars. */ char *buffer) /* Buffer holding UTF data of previous token. */ { int count, numRead; Tcl_UniChar ch = 0, ch2 = 0; - char buffer2[TCL_UTF_MAX] = ""; + char buffer2[TCL_UTF_MAX]; - Tcl_UtfToUniChar(buffer, &ch); + TclUtfToUniChar(buffer, &ch); if ((ch <= 0xFFFF) && ((ch & 0xFC00) == 0xD800)) { if (tokenPtr->type == TCL_TOKEN_BS) { count = TclParseBackslash(tokenPtr->start, tokenPtr->size, &numRead, buffer2); if (count <= 0) { return 0; } - Tcl_UtfToUniChar(buffer2, &ch2); + TclUtfToUniChar(buffer2, &ch2); if ((ch2 <= 0xFFFF) && ((ch2 & 0xFC00) == 0xDC00)) { unsigned int uch = ((ch & 0x3FF) << 10) + (ch2 & 0x3FF); uch += 0x10000; if (numReadPtr != NULL) { @@ -2614,13 +2610,13 @@ } } return 0; } #endif - + /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */ Index: jni/tcl/generic/tclScan.c ================================================================== --- jni/tcl/generic/tclScan.c +++ jni/tcl/generic/tclScan.c @@ -58,11 +58,11 @@ *---------------------------------------------------------------------- * * UtfToUniChar -- * * Wrapper to Tcl_UtfToUniChar() capable of dealing with - * UCS4 when compiled with 
TCL_UTF_MAX == 4. + * surrogate pairs when compiled with TCL_UTF_MAX == 3. * * Results: * *chPtr is filled with the full unicode character, and the * return value is the number of bytes from the UTF-8 string that * were consumed. @@ -72,27 +72,27 @@ * *---------------------------------------------------------------------- */ static int -#if TCL_UTF_MAX != 4 -inline -#endif UtfToUniChar( const char *src, int *chPtr) { - Tcl_UniChar ch = 0; + Tcl_UniChar ch; int uch, len; len = TclUtfToUniChar(src, &ch); uch = ch; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(src + len, &ch); +#if TCL_UTF_MAX == 3 + if ((ch & 0xFC00) == 0xD800) { + int len2 = TclUtfToUniChar(src + len, &ch); - uch = ((uch & 0x3FF) << 10) + 0x10000 + (ch & 0x3FF); + if (len2 && ((ch & 0xFC00) == 0xDC00)) { + uch = ((uch & 0x3FF) << 10) + 0x10000 + (ch & 0x3FF); + len += len2; + } } #endif *chPtr = uch; return len; } Index: jni/tcl/generic/tclStringObj.c ================================================================== --- jni/tcl/generic/tclStringObj.c +++ jni/tcl/generic/tclStringObj.c @@ -757,11 +757,11 @@ } if (last < first) { TclNewObj(newObjPtr); return newObjPtr; } -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 /* See: bug [11ae2be95dac9417] */ if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00) && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) { ++first; } @@ -2095,23 +2095,25 @@ allocSegment = 1; } } break; case 'c': { - char buf[TCL_UTF_MAX] = ""; + char buf[TCL_UTF_MAX*2]; int code, length; if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) { goto error; } - length = Tcl_UniCharToUtf(code, buf); -#if TCL_UTF_MAX == 4 - if (!length) { - /* Special case for handling high surrogates. 
*/ - length = Tcl_UniCharToUtf(-1, buf); + length = 0; +#if TCL_UTF_MAX == 3 + if ((code > 0xFFFF) && (code <= 0x10FFFF)) { + code -= 0x10000; + length = Tcl_UniCharToUtf((code >> 10) | 0xD800, buf); + code = (code & 0x3FF) | 0xDC00; } #endif + length += Tcl_UniCharToUtf(code, buf + length); segment = Tcl_NewStringObj(buf, length); Tcl_IncrRefCount(segment); allocSegment = 1; break; } @@ -2889,10 +2891,13 @@ TclStringReverse( Tcl_Obj *objPtr) { String *stringPtr; Tcl_UniChar ch = 0; +#if TCL_UTF_MAX == 3 + int needFlip = 0; +#endif if (TclIsPureByteArray(objPtr)) { int numBytes; unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); @@ -2908,13 +2913,10 @@ if (stringPtr->hasUnicode) { Tcl_UniChar *from = Tcl_GetUnicode(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; Tcl_UniChar *to; -#if TCL_UTF_MAX == 4 - int needFlip = 0; -#endif if (Tcl_IsShared(objPtr)) { /* * Create a non-empty, pure unicode value, so we can coax * Tcl_SetObjLength into growing the unicode rep buffer. @@ -2922,11 +2924,11 @@ objPtr = Tcl_NewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); to = Tcl_GetUnicode(objPtr); while (--src >= from) { -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 ch = *src; if ((ch & 0xF800) == 0xD800) { needFlip = 1; } *to++ = ch; @@ -2937,25 +2939,25 @@ } else { /* * Reversing in place. */ -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 to = src; #endif while (--src > from) { ch = *src; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 if ((ch & 0xF800) == 0xD800) { needFlip = 1; } #endif *src = *from; *from++ = ch; } } -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 if (needFlip) { /* * Flip back surrogate pairs. There might be a better way. */ @@ -3005,13 +3007,13 @@ * skip calling Tcl_UtfCharComplete() here. 
*/ int bytesInChar = TclUtfToUniChar(from, &ch); -#if TCL_UTF_MAX == 4 - if (bytesInChar == 0) { - bytesInChar += TclUtfToUniChar(from, &ch); +#if TCL_UTF_MAX == 3 + if ((ch & 0xF800) == 0xD800) { + needFlip = 1; } #endif ReverseBytes((unsigned char *)to, (unsigned char *)from, bytesInChar); to += bytesInChar; @@ -3023,10 +3025,35 @@ from = to = objPtr->bytes; stringPtr->numChars = charCount; } /* Pass 2. Reverse all the bytes. */ ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes); + +#if TCL_UTF_MAX == 3 + if (needFlip) { + /* Pass 3. Flip back surrogate pairs. Might be a better way. */ + numBytes = objPtr->length; + from = to = objPtr->bytes; + from += numBytes; /* This is the end (The Doors, Jim Morrison) */ + while (to < from) { + Tcl_UniChar ch1, ch2; + int len, len2; + + len = TclUtfToUniChar(to, &ch1); + if (((ch1 & 0xFC00) == 0xDC00) && + (to + len + TCL_UTF_MAX <= from)) { + len2 = TclUtfToUniChar(to + len, &ch2); + if ((ch2 & 0xFC00) == 0xD800) { + Tcl_UniCharToUtf(ch2, to); + Tcl_UniCharToUtf(ch1, to + len); + len += len2; + } + } + to += len; + } + } +#endif } return objPtr; } @@ -3065,11 +3092,11 @@ int numBytes, int numAppendChars) { String *stringPtr = GET_STRING(objPtr); int needed, numOrigChars = 0; - Tcl_UniChar *dst, unichar = 0; + Tcl_UniChar *dst; if (stringPtr->hasUnicode) { numOrigChars = stringPtr->numChars; } if (numAppendChars == -1) { @@ -3088,12 +3115,11 @@ stringPtr->numChars = needed; } else { numAppendChars = 0; } for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) { - bytes += TclUtfToUniChar(bytes, &unichar); - *dst = unichar; + bytes += TclUtfToUniChar(bytes, dst); } *dst = 0; } /* @@ -3310,15 +3336,12 @@ /* * Precondition: this is the "string" Tcl_ObjType. 
*/ int i, origLength, size = 0; - char *dst, buf[TCL_UTF_MAX] = ""; + char *dst, buf[TCL_UTF_MAX]; String *stringPtr = GET_STRING(objPtr); -#if TCL_UTF_MAX == 4 - int length; -#endif if (numChars < 0) { numChars = UnicodeLength(unicode); } @@ -3338,25 +3361,13 @@ if (numChars <= (INT_MAX - size)/TCL_UTF_MAX && stringPtr->allocated >= size + numChars * TCL_UTF_MAX) { goto copyBytes; } -#if TCL_UTF_MAX == 4 - length = -1; - for (i = 0; i < numChars && size >= 0; i++) { - length = Tcl_UniCharToUtf((int) unicode[i], buf); - size += (unsigned int) length; - } - if (!length) { - /* Special case for handling high surrogates. */ - size += (unsigned int) Tcl_UniCharToUtf(-1, buf); - } -#else for (i = 0; i < numChars && size >= 0; i++) { size += (unsigned int) Tcl_UniCharToUtf((int) unicode[i], buf); } -#endif if (size < 0) { Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); } /* @@ -3367,25 +3378,13 @@ GrowStringBuffer(objPtr, size, 1); } copyBytes: dst = objPtr->bytes + origLength; -#if TCL_UTF_MAX == 4 - length = -1; - for (i = 0; i < numChars; i++) { - length = Tcl_UniCharToUtf(unicode[i], dst); - dst += length; - } - if (!length) { - /* Special case for handling high surrogates. 
*/ - dst += Tcl_UniCharToUtf(-1, dst); - } -#else for (i = 0; i < numChars; i++) { dst += Tcl_UniCharToUtf(unicode[i], dst); } -#endif *dst = '\0'; objPtr->length = dst - objPtr->bytes; return numChars; } Index: jni/tcl/generic/tclUniData.c ================================================================== --- jni/tcl/generic/tclUniData.c +++ jni/tcl/generic/tclUniData.c @@ -192,13 +192,12 @@ 9856, 9856, 9856, 9856, 9856, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 9888, 1344, 1344, 9920, 3296, 9952, 9984, 10016, 1344, 1344, 10048, 10080, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 10112, 10144, 1344, 10176, 1344, 10208, 10240, 10272, 10304, 10336, 10368, 1344, 1344, 1344, 10400, 10432, 64, 10464, 10496, - 10528, 4736, 10560, 10592 -#if TCL_UTF_MAX > 3 || TCL_MAJOR_VERSION > 8 || TCL_MINOR_VERSION > 6 - ,10624, 10656, 10688, 3296, 1344, 1344, 1344, 10720, 10752, 10784, + 10528, 4736, 10560, 10592, + 10624, 10656, 10688, 3296, 1344, 1344, 1344, 10720, 10752, 10784, 10816, 10848, 10880, 10912, 8032, 10944, 3296, 3296, 3296, 3296, 9216, 1344, 10976, 11008, 1344, 11040, 11072, 11104, 11136, 1344, 11168, 3296, 11200, 11232, 11264, 1344, 11296, 11328, 11360, 11392, 1344, 11424, 1344, 11456, 11488, 11520, 3296, 3296, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 7776, 4704, 11552, 11584, 11616, 3296, @@ -567,11 +566,10 @@ 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 15488 -#endif /* TCL_UTF_MAX > 3 */ }; /* * The groupMap is indexed by combining the alternate page number with * the page offset and returns a group number that identifies a unique @@ -1177,13 +1175,12 @@ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 92, 92, 0, 0, 15, 15, 15, 15, 15, 15, 0, 0, 15, 15, 15, 15, 15, 15, 0, 0, 15, 15, 15, 15, 15, 15, 0, 0, 15, 15, 15, 0, 0, 0, 4, 4, 7, 11, 14, 4, 4, 0, 14, 7, 7, 7, 7, 14, 14, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 14, 14, 0, 0 -#if TCL_UTF_MAX > 3 || TCL_MAJOR_VERSION > 8 || TCL_MINOR_VERSION > 6 - ,15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 15, + 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 14, 14, 0, 0, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, @@ -1650,11 +1647,10 @@ 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 -#endif /* TCL_UTF_MAX > 3 */ }; /* * Each group represents a unique set of character attributes. The attributes * are encoded into a 32-bit value as follows: @@ -1698,15 +1694,11 @@ -10830783, -10833599, -10832575, -10830015, -10817983, -10824127, -10818751, 237633, -12223, -10830527, -9058239, 237698, 9949314, 18, 17, 10305, 10370, 10049, 10114, 8769, 8834 }; -#if TCL_UTF_MAX > 3 || TCL_MAJOR_VERSION > 8 || TCL_MINOR_VERSION > 6 -# define UNICODE_OUT_OF_RANGE(ch) (((ch) & 0x1FFFFF) >= 0x323C0) -#else -# define UNICODE_OUT_OF_RANGE(ch) (((ch) & 0x1F0000) != 0) -#endif +#define UNICODE_OUT_OF_RANGE(ch) (((ch) & 0x1FFFFF) >= 0x323C0) /* * The following constants are used to determine the category of a * Unicode character. 
*/ @@ -1757,10 +1749,6 @@ /* * This macro extracts the information about a character from the * Unicode character tables. */ -#if TCL_UTF_MAX > 3 || TCL_MAJOR_VERSION > 8 || TCL_MINOR_VERSION > 6 -# define GetUniCharInfo(ch) (groups[groupMap[pageMap[((ch) & 0x1FFFFF) >> OFFSET_BITS] | ((ch) & ((1 << OFFSET_BITS)-1))]]) -#else -# define GetUniCharInfo(ch) (groups[groupMap[pageMap[((ch) & 0xFFFF) >> OFFSET_BITS] | ((ch) & ((1 << OFFSET_BITS)-1))]]) -#endif +#define GetUniCharInfo(ch) (groups[groupMap[pageMap[((ch) & 0x1FFFFF) >> OFFSET_BITS] | ((ch) & ((1 << OFFSET_BITS)-1))]]) Index: jni/tcl/generic/tclUtf.c ================================================================== --- jni/tcl/generic/tclUtf.c +++ jni/tcl/generic/tclUtf.c @@ -74,10 +74,27 @@ 1,1,1,1,1, #endif 1,1,1,1,1,1,1,1,1,1,1 }; +#if TCL_UTF_MAX == 3 + +static const unsigned char totalBytesExt[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 4,4,4,4,4, + 1,1,1,1,1,1,1,1,1,1,1 +}; + +#endif + /* * Functions used only in this module. 
*/ static int UtfCount(int ch); @@ -207,34 +224,10 @@ buf[1] = (char) ((ch | 0x80) & 0xBF); buf[0] = (char) ((ch >> 6) | 0xC0); return 2; } if (ch <= 0xFFFF) { -#if TCL_UTF_MAX == 4 - if ((ch & 0xF800) == 0xD800) { - if (ch & 0x0400) { - /* Low surrogate */ - if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) - && ((buf[2] & 0xCF) == 0)) { - /* Previous Tcl_UniChar was a High surrogate, so combine */ - buf[3] = (char) ((ch & 0x3F) | 0x80); - buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80); - return 4; - } - /* Previous Tcl_UniChar was not a high surrogate, so just output */ - } else { - /* High surrogate */ - ch += 0x40; - /* Fill buffer with specific 3-byte (invalid) byte combination, - so following low surrogate can recognize it and combine */ - buf[2] = (char) ((ch << 4) & 0x30); - buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80); - buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0); - return 0; - } - } -#endif goto three; } #if TCL_UTF_MAX > 3 if (ch <= 0x10FFFF) { @@ -245,28 +238,60 @@ return 4; } #endif } -#if TCL_UTF_MAX == 4 - else if (ch == -1) { - if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) - && ((buf[2] & 0xCF) == 0)) { - ch = 0xD7C0 + ((buf[0] & 0x07) << 8) + ((buf[1] & 0x3F) << 2) - + ((buf[2] & 0x30) >> 4); + ch = 0xFFFD; +three: + buf[2] = (char) ((ch | 0x80) & 0xBF); + buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); + buf[0] = (char) ((ch >> 12) | 0xE0); + return 3; +} + +#if TCL_UTF_MAX == 3 + +int +TclUniCharToUtfExt( + int ch, /* The Tcl_UniChar to be stored in the + * buffer. */ + char *buf) /* Buffer in which the UTF-8 representation of + * the Tcl_UniChar is stored. Buffer must be + * large enough to hold the UTF-8 character + * (at most 4 bytes). 
*/ +{ + if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) { + buf[0] = (char) ch; + return 1; + } + if (ch >= 0) { + if (ch <= 0x7FF) { + buf[1] = (char) ((ch | 0x80) & 0xBF); + buf[0] = (char) ((ch >> 6) | 0xC0); + return 2; + } + if (ch <= 0xFFFF) { goto three; } + if (ch <= 0x10FFFF) { + buf[3] = (char) ((ch | 0x80) & 0xBF); + buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); + buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); + buf[0] = (char) ((ch >> 18) | 0xF0); + return 4; + } } -#endif ch = 0xFFFD; three: buf[2] = (char) ((ch | 0x80) & 0xBF); buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); buf[0] = (char) ((ch >> 12) | 0xE0); return 3; } + +#endif /* *--------------------------------------------------------------------------- * * Tcl_UniCharToUtfDString -- @@ -330,19 +355,10 @@ * looking for trail bytes. If the source buffer is known to be '\0' * terminated, this cannot happen. Otherwise, the caller should call * Tcl_UtfCharComplete() before calling this routine to ensure that * enough bytes remain in the string. * - * If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done: - * For any UTF-8 string containing a character outside of the BMP, the - * first call to this function will fill *chPtr with the high surrogate - * and generate a return value of 0. Calling Tcl_UtfToUniChar again - * will produce the low surrogate and a return value of 4. Because *chPtr - * is used to remember whether the high surrogate is already produced, it - * is recommended to initialize the variable it points to as 0 before - * the first call to Tcl_UtfToUniChar is done. - * * Results: * *chPtr is filled with the Tcl_UniChar, and the return value is the * number of bytes from the UTF-8 string that were consumed. * * Side effects: @@ -411,34 +427,18 @@ else if (byte < 0xF8) { if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { /* * Four-byte-character lead byte followed by three trail bytes. 
*/ -#if TCL_UTF_MAX == 4 - Tcl_UniChar surrogate; - - byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) - | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000; - surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10)); - if (byte & 0x100000) { - /* out of range, < 0x10000 or > 0x10ffff */ - } else if (*chPtr != surrogate) { - /* produce high surrogate, but don't advance source pointer */ - *chPtr = surrogate; - return 0; - } else { - /* produce low surrogate, and advance source pointer */ - *chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF)); - return 4; - } -#else - *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) - | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); + + *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) + | ((src[1] & 0x3F) << 12) + | ((src[2] & 0x3F) << 6) + | (src[3] & 0x3F)); if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) { return 4; } -#endif } /* * A four-byte-character lead-byte not followed by two trail-bytes * represents itself. @@ -447,10 +447,54 @@ #endif *chPtr = (Tcl_UniChar) byte; return 1; } + +#if TCL_UTF_MAX == 3 + +int +TclUtfToUniCharExt( + const char *src, /* The UTF-8 string. */ + int *chPtr) /* Filled with the Unicode represented by + * the UTF-8 string. 
*/ +{ + int byte; + + byte = *((unsigned char *) src); + if (byte < 0xC0) { + *chPtr = byte; + return 1; + } else if (byte < 0xE0) { + if ((src[1] & 0xC0) == 0x80) { + *chPtr = ((byte & 0x1F) << 6) | (src[1] & 0x3F); + if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) { + return 2; + } + } + } else if (byte < 0xF0) { + if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) { + *chPtr = ((byte & 0x0F) << 12) | ((src[1] & 0x3F) << 6) | + (src[2] & 0x3F); + if (*chPtr > 0x7FF) { + return 3; + } + } + } else if (byte < 0xF8) { + if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { + *chPtr = ((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12) + | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F); + if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) { + return 4; + } + } + } + *chPtr = byte; + return 1; +} + +#endif /* *--------------------------------------------------------------------------- * * Tcl_UtfToUniCharDString -- @@ -544,10 +588,26 @@ * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { return length >= totalBytes[UCHAR(*src)]; } + +#if TCL_UTF_MAX == 3 + +int +TclUtfCharCompleteExt( + const char *src, /* String to check if first few bytes contain + * a complete UTF-8 character. */ + int length) /* Length of above string in bytes. */ +{ + int ch; + + ch = *((unsigned char *) src); + return length >= totalBytesExt[ch]; +} + +#endif /* *--------------------------------------------------------------------------- * * Tcl_NumUtfChars -- @@ -571,13 +631,10 @@ int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { Tcl_UniChar ch = 0; int i = 0; -#if TCL_UTF_MAX == 4 - int ulen; -#endif /* * The separate implementations are faster. * * Since this is a time-sensitive function, we also do the check for the @@ -584,48 +641,24 @@ * single-byte char case specially. 
*/ if (length < 0) { while (*src != '\0') { -#if TCL_UTF_MAX == 4 - ulen = TclUtfToUniChar(src, &ch); - src += ulen; - if (ulen) { - ch = 0; - } -#else src += TclUtfToUniChar(src, &ch); -#endif i++; } if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { -#if TCL_UTF_MAX == 4 - ulen = TclUtfToUniChar(src, &ch); - src += ulen; - if (ulen) { - ch = 0; - } -#else src += TclUtfToUniChar(src, &ch); -#endif i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { -#if TCL_UTF_MAX == 4 - ulen = TclUtfToUniChar(src, &ch); - src += ulen; - if (ulen) { - ch = 0; - } -#else src += TclUtfToUniChar(src, &ch); -#endif i++; } if (src < endPtr) { i += endPtr - src; } @@ -655,23 +688,16 @@ const char * Tcl_UtfFindFirst( const char *src, /* The UTF-8 string to be searched. */ int ch) /* The Unicode character to search for. */ { - int len, fullchar; - Tcl_UniChar find = 0; + int len; + Tcl_UniChar find; while (1) { len = TclUtfToUniChar(src, &find); - fullchar = find; -#if TCL_UTF_MAX == 4 - if (!len) { - len += TclUtfToUniChar(src, &find); - fullchar = (((fullchar & 0x3FF) << 10) | (find & 0x3FF)) + 0x10000; - } -#endif - if (fullchar == ch) { + if (find == ch) { return src; } if (*src == '\0') { return NULL; } @@ -701,25 +727,18 @@ const char * Tcl_UtfFindLast( const char *src, /* The UTF-8 string to be searched. */ int ch) /* The Unicode character to search for. 
*/ { - int len, fullchar; - Tcl_UniChar find = 0; + int len; + Tcl_UniChar find; const char *last; last = NULL; while (1) { len = TclUtfToUniChar(src, &find); - fullchar = find; -#if TCL_UTF_MAX == 4 - if (!len) { - len += TclUtfToUniChar(src, &find); - fullchar = (((fullchar & 0x3FF) << 10) | (find & 0x3FF)) + 0x10000; - } -#endif - if (fullchar == ch) { + if (find == ch) { last = src; } if (*src == '\0') { break; } @@ -945,10 +964,11 @@ { Tcl_UniChar ch = 0; while (index-- > 0) { int len = TclUtfToUniChar(src, &ch); + src += len; } return src; } @@ -988,22 +1008,27 @@ * backslash sequence. */ { #define LINE_LENGTH 128 int numRead; int result; + char buffer[TCL_UTF_MAX*2]; - result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst); + result = TclParseBackslash(src, LINE_LENGTH, &numRead, buffer); if (numRead == LINE_LENGTH) { /* * We ate a whole line. Pay the price of a strlen() */ - result = TclParseBackslash(src, strlen(src), &numRead, dst); + result = TclParseBackslash(src, strlen(src), &numRead, buffer); } if (readPtr != NULL) { *readPtr = numRead; } + if (result > TCL_UTF_MAX) { + result = TCL_UTF_MAX; + } + memcpy(dst, buffer, result); return result; } /* *---------------------------------------------------------------------- @@ -1025,29 +1050,30 @@ int Tcl_UtfToUpper( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0; + Tcl_UniChar ch; char *src, *dst; int len, upChar; -#if TCL_UTF_MAX == 4 - int ulen = -1; -#endif /* * Iterate over the string until we hit the terminating null. 
*/ src = dst = str; while (*src) { len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 upChar = ch; - if (!len) { - len += TclUtfToUniChar(src, &ch); - upChar = (((upChar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; + if ((ch & 0xFC00) == 0xD800 && src[len] != '\0') { + int len2 = TclUtfToUniChar(src + len, &ch); + + if ((ch & 0xFC00) == 0xDC00) { + len += len2; + upChar = (((upChar&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + } } upChar = TclUCS4ToUpper(upChar); #else upChar = Tcl_UniCharToUpper(ch); #endif @@ -1059,24 +1085,21 @@ if (len < UtfCount(upChar)) { memmove(dst, src, len); dst += len; } else { -#if TCL_UTF_MAX == 4 - ulen = Tcl_UniCharToUtf(upChar, dst); - dst += ulen; -#else - dst += Tcl_UniCharToUtf(upChar, dst); +#if TCL_UTF_MAX == 3 + if (upChar > 0xFFFF) { + upChar -= 0x10000; + dst += Tcl_UniCharToUtf((upChar >> 10) | 0xD800, dst); + upChar = (upChar & 0x3FF) | 0xDC00; + } #endif + dst += Tcl_UniCharToUtf(upChar, dst); } src += len; } -#if TCL_UTF_MAX == 4 - if (!ulen) { - dst += Tcl_UniCharToUtf(-1, dst); - } -#endif *dst = '\0'; return (dst - str); } /* @@ -1099,29 +1122,30 @@ int Tcl_UtfToLower( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0; + Tcl_UniChar ch; char *src, *dst; int len, lowChar; -#if TCL_UTF_MAX == 4 - int ulen = -1; -#endif /* * Iterate over the string until we hit the terminating null. 
*/ src = dst = str; while (*src) { len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 lowChar = ch; - if (!len) { - len += TclUtfToUniChar(src, &ch); - lowChar = (((lowChar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; + if ((ch & 0xFC00) == 0xD800 && src[len] != '\0') { + int len2 = TclUtfToUniChar(src + len, &ch); + + if ((ch & 0xFC00) == 0xDC00) { + len += len2; + lowChar = (((lowChar&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + } } lowChar = TclUCS4ToLower(lowChar); #else lowChar = Tcl_UniCharToLower(ch); #endif @@ -1134,24 +1158,21 @@ if (len < UtfCount(lowChar)) { memmove(dst, src, len); dst += len; } else { -#if TCL_UTF_MAX == 4 - ulen = Tcl_UniCharToUtf(lowChar, dst); - dst += ulen; -#else - dst += Tcl_UniCharToUtf(lowChar, dst); +#if TCL_UTF_MAX == 3 + if (lowChar > 0xFFFF) { + lowChar -= 0x10000; + dst += Tcl_UniCharToUtf((lowChar >> 10) | 0xD800, dst); + lowChar = (lowChar & 0x3FF) | 0xDC00; + } #endif + dst += Tcl_UniCharToUtf(lowChar, dst); } src += len; } -#if TCL_UTF_MAX == 4 - if (!ulen) { - dst += Tcl_UniCharToUtf(-1, dst); - } -#endif *dst = '\0'; return (dst - str); } /* @@ -1175,16 +1196,13 @@ int Tcl_UtfToTitle( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0; + Tcl_UniChar ch; char *src, *dst; int len, titleChar, lowChar; -#if TCL_UTF_MAX == 4 - int ulen = -1; -#endif /* * Capitalize the first character and then lowercase the rest of the * characters until we get to a null. 
*/ @@ -1191,15 +1209,19 @@ src = dst = str; if (*src) { len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 titleChar = ch; - if (!len) { - len += TclUtfToUniChar(src, &ch); - titleChar = (((titleChar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; + if ((ch & 0xFC00) == 0xD800 && src[len] != '\0') { + int len2 = TclUtfToUniChar(src + len, &ch); + + if ((ch & 0xFC00) == 0xDC00) { + len += len2; + titleChar = (((titleChar&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + } } titleChar = TclUCS4ToTitle(titleChar); #else titleChar = Tcl_UniCharToTitle(ch); #endif @@ -1206,61 +1228,59 @@ if (len < UtfCount(titleChar)) { memmove(dst, src, len); dst += len; } else { -#if TCL_UTF_MAX == 4 - ulen = Tcl_UniCharToUtf(titleChar, dst); - dst += ulen; -#else - dst += Tcl_UniCharToUtf(titleChar, dst); +#if TCL_UTF_MAX == 3 + if (titleChar > 0xFFFF) { + titleChar -= 0x10000; + dst += Tcl_UniCharToUtf((titleChar >> 10) | 0xD800, dst); + titleChar = (titleChar & 0x3FF) | 0xDC00; + } #endif + dst += Tcl_UniCharToUtf(titleChar, dst); } src += len; } -#if TCL_UTF_MAX == 4 - if (!ulen) { - dst += Tcl_UniCharToUtf(-1, dst); - ulen = -1; - } -#endif while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = ch; - /* Special exception for Georgian Asomtavruli chars, no titlecase. */ -#if TCL_UTF_MAX == 4 - if (!len) { - len += TclUtfToUniChar(src, &ch); - lowChar = (((lowChar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; +#if TCL_UTF_MAX == 3 + if ((ch & 0xFC00) == 0xD800 && src[len] != '\0') { + int len2 = TclUtfToUniChar(src + len, &ch); + + if ((ch & 0xFC00) == 0xDC00) { + len += len2; + lowChar = (((lowChar&0x3FF)<<10) | (ch&0x3FF)) + 0x10000; + } } + /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = TclUCS4ToLower(lowChar); } #else + /* Special exception for Georgian Asomtavruli chars, no titlecase. 
*/ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_UniCharToLower(lowChar); } #endif if (len < UtfCount(lowChar)) { memmove(dst, src, len); dst += len; } else { -#if TCL_UTF_MAX == 4 - ulen = Tcl_UniCharToUtf(lowChar, dst); - dst += ulen; -#else - dst += Tcl_UniCharToUtf(lowChar, dst); +#if TCL_UTF_MAX == 3 + if (lowChar > 0xFFFF) { + lowChar -= 0x10000; + dst += Tcl_UniCharToUtf((lowChar >> 10) | 0xD800, dst); + lowChar = (lowChar & 0x3FF) | 0xDC00; + } #endif + dst += Tcl_UniCharToUtf(lowChar, dst); } src += len; } -#if TCL_UTF_MAX == 4 - if (!ulen) { - dst += Tcl_UniCharToUtf(-1, dst); - } -#endif *dst = '\0'; return (dst - str); } /* @@ -1331,46 +1351,61 @@ Tcl_UtfNcmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ unsigned long numChars) /* Number of UTF chars to compare. */ { - Tcl_UniChar ch1 = 0, ch2 = 0; - int uch1, uch2, len; + Tcl_UniChar ch1, ch2; + int uch1, uch2; +#if TCL_UTF_MAX == 3 + int num1 = numChars, num2 = numChars; +#endif /* * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the * pair of bytes 0xC0,0x80) is larger than byte representation of \u0001 * (the byte 0x01.) */ - while (numChars-- > 0) { + while ( +#if TCL_UTF_MAX == 3 + (num1-- > 0) && (num2 > 0) +#else + numChars-- > 0 +#endif + ) { /* * n must be interpreted as chars, not bytes. 
This should be called * only when both strings are of at least n chars long (no need for \0 * check) */ - len = TclUtfToUniChar(cs, &ch1); + cs += TclUtfToUniChar(cs, &ch1); + ct += TclUtfToUniChar(ct, &ch2); + uch1 = ch1; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(cs, &ch1); - uch1 = (((uch1&0x3FF)<<10) | (ch1&0x3FF)) + 0x10000; - } -#endif - cs += len; - - len = TclUtfToUniChar(ct, &ch2); uch2 = ch2; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(ct, &ch2); - uch2 = (((uch2&0x3FF)<<10) | (ch2&0x3FF)) + 0x10000; +#if TCL_UTF_MAX == 3 + if ((num1 > 0) && ((ch1 & 0xFC00) == 0xD800)) { + int len = TclUtfToUniChar(cs, &ch1); + + if ((ch1 & 0xFC00) == 0xDC00) { + uch1 = (((uch1&0x3FF)<<10) | (ch1&0x3FF)) + 0x10000; + cs += len; + num1--; + } + } + if ((num2 > 0) && ((ch2 & 0xFC00) == 0xD800)) { + int len = TclUtfToUniChar(ct, &ch2); + + if ((ch2 & 0xFC00) == 0xDC00) { + uch2 = (((uch2&0x3FF)<<10) | (ch2&0x3FF)) + 0x10000; + ct += len; + num2--; + } } #endif - ct += len; if (uch1 != uch2) { return (uch1 - uch2); } @@ -1400,46 +1435,61 @@ Tcl_UtfNcasecmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ unsigned long numChars) /* Number of UTF chars to compare. */ { - Tcl_UniChar ch1 = 0, ch2 = 0; - int uch1, uch2, len; + Tcl_UniChar ch1, ch2; + int uch1, uch2; +#if TCL_UTF_MAX == 3 + int num1 = numChars, num2 = numChars; +#endif /* * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the * pair of bytes 0xC0,0x80) is larger than byte representation of \u0001 * (the byte 0x01.) */ - while (numChars-- > 0) { + while ( +#if TCL_UTF_MAX == 3 + (num1-- > 0) && (num2-- > 0) +#else + numChars-- > 0 +#endif + ) { + /* - * n must be interpreted as chars, not bytes. This should be called - * only when both strings are of at least n chars long (no need for \0 - * check) + * n must be interpreted as chars, not bytes. 
+ * This should be called only when both strings are of + * at least n chars long (no need for \0 check) */ - - len = TclUtfToUniChar(cs, &ch1); - uch1 = ch1; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(cs, &ch1); - uch1 = (((uch1&0x3FF)<<10) | (ch1&0x3FF)) + 0x10000; - } -#endif - cs += len; - - len = TclUtfToUniChar(ct, &ch2); + cs += TclUtfToUniChar(cs, &ch1); + ct += TclUtfToUniChar(ct, &ch2); + + uch1 = ch1; uch2 = ch2; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(ct, &ch2); - uch2 = (((uch2&0x3FF)<<10) | (ch2&0x3FF)) + 0x10000; +#if TCL_UTF_MAX == 3 + if ((num1 > 0) && ((ch1 & 0xFC00) == 0xD800)) { + int len = TclUtfToUniChar(cs, &ch1); + + if ((ch1 & 0xFC00) == 0xDC00) { + uch1 = (((uch1&0x3FF)<<10) | (ch1&0x3FF)) + 0x10000; + cs += len; + num1--; + } + } + if ((num2 > 0) && ((ch2 & 0xFC00) == 0xD800)) { + int len = TclUtfToUniChar(ct, &ch2); + + if ((ch2 & 0xFC00) == 0xDC00) { + uch2 = (((uch2&0x3FF)<<10) | (ch2&0x3FF)) + 0x10000; + ct += len; + num2--; + } } #endif - ct += len; if (uch1 != uch2) { uch1 = TclUCS4ToLower(uch1); uch2 = TclUCS4ToLower(uch2); if (uch1 != uch2) { @@ -1471,40 +1521,50 @@ int TclUtfCasecmp( const char *cs, /* UTF string to compare to ct. */ const char *ct) /* UTF string cs is compared to. 
*/ { - Tcl_UniChar ch1 = 0, ch2 = 0; - int uch1, uch2, len; + Tcl_UniChar ch1, ch2; + int uch1, uch2; + while (*cs && *ct) { - len = TclUtfToUniChar(cs, &ch1); - uch1 = ch1; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(cs, &ch1); - uch1 = (((uch1&0x3FF)<<10) | (ch1&0x3FF)) + 0x10000; - } -#endif - cs += len; - - len = TclUtfToUniChar(ct, &ch2); - uch2 = ch2; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(ct, &ch2); - - uch2 = (((uch2&0x3FF)<<10) | (ch2&0x3FF)) + 0x10000; - } -#endif - ct += len; - - if (uch1 != uch2) { - uch1 = TclUCS4ToLower(uch1); - uch2 = TclUCS4ToLower(uch2); - if (uch1 != uch2) { - return (uch1 - uch2); + cs += TclUtfToUniChar(cs, &ch1); + ct += TclUtfToUniChar(ct, &ch2); + + uch1 = ch1; + uch2 = ch2; + +#if TCL_UTF_MAX == 3 + if (*cs && ((ch1 & 0xFC00) == 0xD800)) { + int len = TclUtfToUniChar(cs, &ch1); + + if ((ch1 & 0xFC00) == 0xDC00) { + uch1 = (((uch1&0x3FF)<<10) | (ch1&0x3FF)) + 0x10000; + cs += len; + } + } + if (*ct && ((ch2 & 0xFC00) == 0xD800)) { + int len = TclUtfToUniChar(ct, &ch2); + + if ((ch2 & 0xFC00) == 0xDC00) { + uch2 = (((uch2&0x3FF)<<10) | (ch2&0x3FF)) + 0x10000; + ct += len; + } + } +#endif + + if (uch1 != uch2) { +#if TCL_UTF_MAX == 3 + uch1 = TclUCS4ToLower(uch1); + uch2 = TclUCS4ToLower(uch2); +#else + uch1 = Tcl_UniCharToLower(uch1); + uch2 = Tcl_UniCharToLower(uch2); +#endif + if (uch1 != uch2) { + return uch1 - uch2; } } } return UCHAR(*cs) - UCHAR(*ct); } @@ -1523,11 +1583,11 @@ * None. * *---------------------------------------------------------------------- */ -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 int TclUCS4ToUpper( int ch) /* Unicode character to convert. */ { if (!UNICODE_OUT_OF_RANGE(ch)) { @@ -1570,11 +1630,11 @@ * None. * *---------------------------------------------------------------------- */ -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 int TclUCS4ToLower( int ch) /* Unicode character to convert. */ { if (!UNICODE_OUT_OF_RANGE(ch)) { @@ -1619,11 +1679,11 @@ * None. 
* *---------------------------------------------------------------------- */ -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 int TclUCS4ToTitle( int ch) /* Unicode character to convert. */ { if (!UNICODE_OUT_OF_RANGE(ch)) { @@ -1721,11 +1781,11 @@ Tcl_UniCharNcmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 int lcs, lct, nums = numChars, numt = numChars; for ( ; nums != 0 && numt != 0; nums--, numt--, ucs++, uct++) { lcs = *ucs; lct = *uct; @@ -1766,11 +1826,11 @@ return (*ucs - *uct); } } return 0; #endif /* WORDS_BIGENDIAN */ -#endif /* TCL_UTF_MAX == 4 */ +#endif /* TCL_UTF_MAX == 3 */ } /* *---------------------------------------------------------------------- * @@ -1793,11 +1853,11 @@ Tcl_UniCharNcasecmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 int lcs, lct, nums = numChars, numt = numChars; for ( ; nums != 0 && numt != 0; nums--, numt--, ucs++, uct++) { lcs = *ucs; lct = *uct; @@ -1856,15 +1916,13 @@ int Tcl_UniCharIsAlnum( int ch) /* Unicode character to test. */ { -#if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { return 0; } -#endif return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- @@ -1884,15 +1942,13 @@ int Tcl_UniCharIsAlpha( int ch) /* Unicode character to test. */ { -#if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { return 0; } -#endif return ((ALPHA_BITS >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- @@ -1912,11 +1968,10 @@ int Tcl_UniCharIsControl( int ch) /* Unicode character to test. 
*/ { -#if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { /* Clear away extension bits, if any */ ch &= 0x1FFFFF; if ((ch == 0xE0001) || ((ch >= 0xE0020) && (ch <= 0xE007F))) { return 1; @@ -1924,11 +1979,10 @@ if ((ch >= 0xF0000) && ((ch & 0xFFFF) <= 0xFFFD)) { return 1; } return 0; } -#endif return ((CONTROL_BITS >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- @@ -1948,15 +2002,13 @@ int Tcl_UniCharIsDigit( int ch) /* Unicode character to test. */ { -#if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { return 0; } -#endif return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER); } /* *---------------------------------------------------------------------- @@ -1976,15 +2028,13 @@ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { -#if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { return ((unsigned)((ch & 0x1FFFFF) - 0xE0100) <= 0xEF); } -#endif return ((GRAPH_BITS >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- @@ -2004,15 +2054,13 @@ int Tcl_UniCharIsLower( int ch) /* Unicode character to test. */ { -#if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { return 0; } -#endif return (GetCategory(ch) == LOWERCASE_LETTER); } /* *---------------------------------------------------------------------- @@ -2032,15 +2080,13 @@ int Tcl_UniCharIsPrint( int ch) /* Unicode character to test. */ { -#if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { return ((unsigned)((ch & 0x1FFFFF) - 0xE0100) <= 0xEF); } -#endif return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- @@ -2060,15 +2106,13 @@ int Tcl_UniCharIsPunct( int ch) /* Unicode character to test. 
*/ { -#if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { return 0; } -#endif return ((PUNCT_BITS >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- @@ -2088,29 +2132,22 @@ int Tcl_UniCharIsSpace( int ch) /* Unicode character to test. */ { -#if TCL_UTF_MAX > 3 /* Ignore upper 11 bits. */ ch &= 0x1FFFFF; -#else - /* Ignore upper 16 bits. */ - ch &= 0xFFFF; -#endif /* * If the character is within the first 127 characters, just use the * standard C function, otherwise consult the Unicode table. */ if (ch < 0x80) { return TclIsSpaceProcM((char) ch); -#if TCL_UTF_MAX > 3 } else if (UNICODE_OUT_OF_RANGE(ch)) { return 0; -#endif } else if (ch == 0x0085 || ch == 0x180E || ch == 0x200B || ch == 0x202F || ch == 0x2060 || ch == 0xFEFF) { return 1; } else { return ((SPACE_BITS >> GetCategory(ch)) & 1); @@ -2135,15 +2172,13 @@ int Tcl_UniCharIsUpper( int ch) /* Unicode character to test. */ { -#if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { return 0; } -#endif return (GetCategory(ch) == UPPERCASE_LETTER); } /* *---------------------------------------------------------------------- @@ -2163,15 +2198,13 @@ int Tcl_UniCharIsWordChar( int ch) /* Unicode character to test. */ { -#if TCL_UTF_MAX > 3 if (UNICODE_OUT_OF_RANGE(ch)) { return 0; } -#endif return ((WORD_BITS >> GetCategory(ch)) & 1); } /* *---------------------------------------------------------------------- @@ -2202,11 +2235,11 @@ const Tcl_UniChar *uniPattern, /* Pattern, which may contain special * characters. 
*/ int nocase) /* 0 for case sensitive, 1 for insensitive */ { -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 int strLen = 0, ptnLen = 0; while (uniStr[strLen] != 0) { strLen++; } @@ -2405,11 +2438,11 @@ int ptnLen, /* Length of Pattern */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { const Tcl_UniChar *stringEnd, *patternEnd; int p; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 int q; #endif stringEnd = string + strLen; patternEnd = pattern + ptnLen; @@ -2423,11 +2456,11 @@ if (pattern == patternEnd) { return (string == stringEnd); } p = *pattern; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 if ((p & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { p = (((p&0x3FF)<<10) | (pattern[1]&0x3FF)) + 0x10000; ++pattern; @@ -2456,11 +2489,11 @@ } if (pattern == patternEnd) { return 1; } p = *pattern; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 if ((p & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { p = (((p&0x3FF)<<10) | (pattern[1]&0x3FF)) + 0x10000; ++pattern; @@ -2478,11 +2511,11 @@ * quickly if the next char in the pattern isn't a special * character. */ if ((p != '[') && (p != '?') && (p != '\\')) { -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 while (string < stringEnd) { q = *string; if ((q & 0xFC00) == 0xD800) { if ((string + 1 < stringEnd) && ((string[1] & 0xFC00) == 0xDC00)) { @@ -2517,11 +2550,11 @@ return 1; } if (string == stringEnd) { return 0; } -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 if ((string[0] & 0xFC00) == 0xD800) { if ((string + 1 < stringEnd) && ((string[1] & 0xFC00) == 0xDC00)) { string++; } @@ -2536,11 +2569,11 @@ * single character. 
*/ if (p == '?') { pattern++; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 if ((string[0] & 0xFC00) == 0xD800) { if ((string + 1 < stringEnd) && ((string[1] & 0xFC00) == 0xDC00)) { string++; } @@ -2558,11 +2591,11 @@ if (p == '[') { int ch1, startChar, endChar; pattern++; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 ch1 = *string; if ((ch1 & 0xFC00) == 0xD800) { if ((string + 1 < stringEnd) && ((string[1] & 0xFC00) == 0xDC00)) { ch1 = (((ch1&0x3FF)<<10) | (string[1]&0x3FF)) + 0x10000; @@ -2578,11 +2611,11 @@ string++; while (1) { if ((*pattern == ']') || (pattern == patternEnd)) { return 0; } -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 startChar = *pattern; if ((startChar & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { startChar = (((startChar&0x3FF)<<10) | @@ -2600,11 +2633,11 @@ if (*pattern == '-') { pattern++; if (pattern == patternEnd) { return 0; } -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 endChar = *pattern; if ((endChar & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { endChar = (((endChar&0x3FF)<<10) | @@ -2636,11 +2669,11 @@ pattern--; break; } pattern++; } -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 if ((pattern[0] & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { pattern++; } @@ -2663,11 +2696,11 @@ /* * There's no special character. Just make sure that the next bytes of * each string match. */ -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 p = *pattern; if ((p & 0xFC00) == 0xD800) { if ((pattern + 1 < patternEnd) && ((pattern[1] & 0xFC00) == 0xDC00)) { p = (((p&0x3FF)<<10) | (pattern[1]&0x3FF)) + 0x10000; Index: jni/tcl/generic/tclUtil.c ================================================================== --- jni/tcl/generic/tclUtil.c +++ jni/tcl/generic/tclUtil.c @@ -102,10 +102,12 @@ /* * Prototypes for functions defined later in this file. 
*/ +#undef UtfToUniChar +static int UtfToUniChar(const char *string, int *chPtr); static void ClearHash(Tcl_HashTable *tablePtr); static void FreeProcessGlobalValue(ClientData clientData); static void FreeThreadHash(ClientData clientData); static int GetEndOffsetFromObj(Tcl_Obj *objPtr, int endValue, int *indexPtr); @@ -368,11 +370,11 @@ *---------------------------------------------------------------------- * * UtfToUniChar -- * * Wrapper to Tcl_UtfToUniChar() capable of dealing with - * UCS4 when compiled with TCL_UTF_MAX > 3. + * surrogate pairs when compiled with TCL_UTF_MAX == 3. * * Results: * *chPtr is filled with the full unicode character, and the * return value is the number of bytes from the UTF-8 string that * were consumed. @@ -382,26 +384,27 @@ * *---------------------------------------------------------------------- */ static int -#if TCL_UTF_MAX != 4 -inline -#endif UtfToUniChar( const char *src, int *chPtr) { - Tcl_UniChar ch = 0; + Tcl_UniChar ch; int uch, len; len = TclUtfToUniChar(src, &ch); uch = ch; -#if TCL_UTF_MAX == 4 - if (!len) { - len = TclUtfToUniChar(src, &ch); - uch = ((uch & 0x3FF) << 10) + 0x10000 + (ch & 0x3FF); +#if TCL_UTF_MAX == 3 + if ((ch & 0xFC00) == 0xD800) { + int len2 = TclUtfToUniChar(src + len, &ch); + + if (len2 && ((ch & 0xFC00) == 0xDC00)) { + uch = ((uch & 0x3FF) << 10) + 0x10000 + (ch & 0x3FF); + len += len2; + } } #endif *chPtr = uch; return len; } @@ -1755,10 +1758,11 @@ int numTrim) /* ...and its length in bytes */ /* Calls to UtfToUniChar() in this routine * rely on (trim[numTrim] == '\0'). 
*/ { const char *pp, *p = bytes + numBytes, *q; + Tcl_UniChar ch1 = 0; int i; Tcl_DString ds; /* Empty strings -> nothing to do */ if ((numBytes == 0) || (numTrim == 0)) { @@ -1816,13 +1820,30 @@ int uch, pInc = 0; pp = TclUtfPrev(p, bytes); do { pp += pInc; - pInc = UtfToUniChar(pp, &uch); + pInc = TclUtfToUniChar(pp, &ch1); } while (pp + pInc < p); + uch = ch1; +#if TCL_UTF_MAX == 3 + if (((ch1 & 0xFC00) == 0xDC00) && (pp > bytes)) { + const char *ppp; + int ppInc = 0; + + ppp = TclUtfPrev(pp, bytes); + do { + ppp += ppInc; + ppInc = TclUtfToUniChar(ppp, &ch1); + } while (ppp + ppInc < pp); + if ((ch1 & 0xFC00) == 0xD800) { + pp = ppp; + uch = (((ch1&0x3FF)<<10) | (uch&0x3FF)) + 0x10000; + } + } +#endif /* * Inner loop: scan trim string for match to current character. */ Index: jni/tcl/generic/zipfs.c ================================================================== --- jni/tcl/generic/zipfs.c +++ jni/tcl/generic/zipfs.c @@ -1111,34 +1111,53 @@ char *q, buffer[TCL_UTF_MAX * 2]; int i, n; if (utf) { const char *p = path, *end = p + length; -#if TCL_UTF_MAX == 4 - Tcl_UniChar ch = 0; - int uch; +#if TCL_UTF_MAX == 3 + int ch; #else - Tcl_UniChar uch; + Tcl_UniChar ch; #endif while (p < end) { -#if TCL_UTF_MAX == 4 - n = Tcl_UtfToUniChar(p, &ch); - uch = ch; - if (n == 0) { - n = Tcl_UtfToUniChar(p, &ch); - uch = (((uch & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; - } -#else - n = Tcl_UtfToUniChar(p, &uch); -#endif - if (uch == 0) { +#if TCL_UTF_MAX == 3 + n = TclUtfToUniCharExt(p, &ch); +#else + n = Tcl_UtfToUniChar(p, &ch); +#endif + if (p + n > end) { + while (p < end) { + ch = UCHAR(*p); + if (ch == 0) { + buffer[0] = 0xc0; + buffer[1] = 0x80; + i = 2; + } else { + i = Tcl_UniCharToUtf(ch, buffer); + } + Tcl_DStringAppend(dsPtr, buffer, i); + p++; + } + break; + } + if (ch == 0) { buffer[0] = 0xc0; buffer[1] = 0x80; i = 2; } else { - i = Tcl_UniCharToUtf(uch, buffer); +#if TCL_UTF_MAX == 3 + i = 0; + if (ch > 0xFFFF) { + ch -= 0x10000; + i = Tcl_UniCharToUtf((ch 
>> 10) | 0xd800, buffer); + ch = (ch & 0x3ff) | 0xdc00; + } + i += Tcl_UniCharToUtf(ch, buffer + i); +#else + i = Tcl_UniCharToUtf(ch, buffer); +#endif } Tcl_DStringAppend(dsPtr, buffer, i); p += n; } } else { @@ -1206,15 +1225,13 @@ static char * EncodePathname(const char *path, int length, Tcl_DString *dsPtr) { const char *p, *end; -#if TCL_UTF_MAX == 4 - Tcl_UniChar ch = 0; + Tcl_UniChar ch; +#if TCL_UTF_MAX == 3 int uch; -#else - Tcl_UniChar uch; #endif int i, n; char buffer[TCL_UTF_MAX * 2]; if (length < 0) { @@ -1221,21 +1238,26 @@ length = strlen(path); } p = path; end = p + length; while (p < end) { -#if TCL_UTF_MAX == 4 n = Tcl_UtfToUniChar(p, &ch); +#if TCL_UTF_MAX == 3 uch = ch; - if (n == 0) { - n = Tcl_UtfToUniChar(p, &ch); - uch = (((uch & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; + if (((ch & 0xfc00) == 0xd800) && (p + n < end - 3)) { + int nn; + + nn = Tcl_UtfToUniChar(p + n, &ch); + if ((ch & 0xfc00) == 0xdc00) { + n += nn; + uch = (((uch & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; + } } + i = TclUniCharToUtfExt(uch, buffer); #else - n = Tcl_UtfToUniChar(p, &uch); + i = Tcl_UniCharToUtf(ch, buffer); #endif - i = Tcl_UniCharToUtf(uch, buffer); Tcl_DStringAppend(dsPtr, buffer, i); p += n; } return Tcl_DStringValue(dsPtr); } Index: jni/tcl/pkgs/sqlite3.45.3/generic/tclsqlite3.c ================================================================== --- jni/tcl/pkgs/sqlite3.45.3/generic/tclsqlite3.c +++ jni/tcl/pkgs/sqlite3.45.3/generic/tclsqlite3.c @@ -235,10 +235,11 @@ int openFlags; /* Flags used to open. 
(SQLITE_OPEN_URI) */ int nRef; /* Delete object when this reaches 0 */ #ifdef SQLITE_TEST int bLegacyPrepare; /* True to use sqlite3_prepare() */ #endif + Tcl_DString dsErr; /* Used for error messages (utf8Encoding) */ }; struct IncrblobChannel { sqlite3_blob *pBlob; /* sqlite3 blob handle */ SqliteDb *pDb; /* Associated database connection */ @@ -246,10 +247,19 @@ Tcl_Channel channel; /* Channel identifier */ IncrblobChannel *pNext; /* Linked list of all open incrblob channels */ IncrblobChannel *pPrev; /* Linked list of all open incrblob channels */ }; +/* + ** Tcl UTF-8 encoding; loaded and tested on module init. Depending + ** on unicode support detected, it is reset to NULL or kept until + ** module gets unloaded. + */ +static Tcl_Encoding utf8Encoding = NULL; +static int utf8EncInit = 0; +TCL_DECLARE_MUTEX(utf8EncMutex); + /* ** Compute a string length that is limited to what can be stored in ** lower 30 bits of a 32-bit signed integer. */ static int strlen30(const char *z){ @@ -260,10 +270,29 @@ #ifdef USE_TCL_STUBS # define tclStubsPtr staticTclStubsPtr static const TclStubs *tclStubsPtr = 0; #endif + +/* + ** Error message converter. + */ +static const char *SQLITEDB_ERRMSG(SqliteDb *pDb){ + if( pDb!=NULL ){ + if( pDb->db!=NULL ){ + if( utf8Encoding!=NULL ){ + Tcl_DStringFree(&pDb->dsErr); + Tcl_ExternalToUtfDString(utf8Encoding, sqlite3_errmsg(pDb->db), + -1, &pDb->dsErr); + return Tcl_DStringValue(&pDb->dsErr); + } + return sqlite3_errmsg(pDb->db); + } + return "not a valid database"; + } + return "unknown error"; +} #ifndef SQLITE_OMIT_INCRBLOB /* ** Close all incrblob channels opened using database connection pDb. ** This is called when shutting down the database connection. @@ -290,11 +319,11 @@ ClientData instanceData, Tcl_Interp *interp ){ IncrblobChannel *p = (IncrblobChannel *)instanceData; int rc = sqlite3_blob_close(p->pBlob); - sqlite3 *db = p->pDb->db; + SqliteDb *pDb = p->pDb; /* Remove the channel from the SqliteDb.pIncrblob list. 
*/ if( p->pNext ){ p->pNext->pPrev = p->pPrev; } @@ -307,11 +336,11 @@ /* Free the IncrblobChannel structure */ Tcl_Free((char *)p); if( rc!=SQLITE_OK ){ - Tcl_SetResult(interp, (char *)sqlite3_errmsg(db), TCL_VOLATILE); + Tcl_SetResult(interp, (char*)SQLITEDB_ERRMSG(pDb), TCL_VOLATILE); return TCL_ERROR; } return TCL_OK; } @@ -503,11 +532,11 @@ static int count = 0; char zChannel[64]; rc = sqlite3_blob_open(db, zDb, zTable, zColumn, iRow, !isReadonly, &pBlob); if( rc!=SQLITE_OK ){ - Tcl_SetResult(interp, (char *)sqlite3_errmsg(pDb->db), TCL_VOLATILE); + Tcl_SetResult(interp, (char*)SQLITEDB_ERRMSG(pDb), TCL_VOLATILE); return TCL_ERROR; } p = (IncrblobChannel *)Tcl_Alloc(sizeof(IncrblobChannel)); p->iSeek = 0; @@ -679,10 +708,11 @@ Tcl_DecrRefCount(pDb->pWalHook); } if( pDb->pCollateNeeded ){ Tcl_DecrRefCount(pDb->pCollateNeeded); } + Tcl_DStringFree(&pDb->dsErr); Tcl_Free((char*)pDb); } } /* @@ -748,11 +778,18 @@ SqliteDb *pDb = (SqliteDb*)cd; Tcl_DString str; Tcl_DStringInit(&str); Tcl_DStringAppend(&str, pDb->zTrace, -1); - Tcl_DStringAppendElement(&str, (char *)xd); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, (char *)xd, -1, &ds); + Tcl_DStringAppendElement(&str, Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + }else{ + Tcl_DStringAppendElement(&str, (char *)xd); + } Tcl_EvalEx(pDb->interp, Tcl_DStringValue(&str), -1, 0); Tcl_DStringFree(&str); Tcl_ResetResult(pDb->interp); return TCL_OK; } @@ -781,12 +818,21 @@ pCmd = Tcl_NewStringObj(pDb->zTraceV2, -1); Tcl_IncrRefCount(pCmd); Tcl_ListObjAppendElement(pDb->interp, pCmd, Tcl_NewWideIntObj((Tcl_WideInt)(uptr)pStmt)); - Tcl_ListObjAppendElement(pDb->interp, pCmd, - Tcl_NewStringObj(zSql, -1)); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zSql, -1, &ds); + Tcl_ListObjAppendElement(pDb->interp, pCmd, + Tcl_NewStringObj(Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); + }else{ + 
Tcl_ListObjAppendElement(pDb->interp, pCmd, + Tcl_NewStringObj(zSql, -1)); + } Tcl_EvalObjEx(pDb->interp, pCmd, TCL_EVAL_DIRECT); Tcl_DecrRefCount(pCmd); Tcl_ResetResult(pDb->interp); break; } @@ -851,11 +897,18 @@ sqlite3_stmt *pStmt = (sqlite3_stmt *)pd; sqlite3_snprintf(sizeof(zTm)-1, zTm, "%lld", (Tcl_WideInt)(uptr)xd); Tcl_DStringInit(&str); Tcl_DStringAppend(&str, pDb->zProfile, -1); - Tcl_DStringAppendElement(&str, sqlite3_sql(pStmt)); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, sqlite3_sql(pStmt), -1, &ds); + Tcl_DStringAppendElement(&str, Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + }else{ + Tcl_DStringAppendElement(&str, sqlite3_sql(pStmt)); + } Tcl_DStringAppendElement(&str, zTm); Tcl_EvalEx(pDb->interp, Tcl_DStringValue(&str), -1, 0); Tcl_DStringFree(&str); Tcl_ResetResult(pDb->interp); return SQLITE_OK; @@ -904,11 +957,19 @@ assert(pDb->pWalHook); assert( db==pDb->db ); p = Tcl_DuplicateObj(pDb->pWalHook); Tcl_IncrRefCount(p); - Tcl_ListObjAppendElement(interp, p, Tcl_NewStringObj(zDb, -1)); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zDb, -1, &ds); + Tcl_ListObjAppendElement(interp, p, + Tcl_NewStringObj(Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); + }else{ + Tcl_ListObjAppendElement(interp, p, Tcl_NewStringObj(zDb, -1)); + } Tcl_ListObjAppendElement(interp, p, Tcl_NewWideIntObj(nEntry)); if( TCL_OK!=(rc=Tcl_EvalObjEx(interp, p, 0)) || TCL_OK!=(rc=Tcl_GetIntFromObj(interp, Tcl_GetObjResult(interp), &ret)) ){ Tcl_BackgroundException(interp, rc); @@ -970,12 +1031,24 @@ assert( op==SQLITE_INSERT || op==SQLITE_UPDATE || op==SQLITE_DELETE ); pCmd = Tcl_DuplicateObj(pDb->pPreUpdateHook); Tcl_IncrRefCount(pCmd); Tcl_ListObjAppendElement(0, pCmd, Tcl_NewStringObj(azStr[(op-1)/9], -1)); - Tcl_ListObjAppendElement(0, pCmd, Tcl_NewStringObj(zDb, -1)); - Tcl_ListObjAppendElement(0, pCmd, Tcl_NewStringObj(zTbl, -1)); + if( utf8Encoding!=NULL ){ + 
Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zDb, -1, &ds); + Tcl_ListObjAppendElement(0, pCmd, + Tcl_NewStringObj(Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); + Tcl_ExternalToUtfDString(utf8Encoding, zTbl, -1, &ds); + Tcl_ListObjAppendElement(0, pCmd, + Tcl_NewStringObj(Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); + }else{ + Tcl_ListObjAppendElement(0, pCmd, Tcl_NewStringObj(zDb, -1)); + Tcl_ListObjAppendElement(0, pCmd, Tcl_NewStringObj(zTbl, -1)); + } Tcl_ListObjAppendElement(0, pCmd, Tcl_NewWideIntObj(iKey1)); Tcl_ListObjAppendElement(0, pCmd, Tcl_NewWideIntObj(iKey2)); Tcl_EvalObjEx(pDb->interp, pCmd, TCL_EVAL_DIRECT); Tcl_DecrRefCount(pCmd); } @@ -1000,12 +1073,24 @@ assert( op==SQLITE_INSERT || op==SQLITE_UPDATE || op==SQLITE_DELETE ); pCmd = Tcl_DuplicateObj(pDb->pUpdateHook); Tcl_IncrRefCount(pCmd); Tcl_ListObjAppendElement(0, pCmd, Tcl_NewStringObj(azStr[(op-1)/9], -1)); - Tcl_ListObjAppendElement(0, pCmd, Tcl_NewStringObj(zDb, -1)); - Tcl_ListObjAppendElement(0, pCmd, Tcl_NewStringObj(zTbl, -1)); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zDb, -1, &ds); + Tcl_ListObjAppendElement(0, pCmd, + Tcl_NewStringObj(Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); + Tcl_ExternalToUtfDString(utf8Encoding, zTbl, -1, &ds); + Tcl_ListObjAppendElement(0, pCmd, + Tcl_NewStringObj(Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); + }else{ + Tcl_ListObjAppendElement(0, pCmd, Tcl_NewStringObj(zDb, -1)); + Tcl_ListObjAppendElement(0, pCmd, Tcl_NewStringObj(zTbl, -1)); + } Tcl_ListObjAppendElement(0, pCmd, Tcl_NewWideIntObj(rowid)); Tcl_EvalObjEx(pDb->interp, pCmd, TCL_EVAL_DIRECT); Tcl_DecrRefCount(pCmd); } @@ -1016,11 +1101,19 @@ const char *zName ){ SqliteDb *pDb = (SqliteDb *)pCtx; Tcl_Obj *pScript = Tcl_DuplicateObj(pDb->pCollateNeeded); Tcl_IncrRefCount(pScript); - Tcl_ListObjAppendElement(0, pScript, 
Tcl_NewStringObj(zName, -1)); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zName, -1, &ds); + Tcl_ListObjAppendElement(0, pScript, + Tcl_NewStringObj(Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); + }else{ + Tcl_ListObjAppendElement(0, pScript, Tcl_NewStringObj(zName, -1)); + } Tcl_EvalObjEx(pDb->interp, pScript, 0); Tcl_DecrRefCount(pScript); } /* @@ -1037,12 +1130,24 @@ SqlCollate *p = (SqlCollate *)pCtx; Tcl_Obj *pCmd; pCmd = Tcl_NewStringObj(p->zScript, -1); Tcl_IncrRefCount(pCmd); - Tcl_ListObjAppendElement(p->interp, pCmd, Tcl_NewStringObj((const char *)zA, nA)); - Tcl_ListObjAppendElement(p->interp, pCmd, Tcl_NewStringObj((const char *)zB, nB)); + if( utf8Encoding!=NULL ){ + Tcl_DString dsA, dsB; + Tcl_ExternalToUtfDString(utf8Encoding, (char*)zA, nA, &dsA); + Tcl_ExternalToUtfDString(utf8Encoding, (char*)zB, nB, &dsB); + Tcl_ListObjAppendElement(p->interp, pCmd, + Tcl_NewStringObj(Tcl_DStringValue(&dsA), Tcl_DStringLength(&dsA))); + Tcl_ListObjAppendElement(p->interp, pCmd, + Tcl_NewStringObj(Tcl_DStringValue(&dsB), Tcl_DStringLength(&dsB))); + Tcl_DStringFree(&dsA); + Tcl_DStringFree(&dsB); + }else{ + Tcl_ListObjAppendElement(p->interp, pCmd, Tcl_NewStringObj((const char *)zA, nA)); + Tcl_ListObjAppendElement(p->interp, pCmd, Tcl_NewStringObj((const char *)zB, nB)); + } Tcl_EvalObjEx(p->interp, pCmd, TCL_EVAL_DIRECT); Tcl_DecrRefCount(pCmd); return (atoi(Tcl_GetStringResult(p->interp))); } @@ -1076,11 +1181,21 @@ ** be preserved and reused on the next invocation. 
*/ Tcl_Obj **aArg; int nArg; if( Tcl_ListObjGetElements(p->interp, p->pScript, &nArg, &aArg) ){ - sqlite3_result_error(context, Tcl_GetStringResult(p->interp), -1); +resultError: + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_UtfToExternalDString(utf8Encoding, Tcl_GetStringResult(p->interp), + -1, &ds); + sqlite3_result_error(context, Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); + }else{ + sqlite3_result_error(context, Tcl_GetStringResult(p->interp), -1); + } return; } pCmd = Tcl_NewListObj(nArg, aArg); Tcl_IncrRefCount(pCmd); for(i=0; ipDb->zNull, -1); break; } default: { int bytes = sqlite3_value_bytes(pIn); - pVal = Tcl_NewStringObj((char *)sqlite3_value_text(pIn), bytes); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, + (char *)sqlite3_value_text(pIn), bytes, &ds); + pVal = Tcl_NewStringObj(Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); + }else{ + pVal = Tcl_NewStringObj((char *)sqlite3_value_text(pIn), bytes); + } break; } } rc = Tcl_ListObjAppendElement(p->interp, pCmd, pVal); if( rc ){ Tcl_DecrRefCount(pCmd); - sqlite3_result_error(context, Tcl_GetStringResult(p->interp), -1); - return; + goto resultError; } } if( !p->useEvalObjv ){ /* Tcl_EvalObjEx() will automatically call Tcl_EvalObjv() if pCmd ** is a list without a string representation. To prevent this from @@ -1132,11 +1255,11 @@ } if( rc==TCL_BREAK ){ sqlite3_result_null(context); }else if( rc && rc!=TCL_RETURN ){ - sqlite3_result_error(context, Tcl_GetStringResult(p->interp), -1); + goto resultError; }else{ Tcl_Obj *pVar = Tcl_GetObjResult(p->interp); int n; u8 *data; const char *zType = (pVar->typePtr ? pVar->typePtr->name : ""); @@ -1263,16 +1386,61 @@ default : zCode="????"; break; } Tcl_DStringInit(&str); Tcl_DStringAppend(&str, pDb->zAuth, -1); Tcl_DStringAppendElement(&str, zCode); - Tcl_DStringAppendElement(&str, zArg1 ? zArg1 : ""); - Tcl_DStringAppendElement(&str, zArg2 ? 
zArg2 : ""); - Tcl_DStringAppendElement(&str, zArg3 ? zArg3 : ""); - Tcl_DStringAppendElement(&str, zArg4 ? zArg4 : ""); + if( zArg1==NULL ){ + Tcl_DStringAppendElement(&str, ""); + }else if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zArg1, -1, &ds); + Tcl_DStringAppendElement(&str, Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + }else{ + Tcl_DStringAppendElement(&str, zArg1); + } + if( zArg2==NULL ){ + Tcl_DStringAppendElement(&str, ""); + }else if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zArg2, -1, &ds); + Tcl_DStringAppendElement(&str, Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + }else{ + Tcl_DStringAppendElement(&str, zArg2); + } + if( zArg3==NULL ){ + Tcl_DStringAppendElement(&str, ""); + }else if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zArg3, -1, &ds); + Tcl_DStringAppendElement(&str, Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + }else{ + Tcl_DStringAppendElement(&str, zArg3); + } + if( zArg4==NULL ){ + Tcl_DStringAppendElement(&str, ""); + }else if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zArg4, -1, &ds); + Tcl_DStringAppendElement(&str, Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + }else{ + Tcl_DStringAppendElement(&str, zArg4); + } #ifdef SQLITE_USER_AUTHENTICATION - Tcl_DStringAppendElement(&str, zArg5 ? zArg5 : ""); + if( zArg5==NULL ){ + Tcl_DStringAppendElement(&str, ""); + }else if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zArg5, -1, &ds); + Tcl_DStringAppendElement(&str, Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + }else{ + Tcl_DStringAppendElement(&str, zArg5); + } #endif rc = Tcl_EvalEx(pDb->interp, Tcl_DStringValue(&str), -1, TCL_EVAL_GLOBAL); Tcl_DStringFree(&str); zReply = rc==TCL_OK ? 
Tcl_GetStringResult(pDb->interp) : "SQLITE_DENY"; if( strcmp(zReply,"SQLITE_OK")==0 ){ @@ -1373,11 +1541,11 @@ ** But it could also be that the user executed one or more BEGIN, ** COMMIT, SAVEPOINT, RELEASE or ROLLBACK commands that are confusing ** this method's logic. Not clear how this would be best handled. */ if( rc!=TCL_ERROR ){ - Tcl_AppendResult(interp, sqlite3_errmsg(pDb->db), (char*)0); + Tcl_AppendResult(interp, (char*)SQLITEDB_ERRMSG(pDb), (char*)0); rc = TCL_ERROR; } sqlite3_exec(pDb->db, "ROLLBACK", 0, 0, 0); } pDb->disableAuth--; @@ -1443,10 +1611,11 @@ char c; int i; int needResultReset = 0; /* Need to invoke Tcl_ResetResult() */ int rc = SQLITE_OK; /* Value to return */ Tcl_Interp *interp = pDb->interp; + Tcl_DString ds; *ppPreStmt = 0; /* Trim spaces from the start of zSql and calculate the remaining length. */ while( (c = zSql[0])==' ' || c=='\t' || c=='\r' || c=='\n' ){ zSql++; } @@ -1485,17 +1654,17 @@ ** a new SqlPreparedStmt structure. */ if( pPreStmt==0 ){ int nByte; if( SQLITE_OK!=dbPrepare(pDb, zSql, &pStmt, pzOut) ){ - Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3_errmsg(pDb->db), -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(SQLITEDB_ERRMSG(pDb), -1)); return TCL_ERROR; } if( pStmt==0 ){ if( SQLITE_OK!=sqlite3_errcode(pDb->db) ){ /* A compile-time error in the statement. */ - Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3_errmsg(pDb->db), -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(SQLITEDB_ERRMSG(pDb), -1)); return TCL_ERROR; }else{ /* The statement was a no-op. Continue to the next statement ** in the SQL string. 
*/ @@ -1525,12 +1694,18 @@ assert( pPreStmt ); assert( strlen30(pPreStmt->zSql)==pPreStmt->nSql ); assert( 0==memcmp(pPreStmt->zSql, zSql, pPreStmt->nSql) ); /* Bind values to parameters that begin with $ or : */ + Tcl_DStringInit(&ds); for(i=1; i<=nVar; i++){ const char *zVar = sqlite3_bind_parameter_name(pStmt, i); + if( utf8Encoding!= NULL ){ + Tcl_DStringFree(&ds); + Tcl_ExternalToUtfDString(utf8Encoding, zVar, -1, &ds); + zVar = Tcl_DStringValue(&ds); + } if( zVar!=0 && (zVar[0]=='$' || zVar[0]==':' || zVar[0]=='@') ){ Tcl_Obj *pVar = Tcl_GetVar2Ex(interp, &zVar[1], 0, 0); if( pVar==0 && pDb->zBindFallback!=0 ){ Tcl_Obj *pCmd; int rx; @@ -1575,10 +1750,20 @@ }else if( (c=='w' && strcmp(zType,"wideInt")==0) || (c=='i' && strcmp(zType,"int")==0) ){ Tcl_WideInt v; Tcl_GetWideIntFromObj(interp, pVar, &v); sqlite3_bind_int64(pStmt, i, v); + }else if( utf8Encoding!=NULL ){ + const char *pStr; + int len; + Tcl_DStringFree(&ds); + pStr = Tcl_GetStringFromObj(pVar, &len); + Tcl_UtfToExternalDString(utf8Encoding, pStr, len, &ds); + sqlite3_bind_text(pStmt, i, Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds), SQLITE_TRANSIENT); + Tcl_IncrRefCount(pVar); + pPreStmt->apParm[iParm++] = pVar; }else{ data = (unsigned char *)Tcl_GetString(pVar); sqlite3_bind_text(pStmt, i, (char *)data, pVar->length, SQLITE_STATIC); Tcl_IncrRefCount(pVar); pPreStmt->apParm[iParm++] = pVar; @@ -1587,10 +1772,11 @@ sqlite3_bind_null(pStmt, i); } if( needResultReset ) Tcl_ResetResult(pDb->interp); } } + Tcl_DStringFree(&ds); pPreStmt->nParm = iParm; *ppPreStmt = pPreStmt; if( needResultReset && rc==TCL_OK ) Tcl_ResetResult(pDb->interp); return rc; @@ -1660,10 +1846,11 @@ */ typedef struct DbEvalContext DbEvalContext; struct DbEvalContext { SqliteDb *pDb; /* Database handle */ Tcl_Obj *pSql; /* Object holding string zSql */ + Tcl_DString dsSql; /* Encoded SQL text */ const char *zSql; /* Remaining SQL to execute */ SqlPreparedStmt *pPreStmt; /* Current statement */ int nCol; /* Number of columns 
returned by pStmt */ int evalFlags; /* Flags used */ Tcl_Obj *pArray; /* Name of array variable */ @@ -1708,11 +1895,19 @@ Tcl_Obj *pArray, /* Name of Tcl array to set (*) element of */ int evalFlags /* Flags controlling evaluation */ ){ memset(p, 0, sizeof(DbEvalContext)); p->pDb = pDb; - p->zSql = Tcl_GetString(pSql); + if( utf8Encoding!=NULL ){ + const char *pStr; + int len; + pStr = Tcl_GetStringFromObj(pSql, &len); + p->zSql = Tcl_UtfToExternalDString(utf8Encoding, pStr, len, &p->dsSql); + }else{ + Tcl_DStringInit(&p->dsSql); + p->zSql = Tcl_GetString(pSql); + } p->pSql = pSql; Tcl_IncrRefCount(pSql); if( pArray ){ p->pArray = pArray; Tcl_IncrRefCount(pArray); @@ -1739,11 +1934,20 @@ p->nCol = nCol = sqlite3_column_count(pStmt); if( nCol>0 && (papColName || p->pArray) ){ apColName = (Tcl_Obj**)Tcl_Alloc( sizeof(Tcl_Obj*)*nCol ); for(i=0; iapColName = apColName; } @@ -1850,11 +2054,11 @@ p->zSql = zPrevSql; continue; } #endif Tcl_SetObjResult(pDb->interp, - Tcl_NewStringObj(sqlite3_errmsg(pDb->db), -1)); + Tcl_NewStringObj(SQLITEDB_ERRMSG(pDb), -1)); return TCL_ERROR; }else{ dbReleaseStmt(pDb, pPreStmt, 0); } } @@ -1877,10 +2081,11 @@ } if( p->pArray ){ Tcl_DecrRefCount(p->pArray); p->pArray = 0; } + Tcl_DStringFree(&p->dsSql); Tcl_DecrRefCount(p->pSql); dbReleaseColumnNames(p); delDatabaseRef(p->pDb); } @@ -1907,11 +2112,19 @@ } case SQLITE_NULL: { return Tcl_NewStringObj(p->pDb->zNull, -1); } } - + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_Obj *obj; + Tcl_ExternalToUtfDString(utf8Encoding, + (char*)sqlite3_column_text(pStmt, iCol), -1, &ds); + obj = Tcl_NewStringObj(Tcl_DStringValue(&ds), Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); + return obj; + } return Tcl_NewStringObj((char*)sqlite3_column_text(pStmt, iCol), -1); } /* ** If using Tcl version 8.6 or greater, use the NR functions to avoid @@ -2124,11 +2337,11 @@ "progress", "rekey", "restore", "rollback_hook", "serialize", "status", "timeout", "total_changes", "trace", "trace_v2", 
"transaction", "unlock_notify", "update_hook", "version", "wal_hook", - 0 + 0 }; enum DB_enum { DB_AUTHORIZER, DB_BACKUP, DB_BIND_FALLBACK, DB_BUSY, DB_CACHE, DB_CHANGES, DB_CLOSE, DB_COLLATE, DB_COLLATION_NEEDED, @@ -2234,32 +2447,72 @@ zDestFile = Tcl_GetString(objv[3]); }else{ Tcl_WrongNumArgs(interp, 2, objv, "?DATABASE? FILENAME"); return TCL_ERROR; } - rc = sqlite3_open_v2(zDestFile, &pDest, + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_UtfToExternalDString(utf8Encoding, zDestFile, -1, &ds); + rc = sqlite3_open_v2(Tcl_DStringValue(&ds), &pDest, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE| pDb->openFlags, 0); + Tcl_DStringFree(&ds); + }else{ + rc = sqlite3_open_v2(zDestFile, &pDest, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE| pDb->openFlags, 0); + } if( rc!=SQLITE_OK ){ - Tcl_AppendResult(interp, "cannot open target database: ", - sqlite3_errmsg(pDest), (char*)0); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, sqlite3_errmsg(pDest), -1, &ds); + Tcl_AppendResult(interp, "cannot open target database: ", + Tcl_DStringValue(&ds), (char*)0); + Tcl_DStringFree(&ds); + }else{ + Tcl_AppendResult(interp, "cannot open target database: ", + sqlite3_errmsg(pDest), (char*)0); + } sqlite3_close(pDest); return TCL_ERROR; } - pBackup = sqlite3_backup_init(pDest, "main", pDb->db, zSrcDb); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_UtfToExternalDString(utf8Encoding, zSrcDb, -1, &ds); + pBackup = sqlite3_backup_init(pDest, "main", pDb->db, + Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + }else{ + pBackup = sqlite3_backup_init(pDest, "main", pDb->db, zSrcDb); + } if( pBackup==0 ){ - Tcl_AppendResult(interp, "backup failed: ", - sqlite3_errmsg(pDest), (char*)0); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, sqlite3_errmsg(pDest), -1, &ds); + Tcl_AppendResult(interp, "backup failed: ", + Tcl_DStringValue(&ds), (char*)0); + Tcl_DStringFree(&ds); + }else{ + Tcl_AppendResult(interp, 
"backup failed: ", + sqlite3_errmsg(pDest), (char*)0); + } sqlite3_close(pDest); return TCL_ERROR; } while( (rc = sqlite3_backup_step(pBackup,100))==SQLITE_OK ){} sqlite3_backup_finish(pBackup); if( rc==SQLITE_DONE ){ rc = TCL_OK; }else{ - Tcl_AppendResult(interp, "backup failed: ", - sqlite3_errmsg(pDest), (char*)0); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, sqlite3_errmsg(pDest), -1, &ds); + Tcl_AppendResult(interp, "backup failed: ", + Tcl_DStringValue(&ds), (char*)0); + Tcl_DStringFree(&ds); + }else{ + Tcl_AppendResult(interp, "backup failed: ", + sqlite3_errmsg(pDest), (char*)0); + } rc = TCL_ERROR; } sqlite3_close(pDest); break; } @@ -2428,15 +2681,22 @@ case DB_COLLATE: { SqlCollate *pCollate; char *zName; char *zScript; int nScript; + Tcl_DString ds; if( objc!=4 ){ Tcl_WrongNumArgs(interp, 2, objv, "NAME SCRIPT"); return TCL_ERROR; } zName = Tcl_GetString(objv[2]); + if( utf8Encoding!=NULL ){ + Tcl_UtfToExternalDString(utf8Encoding, zName, -1, &ds); + zName = Tcl_DStringValue(&ds); + }else{ + Tcl_DStringInit(&ds); + } zScript = Tcl_GetStringFromObj(objv[3], &nScript); pCollate = (SqlCollate*)Tcl_Alloc( sizeof(*pCollate) + nScript + 1 ); if( pCollate==0 ) return TCL_ERROR; pCollate->interp = interp; pCollate->pNext = pDb->pCollate; @@ -2443,13 +2703,15 @@ pCollate->zScript = (char*)&pCollate[1]; pDb->pCollate = pCollate; memcpy(pCollate->zScript, zScript, nScript+1); if( sqlite3_create_collation_v2(pDb->db, zName, SQLITE_UTF8, pCollate, tclSqlCollate, 0) ){ - Tcl_SetResult(interp, (char *)sqlite3_errmsg(pDb->db), TCL_VOLATILE); + Tcl_DStringFree(&ds); + Tcl_SetResult(interp, (char*)SQLITEDB_ERRMSG(pDb), TCL_VOLATILE); return TCL_ERROR; } + Tcl_DStringFree(&ds); break; } /* ** $db collation_needed SCRIPT @@ -2521,11 +2783,21 @@ int isComplete; if( objc!=3 ){ Tcl_WrongNumArgs(interp, 2, objv, "SQL"); return TCL_ERROR; } - isComplete = sqlite3_complete( Tcl_GetString(objv[2]) ); + if( utf8Encoding!=NULL ){ + const 
char *pStr; + int len; + Tcl_DString ds; + pStr = Tcl_GetStringFromObj(objv[2], &len); + Tcl_UtfToExternalDString(utf8Encoding, pStr, len, &ds); + isComplete = sqlite3_complete( Tcl_DStringValue(&ds) ); + Tcl_DStringFree(&ds); + }else{ + isComplete = sqlite3_complete( Tcl_GetString(objv[2]) ); + } pResult = Tcl_GetObjResult(interp); Tcl_SetBooleanObj(pResult, isComplete); #endif break; } @@ -2672,20 +2944,28 @@ Tcl_AppendResult(interp, "Error: \"", zConflict, "\", conflict-algorithm must be one of: rollback, " "abort, fail, ignore, or replace", (char*)0); return TCL_ERROR; } - zSql = sqlite3_mprintf("SELECT * FROM '%q'", zTable); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_UtfToExternalDString(utf8Encoding, zTable, -1, &ds); + zSql = sqlite3_mprintf("SELECT * FROM '%q'", Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + }else{ + zSql = sqlite3_mprintf("SELECT * FROM '%q'", zTable); + } if( zSql==0 ){ Tcl_AppendResult(interp, "Error: no such table: ", zTable, (char*)0); return TCL_ERROR; } nByte = strlen30(zSql); rc = sqlite3_prepare(pDb->db, zSql, -1, &pStmt, 0); sqlite3_free(zSql); if( rc ){ - Tcl_AppendResult(interp, "Error: ", sqlite3_errmsg(pDb->db), (char*)0); + Tcl_AppendResult(interp, "Error: ", (char*)SQLITEDB_ERRMSG(pDb), + (char*)0); nCol = 0; }else{ nCol = sqlite3_column_count(pStmt); } sqlite3_finalize(pStmt); @@ -2695,12 +2975,20 @@ zSql = (char *)sqlite3_malloc( nByte + 50 + nCol*2 ); if( zSql==0 ) { Tcl_AppendResult(interp, "Error: can't malloc()", (char*)0); return TCL_ERROR; } - sqlite3_snprintf(nByte+50, zSql, "INSERT OR %q INTO '%q' VALUES(?", - zConflict, zTable); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_UtfToExternalDString(utf8Encoding, zTable, -1, &ds); + sqlite3_snprintf(nByte+50, zSql, "INSERT OR %q INTO '%q' VALUES(?", + zConflict, Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); + }else{ + sqlite3_snprintf(nByte+50, zSql, "INSERT OR %q INTO '%q' VALUES(?", + zConflict, zTable); + } j = strlen30(zSql); for(i=1; idb, zSql, 
-1, &pStmt, 0); sqlite3_free(zSql); if( rc ){ - Tcl_AppendResult(interp, "Error: ", sqlite3_errmsg(pDb->db), (char*)0); + Tcl_AppendResult(interp, "Error: ", (char*)SQLITEDB_ERRMSG(pDb), + (char*)0); sqlite3_finalize(pStmt); return TCL_ERROR; } in = Tcl_OpenFileChannel(interp, zFile, "r", 0666); if( in==0 ){ @@ -2759,19 +3048,27 @@ for(i=0; i0 && strcmp(azCol[i], zNull)==0) ){ sqlite3_bind_null(pStmt, i+1); + }else if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_UtfToExternalDString(utf8Encoding, azCol[i], -1, &ds); + sqlite3_bind_text(pStmt, i+1, Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds), SQLITE_TRANSIENT); + Tcl_DStringFree(&ds); }else{ - sqlite3_bind_text(pStmt, i+1, azCol[i], strlen30(azCol[i]), SQLITE_STATIC); + sqlite3_bind_text(pStmt, i+1, azCol[i], strlen30(azCol[i]), + SQLITE_STATIC); } } sqlite3_step(pStmt); rc = sqlite3_reset(pStmt); Tcl_DStringSetLength(&str, 0); if( rc!=SQLITE_OK ){ - Tcl_AppendResult(interp,"Error: ", sqlite3_errmsg(pDb->db), (char*)0); + Tcl_AppendResult(interp,"Error: ", (char*)SQLITEDB_ERRMSG(pDb), + (char*)0); zCommit = "ROLLBACK"; break; } } Tcl_DStringFree(&str); @@ -2855,11 +3152,19 @@ if( isReadonly ){ flags = SQLITE_DESERIALIZE_FREEONCLOSE | SQLITE_DESERIALIZE_READONLY; }else{ flags = SQLITE_DESERIALIZE_FREEONCLOSE | SQLITE_DESERIALIZE_RESIZEABLE; } - xrc = sqlite3_deserialize(pDb->db, zSchema, pData, len, len, flags); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, (char*)zSchema, -1, &ds); + xrc = sqlite3_deserialize(pDb->db, Tcl_DStringValue(&ds), + pData, len, len, flags); + Tcl_DStringFree(&ds); + } else { + xrc = sqlite3_deserialize(pDb->db, zSchema, pData, len, len, flags); + } if( xrc ){ Tcl_AppendResult(interp, "unable to set MEMDB content", (char*)0); rc = TCL_ERROR; } if( mxSize>0 ){ @@ -3046,10 +3351,11 @@ Tcl_Obj *pScript; char *zName; int nArg = -1; int i; int eType = SQLITE_NULL; + Tcl_DString ds; if( objc<4 ){ Tcl_WrongNumArgs(interp, 2, objv, "NAME ?SWITCHES? 
SCRIPT"); return TCL_ERROR; } for(i=3; i<(objc-1); i++){ @@ -3099,10 +3405,16 @@ } } pScript = objv[objc-1]; zName = Tcl_GetString(objv[2]); + if( utf8Encoding!=NULL ){ + Tcl_UtfToExternalDString(utf8Encoding, zName, -1, &ds); + zName = Tcl_DStringValue(&ds); + }else{ + Tcl_DStringInit(&ds); + } pFunc = findSqlFunc(pDb, zName); if( pFunc==0 ) return TCL_ERROR; if( pFunc->pScript ){ Tcl_DecrRefCount(pFunc->pScript); } @@ -3110,13 +3422,14 @@ Tcl_IncrRefCount(pScript); pFunc->useEvalObjv = safeToUseEvalObjv(interp, pScript); pFunc->eType = eType; rc = sqlite3_create_function_v2(pDb->db, zName, nArg, flags, pFunc, tclSqlFunc, 0, 0, 0); + Tcl_DStringFree(&ds); if( rc!=SQLITE_OK ){ rc = TCL_ERROR; - Tcl_SetResult(interp, (char *)sqlite3_errmsg(pDb->db), TCL_VOLATILE); + Tcl_SetResult(interp, (char*)SQLITEDB_ERRMSG(pDb), TCL_VOLATILE); } break; } /* @@ -3149,13 +3462,27 @@ zTable = Tcl_GetString(objv[objc-3]); zColumn = Tcl_GetString(objv[objc-2]); rc = Tcl_GetWideIntFromObj(interp, objv[objc-1], &iRow); if( rc==TCL_OK ){ - rc = createIncrblobChannel( - interp, pDb, zDb, zTable, zColumn, (sqlite3_int64)iRow, isReadonly - ); + if( utf8Encoding!=NULL ){ + Tcl_DString ds1, ds2, ds3; + Tcl_UtfToExternalDString(utf8Encoding, zDb, -1, &ds1); + Tcl_UtfToExternalDString(utf8Encoding, zTable, -1, &ds2); + Tcl_UtfToExternalDString(utf8Encoding, zColumn, -1, &ds3); + rc = createIncrblobChannel( + interp, pDb, Tcl_DStringValue(&ds1), Tcl_DStringValue(&ds2), + Tcl_DStringValue(&ds3), (sqlite3_int64)iRow, isReadonly + ); + Tcl_DStringFree(&ds1); + Tcl_DStringFree(&ds2); + Tcl_DStringFree(&ds3); + }else{ + rc = createIncrblobChannel( + interp, pDb, zDb, zTable, zColumn, (sqlite3_int64)iRow, isReadonly + ); + } } #endif break; } @@ -3340,22 +3667,46 @@ zSrcFile = Tcl_GetString(objv[3]); }else{ Tcl_WrongNumArgs(interp, 2, objv, "?DATABASE? 
FILENAME"); return TCL_ERROR; } - rc = sqlite3_open_v2(zSrcFile, &pSrc, - SQLITE_OPEN_READONLY | pDb->openFlags, 0); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_UtfToExternalDString(utf8Encoding, zSrcFile, -1, &ds); + rc = sqlite3_open_v2(Tcl_DStringValue(&ds), &pSrc, + SQLITE_OPEN_READONLY | pDb->openFlags, 0); + Tcl_DStringFree(&ds); + }else{ + rc = sqlite3_open_v2(zSrcFile, &pSrc, + SQLITE_OPEN_READONLY | pDb->openFlags, 0); + } if( rc!=SQLITE_OK ){ - Tcl_AppendResult(interp, "cannot open source database: ", - sqlite3_errmsg(pSrc), (char*)0); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, sqlite3_errmsg(pSrc), -1, &ds); + Tcl_AppendResult(interp, "cannot open source database: ", + Tcl_DStringValue(&ds), (char*)0); + Tcl_DStringFree(&ds); + }else{ + Tcl_AppendResult(interp, "cannot open source database: ", + sqlite3_errmsg(pSrc), (char*)0); + } sqlite3_close(pSrc); return TCL_ERROR; } - pBackup = sqlite3_backup_init(pDb->db, zDestDb, pSrc, "main"); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_UtfToExternalDString(utf8Encoding, zDestDb, -1, &ds); + pBackup = sqlite3_backup_init(pDb->db, Tcl_DStringValue(&ds), pSrc, + "main"); + Tcl_DStringFree(&ds); + }else{ + pBackup = sqlite3_backup_init(pDb->db, zDestDb, pSrc, "main"); + } if( pBackup==0 ){ Tcl_AppendResult(interp, "restore failed: ", - sqlite3_errmsg(pDb->db), (char*)0); + (char*)SQLITEDB_ERRMSG(pDb), (char*)0); sqlite3_close(pSrc); return TCL_ERROR; } while( (rc = sqlite3_backup_step(pBackup,100))==SQLITE_OK || rc==SQLITE_BUSY ){ @@ -3371,11 +3722,11 @@ Tcl_AppendResult(interp, "restore failed: source database busy", (char*)0); rc = TCL_ERROR; }else{ Tcl_AppendResult(interp, "restore failed: ", - sqlite3_errmsg(pDb->db), (char*)0); + (char*)SQLITEDB_ERRMSG(pDb), (char*)0); rc = TCL_ERROR; } sqlite3_close(pSrc); break; } @@ -3397,11 +3748,20 @@ if( objc!=2 && objc!=3 ){ Tcl_WrongNumArgs(interp, 2, objv, "?DATABASE?"); rc = TCL_ERROR; }else{ int needFree; - 
pData = sqlite3_serialize(pDb->db, zSchema, &sz, SQLITE_SERIALIZE_NOCOPY); + if( utf8Encoding!=NULL ){ + Tcl_DString ds; + Tcl_UtfToExternalDString(utf8Encoding, (char*)zSchema, -1, &ds); + pData = sqlite3_serialize(pDb->db, Tcl_DStringValue(&ds), &sz, + SQLITE_SERIALIZE_NOCOPY); + Tcl_DStringFree(&ds); + } else { + pData = sqlite3_serialize(pDb->db, zSchema, &sz, + SQLITE_SERIALIZE_NOCOPY); + } if( pData ){ needFree = 0; }else{ pData = sqlite3_serialize(pDb->db, zSchema, &sz, 0); needFree = 1; @@ -3641,11 +4001,11 @@ /* Run the SQLite BEGIN command to open a transaction or savepoint. */ pDb->disableAuth++; rc = sqlite3_exec(pDb->db, zBegin, 0, 0, 0); pDb->disableAuth--; if( rc!=SQLITE_OK ){ - Tcl_AppendResult(interp, sqlite3_errmsg(pDb->db), (char*)0); + Tcl_AppendResult(interp, (char*)SQLITEDB_ERRMSG(pDb), (char*)0); return TCL_ERROR; } pDb->nTransaction++; /* If using NRE, schedule a callback to invoke the script pScript, then @@ -3690,11 +4050,11 @@ pDb->pUnlockNotify = objv[2]; Tcl_IncrRefCount(pDb->pUnlockNotify); } if( sqlite3_unlock_notify(pDb->db, xNotify, pNotifyArg) ){ - Tcl_AppendResult(interp, sqlite3_errmsg(pDb->db), (char*)0); + Tcl_AppendResult(interp, (char*)SQLITEDB_ERRMSG(pDb), (char*)0); rc = TCL_ERROR; } } #endif break; @@ -3774,11 +4134,11 @@ if( rc==SQLITE_OK ){ Tcl_Obj *pObj; pObj = Tcl_NewStringObj((char*)sqlite3_value_text(pValue), -1); Tcl_SetObjResult(interp, pObj); }else{ - Tcl_AppendResult(interp, sqlite3_errmsg(pDb->db), (char*)0); + Tcl_AppendResult(interp, (char*)SQLITEDB_ERRMSG(pDb), (char*)0); return TCL_ERROR; } } } #endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ @@ -4030,15 +4390,30 @@ } } zErrMsg = 0; p = (SqliteDb*)Tcl_Alloc( sizeof(*p) ); memset(p, 0, sizeof(*p)); + Tcl_DStringInit(&p->dsErr); if( zFile==0 ) zFile = ""; if( bTranslateFileName ){ zFile = Tcl_TranslateFileName(interp, zFile, &translatedFilename); } - rc = sqlite3_open_v2(zFile, &p->db, flags, zVfs); + if( utf8Encoding!=NULL ){ + Tcl_DString ds1, ds2; + 
Tcl_UtfToExternalDString(utf8Encoding, zFile, -1, &ds1); + if ( zVfs!=NULL ){ + Tcl_UtfToExternalDString(utf8Encoding, zVfs, -1, &ds2); + zVfs = Tcl_DStringValue(&ds2); + } + rc = sqlite3_open_v2(Tcl_DStringValue(&ds1), &p->db, flags, zVfs); + Tcl_DStringFree(&ds1); + if( zVfs!=NULL ){ + Tcl_DStringFree(&ds2); + } + }else{ + rc = sqlite3_open_v2(zFile, &p->db, flags, zVfs); + } if( bTranslateFileName ){ Tcl_DStringFree(&translatedFilename); } if( p->db ){ if( SQLITE_OK!=sqlite3_errcode(p->db) ){ @@ -4048,11 +4423,18 @@ } }else{ zErrMsg = sqlite3_mprintf("%s", sqlite3_errstr(rc)); } if( p->db==0 ){ - Tcl_SetResult(interp, zErrMsg, TCL_VOLATILE); + if( utf8Encoding!=NULL && zErrMsg ){ + Tcl_DString ds; + Tcl_ExternalToUtfDString(utf8Encoding, zErrMsg, -1, &ds); + Tcl_SetResult(interp, Tcl_DStringValue(&ds), TCL_VOLATILE); + Tcl_DStringFree(&ds); + }else{ + Tcl_SetResult(interp, zErrMsg, TCL_VOLATILE); + } Tcl_Free((char*)p); sqlite3_free(zErrMsg); return TCL_ERROR; } p->maxStmt = NUM_PREPARED_STMTS; @@ -4128,16 +4510,75 @@ ** command. */ Tcl_CreateObjCommand(interp, "sqlite", (Tcl_ObjCmdProc*)DbMain, 0, 0); #endif rc = Tcl_PkgProvideEx(interp, "sqlite3", PACKAGE_VERSION, NULL); } + if( !utf8EncInit ){ + Tcl_Encoding enc; + Tcl_MutexLock(&utf8EncMutex); + if( ++utf8EncInit>1 ){ + goto utf8EncInitDone; + } + enc = Tcl_GetEncoding(NULL, "utf-8"); + /* + ** Try to detect the level of UTF-8 support in the Tcl core. + */ + if( enc!=NULL ){ + const char probe1[] = { 0xF0, 0x9F, 0x98, 0x82 }; + const char probe1sp[] = { 0xED, 0xA0, 0xBD, 0xED, 0xB8, 0x82 }; + Tcl_DString ds; + Tcl_ExternalToUtfDString(enc, probe1, sizeof(probe1), &ds); + if( Tcl_DStringLength(&ds)==sizeof(probe1) && + memcmp(probe1, Tcl_DStringValue(&ds), sizeof(probe1))==0 ){ + /* + ** Tcl core supports full Unicode. 
+ */ + Tcl_FreeEncoding(enc); + }else if( Tcl_DStringLength(&ds)==sizeof(probe1sp) && + memcmp(probe1sp, Tcl_DStringValue(&ds), sizeof(probe1sp))==0 ){ + /* + ** Tcl core supports full Unicode using surrogate pairs, + ** keep utf8Encoding for conversion to/from 4 byte UTF-8 sequences. + */ + utf8Encoding = enc; + }else{ + /* + ** Tcl core supports BMP only, let SQLite handle corner cases. + */ + Tcl_FreeEncoding(enc); + } + Tcl_DStringFree(&ds); + } +utf8EncInitDone: + Tcl_MutexUnlock(&utf8EncMutex); + } return rc; } -DLLEXPORT int Tclsqlite3_Init(Tcl_Interp *interp){ return Sqlite3_Init(interp); } + +#ifdef ENABLE_UNLOAD +DLLEXPORT int Sqlite3_Unload(Tcl_Interp *interp, int flags){ + Tcl_MutexLock(&utf8EncMutex); + if( --utf8EncInit<1 ){ + if( utf8Encoding!=NULL ){ + Tcl_FreeEncoding(utf8Encoding); + utf8Encoding = NULL; + } + utf8EncInit = 0; + } + Tcl_MutexUnlock(&utf8EncMutex); + return TCL_OK; +} +#endif + +DLLEXPORT int Tclsqlite3_Init(Tcl_Interp *interp){ + return Sqlite3_Init(interp); +} + #ifdef ENABLE_UNLOAD -DLLEXPORT int Sqlite3_Unload(Tcl_Interp *interp, int flags){ return TCL_OK; } -DLLEXPORT int Tclsqlite3_Unload(Tcl_Interp *interp, int flags){ return TCL_OK; } +DLLEXPORT int Tclsqlite3_Unload(Tcl_Interp *interp, int flags){ + return Sqlite3_Unload(interp, flags); +} #endif /* Because it accesses the file-system and uses persistent state, SQLite ** is not considered appropriate for safe interpreters. Hence, we cause ** the _SafeInit() interfaces return TCL_ERROR. 
@@ -4151,12 +4592,16 @@ #ifndef SQLITE_3_SUFFIX_ONLY int Sqlite_Init(Tcl_Interp *interp){ return Sqlite3_Init(interp); } int Tclsqlite_Init(Tcl_Interp *interp){ return Sqlite3_Init(interp); } #ifdef ENABLE_UNLOAD -int Sqlite_Unload(Tcl_Interp *interp, int flags){ return TCL_OK; } -int Tclsqlite_Unload(Tcl_Interp *interp, int flags){ return TCL_OK; } +int Sqlite_Unload(Tcl_Interp *interp, int flags){ + return Sqlite3_Unload(interp, flags); +} +int Tclsqlite_Unload(Tcl_Interp *interp, int flags){ + return Sqlite3_Unload(interp, flags); +} #endif #endif /* ** If the TCLSH macro is defined, add code to make a stand-alone program. Index: jni/tcl/pkgs/tdbcmysql1.1.5/generic/tdbcmysql.c ================================================================== --- jni/tcl/pkgs/tdbcmysql1.1.5/generic/tdbcmysql.c +++ jni/tcl/pkgs/tdbcmysql1.1.5/generic/tdbcmysql.c @@ -116,10 +116,11 @@ */ typedef struct ConnectionData { int refCount; /* Reference count. */ PerInterpData* pidata; /* Per-interpreter data */ + Tcl_Encoding enc; /* "utf-8" encoding */ MYSQL* mysqlPtr; /* MySql connection handle */ unsigned int nCollations; /* Number of collations defined */ int* collationSizes; /* Character lengths indexed by collation ID */ int flags; } ConnectionData; @@ -155,11 +156,11 @@ typedef struct StatementData { int refCount; /* Reference count */ ConnectionData* cdata; /* Data for the connection to which this * statement pertains. 
*/ - Tcl_Obj* subVars; /* List of variables to be substituted, in the + Tcl_Obj* subVars; /* List of variables to be substituted, in the * order in which they appear in the * statement */ struct ParamData* params; /* Data types and attributes of parameters */ Tcl_Obj* nativeSql; /* Native SQL statement to pass into * MySQL */ @@ -327,11 +328,11 @@ { "-compress", TYPE_FLAG, CLIENT_COMPRESS, 0, "SELECT '', @@SLAVE_COMPRESSED_PROTOCOL" }, { "-database", TYPE_STRING, INDX_DB, CONN_OPT_FLAG_MOD, "SELECT '', DATABASE();"}, { "-db", TYPE_STRING, INDX_DB, CONN_OPT_FLAG_MOD - | CONN_OPT_FLAG_ALIAS, + | CONN_OPT_FLAG_ALIAS, "SELECT '', DATABASE()" }, { "-encoding", TYPE_ENCODING, 0, 0, "SELECT '', 'utf-8'" }, { "-host", TYPE_STRING, INDX_HOST, 0, "SHOW SESSION VARIABLES WHERE VARIABLE_NAME = 'hostname'" }, @@ -338,11 +339,11 @@ { "-interactive", TYPE_FLAG, CLIENT_INTERACTIVE, 0, "SELECT '', 0" }, { "-isolation", TYPE_ISOLATION, 0, CONN_OPT_FLAG_MOD, "SELECT '', LCASE(REPLACE(@@TX_ISOLATION, '-', ''))" }, { "-passwd", TYPE_STRING, INDX_PASSWD, CONN_OPT_FLAG_MOD - | CONN_OPT_FLAG_ALIAS, + | CONN_OPT_FLAG_ALIAS, "SELECT '', ''" }, { "-password", TYPE_STRING, INDX_PASSWD, CONN_OPT_FLAG_MOD, "SELECT '', ''" }, { "-port", TYPE_PORT, 0, 0, "SHOW SESSION VARIABLES WHERE VARIABLE_NAME = 'port'" }, @@ -357,11 +358,11 @@ { "-ssl_cert", TYPE_STRING, INDX_SSLCERT, CONN_OPT_FLAG_SSL, "SELECT '', @@SSL_CERT" }, { "-ssl_cipher", TYPE_STRING, INDX_SSLCIPHER, CONN_OPT_FLAG_SSL, "SELECT '', @@SSL_CIPHER" }, { "-ssl_cypher", TYPE_STRING, INDX_SSLCIPHER, CONN_OPT_FLAG_SSL - | CONN_OPT_FLAG_ALIAS, + | CONN_OPT_FLAG_ALIAS, "SELECT '', @@SSL_CIPHER" }, { "-ssl_key", TYPE_STRING, INDX_SSLKEY, CONN_OPT_FLAG_SSL, "SELECT '', @@SSL_KEY" }, { "-timeout", TYPE_TIMEOUT, 0, CONN_OPT_FLAG_MOD, "SELECT '', @@WAIT_TIMEOUT" }, @@ -394,10 +395,14 @@ ISOL_NONE = -1 }; /* Declarations of static functions appearing in this file */ +static Tcl_Obj* StringObjFromExternal(ConnectionData* cdata, + const char* string, 
int length); +static char* ExternalFromStringObj(ConnectionData* cdata, Tcl_Obj* strObj, + Tcl_DString* dsPtr); static MYSQL_BIND* MysqlBindAlloc(int nBindings); static MYSQL_BIND* MysqlBindIndex(MYSQL_BIND* b, int i); static void* MysqlBindAllocBuffer(MYSQL_BIND* b, int i, unsigned long len); static void MysqlBindFreeBuffer(MYSQL_BIND* b, int i); static void MysqlBindSetBufferType(MYSQL_BIND* b, int i, @@ -408,12 +413,13 @@ static void MysqlBindSetIsNull(MYSQL_BIND* b, int i, my_bool* p); static void MysqlBindSetError(MYSQL_BIND* b, int i, my_bool* p); static MYSQL_FIELD* MysqlFieldIndex(MYSQL_FIELD* fields, int i); -static void TransferMysqlError(Tcl_Interp* interp, MYSQL* mysqlPtr); -static void TransferMysqlStmtError(Tcl_Interp* interp, MYSQL_STMT* mysqlPtr); +static void TransferMysqlError(ConnectionData* cdata, Tcl_Interp* interp); +static void TransferMysqlStmtError(ConnectionData* cdata, Tcl_Interp* interp, + MYSQL_STMT* mysqlPtr); static Tcl_Obj* QueryConnectionOption(ConnectionData* cdata, Tcl_Interp* interp, int optionNum); static int ConfigureConnection(ConnectionData* cdata, Tcl_Interp* interp, int objc, Tcl_Obj *const objv[], int skip); @@ -457,11 +463,12 @@ ClientData* newClientData); static StatementData* NewStatement(ConnectionData* cdata); static MYSQL_STMT* AllocAndPrepareStatement(Tcl_Interp* interp, StatementData* sdata); -static Tcl_Obj* ResultDescToTcl(MYSQL_RES* resultDesc, int flags); +static Tcl_Obj* ResultDescToTcl(ConnectionData* cdata, + MYSQL_RES* resultDesc, int flags); static int StatementConstructor(ClientData clientData, Tcl_Interp* interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const objv[]); static int StatementParamtypeMethod(ClientData clientData, Tcl_Interp* interp, @@ -708,11 +715,65 @@ const static Tcl_MethodType* ResultSetMethods[] = { &ResultSetColumnsMethodType, &ResultSetRowcountMethodType, NULL }; + +/* + *----------------------------------------------------------------------------- + * + * StringObjFromExternal 
-- + * + * Return a new Tcl string object from a string with conversion from + * external to internal utf-8 encoding. + * + * Results: + * New string object. + * + *----------------------------------------------------------------------------- + */ +static Tcl_Obj* +StringObjFromExternal( + ConnectionData* cdata, /* Connection data */ + const char* string, /* String to convert */ + int length /* Length of string */ +) { + Tcl_DString ds; + Tcl_Obj* obj; + + Tcl_ExternalToUtfDString(cdata->enc, string, length, &ds); + obj = Tcl_NewStringObj(Tcl_DStringValue(&ds), Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); + return obj; +} + +/* + *----------------------------------------------------------------------------- + * + * ExternalFromStringObj -- + * + * Return a string from Tcl_Obj* in an uninitialized Tcl_DString + * which the caller must free. The resulting string is converted + * to external utf-8 encoding. + * + * Results: + * New string in provided Tcl_DString. + * + *----------------------------------------------------------------------------- + */ + +static char* +ExternalFromStringObj( + ConnectionData* cdata, /* Connection data */ + Tcl_Obj* strObj, /* Tcl_Obj to convert */ + Tcl_DString *dsPtr /* Result area */ +) { + Tcl_UtfToExternalDString(cdata->enc, Tcl_GetString(strObj), -1, dsPtr); + return Tcl_DStringValue(dsPtr); +} + /* *----------------------------------------------------------------------------- * * MysqlBindAlloc -- * @@ -980,13 +1041,14 @@ *----------------------------------------------------------------------------- */ static void TransferMysqlError( - Tcl_Interp* interp, /* Tcl interpreter */ - MYSQL* mysqlPtr /* MySQL connection handle */ + ConnectionData* cdata, /* Connection data */ + Tcl_Interp* interp /* Tcl interpreter */ ) { + MYSQL* mysqlPtr = cdata->mysqlPtr; const char* sqlstate = mysql_sqlstate(mysqlPtr); Tcl_Obj* errorCode = Tcl_NewObj(); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewStringObj("TDBC", -1)); 
Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewStringObj(Tdbc_MapSqlState(sqlstate), -1)); @@ -994,11 +1056,12 @@ Tcl_NewStringObj(sqlstate, -1)); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewStringObj("MYSQL", -1)); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewWideIntObj(mysql_errno(mysqlPtr))); Tcl_SetObjErrorCode(interp, errorCode); - Tcl_SetObjResult(interp, Tcl_NewStringObj(mysql_error(mysqlPtr), -1)); + Tcl_SetObjResult(interp, StringObjFromExternal(cdata, + mysql_error(mysqlPtr), -1)); } /* *----------------------------------------------------------------------------- * @@ -1016,10 +1079,11 @@ *----------------------------------------------------------------------------- */ static void TransferMysqlStmtError( + ConnectionData* cdata, /* Connection data */ Tcl_Interp* interp, /* Tcl interpreter */ MYSQL_STMT* stmtPtr /* MySQL statment handle */ ) { const char* sqlstate = mysql_stmt_sqlstate(stmtPtr); Tcl_Obj* errorCode = Tcl_NewObj(); @@ -1030,11 +1094,13 @@ Tcl_NewStringObj(sqlstate, -1)); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewStringObj("MYSQL", -1)); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewWideIntObj(mysql_stmt_errno(stmtPtr))); Tcl_SetObjErrorCode(interp, errorCode); - Tcl_SetObjResult(interp, Tcl_NewStringObj(mysql_stmt_error(stmtPtr), -1)); + Tcl_SetObjResult(interp, + StringObjFromExternal(cdata, + mysql_stmt_error(stmtPtr), -1)); } /* *----------------------------------------------------------------------------- * @@ -1061,33 +1127,33 @@ int fieldCount; /* Number of fields in a row */ unsigned long* lengths; /* Character lengths of the fields */ Tcl_Obj* retval; /* Return value */ if (mysql_query(cdata->mysqlPtr, ConnOptions[optionNum].query)) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); return NULL; } result = mysql_store_result(cdata->mysqlPtr); if (result == NULL) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); return NULL; } fieldCount = 
mysql_num_fields(result); if (fieldCount < 2) { retval = cdata->pidata->literals[LIT_EMPTY]; } else { if ((row = mysql_fetch_row(result)) == NULL) { if (mysql_errno(cdata->mysqlPtr)) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); mysql_free_result(result); return NULL; } else { retval = cdata->pidata->literals[LIT_EMPTY]; } } else { lengths = mysql_fetch_lengths(result); - retval = Tcl_NewStringObj(row[1], lengths[1]); + retval = StringObjFromExternal(cdata, row[1], lengths[1]); } } mysql_free_result(result); return retval; } @@ -1119,10 +1185,11 @@ int skip /* Number of parameters to skip */ ) { const char* stringOpts[INDX_MAX]; /* String-valued options */ + Tcl_DString stringBufs[INDX_MAX]; unsigned long mysqlFlags=0; /* Connection flags */ int sslFlag = 0; /* Flag==1 if SSL configuration is needed */ int optionIndex; /* Index of the current option in ConnOptions */ int optionValue; /* Integer value of the current option */ unsigned short port = 0; /* Server port number */ @@ -1129,10 +1196,11 @@ int isolation = ISOL_NONE; /* Isolation level */ int timeout = 0; /* Timeout value */ int i; Tcl_Obj* retval; Tcl_Obj* optval; + int result = TCL_ERROR; if (cdata->mysqlPtr != NULL) { /* Query configuration options on an existing connection */ @@ -1175,19 +1243,20 @@ /* Extract options from the command line */ for (i = 0; i < INDX_MAX; ++i) { stringOpts[i] = NULL; + Tcl_DStringInit(&stringBufs[i]); } for (i = skip; i < objc; i += 2) { /* Unknown option */ if (Tcl_GetIndexFromObjStruct(interp, objv[i], (void*) ConnOptions, sizeof(ConnOptions[0]), "option", 0, &optionIndex) != TCL_OK) { - return TCL_ERROR; + goto error; } /* Unmodifiable option */ if (cdata->mysqlPtr != NULL && !(ConnOptions[optionIndex].flags @@ -1196,24 +1265,24 @@ Tcl_AppendObjToObj(msg, objv[i]); Tcl_AppendToObj(msg, "\" option cannot be changed dynamically", -1); Tcl_SetObjResult(interp, msg); Tcl_SetErrorCode(interp, "TDBC", "GENERAL_ERROR", "HY000", "MYSQL", 
"-1", NULL); - return TCL_ERROR; + goto error; } /* Record option value */ switch (ConnOptions[optionIndex].type) { case TYPE_STRING: stringOpts[ConnOptions[optionIndex].info] = - Tcl_GetString(objv[i+1]); + ExternalFromStringObj(cdata, objv[i+1], &stringBufs[ConnOptions[optionIndex].info]); break; case TYPE_FLAG: if (Tcl_GetBooleanFromObj(interp, objv[i+1], &optionValue) != TCL_OK) { - return TCL_ERROR; + goto error; } if (optionValue) { mysqlFlags |= ConnOptions[optionIndex].info; } break; @@ -1223,51 +1292,51 @@ Tcl_NewStringObj("Only UTF-8 transfer " "encoding is supported.\n", -1)); Tcl_SetErrorCode(interp, "TDBC", "GENERAL_ERROR", "HY000", "MYSQL", "-1", NULL); - return TCL_ERROR; + goto error; } break; case TYPE_ISOLATION: if (Tcl_GetIndexFromObjStruct(interp, objv[i+1], TclIsolationLevels, sizeof(char *), "isolation level", TCL_EXACT, &isolation) != TCL_OK) { - return TCL_ERROR; + goto error; } break; case TYPE_PORT: if (Tcl_GetIntFromObj(interp, objv[i+1], &optionValue) != TCL_OK) { - return TCL_ERROR; + goto error; } if (optionValue < 0 || optionValue > 0xffff) { Tcl_SetObjResult(interp, Tcl_NewStringObj("port number must " "be in range " "[0..65535]", -1)); Tcl_SetErrorCode(interp, "TDBC", "GENERAL_ERROR", "HY000", "MYSQL", "-1", NULL); - return TCL_ERROR; + goto error; } port = optionValue; break; case TYPE_READONLY: if (Tcl_GetBooleanFromObj(interp, objv[i+1], &optionValue) != TCL_OK) { - return TCL_ERROR; + goto error; } if (optionValue != 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj("MySQL does not support " "readonly connections", -1)); Tcl_SetErrorCode(interp, "TDBC", "GENERAL_ERROR", "HY000", "MYSQL", "-1", NULL); - return TCL_ERROR; + goto error; } break; case TYPE_TIMEOUT: if (Tcl_GetIntFromObj(interp, objv[i+1], &timeout) != TCL_OK) { - return TCL_ERROR; + goto error; } break; } if (ConnOptions[optionIndex].flags & CONN_OPT_FLAG_SSL) { sslFlag = 1; @@ -1282,11 +1351,11 @@ if (cdata->mysqlPtr == NULL) { Tcl_SetObjResult(interp, 
Tcl_NewStringObj("mysql_init() failed.", 
-1)); Tcl_SetErrorCode(interp, "TDBC", "GENERAL_ERROR", "HY001", "MYSQL", "NULL", NULL); - return TCL_ERROR; + goto error; } /* Set character set for the connection */ mysql_options(cdata->mysqlPtr, MYSQL_SET_CHARSET_NAME, "utf8"); @@ -1308,12 +1377,12 @@ if (mysql_real_connect(cdata->mysqlPtr, stringOpts[INDX_HOST], stringOpts[INDX_USER], stringOpts[INDX_PASSWD], stringOpts[INDX_DB], port, stringOpts[INDX_SOCKET], mysqlFlags) == NULL) { - TransferMysqlError(interp, cdata->mysqlPtr); - return TCL_ERROR; + TransferMysqlError(cdata, interp); + goto error; } cdata->flags |= CONN_FLAG_AUTOCOMMIT; } else { @@ -1326,49 +1395,55 @@ if (mysql_change_user(cdata->mysqlPtr, stringOpts[INDX_USER], stringOpts[INDX_PASSWD], stringOpts[INDX_DB])) { - TransferMysqlError(interp, cdata->mysqlPtr); - return TCL_ERROR; + TransferMysqlError(cdata, interp); + goto error; } } else if (stringOpts[INDX_DB] != NULL) { /* Database name changed - use the new database */ if (mysql_select_db(cdata->mysqlPtr, stringOpts[INDX_DB])) { - TransferMysqlError(interp, cdata->mysqlPtr); - return TCL_ERROR; + TransferMysqlError(cdata, interp); + goto error; } } } /* Transaction isolation level */ if (isolation != ISOL_NONE) { if (mysql_query(cdata->mysqlPtr, SqlIsolationLevels[isolation])) { - TransferMysqlError(interp, cdata->mysqlPtr); - return TCL_ERROR; + TransferMysqlError(cdata, interp); + goto error; } } /* Timeout */ if (timeout != 0) { - int result; + int sqlresult; Tcl_Obj* query = Tcl_ObjPrintf("SET SESSION WAIT_TIMEOUT = %d\n", timeout); Tcl_IncrRefCount(query); - result = mysql_query(cdata->mysqlPtr, Tcl_GetString(query)); + sqlresult = mysql_query(cdata->mysqlPtr, Tcl_GetString(query)); Tcl_DecrRefCount(query); - if (result) { - TransferMysqlError(interp, cdata->mysqlPtr); - return TCL_ERROR; + if (sqlresult) { + TransferMysqlError(cdata, interp); + goto error; } } + result = TCL_OK; - return TCL_OK; +error: + for (i = 0; i < INDX_MAX; ++i) { + Tcl_DStringFree(&stringBufs[i]); + } + + 
return result; } /* *----------------------------------------------------------------------------- * @@ -1406,10 +1481,11 @@ /* Hang client data on this connection */ cdata = (ConnectionData*) ckalloc(sizeof(ConnectionData)); cdata->refCount = 1; cdata->pidata = pidata; + cdata->enc = Tcl_GetEncoding(NULL, "utf-8"); cdata->mysqlPtr = NULL; cdata->nCollations = 0; cdata->collationSizes = NULL; cdata->flags = 0; IncrPerInterpRefCount(pidata); @@ -1478,11 +1554,11 @@ /* Turn off autocommit for the duration of the transaction */ if (cdata->flags & CONN_FLAG_AUTOCOMMIT) { if (mysql_autocommit(cdata->mysqlPtr, 0)) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); return TCL_ERROR; } cdata->flags &= ~CONN_FLAG_AUTOCOMMIT; } @@ -1545,11 +1621,11 @@ } results = mysql_list_fields(cdata->mysqlPtr, Tcl_GetString(objv[2]), patternStr); if (results == NULL) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); return TCL_ERROR; } else { unsigned int fieldCount = mysql_num_fields(results); MYSQL_FIELD* fields = mysql_fetch_fields(results); unsigned int i; @@ -1556,11 +1632,12 @@ retval = Tcl_NewObj(); Tcl_IncrRefCount(retval); for (i = 0; i < fieldCount; ++i) { MYSQL_FIELD* field = MysqlFieldIndex(fields, i); attrs = Tcl_NewObj(); - name = Tcl_NewStringObj(field->name, field->name_length); + name = StringObjFromExternal(cdata, + field->name, field->name_length); Tcl_DictObjPut(NULL, attrs, literals[LIT_NAME], name); /* TODO - Distinguish CHAR and BINARY */ entry = Tcl_FindHashEntry(&(pidata->typeNumHash), (char*) field->type); @@ -1646,11 +1723,11 @@ /* End transaction, turn off "transaction in progress", and report status */ rc = mysql_commit(cdata->mysqlPtr); cdata->flags &= ~ CONN_FLAG_IN_XCN; if (rc) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); return TCL_ERROR; } return TCL_OK; } @@ -1737,11 +1814,11 @@ ConnectionEvaldirectMethod( ClientData clientData, /* Unused */ 
Tcl_Interp* interp, /* Tcl interpreter */ Tcl_ObjectContext objectContext, /* Object context */ int objc, /* Parameter count */ - Tcl_Obj *const objv[]) /* Parameter vector */ + Tcl_Obj *const objv[]) /* Parameter vector */ { Tcl_Object thisObject = Tcl_ObjectContextObject(objectContext); /* Current connection object */ ConnectionData* cdata = (ConnectionData*) Tcl_ObjectGetMetadata(thisObject, &connectionDataType); @@ -1751,10 +1828,11 @@ MYSQL_ROW rowPtr; /* One row of the result set */ unsigned long* lengths; /* Lengths of the fields in a row */ Tcl_Obj* retObj; /* Result set as a Tcl list */ Tcl_Obj* rowObj; /* One row of the result set as a Tcl list */ Tcl_Obj* fieldObj; /* One field of the row */ + Tcl_DString ds; int i; /* Check parameters */ if (objc != 3) { @@ -1762,14 +1840,17 @@ return TCL_ERROR; } /* Execute the given statement */ - if (mysql_query(cdata->mysqlPtr, Tcl_GetString(objv[2]))) { - TransferMysqlError(interp, cdata->mysqlPtr); + if (mysql_query(cdata->mysqlPtr, + ExternalFromStringObj(cdata, objv[2], &ds))) { + Tcl_DStringFree(&ds); + TransferMysqlError(cdata, interp); return TCL_ERROR; } + Tcl_DStringFree(&ds); /* Retrieve the result set */ resultPtr = mysql_store_result(cdata->mysqlPtr); nColumns = mysql_field_count(cdata->mysqlPtr); @@ -1782,11 +1863,11 @@ Tcl_SetObjResult (interp, Tcl_NewWideIntObj(mysql_affected_rows(cdata->mysqlPtr))); return TCL_OK; } else { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); return TCL_ERROR; } } /* Make a list-of-lists of the result */ @@ -1795,11 +1876,11 @@ while ((rowPtr = mysql_fetch_row(resultPtr)) != NULL) { rowObj = Tcl_NewObj(); lengths = mysql_fetch_lengths(resultPtr); for (i = 0; i < nColumns; ++i) { if (rowPtr[i] != NULL) { - fieldObj = Tcl_NewStringObj(rowPtr[i], lengths[i]); + fieldObj = StringObjFromExternal(cdata, rowPtr[i], lengths[i]); } else { fieldObj = cdata->pidata->literals[LIT_EMPTY]; } Tcl_ListObjAppendElement(NULL, rowObj, fieldObj); } @@ 
-1915,11 +1996,11 @@ /* End transaction, turn off "transaction in progress", and report status */ rc = mysql_rollback(cdata->mysqlPtr); cdata->flags &= ~CONN_FLAG_IN_XCN; if (rc) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); return TCL_ERROR; } return TCL_OK; } @@ -2067,26 +2148,25 @@ return TCL_ERROR; } results = mysql_list_tables(cdata->mysqlPtr, patternStr); if (results == NULL) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); return TCL_ERROR; } else { retval = Tcl_NewObj(); Tcl_IncrRefCount(retval); while ((row = mysql_fetch_row(results)) != NULL) { unsigned long* lengths = mysql_fetch_lengths(results); if (row[0]) { Tcl_ListObjAppendElement(NULL, retval, - Tcl_NewStringObj(row[0], - (int)lengths[0])); + StringObjFromExternal(cdata, row[0], (int)lengths[0])); Tcl_ListObjAppendElement(NULL, retval, literals[LIT_EMPTY]); } } if (mysql_errno(cdata->mysqlPtr)) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); status = TCL_ERROR; } if (status == TCL_OK) { Tcl_SetObjResult(interp, retval); } @@ -2179,10 +2259,11 @@ } if (cdata->mysqlPtr != NULL) { mysql_close(cdata->mysqlPtr); } DecrPerInterpRefCount(cdata->pidata); + Tcl_FreeEncoding(cdata->enc); ckfree((char*) cdata); } /* *----------------------------------------------------------------------------- @@ -2283,21 +2364,25 @@ * the default cursor type is writable. Make all our cursors * read-only to avoid 'Commands out of sync' errors. 
*/ if (stmtPtr == NULL) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); } else { + Tcl_DString ds; /* Prepare the statement */ - nativeSqlStr = Tcl_GetStringFromObj(sdata->nativeSql, &nativeSqlLen); + nativeSqlStr = ExternalFromStringObj(cdata, sdata->nativeSql, &ds); + nativeSqlLen = Tcl_DStringLength(&ds); if (mysql_stmt_prepare(stmtPtr, nativeSqlStr, nativeSqlLen)) { - TransferMysqlStmtError(interp, stmtPtr); + Tcl_DStringFree(&ds); + TransferMysqlStmtError(cdata, interp, stmtPtr); mysql_stmt_close(stmtPtr); stmtPtr = NULL; } + Tcl_DStringFree(&ds); } return stmtPtr; } /* @@ -2317,10 +2402,11 @@ *----------------------------------------------------------------------------- */ static Tcl_Obj* ResultDescToTcl( + ConnectionData* cdata, /* Connection data */ MYSQL_RES* result, /* Result set description */ int flags /* Flags governing the conversion */ ) { Tcl_Obj* retval = Tcl_NewObj(); Tcl_HashTable names; /* Hash table to resolve name collisions */ @@ -2335,11 +2421,11 @@ unsigned int fieldCount = mysql_num_fields(result); MYSQL_FIELD* fields = mysql_fetch_fields(result); unsigned int i; for (i = 0; i < fieldCount; ++i) { MYSQL_FIELD* field = MysqlFieldIndex(fields, i); - nameObj = Tcl_NewStringObj(field->name, -1); + nameObj = StringObjFromExternal(cdata, field->name, -1); Tcl_IncrRefCount(nameObj); countEntry = NULL; for (;;) { entry = Tcl_CreateHashEntry(&names, Tcl_GetString(nameObj), @@ -2353,12 +2439,13 @@ } count = PTR2INT(Tcl_GetHashValue(countEntry)); ++count; Tcl_SetHashValue(countEntry, INT2PTR(count)); Tcl_DecrRefCount(nameObj); - nameObj = Tcl_ObjPrintf("%s#%d", field->name, count); + nameObj = StringObjFromExternal(cdata, field->name, -1); Tcl_IncrRefCount(nameObj); + Tcl_AppendPrintfToObj(nameObj, "#%d", count); } Tcl_ListObjAppendElement(NULL, retval, nameObj); Tcl_DecrRefCount(nameObj); } @@ -2501,14 +2588,14 @@ /* Get result set metadata */ sdata->metadataPtr = mysql_stmt_result_metadata(sdata->stmtPtr); if 
(mysql_stmt_errno(sdata->stmtPtr)) { - TransferMysqlStmtError(interp, sdata->stmtPtr); + TransferMysqlStmtError(cdata, interp, sdata->stmtPtr); goto freeSData; } - sdata->columnNames = ResultDescToTcl(sdata->metadataPtr, 0); + sdata->columnNames = ResultDescToTcl(cdata, sdata->metadataPtr, 0); Tcl_IncrRefCount(sdata->columnNames); Tcl_ListObjLength(NULL, sdata->subVars, &nParams); sdata->params = (ParamData*) ckalloc(nParams * sizeof(ParamData)); for (i = 0; i < nParams; ++i) { @@ -2919,11 +3006,11 @@ * this statement will execute directly. */ if ((cdata->flags & (CONN_FLAG_IN_XCN | CONN_FLAG_AUTOCOMMIT)) == 0) { if (mysql_autocommit(cdata->mysqlPtr, 1)) { - TransferMysqlError(interp, cdata->mysqlPtr); + TransferMysqlError(cdata, interp); return TCL_ERROR; } cdata->flags |= CONN_FLAG_AUTOCOMMIT; } @@ -3051,10 +3138,12 @@ * Convert the parameters to the appropriate data types for * MySQL's prepared statement interface, and bind them. */ if (paramValObj != NULL) { + Tcl_DString ds; + switch (sdata->params[nBound].dataType & 0xffff) { case MYSQL_TYPE_NEWDECIMAL: case MYSQL_TYPE_DECIMAL: if (sdata->params[nBound].scale == 0) { @@ -3130,16 +3219,21 @@ paramValStr = (char*) Tcl_GetByteArrayFromObj(paramValObj, &len); } else { MysqlBindSetBufferType(rdata->paramBindings, nBound, MYSQL_TYPE_STRING); - paramValStr = Tcl_GetStringFromObj(paramValObj, &len); + paramValStr = + ExternalFromStringObj(cdata, paramValObj, &ds); + len = Tcl_DStringLength(&ds); } bufPtr = (char *)MysqlBindAllocBuffer(rdata->paramBindings, nBound, len+1); memcpy(bufPtr, paramValStr, len); rdata->paramLengths[nBound] = len; + if (!(sdata->params[nBound].dataType & IS_BINARY)) { + Tcl_DStringFree(&ds); + } MysqlBindSetLength(rdata->paramBindings, nBound, &(rdata->paramLengths[nBound])); break; } @@ -3165,11 +3259,11 @@ if (mysql_stmt_bind_param(rdata->stmtPtr, rdata->paramBindings) || ((nColumns > 0) && mysql_stmt_bind_result(rdata->stmtPtr, resultBindings)) || mysql_stmt_execute(rdata->stmtPtr) || 
mysql_stmt_store_result(rdata->stmtPtr) ) { - TransferMysqlStmtError(interp, sdata->stmtPtr); + TransferMysqlStmtError(cdata, interp, sdata->stmtPtr); return TCL_ERROR; } /* Determine and store the row count */ @@ -3373,12 +3467,12 @@ default: if (field->charsetnr == 63) { colObj = Tcl_NewByteArrayObj((unsigned char*) bufPtr, resultLengths[i]); } else { - colObj = Tcl_NewStringObj((char*) bufPtr, - resultLengths[i]); + colObj = StringObjFromExternal(cdata, (char*) bufPtr, + resultLengths[i]); } break; } } @@ -3405,11 +3499,11 @@ Tcl_SetObjResult(interp, literals[LIT_1]); status = TCL_OK; cleanup: if (status != TCL_OK) { - TransferMysqlStmtError(interp, rdata->stmtPtr); + TransferMysqlStmtError(cdata, interp, rdata->stmtPtr); } Tcl_DecrRefCount(resultRow); return status; } Index: jni/tcl/pkgs/tdbcodbc1.1.1/generic/tdbcodbc.c ================================================================== --- jni/tcl/pkgs/tdbcodbc1.1.1/generic/tdbcodbc.c +++ jni/tcl/pkgs/tdbcodbc1.1.1/generic/tdbcodbc.c @@ -820,11 +820,11 @@ ch = ptr16[i]; if (ch > 0x10ffff) { ch = 0xfffd; } -#if TCL_UTF_MAX >= 4 +#if TCL_UTF_MAX > 3 /* Collapse a surrogate pair, if any. */ if (ch >= 0xd800 && ch <= 0xdbff) { if (i + 1 < len) { unsigned int ch2 = ptr16[i+1]; @@ -847,11 +847,11 @@ ch = ptr32[i]; if (ch > 0x10ffff) { ch = 0xfffd; } -#if TCL_UTF_MAX >= 4 +#if TCL_UTF_MAX > 3 /* Collapse a surrogate pair, if any. 
*/ if (ch >= 0xd800 && ch <= 0xdbff) { if (i + 1 < len) { unsigned int ch2 = ptr32[i+1]; @@ -917,11 +917,11 @@ bytes += Tcl_UtfToUniChar(bytes, &ch); } else { ch = *bytes++ & 0x00ff; } uch = ch; -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 if (uch > 0xffff) { *ptr16++ = (((uch - 0x10000) >> 10) & 0x3ff) | 0xd800; uch = ((uch - 0x10000) & 0x3ff) | 0xdc00; } #endif @@ -943,11 +943,11 @@ bytes += Tcl_UtfToUniChar(bytes, &ch); } else { ch = *bytes++ & 0x00ff; } uch = ch; -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX == 3 if ((uch & 0xfc00) == 0xd800) { if (Tcl_UtfCharComplete(bytes, end - bytes)) { len = Tcl_UtfToUniChar(bytes, &ch); if ((ch & 0xfc00) == 0xdc00) { bytes += len; Index: jni/tcl/pkgs/tdbcpostgres1.1.1/generic/tdbcpostgres.c ================================================================== --- jni/tcl/pkgs/tdbcpostgres1.1.1/generic/tdbcpostgres.c +++ jni/tcl/pkgs/tdbcpostgres1.1.1/generic/tdbcpostgres.c @@ -192,11 +192,11 @@ static char* _PQtty(const PGconn* conn) { return PQtty(conn); } /* Table of configuration options */ static const struct { - const char * name; /* Option name */ + const char *name; /* Option name */ enum OptType type; /* Option data type */ int info; /* Option index or flag value */ int flags; /* Flags - modifiable; SSL related; * is an alias */ char *(*queryF)(const PGconn*); /* Function used to determine the @@ -256,16 +256,17 @@ */ typedef struct ConnectionData { int refCount; /* Reference count. 
*/ PerInterpData* pidata; /* Per-interpreter data */ - PGconn* pgPtr; /* Postgres connection handle */ + Tcl_Encoding enc; /* "utf-8" encoding */ + PGconn* pgPtr; /* Postgres connection handle */ int stmtCounter; /* Counter for naming statements */ int flags; int isolation; /* Current isolation level */ int readOnly; /* Read only connection indicator */ - char * savedOpts[INDX_MAX]; /* Saved configuration options */ + char* savedOpts[INDX_MAX]; /* Saved configuration options */ } ConnectionData; /* * Flags for the state of an POSTGRES connection */ @@ -398,18 +399,23 @@ ISOL_NONE = -1 }; /* Static functions defined within this file */ +static Tcl_Obj* StringObjFromExternal(ConnectionData* cdata, + const char* string, int length); +static char* ExternalFromStringObj(ConnectionData* cdata, Tcl_Obj* strObj, + Tcl_DString* dsPtr); static int DeterminePostgresMajorVersion(Tcl_Interp* interp, ConnectionData* cdata, int* versionPtr); static void DummyNoticeProcessor(void*, const PGresult*); -static int ExecSimpleQuery(Tcl_Interp* interp, PGconn * pgPtr, - const char * query, PGresult** resOut); -static void TransferPostgresError(Tcl_Interp* interp, PGconn * pgPtr); -static int TransferResultError(Tcl_Interp* interp, PGresult * res); +static int ExecSimpleQuery(ConnectionData* cdata, Tcl_Interp* interp, + const char* query, PGresult** resOut); +static void TransferPostgresError(ConnectionData* cdata, Tcl_Interp* interp); +static int TransferResultError(ConnectionData* cdata, Tcl_Interp* interp, + PGresult *res); static Tcl_Obj* QueryConnectionOption(ConnectionData* cdata, Tcl_Interp* interp, int optionNum); static int ConfigureConnection(ConnectionData* cdata, Tcl_Interp* interp, @@ -444,11 +450,12 @@ static char* GenStatementName(ConnectionData* cdata); static void UnallocateStatement(PGconn* pgPtr, char* stmtName); static StatementData* NewStatement(ConnectionData* cdata); static PGresult* PrepareStatement(Tcl_Interp* interp, StatementData* sdata, char* stmtName); 
-static Tcl_Obj* ResultDescToTcl(PGresult* resultDesc, int flags); +static Tcl_Obj* ResultDescToTcl(ConnectionData* cdata, PGresult* resultDesc, + int flags); static int StatementConstructor(ClientData clientData, Tcl_Interp* interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const objv[]); static int StatementParamtypeMethod(ClientData clientData, Tcl_Interp* interp, Tcl_ObjectContext context, @@ -677,10 +684,64 @@ }; /* *----------------------------------------------------------------------------- * + * StringObjFromExternal -- + * + * Return a new Tcl string object from a string with conversion from + * external to internal utf-8 encoding. + * + * Results: + * New string object. + * + *----------------------------------------------------------------------------- + */ + +static Tcl_Obj* +StringObjFromExternal( + ConnectionData* cdata, /* Connection data */ + const char* string, /* String to convert */ + int length /* Length of string */ +) { + Tcl_DString ds; + Tcl_Obj* obj; + + Tcl_ExternalToUtfDString(cdata->enc, string, length, &ds); + obj = Tcl_NewStringObj(Tcl_DStringValue(&ds), Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); + return obj; +} + +/* + *----------------------------------------------------------------------------- + * + * ExternalFromStringObj -- + * + * Return a string from Tcl_Obj* in an uninitialized Tcl_DString + * which the caller must free. The resulting string is converted + * to external utf-8 encoding. + * + * Results: + * New string in provided Tcl_DString. 
+ * + *----------------------------------------------------------------------------- + */ + +static char* +ExternalFromStringObj( + ConnectionData* cdata, /* Connection data */ + Tcl_Obj* strObj, /* Tcl_Obj to convert */ + Tcl_DString *dsPtr /* Result area */ +) { + Tcl_UtfToExternalDString(cdata->enc, Tcl_GetString(strObj), -1, dsPtr); + return Tcl_DStringValue(dsPtr); +} + +/* + *----------------------------------------------------------------------------- + * * DummyNoticeReceiver -- * * Ignores warnings and notices from the PostgreSQL client library * * Results: @@ -712,35 +773,37 @@ * * Side effects: * Sets the interpreter result and error code appropiately to * query execution process. Optionally, when res parameter is * not NULL and the execution is successful, it returns the - * PGResult * struct by this parameter. This struct should be + * PGResult* struct by this parameter. This struct should be * freed with PQclear() when no longer needed. * *----------------------------------------------------------------------------- */ static int ExecSimpleQuery( + ConnectionData* cdata, /* Connection data */ Tcl_Interp* interp, /* Tcl interpreter */ - PGconn * pgPtr, /* Connection handle */ - const char * query, /* Query to execute */ + const char* query, /* Query to execute */ PGresult** resOut /* Optional handle to result struct */ ) { - PGresult * res; /* Query result */ + PGresult* res; /* Query result */ + PGconn *pgPtr = cdata->pgPtr; + /* Connection handle */ /* Execute the query */ res = PQexec(pgPtr, query); /* Return error if the query was unsuccessful */ if (res == NULL) { - TransferPostgresError(interp, pgPtr); + TransferPostgresError(cdata, interp); return TCL_ERROR; } - if (TransferResultError(interp, res) != TCL_OK) { + if (TransferResultError(cdata, interp, res) != TCL_OK) { PQclear(res); return TCL_ERROR; } /* Transfer query result to the caller */ @@ -774,30 +837,33 @@ *----------------------------------------------------------------------------- */ 
static void TransferPostgresError( - Tcl_Interp* interp, /* Tcl interpreter */ - PGconn* pgPtr /* Postgres connection handle */ + ConnectionData* cdata, /* Connection data */ + Tcl_Interp* interp /* Tcl interpreter */ ) { + PGconn *pgPtr = cdata->pgPtr; /* Connection handle */ Tcl_Obj* errorCode = Tcl_NewObj(); + Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewStringObj("TDBC", -1)); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewStringObj("GENERAL_ERROR", -1)); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewStringObj("HY000", -1)); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewStringObj("POSTGRES", -1)); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewWideIntObj(-1)); Tcl_SetObjErrorCode(interp, errorCode); - Tcl_SetObjResult(interp, Tcl_NewStringObj(PQerrorMessage(pgPtr), -1)); + Tcl_SetObjResult(interp, + StringObjFromExternal(cdata, PQerrorMessage(pgPtr), -1)); } /* *----------------------------------------------------------------------------- * - * TransferPostgresError -- + * TransferResultError -- * * Check if there is any error related to given PGresult object. * If there was an error, it obtains error message, SQL state * and error number from the Postgres client library and transfers * thenm into the Tcl interpreter. 
@@ -813,12 +879,13 @@ * *----------------------------------------------------------------------------- */ static int TransferResultError( + ConnectionData* cdata, Tcl_Interp* interp, - PGresult * res + PGresult* res ) { ExecStatusType error = PQresultStatus(res); const char* sqlstate; if (error == PGRES_BAD_RESPONSE || error == PGRES_EMPTY_QUERY @@ -832,20 +899,20 @@ sqlstate = "HY000"; } Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewStringObj(Tdbc_MapSqlState(sqlstate), -1)); Tcl_ListObjAppendElement(NULL, errorCode, - Tcl_NewStringObj(sqlstate, -1)); + StringObjFromExternal(cdata, sqlstate, -1)); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewStringObj("POSTGRES", -1)); Tcl_ListObjAppendElement(NULL, errorCode, Tcl_NewWideIntObj(error)); Tcl_SetObjErrorCode(interp, errorCode); if (error == PGRES_EMPTY_QUERY) { Tcl_SetObjResult(interp, Tcl_NewStringObj("empty query", -1)); } else { - Tcl_SetObjResult(interp, Tcl_NewStringObj( + Tcl_SetObjResult(interp, StringObjFromExternal(cdata, PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY), -1)); } } if (error == PGRES_BAD_RESPONSE || error == PGRES_EMPTY_QUERY @@ -872,22 +939,20 @@ * *----------------------------------------------------------------------------- */ static int -DeterminePostgresMajorVersion(Tcl_Interp* interp, - /* Tcl interpreter */ - ConnectionData* cdata, - /* Connection data */ - int* versionPtr) - /* OUTPUT: PostgreSQL server version */ +DeterminePostgresMajorVersion( + Tcl_Interp* interp, /* Tcl interpreter */ + ConnectionData* cdata, /* Connection data */ + int* versionPtr) /* OUTPUT: PostgreSQL server version */ { PGresult* res; /* Result of a Postgres query */ int status = TCL_ERROR; /* Status return */ char* versionStr; /* Version information from server */ - if (ExecSimpleQuery(interp, cdata->pgPtr, + if (ExecSimpleQuery(cdata, interp, "SELECT version()", &res) == TCL_OK) { versionStr = PQgetvalue(res, 0, 0); if (sscanf(versionStr, " PostgreSQL %d", versionPtr) == 1) { status = TCL_OK; } else 
{ @@ -924,30 +989,30 @@ Tcl_Interp* interp, /* Tcl interpreter */ int optionNum /* Position of the option in the table */ ) { PerInterpData* pidata = cdata->pidata; /* Per-interpreter data */ Tcl_Obj** literals = pidata->literals; - char * value; /* Return value as C string */ + char* value; /* Return value as C string */ /* Suppress attempts to query the password */ if (ConnOptions[optionNum].info == INDX_PASS) { return Tcl_NewObj(); } if (ConnOptions[optionNum].type == TYPE_ENCODING) { value = (char* )pg_encoding_to_char(PQclientEncoding(cdata->pgPtr)); - return Tcl_NewStringObj(value, -1); + return StringObjFromExternal(cdata, value, -1); } if (ConnOptions[optionNum].type == TYPE_ISOLATION) { if (cdata->isolation == ISOL_NONE) { - PGresult * res; - char * isoName; + PGresult* res; + char* isoName; int i = 0; /* The isolation level wasn't set - get default value */ - if (ExecSimpleQuery(interp, cdata->pgPtr, + if (ExecSimpleQuery(cdata, interp, "SHOW default_transaction_isolation", &res) != TCL_OK) { return NULL; } value = PQgetvalue(res, 0, 0); isoName = (char*) ckalloc(strlen(value) + 1); @@ -993,11 +1058,11 @@ } if (ConnOptions[optionNum].queryF != NULL) { value = ConnOptions[optionNum].queryF(cdata->pgPtr); if (value != NULL) { - return Tcl_NewStringObj(value, -1); + return StringObjFromExternal(cdata, value, -1); } } if (ConnOptions[optionNum].type == TYPE_STRING && ConnOptions[optionNum].info != -1) { /* Fallback: get value saved ealier */ @@ -1039,18 +1104,19 @@ * ConnOptions */ int optionValue; /* Integer value of the current option */ int i; size_t j; char portval[16]; /* String representation of port number */ - char * encoding = NULL; /* Selected encoding name */ + char* encoding = NULL; /* Selected encoding name */ int isolation = ISOL_NONE; /* Isolation level */ int readOnly = -1; /* Read only indicator */ #define CONNINFO_LEN 1000 char connInfo[CONNINFO_LEN]; /* Configuration string for PQconnectdb() */ Tcl_Obj* retval; Tcl_Obj* optval; + char* 
valPtr; int vers; /* PostgreSQL major version */ if (cdata->pgPtr != NULL) { /* Query configuration options on an existing connection */ @@ -1097,11 +1163,14 @@ } /* Extract options from the command line */ for (i = 0; i < INDX_MAX; ++i) { - cdata->savedOpts[i] = NULL; + if (cdata->savedOpts[i] != NULL) { + ckfree(cdata->savedOpts[i]); + cdata->savedOpts[i] = NULL; + } } for (i = skip; i < objc; i += 2) { /* Unknown option */ @@ -1127,12 +1196,14 @@ /* Record option value */ switch (ConnOptions[optionIndex].type) { case TYPE_STRING: + valPtr = Tcl_GetString(objv[i+1]); cdata->savedOpts[ConnOptions[optionIndex].info] = - Tcl_GetString(objv[i+1]); + ckalloc(strlen(valPtr) + 1); + strcpy(cdata->savedOpts[ConnOptions[optionIndex].info], valPtr); break; case TYPE_ENCODING: encoding = Tcl_GetString(objv[i+1]); break; case TYPE_ISOLATION: @@ -1153,11 +1224,12 @@ Tcl_SetErrorCode(interp, "TDBC", "GENERAL_ERROR", "HY000", "POSTGRES", "-1", NULL); return TCL_ERROR; } snprintf(portval, 16, "%d", optionValue); - cdata->savedOpts[INDX_PORT] = portval; + cdata->savedOpts[INDX_PORT] = ckalloc(strlen(portval) + 1); + strcpy(cdata->savedOpts[INDX_PORT], portval); break; case TYPE_READONLY: if (Tcl_GetBooleanFromObj(interp, objv[i+1], &readOnly) != TCL_OK) { return TCL_ERROR; @@ -1191,29 +1263,29 @@ "POSTGRES", "NULL", NULL); return TCL_ERROR; } if (PQstatus(cdata->pgPtr) != CONNECTION_OK) { - TransferPostgresError(interp, cdata->pgPtr); + TransferPostgresError(cdata, interp); return TCL_ERROR; } PQsetNoticeProcessor(cdata->pgPtr, DummyNoticeProcessor, NULL); } /* Character encoding */ if (encoding != NULL ) { if (PQsetClientEncoding(cdata->pgPtr, encoding) != 0) { - TransferPostgresError(interp, cdata->pgPtr); + TransferPostgresError(cdata, interp); return TCL_ERROR; } } /* Transaction isolation level */ if (isolation != ISOL_NONE) { - if (ExecSimpleQuery(interp, cdata->pgPtr, + if (ExecSimpleQuery(cdata, interp, SqlIsolationLevels[isolation], NULL) != TCL_OK) { return TCL_ERROR; } 
cdata->isolation = isolation; } @@ -1220,16 +1292,16 @@ /* Readonly indicator */ if (readOnly != -1) { if (readOnly == 0) { - if (ExecSimpleQuery(interp, cdata->pgPtr, + if (ExecSimpleQuery(cdata, interp, "SET TRANSACTION READ WRITE", NULL) != TCL_OK) { return TCL_ERROR; } } else { - if (ExecSimpleQuery(interp, cdata->pgPtr, + if (ExecSimpleQuery(cdata, interp, "SET TRANSACTION READ ONLY", NULL) != TCL_OK) { return TCL_ERROR; } } @@ -1247,11 +1319,11 @@ * backward-compatible 'escape' setting, so that the code in * ResultSetNextrowMethod will retrieve byte array values correctly * on either 8.x or 9.x servers. */ if (vers >= 9) { - if (ExecSimpleQuery(interp, cdata->pgPtr, + if (ExecSimpleQuery(cdata, interp, "SET bytea_output = 'escape'", NULL) != TCL_OK) { return TCL_ERROR; } } return TCL_OK; @@ -1295,10 +1367,11 @@ cdata = (ConnectionData*) ckalloc(sizeof(ConnectionData)); memset(cdata, 0, sizeof(ConnectionData)); cdata->refCount = 1; cdata->pidata = pidata; + cdata->enc = Tcl_GetEncoding(NULL, "utf-8"); cdata->pgPtr = NULL; cdata->stmtCounter = 0; cdata->flags = 0; cdata->isolation = ISOL_NONE; cdata->readOnly = 0; @@ -1366,11 +1439,11 @@ } cdata->flags |= CONN_FLAG_IN_XCN; /* Execute begin trasnaction block command */ - return ExecSimpleQuery(interp, cdata->pgPtr, "BEGIN", NULL); + return ExecSimpleQuery(cdata, interp, "BEGIN", NULL); } /* *----------------------------------------------------------------------------- * @@ -1424,11 +1497,11 @@ } cdata->flags &= ~ CONN_FLAG_IN_XCN; /* Execute commit SQL command */ - return ExecSimpleQuery(interp, cdata->pgPtr, "COMMIT", NULL); + return ExecSimpleQuery(cdata, interp, "COMMIT", NULL); } /* *----------------------------------------------------------------------------- * @@ -1472,10 +1545,11 @@ Tcl_Obj* retval; /* List of table names */ Tcl_Obj* attrs; /* Attributes of the column */ Tcl_Obj* name; /* Name of a column */ Tcl_Obj* sqlQuery = Tcl_NewStringObj("SELECT * FROM \"", -1); /* Query used */ + Tcl_DString ds; 
Tcl_IncrRefCount(sqlQuery); /* Check parameters */ @@ -1487,15 +1561,18 @@ /* Check if table exists by retreiving one row. * The result will be later used to determine column types (oids) */ Tcl_AppendObjToObj(sqlQuery, objv[2]); Tcl_AppendToObj(sqlQuery, "\" LIMIT 1", -1); - if (ExecSimpleQuery(interp, cdata->pgPtr, Tcl_GetString(sqlQuery), + if (ExecSimpleQuery(cdata, interp, + ExternalFromStringObj(cdata, sqlQuery, &ds), &resType) != TCL_OK) { + Tcl_DStringFree(&ds); Tcl_DecrRefCount(sqlQuery); return TCL_ERROR; } + Tcl_DStringFree(&ds); Tcl_DecrRefCount(sqlQuery); /* Retreive column attributes */ @@ -1514,25 +1591,29 @@ Tcl_AppendToObj(sqlQuery, "' AND column_name LIKE '", -1); Tcl_AppendObjToObj(sqlQuery, objv[3]); } Tcl_AppendToObj(sqlQuery, "'", -1); - if (ExecSimpleQuery(interp, cdata->pgPtr, - Tcl_GetString(sqlQuery), &res) != TCL_OK) { + if (ExecSimpleQuery(cdata, interp, + ExternalFromStringObj(cdata, sqlQuery, &ds), + &res) != TCL_OK) { + Tcl_DStringFree(&ds); Tcl_DecrRefCount(sqlQuery); PQclear(resType); return TCL_ERROR; } else { int i, j; + + Tcl_DStringFree(&ds); retval = Tcl_NewObj(); Tcl_IncrRefCount(retval); for (i = 0; i < PQntuples(res); i += 1) { attrs = Tcl_NewObj(); /* 0 is column_name column number */ columnName = PQgetvalue(res, i, 0); - name = Tcl_NewStringObj(columnName, -1); + name = StringObjFromExternal(cdata, columnName, -1); Tcl_DictObjPut(NULL, attrs, literals[LIT_NAME], name); /* Get the type name, by retrieving type oid */ @@ -1554,18 +1635,18 @@ /* 1 is numeric_precision column number */ if (!PQgetisnull(res, i, 1)) { Tcl_DictObjPut(NULL, attrs, literals[LIT_PRECISION], - Tcl_NewStringObj(PQgetvalue(res, i, 1), -1)); + StringObjFromExternal(cdata, PQgetvalue(res, i, 1), -1)); } else { /* 2 is character_maximum_length column number */ if (!PQgetisnull(res, i, 2)) { Tcl_DictObjPut(NULL, attrs, literals[LIT_PRECISION], - Tcl_NewStringObj(PQgetvalue(res, i, 2), -1)); + StringObjFromExternal(cdata, PQgetvalue(res, i, 2), -1)); } } /* 
3 is character_maximum_length column number */ @@ -1572,11 +1653,11 @@ if (!PQgetisnull(res, i, 3)) { /* This is for numbers */ Tcl_DictObjPut(NULL, attrs, literals[LIT_SCALE], - Tcl_NewStringObj(PQgetvalue(res, i, 3), -1)); + StringObjFromExternal(cdata, PQgetvalue(res, i, 3), -1)); } /* 4 is is_nullable column number */ Tcl_DictObjPut(NULL, attrs, literals[LIT_NULLABLE], @@ -1697,11 +1778,11 @@ } cdata->flags &= ~CONN_FLAG_IN_XCN; /* Send end transaction SQL command */ - return ExecSimpleQuery(interp, cdata->pgPtr, "ROLLBACK", NULL); + return ExecSimpleQuery(cdata, interp, "ROLLBACK", NULL); } /* *----------------------------------------------------------------------------- * @@ -1736,18 +1817,20 @@ Tcl_ObjectGetMetadata(thisObject, &connectionDataType); /* Instance data */ Tcl_Obj** literals = cdata->pidata->literals; /* Literal pool */ PGresult* res; /* Result of libpq call */ - char * field; /* Field value from SQL result */ + char* field; /* Field value from SQL result */ Tcl_Obj* retval; /* List of table names */ Tcl_Obj* sqlQuery = Tcl_NewStringObj("SELECT tablename" " FROM pg_tables" " WHERE schemaname = 'public'", -1); /* SQL query for table list */ int i; + Tcl_DString ds; + Tcl_IncrRefCount(sqlQuery); /* Check parameters */ if (objc < 2 || objc > 3) { @@ -1757,22 +1840,25 @@ if (objc == 3) { /* Pattern string is given */ - Tcl_AppendToObj(sqlQuery, " AND tablename LIKE '", -1); + Tcl_AppendToObj(sqlQuery, " AND tablename LIKE '", -1); Tcl_AppendObjToObj(sqlQuery, objv[2]); Tcl_AppendToObj(sqlQuery, "'", -1); } /* Retrieve the table list */ - if (ExecSimpleQuery(interp, cdata ->pgPtr, Tcl_GetString(sqlQuery), + if (ExecSimpleQuery(cdata, interp, + ExternalFromStringObj(cdata, sqlQuery, &ds), &res) != TCL_OK) { + Tcl_DStringFree(&ds); Tcl_DecrRefCount(sqlQuery); return TCL_ERROR; } + Tcl_DStringFree(&ds); Tcl_DecrRefCount(sqlQuery); /* Iterate through the tuples and make the Tcl result */ retval = Tcl_NewObj(); @@ -1779,11 +1865,11 @@ for (i = 0; i < 
PQntuples(res); i+=1) { if (!PQgetisnull(res, i, 0)) { field = PQgetvalue(res, i, 0); if (field) { Tcl_ListObjAppendElement(NULL, retval, - Tcl_NewStringObj(field, -1)); + StringObjFromExternal(cdata, field, -1)); Tcl_ListObjAppendElement(NULL, retval, literals[LIT_EMPTY]); } } } PQclear(res); @@ -1818,14 +1904,23 @@ static void DeleteConnection( ConnectionData* cdata /* Instance data for the connection */ ) { + int i; + if (cdata->pgPtr != NULL) { PQfinish(cdata->pgPtr); } DecrPerInterpRefCount(cdata->pidata); + for (i = 0; i < INDX_MAX; ++i) { + if (cdata->savedOpts[i] != NULL) { + ckfree(cdata->savedOpts[i]); + cdata->savedOpts[i] = NULL; + } + } + Tcl_FreeEncoding(cdata->enc); ckfree(cdata); } /* *----------------------------------------------------------------------------- @@ -1948,14 +2043,14 @@ *----------------------------------------------------------------------------- */ static void UnallocateStatement( - PGconn * pgPtr, /* Connection handle */ + PGconn* pgPtr, /* Connection handle */ char* stmtName /* Statement name */ ) { - Tcl_Obj * sqlQuery = Tcl_NewStringObj("DEALLOCATE ", -1); + Tcl_Obj* sqlQuery = Tcl_NewStringObj("DEALLOCATE ", -1); Tcl_IncrRefCount(sqlQuery); Tcl_AppendToObj(sqlQuery, stmtName, -1); PQclear(PQexec(pgPtr, Tcl_GetString(sqlQuery))); Tcl_DecrRefCount(sqlQuery); } @@ -2015,19 +2110,19 @@ static PGresult* PrepareStatement( Tcl_Interp* interp, /* Tcl interpreter for error reporting */ StatementData* sdata, /* Statement data */ - char * stmtName /* Overriding name of the statement */ + char* stmtName /* Overriding name of the statement */ ) { ConnectionData* cdata = sdata->cdata; /* Connection data */ const char* nativeSqlStr; /* Native SQL statement to prepare */ - int nativeSqlLen; /* Length of the statement */ PGresult* res; /* result of statement preparing*/ PGresult* res2; int i; + Tcl_DString ds; if (stmtName == NULL) { stmtName = sdata->stmtName; } @@ -2034,24 +2129,25 @@ /* * Prepare the statement. 
Rather than giving parameter types, try * to let PostgreSQL infer all of them. */ - nativeSqlStr = Tcl_GetStringFromObj(sdata->nativeSql, &nativeSqlLen); + nativeSqlStr = ExternalFromStringObj(cdata, sdata->nativeSql, &ds); res = PQprepare(cdata->pgPtr, stmtName, nativeSqlStr, 0, NULL); + Tcl_DStringFree(&ds); if (res == NULL) { - TransferPostgresError(interp, cdata->pgPtr); + TransferPostgresError(cdata, interp); return NULL; } /* * Report on what parameter types were inferred. */ res2 = PQdescribePrepared(cdata->pgPtr, stmtName); if (res2 == NULL) { - TransferPostgresError(interp, cdata->pgPtr); + TransferPostgresError(cdata, interp); PQclear(res); return NULL; } for (i = 0; i < PQnparams(res2); ++i) { sdata->paramDataTypes[i] = PQparamtype(res2, i); @@ -2080,16 +2176,17 @@ *----------------------------------------------------------------------------- */ static Tcl_Obj* ResultDescToTcl( + ConnectionData* cdata, /* Instance data for the connection */ PGresult* result, /* Result set description */ int flags /* Flags governing the conversion */ ) { Tcl_Obj* retval = Tcl_NewObj(); Tcl_HashTable names; /* Hash table to resolve name collisions */ - char * fieldName; + char* fieldName; Tcl_InitHashTable(&names, TCL_STRING_KEYS); if (result != NULL) { unsigned int fieldCount = PQnfields(result); unsigned int i; for (i = 0; i < fieldCount; ++i) { @@ -2097,11 +2194,11 @@ int count = 1; Tcl_Obj* nameObj; Tcl_HashEntry* entry; Tcl_HashEntry* countEntry; fieldName = PQfname(result, i); - nameObj = Tcl_NewStringObj(fieldName, -1); + nameObj = StringObjFromExternal(cdata, fieldName, -1); Tcl_IncrRefCount(nameObj); countEntry = NULL; for (;;) { entry = Tcl_CreateHashEntry(&names, Tcl_GetString(nameObj), @@ -2115,12 +2212,13 @@ } count = PTR2INT(Tcl_GetHashValue(countEntry)); ++count; Tcl_SetHashValue(countEntry, INT2PTR(count)); Tcl_DecrRefCount(nameObj); - nameObj = Tcl_ObjPrintf("%s#%d", fieldName, count); + nameObj = StringObjFromExternal(cdata, fieldName, -1); 
Tcl_IncrRefCount(nameObj); + Tcl_AppendPrintfToObj(nameObj, "#%d", count); } Tcl_ListObjAppendElement(NULL, retval, nameObj); Tcl_DecrRefCount(nameObj); } @@ -2283,11 +2381,11 @@ res = PrepareStatement(interp, sdata, NULL); if (res == NULL) { goto freeSData; } - if (TransferResultError(interp, res) != TCL_OK) { + if (TransferResultError(cdata, interp, res) != TCL_OK) { PQclear(res); goto freeSData; } PQclear(res); @@ -2658,11 +2756,11 @@ * (binary or string) */ char* paramNeedsFreeing; /* Flags for whether a parameter needs * its memory released */ Tcl_Obj** paramTempObjs; /* Temporary parameter objects allocated * to canonicalize numeric parameter values */ - + Tcl_DString* paramTempDs; /* Temporary string buffers */ PGresult* res; /* Temporary result */ int i; int status = TCL_ERROR; /* Return status */ /* Check parameter count */ @@ -2712,11 +2810,11 @@ rdata->stmtName = GenStatementName(cdata); res = PrepareStatement(interp, sdata, rdata->stmtName); if (res == NULL) { return TCL_ERROR; } - if (TransferResultError(interp, res) != TCL_OK) { + if (TransferResultError(cdata, interp, res) != TCL_OK) { PQclear(res); return TCL_ERROR; } PQclear(res); } else { @@ -2734,11 +2832,11 @@ rdata->stmtName = sdata->stmtName; res = PrepareStatement(interp, sdata, NULL); if (res == NULL) { return TCL_ERROR; } - if (TransferResultError(interp, res) != TCL_OK) { + if (TransferResultError(cdata, interp, res) != TCL_OK) { PQclear(res); return TCL_ERROR; } PQclear(res); sdata->paramTypesChanged = 0; @@ -2748,14 +2846,16 @@ paramValues = (const char**) ckalloc(sdata->nParams * sizeof(char* )); paramLengths = (int*) ckalloc(sdata->nParams * sizeof(int*)); paramFormats = (int*) ckalloc(sdata->nParams * sizeof(int*)); paramNeedsFreeing = (char *)ckalloc(sdata->nParams); paramTempObjs = (Tcl_Obj**) ckalloc(sdata->nParams * sizeof(Tcl_Obj*)); + paramTempDs = (Tcl_DString*) ckalloc(sdata->nParams * sizeof(Tcl_DString)); memset(paramNeedsFreeing, 0, sdata->nParams); for (i = 0; i < 
sdata->nParams; i++) { paramTempObjs[i] = NULL; + Tcl_DStringInit(¶mTempDs[i]); } for (i=0; inParams; i++) { Tcl_ListObjIndex(NULL, sdata->subVars, i, ¶mNameObj); paramName = Tcl_GetString(paramNameObj); @@ -2773,11 +2873,11 @@ TCL_LEAVE_ERR_MSG); } /* At this point, paramValObj contains the parameter value */ if (paramValObj != NULL) { - char * bufPtr; + char* bufPtr; int32_t tmp32; int16_t tmp16; switch (sdata->paramDataTypes[i]) { case INT2OID: @@ -2828,12 +2928,13 @@ == TCL_OK) { paramTempObjs[i] = Tcl_NewWideIntObj(val); Tcl_IncrRefCount(paramTempObjs[i]); paramFormats[i] = 0; paramValues[i] = - Tcl_GetStringFromObj(paramTempObjs[i], - ¶mLengths[i]); + ExternalFromStringObj(cdata, paramTempObjs[i], + ¶mTempDs[i]); + paramLengths[i] = Tcl_DStringLength(¶mTempDs[i]); } else { goto convertString; /* If Tcl can't parse it, let SQL try */ } } @@ -2847,12 +2948,13 @@ == TCL_OK) { paramTempObjs[i] = Tcl_NewDoubleObj(val); Tcl_IncrRefCount(paramTempObjs[i]); paramFormats[i] = 0; paramValues[i] = - Tcl_GetStringFromObj(paramTempObjs[i], - ¶mLengths[i]); + ExternalFromStringObj(cdata, paramTempObjs[i], + ¶mTempDs[i]); + paramLengths[i] = Tcl_DStringLength(¶mTempDs[i]); } else { goto convertString; /* If Tcl can't parse it, let SQL try */ } } @@ -2866,12 +2968,14 @@ break; default: convertString: paramFormats[i] = 0; - paramValues[i] = Tcl_GetStringFromObj(paramValObj, - ¶mLengths[i]); + paramValues[i] = + ExternalFromStringObj(cdata, paramValObj, + ¶mTempDs[i]); + paramLengths[i] = Tcl_DStringLength(¶mTempDs[i]); break; } } else { paramValues[i] = NULL; paramFormats[i] = 0; @@ -2880,15 +2984,15 @@ /* Execute the statement */ rdata->execResult = PQexecPrepared(cdata->pgPtr, rdata->stmtName, sdata->nParams, paramValues, paramLengths, paramFormats, 0); - if (TransferResultError(interp, rdata->execResult) != TCL_OK) { + if (TransferResultError(cdata, interp, rdata->execResult) != TCL_OK) { goto freeParamTables; } - sdata->columnNames = ResultDescToTcl(rdata->execResult, 0); 
+ sdata->columnNames = ResultDescToTcl(cdata, rdata->execResult, 0); Tcl_IncrRefCount(sdata->columnNames); status = TCL_OK; /* Clean up allocated memory */ @@ -2898,17 +3002,19 @@ ckfree(paramValues[i]); } if (paramTempObjs[i] != NULL) { Tcl_DecrRefCount(paramTempObjs[i]); } + Tcl_DStringFree(¶mTempDs[i]); } ckfree(paramValues); ckfree(paramLengths); ckfree(paramFormats); ckfree(paramNeedsFreeing); ckfree(paramTempObjs); + ckfree(paramTempDs); return status; } @@ -3007,11 +3113,11 @@ Tcl_Obj* colName; /* Name of the current column */ Tcl_Obj* resultRow; /* Row of the result set under construction */ int status = TCL_ERROR; /* Status return from this command */ - char * buffer; /* buffer containing field value */ + char* buffer; /* buffer containing field value */ int buffSize; /* size of buffer containing field value */ int i; if (objc != 3) { Tcl_WrongNumArgs(interp, 2, objv, "varName"); @@ -3045,16 +3151,16 @@ /* * Postgres returns backslash-escape sequences for * binary data. Substitute them away. 
*/ Tcl_Obj* toSubst; - toSubst = Tcl_NewStringObj(buffer, buffSize); + toSubst = StringObjFromExternal(cdata, buffer, buffSize); Tcl_IncrRefCount(toSubst); colObj = Tcl_SubstObj(interp, toSubst, TCL_SUBST_BACKSLASHES); Tcl_DecrRefCount(toSubst); } else { - colObj = Tcl_NewStringObj((char*)buffer, buffSize); + colObj = StringObjFromExternal(cdata, (char*)buffer, buffSize); } } if (lists) { if (colObj == NULL) { @@ -3178,11 +3284,11 @@ Tcl_Interp* interp, /* Tcl interpreter */ Tcl_ObjectContext context, /* Object context */ int objc, /* Parameter count */ Tcl_Obj *const objv[] /* Parameter vector */ ) { - char * nTuples; + char* nTuples; Tcl_Object thisObject = Tcl_ObjectContextObject(context); /* The current result set object */ ResultSetData* rdata = (ResultSetData*) Tcl_ObjectGetMetadata(thisObject, &resultSetDataType); /* Data pertaining to the current result set */ @@ -3200,11 +3306,11 @@ nTuples = PQcmdTuples(rdata->execResult); if (strlen(nTuples) == 0) { Tcl_SetObjResult(interp, literals[LIT_0]); } else { Tcl_SetObjResult(interp, - Tcl_NewStringObj(nTuples, -1)); + StringObjFromExternal(cdata, nTuples, -1)); } return TCL_OK; } /* Index: jni/tcl/tcl-config.mk ================================================================== --- jni/tcl/tcl-config.mk +++ jni/tcl/tcl-config.mk @@ -51,11 +51,11 @@ -DMP_FIXED_CUTOFFS=1 \ -DMP_NO_STDINT=1 \ -DTCL_TOMMATH=1 \ -D_REENTRANT=1 \ -D_THREADSAFE=1 \ - -DTCL_UTF_MAX=6 \ + -DTCL_UTF_MAX=4 \ -DTCL_THREADS=1 \ -DUSE_THREAD_ALLOC=1 \ -DTCL_CFGVAL_ENCODING="\"utf-8\"" \ -DTCL_UNLOAD_DLLS=1 \ -DTCL_CFG_OPTIMIZED=1 \ Index: jni/tcl/tests/cmdIL.test ================================================================== --- jni/tcl/tests/cmdIL.test +++ jni/tcl/tests/cmdIL.test @@ -17,11 +17,10 @@ catch [list package require -exact Tcltest [info patchlevel]] # Used for constraining memory leak tests testConstraint memory [llength [info commands memory]] testConstraint testobj [llength [info commands testobj]] -testConstraint fullutf 
[expr {[format %c 0x010000] ne "\uFFFD"}] test cmdIL-1.1 {Tcl_LsortObjCmd procedure} -returnCodes error -body { lsort } -result {wrong # args: should be "lsort ?-option value ...? list"} test cmdIL-1.2 {Tcl_LsortObjCmd procedure} -returnCodes error -body { @@ -503,29 +502,29 @@ rename test_lsort "" } test cmdIL-5.6 {lsort with multiple list-style index options} { lsort -index {1 2 3} -index 0 {{a b} {c d} {b e}} } {{a b} {b e} {c d}} -test cmdIL-5.7 {lsort unicode beyond U+FFFF} fullutf { +test cmdIL-5.7 {lsort unicode beyond U+FFFF} { lsort {\uD83D\uDE03 \uD83D\uDE02 \uD83D\uDE04} } "\uD83D\uDE02 \uD83D\uDE03 \uD83D\udE04" -test cmdIL-5.7 {lsort unicode beyond U+FFFF} fullutf { +test cmdIL-5.7 {lsort unicode beyond U+FFFF} { lsort -decreasing {\uD83D\uDE03 \uD83D\uDE02 \uD83D\uDE04} } "\uD83D\uDE04 \uD83D\uDE03 \uD83D\udE02" -test cmdIL-5.8 {lsort unicode beyond U+FFFF} fullutf { +test cmdIL-5.8 {lsort unicode beyond U+FFFF} { lsort -nocase {\U0001F603 \U0001F602 \U0001F604} } "\U0001F602 \U0001F603 \U0001F604" -test cmdIL-5.9 {lsort unicode beyond U+FFFF} fullutf { +test cmdIL-5.9 {lsort unicode beyond U+FFFF} { lsort -dictionary {\U0001F603x1 \U0001F602y1 \U0001F602y \U0001F603xx} } "\U0001F602y \U0001F602y1 \U0001F603x1 \U0001F603xx" -test cmdIL-5.9 {lsort unicode beyond U+FFFF} fullutf { +test cmdIL-5.9 {lsort unicode beyond U+FFFF} { lsort -dictionary {b\U0001F60320 c\U0001F60230 c\U0001F6023x b\U0001F6032} } "b\U0001F6032 b\U0001F60320 c\U0001F6023x c\U0001F60230" -test cmdIL-5.10 {lsort unicode beyond U+FFFF} fullutf { +test cmdIL-5.10 {lsort unicode beyond U+FFFF} { lsort -nocase {b\U00010428a B\U00010400C} } "b\U00010428a B\U00010400C" -test cmdIL-5.11 {lsort unicode beyond U+FFFF} fullutf { +test cmdIL-5.11 {lsort unicode beyond U+FFFF} { lsort -dictionary -nocase {b\U00010428a B\U00010400C} } "b\U00010428a B\U00010400C" # Compiled version test cmdIL-6.1 {lassign command syntax} -returnCodes error -body { Index: jni/tcl/tests/encoding.test 
================================================================== --- jni/tcl/tests/encoding.test +++ jni/tcl/tests/encoding.test @@ -35,12 +35,11 @@ # Some tests require the testencoding command testConstraint testencoding [llength [info commands testencoding]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint teststringbytes [llength [info commands teststringbytes]] -testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] -testConstraint not389 [expr {[string length \U010000] == 1}] +testConstraint ucs4 [expr {[string length \U010000] == 1}] testConstraint exec [llength [info commands exec]] testConstraint testgetdefenc [llength [info commands testgetdefenc]] # TclInitEncodingSubsystem is tested by the rest of this file # TclFinalizeEncodingSubsystem is not currently tested @@ -305,15 +304,16 @@ test encoding-12.5 {LoadTableEncoding: symbol encoding} { set x [encoding convertto symbol \u3b3] append x [encoding convertto symbol \u67] append x [encoding convertfrom symbol \x67] } "\x67\x67\u3b3" -test encoding-12.6 {LoadTableEncoding: overflow in char value} { - fullutf not389 -} { +test encoding-12.6.1 {LoadTableEncoding: overflow in char value} ucs4 { encoding convertto iso8859-3 \U010000 } "?" +test encoding-12.6.2 {LoadTableEncoding: overflow in char value} !ucs4 { + encoding convertto iso8859-3 \U010000 +} "??" 
test encoding-13.1 {LoadEscapeTable} { viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]] } [viewable "ab\x1b\$B8C\x1b\$\(DD%\x1b(Bg"] @@ -331,104 +331,80 @@ test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { set y [encoding convertfrom utf-8 [encoding convertto utf-8 \u0000]] binary scan [teststringbytes $y] H* z set z } c080 -test encoding-15.4 {UtfToUtfProc emoji character input} -constraints { - fullutf -} -body { +test encoding-15.4 {UtfToUtfProc emoji character input} { + set x \xed\xa0\xbd\xed\xb8\x82 + set y [encoding convertfrom utf-8 \xed\xa0\xbd\xed\xb8\x82] + list [string length $x] $y +} "6 \uD83D\uDE02" +test encoding-15.5 {UtfToUtfProc emoji character input} { set x \xf0\x9f\x98\x82 set y [encoding convertfrom utf-8 \xf0\x9f\x98\x82] list [string length $x] $y -} -result "4 \uD83D\uDE02" -test encoding-15.5 {UtfToUtfProc emoji character input} -constraints { - fullutf -} -body { - set x \xF0\x9F\x98\x82 - set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] - list [string length $x] $y -} -result "4 \uD83D\uDE02" -test encoding-15.6 {UtfToUtfProc emoji character output} -constraints { - fullutf not389 -} -body { +} "4 \uD83D\uDE02" +test encoding-15.6 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uDE02\uD83D set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z -} -result {10 edb882f09f9882eda0bd} -test encoding-15.7 {UtfToUtfProc emoji character output} -constraints { - fullutf not389 -} -body { +} {10 edb882f09f9882eda0bd} +test encoding-15.7 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uD83D set y [encoding convertto utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z -} -result {3 9 edb882eda0bdeda0bd} -test encoding-15.8 {UtfToUtfProc emoji character output} -constraints { - fullutf not389 -} -body { +} {3 9 edb882eda0bdeda0bd} +test encoding-15.8 {UtfToUtfProc emoji character output} { set x 
\uDE02\uD83D\xE9 set y [encoding convertto utf-8 \uDE02\uD83D\xE9] binary scan $y H* z list [string length $x] [string length $y] $z -} -result {3 8 edb882eda0bdc3a9} -test encoding-15.9 {UtfToUtfProc emoji character output} -constraints { - fullutf not389 -} -body { +} {3 8 edb882eda0bdc3a9} +test encoding-15.9 {UtfToUtfProc emoji character output} { set x \uDE02\uD83DX set y [encoding convertto utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z -} -result {3 7 edb882eda0bd58} -test encoding-15.10 {UtfToUtfProc high surrogate character output} -constraints { - fullutf not389 -} -body { +} {3 7 edb882eda0bd58} +test encoding-15.10 {UtfToUtfProc high surrogate character output} { set x \uDE02\xE9 set y [encoding convertto utf-8 \uDE02\xE9] binary scan $y H* z list [string length $x] [string length $y] $z -} -result {2 5 edb882c3a9} -test encoding-15.11 {UtfToUtfProc low surrogate character output} -constraints { - fullutf not389 -} -body { +} {2 5 edb882c3a9} +test encoding-15.11 {UtfToUtfProc low surrogate character output} { set x \uDA02\xE9 set y [encoding convertto utf-8 \uDA02\xE9] binary scan $y H* z list [string length $x] [string length $y] $z -} -result {2 5 eda882c3a9} -test encoding-15.12 {UtfToUtfProc high surrogate character output} -constraints { - fullutf not389 -} -body { +} {2 5 eda882c3a9} +test encoding-15.12 {UtfToUtfProc high surrogate character output} { set x \uDE02Y set y [encoding convertto utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z -} -result {2 4 edb88259} -test encoding-15.13 {UtfToUtfProc low surrogate character output} -constraints { - fullutf not389 -} -body { +} {2 4 edb88259} +test encoding-15.13 {UtfToUtfProc low surrogate character output} { set x \uDA02Y set y [encoding convertto utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z -} -result {2 4 eda88259} -test encoding-15.14 {UtfToUtfProc high surrogate character output} 
-constraints { - fullutf not389 -} -body { +} {2 4 eda88259} +test encoding-15.14 {UtfToUtfProc high surrogate character output} { set x \uDE02 set y [encoding convertto utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z -} -result {1 3 edb882} -test encoding-15.15 {UtfToUtfProc low surrogate character output} -constraints { - fullutf not389 -} -body { +} {1 3 edb882} +test encoding-15.15 {UtfToUtfProc low surrogate character output} { set x \uDA02 set y [encoding convertto utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z -} -result {1 3 eda882} +} {1 3 eda882} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" @@ -435,20 +411,25 @@ test encoding-16.1 {UnicodeToUtfProc} { set val [encoding convertfrom unicode NN] list $val [format %x [scan $val %c]] } "\u4e4e 4e4e" -test encoding-16.2 {UnicodeToUtfProc} -constraints fullutf -body { +test encoding-16.2 {UnicodeToUtfProc} -body { set val [encoding convertfrom unicode "\xD8\xD8\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\U460DC 460dc" test encoding-16.3 {UnicodeToUtfProc} -body { set val [encoding convertfrom unicode "\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\uDCDC dcdc" +test encoding-16.3 {UnicodeToUtfProc} -body { + set val [encoding convertfrom unicode \ + [testbytestring "\xdc\xdc\xd8\xd8\xee\xee"]] + list "\udcdc\ud8d8\ueeee" [format %x [scan $val %c]] +} -result [list "\udcdc\ud8d8\ueeee" dcdc] -test encoding-17.1 {UtfToUnicodeProc} -constraints fullutf -body { +test encoding-17.1 {UtfToUnicodeProc} -body { encoding convertto unicode "\U460DC" } -result "\xD8\xD8\xDC\xDC" test encoding-17.2 {UtfToUnicodeProc} -body { encoding convertto unicode "\uDCDC" } -result "\xDC\xDC" Index: jni/tcl/tests/scan.test ================================================================== --- 
jni/tcl/tests/scan.test +++ jni/tcl/tests/scan.test @@ -85,11 +85,12 @@ } testConstraint ieeeFloatingPoint [testIEEE] testConstraint wideIs64bit \ [expr {(wide(0x80000000) > 0) && (wide(0x8000000000000000) < 0)}] -testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] +testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD" && \ + [string length \U010000] == 1}] test scan-1.1 {BuildCharSet, CharInSet} { list [scan foo {%[^o]} x] $x } {1 f} test scan-1.2 {BuildCharSet, CharInSet} { Index: jni/tcl/tests/split.test ================================================================== --- jni/tcl/tests/split.test +++ jni/tcl/tests/split.test @@ -13,11 +13,10 @@ if {"::tcltest" ni [namespace children]} { package require tcltest 2.5 namespace import -force ::tcltest::* } -testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] test split-1.1 {basic split commands} { split "a\n b\t\r c\n " } {a {} b {} {} c {} {}} test split-1.2 {basic split commands} { @@ -69,23 +68,23 @@ split "12,34,56," {,} } {12 34 56 {}} test split-1.14 {basic split commands} { split ",12,,,34,56," {,} } {{} 12 {} {} 34 56 {}} -test split-1.15 {basic split commands} fullutf { +test split-1.15 {basic split commands} { split "a\U0001F4A9b" {} } "a \U0001F4A9 b" -test split-1.16 {basic split commands} fullutf { +test split-1.16 {basic split commands} { split "\uD83D\uDE02Hello\uD83D\uDE02World\uD83D\uDE02" \U0001F602 } {{} Hello World {}} -test split-1.17 {basic split commands} fullutf { +test split-1.17 {basic split commands} { split "\U0001F602Hello\U0001F602World\U0001F602" \uD83D\uDE02 } {{} Hello World {}} -test split-1.18 {basic split commands} fullutf { +test split-1.18 {basic split commands} { split "\U0001F602\U0001F602\U0001F602" \uD83D\uDE02 } {{} {} {} {}} -test split-1.19 {basic split commands} fullutf { +test split-1.19 {basic split commands} { proc foo args { tailcall split {*}$args } foo "\U0001F602Hello\U0001F602World\U0001F602" \U0001F602 } {{} Hello World {}} 
Index: jni/tcl/tests/string.test ================================================================== --- jni/tcl/tests/string.test +++ jni/tcl/tests/string.test @@ -23,13 +23,11 @@ # Some tests require the testobj command testConstraint testobj [expr {[info commands testobj] != {}}] testConstraint testindexobj [expr {[info commands testindexobj] != {}}] testConstraint testevalex [expr {[info commands testevalex] != {}}] -testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] -testConstraint tip389 [expr {[string length \U010000] == 2}] -testConstraint ucs4 [expr {[string length \U010000] == 1 && [format %c 0x010000] ne "\uFFFD"}] +testConstraint wtf8 [expr {[string length \U00010000] != 1}] testConstraint testbytestring [expr {[info commands testbytestring] != {}}] # Used for constraining memory leak tests testConstraint memory [llength [info commands memory]] @@ -1329,13 +1327,16 @@ test string-12.22.$noComp {string range, shimmering binary/index} { set s 0000000001 binary scan $s a* x run {string range $s $s end} } 000000001 -test string-12.23.$noComp {string range, surrogates, bug [11ae2be95dac9417]} tip389 { +test string-12.23.$noComp {string range, surrogates, bug [11ae2be95dac9417]} wtf8 { run {list [string range a\U00100000b 1 1] [string range a\U00100000b 2 2] [string range a\U00100000b 3 3]} } [list \U00100000 {} b] +test string-12.23.$noComp {string range, surrogates, bug [11ae2be95dac9417]} !wtf8 { + run {list [string range a\U00100000b 1 1] [string range a\U00100000b 2 2] [string range a\U00100000b 3 3]} +} [list \U00100000 b {}] test string-13.1.$noComp {string repeat} { list [catch {run {string repeat}} msg] $msg } {1 {wrong # args: should be "string repeat string count"}} test string-13.2.$noComp {string repeat} { @@ -1663,19 +1664,19 @@ run {string wordend "xyz\u2045de fg" 0} } -result 3 test string-21.14.$noComp {string wordend, unicode} -body { run {string wordend "\uC700\uC700 abc" 8} } -result 6 -test string-21.15.$noComp {string trim, 
unicode} fullutf { +test string-21.15.$noComp {string trim, unicode} -body { run {string trim "\U1F602Hello world!\U1F602" \U1F602} -} "Hello world!" -test string-21.16.$noComp {string trimleft, unicode} fullutf { +} -result "Hello world!" +test string-21.16.$noComp {string trimleft, unicode} -body { run {string trimleft "\U1F602Hello world!\U1F602" \U1F602} -} "Hello world!\U1F602" -test string-21.17.$noComp {string trimright, unicode} fullutf { +} -result "Hello world!\U1F602" +test string-21.17.$noComp {string trimright, unicode} -body { run {string trimright "\U1F602Hello world!\U1F602" \U1F602} -} "\U1F602Hello world!" +} -result "\U1F602Hello world!" test string-22.1.$noComp {string wordstart} -body { list [catch {run {string word a}} msg] $msg } -result {1 {unknown or ambiguous subcommand "word": must be bytelength, cat, compare, equal, first, index, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}} test string-22.2.$noComp {string wordstart} -body { @@ -1717,23 +1718,23 @@ test string-22.14.$noComp {string wordstart, invalid UTF-8} -constraints testbytestring -body { # See Bug c61818e4c9 set demo [testbytestring "abc def\xE0\xA9ghi"] run {string index $demo [string wordstart $demo 10]} } -result g -test string-22.15.$noComp {string wordstart, unicode} tip389 { +test string-22.15.$noComp {string wordstart, unicode} wtf8 { run {string wordstart "ab\uD83D\uDCA3 cdef ghi" 12} } 10 -test string-22.16.$noComp {string wordstart, unicode} tip389 { +test string-22.16.$noComp {string wordstart, unicode} wtf8 { run {string wordstart "ab\uD83D\uDCA3 cdef ghi" 3} -} 3 -test string-22.17.$noComp {string wordstart, unicode} fullutf { +} 2 +test string-22.17.$noComp {string wordstart, unicode} !wtf8 { run {string wordstart "ab\uD83D\uDCA3 cdef ghi" 2} } 2 -test string-22.18.$noComp {string wordstart, unicode} tip389 { +test string-22.18.$noComp {string wordstart, unicode} wtf8 { run 
{string wordstart "ab\uD83D\uDCA3cdef" 4} } 4 -test string-22.19.$noComp {string wordstart, unicode} ucs4 { +test string-22.19.$noComp {string wordstart, unicode} !wtf8 { run {string wordstart "ab\uD83D\uDCA3cdef" 3} } 3 test string-23.0.$noComp {string is boolean, Bug 1187123} testindexobj { set x 5 @@ -1843,23 +1844,23 @@ } 030201 test string-24.15.$noComp {string reverse command - pure bytearray} { binary scan [run {tcl::string::reverse [binary format H* 010203]}] H* x set x } 030201 -test string-24.16.$noComp {string reverse command - fullutf} fullutf { +test string-24.16.$noComp {string reverse command - surrogates} { run {string reverse \u0444bulb\ud83d\ude02} } \ud83d\ude02blub\u0444 -test string-24.17.$noComp {string reverse command - fullutf} fullutf { +test string-24.17.$noComp {string reverse command - surrogates} { run {string reverse \ud83d\ude02hello\ud83d\ude02} } \ud83d\ude02olleh\ud83d\ude02 -test string-24.18.$noComp {string reverse command - ucs4} ucs4 { +test string-24.18.$noComp {string reverse command - surrogates} { set s \u0444bulb\ud83d\ude02 # shim shimmery ... string index $s 0 run {string reverse $s} } \ud83d\ude02blub\u0444 -test string-24.19.$noComp {string reverse command - ucs4} ucs4 { +test string-24.19.$noComp {string reverse command - surrogates} { set s \ud83d\ude02hello\ud83d\ude02 # shim shimmery ... 
string index $s 0 run {string reverse $s} } \ud83d\ude02olleh\ud83d\ude02 Index: jni/tcl/tests/stringComp.test ================================================================== --- jni/tcl/tests/stringComp.test +++ jni/tcl/tests/stringComp.test @@ -24,11 +24,10 @@ catch [list package require -exact Tcltest [info patchlevel]] # Some tests require the testobj command testConstraint testobj [expr {[info commands testobj] != {}}] -testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint memory [llength [info commands memory]] if {[testConstraint memory]} { proc getbytes {} { set lines [split [memory info] \n] return [lindex $lines 3 3] @@ -226,34 +225,28 @@ string compare \uD83D\uDE00 \uD83D\uDE01\U0001F600\U0001F601 } -1 {} } { if {$tname eq ""} { continue } if {$tcode eq ""} { set tcode ok } - # not nice but... - if {$tname in {{unicode corner cases} {unicode beyond U+FFFF}}} { - set cnstr fullutf - } else { - set cnstr {} - } test stringComp-2.[incr i] "string compare, $tname" \ -body [list eval $tbody] \ - -returnCodes $tcode -result $tresult -constraints $cnstr + -returnCodes $tcode -result $tresult test stringComp-2.[incr i] "string compare bc, $tname" \ -body "[list proc foo {} $tbody];foo" \ - -returnCodes $tcode -result $tresult -constraints $cnstr + -returnCodes $tcode -result $tresult if {"error" ni $tcode} { set tresult [expr {!$tresult}] } else { set tresult [string map {compare equal} $tresult] } set tbody [string map {compare equal} $tbody] test stringComp-2.[incr i] "string equal, $tname" \ -body [list eval $tbody] \ - -returnCodes $tcode -result $tresult -constraints $cnstr + -returnCodes $tcode -result $tresult test stringComp-2.[incr i] "string equal bc, $tname" \ -body "[list proc foo {} $tbody];foo" \ - -returnCodes $tcode -result $tresult -constraints $cnstr + -returnCodes $tcode -result $tresult } # need a few extra tests short abbr cmd test stringComp-3.1 {string compare, shortest method name} { proc foo {} {string co 
abcde ABCDE} @@ -735,20 +728,20 @@ [string match *a*l*\u0000*cba* $longString] \ [string match *===* $longString] } foo } {0 1 1 1 0 0} -test stringComp-11.55 {string match, unicode} fullutf { +test stringComp-11.55 {string match, unicode} { string match *\U1F602* Hello\U1F602World } 1 -test stringComp-11.56 {string match, unicode} fullutf { +test stringComp-11.56 {string match, unicode} { string match *\[\U1F602\]* Hello\U1F602World } 1 -test stringComp-11.57 {string match, unicode} fullutf { +test stringComp-11.57 {string match, unicode} { string match *\[\U1F602-\U1F604\]* Hello\U1F603World } 1 -test stringComp-11.58 {string match, unicode} fullutf { +test stringComp-11.58 {string match, unicode} { proc foo {p s} { return [string match $p $s] } list \ [foo *\[\U1F602-\U1F604\]* Hello\uD83D\uDE03World] \ Index: jni/tcl/tests/utf.test ================================================================== --- jni/tcl/tests/utf.test +++ jni/tcl/tests/utf.test @@ -16,13 +16,12 @@ namespace path ::tcl::mathop ::tcltest::loadTestedCommands catch [list package require -exact Tcltest [info patchlevel]] -testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] -testConstraint tip389 [expr {[string length [format %c 0x10000]] == 2}] +testConstraint wtf8 [expr {[string length \U010000] eq 2}] testConstraint ucs4 [expr {[testConstraint fullutf] && [string length [format %c 0x10000]] == 1}] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testfindfirst [llength [info commands testfindfirst]] @@ -51,16 +50,16 @@ expr {[format %c 0x110000] eq [testbytestring \xEF\xBF\xBD]} } 1 test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { expr {[format %c -1] eq [testbytestring \xEF\xBF\xBD]} } 1 -test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf testbytestring} { +test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {testbytestring wtf8} { + expr {"\U014E4E" 
eq [testbytestring "\xED\xA0\x93\xED\xB9\x8E"]} +} 1 +test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {testbytestring ucs4} { expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} } 1 -test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {ucs2 testbytestring} { - expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} -} 0 test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { expr {"\uD842" eq [testbytestring \xED\xA1\x82]} } 1 test utf-1.9 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { expr {"\uDC42" eq [testbytestring \xED\xB1\x82]} @@ -69,27 +68,36 @@ expr {[format %c 0xD842] eq [testbytestring \xED\xA1\x82]} } 1 test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { expr {[format %c 0xDC42] eq [testbytestring \xED\xB1\x82]} } 1 -test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {testbytestring fullutf} { +test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {testbytestring ucs4} { expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]} } 1 test utf-1.13 {Tcl_UniCharToUtf: sequence of high surrogates} testbytestring { expr {"\uD842\uD842\uD842" eq [testbytestring \xED\xA1\x82\xED\xA1\x82\xED\xA1\x82]} } 1 test utf-1.14 {Tcl_UniCharToUtf: sequence of low surrogates} testbytestring { expr {"\uDC42\uDC42\uDC42" eq [testbytestring \xED\xB1\x82\xED\xB1\x82\xED\xB1\x82]} } 1 -test utf-1.15 {Tcl_UniCharToUtf: mix of surrogates} {testbytestring fullutf} { +test utf-1.15.0 {Tcl_UniCharToUtf: mix of surrogates} {testbytestring wtf8} { + expr {"\uDC42\uD842\uDC42" eq [testbytestring \xED\xB1\x82\xED\xA1\x82\xED\xB1\x82]} +} 1 +test utf-1.15.1 {Tcl_UniCharToUtf: mix of surrogates} {testbytestring ucs4} { expr {"\uDC42\uD842\uDC42" eq [testbytestring \xED\xB1\x82\xF0\xA0\xA1\x82]} } 1 string compare "\uDC42\uD842\uDC42" [testbytestring \xED\xB1\x82\xF0\xA0\xA1\x82] -test utf-1.16 {Tcl_UniCharToUtf: mix of surrogates} {testbytestring fullutf} { +test 
utf-1.16.0 {Tcl_UniCharToUtf: mix of surrogates} {testbytestring wtf8} { + expr {"\uD842\uD842\uDC42" eq [testbytestring \xED\xA1\x82\xED\xA1\x82\xED\xB1\x82]} +} 1 +test utf-1.16.1 {Tcl_UniCharToUtf: mix of surrogates} {testbytestring ucs4} { expr {"\uD842\uD842\uDC42" eq [testbytestring \xED\xA1\x82\xF0\xA0\xA1\x82]} } 1 -test utf-1.17 {Tcl_UniCharToUtf: mix of surrogates} {testbytestring fullutf} { +test utf-1.17.0 {Tcl_UniCharToUtf: mix of surrogates} {testbytestring wtf8} { + expr {"\uD842\uDC42\uD842" eq [testbytestring \xED\xA1\x82\xED\xB1\x82\xED\xA1\x82]} +} 1 +test utf-1.17.1 {Tcl_UniCharToUtf: mix of surrogates} {testbytestring ucs4} { expr {"\uD842\uDC42\uD842" eq [testbytestring \xF0\xA0\xA1\x82\xED\xA1\x82]} } 1 test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" @@ -110,36 +118,30 @@ string length [testbytestring \xE2\xA2] } 2 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { string length [testbytestring \xE4\xB9\x8E] } 1 -test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { +test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring wtf8} { string length [testbytestring \xF0\x90\x80\x80] } 4 test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { string length [testbytestring \xF0\x90\x80\x80] } 1 -test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring tip389} { - string length [testbytestring \xF0\x90\x80\x80] -} 2 -test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { +test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring wtf8} { string length [testbytestring \xF4\x8F\xBF\xBF] } 4 test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} ucs4 { string length \U10FFFF } 1 -test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} tip389 { - string length \uDBFF\uDFFF -} 2 test 
utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { string length [testbytestring \xF0\x8F\xBF\xBF] } 4 -test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring { +test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring wtf8} { # Would decode to U+110000 but that is outside the Unicode range. string length [testbytestring \xF4\x90\x80\x80] } 4 -test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { +test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} {testbytestring wtf8} { string length [testbytestring \xF8\xA2\xA2\xA2\xA2] } 5 test utf-3.1 {Tcl_UtfCharComplete} { } {} @@ -173,46 +175,40 @@ testnumutfchars [testbytestring \xE2\x82\xAC] end-1 } 2 test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} { testnumutfchars [testbytestring \x00] end+1 } 2 -test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { +test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} { + testnumutfchars testbytestring ucs4} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end-1 } 3 -test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { +test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} { + testnumutfchars testbytestring wtf8} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 4 -test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { +test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} { + testnumutfchars testbytestring ucs4} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 1 -test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { - testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end -} 2 test utf-4.13 {Tcl_NumUtfChars: high surrogates} { string length 
\uD842\uD842\uD842 } 3 test utf-4.14 {Tcl_NumUtfChars: low surrogates} { string length \uDE42\uDE42\uDE42 } 3 -test utf-4.15.0 {Tcl_NumUtfChars: mixed surrogates} ucs2 { +test utf-4.15.0 {Tcl_NumUtfChars: mixed surrogates} wtf8 { string length \uDE42\uD842\uDE42 } 3 test utf-4.15.1 {Tcl_NumUtfChars: mixed surrogates} ucs4 { string length \uDE42\uD842\uDE42 } 2 -test utf-4.15.2 {Tcl_NumUtfChars: mixed surrogates} tip389 { - string length \uDE42\uD842\uDE42 -} 3 -test utf-4.16.0 {Tcl_NumUtfChars: mixed surrogates} ucs2 { +test utf-4.16.0 {Tcl_NumUtfChars: mixed surrogates} wtf8 { string length \uD842\uDE42\uD842 } 3 test utf-4.16.1 {Tcl_NumUtfChars: mixed surrogates} ucs4 { string length \uD842\uDE42\uD842 } 2 -test utf-4.16.2 {Tcl_NumUtfChars: mixed surrogates} tip389 { - string length \uD842\uDE42\uD842 -} 3 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { testfindfirst [testbytestring abcbc] 98 } bcbc test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { @@ -423,14 +419,14 @@ testutfnext \xE8\xA0\xA0\xF8 } 3 test utf-6.68 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0G } 1 -test utf-6.69.0 {Tcl_UtfNext} {testutfnext ucs2} { +test utf-6.69.0 {Tcl_UtfNext} {testutfnext wtf8} { testutfnext \xF2\xA0\xA0\xA0 } 1 -test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { +test utf-6.69.1 {Tcl_UtfNext} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xD0 } 1 @@ -441,44 +437,44 @@ testutfnext \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xF8 } 1 -test utf-6.74.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0G -} 1 -test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0G -} 4 -test utf-6.75.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0\xA0 -} 1 -test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xA0 -} 4 -test utf-6.76.0 
{Tcl_UtfNext} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0\xD0 -} 1 -test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xD0 -} 4 -test utf-6.77.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0\xE8 -} 1 -test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xE8 -} 4 -test utf-6.78.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0\xF2 -} 1 -test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xF2 -} 4 -test utf-6.79.0 {Tcl_UtfNext} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0G\xF8 -} 1 -test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { +test utf-6.74.0 {Tcl_UtfNext} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0G +} 1 +test utf-6.74.1 {Tcl_UtfNext} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0G +} 4 +test utf-6.75.0 {Tcl_UtfNext} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0\xA0 +} 1 +test utf-6.75.1 {Tcl_UtfNext} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0\xA0 +} 4 +test utf-6.76.0 {Tcl_UtfNext} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0\xD0 +} 1 +test utf-6.76.1 {Tcl_UtfNext} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0\xD0 +} 4 +test utf-6.77.0 {Tcl_UtfNext} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0\xE8 +} 1 +test utf-6.77.1 {Tcl_UtfNext} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0\xE8 +} 4 +test utf-6.78.0 {Tcl_UtfNext} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0\xF2 +} 1 +test utf-6.78.1 {Tcl_UtfNext} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0\xF2 +} 4 +test utf-6.79.0 {Tcl_UtfNext} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0G\xF8 +} 1 +test utf-6.79.1 {Tcl_UtfNext} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xC0\x80 } 2 @@ -498,47 +494,44 @@ testutfnext \xE0\xA0\x80 } 3 test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { 
testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext ucs2} { +test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext wtf8} { testutfnext \xF0\x90\x80\x80 } 1 -test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { +test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext ucs4} { testutfnext \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { testutfnext \xA0\xA0 } 1 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { testutfnext \x80\x80 } 1 -test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext wtf8} { testutfnext \xF4\x8F\xBF\xBF } 1 -test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { +test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs4} { testutfnext \xF4\x8F\xBF\xBF } 4 -test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs2} { +test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext wtf8} { testutfnext \xF4\x90\x80\x80 } 1 -test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} { +test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs4} { testutfnext \xF4\x90\x80\x80 } 1 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext \xA0\xA0\xA0 } 1 -test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { - testutfnext \x80\x80\x80 -} 1 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf} { +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext wtf8} { testutfnext \x80\x80\x80 } 1 test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { testutfnext 
\xA0\xA0\xA0\xA0 } 1 -test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext ucs2} { +test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext wtf8} { testutfnext \x80\x80\x80\x80 } 1 test utf-6.96 {Tcl_UtfNext, read limits} testutfnext { testutfnext G 0 } 0 @@ -579,71 +572,83 @@ testutfnext \xE8\xA0\xA0\xA0 2 } 0 test utf-6.109 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xE8\xA0\xA0\xA0 3 } 3 -test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0G 1 -} 1 -test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0G 1 -} 0 -test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0G 2 -} 1 -test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0G 2 -} 0 -test utf-6.112.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0G 3 -} 1 -test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0G 3 -} 0 -test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0G 4 -} 1 -test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { +test utf-6.108.0 {Tcl_UtfNext, read limits} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0G 1 +} 1 +test utf-6.108.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0G 1 +} 0 +test utf-6.109.0 {Tcl_UtfNext, read limits} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0G 2 +} 1 +test utf-6.109.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0G 2 +} 0 +test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0G 3 +} 1 +test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0G 1 +} 0 +test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext wtf8} { + testutfnext 
\xF2\xA0\xA0\xA0G 2 +} 1 +test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0G 2 +} 0 +test utf-6.112.0 {Tcl_UtfNext, read limits} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0G 3 +} 1 +test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0G 3 +} 0 +test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0G 4 +} 1 +test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0G 4 } 4 -test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0\xA0 1 -} 1 -test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xA0 1 -} 0 -test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0\xA0 2 -} 1 -test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xA0 2 -} 0 -test utf-6.116.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0\xA0 3 -} 1 -test utf-6.116.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { - testutfnext \xF2\xA0\xA0\xA0\xA0 3 -} 0 -test utf-6.117.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} { - testutfnext \xF2\xA0\xA0\xA0\xA0 4 -} 1 -test utf-6.117.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} { +test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0\xA0 1 +} 1 +test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0\xA0 1 +} 0 +test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0\xA0 2 +} 1 +test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { + testutfnext \xF2\xA0\xA0\xA0\xA0 2 +} 0 +test utf-6.116.0 {Tcl_UtfNext, read limits} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0\xA0 3 +} 1 +test utf-6.116.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { + 
testutfnext \xF2\xA0\xA0\xA0\xA0 3 +} 0 +test utf-6.117.0 {Tcl_UtfNext, read limits} {testutfnext wtf8} { + testutfnext \xF2\xA0\xA0\xA0\xA0 4 +} 1 +test utf-6.117.1 {Tcl_UtfNext, read limits} {testutfnext ucs4} { testutfnext \xF2\xA0\xA0\xA0\xA0 4 } 4 test utf-6.118 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0G 0 } 0 -test utf-6.119 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +test utf-6.119 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0G 1 } 1 -test utf-6.120 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +test utf-6.120 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0 1 } 1 -test utf-6.121 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +test utf-6.121 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0G 2 } 1 -test utf-6.122 {Tcl_UtfNext, read limits} {testutfnext ucs2} { +test utf-6.122 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0 2 } 1 test utf-6.123 {Tcl_UtfNext, read limits} testutfnext { testutfnext \xA0\xA0\xA0G 3 } 1 @@ -718,26 +723,26 @@ testutfprev A\xF8\xA0\xA0\xA0 3 } 2 test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10.0 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xF2\xA0 -} 2 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF2\xA0 -} 1 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xF2\xA0\xA0\xA0 3 -} 2 -test utf-7.10.3 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF2\xA0\xA0\xA0 3 -} 1 -test utf-7.10.4 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xF2\xA0\xF8\xA0 3 -} 2 -test utf-7.10.5 {Tcl_UtfPrev} {testutfprev fullutf} { +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xF2\xA0 +} 2 +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev ucs4} { + testutfprev A\xF2\xA0 +} 1 +test utf-7.10.1.0 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xF2\xA0\xA0\xA0 3 +} 2 +test utf-7.10.1.1 {Tcl_UtfPrev} {testutfprev ucs4} { + testutfprev A\xF2\xA0\xA0\xA0 3 +} 1 
+test utf-7.10.2.0 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xF2\xA0\xF8\xA0 3 +} 2 +test utf-7.10.2.1 {Tcl_UtfPrev} {testutfprev ucs4} { testutfprev A\xF2\xA0\xF8\xA0 3 } 1 test utf-7.11 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0 } 1 @@ -775,26 +780,26 @@ testutfprev A\xF8\xA0\xA0\xA0 4 } 3 test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15.0 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xF2\xA0\xA0 -} 3 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF2\xA0\xA0 -} 1 -test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xF2\xA0\xA0\xA0 4 -} 3 -test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF2\xA0\xA0\xA0 4 -} 1 -test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xF2\xA0\xA0\xF8 4 -} 3 -test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev fullutf} { +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xF2\xA0\xA0 +} 3 +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev ucs4} { + testutfprev A\xF2\xA0\xA0 +} 1 +test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xF2\xA0\xA0\xA0 4 +} 3 +test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev ucs4} { + testutfprev A\xF2\xA0\xA0\xA0 4 +} 1 +test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xF2\xA0\xA0\xF8 4 +} 3 +test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev ucs4} { testutfprev A\xF2\xA0\xA0\xF8 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xA0 } 1 @@ -811,56 +816,53 @@ testutfprev A\xD0\xA0\xA0\xA0 4 } 3 test utf-7.17.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xD0\xA0\xA0\xF8 4 } 3 -test utf-7.18.0 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xA0\xA0\xA0 -} 1 -test utf-7.18.1 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xA0\xA0\xA0 -} 3 -test utf-7.18.2 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xA0\xA0\xA0\xA0 4 -} 1 -test utf-7.18.3 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev 
A\xA0\xA0\xA0\xA0 4 -} 3 -test utf-7.18.4 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xA0\xA0\xA0\xF8 4 -} 1 -test utf-7.18.5 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xA0\xA0\xA0\xF8 4 -} 3 -test utf-7.19.0 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xF8\xA0\xA0\xA0 -} 2 -test utf-7.19.1 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF8\xA0\xA0\xA0 -} 4 -test utf-7.20.0 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xF4\xA0\xA0\xA0 -} 2 -test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xF4\xA0\xA0\xA0 -} 4 -test utf-7.21.0 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xE8\xA0\xA0\xA0 -} 2 -test utf-7.21.1 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xE8\xA0\xA0\xA0 -} 4 -test utf-7.22.0 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xD0\xA0\xA0\xA0 -} 2 -test utf-7.22.1 {Tcl_UtfPrev} {testutfprev fullutf} { - testutfprev A\xD0\xA0\xA0\xA0 -} 4 -test utf-7.23.0 {Tcl_UtfPrev} {testutfprev ucs2} { - testutfprev A\xA0\xA0\xA0\xA0 -} 2 -test utf-7.23.1 {Tcl_UtfPrev} {testutfprev fullutf} { +test utf-7.18 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xA0\xA0\xA0 +} 1 +test utf-7.18.1 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xA0\xA0\xA0\xA0 4 +} 1 +test utf-7.18.2 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xA0\xA0\xA0\xF8 4 +} 1 +test utf-7.18.3 {Tcl_UtfPrev} {testutfprev ucs4} { + testutfprev A\xA0\xA0\xA0 +} 3 +test utf-7.18.4 {Tcl_UtfPrev} {testutfprev ucs4} { + testutfprev A\xA0\xA0\xA0\xA0 4 +} 3 +test utf-7.18.5 {Tcl_UtfPrev} {testutfprev ucs4} { + testutfprev A\xA0\xA0\xA0\xF8 4 +} 3 +test utf-7.19 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xF8\xA0\xA0\xA0 +} 2 +test utf-7.19.1 {Tcl_UtfPrev} {testutfprev ucs4} { + testutfprev A\xF8\xA0\xA0\xA0 +} 4 +test utf-7.20.1 {Tcl_UtfPrev} {testutfprev ucs4} { + testutfprev A\xF4\xA0\xA0\xA0 +} 4 +test utf-7.21 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xE8\xA0\xA0\xA0 +} 2 +test utf-7.21.1 {Tcl_UtfPrev} 
{testutfprev ucs4} { + testutfprev A\xE8\xA0\xA0\xA0 +} 4 +test utf-7.22 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xD0\xA0\xA0\xA0 +} 2 +test utf-7.22.1 {Tcl_UtfPrev} {testutfprev ucs4} { + testutfprev A\xD0\xA0\xA0\xA0 +} 4 +test utf-7.23 {Tcl_UtfPrev} {testutfprev wtf8} { + testutfprev A\xA0\xA0\xA0\xA0 +} 2 +test utf-7.23.1 {Tcl_UtfPrev} {testutfprev ucs4} { testutfprev A\xA0\xA0\xA0\xA0 } 4 test utf-7.24 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xC0\x81 } 2 @@ -880,14 +882,14 @@ testutfprev A\xE0 } 1 test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\x80\x80 2 } 1 -test utf-7.29.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { +test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev wtf8} { testutfprev A\xF0\x80\x80\x80 } 2 -test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { +test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { testutfprev A\xF0\x80\x80\x80 } 4 test utf-7.30 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xF0\x80\x80\x80 4 } 3 @@ -913,26 +915,26 @@ testutfprev A\xE0\xA0\x80 3 } 1 test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { +test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev wtf8} { testutfprev A\xF0\x90\x80\x80 } 2 -test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { +test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { testutfprev A\xF0\x90\x80\x80 } 1 -test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev wtf8} { testutfprev A\xF0\x90\x80\x80 4 } 3 -test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { testutfprev A\xF0\x90\x80\x80 4 } 1 -test utf-7.41.0 
{Tcl_UtfPrev -- overlong sequence} {testutfprev ucs2} { +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev wtf8} { testutfprev A\xF0\x90\x80\x80 3 } 2 -test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev ucs4} { testutfprev A\xF0\x90\x80\x80 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xF0\x90\x80\x80 2 } 1 @@ -943,14 +945,14 @@ testutfprev \xA0\xA0 } 1 test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { testutfprev \xA0\xA0\xA0 } 2 -test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev ucs2} { +test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev wtf8} { testutfprev \xA0\xA0\xA0\xA0 } 1 -test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev fullutf} { +test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev ucs4} { testutfprev \xA0\xA0\xA0\xA0 } 3 test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev { testutfprev \xE8\xA0 } 0 @@ -958,47 +960,47 @@ testutfprev \xE8\xA0\xA0 2 } 0 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} testutfprev { testutfprev \xE8\xA0\x00 2 } 0 -test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { - testutfprev A\xF4\x8F\xBF\xBF -} 2 -test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { - testutfprev A\xF4\x8F\xBF\xBF -} 1 -test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { - testutfprev A\xF4\x8F\xBF\xBF 4 -} 3 -test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { - testutfprev A\xF4\x8F\xBF\xBF 4 -} 1 -test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { - testutfprev A\xF4\x8F\xBF\xBF 3 -} 2 -test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { - testutfprev A\xF4\x8F\xBF\xBF 3 -} 1 -test
utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { - testutfprev A\xF4\x8F\xBF\xBF 2 -} 1 -test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { - testutfprev A\xF4\x90\x80\x80 -} 2 -test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { - testutfprev A\xF4\x90\x80\x80 -} 4 -test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { - testutfprev A\xF4\x90\x80\x80 4 -} 3 -test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { - testutfprev A\xF4\x90\x80\x80 4 -} 3 -test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs2} { - testutfprev A\xF4\x90\x80\x80 3 -} 2 -test utf-7.49.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev fullutf} { +test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev wtf8} { + testutfprev A\xF4\x8F\xBF\xBF +} 2 +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { + testutfprev A\xF4\x8F\xBF\xBF +} 1 +test utf-7.48.1.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev wtf8} { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 3 +test utf-7.48.1.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { + testutfprev A\xF4\x8F\xBF\xBF 4 +} 1 +test utf-7.48.2.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev wtf8} { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 2 +test utf-7.48.2.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { + testutfprev A\xF4\x8F\xBF\xBF 3 +} 1 +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { + testutfprev A\xF4\x8F\xBF\xBF 2 +} 1 +test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev wtf8} { + testutfprev A\xF4\x90\x80\x80 +} 2 +test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { + testutfprev A\xF4\x90\x80\x80 +} 4 +test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev wtf8} { + testutfprev A\xF4\x90\x80\x80 4 +} 3 +test 
utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { + testutfprev A\xF4\x90\x80\x80 4 +} 3 +test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev wtf8} { + testutfprev A\xF4\x90\x80\x80 3 +} 2 +test utf-7.49.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev ucs4} { testutfprev A\xF4\x90\x80\x80 3 } 2 test utf-7.49.6 {Tcl_UtfPrev, validity check [493dccc2de]} testutfprev { testutfprev A\xF4\x90\x80\x80 2 } 1 @@ -1006,144 +1008,84 @@ test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 } a test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { string index \u4E4E\u25A 0 -} "\u4E4E" +} \u4E4E test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { string index abcd 2 } c test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 -} "\uFF" -test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { +} \uFF +test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} wtf8 { string index \uD842 0 } "\uD842" test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { string index \uD842 0 } "\uD842" -test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} tip389 { - string index \uD842 0 -} "\uD842" test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { string index \uDC42 0 } "\uDC42" -test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { +test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} wtf8 { string index \uD83D\uDE00G 0 } "\uD83D" test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 0 } "\U1F600" -test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} tip389 { - string index \uD83D\uDE00G 0 -} "\uD83D" -test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { +test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} wtf8 { string index \uD83D\uDE00G 1 } "\uDE00" test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 1 } G -test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} tip389 { - string index \uD83D\uDE00G 1 -} "\uDE00" -test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { +test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} 
wtf8 { string index \uD83D\uDE00G 2 } G test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 2 } {} -test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} tip389 { - string index \uD83D\uDE00G 2 -} G -test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { +test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} wtf8 { string index \U1F600G 0 -} "\uFFFD" +} "\uD83D" test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \U1F600G 0 } "\U1F600" -test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} tip389 { - string index \U1F600G 0 -} "\uD83D" -test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { +test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} wtf8 { string index \U1F600G 1 -} G +} "\uDE00" test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \U1F600G 1 } G -test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} tip389 { - string index \U1F600G 1 -} "\uDE00" -test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { +test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} wtf8 { string index \U1F600G 2 -} {} +} G test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \U1F600G 2 } {} -test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} tip389 { - string index \U1F600G 2 -} G test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 } abc test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 -} "\u25A\xFF\u543kl" -test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { - string range \uD83D\uDE00G 0 0 -} "\uD83D" -test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { - string range \uD83D\uDE00G 0 0 -} "\U1F600" -test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} tip389 { - string range \uD83D\uDE00G 0 0 -} "\U1F600" -test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { - string range \uD83D\uDE00G 1 1 -} "\uDE00" -test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { - string range \uD83D\uDE00G 1 1 -} "G" -test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { - string range \uD83D\uDE00G 1 1 -} {} -test utf-9.5.0 {Tcl_UtfAtIndex: index > 
0, Emoji} ucs2 { - string range \uD83D\uDE00G 2 2 -} G -test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { - string range \uD83D\uDE00G 2 2 -} {} -test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { - string range \uD83D\uDE00G 2 2 -} G -test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { - string range \U1f600G 0 0 -} "\uFFFD" -test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { - string range \U1f600G 0 0 -} "\U1F600" -test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} tip389 { - string range \U1f600G 0 0 -} "\U1F600" -test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { - string range \U1f600G 1 1 -} G -test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { - string range \U1f600G 1 1 -} "G" -test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { - string range \U1f600G 1 1 -} {} -test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { - string range \U1f600G 2 2 -} {} -test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { - string range \U1f600G 2 2 -} {} -test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { - string range \U1f600G 2 2 -} G +} \u25A\xFF\u543kl +test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} wtf8 { + string range \uD83D\uDE00G 0 0 +} \U1F600 +test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { + string range \uD83D\uDE00G 0 0 +} \U1F600 +test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} wtf8 { + string range \uD83D\uDE00G 1 1 +} {} +test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} wtf8 { + string range \uD83D\uDE00G 2 2 +} G +test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { + string range \uD83D\uDE00G 1 1 +} G test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { set x \n } { } @@ -1157,11 +1099,16 @@ expr {"\u4E2k" eq "[testbytestring \xD3\xA2]k"} } 1 test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring { expr {"\u4E216" eq "[testbytestring \xE4\xB8\xA1]6"} } 1 -test utf-10.6 {Tcl_UtfBackslash: stops after 8 hex chars} {fullutf testbytestring} { +test utf-10.6.0 
{Tcl_UtfBackslash: stops after 8 hex chars} { + testbytestring wtf8} { + expr {"\U0001E2165" eq "[testbytestring \xED\xA0\xB8\xED\xB8\x96]5"} +} 1 +test utf-10.6.1 {Tcl_UtfBackslash: stops after 8 hex chars} { + testbytestring ucs4} { expr {"\U0001E2165" eq "[testbytestring \xF0\x9E\x88\x96]5"} } 1 proc bsCheck {char num {constraints {}}} { global errNum @@ -1241,20 +1188,17 @@ string toupper \u01E3AB } \u01E2AB test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { string toupper \u10D0\u1C90 } \u1C90\u1C90 -test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} fullutf { +test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} { string toupper \U10428 } \U10400 -test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} fullutf { +test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} { string toupper \uD801\uDC28 } \uD801\uDC00 -test utf-11.8.0 {Tcl_UtfToUpper low/high surrogate)} ucs2 { - string toupper \uDC24\uD824 -} \uDC24\uD824 -test utf-11.8.1 {Tcl_UtfToUpper low/high surrogate)} fullutf { +test utf-11.8 {Tcl_UtfToUpper low/high surrogate)} { string toupper \uDC24\uD824 } \uDC24\uD824 test utf-12.1 {Tcl_UtfToLower} { string tolower {} @@ -1269,17 +1213,17 @@ string tolower \u01E2AB } \u01E3ab test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10D0\u1C90 } \u10D0\u10D0 -test utf-12.6 {Tcl_UtfToLower low/high surrogate)} { +test utf-12.6 {Tcl_UtfToLower low/high surrogate)} { string tolower \uDC24\uD824 } \uDC24\uD824 -test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} fullutf { +test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} { string tolower \U10400 } \U10428 -test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} fullutf { +test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} { string tolower \uD801\uDC00 } \uD801\uDC28 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -1300,14 +1244,14 @@ string totitle \u1C90\u10D0 } \u1C90\u10D0 test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { string totitle \uDC24\uD824 } \uDC24\uD824 -test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} fullutf { +test
utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} { string totitle \U10428 } \U10400 -test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} fullutf { +test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} { string totitle \uD801\uDC28\uD801\uDC00 } \uD801\uDC00\uD801\uDC28 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b @@ -1363,17 +1307,17 @@ list [regexp \\d abc456def foo] $foo } -cleanup { unset -nocomplain foo } -result {1 4} -test utf-20.1 {TclUniCharNcmp} {tip389 knownBug} { +test utf-20.1 {TclUniCharNcmp} {wtf8 knownBug} { string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0] } -1 test utf-20.2 {TclUniCharNcmp} ucs4 { string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0] } -1 -test utf-20.3 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} {ucs2 knownBug} { +test utf-20.3 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} {wtf8 knownBug} { set one [format %c 0xFFFF] set two [format %c 0x10000] set first [string compare $one $two] string range $one 0 0 string range $two 0 0 @@ -1443,11 +1387,11 @@ string wordend "xyz123_bar fg" 0 } 10 test utf-22.2 {Tcl_UniCharIsWordChar} { string wordend "x\u5080z123_bar\u203C fg" 0 } 10 -test utf-22.3 {Tcl_UniCharIsWordChar} fullutf { +test utf-22.3 {Tcl_UniCharIsWordChar} { string wordend "x\u5080\uD83D\uDCA3z123_bar\uD83D\uDCA3 fg" 0 } 2 test utf-23.1 {Tcl_UniCharIsAlpha} { # this returns 1 with Unicode 7 compliance @@ -1500,14 +1444,14 @@ variable count 1 UniCharCaseCmpTest < a b UniCharCaseCmpTest > b a UniCharCaseCmpTest > B a UniCharCaseCmpTest > aBcB abca -UniCharCaseCmpTest < \uFFFF [format %c 0x10000] fullutf -UniCharCaseCmpTest < \uFFFF \U10000 fullutf -UniCharCaseCmpTest > [format %c 0x10000] \uFFFF fullutf -UniCharCaseCmpTest > \U10000 \uFFFF fullutf +UniCharCaseCmpTest < \uFFFF [format %c 0x10000] +UniCharCaseCmpTest < \uFFFF \U10000 +UniCharCaseCmpTest > [format %c 0x10000] \uFFFF +UniCharCaseCmpTest > \U10000 \uFFFF Index: jni/tcl/win/tclWinError.c 
================================================================== --- jni/tcl/win/tclWinError.c +++ jni/tcl/win/tclWinError.c @@ -175,11 +175,11 @@ EINVAL, /* 156 */ EINVAL, /* 157 */ EACCES, /* ERROR_NOT_LOCKED 158 */ EINVAL, /* 159 */ EINVAL, /* 160 */ - ENOENT, /* ERROR_BAD_PATHNAME 161 */ + ENOENT, /* ERROR_BAD_PATHNAME 161 */ EINVAL, /* 162 */ EINVAL, /* 163 */ EINVAL, /* 164 */ EINVAL, /* 165 */ EINVAL, /* 166 */ @@ -388,25 +388,28 @@ #define TCL_MAX_WARN_LEN 1024 va_list argList; va_start(argList, format); if (IsDebuggerPresent()) { - WCHAR msgString[TCL_MAX_WARN_LEN]; + WCHAR *msgString; char buf[TCL_MAX_WARN_LEN * TCL_UTF_MAX]; + Tcl_DString ds; vsnprintf(buf, sizeof(buf), format, argList); - msgString[TCL_MAX_WARN_LEN-1] = L'\0'; - MultiByteToWideChar(CP_UTF8, 0, buf, -1, msgString, TCL_MAX_WARN_LEN); + msgString = (WCHAR *) Tcl_WinUtfToTChar(buf, -1, &ds); /* - * Truncate MessageBox string if it is too long to not overflow the buffer. + * Truncate MessageBox string if it is too long to not + * overflow the buffer. */ - if (msgString[TCL_MAX_WARN_LEN-1] != L'\0') { - memcpy(msgString + (TCL_MAX_WARN_LEN - 5), L" ...", 5 * sizeof(WCHAR)); + if (Tcl_DStringLength(&ds) >= TCL_MAX_WARN_LEN * sizeof(WCHAR)) { + memcpy(msgString + (TCL_MAX_WARN_LEN - 5), L" ...", + 5 * sizeof(WCHAR)); } OutputDebugStringW(msgString); + Tcl_DStringFree(&ds); } else { vfprintf(stderr, format, argList); fprintf(stderr, "\n"); fflush(stderr); } Index: jni/tcl/win/tclWinFile.c ================================================================== --- jni/tcl/win/tclWinFile.c +++ jni/tcl/win/tclWinFile.c @@ -823,33 +823,36 @@ const char *format, ...) 
{ #define TCL_MAX_WARN_LEN 1024 va_list argList; char buf[TCL_MAX_WARN_LEN * TCL_UTF_MAX]; - WCHAR msgString[TCL_MAX_WARN_LEN]; + WCHAR *msgString; + Tcl_DString ds; va_start(argList, format); vsnprintf(buf, sizeof(buf), format, argList); - msgString[TCL_MAX_WARN_LEN-1] = L'\0'; - MultiByteToWideChar(CP_UTF8, 0, buf, -1, msgString, TCL_MAX_WARN_LEN); + msgString = (WCHAR *) Tcl_WinUtfToTChar(buf, -1, &ds); /* * Truncate MessageBox string if it is too long to not overflow the screen * and cause possible oversized window error. */ - if (msgString[TCL_MAX_WARN_LEN-1] != L'\0') { + if (Tcl_DStringLength(&ds) >= TCL_MAX_WARN_LEN * sizeof(WCHAR)) { memcpy(msgString + (TCL_MAX_WARN_LEN - 5), L" ...", 5 * sizeof(WCHAR)); } if (IsDebuggerPresent()) { OutputDebugStringW(msgString); } else { MessageBeep(MB_ICONEXCLAMATION); MessageBoxW(NULL, msgString, L"Fatal Error", MB_ICONSTOP | MB_OK | MB_TASKMODAL | MB_SETFOREGROUND); } + + Tcl_DStringFree(&ds); + #if defined(__GNUC__) __builtin_trap(); #elif defined(_WIN64) __debugbreak(); #elif defined(_MSC_VER) && defined (_M_IX86) @@ -881,11 +884,12 @@ TclpFindExecutable( const char *argv0) /* If NULL, install PanicMessageBox, otherwise * ignore. */ { WCHAR wName[MAX_PATH]; - char name[MAX_PATH * TCL_UTF_MAX]; + int i; + char *p, name[(MAX_PATH + 1) * TCL_UTF_MAX]; /* * Under Windows we ignore argv0, and return the path for the file used to * create this process. Only if it is NULL, install a new panic handler. */ @@ -892,12 +896,27 @@ if (argv0 == NULL) { Tcl_SetPanicProc(tclWinDebugPanic); } + wName[0] = L'\0'; GetModuleFileNameW(NULL, wName, sizeof(wName)/sizeof(WCHAR)); - WideCharToMultiByte(CP_UTF8, 0, wName, -1, name, sizeof(name), NULL, NULL); + i = 0; + p = name; + while (wName[i] != L'\0') { + if (p > name + (MAX_PATH * TCL_UTF_MAX)) { + break; + } + + /* + * We make WTF-8 here implicitly.
+ */ + + p += Tcl_UniCharToUtf((Tcl_UniChar) wName[i], p); + i++; + } + *p = '\0'; TclWinNoBackslash(name); TclSetObjNameOfExecutable(Tcl_NewStringObj(name, -1), NULL); } /* @@ -3040,10 +3059,11 @@ WCHAR *nativePathPtr = NULL; const char *str; Tcl_Obj *validPathPtr; size_t len; WCHAR *wp; + Tcl_DString ds; if (TclFSCwdIsNative()) { /* * The cwd is native, which means we can use the translated path * without worrying about normalization (this will also usually be @@ -3091,43 +3111,35 @@ /* * For a reserved device, strip a possible postfix ':' */ len = WinIsReserved(str); - if (len == 0) { - /* - * Let MultiByteToWideChar check for other invalid sequences, like - * 0xC0 0x80 (== overlong NUL). See bug [3118489]: NUL in filenames - */ - - len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str, -1, 0, 0); - if (len==0) { - if (GetLastError() == ERROR_INVALID_FLAGS) { - /* Win NT/2000? */ - len = MultiByteToWideChar(CP_UTF8, 0, str, -1, 0, 0); - } - if (len==0) { - goto done; - } - } + wp = (WCHAR *) Tcl_WinUtfToTChar(str, (len == 0) ? -1 : len, &ds); + + /* + * Watch out for '\0' in native path, this is invalid. + */ + + len = wcslen(wp); + if (len != Tcl_DStringLength(&ds) / sizeof(WCHAR)) { + Tcl_DStringFree(&ds); + goto done; } /* * Overallocate 6 chars, making some room for extended paths */ - wp = nativePathPtr = (WCHAR *) ckalloc((len + 6) * sizeof(WCHAR)); - if (nativePathPtr==0) { - goto done; - } - if ((MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str, -1, - nativePathPtr, len+2) == 0) && - (GetLastError() == ERROR_INVALID_FLAGS)) { - /* Win NT/2000? 
*/ - MultiByteToWideChar(CP_UTF8, 0, str, -1, nativePathPtr, len+2); - } + nativePathPtr = (WCHAR *) attemptckalloc((len + 6) * sizeof(WCHAR)); + if (nativePathPtr == NULL) { + Tcl_DStringFree(&ds); + goto done; + } + memcpy(nativePathPtr, wp, len * sizeof(WCHAR)); nativePathPtr[len] = 0; + wp = nativePathPtr; + Tcl_DStringFree(&ds); /* * If path starts with "//?/" or "\\?\" (extended path), translate any * slashes to backslashes but leave the '?' intact */ Index: jni/tcl/win/tclWinInit.c ================================================================== --- jni/tcl/win/tclWinInit.c +++ jni/tcl/win/tclWinInit.c @@ -498,11 +498,11 @@ { char *start; start = dst; while (*wSrc != '\0') { -#if TCL_UTF_MAX >= 4 +#if TCL_UTF_MAX > 3 Tcl_UniChar ch = *wSrc; if ((ch & 0xF800) == 0xD800) { if (ch & 0x0400) { /* Low surrogate */ Index: jni/tcl/win/tclWinSerial.c ================================================================== --- jni/tcl/win/tclWinSerial.c +++ jni/tcl/win/tclWinSerial.c @@ -1809,22 +1809,16 @@ dcb.XoffChar = argv[1][0]; if (argv[0][0] & 0x80 || argv[1][0] & 0x80) { Tcl_UniChar character; int charLen; - charLen = Tcl_UtfToUniChar(argv[0], &character); -#if TCL_UTF_MAX == 4 - /* Character > 0xFFFF: charLen is 0, next test fails. */ -#endif + charLen = TclUtfToUniChar(argv[0], &character); if ((character & ~0xFF) || argv[0][charLen]) { goto badXchar; } dcb.XonChar = (char) character; - charLen = Tcl_UtfToUniChar(argv[1], &character); -#if TCL_UTF_MAX == 4 - /* Character > 0xFFFF: charLen is 0, next test fails. 
*/ -#endif + charLen = TclUtfToUniChar(argv[1], &character); if ((character & ~0xFF) || argv[1][charLen]) { goto badXchar; } dcb.XoffChar = (char) character; } Index: jni/tdom/expat/xmltok.c ================================================================== --- jni/tdom/expat/xmltok.c +++ jni/tdom/expat/xmltok.c @@ -168,13 +168,16 @@ utf8_isInvalid2(const ENCODING *enc, const char *p) { UNUSED_P(enc); return UTF8_INVALID2((const unsigned char *)p); } +int XmlAllowWTF = 0; /* when 1, don't treat 0xD800..0xDFFF as invalid */ + static int PTRFASTCALL utf8_isInvalid3(const ENCODING *enc, const char *p) { UNUSED_P(enc); + if (XmlAllowWTF) return 0; return UTF8_INVALID3((const unsigned char *)p); } static int PTRFASTCALL utf8_isInvalid4(const ENCODING *enc, const char *p) { Index: jni/tdom/generic/dom.c ================================================================== --- jni/tdom/generic/dom.c +++ jni/tdom/generic/dom.c @@ -2303,18 +2303,28 @@ } if (channel == NULL) { status = XML_Parse (parser, xml, length, 1); } else { +#if TCL_UTF_MAX == 3 + extern int XmlAllowWTF; +#endif + Tcl_DStringInit (&dStr); if (Tcl_GetChannelOption (interp, channel, "-encoding", &dStr) != TCL_OK) { domFreeDocument (doc, NULL, NULL); *resultcode = TCL_ERROR; doc = NULL; goto cleanup; } +#if TCL_UTF_MAX == 3 + if (XmlAllowWTF) { + bufObj = Tcl_NewObj(); + Tcl_SetObjLength (bufObj, 6144); + } else +#endif if (strcmp (Tcl_DStringValue (&dStr), "utf-8")==0 ) { useBinary = 1; } else { bufObj = Tcl_NewObj(); Tcl_SetObjLength (bufObj, 6144); Index: jni/tdom/generic/tdominit.c ================================================================== --- jni/tdom/generic/tdominit.c +++ jni/tdom/generic/tdominit.c @@ -72,33 +72,38 @@ if (Tcl_InitStubs(interp, "8.4", 0) == NULL) { return TCL_ERROR; } #endif - nrOfBytes = Tcl_UtfToUniChar ("\xF4\x82\xA2\xA2", uniChar); -#if (TCL_MAJOR_VERSION == 8) && (TCL_MINOR_VERSION == 6) -# if TCL_UTF_MAX > 4 - if (nrOfBytes != 4) -# elif TCL_UTF_MAX == 4 - if (nrOfBytes 
!= 0) -# else - if (nrOfBytes > 1) -# endif -#else -# if TCL_UTF_MAX > 3 - if (nrOfBytes != 4) -# else - if (nrOfBytes > 1) -# endif + nrOfBytes = Tcl_UtfToUniChar("\xF4\x82\xA2\xA2", uniChar); +#if TCL_UTF_MAX > 3 + if (nrOfBytes != 4) +#else + if (nrOfBytes > 1) #endif { Tcl_SetResult(interp, "This interpreter and tDOM are build with" " different Tcl_UniChar types and therefore not" " binary compatible.", NULL); return TCL_ERROR; } +#if TCL_UTF_MAX == 3 + if (nrOfBytes == 1) { + Tcl_DString ds; + Tcl_Encoding enc; + extern int XmlAllowWTF; + /* Find out if WTF-8 is used in the tcl core. */ + enc = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_ExternalToUtfDString(enc, "\xF4\x82\xA2\xA2", 4, &ds); + if (Tcl_DStringLength(&ds) == 6) { + XmlAllowWTF = 1; + } + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(enc); + } +#endif domModuleInitialize(); #ifdef TCL_THREADS tcldom_initialize(); #endif /* TCL_THREADS */ Index: jni/tdom/renxpat.h ================================================================== --- jni/tdom/renxpat.h +++ jni/tdom/renxpat.h @@ -66,10 +66,11 @@ #define XML_SetUserData tdom_XML_SetUserData #define XML_SetXmlDeclHandler tdom_XML_SetXmlDeclHandler #define XML_StopParser tdom_XML_StopParser #define XML_UseForeignDTD tdom_XML_UseForeignDTD #define XML_UseParserAsHandlerArg tdom_XML_UseParserAsHandlerArg +#define XmlAllowWTF tdom_XmlAllowWTF #define XmlPrologStateInit tdom_XmlPrologStateInit #define XmlPrologStateInitExternalEntity tdom_XmlPrologStateInitExternalEntity #define XML_SimpleParseDocument tdom_XML_SimpleParseDocument #define _INTERNAL_trim_to_complete_utf8_characters tdom__INTERNAL_trim_to_complete_utf8_characters #define XmlGetUtf16InternalEncoding tdom_XmlGetUtf16InternalEncoding Index: jni/tkpath/sdl/tkSDLAGGPath.cpp ================================================================== --- jni/tkpath/sdl/tkSDLAGGPath.cpp +++ jni/tkpath/sdl/tkSDLAGGPath.cpp @@ -636,10 +636,19 @@ char *up = utf8; while (up < utf8 + length) { Tcl_UniChar uch; up += 
Tcl_UtfToUniChar(up, &uch); chu = uch; +#if TCL_UTF_MAX == 3 + if (((uch & 0xFC00) == 0xD800) && (up < utf8 + length)) { + int n = Tcl_UtfToUniChar(up, &uch); + if ((uch &0xFC00) == 0xDC00) { + up += n; + chu = (((chu & 0x3FF) << 10) | (uch & 0x3FF)) + 0x10000; + } + } +#endif Tcl_DStringAppend(&ds, (const char *) &chu, sizeof(unsigned)); } #else Tcl_Encoding enc = Tcl_GetEncoding(NULL, "ucs-4"); Tcl_UtfToExternalDString(enc, utf8, -1, &ds); Index: jni/tkzinc/generic/Draw.c ================================================================== --- jni/tkzinc/generic/Draw.c +++ jni/tkzinc/generic/Draw.c @@ -2010,17 +2010,17 @@ unsigned int len) { unsigned int clen; int ch; Tcl_UniChar c = 0; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 Tcl_UniChar c2; #endif while (len) { clen = Tcl_UtfToUniChar((char *) string, &c); -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 if ((c & 0xFC00) == 0xD800) { ch = 0xFFFD; clen = Tcl_UtfToUniChar((char *) string, &c2); if ((c2 & 0xFC00) == 0xDC00) { ch = ((c & 0x3FF) << 10) + 0x10000 + (c2 & 0x3FF); Index: jni/vu/generic/tkCombobox.c ================================================================== --- jni/vu/generic/tkCombobox.c +++ jni/vu/generic/tkCombobox.c @@ -2006,22 +2006,41 @@ * the combo, recompute the displayString. */ if (comboPtr->showChar != NULL) { Tcl_UniChar ch; - char buf[TCL_UTF_MAX]; +#if TCL_UTF_MAX == 3 + Tcl_UniChar ch2 = 0; +#endif + char buf[6]; int size; /* * Normalize the special character so we can safely duplicate it * in the display string. If we didn't do this, then two malformed * characters might end up looking like one valid UTF character in * the resulting string. 
*/ - Tcl_UtfToUniChar(comboPtr->showChar, &ch); + size = Tcl_UtfToUniChar(comboPtr->showChar, &ch); +#if TCL_UTF_MAX == 3 + if ((ch & 0xFC00) == 0xD800) { + if (comboPtr->showChar[size] != '\0') { + Tcl_UtfToUniChar(comboPtr->showChar + size, &ch2); + } + size = 0; + if ((ch2 & 0xFC00) == 0xDC00) { + size = Tcl_UniCharToUtf(ch, buf); + ch = ch2; + } + } else { + size = 0; + } + size += Tcl_UniCharToUtf(ch, buf + size); +#else size = Tcl_UniCharToUtf(ch, buf); +#endif comboPtr->numDisplayBytes = comboPtr->numChars * size; comboPtr->displayString = (char *) ckalloc((unsigned) (comboPtr->numDisplayBytes + 1)); Index: undroid/build-undroidwish-freebsd.sh ================================================================== --- undroid/build-undroidwish-freebsd.sh +++ undroid/build-undroidwish-freebsd.sh @@ -72,12 +72,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="cc -DTCL_UTF_MAX=6" -CXX="c++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" +CC="cc -DTCL_UTF_MAX=3" +CXX="c++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload" SUBDIRS="${SUBDIRS} tls libwebsockets" @@ -1353,11 +1353,11 @@ exec 3>&1 exec >> build.log 2>&1 cd ffidl test -e build-stamp && echo >&3 "already done" && exit 0 mkdir -p lib-src && rm -rf lib-src/libffi && cp -rp ../libffi lib-src - CC="gcc10 -DTCL_UTF_MAX=6" DESTDIR=${HERE} ./configure --prefix=${PFX} \ + CC="gcc10 -DTCL_UTF_MAX=3" DESTDIR=${HERE} ./configure --prefix=${PFX} \ --with-tcl=${HERE}/tcl/unix --enable-threads \ --enable-libffi || exit 1 MAKE=gmake gmake || exit 1 MAKE=gmake gmake install DESTDIR=${HERE} || exit 1 touch build-stamp Index: undroid/build-undroidwish-generic.sh ================================================================== --- undroid/build-undroidwish-generic.sh +++ undroid/build-undroidwish-generic.sh @@ -72,12 +72,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -DTCL_UTF_MAX=6" -CXX="g++ 
-fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" +CC="gcc -DTCL_UTF_MAX=3" +CXX="g++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload" SUBDIRS="${SUBDIRS} tls libwebsockets" Index: undroid/build-undroidwish-haiku.sh ================================================================== --- undroid/build-undroidwish-haiku.sh +++ undroid/build-undroidwish-haiku.sh @@ -83,15 +83,15 @@ CXX=g++-$(uname -p) else CXX=g++ fi if type ccache >/dev/null 2>&1 ; then - CC="ccache $CC -DTCL_UTF_MAX=6" - CXX="ccache $CXX -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" + CC="ccache $CC -DTCL_UTF_MAX=3" + CXX="ccache $CXX -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" else - CC="$CC -DTCL_UTF_MAX=6" - CXX="$CXX -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" + CC="$CC -DTCL_UTF_MAX=3" + CXX="$CXX -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" fi NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl zlib curl tcludp tdom tclvfs tclkit trofs tbcload" Index: undroid/build-undroidwish-illumos.sh ================================================================== --- undroid/build-undroidwish-illumos.sh +++ undroid/build-undroidwish-illumos.sh @@ -81,12 +81,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -DTCL_UTF_MAX=6" -CXX="g++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" +CC="gcc -DTCL_UTF_MAX=3" +CXX="g++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload tls" SUBDIRS="${SUBDIRS} Memchan TclCurl freetype SDL2 sdl2tk blt jpeg-turbo" Index: undroid/build-undroidwish-kmsdrm.sh ================================================================== --- undroid/build-undroidwish-kmsdrm.sh +++ undroid/build-undroidwish-kmsdrm.sh @@ -76,12 +76,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -DTCL_UTF_MAX=6" -CXX="g++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" +CC="gcc -DTCL_UTF_MAX=3" 
+CXX="g++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload" SUBDIRS="${SUBDIRS} tls libwebsockets" Index: undroid/build-undroidwish-linux32.sh ================================================================== --- undroid/build-undroidwish-linux32.sh +++ undroid/build-undroidwish-linux32.sh @@ -77,12 +77,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -m32 -march=i586 -mtune=generic -DTCL_UTF_MAX=6" -CXX="g++ -m32 -march=i586 -mtune=generic -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" +CC="gcc -m32 -march=i586 -mtune=generic -DTCL_UTF_MAX=3" +CXX="g++ -m32 -march=i586 -mtune=generic -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload" SUBDIRS="${SUBDIRS} tls libwebsockets" Index: undroid/build-undroidwish-linux64.sh ================================================================== --- undroid/build-undroidwish-linux64.sh +++ undroid/build-undroidwish-linux64.sh @@ -77,12 +77,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -m64 -DTCL_UTF_MAX=6" -CXX="g++ -m64 -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" +CC="gcc -m64 -DTCL_UTF_MAX=3" +CXX="g++ -m64 -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload" SUBDIRS="${SUBDIRS} tls libwebsockets" Index: undroid/build-undroidwish-macosx.sh ================================================================== --- undroid/build-undroidwish-macosx.sh +++ undroid/build-undroidwish-macosx.sh @@ -77,12 +77,12 @@ # the toolchain STRIP="strip -S -x" AR=ar RANLIB=ranlib -CC="cc -DTCL_UTF_MAX=6 -mmacosx-version-min=10.10" -CXX="c++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6 -mmacosx-version-min=10.10" +CC="cc -DTCL_UTF_MAX=3 -mmacosx-version-min=10.10" +CXX="c++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3 
-mmacosx-version-min=10.10" NM=nm # RPATH for binaries ADD_RPATH="/Applications/VLC.app/Contents/MacOS/lib" export STRIP AR RANLIB CC CXX NM ADD_RPATH Index: undroid/build-undroidwish-openbsd.sh ================================================================== --- undroid/build-undroidwish-openbsd.sh +++ undroid/build-undroidwish-openbsd.sh @@ -72,12 +72,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="cc -DTCL_UTF_MAX=6" -CXX="c++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" +CC="cc -DTCL_UTF_MAX=3" +CXX="c++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl zlib curl tcludp tdom tclvfs tclkit trofs tbcload" SUBDIRS="${SUBDIRS} tls libwebsockets" Index: undroid/build-undroidwish-termux.sh ================================================================== --- undroid/build-undroidwish-termux.sh +++ undroid/build-undroidwish-termux.sh @@ -76,13 +76,14 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -DTCL_UTF_MAX=6" -CXX="g++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" -CVCXX="g++ -DTCL_UTF_MAX=6" +CC="gcc -DTCL_UTF_MAX=3" +CXX="g++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" +CVCXX="g++ -DTCL_UTF_MAX=3" +CVCXX="g++ -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX CVCXX NM SUBDIRS="tcl libressl zlib tcludp tdom tclvfs tclkit trofs tbcload" SUBDIRS="${SUBDIRS} tls libwebsockets" Index: undroid/build-undroidwish-wayland.sh ================================================================== --- undroid/build-undroidwish-wayland.sh +++ undroid/build-undroidwish-wayland.sh @@ -76,12 +76,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -DTCL_UTF_MAX=6" -CXX="g++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6" +CC="gcc -DTCL_UTF_MAX=3" +CXX="g++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload" SUBDIRS="${SUBDIRS} tls libwebsockets" Index: undroid/build-undroidwish-win32.sh 
================================================================== --- undroid/build-undroidwish-win32.sh +++ undroid/build-undroidwish-win32.sh @@ -103,13 +103,13 @@ PATH="/opt/mingw64/bin:$PATH" STRIP="x86_64-w64-mingw32-strip" OBJCOPY="x86_64-w64-mingw32-objcopy" AR="x86_64-w64-mingw32-ar" RANLIB="x86_64-w64-mingw32-ranlib" - CC="x86_64-w64-mingw32-gcc -m32 -march=i386 -mtune=i386 -DTCL_UTF_MAX=6" - CC_OLD="x86_64-w64-mingw32-gcc -m32 -march=i386 -mtune=i386 -D_WIN32_WINNT=0x0400 -DTCL_UTF_MAX=6" - CXX="x86_64-w64-mingw32-g++ -m32 -march=i386 -mtune=i386 -fno-exceptions -DTCL_UTF_MAX=6" + CC="x86_64-w64-mingw32-gcc -m32 -march=i386 -mtune=i386 -DTCL_UTF_MAX=3" + CC_OLD="x86_64-w64-mingw32-gcc -m32 -march=i386 -mtune=i386 -D_WIN32_WINNT=0x0400 -DTCL_UTF_MAX=3" + CXX="x86_64-w64-mingw32-g++ -m32 -march=i386 -mtune=i386 -fno-exceptions -DTCL_UTF_MAX=3" RC="x86_64-w64-mingw32-windres -F pe-i386" NM="x86_64-w64-mingw32-nm" export STRIP OBJCOPY AR RANLIB CC CC_OLD CXX RC NM else # would like to use -march=i386 -mtune=i386, too, but then gcc-4.8 @@ -118,13 +118,13 @@ echo using toolchain prefix i686-w64-mingw32 STRIP="i686-w64-mingw32-strip" OBJCOPY="i686-w64-mingw32-objcopy" AR="i686-w64-mingw32-ar" RANLIB="i686-w64-mingw32-ranlib" - CC="i686-w64-mingw32-gcc -m32 -march=i586 -mtune=generic -DTCL_UTF_MAX=6" - CC_OLD="i686-w64-mingw32-gcc -m32 -march=i586 -mtune=generic -D_WIN32_WINNT=0x0400 -DTCL_UTF_MAX=6" - CXX="i686-w64-mingw32-g++ -m32 -march=i586 -mtune=generic -fno-exceptions -DTCL_UTF_MAX=6" + CC="i686-w64-mingw32-gcc -m32 -march=i586 -mtune=generic -DTCL_UTF_MAX=3" + CC_OLD="i686-w64-mingw32-gcc -m32 -march=i586 -mtune=generic -D_WIN32_WINNT=0x0400 -DTCL_UTF_MAX=3" + CXX="i686-w64-mingw32-g++ -m32 -march=i586 -mtune=generic -fno-exceptions -DTCL_UTF_MAX=3" RC="i686-w64-mingw32-windres -F pe-i386" NM="i686-w64-mingw32-nm" TWAPI_LDFLAGS="-L${AWDIR}/undroid/compat/win32/lib32" export STRIP OBJCOPY AR RANLIB CC CC_OLD CXX RC NM TWAPI_LDFLAGS fi Index: 
undroid/build-undroidwish-win64.sh ================================================================== --- undroid/build-undroidwish-win64.sh +++ undroid/build-undroidwish-win64.sh @@ -101,23 +101,23 @@ PATH="/opt/mingw64/bin:$PATH" STRIP="x86_64-w64-mingw32-strip" OBJCOPY="x86_64-w64-mingw32-objcopy" AR="x86_64-w64-mingw32-ar" RANLIB="x86_64-w64-mingw32-ranlib" - CC="x86_64-w64-mingw32-gcc -D_WIN32_WINNT=0x0600 -DTCL_UTF_MAX=6" - CXX="x86_64-w64-mingw32-g++ -D_WIN32_WINNT=0x0600 -DTCL_UTF_MAX=6" + CC="x86_64-w64-mingw32-gcc -D_WIN32_WINNT=0x0600 -DTCL_UTF_MAX=3" + CXX="x86_64-w64-mingw32-g++ -D_WIN32_WINNT=0x0600 -DTCL_UTF_MAX=3" RC="x86_64-w64-mingw32-windres" NM="x86_64-w64-mingw32-nm" export STRIP OBJCOPY AR RANLIB CC CXX RC NM else echo using toolchain prefix x86_64-w64-mingw32 STRIP="x86_64-w64-mingw32-strip" OBJCOPY="x86_64-w64-mingw32-objcopy" AR="x86_64-w64-mingw32-ar" RANLIB="x86_64-w64-mingw32-ranlib" - CC="x86_64-w64-mingw32-gcc -DTCL_UTF_MAX=6" - CXX="x86_64-w64-mingw32-g++ -DTCL_UTF_MAX=6" + CC="x86_64-w64-mingw32-gcc -DTCL_UTF_MAX=3" + CXX="x86_64-w64-mingw32-g++ -DTCL_UTF_MAX=3" RC="x86_64-w64-mingw32-windres" NM="x86_64-w64-mingw32-nm" export STRIP OBJCOPY AR RANLIB CC CXX RC NM fi Index: undroid/build-vanilla-freebsd.sh ================================================================== --- undroid/build-vanilla-freebsd.sh +++ undroid/build-vanilla-freebsd.sh @@ -73,12 +73,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="cc -DTCL_UTF_MAX=6" -CXX="c++ -DTCL_UTF_MAX=6" +CC="cc -DTCL_UTF_MAX=3" +CXX="c++ -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload tls" SUBDIRS="${SUBDIRS} Memchan TclCurl sdl2tk blt jpeg-turbo 3dcanvas" @@ -1303,11 +1303,11 @@ exec 3>&1 exec >> build.log 2>&1 cd ffidl test -e build-stamp && echo >&3 "already done" && exit 0 mkdir -p lib-src && rm -rf lib-src/libffi && cp -rp ../libffi lib-src - CC="gcc10 -DTCL_UTF_MAX=6" DESTDIR=${HERE} 
./configure --prefix=${PFX} \ + CC="gcc10 -DTCL_UTF_MAX=3" DESTDIR=${HERE} ./configure --prefix=${PFX} \ --with-tcl=${HERE}/tcl/unix --enable-threads \ --enable-libffi || exit 1 MAKE=gmake gmake || exit 1 MAKE=gmake gmake install DESTDIR=${HERE} || exit 1 touch build-stamp Index: undroid/build-vanilla-generic.sh ================================================================== --- undroid/build-vanilla-generic.sh +++ undroid/build-vanilla-generic.sh @@ -73,12 +73,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -DTCL_UTF_MAX=6" -CXX="g++ -DTCL_UTF_MAX=6" +CC="gcc -DTCL_UTF_MAX=3" +CXX="g++ -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload tls" SUBDIRS="${SUBDIRS} Memchan TclCurl sdl2tk blt jpeg-turbo 3dcanvas" Index: undroid/build-vanilla-illumos.sh ================================================================== --- undroid/build-vanilla-illumos.sh +++ undroid/build-vanilla-illumos.sh @@ -82,12 +82,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -DTCL_UTF_MAX=6" -CXX="g++ -DTCL_UTF_MAX=6" +CC="gcc -DTCL_UTF_MAX=3" +CXX="g++ -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload tls" SUBDIRS="${SUBDIRS} Memchan TclCurl sdl2tk blt jpeg-turbo 3dcanvas" Index: undroid/build-vanilla-linux32.sh ================================================================== --- undroid/build-vanilla-linux32.sh +++ undroid/build-vanilla-linux32.sh @@ -77,12 +77,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -m32 -march=i586 -mtune=generic -DTCL_UTF_MAX=6" -CXX="g++ -m32 -march=i586 -mtune=generic -DTCL_UTF_MAX=6" +CC="gcc -m32 -march=i586 -mtune=generic -DTCL_UTF_MAX=3" +CXX="g++ -m32 -march=i586 -mtune=generic -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload tls" SUBDIRS="${SUBDIRS} Memchan TclCurl libxft 
sdl2tk blt jpeg-turbo 3dcanvas" Index: undroid/build-vanilla-linux64.sh ================================================================== --- undroid/build-vanilla-linux64.sh +++ undroid/build-vanilla-linux64.sh @@ -77,12 +77,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -m64 -DTCL_UTF_MAX=6" -CXX="g++ -m64 -DTCL_UTF_MAX=6" +CC="gcc -m64 -DTCL_UTF_MAX=3" +CXX="g++ -m64 -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib curl tcludp tdom tclvfs tclkit trofs tbcload tls" SUBDIRS="${SUBDIRS} Memchan TclCurl libxft sdl2tk blt jpeg-turbo 3dcanvas" Index: undroid/build-vanilla-macosx.sh ================================================================== --- undroid/build-vanilla-macosx.sh +++ undroid/build-vanilla-macosx.sh @@ -78,13 +78,13 @@ # the toolchain STRIP="strip -S -x" AR=ar RANLIB=ranlib -CC="cc -DTCL_UTF_MAX=6 -mmacosx-version-min=10.10" -CXX="c++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=6 -mmacosx-version-min=10.10" -CXX_CV="c++ -DTCL_UTF_MAX=6 -mmacosx-version-min=10.10" +CC="cc -DTCL_UTF_MAX=3 -mmacosx-version-min=10.10" +CXX="c++ -fno-exceptions -fno-rtti -DTCL_UTF_MAX=3 -mmacosx-version-min=10.10" +CXX_CV="c++ -DTCL_UTF_MAX=3 -mmacosx-version-min=10.10" NM=nm # RPATH for binaries ADD_RPATH="/Applications/VLC.app/Contents/MacOS/lib" export STRIP AR RANLIB CC CXX CXX_CV NM ADD_RPATH Index: undroid/build-vanilla-openbsd.sh ================================================================== --- undroid/build-vanilla-openbsd.sh +++ undroid/build-vanilla-openbsd.sh @@ -73,12 +73,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="cc -DTCL_UTF_MAX=6" -CXX="c++ -DTCL_UTF_MAX=6" +CC="cc -DTCL_UTF_MAX=3" +CXX="c++ -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl zlib curl tcludp tdom tclvfs tclkit trofs tbcload tls" SUBDIRS="${SUBDIRS} Memchan TclCurl sdl2tk blt jpeg-turbo 3dcanvas" Index: undroid/build-vanilla-termux.sh ================================================================== --- 
undroid/build-vanilla-termux.sh +++ undroid/build-vanilla-termux.sh @@ -73,12 +73,12 @@ # the toolchain STRIP=strip AR=ar RANLIB=ranlib -CC="gcc -DTCL_UTF_MAX=6" -CXX="g++ -DTCL_UTF_MAX=6" +CC="gcc -DTCL_UTF_MAX=3" +CXX="g++ -DTCL_UTF_MAX=3" NM=nm export STRIP AR RANLIB CC CXX NM SUBDIRS="tcl libressl zlib tcludp tdom tclvfs tclkit trofs tbcload tls" SUBDIRS="${SUBDIRS} Memchan TclCurl sdl2tk blt jpeg-turbo 3dcanvas" Index: undroid/build-vanilla-win32.sh ================================================================== --- undroid/build-vanilla-win32.sh +++ undroid/build-vanilla-win32.sh @@ -104,13 +104,13 @@ PATH="/opt/mingw64/bin:$PATH" STRIP="x86_64-w64-mingw32-strip" OBJCOPY="x86_64-w64-mingw32-objcopy" AR="x86_64-w64-mingw32-ar" RANLIB="x86_64-w64-mingw32-ranlib" - CC="x86_64-w64-mingw32-gcc -m32 -march=i386 -mtune=i386 -D_WIN32_WINNT=0x0601 -DTCL_UTF_MAX=6" - CC_OLD="x86_64-w64-mingw32-gcc -m32 -march=i386 -mtune=i386 -D_WIN32_WINNT=0x0400 -DTCL_UTF_MAX=6" - CXX="x86_64-w64-mingw32-g++ -m32 -march=i386 -mtune=i386 -fno-exceptions -D_WIN32_WINNT=0x0601 -DTCL_UTF_MAX=6" + CC="x86_64-w64-mingw32-gcc -m32 -march=i386 -mtune=i386 -D_WIN32_WINNT=0x0601 -DTCL_UTF_MAX=3" + CC_OLD="x86_64-w64-mingw32-gcc -m32 -march=i386 -mtune=i386 -D_WIN32_WINNT=0x0400 -DTCL_UTF_MAX=3" + CXX="x86_64-w64-mingw32-g++ -m32 -march=i386 -mtune=i386 -fno-exceptions -D_WIN32_WINNT=0x0601 -DTCL_UTF_MAX=3" RC="x86_64-w64-mingw32-windres -F pe-i386" NM="x86_64-w64-mingw32-nm" export STRIP OBJCOPY AR RANLIB CC CC_OLD CXX RC NM else # would like to use -march=i386 -mtune=i386, too, but then gcc-4.8 @@ -119,13 +119,13 @@ echo using toolchain prefix i686-w64-mingw32 STRIP="i686-w64-mingw32-strip" OBJCOPY="i686-w64-mingw32-objcopy" AR="i686-w64-mingw32-ar" RANLIB="i686-w64-mingw32-ranlib" - CC="i686-w64-mingw32-gcc -m32 -march=i586 -mtune=generic -D_WIN32_WINNT=0x0601 -DWINVER=0x0601 -DTCL_UTF_MAX=6" - CC_OLD="i686-w64-mingw32-gcc -m32 -march=i586 -mtune=generic -D_WIN32_WINNT=0x0400 
-DWINVER=0x0400 -DTCL_UTF_MAX=6" - CXX="i686-w64-mingw32-g++ -m32 -march=i586 -mtune=generic -fno-exceptions -D_WIN32_WINNT=0x0601 -DWINVER=0x0601 -DTCL_UTF_MAX=6" + CC="i686-w64-mingw32-gcc -m32 -march=i586 -mtune=generic -D_WIN32_WINNT=0x0601 -DWINVER=0x0601 -DTCL_UTF_MAX=3" + CC_OLD="i686-w64-mingw32-gcc -m32 -march=i586 -mtune=generic -D_WIN32_WINNT=0x0400 -DWINVER=0x0400 -DTCL_UTF_MAX=3" + CXX="i686-w64-mingw32-g++ -m32 -march=i586 -mtune=generic -fno-exceptions -D_WIN32_WINNT=0x0601 -DWINVER=0x0601 -DTCL_UTF_MAX=3" RC="i686-w64-mingw32-windres -F pe-i386" NM="i686-w64-mingw32-nm" TWAPI_LDFLAGS="-L${AWDIR}/undroid/compat/win32/lib32" export STRIP OBJCOPY AR RANLIB CC CC_OLD CXX RC NM TWAPI_LDFLAGS fi Index: undroid/build-vanilla-win64.sh ================================================================== --- undroid/build-vanilla-win64.sh +++ undroid/build-vanilla-win64.sh @@ -102,23 +102,23 @@ PATH="/opt/mingw64/bin:$PATH" STRIP="x86_64-w64-mingw32-strip" OBJCOPY="x86_64-w64-mingw32-objcopy" AR="x86_64-w64-mingw32-ar" RANLIB="x86_64-w64-mingw32-ranlib" - CC="x86_64-w64-mingw32-gcc -D_WIN32_WINNT=0x0601 -DTCL_UTF_MAX=6" - CXX="x86_64-w64-mingw32-g++ -D_WIN32_WINNT=0x0601 -DTCL_UTF_MAX=6" + CC="x86_64-w64-mingw32-gcc -D_WIN32_WINNT=0x0601 -DTCL_UTF_MAX=3" + CXX="x86_64-w64-mingw32-g++ -D_WIN32_WINNT=0x0601 -DTCL_UTF_MAX=3" RC="x86_64-w64-mingw32-windres" NM="x86_64-w64-mingw32-nm" export STRIP OBJCOPY AR RANLIB CC CXX RC NM else echo using toolchain prefix x86_64-w64-mingw32 STRIP="x86_64-w64-mingw32-strip" OBJCOPY="x86_64-w64-mingw32-objcopy" AR="x86_64-w64-mingw32-ar" RANLIB="x86_64-w64-mingw32-ranlib" - CC="x86_64-w64-mingw32-gcc -D_WIN32_WINNT=0x0601 -DWINVER=0x0601 -DTCL_UTF_MAX=6" - CXX="x86_64-w64-mingw32-g++ -D_WIN32_WINT=0x0601 -DWINVER=0x0601 -DTCL_UTF_MAX=6" + CC="x86_64-w64-mingw32-gcc -D_WIN32_WINNT=0x0601 -DWINVER=0x0601 -DTCL_UTF_MAX=3" + CXX="x86_64-w64-mingw32-g++ -D_WIN32_WINT=0x0601 -DWINVER=0x0601 -DTCL_UTF_MAX=3" 
RC="x86_64-w64-mingw32-windres" NM="x86_64-w64-mingw32-nm" export STRIP OBJCOPY AR RANLIB CC CXX RC NM fi Index: undroid/ck8.x/ck.h ================================================================== --- undroid/ck8.x/ck.h +++ undroid/ck8.x/ck.h @@ -44,16 +44,10 @@ #elif (TCL_MINOR_VERSION == 1) #define CK_VERSION "8.1" #define CK_MINOR_VERSION 1 #else #error unsupported Tcl minor version -#endif - -#ifdef TCL_UTF_MAX -#if TCL_UTF_MAX == 4 -#error TCL_UTF_MAX=4 is unsupported -#endif #endif #ifndef RESOURCE_INCLUDED #ifdef __STDC__ Index: undroid/ck8.x/ckBind.c ================================================================== --- undroid/ck8.x/ckBind.c +++ undroid/ck8.x/ckBind.c @@ -1417,12 +1417,26 @@ } else { numStorage[numChars++] = eventPtr->key.keycode; } } if (eventPtr->key.is_uch) { +#if TCL_UTF_MAX == 3 + int uch = eventPtr->key.uch; + + numChars = 0; + if (uch >= 0x10000) { + uch -= 0x10000; + numChars += Tcl_UniCharToUtf((uch>>10) | 0xd800, + numStorage); + uch = (uch&0x3ff) | 0xdc00; + } + numChars += Tcl_UniCharToUtf(uch, + numStorage + numChars); +#else numChars = Tcl_UniCharToUtf(eventPtr->key.uch, numStorage); +#endif } numStorage[numChars] = '\0'; string = numStorage; } else if (eventPtr->type == CK_EV_BARCODE) { string = CkGetBarcodeData(winPtr->mainPtr); Index: undroid/ck8.x/ckEntry.c ================================================================== --- undroid/ck8.x/ckEntry.c +++ undroid/ck8.x/ckEntry.c @@ -566,10 +566,27 @@ break; case CK_SCROLL_UNITS: index += count; break; } +#if TCL_UTF_MAX == 3 + /* + * Adjust to begin of surrogate pair. 
+ */ + if (index > 0) { + char *cPtr = Tcl_UtfAtIndex(entryPtr->string, index); + Tcl_UniChar ch; + + Tcl_UtfToUniChar(cPtr, &ch); + if ((ch & 0xfc00) == 0xdc00) { + cPtr = Tcl_UtfPrev(cPtr, entryPtr->string); + Tcl_UtfToUniChar(cPtr, &ch); + if ((ch & 0xfc00) == 0xd800) + index--; + } + } +#endif } if (index >= entryPtr->numChars) { index = entryPtr->numChars-1; } if (index < 0) { @@ -918,16 +935,16 @@ if (entryPtr->displayString != NULL) { ckfree(entryPtr->displayString); entryPtr->displayString = NULL; } if (entryPtr->showChar != NULL) { - int ulen; + int nc, ulen; - entryPtr->displayString = (char *) ckalloc(entryPtr->numChars * 3 + 1); + nc = entryPtr->numChars; ulen = Tcl_UtfNext(entryPtr->showChar) - entryPtr->showChar; - for (p = entryPtr->displayString, i = entryPtr->numChars; i > 0; - i--) { + entryPtr->displayString = (char *) ckalloc(nc * ulen + 1); + for (p = entryPtr->displayString, i = nc; i > 0; i--) { memcpy(p, entryPtr->showChar, ulen); p += ulen; } *p = 0; displayString = entryPtr->displayString; @@ -978,10 +995,29 @@ if (entryPtr->leftIndex > maxOffScreen) { entryPtr->leftIndex = maxOffScreen; } leftIndex = Tcl_UtfAtIndex(displayString, entryPtr->leftIndex) - displayString; +#if TCL_UTF_MAX == 3 + /* + * Adjust to begin of surrogate pair. + */ + if (entryPtr->leftIndex > 0) { + char *cPtr = displayString + leftIndex; + Tcl_UniChar ch; + + Tcl_UtfToUniChar(cPtr, &ch); + if ((ch & 0xfc00) == 0xdc00) { + cPtr = Tcl_UtfPrev(cPtr, displayString); + Tcl_UtfToUniChar(cPtr, &ch); + if ((ch & 0xfc00) == 0xd800) { + entryPtr->leftIndex--; + leftIndex = cPtr - displayString; + } + } + } +#endif CkMeasureChars(winPtr->mainPtr, displayString, leftIndex, 0, INT_MAX, 0, CK_NEWLINES_NOT_SPECIAL|CK_PARTIAL_OK, &rightX, &dummy); entryPtr->leftX = 0; entryPtr->tabOrigin = entryPtr->leftX - rightX; @@ -1024,25 +1060,64 @@ * string). 
*/ { int length, clength; char *new; int inspos; +#if TCL_UTF_MAX == 3 + Tcl_UniChar ch; +#endif length = strlen(string); if (length == 0) { return; } clength = Tcl_NumUtfChars(string, -1); new = (char *) ckalloc((unsigned) (entryPtr->numBytes + length + 1)); inspos = Tcl_UtfAtIndex(entryPtr->string, index) - entryPtr->string; +#if TCL_UTF_MAX == 3 + if (length) { + char *prevPtr; + + Tcl_UtfToUniChar(string + length, &ch); + if ((ch & 0xfc00) == 0xdc00) { + prevPtr = Tcl_UtfPrev(string + length, string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xfc00) == 0xd800) { + length = prevPtr - string; + index -= 1; + } + } + } +#endif strncpy(new, entryPtr->string, (size_t) inspos); strcpy(new+inspos, string); strcpy(new+inspos+length, entryPtr->string+inspos); ckfree(entryPtr->string); entryPtr->string = new; entryPtr->numChars += clength; entryPtr->numBytes += length; + +#if TCL_UTF_MAX == 3 + /* + * Account for high surrogate at end of inserted string. + */ + if (length > 1) { + char *lastPtr = Tcl_UtfPrev(new+inspos+length, new); + + Tcl_UtfToUniChar(lastPtr, &ch); + if ((ch & 0xfc00) == 0xd800) { + /* + * A high surrogate. If followed by low surrogate, + * adjust index after it. + */ + lastPtr = Tcl_UtfNext(lastPtr); + Tcl_UtfToUniChar(lastPtr, &ch); + if ((ch & 0xfc00) == 0xdc00) + index++; + } + } +#endif /* * Inserting characters invalidates all indexes into the string. * Touch up the indexes so that they still refer to the same * characters (at new positions). When updating the selection @@ -1106,13 +1181,64 @@ } if (count <= 0) { return; } +#if TCL_UTF_MAX == 3 delpos = Tcl_UtfAtIndex(entryPtr->string, index) - entryPtr->string; + + /* + * Delete complete surrogate pairs. 
+ */ + if (delpos >= 0) { + Tcl_UniChar ch; + char *prevPtr, *nextPtr; + + Tcl_UtfToUniChar(entryPtr->string + delpos, &ch); + if ((ch & 0xfc00) == 0xdc00) { + prevPtr = Tcl_UtfPrev(entryPtr->string + delpos, entryPtr->string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xfc00) == 0xd800) { + delpos = prevPtr - entryPtr->string; + count++; + index--; + } + } else if ((count == 1) && ((ch & 0xfc00) == 0xd800)) { + nextPtr = Tcl_UtfNext(entryPtr->string + delpos); + Tcl_UtfToUniChar(nextPtr, &ch); + if ((ch & 0xfc00) == 0xdc00) { + count++; + } + } + if ((index + count) > entryPtr->numChars) { + count = entryPtr->numChars - index; + } + } +#endif + delcount = Tcl_UtfAtIndex(entryPtr->string + delpos, count) - (entryPtr->string + delpos); + +#if TCL_UTF_MAX == 3 + if (delcount) { + int len; + Tcl_UniChar ch; + char *prevPtr; + + len = Tcl_UtfToUniChar(entryPtr->string + delpos + delcount, &ch); + if ((ch & 0xfc00) == 0xdc00) { + prevPtr = Tcl_UtfPrev(entryPtr->string + delpos + delcount, + entryPtr->string); + Tcl_UtfToUniChar(prevPtr, &ch); + if ((ch & 0xfc00) == 0xd800) { + delcount += len; + count++; + } + } + } +#endif + new = (char *) ckalloc((unsigned) (entryPtr->numBytes + 1 - delcount)); strncpy(new, entryPtr->string, (size_t) delpos); strcpy(new+delpos, entryPtr->string+delpos+delcount); entryPtr->numChars = Tcl_NumUtfChars(new, -1); entryPtr->numBytes = strlen(new); @@ -1293,10 +1419,14 @@ int *indexPtr) /* Where to store converted index. 
*/ { size_t length; int dummy; CkWindow *winPtr = entryPtr->winPtr; + int roundUp = 0; +#if TCL_UTF_MAX == 3 + int oldInsertPos = entryPtr->insertPos; +#endif length = strlen(string); if (string[0] == 'a') { if (strncmp(string, "anchor", length) == 0) { @@ -1340,11 +1470,11 @@ *indexPtr = entryPtr->selectLast; } else { goto badIndex; } } else if (string[0] == '@') { - int x, roundUp; + int x; if (Tcl_GetInt(interp, string+1, &x) != TCL_OK) { goto badIndex; } if (x < 0) { @@ -1381,11 +1511,39 @@ if (*indexPtr < 0){ *indexPtr = 0; } else if (*indexPtr > entryPtr->numChars) { *indexPtr = entryPtr->numChars; } +#if TCL_UTF_MAX == 3 + roundUp = (indexPtr == &entryPtr->insertPos) && + (*indexPtr == (oldInsertPos + 1)); +#endif + } + +#if TCL_UTF_MAX == 3 + /* + * Enforce index on start or end of surrogate pair. + */ + if (*indexPtr) { + Tcl_UniChar ch; + + string = Tcl_UtfAtIndex(entryPtr->string, *indexPtr); + Tcl_UtfToUniChar(string, &ch); + if ((ch & 0xfc00) == 0xdc00) { + if (roundUp) { + *indexPtr += 1; + } else { + string = Tcl_UtfPrev(string, entryPtr->string); + Tcl_UtfToUniChar(string, &ch); + if ((ch & 0xfc00) == 0xd800) { + *indexPtr -= 1; + } + } + } } +#endif + return TCL_OK; } /* *---------------------------------------------------------------------- @@ -1419,10 +1577,30 @@ if (entryPtr->selectAnchor > entryPtr->numChars) { entryPtr->selectAnchor = entryPtr->numChars; } if (entryPtr->selectAnchor <= index) { +#if TCL_UTF_MAX == 3 + /* + * Correct ending point for surrogate pair. 
+ */ + Tcl_UniChar ch; + char *string; + + string = Tcl_UtfAtIndex(entryPtr->string, index); + string += Tcl_UtfToUniChar(string, &ch); + if (((ch & 0xfc00) == 0xdc00) && (index + 1 < entryPtr->numChars)) { + index += 1; + } else if (((ch & 0xfc00) == 0xd800) && + (index + 1 < entryPtr->numChars) && + (index == entryPtr->insertPos)) { + Tcl_UtfToUniChar(string, &ch); + if ((ch & 0xfc00) == 0xdc00) { + index += 2; + } + } +#endif newFirst = entryPtr->selectAnchor; newLast = index; } else { newFirst = index; newLast = entryPtr->selectAnchor; Index: undroid/ck8.x/ckEvent.c ================================================================== --- undroid/ck8.x/ckEvent.c +++ undroid/ck8.x/ckEvent.c @@ -702,21 +702,25 @@ } } nodelay(curscr, TRUE); done_uc: ucbuf[ucp] = '\0'; -#if TCL_UTF_MAX == 4 - { - int n; - - n = Tcl_UtfToUniChar(ucbuf, &uch); - ch = uch; - if (n == 0) { - Tcl_UtfToUniChar(ucbuf, &uch); - ch = (((ch&0x3ff)<<10) | (uch&0x3ff)) + 0x10000; +#if TCL_UTF_MAX == 3 + if ((ucp > 3) && ((ucbuf[0] & 0xff) < 0xf8)) { + if (((ucbuf[1] & 0xc0) == 0x80) && + ((ucbuf[2] & 0xc0) == 0x80) && + ((ucbuf[3] & 0xc0) == 0x80)) { + ch = ((ucbuf[0] & 0x0f) << 18) | + ((ucbuf[1] & 0x3f) << 12) | + ((ucbuf[2] & 0x3f) << 6) | + (ucbuf[3] & 0x3f); + goto decDone; } } + Tcl_UtfToUniChar(ucbuf, &uch); + ch = uch; +decDone: #else Tcl_UtfToUniChar(ucbuf, &uch); ch = uch; #endif code = 0; Index: undroid/ck8.x/ckText.c ================================================================== --- undroid/ck8.x/ckText.c +++ undroid/ck8.x/ckText.c @@ -458,11 +458,11 @@ segPtr->body.chars[last] = 0; Tcl_AppendResult(interp, segPtr->body.chars + offset, (char *) NULL); segPtr->body.chars[last] = savedChar; } - CkTextIndexForwChars(&index1, last-offset, &index1); + CkTextIndexForwBytes(&index1, last-offset, &index1); } } else if ((c == 'i') && (strncmp(argv[1], "index", length) == 0) && (length >= 3)) { char buffer[64]; @@ -499,11 +499,11 @@ } if (textPtr->state == ckTextNormalUid) { for (j = 3; j < 
argc; j += 2) { InsertChars(textPtr, &index1, argv[j]); if (argc > (j+1)) { - CkTextIndexForwChars(&index1, (int) strlen(argv[j]), + CkTextIndexForwBytes(&index1, (int) strlen(argv[j]), &index2); oldTagArrayPtr = CkBTreeGetTags(&index1, &numTags); if (oldTagArrayPtr != NULL) { for (i = 0; i < numTags; i++) { CkBTreeTag(&index1, &index2, oldTagArrayPtr[i], 0); Index: undroid/ck8.x/ckTextIndex.c ================================================================== --- undroid/ck8.x/ckTextIndex.c +++ undroid/ck8.x/ckTextIndex.c @@ -824,10 +824,22 @@ for ( ; segPtr != NULL; segPtr = segPtr->nextPtr) { if (segPtr->typePtr == &ckTextCharType) { start = segPtr->body.chars + byteOffset; end = segPtr->body.chars + segPtr->size; for (p = start; p < end; p += Tcl_UtfToUniChar(p, &ch)) { +#if TCL_UTF_MAX == 3 + if (((ch & 0xfc00) == 0xd800) && (p < end)) { + char *pp = p; + + pp += Tcl_UtfToUniChar(pp, &ch); + if ((ch & 0xfc00) == 0xdc00) { + p = pp; + if (count > 0) + count--; + } + } +#endif if (count == 0) { dstPtr->charIndex += (p - start); return; } count--; @@ -981,10 +993,27 @@ while (1) { if (segPtr->typePtr == &ckTextCharType) { start = segPtr->body.chars; end = segPtr->body.chars + segSize; for (p = end; ; p = Tcl_UtfPrev(p, start)) { +#if TCL_UTF_MAX == 3 + Tcl_UniChar ch; + + Tcl_UtfToUniChar(p, &ch); + if (((ch & 0xfc00) == 0xdc00) && (p > start)) { + char *pp = Tcl_UtfPrev(p, start); + + if (pp != NULL) { + Tcl_UtfToUniChar(pp, &ch); + if ((ch & 0xfc00) == 0xd800) { + p = pp; + if (count > 0) + count--; + } + } + } +#endif if (count == 0) { dstPtr->charIndex -= (end - p); return; } if (p == start) { Index: undroid/ck8.x/ckUtil.c ================================================================== --- undroid/ck8.x/ckUtil.c +++ undroid/ck8.x/ckUtil.c @@ -498,10 +498,105 @@ } /* *-------------------------------------------------------------- * + * NumUtfChars, UtfToUniChar, UtfAtIndex -- + * + * Wrappers for Tcl_NumUtfChars(), Tcl_UtfToUniChar(), and + * Tcl_UtAtIndex() 
counting/producing 32 bit codepoints. + * + *-------------------------------------------------------------- + */ + +static int +NumUtfChars(buf, len) + char *buf; + int len; +{ + Tcl_UniChar ch; + char *end; + int n, i = 0; + + if (len < 0) { + len = strlen(buf); + } + end = buf + len; + while (buf < end) { + n = Tcl_UtfToUniChar(buf, &ch); +#if TCL_UTF_MAX == 3 + if ((ch & 0xfc00) == 0xd800) { + int n2; + Tcl_UniChar ch2; + + n2 = Tcl_UtfToUniChar(buf + n, &ch2); + if ((ch2 & 0xfc00) == 0xdc00) + buf += n2; + } +#endif + buf += n; + i++; + } + return i; +} + +static int +UtfToUniChar(buf, chPtr) + char *buf; + int *chPtr; +{ + int len; + Tcl_UniChar ch; + + len = Tcl_UtfToUniChar(buf, &ch); +#if TCL_UTF_MAX == 3 + if ((ch & 0xfc00) == 0xd800) { + int len2; + Tcl_UniChar ch2; + + len2 = Tcl_UtfToUniChar(buf + len, &ch2); + if ((ch2 & 0xfc00) == 0xdc00) { + *chPtr = (((ch & 0x3ff) << 10) | (ch2 & 0x3ff)) + 0x10000; + len += len2; + return len; + } + } +#endif + *chPtr = ch; + return len; +} + +static CONST char * +UtfAtIndex(src, index) + CONST char *src; + int index; +{ + int len; + Tcl_UniChar ch = 0; + + while (index-- > 0) { + len = Tcl_UtfToUniChar(src, &ch); +#if TCL_UTF_MAX == 3 + if ((ch & 0xfc00) == 0xd800) { + int len2; + Tcl_UniChar ch2; + + len2 = Tcl_UtfToUniChar(src + len, &ch2); + if ((ch2 & 0xfc00) == 0xdc00) { + len += len2; + index++; + } + } +#endif + src += len; + } + return src; +} + +/* + *-------------------------------------------------------------- + * * MakeUCRepl -- * * Make replacement for unprintable chars. * *-------------------------------------------------------------- @@ -612,12 +707,11 @@ int termX; /* X-position just after term. */ int curX; /* X-position corresponding to p. */ int newX; /* X-position corresponding to p+1. 
*/ int rem; int nChars = 0; - int n, m, srcRead, dstWrote, dstChars; - Tcl_UniChar uch; + int uch, n, m, srcRead, dstWrote, dstChars; char buf[TCL_UTF_MAX*2], buf2[TCL_UTF_MAX*2]; /* * Scan the input string one character at a time, until a character * is found that crosses maxX. @@ -627,11 +721,11 @@ termX = 0; term = source; for (p = source; *p != '\0' && maxChars > 0;) { char *p2; - n = Tcl_UtfToUniChar(p, &uch); + n = UtfToUniChar(p, &uch); p2 = p + n; ++nChars; maxChars -= n; m = Tcl_UniCharToUtf(uch, buf); if (mainPtr->isoEncoding) { @@ -690,11 +784,11 @@ if (newX > maxX) { break; } p = p2; if (maxChars > 1) { - n = Tcl_UtfToUniChar(p, &uch); + n = UtfToUniChar(p, &uch); p2 = p + n; m = Tcl_UniCharToUtf(uch, buf); if (mainPtr->isoEncoding) { buf2[0] = '\0'; Tcl_UtfToExternal(NULL, mainPtr->isoEncoding, buf, m, @@ -724,21 +818,21 @@ */ if ((flags & CK_PARTIAL_OK) && (curX < maxX)) { curX = newX; if (*p) { - n = Tcl_UtfToUniChar(p, &uch); + n = UtfToUniChar(p, &uch); p += n; ++nChars; } } if ((flags & CK_AT_LEAST_ONE) && (term == source) && (maxChars > 0) && !isspace((unsigned char) *term)) { term = p; termX = curX; if (term == source) { - n = Tcl_UtfToUniChar(term, &uch); + n = UtfToUniChar(term, &uch); term += n; ++nChars; } } else if ((maxChars == 0) || !(flags & CK_WHOLE_WORDS)) { term = p; @@ -796,26 +890,25 @@ * character. */ getmaxyx(window, dummy, maxX); p = string; - nc = Tcl_NumUtfChars(p, numChars); + nc = NumUtfChars(p, numChars); if (nc > maxX - x) - numChars = Tcl_UtfAtIndex(p, maxX - x) - p; + numChars = UtfAtIndex(p, maxX - x) - p; else numChars = nc; if (numChars > maxX - x) numChars = maxX - x; startX = curX = x; wmove(window, y, (x > 0) ? 
x : 0); for (; numChars > 0; numChars--, p += nc) { - Tcl_UniChar uch; - int len; + int uch, len; if (*p == '\0') break; - nc = Tcl_UtfToUniChar(p, &uch); + nc = UtfToUniChar(p, &uch); if (mainPtr->isoEncoding) { int srcRead, dstWrote, dstChars; char buf[TCL_UTF_MAX*2]; buf[0] = '\0'; @@ -973,26 +1066,25 @@ */ count = 0; getmaxyx(window, dummy, maxX); p = string; - nc = Tcl_NumUtfChars(p, numChars); + nc = NumUtfChars(p, numChars); if (nc > maxX - x) - numChars = Tcl_UtfAtIndex(p, maxX - x) - p; + numChars = UtfAtIndex(p, maxX - x) - p; else numChars = nc; if (numChars > maxX - x) numChars = maxX - x; startX = curX = x; wmove(window, y, (x > 0) ? x : 0); for (; numChars > 0 && count <= last; numChars -= nc, count++, p += nc) { - Tcl_UniChar uch; - int len; + int uch, len; if (*p == '\0') break; - nc = Tcl_UtfToUniChar(p, &uch); + nc = UtfToUniChar(p, &uch); if (mainPtr->isoEncoding) { int srcRead, dstWrote, dstChars; char buf[TCL_UTF_MAX*2]; buf[0] = '\0'; Index: undroid/ck8.x/library/text.tcl ================================================================== --- undroid/ck8.x/library/text.tcl +++ undroid/ck8.x/library/text.tcl @@ -273,10 +273,15 @@ set ckPriv(char) $char } set new [$w index [expr {$line + $n}].$ckPriv(char)] if {[$w compare $new == end] || [$w compare $new == "insert linestart"]} { set new $i + } else { + scan [$w get $new] "%c" char + if {($char & 0xfffffc00) == 0xdc00} { + set new [$w index "$new + 1c"] + } } set ckPriv(prevPos) $new return $new } Index: undroid/tclbsd/generic/bsd.c ================================================================== --- undroid/tclbsd/generic/bsd.c +++ undroid/tclbsd/generic/bsd.c @@ -661,22 +661,26 @@ Tcl_Obj *const objv[]; { char *path; Tcl_Obj *resultObj = Tcl_GetObjResult (interp); struct statfs statfsbuf; + Tcl_DString ds; if (objc != 2) { Tcl_WrongNumArgs(interp, 1, objv, "path"); return TCL_ERROR; } path = Tcl_GetStringFromObj (objv[1], NULL); + path = Tcl_UtfToExternalDString (NULL, path, -1, &ds); if 
(statfs (path, &statfsbuf) < 0) { + Tcl_DStringFree (&ds); Tcl_SetStringObj (resultObj, Tcl_PosixError (interp), -1); return TCL_ERROR; } + Tcl_DStringFree (&ds); return StatfsBufToList (interp, resultObj, &statfsbuf); } /*----------------------------------------------------------------------------- @@ -829,11 +833,12 @@ Tcl_Interp *interp; int objc; Tcl_Obj *const objv[]; { #ifdef HAVE_SETPROCTITLE - char *titleString; + char *titleString; + Tcl_DString ds; #endif if (objc > 2) { Tcl_WrongNumArgs (interp, 1, objv, "[string]"); return TCL_ERROR; @@ -842,13 +847,18 @@ #ifdef HAVE_SETPROCTITLE if (objc == 1) { titleString = NULL; } else { titleString = Tcl_GetString (objv[1]); + titleString = Tcl_UtfToExternalDString (NULL, titleString, -1, &ds); } setproctitle ("-%s", titleString); + + if (titleString != NULL) { + Tcl_DStringFree (&ds); + } #endif return TCL_OK; } /*----------------------------------------------------------------------------- Index: undroid/tclbsd/generic/bsdsyslog.c ================================================================== --- undroid/tclbsd/generic/bsdsyslog.c +++ undroid/tclbsd/generic/bsdsyslog.c @@ -132,11 +132,12 @@ return TCL_ERROR; } switch ((enum options) optIndex) { case OPT_LOG: { - int priority; + int priority; + Tcl_DString ds; if (objc != 4) { Tcl_WrongNumArgs (interp, 2, objv, "priority message"); return TCL_ERROR; } @@ -144,11 +145,13 @@ priority = GetSyslogPriority (interp, Tcl_GetString (objv[2]), TCLBSD_LOG_ERROR); if (priority == -1) { return TCL_ERROR; } - syslog (priority, "%s", Tcl_GetString (objv[3])); + syslog (priority, "%s", Tcl_UtfToExternalDString (NULL, + Tcl_GetString (objv[3]), -1, &ds)); + Tcl_DStringFree (&ds); break; } case OPT_OPEN: { int facility; @@ -156,10 +159,12 @@ int logopt = 0; int logoptIndex; int logoptObjc; int i; Tcl_Obj **logoptObjv; + + static Tcl_DString identDs; static const char *logopts[] = { "console", "no_delay", "perror", "pid", (char *)NULL }; @@ -168,17 +173,10 @@ if (objc != 5) { 
Tcl_WrongNumArgs (interp, 2, objv, "ident logopt facility"); return TCL_ERROR; } - /* OK, ident needs to be a const char *, i.e. it needs to not change - * behind openlog/syslog's back. How shall we implement that? - * Should we malloc and copy? Nah, let's just increment the - * reference count on the object so that Tcl will leave it the - * heck alone. -kl - */ - Tcl_IncrRefCount (objv[2]); ident = Tcl_GetString (objv[2]); if (Tcl_ListObjGetElements (interp, objv[3], &logoptObjc, &logoptObjv) == TCL_ERROR) { Tcl_AppendResult (interp, " while getting list of log options", NULL); return TCL_ERROR; @@ -194,10 +192,11 @@ facility = GetSyslogFacility (interp, Tcl_GetString (objv[4]), TCLBSD_LOG_ERROR); if (facility == -1) { return TCL_ERROR; } + ident = Tcl_UtfToExternalDString (NULL, ident, -1, &identDs); openlog (ident, logopt, facility); break; } case OPT_CLOSE: Index: undroid/twapi/twapi/base/errors.c ================================================================== --- undroid/twapi/twapi/base/errors.c +++ undroid/twapi/twapi/base/errors.c @@ -349,16 +349,16 @@ /* Third element of error code is also the message */ if (ObjListIndex(NULL, errorCodeObj, 2, &msgObj) == TCL_OK && msgObj != NULL) { Tcl_Obj *resultObj = ObjDuplicate(ObjGetResult(interp)); if (ObjCharLength(resultObj)) { -#if TCL_UTF_MAX <= 4 - Tcl_AppendUnicodeToObj(resultObj, L" ", 1); -#else +#if TCL_UTF_MAX > 3 /* Tcl_UniChar is int. So cannot use AppendUnicode. 
Have to force a shimmer to string */ Tcl_AppendToObj(resultObj, " ", 1); +#else + Tcl_AppendUnicodeToObj(resultObj, L" ", 1); #endif } Tcl_AppendObjToObj(resultObj, msgObj); (void) ObjSetResult(interp, resultObj); } Index: undroid/twapi/twapi/base/tclobjs.c ================================================================== --- undroid/twapi/twapi/base/tclobjs.c +++ undroid/twapi/twapi/base/tclobjs.c @@ -4379,34 +4379,34 @@ TWAPI_EXTERN Tcl_Obj *ObjFromTclUniCharN(const Tcl_UniChar *ws, int len) { if (ws == NULL) return ObjFromEmptyString(); -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX > 3 + return Tcl_NewUnicodeObj(ws, len); +#else TWAPI_ASSERT(sizeof(Tcl_UniChar) == sizeof(WCHAR)); if (gBaseSettings.use_unicode_obj) return Tcl_NewUnicodeObj(ws, len); else return TwapiUtf8ObjFromWinChars(ws, len); -#else - return Tcl_NewUnicodeObj(ws, len); #endif } TWAPI_EXTERN Tcl_Obj *ObjFromTclUniChar(const Tcl_UniChar *ws) { if (ws == NULL) return ObjFromEmptyString(); /* TBD - log ? */ -#if TCL_UTF_MAX <= 4 +#if TCL_UTF_MAX > 3 + return Tcl_NewUnicodeObj(ws, -1); +#else TWAPI_ASSERT(sizeof(Tcl_UniChar) == sizeof(WCHAR)); if (gBaseSettings.use_unicode_obj) return Tcl_NewUnicodeObj(ws, -1); else return TwapiUtf8ObjFromWinChars(ws, -1); -#else - return Tcl_NewUnicodeObj(ws, -1); #endif } TWAPI_EXTERN char *ObjToString(Tcl_Obj *objP) { Index: undroid/twapi/twapi/base/winchars.c ================================================================== --- undroid/twapi/twapi/base/winchars.c +++ undroid/twapi/twapi/base/winchars.c @@ -13,11 +13,11 @@ * sizeof(Tcl_UniChar) != sizeof(WCHAR). Otherwise the Obj{To,From}WinChars * are inlined as Obj{To,From}TclUniChar in twapi.h * * TWAPI_FORCE_WINCHARS is used for testing purposes. */ -#if (TCL_UTF_MAX > 4) || defined(TWAPI_FORCE_WINCHARS) +#if (TCL_UTF_MAX > 3) || defined(TWAPI_FORCE_WINCHARS) /* * Implements a new Tcl_Obj intrep to hold WCHARs. 
*/ Index: undroid/twapi/twapi/include/twapi.h ================================================================== --- undroid/twapi/twapi/include/twapi.h +++ undroid/twapi/twapi/include/twapi.h @@ -1521,11 +1521,11 @@ TWAPI_EXTERN Tcl_UniChar *ObjToTclUniCharN(Tcl_Obj *objP, int *lenP); TWAPI_EXTERN Tcl_UniChar *ObjToTclUniCharDW(Tcl_Obj *objP, DWORD *lenP); TWAPI_EXTERN Tcl_Obj *ObjFromTclUniCharN(const Tcl_UniChar *ws, int len); TWAPI_EXTERN Tcl_Obj *ObjFromTclUniChar(const Tcl_UniChar *ws); -#if (TCL_UTF_MAX > 4) || defined(TWAPI_FORCE_WINCHARS) +#if (TCL_UTF_MAX > 3) || defined(TWAPI_FORCE_WINCHARS) TWAPI_EXTERN WCHAR *ObjToWinChars(Tcl_Obj *objP); TWAPI_EXTERN WCHAR *ObjToWinCharsN(Tcl_Obj *objP, int *lenP); TWAPI_EXTERN WCHAR *ObjToWinCharsDW(Tcl_Obj *objP, DWORD *lenP); TWAPI_EXTERN Tcl_Obj *ObjFromWinCharsN(const WCHAR *ws, int len); TWAPI_EXTERN Tcl_Obj *ObjFromWinChars(const WCHAR *ws); Index: undroid/twapi/twapi/input/input.c ================================================================== --- undroid/twapi/twapi/input/input.c +++ undroid/twapi/twapi/input/input.c @@ -210,29 +210,27 @@ /* Now loop through every character adding it to the input event array */ /* Win2K and up, accepts unicode characters */ /* NUmber of events is twice number of chars (keydown + keyup) */ -#if TCL_UTF_MAX < 4 - max_input_records = 2 * num_chars; +#if TCL_UTF_MAX > 3 + max_input_records = 4 * num_chars; #else - max_input_records = 4 * num_chars; + max_input_records = 2 * num_chars; #endif input = MemLifoAlloc(ticP->memlifoP, max_input_records * sizeof(*input), NULL); for (i = 0, j = 0; i < num_chars; ++i) { WCHAR wch; -#if TCL_UTF_MAX > 4 +#if TCL_UTF_MAX > 3 Tcl_UniChar uch; #endif #ifndef KEYEVENTF_UNICODE #define KEYEVENTF_UNICODE 0x0004 #endif -#if TCL_UTF_MAX <= 4 - wch = Tcl_GetUniChar(input_obj, i); -#else +#if TCL_UTF_MAX > 3 uch = Tcl_GetUniChar(input_obj, i); if (uch > 0xFFFF) { wch = (((uch - 0x10000) >> 10) & 0x3FF) | 0xD800; init_keyboard_input(&input[j], 0, 
KEYEVENTF_UNICODE); input[j].ki.wScan = wch; @@ -242,10 +240,12 @@ j++; wch = ((uch - 0x10000) & 0x3FF) | 0xDC00; } else { wch = (WCHAR) uch; } +#else + wch = Tcl_GetUniChar(input_obj, i); #endif init_keyboard_input(&input[j], 0, KEYEVENTF_UNICODE); input[j].ki.wScan = wch; ++j; init_keyboard_input(&input[j], 0, KEYEVENTF_UNICODE|KEYEVENTF_KEYUP); Index: undroid/twapi/twapi/storage/dirmonitor.c ================================================================== --- undroid/twapi/twapi/storage/dirmonitor.c +++ undroid/twapi/twapi/storage/dirmonitor.c @@ -723,13 +723,11 @@ include = 1; ++pattern; } else include = 1; /* Default is inclusive pattern */ -#if TCL_UTF_MAX <= 4 - return Tcl_UniCharCaseMatch(path, pattern, 1) ? include : 0; -#else +#if TCL_UTF_MAX > 3 { Tcl_Obj *patObj, *pathObj; Tcl_UniChar *pathuni, *patuni; int match_result; patObj = ObjFromWinChars(pattern); @@ -739,7 +737,9 @@ match_result = Tcl_UniCharCaseMatch(pathuni, patuni, 1) ? include : 0; ObjDecrRefs(patObj); ObjDecrRefs(pathObj); return match_result; } +#else + return Tcl_UniCharCaseMatch(path, pattern, 1) ? include : 0; #endif }