diff options
Diffstat (limited to 'thirdparty/icu4c/common/unistr.cpp')
-rw-r--r-- | thirdparty/icu4c/common/unistr.cpp | 163 |
1 files changed, 113 insertions, 50 deletions
diff --git a/thirdparty/icu4c/common/unistr.cpp b/thirdparty/icu4c/common/unistr.cpp index ce81138c2c..a720245772 100644 --- a/thirdparty/icu4c/common/unistr.cpp +++ b/thirdparty/icu4c/common/unistr.cpp @@ -20,6 +20,8 @@ ****************************************************************************** */ +#include <string_view> + #include "unicode/utypes.h" #include "unicode/appendable.h" #include "unicode/putil.h" @@ -107,12 +109,34 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) UnicodeString U_EXPORT2 operator+ (const UnicodeString &s1, const UnicodeString &s2) { - return - UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). - append(s1). - append(s2); + int32_t sumLengths; + if (uprv_add32_overflow(s1.length(), s2.length(), &sumLengths)) { + UnicodeString bogus; + bogus.setToBogus(); + return bogus; + } + if (sumLengths != INT32_MAX) { + ++sumLengths; // space for a terminating NUL if we need one + } + return UnicodeString(sumLengths, static_cast<UChar32>(0), 0).append(s1).append(s2); +} + +U_COMMON_API UnicodeString U_EXPORT2 +unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2) { + int32_t sumLengths; + if (s2.length() > INT32_MAX || + uprv_add32_overflow(s1.length(), static_cast<int32_t>(s2.length()), &sumLengths)) { + UnicodeString bogus; + bogus.setToBogus(); + return bogus; + } + if (sumLengths != INT32_MAX) { + ++sumLengths; // space for a terminating NUL if we need one + } + return UnicodeString(sumLengths, static_cast<UChar32>(0), 0).append(s1).append(s2); } + //======================================== // Reference Counting functions, put at top of file so that optimizing compilers // have a chance to automatically inline. @@ -120,23 +144,23 @@ operator+ (const UnicodeString &s1, const UnicodeString &s2) { void UnicodeString::addRef() { - umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1); + umtx_atomic_inc(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1); } int32_t UnicodeString::removeRef() { - return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1); + return umtx_atomic_dec(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1); } int32_t UnicodeString::refCount() const { - return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1)); + return umtx_loadAcquire(*(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1)); } void UnicodeString::releaseArray() { if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) { - uprv_free((int32_t *)fUnion.fFields.fArray - 1); + uprv_free(reinterpret_cast<int32_t*>(fUnion.fFields.fArray) - 1); } } @@ -150,7 +174,7 @@ UnicodeString::releaseArray() { UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) { fUnion.fFields.fLengthAndFlags = 0; - if(count <= 0 || (uint32_t)c > 0x10ffff) { + if (count <= 0 || static_cast<uint32_t>(c) > 0x10ffff) { // just allocate and do not do anything else allocate(capacity); } else if(c <= 0xffff) { @@ -160,7 +184,7 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) { } if(allocate(capacity)) { char16_t *array = getArrayStart(); - char16_t unit = (char16_t)c; + char16_t unit = static_cast<char16_t>(c); for(int32_t i = 0; i < length; ++i) { array[i] = unit; } @@ -206,11 +230,6 @@ UnicodeString::UnicodeString(UChar32 ch) { } } -UnicodeString::UnicodeString(const char16_t *text) { - fUnion.fFields.fLengthAndFlags = kShortString; - doAppend(text, 0, -1); -} - UnicodeString::UnicodeString(const char16_t *text, int32_t textLength) { fUnion.fFields.fLengthAndFlags = kShortString; @@ -256,7 +275,7 @@ UnicodeString::UnicodeString(char16_t *buff, while(p != limit && *p != 0) { ++p; } - buffLength = (int32_t)(p - buff); + buffLength = static_cast<int32_t>(p - buff); } setArray(buff, buffLength, buffCapacity); } @@ -268,7 +287,7 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) { // treat as an empty string } else { if(length<0) { - length=(int32_t)uprv_strlen(src); + length = static_cast<int32_t>(uprv_strlen(src)); } if(cloneArrayIfNeeded(length, length, false)) { u_charsToUChars(src, getArrayStart(), length); @@ -279,6 +298,26 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) { } } +UnicodeString UnicodeString::readOnlyAliasFromU16StringView(std::u16string_view text) { + UnicodeString result; + if (text.length() <= INT32_MAX) { + result.setTo(false, text.data(), static_cast<int32_t>(text.length())); + } else { + result.setToBogus(); + } + return result; +} + +UnicodeString UnicodeString::readOnlyAliasFromUnicodeString(const UnicodeString &text) { + UnicodeString result; + if (text.isBogus()) { + result.setToBogus(); + } else { + result.setTo(false, text.getBuffer(), text.length()); + } + return result; +} + #if U_CHARSET_IS_UTF8 UnicodeString::UnicodeString(const char *codepageData) { @@ -295,7 +334,7 @@ UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) { return; } if(dataLength == -1) { - dataLength = (int32_t)uprv_strlen(codepageData); + dataLength = static_cast<int32_t>(uprv_strlen(codepageData)); } setToUTF8(StringPiece(codepageData, dataLength)); } @@ -376,18 +415,18 @@ UnicodeString::allocate(int32_t capacity) { ++capacity; // for the NUL // Switch to size_t which is unsigned so that we can allocate up to 4GB. // Reference counter + UChars. - size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR; + size_t numBytes = sizeof(int32_t) + static_cast<size_t>(capacity) * U_SIZEOF_UCHAR; // Round up to a multiple of 16. numBytes = (numBytes + 15) & ~15; - int32_t *array = (int32_t *) uprv_malloc(numBytes); + int32_t* array = static_cast<int32_t*>(uprv_malloc(numBytes)); if(array != nullptr) { // set initial refCount and point behind the refCount *array++ = 1; numBytes -= sizeof(int32_t); // have fArray point to the first char16_t - fUnion.fFields.fArray = (char16_t *)array; - fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR); + fUnion.fFields.fArray = reinterpret_cast<char16_t*>(array); + fUnion.fFields.fCapacity = static_cast<int32_t>(numBytes / U_SIZEOF_UCHAR); fUnion.fFields.fLengthAndFlags = kLongString; return true; } @@ -622,7 +661,7 @@ void UnicodeString::swap(UnicodeString &other) noexcept { //======================================== UnicodeString UnicodeString::unescape() const { - UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity + UnicodeString result(length(), static_cast<UChar32>(0), static_cast<int32_t>(0)); // construct with capacity if (result.isBogus()) { return result; } @@ -656,10 +695,10 @@ UChar32 UnicodeString::unescapeAt(int32_t &offset) const { // Read-only implementation //======================================== UBool -UnicodeString::doEquals(const UnicodeString &text, int32_t len) const { - // Requires: this & text not bogus and have same lengths. +UnicodeString::doEquals(const char16_t *text, int32_t len) const { + // Requires: this not bogus and have same lengths. // Byte-wise comparison works for equality regardless of endianness. - return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0; + return uprv_memcmp(getArrayStart(), text, len * U_SIZEOF_UCHAR) == 0; } UBool @@ -772,9 +811,9 @@ UnicodeString::doCompare( int32_t start, # else // little-endian: compare char16_t units do { - result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); + result = static_cast<int32_t>(*(chars++)) - static_cast<int32_t>(*(srcChars++)); if(result != 0) { - return (int8_t)(result >> 15 | 1); + return static_cast<int8_t>(result >> 15 | 1); } } while(--minLength > 0); # endif @@ -806,7 +845,7 @@ UnicodeString::doCompareCodePointOrder(int32_t start, int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=nullptr)?(srcChars + srcStart):nullptr, srcLength, false, true); /* translate the 32-bit result into an 8-bit one */ if(diff!=0) { - return (int8_t)(diff >> 15 | 1); + return static_cast<int8_t>(diff >> 15 | 1); } else { return 0; } @@ -831,7 +870,7 @@ UChar32 UnicodeString::char32At(int32_t offset) const { int32_t len = length(); - if((uint32_t)offset < (uint32_t)len) { + if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(len)) { const char16_t *array = getArrayStart(); UChar32 c; U16_GET(array, 0, offset, len, c); @@ -843,7 +882,7 @@ UnicodeString::char32At(int32_t offset) const int32_t UnicodeString::getChar32Start(int32_t offset) const { - if((uint32_t)offset < (uint32_t)length()) { + if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) { const char16_t *array = getArrayStart(); U16_SET_CP_START(array, 0, offset); return offset; @@ -855,7 +894,7 @@ UnicodeString::getChar32Start(int32_t offset) const { int32_t UnicodeString::getChar32Limit(int32_t offset) const { int32_t len = length(); - if((uint32_t)offset < (uint32_t)len) { + if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(len)) { const char16_t *array = getArrayStart(); U16_SET_CP_LIMIT(array, 0, offset, len); return offset; @@ -989,7 +1028,7 @@ UnicodeString::extract(int32_t start, int32_t len, if (/*dstSize < 0 || */(dstSize > 0 && target == nullptr)) { return 0; } - return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff); + return toUTF8(start, len, target, dstSize <= 0x7fffffff ? static_cast<int32_t>(dstSize) : 0x7fffffff); } // else see unistr_cnv.cpp @@ -1013,7 +1052,7 @@ UnicodeString::toUTF8(ByteSink &sink) const { int32_t length16 = length(); if(length16 != 0) { char stackBuffer[1024]; - int32_t capacity = (int32_t)sizeof(stackBuffer); + int32_t capacity = static_cast<int32_t>(sizeof(stackBuffer)); UBool utf8IsOwned = false; char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity, 3*length16, @@ -1027,7 +1066,7 @@ UnicodeString::toUTF8(ByteSink &sink) const { nullptr, // Don't care about number of substitutions. &errorCode); if(errorCode == U_BUFFER_OVERFLOW_ERROR) { - utf8 = (char *)uprv_malloc(length8); + utf8 = static_cast<char*>(uprv_malloc(length8)); if(utf8 != nullptr) { utf8IsOwned = true; errorCode = U_ZERO_ERROR; @@ -1089,7 +1128,7 @@ UnicodeString::indexOf(const char16_t *srcChars, if(match == nullptr) { return -1; } else { - return (int32_t)(match - array); + return static_cast<int32_t>(match - array); } } @@ -1107,7 +1146,7 @@ UnicodeString::doIndexOf(char16_t c, if(match == nullptr) { return -1; } else { - return (int32_t)(match - array); + return static_cast<int32_t>(match - array); } } @@ -1124,7 +1163,7 @@ UnicodeString::doIndexOf(UChar32 c, if(match == nullptr) { return -1; } else { - return (int32_t)(match - array); + return static_cast<int32_t>(match - array); } } @@ -1153,7 +1192,7 @@ UnicodeString::lastIndexOf(const char16_t *srcChars, if(match == nullptr) { return -1; } else { - return (int32_t)(match - array); + return static_cast<int32_t>(match - array); } } @@ -1175,7 +1214,7 @@ UnicodeString::doLastIndexOf(char16_t c, if(match == nullptr) { return -1; } else { - return (int32_t)(match - array); + return static_cast<int32_t>(match - array); } } @@ -1192,7 +1231,7 @@ UnicodeString::doLastIndexOf(UChar32 c, if(match == nullptr) { return -1; } else { - return (int32_t)(match - array); + return static_cast<int32_t>(match - array); } } @@ -1329,7 +1368,7 @@ UnicodeString::setTo(UBool isTerminated, textLength = u_strlen(text); } fUnion.fFields.fLengthAndFlags = kReadonlyAlias; - setArray((char16_t *)text, textLength, isTerminated ? textLength + 1 : textLength); + setArray(const_cast<char16_t*>(text), textLength, isTerminated ? textLength + 1 : textLength); return *this; } @@ -1359,7 +1398,7 @@ UnicodeString::setTo(char16_t *buffer, while(p != limit && *p != 0) { ++p; } - buffLength = (int32_t)(p - buffer); + buffLength = static_cast<int32_t>(p - buffer); } releaseArray(); @@ -1574,6 +1613,18 @@ UnicodeString::doReplace(int32_t start, return *this; } +UnicodeString& +UnicodeString::doReplace(int32_t start, int32_t length, std::u16string_view src) { + if (!isWritable()) { + return *this; + } + if (src.length() > INT32_MAX) { + setToBogus(); + return *this; + } + return doReplace(start, length, src.data(), 0, static_cast<int32_t>(src.length())); +} + // Versions of doReplace() only for append() variants. // doReplace() and doAppend() optimize for different cases. @@ -1662,6 +1713,18 @@ UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcL return *this; } +UnicodeString& +UnicodeString::doAppend(std::u16string_view src) { + if (!isWritable() || src.empty()) { + return *this; + } + if (src.length() > INT32_MAX) { + setToBogus(); + return *this; + } + return doAppend(src.data(), 0, static_cast<int32_t>(src.length())); +} + /** * Replaceable API */ @@ -1680,7 +1743,7 @@ UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { if (limit <= start) { return; // Nothing to do; avoid bogus malloc call } - char16_t* text = (char16_t*) uprv_malloc( sizeof(char16_t) * (limit - start) ); + char16_t* text = static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * (limit - start))); // Check to make sure text is not null. if (text != nullptr) { extractBetween(start, limit, text, 0); @@ -1725,13 +1788,13 @@ UnicodeString::doReverse(int32_t start, int32_t length) { // Before the loop we know left<right because length>=2. do { - hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left); - hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right); + hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(swap = *left)); + hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(*left++ = *right)); *right-- = swap; } while(left < right); // Make sure to test the middle code unit of an odd-length string. // Redundant if the length is even. - hasSupplementary |= (UBool)U16_IS_LEAD(*left); + hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(*left)); /* if there are supplementary code points in the reversed range, then re-swap their surrogates */ if(hasSupplementary) { @@ -1834,7 +1897,7 @@ UnicodeString::releaseBuffer(int32_t newLength) { while(p<limit && *p!=0) { ++p; } - newLength=(int32_t)(p-array); + newLength = static_cast<int32_t>(p - array); } else if(newLength>capacity) { newLength=capacity; } @@ -1928,7 +1991,7 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, // release the old array if(flags & kRefCounted) { // the array is refCounted; decrement and release if 0 - u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1); + u_atomic_int32_t* pRefCount = reinterpret_cast<u_atomic_int32_t*>(oldArray) - 1; if(umtx_atomic_dec(pRefCount) == 0) { if (pBufferToDelete == nullptr) { // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t @@ -1937,7 +2000,7 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, uprv_free((void *)pRefCount); } else { // the caller requested to delete it himself - *pBufferToDelete = (int32_t *)pRefCount; + *pBufferToDelete = reinterpret_cast<int32_t*>(pRefCount); } } } |