summary refs log tree commit diff stats
path: root/thirdparty/icu4c/common/unistr.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/unistr.cpp')
-rw-r--r-- thirdparty/icu4c/common/unistr.cpp 163
1 files changed, 113 insertions, 50 deletions
diff --git a/thirdparty/icu4c/common/unistr.cpp b/thirdparty/icu4c/common/unistr.cpp
index ce81138c2c..a720245772 100644
--- a/thirdparty/icu4c/common/unistr.cpp
+++ b/thirdparty/icu4c/common/unistr.cpp
@@ -20,6 +20,8 @@
******************************************************************************
*/
+#include <string_view>
+
#include "unicode/utypes.h"
#include "unicode/appendable.h"
#include "unicode/putil.h"
@@ -107,12 +109,34 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
// NOTE(review): Concatenation operator; returns a new UnicodeString built by
// appending s1 and then s2 to a freshly allocated string.
// This hunk replaces the unchecked `s1.length()+s2.length()+1` capacity
// computation with a checked one: when uprv_add32_overflow() returns true for
// the two lengths, a bogus string is returned instead. Otherwise one extra
// unit is reserved for a terminating NUL, unless the sum is already INT32_MAX
// (incrementing it then would itself overflow).
UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2) {
- return
- UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
- append(s1).
- append(s2);
+ int32_t sumLengths;
+ if (uprv_add32_overflow(s1.length(), s2.length(), &sumLengths)) {
+ UnicodeString bogus;
+ bogus.setToBogus();
+ return bogus;
+ }
+ if (sumLengths != INT32_MAX) {
+ ++sumLengths; // space for a terminating NUL if we need one
+ }
+ return UnicodeString(sumLengths, static_cast<UChar32>(0), 0).append(s1).append(s2); // count 0: the constructor only allocates the capacity
+}
+
// NOTE(review): Added helper that concatenates a UnicodeString and a
// std::u16string_view into a new UnicodeString, mirroring operator+ above.
// Returns a bogus string when s2.length() exceeds INT32_MAX or when
// uprv_add32_overflow() returns true for the combined length.
+U_COMMON_API UnicodeString U_EXPORT2
+unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2) {
+ int32_t sumLengths;
+ if (s2.length() > INT32_MAX || // tested first, so the static_cast on the next line is only evaluated for lengths that fit
+ uprv_add32_overflow(s1.length(), static_cast<int32_t>(s2.length()), &sumLengths)) {
+ UnicodeString bogus;
+ bogus.setToBogus();
+ return bogus;
+ }
+ if (sumLengths != INT32_MAX) {
+ ++sumLengths; // space for a terminating NUL if we need one
+ }
+ return UnicodeString(sumLengths, static_cast<UChar32>(0), 0).append(s1).append(s2); // count 0: the constructor only allocates the capacity
}
+
//========================================
// Reference Counting functions, put at top of file so that optimizing compilers
// have a chance to automatically inline.
@@ -120,23 +144,23 @@ operator+ (const UnicodeString &s1, const UnicodeString &s2) {
// NOTE(review): Increments, via umtx_atomic_inc(), the counter stored in the
// slot immediately before fArray. The hunk only swaps the C-style cast for
// reinterpret_cast; the pointer arithmetic is unchanged.
void
UnicodeString::addRef() {
- umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
+ umtx_atomic_inc(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1);
}
// NOTE(review): Decrements, via umtx_atomic_dec(), the counter stored in the
// slot immediately before fArray and returns that call's result. The hunk
// only swaps the C-style cast for reinterpret_cast.
int32_t
UnicodeString::removeRef() {
- return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
+ return umtx_atomic_dec(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1);
}
// NOTE(review): Returns the value umtx_loadAcquire() reads from the counter
// slot immediately before fArray. The hunk only swaps the C-style cast for
// reinterpret_cast.
int32_t
UnicodeString::refCount() const {
- return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
+ return umtx_loadAcquire(*(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1));
}
// NOTE(review): When the kRefCounted flag is set and removeRef() returns 0,
// frees the allocation with uprv_free(). The pointer passed is one int32_t
// before fArray, i.e. the address of the counter slot rather than of the
// first char16_t. The hunk only swaps the C-style cast for reinterpret_cast.
void
UnicodeString::releaseArray() {
if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
- uprv_free((int32_t *)fUnion.fFields.fArray - 1);
+ uprv_free(reinterpret_cast<int32_t*>(fUnion.fFields.fArray) - 1);
}
}
@@ -150,7 +174,7 @@ UnicodeString::releaseArray() {
UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
fUnion.fFields.fLengthAndFlags = 0;
- if(count <= 0 || (uint32_t)c > 0x10ffff) {
+ if (count <= 0 || static_cast<uint32_t>(c) > 0x10ffff) {
// just allocate and do not do anything else
allocate(capacity);
} else if(c <= 0xffff) {
@@ -160,7 +184,7 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
}
if(allocate(capacity)) {
char16_t *array = getArrayStart();
- char16_t unit = (char16_t)c;
+ char16_t unit = static_cast<char16_t>(c);
for(int32_t i = 0; i < length; ++i) {
array[i] = unit;
}
@@ -206,11 +230,6 @@ UnicodeString::UnicodeString(UChar32 ch) {
}
}
-UnicodeString::UnicodeString(const char16_t *text) {
- fUnion.fFields.fLengthAndFlags = kShortString;
- doAppend(text, 0, -1);
-}
-
UnicodeString::UnicodeString(const char16_t *text,
int32_t textLength) {
fUnion.fFields.fLengthAndFlags = kShortString;
@@ -256,7 +275,7 @@ UnicodeString::UnicodeString(char16_t *buff,
while(p != limit && *p != 0) {
++p;
}
- buffLength = (int32_t)(p - buff);
+ buffLength = static_cast<int32_t>(p - buff);
}
setArray(buff, buffLength, buffCapacity);
}
@@ -268,7 +287,7 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
// treat as an empty string
} else {
if(length<0) {
- length=(int32_t)uprv_strlen(src);
+ length = static_cast<int32_t>(uprv_strlen(src));
}
if(cloneArrayIfNeeded(length, length, false)) {
u_charsToUChars(src, getArrayStart(), length);
@@ -279,6 +298,26 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
}
}
// NOTE(review): Added factory that builds a UnicodeString from a
// std::u16string_view by calling setTo(false, data, length), i.e. with
// isTerminated == false. The result is set to bogus when text.length() does
// not fit in int32_t.
// Presumably the result aliases text's buffer without copying, so the caller
// must keep that buffer alive; confirm against setTo()/setArray().
+UnicodeString UnicodeString::readOnlyAliasFromU16StringView(std::u16string_view text) {
+ UnicodeString result;
+ if (text.length() <= INT32_MAX) {
+ result.setTo(false, text.data(), static_cast<int32_t>(text.length()));
+ } else {
+ result.setToBogus();
+ }
+ return result;
+}
+
// NOTE(review): Added factory that builds a UnicodeString from another
// UnicodeString by calling setTo(false, text.getBuffer(), text.length()).
// A bogus input yields a bogus result.
// Presumably the result aliases text's buffer without copying, so text must
// outlive it and stay unmodified; confirm against setTo()/setArray().
+UnicodeString UnicodeString::readOnlyAliasFromUnicodeString(const UnicodeString &text) {
+ UnicodeString result;
+ if (text.isBogus()) {
+ result.setToBogus();
+ } else {
+ result.setTo(false, text.getBuffer(), text.length());
+ }
+ return result;
+}
+
#if U_CHARSET_IS_UTF8
UnicodeString::UnicodeString(const char *codepageData) {
@@ -295,7 +334,7 @@ UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
return;
}
if(dataLength == -1) {
- dataLength = (int32_t)uprv_strlen(codepageData);
+ dataLength = static_cast<int32_t>(uprv_strlen(codepageData));
}
setToUTF8(StringPiece(codepageData, dataLength));
}
@@ -376,18 +415,18 @@ UnicodeString::allocate(int32_t capacity) {
++capacity; // for the NUL
// Switch to size_t which is unsigned so that we can allocate up to 4GB.
// Reference counter + UChars.
- size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
+ size_t numBytes = sizeof(int32_t) + static_cast<size_t>(capacity) * U_SIZEOF_UCHAR;
// Round up to a multiple of 16.
numBytes = (numBytes + 15) & ~15;
- int32_t *array = (int32_t *) uprv_malloc(numBytes);
+ int32_t* array = static_cast<int32_t*>(uprv_malloc(numBytes));
if(array != nullptr) {
// set initial refCount and point behind the refCount
*array++ = 1;
numBytes -= sizeof(int32_t);
// have fArray point to the first char16_t
- fUnion.fFields.fArray = (char16_t *)array;
- fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
+ fUnion.fFields.fArray = reinterpret_cast<char16_t*>(array);
+ fUnion.fFields.fCapacity = static_cast<int32_t>(numBytes / U_SIZEOF_UCHAR);
fUnion.fFields.fLengthAndFlags = kLongString;
return true;
}
@@ -622,7 +661,7 @@ void UnicodeString::swap(UnicodeString &other) noexcept {
//========================================
UnicodeString UnicodeString::unescape() const {
- UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
+ UnicodeString result(length(), static_cast<UChar32>(0), static_cast<int32_t>(0)); // construct with capacity
if (result.isBogus()) {
return result;
}
@@ -656,10 +695,10 @@ UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
// Read-only implementation
//========================================
// NOTE(review): Returns whether the first len code units of this string and
// of text are identical, compared bytewise with uprv_memcmp() over
// len * U_SIZEOF_UCHAR bytes. This hunk changes the parameter from a
// UnicodeString to a raw char16_t pointer, so the old text.getArrayStart()
// call is replaced by the pointer itself.
UBool
-UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
- // Requires: this & text not bogus and have same lengths.
+UnicodeString::doEquals(const char16_t *text, int32_t len) const {
+ // Requires: this not bogus and have same lengths.
// Byte-wise comparison works for equality regardless of endianness.
- return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
+ return uprv_memcmp(getArrayStart(), text, len * U_SIZEOF_UCHAR) == 0;
}
UBool
@@ -772,9 +811,9 @@ UnicodeString::doCompare( int32_t start,
# else
// little-endian: compare char16_t units
do {
- result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
+ result = static_cast<int32_t>(*(chars++)) - static_cast<int32_t>(*(srcChars++));
if(result != 0) {
- return (int8_t)(result >> 15 | 1);
+ return static_cast<int8_t>(result >> 15 | 1);
}
} while(--minLength > 0);
# endif
@@ -806,7 +845,7 @@ UnicodeString::doCompareCodePointOrder(int32_t start,
int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=nullptr)?(srcChars + srcStart):nullptr, srcLength, false, true);
/* translate the 32-bit result into an 8-bit one */
if(diff!=0) {
- return (int8_t)(diff >> 15 | 1);
+ return static_cast<int8_t>(diff >> 15 | 1);
} else {
return 0;
}
@@ -831,7 +870,7 @@ UChar32
UnicodeString::char32At(int32_t offset) const
{
int32_t len = length();
- if((uint32_t)offset < (uint32_t)len) {
+ if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(len)) {
const char16_t *array = getArrayStart();
UChar32 c;
U16_GET(array, 0, offset, len, c);
@@ -843,7 +882,7 @@ UnicodeString::char32At(int32_t offset) const
int32_t
UnicodeString::getChar32Start(int32_t offset) const {
- if((uint32_t)offset < (uint32_t)length()) {
+ if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
const char16_t *array = getArrayStart();
U16_SET_CP_START(array, 0, offset);
return offset;
@@ -855,7 +894,7 @@ UnicodeString::getChar32Start(int32_t offset) const {
int32_t
UnicodeString::getChar32Limit(int32_t offset) const {
int32_t len = length();
- if((uint32_t)offset < (uint32_t)len) {
+ if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(len)) {
const char16_t *array = getArrayStart();
U16_SET_CP_LIMIT(array, 0, offset, len);
return offset;
@@ -989,7 +1028,7 @@ UnicodeString::extract(int32_t start, int32_t len,
if (/*dstSize < 0 || */(dstSize > 0 && target == nullptr)) {
return 0;
}
- return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
+ return toUTF8(start, len, target, dstSize <= 0x7fffffff ? static_cast<int32_t>(dstSize) : 0x7fffffff);
}
// else see unistr_cnv.cpp
@@ -1013,7 +1052,7 @@ UnicodeString::toUTF8(ByteSink &sink) const {
int32_t length16 = length();
if(length16 != 0) {
char stackBuffer[1024];
- int32_t capacity = (int32_t)sizeof(stackBuffer);
+ int32_t capacity = static_cast<int32_t>(sizeof(stackBuffer));
UBool utf8IsOwned = false;
char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
3*length16,
@@ -1027,7 +1066,7 @@ UnicodeString::toUTF8(ByteSink &sink) const {
nullptr, // Don't care about number of substitutions.
&errorCode);
if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
- utf8 = (char *)uprv_malloc(length8);
+ utf8 = static_cast<char*>(uprv_malloc(length8));
if(utf8 != nullptr) {
utf8IsOwned = true;
errorCode = U_ZERO_ERROR;
@@ -1089,7 +1128,7 @@ UnicodeString::indexOf(const char16_t *srcChars,
if(match == nullptr) {
return -1;
} else {
- return (int32_t)(match - array);
+ return static_cast<int32_t>(match - array);
}
}
@@ -1107,7 +1146,7 @@ UnicodeString::doIndexOf(char16_t c,
if(match == nullptr) {
return -1;
} else {
- return (int32_t)(match - array);
+ return static_cast<int32_t>(match - array);
}
}
@@ -1124,7 +1163,7 @@ UnicodeString::doIndexOf(UChar32 c,
if(match == nullptr) {
return -1;
} else {
- return (int32_t)(match - array);
+ return static_cast<int32_t>(match - array);
}
}
@@ -1153,7 +1192,7 @@ UnicodeString::lastIndexOf(const char16_t *srcChars,
if(match == nullptr) {
return -1;
} else {
- return (int32_t)(match - array);
+ return static_cast<int32_t>(match - array);
}
}
@@ -1175,7 +1214,7 @@ UnicodeString::doLastIndexOf(char16_t c,
if(match == nullptr) {
return -1;
} else {
- return (int32_t)(match - array);
+ return static_cast<int32_t>(match - array);
}
}
@@ -1192,7 +1231,7 @@ UnicodeString::doLastIndexOf(UChar32 c,
if(match == nullptr) {
return -1;
} else {
- return (int32_t)(match - array);
+ return static_cast<int32_t>(match - array);
}
}
@@ -1329,7 +1368,7 @@ UnicodeString::setTo(UBool isTerminated,
textLength = u_strlen(text);
}
fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
- setArray((char16_t *)text, textLength, isTerminated ? textLength + 1 : textLength);
+ setArray(const_cast<char16_t*>(text), textLength, isTerminated ? textLength + 1 : textLength);
return *this;
}
@@ -1359,7 +1398,7 @@ UnicodeString::setTo(char16_t *buffer,
while(p != limit && *p != 0) {
++p;
}
- buffLength = (int32_t)(p - buffer);
+ buffLength = static_cast<int32_t>(p - buffer);
}
releaseArray();
@@ -1574,6 +1613,18 @@ UnicodeString::doReplace(int32_t start,
return *this;
}
// NOTE(review): Added std::u16string_view overload of doReplace().
// Returns *this unchanged when the string is not writable; sets this string
// to bogus when src.length() exceeds INT32_MAX. Otherwise it forwards to the
// pointer-based doReplace() with srcStart 0 and the view's length.
+UnicodeString&
+UnicodeString::doReplace(int32_t start, int32_t length, std::u16string_view src) {
+ if (!isWritable()) {
+ return *this;
+ }
+ if (src.length() > INT32_MAX) {
+ setToBogus();
+ return *this;
+ }
+ return doReplace(start, length, src.data(), 0, static_cast<int32_t>(src.length()));
+}
+
// Versions of doReplace() only for append() variants.
// doReplace() and doAppend() optimize for different cases.
@@ -1662,6 +1713,18 @@ UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcL
return *this;
}
// NOTE(review): Added std::u16string_view overload of doAppend().
// Returns *this unchanged when the string is not writable or src is empty;
// sets this string to bogus when src.length() exceeds INT32_MAX. Otherwise it
// forwards to the pointer-based doAppend() with srcStart 0 and the view's length.
+UnicodeString&
+UnicodeString::doAppend(std::u16string_view src) {
+ if (!isWritable() || src.empty()) {
+ return *this;
+ }
+ if (src.length() > INT32_MAX) {
+ setToBogus();
+ return *this;
+ }
+ return doAppend(src.data(), 0, static_cast<int32_t>(src.length()));
+}
+
/**
* Replaceable API
*/
@@ -1680,7 +1743,7 @@ UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
if (limit <= start) {
return; // Nothing to do; avoid bogus malloc call
}
- char16_t* text = (char16_t*) uprv_malloc( sizeof(char16_t) * (limit - start) );
+ char16_t* text = static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * (limit - start)));
// Check to make sure text is not null.
if (text != nullptr) {
extractBetween(start, limit, text, 0);
@@ -1725,13 +1788,13 @@ UnicodeString::doReverse(int32_t start, int32_t length) {
// Before the loop we know left<right because length>=2.
do {
- hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
- hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
+ hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(swap = *left));
+ hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(*left++ = *right));
*right-- = swap;
} while(left < right);
// Make sure to test the middle code unit of an odd-length string.
// Redundant if the length is even.
- hasSupplementary |= (UBool)U16_IS_LEAD(*left);
+ hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(*left));
/* if there are supplementary code points in the reversed range, then re-swap their surrogates */
if(hasSupplementary) {
@@ -1834,7 +1897,7 @@ UnicodeString::releaseBuffer(int32_t newLength) {
while(p<limit && *p!=0) {
++p;
}
- newLength=(int32_t)(p-array);
+ newLength = static_cast<int32_t>(p - array);
} else if(newLength>capacity) {
newLength=capacity;
}
@@ -1928,7 +1991,7 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
// release the old array
if(flags & kRefCounted) {
// the array is refCounted; decrement and release if 0
- u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
+ u_atomic_int32_t* pRefCount = reinterpret_cast<u_atomic_int32_t*>(oldArray) - 1;
if(umtx_atomic_dec(pRefCount) == 0) {
if (pBufferToDelete == nullptr) {
// Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
@@ -1937,7 +2000,7 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
uprv_free((void *)pRefCount);
} else {
// the caller requested to delete it himself
- *pBufferToDelete = (int32_t *)pRefCount;
+ *pBufferToDelete = reinterpret_cast<int32_t*>(pRefCount);
}
}
}