1 files changed, 113 insertions, 50 deletions
diff --git a/thirdparty/icu4c/common/unistr.cpp b/thirdparty/icu4c/common/unistr.cpp
index ce81138c2c..a720245772 100644
--- a/thirdparty/icu4c/common/unistr.cpp
+++ b/thirdparty/icu4c/common/unistr.cpp
@@ -20,6 +20,8 @@
 ******************************************************************************
 */
 
+#include <string_view>
+
 #include "unicode/utypes.h"
 #include "unicode/appendable.h"
 #include "unicode/putil.h"
@@ -107,12 +109,34 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
 
 UnicodeString U_EXPORT2
 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
-    return
-        UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
-            append(s1).
-                append(s2);
+  int32_t sumLengths;
+  if (uprv_add32_overflow(s1.length(), s2.length(), &sumLengths)) {
+    UnicodeString bogus;
+    bogus.setToBogus();
+    return bogus;
+  }
+  if (sumLengths != INT32_MAX) {
+    ++sumLengths;  // space for a terminating NUL if we need one
+  }
+  return UnicodeString(sumLengths, static_cast<UChar32>(0), 0).append(s1).append(s2);
+}
+
+U_COMMON_API UnicodeString U_EXPORT2
+unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2) {
+  int32_t sumLengths;
+  if (s2.length() > INT32_MAX ||
+      uprv_add32_overflow(s1.length(), static_cast<int32_t>(s2.length()), &sumLengths)) {
+    UnicodeString bogus;
+    bogus.setToBogus();
+    return bogus;
+  }
+  if (sumLengths != INT32_MAX) {
+    ++sumLengths;  // space for a terminating NUL if we need one
+  }
+  return UnicodeString(sumLengths, static_cast<UChar32>(0), 0).append(s1).append(s2);
 }
 
+
 //========================================
 // Reference Counting functions, put at top of file so that optimizing compilers
 //                               have a chance to automatically inline.
@@ -120,23 +144,23 @@ operator+ (const UnicodeString &s1, const UnicodeString &s2) {
 
 void
 UnicodeString::addRef() {
-  umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
+  umtx_atomic_inc(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1);
 }
 
 int32_t
 UnicodeString::removeRef() {
-  return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
+  return umtx_atomic_dec(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1);
 }
 
 int32_t
 UnicodeString::refCount() const {
-  return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
+  return umtx_loadAcquire(*(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1));
 }
 
 void
 UnicodeString::releaseArray() {
   if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
-    uprv_free((int32_t *)fUnion.fFields.fArray - 1);
+    uprv_free(reinterpret_cast<int32_t*>(fUnion.fFields.fArray) - 1);
   }
 }
 
@@ -150,7 +174,7 @@ UnicodeString::releaseArray() {
 
 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
   fUnion.fFields.fLengthAndFlags = 0;
-  if(count <= 0 || (uint32_t)c > 0x10ffff) {
+  if (count <= 0 || static_cast<uint32_t>(c) > 0x10ffff) {
     // just allocate and do not do anything else
     allocate(capacity);
   } else if(c <= 0xffff) {
@@ -160,7 +184,7 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
     }
     if(allocate(capacity)) {
       char16_t *array = getArrayStart();
-      char16_t unit = (char16_t)c;
+      char16_t unit = static_cast<char16_t>(c);
       for(int32_t i = 0; i < length; ++i) {
         array[i] = unit;
       }
@@ -206,11 +230,6 @@ UnicodeString::UnicodeString(UChar32 ch) {
   }
 }
 
-UnicodeString::UnicodeString(const char16_t *text) {
-  fUnion.fFields.fLengthAndFlags = kShortString;
-  doAppend(text, 0, -1);
-}
-
 UnicodeString::UnicodeString(const char16_t *text,
                              int32_t textLength) {
   fUnion.fFields.fLengthAndFlags = kShortString;
@@ -256,7 +275,7 @@ UnicodeString::UnicodeString(char16_t *buff,
       while(p != limit && *p != 0) {
         ++p;
       }
-      buffLength = (int32_t)(p - buff);
+      buffLength = static_cast<int32_t>(p - buff);
     }
     setArray(buff, buffLength, buffCapacity);
   }
@@ -268,7 +287,7 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
     // treat as an empty string
   } else {
     if(length<0) {
-      length=(int32_t)uprv_strlen(src);
+      length = static_cast<int32_t>(uprv_strlen(src));
     }
     if(cloneArrayIfNeeded(length, length, false)) {
       u_charsToUChars(src, getArrayStart(), length);
@@ -279,6 +298,26 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
   }
 }
 
+UnicodeString UnicodeString::readOnlyAliasFromU16StringView(std::u16string_view text) {
+  UnicodeString result;
+  if (text.length() <= INT32_MAX) {
+    result.setTo(false, text.data(), static_cast<int32_t>(text.length()));
+  } else {
+    result.setToBogus();
+  }
+  return result;
+}
+
+UnicodeString UnicodeString::readOnlyAliasFromUnicodeString(const UnicodeString &text) {
+  UnicodeString result;
+  if (text.isBogus()) {
+    result.setToBogus();
+  } else {
+    result.setTo(false, text.getBuffer(), text.length());
+  }
+  return result;
+}
+
 #if U_CHARSET_IS_UTF8
 
 UnicodeString::UnicodeString(const char *codepageData) {
@@ -295,7 +334,7 @@ UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
     return;
   }
   if(dataLength == -1) {
-    dataLength = (int32_t)uprv_strlen(codepageData);
+    dataLength = static_cast<int32_t>(uprv_strlen(codepageData));
   }
   setToUTF8(StringPiece(codepageData, dataLength));
 }
@@ -376,18 +415,18 @@ UnicodeString::allocate(int32_t capacity) {
     ++capacity;  // for the NUL
     // Switch to size_t which is unsigned so that we can allocate up to 4GB.
     // Reference counter + UChars.
-    size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
+    size_t numBytes = sizeof(int32_t) + static_cast<size_t>(capacity) * U_SIZEOF_UCHAR;
     // Round up to a multiple of 16.
     numBytes = (numBytes + 15) & ~15;
-    int32_t *array = (int32_t *) uprv_malloc(numBytes);
+    int32_t* array = static_cast<int32_t*>(uprv_malloc(numBytes));
     if(array != nullptr) {
       // set initial refCount and point behind the refCount
       *array++ = 1;
       numBytes -= sizeof(int32_t);
 
       // have fArray point to the first char16_t
-      fUnion.fFields.fArray = (char16_t *)array;
-      fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
+      fUnion.fFields.fArray = reinterpret_cast<char16_t*>(array);
+      fUnion.fFields.fCapacity = static_cast<int32_t>(numBytes / U_SIZEOF_UCHAR);
       fUnion.fFields.fLengthAndFlags = kLongString;
       return true;
     }
@@ -622,7 +661,7 @@ void UnicodeString::swap(UnicodeString &other) noexcept {
 //========================================
 
 UnicodeString UnicodeString::unescape() const {
-    UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
+    UnicodeString result(length(), static_cast<UChar32>(0), static_cast<int32_t>(0)); // construct with capacity
     if (result.isBogus()) {
         return result;
     }
@@ -656,10 +695,10 @@ UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
 // Read-only implementation
 //========================================
 UBool
-UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
-  // Requires: this & text not bogus and have same lengths.
+UnicodeString::doEquals(const char16_t *text, int32_t len) const {
+  // Requires: this not bogus and have same lengths.
   // Byte-wise comparison works for equality regardless of endianness.
-  return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
+  return uprv_memcmp(getArrayStart(), text, len * U_SIZEOF_UCHAR) == 0;
 }
 
 UBool
@@ -772,9 +811,9 @@ UnicodeString::doCompare( int32_t start,
 #   else
       // little-endian: compare char16_t units
       do {
-        result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
+        result = static_cast<int32_t>(*(chars++)) - static_cast<int32_t>(*(srcChars++));
         if(result != 0) {
-          return (int8_t)(result >> 15 | 1);
+          return static_cast<int8_t>(result >> 15 | 1);
         }
       } while(--minLength > 0);
 #   endif
@@ -806,7 +845,7 @@ UnicodeString::doCompareCodePointOrder(int32_t start,
   int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=nullptr)?(srcChars + srcStart):nullptr, srcLength, false, true);
   /* translate the 32-bit result into an 8-bit one */
   if(diff!=0) {
-    return (int8_t)(diff >> 15 | 1);
+    return static_cast<int8_t>(diff >> 15 | 1);
   } else {
     return 0;
   }
@@ -831,7 +870,7 @@ UChar32
 UnicodeString::char32At(int32_t offset) const
 {
   int32_t len = length();
-  if((uint32_t)offset < (uint32_t)len) {
+  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(len)) {
     const char16_t *array = getArrayStart();
     UChar32 c;
     U16_GET(array, 0, offset, len, c);
@@ -843,7 +882,7 @@ UnicodeString::char32At(int32_t offset) const
 
 int32_t
 UnicodeString::getChar32Start(int32_t offset) const {
-  if((uint32_t)offset < (uint32_t)length()) {
+  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
     const char16_t *array = getArrayStart();
     U16_SET_CP_START(array, 0, offset);
     return offset;
@@ -855,7 +894,7 @@ UnicodeString::getChar32Start(int32_t offset) const {
 int32_t
 UnicodeString::getChar32Limit(int32_t offset) const {
   int32_t len = length();
-  if((uint32_t)offset < (uint32_t)len) {
+  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(len)) {
     const char16_t *array = getArrayStart();
     U16_SET_CP_LIMIT(array, 0, offset, len);
     return offset;
@@ -989,7 +1028,7 @@ UnicodeString::extract(int32_t start, int32_t len,
   if (/*dstSize < 0 || */(dstSize > 0 && target == nullptr)) {
     return 0;
   }
-  return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
+  return toUTF8(start, len, target, dstSize <= 0x7fffffff ? static_cast<int32_t>(dstSize) : 0x7fffffff);
 }
 
 // else see unistr_cnv.cpp
@@ -1013,7 +1052,7 @@ UnicodeString::toUTF8(ByteSink &sink) const {
   int32_t length16 = length();
   if(length16 != 0) {
     char stackBuffer[1024];
-    int32_t capacity = (int32_t)sizeof(stackBuffer);
+    int32_t capacity = static_cast<int32_t>(sizeof(stackBuffer));
     UBool utf8IsOwned = false;
     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
                                       3*length16,
@@ -1027,7 +1066,7 @@ UnicodeString::toUTF8(ByteSink &sink) const {
                        nullptr,    // Don't care about number of substitutions.
                        &errorCode);
     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
-      utf8 = (char *)uprv_malloc(length8);
+      utf8 = static_cast<char*>(uprv_malloc(length8));
       if(utf8 != nullptr) {
         utf8IsOwned = true;
         errorCode = U_ZERO_ERROR;
@@ -1089,7 +1128,7 @@ UnicodeString::indexOf(const char16_t *srcChars,
   if(match == nullptr) {
     return -1;
   } else {
-    return (int32_t)(match - array);
+    return static_cast<int32_t>(match - array);
   }
 }
 
@@ -1107,7 +1146,7 @@ UnicodeString::doIndexOf(char16_t c,
   if(match == nullptr) {
     return -1;
   } else {
-    return (int32_t)(match - array);
+    return static_cast<int32_t>(match - array);
   }
 }
 
@@ -1124,7 +1163,7 @@ UnicodeString::doIndexOf(UChar32 c,
   if(match == nullptr) {
     return -1;
   } else {
-    return (int32_t)(match - array);
+    return static_cast<int32_t>(match - array);
   }
 }
 
@@ -1153,7 +1192,7 @@ UnicodeString::lastIndexOf(const char16_t *srcChars,
   if(match == nullptr) {
     return -1;
   } else {
-    return (int32_t)(match - array);
+    return static_cast<int32_t>(match - array);
   }
 }
 
@@ -1175,7 +1214,7 @@ UnicodeString::doLastIndexOf(char16_t c,
   if(match == nullptr) {
     return -1;
   } else {
-    return (int32_t)(match - array);
+    return static_cast<int32_t>(match - array);
   }
 }
 
@@ -1192,7 +1231,7 @@ UnicodeString::doLastIndexOf(UChar32 c,
   if(match == nullptr) {
     return -1;
   } else {
-    return (int32_t)(match - array);
+    return static_cast<int32_t>(match - array);
   }
 }
 
@@ -1329,7 +1368,7 @@ UnicodeString::setTo(UBool isTerminated,
     textLength = u_strlen(text);
   }
   fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
-  setArray((char16_t *)text, textLength, isTerminated ? textLength + 1 : textLength);
+  setArray(const_cast<char16_t*>(text), textLength, isTerminated ? textLength + 1 : textLength);
   return *this;
 }
 
@@ -1359,7 +1398,7 @@ UnicodeString::setTo(char16_t *buffer,
     while(p != limit && *p != 0) {
       ++p;
     }
-    buffLength = (int32_t)(p - buffer);
+    buffLength = static_cast<int32_t>(p - buffer);
   }
 
   releaseArray();
@@ -1574,6 +1613,18 @@ UnicodeString::doReplace(int32_t start,
   return *this;
 }
 
+UnicodeString&
+UnicodeString::doReplace(int32_t start, int32_t length, std::u16string_view src) {
+  if (!isWritable()) {
+    return *this;
+  }
+  if (src.length() > INT32_MAX) {
+    setToBogus();
+    return *this;
+  }
+  return doReplace(start, length, src.data(), 0, static_cast<int32_t>(src.length()));
+}
+
 // Versions of doReplace() only for append() variants.
 // doReplace() and doAppend() optimize for different cases.
 
@@ -1662,6 +1713,18 @@ UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcL
   return *this;
 }
 
+UnicodeString&
+UnicodeString::doAppend(std::u16string_view src) {
+  if (!isWritable() || src.empty()) {
+    return *this;
+  }
+  if (src.length() > INT32_MAX) {
+    setToBogus();
+    return *this;
+  }
+  return doAppend(src.data(), 0, static_cast<int32_t>(src.length()));
+}
+
 /**
  * Replaceable API
  */
@@ -1680,7 +1743,7 @@ UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
     if (limit <= start) {
         return; // Nothing to do; avoid bogus malloc call
     }
-    char16_t* text = (char16_t*) uprv_malloc( sizeof(char16_t) * (limit - start) );
+    char16_t* text = static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * (limit - start)));
     // Check to make sure text is not null.
     if (text != nullptr) {
 	    extractBetween(start, limit, text, 0);
@@ -1725,13 +1788,13 @@ UnicodeString::doReverse(int32_t start, int32_t length) {
 
   // Before the loop we know left<right because length>=2.
   do {
-    hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
-    hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
+    hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(swap = *left));
+    hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(*left++ = *right));
     *right-- = swap;
   } while(left < right);
   // Make sure to test the middle code unit of an odd-length string.
   // Redundant if the length is even.
-  hasSupplementary |= (UBool)U16_IS_LEAD(*left);
+  hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(*left));
 
   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
   if(hasSupplementary) {
@@ -1834,7 +1897,7 @@ UnicodeString::releaseBuffer(int32_t newLength) {
       while(p<limit && *p!=0) {
         ++p;
       }
-      newLength=(int32_t)(p-array);
+      newLength = static_cast<int32_t>(p - array);
     } else if(newLength>capacity) {
       newLength=capacity;
     }
@@ -1928,7 +1991,7 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
       // release the old array
       if(flags & kRefCounted) {
         // the array is refCounted; decrement and release if 0
-        u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
+        u_atomic_int32_t* pRefCount = reinterpret_cast<u_atomic_int32_t*>(oldArray) - 1;
         if(umtx_atomic_dec(pRefCount) == 0) {
           if (pBufferToDelete == nullptr) {
               // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
@@ -1937,7 +2000,7 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
             uprv_free((void *)pRefCount);
           } else {
             // the caller requested to delete it himself
-            *pBufferToDelete = (int32_t *)pRefCount;
+            *pBufferToDelete = reinterpret_cast<int32_t*>(pRefCount);
           }
         }
       }