1 files changed, 902 insertions, 750 deletions
diff --git a/thirdparty/icu4c/common/uloc.cpp b/thirdparty/icu4c/common/uloc.cpp
index ce49d6c50e..88fe7eaadc 100644
--- a/thirdparty/icu4c/common/uloc.cpp
+++ b/thirdparty/icu4c/common/uloc.cpp
@@ -30,6 +30,8 @@
      l = lang, C = ctry, M = charmap, V = variant
 */
 
+#include <optional>
+
 #include "unicode/bytestream.h"
 #include "unicode/errorcode.h"
 #include "unicode/stringpiece.h"
@@ -58,6 +60,8 @@ U_NAMESPACE_USE
 U_CFUNC void locale_set_default(const char *id);
 U_CFUNC const char *locale_get_default();
 
+namespace {
+
 /* ### Data tables **************************************************/
 
 /**
@@ -94,7 +98,7 @@ U_CFUNC const char *locale_get_default();
 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
 /* ISO639 table version is 20150505 */
 /* Subsequent hand addition of selected languages */
-static const char * const LANGUAGES[] = {
+constexpr const char* LANGUAGES[] = {
     "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
     "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
     "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
@@ -185,10 +189,10 @@ nullptr,
 nullptr
 };
 
-static const char* const DEPRECATED_LANGUAGES[]={
+constexpr const char* DEPRECATED_LANGUAGES[]={
     "in", "iw", "ji", "jw", "mo", nullptr, nullptr
 };
-static const char* const REPLACEMENT_LANGUAGES[]={
+constexpr const char* REPLACEMENT_LANGUAGES[]={
     "id", "he", "yi", "jv", "ro", nullptr, nullptr
 };
 
@@ -211,7 +215,7 @@ static const char* const REPLACEMENT_LANGUAGES[]={
 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
 /* ISO639 table version is 20150505 */
 /* Subsequent hand addition of selected languages */
-static const char * const LANGUAGES_3[] = {
+constexpr const char* LANGUAGES_3[] = {
     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
     "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
     "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
@@ -327,7 +331,7 @@ nullptr
  * RO(ROM) is now RO(ROU) according to
  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
  */
-static const char * const COUNTRIES[] = {
+constexpr const char* COUNTRIES[] = {
     "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
     "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
     "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
@@ -363,10 +367,10 @@ nullptr,
 nullptr
 };
 
-static const char* const DEPRECATED_COUNTRIES[] = {
+constexpr const char* DEPRECATED_COUNTRIES[] = {
     "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", nullptr, nullptr /* deprecated country list */
 };
-static const char* const REPLACEMENT_COUNTRIES[] = {
+constexpr const char* REPLACEMENT_COUNTRIES[] = {
 /*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
     "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", nullptr, nullptr  /* replacement country codes */
 };
@@ -384,7 +388,7 @@ static const char* const REPLACEMENT_COUNTRIES[] = {
  * second list, and another nullptr entry.  The two lists correspond to
  * the two lists in COUNTRIES.
  */
-static const char * const COUNTRIES_3[] = {
+constexpr const char* COUNTRIES_3[] = {
 /*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
     "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
 /*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
@@ -460,7 +464,7 @@ typedef struct CanonicalizationMap {
  * A map to canonicalize locale IDs.  This handles a variety of
  * different semantic kinds of transformations.
  */
-static const CanonicalizationMap CANONICALIZE_MAP[] = {
+constexpr CanonicalizationMap CANONICALIZE_MAP[] = {
     { "art__LOJBAN",    "jbo" }, /* registered name */
     { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
     { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
@@ -475,15 +479,13 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = {
 };
 
 /* ### BCP47 Conversion *******************************************/
-/* Test if the locale id has BCP47 u extension and does not have '@' */
-#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == nullptr && getShortestSubtagLength(localeID) == 1)
 /* Gets the size of the shortest subtag in the given localeID. */
-static int32_t getShortestSubtagLength(const char *localeID) {
+int32_t getShortestSubtagLength(const char *localeID) {
     int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
     int32_t length = localeIDLength;
     int32_t tmpLength = 0;
     int32_t i;
-    UBool reset = true;
+    bool reset = true;
 
     for (i = 0; i < localeIDLength; i++) {
         if (localeID[i] != '_' && localeID[i] != '-') {
@@ -502,12 +504,18 @@ static int32_t getShortestSubtagLength(const char *localeID) {
 
     return length;
 }
+/* Test if the locale id has a BCP47 extension: true only when id is non-null, contains no '@', and its shortest subtag is exactly one character long. */
+inline bool _hasBCP47Extension(const char *id) {
+    return id != nullptr && uprv_strstr(id, "@") == nullptr && getShortestSubtagLength(id) == 1;
+}
 
 /* ### Keywords **************************************************/
-#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
-#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
+inline bool UPRV_ISDIGIT(char c) { return c >= '0' && c <= '9'; }  /* true only for '0'..'9' */
+inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); }  /* letter (as decided by uprv_isASCIILetter) or '0'..'9' */
 /* Punctuation/symbols allowed in legacy key values */
-#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')
+inline bool UPRV_OK_VALUE_PUNCTUATION(char c) { return c == '_' || c == '-' || c == '+' || c == '/'; }  /* true only for '_', '-', '+' and '/' */
+
+}  // namespace
 
 #define ULOC_KEYWORD_BUFFER_LEN 25
 #define ULOC_MAX_NO_KEYWORDS 25
@@ -536,36 +544,31 @@ locale_getKeywordsStart(const char *localeID) {
     return nullptr;
 }
 
+namespace {
+
 /**
- * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
  * @param keywordName incoming name to be canonicalized
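- * @param status return status (keyword too long)
+ * @param status return status (set to U_ILLEGAL_ARGUMENT_ERROR for a malformed or empty keyword name)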
- * @return length of the keyword name
+ * @return the keyword name
  */
-static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
+CharString locale_canonKeywordName(const char* keywordName, UErrorCode& status)
 {
-  int32_t keywordNameLen = 0;
+  if (U_FAILURE(status)) { return {}; }
+  CharString result;
 
   for (; *keywordName != 0; keywordName++) {
     if (!UPRV_ISALPHANUM(*keywordName)) {
-      *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
-      return 0;
-    }
-    if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
-      buf[keywordNameLen++] = uprv_tolower(*keywordName);
-    } else {
-      /* keyword name too long for internal buffer */
-      *status = U_INTERNAL_PROGRAM_ERROR;
-      return 0;
+      status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
+      return {};
     }
+    result.append(uprv_tolower(*keywordName), status);
   }
-  if (keywordNameLen == 0) {
-    *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
-    return 0;
+  if (result.isEmpty()) {
+    status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
+    return {};
   }
-  buf[keywordNameLen] = 0; /* terminate */
 
-  return keywordNameLen;
+  return result;
 }
 
 typedef struct {
@@ -575,20 +578,41 @@ typedef struct {
     int32_t valueLen;
 } KeywordStruct;
 
-static int32_t U_CALLCONV
+int32_t U_CALLCONV
 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
     const char* leftString = ((const KeywordStruct *)left)->keyword;
     const char* rightString = ((const KeywordStruct *)right)->keyword;
     return uprv_strcmp(leftString, rightString);
 }
 
-U_CFUNC void
-ulocimp_getKeywords(const char *localeID,
+}  // namespace
+
+U_EXPORT CharString
+ulocimp_getKeywords(const char* localeID,
+                    char prev,
+                    bool valuesToo,
+                    UErrorCode& status)
+{   /* Convenience overload: forwards its arguments to the ByteSink overload and returns what viaByteSinkToCharString() produces. */
+    return ByteSinkUtil::viaByteSinkToCharString(
+        [&](ByteSink& sink, UErrorCode& status) {  /* this `status` parameter shadows the outer one */
+            ulocimp_getKeywords(localeID,  /* the ByteSink overload of the same name */
+                                prev,
+                                sink,
+                                valuesToo,
+                                status);
+        },
+        status);  /* the caller's status is handed to viaByteSinkToCharString() */
+}
+
+U_EXPORT void
+ulocimp_getKeywords(const char* localeID,
                     char prev,
                     ByteSink& sink,
-                    UBool valuesToo,
-                    UErrorCode *status)
+                    bool valuesToo,
+                    UErrorCode& status)
 {
+    if (U_FAILURE(status)) { return; }
+
     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
 
     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
@@ -601,7 +625,7 @@ ulocimp_getKeywords(const char *localeID,
     if(prev == '@') { /* start of keyword definition */
         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
         do {
-            UBool duplicate = false;
+            bool duplicate = false;
             /* skip leading spaces */
             while(*pos == ' ') {
                 pos++;
@@ -610,7 +634,7 @@ ulocimp_getKeywords(const char *localeID,
                 break;
             }
             if(numKeywords == maxKeywords) {
-                *status = U_INTERNAL_PROGRAM_ERROR;
+                status = U_INTERNAL_PROGRAM_ERROR;
                 return;
             }
             equalSign = uprv_strchr(pos, '=');
@@ -618,13 +642,13 @@ ulocimp_getKeywords(const char *localeID,
             /* lack of '=' [foo@currency] is illegal */
             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
             if(!equalSign || (semicolon && semicolon<equalSign)) {
-                *status = U_INVALID_FORMAT_ERROR;
+                status = U_INVALID_FORMAT_ERROR;
                 return;
             }
             /* need to normalize both keyword and keyword name */
             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
                 /* keyword name too long for internal buffer */
-                *status = U_INTERNAL_PROGRAM_ERROR;
+                status = U_INTERNAL_PROGRAM_ERROR;
                 return;
             }
             for(i = 0, n = 0; i < equalSign - pos; ++i) {
@@ -635,7 +659,7 @@ ulocimp_getKeywords(const char *localeID,
 
             /* zero-length keyword is an error. */
             if (n == 0) {
-                *status = U_INVALID_FORMAT_ERROR;
+                status = U_INVALID_FORMAT_ERROR;
                 return;
             }
 
@@ -650,7 +674,7 @@ ulocimp_getKeywords(const char *localeID,
 
             /* Premature end or zero-length value */
             if (!*equalSign || equalSign == semicolon) {
-                *status = U_INVALID_FORMAT_ERROR;
+                status = U_INVALID_FORMAT_ERROR;
                 return;
             }
 
@@ -685,7 +709,7 @@ ulocimp_getKeywords(const char *localeID,
 
         /* now we have a list of keywords */
         /* we need to sort it */
-        uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, nullptr, false, status);
+        uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, nullptr, false, &status);
 
         /* Now construct the keyword part */
         for(i = 0; i < numKeywords; i++) {
@@ -709,137 +733,127 @@ uloc_getKeywordValue(const char* localeID,
                      char* buffer, int32_t bufferCapacity,
                      UErrorCode* status)
 {
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    CheckedArrayByteSink sink(buffer, bufferCapacity);
-    ulocimp_getKeywordValue(localeID, keywordName, sink, status);
-
-    int32_t reslen = sink.NumberOfBytesAppended();
-
-    if (U_FAILURE(*status)) {
-        return reslen;
-    }
-
-    if (sink.Overflowed()) {
-        *status = U_BUFFER_OVERFLOW_ERROR;
-    } else {
-        u_terminateChars(buffer, bufferCapacity, reslen, status);
-    }
+    return ByteSinkUtil::viaByteSinkToTerminatedChars(
+        buffer, bufferCapacity,
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getKeywordValue(localeID, keywordName, sink, status);
+        },
+        *status);
+}
 
-    return reslen;
+U_EXPORT CharString
+ulocimp_getKeywordValue(const char* localeID,
+                        const char* keywordName,
+                        UErrorCode& status)
+{   /* Convenience overload: forwards its arguments to the ByteSink overload and returns what viaByteSinkToCharString() produces. */
+    return ByteSinkUtil::viaByteSinkToCharString(
+        [&](ByteSink& sink, UErrorCode& status) {  /* this `status` parameter shadows the outer one */
+            ulocimp_getKeywordValue(localeID, keywordName, sink, status);  /* the ByteSink overload of the same name */
+        },
+        status);  /* the caller's status is handed to viaByteSinkToCharString() */
+}
 
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_getKeywordValue(const char* localeID,
                         const char* keywordName,
                         icu::ByteSink& sink,
-                        UErrorCode* status)
+                        UErrorCode& status)
 {
+    if (U_FAILURE(status)) { return; }
+
+    if (localeID == nullptr || keywordName == nullptr || keywordName[0] == 0) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
     const char* startSearchHere = nullptr;
     const char* nextSeparator = nullptr;
-    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
-    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 
-    if(status && U_SUCCESS(*status) && localeID) {
-      CharString tempBuffer;
-      const char* tmpLocaleID;
+    CharString tempBuffer;
+    const char* tmpLocaleID;
 
-      if (keywordName == nullptr || keywordName[0] == 0) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-      }
+    CharString canonKeywordName = locale_canonKeywordName(keywordName, status);
+    if (U_FAILURE(status)) {
+      return;
+    }
+
+    if (_hasBCP47Extension(localeID)) {
+        tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, status);
+        tmpLocaleID = U_SUCCESS(status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
+    } else {
+        tmpLocaleID=localeID;
+    }
 
-      locale_canonKeywordName(keywordNameBuffer, keywordName, status);
-      if(U_FAILURE(*status)) {
+    startSearchHere = locale_getKeywordsStart(tmpLocaleID);
+    if(startSearchHere == nullptr) {
+        /* no keywords, return at once */
         return;
-      }
+    }
 
-      if (_hasBCP47Extension(localeID)) {
-        CharStringByteSink sink(&tempBuffer);
-        ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status);
-        tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
-      } else {
-          tmpLocaleID=localeID;
-      }
-
-      startSearchHere = locale_getKeywordsStart(tmpLocaleID);
-      if(startSearchHere == nullptr) {
-          /* no keywords, return at once */
-          return;
-      }
-
-      /* find the first keyword */
-      while(startSearchHere) {
-          const char* keyValueTail;
-          int32_t keyValueLen;
-
-          startSearchHere++; /* skip @ or ; */
-          nextSeparator = uprv_strchr(startSearchHere, '=');
-          if(!nextSeparator) {
-              *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
-              return;
-          }
-          /* strip leading & trailing spaces (TC decided to tolerate these) */
-          while(*startSearchHere == ' ') {
-              startSearchHere++;
+    /* find the first keyword */
+    while(startSearchHere) {
+        const char* keyValueTail;
+
+        startSearchHere++; /* skip @ or ; */
+        nextSeparator = uprv_strchr(startSearchHere, '=');
+        if(!nextSeparator) {
+            status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
+            return;
+        }
+        /* strip leading & trailing spaces (TC decided to tolerate these) */
+        while(*startSearchHere == ' ') {
+            startSearchHere++;
+        }
+        keyValueTail = nextSeparator;
+        while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
+            keyValueTail--;
+        }
+        /* now keyValueTail points to first char after the keyName */
+        /* copy & normalize keyName from locale */
+        if (startSearchHere == keyValueTail) {
+            status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
+            return;
+        }
+        CharString localeKeywordName;
+        while (startSearchHere < keyValueTail) {
+          if (!UPRV_ISALPHANUM(*startSearchHere)) {
+            status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
+            return;
           }
-          keyValueTail = nextSeparator;
-          while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
+          localeKeywordName.append(uprv_tolower(*startSearchHere++), status);
+        }
+        if (U_FAILURE(status)) {
+            return;
+        }
+
+        startSearchHere = uprv_strchr(nextSeparator, ';');
+
+        if (canonKeywordName == localeKeywordName) {
+             /* current entry matches the keyword. */
+           nextSeparator++; /* skip '=' */
+            /* First strip leading & trailing spaces (TC decided to tolerate these) */
+            while(*nextSeparator == ' ') {
+              nextSeparator++;
+            }
+            keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
+            while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
               keyValueTail--;
-          }
-          /* now keyValueTail points to first char after the keyName */
-          /* copy & normalize keyName from locale */
-          if (startSearchHere == keyValueTail) {
-              *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
-              return;
-          }
-          keyValueLen = 0;
-          while (startSearchHere < keyValueTail) {
-            if (!UPRV_ISALPHANUM(*startSearchHere)) {
-              *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
-              return;
             }
-            if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
-              localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
-            } else {
-              /* keyword name too long for internal buffer */
-              *status = U_INTERNAL_PROGRAM_ERROR;
+            /* Now copy the value, but check well-formedness */
+            if (nextSeparator == keyValueTail) {
+              status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
               return;
             }
-          }
-          localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
-
-          startSearchHere = uprv_strchr(nextSeparator, ';');
-
-          if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
-               /* current entry matches the keyword. */
-             nextSeparator++; /* skip '=' */
-              /* First strip leading & trailing spaces (TC decided to tolerate these) */
-              while(*nextSeparator == ' ') {
-                nextSeparator++;
-              }
-              keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
-              while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
-                keyValueTail--;
-              }
-              /* Now copy the value, but check well-formedness */
-              if (nextSeparator == keyValueTail) {
-                *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
+            while (nextSeparator < keyValueTail) {
+              if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
+                status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                 return;
               }
-              while (nextSeparator < keyValueTail) {
-                if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
-                  *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
-                  return;
-                }
-                /* Should we lowercase value to return here? Tests expect as-is. */
-                sink.Append(nextSeparator++, 1);
-              }
-              return;
-          }
-      }
+              /* Should we lowercase value to return here? Tests expect as-is. */
+              sink.Append(nextSeparator++, 1);
+            }
+            return;
+        }
     }
 }
 
@@ -849,100 +863,136 @@ uloc_setKeywordValue(const char* keywordName,
                      char* buffer, int32_t bufferCapacity,
                      UErrorCode* status)
 {
-    /* TODO: sorting. removal. */
-    int32_t keywordNameLen;
-    int32_t keywordValueLen;
-    int32_t bufLen;
-    int32_t needLen = 0;
-    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
-    char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
-    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
-    int32_t rc;
-    char* nextSeparator = nullptr;
-    char* nextEqualsign = nullptr;
-    char* startSearchHere = nullptr;
-    char* keywordStart = nullptr;
-    CharString updatedKeysAndValues;
-    UBool handledInputKeyAndValue = false;
-    char keyValuePrefix = '@';
+    if (U_FAILURE(*status)) { return 0; }
 
-    if(U_FAILURE(*status)) {
-        return -1;
-    }
-    if (*status == U_STRING_NOT_TERMINATED_WARNING) {
-        *status = U_ZERO_ERROR;
-    }
-    if (keywordName == nullptr || keywordName[0] == 0 || bufferCapacity <= 1) {
+    if (bufferCapacity <= 1) {
         *status = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
-    bufLen = (int32_t)uprv_strlen(buffer);
+
+    int32_t bufLen = (int32_t)uprv_strlen(buffer);
     if(bufferCapacity<bufLen) {
         /* The capacity is less than the length?! Is this NUL terminated? */
         *status = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
-    keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
-    if(U_FAILURE(*status)) {
+
+    char* keywords = const_cast<char*>(locale_getKeywordsStart(buffer));
+    int32_t baseLen = keywords == nullptr ? bufLen : keywords - buffer;
+    // Reserve one byte of the remaining capacity (the -1 below) so that this function can guarantee NUL termination.
+    CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords,
+                              bufferCapacity - baseLen - 1);
+    int32_t reslen = ulocimp_setKeywordValue(
+            keywords, keywordName, keywordValue, sink, *status);
+
+    if (U_FAILURE(*status)) {
+        return *status == U_BUFFER_OVERFLOW_ERROR ? reslen + baseLen : 0;
+    }
+
+    // See the documentation for this function, it's guaranteed to never
+    // overflow the buffer but instead abort with BUFFER_OVERFLOW_ERROR.
+    // In this case, nothing has been written to the sink, so it cannot have Overflowed().
+    U_ASSERT(!sink.Overflowed());
+    U_ASSERT(reslen >= 0);
+    return u_terminateChars(buffer, bufferCapacity, reslen + baseLen, status);
+}
+
+U_EXPORT void
+ulocimp_setKeywordValue(const char* keywordName,
+                        const char* keywordValue,
+                        CharString& localeID,
+                        UErrorCode& status)
+{   /* In-place overload: cuts localeID at the start of its keyword section, then calls the ByteSink overload with a sink over localeID. */
+    if (U_FAILURE(status)) { return; }
+    // `keywords` keeps pointing into localeID's own buffer past the truncation point. This is meant to be safe
+    // because CharString::truncate() is said not to erase that data (NOTE(review): confirm against CharString).
+    const char* keywords = locale_getKeywordsStart(localeID.data());
+    if (keywords != nullptr) localeID.truncate(keywords - localeID.data());
+    CharStringByteSink sink(&localeID);
+    ulocimp_setKeywordValue(keywords, keywordName, keywordValue, sink, status);  /* the int32_t result of the ByteSink overload is not used here */
+}
+
+U_EXPORT int32_t
+ulocimp_setKeywordValue(const char* keywords,
+                        const char* keywordName,
+                        const char* keywordValue,
+                        ByteSink& sink,
+                        UErrorCode& status)
+{
+    if (U_FAILURE(status)) { return 0; }
+
+    /* TODO: sorting. removal. */
+    int32_t needLen = 0;
+    int32_t rc;
+    const char* nextSeparator = nullptr;
+    const char* nextEqualsign = nullptr;
+    const char* keywordStart = nullptr;
+    CharString updatedKeysAndValues;
+    bool handledInputKeyAndValue = false;
+    char keyValuePrefix = '@';
+
+    if (status == U_STRING_NOT_TERMINATED_WARNING) {
+        status = U_ZERO_ERROR;
+    }
+    if (keywordName == nullptr || keywordName[0] == 0) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    CharString canonKeywordName = locale_canonKeywordName(keywordName, status);
+    if (U_FAILURE(status)) {
         return 0;
     }
 
-    keywordValueLen = 0;
+    CharString canonKeywordValue;
     if(keywordValue) {
         while (*keywordValue != 0) {
             if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
-                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
-                return 0;
-            }
-            if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
-                /* Should we force lowercase in value to set? */
-                keywordValueBuffer[keywordValueLen++] = *keywordValue++;
-            } else {
-                /* keywordValue too long for internal buffer */
-                *status = U_INTERNAL_PROGRAM_ERROR;
+                status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                 return 0;
             }
+            /* Should we force lowercase in value to set? */
+            canonKeywordValue.append(*keywordValue++, status);
         }
     }
-    keywordValueBuffer[keywordValueLen] = 0; /* terminate */
+    if (U_FAILURE(status)) {
+        return 0;
+    }
 
-    startSearchHere = (char*)locale_getKeywordsStart(buffer);
-    if(startSearchHere == nullptr || (startSearchHere[1]==0)) {
-        if(keywordValueLen == 0) { /* no keywords = nothing to remove */
-            U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
-            return bufLen;
+    if (keywords == nullptr || keywords[1] == '\0') {
+        if (canonKeywordValue.isEmpty()) { /* no keywords = nothing to remove */
+            U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
+            return 0;
         }
 
-        needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
-        if(startSearchHere) { /* had a single @ */
-            needLen--; /* already had the @ */
-            /* startSearchHere points at the @ */
-        } else {
-            startSearchHere=buffer+bufLen;
-        }
-        if(needLen >= bufferCapacity) {
-            *status = U_BUFFER_OVERFLOW_ERROR;
+        needLen = 1 + canonKeywordName.length() + 1 + canonKeywordValue.length();
+        int32_t capacity = 0;
+        char* buffer = sink.GetAppendBuffer(
+                needLen, needLen, nullptr, needLen, &capacity);
+        if (capacity < needLen || buffer == nullptr) {
+            status = U_BUFFER_OVERFLOW_ERROR;
             return needLen; /* no change */
         }
-        *startSearchHere++ = '@';
-        uprv_strcpy(startSearchHere, keywordNameBuffer);
-        startSearchHere += keywordNameLen;
-        *startSearchHere++ = '=';
-        uprv_strcpy(startSearchHere, keywordValueBuffer);
-        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
+        char* it = buffer;
+
+        *it++ = '@';
+        uprv_memcpy(it, canonKeywordName.data(), canonKeywordName.length());
+        it += canonKeywordName.length();
+        *it++ = '=';
+        uprv_memcpy(it, canonKeywordValue.data(), canonKeywordValue.length());
+        sink.Append(buffer, needLen);
+        U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
         return needLen;
     } /* end shortcut - no @ */
 
-    keywordStart = startSearchHere;
+    keywordStart = keywords;
     /* search for keyword */
     while(keywordStart) {
         const char* keyValueTail;
-        int32_t keyValueLen;
 
         keywordStart++; /* skip @ or ; */
         nextEqualsign = uprv_strchr(keywordStart, '=');
         if (!nextEqualsign) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
+            status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
             return 0;
         }
         /* strip leading & trailing spaces (TC decided to tolerate these) */
@@ -956,24 +1006,20 @@ uloc_setKeywordValue(const char* keywordName,
         /* now keyValueTail points to first char after the keyName */
         /* copy & normalize keyName from locale */
         if (keywordStart == keyValueTail) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
+            status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
             return 0;
         }
-        keyValueLen = 0;
+        CharString localeKeywordName;
         while (keywordStart < keyValueTail) {
             if (!UPRV_ISALPHANUM(*keywordStart)) {
-                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
-                return 0;
-            }
-            if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
-                localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
-            } else {
-                /* keyword name too long for internal buffer */
-                *status = U_INTERNAL_PROGRAM_ERROR;
+                status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                 return 0;
             }
+            localeKeywordName.append(uprv_tolower(*keywordStart++), status);
+        }
+        if (U_FAILURE(status)) {
+            return 0;
         }
-        localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
 
         nextSeparator = uprv_strchr(nextEqualsign, ';');
 
@@ -988,46 +1034,46 @@ uloc_setKeywordValue(const char* keywordName,
             keyValueTail--;
         }
         if (nextEqualsign == keyValueTail) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
+            status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
             return 0;
         }
 
-        rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
+        rc = uprv_strcmp(canonKeywordName.data(), localeKeywordName.data());
         if(rc == 0) {
             /* Current entry matches the input keyword. Update the entry */
-            if(keywordValueLen > 0) { /* updating a value */
-                updatedKeysAndValues.append(keyValuePrefix, *status);
+            if (!canonKeywordValue.isEmpty()) { /* updating a value */
+                updatedKeysAndValues.append(keyValuePrefix, status);
                 keyValuePrefix = ';'; /* for any subsequent key-value pair */
-                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
-                updatedKeysAndValues.append('=', *status);
-                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
+                updatedKeysAndValues.append(canonKeywordName, status);
+                updatedKeysAndValues.append('=', status);
+                updatedKeysAndValues.append(canonKeywordValue, status);
             } /* else removing this entry, don't emit anything */
             handledInputKeyAndValue = true;
         } else {
            /* input keyword sorts earlier than current entry, add before current entry */
-            if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
+            if (rc < 0 && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) {
                 /* insert new entry at this location */
-                updatedKeysAndValues.append(keyValuePrefix, *status);
+                updatedKeysAndValues.append(keyValuePrefix, status);
                 keyValuePrefix = ';'; /* for any subsequent key-value pair */
-                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
-                updatedKeysAndValues.append('=', *status);
-                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
+                updatedKeysAndValues.append(canonKeywordName, status);
+                updatedKeysAndValues.append('=', status);
+                updatedKeysAndValues.append(canonKeywordValue, status);
                 handledInputKeyAndValue = true;
             }
             /* copy the current entry */
-            updatedKeysAndValues.append(keyValuePrefix, *status);
+            updatedKeysAndValues.append(keyValuePrefix, status);
             keyValuePrefix = ';'; /* for any subsequent key-value pair */
-            updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
-            updatedKeysAndValues.append('=', *status);
-            updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
+            updatedKeysAndValues.append(localeKeywordName, status);
+            updatedKeysAndValues.append('=', status);
+            updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), status);
         }
-        if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
+        if (!nextSeparator && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) {
             /* append new entry at the end, it sorts later than existing entries */
-            updatedKeysAndValues.append(keyValuePrefix, *status);
+            updatedKeysAndValues.append(keyValuePrefix, status);
             /* skip keyValuePrefix update, no subsequent key-value pair */
-            updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
-            updatedKeysAndValues.append('=', *status);
-            updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
+            updatedKeysAndValues.append(canonKeywordName, status);
+            updatedKeysAndValues.append('=', status);
+            updatedKeysAndValues.append(canonKeywordValue, status);
             handledInputKeyAndValue = true;
         }
         keywordStart = nextSeparator;
@@ -1041,44 +1087,57 @@ uloc_setKeywordValue(const char* keywordName,
      * error return but the passed-in locale is unmodified and the original bufLen is
      * returned.
      */
-    if (!handledInputKeyAndValue || U_FAILURE(*status)) {
+    if (!handledInputKeyAndValue || U_FAILURE(status)) {
         /* if input key/value specified removal of a keyword not present in locale, or
          * there was an error in CharString.append, leave original locale alone. */
-        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
-        return bufLen;
+        U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
+        return (int32_t)uprv_strlen(keywords);
     }
 
-    // needLen = length of the part before '@'
-    needLen = (int32_t)(startSearchHere - buffer);
-    // Check to see can we fit the startSearchHere, if not, return
+    needLen = updatedKeysAndValues.length();
+    // Check whether updatedKeysAndValues fits; if it does not, return
     // U_BUFFER_OVERFLOW_ERROR without copy updatedKeysAndValues into it.
     // We do this because this API function does not behave like most others:
     // It promises never to set a U_STRING_NOT_TERMINATED_WARNING.
     // When the contents fits but without the terminating NUL, in this case we need to not change
     // the buffer contents and return with a buffer overflow error.
-    int32_t appendLength = updatedKeysAndValues.length();
-    if (appendLength >= bufferCapacity - needLen) {
-        *status = U_BUFFER_OVERFLOW_ERROR;
-        return needLen + appendLength;
-    }
-    needLen += updatedKeysAndValues.extract(
-                         startSearchHere, bufferCapacity - needLen, *status);
-    U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
+    if (needLen > 0) {
+        int32_t capacity = 0;
+        char* buffer = sink.GetAppendBuffer(
+                needLen, needLen, nullptr, needLen, &capacity);
+        if (capacity < needLen || buffer == nullptr) {
+            status = U_BUFFER_OVERFLOW_ERROR;
+            return needLen;
+        }
+        uprv_memcpy(buffer, updatedKeysAndValues.data(), needLen);
+        sink.Append(buffer, needLen);
+    }
+    U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
     return needLen;
 }
 
 /* ### ID parsing implementation **************************************************/
 
-#define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
+namespace {
+
+inline bool _isPrefixLetter(char a) { return a == 'x' || a == 'X' || a == 'i' || a == 'I'; }
 
 /*returns true if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
-#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
+inline bool _isIDPrefix(const char *s) { return _isPrefixLetter(s[0]) && _isIDSeparator(s[1]); }
 
 /* Dot terminates it because of POSIX form  where dot precedes the codepage
  * except for variant
  */
-#define _isTerminator(a)  ((a==0)||(a=='.')||(a=='@'))
+inline bool _isTerminator(char a) { return a == 0 || a == '.' || a == '@'; }
+
+inline bool _isBCP47Extension(const char* p) {
+    return p[0] == '-' &&
+           (p[1] == 't' || p[1] == 'T' ||
+            p[1] == 'u' || p[1] == 'U' ||
+            p[1] == 'x' || p[1] == 'X') &&
+           p[2] == '-';
+}
 
 /**
  * Lookup 'key' in the array 'list'.  The array 'list' should contain
@@ -1087,7 +1146,7 @@ uloc_setKeywordValue(const char* keywordName,
  * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
  * COUNTRIES_3.
  */
-static int16_t _findIndex(const char* const* list, const char* key)
+std::optional<int16_t> _findIndex(const char* const* list, const char* key)
 {
     const char* const* anchor = list;
     int32_t pass = 0;
@@ -1102,38 +1161,40 @@ static int16_t _findIndex(const char* const* list, const char* key)
         }
         ++list;     /* skip final nullptr *CWB*/
     }
-    return -1;
+    return std::nullopt;
 }
 
+}  // namespace
+
 U_CFUNC const char*
 uloc_getCurrentCountryID(const char* oldID){
-    int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
-    if (offset >= 0) {
-        return REPLACEMENT_COUNTRIES[offset];
-    }
-    return oldID;
+    std::optional<int16_t> offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
+    return offset.has_value() ? REPLACEMENT_COUNTRIES[*offset] : oldID;
 }
 U_CFUNC const char*
 uloc_getCurrentLanguageID(const char* oldID){
-    int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
-    if (offset >= 0) {
-        return REPLACEMENT_LANGUAGES[offset];
-    }
-    return oldID;
+    std::optional<int16_t> offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
+    return offset.has_value() ? REPLACEMENT_LANGUAGES[*offset] : oldID;
 }
+
+namespace {
+
 /*
- * the internal functions _getLanguage(), _getCountry(), _getVariant()
+ * the internal functions _getLanguage(), _getScript(), _getRegion(), _getVariant()
  * avoid duplicating code to handle the earlier locale ID pieces
  * in the functions for the later ones by
  * setting the *pEnd pointer to where they stopped parsing
  *
  * TODO try to use this in Locale
  */
-CharString U_EXPORT2
-ulocimp_getLanguage(const char *localeID,
-                    const char **pEnd,
-                    UErrorCode &status) {
-    CharString result;
+
+void
+_getLanguage(const char* localeID,
+             ByteSink* sink,
+             const char** pEnd,
+             UErrorCode& status) {
+    U_ASSERT(pEnd != nullptr);
+    *pEnd = localeID;
 
     if (uprv_stricmp(localeID, "root") == 0) {
         localeID += 4;
@@ -1145,150 +1206,381 @@ ulocimp_getLanguage(const char *localeID,
         localeID += 3;
     }
 
+    constexpr int32_t MAXLEN = ULOC_LANG_CAPACITY - 1;  // Minus NUL.
+
     /* if it starts with i- or x- then copy that prefix */
-    if(_isIDPrefix(localeID)) {
-        result.append((char)uprv_tolower(*localeID), status);
-        result.append('-', status);
-        localeID+=2;
+    int32_t len = _isIDPrefix(localeID) ? 2 : 0;
+    while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
+        if (len == MAXLEN) {
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        len++;
     }
 
-    /* copy the language as far as possible and count its length */
-    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
-        result.append((char)uprv_tolower(*localeID), status);
-        localeID++;
+    *pEnd = localeID + len;
+    if (sink == nullptr || len == 0) { return; }
+
+    int32_t minCapacity = uprv_max(len, 4);  // Minimum 3 letters plus NUL.
+    char scratch[MAXLEN];
+    int32_t capacity = 0;
+    char* buffer = sink->GetAppendBuffer(
+            minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity);
+
+    for (int32_t i = 0; i < len; ++i) {
+        buffer[i] = uprv_tolower(localeID[i]);
+    }
+    if (_isIDSeparator(localeID[1])) {
+        buffer[1] = '-';
     }
 
-    if(result.length()==3) {
+    if (len == 3) {
         /* convert 3 character code to 2 character code if possible *CWB*/
-        int32_t offset = _findIndex(LANGUAGES_3, result.data());
-        if(offset>=0) {
-            result.clear();
-            result.append(LANGUAGES[offset], status);
+        U_ASSERT(capacity >= 4);
+        buffer[3] = '\0';
+        std::optional<int16_t> offset = _findIndex(LANGUAGES_3, buffer);
+        if (offset.has_value()) {
+            const char* const alias = LANGUAGES[*offset];
+            sink->Append(alias, (int32_t)uprv_strlen(alias));
+            return;
         }
     }
 
-    if(pEnd!=nullptr) {
-        *pEnd=localeID;
-    }
-
-    return result;
+    sink->Append(buffer, len);
 }
 
-CharString U_EXPORT2
-ulocimp_getScript(const char *localeID,
-                  const char **pEnd,
-                  UErrorCode &status) {
-    CharString result;
-    int32_t idLen = 0;
+void
+_getScript(const char* localeID,
+           ByteSink* sink,
+           const char** pEnd) {
+    U_ASSERT(pEnd != nullptr);
+    *pEnd = localeID;
 
-    if (pEnd != nullptr) {
-        *pEnd = localeID;
-    }
+    constexpr int32_t LENGTH = 4;
 
-    /* copy the second item as far as possible and count its length */
-    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
-            && uprv_isASCIILetter(localeID[idLen])) {
-        idLen++;
+    int32_t len = 0;
+    while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) &&
+            uprv_isASCIILetter(localeID[len])) {
+        if (len == LENGTH) { return; }
+        len++;
     }
+    if (len != LENGTH) { return; }
 
-    /* If it's exactly 4 characters long, then it's a script and not a country. */
-    if (idLen == 4) {
-        int32_t i;
-        if (pEnd != nullptr) {
-            *pEnd = localeID+idLen;
-        }
-        if (idLen >= 1) {
-            result.append((char)uprv_toupper(*(localeID++)), status);
-        }
-        for (i = 1; i < idLen; i++) {
-            result.append((char)uprv_tolower(*(localeID++)), status);
-        }
+    *pEnd = localeID + LENGTH;
+    if (sink == nullptr) { return; }
+
+    char scratch[LENGTH];
+    int32_t capacity = 0;
+    char* buffer = sink->GetAppendBuffer(
+            LENGTH, LENGTH, scratch, UPRV_LENGTHOF(scratch), &capacity);
+
+    buffer[0] = uprv_toupper(localeID[0]);
+    for (int32_t i = 1; i < LENGTH; ++i) {
+        buffer[i] = uprv_tolower(localeID[i]);
     }
 
-    return result;
+    sink->Append(buffer, LENGTH);
 }
 
-CharString U_EXPORT2
-ulocimp_getCountry(const char *localeID,
-                   const char **pEnd,
-                   UErrorCode &status) {
-    CharString result;
-    int32_t idLen=0;
+void
+_getRegion(const char* localeID,
+           ByteSink* sink,
+           const char** pEnd) {
+    U_ASSERT(pEnd != nullptr);
+    *pEnd = localeID;
 
-    /* copy the country as far as possible and count its length */
-    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
-        result.append((char)uprv_toupper(localeID[idLen]), status);
-        idLen++;
+    constexpr int32_t MINLEN = 2;
+    constexpr int32_t MAXLEN = ULOC_COUNTRY_CAPACITY - 1;  // Minus NUL.
+
+    int32_t len = 0;
+    while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
+        if (len == MAXLEN) { return; }
+        len++;
     }
+    if (len < MINLEN) { return; }
 
-    /* the country should be either length 2 or 3 */
-    if (idLen == 2 || idLen == 3) {
-        /* convert 3 character code to 2 character code if possible *CWB*/
-        if(idLen==3) {
-            int32_t offset = _findIndex(COUNTRIES_3, result.data());
-            if(offset>=0) {
-                result.clear();
-                result.append(COUNTRIES[offset], status);
-            }
-        }
-        localeID+=idLen;
-    } else {
-        result.clear();
+    *pEnd = localeID + len;
+    if (sink == nullptr) { return; }
+
+    char scratch[ULOC_COUNTRY_CAPACITY];
+    int32_t capacity = 0;
+    char* buffer = sink->GetAppendBuffer(
+            ULOC_COUNTRY_CAPACITY,
+            ULOC_COUNTRY_CAPACITY,
+            scratch,
+            UPRV_LENGTHOF(scratch),
+            &capacity);
+
+    for (int32_t i = 0; i < len; ++i) {
+        buffer[i] = uprv_toupper(localeID[i]);
     }
 
-    if(pEnd!=nullptr) {
-        *pEnd=localeID;
+    if (len == 3) {
+        /* convert 3 character code to 2 character code if possible *CWB*/
+        U_ASSERT(capacity >= 4);
+        buffer[3] = '\0';
+        std::optional<int16_t> offset = _findIndex(COUNTRIES_3, buffer);
+        if (offset.has_value()) {
+            const char* const alias = COUNTRIES[*offset];
+            sink->Append(alias, (int32_t)uprv_strlen(alias));
+            return;
+        }
     }
 
-    return result;
+    sink->Append(buffer, len);
 }
 
 /**
  * @param needSeparator if true, then add leading '_' if any variants
  * are added to 'variant'
  */
-static void
-_getVariant(const char *localeID,
+void
+_getVariant(const char* localeID,
             char prev,
-            ByteSink& sink,
-            UBool needSeparator) {
-    UBool hasVariant = false;
+            ByteSink* sink,
+            const char** pEnd,
+            bool needSeparator,
+            UErrorCode& status) {
+    if (U_FAILURE(status)) return;
+    if (pEnd != nullptr) { *pEnd = localeID; }
+
+    // Reasonable upper limit for variants.
+    // There is no strict limitation on the syntax of variants in the legacy
+    // locale format. If the locale is constructed from a unicode_locale_id
+    // as defined in UTS35, then we know each unicode_variant_subtag
+    // has a maximum length of 8 (alphanum{5,8} | digit alphanum{3}).
+    // 179 allows 20 unicode_variant_subtags with separators in the
+    // unicode_locale_id:
+    // 8*20 + 1*(20-1) = 179
+    constexpr int32_t MAX_VARIANTS_LENGTH = 179;
 
     /* get one or more variant tags and separate them with '_' */
-    if(_isIDSeparator(prev)) {
+    int32_t index = 0;
+    if (_isIDSeparator(prev)) {
         /* get a variant string after a '-' or '_' */
-        while(!_isTerminator(*localeID)) {
+        for (index=0; !_isTerminator(localeID[index]); index++) {
+            if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+                return;
+            }
             if (needSeparator) {
-                sink.Append("_", 1);
+                if (sink != nullptr) {
+                    sink->Append("_", 1);
+                }
                 needSeparator = false;
             }
-            char c = (char)uprv_toupper(*localeID);
-            if (c == '-') c = '_';
-            sink.Append(&c, 1);
-            hasVariant = true;
-            localeID++;
+            if (sink != nullptr) {
+                char c = (char)uprv_toupper(localeID[index]);
+                if (c == '-') c = '_';
+                sink->Append(&c, 1);
+            }
         }
+        if (pEnd != nullptr) { *pEnd = localeID+index; }
     }
 
     /* if there is no variant tag after a '-' or '_' then look for '@' */
-    if(!hasVariant) {
-        if(prev=='@') {
+    if (index == 0) {
+        if (prev=='@') {
             /* keep localeID */
         } else if((localeID=locale_getKeywordsStart(localeID))!=nullptr) {
             ++localeID; /* point after the '@' */
         } else {
             return;
         }
-        while(!_isTerminator(*localeID)) {
+        for(; !_isTerminator(localeID[index]); index++) {
+            if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+                return;
+            }
             if (needSeparator) {
-                sink.Append("_", 1);
+                if (sink != nullptr) {
+                    sink->Append("_", 1);
+                }
                 needSeparator = false;
             }
-            char c = (char)uprv_toupper(*localeID);
-            if (c == '-' || c == ',') c = '_';
-            sink.Append(&c, 1);
+            if (sink != nullptr) {
+                char c = (char)uprv_toupper(localeID[index]);
+                if (c == '-' || c == ',') c = '_';
+                sink->Append(&c, 1);
+            }
+        }
+        if (pEnd != nullptr) { *pEnd = localeID + index; }
+    }
+}
+
+}  // namespace
+
+U_EXPORT CharString
+ulocimp_getLanguage(const char* localeID, UErrorCode& status) {
+    return ByteSinkUtil::viaByteSinkToCharString(
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getSubtags(
+                    localeID,
+                    &sink,
+                    nullptr,
+                    nullptr,
+                    nullptr,
+                    nullptr,
+                    status);
+        },
+        status);
+}
+
+U_EXPORT CharString
+ulocimp_getScript(const char* localeID, UErrorCode& status) {
+    return ByteSinkUtil::viaByteSinkToCharString(
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getSubtags(
+                    localeID,
+                    nullptr,
+                    &sink,
+                    nullptr,
+                    nullptr,
+                    nullptr,
+                    status);
+        },
+        status);
+}
+
+U_EXPORT CharString
+ulocimp_getRegion(const char* localeID, UErrorCode& status) {
+    return ByteSinkUtil::viaByteSinkToCharString(
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getSubtags(
+                    localeID,
+                    nullptr,
+                    nullptr,
+                    &sink,
+                    nullptr,
+                    nullptr,
+                    status);
+        },
+        status);
+}
+
+U_EXPORT CharString
+ulocimp_getVariant(const char* localeID, UErrorCode& status) {
+    return ByteSinkUtil::viaByteSinkToCharString(
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getSubtags(
+                    localeID,
+                    nullptr,
+                    nullptr,
+                    nullptr,
+                    &sink,
+                    nullptr,
+                    status);
+        },
+        status);
+}
+
+U_EXPORT void
+ulocimp_getSubtags(
+        const char* localeID,
+        CharString* language,
+        CharString* script,
+        CharString* region,
+        CharString* variant,
+        const char** pEnd,
+        UErrorCode& status) {
+    if (U_FAILURE(status)) { return; }
+
+    std::optional<CharStringByteSink> languageSink;
+    std::optional<CharStringByteSink> scriptSink;
+    std::optional<CharStringByteSink> regionSink;
+    std::optional<CharStringByteSink> variantSink;
+
+    if (language != nullptr) { languageSink.emplace(language); }
+    if (script != nullptr) { scriptSink.emplace(script); }
+    if (region != nullptr) { regionSink.emplace(region); }
+    if (variant != nullptr) { variantSink.emplace(variant); }
+
+    ulocimp_getSubtags(
+            localeID,
+            languageSink.has_value() ? &*languageSink : nullptr,
+            scriptSink.has_value() ? &*scriptSink : nullptr,
+            regionSink.has_value() ? &*regionSink : nullptr,
+            variantSink.has_value() ? &*variantSink : nullptr,
+            pEnd,
+            status);
+}
+
+U_EXPORT void
+ulocimp_getSubtags(
+        const char* localeID,
+        ByteSink* language,
+        ByteSink* script,
+        ByteSink* region,
+        ByteSink* variant,
+        const char** pEnd,
+        UErrorCode& status) {
+    if (U_FAILURE(status)) { return; }
+
+    if (pEnd != nullptr) {
+        *pEnd = localeID;
+    } else if (language == nullptr &&
+               script == nullptr &&
+               region == nullptr &&
+               variant == nullptr) {
+        return;
+    }
+
+    bool hasRegion = false;
+
+    if (localeID == nullptr) {
+        localeID = uloc_getDefault();
+    }
+
+    _getLanguage(localeID, language, &localeID, status);
+    if (U_FAILURE(status)) { return; }
+    U_ASSERT(localeID != nullptr);
+
+    if (pEnd != nullptr) {
+        *pEnd = localeID;
+    } else if (script == nullptr &&
+               region == nullptr &&
+               variant == nullptr) {
+        return;
+    }
+
+    if (_isIDSeparator(*localeID)) {
+        const char* begin = localeID + 1;
+        const char* end = nullptr;
+        _getScript(begin, script, &end);
+        U_ASSERT(end != nullptr);
+        if (end != begin) {
+            localeID = end;
+            if (pEnd != nullptr) { *pEnd = localeID; }
+        }
+    }
+
+    if (region == nullptr && variant == nullptr && pEnd == nullptr) { return; }
+
+    if (_isIDSeparator(*localeID)) {
+        const char* begin = localeID + 1;
+        const char* end = nullptr;
+        _getRegion(begin, region, &end);
+        U_ASSERT(end != nullptr);
+        if (end != begin) {
+            hasRegion = true;
+            localeID = end;
+            if (pEnd != nullptr) { *pEnd = localeID; }
+        }
+    }
+
+    if (variant == nullptr && pEnd == nullptr) { return; }
+
+    if (_isIDSeparator(*localeID) && !_isBCP47Extension(localeID)) {
+        /* If there was no country ID, skip a possible extra IDSeparator */
+        if (!hasRegion && _isIDSeparator(localeID[1])) {
             localeID++;
         }
+        const char* begin = localeID + 1;
+        const char* end = nullptr;
+        _getVariant(begin, *localeID, variant, &end, false, status);
+        if (U_FAILURE(status)) { return; }
+        U_ASSERT(end != nullptr);
+        if (end != begin && pEnd != nullptr) { *pEnd = end; }
     }
 }
 
@@ -1359,12 +1651,11 @@ static const UEnumeration gKeywordsEnum = {
 U_CAPI UEnumeration* U_EXPORT2
 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
 {
+    if (U_FAILURE(*status)) { return nullptr; }
+
     LocalMemory<UKeywordsContext> myContext;
     LocalMemory<UEnumeration> result;
 
-    if (U_FAILURE(*status)) {
-        return nullptr;
-    }
     myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
     result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
     if (myContext.isNull() || result.isNull()) {
@@ -1388,16 +1679,15 @@ U_CAPI UEnumeration* U_EXPORT2
 uloc_openKeywords(const char* localeID,
                         UErrorCode* status)
 {
-    CharString tempBuffer;
-    const char* tmpLocaleID;
-
     if(status==nullptr || U_FAILURE(*status)) {
-        return 0;
+        return nullptr;
     }
 
+    CharString tempBuffer;
+    const char* tmpLocaleID;
+
     if (_hasBCP47Extension(localeID)) {
-        CharStringByteSink sink(&tempBuffer);
-        ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status);
+        tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, *status);
         tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
     } else {
         if (localeID==nullptr) {
@@ -1406,37 +1696,21 @@ uloc_openKeywords(const char* localeID,
         tmpLocaleID=localeID;
     }
 
-    /* Skip the language */
-    ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);
+    ulocimp_getSubtags(
+            tmpLocaleID,
+            nullptr,
+            nullptr,
+            nullptr,
+            nullptr,
+            &tmpLocaleID,
+            *status);
     if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    if(_isIDSeparator(*tmpLocaleID)) {
-        const char *scriptID;
-        /* Skip the script if available */
-        ulocimp_getScript(tmpLocaleID+1, &scriptID, *status);
-        if (U_FAILURE(*status)) {
-            return 0;
-        }
-        if(scriptID != tmpLocaleID+1) {
-            /* Found optional script */
-            tmpLocaleID = scriptID;
-        }
-        /* Skip the Country */
-        if (_isIDSeparator(*tmpLocaleID)) {
-            ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status);
-            if (U_FAILURE(*status)) {
-                return 0;
-            }
-        }
+        return nullptr;
     }
 
     /* keywords are located after '@' */
     if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != nullptr) {
-        CharString keywords;
-        CharStringByteSink sink(&keywords);
-        ulocimp_getKeywords(tmpLocaleID+1, '@', sink, false, status);
+        CharString keywords = ulocimp_getKeywords(tmpLocaleID + 1, '@', false, *status);
         if (U_FAILURE(*status)) {
             return nullptr;
         }
@@ -1450,10 +1724,12 @@ uloc_openKeywords(const char* localeID,
 #define _ULOC_STRIP_KEYWORDS 0x2
 #define _ULOC_CANONICALIZE   0x1
 
-#define OPTION_SET(options, mask) ((options & mask) != 0)
+namespace {
 
-static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
-#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
+inline bool OPTION_SET(uint32_t options, uint32_t mask) { return (options & mask) != 0; }
+
+constexpr char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
+constexpr int32_t I_DEFAULT_LENGTH = UPRV_LENGTHOF(i_default);
 
 /**
  * Canonicalize the given localeID, to level 1 or to level 2,
@@ -1462,16 +1738,16 @@ static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
  *
  * This is the code underlying uloc_getName and uloc_canonicalize.
  */
-static void
+void
 _canonicalize(const char* localeID,
               ByteSink& sink,
               uint32_t options,
-              UErrorCode* err) {
-    if (U_FAILURE(*err)) {
+              UErrorCode& err) {
+    if (U_FAILURE(err)) {
         return;
     }
 
-    int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
+    int32_t j, fieldCount=0;
     CharString tempBuffer;  // if localeID has a BCP47 extension, tmpLocaleID points to this
     CharString localeIDWithHyphens;  // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
     const char* origLocaleID;
@@ -1484,8 +1760,8 @@ _canonicalize(const char* localeID,
 
         // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
         if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') {
-            localeIDWithHyphens.append(localeID, -1, *err);
-            if (U_SUCCESS(*err)) {
+            localeIDWithHyphens.append(localeID, -1, err);
+            if (U_SUCCESS(err)) {
                 for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
                     if (*p == '_') {
                         *p = '-';
@@ -1495,9 +1771,8 @@ _canonicalize(const char* localeID,
             }
         }
 
-        CharStringByteSink tempSink(&tempBuffer);
-        ulocimp_forLanguageTag(localeIDPtr, -1, tempSink, nullptr, err);
-        tmpLocaleID = U_SUCCESS(*err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeIDPtr;
+        tempBuffer = ulocimp_forLanguageTag(localeIDPtr, -1, nullptr, err);
+        tmpLocaleID = U_SUCCESS(err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeIDPtr;
     } else {
         if (localeID==nullptr) {
            localeID=uloc_getDefault();
@@ -1508,76 +1783,70 @@ _canonicalize(const char* localeID,
     origLocaleID=tmpLocaleID;
 
     /* get all pieces, one after another, and separate with '_' */
-    CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
+    CharString tag;
+    CharString script;
+    CharString country;
+    CharString variant;
+    ulocimp_getSubtags(
+            tmpLocaleID,
+            &tag,
+            &script,
+            &country,
+            &variant,
+            &tmpLocaleID,
+            err);
+    if (U_FAILURE(err)) {
+        return;
+    }
 
     if (tag.length() == I_DEFAULT_LENGTH &&
             uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
         tag.clear();
-        tag.append(uloc_getDefault(), *err);
-    } else if(_isIDSeparator(*tmpLocaleID)) {
-        const char *scriptID;
-
-        ++fieldCount;
-        tag.append('_', *err);
-
-        CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
-        tag.append(script, *err);
-        scriptSize = script.length();
-        if(scriptSize > 0) {
-            /* Found optional script */
-            tmpLocaleID = scriptID;
+        tag.append(uloc_getDefault(), err);
+    } else {
+        if (!script.isEmpty()) {
             ++fieldCount;
-            if (_isIDSeparator(*tmpLocaleID)) {
-                /* If there is something else, then we add the _ */
-                tag.append('_', *err);
-            }
+            tag.append('_', err);
+            tag.append(script, err);
         }
-
-        if (_isIDSeparator(*tmpLocaleID)) {
-            const char *cntryID;
-
-            CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
-            tag.append(country, *err);
-            if (!country.isEmpty()) {
-                /* Found optional country */
-                tmpLocaleID = cntryID;
-            }
-            if(_isIDSeparator(*tmpLocaleID)) {
-                /* If there is something else, then we add the _  if we found country before. */
-                if (!_isIDSeparator(*(tmpLocaleID+1))) {
-                    ++fieldCount;
-                    tag.append('_', *err);
-                }
-
-                variantSize = -tag.length();
-                {
-                    CharStringByteSink s(&tag);
-                    _getVariant(tmpLocaleID+1, *tmpLocaleID, s, false);
-                }
-                variantSize += tag.length();
-                if (variantSize > 0) {
-                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
-                }
+        if (!country.isEmpty()) {
+            ++fieldCount;
+            tag.append('_', err);
+            tag.append(country, err);
+        }
+        if (!variant.isEmpty()) {
+            ++fieldCount;
+            if (country.isEmpty()) {
+                tag.append('_', err);
             }
+            tag.append('_', err);
+            tag.append(variant, err);
         }
     }
 
     /* Copy POSIX-style charset specifier, if any [mr.utf8] */
     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
-        UBool done = false;
-        do {
-            char c = *tmpLocaleID;
-            switch (c) {
-            case 0:
-            case '@':
-                done = true;
-                break;
-            default:
-                tag.append(c, *err);
-                ++tmpLocaleID;
-                break;
-            }
-        } while (!done);
+        tag.append('.', err);
+        ++tmpLocaleID;
+        const char *atPos = nullptr;
+        size_t length;
+        if((atPos = uprv_strchr(tmpLocaleID, '@')) != nullptr) {
+            length = atPos - tmpLocaleID;
+        } else {
+            length = uprv_strlen(tmpLocaleID);
+        }
+        // The longest charset name we found in the IANA charset registry
+        // (https://www.iana.org/assignments/character-sets/) is
+        // "Extended_UNIX_Code_Packed_Format_for_Japanese", which is 45 characters
+        // long. We therefore cap the length at 64, the smallest power of 2
+        // greater than 45.
+        constexpr size_t kMaxCharsetLength = 64;
+        if (length > kMaxCharsetLength) {
+           err = U_ILLEGAL_ARGUMENT_ERROR; /* charset specifier too long */
+           return;
+        }
+        tag.append(tmpLocaleID, static_cast<int32_t>(length), err);
+        tmpLocaleID += length;
     }
 
     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
@@ -1595,7 +1864,7 @@ _canonicalize(const char* localeID,
             if (c == 0) {
                 break;
             }
-            tag.append(c, *err);
+            tag.append(c, err);
             ++tmpLocaleID;
         }
     }
@@ -1604,22 +1873,16 @@ _canonicalize(const char* localeID,
         /* Handle @FOO variant if @ is present and not followed by = */
         if (tmpLocaleID!=nullptr && keywordAssign==nullptr) {
             /* Add missing '_' if needed */
-            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
+            if (fieldCount < 2 || (fieldCount < 3 && !script.isEmpty())) {
                 do {
-                    tag.append('_', *err);
+                    tag.append('_', err);
                     ++fieldCount;
                 } while(fieldCount<2);
             }
 
-            int32_t posixVariantSize = -tag.length();
-            {
-                CharStringByteSink s(&tag);
-                _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
-            }
-            posixVariantSize += tag.length();
-            if (posixVariantSize > 0) {
-                variantSize += posixVariantSize;
-            }
+            CharStringByteSink s(&tag);
+            _getVariant(tmpLocaleID+1, '@', &s, nullptr, !variant.isEmpty(), err);
+            if (U_FAILURE(err)) { return; }
         }
 
         /* Look up the ID in the canonicalization map */
@@ -1630,7 +1893,7 @@ _canonicalize(const char* localeID,
                     break; /* Don't remap "" if keywords present */
                 }
                 tag.clear();
-                tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
+                tag.append(CANONICALIZE_MAP[j].canonicalID, err);
                 break;
             }
         }
@@ -1648,6 +1911,8 @@ _canonicalize(const char* localeID,
     }
 }
 
+}  // namespace
+
 /* ### ID parsing API **************************************************/
 
 U_CAPI int32_t  U_EXPORT2
@@ -1656,39 +1921,35 @@ uloc_getParent(const char*    localeID,
                int32_t parentCapacity,
                UErrorCode* err)
 {
-    if (U_FAILURE(*err)) {
-        return 0;
-    }
-
-    CheckedArrayByteSink sink(parent, parentCapacity);
-    ulocimp_getParent(localeID, sink, err);
-
-    int32_t reslen = sink.NumberOfBytesAppended();
-
-    if (U_FAILURE(*err)) {
-        return reslen;
-    }
-
-    if (sink.Overflowed()) {
-        *err = U_BUFFER_OVERFLOW_ERROR;
-    } else {
-        u_terminateChars(parent, parentCapacity, reslen, err);
-    }
+    return ByteSinkUtil::viaByteSinkToTerminatedChars(
+        parent, parentCapacity,
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getParent(localeID, sink, status);
+        },
+        *err);
+}
 
-    return reslen;
+U_EXPORT CharString
+ulocimp_getParent(const char* localeID,
+                  UErrorCode& err)
+{
+    return ByteSinkUtil::viaByteSinkToCharString(
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getParent(localeID, sink, status);
+        },
+        err);
 }
 
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_getParent(const char* localeID,
                   icu::ByteSink& sink,
-                  UErrorCode* err)
+                  UErrorCode& err)
 {
+    if (U_FAILURE(err)) { return; }
+
     const char *lastUnderscore;
     int32_t i;
 
-    if (U_FAILURE(*err))
-        return;
-
     if (localeID == nullptr)
         localeID = uloc_getDefault();
 
@@ -1715,16 +1976,19 @@ uloc_getLanguage(const char*    localeID,
          UErrorCode* err)
 {
     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
-
-    if (err==nullptr || U_FAILURE(*err)) {
-        return 0;
-    }
-
-    if(localeID==nullptr) {
-        localeID=uloc_getDefault();
-    }
-
-    return ulocimp_getLanguage(localeID, nullptr, *err).extract(language, languageCapacity, *err);
+    return ByteSinkUtil::viaByteSinkToTerminatedChars(
+        language, languageCapacity,
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getSubtags(
+                    localeID,
+                    &sink,
+                    nullptr,
+                    nullptr,
+                    nullptr,
+                    nullptr,
+                    status);
+        },
+        *err);
 }
 
 U_CAPI int32_t U_EXPORT2
@@ -1733,24 +1997,19 @@ uloc_getScript(const char*    localeID,
          int32_t scriptCapacity,
          UErrorCode* err)
 {
-    if(err==nullptr || U_FAILURE(*err)) {
-        return 0;
-    }
-
-    if(localeID==nullptr) {
-        localeID=uloc_getDefault();
-    }
-
-    /* skip the language */
-    ulocimp_getLanguage(localeID, &localeID, *err);
-    if (U_FAILURE(*err)) {
-        return 0;
-    }
-
-    if(_isIDSeparator(*localeID)) {
-        return ulocimp_getScript(localeID+1, nullptr, *err).extract(script, scriptCapacity, *err);
-    }
-    return u_terminateChars(script, scriptCapacity, 0, err);
+    return ByteSinkUtil::viaByteSinkToTerminatedChars(
+        script, scriptCapacity,
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getSubtags(
+                    localeID,
+                    nullptr,
+                    &sink,
+                    nullptr,
+                    nullptr,
+                    nullptr,
+                    status);
+        },
+        *err);
 }
 
 U_CAPI int32_t  U_EXPORT2
@@ -1759,36 +2018,19 @@ uloc_getCountry(const char* localeID,
             int32_t countryCapacity,
             UErrorCode* err)
 {
-    if(err==nullptr || U_FAILURE(*err)) {
-        return 0;
-    }
-
-    if(localeID==nullptr) {
-        localeID=uloc_getDefault();
-    }
-
-    /* Skip the language */
-    ulocimp_getLanguage(localeID, &localeID, *err);
-    if (U_FAILURE(*err)) {
-        return 0;
-    }
-
-    if(_isIDSeparator(*localeID)) {
-        const char *scriptID;
-        /* Skip the script if available */
-        ulocimp_getScript(localeID+1, &scriptID, *err);
-        if (U_FAILURE(*err)) {
-            return 0;
-        }
-        if(scriptID != localeID+1) {
-            /* Found optional script */
-            localeID = scriptID;
-        }
-        if(_isIDSeparator(*localeID)) {
-            return ulocimp_getCountry(localeID+1, nullptr, *err).extract(country, countryCapacity, *err);
-        }
-    }
-    return u_terminateChars(country, countryCapacity, 0, err);
+    return ByteSinkUtil::viaByteSinkToTerminatedChars(
+        country, countryCapacity,
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getSubtags(
+                    localeID,
+                    nullptr,
+                    nullptr,
+                    &sink,
+                    nullptr,
+                    nullptr,
+                    status);
+        },
+        *err);
 }
 
 U_CAPI int32_t  U_EXPORT2
@@ -1797,77 +2039,19 @@ uloc_getVariant(const char* localeID,
                 int32_t variantCapacity,
                 UErrorCode* err)
 {
-    CharString tempBuffer;
-    const char* tmpLocaleID;
-    int32_t i=0;
-
-    if(err==nullptr || U_FAILURE(*err)) {
-        return 0;
-    }
-
-    if (_hasBCP47Extension(localeID)) {
-        CharStringByteSink sink(&tempBuffer);
-        ulocimp_forLanguageTag(localeID, -1, sink, nullptr, err);
-        tmpLocaleID = U_SUCCESS(*err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
-    } else {
-        if (localeID==nullptr) {
-           localeID=uloc_getDefault();
-        }
-        tmpLocaleID=localeID;
-    }
-
-    /* Skip the language */
-    ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
-    if (U_FAILURE(*err)) {
-        return 0;
-    }
-
-    if(_isIDSeparator(*tmpLocaleID)) {
-        const char *scriptID;
-        /* Skip the script if available */
-        ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
-        if (U_FAILURE(*err)) {
-            return 0;
-        }
-        if(scriptID != tmpLocaleID+1) {
-            /* Found optional script */
-            tmpLocaleID = scriptID;
-        }
-        /* Skip the Country */
-        if (_isIDSeparator(*tmpLocaleID)) {
-            const char *cntryID;
-            ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
-            if (U_FAILURE(*err)) {
-                return 0;
-            }
-            if (cntryID != tmpLocaleID+1) {
-                /* Found optional country */
-                tmpLocaleID = cntryID;
-            }
-            if(_isIDSeparator(*tmpLocaleID)) {
-                /* If there was no country ID, skip a possible extra IDSeparator */
-                if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
-                    tmpLocaleID++;
-                }
-
-                CheckedArrayByteSink sink(variant, variantCapacity);
-                _getVariant(tmpLocaleID+1, *tmpLocaleID, sink, false);
-
-                i = sink.NumberOfBytesAppended();
-
-                if (U_FAILURE(*err)) {
-                    return i;
-                }
-
-                if (sink.Overflowed()) {
-                    *err = U_BUFFER_OVERFLOW_ERROR;
-                    return i;
-                }
-            }
-        }
-    }
-
-    return u_terminateChars(variant, variantCapacity, i, err);
+    return ByteSinkUtil::viaByteSinkToTerminatedChars(
+        variant, variantCapacity,
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getSubtags(
+                    localeID,
+                    nullptr,
+                    nullptr,
+                    nullptr,
+                    &sink,
+                    nullptr,
+                    status);
+        },
+        *err);
 }
 
 U_CAPI int32_t  U_EXPORT2
@@ -1876,32 +2060,29 @@ uloc_getName(const char* localeID,
              int32_t nameCapacity,
              UErrorCode* err)
 {
-    if (U_FAILURE(*err)) {
-        return 0;
-    }
-
-    CheckedArrayByteSink sink(name, nameCapacity);
-    ulocimp_getName(localeID, sink, err);
-
-    int32_t reslen = sink.NumberOfBytesAppended();
-
-    if (U_FAILURE(*err)) {
-        return reslen;
-    }
-
-    if (sink.Overflowed()) {
-        *err = U_BUFFER_OVERFLOW_ERROR;
-    } else {
-        u_terminateChars(name, nameCapacity, reslen, err);
-    }
+    return ByteSinkUtil::viaByteSinkToTerminatedChars(
+        name, nameCapacity,
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getName(localeID, sink, status);
+        },
+        *err);
+}
 
-    return reslen;
+U_EXPORT CharString
+ulocimp_getName(const char* localeID,
+                UErrorCode& err)
+{
+    return ByteSinkUtil::viaByteSinkToCharString(
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getName(localeID, sink, status);
+        },
+        err);
 }
 
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_getName(const char* localeID,
                 ByteSink& sink,
-                UErrorCode* err)
+                UErrorCode& err)
 {
     _canonicalize(localeID, sink, 0, err);
 }
@@ -1912,32 +2093,29 @@ uloc_getBaseName(const char* localeID,
                  int32_t nameCapacity,
                  UErrorCode* err)
 {
-    if (U_FAILURE(*err)) {
-        return 0;
-    }
-
-    CheckedArrayByteSink sink(name, nameCapacity);
-    ulocimp_getBaseName(localeID, sink, err);
-
-    int32_t reslen = sink.NumberOfBytesAppended();
-
-    if (U_FAILURE(*err)) {
-        return reslen;
-    }
-
-    if (sink.Overflowed()) {
-        *err = U_BUFFER_OVERFLOW_ERROR;
-    } else {
-        u_terminateChars(name, nameCapacity, reslen, err);
-    }
+    return ByteSinkUtil::viaByteSinkToTerminatedChars(
+        name, nameCapacity,
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getBaseName(localeID, sink, status);
+        },
+        *err);
+}
 
-    return reslen;
+U_EXPORT CharString
+ulocimp_getBaseName(const char* localeID,
+                    UErrorCode& err)
+{
+    return ByteSinkUtil::viaByteSinkToCharString(
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_getBaseName(localeID, sink, status);
+        },
+        err);
 }
 
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_getBaseName(const char* localeID,
                     ByteSink& sink,
-                    UErrorCode* err)
+                    UErrorCode& err)
 {
     _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
 }
@@ -1948,32 +2126,29 @@ uloc_canonicalize(const char* localeID,
                   int32_t nameCapacity,
                   UErrorCode* err)
 {
-    if (U_FAILURE(*err)) {
-        return 0;
-    }
-
-    CheckedArrayByteSink sink(name, nameCapacity);
-    ulocimp_canonicalize(localeID, sink, err);
-
-    int32_t reslen = sink.NumberOfBytesAppended();
-
-    if (U_FAILURE(*err)) {
-        return reslen;
-    }
-
-    if (sink.Overflowed()) {
-        *err = U_BUFFER_OVERFLOW_ERROR;
-    } else {
-        u_terminateChars(name, nameCapacity, reslen, err);
-    }
+    return ByteSinkUtil::viaByteSinkToTerminatedChars(
+        name, nameCapacity,
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_canonicalize(localeID, sink, status);
+        },
+        *err);
+}
 
-    return reslen;
+U_EXPORT CharString
+ulocimp_canonicalize(const char* localeID,
+                     UErrorCode& err)
+{
+    return ByteSinkUtil::viaByteSinkToCharString(
+        [&](ByteSink& sink, UErrorCode& status) {
+            ulocimp_canonicalize(localeID, sink, status);
+        },
+        err);
 }
 
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_canonicalize(const char* localeID,
                      ByteSink& sink,
-                     UErrorCode* err)
+                     UErrorCode& err)
 {
     _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
 }
@@ -1981,49 +2156,39 @@ ulocimp_canonicalize(const char* localeID,
 U_CAPI const char*  U_EXPORT2
 uloc_getISO3Language(const char* localeID)
 {
-    int16_t offset;
-    char lang[ULOC_LANG_CAPACITY];
     UErrorCode err = U_ZERO_ERROR;
 
     if (localeID == nullptr)
     {
         localeID = uloc_getDefault();
     }
-    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
+    CharString lang = ulocimp_getLanguage(localeID, err);
     if (U_FAILURE(err))
         return "";
-    offset = _findIndex(LANGUAGES, lang);
-    if (offset < 0)
-        return "";
-    return LANGUAGES_3[offset];
+    std::optional<int16_t> offset = _findIndex(LANGUAGES, lang.data());
+    return offset.has_value() ? LANGUAGES_3[*offset] : "";
 }
 
 U_CAPI const char*  U_EXPORT2
 uloc_getISO3Country(const char* localeID)
 {
-    int16_t offset;
-    char cntry[ULOC_LANG_CAPACITY];
     UErrorCode err = U_ZERO_ERROR;
 
     if (localeID == nullptr)
     {
         localeID = uloc_getDefault();
     }
-    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
+    CharString cntry = ulocimp_getRegion(localeID, err);
     if (U_FAILURE(err))
         return "";
-    offset = _findIndex(COUNTRIES, cntry);
-    if (offset < 0)
-        return "";
-
-    return COUNTRIES_3[offset];
+    std::optional<int16_t> offset = _findIndex(COUNTRIES, cntry.data());
+    return offset.has_value() ? COUNTRIES_3[*offset] : "";
 }
 
 U_CAPI uint32_t  U_EXPORT2
 uloc_getLCID(const char* localeID)
 {
     UErrorCode status = U_ZERO_ERROR;
-    char       langID[ULOC_FULLNAME_CAPACITY];
     uint32_t   lcid = 0;
 
     /* Check for incomplete id. */
@@ -2042,37 +2207,20 @@ uloc_getLCID(const char* localeID)
         return lcid;
     }
 
-    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
-    if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
+    CharString langID = ulocimp_getLanguage(localeID, status);
+    if (U_FAILURE(status)) {
         return 0;
     }
 
     if (uprv_strchr(localeID, '@')) {
         // uprv_convertToLCID does not support keywords other than collation.
         // Remove all keywords except collation.
-        int32_t len;
-        char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
-
-        CharString collVal;
-        {
-            CharStringByteSink sink(&collVal);
-            ulocimp_getKeywordValue(localeID, "collation", sink, &status);
-        }
-
+        CharString collVal = ulocimp_getKeywordValue(localeID, "collation", status);
         if (U_SUCCESS(status) && !collVal.isEmpty()) {
-            len = uloc_getBaseName(localeID, tmpLocaleID,
-                UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
-
-            if (U_SUCCESS(status) && len > 0) {
-                tmpLocaleID[len] = 0;
-
-                len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID,
-                    UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
-
-                if (U_SUCCESS(status) && len > 0) {
-                    tmpLocaleID[len] = 0;
-                    return uprv_convertToLCID(langID, tmpLocaleID, &status);
-                }
+            CharString tmpLocaleID = ulocimp_getBaseName(localeID, status);
+            ulocimp_setKeywordValue("collation", collVal.data(), tmpLocaleID, status);
+            if (U_SUCCESS(status)) {
+                return uprv_convertToLCID(langID.data(), tmpLocaleID.data(), &status);
             }
         }
 
@@ -2080,7 +2228,7 @@ uloc_getLCID(const char* localeID)
         status = U_ZERO_ERROR;
     }
 
-    return uprv_convertToLCID(langID, localeID, &status);
+    return uprv_convertToLCID(langID.data(), localeID, &status);
 }
 
 U_CAPI int32_t U_EXPORT2
@@ -2156,7 +2304,9 @@ uloc_toUnicodeLocaleType(const char* keyword, const char* value)
     return bcpType;
 }
 
-static UBool
+namespace {
+
+bool
 isWellFormedLegacyKey(const char* legacyKey)
 {
     const char* p = legacyKey;
@@ -2169,7 +2319,7 @@ isWellFormedLegacyKey(const char* legacyKey)
     return true;
 }
 
-static UBool
+bool
 isWellFormedLegacyType(const char* legacyType)
 {
     const char* p = legacyType;
@@ -2190,6 +2340,8 @@ isWellFormedLegacyType(const char* legacyType)
     return (alphaNumLen != 0);
 }
 
+}  // namespace
+
 U_CAPI const char* U_EXPORT2
 uloc_toLegacyKey(const char* keyword)
 {