summary | refs | log | tree | commit | diff | stats
path: root/thirdparty/icu4c/common/uloc.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/uloc.cpp')
-rw-r--r-- thirdparty/icu4c/common/uloc.cpp 1652
1 file changed, 902 insertions, 750 deletions
diff --git a/thirdparty/icu4c/common/uloc.cpp b/thirdparty/icu4c/common/uloc.cpp
index ce49d6c50e..88fe7eaadc 100644
--- a/thirdparty/icu4c/common/uloc.cpp
+++ b/thirdparty/icu4c/common/uloc.cpp
@@ -30,6 +30,8 @@
l = lang, C = ctry, M = charmap, V = variant
*/
+#include <optional>
+
#include "unicode/bytestream.h"
#include "unicode/errorcode.h"
#include "unicode/stringpiece.h"
@@ -58,6 +60,8 @@ U_NAMESPACE_USE
U_CFUNC void locale_set_default(const char *id);
U_CFUNC const char *locale_get_default();
+namespace {
+
/* ### Data tables **************************************************/
/**
@@ -94,7 +98,7 @@ U_CFUNC const char *locale_get_default();
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
-static const char * const LANGUAGES[] = {
+constexpr const char* LANGUAGES[] = {
"aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
"af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
"aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
@@ -185,10 +189,10 @@ nullptr,
nullptr
};
-static const char* const DEPRECATED_LANGUAGES[]={
+constexpr const char* DEPRECATED_LANGUAGES[]={
"in", "iw", "ji", "jw", "mo", nullptr, nullptr
};
-static const char* const REPLACEMENT_LANGUAGES[]={
+constexpr const char* REPLACEMENT_LANGUAGES[]={
"id", "he", "yi", "jv", "ro", nullptr, nullptr
};
@@ -211,7 +215,7 @@ static const char* const REPLACEMENT_LANGUAGES[]={
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
-static const char * const LANGUAGES_3[] = {
+constexpr const char* LANGUAGES_3[] = {
"aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
"afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
"aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
@@ -327,7 +331,7 @@ nullptr
* RO(ROM) is now RO(ROU) according to
* http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
*/
-static const char * const COUNTRIES[] = {
+constexpr const char* COUNTRIES[] = {
"AD", "AE", "AF", "AG", "AI", "AL", "AM",
"AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
"BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
@@ -363,10 +367,10 @@ nullptr,
nullptr
};
-static const char* const DEPRECATED_COUNTRIES[] = {
+constexpr const char* DEPRECATED_COUNTRIES[] = {
"AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", nullptr, nullptr /* deprecated country list */
};
-static const char* const REPLACEMENT_COUNTRIES[] = {
+constexpr const char* REPLACEMENT_COUNTRIES[] = {
/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
"CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", nullptr, nullptr /* replacement country codes */
};
@@ -384,7 +388,7 @@ static const char* const REPLACEMENT_COUNTRIES[] = {
* second list, and another nullptr entry. The two lists correspond to
* the two lists in COUNTRIES.
*/
-static const char * const COUNTRIES_3[] = {
+constexpr const char* COUNTRIES_3[] = {
/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
"AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
@@ -460,7 +464,7 @@ typedef struct CanonicalizationMap {
* A map to canonicalize locale IDs. This handles a variety of
* different semantic kinds of transformations.
*/
-static const CanonicalizationMap CANONICALIZE_MAP[] = {
+constexpr CanonicalizationMap CANONICALIZE_MAP[] = {
{ "art__LOJBAN", "jbo" }, /* registered name */
{ "hy__AREVELA", "hy" }, /* Registered IANA variant */
{ "hy__AREVMDA", "hyw" }, /* Registered IANA variant */
@@ -475,15 +479,13 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = {
};
/* ### BCP47 Conversion *******************************************/
-/* Test if the locale id has BCP47 u extension and does not have '@' */
-#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == nullptr && getShortestSubtagLength(localeID) == 1)
/* Gets the size of the shortest subtag in the given localeID. */
-static int32_t getShortestSubtagLength(const char *localeID) {
+int32_t getShortestSubtagLength(const char *localeID) {
int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
int32_t length = localeIDLength;
int32_t tmpLength = 0;
int32_t i;
- UBool reset = true;
+ bool reset = true;
for (i = 0; i < localeIDLength; i++) {
if (localeID[i] != '_' && localeID[i] != '-') {
@@ -502,12 +504,18 @@ static int32_t getShortestSubtagLength(const char *localeID) {
return length;
}
+/* Test if the locale id has BCP47 u extension and does not have '@' */
+inline bool _hasBCP47Extension(const char *id) {
+ return id != nullptr && uprv_strstr(id, "@") == nullptr && getShortestSubtagLength(id) == 1;
+}
/* ### Keywords **************************************************/
-#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
-#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
+inline bool UPRV_ISDIGIT(char c) { return c >= '0' && c <= '9'; }
+inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); }
/* Punctuation/symbols allowed in legacy key values */
-#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')
+inline bool UPRV_OK_VALUE_PUNCTUATION(char c) { return c == '_' || c == '-' || c == '+' || c == '/'; }
+
+} // namespace
#define ULOC_KEYWORD_BUFFER_LEN 25
#define ULOC_MAX_NO_KEYWORDS 25
@@ -536,36 +544,31 @@ locale_getKeywordsStart(const char *localeID) {
return nullptr;
}
+namespace {
+
/**
- * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
* @param keywordName incoming name to be canonicalized
* @param status return status (keyword too long)
- * @return length of the keyword name
+ * @return the keyword name
*/
-static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
+CharString locale_canonKeywordName(const char* keywordName, UErrorCode& status)
{
- int32_t keywordNameLen = 0;
+ if (U_FAILURE(status)) { return {}; }
+ CharString result;
for (; *keywordName != 0; keywordName++) {
if (!UPRV_ISALPHANUM(*keywordName)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
- return 0;
- }
- if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
- buf[keywordNameLen++] = uprv_tolower(*keywordName);
- } else {
- /* keyword name too long for internal buffer */
- *status = U_INTERNAL_PROGRAM_ERROR;
- return 0;
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
+ return {};
}
+ result.append(uprv_tolower(*keywordName), status);
}
- if (keywordNameLen == 0) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
- return 0;
+ if (result.isEmpty()) {
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
+ return {};
}
- buf[keywordNameLen] = 0; /* terminate */
- return keywordNameLen;
+ return result;
}
typedef struct {
@@ -575,20 +578,41 @@ typedef struct {
int32_t valueLen;
} KeywordStruct;
-static int32_t U_CALLCONV
+int32_t U_CALLCONV
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
const char* leftString = ((const KeywordStruct *)left)->keyword;
const char* rightString = ((const KeywordStruct *)right)->keyword;
return uprv_strcmp(leftString, rightString);
}
-U_CFUNC void
-ulocimp_getKeywords(const char *localeID,
+} // namespace
+
+U_EXPORT CharString
+ulocimp_getKeywords(const char* localeID,
+ char prev,
+ bool valuesToo,
+ UErrorCode& status)
+{
+ return ByteSinkUtil::viaByteSinkToCharString(
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getKeywords(localeID,
+ prev,
+ sink,
+ valuesToo,
+ status);
+ },
+ status);
+}
+
+U_EXPORT void
+ulocimp_getKeywords(const char* localeID,
char prev,
ByteSink& sink,
- UBool valuesToo,
- UErrorCode *status)
+ bool valuesToo,
+ UErrorCode& status)
{
+ if (U_FAILURE(status)) { return; }
+
KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
@@ -601,7 +625,7 @@ ulocimp_getKeywords(const char *localeID,
if(prev == '@') { /* start of keyword definition */
/* we will grab pairs, trim spaces, lowercase keywords, sort and return */
do {
- UBool duplicate = false;
+ bool duplicate = false;
/* skip leading spaces */
while(*pos == ' ') {
pos++;
@@ -610,7 +634,7 @@ ulocimp_getKeywords(const char *localeID,
break;
}
if(numKeywords == maxKeywords) {
- *status = U_INTERNAL_PROGRAM_ERROR;
+ status = U_INTERNAL_PROGRAM_ERROR;
return;
}
equalSign = uprv_strchr(pos, '=');
@@ -618,13 +642,13 @@ ulocimp_getKeywords(const char *localeID,
/* lack of '=' [foo@currency] is illegal */
/* ';' before '=' [foo@currency;collation=pinyin] is illegal */
if(!equalSign || (semicolon && semicolon<equalSign)) {
- *status = U_INVALID_FORMAT_ERROR;
+ status = U_INVALID_FORMAT_ERROR;
return;
}
/* need to normalize both keyword and keyword name */
if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
/* keyword name too long for internal buffer */
- *status = U_INTERNAL_PROGRAM_ERROR;
+ status = U_INTERNAL_PROGRAM_ERROR;
return;
}
for(i = 0, n = 0; i < equalSign - pos; ++i) {
@@ -635,7 +659,7 @@ ulocimp_getKeywords(const char *localeID,
/* zero-length keyword is an error. */
if (n == 0) {
- *status = U_INVALID_FORMAT_ERROR;
+ status = U_INVALID_FORMAT_ERROR;
return;
}
@@ -650,7 +674,7 @@ ulocimp_getKeywords(const char *localeID,
/* Premature end or zero-length value */
if (!*equalSign || equalSign == semicolon) {
- *status = U_INVALID_FORMAT_ERROR;
+ status = U_INVALID_FORMAT_ERROR;
return;
}
@@ -685,7 +709,7 @@ ulocimp_getKeywords(const char *localeID,
/* now we have a list of keywords */
/* we need to sort it */
- uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, nullptr, false, status);
+ uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, nullptr, false, &status);
/* Now construct the keyword part */
for(i = 0; i < numKeywords; i++) {
@@ -709,137 +733,127 @@ uloc_getKeywordValue(const char* localeID,
char* buffer, int32_t bufferCapacity,
UErrorCode* status)
{
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- CheckedArrayByteSink sink(buffer, bufferCapacity);
- ulocimp_getKeywordValue(localeID, keywordName, sink, status);
-
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*status)) {
- return reslen;
- }
-
- if (sink.Overflowed()) {
- *status = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(buffer, bufferCapacity, reslen, status);
- }
+ return ByteSinkUtil::viaByteSinkToTerminatedChars(
+ buffer, bufferCapacity,
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getKeywordValue(localeID, keywordName, sink, status);
+ },
+ *status);
+}
- return reslen;
+U_EXPORT CharString
+ulocimp_getKeywordValue(const char* localeID,
+ const char* keywordName,
+ UErrorCode& status)
+{
+ return ByteSinkUtil::viaByteSinkToCharString(
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getKeywordValue(localeID, keywordName, sink, status);
+ },
+ status);
}
-U_CAPI void U_EXPORT2
+U_EXPORT void
ulocimp_getKeywordValue(const char* localeID,
const char* keywordName,
icu::ByteSink& sink,
- UErrorCode* status)
+ UErrorCode& status)
{
+ if (U_FAILURE(status)) { return; }
+
+ if (localeID == nullptr || keywordName == nullptr || keywordName[0] == 0) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
const char* startSearchHere = nullptr;
const char* nextSeparator = nullptr;
- char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
- char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
- if(status && U_SUCCESS(*status) && localeID) {
- CharString tempBuffer;
- const char* tmpLocaleID;
+ CharString tempBuffer;
+ const char* tmpLocaleID;
- if (keywordName == nullptr || keywordName[0] == 0) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
+ CharString canonKeywordName = locale_canonKeywordName(keywordName, status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ if (_hasBCP47Extension(localeID)) {
+ tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, status);
+ tmpLocaleID = U_SUCCESS(status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
+ } else {
+ tmpLocaleID=localeID;
+ }
- locale_canonKeywordName(keywordNameBuffer, keywordName, status);
- if(U_FAILURE(*status)) {
+ startSearchHere = locale_getKeywordsStart(tmpLocaleID);
+ if(startSearchHere == nullptr) {
+ /* no keywords, return at once */
return;
- }
+ }
- if (_hasBCP47Extension(localeID)) {
- CharStringByteSink sink(&tempBuffer);
- ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status);
- tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
- } else {
- tmpLocaleID=localeID;
- }
-
- startSearchHere = locale_getKeywordsStart(tmpLocaleID);
- if(startSearchHere == nullptr) {
- /* no keywords, return at once */
- return;
- }
-
- /* find the first keyword */
- while(startSearchHere) {
- const char* keyValueTail;
- int32_t keyValueLen;
-
- startSearchHere++; /* skip @ or ; */
- nextSeparator = uprv_strchr(startSearchHere, '=');
- if(!nextSeparator) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
- return;
- }
- /* strip leading & trailing spaces (TC decided to tolerate these) */
- while(*startSearchHere == ' ') {
- startSearchHere++;
+ /* find the first keyword */
+ while(startSearchHere) {
+ const char* keyValueTail;
+
+ startSearchHere++; /* skip @ or ; */
+ nextSeparator = uprv_strchr(startSearchHere, '=');
+ if(!nextSeparator) {
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
+ return;
+ }
+ /* strip leading & trailing spaces (TC decided to tolerate these) */
+ while(*startSearchHere == ' ') {
+ startSearchHere++;
+ }
+ keyValueTail = nextSeparator;
+ while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
+ keyValueTail--;
+ }
+ /* now keyValueTail points to first char after the keyName */
+ /* copy & normalize keyName from locale */
+ if (startSearchHere == keyValueTail) {
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
+ return;
+ }
+ CharString localeKeywordName;
+ while (startSearchHere < keyValueTail) {
+ if (!UPRV_ISALPHANUM(*startSearchHere)) {
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
+ return;
}
- keyValueTail = nextSeparator;
- while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
+ localeKeywordName.append(uprv_tolower(*startSearchHere++), status);
+ }
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ startSearchHere = uprv_strchr(nextSeparator, ';');
+
+ if (canonKeywordName == localeKeywordName) {
+ /* current entry matches the keyword. */
+ nextSeparator++; /* skip '=' */
+ /* First strip leading & trailing spaces (TC decided to tolerate these) */
+ while(*nextSeparator == ' ') {
+ nextSeparator++;
+ }
+ keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
+ while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
keyValueTail--;
- }
- /* now keyValueTail points to first char after the keyName */
- /* copy & normalize keyName from locale */
- if (startSearchHere == keyValueTail) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
- return;
- }
- keyValueLen = 0;
- while (startSearchHere < keyValueTail) {
- if (!UPRV_ISALPHANUM(*startSearchHere)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
- return;
}
- if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
- localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
- } else {
- /* keyword name too long for internal buffer */
- *status = U_INTERNAL_PROGRAM_ERROR;
+ /* Now copy the value, but check well-formedness */
+ if (nextSeparator == keyValueTail) {
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
return;
}
- }
- localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
-
- startSearchHere = uprv_strchr(nextSeparator, ';');
-
- if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
- /* current entry matches the keyword. */
- nextSeparator++; /* skip '=' */
- /* First strip leading & trailing spaces (TC decided to tolerate these) */
- while(*nextSeparator == ' ') {
- nextSeparator++;
- }
- keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
- while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
- keyValueTail--;
- }
- /* Now copy the value, but check well-formedness */
- if (nextSeparator == keyValueTail) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
+ while (nextSeparator < keyValueTail) {
+ if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
return;
}
- while (nextSeparator < keyValueTail) {
- if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
- return;
- }
- /* Should we lowercase value to return here? Tests expect as-is. */
- sink.Append(nextSeparator++, 1);
- }
- return;
- }
- }
+ /* Should we lowercase value to return here? Tests expect as-is. */
+ sink.Append(nextSeparator++, 1);
+ }
+ return;
+ }
}
}
@@ -849,100 +863,136 @@ uloc_setKeywordValue(const char* keywordName,
char* buffer, int32_t bufferCapacity,
UErrorCode* status)
{
- /* TODO: sorting. removal. */
- int32_t keywordNameLen;
- int32_t keywordValueLen;
- int32_t bufLen;
- int32_t needLen = 0;
- char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
- char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
- char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
- int32_t rc;
- char* nextSeparator = nullptr;
- char* nextEqualsign = nullptr;
- char* startSearchHere = nullptr;
- char* keywordStart = nullptr;
- CharString updatedKeysAndValues;
- UBool handledInputKeyAndValue = false;
- char keyValuePrefix = '@';
+ if (U_FAILURE(*status)) { return 0; }
- if(U_FAILURE(*status)) {
- return -1;
- }
- if (*status == U_STRING_NOT_TERMINATED_WARNING) {
- *status = U_ZERO_ERROR;
- }
- if (keywordName == nullptr || keywordName[0] == 0 || bufferCapacity <= 1) {
+ if (bufferCapacity <= 1) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
- bufLen = (int32_t)uprv_strlen(buffer);
+
+ int32_t bufLen = (int32_t)uprv_strlen(buffer);
if(bufferCapacity<bufLen) {
/* The capacity is less than the length?! Is this NUL terminated? */
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
- keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
- if(U_FAILURE(*status)) {
+
+ char* keywords = const_cast<char*>(locale_getKeywordsStart(buffer));
+ int32_t baseLen = keywords == nullptr ? bufLen : keywords - buffer;
+ // Remove -1 from the capacity so that this function can guarantee NUL termination.
+ CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords,
+ bufferCapacity - baseLen - 1);
+ int32_t reslen = ulocimp_setKeywordValue(
+ keywords, keywordName, keywordValue, sink, *status);
+
+ if (U_FAILURE(*status)) {
+ return *status == U_BUFFER_OVERFLOW_ERROR ? reslen + baseLen : 0;
+ }
+
+ // See the documentation for this function, it's guaranteed to never
+ // overflow the buffer but instead abort with BUFFER_OVERFLOW_ERROR.
+ // In this case, nothing has been written to the sink, so it cannot have Overflowed().
+ U_ASSERT(!sink.Overflowed());
+ U_ASSERT(reslen >= 0);
+ return u_terminateChars(buffer, bufferCapacity, reslen + baseLen, status);
+}
+
+U_EXPORT void
+ulocimp_setKeywordValue(const char* keywordName,
+ const char* keywordValue,
+ CharString& localeID,
+ UErrorCode& status)
+{
+ if (U_FAILURE(status)) { return; }
+ // This is safe because CharString::truncate() doesn't actually erase any
+ // data, but simply sets the position for where new data will be written.
+ const char* keywords = locale_getKeywordsStart(localeID.data());
+ if (keywords != nullptr) localeID.truncate(keywords - localeID.data());
+ CharStringByteSink sink(&localeID);
+ ulocimp_setKeywordValue(keywords, keywordName, keywordValue, sink, status);
+}
+
+U_EXPORT int32_t
+ulocimp_setKeywordValue(const char* keywords,
+ const char* keywordName,
+ const char* keywordValue,
+ ByteSink& sink,
+ UErrorCode& status)
+{
+ if (U_FAILURE(status)) { return 0; }
+
+ /* TODO: sorting. removal. */
+ int32_t needLen = 0;
+ int32_t rc;
+ const char* nextSeparator = nullptr;
+ const char* nextEqualsign = nullptr;
+ const char* keywordStart = nullptr;
+ CharString updatedKeysAndValues;
+ bool handledInputKeyAndValue = false;
+ char keyValuePrefix = '@';
+
+ if (status == U_STRING_NOT_TERMINATED_WARNING) {
+ status = U_ZERO_ERROR;
+ }
+ if (keywordName == nullptr || keywordName[0] == 0) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ CharString canonKeywordName = locale_canonKeywordName(keywordName, status);
+ if (U_FAILURE(status)) {
return 0;
}
- keywordValueLen = 0;
+ CharString canonKeywordValue;
if(keywordValue) {
while (*keywordValue != 0) {
if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
- return 0;
- }
- if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
- /* Should we force lowercase in value to set? */
- keywordValueBuffer[keywordValueLen++] = *keywordValue++;
- } else {
- /* keywordValue too long for internal buffer */
- *status = U_INTERNAL_PROGRAM_ERROR;
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
return 0;
}
+ /* Should we force lowercase in value to set? */
+ canonKeywordValue.append(*keywordValue++, status);
}
}
- keywordValueBuffer[keywordValueLen] = 0; /* terminate */
+ if (U_FAILURE(status)) {
+ return 0;
+ }
- startSearchHere = (char*)locale_getKeywordsStart(buffer);
- if(startSearchHere == nullptr || (startSearchHere[1]==0)) {
- if(keywordValueLen == 0) { /* no keywords = nothing to remove */
- U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
- return bufLen;
+ if (keywords == nullptr || keywords[1] == '\0') {
+ if (canonKeywordValue.isEmpty()) { /* no keywords = nothing to remove */
+ U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
+ return 0;
}
- needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
- if(startSearchHere) { /* had a single @ */
- needLen--; /* already had the @ */
- /* startSearchHere points at the @ */
- } else {
- startSearchHere=buffer+bufLen;
- }
- if(needLen >= bufferCapacity) {
- *status = U_BUFFER_OVERFLOW_ERROR;
+ needLen = 1 + canonKeywordName.length() + 1 + canonKeywordValue.length();
+ int32_t capacity = 0;
+ char* buffer = sink.GetAppendBuffer(
+ needLen, needLen, nullptr, needLen, &capacity);
+ if (capacity < needLen || buffer == nullptr) {
+ status = U_BUFFER_OVERFLOW_ERROR;
return needLen; /* no change */
}
- *startSearchHere++ = '@';
- uprv_strcpy(startSearchHere, keywordNameBuffer);
- startSearchHere += keywordNameLen;
- *startSearchHere++ = '=';
- uprv_strcpy(startSearchHere, keywordValueBuffer);
- U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
+ char* it = buffer;
+
+ *it++ = '@';
+ uprv_memcpy(it, canonKeywordName.data(), canonKeywordName.length());
+ it += canonKeywordName.length();
+ *it++ = '=';
+ uprv_memcpy(it, canonKeywordValue.data(), canonKeywordValue.length());
+ sink.Append(buffer, needLen);
+ U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
return needLen;
} /* end shortcut - no @ */
- keywordStart = startSearchHere;
+ keywordStart = keywords;
/* search for keyword */
while(keywordStart) {
const char* keyValueTail;
- int32_t keyValueLen;
keywordStart++; /* skip @ or ; */
nextEqualsign = uprv_strchr(keywordStart, '=');
if (!nextEqualsign) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
return 0;
}
/* strip leading & trailing spaces (TC decided to tolerate these) */
@@ -956,24 +1006,20 @@ uloc_setKeywordValue(const char* keywordName,
/* now keyValueTail points to first char after the keyName */
/* copy & normalize keyName from locale */
if (keywordStart == keyValueTail) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
return 0;
}
- keyValueLen = 0;
+ CharString localeKeywordName;
while (keywordStart < keyValueTail) {
if (!UPRV_ISALPHANUM(*keywordStart)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
- return 0;
- }
- if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
- localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
- } else {
- /* keyword name too long for internal buffer */
- *status = U_INTERNAL_PROGRAM_ERROR;
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
return 0;
}
+ localeKeywordName.append(uprv_tolower(*keywordStart++), status);
+ }
+ if (U_FAILURE(status)) {
+ return 0;
}
- localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
nextSeparator = uprv_strchr(nextEqualsign, ';');
@@ -988,46 +1034,46 @@ uloc_setKeywordValue(const char* keywordName,
keyValueTail--;
}
if (nextEqualsign == keyValueTail) {
- *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
return 0;
}
- rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
+ rc = uprv_strcmp(canonKeywordName.data(), localeKeywordName.data());
if(rc == 0) {
/* Current entry matches the input keyword. Update the entry */
- if(keywordValueLen > 0) { /* updating a value */
- updatedKeysAndValues.append(keyValuePrefix, *status);
+ if (!canonKeywordValue.isEmpty()) { /* updating a value */
+ updatedKeysAndValues.append(keyValuePrefix, status);
keyValuePrefix = ';'; /* for any subsequent key-value pair */
- updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
- updatedKeysAndValues.append('=', *status);
- updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
+ updatedKeysAndValues.append(canonKeywordName, status);
+ updatedKeysAndValues.append('=', status);
+ updatedKeysAndValues.append(canonKeywordValue, status);
} /* else removing this entry, don't emit anything */
handledInputKeyAndValue = true;
} else {
/* input keyword sorts earlier than current entry, add before current entry */
- if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
+ if (rc < 0 && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) {
/* insert new entry at this location */
- updatedKeysAndValues.append(keyValuePrefix, *status);
+ updatedKeysAndValues.append(keyValuePrefix, status);
keyValuePrefix = ';'; /* for any subsequent key-value pair */
- updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
- updatedKeysAndValues.append('=', *status);
- updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
+ updatedKeysAndValues.append(canonKeywordName, status);
+ updatedKeysAndValues.append('=', status);
+ updatedKeysAndValues.append(canonKeywordValue, status);
handledInputKeyAndValue = true;
}
/* copy the current entry */
- updatedKeysAndValues.append(keyValuePrefix, *status);
+ updatedKeysAndValues.append(keyValuePrefix, status);
keyValuePrefix = ';'; /* for any subsequent key-value pair */
- updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
- updatedKeysAndValues.append('=', *status);
- updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
+ updatedKeysAndValues.append(localeKeywordName, status);
+ updatedKeysAndValues.append('=', status);
+ updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), status);
}
- if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
+ if (!nextSeparator && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) {
/* append new entry at the end, it sorts later than existing entries */
- updatedKeysAndValues.append(keyValuePrefix, *status);
+ updatedKeysAndValues.append(keyValuePrefix, status);
/* skip keyValuePrefix update, no subsequent key-value pair */
- updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
- updatedKeysAndValues.append('=', *status);
- updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
+ updatedKeysAndValues.append(canonKeywordName, status);
+ updatedKeysAndValues.append('=', status);
+ updatedKeysAndValues.append(canonKeywordValue, status);
handledInputKeyAndValue = true;
}
keywordStart = nextSeparator;
@@ -1041,44 +1087,57 @@ uloc_setKeywordValue(const char* keywordName,
* error return but the passed-in locale is unmodified and the original bufLen is
* returned.
*/
- if (!handledInputKeyAndValue || U_FAILURE(*status)) {
+ if (!handledInputKeyAndValue || U_FAILURE(status)) {
/* if input key/value specified removal of a keyword not present in locale, or
* there was an error in CharString.append, leave original locale alone. */
- U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
- return bufLen;
+ U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
+ return (int32_t)uprv_strlen(keywords);
}
- // needLen = length of the part before '@'
- needLen = (int32_t)(startSearchHere - buffer);
- // Check to see can we fit the startSearchHere, if not, return
+ needLen = updatedKeysAndValues.length();
+ // Check to see can we fit the updatedKeysAndValues, if not, return
// U_BUFFER_OVERFLOW_ERROR without copy updatedKeysAndValues into it.
// We do this because this API function does not behave like most others:
// It promises never to set a U_STRING_NOT_TERMINATED_WARNING.
// When the contents fits but without the terminating NUL, in this case we need to not change
// the buffer contents and return with a buffer overflow error.
- int32_t appendLength = updatedKeysAndValues.length();
- if (appendLength >= bufferCapacity - needLen) {
- *status = U_BUFFER_OVERFLOW_ERROR;
- return needLen + appendLength;
- }
- needLen += updatedKeysAndValues.extract(
- startSearchHere, bufferCapacity - needLen, *status);
- U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
+ if (needLen > 0) {
+ int32_t capacity = 0;
+ char* buffer = sink.GetAppendBuffer(
+ needLen, needLen, nullptr, needLen, &capacity);
+ if (capacity < needLen || buffer == nullptr) {
+ status = U_BUFFER_OVERFLOW_ERROR;
+ return needLen;
+ }
+ uprv_memcpy(buffer, updatedKeysAndValues.data(), needLen);
+ sink.Append(buffer, needLen);
+ }
+ U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
return needLen;
}
/* ### ID parsing implementation **************************************************/
-#define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
+namespace {
+
+inline bool _isPrefixLetter(char a) { return a == 'x' || a == 'X' || a == 'i' || a == 'I'; }
/*returns true if one of the special prefixes is here (s=string)
'x-' or 'i-' */
-#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
+inline bool _isIDPrefix(const char *s) { return _isPrefixLetter(s[0]) && _isIDSeparator(s[1]); }
/* Dot terminates it because of POSIX form where dot precedes the codepage
* except for variant
*/
-#define _isTerminator(a) ((a==0)||(a=='.')||(a=='@'))
+inline bool _isTerminator(char a) { return a == 0 || a == '.' || a == '@'; }
+
+inline bool _isBCP47Extension(const char* p) {
+ return p[0] == '-' &&
+ (p[1] == 't' || p[1] == 'T' ||
+ p[1] == 'u' || p[1] == 'U' ||
+ p[1] == 'x' || p[1] == 'X') &&
+ p[2] == '-';
+}
/**
* Lookup 'key' in the array 'list'. The array 'list' should contain
@@ -1087,7 +1146,7 @@ uloc_setKeywordValue(const char* keywordName,
* The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
* COUNTRIES_3.
*/
-static int16_t _findIndex(const char* const* list, const char* key)
+std::optional<int16_t> _findIndex(const char* const* list, const char* key)
{
const char* const* anchor = list;
int32_t pass = 0;
@@ -1102,38 +1161,40 @@ static int16_t _findIndex(const char* const* list, const char* key)
}
++list; /* skip final nullptr *CWB*/
}
- return -1;
+ return std::nullopt;
}
+} // namespace
+
U_CFUNC const char*
uloc_getCurrentCountryID(const char* oldID){
- int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
- if (offset >= 0) {
- return REPLACEMENT_COUNTRIES[offset];
- }
- return oldID;
+ std::optional<int16_t> offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
+ return offset.has_value() ? REPLACEMENT_COUNTRIES[*offset] : oldID;
}
U_CFUNC const char*
uloc_getCurrentLanguageID(const char* oldID){
- int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
- if (offset >= 0) {
- return REPLACEMENT_LANGUAGES[offset];
- }
- return oldID;
+ std::optional<int16_t> offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
+ return offset.has_value() ? REPLACEMENT_LANGUAGES[*offset] : oldID;
}
+
+namespace {
+
/*
- * the internal functions _getLanguage(), _getCountry(), _getVariant()
+ * the internal functions _getLanguage(), _getScript(), _getRegion(), _getVariant()
* avoid duplicating code to handle the earlier locale ID pieces
* in the functions for the later ones by
* setting the *pEnd pointer to where they stopped parsing
*
* TODO try to use this in Locale
*/
-CharString U_EXPORT2
-ulocimp_getLanguage(const char *localeID,
- const char **pEnd,
- UErrorCode &status) {
- CharString result;
+
+void
+_getLanguage(const char* localeID,
+ ByteSink* sink,
+ const char** pEnd,
+ UErrorCode& status) {
+ U_ASSERT(pEnd != nullptr);
+ *pEnd = localeID;
if (uprv_stricmp(localeID, "root") == 0) {
localeID += 4;
@@ -1145,150 +1206,381 @@ ulocimp_getLanguage(const char *localeID,
localeID += 3;
}
+ constexpr int32_t MAXLEN = ULOC_LANG_CAPACITY - 1; // Minus NUL.
+
/* if it starts with i- or x- then copy that prefix */
- if(_isIDPrefix(localeID)) {
- result.append((char)uprv_tolower(*localeID), status);
- result.append('-', status);
- localeID+=2;
+ int32_t len = _isIDPrefix(localeID) ? 2 : 0;
+ while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
+ if (len == MAXLEN) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ len++;
}
- /* copy the language as far as possible and count its length */
- while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
- result.append((char)uprv_tolower(*localeID), status);
- localeID++;
+ *pEnd = localeID + len;
+ if (sink == nullptr || len == 0) { return; }
+
+ int32_t minCapacity = uprv_max(len, 4); // Minimum 3 letters plus NUL.
+ char scratch[MAXLEN];
+ int32_t capacity = 0;
+ char* buffer = sink->GetAppendBuffer(
+ minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity);
+
+ for (int32_t i = 0; i < len; ++i) {
+ buffer[i] = uprv_tolower(localeID[i]);
+ }
+ if (_isIDSeparator(localeID[1])) {
+ buffer[1] = '-';
}
- if(result.length()==3) {
+ if (len == 3) {
/* convert 3 character code to 2 character code if possible *CWB*/
- int32_t offset = _findIndex(LANGUAGES_3, result.data());
- if(offset>=0) {
- result.clear();
- result.append(LANGUAGES[offset], status);
+ U_ASSERT(capacity >= 4);
+ buffer[3] = '\0';
+ std::optional<int16_t> offset = _findIndex(LANGUAGES_3, buffer);
+ if (offset.has_value()) {
+ const char* const alias = LANGUAGES[*offset];
+ sink->Append(alias, (int32_t)uprv_strlen(alias));
+ return;
}
}
- if(pEnd!=nullptr) {
- *pEnd=localeID;
- }
-
- return result;
+ sink->Append(buffer, len);
}
-CharString U_EXPORT2
-ulocimp_getScript(const char *localeID,
- const char **pEnd,
- UErrorCode &status) {
- CharString result;
- int32_t idLen = 0;
+void
+_getScript(const char* localeID,
+ ByteSink* sink,
+ const char** pEnd) {
+ U_ASSERT(pEnd != nullptr);
+ *pEnd = localeID;
- if (pEnd != nullptr) {
- *pEnd = localeID;
- }
+ constexpr int32_t LENGTH = 4;
- /* copy the second item as far as possible and count its length */
- while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
- && uprv_isASCIILetter(localeID[idLen])) {
- idLen++;
+ int32_t len = 0;
+ while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) &&
+ uprv_isASCIILetter(localeID[len])) {
+ if (len == LENGTH) { return; }
+ len++;
}
+ if (len != LENGTH) { return; }
- /* If it's exactly 4 characters long, then it's a script and not a country. */
- if (idLen == 4) {
- int32_t i;
- if (pEnd != nullptr) {
- *pEnd = localeID+idLen;
- }
- if (idLen >= 1) {
- result.append((char)uprv_toupper(*(localeID++)), status);
- }
- for (i = 1; i < idLen; i++) {
- result.append((char)uprv_tolower(*(localeID++)), status);
- }
+ *pEnd = localeID + LENGTH;
+ if (sink == nullptr) { return; }
+
+ char scratch[LENGTH];
+ int32_t capacity = 0;
+ char* buffer = sink->GetAppendBuffer(
+ LENGTH, LENGTH, scratch, UPRV_LENGTHOF(scratch), &capacity);
+
+ buffer[0] = uprv_toupper(localeID[0]);
+ for (int32_t i = 1; i < LENGTH; ++i) {
+ buffer[i] = uprv_tolower(localeID[i]);
}
- return result;
+ sink->Append(buffer, LENGTH);
}
-CharString U_EXPORT2
-ulocimp_getCountry(const char *localeID,
- const char **pEnd,
- UErrorCode &status) {
- CharString result;
- int32_t idLen=0;
+void
+_getRegion(const char* localeID,
+ ByteSink* sink,
+ const char** pEnd) {
+ U_ASSERT(pEnd != nullptr);
+ *pEnd = localeID;
- /* copy the country as far as possible and count its length */
- while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
- result.append((char)uprv_toupper(localeID[idLen]), status);
- idLen++;
+ constexpr int32_t MINLEN = 2;
+ constexpr int32_t MAXLEN = ULOC_COUNTRY_CAPACITY - 1; // Minus NUL.
+
+ int32_t len = 0;
+ while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
+ if (len == MAXLEN) { return; }
+ len++;
}
+ if (len < MINLEN) { return; }
- /* the country should be either length 2 or 3 */
- if (idLen == 2 || idLen == 3) {
- /* convert 3 character code to 2 character code if possible *CWB*/
- if(idLen==3) {
- int32_t offset = _findIndex(COUNTRIES_3, result.data());
- if(offset>=0) {
- result.clear();
- result.append(COUNTRIES[offset], status);
- }
- }
- localeID+=idLen;
- } else {
- result.clear();
+ *pEnd = localeID + len;
+ if (sink == nullptr) { return; }
+
+ char scratch[ULOC_COUNTRY_CAPACITY];
+ int32_t capacity = 0;
+ char* buffer = sink->GetAppendBuffer(
+ ULOC_COUNTRY_CAPACITY,
+ ULOC_COUNTRY_CAPACITY,
+ scratch,
+ UPRV_LENGTHOF(scratch),
+ &capacity);
+
+ for (int32_t i = 0; i < len; ++i) {
+ buffer[i] = uprv_toupper(localeID[i]);
}
- if(pEnd!=nullptr) {
- *pEnd=localeID;
+ if (len == 3) {
+ /* convert 3 character code to 2 character code if possible *CWB*/
+ U_ASSERT(capacity >= 4);
+ buffer[3] = '\0';
+ std::optional<int16_t> offset = _findIndex(COUNTRIES_3, buffer);
+ if (offset.has_value()) {
+ const char* const alias = COUNTRIES[*offset];
+ sink->Append(alias, (int32_t)uprv_strlen(alias));
+ return;
+ }
}
- return result;
+ sink->Append(buffer, len);
}
/**
* @param needSeparator if true, then add leading '_' if any variants
* are added to 'variant'
*/
-static void
-_getVariant(const char *localeID,
+void
+_getVariant(const char* localeID,
char prev,
- ByteSink& sink,
- UBool needSeparator) {
- UBool hasVariant = false;
+ ByteSink* sink,
+ const char** pEnd,
+ bool needSeparator,
+ UErrorCode& status) {
+ if (U_FAILURE(status)) return;
+ if (pEnd != nullptr) { *pEnd = localeID; }
+
+ // Reasonable upper limit for variants.
+ // The legacy locale format places no strict limit on the syntax of a
+ // variant. If the locale is constructed from a unicode_locale_id
+ // as defined in UTS35, then each unicode_variant_subtag
+ // has a maximum length of 8: (alphanum{5,8} | digit alphanum{3}).
+ // A limit of 179 allows 20 unicode_variant_subtags plus their
+ // separators in the unicode_locale_id:
+ // 8*20 + 1*(20-1) = 179
+ constexpr int32_t MAX_VARIANTS_LENGTH = 179;
/* get one or more variant tags and separate them with '_' */
- if(_isIDSeparator(prev)) {
+ int32_t index = 0;
+ if (_isIDSeparator(prev)) {
/* get a variant string after a '-' or '_' */
- while(!_isTerminator(*localeID)) {
+ for (index=0; !_isTerminator(localeID[index]); index++) {
+ if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
if (needSeparator) {
- sink.Append("_", 1);
+ if (sink != nullptr) {
+ sink->Append("_", 1);
+ }
needSeparator = false;
}
- char c = (char)uprv_toupper(*localeID);
- if (c == '-') c = '_';
- sink.Append(&c, 1);
- hasVariant = true;
- localeID++;
+ if (sink != nullptr) {
+ char c = (char)uprv_toupper(localeID[index]);
+ if (c == '-') c = '_';
+ sink->Append(&c, 1);
+ }
}
+ if (pEnd != nullptr) { *pEnd = localeID+index; }
}
/* if there is no variant tag after a '-' or '_' then look for '@' */
- if(!hasVariant) {
- if(prev=='@') {
+ if (index == 0) {
+ if (prev=='@') {
/* keep localeID */
} else if((localeID=locale_getKeywordsStart(localeID))!=nullptr) {
++localeID; /* point after the '@' */
} else {
return;
}
- while(!_isTerminator(*localeID)) {
+ for(; !_isTerminator(localeID[index]); index++) {
+ if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
if (needSeparator) {
- sink.Append("_", 1);
+ if (sink != nullptr) {
+ sink->Append("_", 1);
+ }
needSeparator = false;
}
- char c = (char)uprv_toupper(*localeID);
- if (c == '-' || c == ',') c = '_';
- sink.Append(&c, 1);
+ if (sink != nullptr) {
+ char c = (char)uprv_toupper(localeID[index]);
+ if (c == '-' || c == ',') c = '_';
+ sink->Append(&c, 1);
+ }
+ }
+ if (pEnd != nullptr) { *pEnd = localeID + index; }
+ }
+}
+
+} // namespace
+
+U_EXPORT CharString
+ulocimp_getLanguage(const char* localeID, UErrorCode& status) {
+ return ByteSinkUtil::viaByteSinkToCharString(
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getSubtags(
+ localeID,
+ &sink,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ status);
+ },
+ status);
+}
+
+U_EXPORT CharString
+ulocimp_getScript(const char* localeID, UErrorCode& status) {
+ return ByteSinkUtil::viaByteSinkToCharString(
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getSubtags(
+ localeID,
+ nullptr,
+ &sink,
+ nullptr,
+ nullptr,
+ nullptr,
+ status);
+ },
+ status);
+}
+
+U_EXPORT CharString
+ulocimp_getRegion(const char* localeID, UErrorCode& status) {
+ return ByteSinkUtil::viaByteSinkToCharString(
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getSubtags(
+ localeID,
+ nullptr,
+ nullptr,
+ &sink,
+ nullptr,
+ nullptr,
+ status);
+ },
+ status);
+}
+
+U_EXPORT CharString
+ulocimp_getVariant(const char* localeID, UErrorCode& status) {
+ return ByteSinkUtil::viaByteSinkToCharString(
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getSubtags(
+ localeID,
+ nullptr,
+ nullptr,
+ nullptr,
+ &sink,
+ nullptr,
+ status);
+ },
+ status);
+}
+
+U_EXPORT void
+ulocimp_getSubtags(
+ const char* localeID,
+ CharString* language,
+ CharString* script,
+ CharString* region,
+ CharString* variant,
+ const char** pEnd,
+ UErrorCode& status) {
+ if (U_FAILURE(status)) { return; }
+
+ std::optional<CharStringByteSink> languageSink;
+ std::optional<CharStringByteSink> scriptSink;
+ std::optional<CharStringByteSink> regionSink;
+ std::optional<CharStringByteSink> variantSink;
+
+ if (language != nullptr) { languageSink.emplace(language); }
+ if (script != nullptr) { scriptSink.emplace(script); }
+ if (region != nullptr) { regionSink.emplace(region); }
+ if (variant != nullptr) { variantSink.emplace(variant); }
+
+ ulocimp_getSubtags(
+ localeID,
+ languageSink.has_value() ? &*languageSink : nullptr,
+ scriptSink.has_value() ? &*scriptSink : nullptr,
+ regionSink.has_value() ? &*regionSink : nullptr,
+ variantSink.has_value() ? &*variantSink : nullptr,
+ pEnd,
+ status);
+}
+
+U_EXPORT void
+ulocimp_getSubtags(
+ const char* localeID,
+ ByteSink* language,
+ ByteSink* script,
+ ByteSink* region,
+ ByteSink* variant,
+ const char** pEnd,
+ UErrorCode& status) {
+ if (U_FAILURE(status)) { return; }
+
+ if (pEnd != nullptr) {
+ *pEnd = localeID;
+ } else if (language == nullptr &&
+ script == nullptr &&
+ region == nullptr &&
+ variant == nullptr) {
+ return;
+ }
+
+ bool hasRegion = false;
+
+ if (localeID == nullptr) {
+ localeID = uloc_getDefault();
+ }
+
+ _getLanguage(localeID, language, &localeID, status);
+ if (U_FAILURE(status)) { return; }
+ U_ASSERT(localeID != nullptr);
+
+ if (pEnd != nullptr) {
+ *pEnd = localeID;
+ } else if (script == nullptr &&
+ region == nullptr &&
+ variant == nullptr) {
+ return;
+ }
+
+ if (_isIDSeparator(*localeID)) {
+ const char* begin = localeID + 1;
+ const char* end = nullptr;
+ _getScript(begin, script, &end);
+ U_ASSERT(end != nullptr);
+ if (end != begin) {
+ localeID = end;
+ if (pEnd != nullptr) { *pEnd = localeID; }
+ }
+ }
+
+ if (region == nullptr && variant == nullptr && pEnd == nullptr) { return; }
+
+ if (_isIDSeparator(*localeID)) {
+ const char* begin = localeID + 1;
+ const char* end = nullptr;
+ _getRegion(begin, region, &end);
+ U_ASSERT(end != nullptr);
+ if (end != begin) {
+ hasRegion = true;
+ localeID = end;
+ if (pEnd != nullptr) { *pEnd = localeID; }
+ }
+ }
+
+ if (variant == nullptr && pEnd == nullptr) { return; }
+
+ if (_isIDSeparator(*localeID) && !_isBCP47Extension(localeID)) {
+ /* If there was no country ID, skip a possible extra IDSeparator */
+ if (!hasRegion && _isIDSeparator(localeID[1])) {
localeID++;
}
+ const char* begin = localeID + 1;
+ const char* end = nullptr;
+ _getVariant(begin, *localeID, variant, &end, false, status);
+ if (U_FAILURE(status)) { return; }
+ U_ASSERT(end != nullptr);
+ if (end != begin && pEnd != nullptr) { *pEnd = end; }
}
}
@@ -1359,12 +1651,11 @@ static const UEnumeration gKeywordsEnum = {
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
{
+ if (U_FAILURE(*status)) { return nullptr; }
+
LocalMemory<UKeywordsContext> myContext;
LocalMemory<UEnumeration> result;
- if (U_FAILURE(*status)) {
- return nullptr;
- }
myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
if (myContext.isNull() || result.isNull()) {
@@ -1388,16 +1679,15 @@ U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
UErrorCode* status)
{
- CharString tempBuffer;
- const char* tmpLocaleID;
-
if(status==nullptr || U_FAILURE(*status)) {
- return 0;
+ return nullptr;
}
+ CharString tempBuffer;
+ const char* tmpLocaleID;
+
if (_hasBCP47Extension(localeID)) {
- CharStringByteSink sink(&tempBuffer);
- ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status);
+ tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, *status);
tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
} else {
if (localeID==nullptr) {
@@ -1406,37 +1696,21 @@ uloc_openKeywords(const char* localeID,
tmpLocaleID=localeID;
}
- /* Skip the language */
- ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);
+ ulocimp_getSubtags(
+ tmpLocaleID,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ &tmpLocaleID,
+ *status);
if (U_FAILURE(*status)) {
- return 0;
- }
-
- if(_isIDSeparator(*tmpLocaleID)) {
- const char *scriptID;
- /* Skip the script if available */
- ulocimp_getScript(tmpLocaleID+1, &scriptID, *status);
- if (U_FAILURE(*status)) {
- return 0;
- }
- if(scriptID != tmpLocaleID+1) {
- /* Found optional script */
- tmpLocaleID = scriptID;
- }
- /* Skip the Country */
- if (_isIDSeparator(*tmpLocaleID)) {
- ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status);
- if (U_FAILURE(*status)) {
- return 0;
- }
- }
+ return nullptr;
}
/* keywords are located after '@' */
if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != nullptr) {
- CharString keywords;
- CharStringByteSink sink(&keywords);
- ulocimp_getKeywords(tmpLocaleID+1, '@', sink, false, status);
+ CharString keywords = ulocimp_getKeywords(tmpLocaleID + 1, '@', false, *status);
if (U_FAILURE(*status)) {
return nullptr;
}
@@ -1450,10 +1724,12 @@ uloc_openKeywords(const char* localeID,
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE 0x1
-#define OPTION_SET(options, mask) ((options & mask) != 0)
+namespace {
-static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
-#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
+inline bool OPTION_SET(uint32_t options, uint32_t mask) { return (options & mask) != 0; }
+
+constexpr char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
+constexpr int32_t I_DEFAULT_LENGTH = UPRV_LENGTHOF(i_default);
/**
* Canonicalize the given localeID, to level 1 or to level 2,
@@ -1462,16 +1738,16 @@ static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
*
* This is the code underlying uloc_getName and uloc_canonicalize.
*/
-static void
+void
_canonicalize(const char* localeID,
ByteSink& sink,
uint32_t options,
- UErrorCode* err) {
- if (U_FAILURE(*err)) {
+ UErrorCode& err) {
+ if (U_FAILURE(err)) {
return;
}
- int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
+ int32_t j, fieldCount=0;
CharString tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this
CharString localeIDWithHyphens; // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
const char* origLocaleID;
@@ -1484,8 +1760,8 @@ _canonicalize(const char* localeID,
// convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') {
- localeIDWithHyphens.append(localeID, -1, *err);
- if (U_SUCCESS(*err)) {
+ localeIDWithHyphens.append(localeID, -1, err);
+ if (U_SUCCESS(err)) {
for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
if (*p == '_') {
*p = '-';
@@ -1495,9 +1771,8 @@ _canonicalize(const char* localeID,
}
}
- CharStringByteSink tempSink(&tempBuffer);
- ulocimp_forLanguageTag(localeIDPtr, -1, tempSink, nullptr, err);
- tmpLocaleID = U_SUCCESS(*err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeIDPtr;
+ tempBuffer = ulocimp_forLanguageTag(localeIDPtr, -1, nullptr, err);
+ tmpLocaleID = U_SUCCESS(err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeIDPtr;
} else {
if (localeID==nullptr) {
localeID=uloc_getDefault();
@@ -1508,76 +1783,70 @@ _canonicalize(const char* localeID,
origLocaleID=tmpLocaleID;
/* get all pieces, one after another, and separate with '_' */
- CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
+ CharString tag;
+ CharString script;
+ CharString country;
+ CharString variant;
+ ulocimp_getSubtags(
+ tmpLocaleID,
+ &tag,
+ &script,
+ &country,
+ &variant,
+ &tmpLocaleID,
+ err);
+ if (U_FAILURE(err)) {
+ return;
+ }
if (tag.length() == I_DEFAULT_LENGTH &&
uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
tag.clear();
- tag.append(uloc_getDefault(), *err);
- } else if(_isIDSeparator(*tmpLocaleID)) {
- const char *scriptID;
-
- ++fieldCount;
- tag.append('_', *err);
-
- CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
- tag.append(script, *err);
- scriptSize = script.length();
- if(scriptSize > 0) {
- /* Found optional script */
- tmpLocaleID = scriptID;
+ tag.append(uloc_getDefault(), err);
+ } else {
+ if (!script.isEmpty()) {
++fieldCount;
- if (_isIDSeparator(*tmpLocaleID)) {
- /* If there is something else, then we add the _ */
- tag.append('_', *err);
- }
+ tag.append('_', err);
+ tag.append(script, err);
}
-
- if (_isIDSeparator(*tmpLocaleID)) {
- const char *cntryID;
-
- CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
- tag.append(country, *err);
- if (!country.isEmpty()) {
- /* Found optional country */
- tmpLocaleID = cntryID;
- }
- if(_isIDSeparator(*tmpLocaleID)) {
- /* If there is something else, then we add the _ if we found country before. */
- if (!_isIDSeparator(*(tmpLocaleID+1))) {
- ++fieldCount;
- tag.append('_', *err);
- }
-
- variantSize = -tag.length();
- {
- CharStringByteSink s(&tag);
- _getVariant(tmpLocaleID+1, *tmpLocaleID, s, false);
- }
- variantSize += tag.length();
- if (variantSize > 0) {
- tmpLocaleID += variantSize + 1; /* skip '_' and variant */
- }
+ if (!country.isEmpty()) {
+ ++fieldCount;
+ tag.append('_', err);
+ tag.append(country, err);
+ }
+ if (!variant.isEmpty()) {
+ ++fieldCount;
+ if (country.isEmpty()) {
+ tag.append('_', err);
}
+ tag.append('_', err);
+ tag.append(variant, err);
}
}
/* Copy POSIX-style charset specifier, if any [mr.utf8] */
if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
- UBool done = false;
- do {
- char c = *tmpLocaleID;
- switch (c) {
- case 0:
- case '@':
- done = true;
- break;
- default:
- tag.append(c, *err);
- ++tmpLocaleID;
- break;
- }
- } while (!done);
+ tag.append('.', err);
+ ++tmpLocaleID;
+ const char *atPos = nullptr;
+ size_t length;
+ if((atPos = uprv_strchr(tmpLocaleID, '@')) != nullptr) {
+ length = atPos - tmpLocaleID;
+ } else {
+ length = uprv_strlen(tmpLocaleID);
+ }
+ // The longest charset name we found in the IANA charset registry
+ // (https://www.iana.org/assignments/character-sets/) is
+ // "Extended_UNIX_Code_Packed_Format_for_Japanese", with a length of 45.
+ // We therefore restrict the length here to 64, the smallest power of 2
+ // that is greater than 45.
+ constexpr size_t kMaxCharsetLength = 64;
+ if (length > kMaxCharsetLength) {
+ err = U_ILLEGAL_ARGUMENT_ERROR; /* charset name too long */
+ return;
+ }
+ tag.append(tmpLocaleID, static_cast<int32_t>(length), err);
+ tmpLocaleID += length;
}
/* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
@@ -1595,7 +1864,7 @@ _canonicalize(const char* localeID,
if (c == 0) {
break;
}
- tag.append(c, *err);
+ tag.append(c, err);
++tmpLocaleID;
}
}
@@ -1604,22 +1873,16 @@ _canonicalize(const char* localeID,
/* Handle @FOO variant if @ is present and not followed by = */
if (tmpLocaleID!=nullptr && keywordAssign==nullptr) {
/* Add missing '_' if needed */
- if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
+ if (fieldCount < 2 || (fieldCount < 3 && !script.isEmpty())) {
do {
- tag.append('_', *err);
+ tag.append('_', err);
++fieldCount;
} while(fieldCount<2);
}
- int32_t posixVariantSize = -tag.length();
- {
- CharStringByteSink s(&tag);
- _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
- }
- posixVariantSize += tag.length();
- if (posixVariantSize > 0) {
- variantSize += posixVariantSize;
- }
+ CharStringByteSink s(&tag);
+ _getVariant(tmpLocaleID+1, '@', &s, nullptr, !variant.isEmpty(), err);
+ if (U_FAILURE(err)) { return; }
}
/* Look up the ID in the canonicalization map */
@@ -1630,7 +1893,7 @@ _canonicalize(const char* localeID,
break; /* Don't remap "" if keywords present */
}
tag.clear();
- tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
+ tag.append(CANONICALIZE_MAP[j].canonicalID, err);
break;
}
}
@@ -1648,6 +1911,8 @@ _canonicalize(const char* localeID,
}
}
+} // namespace
+
/* ### ID parsing API **************************************************/
U_CAPI int32_t U_EXPORT2
@@ -1656,39 +1921,35 @@ uloc_getParent(const char* localeID,
int32_t parentCapacity,
UErrorCode* err)
{
- if (U_FAILURE(*err)) {
- return 0;
- }
-
- CheckedArrayByteSink sink(parent, parentCapacity);
- ulocimp_getParent(localeID, sink, err);
-
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*err)) {
- return reslen;
- }
-
- if (sink.Overflowed()) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(parent, parentCapacity, reslen, err);
- }
+ return ByteSinkUtil::viaByteSinkToTerminatedChars(
+ parent, parentCapacity,
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getParent(localeID, sink, status);
+ },
+ *err);
+}
- return reslen;
+U_EXPORT CharString
+ulocimp_getParent(const char* localeID,
+ UErrorCode& err)
+{
+ return ByteSinkUtil::viaByteSinkToCharString(
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getParent(localeID, sink, status);
+ },
+ err);
}
-U_CAPI void U_EXPORT2
+U_EXPORT void
ulocimp_getParent(const char* localeID,
icu::ByteSink& sink,
- UErrorCode* err)
+ UErrorCode& err)
{
+ if (U_FAILURE(err)) { return; }
+
const char *lastUnderscore;
int32_t i;
- if (U_FAILURE(*err))
- return;
-
if (localeID == nullptr)
localeID = uloc_getDefault();
@@ -1715,16 +1976,19 @@ uloc_getLanguage(const char* localeID,
UErrorCode* err)
{
/* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
-
- if (err==nullptr || U_FAILURE(*err)) {
- return 0;
- }
-
- if(localeID==nullptr) {
- localeID=uloc_getDefault();
- }
-
- return ulocimp_getLanguage(localeID, nullptr, *err).extract(language, languageCapacity, *err);
+ return ByteSinkUtil::viaByteSinkToTerminatedChars(
+ language, languageCapacity,
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getSubtags(
+ localeID,
+ &sink,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ status);
+ },
+ *err);
}
U_CAPI int32_t U_EXPORT2
@@ -1733,24 +1997,19 @@ uloc_getScript(const char* localeID,
int32_t scriptCapacity,
UErrorCode* err)
{
- if(err==nullptr || U_FAILURE(*err)) {
- return 0;
- }
-
- if(localeID==nullptr) {
- localeID=uloc_getDefault();
- }
-
- /* skip the language */
- ulocimp_getLanguage(localeID, &localeID, *err);
- if (U_FAILURE(*err)) {
- return 0;
- }
-
- if(_isIDSeparator(*localeID)) {
- return ulocimp_getScript(localeID+1, nullptr, *err).extract(script, scriptCapacity, *err);
- }
- return u_terminateChars(script, scriptCapacity, 0, err);
+ return ByteSinkUtil::viaByteSinkToTerminatedChars(
+ script, scriptCapacity,
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getSubtags(
+ localeID,
+ nullptr,
+ &sink,
+ nullptr,
+ nullptr,
+ nullptr,
+ status);
+ },
+ *err);
}
U_CAPI int32_t U_EXPORT2
@@ -1759,36 +2018,19 @@ uloc_getCountry(const char* localeID,
int32_t countryCapacity,
UErrorCode* err)
{
- if(err==nullptr || U_FAILURE(*err)) {
- return 0;
- }
-
- if(localeID==nullptr) {
- localeID=uloc_getDefault();
- }
-
- /* Skip the language */
- ulocimp_getLanguage(localeID, &localeID, *err);
- if (U_FAILURE(*err)) {
- return 0;
- }
-
- if(_isIDSeparator(*localeID)) {
- const char *scriptID;
- /* Skip the script if available */
- ulocimp_getScript(localeID+1, &scriptID, *err);
- if (U_FAILURE(*err)) {
- return 0;
- }
- if(scriptID != localeID+1) {
- /* Found optional script */
- localeID = scriptID;
- }
- if(_isIDSeparator(*localeID)) {
- return ulocimp_getCountry(localeID+1, nullptr, *err).extract(country, countryCapacity, *err);
- }
- }
- return u_terminateChars(country, countryCapacity, 0, err);
+ return ByteSinkUtil::viaByteSinkToTerminatedChars(
+ country, countryCapacity,
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getSubtags(
+ localeID,
+ nullptr,
+ nullptr,
+ &sink,
+ nullptr,
+ nullptr,
+ status);
+ },
+ *err);
}
U_CAPI int32_t U_EXPORT2
@@ -1797,77 +2039,19 @@ uloc_getVariant(const char* localeID,
int32_t variantCapacity,
UErrorCode* err)
{
- CharString tempBuffer;
- const char* tmpLocaleID;
- int32_t i=0;
-
- if(err==nullptr || U_FAILURE(*err)) {
- return 0;
- }
-
- if (_hasBCP47Extension(localeID)) {
- CharStringByteSink sink(&tempBuffer);
- ulocimp_forLanguageTag(localeID, -1, sink, nullptr, err);
- tmpLocaleID = U_SUCCESS(*err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
- } else {
- if (localeID==nullptr) {
- localeID=uloc_getDefault();
- }
- tmpLocaleID=localeID;
- }
-
- /* Skip the language */
- ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
- if (U_FAILURE(*err)) {
- return 0;
- }
-
- if(_isIDSeparator(*tmpLocaleID)) {
- const char *scriptID;
- /* Skip the script if available */
- ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
- if (U_FAILURE(*err)) {
- return 0;
- }
- if(scriptID != tmpLocaleID+1) {
- /* Found optional script */
- tmpLocaleID = scriptID;
- }
- /* Skip the Country */
- if (_isIDSeparator(*tmpLocaleID)) {
- const char *cntryID;
- ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
- if (U_FAILURE(*err)) {
- return 0;
- }
- if (cntryID != tmpLocaleID+1) {
- /* Found optional country */
- tmpLocaleID = cntryID;
- }
- if(_isIDSeparator(*tmpLocaleID)) {
- /* If there was no country ID, skip a possible extra IDSeparator */
- if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
- tmpLocaleID++;
- }
-
- CheckedArrayByteSink sink(variant, variantCapacity);
- _getVariant(tmpLocaleID+1, *tmpLocaleID, sink, false);
-
- i = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*err)) {
- return i;
- }
-
- if (sink.Overflowed()) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- return i;
- }
- }
- }
- }
-
- return u_terminateChars(variant, variantCapacity, i, err);
+ return ByteSinkUtil::viaByteSinkToTerminatedChars(
+ variant, variantCapacity,
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getSubtags(
+ localeID,
+ nullptr,
+ nullptr,
+ nullptr,
+ &sink,
+ nullptr,
+ status);
+ },
+ *err);
}
U_CAPI int32_t U_EXPORT2
@@ -1876,32 +2060,29 @@ uloc_getName(const char* localeID,
int32_t nameCapacity,
UErrorCode* err)
{
- if (U_FAILURE(*err)) {
- return 0;
- }
-
- CheckedArrayByteSink sink(name, nameCapacity);
- ulocimp_getName(localeID, sink, err);
-
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*err)) {
- return reslen;
- }
-
- if (sink.Overflowed()) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(name, nameCapacity, reslen, err);
- }
+ return ByteSinkUtil::viaByteSinkToTerminatedChars(
+ name, nameCapacity,
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getName(localeID, sink, status);
+ },
+ *err);
+}
- return reslen;
+U_EXPORT CharString
+ulocimp_getName(const char* localeID,
+ UErrorCode& err)
+{
+ return ByteSinkUtil::viaByteSinkToCharString(
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getName(localeID, sink, status);
+ },
+ err);
}
-U_CAPI void U_EXPORT2
+U_EXPORT void
ulocimp_getName(const char* localeID,
ByteSink& sink,
- UErrorCode* err)
+ UErrorCode& err)
{
_canonicalize(localeID, sink, 0, err);
}
@@ -1912,32 +2093,29 @@ uloc_getBaseName(const char* localeID,
int32_t nameCapacity,
UErrorCode* err)
{
- if (U_FAILURE(*err)) {
- return 0;
- }
-
- CheckedArrayByteSink sink(name, nameCapacity);
- ulocimp_getBaseName(localeID, sink, err);
-
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*err)) {
- return reslen;
- }
-
- if (sink.Overflowed()) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(name, nameCapacity, reslen, err);
- }
+ return ByteSinkUtil::viaByteSinkToTerminatedChars(
+ name, nameCapacity,
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getBaseName(localeID, sink, status);
+ },
+ *err);
+}
- return reslen;
+U_EXPORT CharString
+ulocimp_getBaseName(const char* localeID,
+ UErrorCode& err)
+{
+ return ByteSinkUtil::viaByteSinkToCharString(
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_getBaseName(localeID, sink, status);
+ },
+ err);
}
-U_CAPI void U_EXPORT2
+U_EXPORT void
ulocimp_getBaseName(const char* localeID,
ByteSink& sink,
- UErrorCode* err)
+ UErrorCode& err)
{
_canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
}
@@ -1948,32 +2126,29 @@ uloc_canonicalize(const char* localeID,
int32_t nameCapacity,
UErrorCode* err)
{
- if (U_FAILURE(*err)) {
- return 0;
- }
-
- CheckedArrayByteSink sink(name, nameCapacity);
- ulocimp_canonicalize(localeID, sink, err);
-
- int32_t reslen = sink.NumberOfBytesAppended();
-
- if (U_FAILURE(*err)) {
- return reslen;
- }
-
- if (sink.Overflowed()) {
- *err = U_BUFFER_OVERFLOW_ERROR;
- } else {
- u_terminateChars(name, nameCapacity, reslen, err);
- }
+ return ByteSinkUtil::viaByteSinkToTerminatedChars(
+ name, nameCapacity,
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_canonicalize(localeID, sink, status);
+ },
+ *err);
+}
- return reslen;
+U_EXPORT CharString
+ulocimp_canonicalize(const char* localeID,
+ UErrorCode& err)
+{
+ return ByteSinkUtil::viaByteSinkToCharString(
+ [&](ByteSink& sink, UErrorCode& status) {
+ ulocimp_canonicalize(localeID, sink, status);
+ },
+ err);
}
-U_CAPI void U_EXPORT2
+U_EXPORT void
ulocimp_canonicalize(const char* localeID,
ByteSink& sink,
- UErrorCode* err)
+ UErrorCode& err)
{
_canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
}
@@ -1981,49 +2156,39 @@ ulocimp_canonicalize(const char* localeID,
U_CAPI const char* U_EXPORT2
uloc_getISO3Language(const char* localeID)
{
- int16_t offset;
- char lang[ULOC_LANG_CAPACITY];
UErrorCode err = U_ZERO_ERROR;
if (localeID == nullptr)
{
localeID = uloc_getDefault();
}
- uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
+ CharString lang = ulocimp_getLanguage(localeID, err);
if (U_FAILURE(err))
return "";
- offset = _findIndex(LANGUAGES, lang);
- if (offset < 0)
- return "";
- return LANGUAGES_3[offset];
+ std::optional<int16_t> offset = _findIndex(LANGUAGES, lang.data());
+ return offset.has_value() ? LANGUAGES_3[*offset] : "";
}
U_CAPI const char* U_EXPORT2
uloc_getISO3Country(const char* localeID)
{
- int16_t offset;
- char cntry[ULOC_LANG_CAPACITY];
UErrorCode err = U_ZERO_ERROR;
if (localeID == nullptr)
{
localeID = uloc_getDefault();
}
- uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
+ CharString cntry = ulocimp_getRegion(localeID, err);
if (U_FAILURE(err))
return "";
- offset = _findIndex(COUNTRIES, cntry);
- if (offset < 0)
- return "";
-
- return COUNTRIES_3[offset];
+ std::optional<int16_t> offset = _findIndex(COUNTRIES, cntry.data());
+ return offset.has_value() ? COUNTRIES_3[*offset] : "";
}
U_CAPI uint32_t U_EXPORT2
uloc_getLCID(const char* localeID)
{
UErrorCode status = U_ZERO_ERROR;
- char langID[ULOC_FULLNAME_CAPACITY];
uint32_t lcid = 0;
/* Check for incomplete id. */
@@ -2042,37 +2207,20 @@ uloc_getLCID(const char* localeID)
return lcid;
}
- uloc_getLanguage(localeID, langID, sizeof(langID), &status);
- if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
+ CharString langID = ulocimp_getLanguage(localeID, status);
+ if (U_FAILURE(status)) {
return 0;
}
if (uprv_strchr(localeID, '@')) {
// uprv_convertToLCID does not support keywords other than collation.
// Remove all keywords except collation.
- int32_t len;
- char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
-
- CharString collVal;
- {
- CharStringByteSink sink(&collVal);
- ulocimp_getKeywordValue(localeID, "collation", sink, &status);
- }
-
+ CharString collVal = ulocimp_getKeywordValue(localeID, "collation", status);
if (U_SUCCESS(status) && !collVal.isEmpty()) {
- len = uloc_getBaseName(localeID, tmpLocaleID,
- UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
-
- if (U_SUCCESS(status) && len > 0) {
- tmpLocaleID[len] = 0;
-
- len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID,
- UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
-
- if (U_SUCCESS(status) && len > 0) {
- tmpLocaleID[len] = 0;
- return uprv_convertToLCID(langID, tmpLocaleID, &status);
- }
+ CharString tmpLocaleID = ulocimp_getBaseName(localeID, status);
+ ulocimp_setKeywordValue("collation", collVal.data(), tmpLocaleID, status);
+ if (U_SUCCESS(status)) {
+ return uprv_convertToLCID(langID.data(), tmpLocaleID.data(), &status);
}
}
@@ -2080,7 +2228,7 @@ uloc_getLCID(const char* localeID)
status = U_ZERO_ERROR;
}
- return uprv_convertToLCID(langID, localeID, &status);
+ return uprv_convertToLCID(langID.data(), localeID, &status);
}
U_CAPI int32_t U_EXPORT2
@@ -2156,7 +2304,9 @@ uloc_toUnicodeLocaleType(const char* keyword, const char* value)
return bcpType;
}
-static UBool
+namespace {
+
+bool
isWellFormedLegacyKey(const char* legacyKey)
{
const char* p = legacyKey;
@@ -2169,7 +2319,7 @@ isWellFormedLegacyKey(const char* legacyKey)
return true;
}
-static UBool
+bool
isWellFormedLegacyType(const char* legacyType)
{
const char* p = legacyType;
@@ -2190,6 +2340,8 @@ isWellFormedLegacyType(const char* legacyType)
return (alphaNumLen != 0);
}
+} // namespace
+
U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{