summary refs log tree commit diff stats
path: root/thirdparty/icu4c/common/uloc.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/uloc.cpp')
-rw-r--r-- thirdparty/icu4c/common/uloc.cpp 215
1 files changed, 126 insertions, 89 deletions
diff --git a/thirdparty/icu4c/common/uloc.cpp b/thirdparty/icu4c/common/uloc.cpp
index 88fe7eaadc..51887c97c3 100644
--- a/thirdparty/icu4c/common/uloc.cpp
+++ b/thirdparty/icu4c/common/uloc.cpp
@@ -30,7 +30,9 @@
l = lang, C = ctry, M = charmap, V = variant
*/
+#include <algorithm>
#include <optional>
+#include <string_view>
#include "unicode/bytestream.h"
#include "unicode/errorcode.h"
@@ -551,17 +553,17 @@ namespace {
* @param status return status (keyword too long)
* @return the keyword name
*/
-CharString locale_canonKeywordName(const char* keywordName, UErrorCode& status)
+CharString locale_canonKeywordName(std::string_view keywordName, UErrorCode& status)
{
if (U_FAILURE(status)) { return {}; }
CharString result;
- for (; *keywordName != 0; keywordName++) {
- if (!UPRV_ISALPHANUM(*keywordName)) {
+ for (char c : keywordName) {
+ if (!UPRV_ISALPHANUM(c)) {
status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
return {};
}
- result.append(uprv_tolower(*keywordName), status);
+ result.append(uprv_tolower(c), status);
}
if (result.isEmpty()) {
status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
@@ -580,8 +582,8 @@ typedef struct {
int32_t U_CALLCONV
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
- const char* leftString = ((const KeywordStruct *)left)->keyword;
- const char* rightString = ((const KeywordStruct *)right)->keyword;
+ const char* leftString = static_cast<const KeywordStruct*>(left)->keyword;
+ const char* rightString = static_cast<const KeywordStruct*>(right)->keyword;
return uprv_strcmp(leftString, rightString);
}
@@ -686,10 +688,10 @@ ulocimp_getKeywords(const char* localeID,
while(*(pos - i - 1) == ' ') {
i++;
}
- keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
+ keywordList[numKeywords].valueLen = static_cast<int32_t>(pos - equalSign - i);
pos++;
} else {
- i = (int32_t)uprv_strlen(equalSign);
+ i = static_cast<int32_t>(uprv_strlen(equalSign));
while(i && equalSign[i-1] == ' ') {
i--;
}
@@ -733,6 +735,11 @@ uloc_getKeywordValue(const char* localeID,
char* buffer, int32_t bufferCapacity,
UErrorCode* status)
{
+ if (U_FAILURE(*status)) { return 0; }
+ if (keywordName == nullptr || *keywordName == '\0') {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
return ByteSinkUtil::viaByteSinkToTerminatedChars(
buffer, bufferCapacity,
[&](ByteSink& sink, UErrorCode& status) {
@@ -743,7 +750,7 @@ uloc_getKeywordValue(const char* localeID,
U_EXPORT CharString
ulocimp_getKeywordValue(const char* localeID,
- const char* keywordName,
+ std::string_view keywordName,
UErrorCode& status)
{
return ByteSinkUtil::viaByteSinkToCharString(
@@ -755,13 +762,13 @@ ulocimp_getKeywordValue(const char* localeID,
U_EXPORT void
ulocimp_getKeywordValue(const char* localeID,
- const char* keywordName,
+ std::string_view keywordName,
icu::ByteSink& sink,
UErrorCode& status)
{
if (U_FAILURE(status)) { return; }
- if (localeID == nullptr || keywordName == nullptr || keywordName[0] == 0) {
+ if (localeID == nullptr || keywordName.empty()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
@@ -865,6 +872,11 @@ uloc_setKeywordValue(const char* keywordName,
{
if (U_FAILURE(*status)) { return 0; }
+ if (keywordName == nullptr || *keywordName == 0) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
if (bufferCapacity <= 1) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
@@ -883,7 +895,11 @@ uloc_setKeywordValue(const char* keywordName,
CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords,
bufferCapacity - baseLen - 1);
int32_t reslen = ulocimp_setKeywordValue(
- keywords, keywordName, keywordValue, sink, *status);
+ keywords == nullptr ? std::string_view() : keywords,
+ keywordName,
+ keywordValue == nullptr ? std::string_view() : keywordValue,
+ sink,
+ *status);
if (U_FAILURE(*status)) {
return *status == U_BUFFER_OVERFLOW_ERROR ? reslen + baseLen : 0;
@@ -898,24 +914,29 @@ uloc_setKeywordValue(const char* keywordName,
}
U_EXPORT void
-ulocimp_setKeywordValue(const char* keywordName,
- const char* keywordValue,
+ulocimp_setKeywordValue(std::string_view keywordName,
+ std::string_view keywordValue,
CharString& localeID,
UErrorCode& status)
{
if (U_FAILURE(status)) { return; }
- // This is safe because CharString::truncate() doesn't actually erase any
- // data, but simply sets the position for where new data will be written.
- const char* keywords = locale_getKeywordsStart(localeID.data());
- if (keywords != nullptr) localeID.truncate(keywords - localeID.data());
+ std::string_view keywords;
+ if (const char* start = locale_getKeywordsStart(localeID.data()); start != nullptr) {
+ // This is safe because CharString::truncate() doesn't actually erase any
+ // data, but simply sets the position for where new data will be written.
+ int32_t size = start - localeID.data();
+ keywords = localeID.toStringPiece();
+ keywords.remove_prefix(size);
+ localeID.truncate(size);
+ }
CharStringByteSink sink(&localeID);
ulocimp_setKeywordValue(keywords, keywordName, keywordValue, sink, status);
}
U_EXPORT int32_t
-ulocimp_setKeywordValue(const char* keywords,
- const char* keywordName,
- const char* keywordValue,
+ulocimp_setKeywordValue(std::string_view keywords,
+ std::string_view keywordName,
+ std::string_view keywordValue,
ByteSink& sink,
UErrorCode& status)
{
@@ -924,9 +945,6 @@ ulocimp_setKeywordValue(const char* keywords,
/* TODO: sorting. removal. */
int32_t needLen = 0;
int32_t rc;
- const char* nextSeparator = nullptr;
- const char* nextEqualsign = nullptr;
- const char* keywordStart = nullptr;
CharString updatedKeysAndValues;
bool handledInputKeyAndValue = false;
char keyValuePrefix = '@';
@@ -934,7 +952,7 @@ ulocimp_setKeywordValue(const char* keywords,
if (status == U_STRING_NOT_TERMINATED_WARNING) {
status = U_ZERO_ERROR;
}
- if (keywordName == nullptr || keywordName[0] == 0) {
+ if (keywordName.empty()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
@@ -944,21 +962,19 @@ ulocimp_setKeywordValue(const char* keywords,
}
CharString canonKeywordValue;
- if(keywordValue) {
- while (*keywordValue != 0) {
- if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
- status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
- return 0;
- }
- /* Should we force lowercase in value to set? */
- canonKeywordValue.append(*keywordValue++, status);
+ for (char c : keywordValue) {
+ if (!UPRV_ISALPHANUM(c) && !UPRV_OK_VALUE_PUNCTUATION(c)) {
+ status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
+ return 0;
}
+ /* Should we force lowercase in value to set? */
+ canonKeywordValue.append(c, status);
}
if (U_FAILURE(status)) {
return 0;
}
- if (keywords == nullptr || keywords[1] == '\0') {
+ if (keywords.size() <= 1) {
if (canonKeywordValue.isEmpty()) { /* no keywords = nothing to remove */
U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
return 0;
@@ -984,23 +1000,20 @@ ulocimp_setKeywordValue(const char* keywords,
return needLen;
} /* end shortcut - no @ */
- keywordStart = keywords;
/* search for keyword */
- while(keywordStart) {
- const char* keyValueTail;
-
+ for (size_t keywordStart = 0; keywordStart != std::string_view::npos;) {
keywordStart++; /* skip @ or ; */
- nextEqualsign = uprv_strchr(keywordStart, '=');
- if (!nextEqualsign) {
+ size_t nextEqualsign = keywords.find('=', keywordStart);
+ if (nextEqualsign == std::string_view::npos) {
status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
return 0;
}
/* strip leading & trailing spaces (TC decided to tolerate these) */
- while(*keywordStart == ' ') {
+ while (keywordStart < keywords.size() && keywords[keywordStart] == ' ') {
keywordStart++;
}
- keyValueTail = nextEqualsign;
- while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
+ size_t keyValueTail = nextEqualsign;
+ while (keyValueTail > keywordStart && keywords[keyValueTail - 1] == ' ') {
keyValueTail--;
}
/* now keyValueTail points to first char after the keyName */
@@ -1011,26 +1024,26 @@ ulocimp_setKeywordValue(const char* keywords,
}
CharString localeKeywordName;
while (keywordStart < keyValueTail) {
- if (!UPRV_ISALPHANUM(*keywordStart)) {
+ if (!UPRV_ISALPHANUM(keywords[keywordStart])) {
status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
return 0;
}
- localeKeywordName.append(uprv_tolower(*keywordStart++), status);
+ localeKeywordName.append(uprv_tolower(keywords[keywordStart++]), status);
}
if (U_FAILURE(status)) {
return 0;
}
- nextSeparator = uprv_strchr(nextEqualsign, ';');
+ size_t nextSeparator = keywords.find(';', nextEqualsign);
/* start processing the value part */
nextEqualsign++; /* skip '=' */
/* First strip leading & trailing spaces (TC decided to tolerate these) */
- while(*nextEqualsign == ' ') {
+ while (nextEqualsign < keywords.size() && keywords[nextEqualsign] == ' ') {
nextEqualsign++;
}
- keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
- while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
+ keyValueTail = nextSeparator == std::string_view::npos ? keywords.size() : nextSeparator;
+ while (keyValueTail > nextEqualsign && keywords[keyValueTail - 1] == ' ') {
keyValueTail--;
}
if (nextEqualsign == keyValueTail) {
@@ -1065,9 +1078,10 @@ ulocimp_setKeywordValue(const char* keywords,
keyValuePrefix = ';'; /* for any subsequent key-value pair */
updatedKeysAndValues.append(localeKeywordName, status);
updatedKeysAndValues.append('=', status);
- updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), status);
+ updatedKeysAndValues.append(keywords.data() + nextEqualsign,
+ static_cast<int32_t>(keyValueTail - nextEqualsign), status);
}
- if (!nextSeparator && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) {
+ if (nextSeparator == std::string_view::npos && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) {
/* append new entry at the end, it sorts later than existing entries */
updatedKeysAndValues.append(keyValuePrefix, status);
/* skip keyValuePrefix update, no subsequent key-value pair */
@@ -1091,7 +1105,7 @@ ulocimp_setKeywordValue(const char* keywords,
/* if input key/value specified removal of a keyword not present in locale, or
* there was an error in CharString.append, leave original locale alone. */
U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
- return (int32_t)uprv_strlen(keywords);
+ return static_cast<int32_t>(keywords.size());
}
needLen = updatedKeysAndValues.length();
@@ -1155,7 +1169,7 @@ std::optional<int16_t> _findIndex(const char* const* list, const char* key)
while (pass++ < 2) {
while (*list) {
if (uprv_strcmp(key, *list) == 0) {
- return (int16_t)(list - anchor);
+ return static_cast<int16_t>(list - anchor);
}
list++;
}
@@ -1241,7 +1255,7 @@ _getLanguage(const char* localeID,
std::optional<int16_t> offset = _findIndex(LANGUAGES_3, buffer);
if (offset.has_value()) {
const char* const alias = LANGUAGES[*offset];
- sink->Append(alias, (int32_t)uprv_strlen(alias));
+ sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias)));
return;
}
}
@@ -1322,7 +1336,7 @@ _getRegion(const char* localeID,
std::optional<int16_t> offset = _findIndex(COUNTRIES_3, buffer);
if (offset.has_value()) {
const char* const alias = COUNTRIES[*offset];
- sink->Append(alias, (int32_t)uprv_strlen(alias));
+ sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias)));
return;
}
}
@@ -1370,7 +1384,7 @@ _getVariant(const char* localeID,
needSeparator = false;
}
if (sink != nullptr) {
- char c = (char)uprv_toupper(localeID[index]);
+ char c = uprv_toupper(localeID[index]);
if (c == '-') c = '_';
sink->Append(&c, 1);
}
@@ -1399,7 +1413,7 @@ _getVariant(const char* localeID,
needSeparator = false;
}
if (sink != nullptr) {
- char c = (char)uprv_toupper(localeID[index]);
+ char c = uprv_toupper(localeID[index]);
if (c == '-' || c == ',') c = '_';
sink->Append(&c, 1);
}
@@ -1955,7 +1969,7 @@ ulocimp_getParent(const char* localeID,
lastUnderscore=uprv_strrchr(localeID, '_');
if(lastUnderscore!=nullptr) {
- i=(int32_t)(lastUnderscore-localeID);
+ i = static_cast<int32_t>(lastUnderscore - localeID);
} else {
i=0;
}
@@ -2218,7 +2232,7 @@ uloc_getLCID(const char* localeID)
CharString collVal = ulocimp_getKeywordValue(localeID, "collation", status);
if (U_SUCCESS(status) && !collVal.isEmpty()) {
CharString tmpLocaleID = ulocimp_getBaseName(localeID, status);
- ulocimp_setKeywordValue("collation", collVal.data(), tmpLocaleID, status);
+ ulocimp_setKeywordValue("collation", collVal.toStringPiece(), tmpLocaleID, status);
if (U_SUCCESS(status)) {
return uprv_convertToLCID(langID.data(), tmpLocaleID.data(), &status);
}
@@ -2285,8 +2299,17 @@ uloc_getISOCountries()
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
- const char* bcpKey = ulocimp_toBcpKey(keyword);
- if (bcpKey == nullptr && ultag_isUnicodeLocaleKey(keyword, -1)) {
+ if (keyword == nullptr || *keyword == '\0') { return nullptr; }
+ std::optional<std::string_view> result = ulocimp_toBcpKeyWithFallback(keyword);
+ return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated.
+}
+
+U_EXPORT std::optional<std::string_view>
+ulocimp_toBcpKeyWithFallback(std::string_view keyword)
+{
+ std::optional<std::string_view> bcpKey = ulocimp_toBcpKey(keyword);
+ if (!bcpKey.has_value() &&
+ ultag_isUnicodeLocaleKey(keyword.data(), static_cast<int32_t>(keyword.size()))) {
// unknown keyword, but syntax is fine..
return keyword;
}
@@ -2296,8 +2319,18 @@ uloc_toUnicodeLocaleKey(const char* keyword)
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
- const char* bcpType = ulocimp_toBcpType(keyword, value, nullptr, nullptr);
- if (bcpType == nullptr && ultag_isUnicodeLocaleType(value, -1)) {
+ if (keyword == nullptr || *keyword == '\0' ||
+ value == nullptr || *value == '\0') { return nullptr; }
+ std::optional<std::string_view> result = ulocimp_toBcpTypeWithFallback(keyword, value);
+ return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated.
+}
+
+U_EXPORT std::optional<std::string_view>
+ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value)
+{
+ std::optional<std::string_view> bcpType = ulocimp_toBcpType(keyword, value);
+ if (!bcpType.has_value() &&
+ ultag_isUnicodeLocaleType(value.data(), static_cast<int32_t>(value.size()))) {
// unknown keyword, but syntax is fine..
return value;
}
@@ -2307,37 +2340,28 @@ uloc_toUnicodeLocaleType(const char* keyword, const char* value)
namespace {
bool
-isWellFormedLegacyKey(const char* legacyKey)
+isWellFormedLegacyKey(std::string_view key)
{
- const char* p = legacyKey;
- while (*p) {
- if (!UPRV_ISALPHANUM(*p)) {
- return false;
- }
- p++;
- }
- return true;
+ return std::all_of(key.begin(), key.end(), UPRV_ISALPHANUM);
}
bool
-isWellFormedLegacyType(const char* legacyType)
+isWellFormedLegacyType(std::string_view legacyType)
{
- const char* p = legacyType;
int32_t alphaNumLen = 0;
- while (*p) {
- if (*p == '_' || *p == '/' || *p == '-') {
+ for (char c : legacyType) {
+ if (c == '_' || c == '/' || c == '-') {
if (alphaNumLen == 0) {
return false;
}
alphaNumLen = 0;
- } else if (UPRV_ISALPHANUM(*p)) {
+ } else if (UPRV_ISALPHANUM(c)) {
alphaNumLen++;
} else {
return false;
}
- p++;
}
- return (alphaNumLen != 0);
+ return alphaNumLen != 0;
}
} // namespace
@@ -2345,8 +2369,16 @@ isWellFormedLegacyType(const char* legacyType)
U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
- const char* legacyKey = ulocimp_toLegacyKey(keyword);
- if (legacyKey == nullptr) {
+ if (keyword == nullptr || *keyword == '\0') { return nullptr; }
+ std::optional<std::string_view> result = ulocimp_toLegacyKeyWithFallback(keyword);
+ return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated.
+}
+
+U_EXPORT std::optional<std::string_view>
+ulocimp_toLegacyKeyWithFallback(std::string_view keyword)
+{
+ std::optional<std::string_view> legacyKey = ulocimp_toLegacyKey(keyword);
+ if (!legacyKey.has_value() && isWellFormedLegacyKey(keyword)) {
// Checks if the specified locale key is well-formed with the legacy locale syntax.
//
// Note:
@@ -2354,9 +2386,7 @@ uloc_toLegacyKey(const char* keyword)
// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
// Keys can only consist of [0-9a-zA-Z].
- if (isWellFormedLegacyKey(keyword)) {
- return keyword;
- }
+ return keyword;
}
return legacyKey;
}
@@ -2364,8 +2394,17 @@ uloc_toLegacyKey(const char* keyword)
U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
- const char* legacyType = ulocimp_toLegacyType(keyword, value, nullptr, nullptr);
- if (legacyType == nullptr) {
+ if (keyword == nullptr || *keyword == '\0' ||
+ value == nullptr || *value == '\0') { return nullptr; }
+ std::optional<std::string_view> result = ulocimp_toLegacyTypeWithFallback(keyword, value);
+ return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated.
+}
+
+U_EXPORT std::optional<std::string_view>
+ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value)
+{
+ std::optional<std::string_view> legacyType = ulocimp_toLegacyType(keyword, value);
+ if (!legacyType.has_value() && isWellFormedLegacyType(value)) {
// Checks if the specified locale type is well-formed with the legacy locale syntax.
//
// Note:
@@ -2374,9 +2413,7 @@ uloc_toLegacyType(const char* keyword, const char* value)
// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
// Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
// we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
- if (isWellFormedLegacyType(value)) {
- return value;
- }
+ return value;
}
return legacyType;
}