diff options
Diffstat (limited to 'thirdparty/icu4c/common/uloc.cpp')
-rw-r--r-- | thirdparty/icu4c/common/uloc.cpp | 215 |
1 files changed, 126 insertions, 89 deletions
diff --git a/thirdparty/icu4c/common/uloc.cpp b/thirdparty/icu4c/common/uloc.cpp index 88fe7eaadc..51887c97c3 100644 --- a/thirdparty/icu4c/common/uloc.cpp +++ b/thirdparty/icu4c/common/uloc.cpp @@ -30,7 +30,9 @@ l = lang, C = ctry, M = charmap, V = variant */ +#include <algorithm> #include <optional> +#include <string_view> #include "unicode/bytestream.h" #include "unicode/errorcode.h" @@ -551,17 +553,17 @@ namespace { * @param status return status (keyword too long) * @return the keyword name */ -CharString locale_canonKeywordName(const char* keywordName, UErrorCode& status) +CharString locale_canonKeywordName(std::string_view keywordName, UErrorCode& status) { if (U_FAILURE(status)) { return {}; } CharString result; - for (; *keywordName != 0; keywordName++) { - if (!UPRV_ISALPHANUM(*keywordName)) { + for (char c : keywordName) { + if (!UPRV_ISALPHANUM(c)) { status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ return {}; } - result.append(uprv_tolower(*keywordName), status); + result.append(uprv_tolower(c), status); } if (result.isEmpty()) { status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */ @@ -580,8 +582,8 @@ typedef struct { int32_t U_CALLCONV compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { - const char* leftString = ((const KeywordStruct *)left)->keyword; - const char* rightString = ((const KeywordStruct *)right)->keyword; + const char* leftString = static_cast<const KeywordStruct*>(left)->keyword; + const char* rightString = static_cast<const KeywordStruct*>(right)->keyword; return uprv_strcmp(leftString, rightString); } @@ -686,10 +688,10 @@ ulocimp_getKeywords(const char* localeID, while(*(pos - i - 1) == ' ') { i++; } - keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); + keywordList[numKeywords].valueLen = static_cast<int32_t>(pos - equalSign - i); pos++; } else { - i = (int32_t)uprv_strlen(equalSign); + i = static_cast<int32_t>(uprv_strlen(equalSign)); while(i && equalSign[i-1] == ' ') { i--; } @@ -733,6 +735,11 @@ uloc_getKeywordValue(const char* localeID, char* buffer, int32_t bufferCapacity, UErrorCode* status) { + if (U_FAILURE(*status)) { return 0; } + if (keywordName == nullptr || *keywordName == '\0') { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } return ByteSinkUtil::viaByteSinkToTerminatedChars( buffer, bufferCapacity, [&](ByteSink& sink, UErrorCode& status) { @@ -743,7 +750,7 @@ uloc_getKeywordValue(const char* localeID, U_EXPORT CharString ulocimp_getKeywordValue(const char* localeID, - const char* keywordName, + std::string_view keywordName, UErrorCode& status) { return ByteSinkUtil::viaByteSinkToCharString( @@ -755,13 +762,13 @@ ulocimp_getKeywordValue(const char* localeID, U_EXPORT void ulocimp_getKeywordValue(const char* localeID, - const char* keywordName, + std::string_view keywordName, icu::ByteSink& sink, UErrorCode& status) { if (U_FAILURE(status)) { return; } - if (localeID == nullptr || keywordName == nullptr || keywordName[0] == 0) { + if (localeID == nullptr || keywordName.empty()) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } @@ -865,6 +872,11 @@ uloc_setKeywordValue(const char* keywordName, { if (U_FAILURE(*status)) { return 0; } + if (keywordName == nullptr || *keywordName == 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + if (bufferCapacity <= 1) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; @@ -883,7 +895,11 @@ uloc_setKeywordValue(const char* keywordName, CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords, bufferCapacity - baseLen - 1); int32_t reslen = ulocimp_setKeywordValue( - keywords, keywordName, keywordValue, sink, *status); + keywords == nullptr ? std::string_view() : keywords, + keywordName, + keywordValue == nullptr ? std::string_view() : keywordValue, + sink, + *status); if (U_FAILURE(*status)) { return *status == U_BUFFER_OVERFLOW_ERROR ? reslen + baseLen : 0; @@ -898,24 +914,29 @@ uloc_setKeywordValue(const char* keywordName, } U_EXPORT void -ulocimp_setKeywordValue(const char* keywordName, - const char* keywordValue, +ulocimp_setKeywordValue(std::string_view keywordName, + std::string_view keywordValue, CharString& localeID, UErrorCode& status) { if (U_FAILURE(status)) { return; } - // This is safe because CharString::truncate() doesn't actually erase any - // data, but simply sets the position for where new data will be written. - const char* keywords = locale_getKeywordsStart(localeID.data()); - if (keywords != nullptr) localeID.truncate(keywords - localeID.data()); + std::string_view keywords; + if (const char* start = locale_getKeywordsStart(localeID.data()); start != nullptr) { + // This is safe because CharString::truncate() doesn't actually erase any + // data, but simply sets the position for where new data will be written. + int32_t size = start - localeID.data(); + keywords = localeID.toStringPiece(); + keywords.remove_prefix(size); + localeID.truncate(size); + } CharStringByteSink sink(&localeID); ulocimp_setKeywordValue(keywords, keywordName, keywordValue, sink, status); } U_EXPORT int32_t -ulocimp_setKeywordValue(const char* keywords, - const char* keywordName, - const char* keywordValue, +ulocimp_setKeywordValue(std::string_view keywords, + std::string_view keywordName, + std::string_view keywordValue, ByteSink& sink, UErrorCode& status) { @@ -924,9 +945,6 @@ ulocimp_setKeywordValue(const char* keywords, /* TODO: sorting. removal. */ int32_t needLen = 0; int32_t rc; - const char* nextSeparator = nullptr; - const char* nextEqualsign = nullptr; - const char* keywordStart = nullptr; CharString updatedKeysAndValues; bool handledInputKeyAndValue = false; char keyValuePrefix = '@'; @@ -934,7 +952,7 @@ ulocimp_setKeywordValue(const char* keywords, if (status == U_STRING_NOT_TERMINATED_WARNING) { status = U_ZERO_ERROR; } - if (keywordName == nullptr || keywordName[0] == 0) { + if (keywordName.empty()) { status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -944,21 +962,19 @@ ulocimp_setKeywordValue(const char* keywords, } CharString canonKeywordValue; - if(keywordValue) { - while (*keywordValue != 0) { - if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) { - status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ - return 0; - } - /* Should we force lowercase in value to set? */ - canonKeywordValue.append(*keywordValue++, status); + for (char c : keywordValue) { + if (!UPRV_ISALPHANUM(c) && !UPRV_OK_VALUE_PUNCTUATION(c)) { + status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ + return 0; } + /* Should we force lowercase in value to set? */ + canonKeywordValue.append(c, status); } if (U_FAILURE(status)) { return 0; } - if (keywords == nullptr || keywords[1] == '\0') { + if (keywords.size() <= 1) { if (canonKeywordValue.isEmpty()) { /* no keywords = nothing to remove */ U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); return 0; @@ -984,23 +1000,20 @@ ulocimp_setKeywordValue(const char* keywords, return needLen; } /* end shortcut - no @ */ - keywordStart = keywords; /* search for keyword */ - while(keywordStart) { - const char* keyValueTail; - + for (size_t keywordStart = 0; keywordStart != std::string_view::npos;) { keywordStart++; /* skip @ or ; */ - nextEqualsign = uprv_strchr(keywordStart, '='); - if (!nextEqualsign) { + size_t nextEqualsign = keywords.find('=', keywordStart); + if (nextEqualsign == std::string_view::npos) { status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */ return 0; } /* strip leading & trailing spaces (TC decided to tolerate these) */ - while(*keywordStart == ' ') { + while (keywordStart < keywords.size() && keywords[keywordStart] == ' ') { keywordStart++; } - keyValueTail = nextEqualsign; - while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') { + size_t keyValueTail = nextEqualsign; + while (keyValueTail > keywordStart && keywords[keyValueTail - 1] == ' ') { keyValueTail--; } /* now keyValueTail points to first char after the keyName */ @@ -1011,26 +1024,26 @@ ulocimp_setKeywordValue(const char* keywords, } CharString localeKeywordName; while (keywordStart < keyValueTail) { - if (!UPRV_ISALPHANUM(*keywordStart)) { + if (!UPRV_ISALPHANUM(keywords[keywordStart])) { status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ return 0; } - localeKeywordName.append(uprv_tolower(*keywordStart++), status); + localeKeywordName.append(uprv_tolower(keywords[keywordStart++]), status); } if (U_FAILURE(status)) { return 0; } - nextSeparator = uprv_strchr(nextEqualsign, ';'); + size_t nextSeparator = keywords.find(';', nextEqualsign); /* start processing the value part */ nextEqualsign++; /* skip '=' */ /* First strip leading & trailing spaces (TC decided to tolerate these) */ - while(*nextEqualsign == ' ') { + while (nextEqualsign < keywords.size() && keywords[nextEqualsign] == ' ') { nextEqualsign++; } - keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign); - while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') { + keyValueTail = nextSeparator == std::string_view::npos ? keywords.size() : nextSeparator; + while (keyValueTail > nextEqualsign && keywords[keyValueTail - 1] == ' ') { keyValueTail--; } if (nextEqualsign == keyValueTail) { @@ -1065,9 +1078,10 @@ ulocimp_setKeywordValue(const char* keywords, keyValuePrefix = ';'; /* for any subsequent key-value pair */ updatedKeysAndValues.append(localeKeywordName, status); updatedKeysAndValues.append('=', status); - updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), status); + updatedKeysAndValues.append(keywords.data() + nextEqualsign, + static_cast<int32_t>(keyValueTail - nextEqualsign), status); } - if (!nextSeparator && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) { + if (nextSeparator == std::string_view::npos && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) { /* append new entry at the end, it sorts later than existing entries */ updatedKeysAndValues.append(keyValuePrefix, status); /* skip keyValuePrefix update, no subsequent key-value pair */ @@ -1091,7 +1105,7 @@ ulocimp_setKeywordValue(const char* keywords, /* if input key/value specified removal of a keyword not present in locale, or * there was an error in CharString.append, leave original locale alone. */ U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); - return (int32_t)uprv_strlen(keywords); + return static_cast<int32_t>(keywords.size()); } needLen = updatedKeysAndValues.length(); @@ -1155,7 +1169,7 @@ std::optional<int16_t> _findIndex(const char* const* list, const char* key) while (pass++ < 2) { while (*list) { if (uprv_strcmp(key, *list) == 0) { - return (int16_t)(list - anchor); + return static_cast<int16_t>(list - anchor); } list++; } @@ -1241,7 +1255,7 @@ _getLanguage(const char* localeID, std::optional<int16_t> offset = _findIndex(LANGUAGES_3, buffer); if (offset.has_value()) { const char* const alias = LANGUAGES[*offset]; - sink->Append(alias, (int32_t)uprv_strlen(alias)); + sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias))); return; } } @@ -1322,7 +1336,7 @@ _getRegion(const char* localeID, std::optional<int16_t> offset = _findIndex(COUNTRIES_3, buffer); if (offset.has_value()) { const char* const alias = COUNTRIES[*offset]; - sink->Append(alias, (int32_t)uprv_strlen(alias)); + sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias))); return; } } @@ -1370,7 +1384,7 @@ _getVariant(const char* localeID, needSeparator = false; } if (sink != nullptr) { - char c = (char)uprv_toupper(localeID[index]); + char c = uprv_toupper(localeID[index]); if (c == '-') c = '_'; sink->Append(&c, 1); } @@ -1399,7 +1413,7 @@ _getVariant(const char* localeID, needSeparator = false; } if (sink != nullptr) { - char c = (char)uprv_toupper(localeID[index]); + char c = uprv_toupper(localeID[index]); if (c == '-' || c == ',') c = '_'; sink->Append(&c, 1); } @@ -1955,7 +1969,7 @@ ulocimp_getParent(const char* localeID, lastUnderscore=uprv_strrchr(localeID, '_'); if(lastUnderscore!=nullptr) { - i=(int32_t)(lastUnderscore-localeID); + i = static_cast<int32_t>(lastUnderscore - localeID); } else { i=0; } @@ -2218,7 +2232,7 @@ uloc_getLCID(const char* localeID) CharString collVal = ulocimp_getKeywordValue(localeID, "collation", status); if (U_SUCCESS(status) && !collVal.isEmpty()) { CharString tmpLocaleID = ulocimp_getBaseName(localeID, status); - ulocimp_setKeywordValue("collation", collVal.data(), tmpLocaleID, status); + ulocimp_setKeywordValue("collation", collVal.toStringPiece(), tmpLocaleID, status); if (U_SUCCESS(status)) { return uprv_convertToLCID(langID.data(), tmpLocaleID.data(), &status); } @@ -2285,8 +2299,17 @@ uloc_getISOCountries() U_CAPI const char* U_EXPORT2 uloc_toUnicodeLocaleKey(const char* keyword) { - const char* bcpKey = ulocimp_toBcpKey(keyword); - if (bcpKey == nullptr && ultag_isUnicodeLocaleKey(keyword, -1)) { + if (keyword == nullptr || *keyword == '\0') { return nullptr; } + std::optional<std::string_view> result = ulocimp_toBcpKeyWithFallback(keyword); + return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated. +} + +U_EXPORT std::optional<std::string_view> +ulocimp_toBcpKeyWithFallback(std::string_view keyword) +{ + std::optional<std::string_view> bcpKey = ulocimp_toBcpKey(keyword); + if (!bcpKey.has_value() && + ultag_isUnicodeLocaleKey(keyword.data(), static_cast<int32_t>(keyword.size()))) { // unknown keyword, but syntax is fine.. return keyword; } @@ -2296,8 +2319,18 @@ uloc_toUnicodeLocaleKey(const char* keyword) U_CAPI const char* U_EXPORT2 uloc_toUnicodeLocaleType(const char* keyword, const char* value) { - const char* bcpType = ulocimp_toBcpType(keyword, value, nullptr, nullptr); - if (bcpType == nullptr && ultag_isUnicodeLocaleType(value, -1)) { + if (keyword == nullptr || *keyword == '\0' || + value == nullptr || *value == '\0') { return nullptr; } + std::optional<std::string_view> result = ulocimp_toBcpTypeWithFallback(keyword, value); + return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated. +} + +U_EXPORT std::optional<std::string_view> +ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value) +{ + std::optional<std::string_view> bcpType = ulocimp_toBcpType(keyword, value); + if (!bcpType.has_value() && + ultag_isUnicodeLocaleType(value.data(), static_cast<int32_t>(value.size()))) { // unknown keyword, but syntax is fine.. return value; } @@ -2307,37 +2340,28 @@ uloc_toUnicodeLocaleType(const char* keyword, const char* value) namespace { bool -isWellFormedLegacyKey(const char* legacyKey) +isWellFormedLegacyKey(std::string_view key) { - const char* p = legacyKey; - while (*p) { - if (!UPRV_ISALPHANUM(*p)) { - return false; - } - p++; - } - return true; + return std::all_of(key.begin(), key.end(), UPRV_ISALPHANUM); } bool -isWellFormedLegacyType(const char* legacyType) +isWellFormedLegacyType(std::string_view legacyType) { - const char* p = legacyType; int32_t alphaNumLen = 0; - while (*p) { - if (*p == '_' || *p == '/' || *p == '-') { + for (char c : legacyType) { + if (c == '_' || c == '/' || c == '-') { if (alphaNumLen == 0) { return false; } alphaNumLen = 0; - } else if (UPRV_ISALPHANUM(*p)) { + } else if (UPRV_ISALPHANUM(c)) { alphaNumLen++; } else { return false; } - p++; } - return (alphaNumLen != 0); + return alphaNumLen != 0; } } // namespace @@ -2345,8 +2369,16 @@ isWellFormedLegacyType(const char* legacyType) U_CAPI const char* U_EXPORT2 uloc_toLegacyKey(const char* keyword) { - const char* legacyKey = ulocimp_toLegacyKey(keyword); - if (legacyKey == nullptr) { + if (keyword == nullptr || *keyword == '\0') { return nullptr; } + std::optional<std::string_view> result = ulocimp_toLegacyKeyWithFallback(keyword); + return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated. +} + +U_EXPORT std::optional<std::string_view> +ulocimp_toLegacyKeyWithFallback(std::string_view keyword) +{ + std::optional<std::string_view> legacyKey = ulocimp_toLegacyKey(keyword); + if (!legacyKey.has_value() && isWellFormedLegacyKey(keyword)) { // Checks if the specified locale key is well-formed with the legacy locale syntax. // // Note: @@ -2354,9 +2386,7 @@ uloc_toLegacyKey(const char* keyword) // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax // Keys can only consist of [0-9a-zA-Z]. - if (isWellFormedLegacyKey(keyword)) { - return keyword; - } + return keyword; } return legacyKey; } @@ -2364,8 +2394,17 @@ uloc_toLegacyKey(const char* keyword) U_CAPI const char* U_EXPORT2 uloc_toLegacyType(const char* keyword, const char* value) { - const char* legacyType = ulocimp_toLegacyType(keyword, value, nullptr, nullptr); - if (legacyType == nullptr) { + if (keyword == nullptr || *keyword == '\0' || + value == nullptr || *value == '\0') { return nullptr; } + std::optional<std::string_view> result = ulocimp_toLegacyTypeWithFallback(keyword, value); + return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated. +} + +U_EXPORT std::optional<std::string_view> +ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value) +{ + std::optional<std::string_view> legacyType = ulocimp_toLegacyType(keyword, value); + if (!legacyType.has_value() && isWellFormedLegacyType(value)) { // Checks if the specified locale type is well-formed with the legacy locale syntax. // // Note: @@ -2374,9 +2413,7 @@ uloc_toLegacyType(const char* keyword, const char* value) // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv") - if (isWellFormedLegacyType(value)) { - return value; - } + return value; } return legacyType; } |