summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dan Nicholson <dbn@endlessos.org> 2024-11-01 13:49:51 -0600
committer: Dan Nicholson <dbn@endlessos.org> 2024-11-12 06:06:50 -0700
commit: 2dd409fd70fcd2d61138419fb4db763486bb3a80 (patch)
tree: 17b426bd86440a3363d6086d2980301a82c1173b
parent: 1bffd6c73b44b85e5889f54e14b2193940cf5bb1 (diff)
download: redot-engine-2dd409fd70fcd2d61138419fb4db763486bb3a80.tar.gz
Improve locale comparison
Since 9456454109, country-specific locales are always preferred over country-less locales, even when the OS locale's country doesn't match. For example, running the Godot editor with locale es_ES will result in the es_AR locale being chosen even though the es locale would be a better match. This happens because the es_AR locale and the es locale receive the same score when compared to es_ES. Change this by parsing locale strings into a Locale structure and decreasing the score when the script, country or variant is set in both locales but does not match. For the es_ES case, this causes the es_AR score to be decreased since the countries don't match. On the other hand, the es score is not decreased since es doesn't specify a country.
-rw-r--r-- core/string/translation_server.cpp 137
-rw-r--r-- core/string/translation_server.h 18
-rw-r--r-- tests/core/string/test_translation_server.h 58
3 files changed, 148 insertions, 65 deletions
diff --git a/core/string/translation_server.cpp b/core/string/translation_server.cpp
index 92b473b61f..4f09360ba8 100644
--- a/core/string/translation_server.cpp
+++ b/core/string/translation_server.cpp
@@ -118,36 +118,45 @@ void TranslationServer::init_locale_info() {
}
}
-String TranslationServer::standardize_locale(const String &p_locale) const {
- return _standardize_locale(p_locale, false);
+TranslationServer::Locale::operator String() const {
+ String out = language;
+ if (!script.is_empty()) {
+ out = out + "_" + script;
+ }
+ if (!country.is_empty()) {
+ out = out + "_" + country;
+ }
+ if (!variant.is_empty()) {
+ out = out + "_" + variant;
+ }
+ return out;
}
-String TranslationServer::_standardize_locale(const String &p_locale, bool p_add_defaults) const {
+TranslationServer::Locale::Locale(const TranslationServer &p_server, const String &p_locale, bool p_add_defaults) {
// Replaces '-' with '_' for macOS style locales.
String univ_locale = p_locale.replace("-", "_");
// Extract locale elements.
- String lang_name, script_name, country_name, variant_name;
Vector<String> locale_elements = univ_locale.get_slice("@", 0).split("_");
- lang_name = locale_elements[0];
+ language = locale_elements[0];
if (locale_elements.size() >= 2) {
if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
- script_name = locale_elements[1];
+ script = locale_elements[1];
}
if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
- country_name = locale_elements[1];
+ country = locale_elements[1];
}
}
if (locale_elements.size() >= 3) {
if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
- country_name = locale_elements[2];
- } else if (variant_map.has(locale_elements[2].to_lower()) && variant_map[locale_elements[2].to_lower()] == lang_name) {
- variant_name = locale_elements[2].to_lower();
+ country = locale_elements[2];
+ } else if (p_server.variant_map.has(locale_elements[2].to_lower()) && p_server.variant_map[locale_elements[2].to_lower()] == language) {
+ variant = locale_elements[2].to_lower();
}
}
if (locale_elements.size() >= 4) {
- if (variant_map.has(locale_elements[3].to_lower()) && variant_map[locale_elements[3].to_lower()] == lang_name) {
- variant_name = locale_elements[3].to_lower();
+ if (p_server.variant_map.has(locale_elements[3].to_lower()) && p_server.variant_map[locale_elements[3].to_lower()] == language) {
+ variant = locale_elements[3].to_lower();
}
}
@@ -155,71 +164,62 @@ String TranslationServer::_standardize_locale(const String &p_locale, bool p_add
Vector<String> script_extra = univ_locale.get_slice("@", 1).split(";");
for (int i = 0; i < script_extra.size(); i++) {
if (script_extra[i].to_lower() == "cyrillic") {
- script_name = "Cyrl";
+ script = "Cyrl";
break;
} else if (script_extra[i].to_lower() == "latin") {
- script_name = "Latn";
+ script = "Latn";
break;
} else if (script_extra[i].to_lower() == "devanagari") {
- script_name = "Deva";
+ script = "Deva";
break;
- } else if (variant_map.has(script_extra[i].to_lower()) && variant_map[script_extra[i].to_lower()] == lang_name) {
- variant_name = script_extra[i].to_lower();
+ } else if (p_server.variant_map.has(script_extra[i].to_lower()) && p_server.variant_map[script_extra[i].to_lower()] == language) {
+ variant = script_extra[i].to_lower();
}
}
// Handles known non-ISO language names used e.g. on Windows.
- if (locale_rename_map.has(lang_name)) {
- lang_name = locale_rename_map[lang_name];
+ if (p_server.locale_rename_map.has(language)) {
+ language = p_server.locale_rename_map[language];
}
// Handle country renames.
- if (country_rename_map.has(country_name)) {
- country_name = country_rename_map[country_name];
+ if (p_server.country_rename_map.has(country)) {
+ country = p_server.country_rename_map[country];
}
// Remove unsupported script codes.
- if (!script_map.has(script_name)) {
- script_name = "";
+ if (!p_server.script_map.has(script)) {
+ script = "";
}
// Add script code base on language and country codes for some ambiguous cases.
if (p_add_defaults) {
- if (script_name.is_empty()) {
- for (int i = 0; i < locale_script_info.size(); i++) {
- const LocaleScriptInfo &info = locale_script_info[i];
- if (info.name == lang_name) {
- if (country_name.is_empty() || info.supported_countries.has(country_name)) {
- script_name = info.script;
+ if (script.is_empty()) {
+ for (int i = 0; i < p_server.locale_script_info.size(); i++) {
+ const LocaleScriptInfo &info = p_server.locale_script_info[i];
+ if (info.name == language) {
+ if (country.is_empty() || info.supported_countries.has(country)) {
+ script = info.script;
break;
}
}
}
}
- if (!script_name.is_empty() && country_name.is_empty()) {
+ if (!script.is_empty() && country.is_empty()) {
// Add conntry code based on script for some ambiguous cases.
- for (int i = 0; i < locale_script_info.size(); i++) {
- const LocaleScriptInfo &info = locale_script_info[i];
- if (info.name == lang_name && info.script == script_name) {
- country_name = info.default_country;
+ for (int i = 0; i < p_server.locale_script_info.size(); i++) {
+ const LocaleScriptInfo &info = p_server.locale_script_info[i];
+ if (info.name == language && info.script == script) {
+ country = info.default_country;
break;
}
}
}
}
+}
- // Combine results.
- String out = lang_name;
- if (!script_name.is_empty()) {
- out = out + "_" + script_name;
- }
- if (!country_name.is_empty()) {
- out = out + "_" + country_name;
- }
- if (!variant_name.is_empty()) {
- out = out + "_" + variant_name;
- }
- return out;
+String TranslationServer::standardize_locale(const String &p_locale) const {
+ return Locale(*this, p_locale, false).operator String();
}
int TranslationServer::compare_locales(const String &p_locale_a, const String &p_locale_b) const {
@@ -234,8 +234,8 @@ int TranslationServer::compare_locales(const String &p_locale_a, const String &p
return *cached_result;
}
- String locale_a = _standardize_locale(p_locale_a, true);
- String locale_b = _standardize_locale(p_locale_b, true);
+ Locale locale_a = Locale(*this, p_locale_a, true);
+ Locale locale_b = Locale(*this, p_locale_b, true);
if (locale_a == locale_b) {
// Exact match.
@@ -243,26 +243,41 @@ int TranslationServer::compare_locales(const String &p_locale_a, const String &p
return 10;
}
- Vector<String> locale_a_elements = locale_a.split("_");
- Vector<String> locale_b_elements = locale_b.split("_");
- if (locale_a_elements[0] != locale_b_elements[0]) {
+ if (locale_a.language != locale_b.language) {
// No match.
locale_compare_cache.insert(cache_key, 0);
return 0;
}
- // Matching language, both locales have extra parts.
- // Return number of matching elements.
- int matching_elements = 1;
- for (int i = 1; i < locale_a_elements.size(); i++) {
- for (int j = 1; j < locale_b_elements.size(); j++) {
- if (locale_a_elements[i] == locale_b_elements[j]) {
- matching_elements++;
- }
+ // Matching language, both locales have extra parts. Compare the
+ // remaining elements. If both elements are non-empty, check the
+ // match to increase or decrease the score. If either element or
+ // both are empty, leave the score as is.
+ int score = 5;
+ if (!locale_a.script.is_empty() && !locale_b.script.is_empty()) {
+ if (locale_a.script == locale_b.script) {
+ score++;
+ } else {
+ score--;
}
}
- locale_compare_cache.insert(cache_key, matching_elements);
- return matching_elements;
+ if (!locale_a.country.is_empty() && !locale_b.country.is_empty()) {
+ if (locale_a.country == locale_b.country) {
+ score++;
+ } else {
+ score--;
+ }
+ }
+ if (!locale_a.variant.is_empty() && !locale_b.variant.is_empty()) {
+ if (locale_a.variant == locale_b.variant) {
+ score++;
+ } else {
+ score--;
+ }
+ }
+
+ locale_compare_cache.insert(cache_key, score);
+ return score;
}
String TranslationServer::get_locale_name(const String &p_locale) const {
diff --git a/core/string/translation_server.h b/core/string/translation_server.h
index 2438349a69..fac41035ae 100644
--- a/core/string/translation_server.h
+++ b/core/string/translation_server.h
@@ -64,6 +64,24 @@ class TranslationServer : public Object {
};
static Vector<LocaleScriptInfo> locale_script_info;
+ struct Locale {
+ String language;
+ String script;
+ String country;
+ String variant;
+
+ bool operator==(const Locale &p_locale) const {
+ return (p_locale.language == language) &&
+ (p_locale.script == script) &&
+ (p_locale.country == country) &&
+ (p_locale.variant == variant);
+ }
+
+ operator String() const;
+
+ Locale(const TranslationServer &p_server, const String &p_locale, bool p_add_defaults);
+ };
+
static HashMap<String, String> language_map;
static HashMap<String, String> script_map;
static HashMap<String, String> locale_rename_map;
diff --git a/tests/core/string/test_translation_server.h b/tests/core/string/test_translation_server.h
index ac1599f2e8..57fdf21fa6 100644
--- a/tests/core/string/test_translation_server.h
+++ b/tests/core/string/test_translation_server.h
@@ -110,18 +110,50 @@ TEST_CASE("[TranslationServer] Comparing locales") {
locale_a = "sr-Latn-CS";
locale_b = "sr-Latn-RS";
- // Two elements from locales match.
+ // Script matches (+1) but country doesn't (-1).
res = ts->compare_locales(locale_a, locale_b);
- CHECK(res == 2);
+ CHECK(res == 5);
locale_a = "uz-Cyrl-UZ";
locale_b = "uz-Latn-UZ";
- // Two elements match, but they are not sequentual.
+ // Country matches (+1) but script doesn't (-1).
res = ts->compare_locales(locale_a, locale_b);
- CHECK(res == 2);
+ CHECK(res == 5);
+
+ locale_a = "aa-Latn-ER";
+ locale_b = "aa-Latn-ER-saaho";
+
+ // Script and country match (+2) with variant on one locale (+0).
+ res = ts->compare_locales(locale_a, locale_b);
+
+ CHECK(res == 7);
+
+ locale_a = "uz-Cyrl-UZ";
+ locale_b = "uz-Latn-KG";
+
+ // Both script and country mismatched (-2).
+ res = ts->compare_locales(locale_a, locale_b);
+
+ CHECK(res == 3);
+
+ locale_a = "es-ES";
+ locale_b = "es-AR";
+
+ // Mismatched country (-1).
+ res = ts->compare_locales(locale_a, locale_b);
+
+ CHECK(res == 4);
+
+ locale_a = "es";
+ locale_b = "es-AR";
+
+ // No country for one locale (+0).
+ res = ts->compare_locales(locale_a, locale_b);
+
+ CHECK(res == 5);
locale_a = "es-EC";
locale_b = "fr-LU";
@@ -130,6 +162,24 @@ TEST_CASE("[TranslationServer] Comparing locales") {
res = ts->compare_locales(locale_a, locale_b);
CHECK(res == 0);
+
+ locale_a = "zh-HK";
+ locale_b = "zh";
+
+ // In full standardization, zh-HK becomes zh_Hant_HK and zh becomes
+ // zh_Hans_CN. Both script and country mismatch (-2).
+ res = ts->compare_locales(locale_a, locale_b);
+
+ CHECK(res == 3);
+
+ locale_a = "zh-CN";
+ locale_b = "zh";
+
+ // In full standardization, zh and zh-CN both become zh_Hans_CN for an
+ // exact match.
+ res = ts->compare_locales(locale_a, locale_b);
+
+ CHECK(res == 10);
}
} // namespace TestTranslationServer