diff options
Diffstat (limited to 'core/string/ustring.cpp')
-rw-r--r-- | core/string/ustring.cpp | 418 |
1 files changed, 301 insertions, 117 deletions
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 3d37e17ef8..2683addd4b 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -36,7 +36,7 @@ #include "core/os/memory.h" #include "core/string/print_string.h" #include "core/string/string_name.h" -#include "core/string/translation.h" +#include "core/string/translation_server.h" #include "core/string/ucaps.h" #include "core/variant/variant.h" #include "core/version_generated.gen.h" @@ -1537,13 +1537,16 @@ Vector<double> String::split_floats(const String &p_splitter, bool p_allow_empty int from = 0; int len = length(); + String buffer = *this; while (true) { int end = find(p_splitter, from); if (end < 0) { end = len; } if (p_allow_empty || (end > from)) { - ret.push_back(String::to_float(&get_data()[from])); + buffer[end] = 0; + ret.push_back(String::to_float(&buffer.get_data()[from])); + buffer[end] = _cowdata.get(end); } if (end == len) { @@ -1561,6 +1564,7 @@ Vector<float> String::split_floats_mk(const Vector<String> &p_splitters, bool p_ int from = 0; int len = length(); + String buffer = *this; while (true) { int idx; int end = findmk(p_splitters, from, &idx); @@ -1572,7 +1576,9 @@ Vector<float> String::split_floats_mk(const Vector<String> &p_splitters, bool p_ } if (p_allow_empty || (end > from)) { - ret.push_back(String::to_float(&get_data()[from])); + buffer[end] = 0; + ret.push_back(String::to_float(&buffer.get_data()[from])); + buffer[end] = _cowdata.get(end); } if (end == len) { @@ -1639,13 +1645,43 @@ Vector<int> String::split_ints_mk(const Vector<String> &p_splitters, bool p_allo } String String::join(const Vector<String> &parts) const { + if (parts.is_empty()) { + return String(); + } else if (parts.size() == 1) { + return parts[0]; + } + + const int this_length = length(); + + int new_size = (parts.size() - 1) * this_length; + for (const String &part : parts) { + new_size += part.length(); + } + new_size += 1; + String ret; - for (int i = 0; i < parts.size(); ++i) { - if (i > 0) { - ret += *this; + ret.resize(new_size); + char32_t *ret_ptrw = ret.ptrw(); + const char32_t *this_ptr = ptr(); + + bool first = true; + for (const String &part : parts) { + if (first) { + first = false; + } else if (this_length) { + memcpy(ret_ptrw, this_ptr, this_length * sizeof(char32_t)); + ret_ptrw += this_length; + } + + const int part_length = part.length(); + if (part_length) { + memcpy(ret_ptrw, part.ptr(), part_length * sizeof(char32_t)); + ret_ptrw += part_length; } - ret += parts[i]; } + + *ret_ptrw = 0; + return ret; } @@ -1658,30 +1694,40 @@ char32_t String::char_lowercase(char32_t p_char) { } String String::to_upper() const { - String upper = *this; + if (is_empty()) { + return *this; + } - for (int i = 0; i < upper.size(); i++) { - const char32_t s = upper[i]; - const char32_t t = _find_upper(s); - if (s != t) { // avoid copy on write - upper[i] = t; - } + String upper; + upper.resize(size()); + const char32_t *old_ptr = ptr(); + char32_t *upper_ptrw = upper.ptrw(); + + while (*old_ptr) { + *upper_ptrw++ = _find_upper(*old_ptr++); } + *upper_ptrw = 0; + return upper; } String String::to_lower() const { - String lower = *this; + if (is_empty()) { + return *this; + } - for (int i = 0; i < lower.size(); i++) { - const char32_t s = lower[i]; - const char32_t t = _find_lower(s); - if (s != t) { // avoid copy on write - lower[i] = t; - } + String lower; + lower.resize(size()); + const char32_t *old_ptr = ptr(); + char32_t *lower_ptrw = lower.ptrw(); + + while (*old_ptr) { + *lower_ptrw++ = _find_lower(*old_ptr++); } + *lower_ptrw = 0; + return lower; } @@ -1919,15 +1965,16 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) { static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; String ret; - char v[2] = { 0, 0 }; + ret.resize(p_len * 2 + 1); + char32_t *ret_ptrw = ret.ptrw(); for (int i = 0; i < p_len; i++) { - v[0] = hex[p_buffer[i] >> 4]; - ret += v; - v[0] = hex[p_buffer[i] & 0xF]; - ret += v; + *ret_ptrw++ = hex[p_buffer[i] >> 4]; + *ret_ptrw++ = hex[p_buffer[i] & 0xF]; } + *ret_ptrw = 0; + return ret; } @@ -1950,11 +1997,12 @@ Vector<uint8_t> String::hex_decode() const { Vector<uint8_t> out; int len = length() / 2; out.resize(len); + uint8_t *out_ptrw = out.ptrw(); for (int i = 0; i < len; i++) { char32_t c; HEX_TO_BYTE(first, i * 2); HEX_TO_BYTE(second, i * 2 + 1); - out.write[i] = first * 16 + second; + out_ptrw[i] = first * 16 + second; } return out; #undef HEX_TO_BYTE @@ -1975,14 +2023,16 @@ CharString String::ascii(bool p_allow_extended) const { CharString cs; cs.resize(size()); + char *cs_ptrw = cs.ptrw(); + const char32_t *this_ptr = ptr(); for (int i = 0; i < size(); i++) { - char32_t c = operator[](i); + char32_t c = this_ptr[i]; if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) { - cs[i] = c; + cs_ptrw[i] = c; } else { print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c)); - cs[i] = 0x20; // ascii doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane + cs_ptrw[i] = 0x20; // ASCII doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane. } } @@ -3115,8 +3165,9 @@ Vector<uint8_t> String::md5_buffer() const { Vector<uint8_t> ret; ret.resize(16); + uint8_t *ret_ptrw = ret.ptrw(); for (int i = 0; i < 16; i++) { - ret.write[i] = hash[i]; + ret_ptrw[i] = hash[i]; } return ret; } @@ -3128,8 +3179,9 @@ Vector<uint8_t> String::sha1_buffer() const { Vector<uint8_t> ret; ret.resize(20); + uint8_t *ret_ptrw = ret.ptrw(); for (int i = 0; i < 20; i++) { - ret.write[i] = hash[i]; + ret_ptrw[i] = hash[i]; } return ret; @@ -3142,14 +3194,15 @@ Vector<uint8_t> String::sha256_buffer() const { Vector<uint8_t> ret; ret.resize(32); + uint8_t *ret_ptrw = ret.ptrw(); for (int i = 0; i < 32; i++) { - ret.write[i] = hash[i]; + ret_ptrw[i] = hash[i]; } return ret; } String String::insert(int p_at_pos, const String &p_string) const { - if (p_at_pos < 0) { + if (p_string.is_empty() || p_at_pos < 0) { return *this; } @@ -3157,17 +3210,27 @@ String String::insert(int p_at_pos, const String &p_string) const { p_at_pos = length(); } - String pre; + String ret; + ret.resize(length() + p_string.length() + 1); + char32_t *ret_ptrw = ret.ptrw(); + const char32_t *this_ptr = ptr(); + if (p_at_pos > 0) { - pre = substr(0, p_at_pos); + memcpy(ret_ptrw, this_ptr, p_at_pos * sizeof(char32_t)); + ret_ptrw += p_at_pos; } - String post; + memcpy(ret_ptrw, p_string.ptr(), p_string.length() * sizeof(char32_t)); + ret_ptrw += p_string.length(); + if (p_at_pos < length()) { - post = substr(p_at_pos, length() - p_at_pos); + memcpy(ret_ptrw, this_ptr + p_at_pos, (length() - p_at_pos) * sizeof(char32_t)); + ret_ptrw += length() - p_at_pos; } - return pre + p_string + post; + *ret_ptrw = 0; + + return ret; } String String::erase(int p_pos, int p_chars) const { @@ -3871,8 +3934,9 @@ Vector<String> String::bigrams() const { return b; } b.resize(n_pairs); + String *b_ptrw = b.ptrw(); for (int i = 0; i < n_pairs; i++) { - b.write[i] = substr(i, 2); + b_ptrw[i] = substr(i, 2); } return b; } @@ -3986,54 +4050,161 @@ String String::format(const Variant &values, const String &placeholder) const { return new_string; } -String String::replace(const String &p_key, const String &p_with) const { - String new_string; +static String _replace_common(const String &p_this, const String &p_key, const String &p_with, bool p_case_insensitive) { + if (p_key.is_empty() || p_this.is_empty()) { + return p_this; + } + + const int key_length = p_key.length(); + int search_from = 0; int result = 0; - while ((result = find(p_key, search_from)) >= 0) { - new_string += substr(search_from, result - search_from); - new_string += p_with; - search_from = result + p_key.length(); + LocalVector<int> found; + + while ((result = (p_case_insensitive ? p_this.findn(p_key, search_from) : p_this.find(p_key, search_from))) >= 0) { + found.push_back(result); + search_from = result + key_length; } - if (search_from == 0) { - return *this; + if (found.is_empty()) { + return p_this; + } + + String new_string; + + const int with_length = p_with.length(); + const int old_length = p_this.length(); + + new_string.resize(old_length + found.size() * (with_length - key_length) + 1); + + char32_t *new_ptrw = new_string.ptrw(); + const char32_t *old_ptr = p_this.ptr(); + const char32_t *with_ptr = p_with.ptr(); + + int last_pos = 0; + + for (const int &pos : found) { + if (last_pos != pos) { + memcpy(new_ptrw, old_ptr + last_pos, (pos - last_pos) * sizeof(char32_t)); + new_ptrw += (pos - last_pos); + } + if (with_length) { + memcpy(new_ptrw, with_ptr, with_length * sizeof(char32_t)); + new_ptrw += with_length; + } + last_pos = pos + key_length; + } + + if (last_pos != old_length) { + memcpy(new_ptrw, old_ptr + last_pos, (old_length - last_pos) * sizeof(char32_t)); + new_ptrw += old_length - last_pos; } - new_string += substr(search_from, length() - search_from); + *new_ptrw = 0; return new_string; } -String String::replace(const char *p_key, const char *p_with) const { - String new_string; +static String _replace_common(const String &p_this, char const *p_key, char const *p_with, bool p_case_insensitive) { + int key_length = strlen(p_key); + + if (key_length == 0 || p_this.is_empty()) { + return p_this; + } + int search_from = 0; int result = 0; - while ((result = find(p_key, search_from)) >= 0) { - new_string += substr(search_from, result - search_from); - new_string += p_with; - int k = 0; - while (p_key[k] != '\0') { - k++; + LocalVector<int> found; + + while ((result = (p_case_insensitive ? p_this.findn(p_key, search_from) : p_this.find(p_key, search_from))) >= 0) { + found.push_back(result); + search_from = result + key_length; + } + + if (found.is_empty()) { + return p_this; + } + + String new_string; + + // Create string to speed up copying as we can't do `memcopy` between `char32_t` and `char`. + const String with_string(p_with); + const int with_length = with_string.length(); + const int old_length = p_this.length(); + + new_string.resize(old_length + found.size() * (with_length - key_length) + 1); + + char32_t *new_ptrw = new_string.ptrw(); + const char32_t *old_ptr = p_this.ptr(); + const char32_t *with_ptr = with_string.ptr(); + + int last_pos = 0; + + for (const int &pos : found) { + if (last_pos != pos) { + memcpy(new_ptrw, old_ptr + last_pos, (pos - last_pos) * sizeof(char32_t)); + new_ptrw += (pos - last_pos); } - search_from = result + k; + if (with_length) { + memcpy(new_ptrw, with_ptr, with_length * sizeof(char32_t)); + new_ptrw += with_length; + } + last_pos = pos + key_length; } - if (search_from == 0) { - return *this; + if (last_pos != old_length) { + memcpy(new_ptrw, old_ptr + last_pos, (old_length - last_pos) * sizeof(char32_t)); + new_ptrw += old_length - last_pos; } - new_string += substr(search_from, length() - search_from); + *new_ptrw = 0; return new_string; } +String String::replace(const String &p_key, const String &p_with) const { + return _replace_common(*this, p_key, p_with, false); +} + +String String::replace(const char *p_key, const char *p_with) const { + return _replace_common(*this, p_key, p_with, false); +} + String String::replace_first(const String &p_key, const String &p_with) const { int pos = find(p_key); if (pos >= 0) { - return substr(0, pos) + p_with + substr(pos + p_key.length(), length()); + const int old_length = length(); + const int key_length = p_key.length(); + const int with_length = p_with.length(); + + String new_string; + new_string.resize(old_length + (with_length - key_length) + 1); + + char32_t *new_ptrw = new_string.ptrw(); + const char32_t *old_ptr = ptr(); + const char32_t *with_ptr = p_with.ptr(); + + if (pos > 0) { + memcpy(new_ptrw, old_ptr, pos * sizeof(char32_t)); + new_ptrw += pos; + } + + if (with_length) { + memcpy(new_ptrw, with_ptr, with_length * sizeof(char32_t)); + new_ptrw += with_length; + } + pos += key_length; + + if (pos != old_length) { + memcpy(new_ptrw, old_ptr + pos, (old_length - pos) * sizeof(char32_t)); + new_ptrw += (old_length - pos); + } + + *new_ptrw = 0; + + return new_string; } return *this; @@ -4042,55 +4213,45 @@ String String::replace_first(const String &p_key, const String &p_with) const { String String::replace_first(const char *p_key, const char *p_with) const { int pos = find(p_key); if (pos >= 0) { - int substring_length = strlen(p_key); - return substr(0, pos) + p_with + substr(pos + substring_length, length()); - } - - return *this; -} + const int old_length = length(); + const int key_length = strlen(p_key); + const int with_length = strlen(p_with); -String String::replacen(const String &p_key, const String &p_with) const { - String new_string; - int search_from = 0; - int result = 0; + String new_string; + new_string.resize(old_length + (with_length - key_length) + 1); - while ((result = findn(p_key, search_from)) >= 0) { - new_string += substr(search_from, result - search_from); - new_string += p_with; - search_from = result + p_key.length(); - } + char32_t *new_ptrw = new_string.ptrw(); + const char32_t *old_ptr = ptr(); - if (search_from == 0) { - return *this; - } + if (pos > 0) { + memcpy(new_ptrw, old_ptr, pos * sizeof(char32_t)); + new_ptrw += pos; + } - new_string += substr(search_from, length() - search_from); - return new_string; -} + for (int i = 0; i < with_length; ++i) { + *new_ptrw++ = p_with[i]; + } + pos += key_length; -String String::replacen(const char *p_key, const char *p_with) const { - String new_string; - int search_from = 0; - int result = 0; - int substring_length = strlen(p_key); + if (pos != old_length) { + memcpy(new_ptrw, old_ptr + pos, (old_length - pos) * sizeof(char32_t)); + new_ptrw += (old_length - pos); + } - if (substring_length == 0) { - return *this; // there's nothing to match or substitute - } + *new_ptrw = 0; - while ((result = findn(p_key, search_from)) >= 0) { - new_string += substr(search_from, result - search_from); - new_string += p_with; - search_from = result + substring_length; + return new_string; } - if (search_from == 0) { - return *this; - } + return *this; +} - new_string += substr(search_from, length() - search_from); +String String::replacen(const String &p_key, const String &p_with) const { + return _replace_common(*this, p_key, p_with, true); +} - return new_string; +String String::replacen(const char *p_key, const char *p_with) const { + return _replace_common(*this, p_key, p_with, true); } String String::repeat(int p_count) const { @@ -4384,10 +4545,7 @@ String String::simplify_path() const { dirs.remove_at(i); i--; } else if (d == "..") { - if (i == 0) { - dirs.remove_at(i); - i--; - } else { + if (i != 0) { dirs.remove_at(i); dirs.remove_at(i - 1); i -= 2; @@ -4466,7 +4624,7 @@ bool String::is_absolute_path() const { } } -String String::validate_identifier() const { +String String::validate_ascii_identifier() const { if (is_empty()) { return "_"; // Empty string is not a valid identifier; } @@ -4489,7 +4647,7 @@ String String::validate_identifier() const { return result; } -bool String::is_valid_identifier() const { +bool String::is_valid_ascii_identifier() const { int len = length(); if (len == 0) { @@ -4511,6 +4669,26 @@ bool String::is_valid_identifier() const { return true; } +bool String::is_valid_unicode_identifier() const { + const char32_t *str = ptr(); + int len = length(); + + if (len == 0) { + return false; // Empty string. + } + + if (!is_unicode_identifier_start(str[0])) { + return false; + } + + for (int i = 1; i < len; i++) { + if (!is_unicode_identifier_continue(str[i])) { + return false; + } + } + return true; +} + bool String::is_valid_string() const { int l = length(); const char32_t *src = get_data(); @@ -4757,8 +4935,9 @@ String String::xml_unescape() const { return String(); } str.resize(len + 1); - _xml_unescape(get_data(), l, str.ptrw()); - str[len] = 0; + char32_t *str_ptrw = str.ptrw(); + _xml_unescape(get_data(), l, str_ptrw); + str_ptrw[len] = 0; return str; } @@ -5321,6 +5500,11 @@ String String::lpad(int min_length, const String &character) const { // "fish %s %d pie" % ["frog", 12] // In case of an error, the string returned is the error description and "error" is true. String String::sprintf(const Array &values, bool *error) const { + static const String ZERO("0"); + static const String SPACE(" "); + static const String MINUS("-"); + static const String PLUS("+"); + String formatted; char32_t *self = (char32_t *)get_data(); bool in_format = false; @@ -5343,7 +5527,7 @@ String String::sprintf(const Array &values, bool *error) const { if (in_format) { // We have % - let's see what else we get. switch (c) { case '%': { // Replace %% with % - formatted += chr(c); + formatted += c; in_format = false; break; } @@ -5393,7 +5577,7 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. int pad_chars_count = (negative || show_sign) ? min_chars - 1 : min_chars; - String pad_char = pad_with_zeros ? String("0") : String(" "); + const String &pad_char = pad_with_zeros ? ZERO : SPACE; if (left_justified) { str = str.rpad(pad_chars_count, pad_char); } else { @@ -5402,7 +5586,7 @@ String String::sprintf(const Array &values, bool *error) const { // Sign. if (show_sign || negative) { - String sign_char = negative ? "-" : "+"; + const String &sign_char = negative ? MINUS : PLUS; if (left_justified) { str = str.insert(0, sign_char); } else { @@ -5439,7 +5623,7 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. Leave room for sign later if required. int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars; - String pad_char = (pad_with_zeros && is_finite) ? String("0") : String(" "); // Never pad NaN or inf with zeros + const String &pad_char = (pad_with_zeros && is_finite) ? ZERO : SPACE; // Never pad NaN or inf with zeros if (left_justified) { str = str.rpad(pad_chars_count, pad_char); } else { @@ -5448,7 +5632,7 @@ String String::sprintf(const Array &values, bool *error) const { // Add sign if needed. if (show_sign || is_negative) { - String sign_char = is_negative ? "-" : "+"; + const String &sign_char = is_negative ? MINUS : PLUS; if (left_justified) { str = str.insert(0, sign_char); } else { @@ -5501,7 +5685,7 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. Leave room for sign later if required. int pad_chars_count = val < 0 ? min_chars - 1 : min_chars; - String pad_char = (pad_with_zeros && is_finite) ? String("0") : String(" "); // Never pad NaN or inf with zeros + const String &pad_char = (pad_with_zeros && is_finite) ? ZERO : SPACE; // Never pad NaN or inf with zeros if (left_justified) { number_str = number_str.rpad(pad_chars_count, pad_char); } else { @@ -5511,9 +5695,9 @@ String String::sprintf(const Array &values, bool *error) const { // Add sign if needed. if (val < 0) { if (left_justified) { - number_str = number_str.insert(0, "-"); + number_str = number_str.insert(0, MINUS); } else { - number_str = number_str.insert(pad_with_zeros ? 0 : number_str.length() - initial_len, "-"); + number_str = number_str.insert(pad_with_zeros ? 0 : number_str.length() - initial_len, MINUS); } } @@ -5678,7 +5862,7 @@ String String::sprintf(const Array &values, bool *error) const { in_decimals = false; break; default: - formatted += chr(c); + formatted += c; } } } |