summary refs log tree commit diff stats
path: root/core/string/ustring.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/string/ustring.cpp')
-rw-r--r-- core/string/ustring.cpp 169
1 files changed, 110 insertions, 59 deletions
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index c276f20f99..9be7c04158 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -62,6 +62,7 @@ static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
const char CharString::_null = 0;
const char16_t Char16String::_null = 0;
const char32_t String::_null = 0;
+const char32_t String::_replacement_char = 0xfffd;
bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
const String &s = p_s;
@@ -307,7 +308,7 @@ void String::copy_from(const char *p_cstr) {
uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
if (c == 0 && i < len) {
print_unicode_error("NUL character", true);
- dst[i] = 0x20;
+ dst[i] = _replacement_char;
} else {
dst[i] = c;
}
@@ -340,7 +341,7 @@ void String::copy_from(const char *p_cstr, const int p_clip_to) {
uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
if (c == 0) {
print_unicode_error("NUL character", true);
- dst[i] = 0x20;
+ dst[i] = _replacement_char;
} else {
dst[i] = c;
}
@@ -373,17 +374,21 @@ void String::copy_from(const char32_t &p_char) {
print_unicode_error("NUL character", true);
return;
}
+
+ resize(2);
+
+ char32_t *dst = ptrw();
+
if ((p_char & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
- }
- if (p_char > 0x10ffff) {
+ dst[0] = _replacement_char;
+ } else if (p_char > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
+ dst[0] = _replacement_char;
+ } else {
+ dst[0] = p_char;
}
- resize(2);
-
- char32_t *dst = ptrw();
- dst[0] = p_char;
dst[1] = 0;
}
@@ -439,14 +444,18 @@ void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
for (int i = 0; i < p_length; i++) {
if (p_char[i] == 0) {
print_unicode_error("NUL character", true);
- dst[i] = 0x20;
+ dst[i] = _replacement_char;
continue;
}
if ((p_char[i] & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i]));
+ dst[i] = _replacement_char;
+ continue;
}
if (p_char[i] > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i]));
+ dst[i] = _replacement_char;
+ continue;
}
dst[i] = p_char[i];
}
@@ -538,7 +547,7 @@ String &String::operator+=(const char *p_str) {
uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]);
if (c == 0 && i < rhs_len) {
print_unicode_error("NUL character", true);
- dst[i] = 0x20;
+ dst[i] = _replacement_char;
} else {
dst[i] = c;
}
@@ -568,17 +577,21 @@ String &String::operator+=(char32_t p_char) {
print_unicode_error("NUL character", true);
return *this;
}
+
+ const int lhs_len = length();
+ resize(lhs_len + 2);
+ char32_t *dst = ptrw();
+
if ((p_char & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
- }
- if (p_char > 0x10ffff) {
+ dst[lhs_len] = _replacement_char;
+ } else if (p_char > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
+ dst[lhs_len] = _replacement_char;
+ } else {
+ dst[lhs_len] = p_char;
}
- const int lhs_len = length();
- resize(lhs_len + 2);
- char32_t *dst = ptrw();
- dst[lhs_len] = p_char;
dst[lhs_len + 1] = 0;
return *this;
@@ -1480,9 +1493,9 @@ String String::num(double p_num, int p_decimals) {
if (p_decimals < 0) {
p_decimals = 14;
- const double abs_num = ABS(p_num);
+ const double abs_num = Math::abs(p_num);
if (abs_num > 10) {
- // We want to align the digits to the above sane default, so we only
+ // We want to align the digits to the above reasonable default, so we only
// need to subtract log10 for numbers with a positive power of ten.
p_decimals -= (int)floor(log10(abs_num));
}
@@ -1737,7 +1750,7 @@ Vector<uint8_t> String::hex_decode() const {
void String::print_unicode_error(const String &p_message, bool p_critical) const {
if (p_critical) {
- print_error(vformat("Unicode parsing error, some characters were replaced with spaces: %s", p_message));
+ print_error(vformat(U"Unicode parsing error, some characters were replaced with � (U+FFFD): %s", p_message));
} else {
print_error(vformat("Unicode parsing error: %s", p_message));
}
@@ -1757,7 +1770,7 @@ CharString String::ascii(bool p_allow_extended) const {
cs[i] = c;
} else {
print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
- cs[i] = 0x20;
+ cs[i] = 0x20; // ASCII has no replacement character like Unicode's U+FFFD; 0x1a (SUB) is sometimes used, but it is obscure, so a space is used instead.
}
}
@@ -1897,13 +1910,13 @@ Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
unichar = (0xff >> 7) & c;
skip = 5;
} else {
- *(dst++) = 0x20;
+ *(dst++) = _replacement_char;
unichar = 0;
skip = 0;
}
} else {
if (c < 0x80 || c > 0xbf) {
- *(dst++) = 0x20;
+ *(dst++) = _replacement_char;
skip = 0;
} else {
unichar = (unichar << 6) | (c & 0x3f);
@@ -1912,15 +1925,15 @@ Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
if (unichar == 0) {
print_unicode_error("NUL character", true);
decode_failed = true;
- unichar = 0x20;
- }
- if ((unichar & 0xfffff800) == 0xd800) {
- print_unicode_error(vformat("Unpaired surrogate (%x)", unichar));
- decode_error = true;
- }
- if (unichar > 0x10ffff) {
- print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar));
- decode_error = true;
+ unichar = _replacement_char;
+ } else if ((unichar & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", unichar), true);
+ decode_failed = true;
+ unichar = _replacement_char;
+ } else if (unichar > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar), true);
+ decode_failed = true;
+ unichar = _replacement_char;
}
*(dst++) = unichar;
}
@@ -2014,7 +2027,11 @@ CharString String::utf8() const {
APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
} else {
- APPEND_CHAR(0x20);
+ // The string should already be valid UTF-32, so this branch should never be reached; emit U+FFFD as a fallback if it is.
+ print_unicode_error(vformat("Non scalar value (%x)", c), true);
+ APPEND_CHAR(uint32_t(0xe0 | ((_replacement_char >> 12) & 0x0f))); // Top 4 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((_replacement_char >> 6) & 0x3f))); // Middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | (_replacement_char & 0x3f))); // Bottom 6 bits.
}
}
#undef APPEND_CHAR
@@ -2187,7 +2204,9 @@ Char16String String::utf16() const {
APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate.
APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate.
} else {
- APPEND_CHAR(0x20);
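+ // The string should already be valid UTF-32, so this branch should never be reached. NOTE(review): U+FFFD is a BMP code point, so the surrogate-pair math below yields 0xD7FF 0xDFFD instead of the single unit 0xFFFD; confirm and fix.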
+ APPEND_CHAR(uint32_t((_replacement_char >> 10) + 0xd7c0));
+ APPEND_CHAR(uint32_t((_replacement_char & 0x3ff) | 0xdc00));
}
}
#undef APPEND_CHAR
@@ -2744,12 +2763,13 @@ double String::to_float() const {
}
uint32_t String::hash(const char *p_cstr) {
+ // static_cast: avoid negative values on platforms where char is signed.
uint32_t hashv = 5381;
- uint32_t c = *p_cstr++;
+ uint32_t c = static_cast<uint8_t>(*p_cstr++);
while (c) {
hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
- c = *p_cstr++;
+ c = static_cast<uint8_t>(*p_cstr++);
}
return hashv;
@@ -2758,28 +2778,35 @@ uint32_t String::hash(const char *p_cstr) {
uint32_t String::hash(const char *p_cstr, int p_len) {
uint32_t hashv = 5381;
for (int i = 0; i < p_len; i++) {
- hashv = ((hashv << 5) + hashv) + p_cstr[i]; /* hash * 33 + c */
+ // static_cast: avoid negative values on platforms where char is signed.
+ hashv = ((hashv << 5) + hashv) + static_cast<uint8_t>(p_cstr[i]); /* hash * 33 + c */
}
return hashv;
}
uint32_t String::hash(const wchar_t *p_cstr, int p_len) {
+ // Avoid negative values on platforms where wchar_t is signed. Account for different sizes.
+ using wide_unsigned = std::conditional<sizeof(wchar_t) == 2, uint16_t, uint32_t>::type;
+
uint32_t hashv = 5381;
for (int i = 0; i < p_len; i++) {
- hashv = ((hashv << 5) + hashv) + p_cstr[i]; /* hash * 33 + c */
+ hashv = ((hashv << 5) + hashv) + static_cast<wide_unsigned>(p_cstr[i]); /* hash * 33 + c */
}
return hashv;
}
uint32_t String::hash(const wchar_t *p_cstr) {
+ // Avoid negative values on platforms where wchar_t is signed. Account for different sizes.
+ using wide_unsigned = std::conditional<sizeof(wchar_t) == 2, uint16_t, uint32_t>::type;
+
uint32_t hashv = 5381;
- uint32_t c = *p_cstr++;
+ uint32_t c = static_cast<wide_unsigned>(*p_cstr++);
while (c) {
hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
- c = *p_cstr++;
+ c = static_cast<wide_unsigned>(*p_cstr++);
}
return hashv;
@@ -3616,6 +3643,23 @@ String String::repeat(int p_count) const {
return new_string;
}
+String String::reverse() const {
+ int len = length();
+ if (len <= 1) {
+ return *this;
+ }
+ String new_string;
+ new_string.resize(len + 1);
+
+ const char32_t *src = ptr();
+ char32_t *dst = new_string.ptrw();
+ for (int i = 0; i < len; i++) {
+ dst[i] = src[len - i - 1];
+ }
+ dst[len] = _null;
+ return new_string;
+}
+
String String::left(int p_len) const {
if (p_len < 0) {
p_len = length() + p_len;
@@ -3629,7 +3673,9 @@ String String::left(int p_len) const {
return *this;
}
- return substr(0, p_len);
+ String s;
+ s.copy_from_unchecked(&get_data()[0], p_len);
+ return s;
}
String String::right(int p_len) const {
@@ -3645,7 +3691,9 @@ String String::right(int p_len) const {
return *this;
}
- return substr(length() - p_len);
+ String s;
+ s.copy_from_unchecked(&get_data()[length() - p_len], p_len);
+ return s;
}
char32_t String::unicode_at(int p_idx) const {
@@ -3926,24 +3974,22 @@ bool String::is_absolute_path() const {
}
}
-static _FORCE_INLINE_ bool _is_valid_identifier_bit(int p_index, char32_t p_char) {
- if (p_index == 0 && is_digit(p_char)) {
- return false; // No start with number plz.
- }
- return is_ascii_identifier_char(p_char);
-}
-
String String::validate_identifier() const {
if (is_empty()) {
return "_"; // Empty string is not a valid identifier;
}
- String result = *this;
+ String result;
+ if (is_digit(operator[](0))) {
+ result = "_" + *this;
+ } else {
+ result = *this;
+ }
+
int len = result.length();
char32_t *buffer = result.ptrw();
-
for (int i = 0; i < len; i++) {
- if (!_is_valid_identifier_bit(i, buffer[i])) {
+ if (!is_ascii_identifier_char(buffer[i])) {
buffer[i] = '_';
}
}
@@ -3958,10 +4004,14 @@ bool String::is_valid_identifier() const {
return false;
}
+ if (is_digit(operator[](0))) {
+ return false;
+ }
+
const char32_t *str = &operator[](0);
for (int i = 0; i < len; i++) {
- if (!_is_valid_identifier_bit(i, str[i])) {
+ if (!is_ascii_identifier_char(str[i])) {
return false;
}
}
@@ -4262,12 +4312,13 @@ String String::pad_zeros(int p_digits) const {
begin++;
}
- if (begin >= end) {
+ int zeros_to_add = p_digits - (end - begin);
+
+ if (zeros_to_add <= 0) {
return s;
+ } else {
+ return s.insert(begin, String("0").repeat(zeros_to_add));
}
-
- int zeros_to_add = p_digits - (end - begin);
- return s.insert(begin, String("0").repeat(zeros_to_add));
}
String String::trim_prefix(const String &p_prefix) const {
@@ -4849,8 +4900,8 @@ String String::sprintf(const Array &values, bool *error) const {
}
double value = values[value_index];
- bool is_negative = (value < 0);
- String str = String::num(ABS(value), min_decimals);
+ bool is_negative = signbit(value);
+ String str = String::num(Math::abs(value), min_decimals);
const bool is_finite = Math::is_finite(value);
// Pad decimals out.
@@ -4912,7 +4963,7 @@ String String::sprintf(const Array &values, bool *error) const {
String str = "(";
for (int i = 0; i < count; i++) {
double val = vec[i];
- String number_str = String::num(ABS(val), min_decimals);
+ String number_str = String::num(Math::abs(val), min_decimals);
const bool is_finite = Math::is_finite(val);
// Pad decimals out.