summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- modules/gdscript/editor/gdscript_highlighter.cpp 43
-rw-r--r-- modules/gdscript/editor/gdscript_highlighter.h 1
-rw-r--r-- modules/gdscript/gdscript_editor.cpp 1
-rw-r--r-- modules/gdscript/gdscript_tokenizer.cpp 278
-rw-r--r-- modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.gd 2
-rw-r--r-- modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.out 2
-rw-r--r-- modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.gd 2
-rw-r--r-- modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.out 2
-rw-r--r-- modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.gd 3
-rw-r--r-- modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.out 2
-rw-r--r-- modules/gdscript/tests/scripts/parser/features/r_strings.gd 22
-rw-r--r-- modules/gdscript/tests/scripts/parser/features/r_strings.out 22
-rw-r--r-- modules/regex/doc_classes/RegEx.xml 2
13 files changed, 250 insertions, 132 deletions
diff --git a/modules/gdscript/editor/gdscript_highlighter.cpp b/modules/gdscript/editor/gdscript_highlighter.cpp
index e488d6e266..1be690d894 100644
--- a/modules/gdscript/editor/gdscript_highlighter.cpp
+++ b/modules/gdscript/editor/gdscript_highlighter.cpp
@@ -52,6 +52,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
bool in_keyword = false;
bool in_word = false;
bool in_number = false;
+ bool in_raw_string = false;
bool in_node_path = false;
bool in_node_ref = false;
bool in_annotation = false;
@@ -234,15 +235,33 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
}
if (str[from] == '\\') {
- Dictionary escape_char_highlighter_info;
- escape_char_highlighter_info["color"] = symbol_color;
- color_map[from] = escape_char_highlighter_info;
+ if (!in_raw_string) {
+ Dictionary escape_char_highlighter_info;
+ escape_char_highlighter_info["color"] = symbol_color;
+ color_map[from] = escape_char_highlighter_info;
+ }
from++;
- Dictionary region_continue_highlighter_info;
- region_continue_highlighter_info["color"] = region_color;
- color_map[from + 1] = region_continue_highlighter_info;
+ if (!in_raw_string) {
+ int esc_len = 0;
+ if (str[from] == 'u') {
+ esc_len = 4;
+ } else if (str[from] == 'U') {
+ esc_len = 6;
+ }
+ for (int k = 0; k < esc_len && from < line_length - 1; k++) {
+ if (!is_hex_digit(str[from + 1])) {
+ break;
+ }
+ from++;
+ }
+
+ Dictionary region_continue_highlighter_info;
+ region_continue_highlighter_info["color"] = region_color;
+ color_map[from + 1] = region_continue_highlighter_info;
+ }
+
continue;
}
@@ -489,6 +508,12 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
in_member_variable = false;
}
+ if (!in_raw_string && in_region == -1 && str[j] == 'r' && j < line_length - 1 && (str[j + 1] == '"' || str[j + 1] == '\'')) {
+ in_raw_string = true;
+ } else if (in_raw_string && in_region == -1) {
+ in_raw_string = false;
+ }
+
// Keep symbol color for binary '&&'. In the case of '&&&' use StringName color for the last ampersand.
if (!in_string_name && in_region == -1 && str[j] == '&' && !is_binary_op) {
if (j >= 2 && str[j - 1] == '&' && str[j - 2] != '&' && prev_is_binary_op) {
@@ -520,7 +545,9 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
in_annotation = false;
}
- if (in_node_ref) {
+ if (in_raw_string) {
+ color = string_color;
+ } else if (in_node_ref) {
next_type = NODE_REF;
color = node_ref_color;
} else if (in_annotation) {
@@ -692,7 +719,7 @@ void GDScriptSyntaxHighlighter::_update_cache() {
}
/* Strings */
- const Color string_color = EDITOR_GET("text_editor/theme/highlighting/string_color");
+ string_color = EDITOR_GET("text_editor/theme/highlighting/string_color");
List<String> strings;
gdscript->get_string_delimiters(&strings);
for (const String &string : strings) {
diff --git a/modules/gdscript/editor/gdscript_highlighter.h b/modules/gdscript/editor/gdscript_highlighter.h
index fe3b63d713..090857f397 100644
--- a/modules/gdscript/editor/gdscript_highlighter.h
+++ b/modules/gdscript/editor/gdscript_highlighter.h
@@ -78,6 +78,7 @@ private:
Color built_in_type_color;
Color number_color;
Color member_color;
+ Color string_color;
Color node_path_color;
Color node_ref_color;
Color annotation_color;
diff --git a/modules/gdscript/gdscript_editor.cpp b/modules/gdscript/gdscript_editor.cpp
index 6cad3b2b90..849a4b5cd0 100644
--- a/modules/gdscript/gdscript_editor.cpp
+++ b/modules/gdscript/gdscript_editor.cpp
@@ -59,6 +59,7 @@ void GDScriptLanguage::get_string_delimiters(List<String> *p_delimiters) const {
p_delimiters->push_back("' '");
p_delimiters->push_back("\"\"\" \"\"\"");
p_delimiters->push_back("''' '''");
+ // NOTE: StringName, NodePath and r-strings are not listed here.
}
bool GDScriptLanguage::is_using_templates() {
diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp
index 42b983ef45..d5c02eeae1 100644
--- a/modules/gdscript/gdscript_tokenizer.cpp
+++ b/modules/gdscript/gdscript_tokenizer.cpp
@@ -857,10 +857,14 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
STRING_NODEPATH,
};
+ bool is_raw = false;
bool is_multiline = false;
StringType type = STRING_REGULAR;
- if (_peek(-1) == '&') {
+ if (_peek(-1) == 'r') {
+ is_raw = true;
+ _advance();
+ } else if (_peek(-1) == '&') {
type = STRING_NAME;
_advance();
} else if (_peek(-1) == '^') {
@@ -890,7 +894,12 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
char32_t ch = _peek();
if (ch == 0x200E || ch == 0x200F || (ch >= 0x202A && ch <= 0x202E) || (ch >= 0x2066 && ch <= 0x2069)) {
- Token error = make_error("Invisible text direction control character present in the string, escape it (\"\\u" + String::num_int64(ch, 16) + "\") to avoid confusion.");
+ Token error;
+ if (is_raw) {
+ error = make_error("Invisible text direction control character present in the string, use regular string literal instead of r-string.");
+ } else {
+ error = make_error("Invisible text direction control character present in the string, escape it (\"\\u" + String::num_int64(ch, 16) + "\") to avoid confusion.");
+ }
error.start_column = column;
error.leftmost_column = error.start_column;
error.end_column = column + 1;
@@ -905,144 +914,164 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
return make_error("Unterminated string.");
}
- // Grab escape character.
- char32_t code = _peek();
- _advance();
- if (_is_at_end()) {
- return make_error("Unterminated string.");
- }
+ if (is_raw) {
+ if (_peek() == quote_char) {
+ _advance();
+ if (_is_at_end()) {
+ return make_error("Unterminated string.");
+ }
+ result += '\\';
+ result += quote_char;
+ } else if (_peek() == '\\') { // For `\\\"`.
+ _advance();
+ if (_is_at_end()) {
+ return make_error("Unterminated string.");
+ }
+ result += '\\';
+ result += '\\';
+ } else {
+ result += '\\';
+ }
+ } else {
+ // Grab escape character.
+ char32_t code = _peek();
+ _advance();
+ if (_is_at_end()) {
+ return make_error("Unterminated string.");
+ }
- char32_t escaped = 0;
- bool valid_escape = true;
+ char32_t escaped = 0;
+ bool valid_escape = true;
- switch (code) {
- case 'a':
- escaped = '\a';
- break;
- case 'b':
- escaped = '\b';
- break;
- case 'f':
- escaped = '\f';
- break;
- case 'n':
- escaped = '\n';
- break;
- case 'r':
- escaped = '\r';
- break;
- case 't':
- escaped = '\t';
- break;
- case 'v':
- escaped = '\v';
- break;
- case '\'':
- escaped = '\'';
- break;
- case '\"':
- escaped = '\"';
- break;
- case '\\':
- escaped = '\\';
- break;
- case 'U':
- case 'u': {
- // Hexadecimal sequence.
- int hex_len = (code == 'U') ? 6 : 4;
- for (int j = 0; j < hex_len; j++) {
- if (_is_at_end()) {
- return make_error("Unterminated string.");
+ switch (code) {
+ case 'a':
+ escaped = '\a';
+ break;
+ case 'b':
+ escaped = '\b';
+ break;
+ case 'f':
+ escaped = '\f';
+ break;
+ case 'n':
+ escaped = '\n';
+ break;
+ case 'r':
+ escaped = '\r';
+ break;
+ case 't':
+ escaped = '\t';
+ break;
+ case 'v':
+ escaped = '\v';
+ break;
+ case '\'':
+ escaped = '\'';
+ break;
+ case '\"':
+ escaped = '\"';
+ break;
+ case '\\':
+ escaped = '\\';
+ break;
+ case 'U':
+ case 'u': {
+ // Hexadecimal sequence.
+ int hex_len = (code == 'U') ? 6 : 4;
+ for (int j = 0; j < hex_len; j++) {
+ if (_is_at_end()) {
+ return make_error("Unterminated string.");
+ }
+
+ char32_t digit = _peek();
+ char32_t value = 0;
+ if (is_digit(digit)) {
+ value = digit - '0';
+ } else if (digit >= 'a' && digit <= 'f') {
+ value = digit - 'a';
+ value += 10;
+ } else if (digit >= 'A' && digit <= 'F') {
+ value = digit - 'A';
+ value += 10;
+ } else {
+ // Make error, but keep parsing the string.
+ Token error = make_error("Invalid hexadecimal digit in unicode escape sequence.");
+ error.start_column = column;
+ error.leftmost_column = error.start_column;
+ error.end_column = column + 1;
+ error.rightmost_column = error.end_column;
+ push_error(error);
+ valid_escape = false;
+ break;
+ }
+
+ escaped <<= 4;
+ escaped |= value;
+
+ _advance();
}
-
- char32_t digit = _peek();
- char32_t value = 0;
- if (is_digit(digit)) {
- value = digit - '0';
- } else if (digit >= 'a' && digit <= 'f') {
- value = digit - 'a';
- value += 10;
- } else if (digit >= 'A' && digit <= 'F') {
- value = digit - 'A';
- value += 10;
- } else {
- // Make error, but keep parsing the string.
- Token error = make_error("Invalid hexadecimal digit in unicode escape sequence.");
- error.start_column = column;
- error.leftmost_column = error.start_column;
- error.end_column = column + 1;
- error.rightmost_column = error.end_column;
- push_error(error);
- valid_escape = false;
+ } break;
+ case '\r':
+ if (_peek() != '\n') {
+ // Carriage return without newline in string. (???)
+ // Just add it to the string and keep going.
+ result += ch;
+ _advance();
break;
}
-
- escaped <<= 4;
- escaped |= value;
-
- _advance();
- }
- } break;
- case '\r':
- if (_peek() != '\n') {
- // Carriage return without newline in string. (???)
- // Just add it to the string and keep going.
- result += ch;
- _advance();
+ [[fallthrough]];
+ case '\n':
+ // Escaping newline.
+ newline(false);
+ valid_escape = false; // Don't add to the string.
break;
- }
- [[fallthrough]];
- case '\n':
- // Escaping newline.
- newline(false);
- valid_escape = false; // Don't add to the string.
- break;
- default:
- Token error = make_error("Invalid escape in string.");
- error.start_column = column - 2;
- error.leftmost_column = error.start_column;
- push_error(error);
- valid_escape = false;
- break;
- }
- // Parse UTF-16 pair.
- if (valid_escape) {
- if ((escaped & 0xfffffc00) == 0xd800) {
- if (prev == 0) {
- prev = escaped;
- prev_pos = column - 2;
- continue;
- } else {
- Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ default:
+ Token error = make_error("Invalid escape in string.");
error.start_column = column - 2;
error.leftmost_column = error.start_column;
push_error(error);
valid_escape = false;
- prev = 0;
+ break;
+ }
+ // Parse UTF-16 pair.
+ if (valid_escape) {
+ if ((escaped & 0xfffffc00) == 0xd800) {
+ if (prev == 0) {
+ prev = escaped;
+ prev_pos = column - 2;
+ continue;
+ } else {
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate.");
+ error.start_column = column - 2;
+ error.leftmost_column = error.start_column;
+ push_error(error);
+ valid_escape = false;
+ prev = 0;
+ }
+ } else if ((escaped & 0xfffffc00) == 0xdc00) {
+ if (prev == 0) {
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate.");
+ error.start_column = column - 2;
+ error.leftmost_column = error.start_column;
+ push_error(error);
+ valid_escape = false;
+ } else {
+ escaped = (prev << 10UL) + escaped - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+ prev = 0;
+ }
}
- } else if ((escaped & 0xfffffc00) == 0xdc00) {
- if (prev == 0) {
- Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate");
- error.start_column = column - 2;
+ if (prev != 0) {
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate.");
+ error.start_column = prev_pos;
error.leftmost_column = error.start_column;
push_error(error);
- valid_escape = false;
- } else {
- escaped = (prev << 10UL) + escaped - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
prev = 0;
}
}
- if (prev != 0) {
- Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
- error.start_column = prev_pos;
- error.leftmost_column = error.start_column;
- push_error(error);
- prev = 0;
- }
- }
- if (valid_escape) {
- result += escaped;
+ if (valid_escape) {
+ result += escaped;
+ }
}
} else if (ch == quote_char) {
if (prev != 0) {
@@ -1416,6 +1445,9 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
if (is_digit(c)) {
return number();
+ } else if (c == 'r' && (_peek() == '"' || _peek() == '\'')) {
+ // Raw string literals.
+ return string();
} else if (is_unicode_identifier_start(c)) {
return potential_identifier();
}
diff --git a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.gd b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.gd
new file mode 100644
index 0000000000..e5eecbb819
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.gd
@@ -0,0 +1,2 @@
+func test():
+ print(r"\")
diff --git a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.out b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.out
new file mode 100644
index 0000000000..c8e843b0d7
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.out
@@ -0,0 +1,2 @@
+GDTEST_PARSER_ERROR
+Unterminated string.
diff --git a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.gd b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.gd
new file mode 100644
index 0000000000..9168b69f86
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.gd
@@ -0,0 +1,2 @@
+func test():
+ print(r"\\"")
diff --git a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.out b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.out
new file mode 100644
index 0000000000..c8e843b0d7
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.out
@@ -0,0 +1,2 @@
+GDTEST_PARSER_ERROR
+Unterminated string.
diff --git a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.gd b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.gd
new file mode 100644
index 0000000000..37dc910e5f
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.gd
@@ -0,0 +1,3 @@
+func test():
+ # v
+ print(r"['"]*")
diff --git a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.out b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.out
new file mode 100644
index 0000000000..dcb5c2f289
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.out
@@ -0,0 +1,2 @@
+GDTEST_PARSER_ERROR
+Closing "]" doesn't have an opening counterpart.
diff --git a/modules/gdscript/tests/scripts/parser/features/r_strings.gd b/modules/gdscript/tests/scripts/parser/features/r_strings.gd
new file mode 100644
index 0000000000..6f546f28be
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/features/r_strings.gd
@@ -0,0 +1,22 @@
+func test():
+ print(r"test ' \' \" \\ \n \t \u2023 test")
+ print(r"\n\\[\t ]*(\w+)")
+ print(r"")
+ print(r"\"")
+ print(r"\\\"")
+ print(r"\\")
+ print(r"\" \\\" \\\\\"")
+ print(r"\ \\ \\\ \\\\ \\\\\ \\")
+ print(r'"')
+ print(r'"(?:\\.|[^"])*"')
+ print(r"""""")
+ print(r"""test \t "test"="" " \" \\\" \ \\ \\\ test""")
+ print(r'''r"""test \t "test"="" " \" \\\" \ \\ \\\ test"""''')
+ print(r"\t
+ \t")
+ print(r"\t \
+ \t")
+ print(r"""\t
+ \t""")
+ print(r"""\t \
+ \t""")
diff --git a/modules/gdscript/tests/scripts/parser/features/r_strings.out b/modules/gdscript/tests/scripts/parser/features/r_strings.out
new file mode 100644
index 0000000000..114ef0a6c3
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/features/r_strings.out
@@ -0,0 +1,22 @@
+GDTEST_OK
+test ' \' \" \\ \n \t \u2023 test
+\n\\[\t ]*(\w+)
+
+\"
+\\\"
+\\
+\" \\\" \\\\\"
+\ \\ \\\ \\\\ \\\\\ \\
+"
+"(?:\\.|[^"])*"
+
+test \t "test"="" " \" \\\" \ \\ \\\ test
+r"""test \t "test"="" " \" \\\" \ \\ \\\ test"""
+\t
+ \t
+\t \
+ \t
+\t
+ \t
+\t \
+ \t
diff --git a/modules/regex/doc_classes/RegEx.xml b/modules/regex/doc_classes/RegEx.xml
index 5770e7155e..ab74fce3a9 100644
--- a/modules/regex/doc_classes/RegEx.xml
+++ b/modules/regex/doc_classes/RegEx.xml
@@ -10,7 +10,7 @@
var regex = RegEx.new()
regex.compile("\\w-(\\d+)")
[/codeblock]
- The search pattern must be escaped first for GDScript before it is escaped for the expression. For example, [code]compile("\\d+")[/code] would be read by RegEx as [code]\d+[/code]. Similarly, [code]compile("\"(?:\\\\.|[^\"])*\"")[/code] would be read as [code]"(?:\\.|[^"])*"[/code].
+ The search pattern must be escaped first for GDScript before it is escaped for the expression. For example, [code]compile("\\d+")[/code] would be read by RegEx as [code]\d+[/code]. Similarly, [code]compile("\"(?:\\\\.|[^\"])*\"")[/code] would be read as [code]"(?:\\.|[^"])*"[/code]. In GDScript, you can also use raw string literals (r-strings). For example, [code]compile(r'"(?:\\.|[^"])*"')[/code] would be read the same way, as [code]"(?:\\.|[^"])*"[/code].
Using [method search], you can find the pattern within the given text. If a pattern is found, [RegExMatch] is returned and you can retrieve details of the results using methods such as [method RegExMatch.get_string] and [method RegExMatch.get_start].
[codeblock]
var regex = RegEx.new()