diff options
Diffstat (limited to 'modules/regex')
-rw-r--r-- | modules/regex/SCsub | 4 | ||||
-rw-r--r-- | modules/regex/icons/RegEx.svg | 2 | ||||
-rw-r--r-- | modules/regex/icons/RegExMatch.svg | 2 | ||||
-rw-r--r-- | modules/regex/regex.cpp | 16 | ||||
-rw-r--r-- | modules/regex/tests/test_regex.h | 193 |
5 files changed, 206 insertions, 11 deletions
diff --git a/modules/regex/SCsub b/modules/regex/SCsub index 6fd7cd47f3..f5e2dd5dfc 100644 --- a/modules/regex/SCsub +++ b/modules/regex/SCsub @@ -19,6 +19,7 @@ if env["builtin_pcre2"]: thirdparty_sources = [ "pcre2_auto_possess.c", "pcre2_chartables.c", + "pcre2_chkdint.c", "pcre2_compile.c", "pcre2_config.c", "pcre2_context.c", @@ -28,7 +29,7 @@ if env["builtin_pcre2"]: "pcre2_extuni.c", "pcre2_find_bracket.c", "pcre2_jit_compile.c", - # "pcre2_jit_match.c", "pcre2_jit_misc.c", # these files are included in pcre2_jit_compile.c. + # "pcre2_jit_match.c", "pcre2_jit_misc.c", # Included in `pcre2_jit_compile.c`. "pcre2_maketables.c", "pcre2_match.c", "pcre2_match_data.c", @@ -43,6 +44,7 @@ if env["builtin_pcre2"]: "pcre2_substring.c", "pcre2_tables.c", "pcre2_ucd.c", + # "pcre2_ucptables.c", # Included in `pcre2_tables.c`. "pcre2_valid_utf.c", "pcre2_xclass.c", ] diff --git a/modules/regex/icons/RegEx.svg b/modules/regex/icons/RegEx.svg index 4df26f41c0..ba232f6f0a 100644 --- a/modules/regex/icons/RegEx.svg +++ b/modules/regex/icons/RegEx.svg @@ -1 +1 @@ -<svg height="16" viewBox="0 0 16 16" width="16" xmlns="http://www.w3.org/2000/svg"><path d="M2 14h3v-3H2zM6.561 2.855a21 21 0 0 1 2.82 1.185A21 21 0 0 1 9.137 1h1.77a21 21 0 0 1-.28 3.027 21 21 0 0 1 2.88-1.171l.562 1.733a21 21 0 0 1-3.04.684 21 21 0 0 1 2.1 2.307l-1.465 1.037a21 21 0 0 1-1.672-2.624 21 21 0 0 1-1.587 2.624L6.965 7.58a21 21 0 0 1 2.026-2.308A21 21 0 0 1 6 4.59z" fill="#e0e0e0"/></svg> +<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16"><path fill="#e0e0e0" d="M2 14h3v-3H2zM6.561 2.855a21 21 0 0 1 2.82 1.185A21 21 0 0 1 9.137 1h1.77a21 21 0 0 1-.28 3.027 21 21 0 0 1 2.88-1.171l.562 1.733a21 21 0 0 1-3.04.684 21 21 0 0 1 2.1 2.307l-1.465 1.037a21 21 0 0 1-1.672-2.624 21 21 0 0 1-1.587 2.624L6.965 7.58a21 21 0 0 1 2.026-2.308A21 21 0 0 1 6 4.59z"/></svg>
\ No newline at end of file diff --git a/modules/regex/icons/RegExMatch.svg b/modules/regex/icons/RegExMatch.svg index 889cf6cc8a..626ff36691 100644 --- a/modules/regex/icons/RegExMatch.svg +++ b/modules/regex/icons/RegExMatch.svg @@ -1 +1 @@ -<svg height="16" viewBox="0 0 16 16" width="16" xmlns="http://www.w3.org/2000/svg"><path d="M5 13h2v-2H5zm2.5-8a14 14 0 0 1 1.88.79 14 14 0 0 1-.163-2.027h1.18a14 14 0 0 1-.186 2.018 14 14 0 0 1 1.92-.78l.374 1.155a14 14 0 0 1-2.026.456 14 14 0 0 1 1.4 1.538l-.977.691a14 14 0 0 1-1.115-1.75 14 14 0 0 1-1.058 1.75l-.96-.691A14 14 0 0 1 9.12 6.61a14 14 0 0 1-1.993-.454zM1.67 2C0 5 0 11 1.67 14h2C2 11 2 5 3.67 2zm10.66 0c1.67 3 1.67 9 0 12h2c1.67-3 1.67-9 0-12z" fill="#e0e0e0"/></svg> +<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16"><path fill="#e0e0e0" d="M5 13h2v-2H5zm2.5-8a14 14 0 0 1 1.88.79 14 14 0 0 1-.163-2.027h1.18a14 14 0 0 1-.186 2.018 14 14 0 0 1 1.92-.78l.374 1.155a14 14 0 0 1-2.026.456 14 14 0 0 1 1.4 1.538l-.977.691a14 14 0 0 1-1.115-1.75 14 14 0 0 1-1.058 1.75l-.96-.691A14 14 0 0 1 9.12 6.61a14 14 0 0 1-1.993-.454zM1.67 2C0 5 0 11 1.67 14h2C2 11 2 5 3.67 2zm10.66 0c1.67 3 1.67 9 0 12h2c1.67-3 1.67-9 0-12z"/></svg>
\ No newline at end of file diff --git a/modules/regex/regex.cpp b/modules/regex/regex.cpp index 704c107f20..9f34a6ca6a 100644 --- a/modules/regex/regex.cpp +++ b/modules/regex/regex.cpp @@ -270,16 +270,18 @@ Ref<RegExMatch> RegEx::search(const String &p_subject, int p_offset, int p_end) TypedArray<RegExMatch> RegEx::search_all(const String &p_subject, int p_offset, int p_end) const { ERR_FAIL_COND_V_MSG(p_offset < 0, Array(), "RegEx search offset must be >= 0"); - int last_end = -1; + int last_end = 0; TypedArray<RegExMatch> result; Ref<RegExMatch> match = search(p_subject, p_offset, p_end); + while (match.is_valid()) { - if (last_end == match->get_end(0)) { - break; + last_end = match->get_end(0); + if (match->get_start(0) == last_end) { + last_end++; } + result.push_back(match); - last_end = match->get_end(0); - match = search(p_subject, match->get_end(0), p_end); + match = search(p_subject, last_end, p_end); } return result; } @@ -332,7 +334,7 @@ String RegEx::sub(const String &p_subject, const String &p_replacement, bool p_a return String(); } - return String(output.ptr(), olength); + return String(output.ptr(), olength) + p_subject.substr(length); } bool RegEx::is_valid() const { @@ -368,7 +370,7 @@ PackedStringArray RegEx::get_names() const { for (uint32_t i = 0; i < count; i++) { String name = &table[i * entry_size + 1]; - if (result.find(name) < 0) { + if (!result.has(name)) { result.append(name); } } diff --git a/modules/regex/tests/test_regex.h b/modules/regex/tests/test_regex.h index 3e4d769377..af58e2487b 100644 --- a/modules/regex/tests/test_regex.h +++ b/modules/regex/tests/test_regex.h @@ -133,6 +133,18 @@ TEST_CASE("[RegEx] Substitution") { RegEx re4("(a)(b){0}(c)"); REQUIRE(re4.is_valid()); CHECK(re4.sub(s4, "${1}.${3}.", true) == "a.c.a.c.a.c."); + + const String s5 = "aaaa"; + + RegEx re5("a"); + REQUIRE(re5.is_valid()); + CHECK(re5.sub(s5, "b", true, 0, 2) == "bbaa"); + CHECK(re5.sub(s5, "b", true, 1, 3) == "abba"); + CHECK(re5.sub(s5, "b", true, 0, 0) == "aaaa"); + CHECK(re5.sub(s5, "b", true, 1, 1) == "aaaa"); + CHECK(re5.sub(s5, "cc", true, 0, 2) == "ccccaa"); + CHECK(re5.sub(s5, "cc", true, 1, 3) == "acccca"); + CHECK(re5.sub(s5, "", true, 0, 2) == "aa"); } TEST_CASE("[RegEx] Substitution with empty input and/or replacement") { @@ -164,7 +176,7 @@ TEST_CASE("[RegEx] Uninitialized use") { ERR_PRINT_ON } -TEST_CASE("[RegEx] Empty Pattern") { +TEST_CASE("[RegEx] Empty pattern") { const String s = "Godot"; RegEx re; @@ -172,6 +184,48 @@ TEST_CASE("[RegEx] Empty Pattern") { CHECK(re.is_valid()); } +TEST_CASE("[RegEx] Complex Grouping") { + const String test = "https://docs.godotengine.org/en/latest/contributing/"; + + // Ignored protocol in grouping. + RegEx re("^(?:https?://)([a-zA-Z]{2,4})\\.([a-zA-Z][a-zA-Z0-9_\\-]{2,64})\\.([a-zA-Z]{2,4})"); + REQUIRE(re.is_valid()); + Ref<RegExMatch> expr = re.search(test); + + CHECK(expr->get_group_count() == 3); + + CHECK(expr->get_string(0) == "https://docs.godotengine.org"); + + CHECK(expr->get_string(1) == "docs"); + CHECK(expr->get_string(2) == "godotengine"); + CHECK(expr->get_string(3) == "org"); +} + +TEST_CASE("[RegEx] Number Expression") { + const String test = "(2.5e-3 + 35 + 46) / 2.8e0 = 28.9294642857"; + + // Not an exact regex for number but a good test. + RegEx re("([+-]?\\d+)(\\.\\d+([eE][+-]?\\d+)?)?"); + REQUIRE(re.is_valid()); + Array number_match = re.search_all(test); + + CHECK(number_match.size() == 5); + + Ref<RegExMatch> number = number_match[0]; + CHECK(number->get_string(0) == "2.5e-3"); + CHECK(number->get_string(1) == "2"); + number = number_match[1]; + CHECK(number->get_string(0) == "35"); + number = number_match[2]; + CHECK(number->get_string(0) == "46"); + number = number_match[3]; + CHECK(number->get_string(0) == "2.8e0"); + number = number_match[4]; + CHECK(number->get_string(0) == "28.9294642857"); + CHECK(number->get_string(1) == "28"); + CHECK(number->get_string(2) == ".9294642857"); +} + TEST_CASE("[RegEx] Invalid end position") { const String s = "Godot"; @@ -222,6 +276,143 @@ TEST_CASE("[RegEx] Match start and end positions") { CHECK(match->get_start("vowel") == 2); CHECK(match->get_end("vowel") == 3); } + +TEST_CASE("[RegEx] Asterisk search all") { + const String s = "Godot Engine"; + + RegEx re("o*"); + REQUIRE(re.is_valid()); + Ref<RegExMatch> match; + const Array all_results = re.search_all(s); + CHECK(all_results.size() == 13); + + match = all_results[0]; + CHECK(match->get_string(0) == ""); + match = all_results[1]; + CHECK(match->get_string(0) == "o"); + match = all_results[2]; + CHECK(match->get_string(0) == ""); + match = all_results[3]; + CHECK(match->get_string(0) == "o"); + + for (int i = 4; i < 13; i++) { + match = all_results[i]; + CHECK(match->get_string(0) == ""); + } +} + +TEST_CASE("[RegEx] Simple lookahead") { + const String s = "Godot Engine"; + + RegEx re("o(?=t)"); + REQUIRE(re.is_valid()); + Ref<RegExMatch> match = re.search(s); + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 3); + CHECK(match->get_end(0) == 4); +} + +TEST_CASE("[RegEx] Lookahead groups empty matches") { + const String s = "12"; + + RegEx re("(?=(\\d+))"); + REQUIRE(re.is_valid()); + Ref<RegExMatch> match = re.search(s); + CHECK(match->get_string(0) == ""); + CHECK(match->get_string(1) == "12"); + + const Array all_results = re.search_all(s); + CHECK(all_results.size() == 2); + + match = all_results[0]; + REQUIRE(match != nullptr); + CHECK(match->get_string(0) == String("")); + CHECK(match->get_string(1) == String("12")); + + match = all_results[1]; + REQUIRE(match != nullptr); + CHECK(match->get_string(0) == String("")); + CHECK(match->get_string(1) == String("2")); +} + +TEST_CASE("[RegEx] Simple lookbehind") { + const String s = "Godot Engine"; + + RegEx re("(?<=d)o"); + REQUIRE(re.is_valid()); + Ref<RegExMatch> match = re.search(s); + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 3); + CHECK(match->get_end(0) == 4); +} + +TEST_CASE("[RegEx] Simple lookbehind search all") { + const String s = "ababbaabab"; + + RegEx re("(?<=a)b"); + REQUIRE(re.is_valid()); + const Array all_results = re.search_all(s); + CHECK(all_results.size() == 4); + + Ref<RegExMatch> match = all_results[0]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 1); + CHECK(match->get_end(0) == 2); + + match = all_results[1]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 3); + CHECK(match->get_end(0) == 4); + + match = all_results[2]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 7); + CHECK(match->get_end(0) == 8); + + match = all_results[3]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 9); + CHECK(match->get_end(0) == 10); +} + +TEST_CASE("[RegEx] Lookbehind groups empty matches") { + const String s = "abaaabab"; + + RegEx re("(?<=(b))"); + REQUIRE(re.is_valid()); + Ref<RegExMatch> match; + + const Array all_results = re.search_all(s); + CHECK(all_results.size() == 3); + + match = all_results[0]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 2); + CHECK(match->get_end(0) == 2); + CHECK(match->get_start(1) == 1); + CHECK(match->get_end(1) == 2); + CHECK(match->get_string(0) == String("")); + CHECK(match->get_string(1) == String("b")); + + match = all_results[1]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 6); + CHECK(match->get_end(0) == 6); + CHECK(match->get_start(1) == 5); + CHECK(match->get_end(1) == 6); + CHECK(match->get_string(0) == String("")); + CHECK(match->get_string(1) == String("b")); + + match = all_results[2]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 8); + CHECK(match->get_end(0) == 8); + CHECK(match->get_start(1) == 7); + CHECK(match->get_end(1) == 8); + CHECK(match->get_string(0) == String("")); + CHECK(match->get_string(1) == String("b")); +} + } // namespace TestRegEx #endif // TEST_REGEX_H |