summaryrefslogtreecommitdiffstats
path: root/core/string
diff options
context:
space:
mode:
Diffstat (limited to 'core/string')
-rw-r--r--core/string/char_range.inc613
-rw-r--r--core/string/char_utils.h66
-rw-r--r--core/string/fuzzy_search.cpp349
-rw-r--r--core/string/fuzzy_search.h101
-rw-r--r--core/string/node_path.cpp2
-rw-r--r--core/string/translation_domain.cpp7
-rw-r--r--core/string/translation_po.cpp10
-rw-r--r--core/string/translation_server.cpp137
-rw-r--r--core/string/translation_server.h18
-rw-r--r--core/string/ustring.cpp60
-rw-r--r--core/string/ustring.h9
11 files changed, 1054 insertions, 318 deletions
diff --git a/core/string/char_range.inc b/core/string/char_range.inc
index 2b081b96de..efae757802 100644
--- a/core/string/char_range.inc
+++ b/core/string/char_range.inc
@@ -33,14 +33,17 @@
#include "core/typedefs.h"
+// Unicode Derived Core Properties
+// Source: https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt
+
struct CharRange {
char32_t start;
char32_t end;
};
-inline constexpr CharRange xid_start[] = {
+constexpr inline CharRange xid_start[] = {
{ 0x41, 0x5a },
- { 0x5f, 0x5f },
+ { 0x5f, 0x5f }, // Underscore technically isn't in XID_Start, but for our purposes it's included.
{ 0x61, 0x7a },
{ 0xaa, 0xaa },
{ 0xb5, 0xb5 },
@@ -54,7 +57,7 @@ inline constexpr CharRange xid_start[] = {
{ 0x2ee, 0x2ee },
{ 0x370, 0x374 },
{ 0x376, 0x377 },
- { 0x37a, 0x37d },
+ { 0x37b, 0x37d },
{ 0x37f, 0x37f },
{ 0x386, 0x386 },
{ 0x388, 0x38a },
@@ -182,7 +185,7 @@ inline constexpr CharRange xid_start[] = {
{ 0xdbd, 0xdbd },
{ 0xdc0, 0xdc6 },
{ 0xe01, 0xe30 },
- { 0xe32, 0xe33 },
+ { 0xe32, 0xe32 },
{ 0xe40, 0xe46 },
{ 0xe81, 0xe82 },
{ 0xe84, 0xe84 },
@@ -190,7 +193,7 @@ inline constexpr CharRange xid_start[] = {
{ 0xe8c, 0xea3 },
{ 0xea5, 0xea5 },
{ 0xea7, 0xeb0 },
- { 0xeb2, 0xeb3 },
+ { 0xeb2, 0xeb2 },
{ 0xebd, 0xebd },
{ 0xec0, 0xec4 },
{ 0xec6, 0xec6 },
@@ -245,8 +248,7 @@ inline constexpr CharRange xid_start[] = {
{ 0x17d7, 0x17d7 },
{ 0x17dc, 0x17dc },
{ 0x1820, 0x1878 },
- { 0x1880, 0x1884 },
- { 0x1887, 0x18a8 },
+ { 0x1880, 0x18a8 },
{ 0x18aa, 0x18aa },
{ 0x18b0, 0x18f5 },
{ 0x1900, 0x191e },
@@ -265,7 +267,7 @@ inline constexpr CharRange xid_start[] = {
{ 0x1c00, 0x1c23 },
{ 0x1c4d, 0x1c4f },
{ 0x1c5a, 0x1c7d },
- { 0x1c80, 0x1c88 },
+ { 0x1c80, 0x1c8a },
{ 0x1c90, 0x1cba },
{ 0x1cbd, 0x1cbf },
{ 0x1ce9, 0x1cec },
@@ -330,7 +332,7 @@ inline constexpr CharRange xid_start[] = {
{ 0x3031, 0x3035 },
{ 0x3038, 0x303c },
{ 0x3041, 0x3096 },
- { 0x309b, 0x309f },
+ { 0x309d, 0x309f },
{ 0x30a1, 0x30fa },
{ 0x30fc, 0x30ff },
{ 0x3105, 0x312f },
@@ -348,10 +350,10 @@ inline constexpr CharRange xid_start[] = {
{ 0xa6a0, 0xa6ef },
{ 0xa717, 0xa71f },
{ 0xa722, 0xa788 },
- { 0xa78b, 0xa7ca },
+ { 0xa78b, 0xa7cd },
{ 0xa7d0, 0xa7d1 },
{ 0xa7d3, 0xa7d3 },
- { 0xa7d5, 0xa7d9 },
+ { 0xa7d5, 0xa7dc },
{ 0xa7f2, 0xa801 },
{ 0xa803, 0xa805 },
{ 0xa807, 0xa80a },
@@ -406,15 +408,22 @@ inline constexpr CharRange xid_start[] = {
{ 0xfb40, 0xfb41 },
{ 0xfb43, 0xfb44 },
{ 0xfb46, 0xfbb1 },
- { 0xfbd3, 0xfd3d },
+ { 0xfbd3, 0xfc5d },
+ { 0xfc64, 0xfd3d },
{ 0xfd50, 0xfd8f },
{ 0xfd92, 0xfdc7 },
- { 0xfdf0, 0xfdfb },
- { 0xfe70, 0xfe74 },
- { 0xfe76, 0xfefc },
+ { 0xfdf0, 0xfdf9 },
+ { 0xfe71, 0xfe71 },
+ { 0xfe73, 0xfe73 },
+ { 0xfe77, 0xfe77 },
+ { 0xfe79, 0xfe79 },
+ { 0xfe7b, 0xfe7b },
+ { 0xfe7d, 0xfe7d },
+ { 0xfe7f, 0xfefc },
{ 0xff21, 0xff3a },
{ 0xff41, 0xff5a },
- { 0xff66, 0xffbe },
+ { 0xff66, 0xff9d },
+ { 0xffa0, 0xffbe },
{ 0xffc2, 0xffc7 },
{ 0xffca, 0xffcf },
{ 0xffd2, 0xffd7 },
@@ -449,6 +458,7 @@ inline constexpr CharRange xid_start[] = {
{ 0x105a3, 0x105b1 },
{ 0x105b3, 0x105b9 },
{ 0x105bb, 0x105bc },
+ { 0x105c0, 0x105f3 },
{ 0x10600, 0x10736 },
{ 0x10740, 0x10755 },
{ 0x10760, 0x10767 },
@@ -485,8 +495,11 @@ inline constexpr CharRange xid_start[] = {
{ 0x10c80, 0x10cb2 },
{ 0x10cc0, 0x10cf2 },
{ 0x10d00, 0x10d23 },
+ { 0x10d4a, 0x10d65 },
+ { 0x10d6f, 0x10d85 },
{ 0x10e80, 0x10ea9 },
{ 0x10eb0, 0x10eb1 },
+ { 0x10ec2, 0x10ec4 },
{ 0x10f00, 0x10f1c },
{ 0x10f27, 0x10f27 },
{ 0x10f30, 0x10f45 },
@@ -509,6 +522,7 @@ inline constexpr CharRange xid_start[] = {
{ 0x111dc, 0x111dc },
{ 0x11200, 0x11211 },
{ 0x11213, 0x1122b },
+ { 0x1123f, 0x11240 },
{ 0x11280, 0x11286 },
{ 0x11288, 0x11288 },
{ 0x1128a, 0x1128d },
@@ -524,6 +538,13 @@ inline constexpr CharRange xid_start[] = {
{ 0x1133d, 0x1133d },
{ 0x11350, 0x11350 },
{ 0x1135d, 0x11361 },
+ { 0x11380, 0x11389 },
+ { 0x1138b, 0x1138b },
+ { 0x1138e, 0x1138e },
+ { 0x11390, 0x113b5 },
+ { 0x113b7, 0x113b7 },
+ { 0x113d1, 0x113d1 },
+ { 0x113d3, 0x113d3 },
{ 0x11400, 0x11434 },
{ 0x11447, 0x1144a },
{ 0x1145f, 0x11461 },
@@ -558,6 +579,7 @@ inline constexpr CharRange xid_start[] = {
{ 0x11a5c, 0x11a89 },
{ 0x11a9d, 0x11a9d },
{ 0x11ab0, 0x11af8 },
+ { 0x11bc0, 0x11be0 },
{ 0x11c00, 0x11c08 },
{ 0x11c0a, 0x11c2e },
{ 0x11c40, 0x11c40 },
@@ -571,13 +593,19 @@ inline constexpr CharRange xid_start[] = {
{ 0x11d6a, 0x11d89 },
{ 0x11d98, 0x11d98 },
{ 0x11ee0, 0x11ef2 },
+ { 0x11f02, 0x11f02 },
+ { 0x11f04, 0x11f10 },
+ { 0x11f12, 0x11f33 },
{ 0x11fb0, 0x11fb0 },
{ 0x12000, 0x12399 },
{ 0x12400, 0x1246e },
{ 0x12480, 0x12543 },
{ 0x12f90, 0x12ff0 },
- { 0x13000, 0x1342e },
+ { 0x13000, 0x1342f },
+ { 0x13441, 0x13446 },
+ { 0x13460, 0x143fa },
{ 0x14400, 0x14646 },
+ { 0x16100, 0x1611d },
{ 0x16800, 0x16a38 },
{ 0x16a40, 0x16a5e },
{ 0x16a70, 0x16abe },
@@ -586,6 +614,7 @@ inline constexpr CharRange xid_start[] = {
{ 0x16b40, 0x16b43 },
{ 0x16b63, 0x16b77 },
{ 0x16b7d, 0x16b8f },
+ { 0x16d40, 0x16d6c },
{ 0x16e40, 0x16e7f },
{ 0x16f00, 0x16f4a },
{ 0x16f50, 0x16f50 },
@@ -594,12 +623,14 @@ inline constexpr CharRange xid_start[] = {
{ 0x16fe3, 0x16fe3 },
{ 0x17000, 0x187f7 },
{ 0x18800, 0x18cd5 },
- { 0x18d00, 0x18d08 },
+ { 0x18cff, 0x18d08 },
{ 0x1aff0, 0x1aff3 },
{ 0x1aff5, 0x1affb },
{ 0x1affd, 0x1affe },
{ 0x1b000, 0x1b122 },
+ { 0x1b132, 0x1b132 },
{ 0x1b150, 0x1b152 },
+ { 0x1b155, 0x1b155 },
{ 0x1b164, 0x1b167 },
{ 0x1b170, 0x1b2fb },
{ 0x1bc00, 0x1bc6a },
@@ -637,11 +668,16 @@ inline constexpr CharRange xid_start[] = {
{ 0x1d7aa, 0x1d7c2 },
{ 0x1d7c4, 0x1d7cb },
{ 0x1df00, 0x1df1e },
+ { 0x1df25, 0x1df2a },
+ { 0x1e030, 0x1e06d },
{ 0x1e100, 0x1e12c },
{ 0x1e137, 0x1e13d },
{ 0x1e14e, 0x1e14e },
{ 0x1e290, 0x1e2ad },
{ 0x1e2c0, 0x1e2eb },
+ { 0x1e4d0, 0x1e4eb },
+ { 0x1e5d0, 0x1e5ed },
+ { 0x1e5f0, 0x1e5f0 },
{ 0x1e7e0, 0x1e7e6 },
{ 0x1e7e8, 0x1e7eb },
{ 0x1e7ed, 0x1e7ee },
@@ -683,15 +719,17 @@ inline constexpr CharRange xid_start[] = {
{ 0x1eea5, 0x1eea9 },
{ 0x1eeab, 0x1eebb },
{ 0x20000, 0x2a6df },
- { 0x2a700, 0x2b738 },
+ { 0x2a700, 0x2b739 },
{ 0x2b740, 0x2b81d },
{ 0x2b820, 0x2cea1 },
{ 0x2ceb0, 0x2ebe0 },
+ { 0x2ebf0, 0x2ee5d },
{ 0x2f800, 0x2fa1d },
{ 0x30000, 0x3134a },
+ { 0x31350, 0x323af },
};
-inline constexpr CharRange xid_continue[] = {
+constexpr inline CharRange xid_continue[] = {
{ 0x30, 0x39 },
{ 0x41, 0x5a },
{ 0x5f, 0x5f },
@@ -709,7 +747,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0x2ee, 0x2ee },
{ 0x300, 0x374 },
{ 0x376, 0x377 },
- { 0x37a, 0x37d },
+ { 0x37b, 0x37d },
{ 0x37f, 0x37f },
{ 0x386, 0x38a },
{ 0x38c, 0x38c },
@@ -745,7 +783,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0x860, 0x86a },
{ 0x870, 0x887 },
{ 0x889, 0x88e },
- { 0x898, 0x8e1 },
+ { 0x897, 0x8e1 },
{ 0x8e3, 0x963 },
{ 0x966, 0x96f },
{ 0x971, 0x983 },
@@ -850,7 +888,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0xcdd, 0xcde },
{ 0xce0, 0xce3 },
{ 0xce6, 0xcef },
- { 0xcf1, 0xcf2 },
+ { 0xcf1, 0xcf3 },
{ 0xd00, 0xd0c },
{ 0xd0e, 0xd10 },
{ 0xd12, 0xd44 },
@@ -883,7 +921,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0xea7, 0xebd },
{ 0xec0, 0xec4 },
{ 0xec6, 0xec6 },
- { 0xec8, 0xecd },
+ { 0xec8, 0xece },
{ 0xed0, 0xed9 },
{ 0xedc, 0xedf },
{ 0xf00, 0xf00 },
@@ -921,8 +959,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0x1312, 0x1315 },
{ 0x1318, 0x135a },
{ 0x135d, 0x135f },
- { 0x1369, 0x1369 },
- { 0x1371, 0x1371 },
+ { 0x1369, 0x1371 },
{ 0x1380, 0x138f },
{ 0x13a0, 0x13f5 },
{ 0x13f8, 0x13fd },
@@ -969,7 +1006,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0x1c00, 0x1c37 },
{ 0x1c40, 0x1c49 },
{ 0x1c4d, 0x1c7d },
- { 0x1c80, 0x1c88 },
+ { 0x1c80, 0x1c8a },
{ 0x1c90, 0x1cba },
{ 0x1cbd, 0x1cbf },
{ 0x1cd0, 0x1cd2 },
@@ -993,6 +1030,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0x1fe0, 0x1fec },
{ 0x1ff2, 0x1ff4 },
{ 0x1ff6, 0x1ffc },
+ { 0x200c, 0x200d },
{ 0x203f, 0x2040 },
{ 0x2054, 0x2054 },
{ 0x2071, 0x2071 },
@@ -1036,9 +1074,9 @@ inline constexpr CharRange xid_continue[] = {
{ 0x3031, 0x3035 },
{ 0x3038, 0x303c },
{ 0x3041, 0x3096 },
- { 0x3099, 0x309f },
- { 0x30a1, 0x30fa },
- { 0x30fc, 0x30ff },
+ { 0x3099, 0x309a },
+ { 0x309d, 0x309f },
+ { 0x30a1, 0x30ff },
{ 0x3105, 0x312f },
{ 0x3131, 0x318e },
{ 0x31a0, 0x31bf },
@@ -1053,10 +1091,10 @@ inline constexpr CharRange xid_continue[] = {
{ 0xa67f, 0xa6f1 },
{ 0xa717, 0xa71f },
{ 0xa722, 0xa788 },
- { 0xa78b, 0xa7ca },
+ { 0xa78b, 0xa7cd },
{ 0xa7d0, 0xa7d1 },
{ 0xa7d3, 0xa7d3 },
- { 0xa7d5, 0xa7d9 },
+ { 0xa7d5, 0xa7dc },
{ 0xa7f2, 0xa827 },
{ 0xa82c, 0xa82c },
{ 0xa840, 0xa873 },
@@ -1102,21 +1140,27 @@ inline constexpr CharRange xid_continue[] = {
{ 0xfb40, 0xfb41 },
{ 0xfb43, 0xfb44 },
{ 0xfb46, 0xfbb1 },
- { 0xfbd3, 0xfd3d },
+ { 0xfbd3, 0xfc5d },
+ { 0xfc64, 0xfd3d },
{ 0xfd50, 0xfd8f },
{ 0xfd92, 0xfdc7 },
- { 0xfdf0, 0xfdfb },
+ { 0xfdf0, 0xfdf9 },
{ 0xfe00, 0xfe0f },
{ 0xfe20, 0xfe2f },
{ 0xfe33, 0xfe34 },
{ 0xfe4d, 0xfe4f },
- { 0xfe70, 0xfe74 },
- { 0xfe76, 0xfefc },
+ { 0xfe71, 0xfe71 },
+ { 0xfe73, 0xfe73 },
+ { 0xfe77, 0xfe77 },
+ { 0xfe79, 0xfe79 },
+ { 0xfe7b, 0xfe7b },
+ { 0xfe7d, 0xfe7d },
+ { 0xfe7f, 0xfefc },
{ 0xff10, 0xff19 },
{ 0xff21, 0xff3a },
{ 0xff3f, 0xff3f },
{ 0xff41, 0xff5a },
- { 0xff66, 0xffbe },
+ { 0xff65, 0xffbe },
{ 0xffc2, 0xffc7 },
{ 0xffca, 0xffcf },
{ 0xffd2, 0xffd7 },
@@ -1154,6 +1198,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0x105a3, 0x105b1 },
{ 0x105b3, 0x105b9 },
{ 0x105bb, 0x105bc },
+ { 0x105c0, 0x105f3 },
{ 0x10600, 0x10736 },
{ 0x10740, 0x10755 },
{ 0x10760, 0x10767 },
@@ -1194,10 +1239,14 @@ inline constexpr CharRange xid_continue[] = {
{ 0x10cc0, 0x10cf2 },
{ 0x10d00, 0x10d27 },
{ 0x10d30, 0x10d39 },
+ { 0x10d40, 0x10d65 },
+ { 0x10d69, 0x10d6d },
+ { 0x10d6f, 0x10d85 },
{ 0x10e80, 0x10ea9 },
{ 0x10eab, 0x10eac },
{ 0x10eb0, 0x10eb1 },
- { 0x10f00, 0x10f1c },
+ { 0x10ec2, 0x10ec4 },
+ { 0x10efc, 0x10f1c },
{ 0x10f27, 0x10f27 },
{ 0x10f30, 0x10f50 },
{ 0x10f70, 0x10f85 },
@@ -1220,7 +1269,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0x111dc, 0x111dc },
{ 0x11200, 0x11211 },
{ 0x11213, 0x11237 },
- { 0x1123e, 0x1123e },
+ { 0x1123e, 0x11241 },
{ 0x11280, 0x11286 },
{ 0x11288, 0x11288 },
{ 0x1128a, 0x1128d },
@@ -1243,6 +1292,16 @@ inline constexpr CharRange xid_continue[] = {
{ 0x1135d, 0x11363 },
{ 0x11366, 0x1136c },
{ 0x11370, 0x11374 },
+ { 0x11380, 0x11389 },
+ { 0x1138b, 0x1138b },
+ { 0x1138e, 0x1138e },
+ { 0x11390, 0x113b5 },
+ { 0x113b7, 0x113c0 },
+ { 0x113c2, 0x113c2 },
+ { 0x113c5, 0x113c5 },
+ { 0x113c7, 0x113ca },
+ { 0x113cc, 0x113d3 },
+ { 0x113e1, 0x113e2 },
{ 0x11400, 0x1144a },
{ 0x11450, 0x11459 },
{ 0x1145e, 0x11461 },
@@ -1257,6 +1316,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0x11650, 0x11659 },
{ 0x11680, 0x116b8 },
{ 0x116c0, 0x116c9 },
+ { 0x116d0, 0x116e3 },
{ 0x11700, 0x1171a },
{ 0x1171d, 0x1172b },
{ 0x11730, 0x11739 },
@@ -1280,6 +1340,8 @@ inline constexpr CharRange xid_continue[] = {
{ 0x11a50, 0x11a99 },
{ 0x11a9d, 0x11a9d },
{ 0x11ab0, 0x11af8 },
+ { 0x11bc0, 0x11be0 },
+ { 0x11bf0, 0x11bf9 },
{ 0x11c00, 0x11c08 },
{ 0x11c0a, 0x11c36 },
{ 0x11c38, 0x11c40 },
@@ -1301,13 +1363,20 @@ inline constexpr CharRange xid_continue[] = {
{ 0x11d93, 0x11d98 },
{ 0x11da0, 0x11da9 },
{ 0x11ee0, 0x11ef6 },
+ { 0x11f00, 0x11f10 },
+ { 0x11f12, 0x11f3a },
+ { 0x11f3e, 0x11f42 },
+ { 0x11f50, 0x11f5a },
{ 0x11fb0, 0x11fb0 },
{ 0x12000, 0x12399 },
{ 0x12400, 0x1246e },
{ 0x12480, 0x12543 },
{ 0x12f90, 0x12ff0 },
- { 0x13000, 0x1342e },
+ { 0x13000, 0x1342f },
+ { 0x13440, 0x13455 },
+ { 0x13460, 0x143fa },
{ 0x14400, 0x14646 },
+ { 0x16100, 0x16139 },
{ 0x16800, 0x16a38 },
{ 0x16a40, 0x16a5e },
{ 0x16a60, 0x16a69 },
@@ -1320,6 +1389,8 @@ inline constexpr CharRange xid_continue[] = {
{ 0x16b50, 0x16b59 },
{ 0x16b63, 0x16b77 },
{ 0x16b7d, 0x16b8f },
+ { 0x16d40, 0x16d6c },
+ { 0x16d70, 0x16d79 },
{ 0x16e40, 0x16e7f },
{ 0x16f00, 0x16f4a },
{ 0x16f4f, 0x16f87 },
@@ -1329,12 +1400,14 @@ inline constexpr CharRange xid_continue[] = {
{ 0x16ff0, 0x16ff1 },
{ 0x17000, 0x187f7 },
{ 0x18800, 0x18cd5 },
- { 0x18d00, 0x18d08 },
+ { 0x18cff, 0x18d08 },
{ 0x1aff0, 0x1aff3 },
{ 0x1aff5, 0x1affb },
{ 0x1affd, 0x1affe },
{ 0x1b000, 0x1b122 },
+ { 0x1b132, 0x1b132 },
{ 0x1b150, 0x1b152 },
+ { 0x1b155, 0x1b155 },
{ 0x1b164, 0x1b167 },
{ 0x1b170, 0x1b2fb },
{ 0x1bc00, 0x1bc6a },
@@ -1342,6 +1415,7 @@ inline constexpr CharRange xid_continue[] = {
{ 0x1bc80, 0x1bc88 },
{ 0x1bc90, 0x1bc99 },
{ 0x1bc9d, 0x1bc9e },
+ { 0x1ccf0, 0x1ccf9 },
{ 0x1cf00, 0x1cf2d },
{ 0x1cf30, 0x1cf46 },
{ 0x1d165, 0x1d169 },
@@ -1388,17 +1462,22 @@ inline constexpr CharRange xid_continue[] = {
{ 0x1da9b, 0x1da9f },
{ 0x1daa1, 0x1daaf },
{ 0x1df00, 0x1df1e },
+ { 0x1df25, 0x1df2a },
{ 0x1e000, 0x1e006 },
{ 0x1e008, 0x1e018 },
{ 0x1e01b, 0x1e021 },
{ 0x1e023, 0x1e024 },
{ 0x1e026, 0x1e02a },
+ { 0x1e030, 0x1e06d },
+ { 0x1e08f, 0x1e08f },
{ 0x1e100, 0x1e12c },
{ 0x1e130, 0x1e13d },
{ 0x1e140, 0x1e149 },
{ 0x1e14e, 0x1e14e },
{ 0x1e290, 0x1e2ae },
{ 0x1e2c0, 0x1e2f9 },
+ { 0x1e4d0, 0x1e4f9 },
+ { 0x1e5d0, 0x1e5fa },
{ 0x1e7e0, 0x1e7e6 },
{ 0x1e7e8, 0x1e7eb },
{ 0x1e7ed, 0x1e7ee },
@@ -1442,16 +1521,18 @@ inline constexpr CharRange xid_continue[] = {
{ 0x1eeab, 0x1eebb },
{ 0x1fbf0, 0x1fbf9 },
{ 0x20000, 0x2a6df },
- { 0x2a700, 0x2b738 },
+ { 0x2a700, 0x2b739 },
{ 0x2b740, 0x2b81d },
{ 0x2b820, 0x2cea1 },
{ 0x2ceb0, 0x2ebe0 },
+ { 0x2ebf0, 0x2ee5d },
{ 0x2f800, 0x2fa1d },
{ 0x30000, 0x3134a },
+ { 0x31350, 0x323af },
{ 0xe0100, 0xe01ef },
};
-inline constexpr CharRange uppercase_letter[] = {
+constexpr inline CharRange uppercase_letter[] = {
{ 0x41, 0x5a },
{ 0xc0, 0xd6 },
{ 0xd8, 0xde },
@@ -1728,6 +1809,7 @@ inline constexpr CharRange uppercase_letter[] = {
{ 0x10c7, 0x10c7 },
{ 0x10cd, 0x10cd },
{ 0x13a0, 0x13f5 },
+ { 0x1c89, 0x1c89 },
{ 0x1c90, 0x1cba },
{ 0x1cbd, 0x1cbf },
{ 0x1e00, 0x1e00 },
@@ -1882,7 +1964,9 @@ inline constexpr CharRange uppercase_letter[] = {
{ 0x2130, 0x2133 },
{ 0x213e, 0x213f },
{ 0x2145, 0x2145 },
+ { 0x2160, 0x216f },
{ 0x2183, 0x2183 },
+ { 0x24b6, 0x24cf },
{ 0x2c00, 0x2c2f },
{ 0x2c60, 0x2c60 },
{ 0x2c62, 0x2c64 },
@@ -2052,9 +2136,12 @@ inline constexpr CharRange uppercase_letter[] = {
{ 0xa7c2, 0xa7c2 },
{ 0xa7c4, 0xa7c7 },
{ 0xa7c9, 0xa7c9 },
+ { 0xa7cb, 0xa7cc },
{ 0xa7d0, 0xa7d0 },
{ 0xa7d6, 0xa7d6 },
{ 0xa7d8, 0xa7d8 },
+ { 0xa7da, 0xa7da },
+ { 0xa7dc, 0xa7dc },
{ 0xa7f5, 0xa7f5 },
{ 0xff21, 0xff3a },
{ 0x10400, 0x10427 },
@@ -2064,6 +2151,7 @@ inline constexpr CharRange uppercase_letter[] = {
{ 0x1058c, 0x10592 },
{ 0x10594, 0x10595 },
{ 0x10c80, 0x10cb2 },
+ { 0x10d50, 0x10d65 },
{ 0x118a0, 0x118bf },
{ 0x16e40, 0x16e5f },
{ 0x1d400, 0x1d419 },
@@ -2098,11 +2186,16 @@ inline constexpr CharRange uppercase_letter[] = {
{ 0x1d790, 0x1d7a8 },
{ 0x1d7ca, 0x1d7ca },
{ 0x1e900, 0x1e921 },
+ { 0x1f130, 0x1f149 },
+ { 0x1f150, 0x1f169 },
+ { 0x1f170, 0x1f189 },
};
-inline constexpr CharRange lowercase_letter[] = {
+constexpr inline CharRange lowercase_letter[] = {
{ 0x61, 0x7a },
+ { 0xaa, 0xaa },
{ 0xb5, 0xb5 },
+ { 0xba, 0xba },
{ 0xdf, 0xf6 },
{ 0xf8, 0xff },
{ 0x101, 0x101 },
@@ -2246,11 +2339,14 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0x24b, 0x24b },
{ 0x24d, 0x24d },
{ 0x24f, 0x293 },
- { 0x295, 0x2af },
+ { 0x295, 0x2b8 },
+ { 0x2c0, 0x2c1 },
+ { 0x2e0, 0x2e4 },
+ { 0x345, 0x345 },
{ 0x371, 0x371 },
{ 0x373, 0x373 },
{ 0x377, 0x377 },
- { 0x37b, 0x37d },
+ { 0x37a, 0x37d },
{ 0x390, 0x390 },
{ 0x3ac, 0x3ce },
{ 0x3d0, 0x3d1 },
@@ -2372,12 +2468,11 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0x52f, 0x52f },
{ 0x560, 0x588 },
{ 0x10d0, 0x10fa },
- { 0x10fd, 0x10ff },
+ { 0x10fc, 0x10ff },
{ 0x13f8, 0x13fd },
{ 0x1c80, 0x1c88 },
- { 0x1d00, 0x1d2b },
- { 0x1d6b, 0x1d77 },
- { 0x1d79, 0x1d9a },
+ { 0x1c8a, 0x1c8a },
+ { 0x1d00, 0x1dbf },
{ 0x1e01, 0x1e01 },
{ 0x1e03, 0x1e03 },
{ 0x1e05, 0x1e05 },
@@ -2522,6 +2617,9 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0x1fe0, 0x1fe7 },
{ 0x1ff2, 0x1ff4 },
{ 0x1ff6, 0x1ff7 },
+ { 0x2071, 0x2071 },
+ { 0x207f, 0x207f },
+ { 0x2090, 0x209c },
{ 0x210a, 0x210a },
{ 0x210e, 0x210f },
{ 0x2113, 0x2113 },
@@ -2531,7 +2629,9 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0x213c, 0x213d },
{ 0x2146, 0x2149 },
{ 0x214e, 0x214e },
+ { 0x2170, 0x217f },
{ 0x2184, 0x2184 },
+ { 0x24d0, 0x24e9 },
{ 0x2c30, 0x2c5f },
{ 0x2c61, 0x2c61 },
{ 0x2c65, 0x2c66 },
@@ -2540,7 +2640,7 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0x2c6c, 0x2c6c },
{ 0x2c71, 0x2c71 },
{ 0x2c73, 0x2c74 },
- { 0x2c76, 0x2c7b },
+ { 0x2c76, 0x2c7d },
{ 0x2c81, 0x2c81 },
{ 0x2c83, 0x2c83 },
{ 0x2c85, 0x2c85 },
@@ -2633,7 +2733,7 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0xa695, 0xa695 },
{ 0xa697, 0xa697 },
{ 0xa699, 0xa699 },
- { 0xa69b, 0xa69b },
+ { 0xa69b, 0xa69d },
{ 0xa723, 0xa723 },
{ 0xa725, 0xa725 },
{ 0xa727, 0xa727 },
@@ -2671,8 +2771,7 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0xa769, 0xa769 },
{ 0xa76b, 0xa76b },
{ 0xa76d, 0xa76d },
- { 0xa76f, 0xa76f },
- { 0xa771, 0xa778 },
+ { 0xa76f, 0xa778 },
{ 0xa77a, 0xa77a },
{ 0xa77c, 0xa77c },
{ 0xa77f, 0xa77f },
@@ -2705,15 +2804,18 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0xa7c3, 0xa7c3 },
{ 0xa7c8, 0xa7c8 },
{ 0xa7ca, 0xa7ca },
+ { 0xa7cd, 0xa7cd },
{ 0xa7d1, 0xa7d1 },
{ 0xa7d3, 0xa7d3 },
{ 0xa7d5, 0xa7d5 },
{ 0xa7d7, 0xa7d7 },
{ 0xa7d9, 0xa7d9 },
+ { 0xa7db, 0xa7db },
+ { 0xa7f2, 0xa7f4 },
{ 0xa7f6, 0xa7f6 },
- { 0xa7fa, 0xa7fa },
+ { 0xa7f8, 0xa7fa },
{ 0xab30, 0xab5a },
- { 0xab60, 0xab68 },
+ { 0xab5c, 0xab69 },
{ 0xab70, 0xabbf },
{ 0xfb00, 0xfb06 },
{ 0xfb13, 0xfb17 },
@@ -2724,7 +2826,12 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0x105a3, 0x105b1 },
{ 0x105b3, 0x105b9 },
{ 0x105bb, 0x105bc },
+ { 0x10780, 0x10780 },
+ { 0x10783, 0x10785 },
+ { 0x10787, 0x107b0 },
+ { 0x107b2, 0x107ba },
{ 0x10cc0, 0x10cf2 },
+ { 0x10d70, 0x10d85 },
{ 0x118c0, 0x118df },
{ 0x16e60, 0x16e7f },
{ 0x1d41a, 0x1d433 },
@@ -2758,10 +2865,11 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0x1df00, 0x1df09 },
{ 0x1df0b, 0x1df1e },
{ 0x1df25, 0x1df2a },
+ { 0x1e030, 0x1e06d },
{ 0x1e922, 0x1e943 },
};
-inline constexpr CharRange unicode_letter[] = {
+constexpr inline CharRange unicode_letter[] = {
{ 0x41, 0x5a },
{ 0x61, 0x7a },
{ 0xaa, 0xaa },
@@ -2774,7 +2882,8 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x2e0, 0x2e4 },
{ 0x2ec, 0x2ec },
{ 0x2ee, 0x2ee },
- { 0x370, 0x374 },
+ { 0x345, 0x345 },
+ { 0x363, 0x374 },
{ 0x376, 0x377 },
{ 0x37a, 0x37d },
{ 0x37f, 0x37f },
@@ -2788,49 +2897,58 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x531, 0x556 },
{ 0x559, 0x559 },
{ 0x560, 0x588 },
+ { 0x5b0, 0x5bd },
+ { 0x5bf, 0x5bf },
+ { 0x5c1, 0x5c2 },
+ { 0x5c4, 0x5c5 },
+ { 0x5c7, 0x5c7 },
{ 0x5d0, 0x5ea },
{ 0x5ef, 0x5f2 },
- { 0x620, 0x64a },
- { 0x66e, 0x66f },
- { 0x671, 0x6d3 },
- { 0x6d5, 0x6d5 },
- { 0x6e5, 0x6e6 },
- { 0x6ee, 0x6ef },
+ { 0x610, 0x61a },
+ { 0x620, 0x657 },
+ { 0x659, 0x65f },
+ { 0x66e, 0x6d3 },
+ { 0x6d5, 0x6dc },
+ { 0x6e1, 0x6e8 },
+ { 0x6ed, 0x6ef },
{ 0x6fa, 0x6fc },
{ 0x6ff, 0x6ff },
- { 0x710, 0x710 },
- { 0x712, 0x72f },
- { 0x74d, 0x7a5 },
- { 0x7b1, 0x7b1 },
+ { 0x710, 0x73f },
+ { 0x74d, 0x7b1 },
{ 0x7ca, 0x7ea },
{ 0x7f4, 0x7f5 },
{ 0x7fa, 0x7fa },
- { 0x800, 0x815 },
- { 0x81a, 0x81a },
- { 0x824, 0x824 },
- { 0x828, 0x828 },
+ { 0x800, 0x817 },
+ { 0x81a, 0x82c },
{ 0x840, 0x858 },
{ 0x860, 0x86a },
{ 0x870, 0x887 },
{ 0x889, 0x88e },
+ { 0x897, 0x897 },
{ 0x8a0, 0x8c9 },
- { 0x904, 0x939 },
- { 0x93d, 0x93d },
- { 0x950, 0x950 },
- { 0x958, 0x961 },
- { 0x971, 0x980 },
+ { 0x8d4, 0x8df },
+ { 0x8e3, 0x8e9 },
+ { 0x8f0, 0x93b },
+ { 0x93d, 0x94c },
+ { 0x94e, 0x950 },
+ { 0x955, 0x963 },
+ { 0x971, 0x983 },
{ 0x985, 0x98c },
{ 0x98f, 0x990 },
{ 0x993, 0x9a8 },
{ 0x9aa, 0x9b0 },
{ 0x9b2, 0x9b2 },
{ 0x9b6, 0x9b9 },
- { 0x9bd, 0x9bd },
+ { 0x9bd, 0x9c4 },
+ { 0x9c7, 0x9c8 },
+ { 0x9cb, 0x9cc },
{ 0x9ce, 0x9ce },
+ { 0x9d7, 0x9d7 },
{ 0x9dc, 0x9dd },
- { 0x9df, 0x9e1 },
+ { 0x9df, 0x9e3 },
{ 0x9f0, 0x9f1 },
{ 0x9fc, 0x9fc },
+ { 0xa01, 0xa03 },
{ 0xa05, 0xa0a },
{ 0xa0f, 0xa10 },
{ 0xa13, 0xa28 },
@@ -2838,30 +2956,41 @@ inline constexpr CharRange unicode_letter[] = {
{ 0xa32, 0xa33 },
{ 0xa35, 0xa36 },
{ 0xa38, 0xa39 },
+ { 0xa3e, 0xa42 },
+ { 0xa47, 0xa48 },
+ { 0xa4b, 0xa4c },
+ { 0xa51, 0xa51 },
{ 0xa59, 0xa5c },
{ 0xa5e, 0xa5e },
- { 0xa72, 0xa74 },
+ { 0xa70, 0xa75 },
+ { 0xa81, 0xa83 },
{ 0xa85, 0xa8d },
{ 0xa8f, 0xa91 },
{ 0xa93, 0xaa8 },
{ 0xaaa, 0xab0 },
{ 0xab2, 0xab3 },
{ 0xab5, 0xab9 },
- { 0xabd, 0xabd },
+ { 0xabd, 0xac5 },
+ { 0xac7, 0xac9 },
+ { 0xacb, 0xacc },
{ 0xad0, 0xad0 },
- { 0xae0, 0xae1 },
- { 0xaf9, 0xaf9 },
+ { 0xae0, 0xae3 },
+ { 0xaf9, 0xafc },
+ { 0xb01, 0xb03 },
{ 0xb05, 0xb0c },
{ 0xb0f, 0xb10 },
{ 0xb13, 0xb28 },
{ 0xb2a, 0xb30 },
{ 0xb32, 0xb33 },
{ 0xb35, 0xb39 },
- { 0xb3d, 0xb3d },
+ { 0xb3d, 0xb44 },
+ { 0xb47, 0xb48 },
+ { 0xb4b, 0xb4c },
+ { 0xb56, 0xb57 },
{ 0xb5c, 0xb5d },
- { 0xb5f, 0xb61 },
+ { 0xb5f, 0xb63 },
{ 0xb71, 0xb71 },
- { 0xb83, 0xb83 },
+ { 0xb82, 0xb83 },
{ 0xb85, 0xb8a },
{ 0xb8e, 0xb90 },
{ 0xb92, 0xb95 },
@@ -2871,65 +3000,80 @@ inline constexpr CharRange unicode_letter[] = {
{ 0xba3, 0xba4 },
{ 0xba8, 0xbaa },
{ 0xbae, 0xbb9 },
+ { 0xbbe, 0xbc2 },
+ { 0xbc6, 0xbc8 },
+ { 0xbca, 0xbcc },
{ 0xbd0, 0xbd0 },
- { 0xc05, 0xc0c },
+ { 0xbd7, 0xbd7 },
+ { 0xc00, 0xc0c },
{ 0xc0e, 0xc10 },
{ 0xc12, 0xc28 },
{ 0xc2a, 0xc39 },
- { 0xc3d, 0xc3d },
+ { 0xc3d, 0xc44 },
+ { 0xc46, 0xc48 },
+ { 0xc4a, 0xc4c },
+ { 0xc55, 0xc56 },
{ 0xc58, 0xc5a },
{ 0xc5d, 0xc5d },
- { 0xc60, 0xc61 },
- { 0xc80, 0xc80 },
+ { 0xc60, 0xc63 },
+ { 0xc80, 0xc83 },
{ 0xc85, 0xc8c },
{ 0xc8e, 0xc90 },
{ 0xc92, 0xca8 },
{ 0xcaa, 0xcb3 },
{ 0xcb5, 0xcb9 },
- { 0xcbd, 0xcbd },
+ { 0xcbd, 0xcc4 },
+ { 0xcc6, 0xcc8 },
+ { 0xcca, 0xccc },
+ { 0xcd5, 0xcd6 },
{ 0xcdd, 0xcde },
- { 0xce0, 0xce1 },
- { 0xcf1, 0xcf2 },
- { 0xd04, 0xd0c },
+ { 0xce0, 0xce3 },
+ { 0xcf1, 0xcf3 },
+ { 0xd00, 0xd0c },
{ 0xd0e, 0xd10 },
{ 0xd12, 0xd3a },
- { 0xd3d, 0xd3d },
+ { 0xd3d, 0xd44 },
+ { 0xd46, 0xd48 },
+ { 0xd4a, 0xd4c },
{ 0xd4e, 0xd4e },
- { 0xd54, 0xd56 },
- { 0xd5f, 0xd61 },
+ { 0xd54, 0xd57 },
+ { 0xd5f, 0xd63 },
{ 0xd7a, 0xd7f },
+ { 0xd81, 0xd83 },
{ 0xd85, 0xd96 },
{ 0xd9a, 0xdb1 },
{ 0xdb3, 0xdbb },
{ 0xdbd, 0xdbd },
{ 0xdc0, 0xdc6 },
- { 0xe01, 0xe30 },
- { 0xe32, 0xe33 },
+ { 0xdcf, 0xdd4 },
+ { 0xdd6, 0xdd6 },
+ { 0xdd8, 0xddf },
+ { 0xdf2, 0xdf3 },
+ { 0xe01, 0xe3a },
{ 0xe40, 0xe46 },
+ { 0xe4d, 0xe4d },
{ 0xe81, 0xe82 },
{ 0xe84, 0xe84 },
{ 0xe86, 0xe8a },
{ 0xe8c, 0xea3 },
{ 0xea5, 0xea5 },
- { 0xea7, 0xeb0 },
- { 0xeb2, 0xeb3 },
- { 0xebd, 0xebd },
+ { 0xea7, 0xeb9 },
+ { 0xebb, 0xebd },
{ 0xec0, 0xec4 },
{ 0xec6, 0xec6 },
+ { 0xecd, 0xecd },
{ 0xedc, 0xedf },
{ 0xf00, 0xf00 },
{ 0xf40, 0xf47 },
{ 0xf49, 0xf6c },
- { 0xf88, 0xf8c },
- { 0x1000, 0x102a },
- { 0x103f, 0x103f },
- { 0x1050, 0x1055 },
- { 0x105a, 0x105d },
- { 0x1061, 0x1061 },
- { 0x1065, 0x1066 },
- { 0x106e, 0x1070 },
- { 0x1075, 0x1081 },
- { 0x108e, 0x108e },
+ { 0xf71, 0xf83 },
+ { 0xf88, 0xf97 },
+ { 0xf99, 0xfbc },
+ { 0x1000, 0x1036 },
+ { 0x1038, 0x1038 },
+ { 0x103b, 0x103f },
+ { 0x1050, 0x108f },
+ { 0x109a, 0x109d },
{ 0x10a0, 0x10c5 },
{ 0x10c7, 0x10c7 },
{ 0x10cd, 0x10cd },
@@ -2957,37 +3101,44 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x166f, 0x167f },
{ 0x1681, 0x169a },
{ 0x16a0, 0x16ea },
- { 0x16f1, 0x16f8 },
- { 0x1700, 0x1711 },
- { 0x171f, 0x1731 },
- { 0x1740, 0x1751 },
+ { 0x16ee, 0x16f8 },
+ { 0x1700, 0x1713 },
+ { 0x171f, 0x1733 },
+ { 0x1740, 0x1753 },
{ 0x1760, 0x176c },
{ 0x176e, 0x1770 },
+ { 0x1772, 0x1773 },
{ 0x1780, 0x17b3 },
+ { 0x17b6, 0x17c8 },
{ 0x17d7, 0x17d7 },
{ 0x17dc, 0x17dc },
{ 0x1820, 0x1878 },
- { 0x1880, 0x1884 },
- { 0x1887, 0x18a8 },
- { 0x18aa, 0x18aa },
+ { 0x1880, 0x18aa },
{ 0x18b0, 0x18f5 },
{ 0x1900, 0x191e },
+ { 0x1920, 0x192b },
+ { 0x1930, 0x1938 },
{ 0x1950, 0x196d },
{ 0x1970, 0x1974 },
{ 0x1980, 0x19ab },
{ 0x19b0, 0x19c9 },
- { 0x1a00, 0x1a16 },
- { 0x1a20, 0x1a54 },
+ { 0x1a00, 0x1a1b },
+ { 0x1a20, 0x1a5e },
+ { 0x1a61, 0x1a74 },
{ 0x1aa7, 0x1aa7 },
- { 0x1b05, 0x1b33 },
+ { 0x1abf, 0x1ac0 },
+ { 0x1acc, 0x1ace },
+ { 0x1b00, 0x1b33 },
+ { 0x1b35, 0x1b43 },
{ 0x1b45, 0x1b4c },
- { 0x1b83, 0x1ba0 },
- { 0x1bae, 0x1baf },
+ { 0x1b80, 0x1ba9 },
+ { 0x1bac, 0x1baf },
{ 0x1bba, 0x1be5 },
- { 0x1c00, 0x1c23 },
+ { 0x1be7, 0x1bf1 },
+ { 0x1c00, 0x1c36 },
{ 0x1c4d, 0x1c4f },
{ 0x1c5a, 0x1c7d },
- { 0x1c80, 0x1c88 },
+ { 0x1c80, 0x1c8a },
{ 0x1c90, 0x1cba },
{ 0x1cbd, 0x1cbf },
{ 0x1ce9, 0x1cec },
@@ -2995,6 +3146,7 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x1cf5, 0x1cf6 },
{ 0x1cfa, 0x1cfa },
{ 0x1d00, 0x1dbf },
+ { 0x1dd3, 0x1df4 },
{ 0x1e00, 0x1f15 },
{ 0x1f18, 0x1f1d },
{ 0x1f20, 0x1f45 },
@@ -3030,7 +3182,8 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x213c, 0x213f },
{ 0x2145, 0x2149 },
{ 0x214e, 0x214e },
- { 0x2183, 0x2184 },
+ { 0x2160, 0x2188 },
+ { 0x24b6, 0x24e9 },
{ 0x2c00, 0x2ce4 },
{ 0x2ceb, 0x2cee },
{ 0x2cf2, 0x2cf3 },
@@ -3048,10 +3201,12 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x2dc8, 0x2dce },
{ 0x2dd0, 0x2dd6 },
{ 0x2dd8, 0x2dde },
+ { 0x2de0, 0x2dff },
{ 0x2e2f, 0x2e2f },
- { 0x3005, 0x3006 },
+ { 0x3005, 0x3007 },
+ { 0x3021, 0x3029 },
{ 0x3031, 0x3035 },
- { 0x303b, 0x303c },
+ { 0x3038, 0x303c },
{ 0x3041, 0x3096 },
{ 0x309d, 0x309f },
{ 0x30a1, 0x30fa },
@@ -3067,45 +3222,39 @@ inline constexpr CharRange unicode_letter[] = {
{ 0xa610, 0xa61f },
{ 0xa62a, 0xa62b },
{ 0xa640, 0xa66e },
- { 0xa67f, 0xa69d },
- { 0xa6a0, 0xa6e5 },
+ { 0xa674, 0xa67b },
+ { 0xa67f, 0xa6ef },
{ 0xa717, 0xa71f },
{ 0xa722, 0xa788 },
- { 0xa78b, 0xa7ca },
+ { 0xa78b, 0xa7cd },
{ 0xa7d0, 0xa7d1 },
{ 0xa7d3, 0xa7d3 },
- { 0xa7d5, 0xa7d9 },
- { 0xa7f2, 0xa801 },
- { 0xa803, 0xa805 },
- { 0xa807, 0xa80a },
- { 0xa80c, 0xa822 },
+ { 0xa7d5, 0xa7dc },
+ { 0xa7f2, 0xa805 },
+ { 0xa807, 0xa827 },
{ 0xa840, 0xa873 },
- { 0xa882, 0xa8b3 },
+ { 0xa880, 0xa8c3 },
+ { 0xa8c5, 0xa8c5 },
{ 0xa8f2, 0xa8f7 },
{ 0xa8fb, 0xa8fb },
- { 0xa8fd, 0xa8fe },
- { 0xa90a, 0xa925 },
- { 0xa930, 0xa946 },
+ { 0xa8fd, 0xa8ff },
+ { 0xa90a, 0xa92a },
+ { 0xa930, 0xa952 },
{ 0xa960, 0xa97c },
- { 0xa984, 0xa9b2 },
+ { 0xa980, 0xa9b2 },
+ { 0xa9b4, 0xa9bf },
{ 0xa9cf, 0xa9cf },
- { 0xa9e0, 0xa9e4 },
- { 0xa9e6, 0xa9ef },
+ { 0xa9e0, 0xa9ef },
{ 0xa9fa, 0xa9fe },
- { 0xaa00, 0xaa28 },
- { 0xaa40, 0xaa42 },
- { 0xaa44, 0xaa4b },
+ { 0xaa00, 0xaa36 },
+ { 0xaa40, 0xaa4d },
{ 0xaa60, 0xaa76 },
- { 0xaa7a, 0xaa7a },
- { 0xaa7e, 0xaaaf },
- { 0xaab1, 0xaab1 },
- { 0xaab5, 0xaab6 },
- { 0xaab9, 0xaabd },
+ { 0xaa7a, 0xaabe },
{ 0xaac0, 0xaac0 },
{ 0xaac2, 0xaac2 },
{ 0xaadb, 0xaadd },
- { 0xaae0, 0xaaea },
- { 0xaaf2, 0xaaf4 },
+ { 0xaae0, 0xaaef },
+ { 0xaaf2, 0xaaf5 },
{ 0xab01, 0xab06 },
{ 0xab09, 0xab0e },
{ 0xab11, 0xab16 },
@@ -3113,7 +3262,7 @@ inline constexpr CharRange unicode_letter[] = {
{ 0xab28, 0xab2e },
{ 0xab30, 0xab5a },
{ 0xab5c, 0xab69 },
- { 0xab70, 0xabe2 },
+ { 0xab70, 0xabea },
{ 0xac00, 0xd7a3 },
{ 0xd7b0, 0xd7c6 },
{ 0xd7cb, 0xd7fb },
@@ -3121,8 +3270,7 @@ inline constexpr CharRange unicode_letter[] = {
{ 0xfa70, 0xfad9 },
{ 0xfb00, 0xfb06 },
{ 0xfb13, 0xfb17 },
- { 0xfb1d, 0xfb1d },
- { 0xfb1f, 0xfb28 },
+ { 0xfb1d, 0xfb28 },
{ 0xfb2a, 0xfb36 },
{ 0xfb38, 0xfb3c },
{ 0xfb3e, 0xfb3e },
@@ -3149,15 +3297,16 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x1003f, 0x1004d },
{ 0x10050, 0x1005d },
{ 0x10080, 0x100fa },
+ { 0x10140, 0x10174 },
{ 0x10280, 0x1029c },
{ 0x102a0, 0x102d0 },
{ 0x10300, 0x1031f },
- { 0x1032d, 0x10340 },
- { 0x10342, 0x10349 },
- { 0x10350, 0x10375 },
+ { 0x1032d, 0x1034a },
+ { 0x10350, 0x1037a },
{ 0x10380, 0x1039d },
{ 0x103a0, 0x103c3 },
{ 0x103c8, 0x103cf },
+ { 0x103d1, 0x103d5 },
{ 0x10400, 0x1049d },
{ 0x104b0, 0x104d3 },
{ 0x104d8, 0x104fb },
@@ -3171,6 +3320,7 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x105a3, 0x105b1 },
{ 0x105b3, 0x105b9 },
{ 0x105bb, 0x105bc },
+ { 0x105c0, 0x105f3 },
{ 0x10600, 0x10736 },
{ 0x10740, 0x10755 },
{ 0x10760, 0x10767 },
@@ -3191,8 +3341,9 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x10920, 0x10939 },
{ 0x10980, 0x109b7 },
{ 0x109be, 0x109bf },
- { 0x10a00, 0x10a00 },
- { 0x10a10, 0x10a13 },
+ { 0x10a00, 0x10a03 },
+ { 0x10a05, 0x10a06 },
+ { 0x10a0c, 0x10a13 },
{ 0x10a15, 0x10a17 },
{ 0x10a19, 0x10a35 },
{ 0x10a60, 0x10a7c },
@@ -3206,104 +3357,143 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x10c00, 0x10c48 },
{ 0x10c80, 0x10cb2 },
{ 0x10cc0, 0x10cf2 },
- { 0x10d00, 0x10d23 },
+ { 0x10d00, 0x10d27 },
+ { 0x10d4a, 0x10d65 },
+ { 0x10d69, 0x10d69 },
+ { 0x10d6f, 0x10d85 },
{ 0x10e80, 0x10ea9 },
+ { 0x10eab, 0x10eac },
{ 0x10eb0, 0x10eb1 },
+ { 0x10ec2, 0x10ec4 },
+ { 0x10efc, 0x10efc },
{ 0x10f00, 0x10f1c },
{ 0x10f27, 0x10f27 },
{ 0x10f30, 0x10f45 },
{ 0x10f70, 0x10f81 },
{ 0x10fb0, 0x10fc4 },
{ 0x10fe0, 0x10ff6 },
- { 0x11003, 0x11037 },
- { 0x11071, 0x11072 },
- { 0x11075, 0x11075 },
- { 0x11083, 0x110af },
+ { 0x11000, 0x11045 },
+ { 0x11071, 0x11075 },
+ { 0x11080, 0x110b8 },
+ { 0x110c2, 0x110c2 },
{ 0x110d0, 0x110e8 },
- { 0x11103, 0x11126 },
- { 0x11144, 0x11144 },
- { 0x11147, 0x11147 },
+ { 0x11100, 0x11132 },
+ { 0x11144, 0x11147 },
{ 0x11150, 0x11172 },
{ 0x11176, 0x11176 },
- { 0x11183, 0x111b2 },
+ { 0x11180, 0x111bf },
{ 0x111c1, 0x111c4 },
+ { 0x111ce, 0x111cf },
{ 0x111da, 0x111da },
{ 0x111dc, 0x111dc },
{ 0x11200, 0x11211 },
- { 0x11213, 0x1122b },
- { 0x1123f, 0x11240 },
+ { 0x11213, 0x11234 },
+ { 0x11237, 0x11237 },
+ { 0x1123e, 0x11241 },
{ 0x11280, 0x11286 },
{ 0x11288, 0x11288 },
{ 0x1128a, 0x1128d },
{ 0x1128f, 0x1129d },
{ 0x1129f, 0x112a8 },
- { 0x112b0, 0x112de },
+ { 0x112b0, 0x112e8 },
+ { 0x11300, 0x11303 },
{ 0x11305, 0x1130c },
{ 0x1130f, 0x11310 },
{ 0x11313, 0x11328 },
{ 0x1132a, 0x11330 },
{ 0x11332, 0x11333 },
{ 0x11335, 0x11339 },
- { 0x1133d, 0x1133d },
+ { 0x1133d, 0x11344 },
+ { 0x11347, 0x11348 },
+ { 0x1134b, 0x1134c },
{ 0x11350, 0x11350 },
- { 0x1135d, 0x11361 },
- { 0x11400, 0x11434 },
+ { 0x11357, 0x11357 },
+ { 0x1135d, 0x11363 },
+ { 0x11380, 0x11389 },
+ { 0x1138b, 0x1138b },
+ { 0x1138e, 0x1138e },
+ { 0x11390, 0x113b5 },
+ { 0x113b7, 0x113c0 },
+ { 0x113c2, 0x113c2 },
+ { 0x113c5, 0x113c5 },
+ { 0x113c7, 0x113ca },
+ { 0x113cc, 0x113cd },
+ { 0x113d1, 0x113d1 },
+ { 0x113d3, 0x113d3 },
+ { 0x11400, 0x11441 },
+ { 0x11443, 0x11445 },
{ 0x11447, 0x1144a },
{ 0x1145f, 0x11461 },
- { 0x11480, 0x114af },
+ { 0x11480, 0x114c1 },
{ 0x114c4, 0x114c5 },
{ 0x114c7, 0x114c7 },
- { 0x11580, 0x115ae },
- { 0x115d8, 0x115db },
- { 0x11600, 0x1162f },
+ { 0x11580, 0x115b5 },
+ { 0x115b8, 0x115be },
+ { 0x115d8, 0x115dd },
+ { 0x11600, 0x1163e },
+ { 0x11640, 0x11640 },
{ 0x11644, 0x11644 },
- { 0x11680, 0x116aa },
+ { 0x11680, 0x116b5 },
{ 0x116b8, 0x116b8 },
{ 0x11700, 0x1171a },
+ { 0x1171d, 0x1172a },
{ 0x11740, 0x11746 },
- { 0x11800, 0x1182b },
+ { 0x11800, 0x11838 },
{ 0x118a0, 0x118df },
{ 0x118ff, 0x11906 },
{ 0x11909, 0x11909 },
{ 0x1190c, 0x11913 },
{ 0x11915, 0x11916 },
- { 0x11918, 0x1192f },
- { 0x1193f, 0x1193f },
- { 0x11941, 0x11941 },
+ { 0x11918, 0x11935 },
+ { 0x11937, 0x11938 },
+ { 0x1193b, 0x1193c },
+ { 0x1193f, 0x11942 },
{ 0x119a0, 0x119a7 },
- { 0x119aa, 0x119d0 },
+ { 0x119aa, 0x119d7 },
+ { 0x119da, 0x119df },
{ 0x119e1, 0x119e1 },
- { 0x119e3, 0x119e3 },
- { 0x11a00, 0x11a00 },
- { 0x11a0b, 0x11a32 },
- { 0x11a3a, 0x11a3a },
- { 0x11a50, 0x11a50 },
- { 0x11a5c, 0x11a89 },
+ { 0x119e3, 0x119e4 },
+ { 0x11a00, 0x11a32 },
+ { 0x11a35, 0x11a3e },
+ { 0x11a50, 0x11a97 },
{ 0x11a9d, 0x11a9d },
{ 0x11ab0, 0x11af8 },
+ { 0x11bc0, 0x11be0 },
{ 0x11c00, 0x11c08 },
- { 0x11c0a, 0x11c2e },
+ { 0x11c0a, 0x11c36 },
+ { 0x11c38, 0x11c3e },
{ 0x11c40, 0x11c40 },
{ 0x11c72, 0x11c8f },
+ { 0x11c92, 0x11ca7 },
+ { 0x11ca9, 0x11cb6 },
{ 0x11d00, 0x11d06 },
{ 0x11d08, 0x11d09 },
- { 0x11d0b, 0x11d30 },
- { 0x11d46, 0x11d46 },
+ { 0x11d0b, 0x11d36 },
+ { 0x11d3a, 0x11d3a },
+ { 0x11d3c, 0x11d3d },
+ { 0x11d3f, 0x11d41 },
+ { 0x11d43, 0x11d43 },
+ { 0x11d46, 0x11d47 },
{ 0x11d60, 0x11d65 },
{ 0x11d67, 0x11d68 },
- { 0x11d6a, 0x11d89 },
+ { 0x11d6a, 0x11d8e },
+ { 0x11d90, 0x11d91 },
+ { 0x11d93, 0x11d96 },
{ 0x11d98, 0x11d98 },
- { 0x11ee0, 0x11ef2 },
- { 0x11f02, 0x11f02 },
- { 0x11f04, 0x11f10 },
- { 0x11f12, 0x11f33 },
+ { 0x11ee0, 0x11ef6 },
+ { 0x11f00, 0x11f10 },
+ { 0x11f12, 0x11f3a },
+ { 0x11f3e, 0x11f40 },
{ 0x11fb0, 0x11fb0 },
{ 0x12000, 0x12399 },
+ { 0x12400, 0x1246e },
{ 0x12480, 0x12543 },
{ 0x12f90, 0x12ff0 },
{ 0x13000, 0x1342f },
{ 0x13441, 0x13446 },
+ { 0x13460, 0x143fa },
{ 0x14400, 0x14646 },
+ { 0x16100, 0x1612e },
{ 0x16800, 0x16a38 },
{ 0x16a40, 0x16a5e },
{ 0x16a70, 0x16abe },
@@ -3312,15 +3502,17 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x16b40, 0x16b43 },
{ 0x16b63, 0x16b77 },
{ 0x16b7d, 0x16b8f },
+ { 0x16d40, 0x16d6c },
{ 0x16e40, 0x16e7f },
{ 0x16f00, 0x16f4a },
- { 0x16f50, 0x16f50 },
- { 0x16f93, 0x16f9f },
+ { 0x16f4f, 0x16f87 },
+ { 0x16f8f, 0x16f9f },
{ 0x16fe0, 0x16fe1 },
{ 0x16fe3, 0x16fe3 },
+ { 0x16ff0, 0x16ff1 },
{ 0x17000, 0x187f7 },
{ 0x18800, 0x18cd5 },
- { 0x18d00, 0x18d08 },
+ { 0x18cff, 0x18d08 },
{ 0x1aff0, 0x1aff3 },
{ 0x1aff5, 0x1affb },
{ 0x1affd, 0x1affe },
@@ -3334,6 +3526,7 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x1bc70, 0x1bc7c },
{ 0x1bc80, 0x1bc88 },
{ 0x1bc90, 0x1bc99 },
+ { 0x1bc9e, 0x1bc9e },
{ 0x1d400, 0x1d454 },
{ 0x1d456, 0x1d49c },
{ 0x1d49e, 0x1d49f },
@@ -3366,19 +3559,28 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x1d7c4, 0x1d7cb },
{ 0x1df00, 0x1df1e },
{ 0x1df25, 0x1df2a },
+ { 0x1e000, 0x1e006 },
+ { 0x1e008, 0x1e018 },
+ { 0x1e01b, 0x1e021 },
+ { 0x1e023, 0x1e024 },
+ { 0x1e026, 0x1e02a },
{ 0x1e030, 0x1e06d },
+ { 0x1e08f, 0x1e08f },
{ 0x1e100, 0x1e12c },
{ 0x1e137, 0x1e13d },
{ 0x1e14e, 0x1e14e },
{ 0x1e290, 0x1e2ad },
{ 0x1e2c0, 0x1e2eb },
{ 0x1e4d0, 0x1e4eb },
+ { 0x1e5d0, 0x1e5ed },
+ { 0x1e5f0, 0x1e5f0 },
{ 0x1e7e0, 0x1e7e6 },
{ 0x1e7e8, 0x1e7eb },
{ 0x1e7ed, 0x1e7ee },
{ 0x1e7f0, 0x1e7fe },
{ 0x1e800, 0x1e8c4 },
{ 0x1e900, 0x1e943 },
+ { 0x1e947, 0x1e947 },
{ 0x1e94b, 0x1e94b },
{ 0x1ee00, 0x1ee03 },
{ 0x1ee05, 0x1ee1f },
@@ -3413,6 +3615,9 @@ inline constexpr CharRange unicode_letter[] = {
{ 0x1eea1, 0x1eea3 },
{ 0x1eea5, 0x1eea9 },
{ 0x1eeab, 0x1eebb },
+ { 0x1f130, 0x1f149 },
+ { 0x1f150, 0x1f169 },
+ { 0x1f170, 0x1f189 },
{ 0x20000, 0x2a6df },
{ 0x2a700, 0x2b739 },
{ 0x2b740, 0x2b81d },
diff --git a/core/string/char_utils.h b/core/string/char_utils.h
index 4acb81253f..62ab4e9584 100644
--- a/core/string/char_utils.h
+++ b/core/string/char_utils.h
@@ -38,97 +38,97 @@
#define BSEARCH_CHAR_RANGE(m_array) \
int low = 0; \
int high = sizeof(m_array) / sizeof(m_array[0]) - 1; \
- int middle; \
+ int middle = (low + high) / 2; \
\
while (low <= high) { \
- middle = (low + high) / 2; \
- \
- if (c < m_array[middle].start) { \
+ if (p_char < m_array[middle].start) { \
high = middle - 1; \
- } else if (c > m_array[middle].end) { \
+ } else if (p_char > m_array[middle].end) { \
low = middle + 1; \
} else { \
return true; \
} \
+ \
+ middle = (low + high) / 2; \
} \
\
return false
-static _FORCE_INLINE_ bool is_unicode_identifier_start(char32_t c) {
+constexpr bool is_unicode_identifier_start(char32_t p_char) {
BSEARCH_CHAR_RANGE(xid_start);
}
-static _FORCE_INLINE_ bool is_unicode_identifier_continue(char32_t c) {
+constexpr bool is_unicode_identifier_continue(char32_t p_char) {
BSEARCH_CHAR_RANGE(xid_continue);
}
-static _FORCE_INLINE_ bool is_unicode_upper_case(char32_t c) {
+constexpr bool is_unicode_upper_case(char32_t p_char) {
BSEARCH_CHAR_RANGE(uppercase_letter);
}
-static _FORCE_INLINE_ bool is_unicode_lower_case(char32_t c) {
+constexpr bool is_unicode_lower_case(char32_t p_char) {
BSEARCH_CHAR_RANGE(lowercase_letter);
}
-static _FORCE_INLINE_ bool is_unicode_letter(char32_t c) {
+constexpr bool is_unicode_letter(char32_t p_char) {
BSEARCH_CHAR_RANGE(unicode_letter);
}
#undef BSEARCH_CHAR_RANGE
-static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) {
- return (c >= 'A' && c <= 'Z');
+constexpr bool is_ascii_upper_case(char32_t p_char) {
+ return (p_char >= 'A' && p_char <= 'Z');
}
-static _FORCE_INLINE_ bool is_ascii_lower_case(char32_t c) {
- return (c >= 'a' && c <= 'z');
+constexpr bool is_ascii_lower_case(char32_t p_char) {
+ return (p_char >= 'a' && p_char <= 'z');
}
-static _FORCE_INLINE_ bool is_digit(char32_t c) {
- return (c >= '0' && c <= '9');
+constexpr bool is_digit(char32_t p_char) {
+ return (p_char >= '0' && p_char <= '9');
}
-static _FORCE_INLINE_ bool is_hex_digit(char32_t c) {
- return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
+constexpr bool is_hex_digit(char32_t p_char) {
+ return (is_digit(p_char) || (p_char >= 'a' && p_char <= 'f') || (p_char >= 'A' && p_char <= 'F'));
}
-static _FORCE_INLINE_ bool is_binary_digit(char32_t c) {
- return (c == '0' || c == '1');
+constexpr bool is_binary_digit(char32_t p_char) {
+ return (p_char == '0' || p_char == '1');
}
-static _FORCE_INLINE_ bool is_ascii_alphabet_char(char32_t c) {
- return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+constexpr bool is_ascii_alphabet_char(char32_t p_char) {
+ return (p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z');
}
-static _FORCE_INLINE_ bool is_ascii_alphanumeric_char(char32_t c) {
- return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
+constexpr bool is_ascii_alphanumeric_char(char32_t p_char) {
+ return (p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z') || (p_char >= '0' && p_char <= '9');
}
-static _FORCE_INLINE_ bool is_ascii_identifier_char(char32_t c) {
- return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
+constexpr bool is_ascii_identifier_char(char32_t p_char) {
+ return (p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z') || (p_char >= '0' && p_char <= '9') || p_char == '_';
}
-static _FORCE_INLINE_ bool is_symbol(char32_t c) {
- return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' ');
+constexpr bool is_symbol(char32_t p_char) {
+ return p_char != '_' && ((p_char >= '!' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '`') || (p_char >= '{' && p_char <= '~') || p_char == '\t' || p_char == ' ');
}
-static _FORCE_INLINE_ bool is_control(char32_t p_char) {
+constexpr bool is_control(char32_t p_char) {
return (p_char <= 0x001f) || (p_char >= 0x007f && p_char <= 0x009f);
}
-static _FORCE_INLINE_ bool is_whitespace(char32_t p_char) {
+constexpr bool is_whitespace(char32_t p_char) {
return (p_char == ' ') || (p_char == 0x00a0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == 0x0085);
}
-static _FORCE_INLINE_ bool is_linebreak(char32_t p_char) {
+constexpr bool is_linebreak(char32_t p_char) {
return (p_char >= 0x000a && p_char <= 0x000d) || (p_char == 0x0085) || (p_char == 0x2028) || (p_char == 0x2029);
}
-static _FORCE_INLINE_ bool is_punct(char32_t p_char) {
+constexpr bool is_punct(char32_t p_char) {
return (p_char >= ' ' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '^') || (p_char == '`') || (p_char >= '{' && p_char <= '~') || (p_char >= 0x2000 && p_char <= 0x206f) || (p_char >= 0x3000 && p_char <= 0x303f);
}
-static _FORCE_INLINE_ bool is_underscore(char32_t p_char) {
+constexpr bool is_underscore(char32_t p_char) {
return (p_char == '_');
}
diff --git a/core/string/fuzzy_search.cpp b/core/string/fuzzy_search.cpp
new file mode 100644
index 0000000000..2fd0d3995e
--- /dev/null
+++ b/core/string/fuzzy_search.cpp
@@ -0,0 +1,349 @@
+/**************************************************************************/
+/* fuzzy_search.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#include "fuzzy_search.h"
+
+// Interpolation weight used by FuzzySearch::sort_and_filter when placing the cull threshold
+// between the average score (weight 0) and the maximum score (weight 1).
+constexpr float cull_factor = 0.1f;
+// Upper bound for the cull threshold computed in FuzzySearch::sort_and_filter.
+constexpr float cull_cutoff = 30.0f;
+// Characters that _is_word_boundary treats as separators between words.
+const String boundary_chars = "/\\-_.";
+
+// Returns true when p_interval describes a non-empty range of inclusive indices.
+// The empty interval is encoded as (-1, -1) and is rejected by the first check.
+static bool _is_valid_interval(const Vector2i &p_interval) {
+	if (p_interval.x < 0) {
+		return false;
+	}
+	return p_interval.x <= p_interval.y;
+}
+
+// Returns the smallest inclusive interval covering both p_a and p_b.
+// If p_a is invalid, p_b is returned as-is; otherwise, if p_b is invalid, p_a is returned.
+static Vector2i _extend_interval(const Vector2i &p_a, const Vector2i &p_b) {
+	const bool a_valid = _is_valid_interval(p_a);
+	const bool b_valid = _is_valid_interval(p_b);
+
+	if (a_valid && b_valid) {
+		return Vector2i(MIN(p_a.x, p_b.x), MAX(p_a.y, p_b.y));
+	}
+	return a_valid ? p_a : p_b;
+}
+
+// Returns true when p_index lies on a word boundary of p_str: any position before the first
+// character, any position past the last character, or a character listed in boundary_chars.
+static bool _is_word_boundary(const String &p_str, int p_index) {
+	// Compare against length(), not size(): in Godot's String API size() also counts the
+	// terminating null, so `p_index == size()` never identifies the position right after the
+	// last character of a non-empty string.
+	if (p_index < 0 || p_index >= p_str.length()) {
+		return true;
+	}
+	return boundary_chars.find_char(p_str[p_index]) != -1;
+}
+
+// Looks for the whole token string inside p_target, searching from p_offset.
+// The token index and length are always written to p_match. On success the hit is recorded
+// as a single substring and true is returned; otherwise false is returned.
+bool FuzzySearchToken::try_exact_match(FuzzyTokenMatch &p_match, const String &p_target, int p_offset) const {
+	const int token_len = string.length();
+	p_match.token_idx = idx;
+	p_match.token_length = token_len;
+
+	const int found_at = p_target.find(string, p_offset);
+	if (found_at != -1) {
+		p_match.add_substring(found_at, token_len);
+		return true;
+	}
+	return false;
+}
+
+// Matches the characters of this token, in order, against p_target starting at p_offset.
+// Every token character that cannot be found costs one unit of p_miss_budget, and the match
+// fails (returns false) as soon as the budget drops below zero. Hits that land exactly at the
+// current search offset are merged into one run. Each finished run is recorded on p_match
+// via add_substring for later scoring and display.
+bool FuzzySearchToken::try_fuzzy_match(FuzzyTokenMatch &p_match, const String &p_target, int p_offset, int p_miss_budget) const {
+	const int token_len = string.length();
+	p_match.token_idx = idx;
+	p_match.token_length = token_len;
+
+	int current_start = -1; // -1 means no run has been opened yet.
+	int current_len = 0;
+
+	for (int i = 0; i < token_len; i++) {
+		const int found_at = p_target.find_char(string[i], p_offset);
+
+		if (found_at < 0) {
+			// Character is missing from the rest of the target: charge the miss budget.
+			if (--p_miss_budget < 0) {
+				return false;
+			}
+			continue;
+		}
+
+		const bool extends_run = current_start != -1 && found_at == p_offset;
+		if (extends_run) {
+			current_len++;
+		} else {
+			// Flush the previous run (if any) before opening a new one at this hit.
+			if (current_start != -1) {
+				p_match.add_substring(current_start, current_len);
+			}
+			current_start = found_at;
+			current_len = 1;
+		}
+
+		p_offset = found_at + 1;
+	}
+
+	// Flush the run that was still open when the token ended.
+	if (current_start != -1) {
+		p_match.add_substring(current_start, current_len);
+	}
+
+	return true;
+}
+
+// Records a matched run of p_substring_length characters starting at p_substring_start,
+// adds its length to matched_length and widens the overall match interval to include it.
+void FuzzyTokenMatch::add_substring(int p_substring_start, int p_substring_length) {
+	// The interval stores inclusive indices, hence the -1 on the end position.
+	const int last_index = p_substring_start + p_substring_length - 1;
+
+	substrings.append(Vector2i(p_substring_start, p_substring_length));
+	matched_length += p_substring_length;
+	interval = _extend_interval(interval, Vector2i(p_substring_start, last_index));
+}
+
+// Returns true when this match's interval and p_other_interval are both valid and share
+// at least one index. An invalid (empty) interval never intersects anything.
+bool FuzzyTokenMatch::intersects(const Vector2i &p_other_interval) const {
+	if (_is_valid_interval(interval) && _is_valid_interval(p_other_interval)) {
+		// Inclusive ranges overlap when each one starts no later than the other one ends.
+		return p_other_interval.x <= interval.y && interval.x <= p_other_interval.y;
+	}
+	return false;
+}
+
+// Decides whether p_match may be accepted into this result: it must fit in the remaining
+// miss budget and must not overlap any token match that was already accepted.
+bool FuzzySearchResult::can_add_token_match(const FuzzyTokenMatch &p_match) const {
+	if (p_match.get_miss_count() > miss_budget) {
+		return false;
+	}
+
+	// Cheap rejection test: no overlap with the combined interval means no overlap at all.
+	if (!p_match.intersects(match_interval)) {
+		return true;
+	}
+
+	// With exactly one accepted match, match_interval is that match's interval, so the
+	// overlap found above is already a conflict.
+	if (token_matches.size() == 1) {
+		return false;
+	}
+
+	for (const FuzzyTokenMatch &accepted : token_matches) {
+		if (accepted.intersects(p_match.interval)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+// Returns true if, at any matched position, p_original and p_adjusted hold different
+// characters. Only indices covered by the recorded substrings are compared.
+bool FuzzyTokenMatch::is_case_insensitive(const String &p_original, const String &p_adjusted) const {
+	for (const Vector2i &run : substrings) {
+		// run.x is the start index of the run, run.y is its length.
+		for (int k = 0; k < run.y; k++) {
+			const int pos = run.x + k;
+			if (p_original[pos] != p_adjusted[pos]) {
+				return true;
+			}
+		}
+	}
+	return false;
+}
+
+// Computes p_match.score from its misses and matched runs.
+// The weights are heuristic: whole-token hits should nearly always beat fragmented matches,
+// while the remaining criteria mostly serve as tie breakers.
+void FuzzySearchResult::score_token_match(FuzzyTokenMatch &p_match, bool p_case_insensitive) const {
+	// Penalties: 20 per missed character, plus 3 when the caller flags the match as case-insensitive.
+	int total = -20 * p_match.get_miss_count();
+	if (p_case_insensitive) {
+		total -= 3;
+	}
+
+	for (const Vector2i &run : p_match.substrings) {
+		const int run_start = run.x;
+		const int run_len = run.y;
+
+		// Quadratic in length, so long runs outweigh several short ones.
+		int run_score = run_len * run_len;
+
+		// Runs located after the last '/' of the target (dir_index) count double.
+		if (run_start > dir_index) {
+			run_score *= 2;
+		}
+
+		// Small bonus when the run starts right after, or ends right before, a word boundary.
+		const bool touches_boundary = _is_word_boundary(target, run_start - 1) || _is_word_boundary(target, run_start + run_len);
+		if (touches_boundary) {
+			run_score += 4;
+		}
+
+		// Large bonus when a single run covers the entire token.
+		if (run_len == p_match.token_length) {
+			run_score += 100;
+		}
+
+		total += run_score;
+	}
+
+	p_match.score = total;
+}
+
+// Adds a small bonus (+1) to results whose token matches start in the same order as the
+// tokens appear in the query. Results with zero or one token match always get the bonus.
+void FuzzySearchResult::maybe_apply_score_bonus() {
+	const int match_count = token_matches.size();
+
+	// With fewer than two matches there is nothing to compare. Skipping the check also avoids
+	// reading element 0 of a zero-byte alloca() buffer when no tokens were matched at all.
+	if (match_count > 1) {
+		// Start position of each token's match, indexed by the token's position in the query.
+		// Assumes token_idx values are exactly 0..match_count-1, i.e. one accepted match per
+		// query token, which is how FuzzySearch::search fills token_matches.
+		int *token_range_starts = (int *)alloca(sizeof(int) * match_count);
+
+		for (const FuzzyTokenMatch &match : token_matches) {
+			token_range_starts[match.token_idx] = match.interval.x;
+		}
+
+		for (int i = 1; i < match_count; i++) {
+			if (token_range_starts[i - 1] > token_range_starts[i]) {
+				// A later query token matched earlier in the target: no bonus.
+				return;
+			}
+		}
+	}
+
+	score += 1;
+}
+
+void FuzzySearchResult::add_token_match(const FuzzyTokenMatch &p_match) {
+ score += p_match.score;
+ match_interval = _extend_interval(match_interval, p_match.interval);
+ miss_budget -= p_match.get_miss_count();
+ token_matches.append(p_match);
+}
+
+// Compacts p_results so that only entries with score >= p_cull_score remain, then shrinks
+// the vector. The relative order of the surviving entries is NOT preserved: kept entries
+// taken from the back are copied over rejected entries nearer the front.
+void remove_low_scores(Vector<FuzzySearchResult> &p_results, float p_cull_score) {
+ // Removes all results with score < p_cull_score in-place.
+ int i = 0;
+ int j = p_results.size() - 1;
+ FuzzySearchResult *results = p_results.ptrw();
+
+ while (true) {
+ // Advances i to an element to remove and j to an element to keep.
+ while (j >= i && results[j].score < p_cull_score) {
+ j--;
+ }
+ while (i < j && results[i].score >= p_cull_score) {
+ i++;
+ }
+ // Once the cursors meet or cross, every entry at index <= j is one to keep.
+ if (i >= j) {
+ break;
+ }
+ // Overwrite the rejected entry at i with the kept entry at j, then move both cursors inward.
+ results[i++] = results[j--];
+ }
+
+ // j is the index of the last kept entry (-1 when nothing survives).
+ p_results.resize(j + 1);
+}
+
+// Culls weak results, sorts the survivors best-first and truncates the list to max_results.
+// The cull threshold lies cull_factor of the way from the average score to the maximum score
+// and is capped at cull_cutoff.
+void FuzzySearch::sort_and_filter(Vector<FuzzySearchResult> &p_results) const {
+	// Orders by descending score, then ascending target length, then target string, so that
+	// the ordering is consistent.
+	struct FuzzySearchResultComparator {
+		bool operator()(const FuzzySearchResult &p_lhs, const FuzzySearchResult &p_rhs) const {
+			if (p_lhs.score != p_rhs.score) {
+				return p_lhs.score > p_rhs.score;
+			}
+			const int lhs_len = p_lhs.target.length();
+			const int rhs_len = p_rhs.target.length();
+			if (lhs_len != rhs_len) {
+				return lhs_len < rhs_len;
+			}
+			return p_lhs.target < p_rhs.target;
+		}
+	};
+
+	if (p_results.is_empty()) {
+		return;
+	}
+
+	float score_sum = 0;
+	float max_score = 0; // Starts at 0, so the tracked maximum is never negative.
+	for (const FuzzySearchResult &entry : p_results) {
+		score_sum += entry.score;
+		max_score = MAX(max_score, entry.score);
+	}
+	const float avg_score = score_sum / p_results.size();
+
+	// TODO: Tune scoring and culling here to display fewer subsequence soup matches when good matches
+	// are available.
+	const float cull_score = MIN(cull_cutoff, Math::lerp(avg_score, max_score, cull_factor));
+	remove_low_scores(p_results, cull_score);
+
+	SortArray<FuzzySearchResult, FuzzySearchResultComparator> sorter;
+
+	if (p_results.size() <= max_results) {
+		sorter.sort(p_results.ptrw(), p_results.size());
+		return;
+	}
+
+	// More survivors than requested: only the first max_results need to be in order.
+	sorter.partial_sort(0, p_results.size(), max_results, p_results.ptrw());
+	p_results.resize(max_results);
+}
+
+// Splits p_query on spaces into tokens, remembering each token's position in the query, and
+// sets case_sensitive to true unless the whole query is lowercase.
+void FuzzySearch::set_query(const String &p_query) {
+	// Longer tokens first; equal lengths keep their query order.
+	struct TokenComparator {
+		bool operator()(const FuzzySearchToken &p_lhs, const FuzzySearchToken &p_rhs) const {
+			const int lhs_len = p_lhs.string.length();
+			const int rhs_len = p_rhs.string.length();
+			if (lhs_len != rhs_len) {
+				return lhs_len > rhs_len;
+			}
+			return p_lhs.idx < p_rhs.idx;
+		}
+	};
+
+	tokens.clear();
+	// NOTE(review): the `false` argument presumably makes split() drop empty pieces - confirm
+	// against String::split.
+	const Vector<String> words = p_query.split(" ", false);
+	for (int i = 0; i < words.size(); i++) {
+		tokens.append({ i, words[i] });
+	}
+
+	case_sensitive = !p_query.is_lowercase();
+
+	// Prioritize matching longer tokens before shorter ones since match overlaps are not accepted.
+	tokens.sort_custom<TokenComparator>();
+}
+
+// Matches every query token against p_target and accumulates the outcome in p_result.
+// Returns false as soon as one token has no acceptable match, true once all tokens matched.
+// NOTE(review): p_result's score, match_interval and token_matches are not reset here, so
+// callers appear to be expected to pass a freshly constructed result.
+bool FuzzySearch::search(const String &p_target, FuzzySearchResult &p_result) const {
+ p_result.target = p_target;
+ p_result.dir_index = p_target.rfind_char('/');
+ p_result.miss_budget = max_misses;
+
+ // Matching runs on the lowercased target unless case_sensitive is set.
+ String adjusted_target = case_sensitive ? p_target : p_target.to_lower();
+
+ // For each token, eagerly generate subsequences starting from index 0 and keep the best scoring one
+ // which does not conflict with prior token matches. This is not ensured to find the highest scoring
+ // combination of matches, or necessarily the highest scoring single subsequence, as it only considers
+ // eager subsequences for a given index, and likewise eagerly finds matches for each token in sequence.
+ for (const FuzzySearchToken &token : tokens) {
+ // token_idx stays -1 until a candidate has been accepted for this token.
+ FuzzyTokenMatch best_match;
+ int offset = start_offset;
+
+ while (true) {
+ FuzzyTokenMatch match;
+ if (allow_subsequences) {
+ if (!token.try_fuzzy_match(match, adjusted_target, offset, p_result.miss_budget)) {
+ break;
+ }
+ } else {
+ if (!token.try_exact_match(match, adjusted_target, offset)) {
+ break;
+ }
+ }
+ // Only candidates that fit the miss budget and do not overlap earlier tokens are scored.
+ if (p_result.can_add_token_match(match)) {
+ p_result.score_token_match(match, match.is_case_insensitive(p_target, adjusted_target));
+ // Strict '<' keeps the earliest candidate when scores tie.
+ if (best_match.token_idx == -1 || best_match.score < match.score) {
+ best_match = match;
+ }
+ }
+ // Retry one position past the start of this candidate. A candidate without any matched
+ // substring has an invalid interval and ends the scan for this token.
+ if (_is_valid_interval(match.interval)) {
+ offset = match.interval.x + 1;
+ } else {
+ break;
+ }
+ }
+
+ if (best_match.token_idx == -1) {
+ return false;
+ }
+
+ p_result.add_token_match(best_match);
+ }
+
+ p_result.maybe_apply_score_bonus();
+ return true;
+}
+
+// Runs search() on every entry of p_targets, collects the successful results in p_results
+// (which is cleared first) and finally applies sort_and_filter to them.
+void FuzzySearch::search_all(const PackedStringArray &p_targets, Vector<FuzzySearchResult> &p_results) const {
+	p_results.clear();
+
+	const int target_count = p_targets.size();
+	for (int i = 0; i < target_count; i++) {
+		FuzzySearchResult candidate;
+		if (!search(p_targets[i], candidate)) {
+			continue;
+		}
+		p_results.append(candidate);
+	}
+
+	sort_and_filter(p_results);
+}
diff --git a/core/string/fuzzy_search.h b/core/string/fuzzy_search.h
new file mode 100644
index 0000000000..5d8ed813c7
--- /dev/null
+++ b/core/string/fuzzy_search.h
@@ -0,0 +1,101 @@
+/**************************************************************************/
+/* fuzzy_search.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#ifndef FUZZY_SEARCH_H
+#define FUZZY_SEARCH_H
+
+#include "core/variant/variant.h"
+
+class FuzzyTokenMatch;
+
+// One space-separated word of a search query.
+struct FuzzySearchToken {
+ int idx = -1; // Position of the token in the original query (assigned by FuzzySearch::set_query).
+ String string; // Text of the token.
+
+ // Finds the whole token in p_target at or after p_offset and records the hit in p_match.
+ bool try_exact_match(FuzzyTokenMatch &p_match, const String &p_target, int p_offset) const;
+ // Finds the token's characters in order in p_target from p_offset, tolerating up to
+ // p_miss_budget characters that cannot be found.
+ bool try_fuzzy_match(FuzzyTokenMatch &p_match, const String &p_target, int p_offset, int p_miss_budget) const;
+};
+
+// The match of a single query token inside a target string.
+class FuzzyTokenMatch {
+ friend struct FuzzySearchToken;
+ friend class FuzzySearchResult;
+ friend class FuzzySearch;
+
+ int matched_length = 0; // Sum of the lengths of all recorded substrings.
+ int token_length = 0; // Length of the token being matched.
+ int token_idx = -1; // Index of the token in the query; -1 while unset.
+ Vector2i interval = Vector2i(-1, -1); // x and y are both inclusive indices.
+
+ // Records a matched run and widens interval to cover it.
+ void add_substring(int p_substring_start, int p_substring_length);
+ // True when interval and p_other_interval are both valid and overlap.
+ bool intersects(const Vector2i &p_other_interval) const;
+ // True when any matched character differs between p_original and p_adjusted.
+ bool is_case_insensitive(const String &p_original, const String &p_adjusted) const;
+ // Number of token characters not covered by any recorded substring.
+ int get_miss_count() const { return token_length - matched_length; }
+
+public:
+ int score = 0; // Set by FuzzySearchResult::score_token_match.
+ Vector<Vector2i> substrings; // x is start index, y is length.
+};
+
+// The outcome of matching a whole query against one target string.
+class FuzzySearchResult {
+ friend class FuzzySearch;
+
+ int miss_budget = 0; // Misses still allowed; reduced by each accepted token match.
+ Vector2i match_interval = Vector2i(-1, -1); // Inclusive interval covering all accepted token matches.
+
+ // True when p_match fits the miss budget and overlaps no accepted match.
+ bool can_add_token_match(const FuzzyTokenMatch &p_match) const;
+ // Computes p_match.score from its misses and matched substrings.
+ void score_token_match(FuzzyTokenMatch &p_match, bool p_case_insensitive) const;
+ // Accepts p_match: adds its score, widens match_interval and spends its misses.
+ void add_token_match(const FuzzyTokenMatch &p_match);
+ // Adds 1 to score when the token matches start in query order.
+ void maybe_apply_score_bonus();
+
+public:
+ String target; // The string that was searched.
+ int score = 0; // Sum of the accepted token match scores plus any ordering bonus.
+ int dir_index = -1; // Index of the last '/' in target, as returned by rfind_char.
+ Vector<FuzzyTokenMatch> token_matches; // One accepted match per query token.
+};
+
+// Holds a tokenized query plus search settings and matches it against target strings.
+class FuzzySearch {
+ Vector<FuzzySearchToken> tokens; // Query tokens, sorted longest first by set_query.
+
+ // Culls low scoring results, sorts the rest and truncates to max_results.
+ void sort_and_filter(Vector<FuzzySearchResult> &p_results) const;
+
+public:
+ int start_offset = 0; // Target index at which matching of each token starts.
+ bool case_sensitive = false; // Overwritten by set_query: true unless the query is all lowercase.
+ int max_results = 100; // Maximum number of results kept by sort_and_filter.
+ int max_misses = 2; // Initial miss budget given to each result in search.
+ bool allow_subsequences = true; // When true search uses try_fuzzy_match, otherwise try_exact_match.
+
+ // Splits p_query on spaces into tokens and updates case_sensitive.
+ void set_query(const String &p_query);
+ // Matches all tokens against p_target; returns false if any token has no acceptable match.
+ bool search(const String &p_target, FuzzySearchResult &p_result) const;
+ // Searches every target, then filters and sorts the successful results into p_results.
+ void search_all(const PackedStringArray &p_targets, Vector<FuzzySearchResult> &p_results) const;
+};
+
+#endif // FUZZY_SEARCH_H
diff --git a/core/string/node_path.cpp b/core/string/node_path.cpp
index fdc72bc8dc..3faf3bb0c5 100644
--- a/core/string/node_path.cpp
+++ b/core/string/node_path.cpp
@@ -420,7 +420,7 @@ NodePath::NodePath(const String &p_path) {
continue; // Allow end-of-path :
}
- ERR_FAIL_MSG("Invalid NodePath '" + p_path + "'.");
+ ERR_FAIL_MSG(vformat("Invalid NodePath '%s'.", p_path));
}
subpath.push_back(str);
diff --git a/core/string/translation_domain.cpp b/core/string/translation_domain.cpp
index 53b9ce8379..1ff8dcd752 100644
--- a/core/string/translation_domain.cpp
+++ b/core/string/translation_domain.cpp
@@ -123,7 +123,7 @@ String TranslationDomain::_double_vowels(const String &p_message) const {
}
}
return res;
-};
+}
String TranslationDomain::_replace_with_accented_string(const String &p_message) const {
String res;
@@ -247,7 +247,10 @@ PackedStringArray TranslationDomain::get_loaded_locales() const {
PackedStringArray locales;
for (const Ref<Translation> &E : translations) {
ERR_CONTINUE(E.is_null());
- locales.push_back(E->get_locale());
+ const String &locale = E->get_locale();
+ if (!locales.has(locale)) {
+ locales.push_back(locale);
+ }
}
return locales;
}
diff --git a/core/string/translation_po.cpp b/core/string/translation_po.cpp
index 8e275505b0..da79e472e7 100644
--- a/core/string/translation_po.cpp
+++ b/core/string/translation_po.cpp
@@ -246,7 +246,7 @@ void TranslationPO::add_message(const StringName &p_src_text, const StringName &
HashMap<StringName, Vector<StringName>> &map_id_str = translation_map[p_context];
if (map_id_str.has(p_src_text)) {
- WARN_PRINT("Double translations for \"" + String(p_src_text) + "\" under the same context \"" + String(p_context) + "\" for locale \"" + get_locale() + "\".\nThere should only be one unique translation for a given string under the same context.");
+ WARN_PRINT(vformat("Double translations for \"%s\" under the same context \"%s\" for locale \"%s\".\nThere should only be one unique translation for a given string under the same context.", String(p_src_text), String(p_context), get_locale()));
map_id_str[p_src_text].set(0, p_xlated_text);
} else {
map_id_str[p_src_text].push_back(p_xlated_text);
@@ -254,12 +254,12 @@ void TranslationPO::add_message(const StringName &p_src_text, const StringName &
}
void TranslationPO::add_plural_message(const StringName &p_src_text, const Vector<String> &p_plural_xlated_texts, const StringName &p_context) {
- ERR_FAIL_COND_MSG(p_plural_xlated_texts.size() != plural_forms, "Trying to add plural texts that don't match the required number of plural forms for locale \"" + get_locale() + "\"");
+ ERR_FAIL_COND_MSG(p_plural_xlated_texts.size() != plural_forms, vformat("Trying to add plural texts that don't match the required number of plural forms for locale \"%s\".", get_locale()));
HashMap<StringName, Vector<StringName>> &map_id_str = translation_map[p_context];
if (map_id_str.has(p_src_text)) {
- WARN_PRINT("Double translations for \"" + p_src_text + "\" under the same context \"" + p_context + "\" for locale " + get_locale() + ".\nThere should only be one unique translation for a given string under the same context.");
+ WARN_PRINT(vformat("Double translations for \"%s\" under the same context \"%s\" for locale %s.\nThere should only be one unique translation for a given string under the same context.", p_src_text, p_context, get_locale()));
map_id_str[p_src_text].clear();
}
@@ -280,7 +280,7 @@ StringName TranslationPO::get_message(const StringName &p_src_text, const String
if (!translation_map.has(p_context) || !translation_map[p_context].has(p_src_text)) {
return StringName();
}
- ERR_FAIL_COND_V_MSG(translation_map[p_context][p_src_text].is_empty(), StringName(), "Source text \"" + String(p_src_text) + "\" is registered but doesn't have a translation. Please report this bug.");
+ ERR_FAIL_COND_V_MSG(translation_map[p_context][p_src_text].is_empty(), StringName(), vformat("Source text \"%s\" is registered but doesn't have a translation. Please report this bug.", String(p_src_text)));
return translation_map[p_context][p_src_text][0];
}
@@ -296,7 +296,7 @@ StringName TranslationPO::get_plural_message(const StringName &p_src_text, const
if (!translation_map.has(p_context) || !translation_map[p_context].has(p_src_text)) {
return StringName();
}
- ERR_FAIL_COND_V_MSG(translation_map[p_context][p_src_text].is_empty(), StringName(), "Source text \"" + String(p_src_text) + "\" is registered but doesn't have a translation. Please report this bug.");
+ ERR_FAIL_COND_V_MSG(translation_map[p_context][p_src_text].is_empty(), StringName(), vformat("Source text \"%s\" is registered but doesn't have a translation. Please report this bug.", String(p_src_text)));
int plural_index = _get_plural_index(p_n);
ERR_FAIL_COND_V_MSG(plural_index < 0 || translation_map[p_context][p_src_text].size() < plural_index + 1, StringName(), "Plural index returned or number of plural translations is not valid. Please report this bug.");
diff --git a/core/string/translation_server.cpp b/core/string/translation_server.cpp
index 92b473b61f..4f09360ba8 100644
--- a/core/string/translation_server.cpp
+++ b/core/string/translation_server.cpp
@@ -118,36 +118,45 @@ void TranslationServer::init_locale_info() {
}
}
-String TranslationServer::standardize_locale(const String &p_locale) const {
- return _standardize_locale(p_locale, false);
+// Joins the locale components as language[_script][_country][_variant], leaving out any
+// component that is empty.
+TranslationServer::Locale::operator String() const {
+	String out = language;
+	if (!script.is_empty()) {
+		out += "_" + script;
+	}
+	if (!country.is_empty()) {
+		out += "_" + country;
+	}
+	if (!variant.is_empty()) {
+		out += "_" + variant;
+	}
+	return out;
}
-String TranslationServer::_standardize_locale(const String &p_locale, bool p_add_defaults) const {
+TranslationServer::Locale::Locale(const TranslationServer &p_server, const String &p_locale, bool p_add_defaults) {
// Replaces '-' with '_' for macOS style locales.
String univ_locale = p_locale.replace("-", "_");
// Extract locale elements.
- String lang_name, script_name, country_name, variant_name;
Vector<String> locale_elements = univ_locale.get_slice("@", 0).split("_");
- lang_name = locale_elements[0];
+ language = locale_elements[0];
if (locale_elements.size() >= 2) {
if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
- script_name = locale_elements[1];
+ script = locale_elements[1];
}
if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
- country_name = locale_elements[1];
+ country = locale_elements[1];
}
}
if (locale_elements.size() >= 3) {
if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
- country_name = locale_elements[2];
- } else if (variant_map.has(locale_elements[2].to_lower()) && variant_map[locale_elements[2].to_lower()] == lang_name) {
- variant_name = locale_elements[2].to_lower();
+ country = locale_elements[2];
+ } else if (p_server.variant_map.has(locale_elements[2].to_lower()) && p_server.variant_map[locale_elements[2].to_lower()] == language) {
+ variant = locale_elements[2].to_lower();
}
}
if (locale_elements.size() >= 4) {
- if (variant_map.has(locale_elements[3].to_lower()) && variant_map[locale_elements[3].to_lower()] == lang_name) {
- variant_name = locale_elements[3].to_lower();
+ if (p_server.variant_map.has(locale_elements[3].to_lower()) && p_server.variant_map[locale_elements[3].to_lower()] == language) {
+ variant = locale_elements[3].to_lower();
}
}
@@ -155,71 +164,62 @@ String TranslationServer::_standardize_locale(const String &p_locale, bool p_add
Vector<String> script_extra = univ_locale.get_slice("@", 1).split(";");
for (int i = 0; i < script_extra.size(); i++) {
if (script_extra[i].to_lower() == "cyrillic") {
- script_name = "Cyrl";
+ script = "Cyrl";
break;
} else if (script_extra[i].to_lower() == "latin") {
- script_name = "Latn";
+ script = "Latn";
break;
} else if (script_extra[i].to_lower() == "devanagari") {
- script_name = "Deva";
+ script = "Deva";
break;
- } else if (variant_map.has(script_extra[i].to_lower()) && variant_map[script_extra[i].to_lower()] == lang_name) {
- variant_name = script_extra[i].to_lower();
+ } else if (p_server.variant_map.has(script_extra[i].to_lower()) && p_server.variant_map[script_extra[i].to_lower()] == language) {
+ variant = script_extra[i].to_lower();
}
}
// Handles known non-ISO language names used e.g. on Windows.
- if (locale_rename_map.has(lang_name)) {
- lang_name = locale_rename_map[lang_name];
+ if (p_server.locale_rename_map.has(language)) {
+ language = p_server.locale_rename_map[language];
}
// Handle country renames.
- if (country_rename_map.has(country_name)) {
- country_name = country_rename_map[country_name];
+ if (p_server.country_rename_map.has(country)) {
+ country = p_server.country_rename_map[country];
}
// Remove unsupported script codes.
- if (!script_map.has(script_name)) {
- script_name = "";
+ if (!p_server.script_map.has(script)) {
+ script = "";
}
- // Add script code base on language and country codes for some ambiguous cases.
+ // Add script code based on language and country codes for some ambiguous cases.
if (p_add_defaults) {
- if (script_name.is_empty()) {
- for (int i = 0; i < locale_script_info.size(); i++) {
- const LocaleScriptInfo &info = locale_script_info[i];
- if (info.name == lang_name) {
- if (country_name.is_empty() || info.supported_countries.has(country_name)) {
- script_name = info.script;
+ if (script.is_empty()) {
+ for (int i = 0; i < p_server.locale_script_info.size(); i++) {
+ const LocaleScriptInfo &info = p_server.locale_script_info[i];
+ if (info.name == language) {
+ if (country.is_empty() || info.supported_countries.has(country)) {
+ script = info.script;
break;
}
}
}
}
- if (!script_name.is_empty() && country_name.is_empty()) {
+ if (!script.is_empty() && country.is_empty()) {
- // Add conntry code based on script for some ambiguous cases.
+ // Add country code based on script for some ambiguous cases.
- for (int i = 0; i < locale_script_info.size(); i++) {
- const LocaleScriptInfo &info = locale_script_info[i];
- if (info.name == lang_name && info.script == script_name) {
- country_name = info.default_country;
+ for (int i = 0; i < p_server.locale_script_info.size(); i++) {
+ const LocaleScriptInfo &info = p_server.locale_script_info[i];
+ if (info.name == language && info.script == script) {
+ country = info.default_country;
break;
}
}
}
}
+}
- // Combine results.
- String out = lang_name;
- if (!script_name.is_empty()) {
- out = out + "_" + script_name;
- }
- if (!country_name.is_empty()) {
- out = out + "_" + country_name;
- }
- if (!variant_name.is_empty()) {
- out = out + "_" + variant_name;
- }
- return out;
+String TranslationServer::standardize_locale(const String &p_locale) const { // Builds a Locale from p_locale and returns its String conversion.
+ return Locale(*this, p_locale, false).operator String(); // false is p_add_defaults: no default script/country is filled in.
}
int TranslationServer::compare_locales(const String &p_locale_a, const String &p_locale_b) const {
@@ -234,8 +234,8 @@ int TranslationServer::compare_locales(const String &p_locale_a, const String &p
return *cached_result;
}
- String locale_a = _standardize_locale(p_locale_a, true);
- String locale_b = _standardize_locale(p_locale_b, true);
+ Locale locale_a = Locale(*this, p_locale_a, true);
+ Locale locale_b = Locale(*this, p_locale_b, true);
if (locale_a == locale_b) {
// Exact match.
@@ -243,26 +243,41 @@ int TranslationServer::compare_locales(const String &p_locale_a, const String &p
return 10;
}
- Vector<String> locale_a_elements = locale_a.split("_");
- Vector<String> locale_b_elements = locale_b.split("_");
- if (locale_a_elements[0] != locale_b_elements[0]) {
+ if (locale_a.language != locale_b.language) {
// No match.
locale_compare_cache.insert(cache_key, 0);
return 0;
}
- // Matching language, both locales have extra parts.
- // Return number of matching elements.
- int matching_elements = 1;
- for (int i = 1; i < locale_a_elements.size(); i++) {
- for (int j = 1; j < locale_b_elements.size(); j++) {
- if (locale_a_elements[i] == locale_b_elements[j]) {
- matching_elements++;
- }
+ // Matching language, both locales have extra parts. Compare the
+ // remaining elements. If both elements are non-empty, check the
+ // match to increase or decrease the score. If either element or
+ // both are empty, leave the score as is.
+ int score = 5;
+ if (!locale_a.script.is_empty() && !locale_b.script.is_empty()) {
+ if (locale_a.script == locale_b.script) {
+ score++;
+ } else {
+ score--;
}
}
- locale_compare_cache.insert(cache_key, matching_elements);
- return matching_elements;
+ if (!locale_a.country.is_empty() && !locale_b.country.is_empty()) {
+ if (locale_a.country == locale_b.country) {
+ score++;
+ } else {
+ score--;
+ }
+ }
+ if (!locale_a.variant.is_empty() && !locale_b.variant.is_empty()) {
+ if (locale_a.variant == locale_b.variant) {
+ score++;
+ } else {
+ score--;
+ }
+ }
+
+ locale_compare_cache.insert(cache_key, score);
+ return score;
}
String TranslationServer::get_locale_name(const String &p_locale) const {
diff --git a/core/string/translation_server.h b/core/string/translation_server.h
index 2438349a69..fac41035ae 100644
--- a/core/string/translation_server.h
+++ b/core/string/translation_server.h
@@ -64,6 +64,24 @@ class TranslationServer : public Object {
};
static Vector<LocaleScriptInfo> locale_script_info;
+ struct Locale { // A locale held as four separate components instead of one joined string.
+ String language;
+ String script;
+ String country;
+ String variant;
+
+ bool operator==(const Locale &p_locale) const { // Equal only when all four components compare equal.
+ return (p_locale.language == language) &&
+ (p_locale.script == script) &&
+ (p_locale.country == country) &&
+ (p_locale.variant == variant);
+ }
+
+ operator String() const; // Conversion to String; declared here, defined out of line.
+
+ Locale(const TranslationServer &p_server, const String &p_locale, bool p_add_defaults); // NOTE(review): presumably parses p_locale via p_server's lookup maps, with p_add_defaults filling a missing script/country - confirm against the definition.
+ };
+
static HashMap<String, String> language_map;
static HashMap<String, String> script_map;
static HashMap<String, String> locale_rename_map;
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index e6f7492a18..521dfe0b8c 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -33,6 +33,7 @@
#include "core/crypto/crypto_core.h"
#include "core/math/color.h"
#include "core/math/math_funcs.h"
+#include "core/object/object.h"
#include "core/os/memory.h"
#include "core/string/print_string.h"
#include "core/string/string_name.h"
@@ -1818,7 +1819,7 @@ String String::num(double p_num, int p_decimals) {
#endif
buf[324] = 0;
- //destroy trailing zeroes
+ // Destroy trailing zeroes, except one after period.
{
bool period = false;
int z = 0;
@@ -1835,7 +1836,7 @@ String String::num(double p_num, int p_decimals) {
if (buf[z] == '0') {
buf[z] = 0;
} else if (buf[z] == '.') {
- buf[z] = 0;
+ buf[z + 1] = '0';
break;
} else {
break;
@@ -1850,6 +1851,8 @@ String String::num(double p_num, int p_decimals) {
}
String String::num_int64(int64_t p_num, int base, bool capitalize_hex) {
+ ERR_FAIL_COND_V_MSG(base < 2 || base > 36, "", "Cannot convert to base " + itos(base) + ", since the value is " + (base < 2 ? "less than 2." : "greater than 36."));
+
bool sign = p_num < 0;
int64_t n = p_num;
@@ -1888,6 +1891,8 @@ String String::num_int64(int64_t p_num, int base, bool capitalize_hex) {
}
String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) {
+ ERR_FAIL_COND_V_MSG(base < 2 || base > 36, "", "Cannot convert to base " + itos(base) + ", since the value is " + (base < 2 ? "less than 2." : "greater than 36."));
+
uint64_t n = p_num;
int chars = 0;
@@ -1924,14 +1929,28 @@ String String::num_real(double p_num, bool p_trailing) {
return num_int64((int64_t)p_num);
}
}
-#ifdef REAL_T_IS_DOUBLE
int decimals = 14;
-#else
+ // We want to align the digits to the above sane default, so we only need
+ // to subtract log10 for numbers with a positive power of ten magnitude.
+ const double abs_num = Math::abs(p_num);
+ if (abs_num > 10) {
+ decimals -= (int)floor(log10(abs_num));
+ }
+ return num(p_num, decimals);
+}
+
+String String::num_real(float p_num, bool p_trailing) {
+ if (p_num == (float)(int64_t)p_num) {
+ if (p_trailing) {
+ return num_int64((int64_t)p_num) + ".0";
+ } else {
+ return num_int64((int64_t)p_num);
+ }
+ }
int decimals = 6;
-#endif
// We want to align the digits to the above sane default, so we only need
// to subtract log10 for numbers with a positive power of ten magnitude.
- double abs_num = Math::abs(p_num);
+ const float abs_num = Math::abs(p_num);
if (abs_num > 10) {
decimals -= (int)floor(log10(abs_num));
}
@@ -3368,7 +3387,7 @@ int String::find(const char *p_str, int p_from) const {
return -1;
}
-int String::find_char(const char32_t &p_char, int p_from) const {
+int String::find_char(char32_t p_char, int p_from) const {
return _cowdata.find(p_char, p_from);
}
@@ -3605,6 +3624,10 @@ int String::rfind(const char *p_str, int p_from) const {
return -1;
}
+int String::rfind_char(char32_t p_char, int p_from) const { // The header declares p_from = -1 by default and documents a negative return on failure.
+ return _cowdata.rfind(p_char, p_from); // Forwards both arguments unchanged to the underlying buffer's rfind().
+}
+
int String::rfindn(const String &p_str, int p_from) const {
// establish a limit
int limit = length() - p_str.length();
@@ -3818,6 +3841,15 @@ bool String::is_quoted() const {
return is_enclosed_in("\"") || is_enclosed_in("'");
}
+bool String::is_lowercase() const { // Returns false as soon as is_unicode_upper_case() accepts a character, true otherwise.
+ for (const char32_t *str = &operator[](0); *str; str++) { // Scans from index 0 until a zero character is reached.
+ if (is_unicode_upper_case(*str)) {
+ return false; // One upper-case character is enough to reject the string.
+ }
+ }
+ return true; // No character was classified as upper case (also the result when the loop body never runs).
+}
+
int String::_count(const String &p_string, int p_from, int p_to, bool p_case_insensitive) const {
if (p_string.is_empty()) {
return 0;
@@ -4060,8 +4092,18 @@ String String::format(const Variant &values, const String &placeholder) const {
for (const Variant &key : keys) {
new_string = new_string.replace(placeholder.replace("_", key), d[key]);
}
+ } else if (values.get_type() == Variant::OBJECT) {
+ Object *obj = values.get_validated_object();
+ ERR_FAIL_NULL_V(obj, new_string);
+
+ List<PropertyInfo> props;
+ obj->get_property_list(&props);
+
+ for (const PropertyInfo &E : props) {
+ new_string = new_string.replace(placeholder.replace("_", E.name), obj->get(E.name));
+ }
} else {
- ERR_PRINT(String("Invalid type: use Array or Dictionary.").ascii().get_data());
+ ERR_PRINT(String("Invalid type: use Array, Dictionary or Object.").ascii().get_data());
}
return new_string;
@@ -4601,7 +4643,7 @@ String String::humanize_size(uint64_t p_size) {
}
if (magnitude == 0) {
- return String::num(p_size) + " " + RTR("B");
+ return String::num_uint64(p_size) + " " + RTR("B");
} else {
String suffix;
switch (magnitude) {
diff --git a/core/string/ustring.h b/core/string/ustring.h
index aa62c9cb18..d6e563223a 100644
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@@ -118,7 +118,7 @@ public:
Char16String &operator+=(char16_t p_char);
int length() const { return size() ? size() - 1 : 0; }
const char16_t *get_data() const;
- operator const char16_t *() const { return get_data(); };
+ operator const char16_t *() const { return get_data(); }
protected:
void copy_from(const char16_t *p_cstr);
@@ -160,7 +160,7 @@ public:
CharString &operator+=(char p_char);
int length() const { return size() ? size() - 1 : 0; }
const char *get_data() const;
- operator const char *() const { return get_data(); };
+ operator const char *() const { return get_data(); }
protected:
void copy_from(const char *p_cstr);
@@ -287,11 +287,12 @@ public:
String substr(int p_from, int p_chars = -1) const;
int find(const String &p_str, int p_from = 0) const; ///< return <0 if failed
int find(const char *p_str, int p_from = 0) const; ///< return <0 if failed
- int find_char(const char32_t &p_char, int p_from = 0) const; ///< return <0 if failed
+ int find_char(char32_t p_char, int p_from = 0) const; ///< return <0 if failed
int findn(const String &p_str, int p_from = 0) const; ///< return <0 if failed, case insensitive
int findn(const char *p_str, int p_from = 0) const; ///< return <0 if failed
int rfind(const String &p_str, int p_from = -1) const; ///< return <0 if failed
int rfind(const char *p_str, int p_from = -1) const; ///< return <0 if failed
+ int rfind_char(char32_t p_char, int p_from = -1) const; ///< return <0 if failed
int rfindn(const String &p_str, int p_from = -1) const; ///< return <0 if failed, case insensitive
int rfindn(const char *p_str, int p_from = -1) const; ///< return <0 if failed
int findmk(const Vector<String> &p_keys, int p_from = 0, int *r_key = nullptr) const; ///< return <0 if failed
@@ -305,6 +306,7 @@ public:
bool is_subsequence_of(const String &p_string) const;
bool is_subsequence_ofn(const String &p_string) const;
bool is_quoted() const;
+ bool is_lowercase() const;
Vector<String> bigrams() const;
float similarity(const String &p_string) const;
String format(const Variant &values, const String &placeholder = "{_}") const;
@@ -332,6 +334,7 @@ public:
static String num(double p_num, int p_decimals = -1);
static String num_scientific(double p_num);
static String num_real(double p_num, bool p_trailing = true);
+ static String num_real(float p_num, bool p_trailing = true);
static String num_int64(int64_t p_num, int base = 10, bool capitalize_hex = false);
static String num_uint64(uint64_t p_num, int base = 10, bool capitalize_hex = false);
static String chr(char32_t p_char);