summary | refs | log | tree | commit | diff | stats
path: root/drivers/nrex/nrex.cpp
diff options
context:
space:
mode:
author: Zher Huei Lee <lee.zh.92@gmail.com> 2015-12-04 21:18:41 +0000
committer: Zher Huei Lee <lee.zh.92@gmail.com> 2015-12-04 21:18:41 +0000
commit: d0ddf150d9c7207f87409b5899297dae0fc0b708 (patch)
tree: 0a4e0fcd8a4c48163c67fc19f17813f3b44ad23e /drivers/nrex/nrex.cpp
parent: 5b0dcab3e85a4b2147ec23a06f7845246f8cb538 (diff)
download: redot-engine-d0ddf150d9c7207f87409b5899297dae0fc0b708.tar.gz
updated the RegEx library nrex to v0.1
After implementing unit testing for nrex, I caught and fixed some errors, so it should now behave more like Python's RegEx. In addition, I've added version numbering so that it is possible to tell whether the library needs updating. Here is a list of changes:
- Fixed zero-count quantifiers failing.
- Fixed infinite recursion when quantifying a zero-length token.
- Fixed `$` (as a string pattern on its own) not matching.
- Fixed lookbehind rewinding beyond the start of the string.
- Added support for the alternative back reference format `\g{1}`, similar to Python. This allows digits to be used immediately after back references.
- The number of capture groups is still limited to 9 by default, but it can now be set manually, with an option for no limit at all. (Python has no limit.)
- Curly bracket quantifiers such as `{0}` are no longer interpreted as a literal string if the previous token is not quantifiable. (Python behaviour.)
Diffstat (limited to 'drivers/nrex/nrex.cpp')
-rw-r--r-- drivers/nrex/nrex.cpp 101
1 files changed, 67 insertions, 34 deletions
diff --git a/drivers/nrex/nrex.cpp b/drivers/nrex/nrex.cpp
index 104e07f887..1eb9ec38c8 100644
--- a/drivers/nrex/nrex.cpp
+++ b/drivers/nrex/nrex.cpp
@@ -1,4 +1,5 @@
// NREX: Node RegEx
+// Version 0.1
//
// Copyright (c) 2015, Zher Huei Lee
// All rights reserved.
@@ -299,6 +300,10 @@ struct nrex_node_group : public nrex_node
{
length = 1;
}
+ if (mode == LookAhead || mode == LookBehind)
+ {
+ quantifiable = false;
+ }
}
virtual ~nrex_node_group()
@@ -322,6 +327,10 @@ struct nrex_node_group : public nrex_node
int offset = 0;
if (mode == LookBehind)
{
+ if (pos < length)
+ {
+ return -1;
+ }
offset = length;
}
int res = childset[i]->test(s, pos - offset);
@@ -450,7 +459,7 @@ struct nrex_node_char : public nrex_node
int test(nrex_search* s, int pos) const
{
- if (s->end == pos || s->at(pos) != ch)
+ if (s->end <= pos || 0 > pos || s->at(pos) != ch)
{
return -1;
}
@@ -473,7 +482,7 @@ struct nrex_node_range : public nrex_node
int test(nrex_search* s, int pos) const
{
- if (s->end == pos)
+ if (s->end <= pos || 0 > pos)
{
return -1;
}
@@ -555,7 +564,7 @@ struct nrex_node_class : public nrex_node
int test(nrex_search* s, int pos) const
{
- if (s->end == pos)
+ if (s->end <= pos || 0 > pos)
{
return -1;
}
@@ -727,7 +736,7 @@ struct nrex_node_shorthand : public nrex_node
int test(nrex_search* s, int pos) const
{
- if (s->end == pos)
+ if (s->end <= pos || 0 > pos)
{
return -1;
}
@@ -811,16 +820,12 @@ struct nrex_node_quantifier : public nrex_node
int test(nrex_search* s, int pos) const
{
- return test_step(s, pos, 1);
+ return test_step(s, pos, 0, pos);
}
- int test_step(nrex_search* s, int pos, int level) const
+ int test_step(nrex_search* s, int pos, int level, int start) const
{
- if (max == 0)
- {
- return pos;
- }
- if ((max >= 1 && level > max) || pos > s->end)
+ if (pos > s->end)
{
return -1;
}
@@ -840,14 +845,26 @@ struct nrex_node_quantifier : public nrex_node
return res;
}
}
- int res = child->test(s, pos);
- if (s->complete)
+ if (max >= 0 && level > max)
{
- return res;
+ return -1;
+ }
+ if (level > 1 && level > min + 1 && pos == start)
+ {
+ return -1;
+ }
+ int res = pos;
+ if (level >= 1)
+ {
+ res = child->test(s, pos);
+ if (s->complete)
+ {
+ return res;
+ }
}
if (res >= 0)
{
- int res_step = test_step(s, res, level + 1);
+ int res_step = test_step(s, res, level + 1, start);
if (res_step >= 0)
{
return res_step;
@@ -983,6 +1000,13 @@ nrex::nrex()
{
}
+nrex::nrex(const nrex_char* pattern, int captures)
+ : _capturing(0)
+ , _root(NULL)
+{
+ compile(pattern, captures);
+}
+
nrex::~nrex()
{
if (_root)
@@ -1008,10 +1032,14 @@ void nrex::reset()
int nrex::capture_size() const
{
- return _capturing + 1;
+ if (_root)
+ {
+ return _capturing + 1;
+ }
+ return 0;
}
-bool nrex::compile(const nrex_char* pattern, bool extended)
+bool nrex::compile(const nrex_char* pattern, int captures)
{
reset();
nrex_node_group* root = NREX_NEW(nrex_node_group(_capturing));
@@ -1053,7 +1081,7 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
NREX_COMPILE_ERROR("unrecognised qualifier for group");
}
}
- else if ((!extended && _capturing < 9) || (extended && _capturing < 99))
+ else if (captures >= 0 && _capturing < captures)
{
nrex_node_group* group = NREX_NEW(nrex_node_group(++_capturing));
stack.top()->add_child(group);
@@ -1190,15 +1218,6 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
}
else if (nrex_is_quantifier(c[0]))
{
- if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
- {
- if (c[0] == '{')
- {
- stack.top()->add_child(NREX_NEW(nrex_node_char('{')));
- continue;
- }
- NREX_COMPILE_ERROR("element not quantifiable");
- }
int min = 0;
int max = -1;
bool valid_quantifier = true;
@@ -1270,6 +1289,10 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
}
if (valid_quantifier)
{
+ if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
+ {
+ NREX_COMPILE_ERROR("element not quantifiable");
+ }
nrex_node_quantifier* quant = NREX_NEW(nrex_node_quantifier(min, max));
if (min == max)
{
@@ -1323,20 +1346,26 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
stack.top()->add_child(NREX_NEW(nrex_node_shorthand(c[1])));
++c;
}
- else if ('1' <= c[1] && c[1] <= '9')
+ else if (('1' <= c[1] && c[1] <= '9') || (c[1] == 'g' && c[2] == '{'))
{
int ref = 0;
- if (extended && '0' <= c[2] && c[2] <= '9')
+ bool unclosed = false;
+ if (c[1] == 'g')
{
- ref = int(c[1] - '0') * 10 + int(c[2] - '0');
+ unclosed = true;
c = &c[2];
}
- else
+ while ('0' <= c[1] && c[1] <= '9')
{
- ref = int(c[1] - '0');
+ ref = ref * 10 + int(c[1] - '0');
++c;
}
- if (ref > _capturing)
+ if (c[1] == '}')
+ {
+ unclosed = false;
+ ++c;
+ }
+ if (ref > _capturing || ref <= 0 || unclosed)
{
NREX_COMPILE_ERROR("backreference to non-existent capture");
}
@@ -1377,6 +1406,10 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int end) const
{
+ if (!_root)
+ {
+ return false;
+ }
nrex_search s(str, captures);
if (end >= offset)
{
@@ -1386,7 +1419,7 @@ bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int en
{
s.end = NREX_STRLEN(str);
}
- for (int i = offset; i < s.end; ++i)
+ for (int i = offset; i <= s.end; ++i)
{
for (int c = 0; c <= _capturing; ++c)
{