* However, if we have a whole lot of arcs to deal with, retail duplicate
* checks become too slow. In that case we proceed by sorting and merging
* the arc lists, and then we can indeed just update the arcs in-place.
+ *
+ * On the other hand, it's also true that this is frequently called with
+ * a brand-new newState that has no existing in-arcs. In that case,
+ * de-duplication is unnecessary, so we can just blindly move all the arcs.
*/
static void
moveins(struct nfa *nfa,
{
assert(oldState != newState);
- if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
+ if (newState->nins == 0)
+ {
+ /* No need for de-duplication */
+ struct arc *a;
+
+ while ((a = oldState->ins) != NULL)
+ {
+ createarc(nfa, a->type, a->co, a->from, newState);
+ freearc(nfa, a);
+ }
+ }
+ else if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
{
/* With not too many arcs, just do them one at a time */
struct arc *a;
/*
* copyins - copy in arcs of a state to another state
+ *
+ * The comments for moveins() apply here as well. However, in current
+ * usage, this is *only* called with brand-new target states, so that
+ * only the "no need for de-duplication" code path is ever reached.
+ * We keep the rest #ifdef'd out in case it's needed in the future.
*/
static void
copyins(struct nfa *nfa,
struct state *newState)
{
assert(oldState != newState);
+ assert(newState->nins == 0); /* see comment above */
+
+ if (newState->nins == 0)
+ {
+ /* No need for de-duplication */
+ struct arc *a;
- if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
+ for (a = oldState->ins; a != NULL; a = a->inchain)
+ createarc(nfa, a->type, a->co, a->from, newState);
+ }
+#ifdef NOT_USED /* see comment above */
+ else if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
{
/* With not too many arcs, just do them one at a time */
struct arc *a;
createarc(nfa, a->type, a->co, a->from, newState);
}
}
+#endif /* NOT_USED */
}
/*
{
assert(oldState != newState);
- if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
+ if (newState->nouts == 0)
+ {
+ /* No need for de-duplication */
+ struct arc *a;
+
+ while ((a = oldState->outs) != NULL)
+ {
+ createarc(nfa, a->type, a->co, newState, a->to);
+ freearc(nfa, a);
+ }
+ }
+ else if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
{
/* With not too many arcs, just do them one at a time */
struct arc *a;
/*
* copyouts - copy out arcs of a state to another state
+ *
+ * See comments for copyins()
*/
static void
copyouts(struct nfa *nfa,
struct state *newState)
{
assert(oldState != newState);
+ assert(newState->nouts == 0); /* see comment above */
+
+ if (newState->nouts == 0)
+ {
+ /* No need for de-duplication */
+ struct arc *a;
- if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
+ for (a = oldState->outs; a != NULL; a = a->outchain)
+ createarc(nfa, a->type, a->co, newState, a->to);
+ }
+#ifdef NOT_USED /* see comment above */
+ else if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
{
/* With not too many arcs, just do them one at a time */
struct arc *a;
createarc(nfa, a->type, a->co, newState, a->to);
}
}
+#endif /* NOT_USED */
}
/*
else if (a->co == RAINBOW)
{
/* con is incompatible if it's for a pseudocolor */
+ /* (this is hypothetical; we make no such constraints today) */
if (nfa->cm->cd[con->co].flags & PSEUDO)
return INCOMPATIBLE;
/* otherwise, constraint constrains arc to be only its color */
else if (a->co == RAINBOW)
{
/* con is incompatible if it's for a pseudocolor */
+ /* (this is hypothetical; we make no such constraints today) */
if (nfa->cm->cd[con->co].flags & PSEUDO)
return INCOMPATIBLE;
/* otherwise, constraint constrains arc to be only its color */
return -1;
if (aa->to > bb->to)
return +1;
+ /* This is unreached, since there should be no duplicate arcs now: */
return 0;
}
{0,REG_ULOCALE}
(1 row)
+-- expectMatch 9.9b &iL {a[[=Y=]]} ay ay
+select * from test_regex('a[[=Y=]]', 'ay', 'iL');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {ay}
+(2 rows)
+
+select * from test_regex('a[[=Y=]]', 'ay', 'iLb');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {ay}
+(2 rows)
+
+-- expectNomatch 9.9c &L {a[[=Y=]]} ay
+select * from test_regex('a[[=Y=]]', 'ay', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+(1 row)
+
+select * from test_regex('a[[=Y=]]', 'ay', 'Lb');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+(1 row)
+
-- expectError 9.10 & {a[0-[=x=]]} ERANGE
select * from test_regex('a[0-[=x=]]', '', '');
ERROR: invalid regular expression: invalid character range
{0,REG_UUNPORT}
(1 row)
+-- expectMatch 19.6 &iM {a[B-Z]} aC aC
+select * from test_regex('a[B-Z]', 'aC', 'iM');
+ test_regex
+-----------------
+ {0,REG_UUNPORT}
+ {aC}
+(2 rows)
+
+select * from test_regex('a[B-Z]', 'aC', 'iMb');
+ test_regex
+-----------------
+ {0,REG_UUNPORT}
+ {aC}
+(2 rows)
+
+-- expectNomatch 19.7 &iM {a[^B-Z]} aC
+select * from test_regex('a[^B-Z]', 'aC', 'iM');
+ test_regex
+-----------------
+ {0,REG_UUNPORT}
+(1 row)
+
+select * from test_regex('a[^B-Z]', 'aC', 'iMb');
+ test_regex
+-----------------
+ {0,REG_UUNPORT}
+(1 row)
+
-- doing 20 "directors and embedded options"
-- expectError 20.1 & ***? BADPAT
select * from test_regex('***?', '', '');
{foo/bar/baz,foo,bar,baz}
(2 rows)
+-- expectMatch 24.14 PRT {^(.+?)(?:/(.+?))(?:/(.+?)\3)?$} {foo/bar/baz/quux} {foo/bar/baz/quux} {foo} {bar/baz/quux} {}
+select * from test_regex('^(.+?)(?:/(.+?))(?:/(.+?)\3)?$', 'foo/bar/baz/quux', 'PRT');
+ test_regex
+----------------------------------------------
+ {3,REG_UBACKREF,REG_UNONPOSIX,REG_USHORTEST}
+ {foo/bar/baz/quux,foo,bar/baz/quux,NULL}
+(2 rows)
+
-- doing 25 "mixed quantifiers"
-- # this is very incomplete as yet
-- # should include |
{"",""}
(2 rows)
+-- and now, tests not from either Spencer or the Tcl project
+-- These cases exercise additional code paths in pushfwd()/push()/combine()
+select * from test_regex('a\Y(?=45)', 'a45', 'HLP');
+ test_regex
+-----------------------------------------------
+ {0,REG_ULOOKAROUND,REG_UNONPOSIX,REG_ULOCALE}
+ {a}
+(2 rows)
+
+select * from test_regex('a(?=.)c', 'ac', 'HP');
+ test_regex
+-----------------------------------
+ {0,REG_ULOOKAROUND,REG_UNONPOSIX}
+ {ac}
+(2 rows)
+
+select * from test_regex('a(?=.).*(?=3)3*', 'azz33', 'HP');
+ test_regex
+-----------------------------------
+ {0,REG_ULOOKAROUND,REG_UNONPOSIX}
+ {azz33}
+(2 rows)
+
+select * from test_regex('a(?=\w)\w*(?=.).*', 'az3%', 'HLP');
+ test_regex
+-----------------------------------------------
+ {0,REG_ULOOKAROUND,REG_UNONPOSIX,REG_ULOCALE}
+ {az3%}
+(2 rows)
+
+-- These exercise the bulk-arc-movement paths in moveins() and moveouts();
+-- you may need to make them longer if you change BULK_ARC_OP_USE_SORT()
+select * from test_regex('ABCDEFGHIJKLMNOPQRSTUVWXYZ(?:\w|a|b|c|d|e|f|0|1|2|3|4|5|6|Q)',
+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ3', 'LP');
+ test_regex
+-------------------------------
+ {0,REG_UNONPOSIX,REG_ULOCALE}
+ {ABCDEFGHIJKLMNOPQRSTUVWXYZ3}
+(2 rows)
+
+select * from test_regex('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789(\Y\Y)+',
+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789Z', 'LP');
+ test_regex
+-------------------------------------------
+ {1,REG_UNONPOSIX,REG_ULOCALE}
+ {ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,""}
+(2 rows)
+
+select * from test_regex('((x|xabcdefghijklmnopqrstuvwxyz0123456789)x*|[^y]z)$',
+ 'az', '');
+ test_regex
+--------------
+ {2}
+ {az,az,NULL}
+(2 rows)
+
{0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT,REG_ULOCALE}
(1 row)
+select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237',
+ E'\u1500\u1237', 'iELMP');
+ test_regex
+----------------------------------------------------
+ {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT,REG_ULOCALE}
+ {ᔀሷ}
+(2 rows)
+
+-- systematically test char classes
+select * from test_regex('[[:alnum:]]+', E'x\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {xᔀሷ}
+(2 rows)
+
+select * from test_regex('[[:alpha:]]+', E'x\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {xᔀሷ}
+(2 rows)
+
+select * from test_regex('[[:ascii:]]+', E'x\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {x}
+(2 rows)
+
+select * from test_regex('[[:blank:]]+', E'x \t\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {" "}
+(2 rows)
+
+select * from test_regex('[[:cntrl:]]+', E'x\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+(1 row)
+
+select * from test_regex('[[:digit:]]+', E'x9\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {9}
+(2 rows)
+
+select * from test_regex('[[:graph:]]+', E'x\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {xᔀሷ}
+(2 rows)
+
+select * from test_regex('[[:lower:]]+', E'x\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {x}
+(2 rows)
+
+select * from test_regex('[[:print:]]+', E'x\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {xᔀሷ}
+(2 rows)
+
+select * from test_regex('[[:punct:]]+', E'x.\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {.}
+(2 rows)
+
+select * from test_regex('[[:space:]]+', E'x \t\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {" "}
+(2 rows)
+
+select * from test_regex('[[:upper:]]+', E'xX\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {X}
+(2 rows)
+
+select * from test_regex('[[:xdigit:]]+', E'xa9\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {a9}
+(2 rows)
+
+select * from test_regex('[[:word:]]+', E'x_\u1500\u1237', 'L');
+ test_regex
+-----------------
+ {0,REG_ULOCALE}
+ {x_ᔀሷ}
+(2 rows)
+
-- expectNomatch 9.9 &+L {a[[=x=]]} az
select * from test_regex('a[[=x=]]', 'az', '+L');
select * from test_regex('a[[=x=]]', 'az', '+Lb');
+-- expectMatch 9.9b &iL {a[[=Y=]]} ay ay
+select * from test_regex('a[[=Y=]]', 'ay', 'iL');
+select * from test_regex('a[[=Y=]]', 'ay', 'iLb');
+-- expectNomatch 9.9c &L {a[[=Y=]]} ay
+select * from test_regex('a[[=Y=]]', 'ay', 'L');
+select * from test_regex('a[[=Y=]]', 'ay', 'Lb');
-- expectError 9.10 & {a[0-[=x=]]} ERANGE
select * from test_regex('a[0-[=x=]]', '', '');
select * from test_regex('a[0-[=x=]]', '', 'b');
-- expectNomatch 19.5 &iM {a[^b-d]} aC
select * from test_regex('a[^b-d]', 'aC', 'iM');
select * from test_regex('a[^b-d]', 'aC', 'iMb');
+-- expectMatch 19.6 &iM {a[B-Z]} aC aC
+select * from test_regex('a[B-Z]', 'aC', 'iM');
+select * from test_regex('a[B-Z]', 'aC', 'iMb');
+-- expectNomatch 19.7 &iM {a[^B-Z]} aC
+select * from test_regex('a[^B-Z]', 'aC', 'iM');
+select * from test_regex('a[^B-Z]', 'aC', 'iMb');
-- doing 20 "directors and embedded options"
select * from test_regex('z*?4', '123zzzz456', 'PT');
-- expectMatch 24.13 PT {^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$} {foo/bar/baz} {foo/bar/baz} {foo} {bar} {baz}
select * from test_regex('^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', 'foo/bar/baz', 'PT');
+-- expectMatch 24.14 PRT {^(.+?)(?:/(.+?))(?:/(.+?)\3)?$} {foo/bar/baz/quux} {foo/bar/baz/quux} {foo} {bar/baz/quux} {}
+select * from test_regex('^(.+?)(?:/(.+?))(?:/(.+?)\3)?$', 'foo/bar/baz/quux', 'PRT');
-- doing 25 "mixed quantifiers"
-- # this is very incomplete as yet
-- regexp {(\Y)+} foo
-- } 1
select * from test_regex('(\Y)+', 'foo', 'LNP');
+
+
+-- and now, tests not from either Spencer or the Tcl project
+
+-- These cases exercise additional code paths in pushfwd()/push()/combine()
+select * from test_regex('a\Y(?=45)', 'a45', 'HLP');
+select * from test_regex('a(?=.)c', 'ac', 'HP');
+select * from test_regex('a(?=.).*(?=3)3*', 'azz33', 'HP');
+select * from test_regex('a(?=\w)\w*(?=.).*', 'az3%', 'HLP');
+
+-- These exercise the bulk-arc-movement paths in moveins() and moveouts();
+-- you may need to make them longer if you change BULK_ARC_OP_USE_SORT()
+select * from test_regex('ABCDEFGHIJKLMNOPQRSTUVWXYZ(?:\w|a|b|c|d|e|f|0|1|2|3|4|5|6|Q)',
+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ3', 'LP');
+select * from test_regex('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789(\Y\Y)+',
+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789Z', 'LP');
+select * from test_regex('((x|xabcdefghijklmnopqrstuvwxyz0123456789)x*|[^y]z)$',
+ 'az', '');
E'\u1500\u1237', 'ELMP');
select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237',
E'A\u1239', 'ELMP');
+select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237',
+ E'\u1500\u1237', 'iELMP');
+
+-- systematically test char classes
+select * from test_regex('[[:alnum:]]+', E'x\u1500\u1237', 'L');
+select * from test_regex('[[:alpha:]]+', E'x\u1500\u1237', 'L');
+select * from test_regex('[[:ascii:]]+', E'x\u1500\u1237', 'L');
+select * from test_regex('[[:blank:]]+', E'x \t\u1500\u1237', 'L');
+select * from test_regex('[[:cntrl:]]+', E'x\u1500\u1237', 'L');
+select * from test_regex('[[:digit:]]+', E'x9\u1500\u1237', 'L');
+select * from test_regex('[[:graph:]]+', E'x\u1500\u1237', 'L');
+select * from test_regex('[[:lower:]]+', E'x\u1500\u1237', 'L');
+select * from test_regex('[[:print:]]+', E'x\u1500\u1237', 'L');
+select * from test_regex('[[:punct:]]+', E'x.\u1500\u1237', 'L');
+select * from test_regex('[[:space:]]+', E'x \t\u1500\u1237', 'L');
+select * from test_regex('[[:upper:]]+', E'xX\u1500\u1237', 'L');
+select * from test_regex('[[:xdigit:]]+', E'xa9\u1500\u1237', 'L');
+select * from test_regex('[[:word:]]+', E'x_\u1500\u1237', 'L');