Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitb69bdce

Browse files
committed
Avoid assertion due to disconnected NFA sub-graphs in regex parsing.
In commit08c0d6a which introduced "rainbow" arcs in regex NFAs,I didn't think terribly hard about what to do when creating the colorcomplement of a rainbow arc. Clearly, the complement cannot match anycharacters, and I took the easy way out by just not building any arcsat all in the complement arc set. That mostly works, but NikolayShaplov found a case where it doesn't: if we decide to delete thatsub-NFA later because it's inside a "{0}" quantifier, delsub()suffered an assertion failure. That's because delsub() relies onthe target sub-NFA being fully connected. That was always truebefore, and the best fix seems to be to restore that property.Hence, invent a new arc type CANTMATCH that can be generated inplace of an empty color complement, and drop it again later when westart NFA optimization. (At that point we don't need to do delsub()any more, and besides there are other cases where NFA optimization canlead to disconnected subgraphs.)It appears that this bug has no consequences in a non-assert-enabledbuild: there will be some transiently leaked NFA states/arcs, butthey'll get cleaned up eventually. Still, we don't like assertionfailures, so back-patch to v14 where rainbow arcs were introduced.Per bug #18708 from Nikolay Shaplov.Discussion:https://postgr.es/m/18708-f94f2599c9d2c005@postgresql.org
1 parent9a70f67 commitb69bdce

File tree

6 files changed

+79
-1
lines changed

6 files changed

+79
-1
lines changed

‎src/backend/regex/regc_color.c‎

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1075,9 +1075,19 @@ colorcomplement(struct nfa *nfa,
10751075

10761076
assert(of!=from);
10771077

1078-
/* A RAINBOW arc matches all colors, making the complement empty */
1078+
/*
1079+
* A RAINBOW arc matches all colors, making the complement empty. But we
1080+
* can't just return without making any arcs, because that would leave the
1081+
* NFA disconnected which would break any future delsub(). Instead, make
1082+
* a CANTMATCH arc. Also set the HASCANTMATCH flag so we know we need to
1083+
* clean that up at the start of NFA optimization.
1084+
*/
10791085
if (findarc(of,PLAIN,RAINBOW)!=NULL)
1086+
{
1087+
newarc(nfa,CANTMATCH,0,from,to);
1088+
nfa->flags |=HASCANTMATCH;
10801089
return;
1090+
}
10811091

10821092
/* Otherwise, transiently mark the colors that appear in of's out-arcs */
10831093
for (a=of->outs;a!=NULL;a=a->outchain)
@@ -1089,6 +1099,12 @@ colorcomplement(struct nfa *nfa,
10891099
assert(!UNUSEDCOLOR(cd));
10901100
cd->flags |=COLMARK;
10911101
}
1102+
1103+
/*
1104+
* There's no syntax for re-complementing a color set, so we cannot
1105+
* see CANTMATCH arcs here.
1106+
*/
1107+
assert(a->type!=CANTMATCH);
10921108
}
10931109

10941110
/* Scan colors, clear transient marks, add arcs for unmarked colors */

‎src/backend/regex/regc_nfa.c‎

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,6 +1462,7 @@ removetraverse(struct nfa *nfa,
14621462
{
14631463
casePLAIN:
14641464
caseEMPTY:
1465+
caseCANTMATCH:
14651466
/* nothing to do */
14661467
break;
14671468
caseAHEAD:
@@ -1599,6 +1600,12 @@ optimize(struct nfa *nfa,
15991600
if (verbose)
16001601
fprintf(f,"\ninitial cleanup:\n");
16011602
#endif
1603+
/* If we have any CANTMATCH arcs, drop them; but this is uncommon */
1604+
if (nfa->flags&HASCANTMATCH)
1605+
{
1606+
removecantmatch(nfa);
1607+
nfa->flags &= ~HASCANTMATCH;
1608+
}
16021609
cleanup(nfa);/* may simplify situation */
16031610
#ifdefREG_DEBUG
16041611
if (verbose)
@@ -2922,6 +2929,34 @@ clonesuccessorstates(struct nfa *nfa,
29222929
}
29232930
}
29242931

2932+
/*
2933+
* removecantmatch - remove CANTMATCH arcs, which are no longer useful
2934+
* once we are done with the parsing phase. (We need them only to
2935+
* preserve connectedness of NFA subgraphs during parsing.)
2936+
*/
2937+
staticvoid
2938+
removecantmatch(structnfa*nfa)
2939+
{
2940+
structstate*s;
2941+
2942+
for (s=nfa->states;s!=NULL;s=s->next)
2943+
{
2944+
structarc*a;
2945+
structarc*nexta;
2946+
2947+
for (a=s->outs;a!=NULL;a=nexta)
2948+
{
2949+
nexta=a->outchain;
2950+
if (a->type==CANTMATCH)
2951+
{
2952+
freearc(nfa,a);
2953+
if (NISERR())
2954+
return;
2955+
}
2956+
}
2957+
}
2958+
}
2959+
29252960
/*
29262961
* cleanup - clean up NFA after optimizations
29272962
*/
@@ -3627,6 +3662,8 @@ dumpnfa(struct nfa *nfa,
36273662
fprintf(f,", eol [%ld]", (long)nfa->eos[1]);
36283663
if (nfa->flags&HASLACONS)
36293664
fprintf(f,", haslacons");
3665+
if (nfa->flags&HASCANTMATCH)
3666+
fprintf(f,", hascantmatch");
36303667
if (nfa->flags&MATCHALL)
36313668
{
36323669
fprintf(f,", minmatchall %d",nfa->minmatchall);
@@ -3749,6 +3786,9 @@ dumparc(struct arc *a,
37493786
break;
37503787
caseEMPTY:
37513788
break;
3789+
caseCANTMATCH:
3790+
fprintf(f,"X");
3791+
break;
37523792
default:
37533793
fprintf(f,"0x%x/0%lo",a->type, (long)a->co);
37543794
break;

‎src/backend/regex/regcomp.c‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ static void clonesuccessorstates(struct nfa *nfa, struct state *ssource,
215215
structstate*spredecessor,
216216
structarc*refarc,char*curdonemap,
217217
char*outerdonemap,intnstates);
218+
staticvoidremovecantmatch(structnfa*nfa);
218219
staticvoidcleanup(structnfa*nfa);
219220
staticvoidmarkreachable(structnfa*nfa,structstate*s,
220221
structstate*okay,structstate*mark);
@@ -342,6 +343,7 @@ struct vars
342343
#defineBEHIND'r'/* color-lookbehind arc */
343344
#defineWBDRY'w'/* word boundary constraint */
344345
#defineNWBDRY'W'/* non-word-boundary constraint */
346+
#defineCANTMATCH 'x'/* arc that cannot match anything */
345347
#defineSBEGIN'A'/* beginning of string (even if not BOL) */
346348
#defineSEND'Z'/* end of string (even if not EOL) */
347349

@@ -2368,6 +2370,7 @@ nfanode(struct vars *v,
23682370
nfa=newnfa(v,v->cm,v->nfa);
23692371
NOERRZ();
23702372
dupnfa(nfa,t->begin,t->end,nfa->init,nfa->final);
2373+
nfa->flags=v->nfa->flags;
23712374
if (!ISERR())
23722375
specialcolors(nfa);
23732376
if (!ISERR())

‎src/include/regex/regguts.h‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,8 @@ struct cnfa
410410
intflags;/* bitmask of the following flags: */
411411
#defineHASLACONS01/* uses lookaround constraints */
412412
#defineMATCHALL02/* matches all strings of a range of lengths */
413+
#defineHASCANTMATCH 04/* contains CANTMATCH arcs */
414+
/* Note: HASCANTMATCH appears in nfa structs' flags, but never in cnfas */
413415
intpre;/* setup state number */
414416
intpost;/* teardown state number */
415417
colorbos[2];/* colors, if any, assigned to BOS and BOL */

‎src/test/modules/test_regex/expected/test_regex.out‎

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2071,6 +2071,20 @@ select * from test_regex('[\s\S]*', '012 3456789abc_*', 'LNPE');
20712071
{"012 3456789abc_*"}
20722072
(2 rows)
20732073

2074+
-- bug #18708:
2075+
select * from test_regex('(?:[^\d\D]){0}', '0123456789abc*', 'LNPQE');
2076+
test_regex
2077+
--------------------------------------------------------------------
2078+
{0,REG_UBOUNDS,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE,REG_UEMPTYMATCH}
2079+
{""}
2080+
(2 rows)
2081+
2082+
select * from test_regex('[^\d\D]', '0123456789abc*', 'ILPE');
2083+
test_regex
2084+
--------------------------------------------------------
2085+
{0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE,REG_UIMPOSSIBLE}
2086+
(1 row)
2087+
20742088
-- check char classes' handling of newlines
20752089
select * from test_regex('\s+', E'abc \n def', 'LP');
20762090
test_regex

‎src/test/modules/test_regex/sql/test_regex.sql‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,9 @@ select * from test_regex('[^1\D0]', 'abc0123456789*', 'LPE');
619619
select*from test_regex('\W','0123456789abc_*','LP');
620620
select*from test_regex('[\W]','0123456789abc_*','LPE');
621621
select*from test_regex('[\s\S]*','012 3456789abc_*','LNPE');
622+
-- bug #18708:
623+
select*from test_regex('(?:[^\d\D]){0}','0123456789abc*','LNPQE');
624+
select*from test_regex('[^\d\D]','0123456789abc*','ILPE');
622625

623626
-- check char classes' handling of newlines
624627
select*from test_regex('\s+', E'abc\n def','LP');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp