Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commita7b61d4

Browse files
committed
Fix infinite-loop risk in fixempties() stage of regex compilation.
The previous coding of this function could get into situations where itwould never terminate, because successive passes would re-add EMPTY arcsthat had been removed by the previous pass. Rewrite the functioncompletely using a new algorithm that is guaranteed to terminate, andalso seems to be usually faster than the old one. Per Tcl bugs 3604074and 3606683.Tom Lane and Don Porter
1 parent7ccefe8 commita7b61d4

File tree

4 files changed

+280
-73
lines changed

4 files changed

+280
-73
lines changed

‎src/backend/regex/regc_nfa.c

Lines changed: 245 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,56 @@ freearc(struct nfa * nfa,
455455
from->free=victim;
456456
}
457457

458+
/*
459+
* hasnonemptyout - Does state have a non-EMPTY out arc?
460+
*/
461+
staticint
462+
hasnonemptyout(structstate*s)
463+
{
464+
structarc*a;
465+
466+
for (a=s->outs;a!=NULL;a=a->outchain)
467+
{
468+
if (a->type!=EMPTY)
469+
return1;
470+
}
471+
return0;
472+
}
473+
474+
/*
475+
* nonemptyouts - count non-EMPTY out arcs of a state
476+
*/
477+
staticint
478+
nonemptyouts(structstate*s)
479+
{
480+
intn=0;
481+
structarc*a;
482+
483+
for (a=s->outs;a!=NULL;a=a->outchain)
484+
{
485+
if (a->type!=EMPTY)
486+
n++;
487+
}
488+
returnn;
489+
}
490+
491+
/*
492+
* nonemptyins - count non-EMPTY in arcs of a state
493+
*/
494+
staticint
495+
nonemptyins(structstate*s)
496+
{
497+
intn=0;
498+
structarc*a;
499+
500+
for (a=s->ins;a!=NULL;a=a->inchain)
501+
{
502+
if (a->type!=EMPTY)
503+
n++;
504+
}
505+
returnn;
506+
}
507+
458508
/*
459509
* findarc - find arc, if any, from given source with given type and color
460510
* If there is more than one such arc, the result is random.
@@ -511,19 +561,25 @@ moveins(struct nfa * nfa,
511561
}
512562

513563
/*
514-
* copyins - copy all in arcs of a state to another state
564+
* copyins - copy in arcs of a state to another state
565+
*
566+
* Either all arcs, or only non-empty ones as determined by all value.
515567
*/
516568
staticvoid
517569
copyins(structnfa*nfa,
518570
structstate*oldState,
519-
structstate*newState)
571+
structstate*newState,
572+
intall)
520573
{
521574
structarc*a;
522575

523576
assert(oldState!=newState);
524577

525578
for (a=oldState->ins;a!=NULL;a=a->inchain)
526-
cparc(nfa,a,a->from,newState);
579+
{
580+
if (all||a->type!=EMPTY)
581+
cparc(nfa,a,a->from,newState);
582+
}
527583
}
528584

529585
/*
@@ -546,19 +602,25 @@ moveouts(struct nfa * nfa,
546602
}
547603

548604
/*
549-
* copyouts - copy all out arcs of a state to another state
605+
* copyouts - copy out arcs of a state to another state
606+
*
607+
* Either all arcs, or only non-empty ones as determined by all value.
550608
*/
551609
staticvoid
552610
copyouts(structnfa*nfa,
553611
structstate*oldState,
554-
structstate*newState)
612+
structstate*newState,
613+
intall)
555614
{
556615
structarc*a;
557616

558617
assert(oldState!=newState);
559618

560619
for (a=oldState->outs;a!=NULL;a=a->outchain)
561-
cparc(nfa,a,newState,a->to);
620+
{
621+
if (all||a->type!=EMPTY)
622+
cparc(nfa,a,newState,a->to);
623+
}
562624
}
563625

564626
/*
@@ -881,7 +943,7 @@ pull(struct nfa * nfa,
881943
if (NISERR())
882944
return0;
883945
assert(to!=from);/* con is not an inarc */
884-
copyins(nfa,from,s);/* duplicate inarcs */
946+
copyins(nfa,from,s,1);/* duplicate inarcs */
885947
cparc(nfa,con,s,to);/* move constraint arc */
886948
freearc(nfa,con);
887949
from=s;
@@ -1027,7 +1089,7 @@ push(struct nfa * nfa,
10271089
s=newstate(nfa);
10281090
if (NISERR())
10291091
return0;
1030-
copyouts(nfa,to,s);/* duplicate outarcs */
1092+
copyouts(nfa,to,s,1);/* duplicate outarcs */
10311093
cparc(nfa,con,from,s);/* move constraint */
10321094
freearc(nfa,con);
10331095
to=s;
@@ -1134,91 +1196,205 @@ fixempties(struct nfa * nfa,
11341196
FILE*f)/* for debug output; NULL none */
11351197
{
11361198
structstate*s;
1199+
structstate*s2;
11371200
structstate*nexts;
11381201
structarc*a;
11391202
structarc*nexta;
1140-
intprogress;
11411203

1142-
/* find and eliminate empties until there are no more */
1143-
do
1204+
/*
1205+
* First, get rid of any states whose sole out-arc is an EMPTY, since
1206+
* they're basically just aliases for their successor. The parsing
1207+
* algorithm creates enough of these that it's worth special-casing this.
1208+
*/
1209+
for (s=nfa->states;s!=NULL&& !NISERR();s=nexts)
11441210
{
1145-
progress=0;
1146-
for (s=nfa->states;s!=NULL&& !NISERR()&&
1147-
s->no!=FREESTATE;s=nexts)
1211+
nexts=s->next;
1212+
if (s->flag||s->nouts!=1)
1213+
continue;
1214+
a=s->outs;
1215+
assert(a!=NULL&&a->outchain==NULL);
1216+
if (a->type!=EMPTY)
1217+
continue;
1218+
if (s!=a->to)
1219+
moveins(nfa,s,a->to);
1220+
dropstate(nfa,s);
1221+
}
1222+
1223+
/*
1224+
* Similarly, get rid of any state with a single EMPTY in-arc, by folding
1225+
* it into its predecessor.
1226+
*/
1227+
for (s=nfa->states;s!=NULL&& !NISERR();s=nexts)
1228+
{
1229+
nexts=s->next;
1230+
/* while we're at it, ensure tmp fields are clear for next step */
1231+
assert(s->tmp==NULL);
1232+
if (s->flag||s->nins!=1)
1233+
continue;
1234+
a=s->ins;
1235+
assert(a!=NULL&&a->inchain==NULL);
1236+
if (a->type!=EMPTY)
1237+
continue;
1238+
if (s!=a->from)
1239+
moveouts(nfa,s,a->from);
1240+
dropstate(nfa,s);
1241+
}
1242+
1243+
/*
1244+
* For each remaining NFA state, find all other states that are reachable
1245+
* from it by a chain of one or more EMPTY arcs. Then generate new arcs
1246+
* that eliminate the need for each such chain.
1247+
*
1248+
* If we just do this straightforwardly, the algorithm gets slow in
1249+
* complex graphs, because the same arcs get copied to all intermediate
1250+
* states of an EMPTY chain, and then uselessly pushed repeatedly to the
1251+
* chain's final state; we waste a lot of time in newarc's duplicate
1252+
* checking. To improve matters, we decree that any state with only EMPTY
1253+
* out-arcs is "doomed" and will not be part of the final NFA. That can be
1254+
* ensured by not adding any new out-arcs to such a state. Having ensured
1255+
* that, we need not update the state's in-arcs list either; all arcs that
1256+
* might have gotten pushed forward to it will just get pushed directly to
1257+
* successor states. This eliminates most of the useless duplicate arcs.
1258+
*/
1259+
for (s=nfa->states;s!=NULL&& !NISERR();s=s->next)
1260+
{
1261+
for (s2=emptyreachable(s,s);s2!=s&& !NISERR();s2=nexts)
11481262
{
1149-
nexts=s->next;
1150-
for (a=s->outs;a!=NULL&& !NISERR();a=nexta)
1151-
{
1152-
nexta=a->outchain;
1153-
if (a->type==EMPTY&&unempty(nfa,a))
1154-
progress=1;
1155-
assert(nexta==NULL||s->no!=FREESTATE);
1156-
}
1263+
/*
1264+
* If s2 is doomed, we decide that (1) we will always push arcs
1265+
* forward to it, not pull them back to s; and (2) we can optimize
1266+
* away the push-forward, per comment above. So do nothing.
1267+
*/
1268+
if (s2->flag||hasnonemptyout(s2))
1269+
replaceempty(nfa,s,s2);
1270+
1271+
/* Reset the tmp fields as we walk back */
1272+
nexts=s2->tmp;
1273+
s2->tmp=NULL;
11571274
}
1158-
if (progress&&f!=NULL)
1159-
dumpnfa(nfa,f);
1160-
}while (progress&& !NISERR());
1275+
s->tmp=NULL;
1276+
}
1277+
1278+
if (NISERR())
1279+
return;
1280+
1281+
/*
1282+
* Now remove all the EMPTY arcs, since we don't need them anymore.
1283+
*/
1284+
for (s=nfa->states;s!=NULL;s=s->next)
1285+
{
1286+
for (a=s->outs;a!=NULL;a=nexta)
1287+
{
1288+
nexta=a->outchain;
1289+
if (a->type==EMPTY)
1290+
freearc(nfa,a);
1291+
}
1292+
}
1293+
1294+
/*
1295+
* And remove any states that have become useless.(This cleanup is not
1296+
* very thorough, and would be even less so if we tried to combine it with
1297+
* the previous step; but cleanup() will take care of anything we miss.)
1298+
*/
1299+
for (s=nfa->states;s!=NULL;s=nexts)
1300+
{
1301+
nexts=s->next;
1302+
if ((s->nins==0||s->nouts==0)&& !s->flag)
1303+
dropstate(nfa,s);
1304+
}
1305+
1306+
if (f!=NULL)
1307+
dumpnfa(nfa,f);
11611308
}
11621309

11631310
/*
1164-
* unempty - optimize out an EMPTY arc, if possible
1311+
* emptyreachable - recursively find all states reachable from s by EMPTY arcs
1312+
*
1313+
* The return value is the last such state found. Its tmp field links back
1314+
* to the next-to-last such state, and so on back to s, so that all these
1315+
* states can be located without searching the whole NFA.
11651316
*
1166-
* Actually, as it stands this function always succeeds, but the return
1167-
* value is kept with an eye on possible future changes.
1317+
* The maximum recursion depth here is equal to the length of the longest
1318+
* loop-free chain of EMPTY arcs, which is surely no more than the size of
1319+
* the NFA, and in practice will be a lot less than that.
11681320
*/
1169-
staticint/* 0 couldn't, 1 could */
1170-
unempty(structnfa*nfa,
1171-
structarc*a)
1321+
staticstructstate*
1322+
emptyreachable(structstate*s,structstate*lastfound)
11721323
{
1173-
structstate*from=a->from;
1174-
structstate*to=a->to;
1175-
intusefrom;/* work on from, as opposed to to? */
1176-
1177-
assert(a->type==EMPTY);
1178-
assert(from!=nfa->pre&&to!=nfa->post);
1324+
structarc*a;
11791325

1180-
if (from==to)
1181-
{/* vacuous loop */
1182-
freearc(nfa,a);
1183-
return1;
1326+
s->tmp=lastfound;
1327+
lastfound=s;
1328+
for (a=s->outs;a!=NULL;a=a->outchain)
1329+
{
1330+
if (a->type==EMPTY&&a->to->tmp==NULL)
1331+
lastfound=emptyreachable(a->to,lastfound);
11841332
}
1333+
returnlastfound;
1334+
}
11851335

1186-
/* decide which end to work on */
1187-
usefrom=1;/* default: attack from */
1188-
if (from->nouts>to->nins)
1189-
usefrom=0;
1190-
elseif (from->nouts==to->nins)
1336+
/*
1337+
* replaceempty - replace an EMPTY arc chain with some non-empty arcs
1338+
*
1339+
* The EMPTY arc(s) should be deleted later, but we can't do it here because
1340+
* they may still be needed to identify other arc chains during fixempties().
1341+
*/
1342+
staticvoid
1343+
replaceempty(structnfa*nfa,
1344+
structstate*from,
1345+
structstate*to)
1346+
{
1347+
intfromouts;
1348+
inttoins;
1349+
1350+
assert(from!=to);
1351+
1352+
/*
1353+
* Create replacement arcs that bypass the need for the EMPTY chain. We
1354+
* can do this either by pushing arcs forward (linking directly from
1355+
* "from"'s predecessors to "to") or by pulling them back (linking
1356+
* directly from "from" to "to"'s successors). In general, we choose
1357+
* whichever way creates greater fan-out or fan-in, so as to improve the
1358+
* odds of reducing the other state to zero in-arcs or out-arcs and
1359+
* thereby being able to delete it. However, if "from" is doomed (has no
1360+
* non-EMPTY out-arcs), we must keep it so, so always push forward in that
1361+
* case.
1362+
*
1363+
* The fan-out/fan-in comparison should count only non-EMPTY arcs.If
1364+
* "from" is doomed, we can skip counting "to"'s arcs, since we want to
1365+
* force taking the copyins path in that case.
1366+
*/
1367+
fromouts=nonemptyouts(from);
1368+
toins= (fromouts==0) ?1 :nonemptyins(to);
1369+
1370+
if (fromouts>toins)
11911371
{
1192-
/* decide on secondary issue: move/copy fewest arcs */
1193-
if (from->nins>to->nouts)
1194-
usefrom=0;
1372+
copyouts(nfa,to,from,0);
1373+
return;
1374+
}
1375+
if (fromouts<toins)
1376+
{
1377+
copyins(nfa,from,to,0);
1378+
return;
11951379
}
11961380

1197-
freearc(nfa,a);
1198-
if (usefrom)
1381+
/*
1382+
* fromouts == toins. Decide on secondary issue: copy fewest arcs.
1383+
*
1384+
* Doesn't seem to be worth the trouble to exclude empties from these
1385+
* comparisons; that takes extra time and doesn't seem to improve the
1386+
* resulting graph much.
1387+
*/
1388+
if (from->nins>to->nouts)
11991389
{
1200-
if (from->nouts==0)
1201-
{
1202-
/* was the state's only outarc */
1203-
moveins(nfa,from,to);
1204-
freestate(nfa,from);
1205-
}
1206-
else
1207-
copyins(nfa,from,to);
1390+
copyouts(nfa,to,from,0);
1391+
return;
12081392
}
12091393
else
12101394
{
1211-
if (to->nins==0)
1212-
{
1213-
/* was the state's only inarc */
1214-
moveouts(nfa,to,from);
1215-
freestate(nfa,to);
1216-
}
1217-
else
1218-
copyouts(nfa,to,from);
1395+
copyins(nfa,from,to,0);
1396+
return;
12191397
}
1220-
1221-
return1;
12221398
}
12231399

12241400
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp