NotificationsYou must be signed in to change notification settings
Fork5
Star26

Commita7b61d4

committed

Fix infinite-loop risk in fixempties() stage of regex compilation.

The previous coding of this function could get into situations where itwould never terminate, because successive passes would re-add EMPTY arcsthat had been removed by the previous pass. Rewrite the functioncompletely using a new algorithm that is guaranteed to terminate, andalso seems to be usually faster than the old one. Per Tcl bugs 3604074and 3606683.Tom Lane and Don Porter

1 parent7ccefe8 commita7b61d4Copy full SHA for a7b61d4

File tree

4 files changed

+280

-73

lines changed

src
- backend/regex
  - regc_nfa.c
  - regcomp.c
- test/regress
  - expected
    - regex.out
  - sql
    - regex.sql

4 files changed

+280

-73

lines changed

`‎src/backend/regex/regc_nfa.c`

Lines changed: 245 additions & 69 deletions

Original file line number	Diff line number	Diff line change
`@@ -455,6 +455,56 @@ freearc(struct nfa * nfa,`
`455`	`455`	`from->free=victim;`
`456`	`456`	`}`
`457`	`457`
	`458`	`+/*`
	`459`	`+ * hasnonemptyout - Does state have a non-EMPTY out arc?`
	`460`	`+ */`
	`461`	`+staticint`
	`462`	`+hasnonemptyout(structstate*s)`
	`463`	`+{`
	`464`	`+structarc*a;`
	`465`	`+`
	`466`	`+for (a=s->outs;a!=NULL;a=a->outchain)`
	`467`	`+{`
	`468`	`+if (a->type!=EMPTY)`
	`469`	`+return1;`
	`470`	`+}`
	`471`	`+return0;`
	`472`	`+}`
	`473`	`+`
	`474`	`+/*`
	`475`	`+ * nonemptyouts - count non-EMPTY out arcs of a state`
	`476`	`+ */`
	`477`	`+staticint`
	`478`	`+nonemptyouts(structstate*s)`
	`479`	`+{`
	`480`	`+intn=0;`
	`481`	`+structarc*a;`
	`482`	`+`
	`483`	`+for (a=s->outs;a!=NULL;a=a->outchain)`
	`484`	`+{`
	`485`	`+if (a->type!=EMPTY)`
	`486`	`+n++;`
	`487`	`+}`
	`488`	`+returnn;`
	`489`	`+}`
	`490`	`+`
	`491`	`+/*`
	`492`	`+ * nonemptyins - count non-EMPTY in arcs of a state`
	`493`	`+ */`
	`494`	`+staticint`
	`495`	`+nonemptyins(structstate*s)`
	`496`	`+{`
	`497`	`+intn=0;`
	`498`	`+structarc*a;`
	`499`	`+`
	`500`	`+for (a=s->ins;a!=NULL;a=a->inchain)`
	`501`	`+{`
	`502`	`+if (a->type!=EMPTY)`
	`503`	`+n++;`
	`504`	`+}`
	`505`	`+returnn;`
	`506`	`+}`
	`507`	`+`
`458`	`508`	`/*`
`459`	`509`	`* findarc - find arc, if any, from given source with given type and color`
`460`	`510`	`* If there is more than one such arc, the result is random.`
`@@ -511,19 +561,25 @@ moveins(struct nfa * nfa,`
`511`	`561`	`}`
`512`	`562`
`513`	`563`	`/*`
`514`		`- * copyins - copy all in arcs of a state to another state`
	`564`	`+ * copyins - copy in arcs of a state to another state`
	`565`	`+ *`
	`566`	`+ * Either all arcs, or only non-empty ones as determined by all value.`
`515`	`567`	`*/`
`516`	`568`	`staticvoid`
`517`	`569`	`copyins(structnfa*nfa,`
`518`	`570`	`structstate*oldState,`
`519`		`-structstate*newState)`
	`571`	`+structstate*newState,`
	`572`	`+intall)`
`520`	`573`	`{`
`521`	`574`	`structarc*a;`
`522`	`575`
`523`	`576`	`assert(oldState!=newState);`
`524`	`577`
`525`	`578`	`for (a=oldState->ins;a!=NULL;a=a->inchain)`
`526`		`-cparc(nfa,a,a->from,newState);`
	`579`	`+{`
	`580`	`+if (all\|\|a->type!=EMPTY)`
	`581`	`+cparc(nfa,a,a->from,newState);`
	`582`	`+}`
`527`	`583`	`}`
`528`	`584`
`529`	`585`	`/*`
`@@ -546,19 +602,25 @@ moveouts(struct nfa * nfa,`
`546`	`602`	`}`
`547`	`603`
`548`	`604`	`/*`
`549`		`- * copyouts - copy all out arcs of a state to another state`
	`605`	`+ * copyouts - copy out arcs of a state to another state`
	`606`	`+ *`
	`607`	`+ * Either all arcs, or only non-empty ones as determined by all value.`
`550`	`608`	`*/`
`551`	`609`	`staticvoid`
`552`	`610`	`copyouts(structnfa*nfa,`
`553`	`611`	`structstate*oldState,`
`554`		`-structstate*newState)`
	`612`	`+structstate*newState,`
	`613`	`+intall)`
`555`	`614`	`{`
`556`	`615`	`structarc*a;`
`557`	`616`
`558`	`617`	`assert(oldState!=newState);`
`559`	`618`
`560`	`619`	`for (a=oldState->outs;a!=NULL;a=a->outchain)`
`561`		`-cparc(nfa,a,newState,a->to);`
	`620`	`+{`
	`621`	`+if (all\|\|a->type!=EMPTY)`
	`622`	`+cparc(nfa,a,newState,a->to);`
	`623`	`+}`
`562`	`624`	`}`
`563`	`625`
`564`	`626`	`/*`
`@@ -881,7 +943,7 @@ pull(struct nfa * nfa,`
`881`	`943`	`if (NISERR())`
`882`	`944`	`return0;`
`883`	`945`	`assert(to!=from);/* con is not an inarc */`
`884`		`-copyins(nfa,from,s);/* duplicate inarcs */`
	`946`	`+copyins(nfa,from,s,1);/* duplicate inarcs */`
`885`	`947`	`cparc(nfa,con,s,to);/* move constraint arc */`
`886`	`948`	`freearc(nfa,con);`
`887`	`949`	`from=s;`
`@@ -1027,7 +1089,7 @@ push(struct nfa * nfa,`
`1027`	`1089`	`s=newstate(nfa);`
`1028`	`1090`	`if (NISERR())`
`1029`	`1091`	`return0;`
`1030`		`-copyouts(nfa,to,s);/* duplicate outarcs */`
	`1092`	`+copyouts(nfa,to,s,1);/* duplicate outarcs */`
`1031`	`1093`	`cparc(nfa,con,from,s);/* move constraint */`
`1032`	`1094`	`freearc(nfa,con);`
`1033`	`1095`	`to=s;`
`@@ -1134,91 +1196,205 @@ fixempties(struct nfa * nfa,`
`1134`	`1196`	`FILEf)/ for debug output; NULL none */`
`1135`	`1197`	`{`
`1136`	`1198`	`structstate*s;`
	`1199`	`+structstate*s2;`
`1137`	`1200`	`structstate*nexts;`
`1138`	`1201`	`structarc*a;`
`1139`	`1202`	`structarc*nexta;`
`1140`		`-intprogress;`
`1141`	`1203`
`1142`		`-/* find and eliminate empties until there are no more */`
`1143`		`-do`
	`1204`	`+/*`
	`1205`	`+ * First, get rid of any states whose sole out-arc is an EMPTY, since`
	`1206`	`+ * they're basically just aliases for their successor. The parsing`
	`1207`	`+ * algorithm creates enough of these that it's worth special-casing this.`
	`1208`	`+ */`
	`1209`	`+for (s=nfa->states;s!=NULL&& !NISERR();s=nexts)`
`1144`	`1210`	`{`
`1145`		`-progress=0;`
`1146`		`-for (s=nfa->states;s!=NULL&& !NISERR()&&`
`1147`		`-s->no!=FREESTATE;s=nexts)`
	`1211`	`+nexts=s->next;`
	`1212`	`+if (s->flag\|\|s->nouts!=1)`
	`1213`	`+continue;`
	`1214`	`+a=s->outs;`
	`1215`	`+assert(a!=NULL&&a->outchain==NULL);`
	`1216`	`+if (a->type!=EMPTY)`
	`1217`	`+continue;`
	`1218`	`+if (s!=a->to)`
	`1219`	`+moveins(nfa,s,a->to);`
	`1220`	`+dropstate(nfa,s);`
	`1221`	`+}`
	`1222`	`+`
	`1223`	`+/*`
	`1224`	`+ * Similarly, get rid of any state with a single EMPTY in-arc, by folding`
	`1225`	`+ * it into its predecessor.`
	`1226`	`+ */`
	`1227`	`+for (s=nfa->states;s!=NULL&& !NISERR();s=nexts)`
	`1228`	`+{`
	`1229`	`+nexts=s->next;`
	`1230`	`+/* while we're at it, ensure tmp fields are clear for next step */`
	`1231`	`+assert(s->tmp==NULL);`
	`1232`	`+if (s->flag\|\|s->nins!=1)`
	`1233`	`+continue;`
	`1234`	`+a=s->ins;`
	`1235`	`+assert(a!=NULL&&a->inchain==NULL);`
	`1236`	`+if (a->type!=EMPTY)`
	`1237`	`+continue;`
	`1238`	`+if (s!=a->from)`
	`1239`	`+moveouts(nfa,s,a->from);`
	`1240`	`+dropstate(nfa,s);`
	`1241`	`+}`
	`1242`	`+`
	`1243`	`+/*`
	`1244`	`+ * For each remaining NFA state, find all other states that are reachable`
	`1245`	`+ * from it by a chain of one or more EMPTY arcs. Then generate new arcs`
	`1246`	`+ * that eliminate the need for each such chain.`
	`1247`	`+ *`
	`1248`	`+ * If we just do this straightforwardly, the algorithm gets slow in`
	`1249`	`+ * complex graphs, because the same arcs get copied to all intermediate`
	`1250`	`+ * states of an EMPTY chain, and then uselessly pushed repeatedly to the`
	`1251`	`+ * chain's final state; we waste a lot of time in newarc's duplicate`
	`1252`	`+ * checking. To improve matters, we decree that any state with only EMPTY`
	`1253`	`+ * out-arcs is "doomed" and will not be part of the final NFA. That can be`
	`1254`	`+ * ensured by not adding any new out-arcs to such a state. Having ensured`
	`1255`	`+ * that, we need not update the state's in-arcs list either; all arcs that`
	`1256`	`+ * might have gotten pushed forward to it will just get pushed directly to`
	`1257`	`+ * successor states. This eliminates most of the useless duplicate arcs.`
	`1258`	`+ */`
	`1259`	`+for (s=nfa->states;s!=NULL&& !NISERR();s=s->next)`
	`1260`	`+{`
	`1261`	`+for (s2=emptyreachable(s,s);s2!=s&& !NISERR();s2=nexts)`
`1148`	`1262`	`{`
`1149`		`-nexts=s->next;`
`1150`		`-for (a=s->outs;a!=NULL&& !NISERR();a=nexta)`
`1151`		`-{`
`1152`		`-nexta=a->outchain;`
`1153`		`-if (a->type==EMPTY&&unempty(nfa,a))`
`1154`		`-progress=1;`
`1155`		`-assert(nexta==NULL\|\|s->no!=FREESTATE);`
`1156`		`-}`
	`1263`	`+/*`
	`1264`	`+ * If s2 is doomed, we decide that (1) we will always push arcs`
	`1265`	`+ * forward to it, not pull them back to s; and (2) we can optimize`
	`1266`	`+ * away the push-forward, per comment above. So do nothing.`
	`1267`	`+ */`
	`1268`	`+if (s2->flag\|\|hasnonemptyout(s2))`
	`1269`	`+replaceempty(nfa,s,s2);`
	`1270`	`+`
	`1271`	`+/* Reset the tmp fields as we walk back */`
	`1272`	`+nexts=s2->tmp;`
	`1273`	`+s2->tmp=NULL;`
`1157`	`1274`	`}`
`1158`		`-if (progress&&f!=NULL)`
`1159`		`-dumpnfa(nfa,f);`
`1160`		`-}while (progress&& !NISERR());`
	`1275`	`+s->tmp=NULL;`
	`1276`	`+}`
	`1277`	`+`
	`1278`	`+if (NISERR())`
	`1279`	`+return;`
	`1280`	`+`
	`1281`	`+/*`
	`1282`	`+ * Now remove all the EMPTY arcs, since we don't need them anymore.`
	`1283`	`+ */`
	`1284`	`+for (s=nfa->states;s!=NULL;s=s->next)`
	`1285`	`+{`
	`1286`	`+for (a=s->outs;a!=NULL;a=nexta)`
	`1287`	`+{`
	`1288`	`+nexta=a->outchain;`
	`1289`	`+if (a->type==EMPTY)`
	`1290`	`+freearc(nfa,a);`
	`1291`	`+}`
	`1292`	`+}`
	`1293`	`+`
	`1294`	`+/*`
	`1295`	`+ * And remove any states that have become useless.(This cleanup is not`
	`1296`	`+ * very thorough, and would be even less so if we tried to combine it with`
	`1297`	`+ * the previous step; but cleanup() will take care of anything we miss.)`
	`1298`	`+ */`
	`1299`	`+for (s=nfa->states;s!=NULL;s=nexts)`
	`1300`	`+{`
	`1301`	`+nexts=s->next;`
	`1302`	`+if ((s->nins==0\|\|s->nouts==0)&& !s->flag)`
	`1303`	`+dropstate(nfa,s);`
	`1304`	`+}`
	`1305`	`+`
	`1306`	`+if (f!=NULL)`
	`1307`	`+dumpnfa(nfa,f);`
`1161`	`1308`	`}`
`1162`	`1309`
`1163`	`1310`	`/*`
`1164`		`- * unempty - optimize out an EMPTY arc, if possible`
	`1311`	`+ * emptyreachable - recursively find all states reachable from s by EMPTY arcs`
	`1312`	`+ *`
	`1313`	`+ * The return value is the last such state found. Its tmp field links back`
	`1314`	`+ * to the next-to-last such state, and so on back to s, so that all these`
	`1315`	`+ * states can be located without searching the whole NFA.`
`1165`	`1316`	`*`
`1166`		`- * Actually, as it stands this function always succeeds, but the return`
`1167`		`- * value is kept with an eye on possible future changes.`
	`1317`	`+ * The maximum recursion depth here is equal to the length of the longest`
	`1318`	`+ * loop-free chain of EMPTY arcs, which is surely no more than the size of`
	`1319`	`+ * the NFA, and in practice will be a lot less than that.`
`1168`	`1320`	`*/`
`1169`		`-staticint/* 0 couldn't, 1 could */`
`1170`		`-unempty(structnfa*nfa,`
`1171`		`-structarc*a)`
	`1321`	`+staticstructstate*`
	`1322`	`+emptyreachable(structstates,structstatelastfound)`
`1172`	`1323`	`{`
`1173`		`-structstate*from=a->from;`
`1174`		`-structstate*to=a->to;`
`1175`		`-intusefrom;/* work on from, as opposed to to? */`
`1176`		`-`
`1177`		`-assert(a->type==EMPTY);`
`1178`		`-assert(from!=nfa->pre&&to!=nfa->post);`
	`1324`	`+structarc*a;`
`1179`	`1325`
`1180`		`-if (from==to)`
`1181`		`-{/* vacuous loop */`
`1182`		`-freearc(nfa,a);`
`1183`		`-return1;`
	`1326`	`+s->tmp=lastfound;`
	`1327`	`+lastfound=s;`
	`1328`	`+for (a=s->outs;a!=NULL;a=a->outchain)`
	`1329`	`+{`
	`1330`	`+if (a->type==EMPTY&&a->to->tmp==NULL)`
	`1331`	`+lastfound=emptyreachable(a->to,lastfound);`
`1184`	`1332`	`}`
	`1333`	`+returnlastfound;`
	`1334`	`+}`
`1185`	`1335`
`1186`		`-/* decide which end to work on */`
`1187`		`-usefrom=1;/* default: attack from */`
`1188`		`-if (from->nouts>to->nins)`
`1189`		`-usefrom=0;`
`1190`		`-elseif (from->nouts==to->nins)`
	`1336`	`+/*`
	`1337`	`+ * replaceempty - replace an EMPTY arc chain with some non-empty arcs`
	`1338`	`+ *`
	`1339`	`+ * The EMPTY arc(s) should be deleted later, but we can't do it here because`
	`1340`	`+ * they may still be needed to identify other arc chains during fixempties().`
	`1341`	`+ */`
	`1342`	`+staticvoid`
	`1343`	`+replaceempty(structnfa*nfa,`
	`1344`	`+structstate*from,`
	`1345`	`+structstate*to)`
	`1346`	`+{`
	`1347`	`+intfromouts;`
	`1348`	`+inttoins;`
	`1349`	`+`
	`1350`	`+assert(from!=to);`
	`1351`	`+`
	`1352`	`+/*`
	`1353`	`+ * Create replacement arcs that bypass the need for the EMPTY chain. We`
	`1354`	`+ * can do this either by pushing arcs forward (linking directly from`
	`1355`	`+ * "from"'s predecessors to "to") or by pulling them back (linking`
	`1356`	`+ * directly from "from" to "to"'s successors). In general, we choose`
	`1357`	`+ * whichever way creates greater fan-out or fan-in, so as to improve the`
	`1358`	`+ * odds of reducing the other state to zero in-arcs or out-arcs and`
	`1359`	`+ * thereby being able to delete it. However, if "from" is doomed (has no`
	`1360`	`+ * non-EMPTY out-arcs), we must keep it so, so always push forward in that`
	`1361`	`+ * case.`
	`1362`	`+ *`
	`1363`	`+ * The fan-out/fan-in comparison should count only non-EMPTY arcs.If`
	`1364`	`+ * "from" is doomed, we can skip counting "to"'s arcs, since we want to`
	`1365`	`+ * force taking the copyins path in that case.`
	`1366`	`+ */`
	`1367`	`+fromouts=nonemptyouts(from);`
	`1368`	`+toins= (fromouts==0) ?1 :nonemptyins(to);`
	`1369`	`+`
	`1370`	`+if (fromouts>toins)`
`1191`	`1371`	`{`
`1192`		`-/* decide on secondary issue: move/copy fewest arcs */`
`1193`		`-if (from->nins>to->nouts)`
`1194`		`-usefrom=0;`
	`1372`	`+copyouts(nfa,to,from,0);`
	`1373`	`+return;`
	`1374`	`+}`
	`1375`	`+if (fromouts<toins)`
	`1376`	`+{`
	`1377`	`+copyins(nfa,from,to,0);`
	`1378`	`+return;`
`1195`	`1379`	`}`
`1196`	`1380`
`1197`		`-freearc(nfa,a);`
`1198`		`-if (usefrom)`
	`1381`	`+/*`
	`1382`	`+ * fromouts == toins. Decide on secondary issue: copy fewest arcs.`
	`1383`	`+ *`
	`1384`	`+ * Doesn't seem to be worth the trouble to exclude empties from these`
	`1385`	`+ * comparisons; that takes extra time and doesn't seem to improve the`
	`1386`	`+ * resulting graph much.`
	`1387`	`+ */`
	`1388`	`+if (from->nins>to->nouts)`
`1199`	`1389`	`{`
`1200`		`-if (from->nouts==0)`
`1201`		`-{`
`1202`		`-/* was the state's only outarc */`
`1203`		`-moveins(nfa,from,to);`
`1204`		`-freestate(nfa,from);`
`1205`		`-}`
`1206`		`-else`
`1207`		`-copyins(nfa,from,to);`
	`1390`	`+copyouts(nfa,to,from,0);`
	`1391`	`+return;`
`1208`	`1392`	`}`
`1209`	`1393`	`else`
`1210`	`1394`	`{`
`1211`		`-if (to->nins==0)`
`1212`		`-{`
`1213`		`-/* was the state's only inarc */`
`1214`		`-moveouts(nfa,to,from);`
`1215`		`-freestate(nfa,to);`
`1216`		`-}`
`1217`		`-else`
`1218`		`-copyouts(nfa,to,from);`
	`1395`	`+copyins(nfa,from,to,0);`
	`1396`	`+return;`
`1219`	`1397`	`}`
`1220`		`-`
`1221`		`-return1;`
`1222`	`1398`	`}`
`1223`	`1399`
`1224`	`1400`	`/*`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commita7b61d4

File tree

4 files changed

4 files changed

`‎src/backend/regex/regc_nfa.c`

0 commit comments