NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit f42ea83

committed

Fix use-after-free issue in regexp engine.

Commit cebc1d3 taught parseqatom() to optimize cases where a branch contains only one, "messy", atom by getting rid of excess subRE nodes. The way we really should do that is to keep the subRE built for the "messy" child atom; but to avoid changing parseqatom's nominal API, I made it delete that node after copying its fields to the outer subRE made by parsebranch(). It seems that that actually worked at the time; but it became dangerous after ea1268f, because that later commit allowed the lower invocation of parse() to return a subRE that was also pointed to by some v->subs[] entry. This meant we could wind up with a dangling pointer in v->subs[], allowing a later backref to misbehave, but only if that subRE struct had been reused in between. So the damage seems confined to cases like '((...))...(...\2'. To fix, do what I should have done before and modify parseqatom's API to make it possible for it to remove the caller's subRE instead of the callee's. That's safer because we know that subRE isn't complete yet, so noplace else will have a pointer to it. Per report from Mark Dilger. Back-patch to v14 where the problematic patches came in. Discussion: https://postgr.es/m/0203588E-E609-43AF-9F4F-902854231EE7@enterprisedb.com

1 parent 51b95fb, commit f42ea83. Copy full SHA for f42ea83

File tree

3 files changed

+73

-67

lines changed

src
- backend/regex
  - regcomp.c
- test/modules/test_regex
  - expected
    - test_regex.out
  - sql
    - test_regex.sql

3 files changed

+73

-67

lines changed

`‎src/backend/regex/regcomp.c‎`

Lines changed: 63 additions & 67 deletions

Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@ static int freev(struct vars *, int);`
`43`	`43`	`staticvoidmakesearch(structvars,structnfa);`
`44`	`44`	`staticstructsubreparse(structvars,int,int,structstate,structstate);`
`45`	`45`	`staticstructsubreparsebranch(structvars,int,int,structstate,structstate,int);`
`46`		`-staticvoidparseqatom(structvars,int,int,structstate,structstate,structsubre);`
	`46`	`+staticstructsubreparseqatom(structvars,int,int,structstate,structstate,structsubre*);`
`47`	`47`	`staticvoidnonword(structvars,int,structstate,structstate*);`
`48`	`48`	`staticvoidword(structvars,int,structstate,structstate*);`
`49`	`49`	`staticvoidcharclass(structvars*,enumchar_classes,`
`@@ -756,7 +756,7 @@ parsebranch(struct vars *v,`
`756`	`756`	`seencontent=1;`
`757`	`757`
`758`	`758`	`/* NB, recursion in parseqatom() may swallow rest of branch */`
`759`		`-parseqatom(v,stopper,type,lp,right,t);`
	`759`	`+t=parseqatom(v,stopper,type,lp,right,t);`
`760`	`760`	`NOERRN();`
`761`	`761`	`}`
`762`	`762`
`@@ -777,8 +777,12 @@ parsebranch(struct vars *v,`
`777`	`777`	`* The bookkeeping near the end cooperates very closely with parsebranch();`
`778`	`778`	`* in particular, it contains a recursion that can involve parsing the rest`
`779`	`779`	`* of the branch, making this function's name somewhat inaccurate.`
	`780`	`+ *`
	`781`	`+ * Usually, the return value is just "top", but in some cases where we`
	`782`	`+ * have parsed the rest of the branch, we may deem "top" redundant and`
	`783`	`+ * free it, returning some child subre instead.`
`780`	`784`	`*/`
`781`		`-staticvoid`
	`785`	`+staticstructsubre*`
`782`	`786`	`parseqatom(structvars*v,`
`783`	`787`	`intstopper,/* EOS or ')' */`
`784`	`788`	`inttype,/* LACON (lookaround subRE) or PLAIN */`
`@@ -818,103 +822,103 @@ parseqatom(struct vars *v,`
`818`	`822`	`if (v->cflags&REG_NLANCH)`
`819`	`823`	`ARCV(BEHIND,v->nlcolor);`
`820`	`824`	`NEXT();`
`821`		`-return;`
	`825`	`+returntop;`
`822`	`826`	`break;`
`823`	`827`	`case'$':`
`824`	`828`	`ARCV('$',1);`
`825`	`829`	`if (v->cflags&REG_NLANCH)`
`826`	`830`	`ARCV(AHEAD,v->nlcolor);`
`827`	`831`	`NEXT();`
`828`		`-return;`
	`832`	`+returntop;`
`829`	`833`	`break;`
`830`	`834`	`caseSBEGIN:`
`831`	`835`	`ARCV('^',1);/* BOL */`
`832`	`836`	`ARCV('^',0);/* or BOS */`
`833`	`837`	`NEXT();`
`834`		`-return;`
	`838`	`+returntop;`
`835`	`839`	`break;`
`836`	`840`	`caseSEND:`
`837`	`841`	`ARCV('$',1);/* EOL */`
`838`	`842`	`ARCV('$',0);/* or EOS */`
`839`	`843`	`NEXT();`
`840`		`-return;`
	`844`	`+returntop;`
`841`	`845`	`break;`
`842`	`846`	`case'<':`
`843`	`847`	`wordchrs(v);`
`844`	`848`	`s=newstate(v->nfa);`
`845`		`-NOERR();`
	`849`	`+NOERRN();`
`846`	`850`	`nonword(v,BEHIND,lp,s);`
`847`	`851`	`word(v,AHEAD,s,rp);`
`848`	`852`	`NEXT();`
`849`		`-return;`
	`853`	`+returntop;`
`850`	`854`	`break;`
`851`	`855`	`case'>':`
`852`	`856`	`wordchrs(v);`
`853`	`857`	`s=newstate(v->nfa);`
`854`		`-NOERR();`
	`858`	`+NOERRN();`
`855`	`859`	`word(v,BEHIND,lp,s);`
`856`	`860`	`nonword(v,AHEAD,s,rp);`
`857`	`861`	`NEXT();`
`858`		`-return;`
	`862`	`+returntop;`
`859`	`863`	`break;`
`860`	`864`	`caseWBDRY:`
`861`	`865`	`wordchrs(v);`
`862`	`866`	`s=newstate(v->nfa);`
`863`		`-NOERR();`
	`867`	`+NOERRN();`
`864`	`868`	`nonword(v,BEHIND,lp,s);`
`865`	`869`	`word(v,AHEAD,s,rp);`
`866`	`870`	`s=newstate(v->nfa);`
`867`		`-NOERR();`
	`871`	`+NOERRN();`
`868`	`872`	`word(v,BEHIND,lp,s);`
`869`	`873`	`nonword(v,AHEAD,s,rp);`
`870`	`874`	`NEXT();`
`871`		`-return;`
	`875`	`+returntop;`
`872`	`876`	`break;`
`873`	`877`	`caseNWBDRY:`
`874`	`878`	`wordchrs(v);`
`875`	`879`	`s=newstate(v->nfa);`
`876`		`-NOERR();`
	`880`	`+NOERRN();`
`877`	`881`	`word(v,BEHIND,lp,s);`
`878`	`882`	`word(v,AHEAD,s,rp);`
`879`	`883`	`s=newstate(v->nfa);`
`880`		`-NOERR();`
	`884`	`+NOERRN();`
`881`	`885`	`nonword(v,BEHIND,lp,s);`
`882`	`886`	`nonword(v,AHEAD,s,rp);`
`883`	`887`	`NEXT();`
`884`		`-return;`
	`888`	`+returntop;`
`885`	`889`	`break;`
`886`	`890`	`caseLACON:/* lookaround constraint */`
`887`	`891`	`latype=v->nextvalue;`
`888`	`892`	`NEXT();`
`889`	`893`	`s=newstate(v->nfa);`
`890`	`894`	`s2=newstate(v->nfa);`
`891`		`-NOERR();`
	`895`	`+NOERRN();`
`892`	`896`	`t=parse(v,')',LACON,s,s2);`
`893`	`897`	`freesubre(v,t);/* internal structure irrelevant */`
`894`		`-NOERR();`
	`898`	`+NOERRN();`
`895`	`899`	`assert(SEE(')'));`
`896`	`900`	`NEXT();`
`897`	`901`	`processlacon(v,s,s2,latype,lp,rp);`
`898`		`-return;`
	`902`	`+returntop;`
`899`	`903`	`break;`
`900`	`904`	`/* then errors, to get them out of the way */`
`901`	`905`	`case'*':`
`902`	`906`	`case'+':`
`903`	`907`	`case'?':`
`904`	`908`	`case'{':`
`905`	`909`	`ERR(REG_BADRPT);`
`906`		`-return;`
	`910`	`+returntop;`
`907`	`911`	`break;`
`908`	`912`	`default:`
`909`	`913`	`ERR(REG_ASSERT);`
`910`		`-return;`
	`914`	`+returntop;`
`911`	`915`	`break;`
`912`	`916`	`/* then plain characters, and minor variants on that theme */`
`913`	`917`	`case')':/* unbalanced paren */`
`914`	`918`	`if ((v->cflags&REG_ADVANCED)!=REG_EXTENDED)`
`915`	`919`	`{`
`916`	`920`	`ERR(REG_EPAREN);`
`917`		`-return;`
	`921`	`+returntop;`
`918`	`922`	`}`
`919`	`923`	`/* legal in EREs due to specification botch */`
`920`	`924`	`NOTE(REG_UPBOTCH);`
`@@ -923,7 +927,7 @@ parseqatom(struct vars *v,`
`923`	`927`	`casePLAIN:`
`924`	`928`	`onechr(v,v->nextvalue,lp,rp);`
`925`	`929`	`okcolors(v->nfa,v->cm);`
`926`		`-NOERR();`
	`930`	`+NOERRN();`
`927`	`931`	`NEXT();`
`928`	`932`	`break;`
`929`	`933`	`case'[':`
`@@ -972,14 +976,14 @@ parseqatom(struct vars *v,`
`972`	`976`	`*/`
`973`	`977`	`s=newstate(v->nfa);`
`974`	`978`	`s2=newstate(v->nfa);`
`975`		`-NOERR();`
	`979`	`+NOERRN();`
`976`	`980`	`EMPTYARC(lp,s);`
`977`	`981`	`EMPTYARC(s2,rp);`
`978`		`-NOERR();`
	`982`	`+NOERRN();`
`979`	`983`	`atom=parse(v,')',type,s,s2);`
`980`	`984`	`assert(SEE(')')\|\|ISERR());`
`981`	`985`	`NEXT();`
`982`		`-NOERR();`
	`986`	`+NOERRN();`
`983`	`987`	`if (cap)`
`984`	`988`	`{`
`985`	`989`	`assert(v->subs[subno]==NULL);`
`@@ -994,7 +998,7 @@ parseqatom(struct vars *v,`
`994`	`998`	`{`
`995`	`999`	`/* generate no-op wrapper node to handle "((x))" */`
`996`	`1000`	`t=subre(v,'(',atom->flags \|CAP,lp,rp);`
`997`		`-NOERR();`
	`1001`	`+NOERRN();`
`998`	`1002`	`t->capno=subno;`
`999`	`1003`	`t->child=atom;`
`1000`	`1004`	`atom=t;`
`@@ -1006,10 +1010,10 @@ parseqatom(struct vars *v,`
`1006`	`1010`	`INSIST(type!=LACON,REG_ESUBREG);`
`1007`	`1011`	`INSIST(v->nextvalue<v->nsubs,REG_ESUBREG);`
`1008`	`1012`	`INSIST(v->subs[v->nextvalue]!=NULL,REG_ESUBREG);`
`1009`		`-NOERR();`
	`1013`	`+NOERRN();`
`1010`	`1014`	`assert(v->nextvalue>0);`
`1011`	`1015`	`atom=subre(v,'b',BACKR,lp,rp);`
`1012`		`-NOERR();`
	`1016`	`+NOERRN();`
`1013`	`1017`	`subno=v->nextvalue;`
`1014`	`1018`	`atom->backno=subno;`
`1015`	`1019`	`EMPTYARC(lp,rp);/* temporarily, so there's something */`
`@@ -1050,7 +1054,7 @@ parseqatom(struct vars *v,`
`1050`	`1054`	`if (m>n)`
`1051`	`1055`	`{`
`1052`	`1056`	`ERR(REG_BADBR);`
`1053`		`-return;`
	`1057`	`+returntop;`
`1054`	`1058`	`}`
`1055`	`1059`	`/* {m,n} exercises preference, even if it's {m,m} */`
`1056`	`1060`	`qprefer= (v->nextvalue) ?LONGER :SHORTER;`
`@@ -1064,7 +1068,7 @@ parseqatom(struct vars *v,`
`1064`	`1068`	`if (!SEE('}'))`
`1065`	`1069`	`{/* catches errors too */`
`1066`	`1070`	`ERR(REG_BADBR);`
`1067`		`-return;`
	`1071`	`+returntop;`
`1068`	`1072`	`}`
`1069`	`1073`	`NEXT();`
`1070`	`1074`	`break;`
`@@ -1083,7 +1087,7 @@ parseqatom(struct vars *v,`
`1083`	`1087`	`v->subs[subno]=NULL;`
`1084`	`1088`	`delsub(v->nfa,lp,rp);`
`1085`	`1089`	`EMPTYARC(lp,rp);`
`1086`		`-return;`
	`1090`	`+returntop;`
`1087`	`1091`	`}`
`1088`	`1092`
`1089`	`1093`	`/* if not a messy case, avoid hard part */`
`@@ -1096,7 +1100,7 @@ parseqatom(struct vars *v,`
`1096`	`1100`	`if (atom!=NULL)`
`1097`	`1101`	`freesubre(v,atom);`
`1098`	`1102`	`top->flags=f;`
`1099`		`-return;`
	`1103`	`+returntop;`
`1100`	`1104`	`}`
`1101`	`1105`
`1102`	`1106`	`/*`
`@@ -1110,7 +1114,7 @@ parseqatom(struct vars *v,`
`1110`	`1114`	`if (atom==NULL)`
`1111`	`1115`	`{`
`1112`	`1116`	`atom=subre(v,'=',0,lp,rp);`
`1113`		`-NOERR();`
	`1117`	`+NOERRN();`
`1114`	`1118`	`}`
`1115`	`1119`
`1116`	`1120`	`/*----------`
`@@ -1131,20 +1135,20 @@ parseqatom(struct vars *v,`
`1131`	`1135`	`*/`
`1132`	`1136`	`s=newstate(v->nfa);/* first, new endpoints for the atom */`
`1133`	`1137`	`s2=newstate(v->nfa);`
`1134`		`-NOERR();`
	`1138`	`+NOERRN();`
`1135`	`1139`	`moveouts(v->nfa,lp,s);`
`1136`	`1140`	`moveins(v->nfa,rp,s2);`
`1137`		`-NOERR();`
	`1141`	`+NOERRN();`
`1138`	`1142`	`atom->begin=s;`
`1139`	`1143`	`atom->end=s2;`
`1140`	`1144`	`s=newstate(v->nfa);/* set up starting state */`
`1141`		`-NOERR();`
	`1145`	`+NOERRN();`
`1142`	`1146`	`EMPTYARC(lp,s);`
`1143`		`-NOERR();`
	`1147`	`+NOERRN();`
`1144`	`1148`
`1145`	`1149`	`/* break remaining subRE into x{...} and what follows */`
`1146`	`1150`	`t=subre(v,'.',COMBINE(qprefer,atom->flags),lp,rp);`
`1147`		`-NOERR();`
	`1151`	`+NOERRN();`
`1148`	`1152`	`t->child=atom;`
`1149`	`1153`	`atomp=&t->child;`
`1150`	`1154`
`@@ -1163,7 +1167,7 @@ parseqatom(struct vars *v,`
`1163`	`1167`	`*/`
`1164`	`1168`	`assert(top->op=='='&&top->child==NULL);`
`1165`	`1169`	`top->child=subre(v,'=',top->flags,top->begin,lp);`
`1166`		`-NOERR();`
	`1170`	`+NOERRN();`
`1167`	`1171`	`top->op='.';`
`1168`	`1172`	`top->child->sibling=t;`
`1169`	`1173`	`/* top->flags will get updated later */`
`@@ -1182,11 +1186,11 @@ parseqatom(struct vars *v,`
`1182`	`1186`	`*/`
`1183`	`1187`	`dupnfa(v->nfa,v->subs[subno]->begin,v->subs[subno]->end,`
`1184`	`1188`	`atom->begin,atom->end);`
`1185`		`-NOERR();`
	`1189`	`+NOERRN();`
`1186`	`1190`
`1187`	`1191`	`/* The backref node's NFA should not enforce any constraints */`
`1188`	`1192`	`removeconstraints(v->nfa,atom->begin,atom->end);`
`1189`		`-NOERR();`
	`1193`	`+NOERRN();`
`1190`	`1194`	`}`
`1191`	`1195`
`1192`	`1196`	`/*`
`@@ -1226,7 +1230,7 @@ parseqatom(struct vars *v,`
`1226`	`1230`	`repeat(v,atom->begin,atom->end,m,n);`
`1227`	`1231`	`f=COMBINE(qprefer,atom->flags);`
`1228`	`1232`	`t=subre(v,'=',f,atom->begin,atom->end);`
`1229`		`-NOERR();`
	`1233`	`+NOERRN();`
`1230`	`1234`	`freesubre(v,atom);`
`1231`	`1235`	`*atomp=t;`
`1232`	`1236`	`/* rest of branch can be strung starting from t->end */`
`@@ -1247,9 +1251,9 @@ parseqatom(struct vars *v,`
`1247`	`1251`	`repeat(v,s,atom->begin,m-1, (n==DUPINF) ?n :n-1);`
`1248`	`1252`	`f=COMBINE(qprefer,atom->flags);`
`1249`	`1253`	`t=subre(v,'.',f,s,atom->end);/* prefix and atom */`
`1250`		`-NOERR();`
	`1254`	`+NOERRN();`
`1251`	`1255`	`t->child=subre(v,'=',PREF(f),s,atom->begin);`
`1252`		`-NOERR();`
	`1256`	`+NOERRN();`
`1253`	`1257`	`t->child->sibling=atom;`
`1254`	`1258`	`*atomp=t;`
`1255`	`1259`	`/* rest of branch can be strung starting from atom->end */`
`@@ -1259,14 +1263,14 @@ parseqatom(struct vars *v,`
`1259`	`1263`	`{`
`1260`	`1264`	`/* general case: need an iteration node */`
`1261`	`1265`	`s2=newstate(v->nfa);`
`1262`		`-NOERR();`
	`1266`	`+NOERRN();`
`1263`	`1267`	`moveouts(v->nfa,atom->end,s2);`
`1264`		`-NOERR();`
	`1268`	`+NOERRN();`
`1265`	`1269`	`dupnfa(v->nfa,atom->begin,atom->end,s,s2);`
`1266`	`1270`	`repeat(v,s,s2,m,n);`
`1267`	`1271`	`f=COMBINE(qprefer,atom->flags);`
`1268`	`1272`	`t=subre(v,'*',f,s,s2);`
`1269`		`-NOERR();`
	`1273`	`+NOERRN();`
`1270`	`1274`	`t->min= (short)m;`
`1271`	`1275`	`t->max= (short)n;`
`1272`	`1276`	`t->child=atom;`
`@@ -1280,7 +1284,7 @@ parseqatom(struct vars *v,`
`1280`	`1284`	`{`
`1281`	`1285`	`/* parse all the rest of the branch, and insert in t->child->sibling */`
`1282`	`1286`	`t->child->sibling=parsebranch(v,stopper,type,s2,rp,1);`
`1283`		`-NOERR();`
	`1287`	`+NOERRN();`
`1284`	`1288`	`assert(SEE('\|')\|\|SEE(stopper)\|\|SEE(EOS));`
`1285`	`1289`
`1286`	`1290`	`/* here's the promised update of the flags */`
`@@ -1299,9 +1303,7 @@ parseqatom(struct vars *v,`
`1299`	`1303`	`*`
`1300`	`1304`	`* If the messy atom was the first thing in the branch, then`
`1301`	`1305`	`* top->child is vacuous and we can get rid of one level of`
`1302`		`- * concatenation. Since the caller is holding a pointer to the top`
`1303`		`- * node, we can't remove that node; but we're allowed to change its`
`1304`		`- * properties.`
	`1306`	`+ * concatenation.`
`1305`	`1307`	`*/`
`1306`	`1308`	`assert(top->child->op=='=');`
`1307`	`1309`	`if (top->child->begin==top->child->end)`
`@@ -1351,21 +1353,13 @@ parseqatom(struct vars *v,`
`1351`	`1353`	`{`
`1352`	`1354`	`assert(!MESSY(top->child->flags));`
`1353`	`1355`	`t=top->child->sibling;`
`1354`		`-freesubre(v,top->child);`
`1355`		`-top->op=t->op;`
`1356`		`-top->flags=t->flags;`
`1357`		`-top->latype=t->latype;`
`1358`		`-top->id=t->id;`
`1359`		`-top->capno=t->capno;`
`1360`		`-top->backno=t->backno;`
`1361`		`-top->min=t->min;`
`1362`		`-top->max=t->max;`
`1363`		`-top->child=t->child;`
`1364`		`-top->begin=t->begin;`
`1365`		`-top->end=t->end;`
`1366`		`-freesrnode(v,t);`
	`1356`	`+top->child->sibling=NULL;`
	`1357`	`+freesubre(v,top);`
	`1358`	`+top=t;`
`1367`	`1359`	`}`
`1368`	`1360`	`}`
	`1361`	`+`
	`1362`	`+returntop;`
`1369`	`1363`	`}`
`1370`	`1364`
`1371`	`1365`	`/*`
`@@ -2109,7 +2103,9 @@ freesrnode(struct vars *v, /* might be NULL */`
`2109`	`2103`
`2110`	`2104`	`if (!NULLCNFA(sr->cnfa))`
`2111`	`2105`	`freecnfa(&sr->cnfa);`
`2112`		`-sr->flags=0;`
	`2106`	`+sr->flags=0;/* in particular, not INUSE */`
	`2107`	`+sr->child=sr->sibling=NULL;`
	`2108`	`+sr->begin=sr->end=NULL;`
`2113`	`2109`
`2114`	`2110`	`if (v!=NULL&&v->treechain!=NULL)`
`2115`	`2111`	`{`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commitf42ea83

File tree

3 files changed

3 files changed

`‎src/backend/regex/regcomp.c‎`

0 commit comments