Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit9a32717

Browse files
committed
Prevent regexp back-refs from sometimes matching when they shouldn't.
The recursion in cdissect() was careless about clearing match datafor capturing parentheses after rejecting a partial match. Thiscould allow a later back-reference to succeed when by rights itshould fail for lack of a defined referent.To fix, think a little more rigorously about what the contractbetween different levels of cdissect's recursion needs to be.With the right spec, we can fix this using fewer rather than moreresets of the match data; the key decision being that a failedsub-match is now explicitly responsible for clearing any matchesit may have set.There are enough other cross-checks and optimizations in the codethat it's not especially easy to exhibit this problem; usually, thematch will fail as-expected. Plus, regexps that are even potentiallyvulnerable are most likely user errors, since there's just not muchpoint in writing a back-ref that doesn't always have a referent.These facts perhaps explain why the issue hasn't been detected,even though it's almost certainly a couple of decades old.Discussion:https://postgr.es/m/151435.1629733387@sss.pgh.pa.us
1 parentad12311 commit9a32717

File tree

3 files changed

+51
-8
lines changed

3 files changed

+51
-8
lines changed

‎src/backend/regex/regexec.c

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ pg_regexec(regex_t *re,
226226
}
227227
else
228228
v->pmatch=pmatch;
229+
if (v->nmatch>0)
230+
zapallsubs(v->pmatch,v->nmatch);
229231
v->details=details;
230232
v->start= (chr*)string;
231233
v->search_start= (chr*)string+search_start;
@@ -455,7 +457,6 @@ find(struct vars *v,
455457
returnREG_OKAY;
456458

457459
/* find submatches */
458-
zapallsubs(v->pmatch,v->nmatch);
459460
returncdissect(v,v->g->tree,begin,end);
460461
}
461462

@@ -566,7 +567,6 @@ cfindloop(struct vars *v,
566567
break;/* no match with this begin point, try next */
567568
MDEBUG(("tentative end %ld\n",LOFF(end)));
568569
/* Dissect the potential match to see if it really matches */
569-
zapallsubs(v->pmatch,v->nmatch);
570570
er=cdissect(v,v->g->tree,begin,end);
571571
if (er==REG_OKAY)
572572
{
@@ -614,6 +614,8 @@ cfindloop(struct vars *v,
614614

615615
/*
616616
* zapallsubs - initialize all subexpression matches to "no match"
617+
*
618+
* Note that p[0], the overall-match location, is not touched.
617619
*/
618620
staticvoid
619621
zapallsubs(regmatch_t*p,
@@ -685,8 +687,30 @@ subset(struct vars *v,
685687
* DFA and found that the proposed substring satisfies the DFA. (We make
686688
* the caller do that because in concatenation and iteration nodes, it's
687689
* much faster to check all the substrings against the child DFAs before we
688-
* recurse.) Also, caller must have cleared subexpression match data via
689-
* zaptreesubs (or zapallsubs at the top level).
690+
* recurse.)
691+
*
692+
* A side-effect of a successful match is to save match locations for
693+
* capturing subexpressions in v->pmatch[]. This is a little bit tricky,
694+
* so we make the following rules:
695+
* 1. Before initial entry to cdissect, all match data must have been
696+
* cleared (this is seen to by zapallsubs).
697+
* 2. Before any recursive entry to cdissect, the match data for that
698+
* subexpression tree must be guaranteed clear (see zaptreesubs).
699+
* 3. When returning REG_OKAY, each level of cdissect will have saved
700+
* any relevant match locations.
701+
* 4. When returning REG_NOMATCH, each level of cdissect will guarantee
702+
* that its subexpression match locations are again clear.
703+
* 5. No guarantees are made for error cases (i.e., other result codes).
704+
* 6. When a level of cdissect abandons a successful sub-match, it will
705+
* clear that subtree's match locations with zaptreesubs before trying
706+
* any new DFA match or cdissect call for that subtree or any subtree
707+
* to its right (that is, any subtree that could have a backref into the
708+
* abandoned match).
709+
* This may seem overly complicated, but it's difficult to simplify it
710+
* because of the provision that match locations must be reset before
711+
* any fresh DFA match (a rule that is needed to make dfa_backref safe).
712+
* That means it won't work to just reset relevant match locations at the
713+
* start of each cdissect level.
690714
*/
691715
staticint/* regexec return code */
692716
cdissect(structvars*v,
@@ -804,6 +828,8 @@ ccondissect(struct vars *v,
804828
MDEBUG(("successful\n"));
805829
returnREG_OKAY;
806830
}
831+
/* Reset left's matches (right should have done so itself) */
832+
zaptreesubs(v,t->left);
807833
}
808834
if (er!=REG_NOMATCH)
809835
returner;
@@ -826,8 +852,6 @@ ccondissect(struct vars *v,
826852
returnREG_NOMATCH;
827853
}
828854
MDEBUG(("%d: new midpoint %ld\n",t->id,LOFF(mid)));
829-
zaptreesubs(v,t->left);
830-
zaptreesubs(v,t->right);
831855
}
832856

833857
/* can't get here */
@@ -882,6 +906,8 @@ crevcondissect(struct vars *v,
882906
MDEBUG(("successful\n"));
883907
returnREG_OKAY;
884908
}
909+
/* Reset left's matches (right should have done so itself) */
910+
zaptreesubs(v,t->left);
885911
}
886912
if (er!=REG_NOMATCH)
887913
returner;
@@ -904,8 +930,6 @@ crevcondissect(struct vars *v,
904930
returnREG_NOMATCH;
905931
}
906932
MDEBUG(("%d: new midpoint %ld\n",t->id,LOFF(mid)));
907-
zaptreesubs(v,t->left);
908-
zaptreesubs(v,t->right);
909933
}
910934

911935
/* can't get here */
@@ -1165,6 +1189,7 @@ citerdissect(struct vars *v,
11651189

11661190
for (i=nverified+1;i <=k;i++)
11671191
{
1192+
/* zap any match data from a non-last iteration */
11681193
zaptreesubs(v,t->left);
11691194
er=cdissect(v,t->left,endpts[i-1],endpts[i]);
11701195
if (er==REG_OKAY)
@@ -1373,6 +1398,7 @@ creviterdissect(struct vars *v,
13731398

13741399
for (i=nverified+1;i <=k;i++)
13751400
{
1401+
/* zap any match data from a non-last iteration */
13761402
zaptreesubs(v,t->left);
13771403
er=cdissect(v,t->left,endpts[i-1],endpts[i]);
13781404
if (er==REG_OKAY)

‎src/test/regress/expected/regex.out

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,19 @@ select 'a' ~ '()+\1';
567567
t
568568
(1 row)
569569

570+
-- Test ancient oversight in when to apply zaptreesubs
571+
select 'abcdef' ~ '^(.)\1|\1.' as f;
572+
f
573+
---
574+
f
575+
(1 row)
576+
577+
select 'abadef' ~ '^((.)\2|..)\2' as f;
578+
f
579+
---
580+
f
581+
(1 row)
582+
570583
-- Error conditions
571584
select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs
572585
ERROR: invalid regular expression: invalid backreference number

‎src/test/regress/sql/regex.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ select 'a' ~ '.. ()|\1';
135135
select'a' ~'()*\1';
136136
select'a' ~'()+\1';
137137

138+
-- Test ancient oversight in when to apply zaptreesubs
139+
select'abcdef' ~'^(.)\1|\1.'as f;
140+
select'abadef' ~'^((.)\2|..)\2'as f;
141+
138142
-- Error conditions
139143
select'xyz' ~'x(\w)(?=\1)';-- no backrefs in LACONs
140144
select'xyz' ~'x(\w)(?=(\1))';

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp