Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit9bbf6f7

Browse files
committed
Prevent regexp back-refs from sometimes matching when they shouldn't.
The recursion in cdissect() was careless about clearing match datafor capturing parentheses after rejecting a partial match. Thiscould allow a later back-reference to succeed when by rights itshould fail for lack of a defined referent.To fix, think a little more rigorously about what the contractbetween different levels of cdissect's recursion needs to be.With the right spec, we can fix this using fewer rather than moreresets of the match data; the key decision being that a failedsub-match is now explicitly responsible for clearing any matchesit may have set.There are enough other cross-checks and optimizations in the codethat it's not especially easy to exhibit this problem; usually, thematch will fail as-expected. Plus, regexps that are even potentiallyvulnerable are most likely user errors, since there's just not muchpoint in writing a back-ref that doesn't always have a referent.These facts perhaps explain why the issue hasn't been detected,even though it's almost certainly a couple of decades old.Discussion:https://postgr.es/m/151435.1629733387@sss.pgh.pa.us
1 parent515e3d8 commit9bbf6f7

File tree

5 files changed

+71
-12
lines changed

5 files changed

+71
-12
lines changed

‎src/backend/regex/regexec.c

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -216,14 +216,14 @@ pg_regexec(regex_t *re,
216216
if (backref&&nmatch <=v->g->nsub)
217217
{
218218
/* need larger work area */
219-
if (v->g->nsub+1 <=LOCALMAT)
219+
v->nmatch=v->g->nsub+1;
220+
if (v->nmatch <=LOCALMAT)
220221
v->pmatch=mat;
221222
else
222-
v->pmatch= (regmatch_t*)MALLOC((v->g->nsub+1)*
223-
sizeof(regmatch_t));
223+
v->pmatch= (regmatch_t*)MALLOC(v->nmatch*sizeof(regmatch_t));
224224
if (v->pmatch==NULL)
225225
returnREG_ESPACE;
226-
v->nmatch=v->g->nsub+1;
226+
zapallsubs(v->pmatch,v->nmatch);
227227
}
228228
else
229229
{
@@ -488,7 +488,6 @@ find(struct vars *v,
488488
returnREG_OKAY;
489489

490490
/* find submatches */
491-
zapallsubs(v->pmatch,v->nmatch);
492491
returncdissect(v,v->g->tree,begin,end);
493492
}
494493

@@ -599,7 +598,6 @@ cfindloop(struct vars *v,
599598
break;/* no match with this begin point, try next */
600599
MDEBUG(("tentative end %ld\n",LOFF(end)));
601600
/* Dissect the potential match to see if it really matches */
602-
zapallsubs(v->pmatch,v->nmatch);
603601
er=cdissect(v,v->g->tree,begin,end);
604602
if (er==REG_OKAY)
605603
{
@@ -647,6 +645,8 @@ cfindloop(struct vars *v,
647645

648646
/*
649647
* zapallsubs - initialize all subexpression matches to "no match"
648+
*
649+
* Note that p[0], the overall-match location, is not touched.
650650
*/
651651
staticvoid
652652
zapallsubs(regmatch_t*p,
@@ -716,8 +716,30 @@ subset(struct vars *v,
716716
* DFA and found that the proposed substring satisfies the DFA. (We make
717717
* the caller do that because in concatenation and iteration nodes, it's
718718
* much faster to check all the substrings against the child DFAs before we
719-
* recurse.) Also, caller must have cleared subexpression match data via
720-
* zaptreesubs (or zapallsubs at the top level).
719+
* recurse.)
720+
*
721+
* A side-effect of a successful match is to save match locations for
722+
* capturing subexpressions in v->pmatch[]. This is a little bit tricky,
723+
* so we make the following rules:
724+
* 1. Before initial entry to cdissect, all match data must have been
725+
* cleared (this is seen to by zapallsubs).
726+
* 2. Before any recursive entry to cdissect, the match data for that
727+
* subexpression tree must be guaranteed clear (see zaptreesubs).
728+
* 3. When returning REG_OKAY, each level of cdissect will have saved
729+
* any relevant match locations.
730+
* 4. When returning REG_NOMATCH, each level of cdissect will guarantee
731+
* that its subexpression match locations are again clear.
732+
* 5. No guarantees are made for error cases (i.e., other result codes).
733+
* 6. When a level of cdissect abandons a successful sub-match, it will
734+
* clear that subtree's match locations with zaptreesubs before trying
735+
* any new DFA match or cdissect call for that subtree or any subtree
736+
* to its right (that is, any subtree that could have a backref into the
737+
* abandoned match).
738+
* This may seem overly complicated, but it's difficult to simplify it
739+
* because of the provision that match locations must be reset before
740+
* any fresh DFA match (a rule that is needed to make dfa_backref safe).
741+
* That means it won't work to just reset relevant match locations at the
742+
* start of each cdissect level.
721743
*/
722744
staticint/* regexec return code */
723745
cdissect(structvars*v,
@@ -841,6 +863,8 @@ ccondissect(struct vars *v,
841863
MDEBUG(("%d: successful\n",t->id));
842864
returnREG_OKAY;
843865
}
866+
/* Reset left's matches (right should have done so itself) */
867+
zaptreesubs(v,left);
844868
}
845869
if (er!=REG_NOMATCH)
846870
returner;
@@ -863,8 +887,6 @@ ccondissect(struct vars *v,
863887
returnREG_NOMATCH;
864888
}
865889
MDEBUG(("%d: new midpoint %ld\n",t->id,LOFF(mid)));
866-
zaptreesubs(v,left);
867-
zaptreesubs(v,right);
868890
}
869891

870892
/* can't get here */
@@ -922,6 +944,8 @@ crevcondissect(struct vars *v,
922944
MDEBUG(("%d: successful\n",t->id));
923945
returnREG_OKAY;
924946
}
947+
/* Reset left's matches (right should have done so itself) */
948+
zaptreesubs(v,left);
925949
}
926950
if (er!=REG_NOMATCH)
927951
returner;
@@ -944,8 +968,6 @@ crevcondissect(struct vars *v,
944968
returnREG_NOMATCH;
945969
}
946970
MDEBUG(("%d: new midpoint %ld\n",t->id,LOFF(mid)));
947-
zaptreesubs(v,left);
948-
zaptreesubs(v,right);
949971
}
950972

951973
/* can't get here */
@@ -1214,6 +1236,7 @@ citerdissect(struct vars *v,
12141236

12151237
for (i=nverified+1;i <=k;i++)
12161238
{
1239+
/* zap any match data from a non-last iteration */
12171240
zaptreesubs(v,t->child);
12181241
er=cdissect(v,t->child,endpts[i-1],endpts[i]);
12191242
if (er==REG_OKAY)
@@ -1426,6 +1449,7 @@ creviterdissect(struct vars *v,
14261449

14271450
for (i=nverified+1;i <=k;i++)
14281451
{
1452+
/* zap any match data from a non-last iteration */
14291453
zaptreesubs(v,t->child);
14301454
er=cdissect(v,t->child,endpts[i-1],endpts[i]);
14311455
if (er==REG_OKAY)

‎src/test/modules/test_regex/expected/test_regex.out

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2664,6 +2664,20 @@ select * from test_regex('^(.+)( \1)+$', 'abc abc abd', 'RP');
26642664
{2,REG_UBACKREF,REG_UNONPOSIX}
26652665
(1 row)
26662666

2667+
-- expectNomatch14.30 RP{^(.)\1|\1.}{abcdef}
2668+
select * from test_regex('^(.)\1|\1.', 'abcdef', 'RP');
2669+
test_regex
2670+
--------------------------------
2671+
{1,REG_UBACKREF,REG_UNONPOSIX}
2672+
(1 row)
2673+
2674+
-- expectNomatch14.31 RP{^((.)\2|..)\2}{abadef}
2675+
select * from test_regex('^((.)\2|..)\2', 'abadef', 'RP');
2676+
test_regex
2677+
--------------------------------
2678+
{2,REG_UBACKREF,REG_UNONPOSIX}
2679+
(1 row)
2680+
26672681
-- back reference only matches the string, not any constraints
26682682
select * from test_regex('(^\w+).*\1', 'abc abc abc', 'LRP');
26692683
test_regex

‎src/test/modules/test_regex/sql/test_regex.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -775,6 +775,10 @@ select * from test_regex('^(.+)( \1)+$', 'abc abc abc', 'RP');
775775
select*from test_regex('^(.+)(\1)+$','abc abd abc','RP');
776776
-- expectNomatch14.29 RP{^(.+)( \1)+$}{abc abc abd}
777777
select*from test_regex('^(.+)(\1)+$','abc abc abd','RP');
778+
-- expectNomatch14.30 RP{^(.)\1|\1.}{abcdef}
779+
select*from test_regex('^(.)\1|\1.','abcdef','RP');
780+
-- expectNomatch14.31 RP{^((.)\2|..)\2}{abadef}
781+
select*from test_regex('^((.)\2|..)\2','abadef','RP');
778782

779783
-- back reference only matches the string, not any constraints
780784
select*from test_regex('(^\w+).*\1','abc abc abc','LRP');

‎src/test/regress/expected/regex.out

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,19 @@ select 'a' ~ '()+\1';
567567
t
568568
(1 row)
569569

570+
-- Test ancient oversight in when to apply zaptreesubs
571+
select 'abcdef' ~ '^(.)\1|\1.' as f;
572+
f
573+
---
574+
f
575+
(1 row)
576+
577+
select 'abadef' ~ '^((.)\2|..)\2' as f;
578+
f
579+
---
580+
f
581+
(1 row)
582+
570583
-- Add coverage for some cases in checkmatchall
571584
select regexp_match('xy', '.|...');
572585
regexp_match

‎src/test/regress/sql/regex.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ select 'a' ~ '.. ()|\1';
135135
select'a' ~'()*\1';
136136
select'a' ~'()+\1';
137137

138+
-- Test ancient oversight in when to apply zaptreesubs
139+
select'abcdef' ~'^(.)\1|\1.'as f;
140+
select'abadef' ~'^((.)\2|..)\2'as f;
141+
138142
-- Add coverage for some cases in checkmatchall
139143
select regexp_match('xy','.|...');
140144
select regexp_match('xyz','.|...');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp