Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit51f2359

Browse files
committed
Fix potential infinite loop in regular expression execution.
In cfindloop(), if the initial call to shortest() reports that azero-length match is possible at the current search start point, but thenit is unable to construct any actual match to that, it'll just loop aroundwith the same start point, and thus make no progress. We need to force thestart point to be advanced. This is safe because the loop over "begin"points has already tried and failed to match starting at "close", so thereis surely no need to try that again.This bug was introduced in commite2bd904,wherein we allowed continued searching after we'd run out of matchpossibilities, but evidently failed to think hard enough about exactlywhere we needed to search next.Because of the way this code works, such a match failure is only possiblein the presence of backrefs --- otherwise, shortest()'s judgment that amatch is possible should always be correct. That probably explains howcome the bug has escaped detection for several years.The actual fix is a one-liner, but I took the trouble to add/improve somecomments related to the loop logic.After fixing that, the submitted test case "()*\1" didn't loop anymore.But it reported failure, though it seems like it ought to match azero-length string; both Tcl and Perl think it does. That seems to be fromoverenthusiastic optimization on my part when I rewrote the iteration matchlogic in commit173e29a: we can't just"declare victory" for a zero-length match without bothering to set matchdata for capturing parens inside the iterator node.Per fuzz testing by Greg Stark. The first part of this is a bug in allsupported branches, and the second part is a bug since 9.2 where theiteration rewrite happened.
1 parentbb704a7 commit51f2359

File tree

3 files changed

+68
-13
lines changed

3 files changed

+68
-13
lines changed

‎src/backend/regex/regexec.c

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,7 @@ cfindloop(struct vars * v,
448448
close=v->search_start;
449449
do
450450
{
451+
/* Search with the search RE for match range at/beyond "close" */
451452
MDEBUG(("\ncsearch at %ld\n",LOFF(close)));
452453
close=shortest(v,s,close,close,v->stop,&cold, (int*)NULL);
453454
if (ISERR())
@@ -456,10 +457,11 @@ cfindloop(struct vars * v,
456457
returnv->err;
457458
}
458459
if (close==NULL)
459-
break;/*NOTE BREAK */
460+
break;/*no more possible match anywhere */
460461
assert(cold!=NULL);
461462
open=cold;
462463
cold=NULL;
464+
/* Search for matches starting between "open" and "close" inclusive */
463465
MDEBUG(("cbetween %ld and %ld\n",LOFF(open),LOFF(close)));
464466
for (begin=open;begin <=close;begin++)
465467
{
@@ -468,6 +470,7 @@ cfindloop(struct vars * v,
468470
estop=v->stop;
469471
for (;;)
470472
{
473+
/* Here we use the top node's detailed RE */
471474
if (shorter)
472475
end=shortest(v,d,begin,estart,
473476
estop, (chr**)NULL,&hitend);
@@ -482,8 +485,9 @@ cfindloop(struct vars * v,
482485
if (hitend&&cold==NULL)
483486
cold=begin;
484487
if (end==NULL)
485-
break;/*NOTE BREAK OUT */
488+
break;/*no match with this begin point, try next */
486489
MDEBUG(("tentative end %ld\n",LOFF(end)));
490+
/* Dissect the potential match to see if it really matches */
487491
zapallsubs(v->pmatch,v->nmatch);
488492
er=cdissect(v,v->g->tree,begin,end);
489493
if (er==REG_OKAY)
@@ -502,21 +506,28 @@ cfindloop(struct vars * v,
502506
*coldp=cold;
503507
returner;
504508
}
505-
/*try nextshorter/longer match with same begin point */
509+
/*Try next longer/shorter match with same begin point */
506510
if (shorter)
507511
{
508512
if (end==estop)
509-
break;/*NOTE BREAK OUT */
513+
break;/*no more, so try next begin point */
510514
estart=end+1;
511515
}
512516
else
513517
{
514518
if (end==begin)
515-
break;/*NOTE BREAK OUT */
519+
break;/*no more, so try next begin point */
516520
estop=end-1;
517521
}
518522
}/* end loop over endpoint positions */
519523
}/* end loop over beginning positions */
524+
525+
/*
526+
* If we get here, there is no possible match starting at or before
527+
* "close", so consider matches beyond that. We'll do a fresh search
528+
* with the search RE to find a new promising match range.
529+
*/
530+
close++;
520531
}while (close<v->stop);
521532

522533
*coldp=cold;
@@ -963,17 +974,17 @@ citerdissect(struct vars * v,
963974
assert(begin <=end);
964975

965976
/*
966-
* If zero matches are allowed, and target string is empty, just declare
967-
* victory. OTOH, if target string isn't empty, zero matches can't work
968-
* so we pretend the min is 1.
977+
* For the moment, assume the minimum number of matches is 1. If zero
978+
* matches are allowed, and the target string is empty, we are allowed to
979+
* match regardless of the contents of the iter node --- but we would
980+
* prefer to match once, so that capturing parens get set. (An example of
981+
* the concern here is a pattern like "()*\1", which historically this
982+
* code has allowed to succeed.) Therefore, we deal with the zero-matches
983+
* case at the bottom, after failing to find any other way to match.
969984
*/
970985
min_matches=t->min;
971986
if (min_matches <=0)
972-
{
973-
if (begin==end)
974-
returnREG_OKAY;
975987
min_matches=1;
976-
}
977988

978989
/*
979990
* We need workspace to track the endpoints of each sub-match. Normally
@@ -1123,8 +1134,19 @@ citerdissect(struct vars * v,
11231134
}
11241135

11251136
/* all possibilities exhausted */
1126-
MDEBUG(("%d failed\n",t->id));
11271137
FREE(endpts);
1138+
1139+
/*
1140+
* Now consider the possibility that we can match to a zero-length string
1141+
* by using zero repetitions.
1142+
*/
1143+
if (t->min==0&&begin==end)
1144+
{
1145+
MDEBUG(("%d allowing zero matches\n",t->id));
1146+
returnREG_OKAY;
1147+
}
1148+
1149+
MDEBUG(("%d failed\n",t->id));
11281150
returnREG_NOMATCH;
11291151
}
11301152

‎src/test/regress/expected/regex.out

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,3 +196,29 @@ select regexp_matches('foo/bar/baz',
196196
{foo,bar,baz}
197197
(1 row)
198198

199+
-- Test for infinite loop in cfindloop with zero-length possible match
200+
-- but no actual match (can only happen in the presence of backrefs)
201+
select 'a' ~ '$()|^\1';
202+
?column?
203+
----------
204+
f
205+
(1 row)
206+
207+
select 'a' ~ '.. ()|\1';
208+
?column?
209+
----------
210+
f
211+
(1 row)
212+
213+
select 'a' ~ '()*\1';
214+
?column?
215+
----------
216+
t
217+
(1 row)
218+
219+
select 'a' ~ '()+\1';
220+
?column?
221+
----------
222+
t
223+
(1 row)
224+

‎src/test/regress/sql/regex.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,10 @@ select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
5050
-- Test for proper matching of non-greedy iteration (bug #11478)
5151
select regexp_matches('foo/bar/baz',
5252
'^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$','');
53+
54+
-- Test for infinite loop in cfindloop with zero-length possible match
55+
-- but no actual match (can only happen in the presence of backrefs)
56+
select'a' ~'$()|^\1';
57+
select'a' ~'.. ()|\1';
58+
select'a' ~'()*\1';
59+
select'a' ~'()+\1';

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp