Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 65dc30c

Browse files
committed
Fix regexp misbehavior with capturing parens inside "{0}".
Regexps like "(.){0}...\1" drew an "invalid backreference number". That's not unreasonable on its face, since the capture group will never be matched if it's iterated zero times. However, other engines such as Perl's don't complain about this, nor do we throw an error for related cases such as "(.)|\1", even though that backref can never succeed either. Also, if the zero-iterations case happens at runtime rather than compile time --- say, "(x)*...\1" when there's no "x" to be found --- that's not an error, we just deem the backref to not match. Making this even less defensible, no error was thrown for nested cases such as "((.)){0}...\2"; and to add insult to injury, those cases could result in assertion failures instead. (It seems that nothing especially bad happened in non-assert builds, though.) Let's just fix it so that no error is thrown and instead the backref is deemed to never match, so that compile-time detection of no iterations behaves the same as run-time detection.

Per report from Mark Dilger. This appears to be an aboriginal error in Spencer's library, so back-patch to all supported versions.

Pre-v14, it turns out to also be necessary to back-patch one aspect of commits cb76fbd/00116dee5, namely to create capture-node subREs with the begin/end states of their subexpressions, not the current lp/rp of the outer parseqatom invocation. Otherwise delsub complains that we're trying to disconnect a state from itself. This is a bit scary but code examination shows that it's safe: in the pre-v14 code, if we want to wrap iteration around the subexpression, the first thing we do is overwrite the atom's begin/end fields with new states. So the bogus values didn't survive long enough to be used for anything, except if no iteration is required, in which case it doesn't matter.

Discussion: https://postgr.es/m/A099E4A8-4377-4C64-A98C-3DEDDC075502@enterprisedb.com
1 parent 1046a69 commit 65dc30c

File tree

5 files changed

+69
-5
lines changed

5 files changed

+69
-5
lines changed

‎src/backend/regex/regcomp.c

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,11 +1089,23 @@ parseqatom(struct vars *v,
10891089
/* annoying special case: {0} or {0,0} cancels everything */
10901090
if (m==0&&n==0)
10911091
{
1092-
if (atom!=NULL)
1093-
freesubre(v,atom);
1094-
if (atomtype=='(')
1095-
v->subs[subno]=NULL;
1096-
delsub(v->nfa,lp,rp);
1092+
/*
1093+
* If we had capturing subexpression(s) within the atom, we don't want
1094+
* to destroy them, because it's legal (if useless) to back-ref them
1095+
* later. Hence, just unlink the atom from lp/rp and then ignore it.
1096+
*/
1097+
if (atom!=NULL&& (atom->flags&CAP))
1098+
{
1099+
delsub(v->nfa,lp,atom->begin);
1100+
delsub(v->nfa,atom->end,rp);
1101+
}
1102+
else
1103+
{
1104+
/* Otherwise, we can clean up any subre infrastructure we made */
1105+
if (atom!=NULL)
1106+
freesubre(v,atom);
1107+
delsub(v->nfa,lp,rp);
1108+
}
10971109
EMPTYARC(lp,rp);
10981110
returntop;
10991111
}

‎src/test/modules/test_regex/expected/test_regex.out

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3576,6 +3576,28 @@ select * from test_regex('((.))(\2)', 'xyy', 'oRP');
35763576
{yy,NULL,NULL,NULL}
35773577
(2 rows)
35783578

3579+
-- expectNomatch21.39 PQR{(.){0}(\1)}xxx
3580+
select * from test_regex('(.){0}(\1)', 'xxx', 'PQR');
3581+
test_regex
3582+
--------------------------------------------
3583+
{2,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX}
3584+
(1 row)
3585+
3586+
-- expectNomatch21.40 PQR{((.)){0}(\2)}xxx
3587+
select * from test_regex('((.)){0}(\2)', 'xxx', 'PQR');
3588+
test_regex
3589+
--------------------------------------------
3590+
{3,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX}
3591+
(1 row)
3592+
3593+
-- expectMatch21.41 NPQR{((.)){0}(\2){0}}xyz{}{}{}{}
3594+
select * from test_regex('((.)){0}(\2){0}', 'xyz', 'NPQR');
3595+
test_regex
3596+
------------------------------------------------------------
3597+
{3,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX,REG_UEMPTYMATCH}
3598+
{"",NULL,NULL,NULL}
3599+
(2 rows)
3600+
35793601
-- doing 22 "multicharacter collating elements"
35803602
-- # again ugh
35813603
-- MCCEs are not implemented in Postgres, so we skip all these tests

‎src/test/modules/test_regex/sql/test_regex.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,12 @@ select * from test_regex('((.))(\2){0}', 'xy', 'RPQ');
10361036
select*from test_regex('((.))(\2)','xyy','RP');
10371037
-- expectMatch21.38 oRP((.))(\2)xyyyy{}{}{}
10381038
select*from test_regex('((.))(\2)','xyy','oRP');
1039+
-- expectNomatch21.39 PQR{(.){0}(\1)}xxx
1040+
select*from test_regex('(.){0}(\1)','xxx','PQR');
1041+
-- expectNomatch21.40 PQR{((.)){0}(\2)}xxx
1042+
select*from test_regex('((.)){0}(\2)','xxx','PQR');
1043+
-- expectMatch21.41 NPQR{((.)){0}(\2){0}}xyz{}{}{}{}
1044+
select*from test_regex('((.)){0}(\2){0}','xyz','NPQR');
10391045

10401046
-- doing 22 "multicharacter collating elements"
10411047
-- # again ugh

‎src/test/regress/expected/regex.out

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,25 @@ select 'a' ~ '()+\1';
567567
t
568568
(1 row)
569569

570+
-- Test incorrect removal of capture groups within {0}
571+
select 'xxx' ~ '(.){0}(\1)' as f;
572+
f
573+
---
574+
f
575+
(1 row)
576+
577+
select 'xxx' ~ '((.)){0}(\2)' as f;
578+
f
579+
---
580+
f
581+
(1 row)
582+
583+
select 'xyz' ~ '((.)){0}(\2){0}' as t;
584+
t
585+
---
586+
t
587+
(1 row)
588+
570589
-- Test ancient oversight in when to apply zaptreesubs
571590
select 'abcdef' ~ '^(.)\1|\1.' as f;
572591
f

‎src/test/regress/sql/regex.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ select 'a' ~ '.. ()|\1';
135135
select'a' ~'()*\1';
136136
select'a' ~'()+\1';
137137

138+
-- Test incorrect removal of capture groups within {0}
139+
select'xxx' ~'(.){0}(\1)'as f;
140+
select'xxx' ~'((.)){0}(\2)'as f;
141+
select'xyz' ~'((.)){0}(\2){0}'as t;
142+
138143
-- Test ancient oversight in when to apply zaptreesubs
139144
select'abcdef' ~'^(.)\1|\1.'as f;
140145
select'abadef' ~'^((.)\2|..)\2'as f;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp