Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit4e703d6

Browse files
committed
Make some minor improvements in the regex code.
Push some hopefully-uncontroversial bits extracted from an upcomingpatch series, to remove non-relevant clutter from the main patches.In compact(), return immediately after setting REG_ASSERT error;continuing the loop would just lead to assertion failure below.(Ask me how I know.)In parseqatom(), remove assertion that moresubs() did its job.When moresubs actually did its job, this is redundant with thatfunction's final assert; but when it failed on OOM, this is anassertion crash. We could avoid the crash by adding a NOERR()check before the assertion, but it seems better to subtract codethan add it. (Note that there's a NOERR exit a few lines furtherdown, and nothing else between here and there requires moresubsto have succeeded. So we don't really need an extra error exit.)This is a live bug in assert-enabled builds, but given the verylow likelihood of OOM in moresub's tiny allocation, I don't thinkit's worth back-patching.On the other hand, it seems worthwhile to add an assertion thatour intended v->subs[subno] target is still null by the time weare ready to insert into it, since there's a recursion in between.In pg_regexec, ensure we fflush any debug output on the way out,and try to make MDEBUG messages more uniform and helpful. (Inparticular, ensure that all of them are prefixed with the subre'sid number, so one can match up entry and exit reports.)Add some test cases in test_regex to improve coverage of lookaheadand lookbehind constraints. Adding these now is mainly to establishthat this is indeed the existing behavior.Discussion:https://postgr.es/m/1340281.1613018383@sss.pgh.pa.us
1 parentf40c696 commit4e703d6

File tree

5 files changed

+99
-27
lines changed

5 files changed

+99
-27
lines changed

‎src/backend/regex/regc_nfa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2902,7 +2902,7 @@ compact(struct nfa *nfa,
29022902
break;
29032903
default:
29042904
NERR(REG_ASSERT);
2905-
break;
2905+
return;
29062906
}
29072907
carcsort(first,ca-first);
29082908
ca->co=COLORLESS;

‎src/backend/regex/regcomp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -942,7 +942,6 @@ parseqatom(struct vars *v,
942942
subno=v->nsubexp;
943943
if ((size_t)subno >=v->nsubs)
944944
moresubs(v,subno);
945-
assert((size_t)subno<v->nsubs);
946945
}
947946
else
948947
atomtype=PLAIN;/* something that's not '(' */
@@ -960,6 +959,7 @@ parseqatom(struct vars *v,
960959
NOERR();
961960
if (cap)
962961
{
962+
assert(v->subs[subno]==NULL);
963963
v->subs[subno]=atom;
964964
t=subre(v,'(',atom->flags |CAP,lp,rp);
965965
NOERR();

‎src/backend/regex/regexec.c

Lines changed: 36 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,11 @@ pg_regexec(regex_t *re,
324324
if (v->lblastcp!=NULL)
325325
FREE(v->lblastcp);
326326

327+
#ifdefREG_DEBUG
328+
if (v->eflags& (REG_FTRACE |REG_MTRACE))
329+
fflush(stdout);
330+
#endif
331+
327332
returnst;
328333
}
329334

@@ -668,7 +673,7 @@ subset(struct vars *v,
668673
if ((size_t)n >=v->nmatch)
669674
return;
670675

671-
MDEBUG(("setting %d\n",n));
676+
MDEBUG(("%d:setting %d = %ld-%ld\n",sub->id,n,LOFF(begin),LOFF(end)));
672677
v->pmatch[n].rm_so=OFF(begin);
673678
v->pmatch[n].rm_eo=OFF(end);
674679
}
@@ -697,7 +702,7 @@ cdissect(struct vars *v,
697702
inter;
698703

699704
assert(t!=NULL);
700-
MDEBUG(("cdissect %ld-%ld %c\n",LOFF(begin),LOFF(end),t->op));
705+
MDEBUG(("%d:cdissect %c %ld-%ld\n",t->id,t->op,LOFF(begin),LOFF(end)));
701706

702707
/* handy place to check for operation cancel */
703708
if (CANCEL_REQUESTED(v->re))
@@ -779,14 +784,14 @@ ccondissect(struct vars *v,
779784
NOERR();
780785
d2=getsubdfa(v,t->right);
781786
NOERR();
782-
MDEBUG(("cconcat %d\n",t->id));
787+
MDEBUG(("%d: ccondissect %ld-%ld\n",t->id,LOFF(begin),LOFF(end)));
783788

784789
/* pick a tentative midpoint */
785790
mid=longest(v,d,begin,end, (int*)NULL);
786791
NOERR();
787792
if (mid==NULL)
788793
returnREG_NOMATCH;
789-
MDEBUG(("tentative midpoint %ld\n",LOFF(mid)));
794+
MDEBUG(("%d:tentative midpoint %ld\n",t->id,LOFF(mid)));
790795

791796
/* iterate until satisfaction or failure */
792797
for (;;)
@@ -801,7 +806,7 @@ ccondissect(struct vars *v,
801806
if (er==REG_OKAY)
802807
{
803808
/* satisfaction */
804-
MDEBUG(("successful\n"));
809+
MDEBUG(("%d:successful\n",t->id));
805810
returnREG_OKAY;
806811
}
807812
}
@@ -814,15 +819,15 @@ ccondissect(struct vars *v,
814819
if (mid==begin)
815820
{
816821
/* all possibilities exhausted */
817-
MDEBUG(("%d no midpoint\n",t->id));
822+
MDEBUG(("%d: no midpoint\n",t->id));
818823
returnREG_NOMATCH;
819824
}
820825
mid=longest(v,d,begin,mid-1, (int*)NULL);
821826
NOERR();
822827
if (mid==NULL)
823828
{
824829
/* failed to find a new one */
825-
MDEBUG(("%d failed midpoint\n",t->id));
830+
MDEBUG(("%d: failed midpoint\n",t->id));
826831
returnREG_NOMATCH;
827832
}
828833
MDEBUG(("%d: new midpoint %ld\n",t->id,LOFF(mid)));
@@ -857,14 +862,14 @@ crevcondissect(struct vars *v,
857862
NOERR();
858863
d2=getsubdfa(v,t->right);
859864
NOERR();
860-
MDEBUG(("crevcon %d\n",t->id));
865+
MDEBUG(("%d: crevcondissect %ld-%ld\n",t->id,LOFF(begin),LOFF(end)));
861866

862867
/* pick a tentative midpoint */
863868
mid=shortest(v,d,begin,begin,end, (chr**)NULL, (int*)NULL);
864869
NOERR();
865870
if (mid==NULL)
866871
returnREG_NOMATCH;
867-
MDEBUG(("tentative midpoint %ld\n",LOFF(mid)));
872+
MDEBUG(("%d:tentative midpoint %ld\n",t->id,LOFF(mid)));
868873

869874
/* iterate until satisfaction or failure */
870875
for (;;)
@@ -879,7 +884,7 @@ crevcondissect(struct vars *v,
879884
if (er==REG_OKAY)
880885
{
881886
/* satisfaction */
882-
MDEBUG(("successful\n"));
887+
MDEBUG(("%d:successful\n",t->id));
883888
returnREG_OKAY;
884889
}
885890
}
@@ -892,15 +897,15 @@ crevcondissect(struct vars *v,
892897
if (mid==end)
893898
{
894899
/* all possibilities exhausted */
895-
MDEBUG(("%d no midpoint\n",t->id));
900+
MDEBUG(("%d: no midpoint\n",t->id));
896901
returnREG_NOMATCH;
897902
}
898903
mid=shortest(v,d,begin,mid+1,end, (chr**)NULL, (int*)NULL);
899904
NOERR();
900905
if (mid==NULL)
901906
{
902907
/* failed to find a new one */
903-
MDEBUG(("%d failed midpoint\n",t->id));
908+
MDEBUG(("%d: failed midpoint\n",t->id));
904909
returnREG_NOMATCH;
905910
}
906911
MDEBUG(("%d: new midpoint %ld\n",t->id,LOFF(mid)));
@@ -935,7 +940,8 @@ cbrdissect(struct vars *v,
935940
assert(n >=0);
936941
assert((size_t)n<v->nmatch);
937942

938-
MDEBUG(("cbackref n%d %d{%d-%d}\n",t->id,n,min,max));
943+
MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n",t->id,n,min,max,
944+
LOFF(begin),LOFF(end)));
939945

940946
/* get the backreferenced string */
941947
if (v->pmatch[n].rm_so==-1)
@@ -952,7 +958,7 @@ cbrdissect(struct vars *v,
952958
*/
953959
if (begin==end&&min <=max)
954960
{
955-
MDEBUG(("cbackrefmatched trivially\n"));
961+
MDEBUG(("%d: backrefmatched trivially\n",t->id));
956962
returnREG_OKAY;
957963
}
958964
returnREG_NOMATCH;
@@ -962,7 +968,7 @@ cbrdissect(struct vars *v,
962968
/* matches only if zero repetitions are okay */
963969
if (min==0)
964970
{
965-
MDEBUG(("cbackrefmatched trivially\n"));
971+
MDEBUG(("%d: backrefmatched trivially\n",t->id));
966972
returnREG_OKAY;
967973
}
968974
returnREG_NOMATCH;
@@ -989,7 +995,7 @@ cbrdissect(struct vars *v,
989995
p+=brlen;
990996
}
991997

992-
MDEBUG(("cbackrefmatched\n"));
998+
MDEBUG(("%d: backrefmatched\n",t->id));
993999
returnREG_OKAY;
9941000
}
9951001

@@ -1011,13 +1017,13 @@ caltdissect(struct vars *v,
10111017
assert(t->op=='|');
10121018
assert(t->left!=NULL&&t->left->cnfa.nstates>0);
10131019

1014-
MDEBUG(("calt n%d\n",t->id));
1020+
MDEBUG(("%d: caltdissect %ld-%ld\n",t->id,LOFF(begin),LOFF(end)));
10151021

10161022
d=getsubdfa(v,t->left);
10171023
NOERR();
10181024
if (longest(v,d,begin,end, (int*)NULL)==end)
10191025
{
1020-
MDEBUG(("caltmatched\n"));
1026+
MDEBUG(("%d: caltdissectmatched\n",t->id));
10211027
er=cdissect(v,t->left,begin,end);
10221028
if (er!=REG_NOMATCH)
10231029
returner;
@@ -1054,6 +1060,8 @@ citerdissect(struct vars *v,
10541060
assert(!(t->left->flags&SHORTER));
10551061
assert(begin <=end);
10561062

1063+
MDEBUG(("%d: citerdissect %ld-%ld\n",t->id,LOFF(begin),LOFF(end)));
1064+
10571065
/*
10581066
* For the moment, assume the minimum number of matches is 1. If zero
10591067
* matches are allowed, and the target string is empty, we are allowed to
@@ -1092,7 +1100,6 @@ citerdissect(struct vars *v,
10921100
FREE(endpts);
10931101
returnv->err;
10941102
}
1095-
MDEBUG(("citer %d\n",t->id));
10961103

10971104
/*
10981105
* Our strategy is to first find a set of sub-match endpoints that are
@@ -1182,7 +1189,7 @@ citerdissect(struct vars *v,
11821189
if (i>k)
11831190
{
11841191
/* satisfaction */
1185-
MDEBUG(("%d successful\n",t->id));
1192+
MDEBUG(("%d: successful\n",t->id));
11861193
FREE(endpts);
11871194
returnREG_OKAY;
11881195
}
@@ -1223,11 +1230,11 @@ citerdissect(struct vars *v,
12231230
*/
12241231
if (t->min==0&&begin==end)
12251232
{
1226-
MDEBUG(("%d allowing zero matches\n",t->id));
1233+
MDEBUG(("%d: allowing zero matches\n",t->id));
12271234
returnREG_OKAY;
12281235
}
12291236

1230-
MDEBUG(("%d failed\n",t->id));
1237+
MDEBUG(("%d: failed\n",t->id));
12311238
returnREG_NOMATCH;
12321239
}
12331240

@@ -1255,6 +1262,8 @@ creviterdissect(struct vars *v,
12551262
assert(t->left->flags&SHORTER);
12561263
assert(begin <=end);
12571264

1265+
MDEBUG(("%d: creviterdissect %ld-%ld\n",t->id,LOFF(begin),LOFF(end)));
1266+
12581267
/*
12591268
* If zero matches are allowed, and target string is empty, just declare
12601269
* victory. OTOH, if target string isn't empty, zero matches can't work
@@ -1264,7 +1273,10 @@ creviterdissect(struct vars *v,
12641273
if (min_matches <=0)
12651274
{
12661275
if (begin==end)
1276+
{
1277+
MDEBUG(("%d: allowing zero matches\n",t->id));
12671278
returnREG_OKAY;
1279+
}
12681280
min_matches=1;
12691281
}
12701282

@@ -1293,7 +1305,6 @@ creviterdissect(struct vars *v,
12931305
FREE(endpts);
12941306
returnv->err;
12951307
}
1296-
MDEBUG(("creviter %d\n",t->id));
12971308

12981309
/*
12991310
* Our strategy is to first find a set of sub-match endpoints that are
@@ -1389,7 +1400,7 @@ creviterdissect(struct vars *v,
13891400
if (i>k)
13901401
{
13911402
/* satisfaction */
1392-
MDEBUG(("%d successful\n",t->id));
1403+
MDEBUG(("%d: successful\n",t->id));
13931404
FREE(endpts);
13941405
returnREG_OKAY;
13951406
}
@@ -1415,7 +1426,7 @@ creviterdissect(struct vars *v,
14151426
}
14161427

14171428
/* all possibilities exhausted */
1418-
MDEBUG(("%d failed\n",t->id));
1429+
MDEBUG(("%d: failed\n",t->id));
14191430
FREE(endpts);
14201431
returnREG_NOMATCH;
14211432
}

‎src/test/modules/test_regex/expected/test_regex.out

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3315,6 +3315,21 @@ select * from test_regex('(?=b)b', 'a', 'HP');
33153315
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
33163316
(1 row)
33173317

3318+
-- expectMatch23.9 HP...(?!.)abcdecde
3319+
select * from test_regex('...(?!.)', 'abcde', 'HP');
3320+
test_regex
3321+
-----------------------------------
3322+
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
3323+
{cde}
3324+
(2 rows)
3325+
3326+
-- expectNomatch23.10 HP...(?=.)abc
3327+
select * from test_regex('...(?=.)', 'abc', 'HP');
3328+
test_regex
3329+
-----------------------------------
3330+
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
3331+
(1 row)
3332+
33183333
-- Postgres addition: lookbehind constraints
33193334
-- expectMatch23.11 HPN(?<=a)b*abb
33203335
select * from test_regex('(?<=a)b*', 'ab', 'HPN');
@@ -3376,6 +3391,39 @@ select * from test_regex('(?<=b)b', 'b', 'HP');
33763391
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
33773392
(1 row)
33783393

3394+
-- expectMatch23.19 HP(?<=.)..abcdebc
3395+
select * from test_regex('(?<=.)..', 'abcde', 'HP');
3396+
test_regex
3397+
-----------------------------------
3398+
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
3399+
{bc}
3400+
(2 rows)
3401+
3402+
-- expectMatch23.20 HP(?<=..)a*aaabba
3403+
select * from test_regex('(?<=..)a*', 'aaabb', 'HP');
3404+
test_regex
3405+
-----------------------------------
3406+
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
3407+
{a}
3408+
(2 rows)
3409+
3410+
-- expectMatch23.21 HP(?<=..)b*aaabb{}
3411+
-- Note: empty match here is correct, it matches after the first 2 characters
3412+
select * from test_regex('(?<=..)b*', 'aaabb', 'HP');
3413+
test_regex
3414+
-----------------------------------
3415+
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
3416+
{""}
3417+
(2 rows)
3418+
3419+
-- expectMatch23.22 HP(?<=..)b+aaabbbb
3420+
select * from test_regex('(?<=..)b+', 'aaabb', 'HP');
3421+
test_regex
3422+
-----------------------------------
3423+
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
3424+
{bb}
3425+
(2 rows)
3426+
33793427
-- doing 24 "non-greedy quantifiers"
33803428
-- expectMatch24.1 PTab+?abbab
33813429
select * from test_regex('ab+?', 'abb', 'PT');

‎src/test/modules/test_regex/sql/test_regex.sql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,10 @@ select * from test_regex('a(?!b)b*', 'a', 'HP');
10491049
select*from test_regex('(?=b)b','b','HP');
10501050
-- expectNomatch23.8 HP(?=b)ba
10511051
select*from test_regex('(?=b)b','a','HP');
1052+
-- expectMatch23.9 HP...(?!.)abcdecde
1053+
select*from test_regex('...(?!.)','abcde','HP');
1054+
-- expectNomatch23.10 HP...(?=.)abc
1055+
select*from test_regex('...(?=.)','abc','HP');
10521056

10531057
-- Postgres addition: lookbehind constraints
10541058

@@ -1068,6 +1072,15 @@ select * from test_regex('a(?<!b)b*', 'a', 'HP');
10681072
select*from test_regex('(?<=b)b','bb','HP');
10691073
-- expectNomatch23.18 HP(?<=b)bb
10701074
select*from test_regex('(?<=b)b','b','HP');
1075+
-- expectMatch23.19 HP(?<=.)..abcdebc
1076+
select*from test_regex('(?<=.)..','abcde','HP');
1077+
-- expectMatch23.20 HP(?<=..)a*aaabba
1078+
select*from test_regex('(?<=..)a*','aaabb','HP');
1079+
-- expectMatch23.21 HP(?<=..)b*aaabb{}
1080+
-- Note: empty match here is correct, it matches after the first 2 characters
1081+
select*from test_regex('(?<=..)b*','aaabb','HP');
1082+
-- expectMatch23.22 HP(?<=..)b+aaabbbb
1083+
select*from test_regex('(?<=..)b+','aaabb','HP');
10711084

10721085
-- doing 24 "non-greedy quantifiers"
10731086

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp