Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitee3a81f

Browse files
committed
Change regexp engine's ccondissect/crevdissect routines to perform DFA
matching before recursing instead of after. The DFA match eliminatesunworkable midpoint choices a lot faster than the recursive check, in mostcases, so doing it first can speed things up; particularly in pathologicalcases such as recently exhibited by Michael Glaesemann.In addition, apply some cosmetic changes that were applied upstream (in theTcl project) at the same time, in order to sync with upstream version 1.15of regexec.c.Upstream apparently intends to backpatch this, so I will too. Thepathological behavior could be unpleasant if encountered in the field,which seems to justify any risk of introducing new bugs.Tom Lane, reviewed by Donal K. Fellows of Tcl project
1 parentc85c941 commitee3a81f

File tree

1 file changed

+66
-50
lines changed

1 file changed

+66
-50
lines changed

‎src/backend/regex/regexec.c

Lines changed: 66 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
2828
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929
*
30-
* $PostgreSQL: pgsql/src/backend/regex/regexec.c,v 1.27 2005/10/15 02:49:24 momjian Exp $
30+
* $PostgreSQL: pgsql/src/backend/regex/regexec.c,v 1.28 2010/02/01 02:45:29 tgl Exp $
3131
*
3232
*/
3333

@@ -141,6 +141,7 @@ static intdissect(struct vars *, struct subre *, chr *, chr *);
141141
staticintcondissect(structvars*,structsubre*,chr*,chr*);
142142
staticintaltdissect(structvars*,structsubre*,chr*,chr*);
143143
staticintcdissect(structvars*,structsubre*,chr*,chr*);
144+
staticintccaptdissect(structvars*,structsubre*,chr*,chr*);
144145
staticintccondissect(structvars*,structsubre*,chr*,chr*);
145146
staticintcrevdissect(structvars*,structsubre*,chr*,chr*);
146147
staticintcbrdissect(structvars*,structsubre*,chr*,chr*);
@@ -560,27 +561,21 @@ dissect(struct vars * v,
560561
case'=':/* terminal node */
561562
assert(t->left==NULL&&t->right==NULL);
562563
returnREG_OKAY;/* no action, parent did the work */
563-
break;
564564
case'|':/* alternation */
565565
assert(t->left!=NULL);
566566
returnaltdissect(v,t,begin,end);
567-
break;
568567
case'b':/* back ref -- shouldn't be calling us! */
569568
returnREG_ASSERT;
570-
break;
571569
case'.':/* concatenation */
572570
assert(t->left!=NULL&&t->right!=NULL);
573571
returncondissect(v,t,begin,end);
574-
break;
575572
case'(':/* capturing */
576573
assert(t->left!=NULL&&t->right==NULL);
577574
assert(t->subno>0);
578575
subset(v,t,begin,end);
579576
returndissect(v,t->left,begin,end);
580-
break;
581577
default:
582578
returnREG_ASSERT;
583-
break;
584579
}
585580
}
586581

@@ -710,8 +705,6 @@ cdissect(struct vars * v,
710705
chr*begin,/* beginning of relevant substring */
711706
chr*end)/* end of same */
712707
{
713-
inter;
714-
715708
assert(t!=NULL);
716709
MDEBUG(("cdissect %ld-%ld %c\n",LOFF(begin),LOFF(end),t->op));
717710

@@ -720,33 +713,42 @@ cdissect(struct vars * v,
720713
case'=':/* terminal node */
721714
assert(t->left==NULL&&t->right==NULL);
722715
returnREG_OKAY;/* no action, parent did the work */
723-
break;
724716
case'|':/* alternation */
725717
assert(t->left!=NULL);
726718
returncaltdissect(v,t,begin,end);
727-
break;
728719
case'b':/* back ref -- shouldn't be calling us! */
729720
assert(t->left==NULL&&t->right==NULL);
730721
returncbrdissect(v,t,begin,end);
731-
break;
732722
case'.':/* concatenation */
733723
assert(t->left!=NULL&&t->right!=NULL);
734724
returnccondissect(v,t,begin,end);
735-
break;
736725
case'(':/* capturing */
737726
assert(t->left!=NULL&&t->right==NULL);
738-
assert(t->subno>0);
739-
er=cdissect(v,t->left,begin,end);
740-
if (er==REG_OKAY)
741-
subset(v,t,begin,end);
742-
returner;
743-
break;
727+
returnccaptdissect(v,t,begin,end);
744728
default:
745729
returnREG_ASSERT;
746-
break;
747730
}
748731
}
749732

733+
/*
734+
* ccaptdissect - capture subexpression matches (with complications)
735+
*/
736+
staticint/* regexec return code */
737+
ccaptdissect(structvars*v,
738+
structsubre*t,
739+
chr*begin,/* beginning of relevant substring */
740+
chr*end)/* end of same */
741+
{
742+
inter;
743+
744+
assert(t->subno>0);
745+
746+
er=cdissect(v,t->left,begin,end);
747+
if (er==REG_OKAY)
748+
subset(v,t,begin,end);
749+
returner;
750+
}
751+
750752
/*
751753
* ccondissect - concatenation subexpression matches (with complications)
752754
* The retry memory stores the offset of the trial midpoint from begin,
@@ -804,17 +806,27 @@ ccondissect(struct vars * v,
804806
for (;;)
805807
{
806808
/* try this midpoint on for size */
807-
er=cdissect(v,t->left,begin,mid);
808-
if (er==REG_OKAY&&
809-
longest(v,d2,mid,end, (int*)NULL)==end&&
810-
(er=cdissect(v,t->right,mid,end))==
811-
REG_OKAY)
812-
break;/* NOTE BREAK OUT */
813-
if (er!=REG_OKAY&&er!=REG_NOMATCH)
809+
if (longest(v,d2,mid,end, (int*)NULL)==end)
814810
{
815-
freedfa(d);
816-
freedfa(d2);
817-
returner;
811+
er=cdissect(v,t->left,begin,mid);
812+
if (er==REG_OKAY)
813+
{
814+
er=cdissect(v,t->right,mid,end);
815+
if (er==REG_OKAY)
816+
{
817+
/* satisfaction */
818+
MDEBUG(("successful\n"));
819+
freedfa(d);
820+
freedfa(d2);
821+
returnREG_OKAY;
822+
}
823+
}
824+
if (er!=REG_OKAY&&er!=REG_NOMATCH)
825+
{
826+
freedfa(d);
827+
freedfa(d2);
828+
returner;
829+
}
818830
}
819831

820832
/* that midpoint didn't work, find a new one */
@@ -841,11 +853,8 @@ ccondissect(struct vars * v,
841853
zapmem(v,t->right);
842854
}
843855

844-
/* satisfaction */
845-
MDEBUG(("successful\n"));
846-
freedfa(d);
847-
freedfa(d2);
848-
returnREG_OKAY;
856+
/* can't get here */
857+
returnREG_ASSERT;
849858
}
850859

851860
/*
@@ -904,17 +913,27 @@ crevdissect(struct vars * v,
904913
for (;;)
905914
{
906915
/* try this midpoint on for size */
907-
er=cdissect(v,t->left,begin,mid);
908-
if (er==REG_OKAY&&
909-
longest(v,d2,mid,end, (int*)NULL)==end&&
910-
(er=cdissect(v,t->right,mid,end))==
911-
REG_OKAY)
912-
break;/* NOTE BREAK OUT */
913-
if (er!=REG_OKAY&&er!=REG_NOMATCH)
916+
if (longest(v,d2,mid,end, (int*)NULL)==end)
914917
{
915-
freedfa(d);
916-
freedfa(d2);
917-
returner;
918+
er=cdissect(v,t->left,begin,mid);
919+
if (er==REG_OKAY)
920+
{
921+
er=cdissect(v,t->right,mid,end);
922+
if (er==REG_OKAY)
923+
{
924+
/* satisfaction */
925+
MDEBUG(("successful\n"));
926+
freedfa(d);
927+
freedfa(d2);
928+
returnREG_OKAY;
929+
}
930+
}
931+
if (er!=REG_OKAY&&er!=REG_NOMATCH)
932+
{
933+
freedfa(d);
934+
freedfa(d2);
935+
returner;
936+
}
918937
}
919938

920939
/* that midpoint didn't work, find a new one */
@@ -941,11 +960,8 @@ crevdissect(struct vars * v,
941960
zapmem(v,t->right);
942961
}
943962

944-
/* satisfaction */
945-
MDEBUG(("successful\n"));
946-
freedfa(d);
947-
freedfa(d2);
948-
returnREG_OKAY;
963+
/* can't get here */
964+
returnREG_ASSERT;
949965
}
950966

951967
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp