Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit538b3b8

Browse files
committed
Improve memory-usage accounting in regular-expression compiler.
This code previously counted the number of NFA states it created, andcomplained if a limit was exceeded, so as to prevent bizarre regex patternsfrom consuming unreasonable time or memory. That's fine as far as it went,but the code paid no attention to how many arcs linked those states. Sinceregexes can be contrived that have O(N) states but will need O(N^2) arcsafter fixempties() processing, it was still possible to blow out memory,and take a long time doing it too. To fix, modify the bookkeeping to countspace used by both states and arcs.I did not bother with including the "color map" in the accounting; itcan only grow to a few megabytes, which is not a lot in comparison towhat we're allowing for states+arcs (about 150MB on 64-bit machinesor half that on 32-bit machines).Looking at some of the larger real-world regexes captured in the Tclregression test suite suggests that the most that is likely to be neededfor regexes found in the wild is under 10MB, so I believe that the currentlimit has enough headroom to make it okay to keep it as a hard-wired limit.In connection with this, redefine REG_ETOOBIG as meaning "regularexpression is too complex"; the previous wording of "nfa has too manystates" was already somewhat inapropos because of the error code's usefor stack depth overrun, and it was not very user-friendly either.Back-patch to all supported branches.
1 parent6a71536 commit538b3b8

File tree

5 files changed

+27
-69
lines changed

5 files changed

+27
-69
lines changed

‎src/backend/regex/regc_nfa.c

Lines changed: 14 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ newnfa(struct vars * v,
6363
nfa->nstates=0;
6464
nfa->cm=cm;
6565
nfa->v=v;
66-
nfa->size=0;
6766
nfa->bos[0]=nfa->bos[1]=COLORLESS;
6867
nfa->eos[0]=nfa->eos[1]=COLORLESS;
6968
nfa->parent=parent;/* Precedes newfstate so parent is valid. */
@@ -92,57 +91,6 @@ newnfa(struct vars * v,
9291
returnnfa;
9392
}
9493

95-
/*
96-
* TooManyStates - checks if the max states exceeds the compile-time value
97-
*/
98-
staticint
99-
TooManyStates(structnfa*nfa)
100-
{
101-
structnfa*parent=nfa->parent;
102-
size_tsz=nfa->size;
103-
104-
while (parent!=NULL)
105-
{
106-
sz=parent->size;
107-
parent=parent->parent;
108-
}
109-
if (sz>REG_MAX_STATES)
110-
return1;
111-
return0;
112-
}
113-
114-
/*
115-
* IncrementSize - increases the tracked size of the NFA and its parents.
116-
*/
117-
staticvoid
118-
IncrementSize(structnfa*nfa)
119-
{
120-
structnfa*parent=nfa->parent;
121-
122-
nfa->size++;
123-
while (parent!=NULL)
124-
{
125-
parent->size++;
126-
parent=parent->parent;
127-
}
128-
}
129-
130-
/*
131-
* DecrementSize - decreases the tracked size of the NFA and its parents.
132-
*/
133-
staticvoid
134-
DecrementSize(structnfa*nfa)
135-
{
136-
structnfa*parent=nfa->parent;
137-
138-
nfa->size--;
139-
while (parent!=NULL)
140-
{
141-
parent->size--;
142-
parent=parent->parent;
143-
}
144-
}
145-
14694
/*
14795
* freenfa - free an entire NFA
14896
*/
@@ -188,25 +136,25 @@ newstate(struct nfa * nfa)
188136
returnNULL;
189137
}
190138

191-
if (TooManyStates(nfa))
192-
{
193-
NERR(REG_ETOOBIG);
194-
returnNULL;
195-
}
196-
197139
if (nfa->free!=NULL)
198140
{
199141
s=nfa->free;
200142
nfa->free=s->next;
201143
}
202144
else
203145
{
146+
if (nfa->v->spaceused >=REG_MAX_COMPILE_SPACE)
147+
{
148+
NERR(REG_ETOOBIG);
149+
returnNULL;
150+
}
204151
s= (structstate*)MALLOC(sizeof(structstate));
205152
if (s==NULL)
206153
{
207154
NERR(REG_ESPACE);
208155
returnNULL;
209156
}
157+
nfa->v->spaceused+=sizeof(structstate);
210158
s->oas.next=NULL;
211159
s->free=NULL;
212160
s->noas=0;
@@ -230,8 +178,6 @@ newstate(struct nfa * nfa)
230178
}
231179
s->prev=nfa->slast;
232180
nfa->slast=s;
233-
/* track the current size and the parent size */
234-
IncrementSize(nfa);
235181
returns;
236182
}
237183

@@ -294,7 +240,6 @@ freestate(struct nfa * nfa,
294240
s->prev=NULL;
295241
s->next=nfa->free;/* don't delete it, put it on the free list */
296242
nfa->free=s;
297-
DecrementSize(nfa);
298243
}
299244

300245
/*
@@ -312,11 +257,13 @@ destroystate(struct nfa * nfa,
312257
{
313258
abnext=ab->next;
314259
FREE(ab);
260+
nfa->v->spaceused-=sizeof(structarcbatch);
315261
}
316262
s->ins=NULL;
317263
s->outs=NULL;
318264
s->next=NULL;
319265
FREE(s);
266+
nfa->v->spaceused-=sizeof(structstate);
320267
}
321268

322269
/*
@@ -437,12 +384,18 @@ allocarc(struct nfa * nfa,
437384
structarcbatch*newAb;
438385
inti;
439386

387+
if (nfa->v->spaceused >=REG_MAX_COMPILE_SPACE)
388+
{
389+
NERR(REG_ETOOBIG);
390+
returnNULL;
391+
}
440392
newAb= (structarcbatch*)MALLOC(sizeof(structarcbatch));
441393
if (newAb==NULL)
442394
{
443395
NERR(REG_ESPACE);
444396
returnNULL;
445397
}
398+
nfa->v->spaceused+=sizeof(structarcbatch);
446399
newAb->next=s->oas.next;
447400
s->oas.next=newAb;
448401

‎src/backend/regex/regcomp.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ struct vars
248248
structcvec*cv2;/* utility cvec */
249249
structsubre*lacons;/* lookahead-constraint vector */
250250
intnlacons;/* size of lacons */
251+
size_tspaceused;/* approx. space used for compilation */
251252
};
252253

253254
/* parsing macros; most know that `v' is the struct vars pointer */
@@ -363,6 +364,7 @@ pg_regcomp(regex_t *re,
363364
v->cv2=NULL;
364365
v->lacons=NULL;
365366
v->nlacons=0;
367+
v->spaceused=0;
366368
re->re_magic=REMAGIC;
367369
re->re_info=0;/* bits get set during parse */
368370
re->re_csize=sizeof(chr);

‎src/include/regex/regerrs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@
7575
},
7676

7777
{
78-
REG_ETOOBIG,"REG_ETOOBIG","nfa hastoomany states"
78+
REG_ETOOBIG,"REG_ETOOBIG","regular expression istoocomplex"
7979
},
8080

8181
{

‎src/include/regex/regex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ typedef struct
152152
#defineREG_INVARG16/* invalid argument to regex function */
153153
#defineREG_MIXED17/* character widths of regex and string differ */
154154
#defineREG_BADOPT18/* invalid embedded option */
155-
#defineREG_ETOOBIG 19/*nfa hastoomany states */
155+
#defineREG_ETOOBIG 19/*regular expression istoocomplex */
156156
#defineREG_ECOLORS 20/* too many colors */
157157
#defineREG_CANCEL21/* operation cancelled */
158158
/* two specials for debugging and testing */

‎src/include/regex/regguts.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -334,9 +334,6 @@ struct nfa
334334
structcolormap*cm;/* the color map */
335335
colorbos[2];/* colors, if any, assigned to BOS and BOL */
336336
coloreos[2];/* colors, if any, assigned to EOS and EOL */
337-
size_tsize;/* Current NFA size; differs from nstates as
338-
* it also counts the number of states in
339-
* children of this NFA. */
340337
structvars*v;/* simplifies compile error reporting */
341338
structnfa*parent;/* parent NFA, if any */
342339
};
@@ -384,10 +381,16 @@ struct cnfa
384381
#defineNULLCNFA(cnfa)((cnfa).nstates == 0)
385382

386383
/*
387-
* Used to limit the maximum NFA size to something sane. [Tcl Bug 1810264]
384+
* This symbol limits the transient heap space used by the regex compiler,
385+
* and thereby also the maximum complexity of NFAs that we'll deal with.
386+
* Currently we only count NFA states and arcs against this; the other
387+
* transient data is generally not large enough to notice compared to those.
388+
* Note that we do not charge anything for the final output data structures
389+
* (the compacted NFA and the colormap).
388390
*/
389-
#ifndefREG_MAX_STATES
390-
#defineREG_MAX_STATES100000
391+
#ifndefREG_MAX_COMPILE_SPACE
392+
#defineREG_MAX_COMPILE_SPACE \
393+
(100000 * sizeof(struct state) + 100000 * sizeof(struct arcbatch))
391394
#endif
392395

393396
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp