Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0fc1af1

Browse files
committed
Improve memory management in regex compiler.
The previous logic here created a separate pool of arcs for eachstate, so that the out-arcs of each state were physically storedwithin it. Perhaps this choice was driven by trying to not includea "from" pointer within each arc; but Spencer gave up on that idealong ago, and it's hard to see what the value is now. The approachturns out to be fairly disastrous in terms of memory consumption,though. In the first place, NFAs built by this engine seem to haveabout 4 arcs per state on average, with a majority having only oneor two out-arcs. So pre-allocating 10 out-arcs for each state isalready cause for a factor of two or more bloat. Worse, the NFAoptimization phase moves arcs around with abandon. In a large NFA,some of the states will have hundreds of out-arcs, so towards theend of the optimization phase we have a significant number of stateswhose arc pools have room for hundreds of arcs each, even though onlya few of those arcs are in use. We have seen real-world regexes inwhich this effect bloats the memory requirement by 25X or even more.Hence, get rid of the per-state arc pools in favor of a single arcpool for the whole NFA, with variable-sized allocation batchesinstead of always asking for 10 at a time. While we're at it,let's batch the allocations of state structs too, to further reducethe malloc traffic.This incidentally allows moveouts() to be optimized in a similarway to moveins(): when moving an arc to another state, it's nowvalid to just re-link the same arc struct into a different outchain,where before the code invariants required us to make a physicallynew arc and then free the old one.These changes reduce the regex compiler's typical space consumptionfor average-size regexes by about a factor of two, and much more forlarge or complicated regexes. In a large test set of real-worldregexes, we formerly had half a dozen cases that failed with "regularexpression too complex" due to exceeding the REG_MAX_COMPILE_SPACElimit (about 150MB); we would have had to raise that limit tosomething close to 400MB to make them work with the old code. Now,none of those cases need more than 13MB to compile. Furthermore,the test set is about 10% faster overall due to less malloc traffic.Discussion:https://postgr.es/m/168861.1614298592@sss.pgh.pa.us
1 parentb3a9e98 commit0fc1af1

File tree

3 files changed

+174
-119
lines changed

3 files changed

+174
-119
lines changed

‎src/backend/regex/regc_nfa.c

Lines changed: 137 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,15 @@ newnfa(struct vars *v,
5757
returnNULL;
5858
}
5959

60+
/* Make the NFA minimally valid, so freenfa() will behave sanely */
6061
nfa->states=NULL;
6162
nfa->slast=NULL;
62-
nfa->free=NULL;
63+
nfa->freestates=NULL;
64+
nfa->freearcs=NULL;
65+
nfa->lastsb=NULL;
66+
nfa->lastab=NULL;
67+
nfa->lastsbused=0;
68+
nfa->lastabused=0;
6369
nfa->nstates=0;
6470
nfa->cm=cm;
6571
nfa->v=v;
@@ -68,9 +74,10 @@ newnfa(struct vars *v,
6874
nfa->flags=0;
6975
nfa->minmatchall=nfa->maxmatchall=-1;
7076
nfa->parent=parent;/* Precedes newfstate so parent is valid. */
77+
78+
/* Create required infrastructure */
7179
nfa->post=newfstate(nfa,'@');/* number 0 */
7280
nfa->pre=newfstate(nfa,'>');/* number 1 */
73-
7481
nfa->init=newstate(nfa);/* may become invalid later */
7582
nfa->final=newstate(nfa);
7683
if (ISERR())
@@ -99,23 +106,27 @@ newnfa(struct vars *v,
99106
staticvoid
100107
freenfa(structnfa*nfa)
101108
{
102-
structstate*s;
109+
structstatebatch*sb;
110+
structstatebatch*sbnext;
111+
structarcbatch*ab;
112+
structarcbatch*abnext;
103113

104-
while ((s=nfa->states)!=NULL)
114+
for (sb=nfa->lastsb;sb!=NULL;sb=sbnext)
105115
{
106-
s->nins=s->nouts=0;/* don't worry about arcs */
107-
freestate(nfa,s);
116+
sbnext=sb->next;
117+
nfa->v->spaceused-=STATEBATCHSIZE(sb->nstates);
118+
FREE(sb);
108119
}
109-
while ((s=nfa->free)!=NULL)
120+
nfa->lastsb=NULL;
121+
for (ab=nfa->lastab;ab!=NULL;ab=abnext)
110122
{
111-
nfa->free=s->next;
112-
destroystate(nfa,s);
123+
abnext=ab->next;
124+
nfa->v->spaceused-=ARCBATCHSIZE(ab->narcs);
125+
FREE(ab);
113126
}
127+
nfa->lastab=NULL;
114128

115-
nfa->slast=NULL;
116129
nfa->nstates=-1;
117-
nfa->pre=NULL;
118-
nfa->post=NULL;
119130
FREE(nfa);
120131
}
121132

@@ -138,28 +149,43 @@ newstate(struct nfa *nfa)
138149
returnNULL;
139150
}
140151

141-
if (nfa->free!=NULL)
152+
/* first, recycle anything that's on the freelist */
153+
if (nfa->freestates!=NULL)
154+
{
155+
s=nfa->freestates;
156+
nfa->freestates=s->next;
157+
}
158+
/* otherwise, is there anything left in the last statebatch? */
159+
elseif (nfa->lastsb!=NULL&&nfa->lastsbused<nfa->lastsb->nstates)
142160
{
143-
s=nfa->free;
144-
nfa->free=s->next;
161+
s=&nfa->lastsb->s[nfa->lastsbused++];
145162
}
163+
/* otherwise, need to allocate a new statebatch */
146164
else
147165
{
166+
structstatebatch*newSb;
167+
size_tnstates;
168+
148169
if (nfa->v->spaceused >=REG_MAX_COMPILE_SPACE)
149170
{
150171
NERR(REG_ETOOBIG);
151172
returnNULL;
152173
}
153-
s= (structstate*)MALLOC(sizeof(structstate));
154-
if (s==NULL)
174+
nstates= (nfa->lastsb!=NULL) ?nfa->lastsb->nstates*2 :FIRSTSBSIZE;
175+
if (nstates>MAXSBSIZE)
176+
nstates=MAXSBSIZE;
177+
newSb= (structstatebatch*)MALLOC(STATEBATCHSIZE(nstates));
178+
if (newSb==NULL)
155179
{
156180
NERR(REG_ESPACE);
157181
returnNULL;
158182
}
159-
nfa->v->spaceused+=sizeof(structstate);
160-
s->oas.next=NULL;
161-
s->free=NULL;
162-
s->noas=0;
183+
nfa->v->spaceused+=STATEBATCHSIZE(nstates);
184+
newSb->nstates=nstates;
185+
newSb->next=nfa->lastsb;
186+
nfa->lastsb=newSb;
187+
nfa->lastsbused=1;
188+
s=&newSb->s[0];
163189
}
164190

165191
assert(nfa->nstates >=0);
@@ -240,32 +266,8 @@ freestate(struct nfa *nfa,
240266
nfa->states=s->next;
241267
}
242268
s->prev=NULL;
243-
s->next=nfa->free;/* don't delete it, put it on the free list */
244-
nfa->free=s;
245-
}
246-
247-
/*
248-
* destroystate - really get rid of an already-freed state
249-
*/
250-
staticvoid
251-
destroystate(structnfa*nfa,
252-
structstate*s)
253-
{
254-
structarcbatch*ab;
255-
structarcbatch*abnext;
256-
257-
assert(s->no==FREESTATE);
258-
for (ab=s->oas.next;ab!=NULL;ab=abnext)
259-
{
260-
abnext=ab->next;
261-
FREE(ab);
262-
nfa->v->spaceused-=sizeof(structarcbatch);
263-
}
264-
s->ins=NULL;
265-
s->outs=NULL;
266-
s->next=NULL;
267-
FREE(s);
268-
nfa->v->spaceused-=sizeof(structstate);
269+
s->next=nfa->freestates;/* don't delete it, put it on the free list */
270+
nfa->freestates=s;
269271
}
270272

271273
/*
@@ -334,8 +336,7 @@ createarc(struct nfa *nfa,
334336
{
335337
structarc*a;
336338

337-
/* the arc is physically allocated within its from-state */
338-
a=allocarc(nfa,from);
339+
a=allocarc(nfa);
339340
if (NISERR())
340341
return;
341342
assert(a!=NULL);
@@ -369,55 +370,52 @@ createarc(struct nfa *nfa,
369370
}
370371

371372
/*
372-
* allocarc - allocate a newout-arc withina state
373+
* allocarc - allocate a new arc withinan NFA
373374
*/
374375
staticstructarc*/* NULL for failure */
375-
allocarc(structnfa*nfa,
376-
structstate*s)
376+
allocarc(structnfa*nfa)
377377
{
378378
structarc*a;
379379

380-
/*shortcut */
381-
if (s->free==NULL&&s->noas<ABSIZE)
380+
/*first, recycle anything that's on the freelist */
381+
if (nfa->freearcs!=NULL)
382382
{
383-
a=&s->oas.a[s->noas];
384-
s->noas++;
385-
returna;
383+
a=nfa->freearcs;
384+
nfa->freearcs=a->freechain;
386385
}
387-
388-
/* if none at hand, get more */
389-
if (s->free==NULL)
386+
/* otherwise, is there anything left in the last arcbatch? */
387+
elseif (nfa->lastab!=NULL&&nfa->lastabused<nfa->lastab->narcs)
388+
{
389+
a=&nfa->lastab->a[nfa->lastabused++];
390+
}
391+
/* otherwise, need to allocate a new arcbatch */
392+
else
390393
{
391394
structarcbatch*newAb;
392-
inti;
395+
size_tnarcs;
393396

394397
if (nfa->v->spaceused >=REG_MAX_COMPILE_SPACE)
395398
{
396399
NERR(REG_ETOOBIG);
397400
returnNULL;
398401
}
399-
newAb= (structarcbatch*)MALLOC(sizeof(structarcbatch));
402+
narcs= (nfa->lastab!=NULL) ?nfa->lastab->narcs*2 :FIRSTABSIZE;
403+
if (narcs>MAXABSIZE)
404+
narcs=MAXABSIZE;
405+
newAb= (structarcbatch*)MALLOC(ARCBATCHSIZE(narcs));
400406
if (newAb==NULL)
401407
{
402408
NERR(REG_ESPACE);
403409
returnNULL;
404410
}
405-
nfa->v->spaceused+=sizeof(structarcbatch);
406-
newAb->next=s->oas.next;
407-
s->oas.next=newAb;
408-
409-
for (i=0;i<ABSIZE;i++)
410-
{
411-
newAb->a[i].type=0;
412-
newAb->a[i].freechain=&newAb->a[i+1];
413-
}
414-
newAb->a[ABSIZE-1].freechain=NULL;
415-
s->free=&newAb->a[0];
411+
nfa->v->spaceused+=ARCBATCHSIZE(narcs);
412+
newAb->narcs=narcs;
413+
newAb->next=nfa->lastab;
414+
nfa->lastab=newAb;
415+
nfa->lastabused=1;
416+
a=&newAb->a[0];
416417
}
417-
assert(s->free!=NULL);
418418

419-
a=s->free;
420-
s->free=a->freechain;
421419
returna;
422420
}
423421

@@ -478,25 +476,66 @@ freearc(struct nfa *nfa,
478476
}
479477
to->nins--;
480478

481-
/* clean up and place onfrom-state's free list */
479+
/* clean up and place onNFA's free list */
482480
victim->type=0;
483481
victim->from=NULL;/* precautions... */
484482
victim->to=NULL;
485483
victim->inchain=NULL;
486484
victim->inchainRev=NULL;
487485
victim->outchain=NULL;
488486
victim->outchainRev=NULL;
489-
victim->freechain=from->free;
490-
from->free=victim;
487+
victim->freechain=nfa->freearcs;
488+
nfa->freearcs=victim;
491489
}
492490

493491
/*
494-
*changearctarget - flip an arc to have a differentto state
492+
*changearcsource - flip an arc to have a differentfrom state
495493
*
496494
* Caller must have verified that there is no pre-existing duplicate arc.
495+
*/
496+
staticvoid
497+
changearcsource(structarc*a,structstate*newfrom)
498+
{
499+
structstate*oldfrom=a->from;
500+
structarc*predecessor;
501+
502+
assert(oldfrom!=newfrom);
503+
504+
/* take it off old source's out-chain */
505+
assert(oldfrom!=NULL);
506+
predecessor=a->outchainRev;
507+
if (predecessor==NULL)
508+
{
509+
assert(oldfrom->outs==a);
510+
oldfrom->outs=a->outchain;
511+
}
512+
else
513+
{
514+
assert(predecessor->outchain==a);
515+
predecessor->outchain=a->outchain;
516+
}
517+
if (a->outchain!=NULL)
518+
{
519+
assert(a->outchain->outchainRev==a);
520+
a->outchain->outchainRev=predecessor;
521+
}
522+
oldfrom->nouts--;
523+
524+
a->from=newfrom;
525+
526+
/* prepend it to new source's out-chain */
527+
a->outchain=newfrom->outs;
528+
a->outchainRev=NULL;
529+
if (newfrom->outs)
530+
newfrom->outs->outchainRev=a;
531+
newfrom->outs=a;
532+
newfrom->nouts++;
533+
}
534+
535+
/*
536+
* changearctarget - flip an arc to have a different to state
497537
*
498-
* Note that because we store arcs in their from state, we can't easily have
499-
* a similar changearcsource function.
538+
* Caller must have verified that there is no pre-existing duplicate arc.
500539
*/
501540
staticvoid
502541
changearctarget(structarc*a,structstate*newto)
@@ -1009,6 +1048,8 @@ mergeins(struct nfa *nfa,
10091048

10101049
/*
10111050
* moveouts - move all out arcs of a state to another state
1051+
*
1052+
* See comments for moveins()
10121053
*/
10131054
staticvoid
10141055
moveouts(structnfa*nfa,
@@ -1031,9 +1072,9 @@ moveouts(struct nfa *nfa,
10311072
else
10321073
{
10331074
/*
1034-
* With many arcs, use a sort-merge approach. Notethat createarc()
1035-
* will putnew arcs onto the front of newState's chain, so it does
1036-
*notbreak our walk through the sorted part of the chain.
1075+
* With many arcs, use a sort-merge approach. Notechangearcsource()
1076+
* will putthe arc onto the front of newState's chain, so it does not
1077+
* break our walk through the sorted part of the chain.
10371078
*/
10381079
structarc*oa;
10391080
structarc*na;
@@ -1063,8 +1104,12 @@ moveouts(struct nfa *nfa,
10631104
case-1:
10641105
/* newState does not have anything matching oa */
10651106
oa=oa->outchain;
1066-
createarc(nfa,a->type,a->co,newState,a->to);
1067-
freearc(nfa,a);
1107+
1108+
/*
1109+
* Rather than doing createarc+freearc, we can just unlink
1110+
* and relink the existing arc struct.
1111+
*/
1112+
changearcsource(a,newState);
10681113
break;
10691114
case0:
10701115
/* match, advance in both lists */
@@ -1087,8 +1132,7 @@ moveouts(struct nfa *nfa,
10871132
structarc*a=oa;
10881133

10891134
oa=oa->outchain;
1090-
createarc(nfa,a->type,a->co,newState,a->to);
1091-
freearc(nfa,a);
1135+
changearcsource(a,newState);
10921136
}
10931137
}
10941138

@@ -3413,7 +3457,6 @@ dumparc(struct arc *a,
34133457
FILE*f)
34143458
{
34153459
structarc*aa;
3416-
structarcbatch*ab;
34173460

34183461
fprintf(f,"\t");
34193462
switch (a->type)
@@ -3451,16 +3494,11 @@ dumparc(struct arc *a,
34513494
}
34523495
if (a->from!=s)
34533496
fprintf(f,"?%d?",a->from->no);
3454-
for (ab=&a->from->oas;ab!=NULL;ab=ab->next)
3455-
{
3456-
for (aa=&ab->a[0];aa<&ab->a[ABSIZE];aa++)
3457-
if (aa==a)
3458-
break;/* NOTE BREAK OUT */
3459-
if (aa<&ab->a[ABSIZE])/* propagate break */
3497+
for (aa=a->from->outs;aa!=NULL;aa=aa->outchain)
3498+
if (aa==a)
34603499
break;/* NOTE BREAK OUT */
3461-
}
3462-
if (ab==NULL)
3463-
fprintf(f,"?!?");/* not in allocated space */
3500+
if (aa==NULL)
3501+
fprintf(f,"?!?");/* missing from out-chain */
34643502
fprintf(f,"->");
34653503
if (a->to==NULL)
34663504
{

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp