Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc6aae30

Browse files
committed
Simplify and document regex library's compact-NFA representation.
The previous coding abused the first element of a cNFA state's arcs listto hold a per-state flag bit, which was confusing, undocumented, and noteven particularly efficient. Get rid of that in favor of a separate"stflags" vector. Since there's only one bit in use, I chose to allocate achar per state; we could possibly replace this with a bitmap at some point,but that would make accesses a little slower. It's already about 8Xsmaller than before, so let's not get overly tense.Also document the representation better than it was before, which is to saynot at all.This patch is a byproduct of investigations towards extracting a "fixedprefix" string from the compact-NFA representation of regex patterns.Might need to back-patch it if we decide to back-patch that fix, but fornow it's just code cleanup so I'll just put it in HEAD.
1 parenta184e4d commitc6aae30

File tree

4 files changed

+44
-32
lines changed

4 files changed

+44
-32
lines changed

‎src/backend/regex/regc_nfa.c

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,14 +1330,16 @@ compact(struct nfa * nfa,
13301330
for (s=nfa->states;s!=NULL;s=s->next)
13311331
{
13321332
nstates++;
1333-
narcs+=1+s->nouts+1;
1334-
/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
1333+
narcs+=s->nouts+1;/* need one extra for endmarker */
13351334
}
13361335

1336+
cnfa->stflags= (char*)MALLOC(nstates*sizeof(char));
13371337
cnfa->states= (structcarc**)MALLOC(nstates*sizeof(structcarc*));
13381338
cnfa->arcs= (structcarc*)MALLOC(narcs*sizeof(structcarc));
1339-
if (cnfa->states==NULL||cnfa->arcs==NULL)
1339+
if (cnfa->stflags==NULL||cnfa->states==NULL||cnfa->arcs==NULL)
13401340
{
1341+
if (cnfa->stflags!=NULL)
1342+
FREE(cnfa->stflags);
13411343
if (cnfa->states!=NULL)
13421344
FREE(cnfa->states);
13431345
if (cnfa->arcs!=NULL)
@@ -1359,9 +1361,8 @@ compact(struct nfa * nfa,
13591361
for (s=nfa->states;s!=NULL;s=s->next)
13601362
{
13611363
assert((size_t)s->no<nstates);
1364+
cnfa->stflags[s->no]=0;
13621365
cnfa->states[s->no]=ca;
1363-
ca->co=0;/* clear and skip flags "arc" */
1364-
ca++;
13651366
first=ca;
13661367
for (a=s->outs;a!=NULL;a=a->outchain)
13671368
switch (a->type)
@@ -1392,8 +1393,8 @@ compact(struct nfa * nfa,
13921393

13931394
/* mark no-progress states */
13941395
for (a=nfa->pre->outs;a!=NULL;a=a->outchain)
1395-
cnfa->states[a->to->no]->co=1;
1396-
cnfa->states[nfa->pre->no]->co=1;
1396+
cnfa->stflags[a->to->no]=CNFA_NOPROGRESS;
1397+
cnfa->stflags[nfa->pre->no]=CNFA_NOPROGRESS;
13971398
}
13981399

13991400
/*
@@ -1433,6 +1434,7 @@ freecnfa(struct cnfa * cnfa)
14331434
{
14341435
assert(cnfa->nstates!=0);/* not empty already */
14351436
cnfa->nstates=0;
1437+
FREE(cnfa->stflags);
14361438
FREE(cnfa->states);
14371439
FREE(cnfa->arcs);
14381440
}
@@ -1617,7 +1619,7 @@ dumpcnfa(struct cnfa * cnfa,
16171619
fprintf(f,", haslacons");
16181620
fprintf(f,"\n");
16191621
for (st=0;st<cnfa->nstates;st++)
1620-
dumpcstate(st,cnfa->states[st],cnfa,f);
1622+
dumpcstate(st,cnfa,f);
16211623
fflush(f);
16221624
}
16231625
#endif
@@ -1629,22 +1631,20 @@ dumpcnfa(struct cnfa * cnfa,
16291631
*/
16301632
staticvoid
16311633
dumpcstate(intst,
1632-
structcarc*ca,
16331634
structcnfa*cnfa,
16341635
FILE*f)
16351636
{
1636-
inti;
1637+
structcarc*ca;
16371638
intpos;
16381639

1639-
fprintf(f,"%d%s",st, (ca[0].co) ?":" :".");
1640+
fprintf(f,"%d%s",st, (cnfa->stflags[st]&CNFA_NOPROGRESS) ?":" :".");
16401641
pos=1;
1641-
for (i=1;ca[i].co!=COLORLESS;i++)
1642+
for (ca=cnfa->states[st];ca->co!=COLORLESS;ca++)
16421643
{
1643-
if (ca[i].co<cnfa->ncolors)
1644-
fprintf(f,"\t[%ld]->%d", (long)ca[i].co,ca[i].to);
1644+
if (ca->co<cnfa->ncolors)
1645+
fprintf(f,"\t[%ld]->%d", (long)ca->co,ca->to);
16451646
else
1646-
fprintf(f,"\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors,
1647-
ca[i].to);
1647+
fprintf(f,"\t:%ld:->%d", (long) (ca->co-cnfa->ncolors),ca->to);
16481648
if (pos==5)
16491649
{
16501650
fprintf(f,"\n");
@@ -1653,7 +1653,7 @@ dumpcstate(int st,
16531653
else
16541654
pos++;
16551655
}
1656-
if (i==1||pos!=1)
1656+
if (ca==cnfa->states[st]||pos!=1)
16571657
fprintf(f,"\n");
16581658
fflush(f);
16591659
}

‎src/backend/regex/regcomp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ static void dumparcs(struct state *, FILE *);
162162
staticintdumprarcs(structarc*,structstate*,FILE*,int);
163163
staticvoiddumparc(structarc*,structstate*,FILE*);
164164
staticvoiddumpcnfa(structcnfa*,FILE*);
165-
staticvoiddumpcstate(int,structcarc*,structcnfa*,FILE*);
165+
staticvoiddumpcstate(int,structcnfa*,FILE*);
166166
#endif
167167
/* === regc_cvec.c === */
168168
staticstructcvec*newcvec(int,int);

‎src/backend/regex/rege_dfa.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -457,14 +457,14 @@ miss(struct vars * v,/* used only for debug flags */
457457
gotstate=0;
458458
for (i=0;i<d->nstates;i++)
459459
if (ISBSET(css->states,i))
460-
for (ca=cnfa->states[i]+1;ca->co!=COLORLESS;ca++)
460+
for (ca=cnfa->states[i];ca->co!=COLORLESS;ca++)
461461
if (ca->co==co)
462462
{
463463
BSET(d->work,ca->to);
464464
gotstate=1;
465465
if (ca->to==cnfa->post)
466466
ispost=1;
467-
if (!cnfa->states[ca->to]->co)
467+
if (!(cnfa->stflags[ca->to]&CNFA_NOPROGRESS))
468468
noprogress=0;
469469
FDEBUG(("%d -> %d\n",i,ca->to));
470470
}
@@ -475,10 +475,9 @@ miss(struct vars * v,/* used only for debug flags */
475475
dolacons=0;
476476
for (i=0;i<d->nstates;i++)
477477
if (ISBSET(d->work,i))
478-
for (ca=cnfa->states[i]+1;ca->co!=COLORLESS;
479-
ca++)
478+
for (ca=cnfa->states[i];ca->co!=COLORLESS;ca++)
480479
{
481-
if (ca->co <=cnfa->ncolors)
480+
if (ca->co<cnfa->ncolors)
482481
continue;/* NOTE CONTINUE */
483482
sawlacons=1;
484483
if (ISBSET(d->work,ca->to))
@@ -489,7 +488,7 @@ miss(struct vars * v,/* used only for debug flags */
489488
dolacons=1;
490489
if (ca->to==cnfa->post)
491490
ispost=1;
492-
if (!cnfa->states[ca->to]->co)
491+
if (!(cnfa->stflags[ca->to]&CNFA_NOPROGRESS))
493492
noprogress=0;
494493
FDEBUG(("%d :> %d\n",i,ca->to));
495494
}

‎src/include/regex/regguts.h

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -279,15 +279,14 @@ struct state;
279279

280280
structarc
281281
{
282-
inttype;
283-
#defineARCFREE '\0'
282+
inttype;/* 0 if free, else an NFA arc type code */
284283
colorco;
285284
structstate*from;/* where it's from (and contained within) */
286285
structstate*to;/* where it's to */
287-
structarc*outchain;/* *from's outs chain or free chain */
286+
structarc*outchain;/*link in*from's outs chain or free chain */
288287
#definefreechain outchain
289-
structarc*inchain;/* *to's ins chain */
290-
structarc*colorchain;/* color's arc chain */
288+
structarc*inchain;/*link in*to's ins chain */
289+
structarc*colorchain;/*link incolor's arc chain */
291290
structarc*colorchainRev;/* back-link in color's arc chain */
292291
};
293292

@@ -339,24 +338,38 @@ struct nfa
339338

340339
/*
341340
* definitions for compacted NFA
341+
*
342+
* The main space savings in a compacted NFA is from making the arcs as small
343+
* as possible. We store only the transition color and next-state number for
344+
* each arc. The list of out arcs for each state is an array beginning at
345+
* cnfa.states[statenumber], and terminated by a dummy carc struct with
346+
* co == COLORLESS.
347+
*
348+
* The non-dummy carc structs are of two types: plain arcs and LACON arcs.
349+
* Plain arcs just store the transition color number as "co". LACON arcs
350+
* store the lookahead constraint number plus cnfa.ncolors as "co". LACON
351+
* arcs can be distinguished from plain by testing for co >= cnfa.ncolors.
342352
*/
343353
structcarc
344354
{
345355
colorco;/* COLORLESS is list terminator */
346-
intto;/* state number */
356+
intto;/*next-state number */
347357
};
348358

349359
structcnfa
350360
{
351361
intnstates;/* number of states */
352-
intncolors;/* number of colors */
362+
intncolors;/* number of colors(max color in use + 1)*/
353363
intflags;
354-
#defineHASLACONS01/* uses lookahead constraints */
364+
#defineHASLACONS01/* uses lookahead constraints */
355365
intpre;/* setup state number */
356366
intpost;/* teardown state number */
357367
colorbos[2];/* colors, if any, assigned to BOS and BOL */
358368
coloreos[2];/* colors, if any, assigned to EOS and EOL */
369+
char*stflags;/* vector of per-state flags bytes */
370+
#defineCNFA_NOPROGRESS01/* flag bit for a no-progress state */
359371
structcarc**states;/* vector of pointers to outarc lists */
372+
/* states[n] are pointers into a single malloc'd array of arcs */
360373
structcarc*arcs;/* the area for the lists */
361374
};
362375

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp