Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit46a25ce

Browse files
committed
1. Fix bug with very short words: the prefix and suffix might overlap. Sorry, but the fix can't be applied to previous versions: it requires refilling tsvector.
2. Small optimization of load time for huge dictionaries.
3. Use palloc instead of malloc while loading the dict file.
1 parenta815a57 commit46a25ce

File tree

2 files changed

+62
-53
lines changed

2 files changed

+62
-53
lines changed

‎contrib/tsearch2/ispell/spell.c

Lines changed: 57 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,19 @@
1818
#defineGETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
1919
#defineGETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
2020

21+
staticchar*VoidString="";
2122

2223
#defineMEMOUT(X) if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")))
2324

2425
staticint
2526
cmpspell(constvoid*s1,constvoid*s2)
2627
{
27-
return (strcmp(((constSPELL*)s1)->word, ((constSPELL*)s2)->word));
28+
return (strcmp((*(constSPELL**)s1)->word, (*(constSPELL**)s2)->word));
2829
}
2930
staticint
3031
cmpspellaffix(constvoid*s1,constvoid*s2)
3132
{
32-
return (strcmp(((constSPELL*)s1)->p.flag, ((constSPELL*)s2)->p.flag));
33+
return (strcmp((*(constSPELL**)s1)->p.flag, (*(constSPELL**)s2)->p.flag));
3334
}
3435

3536
staticchar*
@@ -128,18 +129,17 @@ NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
128129
if (Conf->mspell)
129130
{
130131
Conf->mspell+=1024*20;
131-
Conf->Spell= (SPELL*)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
132+
Conf->Spell= (SPELL**)repalloc(Conf->Spell,Conf->mspell*sizeof(SPELL*));
132133
}
133134
else
134135
{
135136
Conf->mspell=1024*20;
136-
Conf->Spell= (SPELL*)malloc(Conf->mspell*sizeof(SPELL));
137+
Conf->Spell= (SPELL**)palloc(Conf->mspell*sizeof(SPELL*));
137138
}
138-
MEMOUT(Conf->Spell);
139139
}
140-
Conf->Spell[Conf->nspell].word=strdup(word);
141-
MEMOUT(Conf->Spell[Conf->nspell].word);
142-
strncpy(Conf->Spell[Conf->nspell].p.flag,flag,16);
140+
Conf->Spell[Conf->nspell]=(SPELL*)palloc(SPELLHDRSZ+strlen(word)+1);
141+
strcpy(Conf->Spell[Conf->nspell]->word ,word);
142+
strncpy(Conf->Spell[Conf->nspell]->p.flag,flag,16);
143143
Conf->nspell++;
144144
return (0);
145145
}
@@ -261,13 +261,13 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
261261
{
262262
Conf->Affix[Conf->naffixes].issimple=1;
263263
Conf->Affix[Conf->naffixes].isregis=0;
264-
Conf->Affix[Conf->naffixes].mask=strdup("");
264+
Conf->Affix[Conf->naffixes].mask=VoidString;
265265
}
266266
elseif (RS_isRegis(mask))
267267
{
268268
Conf->Affix[Conf->naffixes].issimple=0;
269269
Conf->Affix[Conf->naffixes].isregis=1;
270-
Conf->Affix[Conf->naffixes].mask=strdup(mask);
270+
Conf->Affix[Conf->naffixes].mask=(mask&&*mask) ?strdup(mask) :VoidString;
271271
}
272272
else
273273
{
@@ -287,11 +287,13 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
287287
Conf->Affix[Conf->naffixes].flag=flag;
288288
Conf->Affix[Conf->naffixes].type=type;
289289

290-
Conf->Affix[Conf->naffixes].find=strdup(find);
290+
Conf->Affix[Conf->naffixes].find=(find&&*find) ?strdup(find) :VoidString;
291291
MEMOUT(Conf->Affix[Conf->naffixes].find);
292-
Conf->Affix[Conf->naffixes].repl=strdup(repl);
293-
MEMOUT(Conf->Affix[Conf->naffixes].repl);
294-
Conf->Affix[Conf->naffixes].replen=strlen(repl);
292+
if ( (Conf->Affix[Conf->naffixes].replen=strlen(repl))>0 ) {
293+
Conf->Affix[Conf->naffixes].repl=strdup(repl);
294+
MEMOUT(Conf->Affix[Conf->naffixes].repl);
295+
}else
296+
Conf->Affix[Conf->naffixes].repl=VoidString;
295297
Conf->naffixes++;
296298
return (0);
297299
}
@@ -506,10 +508,10 @@ mkSPNode(IspellDict * Conf, int low, int high, int level)
506508
intlownew=low;
507509

508510
for (i=low;i<high;i++)
509-
if (Conf->Spell[i].p.d.len>level&&lastchar!=Conf->Spell[i].word[level])
511+
if (Conf->Spell[i]->p.d.len>level&&lastchar!=Conf->Spell[i]->word[level])
510512
{
511513
nchar++;
512-
lastchar=Conf->Spell[i].word[level];
514+
lastchar=Conf->Spell[i]->word[level];
513515
}
514516

515517
if (!nchar)
@@ -523,34 +525,34 @@ mkSPNode(IspellDict * Conf, int low, int high, int level)
523525

524526
lastchar='\0';
525527
for (i=low;i<high;i++)
526-
if (Conf->Spell[i].p.d.len>level)
528+
if (Conf->Spell[i]->p.d.len>level)
527529
{
528-
if (lastchar!=Conf->Spell[i].word[level])
530+
if (lastchar!=Conf->Spell[i]->word[level])
529531
{
530532
if (lastchar)
531533
{
532534
data->node=mkSPNode(Conf,lownew,i,level+1);
533535
lownew=i;
534536
data++;
535537
}
536-
lastchar=Conf->Spell[i].word[level];
538+
lastchar=Conf->Spell[i]->word[level];
537539
}
538-
data->val= ((uint8*) (Conf->Spell[i].word))[level];
539-
if (Conf->Spell[i].p.d.len==level+1)
540+
data->val= ((uint8*) (Conf->Spell[i]->word))[level];
541+
if (Conf->Spell[i]->p.d.len==level+1)
540542
{
541-
if (data->isword&&data->affix!=Conf->Spell[i].p.d.affix)
543+
if (data->isword&&data->affix!=Conf->Spell[i]->p.d.affix)
542544
{
543545
/*
544546
* fprintf(stderr,"Word already exists: %s (affixes: '%s'
545-
* and '%s')\n", Conf->Spell[i].word,
547+
* and '%s')\n", Conf->Spell[i]->word,
546548
* Conf->AffixData[data->affix],
547-
* Conf->AffixData[Conf->Spell[i].p.d.affix] );
549+
* Conf->AffixData[Conf->Spell[i]->p.d.affix] );
548550
*/
549551
/* MergeAffix called a few times */
550-
data->affix=MergeAffix(Conf,data->affix,Conf->Spell[i].p.d.affix);
552+
data->affix=MergeAffix(Conf,data->affix,Conf->Spell[i]->p.d.affix);
551553
}
552554
else
553-
data->affix=Conf->Spell[i].p.d.affix;
555+
data->affix=Conf->Spell[i]->p.d.affix;
554556
data->isword=1;
555557
if (strchr(Conf->AffixData[data->affix],Conf->compoundcontrol))
556558
data->compoundallow=1;
@@ -562,18 +564,16 @@ mkSPNode(IspellDict * Conf, int low, int high, int level)
562564
returnrs;
563565
}
564566

565-
566-
567567
void
568568
NISortDictionary(IspellDict*Conf)
569569
{
570570
size_ti;
571571
intnaffix=3;
572572

573573
/* compress affixes */
574-
qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspellaffix);
574+
qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL*),cmpspellaffix);
575575
for (i=1;i<Conf->nspell;i++)
576-
if (strcmp(Conf->Spell[i].p.flag,Conf->Spell[i-1].p.flag))
576+
if (strcmp(Conf->Spell[i]->p.flag,Conf->Spell[i-1]->p.flag))
577577
naffix++;
578578

579579
Conf->AffixData= (char**)malloc(naffix*sizeof(char*));
@@ -582,28 +582,28 @@ NISortDictionary(IspellDict * Conf)
582582
naffix=1;
583583
Conf->AffixData[0]=strdup("");
584584
MEMOUT(Conf->AffixData[0]);
585-
Conf->AffixData[1]=strdup(Conf->Spell[0].p.flag);
585+
Conf->AffixData[1]=strdup(Conf->Spell[0]->p.flag);
586586
MEMOUT(Conf->AffixData[1]);
587-
Conf->Spell[0].p.d.affix=1;
588-
Conf->Spell[0].p.d.len=strlen(Conf->Spell[0].word);
587+
Conf->Spell[0]->p.d.affix=1;
588+
Conf->Spell[0]->p.d.len=strlen(Conf->Spell[0]->word);
589589
for (i=1;i<Conf->nspell;i++)
590590
{
591-
if (strcmp(Conf->Spell[i].p.flag,Conf->AffixData[naffix]))
591+
if (strcmp(Conf->Spell[i]->p.flag,Conf->AffixData[naffix]))
592592
{
593593
naffix++;
594-
Conf->AffixData[naffix]=strdup(Conf->Spell[i].p.flag);
594+
Conf->AffixData[naffix]=strdup(Conf->Spell[i]->p.flag);
595595
MEMOUT(Conf->AffixData[naffix]);
596596
}
597-
Conf->Spell[i].p.d.affix=naffix;
598-
Conf->Spell[i].p.d.len=strlen(Conf->Spell[i].word);
597+
Conf->Spell[i]->p.d.affix=naffix;
598+
Conf->Spell[i]->p.d.len=strlen(Conf->Spell[i]->word);
599599
}
600600

601-
qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
601+
qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL*),cmpspell);
602602
Conf->Dictionary=mkSPNode(Conf,0,Conf->nspell,0);
603603

604604
for (i=0;i<Conf->nspell;i++)
605-
free(Conf->Spell[i].word);
606-
free(Conf->Spell);
605+
pfree(Conf->Spell[i]);
606+
pfree(Conf->Spell);
607607
Conf->Spell=NULL;
608608
}
609609

@@ -724,7 +724,6 @@ NISortAffixes(IspellDict * Conf)
724724

725725
if (Conf->naffixes>1)
726726
qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
727-
728727
Conf->CompoundAffix=ptr= (CMPDAffix*)malloc(sizeof(CMPDAffix)*Conf->naffixes);
729728
MEMOUT(Conf->CompoundAffix);
730729
ptr->affix=NULL;
@@ -803,7 +802,7 @@ FinfAffixes(AffixNode * node, const char *word, int wrdlen, int *level, int type
803802
}
804803

805804
staticchar*
806-
CheckAffix(constchar*word,size_tlen,AFFIX*Affix,charflagflags,char*newword)
805+
CheckAffix(constchar*word,size_tlen,AFFIX*Affix,charflagflags,char*newword,int*baselen)
807806
{
808807

809808
if (flagflags&FF_COMPOUNDONLYAFX)
@@ -821,9 +820,15 @@ CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *ne
821820
{
822821
strcpy(newword,word);
823822
strcpy(newword+len-Affix->replen,Affix->find);
823+
if (baselen )/* store length of non-changed part of word */
824+
*baselen=len-Affix->replen;
824825
}
825826
else
826827
{
828+
/* if the prefix is as long as the whole non-changed part, then the word consists only of prefix and suffix,
829+
so give up */
830+
if (baselen&&*baselen+strlen(Affix->find) <=Affix->replen )
831+
returnNULL;
827832
strcpy(newword,Affix->find);
828833
strcat(newword,word+Affix->replen);
829834
}
@@ -927,7 +932,7 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
927932
break;
928933
for (j=0;j<prefix->naff;j++)
929934
{
930-
if (CheckAffix(word,wrdlen,prefix->aff[j],flag,newword))
935+
if (CheckAffix(word,wrdlen,prefix->aff[j],flag,newword,NULL))
931936
{
932937
/* prefix success */
933938
if (FindWord(Conf,newword,prefix->aff[j]->flag,flag&FF_COMPOUNDWORD)&& (cur-forms)< (MAX_NORM-1))
@@ -948,14 +953,16 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
948953
*/
949954
while (snode)
950955
{
956+
intbaselen=0;
957+
951958
/* find possible suffix */
952959
suffix=FinfAffixes(snode,word,wrdlen,&slevel,FF_SUFFIX);
953960
if (!suffix)
954961
break;
955962
/* foreach suffix check affix */
956963
for (i=0;i<suffix->naff;i++)
957964
{
958-
if (CheckAffix(word,wrdlen,suffix->aff[i],flag,newword))
965+
if (CheckAffix(word,wrdlen,suffix->aff[i],flag,newword,&baselen))
959966
{
960967
/* suffix success */
961968
if (FindWord(Conf,newword,suffix->aff[i]->flag,flag&FF_COMPOUNDWORD)&& (cur-forms)< (MAX_NORM-1))
@@ -976,7 +983,7 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
976983
break;
977984
for (j=0;j<prefix->naff;j++)
978985
{
979-
if (CheckAffix(newword,swrdlen,prefix->aff[j],flag,pnewword))
986+
if (CheckAffix(newword,swrdlen,prefix->aff[j],flag,pnewword,&baselen))
980987
{
981988
/* prefix success */
982989
intff= (prefix->aff[j]->flagflags&suffix->aff[i]->flagflags&FF_CROSSPRODUCT) ?
@@ -1323,15 +1330,15 @@ NIFree(IspellDict * Conf)
13231330
else
13241331
pg_regfree(&(Affix[i].reg.regex));
13251332
}
1326-
free(Affix[i].mask);
1327-
free(Affix[i].find);
1328-
free(Affix[i].repl);
1333+
if (Affix[i].mask!=VoidString )free(Affix[i].mask);
1334+
if (Affix[i].find!=VoidString )free(Affix[i].find);
1335+
if (Affix[i].repl!=VoidString )free(Affix[i].repl);
13291336
}
13301337
if (Conf->Spell)
13311338
{
13321339
for (i=0;i<Conf->nspell;i++)
1333-
free(Conf->Spell[i].word);
1334-
free(Conf->Spell);
1340+
pfree(Conf->Spell[i]->word);
1341+
pfree(Conf->Spell);
13351342
}
13361343

13371344
if (Conf->Affix)

‎contrib/tsearch2/ispell/spell.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ typedef struct SPNode
3232

3333
typedefstructspell_struct
3434
{
35-
char*word;
3635
union
3736
{
3837
charflag[16];
@@ -41,9 +40,12 @@ typedef struct spell_struct
4140
intaffix;
4241
intlen;
4342
}d;
44-
}p;
43+
}p;
44+
charword[1];
4545
}SPELL;
4646

47+
#defineSPELLHDRSZ(offsetof(SPELL, word))
48+
4749
typedefstructaff_struct
4850
{
4951
uint32
@@ -106,7 +108,7 @@ typedef struct
106108

107109
intnspell;
108110
intmspell;
109-
SPELL*Spell;
111+
SPELL**Spell;
110112

111113
AffixNode*Suffix;
112114
AffixNode*Prefix;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp