Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 51e78ab

Browse files
committed
Avoid use of sscanf() to parse ispell dictionary files.
It turns out that on FreeBSD-derived platforms (including OS X), the *scanf() family of functions is pretty much brain-dead about multibyte characters. In particular it will apply isspace() to individual bytes of input even when those bytes are part of a multibyte character, thus allowing false recognition of a field-terminating space. We appear to have little alternative other than instituting a coding rule that *scanf() is not to be used if the input string might contain multibyte characters. (There was some discussion of relying on "%ls", but that probably just moves the portability problem somewhere else, and besides it doesn't fully prevent BSD *scanf() from using isspace().) This patch is a down payment on that: it gets rid of use of sscanf() to parse ispell dictionary files, which are certainly at great risk of having a problem. The code is cleaner this way anyway, though a bit longer. In passing, improve a few comments. Report and patch by Artur Zakirov, reviewed and somewhat tweaked by me. Back-patch to all supported branches.
1 parent c5e9b77 commit 51e78ab

File tree

1 file changed

+153
-13
lines changed

1 file changed

+153
-13
lines changed

‎src/backend/tsearch/spell.c

Lines changed: 153 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -457,13 +457,149 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
457457
Conf->naffixes++;
458458
}
459459

460+
461+
/* Parsing states for parse_affentry() and friends */
460462
#definePAE_WAIT_MASK0
461-
#definePAE_INMASK1
463+
#definePAE_INMASK1
462464
#definePAE_WAIT_FIND2
463-
#definePAE_INFIND3
465+
#definePAE_INFIND3
464466
#definePAE_WAIT_REPL4
465-
#definePAE_INREPL5
467+
#definePAE_INREPL5
468+
#definePAE_WAIT_TYPE6
469+
#definePAE_WAIT_FLAG7
466470

471+
/*
 * Pull the next whitespace-delimited field out of an .affix file line.
 *
 * *str points at the unread input and is advanced as characters are
 * consumed.  When a field is ended by whitespace, *str is left pointing at
 * that whitespace character.
 *
 * next receives a null-terminated copy of the field.  The caller must supply
 * a buffer of BUFSIZ bytes there; a character that does not fit in the space
 * still available is simply not copied.
 *
 * A '#' met before any field character has been seen stops the scan with a
 * FALSE result; in that case nothing is written to "next".
 *
 * Returns TRUE if a nonempty field was found, FALSE if not.
 */
static bool
get_nextfield(char **str, char *next)
{
	bool		in_field = false;	/* have we copied or skipped a field char? */
	int			room = BUFSIZ;		/* bytes still usable in "next" */

	for (; **str; *str += pg_mblen(*str))
	{
		int			clen;

		/* '#' is only special while we are still looking for the field */
		if (!in_field && t_iseq(*str, '#'))
			return false;

		if (t_isspace(*str))
		{
			if (in_field)
			{
				/* whitespace after field text: the field is complete */
				*next = '\0';
				return true;
			}
			/* leading whitespace, keep looking */
			continue;
		}

		/*
		 * Ordinary field character.  Copy it only if it leaves room for the
		 * terminating null; otherwise drop it, but still count it as part of
		 * the field.
		 */
		clen = pg_mblen(*str);
		if (clen < room)
		{
			COPYCHAR(next, *str);
			next += clen;
			room -= clen;
		}
		in_field = true;
	}

	/* ran out of input: whatever we collected so far is the field */
	*next = '\0';

	return in_field;
}
532+
533+
/*
534+
* Parses entry of an .affix file of MySpell or Hunspell format.
535+
*
536+
* An .affix file entry has the following format:
537+
* - header
538+
* <type> <flag> <cross_flag> <flag_count>
539+
* - fields after header:
540+
* <type> <flag> <find> <replace><mask>
541+
*
542+
* str is the input line
543+
* field values are returned to type etc, which must be buffers of size BUFSIZ.
544+
*
545+
* Returns number of fields found; any omitted fields are set to empty strings.
546+
*/
547+
staticint
548+
parse_ooaffentry(char*str,char*type,char*flag,char*find,
549+
char*repl,char*mask)
550+
{
551+
intstate=PAE_WAIT_TYPE;
552+
intfields_read=0;
553+
boolvalid= false;
554+
555+
*type=*flag=*find=*repl=*mask='\0';
556+
557+
while (*str)
558+
{
559+
switch (state)
560+
{
561+
casePAE_WAIT_TYPE:
562+
valid=get_nextfield(&str,type);
563+
state=PAE_WAIT_FLAG;
564+
break;
565+
casePAE_WAIT_FLAG:
566+
valid=get_nextfield(&str,flag);
567+
state=PAE_WAIT_FIND;
568+
break;
569+
casePAE_WAIT_FIND:
570+
valid=get_nextfield(&str,find);
571+
state=PAE_WAIT_REPL;
572+
break;
573+
casePAE_WAIT_REPL:
574+
valid=get_nextfield(&str,repl);
575+
state=PAE_WAIT_MASK;
576+
break;
577+
casePAE_WAIT_MASK:
578+
valid=get_nextfield(&str,mask);
579+
state=-1;/* force loop exit */
580+
break;
581+
default:
582+
elog(ERROR,"unrecognized state in parse_ooaffentry: %d",
583+
state);
584+
break;
585+
}
586+
if (valid)
587+
fields_read++;
588+
else
589+
break;/* early EOL */
590+
if (state<0)
591+
break;/* got all fields */
592+
}
593+
594+
returnfields_read;
595+
}
596+
597+
/*
598+
* Parses entry of an .affix file of Ispell format
599+
*
600+
* An .affix file entry has the following format:
601+
* <mask> > [-<find>,]<replace>
602+
*/
467603
staticbool
468604
parse_affentry(char*str,char*mask,char*find,char*repl)
469605
{
@@ -618,8 +754,6 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
618754
intflag=0;
619755
charflagflags=0;
620756
tsearch_readline_statetrst;
621-
intscanread=0;
622-
charscanbuf[BUFSIZ];
623757
char*recoded;
624758

625759
/* read file to find any flag */
@@ -682,8 +816,6 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
682816
}
683817
tsearch_readline_end(&trst);
684818

685-
sprintf(scanbuf,"%%6s %%%ds %%%ds %%%ds %%%ds",BUFSIZ /5,BUFSIZ /5,BUFSIZ /5,BUFSIZ /5);
686-
687819
if (!tsearch_readline_begin(&trst,filename))
688820
ereport(ERROR,
689821
(errcode(ERRCODE_CONFIG_FILE_ERROR),
@@ -692,18 +824,21 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
692824

693825
while ((recoded=tsearch_readline(&trst))!=NULL)
694826
{
827+
intfields_read;
828+
695829
if (*recoded=='\0'||t_isspace(recoded)||t_iseq(recoded,'#'))
696830
gotonextline;
697831

698-
scanread=sscanf(recoded,scanbuf,type,sflag,find,repl,mask);
832+
fields_read=parse_ooaffentry(recoded,type,sflag,find,repl,mask);
699833

700834
if (ptype)
701835
pfree(ptype);
702836
ptype=lowerstr_ctx(Conf,type);
703-
if (scanread<4|| (STRNCMP(ptype,"sfx")&&STRNCMP(ptype,"pfx")))
837+
if (fields_read<4||
838+
(STRNCMP(ptype,"sfx")!=0&&STRNCMP(ptype,"pfx")!=0))
704839
gotonextline;
705840

706-
if (scanread==4)
841+
if (fields_read==4)
707842
{
708843
if (strlen(sflag)!=1)
709844
gotonextline;
@@ -722,9 +857,13 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
722857
if (strlen(sflag)!=1||flag!=*sflag||flag==0)
723858
gotonextline;
724859
prepl=lowerstr_ctx(Conf,repl);
725-
/*affix flag */
860+
/*Find position of '/' in lowercased string "prepl" */
726861
if ((ptr=strchr(prepl,'/'))!=NULL)
727862
{
863+
/*
864+
* Here we use non-lowercased string "repl". We need position
865+
* of '/' in "repl".
866+
*/
728867
*ptr='\0';
729868
ptr=repl+ (ptr-prepl)+1;
730869
while (*ptr)
@@ -800,11 +939,12 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
800939

801940
if (STRNCMP(pstr,"compoundwords")==0)
802941
{
942+
/* Find position in lowercased string "pstr" */
803943
s=findchar(pstr,'l');
804944
if (s)
805945
{
806-
s=recoded+ (s-pstr);/* weneed non-lowercased
807-
* string */
946+
/*Hereweuse non-lowercased string "recoded" */
947+
s=recoded+ (s-pstr);
808948
while (*s&& !t_isspace(s))
809949
s+=pg_mblen(s);
810950
while (*s&&t_isspace(s))

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp