Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit e8c81e1

Browse files
committed
Improve word parser.
- improve file and path recognition - fix misspelling - improve tag recognition
1 parent 8cb4e4f commit e8c81e1

File tree

2 files changed

+65
-22
lines changed

2 files changed

+65
-22
lines changed

‎contrib/tsearch2/wordparser/parser.c

Lines changed: 59 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -327,6 +327,7 @@ static TParserStateActionItem actionTPS_Base[] = {
327327
{p_iseqC,'+',A_PUSH,TPS_InSignedIntFirst,0,NULL},
328328
{p_iseqC,'&',A_PUSH,TPS_InHTMLEntityFirst,0,NULL},
329329
{p_iseqC,'/',A_PUSH,TPS_InFileFirst,0,NULL},
330+
{p_iseqC,'.',A_PUSH,TPS_InPathFirst,0,NULL},
330331
{NULL,0,A_NEXT,TPS_InSpace,0,NULL}
331332
};
332333

@@ -336,15 +337,16 @@ static TParserStateActionItem actionTPS_InUWord[] = {
336337
{p_isalnum,0,A_NEXT,TPS_InUWord,0,NULL},
337338
{p_iseqC,'@',A_PUSH,TPS_InEmail,0,NULL},
338339
{p_iseqC,'/',A_PUSH,TPS_InFileFirst,0,NULL},
340+
{p_iseqC,'.',A_PUSH,TPS_InFileNext,0,NULL},
339341
{p_iseqC,'-',A_PUSH,TPS_InHyphenUWordFirst,0,NULL},
340342
{NULL,0,A_BINGO,TPS_Base,UWORD,NULL}
341343
};
342344

343345
static TParserStateActionItem actionTPS_InLatWord[] = {
344346
{p_isEOF,0,A_BINGO,TPS_Base,LATWORD,NULL},
345347
{p_islatin,0,A_NEXT,TPS_Null,0,NULL},
346-
{p_iseqC,'.',A_PUSH,TPS_InHostFirstDomen,0,NULL},
347-
{p_iseqC,'.',A_PUSH,TPS_InFileFirst,0,NULL},
348+
{p_iseqC,'.',A_PUSH,TPS_InHostFirstDomain,0,NULL},
349+
{p_iseqC,'.',A_PUSH,TPS_InFileNext,0,NULL},
348350
{p_iseqC,'-',A_PUSH,TPS_InHostFirstAN,0,NULL},
349351
{p_iseqC,'-',A_PUSH,TPS_InHyphenLatWordFirst,0,NULL},
350352
{p_iseqC,'@',A_PUSH,TPS_InEmail,0,NULL},
@@ -366,7 +368,7 @@ static TParserStateActionItem actionTPS_InCyrWord[] = {
366368
static TParserStateActionItem actionTPS_InUnsignedInt[] = {
367369
{p_isEOF,0,A_BINGO,TPS_Base,UNSIGNEDINT,NULL},
368370
{p_isdigit,0,A_NEXT,TPS_Null,0,NULL},
369-
{p_iseqC,'.',A_PUSH,TPS_InHostFirstDomen,0,NULL},
371+
{p_iseqC,'.',A_PUSH,TPS_InHostFirstDomain,0,NULL},
370372
{p_iseqC,'.',A_PUSH,TPS_InUDecimalFirst,0,NULL},
371373
{p_iseqC,'e',A_PUSH,TPS_InMantissaFirst,0,NULL},
372374
{p_iseqC,'E',A_PUSH,TPS_InMantissaFirst,0,NULL},
@@ -500,10 +502,19 @@ static TParserStateActionItem actionTPS_InTagFirst[] = {
500502
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
501503
{p_iseqC,'/',A_PUSH,TPS_InTagCloseFirst,0,NULL},
502504
{p_iseqC,'!',A_PUSH,TPS_InCommentFirst,0,NULL},
505+
{p_iseqC,'?',A_PUSH,TPS_InXMLBegin,0,NULL},
503506
{p_islatin,0,A_PUSH,TPS_InTag,0,NULL},
504507
{NULL,0,A_POP,TPS_Null,0,NULL}
505508
};
506509

510+
static TParserStateActionItem actionTPS_InXMLBegin[] = {
511+
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
512+
/* <?xml ... */
513+
{p_iseqC,'x',A_NEXT,TPS_InTag,0,NULL},
514+
{p_iseqC,'X',A_NEXT,TPS_InTag,0,NULL},
515+
{NULL,0,A_POP,TPS_Null,0,NULL}
516+
};
517+
507518
static TParserStateActionItem actionTPS_InTagCloseFirst[] = {
508519
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
509520
{p_islatin,0,A_NEXT,TPS_InTag,0,NULL},
@@ -520,6 +531,11 @@ static TParserStateActionItem actionTPS_InTag[] = {
520531
{p_iseqC,'=',A_NEXT,TPS_Null,0,NULL},
521532
{p_iseqC,'-',A_NEXT,TPS_Null,0,NULL},
522533
{p_iseqC,'#',A_NEXT,TPS_Null,0,NULL},
534+
{p_iseqC,'/',A_NEXT,TPS_Null,0,NULL},
535+
{p_iseqC,':',A_NEXT,TPS_Null,0,NULL},
536+
{p_iseqC,'.',A_NEXT,TPS_Null,0,NULL},
537+
{p_iseqC,'&',A_NEXT,TPS_Null,0,NULL},
538+
{p_iseqC,'?',A_NEXT,TPS_Null,0,NULL},
523539
{p_iseqC,'%',A_NEXT,TPS_Null,0,NULL},
524540
{p_isspace,0,A_NEXT,TPS_Null,0,SpecialTags},
525541
{NULL,0,A_POP,TPS_Null,0,NULL}
@@ -551,6 +567,9 @@ static TParserStateActionItem actionTPS_InTagEnd[] = {
551567
static TParserStateActionItem actionTPS_InCommentFirst[] = {
552568
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
553569
{p_iseqC,'-',A_NEXT,TPS_InCommentLast,0,NULL},
570+
/* <!DOCTYPE ...>*/
571+
{p_iseqC,'D',A_NEXT,TPS_InTag,0,NULL},
572+
{p_iseqC,'d',A_NEXT,TPS_InTag,0,NULL},
554573
{NULL,0,A_POP,TPS_Null,0,NULL}
555574
};
556575

@@ -583,30 +602,30 @@ static TParserStateActionItem actionTPS_InCommentEnd[] = {
583602
{NULL,0,A_BINGO |A_CLRALL,TPS_Base,TAG,NULL}
584603
};
585604

586-
static TParserStateActionItem actionTPS_InHostFirstDomen[] = {
605+
static TParserStateActionItem actionTPS_InHostFirstDomain[] = {
587606
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
588-
{p_islatin,0,A_NEXT,TPS_InHostDomenSecond,0,NULL},
607+
{p_islatin,0,A_NEXT,TPS_InHostDomainSecond,0,NULL},
589608
{p_isdigit,0,A_NEXT,TPS_InHost,0,NULL},
590609
{NULL,0,A_POP,TPS_Null,0,NULL}
591610
};
592611

593-
static TParserStateActionItem actionTPS_InHostDomenSecond[] = {
612+
static TParserStateActionItem actionTPS_InHostDomainSecond[] = {
594613
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
595-
{p_islatin,0,A_NEXT,TPS_InHostDomen,0,NULL},
614+
{p_islatin,0,A_NEXT,TPS_InHostDomain,0,NULL},
596615
{p_isdigit,0,A_PUSH,TPS_InHost,0,NULL},
597616
{p_iseqC,'-',A_PUSH,TPS_InHostFirstAN,0,NULL},
598-
{p_iseqC,'.',A_PUSH,TPS_InHostFirstDomen,0,NULL},
617+
{p_iseqC,'.',A_PUSH,TPS_InHostFirstDomain,0,NULL},
599618
{p_iseqC,'@',A_PUSH,TPS_InEmail,0,NULL},
600619
{NULL,0,A_POP,TPS_Null,0,NULL}
601620
};
602621

603-
static TParserStateActionItem actionTPS_InHostDomen[] = {
622+
static TParserStateActionItem actionTPS_InHostDomain[] = {
604623
{p_isEOF,0,A_BINGO |A_CLRALL,TPS_Base,HOST,NULL},
605-
{p_islatin,0,A_NEXT,TPS_InHostDomen,0,NULL},
624+
{p_islatin,0,A_NEXT,TPS_InHostDomain,0,NULL},
606625
{p_isdigit,0,A_PUSH,TPS_InHost,0,NULL},
607626
{p_iseqC,':',A_PUSH,TPS_InPortFirst,0,NULL},
608627
{p_iseqC,'-',A_PUSH,TPS_InHostFirstAN,0,NULL},
609-
{p_iseqC,'.',A_PUSH,TPS_InHostFirstDomen,0,NULL},
628+
{p_iseqC,'.',A_PUSH,TPS_InHostFirstDomain,0,NULL},
610629
{p_iseqC,'@',A_PUSH,TPS_InEmail,0,NULL},
611630
{p_isdigit,0,A_POP,TPS_Null,0,NULL},
612631
{p_isstophost,0,A_BINGO |A_CLRALL,TPS_InURIStart,HOST,NULL},
@@ -640,7 +659,7 @@ static TParserStateActionItem actionTPS_InHost[] = {
640659
{p_isdigit,0,A_NEXT,TPS_InHost,0,NULL},
641660
{p_islatin,0,A_NEXT,TPS_InHost,0,NULL},
642661
{p_iseqC,'@',A_PUSH,TPS_InEmail,0,NULL},
643-
{p_iseqC,'.',A_PUSH,TPS_InHostFirstDomen,0,NULL},
662+
{p_iseqC,'.',A_PUSH,TPS_InHostFirstDomain,0,NULL},
644663
{p_iseqC,'-',A_PUSH,TPS_InHostFirstAN,0,NULL},
645664
{NULL,0,A_POP,TPS_Null,0,NULL}
646665
};
@@ -652,14 +671,32 @@ static TParserStateActionItem actionTPS_InEmail[] = {
652671

653672
static TParserStateActionItem actionTPS_InFileFirst[] = {
654673
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
655-
{p_islatin,0,A_CLEAR,TPS_InFile,0,NULL},
656-
{p_isdigit,0,A_CLEAR,TPS_InFile,0,NULL},
657-
{p_iseqC,'.',A_CLEAR,TPS_InFile,0,NULL},
658-
{p_iseqC,'_',A_CLEAR,TPS_InFile,0,NULL},
674+
{p_islatin,0,A_NEXT,TPS_InFile,0,NULL},
675+
{p_isdigit,0,A_NEXT,TPS_InFile,0,NULL},
676+
{p_iseqC,'.',A_NEXT,TPS_InPathFirst,0,NULL},
677+
{p_iseqC,'_',A_NEXT,TPS_InFile,0,NULL},
659678
{p_iseqC,'?',A_PUSH,TPS_InURIFirst,0,NULL},
660679
{NULL,0,A_POP,TPS_Null,0,NULL}
661680
};
662681

682+
static TParserStateActionItem actionTPS_InPathFirst[] = {
683+
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
684+
{p_islatin,0,A_NEXT,TPS_InFile,0,NULL},
685+
{p_isdigit,0,A_NEXT,TPS_InFile,0,NULL},
686+
{p_iseqC,'_',A_NEXT,TPS_InFile,0,NULL},
687+
{p_iseqC,'.',A_NEXT,TPS_InPathSecond,0,NULL},
688+
{p_iseqC,'/',A_NEXT,TPS_InFileFirst,0,NULL},
689+
{NULL,0,A_POP,TPS_Null,0,NULL}
690+
};
691+
692+
static TParserStateActionItem actionTPS_InPathSecond[] = {
693+
{p_isEOF,0,A_BINGO|A_CLEAR,TPS_Base,FILEPATH,NULL},
694+
{p_iseqC,'/',A_NEXT|A_PUSH,TPS_InFileFirst,0,NULL},
695+
{p_iseqC,'/',A_BINGO|A_CLEAR,TPS_Base,FILEPATH,NULL},
696+
{p_isspace,0,A_BINGO|A_CLEAR,TPS_Base,FILEPATH,NULL},
697+
{NULL,0,A_POP,TPS_Null,0,NULL}
698+
};
699+
663700
static TParserStateActionItem actionTPS_InFile[] = {
664701
{p_isEOF,0,A_BINGO,TPS_Base,FILEPATH,NULL},
665702
{p_islatin,0,A_NEXT,TPS_InFile,0,NULL},
@@ -894,6 +931,7 @@ static const TParserStateAction Actions[] = {
894931
{TPS_InHTMLEntityNum,actionTPS_InHTMLEntityNum},
895932
{TPS_InHTMLEntityEnd,actionTPS_InHTMLEntityEnd},
896933
{TPS_InTagFirst,actionTPS_InTagFirst},
934+
{TPS_InXMLBegin,actionTPS_InXMLBegin},
897935
{TPS_InTagCloseFirst,actionTPS_InTagCloseFirst},
898936
{TPS_InTag,actionTPS_InTag},
899937
{TPS_InTagEscapeK,actionTPS_InTagEscapeK},
@@ -906,15 +944,17 @@ static const TParserStateAction Actions[] = {
906944
{TPS_InCloseCommentFirst,actionTPS_InCloseCommentFirst},
907945
{TPS_InCloseCommentLast,actionTPS_InCloseCommentLast},
908946
{TPS_InCommentEnd,actionTPS_InCommentEnd},
909-
{TPS_InHostFirstDomen,actionTPS_InHostFirstDomen},
910-
{TPS_InHostDomenSecond,actionTPS_InHostDomenSecond},
911-
{TPS_InHostDomen,actionTPS_InHostDomen},
947+
{TPS_InHostFirstDomain,actionTPS_InHostFirstDomain},
948+
{TPS_InHostDomainSecond,actionTPS_InHostDomainSecond},
949+
{TPS_InHostDomain,actionTPS_InHostDomain},
912950
{TPS_InPortFirst,actionTPS_InPortFirst},
913951
{TPS_InPort,actionTPS_InPort},
914952
{TPS_InHostFirstAN,actionTPS_InHostFirstAN},
915953
{TPS_InHost,actionTPS_InHost},
916954
{TPS_InEmail,actionTPS_InEmail},
917955
{TPS_InFileFirst,actionTPS_InFileFirst},
956+
{TPS_InPathFirst,actionTPS_InPathFirst},
957+
{TPS_InPathSecond,actionTPS_InPathSecond},
918958
{TPS_InFile,actionTPS_InFile},
919959
{TPS_InFileNext,actionTPS_InFileNext},
920960
{TPS_InURIFirst,actionTPS_InURIFirst},

‎contrib/tsearch2/wordparser/parser.h

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ typedef enum
3030
TPS_InHTMLEntityNum,
3131
TPS_InHTMLEntityEnd,
3232
TPS_InTagFirst,
33+
TPS_InXMLBegin,
3334
TPS_InTagCloseFirst,
3435
TPS_InTag,
3536
TPS_InTagEscapeK,
@@ -42,15 +43,17 @@ typedef enum
4243
TPS_InCloseCommentFirst,
4344
TPS_InCloseCommentLast,
4445
TPS_InCommentEnd,
45-
TPS_InHostFirstDomen,
46-
TPS_InHostDomenSecond,
47-
TPS_InHostDomen,
46+
TPS_InHostFirstDomain,
47+
TPS_InHostDomainSecond,
48+
TPS_InHostDomain,
4849
TPS_InPortFirst,
4950
TPS_InPort,
5051
TPS_InHostFirstAN,
5152
TPS_InHost,
5253
TPS_InEmail,
5354
TPS_InFileFirst,
55+
TPS_InPathFirst,
56+
TPS_InPathSecond,
5457
TPS_InFile,
5558
TPS_InFileNext,
5659
TPS_InURIFirst,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp