@@ -327,6 +327,7 @@ static TParserStateActionItem actionTPS_Base[] = {
327327{p_iseqC ,'+' ,A_PUSH ,TPS_InSignedIntFirst ,0 ,NULL },
328328{p_iseqC ,'&' ,A_PUSH ,TPS_InHTMLEntityFirst ,0 ,NULL },
329329{p_iseqC ,'/' ,A_PUSH ,TPS_InFileFirst ,0 ,NULL },
330+ {p_iseqC ,'.' ,A_PUSH ,TPS_InPathFirst ,0 ,NULL },
330331{NULL ,0 ,A_NEXT ,TPS_InSpace ,0 ,NULL }
331332};
332333
@@ -336,15 +337,16 @@ static TParserStateActionItem actionTPS_InUWord[] = {
336337{p_isalnum ,0 ,A_NEXT ,TPS_InUWord ,0 ,NULL },
337338{p_iseqC ,'@' ,A_PUSH ,TPS_InEmail ,0 ,NULL },
338339{p_iseqC ,'/' ,A_PUSH ,TPS_InFileFirst ,0 ,NULL },
340+ {p_iseqC ,'.' ,A_PUSH ,TPS_InFileNext ,0 ,NULL },
339341{p_iseqC ,'-' ,A_PUSH ,TPS_InHyphenUWordFirst ,0 ,NULL },
340342{NULL ,0 ,A_BINGO ,TPS_Base ,UWORD ,NULL }
341343};
342344
343345static TParserStateActionItem actionTPS_InLatWord []= {
344346{p_isEOF ,0 ,A_BINGO ,TPS_Base ,LATWORD ,NULL },
345347{p_islatin ,0 ,A_NEXT ,TPS_Null ,0 ,NULL },
346- {p_iseqC ,'.' ,A_PUSH ,TPS_InHostFirstDomen ,0 ,NULL },
347- {p_iseqC ,'.' ,A_PUSH ,TPS_InFileFirst ,0 ,NULL },
348+ {p_iseqC ,'.' ,A_PUSH ,TPS_InHostFirstDomain ,0 ,NULL },
349+ {p_iseqC ,'.' ,A_PUSH ,TPS_InFileNext ,0 ,NULL },
348350{p_iseqC ,'-' ,A_PUSH ,TPS_InHostFirstAN ,0 ,NULL },
349351{p_iseqC ,'-' ,A_PUSH ,TPS_InHyphenLatWordFirst ,0 ,NULL },
350352{p_iseqC ,'@' ,A_PUSH ,TPS_InEmail ,0 ,NULL },
@@ -366,7 +368,7 @@ static TParserStateActionItem actionTPS_InCyrWord[] = {
366368static TParserStateActionItem actionTPS_InUnsignedInt []= {
367369{p_isEOF ,0 ,A_BINGO ,TPS_Base ,UNSIGNEDINT ,NULL },
368370{p_isdigit ,0 ,A_NEXT ,TPS_Null ,0 ,NULL },
369- {p_iseqC ,'.' ,A_PUSH ,TPS_InHostFirstDomen ,0 ,NULL },
371+ {p_iseqC ,'.' ,A_PUSH ,TPS_InHostFirstDomain ,0 ,NULL },
370372{p_iseqC ,'.' ,A_PUSH ,TPS_InUDecimalFirst ,0 ,NULL },
371373{p_iseqC ,'e' ,A_PUSH ,TPS_InMantissaFirst ,0 ,NULL },
372374{p_iseqC ,'E' ,A_PUSH ,TPS_InMantissaFirst ,0 ,NULL },
@@ -500,10 +502,19 @@ static TParserStateActionItem actionTPS_InTagFirst[] = {
500502{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
501503{p_iseqC ,'/' ,A_PUSH ,TPS_InTagCloseFirst ,0 ,NULL },
502504{p_iseqC ,'!' ,A_PUSH ,TPS_InCommentFirst ,0 ,NULL },
505+ {p_iseqC ,'?' ,A_PUSH ,TPS_InXMLBegin ,0 ,NULL },
503506{p_islatin ,0 ,A_PUSH ,TPS_InTag ,0 ,NULL },
504507{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL }
505508};
506509
/*
 * Transition table for the TPS_InXMLBegin state, which the InTagFirst table
 * pushes when it sees '?'.  It accepts only an 'x' or 'X' and then hands
 * over to TPS_InTag; nothing in this table checks the rest of the "xml" word.
 * Each row looks like {test, character argument, action flags, next state,
 * token type, special handler} -- NOTE(review): layout inferred from usage;
 * confirm against the TParserStateActionItem definition.
 */
510+ static TParserStateActionItem actionTPS_InXMLBegin []= {
511+ {p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL }, /* input ended right after '?': A_POP */
512+ /* <?xml ... */
513+ {p_iseqC ,'x' ,A_NEXT ,TPS_InTag ,0 ,NULL },
514+ {p_iseqC ,'X' ,A_NEXT ,TPS_InTag ,0 ,NULL }, /* upper-case variant of the row above */
515+ {NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL } /* NULL test: catch-all row, any other character takes A_POP */
516+ };
517+
507518static TParserStateActionItem actionTPS_InTagCloseFirst []= {
508519{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
509520{p_islatin ,0 ,A_NEXT ,TPS_InTag ,0 ,NULL },
@@ -520,6 +531,11 @@ static TParserStateActionItem actionTPS_InTag[] = {
520531{p_iseqC ,'=' ,A_NEXT ,TPS_Null ,0 ,NULL },
521532{p_iseqC ,'-' ,A_NEXT ,TPS_Null ,0 ,NULL },
522533{p_iseqC ,'#' ,A_NEXT ,TPS_Null ,0 ,NULL },
534+ {p_iseqC ,'/' ,A_NEXT ,TPS_Null ,0 ,NULL },
535+ {p_iseqC ,':' ,A_NEXT ,TPS_Null ,0 ,NULL },
536+ {p_iseqC ,'.' ,A_NEXT ,TPS_Null ,0 ,NULL },
537+ {p_iseqC ,'&' ,A_NEXT ,TPS_Null ,0 ,NULL },
538+ {p_iseqC ,'?' ,A_NEXT ,TPS_Null ,0 ,NULL },
523539{p_iseqC ,'%' ,A_NEXT ,TPS_Null ,0 ,NULL },
524540{p_isspace ,0 ,A_NEXT ,TPS_Null ,0 ,SpecialTags },
525541{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL }
@@ -551,6 +567,9 @@ static TParserStateActionItem actionTPS_InTagEnd[] = {
/*
 * Transition table for the TPS_InCommentFirst state, which the InTagFirst
 * table pushes when it sees '!'.  A '-' continues to TPS_InCommentLast; the
 * rows added by this change ('+' lines) send 'D' or 'd' to TPS_InTag so that
 * a DOCTYPE declaration is handled by the tag state.  Only that first letter
 * is tested here; the remaining characters are left to TPS_InTag.
 */
551567static TParserStateActionItem actionTPS_InCommentFirst []= {
552568{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL }, /* input ended right after '!': A_POP */
553569{p_iseqC ,'-' ,A_NEXT ,TPS_InCommentLast ,0 ,NULL },
570+ /* <!DOCTYPE ...>*/
571+ {p_iseqC ,'D' ,A_NEXT ,TPS_InTag ,0 ,NULL },
572+ {p_iseqC ,'d' ,A_NEXT ,TPS_InTag ,0 ,NULL }, /* lower-case variant of the row above */
554573{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL } /* NULL test: catch-all row, any other character takes A_POP */
555574};
556575
@@ -583,30 +602,30 @@ static TParserStateActionItem actionTPS_InCommentEnd[] = {
583602{NULL ,0 ,A_BINGO |A_CLRALL ,TPS_Base ,TAG ,NULL }
584603};
585604
/*
 * Transition table for the TPS_InHostFirstDomain state, which the word,
 * number and host tables push when they see '.'.  It classifies the first
 * character after that dot: a Latin letter moves to TPS_InHostDomainSecond,
 * a digit moves to TPS_InHost, anything else takes A_POP.
 * In this diff the '-' rows are the old text using the misspelling "Domen";
 * the '+' rows are the replacements with the identifier renamed to "Domain".
 */
586- static TParserStateActionItem actionTPS_InHostFirstDomen []= {
605+ static TParserStateActionItem actionTPS_InHostFirstDomain []= {
587606{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL }, /* nothing follows the dot: A_POP */
588- {p_islatin ,0 ,A_NEXT ,TPS_InHostDomenSecond ,0 ,NULL },
607+ {p_islatin ,0 ,A_NEXT ,TPS_InHostDomainSecond ,0 ,NULL },
589608{p_isdigit ,0 ,A_NEXT ,TPS_InHost ,0 ,NULL },
590609{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL } /* NULL test: catch-all row */
591610};
592611
/*
 * Transition table for the TPS_InHostDomainSecond state, entered from the
 * InHostFirstDomain table after one Latin letter has followed a dot.  A
 * second Latin letter moves on to TPS_InHostDomain; a digit, '-', '.' or '@'
 * pushes the host, host-alphanumeric, next-domain or e-mail state instead.
 * There is no token-emitting (A_BINGO) row in this table: end of input and
 * every unmatched character take A_POP.
 * In this diff the '-' rows are the old text using the misspelling "Domen";
 * the '+' rows are the replacements with the identifier renamed to "Domain".
 */
593- static TParserStateActionItem actionTPS_InHostDomenSecond []= {
612+ static TParserStateActionItem actionTPS_InHostDomainSecond []= {
594613{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
595- {p_islatin ,0 ,A_NEXT ,TPS_InHostDomen ,0 ,NULL },
614+ {p_islatin ,0 ,A_NEXT ,TPS_InHostDomain ,0 ,NULL },
596615{p_isdigit ,0 ,A_PUSH ,TPS_InHost ,0 ,NULL },
597616{p_iseqC ,'-' ,A_PUSH ,TPS_InHostFirstAN ,0 ,NULL },
598- {p_iseqC ,'.' ,A_PUSH ,TPS_InHostFirstDomen ,0 ,NULL },
617+ {p_iseqC ,'.' ,A_PUSH ,TPS_InHostFirstDomain ,0 ,NULL }, /* another dot: start the next domain label */
599618{p_iseqC ,'@' ,A_PUSH ,TPS_InEmail ,0 ,NULL },
600619{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL } /* NULL test: catch-all row */
601620};
602621
603- static TParserStateActionItem actionTPS_InHostDomen []= {
622+ static TParserStateActionItem actionTPS_InHostDomain []= {
604623{p_isEOF ,0 ,A_BINGO |A_CLRALL ,TPS_Base ,HOST ,NULL },
605- {p_islatin ,0 ,A_NEXT ,TPS_InHostDomen ,0 ,NULL },
624+ {p_islatin ,0 ,A_NEXT ,TPS_InHostDomain ,0 ,NULL },
606625{p_isdigit ,0 ,A_PUSH ,TPS_InHost ,0 ,NULL },
607626{p_iseqC ,':' ,A_PUSH ,TPS_InPortFirst ,0 ,NULL },
608627{p_iseqC ,'-' ,A_PUSH ,TPS_InHostFirstAN ,0 ,NULL },
609- {p_iseqC ,'.' ,A_PUSH ,TPS_InHostFirstDomen ,0 ,NULL },
628+ {p_iseqC ,'.' ,A_PUSH ,TPS_InHostFirstDomain ,0 ,NULL },
610629{p_iseqC ,'@' ,A_PUSH ,TPS_InEmail ,0 ,NULL },
611630{p_isdigit ,0 ,A_POP ,TPS_Null ,0 ,NULL },
612631{p_isstophost ,0 ,A_BINGO |A_CLRALL ,TPS_InURIStart ,HOST ,NULL },
@@ -640,7 +659,7 @@ static TParserStateActionItem actionTPS_InHost[] = {
640659{p_isdigit ,0 ,A_NEXT ,TPS_InHost ,0 ,NULL },
641660{p_islatin ,0 ,A_NEXT ,TPS_InHost ,0 ,NULL },
642661{p_iseqC ,'@' ,A_PUSH ,TPS_InEmail ,0 ,NULL },
643- {p_iseqC ,'.' ,A_PUSH ,TPS_InHostFirstDomen ,0 ,NULL },
662+ {p_iseqC ,'.' ,A_PUSH ,TPS_InHostFirstDomain ,0 ,NULL },
644663{p_iseqC ,'-' ,A_PUSH ,TPS_InHostFirstAN ,0 ,NULL },
645664{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL }
646665};
@@ -652,14 +671,32 @@ static TParserStateActionItem actionTPS_InEmail[] = {
652671
/*
 * Transition table for the TPS_InFileFirst state, which other tables enter
 * after a '/'.  It classifies the first character of the next path
 * component.
 * This diff replaces the four '-' rows with the four '+' rows: the action
 * changes from A_CLEAR to A_NEXT, and a '.' now goes to TPS_InPathFirst
 * instead of TPS_InFile.
 * NOTE(review): the exact effect of A_CLEAR versus A_NEXT is defined by the
 * parser driver, which is not visible here -- confirm there.
 */
653672static TParserStateActionItem actionTPS_InFileFirst []= {
654673{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
655- {p_islatin ,0 ,A_CLEAR ,TPS_InFile ,0 ,NULL },
656- {p_isdigit ,0 ,A_CLEAR ,TPS_InFile ,0 ,NULL },
657- {p_iseqC ,'.' ,A_CLEAR , TPS_InFile ,0 ,NULL },
658- {p_iseqC ,'_' ,A_CLEAR ,TPS_InFile ,0 ,NULL },
674+ {p_islatin ,0 ,A_NEXT ,TPS_InFile ,0 ,NULL },
675+ {p_isdigit ,0 ,A_NEXT ,TPS_InFile ,0 ,NULL },
676+ {p_iseqC ,'.' ,A_NEXT , TPS_InPathFirst ,0 ,NULL }, /* component starts with a dot: use the dot-path states */
677+ {p_iseqC ,'_' ,A_NEXT ,TPS_InFile ,0 ,NULL },
659678{p_iseqC ,'?' ,A_PUSH ,TPS_InURIFirst ,0 ,NULL },
660679{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL } /* NULL test: catch-all row */
661680};
662681
/*
 * Transition table for the TPS_InPathFirst state (new in this diff).  It is
 * entered after a '.': the Base table pushes it on '.', and the InFileFirst
 * table moves to it on '.'.  It decides what the character after that dot
 * means:
 *   Latin letter, digit or '_'  -> ordinary file name, continue in TPS_InFile
 *   '.'                         -> second dot in a row, go to TPS_InPathSecond
 *   '/'                         -> "./" prefix, continue with TPS_InFileFirst
 * End of input and any other character take A_POP.
 */
682+ static TParserStateActionItem actionTPS_InPathFirst []= {
683+ {p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
684+ {p_islatin ,0 ,A_NEXT ,TPS_InFile ,0 ,NULL },
685+ {p_isdigit ,0 ,A_NEXT ,TPS_InFile ,0 ,NULL },
686+ {p_iseqC ,'_' ,A_NEXT ,TPS_InFile ,0 ,NULL },
687+ {p_iseqC ,'.' ,A_NEXT ,TPS_InPathSecond ,0 ,NULL },
688+ {p_iseqC ,'/' ,A_NEXT ,TPS_InFileFirst ,0 ,NULL },
689+ {NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL } /* NULL test: catch-all row */
690+ };
691+
/*
 * Transition table for the TPS_InPathSecond state (new in this diff),
 * entered from the InPathFirst table when a second '.' follows the first,
 * i.e. after "..".  End of input, '/' or whitespace can finish a FILEPATH
 * token here; any other character takes A_POP.
 */
692+ static TParserStateActionItem actionTPS_InPathSecond []= {
693+ {p_isEOF ,0 ,A_BINGO |A_CLEAR ,TPS_Base ,FILEPATH ,NULL },
694+ {p_iseqC ,'/' ,A_NEXT |A_PUSH ,TPS_InFileFirst ,0 ,NULL }, /* "../": try to continue with the next path component */
695+ {p_iseqC ,'/' ,A_BINGO |A_CLEAR ,TPS_Base ,FILEPATH ,NULL }, /* NOTE(review): same test as the row above; presumably the fallback used when the pushed InFileFirst branch pops back -- confirm against the parser driver */
696+ {p_isspace ,0 ,A_BINGO |A_CLEAR ,TPS_Base ,FILEPATH ,NULL },
697+ {NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL } /* NULL test: catch-all row */
698+ };
699+
663700static TParserStateActionItem actionTPS_InFile []= {
664701{p_isEOF ,0 ,A_BINGO ,TPS_Base ,FILEPATH ,NULL },
665702{p_islatin ,0 ,A_NEXT ,TPS_InFile ,0 ,NULL },
@@ -894,6 +931,7 @@ static const TParserStateAction Actions[] = {
894931{TPS_InHTMLEntityNum ,actionTPS_InHTMLEntityNum },
895932{TPS_InHTMLEntityEnd ,actionTPS_InHTMLEntityEnd },
896933{TPS_InTagFirst ,actionTPS_InTagFirst },
934+ {TPS_InXMLBegin ,actionTPS_InXMLBegin },
897935{TPS_InTagCloseFirst ,actionTPS_InTagCloseFirst },
898936{TPS_InTag ,actionTPS_InTag },
899937{TPS_InTagEscapeK ,actionTPS_InTagEscapeK },
@@ -906,15 +944,17 @@ static const TParserStateAction Actions[] = {
906944{TPS_InCloseCommentFirst ,actionTPS_InCloseCommentFirst },
907945{TPS_InCloseCommentLast ,actionTPS_InCloseCommentLast },
908946{TPS_InCommentEnd ,actionTPS_InCommentEnd },
909- {TPS_InHostFirstDomen , actionTPS_InHostFirstDomen },
910- {TPS_InHostDomenSecond , actionTPS_InHostDomenSecond },
911- {TPS_InHostDomen , actionTPS_InHostDomen },
947+ {TPS_InHostFirstDomain , actionTPS_InHostFirstDomain },
948+ {TPS_InHostDomainSecond , actionTPS_InHostDomainSecond },
949+ {TPS_InHostDomain , actionTPS_InHostDomain },
912950{TPS_InPortFirst ,actionTPS_InPortFirst },
913951{TPS_InPort ,actionTPS_InPort },
914952{TPS_InHostFirstAN ,actionTPS_InHostFirstAN },
915953{TPS_InHost ,actionTPS_InHost },
916954{TPS_InEmail ,actionTPS_InEmail },
917955{TPS_InFileFirst ,actionTPS_InFileFirst },
956+ {TPS_InPathFirst ,actionTPS_InPathFirst },
957+ {TPS_InPathSecond ,actionTPS_InPathSecond },
918958{TPS_InFile ,actionTPS_InFile },
919959{TPS_InFileNext ,actionTPS_InFileNext },
920960{TPS_InURIFirst ,actionTPS_InURIFirst },