@@ -955,7 +955,7 @@ def afterDoctypeNameState(self):
955955matched = False
956956break
957957if matched :
958- self .state = self .beforeDoctypePublicIdentifierState
958+ self .state = self .afterDoctypePublicKeywordState
959959return True
960960elif data in (u"s" ,u"S" ):
961961matched = True
@@ -966,7 +966,7 @@ def afterDoctypeNameState(self):
966966matched = False
967967break
968968if matched :
969- self .state = self .beforeDoctypeSystemIdentifierState
969+ self .state = self .afterDoctypeSystemKeywordState
970970return True
971971
972972# All the characters read before the current 'data' will be
@@ -981,6 +981,26 @@ def afterDoctypeNameState(self):
981981self .state = self .bogusDoctypeState
982982
983983return True
984+
def afterDoctypePublicKeywordState(self):
    """Handle the character that follows the DOCTYPE public keyword.

    Reads one character from the stream and picks the next tokenizer
    state:

    * a space character moves on to the "before public identifier" state;
    * a quote character records an "unexpected-char-in-doctype" parse
      error, is pushed back, and is then left to the "before public
      identifier" state;
    * EOF records an "eof-in-doctype" parse error, flags the current
      token as not correct, queues it and returns to the data state;
    * any other character is pushed back and left to the "before public
      identifier" state.

    Always returns True.
    """
    char = self.stream.char()

    if char in spaceCharacters:
        self.state = self.beforeDoctypePublicIdentifierState
        return True

    if char is EOF:
        eof_error = {"type": tokenTypes["ParseError"],
                     "data": "eof-in-doctype"}
        self.tokenQueue.append(eof_error)
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True

    if char in ("'", '"'):
        # The quote arrived with no separating space after the keyword:
        # report it, then let the next state consume the quote itself.
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "unexpected-char-in-doctype"})

    # Quote or anything else: hand the character back so that the
    # "before public identifier" state decides what to do with it.
    self.stream.unget(char)
    self.state = self.beforeDoctypePublicIdentifierState
    return True
9841004
9851005def beforeDoctypePublicIdentifierState (self ):
9861006data = self .stream .char ()
@@ -1054,17 +1074,47 @@ def doctypePublicIdentifierSingleQuotedState(self):
def afterDoctypePublicIdentifierState(self):
    """Handle the character that follows a closed public identifier.

    Reads one character from the stream and reacts as follows:

    * a space character switches to the "between public and system
      identifiers" state;
    * ">" queues the current token and returns to the data state;
    * a quote character records an "unexpected-char-in-doctype" parse
      error, resets the token's "systemId" to an empty string and enters
      the matching quoted system-identifier state;
    * EOF records an "eof-in-doctype" parse error, flags the token as not
      correct, queues it and returns to the data state;
    * anything else records an "unexpected-char-in-doctype" parse error,
      flags the token as not correct and enters the bogus DOCTYPE state.

    Always returns True.
    """
    char = self.stream.char()

    if char in spaceCharacters:
        self.state = self.betweenDoctypePublicAndSystemIdentifiersState
        return True

    if char == ">":
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True

    if char == '"' or char == "'":
        # A system identifier opened directly after the public one, with
        # no space in between: reported, but the identifier is still read.
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "unexpected-char-in-doctype"})
        self.currentToken["systemId"] = u""
        if char == '"':
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        else:
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        return True

    if char is EOF:
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "eof-in-doctype"})
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True

    # Any other character: the token is flagged and the bogus DOCTYPE
    # state takes over.
    self.tokenQueue.append({"type": tokenTypes["ParseError"],
                            "data": "unexpected-char-in-doctype"})
    self.currentToken["correct"] = False
    self.state = self.bogusDoctypeState
    return True
1103+
1104+ def betweenDoctypePublicAndSystemIdentifiersState (self ):
1105+ data = self .stream .char ()
1106+ if data in spaceCharacters :
1107+ pass
10641108elif data == ">" :
10651109self .tokenQueue .append (self .currentToken )
10661110self .state = self .dataState
1067- elif data is EOF :
1111+ elif data == '"' :
1112+ self .currentToken ["systemId" ]= u""
1113+ self .state = self .doctypeSystemIdentifierDoubleQuotedState
1114+ elif data == "'" :
1115+ self .currentToken ["systemId" ]= u""
1116+ self .state = self .doctypeSystemIdentifierSingleQuotedState
1117+ elif data == EOF :
10681118self .tokenQueue .append ({"type" :tokenTypes ["ParseError" ],"data" :
10691119"eof-in-doctype" })
10701120self .currentToken ["correct" ]= False
@@ -1077,6 +1127,26 @@ def afterDoctypePublicIdentifierState(self):
10771127self .state = self .bogusDoctypeState
10781128return True
10791129
def afterDoctypeSystemKeywordState(self):
    """Handle the character that follows the DOCTYPE system keyword.

    Reads one character from the stream and picks the next tokenizer
    state:

    * a space character moves on to the "before system identifier" state;
    * a quote character records an "unexpected-char-in-doctype" parse
      error, is pushed back, and is then left to the "before system
      identifier" state;
    * EOF records an "eof-in-doctype" parse error, flags the current
      token as not correct, queues it and returns to the data state;
    * any other character is pushed back and left to the "before system
      identifier" state.

    Always returns True.
    """
    char = self.stream.char()

    if char in spaceCharacters:
        self.state = self.beforeDoctypeSystemIdentifierState
        return True

    if char is EOF:
        eof_error = {"type": tokenTypes["ParseError"],
                     "data": "eof-in-doctype"}
        self.tokenQueue.append(eof_error)
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True

    if char in ("'", '"'):
        # The quote arrived with no separating space after the keyword:
        # report it, then let the next state consume the quote itself.
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "unexpected-char-in-doctype"})

    # Quote or anything else: hand the character back so that the
    # "before system identifier" state decides what to do with it.
    self.stream.unget(char)
    self.state = self.beforeDoctypeSystemIdentifierState
    return True
1149+
10801150def beforeDoctypeSystemIdentifierState (self ):
10811151data = self .stream .char ()
10821152if data in spaceCharacters :