Notifications: You must be signed in to change notification settings
Fork: 0
Star: 0

Commit bd57c61

committed

Get Python tokenizer up to the version of the spec of the end of September.

1 parent: 7d29315 · commit: bd57c61 (Copy full SHA for bd57c61)

File tree

1 file changed

+75

-5

lines changed

src/html5lib
- tokenizer.py

1 file changed

+75

-5

lines changed

`‎src/html5lib/tokenizer.py`

Lines changed: 75 additions & 5 deletions

Original file line number	Diff line number	Diff line change
`@@ -955,7 +955,7 @@ def afterDoctypeNameState(self):`
`955`	`955`	`matched=False`
`956`	`956`	`break`
`957`	`957`	`ifmatched:`
`958`		`-self.state=self.beforeDoctypePublicIdentifierState`
	`958`	`+self.state=self.afterDoctypePublicKeywordState`
`959`	`959`	`returnTrue`
`960`	`960`	`elifdatain (u"s",u"S"):`
`961`	`961`	`matched=True`
`@@ -966,7 +966,7 @@ def afterDoctypeNameState(self):`
`966`	`966`	`matched=False`
`967`	`967`	`break`
`968`	`968`	`ifmatched:`
`969`		`-self.state=self.beforeDoctypeSystemIdentifierState`
	`969`	`+self.state=self.afterDoctypeSystemKeywordState`
`970`	`970`	`returnTrue`
`971`	`971`
`972`	`972`	`# All the characters read before the current 'data' will be`
`@@ -981,6 +981,26 @@ def afterDoctypeNameState(self):`
`981`	`981`	`self.state=self.bogusDoctypeState`
`982`	`982`
`983`	`983`	`returnTrue`
	`984`	`+`
	`985`	`+defafterDoctypePublicKeywordState(self):`
	`986`	`+data=self.stream.char()`
	`987`	`+ifdatainspaceCharacters:`
	`988`	`+self.state=self.beforeDoctypePublicIdentifierState`
	`989`	`+elifdatain ("'",'"'):`
	`990`	`+self.tokenQueue.append({"type":tokenTypes["ParseError"],"data":`
	`991`	`+"unexpected-char-in-doctype"})`
	`992`	`+self.stream.unget(data)`
	`993`	`+self.state=self.beforeDoctypePublicIdentifierState`
	`994`	`+elifdataisEOF:`
	`995`	`+self.tokenQueue.append({"type":tokenTypes["ParseError"],"data":`
	`996`	`+"eof-in-doctype"})`
	`997`	`+self.currentToken["correct"]=False`
	`998`	`+self.tokenQueue.append(self.currentToken)`
	`999`	`+self.state=self.dataState`
	`1000`	`+else:`
	`1001`	`+self.stream.unget(data)`
	`1002`	`+self.state=self.beforeDoctypePublicIdentifierState`
	`1003`	`+returnTrue`
`984`	`1004`
`985`	`1005`	`defbeforeDoctypePublicIdentifierState(self):`
`986`	`1006`	`data=self.stream.char()`
`@@ -1054,17 +1074,47 @@ def doctypePublicIdentifierSingleQuotedState(self):`
`1054`	`1074`	`defafterDoctypePublicIdentifierState(self):`
`1055`	`1075`	`data=self.stream.char()`
`1056`	`1076`	`ifdatainspaceCharacters:`
`1057`		`-pass`
`1058`		`-elifdata=="\"":`
	`1077`	`+self.state=self.betweenDoctypePublicAndSystemIdentifiersState`
	`1078`	`+elifdata==">":`
	`1079`	`+self.tokenQueue.append(self.currentToken)`
	`1080`	`+self.state=self.dataState`
	`1081`	`+elifdata=='"':`
	`1082`	`+self.tokenQueue.append({"type":tokenTypes["ParseError"],"data":`
	`1083`	`+"unexpected-char-in-doctype"})`
`1059`	`1084`	`self.currentToken["systemId"]=u""`
`1060`	`1085`	`self.state=self.doctypeSystemIdentifierDoubleQuotedState`
`1061`	`1086`	`elifdata=="'":`
	`1087`	`+self.tokenQueue.append({"type":tokenTypes["ParseError"],"data":`
	`1088`	`+"unexpected-char-in-doctype"})`
`1062`	`1089`	`self.currentToken["systemId"]=u""`
`1063`	`1090`	`self.state=self.doctypeSystemIdentifierSingleQuotedState`
	`1091`	`+elifdataisEOF:`
	`1092`	`+self.tokenQueue.append({"type":tokenTypes["ParseError"],"data":`
	`1093`	`+"eof-in-doctype"})`
	`1094`	`+self.currentToken["correct"]=False`
	`1095`	`+self.tokenQueue.append(self.currentToken)`
	`1096`	`+self.state=self.dataState`
	`1097`	`+else:`
	`1098`	`+self.tokenQueue.append({"type":tokenTypes["ParseError"],"data":`
	`1099`	`+"unexpected-char-in-doctype"})`
	`1100`	`+self.currentToken["correct"]=False`
	`1101`	`+self.state=self.bogusDoctypeState`
	`1102`	`+returnTrue`
	`1103`	`+`
	`1104`	`+defbetweenDoctypePublicAndSystemIdentifiersState(self):`
	`1105`	`+data=self.stream.char()`
	`1106`	`+ifdatainspaceCharacters:`
	`1107`	`+pass`
`1064`	`1108`	`elifdata==">":`
`1065`	`1109`	`self.tokenQueue.append(self.currentToken)`
`1066`	`1110`	`self.state=self.dataState`
`1067`		`-elifdataisEOF:`
	`1111`	`+elifdata=='"':`
	`1112`	`+self.currentToken["systemId"]=u""`
	`1113`	`+self.state=self.doctypeSystemIdentifierDoubleQuotedState`
	`1114`	`+elifdata=="'":`
	`1115`	`+self.currentToken["systemId"]=u""`
	`1116`	`+self.state=self.doctypeSystemIdentifierSingleQuotedState`
	`1117`	`+elifdata==EOF:`
`1068`	`1118`	`self.tokenQueue.append({"type":tokenTypes["ParseError"],"data":`
`1069`	`1119`	`"eof-in-doctype"})`
`1070`	`1120`	`self.currentToken["correct"]=False`
`@@ -1077,6 +1127,26 @@ def afterDoctypePublicIdentifierState(self):`
`1077`	`1127`	`self.state=self.bogusDoctypeState`
`1078`	`1128`	`returnTrue`
`1079`	`1129`
	`1130`	`+defafterDoctypeSystemKeywordState(self):`
	`1131`	`+data=self.stream.char()`
	`1132`	`+ifdatainspaceCharacters:`
	`1133`	`+self.state=self.beforeDoctypeSystemIdentifierState`
	`1134`	`+elifdatain ("'",'"'):`
	`1135`	`+self.tokenQueue.append({"type":tokenTypes["ParseError"],"data":`
	`1136`	`+"unexpected-char-in-doctype"})`
	`1137`	`+self.stream.unget(data)`
	`1138`	`+self.state=self.beforeDoctypeSystemIdentifierState`
	`1139`	`+elifdataisEOF:`
	`1140`	`+self.tokenQueue.append({"type":tokenTypes["ParseError"],"data":`
	`1141`	`+"eof-in-doctype"})`
	`1142`	`+self.currentToken["correct"]=False`
	`1143`	`+self.tokenQueue.append(self.currentToken)`
	`1144`	`+self.state=self.dataState`
	`1145`	`+else:`
	`1146`	`+self.stream.unget(data)`
	`1147`	`+self.state=self.beforeDoctypeSystemIdentifierState`
	`1148`	`+returnTrue`
	`1149`	`+`
`1080`	`1150`	`defbeforeDoctypeSystemIdentifierState(self):`
`1081`	`1151`	`data=self.stream.char()`
`1082`	`1152`	`ifdatainspaceCharacters:`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit bd57c61

File tree

1 file changed

1 file changed

`‎src/html5lib/tokenizer.py`

0 commit comments