Jan 9, 2021 · Sep 20, 2021
diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py

 from .constants import spaceCharacters
 from .constants import entities
 from .constants import asciiLetters, asciiUpper2Lower
 from .constants import asciiLetters
 from .constants import digits, hexDigits, EOF
 from .constants import tokenTypes, tagTokenTypes
 from .constants import replacementCharacters
        token = self.currentToken
        # Add token to the queue to be yielded
        if (token["type"] in tagTokenTypes):
            token["name"] = token["name"].translate(asciiUpper2Lower)
            token["name"] = token["name"].lower()
            if token["type"] == tokenTypes["StartTag"]:
                raw = token["data"]
                data = attributeMap(raw)
            # start tag token is emitted so values can still be safely appended
            # to attributes, but we do want to report the parse error in time.
            self.currentToken["data"][-1][0] = (
                self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
                self.currentToken["data"][-1][0].lower())
            for name, _ in self.currentToken["data"][:-1]:
                if self.currentToken["data"][-1][0] == name:
                    self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
    def doctypeNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.currentToken["name"] = self.currentToken["name"].lower()
            self.state = self.afterDoctypeNameState
        elif data == ">":
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.currentToken["name"] = self.currentToken["name"].lower()
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype-name"})
            self.currentToken["correct"] = False
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.currentToken["name"] = self.currentToken["name"].lower()
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
diff --git a/html5lib/constants.py b/html5lib/constants.py
    "tr"
 ])

 asciiLowercase = frozenset(string.ascii_lowercase)
 asciiUppercase = frozenset(string.ascii_uppercase)
 asciiLetters = frozenset(string.ascii_letters)
 digits = frozenset(string.digits)
 hexDigits = frozenset(string.hexdigits)

 asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}

 # Heading elements need to be ordered
 headingElements = (
    "h1",
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py

 from . import _utils
 from .constants import (
    spaceCharacters, asciiUpper2Lower,
    spaceCharacters,
    specialElements, headingElements, cdataElements, rcdataElements,
    tokenTypes, tagTokenTypes,
    namespaces,
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    element.attributes["encoding"].lower() in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements
            self.tree.insertDoctype(token)

            if publicId != "":
                publicId = publicId.translate(asciiUpper2Lower)
                publicId = publicId.lower()

            if (not correct or token["name"] != "html" or
                    publicId.startswith(
            framesetOK = self.parser.framesetOK
            self.startTagVoidFormatting(token)
            if ("type" in token["data"] and
                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
                    token["data"]["type"].lower() == "hidden"):
                # input type=hidden doesn't change framesetOK
                self.parser.framesetOK = framesetOK


        def startTagInput(self, token):
            if ("type" in token["data"] and
                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
                    token["data"]["type"].lower() == "hidden"):
                self.parser.parseError("unexpected-hidden-input-in-table")
                self.tree.insertElement(token)
                # XXX associate with form
        def processEndTag(self, token):
            nodeIndex = len(self.tree.openElements) - 1
            node = self.tree.openElements[-1]
            if node.name.translate(asciiUpper2Lower) != token["name"]:
            if node.name.lower() != token["name"]:
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

            while True:
                if node.name.translate(asciiUpper2Lower) == token["name"]:
                if node.name.lower() == token["name"]:
                    # XXX this isn't in the spec but it seems necessary
                    if self.parser.phase == self.parser.phases["inTableText"]:
                        self.parser.phase.flushCharacters()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -7,7 +7,7 @@

		from .constants import spaceCharacters
		from .constants import entities
		from .constants import asciiLetters, asciiUpper2Lower
		from .constants import asciiLetters
		from .constants import digits, hexDigits, EOF
		from .constants import tokenTypes, tagTokenTypes
		from .constants import replacementCharacters
Expand DownExpand Up		@@ -233,7 +233,7 @@ def emitCurrentToken(self):
		token = self.currentToken
		# Add token to the queue to be yielded
		if (token["type"] in tagTokenTypes):
		token["name"] = token["name"].translate(asciiUpper2Lower)
		token["name"] = token["name"].lower()
		if token["type"] == tokenTypes["StartTag"]:
		raw = token["data"]
		data = attributeMap(raw)
Expand DownExpand Up		@@ -927,7 +927,7 @@ def attributeNameState(self):
		# start tag token is emitted so values can still be safely appended
		# to attributes, but we do want to report the parse error in time.
		self.currentToken["data"][-1][0] = (
		self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
		self.currentToken["data"][-1][0].lower())
		for name, _ in self.currentToken["data"][:-1]:
		if self.currentToken["data"][-1][0] == name:
		self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
Expand DownExpand Up		@@ -1348,10 +1348,10 @@ def beforeDoctypeNameState(self):
		def doctypeNameState(self):
		data = self.stream.char()
		if data in spaceCharacters:
		self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
		self.currentToken["name"] = self.currentToken["name"].lower()
		self.state = self.afterDoctypeNameState
		elif data == ">":
		self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
		self.currentToken["name"] = self.currentToken["name"].lower()
		self.tokenQueue.append(self.currentToken)
		self.state = self.dataState
		elif data == "\u0000":
Expand All		@@ -1363,7 +1363,7 @@ def doctypeNameState(self):
		self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
		"eof-in-doctype-name"})
		self.currentToken["correct"] = False
		self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
		self.currentToken["name"] = self.currentToken["name"].lower()
		self.tokenQueue.append(self.currentToken)
		self.state = self.dataState
		else:
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -538,14 +538,11 @@
		"tr"
		])

		asciiLowercase = frozenset(string.ascii_lowercase)
		asciiUppercase = frozenset(string.ascii_uppercase)
		asciiLetters = frozenset(string.ascii_letters)
		digits = frozenset(string.digits)
		hexDigits = frozenset(string.hexdigits)

		asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}

		# Heading elements need to be ordered
		headingElements = (
		"h1",
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,7 +11,7 @@

		from . import _utils
		from .constants import (
		spaceCharacters, asciiUpper2Lower,
		spaceCharacters,
		specialElements, headingElements, cdataElements, rcdataElements,
		tokenTypes, tagTokenTypes,
		namespaces,
Expand DownExpand Up		@@ -183,8 +183,7 @@ def isHTMLIntegrationPoint(self, element):
		if (element.name == "annotation-xml" and
		element.namespace == namespaces["mathml"]):
		return ("encoding" in element.attributes and
		element.attributes["encoding"].translate(
		asciiUpper2Lower) in
		element.attributes["encoding"].lower() in
		("text/html", "application/xhtml+xml"))
		else:
		return (element.namespace, element.name) in htmlIntegrationPointElements
Expand DownExpand Up		@@ -520,7 +519,7 @@ def processDoctype(self, token):
		self.tree.insertDoctype(token)

		if publicId != "":
		publicId = publicId.translate(asciiUpper2Lower)
		publicId = publicId.lower()

		if (not correct or token["name"] != "html" or
		publicId.startswith(
Expand DownExpand Up		@@ -1165,7 +1164,7 @@ def startTagInput(self, token):
		framesetOK = self.parser.framesetOK
		self.startTagVoidFormatting(token)
		if ("type" in token["data"] and
		token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
		token["data"]["type"].lower() == "hidden"):
		# input type=hidden doesn't change framesetOK
		self.parser.framesetOK = framesetOK

Expand DownExpand Up		@@ -1771,7 +1770,7 @@ def startTagStyleScript(self, token):

		def startTagInput(self, token):
		if ("type" in token["data"] and
		token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
		token["data"]["type"].lower() == "hidden"):
		self.parser.parseError("unexpected-hidden-input-in-table")
		self.tree.insertElement(token)
		# XXX associate with form
Expand DownExpand Up		@@ -2512,11 +2511,11 @@ def processStartTag(self, token):
		def processEndTag(self, token):
		nodeIndex = len(self.tree.openElements) - 1
		node = self.tree.openElements[-1]
		if node.name.translate(asciiUpper2Lower) != token["name"]:
		if node.name.lower() != token["name"]:
		self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

		while True:
		if node.name.translate(asciiUpper2Lower) == token["name"]:
		if node.name.lower() == token["name"]:
		# XXX this isn't in the spec but it seems necessary
		if self.parser.phase == self.parser.phases["inTableText"]:
		self.parser.phase.flushCharacters()
Expand Down