Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Lint fixes #222

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
gsnedders merged 16 commits into html5lib:master from gsnedders:lint_fixes
Jan 12, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit (hold Shift + click to select a range)
40c3ba6
Fix lint to expect text_type everywhere
gsnedders, Dec 16, 2015
fbbea1f
Update lint filter for namespaced attributes
gsnedders, Dec 16, 2015
8b4d7c4
Drop the content model requirements from lint
gsnedders, Dec 16, 2015
270a2ca
Don't let the lxml treewalker walk above the fragment root
gsnedders, Dec 16, 2015
66ef026
Teach lint & treewalkers that elements are only void in HTML ns
gsnedders, Dec 16, 2015
5bd3413
Use lint filter to ensure validity of treewalkers
gsnedders, Dec 16, 2015
fb9e177
Remove runtime type checks from treewalkers._base
gsnedders, Dec 16, 2015
2a5d7af
Make sure we have the unicode from of text in lxml fragment root
gsnedders, Dec 16, 2015
9eff304
Allow None as a doctype tagname in lint
gsnedders, Dec 16, 2015
e0ea899
Drop all the to_text magic in treewalkers._base
gsnedders, Dec 16, 2015
22c2b1a
Get rid of LintError and just use asserts
gsnedders, Dec 16, 2015
5336ebe
Lint that comments are text_type
gsnedders, Dec 16, 2015
dc879ff
Don't allow ParseError/SerializerError tokens, whatever they are!
gsnedders, Dec 16, 2015
7f8bd13
Drop end tag tree walker's data (always empty now)
gsnedders, Dec 16, 2015
c335295
Drop tree walker doctype correct flag, whatever that once was!
gsnedders, Dec 16, 2015
ca6591c
Make sure lint is testing everything treewalkers can do.
gsnedders, Dec 16, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 44 additions & 57 deletions — html5lib/filters/lint.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,90 +1,77 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type

from . import _base
from ..constants import cdataElements, rcdataElements, voidElements
from ..constants import namespaces, voidElements

from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)


class LintError(Exception):
pass


class Filter(_base.Filter):
def __iter__(self):
open_elements = []
contentModelFlag = "PCDATA"
for token in _base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
namespace = token["namespace"]
name = token["name"]
if contentModelFlag != "PCDATA":
raise LintError("StartTag not in PCDATA content model flag: %(tag)s" % {"tag": name})
if not isinstance(name, str):
raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
if not name:
raise LintError("Empty tag name")
if type == "StartTag" and name in voidElements:
raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name})
elif type == "EmptyTag" and name not in voidElements:
raise LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]})
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
assert isinstance(token["data"], dict)
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert type == "EmptyTag"
else:
assert type == "StartTag"
if type == "StartTag":
open_elements.append(name)
for name, value in token["data"]:
if not isinstance(name, str):
raise LintError("Attribute name is not a string: %(name)r" % {"name": name})
if not name:
raise LintError("Empty attribute name")
if not isinstance(value, str):
raise LintError("Attribute value is not a string: %(value)r" % {"value": value})
if name in cdataElements:
contentModelFlag = "CDATA"
elif name in rcdataElements:
contentModelFlag = "RCDATA"
elif name == "plaintext":
contentModelFlag = "PLAINTEXT"
open_elements.append((namespace, name))
for (namespace, name), value in token["data"].items():
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
assert isinstance(value, text_type)

elif type == "EndTag":
namespace = token["namespace"]
name = token["name"]
if not isinstance(name, str):
raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
if not name:
raise LintError("Empty tag name")
if name in voidElements:
raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name})
start_name = open_elements.pop()
if start_name != name:
raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name})
contentModelFlag = "PCDATA"
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
else:
start = open_elements.pop()
assert start == (namespace, name)

elif type == "Comment":
if contentModelFlag != "PCDATA":
raise LintError("Comment not in PCDATA content model flag")
data = token["data"]
assert isinstance(data, text_type)

elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
if not isinstance(data, str):
raise LintError("Attribute name is not a string: %(name)r" % {"name": data})
if not data:
raise LintError("%(type)s token with empty data" % {"type": type})
assert isinstance(data, text_type)
assert data != ""
if type == "SpaceCharacters":
data = data.strip(spaceCharacters)
if data:
raise LintError("Non-space character(s) found in SpaceCharacters token: %(token)r" % {"token": data})
assert data.strip(spaceCharacters) == ""

elif type == "Doctype":
name = token["name"]
if contentModelFlag != "PCDATA":
raise LintError("Doctype not in PCDATA content model flag: %(name)s" % {"name": name})
if not isinstance(name, str):
raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
# XXX: what to do with token["data"] ?
assert name is None or isinstance(name, text_type)
assert token["publicId"] is None or isinstance(name, text_type)
assert token["systemId"] is None or isinstance(name, text_type)

elif type == "Entity":
assert isinstance(token["name"], text_type)

elif type in ("ParseError", "SerializeError"):
pass
elif type == "SerializerError":
assert isinstance(token["data"], text_type)

else:
raise LintError("Unknown token type: %(type)s" % {"type": type})
assert False, "Unknown token type: %(type)s" % {"type": type}

yield token
13 changes: 7 additions & 6 deletions — html5lib/tests/test_treewalkers.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -14,6 +14,7 @@
from .support import get_data_files, TestData, convertExpected

from html5lib import html5parser, treewalkers, treebuilders, treeadapters, constants
from html5lib.filters.lint import Filter as Lint


treeTypes = {
Expand DownExpand Up@@ -77,21 +78,21 @@ def test_all_tokens(self):
expected = [
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'data': 'a', 'type': 'Characters'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'b', 'type': 'Characters'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'c', 'type': 'Characters'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
]
for treeName, treeCls in sorted(treeTypes.items()):
p = html5parser.HTMLParser(tree=treeCls["builder"])
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
document = treeCls.get("adapter", lambda x: x)(document)
output = treeCls["walker"](document)
output = Lint(treeCls["walker"](document))
for expectedToken, outputToken in zip(expected, output):
self.assertEqual(expectedToken, outputToken)

Expand All@@ -111,7 +112,7 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):

document = treeClass.get("adapter", lambda x: x)(document)
try:
output = treewalkers.pprint(treeClass["walker"](document))
output = treewalkers.pprint(Lint(treeClass["walker"](document)))
output = attrlist.sub(sortattrs, output)
expected = attrlist.sub(sortattrs, convertExpected(expected))
diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
Expand Down
84 changes: 17 additions & 67 deletions — html5lib/treewalkers/_base.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type, string_types

from xml.dom import Node
from ..constants import voidElements, spaceCharacters
from ..constants import namespaces, voidElements, spaceCharacters

__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
"TreeWalker", "NonRecursiveTreeWalker"]
Expand All@@ -18,24 +17,6 @@
spaceCharacters = "".join(spaceCharacters)


def to_text(s, blank_if_none=True):
"""Wrapper around six.text_type to convert None to empty string"""
if s is None:
if blank_if_none:
return ""
else:
return None
elif isinstance(s, text_type):
return s
else:
return text_type(s)


def is_text_or_none(string):
"""Wrapper around isinstance(string_types) or is None"""
return string is None or isinstance(string, string_types)


class TreeWalker(object):
def __init__(self, tree):
self.tree = tree
Expand All@@ -47,47 +28,25 @@ def error(self, msg):
return {"type": "SerializeError", "data": msg}

def emptyTag(self, namespace, name, attrs, hasChildren=False):
assert namespace is None or isinstance(namespace, string_types), type(namespace)
assert isinstance(name, string_types), type(name)
assert all((namespace is None or isinstance(namespace, string_types)) and
isinstance(name, string_types) and
isinstance(value, string_types)
for (namespace, name), value in attrs.items())

yield {"type": "EmptyTag", "name": to_text(name, False),
"namespace": to_text(namespace),
yield {"type": "EmptyTag", "name": name,
"namespace": namespace,
"data": attrs}
if hasChildren:
yield self.error("Void element has children")

def startTag(self, namespace, name, attrs):
assert namespace is None or isinstance(namespace, string_types), type(namespace)
assert isinstance(name, string_types), type(name)
assert all((namespace is None or isinstance(namespace, string_types)) and
isinstance(name, string_types) and
isinstance(value, string_types)
for (namespace, name), value in attrs.items())

return {"type": "StartTag",
"name": text_type(name),
"namespace": to_text(namespace),
"data": dict(((to_text(namespace, False), to_text(name)),
to_text(value, False))
for (namespace, name), value in attrs.items())}
"name": name,
"namespace": namespace,
"data": attrs}

def endTag(self, namespace, name):
assert namespace is None or isinstance(namespace, string_types), type(namespace)
assert isinstance(name, string_types), type(namespace)

return {"type": "EndTag",
"name": to_text(name, False),
"namespace": to_text(namespace),
"data": {}}
"name": name,
"namespace": namespace}

def text(self, data):
assert isinstance(data, string_types), type(data)

data = to_text(data)
data = data
middle = data.lstrip(spaceCharacters)
left = data[:len(data) - len(middle)]
if left:
Expand All@@ -101,25 +60,16 @@ def text(self, data):
yield {"type": "SpaceCharacters", "data": right}

def comment(self, data):
assert isinstance(data, string_types), type(data)

return {"type": "Comment", "data": text_type(data)}

def doctype(self, name, publicId=None, systemId=None, correct=True):
assert is_text_or_none(name), type(name)
assert is_text_or_none(publicId), type(publicId)
assert is_text_or_none(systemId), type(systemId)
return {"type": "Comment", "data": data}

def doctype(self, name, publicId=None, systemId=None):
return {"type": "Doctype",
"name": to_text(name),
"publicId": to_text(publicId),
"systemId": to_text(systemId),
"correct": to_text(correct)}
"name": name,
"publicId": publicId,
"systemId": systemId}

def entity(self, name):
assert isinstance(name, string_types), type(name)

return {"type": "Entity", "name": text_type(name)}
return {"type": "Entity", "name": name}

def unknown(self, nodeType):
return self.error("Unknown node type: " + nodeType)
Expand DownExpand Up@@ -154,7 +104,7 @@ def __iter__(self):

elif type == ELEMENT:
namespace, name, attributes, hasChildren = details
if name in voidElements:
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
for token in self.emptyTag(namespace, name, attributes,
hasChildren):
yield token
Expand DownExpand Up@@ -187,7 +137,7 @@ def __iter__(self):
type, details = details[0], details[1:]
if type == ELEMENT:
namespace, name, attributes, hasChildren = details
if name not in voidElements:
if (namespace and namespace != namespaces["html"]) or name not in voidElements:
yield self.endTag(namespace, name)
if self.tree is currentNode:
currentNode = None
Expand Down
2 changes: 1 addition & 1 deletion — html5lib/treewalkers/genshistream.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -48,7 +48,7 @@ def tokens(self, event, next):
elif kind == END:
name = data.localname
namespace = data.namespace
if name not in voidElements:
if namespace != namespaces["html"] or name not in voidElements:
yield self.endTag(namespace, name)

elif kind == COMMENT:
Expand Down
6 changes: 5 additions & 1 deletion — html5lib/treewalkers/lxmletree.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -118,8 +118,10 @@ def __len__(self):
class TreeWalker(_base.NonRecursiveTreeWalker):
def __init__(self, tree):
if hasattr(tree, "getroot"):
self.fragmentChildren = set()
tree = Root(tree)
elif isinstance(tree, list):
self.fragmentChildren = set(tree)
tree = FragmentRoot(tree)
_base.NonRecursiveTreeWalker.__init__(self, tree)
self.filter = ihatexml.InfosetFilter()
Expand All@@ -137,7 +139,7 @@ def getNodeDetails(self, node):
return _base.DOCTYPE, node.name, node.public_id, node.system_id

elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
return _base.TEXT, node.obj
return _base.TEXT, ensure_str(node.obj)

elif node.tag == etree.Comment:
return _base.COMMENT, ensure_str(node.text)
Expand DownExpand Up@@ -197,5 +199,7 @@ def getParentNode(self, node):
if key == "text":
return node
# else: fallback to "normal" processing
elif node in self.fragmentChildren:
return None

return node.getparent()

[8]ページ先頭

©2009-2025 Movatter.jp