Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 73215c5

Browse files
committed
Merge pull request #222 from gsnedders/lint_fixes
Various fixes for the lint filter, and use it to validate treewalker sanity in tests.
2 parents af0199c + ca6591c, commit 73215c5

File tree

5 files changed

+74
-132
lines changed

5 files changed

+74
-132
lines changed

‎html5lib/filters/lint.py‎

Lines changed: 44 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -1,90 +1,77 @@
11
from __future__importabsolute_import,division,unicode_literals
22

3+
fromsiximporttext_type
4+
35
from .import_base
4-
from ..constantsimportcdataElements,rcdataElements,voidElements
6+
from ..constantsimportnamespaces,voidElements
57

68
from ..constantsimportspaceCharacters
79
spaceCharacters="".join(spaceCharacters)
810

911

10-
classLintError(Exception):
11-
pass
12-
13-
1412
classFilter(_base.Filter):
1513
def__iter__(self):
1614
open_elements= []
17-
contentModelFlag="PCDATA"
1815
fortokenin_base.Filter.__iter__(self):
1916
type=token["type"]
2017
iftypein ("StartTag","EmptyTag"):
18+
namespace=token["namespace"]
2119
name=token["name"]
22-
ifcontentModelFlag!="PCDATA":
23-
raiseLintError("StartTag not in PCDATA content model flag: %(tag)s"% {"tag":name})
24-
ifnotisinstance(name,str):
25-
raiseLintError("Tag name is not a string: %(tag)r"% {"tag":name})
26-
ifnotname:
27-
raiseLintError("Empty tag name")
28-
iftype=="StartTag"andnameinvoidElements:
29-
raiseLintError("Void element reported as StartTag token: %(tag)s"% {"tag":name})
30-
eliftype=="EmptyTag"andnamenotinvoidElements:
31-
raiseLintError("Non-void element reported as EmptyTag token: %(tag)s"% {"tag":token["name"]})
20+
assertnamespaceisNoneorisinstance(namespace,text_type)
21+
assertnamespace!=""
22+
assertisinstance(name,text_type)
23+
assertname!=""
24+
assertisinstance(token["data"],dict)
25+
if (notnamespaceornamespace==namespaces["html"])andnameinvoidElements:
26+
asserttype=="EmptyTag"
27+
else:
28+
asserttype=="StartTag"
3229
iftype=="StartTag":
33-
open_elements.append(name)
34-
forname,valueintoken["data"]:
35-
ifnotisinstance(name,str):
36-
raiseLintError("Attribute name is not a string: %(name)r"% {"name":name})
37-
ifnotname:
38-
raiseLintError("Empty attribute name")
39-
ifnotisinstance(value,str):
40-
raiseLintError("Attribute value is not a string: %(value)r"% {"value":value})
41-
ifnameincdataElements:
42-
contentModelFlag="CDATA"
43-
elifnameinrcdataElements:
44-
contentModelFlag="RCDATA"
45-
elifname=="plaintext":
46-
contentModelFlag="PLAINTEXT"
30+
open_elements.append((namespace,name))
31+
for (namespace,name),valueintoken["data"].items():
32+
assertnamespaceisNoneorisinstance(namespace,text_type)
33+
assertnamespace!=""
34+
assertisinstance(name,text_type)
35+
assertname!=""
36+
assertisinstance(value,text_type)
4737

4838
eliftype=="EndTag":
39+
namespace=token["namespace"]
4940
name=token["name"]
50-
ifnotisinstance(name,str):
51-
raiseLintError("Tag name is not a string: %(tag)r"% {"tag":name})
52-
ifnotname:
53-
raiseLintError("Empty tag name")
54-
ifnameinvoidElements:
55-
raiseLintError("Void element reported as EndTag token: %(tag)s"% {"tag":name})
56-
start_name=open_elements.pop()
57-
ifstart_name!=name:
58-
raiseLintError("EndTag (%(end)s) does not match StartTag (%(start)s)"% {"end":name,"start":start_name})
59-
contentModelFlag="PCDATA"
41+
assertnamespaceisNoneorisinstance(namespace,text_type)
42+
assertnamespace!=""
43+
assertisinstance(name,text_type)
44+
assertname!=""
45+
if (notnamespaceornamespace==namespaces["html"])andnameinvoidElements:
46+
assertFalse,"Void element reported as EndTag token: %(tag)s"% {"tag":name}
47+
else:
48+
start=open_elements.pop()
49+
assertstart== (namespace,name)
6050

6151
eliftype=="Comment":
62-
ifcontentModelFlag!="PCDATA":
63-
raiseLintError("Comment not in PCDATA content model flag")
52+
data=token["data"]
53+
assertisinstance(data,text_type)
6454

6555
eliftypein ("Characters","SpaceCharacters"):
6656
data=token["data"]
67-
ifnotisinstance(data,str):
68-
raiseLintError("Attribute name is not a string: %(name)r"% {"name":data})
69-
ifnotdata:
70-
raiseLintError("%(type)s token with empty data"% {"type":type})
57+
assertisinstance(data,text_type)
58+
assertdata!=""
7159
iftype=="SpaceCharacters":
72-
data=data.strip(spaceCharacters)
73-
ifdata:
74-
raiseLintError("Non-space character(s) found in SpaceCharacters token: %(token)r"% {"token":data})
60+
assertdata.strip(spaceCharacters)==""
7561

7662
eliftype=="Doctype":
7763
name=token["name"]
78-
ifcontentModelFlag!="PCDATA":
79-
raiseLintError("Doctype not in PCDATA content model flag: %(name)s"% {"name":name})
80-
ifnotisinstance(name,str):
81-
raiseLintError("Tag name is not a string: %(tag)r"% {"tag":name})
82-
# XXX: what to do with token["data"] ?
64+
assertnameisNoneorisinstance(name,text_type)
65+
asserttoken["publicId"]isNoneorisinstance(name,text_type)
66+
asserttoken["systemId"]isNoneorisinstance(name,text_type)
67+
68+
eliftype=="Entity":
69+
assertisinstance(token["name"],text_type)
8370

84-
eliftypein ("ParseError","SerializeError"):
85-
pass
71+
eliftype=="SerializerError":
72+
assertisinstance(token["data"],text_type)
8673

8774
else:
88-
raiseLintError("Unknown token type: %(type)s"% {"type":type})
75+
assertFalse,"Unknown token type: %(type)s"% {"type":type}
8976

9077
yieldtoken

‎html5lib/tests/test_treewalkers.py‎

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from .supportimportget_data_files,TestData,convertExpected
1515

1616
fromhtml5libimporthtml5parser,treewalkers,treebuilders,treeadapters,constants
17+
fromhtml5lib.filters.lintimportFilterasLint
1718

1819

1920
treeTypes= {
@@ -77,21 +78,21 @@ def test_all_tokens(self):
7778
expected= [
7879
{'data': {},'type':'StartTag','namespace':'http://www.w3.org/1999/xhtml','name':'html'},
7980
{'data': {},'type':'StartTag','namespace':'http://www.w3.org/1999/xhtml','name':'head'},
80-
{'data': {},'type':'EndTag','namespace':'http://www.w3.org/1999/xhtml','name':'head'},
81+
{'type':'EndTag','namespace':'http://www.w3.org/1999/xhtml','name':'head'},
8182
{'data': {},'type':'StartTag','namespace':'http://www.w3.org/1999/xhtml','name':'body'},
8283
{'data':'a','type':'Characters'},
8384
{'data': {},'type':'StartTag','namespace':'http://www.w3.org/1999/xhtml','name':'div'},
8485
{'data':'b','type':'Characters'},
85-
{'data': {},'type':'EndTag','namespace':'http://www.w3.org/1999/xhtml','name':'div'},
86+
{'type':'EndTag','namespace':'http://www.w3.org/1999/xhtml','name':'div'},
8687
{'data':'c','type':'Characters'},
87-
{'data': {},'type':'EndTag','namespace':'http://www.w3.org/1999/xhtml','name':'body'},
88-
{'data': {},'type':'EndTag','namespace':'http://www.w3.org/1999/xhtml','name':'html'}
88+
{'type':'EndTag','namespace':'http://www.w3.org/1999/xhtml','name':'body'},
89+
{'type':'EndTag','namespace':'http://www.w3.org/1999/xhtml','name':'html'}
8990
]
9091
fortreeName,treeClsinsorted(treeTypes.items()):
9192
p=html5parser.HTMLParser(tree=treeCls["builder"])
9293
document=p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
9394
document=treeCls.get("adapter",lambdax:x)(document)
94-
output=treeCls["walker"](document)
95+
output=Lint(treeCls["walker"](document))
9596
forexpectedToken,outputTokeninzip(expected,output):
9697
self.assertEqual(expectedToken,outputToken)
9798

@@ -111,7 +112,7 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
111112

112113
document=treeClass.get("adapter",lambdax:x)(document)
113114
try:
114-
output=treewalkers.pprint(treeClass["walker"](document))
115+
output=treewalkers.pprint(Lint(treeClass["walker"](document)))
115116
output=attrlist.sub(sortattrs,output)
116117
expected=attrlist.sub(sortattrs,convertExpected(expected))
117118
diff="".join(unified_diff([line+"\n"forlineinexpected.splitlines()],

‎html5lib/treewalkers/_base.py‎

Lines changed: 17 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
11
from __future__importabsolute_import,division,unicode_literals
2-
fromsiximporttext_type,string_types
32

43
fromxml.domimportNode
5-
from ..constantsimportvoidElements,spaceCharacters
4+
from ..constantsimportnamespaces,voidElements,spaceCharacters
65

76
__all__= ["DOCUMENT","DOCTYPE","TEXT","ELEMENT","COMMENT","ENTITY","UNKNOWN",
87
"TreeWalker","NonRecursiveTreeWalker"]
@@ -18,24 +17,6 @@
1817
spaceCharacters="".join(spaceCharacters)
1918

2019

21-
defto_text(s,blank_if_none=True):
22-
"""Wrapper around six.text_type to convert None to empty string"""
23-
ifsisNone:
24-
ifblank_if_none:
25-
return""
26-
else:
27-
returnNone
28-
elifisinstance(s,text_type):
29-
returns
30-
else:
31-
returntext_type(s)
32-
33-
34-
defis_text_or_none(string):
35-
"""Wrapper around isinstance(string_types) or is None"""
36-
returnstringisNoneorisinstance(string,string_types)
37-
38-
3920
classTreeWalker(object):
4021
def__init__(self,tree):
4122
self.tree=tree
@@ -47,47 +28,25 @@ def error(self, msg):
4728
return {"type":"SerializeError","data":msg}
4829

4930
defemptyTag(self,namespace,name,attrs,hasChildren=False):
50-
assertnamespaceisNoneorisinstance(namespace,string_types),type(namespace)
51-
assertisinstance(name,string_types),type(name)
52-
assertall((namespaceisNoneorisinstance(namespace,string_types))and
53-
isinstance(name,string_types)and
54-
isinstance(value,string_types)
55-
for (namespace,name),valueinattrs.items())
56-
57-
yield {"type":"EmptyTag","name":to_text(name,False),
58-
"namespace":to_text(namespace),
31+
yield {"type":"EmptyTag","name":name,
32+
"namespace":namespace,
5933
"data":attrs}
6034
ifhasChildren:
6135
yieldself.error("Void element has children")
6236

6337
defstartTag(self,namespace,name,attrs):
64-
assertnamespaceisNoneorisinstance(namespace,string_types),type(namespace)
65-
assertisinstance(name,string_types),type(name)
66-
assertall((namespaceisNoneorisinstance(namespace,string_types))and
67-
isinstance(name,string_types)and
68-
isinstance(value,string_types)
69-
for (namespace,name),valueinattrs.items())
70-
7138
return {"type":"StartTag",
72-
"name":text_type(name),
73-
"namespace":to_text(namespace),
74-
"data":dict(((to_text(namespace,False),to_text(name)),
75-
to_text(value,False))
76-
for (namespace,name),valueinattrs.items())}
39+
"name":name,
40+
"namespace":namespace,
41+
"data":attrs}
7742

7843
defendTag(self,namespace,name):
79-
assertnamespaceisNoneorisinstance(namespace,string_types),type(namespace)
80-
assertisinstance(name,string_types),type(namespace)
81-
8244
return {"type":"EndTag",
83-
"name":to_text(name,False),
84-
"namespace":to_text(namespace),
85-
"data": {}}
45+
"name":name,
46+
"namespace":namespace}
8647

8748
deftext(self,data):
88-
assertisinstance(data,string_types),type(data)
89-
90-
data=to_text(data)
49+
data=data
9150
middle=data.lstrip(spaceCharacters)
9251
left=data[:len(data)-len(middle)]
9352
ifleft:
@@ -101,25 +60,16 @@ def text(self, data):
10160
yield {"type":"SpaceCharacters","data":right}
10261

10362
defcomment(self,data):
104-
assertisinstance(data,string_types),type(data)
105-
106-
return {"type":"Comment","data":text_type(data)}
107-
108-
defdoctype(self,name,publicId=None,systemId=None,correct=True):
109-
assertis_text_or_none(name),type(name)
110-
assertis_text_or_none(publicId),type(publicId)
111-
assertis_text_or_none(systemId),type(systemId)
63+
return {"type":"Comment","data":data}
11264

65+
defdoctype(self,name,publicId=None,systemId=None):
11366
return {"type":"Doctype",
114-
"name":to_text(name),
115-
"publicId":to_text(publicId),
116-
"systemId":to_text(systemId),
117-
"correct":to_text(correct)}
67+
"name":name,
68+
"publicId":publicId,
69+
"systemId":systemId}
11870

11971
defentity(self,name):
120-
assertisinstance(name,string_types),type(name)
121-
122-
return {"type":"Entity","name":text_type(name)}
72+
return {"type":"Entity","name":name}
12373

12474
defunknown(self,nodeType):
12575
returnself.error("Unknown node type: "+nodeType)
@@ -154,7 +104,7 @@ def __iter__(self):
154104

155105
eliftype==ELEMENT:
156106
namespace,name,attributes,hasChildren=details
157-
ifnameinvoidElements:
107+
if(notnamespaceornamespace==namespaces["html"])andnameinvoidElements:
158108
fortokeninself.emptyTag(namespace,name,attributes,
159109
hasChildren):
160110
yieldtoken
@@ -187,7 +137,7 @@ def __iter__(self):
187137
type,details=details[0],details[1:]
188138
iftype==ELEMENT:
189139
namespace,name,attributes,hasChildren=details
190-
ifnamenotinvoidElements:
140+
if(namespaceandnamespace!=namespaces["html"])ornamenotinvoidElements:
191141
yieldself.endTag(namespace,name)
192142
ifself.treeiscurrentNode:
193143
currentNode=None

‎html5lib/treewalkers/genshistream.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def tokens(self, event, next):
4848
elifkind==END:
4949
name=data.localname
5050
namespace=data.namespace
51-
ifnamenotinvoidElements:
51+
ifnamespace!=namespaces["html"]ornamenotinvoidElements:
5252
yieldself.endTag(namespace,name)
5353

5454
elifkind==COMMENT:

‎html5lib/treewalkers/lxmletree.py‎

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,8 +118,10 @@ def __len__(self):
118118
classTreeWalker(_base.NonRecursiveTreeWalker):
119119
def__init__(self,tree):
120120
ifhasattr(tree,"getroot"):
121+
self.fragmentChildren=set()
121122
tree=Root(tree)
122123
elifisinstance(tree,list):
124+
self.fragmentChildren=set(tree)
123125
tree=FragmentRoot(tree)
124126
_base.NonRecursiveTreeWalker.__init__(self,tree)
125127
self.filter=ihatexml.InfosetFilter()
@@ -137,7 +139,7 @@ def getNodeDetails(self, node):
137139
return_base.DOCTYPE,node.name,node.public_id,node.system_id
138140

139141
elifisinstance(node,FragmentWrapper)andnothasattr(node,"tag"):
140-
return_base.TEXT,node.obj
142+
return_base.TEXT,ensure_str(node.obj)
141143

142144
elifnode.tag==etree.Comment:
143145
return_base.COMMENT,ensure_str(node.text)
@@ -197,5 +199,7 @@ def getParentNode(self, node):
197199
ifkey=="text":
198200
returnnode
199201
# else: fallback to "normal" processing
202+
elifnodeinself.fragmentChildren:
203+
returnNone
200204

201205
returnnode.getparent()

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp