Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6a29350

Browse files
committed
Merge
2 parents 0eacde6 + b88d31d, commit 6a29350

File tree

9 files changed

+277
-291
lines changed

9 files changed

+277
-291
lines changed

‎html5lib/constants.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -483,7 +483,8 @@
483483
"area",
484484
"col",
485485
"input",
486-
"source"
486+
"source",
487+
"track"
487488
))
488489

489490
cdataElements=frozenset(('title','textarea'))

‎html5lib/html5parser.py

Lines changed: 126 additions & 101 deletions
Large diffs are not rendered by default.

‎html5lib/sanitizer.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -245,11 +245,11 @@ def sanitize_css(self, style):
245245

246246
classHTMLSanitizer(HTMLTokenizer,HTMLSanitizerMixin):
247247
def__init__(self,stream,encoding=None,parseMeta=True,useChardet=True,
248-
lowercaseElementName=False,lowercaseAttrName=False):
248+
lowercaseElementName=False,lowercaseAttrName=False,parser=None):
249249
#Change case matching defaults as we only output lowercase html anyway
250250
#This solution doesn't seem ideal...
251251
HTMLTokenizer.__init__(self,stream,encoding,parseMeta,useChardet,
252-
lowercaseElementName,lowercaseAttrName)
252+
lowercaseElementName,lowercaseAttrName,parser=parser)
253253

254254
def__iter__(self):
255255
fortokeninHTMLTokenizer.__iter__(self):

‎html5lib/tests/test_parser.py

Lines changed: 62 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,13 @@
22
importsys
33
importtraceback
44
importStringIO
5-
importunittest
65
importwarnings
76
importre
87

98
warnings.simplefilter("error")
109

11-
fromsupportimporthtml5lib_test_files,TestData,convert,convertExpected
10+
fromsupportimporthtml5lib_test_filesasdata_files
11+
fromsupportimportTestData,convert,convertExpected
1212
importhtml5lib
1313
fromhtml5libimporthtml5parser,treebuilders,constants
1414

@@ -70,94 +70,71 @@ def convertTreeDump(data):
7070

7171
namespaceExpected=re.compile(r"^(\s*)<(\S+)>",re.M).sub
7272

73-
classTestCase(unittest.TestCase):
74-
defrunParserTest(self,innerHTML,input,expected,errors,treeClass,
75-
namespaceHTMLElements):
76-
#XXX - move this out into the setup function
77-
#concatenate all consecutive character tokens into a single token
78-
try:
79-
p=html5parser.HTMLParser(tree=treeClass,
80-
namespaceHTMLElements=namespaceHTMLElements)
81-
exceptconstants.DataLossWarning:
82-
return
83-
84-
try:
85-
ifinnerHTML:
86-
document=p.parseFragment(input,innerHTML)
87-
else:
88-
try:
89-
document=p.parse(input)
90-
exceptconstants.DataLossWarning:
91-
return
92-
except:
93-
errorMsg=u"\n".join([u"\n\nInput:",input,u"\nExpected:",expected,
94-
u"\nTraceback:",traceback.format_exc()])
95-
self.assertTrue(False,errorMsg.encode("utf8"))
96-
97-
output=convertTreeDump(p.tree.testSerializer(document))
98-
99-
expected=convertExpected(expected)
100-
ifnamespaceHTMLElements:
101-
expected=namespaceExpected(r"\1<html \2>",expected)
102-
103-
errorMsg=u"\n".join([u"\n\nInput:",input,u"\nExpected:",expected,
104-
u"\nReceived:",output])
105-
self.assertEquals(expected,output,errorMsg.encode("utf8"))
106-
errStr= [u"Line: %i Col: %i %s"%(line,col,
107-
constants.E[errorcode]%datavarsifisinstance(datavars,dict)else (datavars,))for
108-
((line,col),errorcode,datavars)inp.errors]
109-
110-
errorMsg2=u"\n".join([u"\n\nInput:",input,
111-
u"\nExpected errors ("+str(len(errors))+u"):\n"+u"\n".join(errors),
112-
u"\nActual errors ("+str(len(p.errors))+u"):\n"+u"\n".join(errStr)])
113-
ifcheckParseErrors:
114-
self.assertEquals(len(p.errors),len(errors),errorMsg2.encode("utf-8"))
11573

116-
defbuildTestSuite():
117-
sys.stdout.write('Testing tree builders '+" ".join(treeTypes.keys())+"\n")
118-
119-
fortreeName,treeClsintreeTypes.iteritems():
120-
files=html5lib_test_files('tree-construction')
121-
forfilenameinfiles:
122-
testName=os.path.basename(filename).replace(".dat","")
74+
defrunParserTest(innerHTML,input,expected,errors,treeClass,
75+
namespaceHTMLElements):
76+
#XXX - move this out into the setup function
77+
#concatenate all consecutive character tokens into a single token
78+
try:
79+
p=html5parser.HTMLParser(tree=treeClass,
80+
namespaceHTMLElements=namespaceHTMLElements)
81+
exceptconstants.DataLossWarning:
82+
return
12383

124-
tests=TestData(filename,"data")
84+
try:
85+
ifinnerHTML:
86+
document=p.parseFragment(input,innerHTML)
87+
else:
88+
try:
89+
document=p.parse(input)
90+
exceptconstants.DataLossWarning:
91+
return
92+
except:
93+
errorMsg=u"\n".join([u"\n\nInput:",input,u"\nExpected:",expected,
94+
u"\nTraceback:",traceback.format_exc()])
95+
assertFalse,errorMsg.encode("utf8")
96+
97+
output=convertTreeDump(p.tree.testSerializer(document))
98+
99+
expected=convertExpected(expected)
100+
ifnamespaceHTMLElements:
101+
expected=namespaceExpected(r"\1<html \2>",expected)
102+
103+
errorMsg=u"\n".join([u"\n\nInput:",input,u"\nExpected:",expected,
104+
u"\nReceived:",output])
105+
assertexpected==output,errorMsg.encode("utf8")
106+
errStr= [u"Line: %i Col: %i %s"%(line,col,
107+
constants.E[errorcode]%datavarsifisinstance(datavars,dict)else (datavars,))for
108+
((line,col),errorcode,datavars)inp.errors]
109+
110+
errorMsg2=u"\n".join([u"\n\nInput:",input,
111+
u"\nExpected errors ("+str(len(errors))+u"):\n"+u"\n".join(errors),
112+
u"\nActual errors ("+str(len(p.errors))+u"):\n"+u"\n".join(errStr)])
113+
ifcheckParseErrors:
114+
assertlen(p.errors)==len(errors),errorMsg2.encode("utf-8")
115+
116+
deftest_parser():
117+
sys.stderr.write('Testing tree builders '+" ".join(treeTypes.keys())+"\n")
118+
files=data_files('tree-construction')
119+
120+
forfilenameinfiles:
121+
testName=os.path.basename(filename).replace(".dat","")
125122

126-
forindex,testinenumerate(tests):
127-
input,errors,innerHTML,expected= [test[key]forkeyin
123+
tests=TestData(filename,"data")
124+
125+
forindex,testinenumerate(tests):
126+
input,errors,innerHTML,expected= [test[key]forkeyin
128127
'data','errors',
129128
'document-fragment',
130129
'document']
131-
iferrors:
132-
errors=errors.split("\n")
133-
130+
iferrors:
131+
errors=errors.split("\n")
132+
133+
fortreeName,treeClsintreeTypes.iteritems():
134134
fornamespaceHTMLElementsin (True,False):
135-
deftestFunc(self,innerHTML=innerHTML,input=input,
136-
expected=expected,errors=errors,treeCls=treeCls,
137-
namespaceHTMLElements=namespaceHTMLElements):
138-
returnself.runParserTest(innerHTML,input,expected,
139-
errors,treeCls,
140-
namespaceHTMLElements)
141-
testFunc.__name__="test_%s_%d_%s_%s"% (testName,index+1,treeName,namespaceHTMLElementsand"namespaced"or"no_html_namespace")
142-
setattr(TestCase,testFunc.__name__,
143-
testFunc)
135+
printinput
136+
yield (runParserTest,innerHTML,input,expected,errors,treeCls,
137+
namespaceHTMLElements)
144138
break
145-
146-
returnunittest.TestLoader().loadTestsFromTestCase(TestCase)
147-
148-
defmain():
149-
# the following is temporary while the unit tests for parse errors are
150-
# still in flux
151-
if'-p'insys.argv:# suppress check for parse errors
152-
sys.argv.remove('-p')
153-
globalcheckParseErrors
154-
checkParseErrors=False
155-
buildTestSuite()
156-
try:
157-
unittest.main()
158-
exceptSystemExit:
159-
pass
160-
161-
if__name__=="__main__":
162-
printsys.argv
163-
main()
139+
140+

‎html5lib/tests/test_sanitizer.py

Lines changed: 61 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -7,92 +7,70 @@
77
exceptImportError:
88
importsimplejsonasjson
99

10-
fromsupportimporthtml5lib_test_files
1110
fromhtml5libimporthtml5parser,sanitizer,constants
1211

13-
classSanitizeTest(unittest.TestCase):
14-
defaddTest(cls,name,expected,input):
15-
deftest(self,expected=expected,input=input):
16-
expected=''.join([token.toxml()fortokeninhtml5parser.HTMLParser().
17-
parseFragment(expected).childNodes])
18-
expected=json.loads(json.dumps(expected))
19-
self.assertEqual(expected,self.sanitize_html(input))
20-
setattr(cls,name,test)
21-
addTest=classmethod(addTest)
12+
defrunSanitizerTest(name,expected,input):
13+
expected=''.join([token.toxml()fortokeninhtml5parser.HTMLParser().
14+
parseFragment(expected).childNodes])
15+
expected=json.loads(json.dumps(expected))
16+
assertexpected==sanitize_html(input)
2217

23-
defsanitize_html(self,stream):
18+
defsanitize_html(stream):
2419
return''.join([token.toxml()fortokenin
25-
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
26-
parseFragment(stream).childNodes])
27-
28-
deftest_should_handle_astral_plane_characters(self):
29-
self.assertEqual(u"<p>\U0001d4b5\U0001d538</p>",
30-
self.sanitize_html("<p>&#x1d4b5; &#x1d538;</p>"))
31-
32-
fortag_nameinsanitizer.HTMLSanitizer.allowed_elements:
33-
iftag_namein ['caption','col','colgroup','optgroup','option','table','tbody','td','tfoot','th','thead','tr']:continue### TODO
34-
iftag_name!=tag_name.lower():continue### TODO
35-
iftag_name=='image':
36-
SanitizeTest.addTest("test_should_allow_%s_tag"%tag_name,
37-
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
38-
"<%s title='1'>foo <bad>bar</bad> baz</%s>"% (tag_name,tag_name))
39-
eliftag_name=='br':
40-
SanitizeTest.addTest("test_should_allow_%s_tag"%tag_name,
41-
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
20+
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
21+
parseFragment(stream).childNodes])
22+
23+
deftest_should_handle_astral_plane_characters():
24+
assertu"<p>\U0001d4b5\U0001d538</p>"==sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
25+
26+
deftest_sanitizer():
27+
fortag_nameinsanitizer.HTMLSanitizer.allowed_elements:
28+
iftag_namein ['caption','col','colgroup','optgroup','option','table','tbody','td','tfoot','th','thead','tr']:
29+
continue### TODO
30+
iftag_name!=tag_name.lower():
31+
continue### TODO
32+
iftag_name=='image':
33+
yield (runSanitizerTest,"test_should_allow_%s_tag"%tag_name,
34+
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
35+
"<%s title='1'>foo <bad>bar</bad> baz</%s>"% (tag_name,tag_name))
36+
eliftag_name=='br':
37+
yield (runSanitizerTest,"test_should_allow_%s_tag"%tag_name,
38+
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
39+
"<%s title='1'>foo <bad>bar</bad> baz</%s>"% (tag_name,tag_name))
40+
eliftag_nameinconstants.voidElements:
41+
yield (runSanitizerTest,"test_should_allow_%s_tag"%tag_name,
42+
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"%tag_name,
43+
"<%s title='1'>foo <bad>bar</bad> baz</%s>"% (tag_name,tag_name))
44+
else:
45+
yield (runSanitizerTest,"test_should_allow_%s_tag"%tag_name,
46+
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>"% (tag_name,tag_name),
47+
"<%s title='1'>foo <bad>bar</bad> baz</%s>"% (tag_name,tag_name))
48+
49+
fortag_nameinsanitizer.HTMLSanitizer.allowed_elements:
50+
tag_name=tag_name.upper()
51+
yield (runSanitizerTest,"test_should_forbid_%s_tag"%tag_name,
52+
"&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;"% (tag_name,tag_name),
4253
"<%s title='1'>foo <bad>bar</bad> baz</%s>"% (tag_name,tag_name))
43-
eliftag_nameinconstants.voidElements:
44-
SanitizeTest.addTest("test_should_allow_%s_tag"%tag_name,
45-
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"%tag_name,
46-
"<%s title='1'>foo <bad>bar</bad> baz</%s>"% (tag_name,tag_name))
47-
else:
48-
SanitizeTest.addTest("test_should_allow_%s_tag"%tag_name,
49-
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>"% (tag_name,tag_name),
50-
"<%s title='1'>foo <bad>bar</bad> baz</%s>"% (tag_name,tag_name))
51-
52-
fortag_nameinsanitizer.HTMLSanitizer.allowed_elements:
53-
tag_name=tag_name.upper()
54-
SanitizeTest.addTest("test_should_forbid_%s_tag"%tag_name,
55-
"&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;"% (tag_name,tag_name),
56-
"<%s title='1'>foo <bad>bar</bad> baz</%s>"% (tag_name,tag_name))
57-
58-
forattribute_nameinsanitizer.HTMLSanitizer.allowed_attributes:
59-
ifattribute_name!=attribute_name.lower():continue### TODO
60-
ifattribute_name=='style':continue
61-
SanitizeTest.addTest("test_should_allow_%s_attribute"%attribute_name,
62-
"<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"%attribute_name,
63-
"<p %s='foo'>foo <bad>bar</bad> baz</p>"%attribute_name)
64-
65-
forattribute_nameinsanitizer.HTMLSanitizer.allowed_attributes:
66-
attribute_name=attribute_name.upper()
67-
SanitizeTest.addTest("test_should_forbid_%s_attribute"%attribute_name,
68-
"<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
69-
"<p %s='display: none;'>foo <bad>bar</bad> baz</p>"%attribute_name)
70-
71-
forprotocolinsanitizer.HTMLSanitizer.allowed_protocols:
72-
SanitizeTest.addTest("test_should_allow_%s_uris"%protocol,
73-
"<a href=\"%s\">foo</a>"%protocol,
74-
"""<a href="%s">foo</a>"""%protocol)
75-
76-
forprotocolinsanitizer.HTMLSanitizer.allowed_protocols:
77-
SanitizeTest.addTest("test_should_allow_uppercase_%s_uris"%protocol,
78-
"<a href=\"%s\">foo</a>"%protocol,
79-
"""<a href="%s">foo</a>"""%protocol)
80-
81-
defbuildTestSuite():
82-
forfilenameinhtml5lib_test_files("sanitizer"):
83-
fortestinjson.load(file(filename)):
84-
SanitizeTest.addTest('test_'+test['name'],test['output'],test['input'])
85-
86-
returnunittest.TestLoader().loadTestsFromTestCase(SanitizeTest)
87-
88-
defsanitize_html(stream):
89-
return''.join([token.toxml()fortokenin
90-
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
91-
parseFragment(stream).childNodes])
92-
93-
defmain():
94-
buildTestSuite()
95-
unittest.main()
9654

97-
if__name__=="__main__":
98-
main()
55+
forattribute_nameinsanitizer.HTMLSanitizer.allowed_attributes:
56+
ifattribute_name!=attribute_name.lower():continue### TODO
57+
ifattribute_name=='style':continue
58+
yield (runSanitizerTest,"test_should_allow_%s_attribute"%attribute_name,
59+
"<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"%attribute_name,
60+
"<p %s='foo'>foo <bad>bar</bad> baz</p>"%attribute_name)
61+
62+
forattribute_nameinsanitizer.HTMLSanitizer.allowed_attributes:
63+
attribute_name=attribute_name.upper()
64+
yield (runSanitizerTest,"test_should_forbid_%s_attribute"%attribute_name,
65+
"<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
66+
"<p %s='display: none;'>foo <bad>bar</bad> baz</p>"%attribute_name)
67+
68+
forprotocolinsanitizer.HTMLSanitizer.allowed_protocols:
69+
yield (runSanitizerTest,"test_should_allow_%s_uris"%protocol,
70+
"<a href=\"%s\">foo</a>"%protocol,
71+
"""<a href="%s">foo</a>"""%protocol)
72+
73+
forprotocolinsanitizer.HTMLSanitizer.allowed_protocols:
74+
yield (runSanitizerTest,"test_should_allow_uppercase_%s_uris"%protocol,
75+
"<a href=\"%s\">foo</a>"%protocol,
76+
"""<a href="%s">foo</a>"""%protocol)

‎html5lib/tokenizer.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323
foreinentities:
2424
entitiesByFirstChar.setdefault(e[0], []).append(e)
2525

26-
classHTMLTokenizer:
26+
classHTMLTokenizer(object):
2727
""" This class takes care of tokenizing HTML.
2828
2929
* self.currentToken
@@ -36,8 +36,6 @@ class HTMLTokenizer:
3636
Points to HTMLInputStream object.
3737
"""
3838

39-
# XXX need to fix documentation
40-
4139
def__init__(self,stream,encoding=None,parseMeta=True,useChardet=True,
4240
lowercaseElementName=True,lowercaseAttrName=True,parser=None):
4341

@@ -56,6 +54,7 @@ def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
5654

5755
# The current token being created
5856
self.currentToken=None
57+
super(HTMLTokenizer,self).__init__()
5958

6059
def__iter__(self):
6160
""" This is where the magic happens.
@@ -1151,7 +1150,7 @@ def markupDeclarationOpenState(self):
11511150
returnTrue
11521151
elif (charStack[-1]=="["and
11531152
self.parserisnotNoneand
1154-
self.parser.phase==self.parser.phases["inForeignContent"]and
1153+
self.parser.tree.openElementsand
11551154
self.parser.tree.openElements[-1].namespace!=self.parser.tree.defaultNamespace):
11561155
matched=True
11571156
forexpectedin ["C","D","A","T","A","["]:

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp