Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 376b4a5

Browse files
committed
Welcome to Python 3.
We now fail the same three tests on both Py2 and Py3. I'm fairly certain the meta-preparser among other things is broken on Py3, but we have no tests for it. (We should fix that.)
1 parent eb7f702 commit 376b4a5

File tree

12 files changed

+115
-82
lines changed

12 files changed

+115
-82
lines changed

‎html5lib/inputstream.py‎

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
0xDFFFF,0xEFFFE,0xEFFFF,0xFFFFE,0xFFFFF,
2424
0x10FFFE,0x10FFFF])
2525

26-
ascii_punctuation_re=re.compile(ur"[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
26+
ascii_punctuation_re=re.compile(u"[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
2727

2828
# Cache for charsUntil()
2929
charsUntilRegEx= {}
@@ -193,7 +193,8 @@ def openStream(self, source):
193193
else:
194194
# Otherwise treat source as a string and convert to a file object
195195
ifisinstance(source,unicode):
196-
source=source.encode('utf-8')
196+
# XXX: we should handle lone surrogates here
197+
source=source.encode('utf-8',errors="replace")
197198
self.charEncoding= ("utf-8","certain")
198199
try:
199200
fromioimportBytesIO
@@ -230,7 +231,7 @@ def detectEncoding(self, parseMeta=True, chardet=True):
230231
detector=UniversalDetector()
231232
whilenotdetector.done:
232233
buffer=self.rawStream.read(self.numBytesChardet)
233-
assertisinstance(buffer,str)
234+
assertisinstance(buffer,bytes)
234235
ifnotbuffer:
235236
break
236237
buffers.append(buffer)
@@ -279,7 +280,7 @@ def detectBOM(self):
279280

280281
# Go to beginning of file and read in 4 bytes
281282
string=self.rawStream.read(4)
282-
assertisinstance(string,str)
283+
assertisinstance(string,bytes)
283284

284285
# Try detecting the BOM using bytes from the string
285286
encoding=bomDict.get(string[:3])# UTF-8
@@ -302,7 +303,7 @@ def detectEncodingMeta(self):
302303
"""Report the encoding declared by the meta element
303304
"""
304305
buffer=self.rawStream.read(self.numBytesMeta)
305-
assertisinstance(buffer,str)
306+
assertisinstance(buffer,bytes)
306307
parser=EncodingParser(buffer)
307308
self.rawStream.seek(0)
308309
encoding=parser.getEncoding()
@@ -781,7 +782,7 @@ def parse(self):
781782
defcodecName(encoding):
782783
"""Return the python codec name corresponding to an encoding or None if the
783784
string doesn't correspond to a valid encoding."""
784-
if(encodingisnotNoneandtype(encoding)intypes.StringTypes):
785+
ifencoding:
785786
canonicalName=ascii_punctuation_re.sub("",encoding).lower()
786787
returnencodings.get(canonicalName,None)
787788
else:

‎html5lib/serializer/htmlserializer.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ def serialize(self, treewalker, encoding=None):
303303

304304
defrender(self,treewalker,encoding=None):
305305
ifencoding:
306-
return"".join(list(self.serialize(treewalker,encoding)))
306+
returnb"".join(list(self.serialize(treewalker,encoding)))
307307
else:
308308
returnu"".join(list(self.serialize(treewalker)))
309309

‎html5lib/tests/support.py‎

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464

6565
defhtml5lib_test_files(subdirectory,files='*.dat'):
6666
returnglob.glob(os.path.join(test_dir,subdirectory,files))
67+
html5lib_test_files.__test__=False
6768

6869
classDefaultDict(dict):
6970
def__init__(self,default,*args,**kwargs):
@@ -77,6 +78,9 @@ class TestData(object):
7778
def__init__(self,filename,newTestHeading="data"):
7879
self.f=codecs.open(filename,encoding="utf8")
7980
self.newTestHeading=newTestHeading
81+
82+
def__del__(self):
83+
self.f.close()
8084

8185
def__iter__(self):
8286
data=DefaultDict(None)
@@ -114,14 +118,14 @@ def normaliseOutput(self, data):
114118
defconvert(stripChars):
115119
defconvertData(data):
116120
"""convert the output of str(document) to the format used in the testcases"""
117-
data=data.split("\n")
121+
data=data.split(u"\n")
118122
rv= []
119123
forlineindata:
120-
ifline.startswith("|"):
124+
ifline.startswith(u"|"):
121125
rv.append(line[stripChars:])
122126
else:
123127
rv.append(line)
124-
return"\n".join(rv)
128+
returnu"\n".join(rv)
125129
returnconvertData
126130

127131
convertExpected=convert(2)

‎html5lib/tests/test_encoding.py‎

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,27 @@
1+
importre
12
importos
23
importunittest
3-
fromsupportimporthtml5lib_test_files,TestData,test_dir
44

5-
fromhtml5libimportHTMLParser,inputstream
5+
try:
6+
unittest.TestCase.assertEqual
7+
exceptAttributeError:
8+
unittest.TestCase.assertEqual=unittest.TestCase.assertEquals
69

7-
importre,unittest
10+
fromsupportimporthtml5lib_test_files,TestData,test_dir
11+
fromhtml5libimportHTMLParser,inputstream
812

913
classHtml5EncodingTestCase(unittest.TestCase):
10-
deftest_codec_name(self):
11-
self.assertEquals(inputstream.codecName("utf-8"),"utf-8")
12-
self.assertEquals(inputstream.codecName("utf8"),"utf-8")
13-
self.assertEquals(inputstream.codecName(" utf8 "),"utf-8")
14-
self.assertEquals(inputstream.codecName("ISO_8859--1"),"windows-1252")
14+
deftest_codec_name_a(self):
15+
self.assertEqual(inputstream.codecName("utf-8"),"utf-8")
16+
17+
deftest_codec_name_b(self):
18+
self.assertEqual(inputstream.codecName("utf8"),"utf-8")
19+
20+
deftest_codec_name_c(self):
21+
self.assertEqual(inputstream.codecName(" utf8 "),"utf-8")
22+
23+
deftest_codec_name_d(self):
24+
self.assertEqual(inputstream.codecName("ISO_8859--1"),"windows-1252")
1525

1626
defbuildTestSuite():
1727
forfilenameinhtml5lib_test_files("encoding"):

‎html5lib/tests/test_parser.py‎

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
#XXX - There should just be one function here but for some reason the testcase
1919
#format differs from the treedump format by a single space character
2020
defconvertTreeDump(data):
21-
return"\n".join(convert(3)(data).split("\n")[1:])
21+
returnu"\n".join(convert(3)(data).split(u"\n")[1:])
2222

23-
namespaceExpected=re.compile(r"^(\s*)<(\S+)>",re.M).sub
23+
namespaceExpected=re.compile(ur"^(\s*)<(\S+)>",re.M).sub
2424

2525

2626
defrunParserTest(innerHTML,input,expected,errors,treeClass,
@@ -44,17 +44,17 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
4444
except:
4545
errorMsg=u"\n".join([u"\n\nInput:",input,u"\nExpected:",expected,
4646
u"\nTraceback:",traceback.format_exc().decode('utf8')])
47-
assertFalse,errorMsg.encode("utf8")
47+
assertFalse,errorMsg
4848

4949
output=convertTreeDump(p.tree.testSerializer(document))
5050

5151
expected=convertExpected(expected)
5252
ifnamespaceHTMLElements:
53-
expected=namespaceExpected(r"\1<html \2>",expected)
53+
expected=namespaceExpected(ur"\1<html \2>",expected)
5454

5555
errorMsg=u"\n".join([u"\n\nInput:",input,u"\nExpected:",expected,
5656
u"\nReceived:",output])
57-
assertexpected==output,errorMsg.encode("utf8")
57+
assertexpected==output,errorMsg
5858
errStr= [u"Line: %i Col: %i %s"%(line,col,
5959
constants.E[errorcode]%datavarsifisinstance(datavars,dict)else (datavars,))for
6060
((line,col),errorcode,datavars)inp.errors]
@@ -63,7 +63,7 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
6363
u"\nExpected errors ("+unicode(len(errors))+u"):\n"+u"\n".join(errors),
6464
u"\nActual errors ("+unicode(len(p.errors))+u"):\n"+u"\n".join(errStr)])
6565
ifcheckParseErrors:
66-
assertlen(p.errors)==len(errors),errorMsg2.encode("utf-8")
66+
assertlen(p.errors)==len(errors),errorMsg2
6767

6868
deftest_parser():
6969
sys.stderr.write('Testing tree builders '+" ".join(treeTypes.keys())+"\n")
@@ -87,6 +87,3 @@ def test_parser():
8787
printinput
8888
yield (runParserTest,innerHTML,input,expected,errors,treeCls,
8989
namespaceHTMLElements)
90-
break
91-
92-

‎html5lib/tests/test_serializer.py‎

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
exceptImportError:
88
importsimplejsonasjson
99

10+
try:
11+
unittest.TestCase.assertEqual
12+
exceptAttributeError:
13+
unittest.TestCase.assertEqual=unittest.TestCase.assertEquals
14+
1015
importhtml5lib
1116
fromhtml5libimporthtml5parser,serializer,constants
1217
fromhtml5lib.treewalkers._baseimportTreeWalker
@@ -83,7 +88,16 @@ def serialize_xhtml(input, options):
8388
options=dict([(str(k),v)fork,vinoptions.iteritems()])
8489
returnserializer.XHTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
8590

86-
defmake_test(input,expected,xhtml,options):
91+
defrunSerializerTest(input,expected,xhtml,options):
92+
encoding=options.get("encoding",None)
93+
94+
ifencoding:
95+
encode=lambdax:x.encode(encoding)
96+
expected=map(encode,expected)
97+
ifxhtml:
98+
xhtml=map(encode,xhtml)
99+
100+
87101
result=serialize_html(input,options)
88102
iflen(expected)==1:
89103
assertexpected[0]==result,"Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0],result,str(options))
@@ -114,13 +128,12 @@ def testDoctypeSystemId(self):
114128
self.throwsWithLatin1([["Doctype",u"potato",u"potato",u"\u0101"]])
115129

116130
deftestCdataCharacters(self):
117-
self.assertEquals("<style>&amacr;",serialize_html([["StartTag","http://www.w3.org/1999/xhtml","style", {}],
118-
["Characters",u"\u0101"]],
119-
{"encoding":"iso-8859-1"}))
131+
runSerializerTest([["StartTag","http://www.w3.org/1999/xhtml","style", {}], ["Characters",u"\u0101"]],
132+
[u"<style>&amacr;"],None, {"encoding":"iso-8859-1"})
120133

121134
deftestCharacters(self):
122-
self.assertEquals("&amacr;",serialize_html([["Characters",u"\u0101"]],
123-
{"encoding":"iso-8859-1"}))
135+
runSerializerTest([["Characters",u"\u0101"]],
136+
[u"&amacr;"],None,{"encoding":"iso-8859-1"})
124137

125138
deftestStartTagName(self):
126139
self.throwsWithLatin1([["StartTag",u"http://www.w3.org/1999/xhtml",u"\u0101", []]])
@@ -132,9 +145,9 @@ def testAttributeName(self):
132145
self.throwsWithLatin1([["StartTag",u"http://www.w3.org/1999/xhtml",u"span", [{"namespace":None,"name":u"\u0101","value":u"potato"}]]])
133146

134147
deftestAttributeValue(self):
135-
self.assertEquals("<span potato=&amacr;>",serialize_html([["StartTag",u"http://www.w3.org/1999/xhtml",u"span",
136-
[{"namespace":None,"name":u"potato","value":u"\u0101"}]]],
137-
{"encoding":"iso-8859-1"}))
148+
runSerializerTest([["StartTag",u"http://www.w3.org/1999/xhtml",u"span",
149+
[{"namespace":None,"name":u"potato","value":u"\u0101"}]]],
150+
[u"<span potato=&amacr;>"],None,{"encoding":"iso-8859-1"})
138151

139152
deftestEndTagName(self):
140153
self.throwsWithLatin1([["EndTag",u"http://www.w3.org/1999/xhtml",u"\u0101"]])
@@ -154,27 +167,27 @@ def testEntityReplacement(self):
154167
doc="""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
155168
tree=etree.fromstring(doc,parser=self.parser).getroottree()
156169
result=serializer.serialize(tree,tree="lxml",omit_optional_tags=False)
157-
self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""",result)
170+
self.assertEqual(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""",result)
158171

159172
deftestEntityXML(self):
160173
doc="""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
161174
tree=etree.fromstring(doc,parser=self.parser).getroottree()
162175
result=serializer.serialize(tree,tree="lxml",omit_optional_tags=False)
163-
self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""",result)
176+
self.assertEqual(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""",result)
164177

165178
deftestEntityNoResolve(self):
166179
doc="""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
167180
tree=etree.fromstring(doc,parser=self.parser).getroottree()
168181
result=serializer.serialize(tree,tree="lxml",omit_optional_tags=False,
169182
resolve_entities=False)
170-
self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""",result)
183+
self.assertEqual(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""",result)
171184

172185
deftest_serializer():
173186
forfilenameinhtml5lib_test_files('serializer','*.test'):
174-
tests=json.load(file(filename))
187+
tests=json.load(open(filename))
175188
test_name=os.path.basename(filename).replace('.test','')
176189
forindex,testinenumerate(tests['tests']):
177190
xhtml=test.get("xhtml",test["expected"])
178191
iftest_name=='optionaltags':
179192
xhtml=None
180-
yieldmake_test,test["input"],test["expected"],xhtml,test.get("options", {})
193+
yieldrunSerializerTest,test["input"],test["expected"],xhtml,test.get("options", {})

‎html5lib/tests/test_tokenizer.py‎

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__importwith_statement
2+
13
importsys
24
importos
35
importunittest
@@ -137,6 +139,7 @@ def decode(inp):
137139
deltoken[2][key]
138140
token[2][decode(key)]=decode(value)
139141
returntest
142+
unescape_test.__test__=False
140143

141144

142145
defrunTokenizerTest(test):
@@ -161,7 +164,7 @@ def runTokenizerTest(test):
161164
"\nInput:",unicode(test['input']),
162165
"\nExpected:",unicode(expected),
163166
"\nreceived:",unicode(tokens)])
164-
errorMsg=errorMsg.encode("utf-8")
167+
errorMsg=errorMsg
165168
ignoreErrorOrder=test.get('ignoreErrorOrder',False)
166169
asserttokensMatch(expected,received,ignoreErrorOrder),errorMsg
167170

@@ -179,15 +182,16 @@ def capitalize(s):
179182

180183
deftest_tokenizer():
181184
forfilenameinhtml5lib_test_files('tokenizer','*.test'):
182-
tests=json.load(file(filename))
183-
testName=os.path.basename(filename).replace(".test","")
184-
if'tests'intests:
185-
forindex,testinenumerate(tests['tests']):
186-
#Skip tests with a self closing flag
187-
skip=False
188-
if'initialStates'notintest:
189-
test["initialStates"]= ["Data state"]
190-
forinitialStateintest["initialStates"]:
191-
test["initialState"]=capitalize(initialState)
192-
yieldrunTokenizerTest,test
185+
withopen(filename)asfp:
186+
tests=json.load(fp)
187+
testName=os.path.basename(filename).replace(".test","")
188+
if'tests'intests:
189+
forindex,testinenumerate(tests['tests']):
190+
#Skip tests with a self closing flag
191+
skip=False
192+
if'initialStates'notintest:
193+
test["initialStates"]= ["Data state"]
194+
forinitialStateintest["initialStates"]:
195+
test["initialState"]=capitalize(initialState)
196+
yieldrunTokenizerTest,test
193197

‎html5lib/tests/test_treewalkers.py‎

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
importunittest
44
importwarnings
55

6+
try:
7+
unittest.TestCase.assertEqual
8+
exceptAttributeError:
9+
unittest.TestCase.assertEqual=unittest.TestCase.assertEquals
10+
611
warnings.simplefilter("error")
712

813
fromsupportimporthtml5lib_test_files,TestData,convertExpected
@@ -263,9 +268,9 @@ def test_all_tokens(self):
263268
document=treeCls.get("adapter",lambdax:x)(document)
264269
output=treeCls["walker"](document)
265270
forexpectedToken,outputTokeninzip(expected,output):
266-
self.assertEquals(expectedToken,outputToken)
271+
self.assertEqual(expectedToken,outputToken)
267272

268-
defrun_test(innerHTML,input,expected,errors,treeClass):
273+
defrunTreewalkerTest(innerHTML,input,expected,errors,treeClass):
269274
try:
270275
p=html5parser.HTMLParser(tree=treeClass["builder"])
271276
ifinnerHTML:
@@ -305,6 +310,6 @@ def test_treewalker():
305310
"document-fragment",
306311
"document")]
307312
errors=errors.split("\n")
308-
yieldrun_test,innerHTML,input,expected,errors,treeCls
313+
yieldrunTreewalkerTest,innerHTML,input,expected,errors,treeCls
309314

310315

‎html5lib/tests/test_whitespace_filter.py‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,18 @@
44
fromhtml5lib.constantsimportspaceCharacters
55
spaceCharacters=u"".join(spaceCharacters)
66

7+
try:
8+
unittest.TestCase.assertEqual
9+
exceptAttributeError:
10+
unittest.TestCase.assertEqual=unittest.TestCase.assertEquals
11+
712
classTestCase(unittest.TestCase):
813
defrunTest(self,input,expected):
914
output=list(Filter(input))
1015
errorMsg="\n".join(["\n\nInput:",str(input),
1116
"\nExpected:",str(expected),
1217
"\nReceived:",str(output)])
13-
self.assertEquals(output,expected,errorMsg)
18+
self.assertEqual(output,expected,errorMsg)
1419

1520
defrunTestUnmodifiedOutput(self,input):
1621
self.runTest(input,input)

‎html5lib/treebuilders/_base.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def __init__(self, name):
4242
self.childNodes= []
4343
self._flags= []
4444

45-
def__unicode__(self):
45+
def__str__(self):
4646
attributesStr=" ".join(["%s=\"%s\""%(name,value)
4747
forname,valuein
4848
self.attributes.iteritems()])

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp