Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 911cf45

Browse files
committed
Merge pull request #241 from gsnedders/encoding_tests
Get encoding reparsing actually working; r=nobody!
2 parents 9dc49f6 + b0ae0c2, commit 911cf45

File tree

3 files changed

+28
-7
lines changed

3 files changed

+28
-7
lines changed

‎html5lib/html5parser.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -89,12 +89,11 @@ def _parse(self, stream, innerHTML=False, container="div",
8989
parser=self,**kwargs)
9090
self.reset()
9191

92-
whileTrue:
93-
try:
94-
self.mainLoop()
95-
break
96-
exceptReparseException:
97-
self.reset()
92+
try:
93+
self.mainLoop()
94+
exceptReparseException:
95+
self.reset()
96+
self.mainLoop()
9897

9998
defreset(self):
10099
self.tree.reset()

‎html5lib/inputstream.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -509,8 +509,8 @@ def changeEncoding(self, newEncoding):
509509
self.charEncoding= (self.charEncoding[0],"certain")
510510
else:
511511
self.rawStream.seek(0)
512-
self.reset()
513512
self.charEncoding= (newEncoding,"certain")
513+
self.reset()
514514
raiseReparseException("Encoding changed from %s to %s"% (self.charEncoding[0],newEncoding))
515515

516516
defdetectBOM(self):

‎html5lib/tests/test_encoding.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,28 @@
1212
fromhtml5libimportHTMLParser,inputstream
1313

1414

15+
deftest_basic_prescan_length():
16+
data="<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
17+
pad=1024-len(data)+1
18+
data=data.replace(b"-a-",b"-"+ (b"a"*pad)+b"-")
19+
assertlen(data)==1024# Sanity
20+
stream=inputstream.HTMLBinaryInputStream(data,chardet=False)
21+
assert'utf-8'==stream.charEncoding[0].name
22+
23+
24+
deftest_parser_reparse():
25+
data="<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
26+
pad=10240-len(data)+1
27+
data=data.replace(b"-a-",b"-"+ (b"a"*pad)+b"-")
28+
assertlen(data)==10240# Sanity
29+
stream=inputstream.HTMLBinaryInputStream(data,chardet=False)
30+
assert'windows-1252'==stream.charEncoding[0].name
31+
p=HTMLParser(namespaceHTMLElements=False)
32+
doc=p.parse(data,useChardet=False)
33+
assert'utf-8'==p.documentEncoding
34+
assertdoc.find(".//title").text=="Caf\u00E9"
35+
36+
1537
defrunParserEncodingTest(data,encoding):
1638
p=HTMLParser()
1739
assertp.documentEncodingisNone

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp