Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit babe4a3

Browse files
committed
Attempt at merging svgmathml branch to the default branch
--HG--
branch : svgmathml
rename : python/parse.py => python3/parse.py
rename : python/src/html5lib/__init__.py => python3/src/html5lib/__init__.py
rename : python/src/html5lib/constants.py => python3/src/html5lib/constants.py
rename : python/src/html5lib/filters/optionaltags.py => python3/src/html5lib/filters/optionaltags.py
rename : python/src/html5lib/html5parser.py => python3/src/html5lib/html5parser.py
rename : python/src/html5lib/inputstream.py => python3/src/html5lib/inputstream.py
rename : python/src/html5lib/sanitizer.py => python3/src/html5lib/sanitizer.py
rename : python/src/html5lib/serializer/__init__.py => python3/src/html5lib/serializer/__init__.py
rename : python/src/html5lib/tokenizer.py => python3/src/html5lib/tokenizer.py
rename : python/src/html5lib/treebuilders/etree_lxml.py => python3/src/html5lib/treebuilders/etree_lxml.py
rename : python/src/html5lib/treebuilders/simpletree.py => python3/src/html5lib/treebuilders/simpletree.py
rename : python/tests/test_encoding.py => python3/tests/test_encoding.py
rename : python/tests/test_parser.py => python3/tests/test_parser.py
rename : python/tests/test_tokenizer.py => python3/tests/test_tokenizer.py
1 parent 768ba79 commit babe4a3

File tree

8 files changed

+35
-36
lines changed

8 files changed

+35
-36
lines changed

‎parse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def parse():
5757
else:
5858
tokenizer=HTMLTokenizer
5959

60-
ifopts.xml:
60+
ifopts.liberalxml:
6161
p=liberalxmlparser.XHTMLParser(tree=treebuilder,tokenizer=tokenizer)
6262
else:
6363
p=html5parser.HTMLParser(tree=treebuilder,tokenizer=tokenizer)

‎src/html5lib/constants.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1070,7 +1070,6 @@
10701070
'utf16':'utf-16',
10711071
'utf16be':'utf-16-be',
10721072
'utf16le':'utf-16-le',
1073-
'utf7':'utf-7',
10741073
'utf8':'utf-8',
10751074
'windows1250':'cp1250',
10761075
'windows1251':'cp1251',

‎src/html5lib/filters/optionaltags.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@ def is_optional_start(self, tagname, previous, next):
3131
eliftagname=='head':
3232
# A head element's start tag may be omitted if the first thing
3333
# inside the head element is an element.
34-
returntype=="StartTag"
34+
# XXX: we also omit the start tag if the head element is empty
35+
iftypein ("StartTag","EmptyTag"):
36+
returnTrue
37+
eliftype=="EndTag":
38+
returnnext["name"]=="head"
3539
eliftagname=='body':
3640
# A body element's start tag may be omitted if the first thing
3741
# inside the body element is not a space character or a comment,
@@ -52,7 +56,7 @@ def is_optional_start(self, tagname, previous, next):
5256
# inside the colgroup element is a col element, and if the element
5357
# is not immediately preceeded by another colgroup element whose
5458
# end tag has been omitted.
55-
iftype=="StartTag":
59+
iftypein ("StartTag","EmptyTag"):
5660
# XXX: we do not look at the preceding event, so instead we never
5761
# omit the colgroup element's end tag when it is immediately
5862
# followed by another colgroup element. See is_optional_end.
@@ -114,7 +118,7 @@ def is_optional_end(self, tagname, next):
114118
# footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
115119
# nav, ol, p, pre, section, table, or ul, element, or if
116120
# there is no more content in the parent element.
117-
iftype=="StartTag":
121+
iftypein ("StartTag","EmptyTag"):
118122
returnnext["name"]in ('address','article','aside', \
119123
'blockquote','datagrid','dialog','dir','div', \
120124
'dl','fieldset','footer','form','h1','h2','h3', \

‎src/html5lib/html5parser.py

Lines changed: 17 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,6 @@ def _parse(self, stream, innerHTML=False, container="div",
108108
# We only seem to have InBodyPhase testcases where the following is
109109
# relevant ... need others too
110110
self.lastPhase=None
111-
112111
self.beforeRCDataPhase=None
113112

114113
CharactersToken=tokenTypes["Characters"]
@@ -120,6 +119,8 @@ def _parse(self, stream, innerHTML=False, container="div",
120119

121120

122121
fortokeninself.normalizedTokens():
122+
#print self.phase.__class__.__name__
123+
#print token
123124
type=token["type"]
124125
iftype==CharactersToken:
125126
self.phase.processCharacters(token)
@@ -271,18 +272,6 @@ def __init__(self, parser, tree):
271272

272273
defprocessEOF(self):
273274
raiseNotImplementedError
274-
self.tree.generateImpliedEndTags()
275-
iflen(self.tree.openElements)>2:
276-
self.parser.parseError("expected-closing-tag-but-got-eof")
277-
eliflen(self.tree.openElements)==2and\
278-
self.tree.openElements[1].name!="body":
279-
# This happens for framesets or something?
280-
self.parser.parseError("expected-closing-tag-but-got-eof")
281-
elifself.parser.innerHTMLandlen(self.tree.openElements)>1 :
282-
# XXX This is not what the specification says. Not sure what to do
283-
# here.
284-
self.parser.parseError("eof-in-innerhtml")
285-
# Betting ends.
286275

287276
defprocessComment(self,token):
288277
# For most phases the following is correct. Where it's not it will be
@@ -318,7 +307,7 @@ class InitialPhase(Phase):
318307
# this.
319308
defprocessEOF(self):
320309
self.parser.parseError("expected-doctype-but-got-eof")
321-
self.compatMode="quirks"
310+
self.parser.compatMode="quirks"
322311
self.parser.phase=self.parser.phases["beforeHtml"]
323312
self.parser.phase.processEOF()
324313

@@ -346,8 +335,9 @@ def processDoctype(self, token):
346335
ifpublicId!="":
347336
publicId=publicId.translate(asciiUpper2Lower)
348337

349-
if (notcorrectortoken["name"]!="html"
350-
orpublicIdin
338+
339+
if ((notcorrect)ornameLower!="html"
340+
orpublicIdin
351341
("+//silmaril//dtd html pro v0r11 19970101//en",
352342
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
353343
"-//as//dtd html 3.0 aswedit + extensions//en",
@@ -419,19 +409,18 @@ def processDoctype(self, token):
419409
"html")
420410
or (publicIdin
421411
("-//w3c//dtd html 4.01 frameset//EN",
422-
"-//w3c//dtd html 4.01 transitional//EN")and
423-
systemId==None)
412+
"-//w3c//dtd html 4.01 transitional//EN")andsystemId==None)
424413
or (systemId!=Noneand
425-
systemId=="http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")):
426-
self.compatMode="quirks"
414+
systemId==
415+
"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")):
416+
self.parser.compatMode="quirks"
427417
elif (publicIdin
428-
("-//w3c//dtd xhtml 1.0 frameset//EN",
429-
"-//w3c//dtd xhtml 1.0 transitional//EN")
418+
("-//w3c//dtd xhtml 1.0 frameset//EN",
419+
"-//w3c//dtd xhtml 1.0 transitional//EN")
430420
or (publicIdin
431421
("-//w3c//dtd html 4.01 frameset//EN",
432-
"-//w3c//dtd html 4.01 transitional//EN")and
433-
systemId==None)):
434-
self.compatMode="limited quirks"
422+
"-//w3c//dtd html 4.01 transitional//EN")andsystemId==None)):
423+
self.parser.compatMode="limited quirks"
435424

436425
self.parser.phase=self.parser.phases["beforeHtml"]
437426

@@ -440,7 +429,7 @@ def processSpaceCharacters(self, token):
440429

441430
defprocessCharacters(self,token):
442431
self.parser.parseError("expected-doctype-but-got-chars")
443-
self.compatMode="quirks"
432+
self.parser.compatMode="quirks"
444433
self.parser.phase=self.parser.phases["beforeHtml"]
445434
self.parser.phase.processCharacters(token)
446435

@@ -595,7 +584,8 @@ def startTagMeta(self, token):
595584
codec=inputstream.codecName(attributes["charset"])
596585
self.parser.tokenizer.stream.changeEncoding(codec)
597586
elif"content"inattributes:
598-
data=inputstream.EncodingBytes(attributes["content"])
587+
data=inputstream.EncodingBytes(
588+
attributes["content"].encode(self.parser.tokenizer.stream.charEncoding[0]))
599589
parser=inputstream.ContentAttrParser(data)
600590
codec=parser.parse()
601591
self.parser.tokenizer.stream.changeEncoding(codec)

‎src/html5lib/inputstream.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
importcodecs
22
importre
33
importtypes
4+
importsys
45

56
from .constantsimportEOF,spaceCharacters,asciiLetters,asciiUppercase
67
from .constantsimportencodings,ReparseException
@@ -188,7 +189,8 @@ def openStream(self, source):
188189
importio
189190
stream=io.BytesIO(bytes(source))
190191

191-
ifnot(hasattr(stream,"tell")andhasattr(stream,"seek")):
192+
if (not(hasattr(stream,"tell")andhasattr(stream,"seek"))or
193+
streamissys.stdin):
192194
stream=BufferedStream(stream)
193195

194196
returnstream
@@ -452,6 +454,9 @@ class EncodingBytes(bytes):
452454
"""Bytes-like object with an assosiated position and various extra methods
453455
If the position is ever greater than the string length then an exception is
454456
raised"""
457+
def__new__(self,value):
458+
returnstr.__new__(self,value)
459+
455460
def__init__(self,value):
456461
self._position=-1
457462

‎src/html5lib/sanitizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def sanitize_token(self, token):
152152
continue
153153
val_unescaped=re.sub("[`\000-\040\177-\240\s]+",'',
154154
unescape(attrs[attr])).lower()
155-
if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped)or
155+
if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped)and
156156
(val_unescaped.split(':')[0]notin
157157
self.allowed_protocols)):
158158
delattrs[attr]

‎src/html5lib/tokenizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def consumeNumberEntity(self, isHex):
142142
# Certain characters get replaced with U+FFFD
143143
if ((charAsInt<=0x0008)or (charAsInt==0x000B)or (0x000E<=charAsInt<=0x001F)
144144
or (0x007F<=charAsInt<=0x009F)
145-
or (0xD800<=charAsInt<=0xDFFF)or (0xFDD0<=charAsInt<=0xFDDF)
145+
or (0xD800<=charAsInt<=0xDFFF)or (0xFDD0<=charAsInt<=0xFDEF)
146146
or (charAsInt&0xFFFE==0xFFFE)# catch all U+?FFFE and U+?FFFF, where ? is 0..10
147147
or (0x10FFFF<charAsInt)):
148148
char="\uFFFD"

‎tests/test_parser.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ def buildTestSuite():
142142
deftestFunc(self,innerHTML=innerHTML,input=input,
143143
expected=expected,errors=errors,treeCls=treeCls):
144144
returnself.runParserTest(innerHTML,input,expected,errors,treeCls)
145-
setattr(TestCase,"test_%s_%d_%s"% (testName,index+1,treeName),
145+
testFunc.__name__="test_%s_%d_%s"% (testName,index+1,treeName)
146+
setattr(TestCase,testFunc.__name__,
146147
testFunc)
147148

148149
returnunittest.TestLoader().loadTestsFromTestCase(TestCase)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp