Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 139836d

Browse files
committed
Get lxml treebuilder passing tests again.
1 parent b2c4ede, commit 139836d

File tree

4 files changed

+28
-9
lines changed

4 files changed

+28
-9
lines changed

‎html5lib/ihatexml.py‎

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,7 @@
11
importre
2+
importwarnings
3+
4+
from .constantsimportDataLossWarning
25

36
baseChar="""[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | 
[#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
47

@@ -117,10 +120,11 @@ def __init__(self, replaceChars = None,
117120

118121
defcoerceAttribute(self,name,namespace=None):
119122
ifself.dropXmlnsLocalNameandname.startswith("xmlns:"):
120-
#Need a datalosswarning here
123+
warnings.warn("Attributes cannot begin with xmlns",DataLossWarning)
121124
returnNone
122125
elif (self.dropXmlnsAttrNsand
123126
namespace=="http://www.w3.org/2000/xmlns/"):
127+
warnings.warn("Attributes cannot be in the xml namespace",DataLossWarning)
124128
returnNone
125129
else:
126130
returnself.toXmlName(name)
@@ -131,11 +135,14 @@ def coerceElement(self, name, namespace=None):
131135
defcoerceComment(self,data):
132136
ifself.preventDoubleDashComments:
133137
while"--"indata:
138+
warnings.warn("Comments cannot contain adjacent dashes",DataLossWarning)
134139
data=data.replace("--","- -")
135140
returndata
136141

137142
defcoerceCharacters(self,data):
138143
ifself.replaceFormFeedCharacters:
144+
foriinrange(data.count("\x0C")):
145+
warnings.warn("Text cannot contain U+000C",DataLossWarning)
139146
data=data.replace("\x0C"," ")
140147
#Other non-xml characters
141148
returndata
@@ -145,13 +152,15 @@ def toXmlName(self, name):
145152
nameRest=name[1:]
146153
m=nonXmlNameFirstBMPRegexp.match(nameFirst)
147154
ifm:
155+
warnings.warn("Coercing non-XML name",DataLossWarning)
148156
nameFirstOutput=self.getReplacementCharacter(nameFirst)
149157
else:
150158
nameFirstOutput=nameFirst
151159

152160
nameRestOutput=nameRest
153161
replaceChars=set(nonXmlNameBMPRegexp.findall(nameRest))
154162
forcharinreplaceChars:
163+
warnings.warn("Coercing non-XML name",DataLossWarning)
155164
replacement=self.getReplacementCharacter(char)
156165
nameRestOutput=nameRestOutput.replace(char,replacement)
157166
returnnameFirstOutput+nameRestOutput

‎html5lib/treebuilders/etree.py‎

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -178,11 +178,11 @@ def _setSystemId(self, value):
178178

179179
classDocument(Element):
180180
def__init__(self):
181-
Element.__init__(self,"<DOCUMENT_ROOT>")
181+
Element.__init__(self,"DOCUMENT_ROOT")
182182

183183
classDocumentFragment(Element):
184184
def__init__(self):
185-
Element.__init__(self,"<DOCUMENT_FRAGMENT>")
185+
Element.__init__(self,"DOCUMENT_FRAGMENT")
186186

187187
deftestSerializer(element):
188188
rv= []
@@ -198,7 +198,7 @@ def serializeElement(element, indent=0):
198198
element.text,publicId,systemId))
199199
else:
200200
rv.append("<!DOCTYPE %s>"%(element.text,))
201-
elifelement.tag=="<DOCUMENT_ROOT>":
201+
elifelement.tag=="DOCUMENT_ROOT":
202202
rv.append("#document")
203203
ifelement.text:
204204
rv.append("|%s\"%s\""%(' '*(indent+2),element.text))
@@ -263,7 +263,7 @@ def serializeElement(element):
263263
element.text,publicId,systemId))
264264
else:
265265
rv.append("<!DOCTYPE %s>"%(element.text,))
266-
elifelement.tag=="<DOCUMENT_ROOT>":
266+
elifelement.tag=="DOCUMENT_ROOT":
267267
ifelement.text:
268268
rv.append(element.text)
269269
ifelement.tail:

‎html5lib/treebuilders/etree_lxml.py‎

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,8 @@ def serializeElement(element, indent=0):
8080
serializeElement(next_element,indent+2)
8181
eliftype(element.tag)==type(etree.Comment):
8282
rv.append("|%s<!-- %s -->"%(' '*indent,element.text))
83+
ifhasattr(element,"tail")andelement.tail:
84+
rv.append("|%s\"%s\""%(' '*indent,element.tail))
8385
else:
8486
nsmatch=etree_builders.tag_regexp.match(element.tag)
8587
ifnsmatchisnotNone:
@@ -113,8 +115,8 @@ def serializeElement(element, indent=0):
113115
indent+=2
114116
forchildinelement.getchildren():
115117
serializeElement(child,indent)
116-
ifhasattr(element,"tail")andelement.tail:
117-
rv.append("|%s\"%s\""%(' '*(indent-2),element.tail))
118+
ifhasattr(element,"tail")andelement.tail:
119+
rv.append("|%s\"%s\""%(' '*(indent-2),element.tail))
118120
serializeElement(element,0)
119121

120122
iffinalTextisnotNone:
@@ -286,6 +288,12 @@ def insertDoctype(self, token):
286288

287289
definsertCommentInitial(self,data,parent=None):
288290
self.initial_comments.append(data)
291+
292+
definsertCommentMain(self,data,parent=None):
293+
if (parent==self.documentand
294+
type(self.document._elementTree.getroot()[-1].tag)==type(etree.Comment)):
295+
warnings.warn("lxml cannot represent adjacent comments beyond the root elements",DataLossWarning)
296+
super().insertComment(data,parent)
289297

290298
definsertRoot(self,token):
291299
"""Create the document root"""
@@ -301,6 +309,8 @@ def insertRoot(self, token):
301309
docStr+=' PUBLIC "%s" "%s"'%(self.doctype.publicIdor"",
302310
self.doctype.systemIdor"")
303311
docStr+=">"
312+
ifself.doctype.name!=token["name"]:
313+
warnings.warn("lxml cannot represent doctype with a different name to the root element",DataLossWarning)
304314
docStr+="<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
305315

306316
try:
@@ -333,4 +343,4 @@ def insertRoot(self, token):
333343
self.openElements.append(root_element)
334344

335345
#Reset to the default insert comment function
336-
self.insertComment=super(TreeBuilder,self).insertComment
346+
self.insertComment=self.insertCommentMain

‎html5lib/treewalkers/etree.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def getNodeDetails(self, node):
3838
ifnot(hasattr(node,"tag")):
3939
node=node.getroot()
4040

41-
ifnode.tagin ("<DOCUMENT_ROOT>","<DOCUMENT_FRAGMENT>"):
41+
ifnode.tagin ("DOCUMENT_ROOT","DOCUMENT_FRAGMENT"):
4242
return (_base.DOCUMENT,)
4343

4444
elifnode.tag=="<!DOCTYPE>":

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp