Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5999365

Browse files
committed
Support publicIds and systemIds in doctypes
--HG--extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401142
1 parent afe181d commit 5999365

File tree

9 files changed

+66
-31
lines changed

9 files changed

+66
-31
lines changed

‎src/html5lib/treebuilders/_base.py‎

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,7 @@ def insertRoot(self, name):
213213
self.document.appendChild(element)
214214

215215
definsertDoctype(self,name,publicId,systemId):
216-
doctype=self.doctypeClass(name)
217-
doctype.publicId=publicId
218-
doctype.systemId=systemId
216+
doctype=self.doctypeClass(name,publicId,systemId)
219217
self.document.appendChild(doctype)
220218

221219
definsertComment(self,data,parent=None):

‎src/html5lib/treebuilders/dom.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def serializeElement(element, indent=0):
141141
ifelement.publicIdorelement.systemId:
142142
publicId=element.publicIdor""
143143
systemId=element.systemIdor""
144-
rv.append("""|%s<!DOCTYPE %sPUBLIC"%s" "%s">"""%(
144+
rv.append("""|%s<!DOCTYPE %s "%s" "%s">"""%(
145145
' '*indent,element.name,publicId,systemId))
146146
else:
147147
rv.append("|%s<!DOCTYPE %s>"%(' '*indent,element.name))

‎src/html5lib/treebuilders/etree.py‎

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,14 @@ def _setData(self, value):
132132
data=property(_getData,_setData)
133133

134134
classDocumentType(Element):
135-
def__init__(self,name):
135+
def__init__(self,name,publicId,systemId):
136136
Element.__init__(self,"<!DOCTYPE>")
137137
self._element.text=name
138+
self.publicId=publicId
139+
self.systemId=systemId
138140

139141
def_getPublicId(self):
140-
returnself._element.get(u"publicId",None)
142+
returnself._element.get(u"publicId","")
141143

142144
def_setPublicId(self,value):
143145
ifvalueisnotNone:
@@ -146,7 +148,7 @@ def _setPublicId(self, value):
146148
publicId=property(_getPublicId,_setPublicId)
147149

148150
def_getSystemId(self):
149-
returnself._element.get(u"systemId",None)
151+
returnself._element.get(u"systemId","")
150152

151153
def_setSystemId(self,value):
152154
ifvalueisnotNone:
@@ -172,7 +174,7 @@ def serializeElement(element, indent=0):
172174
ifelement.get("publicId")orelement.get("systemId"):
173175
publicId=element.get("publicId")or""
174176
systemId=element.get("systemId")or""
175-
rv.append("""<!DOCTYPE %sPUBLIC"%s" "%s">"""%(
177+
rv.append("""<!DOCTYPE %s "%s" "%s">"""%(
176178
element.text,publicId,systemId))
177179
else:
178180
rv.append("<!DOCTYPE %s>"%(element.text,))

‎src/html5lib/treebuilders/etree_lxml.py‎

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
"""
2424

2525
classDocumentType(object):
26-
def__init__(self,name,publicId=None,systemId=None):
26+
def__init__(self,name,publicId,systemId):
2727
self.name=name
2828
ifname!=name.lower():
29-
warnings.warn("lxml does not preserve doctype case",DataLossWarning)
29+
warnings.warn("lxml does not preserve doctype case",DataLossWarning)
3030
self.publicId=publicId
3131
self.systemId=systemId
3232

@@ -56,7 +56,7 @@ def serializeElement(element, indent=0):
5656
element.docinfo.system_url):
5757
dtd_str="<!DOCTYPE %s>"%element.docinfo.root_name
5858
else:
59-
dtd_str="""<!DOCTYPE %sPUBLIC"%s" "%s">"""%(
59+
dtd_str="""<!DOCTYPE %s "%s" "%s">"""%(
6060
element.docinfo.root_name,
6161
element.docinfo.public_id,
6262
element.docinfo.system_url)
@@ -181,9 +181,7 @@ def getFragment(self):
181181
definsertDoctype(self,name,publicId,systemId):
182182
ifnotname:
183183
warnings.warn("lxml cannot represent null doctype",DataLossWarning)
184-
doctype=self.doctypeClass(name)
185-
doctype.publicId=publicId
186-
doctype.systemId=systemId
184+
doctype=self.doctypeClass(name,publicId,systemId)
187185
self.doctype=doctype
188186

189187
definsertCommentInitial(self,data,parent=None):
@@ -196,7 +194,7 @@ def insertRoot(self, name):
196194
#Therefore we need to use the built-in parser to create our initial
197195
#tree, after which we can add elements like normal
198196
docStr=""
199-
ifself.doctype:
197+
ifself.doctypeandself.doctype.name:
200198
docStr+="<!DOCTYPE %s"%self.doctype.name
201199
ifself.doctype.publicIdisnotNoneorself.doctype.systemIdisnotNone:
202200
docStr+=' PUBLIC "%s" "%s"'%(self.doctype.publicIdor"",

‎src/html5lib/treebuilders/simpletree.py‎

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -106,17 +106,15 @@ def __unicode__(self):
106106

107107
classDocumentType(Node):
108108
type=3
109-
def__init__(self,name):
109+
def__init__(self,name,publicId,systemId):
110110
Node.__init__(self,name)
111-
self.publicId=u""
112-
self.systemId=u""
111+
self.publicId=publicId
112+
self.systemId=systemId
113113

114114
def__unicode__(self):
115115
ifself.publicIdorself.systemId:
116-
publicId=self.publicIdor""
117-
systemId=self.systemIdor""
118-
return"""<!DOCTYPE %s PUBLIC "%s" "%s">"""%(
119-
self.name,publicId,systemId)
116+
return"""<!DOCTYPE %s "%s" "%s">"""%(
117+
self.name,self.publicId,self.systemId)
120118

121119
else:
122120
returnu"<!DOCTYPE %s>"%self.name

‎src/html5lib/treebuilders/soup.py‎

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,11 @@ def documentClass(self):
104104
returnElement(self.soup,self.soup)
105105

106106
definsertDoctype(self,name,publicId,systemId):
107-
ifpublicIdorsystemId:
108-
publicId=publicIdor""
109-
systemId=systemIdor""
107+
ifpublicId:
110108
self.soup.insert(0,Declaration("%s PUBLIC\"%s\"\"%s\""%(name,publicId,systemId)))
109+
elifsystemId:
110+
self.soup.insert(0,Declaration("%s SYSTEM\"%s\""%
111+
(name,systemId)))
111112
else:
112113
self.soup.insert(0,Declaration(name))
113114

@@ -135,10 +136,25 @@ def getFragment(self):
135136
return_base.TreeBuilder.getFragment(self).element
136137

137138
deftestSerializer(element):
139+
importre
138140
rv= []
139141
defserializeElement(element,indent=0):
140142
ifisinstance(element,Declaration):
141-
rv.append("|%s<!DOCTYPE %s>"%(' '*indent,element.string))
143+
doctype_regexp=r'(?P<name>[^\s]*)( PUBLIC "(?P<publicId>.*)" "(?P<systemId1>.*)"| SYSTEM "(?P<systemId2>.*)")?'
144+
m=re.compile(doctype_regexp).match(element.string)
145+
assertmisnotNone,"DOCTYPE did not match expected format"
146+
name=m.group('name')
147+
publicId=m.group('publicId')
148+
ifpublicIdisnotNone:
149+
systemId=m.group('systemId1')
150+
else:
151+
systemId=m.group('systemId2')
152+
153+
ifpublicIdisnotNoneorsystemIdisnotNone:
154+
rv.append("""|%s<!DOCTYPE %s "%s" "%s">"""%
155+
(' '*indent,name,publicIdor"",systemIdor""))
156+
else:
157+
rv.append("|%s<!DOCTYPE %s>"%(' '*indent,name))
142158

143159
elifisinstance(element,BeautifulSoup):
144160
ifelement.name=="[document_fragment]":

‎src/html5lib/treewalkers/etree.py‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ def getNodeDetails(self, node):
5252
return (_base.DOCUMENT,)
5353

5454
elifnode.tag=="<!DOCTYPE>":
55-
return_base.DOCTYPE,node.text
55+
return (_base.DOCTYPE,node.text,
56+
node.get("publicId"),node.get("systemId"))
5657

5758
eliftype(node.tag)==type(ElementTree.Comment):
5859
return_base.COMMENT,node.text

‎src/html5lib/treewalkers/lxmletree.py‎

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ def __getitem__(self, key):
3333
defgetnext(self):
3434
returnNone
3535

36+
def__len__(self):
37+
return1
38+
3639
classDoctype(object):
3740
def__init__(self,root_node,name,public_id,system_id):
3841
self.root_node=root_node
@@ -91,6 +94,9 @@ def getparent(self):
9194
def__str__(self):
9295
returnstr(self.obj)
9396

97+
def__len__(self):
98+
returnlen(self.obj)
99+
94100

95101
classTreeWalker(_base.NonRecursiveTreeWalker):
96102
def__init__(self,tree):
@@ -119,12 +125,12 @@ def getNodeDetails(self, node):
119125

120126
else:
121127
#This is assumed to be an ordinary element
122-
return_base.ELEMENT,node.tag,node.attrib.items(),bool(node)ornode.text
128+
return_base.ELEMENT,node.tag,node.attrib.items(),len(node)>0ornode.text
123129

124130
defgetFirstChild(self,node):
125131
assertnotisinstance(node,tuple),_("Text nodes have no children")
126132

127-
assertbool(node)ornode.text,"Node has no children"
133+
assertlen(node)ornode.text,"Node has no children"
128134
ifnode.text:
129135
return (node,"text")
130136
else:
@@ -137,7 +143,7 @@ def getNextSibling(self, node):
137143
ifkey=="text":
138144
# XXX: we cannot use a "bool(node) and node[0] or None" construct here
139145
# because node[0] might evaluate to False if it has no child element
140-
ifbool(node):
146+
iflen(node):
141147
returnnode[0]
142148
else:
143149
returnNone

‎src/html5lib/treewalkers/soup.py‎

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
importre
12
importgettext
23
_=gettext.gettext
34

@@ -6,13 +7,28 @@
67
import_base
78

89
classTreeWalker(_base.NonRecursiveTreeWalker):
10+
doctype_regexp=re.compile(
11+
r'(?P<name>[^\s]*)(\s*PUBLIC\s*"(?P<publicId>.*)"\s*"(?P<systemId1>.*)"|\s*SYSTEM\s*"(?P<systemId2>.*)")?')
912
defgetNodeDetails(self,node):
1013
ifisinstance(node,BeautifulSoup):# Document or DocumentFragment
1114
return (_base.DOCUMENT,)
1215

1316
elifisinstance(node,Declaration):# DocumentType
1417
#Slice needed to remove markup added during unicode conversion
15-
return_base.DOCTYPE,unicode(node.string)[2:-1]
18+
m=self.doctype_regexp.match(unicode(node.string)[2:-1])
19+
#This regexp approach seems wrong and fragile
20+
#but beautiful soup stores the doctype as a single thing and we want the separate bits
21+
#It should work as long as the tree is created by html5lib itself but may be wrong if it's
22+
#been modified at all
23+
#We could just feed it to an html5lib tokenizer, I guess...
24+
assertmisnotNone,"DOCTYPE did not match expected format"
25+
name=m.group('name')
26+
publicId=m.group('publicId')
27+
ifpublicIdisnotNone:
28+
systemId=m.group('systemId1')
29+
else:
30+
systemId=m.group('systemId2')
31+
return_base.DOCTYPE,name,publicIdor"",systemIdor""
1632

1733
elifisinstance(node,Comment):
1834
return_base.COMMENT,unicode(node.string)[4:-3]

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp