Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 566ca68

Browse files
committed
Move RCData parser to where it should have been to begin with in order to fix last lxml unit test
--HG--extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401151
1 parent abfca04, commit 566ca68

File tree

3 files changed

+47
-38
lines changed

3 files changed

+47
-38
lines changed

‎src/html5lib/html5parser.py

Lines changed: 38 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def __init__(self, strict = False, tree=simpletree.TreeBuilder,
7575
"afterBody":AfterBodyPhase(self,self.tree),
7676
"inFrameset":InFramesetPhase(self,self.tree),
7777
"afterFrameset":AfterFramesetPhase(self,self.tree),
78-
"trailingEnd":TrailingEndPhase(self,self.tree)
78+
"trailingEnd":TrailingEndPhase(self,self.tree),
7979
# XXX after after body
8080
# XXX after after frameset
8181
# XXX trailingEnd is gone
@@ -117,10 +117,11 @@ def _parse(self, stream, innerHTML=False, container="div",
117117
# relevant ... need others too
118118
self.lastPhase=None
119119

120+
self.beforeRCDataPhase=None
121+
120122
# XXX This is temporary for the moment so there isn't any other
121123
# changes needed for the parser to work with the iterable tokenizer
122-
fortokeninself.tokenizer:
123-
token=self.normalizeToken(token)
124+
fortokeninself.normalizedTokens():
124125
type=token["type"]
125126
method=getattr(self.phase,"process%s"%type,None)
126127
iftypein ("Characters","SpaceCharacters","Comment"):
@@ -137,6 +138,10 @@ def _parse(self, stream, innerHTML=False, container="div",
137138
# When the loop finishes it's EOF
138139
self.phase.processEOF()
139140

141+
defnormalizedTokens(self):
142+
fortokeninself.tokenizer:
143+
yieldself.normalizeToken(token)
144+
140145
defparse(self,stream,encoding=None,parseMeta=True,useChardet=True):
141146
"""Parse a HTML document into a well-formed tree
142147
@@ -238,6 +243,29 @@ def resetInsertionMode(self):
238243
self.phase=self.phases["inBody"]
239244
break
240245

246+
defparseRCDataCData(self,name,attributes,contentType):
247+
"""Generic (R)CDATA Parsing algorithm
248+
contentType - RCDATA or CDATA
249+
"""
250+
assertcontentTypein ("CDATA","RCDATA")
251+
252+
element=self.tree.insertElement(name,attributes)
253+
self.tokenizer.contentModelFlag=contentModelFlags[contentType]
254+
255+
fortokeninself.normalizedTokens():
256+
iftoken["type"]in ("Characters","SpaceCharacters"):
257+
self.tree.insertText(token["data"])
258+
eliftoken["type"]=="ParseError":
259+
self.parseError(token["data"],token.get("datavars", {}))
260+
else:
261+
assertself.tokenizer.contentModelFlag==contentModelFlags["PCDATA"]
262+
asserttoken["type"]=="EndTag"andtoken["name"]==name,repr(token)
263+
assertself.tree.openElements.pop()==element
264+
return
265+
#Otherwise we hit EOF
266+
assertself.tree.openElements.pop()==element
267+
self.parseError("expected-closing-tag-but-got-eof")
268+
241269
classPhase(object):
242270
"""Base class for helper object that implements each phase of processing
243271
"""
@@ -298,29 +326,6 @@ def startTagHtml(self, name, attributes):
298326
defprocessEndTag(self,name):
299327
self.endTagHandler[name](name)
300328

301-
defparseRCDataCData(self,name,attributes,contentType):
302-
"""Generic (R)CDATA Parsing algorithm
303-
contentType - RCDATA or CDATA
304-
"""
305-
assertcontentTypein ("CDATA","RCDATA")
306-
element=self.tree.insertElement(name,attributes)
307-
self.parser.tokenizer.contentModelFlag=contentModelFlags[contentType]
308-
fortokeninself.parser.tokenizer:
309-
token=self.parser.normalizeToken(token)
310-
iftoken["type"]in ("Characters","SpaceCharacters"):
311-
self.tree.insertText(token["data"])
312-
eliftoken["type"]=="ParseError":
313-
self.parser.parseError(token["data"],token.get("datavars", {}))
314-
else:
315-
assertself.parser.tokenizer.contentModelFlag==contentModelFlags["PCDATA"]
316-
asserttoken["type"]=="EndTag"andtoken["name"]==name,repr(token)
317-
assertself.tree.openElements.pop()==element
318-
return
319-
#Otherwise we hit EOF
320-
assertself.tree.openElements.pop()==element
321-
self.parser.parseError("expected-closing-tag-but-got-eof")
322-
323-
324329
classInitialPhase(Phase):
325330
# This phase deals with error handling as well which is currently not
326331
# covered in the specification. The error handling is typically known as
@@ -586,18 +591,18 @@ def startTagHead(self, name, attributes):
586591
self.parser.parseError("two-heads-are-not-better-than-one")
587592

588593
defstartTagTitle(self,name,attributes):
589-
self.parseRCDataCData(name,attributes,"RCDATA")
594+
self.parser.parseRCDataCData(name,attributes,"RCDATA")
590595

591596
defstartTagStyle(self,name,attributes):
592-
self.parseRCDataCData(name,attributes,"CDATA")
597+
self.parser.parseRCDataCData(name,attributes,"CDATA")
593598

594599
defstartTagNoScript(self,name,attributes):
595600
#Need to decide whether to implement the scripting-disabled case
596-
self.parseRCDataCData(name,attributes,"CDATA")
601+
self.parser.parseRCDataCData(name,attributes,"CDATA")
597602

598603
defstartTagScript(self,name,attributes):
599604
#I think this is equivalent to the CDATA stuff since we don't execute script
600-
self.parseRCDataCData(name,attributes,"CDATA")
605+
self.parser.parseRCDataCData(name,attributes,"CDATA")
601606

602607
defstartTagBaseLinkMeta(self,name,attributes):
603608
if (self.tree.headPointerisnotNoneandself.parser.phase==self.parser.phases["inHead"]):
@@ -612,7 +617,7 @@ def startTagOther(self, name, attributes):
612617
self.parser.phase.processStartTag(name,attributes)
613618

614619
defendTagHead(self,name):
615-
assertself.tree.openElements[-1].name=="head"
620+
assertself.tree.openElements[-1].name=="head","Expected head got %s"%self.tree.openElements[-1].name
616621
self.tree.openElements.pop()
617622
self.parser.phase=self.parser.phases["afterHead"]
618623

@@ -922,7 +927,7 @@ def startTagAppletMarqueeObject(self, name, attributes):
922927

923928
defstartTagXmp(self,name,attributes):
924929
self.tree.reconstructActiveFormattingElements()
925-
self.parseRCDataCData(name,attributes,"CDATA")
930+
self.parser.parseRCDataCData(name,attributes,"CDATA")
926931

927932
defstartTagTable(self,name,attributes):
928933
ifself.tree.elementInScope("p"):
@@ -982,7 +987,7 @@ def startTagTextarea(self, name, attributes):
982987

983988
defstartTagCdata(self,name,attributes):
984989
"""iframe, noembed noframes, noscript(if scripting enabled)"""
985-
self.parseRCDataCData(name,attributes,"CDATA")
990+
self.parser.parseRCDataCData(name,attributes,"CDATA")
986991

987992
defstartTagSelect(self,name,attributes):
988993
self.tree.reconstructActiveFormattingElements()

‎src/html5lib/liberalxmlparser.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,9 @@ def _parse(self, stream, innerHTML=False, container="div", encoding=None,
6363
encoding,lowercaseElementName=False,
6464
lowercaseAttrName=False)
6565

66+
defparseRCDataCData(self,name,attributes,contentType):
67+
self.tree.insertElement(name,attributes)
68+
6669
classXHTMLParser(XMLParser):
6770
""" liberal XMTHML parser """
6871

‎src/html5lib/tokenizer.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -315,10 +315,11 @@ def dataState(self):
315315
self.escapeFlagand"".join(self.lastFourChars)=="<!--":
316316
self.escapeFlag=True
317317
self.tokenQueue.append({"type":"Characters","data":data})
318-
elifdata=="<"and (self.contentModelFlag==\
319-
contentModelFlags["PCDATA"]or (self.contentModelFlagin
320-
(contentModelFlags["CDATA"],contentModelFlags["RCDATA"])and\
321-
self.escapeFlag==False)):
318+
elif (data=="<"and (self.contentModelFlag==contentModelFlags["PCDATA"]
319+
or (self.contentModelFlagin
320+
(contentModelFlags["CDATA"],
321+
contentModelFlags["RCDATA"])and
322+
self.escapeFlag==False))):
322323
self.state=self.states["tagOpen"]
323324
elifdata==">"andself.contentModelFlagin\
324325
(contentModelFlags["CDATA"],contentModelFlags["RCDATA"])and\
@@ -343,7 +344,7 @@ def dataState(self):
343344
self.lastFourChars+=chars[-4:]
344345
self.lastFourChars=self.lastFourChars[-4:]
345346
returnTrue
346-
347+
347348
defentityDataState(self):
348349
entity=self.consumeEntity()
349350
ifentity:

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp