@@ -150,6 +150,19 @@ def reset(self):
150150self .beforeRCDataPhase = None
151151
152152self .framesetOK = True
153+
def isHTMLIntegrationPoint(self, element):
    """Return whether *element* counts as an HTML integration point.

    A MathML ``annotation-xml`` element qualifies only when it carries an
    ``encoding`` attribute whose value, after translation through
    ``asciiUpper2Lower``, is ``text/html`` or ``application/xhtml+xml``.
    Every other element qualifies when its ``(namespace, name)`` pair is
    listed in ``htmlIntegrationPointElements``.
    """
    is_annotation_xml = (element.name == "annotation-xml" and
                         element.namespace == namespaces["mathml"])
    if not is_annotation_xml:
        return (element.namespace, element.name) in htmlIntegrationPointElements

    # annotation-xml is decided purely by its encoding attribute.
    if "encoding" not in element.attributes:
        return False
    encoding = element.attributes["encoding"].translate(asciiUpper2Lower)
    return encoding in ("text/html", "application/xhtml+xml")
163+
def isMathMLTextIntegrationPoint(self, element):
    """Return whether *element* is a MathML text integration point.

    The check is a membership test of the element's ``(namespace, name)``
    pair in ``mathmlTextIntegrationPointElements``.
    """
    qualified_name = (element.namespace, element.name)
    return qualified_name in mathmlTextIntegrationPointElements
153166
154167def mainLoop (self ):
155168CharactersToken = tokenTypes ["Characters" ]
@@ -158,27 +171,48 @@ def mainLoop(self):
158171EndTagToken = tokenTypes ["EndTag" ]
159172CommentToken = tokenTypes ["Comment" ]
160173DoctypeToken = tokenTypes ["Doctype" ]
161-
174+ ParseErrorToken = tokenTypes [ "ParseError" ]
162175
163176for token in self .normalizedTokens ():
164177new_token = token
165178while new_token is not None :
179+ currentNode = self .tree .openElements [- 1 ]if self .tree .openElements else None
180+ currentNodeNamespace = currentNode .namespace if currentNode else None
181+ currentNodeName = currentNode .name if currentNode else None
182+
166183type = new_token ["type" ]
167- if type == CharactersToken :
168- new_token = self .phase .processCharacters (new_token )
169- elif type == SpaceCharactersToken :
170- new_token = self .phase .processSpaceCharacters (new_token )
171- elif type == StartTagToken :
172- new_token = self .phase .processStartTag (new_token )
173- elif type == EndTagToken :
174- new_token = self .phase .processEndTag (new_token )
175- elif type == CommentToken :
176- new_token = self .phase .processComment (new_token )
177- elif type == DoctypeToken :
178- new_token = self .phase .processDoctype (new_token )
179- else :
184+
185+ if type == ParseErrorToken :
180186self .parseError (new_token ["data" ],new_token .get ("datavars" , {}))
181187new_token = None
188+ else :
189+ if (len (self .tree .openElements )== 0 or
190+ currentNodeNamespace == self .tree .defaultNamespace or
191+ (self .isMathMLTextIntegrationPoint (currentNode )and
192+ ((type == StartTagToken and
193+ token ["name" ]not in frozenset (["mglyph" ,"malignmark" ]))or
194+ type in (CharactersToken ,SpaceCharactersToken )))or
195+ (currentNodeNamespace == namespaces ["mathml" ]and
196+ currentNodeName == "annotation-xml" and
197+ token ["name" ]== "svg" )or
198+ (self .isHTMLIntegrationPoint (currentNode )and
199+ type in (StartTagToken ,CharactersToken ,SpaceCharactersToken ))):
200+ phase = self .phase
201+ else :
202+ phase = self .phases ["inForeignContent" ]
203+
204+ if type == CharactersToken :
205+ new_token = phase .processCharacters (new_token )
206+ elif type == SpaceCharactersToken :
207+ new_token = phase .processSpaceCharacters (new_token )
208+ elif type == StartTagToken :
209+ new_token = phase .processStartTag (new_token )
210+ elif type == EndTagToken :
211+ new_token = phase .processEndTag (new_token )
212+ elif type == CommentToken :
213+ new_token = phase .processComment (new_token )
214+ elif type == DoctypeToken :
215+ new_token = phase .processDoctype (new_token )
182216
183217if (type == StartTagToken and token ["selfClosing" ]
184218and not token ["selfClosingAcknowledged" ]):
@@ -379,12 +413,12 @@ def resetInsertionMode(self):
379413if nodeName in ("select" ,"colgroup" ,"head" ,"html" ):
380414assert self .innerHTML
381415
416+ if not last and node .namespace != self .tree .defaultNamespace :
417+ continue
418+
382419if nodeName in newModes :
383420new_phase = self .phases [newModes [nodeName ]]
384421break
385- elif node .namespace in (namespaces ["mathml" ],namespaces ["svg" ]):
386- new_phase = self .phases ["inForeignContent" ]
387- break
388422elif last :
389423new_phase = self .phases ["inBody" ]
390424break
@@ -419,7 +453,6 @@ def wrapped(self, *args, **kwargs):
419453try :
420454info = {"type" :type_names [token ['type' ]]}
421455except :
422- print token
423456raise
424457if token ['type' ]in constants .tagTokenTypes :
425458info ["name" ]= token ['name' ]
@@ -1243,7 +1276,6 @@ def startTagMath(self, token):
12431276self .tree .insertElement (token )
12441277#Need to get the parse error right for the case where the token
12451278#has a namespace not equal to the xmlns attribute
1246- self .parser .phase = self .parser .phases ["inForeignContent" ]
12471279if token ["selfClosing" ]:
12481280self .tree .openElements .pop ()
12491281token ["selfClosingAcknowledged" ]= True
@@ -1256,7 +1288,6 @@ def startTagSvg(self, token):
12561288self .tree .insertElement (token )
12571289#Need to get the parse error right for the case where the token
12581290#has a namespace not equal to the xmlns attribute
1259- self .parser .phase = self .parser .phases ["inForeignContent" ]
12601291if token ["selfClosing" ]:
12611292self .tree .openElements .pop ()
12621293token ["selfClosingAcknowledged" ]= True
@@ -1741,7 +1772,7 @@ def processSpaceCharacters(self, token):
17411772self .characterTokens .append (token )
17421773# assert False
17431774
def processStartTag(self, token):
    """Flush buffered characters, restore the original phase, and hand
    the start tag back for reprocessing.

    Returns the same *token* object so the caller can dispatch it again
    under the restored phase.
    """
    # Flush first: the pending characters are emitted before the phase
    # switch, exactly as in the original statement order.
    self.flushCharacters()
    parser = self.parser
    parser.phase = self.originalPhase
    return token
@@ -2298,7 +2329,7 @@ def endTagOther(self, token):
22982329class InForeignContentPhase (Phase ):
22992330breakoutElements = frozenset (["b" ,"big" ,"blockquote" ,"body" ,"br" ,
23002331"center" ,"code" ,"dd" ,"div" ,"dl" ,"dt" ,
2301- "em" ,"embed" ,"font" , " h1" ,"h2" ,"h3" ,
2332+ "em" ,"embed" ,"h1" ,"h2" ,"h3" ,
23022333"h4" ,"h5" ,"h6" ,"head" ,"hr" ,"i" ,"img" ,
23032334"li" ,"listing" ,"menu" ,"meta" ,"nobr" ,
23042335"ol" ,"p" ,"pre" ,"ruby" ,"s" ,"small" ,
@@ -2307,19 +2338,6 @@ class InForeignContentPhase(Phase):
23072338def __init__ (self ,parser ,tree ):
23082339Phase .__init__ (self ,parser ,tree )
23092340
2310- def isHTMLIntegrationPoint (self ,element ):
2311- if (element .name == "annotation-xml" and
2312- element .namespace == namespaces ["mathml" ]):
2313- return ("encoding" in element .attributes and
2314- element .attributes ["encoding" ].translate (
2315- asciiUpper2Lower )in
2316- ("text/html" ,"application/xhtml+xml" ))
2317- else :
2318- return (element .namespace ,element .name )in htmlIntegrationPointElements
2319-
2320- def isMathMLTextIntegrationPoint (self ,element ):
2321- return (element .namespace ,element .name )in mathmlTextIntegrationPointElements
2322-
23232341def adjustSVGTagNames (self ,token ):
23242342replacements = {u"altglyph" :u"altGlyph" ,
23252343u"altglyphdef" :u"altGlyphDef" ,
@@ -2362,48 +2380,25 @@ def adjustSVGTagNames(self, token):
23622380token ["name" ]= replacements [token ["name" ]]
23632381
def processCharacters(self, token):
    """Handle a character token while parsing foreign content.

    A token whose data is exactly U+0000 NULL has its data replaced by
    U+FFFD REPLACEMENT CHARACTER. Otherwise, if the parser's
    ``framesetOK`` flag is still set and the data contains any character
    outside ``spaceCharacters``, the flag is cleared. In every case the
    token is then passed on to ``Phase.processCharacters``.
    """
    # Match the bare NUL code point. A literal with a trailing space
    # would never equal a lone NUL, so the substitution would be skipped
    # and, if it did fire, would inject an extra space into the tree.
    if token["data"] == u"\u0000":
        token["data"] = u"\uFFFD"
    elif (self.parser.framesetOK and
          any(char not in spaceCharacters for char in token["data"])):
        # Non-whitespace text means a later <frameset> is no longer allowed.
        self.parser.framesetOK = False
    Phase.processCharacters(self, token)
23762389
2377- def processEOF (self ):
2378- reprocess = self .parser .phases ["inBody" ].processEOF ()
2379- self .parser .resetInsertionMode ()
2380- return reprocess
2381-
23822390def processStartTag (self ,token ):
23832391currentNode = self .tree .openElements [- 1 ]
2384- currentNodeNamespace = currentNode .namespace
2385- currentNodeName = currentNode .name
2386- if (currentNodeNamespace == self .tree .defaultNamespace or
2387- (self .isMathMLTextIntegrationPoint (currentNode )and
2388- token ["name" ]not in frozenset (["mglyph" ,"malignmark" ]))or
2389- (currentNodeNamespace == namespaces ["mathml" ]and
2390- currentNodeName == "annotation-xml" and
2391- token ["name" ]== "svg" )or
2392- self .isHTMLIntegrationPoint (currentNode )):
2393-
2394- new_token = self .parser .phases ["inBody" ].processStartTag (token )
2395- self .parser .resetInsertionMode ()
2396- return new_token
2397-
2398- elif token ["name" ]in self .breakoutElements :
2392+ if (token ["name" ]in self .breakoutElements or
2393+ (token ["name" ]== "font" and
2394+ set (token ["data" ].keys ())| set ("color" ,"face" ,"size" ))):
23992395self .parser .parseError ("unexpected-html-element-in-foreign-content" ,
24002396token ["name" ])
24012397while (self .tree .openElements [- 1 ].namespace !=
24022398self .tree .defaultNamespace and
2403- not self .isHTMLIntegrationPoint (self .tree .openElements [- 1 ])and
2404- not self .isMathMLTextIntegrationPoint (self .tree .openElements [- 1 ])):
2399+ not self .parser . isHTMLIntegrationPoint (self .tree .openElements [- 1 ])and
2400+ not self .parser . isMathMLTextIntegrationPoint (self .tree .openElements [- 1 ])):
24052401self .tree .openElements .pop ()
2406- self .parser .resetInsertionMode ()
24072402return token
24082403
24092404else :
@@ -2420,33 +2415,29 @@ def processStartTag(self, token):
24202415token ["selfClosingAcknowledged" ]= True
24212416
def processEndTag(self, token):
    """Handle an end tag while parsing foreign content.

    Walks the stack of open elements from the current node downwards.
    If a node's name (translated through ``asciiUpper2Lower``) equals the
    token's name, elements are popped up to and including that node and
    ``None`` is returned. If the walk reaches a node in the default
    namespace first, the token is delegated to the parser's current
    phase and that phase's result is returned.

    A parse error is reported up front when the current node's name does
    not match the token's name exactly.
    """
    tag_name = token["name"]
    stack_pos = len(self.tree.openElements) - 1
    candidate = self.tree.openElements[-1]
    if candidate.name != tag_name:
        self.parser.parseError("unexpected-end-tag", tag_name)

    while True:
        if candidate.name.translate(asciiUpper2Lower) == tag_name:
            # When table text is being buffered, flush it and restore the
            # phase that was active before buffering started, so the
            # pending characters are emitted before elements are popped.
            if self.parser.phase == self.parser.phases["inTableText"]:
                self.parser.phase.flushCharacters()
                self.parser.phase = self.parser.phase.originalPhase
            # Pop everything above the match, then the match itself.
            while self.tree.openElements.pop() != candidate:
                assert self.tree.openElements
            return None

        stack_pos -= 1
        candidate = self.tree.openElements[stack_pos]
        if candidate.namespace == self.tree.defaultNamespace:
            # Reached a default-namespace element: let the current
            # insertion mode deal with the end tag.
            return self.parser.phase.processEndTag(token)
24502441
24512442
24522443class AfterBodyPhase (Phase ):