@@ -298,6 +298,27 @@ def startTagHtml(self, name, attributes):
def processEndTag(self, name):
    """Look up the end-tag handler for ``name`` and invoke it with ``name``."""
    handler = self.endTagHandler[name]
    handler(name)
300300
def parseRCDataCData(self, name, attributes, contentType):
    """Generic (R)CDATA parsing algorithm.

    Inserts an element for the start tag, switches the tokenizer's content
    model flag to ``contentType`` and then pulls tokens straight from the
    tokenizer until the element's end tag (or the end of input) is reached.

    name -- tag name of the element being parsed
    attributes -- attributes handed to ``self.tree.insertElement``
    contentType -- "RCDATA" or "CDATA"

    Character tokens are inserted as text, ParseError tokens are forwarded to
    ``self.parser.parseError``, and any other token is expected to be the
    matching end tag. If the tokenizer is exhausted first, the element is
    still removed from the stack of open elements and
    "expected-closing-tag-but-got-eof" is reported.
    """
    assert contentType in ("CDATA", "RCDATA")
    element = self.tree.insertElement(name, attributes)
    self.parser.tokenizer.contentModelFlag = contentModelFlags[contentType]
    for token in self.parser.tokenizer:
        if token["type"] in ("Characters", "SpaceCharacters"):
            self.tree.insertText(token["data"])
        elif token["type"] == "ParseError":
            self.parser.parseError(token["data"], token.get("datavars", {}))
        else:
            assert self.parser.tokenizer.contentModelFlag == contentModelFlags["PCDATA"]
            assert token["type"] == "EndTag" and token["name"] == name, repr(token)
            # The pop must happen outside the assert: assert statements are
            # removed under ``python -O``, which would otherwise leave the
            # element on the stack of open elements.
            node = self.tree.openElements.pop()
            assert node == element
            return
    # Otherwise we hit EOF
    # Same reasoning as above: pop unconditionally, then check the invariant.
    node = self.tree.openElements.pop()
    assert node == element
    self.parser.parseError("expected-closing-tag-but-got-eof")
321+
301322
302323class InitialPhase (Phase ):
303324# This phase deals with error handling as well which is currently not
@@ -549,10 +570,6 @@ def appendToHead(self, element):
549570
550571# the real thing
def processEOF(self):
    """Handle end of input.

    Runs ``anythingElse`` first, then forwards the EOF to whichever phase
    ``self.parser.phase`` refers to once that call has returned.
    """
    self.anythingElse()
    # Read the phase only after anythingElse() has run.
    current_phase = self.parser.phase
    current_phase.processEOF()
558575
@@ -568,44 +585,18 @@ def startTagHead(self, name, attributes):
568585self .parser .parseError ("two-heads-are-not-better-than-one" )
569586
def startTagTitle(self, name, attributes):
    """Handle a title start tag by running parseRCDataCData in RCDATA mode."""
    content_type = "RCDATA"
    self.parseRCDataCData(name, attributes, content_type)
578589
def startTagStyle(self, name, attributes):
    """Handle a style start tag by running parseRCDataCData in CDATA mode."""
    content_type = "CDATA"
    self.parseRCDataCData(name, attributes, content_type)
587592
def startTagNoScript(self, name, attributes):
    """Handle a noscript start tag by running parseRCDataCData in CDATA mode."""
    # Need to decide whether to implement the scripting-disabled case
    content_type = "CDATA"
    self.parseRCDataCData(name, attributes, content_type)
597596
def startTagScript(self, name, attributes):
    """Handle a script start tag by running parseRCDataCData in CDATA mode."""
    # I think this is equivalent to the CDATA stuff since we don't execute
    # script
    content_type = "CDATA"
    self.parseRCDataCData(name, attributes, content_type)
609600
610601def startTagBaseLinkMeta (self ,name ,attributes ):
611602if (self .tree .headPointer is not None and self .parser .phase == self .parser .phases ["inHead" ]):
@@ -620,10 +611,8 @@ def startTagOther(self, name, attributes):
620611self .parser .phase .processStartTag (name ,attributes )
621612
def endTagHead(self, name):
    """Pop the head element and switch the parser to the "afterHead" phase.

    The last entry of ``self.tree.openElements`` is asserted to be named
    "head" before it is removed.
    """
    open_elements = self.tree.openElements
    assert open_elements[-1].name == "head"
    open_elements.pop()
    parser = self.parser
    parser.phase = parser.phases["afterHead"]
628617
629618def endTagImplyAfterHead (self ,name ):
@@ -640,10 +629,8 @@ def endTagOther(self, name):
640629self .parser .parseError ("unexpected-end-tag" , {"name" :name })
641630
def anythingElse(self):
    """Behave as though a "head" end tag had been seen, via endTagHead."""
    implied_tag = "head"
    self.endTagHead(implied_tag)
633+
647634
648635# XXX If we implement a parser for which scripting is disabled we need to
649636# implement this phase.
@@ -682,8 +669,10 @@ def startTagFrameset(self, name, attributes):
def startTagFromHead(self, name, attributes):
    """Process a start tag with the "inHead" phase's rules without switching phase.

    Reports "unexpected-start-tag-out-of-my-head", pushes the head pointer
    onto the stack of open elements for the duration of the "inHead"
    phase's ``processStartTag`` call, then pops it again.
    """
    tree = self.tree
    parser = self.parser
    parser.parseError("unexpected-start-tag-out-of-my-head",
                      {"name": name})
    # Put the head element back on the stack while the tag is processed.
    tree.openElements.append(tree.headPointer)
    in_head = parser.phases["inHead"]
    in_head.processStartTag(name, attributes)
    popped = tree.openElements.pop()
    assert popped is tree.headPointer, "Node name is %s, expected head" % popped.name
687676
688677def startTagOther (self ,name ,attributes ):
689678self .anythingElse ()
@@ -932,8 +921,7 @@ def startTagAppletMarqueeObject(self, name, attributes):
932921
def startTagXmp(self, name, attributes):
    """Handle an xmp start tag.

    Reconstructs the active formatting elements, then runs
    parseRCDataCData in CDATA mode.
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    content_type = "CDATA"
    self.parseRCDataCData(name, attributes, content_type)
937925
938926def startTagTable (self ,name ,attributes ):
939927if self .tree .elementInScope ("p" ):
@@ -993,8 +981,7 @@ def startTagTextarea(self, name, attributes):
993981
def startTagCdata(self, name, attributes):
    """Handle iframe, noembed, noframes and noscript (if scripting enabled).

    Runs parseRCDataCData in CDATA mode for the given tag.
    """
    content_type = "CDATA"
    self.parseRCDataCData(name, attributes, content_type)
998985
999986def startTagSelect (self ,name ,attributes ):
1000987self .tree .reconstructActiveFormattingElements ()