@@ -58,7 +58,7 @@ def __init__(self, strict = False, tree=simpletree.TreeBuilder,
5858
5959self .phases = {
6060"initial" :InitialPhase (self ,self .tree ),
61- "rootElement " :RootElementPhase (self ,self .tree ),
61+ "beforeHtml " :BeforeHtmlPhase (self ,self .tree ),
6262"beforeHead" :BeforeHeadPhase (self ,self .tree ),
6363"inHead" :InHeadPhase (self ,self .tree ),
6464# XXX "inHeadNoscript": InHeadNoScriptPhase(self, self.tree),
@@ -71,10 +71,14 @@ def __init__(self, strict = False, tree=simpletree.TreeBuilder,
7171"inRow" :InRowPhase (self ,self .tree ),
7272"inCell" :InCellPhase (self ,self .tree ),
7373"inSelect" :InSelectPhase (self ,self .tree ),
74+ # XXX inSelectInTable
7475"afterBody" :AfterBodyPhase (self ,self .tree ),
7576"inFrameset" :InFramesetPhase (self ,self .tree ),
7677"afterFrameset" :AfterFramesetPhase (self ,self .tree ),
7778"trailingEnd" :TrailingEndPhase (self ,self .tree )
79+ # XXX after after body
80+ # XXX after after frameset
81+ # XXX trailingEnd is gone
7882 }
7983
8084def _parse (self ,stream ,innerHTML = False ,container = "div" ,
@@ -101,7 +105,7 @@ def _parse(self, stream, innerHTML=False, container="div",
101105# contentModelFlag already is PCDATA
102106#self.tokenizer.contentModelFlag = tokenizer.contentModelFlags["PCDATA"]
103107pass
104- self .phase = self .phases ["rootElement " ]
108+ self .phase = self .phases ["beforeHtml " ]
105109self .phase .insertHtmlElement ()
106110self .resetInsertionMode ()
107111else :
@@ -300,7 +304,7 @@ class InitialPhase(Phase):
300304# this.
def processEOF(self):
    """Handle end-of-file reached before any doctype was seen.

    Records the "expected-doctype-but-got-eof" parse error, installs the
    next phase on the parser and lets that phase process the EOF.
    """
    parser = self.parser
    parser.parseError("expected-doctype-but-got-eof")
    parser.phase = parser.phases["beforeHtml "]
    # Dispatch through parser.phase so the newly installed phase gets it.
    parser.phase.processEOF()
305309
306310def processComment (self ,data ):
@@ -401,30 +405,30 @@ def processDoctype(self, name, publicId, systemId, correct):
401405#XXX quirks mode
402406pass
403407
404- self .parser .phase = self .parser .phases ["rootElement " ]
408+ self .parser .phase = self .parser .phases ["beforeHtml " ]
405409
def processSpaceCharacters(self, data):
    """Drop whitespace-only character data; nothing is inserted or reported."""
408412
def processCharacters(self, data):
    """Handle character data that arrives before any doctype.

    Records "expected-doctype-but-got-chars", installs the next phase on
    the parser and forwards the same data to it.
    """
    parser = self.parser
    parser.parseError("expected-doctype-but-got-chars")
    parser.phase = parser.phases["beforeHtml "]
    # Dispatch through parser.phase so the newly installed phase gets it.
    parser.phase.processCharacters(data)
413417
def processStartTag(self, name, attributes):
    """Handle a start tag that arrives before any doctype.

    Records "expected-doctype-but-got-start-tag" (with the tag name),
    installs the next phase on the parser and forwards the tag to it.
    """
    parser = self.parser
    errorData = {"name": name}
    parser.parseError("expected-doctype-but-got-start-tag", errorData)
    parser.phase = parser.phases["beforeHtml "]
    # Dispatch through parser.phase so the newly installed phase gets it.
    parser.phase.processStartTag(name, attributes)
419423
def processEndTag(self, name):
    """Handle an end tag that arrives before any doctype.

    Records "expected-doctype-but-got-end-tag" (with the tag name),
    installs the next phase on the parser and forwards the tag to it.
    """
    parser = self.parser
    errorData = {"name": name}
    parser.parseError("expected-doctype-but-got-end-tag", errorData)
    parser.phase = parser.phases["beforeHtml "]
    # Dispatch through parser.phase so the newly installed phase gets it.
    parser.phase.processEndTag(name)
425429
426430
427- class RootElementPhase (Phase ):
431+ class BeforeHtmlPhase (Phase ):
428432# helper methods
429433def insertHtmlElement (self ):
430434self .tree .insertRoot ("html" )
@@ -475,6 +479,9 @@ def processEOF(self):
475479self .startTagHead ("head" , {})
476480self .parser .phase .processEOF ()
477481
def processSpaceCharacters(self, data):
    """Drop whitespace-only character data; nothing is inserted or reported."""
484+
478485def processCharacters (self ,data ):
479486self .startTagHead ("head" , {})
480487self .parser .phase .processCharacters (data )
@@ -548,33 +555,36 @@ def startTagHead(self, name, attributes):
548555self .parser .parseError ("two-heads-are-not-better-than-one" )
549556
def startTagTitle(self, name, attributes):
    """Insert a <title> element and switch the tokenizer to RCDATA.

    When a head element is known and the parser is currently in the
    "inHead" phase, the new element is appended to the head and pushed
    onto the stack of open elements; otherwise it is inserted through the
    tree builder's regular insertElement().
    """
    tree, parser = self.tree, self.parser
    headIsCurrent = (tree.headPointer is not None and
                     parser.phase == parser.phases["inHead"])
    if headIsCurrent:
        node = tree.createElement(name, attributes)
        self.appendToHead(node)
        tree.openElements.append(node)
    else:
        tree.insertElement(name, attributes)
    parser.tokenizer.contentModelFlag = contentModelFlags["RCDATA"]
555566
def startTagStyle(self, name, attributes):
    """Insert a <style> element and switch the tokenizer to CDATA.

    When a head element is known and the parser is currently in the
    "inHead" phase, the new element is appended to the head and pushed
    onto the stack of open elements; otherwise it is inserted through the
    tree builder's regular insertElement().
    """
    tree, parser = self.tree, self.parser
    headIsCurrent = (tree.headPointer is not None and
                     parser.phase == parser.phases["inHead"])
    if headIsCurrent:
        node = tree.createElement(name, attributes)
        self.appendToHead(node)
        tree.openElements.append(node)
    else:
        tree.insertElement(name, attributes)
    parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"]
565576
def startTagNoScript(self, name, attributes):
    """Insert a <noscript> element and switch the tokenizer to CDATA.

    When a head element is known and the parser is currently in the
    "inHead" phase, the new element is appended to the head and pushed
    onto the stack of open elements; otherwise it is inserted through the
    tree builder's regular insertElement().
    """
    # XXX Need to decide whether to implement the scripting disabled case.
    tree, parser = self.tree, self.parser
    headIsCurrent = (tree.headPointer is not None and
                     parser.phase == parser.phases["inHead"])
    if headIsCurrent:
        node = tree.createElement(name, attributes)
        self.appendToHead(node)
        tree.openElements.append(node)
    else:
        tree.insertElement(name, attributes)
    parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"]

576-
577-
587+
578588def startTagScript (self ,name ,attributes ):
579589#XXX Inner HTML case may be wrong
580590element = self .tree .createElement (name ,attributes )
@@ -689,9 +699,8 @@ def __init__(self, parser, tree):
689699
690700self .startTagHandler = utils .MethodDispatcher ([
691701 ("html" ,self .startTagHtml ),
692- (("base" ,"link" ,"meta" ,"script" ,"style" ),
702+ (("base" ,"link" ,"meta" ,"script" ,"style" , "title" ),
693703self .startTagProcessInHead ),
694- ("title" ,self .startTagTitle ),
695704 ("body" ,self .startTagBody ),
696705 (("address" ,"blockquote" ,"center" ,"dir" ,"div" ,"dl" ,
697706"fieldset" ,"listing" ,"menu" ,"ol" ,"p" ,"pre" ,"ul" ),
@@ -705,7 +714,7 @@ def __init__(self, parser, tree):
705714"tt" ,"u" ),self .startTagFormatting ),
706715 ("nobr" ,self .startTagNobr ),
707716 ("button" ,self .startTagButton ),
708- (("marquee" ,"object" ),self .startTagMarqueeObject ),
717+ (("applet" , " marquee" ,"object" ),self .startTagAppletMarqueeObject ),
709718 ("xmp" ,self .startTagXmp ),
710719 ("table" ,self .startTagTable ),
711720 (("area" ,"basefont" ,"bgsound" ,"br" ,"embed" ,"img" ,"param" ,
@@ -736,7 +745,7 @@ def __init__(self, parser, tree):
736745 (headingElements ,self .endTagHeading ),
737746 (("a" ,"b" ,"big" ,"em" ,"font" ,"i" ,"nobr" ,"s" ,"small" ,
738747"strike" ,"strong" ,"tt" ,"u" ),self .endTagFormatting ),
739- (("marquee" ,"object" ,"button" ),self .endTagButtonMarqueeObject ),
748+ (("applet" , " marquee" ,"object" ,"button" ),self .endTagAppletButtonMarqueeObject ),
740749 (("head" ,"frameset" ,"select" ,"optgroup" ,"option" ,"table" ,
741750"caption" ,"colgroup" ,"col" ,"thead" ,"tfoot" ,"tbody" ,"tr" ,
742751"td" ,"th" ),self .endTagMisplaced ),
@@ -759,11 +768,11 @@ def addFormattingElement(self, name, attributes):
759768
760769# the real deal
761770def processSpaceCharactersDropNewline (self ,data ):
762- # Sometimes (start of <pre> and <textarea> blocks) we want to drop
763- # leading newlines
771+ # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
772+ #want to drop leading newlines
764773self .processSpaceCharacters = self .processSpaceCharactersNonPre
765774if (data .startswith ("\n " )and
766- self .tree .openElements [- 1 ].name in ("pre" ,"textarea" )and
775+ self .tree .openElements [- 1 ].name in ("pre" ,"listing" , " textarea" )and
767776not self .tree .openElements [- 1 ].hasContent ()):
768777data = data [1 :]
769778if data :
@@ -785,11 +794,6 @@ def processSpaceCharacters(self, data):
def startTagProcessInHead(self, name, attributes):
    """Let the "inHead" phase handle this start tag.

    The parser's current phase is not changed here; the tag is only
    forwarded to the phase object registered under "inHead".
    """
    inHeadPhase = self.parser.phases["inHead"]
    inHeadPhase.processStartTag(name, attributes)
787796
788- def startTagTitle (self ,name ,attributes ):
789- self .parser .parseError ("unexpected-start-tag-out-of-my-head" ,
790- {"name" :name })
791- self .parser .phases ["inHead" ].processStartTag (name ,attributes )
792-
793797def startTagBody (self ,name ,attributes ):
794798self .parser .parseError ("unexpected-start-tag" , {"name" :"body" })
795799if (len (self .tree .openElements )== 1
@@ -804,7 +808,7 @@ def startTagCloseP(self, name, attributes):
804808if self .tree .elementInScope ("p" ):
805809self .endTagP ("p" )
806810self .tree .insertElement (name ,attributes )
807- if name == "pre" :
811+ if name in ( "pre" , "listing" ) :
808812self .processSpaceCharacters = self .processSpaceCharactersDropNewline
809813
810814def startTagForm (self ,name ,attributes ):
@@ -902,7 +906,7 @@ def startTagButton(self, name, attributes):
902906self .tree .insertElement (name ,attributes )
903907self .tree .activeFormattingElements .append (Marker )
904908
905- def startTagMarqueeObject (self ,name ,attributes ):
909+ def startTagAppletMarqueeObject (self ,name ,attributes ):
906910self .tree .reconstructActiveFormattingElements ()
907911self .tree .insertElement (name ,attributes )
908912self .tree .activeFormattingElements .append (Marker )
@@ -1201,7 +1205,7 @@ def endTagFormatting(self, name):
12011205self .tree .openElements .insert (
12021206self .tree .openElements .index (furthestBlock )+ 1 ,clone )
12031207
1204- def endTagButtonMarqueeObject (self ,name ):
1208+ def endTagAppletButtonMarqueeObject (self ,name ):
12051209if self .tree .elementInScope (name ):
12061210self .tree .generateImpliedEndTags ()
12071211if self .tree .openElements [- 1 ].name != name :
@@ -1269,12 +1273,15 @@ def __init__(self, parser, tree):
12691273 ("col" ,self .startTagCol ),
12701274 (("tbody" ,"tfoot" ,"thead" ),self .startTagRowGroup ),
12711275 (("td" ,"th" ,"tr" ),self .startTagImplyTbody ),
1272- ("table" ,self .startTagTable )
1276+ ("table" ,self .startTagTable ),
1277+ (("style" ,"script" ),self .startTagStyleScript ),
1278+ ("input" ,self .startTagInput )
12731279 ])
12741280self .startTagHandler .default = self .startTagOther
12751281
12761282self .endTagHandler = utils .MethodDispatcher ([
12771283 ("table" ,self .endTagTable ),
1284+ (("style" ,"script" ),self .endTagStyleScript ),
12781285 (("body" ,"caption" ,"col" ,"colgroup" ,"html" ,"tbody" ,"td" ,
12791286"tfoot" ,"th" ,"thead" ,"tr" ),self .endTagIgnore )
12801287 ])
@@ -1289,14 +1296,30 @@ def clearStackToTableContext(self):
12891296self .tree .openElements .pop ()
12901297# When the current node is <html> it's an innerHTML case
12911298
def getCurrentTable(self):
    """Return the innermost open element named "table".

    The stack of open elements is searched from the current node towards
    the root.  If no table is open, the bottom-most element of the stack
    is returned instead of running off the front of the list with an
    IndexError.
    NOTE(review): the no-table case is presumably only reachable when
    parsing a fragment (innerHTML) -- confirm against the callers.
    """
    openElements = self.tree.openElements
    for node in reversed(openElements):
        if node.name == "table":
            return node
    # No table on the stack: fall back to the first (root-most) element so
    # callers still get a node whose flags they can inspect.
    return openElements[0]
1304+
12921305# processing methods
def processSpaceCharacters(self, data):
    """Handle whitespace-only character data while in a table.

    If the current table already carries the "tainted" flag the data is
    routed through processCharacters(); otherwise it is inserted as text
    directly via the tree builder.
    """
    if "tainted" in self.getCurrentTable()._flags:
        self.processCharacters(data)
    else:
        self.tree.insertText(data)

1311+
def processCharacters(self, data):
    """Handle non-whitespace character data while in a table.

    Text whose current node is a <style> or <script> element is inserted
    as-is.  Any other text marks the current table as "tainted" (recording
    the "unexpected-char-implies-table-voodoo" parse error the first time
    only) and is then processed by the "inBody" phase with
    tree.insertFromTable switched on.
    """
    tree = self.tree
    if tree.openElements[-1].name in ("style", "script"):
        tree.insertText(data)
        return

    table = self.getCurrentTable()
    if "tainted" not in table._flags:
        self.parser.parseError("unexpected-char-implies-table-voodoo")
        table._flags.append("tainted")
    # Do the table magic!
    tree.insertFromTable = True
    try:
        self.parser.phases["inBody"].processCharacters(data)
    finally:
        # Always clear the flag so a failure in the inBody handler cannot
        # leave the tree builder stuck in "insert from table" mode.
        tree.insertFromTable = False

13001323
13011324def startTagCaption (self ,name ,attributes ):
13021325self .clearStackToTableContext ()
@@ -1329,12 +1352,27 @@ def startTagTable(self, name, attributes):
13291352if not self .parser .innerHTML :
13301353self .parser .phase .processStartTag (name ,attributes )
13311354
def startTagStyleScript(self, name, attributes):
    """Handle a <style> or <script> start tag while in a table.

    Once the current table carries the "tainted" flag the tag takes the
    generic startTagOther() path; before that it is handed to the
    "inHead" phase.
    """
    if "tainted" in self.getCurrentTable()._flags:
        self.startTagOther(name, attributes)
        return
    self.parser.phases["inHead"].processStartTag(name, attributes)

1360+
def startTagInput(self, name, attributes):
    """Handle an <input> start tag while in a table.

    An input whose "type" attribute equals "hidden" (compared after ASCII
    lower-casing) inside a table that is not yet "tainted" is reported as
    a parse error, inserted, and immediately popped from the stack of open
    elements again.  Every other input goes through startTagOther().
    """
    isHidden = ("type" in attributes and
                attributes["type"].translate(asciiUpper2Lower) == "hidden")
    if isHidden and "tainted" not in self.getCurrentTable()._flags:
        # Error code spelling corrected (was "unpexted-...").
        # NOTE(review): confirm the error-message table uses this key.
        self.parser.parseError("unexpected-hidden-input-in-table")
        self.tree.insertElement(name, attributes)
        # XXX associate with form
        self.tree.openElements.pop()
    else:
        self.startTagOther(name, attributes)

1369+
def startTagOther(self, name, attributes):
    """Handle a start tag that has no dedicated table handler.

    The first such tag marks the current table as "tainted" and records
    the "unexpected-start-tag-implies-table-voodoo" parse error; later
    ones are silent.  The tag is then processed by the "inBody" phase with
    tree.insertFromTable switched on.
    """
    table = self.getCurrentTable()
    if "tainted" not in table._flags:
        self.parser.parseError("unexpected-start-tag-implies-table-voodoo",
                               {"name": name})
        table._flags.append("tainted")
    # Do the table magic!
    self.tree.insertFromTable = True
    try:
        self.parser.phases["inBody"].processStartTag(name, attributes)
    finally:
        # Always clear the flag so a failure in the inBody handler cannot
        # leave the tree builder stuck in "insert from table" mode.
        self.tree.insertFromTable = False

13401378
@@ -1354,15 +1392,21 @@ def endTagTable(self, name):
13541392assert self .parser .innerHTML
13551393self .parser .parseError ()
13561394
def endTagStyleScript(self, name):
    """Handle a </style> or </script> end tag while in a table.

    Once the current table carries the "tainted" flag the tag takes the
    generic endTagOther() path; before that it is handed to the "inHead"
    phase.
    """
    if "tainted" in self.getCurrentTable()._flags:
        self.endTagOther(name)
        return
    self.parser.phases["inHead"].processEndTag(name)

1400+
def endTagIgnore(self, name):
    """Report the end tag as "unexpected-end-tag" and otherwise ignore it."""
    errorData = {"name": name}
    self.parser.parseError("unexpected-end-tag", errorData)
13591403
def endTagOther(self, name):
    """Handle an end tag that has no dedicated table handler.

    The first such tag marks the current table as "tainted" and records
    the "unexpected-end-tag-implies-table-voodoo" parse error; later ones
    are silent.  The tag is then processed by the "inBody" phase with
    tree.insertFromTable switched on.
    """
    table = self.getCurrentTable()
    if "tainted" not in table._flags:
        self.parser.parseError("unexpected-end-tag-implies-table-voodoo",
                               {"name": name})
        table._flags.append("tainted")
    # Do the table magic!
    self.tree.insertFromTable = True
    try:
        self.parser.phases["inBody"].processEndTag(name)
    finally:
        # Always clear the flag so a failure in the inBody handler cannot
        # leave the tree builder stuck in "insert from table" mode.
        self.tree.insertFromTable = False

13681412
@@ -1749,7 +1793,8 @@ def __init__(self, parser, tree):
17491793 ("html" ,self .startTagHtml ),
17501794 ("option" ,self .startTagOption ),
17511795 ("optgroup" ,self .startTagOptgroup ),
1752- ("select" ,self .startTagSelect )
1796+ ("select" ,self .startTagSelect ),
1797+ ("input" ,self .startTagInput )
17531798 ])
17541799self .startTagHandler .default = self .startTagOther
17551800
@@ -1783,6 +1828,11 @@ def startTagSelect(self, name, attributes):
17831828self .parser .parseError ("unexpected-select-in-select" )
17841829self .endTagSelect ("select" )
17851830
def startTagInput(self, name, attributes):
    """Handle an <input> start tag while inside a <select>.

    Records "unexpected-input-in-select", closes the select via
    endTagSelect() and then lets whichever phase is now current process
    the tag.  If endTagSelect() left this phase installed, the tag is
    dropped: re-dispatching to ourselves would recurse without end.
    NOTE(review): presumably that only happens in the innerHTML case where
    no select is in scope -- confirm against endTagSelect().
    """
    self.parser.parseError("unexpected-input-in-select")
    self.endTagSelect("select")
    if self.parser.phase is not self:
        self.parser.phase.processStartTag(name, attributes)

1835+
17861836def startTagOther (self ,name ,attributes ):
17871837self .parser .parseError ("unexpected-start-tag-in-select" ,
17881838 {"name" :name })