@@ -108,7 +108,6 @@ def _parse(self, stream, innerHTML=False, container="div",
108108# We only seem to have InBodyPhase testcases where the following is
109109# relevant ... need others too
110110self .lastPhase = None
111-
112111self .beforeRCDataPhase = None
113112
114113CharactersToken = tokenTypes ["Characters" ]
@@ -120,6 +119,8 @@ def _parse(self, stream, innerHTML=False, container="div",
120119
121120
122121for token in self .normalizedTokens ():
122+ #print self.phase.__class__.__name__
123+ #print token
123124type = token ["type" ]
124125if type == CharactersToken :
125126self .phase .processCharacters (token )
@@ -271,18 +272,6 @@ def __init__(self, parser, tree):
271272
272273def processEOF (self ):
273274raise NotImplementedError
274- self .tree .generateImpliedEndTags ()
275- if len (self .tree .openElements )> 2 :
276- self .parser .parseError ("expected-closing-tag-but-got-eof" )
277- elif len (self .tree .openElements )== 2 and \
278- self .tree .openElements [1 ].name != "body" :
279- # This happens for framesets or something?
280- self .parser .parseError ("expected-closing-tag-but-got-eof" )
281- elif self .parser .innerHTML and len (self .tree .openElements )> 1 :
282- # XXX This is not what the specification says. Not sure what to do
283- # here.
284- self .parser .parseError ("eof-in-innerhtml" )
285- # Betting ends.
286275
287276def processComment (self ,token ):
288277# For most phases the following is correct. Where it's not it will be
@@ -318,7 +307,7 @@ class InitialPhase(Phase):
318307# this.
319308def processEOF (self ):
320309self .parser .parseError ("expected-doctype-but-got-eof" )
321- self .compatMode = "quirks"
310+ self .parser . compatMode = "quirks"
322311self .parser .phase = self .parser .phases ["beforeHtml" ]
323312self .parser .phase .processEOF ()
324313
@@ -346,8 +335,9 @@ def processDoctype(self, token):
346335if publicId != "" :
347336publicId = publicId .translate (asciiUpper2Lower )
348337
349- if (not correct or token ["name" ]!= "html"
350- or publicId in
338+
339+ if ((not correct )or nameLower != "html"
340+ or publicId in
351341 ("+//silmaril//dtd html pro v0r11 19970101//en" ,
352342"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" ,
353343"-//as//dtd html 3.0 aswedit + extensions//en" ,
@@ -419,19 +409,18 @@ def processDoctype(self, token):
419409"html" )
420410or (publicId in
421411 ("-//w3c//dtd html 4.01 frameset//EN" ,
422- "-//w3c//dtd html 4.01 transitional//EN" )and
423- systemId == None )
412+ "-//w3c//dtd html 4.01 transitional//EN" )and systemId == None )
424413or (systemId != None and
425- systemId == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" )):
426- self .compatMode = "quirks"
414+ systemId ==
415+ "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" )):
416+ self .parser .compatMode = "quirks"
427417elif (publicId in
428- ("-//w3c//dtd xhtml 1.0 frameset//EN" ,
429- "-//w3c//dtd xhtml 1.0 transitional//EN" )
418+ ("-//w3c//dtd xhtml 1.0 frameset//EN" ,
419+ "-//w3c//dtd xhtml 1.0 transitional//EN" )
430420or (publicId in
431421 ("-//w3c//dtd html 4.01 frameset//EN" ,
432- "-//w3c//dtd html 4.01 transitional//EN" )and
433- systemId == None )):
434- self .compatMode = "limited quirks"
422+ "-//w3c//dtd html 4.01 transitional//EN" )and systemId == None )):
423+ self .parser .compatMode = "limited quirks"
435424
436425self .parser .phase = self .parser .phases ["beforeHtml" ]
437426
@@ -440,7 +429,7 @@ def processSpaceCharacters(self, token):
440429
441430def processCharacters (self ,token ):
442431self .parser .parseError ("expected-doctype-but-got-chars" )
443- self .compatMode = "quirks"
432+ self .parser . compatMode = "quirks"
444433self .parser .phase = self .parser .phases ["beforeHtml" ]
445434self .parser .phase .processCharacters (token )
446435
@@ -595,7 +584,8 @@ def startTagMeta(self, token):
595584codec = inputstream .codecName (attributes ["charset" ])
596585self .parser .tokenizer .stream .changeEncoding (codec )
597586elif "content" in attributes :
598- data = inputstream .EncodingBytes (attributes ["content" ])
587+ data = inputstream .EncodingBytes (
588+ attributes ["content" ].encode (self .parser .tokenizer .stream .charEncoding [0 ]))
599589parser = inputstream .ContentAttrParser (data )
600590codec = parser .parse ()
601591self .parser .tokenizer .stream .changeEncoding (codec )