@@ -460,152 +460,133 @@ def processEndTag(self, token):
460460self .endTagHandler [token ["name" ]](token )
461461
class InitialPhase(Phase):
    """Parser phase that is active while a doctype is still expected.

    A doctype token is inspected to choose the parser's ``compatMode``
    ("quirks", "limited quirks", or left unchanged).  Character, start-tag,
    end-tag and EOF input reports an ``expected-doctype-but-got-*`` parse
    error, forces "quirks" mode and is then re-dispatched to the
    "beforeHtml" phase.  Whitespace is ignored and comments are attached
    to the document.
    """

    # Public identifiers (already lower-cased) that select "quirks" mode
    # when the doctype's public identifier *starts with* one of them.
    _quirksPublicIdPrefixes = (
        "+//silmaril//dtd html pro v0r11 19970101//",
        "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
        "-//as//dtd html 3.0 aswedit + extensions//",
        "-//ietf//dtd html 2.0 level 1//",
        "-//ietf//dtd html 2.0 level 2//",
        "-//ietf//dtd html 2.0 strict level 1//",
        "-//ietf//dtd html 2.0 strict level 2//",
        "-//ietf//dtd html 2.0 strict//",
        "-//ietf//dtd html 2.0//",
        "-//ietf//dtd html 2.1e//",
        "-//ietf//dtd html 3.0//",
        "-//ietf//dtd html 3.2 final//",
        "-//ietf//dtd html 3.2//",
        "-//ietf//dtd html 3//",
        "-//ietf//dtd html level 0//",
        "-//ietf//dtd html level 1//",
        "-//ietf//dtd html level 2//",
        "-//ietf//dtd html level 3//",
        "-//ietf//dtd html strict level 0//",
        "-//ietf//dtd html strict level 1//",
        "-//ietf//dtd html strict level 2//",
        "-//ietf//dtd html strict level 3//",
        "-//ietf//dtd html strict//",
        "-//ietf//dtd html//",
        "-//metrius//dtd metrius presentational//",
        "-//microsoft//dtd internet explorer 2.0 html strict//",
        "-//microsoft//dtd internet explorer 2.0 html//",
        "-//microsoft//dtd internet explorer 2.0 tables//",
        "-//microsoft//dtd internet explorer 3.0 html strict//",
        "-//microsoft//dtd internet explorer 3.0 html//",
        "-//microsoft//dtd internet explorer 3.0 tables//",
        "-//netscape comm. corp.//dtd html//",
        "-//netscape comm. corp.//dtd strict html//",
        "-//o'reilly and associates//dtd html 2.0//",
        "-//o'reilly and associates//dtd html extended 1.0//",
        "-//o'reilly and associates//dtd html extended relaxed 1.0//",
        "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
        "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
        "-//spyglass//dtd html 2.0 extended//",
        "-//sq//dtd html 2.0 hotmetal + extensions//",
        "-//sun microsystems corp.//dtd hotjava html//",
        "-//sun microsystems corp.//dtd hotjava strict html//",
        "-//w3c//dtd html 3 1995-03-24//",
        "-//w3c//dtd html 3.2 draft//",
        "-//w3c//dtd html 3.2 final//",
        "-//w3c//dtd html 3.2//",
        "-//w3c//dtd html 3.2s draft//",
        "-//w3c//dtd html 4.0 frameset//",
        "-//w3c//dtd html 4.0 transitional//",
        "-//w3c//dtd html experimental 19960712//",
        "-//w3c//dtd html experimental 970421//",
        "-//w3c//dtd w3 html//",
        "-//w3o//dtd w3 html 3.0//",
        "-//webtechs//dtd mozilla html 2.0//",
        "-//webtechs//dtd mozilla html//",
    )

    # Public identifiers (lower-cased) that select "quirks" mode only on an
    # exact match.
    _quirksPublicIds = (
        "-//w3o//dtd w3 html strict 3.0//en//",
        "-/w3c/dtd html 4.0 transitional/en",
        "html",
    )

    # HTML 4.01 prefixes: "quirks" when the system identifier is missing,
    # "limited quirks" when it is present.
    _html401PublicIdPrefixes = (
        "-//w3c//dtd html 4.01 frameset//",
        "-//w3c//dtd html 4.01 transitional//",
    )

    # XHTML 1.0 prefixes that always select "limited quirks".
    _limitedQuirksPublicIdPrefixes = (
        "-//w3c//dtd xhtml 1.0 frameset//",
        "-//w3c//dtd xhtml 1.0 transitional//",
    )

    # System identifier (compared case-insensitively) that selects "quirks".
    _quirksSystemId = (
        "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")

    def processSpaceCharacters(self, token):
        """Ignore whitespace seen before the doctype."""
        pass

    def processComment(self, token):
        """Insert the comment as a child of the document node."""
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        """Insert the doctype and derive the parser's compat mode from it.

        ``token`` is read through the keys "name", "publicId", "systemId"
        and "correct".  Afterwards the parser is switched to the
        "beforeHtml" phase.
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        # Anything other than a bare "html" doctype (optionally carrying the
        # "about:legacy-compat" system identifier) is reported.
        if (name != "html" or publicId is not None or
                (systemId is not None and
                 systemId != "about:legacy-compat")):
            self.parser.parseError("unknown-doctype")

        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        # The identifier tables are lower-case, so normalise before matching.
        if publicId:
            publicId = publicId.translate(asciiUpper2Lower)

        if (not correct or name != "html"
                or publicId.startswith(self._quirksPublicIdPrefixes)
                or publicId in self._quirksPublicIds
                or (publicId.startswith(self._html401PublicIdPrefixes) and
                    systemId is None)
                or (systemId and
                    systemId.lower() == self._quirksSystemId)):
            self.parser.compatMode = "quirks"
        elif (publicId.startswith(self._limitedQuirksPublicIdPrefixes)
                or (publicId.startswith(self._html401PublicIdPrefixes) and
                    systemId is not None)):
            self.parser.compatMode = "limited quirks"

        self.parser.phase = self.parser.phases["beforeHtml"]

    def anythingElse(self):
        """Force "quirks" mode and hand over to the "beforeHtml" phase."""
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]

    def processCharacters(self, token):
        """Report the missing doctype, then reprocess the characters."""
        self.parser.parseError("expected-doctype-but-got-chars")
        self.anythingElse()
        self.parser.phase.processCharacters(token)

    def processStartTag(self, token):
        """Report the missing doctype, then reprocess the start tag."""
        self.parser.parseError("expected-doctype-but-got-start-tag",
                               {"name": token["name"]})
        self.anythingElse()
        self.parser.phase.processStartTag(token)

    def processEndTag(self, token):
        """Report the missing doctype, then reprocess the end tag."""
        self.parser.parseError("expected-doctype-but-got-end-tag",
                               {"name": token["name"]})
        self.anythingElse()
        self.parser.phase.processEndTag(token)

    def processEOF(self):
        """Report the missing doctype, then let the next phase handle EOF."""
        self.parser.parseError("expected-doctype-but-got-eof")
        self.anythingElse()
        self.parser.phase.processEOF()
610591
611592class BeforeHtmlPhase (Phase ):