Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit3385106

Browse files
committed
Python parser: support <listing> followed by newline; <style> & <script> in table; tainted tables; <title> everywhere; throw fewer voodoo parse errors; support <input type=hidden> in table; support <input> in select; applet is scoped; add tests
--HG--extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401107
1 parent 0e4a080 commit 3385106

File tree

4 files changed

+117
-63
lines changed

4 files changed

+117
-63
lines changed

‎src/html5lib/constants.py‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@
170170
"unexpected-char-implies-table-voodoo":
171171
_(u"Unexpected non-space characters in "
172172
u"table context caused voodoo mode."),
173+
"unpexted-hidden-input-in-table":
174+
_(u"Unexpected input with type hidden in table context."),
173175
"unexpected-start-tag-implies-table-voodoo":
174176
_(u"Unexpected start tag (%(name)s) in "
175177
u"table context caused voodoo mode."),
@@ -190,7 +192,9 @@
190192
_(u"Unexpected end tag (%(name)s) in the table row phase. Ignored."),
191193
"unexpected-select-in-select":
192194
_(u"Unexpected select start tag in the select phase "
193-
u"implies select start tag."),
195+
u"treated as select end tag."),
196+
"unexpected-input-in-select":
197+
_(u"Unexpected input start tag in the select phase."),
194198
"unexpected-start-tag-in-select":
195199
_(u"Unexpected start tag token (%(name)s in the select phase. "
196200
u"Ignored."),
@@ -244,6 +248,7 @@
244248
}
245249

246250
scopingElements=frozenset((
251+
"applet",
247252
"button",
248253
"caption",
249254
"html",

‎src/html5lib/html5parser.py‎

Lines changed: 100 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def __init__(self, strict = False, tree=simpletree.TreeBuilder,
5858

5959
self.phases= {
6060
"initial":InitialPhase(self,self.tree),
61-
"rootElement":RootElementPhase(self,self.tree),
61+
"beforeHtml":BeforeHtmlPhase(self,self.tree),
6262
"beforeHead":BeforeHeadPhase(self,self.tree),
6363
"inHead":InHeadPhase(self,self.tree),
6464
# XXX "inHeadNoscript": InHeadNoScriptPhase(self, self.tree),
@@ -71,10 +71,14 @@ def __init__(self, strict = False, tree=simpletree.TreeBuilder,
7171
"inRow":InRowPhase(self,self.tree),
7272
"inCell":InCellPhase(self,self.tree),
7373
"inSelect":InSelectPhase(self,self.tree),
74+
# XXX inSelectInTable
7475
"afterBody":AfterBodyPhase(self,self.tree),
7576
"inFrameset":InFramesetPhase(self,self.tree),
7677
"afterFrameset":AfterFramesetPhase(self,self.tree),
7778
"trailingEnd":TrailingEndPhase(self,self.tree)
79+
# XXX after after body
80+
# XXX after after frameset
81+
# XXX trailingEnd is gone
7882
}
7983

8084
def_parse(self,stream,innerHTML=False,container="div",
@@ -101,7 +105,7 @@ def _parse(self, stream, innerHTML=False, container="div",
101105
# contentModelFlag already is PCDATA
102106
#self.tokenizer.contentModelFlag = tokenizer.contentModelFlags["PCDATA"]
103107
pass
104-
self.phase=self.phases["rootElement"]
108+
self.phase=self.phases["beforeHtml"]
105109
self.phase.insertHtmlElement()
106110
self.resetInsertionMode()
107111
else:
@@ -300,7 +304,7 @@ class InitialPhase(Phase):
300304
# this.
301305
defprocessEOF(self):
302306
self.parser.parseError("expected-doctype-but-got-eof")
303-
self.parser.phase=self.parser.phases["rootElement"]
307+
self.parser.phase=self.parser.phases["beforeHtml"]
304308
self.parser.phase.processEOF()
305309

306310
defprocessComment(self,data):
@@ -401,30 +405,30 @@ def processDoctype(self, name, publicId, systemId, correct):
401405
#XXX quirks mode
402406
pass
403407

404-
self.parser.phase=self.parser.phases["rootElement"]
408+
self.parser.phase=self.parser.phases["beforeHtml"]
405409

406410
defprocessSpaceCharacters(self,data):
407411
pass
408412

409413
defprocessCharacters(self,data):
410414
self.parser.parseError("expected-doctype-but-got-chars")
411-
self.parser.phase=self.parser.phases["rootElement"]
415+
self.parser.phase=self.parser.phases["beforeHtml"]
412416
self.parser.phase.processCharacters(data)
413417

414418
defprocessStartTag(self,name,attributes):
415419
self.parser.parseError("expected-doctype-but-got-start-tag",
416420
{"name":name})
417-
self.parser.phase=self.parser.phases["rootElement"]
421+
self.parser.phase=self.parser.phases["beforeHtml"]
418422
self.parser.phase.processStartTag(name,attributes)
419423

420424
defprocessEndTag(self,name):
421425
self.parser.parseError("expected-doctype-but-got-end-tag",
422426
{"name":name})
423-
self.parser.phase=self.parser.phases["rootElement"]
427+
self.parser.phase=self.parser.phases["beforeHtml"]
424428
self.parser.phase.processEndTag(name)
425429

426430

427-
classRootElementPhase(Phase):
431+
classBeforeHtmlPhase(Phase):
428432
# helper methods
429433
definsertHtmlElement(self):
430434
self.tree.insertRoot("html")
@@ -475,6 +479,9 @@ def processEOF(self):
475479
self.startTagHead("head", {})
476480
self.parser.phase.processEOF()
477481

482+
defprocessSpaceCharacters(self,data):
483+
pass
484+
478485
defprocessCharacters(self,data):
479486
self.startTagHead("head", {})
480487
self.parser.phase.processCharacters(data)
@@ -548,33 +555,36 @@ def startTagHead(self, name, attributes):
548555
self.parser.parseError("two-heads-are-not-better-than-one")
549556

550557
defstartTagTitle(self,name,attributes):
551-
element=self.tree.createElement(name,attributes)
552-
self.appendToHead(element)
553-
self.tree.openElements.append(element)
558+
ifself.tree.headPointerisnotNoneand\
559+
self.parser.phase==self.parser.phases["inHead"]:
560+
element=self.tree.createElement(name,attributes)
561+
self.appendToHead(element)
562+
self.tree.openElements.append(element)
563+
else:
564+
self.tree.insertElement(name,attributes)
554565
self.parser.tokenizer.contentModelFlag=contentModelFlags["RCDATA"]
555566

556567
defstartTagStyle(self,name,attributes):
557-
element=self.tree.createElement(name,attributes)
558568
ifself.tree.headPointerisnotNoneand\
559569
self.parser.phase==self.parser.phases["inHead"]:
570+
element=self.tree.createElement(name,attributes)
560571
self.appendToHead(element)
572+
self.tree.openElements.append(element)
561573
else:
562-
self.tree.openElements[-1].appendChild(element)
563-
self.tree.openElements.append(element)
574+
self.tree.insertElement(name,attributes)
564575
self.parser.tokenizer.contentModelFlag=contentModelFlags["CDATA"]
565576

566577
defstartTagNoScript(self,name,attributes):
567578
# XXX Need to decide whether to implement the scripting disabled case.
568-
element=self.tree.createElement(name,attributes)
569579
ifself.tree.headPointerisnotNoneand\
570580
self.parser.phase==self.parser.phases["inHead"]:
581+
element=self.tree.createElement(name,attributes)
571582
self.appendToHead(element)
583+
self.tree.openElements.append(element)
572584
else:
573-
self.tree.openElements[-1].appendChild(element)
574-
self.tree.openElements.append(element)
585+
self.tree.insertElement(name,attributes)
575586
self.parser.tokenizer.contentModelFlag=contentModelFlags["CDATA"]
576-
577-
587+
578588
defstartTagScript(self,name,attributes):
579589
#XXX Inner HTML case may be wrong
580590
element=self.tree.createElement(name,attributes)
@@ -689,9 +699,8 @@ def __init__(self, parser, tree):
689699

690700
self.startTagHandler=utils.MethodDispatcher([
691701
("html",self.startTagHtml),
692-
(("base","link","meta","script","style"),
702+
(("base","link","meta","script","style","title"),
693703
self.startTagProcessInHead),
694-
("title",self.startTagTitle),
695704
("body",self.startTagBody),
696705
(("address","blockquote","center","dir","div","dl",
697706
"fieldset","listing","menu","ol","p","pre","ul"),
@@ -705,7 +714,7 @@ def __init__(self, parser, tree):
705714
"tt","u"),self.startTagFormatting),
706715
("nobr",self.startTagNobr),
707716
("button",self.startTagButton),
708-
(("marquee","object"),self.startTagMarqueeObject),
717+
(("applet","marquee","object"),self.startTagAppletMarqueeObject),
709718
("xmp",self.startTagXmp),
710719
("table",self.startTagTable),
711720
(("area","basefont","bgsound","br","embed","img","param",
@@ -736,7 +745,7 @@ def __init__(self, parser, tree):
736745
(headingElements,self.endTagHeading),
737746
(("a","b","big","em","font","i","nobr","s","small",
738747
"strike","strong","tt","u"),self.endTagFormatting),
739-
(("marquee","object","button"),self.endTagButtonMarqueeObject),
748+
(("applet","marquee","object","button"),self.endTagAppletButtonMarqueeObject),
740749
(("head","frameset","select","optgroup","option","table",
741750
"caption","colgroup","col","thead","tfoot","tbody","tr",
742751
"td","th"),self.endTagMisplaced),
@@ -759,11 +768,11 @@ def addFormattingElement(self, name, attributes):
759768

760769
# the real deal
761770
defprocessSpaceCharactersDropNewline(self,data):
762-
# Sometimes (start of <pre> and <textarea> blocks) we want to drop
763-
# leading newlines
771+
# Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
772+
# want to drop leading newlines
764773
self.processSpaceCharacters=self.processSpaceCharactersNonPre
765774
if (data.startswith("\n")and
766-
self.tree.openElements[-1].namein ("pre","textarea")and
775+
self.tree.openElements[-1].namein ("pre","listing","textarea")and
767776
notself.tree.openElements[-1].hasContent()):
768777
data=data[1:]
769778
ifdata:
@@ -785,11 +794,6 @@ def processSpaceCharacters(self, data):
785794
defstartTagProcessInHead(self,name,attributes):
786795
self.parser.phases["inHead"].processStartTag(name,attributes)
787796

788-
defstartTagTitle(self,name,attributes):
789-
self.parser.parseError("unexpected-start-tag-out-of-my-head",
790-
{"name":name})
791-
self.parser.phases["inHead"].processStartTag(name,attributes)
792-
793797
defstartTagBody(self,name,attributes):
794798
self.parser.parseError("unexpected-start-tag", {"name":"body"})
795799
if (len(self.tree.openElements)==1
@@ -804,7 +808,7 @@ def startTagCloseP(self, name, attributes):
804808
ifself.tree.elementInScope("p"):
805809
self.endTagP("p")
806810
self.tree.insertElement(name,attributes)
807-
ifname=="pre":
811+
ifnamein ("pre","listing"):
808812
self.processSpaceCharacters=self.processSpaceCharactersDropNewline
809813

810814
defstartTagForm(self,name,attributes):
@@ -902,7 +906,7 @@ def startTagButton(self, name, attributes):
902906
self.tree.insertElement(name,attributes)
903907
self.tree.activeFormattingElements.append(Marker)
904908

905-
defstartTagMarqueeObject(self,name,attributes):
909+
defstartTagAppletMarqueeObject(self,name,attributes):
906910
self.tree.reconstructActiveFormattingElements()
907911
self.tree.insertElement(name,attributes)
908912
self.tree.activeFormattingElements.append(Marker)
@@ -1201,7 +1205,7 @@ def endTagFormatting(self, name):
12011205
self.tree.openElements.insert(
12021206
self.tree.openElements.index(furthestBlock)+1,clone)
12031207

1204-
defendTagButtonMarqueeObject(self,name):
1208+
defendTagAppletButtonMarqueeObject(self,name):
12051209
ifself.tree.elementInScope(name):
12061210
self.tree.generateImpliedEndTags()
12071211
ifself.tree.openElements[-1].name!=name:
@@ -1269,12 +1273,15 @@ def __init__(self, parser, tree):
12691273
("col",self.startTagCol),
12701274
(("tbody","tfoot","thead"),self.startTagRowGroup),
12711275
(("td","th","tr"),self.startTagImplyTbody),
1272-
("table",self.startTagTable)
1276+
("table",self.startTagTable),
1277+
(("style","script"),self.startTagStyleScript),
1278+
("input",self.startTagInput)
12731279
])
12741280
self.startTagHandler.default=self.startTagOther
12751281

12761282
self.endTagHandler=utils.MethodDispatcher([
12771283
("table",self.endTagTable),
1284+
(("style","script"),self.endTagStyleScript),
12781285
(("body","caption","col","colgroup","html","tbody","td",
12791286
"tfoot","th","thead","tr"),self.endTagIgnore)
12801287
])
@@ -1289,14 +1296,30 @@ def clearStackToTableContext(self):
12891296
self.tree.openElements.pop()
12901297
# When the current node is <html> it's an innerHTML case
12911298

1299+
defgetCurrentTable(self):
1300+
i=-1
1301+
whileself.tree.openElements[i].name!="table":
1302+
i-=1
1303+
returnself.tree.openElements[i]
1304+
12921305
# processing methods
1306+
defprocessSpaceCharacters(self,data):
1307+
if"tainted"notinself.getCurrentTable()._flags:
1308+
self.tree.insertText(data)
1309+
else:
1310+
self.processCharacters(data)
1311+
12931312
defprocessCharacters(self,data):
1294-
self.parser.parseError("unexpected-char-implies-table-voodoo")
1295-
# Make all the special element rearranging voodoo kick in
1296-
self.tree.insertFromTable=True
1297-
# Process the character in the "in body" mode
1298-
self.parser.phases["inBody"].processCharacters(data)
1299-
self.tree.insertFromTable=False
1313+
ifself.tree.openElements[-1].namein ("style","script"):
1314+
self.tree.insertText(data)
1315+
else:
1316+
if"tainted"notinself.getCurrentTable()._flags:
1317+
self.parser.parseError("unexpected-char-implies-table-voodoo")
1318+
self.getCurrentTable()._flags.append("tainted")
1319+
# Do the table magic!
1320+
self.tree.insertFromTable=True
1321+
self.parser.phases["inBody"].processCharacters(data)
1322+
self.tree.insertFromTable=False
13001323

13011324
defstartTagCaption(self,name,attributes):
13021325
self.clearStackToTableContext()
@@ -1329,12 +1352,27 @@ def startTagTable(self, name, attributes):
13291352
ifnotself.parser.innerHTML:
13301353
self.parser.phase.processStartTag(name,attributes)
13311354

1355+
defstartTagStyleScript(self,name,attributes):
1356+
if"tainted"notinself.getCurrentTable()._flags:
1357+
self.parser.phases["inHead"].processStartTag(name,attributes)
1358+
else:
1359+
self.startTagOther(name,attributes)
1360+
1361+
defstartTagInput(self,name,attributes):
1362+
if"type"inattributesandattributes["type"].translate(asciiUpper2Lower)=="hidden"and"tainted"notinself.getCurrentTable()._flags:
1363+
self.parser.parseError("unpexted-hidden-input-in-table")
1364+
self.tree.insertElement(name,attributes)
1365+
# XXX associate with form
1366+
self.tree.openElements.pop()
1367+
else:
1368+
self.startTagOther(name,attributes)
1369+
13321370
defstartTagOther(self,name,attributes):
1333-
self.parser.parseError("unexpected-start-tag-implies-table-voodoo",
1334-
{"name":name})
1335-
# Make all the special element rearranging voodoo kick in
1371+
if"tainted"notinself.getCurrentTable()._flags:
1372+
self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name":name})
1373+
self.getCurrentTable()._flags.append("tainted")
1374+
# Do the table magic!
13361375
self.tree.insertFromTable=True
1337-
# Process the start tag in the "in body" mode
13381376
self.parser.phases["inBody"].processStartTag(name,attributes)
13391377
self.tree.insertFromTable=False
13401378

@@ -1354,15 +1392,21 @@ def endTagTable(self, name):
13541392
assertself.parser.innerHTML
13551393
self.parser.parseError()
13561394

1395+
defendTagStyleScript(self,name):
1396+
if"tainted"notinself.getCurrentTable()._flags:
1397+
self.parser.phases["inHead"].processEndTag(name)
1398+
else:
1399+
self.endTagOther(name)
1400+
13571401
defendTagIgnore(self,name):
13581402
self.parser.parseError("unexpected-end-tag", {"name":name})
13591403

13601404
defendTagOther(self,name):
1361-
self.parser.parseError("unexpected-end-tag-implies-table-voodoo",
1362-
{"name":name})
1363-
# Make all the special element rearranging voodoo kick in
1405+
if"tainted"notinself.getCurrentTable()._flags:
1406+
self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name":name})
1407+
self.getCurrentTable()._flags.append("tainted")
1408+
# Do the table magic!
13641409
self.tree.insertFromTable=True
1365-
# Process the end tag in the "in body" mode
13661410
self.parser.phases["inBody"].processEndTag(name)
13671411
self.tree.insertFromTable=False
13681412

@@ -1749,7 +1793,8 @@ def __init__(self, parser, tree):
17491793
("html",self.startTagHtml),
17501794
("option",self.startTagOption),
17511795
("optgroup",self.startTagOptgroup),
1752-
("select",self.startTagSelect)
1796+
("select",self.startTagSelect),
1797+
("input",self.startTagInput)
17531798
])
17541799
self.startTagHandler.default=self.startTagOther
17551800

@@ -1783,6 +1828,11 @@ def startTagSelect(self, name, attributes):
17831828
self.parser.parseError("unexpected-select-in-select")
17841829
self.endTagSelect("select")
17851830

1831+
defstartTagInput(self,name,attributes):
1832+
self.parser.parseError("unexpected-input-in-select")
1833+
self.endTagSelect("select")
1834+
self.parser.phase.processStartTag(name,attributes)
1835+
17861836
defstartTagOther(self,name,attributes):
17871837
self.parser.parseError("unexpected-start-tag-in-select",
17881838
{"name":name})

‎src/html5lib/liberalxmlparser.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def normalizeToken(self, token):
9393

9494
returntoken
9595

96-
classXhmlRootPhase(html5parser.RootElementPhase):
96+
classXhmlRootPhase(html5parser.BeforeHtmlPhase):
9797
definsertHtmlElement(self):
9898
element=self.tree.createElement("html", {'xmlns':'http://www.w3.org/1999/xhtml'})
9999
self.tree.openElements.append(element)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp