Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit aa0c3d1

Browse files
miss-islington, ezio-melotti, serhiy-storchaka
authored
[3.13] gh-77057: Fix handling of invalid markup declarations in HTMLParser (GH-9295) (GH-133834)
(cherry picked from commit 76c0b01)
Co-authored-by: Ezio Melotti <ezio.melotti@gmail.com>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 3de6546, commit aa0c3d1

File tree

3 files changed

+68
-19
lines changed

3 files changed

+68
-19
lines changed

‎Lib/html/parser.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ def parse_html_declaration(self, i):
278278
ifrawdata[i:i+4]=='<!--':
279279
# this case is actually already handled in goahead()
280280
returnself.parse_comment(i)
281-
elifrawdata[i:i+3]=='<![':
281+
elifrawdata[i:i+9]=='<![CDATA[':
282282
returnself.parse_marked_section(i)
283283
elifrawdata[i:i+9].lower()=='<!doctype':
284284
# find the closing >
@@ -295,7 +295,7 @@ def parse_html_declaration(self, i):
295295
defparse_bogus_comment(self,i,report=1):
296296
rawdata=self.rawdata
297297
assertrawdata[i:i+2]in ('<!','</'), ('unexpected call to '
298-
'parse_comment()')
298+
'parse_bogus_comment()')
299299
pos=rawdata.find('>',i+2)
300300
ifpos==-1:
301301
return-1

‎Lib/test/test_htmlparser.py

Lines changed: 64 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -566,52 +566,99 @@ def test_EOF_in_charref(self):
566566
forhtml,expectedindata:
567567
self._run_check(html,expected)
568568

569-
deftest_broken_comments(self):
569+
deftest_EOF_in_comments_or_decls(self):
570+
data= [
571+
('<!', [('data','<!')]),
572+
('<!-', [('data','<!-')]),
573+
('<!--', [('data','<!--')]),
574+
('<![', [('data','<![')]),
575+
('<![CDATA[', [('data','<![CDATA[')]),
576+
('<![CDATA[x', [('data','<![CDATA[x')]),
577+
('<!DOCTYPE', [('data','<!DOCTYPE')]),
578+
('<!DOCTYPE HTML', [('data','<!DOCTYPE HTML')]),
579+
]
580+
forhtml,expectedindata:
581+
self._run_check(html,expected)
582+
deftest_bogus_comments(self):
570583
html= ('<! not really a comment >'
571584
'<! not a comment either -->'
572585
'<! -- close enough -->'
573586
'<!><!<-- this was an empty comment>'
574-
'<!!! another bogus comment !!!>')
587+
'<!!! another bogus comment !!!>'
588+
# see #32876
589+
'<![with square brackets]!>'
590+
'<![\nmultiline\nbogusness\n]!>'
591+
'<![more brackets]-[and a hyphen]!>'
592+
'<![cdata[should be uppercase]]>'
593+
'<![CDATA [whitespaces are not ignored]]>'
594+
'<![CDATA]]>'# required '[' after CDATA
595+
)
575596
expected= [
576597
('comment',' not really a comment '),
577598
('comment',' not a comment either --'),
578599
('comment',' -- close enough --'),
579600
('comment',''),
580601
('comment','<-- this was an empty comment'),
581602
('comment','!! another bogus comment !!!'),
603+
('comment','[with square brackets]!'),
604+
('comment','[\nmultiline\nbogusness\n]!'),
605+
('comment','[more brackets]-[and a hyphen]!'),
606+
('comment','[cdata[should be uppercase]]'),
607+
('comment','[CDATA [whitespaces are not ignored]]'),
608+
('comment','[CDATA]]'),
582609
]
583610
self._run_check(html,expected)
584611

585612
deftest_broken_condcoms(self):
586613
# these condcoms are missing the '--' after '<!' and before the '>'
614+
# and they are considered bogus comments according to
615+
# "8.2.4.42. Markup declaration open state"
587616
html= ('<![if !(IE)]>broken condcom<![endif]>'
588617
'<![if ! IE]><link href="favicon.tiff"/><![endif]>'
589618
'<![if !IE 6]><img src="firefox.png" /><![endif]>'
590619
'<![if !ie 6]><b>foo</b><![endif]>'
591620
'<![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>')
592-
# According to the HTML5 specs sections "8.2.4.44 Bogus comment state"
593-
# and "8.2.4.45 Markup declaration open state", comment tokens should
594-
# be emitted instead of 'unknown decl', but calling unknown_decl
595-
# provides more flexibility.
596-
# See also Lib/_markupbase.py:parse_declaration
597621
expected= [
598-
('unknown decl','if !(IE)'),
622+
('comment','[if !(IE)]'),
599623
('data','broken condcom'),
600-
('unknown decl','endif'),
601-
('unknown decl','if ! IE'),
624+
('comment','[endif]'),
625+
('comment','[if ! IE]'),
602626
('startendtag','link', [('href','favicon.tiff')]),
603-
('unknown decl','endif'),
604-
('unknown decl','if !IE 6'),
627+
('comment','[endif]'),
628+
('comment','[if !IE 6]'),
605629
('startendtag','img', [('src','firefox.png')]),
606-
('unknown decl','endif'),
607-
('unknown decl','if !ie 6'),
630+
('comment','[endif]'),
631+
('comment','[if !ie 6]'),
608632
('starttag','b', []),
609633
('data','foo'),
610634
('endtag','b'),
611-
('unknown decl','endif'),
612-
('unknown decl','if (!IE)|(lt IE 9)'),
635+
('comment','[endif]'),
636+
('comment','[if (!IE)|(lt IE 9)]'),
613637
('startendtag','img', [('src','mammoth.bmp')]),
614-
('unknown decl','endif')
638+
('comment','[endif]')
639+
]
640+
self._run_check(html,expected)
641+
642+
deftest_cdata_declarations(self):
643+
# More tests should be added. See also "8.2.4.42. Markup
644+
# declaration open state", "8.2.4.69. CDATA section state",
645+
# and issue 32876
646+
html= ('<![CDATA[just some plain text]]>')
647+
expected= [('unknown decl','CDATA[just some plain text')]
648+
self._run_check(html,expected)
649+
650+
deftest_cdata_declarations_multiline(self):
651+
html= ('<code><![CDATA['
652+
' if (a < b && a > b) {'
653+
' printf("[<marquee>How?</marquee>]");'
654+
' }'
655+
']]></code>')
656+
expected= [
657+
('starttag','code', []),
658+
('unknown decl',
659+
'CDATA[ if (a < b && a > b) { '
660+
'printf("[<marquee>How?</marquee>]"); }'),
661+
('endtag','code')
615662
]
616663
self._run_check(html,expected)
617664

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Fix handling of invalid markup declarations in
2+
:class:`html.parser.HTMLParser`.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp