Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 447b711

Browse files
committed
Don't crash and burn when non-ascii characters are found in the pre-parse
--HG--
extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401060
1 parent b24ea63 commit 447b711

File tree

2 files changed

+18
-13
lines changed

2 files changed

+18
-13
lines changed

‎src/html5lib/inputstream.py‎

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,11 @@
66
fromconstantsimportencodings
77
fromutilsimportMethodDispatcher
88

9+
#Non-unicode versions of constants for use in the pre-parser
10+
spaceCharactersBytes= [str(item)foriteminspaceCharacters]
11+
asciiLettersBytes= [str(item)foriteminasciiLetters]
12+
asciiUppercaseBytes= [str(item)foriteminasciiUppercase]
13+
914
try:
1015
fromcollectionsimportdeque
1116
exceptImportError:
@@ -357,7 +362,7 @@ def getCurrentByte(self):
357362

358363
currentByte=property(getCurrentByte)
359364

360-
defskip(self,chars=spaceCharacters):
365+
defskip(self,chars=spaceCharactersBytes):
361366
"""Skip past a list of characters"""
362367
whileself.currentByteinchars:
363368
self.position+=1
@@ -432,7 +437,7 @@ def handleComment(self):
432437
returnself.data.jumpTo("-->")
433438

434439
defhandleMeta(self):
435-
ifself.data.currentBytenotinspaceCharacters:
440+
ifself.data.currentBytenotinspaceCharactersBytes:
436441
#if we have <meta not followed by a space so just keep going
437442
returnTrue
438443
#We have a valid meta element we want to search for attributes
@@ -462,7 +467,7 @@ def handlePossibleEndTag(self):
462467
returnself.handlePossibleTag(True)
463468

464469
defhandlePossibleTag(self,endTag):
465-
ifself.data.currentBytenotinasciiLetters:
470+
ifself.data.currentBytenotinasciiLettersBytes:
466471
#If the next byte is not an ascii letter either ignore this
467472
#fragment (possible start tag case) or treat it according to
468473
#handleOther
@@ -471,7 +476,7 @@ def handlePossibleTag(self, endTag):
471476
self.handleOther()
472477
returnTrue
473478

474-
self.data.findNext(list(spaceCharacters)+ ["<",">"])
479+
self.data.findNext(list(spaceCharactersBytes)+ ["<",">"])
475480
ifself.data.currentByte=="<":
476481
#return to the first step in the overall "two step" algorithm
477482
#reprocessing the < byte
@@ -489,7 +494,7 @@ def handleOther(self):
489494
defgetAttribute(self):
490495
"""Return a name,value pair for the next attribute in the stream,
491496
if one is found, or None"""
492-
self.data.skip(list(spaceCharacters)+["/"])
497+
self.data.skip(list(spaceCharactersBytes)+["/"])
493498
ifself.data.currentByte=="<":
494499
self.data.position-=1
495500
returnNone
@@ -502,12 +507,12 @@ def getAttribute(self):
502507
whileTrue:
503508
ifself.data.currentByte=="="andattrName:
504509
break
505-
elifself.data.currentByteinspaceCharacters:
510+
elifself.data.currentByteinspaceCharactersBytes:
506511
spaceFound=True
507512
break
508513
elifself.data.currentBytein ("/","<",">"):
509514
return"".join(attrName),""
510-
elifself.data.currentByteinasciiUppercase:
515+
elifself.data.currentByteinasciiUppercaseBytes:
511516
attrName.extend(self.data.currentByte.lower())
512517
else:
513518
attrName.extend(self.data.currentByte)
@@ -536,23 +541,23 @@ def getAttribute(self):
536541
self.data.position+=1
537542
return"".join(attrName),"".join(attrValue)
538543
#11.4
539-
elifself.data.currentByteinasciiUppercase:
544+
elifself.data.currentByteinasciiUppercaseBytes:
540545
attrValue.extend(self.data.currentByte.lower())
541546
#11.5
542547
else:
543548
attrValue.extend(self.data.currentByte)
544549
elifself.data.currentBytein (">","<"):
545550
return"".join(attrName),""
546-
elifself.data.currentByteinasciiUppercase:
551+
elifself.data.currentByteinasciiUppercaseBytes:
547552
attrValue.extend(self.data.currentByte.lower())
548553
else:
549554
attrValue.extend(self.data.currentByte)
550555
whileTrue:
551556
self.data.position+=1
552557
ifself.data.currentBytein (
553-
list(spaceCharacters)+ [">","<"]):
558+
list(spaceCharactersBytes)+ [">","<"]):
554559
return"".join(attrName),"".join(attrValue)
555-
elifself.data.currentByteinasciiUppercase:
560+
elifself.data.currentByteinasciiUppercaseBytes:
556561
attrValue.extend(self.data.currentByte.lower())
557562
else:
558563
attrValue.extend(self.data.currentByte)
@@ -588,7 +593,7 @@ def parse(self):
588593
#Unquoted value
589594
oldPosition=self.data.position
590595
try:
591-
self.data.findNext(spaceCharacters)
596+
self.data.findNext(spaceCharactersBytes)
592597
returnself.data[oldPosition:self.data.position]
593598
exceptStopIteration:
594599
#Return the whole remaining value

‎src/html5lib/tokenizer.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ class HTMLTokenizer(object):
3131
# XXX need to fix documentation
3232

3333
def__init__(self,stream,encoding=None,parseMeta=True,useChardet=True,
34-
lowercaseElementName=True,lowercaseAttrName=True,):
34+
lowercaseElementName=True,lowercaseAttrName=True):
3535
self.stream=HTMLInputStream(stream,encoding,parseMeta,useChardet)
3636

3737
#Perform case conversions?

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp