Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 47b430e

Browse files
committed
Update encoding detection to spec, make the entire encoding detection case-insensitive (it is effectively in spec), and fix bug shown by the update in the encoding stream.
1 parent 19f48dd, commit 47b430e

File tree

1 file changed

+37
-35
lines changed

1 file changed

+37
-35
lines changed

‎src/html5lib/inputstream.py

Lines changed: 37 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -471,7 +471,7 @@ class EncodingBytes(str):
471471
If the position is ever greater than the string length then an exception is
472472
raised"""
473473
def__new__(self,value):
474-
returnstr.__new__(self,value)
474+
returnstr.__new__(self,value.lower())
475475

476476
def__init__(self,value):
477477
self._position=-1
@@ -539,14 +539,12 @@ def skipUntil(self, chars):
539539
self._position=p
540540
returnNone
541541

542-
defmatchBytes(self,bytes,lower=False):
542+
defmatchBytes(self,bytes):
543543
"""Look for a sequence of bytes at the start of a string. If the bytes
544544
are found return True and advance the position to the byte after the
545545
match. Otherwise return False and leave the position alone"""
546546
p=self.position
547547
data=self[p:p+len(bytes)]
548-
iflower:
549-
data=data.lower()
550548
rv=data.startswith(bytes)
551549
ifrv:
552550
self.position+=len(bytes)
@@ -557,6 +555,9 @@ def jumpTo(self, bytes):
557555
a match is found advance the position to the last byte of the match"""
558556
newPosition=self[self.position:].find(bytes)
559557
ifnewPosition>-1:
558+
# XXX: This is ugly, but I can't see a nicer way to fix this.
559+
ifself._position==-1:
560+
self._position=0
560561
self._position+= (newPosition+len(bytes)-1)
561562
returnTrue
562563
else:
@@ -581,7 +582,7 @@ def getEncoding(self):
581582
forbyteinself.data:
582583
keepParsing=True
583584
forkey,methodinmethodDispatch:
584-
ifself.data.matchBytes(key,lower=True):
585+
ifself.data.matchBytes(key):
585586
try:
586587
keepParsing=method()
587588
break
@@ -659,72 +660,75 @@ def getAttribute(self):
659660
"""Return a name,value pair for the next attribute in the stream,
660661
if one is found, or None"""
661662
data=self.data
663+
# Step 1 (skip chars)
662664
c=data.skip(spaceCharactersBytes|frozenset("/"))
663-
ifc=="<":
664-
data.previous()
665-
returnNone
666-
elifc==">"orcisNone:
665+
# Step 2
666+
ifcin (">",None):
667667
returnNone
668+
# Step 3
668669
attrName= []
669670
attrValue= []
670-
spaceFound=False
671-
#Step 5 attribute name
671+
#Step 4 attribute name
672672
whileTrue:
673673
ifc=="="andattrName:
674674
break
675675
elifcinspaceCharactersBytes:
676-
spaceFound=True
676+
#Step 6!
677+
c=data.skip()
678+
c=data.next()
677679
break
678-
elifcin ("/","<",">"):
680+
elifcin ("/",">"):
679681
return"".join(attrName),""
680682
elifcinasciiUppercaseBytes:
681683
attrName.append(c.lower())
684+
elifc==None:
685+
returnNone
682686
else:
683687
attrName.append(c)
684-
#Step6
688+
#Step5
685689
c=data.next()
686690
#Step 7
687-
ifspaceFound:
688-
c=data.skip()
689-
#Step 8
690-
ifc!="=":
691-
data.previous()
692-
return"".join(attrName),""
693-
#XXX need to advance position in both spaces and value case
694-
#Step 9
691+
ifc!="=":
692+
data.previous()
693+
return"".join(attrName),""
694+
#Step 8
695695
data.next()
696-
#Step10
696+
#Step9
697697
c=data.skip()
698-
#Step11
698+
#Step10
699699
ifcin ("'",'"'):
700-
#11.1
700+
#10.1
701701
quoteChar=c
702702
whileTrue:
703-
#11.3
703+
#10.2
704704
c=data.next()
705+
#10.3
705706
ifc==quoteChar:
706707
data.next()
707708
return"".join(attrName),"".join(attrValue)
708-
#11.4
709+
#10.4
709710
elifcinasciiUppercaseBytes:
710711
attrValue.append(c.lower())
711-
#11.5
712+
#10.5
712713
else:
713714
attrValue.append(c)
714-
elifcin (">","<"):
715+
elifc==">":
715716
return"".join(attrName),""
716717
elifcinasciiUppercaseBytes:
717718
attrValue.append(c.lower())
718719
elifcisNone:
719720
returnNone
720721
else:
721722
attrValue.append(c)
723+
# Step 11
722724
whileTrue:
723725
c=data.next()
724726
ifcinspacesAngleBrackets:
725727
return"".join(attrName),"".join(attrValue)
726728
elifcinasciiUppercaseBytes:
727729
attrValue.append(c.lower())
730+
elifcisNone:
731+
returnNone
728732
else:
729733
attrValue.append(c)
730734

@@ -734,10 +738,6 @@ def __init__(self, data):
734738
self.data=data
735739
defparse(self):
736740
try:
737-
#Skip to the first ";"
738-
self.data.jumpTo(";")
739-
self.data.position+=1
740-
self.data.skip()
741741
#Check if the attr name is charset
742742
#otherwise return
743743
self.data.jumpTo("charset")
@@ -753,8 +753,10 @@ def parse(self):
753753
quoteMark=self.data.currentByte
754754
self.data.position+=1
755755
oldPosition=self.data.position
756-
self.data.jumpTo(quoteMark)
757-
returnself.data[oldPosition:self.data.position]
756+
ifself.data.jumpTo(quoteMark):
757+
returnself.data[oldPosition:self.data.position]
758+
else:
759+
returnNone
758760
else:
759761
#Unquoted value
760762
oldPosition=self.data.position

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp