Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 18e5d32

Browse files
committed
Updated Python tokeniser to match new tests
--HG--extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401104
1 parent 13725e0 commit 18e5d32

File tree

2 files changed

+50
-12
lines changed

2 files changed

+50
-12
lines changed

‎src/html5lib/constants.py‎

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -667,7 +667,7 @@
667667
"kappa;":u"\u03BA",
668668
"lArr;":u"\u21D0",
669669
"lambda;":u"\u03BB",
670-
"lang;":u"\u3008",
670+
"lang;":u"\u27E8",
671671
"laquo;":u"\u00AB",
672672
"laquo":u"\u00AB",
673673
"larr;":u"\u2190",
@@ -747,7 +747,7 @@
747747
"quot":u"\u0022",
748748
"rArr;":u"\u21D2",
749749
"radic;":u"\u221A",
750-
"rang;":u"\u3009",
750+
"rang;":u"\u27E9",
751751
"raquo;":u"\u00BB",
752752
"raquo":u"\u00BB",
753753
"rarr;":u"\u2192",

‎src/html5lib/tokenizer.py‎

Lines changed: 48 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
5151
"attributeValueDoubleQuoted":self.attributeValueDoubleQuotedState,
5252
"attributeValueSingleQuoted":self.attributeValueSingleQuotedState,
5353
"attributeValueUnQuoted":self.attributeValueUnQuotedState,
54+
"afterAttributeValue":self.afterAttributeValueState,
5455
"bogusComment":self.bogusCommentState,
5556
"markupDeclarationOpen":self.markupDeclarationOpenState,
5657
"commentStart":self.commentStartState,
@@ -185,10 +186,11 @@ def consumeNumberEntity(self, isHex):
185186

186187
return char
187188

188-
def consumeEntity(self,fromAttribute=False):
189+
def consumeEntity(self,allowedChar=None,fromAttribute=False):
189190
char=None
190191
charStack= [self.stream.char()]
191-
if charStack[0] in spaceCharacters or charStack[0] in (EOF,"<","&"):
192+
if charStack[0] in spaceCharacters or charStack[0] in (EOF,"<","&")\
193+
or (allowedChar is not None and allowedChar==charStack[0]):
192194
self.stream.unget(charStack)
193195
elif charStack[0]==u"#":
194196
# We might have a number entity here.
@@ -260,10 +262,10 @@ def entitiesStartingWith(name):
260262
self.stream.unget(charStack)
261263
return char
262264

263-
def processEntityInAttribute(self):
265+
def processEntityInAttribute(self,allowedChar):
264266
"""This method replaces the need for "entityInAttributeValueState".
265267
"""
266-
entity=self.consumeEntity(True)
268+
entity=self.consumeEntity(allowedChar=allowedChar,fromAttribute=True)
267269
if entity:
268270
self.currentToken["data"][-1][1]+=entity
269271
else:
@@ -479,6 +481,11 @@ def beforeAttributeNameState(self):
479481
self.emitCurrentToken()
480482
elif data==u"/":
481483
self.processSolidusInTag()
484+
elif data==u"'" or data==u'"' or data==u"=":
485+
self.tokenQueue.append({"type":"ParseError","data":
486+
"invalid-character-in-attribute-name"})
487+
self.currentToken["data"].append([data,""])
488+
self.state=self.states["attributeName"]
482489
elif data==EOF:
483490
self.tokenQueue.append({"type":"ParseError","data":
484491
"expected-attribute-name-but-got-eof"})
@@ -508,6 +515,11 @@ def attributeNameState(self):
508515
elif data==u"/":
509516
self.processSolidusInTag()
510517
self.state=self.states["beforeAttributeName"]
518+
elif data==u"'" or data==u'"':
519+
self.tokenQueue.append({"type":"ParseError","data":
520+
"invalid-character-in-attribute-name"})
521+
self.currentToken["data"][-1][0]+=data
522+
leavingThisState=False
511523
elif data==EOF:
512524
self.tokenQueue.append({"type":"ParseError","data":
513525
"eof-in-attribute-name"})
@@ -570,6 +582,11 @@ def beforeAttributeValueState(self):
570582
self.state=self.states["attributeValueSingleQuoted"]
571583
elif data==u">":
572584
self.emitCurrentToken()
585+
elif data==u"=":
586+
self.tokenQueue.append({"type":"ParseError","data":
587+
"equals-in-unquoted-attribute-value"})
588+
self.currentToken["data"][-1][1]+=data
589+
self.state=self.states["attributeValueUnQuoted"]
573590
elif data==EOF:
574591
self.tokenQueue.append({"type":"ParseError","data":
575592
"expected-attribute-value-but-got-eof"})
@@ -582,9 +599,9 @@ def beforeAttributeValueState(self):
582599
def attributeValueDoubleQuotedState(self):
583600
data=self.stream.char()
584601
if data=="\"":
585-
self.state=self.states["beforeAttributeName"]
602+
self.state=self.states["afterAttributeValue"]
586603
elif data==u"&":
587-
self.processEntityInAttribute()
604+
self.processEntityInAttribute(u'"')
588605
elif data==EOF:
589606
self.tokenQueue.append({"type":"ParseError","data":
590607
"eof-in-attribute-value-double-quote"})
@@ -597,9 +614,9 @@ def attributeValueDoubleQuotedState(self):
597614
def attributeValueSingleQuotedState(self):
598615
data=self.stream.char()
599616
if data=="'":
600-
self.state=self.states["beforeAttributeName"]
617+
self.state=self.states["afterAttributeValue"]
601618
elif data==u"&":
602-
self.processEntityInAttribute()
619+
self.processEntityInAttribute(u"'")
603620
elif data==EOF:
604621
self.tokenQueue.append({"type":"ParseError","data":
605622
"eof-in-attribute-value-single-quote"})
@@ -614,16 +631,37 @@ def attributeValueUnQuotedState(self):
614631
if data in spaceCharacters:
615632
self.state=self.states["beforeAttributeName"]
616633
elif data==u"&":
617-
self.processEntityInAttribute()
634+
self.processEntityInAttribute(None)
618635
elif data==u">":
619636
self.emitCurrentToken()
637+
elif data==u'"' or data==u"'" or data==u"=":
638+
self.tokenQueue.append({"type":"ParseError","data":
639+
"unexpected-character-in-unquoted-attribute-value"})
640+
self.currentToken["data"][-1][1]+=data
620641
elif data==EOF:
621642
self.tokenQueue.append({"type":"ParseError","data":
622643
"eof-in-attribute-value-no-quotes"})
623644
self.emitCurrentToken()
624645
else:
625646
self.currentToken["data"][-1][1]+=data+self.stream.charsUntil( \
626-
frozenset(("&",">","<"))|spaceCharacters)
647+
frozenset(("&",">","<","=","'",'"'))|spaceCharacters)
648+
return True
649+
650+
def afterAttributeValueState(self):
651+
data=self.stream.char()
652+
if data in spaceCharacters:
653+
self.state=self.states["beforeAttributeName"]
654+
elif data==u">":
655+
self.emitCurrentToken()
656+
self.state=self.states["data"]
657+
elif data==u"/":
658+
self.processSolidusInTag()
659+
self.state=self.states["beforeAttributeName"]
660+
else:
661+
self.tokenQueue.append({"type":"ParseError","data":
662+
"unexpected-character-after-attribute-value"})
663+
self.stream.unget(data)
664+
self.state=self.states["beforeAttributeName"]
627665
return True
628666

629667
def bogusCommentState(self):

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp