html5lib/html5lib-pythonPublic

NotificationsYou must be signed in to change notification settings
Fork302
Star1.2k

Commit 5d925be

committed

Regenerate the Py2 code using awkwardduet 1.1a4.

This finally sorts out the unicode/str mess, so yay!

1 parent c1029a4, commit 5d925be (Copy full SHA for 5d925be)

File tree

54 files changed

+4659

-3743

lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+4659

-3743

lines changed

`‎html5lib/__init__.py‎`

Lines changed: 7 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-"""`
	`1`	`+u"""`
`2`	`2`	`HTML parsing library based on the WHATWG "HTML5"`
`3`	`3`	`specification. The parser is designed to be compatible with existing`
`4`	`4`	`HTML found in the wild and implements well-defined error recovery that`
`@@ -10,8 +10,9 @@`
`10`	`10`	`f = open("my_document.html")`
`11`	`11`	`tree = html5lib.parse(f)`
`12`	`12`	`"""`
`13`		`-__version__="0.95-dev"`
`14`		`-fromhtml5parserimportHTMLParser,parse,parseFragment`
`15`		`-fromtreebuildersimportgetTreeBuilder`
`16`		`-fromtreewalkersimportgetTreeWalker`
`17`		`-fromserializerimportserialize`
	`13`	`+from __future__importabsolute_import`
	`14`	`+__version__=u"0.95-dev"`
	`15`	`+from .html5parserimportHTMLParser,parse,parseFragment`
	`16`	`+from .treebuildersimportgetTreeBuilder`
	`17`	`+from .treewalkersimportgetTreeWalker`
	`18`	`+from .serializerimportserialize`

`‎html5lib/constants.py‎`

Lines changed: 543 additions & 542 deletions

Large diffs are not rendered by default.

`‎html5lib/filters/_base.py‎`

Lines changed: 3 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -2,9 +2,12 @@`
`2`	`2`	`classFilter(object):`
`3`	`3`	`def__init__(self,source):`
`4`	`4`	`self.source=source`
	`5`	`+__init__.func_annotations= {}`
`5`	`6`
`6`	`7`	`def__iter__(self):`
`7`	`8`	`returniter(self.source)`
	`9`	`+__iter__.func_annotations= {}`
`8`	`10`
`9`	`11`	`def__getattr__(self,name):`
`10`	`12`	`returngetattr(self.source,name)`
	`13`	`+__getattr__.func_annotations= {}`

`‎html5lib/filters/inject_meta_charset.py‎`

Lines changed: 27 additions & 24 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,62 +1,65 @@`
`1`		`-import_base`
	`1`	`+from __future__importabsolute_import`
	`2`	`+from .import_base`
`2`	`3`
`3`	`4`	`classFilter(_base.Filter):`
`4`	`5`	`def__init__(self,source,encoding):`
`5`	`6`	`_base.Filter.__init__(self,source)`
`6`	`7`	`self.encoding=encoding`
	`8`	`+__init__.func_annotations= {}`
`7`	`9`
`8`	`10`	`def__iter__(self):`
`9`		`-state="pre_head"`
	`11`	`+state=u"pre_head"`
`10`	`12`	`meta_found= (self.encodingisNone)`
`11`	`13`	`pending= []`
`12`	`14`
`13`	`15`	`fortokenin_base.Filter.__iter__(self):`
`14`		`-type=token["type"]`
`15`		`-iftype=="StartTag":`
`16`		`-iftoken["name"].lower()==u"head":`
`17`		`-state="in_head"`
	`16`	`+type=token[u"type"]`
	`17`	`+iftype==u"StartTag":`
	`18`	`+iftoken[u"name"].lower()==u"head":`
	`19`	`+state=u"in_head"`
`18`	`20`
`19`		`-eliftype=="EmptyTag":`
`20`		`-iftoken["name"].lower()==u"meta":`
	`21`	`+eliftype==u"EmptyTag":`
	`22`	`+iftoken[u"name"].lower()==u"meta":`
`21`	`23`	`# replace charset with actual encoding`
`22`	`24`	`has_http_equiv_content_type=False`
`23`		`-for (namespace,name),valueintoken["data"].iteritems():`
	`25`	`+for (namespace,name),valueintoken[u"data"].items():`
`24`	`26`	`ifnamespace!=None:`
`25`	`27`	`continue`
`26`	`28`	`elifname.lower()==u'charset':`
`27`		`-token["data"][(namespace,name)]=self.encoding`
	`29`	`+token[u"data"][(namespace,name)]=self.encoding`
`28`	`30`	`meta_found=True`
`29`	`31`	`break`
`30`	`32`	`elifname==u'http-equiv'andvalue.lower()==u'content-type':`
`31`	`33`	`has_http_equiv_content_type=True`
`32`	`34`	`else:`
`33`		`-ifhas_http_equiv_content_typeand (None,u"content")intoken["data"]:`
`34`		`-token["data"][(None,u"content")]=u'text/html; charset=%s'%self.encoding`
	`35`	`+ifhas_http_equiv_content_typeand (None,u"content")intoken[u"data"]:`
	`36`	`+token[u"data"][(None,u"content")]=u'text/html; charset=%s'%self.encoding`
`35`	`37`	`meta_found=True`
`36`	`38`
`37`		`-eliftoken["name"].lower()==u"head"andnotmeta_found:`
	`39`	`+eliftoken[u"name"].lower()==u"head"andnotmeta_found:`
`38`	`40`	`# insert meta into empty head`
`39`		`-yield {"type":"StartTag","name":u"head",`
`40`		`-"data":token["data"]}`
`41`		`-yield {"type":"EmptyTag","name":u"meta",`
`42`		`-"data": {(None,u"charset"):self.encoding}}`
`43`		`-yield {"type":"EndTag","name":u"head"}`
	`41`	`+yield {u"type":u"StartTag",u"name":u"head",`
	`42`	`+u"data":token[u"data"]}`
	`43`	`+yield {u"type":u"EmptyTag",u"name":u"meta",`
	`44`	`+u"data": {(None,u"charset"):self.encoding}}`
	`45`	`+yield {u"type":u"EndTag",u"name":u"head"}`
`44`	`46`	`meta_found=True`
`45`	`47`	`continue`
`46`	`48`
`47`		`-eliftype=="EndTag":`
`48`		`-iftoken["name"].lower()==u"head"andpending:`
	`49`	`+eliftype==u"EndTag":`
	`50`	`+iftoken[u"name"].lower()==u"head"andpending:`
`49`	`51`	`# insert meta into head (if necessary) and flush pending queue`
`50`	`52`	`yieldpending.pop(0)`
`51`	`53`	`ifnotmeta_found:`
`52`		`-yield {"type":"EmptyTag","name":u"meta",`
`53`		`-"data": {(None,u"charset"):self.encoding}}`
	`54`	`+yield {u"type":u"EmptyTag",u"name":u"meta",`
	`55`	`+u"data": {(None,u"charset"):self.encoding}}`
`54`	`56`	`whilepending:`
`55`	`57`	`yieldpending.pop(0)`
`56`	`58`	`meta_found=True`
`57`		`-state="post_head"`
	`59`	`+state=u"post_head"`
`58`	`60`
`59`		`-ifstate=="in_head":`
	`61`	`+ifstate==u"in_head":`
`60`	`62`	`pending.append(token)`
`61`	`63`	`else:`
`62`	`64`	`yieldtoken`
	`65`	`+__iter__.func_annotations= {}`

`‎html5lib/filters/lint.py‎`

Lines changed: 35 additions & 33 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,8 @@`
	`1`	`+from __future__importabsolute_import`
`1`	`2`	`fromgettextimportgettext`
`2`	`3`	`_=gettext`
`3`	`4`
`4`		`-import_base`
	`5`	`+from .import_base`
`5`	`6`	`fromhtml5lib.constantsimportcdataElements,rcdataElements,voidElements`
`6`	`7`
`7`	`8`	`fromhtml5lib.constantsimportspaceCharacters`
`@@ -12,39 +13,39 @@ class LintError(Exception): pass`
`12`	`13`	`classFilter(_base.Filter):`
`13`	`14`	`def__iter__(self):`
`14`	`15`	`open_elements= []`
`15`		`-contentModelFlag="PCDATA"`
	`16`	`+contentModelFlag=u"PCDATA"`
`16`	`17`	`fortokenin_base.Filter.__iter__(self):`
`17`		`-type=token["type"]`
`18`		`-iftypein ("StartTag","EmptyTag"):`
`19`		`-name=token["name"]`
`20`		`-ifcontentModelFlag!="PCDATA":`
`21`		`-raiseLintError(_("StartTag not in PCDATA content model flag: %s")%name)`
	`18`	`+type=token[u"type"]`
	`19`	`+iftypein (u"StartTag",u"EmptyTag"):`
	`20`	`+name=token[u"name"]`
	`21`	`+ifcontentModelFlag!=u"PCDATA":`
	`22`	`+raiseLintError(_(u"StartTag not in PCDATA content model flag: %s")%name)`
`22`	`23`	`ifnotisinstance(name,unicode):`
`23`	`24`	`raiseLintError(_(u"Tag name is not a string: %r")%name)`
`24`	`25`	`ifnotname:`
`25`	`26`	`raiseLintError(_(u"Empty tag name"))`
`26`		`-iftype=="StartTag"andnameinvoidElements:`
	`27`	`+iftype==u"StartTag"andnameinvoidElements:`
`27`	`28`	`raiseLintError(_(u"Void element reported as StartTag token: %s")%name)`
`28`		`-eliftype=="EmptyTag"andnamenotinvoidElements:`
`29`		`-raiseLintError(_(u"Non-void element reported as EmptyTag token: %s")%token["name"])`
`30`		`-iftype=="StartTag":`
	`29`	`+eliftype==u"EmptyTag"andnamenotinvoidElements:`
	`30`	`+raiseLintError(_(u"Non-void element reported as EmptyTag token: %s")%token[u"name"])`
	`31`	`+iftype==u"StartTag":`
`31`	`32`	`open_elements.append(name)`
`32`		`-forname,valueintoken["data"]:`
	`33`	`+forname,valueintoken[u"data"]:`
`33`	`34`	`ifnotisinstance(name,unicode):`
`34`		`-raiseLintError(_("Attribute name is not a string: %r")%name)`
	`35`	`+raiseLintError(_(u"Attribute name is not a string: %r")%name)`
`35`	`36`	`ifnotname:`
`36`	`37`	`raiseLintError(_(u"Empty attribute name"))`
`37`	`38`	`ifnotisinstance(value,unicode):`
`38`		`-raiseLintError(_("Attribute value is not a string: %r")%value)`
	`39`	`+raiseLintError(_(u"Attribute value is not a string: %r")%value)`
`39`	`40`	`ifnameincdataElements:`
`40`		`-contentModelFlag="CDATA"`
	`41`	`+contentModelFlag=u"CDATA"`
`41`	`42`	`elifnameinrcdataElements:`
`42`		`-contentModelFlag="RCDATA"`
`43`		`-elifname=="plaintext":`
`44`		`-contentModelFlag="PLAINTEXT"`
	`43`	`+contentModelFlag=u"RCDATA"`
	`44`	`+elifname==u"plaintext":`
	`45`	`+contentModelFlag=u"PLAINTEXT"`
`45`	`46`
`46`		`-eliftype=="EndTag":`
`47`		`-name=token["name"]`
	`47`	`+eliftype==u"EndTag":`
	`48`	`+name=token[u"name"]`
`48`	`49`	`ifnotisinstance(name,unicode):`
`49`	`50`	`raiseLintError(_(u"Tag name is not a string: %r")%name)`
`50`	`51`	`ifnotname:`
`@@ -54,35 +55,36 @@ def __iter__(self):`
`54`	`55`	`start_name=open_elements.pop()`
`55`	`56`	`ifstart_name!=name:`
`56`	`57`	`raiseLintError(_(u"EndTag (%s) does not match StartTag (%s)")% (name,start_name))`
`57`		`-contentModelFlag="PCDATA"`
	`58`	`+contentModelFlag=u"PCDATA"`
`58`	`59`
`59`		`-eliftype=="Comment":`
`60`		`-ifcontentModelFlag!="PCDATA":`
`61`		`-raiseLintError(_("Comment not in PCDATA content model flag"))`
	`60`	`+eliftype==u"Comment":`
	`61`	`+ifcontentModelFlag!=u"PCDATA":`
	`62`	`+raiseLintError(_(u"Comment not in PCDATA content model flag"))`
`62`	`63`
`63`		`-eliftypein ("Characters","SpaceCharacters"):`
`64`		`-data=token["data"]`
	`64`	`+eliftypein (u"Characters",u"SpaceCharacters"):`
	`65`	`+data=token[u"data"]`
`65`	`66`	`ifnotisinstance(data,unicode):`
`66`		`-raiseLintError(_("Attribute name is not a string: %r")%data)`
	`67`	`+raiseLintError(_(u"Attribute name is not a string: %r")%data)`
`67`	`68`	`ifnotdata:`
`68`	`69`	`raiseLintError(_(u"%s token with empty data")%type)`
`69`		`-iftype=="SpaceCharacters":`
	`70`	`+iftype==u"SpaceCharacters":`
`70`	`71`	`data=data.strip(spaceCharacters)`
`71`	`72`	`ifdata:`
`72`	`73`	`raiseLintError(_(u"Non-space character(s) found in SpaceCharacters token: ")%data)`
`73`	`74`
`74`		`-eliftype=="Doctype":`
`75`		`-name=token["name"]`
`76`		`-ifcontentModelFlag!="PCDATA":`
`77`		`-raiseLintError(_("Doctype not in PCDATA content model flag: %s")%name)`
	`75`	`+eliftype==u"Doctype":`
	`76`	`+name=token[u"name"]`
	`77`	`+ifcontentModelFlag!=u"PCDATA":`
	`78`	`+raiseLintError(_(u"Doctype not in PCDATA content model flag: %s")%name)`
`78`	`79`	`ifnotisinstance(name,unicode):`
`79`	`80`	`raiseLintError(_(u"Tag name is not a string: %r")%name)`
`80`	`81`	`# XXX: what to do with token["data"] ?`
`81`	`82`
`82`		`-eliftypein ("ParseError","SerializeError"):`
	`83`	`+eliftypein (u"ParseError",u"SerializeError"):`
`83`	`84`	`pass`
`84`	`85`
`85`	`86`	`else:`
`86`	`87`	`raiseLintError(_(u"Unknown token type: %s")%type)`
`87`	`88`
`88`	`89`	`yieldtoken`
	`90`	`+__iter__.func_annotations= {}`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit5d925be

File tree

54 files changed

Some content is hidden

54 files changed

`‎html5lib/__init__.py‎`

`‎html5lib/constants.py‎`

`‎html5lib/filters/_base.py‎`

`‎html5lib/filters/inject_meta_charset.py‎`

`‎html5lib/filters/lint.py‎`

0 commit comments

Movatterモバイル変換

File tree

54 files changed

Some content is hidden

54 files changed

‎html5lib/__init__.py‎

‎html5lib/constants.py‎

‎html5lib/filters/_base.py‎

‎html5lib/filters/inject_meta_charset.py‎

‎html5lib/filters/lint.py‎

0 commit comments

`‎html5lib/__init__.py‎`

`‎html5lib/constants.py‎`

`‎html5lib/filters/_base.py‎`

`‎html5lib/filters/inject_meta_charset.py‎`

`‎html5lib/filters/lint.py‎`