1
1
from __future__ import absolute_import ,division ,unicode_literals
2
- from six import with_metaclass
2
+ from six import with_metaclass , viewkeys , PY3
3
3
4
4
import types
5
5
6
+ try :
7
+ from collections import OrderedDict
8
+ except ImportError :
9
+ from ordereddict import OrderedDict
10
+
6
11
from .import inputstream
7
12
from .import tokenizer
8
13
9
14
from .import treebuilders
10
15
from .treebuilders ._base import Marker
11
16
12
17
from .import utils
13
- from .import constants
14
- from .constants import spaceCharacters ,asciiUpper2Lower
15
- from .constants import specialElements
16
- from .constants import headingElements
17
- from .constants import cdataElements ,rcdataElements
18
- from .constants import tokenTypes ,ReparseException ,namespaces
19
- from .constants import htmlIntegrationPointElements ,mathmlTextIntegrationPointElements
20
- from .constants import adjustForeignAttributes as adjustForeignAttributesMap
21
- from .constants import E
18
+ from .constants import (
19
+ spaceCharacters ,asciiUpper2Lower ,
20
+ specialElements ,headingElements ,cdataElements ,rcdataElements ,
21
+ tokenTypes ,tagTokenTypes ,
22
+ namespaces ,
23
+ htmlIntegrationPointElements ,mathmlTextIntegrationPointElements ,
24
+ adjustForeignAttributes as adjustForeignAttributesMap ,
25
+ adjustMathMLAttributes ,adjustSVGAttributes ,
26
+ E ,
27
+ ReparseException
28
+ )
22
29
23
30
24
31
def parse (doc ,treebuilder = "etree" ,encoding = None ,
@@ -272,96 +279,18 @@ def normalizeToken(self, token):
272
279
""" HTML5 specific normalizations to the token stream """
273
280
274
281
if token ["type" ]== tokenTypes ["StartTag" ]:
275
- token ["data" ]= dict (token [" data" ][::- 1 ])
282
+ token ["data" ]= OrderedDict (token ['data' ][::- 1 ])
276
283
277
284
return token
278
285
279
286
def adjustMathMLAttributes (self ,token ):
280
- replacements = {"definitionurl" :"definitionURL" }
281
- for k ,v in replacements .items ():
282
- if k in token ["data" ]:
283
- token ["data" ][v ]= token ["data" ][k ]
284
- del token ["data" ][k ]
287
+ adjust_attributes (token ,adjustMathMLAttributes )
285
288
286
289
def adjustSVGAttributes (self ,token ):
287
- replacements = {
288
- "attributename" :"attributeName" ,
289
- "attributetype" :"attributeType" ,
290
- "basefrequency" :"baseFrequency" ,
291
- "baseprofile" :"baseProfile" ,
292
- "calcmode" :"calcMode" ,
293
- "clippathunits" :"clipPathUnits" ,
294
- "contentscripttype" :"contentScriptType" ,
295
- "contentstyletype" :"contentStyleType" ,
296
- "diffuseconstant" :"diffuseConstant" ,
297
- "edgemode" :"edgeMode" ,
298
- "externalresourcesrequired" :"externalResourcesRequired" ,
299
- "filterres" :"filterRes" ,
300
- "filterunits" :"filterUnits" ,
301
- "glyphref" :"glyphRef" ,
302
- "gradienttransform" :"gradientTransform" ,
303
- "gradientunits" :"gradientUnits" ,
304
- "kernelmatrix" :"kernelMatrix" ,
305
- "kernelunitlength" :"kernelUnitLength" ,
306
- "keypoints" :"keyPoints" ,
307
- "keysplines" :"keySplines" ,
308
- "keytimes" :"keyTimes" ,
309
- "lengthadjust" :"lengthAdjust" ,
310
- "limitingconeangle" :"limitingConeAngle" ,
311
- "markerheight" :"markerHeight" ,
312
- "markerunits" :"markerUnits" ,
313
- "markerwidth" :"markerWidth" ,
314
- "maskcontentunits" :"maskContentUnits" ,
315
- "maskunits" :"maskUnits" ,
316
- "numoctaves" :"numOctaves" ,
317
- "pathlength" :"pathLength" ,
318
- "patterncontentunits" :"patternContentUnits" ,
319
- "patterntransform" :"patternTransform" ,
320
- "patternunits" :"patternUnits" ,
321
- "pointsatx" :"pointsAtX" ,
322
- "pointsaty" :"pointsAtY" ,
323
- "pointsatz" :"pointsAtZ" ,
324
- "preservealpha" :"preserveAlpha" ,
325
- "preserveaspectratio" :"preserveAspectRatio" ,
326
- "primitiveunits" :"primitiveUnits" ,
327
- "refx" :"refX" ,
328
- "refy" :"refY" ,
329
- "repeatcount" :"repeatCount" ,
330
- "repeatdur" :"repeatDur" ,
331
- "requiredextensions" :"requiredExtensions" ,
332
- "requiredfeatures" :"requiredFeatures" ,
333
- "specularconstant" :"specularConstant" ,
334
- "specularexponent" :"specularExponent" ,
335
- "spreadmethod" :"spreadMethod" ,
336
- "startoffset" :"startOffset" ,
337
- "stddeviation" :"stdDeviation" ,
338
- "stitchtiles" :"stitchTiles" ,
339
- "surfacescale" :"surfaceScale" ,
340
- "systemlanguage" :"systemLanguage" ,
341
- "tablevalues" :"tableValues" ,
342
- "targetx" :"targetX" ,
343
- "targety" :"targetY" ,
344
- "textlength" :"textLength" ,
345
- "viewbox" :"viewBox" ,
346
- "viewtarget" :"viewTarget" ,
347
- "xchannelselector" :"xChannelSelector" ,
348
- "ychannelselector" :"yChannelSelector" ,
349
- "zoomandpan" :"zoomAndPan"
350
- }
351
- for originalName in list (token ["data" ].keys ()):
352
- if originalName in replacements :
353
- svgName = replacements [originalName ]
354
- token ["data" ][svgName ]= token ["data" ][originalName ]
355
- del token ["data" ][originalName ]
290
+ adjust_attributes (token ,adjustSVGAttributes )
356
291
357
292
def adjustForeignAttributes (self ,token ):
358
- replacements = adjustForeignAttributesMap
359
-
360
- for originalName in token ["data" ].keys ():
361
- if originalName in replacements :
362
- foreignName = replacements [originalName ]
363
- token ["data" ][foreignName ]= token ["data" ][originalName ]
364
- del token ["data" ][originalName ]
293
+ adjust_attributes (token ,adjustForeignAttributesMap )
365
294
366
295
def reparseTokenNormal (self ,token ):
367
296
# pylint:disable=unused-argument
@@ -434,7 +363,7 @@ def getPhases(debug):
434
363
def log (function ):
435
364
"""Logger that records which phase processes each token"""
436
365
type_names = dict ((value ,key )for key ,value in
437
- constants . tokenTypes .items ())
366
+ tokenTypes .items ())
438
367
439
368
def wrapped (self ,* args ,** kwargs ):
440
369
if function .__name__ .startswith ("process" )and len (args )> 0 :
@@ -443,7 +372,7 @@ def wrapped(self, *args, **kwargs):
443
372
info = {"type" :type_names [token ['type' ]]}
444
373
except :
445
374
raise
446
- if token ['type' ]in constants . tagTokenTypes :
375
+ if token ['type' ]in tagTokenTypes :
447
376
info ["name" ]= token ['name' ]
448
377
449
378
self .parser .log .append ((self .parser .tokenizer .state .__name__ ,
@@ -1022,17 +951,9 @@ def __init__(self, parser, tree):
1022
951
self .endTagHandler .default = self .endTagOther
1023
952
1024
953
def isMatchingFormattingElement (self ,node1 ,node2 ):
1025
- if node1 .name != node2 .name or node1 .namespace != node2 .namespace :
1026
- return False
1027
- elif len (node1 .attributes )!= len (node2 .attributes ):
1028
- return False
1029
- else :
1030
- attributes1 = sorted (node1 .attributes .items ())
1031
- attributes2 = sorted (node2 .attributes .items ())
1032
- for attr1 ,attr2 in zip (attributes1 ,attributes2 ):
1033
- if attr1 != attr2 :
1034
- return False
1035
- return True
954
+ return (node1 .name == node2 .name and
955
+ node1 .namespace == node2 .namespace and
956
+ node1 .attributes == node2 .attributes )
1036
957
1037
958
# helper
1038
959
def addFormattingElement (self ,token ):
@@ -2798,6 +2719,16 @@ def processEndTag(self, token):
2798
2719
}
2799
2720
2800
2721
2722
+ def adjust_attributes (token ,replacements ):
2723
+ if PY3 or utils .PY27 :
2724
+ needs_adjustment = viewkeys (token ['data' ])& viewkeys (replacements )
2725
+ else :
2726
+ needs_adjustment = frozenset (token ['data' ])& frozenset (replacements )
2727
+ if needs_adjustment :
2728
+ token ['data' ]= OrderedDict ((replacements .get (k ,k ),v )
2729
+ for k ,v in token ['data' ].items ())
2730
+
2731
+
2801
2732
def impliedTagToken (name ,type = "EndTag" ,attributes = None ,
2802
2733
selfClosing = False ):
2803
2734
if attributes is None :