11from __future__import absolute_import ,division ,unicode_literals
2- from six import with_metaclass
2+ from six import with_metaclass , viewkeys , PY3
33
44import types
55
6+ try :
7+ from collections import OrderedDict
8+ except ImportError :
9+ from ordereddict import OrderedDict
10+
611from .import inputstream
712from .import tokenizer
813
914from .import treebuilders
1015from .treebuilders ._base import Marker
1116
1217from .import utils
13- from .import constants
14- from .constants import spaceCharacters ,asciiUpper2Lower
15- from .constants import specialElements
16- from .constants import headingElements
17- from .constants import cdataElements ,rcdataElements
18- from .constants import tokenTypes ,ReparseException ,namespaces
19- from .constants import htmlIntegrationPointElements ,mathmlTextIntegrationPointElements
20- from .constants import adjustForeignAttributes as adjustForeignAttributesMap
21- from .constants import E
18+ from .constants import (
19+ spaceCharacters ,asciiUpper2Lower ,
20+ specialElements ,headingElements ,cdataElements ,rcdataElements ,
21+ tokenTypes ,tagTokenTypes ,
22+ namespaces ,
23+ htmlIntegrationPointElements ,mathmlTextIntegrationPointElements ,
24+ adjustForeignAttributes as adjustForeignAttributesMap ,
25+ adjustMathMLAttributes ,adjustSVGAttributes ,
26+ E ,
27+ ReparseException
28+ )
2229
2330
2431def parse (doc ,treebuilder = "etree" ,encoding = None ,
def normalizeToken(self, token):
    """Apply the HTML5-specific normalizations to a token and return it.

    For a start tag, ``token["data"]`` (a sequence of ``(name, value)``
    pairs) is replaced by an ``OrderedDict`` keyed by attribute name.
    When a name occurs more than once the first occurrence is kept, and
    the attributes stay in the order in which they appeared in the
    sequence.  Tokens of any other type are returned unchanged.

    :arg token: the token dict to normalize (modified in place)

    :returns: the same ``token`` object
    """
    if token["type"] == tokenTypes["StartTag"]:
        attributes = OrderedDict()
        for name, value in token["data"]:
            # First occurrence wins; inserting in source order (rather than
            # building from the reversed list) keeps the attribute order.
            if name not in attributes:
                attributes[name] = value
        token["data"] = attributes

    return token
278285
def adjustMathMLAttributes(self, token):
    """Rename the attributes of ``token`` using the MathML replacement table.

    Delegates to ``adjust_attributes``, which rebinds ``token["data"]``
    when at least one attribute name has a replacement.  Returns None.

    :arg token: the token dict whose ``"data"`` attributes are adjusted
    """
    # Inside the method body this name resolves to the module-level table
    # imported from .constants, not to this method of the same name.
    adjust_attributes(token, adjustMathMLAttributes)
285288
def adjustSVGAttributes(self, token):
    """Rename the attributes of ``token`` using the SVG replacement table.

    Delegates to ``adjust_attributes``, which rebinds ``token["data"]``
    when at least one attribute name has a replacement.  Returns None.

    :arg token: the token dict whose ``"data"`` attributes are adjusted
    """
    # Inside the method body this name resolves to the module-level table
    # imported from .constants, not to this method of the same name.
    # NOTE(review): the table presumably carries the lowercase -> camelCase
    # pairs (e.g. "viewbox" -> "viewBox") that used to be inlined here;
    # confirm against .constants.
    adjust_attributes(token, adjustSVGAttributes)
356291
def adjustForeignAttributes(self, token):
    """Rename the attributes of ``token`` using the foreign-attribute table.

    Delegates to ``adjust_attributes`` with ``adjustForeignAttributesMap``
    (``adjustForeignAttributes`` from .constants, imported under an alias).
    ``token["data"]`` is rebound when at least one attribute name has a
    replacement.  Returns None.

    :arg token: the token dict whose ``"data"`` attributes are adjusted
    """
    adjust_attributes(token, adjustForeignAttributesMap)
365294
366295def reparseTokenNormal (self ,token ):
367296# pylint:disable=unused-argument
@@ -434,7 +363,7 @@ def getPhases(debug):
434363def log (function ):
435364"""Logger that records which phase processes each token"""
436365type_names = dict ((value ,key )for key ,value in
437- constants . tokenTypes .items ())
366+ tokenTypes .items ())
438367
439368def wrapped (self ,* args ,** kwargs ):
440369if function .__name__ .startswith ("process" )and len (args )> 0 :
@@ -443,7 +372,7 @@ def wrapped(self, *args, **kwargs):
443372info = {"type" :type_names [token ['type' ]]}
444373except :
445374raise
446- if token ['type' ]in constants . tagTokenTypes :
375+ if token ['type' ]in tagTokenTypes :
447376info ["name" ]= token ['name' ]
448377
449378self .parser .log .append ((self .parser .tokenizer .state .__name__ ,
@@ -1022,17 +951,9 @@ def __init__(self, parser, tree):
1022951self .endTagHandler .default = self .endTagOther
1023952
def isMatchingFormattingElement(self, node1, node2):
    """Return True when two nodes share name, namespace and attributes.

    Attributes are compared by content only: the order in which they are
    stored does not affect the result.

    :arg node1: first node; must expose ``name``, ``namespace`` and a
        mapping-like ``attributes`` with an ``items()`` method
    :arg node2: second node, same requirements

    :returns: bool
    """
    if node1.name != node2.name or node1.namespace != node2.namespace:
        return False
    # Compare as plain dicts: equality between two OrderedDicts is
    # order-sensitive, but attribute order must not matter here.  This also
    # avoids sorting the items, which fails on Python 3 when keys of
    # different types (e.g. str and tuple) are mixed.
    # NOTE(review): whether ``attributes`` is ever an OrderedDict depends on
    # the tree builder in use -- confirm.
    return dict(node1.attributes.items()) == dict(node2.attributes.items())
1036957
1037958# helper
1038959def addFormattingElement (self ,token ):
@@ -2798,6 +2719,16 @@ def processEndTag(self, token):
27982719 }
27992720
28002721
def adjust_attributes(token, replacements):
    """Rename attributes in ``token['data']`` according to ``replacements``.

    When at least one attribute name is a key of ``replacements``,
    ``token['data']`` is rebound to a new ``OrderedDict`` holding the same
    values in the same order, with every listed name swapped for its
    replacement.  When nothing matches, ``token['data']`` is left untouched
    (the very same object stays in place).

    :arg token: token dict whose ``'data'`` entry maps attribute names to
        values
    :arg replacements: mapping from current attribute name to new name

    :returns: None; ``token`` is modified in place
    """
    attributes = token['data']
    # One membership test per name, stopping at the first hit; this works on
    # every interpreter version, so no version branch is needed.
    if any(name in replacements for name in attributes):
        token['data'] = OrderedDict(
            (replacements.get(name, name), value)
            for name, value in attributes.items())
2731+
28012732def impliedTagToken (name ,type = "EndTag" ,attributes = None ,
28022733selfClosing = False ):
28032734if attributes is None :