1
1
from __future__ import absolute_import ,division ,unicode_literals
2
- from six import with_metaclass
2
+ from six import with_metaclass , viewkeys , PY3
3
3
4
4
import types
5
5
6
+ try :
7
+ from collections import OrderedDict
8
+ except ImportError :
9
+ from ordereddict import OrderedDict
10
+
6
11
from .import inputstream
7
12
from .import tokenizer
8
13
9
14
from .import treebuilders
10
15
from .treebuilders ._base import Marker
11
16
12
17
from .import utils
13
- from .import constants
14
- from .constants import spaceCharacters ,asciiUpper2Lower
15
- from .constants import specialElements
16
- from .constants import headingElements
17
- from .constants import cdataElements ,rcdataElements
18
- from .constants import tokenTypes ,ReparseException ,namespaces
19
- from .constants import htmlIntegrationPointElements ,mathmlTextIntegrationPointElements
20
- from .constants import adjustForeignAttributes as adjustForeignAttributesMap
21
- from .constants import E
18
+ from .constants import (
19
+ spaceCharacters ,asciiUpper2Lower ,
20
+ specialElements ,headingElements ,cdataElements ,rcdataElements ,
21
+ tokenTypes ,tagTokenTypes ,
22
+ namespaces ,
23
+ htmlIntegrationPointElements ,mathmlTextIntegrationPointElements ,
24
+ adjustForeignAttributes as adjustForeignAttributesMap ,
25
+ adjustMathMLAttributes ,adjustSVGAttributes ,
26
+ E ,
27
+ ReparseException
28
+ )
22
29
23
30
24
31
def parse (doc ,treebuilder = "etree" ,encoding = None ,
@@ -272,96 +279,18 @@ def normalizeToken(self, token):
272
279
""" HTML5 specific normalizations to the token stream """
273
280
274
281
if token ["type" ]== tokenTypes ["StartTag" ]:
275
- token ["data" ]= dict (token [" data" ][::- 1 ])
282
+ token ["data" ]= OrderedDict (token ['data' ][::- 1 ])
276
283
277
284
return token
278
285
279
286
def adjustMathMLAttributes (self ,token ):
280
- replacements = {"definitionurl" :"definitionURL" }
281
- for k ,v in replacements .items ():
282
- if k in token ["data" ]:
283
- token ["data" ][v ]= token ["data" ][k ]
284
- del token ["data" ][k ]
287
+ adjust_attributes (token ,adjustMathMLAttributes )
285
288
286
289
def adjustSVGAttributes (self ,token ):
287
- replacements = {
288
- "attributename" :"attributeName" ,
289
- "attributetype" :"attributeType" ,
290
- "basefrequency" :"baseFrequency" ,
291
- "baseprofile" :"baseProfile" ,
292
- "calcmode" :"calcMode" ,
293
- "clippathunits" :"clipPathUnits" ,
294
- "contentscripttype" :"contentScriptType" ,
295
- "contentstyletype" :"contentStyleType" ,
296
- "diffuseconstant" :"diffuseConstant" ,
297
- "edgemode" :"edgeMode" ,
298
- "externalresourcesrequired" :"externalResourcesRequired" ,
299
- "filterres" :"filterRes" ,
300
- "filterunits" :"filterUnits" ,
301
- "glyphref" :"glyphRef" ,
302
- "gradienttransform" :"gradientTransform" ,
303
- "gradientunits" :"gradientUnits" ,
304
- "kernelmatrix" :"kernelMatrix" ,
305
- "kernelunitlength" :"kernelUnitLength" ,
306
- "keypoints" :"keyPoints" ,
307
- "keysplines" :"keySplines" ,
308
- "keytimes" :"keyTimes" ,
309
- "lengthadjust" :"lengthAdjust" ,
310
- "limitingconeangle" :"limitingConeAngle" ,
311
- "markerheight" :"markerHeight" ,
312
- "markerunits" :"markerUnits" ,
313
- "markerwidth" :"markerWidth" ,
314
- "maskcontentunits" :"maskContentUnits" ,
315
- "maskunits" :"maskUnits" ,
316
- "numoctaves" :"numOctaves" ,
317
- "pathlength" :"pathLength" ,
318
- "patterncontentunits" :"patternContentUnits" ,
319
- "patterntransform" :"patternTransform" ,
320
- "patternunits" :"patternUnits" ,
321
- "pointsatx" :"pointsAtX" ,
322
- "pointsaty" :"pointsAtY" ,
323
- "pointsatz" :"pointsAtZ" ,
324
- "preservealpha" :"preserveAlpha" ,
325
- "preserveaspectratio" :"preserveAspectRatio" ,
326
- "primitiveunits" :"primitiveUnits" ,
327
- "refx" :"refX" ,
328
- "refy" :"refY" ,
329
- "repeatcount" :"repeatCount" ,
330
- "repeatdur" :"repeatDur" ,
331
- "requiredextensions" :"requiredExtensions" ,
332
- "requiredfeatures" :"requiredFeatures" ,
333
- "specularconstant" :"specularConstant" ,
334
- "specularexponent" :"specularExponent" ,
335
- "spreadmethod" :"spreadMethod" ,
336
- "startoffset" :"startOffset" ,
337
- "stddeviation" :"stdDeviation" ,
338
- "stitchtiles" :"stitchTiles" ,
339
- "surfacescale" :"surfaceScale" ,
340
- "systemlanguage" :"systemLanguage" ,
341
- "tablevalues" :"tableValues" ,
342
- "targetx" :"targetX" ,
343
- "targety" :"targetY" ,
344
- "textlength" :"textLength" ,
345
- "viewbox" :"viewBox" ,
346
- "viewtarget" :"viewTarget" ,
347
- "xchannelselector" :"xChannelSelector" ,
348
- "ychannelselector" :"yChannelSelector" ,
349
- "zoomandpan" :"zoomAndPan"
350
- }
351
- for originalName in list (token ["data" ].keys ()):
352
- if originalName in replacements :
353
- svgName = replacements [originalName ]
354
- token ["data" ][svgName ]= token ["data" ][originalName ]
355
- del token ["data" ][originalName ]
290
+ adjust_attributes (token ,adjustSVGAttributes )
356
291
357
292
def adjustForeignAttributes (self ,token ):
358
- replacements = adjustForeignAttributesMap
359
-
360
- for originalName in token ["data" ].keys ():
361
- if originalName in replacements :
362
- foreignName = replacements [originalName ]
363
- token ["data" ][foreignName ]= token ["data" ][originalName ]
364
- del token ["data" ][originalName ]
293
+ adjust_attributes (token ,adjustForeignAttributesMap )
365
294
366
295
def reparseTokenNormal (self ,token ):
367
296
# pylint:disable=unused-argument
@@ -434,7 +363,7 @@ def getPhases(debug):
434
363
def log (function ):
435
364
"""Logger that records which phase processes each token"""
436
365
type_names = dict ((value ,key )for key ,value in
437
- constants . tokenTypes .items ())
366
+ tokenTypes .items ())
438
367
439
368
def wrapped (self ,* args ,** kwargs ):
440
369
if function .__name__ .startswith ("process" )and len (args )> 0 :
@@ -443,7 +372,7 @@ def wrapped(self, *args, **kwargs):
443
372
info = {"type" :type_names [token ['type' ]]}
444
373
except :
445
374
raise
446
- if token ['type' ]in constants . tagTokenTypes :
375
+ if token ['type' ]in tagTokenTypes :
447
376
info ["name" ]= token ['name' ]
448
377
449
378
self .parser .log .append ((self .parser .tokenizer .state .__name__ ,
@@ -1022,17 +951,9 @@ def __init__(self, parser, tree):
1022
951
self .endTagHandler .default = self .endTagOther
1023
952
1024
953
def isMatchingFormattingElement (self ,node1 ,node2 ):
1025
- if node1 .name != node2 .name or node1 .namespace != node2 .namespace :
1026
- return False
1027
- elif len (node1 .attributes )!= len (node2 .attributes ):
1028
- return False
1029
- else :
1030
- attributes1 = sorted (node1 .attributes .items ())
1031
- attributes2 = sorted (node2 .attributes .items ())
1032
- for attr1 ,attr2 in zip (attributes1 ,attributes2 ):
1033
- if attr1 != attr2 :
1034
- return False
1035
- return True
954
+ return (node1 .name == node2 .name and
955
+ node1 .namespace == node2 .namespace and
956
+ node1 .attributes == node2 .attributes )
1036
957
1037
958
# helper
1038
959
def addFormattingElement (self ,token ):
@@ -2798,6 +2719,16 @@ def processEndTag(self, token):
2798
2719
}
2799
2720
2800
2721
2722
+ def adjust_attributes (token ,replacements ):
2723
+ if PY3 or utils .PY27 :
2724
+ needs_adjustment = viewkeys (token ['data' ])& viewkeys (replacements )
2725
+ else :
2726
+ needs_adjustment = frozenset (token ['data' ])& frozenset (replacements )
2727
+ if needs_adjustment :
2728
+ token ['data' ]= OrderedDict ((replacements .get (k ,k ),v )
2729
+ for k ,v in token ['data' ].items ())
2730
+
2731
+
2801
2732
def impliedTagToken (name ,type = "EndTag" ,attributes = None ,
2802
2733
selfClosing = False ):
2803
2734
if attributes is None :