1
1
from __future__ import absolute_import ,division ,unicode_literals
2
2
3
- import os
4
- import sys
5
- import unittest
6
- import warnings
7
- from difflib import unified_diff
3
+ import pytest
8
4
9
- try :
10
- unittest .TestCase .assertEqual
11
- except AttributeError :
12
- unittest .TestCase .assertEqual = unittest .TestCase .assertEquals
5
+ from .support import treeTypes
13
6
14
- from .support import get_data_files ,TestData ,convertExpected
15
-
16
- from html5lib import html5parser ,treewalkers ,treebuilders ,treeadapters ,constants
7
+ from html5lib import html5parser ,treewalkers
17
8
from html5lib .filters .lint import Filter as Lint
18
9
19
-
20
- treeTypes = {
21
- "DOM" : {"builder" :treebuilders .getTreeBuilder ("dom" ),
22
- "walker" :treewalkers .getTreeWalker ("dom" )},
23
- }
24
-
25
- # Try whatever etree implementations are available from a list that are
26
- #"supposed" to work
27
- try :
28
- import xml .etree .ElementTree as ElementTree
29
- except ImportError :
30
- pass
31
- else :
32
- treeTypes ['ElementTree' ]= \
33
- {"builder" :treebuilders .getTreeBuilder ("etree" ,ElementTree ,fullTree = True ),
34
- "walker" :treewalkers .getTreeWalker ("etree" ,ElementTree )}
35
-
36
- try :
37
- import xml .etree .cElementTree as ElementTree
38
- except ImportError :
39
- pass
40
- else :
41
- treeTypes ['cElementTree' ]= \
42
- {"builder" :treebuilders .getTreeBuilder ("etree" ,ElementTree ,fullTree = True ),
43
- "walker" :treewalkers .getTreeWalker ("etree" ,ElementTree )}
44
-
45
-
46
- try :
47
- import lxml .etree as ElementTree # flake8: noqa
48
- except ImportError :
49
- pass
50
- else :
51
- treeTypes ['lxml_native' ]= \
52
- {"builder" :treebuilders .getTreeBuilder ("lxml" ),
53
- "walker" :treewalkers .getTreeWalker ("lxml" )}
54
-
55
-
56
- try :
57
- import genshi # flake8: noqa
58
- except ImportError :
59
- pass
60
- else :
61
- treeTypes ["genshi" ]= \
62
- {"builder" :treebuilders .getTreeBuilder ("dom" ),
63
- "adapter" :lambda tree :treeadapters .genshi .to_genshi (treewalkers .getTreeWalker ("dom" )(tree )),
64
- "walker" :treewalkers .getTreeWalker ("genshi" )}
65
-
66
10
import re
67
11
attrlist = re .compile (r"^(\s+)\w+=.*(\n\1\w+=.*)+" ,re .M )
68
12
@@ -73,80 +17,29 @@ def sortattrs(x):
73
17
return "\n " .join (lines )
74
18
75
19
76
- class TokenTestCase (unittest .TestCase ):
77
- def test_all_tokens (self ):
78
- expected = [
79
- {'data' : {},'type' :'StartTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'html' },
80
- {'data' : {},'type' :'StartTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'head' },
81
- {'type' :'EndTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'head' },
82
- {'data' : {},'type' :'StartTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'body' },
83
- {'data' :'a' ,'type' :'Characters' },
84
- {'data' : {},'type' :'StartTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'div' },
85
- {'data' :'b' ,'type' :'Characters' },
86
- {'type' :'EndTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'div' },
87
- {'data' :'c' ,'type' :'Characters' },
88
- {'type' :'EndTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'body' },
89
- {'type' :'EndTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'html' }
90
- ]
91
- for treeName ,treeCls in sorted (treeTypes .items ()):
92
- p = html5parser .HTMLParser (tree = treeCls ["builder" ])
93
- document = p .parse ("<html><head></head><body>a<div>b</div>c</body></html>" )
94
- document = treeCls .get ("adapter" ,lambda x :x )(document )
95
- output = Lint (treeCls ["walker" ](document ))
96
- for expectedToken ,outputToken in zip (expected ,output ):
97
- self .assertEqual (expectedToken ,outputToken )
98
-
99
-
100
- def runTreewalkerTest (innerHTML ,input ,expected ,errors ,treeClass ):
101
- warnings .resetwarnings ()
102
- warnings .simplefilter ("error" )
103
- try :
104
- p = html5parser .HTMLParser (tree = treeClass ["builder" ])
105
- if innerHTML :
106
- document = p .parseFragment (input ,innerHTML )
107
- else :
108
- document = p .parse (input )
109
- except constants .DataLossWarning :
110
- # Ignore testcases we know we don't pass
111
- return
112
-
113
- document = treeClass .get ("adapter" ,lambda x :x )(document )
114
- try :
115
- output = treewalkers .pprint (Lint (treeClass ["walker" ](document )))
116
- output = attrlist .sub (sortattrs ,output )
117
- expected = attrlist .sub (sortattrs ,convertExpected (expected ))
118
- diff = "" .join (unified_diff ([line + "\n " for line in expected .splitlines ()],
119
- [line + "\n " for line in output .splitlines ()],
120
- "Expected" ,"Received" ))
121
- assert expected == output ,"\n " .join ([
122
- "" ,"Input:" ,input ,
123
- "" ,"Expected:" ,expected ,
124
- "" ,"Received:" ,output ,
125
- "" ,"Diff:" ,diff ,
126
- ])
127
- except NotImplementedError :
128
- pass # Amnesty for those that confess...
129
-
130
-
131
- def test_treewalker ():
132
- sys .stdout .write ('Testing tree walkers ' + " " .join (list (treeTypes .keys ()))+ "\n " )
133
-
20
+ def test_all_tokens ():
21
+ expected = [
22
+ {'data' : {},'type' :'StartTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'html' },
23
+ {'data' : {},'type' :'StartTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'head' },
24
+ {'type' :'EndTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'head' },
25
+ {'data' : {},'type' :'StartTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'body' },
26
+ {'data' :'a' ,'type' :'Characters' },
27
+ {'data' : {},'type' :'StartTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'div' },
28
+ {'data' :'b' ,'type' :'Characters' },
29
+ {'type' :'EndTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'div' },
30
+ {'data' :'c' ,'type' :'Characters' },
31
+ {'type' :'EndTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'body' },
32
+ {'type' :'EndTag' ,'namespace' :'http://www.w3.org/1999/xhtml' ,'name' :'html' }
33
+ ]
134
34
for treeName ,treeCls in sorted (treeTypes .items ()):
135
- files = get_data_files ('tree-construction' )
136
- for filename in files :
137
- testName = os .path .basename (filename ).replace (".dat" ,"" )
138
- if testName in ("template" ,):
139
- continue
140
-
141
- tests = TestData (filename ,"data" )
142
-
143
- for index ,test in enumerate (tests ):
144
- (input ,errors ,
145
- innerHTML ,expected )= [test [key ]for key in ("data" ,"errors" ,
146
- "document-fragment" ,
147
- "document" )]
148
- errors = errors .split ("\n " )
149
- yield runTreewalkerTest ,innerHTML ,input ,expected ,errors ,treeCls
35
+ if treeCls is None :
36
+ continue
37
+ p = html5parser .HTMLParser (tree = treeCls ["builder" ])
38
+ document = p .parse ("<html><head></head><body>a<div>b</div>c</body></html>" )
39
+ document = treeCls .get ("adapter" ,lambda x :x )(document )
40
+ output = Lint (treeCls ["walker" ](document ))
41
+ for expectedToken ,outputToken in zip (expected ,output ):
42
+ assert expectedToken == outputToken
150
43
151
44
152
45
def set_attribute_on_first_child (docfrag ,name ,value ,treeName ):
@@ -164,6 +57,8 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
164
57
def runTreewalkerEditTest (intext ,expected ,attrs_to_add ,tree ):
165
58
"""tests what happens when we add attributes to the intext"""
166
59
treeName ,treeClass = tree
60
+ if treeClass is None :
61
+ pytest .skip ("Treebuilder not loaded" )
167
62
parser = html5parser .HTMLParser (tree = treeClass ["builder" ])
168
63
document = parser .parseFragment (intext )
169
64
for nom ,val in attrs_to_add :
@@ -172,7 +67,7 @@ def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
172
67
document = treeClass .get ("adapter" ,lambda x :x )(document )
173
68
output = treewalkers .pprint (treeClass ["walker" ](document ))
174
69
output = attrlist .sub (sortattrs ,output )
175
- if not output in expected :
70
+ if output not in expected :
176
71
raise AssertionError ("TreewalkerEditTest: %s\n Expected:\n %s\n Received:\n %s" % (treeName ,expected ,output ))
177
72
178
73