11from __future__import absolute_import ,division ,unicode_literals
22
3+ import codecs
34import json
45import warnings
56import re
67
8+ import pytest
79from six import unichr
810
9- from .support import get_data_files
10-
1111from html5lib .tokenizer import HTMLTokenizer
1212from html5lib import constants ,utils
1313
@@ -172,27 +172,6 @@ def repl(m):
172172return test
173173
174174
175- def runTokenizerTest (test ):
176- warnings .resetwarnings ()
177- warnings .simplefilter ("error" )
178-
179- expected = test ['output' ]
180- if 'lastStartTag' not in test :
181- test ['lastStartTag' ]= None
182- parser = TokenizerTestParser (test ['initialState' ],
183- test ['lastStartTag' ])
184- tokens = parser .parse (test ['input' ])
185- received = normalizeTokens (tokens )
186- errorMsg = "\n " .join (["\n \n Initial state:" ,
187- test ['initialState' ],
188- "\n Input:" ,test ['input' ],
189- "\n Expected:" ,repr (expected ),
190- "\n received:" ,repr (tokens )])
191- errorMsg = errorMsg
192- ignoreErrorOrder = test .get ('ignoreErrorOrder' ,False )
193- assert tokensMatch (expected ,received ,ignoreErrorOrder ,True ),errorMsg
194-
195-
196175def _doCapitalize (match ):
197176return match .group (1 ).upper ()
198177
@@ -205,18 +184,68 @@ def capitalize(s):
205184return s
206185
207186
208- def testTokenizer ( ):
209- for filename in get_data_files ( 'tokenizer' , '*.test' ):
210- with open (filename )as fp :
class TokenizerFile(pytest.File):
    """Collects tokenizer test cases from a single ``*.test`` JSON file."""

    def collect(self):
        # Tokenizer test data is UTF-8 encoded JSON with a top-level
        # "tests" list; files without that key yield no test items.
        with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp:
            suite = json.load(fp)
        if 'tests' in suite:
            for index, testdata in enumerate(suite['tests']):
                yield TokenizerTestCollector(str(index), self, testdata=testdata)
194+
195+
class TokenizerTestCollector(pytest.Collector):
    """Wraps one JSON test entry and yields one item per initial tokenizer state."""

    def __init__(self, name, parent=None, config=None, session=None, testdata=None):
        super(TokenizerTestCollector, self).__init__(name, parent, config, session)
        # Default to the tokenizer's ordinary starting state when the
        # test entry does not specify any.
        if 'initialStates' not in testdata:
            testdata["initialStates"] = ["Data state"]
        # Double-escaped tests store their input/output in escaped form.
        if 'doubleEscaped' in testdata:
            testdata = unescape(testdata)
        self.testdata = testdata

    def collect(self):
        for state in self.testdata["initialStates"]:
            state = capitalize(state)
            item = TokenizerTest(state, self, self.testdata, state)
            if self.testdata["input"] is None:
                # Input was unrepresentable on this platform (lone surrogates),
                # so the item is collected but skipped rather than run.
                item.add_marker(pytest.mark.skipif(True, reason="Relies on lone surrogates"))
            yield item
215+
216+
class TokenizerTest(pytest.Item):
    """A single tokenizer test: one JSON test entry run from one initial state."""

    def __init__(self, name, parent, test, initialState):
        super(TokenizerTest, self).__init__(name, parent)
        # skipif markers need a function-like `obj` to attach to; any
        # dummy callable satisfies that.
        self.obj = lambda: 1
        self.test = test
        self.initialState = initialState

    def runtest(self):
        # Turn any warning raised during tokenization into an error so
        # it fails the test instead of passing silently.
        warnings.resetwarnings()
        warnings.simplefilter("error")

        expected = self.test['output']
        if 'lastStartTag' not in self.test:
            self.test['lastStartTag'] = None
        parser = TokenizerTestParser(self.initialState,
                                     self.test['lastStartTag'])
        tokens = parser.parse(self.test['input'])
        received = normalizeTokens(tokens)
        errorMsg = "\n".join(["\n\nInitial state:",
                              self.initialState,
                              "\nInput:", self.test['input'],
                              "\nExpected:", repr(expected),
                              "\nreceived:", repr(tokens)])
        # Fixed: dropped the leftover no-op `errorMsg = errorMsg` statement.
        ignoreErrorOrder = self.test.get('ignoreErrorOrder', False)
        assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg

    def repr_failure(self, excinfo):
        """Trim this harness's own frames out of reported tracebacks."""
        traceback = excinfo.traceback
        ntraceback = traceback.cut(path=__file__)
        excinfo.traceback = ntraceback.filter()

        return excinfo.getrepr(funcargs=True,
                               showlocals=False,
                               style="short", tbfilter=False)