@@ -23,42 +23,30 @@ def test_codec_name_c(self):
def test_codec_name_d(self):
    """codecName("ISO_8859--1") must report "windows-1252"."""
    label = "ISO_8859--1"
    resolved = inputstream.codecName(label)
    self.assertEqual(resolved, "windows-1252")
2525
26- def buildTestSuite ():
def runEncodingTest(data, encoding):
    """Parse ``data`` and assert that the parser settled on ``encoding``.

    data -- input handed to ``HTMLParser.parse`` with chardet disabled.
    encoding -- expected encoding name; it is lower-cased and decoded as
        ASCII before the comparison, so it must support
        ``.lower().decode("ascii")``.

    Raises AssertionError when the detected name differs; the message shows
    the input, the expected name and the detected name.
    """
    p = HTMLParser()
    # The return value is not needed: the check reads the state the parse
    # leaves on p.tokenizer.stream.
    p.parse(data, useChardet=False)

    expected = encoding.lower().decode("ascii")
    # Element 0 of charEncoding is what gets compared with the expected name.
    detected = p.tokenizer.stream.charEncoding[0]

    errorMessage = ("Input:\n %s\n Expected:\n %s\n Recieved\n %s\n " %
                    (data, repr(expected), repr(detected)))
    assert expected == detected, errorMessage
35+
def test_encoding():
    """Yield one encoding check per test case in the "encoding" data files.

    Generator-style test: each yielded tuple is
    ``(runEncodingTest, data, encoding)``, built from the ``b'data'`` and
    ``b'encoding'`` entries of a test case.
    """
    for filename in get_data_files("encoding"):
        # NOTE(review): encoding=None presumably keeps the sections as raw
        # bytes, which is why the keys below are byte strings -- confirm
        # against TestData.
        tests = TestData(filename, b"data", encoding=None)
        for test in tests:
            yield (runEncodingTest, test[b'data'], test[b'encoding'])
5643
57- return unittest .defaultTestLoader .loadTestsFromName (__name__ )
58-
59- def main ():
60- buildTestSuite ()
61- unittest .main ()
62-
63- if __name__ == "__main__" :
64- main ()
# Register the chardet-backed test only when chardet can be imported.
try:
    # Imported purely as an availability probe; the name is not used below.
    import chardet
except ImportError:
    print("chardet not found, skipping chardet tests")
else:
    def test_chardet(self):
        """The Big5 sample file must be detected as "big5"."""
        sample_path = os.path.join(test_dir, "encoding", "chardet",
                                   "test_big5.txt")
        # Binary mode hands the detector the raw bytes of the sample, and
        # the with-block closes the file handle deterministically.
        with open(sample_path, "rb") as sample:
            data = sample.read()
        encoding = inputstream.HTMLInputStream(data).charEncoding
        assert encoding[0].lower() == "big5"
    setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)