11from .import support
22import unittest ,codecs
33
4- from html5lib .inputstream import HTMLInputStream
4+ from html5lib .inputstream import HTMLInputStream , HTMLUnicodeInputStream , HTMLBinaryInputStream
55
class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream):
    """``HTMLUnicodeInputStream`` variant with the chunk size forced to 2.

    Used by the tests below for text (already-decoded) input.
    """

    # Presumably kept tiny so that even short test inputs are read across
    # several chunks -- confirm against the stream implementation.
    _defaultChunkSize = 2
8+
class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream):
    """``HTMLBinaryInputStream`` variant with the chunk size forced to 2.

    Used by the tests below for byte-string input.
    """

    # Presumably kept tiny so that even short test inputs are read across
    # several chunks -- confirm against the stream implementation.
    _defaultChunkSize = 2
811
912class HTMLInputStreamTest (unittest .TestCase ):
1013
def test_char_ascii(self):
    """A byte string read with an explicit ``ascii`` encoding.

    The stream must report ``'ascii'`` as its encoding and hand back the
    single apostrophe as the first character.
    """
    # The input is a bytes literal: the ``b`` prefix has to be adjacent to
    # the opening quote to be valid syntax.
    stream = HTMLInputStream(b"'", encoding='ascii')
    self.assertEqual(stream.charEncoding[0], 'ascii')
    self.assertEqual(stream.char(), "'")
1518
16- def test_char_null (self ):
17- stream = HTMLInputStream ("\x00 " )
18- self .assertEquals (stream .char (),'\ufffd ' )
19-
2019def test_char_utf8 (self ):
2120stream = HTMLInputStream ('\u2018 ' .encode ('utf-8' ),encoding = 'utf-8' )
2221self .assertEquals (stream .charEncoding [0 ],'utf-8' )
@@ -30,7 +29,7 @@ def test_char_win1252(self):
3029self .assertEquals (stream .char (),"\u2019 " )
3130
def test_bom(self):
    """A UTF-8 byte-order mark followed by a single apostrophe.

    The stream must report ``'utf-8'`` as its encoding, and the first
    character returned must be the apostrophe rather than the BOM.
    """
    # codecs.BOM_UTF8 is a byte string, so the payload appended to it must
    # be a bytes literal as well.
    stream = HTMLInputStream(codecs.BOM_UTF8 + b"'")
    self.assertEqual(stream.charEncoding[0], 'utf-8')
    self.assertEqual(stream.char(), "'")
3635
@@ -40,7 +39,7 @@ def test_utf_16(self):
4039self .assertEquals (len (stream .charsUntil (' ' ,True )),1025 )
4140
4241def test_newlines (self ):
43- stream = HTMLInputStreamShortChunk (codecs .BOM_UTF8 + "a\n bb\r \n ccc\r ddddxe" )
42+ stream = HTMLBinaryInputStreamShortChunk (codecs .BOM_UTF8 + b "a\n bb\r \n ccc\r ddddxe" )
4443self .assertEquals (stream .position (), (1 ,0 ))
4544self .assertEquals (stream .charsUntil ('c' ),"a\n bb\n " )
4645self .assertEquals (stream .position (), (3 ,0 ))
@@ -50,12 +49,12 @@ def test_newlines(self):
5049self .assertEquals (stream .position (), (4 ,5 ))
5150
def test_newlines2(self):
    """Newline handling for a run of CRs terminated by a single LF.

    The input is ``size`` carriage returns (``size`` being the default
    chunk size of ``HTMLUnicodeInputStream``) followed by one line feed.
    Reading up to ``'x'``, which never occurs in the input, must return
    exactly ``size`` line feeds.
    """
    size = HTMLUnicodeInputStream._defaultChunkSize
    # NOTE(review): the literals are written without the stray space that
    # followed each escape in the extracted text; with those spaces the
    # expected value below could not match. Presumably the last CR and the
    # LF collapse into one newline even though that CR is the final
    # character of a default-sized chunk -- confirm against the stream
    # implementation.
    stream = HTMLInputStream("\r" * size + "\n")
    self.assertEqual(stream.charsUntil('x'), "\n" * size)
5655
5756def test_position (self ):
58- stream = HTMLInputStreamShortChunk (codecs .BOM_UTF8 + "a\n bb\n ccc\n ddde\n f\n gh" )
57+ stream = HTMLBinaryInputStreamShortChunk (codecs .BOM_UTF8 + b "a\n bb\n ccc\n ddde\n f\n gh" )
5958self .assertEquals (stream .position (), (1 ,0 ))
6059self .assertEquals (stream .charsUntil ('c' ),"a\n bb\n " )
6160self .assertEquals (stream .position (), (3 ,0 ))
@@ -73,7 +72,7 @@ def test_position(self):
7372self .assertEquals (stream .position (), (6 ,1 ))
7473
7574def test_position2 (self ):
76- stream = HTMLInputStreamShortChunk ("abc\n d" )
75+ stream = HTMLUnicodeInputStreamShortChunk ("abc\n d" )
7776self .assertEquals (stream .position (), (1 ,0 ))
7877self .assertEquals (stream .char (),"a" )
7978self .assertEquals (stream .position (), (1 ,1 ))