99from optparse import OptionParser
1010
1111from html5lib import html5parser
12- from html5lib .tokenizer import HTMLTokenizer
1312from html5lib import treebuilders ,serializer ,treewalkers
1413from html5lib import constants
1514from html5lib import utils
@@ -53,9 +52,7 @@ def parse():
5352
5453treebuilder = treebuilders .getTreeBuilder (opts .treebuilder )
5554
56- tokenizer = HTMLTokenizer
57-
58- p = html5parser .HTMLParser (tree = treebuilder ,tokenizer = tokenizer ,debug = opts .log )
55+ p = html5parser .HTMLParser (tree = treebuilder ,debug = opts .log )
5956
6057if opts .fragment :
6158parseMethod = p .parseFragment
@@ -96,7 +93,7 @@ def parse():
9693
9794def run (parseMethod ,f ,encoding ,scripting ):
9895try :
99- document = parseMethod (f ,encoding = encoding ,scripting = scripting )
96+ document = parseMethod (f ,override_encoding = encoding ,scripting = scripting )
10097except :
10198document = None
10299traceback .print_exc ()
@@ -117,16 +114,14 @@ def printOutput(parser, document, opts):
117114document .writexml (sys .stdout ,encoding = "utf-8" )
118115elif tb == "lxml" :
119116import lxml .etree
120- sys .stdout .write (lxml .etree .tostring (document ))
117+ sys .stdout .write (lxml .etree .tostring (document , encoding = "unicode" ))
121118elif tb == "etree" :
122- sys .stdout .write (utils .default_etree .tostring (document ))
119+ sys .stdout .write (utils .default_etree .tostring (document , encoding = "unicode" ))
123120elif opts .tree :
124121if not hasattr (document ,'__getitem__' ):
125122document = [document ]
126123for fragment in document :
127124print (parser .tree .testSerializer (fragment ))
128- elif opts .hilite :
129- sys .stdout .write (document .hilite ("utf-8" ))
130125elif opts .html :
131126kwargs = {}
132127for opt in serializer .HTMLSerializer .options :
@@ -188,9 +183,6 @@ def getOptParser():
188183parser .add_option ("" ,"--no-html" ,action = "store_false" ,default = True ,
189184dest = "html" ,help = "Don't output html" )
190185
191- parser .add_option ("" ,"--hilite" ,action = "store_true" ,default = False ,
192- dest = "hilite" ,help = "Output as formatted highlighted code." )
193-
194186parser .add_option ("-c" ,"--encoding" ,action = "store_true" ,default = False ,
195187dest = "encoding" ,help = "Print character encoding used" )
196188