@@ -59,18 +59,13 @@ class HTMLParser(object):
5959"""HTML parser. Generates a tree structure from a stream of (possibly
6060 malformed) HTML"""
6161
62- def __init__ (self ,tree = None ,tokenizer = tokenizer .HTMLTokenizer ,
63- strict = False ,namespaceHTMLElements = True ,debug = False ):
62+ def __init__ (self ,tree = None ,strict = False ,namespaceHTMLElements = True ,debug = False ):
6463"""
6564 strict - raise an exception when a parse error is encountered
6665
6766 tree - a treebuilder class controlling the type of tree that will be
6867 returned. Built in treebuilders can be accessed through
6968 html5lib.treebuilders.getTreeBuilder(treeType)
70-
71- tokenizer - a class that provides a stream of tokens to the treebuilder.
72- This may be replaced for e.g. a sanitizer which converts some tags to
73- text
7469 """
7570
7671# Raise an exception on the first error encountered
@@ -79,7 +74,6 @@ def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
7974if tree is None :
8075tree = treebuilders .getTreeBuilder ("etree" )
8176self .tree = tree (namespaceHTMLElements )
82- self .tokenizer_class = tokenizer
8377self .errors = []
8478
8579self .phases = dict ([(name ,cls (self ,self .tree ))for name ,cls in
@@ -91,9 +85,9 @@ def _parse(self, stream, innerHTML=False, container="div", encoding=None,
9185self .innerHTMLMode = innerHTML
9286self .container = container
9387self .scripting = scripting
94- self .tokenizer = self . tokenizer_class (stream ,encoding = encoding ,
95- useChardet = useChardet ,
96- parser = self ,** kwargs )
88+ self .tokenizer = tokenizer . HTMLTokenizer (stream ,encoding = encoding ,
89+ useChardet = useChardet ,
90+ parser = self ,** kwargs )
9791self .reset ()
9892
9993try :