Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d8d5bb6

Browse files
committed
Remove ability to use a custom tokenizer
This should be unneeded since the sanitizer changes (#110)
1 parent e65bee9 commit d8d5bb6

File tree

1 file changed

+4
-10
lines changed

1 file changed

+4
-10
lines changed

‎html5lib/html5parser.py

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -59,18 +59,13 @@ class HTMLParser(object):
5959
"""HTML parser. Generates a tree structure from a stream of (possibly
6060
malformed) HTML"""
6161

62-
def__init__(self,tree=None,tokenizer=tokenizer.HTMLTokenizer,
63-
strict=False,namespaceHTMLElements=True,debug=False):
62+
def__init__(self,tree=None,strict=False,namespaceHTMLElements=True,debug=False):
6463
"""
6564
strict - raise an exception when a parse error is encountered
6665
6766
tree - a treebuilder class controlling the type of tree that will be
6867
returned. Built in treebuilders can be accessed through
6968
html5lib.treebuilders.getTreeBuilder(treeType)
70-
71-
tokenizer - a class that provides a stream of tokens to the treebuilder.
72-
This may be replaced for e.g. a sanitizer which converts some tags to
73-
text
7469
"""
7570

7671
# Raise an exception on the first error encountered
@@ -79,7 +74,6 @@ def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
7974
iftreeisNone:
8075
tree=treebuilders.getTreeBuilder("etree")
8176
self.tree=tree(namespaceHTMLElements)
82-
self.tokenizer_class=tokenizer
8377
self.errors= []
8478

8579
self.phases=dict([(name,cls(self,self.tree))forname,clsin
@@ -91,9 +85,9 @@ def _parse(self, stream, innerHTML=False, container="div", encoding=None,
9185
self.innerHTMLMode=innerHTML
9286
self.container=container
9387
self.scripting=scripting
94-
self.tokenizer=self.tokenizer_class(stream,encoding=encoding,
95-
useChardet=useChardet,
96-
parser=self,**kwargs)
88+
self.tokenizer=tokenizer.HTMLTokenizer(stream,encoding=encoding,
89+
useChardet=useChardet,
90+
parser=self,**kwargs)
9791
self.reset()
9892

9993
try:

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp