Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 93365ea

Browse files
committed
Re-run 3to2, and fix where something had gone uncommitted in Python3 before.
1 parent 5d925be commit 93365ea

File tree

5 files changed

+181
-19
lines changed

5 files changed

+181
-19
lines changed

‎html5lib/tokenizer.py‎

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,9 @@
1919

2020
from .inputstreamimportHTMLInputStream
2121

22-
# Group entities by their first character, for faster lookups
23-
entitiesByFirstChar= {}
24-
foreinentities:
25-
entitiesByFirstChar.setdefault(e[0], []).append(e)
22+
from .trieimportTrie
23+
24+
entitiesTrie=Trie(entities)
2625

2726
classHTMLTokenizer(object):
2827
u""" This class takes care of tokenizing HTML.
@@ -183,29 +182,20 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
183182
#
184183
# Consume characters and compare to these to a substring of the
185184
# entity names in the list until the substring no longer matches.
186-
filteredEntityList=entitiesByFirstChar.get(charStack[0], [])
187-
188-
defentitiesStartingWith(name):
189-
return [eforeinfilteredEntityListife.startswith(name)]
190-
entitiesStartingWith.func_annotations= {}
191-
192185
while (charStack[-1]isnotEOF):
193-
filteredEntityList=entitiesStartingWith(u"".join(charStack))
194-
ifnotfilteredEntityList:
186+
ifnotentitiesTrie.has_keys_with_prefix(u"".join(charStack)):
195187
break
196188
charStack.append(self.stream.char())
197189

198190
# At this point we have a string that starts with some characters
199191
# that may match an entity
200-
entityName=None
201-
202192
# Try to find the longest entity the string will match to take care
203193
# of &noti for instance.
204-
forentityLengthinxrange(len(charStack)-1,1,-1):
205-
possibleEntityName=u"".join(charStack[:entityLength])
206-
ifpossibleEntityNameinentities:
207-
entityName=possibleEntityName
208-
break
194+
try:
195+
entityName=entitiesTrie.longest_prefix(u"".join(charStack[:-1]))
196+
entityLength=len(entityName)
197+
exceptKeyError:
198+
entityName=None
209199

210200
ifentityNameisnotNone:
211201
ifentityName[-1]!=u";":

‎html5lib/trie/__init__.py‎

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from __future__importabsolute_import
2+
from .py import Trie as PyTrie

# Pick the trie backend: use the implementation from the sibling ``datrie``
# module when it can be imported, otherwise the one from the ``py`` module.
try:
    from .datrie import Trie as DATrie
except ImportError:
    Trie = PyTrie
else:
    Trie = DATrie

‎html5lib/trie/_base.py‎

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from __future__importabsolute_import
2+
fromcollectionsimportMapping
3+
4+
class Trie(Mapping):
    u"""Abstract base class for tries.

    Concrete subclasses provide the ``Mapping`` primitives
    (``__getitem__``, ``__iter__`` and ``__len__``); this class adds
    prefix-oriented queries implemented purely in terms of them.

    The ``func_annotations = {}`` assignments after each method are kept
    from the original code (presumably left by the 3to2 conversion).
    """

    def keys(self, prefix=None):
        u"""Return the keys as a set.

        When ``prefix`` is given, only keys starting with it are returned.
        """
        keys = super(Trie, self).keys()

        if prefix is None:
            return set(keys)

        return set(x for x in keys if x.startswith(prefix))
    keys.func_annotations = {}

    def has_keys_with_prefix(self, prefix):
        u"""Return True if at least one key starts with ``prefix``."""
        return any(key.startswith(prefix) for key in self.keys())
    has_keys_with_prefix.func_annotations = {}

    def longest_prefix(self, prefix):
        u"""Return the longest leading slice of ``prefix`` that is a key.

        Candidates are tried from the full string down to the empty
        string. Raises ``KeyError(prefix)`` when none of them is a key.
        """
        # ``range`` (rather than ``xrange``) keeps this usable on both
        # Python 2 and Python 3; the sequence is at most len(prefix) + 1 long.
        for end in range(len(prefix), -1, -1):
            candidate = prefix[:end]
            if candidate in self:
                return candidate

        raise KeyError(prefix)
    longest_prefix.func_annotations = {}

    def longest_prefix_item(self, prefix):
        u"""Return ``(key, value)`` for the key found by ``longest_prefix``.

        Raises ``KeyError`` when ``longest_prefix`` does.
        """
        lprefix = self.longest_prefix(prefix)
        return (lprefix, self[lprefix])
    longest_prefix_item.func_annotations = {}

‎html5lib/trie/datrie.py‎

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from __future__importabsolute_import
2+
fromitertoolsimportchain
3+
4+
fromdatrieimportTrieasDATrie
5+
6+
from ._baseimportTrieasABCTrie
7+
8+
class Trie(ABCTrie):
    u"""Trie that stores its items in a ``DATrie`` and delegates to it."""

    def __init__(self, data):
        u"""Copy every item of the mapping ``data`` into a new ``DATrie``.

        Raises ``TypeError`` if any key is not a ``unicode`` string.
        """
        seen_chars = set()
        for name in data.keys():
            if not isinstance(name, unicode):
                raise TypeError(u"All keys must be strings")
            seen_chars.update(name)

        # The DATrie is constructed from every character used by any key
        # (presumably the alphabet it requires up front -- confirm against
        # the datrie documentation).
        self._data = DATrie(u"".join(seen_chars))
        for name, payload in data.items():
            self._data[name] = payload
    __init__.func_annotations = {}

    def __contains__(self, key):
        u"""Return whether ``key`` is stored in the underlying trie."""
        return key in self._data
    __contains__.func_annotations = {}

    def __len__(self):
        u"""Return the number of stored items."""
        return len(self._data)
    __len__.func_annotations = {}

    def __iter__(self):
        u"""Iteration is not implemented for this backend."""
        raise NotImplementedError()
    __iter__.func_annotations = {}

    def __getitem__(self, key):
        u"""Return the value stored under ``key``."""
        return self._data[key]
    __getitem__.func_annotations = {}

    def keys(self, prefix=None):
        u"""Delegate to the underlying trie's ``keys(prefix)``."""
        return self._data.keys(prefix)
    keys.func_annotations = {}

    def has_keys_with_prefix(self, prefix):
        u"""Delegate to the underlying trie's ``has_keys_with_prefix``."""
        return self._data.has_keys_with_prefix(prefix)
    has_keys_with_prefix.func_annotations = {}

    def longest_prefix(self, prefix):
        u"""Delegate to the underlying trie's ``longest_prefix``."""
        return self._data.longest_prefix(prefix)
    longest_prefix.func_annotations = {}

    def longest_prefix_item(self, prefix):
        u"""Delegate to the underlying trie's ``longest_prefix_item``."""
        return self._data.longest_prefix_item(prefix)
    longest_prefix_item.func_annotations = {}

‎html5lib/trie/py.py‎

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from __future__importabsolute_import
2+
frombisectimportbisect_left
3+
4+
from ._baseimportTrieasABCTrie
5+
6+
class Trie(ABCTrie):
    u"""Trie emulation built from a dict plus a sorted list of its keys.

    Prefix queries are answered by binary search (``bisect_left``) over
    the sorted key list. The most recent prefix passed to ``keys`` and
    the index range of its matches are cached, so a later query whose
    prefix extends the cached one only searches inside that range.
    """

    def __init__(self, data):
        u"""Wrap the mapping ``data`` (stored by reference, not copied).

        Raises ``TypeError`` if any key is not a ``unicode`` string.
        """
        if not all(isinstance(x, unicode) for x in data.keys()):
            raise TypeError(u"All keys must be strings")

        self._data = data
        self._keys = sorted(data.keys())
        # Cached prefix and the [start, stop) slice of _keys that matched
        # it; the empty prefix trivially matches the whole list.
        self._cachestr = u""
        self._cachepoints = (0, len(data))
    __init__.func_annotations = {}

    def __contains__(self, key):
        u"""Return whether ``key`` is a key of the wrapped mapping."""
        return key in self._data
    __contains__.func_annotations = {}

    def __len__(self):
        u"""Return the number of items in the wrapped mapping."""
        return len(self._data)
    __len__.func_annotations = {}

    def __iter__(self):
        u"""Iterate over the wrapped mapping."""
        return iter(self._data)
    __iter__.func_annotations = {}

    def __getitem__(self, key):
        u"""Return the value stored under ``key``."""
        return self._data[key]
    __getitem__.func_annotations = {}

    def keys(self, prefix=None):
        u"""Return the set of keys, optionally only those starting with ``prefix``.

        A ``prefix`` of ``None`` or ``u""`` returns every key. A
        successful prefix lookup updates the cache used by later calls.
        """
        if prefix is None or prefix == u"" or not self._keys:
            return set(self._keys)

        if prefix.startswith(self._cachestr):
            # Every match for the longer prefix lies inside the cached range.
            lo, hi = self._cachepoints
            start = i = bisect_left(self._keys, prefix, lo, hi)
        else:
            start = i = bisect_left(self._keys, prefix)

        keys = set()
        total = len(self._keys)
        if start == total:
            return keys

        # The bound check matters when the matching run extends to the very
        # last key: without it the loop indexes one past the end of the list.
        while i < total and self._keys[i].startswith(prefix):
            keys.add(self._keys[i])
            i += 1

        self._cachestr = prefix
        self._cachepoints = (start, i)

        return keys
    keys.func_annotations = {}

    def has_keys_with_prefix(self, prefix):
        u"""Return True if at least one key starts with ``prefix``.

        Reads the prefix cache but does not update it.
        """
        if prefix in self._data:
            return True

        if prefix.startswith(self._cachestr):
            lo, hi = self._cachepoints
            i = bisect_left(self._keys, prefix, lo, hi)
        else:
            i = bisect_left(self._keys, prefix)

        if i == len(self._keys):
            return False

        # In sorted order, the first key >= prefix starts with the prefix
        # if any key does.
        return self._keys[i].startswith(prefix)
    has_keys_with_prefix.func_annotations = {}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp