Commit b1fae75

Use tokenize from stdlib, detach completely from lib2to3 and fix some typos

1 parent: 3d593ef
5 files changed: 140 additions & 48 deletions

Makefile.pre.in

Lines changed: 1 addition & 0 deletions
@@ -788,6 +788,7 @@ regen-grammar: regen-token
 	# from Grammar/Grammar using pgen
 	@$(MKDIR_P) Include
 	$(PYTHON_FOR_REGEN) -m Parser.pgen $(srcdir)/Grammar/Grammar \
+		$(srcdir)/Grammar/Tokens \
 		$(srcdir)/Include/graminit.h.new \
 		$(srcdir)/Python/graminit.c.new
 	$(UPDATE_FILE) $(srcdir)/Include/graminit.h $(srcdir)/Include/graminit.h.new
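
For orientation, this is roughly the command the updated regen-grammar recipe now runs. A minimal sketch in Python rather than make; the interpreter name and paths stand in for $(PYTHON_FOR_REGEN) and $(srcdir), so they are illustrative:

import subprocess

# Regenerate the parser tables the way the Makefile target does, assuming
# this runs from the root of a CPython source checkout.
subprocess.run([
    "python", "-m", "Parser.pgen",
    "Grammar/Grammar",          # grammar definition in EBNF format
    "Grammar/Tokens",           # token definitions (the newly added argument)
    "Include/graminit.h.new",   # non-terminals written as #defines
    "Python/graminit.c.new",    # grammar tables as initialized data
], check=True)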

Parser/pgen/__main__.py

Lines changed: 9 additions & 5 deletions
@@ -8,22 +8,26 @@ def main():
         "grammar", type=str, help="The file with the grammar definition in EBNF format"
     )
     parser.add_argument(
-        "gramminit_h",
+        "tokens", type=str, help="The file with the token definitions"
+    )
+    parser.add_argument(
+        "graminit_h",
         type=argparse.FileType('w'),
         help="The path to write the grammar's non-terminals as #defines",
     )
     parser.add_argument(
-        "gramminit_c",
+        "graminit_c",
         type=argparse.FileType('w'),
         help="The path to write the grammar as initialized data",
     )
+
     parser.add_argument("--verbose", "-v", action="count")
     args = parser.parse_args()

-    p = ParserGenerator(args.grammar, verbose=args.verbose)
+    p = ParserGenerator(args.grammar, args.tokens, verbose=args.verbose)
     grammar = p.make_grammar()
-    grammar.produce_graminit_h(args.gramminit_h.write)
-    grammar.produce_graminit_c(args.gramminit_c.write)
+    grammar.produce_graminit_h(args.graminit_h.write)
+    grammar.produce_graminit_c(args.graminit_c.write)


 if __name__ == "__main__":
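
The same pipeline can also be driven programmatically. A minimal sketch, assuming it runs from the root of a CPython checkout so the Parser.pgen package and both input files are available (the output paths are illustrative):

from Parser.pgen.pgen import ParserGenerator

p = ParserGenerator("Grammar/Grammar", "Grammar/Tokens")
grammar = p.make_grammar()
with open("graminit.h", "w") as h, open("graminit.c", "w") as c:
    grammar.produce_graminit_h(h.write)   # non-terminal #defines
    grammar.produce_graminit_c(c.write)   # initialized grammar tables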

Parser/pgen/grammar.py

Lines changed: 65 additions & 2 deletions
@@ -1,6 +1,69 @@
-from lib2to3.pgen2 import grammar
+import collections

-class Grammar(grammar.Grammar):
+class Grammar:
+    """Pgen parsing tables conversion class.
+
+    Once initialized, this class supplies the grammar tables for the
+    parsing engine implemented by parse.py. The parsing engine
+    accesses the instance variables directly. The class here does not
+    provide initialization of the tables; several subclasses exist to
+    do this (see the conv and pgen modules).
+
+    The load() method reads the tables from a pickle file, which is
+    much faster than the other ways offered by subclasses. The pickle
+    file is written by calling dump() (after loading the grammar
+    tables using a subclass). The report() method prints a readable
+    representation of the tables to stdout, for debugging.
+
+    The instance variables are as follows:
+
+    symbol2number -- a dict mapping symbol names to numbers. Symbol
+                     numbers are always 256 or higher, to distinguish
+                     them from token numbers, which are between 0 and
+                     255 (inclusive).
+
+    number2symbol -- a dict mapping numbers to symbol names;
+                     these two are each other's inverse.
+
+    states -- a list of DFAs, where each DFA is a list of
+              states, each state is a list of arcs, and each
+              arc is a (i, j) pair where i is a label and j is
+              a state number. The DFA number is the index into
+              this list. (This name is slightly confusing.)
+              Final states are represented by a special arc of
+              the form (0, j) where j is its own state number.
+
+    dfas -- a dict mapping symbol numbers to (DFA, first)
+            pairs, where DFA is an item from the states list
+            above, and first is a set of tokens that can
+            begin this grammar rule (represented by a dict
+            whose values are always 1).
+
+    labels -- a list of (x, y) pairs where x is either a token
+              number or a symbol number, and y is either None
+              or a string; the strings are keywords. The label
+              number is the index in this list; label numbers
+              are used to mark state transitions (arcs) in the
+              DFAs.
+
+    start -- the number of the grammar's start symbol.
+
+    keywords -- a dict mapping keyword strings to arc labels.
+
+    tokens -- a dict mapping token numbers to arc labels.
+
+    """
+
+    def __init__(self):
+        self.symbol2number = collections.OrderedDict()
+        self.number2symbol = collections.OrderedDict()
+        self.states = []
+        self.dfas = collections.OrderedDict()
+        self.labels = [(0, "EMPTY")]
+        self.keywords = collections.OrderedDict()
+        self.tokens = collections.OrderedDict()
+        self.symbol2label = collections.OrderedDict()
+        self.start = 256

     def produce_graminit_h(self, writer):
         writer("/* Generated by Parser/pgen */\n\n")

Parser/pgen/pgen.py

Lines changed: 25 additions & 41 deletions
@@ -1,40 +1,23 @@
-import os
-import sys
 import collections
-import importlib.machinery
+import tokenize  # from stdlib

-# Use Lib/token.py and Lib/tokenize.py to obtain the tokens. To maintain this
-# compatible with older versions of Python, we need to make sure that we only
-# import these two files (and not any of the dependencies of these files).
-
-CURRENT_FOLDER_LOCATION = os.path.dirname(os.path.realpath(__file__))
-LIB_LOCATION = os.path.realpath(os.path.join(CURRENT_FOLDER_LOCATION, '..', '..', 'Lib'))
-TOKEN_LOCATION = os.path.join(LIB_LOCATION, 'token.py')
-TOKENIZE_LOCATION = os.path.join(LIB_LOCATION, 'tokenize.py')
-
-token = importlib.machinery.SourceFileLoader('token',
-                                             TOKEN_LOCATION).load_module()
-# Add token to the module cache so tokenize.py uses that excact one instead of
-# the one in the stdlib of the interpreter executing this file.
-sys.modules['token'] = token
-tokenize = importlib.machinery.SourceFileLoader('tokenize',
-                                                TOKENIZE_LOCATION).load_module()
-
-from . import grammar
+from . import grammar, token

 class ParserGenerator(object):

-    def __init__(self, filename, stream=None, verbose=False):
+    def __init__(self, grammar_file, token_file, stream=None, verbose=False):
         close_stream = None
         if stream is None:
-            stream = open(filename)
+            stream = open(grammar_file)
             close_stream = stream.close
-        self.tokens = token
-        self.opmap = token.EXACT_TOKEN_TYPES
+        with open(token_file) as tok_file:
+            token_lines = tok_file.readlines()
+        self.tokens = dict(token.generate_tokens(token_lines))
+        self.opmap = dict(token.generate_opmap(token_lines))
         # Manually add <> so it does not collide with !=
-        self.opmap['<>'] = self.tokens.NOTEQUAL
+        self.opmap['<>'] = "NOTEQUAL"
         self.verbose = verbose
-        self.filename = filename
+        self.filename = grammar_file
         self.stream = stream
         self.generator = tokenize.generate_tokens(stream.readline)
         self.gettoken()  # Initialize lookahead

@@ -108,9 +91,9 @@ def make_label(self, c, label):
                 return ilabel
             else:
                 # A named token (NAME, NUMBER, STRING)
-                itoken = getattr(self.tokens, label, None)
+                itoken = self.tokens.get(label, None)
                 assert isinstance(itoken, int), label
-                assert itoken in self.tokens.tok_name, label
+                assert itoken in self.tokens.values(), label
                 if itoken in c.tokens:
                     return c.tokens[itoken]
                 else:

@@ -126,12 +109,13 @@ def make_label(self, c, label):
                 if value in c.keywords:
                     return c.keywords[value]
                 else:
-                    c.labels.append((self.tokens.NAME, value))
+                    c.labels.append((self.tokens["NAME"], value))
                     c.keywords[value] = ilabel
                     return ilabel
             else:
                 # An operator (any non-numeric token)
-                itoken = self.opmap[value]  # Fails if unknown token
+                tok_name = self.opmap[value]  # Fails if unknown token
+                itoken = self.tokens[tok_name]
                 if itoken in c.tokens:
                     return c.tokens[itoken]
                 else:

@@ -184,16 +168,16 @@ def parse(self):
         dfas = collections.OrderedDict()
         startsymbol = None
         # MSTART: (NEWLINE | RULE)* ENDMARKER
-        while self.type != self.tokens.ENDMARKER:
-            while self.type == self.tokens.NEWLINE:
+        while self.type != tokenize.ENDMARKER:
+            while self.type == tokenize.NEWLINE:
                 self.gettoken()
             # RULE: NAME ':' RHS NEWLINE
-            name = self.expect(self.tokens.NAME)
+            name = self.expect(tokenize.NAME)
             if self.verbose:
                 print("Processing rule {dfa_name}".format(dfa_name=name))
-            self.expect(self.tokens.OP, ":")
+            self.expect(tokenize.OP, ":")
             a, z = self.parse_rhs()
-            self.expect(self.tokens.NEWLINE)
+            self.expect(tokenize.NEWLINE)
             if self.verbose:
                 self.dump_nfa(name, a, z)
             dfa = self.make_dfa(a, z)

@@ -309,7 +293,7 @@ def parse_alt(self):
         # ALT: ITEM+
         a, b = self.parse_item()
         while (self.value in ("(", "[") or
-               self.type in (self.tokens.NAME, self.tokens.STRING)):
+               self.type in (tokenize.NAME, tokenize.STRING)):
             c, d = self.parse_item()
             b.addarc(c)
             b = d

@@ -320,7 +304,7 @@ def parse_item(self):
         if self.value == "[":
             self.gettoken()
             a, z = self.parse_rhs()
-            self.expect(self.tokens.OP, "]")
+            self.expect(tokenize.OP, "]")
             a.addarc(z)
             return a, z
         else:

@@ -340,9 +324,9 @@ def parse_atom(self):
         if self.value == "(":
             self.gettoken()
             a, z = self.parse_rhs()
-            self.expect(self.tokens.OP, ")")
+            self.expect(tokenize.OP, ")")
             return a, z
-        elif self.type in (self.tokens.NAME, self.tokens.STRING):
+        elif self.type in (tokenize.NAME, tokenize.STRING):
             a = NFAState()
             z = NFAState()
             a.addarc(z, self.value)

@@ -365,7 +349,7 @@ def gettoken(self):
         while tup[0] in (tokenize.COMMENT, tokenize.NL):
             tup = next(self.generator)
         self.type, self.value, self.begin, self.end, self.line = tup
-        #print self.tokens['tok_name'][self.type], repr(self.value)
+        #print(getattr(tokenize, 'tok_name')[self.type], repr(self.value))

     def raise_error(self, msg, *args):
         if args:
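
The core of this change is that token and operator lookups now go through plain dicts built from Grammar/Tokens instead of attributes of a hand-loaded token module. A self-contained sketch of the two-step operator resolution in make_label, with illustrative numeric ids:

tokens = {"ENDMARKER": 0, "NAME": 1, "NOTEQUAL": 54}  # as from generate_tokens
opmap = {"!=": "NOTEQUAL", "<>": "NOTEQUAL"}          # as from generate_opmap

tok_name = opmap["<>"]      # operator text -> token name: "NOTEQUAL"
itoken = tokens[tok_name]   # token name -> numeric token id: 54
assert itoken == tokens[opmap["!="]]  # '<>' and '!=' share one token id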

Parser/pgen/token.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+import itertools
+
+def generate_tokens(tokens):
+    numbers = itertools.count(0)
+    for line in tokens:
+        line = line.strip()
+
+        if not line:
+            continue
+        if line.strip().startswith('#'):
+            continue
+
+        name = line.split()[0]
+        yield (name, next(numbers))
+
+    yield ('N_TOKENS', next(numbers))
+    yield ('NT_OFFSET', 256)
+
+def generate_opmap(tokens):
+    for line in tokens:
+        line = line.strip()
+
+        if not line:
+            continue
+        if line.strip().startswith('#'):
+            continue
+
+        pieces = line.split()
+
+        if len(pieces) != 2:
+            continue
+
+        name, op = pieces
+        yield (op.strip("'"), name)
+
+    # Yield independently <>. This is needed so it does not collide
+    # with the token generation in "generate_tokens" because if this
+    # symbol is included in Grammar/Tokens, it will collide with !=
+    # as it has the same name (NOTEQUAL).
+    yield ('<>', 'NOTEQUAL')
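
A quick demonstration of both generators on a few lines in the Grammar/Tokens format (one token name per line, with operators followed by their quoted literal; the sample lines are illustrative):

sample = [
    "# comments and blank lines are skipped\n",
    "ENDMARKER\n",
    "NAME\n",
    "NOTEQUAL '!='\n",
]

print(dict(generate_tokens(sample)))
# {'ENDMARKER': 0, 'NAME': 1, 'NOTEQUAL': 2, 'N_TOKENS': 3, 'NT_OFFSET': 256}
print(dict(generate_opmap(sample)))
# {'!=': 'NOTEQUAL', '<>': 'NOTEQUAL'}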
