1- import os
2- import sys
31import collections
4- import importlib . machinery
2+ import tokenize # from stdlib
53
6- # Use Lib/token.py and Lib/tokenize.py to obtain the tokens. To keep this
7- # compatible with older versions of Python, we need to make sure that we only
8- # import these two files (and not any of the dependencies of these files).
9-
10- CURRENT_FOLDER_LOCATION = os .path .dirname (os .path .realpath (__file__ ))
11- LIB_LOCATION = os .path .realpath (os .path .join (CURRENT_FOLDER_LOCATION ,'..' ,'..' ,'Lib' ))
12- TOKEN_LOCATION = os .path .join (LIB_LOCATION ,'token.py' )
13- TOKENIZE_LOCATION = os .path .join (LIB_LOCATION ,'tokenize.py' )
14-
15- token = importlib .machinery .SourceFileLoader ('token' ,
16- TOKEN_LOCATION ).load_module ()
17- # Add token to the module cache so tokenize.py uses that exact one instead of
18- # the one in the stdlib of the interpreter executing this file.
19- sys .modules ['token' ]= token
20- tokenize = importlib .machinery .SourceFileLoader ('tokenize' ,
21- TOKENIZE_LOCATION ).load_module ()
22-
23- from .import grammar
4+ from .import grammar ,token
245
256class ParserGenerator (object ):
267
27- def __init__ (self ,filename ,stream = None ,verbose = False ):
8+ def __init__ (self ,grammar_file , token_file ,stream = None ,verbose = False ):
289close_stream = None
2910if stream is None :
30- stream = open (filename )
11+ stream = open (grammar_file )
3112close_stream = stream .close
32- self .tokens = token
33- self .opmap = token .EXACT_TOKEN_TYPES
13+ with open (token_file )as tok_file :
14+ token_lines = tok_file .readlines ()
15+ self .tokens = dict (token .generate_tokens (token_lines ))
16+ self .opmap = dict (token .generate_opmap (token_lines ))
3417# Manually add <> so it does not collide with !=
35- self .opmap ['<>' ]= self . tokens . NOTEQUAL
18+ self .opmap ['<>' ]= " NOTEQUAL"
3619self .verbose = verbose
37- self .filename = filename
20+ self .filename = grammar_file
3821self .stream = stream
3922self .generator = tokenize .generate_tokens (stream .readline )
4023self .gettoken ()# Initialize lookahead
@@ -108,9 +91,9 @@ def make_label(self, c, label):
10891return ilabel
10992else :
11093# A named token (NAME, NUMBER, STRING)
111- itoken = getattr ( self .tokens , label ,None )
94+ itoken = self .tokens . get ( label ,None )
11295assert isinstance (itoken ,int ),label
113- assert itoken in self .tokens .tok_name ,label
96+ assert itoken in self .tokens .values () ,label
11497if itoken in c .tokens :
11598return c .tokens [itoken ]
11699else :
@@ -126,12 +109,13 @@ def make_label(self, c, label):
126109if value in c .keywords :
127110return c .keywords [value ]
128111else :
129- c .labels .append ((self .tokens . NAME ,value ))
112+ c .labels .append ((self .tokens [ " NAME" ] ,value ))
130113c .keywords [value ]= ilabel
131114return ilabel
132115else :
133116# An operator (any non-numeric token)
134- itoken = self .opmap [value ]# Fails if unknown token
117+ tok_name = self .opmap [value ]# Fails if unknown token
118+ itoken = self .tokens [tok_name ]
135119if itoken in c .tokens :
136120return c .tokens [itoken ]
137121else :
@@ -184,16 +168,16 @@ def parse(self):
184168dfas = collections .OrderedDict ()
185169startsymbol = None
186170# MSTART: (NEWLINE | RULE)* ENDMARKER
187- while self .type != self . tokens .ENDMARKER :
188- while self .type == self . tokens .NEWLINE :
171+ while self .type != tokenize .ENDMARKER :
172+ while self .type == tokenize .NEWLINE :
189173self .gettoken ()
190174# RULE: NAME ':' RHS NEWLINE
191- name = self .expect (self . tokens .NAME )
175+ name = self .expect (tokenize .NAME )
192176if self .verbose :
193177print ("Processing rule {dfa_name}" .format (dfa_name = name ))
194- self .expect (self . tokens .OP ,":" )
178+ self .expect (tokenize .OP ,":" )
195179a ,z = self .parse_rhs ()
196- self .expect (self . tokens .NEWLINE )
180+ self .expect (tokenize .NEWLINE )
197181if self .verbose :
198182self .dump_nfa (name ,a ,z )
199183dfa = self .make_dfa (a ,z )
@@ -309,7 +293,7 @@ def parse_alt(self):
309293# ALT: ITEM+
310294a ,b = self .parse_item ()
311295while (self .value in ("(" ,"[" )or
312- self .type in (self . tokens . NAME ,self . tokens .STRING )):
296+ self .type in (tokenize . NAME ,tokenize .STRING )):
313297c ,d = self .parse_item ()
314298b .addarc (c )
315299b = d
@@ -320,7 +304,7 @@ def parse_item(self):
320304if self .value == "[" :
321305self .gettoken ()
322306a ,z = self .parse_rhs ()
323- self .expect (self . tokens .OP ,"]" )
307+ self .expect (tokenize .OP ,"]" )
324308a .addarc (z )
325309return a ,z
326310else :
@@ -340,9 +324,9 @@ def parse_atom(self):
340324if self .value == "(" :
341325self .gettoken ()
342326a ,z = self .parse_rhs ()
343- self .expect (self . tokens .OP ,")" )
327+ self .expect (tokenize .OP ,")" )
344328return a ,z
345- elif self .type in (self . tokens . NAME ,self . tokens .STRING ):
329+ elif self .type in (tokenize . NAME ,tokenize .STRING ):
346330a = NFAState ()
347331z = NFAState ()
348332a .addarc (z ,self .value )
@@ -365,7 +349,7 @@ def gettoken(self):
365349while tup [0 ]in (tokenize .COMMENT ,tokenize .NL ):
366350tup = next (self .generator )
367351self .type ,self .value ,self .begin ,self .end ,self .line = tup
368- #print self.tokens[ 'tok_name'] [self.type], repr(self.value)
352+ # print(getattr(tokenize, 'tok_name') [self.type], repr(self.value) )
369353
370354def raise_error (self ,msg ,* args ):
371355if args :