Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

gh-102856: Python tokenizer implementation for PEP 701#104323

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
pablogsal merged 20 commits into python:main from mgmacias95:python_tokenizer
May 21, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit — hold shift + click to select a range
008f8e5
First iteration
mgmacias95 — Apr 19, 2023
67a6ad6
Handle escaping {
mgmacias95 — Apr 27, 2023
f58104d
nested expressions
mgmacias95 — Apr 29, 2023
26102cc
Recursive expression tokenization
mgmacias95 — May 2, 2023
a5f4b40
Remove intermediate token created for dev purposes
mgmacias95 — May 2, 2023
598bab4
More improvements
mgmacias95 — May 3, 2023
a0ed816
fix handling of } tokens
mgmacias95 — May 7, 2023
90b4ab1
other tokenizer
pablogsal — May 16, 2023
63ef1c1
Some progress
pablogsal — May 17, 2023
6833b1a
Fix more bugs
pablogsal — May 18, 2023
90da796
Fix more problems
pablogsal — May 18, 2023
b5ccd94
Use IA to clean code
pablogsal — May 18, 2023
b1c3b2a
Remove lel
pablogsal — May 18, 2023
e941f12
Remove whitespace
pablogsal — May 18, 2023
67a0239
Fix docs
mgmacias95 — May 18, 2023
dcd221f
Moar tests and fix location error
pablogsal — May 19, 2023
fd8b60a
Some cleanups
pablogsal — May 19, 2023
f1a5090
pass the vacuum cleaner
pablogsal — May 19, 2023
7fb58b0
Fix refleaks
mgmacias95 — May 20, 2023
e1b5d35
📜🤖 Added by blurb_it.
blurb-it[bot] — May 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Fix more problems
  • Loading branch information
@pablogsal
pablogsal committed May 18, 2023
commit 90da796a9dd191845ad10b5a8570591a8ef37e0c
10 changes: 10 additions & 0 deletions — Lib/tabnanny.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -107,6 +107,10 @@ def check(file):
errprint("%r: Token Error: %s" % (file, msg))
return

except SyntaxError as msg:
errprint("%r: Token Error: %s" % (file, msg))
return

except IndentationError as msg:
errprint("%r: Indentation Error: %s" % (file, msg))
return
Expand DownExpand Up@@ -272,6 +276,12 @@ def format_witnesses(w):
return prefix + " " + ', '.join(firsts)

def process_tokens(tokens):
try:
_process_tokens(tokens)
except TabError as e:
raise NannyNag(e.lineno, e.msg, e.text)

def _process_tokens(tokens):
INDENT = tokenize.INDENT
DEDENT = tokenize.DEDENT
NEWLINE = tokenize.NEWLINE
Expand Down
4 changes: 2 additions & 2 deletions — Lib/test/test_tabnanny.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -223,7 +223,7 @@ def test_when_nannynag_error_verbose(self):
with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as file_path:
out = f"{file_path!r}: *** Line 3: trouble in tab city! ***\n"
out += "offending line: '\\tprint(\"world\")\\n'\n"
out += "indent not equal e.g. at tab size 1\n"
out += "inconsistent use of tabs and spaces in indentation\n"

tabnanny.verbose = 1
self.verify_tabnanny_check(file_path, out=out)
Expand DownExpand Up@@ -315,7 +315,7 @@ def validate_cmd(self, *args, stdout="", stderr="", partial=False, expect_failur
def test_with_errored_file(self):
"""Should displays error when errored python file is given."""
with TemporaryPyFile(SOURCE_CODES["wrong_indented"]) as file_path:
stderr = f"{file_path!r}:Indentation Error: "
stderr = f"{file_path!r}:Token Error: "
stderr += ('unindent does not match any outer indentation level'
' (<tokenize>, line 3)')
self.validate_cmd(file_path, stderr=stderr, expect_failure=True)
Expand Down
5 changes: 4 additions & 1 deletion — Lib/tokenize.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -517,7 +517,10 @@ def error(message, filename=None, location=None):
tokens = list(tokenize(f.readline))
else:
filename = "<stdin>"
tokens = _tokenize(sys.stdin.readline, None)
tokens = _tokenize(
(x.encode('utf-8') for x in iter(sys.stdin.readline, "")
), "utf-8")


# Output the tokenization
for token in tokens:
Expand Down
1 change: 0 additions & 1 deletion — Lib/trace.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -360,7 +360,6 @@ def _find_strings(filename, encoding=None):
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
with open(filename, encoding=encoding) as f:
print(filename)
tok = tokenize.generate_tokens(f.readline)
for ttype, tstr, start, end, line in tok:
if ttype == token.STRING:
Expand Down
39 changes: 36 additions & 3 deletions — Python/Python-tokenize.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -89,8 +89,10 @@ _tokenizer_error(struct tok_state *tok)
}
return -1;
case E_DEDENT:
PyErr_SetString(PyExc_IndentationError,
"unindent does not match any outer indentation level");
PyErr_Format(PyExc_IndentationError,
"unindent does not match any outer indentation level "
"(<tokenize>, line %d)",
tok->lineno);
return -1;
case E_INTR:
if (!PyErr_Occurred()) {
Expand All@@ -115,7 +117,38 @@ _tokenizer_error(struct tok_state *tok)
default:
msg = "unknown tokenization error";
}
PyErr_SetString(errtype, msg);

// TODO: Clean up this code and factor out common error paths

PyObject* errstr = NULL;
PyObject* error_line = NULL;

Py_ssize_t size = tok->inp - tok->buf;
error_line = PyUnicode_DecodeUTF8(tok->buf, size, "replace");
if (!error_line) {
goto error;
}
PyObject *tmp = Py_BuildValue("(OnnOii)", tok->filename, tok->lineno, 0, error_line, 0, 0);
if (!tmp) {
goto error;
}
Py_CLEAR(error_line);
errstr = PyUnicode_FromString(msg);
if (!errstr) {
goto error;
}
PyObject* value = PyTuple_Pack(2, errstr, tmp);
Py_DECREF(errstr);
Py_DECREF(tmp);
if (!value) {
goto error;
}
PyErr_SetObject(errtype, value);
Py_DECREF(value);
return 0;
error:
Py_XDECREF(errstr);
Py_XDECREF(error_line);
return -1;
}

Expand Down

[8]ページ先頭

©2009-2025 Movatter.jp