Uh oh!
There was an error while loading. Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork 33.3k
Closed
Description
Bug report
There is a private function `_splitlines_no_ff` which is only ever called in `ast.get_source_segment`. This function splits the entire source given to it, but `ast.get_source_segment` only needs at most `node.end_lineno` lines to work.
Lines 308 to 330 in 1acdfec
| def_splitlines_no_ff(source): | |
| """Split a string into lines ignoring form feed and other chars. | |
| This mimics how the Python parser splits source code. | |
| """ | |
| idx=0 | |
| lines= [] | |
| next_line='' | |
| whileidx<len(source): | |
| c=source[idx] | |
| next_line+=c | |
| idx+=1 | |
| # Keep \r\n together | |
| ifc=='\r'andidx<len(source)andsource[idx]=='\n': | |
| next_line+='\n' | |
| idx+=1 | |
| ifcin'\r\n': | |
| lines.append(next_line) | |
| next_line='' | |
| ifnext_line: | |
| lines.append(next_line) | |
| returnlines |
Lines 344 to 378 in 1acdfec
def get_source_segment(source, node, *, padded=False):
    """Return the piece of *source* that generated *node*.

    The segment is located through the node's `lineno`, `end_lineno`,
    `col_offset` and `end_col_offset` attributes; if any of that location
    information is missing, None is returned.  Column offsets are applied
    to the encoded bytes of each line, not to its characters.

    If *padded* is `True`, the first line of a multi-line statement is
    padded (via `_pad_whitespace`) to match its original position.
    """
    try:
        if node.end_lineno is None or node.end_col_offset is None:
            return None
        # AST line numbers are 1-based; convert them to list indices.
        first_idx = node.lineno - 1
        last_idx = node.end_lineno - 1
        start_col = node.col_offset
        stop_col = node.end_col_offset
    except AttributeError:
        return None

    all_lines = _splitlines_no_ff(source)
    first_bytes = all_lines[first_idx].encode()

    # Single-line node: one slice of the encoded line is the whole answer.
    if first_idx == last_idx:
        return first_bytes[start_col:stop_col].decode()

    prefix = _pad_whitespace(first_bytes[:start_col].decode()) if padded else ''
    head = prefix + first_bytes[start_col:].decode()
    tail = all_lines[last_idx].encode()[:stop_col].decode()
    middle = all_lines[first_idx + 1:last_idx]
    return ''.join([head, *middle, tail])
If, for example, you want to extract an import line from a very long file, this can seriously degrade performance.
The introduction of a `max_lines` kwarg in `_splitlines_no_ff`, which functions like `maxsplit` in `str.split`, would minimize unneeded work. An implementation of the proposed fix is below (which makes my use case twice as fast):
--- a/Lib/ast.py
+++ b/Lib/ast.py
@@ -305,11 +305,16 @@ def get_docstring(node, clean=True):
     return text


-def _splitlines_no_ff(source):
+def _splitlines_no_ff(source, max_lines=-1):
     """Split a string into lines ignoring form feed and other chars.

     This mimics how the Python parser splits source code.
+
+    If max_lines is given, at most max_lines will be returned. If max_lines is not
+    specified or negative, then there is no limit on the number of lines returned.
     """
+    if not max_lines:
+        return []
     idx = 0
     lines = []
     next_line = ''
@@ -323,6 +328,8 @@ def _splitlines_no_ff(source):
             idx += 1
         if c in '\r\n':
             lines.append(next_line)
+            if max_lines == len(lines):
+                return lines
             next_line = ''

     if next_line:
@@ -360,7 +367,7 @@ def get_source_segment(source, node, *, padded=False):
     except AttributeError:
         return None

-    lines = _splitlines_no_ff(source)
+    lines = _splitlines_no_ff(source, max_lines=end_lineno + 1)
     if end_lineno == lineno:
         return lines[lineno].encode()[col_offset:end_col_offset].decode()

Your environment
- CPython versions tested on: 3.11