|
| 1 | +# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt) |
| 2 | +# Source for "Build a Reasoning Model (From Scratch)": https://mng.bz/lZ5B |
| 3 | +# Code repository: https://github.com/rasbt/reasoning-from-scratch |
| 4 | + |
| 5 | +# Verify that Python source files (and optionally notebooks) use double quotes for strings. |
| 6 | + |
| 7 | +importargparse |
| 8 | +importast |
| 9 | +importio |
| 10 | +importjson |
| 11 | +importsys |
| 12 | +importtokenize |
| 13 | +frompathlibimportPath |
| 14 | + |
# Directory names that are never scanned. A path is skipped when any one of
# its components is exactly equal to one of these names.
EXCLUDED_DIRS = {
    # version-control metadata
    ".git",
    ".hg",
    ".svn",
    # tool caches and virtual environments
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    ".tox",
    ".venv",
    "__pycache__",
    # build output and vendored dependencies
    "build",
    "dist",
    "node_modules",
}
| 29 | + |
# Lowercase letters that may precede the opening quote of a string literal
# (bytes, formatted, raw, unicode).
PREFIX_CHARS = {"b", "f", "r", "u"}

# Quote characters and their tripled forms, used to classify string tokens.
SINGLE_QUOTE = "'"
DOUBLE_QUOTE = "\""
TRIPLE_SINGLE = 3 * SINGLE_QUOTE
TRIPLE_DOUBLE = 3 * DOUBLE_QUOTE
| 35 | + |
| 36 | + |
def should_skip(path):
    """Return True when any component of *path* is an excluded directory name.

    *path* must expose ``.parts`` (as ``pathlib.Path`` does); each part is
    compared against ``EXCLUDED_DIRS`` by exact match.
    """
    return not EXCLUDED_DIRS.isdisjoint(path.parts)
| 40 | + |
| 41 | + |
def collect_fstring_expr_string_positions(source):
    """
    Return set of (lineno, col_offset) for string literals that appear inside
    formatted expressions of f-strings. These should be exempt from the double
    quote check, since enforcing double quotes there is unnecessarily strict.

    Every f-string in the module is inspected, including f-strings nested in
    the expressions or format specs of other f-strings. An empty set is
    returned when *source* is not valid Python.
    """
    try:
        tree = ast.parse(source)
    except SyntaxError:
        return set()

    positions = set()
    for node in ast.walk(tree):
        if not isinstance(node, ast.JoinedStr):
            continue
        for part in node.values:
            # Only the "{...}" replacement fields are of interest; the literal
            # text segments of the f-string itself are not exempt.
            if not isinstance(part, ast.FormattedValue):
                continue
            for inner in ast.walk(part.value):
                # String literals are ast.Constant nodes since Python 3.8.
                # The legacy ast.Str class is deliberately not referenced: it
                # is deprecated and absent from newer interpreters, where
                # merely naming it raises AttributeError.
                if isinstance(inner, ast.Constant) and isinstance(inner.value, str):
                    positions.add((inner.lineno, inner.col_offset))
    return positions
| 74 | + |
| 75 | + |
def check_quotes_in_source(source, path):
    """Return a list of "uses single quotes" violations found in *source*.

    Args:
        source: Python source code as a string.
        path: Label used as the prefix of every violation message.

    Returns:
        A list of strings of the form ``"{path}:{line}:{col}: uses single
        quotes"``, one per offending string literal, in token order.

    Triple-quoted strings (with or without a prefix) are never reported, and
    neither are strings or f-strings nested inside the replacement fields of
    an f-string. Errors raised by the tokenizer propagate to the caller.
    """
    violations = []
    ignored_positions = collect_fstring_expr_string_positions(source)
    prefix_chars = "".join(PREFIX_CHARS)

    # Python 3.12+ tokenizes an f-string as FSTRING_START ... FSTRING_END
    # instead of one STRING token. Older interpreters lack these token types.
    fstring_start = getattr(tokenize, "FSTRING_START", None)
    fstring_end = getattr(tokenize, "FSTRING_END", None)
    fstring_depth = 0

    tokens = tokenize.generate_tokens(io.StringIO(source).readline)
    for tok_type, tok_str, start, _, _ in tokens:
        if fstring_start is not None and tok_type == fstring_start:
            fstring_depth += 1
            if fstring_depth > 1:
                # An f-string nested in another f-string's replacement field
                # is exempt, just like the plain strings in ignored_positions.
                continue
        elif fstring_end is not None and tok_type == fstring_end:
            fstring_depth -= 1
            continue
        elif tok_type != tokenize.STRING or start in ignored_positions:
            continue

        # Drop the complete prefix (it may be two letters, e.g. rb or Rb)
        # before looking at the quotes. Stripping only one letter misses
        # rb'..' and misreads r'''..''' as a single-quoted string.
        opener = tok_str.lower().lstrip(prefix_chars)

        # ignore triple-quoted strings
        if opener.startswith((TRIPLE_DOUBLE, TRIPLE_SINGLE)):
            continue

        # report if not using double quotes
        if opener.startswith(SINGLE_QUOTE):
            line, col = start
            violations.append(f"{path}:{line}:{col}: uses single quotes")
    return violations
| 102 | + |
| 103 | + |
def check_file(path):
    """Check a single file and return its list of violation messages.

    Files with an ``.ipynb`` suffix are handed to ``check_notebook``; any
    other file is read as UTF-8 text and passed to ``check_quotes_in_source``.
    Any exception raised while doing so is turned into a single
    "failed to check" entry instead of being propagated, so one unreadable
    file does not abort the whole run.
    """
    try:
        if path.suffix != ".ipynb":
            contents = path.read_text(encoding="utf-8")
            return check_quotes_in_source(contents, path)
        return check_notebook(path)
    except Exception as err:
        return [f"{path}: failed to check ({err})"]
| 113 | + |
| 114 | + |
def check_notebook(path):
    """Return the quote violations found in the code cells of a notebook.

    The file at *path* is parsed as UTF-8 JSON. For every entry of its
    ``"cells"`` list whose ``"cell_type"`` is ``"code"``, the ``"source"``
    fragments are concatenated and checked with ``check_quotes_in_source``.
    Reported line numbers are therefore relative to the start of each cell.
    """
    with open(path, encoding="utf-8") as handle:
        notebook = json.load(handle)

    # Lazy filter: cells are examined one at a time, in document order.
    code_cells = (
        cell for cell in notebook.get("cells", [])
        if cell.get("cell_type") == "code"
    )

    found = []
    for cell in code_cells:
        cell_source = "".join(cell.get("source", []))
        found.extend(check_quotes_in_source(cell_source, path))
    return found
| 124 | + |
| 125 | + |
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only option is ``--include-notebooks``, a boolean flag stored as
    ``include_notebooks`` (False unless the flag is given).
    """
    cli = argparse.ArgumentParser(description="Verify double-quoted string literals.")
    notebook_flag = {
        "action": "store_true",
        "help": "Also scan Jupyter notebooks (.ipynb files) for single-quoted strings.",
    }
    cli.add_argument("--include-notebooks", **notebook_flag)
    return cli.parse_args()
| 134 | + |
| 135 | + |
def main():
    """Scan the current directory tree and report single-quoted strings.

    All ``*.py`` files below the current working directory are checked, plus
    ``*.ipynb`` files when ``--include-notebooks`` is given. Violations are
    printed one per line, followed by a count.

    Returns:
        1 if any violation (or unreadable file) was reported, otherwise 0.
    """
    args = parse_args()
    project_root = Path(".").resolve()
    py_files = sorted(project_root.rglob("*.py"))
    notebook_files = []
    if args.include_notebooks:
        notebook_files = sorted(project_root.rglob("*.ipynb"))

    violations = []
    for path in py_files + notebook_files:
        # Match exclusions against the part of the path below the project
        # root only. Using the resolved absolute path would skip every file,
        # and report a false success, whenever the checkout itself lives
        # under a directory named e.g. "build" or ".venv".
        if should_skip(path.relative_to(project_root)):
            continue
        violations.extend(check_file(path))

    if violations:
        print("\n".join(violations))
        print(f"\n{len(violations)} violations found.")
        return 1

    print("All files use double quotes correctly.")
    return 0
| 155 | + |
| 156 | + |
# Script entry point: the process exit status is main()'s return value
# (1 when violations were reported, 0 otherwise).
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)