| #!/usr/bin/env python3 |
| # Copyright 2021 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Helper for adding or removing an include to/from source file(s). |
| |
| clang-format already provides header sorting functionality; however, the |
| functionality is limited to sorting headers within a block of headers surrounded |
| by blank lines (these are a heuristic to avoid clang breaking ordering for |
| headers sensitive to inclusion order, e.g. <windows.h>). |
| |
| As a result, inserting a new header is a bit more complex than simply inserting |
| the new header at the top and running clang-format. |
| |
| This script implements additional logic to: |
| - classify different blocks of headers by type (C system, C++ system, user) |
| - find the appropriate insertion point for the new header |
| - create a new header block if necessary |
| |
| As a bonus, it does *also* sort the includes, though any sorting disagreements |
| with clang-format should be resolved in favor of clang-format. |
| |
| It also supports removing a header with option `--remove`. |
| |
| Usage: |
| tools/add_header.py --header '<utility>' foo/bar.cc foo/baz.cc foo/baz.h |
| tools/add_header.py --header '<vector>' --remove foo/bar.cc foo/baz.cc foo/baz.h |
| """ |
| |
| import argparse |
| import difflib |
| import os.path |
| import re |
| import sys |
| |
# Header categories. Besides identifying the kind of header, the numeric value
# of each valid category is used directly as a sort key, so the values below
# also define the relative order of the header groups in the output.
_HEADER_TYPE_INVALID = -1
_HEADER_TYPE_C_SYSTEM = 0
_HEADER_TYPE_CXX_SYSTEM = 1
_HEADER_TYPE_USER = 2
| |
| |
def ClassifyHeader(decorated_name):
  """Maps a decorated header name to one of the _HEADER_TYPE_* constants.

  Args:
    decorated_name: The header name including its <> or "" delimiters.

  Returns:
    _HEADER_TYPE_C_SYSTEM, _HEADER_TYPE_CXX_SYSTEM or _HEADER_TYPE_USER when
    the corresponding Is*Header() predicate accepts the name, and
    _HEADER_TYPE_INVALID when none of them does.
  """
  # The checks are tried in the same order as the header groups are emitted.
  if IsCSystemHeader(decorated_name):
    return _HEADER_TYPE_C_SYSTEM
  if IsCXXSystemHeader(decorated_name):
    return _HEADER_TYPE_CXX_SYSTEM
  if IsUserHeader(decorated_name):
    return _HEADER_TYPE_USER
  return _HEADER_TYPE_INVALID
| |
| |
def UndecoratedName(decorated_name):
  """Strips the surrounding "" or <> from decorated_name.

  Args:
    decorated_name: A header name that must look like a system or user header.

  Returns:
    decorated_name without its first and last character.
  """
  assert IsSystemHeader(decorated_name) or IsUserHeader(decorated_name)
  # Drop exactly one delimiter character from each end.
  undecorated = decorated_name[1:-1]
  return undecorated
| |
| |
def IsSystemHeader(decorated_name):
  """Returns true if decorated_name looks like a system header.

  A system header is a name that starts with '<' and ends with '>'.

  Args:
    decorated_name: The candidate header name, including its delimiters.

  Returns:
    True if the name is delimited by <>, False otherwise. An empty string is
    rejected rather than raising IndexError.
  """
  # startswith()/endswith() are safe on an empty string, unlike indexing, so
  # invalid input is classified as "not a system header" instead of crashing.
  return decorated_name.startswith('<') and decorated_name.endswith('>')
| |
| |
def IsCSystemHeader(decorated_name):
  """Returns true if decorated_name looks like a C system header.

  That is, a system header whose undecorated name ends with '.h'.
  """
  if not IsSystemHeader(decorated_name):
    return False
  return UndecoratedName(decorated_name).endswith('.h')
| |
| |
def IsCXXSystemHeader(decorated_name):
  """Returns true if decorated_name looks like a C++ system header.

  That is, a system header whose undecorated name does not end with '.h'.
  """
  if not IsSystemHeader(decorated_name):
    return False
  return not UndecoratedName(decorated_name).endswith('.h')
| |
| |
def IsUserHeader(decorated_name):
  """Returns true if decorated_name looks like a user header.

  A user header is a name enclosed in a pair of double quotes.

  Args:
    decorated_name: The candidate header name, including its delimiters.

  Returns:
    True if the name starts and ends with '"', False otherwise. An empty
    string and a lone '"' are both rejected.
  """
  # The length check makes sure the opening and closing quote are two distinct
  # characters: a single '"' would otherwise satisfy both tests at once.
  return (len(decorated_name) >= 2 and decorated_name.startswith('"')
          and decorated_name.endswith('"'))
| |
| |
# Matches a line that is empty or consists only of whitespace.
_EMPTY_LINE_RE = re.compile(r'\s*$')
# Matches a line holding only a // comment; group 1 is the comment text.
_COMMENT_RE = re.compile(r'\s*//(.*)$')
# Matches an #include or #import line. Group 1 is the directive name, group 2
# is the decorated header name and group 3 is the text of a trailing //
# comment (None if there is no such comment).
_INCLUDE_RE = re.compile(
    r'\s*#(import|include)\s+([<"].+?[">])\s*?(?://(.*))?$')
| |
| |
def FindIncludes(lines):
  """Locates the block of #includes, assuming Google+Chrome C++ style source.

  Indices are returned instead of a slice of the input, because having the
  actual positions simplifies generating the updated source text.

  Args:
    lines: The source text split into lines.

  Returns:
    A (begin, end) tuple of indices that can be used to slice the input lines
    to contain the includes to process. Returns (-1, -1) if no such block of
    input lines could be found.
  """
  first = -1
  past_last = -1
  for index, text in enumerate(lines):
    # Comments (e.g. the copyright boilerplate) and empty lines are skipped
    # wherever they appear; they never start, extend or terminate the block.
    # TODO(dcheng): This means that any preamble comment associated with the
    # first header will be dropped. So far, this hasn't broken anything, but
    # maybe this needs to be more clever.
    # TODO(dcheng): #define and #undef should probably also be allowed.
    if _EMPTY_LINE_RE.match(text) or _COMMENT_RE.match(text):
      continue

    if _INCLUDE_RE.match(text):
      if first < 0:
        first = index
      # The end index is exclusive, so it points one past the last include.
      past_last = index + 1
    elif first >= 0:
      # Something other than an include follows the includes seen so far. Give
      # up, it's something weird that probably requires manual intervention.
      break
    # Otherwise no include has been seen yet: keep scanning for the first one.
  return first, past_last
| |
| |
class Include:
  """Represents an #include/#import and any interesting metadata for it.

  Attributes:
    decorated_name: The name of the header file, decorated with <> for system
      headers or "" for user headers.

    directive: 'include' or 'import'
      TODO(dcheng): In the future, this may need to support C++ modules.

    preamble: Any comment lines that precede this include line, e.g.:

        // This is a preamble comment
        // for a header file.
        #include <windows.h>

      would have a preamble of

        ['// This is a preamble comment', '// for a header file.'].

    inline_comment: Any comment that comes after the #include on the same
      line, e.g.

        #include <windows.h> // For CreateWindowExW()

      would be parsed with an inline comment of ' For CreateWindowExW()'.

    header_type: The header type corresponding to decorated_name as determined
      by ClassifyHeader().

    is_primary_header: True if this is the primary related header of a C++
      implementation file. Any primary header will be sorted to the top in its
      own separate block.
  """

  def __init__(self, decorated_name, directive, preamble, inline_comment):
    """Stores the parsed pieces of one include line and classifies it.

    Asserts that directive is 'include' or 'import' and that decorated_name
    classifies as a valid header type.
    """
    self.decorated_name = decorated_name
    assert directive in ('include', 'import')
    self.directive = directive
    self.preamble = preamble
    self.inline_comment = inline_comment
    self.header_type = ClassifyHeader(decorated_name)
    assert self.header_type != _HEADER_TYPE_INVALID
    # Nothing is primary until explicitly marked as such after parsing.
    self.is_primary_header = False

  def __repr__(self):
    """Returns the attributes formatted as a tuple, for debugging."""
    fields = (self.decorated_name, self.directive, self.preamble,
              self.inline_comment, self.header_type, self.is_primary_header)
    return repr(fields)

  def ShouldInsertNewline(self, previous_include):
    """Returns true if an empty line belongs between the two includes."""
    # Per the Google C++ style guide, different blocks of headers should be
    # separated by an empty line. Two includes are in the same block only if
    # both their primary-ness and their header type agree.
    own_block = (self.is_primary_header, self.header_type)
    previous_block = (previous_include.is_primary_header,
                      previous_include.header_type)
    return own_block != previous_block

  def ToSource(self):
    """Generates a C++ source representation of this include.

    Returns:
      A list of lines: the preamble lines followed by the directive line, each
      with trailing whitespace removed.
    """
    include_line = f'#{self.directive} {self.decorated_name}'
    if self.inline_comment:
      include_line += ' //' + self.inline_comment
    return [text.rstrip() for text in [*self.preamble, include_line]]
| |
| |
def ParseIncludes(lines):
  """Parses lines into a list of Include objects. Returns None on failure.

  Every line that is neither empty nor an include directive is collected as
  preamble for the next include directive.

  Args:
    lines: A list of strings representing C++ source text.

  Returns:
    A list of Include objects representing the parsed input lines, or None if
    the input lines could not be parsed.
  """
  parsed = []
  pending_preamble = []
  for text in lines:
    if _EMPTY_LINE_RE.match(text):
      if pending_preamble:
        # The pending preamble is handed over when an #include directive is
        # matched. If it is still non-empty here, there is a preamble
        # separated from its #include directive by at least one empty line.
        # Just give up, since the sorter has no idea how to preserve structure
        # in this case.
        return None
      continue

    include_match = _INCLUDE_RE.match(text)
    if include_match is None:
      pending_preamble.append(text)
      continue

    directive, decorated_name, inline_comment = include_match.groups()
    parsed.append(
        Include(decorated_name, directive, pending_preamble, inline_comment))
    # Start a fresh list; the previous one now belongs to the Include.
    pending_preamble = []

  # In theory, the caller should never pass a list of lines with a dangling
  # preamble. But there's a test case that exercises this, and just in case it
  # actually happens, fail a bit more gracefully.
  return None if pending_preamble else parsed
| |
| |
def _DecomposePath(filename):
  """Decomposes a filename into a list of directories and the basename.

  Args:
    filename: The path to decompose. It may be relative or absolute.

  Returns:
    A tuple of a list of directories (outermost first) and a string basename
    with its extension removed. For an absolute path, the root (e.g. '/') is
    the first entry of the directory list.
  """
  dirs = []
  dirname, basename = os.path.split(filename)
  while dirname:
    parent, last = os.path.split(dirname)
    if parent == dirname:
      # os.path.split() cannot split a root such as '/' any further and keeps
      # returning it unchanged. Record it once and stop; without this check an
      # absolute path would loop forever.
      dirs.append(dirname)
      break
    dirs.append(last)
    dirname = parent
  # Components were collected innermost first.
  dirs.reverse()
  # Remove the extension from the basename.
  basename = os.path.splitext(basename)[0]
  return dirs, basename
| |
| |
# Regex fragments for the optional suffixes a source file's basename may carry
# in addition to the basename of its primary header, e.g. moo_posix.cc or
# moo_unittest.cc for moo.h.
_PLATFORM_SUFFIX = (
    r'(?:_(?:android|aura|chromeos|fuchsia|ios|linux|mac|ozone|posix|win|x11))?'
)
_TEST_SUFFIX = r'(?:_(?:browser|interactive_ui|perf|ui|unit)?test)?'
| |
| |
def MarkPrimaryInclude(includes, filename):
  """Finds the primary header in includes and marks it as such.

  Per the style guide, if moo.cc's main purpose is to implement or test the
  functionality in moo.h, moo.h should be ordered first in the includes.

  At most one include gets is_primary_header set. If no exact match exists,
  includes is reordered in place (longest decorated name first) before the
  fuzzy search.

  Args:
    includes: A list of Include objects.
    filename: The filename to use as the basis for finding the primary header.
  """
  # Header files never have a primary include.
  if filename.endswith('.h'):
    return

  # First pass. Look for a user header whose path is exactly the filename with
  # its extension replaced by '.h'.
  expected_header = os.path.splitext(filename)[0] + '.h'
  for candidate in includes:
    if (IsUserHeader(candidate.decorated_name)
        and UndecoratedName(candidate.decorated_name) == expected_header):
      candidate.is_primary_header = True
      return

  basis_dirs, basis_name = _DecomposePath(filename)
  # The platform and test suffixes are accepted in either order.
  suffix_orders = (_PLATFORM_SUFFIX + _TEST_SUFFIX,
                   _TEST_SUFFIX + _PLATFORM_SUFFIX)

  # Second pass. The list of includes is searched in reverse order of length.
  # Even though matching is fuzzy, moo_posix.h should take precedence over
  # moo.h when considering moo_posix.cc.
  includes.sort(key=lambda i: len(i.decorated_name), reverse=True)
  for candidate in includes:
    if candidate.header_type != _HEADER_TYPE_USER:
      continue
    candidate_dirs, candidate_name = _DecomposePath(
        UndecoratedName(candidate.decorated_name))

    # If the basename to test is longer than the basis, just skip it and
    # continue. moo.c should never match against moo_posix.h.
    if len(candidate_name) > len(basis_name):
      continue

    # The basename in the two paths being compared need to fuzzily match.
    # This allows for situations where moo_posix.cc implements the interfaces
    # defined in moo.h.
    escaped_name = re.escape(candidate_name)
    if not any(
        re.match(escaped_name + suffixes + '$', basis_name)
        for suffixes in suffix_orders):
      continue

    # The topmost directory name must match, and the rest of the directory
    # path should be 'substantially similar'.
    matcher = difflib.SequenceMatcher(None, candidate_dirs, basis_dirs)
    topmost_matches = False
    matched = 0
    for block in matcher.get_matching_blocks():
      # Only a block seen before anything has been counted, and starting at
      # index 0 of both directory lists, covers the topmost directory.
      if matched == 0 and block.a == 0 and block.b == 0:
        topmost_matches = True
      matched += block.size

    if not topmost_matches:
      continue

    # 'Substantially similar' is defined to be:
    # - no more than two differences
    # - at least one match besides the topmost directory
    differences = (abs(matched - len(candidate_dirs)) +
                   abs(matched - len(basis_dirs)))
    # Note: differences != 0 is mainly intended to allow more succinct tests
    # (otherwise tests with just a basename would always trip the matched < 2
    # check).
    if differences != 0 and (differences > 2 or matched < 2):
      continue

    candidate.is_primary_header = True
    return
| |
| |
def SerializeIncludes(includes):
  """Turns includes back into the corresponding C++ source text.

  The list is sorted in place, then each include is emitted with an empty line
  wherever an include reports that it starts a new block.

  Args:
    includes: a list of Include objects.

  Returns:
    A list of strings representing C++ source text.
  """
  # LINT.IfChange(winheader)
  # Headers that are sorted above others to prevent inclusion order issues.
  # NOTE: The order of these headers is the sort key and will be the order in
  # the output file. It should be set to match whatever clang-format will do.
  special_headers = [
      # Listed first because it must be before initguid.h in the block below.
      '<objbase.h>',

      # Alphabetized block that don't matter relative to each other, but need
      # to be included before any instance of the listed other header. These
      # other listed headers are non-exhaustive examples.
      '<atlbase.h>',  # atlapp.h
      '<initguid.h>',  # emi.h
      '<mmdeviceapi.h>',  # functiondiscoverykeys_devpkey.h
      '<ocidl.h>',  # commdlg.h
      '<ole2.h>',  # intshcut.h
      '<shobjidl.h>',  # propkey.h
      '<tchar.h>',  # tpcshrd.h
      '<unknwn.h>',  # intshcut.h
      '<windows.h>',  # hidclass.h, memoryapi.h, ncrypt.h, shellapi.h,
      # versionhelpers.h, winbase.h, etc.
      '<winsock2.h>',  # ws2tcpip.h
      '<winternl.h>',  # ntsecapi.h; also needs `#define _NTDEF_`
      '<ws2tcpip.h>',  # iphlpapi.h
  ]

  # LINT.ThenChange(/.clang-format:winheader)

  # Position of each special header in the list above, built once so that the
  # sort key does not rescan the list for every include.
  special_rank = {name: rank for rank, name in enumerate(special_headers)}
  # Headers without special rules sort after all the special ones.
  default_rank = len(special_headers)

  # Ensure that headers are sorted as follows:
  #
  # 1. The primary header, if any, appears first.
  # 2. All headers of the same type (e.g. C system, C++ system headers, et
  #    cetera) are grouped contiguously.
  # 3. Any special sorting rules needed within each group for satisfying
  #    platform header idiosyncrasies. In practice, this only applies to C
  #    system headers.
  # 4. The remaining headers without special sorting rules are sorted
  #    lexicographically.
  #
  # The for loop below that outputs the actual source text depends on #2 above
  # to insert newlines between different groups of headers.
  def SortKey(include):
    # Special headers are looked up case-insensitively.
    rank = special_rank.get(include.decorated_name.lower(), default_rank)
    return (not include.is_primary_header, include.header_type, rank,
            include.decorated_name)

  includes.sort(key=SortKey)

  source = []
  previous_include = None
  for include in includes:
    # Nothing precedes the first include, so it never needs a separator.
    if (previous_include is not None
        and include.ShouldInsertNewline(previous_include)):
      source.append('')
    source.extend(include.ToSource())
    previous_include = include
  return source
| |
| |
def AddHeaderToSource(filename, source, decorated_name, remove=False):
  """Adds or removes the specified header into/from the source text, if needed.

  A message is printed whenever the file is skipped.

  Args:
    filename: The name of the source file.
    source: A string containing the contents of the source file.
    decorated_name: The decorated name of the header to add or remove.
    remove: If true, remove instead of adding.

  Returns:
    None if no changes are needed or the modified source text otherwise.
  """
  lines = source.splitlines()
  begin, end = FindIncludes(lines)

  # No #includes in this file. Just give up.
  # TODO(dcheng): Be more clever and insert it after the file-level comment or
  # include guard as appropriate.
  if begin < 0:
    print(f'Skipping {filename}: unable to find includes!')
    return None

  includes = ParseIncludes(lines[begin:end])
  if not includes:
    print(f'Skipping {filename}: unable to parse includes!')
    return None

  # The first include that already names the requested header, if any.
  existing = next(
      (inc for inc in includes if inc.decorated_name == decorated_name), None)
  if remove:
    if existing is None:
      print(f'Skipping {filename}: unable to find {decorated_name}')
      return None
    includes.remove(existing)
  elif existing is not None:
    # Nothing to do.
    print(f'Skipping {filename}: no changes required!')
    return None
  else:
    includes.append(Include(decorated_name, 'include', [], None))

  MarkPrimaryInclude(includes, filename)

  # Replace the old include block with the re-sorted one.
  lines[begin:end] = SerializeIncludes(includes)
  lines.append('')  # To avoid eating the newline at the end of the file.
  return '\n'.join(lines)
| |
| |
def main():
  """Parses the command line and adds/removes the header in every listed file.

  Files are read and written as UTF-8 regardless of the platform's locale
  encoding, and are written back with '\n' line endings. A file is only
  rewritten when AddHeaderToSource() returns modified source text.

  Returns:
    1 if --header is not a decorated header name, 0 otherwise.
  """
  parser = argparse.ArgumentParser(
      description='Mass add (or remove) a new header into a bunch of files.')
  parser.add_argument(
      '--header',
      help='The decorated filename of the header to insert (e.g. "a" or <a>)',
      required=True)
  parser.add_argument('--remove',
                      help='Remove the header file instead of adding it',
                      action='store_true')
  parser.add_argument('files', nargs='+')
  args = parser.parse_args()
  if ClassifyHeader(args.header) == _HEADER_TYPE_INVALID:
    print('--header argument must be a decorated filename, e.g.')
    print(' --header "<utility>"')
    print('or')
    print(' --header \'"moo.h"\'')
    return 1
  operation = 'Removing' if args.remove else 'Inserting'
  print(f'{operation} #include {args.header}...')
  for filename in args.files:
    # An explicit encoding keeps the result independent of the locale the
    # script happens to run under.
    with open(filename, 'r', encoding='utf-8') as f:
      new_source = AddHeaderToSource(os.path.normpath(filename), f.read(),
                                     args.header, args.remove)
    if not new_source:
      continue
    with open(filename, 'w', encoding='utf-8', newline='\n') as f:
      f.write(new_source)
  return 0
| |
| |
if __name__ == '__main__':
  # Use main()'s return value as the exit status of the process.
  exit_status = main()
  sys.exit(exit_status)