| #!/usr/bin/env python3 |
| # Copyright 2014 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
from __future__ import print_function

import argparse
import json
import os
import pickle
import subprocess
import sys

# StringIO moved to the io module in Python 3; catch only ImportError so
# genuine failures in the py2 module are not silently masked.
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3
| |
# Command-line interface definition.
parser = argparse.ArgumentParser(
    description=
    "Process the Blink points-to graph generated by the Blink GC plugin.")

parser.add_argument('-',
                    dest='use_stdin',
                    action='store_true',
                    help='Read JSON graph files from stdin')
parser.add_argument('-c',
                    '--detect-cycles',
                    action='store_true',
                    help='Detect cycles containing GC roots')
parser.add_argument('-s',
                    '--print-stats',
                    action='store_true',
                    help='Statistics about ref-counted and traced objects')
parser.add_argument('-v',
                    '--verbose',
                    action='store_true',
                    help='Verbose output')
parser.add_argument('--ignore-cycles',
                    default=None,
                    metavar='FILE',
                    help='File with cycles to ignore')
parser.add_argument('--ignore-classes',
                    nargs='*',
                    default=[],
                    metavar='CLASS',
                    help='Classes to ignore when detecting cycles')
parser.add_argument('--pickle-graph',
                    default=None,
                    metavar='FILE',
                    help='File to read/save the graph from/to')
parser.add_argument('files',
                    metavar='FILE_OR_DIR',
                    nargs='*',
                    default=[],
                    help='JSON graph files or directories containing them')
| |
# Command line args after parsing.
args = None

# Map from node labels to nodes.
graph = {}

# Set of root nodes.
roots = []

# List of cycles to ignore.
ignored_cycles = []

# Global flag to determine exit code.
global_reported_error = False

# Python 3 removed sys.maxint; fall back to sys.maxsize there.
# Also see https://stackoverflow.com/a/13795777/4052492.
maxint = getattr(sys, 'maxint', sys.maxsize)
| |
| |
def set_reported_error(value):
    """Record whether an error was reported; drives the process exit code."""
    global global_reported_error
    global_reported_error = value


def reported_error():
    """Return the current error-reported flag."""
    return global_reported_error
| |
def log(msg):
    """Print msg only when --verbose was requested."""
    if not args.verbose:
        return
    print(msg)
| |
| |
# Number of super-class edge copies performed (reported by complete_graph).
global_inc_copy = 0


def inc_copy():
    """Bump the global counter of copied super-class edges."""
    global global_inc_copy
    global_inc_copy = global_inc_copy + 1
| |
def get_node(name):
    """Return the node labelled name, creating and registering it on first use."""
    return graph.setdefault(name, Node(name))
| |
# Pointer kinds tracked per node.
ptr_types = ('raw', 'ref', 'mem')


def inc_ptr(dst, ptr):
    """Increment the ptr-kind counter of the node named dst, if it exists."""
    if ptr not in ptr_types:
        return
    node = graph.get(dst)
    if node is not None:
        node.counts[ptr] += 1
| |
def add_counts(s1, s2):
    """Add each counter of s2 into s1 in place (s1 must contain s2's keys)."""
    # dict.iteritems() only exists on Python 2; items() works on both and
    # this file otherwise targets Python 3 (see the shebang and shims above).
    for k, v in s2.items():
        s1[k] += v
| |
# Representation of graph nodes. Basically a map of directed edges.
class Node:
    def __init__(self, name):
        self.name = name
        self.edges = {}
        self.reset()

    def __repr__(self):
        return "%s(%s) %s" % (self.name, self.visited, self.edges)

    def update_node(self, decl):
        # Currently we don't track any node info besides its edges.
        pass

    def update_edge(self, e):
        """Add the declared edge, or strengthen an existing one with same key."""
        new_edge = Edge(**e)
        existing = self.edges.get(new_edge.key)
        if existing is None:
            self.edges[new_edge.key] = new_edge
        else:
            # If an edge exists, its kind is the strongest of the two.
            existing.kind = max(existing.kind, new_edge.kind)

    def super_edges(self):
        """Edges pointing at super classes of this node."""
        return [e for e in self.edges.values() if e.is_super()]

    def subclass_edges(self):
        """Edges pointing at subclasses of this node."""
        return [e for e in self.edges.values() if e.is_subclass()]

    def reset(self):
        """Clear per-traversal search state and per-kind pointer counters."""
        self.cost = maxint
        self.visited = False
        self.path = None
        self.counts = dict.fromkeys(ptr_types, 0)

    def update_counts(self):
        """Fold this node's outgoing edges into the targets' pointer counters."""
        for e in self.edges.values():
            inc_ptr(e.dst, e.ptr)
| |
# Representation of directed graph edges.
class Edge:
    def __init__(self, **decl):
        self.src = decl['src']
        self.dst = decl['dst']
        self.lbl = decl['lbl']
        self.ptr = decl['ptr']
        self.kind = decl['kind']  # 0 = weak, 1 = strong, 2 = root
        self.loc = decl['loc']
        # The label does not uniquely determine an edge from a node. We
        # define the semi-unique key to be the concatenation of the
        # label and dst name. This is sufficient to track the strongest
        # edge to a particular type. For example, if the field A::m_f
        # has type HashMap<WeakMember<B>, Member<B>> we will have a
        # strong edge with key m_f#B from A to B.
        self.key = '%s#%s' % (self.lbl, self.dst)

    def __repr__(self):
        return '%s (%s) => %s' % (self.src, self.lbl, self.dst)

    def is_root(self):
        return self.kind == 2

    def is_weak(self):
        return self.kind == 0

    def keeps_alive(self):
        return self.kind > 0

    def is_subclass(self):
        return self.lbl.startswith('<subclass>')

    def is_super(self):
        return self.lbl.startswith('<super>')
| |
def parse_file(filename):
    """Load one .graph.json file and return its list of declarations."""
    # Use a context manager so the file handle is closed deterministically
    # (the original relied on the garbage collector to close it).
    with open(filename) as f:
        return json.load(f)
| |
def build_graphs_in_dir(dirname):
    """Build the graph from every *.graph.json file found under dirname.

    Uses os.walk instead of shelling out to find(1): it is platform
    independent and yields str on Python 3 (subprocess.check_output returns
    bytes there, which broke the old '\n'-split). This also fixes the old
    code discarding the result of f.strip().
    """
    filenames = []
    for root, _dirs, names in os.walk(dirname):
        filenames.extend(
            os.path.join(root, n) for n in names if n.endswith('.graph.json'))
    log("Found %d files" % len(filenames))
    for filename in filenames:
        build_graph(filename)
| |
def build_graph(filename):
    """Merge all declarations from one JSON graph file into the global graph."""
    for decl in parse_file(filename):
        if 'name' in decl:
            # A node declaration: add or update the node entry.
            get_node(decl['name']).update_node(decl)
        else:
            # An edge declaration: attach it to its source node.
            get_node(decl['src']).update_edge(decl)
| |
# Copy all non-weak edges from super classes to their subclasses.
# This causes all fields of a super to be considered fields of a
# derived class without transitively relating derived classes with
# each other. For example, if B <: A, C <: A, and for some D, D => B,
# we don't want that to entail that D => C.
def copy_super_edges(edge):
    """Copy the strong fields of the super class behind edge into its subclass."""
    if edge.is_weak() or not edge.is_super():
        return
    inc_copy()
    # Make the super-class edge weak (prohibits processing twice).
    edge.kind = 0
    # If the super class is not in our graph exit early.
    super_node = graph.get(edge.dst)
    if super_node is None:
        return
    # Recursively copy all super-class edges.
    for super_edge in super_node.super_edges():
        copy_super_edges(super_edge)
    # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
    sub_node = graph[edge.src]
    for e in super_node.edges.values():
        if not e.keeps_alive() or e.is_subclass():
            continue
        copied = Edge(
            src=sub_node.name,
            dst=e.dst,
            lbl='%s <: %s' % (super_node.name, e.lbl),
            ptr=e.ptr,
            kind=e.kind,
            loc=e.loc,
        )
        sub_node.edges[copied.key] = copied
    # Add a strong sub-class edge.
    sub_edge = Edge(
        src=super_node.name,
        dst=sub_node.name,
        lbl='<subclass>',
        ptr=edge.ptr,
        kind=1,
        loc=edge.loc,
    )
    super_node.edges[sub_edge.key] = sub_edge
| |
def complete_graph():
    """Propagate super-class fields downwards and collect all root edges."""
    for node in graph.values():
        for super_edge in node.super_edges():
            copy_super_edges(super_edge)
        for edge in node.edges.values():
            if edge.is_root():
                roots.append(edge)
    log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)
| |
def reset_graph():
    """Clear the per-traversal state (cost/visited/path/counts) of every node."""
    for node in graph.values():
        node.reset()
| |
def shortest_path(start, end):
    """Search for a shortest keeps-alive path from start to end.

    On success end.cost is the path length and the path itself can be
    recovered by following the nodes' path edges backwards from end.
    Nodes are expanded cheapest-first; the search stops once end is popped
    or no shorter path can still exist.
    """
    start.cost = 0
    worklist = [start]
    while worklist:
        # Keep the list sorted most-expensive-first so pop() yields the
        # cheapest open node.
        worklist.sort(key=lambda n: -n.cost)
        current = worklist.pop()
        current.visited = True
        if current == end or current.cost >= end.cost + 1:
            return
        for e in current.edges.values():
            if not e.keeps_alive():
                continue
            dst = graph.get(e.dst)
            if dst is None or dst.visited:
                continue
            if current.cost < dst.cost:
                dst.cost = current.cost + 1
                dst.path = e
                worklist.append(dst)
| |
def detect_cycles():
    """Report every GC root whose target can reach back to the root's host."""
    for root_edge in roots:
        reset_graph()
        # Mark ignored classes as already visited.
        for ignore in args.ignore_classes:
            name = ignore if ignore.find("::") > 0 else ("blink::" + ignore)
            node = graph.get(name)
            if node:
                node.visited = True
        src = graph[root_edge.src]
        dst = graph.get(root_edge.dst)
        if src.visited:
            continue
        if root_edge.dst == "WTF::String":
            continue
        if dst is None:
            print("\nPersistent root to incomplete destination object:")
            print(root_edge)
            set_reported_error(True)
            continue
        # Find the shortest path from the root target (dst) to its host (src)
        shortest_path(dst, src)
        if src.cost < maxint:
            report_cycle(root_edge)
| |
def is_ignored_cycle(cycle):
    """Return True when cycle matches one of the ignored-cycle blocks."""
    for ignored in ignored_cycles:
        if block_match(cycle, ignored):
            return True
    # Implicitly returns None (falsy) when nothing matched.
| |
def block_match(b1, b2):
    """Element-wise equality of two equal-length sequences."""
    if len(b1) != len(b2):
        return False
    return all(x == y for x, y in zip(b1, b2))
| |
def report_cycle(root_edge):
    """Pretty-print the cycle found for root_edge, unless it is ignored."""
    dst = graph[root_edge.dst]
    path = []
    edge = root_edge
    # Terminate the backwards walk at the root target.
    dst.path = None
    while edge:
        path.append(edge)
        edge = graph[edge.src].path
    path.append(root_edge)
    path.reverse()
    # Find the max loc length for pretty printing.
    max_loc = max(len(p.loc) for p in path)
    out = StringIO()
    for p in path[:-1]:
        print((p.loc + ':').ljust(max_loc + 1), p, file=out)
    sout = out.getvalue()
    if not is_ignored_cycle(sout):
        print("\nFound a potentially leaking cycle starting from a GC root:\n",
              sout, sep="")
        set_reported_error(True)
| |
def load_graph():
    """Replace the global graph and roots with the pickled pair on disk."""
    global graph
    global roots
    log("Reading graph from pickled file: " + args.pickle_graph)
    # Context manager closes the handle deterministically (old code leaked it).
    with open(args.pickle_graph, 'rb') as f:
        graph, roots = pickle.load(f)
| |
def save_graph():
    """Pickle the global (graph, roots) pair to args.pickle_graph."""
    log("Saving graph to pickle file: " + args.pickle_graph)
    # Context manager closes the handle deterministically (old code leaked it).
    with open(args.pickle_graph, 'wb') as f:
        pickle.dump((graph, roots), f)
| |
def read_ignored_cycles():
    """Parse the --ignore-cycles file into the global ignored_cycles list."""
    global ignored_cycles
    if not args.ignore_cycles:
        return
    log("Reading ignored cycles from file: " + args.ignore_cycles)
    block = []
    # NOTE: `block += l` deliberately extends the list with the CHARACTERS of
    # the line (newlines included), so each ignored block is a character
    # sequence that block_match can compare element-wise against the string
    # produced by report_cycle. Do not "fix" this to append whole lines.
    with open(args.ignore_cycles) as f:
        for l in f:
            line = l.strip()
            if not line or line.startswith('Found'):
                # Blank lines and 'Found ...' headers delimit blocks.
                if len(block) > 0:
                    ignored_cycles.append(block)
                block = []
            else:
                block += l
    if len(block) > 0:
        ignored_cycles.append(block)
| |
# Base classes marking a hierarchy as garbage collected.
gc_bases = (
    'cppgc::GarbageCollected',
    'cppgc::GarbageCollectedMixin',
)
# Base classes marking a hierarchy as reference counted.
ref_bases = (
    'WTF::RefCounted',
    'WTF::ThreadSafeRefCounted',
)
# Base classes of hierarchies transitioning from ref counting to GC.
gcref_bases = (
    'blink::RefCountedGarbageCollected',
    'blink::ThreadSafeRefCountedGarbageCollected',
)
# Mixin bases that imply reference counting.
ref_mixins = (
    'blink::EventTarget',
    'blink::EventTargetWithInlineData',
    'blink::ActiveDOMObject',
)
| |
def print_stats():
    """Print statistics about GC-managed, ref-counted and transitional types."""
    gcref_managed = []
    ref_managed = []
    gc_managed = []
    hierarchies = []

    for node in graph.values():
        node.update_counts()
        for sup in node.super_edges():
            if sup.dst in gcref_bases:
                gcref_managed.append(node)
            elif sup.dst in ref_bases:
                ref_managed.append(node)
            elif sup.dst in gc_bases:
                gc_managed.append(node)

    # Typo fix: the old label read "GC manged".
    groups = [("GC managed ", gc_managed),
              ("ref counted ", ref_managed),
              ("in transition", gcref_managed)]
    total = sum(len(g) for (s, g) in groups)
    # Guard against an empty graph; the old code divided by zero here.
    if total == 0:
        return
    for (s, g) in groups:
        percent = len(g) * 100 / total
        print("%2d%% is %s (%d hierarchies)" % (percent, s, len(g)))

    for base in gcref_managed:
        stats = {'classes': 0, 'ref-mixins': 0}
        for ptr in ptr_types:
            stats[ptr] = 0
        hierarchy_stats(base, stats)
        hierarchies.append((base, stats))

    print("\nHierarchies in transition (RefCountedGarbageCollected):")
    hierarchies.sort(key=lambda n: -n[1]['classes'])
    for (node, stats) in hierarchies:
        total = stats['mem'] + stats['ref'] + stats['raw']
        print(
            ("%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins"
             % (
                 # '*' marks hierarchies with no remaining ref-counting.
                 "*" if stats['ref'] == 0 and stats['ref-mixins'] == 0 else " ",
                 100 if total == 0 else stats['mem'] * 100 / total,
                 node.name.replace('blink::', '').replace(
                     'cppgc::subtle::', '').replace('cppgc::', ''),
                 stats['classes'],
                 stats['mem'],
                 stats['ref'],
                 stats['raw'],
                 stats['ref-mixins'],
             )))
| |
| |
def hierarchy_stats(node, stats):
    """Recursively accumulate counters over node and all of its subclasses."""
    if not node:
        return
    stats['classes'] += 1
    add_counts(stats, node.counts)
    for edge in node.super_edges():
        if edge.dst in ref_mixins:
            stats['ref-mixins'] += 1
    for edge in node.subclass_edges():
        hierarchy_stats(graph.get(edge.dst), stats)
| |
def main():
    """Entry point: build or load the graph, then run the requested analyses.

    Returns 1 on usage errors or when an error was reported, 0 otherwise.
    Typo fixes: user messages previously read "directores" and "containg".
    """
    global args
    args = parser.parse_args()
    if not (args.detect_cycles or args.print_stats):
        print("Please select an operation to perform (eg, -c to detect cycles)")
        parser.print_help()
        return 1
    if args.pickle_graph and os.path.isfile(args.pickle_graph):
        load_graph()
    else:
        if args.use_stdin:
            log("Reading files from stdin")
            for f in sys.stdin:
                build_graph(f.strip())
        else:
            log("Reading files and directories from command line")
            if len(args.files) == 0:
                print(
                    "Please provide files or directories for building the graph")
                parser.print_help()
                return 1
            for f in args.files:
                if os.path.isdir(f):
                    log("Building graph from files in directory: " + f)
                    build_graphs_in_dir(f)
                else:
                    log("Building graph from file: " + f)
                    build_graph(f)
        log("Completing graph construction (%d graph nodes)" % len(graph))
        complete_graph()
        if args.pickle_graph:
            save_graph()
    if args.detect_cycles:
        read_ignored_cycles()
        log("Detecting cycles containing GC roots")
        detect_cycles()
    if args.print_stats:
        log("Printing statistics")
        print_stats()
    if reported_error():
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())