| #!/usr/bin/env python3 |
| # Copyright 2014 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
from __future__ import print_function

import argparse
import json
import os
import pickle
import subprocess
import sys

# StringIO moved to the io module in Python 3; catch only ImportError so
# genuine failures in the py2 module are not silently masked.
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3
| |
# Command-line interface definition.
parser = argparse.ArgumentParser(
    description=
    "Process the Blink points-to graph generated by the Blink GC plugin.")

parser.add_argument('-',
                    dest='use_stdin',
                    action='store_true',
                    help='Read JSON graph files from stdin')
parser.add_argument('-c',
                    '--detect-cycles',
                    action='store_true',
                    help='Detect cycles containing GC roots')
parser.add_argument('-s',
                    '--print-stats',
                    action='store_true',
                    help='Statistics about ref-counted and traced objects')
parser.add_argument('-v',
                    '--verbose',
                    action='store_true',
                    help='Verbose output')
parser.add_argument('--ignore-cycles',
                    default=None,
                    metavar='FILE',
                    help='File with cycles to ignore')
parser.add_argument('--ignore-classes',
                    nargs='*',
                    default=[],
                    metavar='CLASS',
                    help='Classes to ignore when detecting cycles')
parser.add_argument('--pickle-graph',
                    default=None,
                    metavar='FILE',
                    help='File to read/save the graph from/to')
parser.add_argument('files',
                    metavar='FILE_OR_DIR',
                    nargs='*',
                    default=[],
                    help='JSON graph files or directories containing them')
| |
# Command line args after parsing.
args = None

# Map from node labels to nodes.
graph = {}

# Set of root nodes.
roots = []

# List of cycles to ignore.
ignored_cycles = []

# Global flag to determine exit code.
global_reported_error = False

# Python 3 removed sys.maxint; fall back to sys.maxsize there.
# Also see https://stackoverflow.com/a/13795777/4052492.
maxint = getattr(sys, 'maxint', sys.maxsize)
| |
| |
def set_reported_error(value):
    """Record whether an error was reported; drives the process exit code."""
    global global_reported_error
    global_reported_error = value


def reported_error():
    """Return the current error-reported flag."""
    return global_reported_error
| |
def log(msg):
    """Print msg only when --verbose was requested."""
    if not args.verbose:
        return
    print(msg)
| |
| |
# Number of super-class edge copies performed (reported by complete_graph).
global_inc_copy = 0


def inc_copy():
    """Bump the global counter of copied super-class edges."""
    global global_inc_copy
    global_inc_copy = global_inc_copy + 1
| |
def get_node(name):
    """Return the node labelled name, creating and registering it on first use."""
    return graph.setdefault(name, Node(name))
| |
# Pointer kinds tracked per node.
ptr_types = ('raw', 'ref', 'mem')


def inc_ptr(dst, ptr):
    """Increment the ptr-kind counter of the node named dst, if it exists."""
    if ptr not in ptr_types:
        return
    node = graph.get(dst)
    if node is not None:
        node.counts[ptr] += 1
| |
def add_counts(s1, s2):
    """Add each counter of s2 into s1 in place (s1 must contain s2's keys)."""
    # dict.iteritems() only exists on Python 2; items() works on both and
    # this file otherwise targets Python 3 (see the shebang and shims above).
    for k, v in s2.items():
        s1[k] += v
| |
# Representation of graph nodes. Basically a map of directed edges.
class Node:
    def __init__(self, name):
        self.name = name
        self.edges = {}
        self.reset()

    def __repr__(self):
        return "%s(%s) %s" % (self.name, self.visited, self.edges)

    def update_node(self, decl):
        # Currently we don't track any node info besides its edges.
        pass

    def update_edge(self, e):
        """Add the declared edge, or strengthen an existing one with same key."""
        new_edge = Edge(**e)
        existing = self.edges.get(new_edge.key)
        if existing is None:
            self.edges[new_edge.key] = new_edge
        else:
            # If an edge exists, its kind is the strongest of the two.
            existing.kind = max(existing.kind, new_edge.kind)

    def super_edges(self):
        """Edges pointing at super classes of this node."""
        return [e for e in self.edges.values() if e.is_super()]

    def subclass_edges(self):
        """Edges pointing at subclasses of this node."""
        return [e for e in self.edges.values() if e.is_subclass()]

    def reset(self):
        """Clear per-traversal search state and per-kind pointer counters."""
        self.cost = maxint
        self.visited = False
        self.path = None
        self.counts = dict.fromkeys(ptr_types, 0)

    def update_counts(self):
        """Fold this node's outgoing edges into the targets' pointer counters."""
        for e in self.edges.values():
            inc_ptr(e.dst, e.ptr)
| |
# Representation of directed graph edges.
class Edge:
    def __init__(self, **decl):
        self.src = decl['src']
        self.dst = decl['dst']
        self.lbl = decl['lbl']
        self.ptr = decl['ptr']
        self.kind = decl['kind']  # 0 = weak, 1 = strong, 2 = root
        self.loc = decl['loc']
        # The label does not uniquely determine an edge from a node. We
        # define the semi-unique key to be the concatenation of the
        # label and dst name. This is sufficient to track the strongest
        # edge to a particular type. For example, if the field A::m_f
        # has type HashMap<WeakMember<B>, Member<B>> we will have a
        # strong edge with key m_f#B from A to B.
        self.key = '%s#%s' % (self.lbl, self.dst)

    def __repr__(self):
        return '%s (%s) => %s' % (self.src, self.lbl, self.dst)

    def is_root(self):
        return self.kind == 2

    def is_weak(self):
        return self.kind == 0

    def keeps_alive(self):
        return self.kind > 0

    def is_subclass(self):
        return self.lbl.startswith('<subclass>')

    def is_super(self):
        return self.lbl.startswith('<super>')
| |
def parse_file(filename):
    """Load one .graph.json file and return its list of declarations."""
    # Use a context manager so the file handle is closed deterministically
    # (the original relied on the garbage collector to close it).
    with open(filename) as f:
        return json.load(f)
| |
def build_graphs_in_dir(dirname):
    """Build the graph from every *.graph.json file found under dirname.

    Uses os.walk instead of shelling out to find(1): it is platform
    independent and yields str on Python 3 (subprocess.check_output returns
    bytes there, which broke the old '\n'-split). This also fixes the old
    code discarding the result of f.strip().
    """
    filenames = []
    for root, _dirs, names in os.walk(dirname):
        filenames.extend(
            os.path.join(root, n) for n in names if n.endswith('.graph.json'))
    log("Found %d files" % len(filenames))
    for filename in filenames:
        build_graph(filename)
| |
def build_graph(filename):
    """Merge all declarations from one JSON graph file into the global graph."""
    for decl in parse_file(filename):
        if 'name' in decl:
            # A node declaration: add or update the node entry.
            get_node(decl['name']).update_node(decl)
        else:
            # An edge declaration: attach it to its source node.
            get_node(decl['src']).update_edge(decl)
| |
# Copy all non-weak edges from super classes to their subclasses.
# This causes all fields of a super to be considered fields of a
# derived class without transitively relating derived classes with
# each other. For example, if B <: A, C <: A, and for some D, D => B,
# we don't want that to entail that D => C.
def copy_super_edges(edge):
    """Copy the strong fields of the super class behind edge into its subclass."""
    if edge.is_weak() or not edge.is_super():
        return
    inc_copy()
    # Make the super-class edge weak (prohibits processing twice).
    edge.kind = 0
    # If the super class is not in our graph exit early.
    super_node = graph.get(edge.dst)
    if super_node is None:
        return
    # Recursively copy all super-class edges.
    for super_edge in super_node.super_edges():
        copy_super_edges(super_edge)
    # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
    sub_node = graph[edge.src]
    for e in super_node.edges.values():
        if not e.keeps_alive() or e.is_subclass():
            continue
        copied = Edge(
            src=sub_node.name,
            dst=e.dst,
            lbl='%s <: %s' % (super_node.name, e.lbl),
            ptr=e.ptr,
            kind=e.kind,
            loc=e.loc,
        )
        sub_node.edges[copied.key] = copied
    # Add a strong sub-class edge.
    sub_edge = Edge(
        src=super_node.name,
        dst=sub_node.name,
        lbl='<subclass>',
        ptr=edge.ptr,
        kind=1,
        loc=edge.loc,
    )
    super_node.edges[sub_edge.key] = sub_edge
| |
def complete_graph():
    """Propagate super-class fields downwards and collect all root edges."""
    for node in graph.values():
        for super_edge in node.super_edges():
            copy_super_edges(super_edge)
        for edge in node.edges.values():
            if edge.is_root():
                roots.append(edge)
    log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)
| |
def reset_graph():
    """Clear the per-traversal state (cost/visited/path/counts) of every node."""
    for node in graph.values():
        node.reset()
| |
def shortest_path(start, end):
    """Search for a shortest keeps-alive path from start to end.

    On success end.cost is the path length and the path itself can be
    recovered by following the nodes' path edges backwards from end.
    Nodes are expanded cheapest-first; the search stops once end is popped
    or no shorter path can still exist.
    """
    start.cost = 0
    worklist = [start]
    while worklist:
        # Keep the list sorted most-expensive-first so pop() yields the
        # cheapest open node.
        worklist.sort(key=lambda n: -n.cost)
        current = worklist.pop()
        current.visited = True
        if current == end or current.cost >= end.cost + 1:
            return
        for e in current.edges.values():
            if not e.keeps_alive():
                continue
            dst = graph.get(e.dst)
            if dst is None or dst.visited:
                continue
            if current.cost < dst.cost:
                dst.cost = current.cost + 1
                dst.path = e
                worklist.append(dst)
| |
def detect_cycles():
    """Report every GC root whose target can reach back to the root's host."""
    for root_edge in roots:
        reset_graph()
        # Mark ignored classes as already visited.
        for ignore in args.ignore_classes:
            name = ignore if ignore.find("::") > 0 else ("blink::" + ignore)
            node = graph.get(name)
            if node:
                node.visited = True
        src = graph[root_edge.src]
        dst = graph.get(root_edge.dst)
        if src.visited:
            continue
        if root_edge.dst == "WTF::String":
            continue
        if dst is None:
            print("\nPersistent root to incomplete destination object:")
            print(root_edge)
            set_reported_error(True)
            continue
        # Find the shortest path from the root target (dst) to its host (src)
        shortest_path(dst, src)
        if src.cost < maxint:
            report_cycle(root_edge)
| |
def is_ignored_cycle(cycle):
    """Return True when cycle matches one of the ignored-cycle blocks."""
    for ignored in ignored_cycles:
        if block_match(cycle, ignored):
            return True
    # Implicitly returns None (falsy) when nothing matched.
| |
def block_match(b1, b2):
    """Element-wise equality of two equal-length sequences."""
    if len(b1) != len(b2):
        return False
    return all(x == y for x, y in zip(b1, b2))
| |
def report_cycle(root_edge):
    """Pretty-print the cycle found for root_edge, unless it is ignored."""
    dst = graph[root_edge.dst]
    path = []
    edge = root_edge
    # Terminate the backwards walk at the root target.
    dst.path = None
    while edge:
        path.append(edge)
        edge = graph[edge.src].path
    path.append(root_edge)
    path.reverse()
    # Find the max loc length for pretty printing.
    max_loc = max(len(p.loc) for p in path)
    out = StringIO()
    for p in path[:-1]:
        print((p.loc + ':').ljust(max_loc + 1), p, file=out)
    sout = out.getvalue()
    if not is_ignored_cycle(sout):
        print("\nFound a potentially leaking cycle starting from a GC root:\n",
              sout, sep="")
        set_reported_error(True)
| |
def load_graph():
    """Replace the global graph and roots with the pickled pair on disk."""
    global graph
    global roots
    log("Reading graph from pickled file: " + args.pickle_graph)
    # Context manager closes the handle deterministically (old code leaked it).
    with open(args.pickle_graph, 'rb') as f:
        graph, roots = pickle.load(f)
| |
def save_graph():
    """Pickle the global (graph, roots) pair to args.pickle_graph."""
    log("Saving graph to pickle file: " + args.pickle_graph)
    # Context manager closes the handle deterministically (old code leaked it).
    with open(args.pickle_graph, 'wb') as f:
        pickle.dump((graph, roots), f)
| |
def read_ignored_cycles():
    """Parse the --ignore-cycles file into the global ignored_cycles list."""
    global ignored_cycles
    if not args.ignore_cycles:
        return
    log("Reading ignored cycles from file: " + args.ignore_cycles)
    block = []
    # NOTE: `block += l` deliberately extends the list with the CHARACTERS of
    # the line (newlines included), so each ignored block is a character
    # sequence that block_match can compare element-wise against the string
    # produced by report_cycle. Do not "fix" this to append whole lines.
    with open(args.ignore_cycles) as f:
        for l in f:
            line = l.strip()
            if not line or line.startswith('Found'):
                # Blank lines and 'Found ...' headers delimit blocks.
                if len(block) > 0:
                    ignored_cycles.append(block)
                block = []
            else:
                block += l
    if len(block) > 0:
        ignored_cycles.append(block)
| |
# Base classes marking a hierarchy as garbage collected.
gc_bases = (
    'cppgc::GarbageCollected',
    'cppgc::GarbageCollectedMixin',
)
# Base classes marking a hierarchy as reference counted.
ref_bases = (
    'WTF::RefCounted',
    'WTF::ThreadSafeRefCounted',
)
# Base classes of hierarchies transitioning from ref counting to GC.
gcref_bases = (
    'blink::RefCountedGarbageCollected',
    'blink::ThreadSafeRefCountedGarbageCollected',
)
# Mixin bases that imply reference counting.
ref_mixins = (
    'blink::EventTarget',
    'blink::EventTargetWithInlineData',
    'blink::ActiveDOMObject',
)
| |
def print_stats():
    """Print statistics about GC-managed, ref-counted and transitional types."""
    gcref_managed = []
    ref_managed = []
    gc_managed = []
    hierarchies = []

    for node in graph.values():
        node.update_counts()
        for sup in node.super_edges():
            if sup.dst in gcref_bases:
                gcref_managed.append(node)
            elif sup.dst in ref_bases:
                ref_managed.append(node)
            elif sup.dst in gc_bases:
                gc_managed.append(node)

    # Typo fix: the old label read "GC manged".
    groups = [("GC managed ", gc_managed),
              ("ref counted ", ref_managed),
              ("in transition", gcref_managed)]
    total = sum(len(g) for (s, g) in groups)
    # Guard against an empty graph; the old code divided by zero here.
    if total == 0:
        return
    for (s, g) in groups:
        percent = len(g) * 100 / total
        print("%2d%% is %s (%d hierarchies)" % (percent, s, len(g)))

    for base in gcref_managed:
        stats = {'classes': 0, 'ref-mixins': 0}
        for ptr in ptr_types:
            stats[ptr] = 0
        hierarchy_stats(base, stats)
        hierarchies.append((base, stats))

    print("\nHierarchies in transition (RefCountedGarbageCollected):")
    hierarchies.sort(key=lambda n: -n[1]['classes'])
    for (node, stats) in hierarchies:
        total = stats['mem'] + stats['ref'] + stats['raw']
        print(
            ("%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins"
             % (
                 # '*' marks hierarchies with no remaining ref-counting.
                 "*" if stats['ref'] == 0 and stats['ref-mixins'] == 0 else " ",
                 100 if total == 0 else stats['mem'] * 100 / total,
                 node.name.replace('blink::', '').replace(
                     'cppgc::subtle::', '').replace('cppgc::', ''),
                 stats['classes'],
                 stats['mem'],
                 stats['ref'],
                 stats['raw'],
                 stats['ref-mixins'],
             )))
| |
| |
def hierarchy_stats(node, stats):
    """Recursively accumulate counters over node and all of its subclasses."""
    if not node:
        return
    stats['classes'] += 1
    add_counts(stats, node.counts)
    for edge in node.super_edges():
        if edge.dst in ref_mixins:
            stats['ref-mixins'] += 1
    for edge in node.subclass_edges():
        hierarchy_stats(graph.get(edge.dst), stats)
| |
def main():
    """Entry point: build or load the graph, then run the requested analyses.

    Returns 1 on usage errors or when an error was reported, 0 otherwise.
    Typo fixes: user messages previously read "directores" and "containg".
    """
    global args
    args = parser.parse_args()
    if not (args.detect_cycles or args.print_stats):
        print("Please select an operation to perform (eg, -c to detect cycles)")
        parser.print_help()
        return 1
    if args.pickle_graph and os.path.isfile(args.pickle_graph):
        load_graph()
    else:
        if args.use_stdin:
            log("Reading files from stdin")
            for f in sys.stdin:
                build_graph(f.strip())
        else:
            log("Reading files and directories from command line")
            if len(args.files) == 0:
                print(
                    "Please provide files or directories for building the graph")
                parser.print_help()
                return 1
            for f in args.files:
                if os.path.isdir(f):
                    log("Building graph from files in directory: " + f)
                    build_graphs_in_dir(f)
                else:
                    log("Building graph from file: " + f)
                    build_graph(f)
        log("Completing graph construction (%d graph nodes)" % len(graph))
        complete_graph()
        if args.pickle_graph:
            save_graph()
    if args.detect_cycles:
        read_ignored_cycles()
        log("Detecting cycles containing GC roots")
        detect_cycles()
    if args.print_stats:
        log("Printing statistics")
        print_stats()
    if reported_error():
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())