1//===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This pass builds a ModuleSummaryIndex object for the module, to be written 10// to bitcode or LLVM assembly. 12//===----------------------------------------------------------------------===// 62#define DEBUG_TYPE "module-summary-analysis" 64// Option to force edges cold which will block importing when the 65// -import-cold-multiplier is set to 0. Useful for debugging. 73cl::desc(
"Force all edges in the function summary to cold"),
76"all-non-critical",
"All non-critical edges."),
81cl::desc(
"File to emit dot graph of new summary into"));
86"Enable MemProf support for summarizing and cloning indirect calls"));
94// Walk through the operands of a given User via worklist iteration and populate 95// the set of GlobalValue references encountered. Invoked either on an 96// Instruction or a GlobalVariable (which walks its initializer). 97// Return true if any of the operands contains blockaddress. This is important 98// to know when computing summary for global var, because if global variable 99// references basic block address we can't import it separately from function 100// containing that basic block. For simplicity we currently don't import such 101// global vars at all. When importing function we aren't interested if any 102// instruction in it takes an address of any basic block, because instruction 103// can only take an address of basic block located in the same function. 104// Set `RefLocalLinkageIFunc` to true if the analyzed value references a 105// local-linkage ifunc. 110bool &RefLocalLinkageIFunc) {
111bool HasBlockAddress =
false;
113if (Visited.
insert(CurUser).second)
116while (!Worklist.
empty()) {
118constauto *CB = dyn_cast<CallBase>(U);
120for (
constauto &OI : U->operands()) {
121constUser *Operand = dyn_cast<User>(OI);
124if (isa<BlockAddress>(Operand)) {
125 HasBlockAddress =
true;
128if (
auto *GV = dyn_cast<GlobalValue>(Operand)) {
129// We have a reference to a global value. This should be added to 130// the reference set unless it is a callee. Callees are handled 131// specially by WriteFunction and are added to a separate list. 132if (!(CB && CB->isCallee(&OI))) {
133// If an ifunc has local linkage, do not add it into ref edges, and 134// set `RefLocalLinkageIFunc` to true. The referencer is not eligible 135// for import. An ifunc doesn't have a summary and ThinLTO cannot 136// promote it; importing the referencer may cause linkage errors. 137if (
auto *GI = dyn_cast_if_present<GlobalIFunc>(GV);
138 GI && GI->hasLocalLinkage()) {
139 RefLocalLinkageIFunc =
true;
142 RefEdges.insert(Index.getOrInsertValueInfo(GV));
146if (Visited.
insert(Operand).second)
154// MaxNumVTableAnnotations is the maximum number of vtables annotated on 159for (
constauto &V : ValueDataArray)
160 RefEdges.insert(Index.getOrInsertValueInfo(
/* VTableGUID = */ 163return HasBlockAddress;
169return CalleeInfo::HotnessType::Unknown;
171return CalleeInfo::HotnessType::Hot;
173return CalleeInfo::HotnessType::Cold;
174return CalleeInfo::HotnessType::None;
181/// Determine whether this call has all constant integer arguments (excluding 182/// "this") and summarize it to VCalls or ConstVCalls as appropriate. 188 std::vector<FunctionSummary::ConstVCall>> &ConstVCalls) {
189 std::vector<uint64_t> Args;
190// Start from the second argument to skip the "this" pointer. 192auto *CI = dyn_cast<ConstantInt>(Arg);
193if (!CI || CI->getBitWidth() > 64) {
194 VCalls.insert({
Guid, Call.Offset});
197 Args.push_back(CI->getZExtValue());
199 ConstVCalls.insert({{
Guid, Call.Offset}, std::move(Args)});
202/// If this intrinsic call requires that we add information to the function 203/// summary, do so via the non-constant reference arguments. 208 &TypeTestAssumeVCalls,
210 &TypeCheckedLoadVCalls,
212 std::vector<FunctionSummary::ConstVCall>>
213 &TypeTestAssumeConstVCalls,
215 std::vector<FunctionSummary::ConstVCall>>
216 &TypeCheckedLoadConstVCalls,
219case Intrinsic::type_test:
220case Intrinsic::public_type_test: {
221auto *TypeMDVal = cast<MetadataAsValue>(CI->
getArgOperand(1));
222auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
227// Produce a summary from type.test intrinsics. We only summarize type.test 228// intrinsics that are used other than by an llvm.assume intrinsic. 229// Intrinsics that are assumed are relevant only to the devirtualization 230// pass, not the type test lowering pass. 232 return !isa<AssumeInst>(CIU.getUser());
235 TypeTests.insert(
Guid);
240for (
auto &Call : DevirtCalls)
242 TypeTestAssumeConstVCalls);
247case Intrinsic::type_checked_load_relative:
248case Intrinsic::type_checked_load: {
249auto *TypeMDVal = cast<MetadataAsValue>(CI->
getArgOperand(2));
250auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
258bool HasNonCallUses =
false;
260 HasNonCallUses, CI, DT);
261// Any non-call uses of the result of llvm.type.checked.load will 262// prevent us from optimizing away the llvm.type.test. 264 TypeTests.insert(
Guid);
265for (
auto &Call : DevirtCalls)
267 TypeCheckedLoadConstVCalls);
277if (
constauto *LI = dyn_cast<LoadInst>(
I))
278return !LI->isVolatile();
284if (
constauto *SI = dyn_cast<StoreInst>(
I))
285return !SI->isVolatile();
290// Returns true if the function definition must be unreachable. 292// Note if this helper function returns true, `F` is guaranteed 293// to be unreachable; if it returns false, `F` might still 294// be unreachable but not covered by this helper function. 296// A function must be unreachable if its entry block ends with an 299return isa<UnreachableInst>(
F.getEntryBlock().getTerminator());
308// Summary not currently supported for anonymous functions, they should 312unsigned NumInsts = 0;
313// Map from callee ValueId to profile count. Used to accumulate profile 314// counts for all static calls to a given callee. 322 TypeTestAssumeVCalls, TypeCheckedLoadVCalls;
324 std::vector<FunctionSummary::ConstVCall>>
325 TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls;
329// Add personality function, prefix data and prologue data to function's ref 331bool HasLocalIFuncCallOrRef =
false;
332findRefEdges(Index, &
F, RefEdges, Visited, HasLocalIFuncCallOrRef);
333 std::vector<const Instruction *> NonVolatileLoads;
334 std::vector<const Instruction *> NonVolatileStores;
336 std::vector<CallsiteInfo> Callsites;
337 std::vector<AllocInfo> Allocs;
343bool HasInlineAsmMaybeReferencingInternal =
false;
344bool HasIndirBranchToBlockAddress =
false;
345bool HasUnknownCall =
false;
348// We don't allow inlining of function with indirect branch to blockaddress. 349// If the blockaddress escapes the function, e.g., via a global variable, 350// inlining may lead to an invalid cross-function reference. So we shouldn't 351// import such function either. 352if (BB.hasAddressTaken()) {
354if (!isa<CallBrInst>(*U)) {
355 HasIndirBranchToBlockAddress =
true;
361if (
I.isDebugOrPseudoInst())
365// Regular LTO module doesn't participate in ThinLTO import, 366// so no reference from it can be read/writeonly, since this 367// would require importing variable as local copy 370// Postpone processing of non-volatile load instructions 373 NonVolatileLoads.push_back(&
I);
377 NonVolatileStores.push_back(&
I);
378// All references from second operand of store (destination address) 379// can be considered write-only if they're not referenced by any 380// non-store instruction. References from first operand of store 381// (stored value) can't be treated either as read- or as write-only 382// so we add them to RefEdges as we do with all other instructions 383// except non-volatile load. 384Value *Stored =
I.getOperand(0);
385if (
auto *GV = dyn_cast<GlobalValue>(Stored))
386// findRefEdges will try to examine GV operands, so instead 387// of calling it we should add GV to RefEdges directly. 388 RefEdges.
insert(Index.getOrInsertValueInfo(GV));
389elseif (
auto *U = dyn_cast<User>(Stored))
390findRefEdges(Index, U, RefEdges, Visited, HasLocalIFuncCallOrRef);
394findRefEdges(Index, &
I, RefEdges, Visited, HasLocalIFuncCallOrRef);
395constauto *CB = dyn_cast<CallBase>(&
I);
402constauto *CI = dyn_cast<CallInst>(&
I);
403// Since we don't know exactly which local values are referenced in inline 404// assembly, conservatively mark the function as possibly referencing 405// a local value from inline assembly to ensure we don't export a 406// reference (which would require renaming and promotion of the 408if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm())
409 HasInlineAsmMaybeReferencingInternal =
true;
411// Compute this once per indirect call. 416auto *CalledValue = CB->getCalledOperand();
417auto *CalledFunction = CB->getCalledFunction();
418if (CalledValue && !CalledFunction) {
419 CalledValue = CalledValue->stripPointerCasts();
420// Stripping pointer casts can reveal a called function. 421 CalledFunction = dyn_cast<Function>(CalledValue);
423// Check if this is an alias to a function. If so, get the 424// called aliasee for the checks below. 425if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
426assert(!CalledFunction &&
"Expected null called function in callsite for alias");
427 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
429// Check if this is a direct call to a known function or a known 430// intrinsic, or an indirect call with profile data. 432if (CI && CalledFunction->isIntrinsic()) {
434 CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
435 TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT);
438// We should have named any anonymous globals 439assert(CalledFunction->hasName());
441auto Hotness = ScaledCount ?
getHotness(*ScaledCount, PSI)
442 : CalleeInfo::HotnessType::Unknown;
444 Hotness = CalleeInfo::HotnessType::Cold;
446// Use the original CalledValue, in case it was an alias. We want 447// to record the call edge to the alias in that case. Eventually 448// an alias summary will be created to associate the alias and 450auto &
ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo(
451 cast<GlobalValue>(CalledValue))];
455// Add the relative block frequency to CalleeInfo if there is no profile 457if (BFI !=
nullptr && Hotness == CalleeInfo::HotnessType::Unknown) {
459uint64_t EntryFreq = BFI->getEntryFreq().getFrequency();
463 HasUnknownCall =
true;
464// If F is imported, a local linkage ifunc (e.g. target_clones on a 465// static function) called by F will be cloned. Since summaries don't 466// track ifuncs, we do not know that the implementation functions referenced by 467// the ifunc resolver need to be promoted in the exporter, and we will 468// get linker errors due to cloned declarations for implementation 469// functions. As a simple fix, just mark F as not eligible for import. 470// A non-local ifunc is not cloned and does not have the issue. 471if (
auto *GI = dyn_cast_if_present<GlobalIFunc>(CalledValue))
472if (GI->hasLocalLinkage())
473 HasLocalIFuncCallOrRef =
true;
474// Skip inline assembly calls. 475if (CI && CI->isInlineAsm())
478if (!CalledValue || isa<Constant>(CalledValue))
481// Check if the instruction has a callees metadata. If so, add callees 482// to CallGraphEdges to reflect the references from the metadata, and 483// to enable importing for subsequent indirect call promotion and 485if (
auto *MD =
I.getMetadata(LLVMContext::MD_callees)) {
486for (
constauto &
Op : MD->operands()) {
487Function *Callee = mdconst::extract_or_null<Function>(
Op);
489 CallGraphEdges[Index.getOrInsertValueInfo(Callee)];
493 CandidateProfileData =
496for (
constauto &Candidate : CandidateProfileData)
497 CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
498 .updateHotness(
getHotness(Candidate.Count, PSI));
501// Summarize memprof related metadata. This is only needed for ThinLTO. 505// Skip indirect calls if we haven't enabled memprof ICP. 509// Ensure we keep this analysis in sync with the handling in the ThinLTO 510// backend (see MemProfContextDisambiguation::applyImport). Save this call 511// so that we can skip it in checking the reverse case later. 514 CallsThatMayHaveMemprofSummary.
insert(CB);
517// Compute the list of stack ids first (so we can trim them from the stack 520I.getMetadata(LLVMContext::MD_callsite));
521auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
523 std::vector<MIBInfo> MIBs;
524 std::vector<uint64_t> TotalSizes;
525 std::vector<std::vector<ContextTotalSize>> ContextSizeInfos;
526for (
auto &MDOp : MemProfMD->operands()) {
527auto *MIBMD = cast<const MDNode>(MDOp);
532// Collapse out any on the allocation call (inlining). 533for (
auto ContextIter =
535 ContextIter != StackContext.
end(); ++ContextIter) {
536unsigned StackIdIdx = Index.addOrGetStackIdIndex(*ContextIter);
537// If this is a direct recursion, simply skip the duplicate 538// entries. If this is mutual recursion, handling is left to 539// the LTO link analysis client. 540if (StackIdIndices.
empty() || StackIdIndices.
back() != StackIdIdx)
543// If we have context size information, collect it for inclusion in 546if (MIBMD->getNumOperands() > 2) {
547 std::vector<ContextTotalSize> ContextSizes;
548for (
unsignedI = 2;
I < MIBMD->getNumOperands();
I++) {
549MDNode *ContextSizePair = dyn_cast<MDNode>(MIBMD->getOperand(
I));
551uint64_t FullStackId = mdconst::dyn_extract<ConstantInt>(
554uint64_t TS = mdconst::dyn_extract<ConstantInt>(
557 ContextSizes.push_back({FullStackId, TS});
559 ContextSizeInfos.push_back(std::move(ContextSizes));
564 Allocs.push_back(
AllocInfo(std::move(MIBs)));
566if (!ContextSizeInfos.empty()) {
567assert(Allocs.back().MIBs.size() == ContextSizeInfos.size());
568 Allocs.back().ContextSizeInfos = std::move(ContextSizeInfos);
570 }
elseif (!InstCallsite.
empty()) {
572for (
auto StackId : InstCallsite)
573 StackIdIndices.
push_back(Index.addOrGetStackIdIndex(StackId));
575// Use the original CalledValue, in case it was an alias. We want 576// to record the call edge to the alias in that case. Eventually 577// an alias summary will be created to associate the alias and 579auto CalleeValueInfo =
580 Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
581 Callsites.push_back({CalleeValueInfo, StackIdIndices});
584// For indirect callsites, create multiple Callsites, one per target. 585// This enables having a different set of clone versions per target, 586// and we will apply the cloning decisions while speculatively 587// devirtualizing in the ThinLTO backends. 588for (
constauto &Candidate : CandidateProfileData) {
589auto CalleeValueInfo = Index.getOrInsertValueInfo(Candidate.Value);
590 Callsites.push_back({CalleeValueInfo, StackIdIndices});
598 Index.addBlockCount(
F.size());
603 [&](
const std::vector<const Instruction *> &Instrs,
606for (
constauto *
I : Instrs) {
612// By now we have processed all instructions in the function, except 613// non-volatile loads and non-volatile value stores. Let's find 614// ref edges for both instruction sets. 615 AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited);
616// We can add some values to the Visited set when processing load 617// instructions which are also used by stores in NonVolatileStores. 618// For example, this can happen if we have the following code: 620// store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**) 621// %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**) 623// After processing loads we'll add the bitcast to the Visited set, and if 624// we use the same set while processing stores, we'll never see the store 625// to @bar and @bar will be mistakenly treated as readonly. 627 AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache);
629// If both load and store instruction reference the same variable 630// we won't be able to optimize it. Add all such reference edges 632for (
constauto &VI : StoreRefEdges)
633if (LoadRefEdges.
remove(VI))
636unsigned RefCnt = RefEdges.
size();
637// All new reference edges inserted in two loops below are either 638// read or write only. They will be grouped in the end of RefEdges 639// vector, so we can use a single integer value to identify them. 640for (
constauto &VI : LoadRefEdges)
643unsigned FirstWORef = RefEdges.
size();
644for (
constauto &VI : StoreRefEdges)
648for (; RefCnt < FirstWORef; ++RefCnt)
649 Refs[RefCnt].setReadOnly();
651for (; RefCnt < Refs.
size(); ++RefCnt)
652 Refs[RefCnt].setWriteOnly();
656// Explicitly add hot edges to enforce importing for designated GUIDs for 657// sample PGO, to enable the same inlines as the profiled optimized binary. 658for (
auto &
I :
F.getImportGUIDs())
659 CallGraphEdges[Index.getOrInsertValueInfo(
I)].updateHotness(
661 ? CalleeInfo::HotnessType::Cold
662 : CalleeInfo::HotnessType::Critical);
665// Make sure that all calls we decided could not have memprof summaries get a 666// false value for mayHaveMemprofSummary, to ensure that this handling remains 667// in sync with the ThinLTO backend handling. 671constauto *CB = dyn_cast<CallBase>(&
I);
674// We already checked these above. 675if (CallsThatMayHaveMemprofSummary.
count(CB))
684bool NotEligibleForImport =
685 NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
686 HasIndirBranchToBlockAddress || HasLocalIFuncCallOrRef;
688F.getLinkage(),
F.getVisibility(), NotEligibleForImport,
689/* Live = */false,
F.isDSOLocal(),
F.canBeOmittedFromSymbolTable(),
690 GlobalValueSummary::ImportKind::Definition);
692F.doesNotAccessMemory(),
F.onlyReadsMemory() && !
F.doesNotAccessMemory(),
693F.hasFnAttribute(Attribute::NoRecurse),
F.returnDoesNotAlias(),
694// FIXME: refactor this to use the same code that inliner is using. 695// Don't try to import functions with noinline attribute. 696F.getAttributes().hasFnAttr(Attribute::NoInline),
697F.hasFnAttribute(Attribute::AlwaysInline),
698F.hasFnAttribute(Attribute::NoUnwind), MayThrow, HasUnknownCall,
700 std::vector<FunctionSummary::ParamAccess> ParamAccesses;
701if (
auto *SSI = GetSSICallback(
F))
702 ParamAccesses = SSI->getParamAccesses(Index);
703auto FuncSummary = std::make_unique<FunctionSummary>(
704 Flags, NumInsts, FunFlags, std::move(Refs), CallGraphEdges.
takeVector(),
707 TypeTestAssumeConstVCalls.takeVector(),
708 TypeCheckedLoadConstVCalls.takeVector(), std::move(ParamAccesses),
709 std::move(Callsites), std::move(Allocs));
710if (NonRenamableLocal)
711 CantBePromoted.
insert(
F.getGUID());
712 Index.addGlobalValueSummary(
F, std::move(FuncSummary));
715/// Find function pointers referenced within the given vtable initializer 716/// (or subset of an initializer) \p I. The starting offset of \p I within 717/// the vtable initializer is \p StartingOffset. Any discovered function 718/// pointers are added to \p VTableFuncs along with their cumulative offset 719/// within the initializer. 724// First check if this is a function pointer. 725if (
I->getType()->isPointerTy()) {
726autoC =
I->stripPointerCasts();
727autoA = dyn_cast<GlobalAlias>(
C);
728if (isa<Function>(
C) || (
A && isa<Function>(
A->getAliasee()))) {
729auto GV = dyn_cast<GlobalValue>(
C);
731// We can disregard __cxa_pure_virtual as a possible call target, as 732// calls to pure virtuals are UB. 733if (GV && GV->getName() !=
"__cxa_pure_virtual")
734 VTableFuncs.push_back({Index.getOrInsertValueInfo(GV), StartingOffset});
739// Walk through the elements in the constant struct or array and recursively 740// look for virtual function pointers. 742if (
auto *
C = dyn_cast<ConstantStruct>(
I)) {
743StructType *STy = dyn_cast<StructType>(
C->getType());
751 StartingOffset +
Offset, M, Index, VTableFuncs, OrigGV);
753 }
elseif (
auto *
C = dyn_cast<ConstantArray>(
I)) {
755Type *EltTy = ATy->getElementType();
757for (
unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
759 StartingOffset + i * EltSize, M, Index, VTableFuncs,
762 }
elseif (
constauto *CE = dyn_cast<ConstantExpr>(
I)) {
763// For relative vtables, the next sub-component should be a trunc. 764if (CE->getOpcode() != Instruction::Trunc ||
765 !(CE = dyn_cast<ConstantExpr>(CE->getOperand(0))))
768// If this constant can be reduced to the offset between a function and a 769// global, then we know this is a valid virtual function if the RHS is the 770// original vtable we're scanning through. 771if (CE->getOpcode() == Instruction::Sub) {
773APSInt LHSOffset, RHSOffset;
778// For relative vtables, this component should point to the callable 779// function without any offsets. 782// Also, the RHS should always point to somewhere within the vtable. 791// Identify the function pointers referenced by vtable definition \p V. 802// Validate that the VTableFuncs list is ordered by offset. 804for (
auto &
P : VTableFuncs) {
805// The findVFuncPointers traversal should have encountered the 806// functions in offset order. We need to use ">=" since PrevOffset 808assert(
P.VTableOffset >= PrevOffset);
809 PrevOffset =
P.VTableOffset;
814/// Record vtable definition \p V for each type metadata it references. 824 cast<ConstantAsMetadata>(
Type->getOperand(0))->getValue())
827if (
auto *TypeId = dyn_cast<MDString>(
TypeID))
828 Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId->getString())
829 .push_back({
Offset, Index.getOrInsertValueInfo(&V)});
840bool RefLocalIFunc =
false;
841bool HasBlockAddress =
842findRefEdges(Index, &V, RefEdges, Visited, RefLocalIFunc);
843constbool NotEligibleForImport = (HasBlockAddress || RefLocalIFunc);
846 V.getLinkage(), V.getVisibility(), NonRenamableLocal,
847/* Live = */false, V.isDSOLocal(), V.canBeOmittedFromSymbolTable(),
851// If splitting is not enabled, then we compute the summary information 852// necessary for index-based whole program devirtualization. 853if (!Index.enableSplitLTOUnit()) {
855 V.getMetadata(LLVMContext::MD_type, Types);
857// Identify the function pointers referenced by this vtable definition. 860// Record this vtable definition for each type metadata it references. 865// Don't mark variables we won't be able to internalize as read/write-only. 866bool CanBeInternalized =
867 !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
868 !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass();
873auto GVarSummary = std::make_unique<GlobalVarSummary>(Flags, VarFlags,
875if (NonRenamableLocal)
876 CantBePromoted.
insert(V.getGUID());
877if (NotEligibleForImport)
878 GVarSummary->setNotEligibleToImport();
879if (!VTableFuncs.empty())
880 GVarSummary->setVTableFuncs(VTableFuncs);
881 Index.addGlobalValueSummary(V, std::move(GVarSummary));
886// Skip summary for indirect function aliases as summary for aliasee will not 889if (isa<GlobalIFunc>(Aliasee))
893A.getLinkage(),
A.getVisibility(), NonRenamableLocal,
894/* Live = */false,
A.isDSOLocal(),
A.canBeOmittedFromSymbolTable(),
896auto AS = std::make_unique<AliasSummary>(Flags);
897auto AliaseeVI = Index.getValueInfo(Aliasee->
getGUID());
898assert(AliaseeVI &&
"Alias expects aliasee summary to be available");
899assert(AliaseeVI.getSummaryList().size() == 1 &&
900"Expected a single entry per aliasee in per-module index");
901 AS->setAliasee(AliaseeVI, AliaseeVI.getSummaryList()[0].get());
902if (NonRenamableLocal)
903 CantBePromoted.
insert(
A.getGUID());
904 Index.addGlobalValueSummary(
A, std::move(AS));
907// Set LiveRoot flag on entries matching the given value name. 910for (
constauto &Summary : VI.getSummaryList())
911 Summary->setLive(
true);
920bool EnableSplitLTOUnit =
false;
921bool UnifiedLTO =
false;
922if (
auto *MD = mdconst::extract_or_null<ConstantInt>(
923 M.getModuleFlag(
"EnableSplitLTOUnit")))
924 EnableSplitLTOUnit = MD->getZExtValue();
926 mdconst::extract_or_null<ConstantInt>(M.getModuleFlag(
"UnifiedLTO")))
927 UnifiedLTO = MD->getZExtValue();
930// Identify the local values in the llvm.used and llvm.compiler.used sets, 931// which should not be exported as they would then require renaming and 932// promotion, but we may have opaque uses e.g. in inline asm. We collect them 933// here because we use this information to mark functions containing inline 934// assembly calls as not importable. 937// First collect those in the llvm.used set. 939// Next collect those in the llvm.compiler.used set. 942for (
auto *V : Used) {
943if (V->hasLocalLinkage()) {
945 CantBePromoted.
insert(V->getGUID());
949bool HasLocalInlineAsmSymbol =
false;
950if (!M.getModuleInlineAsm().empty()) {
951// Collect the local values defined by module level asm, and set up 952// summaries for these symbols so that they can be marked as NoRename, 953// to prevent export of any use of them in regular IR that would require 954// renaming within the module level asm. Note we don't need to create a 955// summary for weak or global defs, as they don't need to be flagged as 956// NoRename, and defs in module level asm can't be imported anyway. 957// Also, any values used but not defined within module level asm should 958// be listed on the llvm.used or llvm.compiler.used global and marked as 959// referenced from there. 962// Symbols not marked as Weak or Global are local definitions. 966 HasLocalInlineAsmSymbol =
true;
973/* NotEligibleToImport = */true,
978// Create the appropriate summary type. 979if (
Function *
F = dyn_cast<Function>(GV)) {
980 std::unique_ptr<FunctionSummary> Summary =
981 std::make_unique<FunctionSummary>(
982 GVFlags,
/*InstCount=*/0,
984F->hasFnAttribute(Attribute::ReadNone),
985F->hasFnAttribute(Attribute::ReadOnly),
986F->hasFnAttribute(Attribute::NoRecurse),
987F->returnDoesNotAlias(),
988/* NoInline = */false,
989F->hasFnAttribute(Attribute::AlwaysInline),
990F->hasFnAttribute(Attribute::NoUnwind),
992/* HasUnknownCall */true,
993/* MustBeUnreachable */false},
1003 Index.addGlobalValueSummary(*GV, std::move(Summary));
1005 std::unique_ptr<GlobalVarSummary> Summary =
1006 std::make_unique<GlobalVarSummary>(
1009false,
false, cast<GlobalVariable>(GV)->
isConstant(),
1012 Index.addGlobalValueSummary(*GV, std::move(Summary));
1017bool IsThinLTO =
true;
1019 mdconst::extract_or_null<ConstantInt>(M.getModuleFlag(
"ThinLTO")))
1020 IsThinLTO = MD->getZExtValue();
1022// Compute summaries for all functions defined in module, and save in the 1024for (
constauto &
F : M) {
1025if (
F.isDeclaration())
1030 std::unique_ptr<BlockFrequencyInfo> BFIPtr;
1032 BFI = GetBFICallback(
F);
1033elseif (
F.hasProfileData()) {
1036 BFIPtr = std::make_unique<BlockFrequencyInfo>(
F, BPI, LI);
1041 !LocalsUsed.
empty() || HasLocalInlineAsmSymbol,
1042 CantBePromoted, IsThinLTO, GetSSICallback);
1045// Compute summaries for all variables defined in module, and save in the 1049if (
G.isDeclaration())
1054// Compute summaries for all aliases defined in module, and save in the 1059// Iterate through ifuncs, set their resolvers all alive. 1061I.applyAlongResolverPath([&Index](
constGlobalValue &GV) {
1062 Index.getGlobalValueSummary(GV)->setLive(
true);
1066for (
auto *V : LocalsUsed) {
1067auto *Summary = Index.getGlobalValueSummary(*V);
1068assert(Summary &&
"Missing summary for global value");
1069 Summary->setNotEligibleToImport();
1072// The linker doesn't know about these LLVM produced values, so we need 1073// to flag them as live in the index to ensure index-based dead value 1074// analysis treats them as live roots of the analysis. 1081for (
auto &GlobalList : Index) {
1082// Ignore entries for references that are undefined in the current module. 1083if (GlobalList.second.SummaryList.empty())
1086assert(GlobalList.second.SummaryList.size() == 1 &&
1087"Expected module's index to have one summary per GUID");
1088auto &Summary = GlobalList.second.SummaryList[0];
1090 Summary->setNotEligibleToImport();
1094bool AllRefsCanBeExternallyReferenced =
1096 return !CantBePromoted.count(VI.getGUID());
1098if (!AllRefsCanBeExternallyReferenced) {
1099 Summary->setNotEligibleToImport();
1103if (
auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
1106 return !CantBePromoted.count(Edge.first.getGUID());
1108if (!AllCallsCanBeExternallyReferenced)
1109 Summary->setNotEligibleToImport();
1119 Index.exportToDot(OSDot, {});
1149"Module Summary Analysis",
false,
true)
1166auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1171return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>(
1177return NeedSSI ? &getAnalysis<StackSafetyInfoWrapperPass>(
1217"Module summary info",
false,
true)
1222if (CB->isDebugOrPseudoInst())
1224auto *CI = dyn_cast<CallInst>(CB);
1225auto *CalledValue = CB->getCalledOperand();
1226auto *CalledFunction = CB->getCalledFunction();
1227if (CalledValue && !CalledFunction) {
1228 CalledValue = CalledValue->stripPointerCasts();
1229// Stripping pointer casts can reveal a called function. 1230 CalledFunction = dyn_cast<Function>(CalledValue);
1232// Check if this is an alias to a function. If so, get the 1233// called aliasee for the checks below. 1234if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
1236"Expected null called function in callsite for alias");
1237 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
1239// Check if this is a direct call to a known function or a known 1240// intrinsic, or an indirect call with profile data. 1241if (CalledFunction) {
1242if (CI && CalledFunction->isIntrinsic())
1245// Skip indirect calls if we haven't enabled memprof ICP. 1248// Skip inline assembly calls. 1249if (CI && CI->isInlineAsm())
1251// Skip direct calls via Constant. 1252if (!CalledValue || isa<Constant>(CalledValue))
static bool isConstant(const MachineInstr &MI)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
This defines the Use class.
Interface to identify indirect call promotion candidates.
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid, SetVector< FunctionSummary::VFuncId, std::vector< FunctionSummary::VFuncId > > &VCalls, SetVector< FunctionSummary::ConstVCall, std::vector< FunctionSummary::ConstVCall > > &ConstVCalls)
Determine whether this call has all constant integer arguments (excluding "this") and summarize it to...
cl::opt< unsigned > MaxNumVTableAnnotations
static void computeVTableFuncs(ModuleSummaryIndex &Index, const GlobalVariable &V, const Module &M, VTableFuncList &VTableFuncs)
static void computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, DenseSet< GlobalValue::GUID > &CantBePromoted)
static void findFuncPointers(const Constant *I, uint64_t StartingOffset, const Module &M, ModuleSummaryIndex &Index, VTableFuncList &VTableFuncs, const GlobalVariable &OrigGV)
Find function pointers referenced within the given vtable initializer (or subset of an initializer) I...
static void computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V, DenseSet< GlobalValue::GUID > &CantBePromoted, const Module &M, SmallVectorImpl< MDNode * > &Types)
static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name)
static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount, ProfileSummaryInfo *PSI)
static cl::opt< bool > EnableMemProfIndirectCallSupport("enable-memprof-indirect-call-support", cl::init(false), cl::Hidden, cl::desc("Enable MemProf support for summarizing and cloning indirect calls"))
static bool isNonVolatileLoad(const Instruction *I)
cl::opt< bool > ScalePartialSampleProfileWorkingSetSize
cl::opt< bool > MemProfReportHintedSizes
static bool isNonRenamableLocal(const GlobalValue &GV)
static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, const Function &F, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT, bool HasLocalsInUsedOrAsm, DenseSet< GlobalValue::GUID > &CantBePromoted, bool IsThinLTO, std::function< const StackSafetyInfo *(const Function &F)> GetSSICallback)
static cl::opt< FunctionSummary::ForceSummaryHotnessType, true > FSEC("force-summary-edges-cold", cl::Hidden, cl::location(ForceSummaryEdgesCold), cl::desc("Force all edges in the function summary to cold"), cl::values(clEnumValN(FunctionSummary::FSHT_None, "none", "None."), clEnumValN(FunctionSummary::FSHT_AllNonCritical, "all-non-critical", "All non-critical edges."), clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")))
static bool mustBeUnreachableFunction(const Function &F)
static bool isNonVolatileStore(const Instruction *I)
static cl::opt< std::string > ModuleSummaryDotFile("module-summary-dot-file", cl::Hidden, cl::value_desc("filename"), cl::desc("File to emit dot graph of new summary into"))
static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser, SetVector< ValueInfo, SmallVector< ValueInfo, 0 > > &RefEdges, SmallPtrSet< const User *, 8 > &Visited, bool &RefLocalLinkageIFunc)
static void addIntrinsicToSummary(const CallInst *CI, SetVector< GlobalValue::GUID, std::vector< GlobalValue::GUID > > &TypeTests, SetVector< FunctionSummary::VFuncId, std::vector< FunctionSummary::VFuncId > > &TypeTestAssumeVCalls, SetVector< FunctionSummary::VFuncId, std::vector< FunctionSummary::VFuncId > > &TypeCheckedLoadVCalls, SetVector< FunctionSummary::ConstVCall, std::vector< FunctionSummary::ConstVCall > > &TypeTestAssumeConstVCalls, SetVector< FunctionSummary::ConstVCall, std::vector< FunctionSummary::ConstVCall > > &TypeCheckedLoadConstVCalls, DominatorTree &DT)
If this intrinsic call requires that we add information to the function summary, do so via the non-co...
static void recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index, const GlobalVariable &V, SmallVectorImpl< MDNode * > &Types)
Record vtable definition V for each type metadata it references.
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations of the classes that hold the module index and s...
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
An arbitrary precision integer that knows its signedness.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Analysis pass which computes BlockFrequencyInfo.
Legacy analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Analysis providing branch probability information.
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Implements a dense probed hash-table based set.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
std::pair< ValueInfo, CalleeInfo > EdgeTy
<CalleeValueInfo, CalleeInfo> call edge pair.
ForceSummaryHotnessType
Types for -force-summary-edges-cold debugging option.
Class to represent profile counts.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
bool hasLocalLinkage() const
static GUID getGUID(StringRef GlobalName)
Return a 64-bit global unique ID constructed from global value name (i.e. returned by getGlobalIdentifier()).
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e. returned by getGlobalIdentifier()).
@ DefaultVisibility
The GV is visible.
bool canBeOmittedFromSymbolTable() const
True if GV can be left out of the object symbol table.
@ InternalLinkage
Rename collisions when linking (static functions).
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
MutableArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates)
Returns reference to array of InstrProfValueData for the given instruction I.
Legacy wrapper pass to provide the ModuleSummaryIndex object.
ImmutableModuleSummaryIndexWrapperPass(const ModuleSummaryIndex *Index=nullptr)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
ImmutablePass class - This class is used to provide information that does not need to be run.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType takeVector()
Clear the MapVector and return the underlying vector.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Result run(Module &M, ModuleAnalysisManager &AM)
Legacy wrapper pass to provide the ModuleSummaryIndex object.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool doFinalization(Module &M) override
doFinalization - Virtual method overridden by subclasses to do any necessary clean up after all passes...
ModuleSummaryIndexWrapperPass()
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static void CollectAsmSymbols(const Module &M, function_ref< void(StringRef, object::BasicSymbolRef::Flags)> AsmSymbol)
Parse inline ASM and collect the symbols that are defined or referenced in the current module.
A Module instance is used to store all the information related to an LLVM module.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
std::optional< uint64_t > getProfileCount(const CallBase &CallInst, BlockFrequencyInfo *BFI, bool AllowSynthetic=false) const
Returns the profile count for CallInst.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool hasPartialSampleProfile() const
Returns true if module M has partial-profile sample profile.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
A vector that has set insertion semantics.
bool remove(const value_type &X)
Remove an item from the set vector.
size_type size() const
Determine the number of elements in the SetVector.
Vector takeVector()
Clear the SetVector and return the underlying vector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackSafetyInfo wrapper for the new pass manager.
StackSafetyInfo wrapper for the legacy pass manager.
Interface to access stack safety analysis results for single function.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
TypeSize getElementOffset(unsigned Idx) const
Class to represent struct types.
ArrayRef< Type * > elements() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
TypeID
Definitions of all of the base types for the Type system.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< use_iterator > uses()
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
A raw_ostream that writes to a file descriptor.
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, const DataLayout &DL, DSOLocalEquivalent **DSOEquiv=nullptr)
If this constant is a constant offset from a global, return the global and the constant.
FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold
bool needsParamAccessSummary(const Module &M)
ModuleSummaryIndex buildModuleSummaryIndex(const Module &M, std::function< BlockFrequencyInfo *(const Function &F)> GetBFICallback, ProfileSummaryInfo *PSI, std::function< const StackSafetyInfo *(const Function &F)> GetSSICallback=[](const Function &F) -> const StackSafetyInfo *{ return nullptr;})
Direct function to compute a ModuleSummaryIndex from a given module.
void initializeModuleSummaryIndexWrapperPassPass(PassRegistry &)
std::vector< VirtFuncOffset > VTableFuncList
List of functions referenced by a particular vtable definition.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void findDevirtualizableCallsForTypeCheckedLoad(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< Instruction * > &LoadedPtrs, SmallVectorImpl< Instruction * > &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.checked.load, find all devirtualizable call sites based on t...
SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
ModulePass * createModuleSummaryIndexWrapperPass()
ImmutablePass * createImmutableModuleSummaryIndexWrapperPass(const ModuleSummaryIndex *Index)
void initializeImmutableModuleSummaryIndexWrapperPassPass(PassRegistry &)
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Summary of memprof metadata on allocations.
A special type used by analysis passes to provide an address that identifies that particular analysis...
A call site that could be devirtualized.
A specification for a virtual function call with all constant integer arguments.
Flags specific to function summaries.
An "identifier" for a virtual function.
Group flags (Linkage, NotEligibleToImport, etc.) as a bitfield.
Summary of a single MIB in a memprof metadata on allocations.
Struct that holds a reference to a particular GUID in a global value summary.