1////===- SampleProfileLoadBaseImpl.h - Profile loader base impl --*- C++-*-===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 10/// This file provides the interface for the sampled PGO profile loader base 13//===----------------------------------------------------------------------===// 15#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H 16#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H 48using namespacesampleprof;
49using namespacesampleprofutil;
56#define DEBUG_TYPE "sample-profile-impl" 77return &
F->getEntryBlock();
83}
// end namespace afdo_detail 85// This class serves sample counts correlation for SampleProfileLoader by 86// analyzing pseudo probes and their function descriptors injected by 87// SampleProfileProber. 95for (
constauto *Operand : FuncInfo->operands()) {
96constauto *MD = cast<MDNode>(Operand);
97auto GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))
99auto Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))
107autoI = GUIDToProbeDescMap.
find(GUID);
108returnI == GUIDToProbeDescMap.
end() ? nullptr : &
I->second;
130bool IsAvailableExternallyLinkage =
132// Always check the function attribute to determine checksum mismatch for 133// `available_externally` functions even if their desc are available. This 134// is because the desc is computed based on the original internal function 135// and it's substituted by the `available_externally` function during link 136// time. However, when unstable IR or ODR violation issue occurs, the 137// definitions of the same function across different translation units could 138// be different and result in different checksums. So we should use the 139// state from the new (available_externally) function, which is saved in its 141// TODO: If the function's profile only exists as nested inlinee profile in 142// a different module, we don't have the attr mismatch state(unknown), we 143// need to fix it later. 144if (IsAvailableExternallyLinkage || !
Desc)
145return !
F.hasFnAttribute(
"profile-checksum-mismatch");
156returnF.isDeclaration() || !
F.hasFnAttribute(
"use-sample-profile");
161 std::vector<Function *> &FunctionOrderList) {
168 FunctionOrderList.push_back(&
F);
172 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
183usingBT = std::remove_pointer_t<NodeRef>;
207usingEdge = std::pair<const BasicBlockT *, const BasicBlockT *>;
265 /// Map basic blocks to their computed weights. 267 /// The weight of a basic block is defined to be the maximum 268 /// of all the instruction weights in that block. 271 /// Map edges to their computed weights. 273 /// Edge weights are computed by propagating basic block weights in 274 /// SampleProfile::propagateWeights. 277 /// Set of visited blocks during propagation. 280 /// Set of visited edges during propagation. 283 /// Equivalence classes for block weights. 285 /// Two blocks BB1 and BB2 are in the same equivalence class if they 286 /// dominate and post-dominate each other, and they are in the same loop 287 /// nest. When this happens, the two blocks are guaranteed to execute 288 /// the same number of times. 291 /// Dominance, post-dominance and loop information. 296 /// Predecessors for each basic block in the CFG. 299 /// Successors for each basic block in the CFG. 302 /// Profile coverage tracker. 305 /// Profile reader object. 306 std::unique_ptr<SampleProfileReader>
Reader;
308 /// Synthetic samples created by duplicating the samples of inlined functions 309 /// from the original profile as if they were top level sample profiles. 310 /// Use std::map because insertion may happen while its content is referenced. 313// A pseudo probe helper to correlate the imported sample counts. 316 /// Samples collected for the body of this function. 319 /// Name of the profile file to load. 322 /// Name of the profile remapping file to load. 325 /// VirtualFileSystem to load profile files from. 328 /// Profile Summary Info computed from sample profile. 331 /// Optimization Remark Emitter used to emit diagnostic remarks. 335/// Clear all the per-function data used to load samples and propagate weights. 336template <
typename BT>
338 BlockWeights.clear();
340 VisitedBlocks.clear();
341 VisitedEdges.clear();
342 EquivalenceClass.clear();
348 Predecessors.clear();
350 CoverageTracker.clear();
354/// Print the weight of edge \p E on stream \p OS. 356/// \param OS Stream to emit the output to. 357/// \param E Edge to print. 358template <
typename BT>
360OS <<
"weight[" <<
E.first->getName() <<
"->" <<
E.second->getName()
361 <<
"]: " << EdgeWeights[
E] <<
"\n";
364/// Print the equivalence class of block \p BB on stream \p OS. 366/// \param OS Stream to emit the output to. 367/// \param BB Block to print. 368template <
typename BT>
372OS <<
"equivalence[" << BB->getName()
373 <<
"]: " << ((Equiv) ? EquivalenceClass[BB]->
getName() :
"NONE") <<
"\n";
376/// Print the weight of block \p BB on stream \p OS. 378/// \param OS Stream to emit the output to. 379/// \param BB Block to print. 380template <
typename BT>
383constauto &
I = BlockWeights.find(BB);
384uint64_t W = (
I == BlockWeights.end() ? 0 :
I->second);
385OS <<
"weight[" << BB->getName() <<
"]: " << W <<
"\n";
389/// Get the weight for an instruction. 391/// The "weight" of an instruction \p Inst is the number of samples 392/// collected on that instruction at runtime. To retrieve it, we 393/// need to compute the line number of \p Inst relative to the start of its 394/// function. We use HeaderLineno to compute the offset. We then 395/// look up the samples collected for \p Inst using BodySamples. 397/// \param Inst Instruction to query. 399/// \returns the weight of \p Inst. 400template <
typename BT>
404return getProbeWeight(Inst);
405return getInstWeightImpl(Inst);
408template <
typename BT>
413return std::error_code();
415constDebugLoc &DLoc = Inst.getDebugLoc();
417return std::error_code();
423 Discriminator = DIL->getDiscriminator();
430 CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
435Remark <<
" samples from profile (offset: ";
446 << Inst <<
" (line offset: " << LineOffset <<
"." 447 << Discriminator <<
" - weight: " << R.get() <<
")\n");
452template <
typename BT>
456"Profile is not pseudo probe based");
458// Ignore the non-probe instruction. If none of the instruction in the BB is 459// probe, we choose to infer the BB's weight. 461return std::error_code();
465// If we can't find the function samples for a probe, it could be due to the 466// probe is later optimized away or the inlining context is mismatced. We 467// treat it as unknown, leaving it to profile inference instead of forcing a 469return std::error_code();
472auto R = FS->findSamplesAt(Probe->Id, Probe->Discriminator);
474uint64_t Samples = R.get() * Probe->Factor;
475bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
480Remark <<
" samples from profile (ProbeId=";
482if (Probe->Discriminator) {
488Remark <<
", OriginalSamples=";
495if (Probe->Discriminator)
496dbgs() <<
"." << Probe->Discriminator;
497dbgs() <<
":" << Inst <<
" - weight: " << R.get()
498 <<
" - factor: " <<
format(
"%0.2f", Probe->Factor) <<
")\n";});
504/// Compute the weight of a basic block. 506/// The weight of basic block \p BB is the maximum weight of all the 507/// instructions in BB. 509/// \param BB The basic block to query. 511/// \returns the weight for \p BB. 512template <
typename BT>
516bool HasWeight =
false;
520 Max = std::max(Max, R.get());
527/// Compute and store the weights of every basic block. 529/// This populates the BlockWeights map by computing 530/// the weights of every basic block in the CFG. 532/// \param F The function to query. 533template <
typename BT>
537for (
constauto &BB :
F) {
540 BlockWeights[&BB] = Weight.
get();
541 VisitedBlocks.insert(&BB);
550/// Get the FunctionSamples for an instruction. 552/// The FunctionSamples of an instruction \p Inst is the inlined instance 553/// in which that instruction is coming from. We traverse the inline stack 554/// of that instruction, and match it with the tree nodes in the profile. 556/// \param Inst Instruction to query. 558/// \returns the FunctionSamples pointer to the inlined instance. 559template <
typename BT>
566auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
568 it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper());
570return it.first->second;
573/// Find equivalence classes for the given block. 575/// This finds all the blocks that are guaranteed to execute the same 576/// number of times as \p BB1. To do this, it traverses all the 577/// descendants of \p BB1 in the dominator or post-dominator tree. 579/// A block BB2 will be in the same equivalence class as \p BB1 if 580/// the following holds: 582/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2 583/// is a descendant of \p BB1 in the dominator tree, then BB2 should 584/// dominate BB1 in the post-dominator tree. 586/// 2- Both BB2 and \p BB1 must be in the same loop. 588/// For every block BB2 that meets those two requirements, we set BB2's 589/// equivalence class to \p BB1. 591/// \param BB1 Block to check. 592/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree. 593/// \param DomTree Opposite dominator tree. If \p Descendants is filled 594/// with blocks from \p BB1's dominator tree, then 595/// this is the post-dominator tree, and vice versa. 596template <
typename BT>
602for (
constauto *BB2 : Descendants) {
603bool IsDomParent = DomTree->dominates(BB2, BB1);
604bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
605if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
606 EquivalenceClass[BB2] = EC;
607// If BB2 is visited, then the entire EC should be marked as visited. 608if (VisitedBlocks.count(BB2)) {
609 VisitedBlocks.insert(EC);
612// If BB2 is heavier than BB1, make BB2 have the same weight 615// Note that we don't worry about the opposite situation here 616// (when BB2 is lighter than BB1). We will deal with this 617// during the propagation phase. Right now, we just want to 618// make sure that BB1 has the largest weight of all the 619// members of its equivalence set. 620 Weight = std::max(Weight, BlockWeights[BB2]);
623constBasicBlockT *EntryBB = getEntryBB(EC->getParent());
625 BlockWeights[EC] = Samples->getHeadSamples() + 1;
627 BlockWeights[EC] = Weight;
631/// Find equivalence classes. 633/// Since samples may be missing from blocks, we can fill in the gaps by setting 634/// the weights of all the blocks in the same equivalence class to the same 635/// weight. To compute the concept of equivalence, we use dominance and loop 636/// information. Two blocks B1 and B2 are in the same equivalence class if B1 637/// dominates B2, B2 post-dominates B1 and both are in the same loop. 639/// \param F The function to query. 640template <
typename BT>
644// Find equivalence sets based on dominance and post-dominance information. 648// Compute BB1's equivalence class once. 649if (EquivalenceClass.count(BB1)) {
654// By default, blocks are in their own equivalence class. 655 EquivalenceClass[BB1] = BB1;
657// Traverse all the blocks dominated by BB1. We are looking for 658// every basic block BB2 such that: 660// 1- BB1 dominates BB2. 661// 2- BB2 post-dominates BB1. 662// 3- BB1 and BB2 are in the same loop nest. 664// If all those conditions hold, it means that BB2 is executed 665// as many times as BB1, so they are placed in the same equivalence 666// class by making BB2's equivalence class be BB1. 667 DominatedBBs.
clear();
669 findEquivalencesFor(BB1, DominatedBBs, &*PDT);
674// Assign weights to equivalence classes. 676// All the basic blocks in the same equivalence class will execute 677// the same number of times. Since we know that the head block in 678// each equivalence class has the largest weight, assign that weight 679// to all the blocks in that equivalence class. 681dbgs() <<
"\nAssign the same weight to all blocks in the same class\n");
686 BlockWeights[BB] = BlockWeights[EquivBB];
691/// Visit the given edge to decide if it has a valid weight. 693/// If \p E has not been visited before, we copy to \p UnknownEdge 694/// and increment the count of unknown edges. 696/// \param E Edge to visit. 697/// \param NumUnknownEdges Current number of unknown edges. 698/// \param UnknownEdge Set if E has not been visited before. 700/// \returns E's weight, if known. Otherwise, return 0. 701template <
typename BT>
703unsigned *NumUnknownEdges,
705if (!VisitedEdges.count(
E)) {
706 (*NumUnknownEdges)++;
711return EdgeWeights[
E];
714/// Propagate weights through incoming/outgoing edges. 716/// If the weight of a basic block is known, and there is only one edge 717/// with an unknown weight, we can calculate the weight of that edge. 719/// Similarly, if all the edges have a known count, we can calculate the 720/// count of the basic block, if needed. 722/// \param F Function to process. 723/// \param UpdateBlockCount Whether we should update basic block counts that 724/// has already been annotated. 726/// \returns True if new weights were assigned to edges or blocks. 727template <
typename BT>
732for (
constauto &BI :
F) {
736// Visit all the predecessor and successor edges to determine 737// which ones have a weight assigned already. Note that it doesn't 738// matter that we only keep track of a single unknown edge. The 739// only case we are interested in handling is when only a single 740// edge is unknown (see setEdgeOrBlockWeight). 741for (
unsigned i = 0; i < 2; i++) {
743unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
744Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
747// First, visit all predecessor edges. 748 NumTotalEdges = Predecessors[BB].size();
749for (
auto *Pred : Predecessors[BB]) {
750EdgeE = std::make_pair(Pred, BB);
751 TotalWeight += visitEdge(
E, &NumUnknownEdges, &UnknownEdge);
752if (
E.first ==
E.second)
753 SelfReferentialEdge =
E;
755if (NumTotalEdges == 1) {
756 SingleEdge = std::make_pair(Predecessors[BB][0], BB);
759// On the second round, visit all successor edges. 760 NumTotalEdges = Successors[BB].size();
761for (
auto *Succ : Successors[BB]) {
762EdgeE = std::make_pair(BB, Succ);
763 TotalWeight += visitEdge(
E, &NumUnknownEdges, &UnknownEdge);
765if (NumTotalEdges == 1) {
766 SingleEdge = std::make_pair(BB, Successors[BB][0]);
770// After visiting all the edges, there are three cases that we 771// can handle immediately: 773// - All the edge weights are known (i.e., NumUnknownEdges == 0). 774// In this case, we simply check that the sum of all the edges 775// is the same as BB's weight. If not, we change BB's weight 776// to match. Additionally, if BB had not been visited before, 777// we mark it visited. 779// - Only one edge is unknown and BB has already been visited. 780// In this case, we can compute the weight of the edge by 781// subtracting the total block weight from all the known 782// edge weights. If the edges weight more than BB, then the 783// edge of the last remaining edge is set to zero. 785// - There exists a self-referential edge and the weight of BB is 786// known. In this case, this edge can be based on BB's weight. 787// We add up all the other known edges and set the weight on 788// the self-referential edge as we did in the previous case. 790// In any other case, we must continue iterating. Eventually, 791// all edges will get a weight, or iteration will stop when 792// it reaches SampleProfileMaxPropagateIterations. 793if (NumUnknownEdges <= 1) {
794uint64_t &BBWeight = BlockWeights[EC];
795if (NumUnknownEdges == 0) {
796if (!VisitedBlocks.count(EC)) {
797// If we already know the weight of all edges, the weight of the 798// basic block can be computed. It should be no larger than the sum 799// of all edge weights. 800if (TotalWeight > BBWeight) {
801 BBWeight = TotalWeight;
804 <<
" known. Set weight for block: ";
805 printBlockWeight(
dbgs(), BB););
807 }
elseif (NumTotalEdges == 1 &&
808 EdgeWeights[SingleEdge] < BlockWeights[EC]) {
809// If there is only one edge for the visited basic block, use the 810// block weight to adjust edge weight if edge weight is smaller. 811 EdgeWeights[SingleEdge] = BlockWeights[EC];
814 }
elseif (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
815// If there is a single unknown edge and the block has been 816// visited, then we can compute E's weight. 817if (BBWeight >= TotalWeight)
818 EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
820 EdgeWeights[UnknownEdge] = 0;
823 OtherEC = EquivalenceClass[UnknownEdge.first];
825 OtherEC = EquivalenceClass[UnknownEdge.second];
826// Edge weights should never exceed the BB weights it connects. 827if (VisitedBlocks.count(OtherEC) &&
828 EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
829 EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
830 VisitedEdges.insert(UnknownEdge);
833 printEdgeWeight(
dbgs(), UnknownEdge));
835 }
elseif (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
836// If a block Weights 0, all its in/out edges should weight 0. 838for (
auto *Pred : Predecessors[BB]) {
839EdgeE = std::make_pair(Pred, BB);
841 VisitedEdges.insert(
E);
844for (
auto *Succ : Successors[BB]) {
845EdgeE = std::make_pair(BB, Succ);
847 VisitedEdges.insert(
E);
850 }
elseif (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
851uint64_t &BBWeight = BlockWeights[BB];
852// We have a self-referential edge and the weight of BB is known. 853if (BBWeight >= TotalWeight)
854 EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
856 EdgeWeights[SelfReferentialEdge] = 0;
857 VisitedEdges.insert(SelfReferentialEdge);
860 printEdgeWeight(
dbgs(), SelfReferentialEdge));
862if (UpdateBlockCount && TotalWeight > 0 &&
863 VisitedBlocks.insert(EC).second) {
864 BlockWeights[EC] = TotalWeight;
873/// Build in/out edge lists for each basic block in the CFG. 875/// We are interested in unique edges. If a block B1 has multiple 876/// edges to another block B2, we only add a single B1->B2 edge. 877template <
typename BT>
882// Add predecessors for B1. 884if (!Predecessors[B1].empty())
886for (
auto *B2 : getPredecessors(B1))
887if (Visited.
insert(B2).second)
888 Predecessors[B1].push_back(B2);
890// Add successors for B1. 892if (!Successors[B1].empty())
894for (
auto *B2 : getSuccessors(B1))
895if (Visited.
insert(B2).second)
896 Successors[B1].push_back(B2);
900/// Propagate weights into edges 902/// The following rules are applied to every block BB in the CFG: 904/// - If BB has a single predecessor/successor, then the weight 905/// of that edge is the weight of the block. 907/// - If all incoming or outgoing edges are known except one, and the 908/// weight of the block is already known, the weight of the unknown 909/// edge will be the weight of the block minus the sum of all the known 910/// edges. If the sum of all the known edges is larger than BB's weight, 911/// we set the unknown edge weight to zero. 913/// - If there is a self-referential edge, and the weight of the block is 914/// known, the weight for that edge is set to the weight of the block 915/// minus the weight of the other incoming edges to that block (if 917template <
typename BT>
919// Flow-based profile inference is only usable with BasicBlock instantiation 920// of SampleProfileLoaderBaseImpl. 922// Prepare block sample counts for inference. 924for (
constauto &BI :
F) {
927 SampleBlockWeights[&BI] = Weight.
get();
929// Fill in BlockWeights and EdgeWeights using an inference algorithm. 930 applyProfi(
F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
935// If BB weight is larger than its corresponding loop's header BB weight, 936// use the BB weight to replace the loop header BB weight. 939LoopT *L = LI->getLoopFor(BB);
944if (Header && BlockWeights[BB] > BlockWeights[Header]) {
945 BlockWeights[Header] = BlockWeights[BB];
949// Propagate until we converge or we go past the iteration limit. 951 Changed = propagateThroughEdges(
F,
false);
954// The first propagation propagates BB counts from annotated BBs to unknown 955// BBs. The 2nd propagation pass resets edges weights, and use all BB 956// weights to propagate edge weights. 957 VisitedEdges.clear();
960 Changed = propagateThroughEdges(
F,
false);
963// The 3rd propagation pass allows adjust annotated BB weights that are 967 Changed = propagateThroughEdges(
F,
true);
972template <
typename FT>
977 Infer.apply(BlockWeights, EdgeWeights);
980/// Generate branch weight metadata for all branches in \p F. 982/// Branch weights are computed out of instruction samples using a 983/// propagation heuristic. Propagation proceeds in 3 phases: 985/// 1- Assignment of block weights. All the basic blocks in the function 986/// are initial assigned the same weight as their most frequently 987/// executed instruction. 989/// 2- Creation of equivalence classes. Since samples may be missing from 990/// blocks, we can fill in the gaps by setting the weights of all the 991/// blocks in the same equivalence class to the same weight. To compute 992/// the concept of equivalence, we use dominance and loop information. 993/// Two blocks B1 and B2 are in the same equivalence class if B1 994/// dominates B2, B2 post-dominates B1 and both are in the same loop. 996/// 3- Propagation of block weights into edges. This uses a simple 997/// propagation heuristic. The following rules are applied to every 998/// block BB in the CFG: 1000/// - If BB has a single predecessor/successor, then the weight 1001/// of that edge is the weight of the block. 1003/// - If all the edges are known except one, and the weight of the 1004/// block is already known, the weight of the unknown edge will 1005/// be the weight of the block minus the sum of all the known 1006/// edges. If the sum of all the known edges is larger than BB's weight, 1007/// we set the unknown edge weight to zero. 1009/// - If there is a self-referential edge, and the weight of the block is 1010/// known, the weight for that edge is set to the weight of the block 1011/// minus the weight of the other incoming edges to that block (if 1014/// Since this propagation is not guaranteed to finalize for every CFG, we 1015/// only allow it to proceed for a limited number of iterations (controlled 1016/// by -sample-profile-max-propagate-iterations). 1018/// FIXME: Try to replace this propagation heuristic with a scheme 1019/// that is guaranteed to finalize. A work-list approach similar to 1020/// the standard value propagation algorithm used by SSA-CCP might 1023/// \param F The function to query. 1025/// \returns true if \p F was modified. Returns false, otherwise. 1026template <
typename BT>
1029bool Changed = (InlinedGUIDs.
size() != 0);
1031// Compute basic block weights. 1032 Changed |= computeBlockWeights(
F);
1035// Initialize propagation. 1036 initWeightPropagation(
F, InlinedGUIDs);
1038// Propagate weights to all edges. 1039 propagateWeights(
F);
1041// Post-process propagated weights. 1042 finalizeWeightPropagation(
F, InlinedGUIDs);
1048template <
typename BT>
1051// Add an entry count to the function using the samples gathered at the 1053// Sets the GUIDs that are inlined in the profiled binary. This is used 1054// for ThinLink to make correct liveness analysis, and also make the IR 1055// match the profiled binary before annotation. 1061// Compute dominance and loop info needed for propagation. 1062 computeDominanceAndLoopInfo(
F);
1064// Find equivalence classes. 1065 findEquivalenceClasses(
F);
1068// Before propagation starts, build, for each block, a list of 1069// unique predecessors and successors. This is necessary to handle 1070// identical edges in multiway branches. Since we visit all blocks and all 1071// edges of the CFG, it is cleaner to build these lists once at the start 1076template <
typename BT>
1079// If we utilize a flow-based count inference, then we trust the computed 1080// counts and set the entry count as computed by the algorithm. This is 1081// primarily done to sync the counts produced by profi and BFI inference, 1082// which uses the entry count for mass propagation. 1083// If profi produces a zero-value for the entry count, we fallback to 1084// Samples->getHeadSamples() + 1 to avoid functions with zero count. 1088if (BlockWeights[EntryBB] > 0) {
1096template <
typename BT>
1098// If coverage checking was requested, compute it now. 1101unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
1102unsignedTotal = CoverageTracker.countBodyRecords(Samples, PSI);
1103unsigned Coverage = CoverageTracker.computeCoverage(Used,
Total);
1106 Func.getSubprogram()->getFilename(), getFunctionLoc(
F),
1108Twine(Coverage) +
"%) were applied",
1114uint64_t Used = CoverageTracker.getTotalUsedSamples();
1115uint64_tTotal = CoverageTracker.countBodySamples(Samples, PSI);
1116unsigned Coverage = CoverageTracker.computeCoverage(Used,
Total);
1119 Func.getSubprogram()->getFilename(), getFunctionLoc(
F),
1121Twine(Coverage) +
"%) were applied",
1127/// Get the line number for the function header. 1129/// This looks up function \p F in the current compilation unit and 1130/// retrieves the line number where the function is defined. This is 1131/// line 0 for all the samples read from the profile file. Every line 1132/// number is relative to this line. 1134/// \param F Function object to query. 1136/// \returns the line number where \p F is defined. If it returns 0, 1137/// it means that there is no debug information available for \p F. 1138template <
typename BT>
1147// If the start of \p F is missing, emit a diagnostic to inform the user 1148// about the missed opportunity. 1150"No debug information found in function " + Func.getName() +
1151": Function profile not used",
1159#endif// LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static Function * getFunction(Constant *C)
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This file defines the RefCountedBase, ThreadSafeRefCountedBase, and IntrusiveRefCntPtr classes.
Implements a lazy call graph analysis and related passes for the new pass manager.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for the profile inference algorithm, profi.
This file provides the utility functions for the sampled PGO loader base implementation.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Implements a dense probed hash-table based set.
Diagnostic information for the sample profiler.
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
Represents either an error or a value T.
Class to represent profile counts.
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
A node in the call graph.
A RefSCC of the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
iterator_range< postorder_ref_scc_iterator > postorder_ref_sccs()
Represents a single loop in the control flow graph.
A Module instance is used to store all the information related to an LLVM module.
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis providing profile information.
uint64_t getFunctionHash() const
const PseudoProbeDescriptor * getDesc(StringRef FProfileName) const
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(const Function &F) const
bool moduleIsProbed(const Module &M) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
PseudoProbeManager(const Module &M)
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
Sample profile inference pass.
bool computeAndPropagateWeights(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Generate branch weight metadata for all branches in F.
void computeDominanceAndLoopInfo(FunctionT &F)
typename afdo_detail::IRTraits< BT >::BasicBlockT BasicBlockT
IntrusiveRefCntPtr< vfs::FileSystem > FS
VirtualFileSystem to load profile files from.
typename afdo_detail::IRTraits< BT >::OptRemarkAnalysisT OptRemarkAnalysisT
typename afdo_detail::IRTraits< BT >::SuccRangeT SuccRangeT
EdgeWeightMap EdgeWeights
Map edges to their computed weights.
Function & getFunction(FunctionT &F)
SmallSet< Edge, 32 > VisitedEdges
Set of visited edges during propagation.
std::map< SampleContext, FunctionSamples > OutlineFunctionSamples
Synthetic samples created by duplicating the samples of inlined functions from the original profile a...
OptRemarkEmitterT * ORE
Optimization Remark Emitter used to emit diagnostic remarks.
const BasicBlockT * getEntryBB(const FunctionT *F)
ErrorOr< uint64_t > getBlockWeight(const BasicBlockT *BB)
Compute the weight of a basic block.
unsigned getFunctionLoc(FunctionT &Func)
Get the line number for the function header.
PredRangeT getPredecessors(BasicBlockT *BB)
ErrorOr< uint64_t > getInstWeightImpl(const InstructionT &Inst)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
SmallPtrSet< const BasicBlockT *, 32 > VisitedBlocks
Set of visited blocks during propagation.
EquivalenceClassMap EquivalenceClass
Equivalence classes for block weights.
typename afdo_detail::IRTraits< BT >::DominatorTreePtrT DominatorTreePtrT
SampleCoverageTracker CoverageTracker
Profile coverage tracker.
typename afdo_detail::IRTraits< BT >::LoopT LoopT
typename GraphTraits< FT * >::NodeRef NodeRef
std::unique_ptr< SampleProfileReader > Reader
Profile reader object.
typename afdo_detail::IRTraits< BT >::OptRemarkEmitterT OptRemarkEmitterT
void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const
Print the weight of block BB on stream OS.
DominatorTreePtrT DT
Dominance, post-dominance and loop information.
void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB)
Print the equivalence class of block BB on stream OS.
std::remove_pointer_t< NodeRef > BT
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName, IntrusiveRefCntPtr< vfs::FileSystem > FS)
std::unique_ptr< PseudoProbeManager > ProbeManager
~SampleProfileLoaderBaseImpl()=default
typename afdo_detail::IRTraits< BT >::LoopInfoPtrT LoopInfoPtrT
void emitCoverageRemarks(FunctionT &F)
SuccRangeT getSuccessors(BasicBlockT *BB)
std::string Filename
Name of the profile file to load.
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount)
Propagate weights through incoming/outgoing edges.
PostDominatorTreePtrT PDT
typename afdo_detail::IRTraits< BT >::InstructionT InstructionT
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge)
Visit the given edge to decide if it has a valid weight.
typename afdo_detail::IRTraits< BT >::PostDominatorTreePtrT PostDominatorTreePtrT
void initWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
BlockEdgeMap Predecessors
Predecessors for each basic block in the CFG.
void finalizeWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
bool computeBlockWeights(FunctionT &F)
Compute and store the weights of every basic block.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
typename afdo_detail::IRTraits< BT >::PostDominatorTreeT PostDominatorTreeT
virtual ErrorOr< uint64_t > getProbeWeight(const InstructionT &Inst)
std::string RemappingFilename
Name of the profile remapping file to load.
typename afdo_detail::IRTraits< BT >::PredRangeT PredRangeT
void applyProfi(FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights)
typename afdo_detail::IRTraits< BT >::BlockFrequencyInfoT BlockFrequencyInfoT
BlockEdgeMap Successors
Successors for each basic block in the CFG.
FunctionSamples * Samples
Samples collected for the body of this function.
void findEquivalenceClasses(FunctionT &F)
Find equivalence classes.
std::pair< const BasicBlockT *, const BasicBlockT * > Edge
ProfileSummaryInfo * PSI
Profile Summary Info computed from sample profile.
void clearFunctionData(bool ResetDT=true)
Clear all the per-function data used to load samples and propagate weights.
DenseMap< const DILocation *, const FunctionSamples * > DILocation2SampleMap
void buildEdges(FunctionT &F)
Build in/out edge lists for each basic block in the CFG.
void findEquivalencesFor(BasicBlockT *BB1, ArrayRef< BasicBlockT * > Descendants, PostDominatorTreeT *DomTree)
Find equivalence classes for the given block.
void printEdgeWeight(raw_ostream &OS, Edge E)
Print the weight of edge E on stream OS.
typename afdo_detail::IRTraits< BT >::FunctionT FunctionT
BlockWeightMap BlockWeights
Map basic blocks to their computed weights.
void propagateWeights(FunctionT &F)
Propagate weights into edges.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Representation of the samples collected for a function.
uint64_t getFunctionHash() const
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
cl::opt< unsigned > SampleProfileSampleCoverage
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
cl::opt< unsigned > SampleProfileRecordCoverage
cl::opt< unsigned > SampleProfileMaxPropagateIterations
cl::opt< bool > SampleProfileUseProfi
cl::opt< bool > EnableFSDiscriminator
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< succ_iterator > succ_range
cl::opt< bool > NoWarnSampleUnused
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
iterator_range< pred_iterator > pred_range
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
static bool skipProfileForFunction(const Function &F)
auto predecessors(const MachineBasicBlock *BB)
constexpr const char * PseudoProbeDescMetadataName
Implement std::hash so that hash_code can be used in STL containers.
Description of the encoding of one expression Op.
typename GraphType::UnknownGraphTypeError NodeRef
std::unique_ptr< LoopInfo > LoopInfoPtrT
std::unique_ptr< PostDominatorTree > PostDominatorTreePtrT
static Function & getFunction(Function &F)
static pred_range getPredecessors(BasicBlock *BB)
static succ_range getSuccessors(BasicBlock *BB)
std::unique_ptr< DominatorTree > DominatorTreePtrT
static const BasicBlock * getEntryBB(const Function *F)