1//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// Loops should be simplified before this analysis. 11//===----------------------------------------------------------------------===// 51#define DEBUG_TYPE "branch-prob" 55cl::desc(
"Print the branch probability info."));
59cl::desc(
"The option to specify the name of the function " 60"whose branch probability info is printed."));
63"Branch Probability Analysis",
false,
true)
79// Weights are for internal use only. They are used by heuristics to help to 80// estimate edges' probability. Example: 82// Using "Loop Branch Heuristics" we predict weights of edges for the 97// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 98// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 102/// Unreachable-terminating branch taken probability. 104/// This is the probability for a branch being taken to a block that terminates 105/// (eventually) in unreachable. These are predicted as unlikely as possible. 106/// All reachable probability will proportionally share the remaining part. 109/// Heuristics and lookup tables for non-loop branches: 110/// Pointer Heuristics (PH) 121/// Pointer comparisons: 127/// Zero Heuristics (ZH) 135/// Integer compares with 0: 143/// Integer compares with -1: 147// InstCombine canonicalizes X >= 0 into X > -1 151/// Integer compares with 1: 153// InstCombine canonicalizes X <= 0 into X < 1 157/// strcmp and similar functions return zero, negative, or positive, if the 158/// first string is equal, less, or greater than the second. We consider it 159/// likely that the strings are not equal, so a comparison with zero is 160/// probably false, but also a comparison with any other number is also 161/// probably false given that what exactly is returned for nonzero values is 162/// not specified. Any kind of comparison other than equality we know 169// Floating-Point Heuristics (FPH) 173/// This is the probability for an ordered floating point comparison. 175/// This is the probability for an unordered floating point comparison, it means 176/// one or two of the operands are NaN. Usually it is used to test for an 177/// exceptional case, so the result is unlikely. 189/// Floating-Point compares: 195/// Set of dedicated "absolute" execution weights for a block. These weights are 196/// meaningful relative to each other and their derivatives only. 198 /// Special weight used for cases with exact zero probability. 200 /// Minimal possible non zero weight. 202 /// Weight to an 'unreachable' block. 204 /// Weight to a block containing non returning call. 206 /// Weight to 'unwind' block of an invoke instruction. 208 /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked 209 /// with attribute 'cold'. 211 /// Default weight is used in cases when there is no dedicated execution 212 /// weight set. It is not propagated through the domination line either. 217// Record SCC numbers of blocks in the CFG to identify irreducible loops. 218// FIXME: We could only calculate this if the CFG is known to be irreducible 219// (perhaps cache this info in LoopInfo if we can easily calculate it there?). 223// Ignore single-block SCCs since they either aren't loops or LoopInfo will 225const std::vector<const BasicBlock *> &Scc = *It;
230for (
constauto *BB : Scc) {
232 SccNums[BB] = SccNum;
233 calculateSccBlockType(BB, SccNum);
240auto SccIt = SccNums.find(BB);
241if (SccIt == SccNums.end())
249for (
auto MapIt : SccBlocks[SccNum]) {
250constauto *BB = MapIt.first;
251if (isSCCHeader(BB, SccNum))
253if (getSCCNum(Pred) != SccNum)
260for (
auto MapIt : SccBlocks[SccNum]) {
261constauto *BB = MapIt.first;
262if (isSCCExitingBlock(BB, SccNum))
264if (getSCCNum(Succ) != SccNum)
271assert(getSCCNum(BB) == SccNum);
273assert(SccBlocks.size() >
static_cast<unsigned>(SccNum) &&
"Unknown SCC");
274constauto &SccBlockTypes = SccBlocks[SccNum];
276auto It = SccBlockTypes.find(BB);
277if (It != SccBlockTypes.end()) {
283void BranchProbabilityInfo::SccInfo::calculateSccBlockType(
constBasicBlock *BB,
285assert(getSCCNum(BB) == SccNum);
289// Consider any block that is an entry point to the SCC as 291return getSCCNum(Pred) != SccNum;
296return getSCCNum(Succ) != SccNum;
300// Lazily compute the set of headers for a given SCC and cache the results 301// in the SccHeaderMap. 302if (SccBlocks.size() <=
static_cast<unsigned>(SccNum))
303 SccBlocks.resize(SccNum + 1);
304auto &SccBlockTypes = SccBlocks[SccNum];
306if (BlockType != Inner) {
308 std::tie(std::ignore, IsInserted) =
309 SccBlockTypes.insert(std::make_pair(BB, BlockType));
310assert(IsInserted &&
"Duplicated block in SCC");
314BranchProbabilityInfo::LoopBlock::LoopBlock(
constBasicBlock *BB,
320LD.second = SccI.getSCCNum(BB);
324bool BranchProbabilityInfo::isLoopEnteringEdge(
const LoopEdge &Edge)
const{
325constauto &SrcBlock = Edge.first;
326constauto &DstBlock = Edge.second;
327return (DstBlock.getLoop() &&
328 !DstBlock.getLoop()->contains(SrcBlock.getLoop())) ||
329// Assume that SCCs can't be nested. 330 (DstBlock.getSccNum() != -1 &&
331 SrcBlock.getSccNum() != DstBlock.getSccNum());
334bool BranchProbabilityInfo::isLoopExitingEdge(
const LoopEdge &Edge)
const{
335return isLoopEnteringEdge({Edge.second, Edge.first});
338bool BranchProbabilityInfo::isLoopEnteringExitingEdge(
339const LoopEdge &Edge)
const{
340return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge);
343bool BranchProbabilityInfo::isLoopBackEdge(
const LoopEdge &Edge)
const{
344constauto &SrcBlock = Edge.first;
345constauto &DstBlock = Edge.second;
346return SrcBlock.belongsToSameLoop(DstBlock) &&
347 ((DstBlock.getLoop() &&
348 DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) ||
349 (DstBlock.getSccNum() != -1 &&
350 SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum())));
353void BranchProbabilityInfo::getLoopEnterBlocks(
356auto *Header = LB.getLoop()->getHeader();
359assert(LB.getSccNum() != -1 &&
"LB doesn't belong to any loop?");
360 SccI->getSccEnterBlocks(LB.getSccNum(), Enters);
364void BranchProbabilityInfo::getLoopExitBlocks(
367 LB.getLoop()->getExitBlocks(Exits);
369assert(LB.getSccNum() != -1 &&
"LB doesn't belong to any loop?");
370 SccI->getSccExitBlocks(LB.getSccNum(), Exits);
374// Propagate existing explicit probabilities from either profile data or 375// 'expect' intrinsic processing. Examine metadata against unreachable 376// heuristic. The probability of the edge coming to unreachable block is 377// set to min of metadata and unreachable heuristic. 378bool BranchProbabilityInfo::calcMetadataWeights(
constBasicBlock *BB) {
381if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI) ||
382 isa<InvokeInst>(TI) || isa<CallBrInst>(TI)))
389// Check that the number of successors is manageable. 392// Build up the final weights that will be used in a temporary buffer. 393// Compute the sum of all weights to later decide whether they need to 394// be scaled to fit in 32 bits. 401for (
unsignedI = 0, E = Weights.
size();
I != E; ++
I) {
402 WeightSum += Weights[
I];
403const LoopBlock SrcLoopBB = getLoopBlock(BB);
404const LoopBlock DstLoopBB = getLoopBlock(TI->
getSuccessor(
I));
405auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
406if (EstimatedWeight &&
407 *EstimatedWeight <=
static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
414// If the sum of weights does not fit in 32 bits, scale every weight down 417 (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
419if (ScalingFactor > 1) {
422 Weights[
I] /= ScalingFactor;
423 WeightSum += Weights[
I];
426assert(WeightSum <= UINT32_MAX &&
427"Expected weights to scale down to 32 bits");
429if (WeightSum == 0 || ReachableIdxs.
size() == 0) {
435// Set the probability. 440// Examine the metadata against unreachable heuristic. 441// If the unreachable heuristic is more strong then we use it for this edge. 442if (UnreachableIdxs.
size() == 0 || ReachableIdxs.
size() == 0) {
448for (
autoI : UnreachableIdxs)
449if (UnreachableProb < BP[
I]) {
450 BP[
I] = UnreachableProb;
453// Sum of all edge probabilities must be 1.0. If we modified the probability 454// of some edges then we must distribute the introduced difference over the 457// Proportional distribution: the relation between probabilities of the 458// reachable edges is kept unchanged. That is for any reachable edges i and j: 459// newBP[i] / newBP[j] == oldBP[i] / oldBP[j] => 460// newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K 461// Where K is independent of i,j. 462// newBP[i] == oldBP[i] * K 464// Make sum of all reachables of the left and right parts: 465// sum_of_reachable(newBP) == K * sum_of_reachable(oldBP) 466// Sum of newBP must be equal to 1.0: 467// sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 => 468// sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP) 469// Where sum_of_unreachable(newBP) is what has been just changed. 471// K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) => 472// K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP) 474for (
autoI : UnreachableIdxs)
475 NewUnreachableSum += BP[
I];
481for (
autoI : ReachableIdxs)
482 OldReachableSum += BP[
I];
484if (OldReachableSum != NewReachableSum) {
// Anything to dsitribute? 485if (OldReachableSum.
isZero()) {
486// If all oldBP[i] are zeroes then the proportional distribution results 487// in all zero probabilities and the error stays big. In this case we 488// evenly spread NewReachableSum over the reachable edges. 490for (
autoI : ReachableIdxs)
493for (
autoI : ReachableIdxs) {
494// We use uint64_t to avoid double rounding error of the following 495// calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum 496// The formula is taken from the private constructor 497// BranchProbability(uint32_t Numerator, uint32_t Denominator) 499 BP[
I].getNumerator();
512// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison 513// between two pointer or pointer and NULL will fail. 514bool BranchProbabilityInfo::calcPointerHeuristics(
constBasicBlock *BB) {
538// Compute the unlikely successors to the block BB in the loop L, specifically 539// those that are unlikely because this is a loop, and add them to the 540// UnlikelyBlocks set. 544// Sometimes in a loop we have a branch whose condition is made false by 545// taking it. This is typically something like 552// In this sort of situation taking the branch means that at the very least it 553// won't be taken again in the next iteration of the loop, so we should 554// consider it less likely than a typical branch. 556// We detect this by looking back through the graph of PHI nodes that sets the 557// value that the condition depends on, and seeing if we can reach a successor 558// block which can be determined to make the condition false. 560// FIXME: We currently consider unlikely blocks to be half as likely as other 561// blocks, but if we consider the example above the likelyhood is actually 562// 1/MAX. We could therefore be more precise in how unlikely we consider 563// blocks to be, but it would require more careful examination of the form 564// of the comparison expression. 569// Check if the branch is based on an instruction compared with a constant 571if (!CI || !isa<Instruction>(CI->
getOperand(0)) ||
575// Either the instruction must be a PHI, or a chain of operations involving 576// constants that ends in a PHI which we can then collapse into a single value 577// if the PHI value is known. 579PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS);
581// Collect the instructions until we hit a PHI 583while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) &&
585// Stop if the chain extends outside of the loop 586if (!L->contains(CmpLHS))
588 InstChain.
push_back(cast<BinaryOperator>(CmpLHS));
589 CmpLHS = dyn_cast<Instruction>(CmpLHS->
getOperand(0));
591 CmpPHI = dyn_cast<PHINode>(CmpLHS);
593if (!CmpPHI || !L->contains(CmpPHI))
596// Trace the phi node to find all values that come from successors of BB 600 VisitedInsts.
insert(CmpPHI);
601while (!WorkList.
empty()) {
604// Skip blocks that aren't part of the loop 607Value *V =
P->getIncomingValueForBlock(
B);
608// If the source is a PHI add it to the work list if we haven't 609// already visited it. 610if (
PHINode *PN = dyn_cast<PHINode>(V)) {
611if (VisitedInsts.
insert(PN).second)
615// If this incoming value is a constant and B is a successor of BB, then 616// we can constant-evaluate the compare to see if it makes the branch be 618Constant *CmpLHSConst = dyn_cast<Constant>(V);
621// First collapse InstChain 625I->getOpcode(), CmpLHSConst, cast<Constant>(
I->getOperand(1)),
DL);
631// Now constant-evaluate the compare 634// If the result means we don't branch to the block then that block is 644std::optional<uint32_t>
645BranchProbabilityInfo::getEstimatedBlockWeight(
constBasicBlock *BB)
const{
646auto WeightIt = EstimatedBlockWeight.find(BB);
647if (WeightIt == EstimatedBlockWeight.end())
649return WeightIt->second;
652std::optional<uint32_t>
653BranchProbabilityInfo::getEstimatedLoopWeight(
const LoopData &L)
const{
654auto WeightIt = EstimatedLoopWeight.
find(L);
655if (WeightIt == EstimatedLoopWeight.
end())
657return WeightIt->second;
660std::optional<uint32_t>
661BranchProbabilityInfo::getEstimatedEdgeWeight(
const LoopEdge &Edge)
const{
662// For edges entering a loop take weight of a loop rather than an individual 664return isLoopEnteringEdge(Edge)
665 ? getEstimatedLoopWeight(Edge.second.getLoopData())
666 : getEstimatedBlockWeight(Edge.second.getBlock());
669template <
class IterT>
670std::optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
673 std::optional<uint32_t> MaxWeight;
675const LoopBlock DstLoopBB = getLoopBlock(DstBB);
676auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
681if (!MaxWeight || *MaxWeight < *Weight)
688// Updates \p LoopBB's weight and returns true. If \p LoopBB has already 689// an associated weight it is unchanged and false is returned. 691// Please note by the algorithm the weight is not expected to change once set 692// thus 'false' status is used to track visited blocks. 693bool BranchProbabilityInfo::updateEstimatedBlockWeight(
694 LoopBlock &LoopBB,
uint32_t BBWeight,
699// In general, weight is assigned to a block when it has final value and 700// can't/shouldn't be changed. However, there are cases when a block 701// inherently has several (possibly "contradicting") weights. For example, 702// "unwind" block may also contain "cold" call. In that case the first 703// set weight is favored and all consequent weights are ignored. 704if (!EstimatedBlockWeight.insert({BB, BBWeight}).second)
708 LoopBlock PredLoop = getLoopBlock(PredBlock);
709// Add affected block/loop to a working list. 710if (isLoopExitingEdge({PredLoop, LoopBB})) {
711if (!EstimatedLoopWeight.
count(PredLoop.getLoopData()))
713 }
elseif (!EstimatedBlockWeight.count(PredBlock))
719// Starting from \p BB traverse through dominator blocks and assign \p BBWeight 720// to all such blocks that are post dominated by \BB. In other words to all 721// blocks that the one is executed if and only if another one is executed. 722// Importantly, we skip loops here for two reasons. First weights of blocks in 723// a loop should be scaled by trip count (yet possibly unknown). Second there is 724// no any value in doing that because that doesn't give any additional 725// information regarding distribution of probabilities inside the loop. 726// Exception is loop 'enter' and 'exit' edges that are handled in a special way 727// at calcEstimatedHeuristics. 729// In addition, \p WorkList is populated with basic blocks if at leas one 730// successor has updated estimated weight. 731void BranchProbabilityInfo::propagateEstimatedBlockWeight(
736constauto *DTStartNode = DT->
getNode(BB);
737constauto *PDTStartNode = PDT->
getNode(BB);
739// TODO: Consider propagating weight down the domination line as well. 740for (
constauto *DTNode = DTStartNode; DTNode !=
nullptr;
741 DTNode = DTNode->getIDom()) {
742auto *DomBB = DTNode->getBlock();
743// Consider blocks which lie on one 'line'. 745// If BB doesn't post dominate DomBB it will not post dominate dominators 749 LoopBlock DomLoopBB = getLoopBlock(DomBB);
750const LoopEdge Edge{DomLoopBB, LoopBB};
751// Don't propagate weight to blocks belonging to different loops. 752if (!isLoopEnteringExitingEdge(Edge)) {
753if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList,
755// If DomBB has weight set then all it's predecessors are already 756// processed (since we propagate weight up to the top of IR each time). 758 }
elseif (isLoopExitingEdge(Edge)) {
764std::optional<uint32_t>
765BranchProbabilityInfo::getInitialEstimatedBlockWeight(
constBasicBlock *BB) {
766// Returns true if \p BB has call marked with "NoReturn" attribute. 769if (
constCallInst *CI = dyn_cast<CallInst>(&
I))
770if (CI->hasFnAttr(Attribute::NoReturn))
776// Important note regarding the order of checks. They are ordered by weight 777// from lowest to highest. Doing that allows to avoid "unstable" results 778// when several conditions heuristics can be applied simultaneously. 780// If this block is terminated by a call to 781// @llvm.experimental.deoptimize then treat it like an unreachable 782// since it is expected to practically never execute. 783// TODO: Should we actually treat as never returning call? 785return hasNoReturn(BB)
786 ?
static_cast<uint32_t>(BlockExecWeight::NORETURN)
787 :
static_cast<uint32_t>(BlockExecWeight::UNREACHABLE);
789// Check if the block is an exception handling block. 791returnstatic_cast<uint32_t>(BlockExecWeight::UNWIND);
793// Check if the block contains 'cold' call. 794for (
constauto &
I : *BB)
795if (
constCallInst *CI = dyn_cast<CallInst>(&
I))
796if (CI->hasFnAttr(Attribute::Cold))
797returnstatic_cast<uint32_t>(BlockExecWeight::COLD);
802// Does RPO traversal over all blocks in \p F and assigns weights to 803// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its 804// best to propagate the weight to up/down the IR. 805void BranchProbabilityInfo::computeEestimateBlockWeight(
811// By doing RPO we make sure that all predecessors already have weights 812// calculated before visiting theirs successors. 814for (
constauto *BB : RPOT)
815if (
auto BBWeight = getInitialEstimatedBlockWeight(BB))
816// If we were able to find estimated weight for the block set it to this 817// block and propagate up the IR. 818 propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, *BBWeight,
819 BlockWorkList, LoopWorkList);
821// BlockWorklist/LoopWorkList contains blocks/loops with at least one 822// successor/exit having estimated weight. Try to propagate weight to such 823// blocks/loops from successors/exits. 824// Process loops and blocks. Order is not important. 826while (!LoopWorkList.
empty()) {
828const LoopData
LD = LoopBB.getLoopData();
829if (EstimatedLoopWeight.
count(LD))
835 getLoopExitBlocks(LoopBB, Exits);
836auto LoopWeight = getMaxEstimatedEdgeWeight(
840// If we never exit the loop then we can enter it once at maximum. 841if (LoopWeight <=
static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
842 LoopWeight =
static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
844 EstimatedLoopWeight.
insert({
LD, *LoopWeight});
845// Add all blocks entering the loop into working list. 846 getLoopEnterBlocks(LoopBB, BlockWorkList);
850while (!BlockWorkList.
empty()) {
851// We can reach here only if BlockWorkList is not empty. 853if (EstimatedBlockWeight.count(BB))
856// We take maximum over all weights of successors. In other words we take 857// weight of "hot" path. In theory we can probably find a better function 858// which gives higher accuracy results (comparing to "maximum") but I 860// think of any right now. And I doubt it will make any difference in 862const LoopBlock LoopBB = getLoopBlock(BB);
863auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB,
successors(BB));
866 propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight,
867 BlockWorkList, LoopWorkList);
869 }
while (!BlockWorkList.
empty() || !LoopWorkList.
empty());
872// Calculate edge probabilities based on block's estimated weight. 873// Note that gathered weights were not scaled for loops. Thus edges entering 874// and exiting loops requires special processing. 875bool BranchProbabilityInfo::calcEstimatedHeuristics(
constBasicBlock *BB) {
877"expected more than one successor!");
879const LoopBlock LoopBB = getLoopBlock(BB);
886// Changed to 'true' if at least one successor has estimated weight. 887bool FoundEstimatedWeight =
false;
890// Go over all successors of BB and put their weights into SuccWeights. 892 std::optional<uint32_t> Weight;
893const LoopBlock SuccLoopBB = getLoopBlock(SuccBB);
894const LoopEdge Edge{LoopBB, SuccLoopBB};
896 Weight = getEstimatedEdgeWeight(Edge);
898if (isLoopExitingEdge(Edge) &&
899// Avoid adjustment of ZERO weight since it should remain unchanged. 900 Weight !=
static_cast<uint32_t>(BlockExecWeight::ZERO)) {
901// Scale down loop exiting weight by trip count. 903static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
904 Weight.value_or(
static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
907bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.
contains(SuccBB);
909// Avoid adjustment of ZERO weight since it should remain unchanged. 910 Weight !=
static_cast<uint32_t>(BlockExecWeight::ZERO)) {
911// 'Unlikely' blocks have twice lower weight. 913static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
914 Weight.value_or(
static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2);
918 FoundEstimatedWeight =
true;
921 Weight.value_or(
static_cast<uint32_t>(BlockExecWeight::DEFAULT));
922 TotalWeight += WeightVal;
926// If non of blocks have estimated weight bail out. 927// If TotalWeight is 0 that means weight of each successor is 0 as well and 928// equally likely. Bail out early to not deal with devision by zero. 929if (!FoundEstimatedWeight || TotalWeight == 0)
933constunsigned SuccCount = SuccWeights.
size();
935// If the sum of weights does not fit in 32 bits, scale every weight down 937if (TotalWeight > UINT32_MAX) {
938uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1;
940for (
unsignedIdx = 0;
Idx < SuccCount; ++
Idx) {
941 SuccWeights[
Idx] /= ScalingFactor;
942if (SuccWeights[
Idx] ==
static_cast<uint32_t>(BlockExecWeight::ZERO))
944static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
945 TotalWeight += SuccWeights[
Idx];
947assert(TotalWeight <= UINT32_MAX &&
"Total weight overflows");
950// Finally set probabilities to edges according to estimated block weights. 954for (
unsignedIdx = 0;
Idx < SuccCount; ++
Idx) {
955 EdgeProbabilities[
Idx] =
962bool BranchProbabilityInfo::calcZeroHeuristics(
constBasicBlock *BB,
973auto GetConstantInt = [](
Value *
V) {
974if (
auto *
I = dyn_cast<BitCastInst>(V))
975return dyn_cast<ConstantInt>(
I->getOperand(0));
976return dyn_cast<ConstantInt>(V);
984// If the LHS is the result of AND'ing a value with a single bit bitmask, 985// we don't have information about probabilities. 987if (
LHS->getOpcode() == Instruction::And)
989if (AndRHS->getValue().isPowerOf2())
992// Check if the LHS is the return value of a library function 999 ProbabilityTable::const_iterator Search;
1000if (Func == LibFunc_strcasecmp ||
1001 Func == LibFunc_strcmp ||
1002 Func == LibFunc_strncasecmp ||
1003 Func == LibFunc_strncmp ||
1004 Func == LibFunc_memcmp ||
1005 Func == LibFunc_bcmp) {
1013 }
elseif (CV->
isOne()) {
1029bool BranchProbabilityInfo::calcFloatingPointHeuristics(
constBasicBlock *BB) {
1042// f1 == f2 -> Unlikely 1044// f1 != f2 -> Likely 1050 ProbList = Search->second;
1064// Check whether the analysis, all analyses on functions, or the function's 1065// CFG have been preserved. 1072OS <<
"---- Branch Probabilities ----\n";
1073// We print the probabilities from the last function the analysis ran over, 1074// or the function it is currently running over. 1075assert(LastF &&
"Cannot print prior to running over a function");
1076for (
constauto &BI : *LastF) {
1084// Hot probability is at least 4/5 = 80% 1085// FIXME: Compare against a static "hot" BranchProbability. 1089/// Get the raw edge probability for the edge. If can't find it, return a 1090/// default probability 1/N where N is the number of successors. Here an edge is 1091/// specified using PredBlock and an 1092/// index to the successors. 1095unsigned IndexInSuccessors)
const{
1096autoI = Probs.find(std::make_pair(Src, IndexInSuccessors));
1097assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) ==
1098 (Probs.end() ==
I) &&
1099"Probability for I-th successor must always be defined along with the " 1100"probability for the first successor");
1102if (
I != Probs.end())
1114/// Get the raw edge probability calculated for the block pair. This returns the 1115/// sum of all raw edge probabilities from Src to Dst. 1119if (!Probs.count(std::make_pair(Src, 0)))
1125 Prob += Probs.find(std::make_pair(Src,
I.getSuccessorIndex()))->second;
1130/// Set the edge probability for all edges at once. 1133assert(Src->getTerminator()->getNumSuccessors() == Probs.
size());
1135if (Probs.
size() == 0)
1136return;
// Nothing to set. 1138 Handles.insert(BasicBlockCallbackVH(Src,
this));
1140for (
unsigned SuccIdx = 0; SuccIdx < Probs.
size(); ++SuccIdx) {
1141 this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx];
1142LLVM_DEBUG(
dbgs() <<
"set edge " << Src->getName() <<
" -> " << SuccIdx
1143 <<
" successor probability to " << Probs[SuccIdx]
1145 TotalNumerator += Probs[SuccIdx].getNumerator();
1148// Because of rounding errors the total probability cannot be checked to be 1149// 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator. 1150// Instead, every single probability in Probs must be as accurate as possible. 1151// This results in error 1/denominator at most, thus the total absolute error 1152// should be within Probs.size / BranchProbability::getDenominator. 1155 (void)TotalNumerator;
1161unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors();
1162assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors());
1163if (NumSuccessors == 0)
1164return;
// Nothing to set. 1165if (!this->Probs.contains(std::make_pair(Src, 0)))
1166return;
// No probability is set for edges from Src. Keep the same for Dst. 1168 Handles.insert(BasicBlockCallbackVH(Dst,
this));
1169for (
unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) {
1170auto Prob = this->Probs[std::make_pair(Src, SuccIdx)];
1171 this->Probs[std::make_pair(Dst, SuccIdx)] = Prob;
1172LLVM_DEBUG(
dbgs() <<
"set edge " << Dst->getName() <<
" -> " << SuccIdx
1173 <<
" successor probability to " << Prob <<
"\n");
1178assert(Src->getTerminator()->getNumSuccessors() == 2);
1179auto It0 = Probs.find(std::make_pair(Src, 0));
1180if (It0 == Probs.end())
1181return;
// No probability is set for edges from Src 1182auto It1 = Probs.find(std::make_pair(Src, 1));
1183assert(It1 != Probs.end());
1193 Src->printAsOperand(
OS,
false, Src->getModule());
1195 Dst->printAsOperand(
OS,
false, Dst->getModule());
1196OS <<
" probability is " << Prob
1197 << (
isEdgeHot(Src, Dst) ?
" [HOT edge]\n" :
"\n");
1205// Note that we cannot use successors of BB because the terminator of BB may 1206// have changed when eraseBlock is called as a BasicBlockCallbackVH callback. 1207// Instead we remove prob data for the block by iterating successors by their 1208// indices from 0 till the last which exists. There could not be prob data for 1209// a pair (BB, N) if there is no data for (BB, N-1) because the data is always 1210// set for all successors from 0 to M at once by the method 1211// setEdgeProbability(). 1212 Handles.erase(BasicBlockCallbackVH(BB,
this));
1213for (
unsignedI = 0;; ++
I) {
1214auto MapI = Probs.find(std::make_pair(BB,
I));
1215if (MapI == Probs.end()) {
1216assert(Probs.count(std::make_pair(BB,
I + 1)) == 0 &&
1217"Must be no more successors");
1230 LastF = &
F;
// Store the last function we ran on for printing. 1233 SccI = std::make_unique<SccInfo>(
F);
1235assert(EstimatedBlockWeight.empty());
1238 std::unique_ptr<DominatorTree> DTPtr;
1239 std::unique_ptr<PostDominatorTree> PDTPtr;
1242 DTPtr = std::make_unique<DominatorTree>(
const_cast<Function &
>(
F));
1247 PDTPtr = std::make_unique<PostDominatorTree>(
const_cast<Function &
>(
F));
1251 computeEestimateBlockWeight(
F, DT, PDT);
1253// Walk the basic blocks in post-order so that we can build up state about 1254// the successors of a block iteratively. 1255for (
constauto *BB :
post_order(&
F.getEntryBlock())) {
1258// If there is no at least two successors, no sense to set probability. 1261if (calcMetadataWeights(BB))
1263if (calcEstimatedHeuristics(BB))
1265if (calcPointerHeuristics(BB))
1267if (calcZeroHeuristics(BB, TLI))
1269if (calcFloatingPointHeuristics(BB))
1273 EstimatedLoopWeight.
clear();
1274 EstimatedBlockWeight.clear();
1285// We require DT so it's available when LI is available. The LI updating code 1286// asserts that DT is also present so if we don't make sure that we have DT 1287// here, that assert will trigger. 1297constLoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1299 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
1300DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1302 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
1303 BPI.calculate(
F, LI, &TLI, &DT, &PDT);
1328OS <<
"Printing analysis 'Branch Probability Analysis' for function '" 1329 <<
F.getName() <<
"':\n";
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
BlockExecWeight
Set of dedicated "absolute" execution weights for a block.
@ NORETURN
Weight to a block containing non returning call.
@ UNWIND
Weight to 'unwind' block of an invoke instruction.
@ COLD
Weight to a 'cold' block.
@ ZERO
Special weight used for cases with exact zero probability.
@ UNREACHABLE
Weight to an 'unreachable' block.
@ DEFAULT
Default weight is used in cases when there is no dedicated execution weight set.
@ LOWEST_NON_ZERO
Minimal possible non zero weight.
static const uint32_t FPH_TAKEN_WEIGHT
static const BranchProbability FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const uint32_t LBH_TAKEN_WEIGHT
static const uint32_t ZH_NONTAKEN_WEIGHT
static const ProbabilityTable ICmpWithLibCallTable
strcmp and similar functions return zero, negative, or positive, if the first string is equal,...
static const ProbabilityTable ICmpWithMinusOneTable
Integer compares with -1:
static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const ProbabilityTable ICmpWithZeroTable
Integer compares with 0:
static const uint32_t PH_NONTAKEN_WEIGHT
std::map< CmpInst::Predicate, ProbabilityList > ProbabilityTable
static const BranchProbability PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const BranchProbability FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const uint32_t PH_TAKEN_WEIGHT
Heuristics and lookup tables for non-loop branches: Pointer Heuristics (PH)
static const BranchProbability FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
branch Branch Probability Analysis
static const BranchProbability PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
SmallVector< BranchProbability > ProbabilityList
static const ProbabilityTable ICmpWithOneTable
Integer compares with 1:
static const uint32_t ZH_TAKEN_WEIGHT
Zero Heuristics (ZH)
static const uint32_t FPH_NONTAKEN_WEIGHT
static const BranchProbability UR_TAKEN_PROB
Unreachable-terminating branch taken probability.
static const ProbabilityTable FCmpTable
Floating-Point compares:
static const uint32_t LBH_NONTAKEN_WEIGHT
static const ProbabilityTable PointerTable
Pointer comparisons:
static const uint32_t FPH_ORD_WEIGHT
This is the probability for an ordered floating point comparison.
static void computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, SmallPtrSetImpl< const BasicBlock * > &UnlikelyBlocks)
static const uint32_t FPH_UNO_WEIGHT
This is the probability for an unordered floating point comparison, it means one or two of the operan...
cl::opt< std::string > PrintBranchProbFuncName("print-bpi-func-name", cl::Hidden, cl::desc("The option to specify the name of the function " "whose branch probability info is printed."))
static cl::opt< bool > PrintBranchProb("print-bpi", cl::init(false), cl::Hidden, cl::desc("Print the branch probability info."))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for metadata subclasses.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
API to communicate dependencies between analyses during invalidation.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
LLVM Basic Block Representation.
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
bool isEHPad() const
Return true if this basic block is an exception handling block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
BranchProbabilityInfo run(Function &F, FunctionAnalysisManager &AM)
Run the analysis pass over a function and produce BPI.
Legacy analysis pass which computes BranchProbabilityInfo.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
void print(raw_ostream &OS, const Module *M=nullptr) const override
print - Print out the internal state of the pass.
void getSccEnterBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Enters) const
Fills in Enters vector with all such blocks that don't belong to SCC with SccNum ID but there is an e...
SccInfo(const Function &F)
void getSccExitBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Exits) const
Fills in Exits vector with all such blocks that don't belong to SCC with SccNum ID but there is an ed...
int getSCCNum(const BasicBlock *BB) const
If BB belongs to some SCC then ID of that SCC is returned, otherwise -1 is returned.
Analysis providing branch probability information.
void eraseBlock(const BasicBlock *BB)
Forget analysis results for the given basic block.
void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
bool invalidate(Function &, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &)
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void calculate(const Function &F, const LoopInfo &LI, const TargetLibraryInfo *TLI, DominatorTree *DT, PostDominatorTree *PDT)
bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const
Test if an edge is hot relative to other out-edges of the Src.
void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
void print(raw_ostream &OS) const
raw_ostream & printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const
Print an edge's probability.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static uint32_t getDenominator()
static BranchProbability getRaw(uint32_t N)
static BranchProbability getOne()
static BranchProbability getUnknown()
uint32_t getNumerator() const
static BranchProbability getZero()
Represents analyses that only rely on functions' control flow.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
@ ICMP_SLT
signed less than
@ ICMP_SGT
signed greater than
bool isTrueWhenEqual() const
This is just a convenience.
Predicate getPredicate() const
Return the predicate for this instruction.
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Analysis pass which computes a DominatorTree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate Pred)
FunctionPass class - This class is used to implement most global optimizations.
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
Analysis pass that exposes the LoopInfo for a function.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
A Module instance is used to store all the information related to an LLVM module.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isPointerTy() const
True if this is an instance of PointerType.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
BlockType
Used as immediate MachineOperands for block signatures.
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto pred_end(const MachineBasicBlock *BB)
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< po_iterator< T > > post_order(const T &G)
void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &)
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
MDNode * getValidBranchWeightMDNode(const Instruction &I)
Get the valid branch weights metadata node.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
auto succ_size(const MachineBasicBlock *BB)
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Mul
Product of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A special type used by analysis passes to provide an address that identifies that particular analysis...