1//===- JumpThreading.cpp - Thread control through conditional blocks ------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This file implements the Jump Threading pass. 11//===----------------------------------------------------------------------===// 78using namespacejumpthreading;
80#define DEBUG_TYPE "jump-threading" 84STATISTIC(NumDupes,
"Number of branch blocks duplicated to eliminate phi");
88cl::desc(
"Max block size to duplicate for jump threading"),
93"jump-threading-implication-search-threshold",
94cl::desc(
"The number of predecessors to search for a stronger " 95"condition to use to thread over a weaker condition"),
99"jump-threading-phi-threshold",
104"jump-threading-across-loop-headers",
105cl::desc(
"Allow JumpThreading to thread across loop headers, for testing"),
112// Update branch probability information according to conditional 113// branch probability. This is usually made possible for cloned branches 114// in inline instances by the context specific profile in the caller. 126// cond = PN([true, %A], [..., %B]); // PHI node 129// ... // P(cond == true) = 1% 132// Here we know that when block A is taken, cond must be true, which means 133// P(cond == true | A) = 1 135// Given that P(cond == true) = P(cond == true | A) * P(A) + 136// P(cond == true | B) * P(B) 138// P(cond == true ) = P(A) + P(cond == true | B) * P(B) 141// P(A) is less than P(cond == true), i.e. 142// P(t == true) <= P(cond == true) 144// In other words, if we know P(cond == true) is unlikely, we know 145// that P(t == true) is also unlikely. 156if (TrueWeight + FalseWeight == 0)
157// Zero branch_weights do not give a hint for getting branch probabilities. 158// Technically it would result in division by zero denominator, which is 159// TrueWeight + FalseWeight. 162// Returns the outgoing edge of the dominating predecessor block 163// that leads to the PhiNode's incoming block: 166BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
167auto *PredBB = IncomingBB;
171BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
173return {PredBB, SuccBB};
175auto *SinglePredBB = PredBB->getSinglePredecessor();
177return {
nullptr,
nullptr};
179// Stop searching when SinglePredBB has been visited. It means we see 180// an unreachable loop. 181if (Visited.
count(SinglePredBB))
182return {
nullptr,
nullptr};
185 PredBB = SinglePredBB;
198 TrueWeight, TrueWeight + FalseWeight)
200 FalseWeight, TrueWeight + FalseWeight));
203if (!PredOutEdge.first)
211uint64_t PredTrueWeight, PredFalseWeight;
212// FIXME: We currently only set the profile data when it is missing. 213// With PGO, this can be used to refine even existing profile data with 214// context information. This needs to be done after more performance 219// We can not infer anything useful when BP >= 50%, because BP is the 220// upper bound probability value. 239// Jump Threading has no sense for the targets with divergent CF 249 std::make_unique<DomTreeUpdater>(
250 &DT,
nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
251 std::nullopt, std::nullopt);
259#if defined(EXPENSIVE_CHECKS) 261 DominatorTree::VerificationLevel::Full) &&
262"DT broken after JumpThreading");
266"PDT broken after JumpThreading");
269 DominatorTree::VerificationLevel::Fast) &&
270"DT broken after JumpThreading");
274"PDT broken after JumpThreading");
277return getPreservedAnalysis();
284 std::unique_ptr<DomTreeUpdater> DTU_,
285 std::optional<BlockFrequencyInfo *> BFI_,
286 std::optional<BranchProbabilityInfo *> BPI_) {
294 DTU = std::move(DTU_);
298 F->
getParent(), Intrinsic::experimental_guard);
299 HasGuards = GuardDecl && !GuardDecl->use_empty();
301// Reduce the number of instructions duplicated when optimizing strictly for 308 BBDupThreshold = DefaultBBDupThreshold;
310// JumpThreading must not processes blocks unreachable from entry. It's a 311// waste of compute time and can potentially lead to hangs. 313assert(DTU &&
"DTU isn't passed into JumpThreading before using it.");
314assert(DTU->hasDomTree() &&
"JumpThreading relies on DomTree to proceed.");
323bool EverChanged =
false;
328if (Unreachable.
count(&BB))
330while (
processBlock(&BB))
// Thread all of the branches we can over BB. 331 Changed = ChangedSinceLastAnalysisUpdate =
true;
333// Stop processing BB if it's the entry or is now deleted. The following 334// routines attempt to eliminate BB and locating a suitable replacement 335// for the entry is non-trivial. 336if (&BB == &
F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
340// When processBlock makes BB unreachable it doesn't bother to fix up 341// the instructions in it. We must remove BB to prevent invalid IR. 343 <<
"' with terminator: " << *BB.getTerminator()
345 LoopHeaders.erase(&BB);
348 Changed = ChangedSinceLastAnalysisUpdate =
true;
352// processBlock doesn't thread BBs with unconditional TIs. However, if BB 353// is "almost empty", we attempt to merge BB with its sole successor. 354auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
355if (BI && BI->isUnconditional()) {
358// The terminator must be the only non-phi instruction in BB. 359 BB.getFirstNonPHIOrDbg(
true)->isTerminator() &&
360// Don't alter Loop headers and latches to ensure another pass can 361// detect and transform nested loops later. 362 !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
364// BB is valid for cleanup here because we passed in DTU. F remains 365// BB's parent until a DTU->getDomTree() event. 367 Changed = ChangedSinceLastAnalysisUpdate =
true;
371 EverChanged |= Changed;
374// Jump threading may have introduced redundant debug values into F which 385// Replace uses of Cond with ToVal when safe to do so. If all uses are 386// replaced, we can remove Cond. We cannot blindly replace all uses of Cond 387// because we may incorrectly replace uses when guards/assumes are uses of 388// of `Cond` and we used the guards/assume to reason about the `Cond` value 389// at the end of block. RAUW unconditionally replaces all uses 390// including the guards/assumes themselves and the uses before the 396// We can unconditionally replace all uses in non-local blocks (i.e. uses 397// strictly dominated by BB), since LVI information is true from the 399if (
Cond->getParent() == KnownAtEndOfBB)
402// Replace any debug-info record users of Cond with ToVal. 404 DVR.replaceVariableLocationOp(
Cond, ToVal,
true);
406// Reached the Cond whose uses we are trying to replace, so there are no 410// We only replace uses in instructions that are guaranteed to reach the end 411// of BB, where we know Cond is ToVal. 414 Changed |=
I.replaceUsesOfWith(
Cond, ToVal);
416if (
Cond->use_empty() && !
Cond->mayHaveSideEffects()) {
417Cond->eraseFromParent();
423/// Return the cost of duplicating a piece of this block from first non-phi 424/// and before StopAt instruction to thread across it. Stop scanning the block 425/// when exceeding the threshold. If duplication is impossible, returns ~0U. 430assert(StopAt->
getParent() == BB &&
"Not an instruction from proper BB?");
432// Do not duplicate the BB if it has a lot of PHI nodes. 433// If a threadable chain is too long then the number of PHI nodes can add up, 434// leading to a substantial increase in compile time when rewriting the SSA. 435unsigned PhiCount = 0;
438if (!isa<PHINode>(&
I)) {
446 /// Ignore PHI nodes, these will be flattened when duplication happens. 449// FIXME: THREADING will delete values that are just used to compute the 450// branch, so they shouldn't count against the duplication cost. 454// Threading through a switch statement is particularly profitable. If this 455// block ends in a switch, decrease its cost to make it more likely to 457if (isa<SwitchInst>(StopAt))
460// The same holds for indirect branches, but slightly more so. 461if (isa<IndirectBrInst>(StopAt))
465// Bump the threshold up so the early exit from the loop doesn't skip the 466// terminator-based Size adjustment at the end. 469// Sum up the cost of each instruction until we get to the terminator. Don't 470// include the terminator because the copy won't include it. 472for (; &*
I != StopAt; ++
I) {
474// Stop scanning the block if we've reached the threshold. 478// Bail out if this instruction gives back a token type, it is not possible 479// to duplicate it if it is used outside this BB. 480if (
I->getType()->isTokenTy() &&
I->isUsedOutsideOfBlock(BB))
483// Blocks with NoDuplicate are modelled as having infinite cost, so they 484// are never duplicated. 485if (
constCallInst *CI = dyn_cast<CallInst>(
I))
486if (CI->cannotDuplicate() || CI->isConvergent())
493// All other instructions count for at least one unit. 496// Calls are more expensive. If they are non-intrinsic calls, we model them 497// as having cost of 4. If they are a non-vector intrinsic, we model them 498// as having cost of 2 total, and if they are a vector intrinsic, we model 499// them as having cost 1. 500if (
constCallInst *CI = dyn_cast<CallInst>(
I)) {
501if (!isa<IntrinsicInst>(CI))
503elseif (!CI->getType()->isVectorTy())
508returnSize > Bonus ?
Size - Bonus : 0;
511/// findLoopHeaders - We do not want jump threading to turn proper loop 512/// structures into irreducible loops. Doing this breaks up the loop nesting 513/// hierarchy and pessimizes later transformations. To prevent this from 514/// happening, we first have to find the loop headers. Here we approximate this 515/// by finding targets of backedges in the CFG. 517/// Note that there definitely are cases when we want to allow threading of 518/// edges across a loop header. For example, threading a jump from outside the 519/// loop (the preheader) to an exit block of the loop is definitely profitable. 520/// It is also almost always profitable to thread backedges from within the loop 521/// to exit blocks, and is often profitable to thread backedges to other blocks 522/// within the loop (forming a nested loop). This simple analysis is not rich 523/// enough to track all of these properties and keep it up-to-date as the CFG 524/// mutates, so we don't allow any of these transformations. 529for (
constauto &Edge : Edges)
530 LoopHeaders.insert(Edge.second);
533/// getKnownConstant - Helper method to determine if we can thread over a 534/// terminator with the given value as its condition, and if so what value to 535/// use for that. What kind of value this is depends on whether we want an 536/// integer or a block address, but an undef is always accepted. 537/// Returns null if Val is null or not an appropriate constant. 542// Undef is "known" enough. 549return dyn_cast<ConstantInt>(Val);
552/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see 553/// if we can infer that the value is a known ConstantInt/BlockAddress or undef 554/// in any of our predecessors. If so, return the known list of value and pred 555/// BB in the result vector. 557/// This returns true if there were any known values. 564// This method walks up use-def chains recursively. Because of this, we could 565// get into an infinite loop going around loops in the use-def chain. To 566// prevent this, keep track of what (value, block) pairs we've already visited 567// and terminate the search if we loop back to them 568if (!RecursionSet.
insert(V).second)
571// If V is a constant, then it is known in all predecessors. 574 Result.emplace_back(KC, Pred);
576return !Result.empty();
579// If V is a non-instruction value, or an instruction in a different block, 580// then it can't be derived from a PHI. 582if (!
I ||
I->getParent() != BB) {
584// Okay, if this is a live-in value, see if it has a known value at the any 585// edge from our predecessors. 587using namespacePatternMatch;
588// If the value is known by LazyValueInfo to be a constant in a 589// predecessor, use that information to try to thread this block. 591// If I is a non-local compare-with-constant instruction, use more-rich 592// 'getPredicateOnEdge' method. This would be able to handle value 593// inequalities better, for example if the compare is "X < 4" and "X < 3" 594// is known true but "X < 4" itself is not available. 601 Result.emplace_back(KC,
P);
604return !Result.empty();
607 /// If I is a PHI node, then we know the incoming values for any constants. 608if (
PHINode *PN = dyn_cast<PHINode>(
I)) {
609for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
610Value *InVal = PN->getIncomingValue(i);
612 Result.emplace_back(KC, PN->getIncomingBlock(i));
615 PN->getIncomingBlock(i),
618 Result.emplace_back(KC, PN->getIncomingBlock(i));
622return !Result.empty();
625// Handle Cast instructions. 626if (
CastInst *CI = dyn_cast<CastInst>(
I)) {
627Value *Source = CI->getOperand(0);
634// Convert the known values. 635for (
auto &Val : Vals)
638 Result.emplace_back(Folded, Val.second);
640return !Result.empty();
644Value *Source = FI->getOperand(0);
652return !Result.empty();
655// Handle some boolean conditions. 656if (
I->getType()->getPrimitiveSizeInBits() == 1) {
657using namespacePatternMatch;
683// Scan for the sentinel. If we find an undef, force it to the 684// interesting value: x|undef -> true and x&undef -> false. 685for (
constauto &LHSVal : LHSVals)
686if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
687 Result.emplace_back(InterestingVal, LHSVal.second);
688 LHSKnownBBs.
insert(LHSVal.second);
690for (
constauto &RHSVal : RHSVals)
691if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
692// If we already inferred a value for this block on the LHS, don't 694if (!LHSKnownBBs.
count(RHSVal.second))
695 Result.emplace_back(InterestingVal, RHSVal.second);
698return !Result.empty();
701// Handle the NOT form of XOR. 702if (
I->getOpcode() == Instruction::Xor &&
703 isa<ConstantInt>(
I->getOperand(1)) &&
704 cast<ConstantInt>(
I->getOperand(1))->isOne()) {
710// Invert the known values. 711for (
auto &R : Result)
717// Try to simplify some other binary operator values. 721if (
ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
726// Try to use constant folding to simplify the binary operator. 727for (
constauto &LHSVal : LHSVals) {
733 Result.emplace_back(KC, LHSVal.second);
737return !Result.empty();
740// Handle compare with phi operand, where the PHI is defined in this block. 741if (
CmpInst *Cmp = dyn_cast<CmpInst>(
I)) {
744Type *CmpType = Cmp->getType();
745Value *CmpLHS = Cmp->getOperand(0);
746Value *CmpRHS = Cmp->getOperand(1);
749PHINode *PN = dyn_cast<PHINode>(CmpLHS);
751 PN = dyn_cast<PHINode>(CmpRHS);
752// Do not perform phi translation across a loop header phi, because this 753// may result in comparison of values from two different loop iterations. 754// FIXME: This check is broken if LoopHeaders is not populated. 755if (PN && PN->
getParent() == BB && !LoopHeaders.contains(BB)) {
757// We can do this simplification if any comparisons fold to true or false. 771if (!isa<Constant>(
RHS))
774// getPredicateOnEdge call will make no sense if LHS is defined in BB. 775auto LHSInst = dyn_cast<Instruction>(
LHS);
776if (LHSInst && LHSInst->getParent() == BB)
780 BB, CxtI ? CxtI : Cmp);
784 Result.emplace_back(KC, PredBB);
787return !Result.empty();
790// If comparing a live-in value against a constant, see if we know the 791// live-in value on any predecessors. 792if (isa<Constant>(CmpRHS) && !CmpType->
isVectorTy()) {
793Constant *CmpConst = cast<Constant>(CmpRHS);
795if (!isa<Instruction>(CmpLHS) ||
796 cast<Instruction>(CmpLHS)->
getParent() != BB) {
798// If the value is known by LazyValueInfo to be a constant in a 799// predecessor, use that information to try to thread this block. 803 Result.emplace_back(KC,
P);
806return !Result.empty();
809// InstCombine can fold some forms of constant range checks into 810// (icmp (add (x, C1)), C2). See if we have we have such a thing with 813using namespacePatternMatch;
817if (isa<ConstantInt>(CmpConst) &&
819if (!isa<Instruction>(AddLHS) ||
820 cast<Instruction>(AddLHS)->
getParent() != BB) {
822// If the value is known by LazyValueInfo to be a ConstantRange in 823// a predecessor, use that information to try to thread this 826 AddLHS,
P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
827// Propagate the range through the addition. 830// Get the range where the compare returns true. 832 Pred, cast<ConstantInt>(CmpConst)->getValue());
842 Result.emplace_back(ResC,
P);
845return !Result.empty();
850// Try to find a constant value for the LHS of a comparison, 851// and evaluate it statically if we can. 856for (
constauto &LHSVal : LHSVals) {
861 Result.emplace_back(KC, LHSVal.second);
864return !Result.empty();
869// Handle select instructions where at least one operand is a known constant 870// and we can figure out the condition value for any predecessor block. 874if ((TrueVal || FalseVal) &&
877for (
auto &
C : Conds) {
880// Figure out what value to use for the condition. 884 KnownCond = CI->isOne();
886assert(isa<UndefValue>(
Cond) &&
"Unexpected condition value");
887// Either operand will do, so be sure to pick the one that's a known 889// FIXME: Do this more cleverly if both values are known constants? 890 KnownCond = (TrueVal !=
nullptr);
893// See if the select has a known constant value for this predecessor. 894if (
Constant *Val = KnownCond ? TrueVal : FalseVal)
895 Result.emplace_back(Val,
C.second);
898return !Result.empty();
902// If all else fails, see if LVI can figure out a constant value for us. 907 Result.emplace_back(KC, Pred);
910return !Result.empty();
913/// GetBestDestForBranchOnUndef - If we determine that the specified block ends 914/// in an undefined jump, decide which block is best to revector to. 916/// Since we can pick an arbitrary destination, we pick the successor with the 917/// fewest predecessors. This should reduce the in-degree of the others. 922// Compute the successor with the minimum number of predecessors. 927if (NumPreds < MinNumPreds) {
929 MinNumPreds = NumPreds;
939// If the block has its address taken, it may be a tree of dead constants 940// hanging off of it. These shouldn't keep the block alive. 946/// processBlock - If there are any predecessors whose control can be threaded 947/// through to a successor, transform them now. 949// If the block is trivially dead, just return and let the caller nuke it. 950// This simplifies other transformations. 951if (DTU->isBBPendingDeletion(BB) ||
955// If this block has a single predecessor, and if that pred has a single 956// successor, merge the blocks. This encourages recursive jump threading 957// because now the condition in this block can be threaded through 958// predecessors of our predecessor block. 965// Look if we can propagate guards to predecessors. 969// What kind of constant we're looking for. 972// Look to see if the terminator is a conditional branch, switch or indirect 973// branch, if not we can't thread it. 976if (
BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
977// Can't thread an unconditional jump. 978if (BI->isUnconditional())
returnfalse;
979 Condition = BI->getCondition();
980 }
elseif (
SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
981 Condition = SI->getCondition();
982 }
elseif (
IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
983// Can't thread indirect branch with no successors. 984if (IB->getNumSuccessors() == 0)
returnfalse;
985 Condition = IB->getAddress()->stripPointerCasts();
988returnfalse;
// Must be an invoke or callbr. 991// Keep track if we constant folded the condition in this invocation. 992bool ConstantFolded =
false;
994// Run constant folding to see if we can reduce the condition to a simple 1000I->replaceAllUsesWith(SimpleVal);
1002I->eraseFromParent();
1003 Condition = SimpleVal;
1004 ConstantFolded =
true;
1008// If the terminator is branching on an undef or freeze undef, we can pick any 1009// of the successors to branch to. Let getBestDestForJumpOnUndef decide. 1010auto *FI = dyn_cast<FreezeInst>(Condition);
1011if (isa<UndefValue>(Condition) ||
1012 (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1014 std::vector<DominatorTree::UpdateType> Updates;
1016// Fold the branch/switch. 1020if (i == BestSucc)
continue;
1027 <<
"' folding undef terminator: " << *BBTerm <<
'\n');
1032 DTU->applyUpdatesPermissive(Updates);
1034 FI->eraseFromParent();
1038// If the terminator of this block is branching on a constant, simplify the 1039// terminator to an unconditional branch. This can occur due to threading in 1047if (
auto *BPI = getBPI())
1048 BPI->eraseBlock(BB);
1052Instruction *CondInst = dyn_cast<Instruction>(Condition);
1054// All the rest of our checks depend on the condition being an instruction. 1056// FIXME: Unify this with code below. 1059return ConstantFolded;
1062// Some of the following optimization can safely work on the unfrozen cond. 1063Value *CondWithoutFreeze = CondInst;
1064if (
auto *FI = dyn_cast<FreezeInst>(CondInst))
1065 CondWithoutFreeze = FI->getOperand(0);
1067if (
CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
1068// If we're branching on a conditional, LVI might be able to determine 1069// it's value at the branch instruction. We only handle comparisons 1070// against a constant at this time. 1071if (
Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
1073 LVI->
getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1075/*UseBlockValue=*/false);
1077// We can safely replace *some* uses of the CondInst if it has 1078// exactly one value as returned by LVI. RAUW is incorrect in the 1079// presence of guards and assumes, that have the `Cond` as the use. This 1080// is because we use the guards/assume to reason about the `Cond` value 1081// at the end of block, but RAUW unconditionally replaces all uses 1082// including the guards/assumes themselves and the uses before the 1088// We did not manage to simplify this branch, try to see whether 1089// CondCmp depends on a known phi-select pattern. 1099// Check for some cases that are worth simplifying. Right now we want to look 1100// for loads that are used by a switch or by the condition for the branch. If 1101// we see one, check to see if it's partially redundant. If so, insert a PHI 1102// which can then be used to thread the values. 1103Value *SimplifyValue = CondWithoutFreeze;
1105if (
CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1106if (isa<Constant>(CondCmp->getOperand(1)))
1107 SimplifyValue = CondCmp->getOperand(0);
1109// TODO: There are other places where load PRE would be profitable, such as 1110// more complex comparisons. 1111if (
LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
1115// Before threading, try to propagate profile data backwards: 1116if (
PHINode *PN = dyn_cast<PHINode>(CondInst))
1117if (PN->getParent() == BB && isa<BranchInst>(BB->
getTerminator()))
1120// Handle a variety of cases where we are branching on something derived from 1121// a PHI node in the current block. If we can prove that any predecessors 1122// compute a predictable value based on a PHI node, thread those predecessors. 1126// If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in 1127// the current block, see if we can simplify. 1128PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
1132// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify. 1133if (CondInst->
getOpcode() == Instruction::Xor &&
1137// Search for a stronger dominating condition that can be used to simplify a 1138// conditional branch leaving BB. 1147if (!BI || !BI->isConditional())
1151// Assuming that predecessor's branch was taken, if pred's branch condition 1152// (V) implies Cond, Cond can be either true, undef, or poison. In this case, 1153// freeze(Cond) is either true or a nondeterministic value. 1154// If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true 1155// without affecting other instructions. 1156auto *FICond = dyn_cast<FreezeInst>(
Cond);
1157if (FICond && FICond->hasOneUse())
1158Cond = FICond->getOperand(0);
1169auto *PBI = dyn_cast<BranchInst>(CurrentPred->
getTerminator());
1170if (!PBI || !PBI->isConditional())
1172if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1175bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1176 std::optional<bool> Implication =
1179// If the branch condition of BB (which is Cond) and CurrentPred are 1180// exactly the same freeze instruction, Cond can be folded into CondIsTrue. 1181if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
1182if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
1183 FICond->getOperand(0))
1184 Implication = CondIsTrue;
1188BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1189BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1194 BI->eraseFromParent();
1196 FICond->eraseFromParent();
1199if (
auto *BPI = getBPI())
1200 BPI->eraseBlock(BB);
1203 CurrentBB = CurrentPred;
1210/// Return true if Op is an instruction defined in the given block. 1213if (OpInst->getParent() == BB)
1218/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially 1219/// redundant load instruction, eliminate it by replacing it with a PHI node. 1220/// This is an important optimization that encourages jump threading, and needs 1221/// to be run interlaced with other jump threading tasks. 1223// Don't hack volatile and ordered loads. 1226// If the load is defined in a block with exactly one predecessor, it can't be 1227// partially redundant. 1232// If the load is defined in an EH pad, it can't be partially redundant, 1233// because the edges between the invoke and the EH pad cannot have other 1234// instructions between them. 1240// If the loaded operand is defined in the LoadBB and its not a phi, 1241// it can't be available in predecessors. 1245// Scan a few instructions up from the load, to see if it is obviously live at 1246// the entry to its block. 1250// The dominator tree is updated lazily and may not be valid at this point. 1254// If the value of the load is locally available within the block, just use 1255// it. This frequently occurs for reg2mem'd allocas. 1258LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1263// If the returned value is the load itself, replace with poison. This can 1264// only happen in dead loops. 1265if (AvailableVal == LoadI)
1267if (AvailableVal->getType() != LoadI->
getType()) {
1270 cast<Instruction>(AvailableVal)->setDebugLoc(LoadI->
getDebugLoc());
1277// Otherwise, if we scanned the whole block and got to the top of the block, 1278// we know the block is locally transparent to the load. If not, something 1279// might clobber its value. 1280if (BBIt != LoadBB->
begin())
1283// If all of the loads and stores that feed the value have the same AA tags, 1284// then we can propagate them onto any newly inserted loads. 1291 AvailablePredsTy AvailablePreds;
1295// If we got here, the loaded value is transparent through to the start of the 1296// block. Check to see if it is available in any of the predecessor blocks. 1298// If we already scanned this predecessor, skip it. 1299if (!PredsScanned.
insert(PredBB).second)
1302 BBIt = PredBB->
end();
1303unsigned NumScanedInst = 0;
1304Value *PredAvailable =
nullptr;
1305// NOTE: We don't CSE load that is volatile or anything stronger than 1306// unordered, that should have been checked when we entered the function. 1308"Attempting to CSE volatile or atomic loads");
1309// If this is a load on a phi pointer, phi-translate it and search 1310// for available load/store to the pointer in predecessors. 1318 &BatchAA, &IsLoadCSE, &NumScanedInst);
1320// If PredBB has a single predecessor, continue scanning through the 1321// single predecessor. 1323while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->
begin() &&
1327 BBIt = SinglePredBB->
end();
1329 Loc, AccessTy, LoadI->
isAtomic(), SinglePredBB, BBIt,
1335if (!PredAvailable) {
1336 OneUnavailablePred = PredBB;
1341 CSELoads.
push_back(cast<LoadInst>(PredAvailable));
1343// If so, this load is partially redundant. Remember this info so that we 1344// can create a PHI node. 1345 AvailablePreds.emplace_back(PredBB, PredAvailable);
1348// If the loaded value isn't available in any predecessor, it isn't partially 1350if (AvailablePreds.empty())
returnfalse;
1352// Okay, the loaded value is available in at least one (and maybe all!) 1353// predecessors. If the value is unavailable in more than one unique 1354// predecessor, we want to insert a merge block for those common predecessors. 1355// This ensures that we only have to insert one reload, thus not increasing 1359// If the value is unavailable in one of predecessors, we will end up 1360// inserting a new instruction into them. It is only valid if all the 1361// instructions before LoadI are guaranteed to pass execution to its 1362// successor, or if LoadI is safe to speculate. 1363// TODO: If this logic becomes more complex, and we will perform PRE insertion 1364// farther than to a predecessor, we need to reuse the code from GVN's PRE. 1365// It requires domination tree analysis, so for this simple case it is an 1367if (PredsScanned.
size() != AvailablePreds.size() &&
1369for (
autoI = LoadBB->
begin(); &*
I != LoadI; ++
I)
1373// If there is exactly one predecessor where the value is unavailable, the 1374// already computed 'OneUnavailablePred' block is it. If it ends in an 1375// unconditional branch, we know that it isn't a critical edge. 1376if (PredsScanned.
size() == AvailablePreds.size()+1 &&
1378 UnavailablePred = OneUnavailablePred;
1379 }
elseif (PredsScanned.
size() != AvailablePreds.size()) {
1380// Otherwise, we had multiple unavailable predecessors or we had a critical 1381// edge from the one. 1385for (
constauto &AvailablePred : AvailablePreds)
1386 AvailablePredSet.
insert(AvailablePred.first);
1388// Add all the unavailable predecessors to the PredsToSplit list. 1390// If the predecessor is an indirect goto, we can't split the edge. 1391if (isa<IndirectBrInst>(
P->getTerminator()))
1394if (!AvailablePredSet.
count(
P))
1398// Split them out to their own block. 1399 UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit,
"thread-pre-split");
1402// If the value isn't available in all predecessors, then there will be 1403// exactly one where it isn't available. Insert a load on that edge and add 1404// it to the AvailablePreds list. 1405if (UnavailablePred) {
1407"Can't handle critical edge here!");
1417 AvailablePreds.emplace_back(UnavailablePred, NewVal);
1420// Now we know that each predecessor of this block has a value in 1421// AvailablePreds, sort them for efficient access as we're walking the preds. 1424// Create a PHI node at the start of the block for the PRE'd load value. 1430// Insert new entries into the PHI for each predecessor. A single block may 1431// have multiple entries here. 1433 AvailablePredsTy::iterator
I =
1436assert(
I != AvailablePreds.end() &&
I->first ==
P &&
1437"Didn't find entry for predecessor!");
1439// If we have an available predecessor but it requires casting, insert the 1440// cast in the predecessor and use the cast. Note that we have to update the 1441// AvailablePreds vector as we go so that all of the PHI entries for this 1442// predecessor use the same bitcast. 1446 PredV, LoadI->
getType(),
"",
P->getTerminator()->getIterator());
1451for (
LoadInst *PredLoadI : CSELoads) {
1462/// findMostPopularDest - The specified list contains multiple possible 1463/// threadable destinations. Pick the one that occurs the most frequently in 1469assert(!PredToDestList.empty());
1471// Determine popularity. If there are multiple possible destinations, we 1472// explicitly choose to ignore 'undef' destinations. We prefer to thread 1473// blocks with known and real destinations to threading undef. We'll handle 1474// them later if interesting. 1477// Populate DestPopularity with the successors in the order they appear in the 1478// successor list. This way, we ensure determinism by iterating it in the 1479// same order in llvm::max_element below. We map nullptr to 0 so that we can 1480// return nullptr when PredToDestList contains nullptr only. 1481 DestPopularity[
nullptr] = 0;
1483 DestPopularity[SuccBB] = 0;
1485for (
constauto &PredToDest : PredToDestList)
1486if (PredToDest.second)
1487 DestPopularity[PredToDest.second]++;
1489// Find the most popular dest. 1492// Okay, we have finally picked the most popular destination. 1493return MostPopular->first;
1496// Try to evaluate the value of V when the control flows from PredPredBB to 1497// BB->getSinglePredecessor() and then on to BB. 1503assert(PredBB &&
"Expected a single predecessor");
1505if (
Constant *Cst = dyn_cast<Constant>(V)) {
1509// Consult LVI if V is not an instruction in BB or PredBB. 1511if (!
I || (
I->getParent() != BB &&
I->getParent() != PredBB)) {
1515// Look into a PHI argument. 1517if (
PHI->getParent() == PredBB)
1518return dyn_cast<Constant>(
PHI->getIncomingValueForBlock(PredPredBB));
1522// If we have a CmpInst, try to fold it for each incoming edge into PredBB. 1523if (
CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1524if (CondCmp->getParent() == BB) {
1543// If threading this would thread across a loop header, don't even try to 1545if (LoopHeaders.count(BB))
1551// We don't have known values in predecessors. See if we can thread through 1552// BB and its sole predecessor. 1557"computeValueKnownInPredecessors returned true with no values");
1560for (
constauto &PredValue : PredValues) {
1562 <<
"': FOUND condition = " << *PredValue.first
1563 <<
" for pred '" << PredValue.second->getName() <<
"'.\n";
1566// Decide what we want to thread through. Convert our list of known values to 1567// a list of known destinations for each pred. This also discards duplicate 1568// predecessors and keeps track of the undefined inputs (which are represented 1569// as a null dest in the PredToDestList). 1578for (
constauto &PredValue : PredValues) {
1580if (!SeenPreds.insert(Pred).second)
1581continue;
// Duplicate predecessor entry. 1586if (isa<UndefValue>(Val))
1589assert(isa<ConstantInt>(Val) &&
"Expecting a constant integer");
1590 DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->
isZero());
1592assert(isa<ConstantInt>(Val) &&
"Expecting a constant integer");
1593 DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1596 &&
"Unexpected terminator");
1597assert(isa<BlockAddress>(Val) &&
"Expecting a constant blockaddress");
1598 DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1601// If we have exactly one destination, remember it for efficiency below. 1602if (PredToDestList.
empty()) {
1606if (OnlyDest != DestBB)
1607 OnlyDest = MultipleDestSentinel;
1608// It possible we have same destination, but different value, e.g. default 1609// case in switchinst. 1611 OnlyVal = MultipleVal;
1614// If the predecessor ends with an indirect goto, we can't change its 1622// If all edges were unthreadable, we fail. 1623if (PredToDestList.
empty())
1626// If all the predecessors go to a single known successor, we want to fold, 1627// not thread. By doing so, we do not need to duplicate the current block and 1628// also miss potential opportunities in case we dont/cant duplicate. 1629if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1631bool SeenFirstBranchToOnlyDest =
false;
1632 std::vector <DominatorTree::UpdateType> Updates;
1635if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1636 SeenFirstBranchToOnlyDest =
true;
// Don't modify the first branch. 1638 SuccBB->removePredecessor(BB,
true);
// This is unreachable successor. 1643// Finally update the terminator. 1648 Term->eraseFromParent();
1649 DTU->applyUpdatesPermissive(Updates);
1650if (
auto *BPI = getBPI())
1651 BPI->eraseBlock(BB);
1653// If the condition is now dead due to the removal of the old terminator, 1655if (
auto *CondInst = dyn_cast<Instruction>(
Cond)) {
1656if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1657 CondInst->eraseFromParent();
1658// We can safely replace *some* uses of the CondInst if it has 1659// exactly one value as returned by LVI. RAUW is incorrect in the 1660// presence of guards and assumes, that have the `Cond` as the use. This 1661// is because we use the guards/assume to reason about the `Cond` value 1662// at the end of block, but RAUW unconditionally replaces all uses 1663// including the guards/assumes themselves and the uses before the 1665elseif (OnlyVal && OnlyVal != MultipleVal)
1672// Determine which is the most common successor. If we have many inputs and 1673// this block is a switch, we want to start by threading the batch that goes 1674// to the most popular destination first. If we only know about one 1675// threadable destination (the common case) we can avoid this. 1678if (MostPopularDest == MultipleDestSentinel) {
1679// Remove any loop headers from the Dest list, threadEdge conservatively 1680// won't process them, but we might have other destination that are eligible 1681// and we still want to process. 1683 [&](
const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1684return LoopHeaders.contains(PredToDest.second);
1687if (PredToDestList.
empty())
1693// Now that we know what the most popular destination is, factor all 1694// predecessors that will jump to it into a single predecessor. 1696for (
constauto &PredToDest : PredToDestList)
1697if (PredToDest.second == MostPopularDest) {
1700// This predecessor may be a switch or something else that has multiple 1701// edges to the block. Factor each of these edges by listing them 1702// according to # occurrences in PredsToFactor. 1708// If the threadable edges are branching on an undefined value, we get to pick 1709// the destination that these predecessors should get to. 1710if (!MostPopularDest)
1714// Ok, try to thread it! 1718/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on 1719/// a PHI node (or freeze PHI) in the current block. See if there are any 1720/// simplifications we can do based on inputs to the phi node. 1724// TODO: We could make use of this to do it once for blocks with common PHI 1729// If any of the predecessor blocks end in an unconditional branch, we can 1730// *duplicate* the conditional branch into that block in order to further 1731// encourage jump threading and to eliminate cases where we have branch on a 1732// phi of an icmp (branch on icmp is much better). 1733// This is still beneficial when a frozen phi is used as the branch condition 1734// because it allows CodeGenPrepare to further canonicalize br(freeze(icmp)) 1735// to br(icmp(freeze ...)). 1739if (PredBr->isUnconditional()) {
1740 PredBBs[0] = PredBB;
1741// Try to duplicate BB into PredBB. 1750/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on 1751/// a xor instruction in the current block. See if there are any 1752/// simplifications we can do based on inputs to the xor. 1756// If either the LHS or RHS of the xor is a constant, don't do this 1762// If the first instruction in BB isn't a phi, we won't be able to infer 1763// anything special about any particular predecessor. 1764if (!isa<PHINode>(BB->
front()))
1767// If this BB is a landing pad, we won't be able to split the edge into it. 1771// If we have a xor as the branch input to this block, and we know that the 1772// LHS or RHS of the xor in any predecessor is true/false, then we can clone 1773// the condition into the predecessor and fix that value to true, saving some 1774// logical ops on that path and encouraging other paths to simplify. 1776// This copies something like this: 1779// %X = phi i1 [1], [%X'] 1780// %Y = icmp eq i32 %A, %B 1781// %Z = xor i1 %X, %Y 1786// %Y = icmp ne i32 %A, %B 1801"computeValueKnownInPredecessors returned true with no values");
1803// Scan the information to see which is most popular: true or false. The 1804// predecessors can be of the set true, false, or undef. 1805unsigned NumTrue = 0, NumFalse = 0;
1806for (
constauto &XorOpValue : XorOpValues) {
1807if (isa<UndefValue>(XorOpValue.first))
1808// Ignore undefs for the count. 1810if (cast<ConstantInt>(XorOpValue.first)->isZero())
1816// Determine which value to split on, true, false, or undef if neither. 1818if (NumTrue > NumFalse)
1820elseif (NumTrue != 0 || NumFalse != 0)
1823// Collect all of the blocks that this can be folded into so that we can 1824// factor this once and clone it once. 1826for (
constauto &XorOpValue : XorOpValues) {
1827if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1830 BlocksToFoldInto.
push_back(XorOpValue.second);
1833// If we inferred a value for all of the predecessors, then duplication won't 1834// help us. However, we can just replace the LHS or RHS with the constant. 1835if (BlocksToFoldInto.
size() ==
1836 cast<PHINode>(BB->
front()).getNumIncomingValues()) {
1838// If all preds provide undef, just nuke the xor, because it is undef too. 1842// If all preds provide 0, replace the xor with the other input. 1846// If all preds provide 1, set the computed value to 1. 1853// If any of predecessors end with an indirect goto, we can't change its 1860// Try to duplicate BB into PredBB. 1864/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new 1865/// predecessor to the PHIBB block. If it has PHI nodes, add entries for 1866/// NewPred using the entries from OldPred (suitably mapped). 1872// Ok, we have a PHI node. Figure out what the incoming value was for the 1874Value *
IV = PN.getIncomingValueForBlock(OldPred);
1876// Remap the value if necessary. 1883 PN.addIncoming(
IV, NewPred);
1887/// Merge basic block BB into its sole predecessor if possible. 1898// If SinglePred was a loop header, BB becomes one. 1899if (LoopHeaders.erase(SinglePred))
1900 LoopHeaders.insert(BB);
1905// Now that BB is merged into SinglePred (i.e. SinglePred code followed by 1906// BB code within one basic block `BB`), we need to invalidate the LVI 1907// information associated with BB, because the LVI information need not be 1908// true for all of BB after the merge. For example, 1909// Before the merge, LVI info and code is as follows: 1910// SinglePred: <LVI info1 for %p val> 1912// call @exit() // need not transfer execution to successor. 1913// assume(%p) // from this point on %p is true 1915// BB: <LVI info2 for %p val, i.e. %p is true> 1919// Note that this LVI info for blocks BB and SinglPred is correct for %p 1920// (info2 and info1 respectively). After the merge and the deletion of the 1921// LVI info1 for SinglePred. We have the following code: 1922// BB: <LVI info2 for %p val> 1926// %x = use of %p <-- LVI info2 is correct from here onwards. 1928// LVI info2 for BB is incorrect at the beginning of BB. 1930// Invalidate LVI information for BB if the LVI is not provably true for 1937/// Update the SSA form. NewBB contains instructions that are copied from BB. 1938/// ValueMapping maps old values in BB to new ones in NewBB. 1941// If there were values defined in BB that are used outside the block, then we 1942// now have to update all uses of the value to use either the original value, 1943// the cloned value, or some PHI derived value. This can require arbitrary 1944// PHI insertion, of which we are prepared to do, clean these up now. 1951// Scan all uses of this instruction to see if it is used outside of its 1952// block, and if so, record them in UsesToRename. 1953for (
Use &U :
I.uses()) {
1956if (UserPN->getIncomingBlock(U) == BB)
1958 }
elseif (
User->getParent() == BB)
1964// Find debug values outside of the block 1973// If there are no uses outside the block, we're done with this instruction. 1974if (UsesToRename.
empty() && DbgValues.
empty() && DbgVariableRecords.
empty())
1978// We found a use of I outside of BB. Rename all uses of I that are outside 1979// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks 1980// with the two values we know. 1985while (!UsesToRename.
empty())
1987if (!DbgValues.
empty() || !DbgVariableRecords.
empty()) {
1991 DbgVariableRecords.
clear();
1998/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone 1999/// arguments that come from PredBB. Return the map from the variables in the 2000/// source basic block to the variables in the newly created basic block. 2007// We are going to have to map operands from the source basic block to the new 2008// copy of the block 'NewBB'. If there are PHI nodes in the source basic 2009// block, evaluate them to account for entry from PredBB. 2011// Retargets llvm.dbg.value to any renamed variables. 2012auto RetargetDbgValueIfPossible = [&](
Instruction *NewInst) ->
bool {
2013auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst);
2018for (
auto DbgOperand : DbgInstruction->location_ops()) {
2019auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand);
2020if (!DbgOperandInstruction)
2023autoI = ValueMapping.
find(DbgOperandInstruction);
2024if (
I != ValueMapping.
end()) {
2026 std::pair<Value *, Value *>(DbgOperand,
I->second));
2030for (
auto &[OldOp, MappedOp] : OperandsToRemap)
2031 DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp);
2035// Duplicate implementation of the above dbg.value code, using 2036// DbgVariableRecords instead. 2039for (
auto *
Op : DVR->location_ops()) {
2044autoI = ValueMapping.
find(OpInst);
2045if (
I != ValueMapping.
end())
2046 OperandsToRemap.
insert({OpInst,
I->second});
2049for (
auto &[OldOp, MappedOp] : OperandsToRemap)
2050 DVR->replaceVariableLocationOp(OldOp, MappedOp);
2055// Clone the phi nodes of the source basic block into NewBB. The resulting 2056// phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater 2057// might need to rewrite the operand of the cloned phi. 2058for (;
PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2061 ValueMapping[PN] = NewPN;
2064// Clone noalias scope declarations in the threaded block. When threading a 2065// loop exit, we would otherwise end up with two idential scope declarations 2066// visible at the same time. 2076 RetargetDbgVariableRecordIfPossible(&DVR);
2079// Clone the non-phi instructions of the source basic block into NewBB, 2080// keeping track of the mapping and using it to remap operands in the cloned 2082for (; BI != BE; ++BI) {
2084 New->setName(BI->getName());
2085 New->insertInto(NewBB, NewBB->
end());
2086 ValueMapping[&*BI] = New;
2089 CloneAndRemapDbgInfo(New, &*BI);
2091if (RetargetDbgValueIfPossible(New))
2094// Remap operands to patch up intra-block references. 2095for (
unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2096if (
Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2098if (
I != ValueMapping.
end())
2099 New->setOperand(i,
I->second);
2103// There may be DbgVariableRecords on the terminator, clone directly from 2104// marker to marker as there isn't an instruction there. 2105if (BE != RangeBB->
end() && BE->hasDbgRecords()) {
2106// Dump them at the end. 2111 RetargetDbgVariableRecordIfPossible(&DVR);
2115/// Attempt to thread through two successive basic blocks. 2121// %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ] 2122// %tobool = icmp eq i32 %cond, 0 2123// br i1 %tobool, label %BB, label ... 2126// %cmp = icmp eq i32* %var, null 2127// br i1 %cmp, label ..., label ... 2129// We don't know the value of %var at BB even if we know which incoming edge 2130// we take to BB. However, once we duplicate PredBB for each of its incoming 2131// edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of 2132// PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB. 2134// Require that BB end with a Branch for simplicity. 2139// BB must have exactly one predecessor. 2144// Require that PredBB end with a conditional Branch. If PredBB ends with an 2145// unconditional branch, we should be merging PredBB and BB instead. For 2146// simplicity, we don't deal with a switch. 2151// If PredBB has exactly one incoming edge, we don't gain anything by copying 2156// Don't thread through PredBB if it contains a successor edge to itself, in 2157// which case we would infinite loop. Suppose we are threading an edge from 2158// PredPredBB through PredBB and BB to SuccBB with PredBB containing a 2159// successor edge to itself. If we allowed jump threading in this case, we 2160// could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since 2161// PredBB.thread has a successor edge to PredBB, we would immediately come up 2162// with another jump threading opportunity from PredBB.thread through PredBB 2163// and BB to SuccBB. This jump threading would repeatedly occur. That is, we 2164// would keep peeling one iteration from PredBB. 2168// Don't thread across a loop header. 2169if (LoopHeaders.count(PredBB))
2172// Avoid complication with duplicating EH pads. 2176// Find a predecessor that we can thread. For simplicity, we only consider a 2177// successor edge out of BB to which we thread exactly one incoming edge into 2179unsigned ZeroCount = 0;
2180unsigned OneCount = 0;
2185// If PredPred ends with IndirectBrInst, we can't handle it. 2186if (isa<IndirectBrInst>(
P->getTerminator()))
2188if (
ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
2193 }
elseif (CI->isOne()) {
2200// Disregard complicated cases where we have to thread multiple edges. 2202if (ZeroCount == 1) {
2203 PredPredBB = ZeroPred;
2204 }
elseif (OneCount == 1) {
2205 PredPredBB = OnePred;
2212// If threading to the same block as we come from, we would infinite loop. 2215 <<
"' - would thread to self!\n");
2219// If threading this would thread across a loop header, don't thread the edge. 2220// See the comments above findLoopHeaders for justifications and caveats. 2221if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2223bool BBIsHeader = LoopHeaders.count(BB);
2224bool SuccIsHeader = LoopHeaders.count(SuccBB);
2225dbgs() <<
" Not threading across " 2226 << (BBIsHeader ?
"loop header BB '" :
"block BB '")
2227 << BB->
getName() <<
"' to dest " 2228 << (SuccIsHeader ?
"loop header BB '" :
"block BB '")
2230 <<
"' - it might create an irreducible loop!\n";
2235// Compute the cost of duplicating BB and PredBB. 2241// Give up if costs are too high. We need to check BBCost and PredBBCost 2242// individually before checking their sum because getJumpThreadDuplicationCost 2243// return (unsigned)~0 for those basic blocks that cannot be duplicated. 2244if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
2245 BBCost + PredBBCost > BBDupThreshold) {
2247 <<
"' - Cost is too high: " << PredBBCost
2248 <<
" for PredBB, " << BBCost <<
"for BB\n");
2252// Now we are ready to duplicate PredBB. 2264// Build BPI/BFI before any changes are made to IR. 2265bool HasProfile = doesBlockHaveProfileData(BB);
2266auto *BFI = getOrCreateBFI(HasProfile);
2267auto *BPI = getOrCreateBPI(BFI !=
nullptr);
2277// Set the block frequency of NewBB. 2279assert(BPI &&
"It's expected BPI to exist along with BFI");
2280auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2281 BPI->getEdgeProbability(PredPredBB, PredBB);
2282 BFI->setBlockFreq(NewBB, NewBBFreq);
2285// We are going to have to map operands from the original BB block to the new 2286// copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them 2287// to account for entry from PredPredBB. 2292// Copy the edge probabilities from PredBB to NewBB. 2294 BPI->copyEdgeProbabilities(PredBB, NewBB);
2296// Update the terminator of PredPredBB to jump to NewBB instead of PredBB. 2297// This eliminates predecessors from PredPredBB, which requires us to simplify 2298// any PHI nodes in PredBB. 2311 DTU->applyUpdatesPermissive(
2319// Clean up things like PHI nodes with single operands, dead instructions, 2329/// tryThreadEdge - Thread an edge if it's safe and profitable to do so. 2333// If threading to the same block as we come from, we would infinite loop. 2336 <<
"' - would thread to self!\n");
2340// If threading this would thread across a loop header, don't thread the edge. 2341// See the comments above findLoopHeaders for justifications and caveats. 2342if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2344bool BBIsHeader = LoopHeaders.count(BB);
2345bool SuccIsHeader = LoopHeaders.count(SuccBB);
2346dbgs() <<
" Not threading across " 2347 << (BBIsHeader ?
"loop header BB '" :
"block BB '") << BB->
getName()
2348 <<
"' to dest " << (SuccIsHeader ?
"loop header BB '" :
"block BB '")
2349 << SuccBB->
getName() <<
"' - it might create an irreducible loop!\n";
2356if (JumpThreadCost > BBDupThreshold) {
2358 <<
"' - Cost is too high: " << JumpThreadCost <<
"\n");
2366/// threadEdge - We have decided that it is safe and profitable to factor the 2367/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB 2368/// across BB. Transform the IR to reflect this change. 2372assert(SuccBB != BB &&
"Don't create an infinite loop");
2374assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2375"Don't thread across loop headers");
2377// Build BPI/BFI before any changes are made to IR. 2378bool HasProfile = doesBlockHaveProfileData(BB);
2379auto *BFI = getOrCreateBFI(HasProfile);
2380auto *BPI = getOrCreateBPI(BFI !=
nullptr);
2382// And finally, do it! Start by factoring the predecessors if needed. 2384if (PredBBs.
size() == 1)
2385 PredBB = PredBBs[0];
2388 <<
" common predecessors.\n");
2389 PredBB = splitBlockPreds(BB, PredBBs,
".thr_comm");
2392// And finally, do it! 2394 <<
"' to '" << SuccBB->
getName()
2395 <<
", across block:\n " << *BB <<
"\n");
2404// Set the block frequency of NewBB. 2406assert(BPI &&
"It's expected BPI to exist along with BFI");
2408 BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
2409 BFI->setBlockFreq(NewBB, NewBBFreq);
2412// Copy all the instructions from BB to NewBB except the terminator. 2417// We didn't copy the terminator from BB over to NewBB, because there is now 2418// an unconditional jump to SuccBB. Insert the unconditional jump. 2422// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the 2423// PHI nodes for NewBB now. 2426// Update the terminator of PredBB to jump to NewBB instead of BB. This 2427// eliminates predecessors from BB, which requires us to simplify any PHI 2436// Enqueue required DT updates. 2443// At this point, the IR is fully up to date and consistent. Do a quick scan 2444// over the new instructions and zap any that are constants or dead. This 2445// frequently happens because of phi translation. 2448// Update the edge weight from BB to SuccBB, which should be less than before. 2449 updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
2455/// Create a new basic block that will be the predecessor of BB and successor of 2456/// all blocks in Preds. When profile data is available, update the frequency of 2463// Collect the frequencies of all predecessors of BB, which will be used to 2464// update the edge weight of the result of splitting predecessors. 2466auto *BFI = getBFI();
2468auto *BPI = getOrCreateBPI(
true);
2469for (
auto *Pred : Preds)
2470 FreqMap.
insert(std::make_pair(
2471 Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2474// In the case when BB is a LandingPad block we create 2 new predecessors 2475// instead of just one. 2477 std::string NewName = std::string(Suffix) +
".split-lp";
2483 std::vector<DominatorTree::UpdateType> Updates;
2484 Updates.reserve((2 * Preds.size()) + NewBBs.
size());
2485for (
auto *NewBB : NewBBs) {
2491if (BFI)
// Update frequencies between Pred -> NewBB. 2492 NewBBFreq += FreqMap.
lookup(Pred);
2494if (BFI)
// Apply the summed frequency to NewBB. 2495 BFI->setBlockFreq(NewBB, NewBBFreq);
2498 DTU->applyUpdatesPermissive(Updates);
2502bool JumpThreadingPass::doesBlockHaveProfileData(
BasicBlock *BB) {
2510/// Update the block frequency of BB and branch weight and the metadata on the 2511/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 - 2512/// Freq(PredBB->BB) / Freq(BB->SuccBB). 2513void JumpThreadingPass::updateBlockFreqAndEdgeWeight(
BasicBlock *PredBB,
2520assert(((BFI && BPI) || (!BFI && !BFI)) &&
2521"Both BFI & BPI should either be set or unset");
2525"It's expected to have BFI/BPI when profile info exists");
2529// As the edge from PredBB to BB is deleted, we have to update the block 2531auto BBOrigFreq =
BFI->getBlockFreq(BB);
2532auto NewBBFreq =
BFI->getBlockFreq(NewBB);
2534auto BBNewFreq = BBOrigFreq - NewBBFreq;
2535BFI->setBlockFreq(BB, BBNewFreq);
2537// Collect updated outgoing edges' frequencies from BB and use them to update 2538// edge probabilities. 2541auto SuccFreq = (Succ == SuccBB)
2542 ? BB2SuccBBFreq - NewBBFreq
2544 BBSuccFreq.
push_back(SuccFreq.getFrequency());
2550if (MaxBBSuccFreq == 0)
2552 {1, static_cast<unsigned>(BBSuccFreq.size())});
2557// Normalize edge probabilities so that they sum up to one. 2562// Update edge probabilities in BPI. 2565// Update the profile metadata as well. 2567// Don't do this if the profile of the transformed blocks was statically 2568// estimated. (This could occur despite the function having an entry 2569// frequency in completely cold parts of the CFG.) 2571// In this case we don't want to suggest to subsequent passes that the 2572// calculated weights are fully consistent. Consider this graph: 2587// Assuming the blocks check_* all compare the same value against 1, 2 and 3, 2588// the overall probabilities are inconsistent; the total probability that the 2589// value is either 1, 2 or 3 is 150%. 2591// As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3 2592// becomes 0%. This is even worse if the edge whose probability becomes 0% is 2593// the loop exit edge. Then based solely on static estimation we would assume 2594// the loop was extremely hot. 2596// FIXME this locally as well so that BPI and BFI are consistent as well. We 2597// shouldn't make edges extremely likely or unlikely based solely on static 2599if (BBSuccProbs.
size() >= 2 && HasProfile) {
2601for (
auto Prob : BBSuccProbs)
2609/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch 2610/// to BB which contains an i1 PHI node and a conditional branch on that PHI. 2611/// If we can duplicate the contents of BB up into PredBB do so now, this 2612/// improves the odds that the branch will be on an analyzable instruction like 2616assert(!PredBBs.
empty() &&
"Can't handle an empty set");
2618// If BB is a loop header, then duplicating this block outside the loop would 2619// cause us to transform this into an irreducible loop, don't do this. 2620// See the comments above findLoopHeaders for justifications and caveats. 2621if (LoopHeaders.count(BB)) {
2623 <<
"' into predecessor block '" << PredBBs[0]->getName()
2624 <<
"' - it might create an irreducible loop!\n");
2630if (DuplicationCost > BBDupThreshold) {
2632 <<
"' - Cost is too high: " << DuplicationCost <<
"\n");
2636// And finally, do it! Start by factoring the predecessors if needed. 2637 std::vector<DominatorTree::UpdateType> Updates;
2639if (PredBBs.
size() == 1)
2640 PredBB = PredBBs[0];
2643 <<
" common predecessors.\n");
2644 PredBB = splitBlockPreds(BB, PredBBs,
".thr_comm");
2648// Okay, we decided to do this! Clone all the instructions in BB onto the end 2651 <<
"' into end of '" << PredBB->
getName()
2652 <<
"' to eliminate branch on phi. Cost: " 2653 << DuplicationCost <<
" block is:" << *BB <<
"\n");
2655// Unless PredBB ends with an unconditional branch, split the edge so that we 2656// can just clone the bits from BB into the end of the new PredBB. 2668// We are going to have to map operands from the original BB block into the 2669// PredBB block. Evaluate PHI nodes in BB. 2673for (;
PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2675// Clone the non-phi instructions of BB into PredBB, keeping track of the 2676// mapping and using it to remap operands in the cloned instructions. 2677for (; BI != BB->
end(); ++BI) {
2679 New->insertInto(PredBB, OldPredBranch->
getIterator());
2681// Remap operands to patch up intra-block references. 2682for (
unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2683if (
Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2685if (
I != ValueMapping.
end())
2686 New->setOperand(i,
I->second);
2689// Remap debug variable operands. 2692// If this instruction can be simplified after the operands are updated, 2693// just use the simplified value instead. This frequently happens due to 2698 ValueMapping[&*BI] =
IV;
2699if (!New->mayHaveSideEffects()) {
2700 New->eraseFromParent();
2702// Clone debug-info on the elided instruction to the destination 2707 ValueMapping[&*BI] = New;
2710// Otherwise, insert the new instruction into the block. 2711 New->setName(BI->getName());
2712// Clone across any debug-info attached to the old instruction. 2713 New->cloneDebugInfoFrom(&*BI);
2714// Update Dominance from simplified New instruction operands. 2715for (
unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2716if (
BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2721// Check to see if the targets of the branch had PHI nodes. If so, we need to 2722// add entries to the PHI nodes for branch from PredBB now. 2731// PredBB no longer jumps to BB, remove entries in the PHI node for the edge 2735// Remove the unconditional branch at the end of the PredBB block. 2737if (
auto *BPI = getBPI())
2739 DTU->applyUpdatesPermissive(Updates);
2745// Pred is a predecessor of BB with an unconditional branch to BB. SI is 2746// a Select instruction in Pred. BB has other predecessors and SI is used in 2747// a PHI node in BB. SI has no other use. 2748// A new basic block, NewBB, is created and SI is converted to compare and 2749// conditional branch. SI is erased from parent. 2753// Expand the select. 2765// Move the unconditional branch to NewBB. 2768// Create a conditional branch and update PHI nodes. 2770 BI->applyMergedLocation(PredTerm->
getDebugLoc(), SI->getDebugLoc());
2771 BI->copyMetadata(*SI, {LLVMContext::MD_prof});
2777// Copy probabilities from 'SI' to created conditional branch in 'Pred'. 2779 (TrueWeight + FalseWeight) != 0) {
2782 TrueWeight, TrueWeight + FalseWeight));
2784 FalseWeight, TrueWeight + FalseWeight));
2785// Update BPI if exists. 2786if (
auto *BPI = getBPI())
2789// Set the block frequency of NewBB. 2790if (
auto *BFI = getBFI()) {
2791if ((TrueWeight + FalseWeight) == 0) {
2796 TrueWeight, TrueWeight + FalseWeight);
2797auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
2798 BFI->setBlockFreq(NewBB, NewBBFreq);
2801// The select is now dead. 2802 SI->eraseFromParent();
2806// Update any other PHI nodes in BB. 2808PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2810 Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2814PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2816if (!CondPHI || CondPHI->
getParent() != BB)
2823// The second and third condition can be potentially relaxed. Currently 2824// the conditions help to simplify the code and allow us to reuse existing 2825// code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *) 2839/// tryToUnfoldSelect - Look for blocks of the form 2845/// %p = phi [%a, %bb1] ... 2849/// And expand the select into a branch structure if one of its arms allows %c 2850/// to be folded. This later enables threading from bb1 over bb2. 2864// Look if one of the incoming values is a select in the corresponding 2866if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2873// Now check if one of the select values would allow us to constant fold the 2874// terminator in BB. We don't do the transform if both sides fold, those 2875// cases will be threaded in any case. 2878 CondRHS, Pred, BB, CondCmp);
2881 CondRHS, Pred, BB, CondCmp);
2882if ((LHSRes || RHSRes) && LHSRes != RHSRes) {
2890/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the 2891/// same BB in the form 2893/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ... 2894/// %s = select %p, trueval, falseval 2899/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ... 2901/// %s = select %c, trueval, falseval 2903/// And expand the select into a branch structure. This later enables 2904/// jump-threading over bb in this pass. 2906/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold 2907/// select if the associated PHI has at least one constant. If the unfolded 2908/// select is not jump-threaded, it will be folded again in the later 2911// This transform would reduce the quality of msan diagnostics. 2912// Disable this transform under MemorySanitizer. 2916// If threading this would thread across a loop header, don't thread the edge. 2917// See the comments above findLoopHeaders for justifications and caveats. 2918if (LoopHeaders.count(BB))
2922PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2923// Look for a Phi having at least one constant incoming value. 2925 [](
Value *V) { return !isa<ConstantInt>(V); }))
2929using namespacePatternMatch;
2931// Check if SI is in BB and use V as condition. 2932if (SI->getParent() != BB)
2936returnCond &&
Cond == V &&
Cond->getType()->isIntegerTy(1) && !IsAndOr;
2940for (
Use &U : PN->uses()) {
2941if (
ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2942// Look for a ICmp in BB that compares PN with a constant and is the 2943// condition of a Select. 2944if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2945 isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2946if (
SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2947if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2951 }
elseif (
SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2952// Look for a Select in BB that uses PN as condition. 2953if (isUnfoldCandidate(SelectI, U.get())) {
2962// Expand the select. 2972 NewPN->
addIncoming(SI->getTrueValue(), Term->getParent());
2975 SI->replaceAllUsesWith(NewPN);
2976 SI->eraseFromParent();
2977// NewBB and SplitBB are newly created blocks which require insertion. 2978 std::vector<DominatorTree::UpdateType> Updates;
2983// BB's successors were moved to SplitBB, update DTU accordingly. 2988 DTU->applyUpdatesPermissive(Updates);
2994/// Try to propagate a guard from the current BB into one of its predecessors 2995/// in case if another branch of execution implies that the condition of this 2996/// guard is always true. Currently we only process the simplest case that 3001/// br i1 %cond, label %T1, label %F1 3008/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ] 3010/// And cond either implies condGuard or !condGuard. In this case all the 3011/// instructions before the guard can be duplicated in both branches, and the 3012/// guard is then threaded to one of them. 3014using namespacePatternMatch;
3016// We only want to deal with two predecessors. 3030// Try to thread one of the guards of the block. 3031// TODO: Look up deeper than to immediate predecessor? 3036if (
auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
3044/// Try to propagate the guard from BB which is the lower block of a diamond 3045/// to one of its branches, in case if diamond's condition implies guard's 3057bool TrueDestIsSafe =
false;
3058bool FalseDestIsSafe =
false;
3060// True dest is safe if BranchCond => GuardCond. 3063 TrueDestIsSafe =
true;
3065// False dest is safe if !BranchCond => GuardCond. 3068 FalseDestIsSafe =
true;
3071if (!TrueDestIsSafe && !FalseDestIsSafe)
3074BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3075BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3081if (
Cost > BBDupThreshold)
3083// Duplicate all instructions before the guard and the guard itself to the 3084// branch where implication is not proved. 3086 BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3087assert(GuardedBlock &&
"Could not create the guarded block?");
3088// Duplicate all instructions before the guard in the unguarded branch. 3089// Since we have successfully duplicated the guarded block and this block 3090// has fewer instructions, we expect it to succeed. 3092 BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3093assert(UnguardedBlock &&
"Could not create the unguarded block?");
3095 << GuardedBlock->
getName() <<
"\n");
3096// Some instructions before the guard may still have uses. For them, we need 3097// to create Phi nodes merging their copies in both guarded and unguarded 3098// branches. Those instructions that have no uses can be just removed. 3100for (
auto BI = BB->
begin(); &*BI != AfterGuard; ++BI)
3101if (!isa<PHINode>(&*BI))
3106// Substitute with Phis & remove. 3108if (!Inst->use_empty()) {
3110 NewPN->
addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3111 NewPN->
addIncoming(GuardedMapping[Inst], GuardedBlock);
3114 Inst->replaceAllUsesWith(NewPN);
3116 Inst->dropDbgRecords();
3117 Inst->eraseFromParent();
3127// TODO: We would like to preserve BPI/BFI. Enable once all paths update them. 3128// TODO: Would be nice to verify BPI/BFI consistency as well. 3132template <
typename AnalysisT>
3133typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
3134assert(FAM &&
"Can't run external analysis without FunctionAnalysisManager");
3136// If there were no changes since last call to 'runExternalAnalysis' then all 3137// analysis is either up to date or explicitly invalidated. Just go ahead and 3138// run the "external" analysis. 3139if (!ChangedSinceLastAnalysisUpdate) {
3140assert(!DTU->hasPendingUpdates() &&
3141"Lost update of 'ChangedSinceLastAnalysisUpdate'?");
3142// Run the "external" analysis. 3145 ChangedSinceLastAnalysisUpdate =
false;
3147auto PA = getPreservedAnalysis();
3148// TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI 3152// Report everything except explicitly preserved as invalid. 3156// Make sure DT/PDT are valid before running "external" analysis. 3157assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
3158assert((!DTU->hasPostDomTree() ||
3159 DTU->getPostDomTree().verify(
3161// Run the "external" analysis. 3163// Update analysis JumpThreading depends on and not explicitly preserved. 3173assert(FAM &&
"Can't create BPI without FunctionAnalysisManager");
3181assert(FAM &&
"Can't create BFI without FunctionAnalysisManager");
3187// Important note on validity of BPI/BFI. JumpThreading tries to preserve 3188// BPI/BFI as it goes. Thus if cached instance exists it will be updated. 3189// Otherwise, new instance of BPI/BFI is created (up to date by definition). 3191auto *Res = getBPI();
3196 BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
3202auto *Res = getBFI();
3207BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
ReachingDefAnalysis InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
BlockVerifier::State From
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump,...
static cl::opt< unsigned > PhiDuplicateThreshold("jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden)
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB)
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
static cl::opt< bool > ThreadAcrossLoopHeaders("jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden)
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, ValueToValueMapTy &ValueMap)
addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block.
static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &PredToDestList)
findMostPopularDest - The specified list contains multiple possible threadable destinations.
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
static bool hasAddressTakenAndUsed(BasicBlock *BB)
See the comments on JumpThreadingPass.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
DbgMarker * createMarker(Instruction *I)
Attach a DbgMarker to the given instruction.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
InstListType::const_iterator const_iterator
const Instruction & front() const
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
DbgMarker * getMarker(InstListType::iterator It)
Return the DbgMarker for the position given by It, so that DbgRecords can be inserted there.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
bool isLandingPad() const
Return true if this basic block is a landing pad.
bool isEHPad() const
Return true if this basic block is an exception handling block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void disableDominatorTree()
Disable the use of the dominator tree during alias analysis queries.
The address of a basic block.
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
uint32_t getNumerator() const
BranchProbability getCompl() const
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getPredicate() const
Return the predicate for this instruction.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static Constant * getNot(Constant *C)
This is the shared class of boolean and integer constants.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
static ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
static ConstantInt * getFalse(LLVMContext &Context)
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
This is an important base class in LLVM.
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Per-instruction record of debug-info.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(DbgMarker *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere, bool InsertAtHead=false)
Clone all DbgMarkers from From into this marker.
const BasicBlock * getParent() const
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Analysis pass which computes a DominatorTree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This class represents a freeze function that returns random concrete value if an operand is either a ...
const BasicBlock & getEntryBlock() const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Indirect Branch Instruction.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
bool isSpecialTerminator() const
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
bool simplifyPartiallyRedundantLoad(LoadInst *LI)
simplifyPartiallyRedundantLoad - If LoadI is an obviously partially redundant load instruction,...
bool processBranchOnXOR(BinaryOperator *BO)
processBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
bool processGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case if another branch o...
void updateSSA(BasicBlock *BB, BasicBlock *NewBB, ValueToValueMapTy &ValueMapping)
Update the SSA form.
bool computeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
void findLoopHeaders(Function &F)
findLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)
Merge basic block BB into its sole predecessor if possible.
JumpThreadingPass(int T=-1)
void cloneInstructions(ValueToValueMapTy &ValueMapping, BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB)
Clone instructions in range [BI, BE) to NewBB.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, FunctionAnalysisManager *FAM, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, std::unique_ptr< DomTreeUpdater > DTU, std::optional< BlockFrequencyInfo * > BFI, std::optional< BranchProbabilityInfo * > BPI)
Constant * evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond, const DataLayout &DL)
bool processBranchOnPHI(PHINode *PN)
processBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node (or freeze PH...
bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond)
Attempt to thread through two successive basic blocks.
bool computeValueKnownInPredecessorsImpl(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, SmallPtrSet< Value *, 4 > &RecursionSet, Instruction *CxtI=nullptr)
computeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx)
DomTreeUpdater * getDomTreeUpdater() const
bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
bool processBlock(BasicBlock *BB)
processBlock - If there are any predecessors whose control can be threaded through to a successor,...
bool processImpliedCondition(BasicBlock *BB)
bool duplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs)
duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB)
bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
tryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
tryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2
bool tryToUnfoldSelectInCurrBB(BasicBlock *BB)
tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
void threadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
threadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches,...
This is an important class for using LLVM in a threaded context.
Analysis to compute lazy value information.
This pass computes, caches, and vends lazy value constraint information.
void eraseBlock(BasicBlock *BB)
Inform the analysis cache that we have erased a block.
void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc)
Inform the analysis cache that we have threaded an edge from PredBB to OldSucc to be from PredBB to N...
Constant * getPredicateOnEdge(CmpInst::Predicate Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value comparison with a constant is known to be true or false on the ...
Constant * getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value is known to be a constant on the specified edge.
ConstantRange getConstantRangeOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Return the ConstantRage constraint that is known to hold for the specified value on the specified edg...
Constant * getConstant(Value *V, Instruction *CxtI)
Determine whether the specified value is known to be a constant at the specified instruction.
void forgetValue(Value *V)
Remove information related to this value from the cache.
Constant * getPredicateAt(CmpInst::Predicate Pred, Value *V, Constant *C, Instruction *CxtI, bool UseBlockValue)
Determine whether the specified value comparison with a constant is known to be true or false at the ...
An instruction for reading from memory.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
This class implements a map that also provides access to all stored values in a deterministic order.
Representation for a specific memory location.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
Helper class for SSA formation on a set of values defined in multiple blocks.
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
void UpdateDebugValues(Instruction *I)
Rewrite debug value intrinsics to conform to a new SSA form.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntegerTy() const
True if this is an instance of IntegerType.
'undef' values are things that do not have specified contents.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
iterator find(const KeyT &Val)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
bool match(Val *V, const Pattern &P)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
auto pred_end(const MachineBasicBlock *BB)
unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
auto successors(const MachineBasicBlock *BB)
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
Value * findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
void remapDebugVariable(ValueToValueMapTy &Mapping, Instruction *Inst)
Remap the operands of the debug records attached to Inst, and the operands of Inst itself if it's a d...
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
auto pred_size(const MachineBasicBlock *BB)
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental.guard intrinsic.
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes, potential debug intrinsics and the branch.
auto reverse(ContainerTy &&C)
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instruction I.
bool hasValidBranchWeightMD(const Instruction &I)
Checks if an instruction has valid Branch Weight Metadata.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void cloneNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, DenseMap< MDNode *, MDNode * > &ClonedScopes, StringRef Ext, LLVMContext &Context)
Duplicate the specified list of noalias decl scopes.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function.
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecessors of BB to be predecessors of the new block.
void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!).
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly.
void adaptNoAliasScopes(llvm::Instruction *I, const DenseMap< MDNode *, MDNode * > &ClonedScopes, LLVMContext &Context)
Adapt the metadata for the specified instruction according to the provided mapping.
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly.
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one of its successors (including the next instruction that follows within a basic block).
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C.begin(), C.end(), pred), C.end());
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool pred_empty(const BasicBlock *BB)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract their scope. These are candidates for duplication when cloning.
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock * > > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them.
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
Function object to check whether the second component of a container supported by std::get (like std::pair and std::tuple) compares less than the second component of another container.