1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// Peephole optimize the CFG. 11//===----------------------------------------------------------------------===// 92using namespacePatternMatch;
94#define DEBUG_TYPE "simplifycfg" 97"simplifycfg-require-and-preserve-domtree",
cl::Hidden,
100"Temporary development switch used to gradually uplift SimplifyCFG " 101"into preserving DomTree,"));
103// Chosen as 2 so as to be cheap, but still to have enough power to fold 104// a select, so the "clamp" idiom (of a min followed by a max) will be caught. 105// To catch this, we need to fold a compare and a select, hence '2' being the 106// minimum reasonable default. 110"Control the amount of phi node folding to perform (default = 2)"));
114cl::desc(
"Control the maximal total instruction cost that we are willing " 115"to speculatively execute to fold a 2-entry PHI node into a " 116"select (default = 4)"));
120cl::desc(
"Hoist common instructions up to the parent block"));
123"simplifycfg-hoist-loads-stores-with-cond-faulting",
cl::Hidden,
125cl::desc(
"Hoist loads/stores if the target supports " 126"conditional faulting"));
130cl::desc(
"Control the maximal conditional load/store that we are willing " 131"to speculatively execute to eliminate conditional branch " 137cl::desc(
"Allow reordering across at most this many " 138"instructions when hoisting"));
142cl::desc(
"Sink common instructions down to the end block"));
146cl::desc(
"Hoist conditional stores if an unconditional store precedes"));
150cl::desc(
"Hoist conditional stores even if an unconditional store does not " 151"precede - hoist multiple conditional stores into a single " 156cl::desc(
"When merging conditional stores, do so even if the resultant " 157"basic blocks are unlikely to be if-converted as a result"));
161cl::desc(
"Allow exactly one expensive instruction to be speculatively " 166cl::desc(
"Limit maximum recursion depth when calculating costs of " 167"speculatively executed instructions"));
172cl::desc(
"Max size of a block which is still considered " 173"small enough to thread through"));
175// Two is chosen to allow one negation and a logical combine. 179cl::desc(
"Maximum cost of combining conditions when " 183"simplifycfg-branch-fold-common-dest-vector-multiplier",
cl::Hidden,
185cl::desc(
"Multiplier to apply to threshold when determining whether or not " 186"to fold branch to common destination when vector operations are " 191cl::desc(
"Allow SimplifyCFG to merge invokes together when appropriate"));
195cl::desc(
"Limit cases to analyze when converting a switch to select"));
197STATISTIC(NumBitMaps,
"Number of switch instructions turned into bitmaps");
199"Number of switch instructions turned into linear mapping");
201"Number of switch instructions turned into lookup tables");
203 NumLookupTablesHoles,
204"Number of switch instructions turned into lookup tables (holes checked)");
205STATISTIC(NumTableCmpReuses,
"Number of reused switch table lookup compares");
207"Number of value comparisons folded into predecessor basic blocks");
209"Number of branches folded into predecessor basic block");
212"Number of common instruction 'blocks' hoisted up to the begin block");
214"Number of common instructions hoisted up to the begin block");
216"Number of common instruction 'blocks' sunk down to the end block");
218"Number of common instructions sunk down to the end block");
219STATISTIC(NumSpeculations,
"Number of speculative executed instructions");
221"Number of invokes with empty resume blocks simplified into calls");
222STATISTIC(NumInvokesMerged,
"Number of invokes that were merged together");
223STATISTIC(NumInvokeSetsFormed,
"Number of invoke sets that were formed");
227// The first field contains the value that the switch produces when a certain 228// case group is selected, and the second field is a vector containing the 229// cases composing the case group. 230usingSwitchCaseResultVectorTy =
233// The first field contains the phi node that generates a result of the switch 234// and the second field contains the value generated for a certain case in the 235// switch for that PHI. 238/// ValueEqualityComparisonCase - Represents a case of a switch. 239structValueEqualityComparisonCase {
246booloperator<(ValueEqualityComparisonCase RHS)
const{
247// Comparing pointers is ok as we only rely on the order for uniquing. 264Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
265bool simplifyEqualityComparisonWithOnlyPredecessor(
Instruction *TI,
268bool performValueComparisonIntoPredecessorFolding(
Instruction *TI,
Value *&CV,
271bool foldValueComparisonIntoPredecessors(
Instruction *TI,
286bool tryToSimplifyUncondBranchWithICmpInIt(
ICmpInst *ICI,
289bool hoistCommonCodeFromSuccessors(
Instruction *TI,
bool AllInstsEqOnly);
290bool hoistSuccIdenticalTerminatorToSwitchOrIf(
307 :
TTI(
TTI), DTU(DTU),
DL(
DL), LoopHeaders(LoopHeaders), Options(Opts) {
309"SimplifyCFG is not yet capable of maintaining validity of a " 310"PostDomTree, so don't ask for it.");
316// Helper to set Resimplify and return change indication. 317bool requestResimplify() {
323}
// end anonymous namespace 325/// Return true if all the PHI nodes in the basic block \p BB 326/// receive compatible (identical) incoming values when coming from 327/// all of the predecessor blocks that are specified in \p IncomingBlocks. 329/// Note that if the values aren't exactly identical, but \p EquivalenceSet 330/// is provided, and *both* of the values are present in the set, 331/// then they are considered equal. 336"Only for a pair of incoming blocks at the time!");
338// FIXME: it is okay if one of the incoming values is an `undef` value, 339// iff the other incoming value is guaranteed to be a non-poison value. 340// FIXME: it is okay if one of the incoming values is a `poison` value. 342 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
343 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
346 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
347 EquivalenceSet->contains(IV1))
353/// Return true if it is safe to merge these two 354/// terminator instructions together. 359returnfalse;
// Can't merge with self! 361// It is not safe to merge these two switch instructions if they have a common 362// successor, and if that successor has a PHI node, and if *that* PHI node has 363// conflicting incoming values from the two switch blocks. 370if (!SI1Succs.
count(Succ))
376 FailBlocks->insert(Succ);
384/// Update PHI nodes in Succ to indicate that there will now be entries in it 385/// from the 'NewPred' block. The values that will be flowing into the PHI nodes 386/// will be the same as those coming in from ExistPred, an existing predecessor 392 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
394if (
auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
395 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
398/// Compute an abstract "cost" of speculating the given instruction, 399/// which is assumed to be safe to speculate. TCC_Free means cheap, 400/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively 407/// If we have a merge point of an "if condition" as accepted above, 408/// return true if the specified value dominates the block. We don't handle 409/// the true generality of domination here, just a special case which works 410/// well enough for us. 412/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to 413/// see if V (which must be an instruction) and its recursive operands 414/// that do not dominate BB have a combined cost lower than Budget and 415/// are non-trapping. If both are true, the instruction is inserted into the 416/// set and true is returned. 418/// The cost for most non-trapping instructions is defined as 1 except for 419/// Select whose cost is 2. 421/// After this function returns, Cost is increased by the cost of 422/// V plus its non-dominating operands. If that cost is greater than 423/// Budget, false is returned and Cost is undefined. 429// It is possible to hit a zero-cost cycle (phi/gep instructions for example), 430// so limit the recursion depth. 431// TODO: While this recursion limit does prevent pathological behavior, it 432// would be better to track visited instructions to avoid cycles. 438// Non-instructions dominate all instructions and can be executed 444// We don't want to allow weird loops that might have the "if condition" in 445// the bottom of this block. 449// If this instruction is defined in a block that contains an unconditional 450// branch to BB, then it must be in the 'conditional' part of the "if 451// statement". If not, it definitely dominates the region. 456// If we have seen this instruction before, don't count it again. 457if (AggressiveInsts.
count(
I))
460// Okay, it looks like the instruction IS in the "condition". Check to 461// see if it's a cheap instruction to unconditionally compute, and if it 462// only uses stuff defined outside of the condition. If so, hoist it out. 468// Allow exactly one instruction to be speculated regardless of its cost 469// (as long as it is safe to do so). 470// This is intended to flatten the CFG even if the instruction is a division 471// or other expensive operation. The speculation of an expensive instruction 472// is expected to be undone in CodeGenPrepare if the speculation has not 473// enabled further IR optimizations. 479// Okay, we can only really hoist these out if their operands do 480// not take us over the cost threshold. 481for (
Use &
Op :
I->operands())
485// Okay, it's safe to do this! Remember this instruction. 490/// Extract ConstantInt from value, looking through IntToPtr 491/// and PointerNullValue. Return NULL if value is not a constant int. 493// Normal constant int. 495if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
496DL.isNonIntegralPointerType(V->getType()))
499// This is some kind of pointer constant. Turn it into a pointer-sized 500// ConstantInt if possible. 501IntegerType *PtrTy = cast<IntegerType>(
DL.getIntPtrType(V->getType()));
503// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). 504if (isa<ConstantPointerNull>(V))
505return ConstantInt::get(PtrTy, 0);
507// IntToPtr const int. 509if (CE->getOpcode() == Instruction::IntToPtr)
510if (
ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
511// The constant is very likely to have the right type already. 515return cast<ConstantInt>(
523/// Given a chain of or (||) or and (&&) comparison of a value against a 524/// constant, this will try to recover the information required for a switch 526/// It will depth-first traverse the chain of comparison, seeking for patterns 527/// like %a == 12 or %a < 4 and combine them to produce a set of integer 528/// representing the different cases for the switch. 529/// Note that if the chain is composed of '||' it will build the set of elements 530/// that matches the comparisons (i.e. any of this value validate the chain) 531/// while for a chain of '&&' it will build the set elements that make the test 533structConstantComparesGatherer {
536 /// Value found for the switch comparison 537Value *CompValue =
nullptr;
539 /// Extra clause to be checked before the switch 540Value *Extra =
nullptr;
542 /// Set of integers to match in switch 545 /// Number of comparisons matched in the and/or chain 546unsigned UsedICmps = 0;
548 /// Construct and compute the result for the comparison instruction Cond 553 ConstantComparesGatherer(
const ConstantComparesGatherer &) =
delete;
554 ConstantComparesGatherer &
555 operator=(
const ConstantComparesGatherer &) =
delete;
558 /// Try to set the current value used for the comparison, it succeeds only if 559 /// it wasn't set before or if the new value is the same as the old one 560bool setValueOnce(
Value *NewVal) {
561if (CompValue && CompValue != NewVal)
564return (CompValue !=
nullptr);
567 /// Try to match Instruction "I" as a comparison against a constant and 568 /// populates the array Vals with the set of values that match (or do not 569 /// match depending on isEQ). 570 /// Return false on failure. On success, the Value the comparison matched 571 /// against is placed in CompValue. 572 /// If CompValue is already set, the function is expected to fail if a match 573 /// is found but the value compared to is different. 575// If this is an icmp against a constant, handle this as one of the cases. 578if (!((ICI = dyn_cast<ICmpInst>(
I)) &&
586// Pattern match a special case 587// (x & ~2^z) == y --> x == y || x == y|2^z 588// This undoes a transformation done by instcombine to fuse 2 compares. 589if (ICI->
getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
590// It's a little bit hard to see why the following transformations are 591// correct. Here is a CVC3 program to verify them for 64-bit values: 594 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63); 598 mask : BITVECTOR(64) = BVSHL(ONE, z); 599 QUERY( (y & ~mask = y) => 600 ((x & ~mask = y) <=> (x = y OR x = (y | mask))) 602 QUERY( (y | mask = y) => 603 ((x | mask = y) <=> (x = y OR x = (y & ~mask))) 607// Please note that each pattern must be a dual implication (<--> or 608// iff). One directional implication can create spurious matches. If the 609// implication is only one-way, an unsatisfiable condition on the left 610// side can imply a satisfiable condition on the right side. Dual 611// implication ensures that satisfiable conditions are transformed to 612// other satisfiable conditions and unsatisfiable conditions are 613// transformed to other unsatisfiable conditions. 615// Here is a concrete example of a unsatisfiable condition on the left 616// implying a satisfiable condition on the right: 619// (x & ~mask) == y --> (x == y || x == (y | mask)) 621// Substituting y = 3, z = 0 yields: 622// (x & -2) == 3 --> (x == 3 || x == 2) 624// Pattern match a special case: 626 QUERY( (y & ~mask = y) => 627 ((x & ~mask = y) <=> (x = y OR x = (y | mask))) 633if (
Mask.isPowerOf2() && (
C->getValue() & ~Mask) ==
C->getValue()) {
634// If we already have a value for the switch, it has to match! 635if (!setValueOnce(RHSVal))
640 ConstantInt::get(
C->getContext(),
641C->getValue() | Mask));
647// Pattern match a special case: 649 QUERY( (y | mask = y) => 650 ((x | mask = y) <=> (x = y OR x = (y & ~mask))) 656if (
Mask.isPowerOf2() && (
C->getValue() | Mask) ==
C->getValue()) {
657// If we already have a value for the switch, it has to match! 658if (!setValueOnce(RHSVal))
662 Vals.
push_back(ConstantInt::get(
C->getContext(),
663C->getValue() & ~Mask));
669// If we already have a value for the switch, it has to match! 678// If we have "x ult 3", for example, then we can add 0,1,2 to the set. 682// Shift the range if the compare is fed by an add. This is the range 683// compare idiom as emitted by instcombine. 684Value *CandidateVal =
I->getOperand(0);
687 CandidateVal = RHSVal;
690// If this is an and/!= check, then we are looking to build the set of 691// value that *don't* pass the and chain. I.e. to turn "x ugt 2" into 696// If there are a ton of values, we don't want to make a ginormous switch. 701// If we already have a value for the switch, it has to match! 702if (!setValueOnce(CandidateVal))
705// Add all values from the range to the set 707 Vals.
push_back(ConstantInt::get(
I->getContext(), Tmp));
713 /// Given a potentially 'or'd or 'and'd together collection of icmp 714 /// eq/ne/lt/gt instructions that compare a value against a constant, extract 715 /// the value being compared, and stick the list constants into the Vals 717 /// One "Extra" case is allowed to differ from the other. 718void gather(
Value *V) {
721// Keep a stack (SmallVector for efficiency) for depth-first traversal 729while (!DFT.
empty()) {
733// If it is a || (or && depending on isEQ), process the operands. 737if (Visited.
insert(Op1).second)
739if (Visited.
insert(Op0).second)
745// Try to match the current instruction 746if (matchInstruction(
I, isEQ))
747// Match succeed, continue the loop 751// One element of the sequence of || (or &&) could not be match as a 752// comparison against the same value as the others. 753// We allow only one "Extra" case to be checked before the switch 758// Failed to parse a proper sequence, abort now 765}
// end anonymous namespace 770if (
SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
771Cond = dyn_cast<Instruction>(SI->getCondition());
772 }
elseif (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
773if (BI->isConditional())
774Cond = dyn_cast<Instruction>(BI->getCondition());
776Cond = dyn_cast<Instruction>(IBI->getAddress());
784/// Return true if the specified terminator checks 785/// to see if a value is equal to constant integer value. 788if (
SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
789// Do not permit merging of large switch instructions into their 790// predecessors unless there is only one predecessor. 791if (!
SI->getParent()->hasNPredecessorsOrMore(128 /
SI->getNumSuccessors()))
792 CV =
SI->getCondition();
793 }
elseif (
BranchInst *BI = dyn_cast<BranchInst>(TI))
794if (BI->isConditional() && BI->getCondition()->hasOneUse())
795if (
ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
800// Unwrap any lossless ptrtoint cast. 803Value *
Ptr = PTII->getPointerOperand();
804if (PTII->getType() ==
DL.getIntPtrType(
Ptr->getType()))
811/// Given a value comparison instruction, 812/// decode all of the 'cases' that it represents and return the 'default' block. 813BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
814Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
815if (
SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
816 Cases.reserve(
SI->getNumCases());
817for (
auto Case :
SI->cases())
818 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
819 Case.getCaseSuccessor()));
820returnSI->getDefaultDest();
826 Cases.push_back(ValueEqualityComparisonCase(
831/// Given a vector of bb/value pairs, remove any entries 832/// in the list that match the specified block. 835 std::vector<ValueEqualityComparisonCase> &Cases) {
839/// Return true if there are any keys in C1 that exist in C2 as well. 841 std::vector<ValueEqualityComparisonCase> &C2) {
842 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
844// Make V1 be smaller than V2. 845if (V1->size() > V2->size())
850if (V1->size() == 1) {
853for (
const ValueEqualityComparisonCase &VECC : *V2)
854if (TheVal == VECC.
Value)
858// Otherwise, just sort both lists and compare element by element. 861unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
862while (i1 != e1 && i2 != e2) {
873// Set branch weights on SwitchInst. This sets the metadata if there is at 874// least one non-zero weight. 877// Check that there is at least one non-zero weight. Otherwise, pass 878// nullptr to setMetadata which will erase the existing metadata. 883 SI->setMetadata(LLVMContext::MD_prof,
N);
886// Similar to the above, but for branch and select instructions that take 889uint32_t FalseWeight,
bool IsExpected) {
890assert(isa<BranchInst>(
I) || isa<SelectInst>(
I));
891// Check that there is at least one non-zero weight. Otherwise, pass 892// nullptr to setMetadata which will erase the existing metadata. 894if (TrueWeight || FalseWeight)
897I->setMetadata(LLVMContext::MD_prof,
N);
900/// If TI is known to be a terminator instruction and its block is known to 901/// only have a single predecessor block, check to see if that predecessor is 902/// also a value comparison with the same value, and if that comparison 903/// determines the outcome of this comparison. If so, simplify TI. This does a 904/// very limited form of jump threading. 905bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
909returnfalse;
// Not a value comparison in predecessor. 911Value *ThisVal = isValueEqualityComparison(TI);
912assert(ThisVal &&
"This isn't a value comparison!!");
913if (ThisVal != PredVal)
914returnfalse;
// Different predicates. 916// TODO: Preserve branch weight metadata, similarly to how 917// foldValueComparisonIntoPredecessors preserves it. 919// Find out information about when control will move from Pred to TI's block. 920 std::vector<ValueEqualityComparisonCase> PredCases;
922 getValueEqualityComparisonCases(Pred->
getTerminator(), PredCases);
925// Find information about how control leaves this block. 926 std::vector<ValueEqualityComparisonCase> ThisCases;
927BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
930// If TI's block is the default block from Pred's comparison, potentially 931// simplify TI based on this knowledge. 933// If we are here, we know that the value is none of those cases listed in 934// PredCases. If there are any cases in ThisCases that are in PredCases, we 939if (isa<BranchInst>(TI)) {
940// Okay, one of the successors of this condbr is dead. Convert it to a 942assert(ThisCases.size() == 1 &&
"Branch can only have one case!");
943// Insert the new branch. 947// Remove PHI node entries for the dead edge. 948 ThisCases[0].Dest->removePredecessor(PredDef);
951 <<
"Through successor TI: " << *TI <<
"Leaving: " << *NI
958 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
964// Okay, TI has cases that are statically dead, prune them away. 966for (
unsigned i = 0, e = PredCases.size(); i != e; ++i)
970 <<
"Through successor TI: " << *TI);
978if (DeadCases.
count(i->getCaseValue())) {
987 std::vector<DominatorTree::UpdateType> Updates;
988for (
const std::pair<BasicBlock *, int> &
I : NumPerSuccessorCases)
990 Updates.push_back({DominatorTree::Delete, PredDef,
I.first});
991 DTU->applyUpdates(Updates);
998// Otherwise, TI's block must correspond to some matched value. Find out 999// which value (or set of values) this is. 1002for (
constauto &[
Value, Dest] : PredCases)
1005returnfalse;
// Cannot handle multiple values coming to this block. 1008assert(TIV &&
"No edge from pred to succ?");
1010// Okay, we found the one constant that our value can be if we get into TI's 1011// BB. Find out which successor will unconditionally be branched to. 1013for (
constauto &[
Value, Dest] : ThisCases)
1019// If not handled by any explicit cases, it is handled by the default case. 1021 TheRealDest = ThisDef;
1025// Remove PHI node entries for dead edges. 1028if (Succ != CheckEdge) {
1029if (Succ != TheRealDest)
1030 RemovedSuccs.
insert(Succ);
1035// Insert the new branch. 1040 <<
"Through successor TI: " << *TI <<
"Leaving: " << *NI
1047for (
auto *RemovedSucc : RemovedSuccs)
1048 Updates.
push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1049 DTU->applyUpdates(Updates);
1056/// This class implements a stable ordering of constant 1057/// integers that does not depend on their address. This is important for 1058/// applications that sort ConstantInt's to ensure uniqueness. 1059structConstantIntOrdering {
1061returnLHS->getValue().ult(
RHS->getValue());
1065}
// end anonymous namespace 1073returnLHS->getValue().ult(
RHS->getValue()) ? 1 : -1;
1076/// Get Weights of a given terminator, the default weight is at the front 1077/// of the vector. If TI is a conditional eq, we need to swap the branch-weight 1082assert(MD &&
"Invalid branch-weight metadata");
1085// If TI is a conditional eq, the default case is the false case, 1086// and the corresponding branch-weight data is at index 2. We swap the 1087// default weight to be the first entry. 1088if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1096/// Keep halving the weights until all can fit in uint32_t. 1099if (Max > UINT_MAX) {
1110// If we have bonus instructions, clone them into the predecessor block. 1111// Note that there may be multiple predecessor blocks, so we cannot move 1112// bonus instructions to a predecessor block. 1114if (BonusInst.isTerminator())
1119if (!isa<DbgInfoIntrinsic>(BonusInst) &&
1121// Unless the instruction has the same !dbg location as the original 1122// branch, drop it. When we fold the bonus instructions we want to make 1123// sure we reset their debug locations in order to avoid stepping on 1124// dead code caused by folding dead branches. 1131// If we speculated an instruction, we need to drop any metadata that may 1132// result in undefined behavior, as the metadata might have been valid 1133// only given the branch precondition. 1134// Similarly strip attributes on call parameters that may cause UB in 1135// location the call is moved to. 1143if (isa<DbgInfoIntrinsic>(BonusInst))
1146 NewBonusInst->
takeName(&BonusInst);
1147 BonusInst.setName(NewBonusInst->
getName() +
".old");
1148 VMap[&BonusInst] = NewBonusInst;
1150// Update (liveout) uses of bonus instructions, 1151// now that the bonus instruction has been cloned into predecessor. 1152// Note that we expect to be in a block-closed SSA form for this to work! 1154auto *UI = cast<Instruction>(U.getUser());
1155auto *PN = dyn_cast<PHINode>(UI);
1157assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1158"If the user is not a PHI node, then it should be in the same " 1159"block as, and come after, the original bonus instruction.");
1160continue;
// Keep using the original bonus instruction. 1162// Is this the block-closed SSA form PHI node? 1163if (PN->getIncomingBlock(U) == BB)
1164continue;
// Great, keep using the original bonus instruction. 1165// The only other alternative is an "use" when coming from 1166// the predecessor block - here we should refer to the cloned bonus instr. 1167assert(PN->getIncomingBlock(U) == PredBlock &&
1168"Not in block-closed SSA form?");
1169 U.set(NewBonusInst);
1174bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1181// Figure out which 'cases' to copy from SI to PSI. 1182 std::vector<ValueEqualityComparisonCase> BBCases;
1183BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1185 std::vector<ValueEqualityComparisonCase> PredCases;
1186BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1188// Based on whether the default edge from PTI goes to BB or not, fill in 1189// PredCases and PredDefault with the new switch cases we would like to 1193// Update the branch weight metadata along the way 1198if (PredHasWeights) {
1200// branch-weight metadata is inconsistent here. 1201if (Weights.
size() != 1 + PredCases.size())
1202 PredHasWeights = SuccHasWeights =
false;
1203 }
elseif (SuccHasWeights)
1204// If there are no predecessor weights but there are successor weights, 1205// populate Weights with 1, which will later be scaled to the sum of 1206// successor's weights 1207 Weights.
assign(1 + PredCases.size(), 1);
1210if (SuccHasWeights) {
1212// branch-weight metadata is inconsistent here. 1213if (SuccWeights.
size() != 1 + BBCases.size())
1214 PredHasWeights = SuccHasWeights =
false;
1215 }
elseif (PredHasWeights)
1216 SuccWeights.
assign(1 + BBCases.size(), 1);
1218if (PredDefault == BB) {
1219// If this is the default destination from PTI, only the edges in TI 1220// that don't occur in PTI, or that branch to BB will be activated. 1221 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1222for (
unsigned i = 0, e = PredCases.size(); i != e; ++i)
1223if (PredCases[i].Dest != BB)
1224 PTIHandled.insert(PredCases[i].
Value);
1226// The default destination is BB, we don't need explicit targets. 1227std::swap(PredCases[i], PredCases.back());
1229if (PredHasWeights || SuccHasWeights) {
1230// Increase weight for the default case. 1231 Weights[0] += Weights[i + 1];
1236 PredCases.pop_back();
1241// Reconstruct the new switch statement we will be building. 1242if (PredDefault != BBDefault) {
1244if (DTU && PredDefault != BB)
1245 Updates.
push_back({DominatorTree::Delete, Pred, PredDefault});
1246 PredDefault = BBDefault;
1247 ++NewSuccessors[BBDefault];
1250unsigned CasesFromPred = Weights.
size();
1252for (
unsigned i = 0, e = BBCases.size(); i != e; ++i)
1253if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1254 PredCases.push_back(BBCases[i]);
1255 ++NewSuccessors[BBCases[i].Dest];
1256if (SuccHasWeights || PredHasWeights) {
1257// The default weight is at index 0, so weight for the ith case 1258// should be at index i+1. Scale the cases from successor by 1259// PredDefaultWeight (Weights[0]). 1260 Weights.
push_back(Weights[0] * SuccWeights[i + 1]);
1261 ValidTotalSuccWeight += SuccWeights[i + 1];
1265if (SuccHasWeights || PredHasWeights) {
1266 ValidTotalSuccWeight += SuccWeights[0];
1267// Scale the cases from predecessor by ValidTotalSuccWeight. 1268for (
unsigned i = 1; i < CasesFromPred; ++i)
1269 Weights[i] *= ValidTotalSuccWeight;
1270// Scale the default weight by SuccDefaultWeight (SuccWeights[0]). 1271 Weights[0] *= SuccWeights[0];
1274// If this is not the default destination from PSI, only the edges 1275// in SI that occur in PSI with a destination of BB will be 1277 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1278 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1279for (
unsigned i = 0, e = PredCases.size(); i != e; ++i)
1280if (PredCases[i].Dest == BB) {
1283if (PredHasWeights || SuccHasWeights) {
1284 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1289std::swap(PredCases[i], PredCases.back());
1290 PredCases.pop_back();
1295// Okay, now we know which constants were sent to BB from the 1296// predecessor. Figure out where they will all go now. 1297for (
unsigned i = 0, e = BBCases.size(); i != e; ++i)
1298if (PTIHandled.count(BBCases[i].Value)) {
1299// If this is one we are capable of getting... 1300if (PredHasWeights || SuccHasWeights)
1302 PredCases.push_back(BBCases[i]);
1303 ++NewSuccessors[BBCases[i].Dest];
1304 PTIHandled.
erase(BBCases[i].
Value);
// This constant is taken care of 1307// If there are any constants vectored to BB that TI doesn't handle, 1308// they must go to the default destination of TI. 1310if (PredHasWeights || SuccHasWeights)
1312 PredCases.push_back(ValueEqualityComparisonCase(
I, BBDefault));
1313 ++NewSuccessors[BBDefault];
1317// Okay, at this point, we know which new successor Pred will get. Make 1318// sure we update the number of entries in the PHI nodes for these 1325for (
const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1327for (
autoI :
seq(NewSuccessor.second)) {
1331if (DTU && !SuccsOfPred.
contains(NewSuccessor.first))
1332 Updates.
push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1336// Convert pointer to int before we switch. 1342// Now that the successors are updated, create the new Switch instruction. 1345for (ValueEqualityComparisonCase &V : PredCases)
1348if (PredHasWeights || SuccHasWeights) {
1349// Halve the weights if any of them cannot fit in an uint32_t 1359// Okay, last check. If BB is still a successor of PSI, then we must 1360// have an infinite loop case. If so, add an infinitely looping block 1361// to handle the case to preserve the behavior of the code. 1366// Insert it at the end of the function, because it's either code, 1367// or it won't matter if it's hot. :) 1373 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1380 Updates.
push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1382 Updates.
push_back({DominatorTree::Delete, Pred, BB});
1384 DTU->applyUpdates(Updates);
1387 ++NumFoldValueComparisonIntoPredecessors;
1391/// The specified terminator is a value equality comparison instruction 1392/// (either a switch or a branch on "X == c"). 1393/// See if any of the predecessors of the terminator block are value comparisons 1394/// on the same value. If so, and if safe to do so, fold them together. 1395bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(
Instruction *TI,
1398Value *CV = isValueEqualityComparison(TI);
// CondVal 1399assert(CV &&
"Not a comparison?");
1404while (!Preds.empty()) {
1408// Don't try to fold into itself. 1412// See if the predecessor is a comparison with the same value. 1413Value *PCV = isValueEqualityComparison(PTI);
// PredCondVal 1419for (
auto *Succ : FailBlocks) {
1425 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1431// If we would need to insert a select that uses the value of this invoke 1432// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would 1433// need to do this), we can't hoist the invoke, as there is nowhere to put the 1434// select in this case. 1439Value *BB1V = PN.getIncomingValueForBlock(BB1);
1440Value *BB2V = PN.getIncomingValueForBlock(BB2);
1441if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1449// Get interesting characteristics of instructions that 1450// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of 1451// instructions can be reordered across. 1460if (
I->mayReadFromMemory())
1462// We can't arbitrarily move around allocas, e.g. moving allocas (especially 1463// inalloca) across stacksave/stackrestore boundaries. 1464if (
I->mayHaveSideEffects() || isa<AllocaInst>(
I))
1471// Returns true if it is safe to reorder an instruction across preceding 1472// instructions in a basic block. 1474// Don't reorder a store over a load. 1478// If we have seen an instruction with side effects, it's unsafe to reorder an 1479// instruction which reads memory or itself has side effects. 1481 (
I->mayReadFromMemory() ||
I->mayHaveSideEffects() || isa<AllocaInst>(
I)))
1484// Reordering across an instruction which does not necessarily transfer 1485// control to the next instruction is speculation. 1489// Hoisting of llvm.deoptimize is only legal together with the next return 1490// instruction, which this pass is not always able to do. 1491if (
auto *CB = dyn_cast<CallBase>(
I))
1492if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1495// It's also unsafe/illegal to hoist an instruction above its instruction 1499if (
auto *J = dyn_cast<Instruction>(
Op))
1500if (J->getParent() == BB)
1509/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical 1510/// instructions \p I1 and \p I2 can and should be hoisted. 1513// If we're going to hoist a call, make sure that the two instructions 1514// we're commoning/hoisting are both marked with musttail, or neither of 1515// them is marked as such. Otherwise, we might end up in a situation where 1516// we hoist from a block where the terminator is a `ret` to a block where 1517// the terminator is a `br`, and `musttail` calls expect to be followed by 1519auto *C1 = dyn_cast<CallInst>(I1);
1520auto *C2 = dyn_cast<CallInst>(I2);
1522if (C1->isMustTailCall() != C2->isMustTailCall())
1528// If any of the two call sites has nomerge or convergent attribute, stop 1530if (
constauto *CB1 = dyn_cast<CallBase>(I1))
1531if (CB1->cannotMerge() || CB1->isConvergent())
1533if (
constauto *CB2 = dyn_cast<CallBase>(I2))
1534if (CB2->cannotMerge() || CB2->isConvergent())
1540/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical 1541/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in 1542/// hoistCommonCodeFromSuccessors. e.g. The input: 1543/// I1 DVRs: { x, z }, 1544/// OtherInsts: { I2 DVRs: { x, y, z } } 1545/// would result in hoisting only DbgVariableRecord x. 1549if (!I1->hasDbgRecords())
1551usingCurrentAndEndIt =
1552 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1553// Vector of {Current, End} iterators. 1556// Helper lambdas for lock-step checks: 1557// Return true if this Current == End. 1558auto atEnd = [](
const CurrentAndEndIt &Pair) {
1559return Pair.first == Pair.second;
1561// Return true if all Current are identical. 1565return Itrs[0].first->isIdenticalToWhenDefined(*
I);
1569// Collect the iterators. 1571 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1573if (!
Other->hasDbgRecords())
1576 {
Other->getDbgRecordRange().begin(),
Other->getDbgRecordRange().end()});
1579// Iterate in lock-step until any of the DbgRecord lists are exausted. If 1580// the lock-step DbgRecord are identical, hoist all of them to TI. 1581// This replicates the dbg.* intrinsic behaviour in 1582// hoistCommonCodeFromSuccessors. 1584bool HoistDVRs = allIdentical(Itrs);
1585for (CurrentAndEndIt &Pair : Itrs) {
1586// Increment Current iterator now as we may be about to move the 1599if (I1->isIdenticalToWhenDefined(I2,
/*IntersectAttrs=*/true))
1602if (
auto *Cmp1 = dyn_cast<CmpInst>(I1))
1603if (
auto *Cmp2 = dyn_cast<CmpInst>(I2))
1604return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1605 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1606 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1608if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1609return I1->getOperand(0) == I2->
getOperand(1) &&
1617/// If the target supports conditional faulting, 1618/// we look for the following pattern: 1622/// %cond = icmp ult %x, %y 1623/// br i1 %cond, label %TrueBB, label %FalseBB 1625/// store i32 1, ptr %q, align 4 1628/// %maskedloadstore = load i32, ptr %b, align 4 1629/// store i32 %maskedloadstore, ptr %p, align 4 1633/// and transform it into: 1638/// %cond = icmp ult %x, %y 1639/// %maskedloadstore = cload i32, ptr %b, %cond 1640/// cstore i32 %maskedloadstore, ptr %p, %cond 1641/// cstore i32 1, ptr %q, ~%cond 1642/// br i1 %cond, label %TrueBB, label %FalseBB 1649/// where cload/cstore are represented by llvm.masked.load/store intrinsics, 1653/// %vcond = bitcast i1 %cond to <1 x i1> 1654/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0 1655/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison) 1656/// %maskedloadstore = bitcast <1 x i32> %v0 to i32 1657/// call void @llvm.masked.store.v1i32.p0 1658/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond) 1659/// %cond.not = xor i1 %cond, true 1660/// %vcond.not = bitcast i1 %cond.not to <1 x i> 1661/// call void @llvm.masked.store.v1i32.p0 1662/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not) 1665/// So we need to turn hoisted load/store into cload/cstore. 1667/// \param BI The branch instruction. 1668/// \param SpeculatedConditionalLoadsStores The load/store instructions that 1669/// will be speculated. 1670/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG. 1674 std::optional<bool> Invert) {
1675auto &Context = BI->
getParent()->getContext();
1678// Construct the condition if needed. 1681 Invert.has_value() ? SpeculatedConditionalLoadsStores.
back() : BI);
1682Value *Mask =
nullptr;
1683Value *MaskFalse =
nullptr;
1684Value *MaskTrue =
nullptr;
1685if (Invert.has_value()) {
1694auto PeekThroughBitcasts = [](
Value *V) {
1695while (
auto *BitCast = dyn_cast<BitCastInst>(V))
1696 V = BitCast->getOperand(0);
1699for (
auto *
I : SpeculatedConditionalLoadsStores) {
1701if (!Invert.has_value())
1702 Mask =
I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1703// We currently assume conditional faulting load/store is supported for 1704// scalar types only when creating new instructions. This can be easily 1705// extended for vector types in the future. 1707auto *Op0 =
I->getOperand(0);
1709if (
auto *LI = dyn_cast<LoadInst>(
I)) {
1711auto *Ty =
I->getType();
1713Value *PassThru =
nullptr;
1714if (Invert.has_value())
1715for (
User *U :
I->users())
1716if ((PN = dyn_cast<PHINode>(U))) {
1727I->replaceAllUsesWith(NewLoadStore);
1733 StoredVal,
I->getOperand(1), cast<StoreInst>(
I)->getAlign(), Mask);
1735// For non-debug metadata, only !annotation, !range, !nonnull and !align are 1736// kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata). 1738// !nonnull, !align : Not support pointer type, no need to keep. 1739// !range: Load type is changed from scalar to vector, but the metadata on 1740// vector specifies a per-element range, so the semantics stay the 1742// !annotation: Not impact semantics. Keep it. 1743if (
constMDNode *Ranges =
I->getMetadata(LLVMContext::MD_range))
1745I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1746// FIXME: DIAssignID is not supported for masked store yet. 1747// (Verifier::visitDIAssignIDMetadata) 1749I->eraseMetadataIf([](
unsigned MDKind,
MDNode *
Node) {
1750returnNode->getMetadataID() == Metadata::DIAssignIDKind;
1753I->eraseFromParent();
1759// Not handle volatile or atomic. 1760if (
auto *L = dyn_cast<LoadInst>(
I)) {
1763 }
elseif (
auto *S = dyn_cast<StoreInst>(
I)) {
1769// llvm.masked.load/store use i32 for alignment while load/store use i64. 1770// That's why we have the alignment limitation. 1771// FIXME: Update the prototype of the intrinsics? 1778// LockstepReverseIterator - Iterates through instructions 1779// in a set of blocks in reverse order from the first non-terminator. 1780// For example (assume all blocks have size n): 1781// LockstepReverseIterator I([B1, B2, B3]); 1782// *I-- = [B1[n], B2[n], B3[n]]; 1783// *I-- = [B1[n-1], B2[n-1], B3[n-1]]; 1784// *I-- = [B1[n-2], B2[n-2], B3[n-2]]; 1786classLockstepReverseIterator {
1799for (
auto *BB : Blocks) {
1801for (Inst = Inst->
getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1804// Block wasn't big enough. 1817for (
auto *&Inst : Insts) {
1818for (Inst = Inst->
getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1820// Already at beginning of block. 1831for (
auto *&Inst : Insts) {
1832for (Inst = Inst->
getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1834// Already at end of block. 1845}
// end anonymous namespace 1847/// Hoist any common code in the successor blocks up into the block. This 1848/// function guarantees that BB dominates all successors. If AllInstsEqOnly is 1849/// given, only perform hoisting in case all successors blocks contain matching 1850/// instructions only. In that case, all instructions can be hoisted and the 1851/// original branch will be replaced and selects for PHIs are added. 1852bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(
Instruction *TI,
1853bool AllInstsEqOnly) {
1854// This does very trivial matching, with limited scanning, to find identical 1855// instructions in the two blocks. In particular, we don't want to get into 1856// O(N1*N2*...) situations here where Ni are the sizes of these successors. As 1857// such, we currently just scan for obviously identical instructions in an 1858// identical order, possibly separated by the same number of non-identical 1865// If either of the blocks has it's address taken, then we can't do this fold, 1866// because the code we'd hoist would no longer run when we jump into the block 1872// The second of pair is a SkipFlags bitmask. 1873usingSuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1877if (isa<PHINode>(*SuccItr))
1879 SuccIterPairs.
push_back(SuccIterPair(SuccItr, 0));
1882if (AllInstsEqOnly) {
1883// Check if all instructions in the successor blocks match. This allows 1884// hoisting all instructions and removing the blocks we are hoisting from, 1885// so does not add any new instructions. 1887// Check if sizes and terminators of all successors match. 1891return !
Term->isSameOperationAs(Term0) ||
1893 Succs[0]->size() != Succ->
size();
1898 LockstepReverseIterator LRI(Succs);
1899while (LRI.isValid()) {
1909// Now we know that all instructions in all successors can be hoisted. Let 1910// the loop below handle the hoisting. 1913// Count how many instructions were not hoisted so far. There's a limit on how 1914// many instructions we skip, serving as a compilation time control as well as 1915// preventing excessive increase of life ranges. 1916unsigned NumSkipped = 0;
1917// If we find an unreachable instruction at the beginning of a basic block, we 1918// can still hoist instructions from the rest of the basic blocks. 1919if (SuccIterPairs.
size() > 2) {
1921 [](
constauto &Pair) {
return isa<UnreachableInst>(Pair.first); });
1922if (SuccIterPairs.
size() < 2)
1929auto *SuccIterPairBegin = SuccIterPairs.
begin();
1930auto &BB1ItrPair = *SuccIterPairBegin++;
1931auto OtherSuccIterPairRange =
1937// Skip debug info if it is not identical. 1938bool AllDbgInstsAreIdentical =
all_of(OtherSuccIterRange, [I1](
auto &Iter) {
1940returnI1->isIdenticalToWhenDefined(I2);
1942if (!AllDbgInstsAreIdentical) {
1943while (isa<DbgInfoIntrinsic>(I1))
1944I1 = &*++BB1ItrPair.first;
1945for (
auto &SuccIter : OtherSuccIterRange) {
1947while (isa<DbgInfoIntrinsic>(I2))
1952bool AllInstsAreIdentical =
true;
1953bool HasTerminator =
I1->isTerminator();
1954for (
auto &SuccIter : OtherSuccIterRange) {
1959 AllInstsAreIdentical =
false;
1963for (
auto &SuccIter : OtherSuccIterRange)
1966// If we are hoisting the terminator instruction, don't move one (making a 1967// broken BB), instead clone it, and remove BI. 1969// Even if BB, which contains only one unreachable instruction, is ignored 1970// at the beginning of the loop, we can hoist the terminator instruction. 1971// If any instructions remain in the block, we cannot hoist terminators. 1972if (NumSkipped || !AllInstsAreIdentical) {
1977return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1981if (AllInstsAreIdentical) {
1982unsigned SkipFlagsBB1 = BB1ItrPair.second;
1983 AllInstsAreIdentical =
1985all_of(OtherSuccIterPairRange, [=](
constauto &Pair) {
1987unsigned SkipFlagsBB2 = Pair.second;
1988// Even if the instructions are identical, it may not 1989// be safe to hoist them if we have skipped over 1990// instructions with side effects or their operands 1997if (AllInstsAreIdentical) {
1999if (isa<DbgInfoIntrinsic>(I1)) {
2000// The debug location is an integral part of a debug info intrinsic 2001// and can't be separated from it or replaced. Instead of attempting 2002// to merge locations, simply hoist both copies of the intrinsic. 2004// We've just hoisted DbgVariableRecords; move I1 after them (before TI) 2005// and leave any that were not hoisted behind (by calling moveBefore 2006// rather than moveBeforePreserving). 2008for (
auto &SuccIter : OtherSuccIterRange) {
2009auto *I2 = &*SuccIter++;
2010assert(isa<DbgInfoIntrinsic>(I2));
2014// For a normal instruction, we just move one to right before the 2015// branch, then replace all uses of the other with the first. Finally, 2016// we remove the now redundant second instruction. 2018// We've just hoisted DbgVariableRecords; move I1 after them (before TI) 2019// and leave any that were not hoisted behind (by calling moveBefore 2020// rather than moveBeforePreserving). 2022for (
auto &SuccIter : OtherSuccIterRange) {
2028if (
auto *CB = dyn_cast<CallBase>(I1)) {
2029boolSuccess = CB->tryIntersectAttributes(cast<CallBase>(I2));
2030assert(
Success &&
"We should not be trying to hoist callbases " 2031"with non-intersectable attributes");
2032// For NDEBUG Compile. 2037// I1 and I2 are being combined into a single instruction. Its debug 2038// location is the merged locations of the original instructions. 2044 NumHoistCommonCode += SuccIterPairs.
size();
2046 NumHoistCommonInstrs += SuccIterPairs.
size();
2052// We are about to skip over a pair of non-identical instructions. Record 2053// if any have characteristics that would prevent reordering instructions 2055for (
auto &SuccIterPair : SuccIterPairs) {
2064bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2068auto *BI = dyn_cast<BranchInst>(TI);
2074// Use only for an if statement. 2075auto *I2 = *OtherSuccTIs.
begin();
2083// In the case of an if statement, we try to hoist an invoke. 2084// FIXME: Can we define a safety predicate for CallBr? 2085// FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll 2086// removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit? 2090// TODO: callbr hoisting currently disabled pending further study. 2091if (isa<CallBrInst>(I1))
2096Value *BB1V = PN.getIncomingValueForBlock(BB1);
2098Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2102// In the case of an if statement, check for 2103// passingValueIsAlwaysUndefined here because we would rather eliminate 2104// undefined control flow then converting it to a select. 2112// Hoist DbgVariableRecords attached to the terminator to match dbg.* 2113// intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors. 2115// Clone the terminator and hoist it into the pred, without any debug info. 2118if (!
NT->getType()->isVoidTy()) {
2119I1->replaceAllUsesWith(NT);
2121 OtherSuccTI->replaceAllUsesWith(NT);
2125 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2127// Ensure terminator gets a debug location, even an unknown one, in case 2128// it involves inlinable calls. 2131for (
auto *OtherSuccTI : OtherSuccTIs)
2132 Locs.
push_back(OtherSuccTI->getDebugLoc());
2135// PHIs created below will adopt NT's merged DebugLoc. 2138// In the case of an if statement, hoisting one of the terminators from our 2139// successor is a great thing. Unfortunately, the successors of the if/else 2140// blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2 2141// must agree for all PHI nodes, so we insert select instruction to compute 2144 std::map<std::pair<Value *, Value *>,
SelectInst *> InsertedSelects;
2147Value *BB1V = PN.getIncomingValueForBlock(BB1);
2148Value *BB2V = PN.getIncomingValueForBlock(BB2);
2152// These values do not agree. Insert a select instruction before NT 2153// that determines the right value. 2154SelectInst *&
SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2156// Propagate fast-math-flags from phi node to its replacement select. 2159 isa<FPMathOperator>(PN) ? &PN :
nullptr,
2163// Make the PHI node use the select for all incoming values for BB1/BB2 2164for (
unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2165if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2166 PN.setIncomingValue(i, SI);
2173// Update any PHI nodes in our new successors. 2177 Updates.
push_back({DominatorTree::Insert, TIParent, Succ});
2182 Updates.
push_back({DominatorTree::Delete, TIParent, Succ});
2186 DTU->applyUpdates(Updates);
2190// Check lifetime markers. 2192if (
autoII = dyn_cast<IntrinsicInst>(
I)) {
2193switch (
II->getIntrinsicID()) {
2196case Intrinsic::lifetime_start:
2197case Intrinsic::lifetime_end:
2204// TODO: Refine this. This should avoid cases like turning constant memcpy sizes 2208// Divide/Remainder by constant is typically much cheaper than by variable. 2209if (
I->isIntDivRem())
2211return !isa<IntrinsicInst>(
I);
2214// All instructions in Insts belong to different blocks that all unconditionally 2215// branch to a common successor. Analyze each instruction and return true if it 2216// would be possible to sink them into their successor, creating one common 2217// instruction instead. For every value that would be required to be provided by 2218// PHI node (because an operand varies in each input block), add to PHIOperands. 2222// Prune out obviously bad instructions to move. Each instruction must have 2223// the same number of uses, and we check later that the uses are consistent. 2224 std::optional<unsigned> NumUses;
2225for (
auto *
I : Insts) {
2226// These instructions may change or break semantics if moved. 2227if (isa<PHINode>(
I) ||
I->isEHPad() || isa<AllocaInst>(
I) ||
2228I->getType()->isTokenTy())
2231// Do not try to sink an instruction in an infinite loop - it can cause 2232// this algorithm to infinite loop. 2233if (
I->getParent()->getSingleSuccessor() ==
I->getParent())
2236// Conservatively return false if I is an inline-asm instruction. Sinking 2237// and merging inline-asm instructions can potentially create arguments 2238// that cannot satisfy the inline-asm constraints. 2239// If the instruction has nomerge or convergent attribute, return false. 2240if (
constauto *
C = dyn_cast<CallBase>(
I))
2241if (
C->isInlineAsm() ||
C->cannotMerge() ||
C->isConvergent())
2245 NumUses =
I->getNumUses();
2246elseif (NumUses !=
I->getNumUses())
2252for (
auto *
I : Insts) {
2256// swifterror pointers can only be used by a load or store; sinking a load 2257// or store would require introducing a select for the pointer operand, 2258// which isn't allowed for swifterror pointers. 2259if (isa<StoreInst>(
I) &&
I->getOperand(1)->isSwiftError())
2261if (isa<LoadInst>(
I) &&
I->getOperand(0)->isSwiftError())
2264// Treat MMRAs conservatively. This pass can be quite aggressive and 2265// could drop a lot of MMRAs otherwise. 2270// Uses must be consistent: If I0 is used in a phi node in the sink target, 2271// then the other phi operands must match the instructions from Insts. This 2272// also has to hold true for any phi nodes that would be created as a result 2273// of sinking. Both of these cases are represented by PhiOperands. 2274for (
constUse &U : I0->
uses()) {
2275auto It = PHIOperands.find(&U);
2276if (It == PHIOperands.end())
2277// There may be uses in other blocks when sinking into a loop header. 2279if (!
equal(Insts, It->second))
2283// For calls to be sinkable, they must all be indirect, or have same callee. 2284// I.e. if we have two direct calls to different callees, we don't want to 2285// turn that into an indirect call. Likewise, if we have an indirect call, 2286// and a direct call, we don't actually want to have a single indirect call. 2287if (isa<CallBase>(I0)) {
2289return cast<CallBase>(
I)->isIndirectCall();
2291bool HaveIndirectCalls =
any_of(Insts, IsIndirectCall);
2292bool AllCallsAreIndirect =
all_of(Insts, IsIndirectCall);
2293if (HaveIndirectCalls) {
2294if (!AllCallsAreIndirect)
2297// All callees must be identical. 2298Value *Callee =
nullptr;
2300Value *CurrCallee = cast<CallBase>(
I)->getCalledOperand();
2302 Callee = CurrCallee;
2303elseif (Callee != CurrCallee)
2309for (
unsigned OI = 0, OE = I0->
getNumOperands(); OI != OE; ++OI) {
2311if (
Op->getType()->isTokenTy())
2312// Don't touch any operand of token type. 2319if (!
all_of(Insts, SameAsI0)) {
2320// SROA can't speculate lifetime markers of selects/phis, and the 2321// backend may handle such lifetimes incorrectly as well (#104776). 2322// Don't sink lifetimes if it would introduce a phi on the pointer 2326return isa<AllocaInst>(
I->getOperand(1)->stripPointerCasts());
2332// We can't create a PHI from this GEP. 2335for (
auto *
I : Insts)
2336 Ops.push_back(
I->getOperand(OI));
2342// Assuming canSinkInstructions(Blocks) has returned true, sink the last 2343// instruction of every block in Blocks to their common successor, commoning 2344// into one instruction. 2346auto *BBEnd =
Blocks[0]->getTerminator()->getSuccessor(0);
2348// canSinkInstructions returning true guarantees that every block has at 2349// least one non-terminator instruction. 2355 }
while (isa<DbgInfoIntrinsic>(
I) &&
I != &BB->
front());
2356if (!isa<DbgInfoIntrinsic>(
I))
2360// We don't need to do any more checking here; canSinkInstructions should 2361// have done it all for us. 2365// This check is different to that in canSinkInstructions. There, we 2366// cared about the global view once simplifycfg (and instcombine) have 2367// completed - it takes into account PHIs that become trivially 2368// simplifiable. However here we need a more local view; if an operand 2369// differs we create a PHI and rely on instcombine to clean up the very 2370// small mess we may make. 2379// Create a new PHI in the successor block and populate it. 2381assert(!
Op->getType()->isTokenTy() &&
"Can't PHI tokens!");
2384 PN->insertBefore(BBEnd->begin());
2385for (
auto *
I : Insts)
2386 PN->addIncoming(
I->getOperand(O),
I->getParent());
2390// Arbitrarily use I0 as the new "common" instruction; remap its operands 2391// and move it to the start of the successor block. 2395 I0->
moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2397// Update metadata and IR flags, and merge debug locations. 2398for (
auto *
I : Insts)
2400// The debug location for the "common" instruction is the merged locations 2401// of all the commoned instructions. We start with the original location 2402// of the "common" instruction and iteratively merge each location in the 2404// This is an N-way merge, which will be inefficient if I0 is a CallInst. 2405// However, as N-way merge for CallInst is rare, so we use simplified API 2406// instead of using complex API for N-way merge. 2410if (
auto *CB = dyn_cast<CallBase>(I0)) {
2411boolSuccess = CB->tryIntersectAttributes(cast<CallBase>(
I));
2412assert(
Success &&
"We should not be trying to sink callbases " 2413"with non-intersectable attributes");
2414// For NDEBUG Compile. 2420// canSinkLastInstruction checked that all instructions are only used by 2421// phi nodes in a way that allows replacing the phi node with the common 2423auto *PN = cast<PHINode>(U);
2424 PN->replaceAllUsesWith(I0);
2425 PN->eraseFromParent();
2428// Finally nuke all instructions apart from the common instruction. 2429for (
auto *
I : Insts) {
2432// The remaining uses are debug users, replace those with the common inst. 2433// In most (all?) cases this just introduces a use-before-def. 2434assert(
I->user_empty() &&
"Inst unexpectedly still has non-dbg users");
2435I->replaceAllUsesWith(I0);
2436I->eraseFromParent();
2440/// Check whether BB's predecessors end with unconditional branches. If it is 2441/// true, sink any common code from the predecessors to BB. 2444// We support two situations: 2445// (1) all incoming arcs are unconditional 2446// (2) there are non-unconditional incoming arcs 2448// (2) is very common in switch defaults and 2465// [end] has two unconditional predecessor arcs and one conditional. The 2466// conditional refers to the implicit empty 'else' arc. This conditional 2467// arc can also be caused by an empty default block in a switch. 2469// In this case, we attempt to sink code from all *unconditional* arcs. 2470// If we can sink instructions from these arcs (determined during the scan 2471// phase below) we insert a common successor for all unconditional arcs and 2472// connect that to [end], to enable sinking: 2486bool HaveNonUnconditionalPredecessors =
false;
2488auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2489if (PredBr && PredBr->isUnconditional())
2492 HaveNonUnconditionalPredecessors =
true;
2494if (UnconditionalPreds.
size() < 2)
2497// We take a two-step approach to tail sinking. First we scan from the end of 2498// each block upwards in lockstep. If the n'th instruction from the end of each 2499// block can be sunk, those instructions are added to ValuesToSink and we 2500// carry on. If we can sink an instruction but need to PHI-merge some operands 2501// (because they're not identical in each instruction) we add these to 2503// We prepopulate PHIOperands with the phis that already exist in BB. 2507for (
constUse &U : PN.incoming_values())
2508 IncomingVals.
insert({PN.getIncomingBlock(U), &U});
2509auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2511 Ops.push_back(*IncomingVals[Pred]);
2516 LockstepReverseIterator LRI(UnconditionalPreds);
2517while (LRI.isValid() &&
2519LLVM_DEBUG(
dbgs() <<
"SINK: instruction can be sunk: " << *(*LRI)[0]
2521 InstructionsToSink.
insert((*LRI).begin(), (*LRI).end());
2526// If no instructions can be sunk, early-return. 2532if (!followedByDeoptOrUnreachable) {
2533// Check whether this is the pointer operand of a load/store. 2534auto IsMemOperand = [](
Use &U) {
2535auto *
I = cast<Instruction>(U.getUser());
2536if (isa<LoadInst>(
I))
2538if (isa<StoreInst>(
I))
2543// Okay, we *could* sink last ScanIdx instructions. But how many can we 2544// actually sink before encountering instruction that is unprofitable to 2546auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2547unsigned NumPHIInsts = 0;
2548for (
Use &U : (*LRI)[0]->operands()) {
2549auto It = PHIOperands.
find(&U);
2550if (It != PHIOperands.
end() && !
all_of(It->second, [&](
Value *V) {
2551 return InstructionsToSink.contains(V);
2554// Do not separate a load/store from the gep producing the address. 2555// The gep can likely be folded into the load/store as an addressing 2556// mode. Additionally, a load of a gep is easier to analyze than a 2558if (IsMemOperand(U) &&
2559any_of(It->second, [](
Value *V) { return isa<GEPOperator>(V); }))
2561// FIXME: this check is overly optimistic. We may end up not sinking 2562// said instruction, due to the very same profitability check. 2563// See @creating_too_many_phis in sink-common-code.ll. 2567return NumPHIInsts <= 1;
2570// We've determined that we are going to sink last ScanIdx instructions, 2571// and recorded them in InstructionsToSink. Now, some instructions may be 2572// unprofitable to sink. But that determination depends on the instructions 2573// that we are going to sink. 2575// First, forward scan: find the first instruction unprofitable to sink, 2576// recording all the ones that are profitable to sink. 2577// FIXME: would it be better, after we detect that not all are profitable. 2578// to either record the profitable ones, or erase the unprofitable ones? 2579// Maybe we need to choose (at runtime) the one that will touch least 2584while (
Idx < ScanIdx) {
2585if (!ProfitableToSinkInstruction(LRI)) {
2586// Too many PHIs would be created. 2588dbgs() <<
"SINK: stopping here, too many PHIs would be created!\n");
2591 InstructionsProfitableToSink.
insert((*LRI).begin(), (*LRI).end());
2596// If no instructions can be sunk, early-return. 2600// Did we determine that (only) some instructions are unprofitable to sink? 2602// Okay, some instructions are unprofitable. 2604 InstructionsToSink = InstructionsProfitableToSink;
2606// But, that may make other instructions unprofitable, too. 2607// So, do a backward scan, do any earlier instructions become 2610 !ProfitableToSinkInstruction(LRI) &&
2611"We already know that the last instruction is unprofitable to sink");
2615// If we detect that an instruction becomes unprofitable to sink, 2616// all earlier instructions won't be sunk either, 2617// so preemptively keep InstructionsProfitableToSink in sync. 2618// FIXME: is this the most performant approach? 2620 InstructionsProfitableToSink.
erase(
I);
2621if (!ProfitableToSinkInstruction(LRI)) {
2622// Everything starting with this instruction won't be sunk. 2624 InstructionsToSink = InstructionsProfitableToSink;
2631// If no instructions can be sunk, early-return. 2638if (HaveNonUnconditionalPredecessors) {
2639if (!followedByDeoptOrUnreachable) {
2640// It is always legal to sink common instructions from unconditional 2641// predecessors. However, if not all predecessors are unconditional, 2642// this transformation might be pessimizing. So as a rule of thumb, 2643// don't do it unless we'd sink at least one non-speculatable instruction. 2644// See https://bugs.llvm.org/show_bug.cgi?id=30244 2647bool Profitable =
false;
2648while (
Idx < ScanIdx) {
2661// We have a conditional edge and we're going to sink some instructions. 2662// Insert a new block postdominating all blocks we're going to sink from. 2664// Edges couldn't be split. 2669// Now that we've analyzed all potential sinking candidates, perform the 2670// actual sink. We iteratively sink the last non-terminator of the source 2671// blocks into their common successor unless doing so would require too 2672// many PHI instructions to be generated (currently only one PHI is allowed 2673// per sunk instruction). 2675// We can use InstructionsToSink to discount values needing PHI-merging that will 2676// actually be sunk in a later iteration. This allows us to be more 2677// aggressive in what we sink. This does allow a false positive where we 2678// sink presuming a later value will also be sunk, but stop half way through 2679// and never actually sink it which means we produce more PHIs than intended. 2680// This is unlikely in practice though. 2682for (; SinkIdx != ScanIdx; ++SinkIdx) {
2684 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2687// Because we've sunk every instruction in turn, the current instruction to 2688// sink is always at index 0. 2692 NumSinkCommonInstrs++;
2696 ++NumSinkCommonCode;
2702structCompatibleSets {
2715// Perform a linear scan over all the existing sets, see if the new `invoke` 2716// is compatible with any particular set. Since we know that all the `invokes` 2717// within a set are compatible, only check the first `invoke` in each set. 2718// WARNING: at worst, this has quadratic complexity. 2720if (CompatibleSets::shouldBelongToSameSet({
Set.front(),
II}))
2724// Otherwise, we either had no sets yet, or this invoke forms a new set. 2725return Sets.emplace_back();
2729 getCompatibleSet(
II).emplace_back(
II);
2733assert(Invokes.
size() == 2 &&
"Always called with exactly two candidates.");
2735// Can we theoretically merge these `invoke`s? 2737returnII->cannotMerge() ||
II->isInlineAsm();
2739if (
any_of(Invokes, IsIllegalToMerge))
2742// Either both `invoke`s must be direct, 2743// or both `invoke`s must be indirect. 2744auto IsIndirectCall = [](
InvokeInst *
II) {
returnII->isIndirectCall(); };
2745bool HaveIndirectCalls =
any_of(Invokes, IsIndirectCall);
2746bool AllCallsAreIndirect =
all_of(Invokes, IsIndirectCall);
2747if (HaveIndirectCalls) {
2748if (!AllCallsAreIndirect)
2751// All callees must be identical. 2754Value *CurrCallee =
II->getCalledOperand();
2755assert(CurrCallee &&
"There is always a called operand.");
2758elseif (Callee != CurrCallee)
2763// Either both `invoke`s must not have a normal destination, 2764// or both `invoke`s must have a normal destination, 2766return !isa<UnreachableInst>(
II->getNormalDest()->getFirstNonPHIOrDbg());
2768if (
any_of(Invokes, HasNormalDest)) {
2769// Do not merge `invoke` that does not have a normal destination with one 2770// that does have a normal destination, even though doing so would be legal. 2771if (!
all_of(Invokes, HasNormalDest))
2774// All normal destinations must be identical. 2778assert(CurrNormalBB &&
"There is always a 'continue to' basic block.");
2780 NormalBB = CurrNormalBB;
2781elseif (NormalBB != CurrNormalBB)
2785// In the normal destination, the incoming values for these two `invoke`s 2786// must be compatible. 2789 NormalBB, {Invokes[0]->getParent(), Invokes[1]->
getParent()},
2795// All unwind destinations must be identical. 2796// We know that because we have started from said unwind destination. 2800assert(CurrUnwindBB &&
"There is always an 'unwind to' basic block.");
2802 UnwindBB = CurrUnwindBB;
2804assert(UnwindBB == CurrUnwindBB &&
"Unexpected unwind destination.");
2808// In the unwind destination, the incoming values for these two `invoke`s 2809// must be compatible. 2811 Invokes.front()->getUnwindDest(),
2812 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2815// Ignoring arguments, these `invoke`s must be identical, 2816// including operand bundles. 2818for (
auto *
II : Invokes.drop_front())
2822// Can we theoretically form the data operands for the merged `invoke`? 2823auto IsIllegalToMergeArguments = [](
auto Ops) {
2824Use &U0 = std::get<0>(Ops);
2825Use &U1 = std::get<1>(Ops);
2832assert(Invokes.size() == 2 &&
"Always called with exactly two candidates.");
2833if (
any_of(
zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2834 IsIllegalToMergeArguments))
2842// Merge all invokes in the provided set, all of which are compatible 2843// as per the `CompatibleSets::shouldBelongToSameSet()`. 2846assert(Invokes.
size() >= 2 &&
"Must have at least two invokes to merge.");
2853 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2855// Clone one of the invokes into a new basic block. 2856// Since they are all compatible, it doesn't matter which invoke is cloned. 2857InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2861 II0->
getParent()->getIterator()->getNextNode();
2866 Ctx, II0BB->
getName() +
".invoke", Func, InsertBeforeBlock);
2868auto *MergedInvoke = cast<InvokeInst>(II0->
clone());
2869// NOTE: all invokes have the same attributes, so no handling needed. 2870 MergedInvoke->
insertInto(MergedInvokeBB, MergedInvokeBB->
end());
2872if (!HasNormalDest) {
2873// This set does not have a normal destination, 2874// so just form a new block with unreachable terminator. 2876 Ctx, II0BB->
getName() +
".cont", Func, InsertBeforeBlock);
2881// The unwind destination, however, remainds identical for all invokes here. 2887// Predecessor blocks that contained these invokes will now branch to 2888// the new block that contains the merged invoke, ... 2891 {DominatorTree::Insert,
II->getParent(), MergedInvoke->
getParent()});
2893// ... which has the new `unreachable` block as normal destination, 2894// or unwinds to the (same for all `invoke`s in this set) `landingpad`, 2897 SuccBBOfMergedInvoke});
2899// Since predecessor blocks now unconditionally branch to a new block, 2900// they no longer branch to their original successors. 2904 {DominatorTree::Delete,
II->getParent(), SuccOfPredBB});
2907bool IsIndirectCall = Invokes[0]->isIndirectCall();
2909// Form the merged operands for the merged invoke. 2910for (
Use &U : MergedInvoke->operands()) {
2911// Only PHI together the indirect callees and data operands. 2912if (MergedInvoke->isCallee(&U)) {
2915 }
elseif (!MergedInvoke->isDataOperand(&U))
2918// Don't create trivial PHI's with all-identical incoming values. 2920returnII->getOperand(
U.getOperandNo()) !=
U.get();
2925// Form a PHI out of all the data ops under this index. 2927U->getType(),
/*NumReservedValues=*/Invokes.size(),
"", MergedInvoke->getIterator());
2934// We've ensured that each PHI node has compatible (identical) incoming values 2935// when coming from each of the `invoke`s in the current merge set, 2936// so update the PHI nodes accordingly. 2939/*ExistPred=*/Invokes.front()->getParent());
2941// And finally, replace the original `invoke`s with an unconditional branch 2942// to the block with the merged `invoke`. Also, give that merged `invoke` 2943// the merged debugloc of all the original `invoke`s. 2946// Compute the debug location common to all the original `invoke`s. 2948 MergedDebugLoc =
II->getDebugLoc();
2953// And replace the old `invoke` with an unconditionally branch 2954// to the block with the merged `invoke`. 2956 OrigSuccBB->removePredecessor(
II->getParent());
2958// The unconditional branch is part of the replacement for the original 2959// invoke, so should use its DebugLoc. 2961boolSuccess = MergedInvoke->tryIntersectAttributes(
II);
2962assert(
Success &&
"Merged invokes with incompatible attributes");
2963// For NDEBUG Compile 2965II->replaceAllUsesWith(MergedInvoke);
2966II->eraseFromParent();
2969 MergedInvoke->setDebugLoc(MergedDebugLoc);
2970 ++NumInvokeSetsFormed;
2973 DTU->applyUpdates(Updates);
2976/// If this block is a `landingpad` exception handling block, categorize all 2977/// the predecessor `invoke`s into sets, with all `invoke`s in each set 2978/// being "mergeable" together, and then merge invokes in each set together. 2980/// This is a weird mix of hoisting and sinking. Visually, it goes from: 2983/// [invoke0] [invoke1] 2985/// [cont0] [landingpad] [cont1] 2991/// [cont] [landingpad] 2993/// But of course we can only do that if the invokes share the `landingpad`, 2994/// edges invoke0->cont0 and invoke1->cont1 are "compatible", 2995/// and the invoked functions are "compatible". 3002// FIXME: generalize to all exception handling blocks? 3006 CompatibleSets Grouper;
3008// Record all the predecessors of this `landingpad`. As per verifier, 3009// the only allowed predecessor is the unwind edge of an `invoke`. 3010// We want to group "compatible" `invokes` into the same set to be merged. 3012 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
3014// And now, merge `invoke`s that were grouped togeter. 3016if (Invokes.
size() < 2)
3026/// Track ephemeral values, which should be ignored for cost-modelling 3027/// purposes. Requires walking instructions in reverse order. 3028classEphemeralValueTracker {
3032if (isa<AssumeInst>(
I))
3034return !
I->mayHaveSideEffects() && !
I->isTerminator() &&
3036 return EphValues.count(cast<Instruction>(U));
3042if (isEphemeral(
I)) {
3053/// Determine if we can hoist sink a sole store instruction out of a 3054/// conditional block. 3056/// We are looking for code like the following: 3058/// store i32 %add, i32* %arrayidx2 3059/// ... // No other stores or function calls (we could be calling a memory 3060/// ... // function). 3061/// %cmp = icmp ult %x, %y 3062/// br i1 %cmp, label %EndBB, label %ThenBB 3064/// store i32 %add5, i32* %arrayidx2 3068/// We are going to transform this into: 3070/// store i32 %add, i32* %arrayidx2 3072/// %cmp = icmp ult %x, %y 3073/// %add.add5 = select i1 %cmp, i32 %add, %add5 3074/// store i32 %add.add5, i32* %arrayidx2 3077/// \return The pointer to the value of the previous store if the store can be 3078/// hoisted into the predecessor block. 0 otherwise. 3081StoreInst *StoreToHoist = dyn_cast<StoreInst>(
I);
3085// Volatile or atomic. 3092// Look for a store to the same pointer in BrBB. 3093unsigned MaxNumInstToLookAt = 9;
3094// Skip pseudo probe intrinsic calls which are not really killing any memory 3097if (!MaxNumInstToLookAt)
3099 --MaxNumInstToLookAt;
3101// Could be calling an instruction that affects memory like free(). 3102if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3105if (
auto *SI = dyn_cast<StoreInst>(&CurI)) {
3106// Found the previous store to same location and type. Make sure it is 3107// simple, to avoid introducing a spurious non-atomic write after an 3109if (SI->getPointerOperand() == StorePtr &&
3110 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3111 SI->getAlign() >= StoreToHoist->
getAlign())
3112// Found the previous store, return its value operand. 3113return SI->getValueOperand();
3114returnnullptr;
// Unknown store. 3117if (
auto *LI = dyn_cast<LoadInst>(&CurI)) {
3118if (LI->getPointerOperand() == StorePtr && LI->
getType() == StoreTy &&
3119 LI->isSimple() && LI->getAlign() >= StoreToHoist->
getAlign()) {
3121bool ExplicitlyDereferenceableOnly;
3124/*StoreCaptures=*/true) &&
3125 (!ExplicitlyDereferenceableOnly ||
3127 LI->getDataLayout()))) {
3128// Found a previous load, return it. 3132// The load didn't work out, but we may still find a store. 3139/// Estimate the cost of the insertion(s) and check that the PHI nodes can be 3140/// converted to selects. 3143unsigned &SpeculatedInstructions,
3151bool HaveRewritablePHIs =
false;
3156// FIXME: Try to remove some of the duplication with 3157// hoistCommonCodeFromSuccessors. Skip PHIs which are trivial. 3164// Don't convert to selects if we could remove undefined behavior instead. 3169 HaveRewritablePHIs =
true;
3172if (!OrigCE && !ThenCE)
3173continue;
// Known cheap (FIXME: Maybe not true for aggregates). 3179if (OrigCost + ThenCost > MaxCost)
3182// Account for the cost of an unfolded ConstantExpr which could end up 3183// getting expanded into Instructions. 3184// FIXME: This doesn't account for how many operations are combined in the 3185// constant expression. 3186 ++SpeculatedInstructions;
3187if (SpeculatedInstructions > 1)
3191return HaveRewritablePHIs;
3195 std::optional<bool> Invert,
3197// If the branch is non-unpredictable, and is predicted to *not* branch to 3198// the `then` block, then avoid speculating it. 3199if (BI->
getMetadata(LLVMContext::MD_unpredictable))
3206if (!Invert.has_value())
3209uint64_t EndWeight = *Invert ? TWeight : FWeight;
3213return BIEndProb < Likely;
3216/// Speculate a conditional basic block flattening the CFG. 3218/// Note that this is a very risky transform currently. Speculating 3219/// instructions like this is most often not desirable. Instead, there is an MI 3220/// pass which can do it with full awareness of the resource constraints. 3221/// However, some cases are "obvious" and we should do directly. An example of 3222/// this is speculating a single, reasonably cheap instruction. 3224/// There is only one distinct advantage to flattening the CFG at the IR level: 3225/// it makes very common but simplistic optimizations such as are common in 3226/// instcombine and the DAG combiner more powerful by removing CFG edges and 3227/// modeling their effects with easier to reason about SSA value graphs. 3230/// An illustration of this transform is turning this IR: 3233/// %cmp = icmp ult %x, %y 3234/// br i1 %cmp, label %EndBB, label %ThenBB 3236/// %sub = sub %x, %y 3239/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ] 3246/// %cmp = icmp ult %x, %y 3247/// %sub = sub %x, %y 3248/// %cond = select i1 %cmp, 0, %sub 3252/// \returns true if the conditional block is removed. 3253bool SimplifyCFGOpt::speculativelyExecuteBB(
BranchInst *BI,
3258// Be conservative for now. FP select instruction can often be expensive. 3260if (isa<FCmpInst>(BrCond))
3268// If ThenBB is actually on the false edge of the conditional branch, remember 3269// to swap the select operands later. 3280// Keep a count of how many times instructions are used within ThenBB when 3281// they are candidates for sinking into ThenBB. Specifically: 3282// - They are defined in BB, and 3283// - They have no side effects, and 3284// - All of their uses are in ThenBB. 3289unsigned SpeculatedInstructions = 0;
3291Options.HoistLoadsStoresWithCondFaulting;
3293Value *SpeculatedStoreValue =
nullptr;
3295 EphemeralValueTracker EphTracker;
3298if (isa<DbgInfoIntrinsic>(
I)) {
3303// Skip pseudo probes. The consequence is we lose track of the branch 3304// probability for ThenBB, which is fine since the optimization here takes 3305// place regardless of the branch probability. 3306if (isa<PseudoProbeInst>(
I)) {
3307// The probe should be deleted so that it will not be over-counted when 3308// the samples collected on the non-conditional path are counted towards 3309// the conditional path. We leave it for the counts inference algorithm to 3310// figure out a proper count for an unknown probe. 3315// Ignore ephemeral values, they will be dropped by the transform. 3316if (EphTracker.track(&
I))
3319// Only speculatively execute a single instruction (not counting the 3320// terminator) for now. 3321bool IsSafeCheapLoadStore = HoistLoadsStores &&
3323 SpeculatedConditionalLoadsStores.
size() <
3325// Not count load/store into cost if target supports conditional faulting 3326// b/c it's cheap to speculate it. 3327if (IsSafeCheapLoadStore)
3328 SpeculatedConditionalLoadsStores.
push_back(&
I);
3330 ++SpeculatedInstructions;
3332if (SpeculatedInstructions > 1)
3335// Don't hoist the instruction if it's unsafe or expensive. 3336if (!IsSafeCheapLoadStore &&
3339 (SpeculatedStoreValue =
3342if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3347// Store the store speculation candidate. 3348if (!SpeculatedStore && SpeculatedStoreValue)
3349 SpeculatedStore = cast<StoreInst>(&
I);
3351// Do not hoist the instruction if any of its operands are defined but not 3352// used in BB. The transformation will prevent the operand from 3353// being sunk into the use block. 3354for (
Use &
Op :
I.operands()) {
3357continue;
// Not a candidate for sinking. 3359 ++SinkCandidateUseCounts[OpI];
3363// Consider any sink candidates which are only used in ThenBB as costs for 3364// speculation. Note, while we iterate over a DenseMap here, we are summing 3365// and so iteration order isn't significant. 3366for (
constauto &[Inst, Count] : SinkCandidateUseCounts)
3368 ++SpeculatedInstructions;
3369if (SpeculatedInstructions > 1)
3373// Check that we can insert the selects and that it's not too expensive to do 3376 SpeculatedStore !=
nullptr || !SpeculatedConditionalLoadsStores.
empty();
3379 SpeculatedInstructions,
Cost,
TTI);
3380if (!Convert ||
Cost > Budget)
3383// If we get here, we can hoist the instruction and if-convert. 3384LLVM_DEBUG(
dbgs() <<
"SPECULATIVELY EXECUTING BB" << *ThenBB <<
"\n";);
3386// Insert a select of the value of the speculated store. 3387if (SpeculatedStoreValue) {
3391Value *FalseV = SpeculatedStoreValue;
3395 BrCond, TrueV, FalseV,
"spec.store.select", BI);
3399// The value stored is still conditional, but the store itself is now 3400// unconditonally executed, so we must be sure that any linked dbg.assign 3401// intrinsics are tracking the new stored value (the result of the 3402// select). If we don't, and the store were to be removed by another pass 3403// (e.g. DSE), then we'd eventually end up emitting a location describing 3404// the conditional value, unconditionally. 3406// === Before this transformation === 3408// store %one, %x.dest, !DIAssignID !1 3409// dbg.assign %one, "x", ..., !1, ... 3413// store %two, %x.dest, !DIAssignID !2 3414// dbg.assign %two, "x", ..., !2, ... 3416// === After this transformation === 3418// store %one, %x.dest, !DIAssignID !1 3419// dbg.assign %one, "x", ..., !1 3421// %merge = select %cond, %two, %one 3422// store %merge, %x.dest, !DIAssignID !2 3423// dbg.assign %merge, "x", ..., !2 3424auto replaceVariable = [OrigV, S](
auto *DbgAssign) {
3426 DbgAssign->replaceVariableLocationOp(OrigV, S);
3432// Metadata can be dependent on the condition we are hoisting above. 3433// Strip all UB-implying metadata on the instruction. Drop the debug loc 3434// to avoid making it appear as if the condition is a constant, which would 3435// be misleading while debugging. 3436// Similarly strip attributes that maybe dependent on condition we are 3439if (!SpeculatedStoreValue || &
I != SpeculatedStore) {
3440// Don't update the DILocation of dbg.assign intrinsics. 3441if (!isa<DbgAssignIntrinsic>(&
I))
3444I.dropUBImplyingAttrsAndMetadata();
3446// Drop ephemeral values. 3447if (EphTracker.contains(&
I)) {
3453// Hoist the instructions. 3454// In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached 3455// to these instructions, in the same way that dbg.value intrinsics are 3456// dropped at the end of this block. 3459// Drop all records except assign-kind DbgVariableRecords (dbg.assign 3463 It.dropOneDbgRecord(&DR);
3465 std::prev(ThenBB->
end()));
3467if (!SpeculatedConditionalLoadsStores.
empty())
3470// Insert selects and rewrite the PHI operands. 3478// Skip PHIs which are trivial. 3482// Create a select whose true value is the speculatively executed value and 3483// false value is the pre-existing value. Swap them if the branch 3484// destinations were inverted. 3485Value *TrueV = ThenV, *FalseV = OrigV;
3493// Remove speculated dbg intrinsics. 3494// FIXME: Is it possible to do this in a more elegant way? Moving/merging the 3495// dbg value for the different flows and inserting it after the select. 3497// We still want to know that an assignment took place so don't remove 3498// dbg.assign intrinsics. 3499if (!isa<DbgAssignIntrinsic>(
I))
3500I->eraseFromParent();
3507/// Return true if we can thread a branch across this block. 3510 EphemeralValueTracker EphTracker;
3512// Walk the loop in reverse so that we can identify ephemeral values properly 3513// (values only feeding assumes). 3515// Can't fold blocks that contain noduplicate or convergent calls. 3516if (
CallInst *CI = dyn_cast<CallInst>(&
I))
3517if (CI->cannotDuplicate() || CI->isConvergent())
3520// Ignore ephemeral values which are deleted during codegen. 3521// We will delete Phis while threading, so Phis should not be accounted in 3523if (!EphTracker.track(&
I) && !isa<PHINode>(
I)) {
3525returnfalse;
// Don't clone large BB's. 3528// We can only support instructions that do not define values that are 3529// live outside of the current basic block. 3530for (
User *U :
I.users()) {
3532if (UI->
getParent() != BB || isa<PHINode>(UI))
3536// Looks ok, continue checking. 3544// Don't look past the block defining the value, we might get the value from 3545// a previous loop iteration. 3546auto *
I = dyn_cast<Instruction>(V);
3547if (
I &&
I->getParent() == To)
3550// We know the value if the From block branches on it. 3551auto *BI = dyn_cast<BranchInst>(
From->getTerminator());
3560/// If we have a conditional branch on something for which we know the constant 3561/// value in predecessors (e.g. a phi node in the current block), thread edges 3562/// from the predecessor to their ultimate destination. 3563static std::optional<bool>
3572// Degenerate case of a single entry PHI. 3579if (
auto *CB = dyn_cast<ConstantInt>(U))
3584 KnownValues[CB].
insert(Pred);
3588if (KnownValues.
empty())
3591// Now we know that this block has multiple preds and two succs. 3592// Check that the block is small enough and values defined in the block are 3593// not used outside of it. 3597for (
constauto &Pair : KnownValues) {
3598// Okay, we now know that all edges from PredBB should be revectored to 3599// branch to RealDest. 3605continue;
// Skip self loops. 3607// Skip if the predecessor's terminator is an indirect branch. 3615 <<
" has value " << *Pair.first <<
" in predecessors:\n";
3618dbgs() <<
"Threading to destination " << RealDest->
getName() <<
".\n";
3621// Split the predecessors we are threading into a new edge block. We'll 3622// clone the instructions into this block, and then redirect it to RealDest. 3625// TODO: These just exist to reduce test diff, we can drop them if we like. 3626 EdgeBB->setName(RealDest->
getName() +
".critedge");
3627 EdgeBB->moveBefore(RealDest);
3632// BB may have instructions that are being threaded over. Clone these 3633// instructions into EdgeBB. We know that there will be no uses of the 3634// cloned instructions outside of EdgeBB. 3637 TranslateMap[
Cond] = CB;
3639// RemoveDIs: track instructions that we optimise away while folding, so 3640// that we can copy DbgVariableRecords from them later. 3643if (
PHINode *PN = dyn_cast<PHINode>(BBI)) {
3647// Clone the instruction. 3649// Insert the new instruction into its new home. 3650N->insertInto(EdgeBB, InsertPt);
3653N->setName(BBI->getName() +
".c");
3655// Update operands due to translation. 3656for (
Use &
Op :
N->operands()) {
3658if (PI != TranslateMap.
end())
3662// Check for trivial simplification. 3664if (!BBI->use_empty())
3665 TranslateMap[&*BBI] = V;
3666if (!
N->mayHaveSideEffects()) {
3667N->eraseFromParent();
// Instruction folded away, don't need actual 3672if (!BBI->use_empty())
3673 TranslateMap[&*BBI] =
N;
3676// Copy all debug-info attached to instructions from the last we 3677// successfully clone, up to this instruction (they might have been 3679for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3680N->cloneDebugInfoFrom(&*SrcDbgCursor);
3681 SrcDbgCursor = std::next(BBI);
3682// Clone debug-info on this instruction too. 3683N->cloneDebugInfoFrom(&*BBI);
3685// Register the new instruction with the assumption cache if necessary. 3686if (
auto *Assume = dyn_cast<AssumeInst>(
N))
3692for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3693 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3694 InsertPt->cloneDebugInfoFrom(BI);
3697BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3703 Updates.
push_back({DominatorTree::Delete, EdgeBB, BB});
3704 Updates.
push_back({DominatorTree::Insert, EdgeBB, RealDest});
3708// For simplicity, we created a separate basic block for the edge. Merge 3709// it back into the predecessor if possible. This not only avoids 3710// unnecessary SimplifyCFG iterations, but also makes sure that we don't 3711// bypass the check for trivial cycles above. 3714// Signal repeat, simplifying any other constants. 3725 std::optional<bool> Result;
3726bool EverChanged =
false;
3728// Note that None means "we changed things, but recurse further." 3730 EverChanged |= Result == std::nullopt || *Result;
3731 }
while (Result == std::nullopt);
3735/// Given a BB that starts with the specified two-entry PHI node, 3736/// see if we can eliminate it. 3740bool SpeculateUnpredictables) {
3741// Ok, this is a two entry PHI node. Check to see if this is a simple "if 3742// statement", which has a very simple dominance structure. Basically, we 3743// are trying to find the condition that is being branched on, which 3744// subsequently causes this merge to happen. We really want control 3745// dependence information for this check, but simplifycfg can't keep it up 3746// to date, and this catches most of the cases we care about anyway. 3754// Don't bother if the branch will be constant folded trivially. 3755if (isa<ConstantInt>(IfCond))
3762 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3765"Will have either one or two blocks to speculate.");
3767// If the branch is non-unpredictable, see if we either predictably jump to 3768// the merge bb (if we have only a single 'then' block), or if we predictably 3769// jump to one specific 'then' block (if we have two of them). 3770// It isn't beneficial to speculatively execute the code 3771// from the block that we know is predictably not entered. 3772bool IsUnpredictable = DomBI->
getMetadata(LLVMContext::MD_unpredictable);
3773if (!IsUnpredictable) {
3776 (TWeight + FWeight) != 0) {
3781if (IfBlocks.
size() == 1) {
3783 DomBI->
getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3784if (BIBBProb >= Likely)
3787if (BITrueProb >= Likely || BIFalseProb >= Likely)
3793// Don't try to fold an unreachable block. For example, the phi node itself 3794// can't be the candidate if-condition for a select that we want to form. 3795if (
auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3796if (IfCondPhiInst->getParent() == BB)
3799// Okay, we found that we can merge this two-entry phi node into a select. 3800// Doing so would require us to fold *all* two entry phi nodes in this block. 3801// At some point this becomes non-profitable (particularly if the target 3802// doesn't support cmov's). Only do this transformation if there are two or 3803// fewer PHI nodes in this block. 3804unsigned NumPhis = 0;
3809// Loop over the PHI's seeing if we can promote them all to select 3810// instructions. While we are at it, keep track of the instructions 3811// that need to be moved to the dominating block. 3816if (SpeculateUnpredictables && IsUnpredictable)
3830 AggressiveInsts,
Cost, Budget,
TTI, AC) ||
3832 AggressiveInsts,
Cost, Budget,
TTI, AC))
3836// If we folded the first phi, PN dangles at this point. Refresh it. If 3837// we ran out of PHIs then we simplified them all. 3838 PN = dyn_cast<PHINode>(BB->
begin());
3842// Return true if at least one of these is a 'not', and another is either 3843// a 'not' too, or a constant. 3844auto CanHoistNotFromBothValues = [](
Value *V0,
Value *V1) {
3851// Don't fold i1 branches on PHIs which contain binary operators or 3852// (possibly inverted) select form of or/ands, unless one of 3853// the incoming values is an 'not' and another one is freely invertible. 3854// These can often be turned into switches and other things. 3855auto IsBinOpOrAnd = [](
Value *V) {
3866// If all PHI nodes are promotable, check to make sure that all instructions 3867// in the predecessor blocks can be promoted as well. If not, we won't be able 3868// to get rid of the control flow, so it's not worth promoting to select 3872if (!AggressiveInsts.
count(&*
I) && !
I->isDebugOrPseudoInst()) {
3873// This is not an aggressive instruction that we can promote. 3874// Because of this, we won't be able to get rid of the control flow, so 3875// the xform is not worth it. 3879// If either of the blocks has it's address taken, we can't do this fold. 3885if (IsUnpredictable)
dbgs() <<
" (unpredictable)";
3887 <<
" F: " << IfFalse->
getName() <<
"\n");
3889// If we can still promote the PHI nodes after this gauntlet of tests, 3890// do all of the PHI's now. 3892// Move all 'aggressive' instructions, which are defined in the 3893// conditional parts of the if's up to the dominating block. 3898// Propagate fast-math-flags from phi nodes to replacement selects. 3900// Change the PHI node into a select instruction. 3905 isa<FPMathOperator>(PN) ? PN :
nullptr,
3909 PN->eraseFromParent();
3912// At this point, all IfBlocks are empty, so our if statement 3913// has been flattened. Change DomBlock to jump directly to our new block to 3914// avoid other simplifycfg's kicking in on the diamond. 3919 Updates.
push_back({DominatorTree::Insert, DomBlock, BB});
3934// Try to relax logical op to binary op. 3937if (Opc == Instruction::And)
3939if (Opc == Instruction::Or)
3944/// Return true if either PBI or BI has branch weight available, and store 3945/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does 3946/// not have branch weight, use 1:1 as its weight. 3952bool PredHasWeights =
3954bool SuccHasWeights =
3956if (PredHasWeights || SuccHasWeights) {
3958 PredTrueWeight = PredFalseWeight = 1;
3960 SuccTrueWeight = SuccFalseWeight = 1;
3967/// Determine if the two branches share a common destination and deduce a glue 3968/// that joins the branches' conditions to arrive at the common destination if 3969/// that would be profitable. 3970static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3974"Both blocks must end with a conditional branches.");
3976"PredBB must be a predecessor of BB.");
3978// We have the potential to fold the conditions together, but if the 3979// predecessor branch is predictable, we may not want to merge them. 3984 (PTWeight + PFWeight) != 0) {
3991// Speculate the 2nd condition unless the 1st is probably true. 3992if (PBITrueProb.
isUnknown() || PBITrueProb < Likely)
3995// Speculate the 2nd condition unless the 1st is probably false. 3999// Speculate the 2nd condition unless the 1st is probably true. 4000if (PBITrueProb.
isUnknown() || PBITrueProb < Likely)
4003// Speculate the 2nd condition unless the 1st is probably false. 4017// Determine if the two branches share a common destination. 4021 std::tie(CommonSucc, Opc, InvertPredCond) =
4024LLVM_DEBUG(
dbgs() <<
"FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4027// The builder is used to create instructions to eliminate the branch in BB. 4028// If BB's terminator has !annotation metadata, add it to the new 4031 {LLVMContext::MD_annotation});
4033// If we need to invert the condition in the pred block to match, do so now. 4034if (InvertPredCond) {
4041// Before cloning instructions, notify the successor basic block that it 4042// is about to have a new predecessor. This will update PHI nodes, 4043// which will allow us to update live-out uses of bonus instructions. 4046// Try to update branch weights. 4047uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4049 SuccTrueWeight, SuccFalseWeight)) {
4053// PBI: br i1 %x, BB, FalseDest 4054// BI: br i1 %y, UniqueSucc, FalseDest 4055// TrueWeight is TrueWeight for PBI * TrueWeight for BI. 4056 NewWeights.
push_back(PredTrueWeight * SuccTrueWeight);
4057// FalseWeight is FalseWeight for PBI * TotalWeight for BI + 4058// TrueWeight for PBI * FalseWeight for BI. 4059// We assume that total weights of a BranchInst can fit into 32 bits. 4060// Therefore, we will not have overflow using 64-bit arithmetic. 4062 (SuccFalseWeight + SuccTrueWeight) +
4063 PredTrueWeight * SuccFalseWeight);
4065// PBI: br i1 %x, TrueDest, BB 4066// BI: br i1 %y, TrueDest, UniqueSucc 4067// TrueWeight is TrueWeight for PBI * TotalWeight for BI + 4068// FalseWeight for PBI * TrueWeight for BI. 4069 NewWeights.
push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4070 PredFalseWeight * SuccTrueWeight);
4071// FalseWeight is FalseWeight for PBI * FalseWeight for BI. 4072 NewWeights.
push_back(PredFalseWeight * SuccFalseWeight);
4075// Halve the weights if any of them cannot fit in an uint32_t 4081// TODO: If BB is reachable from all paths through PredBlock, then we 4082// could replace PBI's branch probabilities with BI's. 4086// Now, update the CFG. 4090 DTU->
applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4091 {DominatorTree::Delete, PredBlock, BB}});
4093// If BI was a loop latch, it may have had associated loop metadata. 4094// We need to copy it to the new latch, that is, PBI. 4112// Now that the Cond was cloned into the predecessor basic block, 4113// or/and the two conditions together. 4118 ++NumFoldBranchToCommonDest;
4122/// Return if an instruction's type or any of its operands' types are a vector 4125returnI.getType()->isVectorTy() ||
any_of(
I.operands(), [](
Use &U) {
4126 return U->getType()->isVectorTy();
4130/// If this basic block is simple enough, and if a predecessor branches to us 4131/// and one of our successors, fold the block into the predecessor and use 4132/// logical operations to pick the right destination. 4136unsigned BonusInstThreshold) {
4137// If this block ends with an unconditional branch, 4138// let speculativelyExecuteBB() deal with it. 4150 (!isa<CmpInst>(
Cond) && !isa<BinaryOperator>(
Cond) &&
4151 !isa<SelectInst>(
Cond)) ||
4152Cond->getParent() != BB || !
Cond->hasOneUse())
4155// Finally, don't infinitely unroll conditional loops. 4159// With which predecessors will we want to deal with? 4162BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4164// Check that we have two conditional branches. If there is a PHI node in 4165// the common successor, verify that the same value flows in from both 4170// Determine if the two branches share a common destination. 4175 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4179// Check the cost of inserting the necessary logic before performing the 4192// Ok, we do want to deal with this predecessor. Record it. 4196// If there aren't any predecessors into which we can fold, 4197// don't bother checking the cost. 4201// Only allow this transformation if computing the condition doesn't involve 4202// too many instructions and these involved instructions can be executed 4203// unconditionally. We denote all involved instructions except the condition 4204// as "bonus instructions", and only allow this transformation when the 4205// number of the bonus instructions we'll need to create when cloning into 4206// each predecessor does not exceed a certain threshold. 4207unsigned NumBonusInsts = 0;
4208bool SawVectorOp =
false;
4209constunsigned PredCount = Preds.
size();
4211// Don't check the branch condition comparison itself. 4214// Ignore dbg intrinsics, and the terminator. 4215if (isa<DbgInfoIntrinsic>(
I) || isa<BranchInst>(
I))
4217// I must be safe to execute unconditionally. 4222// Account for the cost of duplicating this instruction into each 4223// predecessor. Ignore free instructions. 4226 NumBonusInsts += PredCount;
4228// Early exits once we reach the limit. 4234auto IsBCSSAUse = [BB, &
I](
Use &U) {
4235auto *UI = cast<Instruction>(U.getUser());
4236if (
auto *PN = dyn_cast<PHINode>(UI))
4238return UI->
getParent() == BB &&
I.comesBefore(UI);
4241// Does this instruction require rewriting of uses? 4242if (!
all_of(
I.uses(), IsBCSSAUse))
4246 BonusInstThreshold *
4250// Ok, we have the budget. Perform the transformation. 4252auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4258// If there is only one store in BB1 and BB2, return it, otherwise return 4262for (
auto *BB : {BB1, BB2}) {
4266if (
auto *SI = dyn_cast<StoreInst>(&
I)) {
4268// Multiple stores seen. 4278Value *AlternativeV =
nullptr) {
4279// PHI is going to be a PHI node that allows the value V that is defined in 4280// BB to be referenced in BB's only successor. 4282// If AlternativeV is nullptr, the only value we care about in PHI is V. It 4283// doesn't matter to us what the other operand is (it'll never get used). We 4284// could just create a new PHI with an undef incoming value, but that could 4285// increase register pressure if EarlyCSE/InstCombine can't fold it with some 4286// other PHI. So here we directly look for some PHI in BB's successor with V 4287// as an incoming operand. If we find one, we use it, else we create a new 4290// If AlternativeV is not nullptr, we care about both incoming values in PHI. 4291// PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV] 4292// where OtherBB is the single other predecessor of BB's only successor. 4296for (
autoI = Succ->
begin(); isa<PHINode>(
I); ++
I)
4297if (cast<PHINode>(
I)->getIncomingValueForBlock(BB) == V) {
4298PHI = cast<PHINode>(
I);
4304BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4305if (
PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4312// If V is not an instruction defined in BB, just return it. 4314 (!isa<Instruction>(V) || cast<Instruction>(V)->
getParent() != BB))
4319PHI->addIncoming(V, BB);
4331// For every pointer, there must be exactly two stores, one coming from 4332// PTB or PFB, and the other from QTB or QFB. We don't support more than one 4333// store (to any address) in PTB,PFB or QTB,QFB. 4334// FIXME: We could relax this restriction with a bit more work and performance 4338if (!PStore || !QStore)
4341// Now check the stores are compatible. 4347// Check that sinking the store won't cause program behavior changes. Sinking 4348// the store out of the Q blocks won't change any behavior as we're sinking 4349// from a block to its unconditional successor. But we're moving a store from 4350// the P blocks down through the middle block (QBI) and past both QFB and QTB. 4351// So we need to check that there are no aliasing loads or stores in 4352// QBI, QTB and QFB. We also need to check there are no conflicting memory 4353// operations between PStore and the end of its parent block. 4355// The ideal way to do this is to query AliasAnalysis, but we don't 4356// preserve AA currently so that is dangerous. Be super safe and just 4357// check there are no other memory operations at all. 4359if (
I.mayReadOrWriteMemory())
4362if (&
I != QStore &&
I.mayReadOrWriteMemory())
4366if (&
I != QStore &&
I.mayReadOrWriteMemory())
4370if (&*
I != PStore &&
I->mayReadOrWriteMemory())
4373// If we're not in aggressive mode, we only optimize if we have some 4374// confidence that by optimizing we'll allow P and/or Q to be if-converted. 4378// Heuristic: if the block can be if-converted/phi-folded and the 4379// instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to 4380// thread this store. 4385// Consider terminator instruction to be free. 4386if (
I.isTerminator())
4388// If this is one the stores that we want to speculate out of this BB, 4389// then don't count it's cost, consider it to be free. 4390if (
auto *S = dyn_cast<StoreInst>(&
I))
4393// Else, we have a white-list of instructions that we are ak speculating. 4394if (!isa<BinaryOperator>(
I) && !isa<GetElementPtrInst>(
I))
4395returnfalse;
// Not in white-list - not worthwhile folding. 4396// And finally, if this is a non-free instruction that we are okay 4397// speculating, ensure that we consider the speculation budget. 4401returnfalse;
// Eagerly refuse to fold as soon as we're out of budget. 4404"When we run out of budget we will eagerly return from within the " 4405"per-instruction loop.");
4409const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4411 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4412 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4415// If PostBB has more than two predecessors, we need to split it so we can 4418// We know that QFB's only successor is PostBB. And QFB has a single 4419// predecessor. If QTB exists, then its only successor is also PostBB. 4420// If QTB does not exist, then QFB's only predecessor has a conditional 4421// branch to QFB and PostBB. 4430// OK, we're going to sink the stores to PostBB. The store has to be 4431// conditional though, so first create the predicate. 4457/*Unreachable=*/false,
4458/*BranchWeights=*/nullptr, DTU);
4463// Choose the minimum alignment. If we could prove both stores execute, we 4464// could use biggest one. In this case, though, we only know that one of the 4465// stores executes. And we don't know it's safe to take the alignment from a 4466// store that doesn't execute. 4478// The intention here is to find diamonds or triangles (see below) where each 4479// conditional block contains a store to the same address. Both of these 4480// stores are conditional, so they can't be unconditionally sunk. But it may 4481// be profitable to speculatively sink the stores into one merged store at the 4482// end, and predicate the merged store on the union of the two conditions of 4485// This can reduce the number of stores executed if both of the conditions are 4486// true, and can allow the blocks to become small enough to be if-converted. 4487// This optimization will also chain, so that ladders of test-and-set 4488// sequences can be if-converted away. 4490// We only deal with simple diamonds or triangles: 4492// PBI or PBI or a combination of the two 4502// We model triangles as a type of diamond with a nullptr "true" block. 4503// Triangles are canonicalized so that the fallthrough edge is represented by 4504// a true condition, as in the diagram above. 4511// Make sure we have a good guess for PostBB. If QTB's only successor is 4512// QFB, then QFB is a better PostBB. 4516// If we couldn't find a good PostBB, stop. 4520bool InvertPCond =
false, InvertQCond =
false;
4521// Canonicalize fallthroughs to the true branches. 4531// From this point on we can assume PTB or QTB may be fallthroughs but PFB 4532// and QFB may not. Model fallthroughs as a nullptr block. 4538// Legality bailouts. We must have at least the non-fallthrough blocks and 4539// the post-dominating block, and the non-fallthroughs must only have one 4545 !HasOnePredAndOneSucc(QFB, QBI->
getParent(), PostBB))
4548 (QTB && !HasOnePredAndOneSucc(QTB, QBI->
getParent(), PostBB)))
4553// OK, this is a sequence of two diamonds or triangles. 4554// Check if there are stores in PTB or PFB that are repeated in QTB or QFB. 4556for (
auto *BB : {PTB, PFB}) {
4561 PStoreAddresses.
insert(SI->getPointerOperand());
4563for (
auto *BB : {QTB, QFB}) {
4568 QStoreAddresses.
insert(SI->getPointerOperand());
4572// set_intersect mutates PStoreAddresses in place. Rename it here to make it 4573// clear what it contains. 4574auto &CommonAddresses = PStoreAddresses;
4577for (
auto *
Address : CommonAddresses)
4580 InvertPCond, InvertQCond, DTU,
DL,
TTI);
4584/// If the previous block ended with a widenable branch, determine if reusing 4585/// the target block is profitable and legal. This will have the effect of 4586/// "widening" PBI, but doesn't require us to reason about hosting safety. 4589// TODO: This can be generalized in two important ways: 4590// 1) We can allow phi nodes in IfFalseBB and simply reuse all the input 4591// values from the PBI edge. 4592// 2) We can sink side effecting instructions into BI's fallthrough 4593// successor provided they doesn't contribute to computation of 4598 !BI->
getParent()->getSinglePredecessor())
4600if (!IfFalseBB->
phis().empty())
4602// This helps avoid infinite loop with SimplifyCondBranchToCondBranch which 4603// may undo the transform done here. 4604// TODO: There might be a more fine-grained solution to this. 4607// Use lambda to lazily compute expensive condition after cheap ones. 4610returnI.mayWriteToMemory() ||
I.mayHaveSideEffects();
4613if (BI->
getSuccessor(1) != IfFalseBB &&
// no inf looping 4621 {{DominatorTree::Insert, BI->
getParent(), IfFalseBB},
4622 {DominatorTree::Delete, BI->
getParent(), OldSuccessor}});
4625if (BI->
getSuccessor(0) != IfFalseBB &&
// no inf looping 4633 {{DominatorTree::Insert, BI->
getParent(), IfFalseBB},
4634 {DominatorTree::Delete, BI->
getParent(), OldSuccessor}});
4640/// If we have a conditional branch as a predecessor of another block, 4641/// this function tries to simplify it. We know 4642/// that PBI and BI are both conditional branches, and BI is in one of the 4643/// successor blocks of PBI - PBI branches to BI. 4651// If this block ends with a branch instruction, and if there is a 4652// predecessor that ends on a branch of the same condition, make 4653// this conditional branch redundant. 4656// Okay, the outcome of this conditional branch is statically 4657// knowable. If this block had a single pred, handle specially, otherwise 4658// foldCondBranchOnValueKnownInPredecessor() will handle it. 4660// Turn this into a branch on constant. 4664returntrue;
// Nuke the branch on constant. 4668// If the previous block ended with a widenable branch, determine if reusing 4669// the target block is profitable and legal. This will have the effect of 4670// "widening" PBI, but doesn't require us to reason about hosting safety. 4674// If both branches are conditional and both contain stores to the same 4675// address, remove the stores from the conditionals and create a conditional 4676// merged store at the end. 4680// If this is a conditional branch in an empty block, and if any 4681// predecessors are a conditional branch to one of our destinations, 4682// fold the conditions into logical ops and one cond br. 4684// Ignore dbg intrinsics. 4705// Check to make sure that the other destination of this branch 4706// isn't BB itself. If so, this is an infinite loop that will 4707// keep getting unwound. 4711// If predecessor's branch probability to BB is too low don't merge branches. 4713if (!PBI->
getMetadata(LLVMContext::MD_unpredictable) &&
4715 (
static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4719static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4722if (CommonDestProb >= Likely)
4726// Do not perform this transformation if it would require 4727// insertion of a large number of select instructions. For targets 4728// without predication/cmovs, this is a big pessimization. 4732unsigned NumPhis = 0;
4735if (NumPhis > 2)
// Disable this xform. 4739// Finally, if everything is ok, fold the branches to logical ops. 4747// If OtherDest *is* BB, then BB is a basic block with a single conditional 4748// branch in it, where one edge (OtherDest) goes back to itself but the other 4749// exits. We don't *know* that the program avoids the infinite loop 4750// (even though that seems likely). If we do this xform naively, we'll end up 4751// recursively unpeeling the loop. Since we know that (after the xform is 4752// done) that the block *is* infinite if reached, we just make it an obviously 4753// infinite loop with no cond branch. 4754if (OtherDest == BB) {
4755// Insert it at the end of the function, because it's either code, 4756// or it won't matter if it's hot. :) 4761 Updates.
push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4762 OtherDest = InfLoopBlock;
4767// BI may have other predecessors. Because of this, we leave 4768// it alone, but modify PBI. 4770// Make sure we get to CommonDest on True&True directions. 4780// Merge the conditions. 4784// Modify PBI to branch on the new condition to the new dests. 4796// Update branch weight for PBI. 4797uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4798uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4801 SuccTrueWeight, SuccFalseWeight);
4803 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4804 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4805 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4806 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4807// The weight to CommonDest should be PredCommon * SuccTotal + 4808// PredOther * SuccCommon. 4809// The weight to OtherDest should be PredOther * SuccOther. 4810uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4811 PredOther * SuccCommon,
4812 PredOther * SuccOther};
4813// Halve the weights if any of them cannot fit in an uint32_t 4819// OtherDest may have phi nodes. If so, add an entry from PBI's 4820// block that are identical to the entries for BI's block. 4823// We know that the CommonDest already had an edge from PBI to 4824// it. If it has PHIs though, the PHIs may have different 4825// entries for BB and PBI's BB. If so, insert a select to make 4832// Insert a select in PBI to pick the right value. 4836// Although the select has the same condition as PBI, the original branch 4837// weights for PBI do not apply to the new select because the select's 4838// 'logical' edges are incoming edges of the phi that is eliminated, not 4839// the outgoing edges of PBI. 4841uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4842uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4843uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4844uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4845// The weight to PredCommonDest should be PredCommon * SuccTotal. 4846// The weight to PredOtherDest should be PredOther * SuccCommon. 4847uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4848 PredOther * SuccCommon};
4853/*IsExpected=*/false);
4861// This basic block is probably dead. We know it has at least 4862// one fewer predecessor. 4866// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is 4867// true or to FalseBB if Cond is false. 4868// Takes care of updating the successors and removing the old terminator. 4869// Also makes sure not to introduce new successors by assuming that edges to 4870// non-successor TrueBBs and FalseBBs aren't reachable. 4871bool SimplifyCFGOpt::simplifyTerminatorOnSelect(
Instruction *OldTerm,
4877// Remove any superfluous successor edges from the CFG. 4878// First, figure out which successors to preserve. 4879// If TrueBB and FalseBB are equal, only try to preserve one copy of that 4882BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB :
nullptr;
4886// Then remove the rest. 4888// Make sure only to keep exactly one copy of each edge. 4889if (Succ == KeepEdge1)
4891elseif (Succ == KeepEdge2)
4895/*KeepOneInputPHIs=*/true);
4897if (Succ != TrueBB && Succ != FalseBB)
4898 RemovedSuccessors.
insert(Succ);
4905// Insert an appropriate new terminator. 4906if (!KeepEdge1 && !KeepEdge2) {
4907if (TrueBB == FalseBB) {
4908// We were only looking for one successor, and it was present. 4909// Create an unconditional branch to it. 4912// We found both of the successors we were looking for. 4913// Create a conditional branch sharing the condition of the select. 4915if (TrueWeight != FalseWeight)
4918 }
elseif (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4919// Neither of the selected blocks were successors, so this 4920// terminator must be unreachable. 4923// One of the selected values was a successor, but the other wasn't. 4924// Insert an unconditional branch to the one that was found; 4925// the edge to the one that wasn't must be unreachable. 4927// Only TrueBB was found. 4930// Only FalseBB was found. 4940for (
auto *RemovedSuccessor : RemovedSuccessors)
4941 Updates.
push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4942 DTU->applyUpdates(Updates);
4949// (switch (select cond, X, Y)) on constant X, Y 4950// with a branch - conditional if X and Y lead to distinct BBs, 4951// unconditional otherwise. 4952bool SimplifyCFGOpt::simplifySwitchOnSelect(
SwitchInst *SI,
4954// Check for constant integer values in the select. 4957if (!TrueVal || !FalseVal)
4960// Find the relevant condition and destinations. 4962BasicBlock *TrueBB =
SI->findCaseValue(TrueVal)->getCaseSuccessor();
4963BasicBlock *FalseBB =
SI->findCaseValue(FalseVal)->getCaseSuccessor();
4965// Get weight for TrueBB and FalseBB. 4966uint32_t TrueWeight = 0, FalseWeight = 0;
4971if (Weights.
size() == 1 +
SI->getNumCases()) {
4973 (
uint32_t)Weights[
SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4975 (
uint32_t)Weights[
SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4979// Perform the actual simplification. 4980return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4985// (indirectbr (select cond, blockaddress(@fn, BlockA), 4986// blockaddress(@fn, BlockB))) 4988// (br cond, BlockA, BlockB). 4989bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(
IndirectBrInst *IBI,
4991// Check that both operands of the select are block addresses. 4997// Extract the actual blocks. 5001// Perform the actual simplification. 5002return simplifyTerminatorOnSelect(IBI,
SI->getCondition(), TrueBB, FalseBB, 0,
5006/// This is called when we find an icmp instruction 5007/// (a seteq/setne with a constant) as the only instruction in a 5008/// block that ends with an uncond branch. We are looking for a very specific 5009/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In 5010/// this case, we merge the first two "or's of icmp" into a switch, but then the 5011/// default value goes to an uncond block with a seteq in it, we get something 5014/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] 5016/// %tmp = icmp eq i8 %A, 92 5019/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] 5021/// We prefer to split the edge to 'end' so that there is a true/false entry to 5022/// the PHI, merging the third icmp into the switch. 5023bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5027// If the block has any PHIs in it or the icmp has multiple uses, it is too 5035// The pattern we're looking for is where our only predecessor is a switch on 5036// 'V' and this block is the default case for the switch. In this case we can 5037// fold the compared value into the switch to simplify things. 5043if (
SI->getCondition() != V)
5046// If BB is reachable on a non-default case, then we simply know the value of 5047// V in this block. Substitute it and constant fold the icmp instruction 5049if (
SI->getDefaultDest() != BB) {
5051assert(VVal &&
"Should have a unique destination value");
5058// BB is now empty, so it is likely to simplify away. 5059return requestResimplify();
5062// Ok, the block is reachable from the default dest. If the constant we're 5063// comparing exists in one of the other edges, then we can constant fold ICI 5065if (
SI->findCaseValue(Cst) !=
SI->case_default()) {
5074// BB is now empty, so it is likely to simplify away. 5075return requestResimplify();
5078// The use of the icmp has to be in the 'end' block, by the only PHI node in 5082if (PHIUse ==
nullptr || PHIUse != &SuccBlock->
front() ||
5086// If the icmp is a SETEQ, then the default dest gets false, the new edge gets 5094// Replace ICI (which is used by the PHI for the default value) with true or 5095// false depending on if it is EQ or NE. 5101// Okay, the switch goes to this block on a default value. Add an edge from 5102// the switch to the merge point on the compared value. 5107auto W0 = SIW.getSuccessorWeight(0);
5111 SIW.setSuccessorWeight(0, *NewW);
5113 SIW.addCase(Cst, NewBB, NewW);
5115 Updates.
push_back({DominatorTree::Insert, Pred, NewBB});
5118// NewBB branches to the phi block, add the uncond branch and the phi entry. 5124 Updates.
push_back({DominatorTree::Insert, NewBB, SuccBlock});
5125 DTU->applyUpdates(Updates);
5130/// The specified branch is a conditional branch. 5131/// Check to see if it is branching on an or/and chain of icmp instructions, and 5132/// fold it into a switch instruction if so. 5133bool SimplifyCFGOpt::simplifyBranchOnICmpChain(
BranchInst *BI,
5140// Change br (X == 0 | X == 1), T, F into a switch instruction. 5141// If this is a bunch of seteq's or'd together, or if it's a bunch of 5142// 'setne's and'ed together, collect them. 5144// Try to gather values from a chain of and/or to be turned into a switch 5145 ConstantComparesGatherer ConstantCompare(
Cond,
DL);
5148Value *CompVal = ConstantCompare.CompValue;
5149unsigned UsedICmps = ConstantCompare.UsedICmps;
5150Value *ExtraCase = ConstantCompare.Extra;
5152// If we didn't have a multiply compared value, fail. 5156// Avoid turning single icmps into a switch. 5162// There might be duplicate constants in the list, which the switch 5163// instruction can't handle, remove them now. 5167// If Extra was used, we require at least two switch values to do the 5168// transformation. A switch with one value is just a conditional branch. 5169if (ExtraCase && Values.
size() < 2)
5172// TODO: Preserve branch weight metadata, similarly to how 5173// foldValueComparisonIntoPredecessors preserves it. 5175// Figure out which block is which destination. 5184 <<
" cases into SWITCH. BB is:\n" 5189// If there are any extra values that couldn't be folded into the switch 5190// then we evaluate them with an explicit branch first. Split the block 5191// right before the condbr to handle it. 5194/*MSSAU=*/nullptr,
"switch.early.test");
5196// Remove the uncond branch added to the old block. 5200// There can be an unintended UB if extra values are Poison. Before the 5201// transformation, extra values may not be evaluated according to the 5202// condition, and it will not raise UB. But after transformation, we are 5203// evaluating extra values before checking the condition, and it will raise 5204// UB. It can be solved by adding freeze instruction to extra values. 5218 Updates.
push_back({DominatorTree::Insert, BB, EdgeBB});
5220// If there are PHI nodes in EdgeBB, then we need to add a new entry to them 5221// for the edge we just added. 5224LLVM_DEBUG(
dbgs() <<
" ** 'icmp' chain unhandled condition: " << *ExtraCase
5225 <<
"\nEXTRABB = " << *BB);
5230// Convert pointer to int before we switch. 5233 CompVal,
DL.getIntPtrType(CompVal->
getType()),
"magicptr");
5236// Create the new switch instruction now. 5239// Add all of the 'cases' to the switch instruction. 5240for (
unsigned i = 0, e = Values.
size(); i != e; ++i)
5241New->addCase(Values[i], EdgeBB);
5243// We added edges from PI to the EdgeBB. As such, if there were any 5244// PHI nodes in EdgeBB, they need entries to be added corresponding to 5245// the number of edges added. 5247PHINode *PN = cast<PHINode>(BBI);
5249for (
unsigned i = 0, e = Values.
size() - 1; i != e; ++i)
5253// Erase the old branch instruction. 5256 DTU->applyUpdates(Updates);
5258LLVM_DEBUG(
dbgs() <<
" ** 'icmp' chain result is:\n" << *BB <<
'\n');
5264return simplifyCommonResume(RI);
5265elseif (isa<LandingPadInst>(RI->
getParent()->getFirstNonPHIIt()) &&
5267// The resume must unwind the exception that caused control to branch here. 5268return simplifySingleResume(RI);
5273// Check if cleanup block is empty 5276auto *
II = dyn_cast<IntrinsicInst>(&
I);
5281switch (IntrinsicID) {
5282case Intrinsic::dbg_declare:
5283case Intrinsic::dbg_value:
5284case Intrinsic::dbg_label:
5285case Intrinsic::lifetime_end:
5294// Simplify resume that is shared by several landing pads (phi of landing pad). 5295bool SimplifyCFGOpt::simplifyCommonResume(
ResumeInst *RI) {
5298// Check that there are no other instructions except for debug and lifetime 5299// intrinsics between the phi's and resume instruction. 5305auto *PhiLPInst = cast<PHINode>(RI->
getValue());
5307// Check incoming blocks to see if any of them are trivial. 5308for (
unsignedIdx = 0,
End = PhiLPInst->getNumIncomingValues();
Idx !=
End;
5310auto *IncomingBB = PhiLPInst->getIncomingBlock(
Idx);
5311auto *IncomingValue = PhiLPInst->getIncomingValue(
Idx);
5313// If the block has other successors, we can not delete it because 5314// it has other dependents. 5315if (IncomingBB->getUniqueSuccessor() != BB)
5318auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5319// Not the landing pad that caused the control to branch here. 5320if (IncomingValue != LandingPad)
5324make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5325 TrivialUnwindBlocks.
insert(IncomingBB);
5328// If no trivial unwind blocks, don't do any simplifications. 5329if (TrivialUnwindBlocks.
empty())
5332// Turn all invokes that unwind here into calls. 5333for (
auto *TrivialBB : TrivialUnwindBlocks) {
5334// Blocks that will be simplified should be removed from the phi node. 5335// Note there could be multiple edges to the resume block, and we need 5336// to remove them all. 5337while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5346// In each SimplifyCFG run, only the current processed block can be erased. 5347// Otherwise, it will break the iteration of SimplifyCFG pass. So instead 5348// of erasing TrivialBB, we only remove the branch to the common resume 5349// block so that we can later erase the resume block since it has no 5351 TrivialBB->getTerminator()->eraseFromParent();
5354 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5357// Delete the resume block if all its predecessors have been removed. 5361return !TrivialUnwindBlocks.empty();
5364// Simplify resume that is only used by a single (non-phi) landing pad. 5365bool SimplifyCFGOpt::simplifySingleResume(
ResumeInst *RI) {
5369"Resume must unwind the exception that caused control to here");
5371// Check that there are no other instructions except for debug intrinsics. 5373 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5376// Turn all invokes that unwind here into calls and delete the basic block. 5382// The landingpad is now unreachable. Zap it. 5388// If this is a trivial cleanup pad that executes no instructions, it can be 5389// eliminated. If the cleanup pad continues to the caller, any predecessor 5390// that is an EH pad will be updated to continue to the caller and any 5391// predecessor that terminates with an invoke instruction will have its invoke 5392// instruction converted to a call instruction. If the cleanup pad being 5393// simplified does not continue to the caller, each predecessor will be 5394// updated to continue to the unwind destination of the cleanup pad being 5399// This isn't an empty cleanup. 5402// We cannot kill the pad if it has multiple uses. This typically arises 5403// from unreachable basic blocks. 5407// Check that there are no other instructions except for benign intrinsics. 5409 make_range<Instruction *>(CPInst->
getNextNode(), RI)))
5412// If the cleanup return we are simplifying unwinds to the caller, this will 5413// set UnwindDest to nullptr. 5416// We're about to remove BB from the control flow. Before we do, sink any 5417// PHINodes into the unwind destination. Doing this before changing the 5418// control flow avoids some potentially slow checks, since we can currently 5419// be certain that UnwindDest and BB have no common predecessors (since they 5420// are both EH pads). 5422// First, go through the PHI nodes in UnwindDest and update any nodes that 5423// reference the block we are removing 5425intIdx = DestPN.getBasicBlockIndex(BB);
5426// Since BB unwinds to UnwindDest, it has to be in the PHI node. 5428// This PHI node has an incoming value that corresponds to a control 5429// path through the cleanup pad we are removing. If the incoming 5430// value is in the cleanup pad, it must be a PHINode (because we 5431// verified above that the block is otherwise empty). Otherwise, the 5432// value is either a constant or a value that dominates the cleanup 5433// pad being removed. 5435// Because BB and UnwindDest are both EH pads, all of their 5436// predecessors must unwind to these blocks, and since no instruction 5437// can have multiple unwind destinations, there will be no overlap in 5438// incoming blocks between SrcPN and DestPN. 5439Value *SrcVal = DestPN.getIncomingValue(
Idx);
5440PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5442bool NeedPHITranslation = SrcPN && SrcPN->
getParent() == BB;
5446 DestPN.addIncoming(
Incoming, Pred);
5450// Sink any remaining PHI nodes directly into UnwindDest. 5454// If the PHI node has no uses or all of its uses are in this basic 5455// block (meaning they are debug or lifetime intrinsics), just leave 5456// it. It will be erased when we erase BB below. 5459// Otherwise, sink this PHI node into UnwindDest. 5460// Any predecessors to UnwindDest which are not already represented 5461// must be back edges which inherit the value from the path through 5462// BB. In this case, the PHI value must reference itself. 5467// Also, add a dummy incoming value for the original BB itself, 5468// so that the PHI is well-formed until we drop said predecessor. 5473 std::vector<DominatorTree::UpdateType> Updates;
5475// We use make_early_inc_range here because we will remove all predecessors. 5477if (UnwindDest ==
nullptr) {
5489 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5490 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5503// Try to merge two cleanuppads together. 5505// Skip any cleanuprets which unwind to caller, there is nothing to merge 5511// This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't 5512// be safe to merge without code duplication. 5516// Verify that our cleanuppad's unwind destination is another cleanuppad. 5517auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->
front());
5518if (!SuccessorCleanupPad)
5522// Replace any uses of the successor cleanupad with the predecessor pad 5523// The only cleanuppad uses should be this cleanupret, it's cleanupret and 5524// funclet bundle operands. 5526// Remove the old cleanuppad. 5527 SuccessorCleanupPad->eraseFromParent();
5528// Now, we simply replace the cleanupret with a branch to the unwind 5537// It is possible to transiantly have an undef cleanuppad operand because we 5538// have deleted some, but not all, dead blocks. 5539// Eventually, this block will be deleted. 5552// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()! 5558// Ensure that any debug-info records that used to occur after the Unreachable 5559// are moved to in front of it -- otherwise they'll "dangle" at the end of 5563// Debug-info records on the unreachable inst itself should be deleted, as 5564// below we delete everything past the final executable instruction. 5567// If there are any instructions immediately before the unreachable that can 5568// be removed, do so. 5574break;
// Can not drop any more instructions. We're done here. 5575// Otherwise, this instruction can be freely erased, 5576// even if it is not side-effect free. 5578// Note that deleting EH's here is in fact okay, although it involves a bit 5579// of subtle reasoning. If this inst is an EH, all the predecessors of this 5580// block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn, 5581// and we can therefore guarantee this block will be erased. 5583// If we're deleting this, we're deleting any subsequent debug info, so 5584// delete DbgRecords. 5585 BBI->dropDbgRecords();
5587// Delete this instruction (any uses are guaranteed to be dead) 5589 BBI->eraseFromParent();
5593// If the unreachable instruction is the first in the block, take a gander 5594// at all of the predecessors of this instruction, and simplify them. 5595if (&BB->
front() != UI)
5598 std::vector<DominatorTree::UpdateType> Updates;
5604if (
auto *BI = dyn_cast<BranchInst>(TI)) {
5605// We could either have a proper unconditional branch, 5606// or a degenerate conditional branch with matching destinations. 5608 [BB](
auto *
Successor) { return Successor == BB; })) {
5616"The destinations are guaranteed to be different here.");
5627Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5633 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5634 }
elseif (
auto *SI = dyn_cast<SwitchInst>(TI)) {
5636for (
auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5637if (i->getCaseSuccessor() != BB) {
5642 i = SU.removeCase(i);
5646// Note that the default destination can't be removed! 5647if (DTU &&
SI->getDefaultDest() != BB)
5648 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5649 }
elseif (
auto *
II = dyn_cast<InvokeInst>(TI)) {
5650if (
II->getUnwindDest() == BB) {
5652 DTU->applyUpdates(Updates);
5656if (!CI->doesNotThrow())
5657 CI->setDoesNotThrow();
5660 }
elseif (
auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5661if (CSI->getUnwindDest() == BB) {
5663 DTU->applyUpdates(Updates);
5672 E = CSI->handler_end();
5675 CSI->removeHandler(
I);
5682 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5683if (CSI->getNumHandlers() == 0) {
5684if (CSI->hasUnwindDest()) {
5685// Redirect all predecessors of the block containing CatchSwitchInst 5686// to instead branch to the CatchSwitchInst's unwind destination. 5688for (
auto *PredecessorOfPredecessor :
predecessors(Predecessor)) {
5689 Updates.push_back({DominatorTree::Insert,
5690 PredecessorOfPredecessor,
5691 CSI->getUnwindDest()});
5692 Updates.push_back({DominatorTree::Delete,
5693 PredecessorOfPredecessor, Predecessor});
5696 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5698// Rewrite all preds to unwind to caller (or from invoke to call). 5700 DTU->applyUpdates(Updates);
5707// The catchswitch is no longer reachable. 5709 CSI->eraseFromParent();
5712 }
elseif (
auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5714assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5715"Expected to always have an unwind to BB.");
5717 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5725 DTU->applyUpdates(Updates);
5727// If this block is now dead, remove it. 5740for (
size_tI = 1, E = Cases.
size();
I != E; ++
I) {
5741if (Cases[
I - 1]->getValue() != Cases[
I]->getValue() + 1)
5749bool RemoveOrigDefaultBlock =
true) {
5751auto *BB = Switch->getParent();
5752auto *OrigDefaultBlock = Switch->getDefaultDest();
5753if (RemoveOrigDefaultBlock)
5754 OrigDefaultBlock->removePredecessor(BB);
5759 Switch->setDefaultDest(&*NewDefaultBlock);
5762 Updates.
push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
5763if (RemoveOrigDefaultBlock &&
5765 Updates.
push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5770/// Turn a switch into an integer range comparison and branch. 5771/// Switches with more than 2 destinations are ignored. 5772/// Switches with 1 destination are also ignored. 5773bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(
SwitchInst *SI,
5775assert(
SI->getNumCases() > 1 &&
"Degenerate switch?");
5778 !isa<UnreachableInst>(
SI->getDefaultDest()->getFirstNonPHIOrDbg());
5780auto *BB =
SI->getParent();
5782// Partition the cases into two sets with different destinations. 5783BasicBlock *DestA = HasDefault ?
SI->getDefaultDest() :
nullptr;
5788for (
auto Case :
SI->cases()) {
5802returnfalse;
// More than two destinations. 5805returnfalse;
// All destinations are the same and the default is unreachable 5808"Single-destination switch should have been folded.");
5810assert(DestB !=
SI->getDefaultDest());
5811assert(!CasesB.
empty() &&
"There must be non-default cases.");
5814// Figure out if one of the sets of cases form a contiguous range. 5819 ContiguousCases = &CasesA;
5820 ContiguousDest = DestA;
5823 ContiguousCases = &CasesB;
5824 ContiguousDest = DestB;
5829// Start building the compare and branch. 5833 ConstantInt::get(
Offset->getType(), ContiguousCases->
size());
5835Value *Sub =
SI->getCondition();
5836if (!
Offset->isNullValue())
5840// If NumCases overflowed, then all possible values jump to the successor. 5847// Update weight for the newly-created conditional branch. 5851if (Weights.
size() == 1 +
SI->getNumCases()) {
5854for (
size_tI = 0, E = Weights.
size();
I != E; ++
I) {
5855if (
SI->getSuccessor(
I) == ContiguousDest)
5856 TrueWeight += Weights[
I];
5858 FalseWeight += Weights[
I];
5860while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5868// Prune obsolete incoming values off the successors' PHI nodes. 5869for (
auto BBI = ContiguousDest->
begin(); isa<PHINode>(BBI); ++BBI) {
5870unsigned PreviousEdges = ContiguousCases->
size();
5871if (ContiguousDest ==
SI->getDefaultDest())
5873for (
unsignedI = 0, E = PreviousEdges - 1;
I != E; ++
I)
5874 cast<PHINode>(BBI)->removeIncomingValue(
SI->getParent());
5876for (
auto BBI = OtherDest->
begin(); isa<PHINode>(BBI); ++BBI) {
5877unsigned PreviousEdges =
SI->getNumCases() - ContiguousCases->
size();
5878if (OtherDest ==
SI->getDefaultDest())
5880for (
unsignedI = 0, E = PreviousEdges - 1;
I != E; ++
I)
5881 cast<PHINode>(BBI)->removeIncomingValue(
SI->getParent());
5884// Clean up the default block - it may have phis or other instructions before 5885// the unreachable terminator. 5889auto *UnreachableDefault =
SI->getDefaultDest();
5892SI->eraseFromParent();
5894if (!HasDefault && DTU)
5895 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5900/// Compute masked bits for the condition of a switch 5901/// and use it to remove dead cases. 5908// We can also eliminate cases by determining that their values are outside of 5909// the limited range of the condition based on how many significant (non-sign) 5910// bits are in the condition value. 5911unsigned MaxSignificantBitsInCond =
5914// Gather dead cases. 5918for (
constauto &Case : SI->cases()) {
5919auto *
Successor = Case.getCaseSuccessor();
5925constAPInt &CaseVal = Case.getCaseValue()->getValue();
5928 DeadCases.
push_back(Case.getCaseValue());
5936// If we can prove that the cases must cover all possible values, the 5937// default destination becomes dead and we can remove it. If we know some 5938// of the bits in the value, we can use that to more precisely compute the 5939// number of possible unique case values. 5941 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5942constunsigned NumUnknownBits =
5945if (HasDefault && DeadCases.
empty() &&
5946 NumUnknownBits < 64
/* avoid overflow */) {
5947uint64_t AllNumCases = 1ULL << NumUnknownBits;
5948if (SI->getNumCases() == AllNumCases) {
5952// When only one case value is missing, replace default with that case. 5953// Eliminating the default branch will provide more opportunities for 5954// optimization, such as lookup tables. 5955if (SI->getNumCases() == AllNumCases - 1) {
5956assert(NumUnknownBits > 1 &&
"Should be canonicalized to a branch");
5963for (
constauto &Case : SI->cases())
5964 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
5966 cast<ConstantInt>(ConstantInt::get(
Cond->getType(), MissingCaseVal));
5975if (DeadCases.
empty())
5981assert(CaseI != SI->case_default() &&
5982"Case was not found. Probably mistake in DeadCases forming.");
5983// Prune unused values from PHI nodes. 5984 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5989 std::vector<DominatorTree::UpdateType> Updates;
5991if (NumPerSuccessorCases[
Successor] == 0)
5992 Updates.push_back({DominatorTree::Delete, SI->getParent(),
Successor});
5999/// If BB would be eligible for simplification by 6000/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated 6001/// by an unconditional branch), look at the phi node for BB in the successor 6002/// block and see if the incoming value is equal to CaseValue. If so, return 6003/// the phi node, and set PhiIndex to BB's index in the phi node. 6007returnnullptr;
// BB must be empty to be a candidate for simplification. 6009returnnullptr;
// BB must be dominated by the switch. 6012if (!Branch || !Branch->isUnconditional())
6013returnnullptr;
// Terminator must be unconditional branch. 6018intIdx =
PHI.getBasicBlockIndex(BB);
6019assert(
Idx >= 0 &&
"PHI has no entry for predecessor?");
6022if (InValue != CaseValue)
6032/// Try to forward the condition of a switch instruction to a phi node 6033/// dominated by the switch, if that would mean that some of the destination 6034/// blocks of the switch can be folded away. Return true if a change is made. 6038 ForwardingNodesMap ForwardingNodes;
6041for (
constauto &Case : SI->cases()) {
6043BasicBlock *CaseDest = Case.getCaseSuccessor();
6045// Replace phi operands in successor blocks that are using the constant case 6046// value rather than the switch condition variable: 6048// switch i32 %x, label %default [ 6049// i32 17, label %succ 6052// %r = phi i32 ... [ 17, %switchbb ] ... 6054// %r = phi i32 ... [ %x, %switchbb ] ... 6057// This only works if there is exactly 1 incoming edge from the switch to 6058// a phi. If there is >1, that means multiple cases of the switch map to 1 6059// value in the phi, and that phi value is not the switch condition. Thus, 6060// this transform would not make sense (the phi would be invalid because 6061// a phi can't have different incoming values from the same block). 6062int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6063if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6064count(Phi.blocks(), SwitchBlock) == 1) {
6065 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6070// Collect phi nodes that are indirectly using this switch's case constants. 6073 ForwardingNodes[Phi].push_back(PhiIdx);
6076for (
auto &ForwardingNode : ForwardingNodes) {
6077PHINode *Phi = ForwardingNode.first;
6079// Check if it helps to fold PHI. 6083for (
int Index : Indexes)
6084 Phi->setIncomingValue(Index, SI->getCondition());
6091/// Return true if the backend will be able to handle 6092/// initializing an array of constants like C. 6094if (
C->isThreadDependent())
6096if (
C->isDLLImportDependent())
6099if (!isa<ConstantFP>(
C) && !isa<ConstantInt>(
C) &&
6100 !isa<ConstantPointerNull>(
C) && !isa<GlobalValue>(
C) &&
6101 !isa<UndefValue>(
C) && !isa<ConstantExpr>(
C))
6105// Pointer casts and in-bounds GEPs will not prohibit the backend from 6106// materializing the array of constants. 6107Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6118/// If V is a Constant, return it. Otherwise, try to look up 6119/// its constant value in ConstantPool, returning 0 if it's not there. 6128/// Try to fold instruction I into a constant. This works for 6129/// simple instructions such as binary operations where both operands are 6130/// constant or can be replaced by constants from the ConstantPool. Returns the 6131/// resulting constant on success, 0 otherwise. 6139if (
A->isAllOnesValue())
6141if (
A->isNullValue())
6147for (
unsignedN = 0, E =
I->getNumOperands();
N != E; ++
N) {
6157/// Try to determine the resulting constant values in phi nodes 6158/// at the common destination basic block, *CommonDest, for one of the case 6159/// destionations CaseDest corresponding to value CaseVal (0 for the default 6160/// case), of a switch instruction SI. 6166// The block from which we enter the common destination. 6169// If CaseDest is empty except for some side-effect free instructions through 6170// which we can constant-propagate the CaseVal, continue to its successor. 6172ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6174if (
I.isTerminator()) {
6175// If the terminator is a simple branch, continue to the next block. 6176if (
I.getNumSuccessors() != 1 ||
I.isSpecialTerminator())
6179 CaseDest =
I.getSuccessor(0);
6181// Instruction is side-effect free and constant. 6183// If the instruction has uses outside this block or a phi node slot for 6184// the block, it is not safe to bypass the instruction since it would then 6185// no longer dominate all its uses. 6186for (
auto &
Use :
I.uses()) {
6189if (
I->getParent() == CaseDest)
6192if (Phi->getIncomingBlock(
Use) == CaseDest)
6203// If we did not have a CommonDest before, use the current one. 6205 *CommonDest = CaseDest;
6206// If the destination isn't the common one, abort. 6207if (CaseDest != *CommonDest)
6210// Get the values for this case from phi nodes in the destination block. 6212intIdx =
PHI.getBasicBlockIndex(Pred);
6221// Be conservative about which kinds of constants we support. 6225 Res.push_back(std::make_pair(&
PHI, ConstVal));
6228return Res.size() > 0;
6231// Helper function used to add CaseVal to the list of cases that generate 6232// Result. Returns the updated number of cases that generate this result. 6234 SwitchCaseResultVectorTy &UniqueResults,
6236for (
auto &
I : UniqueResults) {
6237if (
I.first == Result) {
6238I.second.push_back(CaseVal);
6239returnI.second.size();
6242 UniqueResults.push_back(
6247// Helper function that initializes a map containing 6248// results for the PHI node of the common destination block for a switch 6249// instruction. Returns false if multiple PHI nodes have been found or if 6250// there is not a common destination block for the switch. 6253 SwitchCaseResultVectorTy &UniqueResults,
6257 uintptr_t MaxUniqueResults) {
6258for (
constauto &
I : SI->cases()) {
6261// Resulting value at phi nodes for this case value. 6267// Only one value per case is permitted. 6271// Add the case->result mapping to UniqueResults. 6272constsize_t NumCasesForResult =
6275// Early out if there are too many cases for this result. 6279// Early out if there are too many unique results. 6280if (UniqueResults.size() > MaxUniqueResults)
6283// Check the PHI consistency. 6289// Find the default result value. 6291BasicBlock *DefaultDest = SI->getDefaultDest();
6292getCaseResults(SI,
nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6294// If the default value is not found abort unless the default destination 6297 DefaultResults.
size() == 1 ? DefaultResults.
begin()->second :
nullptr;
6298if ((!DefaultResult &&
6305// Helper function that checks if it is possible to transform a switch with only 6306// two cases (or two cases + default) that produces a result into a select. 6307// TODO: Handle switches with more than 2 cases that map to the same result. 6311// If we are selecting between only two cases transform into a simple 6312// select or a two-way select if default is possible. 6314// switch (a) { %0 = icmp eq i32 %a, 10 6315// case 10: return 42; %1 = select i1 %0, i32 42, i32 4 6316// case 20: return 2; ----> %2 = icmp eq i32 %a, 20 6317// default: return 4; %3 = select i1 %2, i32 2, i32 %1 6319if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6320 ResultVector[1].second.size() == 1) {
6321ConstantInt *FirstCase = ResultVector[0].second[0];
6322ConstantInt *SecondCase = ResultVector[1].second[0];
6323Value *SelectValue = ResultVector[1].first;
6325Value *ValueCompare =
6326 Builder.
CreateICmpEQ(Condition, SecondCase,
"switch.selectcmp");
6327 SelectValue = Builder.
CreateSelect(ValueCompare, ResultVector[1].first,
6328 DefaultResult,
"switch.select");
6330Value *ValueCompare =
6331 Builder.
CreateICmpEQ(Condition, FirstCase,
"switch.selectcmp");
6332return Builder.
CreateSelect(ValueCompare, ResultVector[0].first,
6333 SelectValue,
"switch.select");
6336// Handle the degenerate case where two cases have the same result value. 6337if (ResultVector.size() == 1 && DefaultResult) {
6339unsigned CaseCount = CaseValues.
size();
6340// n bits group cases map to the same result: 6341// case 0,4 -> Cond & 0b1..1011 == 0 ? result : default 6342// case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default 6343// case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default 6346// Find mininal value. 6347for (
auto *Case : CaseValues)
6348if (Case->getValue().slt(MinCaseVal->
getValue()))
6351// Mark the bits case number touched. 6353for (
auto *Case : CaseValues)
6354 BitMask |= (Case->getValue() - MinCaseVal->
getValue());
6356// Check if cases with the same result can cover all number 6360 Condition = Builder.
CreateSub(Condition, MinCaseVal);
6364return Builder.
CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6368// Handle the degenerate case where two cases have the same value. 6369if (CaseValues.
size() == 2) {
6371"switch.selectcmp.case1");
6373"switch.selectcmp.case2");
6374Value *Cmp = Builder.
CreateOr(Cmp1, Cmp2,
"switch.selectcmp");
6375return Builder.
CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6382// Helper function to cleanup a switch instruction that has been converted into 6383// a select, fixing up PHI nodes and basic blocks. 6388 std::vector<DominatorTree::UpdateType> Updates;
6394 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6397// Remove the switch. 6399PHI->removeIncomingValueIf(
6400 [&](
unsignedIdx) {
returnPHI->getIncomingBlock(
Idx) == SelectBB; });
6401PHI->addIncoming(SelectValue, SelectBB);
6404for (
unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6410if (DTU && RemovedSuccessors.
insert(Succ).second)
6411 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6413 SI->eraseFromParent();
6418/// If a switch is only used to initialize one or more phi nodes in a common 6419/// successor block with only two different constant values, try to replace the 6420/// switch with a select. Returns true if the fold was made. 6428 SwitchCaseResultVectorTy UniqueResults;
6429// Collect all the cases that will deliver the same value from the switch. 6431DL,
TTI,
/*MaxUniqueResults*/ 2))
6434assert(
PHI !=
nullptr &&
"PHI for value select not found");
6447/// This class represents a lookup table that can be used to replace a switch. 6448classSwitchLookupTable {
6450 /// Create a lookup table to use as a switch replacement with the contents 6451 /// of Values, using DefaultValue to fill any holes in the table. 6457 /// Build instructions with Builder to retrieve the value at 6458 /// the position given by Index in the lookup table. 6461 /// Return true if a table with TableSize elements of 6462 /// type ElementType would fit in a target-legal register. 6467// Depending on the contents of the table, it can be represented in 6470// For tables where each element contains the same value, we just have to 6471// store that single value and return it for each lookup. 6474// For tables where there is a linear relationship between table index 6475// and values. We calculate the result with a simple multiplication 6476// and addition instead of a table lookup. 6479// For small tables with integer elements, we can pack them into a bitmap 6480// that fits into a target-legal register. Values are retrieved by 6481// shift and mask operations. 6484// The table is stored as an array of values. Values are retrieved by load 6485// instructions from the table. 6489// For SingleValueKind, this is the single value. 6492// For BitMapKind, this is the bitmap. 6496// For LinearMapKind, these are the constants used to derive the value. 6499bool LinearMapValWrapped =
false;
6501// For ArrayKind, this is the array. 6505}
// end anonymous namespace 6507SwitchLookupTable::SwitchLookupTable(
6511assert(Values.
size() &&
"Can't build lookup table without values!");
6512assert(TableSize >= Values.
size() &&
"Can't fit values in table!");
6514// If all values in the table are equal, this is that value. 6515 SingleValue = Values.
begin()->second;
6519// Build up the table contents. 6521for (
size_tI = 0, E = Values.
size();
I != E; ++
I) {
6527 TableContents[
Idx] = CaseRes;
6529if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6530 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes :
nullptr;
6533// Fill in any holes in the table with the default result. 6534if (Values.
size() < TableSize) {
6536"Need a default value to fill the lookup table holes.");
6539if (!TableContents[
I])
6540 TableContents[
I] = DefaultValue;
6543// If the default value is poison, all the holes are poison. 6544bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6546if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6547 SingleValue =
nullptr;
6550// If each element in the table contains the same value, we only need to store 6551// that single value. 6553Kind = SingleValueKind;
6557// Check if we can derive the value with a linear transformation from the 6560bool LinearMappingPossible =
true;
6563// When linear map is monotonic and signed overflow doesn't happen on 6564// maximum index, we can attach nsw on Add and Mul. 6565bool NonMonotonic =
false;
6566assert(TableSize >= 2 &&
"Should be a SingleValue table.");
6567// Check if there is the same distance between two consecutive values. 6569ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[
I]);
6571if (!ConstVal && isa<PoisonValue>(TableContents[
I])) {
6572// This is an poison, so it's (probably) a lookup table hole. 6573// To prevent any regressions from before we switched to using poison as 6574// the default value, holes will fall back to using the first value. 6575// This can be removed once we add proper handling for poisons in lookup 6577 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6581// This is an undef. We could deal with it, but undefs in lookup tables 6582// are very seldom. It's probably not worth the additional complexity. 6583 LinearMappingPossible =
false;
6588APInt Dist = Val - PrevVal;
6591 }
elseif (Dist != DistToPrev) {
6592 LinearMappingPossible =
false;
6600if (LinearMappingPossible) {
6601 LinearOffset = cast<ConstantInt>(TableContents[0]);
6602 LinearMultiplier = ConstantInt::get(
M.getContext(), DistToPrev);
6603APIntM = LinearMultiplier->getValue();
6605if (
isIntN(
M.getBitWidth(), TableSize - 1))
6606 (void)
M.smul_ov(
APInt(
M.getBitWidth(), TableSize - 1), MayWrap);
6607 LinearMapValWrapped = NonMonotonic || MayWrap;
6608Kind = LinearMapKind;
6614// If the type is integer and the table fits in a register, build a bitmap. 6617APInt TableInt(TableSize *
IT->getBitWidth(), 0);
6619 TableInt <<=
IT->getBitWidth();
6620// Insert values into the bitmap. Undef values are set to zero. 6621if (!isa<UndefValue>(TableContents[
I - 1])) {
6622ConstantInt *Val = cast<ConstantInt>(TableContents[
I - 1]);
6623 TableInt |= Val->
getValue().
zext(TableInt.getBitWidth());
6626 BitMap = ConstantInt::get(
M.getContext(), TableInt);
6627 BitMapElementTy =
IT;
6633// Store the table in an array. 6638 GlobalVariable::PrivateLinkage, Initializer,
6639"switch.table." + FuncName);
6640Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6641// Set the alignment to that of an array items. We will be only loading one 6649case SingleValueKind:
6651case LinearMapKind: {
6652// Derive the result value from the input value. 6654false,
"switch.idx.cast");
6655if (!LinearMultiplier->isOne())
6658/*HasNSW = */ !LinearMapValWrapped);
6660if (!LinearOffset->isZero())
6663/*HasNSW = */ !LinearMapValWrapped);
6667// Type of the bitmap (e.g. i59). 6670// Cast Index to the same type as the bitmap. 6671// Note: The Index is <= the number of elements in the table, so 6672// truncating it to the width of the bitmask is safe. 6675// Multiply the shift amount by the element width. NUW/NSW can always be 6676// set, because wouldFitInRegister guarantees Index * ShiftAmt is in 6677// BitMap's bit width. 6679 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6680"switch.shiftamt",
/*HasNUW =*/true,
/*HasNSW =*/true);
6684 Builder.
CreateLShr(BitMap, ShiftAmt,
"switch.downshift");
6686return Builder.
CreateTrunc(DownShifted, BitMapElementTy,
"switch.masked");
6689// Make sure the table index will not overflow when treated as signed. 6692Array->getInitializer()->getType()->getArrayNumElements();
6693if (TableSize > (1ULL << std::min(
IT->getBitWidth() - 1, 63u)))
6696"switch.tableidx.zext");
6700 GEPIndices,
"switch.gep");
6702 cast<ArrayType>(
Array->getValueType())->getElementType(),
GEP,
6709bool SwitchLookupTable::wouldFitInRegister(
constDataLayout &
DL,
6712auto *
IT = dyn_cast<IntegerType>(ElementType);
6715// FIXME: If the type is wider than it needs to be, e.g. i8 but all values 6716// are <= 15, we could try to narrow the type. 6718// Avoid overflow, fitsInLegalInteger uses unsigned int for the width. 6719if (TableSize >= UINT_MAX /
IT->getBitWidth())
6721returnDL.fitsInLegalInteger(TableSize *
IT->getBitWidth());
6726// Allow any legal type. 6730auto *
IT = dyn_cast<IntegerType>(Ty);
6734// Also allow power of 2 integer types that have at least 8 bits and fit in 6735// a register. These types are common in frontend languages and targets 6736// usually support loads of these types. 6737// TODO: We could relax this to any integer that fits in a register and rely 6738// on ABI alignment and padding in the table to allow the load to be widened. 6739// Or we could widen the constants and truncate the load. 6742DL.fitsInLegalInteger(
IT->getBitWidth());
6746// 40% is the default density for building a jump table in optsize/minsize 6747// mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this 6748// function was based on. 6752returnfalse;
// Avoid multiplication overflows below. 6754return NumCases * 100 >= CaseRange * MinDensity;
6761returnfalse;
// Overflow. 6766/// Determine whether a lookup table should be built for this switch, based on 6767/// the number of cases, size of the table, and the types of the results. 6768// TODO: We could support larger than legal types by limiting based on the 6769// number of loads required and/or table size. If the constants are small we 6770// could use smaller table entries and extend after the load. 6775if (SI->getNumCases() > TableSize)
6776returnfalse;
// TableSize overflowed. 6778bool AllTablesFitInRegister =
true;
6779bool HasIllegalType =
false;
6780for (
constauto &
I : ResultTypes) {
6783// Saturate this flag to true. 6786// Saturate this flag to false. 6787 AllTablesFitInRegister =
6788 AllTablesFitInRegister &&
6789 SwitchLookupTable::wouldFitInRegister(
DL, TableSize, Ty);
6791// If both flags saturate, we're done. NOTE: This *only* works with 6792// saturating flags, and all flags have to saturate first due to the 6793// non-deterministic behavior of iterating over a dense map. 6794if (HasIllegalType && !AllTablesFitInRegister)
6798// If each table would fit in a register, we should build it anyway. 6799if (AllTablesFitInRegister)
6802// Don't build a table that doesn't fit in-register if it has illegal types. 6816 MaxCaseVal.
getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6819returnall_of(ResultTypes, [&](
constauto &KV) {
6820return SwitchLookupTable::wouldFitInRegister(
6822 KV.second
/* ResultType */);
6826/// Try to reuse the switch table index compare. Following pattern: 6828/// if (idx < tablesize) 6829/// r = table[idx]; // table does not contain default_value 6831/// r = default_value; 6832/// if (r != default_value) 6837/// cond = idx < tablesize; 6841/// r = default_value; 6845/// Jump threading will then eliminate the second if(cond). 6854// We require that the compare is in the same block as the phi so that jump 6855// threading can do its work afterwards. 6867// Check if the compare with the default value is constant true or false. 6871if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6874// Check if the compare with the case values is distinct from the default 6876for (
auto ValuePair : Values) {
6879if (!CaseConst || CaseConst == DefaultConst ||
6880 (CaseConst != TrueConst && CaseConst != FalseConst))
6884// Check if the branch instruction dominates the phi node. It's a simple 6885// dominance check, but sufficient for our needs. 6886// Although this check is invariant in the calling loops, it's better to do it 6887// at this late stage. Practically we do it at most once for a switch. 6894if (DefaultConst == FalseConst) {
6895// The compare yields the same result. We can replace it. 6897 ++NumTableCmpReuses;
6899// The compare yields the same result, just inverted. We can replace it. 6900Value *InvertedTableCmp = BinaryOperator::CreateXor(
6901 RangeCmp, ConstantInt::get(RangeCmp->
getType(), 1),
"inverted.cmp",
6904 ++NumTableCmpReuses;
6908/// If the switch is only used to initialize one or more phi nodes in a common 6909/// successor block with different constant values, replace the switch with 6914assert(SI->getNumCases() > 1 &&
"Degenerate switch?");
6918// Only build lookup table when we have a target that supports it or the 6919// attribute is not set. 6924// FIXME: If the switch is too sparse for a lookup table, perhaps we could 6925// split off a dense part and build a lookup table for that. 6927// FIXME: This creates arrays of GEPs to constant strings, which means each 6928// GEP needs a runtime relocation in PIC code. We should just build one big 6929// string and lookup indices into that. 6931// Ignore switches with less than three cases. Lookup tables will not make 6932// them faster, so we don't analyze them. 6933if (SI->getNumCases() < 3)
6936// Figure out the corresponding result for each case value and phi node in the 6937// common destination, as well as the min and max case values. 6938assert(!SI->cases().empty());
6955 MinCaseVal = CaseVal;
6957 MaxCaseVal = CaseVal;
6959// Resulting value at phi nodes for this case value. 6962if (!
getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6966// Append the result from this case to the list for each phi. 6972 ResultLists[
PHI].push_back(std::make_pair(CaseVal,
Value));
6976// Keep track of the result types. 6978 ResultTypes[
PHI] = ResultLists[
PHI][0].second->getType();
6983// If the table has holes, we need a constant result for the default case 6984// or a bitmask that fits in a register. 6986bool HasDefaultResults =
6988 DefaultResultsList,
DL,
TTI);
6990for (
constauto &
I : DefaultResultsList) {
6993 DefaultResults[
PHI] = Result;
6997 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes,
DL,
TTI);
6999if (UseSwitchConditionAsTableIndex)
7005// If the default destination is unreachable, or if the lookup table covers 7006// all values of the conditional variable, branch directly to the lookup table 7007// BB. Otherwise, check that the condition is within the case range. 7008bool DefaultIsReachable = !SI->defaultDestUndefined();
7010bool TableHasHoles = (NumResults < TableSize);
7012// If the table has holes but the default destination doesn't produce any 7013// constant results, the lookup table entries corresponding to the holes will 7015bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7017// If the default destination doesn't produce a constant result but is still 7018// reachable, and the lookup table has holes, we need to use a mask to 7019// determine if the current index should load from the lookup table or jump 7020// to the default case. 7021// The mask is unnecessary if the table has holes but the default destination 7022// is unreachable, as in that case the holes must also be unreachable. 7023bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7025// As an extra penalty for the validity test we require more cases. 7026if (SI->getNumCases() < 4)
// FIXME: Find best threshold value (benchmark). 7028if (!
DL.fitsInLegalInteger(TableSize))
7035 std::vector<DominatorTree::UpdateType> Updates;
7037// Compute the maximum table size representable by the integer type we are 7041assert(MaxTableSize >= TableSize &&
7042"It is impossible for a switch to have more entries than the max " 7043"representable value of its input integer type's size.");
7045// Create the BB that does the lookups. 7048Mod.getContext(),
"switch.lookup", CommonDest->
getParent(), CommonDest);
7050// Compute the table index value. 7054if (UseSwitchConditionAsTableIndex) {
7055 TableIndexOffset = ConstantInt::get(MaxCaseVal->
getIntegerType(), 0);
7056 TableIndex = SI->getCondition();
7058 TableIndexOffset = MinCaseVal;
7059// If the default is unreachable, all case values are s>= MinCaseVal. Then 7060// we can try to attach nsw. 7062if (!DefaultIsReachable) {
7067 TableIndex = Builder.
CreateSub(SI->getCondition(), TableIndexOffset,
7068"switch.tableidx",
/*HasNUW =*/false,
7069/*HasNSW =*/!MayWrap);
7074// Grow the table to cover all possible index values to avoid the range check. 7075// It will use the default result to fill in the table hole later, so make 7077if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
7079// Grow the table shouldn't have any size impact by checking 7080// wouldFitInRegister. 7081// TODO: Consider growing the table also when it doesn't fit in a register 7082// if no optsize is specified. 7085return SwitchLookupTable::wouldFitInRegister(
7086DL, UpperBound, KV.second
/* ResultType */);
7088// There may be some case index larger than the UpperBound (unreachable 7089// case), so make sure the table size does not get smaller. 7090 TableSize = std::max(UpperBound, TableSize);
7091// The default branch is unreachable after we enlarge the lookup table. 7092// Adjust DefaultIsReachable to reuse code path. 7093 DefaultIsReachable =
false;
7097constbool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7098if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7101 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7102// Note: We call removeProdecessor later since we need to be able to get the 7103// PHI value for the default case in case we're using a bit mask. 7106 TableIndex, ConstantInt::get(MinCaseVal->
getType(), TableSize));
7108 Builder.
CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7110 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7113// Populate the BB that does the lookups. 7117// Before doing the lookup, we do the hole check. The LookupBB is therefore 7118// re-purposed to do the hole check, and we create a new LookupBB. 7120 MaskBB->
setName(
"switch.hole_check");
7124// Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid 7125// unnecessary illegal types. 7127APInt MaskInt(TableSizePowOf2, 0);
7128APInt One(TableSizePowOf2, 1);
7129// Build bitmask; fill in a 1 bit for every case. 7130const ResultListTy &ResultList = ResultLists[PHIs[0]];
7131for (
size_tI = 0, E = ResultList.size();
I != E; ++
I) {
7134 MaskInt |= One <<
Idx;
7136ConstantInt *TableMask = ConstantInt::get(
Mod.getContext(), MaskInt);
7138// Get the TableIndex'th bit of the bitmask. 7139// If this bit is 0 (meaning hole) jump to the default destination, 7140// else continue with table lookup. 7144Value *Shifted = Builder.
CreateLShr(TableMask, MaskIndex,
"switch.shifted");
7147 Builder.
CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7149 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7150 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7156if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7157// We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later, 7158// do not delete PHINodes here. 7159 SI->getDefaultDest()->removePredecessor(BB,
7160/*KeepOneInputPHIs=*/true);
7162 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7166const ResultListTy &ResultList = ResultLists[
PHI];
7168Type *ResultType = ResultList.begin()->second->getType();
7170// Use any value to fill the lookup table holes. 7174 SwitchLookupTable Table(
Mod, TableSize, TableIndexOffset, ResultList, DV,
7177Value *Result = Table.buildLookup(TableIndex, Builder);
7179// Do a small peephole optimization: re-use the switch table compare if 7181if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7183// Search for compare instructions which use the phi. 7184for (
auto *
User :
PHI->users()) {
7189PHI->addIncoming(Result, LookupBB);
7194 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7196// Remove the switch. 7198for (
unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7201if (Succ == SI->getDefaultDest())
7204if (DTU && RemovedSuccessors.
insert(Succ).second)
7205 Updates.push_back({DominatorTree::Delete, BB, Succ});
7207 SI->eraseFromParent();
7214 ++NumLookupTablesHoles;
7218/// Try to transform a switch that has "holes" in it to a contiguous sequence 7221/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be 7222/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. 7224/// This converts a sparse switch into a dense switch which allows better 7225/// lowering and could also allow transforming into a lookup table. 7229auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7230if (CondTy->getIntegerBitWidth() > 64 ||
7231 !
DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7233// Only bother with this optimization if there are more than 3 switch cases; 7234// SDAG will only bother creating jump tables for 4 or more cases. 7235if (SI->getNumCases() < 4)
7238// This transform is agnostic to the signedness of the input or case values. We 7239// can treat the case values as signed or unsigned. We can optimize more common 7240// cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values 7243for (
constauto &
C : SI->cases())
7244 Values.
push_back(
C.getCaseValue()->getValue().getSExtValue());
7247// If the switch is already dense, there's nothing useful to do here. 7251// First, transform the values such that they start at zero and ascend. 7252 int64_t
Base = Values[0];
7253for (
auto &V : Values)
7256// Now we have signed numbers that have been shifted so that, given enough 7257// precision, there are no negative values. Since the rest of the transform 7258// is bitwise only, we switch now to an unsigned representation. 7260// This transform can be done speculatively because it is so cheap - it 7261// results in a single rotate operation being inserted. 7263// countTrailingZeros(0) returns 64. As Values is guaranteed to have more than 7264// one element and LLVM disallows duplicate cases, Shift is guaranteed to be 7267for (
auto &V : Values)
7271for (
auto &V : Values)
7272 V = (int64_t)((
uint64_t)V >> Shift);
7275// Transform didn't create a dense switch. 7278// The obvious transform is to shift the switch condition right and emit a 7279// check that the condition actually cleanly divided by GCD, i.e. 7280// C & (1 << Shift - 1) == 0 7281// inserting a new CFG edge to handle the case where it didn't divide cleanly. 7283// A cheaper way of doing this is a simple ROTR(C, Shift). This performs the 7284// shift and puts the shifted-off bits in the uppermost bits. If any of these 7285// are nonzero then the switch condition will be very large and will hit the 7288auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7291 Builder.
CreateSub(SI->getCondition(), ConstantInt::get(Ty,
Base));
7293 Ty, Intrinsic::fshl,
7294 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7295 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7297for (
auto Case : SI->cases()) {
7298auto *Orig = Case.getCaseValue();
7299auto Sub = Orig->getValue() -
APInt(Ty->getBitWidth(),
Base,
true);
7300 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7305/// Tries to transform switch of powers of two to reduce switch range. 7306/// For example, switch like: 7307/// switch (C) { case 1: case 2: case 64: case 128: } 7308/// will be transformed to: 7309/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: } 7311/// This transformation allows better lowering and could allow transforming into 7316Value *Condition = SI->getCondition();
7318auto *CondTy = cast<IntegerType>(Condition->
getType());
7320if (CondTy->getIntegerBitWidth() > 64 ||
7321 !
DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7330// Inserting intrinsic is too expensive. 7333// Only bother with this optimization if there are more than 3 switch cases. 7334// SDAG will only bother creating jump tables for 4 or more cases. 7335if (SI->getNumCases() < 4)
7338// We perform this optimization only for switches with 7339// unreachable default case. 7340// This assumtion will save us from checking if `Condition` is a power of two. 7341if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7344// Check that switch cases are powers of two. 7346for (
constauto &Case : SI->cases()) {
7347uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7354// isSwichDense requires case values to be sorted. 7358// Transform is unable to generate dense switch. 7363// Replace each case with its trailing zeros number. 7364for (
auto &Case : SI->cases()) {
7365auto *OrigValue = Case.getCaseValue();
7366 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7367 OrigValue->getValue().countr_zero()));
7370// Replace condition with its trailing zeros number. 7374 SI->setCondition(ConditionTrailingZeros);
7379/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have 7380/// the same destination. 7383auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7384if (!Cmp || !Cmp->hasOneUse())
7390 Weights.
resize(4);
// Avoid checking HasWeights everywhere. 7392// Normalize to [us]cmp == Res ? Succ : OtherSucc. 7395uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7398if (SI->getNumCases() == 2) {
7399// Find which of 1, 0 or -1 is missing (handled by default dest). 7405 Succ = SI->getDefaultDest();
7406 SuccWeight = Weights[0];
7408for (
auto &Case : SI->cases()) {
7409 std::optional<int64_t> Val =
7413if (!Missing.erase(*Val))
7418 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7421assert(Missing.size() == 1 &&
"Should have one case left");
7422 Res = *Missing.begin();
7423 }
elseif (SI->getNumCases() == 3 && SI->defaultDestUndefined()) {
7424// Normalize so that Succ is taken once and OtherSucc twice. 7425 Unreachable = SI->getDefaultDest();
7427for (
auto &Case : SI->cases()) {
7428BasicBlock *NewSucc = Case.getCaseSuccessor();
7429uint32_t Weight = Weights[Case.getSuccessorIndex()];
7432 OtherSuccWeight += Weight;
7435 SuccWeight = Weight;
7436 }
elseif (Succ == NewSucc) {
7442for (
auto &Case : SI->cases()) {
7443 std::optional<int64_t> Val =
7445if (!Val || (Val != 1 && Val != 0 && Val != -1))
7447if (Case.getCaseSuccessor() == Succ) {
7456// Determine predicate for the missing case. 7460 Pred = ICmpInst::ICMP_UGT;
7463 Pred = ICmpInst::ICMP_EQ;
7466 Pred = ICmpInst::ICMP_ULT;
7472MDNode *NewWeights =
nullptr;
7481 SI->getMetadata(LLVMContext::MD_unpredictable));
7485 SI->eraseFromParent();
7486 Cmp->eraseFromParent();
7487if (DTU && Unreachable)
7488 DTU->
applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7492/// Checking whether two cases of SI are equal depends on the contents of the 7493/// BasicBlock and the incoming values of their successor PHINodes. 7494/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid 7495/// calling this function on each BasicBlock every time isEqual is called, 7496/// especially since the same BasicBlock may be passed as an argument multiple 7497/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock -> 7498/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking 7499/// of the incoming values. 7519"Only supporting unconditional branches for now");
7521"Expected unconditional branches to have one successor");
7522assert(Succ->
size() == 1 &&
"Expected just a single branch in the BB");
7524// Since we assume the BB is just a single BranchInst with a single 7525// successor, we hash as the BB and the incoming Values of its successor 7526// PHIs. Initially, we tried to just use the successor BB as the hash, but 7527// including the incoming PHI values leads to better performance. 7528// We also tried to build a map from BB -> Succs.IncomingValues ahead of 7529// time and passing it in SwitchSuccWrapper, but this slowed down the 7530// average compile time without having any impact on the worst case compile 7544if (
LHS == EKey ||
RHS == EKey ||
LHS == TKey ||
RHS == TKey)
7550// FIXME: we checked that the size of A and B are both 1 in 7551// simplifyDuplicateSwitchArms to make the Case list smaller to 7552// improve performance. If we decide to support BasicBlocks with more 7553// than just a single instruction, we need to check that A.size() == 7554// B.size() here, and we need to check more than just the BranchInsts 7557BranchInst *ABI = cast<BranchInst>(
A->getTerminator());
7558BranchInst *BBI = cast<BranchInst>(
B->getTerminator());
7560"Only supporting unconditional branches for now");
7564// Need to check that PHIs in successor have matching values 7567auto &PredIVs = (*
LHS->PhiPredIVs)[&Phi];
7568if (PredIVs[
A] != PredIVs[
B])
7577bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(
SwitchInst *SI,
7579// Build Cases. Skip BBs that are not candidates for simplification. Mark 7580// PHINodes which need to be processed into PhiPredIVs. We decide to process 7581// an entire PHI at once after the loop, opposed to calling 7582// getIncomingValueForBlock inside this loop, since each call to 7583// getIncomingValueForBlock is O(|Preds|). 7591for (
unsignedI = 0;
I <
SI->getNumSuccessors(); ++
I) {
7594// FIXME: Support more than just a single BranchInst. One way we could do 7595// this is by taking a hashing approach of all insts in BB. 7599// FIXME: This case needs some extra care because the terminators other than 7600// SI need to be updated. For now, consider only backedges to the SI. 7605// FIXME: Relax that the terminator is a BranchInst by checking for equality 7606// on other kinds of terminators. We decide to only support unconditional 7607// branches for now for compile time reasons. 7612if (Seen.
insert(BB).second) {
7613// Keep track of which PHIs we need as keys in PhiPredIVs below. 7617// Add the successor only if not previously visited. 7621 BBToSuccessorIndexes[BB].emplace_back(
I);
7624// Precompute a data structure to improve performance of isEqual for 7625// SwitchSuccWrapper. 7630for (
auto &
IV :
Phi->incoming_values())
7634// Build a set such that if the SwitchSuccWrapper exists in the set and 7635// another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper 7636// which is not in the set should be replaced with the one in the set. If the 7637// SwitchSuccWrapper is not in the set, then it should be added to the set so 7638// other SwitchSuccWrappers can check against it in the same manner. We use 7639// SwitchSuccWrapper instead of just BasicBlock because we'd like to pass 7640// around information to isEquality, getHashValue, and when doing the 7641// replacement with better performance. 7647bool MadeChange =
false;
7648for (
auto &SSW : Cases) {
7649// SSW is a candidate for simplification. If we find a duplicate BB, 7653// We know that SI's parent BB no longer dominates the old case successor 7654// since we are making it dead. 7656constauto &Successors = BBToSuccessorIndexes.
at(SSW.Dest);
7657for (
unsignedIdx : Successors)
7658SI->setSuccessor(
Idx, (*It)->Dest);
7672if (isValueEqualityComparison(SI)) {
7673// If we only have one predecessor, and if it is a branch on this value, 7674// see if that predecessor totally determines the outcome of this switch. 7676if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7677return requestResimplify();
7681if (simplifySwitchOnSelect(SI,
Select))
7682return requestResimplify();
7684// If the block only contains the switch, see if we can fold the block 7685// away into any preds. 7687if (foldValueComparisonIntoPredecessors(SI, Builder))
7688return requestResimplify();
7691// Try to transform the switch into an icmp and a branch. 7692// The conversion from switch to comparison may lose information on 7693// impossible switch values, so disable it early in the pipeline. 7694if (
Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7695return requestResimplify();
7697// Remove unreachable cases. 7699return requestResimplify();
7702return requestResimplify();
7705return requestResimplify();
7708return requestResimplify();
7710// The conversion from switch to lookup tables results in difficult-to-analyze 7711// code and makes pruning branches much harder. This is a problem if the 7712// switch expression itself can still be restricted as a result of inlining or 7713// CVP. Therefore, only apply this transformation during late stages of the 7714// optimisation pipeline. 7715if (
Options.ConvertSwitchToLookupTable &&
7717return requestResimplify();
7720return requestResimplify();
7723return requestResimplify();
7726 hoistCommonCodeFromSuccessors(SI, !
Options.HoistCommonInsts))
7727return requestResimplify();
7729if (simplifyDuplicateSwitchArms(SI, DTU))
7730return requestResimplify();
7739// Eliminate redundant destinations. 7746 RemovedSuccs.
insert(Dest);
7756 std::vector<DominatorTree::UpdateType> Updates;
7758for (
auto *RemovedSucc : RemovedSuccs)
7764// If the indirectbr has no successors, change it to unreachable. 7771// If the indirectbr has one successor, change it to a direct branch. 7778if (simplifyIndirectBrOnSelect(IBI, SI))
7779return requestResimplify();
7784/// Given an block with only a single landing pad and a unconditional branch 7785/// try to find another basic block which this one can be merged with. This 7786/// handles cases where we have multiple invokes with unique landing pads, but 7787/// a shared handler. 7789/// We specifically choose to not worry about merging non-empty blocks 7790/// here. That is a PRE/scheduling problem and is best solved elsewhere. In 7791/// practice, the optimizer produces empty landing pad blocks quite frequently 7792/// when dealing with exception dense code. (see: instcombine, gvn, if-else 7793/// sinking in this file) 7795/// This is primarily a code size optimization. We need to avoid performing 7796/// any transform which might inhibit optimization (such as our ability to 7797/// specialize a particular handler via tail commoning). We do this by not 7798/// merging any blocks which require us to introduce a phi. Since the same 7799/// values are flowing through both blocks, we don't lose any ability to 7800/// specialize. If anything, we make such specialization more likely. 7802/// TODO - This transformation could remove entries from a phi in the target 7803/// block when the inputs in the phi are the same for the two blocks being 7804/// merged. In some cases, this could result in removal of the PHI entirely. 7809// If there's a phi in the successor block, we'd likely have to introduce 7810// a phi into the merged landing pad block. 7811if (isa<PHINode>(*Succ->
begin()))
7821for (++
I; isa<DbgInfoIntrinsic>(
I); ++
I)
7827 std::vector<DominatorTree::UpdateType> Updates;
7829// We've found an identical block. Update our predecessors to take that 7830// path instead and make ourselves dead. 7834assert(
II->getNormalDest() != BB &&
II->getUnwindDest() == BB &&
7835"unexpected successor");
7836II->setUnwindDest(OtherPred);
7843// The debug info in OtherPred doesn't cover the merged control flow that 7844// used to go through BB. We need to delete it or update it. 7846if (isa<DbgInfoIntrinsic>(Inst))
7867returnBranch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7868 : simplifyCondBranch(
Branch, Builder);
7871bool SimplifyCFGOpt::simplifyUncondBranch(
BranchInst *BI,
7876// If the Terminator is the only non-phi instruction, simplify the block. 7877// If LoopHeader is provided, check if the block or its successor is a loop 7878// header. (This is for early invocations before loop simplify and 7879// vectorization to keep canonical loop forms for nested loops. These blocks 7880// can be eliminated when the pass is invoked later in the back-end.) 7881// Note that if BB has only one predecessor then we do not introduce new 7882// backedge, so we can eliminate BB. 7883bool NeedCanonicalLoop =
7892// If the only instruction in the block is a seteq/setne comparison against a 7893// constant, try to simplify the block. 7894if (
ICmpInst *ICI = dyn_cast<ICmpInst>(
I))
7896for (++
I; isa<DbgInfoIntrinsic>(
I); ++
I)
7898if (
I->isTerminator() &&
7899 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7903// See if we can merge an empty landing pad block with another which is 7906for (++
I; isa<DbgInfoIntrinsic>(
I); ++
I)
7912// If this basic block is ONLY a compare and a branch, and if a predecessor 7913// branches to us and our successor, fold the comparison into the 7914// predecessor and use logical operations to update the incoming value 7915// for PHI nodes in common successor. 7919return requestResimplify();
7927if (!PPred || (PredPred && PredPred != PPred))
7934/// Fold the following pattern: 7936/// br i1 %cond1, label %bb1, label %bb2 7938/// br i1 %cond2, label %bb3, label %bb4 7940/// br i1 %cond2, label %bb4, label %bb3 7947/// %cond = xor i1 %cond1, %cond2 7948/// br i1 %cond, label %bb4, label %bb3 7953/// NOTE: %cond2 always dominates the terminator of bb0. 7964if (!SuccBI || !SuccBI->isConditional())
7968return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7969 !isa<PHINode>(Succ1->
front()) && !isa<PHINode>(Succ2->
front());
7972if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7998bool HasWeight =
false;
8003 BBTWeight = BBFWeight = 1;
8008 BB1TWeight = BB1FWeight = 1;
8013 BB2TWeight = BB2FWeight = 1;
8015uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8016 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8027"Tautological conditional branch should have been eliminated already.");
8030if (!
Options.SimplifyCondBranch ||
8034// Conditional branch 8035if (isValueEqualityComparison(BI)) {
8036// If we only have one predecessor, and if it is a branch on this value, 8037// see if that predecessor totally determines the outcome of this 8040if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8041return requestResimplify();
8043// This block must be empty, except for the setcond inst, if it exists. 8044// Ignore dbg and pseudo intrinsics. 8047if (foldValueComparisonIntoPredecessors(BI, Builder))
8048return requestResimplify();
8051if (&*
I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8052return requestResimplify();
8056// Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. 8057if (simplifyBranchOnICmpChain(BI, Builder,
DL))
8060// If this basic block has dominating predecessor blocks and the dominating 8061// blocks' conditions imply BI's condition, we know the direction of BI. 8064// Turn this into a branch on constant. 8070return requestResimplify();
8073// If this basic block is ONLY a compare and a branch, and if a predecessor 8074// branches to us and one of our successors, fold the comparison into the 8075// predecessor and use logical operations to pick the right destination. 8079return requestResimplify();
8081// We have a conditional branch to two blocks that are only reachable 8082// from BI. We know that the condbr dominates the two blocks, so see if 8083// there is any identical code in the "then" and "else" blocks. If so, we 8084// can hoist it up to the branching block. 8088 hoistCommonCodeFromSuccessors(BI, !
Options.HoistCommonInsts))
8089return requestResimplify();
8092Options.HoistLoadsStoresWithCondFaulting &&
8095auto CanSpeculateConditionalLoadsStores = [&]() {
8098if (
I.isTerminator()) {
8099if (
I.getNumSuccessors() > 1)
8103 SpeculatedConditionalLoadsStores.
size() ==
8107 SpeculatedConditionalLoadsStores.
push_back(&
I);
8110return !SpeculatedConditionalLoadsStores.
empty();
8113if (CanSpeculateConditionalLoadsStores()) {
8116return requestResimplify();
8120// If Successor #1 has multiple preds, we may be able to conditionally 8121// execute Successor #0 if it branches to Successor #1. 8126return requestResimplify();
8129// If Successor #0 has multiple preds, we may be able to conditionally 8130// execute Successor #1 if it branches to Successor #0. 8135return requestResimplify();
8138// If this is a branch on something for which we know the constant value in 8139// predecessors (e.g. a phi node in the current block), thread control 8140// through this block. 8142return requestResimplify();
8144// Scan predecessor blocks for conditional branches. 8147if (PBI != BI && PBI->isConditional())
8149return requestResimplify();
8151// Look for diamond patterns. 8154if (
BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8155if (PBI != BI && PBI->isConditional())
8157return requestResimplify();
8159// Look for nested conditional branches. 8161return requestResimplify();
8166/// Check if passing a value to an instruction will cause undefined behavior. 8175if (
C->isNullValue() || isa<UndefValue>(
C)) {
8176// Only look at the first use we can handle, avoid hurting compile time with 8179 auto *Use = cast<Instruction>(U);
8180// Change this list when we want to add new instructions. 8181 switch (Use->getOpcode()) {
8184 case Instruction::GetElementPtr:
8185 case Instruction::Ret:
8186 case Instruction::BitCast:
8187 case Instruction::Load:
8188 case Instruction::Store:
8189 case Instruction::Call:
8190 case Instruction::CallBr:
8191 case Instruction::Invoke:
8192 case Instruction::UDiv:
8193 case Instruction::URem:
8194// Note: signed div/rem of INT_MIN / -1 is also immediate UB, not 8195// implemented to avoid code complexity as it is unclear how useful such 8197 case Instruction::SDiv:
8198 case Instruction::SRem:
8202if (FindUse ==
I->user_end())
8204auto *
Use = cast<Instruction>(*FindUse);
8205// Bail out if Use is not in the same BB as I or Use == I or Use comes 8206// before I in the block. The latter two can be the case if Use is a 8208if (
Use->getParent() !=
I->getParent() ||
Use ==
I ||
Use->comesBefore(
I))
8211// Now make sure that there are no instructions in between that can alter 8212// control flow (eg. calls) 8220// Look through GEPs. A load from a GEP derived from NULL is still undefined 8222if (
GEP->getPointerOperand() ==
I) {
8223// The current base address is null, there are four cases to consider: 8224// getelementptr (TY, null, 0) -> null 8225// getelementptr (TY, null, not zero) -> may be modified 8226// getelementptr inbounds (TY, null, 0) -> null 8227// getelementptr inbounds (TY, null, not zero) -> poison iff null is 8229if (!
GEP->hasAllZeroIndices() &&
8230 (!
GEP->isInBounds() ||
8232GEP->getPointerAddressSpace())))
8233 PtrValueMayBeModified =
true;
8237// Look through return. 8239bool HasNoUndefAttr =
8240 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8241// Return undefined to a noundef return value is undefined. 8242if (isa<UndefValue>(
C) && HasNoUndefAttr)
8244// Return null to a nonnull+noundef return value is undefined. 8245if (
C->isNullValue() && HasNoUndefAttr &&
8246 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8247return !PtrValueMayBeModified;
8251// Load from null is undefined. 8253if (!LI->isVolatile())
8255 LI->getPointerAddressSpace());
8257// Store to null is undefined. 8259if (!SI->isVolatile())
8261 SI->getPointerAddressSpace())) &&
8262 SI->getPointerOperand() ==
I;
8264// llvm.assume(false/undef) always triggers immediate UB. 8265if (
auto *Assume = dyn_cast<AssumeInst>(
Use)) {
8266// Ignore assume operand bundles. 8267if (
I == Assume->getArgOperand(0))
8271if (
auto *CB = dyn_cast<CallBase>(
Use)) {
8274// A call to null is undefined. 8275if (CB->getCalledOperand() ==
I)
8278if (
C->isNullValue()) {
8281unsigned ArgIdx = CB->getArgOperandNo(&Arg);
8282if (CB->isPassingUndefUB(ArgIdx) &&
8283 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
8284// Passing null to a nonnnull+noundef argument is undefined. 8285return !PtrValueMayBeModified;
8288 }
elseif (isa<UndefValue>(
C)) {
8289// Passing undef to a noundef argument is undefined. 8292unsigned ArgIdx = CB->getArgOperandNo(&Arg);
8293if (CB->isPassingUndefUB(ArgIdx)) {
8294// Passing undef to a noundef argument is undefined. 8300// Div/Rem by zero is immediate UB 8307/// If BB has an incoming value that will always trigger undefined behavior 8308/// (eg. null pointer dereference), remove the branch leading here. 8313for (
unsigned i = 0, e =
PHI.getNumIncomingValues(); i != e; ++i)
8320// Turn unconditional branches into unreachables and remove the dead 8321// destination from conditional branches. 8325// Preserve guarding condition in assume, because it might not be 8326// inferrable from any dominating condition. 8342 }
elseif (
SwitchInst *SI = dyn_cast<SwitchInst>(
T)) {
8343// Redirect all branches leading to UB into 8344// a newly created unreachable block. 8348// The new block contains only one instruction: Unreachable 8350for (
constauto &Case : SI->cases())
8351if (Case.getCaseSuccessor() == BB) {
8353 Case.setSuccessor(Unreachable);
8355if (SI->getDefaultDest() == BB) {
8357 SI->setDefaultDest(Unreachable);
8371bool SimplifyCFGOpt::simplifyOnce(
BasicBlock *BB) {
8377// Remove basic blocks that have no predecessors (except the entry block)... 8378// or that just have themself as a predecessor. These are unreachable. 8386// Check to see if we can constant propagate this terminator instruction 8389/*TLI=*/nullptr, DTU);
8391// Check for and eliminate duplicate PHI nodes in this block. 8394// Check for and remove branches that will always cause undefined behavior. 8396return requestResimplify();
8398// Merge basic blocks into their predecessor if there is only one distinct 8399// pred, and if there is only one distinct successor of the predecessor, and 8400// if there are no PHI nodes. 8407// sinkCommonCodeFromPredecessors() does not automatically CSE PHI's, 8408// so we may now how duplicate PHI's. 8409// Let's rerun EliminateDuplicatePHINodes() first, 8410// before foldTwoEntryPHINode() potentially converts them into select's, 8411// after which we'd need a whole EarlyCSE pass run to cleanup them. 8419// If there is a trivial two-entry PHI node in this basic block, and we can 8420// eliminate it, do so now. 8421if (
auto *PN = dyn_cast<PHINode>(BB->
begin()))
8424Options.SpeculateUnpredictables))
8431case Instruction::Br:
8432 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8434case Instruction::Resume:
8435 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8437case Instruction::CleanupRet:
8438 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8440case Instruction::Switch:
8441 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8443case Instruction::Unreachable:
8444 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8446case Instruction::IndirectBr:
8447 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8457// Repeated simplify BB as long as resimplification is requested. 8461// Perform one round of simplifcation. Resimplify flag will be set if 8462// another iteration is requested. 8463 Changed |= simplifyOnce(BB);
8464 }
while (Resimplify);
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
DenseMap< Block *, BlockRelaxAux > Blocks
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< bool > HoistLoadsStoresWithCondFaulting("simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads/stores if the target supports " "conditional faulting"))
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch, try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights, bool IsExpected)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert)
If the target supports conditional faulting, we look for the following pattern:
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
bool sgt(const APInt &RHS) const
Signed greater than comparison.
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
bool sle(const APInt &RHS) const
Signed less or equal comparison.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
bool slt(const APInt &RHS) const
Signed less than comparison.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
const Instruction & front() const
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
const Function * getParent() const
Return the enclosing method, or null if none.
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
The address of a basic block.
BasicBlock * getBasicBlock() const
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getPredicate() const
Return the predicate for this instruction.
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
static Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
const BasicBlock & getEntryBlock() const
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
bool hasPostDomTree() const
Returns true if it holds a PostDomTreeT.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
UnreachableInst * CreateUnreachable()
Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateNot(Value *V, const Twine &Name="")
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
A Module instance is used to store all the information related to an LLVM module.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isTokenTy() const
Return true if this is 'token'.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
unsigned getOperandNo() const
Return the operand # of this use in its User.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
static constexpr uint64_t MaximumAlignment
Value(Type *Ty, unsigned scid)
void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool operator<(int64_t V1, const APSInt &V2)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
bool succ_empty(const Instruction *I)
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
APInt operator*(APInt a, uint64_t RHS)
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
auto succ_size(const MachineBasicBlock *BB)
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
@ And
Bitwise or logical AND of integers.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
A MapVector that performs no allocations if smaller than a certain size.