1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 69#define DEBUG_TYPE "simple-loop-unswitch" 74STATISTIC(NumBranches,
"Number of branches unswitched");
75STATISTIC(NumSwitches,
"Number of switches unswitched");
76STATISTIC(NumSelects,
"Number of selects turned into branches for unswitching");
77STATISTIC(NumGuards,
"Number of guards turned into branches for unswitching");
78STATISTIC(NumTrivial,
"Number of unswitches that are trivial");
80 NumCostMultiplierSkipped,
81"Number of unswitch candidates that had their cost multiplier skipped");
83"Number of invariant conditions injected and unswitched");
87cl::desc(
"Forcibly enables non-trivial loop unswitching rather than " 88"following the configuration passed into the pass."));
92cl::desc(
"The cost threshold for unswitching a loop."));
96cl::desc(
"Enable unswitch cost multiplier that prohibits exponential " 97"explosion in nontrivial unswitch."));
100cl::desc(
"Toplevel siblings divisor for cost multiplier."));
103cl::desc(
"Number of unswitch candidates that are ignored when calculating " 107cl::desc(
"If enabled, simple loop unswitching will also consider " 108"llvm.experimental.guard intrinsics as unswitch candidates."));
110"simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
112cl::desc(
"If enabled, drop make.implicit metadata in unswitched implicit " 113"null checks to save time analyzing if we can keep it."));
116cl::desc(
"Max number of memory uses to explore during " 117"partial unswitching analysis"),
121cl::desc(
"If enabled, the freeze instruction will be added to condition " 122"of loop unswitch to prevent miscompilation."));
125"simple-loop-unswitch-inject-invariant-conditions",
cl::Hidden,
126cl::desc(
"Whether we should inject new invariants and unswitch them to " 127"eliminate some existing (non-invariant) conditions."),
131"simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
133"unswitch on them to eliminate branches that are " 134"not-taken 1/<this option> times or less."),
145 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
148structInjectedInvariant {
156 : Pred(Pred),
LHS(
LHS),
RHS(
RHS), InLoopSucc(InLoopSucc) {}
159structNonTrivialUnswitchCandidate {
162 std::optional<InstructionCost>
Cost;
163 std::optional<InjectedInvariant> PendingInjection;
164 NonTrivialUnswitchCandidate(
166 std::optional<InstructionCost>
Cost = std::nullopt,
167 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
168 : TI(TI), Invariants(Invariants),
Cost(
Cost),
169 PendingInjection(PendingInjection) {};
171bool hasPendingInjection()
const{
return PendingInjection.has_value(); }
173}
// end anonymous namespace. 175// Helper to skip (select x, true, false), which matches both a logical AND and 176// OR and can confuse code that tries to determine if \p Cond is either a 177// logical AND or OR but not both. 185/// Collect all of the loop invariant input values transitively used by the 186/// homogeneous instruction graph from a given root. 188/// This essentially walks from a root recursively through loop variant operands 189/// which have perform the same logical operation (AND or OR) and finds all 190/// inputs which are loop invariant. For some operations these can be 191/// re-associated and unswitched out of the loop entirely. 195assert(!L.isLoopInvariant(&Root) &&
196"Only need to walk the graph if root itself is not invariant.");
202// Build a worklist and recurse through operators collecting invariants. 209for (
Value *OpV :
I.operand_values()) {
210// Skip constants as unswitching isn't interesting for them. 211if (isa<Constant>(OpV))
214// Add it to our result if loop invariant. 215if (L.isLoopInvariant(OpV)) {
220// If not an instruction with the same opcode, nothing we can do. 225// Visit this operand. 226if (Visited.
insert(OpI).second)
230 }
while (!Worklist.
empty());
237assert(!isa<Constant>(Invariant) &&
"Why are we unswitching on a constant?");
239// Replace uses of LIC in the loop with the given constant. 240// We use make_early_inc_range as set invalidates the iterator. 242Instruction *UserI = dyn_cast<Instruction>(U.getUser());
244// Replace this use within the loop body. 245if (UserI && L.contains(UserI))
250/// Check that all the LCSSA PHI nodes in the loop exit block have trivial 251/// incoming values along this edge. 256auto *PN = dyn_cast<PHINode>(&
I);
258// No more PHIs to check. 261// If the incoming value for this edge isn't loop invariant the unswitch 263if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
269/// Copy a set of loop invariant values \p ToDuplicate and insert them at the 270/// end of \p BB and conditionally branch on the copied condition. We only 271/// branch on a single value. 279for (
Value *Inv : Invariants) {
288Direction ? &NormalSucc : &UnswitchedSucc);
291/// Copy a set of loop invariant values, and conditionally branch on them. 297for (
auto *Val :
reverse(ToDuplicate)) {
311auto *DefiningAccess = MemUse->getDefiningAccess();
312// Get the first defining access before the loop. 313while (L.contains(DefiningAccess->getBlock())) {
314// If the defining access is a MemoryPhi, get the incoming 315// value for the pre-header as defining access. 316if (
auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
318 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
320 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
331Direction ? &NormalSucc : &UnswitchedSucc);
334/// Rewrite the PHI nodes in an unswitched loop exit basic block. 336/// Requires that the loop exit and unswitched basic block are the same, and 337/// that the exiting block was a unique predecessor of that block. Rewrites the 338/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial 339/// PHI nodes from the old preheader that now contains the unswitched 345// When the loop exit is directly unswitched we just need to update the 346// incoming basic block. We loop to handle weird cases with repeated 347// incoming blocks, but expect to typically only have one operand here. 348for (
auto i : seq<int>(0, PN.getNumOperands())) {
349assert(PN.getIncomingBlock(i) == &OldExitingBB &&
350"Found incoming block different from unique predecessor!");
351 PN.setIncomingBlock(i, &OldPH);
356/// Rewrite the PHI nodes in the loop exit basic block and the split off 359/// Because the exit block remains an exit from the loop, this rewrites the 360/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI 361/// nodes into the unswitched basic block to select between the value in the 362/// old preheader and the loop exit. 368assert(&ExitBB != &UnswitchedBB &&
369"Must have different loop exit and unswitched blocks!");
373 PN.getName() +
".split");
374 NewPN->insertBefore(InsertPt);
376// Walk backwards over the old PHI node's inputs to minimize the cost of 377// removing each one. We have to do this weird loop manually so that we 378// create the same number of new incoming edges in the new PHI as we expect 379// each case-based edge to be included in the unswitched switch in some 381// FIXME: This is really, really gross. It would be much cleaner if LLVM 382// allowed us to create a single entry for a predecessor block without 383// having separate entries for each "edge" even though these edges are 384// required to produce identical results. 385for (
int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
386if (PN.getIncomingBlock(i) != &OldExitingBB)
391// No more edge from the old exiting block to the exit block. 392 PN.removeIncomingValue(i);
394 NewPN->addIncoming(
Incoming, &OldPH);
397// Now replace the old PHI with the new one and wire the old one in as an 398// input to the new one. 399 PN.replaceAllUsesWith(NewPN);
400 NewPN->addIncoming(&PN, &ExitBB);
404/// Hoist the current loop up to the innermost loop containing a remaining exit. 406/// Because we've removed an exit from the loop, we may have changed the set of 407/// loops reachable and need to move the current loop up the loop nest or even 408/// to an entirely separate nest. 412// If the loop is already at the top level, we can't hoist it anywhere. 413Loop *OldParentL = L.getParentLoop();
418 L.getExitBlocks(Exits);
419Loop *NewParentL =
nullptr;
420for (
auto *ExitBB : Exits)
422if (!NewParentL || NewParentL->
contains(ExitL))
425if (NewParentL == OldParentL)
428// The new parent loop (if different) should always contain the old one. 431"Can only hoist this loop up the nest!");
433// The preheader will need to move with the body of this loop. However, 434// because it isn't in this loop we also need to update the primary loop map. 436"Parent loop of this loop should contain this loop's preheader!");
439// Remove this loop from its old parent. 442// Add the loop either to the new parent or as a top-level loop. 448// Remove this loops blocks from the old parent and every other loop up the 449// nest until reaching the new parent. Also update all of these 450// no-longer-containing loops to reflect the nesting change. 451for (
Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
455 return BB == &Preheader || L.contains(BB);
458 OldContainingL->getBlocksSet().erase(&Preheader);
460 OldContainingL->getBlocksSet().erase(BB);
462// Because we just hoisted a loop out of this one, we have essentially 463// created new exit paths from it. That means we need to form LCSSA PHI 464// nodes for values used in the no-longer-nested loop. 467// We shouldn't need to form dedicated exits because the exit introduced 468// here is the (just split by unswitching) preheader. However, after trivial 469// unswitching it is possible to get new non-dedicated exits out of parent 470// loop so let's conservatively form dedicated exit blocks and figure out 471// if we can optimize later. 473/*PreserveLCSSA*/true);
477// Return the top-most loop containing ExitBB and having ExitBB as exiting block 478// or the loop containing ExitBB, if there is no parent loop containing ExitBB 483Loop *Current = TopMost;
492/// Unswitch a trivial branch if the condition is loop invariant. 494/// This routine should only be called when loop code leading to the branch has 495/// been validated as trivial (no side effects). This routine checks if the 496/// condition is invariant and one of the successors is a loop exit. This 497/// allows us to unswitch without duplicating the loop, making it trivial. 499/// If this routine fails to unswitch the branch it returns false. 501/// If the branch can be unswitched, this routine splits the preheader and 502/// hoists the branch above that split. Preserves loop simplified form 503/// (splitting the exit block as necessary). It simplifies the branch within 504/// the loop to an unconditional branch but doesn't remove it entirely. Further 505/// cleanup can be done with some simplifycfg like pass. 507/// If `SE` is not null, it will be updated based on the potential loop SCEVs 508/// invalidated by this. 515// The loop invariant values that we want to unswitch. 518// When true, we're fully unswitching the branch rather than just unswitching 519// some input conditions to the branch. 520bool FullUnswitch =
false;
523if (L.isLoopInvariant(
Cond)) {
527if (
auto *CondInst = dyn_cast<Instruction>(
Cond))
529if (Invariants.
empty()) {
535// Check that one of the branch's successors exits, and which one. 536bool ExitDirection =
true;
537int LoopExitSuccIdx = 0;
539if (L.contains(LoopExitBB)) {
540 ExitDirection =
false;
543if (L.contains(LoopExitBB)) {
555// When unswitching only part of the branch's condition, we need the exit 556// block to be reached directly from the partially unswitched input. This can 557// be done when the exit block is along the true edge and the branch condition 558// is a graph of `or` operations, or the exit block is along the false edge 559// and the condition is a graph of `and` operations. 564"non-full unswitch!\n");
570dbgs() <<
" unswitching trivial invariant conditions for: " << BI
572for (
Value *Invariant : Invariants) {
573dbgs() <<
" " << *Invariant <<
" == true";
574if (Invariant != Invariants.back())
580// If we have scalar evolutions, we need to invalidate them including this 581// loop, the loop containing the exit block and the topmost parent loop 582// exiting via LoopExitBB. 587// Forget the entire nest as this exits the entire nest. 595// Split the preheader, so that we know that there is a safe place to insert 596// the conditional branch. We will change the preheader to have a conditional 597// branch on LoopCond. 601// Now that we have a place to insert the conditional branch, create a place 602// to branch to: this is the exit block out of the loop that we are 603// unswitching. We need to split this if there are other loop predecessors. 604// Because the loop is in simplified form, *any* other predecessor is enough. 606if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
608"A branch's parent isn't a predecessor!");
609 UnswitchedBB = LoopExitBB;
612SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU,
"",
false);
618// Actually move the invariant uses into the unswitched position. If possible, 619// we do this by moving the instructions, but when doing partial unswitching 620// we do it by building a new merge of the values in the unswitched position. 623// If fully unswitching, we can use the existing branch instruction. 624// Splice it into the old PH to gate reaching the new preheader and re-point 629// Temporarily clone the terminator, to make MSSA update cheaper by 630// separating "insert edge" updates from "remove edge" ones. 633// Create a new unconditional branch that will continue the loop as a new 641// Only unswitching a subset of inputs to the condition, so we will need to 642// build a new branch that merges the invariant inputs. 645"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the " 649"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the" 652 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
656// Update the dominator tree with the added edge. 659// After the dominator tree was updated with the added edge, update MemorySSA 663 Updates.
push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
667// Finish updating dominator tree and memory ssa for full unswitch. 671// Remove the cloned branch instruction and create unconditional branch 675 Term->eraseFromParent();
684// Rewrite the relevant PHI nodes. 685if (UnswitchedBB == LoopExitBB)
689 *ParentBB, *OldPH, FullUnswitch);
691// The constant we can replace all of our invariants with inside the loop 692// body. If any of the invariants have a value other than this the loop won't 698// Since this is an i1 condition we can also trivially replace uses of it 699// within the loop with a constant. 700for (
Value *Invariant : Invariants)
703// If this was full unswitching, we may have changed the nesting relationship 704// for this loop so hoist it to its correct parent if needed. 717/// Unswitch a trivial switch if the condition is loop invariant. 719/// This routine should only be called when loop code leading to the switch has 720/// been validated as trivial (no side effects). This routine checks if the 721/// condition is invariant and that at least one of the successors is a loop 722/// exit. This allows us to unswitch without duplicating the loop, making it 725/// If this routine fails to unswitch the switch it returns false. 727/// If the switch can be unswitched, this routine splits the preheader and 728/// copies the switch above that split. If the default case is one of the 729/// exiting cases, it copies the non-exiting cases and points them at the new 730/// preheader. If the default case is not exiting, it copies the exiting cases 731/// and points the default at the preheader. It preserves loop simplified form 732/// (splitting the exit blocks as necessary). It simplifies the switch within 733/// the loop by removing now-dead cases. If the default case is one of those 734/// unswitched, it replaces its destination with a new basic block containing 735/// only unreachable. Such basic blocks, while technically loop exits, are not 736/// considered for unswitching so this is a stable transform and the same 737/// switch will not be revisited. If after unswitching there is only a single 738/// in-loop successor, the switch is further simplified to an unconditional 739/// branch. Still more cleanup can be done with some simplifycfg like pass. 741/// If `SE` is not null, it will be updated based on the potential loop SCEVs 742/// invalidated by this. 747Value *LoopCond = SI.getCondition();
749// If this isn't switching on an invariant condition, we can't unswitch it. 750if (!L.isLoopInvariant(LoopCond))
753auto *ParentBB = SI.getParent();
755// The same check must be used both for the default and the exit cases. We 756// should never leave edges from the switch instruction to a basic block that 757// we are unswitching, hence the condition used to determine the default case 758// needs to also be used to populate ExitCaseIndices, which is then used to 759// remove cases from the switch. 760auto IsTriviallyUnswitchableExitBlock = [&](
BasicBlock &BBToCheck) {
761// BBToCheck is not an exit block if it is inside loop L. 762if (L.contains(&BBToCheck))
764// BBToCheck is not trivial to unswitch if its phis aren't loop invariant. 767// We do not unswitch a block that only has an unreachable statement, as 768// it's possible this is a previously unswitched block. Only unswitch if 769// either the terminator is not unreachable, or, if it is, it's not the only 770// instruction in the block. 771auto *TI = BBToCheck.getTerminator();
772bool isUnreachable = isa<UnreachableInst>(TI);
773return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
777for (
auto Case : SI.cases())
778if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
779 ExitCaseIndices.
push_back(Case.getCaseIndex());
783if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
784 DefaultExitBB = SI.getDefaultDest();
785 }
elseif (ExitCaseIndices.
empty())
793// We may need to invalidate SCEVs for the outermost loop reached by any of 798// Check the loop containing this exit. 800if (!ExitL || ExitL->
contains(OuterL))
803for (
unsigned Index : ExitCaseIndices) {
804auto CaseI = SI.case_begin() + Index;
805// Compute the outer loop from this exit. 807if (!ExitL || ExitL->
contains(OuterL))
819// Clear out the default destination temporarily to allow accurate 820// predecessor lists to be examined below. 821 SI.setDefaultDest(
nullptr);
824// Store the exit cases into a separate data structure and remove them from 829 ExitCases.reserve(ExitCaseIndices.
size());
831// We walk the case indices backwards so that we remove the last case first 832// and don't disrupt the earlier indices. 833for (
unsigned Index :
reverse(ExitCaseIndices)) {
834auto CaseI = SI.case_begin() + Index;
835// Save the value of this case. 837 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
838// Delete the unswitched cases. 842// Check if after this all of the remaining cases point at the same 845if (SI.getNumCases() > 0 &&
847 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
849 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
851// If we're not unswitching the default, we need it to match any cases to 852// have a common successor or if we have no cases it is the common 854if (SI.getNumCases() == 0)
855 CommonSuccBB = SI.getDefaultDest();
856elseif (SI.getDefaultDest() != CommonSuccBB)
857 CommonSuccBB =
nullptr;
860// Split the preheader, so that we know that there is a safe place to insert 866// Now add the unswitched switch. This new switch instruction inherits the 867// debug location of the old switch, because it semantically replace the old 873// Rewrite the IR for the unswitched basic blocks. This requires two steps. 874// First, we split any exit blocks with remaining in-loop predecessors. Then 875// we update the PHIs in one of two ways depending on if there was a split. 876// We walk in reverse so that we split in the same order as the cases 877// appeared. This is purely for convenience of reading the resulting IR, but 878// it doesn't cost anything really. 881// Handle the default exit if necessary. 882// FIXME: It'd be great if we could merge this with the loop below but LLVM's 883// ranges aren't quite powerful enough yet. 886 UnswitchedExitBBs.
insert(DefaultExitBB);
893/*FullUnswitch*/true);
894 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
897// Note that we must use a reference in the for loop so that we update the 899for (
auto &ExitCase :
reverse(ExitCases)) {
900// Grab a reference to the exit block in the pair so that we can update it. 903// If this case is the last edge into the exit block, we can simply reuse it 904// as it will no longer be a loop exit. No mapping necessary. 907if (UnswitchedExitBBs.
insert(ExitBB).second)
912// Otherwise we need to split the exit block so that we retain an exit 913// block from the loop and a target for the unswitched condition. 914BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
916// If this is the first time we see this, do the split and remember it. 920/*FullUnswitch*/true);
922// Update the case pair to point to the split block. 923 std::get<1>(ExitCase) = SplitExitBB;
926// Now add the unswitched cases. We do this in reverse order as we built them 928for (
auto &ExitCase :
reverse(ExitCases)) {
930BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
932 NewSIW.
addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
935// If the default was unswitched, re-point it and add explicit cases for 941// We removed all the exit cases, so we just copy the cases to the 943for (
constauto &Case : SI.cases())
946 }
elseif (DefaultCaseWeight) {
947// We have to set branch weight of the default case. 949for (
constauto &Case : SI.cases()) {
952"case weight must be defined as default case weight is defined");
958// If we ended up with a common successor for every path through the switch 959// after unswitching, rewrite it to an unconditional branch to make it easy 960// to recognize. Otherwise we potentially have to recognize the default case 961// pointing at unreachable and other complexity. 964// We may have had multiple edges to this common successor block, so remove 965// them as predecessors. We skip the first one, either the default or the 967bool SkippedFirst = DefaultExitBB ==
nullptr;
968for (
auto Case : SI.cases()) {
970"Non-common successor!");
977/*KeepOneInputPHIs*/true);
979// Now nuke the switch and replace it with a direct branch. 983 }
elseif (DefaultExitBB) {
984assert(SI.getNumCases() > 0 &&
985"If we had no cases we'd have a common successor!");
986// Move the last case to the default successor. This is valid as if the 987// default got unswitched it cannot be reached. This has the advantage of 988// being simple and keeping the number of edges from this switch to 989// successors the same, and avoiding any PHI update complexity. 990auto LastCaseI = std::prev(SI.case_end());
992 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
998// Walk the unswitched exit blocks and the unswitched split blocks and update 999// the dominator tree based on the CFG edits. While we are walking unordered 1000// containers here, the API for applyUpdates takes an unordered list of 1001// updates and requires them to not contain duplicates. 1003for (
auto *UnswitchedExitBB : UnswitchedExitBBs) {
1007for (
auto SplitUnswitchedPair : SplitExitBBMap) {
1008 DTUpdates.
push_back({DT.
Delete, ParentBB, SplitUnswitchedPair.first});
1020assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
1022// We may have changed the nesting relationship for this loop so hoist it to 1023// its correct parent if needed. 1035/// This routine scans the loop to find a branch or switch which occurs before 1036/// any side effects occur. These can potentially be unswitched without 1037/// duplicating the loop. If a branch or switch is successfully unswitched the 1038/// scanning continues to see if subsequent branches or switches have become 1039/// trivial. Once all trivial candidates have been unswitched, this routine 1042/// The return value indicates whether anything was unswitched (and therefore 1045/// If `SE` is not null, it will be updated based on the potential loop SCEVs 1046/// invalidated by this. 1052// If loop header has only one reachable successor we should keep looking for 1053// trivial condition candidates in the successor as well. An alternative is 1054// to constant fold conditions and merge successors into loop header (then we 1055// only need to check header's terminator). The reason for not doing this in 1056// LoopUnswitch pass is that it could potentially break LoopPassManager's 1057// invariants. Folding dead branches could either eliminate the current loop 1058// or make other loops unreachable. LCSSA form might also not be preserved 1059// after deleting branches. The following code keeps traversing loop header's 1060// successors until it finds the trivial condition candidate (condition that 1061// is not a constant). Since unswitching generates branches with constant 1062// conditions, this scenario could be very common in practice. 1065 Visited.
insert(CurrentBB);
1067// Check if there are any side-effecting instructions (e.g. stores, calls, 1068// volatile loads) in the part of the loop that the code *would* execute 1069// without unswitching. 1070if (MSSAU)
// Possible early exit with MSSA 1072if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1080if (
auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1081// Don't bother trying to unswitch past a switch with a constant 1082// condition. This should be removed prior to running this pass by 1084if (isa<Constant>(SI->getCondition()))
1088// Couldn't unswitch this one so we're done. 1091// Mark that we managed to unswitch something. 1094// If unswitching turned the terminator into an unconditional branch then 1095// we can continue. The unswitching logic specifically works to fold any 1096// cases it can into an unconditional branch to make it easier to 1099if (!BI || BI->isConditional())
1102 CurrentBB = BI->getSuccessor(0);
1106auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1108// We do not understand other terminator instructions. 1111// Don't bother trying to unswitch past an unconditional branch or a branch 1112// with a constant value. These should be removed by simplifycfg prior to 1113// running this pass. 1114if (!BI->isConditional() ||
1118// Found a trivial condition candidate: non-foldable conditional branch. If 1119// we fail to unswitch this, we can't do anything else that is trivial. 1123// Mark that we managed to unswitch something. 1126// If we only unswitched some of the conditions feeding the branch, we won't 1127// have collapsed it to a single successor. 1129if (BI->isConditional())
1132// Follow the newly unconditional branch into its successor. 1133 CurrentBB = BI->getSuccessor(0);
1135// When continuing, if we exit the loop or reach a previous visited block, 1136// then we can not reach any trivial condition candidates (unfoldable 1137// branch instructions or switch instructions) and no unswitch can happen. 1138 }
while (L.contains(CurrentBB) && Visited.
insert(CurrentBB).second);
1143/// Build the cloned blocks for an unswitched copy of the given loop. 1145/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and 1146/// after the split block (`SplitBB`) that will be used to select between the 1147/// cloned and original loop. 1149/// This routine handles cloning all of the necessary loop blocks and exit 1150/// blocks including rewriting their instructions and the relevant PHI nodes. 1151/// Any loop blocks or exit blocks which are dominated by a different successor 1152/// than the one for this clone of the loop blocks can be trivially skipped. We 1153/// use the `DominatingSucc` map to determine whether a block satisfies that 1154/// property with a simple map lookup. 1156/// It also correctly creates the unconditional branch in the cloned 1157/// unswitched parent block to only point at the unswitched successor. 1159/// This does not handle most of the necessary updates to `LoopInfo`. Only exit 1160/// block splitting is correctly reflected in `LoopInfo`, essentially all of 1161/// the cloned blocks (and their loops) are left without full `LoopInfo` 1162/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned 1163/// blocks to them but doesn't create the cloned `DominatorTree` structure and 1164/// instead the caller must recompute an accurate DT. It *does* correctly 1165/// update the `AssumptionCache` provided in `AC`. 1176 NewBlocks.
reserve(L.getNumBlocks() + ExitBlocks.
size());
1178// We will need to clone a bunch of blocks, wrap up the clone operation in 1181// Clone the basic block and insert it before the new preheader. 1185// Record this block and the mapping. 1187 VMap[OldBB] = NewBB;
1192// We skip cloning blocks when they have a dominating succ that is not the 1193// succ we are cloning for. 1195auto It = DominatingSucc.
find(BB);
1196return It != DominatingSucc.
end() && It->second != UnswitchedSuccBB;
1199// First, clone the preheader. 1200auto *ClonedPH = CloneBlock(LoopPH);
1202// Then clone all the loop blocks, skipping the ones that aren't necessary. 1203for (
auto *LoopBB : L.blocks())
1204if (!SkipBlock(LoopBB))
1207// Split all the loop exit edges so that when we clone the exit blocks, if 1208// any of the exit blocks are *also* a preheader for some other loop, we 1209// don't create multiple predecessors entering the loop header. 1210for (
auto *ExitBB : ExitBlocks) {
1211if (SkipBlock(ExitBB))
1214// When we are going to clone an exit, we don't need to clone all the 1215// instructions in the exit block and we want to ensure we have an easy 1216// place to merge the CFG, so split the exit first. This is always safe to 1217// do because there cannot be any non-loop predecessors of a loop exit in 1218// loop simplified form. 1219auto *MergeBB =
SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1221// Rearrange the names to make it easier to write test cases by having the 1222// exit block carry the suffix rather than the merge block carrying the 1224 MergeBB->takeName(ExitBB);
1225 ExitBB->setName(
Twine(MergeBB->getName()) +
".split");
1227// Now clone the original exit block. 1228auto *ClonedExitBB = CloneBlock(ExitBB);
1229assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1230"Exit block should have been split to have one successor!");
1231assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1232"Cloned exit block has the wrong successor!");
1234// Remap any cloned instructions and create a merge phi node for them. 1238 std::prev(ClonedExitBB->end())))) {
1242// The only instructions in the exit block should be PHI nodes and 1243// potentially a landing pad. 1245 (isa<PHINode>(
I) || isa<LandingPadInst>(
I) || isa<CatchPadInst>(
I)) &&
1246"Bad instruction in exit block!");
1247// We should have a value map between the instruction and its clone. 1248assert(VMap.
lookup(&
I) == &ClonedI &&
"Mismatch in the value map!");
1250// Forget SCEVs based on exit phis in case SCEV looked through the phi. 1252if (
auto *PN = dyn_cast<PHINode>(&
I))
1259 MergePN->insertBefore(InsertPt);
1260 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1261I.replaceAllUsesWith(MergePN);
1262 MergePN->addIncoming(&
I, ExitBB);
1263 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1267// Rewrite the instructions in the cloned blocks to refer to the instructions 1268// in the cloned blocks. We have to do this as a second pass so that we have 1269// everything available. Also, we have inserted new instructions which may 1270// include assume intrinsics, so we update the assumption cache while 1272Module *M = ClonedPH->getParent()->getParent();
1273for (
auto *ClonedBB : NewBlocks)
1279if (
auto *
II = dyn_cast<AssumeInst>(&
I))
1283// Update any PHI nodes in the cloned successors of the skipped blocks to not 1284// have spurious incoming values. 1285for (
auto *LoopBB : L.blocks())
1286if (SkipBlock(LoopBB))
1288if (
auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB)))
1289for (
PHINode &PN : ClonedSuccBB->phis())
1290 PN.removeIncomingValue(LoopBB,
/*DeletePHIIfEmpty*/false);
1292// Remove the cloned parent as a predecessor of any successor we ended up 1293// cloning other than the unswitched one. 1294auto *ClonedParentBB = cast<BasicBlock>(VMap.
lookup(ParentBB));
1296if (SuccBB == UnswitchedSuccBB)
1299auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB));
1303 ClonedSuccBB->removePredecessor(ClonedParentBB,
1304/*KeepOneInputPHIs*/true);
1307// Replace the cloned branch with an unconditional branch to the cloned 1308// unswitched successor. 1309auto *ClonedSuccBB = cast<BasicBlock>(VMap.
lookup(UnswitchedSuccBB));
1310Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1311// Trivial Simplification. If Terminator is a conditional branch and 1312// condition becomes dead - erase it. 1313Value *ClonedConditionToErase =
nullptr;
1314if (
auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1315 ClonedConditionToErase = BI->getCondition();
1316elseif (
auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1317 ClonedConditionToErase = SI->getCondition();
1323if (ClonedConditionToErase)
1327// If there are duplicate entries in the PHI nodes because of multiple edges 1328// to the unswitched successor, we need to nuke all but one as we replaced it 1329// with a direct branch. 1330for (
PHINode &PN : ClonedSuccBB->phis()) {
1332// Loop over the incoming operands backwards so we can easily delete as we 1333// go without invalidating the index. 1334for (
int i = PN.getNumOperands() - 1; i >= 0; --i) {
1335if (PN.getIncomingBlock(i) != ClonedParentBB)
1341 PN.removeIncomingValue(i,
/*DeletePHIIfEmpty*/false);
1345// Record the domtree updates for the new blocks. 1347for (
auto *ClonedBB : NewBlocks) {
1349if (SuccSet.
insert(SuccBB).second)
1350 DTUpdates.
push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1357/// Recursively clone the specified loop and all of its children. 1359/// The target parent loop for the clone should be provided, or can be null if 1360/// the clone is a top-level loop. While cloning, all the blocks are mapped 1361/// with the provided value map. The entire original loop must be present in 1362/// the value map. The cloned loop is returned. 1365auto AddClonedBlocksToLoop = [&](
Loop &OrigL,
Loop &ClonedL) {
1366assert(ClonedL.getBlocks().empty() &&
"Must start with an empty loop!");
1368for (
auto *BB : OrigL.
blocks()) {
1369auto *ClonedBB = cast<BasicBlock>(VMap.
lookup(BB));
1370 ClonedL.addBlockEntry(ClonedBB);
1376// We specially handle the first loop because it may get cloned into 1377// a different parent and because we most commonly are cloning leaf loops. 1383 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1388// If we have a nest, we can quickly clone the entire loop nest using an 1389// iterative approach because it is a tree. We keep the cloned parent in the 1390// data structure to avoid repeatedly querying through a map to find it. 1392// Build up the loops to clone in reverse order as we'll clone them from the 1395 LoopsToClone.
push_back({ClonedRootL, ChildL});
1397Loop *ClonedParentL, *L;
1398 std::tie(ClonedParentL, L) = LoopsToClone.
pop_back_val();
1401 AddClonedBlocksToLoop(*L, *ClonedL);
1403 LoopsToClone.
push_back({ClonedL, ChildL});
1404 }
while (!LoopsToClone.
empty());
1409/// Build the cloned loops of an original loop from unswitching. 1411/// Because unswitching simplifies the CFG of the loop, this isn't a trivial 1412/// operation. We need to re-verify that there even is a loop (as the backedge 1413/// may not have been cloned), and even if there are remaining backedges the 1414/// backedge set may be different. However, we know that each child loop is 1415/// undisturbed, we only need to find where to place each child loop within 1416/// either any parent loop or within a cloned version of the original loop. 1418/// Because child loops may end up cloned outside of any cloned version of the 1419/// original loop, multiple cloned sibling loops may be created. All of them 1420/// are returned so that the newly introduced loop nest roots can be 1425Loop *ClonedL =
nullptr;
1430auto *ClonedPH = cast<BasicBlock>(VMap.
lookup(OrigPH));
1431auto *ClonedHeader = cast<BasicBlock>(VMap.
lookup(OrigHeader));
1433// We need to know the loops of the cloned exit blocks to even compute the 1434// accurate parent loop. If we only clone exits to some parent of the 1435// original parent, we want to clone into that outer loop. We also keep track 1436// of the loops that our cloned exit blocks participate in. 1437Loop *ParentL =
nullptr;
1441for (
auto *ExitBB : ExitBlocks)
1442if (
auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.
lookup(ExitBB)))
1444 ExitLoopMap[ClonedExitBB] = ExitL;
1445 ClonedExitsInLoops.
push_back(ClonedExitBB);
1446if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1451"The computed parent loop should always contain (or be) the parent of " 1452"the original loop.");
1454// We build the set of blocks dominated by the cloned header from the set of 1455// cloned blocks out of the original loop. While not all of these will 1456// necessarily be in the cloned loop, it is enough to establish that they 1457// aren't in unreachable cycles, etc. 1459for (
auto *BB : OrigL.
blocks())
1460if (
auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB)))
1461 ClonedLoopBlocks.
insert(ClonedBB);
1463// Rebuild the set of blocks that will end up in the cloned loop. We may have 1464// skipped cloning some region of this loop which can in turn skip some of 1465// the backedges so we have to rebuild the blocks in the loop based on the 1466// backedges that remain after cloning. 1470// The only possible non-loop header predecessor is the preheader because 1471// we know we cloned the loop in simplified form. 1472if (Pred == ClonedPH)
1475// Because the loop was in simplified form, the only non-loop predecessor 1476// should be the preheader. 1477assert(ClonedLoopBlocks.
count(Pred) &&
"Found a predecessor of the loop " 1478"header other than the preheader " 1479"that is not part of the loop!");
1481// Insert this block into the loop set and on the first visit (and if it 1482// isn't the header we're currently walking) put it into the worklist to 1484if (BlocksInClonedLoop.
insert(Pred).second && Pred != ClonedHeader)
1488// If we had any backedges then there *is* a cloned loop. Put the header into 1489// the loop set and then walk the worklist backwards to find all the blocks 1490// that remain within the loop after cloning. 1491if (!BlocksInClonedLoop.
empty()) {
1492 BlocksInClonedLoop.
insert(ClonedHeader);
1494while (!Worklist.
empty()) {
1497"Didn't put block into the loop set!");
1499// Insert any predecessors that are in the possible set into the cloned 1500// set, and if the insert is successful, add them to the worklist. Note 1501// that we filter on the blocks that are definitely reachable via the 1502// backedge to the loop header so we may prune out dead code within the 1505if (ClonedLoopBlocks.
count(Pred) &&
1506 BlocksInClonedLoop.
insert(Pred).second)
1520// We don't want to just add the cloned loop blocks based on how we 1521// discovered them. The original order of blocks was carefully built in 1522// a way that doesn't rely on predecessor ordering. Rather than re-invent 1523// that logic, we just re-walk the original blocks (and those of the child 1524// loops) and filter them as we add them into the cloned loop. 1525for (
auto *BB : OrigL.
blocks()) {
1526auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB));
1527if (!ClonedBB || !BlocksInClonedLoop.
count(ClonedBB))
1530// Directly add the blocks that are only in this loop. 1536// We want to manually add it to this loop and parents. 1537// Registering it with LoopInfo will happen when we clone the top 1538// loop for this block. 1539for (
Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1540 PL->addBlockEntry(ClonedBB);
1543// Now add each child loop whose header remains within the cloned loop. All 1544// of the blocks within the loop must satisfy the same constraints as the 1545// header so once we pass the header checks we can just clone the entire 1547for (
Loop *ChildL : OrigL) {
1548auto *ClonedChildHeader =
1549 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1550if (!ClonedChildHeader || !BlocksInClonedLoop.
count(ClonedChildHeader))
1554// We should never have a cloned child loop header but fail to have 1555// all of the blocks for that child loop. 1556for (
auto *ChildLoopBB : ChildL->blocks())
1558 cast<BasicBlock>(VMap.
lookup(ChildLoopBB))) &&
1559"Child cloned loop has a header within the cloned outer " 1560"loop but not all of its blocks!");
1567// Now that we've handled all the components of the original loop that were 1568// cloned into a new loop, we still need to handle anything from the original 1569// loop that wasn't in a cloned loop. 1571// Figure out what blocks are left to place within any loop nest containing 1572// the unswitched loop. If we never formed a loop, the cloned PH is one of 1575if (BlocksInClonedLoop.
empty())
1576 UnloopedBlockSet.
insert(ClonedPH);
1577for (
auto *ClonedBB : ClonedLoopBlocks)
1578if (!BlocksInClonedLoop.
count(ClonedBB))
1579 UnloopedBlockSet.
insert(ClonedBB);
1581// Copy the cloned exits and sort them in ascending loop depth, we'll work 1582// backwards across these to process them inside out. The order shouldn't 1583// matter as we're just trying to build up the map from inside-out; we use 1584// the map in a more stably ordered way below. 1585auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1587return ExitLoopMap.
lookup(
LHS)->getLoopDepth() <
1588 ExitLoopMap.
lookup(
RHS)->getLoopDepth();
1591// Populate the existing ExitLoopMap with everything reachable from each 1592// exit, starting from the inner most exit. 1593while (!UnloopedBlockSet.
empty() && !OrderedClonedExitsInLoops.empty()) {
1594assert(Worklist.
empty() &&
"Didn't clear worklist!");
1596BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1599// Walk the CFG back until we hit the cloned PH adding everything reachable 1600// and in the unlooped set to this exit block's loop. 1604// We can stop recursing at the cloned preheader (if we get there). 1609// If this pred has already been moved to our set or is part of some 1610// (inner) loop, no update needed. 1611if (!UnloopedBlockSet.
erase(PredBB)) {
1613 (BlocksInClonedLoop.
count(PredBB) || ExitLoopMap.
count(PredBB)) &&
1614"Predecessor not mapped to a loop!");
1618// We just insert into the loop set here. We'll add these blocks to the 1619// exit loop after we build up the set in an order that doesn't rely on 1620// predecessor order (which in turn relies on use list order). 1621bool Inserted = ExitLoopMap.
insert({PredBB, ExitL}).second;
1623assert(Inserted &&
"Should only visit an unlooped block once!");
1625// And recurse through to its predecessors. 1628 }
while (!Worklist.
empty());
1631// Now that the ExitLoopMap gives as mapping for all the non-looping cloned 1632// blocks to their outer loops, walk the cloned blocks and the cloned exits 1633// in their original order adding them to the correct loop. 1635// We need a stable insertion order. We use the order of the original loop 1636// order and map into the correct parent loop. 1637for (
auto *BB : llvm::concat<BasicBlock *const>(
1638ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1640 OuterL->addBasicBlockToLoop(BB, LI);
1643for (
auto &BBAndL : ExitLoopMap) {
1644auto *BB = BBAndL.first;
1645auto *OuterL = BBAndL.second;
1647"Failed to put all blocks into outer loops!");
1651// Now that all the blocks are placed into the correct containing loop in the 1652// absence of child loops, find all the potentially cloned child loops and 1653// clone them into whatever outer loop we placed their header into. 1654for (
Loop *ChildL : OrigL) {
1655auto *ClonedChildHeader =
1656 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1657if (!ClonedChildHeader || BlocksInClonedLoop.
count(ClonedChildHeader))
1661for (
auto *ChildLoopBB : ChildL->blocks())
1663"Cloned a child loop header but not all of that loops blocks!");
1667 *ChildL, ExitLoopMap.
lookup(ClonedChildHeader), VMap, LI));
1673ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1675// Find all the dead clones, and remove them from their successors. 1677for (
BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1678for (
constauto &VMap : VMaps)
1679if (
BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1682 SuccBB->removePredecessor(ClonedBB);
1686// Remove all MemorySSA in the dead blocks 1693// Drop any remaining references to break cycles. 1695 BB->dropAllReferences();
1696// Erase them from the IR. 1698 BB->eraseFromParent();
1707// Find all the dead blocks tied to this loop, and remove them from their 1711// Start with loop/exit blocks and get a transitive closure of reachable dead 1715 DeathCandidates.
append(L.blocks().begin(), L.blocks().end());
1716while (!DeathCandidates.
empty()) {
1720 SuccBB->removePredecessor(BB);
1727// Remove all MemorySSA in the dead blocks 1731// Filter out the dead blocks from the exit blocks list so that it can be 1732// used in the caller. 1736// Walk from this loop up through its parents removing all of the dead blocks. 1737for (
Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1738for (
auto *BB : DeadBlockSet)
1739 ParentL->getBlocksSet().erase(BB);
1741 [&](
BasicBlock *BB) { return DeadBlockSet.count(BB); });
1744// Now delete the dead child loops. This raw delete will clear them 1747 if (!DeadBlockSet.count(ChildL->getHeader()))
1750 assert(llvm::all_of(ChildL->blocks(),
1751 [&](BasicBlock *ChildBB) {
1752 return DeadBlockSet.count(ChildBB);
1754"If the child loop header is dead all blocks in the child loop must " 1763// Remove the loop mappings for the dead blocks and drop all the references 1764// from these blocks to others to handle cyclic references as we start 1765// deleting the blocks themselves. 1766for (
auto *BB : DeadBlockSet) {
1767// Check that the dominator tree has already been updated. 1768assert(!DT.
getNode(BB) &&
"Should already have cleared domtree!");
1769 LI.changeLoopFor(BB,
nullptr);
1770// Drop all uses of the instructions to make sure we won't have dangling 1771// uses in other blocks. 1775 BB->dropAllReferences();
1778// Actually delete the blocks now that they've been fully unhooked from the 1780for (
auto *BB : DeadBlockSet)
1781 BB->eraseFromParent();
1784/// Recompute the set of blocks in a loop after unswitching. 1786/// This walks from the original headers predecessors to rebuild the loop. We 1787/// take advantage of the fact that new blocks can't have been added, and so we 1788/// filter by the original loop's blocks. This also handles potentially 1789/// unreachable code that we don't want to explore but might be found examining 1790/// the predecessors of the header. 1792/// If the original loop is no longer a loop, this will return an empty set. If 1793/// it remains a loop, all the blocks within it will be added to the set 1794/// (including those blocks in inner loops). 1799auto *PH = L.getLoopPreheader();
1800auto *Header = L.getHeader();
1802// A worklist to use while walking backwards from the header. 1805// First walk the predecessors of the header to find the backedges. This will 1806// form the basis of our walk. 1808// Skip the preheader. 1812// Because the loop was in simplified form, the only non-loop predecessor 1814assert(L.contains(Pred) &&
"Found a predecessor of the loop header other " 1815"than the preheader that is not part of the " 1818// Insert this block into the loop set and on the first visit and, if it 1819// isn't the header we're currently walking, put it into the worklist to 1821if (LoopBlockSet.
insert(Pred).second && Pred != Header)
1825// If no backedges were found, we're done. 1826if (LoopBlockSet.
empty())
1829// We found backedges, recurse through them to identify the loop blocks. 1830while (!Worklist.
empty()) {
1832assert(LoopBlockSet.
count(BB) &&
"Didn't put block into the loop set!");
1834// No need to walk past the header. 1838// Because we know the inner loop structure remains valid we can use the 1839// loop structure to jump immediately across the entire nested loop. 1840// Further, because it is in loop simplified form, we can directly jump 1841// to its preheader afterward. 1844assert(L.contains(InnerL) &&
1845"Should not reach a loop *outside* this loop!");
1846// The preheader is the only possible predecessor of the loop so 1847// insert it into the set and check whether it was already handled. 1848auto *InnerPH = InnerL->getLoopPreheader();
1849assert(L.contains(InnerPH) &&
"Cannot contain an inner loop block " 1850"but not contain the inner loop " 1852if (!LoopBlockSet.
insert(InnerPH).second)
1853// The only way to reach the preheader is through the loop body 1854// itself so if it has been visited the loop is already handled. 1857// Insert all of the blocks (other than those already present) into 1858// the loop set. We expect at least the block that led us to find the 1859// inner loop to be in the block set, but we may also have other loop 1860// blocks if they were already enqueued as predecessors of some other 1862for (
auto *InnerBB : InnerL->blocks()) {
1865"Block should already be in the set!");
1869 LoopBlockSet.
insert(InnerBB);
1872// Add the preheader to the worklist so we will continue past the 1878// Insert any predecessors that were in the original loop into the new 1879// set, and if the insert is successful, add them to the worklist. 1881if (L.contains(Pred) && LoopBlockSet.
insert(Pred).second)
1885assert(LoopBlockSet.
count(Header) &&
"Cannot fail to add the header!");
1887// We've found all the blocks participating in the loop, return our completed 1892/// Rebuild a loop after unswitching removes some subset of blocks and edges. 1894/// The removal may have removed some child loops entirely but cannot have 1895/// disturbed any remaining child loops. However, they may need to be hoisted 1896/// to the parent loop (or to be top-level loops). The original loop may be 1897/// completely removed. 1899/// The sibling loops resulting from this update are returned. If the original 1900/// loop remains a valid loop, it will be the first entry in this list with all 1901/// of the newly sibling loops following it. 1903/// Returns true if the loop remains a loop after unswitching, and false if it 1904/// is no longer a loop after unswitching (and should not continue to be 1910auto *PH = L.getLoopPreheader();
1912// Compute the actual parent loop from the exit blocks. Because we may have 1913// pruned some exits the loop may be different from the original parent. 1914Loop *ParentL =
nullptr;
1918for (
auto *ExitBB : ExitBlocks)
1922if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1926// Recompute the blocks participating in this loop. This may be empty if it 1927// is no longer a loop. 1930// If we still have a loop, we need to re-set the loop's parent as the exit 1931// block set changing may have moved it within the loop nest. Note that this 1932// can only happen when this loop has a parent as it can only hoist the loop 1934if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1935// Remove this loop's (original) blocks from all of the intervening loops. 1936for (
Loop *IL = L.getParentLoop(); IL != ParentL;
1938 IL->getBlocksSet().erase(PH);
1939for (
auto *BB : L.blocks())
1940 IL->getBlocksSet().erase(BB);
1942 return BB == PH || L.contains(BB);
1947 L.getParentLoop()->removeChildLoop(&L);
1954// Now we update all the blocks which are no longer within the loop. 1955auto &
Blocks = L.getBlocksVector();
1957 LoopBlockSet.empty()
1959 : std::stable_partition(
1961 [&](
BasicBlock *BB) { return LoopBlockSet.count(BB); });
1963// Before we erase the list of unlooped blocks, build a set of them. 1965if (LoopBlockSet.empty())
1966 UnloopedBlocks.
insert(PH);
1968// Now erase these blocks from the loop. 1970 L.getBlocksSet().erase(BB);
1973// Sort the exits in ascending loop depth, we'll work backwards across these 1974// to process them inside out. 1979// We'll build up a set for each exit loop. 1981Loop *PrevExitL = L.getParentLoop();
// The deepest possible exit loop. 1983auto RemoveUnloopedBlocksFromLoop =
1985for (
auto *BB : UnloopedBlocks)
1986 L.getBlocksSet().erase(BB);
1988 return UnloopedBlocks.count(BB);
1993while (!UnloopedBlocks.
empty() && !ExitsInLoops.
empty()) {
1994assert(Worklist.
empty() &&
"Didn't clear worklist!");
1995assert(NewExitLoopBlocks.empty() &&
"Didn't clear loop set!");
1997// Grab the next exit block, in decreasing loop depth order. 2000assert(ExitL.
contains(&L) &&
"Exit loop must contain the inner loop!");
2002// Erase all of the unlooped blocks from the loops between the previous 2003// exit loop and this exit loop. This works because the ExitInLoops list is 2004// sorted in increasing order of loop depth and thus we visit loops in 2005// decreasing order of loop depth. 2006for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->
getParentLoop())
2007 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2009// Walk the CFG back until we hit the cloned PH adding everything reachable 2010// and in the unlooped set to this exit block's loop. 2014// We can stop recursing at the cloned preheader (if we get there). 2019// If this pred has already been moved to our set or is part of some 2020// (inner) loop, no update needed. 2021if (!UnloopedBlocks.
erase(PredBB)) {
2022assert((NewExitLoopBlocks.count(PredBB) ||
2024"Predecessor not in a nested loop (or already visited)!");
2028// We just insert into the loop set here. We'll add these blocks to the 2029// exit loop after we build up the set in a deterministic order rather 2030// than the predecessor-influenced visit order. 2031bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2033assert(Inserted &&
"Should only visit an unlooped block once!");
2035// And recurse through to its predecessors. 2038 }
while (!Worklist.
empty());
2040// If blocks in this exit loop were directly part of the original loop (as 2041// opposed to a child loop) update the map to point to this exit loop. This 2042// just updates a map and so the fact that the order is unstable is fine. 2043for (
auto *BB : NewExitLoopBlocks)
2045if (BBL == &L || !L.contains(BBL))
2048// We will remove the remaining unlooped blocks from this loop in the next 2049// iteration or below. 2050 NewExitLoopBlocks.clear();
2053// Any remaining unlooped blocks are no longer part of any loop unless they 2054// are part of some child loop. 2056 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2057for (
auto *BB : UnloopedBlocks)
2059if (BBL == &L || !L.contains(BBL))
2062// Sink all the child loops whose headers are no longer in the loop set to 2063// the parent (or to be top level loops). We reach into the loop and directly 2064// update its subloop vector to make this batch update efficient. 2065auto &SubLoops = L.getSubLoopsVector();
2066auto SubLoopsSplitI =
2067 LoopBlockSet.empty()
2069 : std::stable_partition(
2070 SubLoops.begin(), SubLoops.end(), [&](
Loop *SubL) {
2071 return LoopBlockSet.count(SubL->getHeader());
2073for (
auto *HoistedL :
make_range(SubLoopsSplitI, SubLoops.end())) {
2075 HoistedL->setParentLoop(
nullptr);
2077// To compute the new parent of this hoisted loop we look at where we 2078// placed the preheader above. We can't lookup the header itself because we 2079// retained the mapping from the header to the hoisted loop. But the 2080// preheader and header should have the exact same new parent computed 2081// based on the set of exit blocks from the original loop as the preheader 2082// is a predecessor of the header and so reached in the reverse walk. And 2083// because the loops were all in simplified form the preheader of the 2084// hoisted loop can't be part of some *other* loop. 2085if (
auto *NewParentL = LI.
getLoopFor(HoistedL->getLoopPreheader()))
2086 NewParentL->addChildLoop(HoistedL);
2090 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2092// Actually delete the loop if nothing remained within it. 2094assert(SubLoops.empty() &&
2095"Failed to remove all subloops from the original loop!");
2096if (
Loop *ParentL = L.getParentLoop())
2100// markLoopAsDeleted for L should be triggered by the caller (it is 2101// typically done within postUnswitch). 2111/// Helper to visit a dominator subtree, invoking a callable on each node. 2113/// Returning false at any point will stop walking past that node of the tree. 2114template <
typename CallableT>
2126if (!Callable(
N->getBlock()))
2129// Accumulate the child nodes. 2132"Cannot visit a node twice when walking a tree!");
2135 }
while (!DomWorklist.
empty());
2139bool CurrentLoopValid,
bool PartiallyInvariant,
2141// If we did a non-trivial unswitch, we have added new (cloned) loops. 2142if (!NewLoops.
empty())
2143 U.addSiblingLoops(NewLoops);
2145// If the current loop remains valid, we should revisit it to catch any 2146// other unswitch opportunities. Otherwise, we need to mark it as deleted. 2147if (CurrentLoopValid) {
2148if (PartiallyInvariant) {
2149// Mark the new loop as partially unswitched, to avoid unswitching on 2150// the same condition again. 2151auto &Context = L.getHeader()->getContext();
2154MDString::get(Context,
"llvm.loop.unswitch.partial.disable"));
2156 Context, L.getLoopID(), {
"llvm.loop.unswitch.partial"},
2157 {DisableUnswitchMD});
2158 L.setLoopID(NewLoopID);
2159 }
elseif (InjectedCondition) {
2160// Do the same for injection of invariant conditions. 2161auto &Context = L.getHeader()->getContext();
2164MDString::get(Context,
"llvm.loop.unswitch.injection.disable"));
2166 Context, L.getLoopID(), {
"llvm.loop.unswitch.injection"},
2167 {DisableUnswitchMD});
2168 L.setLoopID(NewLoopID);
2170 U.revisitCurrentLoop();
2172 U.markLoopAsDeleted(L, LoopName);
2179LPMUpdater &LoopUpdater,
bool InsertFreeze,
bool InjectedCondition) {
2182SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2184// Save the current loop name in a variable so that we can report it even 2185// after it has been deleted. 2186 std::string LoopName(L.getName());
2188// We can only unswitch switches, conditional branches with an invariant 2189// condition, or combining invariant conditions with an instruction or 2190// partially invariant instructions. 2192"Can only unswitch switches and conditional branch!");
2196 !PartiallyInvariant);
2199"Cannot have other invariants with full unswitching!");
2202"Partial unswitching requires an instruction as the condition!");
2207// Constant and BBs tracking the cloned and continuing successor. When we are 2208// unswitching the entire condition, this can just be trivially chosen to 2209// unswitch towards `true`. However, when we are unswitching a set of 2210// invariants combined with `and` or `or` or partially invariant instructions, 2211// the combining operation determines the best direction to unswitch: we want 2212// to unswitch the direction that will collapse the branch. 2219 PartiallyInvariant) &&
2220"Only `or`, `and`, an `select`, partially invariant instructions " 2221"can combine invariants being unswitched.");
2232 BI ? BI->
getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2237for (
auto Case : SI->cases())
2238if (Case.getCaseSuccessor() != RetainedSuccBB)
2239 UnswitchedSuccBBs.
insert(Case.getCaseSuccessor());
2242"Should not unswitch the same successor we are retaining!");
2244// The branch should be in this exact loop. Any inner loop's invariant branch 2245// should be handled by unswitching that inner loop. The caller of this 2246// routine should filter out any candidates that remain (but were skipped for 2250// Compute the parent loop now before we start hacking on things. 2251Loop *ParentL = L.getParentLoop();
2252// Get blocks in RPO order for MSSA update, before changing the CFG. 2257// Compute the outer-most loop containing one of our exit blocks. This is the 2258// furthest up our loopnest which can be mutated, which we will use below to 2260Loop *OuterExitL = &L;
2262 L.getUniqueExitBlocks(ExitBlocks);
2263for (
auto *ExitBB : ExitBlocks) {
2264// ExitBB can be an exit block for several levels in the loop nest. Make 2265// sure we find the top most. 2267if (!NewOuterExitL) {
2268// We exited the entire nest with this block, so we're done. 2269 OuterExitL =
nullptr;
2272if (NewOuterExitL != OuterExitL && NewOuterExitL->
contains(OuterExitL))
2273 OuterExitL = NewOuterExitL;
2276// At this point, we're definitely going to unswitch something so invalidate 2277// any cached information in ScalarEvolution for the outer most loop 2278// containing an exit block and all nested loops. 2287// If the edge from this terminator to a successor dominates that successor, 2288// store a map from each block in its dominator subtree to it. This lets us 2289// tell when cloning for a particular successor if a block is dominated by 2290// some *other* successor with a single data structure. We use this to 2291// significantly reduce cloning. 2293for (
auto *SuccBB : llvm::concat<BasicBlock *const>(
ArrayRef(RetainedSuccBB),
2295if (SuccBB->getUniquePredecessor() ||
2297return PredBB == ParentBB || DT.
dominates(SuccBB, PredBB);
2300 DominatingSucc[BB] = SuccBB;
2304// Split the preheader, so that we know that there is a safe place to insert 2305// the conditional branch. We will change the preheader to have a conditional 2306// branch on LoopCond. The original preheader will become the split point 2307// between the unswitched versions, and we will have a new preheader for the 2312// Keep track of the dominator tree updates needed. 2315// Clone the loop for each unswitched successor. 2319for (
auto *SuccBB : UnswitchedSuccBBs) {
2322 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2323 DominatingSucc, *VMaps.
back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2326// Drop metadata if we may break its semantics by moving this instr into the 2328if (TI.
getMetadata(LLVMContext::MD_make_implicit)) {
2330// Do not spend time trying to understand if we can keep it, just drop it 2331// to save compile time. 2332 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2334// It is only legal to preserve make.implicit metadata if we are 2335// guaranteed no reach implicit null check after following this branch. 2339 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2343// The stitching of the branched code back together depends on whether we're 2344// doing full unswitching or not with the exception that we always want to 2345// nuke the initial terminator placed in the split block. 2346 SplitBB->getTerminator()->eraseFromParent();
2348// Keep a clone of the terminator for MSSA updates. 2350 NewTI->
insertInto(ParentBB, ParentBB->end());
2352// Splice the terminator from the original loop and rewrite its 2357// First wire up the moved terminator to the preheaders. 2364// We don't give any debug location to the new freeze, because the 2365// BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted 2370 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2372assert(SI &&
"Must either be a branch or switch!");
2374// Walk the cases and directly update their successors. 2375assert(SI->getDefaultDest() == RetainedSuccBB &&
2376"Not retaining default successor!");
2377 SI->setDefaultDest(LoopPH);
2378for (
constauto &Case : SI->cases())
2379if (Case.getCaseSuccessor() == RetainedSuccBB)
2380 Case.setSuccessor(LoopPH);
2382 Case.setSuccessor(ClonedPHs.
find(Case.getCaseSuccessor())->second);
2385 SI->setCondition(
newFreezeInst(SI->getCondition(),
2386 SI->getCondition()->getName() +
".fr",
2387 SI->getIterator()));
2389// We need to use the set to populate domtree updates as even when there 2390// are multiple cases pointing at the same successor we only want to 2391// remove and insert one edge in the domtree. 2394 {DominatorTree::Insert, SplitBB, ClonedPHs.
find(SuccBB)->second});
2401// Remove all but one edge to the retained block and all unswitched 2402// blocks. This is to avoid having duplicate entries in the cloned Phis, 2403// when we know we only keep a single edge for each case. 2408for (
auto &VMap : VMaps)
2410/*IgnoreIncomingWithNoClones=*/true);
2413// Remove all edges to unswitched blocks. 2418// Now unhook the successor relationship as we'll be replacing 2419// the terminator with a direct branch. This is much simpler for branches 2420// than switches so we handle those first. 2422// Remove the parent as a predecessor of the unswitched successor. 2424"Only one possible unswitched block for a branch!");
2427/*KeepOneInputPHIs*/true);
2428 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2430// Note that we actually want to remove the parent block as a predecessor 2431// of *every* case successor. The case successor is either unswitched, 2432// completely eliminating an edge from the parent to that successor, or it 2433// is a duplicate edge to the retained successor as the retained successor 2434// is always the default successor and as we'll replace this with a direct 2435// branch we no longer need the duplicate entries in the PHI nodes. 2438"Not retaining default successor!");
2439for (
constauto &Case : NewSI->
cases())
2440 Case.getCaseSuccessor()->removePredecessor(
2442/*KeepOneInputPHIs*/true);
2444// We need to use the set to populate domtree updates as even when there 2445// are multiple cases pointing at the same successor we only want to 2446// remove and insert one edge in the domtree. 2448 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, SuccBB});
2451// Create a new unconditional branch to the continuing block (as opposed to 2456// After MSSAU update, remove the cloned terminator instruction NewTI. 2459assert(BI &&
"Only branches have partial unswitching.");
2461"Only one possible unswitched block for a branch!");
2463// When doing a partial unswitch, we have to do a bit more work to build up 2464// the branch in the split block. 2465if (PartiallyInvariant)
2467 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH, L, MSSAU);
2470 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH,
2473 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2479// Perform MSSA cloning updates. 2480for (
auto &VMap : VMaps)
2482/*IgnoreIncomingWithNoClones=*/true);
2487// Apply the updates accumulated above to get an up-to-date dominator tree. 2490// Now that we have an accurate dominator tree, first delete the dead cloned 2491// blocks so that we can accurately build any cloned loops. It is important to 2492// not delete the blocks from the original loop yet because we still want to 2493// reference the original loop to understand the cloned loop's structure. 2496// Build the cloned loop structure itself. This may be substantially 2497// different from the original structure due to the simplified CFG. This also 2498// handles inserting all the cloned blocks into the correct loops. 2500for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2503// Now that our cloned loops have been built, we can update the original loop. 2504// First we delete the dead blocks from it and then we rebuild the loop 2505// structure taking these deletions into account. 2518// This transformation has a high risk of corrupting the dominator tree, and 2519// the below steps to rebuild loop structures will result in hard to debug 2520// errors in that case so verify that the dominator tree is sane first. 2521// FIXME: Remove this when the bugs stop showing up and rely on existing 2522// verification steps. 2523assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
2525if (BI && !PartiallyInvariant) {
2526// If we unswitched a branch which collapses the condition to a known 2527// constant we want to replace all the uses of the invariants within both 2528// the original and cloned blocks. We do this here so that we can use the 2529// now updated dominator tree to identify which side the users are on. 2531"Only one possible unswitched block for a branch!");
2534// When considering multiple partially-unswitched invariants 2535// we cant just go replace them with constants in both branches. 2537// For 'AND' we infer that true branch ("continue") means true 2538// for each invariant operand. 2539// For 'OR' we can infer that false branch ("continue") means false 2540// for each invariant operand. 2541// So it happens that for multiple-partial case we dont replace 2542// in the unswitched branch. 2543bool ReplaceUnswitched =
2544 FullUnswitch || (Invariants.
size() == 1) || PartiallyInvariant;
2552for (
Value *Invariant : Invariants) {
2553assert(!isa<Constant>(Invariant) &&
2554"Should not be replacing constant values!");
2555// Use make_early_inc_range here as set invalidates the iterator. 2557Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2561// Replace it with the 'continue' side if in the main loop body, and the 2562// unswitched if in the cloned blocks. 2564 U.set(ContinueReplacement);
2565elseif (ReplaceUnswitched &&
2567 U.set(UnswitchedReplacement);
2572// We can change which blocks are exit blocks of all the cloned sibling 2573// loops, the current loop, and any parent loops which shared exit blocks 2574// with the current loop. As a consequence, we need to re-form LCSSA for 2575// them. But we shouldn't need to re-form LCSSA for any child loops. 2576// FIXME: This could be made more efficient by tracking which exit blocks are 2577// new, and focusing on them, but that isn't likely to be necessary. 2579// In order to reasonably rebuild LCSSA we need to walk inside-out across the 2580// loop nest and update every loop that could have had its exits changed. We 2581// also need to cover any intervening loops. We add all of these loops to 2582// a list and sort them by loop depth to achieve this without updating 2583// unnecessary loops. 2584auto UpdateLoop = [&](
Loop &UpdateL) {
2586 UpdateL.verifyLoop();
2587for (
Loop *ChildL : UpdateL) {
2588 ChildL->verifyLoop();
2589assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2590"Perturbed a child loop's LCSSA form!");
2593// First build LCSSA for this loop so that we can preserve it when 2594// forming dedicated exits. We don't want to perturb some other loop's 2595// LCSSA while doing that CFG edit. 2598// For loops reached by this loop's original exit blocks we may 2599// introduced new, non-dedicated exits. At least try to re-form dedicated 2600// exits for these loops. This may fail if they couldn't have dedicated 2601// exits to start with. 2605// For non-child cloned loops and hoisted loops, we just need to update LCSSA 2606// and we can do it in any order as they don't nest relative to each other. 2608// Also check if any of the loops we have updated have become top-level loops 2609// as that will necessitate widening the outer loop scope. 2610for (
Loop *UpdatedL :
2611 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2612 UpdateLoop(*UpdatedL);
2613if (UpdatedL->isOutermost())
2614 OuterExitL =
nullptr;
2619 OuterExitL =
nullptr;
2622// If the original loop had exit blocks, walk up through the outer most loop 2623// of those exit blocks to update LCSSA and form updated dedicated exits. 2624if (OuterExitL != &L)
2625for (
Loop *OuterL = ParentL; OuterL != OuterExitL;
2627 UpdateLoop(*OuterL);
2630// Verify the entire loop structure to catch any incorrect updates before we 2631// progress in the pass pipeline. 2635// Now that we've unswitched something, make callbacks to report the changes. 2636// For that we need to merge together the updated loops and the cloned loops 2637// and check whether the original loop survived. 2639for (
Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2640if (UpdatedL->getParentLoop() == ParentL)
2642postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2643 InjectedCondition, SibLoops);
2654/// Recursively compute the cost of a dominator subtree based on the per-block 2655/// cost map provided. 2657/// The recursive computation is memozied into the provided DT-indexed cost map 2658/// to allow querying it for most nodes in the domtree without it becoming 2664// Don't accumulate cost (or recurse through) blocks not in our block cost 2665// map and thus not part of the duplication cost being considered. 2666auto BBCostIt = BBCostMap.
find(
N.getBlock());
2667if (BBCostIt == BBCostMap.
end())
2670// Lookup this node to see if we already computed its cost. 2671auto DTCostIt = DTCostMap.
find(&
N);
2672if (DTCostIt != DTCostMap.
end())
2673return DTCostIt->second;
2675// If not, we have to compute it. We can't use insert above and update 2676// because computing the cost may insert more things into the map. 2678N.begin(),
N.end(), BBCostIt->second,
2680 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2682bool Inserted = DTCostMap.
insert({&
N,
Cost}).second;
2684assert(Inserted &&
"Should not insert a node while visiting children!");
2688/// Turns a select instruction into implicit control flow branch, 2689/// making the following replacement: 2692/// --code before select-- 2693/// select %cond, %trueval, %falseval 2694/// --code after select-- 2699/// --code before select-- 2700/// br i1 %cond, label %then, label %tail 2706/// phi [ %trueval, %then ], [ %falseval, %head] 2709/// It also makes all relevant DT and LI updates, so that all structures are in 2710/// valid state after this transform. 2719 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2722 *TailBB = CondBr->getSuccessor(1);
2727PHINode::Create(SI->getType(), 2,
"unswitched.select", SI->getIterator());
2728 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2729 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2730 Phi->setDebugLoc(SI->getDebugLoc());
2731 SI->replaceAllUsesWith(Phi);
2732 SI->eraseFromParent();
2741/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch, 2742/// making the following replacement: 2744/// --code before guard-- 2745/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] 2746/// --code after guard-- 2750/// --code before guard-- 2751/// br i1 %cond, label %guarded, label %deopt 2754/// --code after guard-- 2757/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ] 2760/// It also makes all relevant DT and LI updates, so that all structures are in 2761/// valid state after this transform. 2775 GI->
getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2777// SplitBlockAndInsertIfThen inserts control flow that branches to 2778// DeoptBlockTerm if the condition is true. We want the opposite. 2782 GuardedBlock->
setName(
"guarded");
2805/// Cost multiplier is a way to limit potentially exponential behavior 2806/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch 2807/// candidates available. Also accounting for the number of "sibling" loops with 2808/// the idea to account for previous unswitches that already happened on this 2809/// cluster of loops. There was an attempt to keep this formula simple, 2810/// just enough to limit the worst case behavior. Even if it is not that simple 2811/// now it is still not an attempt to provide a detailed heuristic size 2814/// TODO: Make a proper accounting of "explosion" effect for all kinds of 2815/// unswitch candidates, making adequate predictions instead of wild guesses. 2816/// That requires knowing not just the number of "remaining" candidates but 2817/// also costs of unswitching for each of these candidates. 2823// Guards and other exiting conditions do not contribute to exponential 2824// explosion as soon as they dominate the latch (otherwise there might be 2825// another path to the latch remaining that does not allow to eliminate the 2826// loop copy on unswitch). 2833return L.contains(SuccBB);
2835 NumCostMultiplierSkipped++;
2839auto *ParentL = L.getParentLoop();
2840int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2841 : std::distance(LI.
begin(), LI.
end()));
2842// Count amount of clones that all the candidates might cause during 2843// unswitching. Branch/guard/select counts as 1, switch counts as log2 of its 2845int UnswitchedClones = 0;
2846for (
constauto &Candidate : UnswitchCandidates) {
2849bool SkipExitingSuccessors = DT.
dominates(CondBlock, Latch);
2850if (isa<SelectInst>(CI)) {
2855if (!SkipExitingSuccessors)
2859int NonExitingSuccessors =
2861 [SkipExitingSuccessors, &L](
constBasicBlock *SuccBB) {
2862return !SkipExitingSuccessors || L.contains(SuccBB);
2864 UnswitchedClones +=
Log2_32(NonExitingSuccessors);
2867// Ignore up to the "unscaled candidates" number of unswitch candidates 2868// when calculating the power-of-two scaling of the cost. The main idea 2869// with this control is to allow a small number of unswitches to happen 2870// and rely more on siblings multiplier (see below) when the number 2871// of candidates is small. 2872unsigned ClonesPower =
2875// Allowing top-level loops to spread a bit more than nested ones. 2876int SiblingsMultiplier =
2877 std::max((ParentL ? SiblingsCount
2880// Compute the cost multiplier in a way that won't overflow by saturating 2881// at an upper bound. 2887 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2891 <<
" (siblings " << SiblingsMultiplier <<
" * clones " 2892 << (1 << ClonesPower) <<
")" 2893 <<
" for unswitch candidate: " << TI <<
"\n");
2894return CostMultiplier;
2906if (isa<Constant>(
Cond))
2908if (L.isLoopInvariant(
Cond)) {
2916if (!Invariants.
empty())
2917 UnswitchCandidates.
push_back({
I, std::move(Invariants)});
2921// Whether or not we should also collect guards in the loop. 2922bool CollectGuards =
false;
2925 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
2926if (GuardDecl && !GuardDecl->use_empty())
2927 CollectGuards =
true;
2930for (
auto *BB : L.blocks()) {
2934for (
auto &
I : *BB) {
2935if (
auto *SI = dyn_cast<SelectInst>(&
I)) {
2936auto *
Cond = SI->getCondition();
2937// Do not unswitch vector selects and logical and/or selects 2938if (
Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2939 AddUnswitchCandidatesForInst(SI,
Cond);
2940 }
elseif (CollectGuards &&
isGuard(&
I)) {
2943// TODO: Support AND, OR conditions and partial unswitching. 2944if (!isa<Constant>(
Cond) && L.isLoopInvariant(
Cond))
2949if (
auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2950// We can only consider fully loop-invariant switch conditions as we need 2951// to completely eliminate the switch after unswitching. 2952if (!isa<Constant>(SI->getCondition()) &&
2953 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2954 UnswitchCandidates.
push_back({SI, {SI->getCondition()}});
2958auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2959if (!BI || !BI->isConditional() ||
2960 BI->getSuccessor(0) == BI->getSuccessor(1))
2963 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2967 !
any_of(UnswitchCandidates, [&L](
auto &TerminatorAndInvariants) {
2968return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2973dbgs() <<
"simple-loop-unswitch: Found partially invariant condition " 2974 << *
Info->InstToDuplicate[0] <<
"\n");
2975 PartialIVInfo = *
Info;
2976 PartialIVCondBranch = L.getHeader()->getTerminator();
2980 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2983return !UnswitchCandidates.
empty();
2986/// Tries to canonicalize condition described by: 2988/// br (LHS pred RHS), label IfTrue, label IfFalse 2990/// into its equivalent where `Pred` is something that we support for injected 2991/// invariants (so far it is limited to ult), LHS in canonicalized form is 2992/// non-invariant and RHS is an invariant. 2998if (!L.contains(IfTrue)) {
2999 Pred = ICmpInst::getInversePredicate(Pred);
3003// Move loop-invariant argument to RHS position. 3004if (L.isLoopInvariant(
LHS)) {
3005 Pred = ICmpInst::getSwappedPredicate(Pred);
3010// Turn "x >=s 0" into "x <u UMIN_INT" 3011 Pred = ICmpInst::ICMP_ULT;
3012RHS = ConstantInt::get(
3018/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS ) 3019/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by 3020/// injecting a loop-invariant condition. 3024if (L.isLoopInvariant(
LHS) || !L.isLoopInvariant(
RHS))
3026// TODO: Support other predicates. 3027if (Pred != ICmpInst::ICMP_ULT)
3029// TODO: Support non-loop-exiting branches? 3030if (!L.contains(IfTrue) || L.contains(IfFalse))
3032// FIXME: For some reason this causes problems with MSSA updates, need to 3033// investigate why. So far, just don't unswitch latch. 3034if (L.getHeader() == IfTrue)
3039/// Returns true, if metadata on \p BI allows us to optimize branching into \p 3040/// TakenSucc via injection of invariant conditions. The branch should be not 3041/// enough and not previously unswitched, the information about this comes from 3051assert(Weights.
size() == 2 &&
"Unexpected profile data!");
3053auto Num = Weights[
Idx];
3054auto Denom = Weights[0] + Weights[1];
3055// Degenerate or overflowed metadata. 3056if (Denom == 0 || Num > Denom)
3059if (LikelyTaken > ActualTaken)
3064/// Materialize pending invariant condition of the given candidate into IR. The 3065/// injected loop-invariant condition implies the original loop-variant branch 3066/// condition, so the materialization turns 3070/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc 3075/// %invariant_cond = LHS pred RHS 3078/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck 3080/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc 3082static NonTrivialUnswitchCandidate
3086assert(Candidate.hasPendingInjection() &&
"Nothing to inject!");
3088assert(Preheader &&
"Loop is not in simplified form?");
3090"Unswitching branch of inner loop!");
3092auto Pred = Candidate.PendingInjection->Pred;
3093auto *
LHS = Candidate.PendingInjection->LHS;
3094auto *
RHS = Candidate.PendingInjection->RHS;
3095auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3096auto *TI = cast<BranchInst>(Candidate.TI);
3097auto *BB = Candidate.TI->getParent();
3098auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3099 : TI->getSuccessor(0);
3100// FIXME: Remove this once limitation on successors is lifted. 3101assert(L.contains(InLoopSucc) &&
"Not supported yet!");
3102assert(!L.contains(OutOfLoopSucc) &&
"Not supported yet!");
3103auto &Ctx = BB->getContext();
3106assert(ICmpInst::isUnsigned(Pred) &&
"Not supported yet!");
3114// Do not use builder here: CreateICmp may simplify this into a constant and 3115// unswitching will break. Better optimize it away later. 3117 ICmpInst::Create(Instruction::ICmp, Pred,
LHS,
RHS,
"injected.cond",
3121 BB->getParent(), InLoopSucc);
3124 Builder.
CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3127 Builder.
CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3128 TI->getSuccessor(1));
3132for (
auto &
I : *InLoopSucc) {
3133auto *PN = dyn_cast<PHINode>(&
I);
3136auto *Inc = PN->getIncomingValueForBlock(BB);
3137 PN->addIncoming(Inc, CheckBlock);
3139 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3142 { DominatorTree::Insert, BB, CheckBlock },
3143 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3144 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3145 { DominatorTree::Delete, BB, OutOfLoopSucc }
3151 L.addBasicBlockToLoop(CheckBlock, LI);
3160// TODO: In fact, cost of unswitching a new invariant candidate is *slightly* 3161// higher because we have just inserted a new block. Need to think how to 3162// adjust the cost of injected candidates when it was first computed. 3163LLVM_DEBUG(
dbgs() <<
"Injected a new loop-invariant branch " << *InvariantBr
3164 <<
" and considering it for unswitching.");
3165 ++NumInvariantConditionsInjected;
3166return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3170/// Given chain of loop branch conditions looking like: 3171/// br (Variant < Invariant1) 3172/// br (Variant < Invariant2) 3173/// br (Variant < Invariant3) 3175/// collect set of invariant conditions on which we want to unswitch, which 3177/// Invariant1 <= Invariant2 3178/// Invariant2 <= Invariant3 3180/// Though they might not immediately exist in the IR, we can still inject them. 3187assert(ICmpInst::isStrictPredicate(Pred));
3188if (Compares.
size() < 2)
3191for (
auto Prev = Compares.
begin(), Next = Compares.
begin() + 1;
3192 Next != Compares.
end(); ++Prev, ++Next) {
3196 InjectedInvariant ToInject(NonStrictPred,
LHS,
RHS, InLoopSucc);
3197 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3198 std::nullopt, std::move(ToInject));
3199 UnswitchCandidates.
push_back(std::move(Candidate));
3204/// Collect unswitch candidates by invariant conditions that are not immediately 3205/// present in the loop. However, they can be injected into the code if we 3206/// decide it's profitable. 3207/// An example of such conditions is following: 3211/// if (! x <u C1) break; 3212/// if (! x <u C2) break; 3216/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <= 3217/// C2" automatically implies "x <u C2", so we can get rid of one of 3218/// loop-variant checks in unswitched loop version. 3229auto *Latch = L.getLoopLatch();
3230// Need to have a single latch and a preheader. 3233assert(L.getLoopPreheader() &&
"Must have a preheader!");
3236// Traverse the conditions that dominate latch (and therefore dominate each 3238for (
auto *DTN = DT.
getNode(Latch); L.contains(DTN->getBlock());
3239 DTN = DTN->getIDom()) {
3242BasicBlock *IfTrue =
nullptr, *IfFalse =
nullptr;
3243auto *BB = DTN->getBlock();
3244// Ignore inner loops. 3247auto *Term = BB->getTerminator();
3259// Strip ZEXT for unsigned predicate. 3260// TODO: once signed predicates are supported, also strip SEXT. 3261 CompareDesc
Desc(cast<BranchInst>(Term),
RHS, IfTrue);
3262while (
auto *Zext = dyn_cast<ZExtInst>(
LHS))
3263LHS = Zext->getOperand(0);
3264 CandidatesULT[
LHS].push_back(
Desc);
3268for (
auto &It : CandidatesULT)
3270 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3275if (!L.isSafeToClone())
3277for (
auto *BB : L.blocks())
3278for (
auto &
I : *BB) {
3279if (
I.getType()->isTokenTy() &&
I.isUsedOutsideOfBlock(BB))
3281if (
auto *CB = dyn_cast<CallBase>(&
I)) {
3282assert(!CB->cannotDuplicate() &&
"Checked by L.isSafeToClone().");
3283if (CB->isConvergent())
3288// Check if there are irreducible CFG cycles in this loop. If so, we cannot 3289// easily unswitch non-trivial edges out of the loop. Doing so might turn the 3290// irreducible control flow into reducible control flow and introduce new 3291// loops "out of thin air". If we ever discover important use cases for doing 3292// this, we can add support to loop unswitch, but it is a lot of complexity 3293// for what seems little or no real world benefit. 3296if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3300 L.getUniqueExitBlocks(ExitBlocks);
3301// We cannot unswitch if exit blocks contain a cleanuppad/catchswitch 3302// instruction as we don't know how to split those exit blocks. 3303// FIXME: We should teach SplitBlock to handle this and remove this 3305for (
auto *ExitBB : ExitBlocks) {
3306auto It = ExitBB->getFirstNonPHIIt();
3307if (isa<CleanupPadInst>(It) || isa<CatchSwitchInst>(It)) {
3308LLVM_DEBUG(
dbgs() <<
"Cannot unswitch because of cleanuppad/catchswitch " 3321// Given that unswitching these terminators will require duplicating parts of 3322// the loop, so we need to be able to model that cost. Compute the ephemeral 3323// values and set up a data structure to hold per-BB costs. We cache each 3324// block's cost so that we don't recompute this when considering different 3325// subsets of the loop for duplication during unswitching. 3330// Compute the cost of each block, as well as the total loop cost. Also, bail 3331// out if we see instructions which are incompatible with loop unswitching 3332// (convergent, noduplicate, or cross-basic-block tokens). 3333// FIXME: We might be able to safely handle some of these in non-duplicated 3336 L.getHeader()->getParent()->hasMinSize()
3340for (
auto *BB : L.blocks()) {
3342for (
auto &
I : *BB) {
3347assert(
Cost >= 0 &&
"Must not have negative costs!");
3349assert(LoopCost >= 0 &&
"Must not have negative loop costs!");
3350 BBCostMap[BB] =
Cost;
3354// Now we find the best candidate by searching for the one with the following 3355// properties in order: 3357// 1) An unswitching cost below the threshold 3358// 2) The smallest number of duplicated unswitch candidates (to avoid 3359// creating redundant subsequent unswitching) 3360// 3) The smallest cost after unswitching. 3362// We prioritize reducing fanout of unswitch candidates provided the cost 3363// remains below the threshold because this has a multiplicative effect. 3365// This requires memoizing each dominator subtree to avoid redundant work. 3367// FIXME: Need to actually do the number of candidates part above. 3369// Given a terminator which might be unswitched, computes the non-duplicated 3370// cost for that terminator. 3373// Unswitching selects unswitches the entire loop. 3374if (isa<SelectInst>(TI))
3382// Don't count successors more than once. 3383if (!Visited.
insert(SuccBB).second)
3386// If this is a partial unswitch candidate, then it must be a conditional 3387// branch with a condition of either `or`, `and`, their corresponding 3388// select forms or partially invariant instructions. In that case, one of 3389// the successors is necessarily duplicated, so don't even try to remove 3392auto &BI = cast<BranchInst>(TI);
3395if (SuccBB == BI.getSuccessor(1))
3398if (SuccBB == BI.getSuccessor(0))
3401 SuccBB == BI.getSuccessor(0)) ||
3403 SuccBB == BI.getSuccessor(1)))
3407// This successor's domtree will not need to be duplicated after 3408// unswitching if the edge to the successor dominates it (and thus the 3409// entire tree). This essentially means there is no other path into this 3410// subtree and so it will end up live in only one clone of the loop. 3411if (SuccBB->getUniquePredecessor() ||
3413return PredBB == &BB || DT.
dominates(SuccBB, PredBB);
3417"Non-duplicated cost should never exceed total loop cost!");
3421// Now scale the cost by the number of unique successors minus one. We 3422// subtract one because there is already at least one copy of the entire 3423// loop. This is computing the new cost of unswitching a condition. 3424// Note that guards always have 2 unique successors that are implicit and 3425// will be materialized if we decide to unswitch it. 3426int SuccessorsCount =
isGuard(&TI) ? 2 : Visited.
size();
3427assert(SuccessorsCount > 1 &&
3428"Cannot unswitch a condition without multiple distinct successors!");
3429return (LoopCost -
Cost) * (SuccessorsCount - 1);
3432 std::optional<NonTrivialUnswitchCandidate> Best;
3433for (
auto &Candidate : UnswitchCandidates) {
3438 !BI || Candidate.hasPendingInjection() ||
3439 (Invariants.
size() == 1 &&
3441InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3442// Calculate cost multiplier which is a tool to limit potentially 3443// exponential behavior of loop-unswitch. 3449"cost multiplier needs to be in the range of 1..UnswitchThreshold");
3450 CandidateCost *= CostMultiplier;
3452 <<
" (multiplier: " << CostMultiplier <<
")" 3453 <<
" for unswitch candidate: " << TI <<
"\n");
3456 <<
" for unswitch candidate: " << TI <<
"\n");
3459if (!Best || CandidateCost < Best->
Cost) {
3461 Best->Cost = CandidateCost;
3464assert(Best &&
"Must be!");
3468// Insert a freeze on an unswitched branch if all is true: 3469// 1. freeze-loop-unswitch-cond option is true 3470// 2. The branch may not execute in the loop pre-transformation. If a branch may 3471// not execute and could cause UB, it would always cause UB if it is hoisted outside 3472// of the loop. Insert a freeze to prevent this case. 3473// 3. The branch condition may be poison or undef 3476assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
3486if (
BranchInst *BI = dyn_cast<BranchInst>(&TI))
3491Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3499// Collect all invariant conditions within this loop (as opposed to an inner 3500// loop which would be handled when visiting that inner loop). 3505 PartialIVCondBranch, L, LI, AA, MSSAU);
3508 PartialIVCondBranch, L, DT, LI, AA,
3510// If we didn't find any candidates, we're done. 3511if (UnswitchCandidates.
empty())
3515dbgs() <<
"Considering " << UnswitchCandidates.
size()
3516 <<
" non-trivial loop invariant conditions for unswitching.\n");
3519 UnswitchCandidates, L, DT, LI, AC,
TTI, PartialIVInfo);
3521assert(Best.TI &&
"Failed to find loop unswitch candidate");
3522assert(Best.Cost &&
"Failed to compute cost");
3525LLVM_DEBUG(
dbgs() <<
"Cannot unswitch, lowest cost found: " << *Best.Cost
3530bool InjectedCondition =
false;
3531if (Best.hasPendingInjection()) {
3533 InjectedCondition =
true;
3535assert(!Best.hasPendingInjection() &&
3536"All injections should have been done by now!");
3538if (Best.TI != PartialIVCondBranch)
3542if (
auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3543// If the best candidate is a select, turn it into a branch. Select 3544// instructions with a poison conditional do not propagate poison, but 3545// branching on poison causes UB. Insert a freeze on the select 3546// conditional to prevent UB after turning the select into a branch. 3548 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3551// If the best candidate is a guard, turn it into a branch. 3558LLVM_DEBUG(
dbgs() <<
" Unswitching non-trivial (cost = " << Best.Cost
3559 <<
") terminator: " << *Best.TI <<
"\n");
3561 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3566/// Unswitch control flow predicated on loop invariant conditions. 3568/// This first hoists all branches or switches which are trivial (IE, do not 3569/// require duplicating any part of the loop) out of the loop body. It then 3570/// looks at other loop invariant control flows and tries to unswitch those as 3571/// well by cloning the loop if the result is small enough. 3573/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are 3574/// also updated based on the unswitch. The `MSSA` analysis is also updated if 3575/// valid (i.e. its use is enabled). 3577/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is 3578/// true, we will attempt to do non-trivial unswitching as well as trivial 3581/// The `postUnswitch` function will be run after unswitching is complete 3582/// with information on whether or not the provided loop remains a loop and 3583/// a list of new sibling loops created. 3585/// If `SE` is non-null, we will update that analysis based on the unswitching 3593assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3594"Loops must be in LCSSA form before unswitching.");
3596// Must be in loop simplified form: we need a preheader and dedicated exits. 3597if (!L.isLoopSimplifyForm())
3600// Try trivial unswitch first before loop over other basic blocks in the loop. 3602// If we unswitched successfully we will want to clean up the loop before 3603// processing it further so just mark it as unswitched and return. 3605/*CurrentLoopValid*/true,
/*PartiallyInvariant*/false,
3606/*InjectedCondition*/false, {});
3610constFunction *
F = L.getHeader()->getParent();
3612// Check whether we should continue with non-trivial conditions. 3613// EnableNonTrivialUnswitch: Global variable that forces non-trivial 3614// unswitching for testing and debugging. 3615// NonTrivial: Parameter that enables non-trivial unswitching for this 3616// invocation of the transform. But this should be allowed only 3617// for targets without branch divergence. 3619// FIXME: If divergence analysis becomes available to a loop 3620// transform, we should allow unswitching for non-trivial uniform 3621// branches even on targets that have divergence. 3622// https://bugs.llvm.org/show_bug.cgi?id=48819 3623bool ContinueWithNonTrivial =
3625if (!ContinueWithNonTrivial)
3628// Skip non-trivial unswitching for optsize functions. 3632// Returns true if Loop L's loop nest is cold, i.e. if the headers of L, 3633// of the loops L is nested in, and of the loops nested in L are all cold. 3634auto IsLoopNestCold = [&](
constLoop *L) {
3635// Check L and all of its parent loops. 3640 Parent = Parent->getParentLoop();
3642// Next check all loops nested within L. 3644 Worklist.
insert(Worklist.
end(), L->getSubLoops().begin(),
3645 L->getSubLoops().end());
3646while (!Worklist.
empty()) {
3650 Worklist.
insert(Worklist.
end(), CurLoop->getSubLoops().begin(),
3651 CurLoop->getSubLoops().end());
3656// Skip cold loops in cold loop nests, as unswitching them brings little 3657// benefit but increases the code size 3663// Perform legality checks. 3667// For non-trivial unswitching, because it often creates new loops, we rely on 3668// the pass manager to iterate on the loops rather than trying to immediately 3669// reach a fixed point. There is no substantial advantage to iterating 3670// internally, and if any of the new loops are simplified enough to contain 3671// trivial unswitching we want to prefer those. 3673// Try to unswitch the best invariant condition. We prefer this full unswitch to 3674// a partial unswitch when possible below the threshold. 3678// No other opportunities to unswitch. 3685Function &
F = *L.getHeader()->getParent();
3688if (
auto OuterProxy =
3695 std::optional<MemorySSAUpdater> MSSAU;
3702 &AR.
SE, MSSAU ? &*MSSAU :
nullptr, PSI, AR.
BFI, U))
3708// Historically this pass has had issues with the dominator tree so verify it 3709// in asserts builds. 3721OS, MapClassName2PassName);
3724OS << (NonTrivial ?
"" :
"no-") <<
"nontrivial;";
3725OS << (Trivial ?
"" :
"no-") <<
"trivial";
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This pass exposes codegen information to IR-level passes.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
bool isOneValue() const
Returns true if the value is one.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
StringRef getName() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
An analysis that produces MemorySSA for a function.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
A Module instance is used to store all the information related to an LLVM module.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void forgetTopmostLoop(const Loop *L)
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
unsigned getIntegerBitWidth() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void setName(const Twine &Name)
Change the name of the value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
auto reverse(ContainerTy &&C)
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
void sort(IteratorTy Start, IteratorTy End)
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
bool VerifyLoopInfo
Enable verification of loop info.
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A special type used by analysis passes to provide an address that identifies that particular analysis...
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Direction
An enum for the direction of the loop.
A CRTP mix-in to automatically provide informational APIs needed for passes.