1//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This pass transforms loops by placing phi nodes at the end of the loops for 10// all values that are live across the loop boundary. For example, it turns 11// the left into the right code: 18// X3 = phi(X1, X2) X3 = phi(X1, X2) 19// ... = X3 + 4 X4 = phi(X3) 22// This is still valid LLVM; the extra phi nodes are purely redundant, and will 23// be trivially eliminated by InstCombine. The major benefit of this 24// transformation is that it makes many other loop optimizations, such as 25// LoopUnswitching, simpler. 27//===----------------------------------------------------------------------===// 54#define DEBUG_TYPE "lcssa" 56STATISTIC(NumLCSSA,
"Number of live out of a loop variables");
58#ifdef EXPENSIVE_CHECKS 66cl::desc(
"Verify loop lcssa form (time consuming)"));
68/// Return true if the specified block is in the list. 74// Cache the Loop ExitBlocks computed during the analysis. We expect to get a 75// lot of instructions within the same loops, computing the exit blocks is 76// expensive, and we're not mutating the loop structure. 79/// For every instruction from the worklist, check to see if it has any uses 80/// that are outside the current loop. If so, insert LCSSA PHI nodes and 94while (!Worklist.
empty()) {
95 UsesToRewrite.
clear();
98assert(!
I->getType()->isTokenTy() &&
"Tokens shouldn't be in the worklist");
101assert(L &&
"Instruction belongs to a BB that's not part of a loop");
102if (!LoopExitBlocks.
count(L))
103 L->getExitBlocks(LoopExitBlocks[L]);
107if (ExitBlocks.
empty())
114// Skip uses in unreachable blocks. 120// For practical purposes, we consider that the use in a PHI 121// occurs in the respective predecessor block. For more info, 122// see the `phi` doc in LangRef and the LCSSA doc. 123if (
auto *PN = dyn_cast<PHINode>(
User))
124 UserBB = PN->getIncomingBlock(U);
126if (InstBB != UserBB && !L->contains(UserBB))
130// If there are no uses outside the loop, exit with no change. 131if (UsesToRewrite.
empty())
134 ++NumLCSSA;
// We are applying the transformation 136// Invoke instructions are special in that their result value is not 137// available along their unwind edge. The code below tests to see whether 138// DomBB dominates the value, so adjust DomBB to the normal destination 139// block, which is effectively where the value is first usable. 141if (
auto *Inv = dyn_cast<InvokeInst>(
I))
142 DomBB = Inv->getNormalDest();
153// Insert the LCSSA phi's into all of the exit blocks dominated by the 154// value, and add them to the Phi's map. 155bool HasSCEV = SE && SE->
isSCEVable(
I->getType()) &&
161// If we already inserted something for this BB, don't reprocess it. 165I->getName() +
".lcssa");
169// Get the debug location from the original instruction. 172// Add inputs from inside the loop for this PHI. This is valid 173// because `I` dominates `ExitBB` (checked above). This implies 174// that every incoming block/edge is dominated by `I` as well, 175// i.e. we can add uses of `I` to those incoming edges/append to the incoming 176// blocks without violating the SSA dominance property. 180// If the exit block has a predecessor not within the loop, arrange for 181// the incoming value use corresponding to that predecessor to be 182// rewritten in terms of a different LCSSA PHI. 183if (!L->contains(Pred))
191// Remember that this phi makes the value alive in this block. 194// LoopSimplify might fail to simplify some loops (e.g. when indirect 195// branches are involved). In such situations, it might happen that an 196// exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we 197// create PHIs in such an exit block, we are also inserting PHIs into L2's 198// header. This could break LCSSA form for L2 because these inserted PHIs 199// can also have uses outside of L2. Remember all PHIs in such situation 200// as to revisit than later on. FIXME: Remove this if indirectbr support 201// into LoopSimplify gets improved. 203if (!L->contains(OtherLoop))
206// If we have a cached SCEV for the original instruction, make sure the 207// new LCSSA phi node is also cached. This makes sures that BECounts 208// based on it will be invalidated when the LCSSA phi node is invalidated, 209// which some passes rely on. 214// Rewrite all uses outside the loop in terms of the new PHIs we just 216for (
Use *UseToRewrite : UsesToRewrite) {
220// For practical purposes, we consider that the use in a PHI 221// occurs in the respective predecessor block. For more info, 222// see the `phi` doc in LangRef and the LCSSA doc. 223if (
auto *PN = dyn_cast<PHINode>(
User))
224 UserBB = PN->getIncomingBlock(*UseToRewrite);
226// If this use is in an exit block, rewrite to use the newly inserted PHI. 227// This is required for correctness because SSAUpdate doesn't handle uses 228// in the same block. It assumes the PHI we inserted is at the end of the 231 UseToRewrite->set(&UserBB->
front());
235// If we added a single PHI, it must dominate all uses and we can directly 237if (AddedPHIs.
size() == 1) {
238 UseToRewrite->set(AddedPHIs[0]);
242// Otherwise, do full PHI insertion. 250// Update pre-existing debug value uses that reside outside the loop. 251for (
auto *DVI : DbgValues) {
253if (InstBB == UserBB || L->contains(UserBB))
255// We currently only handle debug values residing in blocks that were 256// traversed while rewriting the uses. If we inserted just a single PHI, 257// we will handle all relevant debug values. 258Value *V = AddedPHIs.
size() == 1 ? AddedPHIs[0]
261 DVI->replaceVariableLocationOp(
I, V);
264// RemoveDIs: copy-paste of block above, using non-instruction debug-info 268if (InstBB == UserBB || L->contains(UserBB))
270// We currently only handle debug values residing in blocks that were 271// traversed while rewriting the uses. If we inserted just a single PHI, 272// we will handle all relevant debug values. 273Value *V = AddedPHIs.
size() == 1 ? AddedPHIs[0]
276 DVR->replaceVariableLocationOp(
I, V);
279// SSAUpdater might have inserted phi-nodes inside other loops. We'll need 280// to post-process them to keep LCSSA form. 281for (
PHINode *InsertedPN : LocalInsertedPHIs) {
282if (
auto *OtherLoop = LI.
getLoopFor(InsertedPN->getParent()))
283if (!L->contains(OtherLoop))
289// Post process PHI instructions that were inserted into another disjoint 290// loop and update their exits properly. 291for (
auto *PostProcessPN : PostProcessPHIs)
292if (!PostProcessPN->use_empty())
295// Keep track of PHI nodes that we want to remove because they did not have 296// any uses rewritten. 299 LocalPHIsToRemove.
insert(PN);
304// Remove PHI nodes that did not have any uses rewritten or add them to 305// PHIsToRemove, so the caller can remove them after some additional cleanup. 306// We need to redo the use_empty() check here, because even if the PHI node 307// wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be 308// using it. This cleanup is not guaranteed to handle trees/cycles of PHI 309// nodes that only are used by each other. Such situations has only been 310// noticed when the input IR contains unreachable code, and leaving some extra 311// redundant PHI nodes in such situations is considered a minor problem. 313 PHIsToRemove->
append(LocalPHIsToRemove.
begin(), LocalPHIsToRemove.
end());
315for (
PHINode *PN : LocalPHIsToRemove)
317 PN->eraseFromParent();
322/// For every instruction from the worklist, check to see if it has any uses 323/// that are outside the current loop. If so, insert LCSSA PHI nodes and 333 InsertedPHIs, LoopExitBlocks);
336// Compute the set of BasicBlocks in the loop `L` dominating at least one exit. 340// We start from the exit blocks, as every block trivially dominates itself 344while (!BBWorklist.
empty()) {
347// Check if this is a loop header. If this is the case, we're done. 348if (L.getHeader() == BB)
351// Otherwise, add its immediate predecessor in the dominator tree to the 352// worklist, unless we visited it already. 355// Exit blocks can have an immediate dominator not belonging to the 356// loop. For an exit block to be immediately dominated by another block 357// outside the loop, it implies not all paths from that dominator, to the 358// exit block, go through the loop. 369// C is the exit block of the loop and it's immediately dominated by A, 370// which doesn't belong to the loop. 371if (!L.contains(IDomBB))
374if (BlocksDominatingExits.
insert(IDomBB))
384#ifdef EXPENSIVE_CHECKS 385// Verify all sub-loops are in LCSSA form already. 386for (
Loop *SubLoop: L) {
387 (void)SubLoop;
// Silence unused variable warning. 388assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) &&
"Subloop not in LCSSA!");
392if (!LoopExitBlocks.
count(&L))
393 L.getExitBlocks(LoopExitBlocks[&L]);
395if (ExitBlocks.
empty())
400// We want to avoid use-scanning leveraging dominance informations. 401// If a block doesn't dominate any of the loop exits, the none of the values 402// defined in the loop can be used outside. 403// We compute the set of blocks fullfilling the conditions in advance 404// walking the dominator tree upwards until we hit a loop header. 409// Look at all the instructions in the loop, checking to see if they have uses 410// outside the loop. If so, put them into the worklist to rewrite those uses. 412// Skip blocks that are part of any sub-loops, they must be in LCSSA 417// Reject two common cases fast: instructions with no uses (like stores) 418// and instructions with one use that is in the same block as this. 420 (
I.hasOneUse() &&
I.user_back()->getParent() == BB &&
421 !isa<PHINode>(
I.user_back())))
424// Tokens cannot be used in PHI nodes, so we skip over them. 425// We can run into tokens which are live out of a loop with catchswitch 426// instructions in Windows EH if the catchswitch has one catchpad which 427// is inside the loop and another which is not. 428if (
I.getType()->isTokenTy())
436nullptr, LoopExitBlocks);
450/// Process a loop nest depth first. 456// Recurse depth-first through inner loops. 457for (
Loop *SubLoop : L.getSubLoops())
464/// Process a loop nest depth first. 472/// Process all loops in the function, inner-most out. 476for (
constauto &L : *LI)
483staticcharID;
// Pass identification, replacement for typeid 488// Cached analysis information for the current function. 495// This check is very expensive. On the loop intensive compiles it may cause 496// up to 10x slowdown. Currently it's disabled by default. LPPassManager 497// always does limited form of the LCSSA verification. Similar reasoning 498// was used for the LoopInfo verifier. 502returnL->isRecursivelyLCSSAForm(*DT, *LI);
504"LCSSA form is broken!");
508 /// This transformation requires natural loop information & requires that 509 /// loop preheaders be inserted into the CFG. It maintains both of these, 510 /// as well as the CFG. It also requires dominator information. 525// This is needed to perform LCSSA verification inside LPPassManager 532char LCSSAWrapperPass::ID = 0;
544/// Transform \p F into loop-closed SSA form. 545bool LCSSAWrapperPass::runOnFunction(
Function &
F) {
546 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
547 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
548auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
549 SE = SEWP ? &SEWP->getSE() :
nullptr;
564// BPI maps terminators to probabilities, since we don't modify the CFG, no 565// updates are needed to preserve it. This is the interface for LLVM's primary stateless and local alias analysis.
This is the interface for a simple mod/ref and alias analysis over globals.
static bool formLCSSAForInstructionsImpl(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove, SmallVectorImpl< PHINode * > *InsertedPHIs, LoopExitBlocksTy &LoopExitBlocks)
For every instruction from the worklist, check to see if it has any uses that are outside the current...
static bool isExitBlock(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &ExitBlocks)
Return true if the specified block is in the list.
static bool VerifyLoopLCSSA
static bool formLCSSAImpl(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE, LoopExitBlocksTy &LoopExitBlocks)
static bool formLCSSAOnAllLoops(const LoopInfo *LI, const DominatorTree &DT, ScalarEvolution *SE)
Process all loops in the function, inner-most out.
static void computeBlocksDominatingExits(Loop &L, const DominatorTree &DT, ArrayRef< BasicBlock * > ExitBlocks, SmallSetVector< BasicBlock *, 8 > &BlocksDominatingExits)
static bool formLCSSARecursivelyImpl(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE, LoopExitBlocksTy &LoopExitBlocks)
Process a loop nest depth first.
static cl::opt< bool, true > VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), cl::Hidden, cl::desc("Verify loop lcssa form (time consuming)"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This is the interface for a SCEV-based alias analysis.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Instruction & front() const
const Function * getParent() const
Return the enclosing method, or null if none.
DbgMarker * getMarker(InstListType::iterator It)
Return the DbgMarker for the position given by It, so that DbgRecords can be inserted there.
Analysis pass which computes BranchProbabilityInfo.
Legacy analysis pass which computes BranchProbabilityInfo.
Represents analyses that only rely on functions' control flow.
const BasicBlock * getParent() const
Record of a variable value-assignment, aka a non instruction representation of the dbg....
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
DomTreeNodeBase * getIDom() const
Analysis pass which computes a DominatorTree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Legacy wrapper pass to provide the GlobalsAAResult object.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Analysis pass that exposes the LoopInfo for a function.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static unsigned getOperandNumForIncomingValue(unsigned i)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual void verifyAnalysis() const
verifyAnalysis() - This member can be implemented by a analysis pass to check state of analysis infor...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
PredIteratorCache - This class is an extremely trivial cache for predecessor iterator queries.
size_t size(BasicBlock *BB)
ArrayRef< BasicBlock * > get(BasicBlock *BB)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void preserve()
Mark an analysis as preserved.
Legacy wrapper pass to provide the SCEVAAResult object.
Helper class for SSA formation on a set of values defined in multiple blocks.
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Value * FindValueForBlock(BasicBlock *BB) const
Return the value for the specified block if the SSAUpdater has one, otherwise return nullptr.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
bool HasValueForBlock(BasicBlock *BB) const
Return true if the SSAUpdater already has a value for the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
const SCEV * getExistingSCEV(Value *V)
Return an existing SCEV for V if there is one, otherwise return nullptr.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
LLVM Value Representation.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
void initializeLCSSAWrapperPassPass(PassRegistry &)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.