Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
14#include "llvm/ADT/SmallPtrSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Analysis/AssumptionCache.h"
19#include "llvm/Analysis/BlockFrequencyInfo.h"
20#include "llvm/Analysis/CFG.h"
21#include "llvm/Analysis/CodeMetrics.h"
22#include "llvm/Analysis/DomTreeUpdater.h"
23#include "llvm/Analysis/GuardUtils.h"
24#include "llvm/Analysis/LoopAnalysisManager.h"
25#include "llvm/Analysis/LoopInfo.h"
26#include "llvm/Analysis/LoopIterator.h"
27#include "llvm/Analysis/MemorySSA.h"
28#include "llvm/Analysis/MemorySSAUpdater.h"
29#include "llvm/Analysis/MustExecute.h"
30#include "llvm/Analysis/ProfileSummaryInfo.h"
31#include "llvm/Analysis/ScalarEvolution.h"
32#include "llvm/Analysis/TargetTransformInfo.h"
33#include "llvm/Analysis/ValueTracking.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constant.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/IRBuilder.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
42#include "llvm/IR/Instructions.h"
43#include "llvm/IR/IntrinsicInst.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/ProfDataUtils.h"
47#include "llvm/IR/Use.h"
48#include "llvm/IR/Value.h"
49#include "llvm/Support/Casting.h"
50#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/Debug.h"
52#include "llvm/Support/ErrorHandling.h"
53#include "llvm/Support/GenericDomTree.h"
54#include "llvm/Support/InstructionCost.h"
55#include "llvm/Support/raw_ostream.h"
56#include "llvm/Transforms/Scalar/LoopPassManager.h"
57#include "llvm/Transforms/Utils/BasicBlockUtils.h"
58#include "llvm/Transforms/Utils/Cloning.h"
59#include "llvm/Transforms/Utils/Local.h"
60#include "llvm/Transforms/Utils/LoopUtils.h"
61#include "llvm/Transforms/Utils/ValueMapper.h"
62#include <algorithm>
63#include <cassert>
64#include <iterator>
65#include <numeric>
66#include <optional>
67#include <utility>
68
69#define DEBUG_TYPE "simple-loop-unswitch"
70
71using namespacellvm;
72using namespacellvm::PatternMatch;
73
74STATISTIC(NumBranches,"Number of branches unswitched");
75STATISTIC(NumSwitches,"Number of switches unswitched");
76STATISTIC(NumSelects,"Number of selects turned into branches for unswitching");
77STATISTIC(NumGuards,"Number of guards turned into branches for unswitching");
78STATISTIC(NumTrivial,"Number of unswitches that are trivial");
79STATISTIC(
80 NumCostMultiplierSkipped,
81"Number of unswitch candidates that had their cost multiplier skipped");
82STATISTIC(NumInvariantConditionsInjected,
83"Number of invariant conditions injected and unswitched");
84
85staticcl::opt<bool>EnableNonTrivialUnswitch(
86"enable-nontrivial-unswitch",cl::init(false),cl::Hidden,
87cl::desc("Forcibly enables non-trivial loop unswitching rather than "
88"following the configuration passed into the pass."));
89
90staticcl::opt<int>
91UnswitchThreshold("unswitch-threshold",cl::init(50),cl::Hidden,
92cl::desc("The cost threshold for unswitching a loop."));
93
94staticcl::opt<bool>EnableUnswitchCostMultiplier(
95"enable-unswitch-cost-multiplier",cl::init(true),cl::Hidden,
96cl::desc("Enable unswitch cost multiplier that prohibits exponential "
97"explosion in nontrivial unswitch."));
98staticcl::opt<int>UnswitchSiblingsToplevelDiv(
99"unswitch-siblings-toplevel-div",cl::init(2),cl::Hidden,
100cl::desc("Toplevel siblings divisor for cost multiplier."));
101staticcl::opt<int>UnswitchNumInitialUnscaledCandidates(
102"unswitch-num-initial-unscaled-candidates",cl::init(8),cl::Hidden,
103cl::desc("Number of unswitch candidates that are ignored when calculating "
104"cost multiplier."));
105staticcl::opt<bool>UnswitchGuards(
106"simple-loop-unswitch-guards",cl::init(true),cl::Hidden,
107cl::desc("If enabled, simple loop unswitching will also consider "
108"llvm.experimental.guard intrinsics as unswitch candidates."));
109staticcl::opt<bool>DropNonTrivialImplicitNullChecks(
110"simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
111cl::init(false),cl::Hidden,
112cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
113"null checks to save time analyzing if we can keep it."));
114staticcl::opt<unsigned>
115MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
116cl::desc("Max number of memory uses to explore during "
117"partial unswitching analysis"),
118cl::init(100),cl::Hidden);
119staticcl::opt<bool>FreezeLoopUnswitchCond(
120"freeze-loop-unswitch-cond",cl::init(true),cl::Hidden,
121cl::desc("If enabled, the freeze instruction will be added to condition "
122"of loop unswitch to prevent miscompilation."));
123
124staticcl::opt<bool>InjectInvariantConditions(
125"simple-loop-unswitch-inject-invariant-conditions",cl::Hidden,
126cl::desc("Whether we should inject new invariants and unswitch them to "
127"eliminate some existing (non-invariant) conditions."),
128cl::init(true));
129
130staticcl::opt<unsigned>InjectInvariantConditionHotnesThreshold(
131"simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
132cl::Hidden,cl::desc("Only try to inject loop invariant conditions and "
133"unswitch on them to eliminate branches that are "
134"not-taken 1/<this option> times or less."),
135cl::init(16));
136
137AnalysisKeyShouldRunExtraSimpleLoopUnswitch::Key;
138namespace{
139structCompareDesc {
140BranchInst *Term;
141Value *Invariant;
142BasicBlock *InLoopSucc;
143
144 CompareDesc(BranchInst *Term,Value *Invariant,BasicBlock *InLoopSucc)
145 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
146};
147
148structInjectedInvariant {
149ICmpInst::Predicate Pred;
150Value *LHS;
151Value *RHS;
152BasicBlock *InLoopSucc;
153
154 InjectedInvariant(ICmpInst::Predicate Pred,Value *LHS,Value *RHS,
155BasicBlock *InLoopSucc)
156 : Pred(Pred),LHS(LHS),RHS(RHS), InLoopSucc(InLoopSucc) {}
157};
158
159structNonTrivialUnswitchCandidate {
160Instruction *TI =nullptr;
161TinyPtrVector<Value *> Invariants;
162 std::optional<InstructionCost>Cost;
163 std::optional<InjectedInvariant> PendingInjection;
164 NonTrivialUnswitchCandidate(
165Instruction *TI,ArrayRef<Value *> Invariants,
166 std::optional<InstructionCost>Cost = std::nullopt,
167 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
168 : TI(TI), Invariants(Invariants),Cost(Cost),
169 PendingInjection(PendingInjection) {};
170
171bool hasPendingInjection() const{return PendingInjection.has_value(); }
172};
173}// end anonymous namespace.
174
175// Helper to skip (select x, true, false), which matches both a logical AND and
176// OR and can confuse code that tries to determine if \p Cond is either a
177// logical AND or OR but not both.
178staticValue *skipTrivialSelect(Value *Cond) {
179Value *CondNext;
180while (match(Cond,m_Select(m_Value(CondNext),m_One(),m_Zero())))
181Cond = CondNext;
182returnCond;
183}
184
185/// Collect all of the loop invariant input values transitively used by the
186/// homogeneous instruction graph from a given root.
187///
188/// This essentially walks from a root recursively through loop variant operands
189/// which have perform the same logical operation (AND or OR) and finds all
190/// inputs which are loop invariant. For some operations these can be
191/// re-associated and unswitched out of the loop entirely.
192staticTinyPtrVector<Value *>
193collectHomogenousInstGraphLoopInvariants(constLoop &L,Instruction &Root,
194constLoopInfo &LI) {
195assert(!L.isLoopInvariant(&Root) &&
196"Only need to walk the graph if root itself is not invariant.");
197TinyPtrVector<Value *> Invariants;
198
199bool IsRootAnd =match(&Root,m_LogicalAnd());
200bool IsRootOr =match(&Root,m_LogicalOr());
201
202// Build a worklist and recurse through operators collecting invariants.
203SmallVector<Instruction *, 4> Worklist;
204SmallPtrSet<Instruction *, 8> Visited;
205 Worklist.push_back(&Root);
206 Visited.insert(&Root);
207do {
208Instruction &I = *Worklist.pop_back_val();
209for (Value *OpV :I.operand_values()) {
210// Skip constants as unswitching isn't interesting for them.
211if (isa<Constant>(OpV))
212continue;
213
214// Add it to our result if loop invariant.
215if (L.isLoopInvariant(OpV)) {
216 Invariants.push_back(OpV);
217continue;
218 }
219
220// If not an instruction with the same opcode, nothing we can do.
221Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
222
223if (OpI && ((IsRootAnd &&match(OpI,m_LogicalAnd())) ||
224 (IsRootOr &&match(OpI,m_LogicalOr())))) {
225// Visit this operand.
226if (Visited.insert(OpI).second)
227 Worklist.push_back(OpI);
228 }
229 }
230 }while (!Worklist.empty());
231
232return Invariants;
233}
234
235staticvoidreplaceLoopInvariantUses(constLoop &L,Value *Invariant,
236Constant &Replacement) {
237assert(!isa<Constant>(Invariant) &&"Why are we unswitching on a constant?");
238
239// Replace uses of LIC in the loop with the given constant.
240// We use make_early_inc_range as set invalidates the iterator.
241for (Use &U :llvm::make_early_inc_range(Invariant->uses())) {
242Instruction *UserI = dyn_cast<Instruction>(U.getUser());
243
244// Replace this use within the loop body.
245if (UserI && L.contains(UserI))
246 U.set(&Replacement);
247 }
248}
249
250/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
251/// incoming values along this edge.
252staticboolareLoopExitPHIsLoopInvariant(constLoop &L,
253constBasicBlock &ExitingBB,
254constBasicBlock &ExitBB) {
255for (constInstruction &I : ExitBB) {
256auto *PN = dyn_cast<PHINode>(&I);
257if (!PN)
258// No more PHIs to check.
259returntrue;
260
261// If the incoming value for this edge isn't loop invariant the unswitch
262// won't be trivial.
263if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
264returnfalse;
265 }
266llvm_unreachable("Basic blocks should never be empty!");
267}
268
269/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
270/// end of \p BB and conditionally branch on the copied condition. We only
271/// branch on a single value.
272staticvoidbuildPartialUnswitchConditionalBranch(
273BasicBlock &BB,ArrayRef<Value *> Invariants,boolDirection,
274BasicBlock &UnswitchedSucc,BasicBlock &NormalSucc,bool InsertFreeze,
275constInstruction *I,AssumptionCache *AC,constDominatorTree &DT) {
276IRBuilder<> IRB(&BB);
277
278SmallVector<Value *> FrozenInvariants;
279for (Value *Inv : Invariants) {
280if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC,I, &DT))
281 Inv = IRB.CreateFreeze(Inv, Inv->getName() +".fr");
282 FrozenInvariants.push_back(Inv);
283 }
284
285Value *Cond =Direction ? IRB.CreateOr(FrozenInvariants)
286 : IRB.CreateAnd(FrozenInvariants);
287 IRB.CreateCondBr(Cond,Direction ? &UnswitchedSucc : &NormalSucc,
288Direction ? &NormalSucc : &UnswitchedSucc);
289}
290
291/// Copy a set of loop invariant values, and conditionally branch on them.
292staticvoidbuildPartialInvariantUnswitchConditionalBranch(
293BasicBlock &BB,ArrayRef<Value *> ToDuplicate,boolDirection,
294BasicBlock &UnswitchedSucc,BasicBlock &NormalSucc,Loop &L,
295MemorySSAUpdater *MSSAU) {
296ValueToValueMapTy VMap;
297for (auto *Val :reverse(ToDuplicate)) {
298Instruction *Inst = cast<Instruction>(Val);
299Instruction *NewInst = Inst->clone();
300 NewInst->insertInto(&BB, BB.end());
301RemapInstruction(NewInst, VMap,
302RF_NoModuleLevelChanges |RF_IgnoreMissingLocals);
303 VMap[Val] = NewInst;
304
305if (!MSSAU)
306continue;
307
308MemorySSA *MSSA = MSSAU->getMemorySSA();
309if (auto *MemUse =
310 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
311auto *DefiningAccess = MemUse->getDefiningAccess();
312// Get the first defining access before the loop.
313while (L.contains(DefiningAccess->getBlock())) {
314// If the defining access is a MemoryPhi, get the incoming
315// value for the pre-header as defining access.
316if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
317 DefiningAccess =
318 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
319else
320 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
321 }
322 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
323 NewInst->getParent(),
324MemorySSA::BeforeTerminator);
325 }
326 }
327
328IRBuilder<> IRB(&BB);
329Value *Cond = VMap[ToDuplicate[0]];
330 IRB.CreateCondBr(Cond,Direction ? &UnswitchedSucc : &NormalSucc,
331Direction ? &NormalSucc : &UnswitchedSucc);
332}
333
334/// Rewrite the PHI nodes in an unswitched loop exit basic block.
335///
336/// Requires that the loop exit and unswitched basic block are the same, and
337/// that the exiting block was a unique predecessor of that block. Rewrites the
338/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
339/// PHI nodes from the old preheader that now contains the unswitched
340/// terminator.
341staticvoidrewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
342BasicBlock &OldExitingBB,
343BasicBlock &OldPH) {
344for (PHINode &PN : UnswitchedBB.phis()) {
345// When the loop exit is directly unswitched we just need to update the
346// incoming basic block. We loop to handle weird cases with repeated
347// incoming blocks, but expect to typically only have one operand here.
348for (auto i : seq<int>(0, PN.getNumOperands())) {
349assert(PN.getIncomingBlock(i) == &OldExitingBB &&
350"Found incoming block different from unique predecessor!");
351 PN.setIncomingBlock(i, &OldPH);
352 }
353 }
354}
355
356/// Rewrite the PHI nodes in the loop exit basic block and the split off
357/// unswitched block.
358///
359/// Because the exit block remains an exit from the loop, this rewrites the
360/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
361/// nodes into the unswitched basic block to select between the value in the
362/// old preheader and the loop exit.
363staticvoidrewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
364BasicBlock &UnswitchedBB,
365BasicBlock &OldExitingBB,
366BasicBlock &OldPH,
367bool FullUnswitch) {
368assert(&ExitBB != &UnswitchedBB &&
369"Must have different loop exit and unswitched blocks!");
370BasicBlock::iterator InsertPt = UnswitchedBB.begin();
371for (PHINode &PN : ExitBB.phis()) {
372auto *NewPN =PHINode::Create(PN.getType(),/*NumReservedValues*/ 2,
373 PN.getName() +".split");
374 NewPN->insertBefore(InsertPt);
375
376// Walk backwards over the old PHI node's inputs to minimize the cost of
377// removing each one. We have to do this weird loop manually so that we
378// create the same number of new incoming edges in the new PHI as we expect
379// each case-based edge to be included in the unswitched switch in some
380// cases.
381// FIXME: This is really, really gross. It would be much cleaner if LLVM
382// allowed us to create a single entry for a predecessor block without
383// having separate entries for each "edge" even though these edges are
384// required to produce identical results.
385for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
386if (PN.getIncomingBlock(i) != &OldExitingBB)
387continue;
388
389Value *Incoming = PN.getIncomingValue(i);
390if (FullUnswitch)
391// No more edge from the old exiting block to the exit block.
392 PN.removeIncomingValue(i);
393
394 NewPN->addIncoming(Incoming, &OldPH);
395 }
396
397// Now replace the old PHI with the new one and wire the old one in as an
398// input to the new one.
399 PN.replaceAllUsesWith(NewPN);
400 NewPN->addIncoming(&PN, &ExitBB);
401 }
402}
403
404/// Hoist the current loop up to the innermost loop containing a remaining exit.
405///
406/// Because we've removed an exit from the loop, we may have changed the set of
407/// loops reachable and need to move the current loop up the loop nest or even
408/// to an entirely separate nest.
409staticvoidhoistLoopToNewParent(Loop &L,BasicBlock &Preheader,
410DominatorTree &DT,LoopInfo &LI,
411MemorySSAUpdater *MSSAU,ScalarEvolution *SE) {
412// If the loop is already at the top level, we can't hoist it anywhere.
413Loop *OldParentL = L.getParentLoop();
414if (!OldParentL)
415return;
416
417SmallVector<BasicBlock *, 4> Exits;
418 L.getExitBlocks(Exits);
419Loop *NewParentL =nullptr;
420for (auto *ExitBB : Exits)
421if (Loop *ExitL = LI.getLoopFor(ExitBB))
422if (!NewParentL || NewParentL->contains(ExitL))
423 NewParentL = ExitL;
424
425if (NewParentL == OldParentL)
426return;
427
428// The new parent loop (if different) should always contain the old one.
429if (NewParentL)
430assert(NewParentL->contains(OldParentL) &&
431"Can only hoist this loop up the nest!");
432
433// The preheader will need to move with the body of this loop. However,
434// because it isn't in this loop we also need to update the primary loop map.
435assert(OldParentL == LI.getLoopFor(&Preheader) &&
436"Parent loop of this loop should contain this loop's preheader!");
437 LI.changeLoopFor(&Preheader, NewParentL);
438
439// Remove this loop from its old parent.
440 OldParentL->removeChildLoop(&L);
441
442// Add the loop either to the new parent or as a top-level loop.
443if (NewParentL)
444 NewParentL->addChildLoop(&L);
445else
446 LI.addTopLevelLoop(&L);
447
448// Remove this loops blocks from the old parent and every other loop up the
449// nest until reaching the new parent. Also update all of these
450// no-longer-containing loops to reflect the nesting change.
451for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
452 OldContainingL = OldContainingL->getParentLoop()) {
453llvm::erase_if(OldContainingL->getBlocksVector(),
454 [&](constBasicBlock *BB) {
455 return BB == &Preheader || L.contains(BB);
456 });
457
458 OldContainingL->getBlocksSet().erase(&Preheader);
459for (BasicBlock *BB : L.blocks())
460 OldContainingL->getBlocksSet().erase(BB);
461
462// Because we just hoisted a loop out of this one, we have essentially
463// created new exit paths from it. That means we need to form LCSSA PHI
464// nodes for values used in the no-longer-nested loop.
465formLCSSA(*OldContainingL, DT, &LI, SE);
466
467// We shouldn't need to form dedicated exits because the exit introduced
468// here is the (just split by unswitching) preheader. However, after trivial
469// unswitching it is possible to get new non-dedicated exits out of parent
470// loop so let's conservatively form dedicated exit blocks and figure out
471// if we can optimize later.
472formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
473/*PreserveLCSSA*/true);
474 }
475}
476
477// Return the top-most loop containing ExitBB and having ExitBB as exiting block
478// or the loop containing ExitBB, if there is no parent loop containing ExitBB
479// as exiting block.
480staticLoop *getTopMostExitingLoop(constBasicBlock *ExitBB,
481constLoopInfo &LI) {
482Loop *TopMost = LI.getLoopFor(ExitBB);
483Loop *Current = TopMost;
484while (Current) {
485if (Current->isLoopExiting(ExitBB))
486 TopMost = Current;
487 Current = Current->getParentLoop();
488 }
489return TopMost;
490}
491
492/// Unswitch a trivial branch if the condition is loop invariant.
493///
494/// This routine should only be called when loop code leading to the branch has
495/// been validated as trivial (no side effects). This routine checks if the
496/// condition is invariant and one of the successors is a loop exit. This
497/// allows us to unswitch without duplicating the loop, making it trivial.
498///
499/// If this routine fails to unswitch the branch it returns false.
500///
501/// If the branch can be unswitched, this routine splits the preheader and
502/// hoists the branch above that split. Preserves loop simplified form
503/// (splitting the exit block as necessary). It simplifies the branch within
504/// the loop to an unconditional branch but doesn't remove it entirely. Further
505/// cleanup can be done with some simplifycfg like pass.
506///
507/// If `SE` is not null, it will be updated based on the potential loop SCEVs
508/// invalidated by this.
509staticboolunswitchTrivialBranch(Loop &L,BranchInst &BI,DominatorTree &DT,
510LoopInfo &LI,ScalarEvolution *SE,
511MemorySSAUpdater *MSSAU) {
512assert(BI.isConditional() &&"Can only unswitch a conditional branch!");
513LLVM_DEBUG(dbgs() <<" Trying to unswitch branch: " << BI <<"\n");
514
515// The loop invariant values that we want to unswitch.
516TinyPtrVector<Value *> Invariants;
517
518// When true, we're fully unswitching the branch rather than just unswitching
519// some input conditions to the branch.
520bool FullUnswitch =false;
521
522Value *Cond =skipTrivialSelect(BI.getCondition());
523if (L.isLoopInvariant(Cond)) {
524 Invariants.push_back(Cond);
525 FullUnswitch =true;
526 }else {
527if (auto *CondInst = dyn_cast<Instruction>(Cond))
528 Invariants =collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
529if (Invariants.empty()) {
530LLVM_DEBUG(dbgs() <<" Couldn't find invariant inputs!\n");
531returnfalse;
532 }
533 }
534
535// Check that one of the branch's successors exits, and which one.
536bool ExitDirection =true;
537int LoopExitSuccIdx = 0;
538auto *LoopExitBB = BI.getSuccessor(0);
539if (L.contains(LoopExitBB)) {
540 ExitDirection =false;
541 LoopExitSuccIdx = 1;
542 LoopExitBB = BI.getSuccessor(1);
543if (L.contains(LoopExitBB)) {
544LLVM_DEBUG(dbgs() <<" Branch doesn't exit the loop!\n");
545returnfalse;
546 }
547 }
548auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
549auto *ParentBB = BI.getParent();
550if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
551LLVM_DEBUG(dbgs() <<" Loop exit PHI's aren't loop-invariant!\n");
552returnfalse;
553 }
554
555// When unswitching only part of the branch's condition, we need the exit
556// block to be reached directly from the partially unswitched input. This can
557// be done when the exit block is along the true edge and the branch condition
558// is a graph of `or` operations, or the exit block is along the false edge
559// and the condition is a graph of `and` operations.
560if (!FullUnswitch) {
561if (ExitDirection ? !match(Cond,m_LogicalOr())
562 : !match(Cond,m_LogicalAnd())) {
563LLVM_DEBUG(dbgs() <<" Branch condition is in improper form for "
564"non-full unswitch!\n");
565returnfalse;
566 }
567 }
568
569LLVM_DEBUG({
570dbgs() <<" unswitching trivial invariant conditions for: " << BI
571 <<"\n";
572for (Value *Invariant : Invariants) {
573dbgs() <<" " << *Invariant <<" == true";
574if (Invariant != Invariants.back())
575dbgs() <<" ||";
576dbgs() <<"\n";
577 }
578 });
579
580// If we have scalar evolutions, we need to invalidate them including this
581// loop, the loop containing the exit block and the topmost parent loop
582// exiting via LoopExitBB.
583if (SE) {
584if (constLoop *ExitL =getTopMostExitingLoop(LoopExitBB, LI))
585 SE->forgetLoop(ExitL);
586else
587// Forget the entire nest as this exits the entire nest.
588 SE->forgetTopmostLoop(&L);
589 SE->forgetBlockAndLoopDispositions();
590 }
591
592if (MSSAU &&VerifyMemorySSA)
593 MSSAU->getMemorySSA()->verifyMemorySSA();
594
595// Split the preheader, so that we know that there is a safe place to insert
596// the conditional branch. We will change the preheader to have a conditional
597// branch on LoopCond.
598BasicBlock *OldPH = L.getLoopPreheader();
599BasicBlock *NewPH =SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
600
601// Now that we have a place to insert the conditional branch, create a place
602// to branch to: this is the exit block out of the loop that we are
603// unswitching. We need to split this if there are other loop predecessors.
604// Because the loop is in simplified form, *any* other predecessor is enough.
605BasicBlock *UnswitchedBB;
606if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
607assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
608"A branch's parent isn't a predecessor!");
609 UnswitchedBB = LoopExitBB;
610 }else {
611 UnswitchedBB =
612SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU,"",false);
613 }
614
615if (MSSAU &&VerifyMemorySSA)
616 MSSAU->getMemorySSA()->verifyMemorySSA();
617
618// Actually move the invariant uses into the unswitched position. If possible,
619// we do this by moving the instructions, but when doing partial unswitching
620// we do it by building a new merge of the values in the unswitched position.
621 OldPH->getTerminator()->eraseFromParent();
622if (FullUnswitch) {
623// If fully unswitching, we can use the existing branch instruction.
624// Splice it into the old PH to gate reaching the new preheader and re-point
625// its successors.
626 BI.moveBefore(*OldPH, OldPH->end());
627 BI.setCondition(Cond);
628if (MSSAU) {
629// Temporarily clone the terminator, to make MSSA update cheaper by
630// separating "insert edge" updates from "remove edge" ones.
631 BI.clone()->insertInto(ParentBB, ParentBB->end());
632 }else {
633// Create a new unconditional branch that will continue the loop as a new
634// terminator.
635Instruction *NewBI =BranchInst::Create(ContinueBB, ParentBB);
636 NewBI->setDebugLoc(BI.getDebugLoc());
637 }
638 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
639 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
640 }else {
641// Only unswitching a subset of inputs to the condition, so we will need to
642// build a new branch that merges the invariant inputs.
643if (ExitDirection)
644assert(match(skipTrivialSelect(BI.getCondition()),m_LogicalOr()) &&
645"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
646"condition!");
647else
648assert(match(skipTrivialSelect(BI.getCondition()),m_LogicalAnd()) &&
649"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
650" condition!");
651buildPartialUnswitchConditionalBranch(
652 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
653FreezeLoopUnswitchCond, OldPH->getTerminator(),nullptr, DT);
654 }
655
656// Update the dominator tree with the added edge.
657 DT.insertEdge(OldPH, UnswitchedBB);
658
659// After the dominator tree was updated with the added edge, update MemorySSA
660// if available.
661if (MSSAU) {
662SmallVector<CFGUpdate, 1> Updates;
663 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
664 MSSAU->applyInsertUpdates(Updates, DT);
665 }
666
667// Finish updating dominator tree and memory ssa for full unswitch.
668if (FullUnswitch) {
669if (MSSAU) {
670Instruction *Term = ParentBB->getTerminator();
671// Remove the cloned branch instruction and create unconditional branch
672// now.
673Instruction *NewBI =BranchInst::Create(ContinueBB, ParentBB);
674 NewBI->setDebugLoc(Term->getDebugLoc());
675 Term->eraseFromParent();
676 MSSAU->removeEdge(ParentBB, LoopExitBB);
677 }
678 DT.deleteEdge(ParentBB, LoopExitBB);
679 }
680
681if (MSSAU &&VerifyMemorySSA)
682 MSSAU->getMemorySSA()->verifyMemorySSA();
683
684// Rewrite the relevant PHI nodes.
685if (UnswitchedBB == LoopExitBB)
686rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
687else
688rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
689 *ParentBB, *OldPH, FullUnswitch);
690
691// The constant we can replace all of our invariants with inside the loop
692// body. If any of the invariants have a value other than this the loop won't
693// be entered.
694ConstantInt *Replacement = ExitDirection
695 ?ConstantInt::getFalse(BI.getContext())
696 :ConstantInt::getTrue(BI.getContext());
697
698// Since this is an i1 condition we can also trivially replace uses of it
699// within the loop with a constant.
700for (Value *Invariant : Invariants)
701replaceLoopInvariantUses(L, Invariant, *Replacement);
702
703// If this was full unswitching, we may have changed the nesting relationship
704// for this loop so hoist it to its correct parent if needed.
705if (FullUnswitch)
706hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
707
708if (MSSAU &&VerifyMemorySSA)
709 MSSAU->getMemorySSA()->verifyMemorySSA();
710
711LLVM_DEBUG(dbgs() <<" done: unswitching trivial branch...\n");
712 ++NumTrivial;
713 ++NumBranches;
714returntrue;
715}
716
717/// Unswitch a trivial switch if the condition is loop invariant.
718///
719/// This routine should only be called when loop code leading to the switch has
720/// been validated as trivial (no side effects). This routine checks if the
721/// condition is invariant and that at least one of the successors is a loop
722/// exit. This allows us to unswitch without duplicating the loop, making it
723/// trivial.
724///
725/// If this routine fails to unswitch the switch it returns false.
726///
727/// If the switch can be unswitched, this routine splits the preheader and
728/// copies the switch above that split. If the default case is one of the
729/// exiting cases, it copies the non-exiting cases and points them at the new
730/// preheader. If the default case is not exiting, it copies the exiting cases
731/// and points the default at the preheader. It preserves loop simplified form
732/// (splitting the exit blocks as necessary). It simplifies the switch within
733/// the loop by removing now-dead cases. If the default case is one of those
734/// unswitched, it replaces its destination with a new basic block containing
735/// only unreachable. Such basic blocks, while technically loop exits, are not
736/// considered for unswitching so this is a stable transform and the same
737/// switch will not be revisited. If after unswitching there is only a single
738/// in-loop successor, the switch is further simplified to an unconditional
739/// branch. Still more cleanup can be done with some simplifycfg like pass.
740///
741/// If `SE` is not null, it will be updated based on the potential loop SCEVs
742/// invalidated by this.
743staticboolunswitchTrivialSwitch(Loop &L,SwitchInst &SI,DominatorTree &DT,
744LoopInfo &LI,ScalarEvolution *SE,
745MemorySSAUpdater *MSSAU) {
746LLVM_DEBUG(dbgs() <<" Trying to unswitch switch: " << SI <<"\n");
747Value *LoopCond = SI.getCondition();
748
749// If this isn't switching on an invariant condition, we can't unswitch it.
750if (!L.isLoopInvariant(LoopCond))
751returnfalse;
752
753auto *ParentBB = SI.getParent();
754
755// The same check must be used both for the default and the exit cases. We
756// should never leave edges from the switch instruction to a basic block that
757// we are unswitching, hence the condition used to determine the default case
758// needs to also be used to populate ExitCaseIndices, which is then used to
759// remove cases from the switch.
760auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
761// BBToCheck is not an exit block if it is inside loop L.
762if (L.contains(&BBToCheck))
763returnfalse;
764// BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
765if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
766returnfalse;
767// We do not unswitch a block that only has an unreachable statement, as
768// it's possible this is a previously unswitched block. Only unswitch if
769// either the terminator is not unreachable, or, if it is, it's not the only
770// instruction in the block.
771auto *TI = BBToCheck.getTerminator();
772bool isUnreachable = isa<UnreachableInst>(TI);
773return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
774 };
775
776SmallVector<int, 4> ExitCaseIndices;
777for (auto Case : SI.cases())
778if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
779 ExitCaseIndices.push_back(Case.getCaseIndex());
780BasicBlock *DefaultExitBB =nullptr;
781SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
782SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
783if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
784 DefaultExitBB = SI.getDefaultDest();
785 }elseif (ExitCaseIndices.empty())
786returnfalse;
787
788LLVM_DEBUG(dbgs() <<" unswitching trivial switch...\n");
789
790if (MSSAU &&VerifyMemorySSA)
791 MSSAU->getMemorySSA()->verifyMemorySSA();
792
793// We may need to invalidate SCEVs for the outermost loop reached by any of
794// the exits.
795Loop *OuterL = &L;
796
797if (DefaultExitBB) {
798// Check the loop containing this exit.
799Loop *ExitL =getTopMostExitingLoop(DefaultExitBB, LI);
800if (!ExitL || ExitL->contains(OuterL))
801 OuterL = ExitL;
802 }
803for (unsigned Index : ExitCaseIndices) {
804auto CaseI = SI.case_begin() + Index;
805// Compute the outer loop from this exit.
806Loop *ExitL =getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
807if (!ExitL || ExitL->contains(OuterL))
808 OuterL = ExitL;
809 }
810
811if (SE) {
812if (OuterL)
813 SE->forgetLoop(OuterL);
814else
815 SE->forgetTopmostLoop(&L);
816 }
817
818if (DefaultExitBB) {
819// Clear out the default destination temporarily to allow accurate
820// predecessor lists to be examined below.
821 SI.setDefaultDest(nullptr);
822 }
823
824// Store the exit cases into a separate data structure and remove them from
825// the switch.
826SmallVector<std::tuple<ConstantInt *,BasicBlock *,
827SwitchInstProfUpdateWrapper::CaseWeightOpt>,
828 4> ExitCases;
829 ExitCases.reserve(ExitCaseIndices.size());
830SwitchInstProfUpdateWrapper SIW(SI);
831// We walk the case indices backwards so that we remove the last case first
832// and don't disrupt the earlier indices.
833for (unsigned Index :reverse(ExitCaseIndices)) {
834auto CaseI = SI.case_begin() + Index;
835// Save the value of this case.
836auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
837 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
838// Delete the unswitched cases.
839 SIW.removeCase(CaseI);
840 }
841
842// Check if after this all of the remaining cases point at the same
843// successor.
844BasicBlock *CommonSuccBB =nullptr;
845if (SI.getNumCases() > 0 &&
846all_of(drop_begin(SI.cases()), [&SI](constSwitchInst::CaseHandle &Case) {
847 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
848 }))
849 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
850if (!DefaultExitBB) {
851// If we're not unswitching the default, we need it to match any cases to
852// have a common successor or if we have no cases it is the common
853// successor.
854if (SI.getNumCases() == 0)
855 CommonSuccBB = SI.getDefaultDest();
856elseif (SI.getDefaultDest() != CommonSuccBB)
857 CommonSuccBB =nullptr;
858 }
859
860// Split the preheader, so that we know that there is a safe place to insert
861// the switch.
862BasicBlock *OldPH = L.getLoopPreheader();
863BasicBlock *NewPH =SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
864 OldPH->getTerminator()->eraseFromParent();
865
866// Now add the unswitched switch. This new switch instruction inherits the
867// debug location of the old switch, because it semantically replace the old
868// one.
869auto *NewSI =SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
870 NewSI->setDebugLoc(SIW->getDebugLoc());
871SwitchInstProfUpdateWrapper NewSIW(*NewSI);
872
873// Rewrite the IR for the unswitched basic blocks. This requires two steps.
874// First, we split any exit blocks with remaining in-loop predecessors. Then
875// we update the PHIs in one of two ways depending on if there was a split.
876// We walk in reverse so that we split in the same order as the cases
877// appeared. This is purely for convenience of reading the resulting IR, but
878// it doesn't cost anything really.
879SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
880SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
881// Handle the default exit if necessary.
882// FIXME: It'd be great if we could merge this with the loop below but LLVM's
883// ranges aren't quite powerful enough yet.
884if (DefaultExitBB) {
885if (pred_empty(DefaultExitBB)) {
886 UnswitchedExitBBs.insert(DefaultExitBB);
887rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
888 }else {
889auto *SplitBB =
890SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
891rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
892 *ParentBB, *OldPH,
893/*FullUnswitch*/true);
894 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
895 }
896 }
897// Note that we must use a reference in the for loop so that we update the
898// container.
899for (auto &ExitCase :reverse(ExitCases)) {
900// Grab a reference to the exit block in the pair so that we can update it.
901BasicBlock *ExitBB = std::get<1>(ExitCase);
902
903// If this case is the last edge into the exit block, we can simply reuse it
904// as it will no longer be a loop exit. No mapping necessary.
905if (pred_empty(ExitBB)) {
906// Only rewrite once.
907if (UnswitchedExitBBs.insert(ExitBB).second)
908rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
909continue;
910 }
911
912// Otherwise we need to split the exit block so that we retain an exit
913// block from the loop and a target for the unswitched condition.
914BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
915if (!SplitExitBB) {
916// If this is the first time we see this, do the split and remember it.
917 SplitExitBB =SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
918rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
919 *ParentBB, *OldPH,
920/*FullUnswitch*/true);
921 }
922// Update the case pair to point to the split block.
923 std::get<1>(ExitCase) = SplitExitBB;
924 }
925
926// Now add the unswitched cases. We do this in reverse order as we built them
927// in reverse order.
928for (auto &ExitCase :reverse(ExitCases)) {
929ConstantInt *CaseVal = std::get<0>(ExitCase);
930BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
931
932 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
933 }
934
935// If the default was unswitched, re-point it and add explicit cases for
936// entering the loop.
937if (DefaultExitBB) {
938 NewSIW->setDefaultDest(DefaultExitBB);
939 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
940
941// We removed all the exit cases, so we just copy the cases to the
942// unswitched switch.
943for (constauto &Case : SI.cases())
944 NewSIW.addCase(Case.getCaseValue(), NewPH,
945 SIW.getSuccessorWeight(Case.getSuccessorIndex()));
946 }elseif (DefaultCaseWeight) {
947// We have to set branch weight of the default case.
948uint64_t SW = *DefaultCaseWeight;
949for (constauto &Case : SI.cases()) {
950auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
951assert(W &&
952"case weight must be defined as default case weight is defined");
953 SW += *W;
954 }
955 NewSIW.setSuccessorWeight(0, SW);
956 }
957
958// If we ended up with a common successor for every path through the switch
959// after unswitching, rewrite it to an unconditional branch to make it easy
960// to recognize. Otherwise we potentially have to recognize the default case
961// pointing at unreachable and other complexity.
962if (CommonSuccBB) {
963BasicBlock *BB = SI.getParent();
964// We may have had multiple edges to this common successor block, so remove
965// them as predecessors. We skip the first one, either the default or the
966// actual first case.
967bool SkippedFirst = DefaultExitBB ==nullptr;
968for (auto Case : SI.cases()) {
969assert(Case.getCaseSuccessor() == CommonSuccBB &&
970"Non-common successor!");
971 (void)Case;
972if (!SkippedFirst) {
973 SkippedFirst =true;
974continue;
975 }
976 CommonSuccBB->removePredecessor(BB,
977/*KeepOneInputPHIs*/true);
978 }
979// Now nuke the switch and replace it with a direct branch.
980Instruction *NewBI =BranchInst::Create(CommonSuccBB, BB);
981 NewBI->setDebugLoc(SIW->getDebugLoc());
982 SIW.eraseFromParent();
983 }elseif (DefaultExitBB) {
984assert(SI.getNumCases() > 0 &&
985"If we had no cases we'd have a common successor!");
986// Move the last case to the default successor. This is valid as if the
987// default got unswitched it cannot be reached. This has the advantage of
988// being simple and keeping the number of edges from this switch to
989// successors the same, and avoiding any PHI update complexity.
990auto LastCaseI = std::prev(SI.case_end());
991
992 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
993 SIW.setSuccessorWeight(
994 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
995 SIW.removeCase(LastCaseI);
996 }
997
998// Walk the unswitched exit blocks and the unswitched split blocks and update
999// the dominator tree based on the CFG edits. While we are walking unordered
1000// containers here, the API for applyUpdates takes an unordered list of
1001// updates and requires them to not contain duplicates.
1002SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
1003for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
1004 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1005 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1006 }
1007for (auto SplitUnswitchedPair : SplitExitBBMap) {
1008 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1009 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1010 }
1011
1012if (MSSAU) {
1013 MSSAU->applyUpdates(DTUpdates, DT,/*UpdateDT=*/true);
1014if (VerifyMemorySSA)
1015 MSSAU->getMemorySSA()->verifyMemorySSA();
1016 }else {
1017 DT.applyUpdates(DTUpdates);
1018 }
1019
1020assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1021
1022// We may have changed the nesting relationship for this loop so hoist it to
1023// its correct parent if needed.
1024hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1025
1026if (MSSAU &&VerifyMemorySSA)
1027 MSSAU->getMemorySSA()->verifyMemorySSA();
1028
1029 ++NumTrivial;
1030 ++NumSwitches;
1031LLVM_DEBUG(dbgs() <<" done: unswitching trivial switch...\n");
1032returntrue;
1033}
1034
1035/// This routine scans the loop to find a branch or switch which occurs before
1036/// any side effects occur. These can potentially be unswitched without
1037/// duplicating the loop. If a branch or switch is successfully unswitched the
1038/// scanning continues to see if subsequent branches or switches have become
1039/// trivial. Once all trivial candidates have been unswitched, this routine
1040/// returns.
1041///
1042/// The return value indicates whether anything was unswitched (and therefore
1043/// changed).
1044///
1045/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1046/// invalidated by this.
1047staticboolunswitchAllTrivialConditions(Loop &L,DominatorTree &DT,
1048LoopInfo &LI,ScalarEvolution *SE,
1049MemorySSAUpdater *MSSAU) {
1050bool Changed =false;
1051
1052// If loop header has only one reachable successor we should keep looking for
1053// trivial condition candidates in the successor as well. An alternative is
1054// to constant fold conditions and merge successors into loop header (then we
1055// only need to check header's terminator). The reason for not doing this in
1056// LoopUnswitch pass is that it could potentially break LoopPassManager's
1057// invariants. Folding dead branches could either eliminate the current loop
1058// or make other loops unreachable. LCSSA form might also not be preserved
1059// after deleting branches. The following code keeps traversing loop header's
1060// successors until it finds the trivial condition candidate (condition that
1061// is not a constant). Since unswitching generates branches with constant
1062// conditions, this scenario could be very common in practice.
1063BasicBlock *CurrentBB = L.getHeader();
1064SmallPtrSet<BasicBlock *, 8> Visited;
1065 Visited.insert(CurrentBB);
1066do {
1067// Check if there are any side-effecting instructions (e.g. stores, calls,
1068// volatile loads) in the part of the loop that the code *would* execute
1069// without unswitching.
1070if (MSSAU)// Possible early exit with MSSA
1071if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1072if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1073return Changed;
1074if (llvm::any_of(*CurrentBB,
1075 [](Instruction &I) {returnI.mayHaveSideEffects(); }))
1076return Changed;
1077
1078Instruction *CurrentTerm = CurrentBB->getTerminator();
1079
1080if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1081// Don't bother trying to unswitch past a switch with a constant
1082// condition. This should be removed prior to running this pass by
1083// simplifycfg.
1084if (isa<Constant>(SI->getCondition()))
1085return Changed;
1086
1087if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1088// Couldn't unswitch this one so we're done.
1089return Changed;
1090
1091// Mark that we managed to unswitch something.
1092 Changed =true;
1093
1094// If unswitching turned the terminator into an unconditional branch then
1095// we can continue. The unswitching logic specifically works to fold any
1096// cases it can into an unconditional branch to make it easier to
1097// recognize here.
1098auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1099if (!BI || BI->isConditional())
1100return Changed;
1101
1102 CurrentBB = BI->getSuccessor(0);
1103continue;
1104 }
1105
1106auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1107if (!BI)
1108// We do not understand other terminator instructions.
1109return Changed;
1110
1111// Don't bother trying to unswitch past an unconditional branch or a branch
1112// with a constant value. These should be removed by simplifycfg prior to
1113// running this pass.
1114if (!BI->isConditional() ||
1115 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1116return Changed;
1117
1118// Found a trivial condition candidate: non-foldable conditional branch. If
1119// we fail to unswitch this, we can't do anything else that is trivial.
1120if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1121return Changed;
1122
1123// Mark that we managed to unswitch something.
1124 Changed =true;
1125
1126// If we only unswitched some of the conditions feeding the branch, we won't
1127// have collapsed it to a single successor.
1128 BI = cast<BranchInst>(CurrentBB->getTerminator());
1129if (BI->isConditional())
1130return Changed;
1131
1132// Follow the newly unconditional branch into its successor.
1133 CurrentBB = BI->getSuccessor(0);
1134
1135// When continuing, if we exit the loop or reach a previous visited block,
1136// then we can not reach any trivial condition candidates (unfoldable
1137// branch instructions or switch instructions) and no unswitch can happen.
1138 }while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1139
1140return Changed;
1141}
1142
1143/// Build the cloned blocks for an unswitched copy of the given loop.
1144///
1145/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1146/// after the split block (`SplitBB`) that will be used to select between the
1147/// cloned and original loop.
1148///
1149/// This routine handles cloning all of the necessary loop blocks and exit
1150/// blocks including rewriting their instructions and the relevant PHI nodes.
1151/// Any loop blocks or exit blocks which are dominated by a different successor
1152/// than the one for this clone of the loop blocks can be trivially skipped. We
1153/// use the `DominatingSucc` map to determine whether a block satisfies that
1154/// property with a simple map lookup.
1155///
1156/// It also correctly creates the unconditional branch in the cloned
1157/// unswitched parent block to only point at the unswitched successor.
1158///
1159/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1160/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1161/// the cloned blocks (and their loops) are left without full `LoopInfo`
1162/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1163/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1164/// instead the caller must recompute an accurate DT. It *does* correctly
1165/// update the `AssumptionCache` provided in `AC`.
1166staticBasicBlock *buildClonedLoopBlocks(
1167Loop &L,BasicBlock *LoopPH,BasicBlock *SplitBB,
1168ArrayRef<BasicBlock *> ExitBlocks,BasicBlock *ParentBB,
1169BasicBlock *UnswitchedSuccBB,BasicBlock *ContinueSuccBB,
1170constSmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1171ValueToValueMapTy &VMap,
1172SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates,AssumptionCache &AC,
1173DominatorTree &DT,LoopInfo &LI,MemorySSAUpdater *MSSAU,
1174ScalarEvolution *SE) {
1175SmallVector<BasicBlock *, 4> NewBlocks;
1176 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1177
1178// We will need to clone a bunch of blocks, wrap up the clone operation in
1179// a helper.
1180auto CloneBlock = [&](BasicBlock *OldBB) {
1181// Clone the basic block and insert it before the new preheader.
1182BasicBlock *NewBB =CloneBasicBlock(OldBB, VMap,".us", OldBB->getParent());
1183 NewBB->moveBefore(LoopPH);
1184
1185// Record this block and the mapping.
1186 NewBlocks.push_back(NewBB);
1187 VMap[OldBB] = NewBB;
1188
1189return NewBB;
1190 };
1191
1192// We skip cloning blocks when they have a dominating succ that is not the
1193// succ we are cloning for.
1194auto SkipBlock = [&](BasicBlock *BB) {
1195auto It = DominatingSucc.find(BB);
1196return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1197 };
1198
1199// First, clone the preheader.
1200auto *ClonedPH = CloneBlock(LoopPH);
1201
1202// Then clone all the loop blocks, skipping the ones that aren't necessary.
1203for (auto *LoopBB : L.blocks())
1204if (!SkipBlock(LoopBB))
1205 CloneBlock(LoopBB);
1206
1207// Split all the loop exit edges so that when we clone the exit blocks, if
1208// any of the exit blocks are *also* a preheader for some other loop, we
1209// don't create multiple predecessors entering the loop header.
1210for (auto *ExitBB : ExitBlocks) {
1211if (SkipBlock(ExitBB))
1212continue;
1213
1214// When we are going to clone an exit, we don't need to clone all the
1215// instructions in the exit block and we want to ensure we have an easy
1216// place to merge the CFG, so split the exit first. This is always safe to
1217// do because there cannot be any non-loop predecessors of a loop exit in
1218// loop simplified form.
1219auto *MergeBB =SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1220
1221// Rearrange the names to make it easier to write test cases by having the
1222// exit block carry the suffix rather than the merge block carrying the
1223// suffix.
1224 MergeBB->takeName(ExitBB);
1225 ExitBB->setName(Twine(MergeBB->getName()) +".split");
1226
1227// Now clone the original exit block.
1228auto *ClonedExitBB = CloneBlock(ExitBB);
1229assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1230"Exit block should have been split to have one successor!");
1231assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1232"Cloned exit block has the wrong successor!");
1233
1234// Remap any cloned instructions and create a merge phi node for them.
1235for (auto ZippedInsts :llvm::zip_first(
1236llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1237llvm::make_range(ClonedExitBB->begin(),
1238 std::prev(ClonedExitBB->end())))) {
1239Instruction &I = std::get<0>(ZippedInsts);
1240Instruction &ClonedI = std::get<1>(ZippedInsts);
1241
1242// The only instructions in the exit block should be PHI nodes and
1243// potentially a landing pad.
1244assert(
1245 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1246"Bad instruction in exit block!");
1247// We should have a value map between the instruction and its clone.
1248assert(VMap.lookup(&I) == &ClonedI &&"Mismatch in the value map!");
1249
1250// Forget SCEVs based on exit phis in case SCEV looked through the phi.
1251if (SE)
1252if (auto *PN = dyn_cast<PHINode>(&I))
1253 SE->forgetLcssaPhiWithNewPredecessor(&L, PN);
1254
1255BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
1256
1257auto *MergePN =
1258PHINode::Create(I.getType(),/*NumReservedValues*/ 2,".us-phi");
1259 MergePN->insertBefore(InsertPt);
1260 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1261I.replaceAllUsesWith(MergePN);
1262 MergePN->addIncoming(&I, ExitBB);
1263 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1264 }
1265 }
1266
1267// Rewrite the instructions in the cloned blocks to refer to the instructions
1268// in the cloned blocks. We have to do this as a second pass so that we have
1269// everything available. Also, we have inserted new instructions which may
1270// include assume intrinsics, so we update the assumption cache while
1271// processing this.
1272Module *M = ClonedPH->getParent()->getParent();
1273for (auto *ClonedBB : NewBlocks)
1274for (Instruction &I : *ClonedBB) {
1275RemapDbgRecordRange(M,I.getDbgRecordRange(), VMap,
1276RF_NoModuleLevelChanges |RF_IgnoreMissingLocals);
1277RemapInstruction(&I, VMap,
1278RF_NoModuleLevelChanges |RF_IgnoreMissingLocals);
1279if (auto *II = dyn_cast<AssumeInst>(&I))
1280 AC.registerAssumption(II);
1281 }
1282
1283// Update any PHI nodes in the cloned successors of the skipped blocks to not
1284// have spurious incoming values.
1285for (auto *LoopBB : L.blocks())
1286if (SkipBlock(LoopBB))
1287for (auto *SuccBB :successors(LoopBB))
1288if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1289for (PHINode &PN : ClonedSuccBB->phis())
1290 PN.removeIncomingValue(LoopBB,/*DeletePHIIfEmpty*/false);
1291
1292// Remove the cloned parent as a predecessor of any successor we ended up
1293// cloning other than the unswitched one.
1294auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1295for (auto *SuccBB :successors(ParentBB)) {
1296if (SuccBB == UnswitchedSuccBB)
1297continue;
1298
1299auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1300if (!ClonedSuccBB)
1301continue;
1302
1303 ClonedSuccBB->removePredecessor(ClonedParentBB,
1304/*KeepOneInputPHIs*/true);
1305 }
1306
1307// Replace the cloned branch with an unconditional branch to the cloned
1308// unswitched successor.
1309auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1310Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1311// Trivial Simplification. If Terminator is a conditional branch and
1312// condition becomes dead - erase it.
1313Value *ClonedConditionToErase =nullptr;
1314if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1315 ClonedConditionToErase = BI->getCondition();
1316elseif (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1317 ClonedConditionToErase = SI->getCondition();
1318
1319Instruction *BI =BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1320 BI->setDebugLoc(ClonedTerminator->getDebugLoc());
1321 ClonedTerminator->eraseFromParent();
1322
1323if (ClonedConditionToErase)
1324RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase,nullptr,
1325 MSSAU);
1326
1327// If there are duplicate entries in the PHI nodes because of multiple edges
1328// to the unswitched successor, we need to nuke all but one as we replaced it
1329// with a direct branch.
1330for (PHINode &PN : ClonedSuccBB->phis()) {
1331bool Found =false;
1332// Loop over the incoming operands backwards so we can easily delete as we
1333// go without invalidating the index.
1334for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1335if (PN.getIncomingBlock(i) != ClonedParentBB)
1336continue;
1337if (!Found) {
1338 Found =true;
1339continue;
1340 }
1341 PN.removeIncomingValue(i,/*DeletePHIIfEmpty*/false);
1342 }
1343 }
1344
1345// Record the domtree updates for the new blocks.
1346SmallPtrSet<BasicBlock *, 4> SuccSet;
1347for (auto *ClonedBB : NewBlocks) {
1348for (auto *SuccBB :successors(ClonedBB))
1349if (SuccSet.insert(SuccBB).second)
1350 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1351 SuccSet.clear();
1352 }
1353
1354return ClonedPH;
1355}
1356
1357/// Recursively clone the specified loop and all of its children.
1358///
1359/// The target parent loop for the clone should be provided, or can be null if
1360/// the clone is a top-level loop. While cloning, all the blocks are mapped
1361/// with the provided value map. The entire original loop must be present in
1362/// the value map. The cloned loop is returned.
1363staticLoop *cloneLoopNest(Loop &OrigRootL,Loop *RootParentL,
1364constValueToValueMapTy &VMap,LoopInfo &LI) {
1365auto AddClonedBlocksToLoop = [&](Loop &OrigL,Loop &ClonedL) {
1366assert(ClonedL.getBlocks().empty() &&"Must start with an empty loop!");
1367 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1368for (auto *BB : OrigL.blocks()) {
1369auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1370 ClonedL.addBlockEntry(ClonedBB);
1371if (LI.getLoopFor(BB) == &OrigL)
1372 LI.changeLoopFor(ClonedBB, &ClonedL);
1373 }
1374 };
1375
1376// We specially handle the first loop because it may get cloned into
1377// a different parent and because we most commonly are cloning leaf loops.
1378Loop *ClonedRootL = LI.AllocateLoop();
1379if (RootParentL)
1380 RootParentL->addChildLoop(ClonedRootL);
1381else
1382 LI.addTopLevelLoop(ClonedRootL);
1383 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1384
1385if (OrigRootL.isInnermost())
1386return ClonedRootL;
1387
1388// If we have a nest, we can quickly clone the entire loop nest using an
1389// iterative approach because it is a tree. We keep the cloned parent in the
1390// data structure to avoid repeatedly querying through a map to find it.
1391SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1392// Build up the loops to clone in reverse order as we'll clone them from the
1393// back.
1394for (Loop *ChildL :llvm::reverse(OrigRootL))
1395 LoopsToClone.push_back({ClonedRootL, ChildL});
1396do {
1397Loop *ClonedParentL, *L;
1398 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1399Loop *ClonedL = LI.AllocateLoop();
1400 ClonedParentL->addChildLoop(ClonedL);
1401 AddClonedBlocksToLoop(*L, *ClonedL);
1402for (Loop *ChildL :llvm::reverse(*L))
1403 LoopsToClone.push_back({ClonedL, ChildL});
1404 }while (!LoopsToClone.empty());
1405
1406return ClonedRootL;
1407}
1408
1409/// Build the cloned loops of an original loop from unswitching.
1410///
1411/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1412/// operation. We need to re-verify that there even is a loop (as the backedge
1413/// may not have been cloned), and even if there are remaining backedges the
1414/// backedge set may be different. However, we know that each child loop is
1415/// undisturbed, we only need to find where to place each child loop within
1416/// either any parent loop or within a cloned version of the original loop.
1417///
1418/// Because child loops may end up cloned outside of any cloned version of the
1419/// original loop, multiple cloned sibling loops may be created. All of them
1420/// are returned so that the newly introduced loop nest roots can be
1421/// identified.
1422staticvoidbuildClonedLoops(Loop &OrigL,ArrayRef<BasicBlock *> ExitBlocks,
1423constValueToValueMapTy &VMap,LoopInfo &LI,
1424SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1425Loop *ClonedL =nullptr;
1426
1427auto *OrigPH = OrigL.getLoopPreheader();
1428auto *OrigHeader = OrigL.getHeader();
1429
1430auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1431auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1432
1433// We need to know the loops of the cloned exit blocks to even compute the
1434// accurate parent loop. If we only clone exits to some parent of the
1435// original parent, we want to clone into that outer loop. We also keep track
1436// of the loops that our cloned exit blocks participate in.
1437Loop *ParentL =nullptr;
1438SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1439SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
1440 ClonedExitsInLoops.reserve(ExitBlocks.size());
1441for (auto *ExitBB : ExitBlocks)
1442if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1443if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1444 ExitLoopMap[ClonedExitBB] = ExitL;
1445 ClonedExitsInLoops.push_back(ClonedExitBB);
1446if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1447 ParentL = ExitL;
1448 }
1449assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1450 ParentL->contains(OrigL.getParentLoop())) &&
1451"The computed parent loop should always contain (or be) the parent of "
1452"the original loop.");
1453
1454// We build the set of blocks dominated by the cloned header from the set of
1455// cloned blocks out of the original loop. While not all of these will
1456// necessarily be in the cloned loop, it is enough to establish that they
1457// aren't in unreachable cycles, etc.
1458SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1459for (auto *BB : OrigL.blocks())
1460if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1461 ClonedLoopBlocks.insert(ClonedBB);
1462
1463// Rebuild the set of blocks that will end up in the cloned loop. We may have
1464// skipped cloning some region of this loop which can in turn skip some of
1465// the backedges so we have to rebuild the blocks in the loop based on the
1466// backedges that remain after cloning.
1467SmallVector<BasicBlock *, 16> Worklist;
1468SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1469for (auto *Pred :predecessors(ClonedHeader)) {
1470// The only possible non-loop header predecessor is the preheader because
1471// we know we cloned the loop in simplified form.
1472if (Pred == ClonedPH)
1473continue;
1474
1475// Because the loop was in simplified form, the only non-loop predecessor
1476// should be the preheader.
1477assert(ClonedLoopBlocks.count(Pred) &&"Found a predecessor of the loop "
1478"header other than the preheader "
1479"that is not part of the loop!");
1480
1481// Insert this block into the loop set and on the first visit (and if it
1482// isn't the header we're currently walking) put it into the worklist to
1483// recurse through.
1484if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1485 Worklist.push_back(Pred);
1486 }
1487
1488// If we had any backedges then there *is* a cloned loop. Put the header into
1489// the loop set and then walk the worklist backwards to find all the blocks
1490// that remain within the loop after cloning.
1491if (!BlocksInClonedLoop.empty()) {
1492 BlocksInClonedLoop.insert(ClonedHeader);
1493
1494while (!Worklist.empty()) {
1495BasicBlock *BB = Worklist.pop_back_val();
1496assert(BlocksInClonedLoop.count(BB) &&
1497"Didn't put block into the loop set!");
1498
1499// Insert any predecessors that are in the possible set into the cloned
1500// set, and if the insert is successful, add them to the worklist. Note
1501// that we filter on the blocks that are definitely reachable via the
1502// backedge to the loop header so we may prune out dead code within the
1503// cloned loop.
1504for (auto *Pred :predecessors(BB))
1505if (ClonedLoopBlocks.count(Pred) &&
1506 BlocksInClonedLoop.insert(Pred).second)
1507 Worklist.push_back(Pred);
1508 }
1509
1510 ClonedL = LI.AllocateLoop();
1511if (ParentL) {
1512 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1513 ParentL->addChildLoop(ClonedL);
1514 }else {
1515 LI.addTopLevelLoop(ClonedL);
1516 }
1517 NonChildClonedLoops.push_back(ClonedL);
1518
1519 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1520// We don't want to just add the cloned loop blocks based on how we
1521// discovered them. The original order of blocks was carefully built in
1522// a way that doesn't rely on predecessor ordering. Rather than re-invent
1523// that logic, we just re-walk the original blocks (and those of the child
1524// loops) and filter them as we add them into the cloned loop.
1525for (auto *BB : OrigL.blocks()) {
1526auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1527if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1528continue;
1529
1530// Directly add the blocks that are only in this loop.
1531if (LI.getLoopFor(BB) == &OrigL) {
1532 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1533continue;
1534 }
1535
1536// We want to manually add it to this loop and parents.
1537// Registering it with LoopInfo will happen when we clone the top
1538// loop for this block.
1539for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1540 PL->addBlockEntry(ClonedBB);
1541 }
1542
1543// Now add each child loop whose header remains within the cloned loop. All
1544// of the blocks within the loop must satisfy the same constraints as the
1545// header so once we pass the header checks we can just clone the entire
1546// child loop nest.
1547for (Loop *ChildL : OrigL) {
1548auto *ClonedChildHeader =
1549 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1550if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1551continue;
1552
1553#ifndef NDEBUG
1554// We should never have a cloned child loop header but fail to have
1555// all of the blocks for that child loop.
1556for (auto *ChildLoopBB : ChildL->blocks())
1557assert(BlocksInClonedLoop.count(
1558 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1559"Child cloned loop has a header within the cloned outer "
1560"loop but not all of its blocks!");
1561#endif
1562
1563cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1564 }
1565 }
1566
1567// Now that we've handled all the components of the original loop that were
1568// cloned into a new loop, we still need to handle anything from the original
1569// loop that wasn't in a cloned loop.
1570
1571// Figure out what blocks are left to place within any loop nest containing
1572// the unswitched loop. If we never formed a loop, the cloned PH is one of
1573// them.
1574SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1575if (BlocksInClonedLoop.empty())
1576 UnloopedBlockSet.insert(ClonedPH);
1577for (auto *ClonedBB : ClonedLoopBlocks)
1578if (!BlocksInClonedLoop.count(ClonedBB))
1579 UnloopedBlockSet.insert(ClonedBB);
1580
1581// Copy the cloned exits and sort them in ascending loop depth, we'll work
1582// backwards across these to process them inside out. The order shouldn't
1583// matter as we're just trying to build up the map from inside-out; we use
1584// the map in a more stably ordered way below.
1585auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1586llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS,BasicBlock *RHS) {
1587return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1588 ExitLoopMap.lookup(RHS)->getLoopDepth();
1589 });
1590
1591// Populate the existing ExitLoopMap with everything reachable from each
1592// exit, starting from the inner most exit.
1593while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1594assert(Worklist.empty() &&"Didn't clear worklist!");
1595
1596BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1597Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1598
1599// Walk the CFG back until we hit the cloned PH adding everything reachable
1600// and in the unlooped set to this exit block's loop.
1601 Worklist.push_back(ExitBB);
1602do {
1603BasicBlock *BB = Worklist.pop_back_val();
1604// We can stop recursing at the cloned preheader (if we get there).
1605if (BB == ClonedPH)
1606continue;
1607
1608for (BasicBlock *PredBB :predecessors(BB)) {
1609// If this pred has already been moved to our set or is part of some
1610// (inner) loop, no update needed.
1611if (!UnloopedBlockSet.erase(PredBB)) {
1612assert(
1613 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1614"Predecessor not mapped to a loop!");
1615continue;
1616 }
1617
1618// We just insert into the loop set here. We'll add these blocks to the
1619// exit loop after we build up the set in an order that doesn't rely on
1620// predecessor order (which in turn relies on use list order).
1621bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1622 (void)Inserted;
1623assert(Inserted &&"Should only visit an unlooped block once!");
1624
1625// And recurse through to its predecessors.
1626 Worklist.push_back(PredBB);
1627 }
1628 }while (!Worklist.empty());
1629 }
1630
1631// Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1632// blocks to their outer loops, walk the cloned blocks and the cloned exits
1633// in their original order adding them to the correct loop.
1634
1635// We need a stable insertion order. We use the order of the original loop
1636// order and map into the correct parent loop.
1637for (auto *BB : llvm::concat<BasicBlock *const>(
1638ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1639if (Loop *OuterL = ExitLoopMap.lookup(BB))
1640 OuterL->addBasicBlockToLoop(BB, LI);
1641
1642#ifndef NDEBUG
1643for (auto &BBAndL : ExitLoopMap) {
1644auto *BB = BBAndL.first;
1645auto *OuterL = BBAndL.second;
1646assert(LI.getLoopFor(BB) == OuterL &&
1647"Failed to put all blocks into outer loops!");
1648 }
1649#endif
1650
1651// Now that all the blocks are placed into the correct containing loop in the
1652// absence of child loops, find all the potentially cloned child loops and
1653// clone them into whatever outer loop we placed their header into.
1654for (Loop *ChildL : OrigL) {
1655auto *ClonedChildHeader =
1656 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1657if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1658continue;
1659
1660#ifndef NDEBUG
1661for (auto *ChildLoopBB : ChildL->blocks())
1662assert(VMap.count(ChildLoopBB) &&
1663"Cloned a child loop header but not all of that loops blocks!");
1664#endif
1665
1666 NonChildClonedLoops.push_back(cloneLoopNest(
1667 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1668 }
1669}
1670
1671staticvoid
1672deleteDeadClonedBlocks(Loop &L,ArrayRef<BasicBlock *> ExitBlocks,
1673ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1674DominatorTree &DT,MemorySSAUpdater *MSSAU) {
1675// Find all the dead clones, and remove them from their successors.
1676SmallVector<BasicBlock *, 16> DeadBlocks;
1677for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1678for (constauto &VMap : VMaps)
1679if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1680if (!DT.isReachableFromEntry(ClonedBB)) {
1681for (BasicBlock *SuccBB :successors(ClonedBB))
1682 SuccBB->removePredecessor(ClonedBB);
1683 DeadBlocks.push_back(ClonedBB);
1684 }
1685
1686// Remove all MemorySSA in the dead blocks
1687if (MSSAU) {
1688SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1689 DeadBlocks.end());
1690 MSSAU->removeBlocks(DeadBlockSet);
1691 }
1692
1693// Drop any remaining references to break cycles.
1694for (BasicBlock *BB : DeadBlocks)
1695 BB->dropAllReferences();
1696// Erase them from the IR.
1697for (BasicBlock *BB : DeadBlocks)
1698 BB->eraseFromParent();
1699}
1700
1701staticvoiddeleteDeadBlocksFromLoop(Loop &L,
1702SmallVectorImpl<BasicBlock *> &ExitBlocks,
1703DominatorTree &DT,LoopInfo &LI,
1704MemorySSAUpdater *MSSAU,
1705ScalarEvolution *SE,
1706LPMUpdater &LoopUpdater) {
1707// Find all the dead blocks tied to this loop, and remove them from their
1708// successors.
1709SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1710
1711// Start with loop/exit blocks and get a transitive closure of reachable dead
1712// blocks.
1713SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1714 ExitBlocks.end());
1715 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1716while (!DeathCandidates.empty()) {
1717auto *BB = DeathCandidates.pop_back_val();
1718if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1719for (BasicBlock *SuccBB :successors(BB)) {
1720 SuccBB->removePredecessor(BB);
1721 DeathCandidates.push_back(SuccBB);
1722 }
1723 DeadBlockSet.insert(BB);
1724 }
1725 }
1726
1727// Remove all MemorySSA in the dead blocks
1728if (MSSAU)
1729 MSSAU->removeBlocks(DeadBlockSet);
1730
1731// Filter out the dead blocks from the exit blocks list so that it can be
1732// used in the caller.
1733llvm::erase_if(ExitBlocks,
1734 [&](BasicBlock *BB) {return DeadBlockSet.count(BB); });
1735
1736// Walk from this loop up through its parents removing all of the dead blocks.
1737for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1738for (auto *BB : DeadBlockSet)
1739 ParentL->getBlocksSet().erase(BB);
1740llvm::erase_if(ParentL->getBlocksVector(),
1741 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1742 }
1743
1744// Now delete the dead child loops. This raw delete will clear them
1745// recursively.
1746llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1747 if (!DeadBlockSet.count(ChildL->getHeader()))
1748 return false;
1749
1750 assert(llvm::all_of(ChildL->blocks(),
1751 [&](BasicBlock *ChildBB) {
1752 return DeadBlockSet.count(ChildBB);
1753 }) &&
1754"If the child loop header is dead all blocks in the child loop must "
1755"be dead as well!");
1756 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1757if (SE)
1758 SE->forgetBlockAndLoopDispositions();
1759 LI.destroy(ChildL);
1760returntrue;
1761 });
1762
1763// Remove the loop mappings for the dead blocks and drop all the references
1764// from these blocks to others to handle cyclic references as we start
1765// deleting the blocks themselves.
1766for (auto *BB : DeadBlockSet) {
1767// Check that the dominator tree has already been updated.
1768assert(!DT.getNode(BB) &&"Should already have cleared domtree!");
1769 LI.changeLoopFor(BB,nullptr);
1770// Drop all uses of the instructions to make sure we won't have dangling
1771// uses in other blocks.
1772for (auto &I : *BB)
1773if (!I.use_empty())
1774I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1775 BB->dropAllReferences();
1776 }
1777
1778// Actually delete the blocks now that they've been fully unhooked from the
1779// IR.
1780for (auto *BB : DeadBlockSet)
1781 BB->eraseFromParent();
1782}
1783
1784/// Recompute the set of blocks in a loop after unswitching.
1785///
1786/// This walks from the original headers predecessors to rebuild the loop. We
1787/// take advantage of the fact that new blocks can't have been added, and so we
1788/// filter by the original loop's blocks. This also handles potentially
1789/// unreachable code that we don't want to explore but might be found examining
1790/// the predecessors of the header.
1791///
1792/// If the original loop is no longer a loop, this will return an empty set. If
1793/// it remains a loop, all the blocks within it will be added to the set
1794/// (including those blocks in inner loops).
1795staticSmallPtrSet<const BasicBlock *, 16>recomputeLoopBlockSet(Loop &L,
1796LoopInfo &LI) {
1797SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1798
1799auto *PH = L.getLoopPreheader();
1800auto *Header = L.getHeader();
1801
1802// A worklist to use while walking backwards from the header.
1803SmallVector<BasicBlock *, 16> Worklist;
1804
1805// First walk the predecessors of the header to find the backedges. This will
1806// form the basis of our walk.
1807for (auto *Pred :predecessors(Header)) {
1808// Skip the preheader.
1809if (Pred == PH)
1810continue;
1811
1812// Because the loop was in simplified form, the only non-loop predecessor
1813// is the preheader.
1814assert(L.contains(Pred) &&"Found a predecessor of the loop header other "
1815"than the preheader that is not part of the "
1816"loop!");
1817
1818// Insert this block into the loop set and on the first visit and, if it
1819// isn't the header we're currently walking, put it into the worklist to
1820// recurse through.
1821if (LoopBlockSet.insert(Pred).second && Pred != Header)
1822 Worklist.push_back(Pred);
1823 }
1824
1825// If no backedges were found, we're done.
1826if (LoopBlockSet.empty())
1827return LoopBlockSet;
1828
1829// We found backedges, recurse through them to identify the loop blocks.
1830while (!Worklist.empty()) {
1831BasicBlock *BB = Worklist.pop_back_val();
1832assert(LoopBlockSet.count(BB) &&"Didn't put block into the loop set!");
1833
1834// No need to walk past the header.
1835if (BB == Header)
1836continue;
1837
1838// Because we know the inner loop structure remains valid we can use the
1839// loop structure to jump immediately across the entire nested loop.
1840// Further, because it is in loop simplified form, we can directly jump
1841// to its preheader afterward.
1842if (Loop *InnerL = LI.getLoopFor(BB))
1843if (InnerL != &L) {
1844assert(L.contains(InnerL) &&
1845"Should not reach a loop *outside* this loop!");
1846// The preheader is the only possible predecessor of the loop so
1847// insert it into the set and check whether it was already handled.
1848auto *InnerPH = InnerL->getLoopPreheader();
1849assert(L.contains(InnerPH) &&"Cannot contain an inner loop block "
1850"but not contain the inner loop "
1851"preheader!");
1852if (!LoopBlockSet.insert(InnerPH).second)
1853// The only way to reach the preheader is through the loop body
1854// itself so if it has been visited the loop is already handled.
1855continue;
1856
1857// Insert all of the blocks (other than those already present) into
1858// the loop set. We expect at least the block that led us to find the
1859// inner loop to be in the block set, but we may also have other loop
1860// blocks if they were already enqueued as predecessors of some other
1861// outer loop block.
1862for (auto *InnerBB : InnerL->blocks()) {
1863if (InnerBB == BB) {
1864assert(LoopBlockSet.count(InnerBB) &&
1865"Block should already be in the set!");
1866continue;
1867 }
1868
1869 LoopBlockSet.insert(InnerBB);
1870 }
1871
1872// Add the preheader to the worklist so we will continue past the
1873// loop body.
1874 Worklist.push_back(InnerPH);
1875continue;
1876 }
1877
1878// Insert any predecessors that were in the original loop into the new
1879// set, and if the insert is successful, add them to the worklist.
1880for (auto *Pred :predecessors(BB))
1881if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1882 Worklist.push_back(Pred);
1883 }
1884
1885assert(LoopBlockSet.count(Header) &&"Cannot fail to add the header!");
1886
1887// We've found all the blocks participating in the loop, return our completed
1888// set.
1889return LoopBlockSet;
1890}
1891
1892/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1893///
1894/// The removal may have removed some child loops entirely but cannot have
1895/// disturbed any remaining child loops. However, they may need to be hoisted
1896/// to the parent loop (or to be top-level loops). The original loop may be
1897/// completely removed.
1898///
1899/// The sibling loops resulting from this update are returned. If the original
1900/// loop remains a valid loop, it will be the first entry in this list with all
1901/// of the newly sibling loops following it.
1902///
1903/// Returns true if the loop remains a loop after unswitching, and false if it
1904/// is no longer a loop after unswitching (and should not continue to be
1905/// referenced).
1906staticboolrebuildLoopAfterUnswitch(Loop &L,ArrayRef<BasicBlock *> ExitBlocks,
1907LoopInfo &LI,
1908SmallVectorImpl<Loop *> &HoistedLoops,
1909ScalarEvolution *SE) {
1910auto *PH = L.getLoopPreheader();
1911
1912// Compute the actual parent loop from the exit blocks. Because we may have
1913// pruned some exits the loop may be different from the original parent.
1914Loop *ParentL =nullptr;
1915SmallVector<Loop *, 4> ExitLoops;
1916SmallVector<BasicBlock *, 4> ExitsInLoops;
1917 ExitsInLoops.reserve(ExitBlocks.size());
1918for (auto *ExitBB : ExitBlocks)
1919if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1920 ExitLoops.push_back(ExitL);
1921 ExitsInLoops.push_back(ExitBB);
1922if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1923 ParentL = ExitL;
1924 }
1925
1926// Recompute the blocks participating in this loop. This may be empty if it
1927// is no longer a loop.
1928auto LoopBlockSet =recomputeLoopBlockSet(L, LI);
1929
1930// If we still have a loop, we need to re-set the loop's parent as the exit
1931// block set changing may have moved it within the loop nest. Note that this
1932// can only happen when this loop has a parent as it can only hoist the loop
1933// *up* the nest.
1934if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1935// Remove this loop's (original) blocks from all of the intervening loops.
1936for (Loop *IL = L.getParentLoop(); IL != ParentL;
1937 IL = IL->getParentLoop()) {
1938 IL->getBlocksSet().erase(PH);
1939for (auto *BB : L.blocks())
1940 IL->getBlocksSet().erase(BB);
1941llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1942 return BB == PH || L.contains(BB);
1943 });
1944 }
1945
1946 LI.changeLoopFor(PH, ParentL);
1947 L.getParentLoop()->removeChildLoop(&L);
1948if (ParentL)
1949 ParentL->addChildLoop(&L);
1950else
1951 LI.addTopLevelLoop(&L);
1952 }
1953
1954// Now we update all the blocks which are no longer within the loop.
1955auto &Blocks = L.getBlocksVector();
1956auto BlocksSplitI =
1957 LoopBlockSet.empty()
1958 ?Blocks.begin()
1959 : std::stable_partition(
1960Blocks.begin(),Blocks.end(),
1961 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1962
1963// Before we erase the list of unlooped blocks, build a set of them.
1964SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI,Blocks.end());
1965if (LoopBlockSet.empty())
1966 UnloopedBlocks.insert(PH);
1967
1968// Now erase these blocks from the loop.
1969for (auto *BB :make_range(BlocksSplitI,Blocks.end()))
1970 L.getBlocksSet().erase(BB);
1971Blocks.erase(BlocksSplitI,Blocks.end());
1972
1973// Sort the exits in ascending loop depth, we'll work backwards across these
1974// to process them inside out.
1975llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS,BasicBlock *RHS) {
1976return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1977 });
1978
1979// We'll build up a set for each exit loop.
1980SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1981Loop *PrevExitL = L.getParentLoop();// The deepest possible exit loop.
1982
1983auto RemoveUnloopedBlocksFromLoop =
1984 [](Loop &L,SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1985for (auto *BB : UnloopedBlocks)
1986 L.getBlocksSet().erase(BB);
1987llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1988 return UnloopedBlocks.count(BB);
1989 });
1990 };
1991
1992SmallVector<BasicBlock *, 16> Worklist;
1993while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1994assert(Worklist.empty() &&"Didn't clear worklist!");
1995assert(NewExitLoopBlocks.empty() &&"Didn't clear loop set!");
1996
1997// Grab the next exit block, in decreasing loop depth order.
1998BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1999Loop &ExitL = *LI.getLoopFor(ExitBB);
2000assert(ExitL.contains(&L) &&"Exit loop must contain the inner loop!");
2001
2002// Erase all of the unlooped blocks from the loops between the previous
2003// exit loop and this exit loop. This works because the ExitInLoops list is
2004// sorted in increasing order of loop depth and thus we visit loops in
2005// decreasing order of loop depth.
2006for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
2007 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2008
2009// Walk the CFG back until we hit the cloned PH adding everything reachable
2010// and in the unlooped set to this exit block's loop.
2011 Worklist.push_back(ExitBB);
2012do {
2013BasicBlock *BB = Worklist.pop_back_val();
2014// We can stop recursing at the cloned preheader (if we get there).
2015if (BB == PH)
2016continue;
2017
2018for (BasicBlock *PredBB :predecessors(BB)) {
2019// If this pred has already been moved to our set or is part of some
2020// (inner) loop, no update needed.
2021if (!UnloopedBlocks.erase(PredBB)) {
2022assert((NewExitLoopBlocks.count(PredBB) ||
2023 ExitL.contains(LI.getLoopFor(PredBB))) &&
2024"Predecessor not in a nested loop (or already visited)!");
2025continue;
2026 }
2027
2028// We just insert into the loop set here. We'll add these blocks to the
2029// exit loop after we build up the set in a deterministic order rather
2030// than the predecessor-influenced visit order.
2031bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2032 (void)Inserted;
2033assert(Inserted &&"Should only visit an unlooped block once!");
2034
2035// And recurse through to its predecessors.
2036 Worklist.push_back(PredBB);
2037 }
2038 }while (!Worklist.empty());
2039
2040// If blocks in this exit loop were directly part of the original loop (as
2041// opposed to a child loop) update the map to point to this exit loop. This
2042// just updates a map and so the fact that the order is unstable is fine.
2043for (auto *BB : NewExitLoopBlocks)
2044if (Loop *BBL = LI.getLoopFor(BB))
2045if (BBL == &L || !L.contains(BBL))
2046 LI.changeLoopFor(BB, &ExitL);
2047
2048// We will remove the remaining unlooped blocks from this loop in the next
2049// iteration or below.
2050 NewExitLoopBlocks.clear();
2051 }
2052
2053// Any remaining unlooped blocks are no longer part of any loop unless they
2054// are part of some child loop.
2055for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2056 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2057for (auto *BB : UnloopedBlocks)
2058if (Loop *BBL = LI.getLoopFor(BB))
2059if (BBL == &L || !L.contains(BBL))
2060 LI.changeLoopFor(BB,nullptr);
2061
2062// Sink all the child loops whose headers are no longer in the loop set to
2063// the parent (or to be top level loops). We reach into the loop and directly
2064// update its subloop vector to make this batch update efficient.
2065auto &SubLoops = L.getSubLoopsVector();
2066auto SubLoopsSplitI =
2067 LoopBlockSet.empty()
2068 ? SubLoops.begin()
2069 : std::stable_partition(
2070 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2071 return LoopBlockSet.count(SubL->getHeader());
2072 });
2073for (auto *HoistedL :make_range(SubLoopsSplitI, SubLoops.end())) {
2074 HoistedLoops.push_back(HoistedL);
2075 HoistedL->setParentLoop(nullptr);
2076
2077// To compute the new parent of this hoisted loop we look at where we
2078// placed the preheader above. We can't lookup the header itself because we
2079// retained the mapping from the header to the hoisted loop. But the
2080// preheader and header should have the exact same new parent computed
2081// based on the set of exit blocks from the original loop as the preheader
2082// is a predecessor of the header and so reached in the reverse walk. And
2083// because the loops were all in simplified form the preheader of the
2084// hoisted loop can't be part of some *other* loop.
2085if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2086 NewParentL->addChildLoop(HoistedL);
2087else
2088 LI.addTopLevelLoop(HoistedL);
2089 }
2090 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2091
2092// Actually delete the loop if nothing remained within it.
2093if (Blocks.empty()) {
2094assert(SubLoops.empty() &&
2095"Failed to remove all subloops from the original loop!");
2096if (Loop *ParentL = L.getParentLoop())
2097 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2098else
2099 LI.removeLoop(llvm::find(LI, &L));
2100// markLoopAsDeleted for L should be triggered by the caller (it is
2101// typically done within postUnswitch).
2102if (SE)
2103 SE->forgetBlockAndLoopDispositions();
2104 LI.destroy(&L);
2105returnfalse;
2106 }
2107
2108returntrue;
2109}
2110
2111/// Helper to visit a dominator subtree, invoking a callable on each node.
2112///
2113/// Returning false at any point will stop walking past that node of the tree.
2114template <typename CallableT>
2115voidvisitDomSubTree(DominatorTree &DT,BasicBlock *BB, CallableT Callable) {
2116SmallVector<DomTreeNode *, 4> DomWorklist;
2117 DomWorklist.push_back(DT[BB]);
2118#ifndef NDEBUG
2119SmallPtrSet<DomTreeNode *, 4> Visited;
2120 Visited.insert(DT[BB]);
2121#endif
2122do {
2123DomTreeNode *N = DomWorklist.pop_back_val();
2124
2125// Visit this node.
2126if (!Callable(N->getBlock()))
2127continue;
2128
2129// Accumulate the child nodes.
2130for (DomTreeNode *ChildN : *N) {
2131assert(Visited.insert(ChildN).second &&
2132"Cannot visit a node twice when walking a tree!");
2133 DomWorklist.push_back(ChildN);
2134 }
2135 }while (!DomWorklist.empty());
2136}
2137
2138voidpostUnswitch(Loop &L,LPMUpdater &U,StringRef LoopName,
2139bool CurrentLoopValid,bool PartiallyInvariant,
2140bool InjectedCondition,ArrayRef<Loop *> NewLoops) {
2141// If we did a non-trivial unswitch, we have added new (cloned) loops.
2142if (!NewLoops.empty())
2143 U.addSiblingLoops(NewLoops);
2144
2145// If the current loop remains valid, we should revisit it to catch any
2146// other unswitch opportunities. Otherwise, we need to mark it as deleted.
2147if (CurrentLoopValid) {
2148if (PartiallyInvariant) {
2149// Mark the new loop as partially unswitched, to avoid unswitching on
2150// the same condition again.
2151auto &Context = L.getHeader()->getContext();
2152MDNode *DisableUnswitchMD =MDNode::get(
2153 Context,
2154MDString::get(Context,"llvm.loop.unswitch.partial.disable"));
2155MDNode *NewLoopID =makePostTransformationMetadata(
2156 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2157 {DisableUnswitchMD});
2158 L.setLoopID(NewLoopID);
2159 }elseif (InjectedCondition) {
2160// Do the same for injection of invariant conditions.
2161auto &Context = L.getHeader()->getContext();
2162MDNode *DisableUnswitchMD =MDNode::get(
2163 Context,
2164MDString::get(Context,"llvm.loop.unswitch.injection.disable"));
2165MDNode *NewLoopID =makePostTransformationMetadata(
2166 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2167 {DisableUnswitchMD});
2168 L.setLoopID(NewLoopID);
2169 }else
2170 U.revisitCurrentLoop();
2171 }else
2172 U.markLoopAsDeleted(L, LoopName);
2173}
2174
2175staticvoidunswitchNontrivialInvariants(
2176Loop &L,Instruction &TI,ArrayRef<Value *> Invariants,
2177IVConditionInfo &PartialIVInfo,DominatorTree &DT,LoopInfo &LI,
2178AssumptionCache &AC,ScalarEvolution *SE,MemorySSAUpdater *MSSAU,
2179LPMUpdater &LoopUpdater,bool InsertFreeze,bool InjectedCondition) {
2180auto *ParentBB = TI.getParent();
2181BranchInst *BI = dyn_cast<BranchInst>(&TI);
2182SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2183
2184// Save the current loop name in a variable so that we can report it even
2185// after it has been deleted.
2186 std::string LoopName(L.getName());
2187
2188// We can only unswitch switches, conditional branches with an invariant
2189// condition, or combining invariant conditions with an instruction or
2190// partially invariant instructions.
2191assert((SI || (BI && BI->isConditional())) &&
2192"Can only unswitch switches and conditional branch!");
2193bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2194bool FullUnswitch =
2195 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2196 !PartiallyInvariant);
2197if (FullUnswitch)
2198assert(Invariants.size() == 1 &&
2199"Cannot have other invariants with full unswitching!");
2200else
2201assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
2202"Partial unswitching requires an instruction as the condition!");
2203
2204if (MSSAU &&VerifyMemorySSA)
2205 MSSAU->getMemorySSA()->verifyMemorySSA();
2206
2207// Constant and BBs tracking the cloned and continuing successor. When we are
2208// unswitching the entire condition, this can just be trivially chosen to
2209// unswitch towards `true`. However, when we are unswitching a set of
2210// invariants combined with `and` or `or` or partially invariant instructions,
2211// the combining operation determines the best direction to unswitch: we want
2212// to unswitch the direction that will collapse the branch.
2213boolDirection =true;
2214int ClonedSucc = 0;
2215if (!FullUnswitch) {
2216Value *Cond =skipTrivialSelect(BI->getCondition());
2217 (void)Cond;
2218assert(((match(Cond,m_LogicalAnd()) ^match(Cond,m_LogicalOr())) ||
2219 PartiallyInvariant) &&
2220"Only `or`, `and`, an `select`, partially invariant instructions "
2221"can combine invariants being unswitched.");
2222if (!match(Cond,m_LogicalOr())) {
2223if (match(Cond,m_LogicalAnd()) ||
2224 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2225Direction =false;
2226 ClonedSucc = 1;
2227 }
2228 }
2229 }
2230
2231BasicBlock *RetainedSuccBB =
2232 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2233SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2234if (BI)
2235 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2236else
2237for (auto Case : SI->cases())
2238if (Case.getCaseSuccessor() != RetainedSuccBB)
2239 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2240
2241assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2242"Should not unswitch the same successor we are retaining!");
2243
2244// The branch should be in this exact loop. Any inner loop's invariant branch
2245// should be handled by unswitching that inner loop. The caller of this
2246// routine should filter out any candidates that remain (but were skipped for
2247// whatever reason).
2248assert(LI.getLoopFor(ParentBB) == &L &&"Branch in an inner loop!");
2249
2250// Compute the parent loop now before we start hacking on things.
2251Loop *ParentL = L.getParentLoop();
2252// Get blocks in RPO order for MSSA update, before changing the CFG.
2253LoopBlocksRPO LBRPO(&L);
2254if (MSSAU)
2255 LBRPO.perform(&LI);
2256
2257// Compute the outer-most loop containing one of our exit blocks. This is the
2258// furthest up our loopnest which can be mutated, which we will use below to
2259// update things.
2260Loop *OuterExitL = &L;
2261SmallVector<BasicBlock *, 4> ExitBlocks;
2262 L.getUniqueExitBlocks(ExitBlocks);
2263for (auto *ExitBB : ExitBlocks) {
2264// ExitBB can be an exit block for several levels in the loop nest. Make
2265// sure we find the top most.
2266Loop *NewOuterExitL =getTopMostExitingLoop(ExitBB, LI);
2267if (!NewOuterExitL) {
2268// We exited the entire nest with this block, so we're done.
2269 OuterExitL =nullptr;
2270break;
2271 }
2272if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2273 OuterExitL = NewOuterExitL;
2274 }
2275
2276// At this point, we're definitely going to unswitch something so invalidate
2277// any cached information in ScalarEvolution for the outer most loop
2278// containing an exit block and all nested loops.
2279if (SE) {
2280if (OuterExitL)
2281 SE->forgetLoop(OuterExitL);
2282else
2283 SE->forgetTopmostLoop(&L);
2284 SE->forgetBlockAndLoopDispositions();
2285 }
2286
2287// If the edge from this terminator to a successor dominates that successor,
2288// store a map from each block in its dominator subtree to it. This lets us
2289// tell when cloning for a particular successor if a block is dominated by
2290// some *other* successor with a single data structure. We use this to
2291// significantly reduce cloning.
2292SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2293for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2294 UnswitchedSuccBBs))
2295if (SuccBB->getUniquePredecessor() ||
2296llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2297return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2298 }))
2299visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2300 DominatingSucc[BB] = SuccBB;
2301returntrue;
2302 });
2303
2304// Split the preheader, so that we know that there is a safe place to insert
2305// the conditional branch. We will change the preheader to have a conditional
2306// branch on LoopCond. The original preheader will become the split point
2307// between the unswitched versions, and we will have a new preheader for the
2308// original loop.
2309BasicBlock *SplitBB = L.getLoopPreheader();
2310BasicBlock *LoopPH =SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2311
2312// Keep track of the dominator tree updates needed.
2313SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2314
2315// Clone the loop for each unswitched successor.
2316SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2317 VMaps.reserve(UnswitchedSuccBBs.size());
2318SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2319for (auto *SuccBB : UnswitchedSuccBBs) {
2320 VMaps.emplace_back(newValueToValueMapTy());
2321 ClonedPHs[SuccBB] =buildClonedLoopBlocks(
2322 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2323 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2324 }
2325
2326// Drop metadata if we may break its semantics by moving this instr into the
2327// split block.
2328if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2329if (DropNonTrivialImplicitNullChecks)
2330// Do not spend time trying to understand if we can keep it, just drop it
2331// to save compile time.
2332 TI.setMetadata(LLVMContext::MD_make_implicit,nullptr);
2333else {
2334// It is only legal to preserve make.implicit metadata if we are
2335// guaranteed no reach implicit null check after following this branch.
2336ICFLoopSafetyInfo SafetyInfo;
2337 SafetyInfo.computeLoopSafetyInfo(&L);
2338if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2339 TI.setMetadata(LLVMContext::MD_make_implicit,nullptr);
2340 }
2341 }
2342
2343// The stitching of the branched code back together depends on whether we're
2344// doing full unswitching or not with the exception that we always want to
2345// nuke the initial terminator placed in the split block.
2346 SplitBB->getTerminator()->eraseFromParent();
2347if (FullUnswitch) {
2348// Keep a clone of the terminator for MSSA updates.
2349Instruction *NewTI = TI.clone();
2350 NewTI->insertInto(ParentBB, ParentBB->end());
2351
2352// Splice the terminator from the original loop and rewrite its
2353// successors.
2354 TI.moveBefore(*SplitBB, SplitBB->end());
2355 TI.dropLocation();
2356
2357// First wire up the moved terminator to the preheaders.
2358if (BI) {
2359BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2360 BI->setSuccessor(ClonedSucc, ClonedPH);
2361 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2362Value *Cond =skipTrivialSelect(BI->getCondition());
2363if (InsertFreeze) {
2364// We don't give any debug location to the new freeze, because the
2365// BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted
2366// out of the loop.
2367Cond =newFreezeInst(Cond,Cond->getName() +".fr", BI->getIterator());
2368 }
2369 BI->setCondition(Cond);
2370 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2371 }else {
2372assert(SI &&"Must either be a branch or switch!");
2373
2374// Walk the cases and directly update their successors.
2375assert(SI->getDefaultDest() == RetainedSuccBB &&
2376"Not retaining default successor!");
2377 SI->setDefaultDest(LoopPH);
2378for (constauto &Case : SI->cases())
2379if (Case.getCaseSuccessor() == RetainedSuccBB)
2380 Case.setSuccessor(LoopPH);
2381else
2382 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2383
2384if (InsertFreeze)
2385 SI->setCondition(newFreezeInst(SI->getCondition(),
2386 SI->getCondition()->getName() +".fr",
2387 SI->getIterator()));
2388
2389// We need to use the set to populate domtree updates as even when there
2390// are multiple cases pointing at the same successor we only want to
2391// remove and insert one edge in the domtree.
2392for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2393 DTUpdates.push_back(
2394 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2395 }
2396
2397if (MSSAU) {
2398 DT.applyUpdates(DTUpdates);
2399 DTUpdates.clear();
2400
2401// Remove all but one edge to the retained block and all unswitched
2402// blocks. This is to avoid having duplicate entries in the cloned Phis,
2403// when we know we only keep a single edge for each case.
2404 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2405for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2406 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2407
2408for (auto &VMap : VMaps)
2409 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2410/*IgnoreIncomingWithNoClones=*/true);
2411 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2412
2413// Remove all edges to unswitched blocks.
2414for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2415 MSSAU->removeEdge(ParentBB, SuccBB);
2416 }
2417
2418// Now unhook the successor relationship as we'll be replacing
2419// the terminator with a direct branch. This is much simpler for branches
2420// than switches so we handle those first.
2421if (BI) {
2422// Remove the parent as a predecessor of the unswitched successor.
2423assert(UnswitchedSuccBBs.size() == 1 &&
2424"Only one possible unswitched block for a branch!");
2425BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2426 UnswitchedSuccBB->removePredecessor(ParentBB,
2427/*KeepOneInputPHIs*/true);
2428 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2429 }else {
2430// Note that we actually want to remove the parent block as a predecessor
2431// of *every* case successor. The case successor is either unswitched,
2432// completely eliminating an edge from the parent to that successor, or it
2433// is a duplicate edge to the retained successor as the retained successor
2434// is always the default successor and as we'll replace this with a direct
2435// branch we no longer need the duplicate entries in the PHI nodes.
2436SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2437assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2438"Not retaining default successor!");
2439for (constauto &Case : NewSI->cases())
2440 Case.getCaseSuccessor()->removePredecessor(
2441 ParentBB,
2442/*KeepOneInputPHIs*/true);
2443
2444// We need to use the set to populate domtree updates as even when there
2445// are multiple cases pointing at the same successor we only want to
2446// remove and insert one edge in the domtree.
2447for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2448 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2449 }
2450
2451// Create a new unconditional branch to the continuing block (as opposed to
2452// the one cloned).
2453Instruction *NewBI =BranchInst::Create(RetainedSuccBB, ParentBB);
2454 NewBI->setDebugLoc(NewTI->getDebugLoc());
2455
2456// After MSSAU update, remove the cloned terminator instruction NewTI.
2457 NewTI->eraseFromParent();
2458 }else {
2459assert(BI &&"Only branches have partial unswitching.");
2460assert(UnswitchedSuccBBs.size() == 1 &&
2461"Only one possible unswitched block for a branch!");
2462BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2463// When doing a partial unswitch, we have to do a bit more work to build up
2464// the branch in the split block.
2465if (PartiallyInvariant)
2466buildPartialInvariantUnswitchConditionalBranch(
2467 *SplitBB, Invariants,Direction, *ClonedPH, *LoopPH, L, MSSAU);
2468else {
2469buildPartialUnswitchConditionalBranch(
2470 *SplitBB, Invariants,Direction, *ClonedPH, *LoopPH,
2471FreezeLoopUnswitchCond, BI, &AC, DT);
2472 }
2473 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2474
2475if (MSSAU) {
2476 DT.applyUpdates(DTUpdates);
2477 DTUpdates.clear();
2478
2479// Perform MSSA cloning updates.
2480for (auto &VMap : VMaps)
2481 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2482/*IgnoreIncomingWithNoClones=*/true);
2483 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2484 }
2485 }
2486
2487// Apply the updates accumulated above to get an up-to-date dominator tree.
2488 DT.applyUpdates(DTUpdates);
2489
2490// Now that we have an accurate dominator tree, first delete the dead cloned
2491// blocks so that we can accurately build any cloned loops. It is important to
2492// not delete the blocks from the original loop yet because we still want to
2493// reference the original loop to understand the cloned loop's structure.
2494deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2495
2496// Build the cloned loop structure itself. This may be substantially
2497// different from the original structure due to the simplified CFG. This also
2498// handles inserting all the cloned blocks into the correct loops.
2499SmallVector<Loop *, 4> NonChildClonedLoops;
2500for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2501buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2502
2503// Now that our cloned loops have been built, we can update the original loop.
2504// First we delete the dead blocks from it and then we rebuild the loop
2505// structure taking these deletions into account.
2506deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2507
2508if (MSSAU &&VerifyMemorySSA)
2509 MSSAU->getMemorySSA()->verifyMemorySSA();
2510
2511SmallVector<Loop *, 4> HoistedLoops;
2512bool IsStillLoop =
2513rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2514
2515if (MSSAU &&VerifyMemorySSA)
2516 MSSAU->getMemorySSA()->verifyMemorySSA();
2517
2518// This transformation has a high risk of corrupting the dominator tree, and
2519// the below steps to rebuild loop structures will result in hard to debug
2520// errors in that case so verify that the dominator tree is sane first.
2521// FIXME: Remove this when the bugs stop showing up and rely on existing
2522// verification steps.
2523assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2524
2525if (BI && !PartiallyInvariant) {
2526// If we unswitched a branch which collapses the condition to a known
2527// constant we want to replace all the uses of the invariants within both
2528// the original and cloned blocks. We do this here so that we can use the
2529// now updated dominator tree to identify which side the users are on.
2530assert(UnswitchedSuccBBs.size() == 1 &&
2531"Only one possible unswitched block for a branch!");
2532BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2533
2534// When considering multiple partially-unswitched invariants
2535// we cant just go replace them with constants in both branches.
2536//
2537// For 'AND' we infer that true branch ("continue") means true
2538// for each invariant operand.
2539// For 'OR' we can infer that false branch ("continue") means false
2540// for each invariant operand.
2541// So it happens that for multiple-partial case we dont replace
2542// in the unswitched branch.
2543bool ReplaceUnswitched =
2544 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2545
2546ConstantInt *UnswitchedReplacement =
2547Direction ?ConstantInt::getTrue(BI->getContext())
2548 :ConstantInt::getFalse(BI->getContext());
2549ConstantInt *ContinueReplacement =
2550Direction ?ConstantInt::getFalse(BI->getContext())
2551 :ConstantInt::getTrue(BI->getContext());
2552for (Value *Invariant : Invariants) {
2553assert(!isa<Constant>(Invariant) &&
2554"Should not be replacing constant values!");
2555// Use make_early_inc_range here as set invalidates the iterator.
2556for (Use &U :llvm::make_early_inc_range(Invariant->uses())) {
2557Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2558if (!UserI)
2559continue;
2560
2561// Replace it with the 'continue' side if in the main loop body, and the
2562// unswitched if in the cloned blocks.
2563if (DT.dominates(LoopPH, UserI->getParent()))
2564 U.set(ContinueReplacement);
2565elseif (ReplaceUnswitched &&
2566 DT.dominates(ClonedPH, UserI->getParent()))
2567 U.set(UnswitchedReplacement);
2568 }
2569 }
2570 }
2571
2572// We can change which blocks are exit blocks of all the cloned sibling
2573// loops, the current loop, and any parent loops which shared exit blocks
2574// with the current loop. As a consequence, we need to re-form LCSSA for
2575// them. But we shouldn't need to re-form LCSSA for any child loops.
2576// FIXME: This could be made more efficient by tracking which exit blocks are
2577// new, and focusing on them, but that isn't likely to be necessary.
2578//
2579// In order to reasonably rebuild LCSSA we need to walk inside-out across the
2580// loop nest and update every loop that could have had its exits changed. We
2581// also need to cover any intervening loops. We add all of these loops to
2582// a list and sort them by loop depth to achieve this without updating
2583// unnecessary loops.
2584auto UpdateLoop = [&](Loop &UpdateL) {
2585#ifndef NDEBUG
2586 UpdateL.verifyLoop();
2587for (Loop *ChildL : UpdateL) {
2588 ChildL->verifyLoop();
2589assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2590"Perturbed a child loop's LCSSA form!");
2591 }
2592#endif
2593// First build LCSSA for this loop so that we can preserve it when
2594// forming dedicated exits. We don't want to perturb some other loop's
2595// LCSSA while doing that CFG edit.
2596formLCSSA(UpdateL, DT, &LI, SE);
2597
2598// For loops reached by this loop's original exit blocks we may
2599// introduced new, non-dedicated exits. At least try to re-form dedicated
2600// exits for these loops. This may fail if they couldn't have dedicated
2601// exits to start with.
2602formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU,/*PreserveLCSSA*/true);
2603 };
2604
2605// For non-child cloned loops and hoisted loops, we just need to update LCSSA
2606// and we can do it in any order as they don't nest relative to each other.
2607//
2608// Also check if any of the loops we have updated have become top-level loops
2609// as that will necessitate widening the outer loop scope.
2610for (Loop *UpdatedL :
2611 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2612 UpdateLoop(*UpdatedL);
2613if (UpdatedL->isOutermost())
2614 OuterExitL =nullptr;
2615 }
2616if (IsStillLoop) {
2617 UpdateLoop(L);
2618if (L.isOutermost())
2619 OuterExitL =nullptr;
2620 }
2621
2622// If the original loop had exit blocks, walk up through the outer most loop
2623// of those exit blocks to update LCSSA and form updated dedicated exits.
2624if (OuterExitL != &L)
2625for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2626 OuterL = OuterL->getParentLoop())
2627 UpdateLoop(*OuterL);
2628
2629#ifndef NDEBUG
2630// Verify the entire loop structure to catch any incorrect updates before we
2631// progress in the pass pipeline.
2632 LI.verify(DT);
2633#endif
2634
2635// Now that we've unswitched something, make callbacks to report the changes.
2636// For that we need to merge together the updated loops and the cloned loops
2637// and check whether the original loop survived.
2638SmallVector<Loop *, 4> SibLoops;
2639for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2640if (UpdatedL->getParentLoop() == ParentL)
2641 SibLoops.push_back(UpdatedL);
2642postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2643 InjectedCondition, SibLoops);
2644
2645if (MSSAU &&VerifyMemorySSA)
2646 MSSAU->getMemorySSA()->verifyMemorySSA();
2647
2648if (BI)
2649 ++NumBranches;
2650else
2651 ++NumSwitches;
2652}
2653
2654/// Recursively compute the cost of a dominator subtree based on the per-block
2655/// cost map provided.
2656///
2657/// The recursive computation is memozied into the provided DT-indexed cost map
2658/// to allow querying it for most nodes in the domtree without it becoming
2659/// quadratic.
2660staticInstructionCostcomputeDomSubtreeCost(
2661DomTreeNode &N,
2662constSmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2663SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2664// Don't accumulate cost (or recurse through) blocks not in our block cost
2665// map and thus not part of the duplication cost being considered.
2666auto BBCostIt = BBCostMap.find(N.getBlock());
2667if (BBCostIt == BBCostMap.end())
2668return 0;
2669
2670// Lookup this node to see if we already computed its cost.
2671auto DTCostIt = DTCostMap.find(&N);
2672if (DTCostIt != DTCostMap.end())
2673return DTCostIt->second;
2674
2675// If not, we have to compute it. We can't use insert above and update
2676// because computing the cost may insert more things into the map.
2677InstructionCostCost = std::accumulate(
2678N.begin(),N.end(), BBCostIt->second,
2679 [&](InstructionCost Sum,DomTreeNode *ChildN) ->InstructionCost {
2680 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2681 });
2682bool Inserted = DTCostMap.insert({&N,Cost}).second;
2683 (void)Inserted;
2684assert(Inserted &&"Should not insert a node while visiting children!");
2685returnCost;
2686}
2687
2688/// Turns a select instruction into implicit control flow branch,
2689/// making the following replacement:
2690///
2691/// head:
2692/// --code before select--
2693/// select %cond, %trueval, %falseval
2694/// --code after select--
2695///
2696/// into
2697///
2698/// head:
2699/// --code before select--
2700/// br i1 %cond, label %then, label %tail
2701///
2702/// then:
2703/// br %tail
2704///
2705/// tail:
2706/// phi [ %trueval, %then ], [ %falseval, %head]
2707/// unreachable
2708///
2709/// It also makes all relevant DT and LI updates, so that all structures are in
2710/// valid state after this transform.
2711staticBranchInst *turnSelectIntoBranch(SelectInst *SI,DominatorTree &DT,
2712LoopInfo &LI,MemorySSAUpdater *MSSAU,
2713AssumptionCache *AC) {
2714LLVM_DEBUG(dbgs() <<"Turning " << *SI <<" into a branch.\n");
2715BasicBlock *HeadBB = SI->getParent();
2716
2717DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2718SplitBlockAndInsertIfThen(SI->getCondition(), SI,false,
2719 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2720auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2721BasicBlock *ThenBB = CondBr->getSuccessor(0),
2722 *TailBB = CondBr->getSuccessor(1);
2723if (MSSAU)
2724 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2725
2726PHINode *Phi =
2727PHINode::Create(SI->getType(), 2,"unswitched.select", SI->getIterator());
2728 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2729 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2730 Phi->setDebugLoc(SI->getDebugLoc());
2731 SI->replaceAllUsesWith(Phi);
2732 SI->eraseFromParent();
2733
2734if (MSSAU &&VerifyMemorySSA)
2735 MSSAU->getMemorySSA()->verifyMemorySSA();
2736
2737 ++NumSelects;
2738return CondBr;
2739}
2740
2741/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2742/// making the following replacement:
2743///
2744/// --code before guard--
2745/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2746/// --code after guard--
2747///
2748/// into
2749///
2750/// --code before guard--
2751/// br i1 %cond, label %guarded, label %deopt
2752///
2753/// guarded:
2754/// --code after guard--
2755///
2756/// deopt:
2757/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2758/// unreachable
2759///
2760/// It also makes all relevant DT and LI updates, so that all structures are in
2761/// valid state after this transform.
2762staticBranchInst *turnGuardIntoBranch(IntrinsicInst *GI,Loop &L,
2763DominatorTree &DT,LoopInfo &LI,
2764MemorySSAUpdater *MSSAU) {
2765SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2766LLVM_DEBUG(dbgs() <<"Turning " << *GI <<" into a branch.\n");
2767BasicBlock *CheckBB = GI->getParent();
2768
2769if (MSSAU &&VerifyMemorySSA)
2770 MSSAU->getMemorySSA()->verifyMemorySSA();
2771
2772DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2773Instruction *DeoptBlockTerm =
2774SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI,true,
2775 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2776BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2777// SplitBlockAndInsertIfThen inserts control flow that branches to
2778// DeoptBlockTerm if the condition is true. We want the opposite.
2779 CheckBI->swapSuccessors();
2780
2781BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2782 GuardedBlock->setName("guarded");
2783 CheckBI->getSuccessor(1)->setName("deopt");
2784BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2785
2786if (MSSAU)
2787 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2788
2789 GI->moveBefore(DeoptBlockTerm->getIterator());
2790 GI->setArgOperand(0,ConstantInt::getFalse(GI->getContext()));
2791
2792if (MSSAU) {
2793MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2794 MSSAU->moveToPlace(MD, DeoptBlock,MemorySSA::BeforeTerminator);
2795if (VerifyMemorySSA)
2796 MSSAU->getMemorySSA()->verifyMemorySSA();
2797 }
2798
2799if (VerifyLoopInfo)
2800 LI.verify(DT);
2801 ++NumGuards;
2802return CheckBI;
2803}
2804
2805/// Cost multiplier is a way to limit potentially exponential behavior
2806/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2807/// candidates available. Also accounting for the number of "sibling" loops with
2808/// the idea to account for previous unswitches that already happened on this
2809/// cluster of loops. There was an attempt to keep this formula simple,
2810/// just enough to limit the worst case behavior. Even if it is not that simple
2811/// now it is still not an attempt to provide a detailed heuristic size
2812/// prediction.
2813///
2814/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2815/// unswitch candidates, making adequate predictions instead of wild guesses.
2816/// That requires knowing not just the number of "remaining" candidates but
2817/// also costs of unswitching for each of these candidates.
2818staticintCalculateUnswitchCostMultiplier(
2819constInstruction &TI,constLoop &L,constLoopInfo &LI,
2820constDominatorTree &DT,
2821ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2822
2823// Guards and other exiting conditions do not contribute to exponential
2824// explosion as soon as they dominate the latch (otherwise there might be
2825// another path to the latch remaining that does not allow to eliminate the
2826// loop copy on unswitch).
2827constBasicBlock *Latch = L.getLoopLatch();
2828constBasicBlock *CondBlock = TI.getParent();
2829if (DT.dominates(CondBlock, Latch) &&
2830 (isGuard(&TI) ||
2831 (TI.isTerminator() &&
2832llvm::count_if(successors(&TI), [&L](constBasicBlock *SuccBB) {
2833return L.contains(SuccBB);
2834 }) <= 1))) {
2835 NumCostMultiplierSkipped++;
2836return 1;
2837 }
2838
2839auto *ParentL = L.getParentLoop();
2840int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2841 : std::distance(LI.begin(), LI.end()));
2842// Count amount of clones that all the candidates might cause during
2843// unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2844// cases.
2845int UnswitchedClones = 0;
2846for (constauto &Candidate : UnswitchCandidates) {
2847constInstruction *CI = Candidate.TI;
2848constBasicBlock *CondBlock = CI->getParent();
2849bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2850if (isa<SelectInst>(CI)) {
2851 UnswitchedClones++;
2852continue;
2853 }
2854if (isGuard(CI)) {
2855if (!SkipExitingSuccessors)
2856 UnswitchedClones++;
2857continue;
2858 }
2859int NonExitingSuccessors =
2860llvm::count_if(successors(CondBlock),
2861 [SkipExitingSuccessors, &L](constBasicBlock *SuccBB) {
2862return !SkipExitingSuccessors || L.contains(SuccBB);
2863 });
2864 UnswitchedClones +=Log2_32(NonExitingSuccessors);
2865 }
2866
2867// Ignore up to the "unscaled candidates" number of unswitch candidates
2868// when calculating the power-of-two scaling of the cost. The main idea
2869// with this control is to allow a small number of unswitches to happen
2870// and rely more on siblings multiplier (see below) when the number
2871// of candidates is small.
2872unsigned ClonesPower =
2873 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2874
2875// Allowing top-level loops to spread a bit more than nested ones.
2876int SiblingsMultiplier =
2877 std::max((ParentL ? SiblingsCount
2878 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2879 1);
2880// Compute the cost multiplier in a way that won't overflow by saturating
2881// at an upper bound.
2882int CostMultiplier;
2883if (ClonesPower >Log2_32(UnswitchThreshold) ||
2884 SiblingsMultiplier >UnswitchThreshold)
2885 CostMultiplier =UnswitchThreshold;
2886else
2887 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2888 (int)UnswitchThreshold);
2889
2890LLVM_DEBUG(dbgs() <<" Computed multiplier " << CostMultiplier
2891 <<" (siblings " << SiblingsMultiplier <<" * clones "
2892 << (1 << ClonesPower) <<")"
2893 <<" for unswitch candidate: " << TI <<"\n");
2894return CostMultiplier;
2895}
2896
2897staticboolcollectUnswitchCandidates(
2898SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
2899IVConditionInfo &PartialIVInfo,Instruction *&PartialIVCondBranch,
2900constLoop &L,constLoopInfo &LI,AAResults &AA,
2901constMemorySSAUpdater *MSSAU) {
2902assert(UnswitchCandidates.empty() &&"Should be!");
2903
2904auto AddUnswitchCandidatesForInst = [&](Instruction *I,Value *Cond) {
2905Cond =skipTrivialSelect(Cond);
2906if (isa<Constant>(Cond))
2907return;
2908if (L.isLoopInvariant(Cond)) {
2909 UnswitchCandidates.push_back({I, {Cond}});
2910return;
2911 }
2912if (match(Cond,m_CombineOr(m_LogicalAnd(),m_LogicalOr()))) {
2913TinyPtrVector<Value *> Invariants =
2914collectHomogenousInstGraphLoopInvariants(
2915 L, *static_cast<Instruction *>(Cond), LI);
2916if (!Invariants.empty())
2917 UnswitchCandidates.push_back({I, std::move(Invariants)});
2918 }
2919 };
2920
2921// Whether or not we should also collect guards in the loop.
2922bool CollectGuards =false;
2923if (UnswitchGuards) {
2924auto *GuardDecl =Intrinsic::getDeclarationIfExists(
2925 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
2926if (GuardDecl && !GuardDecl->use_empty())
2927 CollectGuards =true;
2928 }
2929
2930for (auto *BB : L.blocks()) {
2931if (LI.getLoopFor(BB) != &L)
2932continue;
2933
2934for (auto &I : *BB) {
2935if (auto *SI = dyn_cast<SelectInst>(&I)) {
2936auto *Cond = SI->getCondition();
2937// Do not unswitch vector selects and logical and/or selects
2938if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2939 AddUnswitchCandidatesForInst(SI,Cond);
2940 }elseif (CollectGuards &&isGuard(&I)) {
2941auto *Cond =
2942skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2943// TODO: Support AND, OR conditions and partial unswitching.
2944if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2945 UnswitchCandidates.push_back({&I, {Cond}});
2946 }
2947 }
2948
2949if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2950// We can only consider fully loop-invariant switch conditions as we need
2951// to completely eliminate the switch after unswitching.
2952if (!isa<Constant>(SI->getCondition()) &&
2953 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2954 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2955continue;
2956 }
2957
2958auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2959if (!BI || !BI->isConditional() ||
2960 BI->getSuccessor(0) == BI->getSuccessor(1))
2961continue;
2962
2963 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2964 }
2965
2966if (MSSAU && !findOptionMDForLoop(&L,"llvm.loop.unswitch.partial.disable") &&
2967 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2968return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2969 })) {
2970MemorySSA *MSSA = MSSAU->getMemorySSA();
2971if (autoInfo =hasPartialIVCondition(L,MSSAThreshold, *MSSA, AA)) {
2972LLVM_DEBUG(
2973dbgs() <<"simple-loop-unswitch: Found partially invariant condition "
2974 << *Info->InstToDuplicate[0] <<"\n");
2975 PartialIVInfo = *Info;
2976 PartialIVCondBranch = L.getHeader()->getTerminator();
2977TinyPtrVector<Value *> ValsToDuplicate;
2978llvm::append_range(ValsToDuplicate,Info->InstToDuplicate);
2979 UnswitchCandidates.push_back(
2980 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2981 }
2982 }
2983return !UnswitchCandidates.empty();
2984}
2985
2986/// Tries to canonicalize condition described by:
2987///
2988/// br (LHS pred RHS), label IfTrue, label IfFalse
2989///
2990/// into its equivalent where `Pred` is something that we support for injected
2991/// invariants (so far it is limited to ult), LHS in canonicalized form is
2992/// non-invariant and RHS is an invariant.
2993staticvoidcanonicalizeForInvariantConditionInjection(CmpPredicate &Pred,
2994Value *&LHS,Value *&RHS,
2995BasicBlock *&IfTrue,
2996BasicBlock *&IfFalse,
2997constLoop &L) {
2998if (!L.contains(IfTrue)) {
2999 Pred = ICmpInst::getInversePredicate(Pred);
3000std::swap(IfTrue, IfFalse);
3001 }
3002
3003// Move loop-invariant argument to RHS position.
3004if (L.isLoopInvariant(LHS)) {
3005 Pred = ICmpInst::getSwappedPredicate(Pred);
3006std::swap(LHS,RHS);
3007 }
3008
3009if (Pred == ICmpInst::ICMP_SGE &&match(RHS,m_Zero())) {
3010// Turn "x >=s 0" into "x <u UMIN_INT"
3011 Pred = ICmpInst::ICMP_ULT;
3012RHS = ConstantInt::get(
3013RHS->getContext(),
3014APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
3015 }
3016}
3017
3018/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
3019/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
3020/// injecting a loop-invariant condition.
3021staticboolshouldTryInjectInvariantCondition(
3022constICmpInst::Predicate Pred,constValue *LHS,constValue *RHS,
3023constBasicBlock *IfTrue,constBasicBlock *IfFalse,constLoop &L) {
3024if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3025returnfalse;
3026// TODO: Support other predicates.
3027if (Pred != ICmpInst::ICMP_ULT)
3028returnfalse;
3029// TODO: Support non-loop-exiting branches?
3030if (!L.contains(IfTrue) || L.contains(IfFalse))
3031returnfalse;
3032// FIXME: For some reason this causes problems with MSSA updates, need to
3033// investigate why. So far, just don't unswitch latch.
3034if (L.getHeader() == IfTrue)
3035returnfalse;
3036returntrue;
3037}
3038
3039/// Returns true, if metadata on \p BI allows us to optimize branching into \p
3040/// TakenSucc via injection of invariant conditions. The branch should be not
3041/// enough and not previously unswitched, the information about this comes from
3042/// the metadata.
3043boolshouldTryInjectBasingOnMetadata(constBranchInst *BI,
3044constBasicBlock *TakenSucc) {
3045SmallVector<uint32_t> Weights;
3046if (!extractBranchWeights(*BI, Weights))
3047returnfalse;
3048unsignedT =InjectInvariantConditionHotnesThreshold;
3049BranchProbability LikelyTaken(T - 1,T);
3050
3051assert(Weights.size() == 2 &&"Unexpected profile data!");
3052size_tIdx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3053auto Num = Weights[Idx];
3054auto Denom = Weights[0] + Weights[1];
3055// Degenerate or overflowed metadata.
3056if (Denom == 0 || Num > Denom)
3057returnfalse;
3058BranchProbability ActualTaken(Num, Denom);
3059if (LikelyTaken > ActualTaken)
3060returnfalse;
3061returntrue;
3062}
3063
3064/// Materialize pending invariant condition of the given candidate into IR. The
3065/// injected loop-invariant condition implies the original loop-variant branch
3066/// condition, so the materialization turns
3067///
3068/// loop_block:
3069/// ...
3070/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3071///
3072/// into
3073///
3074/// preheader:
3075/// %invariant_cond = LHS pred RHS
3076/// ...
3077/// loop_block:
3078/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3079/// OriginalCheck:
3080/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3081/// ...
3082static NonTrivialUnswitchCandidate
3083injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate,Loop &L,
3084DominatorTree &DT,LoopInfo &LI,
3085AssumptionCache &AC,MemorySSAUpdater *MSSAU) {
3086assert(Candidate.hasPendingInjection() &&"Nothing to inject!");
3087BasicBlock *Preheader = L.getLoopPreheader();
3088assert(Preheader &&"Loop is not in simplified form?");
3089assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3090"Unswitching branch of inner loop!");
3091
3092auto Pred = Candidate.PendingInjection->Pred;
3093auto *LHS = Candidate.PendingInjection->LHS;
3094auto *RHS = Candidate.PendingInjection->RHS;
3095auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3096auto *TI = cast<BranchInst>(Candidate.TI);
3097auto *BB = Candidate.TI->getParent();
3098auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3099 : TI->getSuccessor(0);
3100// FIXME: Remove this once limitation on successors is lifted.
3101assert(L.contains(InLoopSucc) &&"Not supported yet!");
3102assert(!L.contains(OutOfLoopSucc) &&"Not supported yet!");
3103auto &Ctx = BB->getContext();
3104
3105IRBuilder<> Builder(Preheader->getTerminator());
3106assert(ICmpInst::isUnsigned(Pred) &&"Not supported yet!");
3107if (LHS->getType() !=RHS->getType()) {
3108if (LHS->getType()->getIntegerBitWidth() <
3109RHS->getType()->getIntegerBitWidth())
3110LHS = Builder.CreateZExt(LHS,RHS->getType(),LHS->getName() +".wide");
3111else
3112RHS = Builder.CreateZExt(RHS,LHS->getType(),RHS->getName() +".wide");
3113 }
3114// Do not use builder here: CreateICmp may simplify this into a constant and
3115// unswitching will break. Better optimize it away later.
3116auto *InjectedCond =
3117 ICmpInst::Create(Instruction::ICmp, Pred,LHS,RHS,"injected.cond",
3118 Preheader->getTerminator()->getIterator());
3119
3120BasicBlock *CheckBlock =BasicBlock::Create(Ctx, BB->getName() +".check",
3121 BB->getParent(), InLoopSucc);
3122 Builder.SetInsertPoint(TI);
3123auto *InvariantBr =
3124 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3125
3126 Builder.SetInsertPoint(CheckBlock);
3127 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3128 TI->getSuccessor(1));
3129 TI->eraseFromParent();
3130
3131// Fixup phis.
3132for (auto &I : *InLoopSucc) {
3133auto *PN = dyn_cast<PHINode>(&I);
3134if (!PN)
3135break;
3136auto *Inc = PN->getIncomingValueForBlock(BB);
3137 PN->addIncoming(Inc, CheckBlock);
3138 }
3139 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3140
3141SmallVector<DominatorTree::UpdateType, 4> DTUpdates = {
3142 { DominatorTree::Insert, BB, CheckBlock },
3143 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3144 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3145 { DominatorTree::Delete, BB, OutOfLoopSucc }
3146 };
3147
3148 DT.applyUpdates(DTUpdates);
3149if (MSSAU)
3150 MSSAU->applyUpdates(DTUpdates, DT);
3151 L.addBasicBlockToLoop(CheckBlock, LI);
3152
3153#ifndef NDEBUG
3154 DT.verify();
3155 LI.verify(DT);
3156if (MSSAU &&VerifyMemorySSA)
3157 MSSAU->getMemorySSA()->verifyMemorySSA();
3158#endif
3159
3160// TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3161// higher because we have just inserted a new block. Need to think how to
3162// adjust the cost of injected candidates when it was first computed.
3163LLVM_DEBUG(dbgs() <<"Injected a new loop-invariant branch " << *InvariantBr
3164 <<" and considering it for unswitching.");
3165 ++NumInvariantConditionsInjected;
3166return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3167 Candidate.Cost);
3168}
3169
3170/// Given chain of loop branch conditions looking like:
3171/// br (Variant < Invariant1)
3172/// br (Variant < Invariant2)
3173/// br (Variant < Invariant3)
3174/// ...
3175/// collect set of invariant conditions on which we want to unswitch, which
3176/// look like:
3177/// Invariant1 <= Invariant2
3178/// Invariant2 <= Invariant3
3179/// ...
3180/// Though they might not immediately exist in the IR, we can still inject them.
3181staticboolinsertCandidatesWithPendingInjections(
3182SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,Loop &L,
3183ICmpInst::Predicate Pred,ArrayRef<CompareDesc> Compares,
3184constDominatorTree &DT) {
3185
3186assert(ICmpInst::isRelational(Pred));
3187assert(ICmpInst::isStrictPredicate(Pred));
3188if (Compares.size() < 2)
3189returnfalse;
3190ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
3191for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3192 Next != Compares.end(); ++Prev, ++Next) {
3193Value *LHS = Next->Invariant;
3194Value *RHS = Prev->Invariant;
3195BasicBlock *InLoopSucc = Prev->InLoopSucc;
3196 InjectedInvariant ToInject(NonStrictPred,LHS,RHS, InLoopSucc);
3197 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3198 std::nullopt, std::move(ToInject));
3199 UnswitchCandidates.push_back(std::move(Candidate));
3200 }
3201returntrue;
3202}
3203
3204/// Collect unswitch candidates by invariant conditions that are not immediately
3205/// present in the loop. However, they can be injected into the code if we
3206/// decide it's profitable.
3207/// An example of such conditions is following:
3208///
3209/// for (...) {
3210/// x = load ...
3211/// if (! x <u C1) break;
3212/// if (! x <u C2) break;
3213/// <do something>
3214/// }
3215///
3216/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3217/// C2" automatically implies "x <u C2", so we can get rid of one of
3218/// loop-variant checks in unswitched loop version.
3219staticboolcollectUnswitchCandidatesWithInjections(
3220SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
3221IVConditionInfo &PartialIVInfo,Instruction *&PartialIVCondBranch,Loop &L,
3222constDominatorTree &DT,constLoopInfo &LI,AAResults &AA,
3223constMemorySSAUpdater *MSSAU) {
3224if (!InjectInvariantConditions)
3225returnfalse;
3226
3227if (!DT.isReachableFromEntry(L.getHeader()))
3228returnfalse;
3229auto *Latch = L.getLoopLatch();
3230// Need to have a single latch and a preheader.
3231if (!Latch)
3232returnfalse;
3233assert(L.getLoopPreheader() &&"Must have a preheader!");
3234
3235DenseMap<Value *, SmallVector<CompareDesc, 4> > CandidatesULT;
3236// Traverse the conditions that dominate latch (and therefore dominate each
3237// other).
3238for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3239 DTN = DTN->getIDom()) {
3240CmpPredicate Pred;
3241Value *LHS =nullptr, *RHS =nullptr;
3242BasicBlock *IfTrue =nullptr, *IfFalse =nullptr;
3243auto *BB = DTN->getBlock();
3244// Ignore inner loops.
3245if (LI.getLoopFor(BB) != &L)
3246continue;
3247auto *Term = BB->getTerminator();
3248if (!match(Term,m_Br(m_ICmp(Pred,m_Value(LHS),m_Value(RHS)),
3249m_BasicBlock(IfTrue),m_BasicBlock(IfFalse))))
3250continue;
3251if (!LHS->getType()->isIntegerTy())
3252continue;
3253canonicalizeForInvariantConditionInjection(Pred,LHS,RHS, IfTrue, IfFalse,
3254 L);
3255if (!shouldTryInjectInvariantCondition(Pred,LHS,RHS, IfTrue, IfFalse, L))
3256continue;
3257if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
3258continue;
3259// Strip ZEXT for unsigned predicate.
3260// TODO: once signed predicates are supported, also strip SEXT.
3261 CompareDescDesc(cast<BranchInst>(Term),RHS, IfTrue);
3262while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3263LHS = Zext->getOperand(0);
3264 CandidatesULT[LHS].push_back(Desc);
3265 }
3266
3267bool Found =false;
3268for (auto &It : CandidatesULT)
3269 Found |=insertCandidatesWithPendingInjections(
3270 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3271return Found;
3272}
3273
3274staticboolisSafeForNoNTrivialUnswitching(Loop &L,LoopInfo &LI) {
3275if (!L.isSafeToClone())
3276returnfalse;
3277for (auto *BB : L.blocks())
3278for (auto &I : *BB) {
3279if (I.getType()->isTokenTy() &&I.isUsedOutsideOfBlock(BB))
3280returnfalse;
3281if (auto *CB = dyn_cast<CallBase>(&I)) {
3282assert(!CB->cannotDuplicate() &&"Checked by L.isSafeToClone().");
3283if (CB->isConvergent())
3284returnfalse;
3285 }
3286 }
3287
3288// Check if there are irreducible CFG cycles in this loop. If so, we cannot
3289// easily unswitch non-trivial edges out of the loop. Doing so might turn the
3290// irreducible control flow into reducible control flow and introduce new
3291// loops "out of thin air". If we ever discover important use cases for doing
3292// this, we can add support to loop unswitch, but it is a lot of complexity
3293// for what seems little or no real world benefit.
3294LoopBlocksRPO RPOT(&L);
3295 RPOT.perform(&LI);
3296if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3297returnfalse;
3298
3299SmallVector<BasicBlock *, 4> ExitBlocks;
3300 L.getUniqueExitBlocks(ExitBlocks);
3301// We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3302// instruction as we don't know how to split those exit blocks.
3303// FIXME: We should teach SplitBlock to handle this and remove this
3304// restriction.
3305for (auto *ExitBB : ExitBlocks) {
3306auto It = ExitBB->getFirstNonPHIIt();
3307if (isa<CleanupPadInst>(It) || isa<CatchSwitchInst>(It)) {
3308LLVM_DEBUG(dbgs() <<"Cannot unswitch because of cleanuppad/catchswitch "
3309"in exit block\n");
3310returnfalse;
3311 }
3312 }
3313
3314returntrue;
3315}
3316
3317static NonTrivialUnswitchCandidatefindBestNonTrivialUnswitchCandidate(
3318ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates,constLoop &L,
3319constDominatorTree &DT,constLoopInfo &LI,AssumptionCache &AC,
3320constTargetTransformInfo &TTI,constIVConditionInfo &PartialIVInfo) {
3321// Given that unswitching these terminators will require duplicating parts of
3322// the loop, so we need to be able to model that cost. Compute the ephemeral
3323// values and set up a data structure to hold per-BB costs. We cache each
3324// block's cost so that we don't recompute this when considering different
3325// subsets of the loop for duplication during unswitching.
3326SmallPtrSet<const Value *, 4> EphValues;
3327CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3328SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
3329
3330// Compute the cost of each block, as well as the total loop cost. Also, bail
3331// out if we see instructions which are incompatible with loop unswitching
3332// (convergent, noduplicate, or cross-basic-block tokens).
3333// FIXME: We might be able to safely handle some of these in non-duplicated
3334// regions.
3335TargetTransformInfo::TargetCostKindCostKind =
3336 L.getHeader()->getParent()->hasMinSize()
3337 ?TargetTransformInfo::TCK_CodeSize
3338 :TargetTransformInfo::TCK_SizeAndLatency;
3339InstructionCost LoopCost = 0;
3340for (auto *BB : L.blocks()) {
3341InstructionCostCost = 0;
3342for (auto &I : *BB) {
3343if (EphValues.count(&I))
3344continue;
3345Cost +=TTI.getInstructionCost(&I,CostKind);
3346 }
3347assert(Cost >= 0 &&"Must not have negative costs!");
3348 LoopCost +=Cost;
3349assert(LoopCost >= 0 &&"Must not have negative loop costs!");
3350 BBCostMap[BB] =Cost;
3351 }
3352LLVM_DEBUG(dbgs() <<" Total loop cost: " << LoopCost <<"\n");
3353
3354// Now we find the best candidate by searching for the one with the following
3355// properties in order:
3356//
3357// 1) An unswitching cost below the threshold
3358// 2) The smallest number of duplicated unswitch candidates (to avoid
3359// creating redundant subsequent unswitching)
3360// 3) The smallest cost after unswitching.
3361//
3362// We prioritize reducing fanout of unswitch candidates provided the cost
3363// remains below the threshold because this has a multiplicative effect.
3364//
3365// This requires memoizing each dominator subtree to avoid redundant work.
3366//
3367// FIXME: Need to actually do the number of candidates part above.
3368SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
3369// Given a terminator which might be unswitched, computes the non-duplicated
3370// cost for that terminator.
3371auto ComputeUnswitchedCost = [&](Instruction &TI,
3372bool FullUnswitch) ->InstructionCost {
3373// Unswitching selects unswitches the entire loop.
3374if (isa<SelectInst>(TI))
3375return LoopCost;
3376
3377BasicBlock &BB = *TI.getParent();
3378SmallPtrSet<BasicBlock *, 4> Visited;
3379
3380InstructionCostCost = 0;
3381for (BasicBlock *SuccBB :successors(&BB)) {
3382// Don't count successors more than once.
3383if (!Visited.insert(SuccBB).second)
3384continue;
3385
3386// If this is a partial unswitch candidate, then it must be a conditional
3387// branch with a condition of either `or`, `and`, their corresponding
3388// select forms or partially invariant instructions. In that case, one of
3389// the successors is necessarily duplicated, so don't even try to remove
3390// its cost.
3391if (!FullUnswitch) {
3392auto &BI = cast<BranchInst>(TI);
3393Value *Cond =skipTrivialSelect(BI.getCondition());
3394if (match(Cond,m_LogicalAnd())) {
3395if (SuccBB == BI.getSuccessor(1))
3396continue;
3397 }elseif (match(Cond,m_LogicalOr())) {
3398if (SuccBB == BI.getSuccessor(0))
3399continue;
3400 }elseif ((PartialIVInfo.KnownValue->isOneValue() &&
3401 SuccBB == BI.getSuccessor(0)) ||
3402 (!PartialIVInfo.KnownValue->isOneValue() &&
3403 SuccBB == BI.getSuccessor(1)))
3404continue;
3405 }
3406
3407// This successor's domtree will not need to be duplicated after
3408// unswitching if the edge to the successor dominates it (and thus the
3409// entire tree). This essentially means there is no other path into this
3410// subtree and so it will end up live in only one clone of the loop.
3411if (SuccBB->getUniquePredecessor() ||
3412llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3413return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3414 })) {
3415Cost +=computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3416assert(Cost <= LoopCost &&
3417"Non-duplicated cost should never exceed total loop cost!");
3418 }
3419 }
3420
3421// Now scale the cost by the number of unique successors minus one. We
3422// subtract one because there is already at least one copy of the entire
3423// loop. This is computing the new cost of unswitching a condition.
3424// Note that guards always have 2 unique successors that are implicit and
3425// will be materialized if we decide to unswitch it.
3426int SuccessorsCount =isGuard(&TI) ? 2 : Visited.size();
3427assert(SuccessorsCount > 1 &&
3428"Cannot unswitch a condition without multiple distinct successors!");
3429return (LoopCost -Cost) * (SuccessorsCount - 1);
3430 };
3431
3432 std::optional<NonTrivialUnswitchCandidate> Best;
3433for (auto &Candidate : UnswitchCandidates) {
3434Instruction &TI = *Candidate.TI;
3435ArrayRef<Value *> Invariants = Candidate.Invariants;
3436BranchInst *BI = dyn_cast<BranchInst>(&TI);
3437bool FullUnswitch =
3438 !BI || Candidate.hasPendingInjection() ||
3439 (Invariants.size() == 1 &&
3440 Invariants[0] ==skipTrivialSelect(BI->getCondition()));
3441InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3442// Calculate cost multiplier which is a tool to limit potentially
3443// exponential behavior of loop-unswitch.
3444if (EnableUnswitchCostMultiplier) {
3445int CostMultiplier =
3446CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3447assert(
3448 (CostMultiplier > 0 && CostMultiplier <=UnswitchThreshold) &&
3449"cost multiplier needs to be in the range of 1..UnswitchThreshold");
3450 CandidateCost *= CostMultiplier;
3451LLVM_DEBUG(dbgs() <<" Computed cost of " << CandidateCost
3452 <<" (multiplier: " << CostMultiplier <<")"
3453 <<" for unswitch candidate: " << TI <<"\n");
3454 }else {
3455LLVM_DEBUG(dbgs() <<" Computed cost of " << CandidateCost
3456 <<" for unswitch candidate: " << TI <<"\n");
3457 }
3458
3459if (!Best || CandidateCost < Best->Cost) {
3460 Best = Candidate;
3461 Best->Cost = CandidateCost;
3462 }
3463 }
3464assert(Best &&"Must be!");
3465return *Best;
3466}
3467
3468// Insert a freeze on an unswitched branch if all is true:
3469// 1. freeze-loop-unswitch-cond option is true
3470// 2. The branch may not execute in the loop pre-transformation. If a branch may
3471// not execute and could cause UB, it would always cause UB if it is hoisted outside
3472// of the loop. Insert a freeze to prevent this case.
3473// 3. The branch condition may be poison or undef
3474staticboolshouldInsertFreeze(Loop &L,Instruction &TI,DominatorTree &DT,
3475AssumptionCache &AC) {
3476assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
3477if (!FreezeLoopUnswitchCond)
3478returnfalse;
3479
3480ICFLoopSafetyInfo SafetyInfo;
3481 SafetyInfo.computeLoopSafetyInfo(&L);
3482if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3483returnfalse;
3484
3485Value *Cond;
3486if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3487Cond =skipTrivialSelect(BI->getCondition());
3488else
3489Cond =skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
3490return !isGuaranteedNotToBeUndefOrPoison(
3491Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3492}
3493
3494staticboolunswitchBestCondition(Loop &L,DominatorTree &DT,LoopInfo &LI,
3495AssumptionCache &AC,AAResults &AA,
3496TargetTransformInfo &TTI,ScalarEvolution *SE,
3497MemorySSAUpdater *MSSAU,
3498LPMUpdater &LoopUpdater) {
3499// Collect all invariant conditions within this loop (as opposed to an inner
3500// loop which would be handled when visiting that inner loop).
3501SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
3502IVConditionInfo PartialIVInfo;
3503Instruction *PartialIVCondBranch =nullptr;
3504collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3505 PartialIVCondBranch, L, LI, AA, MSSAU);
3506if (!findOptionMDForLoop(&L,"llvm.loop.unswitch.injection.disable"))
3507collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3508 PartialIVCondBranch, L, DT, LI, AA,
3509 MSSAU);
3510// If we didn't find any candidates, we're done.
3511if (UnswitchCandidates.empty())
3512returnfalse;
3513
3514LLVM_DEBUG(
3515dbgs() <<"Considering " << UnswitchCandidates.size()
3516 <<" non-trivial loop invariant conditions for unswitching.\n");
3517
3518 NonTrivialUnswitchCandidate Best =findBestNonTrivialUnswitchCandidate(
3519 UnswitchCandidates, L, DT, LI, AC,TTI, PartialIVInfo);
3520
3521assert(Best.TI &&"Failed to find loop unswitch candidate");
3522assert(Best.Cost &&"Failed to compute cost");
3523
3524if (*Best.Cost >=UnswitchThreshold) {
3525LLVM_DEBUG(dbgs() <<"Cannot unswitch, lowest cost found: " << *Best.Cost
3526 <<"\n");
3527returnfalse;
3528 }
3529
3530bool InjectedCondition =false;
3531if (Best.hasPendingInjection()) {
3532 Best =injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3533 InjectedCondition =true;
3534 }
3535assert(!Best.hasPendingInjection() &&
3536"All injections should have been done by now!");
3537
3538if (Best.TI != PartialIVCondBranch)
3539 PartialIVInfo.InstToDuplicate.clear();
3540
3541bool InsertFreeze;
3542if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3543// If the best candidate is a select, turn it into a branch. Select
3544// instructions with a poison conditional do not propagate poison, but
3545// branching on poison causes UB. Insert a freeze on the select
3546// conditional to prevent UB after turning the select into a branch.
3547 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3548 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3549 Best.TI =turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3550 }else {
3551// If the best candidate is a guard, turn it into a branch.
3552if (isGuard(Best.TI))
3553 Best.TI =
3554turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3555 InsertFreeze =shouldInsertFreeze(L, *Best.TI, DT, AC);
3556 }
3557
3558LLVM_DEBUG(dbgs() <<" Unswitching non-trivial (cost = " << Best.Cost
3559 <<") terminator: " << *Best.TI <<"\n");
3560unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3561 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3562 InjectedCondition);
3563returntrue;
3564}
3565
3566/// Unswitch control flow predicated on loop invariant conditions.
3567///
3568/// This first hoists all branches or switches which are trivial (IE, do not
3569/// require duplicating any part of the loop) out of the loop body. It then
3570/// looks at other loop invariant control flows and tries to unswitch those as
3571/// well by cloning the loop if the result is small enough.
3572///
3573/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3574/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3575/// valid (i.e. its use is enabled).
3576///
3577/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3578/// true, we will attempt to do non-trivial unswitching as well as trivial
3579/// unswitching.
3580///
3581/// The `postUnswitch` function will be run after unswitching is complete
3582/// with information on whether or not the provided loop remains a loop and
3583/// a list of new sibling loops created.
3584///
3585/// If `SE` is non-null, we will update that analysis based on the unswitching
3586/// done.
3587staticboolunswitchLoop(Loop &L,DominatorTree &DT,LoopInfo &LI,
3588AssumptionCache &AC,AAResults &AA,
3589TargetTransformInfo &TTI,bool Trivial,
3590bool NonTrivial,ScalarEvolution *SE,
3591MemorySSAUpdater *MSSAU,ProfileSummaryInfo *PSI,
3592BlockFrequencyInfo *BFI,LPMUpdater &LoopUpdater) {
3593assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3594"Loops must be in LCSSA form before unswitching.");
3595
3596// Must be in loop simplified form: we need a preheader and dedicated exits.
3597if (!L.isLoopSimplifyForm())
3598returnfalse;
3599
3600// Try trivial unswitch first before loop over other basic blocks in the loop.
3601if (Trivial &&unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3602// If we unswitched successfully we will want to clean up the loop before
3603// processing it further so just mark it as unswitched and return.
3604postUnswitch(L, LoopUpdater, L.getName(),
3605/*CurrentLoopValid*/true,/*PartiallyInvariant*/false,
3606/*InjectedCondition*/false, {});
3607returntrue;
3608 }
3609
3610constFunction *F = L.getHeader()->getParent();
3611
3612// Check whether we should continue with non-trivial conditions.
3613// EnableNonTrivialUnswitch: Global variable that forces non-trivial
3614// unswitching for testing and debugging.
3615// NonTrivial: Parameter that enables non-trivial unswitching for this
3616// invocation of the transform. But this should be allowed only
3617// for targets without branch divergence.
3618//
3619// FIXME: If divergence analysis becomes available to a loop
3620// transform, we should allow unswitching for non-trivial uniform
3621// branches even on targets that have divergence.
3622// https://bugs.llvm.org/show_bug.cgi?id=48819
3623bool ContinueWithNonTrivial =
3624EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3625if (!ContinueWithNonTrivial)
3626returnfalse;
3627
3628// Skip non-trivial unswitching for optsize functions.
3629if (F->hasOptSize())
3630returnfalse;
3631
3632// Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
3633// of the loops L is nested in, and of the loops nested in L are all cold.
3634auto IsLoopNestCold = [&](constLoop *L) {
3635// Check L and all of its parent loops.
3636auto *Parent = L;
3637while (Parent) {
3638if (!PSI->isColdBlock(Parent->getHeader(), BFI))
3639returnfalse;
3640 Parent = Parent->getParentLoop();
3641 }
3642// Next check all loops nested within L.
3643SmallVector<const Loop *, 4> Worklist;
3644 Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
3645 L->getSubLoops().end());
3646while (!Worklist.empty()) {
3647auto *CurLoop = Worklist.pop_back_val();
3648if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
3649returnfalse;
3650 Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),
3651 CurLoop->getSubLoops().end());
3652 }
3653returntrue;
3654 };
3655
3656// Skip cold loops in cold loop nests, as unswitching them brings little
3657// benefit but increases the code size
3658if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
3659LLVM_DEBUG(dbgs() <<" Skip cold loop: " << L <<"\n");
3660returnfalse;
3661 }
3662
3663// Perform legality checks.
3664if (!isSafeForNoNTrivialUnswitching(L, LI))
3665returnfalse;
3666
3667// For non-trivial unswitching, because it often creates new loops, we rely on
3668// the pass manager to iterate on the loops rather than trying to immediately
3669// reach a fixed point. There is no substantial advantage to iterating
3670// internally, and if any of the new loops are simplified enough to contain
3671// trivial unswitching we want to prefer those.
3672
3673// Try to unswitch the best invariant condition. We prefer this full unswitch to
3674// a partial unswitch when possible below the threshold.
3675if (unswitchBestCondition(L, DT, LI, AC, AA,TTI, SE, MSSAU, LoopUpdater))
3676returntrue;
3677
3678// No other opportunities to unswitch.
3679returnfalse;
3680}
3681
3682PreservedAnalysesSimpleLoopUnswitchPass::run(Loop &L,LoopAnalysisManager &AM,
3683LoopStandardAnalysisResults &AR,
3684LPMUpdater &U) {
3685Function &F = *L.getHeader()->getParent();
3686 (void)F;
3687ProfileSummaryInfo *PSI =nullptr;
3688if (auto OuterProxy =
3689 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
3690 .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3691 PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3692LLVM_DEBUG(dbgs() <<"Unswitching loop in " <<F.getName() <<": " << L
3693 <<"\n");
3694
3695 std::optional<MemorySSAUpdater> MSSAU;
3696if (AR.MSSA) {
3697 MSSAU =MemorySSAUpdater(AR.MSSA);
3698if (VerifyMemorySSA)
3699 AR.MSSA->verifyMemorySSA();
3700 }
3701if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3702 &AR.SE, MSSAU ? &*MSSAU :nullptr, PSI, AR.BFI, U))
3703returnPreservedAnalyses::all();
3704
3705if (AR.MSSA &&VerifyMemorySSA)
3706 AR.MSSA->verifyMemorySSA();
3707
3708// Historically this pass has had issues with the dominator tree so verify it
3709// in asserts builds.
3710assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3711
3712auto PA =getLoopPassPreservedAnalyses();
3713if (AR.MSSA)
3714 PA.preserve<MemorySSAAnalysis>();
3715return PA;
3716}
3717
3718voidSimpleLoopUnswitchPass::printPipeline(
3719raw_ostream &OS,function_ref<StringRef(StringRef)> MapClassName2PassName) {
3720static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
3721OS, MapClassName2PassName);
3722
3723OS <<'<';
3724OS << (NonTrivial ?"" :"no-") <<"nontrivial;";
3725OS << (Trivial ?"" :"no-") <<"trivial";
3726OS <<'>';
3727}
CFG.h
GuardUtils.h
AssumptionCache.h
BasicBlockUtils.h
BlockFrequencyInfo.h
Info
Analysis containing CSE Info
Definition:CSEInfo.cpp:27
Casting.h
Cloning.h
CodeMetrics.h
CommandLine.h
Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition:DeadArgumentElimination.cpp:353
Debug.h
LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition:Debug.h:106
DenseMap.h
This file defines the DenseMap class.
DomTreeUpdater.h
Dominators.h
Blocks
DenseMap< Block *, BlockRelaxAux > Blocks
Definition:ELF_riscv.cpp:507
GenericDomTree.h
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
IRBuilder.h
BasicBlock.h
Constant.h
Function.h
Instruction.h
IntrinsicInst.h
Module.h
Module.h This file contains the declarations for the Module class.
Use.h
This defines the Use class.
Value.h
InstrTypes.h
InstructionCost.h
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
Instructions.h
LoopAnalysisManager.h
This header provides classes for managing per-loop analyses.
LoopInfo.h
LoopIterator.h
LoopPassManager.h
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
LoopUtils.h
F
#define F(x, y, z)
Definition:MD5.cpp:55
I
#define I(x, y, z)
Definition:MD5.cpp:58
MemorySSAUpdater.h
MemorySSA.h
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
MustExecute.h
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
II
uint64_t IntrinsicInst * II
Definition:NVVMIntrRange.cpp:51
PatternMatch.h
ProfDataUtils.h
This file contains the declarations for profiling metadata utility functions.
ProfileSummaryInfo.h
Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition:RISCVRedundantCopyElimination.cpp:75
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.
OS
raw_pwrite_stream & OS
Definition:SampleProfWriter.cpp:51
ScalarEvolution.h
Sequence.h
Provides some synthesis utilities to produce sequences of values.
SetVector.h
This file implements a set that has insertion order iteration characteristics.
rewritePHINodesForUnswitchedExitBlock
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
Definition:SimpleLoopUnswitch.cpp:341
unswitchLoop
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
Definition:SimpleLoopUnswitch.cpp:3587
unswitchAllTrivialConditions
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
Definition:SimpleLoopUnswitch.cpp:1047
EnableNonTrivialUnswitch
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
UnswitchGuards
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
recomputeLoopBlockSet
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
Definition:SimpleLoopUnswitch.cpp:1795
CalculateUnswitchCostMultiplier
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
Definition:SimpleLoopUnswitch.cpp:2818
buildPartialInvariantUnswitchConditionalBranch
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
Definition:SimpleLoopUnswitch.cpp:292
collectHomogenousInstGraphLoopInvariants
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
Definition:SimpleLoopUnswitch.cpp:193
deleteDeadClonedBlocks
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
Definition:SimpleLoopUnswitch.cpp:1672
visitDomSubTree
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
Definition:SimpleLoopUnswitch.cpp:2115
turnSelectIntoBranch
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
Definition:SimpleLoopUnswitch.cpp:2711
isSafeForNoNTrivialUnswitching
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
Definition:SimpleLoopUnswitch.cpp:3274
postUnswitch
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
Definition:SimpleLoopUnswitch.cpp:2138
buildPartialUnswitchConditionalBranch
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
Definition:SimpleLoopUnswitch.cpp:272
UnswitchNumInitialUnscaledCandidates
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
shouldTryInjectInvariantCondition
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
Definition:SimpleLoopUnswitch.cpp:3021
findBestNonTrivialUnswitchCandidate
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
Definition:SimpleLoopUnswitch.cpp:3317
EnableUnswitchCostMultiplier
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
skipTrivialSelect
static Value * skipTrivialSelect(Value *Cond)
Definition:SimpleLoopUnswitch.cpp:178
getTopMostExitingLoop
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
Definition:SimpleLoopUnswitch.cpp:480
collectUnswitchCandidatesWithInjections
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
Definition:SimpleLoopUnswitch.cpp:3219
UnswitchThreshold
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
replaceLoopInvariantUses
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
Definition:SimpleLoopUnswitch.cpp:235
unswitchTrivialBranch
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
Definition:SimpleLoopUnswitch.cpp:509
collectUnswitchCandidates
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Definition:SimpleLoopUnswitch.cpp:2897
InjectInvariantConditions
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
FreezeLoopUnswitchCond
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
computeDomSubtreeCost
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
Definition:SimpleLoopUnswitch.cpp:2660
shouldInsertFreeze
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
Definition:SimpleLoopUnswitch.cpp:3474
UnswitchSiblingsToplevelDiv
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
MSSAThreshold
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
canonicalizeForInvariantConditionInjection
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
Definition:SimpleLoopUnswitch.cpp:2993
areLoopExitPHIsLoopInvariant
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
Definition:SimpleLoopUnswitch.cpp:252
rewritePHINodesForExitAndUnswitchedBlocks
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
Definition:SimpleLoopUnswitch.cpp:363
insertCandidatesWithPendingInjections
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
Definition:SimpleLoopUnswitch.cpp:3181
injectPendingInvariantConditions
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
Definition:SimpleLoopUnswitch.cpp:3083
DropNonTrivialImplicitNullChecks
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
InjectInvariantConditionHotnesThreshold
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
unswitchTrivialSwitch
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
Definition:SimpleLoopUnswitch.cpp:743
unswitchNontrivialInvariants
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
Definition:SimpleLoopUnswitch.cpp:2175
rebuildLoopAfterUnswitch
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
Definition:SimpleLoopUnswitch.cpp:1906
unswitchBestCondition
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
Definition:SimpleLoopUnswitch.cpp:3494
buildClonedLoopBlocks
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
Definition:SimpleLoopUnswitch.cpp:1166
deleteDeadBlocksFromLoop
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
Definition:SimpleLoopUnswitch.cpp:1701
shouldTryInjectBasingOnMetadata
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
Definition:SimpleLoopUnswitch.cpp:3043
turnGuardIntoBranch
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
Definition:SimpleLoopUnswitch.cpp:2762
cloneLoopNest
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
Definition:SimpleLoopUnswitch.cpp:1363
hoistLoopToNewParent
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
Definition:SimpleLoopUnswitch.cpp:409
buildClonedLoops
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
Definition:SimpleLoopUnswitch.cpp:1422
SimpleLoopUnswitch.h
SmallPtrSet.h
This file defines the SmallPtrSet class.
SmallVector.h
This file defines the SmallVector class.
Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition:Statistic.h:166
TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.
Local.h
Twine.h
ValueMapper.h
ValueTracking.h
RHS
Value * RHS
Definition:X86PartialReduction.cpp:74
LHS
Value * LHS
Definition:X86PartialReduction.cpp:73
T
llvm::AAResults
Definition:AliasAnalysis.h:314
llvm::APInt::getSignedMinValue
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition:APInt.h:219
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition:PassManager.h:253
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition:PassManager.h:410
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition:ArrayRef.h:41
llvm::ArrayRef::end
iterator end() const
Definition:ArrayRef.h:157
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition:ArrayRef.h:168
llvm::ArrayRef::begin
iterator begin() const
Definition:ArrayRef.h:156
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition:ArrayRef.h:163
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition:AssumptionCache.h:42
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition:AssumptionCache.cpp:185
llvm::BasicBlock
LLVM Basic Block Representation.
Definition:BasicBlock.h:61
llvm::BasicBlock::end
iterator end()
Definition:BasicBlock.h:474
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition:BasicBlock.h:461
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition:BasicBlock.h:530
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition:BasicBlock.h:213
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition:BasicBlock.h:177
llvm::BasicBlock::size
size_t size() const
Definition:BasicBlock.h:482
llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition:BasicBlock.h:389
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition:BasicBlock.h:240
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition:BasicBlock.cpp:538
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition:BlockFrequencyInfo.h:37
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition:Instructions.h:3016
llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition:Instructions.h:3097
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition:Instructions.cpp:1168
llvm::BranchInst::isConditional
bool isConditional() const
Definition:Instructions.h:3090
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Definition:Instructions.h:3072
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition:Instructions.h:3104
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition:Instructions.h:3109
llvm::BranchInst::getCondition
Value * getCondition() const
Definition:Instructions.h:3092
llvm::BranchProbability
Definition:BranchProbability.h:30
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition:InstrTypes.h:1286
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition:InstrTypes.h:1291
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition:InstrTypes.h:673
llvm::CmpPredicate
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition:CmpPredicate.h:22
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition:Constants.h:83
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition:Constants.cpp:866
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition:Constants.cpp:873
llvm::Constant
This is an important base class in LLVM.
Definition:Constant.h:42
llvm::Constant::isOneValue
bool isOneValue() const
Returns true if the value is one.
Definition:Constants.cpp:124
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition:DenseMap.h:194
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition:DenseMap.h:156
llvm::DenseMapBase::begin
iterator begin()
Definition:DenseMap.h:75
llvm::DenseMapBase::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition:DenseMap.h:152
llvm::DenseMapBase::end
iterator end()
Definition:DenseMap.h:84
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition:DenseMap.h:211
llvm::DenseMap
Definition:DenseMap.h:727
llvm::DomTreeNodeBase< BasicBlock >
llvm::DomTreeUpdater
Definition:DomTreeUpdater.h:30
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition:GenericDomTree.h:905
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition:GenericDomTree.h:612
llvm::DominatorTreeBase::insertEdge
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
Definition:GenericDomTree.h:652
llvm::DominatorTreeBase::Delete
static constexpr UpdateKind Delete
Definition:GenericDomTree.h:253
llvm::DominatorTreeBase::Insert
static constexpr UpdateKind Insert
Definition:GenericDomTree.h:252
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition:GenericDomTree.h:670
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition:GenericDomTree.h:401
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition:Dominators.h:162
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition:Dominators.cpp:321
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition:Dominators.cpp:122
llvm::FreezeInst
This class represents a freeze function that returns random concrete value if an operand is either a ...
Definition:Instructions.h:5088
llvm::Function
Definition:Function.h:63
llvm::ICFLoopSafetyInfo
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition:MustExecute.h:131
llvm::ICFLoopSafetyInfo::isGuaranteedToExecute
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
Definition:MustExecute.cpp:285
llvm::ICFLoopSafetyInfo::computeLoopSafetyInfo
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition:MustExecute.cpp:77
llvm::ICmpInst::isRelational
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Definition:Instructions.h:1305
llvm::IRBuilderBase::CreateFreeze
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition:IRBuilder.h:2574
llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition:IRBuilder.h:1164
llvm::IRBuilderBase::CreateZExt
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition:IRBuilder.h:2033
llvm::IRBuilderBase::CreateAnd
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition:IRBuilder.h:1518
llvm::IRBuilderBase::CreateOr
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition:IRBuilder.h:1540
llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition:IRBuilder.h:199
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition:IRBuilder.h:2705
llvm::InstructionCost
Definition:InstructionCost.h:29
llvm::Instruction
Definition:Instruction.h:68
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition:Instruction.cpp:1364
llvm::Instruction::dropLocation
void dropLocation()
Drop the instruction's debug location.
Definition:DebugInfo.cpp:984
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition:Instruction.h:511
llvm::Instruction::eraseFromParent
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition:Instruction.cpp:94
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition:Instruction.h:426
llvm::Instruction::isTerminator
bool isTerminator() const
Definition:Instruction.h:313
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition:Metadata.cpp:1679
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition:Instruction.h:508
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition:Instruction.cpp:175
llvm::Instruction::insertInto
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Definition:Instruction.cpp:123
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition:IntrinsicInst.h:48
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition:LoopPassManager.h:229
llvm::LPMUpdater::markLoopAsDeleted
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
Definition:LoopPassManager.h:249
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition:GenericLoopInfo.h:124
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition:GenericLoopInfo.h:167
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition:GenericLoopInfo.h:187
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition:GenericLoopInfo.h:90
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition:GenericLoopInfoImpl.h:282
llvm::LoopBase::reserveBlocks
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition:GenericLoopInfo.h:432
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition:GenericLoopInfo.h:180
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition:GenericLoopInfo.h:391
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition:GenericLoopInfoImpl.h:210
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition:GenericLoopInfo.h:99
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition:GenericLoopInfo.h:227
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition:GenericLoopInfo.h:400
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition:LoopIterator.h:172
llvm::LoopBlocksRPO::perform
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition:LoopIterator.h:180
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition:GenericLoopInfoImpl.h:718
llvm::LoopInfoBase::addTopLevelLoop
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition:GenericLoopInfo.h:663
llvm::LoopInfoBase::end
iterator end() const
Definition:GenericLoopInfo.h:582
llvm::LoopInfoBase::AllocateLoop
LoopT * AllocateLoop(ArgsTy &&...Args)
Definition:GenericLoopInfo.h:570
llvm::LoopInfoBase::removeLoop
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition:GenericLoopInfo.h:633
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition:GenericLoopInfo.h:644
llvm::LoopInfoBase::getLoopDepth
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition:GenericLoopInfo.h:613
llvm::LoopInfoBase::begin
iterator begin() const
Definition:GenericLoopInfo.h:581
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition:GenericLoopInfo.h:606
llvm::LoopInfoBase::destroy
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition:GenericLoopInfo.h:710
llvm::LoopInfo
Definition:LoopInfo.h:407
llvm::Loop
Represents a single loop in the control flow graph.
Definition:LoopInfo.h:39
llvm::Loop::getName
StringRef getName() const
Definition:LoopInfo.h:388
llvm::MDNode
Metadata node.
Definition:Metadata.h:1073
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition:Metadata.h:1549
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition:Metadata.cpp:606
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition:MemorySSA.h:370
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition:MemorySSA.h:928
llvm::MemorySSAUpdater
Definition:MemorySSAUpdater.h:54
llvm::MemorySSAUpdater::getMemorySSA
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Definition:MemorySSAUpdater.h:240
llvm::MemorySSAUpdater::removeEdge
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
Definition:MemorySSAUpdater.cpp:531
llvm::MemorySSAUpdater::updateForClonedLoop
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
Definition:MemorySSAUpdater.cpp:669
llvm::MemorySSAUpdater::removeDuplicatePhiEdgesBetween
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
Definition:MemorySSAUpdater.cpp:538
llvm::MemorySSAUpdater::removeBlocks
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
Definition:MemorySSAUpdater.cpp:1357
llvm::MemorySSAUpdater::moveAllAfterSpliceBlocks
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
Definition:MemorySSAUpdater.cpp:1232
llvm::MemorySSAUpdater::createMemoryAccessInBB
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
Definition:MemorySSAUpdater.cpp:1414
llvm::MemorySSAUpdater::applyInsertUpdates
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
Definition:MemorySSAUpdater.cpp:845
llvm::MemorySSAUpdater::applyUpdates
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
Definition:MemorySSAUpdater.cpp:794
llvm::MemorySSAUpdater::moveToPlace
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Definition:MemorySSAUpdater.cpp:1185
llvm::MemorySSAUpdater::updateExitBlocksForClonedLoop
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Definition:MemorySSAUpdater.cpp:772
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition:MemorySSA.h:701
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition:MemorySSA.cpp:1905
llvm::MemorySSA::BeforeTerminator
@ BeforeTerminator
Definition:MemorySSA.h:790
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition:MemorySSA.h:719
llvm::MemorySSA::getBlockDefs
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition:MemorySSA.h:767
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition:Module.h:65
llvm::OuterAnalysisManagerProxy
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition:PassManager.h:692
llvm::PHINode
Definition:Instructions.h:2600
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition:Instructions.h:2635
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition:Constants.cpp:1878
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition:Analysis.h:111
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition:Analysis.h:117
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition:ProfileSummaryInfo.h:372
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition:ProfileSummaryInfo.h:41
llvm::ProfileSummaryInfo::hasProfileSummary
bool hasProfileSummary() const
Returns true if profile summary is available.
Definition:ProfileSummaryInfo.h:70
llvm::ProfileSummaryInfo::isColdBlock
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
Definition:ProfileSummaryInfo.h:204
llvm::ScalarEvolution
The main scalar evolution driver.
Definition:ScalarEvolution.h:447
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition:ScalarEvolution.cpp:8496
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition:ScalarEvolution.cpp:8538
llvm::ScalarEvolution::forgetBlockAndLoopDispositions
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
Definition:ScalarEvolution.cpp:8597
llvm::ScalarEvolution::forgetLcssaPhiWithNewPredecessor
void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
Definition:ScalarEvolution.cpp:8557
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition:Instructions.h:1657
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition:SetVector.h:98
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition:SetVector.h:264
llvm::SetVector::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition:SetVector.h:103
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition:SetVector.h:162
llvm::SimpleLoopUnswitchPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition:SimpleLoopUnswitch.cpp:3718
llvm::SimpleLoopUnswitchPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition:SimpleLoopUnswitch.cpp:3682
llvm::SmallDenseMap
Definition:DenseMap.h:883
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition:SmallPtrSet.h:94
llvm::SmallPtrSetImplBase::clear
void clear()
Definition:SmallPtrSet.h:97
llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition:SmallPtrSet.h:93
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition:SmallPtrSet.h:363
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition:SmallPtrSet.h:401
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition:SmallPtrSet.h:452
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition:SmallPtrSet.h:384
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition:SmallPtrSet.h:519
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition:SetVector.h:370
llvm::SmallVectorBase::empty
bool empty() const
Definition:SmallVector.h:81
llvm::SmallVectorBase::size
size_t size() const
Definition:SmallVector.h:78
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition:SmallVector.h:573
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition:SmallVector.h:673
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition:SmallVector.h:937
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition:SmallVector.h:663
llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition:SmallVector.h:683
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition:SmallVector.h:805
llvm::SmallVectorImpl::clear
void clear()
Definition:SmallVector.h:610
llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition:SmallVector.h:413
llvm::SmallVectorTemplateCommon::end
iterator end()
Definition:SmallVector.h:269
llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition:SmallVector.h:267
llvm::SmallVectorTemplateCommon::back
reference back()
Definition:SmallVector.h:308
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition:StringRef.h:51
llvm::SwitchInstProfUpdateWrapper
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
Definition:Instructions.h:3492
llvm::SwitchInstProfUpdateWrapper::setSuccessorWeight
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Definition:Instructions.cpp:4141
llvm::SwitchInstProfUpdateWrapper::eraseFromParent
Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
Definition:Instructions.cpp:4126
llvm::SwitchInstProfUpdateWrapper::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
Definition:Instructions.cpp:4107
llvm::SwitchInstProfUpdateWrapper::getSuccessorWeight
CaseWeightOpt getSuccessorWeight(unsigned idx)
Definition:Instructions.cpp:4135
llvm::SwitchInstProfUpdateWrapper::CaseWeightOpt
std::optional< uint32_t > CaseWeightOpt
Definition:Instructions.h:3503
llvm::SwitchInstProfUpdateWrapper::removeCase
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Definition:Instructions.cpp:4093
llvm::SwitchInst::CaseHandleImpl::getSuccessorIndex
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
Definition:Instructions.h:3233
llvm::SwitchInst::CaseHandleImpl::getCaseSuccessor
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
Definition:Instructions.h:3222
llvm::SwitchInst::CaseHandleImpl::getCaseValue
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Definition:Instructions.h:3215
llvm::SwitchInst::CaseHandle
Definition:Instructions.h:3250
llvm::SwitchInst
Multiway switch.
Definition:Instructions.h:3154
llvm::SwitchInst::getDefaultDest
BasicBlock * getDefaultDest() const
Definition:Instructions.h:3351
llvm::SwitchInst::Create
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
Definition:Instructions.h:3338
llvm::SwitchInst::setDefaultDest
void setDefaultDest(BasicBlock *DefaultCase)
Definition:Instructions.h:3361
llvm::SwitchInst::cases
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
Definition:Instructions.h:3396
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition:TargetTransformInfo.h:212
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
Definition:TargetTransformInfo.cpp:289
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition:TargetTransformInfo.h:263
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition:TargetTransformInfo.h:266
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition:TargetTransformInfo.h:267
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition:TargetTransformInfo.cpp:270
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition:TinyPtrVector.h:29
llvm::TinyPtrVector::push_back
void push_back(EltTy NewVal)
Definition:TinyPtrVector.h:242
llvm::TinyPtrVector::empty
bool empty() const
Definition:TinyPtrVector.h:162
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition:Twine.h:81
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition:Type.h:237
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition:Use.h:43
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition:ValueMap.h:164
llvm::ValueMap::count
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition:ValueMap.h:151
llvm::Value
LLVM Value Representation.
Definition:Value.h:74
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition:Value.h:255
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition:Value.cpp:377
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition:Value.cpp:1075
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition:Value.h:376
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition:Value.cpp:309
llvm::cl::opt
Definition:CommandLine.h:1423
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition:STLFunctionalExtras.h:37
llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition:ilist_node.h:32
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition:ilist_node.h:132
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition:raw_ostream.h:52
uint64_t
ErrorHandling.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition:ErrorHandling.h:143
llvm::Intrinsic::getDeclarationIfExists
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
Definition:Intrinsics.cpp:747
llvm::PatternMatch
Definition:PatternMatch.h:47
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition:PatternMatch.h:49
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition:PatternMatch.h:592
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition:PatternMatch.h:1799
llvm::PatternMatch::m_LogicalOr
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
Definition:PatternMatch.h:3099
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition:PatternMatch.h:2220
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition:PatternMatch.h:92
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Definition:PatternMatch.h:1627
llvm::PatternMatch::m_LogicalAnd
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
Definition:PatternMatch.h:3081
llvm::PatternMatch::m_BasicBlock
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition:PatternMatch.h:189
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition:PatternMatch.h:612
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition:PatternMatch.h:239
llvm::cl::Hidden
@ Hidden
Definition:CommandLine.h:137
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition:CommandLine.h:443
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition:STLExtras.h:329
llvm::stable_sort
void stable_sort(R &&Range)
Definition:STLExtras.h:2037
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1759
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1739
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition:Local.cpp:546
llvm::successors
auto successors(const MachineBasicBlock *BB)
Definition:MachineBasicBlock.h:1376
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition:iterator_range.h:77
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition:STLExtras.h:2115
llvm::RemapDbgRecordRange
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition:ValueMapper.h:299
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition:STLExtras.h:657
llvm::findOptionMDForLoop
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition:LoopInfo.cpp:1067
llvm::Desc
Op::Description Desc
Definition:DWARFExpression.cpp:23
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1746
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition:MathExtras.h:341
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition:GuardUtils.cpp:18
llvm::reverse
auto reverse(ContainerTy &&C)
Definition:STLExtras.h:420
llvm::zip_first
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition:STLExtras.h:877
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition:STLExtras.h:1664
llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition:ValueMapper.h:96
llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition:ValueMapper.h:78
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition:Debug.cpp:163
llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition:ValueMapper.h:277
llvm::VerifyLoopInfo
bool VerifyLoopInfo
Enable verification of loop info.
Definition:LoopInfo.cpp:51
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition:CloneFunction.cpp:44
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition:MemorySSA.cpp:84
llvm::formDedicatedExitBlocks
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition:LoopUtils.cpp:57
llvm::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition:ValueTracking.cpp:7841
llvm::ValueToValueMapTy
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
Definition:MemorySSAUpdater.h:50
llvm::extractBranchWeights
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
Definition:ProfDataUtils.cpp:170
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition:STLExtras.h:1945
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition:BasicBlockUtils.cpp:1084
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition:LoopAnalysisManager.cpp:138
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition:LoopInfo.cpp:1170
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition:STLExtras.h:2099
llvm::predecessors
auto predecessors(const MachineBasicBlock *BB)
Definition:MachineBasicBlock.h:1377
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition:CFG.h:118
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition:BasicBlockUtils.cpp:1609
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition:BasicBlockUtils.cpp:762
llvm::hasPartialIVCondition
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition:LoopUtils.cpp:2058
llvm::formLCSSA
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition:LCSSA.cpp:443
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition:BitVector.h:860
raw_ostream.h
N
#define N
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition:Analysis.h:28
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition:CodeMetrics.cpp:71
llvm::DWARFExpression::Operation::Description
Description of the encoding of one expression Op.
Definition:DWARFExpression.h:66
llvm::IVConditionInfo
Struct to hold information about a partially invariant condition.
Definition:LoopUtils.h:556
llvm::IVConditionInfo::InstToDuplicate
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition:LoopUtils.h:559
llvm::IVConditionInfo::KnownValue
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition:LoopUtils.h:562
llvm::Incoming
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
Definition:SILowerI1Copies.h:25
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition:LoopAnalysisManager.h:53
llvm::LoopStandardAnalysisResults::BFI
BlockFrequencyInfo * BFI
Definition:LoopAnalysisManager.h:61
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition:LoopAnalysisManager.h:58
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition:LoopAnalysisManager.h:63
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition:LoopAnalysisManager.h:60
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition:LoopAnalysisManager.h:55
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition:LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition:LoopAnalysisManager.h:56
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition:LoopAnalysisManager.h:54
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition:LoopInfo.h:215
llvm::PassInfoMixin
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition:PassManager.h:69
llvm::ShouldRunExtraSimpleLoopUnswitch::Key
static AnalysisKey Key
Definition:SimpleLoopUnswitch.h:88
llvm::cl::desc
Definition:CommandLine.h:409

Generated on Fri Jul 18 2025 16:13:54 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp