Movatterモバイル変換

Go to the documentation of this file.

1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

9#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"

10#include "llvm/ADT/DenseMap.h"

11#include "llvm/ADT/STLExtras.h"

12#include "llvm/ADT/Sequence.h"

13#include "llvm/ADT/SetVector.h"

14#include "llvm/ADT/SmallPtrSet.h"

15#include "llvm/ADT/SmallVector.h"

16#include "llvm/ADT/Statistic.h"

17#include "llvm/ADT/Twine.h"

18#include "llvm/Analysis/AssumptionCache.h"

19#include "llvm/Analysis/BlockFrequencyInfo.h"

20#include "llvm/Analysis/CFG.h"

21#include "llvm/Analysis/CodeMetrics.h"

22#include "llvm/Analysis/DomTreeUpdater.h"

23#include "llvm/Analysis/GuardUtils.h"

24#include "llvm/Analysis/LoopAnalysisManager.h"

25#include "llvm/Analysis/LoopInfo.h"

26#include "llvm/Analysis/LoopIterator.h"

27#include "llvm/Analysis/MemorySSA.h"

28#include "llvm/Analysis/MemorySSAUpdater.h"

29#include "llvm/Analysis/MustExecute.h"

30#include "llvm/Analysis/ProfileSummaryInfo.h"

31#include "llvm/Analysis/ScalarEvolution.h"

32#include "llvm/Analysis/TargetTransformInfo.h"

33#include "llvm/Analysis/ValueTracking.h"

34#include "llvm/IR/BasicBlock.h"

35#include "llvm/IR/Constant.h"

36#include "llvm/IR/Constants.h"

37#include "llvm/IR/Dominators.h"

38#include "llvm/IR/Function.h"

39#include "llvm/IR/IRBuilder.h"

40#include "llvm/IR/InstrTypes.h"

41#include "llvm/IR/Instruction.h"

42#include "llvm/IR/Instructions.h"

43#include "llvm/IR/IntrinsicInst.h"

44#include "llvm/IR/Module.h"

45#include "llvm/IR/PatternMatch.h"

46#include "llvm/IR/ProfDataUtils.h"

47#include "llvm/IR/Use.h"

48#include "llvm/IR/Value.h"

49#include "llvm/Support/Casting.h"

50#include "llvm/Support/CommandLine.h"

51#include "llvm/Support/Debug.h"

52#include "llvm/Support/ErrorHandling.h"

53#include "llvm/Support/GenericDomTree.h"

54#include "llvm/Support/InstructionCost.h"

55#include "llvm/Support/raw_ostream.h"

56#include "llvm/Transforms/Scalar/LoopPassManager.h"

57#include "llvm/Transforms/Utils/BasicBlockUtils.h"

58#include "llvm/Transforms/Utils/Cloning.h"

59#include "llvm/Transforms/Utils/Local.h"

60#include "llvm/Transforms/Utils/LoopUtils.h"

61#include "llvm/Transforms/Utils/ValueMapper.h"

62#include <algorithm>

63#include <cassert>

64#include <iterator>

65#include <numeric>

66#include <optional>

67#include <utility>

69#define DEBUG_TYPE "simple-loop-unswitch"

71using namespacellvm;

72using namespacellvm::PatternMatch;

74STATISTIC(NumBranches,"Number of branches unswitched");

75STATISTIC(NumSwitches,"Number of switches unswitched");

76STATISTIC(NumSelects,"Number of selects turned into branches for unswitching");

77STATISTIC(NumGuards,"Number of guards turned into branches for unswitching");

78STATISTIC(NumTrivial,"Number of unswitches that are trivial");

79STATISTIC(

80 NumCostMultiplierSkipped,

81"Number of unswitch candidates that had their cost multiplier skipped");

82STATISTIC(NumInvariantConditionsInjected,

83"Number of invariant conditions injected and unswitched");

85staticcl::opt<bool>EnableNonTrivialUnswitch(

86"enable-nontrivial-unswitch",cl::init(false),cl::Hidden,

87cl::desc("Forcibly enables non-trivial loop unswitching rather than "

88"following the configuration passed into the pass."));

90staticcl::opt<int>

91UnswitchThreshold("unswitch-threshold",cl::init(50),cl::Hidden,

92cl::desc("The cost threshold for unswitching a loop."));

94staticcl::opt<bool>EnableUnswitchCostMultiplier(

95"enable-unswitch-cost-multiplier",cl::init(true),cl::Hidden,

96cl::desc("Enable unswitch cost multiplier that prohibits exponential "

97"explosion in nontrivial unswitch."));

98staticcl::opt<int>UnswitchSiblingsToplevelDiv(

99"unswitch-siblings-toplevel-div",cl::init(2),cl::Hidden,

100cl::desc("Toplevel siblings divisor for cost multiplier."));

101staticcl::opt<int>UnswitchNumInitialUnscaledCandidates(

102"unswitch-num-initial-unscaled-candidates",cl::init(8),cl::Hidden,

103cl::desc("Number of unswitch candidates that are ignored when calculating "

104"cost multiplier."));

105staticcl::opt<bool>UnswitchGuards(

106"simple-loop-unswitch-guards",cl::init(true),cl::Hidden,

107cl::desc("If enabled, simple loop unswitching will also consider "

108"llvm.experimental.guard intrinsics as unswitch candidates."));

109staticcl::opt<bool>DropNonTrivialImplicitNullChecks(

110"simple-loop-unswitch-drop-non-trivial-implicit-null-checks",

111cl::init(false),cl::Hidden,

112cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "

113"null checks to save time analyzing if we can keep it."));

114staticcl::opt<unsigned>

115MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",

116cl::desc("Max number of memory uses to explore during "

117"partial unswitching analysis"),

118cl::init(100),cl::Hidden);

119staticcl::opt<bool>FreezeLoopUnswitchCond(

120"freeze-loop-unswitch-cond",cl::init(true),cl::Hidden,

121cl::desc("If enabled, the freeze instruction will be added to condition "

122"of loop unswitch to prevent miscompilation."));

123

124staticcl::opt<bool>InjectInvariantConditions(

125"simple-loop-unswitch-inject-invariant-conditions",cl::Hidden,

126cl::desc("Whether we should inject new invariants and unswitch them to "

127"eliminate some existing (non-invariant) conditions."),

128cl::init(true));

129

130staticcl::opt<unsigned>InjectInvariantConditionHotnesThreshold(

131"simple-loop-unswitch-inject-invariant-condition-hotness-threshold",

132cl::Hidden,cl::desc("Only try to inject loop invariant conditions and "

133"unswitch on them to eliminate branches that are "

134"not-taken 1/<this option> times or less."),

135cl::init(16));

136

137AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;

138namespace{

139structCompareDesc {

140BranchInst *Term;

141Value *Invariant;

142BasicBlock *InLoopSucc;

143

144 CompareDesc(BranchInst *Term,Value *Invariant,BasicBlock *InLoopSucc)

145 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}

146};

147

148structInjectedInvariant {

149ICmpInst::Predicate Pred;

150Value *LHS;

151Value *RHS;

152BasicBlock *InLoopSucc;

153

154 InjectedInvariant(ICmpInst::Predicate Pred,Value *LHS,Value *RHS,

155BasicBlock *InLoopSucc)

156 : Pred(Pred),LHS(LHS),RHS(RHS), InLoopSucc(InLoopSucc) {}

157};

158

159structNonTrivialUnswitchCandidate {

160Instruction *TI =nullptr;

161TinyPtrVector<Value *> Invariants;

162 std::optional<InstructionCost>Cost;

163 std::optional<InjectedInvariant> PendingInjection;

164 NonTrivialUnswitchCandidate(

165Instruction *TI,ArrayRef<Value *> Invariants,

166 std::optional<InstructionCost>Cost = std::nullopt,

167 std::optional<InjectedInvariant> PendingInjection = std::nullopt)

168 : TI(TI), Invariants(Invariants),Cost(Cost),

169 PendingInjection(PendingInjection) {};

170

171bool hasPendingInjection() const{return PendingInjection.has_value(); }

172};

173}// end anonymous namespace.

174

175// Helper to skip (select x, true, false), which matches both a logical AND and

176// OR and can confuse code that tries to determine if \p Cond is either a

177// logical AND or OR but not both.

178staticValue *skipTrivialSelect(Value *Cond) {

179Value *CondNext;

180while (match(Cond,m_Select(m_Value(CondNext),m_One(),m_Zero())))

181Cond = CondNext;

182returnCond;

183}

184

185/// Collect all of the loop invariant input values transitively used by the

186/// homogeneous instruction graph from a given root.

187///

188/// This essentially walks from a root recursively through loop variant operands

189/// which have perform the same logical operation (AND or OR) and finds all

190/// inputs which are loop invariant. For some operations these can be

191/// re-associated and unswitched out of the loop entirely.

192staticTinyPtrVector<Value *>

193collectHomogenousInstGraphLoopInvariants(constLoop &L,Instruction &Root,

194constLoopInfo &LI) {

195assert(!L.isLoopInvariant(&Root) &&

196"Only need to walk the graph if root itself is not invariant.");

197TinyPtrVector<Value *> Invariants;

198

199bool IsRootAnd =match(&Root,m_LogicalAnd());

200bool IsRootOr =match(&Root,m_LogicalOr());

201

202// Build a worklist and recurse through operators collecting invariants.

203SmallVector<Instruction *, 4> Worklist;

204SmallPtrSet<Instruction *, 8> Visited;

205 Worklist.push_back(&Root);

206 Visited.insert(&Root);

207do {

208Instruction &I = *Worklist.pop_back_val();

209for (Value *OpV :I.operand_values()) {

210// Skip constants as unswitching isn't interesting for them.

211if (isa<Constant>(OpV))

212continue;

213

214// Add it to our result if loop invariant.

215if (L.isLoopInvariant(OpV)) {

216 Invariants.push_back(OpV);

217continue;

218 }

219

220// If not an instruction with the same opcode, nothing we can do.

221Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));

222

223if (OpI && ((IsRootAnd &&match(OpI,m_LogicalAnd())) ||

224 (IsRootOr &&match(OpI,m_LogicalOr())))) {

225// Visit this operand.

226if (Visited.insert(OpI).second)

227 Worklist.push_back(OpI);

228 }

229 }

230 }while (!Worklist.empty());

231

232return Invariants;

233}

234

235staticvoidreplaceLoopInvariantUses(constLoop &L,Value *Invariant,

236Constant &Replacement) {

237assert(!isa<Constant>(Invariant) &&"Why are we unswitching on a constant?");

238

239// Replace uses of LIC in the loop with the given constant.

240// We use make_early_inc_range as set invalidates the iterator.

241for (Use &U :llvm::make_early_inc_range(Invariant->uses())) {

242Instruction *UserI = dyn_cast<Instruction>(U.getUser());

243

244// Replace this use within the loop body.

245if (UserI && L.contains(UserI))

246 U.set(&Replacement);

247 }

248}

249

250/// Check that all the LCSSA PHI nodes in the loop exit block have trivial

251/// incoming values along this edge.

252staticboolareLoopExitPHIsLoopInvariant(constLoop &L,

253constBasicBlock &ExitingBB,

254constBasicBlock &ExitBB) {

255for (constInstruction &I : ExitBB) {

256auto *PN = dyn_cast<PHINode>(&I);

257if (!PN)

258// No more PHIs to check.

259returntrue;

260

261// If the incoming value for this edge isn't loop invariant the unswitch

262// won't be trivial.

263if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))

264returnfalse;

265 }

266llvm_unreachable("Basic blocks should never be empty!");

267}

268

269/// Copy a set of loop invariant values \p ToDuplicate and insert them at the

270/// end of \p BB and conditionally branch on the copied condition. We only

271/// branch on a single value.

272staticvoidbuildPartialUnswitchConditionalBranch(

273BasicBlock &BB,ArrayRef<Value *> Invariants,boolDirection,

274BasicBlock &UnswitchedSucc,BasicBlock &NormalSucc,bool InsertFreeze,

275constInstruction *I,AssumptionCache *AC,constDominatorTree &DT) {

276IRBuilder<> IRB(&BB);

277

278SmallVector<Value *> FrozenInvariants;

279for (Value *Inv : Invariants) {

280if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC,I, &DT))

281 Inv = IRB.CreateFreeze(Inv, Inv->getName() +".fr");

282 FrozenInvariants.push_back(Inv);

283 }

284

285Value *Cond =Direction ? IRB.CreateOr(FrozenInvariants)

286 : IRB.CreateAnd(FrozenInvariants);

287 IRB.CreateCondBr(Cond,Direction ? &UnswitchedSucc : &NormalSucc,

288Direction ? &NormalSucc : &UnswitchedSucc);

289}

290

291/// Copy a set of loop invariant values, and conditionally branch on them.

292staticvoidbuildPartialInvariantUnswitchConditionalBranch(

293BasicBlock &BB,ArrayRef<Value *> ToDuplicate,boolDirection,

294BasicBlock &UnswitchedSucc,BasicBlock &NormalSucc,Loop &L,

295MemorySSAUpdater *MSSAU) {

296ValueToValueMapTy VMap;

297for (auto *Val :reverse(ToDuplicate)) {

298Instruction *Inst = cast<Instruction>(Val);

299Instruction *NewInst = Inst->clone();

300 NewInst->insertInto(&BB, BB.end());

301RemapInstruction(NewInst, VMap,

302RF_NoModuleLevelChanges |RF_IgnoreMissingLocals);

303 VMap[Val] = NewInst;

304

305if (!MSSAU)

306continue;

307

308MemorySSA *MSSA = MSSAU->getMemorySSA();

309if (auto *MemUse =

310 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {

311auto *DefiningAccess = MemUse->getDefiningAccess();

312// Get the first defining access before the loop.

313while (L.contains(DefiningAccess->getBlock())) {

314// If the defining access is a MemoryPhi, get the incoming

315// value for the pre-header as defining access.

316if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))

317 DefiningAccess =

318 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());

319else

320 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();

321 }

322 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,

323 NewInst->getParent(),

324MemorySSA::BeforeTerminator);

325 }

326 }

327

328IRBuilder<> IRB(&BB);

329Value *Cond = VMap[ToDuplicate[0]];

330 IRB.CreateCondBr(Cond,Direction ? &UnswitchedSucc : &NormalSucc,

331Direction ? &NormalSucc : &UnswitchedSucc);

332}

333

334/// Rewrite the PHI nodes in an unswitched loop exit basic block.

335///

336/// Requires that the loop exit and unswitched basic block are the same, and

337/// that the exiting block was a unique predecessor of that block. Rewrites the

338/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial

339/// PHI nodes from the old preheader that now contains the unswitched

340/// terminator.

341staticvoidrewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,

342BasicBlock &OldExitingBB,

343BasicBlock &OldPH) {

344for (PHINode &PN : UnswitchedBB.phis()) {

345// When the loop exit is directly unswitched we just need to update the

346// incoming basic block. We loop to handle weird cases with repeated

347// incoming blocks, but expect to typically only have one operand here.

348for (auto i : seq<int>(0, PN.getNumOperands())) {

349assert(PN.getIncomingBlock(i) == &OldExitingBB &&

350"Found incoming block different from unique predecessor!");

351 PN.setIncomingBlock(i, &OldPH);

352 }

353 }

354}

355

356/// Rewrite the PHI nodes in the loop exit basic block and the split off

357/// unswitched block.

358///

359/// Because the exit block remains an exit from the loop, this rewrites the

360/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI

361/// nodes into the unswitched basic block to select between the value in the

362/// old preheader and the loop exit.

363staticvoidrewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,

364BasicBlock &UnswitchedBB,

365BasicBlock &OldExitingBB,

366BasicBlock &OldPH,

367bool FullUnswitch) {

368assert(&ExitBB != &UnswitchedBB &&

369"Must have different loop exit and unswitched blocks!");

370BasicBlock::iterator InsertPt = UnswitchedBB.begin();

371for (PHINode &PN : ExitBB.phis()) {

372auto *NewPN =PHINode::Create(PN.getType(),/*NumReservedValues*/ 2,

373 PN.getName() +".split");

374 NewPN->insertBefore(InsertPt);

375

376// Walk backwards over the old PHI node's inputs to minimize the cost of

377// removing each one. We have to do this weird loop manually so that we

378// create the same number of new incoming edges in the new PHI as we expect

379// each case-based edge to be included in the unswitched switch in some

380// cases.

381// FIXME: This is really, really gross. It would be much cleaner if LLVM

382// allowed us to create a single entry for a predecessor block without

383// having separate entries for each "edge" even though these edges are

384// required to produce identical results.

385for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {

386if (PN.getIncomingBlock(i) != &OldExitingBB)

387continue;

388

389Value *Incoming = PN.getIncomingValue(i);

390if (FullUnswitch)

391// No more edge from the old exiting block to the exit block.

392 PN.removeIncomingValue(i);

393

394 NewPN->addIncoming(Incoming, &OldPH);

395 }

396

397// Now replace the old PHI with the new one and wire the old one in as an

398// input to the new one.

399 PN.replaceAllUsesWith(NewPN);

400 NewPN->addIncoming(&PN, &ExitBB);

401 }

402}

403

404/// Hoist the current loop up to the innermost loop containing a remaining exit.

405///

406/// Because we've removed an exit from the loop, we may have changed the set of

407/// loops reachable and need to move the current loop up the loop nest or even

408/// to an entirely separate nest.

409staticvoidhoistLoopToNewParent(Loop &L,BasicBlock &Preheader,

410DominatorTree &DT,LoopInfo &LI,

411MemorySSAUpdater *MSSAU,ScalarEvolution *SE) {

412// If the loop is already at the top level, we can't hoist it anywhere.

413Loop *OldParentL = L.getParentLoop();

414if (!OldParentL)

415return;

416

417SmallVector<BasicBlock *, 4> Exits;

418 L.getExitBlocks(Exits);

419Loop *NewParentL =nullptr;

420for (auto *ExitBB : Exits)

421if (Loop *ExitL = LI.getLoopFor(ExitBB))

422if (!NewParentL || NewParentL->contains(ExitL))

423 NewParentL = ExitL;

424

425if (NewParentL == OldParentL)

426return;

427

428// The new parent loop (if different) should always contain the old one.

429if (NewParentL)

430assert(NewParentL->contains(OldParentL) &&

431"Can only hoist this loop up the nest!");

432

433// The preheader will need to move with the body of this loop. However,

434// because it isn't in this loop we also need to update the primary loop map.

435assert(OldParentL == LI.getLoopFor(&Preheader) &&

436"Parent loop of this loop should contain this loop's preheader!");

437 LI.changeLoopFor(&Preheader, NewParentL);

438

439// Remove this loop from its old parent.

440 OldParentL->removeChildLoop(&L);

441

442// Add the loop either to the new parent or as a top-level loop.

443if (NewParentL)

444 NewParentL->addChildLoop(&L);

445else

446 LI.addTopLevelLoop(&L);

447

448// Remove this loops blocks from the old parent and every other loop up the

449// nest until reaching the new parent. Also update all of these

450// no-longer-containing loops to reflect the nesting change.

451for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;

452 OldContainingL = OldContainingL->getParentLoop()) {

453llvm::erase_if(OldContainingL->getBlocksVector(),

454 [&](constBasicBlock *BB) {

455 return BB == &Preheader || L.contains(BB);

456 });

457

458 OldContainingL->getBlocksSet().erase(&Preheader);

459for (BasicBlock *BB : L.blocks())

460 OldContainingL->getBlocksSet().erase(BB);

461

462// Because we just hoisted a loop out of this one, we have essentially

463// created new exit paths from it. That means we need to form LCSSA PHI

464// nodes for values used in the no-longer-nested loop.

465formLCSSA(*OldContainingL, DT, &LI, SE);

466

467// We shouldn't need to form dedicated exits because the exit introduced

468// here is the (just split by unswitching) preheader. However, after trivial

469// unswitching it is possible to get new non-dedicated exits out of parent

470// loop so let's conservatively form dedicated exit blocks and figure out

471// if we can optimize later.

472formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,

473/*PreserveLCSSA*/true);

474 }

475}

476

477// Return the top-most loop containing ExitBB and having ExitBB as exiting block

478// or the loop containing ExitBB, if there is no parent loop containing ExitBB

479// as exiting block.

480staticLoop *getTopMostExitingLoop(constBasicBlock *ExitBB,

481constLoopInfo &LI) {

482Loop *TopMost = LI.getLoopFor(ExitBB);

483Loop *Current = TopMost;

484while (Current) {

485if (Current->isLoopExiting(ExitBB))

486 TopMost = Current;

487 Current = Current->getParentLoop();

488 }

489return TopMost;

490}

491

492/// Unswitch a trivial branch if the condition is loop invariant.

493///

494/// This routine should only be called when loop code leading to the branch has

495/// been validated as trivial (no side effects). This routine checks if the

496/// condition is invariant and one of the successors is a loop exit. This

497/// allows us to unswitch without duplicating the loop, making it trivial.

498///

499/// If this routine fails to unswitch the branch it returns false.

500///

501/// If the branch can be unswitched, this routine splits the preheader and

502/// hoists the branch above that split. Preserves loop simplified form

503/// (splitting the exit block as necessary). It simplifies the branch within

504/// the loop to an unconditional branch but doesn't remove it entirely. Further

505/// cleanup can be done with some simplifycfg like pass.

506///

507/// If `SE` is not null, it will be updated based on the potential loop SCEVs

508/// invalidated by this.

509staticboolunswitchTrivialBranch(Loop &L,BranchInst &BI,DominatorTree &DT,

510LoopInfo &LI,ScalarEvolution *SE,

511MemorySSAUpdater *MSSAU) {

512assert(BI.isConditional() &&"Can only unswitch a conditional branch!");

513LLVM_DEBUG(dbgs() <<" Trying to unswitch branch: " << BI <<"\n");

514

515// The loop invariant values that we want to unswitch.

516TinyPtrVector<Value *> Invariants;

517

518// When true, we're fully unswitching the branch rather than just unswitching

519// some input conditions to the branch.

520bool FullUnswitch =false;

521

522Value *Cond =skipTrivialSelect(BI.getCondition());

523if (L.isLoopInvariant(Cond)) {

524 Invariants.push_back(Cond);

525 FullUnswitch =true;

526 }else {

527if (auto *CondInst = dyn_cast<Instruction>(Cond))

528 Invariants =collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);

529if (Invariants.empty()) {

530LLVM_DEBUG(dbgs() <<" Couldn't find invariant inputs!\n");

531returnfalse;

532 }

533 }

534

535// Check that one of the branch's successors exits, and which one.

536bool ExitDirection =true;

537int LoopExitSuccIdx = 0;

538auto *LoopExitBB = BI.getSuccessor(0);

539if (L.contains(LoopExitBB)) {

540 ExitDirection =false;

541 LoopExitSuccIdx = 1;

542 LoopExitBB = BI.getSuccessor(1);

543if (L.contains(LoopExitBB)) {

544LLVM_DEBUG(dbgs() <<" Branch doesn't exit the loop!\n");

545returnfalse;

546 }

547 }

548auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);

549auto *ParentBB = BI.getParent();

550if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {

551LLVM_DEBUG(dbgs() <<" Loop exit PHI's aren't loop-invariant!\n");

552returnfalse;

553 }

554

555// When unswitching only part of the branch's condition, we need the exit

556// block to be reached directly from the partially unswitched input. This can

557// be done when the exit block is along the true edge and the branch condition

558// is a graph of `or` operations, or the exit block is along the false edge

559// and the condition is a graph of `and` operations.

560if (!FullUnswitch) {

561if (ExitDirection ? !match(Cond,m_LogicalOr())

562 : !match(Cond,m_LogicalAnd())) {

563LLVM_DEBUG(dbgs() <<" Branch condition is in improper form for "

564"non-full unswitch!\n");

565returnfalse;

566 }

567 }

568

569LLVM_DEBUG({

570dbgs() <<" unswitching trivial invariant conditions for: " << BI

571 <<"\n";

572for (Value *Invariant : Invariants) {

573dbgs() <<" " << *Invariant <<" == true";

574if (Invariant != Invariants.back())

575dbgs() <<" ||";

576dbgs() <<"\n";

577 }

578 });

579

580// If we have scalar evolutions, we need to invalidate them including this

581// loop, the loop containing the exit block and the topmost parent loop

582// exiting via LoopExitBB.

583if (SE) {

584if (constLoop *ExitL =getTopMostExitingLoop(LoopExitBB, LI))

585 SE->forgetLoop(ExitL);

586else

587// Forget the entire nest as this exits the entire nest.

588 SE->forgetTopmostLoop(&L);

589 SE->forgetBlockAndLoopDispositions();

590 }

591

592if (MSSAU &&VerifyMemorySSA)

593 MSSAU->getMemorySSA()->verifyMemorySSA();

594

595// Split the preheader, so that we know that there is a safe place to insert

596// the conditional branch. We will change the preheader to have a conditional

597// branch on LoopCond.

598BasicBlock *OldPH = L.getLoopPreheader();

599BasicBlock *NewPH =SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);

600

601// Now that we have a place to insert the conditional branch, create a place

602// to branch to: this is the exit block out of the loop that we are

603// unswitching. We need to split this if there are other loop predecessors.

604// Because the loop is in simplified form, *any* other predecessor is enough.

605BasicBlock *UnswitchedBB;

606if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {

607assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&

608"A branch's parent isn't a predecessor!");

609 UnswitchedBB = LoopExitBB;

610 }else {

611 UnswitchedBB =

612SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU,"",false);

613 }

614

615if (MSSAU &&VerifyMemorySSA)

616 MSSAU->getMemorySSA()->verifyMemorySSA();

617

618// Actually move the invariant uses into the unswitched position. If possible,

619// we do this by moving the instructions, but when doing partial unswitching

620// we do it by building a new merge of the values in the unswitched position.

621 OldPH->getTerminator()->eraseFromParent();

622if (FullUnswitch) {

623// If fully unswitching, we can use the existing branch instruction.

624// Splice it into the old PH to gate reaching the new preheader and re-point

625// its successors.

626 BI.moveBefore(*OldPH, OldPH->end());

627 BI.setCondition(Cond);

628if (MSSAU) {

629// Temporarily clone the terminator, to make MSSA update cheaper by

630// separating "insert edge" updates from "remove edge" ones.

631 BI.clone()->insertInto(ParentBB, ParentBB->end());

632 }else {

633// Create a new unconditional branch that will continue the loop as a new

634// terminator.

635Instruction *NewBI =BranchInst::Create(ContinueBB, ParentBB);

636 NewBI->setDebugLoc(BI.getDebugLoc());

637 }

638 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);

639 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);

640 }else {

641// Only unswitching a subset of inputs to the condition, so we will need to

642// build a new branch that merges the invariant inputs.

643if (ExitDirection)

644assert(match(skipTrivialSelect(BI.getCondition()),m_LogicalOr()) &&

645"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "

646"condition!");

647else

648assert(match(skipTrivialSelect(BI.getCondition()),m_LogicalAnd()) &&

649"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"

650" condition!");

651buildPartialUnswitchConditionalBranch(

652 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,

653FreezeLoopUnswitchCond, OldPH->getTerminator(),nullptr, DT);

654 }

655

656// Update the dominator tree with the added edge.

657 DT.insertEdge(OldPH, UnswitchedBB);

658

659// After the dominator tree was updated with the added edge, update MemorySSA

660// if available.

661if (MSSAU) {

662SmallVector<CFGUpdate, 1> Updates;

663 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});

664 MSSAU->applyInsertUpdates(Updates, DT);

665 }

666

667// Finish updating dominator tree and memory ssa for full unswitch.

668if (FullUnswitch) {

669if (MSSAU) {

670Instruction *Term = ParentBB->getTerminator();

671// Remove the cloned branch instruction and create unconditional branch

672// now.

673Instruction *NewBI =BranchInst::Create(ContinueBB, ParentBB);

674 NewBI->setDebugLoc(Term->getDebugLoc());

675 Term->eraseFromParent();

676 MSSAU->removeEdge(ParentBB, LoopExitBB);

677 }

678 DT.deleteEdge(ParentBB, LoopExitBB);

679 }

680

681if (MSSAU &&VerifyMemorySSA)

682 MSSAU->getMemorySSA()->verifyMemorySSA();

683

684// Rewrite the relevant PHI nodes.

685if (UnswitchedBB == LoopExitBB)

686rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);

687else

688rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,

689 *ParentBB, *OldPH, FullUnswitch);

690

691// The constant we can replace all of our invariants with inside the loop

692// body. If any of the invariants have a value other than this the loop won't

693// be entered.

694ConstantInt *Replacement = ExitDirection

695 ?ConstantInt::getFalse(BI.getContext())

696 :ConstantInt::getTrue(BI.getContext());

697

698// Since this is an i1 condition we can also trivially replace uses of it

699// within the loop with a constant.

700for (Value *Invariant : Invariants)

701replaceLoopInvariantUses(L, Invariant, *Replacement);

702

703// If this was full unswitching, we may have changed the nesting relationship

704// for this loop so hoist it to its correct parent if needed.

705if (FullUnswitch)

706hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);

707

708if (MSSAU &&VerifyMemorySSA)

709 MSSAU->getMemorySSA()->verifyMemorySSA();

710

711LLVM_DEBUG(dbgs() <<" done: unswitching trivial branch...\n");

712 ++NumTrivial;

713 ++NumBranches;

714returntrue;

715}

716

717/// Unswitch a trivial switch if the condition is loop invariant.

718///

719/// This routine should only be called when loop code leading to the switch has

720/// been validated as trivial (no side effects). This routine checks if the

721/// condition is invariant and that at least one of the successors is a loop

722/// exit. This allows us to unswitch without duplicating the loop, making it

723/// trivial.

724///

725/// If this routine fails to unswitch the switch it returns false.

726///

727/// If the switch can be unswitched, this routine splits the preheader and

728/// copies the switch above that split. If the default case is one of the

729/// exiting cases, it copies the non-exiting cases and points them at the new

730/// preheader. If the default case is not exiting, it copies the exiting cases

731/// and points the default at the preheader. It preserves loop simplified form

732/// (splitting the exit blocks as necessary). It simplifies the switch within

733/// the loop by removing now-dead cases. If the default case is one of those

734/// unswitched, it replaces its destination with a new basic block containing

735/// only unreachable. Such basic blocks, while technically loop exits, are not

736/// considered for unswitching so this is a stable transform and the same

737/// switch will not be revisited. If after unswitching there is only a single

738/// in-loop successor, the switch is further simplified to an unconditional

739/// branch. Still more cleanup can be done with some simplifycfg like pass.

740///

741/// If `SE` is not null, it will be updated based on the potential loop SCEVs

742/// invalidated by this.

743staticboolunswitchTrivialSwitch(Loop &L,SwitchInst &SI,DominatorTree &DT,

744LoopInfo &LI,ScalarEvolution *SE,

745MemorySSAUpdater *MSSAU) {

746LLVM_DEBUG(dbgs() <<" Trying to unswitch switch: " << SI <<"\n");

747Value *LoopCond = SI.getCondition();

748

749// If this isn't switching on an invariant condition, we can't unswitch it.

750if (!L.isLoopInvariant(LoopCond))

751returnfalse;

752

753auto *ParentBB = SI.getParent();

754

755// The same check must be used both for the default and the exit cases. We

756// should never leave edges from the switch instruction to a basic block that

757// we are unswitching, hence the condition used to determine the default case

758// needs to also be used to populate ExitCaseIndices, which is then used to

759// remove cases from the switch.

760auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {

761// BBToCheck is not an exit block if it is inside loop L.

762if (L.contains(&BBToCheck))

763returnfalse;

764// BBToCheck is not trivial to unswitch if its phis aren't loop invariant.

765if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))

766returnfalse;

767// We do not unswitch a block that only has an unreachable statement, as

768// it's possible this is a previously unswitched block. Only unswitch if

769// either the terminator is not unreachable, or, if it is, it's not the only

770// instruction in the block.

771auto *TI = BBToCheck.getTerminator();

772bool isUnreachable = isa<UnreachableInst>(TI);

773return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;

774 };

775

776SmallVector<int, 4> ExitCaseIndices;

777for (auto Case : SI.cases())

778if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))

779 ExitCaseIndices.push_back(Case.getCaseIndex());

780BasicBlock *DefaultExitBB =nullptr;

781SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =

782SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);

783if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {

784 DefaultExitBB = SI.getDefaultDest();

785 }elseif (ExitCaseIndices.empty())

786returnfalse;

787

788LLVM_DEBUG(dbgs() <<" unswitching trivial switch...\n");

789

790if (MSSAU &&VerifyMemorySSA)

791 MSSAU->getMemorySSA()->verifyMemorySSA();

792

793// We may need to invalidate SCEVs for the outermost loop reached by any of

794// the exits.

795Loop *OuterL = &L;

796

797if (DefaultExitBB) {

798// Check the loop containing this exit.

799Loop *ExitL =getTopMostExitingLoop(DefaultExitBB, LI);

800if (!ExitL || ExitL->contains(OuterL))

801 OuterL = ExitL;

802 }

803for (unsigned Index : ExitCaseIndices) {

804auto CaseI = SI.case_begin() + Index;

805// Compute the outer loop from this exit.

806Loop *ExitL =getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);

807if (!ExitL || ExitL->contains(OuterL))

808 OuterL = ExitL;

809 }

810

811if (SE) {

812if (OuterL)

813 SE->forgetLoop(OuterL);

814else

815 SE->forgetTopmostLoop(&L);

816 }

817

818if (DefaultExitBB) {

819// Clear out the default destination temporarily to allow accurate

820// predecessor lists to be examined below.

821 SI.setDefaultDest(nullptr);

822 }

823

824// Store the exit cases into a separate data structure and remove them from

825// the switch.

826SmallVector<std::tuple<ConstantInt *,BasicBlock *,

827SwitchInstProfUpdateWrapper::CaseWeightOpt>,

828 4> ExitCases;

829 ExitCases.reserve(ExitCaseIndices.size());

830SwitchInstProfUpdateWrapper SIW(SI);

831// We walk the case indices backwards so that we remove the last case first

832// and don't disrupt the earlier indices.

833for (unsigned Index :reverse(ExitCaseIndices)) {

834auto CaseI = SI.case_begin() + Index;

835// Save the value of this case.

836auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());

837 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);

838// Delete the unswitched cases.

839 SIW.removeCase(CaseI);

840 }

841

842// Check if after this all of the remaining cases point at the same

843// successor.

844BasicBlock *CommonSuccBB =nullptr;

845if (SI.getNumCases() > 0 &&

846all_of(drop_begin(SI.cases()), [&SI](constSwitchInst::CaseHandle &Case) {

847 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();

848 }))

849 CommonSuccBB = SI.case_begin()->getCaseSuccessor();

850if (!DefaultExitBB) {

851// If we're not unswitching the default, we need it to match any cases to

852// have a common successor or if we have no cases it is the common

853// successor.

854if (SI.getNumCases() == 0)

855 CommonSuccBB = SI.getDefaultDest();

856elseif (SI.getDefaultDest() != CommonSuccBB)

857 CommonSuccBB =nullptr;

858 }

859

860// Split the preheader, so that we know that there is a safe place to insert

861// the switch.

862BasicBlock *OldPH = L.getLoopPreheader();

863BasicBlock *NewPH =SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);

864 OldPH->getTerminator()->eraseFromParent();

865

866// Now add the unswitched switch. This new switch instruction inherits the

867// debug location of the old switch, because it semantically replace the old

868// one.

869auto *NewSI =SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);

870 NewSI->setDebugLoc(SIW->getDebugLoc());

871SwitchInstProfUpdateWrapper NewSIW(*NewSI);

872

873// Rewrite the IR for the unswitched basic blocks. This requires two steps.

874// First, we split any exit blocks with remaining in-loop predecessors. Then

875// we update the PHIs in one of two ways depending on if there was a split.

876// We walk in reverse so that we split in the same order as the cases

877// appeared. This is purely for convenience of reading the resulting IR, but

878// it doesn't cost anything really.

879SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;

880SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;

881// Handle the default exit if necessary.

882// FIXME: It'd be great if we could merge this with the loop below but LLVM's

883// ranges aren't quite powerful enough yet.

884if (DefaultExitBB) {

885if (pred_empty(DefaultExitBB)) {

886 UnswitchedExitBBs.insert(DefaultExitBB);

887rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);

888 }else {

889auto *SplitBB =

890SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);

891rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,

892 *ParentBB, *OldPH,

893/*FullUnswitch*/true);

894 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;

895 }

896 }

897// Note that we must use a reference in the for loop so that we update the

898// container.

899for (auto &ExitCase :reverse(ExitCases)) {

900// Grab a reference to the exit block in the pair so that we can update it.

901BasicBlock *ExitBB = std::get<1>(ExitCase);

902

903// If this case is the last edge into the exit block, we can simply reuse it

904// as it will no longer be a loop exit. No mapping necessary.

905if (pred_empty(ExitBB)) {

906// Only rewrite once.

907if (UnswitchedExitBBs.insert(ExitBB).second)

908rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);

909continue;

910 }

911

912// Otherwise we need to split the exit block so that we retain an exit

913// block from the loop and a target for the unswitched condition.

914BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];

915if (!SplitExitBB) {

916// If this is the first time we see this, do the split and remember it.

917 SplitExitBB =SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);

918rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,

919 *ParentBB, *OldPH,

920/*FullUnswitch*/true);

921 }

922// Update the case pair to point to the split block.

923 std::get<1>(ExitCase) = SplitExitBB;

924 }

925

926// Now add the unswitched cases. We do this in reverse order as we built them

927// in reverse order.

928for (auto &ExitCase :reverse(ExitCases)) {

929ConstantInt *CaseVal = std::get<0>(ExitCase);

930BasicBlock *UnswitchedBB = std::get<1>(ExitCase);

931

932 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));

933 }

934

935// If the default was unswitched, re-point it and add explicit cases for

936// entering the loop.

937if (DefaultExitBB) {

938 NewSIW->setDefaultDest(DefaultExitBB);

939 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);

940

941// We removed all the exit cases, so we just copy the cases to the

942// unswitched switch.

943for (constauto &Case : SI.cases())

944 NewSIW.addCase(Case.getCaseValue(), NewPH,

945 SIW.getSuccessorWeight(Case.getSuccessorIndex()));

946 }elseif (DefaultCaseWeight) {

947// We have to set branch weight of the default case.

948uint64_t SW = *DefaultCaseWeight;

949for (constauto &Case : SI.cases()) {

950auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());

951assert(W &&

952"case weight must be defined as default case weight is defined");

953 SW += *W;

954 }

955 NewSIW.setSuccessorWeight(0, SW);

956 }

957

958// If we ended up with a common successor for every path through the switch

959// after unswitching, rewrite it to an unconditional branch to make it easy

960// to recognize. Otherwise we potentially have to recognize the default case

961// pointing at unreachable and other complexity.

962if (CommonSuccBB) {

963BasicBlock *BB = SI.getParent();

964// We may have had multiple edges to this common successor block, so remove

965// them as predecessors. We skip the first one, either the default or the

966// actual first case.

967bool SkippedFirst = DefaultExitBB ==nullptr;

968for (auto Case : SI.cases()) {

969assert(Case.getCaseSuccessor() == CommonSuccBB &&

970"Non-common successor!");

971 (void)Case;

972if (!SkippedFirst) {

973 SkippedFirst =true;

974continue;

975 }

976 CommonSuccBB->removePredecessor(BB,

977/*KeepOneInputPHIs*/true);

978 }

979// Now nuke the switch and replace it with a direct branch.

980Instruction *NewBI =BranchInst::Create(CommonSuccBB, BB);

981 NewBI->setDebugLoc(SIW->getDebugLoc());

982 SIW.eraseFromParent();

983 }elseif (DefaultExitBB) {

984assert(SI.getNumCases() > 0 &&

985"If we had no cases we'd have a common successor!");

986// Move the last case to the default successor. This is valid as if the

987// default got unswitched it cannot be reached. This has the advantage of

988// being simple and keeping the number of edges from this switch to

989// successors the same, and avoiding any PHI update complexity.

990auto LastCaseI = std::prev(SI.case_end());

991

992 SI.setDefaultDest(LastCaseI->getCaseSuccessor());

993 SIW.setSuccessorWeight(

994 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));

995 SIW.removeCase(LastCaseI);

996 }

997

998// Walk the unswitched exit blocks and the unswitched split blocks and update

999// the dominator tree based on the CFG edits. While we are walking unordered

1000// containers here, the API for applyUpdates takes an unordered list of

1001// updates and requires them to not contain duplicates.

1002SmallVector<DominatorTree::UpdateType, 4> DTUpdates;

1003for (auto *UnswitchedExitBB : UnswitchedExitBBs) {

1004 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});

1005 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});

1006 }

1007for (auto SplitUnswitchedPair : SplitExitBBMap) {

1008 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});

1009 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});

1010 }

1011

1012if (MSSAU) {

1013 MSSAU->applyUpdates(DTUpdates, DT,/*UpdateDT=*/true);

1014if (VerifyMemorySSA)

1015 MSSAU->getMemorySSA()->verifyMemorySSA();

1016 }else {

1017 DT.applyUpdates(DTUpdates);

1018 }

1019

1020assert(DT.verify(DominatorTree::VerificationLevel::Fast));

1021

1022// We may have changed the nesting relationship for this loop so hoist it to

1023// its correct parent if needed.

1024hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);

1025

1026if (MSSAU &&VerifyMemorySSA)

1027 MSSAU->getMemorySSA()->verifyMemorySSA();

1028

1029 ++NumTrivial;

1030 ++NumSwitches;

1031LLVM_DEBUG(dbgs() <<" done: unswitching trivial switch...\n");

1032returntrue;

1033}

1034

1035/// This routine scans the loop to find a branch or switch which occurs before

1036/// any side effects occur. These can potentially be unswitched without

1037/// duplicating the loop. If a branch or switch is successfully unswitched the

1038/// scanning continues to see if subsequent branches or switches have become

1039/// trivial. Once all trivial candidates have been unswitched, this routine

1040/// returns.

1041///

1042/// The return value indicates whether anything was unswitched (and therefore

1043/// changed).

1044///

1045/// If `SE` is not null, it will be updated based on the potential loop SCEVs

1046/// invalidated by this.

1047staticboolunswitchAllTrivialConditions(Loop &L,DominatorTree &DT,

1048LoopInfo &LI,ScalarEvolution *SE,

1049MemorySSAUpdater *MSSAU) {

1050bool Changed =false;

1051

1052// If loop header has only one reachable successor we should keep looking for

1053// trivial condition candidates in the successor as well. An alternative is

1054// to constant fold conditions and merge successors into loop header (then we

1055// only need to check header's terminator). The reason for not doing this in

1056// LoopUnswitch pass is that it could potentially break LoopPassManager's

1057// invariants. Folding dead branches could either eliminate the current loop

1058// or make other loops unreachable. LCSSA form might also not be preserved

1059// after deleting branches. The following code keeps traversing loop header's

1060// successors until it finds the trivial condition candidate (condition that

1061// is not a constant). Since unswitching generates branches with constant

1062// conditions, this scenario could be very common in practice.

1063BasicBlock *CurrentBB = L.getHeader();

1064SmallPtrSet<BasicBlock *, 8> Visited;

1065 Visited.insert(CurrentBB);

1066do {

1067// Check if there are any side-effecting instructions (e.g. stores, calls,

1068// volatile loads) in the part of the loop that the code *would* execute

1069// without unswitching.

1070if (MSSAU)// Possible early exit with MSSA

1071if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))

1072if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))

1073return Changed;

1074if (llvm::any_of(*CurrentBB,

1075 [](Instruction &I) {returnI.mayHaveSideEffects(); }))

1076return Changed;

1077

1078Instruction *CurrentTerm = CurrentBB->getTerminator();

1079

1080if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {

1081// Don't bother trying to unswitch past a switch with a constant

1082// condition. This should be removed prior to running this pass by

1083// simplifycfg.

1084if (isa<Constant>(SI->getCondition()))

1085return Changed;

1086

1087if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))

1088// Couldn't unswitch this one so we're done.

1089return Changed;

1090

1091// Mark that we managed to unswitch something.

1092 Changed =true;

1093

1094// If unswitching turned the terminator into an unconditional branch then

1095// we can continue. The unswitching logic specifically works to fold any

1096// cases it can into an unconditional branch to make it easier to

1097// recognize here.

1098auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());

1099if (!BI || BI->isConditional())

1100return Changed;

1101

1102 CurrentBB = BI->getSuccessor(0);

1103continue;

1104 }

1105

1106auto *BI = dyn_cast<BranchInst>(CurrentTerm);

1107if (!BI)

1108// We do not understand other terminator instructions.

1109return Changed;

1110

1111// Don't bother trying to unswitch past an unconditional branch or a branch

1112// with a constant value. These should be removed by simplifycfg prior to

1113// running this pass.

1114if (!BI->isConditional() ||

1115 isa<Constant>(skipTrivialSelect(BI->getCondition())))

1116return Changed;

1117

1118// Found a trivial condition candidate: non-foldable conditional branch. If

1119// we fail to unswitch this, we can't do anything else that is trivial.

1120if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))

1121return Changed;

1122

1123// Mark that we managed to unswitch something.

1124 Changed =true;

1125

1126// If we only unswitched some of the conditions feeding the branch, we won't

1127// have collapsed it to a single successor.

1128 BI = cast<BranchInst>(CurrentBB->getTerminator());

1129if (BI->isConditional())

1130return Changed;

1131

1132// Follow the newly unconditional branch into its successor.

1133 CurrentBB = BI->getSuccessor(0);

1134

1135// When continuing, if we exit the loop or reach a previous visited block,

1136// then we can not reach any trivial condition candidates (unfoldable

1137// branch instructions or switch instructions) and no unswitch can happen.

1138 }while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);

1139

1140return Changed;

1141}

1142

1143/// Build the cloned blocks for an unswitched copy of the given loop.

1144///

1145/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and

1146/// after the split block (`SplitBB`) that will be used to select between the

1147/// cloned and original loop.

1148///

1149/// This routine handles cloning all of the necessary loop blocks and exit

1150/// blocks including rewriting their instructions and the relevant PHI nodes.

1151/// Any loop blocks or exit blocks which are dominated by a different successor

1152/// than the one for this clone of the loop blocks can be trivially skipped. We

1153/// use the `DominatingSucc` map to determine whether a block satisfies that

1154/// property with a simple map lookup.

1155///

1156/// It also correctly creates the unconditional branch in the cloned

1157/// unswitched parent block to only point at the unswitched successor.

1158///

1159/// This does not handle most of the necessary updates to `LoopInfo`. Only exit

1160/// block splitting is correctly reflected in `LoopInfo`, essentially all of

1161/// the cloned blocks (and their loops) are left without full `LoopInfo`

1162/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned

1163/// blocks to them but doesn't create the cloned `DominatorTree` structure and

1164/// instead the caller must recompute an accurate DT. It *does* correctly

1165/// update the `AssumptionCache` provided in `AC`.

1166staticBasicBlock *buildClonedLoopBlocks(

1167Loop &L,BasicBlock *LoopPH,BasicBlock *SplitBB,

1168ArrayRef<BasicBlock *> ExitBlocks,BasicBlock *ParentBB,

1169BasicBlock *UnswitchedSuccBB,BasicBlock *ContinueSuccBB,

1170constSmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,

1171ValueToValueMapTy &VMap,

1172SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates,AssumptionCache &AC,

1173DominatorTree &DT,LoopInfo &LI,MemorySSAUpdater *MSSAU,

1174ScalarEvolution *SE) {

1175SmallVector<BasicBlock *, 4> NewBlocks;

1176 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());

1177

1178// We will need to clone a bunch of blocks, wrap up the clone operation in

1179// a helper.

1180auto CloneBlock = [&](BasicBlock *OldBB) {

1181// Clone the basic block and insert it before the new preheader.

1182BasicBlock *NewBB =CloneBasicBlock(OldBB, VMap,".us", OldBB->getParent());

1183 NewBB->moveBefore(LoopPH);

1184

1185// Record this block and the mapping.

1186 NewBlocks.push_back(NewBB);

1187 VMap[OldBB] = NewBB;

1188

1189return NewBB;

1190 };

1191

1192// We skip cloning blocks when they have a dominating succ that is not the

1193// succ we are cloning for.

1194auto SkipBlock = [&](BasicBlock *BB) {

1195auto It = DominatingSucc.find(BB);

1196return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;

1197 };

1198

1199// First, clone the preheader.

1200auto *ClonedPH = CloneBlock(LoopPH);

1201

1202// Then clone all the loop blocks, skipping the ones that aren't necessary.

1203for (auto *LoopBB : L.blocks())

1204if (!SkipBlock(LoopBB))

1205 CloneBlock(LoopBB);

1206

1207// Split all the loop exit edges so that when we clone the exit blocks, if

1208// any of the exit blocks are *also* a preheader for some other loop, we

1209// don't create multiple predecessors entering the loop header.

1210for (auto *ExitBB : ExitBlocks) {

1211if (SkipBlock(ExitBB))

1212continue;

1213

1214// When we are going to clone an exit, we don't need to clone all the

1215// instructions in the exit block and we want to ensure we have an easy

1216// place to merge the CFG, so split the exit first. This is always safe to

1217// do because there cannot be any non-loop predecessors of a loop exit in

1218// loop simplified form.

1219auto *MergeBB =SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);

1220

1221// Rearrange the names to make it easier to write test cases by having the

1222// exit block carry the suffix rather than the merge block carrying the

1223// suffix.

1224 MergeBB->takeName(ExitBB);

1225 ExitBB->setName(Twine(MergeBB->getName()) +".split");

1226

1227// Now clone the original exit block.

1228auto *ClonedExitBB = CloneBlock(ExitBB);

1229assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&

1230"Exit block should have been split to have one successor!");

1231assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&

1232"Cloned exit block has the wrong successor!");

1233

1234// Remap any cloned instructions and create a merge phi node for them.

1235for (auto ZippedInsts :llvm::zip_first(

1236llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),

1237llvm::make_range(ClonedExitBB->begin(),

1238 std::prev(ClonedExitBB->end())))) {

1239Instruction &I = std::get<0>(ZippedInsts);

1240Instruction &ClonedI = std::get<1>(ZippedInsts);

1241

1242// The only instructions in the exit block should be PHI nodes and

1243// potentially a landing pad.

1244assert(

1245 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&

1246"Bad instruction in exit block!");

1247// We should have a value map between the instruction and its clone.

1248assert(VMap.lookup(&I) == &ClonedI &&"Mismatch in the value map!");

1249

1250// Forget SCEVs based on exit phis in case SCEV looked through the phi.

1251if (SE)

1252if (auto *PN = dyn_cast<PHINode>(&I))

1253 SE->forgetLcssaPhiWithNewPredecessor(&L, PN);

1254

1255BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();

1256

1257auto *MergePN =

1258PHINode::Create(I.getType(),/*NumReservedValues*/ 2,".us-phi");

1259 MergePN->insertBefore(InsertPt);

1260 MergePN->setDebugLoc(InsertPt->getDebugLoc());

1261I.replaceAllUsesWith(MergePN);

1262 MergePN->addIncoming(&I, ExitBB);

1263 MergePN->addIncoming(&ClonedI, ClonedExitBB);

1264 }

1265 }

1266

1267// Rewrite the instructions in the cloned blocks to refer to the instructions

1268// in the cloned blocks. We have to do this as a second pass so that we have

1269// everything available. Also, we have inserted new instructions which may

1270// include assume intrinsics, so we update the assumption cache while

1271// processing this.

1272Module *M = ClonedPH->getParent()->getParent();

1273for (auto *ClonedBB : NewBlocks)

1274for (Instruction &I : *ClonedBB) {

1275RemapDbgRecordRange(M,I.getDbgRecordRange(), VMap,

1276RF_NoModuleLevelChanges |RF_IgnoreMissingLocals);

1277RemapInstruction(&I, VMap,

1278RF_NoModuleLevelChanges |RF_IgnoreMissingLocals);

1279if (auto *II = dyn_cast<AssumeInst>(&I))

1280 AC.registerAssumption(II);

1281 }

1282

1283// Update any PHI nodes in the cloned successors of the skipped blocks to not

1284// have spurious incoming values.

1285for (auto *LoopBB : L.blocks())

1286if (SkipBlock(LoopBB))

1287for (auto *SuccBB :successors(LoopBB))

1288if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))

1289for (PHINode &PN : ClonedSuccBB->phis())

1290 PN.removeIncomingValue(LoopBB,/*DeletePHIIfEmpty*/false);

1291

1292// Remove the cloned parent as a predecessor of any successor we ended up

1293// cloning other than the unswitched one.

1294auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));

1295for (auto *SuccBB :successors(ParentBB)) {

1296if (SuccBB == UnswitchedSuccBB)

1297continue;

1298

1299auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));

1300if (!ClonedSuccBB)

1301continue;

1302

1303 ClonedSuccBB->removePredecessor(ClonedParentBB,

1304/*KeepOneInputPHIs*/true);

1305 }

1306

1307// Replace the cloned branch with an unconditional branch to the cloned

1308// unswitched successor.

1309auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));

1310Instruction *ClonedTerminator = ClonedParentBB->getTerminator();

1311// Trivial Simplification. If Terminator is a conditional branch and

1312// condition becomes dead - erase it.

1313Value *ClonedConditionToErase =nullptr;

1314if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))

1315 ClonedConditionToErase = BI->getCondition();

1316elseif (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))

1317 ClonedConditionToErase = SI->getCondition();

1318

1319Instruction *BI =BranchInst::Create(ClonedSuccBB, ClonedParentBB);

1320 BI->setDebugLoc(ClonedTerminator->getDebugLoc());

1321 ClonedTerminator->eraseFromParent();

1322

1323if (ClonedConditionToErase)

1324RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase,nullptr,

1325 MSSAU);

1326

1327// If there are duplicate entries in the PHI nodes because of multiple edges

1328// to the unswitched successor, we need to nuke all but one as we replaced it

1329// with a direct branch.

1330for (PHINode &PN : ClonedSuccBB->phis()) {

1331bool Found =false;

1332// Loop over the incoming operands backwards so we can easily delete as we

1333// go without invalidating the index.

1334for (int i = PN.getNumOperands() - 1; i >= 0; --i) {

1335if (PN.getIncomingBlock(i) != ClonedParentBB)

1336continue;

1337if (!Found) {

1338 Found =true;

1339continue;

1340 }

1341 PN.removeIncomingValue(i,/*DeletePHIIfEmpty*/false);

1342 }

1343 }

1344

1345// Record the domtree updates for the new blocks.

1346SmallPtrSet<BasicBlock *, 4> SuccSet;

1347for (auto *ClonedBB : NewBlocks) {

1348for (auto *SuccBB :successors(ClonedBB))

1349if (SuccSet.insert(SuccBB).second)

1350 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});

1351 SuccSet.clear();

1352 }

1353

1354return ClonedPH;

1355}

1356

1357/// Recursively clone the specified loop and all of its children.

1358///

1359/// The target parent loop for the clone should be provided, or can be null if

1360/// the clone is a top-level loop. While cloning, all the blocks are mapped

1361/// with the provided value map. The entire original loop must be present in

1362/// the value map. The cloned loop is returned.

1363staticLoop *cloneLoopNest(Loop &OrigRootL,Loop *RootParentL,

1364constValueToValueMapTy &VMap,LoopInfo &LI) {

1365auto AddClonedBlocksToLoop = [&](Loop &OrigL,Loop &ClonedL) {

1366assert(ClonedL.getBlocks().empty() &&"Must start with an empty loop!");

1367 ClonedL.reserveBlocks(OrigL.getNumBlocks());

1368for (auto *BB : OrigL.blocks()) {

1369auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));

1370 ClonedL.addBlockEntry(ClonedBB);

1371if (LI.getLoopFor(BB) == &OrigL)

1372 LI.changeLoopFor(ClonedBB, &ClonedL);

1373 }

1374 };

1375

1376// We specially handle the first loop because it may get cloned into

1377// a different parent and because we most commonly are cloning leaf loops.

1378Loop *ClonedRootL = LI.AllocateLoop();

1379if (RootParentL)

1380 RootParentL->addChildLoop(ClonedRootL);

1381else

1382 LI.addTopLevelLoop(ClonedRootL);

1383 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);

1384

1385if (OrigRootL.isInnermost())

1386return ClonedRootL;

1387

1388// If we have a nest, we can quickly clone the entire loop nest using an

1389// iterative approach because it is a tree. We keep the cloned parent in the

1390// data structure to avoid repeatedly querying through a map to find it.

1391SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;

1392// Build up the loops to clone in reverse order as we'll clone them from the

1393// back.

1394for (Loop *ChildL :llvm::reverse(OrigRootL))

1395 LoopsToClone.push_back({ClonedRootL, ChildL});

1396do {

1397Loop *ClonedParentL, *L;

1398 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();

1399Loop *ClonedL = LI.AllocateLoop();

1400 ClonedParentL->addChildLoop(ClonedL);

1401 AddClonedBlocksToLoop(*L, *ClonedL);

1402for (Loop *ChildL :llvm::reverse(*L))

1403 LoopsToClone.push_back({ClonedL, ChildL});

1404 }while (!LoopsToClone.empty());

1405

1406return ClonedRootL;

1407}

1408

1409/// Build the cloned loops of an original loop from unswitching.

1410///

1411/// Because unswitching simplifies the CFG of the loop, this isn't a trivial

1412/// operation. We need to re-verify that there even is a loop (as the backedge

1413/// may not have been cloned), and even if there are remaining backedges the

1414/// backedge set may be different. However, we know that each child loop is

1415/// undisturbed, we only need to find where to place each child loop within

1416/// either any parent loop or within a cloned version of the original loop.

1417///

1418/// Because child loops may end up cloned outside of any cloned version of the

1419/// original loop, multiple cloned sibling loops may be created. All of them

1420/// are returned so that the newly introduced loop nest roots can be

1421/// identified.

1422staticvoidbuildClonedLoops(Loop &OrigL,ArrayRef<BasicBlock *> ExitBlocks,

1423constValueToValueMapTy &VMap,LoopInfo &LI,

1424SmallVectorImpl<Loop *> &NonChildClonedLoops) {

1425Loop *ClonedL =nullptr;

1426

1427auto *OrigPH = OrigL.getLoopPreheader();

1428auto *OrigHeader = OrigL.getHeader();

1429

1430auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));

1431auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));

1432

1433// We need to know the loops of the cloned exit blocks to even compute the

1434// accurate parent loop. If we only clone exits to some parent of the

1435// original parent, we want to clone into that outer loop. We also keep track

1436// of the loops that our cloned exit blocks participate in.

1437Loop *ParentL =nullptr;

1438SmallVector<BasicBlock *, 4> ClonedExitsInLoops;

1439SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;

1440 ClonedExitsInLoops.reserve(ExitBlocks.size());

1441for (auto *ExitBB : ExitBlocks)

1442if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))

1443if (Loop *ExitL = LI.getLoopFor(ExitBB)) {

1444 ExitLoopMap[ClonedExitBB] = ExitL;

1445 ClonedExitsInLoops.push_back(ClonedExitBB);

1446if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))

1447 ParentL = ExitL;

1448 }

1449assert((!ParentL || ParentL == OrigL.getParentLoop() ||

1450 ParentL->contains(OrigL.getParentLoop())) &&

1451"The computed parent loop should always contain (or be) the parent of "

1452"the original loop.");

1453

1454// We build the set of blocks dominated by the cloned header from the set of

1455// cloned blocks out of the original loop. While not all of these will

1456// necessarily be in the cloned loop, it is enough to establish that they

1457// aren't in unreachable cycles, etc.

1458SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;

1459for (auto *BB : OrigL.blocks())

1460if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))

1461 ClonedLoopBlocks.insert(ClonedBB);

1462

1463// Rebuild the set of blocks that will end up in the cloned loop. We may have

1464// skipped cloning some region of this loop which can in turn skip some of

1465// the backedges so we have to rebuild the blocks in the loop based on the

1466// backedges that remain after cloning.

1467SmallVector<BasicBlock *, 16> Worklist;

1468SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;

1469for (auto *Pred :predecessors(ClonedHeader)) {

1470// The only possible non-loop header predecessor is the preheader because

1471// we know we cloned the loop in simplified form.

1472if (Pred == ClonedPH)

1473continue;

1474

1475// Because the loop was in simplified form, the only non-loop predecessor

1476// should be the preheader.

1477assert(ClonedLoopBlocks.count(Pred) &&"Found a predecessor of the loop "

1478"header other than the preheader "

1479"that is not part of the loop!");

1480

1481// Insert this block into the loop set and on the first visit (and if it

1482// isn't the header we're currently walking) put it into the worklist to

1483// recurse through.

1484if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)

1485 Worklist.push_back(Pred);

1486 }

1487

1488// If we had any backedges then there *is* a cloned loop. Put the header into

1489// the loop set and then walk the worklist backwards to find all the blocks

1490// that remain within the loop after cloning.

1491if (!BlocksInClonedLoop.empty()) {

1492 BlocksInClonedLoop.insert(ClonedHeader);

1493

1494while (!Worklist.empty()) {

1495BasicBlock *BB = Worklist.pop_back_val();

1496assert(BlocksInClonedLoop.count(BB) &&

1497"Didn't put block into the loop set!");

1498

1499// Insert any predecessors that are in the possible set into the cloned

1500// set, and if the insert is successful, add them to the worklist. Note

1501// that we filter on the blocks that are definitely reachable via the

1502// backedge to the loop header so we may prune out dead code within the

1503// cloned loop.

1504for (auto *Pred :predecessors(BB))

1505if (ClonedLoopBlocks.count(Pred) &&

1506 BlocksInClonedLoop.insert(Pred).second)

1507 Worklist.push_back(Pred);

1508 }

1509

1510 ClonedL = LI.AllocateLoop();

1511if (ParentL) {

1512 ParentL->addBasicBlockToLoop(ClonedPH, LI);

1513 ParentL->addChildLoop(ClonedL);

1514 }else {

1515 LI.addTopLevelLoop(ClonedL);

1516 }

1517 NonChildClonedLoops.push_back(ClonedL);

1518

1519 ClonedL->reserveBlocks(BlocksInClonedLoop.size());

1520// We don't want to just add the cloned loop blocks based on how we

1521// discovered them. The original order of blocks was carefully built in

1522// a way that doesn't rely on predecessor ordering. Rather than re-invent

1523// that logic, we just re-walk the original blocks (and those of the child

1524// loops) and filter them as we add them into the cloned loop.

1525for (auto *BB : OrigL.blocks()) {

1526auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));

1527if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))

1528continue;

1529

1530// Directly add the blocks that are only in this loop.

1531if (LI.getLoopFor(BB) == &OrigL) {

1532 ClonedL->addBasicBlockToLoop(ClonedBB, LI);

1533continue;

1534 }

1535

1536// We want to manually add it to this loop and parents.

1537// Registering it with LoopInfo will happen when we clone the top

1538// loop for this block.

1539for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())

1540 PL->addBlockEntry(ClonedBB);

1541 }

1542

1543// Now add each child loop whose header remains within the cloned loop. All

1544// of the blocks within the loop must satisfy the same constraints as the

1545// header so once we pass the header checks we can just clone the entire

1546// child loop nest.

1547for (Loop *ChildL : OrigL) {

1548auto *ClonedChildHeader =

1549 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));

1550if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))

1551continue;

1552

1553#ifndef NDEBUG

1554// We should never have a cloned child loop header but fail to have

1555// all of the blocks for that child loop.

1556for (auto *ChildLoopBB : ChildL->blocks())

1557assert(BlocksInClonedLoop.count(

1558 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&

1559"Child cloned loop has a header within the cloned outer "

1560"loop but not all of its blocks!");

1561#endif

1562

1563cloneLoopNest(*ChildL, ClonedL, VMap, LI);

1564 }

1565 }

1566

1567// Now that we've handled all the components of the original loop that were

1568// cloned into a new loop, we still need to handle anything from the original

1569// loop that wasn't in a cloned loop.

1570

1571// Figure out what blocks are left to place within any loop nest containing

1572// the unswitched loop. If we never formed a loop, the cloned PH is one of

1573// them.

1574SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;

1575if (BlocksInClonedLoop.empty())

1576 UnloopedBlockSet.insert(ClonedPH);

1577for (auto *ClonedBB : ClonedLoopBlocks)

1578if (!BlocksInClonedLoop.count(ClonedBB))

1579 UnloopedBlockSet.insert(ClonedBB);

1580

1581// Copy the cloned exits and sort them in ascending loop depth, we'll work

1582// backwards across these to process them inside out. The order shouldn't

1583// matter as we're just trying to build up the map from inside-out; we use

1584// the map in a more stably ordered way below.

1585auto OrderedClonedExitsInLoops = ClonedExitsInLoops;

1586llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS,BasicBlock *RHS) {

1587return ExitLoopMap.lookup(LHS)->getLoopDepth() <

1588 ExitLoopMap.lookup(RHS)->getLoopDepth();

1589 });

1590

1591// Populate the existing ExitLoopMap with everything reachable from each

1592// exit, starting from the inner most exit.

1593while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {

1594assert(Worklist.empty() &&"Didn't clear worklist!");

1595

1596BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();

1597Loop *ExitL = ExitLoopMap.lookup(ExitBB);

1598

1599// Walk the CFG back until we hit the cloned PH adding everything reachable

1600// and in the unlooped set to this exit block's loop.

1601 Worklist.push_back(ExitBB);

1602do {

1603BasicBlock *BB = Worklist.pop_back_val();

1604// We can stop recursing at the cloned preheader (if we get there).

1605if (BB == ClonedPH)

1606continue;

1607

1608for (BasicBlock *PredBB :predecessors(BB)) {

1609// If this pred has already been moved to our set or is part of some

1610// (inner) loop, no update needed.

1611if (!UnloopedBlockSet.erase(PredBB)) {

1612assert(

1613 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&

1614"Predecessor not mapped to a loop!");

1615continue;

1616 }

1617

1618// We just insert into the loop set here. We'll add these blocks to the

1619// exit loop after we build up the set in an order that doesn't rely on

1620// predecessor order (which in turn relies on use list order).

1621bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;

1622 (void)Inserted;

1623assert(Inserted &&"Should only visit an unlooped block once!");

1624

1625// And recurse through to its predecessors.

1626 Worklist.push_back(PredBB);

1627 }

1628 }while (!Worklist.empty());

1629 }

1630

1631// Now that the ExitLoopMap gives as mapping for all the non-looping cloned

1632// blocks to their outer loops, walk the cloned blocks and the cloned exits

1633// in their original order adding them to the correct loop.

1634

1635// We need a stable insertion order. We use the order of the original loop

1636// order and map into the correct parent loop.

1637for (auto *BB : llvm::concat<BasicBlock *const>(

1638ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))

1639if (Loop *OuterL = ExitLoopMap.lookup(BB))

1640 OuterL->addBasicBlockToLoop(BB, LI);

1641

1642#ifndef NDEBUG

1643for (auto &BBAndL : ExitLoopMap) {

1644auto *BB = BBAndL.first;

1645auto *OuterL = BBAndL.second;

1646assert(LI.getLoopFor(BB) == OuterL &&

1647"Failed to put all blocks into outer loops!");

1648 }

1649#endif

1650

1651// Now that all the blocks are placed into the correct containing loop in the

1652// absence of child loops, find all the potentially cloned child loops and

1653// clone them into whatever outer loop we placed their header into.

1654for (Loop *ChildL : OrigL) {

1655auto *ClonedChildHeader =

1656 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));

1657if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))

1658continue;

1659

1660#ifndef NDEBUG

1661for (auto *ChildLoopBB : ChildL->blocks())

1662assert(VMap.count(ChildLoopBB) &&

1663"Cloned a child loop header but not all of that loops blocks!");

1664#endif

1665

1666 NonChildClonedLoops.push_back(cloneLoopNest(

1667 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));

1668 }

1669}

1670

1671staticvoid

1672deleteDeadClonedBlocks(Loop &L,ArrayRef<BasicBlock *> ExitBlocks,

1673ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,

1674DominatorTree &DT,MemorySSAUpdater *MSSAU) {

1675// Find all the dead clones, and remove them from their successors.

1676SmallVector<BasicBlock *, 16> DeadBlocks;

1677for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))

1678for (constauto &VMap : VMaps)

1679if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))

1680if (!DT.isReachableFromEntry(ClonedBB)) {

1681for (BasicBlock *SuccBB :successors(ClonedBB))

1682 SuccBB->removePredecessor(ClonedBB);

1683 DeadBlocks.push_back(ClonedBB);

1684 }

1685

1686// Remove all MemorySSA in the dead blocks

1687if (MSSAU) {

1688SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),

1689 DeadBlocks.end());

1690 MSSAU->removeBlocks(DeadBlockSet);

1691 }

1692

1693// Drop any remaining references to break cycles.

1694for (BasicBlock *BB : DeadBlocks)

1695 BB->dropAllReferences();

1696// Erase them from the IR.

1697for (BasicBlock *BB : DeadBlocks)

1698 BB->eraseFromParent();

1699}

1700

1701staticvoiddeleteDeadBlocksFromLoop(Loop &L,

1702SmallVectorImpl<BasicBlock *> &ExitBlocks,

1703DominatorTree &DT,LoopInfo &LI,

1704MemorySSAUpdater *MSSAU,

1705ScalarEvolution *SE,

1706LPMUpdater &LoopUpdater) {

1707// Find all the dead blocks tied to this loop, and remove them from their

1708// successors.

1709SmallSetVector<BasicBlock *, 8> DeadBlockSet;

1710

1711// Start with loop/exit blocks and get a transitive closure of reachable dead

1712// blocks.

1713SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),

1714 ExitBlocks.end());

1715 DeathCandidates.append(L.blocks().begin(), L.blocks().end());

1716while (!DeathCandidates.empty()) {

1717auto *BB = DeathCandidates.pop_back_val();

1718if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {

1719for (BasicBlock *SuccBB :successors(BB)) {

1720 SuccBB->removePredecessor(BB);

1721 DeathCandidates.push_back(SuccBB);

1722 }

1723 DeadBlockSet.insert(BB);

1724 }

1725 }

1726

1727// Remove all MemorySSA in the dead blocks

1728if (MSSAU)

1729 MSSAU->removeBlocks(DeadBlockSet);

1730

1731// Filter out the dead blocks from the exit blocks list so that it can be

1732// used in the caller.

1733llvm::erase_if(ExitBlocks,

1734 [&](BasicBlock *BB) {return DeadBlockSet.count(BB); });

1735

1736// Walk from this loop up through its parents removing all of the dead blocks.

1737for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {

1738for (auto *BB : DeadBlockSet)

1739 ParentL->getBlocksSet().erase(BB);

1740llvm::erase_if(ParentL->getBlocksVector(),

1741 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });

1742 }

1743

1744// Now delete the dead child loops. This raw delete will clear them

1745// recursively.

1746llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {

1747 if (!DeadBlockSet.count(ChildL->getHeader()))

1748 return false;

1749

1750 assert(llvm::all_of(ChildL->blocks(),

1751 [&](BasicBlock *ChildBB) {

1752 return DeadBlockSet.count(ChildBB);

1753 }) &&

1754"If the child loop header is dead all blocks in the child loop must "

1755"be dead as well!");

1756 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());

1757if (SE)

1758 SE->forgetBlockAndLoopDispositions();

1759 LI.destroy(ChildL);

1760returntrue;

1761 });

1762

1763// Remove the loop mappings for the dead blocks and drop all the references

1764// from these blocks to others to handle cyclic references as we start

1765// deleting the blocks themselves.

1766for (auto *BB : DeadBlockSet) {

1767// Check that the dominator tree has already been updated.

1768assert(!DT.getNode(BB) &&"Should already have cleared domtree!");

1769 LI.changeLoopFor(BB,nullptr);

1770// Drop all uses of the instructions to make sure we won't have dangling

1771// uses in other blocks.

1772for (auto &I : *BB)

1773if (!I.use_empty())

1774I.replaceAllUsesWith(PoisonValue::get(I.getType()));

1775 BB->dropAllReferences();

1776 }

1777

1778// Actually delete the blocks now that they've been fully unhooked from the

1779// IR.

1780for (auto *BB : DeadBlockSet)

1781 BB->eraseFromParent();

1782}

1783

1784/// Recompute the set of blocks in a loop after unswitching.

1785///

1786/// This walks from the original headers predecessors to rebuild the loop. We

1787/// take advantage of the fact that new blocks can't have been added, and so we

1788/// filter by the original loop's blocks. This also handles potentially

1789/// unreachable code that we don't want to explore but might be found examining

1790/// the predecessors of the header.

1791///

1792/// If the original loop is no longer a loop, this will return an empty set. If

1793/// it remains a loop, all the blocks within it will be added to the set

1794/// (including those blocks in inner loops).

1795staticSmallPtrSet<const BasicBlock *, 16>recomputeLoopBlockSet(Loop &L,

1796LoopInfo &LI) {

1797SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;

1798

1799auto *PH = L.getLoopPreheader();

1800auto *Header = L.getHeader();

1801

1802// A worklist to use while walking backwards from the header.

1803SmallVector<BasicBlock *, 16> Worklist;

1804

1805// First walk the predecessors of the header to find the backedges. This will

1806// form the basis of our walk.

1807for (auto *Pred :predecessors(Header)) {

1808// Skip the preheader.

1809if (Pred == PH)

1810continue;

1811

1812// Because the loop was in simplified form, the only non-loop predecessor

1813// is the preheader.

1814assert(L.contains(Pred) &&"Found a predecessor of the loop header other "

1815"than the preheader that is not part of the "

1816"loop!");

1817

1818// Insert this block into the loop set and on the first visit and, if it

1819// isn't the header we're currently walking, put it into the worklist to

1820// recurse through.

1821if (LoopBlockSet.insert(Pred).second && Pred != Header)

1822 Worklist.push_back(Pred);

1823 }

1824

1825// If no backedges were found, we're done.

1826if (LoopBlockSet.empty())

1827return LoopBlockSet;

1828

1829// We found backedges, recurse through them to identify the loop blocks.

1830while (!Worklist.empty()) {

1831BasicBlock *BB = Worklist.pop_back_val();

1832assert(LoopBlockSet.count(BB) &&"Didn't put block into the loop set!");

1833

1834// No need to walk past the header.

1835if (BB == Header)

1836continue;

1837

1838// Because we know the inner loop structure remains valid we can use the

1839// loop structure to jump immediately across the entire nested loop.

1840// Further, because it is in loop simplified form, we can directly jump

1841// to its preheader afterward.

1842if (Loop *InnerL = LI.getLoopFor(BB))

1843if (InnerL != &L) {

1844assert(L.contains(InnerL) &&

1845"Should not reach a loop *outside* this loop!");

1846// The preheader is the only possible predecessor of the loop so

1847// insert it into the set and check whether it was already handled.

1848auto *InnerPH = InnerL->getLoopPreheader();

1849assert(L.contains(InnerPH) &&"Cannot contain an inner loop block "

1850"but not contain the inner loop "

1851"preheader!");

1852if (!LoopBlockSet.insert(InnerPH).second)

1853// The only way to reach the preheader is through the loop body

1854// itself so if it has been visited the loop is already handled.

1855continue;

1856

1857// Insert all of the blocks (other than those already present) into

1858// the loop set. We expect at least the block that led us to find the

1859// inner loop to be in the block set, but we may also have other loop

1860// blocks if they were already enqueued as predecessors of some other

1861// outer loop block.

1862for (auto *InnerBB : InnerL->blocks()) {

1863if (InnerBB == BB) {

1864assert(LoopBlockSet.count(InnerBB) &&

1865"Block should already be in the set!");

1866continue;

1867 }

1868

1869 LoopBlockSet.insert(InnerBB);

1870 }

1871

1872// Add the preheader to the worklist so we will continue past the

1873// loop body.

1874 Worklist.push_back(InnerPH);

1875continue;

1876 }

1877

1878// Insert any predecessors that were in the original loop into the new

1879// set, and if the insert is successful, add them to the worklist.

1880for (auto *Pred :predecessors(BB))

1881if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)

1882 Worklist.push_back(Pred);

1883 }

1884

1885assert(LoopBlockSet.count(Header) &&"Cannot fail to add the header!");

1886

1887// We've found all the blocks participating in the loop, return our completed

1888// set.

1889return LoopBlockSet;

1890}

1891

1892/// Rebuild a loop after unswitching removes some subset of blocks and edges.

1893///

1894/// The removal may have removed some child loops entirely but cannot have

1895/// disturbed any remaining child loops. However, they may need to be hoisted

1896/// to the parent loop (or to be top-level loops). The original loop may be

1897/// completely removed.

1898///

1899/// The sibling loops resulting from this update are returned. If the original

1900/// loop remains a valid loop, it will be the first entry in this list with all

1901/// of the newly sibling loops following it.

1902///

1903/// Returns true if the loop remains a loop after unswitching, and false if it

1904/// is no longer a loop after unswitching (and should not continue to be

1905/// referenced).

1906staticboolrebuildLoopAfterUnswitch(Loop &L,ArrayRef<BasicBlock *> ExitBlocks,

1907LoopInfo &LI,

1908SmallVectorImpl<Loop *> &HoistedLoops,

1909ScalarEvolution *SE) {

1910auto *PH = L.getLoopPreheader();

1911

1912// Compute the actual parent loop from the exit blocks. Because we may have

1913// pruned some exits the loop may be different from the original parent.

1914Loop *ParentL =nullptr;

1915SmallVector<Loop *, 4> ExitLoops;

1916SmallVector<BasicBlock *, 4> ExitsInLoops;

1917 ExitsInLoops.reserve(ExitBlocks.size());

1918for (auto *ExitBB : ExitBlocks)

1919if (Loop *ExitL = LI.getLoopFor(ExitBB)) {

1920 ExitLoops.push_back(ExitL);

1921 ExitsInLoops.push_back(ExitBB);

1922if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))

1923 ParentL = ExitL;

1924 }

1925

1926// Recompute the blocks participating in this loop. This may be empty if it

1927// is no longer a loop.

1928auto LoopBlockSet =recomputeLoopBlockSet(L, LI);

1929

1930// If we still have a loop, we need to re-set the loop's parent as the exit

1931// block set changing may have moved it within the loop nest. Note that this

1932// can only happen when this loop has a parent as it can only hoist the loop

1933// *up* the nest.

1934if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {

1935// Remove this loop's (original) blocks from all of the intervening loops.

1936for (Loop *IL = L.getParentLoop(); IL != ParentL;

1937 IL = IL->getParentLoop()) {

1938 IL->getBlocksSet().erase(PH);

1939for (auto *BB : L.blocks())

1940 IL->getBlocksSet().erase(BB);

1941llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {

1942 return BB == PH || L.contains(BB);

1943 });

1944 }

1945

1946 LI.changeLoopFor(PH, ParentL);

1947 L.getParentLoop()->removeChildLoop(&L);

1948if (ParentL)

1949 ParentL->addChildLoop(&L);

1950else

1951 LI.addTopLevelLoop(&L);

1952 }

1953

1954// Now we update all the blocks which are no longer within the loop.

1955auto &Blocks = L.getBlocksVector();

1956auto BlocksSplitI =

1957 LoopBlockSet.empty()

1958 ?Blocks.begin()

1959 : std::stable_partition(

1960Blocks.begin(),Blocks.end(),

1961 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });

1962

1963// Before we erase the list of unlooped blocks, build a set of them.

1964SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI,Blocks.end());

1965if (LoopBlockSet.empty())

1966 UnloopedBlocks.insert(PH);

1967

1968// Now erase these blocks from the loop.

1969for (auto *BB :make_range(BlocksSplitI,Blocks.end()))

1970 L.getBlocksSet().erase(BB);

1971Blocks.erase(BlocksSplitI,Blocks.end());

1972

1973// Sort the exits in ascending loop depth, we'll work backwards across these

1974// to process them inside out.

1975llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS,BasicBlock *RHS) {

1976return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);

1977 });

1978

1979// We'll build up a set for each exit loop.

1980SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;

1981Loop *PrevExitL = L.getParentLoop();// The deepest possible exit loop.

1982

1983auto RemoveUnloopedBlocksFromLoop =

1984 [](Loop &L,SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {

1985for (auto *BB : UnloopedBlocks)

1986 L.getBlocksSet().erase(BB);

1987llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {

1988 return UnloopedBlocks.count(BB);

1989 });

1990 };

1991

1992SmallVector<BasicBlock *, 16> Worklist;

1993while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {

1994assert(Worklist.empty() &&"Didn't clear worklist!");

1995assert(NewExitLoopBlocks.empty() &&"Didn't clear loop set!");

1996

1997// Grab the next exit block, in decreasing loop depth order.

1998BasicBlock *ExitBB = ExitsInLoops.pop_back_val();

1999Loop &ExitL = *LI.getLoopFor(ExitBB);

2000assert(ExitL.contains(&L) &&"Exit loop must contain the inner loop!");

2001

2002// Erase all of the unlooped blocks from the loops between the previous

2003// exit loop and this exit loop. This works because the ExitInLoops list is

2004// sorted in increasing order of loop depth and thus we visit loops in

2005// decreasing order of loop depth.

2006for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())

2007 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);

2008

2009// Walk the CFG back until we hit the cloned PH adding everything reachable

2010// and in the unlooped set to this exit block's loop.

2011 Worklist.push_back(ExitBB);

2012do {

2013BasicBlock *BB = Worklist.pop_back_val();

2014// We can stop recursing at the cloned preheader (if we get there).

2015if (BB == PH)

2016continue;

2017

2018for (BasicBlock *PredBB :predecessors(BB)) {

2019// If this pred has already been moved to our set or is part of some

2020// (inner) loop, no update needed.

2021if (!UnloopedBlocks.erase(PredBB)) {

2022assert((NewExitLoopBlocks.count(PredBB) ||

2023 ExitL.contains(LI.getLoopFor(PredBB))) &&

2024"Predecessor not in a nested loop (or already visited)!");

2025continue;

2026 }

2027

2028// We just insert into the loop set here. We'll add these blocks to the

2029// exit loop after we build up the set in a deterministic order rather

2030// than the predecessor-influenced visit order.

2031bool Inserted = NewExitLoopBlocks.insert(PredBB).second;

2032 (void)Inserted;

2033assert(Inserted &&"Should only visit an unlooped block once!");

2034

2035// And recurse through to its predecessors.

2036 Worklist.push_back(PredBB);

2037 }

2038 }while (!Worklist.empty());

2039

2040// If blocks in this exit loop were directly part of the original loop (as

2041// opposed to a child loop) update the map to point to this exit loop. This

2042// just updates a map and so the fact that the order is unstable is fine.

2043for (auto *BB : NewExitLoopBlocks)

2044if (Loop *BBL = LI.getLoopFor(BB))

2045if (BBL == &L || !L.contains(BBL))

2046 LI.changeLoopFor(BB, &ExitL);

2047

2048// We will remove the remaining unlooped blocks from this loop in the next

2049// iteration or below.

2050 NewExitLoopBlocks.clear();

2051 }

2052

2053// Any remaining unlooped blocks are no longer part of any loop unless they

2054// are part of some child loop.

2055for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())

2056 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);

2057for (auto *BB : UnloopedBlocks)

2058if (Loop *BBL = LI.getLoopFor(BB))

2059if (BBL == &L || !L.contains(BBL))

2060 LI.changeLoopFor(BB,nullptr);

2061

2062// Sink all the child loops whose headers are no longer in the loop set to

2063// the parent (or to be top level loops). We reach into the loop and directly

2064// update its subloop vector to make this batch update efficient.

2065auto &SubLoops = L.getSubLoopsVector();

2066auto SubLoopsSplitI =

2067 LoopBlockSet.empty()

2068 ? SubLoops.begin()

2069 : std::stable_partition(

2070 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {

2071 return LoopBlockSet.count(SubL->getHeader());

2072 });

2073for (auto *HoistedL :make_range(SubLoopsSplitI, SubLoops.end())) {

2074 HoistedLoops.push_back(HoistedL);

2075 HoistedL->setParentLoop(nullptr);

2076

2077// To compute the new parent of this hoisted loop we look at where we

2078// placed the preheader above. We can't lookup the header itself because we

2079// retained the mapping from the header to the hoisted loop. But the

2080// preheader and header should have the exact same new parent computed

2081// based on the set of exit blocks from the original loop as the preheader

2082// is a predecessor of the header and so reached in the reverse walk. And

2083// because the loops were all in simplified form the preheader of the

2084// hoisted loop can't be part of some *other* loop.

2085if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))

2086 NewParentL->addChildLoop(HoistedL);

2087else

2088 LI.addTopLevelLoop(HoistedL);

2089 }

2090 SubLoops.erase(SubLoopsSplitI, SubLoops.end());

2091

2092// Actually delete the loop if nothing remained within it.

2093if (Blocks.empty()) {

2094assert(SubLoops.empty() &&

2095"Failed to remove all subloops from the original loop!");

2096if (Loop *ParentL = L.getParentLoop())

2097 ParentL->removeChildLoop(llvm::find(*ParentL, &L));

2098else

2099 LI.removeLoop(llvm::find(LI, &L));

2100// markLoopAsDeleted for L should be triggered by the caller (it is

2101// typically done within postUnswitch).

2102if (SE)

2103 SE->forgetBlockAndLoopDispositions();

2104 LI.destroy(&L);

2105returnfalse;

2106 }

2107

2108returntrue;

2109}

2110

2111/// Helper to visit a dominator subtree, invoking a callable on each node.

2112///

2113/// Returning false at any point will stop walking past that node of the tree.

2114template <typename CallableT>

2115voidvisitDomSubTree(DominatorTree &DT,BasicBlock *BB, CallableT Callable) {

2116SmallVector<DomTreeNode *, 4> DomWorklist;

2117 DomWorklist.push_back(DT[BB]);

2118#ifndef NDEBUG

2119SmallPtrSet<DomTreeNode *, 4> Visited;

2120 Visited.insert(DT[BB]);

2121#endif

2122do {

2123DomTreeNode *N = DomWorklist.pop_back_val();

2124

2125// Visit this node.

2126if (!Callable(N->getBlock()))

2127continue;

2128

2129// Accumulate the child nodes.

2130for (DomTreeNode *ChildN : *N) {

2131assert(Visited.insert(ChildN).second &&

2132"Cannot visit a node twice when walking a tree!");

2133 DomWorklist.push_back(ChildN);

2134 }

2135 }while (!DomWorklist.empty());

2136}

2137

2138voidpostUnswitch(Loop &L,LPMUpdater &U,StringRef LoopName,

2139bool CurrentLoopValid,bool PartiallyInvariant,

2140bool InjectedCondition,ArrayRef<Loop *> NewLoops) {

2141// If we did a non-trivial unswitch, we have added new (cloned) loops.

2142if (!NewLoops.empty())

2143 U.addSiblingLoops(NewLoops);

2144

2145// If the current loop remains valid, we should revisit it to catch any

2146// other unswitch opportunities. Otherwise, we need to mark it as deleted.

2147if (CurrentLoopValid) {

2148if (PartiallyInvariant) {

2149// Mark the new loop as partially unswitched, to avoid unswitching on

2150// the same condition again.

2151auto &Context = L.getHeader()->getContext();

2152MDNode *DisableUnswitchMD =MDNode::get(

2153 Context,

2154MDString::get(Context,"llvm.loop.unswitch.partial.disable"));

2155MDNode *NewLoopID =makePostTransformationMetadata(

2156 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},

2157 {DisableUnswitchMD});

2158 L.setLoopID(NewLoopID);

2159 }elseif (InjectedCondition) {

2160// Do the same for injection of invariant conditions.

2161auto &Context = L.getHeader()->getContext();

2162MDNode *DisableUnswitchMD =MDNode::get(

2163 Context,

2164MDString::get(Context,"llvm.loop.unswitch.injection.disable"));

2165MDNode *NewLoopID =makePostTransformationMetadata(

2166 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},

2167 {DisableUnswitchMD});

2168 L.setLoopID(NewLoopID);

2169 }else

2170 U.revisitCurrentLoop();

2171 }else

2172 U.markLoopAsDeleted(L, LoopName);

2173}

2174

2175staticvoidunswitchNontrivialInvariants(

2176Loop &L,Instruction &TI,ArrayRef<Value *> Invariants,

2177IVConditionInfo &PartialIVInfo,DominatorTree &DT,LoopInfo &LI,

2178AssumptionCache &AC,ScalarEvolution *SE,MemorySSAUpdater *MSSAU,

2179LPMUpdater &LoopUpdater,bool InsertFreeze,bool InjectedCondition) {

2180auto *ParentBB = TI.getParent();

2181BranchInst *BI = dyn_cast<BranchInst>(&TI);

2182SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);

2183

2184// Save the current loop name in a variable so that we can report it even

2185// after it has been deleted.

2186 std::string LoopName(L.getName());

2187

2188// We can only unswitch switches, conditional branches with an invariant

2189// condition, or combining invariant conditions with an instruction or

2190// partially invariant instructions.

2191assert((SI || (BI && BI->isConditional())) &&

2192"Can only unswitch switches and conditional branch!");

2193bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();

2194bool FullUnswitch =

2195 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&

2196 !PartiallyInvariant);

2197if (FullUnswitch)

2198assert(Invariants.size() == 1 &&

2199"Cannot have other invariants with full unswitching!");

2200else

2201assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&

2202"Partial unswitching requires an instruction as the condition!");

2203

2204if (MSSAU &&VerifyMemorySSA)

2205 MSSAU->getMemorySSA()->verifyMemorySSA();

2206

2207// Constant and BBs tracking the cloned and continuing successor. When we are

2208// unswitching the entire condition, this can just be trivially chosen to

2209// unswitch towards `true`. However, when we are unswitching a set of

2210// invariants combined with `and` or `or` or partially invariant instructions,

2211// the combining operation determines the best direction to unswitch: we want

2212// to unswitch the direction that will collapse the branch.

2213boolDirection =true;

2214int ClonedSucc = 0;

2215if (!FullUnswitch) {

2216Value *Cond =skipTrivialSelect(BI->getCondition());

2217 (void)Cond;

2218assert(((match(Cond,m_LogicalAnd()) ^match(Cond,m_LogicalOr())) ||

2219 PartiallyInvariant) &&

2220"Only `or`, `and`, an `select`, partially invariant instructions "

2221"can combine invariants being unswitched.");

2222if (!match(Cond,m_LogicalOr())) {

2223if (match(Cond,m_LogicalAnd()) ||

2224 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {

2225Direction =false;

2226 ClonedSucc = 1;

2227 }

2228 }

2229 }

2230

2231BasicBlock *RetainedSuccBB =

2232 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();

2233SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;

2234if (BI)

2235 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));

2236else

2237for (auto Case : SI->cases())

2238if (Case.getCaseSuccessor() != RetainedSuccBB)

2239 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());

2240

2241assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&

2242"Should not unswitch the same successor we are retaining!");

2243

2244// The branch should be in this exact loop. Any inner loop's invariant branch

2245// should be handled by unswitching that inner loop. The caller of this

2246// routine should filter out any candidates that remain (but were skipped for

2247// whatever reason).

2248assert(LI.getLoopFor(ParentBB) == &L &&"Branch in an inner loop!");

2249

2250// Compute the parent loop now before we start hacking on things.

2251Loop *ParentL = L.getParentLoop();

2252// Get blocks in RPO order for MSSA update, before changing the CFG.

2253LoopBlocksRPO LBRPO(&L);

2254if (MSSAU)

2255 LBRPO.perform(&LI);

2256

2257// Compute the outer-most loop containing one of our exit blocks. This is the

2258// furthest up our loopnest which can be mutated, which we will use below to

2259// update things.

2260Loop *OuterExitL = &L;

2261SmallVector<BasicBlock *, 4> ExitBlocks;

2262 L.getUniqueExitBlocks(ExitBlocks);

2263for (auto *ExitBB : ExitBlocks) {

2264// ExitBB can be an exit block for several levels in the loop nest. Make

2265// sure we find the top most.

2266Loop *NewOuterExitL =getTopMostExitingLoop(ExitBB, LI);

2267if (!NewOuterExitL) {

2268// We exited the entire nest with this block, so we're done.

2269 OuterExitL =nullptr;

2270break;

2271 }

2272if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))

2273 OuterExitL = NewOuterExitL;

2274 }

2275

2276// At this point, we're definitely going to unswitch something so invalidate

2277// any cached information in ScalarEvolution for the outer most loop

2278// containing an exit block and all nested loops.

2279if (SE) {

2280if (OuterExitL)

2281 SE->forgetLoop(OuterExitL);

2282else

2283 SE->forgetTopmostLoop(&L);

2284 SE->forgetBlockAndLoopDispositions();

2285 }

2286

2287// If the edge from this terminator to a successor dominates that successor,

2288// store a map from each block in its dominator subtree to it. This lets us

2289// tell when cloning for a particular successor if a block is dominated by

2290// some *other* successor with a single data structure. We use this to

2291// significantly reduce cloning.

2292SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;

2293for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),

2294 UnswitchedSuccBBs))

2295if (SuccBB->getUniquePredecessor() ||

2296llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {

2297return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);

2298 }))

2299visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {

2300 DominatingSucc[BB] = SuccBB;

2301returntrue;

2302 });

2303

2304// Split the preheader, so that we know that there is a safe place to insert

2305// the conditional branch. We will change the preheader to have a conditional

2306// branch on LoopCond. The original preheader will become the split point

2307// between the unswitched versions, and we will have a new preheader for the

2308// original loop.

2309BasicBlock *SplitBB = L.getLoopPreheader();

2310BasicBlock *LoopPH =SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);

2311

2312// Keep track of the dominator tree updates needed.

2313SmallVector<DominatorTree::UpdateType, 4> DTUpdates;

2314

2315// Clone the loop for each unswitched successor.

2316SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;

2317 VMaps.reserve(UnswitchedSuccBBs.size());

2318SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;

2319for (auto *SuccBB : UnswitchedSuccBBs) {

2320 VMaps.emplace_back(newValueToValueMapTy());

2321 ClonedPHs[SuccBB] =buildClonedLoopBlocks(

2322 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,

2323 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);

2324 }

2325

2326// Drop metadata if we may break its semantics by moving this instr into the

2327// split block.

2328if (TI.getMetadata(LLVMContext::MD_make_implicit)) {

2329if (DropNonTrivialImplicitNullChecks)

2330// Do not spend time trying to understand if we can keep it, just drop it

2331// to save compile time.

2332 TI.setMetadata(LLVMContext::MD_make_implicit,nullptr);

2333else {

2334// It is only legal to preserve make.implicit metadata if we are

2335// guaranteed no reach implicit null check after following this branch.

2336ICFLoopSafetyInfo SafetyInfo;

2337 SafetyInfo.computeLoopSafetyInfo(&L);

2338if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))

2339 TI.setMetadata(LLVMContext::MD_make_implicit,nullptr);

2340 }

2341 }

2342

2343// The stitching of the branched code back together depends on whether we're

2344// doing full unswitching or not with the exception that we always want to

2345// nuke the initial terminator placed in the split block.

2346 SplitBB->getTerminator()->eraseFromParent();

2347if (FullUnswitch) {

2348// Keep a clone of the terminator for MSSA updates.

2349Instruction *NewTI = TI.clone();

2350 NewTI->insertInto(ParentBB, ParentBB->end());

2351

2352// Splice the terminator from the original loop and rewrite its

2353// successors.

2354 TI.moveBefore(*SplitBB, SplitBB->end());

2355 TI.dropLocation();

2356

2357// First wire up the moved terminator to the preheaders.

2358if (BI) {

2359BasicBlock *ClonedPH = ClonedPHs.begin()->second;

2360 BI->setSuccessor(ClonedSucc, ClonedPH);

2361 BI->setSuccessor(1 - ClonedSucc, LoopPH);

2362Value *Cond =skipTrivialSelect(BI->getCondition());

2363if (InsertFreeze) {

2364// We don't give any debug location to the new freeze, because the

2365// BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted

2366// out of the loop.

2367Cond =newFreezeInst(Cond,Cond->getName() +".fr", BI->getIterator());

2368 }

2369 BI->setCondition(Cond);

2370 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});

2371 }else {

2372assert(SI &&"Must either be a branch or switch!");

2373

2374// Walk the cases and directly update their successors.

2375assert(SI->getDefaultDest() == RetainedSuccBB &&

2376"Not retaining default successor!");

2377 SI->setDefaultDest(LoopPH);

2378for (constauto &Case : SI->cases())

2379if (Case.getCaseSuccessor() == RetainedSuccBB)

2380 Case.setSuccessor(LoopPH);

2381else

2382 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);

2383

2384if (InsertFreeze)

2385 SI->setCondition(newFreezeInst(SI->getCondition(),

2386 SI->getCondition()->getName() +".fr",

2387 SI->getIterator()));

2388

2389// We need to use the set to populate domtree updates as even when there

2390// are multiple cases pointing at the same successor we only want to

2391// remove and insert one edge in the domtree.

2392for (BasicBlock *SuccBB : UnswitchedSuccBBs)

2393 DTUpdates.push_back(

2394 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});

2395 }

2396

2397if (MSSAU) {

2398 DT.applyUpdates(DTUpdates);

2399 DTUpdates.clear();

2400

2401// Remove all but one edge to the retained block and all unswitched

2402// blocks. This is to avoid having duplicate entries in the cloned Phis,

2403// when we know we only keep a single edge for each case.

2404 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);

2405for (BasicBlock *SuccBB : UnswitchedSuccBBs)

2406 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);

2407

2408for (auto &VMap : VMaps)

2409 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,

2410/*IgnoreIncomingWithNoClones=*/true);

2411 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);

2412

2413// Remove all edges to unswitched blocks.

2414for (BasicBlock *SuccBB : UnswitchedSuccBBs)

2415 MSSAU->removeEdge(ParentBB, SuccBB);

2416 }

2417

2418// Now unhook the successor relationship as we'll be replacing

2419// the terminator with a direct branch. This is much simpler for branches

2420// than switches so we handle those first.

2421if (BI) {

2422// Remove the parent as a predecessor of the unswitched successor.

2423assert(UnswitchedSuccBBs.size() == 1 &&

2424"Only one possible unswitched block for a branch!");

2425BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();

2426 UnswitchedSuccBB->removePredecessor(ParentBB,

2427/*KeepOneInputPHIs*/true);

2428 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});

2429 }else {

2430// Note that we actually want to remove the parent block as a predecessor

2431// of *every* case successor. The case successor is either unswitched,

2432// completely eliminating an edge from the parent to that successor, or it

2433// is a duplicate edge to the retained successor as the retained successor

2434// is always the default successor and as we'll replace this with a direct

2435// branch we no longer need the duplicate entries in the PHI nodes.

2436SwitchInst *NewSI = cast<SwitchInst>(NewTI);

2437assert(NewSI->getDefaultDest() == RetainedSuccBB &&

2438"Not retaining default successor!");

2439for (constauto &Case : NewSI->cases())

2440 Case.getCaseSuccessor()->removePredecessor(

2441 ParentBB,

2442/*KeepOneInputPHIs*/true);

2443

2444// We need to use the set to populate domtree updates as even when there

2445// are multiple cases pointing at the same successor we only want to

2446// remove and insert one edge in the domtree.

2447for (BasicBlock *SuccBB : UnswitchedSuccBBs)

2448 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});

2449 }

2450

2451// Create a new unconditional branch to the continuing block (as opposed to

2452// the one cloned).

2453Instruction *NewBI =BranchInst::Create(RetainedSuccBB, ParentBB);

2454 NewBI->setDebugLoc(NewTI->getDebugLoc());

2455

2456// After MSSAU update, remove the cloned terminator instruction NewTI.

2457 NewTI->eraseFromParent();

2458 }else {

2459assert(BI &&"Only branches have partial unswitching.");

2460assert(UnswitchedSuccBBs.size() == 1 &&

2461"Only one possible unswitched block for a branch!");

2462BasicBlock *ClonedPH = ClonedPHs.begin()->second;

2463// When doing a partial unswitch, we have to do a bit more work to build up

2464// the branch in the split block.

2465if (PartiallyInvariant)

2466buildPartialInvariantUnswitchConditionalBranch(

2467 *SplitBB, Invariants,Direction, *ClonedPH, *LoopPH, L, MSSAU);

2468else {

2469buildPartialUnswitchConditionalBranch(

2470 *SplitBB, Invariants,Direction, *ClonedPH, *LoopPH,

2471FreezeLoopUnswitchCond, BI, &AC, DT);

2472 }

2473 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});

2474

2475if (MSSAU) {

2476 DT.applyUpdates(DTUpdates);

2477 DTUpdates.clear();

2478

2479// Perform MSSA cloning updates.

2480for (auto &VMap : VMaps)

2481 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,

2482/*IgnoreIncomingWithNoClones=*/true);

2483 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);

2484 }

2485 }

2486

2487// Apply the updates accumulated above to get an up-to-date dominator tree.

2488 DT.applyUpdates(DTUpdates);

2489

2490// Now that we have an accurate dominator tree, first delete the dead cloned

2491// blocks so that we can accurately build any cloned loops. It is important to

2492// not delete the blocks from the original loop yet because we still want to

2493// reference the original loop to understand the cloned loop's structure.

2494deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);

2495

2496// Build the cloned loop structure itself. This may be substantially

2497// different from the original structure due to the simplified CFG. This also

2498// handles inserting all the cloned blocks into the correct loops.

2499SmallVector<Loop *, 4> NonChildClonedLoops;

2500for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)

2501buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);

2502

2503// Now that our cloned loops have been built, we can update the original loop.

2504// First we delete the dead blocks from it and then we rebuild the loop

2505// structure taking these deletions into account.

2506deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);

2507

2508if (MSSAU &&VerifyMemorySSA)

2509 MSSAU->getMemorySSA()->verifyMemorySSA();

2510

2511SmallVector<Loop *, 4> HoistedLoops;

2512bool IsStillLoop =

2513rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);

2514

2515if (MSSAU &&VerifyMemorySSA)

2516 MSSAU->getMemorySSA()->verifyMemorySSA();

2517

2518// This transformation has a high risk of corrupting the dominator tree, and

2519// the below steps to rebuild loop structures will result in hard to debug

2520// errors in that case so verify that the dominator tree is sane first.

2521// FIXME: Remove this when the bugs stop showing up and rely on existing

2522// verification steps.

2523assert(DT.verify(DominatorTree::VerificationLevel::Fast));

2524

2525if (BI && !PartiallyInvariant) {

2526// If we unswitched a branch which collapses the condition to a known

2527// constant we want to replace all the uses of the invariants within both

2528// the original and cloned blocks. We do this here so that we can use the

2529// now updated dominator tree to identify which side the users are on.

2530assert(UnswitchedSuccBBs.size() == 1 &&

2531"Only one possible unswitched block for a branch!");

2532BasicBlock *ClonedPH = ClonedPHs.begin()->second;

2533

2534// When considering multiple partially-unswitched invariants

2535// we cant just go replace them with constants in both branches.

2536//

2537// For 'AND' we infer that true branch ("continue") means true

2538// for each invariant operand.

2539// For 'OR' we can infer that false branch ("continue") means false

2540// for each invariant operand.

2541// So it happens that for multiple-partial case we dont replace

2542// in the unswitched branch.

2543bool ReplaceUnswitched =

2544 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;

2545

2546ConstantInt *UnswitchedReplacement =

2547Direction ?ConstantInt::getTrue(BI->getContext())

2548 :ConstantInt::getFalse(BI->getContext());

2549ConstantInt *ContinueReplacement =

2550Direction ?ConstantInt::getFalse(BI->getContext())

2551 :ConstantInt::getTrue(BI->getContext());

2552for (Value *Invariant : Invariants) {

2553assert(!isa<Constant>(Invariant) &&

2554"Should not be replacing constant values!");

2555// Use make_early_inc_range here as set invalidates the iterator.

2556for (Use &U :llvm::make_early_inc_range(Invariant->uses())) {

2557Instruction *UserI = dyn_cast<Instruction>(U.getUser());

2558if (!UserI)

2559continue;

2560

2561// Replace it with the 'continue' side if in the main loop body, and the

2562// unswitched if in the cloned blocks.

2563if (DT.dominates(LoopPH, UserI->getParent()))

2564 U.set(ContinueReplacement);

2565elseif (ReplaceUnswitched &&

2566 DT.dominates(ClonedPH, UserI->getParent()))

2567 U.set(UnswitchedReplacement);

2568 }

2569 }

2570 }

2571

2572// We can change which blocks are exit blocks of all the cloned sibling

2573// loops, the current loop, and any parent loops which shared exit blocks

2574// with the current loop. As a consequence, we need to re-form LCSSA for

2575// them. But we shouldn't need to re-form LCSSA for any child loops.

2576// FIXME: This could be made more efficient by tracking which exit blocks are

2577// new, and focusing on them, but that isn't likely to be necessary.

2578//

2579// In order to reasonably rebuild LCSSA we need to walk inside-out across the

2580// loop nest and update every loop that could have had its exits changed. We

2581// also need to cover any intervening loops. We add all of these loops to

2582// a list and sort them by loop depth to achieve this without updating

2583// unnecessary loops.

2584auto UpdateLoop = [&](Loop &UpdateL) {

2585#ifndef NDEBUG

2586 UpdateL.verifyLoop();

2587for (Loop *ChildL : UpdateL) {

2588 ChildL->verifyLoop();

2589assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&

2590"Perturbed a child loop's LCSSA form!");

2591 }

2592#endif

2593// First build LCSSA for this loop so that we can preserve it when

2594// forming dedicated exits. We don't want to perturb some other loop's

2595// LCSSA while doing that CFG edit.

2596formLCSSA(UpdateL, DT, &LI, SE);

2597

2598// For loops reached by this loop's original exit blocks we may

2599// introduced new, non-dedicated exits. At least try to re-form dedicated

2600// exits for these loops. This may fail if they couldn't have dedicated

2601// exits to start with.

2602formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU,/*PreserveLCSSA*/true);

2603 };

2604

2605// For non-child cloned loops and hoisted loops, we just need to update LCSSA

2606// and we can do it in any order as they don't nest relative to each other.

2607//

2608// Also check if any of the loops we have updated have become top-level loops

2609// as that will necessitate widening the outer loop scope.

2610for (Loop *UpdatedL :

2611 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {

2612 UpdateLoop(*UpdatedL);

2613if (UpdatedL->isOutermost())

2614 OuterExitL =nullptr;

2615 }

2616if (IsStillLoop) {

2617 UpdateLoop(L);

2618if (L.isOutermost())

2619 OuterExitL =nullptr;

2620 }

2621

2622// If the original loop had exit blocks, walk up through the outer most loop

2623// of those exit blocks to update LCSSA and form updated dedicated exits.

2624if (OuterExitL != &L)

2625for (Loop *OuterL = ParentL; OuterL != OuterExitL;

2626 OuterL = OuterL->getParentLoop())

2627 UpdateLoop(*OuterL);

2628

2629#ifndef NDEBUG

2630// Verify the entire loop structure to catch any incorrect updates before we

2631// progress in the pass pipeline.

2632 LI.verify(DT);

2633#endif

2634

2635// Now that we've unswitched something, make callbacks to report the changes.

2636// For that we need to merge together the updated loops and the cloned loops

2637// and check whether the original loop survived.

2638SmallVector<Loop *, 4> SibLoops;

2639for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))

2640if (UpdatedL->getParentLoop() == ParentL)

2641 SibLoops.push_back(UpdatedL);

2642postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,

2643 InjectedCondition, SibLoops);

2644

2645if (MSSAU &&VerifyMemorySSA)

2646 MSSAU->getMemorySSA()->verifyMemorySSA();

2647

2648if (BI)

2649 ++NumBranches;

2650else

2651 ++NumSwitches;

2652}

2653

2654/// Recursively compute the cost of a dominator subtree based on the per-block

2655/// cost map provided.

2656///

2657/// The recursive computation is memozied into the provided DT-indexed cost map

2658/// to allow querying it for most nodes in the domtree without it becoming

2659/// quadratic.

2660staticInstructionCost computeDomSubtreeCost(

2661DomTreeNode &N,

2662constSmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,

2663SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {

2664// Don't accumulate cost (or recurse through) blocks not in our block cost

2665// map and thus not part of the duplication cost being considered.

2666auto BBCostIt = BBCostMap.find(N.getBlock());

2667if (BBCostIt == BBCostMap.end())

2668return 0;

2669

2670// Lookup this node to see if we already computed its cost.

2671auto DTCostIt = DTCostMap.find(&N);

2672if (DTCostIt != DTCostMap.end())

2673return DTCostIt->second;

2674

2675// If not, we have to compute it. We can't use insert above and update

2676// because computing the cost may insert more things into the map.

2677InstructionCost Cost = std::accumulate(

2678N.begin(),N.end(), BBCostIt->second,

2679 [&](InstructionCost Sum,DomTreeNode *ChildN) ->InstructionCost {

2680 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);

2681 });

2682bool Inserted = DTCostMap.insert({&N,Cost}).second;

2683 (void)Inserted;

2684assert(Inserted &&"Should not insert a node while visiting children!");

2685returnCost;

2686}

2687

2688/// Turns a select instruction into implicit control flow branch,

2689/// making the following replacement:

2690///

2691/// head:

2692/// --code before select--

2693/// select %cond, %trueval, %falseval

2694/// --code after select--

2695///

2696/// into

2697///

2698/// head:

2699/// --code before select--

2700/// br i1 %cond, label %then, label %tail

2701///

2702/// then:

2703/// br %tail

2704///

2705/// tail:

2706/// phi [ %trueval, %then ], [ %falseval, %head]

2707/// unreachable

2708///

2709/// It also makes all relevant DT and LI updates, so that all structures are in

2710/// valid state after this transform.

2711staticBranchInst *turnSelectIntoBranch(SelectInst *SI,DominatorTree &DT,

2712LoopInfo &LI,MemorySSAUpdater *MSSAU,

2713AssumptionCache *AC) {

2714LLVM_DEBUG(dbgs() <<"Turning " << *SI <<" into a branch.\n");

2715BasicBlock *HeadBB = SI->getParent();

2716

2717DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);

2718SplitBlockAndInsertIfThen(SI->getCondition(), SI,false,

2719 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);

2720auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());

2721BasicBlock *ThenBB = CondBr->getSuccessor(0),

2722 *TailBB = CondBr->getSuccessor(1);

2723if (MSSAU)

2724 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);

2725

2726PHINode *Phi =

2727PHINode::Create(SI->getType(), 2,"unswitched.select", SI->getIterator());

2728 Phi->addIncoming(SI->getTrueValue(), ThenBB);

2729 Phi->addIncoming(SI->getFalseValue(), HeadBB);

2730 Phi->setDebugLoc(SI->getDebugLoc());

2731 SI->replaceAllUsesWith(Phi);

2732 SI->eraseFromParent();

2733

2734if (MSSAU &&VerifyMemorySSA)

2735 MSSAU->getMemorySSA()->verifyMemorySSA();

2736

2737 ++NumSelects;

2738return CondBr;

2739}

2740

2741/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,

2742/// making the following replacement:

2743///

2744/// --code before guard--

2745/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]

2746/// --code after guard--

2747///

2748/// into

2749///

2750/// --code before guard--

2751/// br i1 %cond, label %guarded, label %deopt

2752///

2753/// guarded:

2754/// --code after guard--

2755///

2756/// deopt:

2757/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]

2758/// unreachable

2759///

2760/// It also makes all relevant DT and LI updates, so that all structures are in

2761/// valid state after this transform.

2762staticBranchInst *turnGuardIntoBranch(IntrinsicInst *GI,Loop &L,

2763DominatorTree &DT,LoopInfo &LI,

2764MemorySSAUpdater *MSSAU) {

2765SmallVector<DominatorTree::UpdateType, 4> DTUpdates;

2766LLVM_DEBUG(dbgs() <<"Turning " << *GI <<" into a branch.\n");

2767BasicBlock *CheckBB = GI->getParent();

2768

2769if (MSSAU &&VerifyMemorySSA)

2770 MSSAU->getMemorySSA()->verifyMemorySSA();

2771

2772DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);

2773Instruction *DeoptBlockTerm =

2774SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI,true,

2775 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);

2776BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());

2777// SplitBlockAndInsertIfThen inserts control flow that branches to

2778// DeoptBlockTerm if the condition is true. We want the opposite.

2779 CheckBI->swapSuccessors();

2780

2781BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);

2782 GuardedBlock->setName("guarded");

2783 CheckBI->getSuccessor(1)->setName("deopt");

2784BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);

2785

2786if (MSSAU)

2787 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);

2788

2789 GI->moveBefore(DeoptBlockTerm->getIterator());

2790 GI->setArgOperand(0,ConstantInt::getFalse(GI->getContext()));

2791

2792if (MSSAU) {

2793MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));

2794 MSSAU->moveToPlace(MD, DeoptBlock,MemorySSA::BeforeTerminator);

2795if (VerifyMemorySSA)

2796 MSSAU->getMemorySSA()->verifyMemorySSA();

2797 }

2798

2799if (VerifyLoopInfo)

2800 LI.verify(DT);

2801 ++NumGuards;

2802return CheckBI;

2803}

2804

2805/// Cost multiplier is a way to limit potentially exponential behavior

2806/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch

2807/// candidates available. Also accounting for the number of "sibling" loops with

2808/// the idea to account for previous unswitches that already happened on this

2809/// cluster of loops. There was an attempt to keep this formula simple,

2810/// just enough to limit the worst case behavior. Even if it is not that simple

2811/// now it is still not an attempt to provide a detailed heuristic size

2812/// prediction.

2813///

2814/// TODO: Make a proper accounting of "explosion" effect for all kinds of

2815/// unswitch candidates, making adequate predictions instead of wild guesses.

2816/// That requires knowing not just the number of "remaining" candidates but

2817/// also costs of unswitching for each of these candidates.

2818staticintCalculateUnswitchCostMultiplier(

2819constInstruction &TI,constLoop &L,constLoopInfo &LI,

2820constDominatorTree &DT,

2821ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {

2822

2823// Guards and other exiting conditions do not contribute to exponential

2824// explosion as soon as they dominate the latch (otherwise there might be

2825// another path to the latch remaining that does not allow to eliminate the

2826// loop copy on unswitch).

2827constBasicBlock *Latch = L.getLoopLatch();

2828constBasicBlock *CondBlock = TI.getParent();

2829if (DT.dominates(CondBlock, Latch) &&

2830 (isGuard(&TI) ||

2831 (TI.isTerminator() &&

2832llvm::count_if(successors(&TI), [&L](constBasicBlock *SuccBB) {

2833return L.contains(SuccBB);

2834 }) <= 1))) {

2835 NumCostMultiplierSkipped++;

2836return 1;

2837 }

2838

2839auto *ParentL = L.getParentLoop();

2840int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()

2841 : std::distance(LI.begin(), LI.end()));

2842// Count amount of clones that all the candidates might cause during

2843// unswitching. Branch/guard/select counts as 1, switch counts as log2 of its

2844// cases.

2845int UnswitchedClones = 0;

2846for (constauto &Candidate : UnswitchCandidates) {

2847constInstruction *CI = Candidate.TI;

2848constBasicBlock *CondBlock = CI->getParent();

2849bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);

2850if (isa<SelectInst>(CI)) {

2851 UnswitchedClones++;

2852continue;

2853 }

2854if (isGuard(CI)) {

2855if (!SkipExitingSuccessors)

2856 UnswitchedClones++;

2857continue;

2858 }

2859int NonExitingSuccessors =

2860llvm::count_if(successors(CondBlock),

2861 [SkipExitingSuccessors, &L](constBasicBlock *SuccBB) {

2862return !SkipExitingSuccessors || L.contains(SuccBB);

2863 });

2864 UnswitchedClones +=Log2_32(NonExitingSuccessors);

2865 }

2866

2867// Ignore up to the "unscaled candidates" number of unswitch candidates

2868// when calculating the power-of-two scaling of the cost. The main idea

2869// with this control is to allow a small number of unswitches to happen

2870// and rely more on siblings multiplier (see below) when the number

2871// of candidates is small.

2872unsigned ClonesPower =

2873 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);

2874

2875// Allowing top-level loops to spread a bit more than nested ones.

2876int SiblingsMultiplier =

2877 std::max((ParentL ? SiblingsCount

2878 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),

2879 1);

2880// Compute the cost multiplier in a way that won't overflow by saturating

2881// at an upper bound.

2882int CostMultiplier;

2883if (ClonesPower >Log2_32(UnswitchThreshold) ||

2884 SiblingsMultiplier >UnswitchThreshold)

2885 CostMultiplier =UnswitchThreshold;

2886else

2887 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),

2888 (int)UnswitchThreshold);

2889

2890LLVM_DEBUG(dbgs() <<" Computed multiplier " << CostMultiplier

2891 <<" (siblings " << SiblingsMultiplier <<" * clones "

2892 << (1 << ClonesPower) <<")"

2893 <<" for unswitch candidate: " << TI <<"\n");

2894return CostMultiplier;

2895}

2896

2897staticboolcollectUnswitchCandidates(

2898SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,

2899IVConditionInfo &PartialIVInfo,Instruction *&PartialIVCondBranch,

2900constLoop &L,constLoopInfo &LI,AAResults &AA,

2901constMemorySSAUpdater *MSSAU) {

2902assert(UnswitchCandidates.empty() &&"Should be!");

2903

2904auto AddUnswitchCandidatesForInst = [&](Instruction *I,Value *Cond) {

2905Cond =skipTrivialSelect(Cond);

2906if (isa<Constant>(Cond))

2907return;

2908if (L.isLoopInvariant(Cond)) {

2909 UnswitchCandidates.push_back({I, {Cond}});

2910return;

2911 }

2912if (match(Cond,m_CombineOr(m_LogicalAnd(),m_LogicalOr()))) {

2913TinyPtrVector<Value *> Invariants =

2914collectHomogenousInstGraphLoopInvariants(

2915 L, *static_cast<Instruction *>(Cond), LI);

2916if (!Invariants.empty())

2917 UnswitchCandidates.push_back({I, std::move(Invariants)});

2918 }

2919 };

2920

2921// Whether or not we should also collect guards in the loop.

2922bool CollectGuards =false;

2923if (UnswitchGuards) {

2924auto *GuardDecl =Intrinsic::getDeclarationIfExists(

2925 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);

2926if (GuardDecl && !GuardDecl->use_empty())

2927 CollectGuards =true;

2928 }

2929

2930for (auto *BB : L.blocks()) {

2931if (LI.getLoopFor(BB) != &L)

2932continue;

2933

2934for (auto &I : *BB) {

2935if (auto *SI = dyn_cast<SelectInst>(&I)) {

2936auto *Cond = SI->getCondition();

2937// Do not unswitch vector selects and logical and/or selects

2938if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))

2939 AddUnswitchCandidatesForInst(SI,Cond);

2940 }elseif (CollectGuards &&isGuard(&I)) {

2941auto *Cond =

2942skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));

2943// TODO: Support AND, OR conditions and partial unswitching.

2944if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))

2945 UnswitchCandidates.push_back({&I, {Cond}});

2946 }

2947 }

2948

2949if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {

2950// We can only consider fully loop-invariant switch conditions as we need

2951// to completely eliminate the switch after unswitching.

2952if (!isa<Constant>(SI->getCondition()) &&

2953 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())

2954 UnswitchCandidates.push_back({SI, {SI->getCondition()}});

2955continue;

2956 }

2957

2958auto *BI = dyn_cast<BranchInst>(BB->getTerminator());

2959if (!BI || !BI->isConditional() ||

2960 BI->getSuccessor(0) == BI->getSuccessor(1))

2961continue;

2962

2963 AddUnswitchCandidatesForInst(BI, BI->getCondition());

2964 }

2965

2966if (MSSAU && !findOptionMDForLoop(&L,"llvm.loop.unswitch.partial.disable") &&

2967 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {

2968return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();

2969 })) {

2970MemorySSA *MSSA = MSSAU->getMemorySSA();

2971if (autoInfo =hasPartialIVCondition(L,MSSAThreshold, *MSSA, AA)) {

2972LLVM_DEBUG(

2973dbgs() <<"simple-loop-unswitch: Found partially invariant condition "

2974 << *Info->InstToDuplicate[0] <<"\n");

2975 PartialIVInfo = *Info;

2976 PartialIVCondBranch = L.getHeader()->getTerminator();

2977TinyPtrVector<Value *> ValsToDuplicate;

2978llvm::append_range(ValsToDuplicate,Info->InstToDuplicate);

2979 UnswitchCandidates.push_back(

2980 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});

2981 }

2982 }

2983return !UnswitchCandidates.empty();

2984}

2985

2986/// Tries to canonicalize condition described by:

2987///

2988/// br (LHS pred RHS), label IfTrue, label IfFalse

2989///

2990/// into its equivalent where `Pred` is something that we support for injected

2991/// invariants (so far it is limited to ult), LHS in canonicalized form is

2992/// non-invariant and RHS is an invariant.

2993staticvoidcanonicalizeForInvariantConditionInjection(CmpPredicate &Pred,

2994Value *&LHS,Value *&RHS,

2995BasicBlock *&IfTrue,

2996BasicBlock *&IfFalse,

2997constLoop &L) {

2998if (!L.contains(IfTrue)) {

2999 Pred = ICmpInst::getInversePredicate(Pred);

3000std::swap(IfTrue, IfFalse);

3001 }

3002

3003// Move loop-invariant argument to RHS position.

3004if (L.isLoopInvariant(LHS)) {

3005 Pred = ICmpInst::getSwappedPredicate(Pred);

3006std::swap(LHS,RHS);

3007 }

3008

3009if (Pred == ICmpInst::ICMP_SGE &&match(RHS,m_Zero())) {

3010// Turn "x >=s 0" into "x <u UMIN_INT"

3011 Pred = ICmpInst::ICMP_ULT;

3012RHS = ConstantInt::get(

3013RHS->getContext(),

3014APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));

3015 }

3016}

3017

3018/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )

3019/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by

3020/// injecting a loop-invariant condition.

3021staticboolshouldTryInjectInvariantCondition(

3022constICmpInst::Predicate Pred,constValue *LHS,constValue *RHS,

3023constBasicBlock *IfTrue,constBasicBlock *IfFalse,constLoop &L) {

3024if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))

3025returnfalse;

3026// TODO: Support other predicates.

3027if (Pred != ICmpInst::ICMP_ULT)

3028returnfalse;

3029// TODO: Support non-loop-exiting branches?

3030if (!L.contains(IfTrue) || L.contains(IfFalse))

3031returnfalse;

3032// FIXME: For some reason this causes problems with MSSA updates, need to

3033// investigate why. So far, just don't unswitch latch.

3034if (L.getHeader() == IfTrue)

3035returnfalse;

3036returntrue;

3037}

3038

3039/// Returns true, if metadata on \p BI allows us to optimize branching into \p

3040/// TakenSucc via injection of invariant conditions. The branch should be not

3041/// enough and not previously unswitched, the information about this comes from

3042/// the metadata.

3043boolshouldTryInjectBasingOnMetadata(constBranchInst *BI,

3044constBasicBlock *TakenSucc) {

3045SmallVector<uint32_t> Weights;

3046if (!extractBranchWeights(*BI, Weights))

3047returnfalse;

3048unsignedT =InjectInvariantConditionHotnesThreshold;

3049BranchProbability LikelyTaken(T - 1,T);

3050

3051assert(Weights.size() == 2 &&"Unexpected profile data!");

3052size_tIdx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;

3053auto Num = Weights[Idx];

3054auto Denom = Weights[0] + Weights[1];

3055// Degenerate or overflowed metadata.

3056if (Denom == 0 || Num > Denom)

3057returnfalse;

3058BranchProbability ActualTaken(Num, Denom);

3059if (LikelyTaken > ActualTaken)

3060returnfalse;

3061returntrue;

3062}

3063

3064/// Materialize pending invariant condition of the given candidate into IR. The

3065/// injected loop-invariant condition implies the original loop-variant branch

3066/// condition, so the materialization turns

3067///

3068/// loop_block:

3069/// ...

3070/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc

3071///

3072/// into

3073///

3074/// preheader:

3075/// %invariant_cond = LHS pred RHS

3076/// ...

3077/// loop_block:

3078/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck

3079/// OriginalCheck:

3080/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc

3081/// ...

3082static NonTrivialUnswitchCandidate

3083injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate,Loop &L,

3084DominatorTree &DT,LoopInfo &LI,

3085AssumptionCache &AC,MemorySSAUpdater *MSSAU) {

3086assert(Candidate.hasPendingInjection() &&"Nothing to inject!");

3087BasicBlock *Preheader = L.getLoopPreheader();

3088assert(Preheader &&"Loop is not in simplified form?");

3089assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&

3090"Unswitching branch of inner loop!");

3091

3092auto Pred = Candidate.PendingInjection->Pred;

3093auto *LHS = Candidate.PendingInjection->LHS;

3094auto *RHS = Candidate.PendingInjection->RHS;

3095auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;

3096auto *TI = cast<BranchInst>(Candidate.TI);

3097auto *BB = Candidate.TI->getParent();

3098auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)

3099 : TI->getSuccessor(0);

3100// FIXME: Remove this once limitation on successors is lifted.

3101assert(L.contains(InLoopSucc) &&"Not supported yet!");

3102assert(!L.contains(OutOfLoopSucc) &&"Not supported yet!");

3103auto &Ctx = BB->getContext();

3104

3105IRBuilder<> Builder(Preheader->getTerminator());

3106assert(ICmpInst::isUnsigned(Pred) &&"Not supported yet!");

3107if (LHS->getType() !=RHS->getType()) {

3108if (LHS->getType()->getIntegerBitWidth() <

3109RHS->getType()->getIntegerBitWidth())

3110LHS = Builder.CreateZExt(LHS,RHS->getType(),LHS->getName() +".wide");

3111else

3112RHS = Builder.CreateZExt(RHS,LHS->getType(),RHS->getName() +".wide");

3113 }

3114// Do not use builder here: CreateICmp may simplify this into a constant and

3115// unswitching will break. Better optimize it away later.

3116auto *InjectedCond =

3117 ICmpInst::Create(Instruction::ICmp, Pred,LHS,RHS,"injected.cond",

3118 Preheader->getTerminator()->getIterator());

3119

3120BasicBlock *CheckBlock =BasicBlock::Create(Ctx, BB->getName() +".check",

3121 BB->getParent(), InLoopSucc);

3122 Builder.SetInsertPoint(TI);

3123auto *InvariantBr =

3124 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);

3125

3126 Builder.SetInsertPoint(CheckBlock);

3127 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),

3128 TI->getSuccessor(1));

3129 TI->eraseFromParent();

3130

3131// Fixup phis.

3132for (auto &I : *InLoopSucc) {

3133auto *PN = dyn_cast<PHINode>(&I);

3134if (!PN)

3135break;

3136auto *Inc = PN->getIncomingValueForBlock(BB);

3137 PN->addIncoming(Inc, CheckBlock);

3138 }

3139 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);

3140

3141SmallVector<DominatorTree::UpdateType, 4> DTUpdates = {

3142 { DominatorTree::Insert, BB, CheckBlock },

3143 { DominatorTree::Insert, CheckBlock, InLoopSucc },

3144 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },

3145 { DominatorTree::Delete, BB, OutOfLoopSucc }

3146 };

3147

3148 DT.applyUpdates(DTUpdates);

3149if (MSSAU)

3150 MSSAU->applyUpdates(DTUpdates, DT);

3151 L.addBasicBlockToLoop(CheckBlock, LI);

3152

3153#ifndef NDEBUG

3154 DT.verify();

3155 LI.verify(DT);

3156if (MSSAU &&VerifyMemorySSA)

3157 MSSAU->getMemorySSA()->verifyMemorySSA();

3158#endif

3159

3160// TODO: In fact, cost of unswitching a new invariant candidate is *slightly*

3161// higher because we have just inserted a new block. Need to think how to

3162// adjust the cost of injected candidates when it was first computed.

3163LLVM_DEBUG(dbgs() <<"Injected a new loop-invariant branch " << *InvariantBr

3164 <<" and considering it for unswitching.");

3165 ++NumInvariantConditionsInjected;

3166return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },

3167 Candidate.Cost);

3168}

3169

3170/// Given chain of loop branch conditions looking like:

3171/// br (Variant < Invariant1)

3172/// br (Variant < Invariant2)

3173/// br (Variant < Invariant3)

3174/// ...

3175/// collect set of invariant conditions on which we want to unswitch, which

3176/// look like:

3177/// Invariant1 <= Invariant2

3178/// Invariant2 <= Invariant3

3179/// ...

3180/// Though they might not immediately exist in the IR, we can still inject them.

3181staticboolinsertCandidatesWithPendingInjections(

3182SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,Loop &L,

3183ICmpInst::Predicate Pred,ArrayRef<CompareDesc> Compares,

3184constDominatorTree &DT) {

3185

3186assert(ICmpInst::isRelational(Pred));

3187assert(ICmpInst::isStrictPredicate(Pred));

3188if (Compares.size() < 2)

3189returnfalse;

3190ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);

3191for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;

3192 Next != Compares.end(); ++Prev, ++Next) {

3193Value *LHS = Next->Invariant;

3194Value *RHS = Prev->Invariant;

3195BasicBlock *InLoopSucc = Prev->InLoopSucc;

3196 InjectedInvariant ToInject(NonStrictPred,LHS,RHS, InLoopSucc);

3197 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },

3198 std::nullopt, std::move(ToInject));

3199 UnswitchCandidates.push_back(std::move(Candidate));

3200 }

3201returntrue;

3202}

3203

3204/// Collect unswitch candidates by invariant conditions that are not immediately

3205/// present in the loop. However, they can be injected into the code if we

3206/// decide it's profitable.

3207/// An example of such conditions is following:

3208///

3209/// for (...) {

3210/// x = load ...

3211/// if (! x <u C1) break;

3212/// if (! x <u C2) break;

3213/// <do something>

3214/// }

3215///

3216/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=

3217/// C2" automatically implies "x <u C2", so we can get rid of one of

3218/// loop-variant checks in unswitched loop version.

3219staticboolcollectUnswitchCandidatesWithInjections(

3220SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,

3221IVConditionInfo &PartialIVInfo,Instruction *&PartialIVCondBranch,Loop &L,

3222constDominatorTree &DT,constLoopInfo &LI,AAResults &AA,

3223constMemorySSAUpdater *MSSAU) {

3224if (!InjectInvariantConditions)

3225returnfalse;

3226

3227if (!DT.isReachableFromEntry(L.getHeader()))

3228returnfalse;

3229auto *Latch = L.getLoopLatch();

3230// Need to have a single latch and a preheader.

3231if (!Latch)

3232returnfalse;

3233assert(L.getLoopPreheader() &&"Must have a preheader!");

3234

3235DenseMap<Value *, SmallVector<CompareDesc, 4> > CandidatesULT;

3236// Traverse the conditions that dominate latch (and therefore dominate each

3237// other).

3238for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());

3239 DTN = DTN->getIDom()) {

3240CmpPredicate Pred;

3241Value *LHS =nullptr, *RHS =nullptr;

3242BasicBlock *IfTrue =nullptr, *IfFalse =nullptr;

3243auto *BB = DTN->getBlock();

3244// Ignore inner loops.

3245if (LI.getLoopFor(BB) != &L)

3246continue;

3247auto *Term = BB->getTerminator();

3248if (!match(Term,m_Br(m_ICmp(Pred,m_Value(LHS),m_Value(RHS)),

3249m_BasicBlock(IfTrue),m_BasicBlock(IfFalse))))

3250continue;

3251if (!LHS->getType()->isIntegerTy())

3252continue;

3253canonicalizeForInvariantConditionInjection(Pred,LHS,RHS, IfTrue, IfFalse,

3254 L);

3255if (!shouldTryInjectInvariantCondition(Pred,LHS,RHS, IfTrue, IfFalse, L))

3256continue;

3257if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))

3258continue;

3259// Strip ZEXT for unsigned predicate.

3260// TODO: once signed predicates are supported, also strip SEXT.

3261 CompareDescDesc(cast<BranchInst>(Term),RHS, IfTrue);

3262while (auto *Zext = dyn_cast<ZExtInst>(LHS))

3263LHS = Zext->getOperand(0);

3264 CandidatesULT[LHS].push_back(Desc);

3265 }

3266

3267bool Found =false;

3268for (auto &It : CandidatesULT)

3269 Found |=insertCandidatesWithPendingInjections(

3270 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);

3271return Found;

3272}

3273

3274staticboolisSafeForNoNTrivialUnswitching(Loop &L,LoopInfo &LI) {

3275if (!L.isSafeToClone())

3276returnfalse;

3277for (auto *BB : L.blocks())

3278for (auto &I : *BB) {

3279if (I.getType()->isTokenTy() &&I.isUsedOutsideOfBlock(BB))

3280returnfalse;

3281if (auto *CB = dyn_cast<CallBase>(&I)) {

3282assert(!CB->cannotDuplicate() &&"Checked by L.isSafeToClone().");

3283if (CB->isConvergent())

3284returnfalse;

3285 }

3286 }

3287

3288// Check if there are irreducible CFG cycles in this loop. If so, we cannot

3289// easily unswitch non-trivial edges out of the loop. Doing so might turn the

3290// irreducible control flow into reducible control flow and introduce new

3291// loops "out of thin air". If we ever discover important use cases for doing

3292// this, we can add support to loop unswitch, but it is a lot of complexity

3293// for what seems little or no real world benefit.

3294LoopBlocksRPO RPOT(&L);

3295 RPOT.perform(&LI);

3296if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))

3297returnfalse;

3298

3299SmallVector<BasicBlock *, 4> ExitBlocks;

3300 L.getUniqueExitBlocks(ExitBlocks);

3301// We cannot unswitch if exit blocks contain a cleanuppad/catchswitch

3302// instruction as we don't know how to split those exit blocks.

3303// FIXME: We should teach SplitBlock to handle this and remove this

3304// restriction.

3305for (auto *ExitBB : ExitBlocks) {

3306auto It = ExitBB->getFirstNonPHIIt();

3307if (isa<CleanupPadInst>(It) || isa<CatchSwitchInst>(It)) {

3308LLVM_DEBUG(dbgs() <<"Cannot unswitch because of cleanuppad/catchswitch "

3309"in exit block\n");

3310returnfalse;

3311 }

3312 }

3313

3314returntrue;

3315}

3316

3317static NonTrivialUnswitchCandidatefindBestNonTrivialUnswitchCandidate(

3318ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates,constLoop &L,

3319constDominatorTree &DT,constLoopInfo &LI,AssumptionCache &AC,

3320constTargetTransformInfo &TTI,constIVConditionInfo &PartialIVInfo) {

3321// Given that unswitching these terminators will require duplicating parts of

3322// the loop, so we need to be able to model that cost. Compute the ephemeral

3323// values and set up a data structure to hold per-BB costs. We cache each

3324// block's cost so that we don't recompute this when considering different

3325// subsets of the loop for duplication during unswitching.

3326SmallPtrSet<const Value *, 4> EphValues;

3327CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);

3328SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;

3329

3330// Compute the cost of each block, as well as the total loop cost. Also, bail

3331// out if we see instructions which are incompatible with loop unswitching

3332// (convergent, noduplicate, or cross-basic-block tokens).

3333// FIXME: We might be able to safely handle some of these in non-duplicated

3334// regions.

3335TargetTransformInfo::TargetCostKind CostKind =

3336 L.getHeader()->getParent()->hasMinSize()

3337 ?TargetTransformInfo::TCK_CodeSize

3338 :TargetTransformInfo::TCK_SizeAndLatency;

3339InstructionCost LoopCost = 0;

3340for (auto *BB : L.blocks()) {

3341InstructionCost Cost = 0;

3342for (auto &I : *BB) {

3343if (EphValues.count(&I))

3344continue;

3345Cost +=TTI.getInstructionCost(&I,CostKind);

3346 }

3347assert(Cost >= 0 &&"Must not have negative costs!");

3348 LoopCost +=Cost;

3349assert(LoopCost >= 0 &&"Must not have negative loop costs!");

3350 BBCostMap[BB] =Cost;

3351 }

3352LLVM_DEBUG(dbgs() <<" Total loop cost: " << LoopCost <<"\n");

3353

3354// Now we find the best candidate by searching for the one with the following

3355// properties in order:

3356//

3357// 1) An unswitching cost below the threshold

3358// 2) The smallest number of duplicated unswitch candidates (to avoid

3359// creating redundant subsequent unswitching)

3360// 3) The smallest cost after unswitching.

3361//

3362// We prioritize reducing fanout of unswitch candidates provided the cost

3363// remains below the threshold because this has a multiplicative effect.

3364//

3365// This requires memoizing each dominator subtree to avoid redundant work.

3366//

3367// FIXME: Need to actually do the number of candidates part above.

3368SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;

3369// Given a terminator which might be unswitched, computes the non-duplicated

3370// cost for that terminator.

3371auto ComputeUnswitchedCost = [&](Instruction &TI,

3372bool FullUnswitch) ->InstructionCost {

3373// Unswitching selects unswitches the entire loop.

3374if (isa<SelectInst>(TI))

3375return LoopCost;

3376

3377BasicBlock &BB = *TI.getParent();

3378SmallPtrSet<BasicBlock *, 4> Visited;

3379

3380InstructionCost Cost = 0;

3381for (BasicBlock *SuccBB :successors(&BB)) {

3382// Don't count successors more than once.

3383if (!Visited.insert(SuccBB).second)

3384continue;

3385

3386// If this is a partial unswitch candidate, then it must be a conditional

3387// branch with a condition of either `or`, `and`, their corresponding

3388// select forms or partially invariant instructions. In that case, one of

3389// the successors is necessarily duplicated, so don't even try to remove

3390// its cost.

3391if (!FullUnswitch) {

3392auto &BI = cast<BranchInst>(TI);

3393Value *Cond =skipTrivialSelect(BI.getCondition());

3394if (match(Cond,m_LogicalAnd())) {

3395if (SuccBB == BI.getSuccessor(1))

3396continue;

3397 }elseif (match(Cond,m_LogicalOr())) {

3398if (SuccBB == BI.getSuccessor(0))

3399continue;

3400 }elseif ((PartialIVInfo.KnownValue->isOneValue() &&

3401 SuccBB == BI.getSuccessor(0)) ||

3402 (!PartialIVInfo.KnownValue->isOneValue() &&

3403 SuccBB == BI.getSuccessor(1)))

3404continue;

3405 }

3406

3407// This successor's domtree will not need to be duplicated after

3408// unswitching if the edge to the successor dominates it (and thus the

3409// entire tree). This essentially means there is no other path into this

3410// subtree and so it will end up live in only one clone of the loop.

3411if (SuccBB->getUniquePredecessor() ||

3412llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {

3413return PredBB == &BB || DT.dominates(SuccBB, PredBB);

3414 })) {

3415Cost +=computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);

3416assert(Cost <= LoopCost &&

3417"Non-duplicated cost should never exceed total loop cost!");

3418 }

3419 }

3420

3421// Now scale the cost by the number of unique successors minus one. We

3422// subtract one because there is already at least one copy of the entire

3423// loop. This is computing the new cost of unswitching a condition.

3424// Note that guards always have 2 unique successors that are implicit and

3425// will be materialized if we decide to unswitch it.

3426int SuccessorsCount =isGuard(&TI) ? 2 : Visited.size();

3427assert(SuccessorsCount > 1 &&

3428"Cannot unswitch a condition without multiple distinct successors!");

3429return (LoopCost -Cost) * (SuccessorsCount - 1);

3430 };

3431

3432 std::optional<NonTrivialUnswitchCandidate> Best;

3433for (auto &Candidate : UnswitchCandidates) {

3434Instruction &TI = *Candidate.TI;

3435ArrayRef<Value *> Invariants = Candidate.Invariants;

3436BranchInst *BI = dyn_cast<BranchInst>(&TI);

3437bool FullUnswitch =

3438 !BI || Candidate.hasPendingInjection() ||

3439 (Invariants.size() == 1 &&

3440 Invariants[0] ==skipTrivialSelect(BI->getCondition()));

3441InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);

3442// Calculate cost multiplier which is a tool to limit potentially

3443// exponential behavior of loop-unswitch.

3444if (EnableUnswitchCostMultiplier) {

3445int CostMultiplier =

3446CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);

3447assert(

3448 (CostMultiplier > 0 && CostMultiplier <=UnswitchThreshold) &&

3449"cost multiplier needs to be in the range of 1..UnswitchThreshold");

3450 CandidateCost *= CostMultiplier;

3451LLVM_DEBUG(dbgs() <<" Computed cost of " << CandidateCost

3452 <<" (multiplier: " << CostMultiplier <<")"

3453 <<" for unswitch candidate: " << TI <<"\n");

3454 }else {

3455LLVM_DEBUG(dbgs() <<" Computed cost of " << CandidateCost

3456 <<" for unswitch candidate: " << TI <<"\n");

3457 }

3458

3459if (!Best || CandidateCost < Best->Cost) {

3460 Best = Candidate;

3461 Best->Cost = CandidateCost;

3462 }

3463 }

3464assert(Best &&"Must be!");

3465return *Best;

3466}

3467

3468// Insert a freeze on an unswitched branch if all is true:

3469// 1. freeze-loop-unswitch-cond option is true

3470// 2. The branch may not execute in the loop pre-transformation. If a branch may

3471// not execute and could cause UB, it would always cause UB if it is hoisted outside

3472// of the loop. Insert a freeze to prevent this case.

3473// 3. The branch condition may be poison or undef

3474staticboolshouldInsertFreeze(Loop &L,Instruction &TI,DominatorTree &DT,

3475AssumptionCache &AC) {

3476assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));

3477if (!FreezeLoopUnswitchCond)

3478returnfalse;

3479

3480ICFLoopSafetyInfo SafetyInfo;

3481 SafetyInfo.computeLoopSafetyInfo(&L);

3482if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))

3483returnfalse;

3484

3485Value *Cond;

3486if (BranchInst *BI = dyn_cast<BranchInst>(&TI))

3487Cond =skipTrivialSelect(BI->getCondition());

3488else

3489Cond =skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());

3490return !isGuaranteedNotToBeUndefOrPoison(

3491Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);

3492}

3493

3494staticboolunswitchBestCondition(Loop &L,DominatorTree &DT,LoopInfo &LI,

3495AssumptionCache &AC,AAResults &AA,

3496TargetTransformInfo &TTI,ScalarEvolution *SE,

3497MemorySSAUpdater *MSSAU,

3498LPMUpdater &LoopUpdater) {

3499// Collect all invariant conditions within this loop (as opposed to an inner

3500// loop which would be handled when visiting that inner loop).

3501SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;

3502IVConditionInfo PartialIVInfo;

3503Instruction *PartialIVCondBranch =nullptr;

3504collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,

3505 PartialIVCondBranch, L, LI, AA, MSSAU);

3506if (!findOptionMDForLoop(&L,"llvm.loop.unswitch.injection.disable"))

3507collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,

3508 PartialIVCondBranch, L, DT, LI, AA,

3509 MSSAU);

3510// If we didn't find any candidates, we're done.

3511if (UnswitchCandidates.empty())

3512returnfalse;

3513

3514LLVM_DEBUG(

3515dbgs() <<"Considering " << UnswitchCandidates.size()

3516 <<" non-trivial loop invariant conditions for unswitching.\n");

3517

3518 NonTrivialUnswitchCandidate Best =findBestNonTrivialUnswitchCandidate(

3519 UnswitchCandidates, L, DT, LI, AC,TTI, PartialIVInfo);

3520

3521assert(Best.TI &&"Failed to find loop unswitch candidate");

3522assert(Best.Cost &&"Failed to compute cost");

3523

3524if (*Best.Cost >=UnswitchThreshold) {

3525LLVM_DEBUG(dbgs() <<"Cannot unswitch, lowest cost found: " << *Best.Cost

3526 <<"\n");

3527returnfalse;

3528 }

3529

3530bool InjectedCondition =false;

3531if (Best.hasPendingInjection()) {

3532 Best =injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);

3533 InjectedCondition =true;

3534 }

3535assert(!Best.hasPendingInjection() &&

3536"All injections should have been done by now!");

3537

3538if (Best.TI != PartialIVCondBranch)

3539 PartialIVInfo.InstToDuplicate.clear();

3540

3541bool InsertFreeze;

3542if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {

3543// If the best candidate is a select, turn it into a branch. Select

3544// instructions with a poison conditional do not propagate poison, but

3545// branching on poison causes UB. Insert a freeze on the select

3546// conditional to prevent UB after turning the select into a branch.

3547 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(

3548 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);

3549 Best.TI =turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);

3550 }else {

3551// If the best candidate is a guard, turn it into a branch.

3552if (isGuard(Best.TI))

3553 Best.TI =

3554turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);

3555 InsertFreeze =shouldInsertFreeze(L, *Best.TI, DT, AC);

3556 }

3557

3558LLVM_DEBUG(dbgs() <<" Unswitching non-trivial (cost = " << Best.Cost

3559 <<") terminator: " << *Best.TI <<"\n");

3560unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,

3561 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,

3562 InjectedCondition);

3563returntrue;

3564}

3565

3566/// Unswitch control flow predicated on loop invariant conditions.

3567///

3568/// This first hoists all branches or switches which are trivial (IE, do not

3569/// require duplicating any part of the loop) out of the loop body. It then

3570/// looks at other loop invariant control flows and tries to unswitch those as

3571/// well by cloning the loop if the result is small enough.

3572///

3573/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are

3574/// also updated based on the unswitch. The `MSSA` analysis is also updated if

3575/// valid (i.e. its use is enabled).

3576///

3577/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is

3578/// true, we will attempt to do non-trivial unswitching as well as trivial

3579/// unswitching.

3580///

3581/// The `postUnswitch` function will be run after unswitching is complete

3582/// with information on whether or not the provided loop remains a loop and

3583/// a list of new sibling loops created.

3584///

3585/// If `SE` is non-null, we will update that analysis based on the unswitching

3586/// done.

3587staticboolunswitchLoop(Loop &L,DominatorTree &DT,LoopInfo &LI,

3588AssumptionCache &AC,AAResults &AA,

3589TargetTransformInfo &TTI,bool Trivial,

3590bool NonTrivial,ScalarEvolution *SE,

3591MemorySSAUpdater *MSSAU,ProfileSummaryInfo *PSI,

3592BlockFrequencyInfo *BFI,LPMUpdater &LoopUpdater) {

3593assert(L.isRecursivelyLCSSAForm(DT, LI) &&

3594"Loops must be in LCSSA form before unswitching.");

3595

3596// Must be in loop simplified form: we need a preheader and dedicated exits.

3597if (!L.isLoopSimplifyForm())

3598returnfalse;

3599

3600// Try trivial unswitch first before loop over other basic blocks in the loop.

3601if (Trivial &&unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {

3602// If we unswitched successfully we will want to clean up the loop before

3603// processing it further so just mark it as unswitched and return.

3604postUnswitch(L, LoopUpdater, L.getName(),

3605/*CurrentLoopValid*/true,/*PartiallyInvariant*/false,

3606/*InjectedCondition*/false, {});

3607returntrue;

3608 }

3609

3610constFunction *F = L.getHeader()->getParent();

3611

3612// Check whether we should continue with non-trivial conditions.

3613// EnableNonTrivialUnswitch: Global variable that forces non-trivial

3614// unswitching for testing and debugging.

3615// NonTrivial: Parameter that enables non-trivial unswitching for this

3616// invocation of the transform. But this should be allowed only

3617// for targets without branch divergence.

3618//

3619// FIXME: If divergence analysis becomes available to a loop

3620// transform, we should allow unswitching for non-trivial uniform

3621// branches even on targets that have divergence.

3622// https://bugs.llvm.org/show_bug.cgi?id=48819

3623bool ContinueWithNonTrivial =

3624EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));

3625if (!ContinueWithNonTrivial)

3626returnfalse;

3627

3628// Skip non-trivial unswitching for optsize functions.

3629if (F->hasOptSize())

3630returnfalse;

3631

3632// Returns true if Loop L's loop nest is cold, i.e. if the headers of L,

3633// of the loops L is nested in, and of the loops nested in L are all cold.

3634auto IsLoopNestCold = [&](constLoop *L) {

3635// Check L and all of its parent loops.

3636auto *Parent = L;

3637while (Parent) {

3638if (!PSI->isColdBlock(Parent->getHeader(), BFI))

3639returnfalse;

3640 Parent = Parent->getParentLoop();

3641 }

3642// Next check all loops nested within L.

3643SmallVector<const Loop *, 4> Worklist;

3644 Worklist.insert(Worklist.end(), L->getSubLoops().begin(),

3645 L->getSubLoops().end());

3646while (!Worklist.empty()) {

3647auto *CurLoop = Worklist.pop_back_val();

3648if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))

3649returnfalse;

3650 Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),

3651 CurLoop->getSubLoops().end());

3652 }

3653returntrue;

3654 };

3655

3656// Skip cold loops in cold loop nests, as unswitching them brings little

3657// benefit but increases the code size

3658if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {

3659LLVM_DEBUG(dbgs() <<" Skip cold loop: " << L <<"\n");

3660returnfalse;

3661 }

3662

3663// Perform legality checks.

3664if (!isSafeForNoNTrivialUnswitching(L, LI))

3665returnfalse;

3666

3667// For non-trivial unswitching, because it often creates new loops, we rely on

3668// the pass manager to iterate on the loops rather than trying to immediately

3669// reach a fixed point. There is no substantial advantage to iterating

3670// internally, and if any of the new loops are simplified enough to contain

3671// trivial unswitching we want to prefer those.

3672

3673// Try to unswitch the best invariant condition. We prefer this full unswitch to

3674// a partial unswitch when possible below the threshold.

3675if (unswitchBestCondition(L, DT, LI, AC, AA,TTI, SE, MSSAU, LoopUpdater))

3676returntrue;

3677

3678// No other opportunities to unswitch.

3679returnfalse;

3680}

3681

3682PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L,LoopAnalysisManager &AM,

3683LoopStandardAnalysisResults &AR,

3684LPMUpdater &U) {

3685Function &F = *L.getHeader()->getParent();

3686 (void)F;

3687ProfileSummaryInfo *PSI =nullptr;

3688if (auto OuterProxy =

3689 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)

3690 .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))

3691 PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());

3692LLVM_DEBUG(dbgs() <<"Unswitching loop in " <<F.getName() <<": " << L

3693 <<"\n");

3694

3695 std::optional<MemorySSAUpdater> MSSAU;

3696if (AR.MSSA) {

3697 MSSAU =MemorySSAUpdater(AR.MSSA);

3698if (VerifyMemorySSA)

3699 AR.MSSA->verifyMemorySSA();

3700 }

3701if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,

3702 &AR.SE, MSSAU ? &*MSSAU :nullptr, PSI, AR.BFI, U))

3703returnPreservedAnalyses::all();

3704

3705if (AR.MSSA &&VerifyMemorySSA)

3706 AR.MSSA->verifyMemorySSA();

3707

3708// Historically this pass has had issues with the dominator tree so verify it

3709// in asserts builds.

3710assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));

3711

3712auto PA =getLoopPassPreservedAnalyses();

3713if (AR.MSSA)

3714 PA.preserve<MemorySSAAnalysis>();

3715return PA;

3716}

3717

3718voidSimpleLoopUnswitchPass::printPipeline(

3719raw_ostream &OS,function_ref<StringRef(StringRef)> MapClassName2PassName) {

3720static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(

3721OS, MapClassName2PassName);

3722

3723OS <<'<';

3724OS << (NonTrivial ?"" :"no-") <<"nontrivial;";

3725OS << (Trivial ?"" :"no-") <<"trivial";

3726OS <<'>';

3727}

Analysis containing CSE Info

Definition:CSEInfo.cpp:27

This file contains the declarations for the subclasses of Constant, which represent the different fla...

CostKind

static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))

Idx

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

Definition:DeadArgumentElimination.cpp:353

Debug.h

LLVM_DEBUG

#define LLVM_DEBUG(...)

Definition:Debug.h:106

DenseMap.h

This file defines the DenseMap class.

DomTreeUpdater.h

Dominators.h

Blocks

DenseMap< Block *, BlockRelaxAux > Blocks

Definition:ELF_riscv.cpp:507

GenericDomTree.h

This file defines a set of templates that efficiently compute a dominator tree over a generic graph.

Module.h This file contains the declarations for the Module class.

Use.h

This defines the Use class.

Value.h

InstrTypes.h

InstructionCost.h

This file defines an InstructionCost class that is used when calculating the cost of an instruction,...

Instructions.h

LoopAnalysisManager.h

This header provides classes for managing per-loop analyses.

LoopInfo.h

LoopIterator.h

LoopPassManager.h

This header provides classes for managing a pipeline of passes over loops in LLVM IR.

LoopUtils.h

#define F(x, y, z)

Definition:MD5.cpp:55

#define I(x, y, z)

Definition:MD5.cpp:58

MemorySSAUpdater.h

MemorySSA.h

This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...

MustExecute.h

Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...

uint64_t IntrinsicInst * II

Definition:NVVMIntrRange.cpp:51

PatternMatch.h

ProfDataUtils.h

This file contains the declarations for profiling metadata utility functions.

ProfileSummaryInfo.h

Cond

const SmallVectorImpl< MachineOperand > & Cond

Definition:RISCVRedundantCopyElimination.cpp:75

assert

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

STLExtras.h

This file contains some templates that are useful if you are working with the STL at all.

raw_pwrite_stream & OS

Definition:SampleProfWriter.cpp:51

ScalarEvolution.h

Sequence.h

Provides some synthesis utilities to produce sequences of values.

SetVector.h

This file implements a set that has insertion order iteration characteristics.

rewritePHINodesForUnswitchedExitBlock

static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)

Rewrite the PHI nodes in an unswitched loop exit basic block.

Definition:SimpleLoopUnswitch.cpp:341

unswitchLoop

static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)

Unswitch control flow predicated on loop invariant conditions.

Definition:SimpleLoopUnswitch.cpp:3587

unswitchAllTrivialConditions

static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)

This routine scans the loop to find a branch or switch which occurs before any side effects occur.

Definition:SimpleLoopUnswitch.cpp:1047

EnableNonTrivialUnswitch

static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))

UnswitchGuards

static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))

recomputeLoopBlockSet

static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)

Recompute the set of blocks in a loop after unswitching.

Definition:SimpleLoopUnswitch.cpp:1795

CalculateUnswitchCostMultiplier

static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)

Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.

Definition:SimpleLoopUnswitch.cpp:2818

buildPartialInvariantUnswitchConditionalBranch

static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)

Copy a set of loop invariant values, and conditionally branch on them.

Definition:SimpleLoopUnswitch.cpp:292

collectHomogenousInstGraphLoopInvariants

static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)

Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...

Definition:SimpleLoopUnswitch.cpp:193

deleteDeadClonedBlocks

static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)

Definition:SimpleLoopUnswitch.cpp:1672

visitDomSubTree

void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)

Helper to visit a dominator subtree, invoking a callable on each node.

Definition:SimpleLoopUnswitch.cpp:2115

turnSelectIntoBranch

static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)

Turns a select instruction into implicit control flow branch, making the following replacement:

Definition:SimpleLoopUnswitch.cpp:2711

isSafeForNoNTrivialUnswitching

static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)

Definition:SimpleLoopUnswitch.cpp:3274

postUnswitch

void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)

Definition:SimpleLoopUnswitch.cpp:2138

buildPartialUnswitchConditionalBranch

static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)

Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...

Definition:SimpleLoopUnswitch.cpp:272

UnswitchNumInitialUnscaledCandidates

static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))

shouldTryInjectInvariantCondition

static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)

Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...

Definition:SimpleLoopUnswitch.cpp:3021

findBestNonTrivialUnswitchCandidate

static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)

Definition:SimpleLoopUnswitch.cpp:3317

EnableUnswitchCostMultiplier

static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))

skipTrivialSelect

static Value * skipTrivialSelect(Value *Cond)

Definition:SimpleLoopUnswitch.cpp:178

getTopMostExitingLoop

static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)

Definition:SimpleLoopUnswitch.cpp:480

collectUnswitchCandidatesWithInjections

static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)

Collect unswitch candidates by invariant conditions that are not immediately present in the loop.

Definition:SimpleLoopUnswitch.cpp:3219

UnswitchThreshold

static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))

replaceLoopInvariantUses

static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)

Definition:SimpleLoopUnswitch.cpp:235

unswitchTrivialBranch

static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)

Unswitch a trivial branch if the condition is loop invariant.

Definition:SimpleLoopUnswitch.cpp:509

collectUnswitchCandidates

static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)

Definition:SimpleLoopUnswitch.cpp:2897

InjectInvariantConditions

static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))

FreezeLoopUnswitchCond

static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))

computeDomSubtreeCost

static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)

Recursively compute the cost of a dominator subtree based on the per-block cost map provided.

Definition:SimpleLoopUnswitch.cpp:2660

shouldInsertFreeze

static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)

Definition:SimpleLoopUnswitch.cpp:3474

UnswitchSiblingsToplevelDiv

static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))

MSSAThreshold

static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)

canonicalizeForInvariantConditionInjection

static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)

Tries to canonicalize condition described by:

Definition:SimpleLoopUnswitch.cpp:2993

areLoopExitPHIsLoopInvariant

static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)

Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...

Definition:SimpleLoopUnswitch.cpp:252

rewritePHINodesForExitAndUnswitchedBlocks

static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)

Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.

Definition:SimpleLoopUnswitch.cpp:363

insertCandidatesWithPendingInjections

static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)

Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...

Definition:SimpleLoopUnswitch.cpp:3181

injectPendingInvariantConditions

static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)

Materialize pending invariant condition of the given candidate into IR.

Definition:SimpleLoopUnswitch.cpp:3083

DropNonTrivialImplicitNullChecks

static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))

InjectInvariantConditionHotnesThreshold

static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))

unswitchTrivialSwitch

static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)

Unswitch a trivial switch if the condition is loop invariant.

Definition:SimpleLoopUnswitch.cpp:743

unswitchNontrivialInvariants

static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)

Definition:SimpleLoopUnswitch.cpp:2175

rebuildLoopAfterUnswitch

static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)

Rebuild a loop after unswitching removes some subset of blocks and edges.

Definition:SimpleLoopUnswitch.cpp:1906

unswitchBestCondition

static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)

Definition:SimpleLoopUnswitch.cpp:3494

buildClonedLoopBlocks

static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)

Build the cloned blocks for an unswitched copy of the given loop.

Definition:SimpleLoopUnswitch.cpp:1166

deleteDeadBlocksFromLoop

static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)

Definition:SimpleLoopUnswitch.cpp:1701

shouldTryInjectBasingOnMetadata

bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)

Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...

Definition:SimpleLoopUnswitch.cpp:3043

turnGuardIntoBranch

static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)

Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...

Definition:SimpleLoopUnswitch.cpp:2762

cloneLoopNest

static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)

Recursively clone the specified loop and all of its children.

Definition:SimpleLoopUnswitch.cpp:1363

hoistLoopToNewParent

static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)

Hoist the current loop up to the innermost loop containing a remaining exit.

Definition:SimpleLoopUnswitch.cpp:409

buildClonedLoops

static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)

Build the cloned loops of an original loop from unswitching.

Definition:SimpleLoopUnswitch.cpp:1422

SimpleLoopUnswitch.h

SmallPtrSet.h

This file defines the SmallPtrSet class.

SmallVector.h

This file defines the SmallVector class.

Statistic.h

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC

#define STATISTIC(VARNAME, DESC)

Definition:Statistic.h:166

TargetTransformInfo.h

This pass exposes codegen information to IR-level passes.

Value * RHS

Definition:X86PartialReduction.cpp:74

LHS

Value * LHS

Definition:X86PartialReduction.cpp:73

llvm::AAResults

Definition:AliasAnalysis.h:314

llvm::APInt::getSignedMinValue

static APInt getSignedMinValue(unsigned numBits)

Gets minimum signed value of APInt for a specific bit width.

Definition:APInt.h:219

llvm::AnalysisManager

A container for analyses that lazily runs them and caches their results.

Definition:PassManager.h:253

llvm::AnalysisManager::getResult

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Definition:PassManager.h:410

llvm::ArrayRef

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

Definition:ArrayRef.h:41

llvm::ArrayRef::end

iterator end() const

Definition:ArrayRef.h:157

llvm::ArrayRef::size

size_t size() const

size - Get the array size.

Definition:ArrayRef.h:168

llvm::ArrayRef::begin

iterator begin() const

Definition:ArrayRef.h:156

llvm::ArrayRef::empty

bool empty() const

empty - Check if the array is empty.

Definition:ArrayRef.h:163

llvm::AssumptionCache

A cache of @llvm.assume calls within a function.

Definition:AssumptionCache.h:42

llvm::AssumptionCache::registerAssumption

void registerAssumption(AssumeInst *CI)

Add an @llvm.assume intrinsic to this function's cache.

Definition:AssumptionCache.cpp:185

llvm::BasicBlock

LLVM Basic Block Representation.

Definition:BasicBlock.h:61

llvm::BasicBlock::end

iterator end()

Definition:BasicBlock.h:474

llvm::BasicBlock::begin

iterator begin()

Instruction iterator methods.

Definition:BasicBlock.h:461

llvm::BasicBlock::phis

iterator_range< const_phi_iterator > phis() const

Returns a range that iterates over the phis in the basic block.

Definition:BasicBlock.h:530

llvm::BasicBlock::Create

static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)

Creates a new BasicBlock.

Definition:BasicBlock.h:213

llvm::BasicBlock::iterator

InstListType::iterator iterator

Instruction iterators...

Definition:BasicBlock.h:177

llvm::BasicBlock::size

size_t size() const

Definition:BasicBlock.h:482

llvm::BasicBlock::moveBefore

void moveBefore(BasicBlock *MovePos)

Unlink this basic block from its current function and insert it into the function that MovePos lives ...

Definition:BasicBlock.h:389

llvm::BasicBlock::getTerminator

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Definition:BasicBlock.h:240

llvm::BasicBlock::removePredecessor

void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)

Update PHI nodes in this BasicBlock before removal of predecessor Pred.

Definition:BasicBlock.cpp:538

llvm::BlockFrequencyInfo

BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...

Definition:BlockFrequencyInfo.h:37

llvm::BranchInst

Conditional or Unconditional Branch instruction.

Definition:Instructions.h:3016

llvm::BranchInst::setCondition

void setCondition(Value *V)

Definition:Instructions.h:3097

llvm::BranchInst::swapSuccessors

void swapSuccessors()

Swap the successors of this branch instruction.

Definition:Instructions.cpp:1168

llvm::BranchInst::isConditional

bool isConditional() const

Definition:Instructions.h:3090

llvm::BranchInst::Create

static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)

Definition:Instructions.h:3072

llvm::BranchInst::getSuccessor

BasicBlock * getSuccessor(unsigned i) const

Definition:Instructions.h:3104

llvm::BranchInst::setSuccessor

void setSuccessor(unsigned idx, BasicBlock *NewSucc)

Definition:Instructions.h:3109

llvm::BranchInst::getCondition

Value * getCondition() const

Definition:Instructions.h:3092

llvm::BranchProbability

Definition:BranchProbability.h:30

llvm::CallBase::getArgOperand

Value * getArgOperand(unsigned i) const

Definition:InstrTypes.h:1286

llvm::CallBase::setArgOperand

void setArgOperand(unsigned i, Value *v)

Definition:InstrTypes.h:1291

llvm::CmpInst::Predicate

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

Definition:InstrTypes.h:673

llvm::CmpPredicate

An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...

Definition:CmpPredicate.h:22

llvm::ConstantInt

This is the shared class of boolean and integer constants.

Definition:Constants.h:83

llvm::ConstantInt::getTrue

static ConstantInt * getTrue(LLVMContext &Context)

Definition:Constants.cpp:866

llvm::ConstantInt::getFalse

static ConstantInt * getFalse(LLVMContext &Context)

Definition:Constants.cpp:873

llvm::Constant

This is an important base class in LLVM.

Definition:Constant.h:42

llvm::Constant::isOneValue

bool isOneValue() const

Returns true if the value is one.

Definition:Constants.cpp:124

llvm::DenseMapBase::lookup

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

Definition:DenseMap.h:194

llvm::DenseMapBase::find

iterator find(const_arg_type_t< KeyT > Val)

Definition:DenseMap.h:156

llvm::DenseMapBase::begin

iterator begin()

Definition:DenseMap.h:75

llvm::DenseMapBase::count

size_type count(const_arg_type_t< KeyT > Val) const

Return 1 if the specified key is in the map, 0 otherwise.

Definition:DenseMap.h:152

llvm::DenseMapBase::end

iterator end()

Definition:DenseMap.h:84

llvm::DenseMapBase::insert

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

Definition:DenseMap.h:211

llvm::DenseMap

Definition:DenseMap.h:727

llvm::DomTreeNodeBase< BasicBlock >

llvm::DomTreeUpdater

Definition:DomTreeUpdater.h:30

llvm::DominatorTreeBase::verify

bool verify(VerificationLevel VL=VerificationLevel::Full) const

verify - checks if the tree is correct.

Definition:GenericDomTree.h:905

llvm::DominatorTreeBase::applyUpdates

void applyUpdates(ArrayRef< UpdateType > Updates)

Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...

Definition:GenericDomTree.h:612

llvm::DominatorTreeBase::insertEdge

void insertEdge(NodeT *From, NodeT *To)

Inform the dominator tree about a CFG edge insertion and update the tree.

Definition:GenericDomTree.h:652

llvm::DominatorTreeBase::Delete

static constexpr UpdateKind Delete

Definition:GenericDomTree.h:253

llvm::DominatorTreeBase::Insert

static constexpr UpdateKind Insert

Definition:GenericDomTree.h:252

llvm::DominatorTreeBase::deleteEdge

void deleteEdge(NodeT *From, NodeT *To)

Inform the dominator tree about a CFG edge deletion and update the tree.

Definition:GenericDomTree.h:670

llvm::DominatorTreeBase::getNode

DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const

getNode - return the (Post)DominatorTree node for the specified basic block.

Definition:GenericDomTree.h:401

llvm::DominatorTree

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

Definition:Dominators.h:162

llvm::DominatorTree::isReachableFromEntry

bool isReachableFromEntry(const Use &U) const

Provide an overload for a Use.

Definition:Dominators.cpp:321

llvm::DominatorTree::dominates

bool dominates(const BasicBlock *BB, const Use &U) const

Return true if the (end of the) basic block BB dominates the use U.

Definition:Dominators.cpp:122

llvm::FreezeInst

This class represents a freeze function that returns random concrete value if an operand is either a ...

Definition:Instructions.h:5088

llvm::Function

Definition:Function.h:63

llvm::ICFLoopSafetyInfo

This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...

Definition:MustExecute.h:131

llvm::ICFLoopSafetyInfo::isGuaranteedToExecute

bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override

Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...

Definition:MustExecute.cpp:285

llvm::ICFLoopSafetyInfo::computeLoopSafetyInfo

void computeLoopSafetyInfo(const Loop *CurLoop) override

Computes safety information for a loop checks loop body & header for the possibility of may throw exc...

Definition:MustExecute.cpp:77

llvm::ICmpInst::isRelational

bool isRelational() const

Return true if the predicate is relational (not EQ or NE).

Definition:Instructions.h:1305

llvm::IRBuilderBase::CreateFreeze

Value * CreateFreeze(Value *V, const Twine &Name="")

Definition:IRBuilder.h:2574

llvm::IRBuilderBase::CreateCondBr

BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)

Create a conditional 'br Cond, TrueDest, FalseDest' instruction.

Definition:IRBuilder.h:1164

llvm::IRBuilderBase::CreateZExt

Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)

Definition:IRBuilder.h:2033

llvm::IRBuilderBase::CreateAnd

Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")

Definition:IRBuilder.h:1518

llvm::IRBuilderBase::CreateOr

Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")

Definition:IRBuilder.h:1540

llvm::IRBuilderBase::SetInsertPoint

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

Definition:IRBuilder.h:199

llvm::IRBuilder

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

Definition:IRBuilder.h:2705

llvm::InstructionCost

Definition:InstructionCost.h:29

llvm::Instruction

Definition:Instruction.h:68

llvm::Instruction::clone

Instruction * clone() const

Create a copy of 'this' instruction that is identical in all ways except the following:

Definition:Instruction.cpp:1364

llvm::Instruction::dropLocation

void dropLocation()

Drop the instruction's debug location.

Definition:DebugInfo.cpp:984

llvm::Instruction::getDebugLoc

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

Definition:Instruction.h:511

llvm::Instruction::eraseFromParent

InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

Definition:Instruction.cpp:94

llvm::Instruction::getMetadata

MDNode * getMetadata(unsigned KindID) const

Get the metadata of given kind attached to this Instruction.

Definition:Instruction.h:426

llvm::Instruction::isTerminator

bool isTerminator() const

Definition:Instruction.h:313

llvm::Instruction::setMetadata

void setMetadata(unsigned KindID, MDNode *Node)

Set the metadata of the specified kind to the specified node.

Definition:Metadata.cpp:1679

llvm::Instruction::setDebugLoc

void setDebugLoc(DebugLoc Loc)

Set the debug location information for this instruction.

Definition:Instruction.h:508

llvm::Instruction::moveBefore

void moveBefore(Instruction *MovePos)

Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...

Definition:Instruction.cpp:175

llvm::Instruction::insertInto

InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)

Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...

Definition:Instruction.cpp:123

llvm::IntrinsicInst

A wrapper class for inspecting calls to intrinsic functions.

Definition:IntrinsicInst.h:48

llvm::LPMUpdater

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

Definition:LoopPassManager.h:229

llvm::LPMUpdater::markLoopAsDeleted

void markLoopAsDeleted(Loop &L, llvm::StringRef Name)

Loop passes should use this method to indicate they have deleted a loop from the nest.

Definition:LoopPassManager.h:249

llvm::LoopBase::contains

bool contains(const LoopT *L) const

Return true if the specified loop is contained within in this loop.

Definition:GenericLoopInfo.h:124

llvm::LoopBase::isInnermost

bool isInnermost() const

Return true if the loop does not contain any (natural) loops.

Definition:GenericLoopInfo.h:167

llvm::LoopBase::getNumBlocks

unsigned getNumBlocks() const

Get the number of blocks in this loop in constant time.

Definition:GenericLoopInfo.h:187

llvm::LoopBase::getHeader

BlockT * getHeader() const

Definition:GenericLoopInfo.h:90

llvm::LoopBase::addBasicBlockToLoop

void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)

This method is used by other analyses to update loop information.

Definition:GenericLoopInfoImpl.h:282

llvm::LoopBase::reserveBlocks

void reserveBlocks(unsigned size)

interface to do reserve() for Blocks

Definition:GenericLoopInfo.h:432

llvm::LoopBase::blocks

iterator_range< block_iterator > blocks() const

Definition:GenericLoopInfo.h:180

llvm::LoopBase::addChildLoop

void addChildLoop(LoopT *NewChild)

Add the specified loop to be a child of this loop.

Definition:GenericLoopInfo.h:391

llvm::LoopBase::getLoopPreheader

BlockT * getLoopPreheader() const

If there is a preheader for this loop, return it.

Definition:GenericLoopInfoImpl.h:210

llvm::LoopBase::getParentLoop

LoopT * getParentLoop() const

Return the parent loop if it exists or nullptr for top level loops.

Definition:GenericLoopInfo.h:99

llvm::LoopBase::isLoopExiting

bool isLoopExiting(const BlockT *BB) const

True if terminator in the block can branch to another block that is outside of the current loop.

Definition:GenericLoopInfo.h:227

llvm::LoopBase::removeChildLoop

LoopT * removeChildLoop(iterator I)

This removes the specified child from being a subloop of this loop.

Definition:GenericLoopInfo.h:400

llvm::LoopBlocksRPO

Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...

Definition:LoopIterator.h:172

llvm::LoopBlocksRPO::perform

void perform(const LoopInfo *LI)

Traverse the loop blocks and store the DFS result.

Definition:LoopIterator.h:180

llvm::LoopInfoBase::verify

void verify(const DominatorTreeBase< BlockT, false > &DomTree) const

Definition:GenericLoopInfoImpl.h:718

llvm::LoopInfoBase::addTopLevelLoop

void addTopLevelLoop(LoopT *New)

This adds the specified loop to the collection of top-level loops.

Definition:GenericLoopInfo.h:663

llvm::LoopInfoBase::end

iterator end() const

Definition:GenericLoopInfo.h:582

llvm::LoopInfoBase::AllocateLoop

LoopT * AllocateLoop(ArgsTy &&...Args)

Definition:GenericLoopInfo.h:570

llvm::LoopInfoBase::removeLoop

LoopT * removeLoop(iterator I)

This removes the specified top-level loop from this loop info object.

Definition:GenericLoopInfo.h:633

llvm::LoopInfoBase::changeLoopFor

void changeLoopFor(BlockT *BB, LoopT *L)

Change the top-level loop that contains BB to the specified loop.

Definition:GenericLoopInfo.h:644

llvm::LoopInfoBase::getLoopDepth

unsigned getLoopDepth(const BlockT *BB) const

Return the loop nesting level of the specified block.

Definition:GenericLoopInfo.h:613

llvm::LoopInfoBase::begin

iterator begin() const

Definition:GenericLoopInfo.h:581

llvm::LoopInfoBase::getLoopFor

LoopT * getLoopFor(const BlockT *BB) const

Return the inner most loop that BB lives in.

Definition:GenericLoopInfo.h:606

llvm::LoopInfoBase::destroy

void destroy(LoopT *L)

Destroy a loop that has been removed from the LoopInfo nest.

Definition:GenericLoopInfo.h:710

llvm::LoopInfo

Definition:LoopInfo.h:407

llvm::Loop

Represents a single loop in the control flow graph.

Definition:LoopInfo.h:39

llvm::Loop::getName

StringRef getName() const

Definition:LoopInfo.h:388

llvm::MDNode

Metadata node.

Definition:Metadata.h:1073

llvm::MDNode::get

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

Definition:Metadata.h:1549

llvm::MDString::get

static MDString * get(LLVMContext &Context, StringRef Str)

Definition:Metadata.cpp:606

llvm::MemoryDef

Represents a read-write access to memory, whether it is a must-alias, or a may-alias.

Definition:MemorySSA.h:370

llvm::MemorySSAAnalysis

An analysis that produces MemorySSA for a function.

Definition:MemorySSA.h:928

llvm::MemorySSAUpdater

Definition:MemorySSAUpdater.h:54

llvm::MemorySSAUpdater::getMemorySSA

MemorySSA * getMemorySSA() const

Get handle on MemorySSA.

Definition:MemorySSAUpdater.h:240

llvm::MemorySSAUpdater::removeEdge

void removeEdge(BasicBlock *From, BasicBlock *To)

Update the MemoryPhi in To following an edge deletion between From and To.

Definition:MemorySSAUpdater.cpp:531

llvm::MemorySSAUpdater::updateForClonedLoop

void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)

Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...

Definition:MemorySSAUpdater.cpp:669

llvm::MemorySSAUpdater::removeDuplicatePhiEdgesBetween

void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)

Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...

Definition:MemorySSAUpdater.cpp:538

llvm::MemorySSAUpdater::removeBlocks

void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)

Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.

Definition:MemorySSAUpdater.cpp:1357

llvm::MemorySSAUpdater::moveAllAfterSpliceBlocks

void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)

From block was spliced into From and To.

Definition:MemorySSAUpdater.cpp:1232

llvm::MemorySSAUpdater::createMemoryAccessInBB

MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)

Create a MemoryAccess in MemorySSA at a specified point in a block.

Definition:MemorySSAUpdater.cpp:1414

llvm::MemorySSAUpdater::applyInsertUpdates

void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)

Apply CFG insert updates, analogous with the DT edge updates.

Definition:MemorySSAUpdater.cpp:845

llvm::MemorySSAUpdater::applyUpdates

void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)

Apply CFG updates, analogous with the DT edge updates.

Definition:MemorySSAUpdater.cpp:794

llvm::MemorySSAUpdater::moveToPlace

void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)

Definition:MemorySSAUpdater.cpp:1185

llvm::MemorySSAUpdater::updateExitBlocksForClonedLoop

void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)

Update phi nodes in exit block successors following cloning.

Definition:MemorySSAUpdater.cpp:772

llvm::MemorySSA

Encapsulates MemorySSA, including all data associated with memory accesses.

Definition:MemorySSA.h:701

llvm::MemorySSA::verifyMemorySSA

void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const

Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...

Definition:MemorySSA.cpp:1905

llvm::MemorySSA::BeforeTerminator

@ BeforeTerminator

Definition:MemorySSA.h:790

llvm::MemorySSA::getMemoryAccess

MemoryUseOrDef * getMemoryAccess(const Instruction *I) const

Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.

Definition:MemorySSA.h:719

llvm::MemorySSA::getBlockDefs

const DefsList * getBlockDefs(const BasicBlock *BB) const

Return the list of MemoryDef's and MemoryPhi's for a given basic block.

Definition:MemorySSA.h:767

llvm::Module

A Module instance is used to store all the information related to an LLVM module.

Definition:Module.h:65

llvm::OuterAnalysisManagerProxy

An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...

Definition:PassManager.h:692

llvm::PHINode

Definition:Instructions.h:2600

llvm::PHINode::Create

static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)

Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...

Definition:Instructions.h:2635

llvm::PoisonValue::get

static PoisonValue * get(Type *T)

Static factory methods - Return an 'poison' object of the specified type.

Definition:Constants.cpp:1878

llvm::PreservedAnalyses

A set of analyses that are preserved following a run of a transformation pass.

Definition:Analysis.h:111

llvm::PreservedAnalyses::all

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

Definition:Analysis.h:117

llvm::ProfileSummaryAnalysis

An analysis pass based on the new PM to deliver ProfileSummaryInfo.

Definition:ProfileSummaryInfo.h:372

llvm::ProfileSummaryInfo

Analysis providing profile information.

Definition:ProfileSummaryInfo.h:41

llvm::ProfileSummaryInfo::hasProfileSummary

bool hasProfileSummary() const

Returns true if profile summary is available.

Definition:ProfileSummaryInfo.h:70

llvm::ProfileSummaryInfo::isColdBlock

bool isColdBlock(const BBType *BB, BFIT *BFI) const

Returns true if BasicBlock BB is considered cold.

Definition:ProfileSummaryInfo.h:204

llvm::ScalarEvolution

The main scalar evolution driver.

Definition:ScalarEvolution.h:447

llvm::ScalarEvolution::forgetLoop

void forgetLoop(const Loop *L)

This method should be called by the client when it has changed a loop in a way that may effect Scalar...

Definition:ScalarEvolution.cpp:8496

llvm::ScalarEvolution::forgetTopmostLoop

void forgetTopmostLoop(const Loop *L)

Definition:ScalarEvolution.cpp:8538

llvm::ScalarEvolution::forgetBlockAndLoopDispositions

void forgetBlockAndLoopDispositions(Value *V=nullptr)

Called when the client has changed the disposition of values in a loop or block.

Definition:ScalarEvolution.cpp:8597

llvm::ScalarEvolution::forgetLcssaPhiWithNewPredecessor

void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)

Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...

Definition:ScalarEvolution.cpp:8557

llvm::SelectInst

This class represents the LLVM 'select' instruction.

Definition:Instructions.h:1657

llvm::SetVector::size

size_type size() const

Determine the number of elements in the SetVector.

Definition:SetVector.h:98

llvm::SetVector::count

size_type count(const key_type &key) const

Count the number of elements of a given key in the SetVector.

Definition:SetVector.h:264

llvm::SetVector::begin

iterator begin()

Get an iterator to the beginning of the SetVector.

Definition:SetVector.h:103

llvm::SetVector::insert

bool insert(const value_type &X)

Insert a new element into the SetVector.

Definition:SetVector.h:162

llvm::SimpleLoopUnswitchPass::printPipeline

void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)

Definition:SimpleLoopUnswitch.cpp:3718

llvm::SimpleLoopUnswitchPass::run

PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)

Definition:SimpleLoopUnswitch.cpp:3682

llvm::SmallDenseMap

Definition:DenseMap.h:883

llvm::SmallPtrSetImplBase::size

size_type size() const

Definition:SmallPtrSet.h:94

llvm::SmallPtrSetImplBase::clear

void clear()

Definition:SmallPtrSet.h:97

llvm::SmallPtrSetImplBase::empty

bool empty() const

Definition:SmallPtrSet.h:93

llvm::SmallPtrSetImpl

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

Definition:SmallPtrSet.h:363

llvm::SmallPtrSetImpl::erase

bool erase(PtrType Ptr)

Remove pointer from the set.

Definition:SmallPtrSet.h:401

llvm::SmallPtrSetImpl::count

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

Definition:SmallPtrSet.h:452

llvm::SmallPtrSetImpl::insert

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

Definition:SmallPtrSet.h:384

llvm::SmallPtrSet

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

Definition:SmallPtrSet.h:519

llvm::SmallSetVector

A SetVector that performs no allocations if smaller than a certain size.

Definition:SetVector.h:370

llvm::SmallVectorBase::empty

bool empty() const

Definition:SmallVector.h:81

llvm::SmallVectorBase::size

size_t size() const

Definition:SmallVector.h:78

llvm::SmallVectorImpl

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

Definition:SmallVector.h:573

llvm::SmallVectorImpl::pop_back_val

T pop_back_val()

Definition:SmallVector.h:673

llvm::SmallVectorImpl::emplace_back

reference emplace_back(ArgTypes &&... Args)

Definition:SmallVector.h:937

llvm::SmallVectorImpl::reserve

void reserve(size_type N)

Definition:SmallVector.h:663

llvm::SmallVectorImpl::append

void append(ItTy in_start, ItTy in_end)

Add the specified range to the end of the SmallVector.

Definition:SmallVector.h:683

llvm::SmallVectorImpl::insert

iterator insert(iterator I, T &&Elt)

Definition:SmallVector.h:805

llvm::SmallVectorImpl::clear

void clear()

Definition:SmallVector.h:610

llvm::SmallVectorTemplateBase::push_back

void push_back(const T &Elt)

Definition:SmallVector.h:413

llvm::SmallVectorTemplateCommon::end

iterator end()

Definition:SmallVector.h:269

llvm::SmallVectorTemplateCommon::begin

iterator begin()

Definition:SmallVector.h:267

llvm::SmallVectorTemplateCommon::back

reference back()

Definition:SmallVector.h:308

llvm::SmallVector

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Definition:SmallVector.h:1196

llvm::StringRef

StringRef - Represent a constant reference to a string, i.e.

Definition:StringRef.h:51

llvm::SwitchInstProfUpdateWrapper

A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...

Definition:Instructions.h:3492

llvm::SwitchInstProfUpdateWrapper::setSuccessorWeight

void setSuccessorWeight(unsigned idx, CaseWeightOpt W)

Definition:Instructions.cpp:4141

llvm::SwitchInstProfUpdateWrapper::eraseFromParent

Instruction::InstListType::iterator eraseFromParent()

Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...

Definition:Instructions.cpp:4126

llvm::SwitchInstProfUpdateWrapper::addCase

void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)

Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...

Definition:Instructions.cpp:4107

llvm::SwitchInstProfUpdateWrapper::getSuccessorWeight

CaseWeightOpt getSuccessorWeight(unsigned idx)

Definition:Instructions.cpp:4135

llvm::SwitchInstProfUpdateWrapper::CaseWeightOpt

std::optional< uint32_t > CaseWeightOpt

Definition:Instructions.h:3503

llvm::SwitchInstProfUpdateWrapper::removeCase

SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)

Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.

Definition:Instructions.cpp:4093

llvm::SwitchInst::CaseHandleImpl::getSuccessorIndex

unsigned getSuccessorIndex() const

Returns successor index for current case successor.

Definition:Instructions.h:3233

llvm::SwitchInst::CaseHandleImpl::getCaseSuccessor

BasicBlockT * getCaseSuccessor() const

Resolves successor for current case.

Definition:Instructions.h:3222

llvm::SwitchInst::CaseHandleImpl::getCaseValue

ConstantIntT * getCaseValue() const

Resolves case value for current case.

Definition:Instructions.h:3215

llvm::SwitchInst::CaseHandle

Definition:Instructions.h:3250

llvm::SwitchInst

Multiway switch.

Definition:Instructions.h:3154

llvm::SwitchInst::getDefaultDest

BasicBlock * getDefaultDest() const

Definition:Instructions.h:3351

llvm::SwitchInst::Create

static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)

Definition:Instructions.h:3338

llvm::SwitchInst::setDefaultDest

void setDefaultDest(BasicBlock *DefaultCase)

Definition:Instructions.h:3361

llvm::SwitchInst::cases

iterator_range< CaseIt > cases()

Iteration adapter for range-for loops.

Definition:Instructions.h:3396

llvm::TargetTransformInfo

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

Definition:TargetTransformInfo.h:212

llvm::TargetTransformInfo::hasBranchDivergence

bool hasBranchDivergence(const Function *F=nullptr) const

Return true if branch divergence exists.

Definition:TargetTransformInfo.cpp:289

llvm::TargetTransformInfo::TargetCostKind

TargetCostKind

The kind of cost model.

Definition:TargetTransformInfo.h:263

llvm::TargetTransformInfo::TCK_CodeSize

@ TCK_CodeSize

Instruction code size.

Definition:TargetTransformInfo.h:266

llvm::TargetTransformInfo::TCK_SizeAndLatency

@ TCK_SizeAndLatency

The weighted sum of size and latency.

Definition:TargetTransformInfo.h:267

llvm::TargetTransformInfo::getInstructionCost

InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const

Estimate the cost of a given IR user when lowered.

Definition:TargetTransformInfo.cpp:270

llvm::TinyPtrVector

TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...

Definition:TinyPtrVector.h:29

llvm::TinyPtrVector::push_back

void push_back(EltTy NewVal)

Definition:TinyPtrVector.h:242

llvm::TinyPtrVector::empty

bool empty() const

Definition:TinyPtrVector.h:162

llvm::Twine

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

Definition:Twine.h:81

llvm::Type::getIntegerBitWidth

unsigned getIntegerBitWidth() const

llvm::Type::isIntegerTy

bool isIntegerTy() const

True if this is an instance of IntegerType.

Definition:Type.h:237

llvm::Use

A Use represents the edge between a Value definition and its users.

Definition:Use.h:43

llvm::ValueMap< const Value *, WeakTrackingVH >

llvm::ValueMap::lookup

ValueT lookup(const KeyT &Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

Definition:ValueMap.h:164

llvm::ValueMap::count

size_type count(const KeyT &Val) const

Return 1 if the specified key is in the map, 0 otherwise.

Definition:ValueMap.h:151

llvm::Value

LLVM Value Representation.

Definition:Value.h:74

llvm::Value::getType

Type * getType() const

All values are typed, get the type of this value.

Definition:Value.h:255

llvm::Value::setName

void setName(const Twine &Name)

Change the name of the value.

Definition:Value.cpp:377

llvm::Value::getContext

LLVMContext & getContext() const

All values hold a context through their type.

Definition:Value.cpp:1075

llvm::Value::uses

iterator_range< use_iterator > uses()

Definition:Value.h:376

llvm::Value::getName

StringRef getName() const

Return a constant reference to the value's name.

Definition:Value.cpp:309

llvm::cl::opt

Definition:CommandLine.h:1423

llvm::function_ref

An efficient, type-erasing, non-owning reference to a callable.

Definition:STLFunctionalExtras.h:37

llvm::ilist_detail::node_parent_access::getParent

const ParentTy * getParent() const

Definition:ilist_node.h:32

llvm::ilist_node_impl::getIterator

self_iterator getIterator()

Definition:ilist_node.h:132

llvm::raw_ostream

This class implements an extremely fast bulk output stream that can only output to a stream.

Definition:raw_ostream.h:52

uint64_t

ErrorHandling.h

llvm_unreachable

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

Definition:ErrorHandling.h:143

llvm::Intrinsic::getDeclarationIfExists

Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)

This version supports overloaded intrinsics.

Definition:Intrinsics.cpp:747

llvm::PatternMatch

Definition:PatternMatch.h:47

llvm::PatternMatch::match

bool match(Val *V, const Pattern &P)

Definition:PatternMatch.h:49

llvm::PatternMatch::m_One

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

Definition:PatternMatch.h:592

llvm::PatternMatch::m_Select

ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)

Matches SelectInst.

Definition:PatternMatch.h:1799

llvm::PatternMatch::m_LogicalOr

auto m_LogicalOr()

Matches L || R where L and R are arbitrary values.

Definition:PatternMatch.h:3099

llvm::PatternMatch::m_Br

brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)

Definition:PatternMatch.h:2220

llvm::PatternMatch::m_Value

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

Definition:PatternMatch.h:92

llvm::PatternMatch::m_ICmp

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

Definition:PatternMatch.h:1627

llvm::PatternMatch::m_LogicalAnd

auto m_LogicalAnd()

Matches L && R where L and R are arbitrary values.

Definition:PatternMatch.h:3081

llvm::PatternMatch::m_BasicBlock

class_match< BasicBlock > m_BasicBlock()

Match an arbitrary basic block value and ignore it.

Definition:PatternMatch.h:189

llvm::PatternMatch::m_Zero

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

Definition:PatternMatch.h:612

llvm::PatternMatch::m_CombineOr

match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)

Combine two pattern matchers matching L || R.

Definition:PatternMatch.h:239

llvm::cl::Hidden

@ Hidden

Definition:CommandLine.h:137

llvm::cl::init

initializer< Ty > init(const Ty &Val)

Definition:CommandLine.h:443

llvm

This is an optimization pass for GlobalISel generic memory operations.

Definition:AddressRanges.h:18

llvm::drop_begin

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

Definition:STLExtras.h:329

llvm::stable_sort

void stable_sort(R &&Range)

Definition:STLExtras.h:2037

llvm::find

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

Definition:STLExtras.h:1759

llvm::all_of

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

Definition:STLExtras.h:1739

llvm::RecursivelyDeleteTriviallyDeadInstructions

bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

If the specified value is a trivially dead instruction, delete it.

Definition:Local.cpp:546

llvm::successors

auto successors(const MachineBasicBlock *BB)

Definition:MachineBasicBlock.h:1376

llvm::make_range

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

Definition:iterator_range.h:77

llvm::append_range

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

Definition:STLExtras.h:2115

llvm::RemapDbgRecordRange

void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)

Remap the Values used in the DbgRecords Range using the value map VM.

Definition:ValueMapper.h:299

llvm::make_early_inc_range

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

Definition:STLExtras.h:657

llvm::findOptionMDForLoop

MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)

Find string metadata for a loop.

Definition:LoopInfo.cpp:1067

llvm::Desc

Op::Description Desc

Definition:DWARFExpression.cpp:23

llvm::any_of

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

Definition:STLExtras.h:1746

llvm::Log2_32

unsigned Log2_32(uint32_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

Definition:MathExtras.h:341

llvm::isGuard

bool isGuard(const User *U)

Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....

Definition:GuardUtils.cpp:18

llvm::reverse

auto reverse(ContainerTy &&C)

Definition:STLExtras.h:420

llvm::zip_first

detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)

zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.

Definition:STLExtras.h:877

llvm::sort

void sort(IteratorTy Start, IteratorTy End)

Definition:STLExtras.h:1664

llvm::RF_IgnoreMissingLocals

@ RF_IgnoreMissingLocals

If this flag is set, the remapper ignores missing function-local entries (Argument,...

Definition:ValueMapper.h:96

llvm::RF_NoModuleLevelChanges

@ RF_NoModuleLevelChanges

If this flag is set, the remapper knows that only local values within a function (such as an instruct...

Definition:ValueMapper.h:78

llvm::dbgs

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

Definition:Debug.cpp:163

llvm::RemapInstruction

void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)

Convert the instruction operands from referencing the current values into those specified by VM.

Definition:ValueMapper.h:277

llvm::VerifyLoopInfo

bool VerifyLoopInfo

Enable verification of loop info.

Definition:LoopInfo.cpp:51

llvm::CloneBasicBlock

BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr)

Return a copy of the specified basic block, but without embedding the block into a particular functio...

Definition:CloneFunction.cpp:44

llvm::VerifyMemorySSA

bool VerifyMemorySSA

Enables verification of MemorySSA.

Definition:MemorySSA.cpp:84

llvm::formDedicatedExitBlocks

bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)

Ensure that all exit blocks of the loop are dedicated exits.

Definition:LoopUtils.cpp:57

llvm::isGuaranteedNotToBeUndefOrPoison

bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)

Return true if this function can prove that V does not have undef bits and is never poison.

Definition:ValueTracking.cpp:7841

llvm::ValueToValueMapTy

ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy

Definition:MemorySSAUpdater.h:50

llvm::extractBranchWeights

bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)

Extract branch weights from MD_prof metadata.

Definition:ProfDataUtils.cpp:170

llvm::count_if

auto count_if(R &&Range, UnaryPredicate P)

Wrapper function around std::count_if to count the number of times an element satisfying a given pred...

Definition:STLExtras.h:1945

llvm::SplitBlock

BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)

Split the specified block at the specified instruction.

Definition:BasicBlockUtils.cpp:1084

llvm::getLoopPassPreservedAnalyses

PreservedAnalyses getLoopPassPreservedAnalyses()

Returns the minimum set of Analyses that all loop passes must preserve.

Definition:LoopAnalysisManager.cpp:138

llvm::makePostTransformationMetadata

llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)

Create a new LoopID after the loop has been transformed.

Definition:LoopInfo.cpp:1170

llvm::erase_if

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

Definition:STLExtras.h:2099

llvm::predecessors

auto predecessors(const MachineBasicBlock *BB)

Definition:MachineBasicBlock.h:1377

llvm::pred_empty

bool pred_empty(const BasicBlock *BB)

Definition:CFG.h:118

llvm::SplitBlockAndInsertIfThen

Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)

Split the containing block at the specified instruction - everything before SplitBefore stays in the ...

Definition:BasicBlockUtils.cpp:1609

llvm::SplitEdge

BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")

Split the edge connecting the specified blocks, and return the newly created basic block between From...

Definition:BasicBlockUtils.cpp:762

llvm::hasPartialIVCondition

std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)

Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...

Definition:LoopUtils.cpp:2058

llvm::formLCSSA

bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)

Put loop into LCSSA form.

Definition:LCSSA.cpp:443

std::swap

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)